timegrinder

package
v3.8.30 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 3, 2024 License: BSD-2-Clause, BSD-2-Clause Imports: 8 Imported by: 2

Documentation

Overview

Package timegrinder is a package for locating and parsing timestamps within arbitrary data. It is intended to be as efficient as possible, so the most-recently-successful extraction pattern is tracked for re-use. The package also provides functions for defining arbitrary extractions through the NewUserProcessor function.

Index

Constants

View Source
// Time layout strings for each built-in timestamp format, written against
// Go's reference time (Jan 2 15:04:05 2006). The purely numeric entries
// (UnixSecondsFormat through LDAPFormat) are sample values rather than
// layouts, as their own trailing comments note.
const (
	AnsiCFormat                 string = `Jan _2 15:04:05 2006`
	UnixFormat                  string = `Jan _2 15:04:05 MST 2006`
	RubyFormat                  string = `Jan _2 15:04:05 -0700 2006`
	RFC822Format                string = `02 Jan 06 15:04 MST`
	RFC822ZFormat               string = `02 Jan 06 15:04 -0700`
	RFC850Format                string = `02-Jan-06 15:04:05 MST`
	RFC1123Format               string = `02 Jan 2006 15:04:05 MST`
	RFC1123ZFormat              string = `02 Jan 2006 15:04:05 -0700`
	RFC3339Format               string = `2006-01-02T15:04:05Z07:00`
	RFC3339NanoFormat           string = `2006-01-02T15:04:05.999999999Z07:00`
	ZonelessRFC3339Format       string = `2006-01-02T15:04:05.999999999`
	ApacheFormat                string = `_2/Jan/2006:15:04:05 -0700`
	ApacheNoTzFormat            string = `_2/Jan/2006:15:04:05`
	NGINXFormat                 string = `2006/01/02 15:04:05`
	SyslogFormat                string = `Jan _2 15:04:05`
	SyslogFileFormat            string = `2006-01-02T15:04:05.999999999-07:00`
	SyslogFileTZFormat          string = `2006-01-02T15:04:05.999999999-0700`
	DPKGFormat                  string = `2006-01-02 15:04:05`
	SyslogVariantFormat         string = `Jan 02 2006 15:04:05`
	UnpaddedDateTimeFormat      string = `2006-1-2 15:04:05`
	UnpaddedMilliDateTimeFormat string = `2006-1-2 15:04:05.999999999`
	UnixSecondsFormat           string = "1234567890"          // Time formatting API doesn't work, this is just for docs
	UnixMilliFormat             string = `1136473445.99`       // Time formatting API doesn't work, this is just for docs
	UnixMsFormat                string = `1136473445000`       // Time formatting API doesn't work, this is just for docs
	UnixNanoFormat              string = `1136473445000000000` // Time formatting API doesn't work, this is just for docs
	LDAPFormat                  string = `123456789012345678`  // Time formatting API doesn't work, this is just for docs
	UKFormat                    string = `02/01/2006 15:04:05,99999`
	GravwellFormat              string = `1-2-2006 15:04:05.99999`
	BindFormat                  string = `02-Jan-2006 15:04:05.999`
	DirectAdminFormat           string = `2006:01:02-15:04:05`
)

Timestamp Formats

View Source
// Regular expressions, one per built-in timestamp format, describing what a
// timestamp of that format looks like inside arbitrary data. The numeric
// epoch patterns (UnixSecondsRegex through LDAPRegex) begin with \A, so they
// match only at the start of the input (after optional whitespace), and they
// capture the digits in group 1.
//
// NOTE(review): in RFC3339NanoRegex the `.` before `\d+` is unescaped, so it
// matches any character rather than only a literal dot.
// NOTE(review): the class `[\-|\+]` used for zone offsets also accepts a
// literal `|`; presumably only the sign characters are intended.
// NOTE(review): LDAPRegex (`\d{18}`) and UnixNanoRegex (`\d{18,19}`) both
// match an 18-digit value, so the two patterns overlap on such input.
const (
	AnsiCRegex                 string = `[JFMASOND][anebriyunlgpctov]+\s+\d{1,2}\s+\d\d:\d\d:\d\d\s+\d{4}`
	UnixRegex                  string = `[JFMASOND][anebriyunlgpctov]+\s+\d{1,2}\s+\d\d:\d\d:\d\d\s+[A-Z]{3}\s+\d{4}`
	RubyRegex                  string = `[JFMASOND][anebriyunlgpctov]+\s+\d{1,2}\s+\d\d:\d\d:\d\d\s+[\-|\+]\d{4}\s+\d{4}`
	RFC822Regex                string = `\d{2}\s[JFMASOND][anebriyunlgpctov]+\s+\d{2}\s\d\d:\d\d\s[A-Z]{3}`
	RFC822ZRegex               string = `\d{2}\s[JFMASOND][anebriyunlgpctov]+\s+\d{2}\s\d\d:\d\d\s[\-|\+]\d{4}`
	RFC850Regex                string = `\d{2}\-[JFMASOND][anebriyunlgpctov]+\-\d{2}\s\d\d:\d\d:\d\d\s[A-Z]{3}`
	RFC1123Regex               string = `\d{2} [JFMASOND][anebriyunlgpctov]+ \d{4}\s\d\d:\d\d:\d\d\s[A-Z]{3}`
	RFC1123ZRegex              string = `\d{2} [JFMASOND][anebriyunlgpctov]+ \d{4}\s\d\d:\d\d:\d\d\s[\-|\+]\d{4}`
	RFC3339Regex               string = `\d{4}-\d{2}-\d{2}T\d\d:\d\d:\d\d[Z\-+]`
	RFC3339NanoRegex           string = `\d{4}-\d{2}-\d{2}T\d\d:\d\d:\d\d.\d+[Z\-+]`
	ZonelessRFC3339Regex       string = `\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.*\d*`
	ApacheRegex                string = `\d{1,2}/[JFMASOND][anebriyunlgpctov]+/\d{4}:\d\d:\d\d:\d\d\s[\-|\+]\d{4}`
	ApacheNoTzRegex            string = `\d{1,2}/[JFMASOND][anebriyunlgpctov]+/\d{4}:\d\d:\d\d:\d\d`
	SyslogRegex                string = `[JFMASOND][anebriyunlgpctov]+\s+\d+\s+\d\d:\d\d:\d\d`
	SyslogFileRegex            string = `\d{4}-\d{2}-\d{2}T\d\d:\d\d:\d+\.?\d*[-+]\d\d:\d\d`
	SyslogFileTZRegex          string = `\d{4}-\d{2}-\d{2}T\d\d:\d\d:\d+\.?\d*[-+]\d\d\d\d`
	SyslogVariantRegex         string = `[JFMASOND][anebriyunlgpctov]+\s+\d{2}\s+\d\d\d\d\s+\d\d:\d\d:\d\d`
	DPKGRegex                  string = `\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d`
	NGINXRegex                 string = `\d{4}\/\d{2}\/\d{2}\s+\d{2}:\d{2}:\d{2}`
	UnpaddedDateTimeRegex      string = `\d\d\d\d-\d+-\d+\s+\d+:\d\d:\d\d`
	UnpaddedMilliDateTimeRegex string = `\d\d\d\d-\d+-\d+\s+\d+:\d\d:\d\d\.\d{1,9}`
	UnixSecondsRegex           string = `\A\s*(\d{9,10})(?:\D|$)`
	UnixMilliRegex             string = `\A\s*(\d{9,10}\.\d+)(?:\D|$)`
	UnixMsRegex                string = `\A\s*(\d{12,13})(?:\D|$)`
	UnixNanoRegex              string = `\A\s*(\d{18,19})(?:\D|$)`
	LDAPRegex                  string = `\A\s*(\d{18})(?:\D|$)`
	UKRegex                    string = `\d\d/\d\d/\d\d\d\d\s\d\d\:\d\d\:\d\d,\d{1,5}`
	GravwellRegex              string = `\d{1,2}\-\d{1,2}\-\d{4}\s+\d{1,2}\:\d{2}\:\d{2}(\.\d{1,6})?`
	BindRegex                  string = `\d{2}\-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)\-\d{4} \d{2}:\d{2}:\d{2}\.\d{1,3}`
	DirectAdminRegex           string = `\d{4}:\d{2}:\d{2}-\d{2}:\d{2}:\d{2}`
)

Regular Expression Extractors

View Source
// DEFAULT_TIMEGRINDER_SIZE is an exported integer default of 16.
// NOTE(review): its use is not visible here; presumably it sizes a
// TimeGrinder's processor list — confirm against the implementation.
const (
	DEFAULT_TIMEGRINDER_SIZE int = 16
)

Variables

View Source
// Sentinel errors exported by the package.
// NOTE(review): judging by their messages, these are reported when a custom
// format fails validation (see CustomFormat.Validate) — confirm.
var (
	ErrMissingName         = errors.New("Missing time extraction name")
	ErrMissingRegex        = errors.New("Missing extraction regular expression")
	ErrMissingFormat       = errors.New("Missing extraction time format")
	ErrInvalidFormat       = errors.New("Invalid time format, could not format and extract current time")
	ErrRegexFormatMismatch = errors.New("Could not match regex against provided format")
)

Functions

func Extract added in v3.4.4

func Extract(b []byte) (t time.Time, ok bool, err error)

func Match added in v3.8.6

func Match(b []byte) (start, end int, ok bool)

func NewAnsiCProcessor

func NewAnsiCProcessor() *processor

func NewApacheNoTZProcessor

func NewApacheNoTZProcessor() *processor

func NewApacheProcessor

func NewApacheProcessor() *processor

func NewBind added in v3.8.4

func NewBind() *processor

func NewDPKGProcessor

func NewDPKGProcessor() *processor

func NewDirectAdmin added in v3.8.16

func NewDirectAdmin() *processor

func NewGravwell

func NewGravwell() *processor

func NewLDAPProcessor

func NewLDAPProcessor() *ldapProcessor

We assume you're not ingesting very old data, so we look for exactly 18 digits (see LDAPRegex). Note that LDAP timestamps count 100-nanosecond intervals since 1601, not nanoseconds since 1970.

func NewNGINXProcessor

func NewNGINXProcessor() *processor

func NewRFC1123Processor

func NewRFC1123Processor() *processor

func NewRFC1123ZProcessor

func NewRFC1123ZProcessor() *processor

func NewRFC3339NanoProcessor

func NewRFC3339NanoProcessor() *processor

func NewRFC3339Processor

func NewRFC3339Processor() *processor

func NewRFC822Processor

func NewRFC822Processor() *processor

func NewRFC822ZProcessor

func NewRFC822ZProcessor() *processor

func NewRFC850Processor

func NewRFC850Processor() *processor

func NewRubyProcessor

func NewRubyProcessor() *processor

func NewSyslogFileProcessor

func NewSyslogFileProcessor() *processor

func NewSyslogFileProcessorTZ2

func NewSyslogFileProcessorTZ2() *processor

func NewSyslogProcessor

func NewSyslogProcessor() *syslogProcessor

func NewSyslogVariant

func NewSyslogVariant() *processor

func NewUnixMilliTimeProcessor

func NewUnixMilliTimeProcessor() *unixProcessor

func NewUnixMsTimeProcessor

func NewUnixMsTimeProcessor() *unixMsProcessor

We assume you're not ingesting data from 1970, so we look for 12 to 13 digits of milliseconds (see UnixMsRegex).

func NewUnixNanoTimeProcessor

func NewUnixNanoTimeProcessor() *unixNanoProcessor

We assume you're not ingesting data from 1970, so we look for 18 to 19 digits of nanoseconds (see UnixNanoRegex).

func NewUnixProcessor

func NewUnixProcessor() *processor

func NewUnixSecondsProcessor

func NewUnixSecondsProcessor() *unixSecondsProcessor

func NewUnpaddedDateTime

func NewUnpaddedDateTime() *processor

func NewUnpaddedMilliDateTime

func NewUnpaddedMilliDateTime() *processor

func NewUserProcessor

func NewUserProcessor(name, rxps, fmts string) (*processor, error)

func NewZonelessRFC3339

func NewZonelessRFC3339() *processor

func ValidateFormatOverride

func ValidateFormatOverride(s string) (err error)

Types

type Config

type Config struct {
	// EnableLeftMostSeed forces TimeGrinder to scan all possible formats on
	// the first entry, seeding with the left-most match. We assume that most
	// streams are not going to use a bunch of different timestamp formats, so
	// we take the hit on the first iteration to try to get the left-most
	// time format.
	EnableLeftMostSeed bool
	// FormatOverride sets a format (e.g. "AnsiC") which should be tried first during parsing.
	FormatOverride string
}

Config defines a few configuration options when instantiating a new TimeGrinder.

type CustomFormat added in v3.7.1

type CustomFormat struct {
	// Normal custom format extraction fields. Per the Validate documentation,
	// a name, an extraction regex, and a time decoding format are required.
	Name   string
	Regex  string
	Format string

	// Extraction_Regex is an optional pre-extraction step that can pull out
	// the meat of a timestamp before the timestamp itself is handled.
	Extraction_Regex string
	// contains filtered or unexported fields
}

func (CustomFormat) ExtractionRegex added in v3.7.1

func (cf CustomFormat) ExtractionRegex() string

func (*CustomFormat) Validate added in v3.7.1

func (cf *CustomFormat) Validate() (err error)

Validate checks that the custom format is well formed and usable. We require a name, extraction regex, and time decoding format. We will attempt to compile the regex and will also try to encode and decode the time format; the time format must be capable of both encoding and decoding. Validate will also detect if the format is missing a date so that extractions can compensate.

type Format

// Format is the string name of a timestamp format (e.g. `AnsiC`).
type Format string

Timestamp Override Names

// Names of the built-in timestamp formats, usable as override names. The
// documented example value for Config.FormatOverride ("AnsiC") is one of them.
const (
	AnsiC                 Format = `AnsiC`
	Unix                  Format = `Unix`
	Ruby                  Format = `Ruby`
	RFC822                Format = `RFC822`
	RFC822Z               Format = `RFC822Z`
	RFC850                Format = `RFC850`
	RFC1123               Format = `RFC1123`
	RFC1123Z              Format = `RFC1123Z`
	RFC3339               Format = `RFC3339`
	RFC3339Nano           Format = `RFC3339Nano`
	Apache                Format = `Apache`
	ApacheNoTz            Format = `ApacheNoTz`
	Syslog                Format = `Syslog`
	SyslogFile            Format = `SyslogFile`
	SyslogFileTZ          Format = `SyslogFileTZ`
	DPKG                  Format = `DPKG`
	NGINX                 Format = `NGINX`
	UnixMilli             Format = `UnixMilli`
	ZonelessRFC3339       Format = `ZonelessRFC3339`
	SyslogVariant         Format = `SyslogVariant`
	UnpaddedDateTime      Format = `UnpaddedDateTime`
	UnpaddedMilliDateTime Format = `UnpaddedMilliDateTime`
	UnixSeconds           Format = `UnixSeconds`
	UnixMs                Format = `UnixMs`
	UnixNano              Format = `UnixNano`
	LDAP                  Format = `LDAP`
	UK                    Format = `UK`
	Bind                  Format = `Bind`
	Gravwell              Format = `Gravwell`
	DirectAdmin           Format = `DirectAdmin`
)

func FormatDirective deprecated

func FormatDirective(s string) (r Format, err error)

FormatDirective takes a string and attempts to match it against a case-insensitive format directive. This function is useful for taking string designations for time formats, checking that they are valid, and converting them to a Format for overriding the timegrinder.

Deprecated: The directive string should be entirely handled by an initialized timegrinder

func (Format) String

func (o Format) String() string

func (Format) ToLower

func (o Format) ToLower() string

type Processor

// Processor is the interface implemented by a single timestamp format
// handler. A Processor can be registered with TimeGrinder.AddProcessor.
type Processor interface {
	// Extract attempts to parse a timestamp out of the data, given a location.
	// NOTE(review): the results are presumably the time, a success flag, and
	// the offset of the match in the data — confirm.
	Extract([]byte, *time.Location) (time.Time, bool, int)
	// Match looks for a candidate timestamp in the data.
	// NOTE(review): the results presumably mirror TimeGrinder.Match
	// (start, end, ok) — confirm.
	Match([]byte) (int, int, bool)
	// Format returns a string. NOTE(review): presumably the time layout — confirm.
	Format() string
	// ToString renders the given time as a string.
	ToString(time.Time) string
	// ExtractionRegex returns a string. NOTE(review): presumably the regular
	// expression used to locate the timestamp — confirm.
	ExtractionRegex() string
	// Name returns the processor's name.
	Name() string
}

func NewCustomProcessor added in v3.7.1

func NewCustomProcessor(cf CustomFormat) (p Processor, err error)

func NewUK

func NewUK() Processor

type TimeGrinder

// TimeGrinder embeds Config, so the configuration fields are accessible
// directly on the TimeGrinder value. Construct one with New.
type TimeGrinder struct {
	Config
	// contains filtered or unexported fields
}

func New

func New(c Config) (tg *TimeGrinder, err error)

New constructs and returns a new TimeGrinder object. On error, it returns a nil TimeGrinder and a non-nil error. The TimeGrinder object is completely safe for concurrent use.

func NewTimeGrinder

func NewTimeGrinder(c Config) (*TimeGrinder, error)

NewTimeGrinder just calls New; it is maintained for API compatibility but may go away soon. Use New.

func (*TimeGrinder) AddProcessor

func (tg *TimeGrinder) AddProcessor(p Processor) (idx int, err error)

AddProcessor inserts a new Processor at the *beginning* of the processor list. For compatibility, it still returns the index of the inserted processor, but that index will always be 0.

func (*TimeGrinder) DebugExtract

func (tg *TimeGrinder) DebugExtract(data []byte) (t time.Time, offset int, name string, err error)

DebugExtract returns a time, offset, name, and error. If no time was extracted, the offset is -1. Error indicates a catastrophic failure.

func (*TimeGrinder) DebugMatch added in v3.8.8

func (tg *TimeGrinder) DebugMatch(data []byte) (ts time.Time, name string, start, end int, ok bool)

DebugMatch attempts to match a timestamp within a given data set and returns additional metadata about which processor matched and where in the data it matched

func (*TimeGrinder) Extract

func (tg *TimeGrinder) Extract(data []byte) (t time.Time, ok bool, err error)

Extract returns a time, an ok flag, and an error. If no time can be extracted, time is the zero value and ok is false. Error indicates a catastrophic failure.

func (*TimeGrinder) GetProcessor added in v3.8.8

func (tg *TimeGrinder) GetProcessor(name string) (p Processor, ok bool)

func (*TimeGrinder) Match

func (tg *TimeGrinder) Match(data []byte) (start, end int, ok bool)

Match identifies where in a byte array a properly formatted timestamp could be and returns the indexes in the data slice of that format. It DOES NOT attempt to parse the timestamp. This is a faster way to say "a timestamp could be here". ok is always true on successful match

func (*TimeGrinder) OverrideProcessor

func (tg *TimeGrinder) OverrideProcessor() (Processor, error)

func (*TimeGrinder) SetFormatOverride added in v3.8.6

func (tg *TimeGrinder) SetFormatOverride(v string) (err error)

func (*TimeGrinder) SetLocalTime

func (tg *TimeGrinder) SetLocalTime()

func (*TimeGrinder) SetTimezone

func (tg *TimeGrinder) SetTimezone(f string) error

func (*TimeGrinder) SetUTC

func (tg *TimeGrinder) SetUTC()

Directories

Path Synopsis
************************************************************************
************************************************************************

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL