syntax

package module
v0.0.0-...-b3f0c40 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 19, 2021 License: MIT Imports: 4 Imported by: 4

README

Package regex/syntax

Package syntax provides a regular expression parser as well as AST definitions.

Rationale

The advantages of this package over stdlib regexp/syntax:

  1. Does not perform transformations/optimizations during parsing. The produced parse tree is lossless.

  2. Simpler AST representation.

  3. Can parse most PCRE operations in addition to re2 syntax. It can also handle PHP/Perl style patterns with delimiters.

  4. This package is easier to extend than something from the standard library.

This package makes almost no assumptions about how the generated AST is going to be used, so it preserves as much syntax information as possible.

It's easy to write another intermediate representation on top of it. The main function of this package is to convert a textual regexp pattern into a more structured form that can be processed more easily.

Users

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Expr

// Expr is a single node of the parsed regexp tree: an operation together
// with its sub-expressions, its location in the pattern and its text.
type Expr struct {
	// Op is the operation that this expression performs.
	// See the Operation constants (`operation.go`).
	Op Operation

	// Form distinguishes alternative spellings of the same Op
	// (for example FormEscapeHexFull marks the `\x{F}` style of OpEscapeHex).
	// See the Form constants and the Operation constants documentation.
	Form Form

	// Pos describes a source location inside regexp pattern.
	Pos Position

	// Args is a list of sub-expressions of this expression.
	//
	// See Operation constants documentation to learn how to
	// interpret the particular expression args.
	Args []Expr

	// Value holds expression textual value.
	//
	// Usually, that value is identical to src[Begin():End()],
	// but this is not true for programmatically generated objects.
	Value string
	// contains filtered or unexported fields
}

func (Expr) Begin

func (e Expr) Begin() uint16

Begin returns expression leftmost offset.

func (Expr) End

func (e Expr) End() uint16

End returns expression rightmost offset.

func (Expr) LastArg

func (e Expr) LastArg() Expr

LastArg returns expression last argument.

Should not be called on expressions that may have 0 arguments.

type Form

// Form identifies which syntactic variant of an operation was used in the
// pattern. It is stored in Expr.Form.
type Form byte
const (
	// FormDefault is the zero value: the operation's default syntax.
	FormDefault Form = iota
	// FormEscapeHexFull is the braced OpEscapeHex form: `\x{10FFFF}` `\x{F}`.
	FormEscapeHexFull
	// FormEscapeUniFull is the braced OpEscapeUni form: `\p{Greek}` `\p{^L}`.
	FormEscapeUniFull
	// FormNamedCaptureAngle is the OpNamedCapture form `(?<foo>abc)`.
	FormNamedCaptureAngle
	// FormNamedCaptureQuote is the OpNamedCapture form `(?'foo'abc)`.
	FormNamedCaptureQuote
	// FormQuoteUnclosed is the OpQuote form without a closing \E: `\Qabc`.
	FormQuoteUnclosed
)

type Operation

// Operation identifies the kind of an expression node; it is stored in Expr.Op.
// The comment on each constant describes how Expr.Args should be interpreted.
type Operation byte
const (
	// OpNone is the zero value. Together with OpNone2 it can be used
	// to cover all ops in a range (see OpNone2).
	OpNone Operation = iota

	// OpConcat is a concatenation of ops.
	// Examples: `xy` `abc\d` `` (empty pattern)
	// Args - concatenated ops
	//
	// As a special case, OpConcat with 0 Args is used for "empty"
	// set of operations.
	OpConcat

	// OpDot is a '.' wildcard.
	OpDot

	// OpAlt is x|y alternation of ops.
	// Examples: `a|bc` `x(.*?)|y(.*?)`
	// Args - union-connected regexp branches
	OpAlt

	// OpStar is a shorthand for {0,} repetition.
	// Examples: `x*`
	// Args[0] - repeated expression
	OpStar

	// OpPlus is a shorthand for {1,} repetition.
	// Examples: `x+`
	// Args[0] - repeated expression
	OpPlus

	// OpQuestion is a shorthand for {0,1} repetition.
	// Examples: `x?`
	// Args[0] - repeated expression
	OpQuestion

	// OpNonGreedy makes its operand quantifier non-greedy.
	// Examples: `x??` `x*?` `x+?`
	// Args[0] - quantified expression
	OpNonGreedy

	// OpPossessive makes its operand quantifier possessive.
	// Examples: `x?+` `x*+` `x++`
	// Args[0] - quantified expression
	OpPossessive

	// OpCaret is ^ anchor.
	OpCaret

	// OpDollar is $ anchor.
	OpDollar

	// OpLiteral is a collection of consecutive chars.
	// Examples: `ab` `10x`
	// Args - enclosed characters (OpChar)
	OpLiteral

	// OpChar is a single literal pattern character.
	// Examples: `a` `6` `ф`
	OpChar

	// OpString is an artificial element that is used in other expressions.
	OpString

	// OpQuote is a \Q...\E enclosed literal.
	// Examples: `\Q.?\E` `\Q?q[]=1`
	// FormQuoteUnclosed: `\Qabc`
	// Args[0] - literal value (OpString)
	OpQuote

	// OpEscapeChar is a single char escape.
	// Examples: `\d` `\a` `\n`
	// Args[0] - escaped value (OpString)
	OpEscapeChar

	// OpEscapeMeta is an escaped meta char.
	// Examples: `\(` `\[` `\+`
	// Args[0] - escaped value (OpString)
	OpEscapeMeta

	// OpEscapeOctal is an octal char code escape (up to 3 digits).
	// Examples: `\123` `\12`
	// Args[0] - escaped value (OpString)
	OpEscapeOctal

	// OpEscapeHex is a hex char code escape.
	// Examples: `\x7F` `\xF7`
	// FormEscapeHexFull examples: `\x{10FFFF}` `\x{F}`.
	// Args[0] - escaped value (OpString)
	OpEscapeHex

	// OpEscapeUni is a Unicode char class escape.
	// Examples: `\pS` `\pL` `\PL`
	// FormEscapeUniFull examples: `\p{Greek}` `\p{Symbol}` `\p{^L}`
	// Args[0] - escaped value (OpString)
	OpEscapeUni

	// OpCharClass is a char class enclosed in [].
	// Examples: `[abc]` `[a-z0-9\]]`
	// Args - char class elements (can include OpCharRange and OpPosixClass)
	OpCharClass

	// OpNegCharClass is a negated char class enclosed in [].
	// Examples: `[^abc]` `[^a-z0-9\]]`
	// Args - char class elements (can include OpCharRange and OpPosixClass)
	OpNegCharClass

	// OpCharRange is an inclusive char range inside a char class.
	// Examples: `0-9` `A-Z`
	// Args[0] - range lower bound
	// Args[1] - range upper bound
	OpCharRange

	// OpPosixClass is a named ASCII char set inside a char class.
	// Examples: `[:alpha:]` `[:blank:]`
	OpPosixClass

	// OpRepeat is a {min,max} repetition quantifier.
	// Examples: `x{5}` `x{min,max}` `x{min,}`
	// Args[0] - repeated expression
	// Args[1] - repeat count (OpString)
	OpRepeat

	// OpCapture is `(re)` capturing group.
	// Examples: `(abc)` `(x|y)`
	// Args[0] - enclosed expression
	OpCapture

	// OpNamedCapture is `(?P<name>re)` capturing group.
	// Examples: `(?P<foo>abc)` `(?P<name>x|y)`
	// FormNamedCaptureAngle examples: `(?<foo>abc)` `(?<name>x|y)`
	// FormNamedCaptureQuote examples: `(?'foo'abc)` `(?'name'x|y)`
	// Args[0] - enclosed expression (OpConcat with 0 args for empty group)
	// Args[1] - group name (OpString)
	OpNamedCapture

	// OpGroup is `(?:re)` non-capturing group.
	// Examples: `(?:abc)` `(?:x|y)`
	// Args[0] - enclosed expression (OpConcat with 0 args for empty group)
	OpGroup

	// OpGroupWithFlags is `(?flags:re)` non-capturing group.
	// Examples: `(?i:abc)` `(?i:x|y)`
	// Args[0] - enclosed expression (OpConcat with 0 args for empty group)
	// Args[1] - flags (OpString)
	OpGroupWithFlags

	// OpAtomicGroup is `(?>re)` non-capturing group without backtracking.
	// Examples: `(?>foo)` `(?>)`
	// Args[0] - enclosed expression (OpConcat with 0 args for empty group)
	OpAtomicGroup

	// OpPositiveLookahead is `(?=re)` asserts that following text matches re.
	// Examples: `(?=foo)`
	// Args[0] - enclosed expression (OpConcat with 0 args for empty group)
	OpPositiveLookahead

	// OpNegativeLookahead is `(?!re)` asserts that following text doesn't match re.
	// Examples: `(?!foo)`
	// Args[0] - enclosed expression (OpConcat with 0 args for empty group)
	OpNegativeLookahead

	// OpPositiveLookbehind is `(?<=re)` asserts that preceding text matches re.
	// Examples: `(?<=foo)`
	// Args[0] - enclosed expression (OpConcat with 0 args for empty group)
	OpPositiveLookbehind

	// OpNegativeLookbehind is `(?<!re)` asserts that preceding text doesn't match re.
	// Examples: `(?<!foo)`
	// Args[0] - enclosed expression (OpConcat with 0 args for empty group)
	OpNegativeLookbehind

	// OpFlagOnlyGroup is `(?flags)` form that affects current group flags.
	// Examples: `(?i)` `(?i-m)` `(?-im)`
	// Args[0] - flags (OpString)
	OpFlagOnlyGroup

	// OpComment is a group-like regexp comment expression.
	// Examples: `(?#text)` `(?#)`
	OpComment

	// OpNone2 is a sentinel value that is never part of the AST.
	// OpNone and OpNone2 can be used to cover all ops in a range.
	OpNone2
)

func (Operation) String

func (i Operation) String() string

type ParseError

// ParseError is an error value (it has an Error method) that pairs a
// message with a position.
// NOTE(review): presumably this is the error returned by Parser.Parse and
// Parser.ParsePCRE for malformed patterns — confirm against the parser.
type ParseError struct {
	// Pos is the location the error refers to; presumably offsets into
	// the pattern being parsed — confirm.
	Pos     Position
	// Message is the human-readable description of the problem.
	Message string
}

func (ParseError) Error

func (e ParseError) Error() string

type Parser

// Parser turns textual regexp patterns into Expr trees.
// It is created with NewParser and used through its Parse and ParsePCRE methods.
type Parser struct {
	// contains filtered or unexported fields
}

func NewParser

func NewParser(opts *ParserOptions) *Parser

func (*Parser) Parse

func (p *Parser) Parse(pattern string) (result *Regexp, err error)

func (*Parser) ParsePCRE

func (p *Parser) ParsePCRE(pattern string) (*RegexpPCRE, error)

ParsePCRE parses a PHP-style pattern with delimiters. An example of such a pattern is `/foo/i`.

type ParserOptions

// ParserOptions holds the settings passed to NewParser.
type ParserOptions struct {
	// NoLiterals disables OpChar merging into OpLiteral.
	NoLiterals bool
}

type Position

// Position is a pair of offsets describing a span.
// It is used for Expr.Pos (a source location inside the regexp pattern)
// and ParseError.Pos.
// NOTE(review): Expr.Value is documented as usually equal to
// src[Begin():End()], which suggests End is exclusive — confirm.
type Position struct {
	// Begin is the leftmost offset of the span.
	Begin uint16
	// End is the rightmost offset of the span.
	End   uint16
}

type Regexp

// Regexp is the result type of Parser.Parse.
type Regexp struct {
	// Pattern is presumably the pattern text that was parsed — confirm.
	Pattern string
	// Expr is presumably the root of the parsed expression tree — confirm.
	Expr    Expr
}

type RegexpPCRE

// RegexpPCRE is the result type of Parser.ParsePCRE, which parses
// PHP-style patterns with delimiters such as `/foo/i`.
//
// NOTE(review): the field descriptions below are inferred from the field
// names and the ParsePCRE documentation; confirm against the parser.
type RegexpPCRE struct {
	// Pattern is presumably the regexp text between the delimiters.
	Pattern string
	// Expr is presumably the root of the expression tree parsed from Pattern.
	Expr    Expr

	// Source is presumably the full input, delimiters and modifiers included.
	Source    string
	// Modifiers presumably holds the trailing modifier characters
	// (the `i` in `/foo/i`); see HasModifier.
	Modifiers string
	// Delim presumably holds the opening and closing delimiter bytes.
	Delim     [2]byte
}

func (*RegexpPCRE) HasModifier

func (re *RegexpPCRE) HasModifier(mod byte) bool

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL