syntax

package

v1.7.0 Latest Latest Go to latest Published: Jul 17, 2022 License: MIT Imports: 11 Imported by: 18

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/dlclark/regexp2

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func CharDescription(ch rune) string
func Escape(input string) string
func IsECMAWordChar(r rune) bool
func IsWordChar(r rune) bool
func Unescape(input string) (string, error)
type AnchorLoc
- func (anchors AnchorLoc) String() string
type BmPrefix
- func (b *BmPrefix) Dump(indent string) string
- func (b *BmPrefix) IsMatch(text []rune, index, beglimit, endlimit int) bool
- func (b *BmPrefix) Scan(text []rune, index, beglimit, endlimit int) int
- func (b *BmPrefix) String() string
type CharSet
- func (c CharSet) CharIn(ch rune) bool
- func (c CharSet) Copy() CharSet
- func (c CharSet) HasSubtraction() bool
- func (c CharSet) IsEmpty() bool
- func (c CharSet) IsMergeable() bool
- func (c CharSet) IsNegated() bool
- func (c CharSet) IsSingleton() bool
- func (c CharSet) IsSingletonInverse() bool
- func (c CharSet) SingletonChar() rune
- func (c CharSet) String() string
type Code
- func Write(tree *RegexTree) (*Code, error)
- func (c *Code) Dump() string
- func (c *Code) OpcodeDescription(offset int) string
type Error
- func (e *Error) Error() string
type ErrorCode
- func (e ErrorCode) String() string
type InstOp
type Prefix
type RegexOptions
type RegexTree
- func Parse(re string, op RegexOptions) (*RegexTree, error)
- func (t *RegexTree) Dump() string
type ReplacerData
- func NewReplacerData(rep string, caps map[int]int, capsize int, capnames map[string]int, ...) (*ReplacerData, error)

Constants ¶

View Source

// Lowercase operation codes. Each constant names one operation, described by
// its trailing comment. No type is given, so all four are untyped integer
// constants.
// NOTE(review): presumably consumed by the package's case-mapping tables —
// confirm against their use; the tables are not part of this listing.
const (
	LowercaseSet = 0 // Set to arg.
	LowercaseAdd = 1 // Add arg.
	LowercaseBor = 2 // Bitwise or with 1.
	LowercaseBad = 3 // Bitwise and with 1 and add original.
)

View Source

// Instruction opcodes (0-42) followed by the mask and modifier bits that are
// combined with an opcode.
//
// The trailing comment on each opcode is a terse legend carried over as-is:
// flags, then operands, then an example pattern.
//
// Only Onerep is declared with type InstOp. Every other constant in this
// block has an explicit value and no type, so it is an untyped integer
// constant rather than an InstOp.
const (
	Onerep    InstOp = 0 // lef,back char,min,max    a {n}
	Notonerep        = 1 // lef,back char,min,max    .{n}
	Setrep           = 2 // lef,back set,min,max     [\d]{n}

	Oneloop    = 3 // lef,back char,min,max    a {,n}
	Notoneloop = 4 // lef,back char,min,max    .{,n}
	Setloop    = 5 // lef,back set,min,max     [\d]{,n}

	Onelazy    = 6 // lef,back char,min,max    a {,n}?
	Notonelazy = 7 // lef,back char,min,max    .{,n}?
	Setlazy    = 8 // lef,back set,min,max     [\d]{,n}?

	One    = 9  // lef      char            a
	Notone = 10 // lef      char            [^a]
	Set    = 11 // lef      set             [a-z\s]  \w \s \d

	Multi = 12 // lef      string          abcd
	Ref   = 13 // lef      group           \#

	Bol         = 14 //                          ^
	Eol         = 15 //                          $
	Boundary    = 16 //                          \b
	Nonboundary = 17 //                          \B
	Beginning   = 18 //                          \A
	Start       = 19 //                          \G
	EndZ        = 20 //                          \Z
	End         = 21 //                          \z

	Nothing = 22 //                          Reject!

	Lazybranch      = 23 // back     jump            straight first
	Branchmark      = 24 // back     jump            branch first for loop
	Lazybranchmark  = 25 // back     jump            straight first for loop
	Nullcount       = 26 // back     val             set counter, null mark
	Setcount        = 27 // back     val             set counter, make mark
	Branchcount     = 28 // back     jump,limit      branch++ if zero<=c<limit
	Lazybranchcount = 29 // back     jump,limit      same, but straight first
	Nullmark        = 30 // back                     save position
	Setmark         = 31 // back                     save position
	Capturemark     = 32 // back     group           define group
	Getmark         = 33 // back                     recall position
	Setjump         = 34 // back                     save backtrack state
	Backjump        = 35 //                          zap back to saved state
	Forejump        = 36 //                          zap backtracking state
	Testref         = 37 //                          backtrack if ref undefined
	Goto            = 38 //          jump            just go

	Prune = 39 //                          prune it baby
	Stop  = 40 //                          done!

	ECMABoundary    = 41 //                          \b
	NonECMABoundary = 42 //                          \B

	// Mask (63 = 0x3F) covers the low six bits, which is enough for every
	// opcode above. Rtl, Back, Back2 and Ci are single bits above the mask
	// (1<<6 through 1<<9).
	Mask  = 63  // Mask to get unmodified ordinary operator
	Rtl   = 64  // bit to indicate that we're reverse scanning.
	Back  = 128 // bit to indicate that we're backtracking.
	Back2 = 256 // bit to indicate that we're backtracking on a second branch.
	Ci    = 512 // bit to indicate that we're case-insensitive.
)

View Source

// Regex option flags. Each value is a distinct power of two, so the flags
// occupy separate bits.
//
// Only IgnoreCase is declared with type RegexOptions. The remaining constants
// have an explicit value and no type, so they are untyped integer constants.
//
// NOTE(review): the quoted letter in each trailing comment is presumably the
// option's short/inline letter — confirm against the parser.
const (
	IgnoreCase              RegexOptions = 0x0001 // "i"
	Multiline                            = 0x0002 // "m"
	ExplicitCapture                      = 0x0004 // "n"
	Compiled                             = 0x0008 // "c"
	Singleline                           = 0x0010 // "s"
	IgnorePatternWhitespace              = 0x0020 // "x"
	RightToLeft                          = 0x0040 // "r"
	Debug                                = 0x0080 // "d"
	ECMAScript                           = 0x0100 // "e"
	RE2                                  = 0x0200 // RE2 compat mode
	Unicode                              = 0x0400 // "u"
)

View Source

// Error messages for regex parse failures.
//
// Only ErrInternalError is declared with type ErrorCode. The remaining
// constants have an explicit value and no type, so they are untyped string
// constants.
//
// Several messages contain fmt-style verbs (%v, %c).
// NOTE(review): these are presumably filled in from Error.Args when the
// error is rendered — confirm in (*Error).Error.
const (
	// internal issue
	ErrInternalError ErrorCode = "regexp/syntax: internal error"
	// Parser errors
	ErrUnterminatedComment        = "unterminated comment"
	ErrInvalidCharRange           = "invalid character class range"
	ErrInvalidRepeatSize          = "invalid repeat count"
	ErrInvalidUTF8                = "invalid UTF-8"
	ErrCaptureGroupOutOfRange     = "capture group number out of range"
	ErrUnexpectedParen            = "unexpected )"
	ErrMissingParen               = "missing closing )"
	ErrMissingBrace               = "missing closing }"
	ErrInvalidRepeatOp            = "invalid nested repetition operator"
	ErrMissingRepeatArgument      = "missing argument to repetition operator"
	ErrConditionalExpression      = "illegal conditional (?(...)) expression"
	ErrTooManyAlternates          = "too many | in (?()|)"
	ErrUnrecognizedGrouping       = "unrecognized grouping construct: (%v"
	ErrInvalidGroupName           = "invalid group name: group names must begin with a word character and have a matching terminator"
	ErrCapNumNotZero              = "capture number cannot be zero"
	ErrUndefinedBackRef           = "reference to undefined group number %v"
	ErrUndefinedNameRef           = "reference to undefined group name %v"
	ErrAlternationCantCapture     = "alternation conditions do not capture and cannot be named"
	ErrAlternationCantHaveComment = "alternation conditions cannot be comments"
	ErrMalformedReference         = "(?(%v) ) malformed"
	ErrUndefinedReference         = "(?(%v) ) reference to undefined group"
	ErrIllegalEndEscape           = "illegal \\ at end of pattern"
	ErrMalformedSlashP            = "malformed \\p{X} character escape"
	ErrIncompleteSlashP           = "incomplete \\p{X} character escape"
	ErrUnknownSlashP              = "unknown unicode category, script, or property '%v'"
	ErrUnrecognizedEscape         = "unrecognized escape sequence \\%v"
	ErrMissingControl             = "missing control character"
	ErrUnrecognizedControl        = "unrecognized control character"
	ErrTooFewHex                  = "insufficient hexadecimal digits"
	ErrInvalidHex                 = "hex values may not be larger than 0x10FFFF"
	ErrMalformedNameRef           = "malformed \\k<...> named back reference"
	ErrBadClassInCharRange        = "cannot include class \\%v in character range"
	ErrUnterminatedBracket        = "unterminated [] set"
	ErrSubtractionMustBeLast      = "a subtraction must be the last element in a character class"
	ErrReversedCharRange          = "[%c-%c] range in reverse order"
)

View Source

// Character category codes, numbered from 5 down to 1; the trailing comment
// names each category.
//
// Only Q is declared with type byte. S, Z, X and E have an explicit value and
// no type, so they are untyped integer constants.
//
// NOTE(review): presumably used to classify pattern characters while
// scanning/escaping — confirm against the lookup table that uses them.
const (
	Q byte = 5 // quantifier
	S      = 4 // ordinary stopper
	Z      = 3 // ScanBlank stopper
	X      = 2 // whitespace
	E      = 1 // should be escaped
)

View Source

// Anchor location flags. Each value is a distinct power of two, so the flags
// occupy separate bits.
//
// Only AnchorBeginning is declared with type AnchorLoc. The remaining
// constants have an explicit value and no type, so they are untyped integer
// constants.
//
// NOTE(review): the names parallel the anchor opcodes (Beginning, Bol, Start,
// Eol, EndZ, End, Boundary, ECMABoundary), so each flag presumably marks the
// corresponding anchor kind — confirm.
const (
	AnchorBeginning    AnchorLoc = 0x0001
	AnchorBol                    = 0x0002
	AnchorStart                  = 0x0004
	AnchorEol                    = 0x0008
	AnchorEndZ                   = 0x0010
	AnchorEnd                    = 0x0020
	AnchorBoundary               = 0x0040
	AnchorECMABoundary           = 0x0080
)

AnchorLoc values record where the regex can be pegged (anchored).

View Source

const (

	// MaxPrefixSize is the largest number of runes we'll use for a Boyer-Moore prefix.
	MaxPrefixSize = 50
)

Variables ¶

View Source

// Predefined character sets, built once at package initialization by the
// unexported helpers getCharSetFromOldString and getCharSetFromCategoryString.
//
// Every Not* set built with getCharSetFromOldString passes true as the second
// argument where its positive counterpart passes false.
// NOTE(review): that argument is presumably a "negate" flag — confirm in the
// helper.
var (
	AnyClass          = getCharSetFromOldString([]rune{0}, false)
	ECMAAnyClass      = getCharSetFromOldString([]rune{0, 0x000a, 0x000b, 0x000d, 0x000e}, false)
	NoneClass         = getCharSetFromOldString(nil, false)
	ECMAWordClass     = getCharSetFromOldString(ecmaWord, false)
	NotECMAWordClass  = getCharSetFromOldString(ecmaWord, true)
	ECMASpaceClass    = getCharSetFromOldString(ecmaSpace, false)
	NotECMASpaceClass = getCharSetFromOldString(ecmaSpace, true)
	ECMADigitClass    = getCharSetFromOldString(ecmaDigit, false)
	NotECMADigitClass = getCharSetFromOldString(ecmaDigit, true)

	// NOTE(review): NotWordClass and NotSpaceClass set the FIRST boolean,
	// while NotDigitClass sets the SECOND. Presumably one flag negates the
	// whole set and the other negates the category; verify that the
	// asymmetry is intentional.
	WordClass     = getCharSetFromCategoryString(false, false, wordCategoryText)
	NotWordClass  = getCharSetFromCategoryString(true, false, wordCategoryText)
	SpaceClass    = getCharSetFromCategoryString(false, false, spaceCategoryText)
	NotSpaceClass = getCharSetFromCategoryString(true, false, spaceCategoryText)
	DigitClass    = getCharSetFromCategoryString(false, false, "Nd")
	NotDigitClass = getCharSetFromCategoryString(false, true, "Nd")

	RE2SpaceClass    = getCharSetFromOldString(re2Space, false)
	NotRE2SpaceClass = getCharSetFromOldString(re2Space, true)
)

View Source

// NOTE(review): Go convention is lowercase error text without trailing
// punctuation. The message is left as-is here because changing it would
// alter the text callers see at runtime.
var ErrReplacementError = errors.New("Replacement pattern error.")

ErrReplacementError is a general error that occurs while parsing the replacement text.

Functions ¶

func CharDescription ¶

func CharDescription(ch rune) string

CharDescription produces a human-readable description of a single character.

func Escape ¶

func Escape(input string) string

func IsECMAWordChar ¶

func IsECMAWordChar(r rune) bool

func IsWordChar ¶

func IsWordChar(r rune) bool

According to UTS#18 Unicode Regular Expressions (http://www.unicode.org/reports/tr18/), RL 1.4 Simple Word Boundaries: the class of <word_character> includes all Alphabetic values from the Unicode character database, from UnicodeData.txt [UData], plus the U+200C ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER.

func Unescape ¶

func Unescape(input string) (string, error)

Types ¶

type AnchorLoc ¶

type AnchorLoc int16

func (AnchorLoc) String ¶

func (anchors AnchorLoc) String() string

String returns a human-readable description of the anchors.

type BmPrefix ¶

type BmPrefix struct {
	// contains filtered or unexported fields
}

BmPrefix precomputes the Boyer-Moore tables for fast string scanning. These tables allow you to scan for the first occurrence of a string within a large body of text without examining every character. The performance of the heuristic depends on the actual string and the text being searched, but usually, the longer the string that is being searched for, the fewer characters need to be examined.

func (*BmPrefix) Dump ¶

func (b *BmPrefix) Dump(indent string) string

Dump returns the contents of the filter as a human readable string

func (*BmPrefix) IsMatch ¶

func (b *BmPrefix) IsMatch(text []rune, index, beglimit, endlimit int) bool

When a regex is anchored, we can do a quick IsMatch test instead of a Scan

func (*BmPrefix) Scan ¶

func (b *BmPrefix) Scan(text []rune, index, beglimit, endlimit int) int

Scan uses the Boyer-Moore algorithm to find the first occurrence of the specified string within text, beginning at index, and constrained within beglimit and endlimit.

The direction and case-sensitivity of the match is determined by the arguments to the RegexBoyerMoore constructor.

func (*BmPrefix) String ¶

func (b *BmPrefix) String() string

type CharSet ¶

type CharSet struct {
	// contains filtered or unexported fields
}

CharSet combines start-end rune ranges and unicode categories representing a set of characters

func (CharSet) CharIn ¶

func (c CharSet) CharIn(ch rune) bool

CharIn returns true if the rune is in our character set (either ranges or categories). It handles negations and subtracted sub-charsets.

func (CharSet) Copy ¶

func (c CharSet) Copy() CharSet

Copy makes a deep copy to prevent accidental mutation of a set

func (CharSet) HasSubtraction ¶

func (c CharSet) HasSubtraction() bool

func (CharSet) IsEmpty ¶

func (c CharSet) IsEmpty() bool

func (CharSet) IsMergeable ¶

func (c CharSet) IsMergeable() bool

func (CharSet) IsNegated ¶

func (c CharSet) IsNegated() bool

func (CharSet) IsSingleton ¶

func (c CharSet) IsSingleton() bool

func (CharSet) IsSingletonInverse ¶

func (c CharSet) IsSingletonInverse() bool

func (CharSet) SingletonChar ¶

func (c CharSet) SingletonChar() rune

SingletonChar will return the char from the first range without validation. It assumes you have checked for IsSingleton or IsSingletonInverse and will panic given bad input

func (CharSet) String ¶

func (c CharSet) String() string

String returns a human-readable description of the set.

type Code ¶

// Code is the result type of Write. It holds the code itself together with
// the string and set tables and the metadata described on each field.
type Code struct {
	Codes       []int       // the code
	Strings     [][]rune    // string table
	Sets        []*CharSet  // character set table
	TrackCount  int         // how many instructions use backtracking
	Caps        map[int]int // mapping of user group numbers -> impl group slots
	Capsize     int         // number of impl group slots
	FcPrefix    *Prefix     // the set of candidate first characters (may be nil)
	BmPrefix    *BmPrefix   // the fixed prefix string as a Boyer-Moore machine (may be nil)
	Anchors     AnchorLoc   // the set of zero-length start anchors (RegexFCD.Bol, etc)
	RightToLeft bool        // true if right to left
}

func Write ¶

func Write(tree *RegexTree) (*Code, error)

func (*Code) Dump ¶

func (c *Code) Dump() string

func (*Code) OpcodeDescription ¶

func (c *Code) OpcodeDescription(offset int) string

OpcodeDescription returns a human-readable string describing the opcode at the given offset.

type Error ¶

// Error describes a failure to parse a regular expression.
type Error struct {
	// Code identifies which failure occurred.
	Code ErrorCode
	// Expr is the offending expression.
	Expr string
	// NOTE(review): Args presumably supplies the values for the format verbs
	// (%v, %c) that appear in some ErrorCode messages — confirm in Error().
	Args []interface{}
}

An Error describes a failure to parse a regular expression and gives the offending expression.

func (*Error) Error ¶

func (e *Error) Error() string

type ErrorCode ¶

type ErrorCode string

An ErrorCode describes a failure to parse a regular expression.

func (ErrorCode) String ¶

func (e ErrorCode) String() string

type InstOp ¶

type InstOp int

type Prefix ¶

// Prefix carries prefix information for a regex. Code.FcPrefix is a *Prefix
// and is described there as "the set of candidate first characters".
type Prefix struct {
	// NOTE(review): presumably a literal prefix, as runes — confirm.
	PrefixStr []rune
	// NOTE(review): presumably the set of characters a match may start with — confirm.
	PrefixSet CharSet
	// NOTE(review): presumably true when the prefix must be compared ignoring case — confirm.
	CaseInsensitive bool
}

type RegexOptions ¶

type RegexOptions int32

type RegexTree ¶

type RegexTree struct {
	Capnames map[string]int
	Caplist  []string
	// contains filtered or unexported fields
}

func Parse ¶

func Parse(re string, op RegexOptions) (*RegexTree, error)

Parse converts a regex string into a parse tree

func (*RegexTree) Dump ¶

func (t *RegexTree) Dump() string

type ReplacerData ¶

// ReplacerData is the reusable struct populated by NewReplacerData from a
// replacement string and a regexp's capture group data.
type ReplacerData struct {
	// NOTE(review): presumably the original replacement string — confirm.
	Rep string
	// NOTE(review): presumably the literal text segments of the replacement — confirm.
	Strings []string
	// NOTE(review): presumably encoded instructions that interleave Strings
	// with capture-group references — confirm the encoding in NewReplacerData.
	Rules []int
}

func NewReplacerData ¶

func NewReplacerData(rep string, caps map[int]int, capsize int, capnames map[string]int, op RegexOptions) (*ReplacerData, error)

NewReplacerData will populate a reusable replacer data struct based on the given replacement string and the capture group data from a regexp

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL