parser

package v1.4.6
Published: Jan 31, 2021 License: MIT Imports: 14 Imported by: 1

Documentation

Overview

Package parser contains an ECAL parser.

Lexer for Source Text

Lex() is a lexer function which converts a given input text into a stream of tokens.

Based on a talk by Rob Pike: Lexical Scanning in Go

https://www.youtube.com/watch?v=HxaD_trXwRE

The lexer's output is pushed into a channel which is consumed by the parser. This design enables the concurrent processing of the input text by lexer and parser.
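
As an illustration, here is a minimal sketch which consumes the token channel (the module path devt.de/krotik/ecal is assumed, as is the channel being closed once the input is exhausted):

package main

import (
	"fmt"

	"devt.de/krotik/ecal/parser"
)

func main() {
	// Lex returns immediately; the lexer keeps producing tokens in the
	// background while this loop consumes them from the channel.
	for token := range parser.Lex("example", "a := 1 + 2") {
		fmt.Println(token)
	}
}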

Parser

Parse() is a parser which produces a parse tree from a given set of lexer tokens.

Based on an article by Douglas Crockford: Top Down Operator Precedence

http://crockford.com/javascript/tdop/tdop.html

which is based on the ideas of Vaughan Pratt and his paper: Top Down Operator Precedence

http://portal.acm.org/citation.cfm?id=512931 https://tdop.github.io/

ParseWithRuntime() parses a given input and decorates the resulting parse tree with runtime components which can be used to interpret the parsed input.
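
A minimal usage sketch of Parse() (same assumed import path as above; the input snippet is made up):

package main

import (
	"fmt"
	"log"

	"devt.de/krotik/ecal/parser"
)

func main() {
	// Parse a small snippet into an AST; String() renders the parse tree.
	ast, err := parser.Parse("example", "a := 1 + 2")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(ast)
}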

Index

Constants

const (
	MetaDataPreComment  = "MetaDataPreComment"
	MetaDataPostComment = "MetaDataPostComment"
	MetaDataGeneral     = "MetaDataGeneral"
)

Available meta data types

const (
	NodeEOF = "EOF"

	NodeSTRING     = "string"     // String constant
	NodeNUMBER     = "number"     // Number constant
	NodeIDENTIFIER = "identifier" // Identifier

	NodeSTATEMENTS = "statements" // List of statements
	NodeFUNCCALL   = "funccall"   // Function call
	NodeCOMPACCESS = "compaccess" // Composition structure access
	NodeLIST       = "list"       // List value
	NodeMAP        = "map"        // Map value
	NodePARAMS     = "params"     // Function parameters
	NodeGUARD      = "guard"      // Guard expressions for conditional statements

	NodeGEQ = ">="
	NodeLEQ = "<="
	NodeNEQ = "!="
	NodeEQ  = "=="
	NodeGT  = ">"
	NodeLT  = "<"

	NodeKVP    = "kvp"    // Key-value pair
	NodePRESET = "preset" // Preset value

	NodePLUS   = "plus"
	NodeMINUS  = "minus"
	NodeTIMES  = "times"
	NodeDIV    = "div"
	NodeMODINT = "modint"
	NodeDIVINT = "divint"

	NodeASSIGN = ":="
	NodeLET    = "let"

	NodeIMPORT = "import"

	NodeSINK       = "sink"
	NodeKINDMATCH  = "kindmatch"
	NodeSCOPEMATCH = "scopematch"
	NodeSTATEMATCH = "statematch"
	NodePRIORITY   = "priority"
	NodeSUPPRESSES = "suppresses"

	NodeFUNC   = "function"
	NodeRETURN = "return"

	NodeAND = "and"
	NodeOR  = "or"
	NodeNOT = "not"

	NodeLIKE      = "like"
	NodeIN        = "in"
	NodeHASPREFIX = "hasprefix"
	NodeHASSUFFIX = "hassuffix"
	NodeNOTIN     = "notin"

	NodeTRUE  = "true"
	NodeFALSE = "false"
	NodeNULL  = "null"

	NodeIF = "if"

	NodeLOOP     = "loop"
	NodeBREAK    = "break"
	NodeCONTINUE = "continue"

	NodeTRY       = "try"
	NodeEXCEPT    = "except"
	NodeAS        = "as"
	NodeOTHERWISE = "otherwise"
	NodeFINALLY   = "finally"

	NodeMUTEX = "mutex"
)

Available parser AST node types

const IndentationLevel = 4

IndentationLevel is the level of indentation which the pretty printer should use

const RuneEOF = -1

RuneEOF is a special rune which represents the end of the input

Variables

var (
	ErrUnexpectedEnd            = errors.New("Unexpected end")
	ErrLexicalError             = errors.New("Lexical error")
	ErrUnknownToken             = errors.New("Unknown term")
	ErrImpossibleNullDenotation = errors.New("Term cannot start an expression")
	ErrImpossibleLeftDenotation = errors.New("Term can only start an expression")
	ErrUnexpectedToken          = errors.New("Unexpected term")
)

Parser related error types

var KeywordMap = map[string]LexTokenID{

	"let": TokenLET,

	"import": TokenIMPORT,
	"as":     TokenAS,

	"sink":       TokenSINK,
	"kindmatch":  TokenKINDMATCH,
	"scopematch": TokenSCOPEMATCH,
	"statematch": TokenSTATEMATCH,
	"priority":   TokenPRIORITY,
	"suppresses": TokenSUPPRESSES,

	"func":   TokenFUNC,
	"return": TokenRETURN,

	"and": TokenAND,
	"or":  TokenOR,
	"not": TokenNOT,

	"like":      TokenLIKE,
	"hasprefix": TokenHASPREFIX,
	"hassuffix": TokenHASSUFFIX,

	"in":    TokenIN,
	"notin": TokenNOTIN,

	"false": TokenFALSE,
	"true":  TokenTRUE,
	"null":  TokenNULL,

	"if":   TokenIF,
	"elif": TokenELIF,
	"else": TokenELSE,

	"for":      TokenFOR,
	"break":    TokenBREAK,
	"continue": TokenCONTINUE,

	"try":       TokenTRY,
	"except":    TokenEXCEPT,
	"otherwise": TokenOTHERWISE,
	"finally":   TokenFINALLY,

	"mutex": TokenMUTEX,
}

KeywordMap is a map of keywords - these require whitespace around them to be recognized as keywords

var NamePattern = regexp.MustCompile("^[A-Za-z][A-Za-z0-9]*$")

NamePattern is the pattern for valid names.
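
For example, checking candidate names against the pattern (illustrative snippet, assuming the imports from the sketches above):

fmt.Println(parser.NamePattern.MatchString("myVar1")) // true
fmt.Println(parser.NamePattern.MatchString("1var"))   // false - must start with a letter
fmt.Println(parser.NamePattern.MatchString("my_var")) // false - only letters and digits are allowed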

var SymbolMap = map[string]LexTokenID{

	">=": TokenGEQ,
	"<=": TokenLEQ,
	"!=": TokenNEQ,
	"==": TokenEQ,
	">":  TokenGT,
	"<":  TokenLT,

	"(": TokenLPAREN,
	")": TokenRPAREN,
	"[": TokenLBRACK,
	"]": TokenRBRACK,
	"{": TokenLBRACE,
	"}": TokenRBRACE,

	".": TokenDOT,
	",": TokenCOMMA,
	";": TokenSEMICOLON,

	":": TokenCOLON,
	"=": TokenEQUAL,

	"+":  TokenPLUS,
	"-":  TokenMINUS,
	"*":  TokenTIMES,
	"/":  TokenDIV,
	"//": TokenDIVINT,
	"%":  TokenMODINT,

	":=": TokenASSIGN,
}

SymbolMap is a map of special symbols which will always be unique - these will separate unquoted strings. Symbols can be at most 2 characters long.

Functions

func IsNotEndAndNotTokens added in v1.0.3

func IsNotEndAndNotTokens(p *parser, tokens []LexTokenID) bool

IsNotEndAndNotTokens checks that the end has not been reached and that the next token is not one of the given types.

func IsNotEndAndToken

func IsNotEndAndToken(p *parser, i LexTokenID) bool

IsNotEndAndToken checks that the end has not been reached and that the next token is of the given type.

func IsValidTokenID

func IsValidTokenID(value int) bool

IsValidTokenID checks if a given token ID is valid.

func Lex

func Lex(name string, input string) chan LexToken

Lex lexes a given input. Returns a channel which contains tokens.

func PrettyPrint

func PrettyPrint(ast *ASTNode) (string, error)

PrettyPrint produces pretty printed code from a given AST.
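
A hedged round-trip sketch (same assumed imports; the exact output formatting is illustrative):

// Parse a compact snippet, then render it through the pretty printer.
ast, err := parser.Parse("example", "a:=1+2")
if err != nil {
	log.Fatal(err)
}

pretty, err := parser.PrettyPrint(ast)
if err != nil {
	log.Fatal(err)
}

fmt.Println(pretty) // expected output along the lines of: a := 1 + 2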

Types

type ASTNode

type ASTNode struct {
	Name     string     // Name of the node
	Token    *LexToken  // Lexer token of this ASTNode
	Meta     []MetaData // Meta data for this ASTNode (e.g. comments)
	Children []*ASTNode // Child nodes
	Runtime  Runtime    // Runtime component for this ASTNode
	// contains filtered or unexported fields
}

ASTNode models a node in the AST

func ASTFromJSONObject

func ASTFromJSONObject(jsonAST map[string]interface{}) (*ASTNode, error)

ASTFromJSONObject creates an AST from a JSON Object. The following nested map structure is expected:

{
	name     : <name of node>

	// Optional node information
	value    : <value of node>
	children : [ <child nodes> ]

	// Optional token information
	id       : <token id>
}
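
A minimal sketch constructing a single leaf node from such a map; how the optional fields are filled in internally is an assumption:

node, err := parser.ASTFromJSONObject(map[string]interface{}{
	"name":  "number",
	"value": "42",
})
if err != nil {
	log.Fatal(err)
}

fmt.Println(node.Name) // number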

func Parse

func Parse(name string, input string) (*ASTNode, error)

Parse parses a given input string and returns an AST.

func ParseWithRuntime

func ParseWithRuntime(name string, input string, rp RuntimeProvider) (*ASTNode, error)

ParseWithRuntime parses a given input string and returns an AST decorated with runtime components.

func (*ASTNode) Equals

func (n *ASTNode) Equals(other *ASTNode, ignoreTokenPosition bool) (bool, string)

Equals checks if this AST equals another AST. It also returns a message describing any difference which was found.

func (*ASTNode) String

func (n *ASTNode) String() string

String returns a string representation of this AST node.

func (*ASTNode) ToJSONObject

func (n *ASTNode) ToJSONObject() map[string]interface{}

ToJSONObject returns this ASTNode and all its children as a JSON object.
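
Assuming the returned map holds only JSON-serialisable values, it can be passed straight to encoding/json - a short sketch:

ast, err := parser.Parse("example", "a := 1")
if err != nil {
	log.Fatal(err)
}

out, err := json.MarshalIndent(ast.ToJSONObject(), "", "  ")
if err != nil {
	log.Fatal(err)
}

fmt.Println(string(out))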

type Error

type Error struct {
	Source string // Name of the source which was given to the parser
	Type   error  // Error type (to be used for equal checks)
	Detail string // Details of this error
	Line   int    // Line of the error
	Pos    int    // Position of the error
}

Error models a parser related error.

func (*Error) Error

func (pe *Error) Error() string

Error returns a human-readable string representation of this error.

type LABuffer

type LABuffer struct {
	// contains filtered or unexported fields
}

LABuffer models a look-ahead buffer.

func NewLABuffer

func NewLABuffer(c chan LexToken, size int) *LABuffer

NewLABuffer creates a new LABuffer instance.

func (*LABuffer) Next

func (b *LABuffer) Next() (LexToken, bool)

Next returns the next item.

func (*LABuffer) Peek

func (b *LABuffer) Peek(pos int) (LexToken, bool)

Peek looks ahead inside the buffer; position 0 refers to the next item.
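
A sketch combining Lex with a look-ahead buffer; the buffer size of 2 is arbitrary and the boolean returns are assumed to report whether a token was available:

buf := parser.NewLABuffer(parser.Lex("example", "a := 1"), 2)

// Inspect upcoming tokens without consuming them.
if tok, ok := buf.Peek(0); ok {
	fmt.Println("next token:", tok)
}
if tok, ok := buf.Peek(1); ok {
	fmt.Println("token after that:", tok)
}

// Consume the buffered stream.
for tok, ok := buf.Next(); ok; tok, ok = buf.Next() {
	fmt.Println(tok)
}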

type LexToken

type LexToken struct {
	ID             LexTokenID // Token kind
	Pos            int        // Starting position (in bytes)
	Val            string     // Token value
	Identifier     bool       // Flag if the value is an identifier (not quoted and not a number)
	AllowEscapes   bool       // Flag if escape characters were interpreted in the value
	PrefixNewlines int        // Number of newlines which precede this token
	Lsource        string     // Input source label (e.g. filename)
	Lline          int        // Line in the input this token appears
	Lpos           int        // Position in the input line this token appears
}

LexToken represents a token which is returned by the lexer.

func LexToList

func LexToList(name string, input string) []LexToken

LexToList lexes a given input. Returns a list of tokens.
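
Unlike Lex, this drains the token stream into a slice first - e.g.:

for _, token := range parser.LexToList("example", `a := "hello"`) {
	fmt.Println(token.PosString(), token.Val)
}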

func NewLexTokenInstance

func NewLexTokenInstance(t LexToken) *LexToken

NewLexTokenInstance creates a new LexToken object instance from given LexToken values.

func (LexToken) Equals

func (t LexToken) Equals(other LexToken, ignorePosition bool) (bool, string)

Equals checks if this LexToken equals another LexToken. It also returns a message describing any difference which was found.

func (LexToken) PosString

func (t LexToken) PosString() string

PosString returns the position of this token in the original input as a string.

func (LexToken) String

func (t LexToken) String() string

String returns a string representation of a token.

func (LexToken) Type

func (t LexToken) Type() string

Type returns the meta data type.

func (LexToken) Value

func (t LexToken) Value() string

Value returns the meta data value.

type LexTokenID

type LexTokenID int

LexTokenID represents a unique lexer token ID

const (
	TokenError LexTokenID = iota // Lexing error token with a message as val
	TokenEOF                     // End-of-file token
	TokenANY                     // Unspecified token (used when building an AST from a Go map structure)

	TokenPRECOMMENT  // Comment /* ... */
	TokenPOSTCOMMENT // Comment # ...

	TokenSTRING     // String constant
	TokenNUMBER     // Number constant
	TokenIDENTIFIER // Identifier

	TokenSTATEMENTS // A code block
	TokenFUNCCALL   // A function call
	TokenCOMPACCESS // Access to a composition structure
	TokenLIST       // List value
	TokenMAP        // MAP value
	TokenPARAMS     // Function parameters
	TokenGUARD      // Conditional statements

	TOKENodeSYMBOLS // Used to separate symbols from other tokens in this list

	TokenGEQ
	TokenLEQ
	TokenNEQ
	TokenEQ
	TokenGT
	TokenLT

	TokenLPAREN
	TokenRPAREN
	TokenLBRACK
	TokenRBRACK
	TokenLBRACE
	TokenRBRACE

	TokenDOT
	TokenCOMMA
	TokenSEMICOLON

	TokenCOLON
	TokenEQUAL

	TokenPLUS
	TokenMINUS
	TokenTIMES
	TokenDIV
	TokenDIVINT
	TokenMODINT

	TokenASSIGN
	TokenLET

	TOKENodeKEYWORDS // Used to separate keywords from other tokens in this list

	TokenIMPORT
	TokenAS

	TokenSINK
	TokenKINDMATCH
	TokenSCOPEMATCH
	TokenSTATEMATCH
	TokenPRIORITY
	TokenSUPPRESSES

	TokenFUNC
	TokenRETURN

	TokenAND
	TokenOR
	TokenNOT

	TokenLIKE
	TokenIN
	TokenHASPREFIX
	TokenHASSUFFIX
	TokenNOTIN

	TokenFALSE
	TokenTRUE
	TokenNULL

	TokenIF
	TokenELIF
	TokenELSE

	TokenFOR
	TokenBREAK
	TokenCONTINUE

	TokenTRY
	TokenEXCEPT
	TokenOTHERWISE
	TokenFINALLY

	TokenMUTEX

	TokenENDLIST
)

Available lexer token types

type MetaData

type MetaData interface {

	/*
		Type returns the type of the meta data.
	*/
	Type() string

	/*
		Value returns the value of the meta data.
	*/
	Value() string
}

MetaData is auxiliary data which can be attached to ASTs.

type Runtime

type Runtime interface {

	/*
	   Validate this runtime component and all its child components.
	*/
	Validate() error

	/*
		Eval evaluates this runtime component. It gets passed the current variable
		scope, an instance state and a thread ID.

		The instance state is created per execution instance and can be used
		for generator functions to store their current state. It gets replaced
		by a new object in certain situations (e.g. a function call).

		The thread ID can be used to identify a running process.
	*/
	Eval(Scope, map[string]interface{}, uint64) (interface{}, error)
}

Runtime provides the runtime for an ASTNode.

type RuntimeProvider

type RuntimeProvider interface {

	/*
	   Runtime returns a runtime component for a given ASTNode.
	*/
	Runtime(node *ASTNode) Runtime
}

RuntimeProvider provides runtime components for a parse tree.
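
A hedged sketch of a trivial provider which could be handed to ParseWithRuntime; echoRuntime and echoProvider are invented names for illustration:

// echoRuntime is a no-op Runtime which always validates and evaluates
// to the name of its AST node.
type echoRuntime struct {
	node *parser.ASTNode
}

func (r *echoRuntime) Validate() error { return nil }

func (r *echoRuntime) Eval(vs parser.Scope, is map[string]interface{}, tid uint64) (interface{}, error) {
	return r.node.Name, nil
}

// echoProvider decorates every AST node with an echoRuntime.
type echoProvider struct{}

func (p *echoProvider) Runtime(node *parser.ASTNode) parser.Runtime {
	return &echoRuntime{node: node}
}

// Usage: every node of the resulting tree carries an echoRuntime.
ast, err := parser.ParseWithRuntime("example", "1 + 2", &echoProvider{})
if err != nil {
	log.Fatal(err)
}

res, _ := ast.Runtime.Eval(nil, map[string]interface{}{}, 1)
fmt.Println(res) // prints the name of the root node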

type Scope

type Scope interface {

	/*
	   Name returns the name of this scope.
	*/
	Name() string

	/*
	   NewChild creates a new child scope.
	*/
	NewChild(name string) Scope

	/*
		Clear clears this scope of all stored values. This will clear children scopes
		but not remove parent scopes.
	*/
	Clear()

	/*
	   Parent returns the parent scope or nil.
	*/
	Parent() Scope

	/*
	   SetValue sets a new value for a variable.
	*/
	SetValue(varName string, varValue interface{}) error

	/*
	   SetLocalValue sets a new value for a local variable.
	*/
	SetLocalValue(varName string, varValue interface{}) error

	/*
	   GetValue gets the current value of a variable.
	*/
	GetValue(varName string) (interface{}, bool, error)

	/*
	   String returns a string representation of this scope.
	*/
	String() string

	/*
	   ToJSONObject returns this ASTNode and all its children as a JSON object.
	*/
	ToJSONObject() map[string]interface{}
}

Scope models an environment which stores data.
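
The interpreter is expected to ship its own Scope implementation; purely for illustration, a minimal map-backed sketch could look as follows (names are invented, and Clear deliberately simplifies the documented child-clearing behaviour):

// mapScope is an illustrative Scope backed by a plain map with a
// pointer to its parent scope.
type mapScope struct {
	name   string
	parent parser.Scope
	vars   map[string]interface{}
}

func newMapScope(name string, parent parser.Scope) *mapScope {
	return &mapScope{name: name, parent: parent, vars: map[string]interface{}{}}
}

func (s *mapScope) Name() string { return s.name }

func (s *mapScope) NewChild(name string) parser.Scope { return newMapScope(name, s) }

func (s *mapScope) Clear() { s.vars = map[string]interface{}{} }

func (s *mapScope) Parent() parser.Scope { return s.parent }

func (s *mapScope) SetValue(varName string, varValue interface{}) error {
	// If an enclosing scope already holds the variable, update it there.
	if _, ok := s.vars[varName]; !ok && s.parent != nil {
		if _, found, _ := s.parent.GetValue(varName); found {
			return s.parent.SetValue(varName, varValue)
		}
	}
	s.vars[varName] = varValue
	return nil
}

func (s *mapScope) SetLocalValue(varName string, varValue interface{}) error {
	s.vars[varName] = varValue
	return nil
}

func (s *mapScope) GetValue(varName string) (interface{}, bool, error) {
	if v, ok := s.vars[varName]; ok {
		return v, true, nil
	}
	if s.parent != nil {
		return s.parent.GetValue(varName)
	}
	return nil, false, nil
}

func (s *mapScope) String() string { return fmt.Sprintf("%v %v", s.name, s.vars) }

func (s *mapScope) ToJSONObject() map[string]interface{} { return s.vars }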
