Documentation
¶
Overview ¶
Package lexparse defines a set of interfaces that can be used to define generic lexers and parsers over byte streams.
Example (InfixCalculator) ¶
Example_infixCalculator demonstrates an infix expression calculator using a Pratt parser. It makes use of the ScanningLexer to tokenize the input expression and builds an expression tree that is then evaluated using the Calculate function.
package main
import (
"context"
"errors"
"fmt"
"io"
"strconv"
"strings"
"github.com/ianlewis/lexparse"
)
// Sentinel errors returned by the calculator's parser and evaluator.
var (
	errUnexpectedIdentifier = errors.New("unexpected identifier")
	errUnclosedParen        = errors.New("unclosed parenthesis")
	errUnexpectedParen      = errors.New("unexpected closing parenthesis")
	errDivByZero            = errors.New("division by zero")
	errInvalidNode          = errors.New("invalid node")
)

// nodeType identifies the kind of an expression tree node.
type nodeType int

const (
	// nodeTypeNum is a leaf node comprised of a number.
	nodeTypeNum nodeType = iota
	// nodeTypeOper is a binary node whose children are the left and right.
	nodeTypeOper
)

// exprNode is a node in the expression tree.
type exprNode struct {
	typ  nodeType
	num  float64 // Only used for nodeTypeNum.
	oper string  // Only used for nodeTypeOper.
}
// precedence returns the binding power of an operator node. Higher values
// bind more tightly. Unrecognized operators get precedence 0.
//
// It panics if called on a non-operator node, which indicates a programmer
// error in the parser.
func (n *exprNode) precedence() int {
	if n.typ != nodeTypeOper {
		panic(fmt.Sprintf("node %v is not an operator node", n))
	}
	// Multiplicative operators bind tighter than additive ones.
	switch n.oper {
	case "*", "/":
		return 2
	case "+", "-":
		return 1
	}
	return 0
}
// String renders the node for display: numbers via %g, operators as their
// symbol, and anything else with a diagnostic placeholder.
func (n *exprNode) String() string {
	if n.typ == nodeTypeNum {
		return fmt.Sprintf("%g", n.num)
	}
	if n.typ == nodeTypeOper {
		return n.oper
	}
	return fmt.Sprintf("UnknownNodeType(%d)", n.typ)
}
// tokenErr wraps err with the offending token's value and source position.
func tokenErr(err error, tok *lexparse.Token) error {
	return fmt.Errorf(
		"%w: %q, line %d, column %d",
		err, tok.Value, tok.Start.Line, tok.Start.Column,
	)
}
// pratt implements a Pratt operator-precedence parser for infix expressions.
// It parses a single top-level expression, installs it as the root of the
// parse tree, and reports any parse error.
func pratt(ctx *lexparse.ParserContext[*exprNode]) error {
	root, err := parseExpr(ctx, 0, 0)
	ctx.SetRoot(root)
	return err
}
// parseExpr parses a single expression using precedence climbing.
//
// depth tracks parenthesis nesting (used to decide whether a ')' is valid),
// and minPrecedence is the minimum operator precedence this call will
// consume before returning control to its caller.
//
// NOTE(review): the recursive call below passes the operator's own
// precedence as the new minimum, so operators of equal precedence bind to
// the right (e.g. "8 - 4 - 2" parses as 8 - (4 - 2)); the documented
// example tree shows "/" nested under "*", confirming this. Conventional
// left-associative arithmetic would pass precedence()+1 — confirm whether
// right associativity is intended before changing, since the documented
// example Output depends on the current tree shape.
func parseExpr(
	ctx *lexparse.ParserContext[*exprNode],
	depth, minPrecedence int,
) (*lexparse.Node[*exprNode], error) {
	// Check if the context is canceled.
	select {
	case <-ctx.Done():
		//nolint:wrapcheck // We want to return the original context error.
		return nil, ctx.Err()
	default:
	}
	// The first token must start an operand: a numeric literal or a
	// parenthesized sub-expression.
	token := ctx.Next()
	var lhs *lexparse.Node[*exprNode]
	switch token.Type {
	case lexparse.TokenTypeFloat, lexparse.TokenTypeInt:
		num, err := strconv.ParseFloat(token.Value, 64)
		if err != nil {
			return nil, tokenErr(err, token)
		}
		lhs = ctx.NewNode(&exprNode{
			typ: nodeTypeNum,
			num: num,
		})
	case '(':
		// Parse the expression inside the parentheses.
		lhs2, err := parseExpr(ctx, depth+1, 0)
		if err != nil {
			return nil, err
		}
		lhs = lhs2
		// The sub-expression must be followed by a closing parenthesis.
		t2 := ctx.Next()
		if t2.Type != ')' {
			return nil, tokenErr(errUnclosedParen, t2)
		}
	case lexparse.TokenTypeEOF:
		return nil, tokenErr(io.ErrUnexpectedEOF, token)
	default:
		return nil, tokenErr(errUnexpectedIdentifier, token)
	}
	// Consume (operator, operand) pairs, folding them into lhs, until we
	// hit an operator weaker than minPrecedence or the end of the
	// expression.
outerL:
	for {
		var opVal *exprNode
		opToken := ctx.Peek()
		switch opToken.Type {
		case '+', '-', '*', '/':
			opVal = &exprNode{
				typ:  nodeTypeOper,
				oper: opToken.Value,
			}
		case lexparse.TokenTypeEOF:
			break outerL
		case ')':
			// A ')' is only legal inside parentheses; the matching
			// recursive caller consumes it.
			if depth == 0 {
				return nil, tokenErr(errUnexpectedParen, opToken)
			}
			break outerL
		default:
			return nil, tokenErr(errUnexpectedIdentifier, opToken)
		}
		if opVal.precedence() < minPrecedence {
			// If the operator precedence is less than the minimum precedence,
			// stop parsing.
			return lhs, nil
		}
		_ = ctx.Next() // Consume the operator token.
		opNode := ctx.NewNode(opVal)
		// Parse the right-hand operand (see associativity NOTE above).
		rhs, err := parseExpr(ctx, depth, opNode.Value.precedence())
		if err != nil {
			return nil, err
		}
		// Add the operator's children.
		opNode.Children = append(opNode.Children, lhs, rhs)
		lhs = opNode
	}
	return lhs, nil
}
// Calculate evaluates the parsed expression tree rooted at root and returns
// the numeric result. It returns errDivByZero for division by zero and
// errInvalidNode-wrapped errors for malformed trees.
func Calculate(root *lexparse.Node[*exprNode]) (float64, error) {
	val := root.Value

	// Leaf: a literal number evaluates to itself.
	if val.typ == nodeTypeNum {
		return val.num, nil
	}
	if val.typ != nodeTypeOper {
		return 0.0, fmt.Errorf("%w: node type: %v", errInvalidNode, val.typ)
	}

	// Operator nodes are strictly binary.
	if len(root.Children) != 2 {
		return 0.0, fmt.Errorf("%w: invalid children: %v", errInvalidNode, val)
	}

	// Evaluate operands left-to-right so the first error wins.
	left, err := Calculate(root.Children[0])
	if err != nil {
		return 0.0, err
	}
	right, err := Calculate(root.Children[1])
	if err != nil {
		return 0.0, err
	}

	switch val.oper {
	case "+":
		return left + right, nil
	case "-":
		return left - right, nil
	case "*":
		return left * right, nil
	case "/":
		if right == 0 {
			return 0.0, errDivByZero
		}
		return left / right, nil
	default:
		return 0.0, fmt.Errorf("%w: operator: %s", errInvalidNode, val.oper)
	}
}
// Example_infixCalculator demonstrates an infix expression calculator
// using a Pratt parser. It makes use of the ScanningLexer to tokenize
// the input expression and builds an expression tree that is then evaluated
// using the Calculate function.
func main() {
	r := strings.NewReader(`6.1 * ( 2.8 + 3.2 ) / 7.6 - 2.4`)
	// Lex and parse the input into an expression tree.
	tree, err := lexparse.LexParse(
		context.Background(),
		lexparse.NewScanningLexer(r),
		lexparse.ParseStateFn(pratt),
	)
	if err != nil {
		panic(err)
	}
	// Print the expression tree.
	fmt.Println(tree)
	// Evaluate the tree to a single numeric result.
	txt, err := Calculate(tree)
	if err != nil {
		panic(err)
	}
	// Print the evaluation result.
	fmt.Print(txt)
}
Output: - (1:27) ├── * (1:5) │ ├── 6.1 (1:1) │ └── / (1:21) │ ├── + (1:13) │ │ ├── 2.8 (1:9) │ │ └── 3.2 (1:15) │ └── 7.6 (1:23) └── 2.4 (1:29) 2.4157894736842107
Example (IniParser) ¶
Example_iniParser demonstrates parsing a simple INI file. It does not support nested sections, or escape sequences.
package main
import (
"context"
"errors"
"fmt"
"io"
"regexp"
"strings"
"github.com/ianlewis/lexparse"
)
// Token types produced by the INI lexer.
const (
	// lexINITypeIden represents an identifier token (key or section name).
	lexINITypeIden lexparse.TokenType = iota
	// lexINITypeOper represents an operator token.
	lexINITypeOper
	// lexINITypeValue represents a property value token.
	lexINITypeValue
	// lexINITypeComment represents a comment token.
	lexINITypeComment
)

// iniNodeType identifies the kind of a node in the INI parse tree.
type iniNodeType int

const (
	// iniNodeTypeRoot represents the root node of the INI parse tree.
	iniNodeTypeRoot iniNodeType = iota
	// iniNodeTypeSection represents a section node in the INI parse tree.
	iniNodeTypeSection
	// iniNodeTypeProperty represents a property node in the INI parse tree.
	iniNodeTypeProperty
)

// iniNode is a node in the INI parse tree.
type iniNode struct {
	typ iniNodeType
	// sectionName is only used for section nodes.
	sectionName string
	// propertyName and propertyValue are only used for property nodes.
	propertyName  string
	propertyValue string
}
// String renders the node for display: "root" for the root node, the
// bracketed name for sections, and "key = value" for properties.
func (n *iniNode) String() string {
	if n.typ == iniNodeTypeRoot {
		return "root"
	}
	if n.typ == iniNodeTypeSection {
		return "[" + n.sectionName + "]"
	}
	if n.typ == iniNodeTypeProperty {
		return n.propertyName + " = " + n.propertyValue
	}
	return "<Unknown>"
}
// iniIdenRegexp validates section and property names: alphanumeric only.
var iniIdenRegexp = regexp.MustCompile(`^[A-Za-z0-9]+$`)

// Sentinel errors returned by the INI parser.
var (
	errINIIdentifier   = errors.New("unexpected identifier")
	errINISectionName  = errors.New("invalid section name")
	errINIPropertyName = errors.New("invalid property name")
)
// lexINI is the initial lexer state for INI files. It discards whitespace
// and dispatches to the operator, comment, or identifier state based on the
// next rune.
//
//nolint:ireturn // returning the generic interface is needed to return the previous value.
func lexINI(ctx *lexparse.CustomLexerContext) (lexparse.LexState, error) {
	for {
		switch rn := ctx.Peek(); {
		case rn == lexparse.EOF:
			// End of input.
			return nil, io.EOF
		case rn == ' ' || rn == '\t' || rn == '\r' || rn == '\n':
			// Skip insignificant whitespace.
			ctx.Discard()
		case rn == '[' || rn == ']' || rn == '=':
			return lexparse.LexStateFn(lexINIOper), nil
		case rn == ';' || rn == '#':
			return lexparse.LexStateFn(lexINIComment), nil
		default:
			return lexparse.LexStateFn(lexINIIden), nil
		}
	}
}
// lexINIOper lexes a single-rune operator token ('[', ']', or '=').
//
//nolint:ireturn // returning the generic interface is needed to return the previous value.
func lexINIOper(ctx *lexparse.CustomLexerContext) (lexparse.LexState, error) {
	rn := ctx.NextRune()
	ctx.Emit(lexINITypeOper)
	// '=' introduces a property value; any other operator returns to the
	// top-level state.
	next := lexparse.LexStateFn(lexINI)
	if rn == '=' {
		next = lexparse.LexStateFn(lexINIValue)
	}
	return next, nil
}
// lexINIIden lexes an identifier token (section name or property key). The
// identifier runs up to the next ']' or '='; hitting end of input first is
// an error.
//
//nolint:ireturn // returning the generic interface is needed to return the previous value.
func lexINIIden(ctx *lexparse.CustomLexerContext) (lexparse.LexState, error) {
	found := ctx.Find([]string{"]", "="})
	if found == "" {
		// No terminating operator before end of input.
		return nil, io.ErrUnexpectedEOF
	}
	ctx.Emit(lexINITypeIden)
	return lexparse.LexStateFn(lexINIOper), nil
}
// lexINIValue lexes a property value token. The value runs up to an inline
// comment (';') or the end of the line; surrounding whitespace is trimmed
// later by the parser.
//
//nolint:ireturn // returning the generic interface is needed to return the previous value.
func lexINIValue(ctx *lexparse.CustomLexerContext) (lexparse.LexState, error) {
	// Advance to the value's terminator; the result is deliberately ignored
	// so a value at end-of-input (no trailing newline) is still emitted.
	ctx.Find([]string{";", "\n"})
	ctx.Emit(lexINITypeValue)
	return lexparse.LexStateFn(lexINI), nil
}
// lexINIComment lexes a comment token, which extends to the end of the line.
//
//nolint:ireturn // returning the generic interface is needed to return the previous value.
func lexINIComment(ctx *lexparse.CustomLexerContext) (lexparse.LexState, error) {
	// Advance to end of line; a missing trailing newline is tolerated.
	ctx.Find([]string{"\n"})
	ctx.Emit(lexINITypeComment)
	return lexparse.LexStateFn(lexINI), nil
}
// iniTokenErr wraps err with the offending token's value and source
// position for use in parser error messages.
func iniTokenErr(err error, tok *lexparse.Token) error {
	return fmt.Errorf(
		"%w: %q, line %d, column %d",
		err, tok.Value, tok.Start.Line, tok.Start.Column,
	)
}
// parseINIInit is the initial parser state for INI files. It installs the
// root node and the implicit global section, then hands off to parseINI.
func parseINIInit(ctx *lexparse.ParserContext[*iniNode]) error {
	// Replace the root node with a new root node.
	_ = ctx.Replace(&iniNode{
		typ: iniNodeTypeRoot,
	})
	// Create the empty section node for the global section. Properties that
	// appear before any section header become children of this node.
	_ = ctx.Push(&iniNode{
		typ:         iniNodeTypeSection,
		sectionName: "",
	})
	ctx.PushState(lexparse.ParseStateFn(parseINI))
	return nil
}
// parseINI parses the top-level structure of an INI file. It peeks at the
// next token and schedules the matching parse state: an operator starts a
// section header, an identifier starts a property, a comment is discarded,
// and EOF ends parsing.
func parseINI(ctx *lexparse.ParserContext[*iniNode]) error {
	switch t := ctx.Peek(); t.Type {
	case lexINITypeOper:
		ctx.PushState(lexparse.ParseStateFn(parseSection))
		return nil
	case lexINITypeIden:
		ctx.PushState(lexparse.ParseStateFn(parseProperty))
		return nil
	case lexINITypeComment:
		_ = ctx.Next() // Discard comment
		ctx.PushState(lexparse.ParseStateFn(parseINI))
		return nil
	case lexparse.TokenTypeEOF:
		return nil
	default:
		return iniTokenErr(errINIIdentifier, t)
	}
}
// parseSection parses a section header of the form "[name]". On success it
// climbs back to the root and pushes a new section node so subsequent
// properties attach to this section.
func parseSection(ctx *lexparse.ParserContext[*iniNode]) error {
	// Opening bracket.
	if t := ctx.Next(); t.Type != lexINITypeOper || t.Value != "[" {
		return iniTokenErr(errINIIdentifier, t)
	}
	// Section name identifier.
	nameToken := ctx.Next()
	if nameToken.Type != lexINITypeIden {
		return iniTokenErr(errINIIdentifier, nameToken)
	}
	// Closing bracket.
	if t := ctx.Next(); t.Type != lexINITypeOper || t.Value != "]" {
		return iniTokenErr(errINIIdentifier, t)
	}
	// Validate the (whitespace-trimmed) section name.
	name := strings.TrimSpace(nameToken.Value)
	if !iniIdenRegexp.MatchString(name) {
		return iniTokenErr(errINISectionName, nameToken)
	}
	// Leave the previous section and make the new section node current.
	_ = ctx.Climb()
	_ = ctx.Push(&iniNode{
		typ:         iniNodeTypeSection,
		sectionName: name,
	})
	ctx.PushState(lexparse.ParseStateFn(parseINI))
	return nil
}
// parseProperty parses a "key = value" property line and attaches a
// property node to the current section.
func parseProperty(ctx *lexparse.ParserContext[*iniNode]) error {
	// Property key.
	keyToken := ctx.Next()
	if keyToken.Type != lexINITypeIden {
		return iniTokenErr(errINIIdentifier, keyToken)
	}
	key := strings.TrimSpace(keyToken.Value)
	if !iniIdenRegexp.MatchString(key) {
		return iniTokenErr(errINIPropertyName, keyToken)
	}
	// '=' separator.
	if eq := ctx.Next(); eq.Type != lexINITypeOper || eq.Value != "=" {
		return iniTokenErr(errINIIdentifier, eq)
	}
	// Property value.
	valueToken := ctx.Next()
	if valueToken.Type != lexINITypeValue {
		return iniTokenErr(errINIIdentifier, valueToken)
	}
	// Attach the property to the current section without descending.
	ctx.Node(&iniNode{
		typ:           iniNodeTypeProperty,
		propertyName:  key,
		propertyValue: strings.TrimSpace(valueToken.Value),
	})
	ctx.PushState(lexparse.ParseStateFn(parseINI))
	return nil
}
// Example_iniParser demonstrates parsing a simple INI file. It does not support
// nested sections, or escape sequences.
func main() {
	r := strings.NewReader(`; last modified 1 April 2001 by John Doe
[owner]
name = John Doe
organization = Acme Widgets Inc.
[database]
; use IP address in case network name resolution is not working
server = 192.0.2.62
port = 143
file = "payroll.dat"
`)
	// Produces a tree representation of the INI file.
	// Each child of the root node is a section node, which in turn
	// has property nodes as children. The global section is represented
	// as a section node with an empty name.
	tree, err := lexparse.LexParse(
		context.Background(),
		lexparse.NewCustomLexer(r, lexparse.LexStateFn(lexINI)),
		lexparse.ParseStateFn(parseINIInit),
	)
	if err != nil {
		panic(err)
	}
	fmt.Print(tree)
}
Output: root (0:0) ├── [] (0:0) ├── [owner] (2:7) │ ├── name = John Doe (3:7) │ └── organization = Acme Widgets Inc. (4:15) └── [database] (6:10) ├── server = 192.0.2.62 (8:9) ├── port = 143 (9:7) └── file = "payroll.dat" (10:7)
Example (TemplateEngine) ¶
Example_templateEngine implements a simple text templating language. The language replaces variables identified with double braces (e.g. `{{ var }}`) with data values for those variables.
LexParse is used to lex and parse the template into a parse tree. This tree can be passed with a data map to the Execute function to interpret the template and retrieve a final result.
This example includes some best practices for error handling, such as including line and column numbers in error messages.
package main
import (
"context"
"errors"
"fmt"
"io"
"os"
"regexp"
"strconv"
"strings"
"unicode"
"github.com/ianlewis/lexparse"
)
// Token types produced by the template lexer.
const (
	lexTypeText lexparse.TokenType = iota
	lexTypeBlockStart
	lexTypeBlockEnd
	lexTypeVarStart
	lexTypeVarEnd
	lexTypeIdentifier
)

// Template delimiters and keywords.
const (
	tokenBlockStart = "{%"
	tokenBlockEnd   = "%}"
	tokenVarStart   = "{{"
	tokenVarEnd     = "}}"
	tokenIf         = "if"
	tokenElse       = "else"
	tokenEndif      = "endif"
)

// Sentinel errors returned by the template lexer and parser.
var (
	errRune       = errors.New("unexpected rune")
	errIdentifier = errors.New("unexpected identifier")
)

// Identifier regexp.
var (
	idenRegexp   = regexp.MustCompile(`[a-zA-Z]+[a-zA-Z0-9]*`)
	symbolRegexp = regexp.MustCompile(`[{}%]+`)
)

// tmplNodeType identifies the kind of a template parse tree node.
type tmplNodeType int

const (
	// nodeTypeSeq is a node whose children are various text,if,var nodes in order.
	nodeTypeSeq tmplNodeType = iota
	// nodeTypeText is a leaf node comprised of text.
	nodeTypeText
	// nodeTypeBranch is a binary node whose first child is the 'if' sequence
	// node and second is the 'else' sequence node.
	nodeTypeBranch
	// nodeTypeVar nodes are variable leaf nodes.
	nodeTypeVar
)

// tmplNode is a node in the template parse tree.
type tmplNode struct {
	typ tmplNodeType
	// Fields below are populated based on node type.
	varName string
	text    string
}
// String renders the node for display in parse tree dumps.
func (n *tmplNode) String() string {
	if n.typ == nodeTypeSeq {
		return "[]"
	}
	if n.typ == nodeTypeText {
		return fmt.Sprintf("%q", n.text)
	}
	if n.typ == nodeTypeBranch {
		return "if/else"
	}
	if n.typ == nodeTypeVar {
		return fmt.Sprintf("{{%s}}", n.varName)
	}
	return "<Unknown>"
}
// lexTokenErr wraps err with the offending token for error reporting.
func lexTokenErr(err error, tok *lexparse.Token) error {
	return fmt.Errorf("%w: %s", err, tok)
}
// lexText tokenizes normal text, emitting it when a template delimiter
// ("{%" or "{{") or the end of input is reached.
//
//nolint:ireturn // returning interface is required to satisfy lexparse.LexState.
func lexText(ctx *lexparse.CustomLexerContext) (lexparse.LexState, error) {
	for {
		// Look ahead two runes for a template delimiter.
		p := string(ctx.PeekN(2))
		if p == tokenBlockStart || p == tokenVarStart {
			// Only emit if we actually accumulated text; avoids empty
			// text tokens when delimiters are adjacent.
			if ctx.Width() > 0 {
				ctx.Emit(lexTypeText)
			}
			return lexparse.LexStateFn(lexCode), nil
		}
		// Advance the input.
		if !ctx.Advance() {
			// End of input. Emit the text up to this point.
			if ctx.Width() > 0 {
				ctx.Emit(lexTypeText)
			}
			return nil, io.EOF
		}
	}
}
// lexCode tokenizes template code (the content between delimiters). It
// discards whitespace and then dispatches to the identifier or symbol
// state; any other rune is a lex error with positional information.
//
//nolint:ireturn // returning interface is required to satisfy lexparse.LexState.
func lexCode(ctx *lexparse.CustomLexerContext) (lexparse.LexState, error) {
	// Consume whitespace and discard it.
	// TODO(#94): use backtracking
	for unicode.IsSpace(ctx.Peek()) {
		if !ctx.Discard() {
			// End of input
			return nil, io.EOF
		}
	}
	rn := ctx.Peek()
	switch {
	case idenRegexp.MatchString(string(rn)):
		// A letter starts an identifier (keyword or variable name).
		return lexparse.LexStateFn(lexIden), nil
	case symbolRegexp.MatchString(string(rn)):
		// '{', '}', or '%' starts a delimiter symbol.
		return lexparse.LexStateFn(lexSymbol), nil
	default:
		return nil, fmt.Errorf("%w: %q; line: %d, column: %d", errRune,
			rn, ctx.Pos().Line, ctx.Pos().Column)
	}
}
// lexIden tokenizes identifiers (keywords and variable names). It advances
// while the next rune matches the identifier pattern, then emits the token
// and returns to the code state.
//
//nolint:ireturn // returning interface is required to satisfy lexparse.LexState.
func lexIden(ctx *lexparse.CustomLexerContext) (lexparse.LexState, error) {
	for idenRegexp.MatchString(string(ctx.Peek())) {
		if !ctx.Advance() {
			// Input ended mid-identifier.
			return nil, io.EOF
		}
	}
	ctx.Emit(lexTypeIdentifier)
	return lexparse.LexStateFn(lexCode), nil
}
// lexSymbol tokenizes template symbols (e.g. {%, {{, }}, %}). It grows the
// current token one rune at a time until it exactly matches one of the
// two-rune delimiters, then emits it and transitions to the appropriate
// state (code after an opening delimiter, text after a closing one).
//
//nolint:ireturn // returning interface is required to satisfy lexparse.LexState.
func lexSymbol(ctx *lexparse.CustomLexerContext) (lexparse.LexState, error) {
	for {
		switch ctx.Token() {
		case tokenVarStart:
			ctx.Emit(lexTypeVarStart)
			return lexparse.LexStateFn(lexCode), nil
		case tokenVarEnd:
			ctx.Emit(lexTypeVarEnd)
			return lexparse.LexStateFn(lexText), nil
		case tokenBlockStart:
			ctx.Emit(lexTypeBlockStart)
			return lexparse.LexStateFn(lexCode), nil
		case tokenBlockEnd:
			ctx.Emit(lexTypeBlockEnd)
			return lexparse.LexStateFn(lexText), nil
		default:
			// Not a complete delimiter yet; the next rune must still be a
			// symbol rune or the input is malformed.
			if rn := ctx.Peek(); !symbolRegexp.MatchString(string(rn)) {
				return nil, fmt.Errorf("symbol: %w: %q; line: %d, column: %d",
					errRune, rn, ctx.Pos().Line, ctx.Pos().Column)
			}
		}
		if !ctx.Advance() {
			return nil, io.EOF
		}
	}
}
// parseRoot is the initial parser state. It replaces the root of the parse
// tree with a sequence node and schedules sequence parsing.
func parseRoot(ctx *lexparse.ParserContext[*tmplNode]) error {
	ctx.Replace(&tmplNode{typ: nodeTypeSeq})
	ctx.PushState(lexparse.ParseStateFn(parseSeq))
	return nil
}
// parseSeq delegates to another parse function based on token type.
func parseSeq(ctx *lexparse.ParserContext[*tmplNode]) error {
	token := ctx.Peek()
	switch token.Type {
	case lexTypeText:
		ctx.PushState(lexparse.ParseStateFn(parseText))
	case lexTypeVarStart:
		ctx.PushState(lexparse.ParseStateFn(parseVarStart))
	case lexTypeBlockStart:
		ctx.PushState(lexparse.ParseStateFn(parseBlockStart))
	default:
		// Any other token (EOF, or an 'else'/'endif' block delimiter) ends
		// this sequence; control falls back to whatever state is next on
		// the parser's state stack.
	}
	return nil
}
// parseText consumes one text token, adds a text leaf to the current node,
// and resumes sequence parsing.
func parseText(ctx *lexparse.ParserContext[*tmplNode]) error {
	textToken := ctx.Next()
	ctx.Node(&tmplNode{
		typ:  nodeTypeText,
		text: textToken.Value,
	})
	// Return to handling a sequence.
	ctx.PushState(lexparse.ParseStateFn(parseSeq))
	return nil
}
// parseVarStart consumes a '{{' token and schedules parsing of the variable
// name followed by the closing '}}'.
func parseVarStart(ctx *lexparse.ParserContext[*tmplNode]) error {
	_ = ctx.Next() // Consume the '{{' token.
	ctx.PushState(
		lexparse.ParseStateFn(parseVar),
		lexparse.ParseStateFn(parseVarEnd),
	)
	return nil
}
// parseVar handles replacement variables (e.g. the 'var' in {{ var }}).
// It consumes one identifier token and adds a variable leaf node to the
// current node.
func parseVar(ctx *lexparse.ParserContext[*tmplNode]) error {
	switch token := ctx.Next(); token.Type {
	case lexTypeIdentifier:
		// Validate the variable name.
		if !idenRegexp.MatchString(token.Value) {
			return lexTokenErr(fmt.Errorf("%w: invalid variable name", errIdentifier), token)
		}
		// Add a variable node.
		_ = ctx.Node(&tmplNode{
			typ:     nodeTypeVar,
			varName: token.Value,
		})
		return nil
	case lexparse.TokenTypeEOF:
		return fmt.Errorf("%w: parsing variable name", io.ErrUnexpectedEOF)
	default:
		return lexTokenErr(errIdentifier, token)
	}
}
// parseVarEnd handles var end (e.g. '}}'). It consumes the closing token
// and resumes sequence parsing; anything else is an error.
func parseVarEnd(ctx *lexparse.ParserContext[*tmplNode]) error {
	switch token := ctx.Next(); token.Type {
	case lexTypeVarEnd:
		// Go back to parsing template init state.
		ctx.PushState(lexparse.ParseStateFn(parseSeq))
		return nil
	case lexparse.TokenTypeEOF:
		return fmt.Errorf("%w: unclosed variable, expected %q", io.ErrUnexpectedEOF, tokenVarEnd)
	default:
		return fmt.Errorf("%w: expected %q", lexTokenErr(errIdentifier, token), tokenVarEnd)
	}
}
// parseBranch handles the start of an 'if' conditional block. It consumes
// the 'if' keyword, pushes a branch node, and schedules the condition,
// block end, if-body, and else/endif parse states in order.
func parseBranch(ctx *lexparse.ParserContext[*tmplNode]) error {
	switch token := ctx.Next(); token.Type {
	case lexTypeIdentifier:
		if token.Value != tokenIf {
			return fmt.Errorf("%w: expected %q", errIdentifier, tokenIf)
		}
		// Add a branch node. Its children become: [0] the condition
		// variable, [1] the if-body sequence, [2] the optional else
		// sequence.
		_ = ctx.Push(&tmplNode{
			typ: nodeTypeBranch,
		})
		ctx.PushState(
			// Parse the conditional expression. Currently only a simple
			// variable is supported.
			lexparse.ParseStateFn(parseVar),
			// Parse the '%}'
			lexparse.ParseStateFn(parseBlockEnd),
			// Parse the if block.
			lexparse.ParseStateFn(parseIf),
			// Parse an 'else' (or 'endif')
			lexparse.ParseStateFn(parseElse),
		)
		return nil
	case lexparse.TokenTypeEOF:
		return fmt.Errorf("%w: expected %q", io.ErrUnexpectedEOF, tokenIf)
	default:
		return lexTokenErr(errIdentifier, token)
	}
}
// parseIf begins the if body: it pushes a sequence node to hold the body's
// content and schedules sequence parsing inside it.
func parseIf(ctx *lexparse.ParserContext[*tmplNode]) error {
	_ = ctx.Push(&tmplNode{typ: nodeTypeSeq})
	ctx.PushState(lexparse.ParseStateFn(parseSeq))
	return nil
}
// parseElse handles an else (or endif) block. On 'else' it climbs back to
// the branch node, pushes an else sequence, and schedules parsing through
// the matching 'endif'. On 'endif' it delegates to parseEndif.
func parseElse(ctx *lexparse.ParserContext[*tmplNode]) error {
	token := ctx.Peek()
	switch token.Type {
	case lexTypeIdentifier:
		// Validate we are at a sequence node (i.e. the if body just ended).
		if cur := ctx.Pos(); cur.Value.typ != nodeTypeSeq {
			return lexTokenErr(errIdentifier, token)
		}
	case lexparse.TokenTypeEOF:
		return fmt.Errorf("%w: unclosed if block, looking for %q or %q", io.ErrUnexpectedEOF, tokenElse, tokenEndif)
	default:
		return lexTokenErr(errIdentifier, token)
	}
	switch token.Value {
	case tokenElse:
		// Consume the token.
		_ = ctx.Next()
		// Climb the tree back to the conditional.
		ctx.Climb()
		// Validate that we are in a conditional and there isn't already an else branch.
		// (A branch node has exactly 2 children here: the condition and the if body.)
		if cur := ctx.Pos(); cur.Value.typ != nodeTypeBranch || len(cur.Children) != 2 {
			return lexTokenErr(errIdentifier, token)
		}
		// Add an else sequence node to the conditional.
		_ = ctx.Push(&tmplNode{
			typ: nodeTypeSeq,
		})
		ctx.PushState(
			// Parse the '%}'
			lexparse.ParseStateFn(parseBlockEnd),
			// parse the else sequence block.
			lexparse.ParseStateFn(parseSeq),
			// parse the endif.
			lexparse.ParseStateFn(parseEndif),
		)
	case tokenEndif:
		// No else clause; let parseEndif consume the 'endif'.
		ctx.PushState(lexparse.ParseStateFn(parseEndif))
	default:
		return lexTokenErr(fmt.Errorf("%w: looking for %q or %q", errIdentifier, tokenElse, tokenEndif), token)
	}
	return nil
}
// parseEndif handles an endif block. It consumes the 'endif' keyword,
// climbs out of the body sequence and branch nodes, and resumes parsing
// the enclosing sequence after the closing '%}'.
func parseEndif(ctx *lexparse.ParserContext[*tmplNode]) error {
	switch token := ctx.Next(); token.Type {
	case lexTypeIdentifier:
		if token.Value != tokenEndif {
			return lexTokenErr(fmt.Errorf("%w: looking for %q", errIdentifier, tokenEndif), token)
		}
		// Climb out of the sequence node.
		ctx.Climb()
		// Climb out of the branch node.
		ctx.Climb()
		ctx.PushState(
			// parse the '%}'
			lexparse.ParseStateFn(parseBlockEnd),
			// Go back to parsing a sequence.
			lexparse.ParseStateFn(parseSeq),
		)
		return nil
	case lexparse.TokenTypeEOF:
		return fmt.Errorf("%w: looking for %q", io.ErrUnexpectedEOF, tokenEndif)
	default:
		return lexTokenErr(errIdentifier, token)
	}
}
// parseBlockStart handles the start of a template block '{%'. It validates
// the '{%' token, peeks at the command keyword, and dispatches: 'if' starts
// a branch; 'else'/'endif' are left for the parseElse/parseEndif states
// already scheduled on the stack.
func parseBlockStart(ctx *lexparse.ParserContext[*tmplNode]) error {
	// Validate the block start token.
	switch token := ctx.Next(); token.Type {
	case lexTypeBlockStart:
		// OK
		// NOTE(review): this prints the token to stderr on every block —
		// looks like leftover debug output; consider removing it (together
		// with the then-unused "os" import).
		fmt.Fprintln(os.Stderr, token.Value)
	case lexparse.TokenTypeEOF:
		return fmt.Errorf("%w: expected %q", io.ErrUnexpectedEOF, tokenBlockStart)
	default:
		return lexTokenErr(errIdentifier, token)
	}
	// Validate the command token. Peek (not Next) so the command state can
	// consume it itself.
	token := ctx.Peek()
	switch token.Type {
	case lexTypeIdentifier:
		// OK
	case lexparse.TokenTypeEOF:
		return fmt.Errorf("%w: expected %q", io.ErrUnexpectedEOF, tokenBlockStart)
	default:
		return fmt.Errorf("%w: expected %q, %q, or %q",
			lexTokenErr(errIdentifier, token), tokenIf, tokenElse, tokenEndif)
	}
	// Handle the block command.
	switch token.Value {
	case tokenIf:
		ctx.PushState(lexparse.ParseStateFn(parseBranch))
	case tokenElse, tokenEndif:
		// NOTE: parseElse,parseEndif should already be on the stack.
	default:
		return lexTokenErr(
			fmt.Errorf("%w: expected %q, %q, or %q", errIdentifier, tokenIf, tokenElse, tokenEndif), token)
	}
	return nil
}
// parseBlockEnd consumes the closing '%}' of a template block. Any other
// token, including EOF, is an error.
func parseBlockEnd(ctx *lexparse.ParserContext[*tmplNode]) error {
	token := ctx.Next()
	if token.Type == lexTypeBlockEnd {
		return nil
	}
	if token.Type == lexparse.TokenTypeEOF {
		return fmt.Errorf("%w: expected %q", io.ErrUnexpectedEOF, tokenBlockEnd)
	}
	return lexTokenErr(fmt.Errorf("%w: expected %q", errIdentifier, tokenBlockEnd), token)
}
// Execute renders the template with the given data and returns the result.
//
// The variable names "true" and "false" resolve to the corresponding
// boolean literals unless the caller provides their own values for them.
func Execute(root *lexparse.Node[*tmplNode], data map[string]string) (string, error) {
	// Work on a copy so the caller's map is never mutated by the boolean
	// defaults below (this also tolerates a nil data map, which would
	// otherwise panic on write).
	vars := make(map[string]string, len(data)+2)
	for k, v := range data {
		vars[k] = v
	}
	// Support basic boolean values.
	if _, ok := vars["true"]; !ok {
		vars["true"] = "true"
	}
	if _, ok := vars["false"]; !ok {
		vars["false"] = "false"
	}
	var b strings.Builder
	if err := execNode(root, vars, &b); err != nil {
		return "", err
	}
	return b.String(), nil
}
// execNode renders the children of root into bldr, substituting variables
// from data and evaluating if/else branches.
//
// A branch node's children are: [0] the condition variable node, [1] the
// if-body sequence, and optionally [2] the else sequence.
func execNode(root *lexparse.Node[*tmplNode], data map[string]string, bldr *strings.Builder) error {
	for _, node := range root.Children {
		switch node.Value.typ {
		case nodeTypeText:
			// Write raw text to the output.
			bldr.WriteString(node.Value.text)
		case nodeTypeVar:
			// Replace templated variables with given data.
			bldr.WriteString(data[node.Value.varName])
		case nodeTypeBranch:
			// condition sanity check
			if len(node.Children) < 2 {
				panic(fmt.Sprintf("invalid branch: %#v", node))
			}
			// Get the condition.
			cond := node.Children[0]
			// Condition sanity check
			if cond.Value.typ != nodeTypeVar {
				panic(fmt.Sprintf("invalid branch condition: %#v", cond))
			}
			// Evaluate the condition variable (BUG FIX: previously this
			// read node.Value.varName — the branch node's empty varName —
			// so the condition was never actually consulted).
			condVal := data[cond.Value.varName]
			v, err := strconv.ParseBool(condVal)
			truthy := (err == nil && v) || (err != nil && condVal != "")
			if truthy {
				// Render the if body (child 1, not the condition node).
				if err := execNode(node.Children[1], data, bldr); err != nil {
					return err
				}
			} else if len(node.Children) > 2 {
				// Render the else body when present.
				if err := execNode(node.Children[2], data, bldr); err != nil {
					return err
				}
			}
		case nodeTypeSeq:
			if err := execNode(node, data, bldr); err != nil {
				return err
			}
		}
	}
	return nil
}
// Example_templateEngine implements a simple text templating language. The
// language replaces variables identified with double brackets
// (e.g. `{{ var }}`) with data values for those variables.
//
// LexParse is used to lex and parse the template into a parse tree. This tree
// can be passed with a data map to the Execute function to interpret the
// template and retrieve a final result.
//
// This example includes some best practices for error handling, such as
// including line and column numbers in error messages.
func main() {
	r := strings.NewReader(`Hello, {% if subject %}{{ subject }}{% else %}World{% endif %}!`)
	// Lex and parse the template text into a parse tree.
	tree, err := lexparse.LexParse(
		context.Background(),
		lexparse.NewCustomLexer(r, lexparse.LexStateFn(lexText)),
		lexparse.ParseStateFn(parseRoot),
	)
	if err != nil {
		panic(err)
	}
	// Print the parse tree, then the rendered result.
	fmt.Println(tree)
	txt, err := Execute(tree, map[string]string{"subject": "世界"})
	if err != nil {
		panic(err)
	}
	fmt.Print(txt)
}
Output: [] (0:0) ├── "Hello, " (1:1) ├── if/else (1:11) │ ├── {{subject}} (1:14) │ ├── [] (1:22) │ │ └── {{subject}} (1:27) │ └── [] (1:40) │ └── "World" (1:47) └── "!" (1:63) Hello, 世界!
Index ¶
- Constants
- type CustomLexer
- type CustomLexerContext
- func (ctx *CustomLexerContext) Advance() bool
- func (ctx *CustomLexerContext) AdvanceN(n int) int
- func (ctx *CustomLexerContext) Cursor() Position
- func (ctx *CustomLexerContext) Discard() bool
- func (ctx *CustomLexerContext) DiscardN(n int) int
- func (ctx *CustomLexerContext) DiscardTo(query []string) string
- func (ctx *CustomLexerContext) Emit(typ TokenType) *Token
- func (ctx *CustomLexerContext) Find(query []string) string
- func (ctx *CustomLexerContext) Ignore()
- func (ctx *CustomLexerContext) NextRune() rune
- func (ctx *CustomLexerContext) Peek() rune
- func (ctx *CustomLexerContext) PeekN(n int) []rune
- func (ctx *CustomLexerContext) Pos() Position
- func (ctx *CustomLexerContext) Token() string
- func (ctx *CustomLexerContext) Width() int
- type LexState
- type Lexer
- type Node
- type ParseState
- type Parser
- type ParserContext
- func (ctx *ParserContext[V]) Climb() *Node[V]
- func (ctx *ParserContext[V]) NewNode(v V) *Node[V]
- func (ctx *ParserContext[V]) Next() *Token
- func (ctx *ParserContext[V]) Node(v V) *Node[V]
- func (ctx *ParserContext[V]) Peek() *Token
- func (ctx *ParserContext[V]) Pos() *Node[V]
- func (ctx *ParserContext[V]) Push(v V) *Node[V]
- func (ctx *ParserContext[V]) PushState(states ...ParseState[V])
- func (ctx *ParserContext[V]) Replace(v V) V
- func (ctx *ParserContext[V]) Root() *Node[V]
- func (ctx *ParserContext[V]) SetRoot(root *Node[V])
- type Position
- type ScanningLexer
- type Token
- type TokenSource
- type TokenType
Examples ¶
Constants ¶
const ( // TokenTypeEOF represents the end of the file. TokenTypeEOF = TokenType(scanner.EOF) // TokenTypeIdent represents an identifier. TokenTypeIdent = TokenType(scanner.Ident) // TokenTypeInt represents an integer literal. TokenTypeInt = TokenType(scanner.Int) // TokenTypeFloat represents a floating-point literal. TokenTypeFloat = TokenType(scanner.Float) // TokenTypeChar represents a character literal. TokenTypeChar = TokenType(scanner.Char) // TokenTypeString represents a string literal. TokenTypeString = TokenType(scanner.String) // TokenTypeRawString represents a raw string literal. TokenTypeRawString = TokenType(scanner.RawString) // TokenTypeComment represents a comment. TokenTypeComment = TokenType(scanner.Comment) )
const EOF rune = -1
EOF is a rune that indicates that the lexer has finished processing.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CustomLexer ¶ added in v0.2.0
type CustomLexer struct {
// contains filtered or unexported fields
}
CustomLexer lexically processes a byte stream. It is implemented as a finite-state machine in which each LexState implements its own processing.
A CustomLexer maintains an internal cursor which marks the start of the next token being currently processed. The Lexer can then advance the reader to find the end of the token before emitting it.
func NewCustomLexer ¶ added in v0.2.0
func NewCustomLexer(reader io.Reader, startingState LexState) *CustomLexer
NewCustomLexer creates a new Lexer initialized with the given starting LexState. The Lexer takes ownership of the tokens channel and closes it when lexing is completed.
func (*CustomLexer) Err ¶ added in v0.2.0
func (l *CustomLexer) Err() error
Err returns any errors that the lexer encountered.
func (*CustomLexer) NextToken ¶ added in v0.2.0
func (l *CustomLexer) NextToken(ctx context.Context) *Token
NextToken implements [Lexer.NextToken] and returns the next token from the input stream. If the end of the input is reached, a token with type TokenTypeEOF is returned.
func (*CustomLexer) SetFilename ¶ added in v0.3.0
func (l *CustomLexer) SetFilename(name string)
SetFilename sets the filename in the lexer's positional information.
type CustomLexerContext ¶ added in v0.3.0
type CustomLexerContext struct {
//nolint:containedctx // Embedding context required for interface compliance.
context.Context
// contains filtered or unexported fields
}
CustomLexerContext is a context that carries a reference to the current CustomLexer. It is passed to [LexState.Run] method to allow the state implementation to interact with the lexer.
func (*CustomLexerContext) Advance ¶ added in v0.3.0
func (ctx *CustomLexerContext) Advance() bool
Advance attempts to advance the underlying reader a single rune and returns true if actually advanced. The current token cursor position is not updated.
func (*CustomLexerContext) AdvanceN ¶ added in v0.3.0
func (ctx *CustomLexerContext) AdvanceN(n int) int
AdvanceN attempts to advance the underlying reader n runes and returns the number actually advanced. The current token cursor position is not updated.
func (*CustomLexerContext) Cursor ¶ added in v0.3.0
func (ctx *CustomLexerContext) Cursor() Position
Cursor returns the current position of the underlying cursor marking the beginning of the current token being processed.
func (*CustomLexerContext) Discard ¶ added in v0.3.0
func (ctx *CustomLexerContext) Discard() bool
Discard attempts to discard the next rune, advancing the current token cursor, and returns true if actually discarded.
func (*CustomLexerContext) DiscardN ¶ added in v0.3.0
func (ctx *CustomLexerContext) DiscardN(n int) int
DiscardN attempts to discard n runes, advancing the current token cursor position, and returns the number actually discarded.
func (*CustomLexerContext) DiscardTo ¶ added in v0.3.0
func (ctx *CustomLexerContext) DiscardTo(query []string) string
DiscardTo searches the input for one of the given search strings, advancing the reader, and stopping when one of the strings is found. The token cursor is advanced and data prior to the search string is discarded. The string found is returned. If no match is found an empty string is returned.
func (*CustomLexerContext) Emit ¶ added in v0.3.0
func (ctx *CustomLexerContext) Emit(typ TokenType) *Token
Emit emits the token between the current cursor position and reader position and returns the token. If the lexer is not currently active, this is a no-op. This advances the current token cursor.
func (*CustomLexerContext) Find ¶ added in v0.3.0
func (ctx *CustomLexerContext) Find(query []string) string
Find searches the input for one of the given search strings, advancing the reader, and stopping when one of the strings is found. The token cursor is not advanced. The string found is returned. If no match is found an empty string is returned.
func (*CustomLexerContext) Ignore ¶ added in v0.3.0
func (ctx *CustomLexerContext) Ignore()
Ignore ignores the previous input and resets the token start position to the current reader position.
func (*CustomLexerContext) NextRune ¶ added in v0.3.0
func (ctx *CustomLexerContext) NextRune() rune
NextRune returns the next rune of input, advancing the reader while not advancing the token cursor.
func (*CustomLexerContext) Peek ¶ added in v0.3.0
func (ctx *CustomLexerContext) Peek() rune
Peek returns the next rune from the buffer without advancing the reader or current token cursor.
func (*CustomLexerContext) PeekN ¶ added in v0.3.0
func (ctx *CustomLexerContext) PeekN(n int) []rune
PeekN returns the next n runes from the buffer without advancing the reader or current token cursor. PeekN may return fewer runes than requested if an error occurs or at end of input.
func (*CustomLexerContext) Pos ¶ added in v0.3.0
func (ctx *CustomLexerContext) Pos() Position
Pos returns the current position of the underlying reader.
func (*CustomLexerContext) Token ¶ added in v0.3.0
func (ctx *CustomLexerContext) Token() string
Token returns the current token value.
func (*CustomLexerContext) Width ¶ added in v0.3.0
func (ctx *CustomLexerContext) Width() int
Width returns the current width of the token being processed. It is equivalent to ctx.Pos().Offset - ctx.Cursor().Offset.
type LexState ¶
type LexState interface {
// Run returns the next state to transition to or an error. If the returned
// error is io.EOF then the Lexer finishes processing normally.
Run(ctx *CustomLexerContext) (LexState, error)
}
LexState is the state of the current lexing state machine. It defines the logic to process the current state and returns the next state.
func LexStateFn ¶
func LexStateFn(f func(*CustomLexerContext) (LexState, error)) LexState
LexStateFn creates a LexState from the given Run function.
type Lexer ¶
type Lexer interface {
// NextToken returns the next token from the input. If there are no more
// tokens, the context is canceled, or an error occurs, it returns a Token
// with Type set to TokenTypeEOF.
NextToken(ctx context.Context) *Token
// Err returns the error encountered by the lexer, if any. If the error
// encountered is io.EOF, it will return nil.
Err() error
}
Lexer is an interface that defines the methods for a lexer that tokenizes input streams. It reads from an input stream and emits tokens.
type Node ¶
type Node[V comparable] struct { Parent *Node[V] Children []*Node[V] Value V // Start is the start position in the input where the value was found. Start Position }
Node is the structure for a single node in the parse tree.
func LexParse ¶
func LexParse[V comparable]( ctx context.Context, lex Lexer, startingState ParseState[V], ) (*Node[V], error)
LexParse lexes the content using the given lexer and feeds the tokens concurrently to the parser starting at startingState. The resulting root node of the parse tree is returned.
type ParseState ¶
type ParseState[V comparable] interface { // Run executes the logic at the current state, returning an error if one is // encountered. Implementations are expected to add new [Node] objects to // the AST using [Parser.Push] or [Parser.Node]. As necessary, new parser // state should be pushed onto the stack using [Parser.PushState]. Run(ctx *ParserContext[V]) error }
ParseState is the state of the current parsing state machine. It defines the logic to process the current state and returns the next state.
func ParseStateFn ¶
func ParseStateFn[V comparable](f func(*ParserContext[V]) error) ParseState[V]
ParseStateFn creates a ParseState from the given Run function.
type Parser ¶
type Parser[V comparable] struct { // contains filtered or unexported fields }
Parser reads the tokens produced by a Lexer and builds a parse tree. It is implemented as a stack of states (ParseState) in which each state implements its own processing.
Parser maintains a current position in the parse tree which can be utilized by parser states.
func NewParser ¶
func NewParser[V comparable](tokens TokenSource, startingState ParseState[V]) *Parser[V]
NewParser creates a new Parser that reads from the tokens channel. The parser is initialized with a root node with an empty value.
func (*Parser[V]) Parse ¶
Parse builds a parse tree by repeatedly pulling ParseState objects from the stack and running them, starting with the initial state. Parsing can be canceled by ctx.
The caller can request that the parser stop by canceling ctx.
type ParserContext ¶ added in v0.3.0
type ParserContext[V comparable] struct { //nolint:containedctx // Embedding context required for interface compliance. context.Context // contains filtered or unexported fields }
ParserContext provides context for the current parsing operation, including access to the parser state and methods to manipulate the parse tree.
func (*ParserContext[V]) Climb ¶ added in v0.3.0
func (ctx *ParserContext[V]) Climb() *Node[V]
Climb updates the current node position to the current node's parent returning the previous current node. It is a no-op that returns the root node if called on the root node. Updates the end position of the parent node to the end position of the current node.
func (*ParserContext[V]) NewNode ¶ added in v0.3.0
func (ctx *ParserContext[V]) NewNode(v V) *Node[V]
NewNode creates a new node at the current token position and returns it without adding it to the tree.
func (*ParserContext[V]) Next ¶ added in v0.3.0
func (ctx *ParserContext[V]) Next() *Token
Next returns the next token from the lexer. This is the new current token position.
func (*ParserContext[V]) Node ¶ added in v0.3.0
func (ctx *ParserContext[V]) Node(v V) *Node[V]
Node creates a new node at the current token position and adds it as a child to the current node. The current node is not updated.
func (*ParserContext[V]) Peek ¶ added in v0.3.0
func (ctx *ParserContext[V]) Peek() *Token
Peek returns the next token from the lexer without consuming it.
func (*ParserContext[V]) Pos ¶ added in v0.3.0
func (ctx *ParserContext[V]) Pos() *Node[V]
Pos returns the current node position in the tree.
func (*ParserContext[V]) Push ¶ added in v0.3.0
func (ctx *ParserContext[V]) Push(v V) *Node[V]
Push creates a new node, adds it as a child to the current node, updates the current node to the new node, and returns the new node.
func (*ParserContext[V]) PushState ¶ added in v0.3.0
func (ctx *ParserContext[V]) PushState(states ...ParseState[V])
PushState pushes a number of new expected future states onto the state stack in reverse order.
func (*ParserContext[V]) Replace ¶ added in v0.3.0
func (ctx *ParserContext[V]) Replace(v V) V
Replace replaces the current node with a new node with the given value. The old node is removed from the tree and its value is returned. Can be used to replace the root node.
func (*ParserContext[V]) Root ¶ added in v0.3.0
func (ctx *ParserContext[V]) Root() *Node[V]
Root returns the root of the parse tree.
func (*ParserContext[V]) SetRoot ¶ added in v0.3.0
func (ctx *ParserContext[V]) SetRoot(root *Node[V])
SetRoot sets the root of the parse tree to the given node. The current node is also set to the root node. This is useful for resetting the parser to a new root node.
type Position ¶ added in v0.2.0
type Position struct {
// Filename is the name of the file being read. It can be empty if the
// input is not from a file.
Filename string
// Offset is the byte offset in the input stream, starting at 0.
Offset int
// Line is the line number in the input stream, starting at 1.
Line int
// Column is the column number in the line, starting at 1. It counts
// characters in the line, including whitespace and newlines.
Column int
}
Position represents a position in an input.
type ScanningLexer ¶ added in v0.2.0
type ScanningLexer struct {
// contains filtered or unexported fields
}
ScanningLexer is a lexer that uses the text/scanner package to tokenize input streams.
func NewScanningLexer ¶ added in v0.2.0
func NewScanningLexer(r io.Reader) *ScanningLexer
NewScanningLexer creates a new ScanningLexer that reads from the given io.Reader.
func (*ScanningLexer) Err ¶ added in v0.2.0
func (l *ScanningLexer) Err() error
Err implements Lexer.Err. It returns the first error encountered by the lexer, if any.
func (*ScanningLexer) NextToken ¶ added in v0.2.0
func (l *ScanningLexer) NextToken(_ context.Context) *Token
NextToken implements Lexer.NextToken. It returns the next token from the input stream.
func (*ScanningLexer) SetFilename ¶ added in v0.3.0
func (l *ScanningLexer) SetFilename(name string)
SetFilename sets the filename in the lexer's positional information.
type Token ¶
type Token struct {
// Type is the Token's type.
Type TokenType
// Value is the Token's value.
Value string
// Start is the start position in the byte stream where the Token was found.
Start Position
// End is the end position in the byte stream where the Token was found.
End Position
}
Token is a tokenized input which can be emitted by a Lexer.
type TokenSource ¶ added in v0.2.0
type TokenSource interface {
// NextToken returns the next token from the source. When tokens are
// exhausted, it returns a Token with Type set to TokenTypeEOF.
NextToken(ctx context.Context) *Token
}
TokenSource is an interface that defines a source of tokens for the parser.