Documentation
¶
Overview ¶
Package predictive provides data structures and algorithms for building predictive parsers. A predictive parser is a top-down recursive-descent parser without backtracking.
Top-down parsing involves constructing a parse tree for the input string, starting from the root node (representing the start symbol) and expanding the nodes in preorder. Equivalently, top-down parsing can be viewed as finding a leftmost derivation for an input string.
A recursive descent parser is a top-down parser constructed from mutually recursive procedures (or their non-recursive equivalents), where each procedure corresponds to a nonterminal in the grammar. This structure closely mirrors the grammar, making it intuitive and directly aligned with the rules it recognizes.
Predictive parsers can be constructed for a class of grammars called LL(1). The first "L" in LL(1) stands for scanning the input from left to right, the second "L" for producing a leftmost derivation, and the "1" for using one input symbol of lookahead at each step. The class of LL(1) grammars is expressive enough to cover most programming constructs, such as arithmetic expressions and simple control structures.
For more details on parsing theory, refer to "Compilers: Principles, Techniques, and Tools (2nd Edition)".
Example (BuildParsingTable) ¶
package main import ( "fmt" "github.com/moorara/algo/grammar" "github.com/moorara/algo/parser/predictive" ) func main() { G := grammar.NewCFG( []grammar.Terminal{"+", "*", "(", ")", "id"}, []grammar.NonTerminal{"E", "E′", "T", "T′", "F"}, []*grammar.Production{ {Head: "E", Body: grammar.String[grammar.Symbol]{grammar.NonTerminal("T"), grammar.NonTerminal("E′")}}, // E → T E′ {Head: "E′", Body: grammar.String[grammar.Symbol]{grammar.Terminal("+"), grammar.NonTerminal("T"), grammar.NonTerminal("E′")}}, // E′ → + T E′ {Head: "E′", Body: grammar.E}, // E′ → ε {Head: "T", Body: grammar.String[grammar.Symbol]{grammar.NonTerminal("F"), grammar.NonTerminal("T′")}}, // T → F T′ {Head: "T′", Body: grammar.String[grammar.Symbol]{grammar.Terminal("*"), grammar.NonTerminal("F"), grammar.NonTerminal("T′")}}, // T′ → * F T′ {Head: "T′", Body: grammar.E}, // T′ → ε {Head: "F", Body: grammar.String[grammar.Symbol]{grammar.Terminal("("), grammar.NonTerminal("E"), grammar.Terminal(")")}}, // F → ( E ) {Head: "F", Body: grammar.String[grammar.Symbol]{grammar.Terminal("id")}}, // F → id }, "E", ) table, err := predictive.BuildParsingTable(G) if err != nil { panic(err) } fmt.Println(table) }
Example (Parse) ¶
package main import ( "fmt" "io" "strings" "github.com/moorara/algo/grammar" "github.com/moorara/algo/lexer" "github.com/moorara/algo/lexer/input" "github.com/moorara/algo/parser/predictive" ) // ExprLexer is an implementation of lexer.Lexer for basic math expressions. type ExprLexer struct { in *input.Input state int } func NewExprLexer(src io.Reader) (lexer.Lexer, error) { in, err := input.New("expression", src, 4096) if err != nil { return nil, err } return &ExprLexer{ in: in, }, nil } func (l *ExprLexer) NextToken() (lexer.Token, error) { var r rune var err error for r, err = l.in.Next(); err == nil; r, err = l.in.Next() { if token, ok := l.advanceDFA(r); ok { return token, nil } } if err == io.EOF { return l.finalizeDFA() } return lexer.Token{}, err } // advanceDFA simulates a deterministic finite automata to identify tokens. func (l *ExprLexer) advanceDFA(r rune) (lexer.Token, bool) { switch l.state { case 0: switch r { case ' ', '\t', '\n': l.state = 0 case '+', '-', '*', '/', '(', ')': l.state = 1 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': l.state = 2 case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z': l.state = 4 } case 2: switch r { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': l.state = 2 case ' ', '\t', '\n', '+', '-', '*', '/', '(', ')', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z': l.state = 3 } case 4: switch r { case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z': l.state = 4 case ' ', '\t', '\n', '+', '-', '*', '/', '(', ')', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': l.state = 5 } default: panic("WTF?") } switch l.state { case 0: l.in.Skip() case 1: l.state = 0 lexeme, pos := l.in.Lexeme() return lexer.Token{Terminal: grammar.Terminal(r), Lexeme: lexeme, Pos: pos}, true case 
3: l.state = 0 l.in.Retract() lexeme, pos := l.in.Lexeme() return lexer.Token{Terminal: grammar.Terminal("num"), Lexeme: lexeme, Pos: pos}, true case 5: l.state = 0 l.in.Retract() lexeme, pos := l.in.Lexeme() return lexer.Token{Terminal: grammar.Terminal("id"), Lexeme: lexeme, Pos: pos}, true } return lexer.Token{}, false } // finalizeDFA is called after all inputs have been processed by the DFA. // It generates the final token based on the current state of the lexer. func (l *ExprLexer) finalizeDFA() (lexer.Token, error) { lexeme, pos := l.in.Lexeme() switch l.state { case 2: l.state = 0 return lexer.Token{Terminal: grammar.Terminal("num"), Lexeme: lexeme, Pos: pos}, nil case 4: l.state = 0 return lexer.Token{Terminal: grammar.Terminal("id"), Lexeme: lexeme, Pos: pos}, nil default: return lexer.Token{}, io.EOF } } func main() { src := strings.NewReader(` (price + tax * quantity) * (discount + shipping) * (weight + volume) + total `) l, err := NewExprLexer(src) if err != nil { panic(err) } G := grammar.NewCFG( []grammar.Terminal{"+", "*", "(", ")", "id"}, []grammar.NonTerminal{"E", "E′", "T", "T′", "F"}, []*grammar.Production{ {Head: "E", Body: grammar.String[grammar.Symbol]{grammar.NonTerminal("T"), grammar.NonTerminal("E′")}}, // E → T E′ {Head: "E′", Body: grammar.String[grammar.Symbol]{grammar.Terminal("+"), grammar.NonTerminal("T"), grammar.NonTerminal("E′")}}, // E′ → + T E′ {Head: "E′", Body: grammar.E}, // E′ → ε {Head: "T", Body: grammar.String[grammar.Symbol]{grammar.NonTerminal("F"), grammar.NonTerminal("T′")}}, // T → F T′ {Head: "T′", Body: grammar.String[grammar.Symbol]{grammar.Terminal("*"), grammar.NonTerminal("F"), grammar.NonTerminal("T′")}}, // T′ → * F T′ {Head: "T′", Body: grammar.E}, // T′ → ε {Head: "F", Body: grammar.String[grammar.Symbol]{grammar.Terminal("("), grammar.NonTerminal("E"), grammar.Terminal(")")}}, // F → ( E ) {Head: "F", Body: grammar.String[grammar.Symbol]{grammar.Terminal("id")}}, // F → id }, "E", ) p := predictive.New(G, 
l) err = p.Parse( func(token *lexer.Token) error { fmt.Printf("Token: %s\n", token) return nil }, func(prod *grammar.Production) error { fmt.Printf("Production: %s\n", prod) return nil }, ) if err != nil { panic(err) } }
Example (ParseAndBuildAST) ¶
You can copy-paste the output of this example into https://edotor.net to view the result.
package main import ( "fmt" "io" "strings" "github.com/moorara/algo/grammar" "github.com/moorara/algo/lexer" "github.com/moorara/algo/lexer/input" "github.com/moorara/algo/parser" "github.com/moorara/algo/parser/predictive" ) // ExprLexer is an implementation of lexer.Lexer for basic math expressions. type ExprLexer struct { in *input.Input state int } func NewExprLexer(src io.Reader) (lexer.Lexer, error) { in, err := input.New("expression", src, 4096) if err != nil { return nil, err } return &ExprLexer{ in: in, }, nil } func (l *ExprLexer) NextToken() (lexer.Token, error) { var r rune var err error for r, err = l.in.Next(); err == nil; r, err = l.in.Next() { if token, ok := l.advanceDFA(r); ok { return token, nil } } if err == io.EOF { return l.finalizeDFA() } return lexer.Token{}, err } // advanceDFA simulates a deterministic finite automata to identify tokens. func (l *ExprLexer) advanceDFA(r rune) (lexer.Token, bool) { switch l.state { case 0: switch r { case ' ', '\t', '\n': l.state = 0 case '+', '-', '*', '/', '(', ')': l.state = 1 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': l.state = 2 case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z': l.state = 4 } case 2: switch r { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': l.state = 2 case ' ', '\t', '\n', '+', '-', '*', '/', '(', ')', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z': l.state = 3 } case 4: switch r { case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z': l.state = 4 case ' ', '\t', '\n', '+', '-', '*', '/', '(', ')', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': l.state = 5 } default: panic("WTF?") } switch l.state { case 0: l.in.Skip() case 1: l.state = 0 lexeme, pos := l.in.Lexeme() return lexer.Token{Terminal: grammar.Terminal(r), 
Lexeme: lexeme, Pos: pos}, true case 3: l.state = 0 l.in.Retract() lexeme, pos := l.in.Lexeme() return lexer.Token{Terminal: grammar.Terminal("num"), Lexeme: lexeme, Pos: pos}, true case 5: l.state = 0 l.in.Retract() lexeme, pos := l.in.Lexeme() return lexer.Token{Terminal: grammar.Terminal("id"), Lexeme: lexeme, Pos: pos}, true } return lexer.Token{}, false } // finalizeDFA is called after all inputs have been processed by the DFA. // It generates the final token based on the current state of the lexer. func (l *ExprLexer) finalizeDFA() (lexer.Token, error) { lexeme, pos := l.in.Lexeme() switch l.state { case 2: l.state = 0 return lexer.Token{Terminal: grammar.Terminal("num"), Lexeme: lexeme, Pos: pos}, nil case 4: l.state = 0 return lexer.Token{Terminal: grammar.Terminal("id"), Lexeme: lexeme, Pos: pos}, nil default: return lexer.Token{}, io.EOF } } func main() { src := strings.NewReader(` (price + tax * quantity) * (discount + shipping) * (weight + volume) + total `) l, err := NewExprLexer(src) if err != nil { panic(err) } G := grammar.NewCFG( []grammar.Terminal{"+", "*", "(", ")", "id"}, []grammar.NonTerminal{"E", "E′", "T", "T′", "F"}, []*grammar.Production{ {Head: "E", Body: grammar.String[grammar.Symbol]{grammar.NonTerminal("T"), grammar.NonTerminal("E′")}}, // E → T E′ {Head: "E′", Body: grammar.String[grammar.Symbol]{grammar.Terminal("+"), grammar.NonTerminal("T"), grammar.NonTerminal("E′")}}, // E′ → + T E′ {Head: "E′", Body: grammar.E}, // E′ → ε {Head: "T", Body: grammar.String[grammar.Symbol]{grammar.NonTerminal("F"), grammar.NonTerminal("T′")}}, // T → F T′ {Head: "T′", Body: grammar.String[grammar.Symbol]{grammar.Terminal("*"), grammar.NonTerminal("F"), grammar.NonTerminal("T′")}}, // T′ → * F T′ {Head: "T′", Body: grammar.E}, // T′ → ε {Head: "F", Body: grammar.String[grammar.Symbol]{grammar.Terminal("("), grammar.NonTerminal("E"), grammar.Terminal(")")}}, // F → ( E ) {Head: "F", Body: grammar.String[grammar.Symbol]{grammar.Terminal("id")}}, // F → 
id }, "E", ) p := predictive.New(G, l) root, err := p.ParseAndBuildAST() if err != nil { panic(err) } n := root.(*parser.InternalNode) fmt.Println(n.DOT()) }
Index ¶
- func New(G *grammar.CFG, lexer lexer.Lexer) parser.Parser
- type ParsingTable
- func (t *ParsingTable) Conflicts() error
- func (t *ParsingTable) Equal(rhs *ParsingTable) bool
- func (t *ParsingTable) GetProduction(A grammar.NonTerminal, a grammar.Terminal) (*grammar.Production, bool)
- func (t *ParsingTable) IsEmpty(A grammar.NonTerminal, a grammar.Terminal) bool
- func (t *ParsingTable) IsSync(A grammar.NonTerminal, a grammar.Terminal) bool
- func (t *ParsingTable) String() string
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type ParsingTable ¶
type ParsingTable struct {
// contains filtered or unexported fields
}
ParsingTable represents a parsing table for a predictive parser.
func BuildParsingTable ¶
func BuildParsingTable(G *grammar.CFG) (*ParsingTable, error)
BuildParsingTable constructs a parsing table for a predictive parser.
This function constructs a parsing table for any context-free grammar. To identify errors in the table, use the Conflicts method. Some errors may be resolved by eliminating left recursion and applying left factoring to the grammar. However, certain grammars cannot be transformed into LL(1) even after these transformations. Some languages may have no LL(1) grammar at all.
func NewParsingTable ¶ added in v0.10.5
func NewParsingTable(terminals []grammar.Terminal, nonTerminals []grammar.NonTerminal) *ParsingTable
NewParsingTable creates an empty parsing table for a predictive parser.
func (*ParsingTable) Conflicts ¶ added in v0.10.6
func (t *ParsingTable) Conflicts() error
Conflicts checks for conflicts in the parsing table. If there are multiple productions for at least one combination of non-terminal A and terminal a, the method returns an error containing details about the conflicting productions. If no conflicts are found, it returns nil.
func (*ParsingTable) Equal ¶ added in v0.10.5
func (t *ParsingTable) Equal(rhs *ParsingTable) bool
Equal determines whether or not two parsing tables are the same.
func (*ParsingTable) GetProduction ¶ added in v0.10.2
func (t *ParsingTable) GetProduction(A grammar.NonTerminal, a grammar.Terminal) (*grammar.Production, bool)
GetProduction returns the single production from the M[A,a] entry if exactly one production exists. It returns the production and true if successful, or a default value and false otherwise.
func (*ParsingTable) IsEmpty ¶ added in v0.10.2
func (t *ParsingTable) IsEmpty(A grammar.NonTerminal, a grammar.Terminal) bool
IsEmpty returns true if there are no productions in the M[A,a] entry.
func (*ParsingTable) IsSync ¶ added in v0.10.2
func (t *ParsingTable) IsSync(A grammar.NonTerminal, a grammar.Terminal) bool
IsSync returns true if the M[A,a] entry is marked as a synchronization symbol for non-terminal A.
func (*ParsingTable) String ¶ added in v0.10.5
func (t *ParsingTable) String() string
String returns a human-readable string representation of the parsing table.