predictive

package
v0.11.0
Published: Feb 22, 2025 License: ISC Imports: 13 Imported by: 0

Documentation

Overview

Package predictive provides data structures and algorithms for building predictive parsers. A predictive parser is a top-down recursive-descent parser without backtracking.

Top-down parsing involves constructing a parse tree for the input string, starting from the root node (representing the start symbol) and expanding the nodes in preorder. Equivalently, top-down parsing can be viewed as finding a leftmost derivation for an input string.
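
For instance, using the expression grammar from the examples below, a leftmost derivation of id + id * id expands the leftmost nonterminal at every step:

	E ⇒ T E′
	  ⇒ F T′ E′
	  ⇒ id T′ E′
	  ⇒ id E′
	  ⇒ id + T E′
	  ⇒ id + F T′ E′
	  ⇒ id + id T′ E′
	  ⇒ id + id * F T′ E′
	  ⇒ id + id * id T′ E′
	  ⇒ id + id * id E′
	  ⇒ id + id * id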

A recursive-descent parser is a top-down parser built from mutually recursive procedures (or their non-recursive equivalents), where each procedure corresponds to a nonterminal of the grammar. Because each procedure mirrors the productions for its nonterminal, the parser's structure closely follows the grammar it recognizes.
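
To make this concrete, here is a minimal hand-written recursive-descent recognizer for the LL(1) expression grammar used in the examples below. It is an illustrative sketch only and is not part of this package; the package instead constructs a ParsingTable (see BuildParsingTable below) rather than relying on hand-coded procedures.

package main

import "fmt"

// rd is a hand-written recursive-descent recognizer for the grammar
// E → T E′, E′ → + T E′ | ε, T → F T′, T′ → * F T′ | ε, F → ( E ) | id.
// Each method corresponds to one nonterminal.
type rd struct {
	tokens []string // terminal names: "id", "+", "*", "(", ")"
	pos    int
}

// peek returns the current lookahead terminal, or "$" at end of input.
func (p *rd) peek() string {
	if p.pos < len(p.tokens) {
		return p.tokens[p.pos]
	}
	return "$"
}

// expect consumes the lookahead if it matches t, or reports an error.
func (p *rd) expect(t string) error {
	if p.peek() != t {
		return fmt.Errorf("expected %q, found %q", t, p.peek())
	}
	p.pos++
	return nil
}

// E → T E′
func (p *rd) parseE() error {
	if err := p.parseT(); err != nil {
		return err
	}
	return p.parseEPrime()
}

// E′ → + T E′ | ε
func (p *rd) parseEPrime() error {
	if p.peek() == "+" {
		p.pos++
		if err := p.parseT(); err != nil {
			return err
		}
		return p.parseEPrime()
	}
	return nil // E′ → ε
}

// T → F T′
func (p *rd) parseT() error {
	if err := p.parseF(); err != nil {
		return err
	}
	return p.parseTPrime()
}

// T′ → * F T′ | ε
func (p *rd) parseTPrime() error {
	if p.peek() == "*" {
		p.pos++
		if err := p.parseF(); err != nil {
			return err
		}
		return p.parseTPrime()
	}
	return nil // T′ → ε
}

// F → ( E ) | id
func (p *rd) parseF() error {
	switch p.peek() {
	case "(":
		p.pos++
		if err := p.parseE(); err != nil {
			return err
		}
		return p.expect(")")
	case "id":
		p.pos++
		return nil
	default:
		return fmt.Errorf("unexpected token %q", p.peek())
	}
}

func main() {
	p := &rd{tokens: []string{"id", "+", "id", "*", "(", "id", ")"}}
	if err := p.parseE(); err != nil {
		panic(err)
	}
	if rest := p.peek(); rest != "$" {
		panic(fmt.Sprintf("unexpected trailing token %q", rest))
	}
	fmt.Println("accepted")
}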

Predictive parsers can be constructed for a class of grammars called LL(1). The first "L" in LL(1) stands for scanning the input from left to right, the second "L" for producing a leftmost derivation, and the "1" for using one input symbol of lookahead at each step. The class of LL(1) grammars is expressive enough to cover most programming constructs, such as arithmetic expressions and simple control structures.
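
For instance, the LL(1) parsing table for the expression grammar used in the examples below has at most one production per nonterminal/lookahead pair, writing M[A, a] for the entry selected when the parser is expanding A and the next input symbol is a ($ marks the end of the input):

	M[E,  id] = M[E,  (] = E → T E′
	M[E′, +]  = E′ → + T E′        M[E′, )] = M[E′, $] = E′ → ε
	M[T,  id] = M[T,  (] = T → F T′
	M[T′, *]  = T′ → * F T′        M[T′, +] = M[T′, )] = M[T′, $] = T′ → ε
	M[F,  id] = F → id             M[F,  (] = F → ( E )

At every step, the single lookahead symbol determines which production to apply; an empty entry signals a syntax error.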

For more details on parsing theory, refer to "Compilers: Principles, Techniques, and Tools (2nd Edition)".

Example (BuildParsingTable)
package main

import (
	"fmt"

	"github.com/moorara/algo/grammar"
	"github.com/moorara/algo/parser/predictive"
)

func main() {
	G := grammar.NewCFG(
		[]grammar.Terminal{"+", "*", "(", ")", "id"},
		[]grammar.NonTerminal{"E", "E′", "T", "T′", "F"},
		[]*grammar.Production{
			{Head: "E", Body: grammar.String[grammar.Symbol]{grammar.NonTerminal("T"), grammar.NonTerminal("E′")}},                         // E → T E′
			{Head: "E′", Body: grammar.String[grammar.Symbol]{grammar.Terminal("+"), grammar.NonTerminal("T"), grammar.NonTerminal("E′")}}, // E′ → + T E′
			{Head: "E′", Body: grammar.E}, // E′ → ε
			{Head: "T", Body: grammar.String[grammar.Symbol]{grammar.NonTerminal("F"), grammar.NonTerminal("T′")}},                         // T → F T′
			{Head: "T′", Body: grammar.String[grammar.Symbol]{grammar.Terminal("*"), grammar.NonTerminal("F"), grammar.NonTerminal("T′")}}, // T′ → * F T′
			{Head: "T′", Body: grammar.E}, // T′ → ε
			{Head: "F", Body: grammar.String[grammar.Symbol]{grammar.Terminal("("), grammar.NonTerminal("E"), grammar.Terminal(")")}}, // F → ( E )
			{Head: "F", Body: grammar.String[grammar.Symbol]{grammar.Terminal("id")}},                                                 // F → id
		},
		"E",
	)

	table, err := predictive.BuildParsingTable(G)
	if err != nil {
		panic(err)
	}

	fmt.Println(table)
}
Example (Parse)
package main

import (
	"fmt"
	"io"
	"strings"

	"github.com/moorara/algo/grammar"
	"github.com/moorara/algo/lexer"
	"github.com/moorara/algo/lexer/input"
	"github.com/moorara/algo/parser/predictive"
)

// ExprLexer is an implementation of lexer.Lexer for basic math expressions.
type ExprLexer struct {
	in    *input.Input
	state int
}

func NewExprLexer(src io.Reader) (lexer.Lexer, error) {
	in, err := input.New("expression", src, 4096)
	if err != nil {
		return nil, err
	}

	return &ExprLexer{
		in: in,
	}, nil
}

func (l *ExprLexer) NextToken() (lexer.Token, error) {
	var r rune
	var err error

	for r, err = l.in.Next(); err == nil; r, err = l.in.Next() {
		if token, ok := l.advanceDFA(r); ok {
			return token, nil
		}
	}

	if err == io.EOF {
		return l.finalizeDFA()
	}

	return lexer.Token{}, err
}

// advanceDFA simulates a deterministic finite automaton to identify tokens.
func (l *ExprLexer) advanceDFA(r rune) (lexer.Token, bool) {

	switch l.state {
	case 0:
		switch r {
		case ' ', '\t', '\n':
			l.state = 0
		case '+', '-', '*', '/', '(', ')':
			l.state = 1
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			l.state = 2
		case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z':
			l.state = 4
		}

	case 2:
		switch r {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			l.state = 2
		case ' ', '\t', '\n',
			'+', '-', '*', '/', '(', ')',
			'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z':
			l.state = 3
		}

	case 4:
		switch r {
		case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z':
			l.state = 4
		case ' ', '\t', '\n',
			'+', '-', '*', '/', '(', ')',
			'0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			l.state = 5
		}

	default:
		panic("WTF?")
	}

	switch l.state {
	case 0:
		l.in.Skip()

	case 1:
		l.state = 0
		lexeme, pos := l.in.Lexeme()
		return lexer.Token{Terminal: grammar.Terminal(r), Lexeme: lexeme, Pos: pos}, true

	case 3:
		l.state = 0
		l.in.Retract()
		lexeme, pos := l.in.Lexeme()
		return lexer.Token{Terminal: grammar.Terminal("num"), Lexeme: lexeme, Pos: pos}, true

	case 5:
		l.state = 0
		l.in.Retract()
		lexeme, pos := l.in.Lexeme()
		return lexer.Token{Terminal: grammar.Terminal("id"), Lexeme: lexeme, Pos: pos}, true
	}

	return lexer.Token{}, false
}

// finalizeDFA is called after the entire input has been processed by the DFA.
// It generates the final token based on the current state of the lexer.
func (l *ExprLexer) finalizeDFA() (lexer.Token, error) {
	lexeme, pos := l.in.Lexeme()

	switch l.state {
	case 2:
		l.state = 0
		return lexer.Token{Terminal: grammar.Terminal("num"), Lexeme: lexeme, Pos: pos}, nil
	case 4:
		l.state = 0
		return lexer.Token{Terminal: grammar.Terminal("id"), Lexeme: lexeme, Pos: pos}, nil
	default:
		return lexer.Token{}, io.EOF
	}
}

func main() {
	src := strings.NewReader(`
		(price + tax * quantity) * 
			(discount + shipping) * 
		(weight + volume) + total
	`)

	l, err := NewExprLexer(src)
	if err != nil {
		panic(err)
	}

	G := grammar.NewCFG(
		[]grammar.Terminal{"+", "*", "(", ")", "id"},
		[]grammar.NonTerminal{"E", "E′", "T", "T′", "F"},
		[]*grammar.Production{
			{Head: "E", Body: grammar.String[grammar.Symbol]{grammar.NonTerminal("T"), grammar.NonTerminal("E′")}},                         // E → T E′
			{Head: "E′", Body: grammar.String[grammar.Symbol]{grammar.Terminal("+"), grammar.NonTerminal("T"), grammar.NonTerminal("E′")}}, // E′ → + T E′
			{Head: "E′", Body: grammar.E}, // E′ → ε
			{Head: "T", Body: grammar.String[grammar.Symbol]{grammar.NonTerminal("F"), grammar.NonTerminal("T′")}},                         // T → F T′
			{Head: "T′", Body: grammar.String[grammar.Symbol]{grammar.Terminal("*"), grammar.NonTerminal("F"), grammar.NonTerminal("T′")}}, // T′ → * F T′
			{Head: "T′", Body: grammar.E}, // T′ → ε
			{Head: "F", Body: grammar.String[grammar.Symbol]{grammar.Terminal("("), grammar.NonTerminal("E"), grammar.Terminal(")")}}, // F → ( E )
			{Head: "F", Body: grammar.String[grammar.Symbol]{grammar.Terminal("id")}},                                                 // F → id
		},
		"E",
	)

	p := predictive.New(G, l)

	err = p.Parse(
		func(token *lexer.Token) error {
			fmt.Printf("Token: %s\n", token)
			return nil
		},
		func(prod *grammar.Production) error {
			fmt.Printf("Production: %s\n", prod)
			return nil
		},
	)

	if err != nil {
		panic(err)
	}
}
Example (ParseAndBuildAST)

You can copy-paste the output of this example into https://edotor.net to view the result.

package main

import (
	"fmt"
	"io"
	"strings"

	"github.com/moorara/algo/grammar"
	"github.com/moorara/algo/lexer"
	"github.com/moorara/algo/lexer/input"
	"github.com/moorara/algo/parser"
	"github.com/moorara/algo/parser/predictive"
)

// ExprLexer is an implementation of lexer.Lexer for basic math expressions.
type ExprLexer struct {
	in    *input.Input
	state int
}

func NewExprLexer(src io.Reader) (lexer.Lexer, error) {
	in, err := input.New("expression", src, 4096)
	if err != nil {
		return nil, err
	}

	return &ExprLexer{
		in: in,
	}, nil
}

func (l *ExprLexer) NextToken() (lexer.Token, error) {
	var r rune
	var err error

	for r, err = l.in.Next(); err == nil; r, err = l.in.Next() {
		if token, ok := l.advanceDFA(r); ok {
			return token, nil
		}
	}

	if err == io.EOF {
		return l.finalizeDFA()
	}

	return lexer.Token{}, err
}

// advanceDFA simulates a deterministic finite automaton to identify tokens.
func (l *ExprLexer) advanceDFA(r rune) (lexer.Token, bool) {

	switch l.state {
	case 0:
		switch r {
		case ' ', '\t', '\n':
			l.state = 0
		case '+', '-', '*', '/', '(', ')':
			l.state = 1
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			l.state = 2
		case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z':
			l.state = 4
		}

	case 2:
		switch r {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			l.state = 2
		case ' ', '\t', '\n',
			'+', '-', '*', '/', '(', ')',
			'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z':
			l.state = 3
		}

	case 4:
		switch r {
		case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z':
			l.state = 4
		case ' ', '\t', '\n',
			'+', '-', '*', '/', '(', ')',
			'0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			l.state = 5
		}

	default:
		panic("WTF?")
	}

	switch l.state {
	case 0:
		l.in.Skip()

	case 1:
		l.state = 0
		lexeme, pos := l.in.Lexeme()
		return lexer.Token{Terminal: grammar.Terminal(r), Lexeme: lexeme, Pos: pos}, true

	case 3:
		l.state = 0
		l.in.Retract()
		lexeme, pos := l.in.Lexeme()
		return lexer.Token{Terminal: grammar.Terminal("num"), Lexeme: lexeme, Pos: pos}, true

	case 5:
		l.state = 0
		l.in.Retract()
		lexeme, pos := l.in.Lexeme()
		return lexer.Token{Terminal: grammar.Terminal("id"), Lexeme: lexeme, Pos: pos}, true
	}

	return lexer.Token{}, false
}

// finalizeDFA is called after the entire input has been processed by the DFA.
// It generates the final token based on the current state of the lexer.
func (l *ExprLexer) finalizeDFA() (lexer.Token, error) {
	lexeme, pos := l.in.Lexeme()

	switch l.state {
	case 2:
		l.state = 0
		return lexer.Token{Terminal: grammar.Terminal("num"), Lexeme: lexeme, Pos: pos}, nil
	case 4:
		l.state = 0
		return lexer.Token{Terminal: grammar.Terminal("id"), Lexeme: lexeme, Pos: pos}, nil
	default:
		return lexer.Token{}, io.EOF
	}
}

func main() {
	src := strings.NewReader(`
		(price + tax * quantity) * 
			(discount + shipping) * 
		(weight + volume) + total
	`)

	l, err := NewExprLexer(src)
	if err != nil {
		panic(err)
	}

	G := grammar.NewCFG(
		[]grammar.Terminal{"+", "*", "(", ")", "id"},
		[]grammar.NonTerminal{"E", "E′", "T", "T′", "F"},
		[]*grammar.Production{
			{Head: "E", Body: grammar.String[grammar.Symbol]{grammar.NonTerminal("T"), grammar.NonTerminal("E′")}},                         // E → T E′
			{Head: "E′", Body: grammar.String[grammar.Symbol]{grammar.Terminal("+"), grammar.NonTerminal("T"), grammar.NonTerminal("E′")}}, // E′ → + T E′
			{Head: "E′", Body: grammar.E}, // E′ → ε
			{Head: "T", Body: grammar.String[grammar.Symbol]{grammar.NonTerminal("F"), grammar.NonTerminal("T′")}},                         // T → F T′
			{Head: "T′", Body: grammar.String[grammar.Symbol]{grammar.Terminal("*"), grammar.NonTerminal("F"), grammar.NonTerminal("T′")}}, // T′ → * F T′
			{Head: "T′", Body: grammar.E}, // T′ → ε
			{Head: "F", Body: grammar.String[grammar.Symbol]{grammar.Terminal("("), grammar.NonTerminal("E"), grammar.Terminal(")")}}, // F → ( E )
			{Head: "F", Body: grammar.String[grammar.Symbol]{grammar.Terminal("id")}},                                                 // F → id
		},
		"E",
	)

	p := predictive.New(G, l)

	root, err := p.ParseAndBuildAST()
	if err != nil {
		panic(err)
	}

	n := root.(*parser.InternalNode)
	fmt.Println(n.DOT())
}

Constants

This section is empty.

Variables

This section is empty.

Functions

func New

func New(G *grammar.CFG, lexer lexer.Lexer) parser.Parser

New creates a new predictive parser for a given context-free grammar (CFG). It requires a lexer for lexical analysis, which reads the input and produces tokens (terminal symbols).

Types

type ParsingTable

type ParsingTable struct {
	// contains filtered or unexported fields
}

ParsingTable represents a parsing table for a predictive parser.

func BuildParsingTable

func BuildParsingTable(G *grammar.CFG) (*ParsingTable, error)

BuildParsingTable constructs a parsing table for a predictive parser.

This function constructs a parsing table for any context-free grammar. To identify conflicts in the table, use the Conflicts method. Some conflicts can be resolved by eliminating left recursion and applying left factoring to the grammar; however, certain grammars cannot be transformed into LL(1) even after these transformations, and some languages have no LL(1) grammar at all.
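
A minimal sketch of this workflow, assuming G is the expression grammar defined as in the examples above:

table, err := predictive.BuildParsingTable(G)
if err != nil {
	panic(err)
}

if err := table.Conflicts(); err != nil {
	// G is not LL(1): the error describes the conflicting M[A,a] entries.
	// Eliminating left recursion and left factoring G may remove some conflicts.
	fmt.Println(err)
}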

func NewParsingTable added in v0.10.5

func NewParsingTable(terminals []grammar.Terminal, nonTerminals []grammar.NonTerminal) *ParsingTable

NewParsingTable creates an empty parsing table for a predictive parser.

func (*ParsingTable) Conflicts added in v0.10.6

func (t *ParsingTable) Conflicts() error

Conflicts checks for conflicts in the parsing table. If there are multiple productions for at least one combination of non-terminal A and terminal a, the method returns an error containing details about the conflicting productions. If no conflicts are found, it returns nil.

func (*ParsingTable) Equal added in v0.10.5

func (t *ParsingTable) Equal(rhs *ParsingTable) bool

Equal determines whether two parsing tables are the same.

func (*ParsingTable) GetProduction added in v0.10.2

GetProduction returns the single production from the M[A,a] entry if exactly one production exists. It returns the production and true if successful, or a default value and false otherwise.

func (*ParsingTable) IsEmpty added in v0.10.2

IsEmpty returns true if there are no productions in the M[A,a] entry.

func (*ParsingTable) IsSync added in v0.10.2

IsSync returns true if the M[A,a] entry is marked as a synchronization symbol for non-terminal A.

func (*ParsingTable) String added in v0.10.5

func (t *ParsingTable) String() string

String returns a human-readable string representation of the parsing table.
