parser

package
v0.3.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 5, 2025 License: MIT Imports: 8 Imported by: 0

Documentation

Overview

Package parser provides a parser for the M28 programming language.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type FormatSpec

// FormatSpec represents a format specification such as ":03.2f".
// Each field holds one component of the spec; where a field has a trailing
// comment, that comment lists the values or flag it corresponds to.
//
// NOTE(review): Precision uses -1 to mean "not specified", but the Go zero
// value of this struct leaves Precision at 0 — presumably whatever builds a
// FormatSpec sets -1 explicitly; confirm before relying on a zero-value
// FormatSpec.
type FormatSpec struct {
	HasSpec    bool
	Conversion string // 'r', 's', or 'a'
	Fill       rune
	Align      rune // '<', '>', '^', '='
	Sign       rune // '+', '-', ' '
	Alt        bool // '#' flag
	Zero       bool // '0' flag
	Width      int
	Precision  int  // -1 if not specified
	Type       rune // 'b', 'd', 'f', 's', etc.
	Debug      bool // '=' suffix for self-documenting
}

FormatSpec represents a format specification like :03.2f

type Interpolation added in v0.3.0

// Interpolation represents a single interpolation in an s-string.
// Type says which InterpolationType it is, Expr holds the associated
// core.Value, and Position records where in the template it appears so
// that errors can be reported against it.
type Interpolation struct {
	Type     InterpolationType
	Expr     core.Value
	Position int // Position in the template for error reporting
}

Interpolation represents a single interpolation in an s-string

type InterpolationType added in v0.3.0

// InterpolationType names the kind of interpolation found in an s-string.
// It is a string-backed type.
type InterpolationType string

InterpolationType represents the type of interpolation in s-strings

// Kinds of s-string interpolation. The trailing comment on each constant
// shows the template syntax it stands for, followed by a short description.
const (
	InterpValue      InterpolationType = "value"       // {x} - insert value
	InterpCode       InterpolationType = "code"        // {=x} - insert code/symbol
	InterpSplice     InterpolationType = "splice"      // {*x} - splice sequence
	InterpDictSplice InterpolationType = "dict-splice" // {**x} - splice dict
	InterpGensym     InterpolationType = "gensym"      // {x#} - auto-gensym
)

type ParseError added in v0.3.0

// ParseError represents a parsing error together with source location
// information: a message, a line and column, a lexeme, the full source text
// (kept for context), and a filename.
//
// NOTE(review): whether Line and Col are 0- or 1-indexed is not stated on
// this type (Token documents its own Line/Col as 1-indexed) — confirm that
// the two agree.
type ParseError struct {
	Message  string
	Line     int
	Col      int
	Lexeme   string
	Source   string // Full source code for context
	Filename string
}

ParseError represents a parsing error with source location info

func (*ParseError) Error added in v0.3.0

func (e *ParseError) Error() string

func (*ParseError) FormatWithContext added in v0.3.0

func (e *ParseError) FormatWithContext() string

FormatWithContext returns the error with source code context

type Parser

type Parser struct {
	// contains filtered or unexported fields
}

Parser parses M28 code into expressions

func NewParser

func NewParser() *Parser

NewParser creates a new parser

func (*Parser) Parse

func (p *Parser) Parse(input string) (core.Value, error)

Parse parses a string into a value

func (*Parser) ParseToAST added in v0.3.0

func (p *Parser) ParseToAST(input string) (ast.ASTNode, *core.IRMetadata, error)

ParseToAST parses input and returns an AST node with metadata. This is the new AST-based parsing path (Phase 2).

func (*Parser) SetFilename

func (p *Parser) SetFilename(filename string)

SetFilename sets the filename for error reporting

type PythonParser added in v0.3.0

type PythonParser struct {
	// contains filtered or unexported fields
}

PythonParser implements a recursive descent parser for Python syntax

func NewPythonParser added in v0.3.0

func NewPythonParser(tokens []Token) *PythonParser

NewPythonParser creates a new Python parser from a token stream

func (*PythonParser) Parse added in v0.3.0

func (p *PythonParser) Parse() ([]ast.ASTNode, error)

Parse parses the token stream into a list of AST nodes

type PythonTokenizer added in v0.3.0

type PythonTokenizer struct {
	// contains filtered or unexported fields
}

PythonTokenizer performs lexical analysis on Python source code. Key differences from the M28 tokenizer:

1. Indentation-aware (generates INDENT/DEDENT tokens).
2. Newlines are significant.
3. Tracks indentation levels with a stack.

func NewPythonTokenizer added in v0.3.0

func NewPythonTokenizer(input string) *PythonTokenizer

NewPythonTokenizer creates a new Python tokenizer

func (*PythonTokenizer) Tokenize added in v0.3.0

func (t *PythonTokenizer) Tokenize() ([]Token, error)

Tokenize performs lexical analysis and returns all tokens

type Token added in v0.3.0

// Token represents a lexical token: its type, the original text, the parsed
// value (for literals), and its location in the source.
//
// NOTE(review): the field comments do not say whether EndPos is inclusive
// or exclusive, nor whether StartPos/EndPos count bytes or runes — confirm
// before using them to slice the input.
type Token struct {
	Type     TokenType  // Token type
	Lexeme   string     // Original text
	Value    core.Value // Parsed value (for literals)
	Line     int        // Source line number (1-indexed)
	Col      int        // Source column number (1-indexed)
	StartPos int        // Absolute character position (start)
	EndPos   int        // Absolute character position (end)
}

Token represents a lexical token

func (Token) IsKeyword added in v0.3.0

func (t Token) IsKeyword() bool

IsKeyword returns true if the token is a keyword

func (Token) IsLiteral added in v0.3.0

func (t Token) IsLiteral() bool

IsLiteral returns true if the token is a literal value

func (Token) IsOperator added in v0.3.0

func (t Token) IsOperator() bool

IsOperator returns true if the token is an operator

func (Token) String added in v0.3.0

func (t Token) String() string

String returns a string representation of the token

type TokenType added in v0.3.0

// TokenType identifies the type of a token. It is an int-backed enumeration.
type TokenType int

TokenType represents the type of a token

// Token types. Values are assigned sequentially by iota starting at
// TOKEN_NUMBER = 0, so TOKEN_NUMBER is also the zero value of TokenType, and
// inserting or reordering entries renumbers every constant that follows.
//
// NOTE(review): several operators are declared twice under different names
// with distinct values (for example TOKEN_PLUS_EQ and TOKEN_PLUS_ASSIGN are
// both "+=", and TOKEN_EQ and TOKEN_EQUALEQUAL are both "=="). Which name a
// given tokenizer actually emits is not visible here — confirm before
// matching on one of a pair.
const (
	// Literals
	TOKEN_NUMBER  TokenType = iota // 42, 3.14, 0x1A
	TOKEN_STRING                   // "hello", 'world'
	TOKEN_FSTRING                  // f"hello {name}"
	TOKEN_SSTRING                  // s"(+ {x} 1)", rs"..."
	TOKEN_TRUE                     // True, true
	TOKEN_FALSE                    // False, false
	TOKEN_NIL                      // None, nil

	// Identifiers/Symbols
	TOKEN_IDENTIFIER // foo, bar, my_var

	// Arithmetic Operators
	TOKEN_PLUS         // +
	TOKEN_MINUS        // -
	TOKEN_STAR         // *
	TOKEN_SLASH        // /
	TOKEN_DOUBLE_SLASH // //
	TOKEN_PERCENT      // %
	TOKEN_DOUBLE_STAR  // **

	// Comparison Operators
	TOKEN_EQ // ==
	TOKEN_NE // !=
	TOKEN_LT // <
	TOKEN_GT // >
	TOKEN_LE // <=
	TOKEN_GE // >=

	// Bitwise Operators
	TOKEN_AMPERSAND // &
	TOKEN_PIPE      // |
	TOKEN_CARET     // ^
	TOKEN_DOUBLE_LT // <<
	TOKEN_DOUBLE_GT // >>
	TOKEN_TILDE     // ~ (bitwise NOT)

	// Logical Operators (keywords)
	TOKEN_AND    // and
	TOKEN_OR     // or
	TOKEN_NOT    // not
	TOKEN_IN     // in
	TOKEN_NOT_IN // not in
	TOKEN_IS     // is
	TOKEN_IS_NOT // is not

	// Definition keyword
	TOKEN_DEF // def

	// Assignment Operators
	TOKEN_ASSIGN          // =
	TOKEN_PLUS_EQ         // +=
	TOKEN_MINUS_EQ        // -=
	TOKEN_STAR_EQ         // *=
	TOKEN_SLASH_EQ        // /=
	TOKEN_DOUBLE_SLASH_EQ // //=
	TOKEN_PERCENT_EQ      // %=
	TOKEN_DOUBLE_STAR_EQ  // **=
	TOKEN_AMPERSAND_EQ    // &=
	TOKEN_PIPE_EQ         // |=
	TOKEN_CARET_EQ        // ^=
	TOKEN_DOUBLE_LT_EQ    // <<=
	TOKEN_DOUBLE_GT_EQ    // >>=
	TOKEN_AT_EQ           // @=

	// Delimiters
	TOKEN_LPAREN     // (
	TOKEN_RPAREN     // )
	TOKEN_LBRACKET   // [
	TOKEN_RBRACKET   // ]
	TOKEN_LBRACE     // {
	TOKEN_RBRACE     // }
	TOKEN_COMMA      // ,
	TOKEN_DOT        // .
	TOKEN_COLON      // :
	TOKEN_COLONEQUAL // := (walrus operator)
	TOKEN_SEMICOLON  // ;

	// Special/Reader Macros
	TOKEN_BACKTICK      // ` (quasiquote)
	TOKEN_COMMA_UNQUOTE // , (when used as unquote in macro context)
	TOKEN_COMMA_AT      // ,@ (unquote-splicing)
	TOKEN_AT            // @ (decorators)

	// Python-specific keywords
	TOKEN_CLASS    // class
	TOKEN_IF       // if
	TOKEN_ELIF     // elif
	TOKEN_ELSE     // else
	TOKEN_FOR      // for
	TOKEN_WHILE    // while
	TOKEN_BREAK    // break
	TOKEN_CONTINUE // continue
	TOKEN_RETURN   // return
	TOKEN_PASS     // pass
	TOKEN_IMPORT   // import
	TOKEN_FROM     // from
	TOKEN_AS       // as
	TOKEN_TRY      // try
	TOKEN_EXCEPT   // except
	TOKEN_FINALLY  // finally
	TOKEN_RAISE    // raise
	TOKEN_WITH     // with
	TOKEN_LAMBDA   // lambda
	TOKEN_YIELD    // yield
	TOKEN_ASYNC    // async
	TOKEN_AWAIT    // await
	TOKEN_ASSERT   // assert
	TOKEN_DEL      // del
	TOKEN_GLOBAL   // global
	TOKEN_NONLOCAL // nonlocal
	TOKEN_MATCH    // match (Python 3.10+)
	TOKEN_CASE     // case (Python 3.10+)

	// Python-specific operators/tokens
	TOKEN_ARROW    // -> (type hint arrow)
	TOKEN_ELLIPSIS // ... (Ellipsis)
	TOKEN_BANG     // ! (not standard Python, but useful)

	// Indentation tokens (Python-specific)
	TOKEN_INDENT // Increase indentation level
	TOKEN_DEDENT // Decrease indentation level

	// Augmented assignment tokens (normalized names)
	// NOTE(review): every lexeme in this group is also declared earlier in
	// the block under a different name (e.g. TOKEN_PLUS_EQ for "+=",
	// TOKEN_DOUBLE_SLASH for "//", TOKEN_DOUBLE_LT for "<<"). The group also
	// contains four plain operators (//, **, <<, >>) that are not
	// assignments.
	TOKEN_PLUS_ASSIGN        // +=
	TOKEN_MINUS_ASSIGN       // -=
	TOKEN_STAR_ASSIGN        // *=
	TOKEN_SLASH_ASSIGN       // /=
	TOKEN_DOUBLESLASH        // //
	TOKEN_DOUBLESLASH_ASSIGN // //=
	TOKEN_PERCENT_ASSIGN     // %=
	TOKEN_DOUBLESTAR         // **
	TOKEN_DOUBLESTAR_ASSIGN  // **=
	TOKEN_AMPERSAND_ASSIGN   // &=
	TOKEN_PIPE_ASSIGN        // |=
	TOKEN_CARET_ASSIGN       // ^=
	TOKEN_LSHIFT             // <<
	TOKEN_LSHIFT_ASSIGN      // <<=
	TOKEN_RSHIFT             // >>
	TOKEN_RSHIFT_ASSIGN      // >>=

	// Comparison operators (normalized names)
	// NOTE(review): these six repeat the lexemes of TOKEN_EQ, TOKEN_NE,
	// TOKEN_LT, TOKEN_GT, TOKEN_LE and TOKEN_GE with different values.
	TOKEN_EQUALEQUAL   // ==
	TOKEN_NOTEQUAL     // !=
	TOKEN_LESS         // <
	TOKEN_GREATER      // >
	TOKEN_LESSEQUAL    // <=
	TOKEN_GREATEREQUAL // >=

	// Meta tokens
	TOKEN_NEWLINE // Significant in REPL and Python
	TOKEN_COMMENT // # comment
	TOKEN_EOF     // End of file/input
	TOKEN_ERROR   // Lexical error
)

func (TokenType) String added in v0.3.0

func (tt TokenType) String() string

String returns the name of the token type

type Tokenizer added in v0.3.0

type Tokenizer struct {
	// contains filtered or unexported fields
}

Tokenizer performs lexical analysis on M28 source code

func NewTokenizer added in v0.3.0

func NewTokenizer(input string) *Tokenizer

NewTokenizer creates a new tokenizer for the given input

func (*Tokenizer) Tokenize added in v0.3.0

func (t *Tokenizer) Tokenize() ([]Token, error)

Tokenize performs lexical analysis and returns all tokens

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL