Documentation
¶
Overview ¶
Package extract performs deterministic structural extraction from source code using tree-sitter.
Index ¶
- func AddLogMessage(nodes []types.Node, nid, msg string)
- func AddThrowMessage(nodes []types.Node, nid, msg string)
- func Extract(paths []string, root string) *types.ExtractionResult
- func ExtractBlade(path string) *types.ExtractionResult
- func ExtractDart(path string) *types.ExtractionResult
- func ExtractElixir(path string) *types.ExtractionResult
- func ExtractGeneric(path string, config *LanguageConfig) *types.ExtractionResult
- func ExtractGo(path string) *types.ExtractionResult
- func ExtractJS(path string) *types.ExtractionResult
- func ExtractJulia(path string) *types.ExtractionResult
- func ExtractObjC(path string) *types.ExtractionResult
- func ExtractPowerShell(path string) *types.ExtractionResult
- func ExtractPython(path string) *types.ExtractionResult
- func ExtractRust(path string) *types.ExtractionResult
- func ExtractTS(path string) *types.ExtractionResult
- func ExtractVerilog(path string) *types.ExtractionResult
- func ExtractZig(path string) *types.ExtractionResult
- func FileStem(path string) string
- func MakeID(parts ...string) string
- func ReadText(source []byte, startByte, endByte uint32) string
- func SetComment(nodes []types.Node, nid, comment string)
- func SourceLoc(line int) string
- func TagNode(nodes []types.Node, nid, tag string)
- type BehaviorConfig
- type CallResolveFn
- type ExtraWalkFn
- type Extractor
- type GenericContext
- func (ctx *GenericContext) AddEdge(src, tgt, relation string, line int)
- func (ctx *GenericContext) AddExternalNode(nid, label string)
- func (ctx *GenericContext) AddNode(nid, label string, line int)
- func (ctx *GenericContext) AddNodeWithDecl(nid, label string, line int, declNode *ts.Node)
- func (ctx *GenericContext) RT(n *ts.Node) string
- type ImportHandler
- type InheritanceFn
- type LanguageConfig
- func CConfig() *LanguageConfig
- func CSharpConfig() *LanguageConfig
- func CppConfig() *LanguageConfig
- func JavaConfig() *LanguageConfig
- func KotlinConfig() *LanguageConfig
- func LuaConfig() *LanguageConfig
- func PHPConfig() *LanguageConfig
- func RubyConfig() *LanguageConfig
- func ScalaConfig() *LanguageConfig
- func SwiftConfig() *LanguageConfig
- type ResolveFuncNameFn
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func AddLogMessage ¶
AddLogMessage appends a log message string to a node.
func AddThrowMessage ¶
AddThrowMessage appends a throw/panic/raise message string to a node.
func Extract ¶
func Extract(paths []string, root string) *types.ExtractionResult
Extract runs AST extraction on a list of file paths and merges results. It performs cross-file call resolution after all files are processed. The root parameter is the project root directory, used for cache keying.
Files are processed in batches to bound peak memory usage. Each batch's results are flushed to a temporary file, then all batches are merged from disk for the final cross-file resolution pass.
func ExtractBlade ¶
func ExtractBlade(path string) *types.ExtractionResult
ExtractBlade extracts includes, livewire components, and wire bindings from a .blade.php file.
func ExtractDart ¶
func ExtractDart(path string) *types.ExtractionResult
ExtractDart extracts classes, functions, and imports from a .dart file using regex.
func ExtractElixir ¶
func ExtractElixir(path string) *types.ExtractionResult
func ExtractGeneric ¶
func ExtractGeneric(path string, config *LanguageConfig) *types.ExtractionResult
ExtractGeneric is a config-driven AST extractor that handles most languages.
func ExtractGo ¶
func ExtractGo(path string) *types.ExtractionResult
ExtractGo extracts functions, methods, type declarations, and imports from a .go file.
func ExtractJS ¶
func ExtractJS(path string) *types.ExtractionResult
ExtractJS extracts classes, functions, imports, and call graphs from a .js/.jsx/.mjs file.
func ExtractJulia ¶
func ExtractJulia(path string) *types.ExtractionResult
func ExtractObjC ¶
func ExtractObjC(path string) *types.ExtractionResult
func ExtractPowerShell ¶
func ExtractPowerShell(path string) *types.ExtractionResult
func ExtractPython ¶
func ExtractPython(path string) *types.ExtractionResult
ExtractPython extracts classes, functions, imports, and call graphs from a .py file.
func ExtractRust ¶
func ExtractRust(path string) *types.ExtractionResult
ExtractRust extracts structs, impl blocks, traits, functions, and use statements from a .rs file.
func ExtractTS ¶
func ExtractTS(path string) *types.ExtractionResult
ExtractTS performs extraction on .ts/.tsx files using the TypeScript grammar.
func ExtractVerilog ¶
func ExtractVerilog(path string) *types.ExtractionResult
func ExtractZig ¶
func ExtractZig(path string) *types.ExtractionResult
func FileStem ¶
FileStem returns a stem qualified with the parent directory name to avoid ID collisions when multiple files share the same filename in different directories.
func SetComment ¶
SetComment sets the comment/docstring on the node identified by nid. It sets the comment only if the node's comment is currently empty.
Types ¶
type BehaviorConfig ¶
type BehaviorConfig struct {
ThrowNodeTypes []string // Node types that indicate throwing (e.g., "throw_statement")
CatchNodeTypes []string // Node types that indicate error handling (e.g., "catch_clause")
ThrowCallNames []string // Bare function calls that indicate throwing (e.g., "exit", "abort")
LogCallNames []string // Bare function calls that indicate logging (e.g., "printf")
LogObjects []string // Object names whose method calls indicate logging (e.g., "Console")
ExecCallNames []string // Bare function calls that run external processes (e.g., "system")
FSCallNames []string // Bare function calls for file system access (e.g., "fopen")
FSObjects []string // Object prefixes for FS access (e.g., "File.")
NetObjects []string // Object prefixes for network access (e.g., "HttpClient.")
OtelCallNames []string // Bare function calls for observability (e.g., "Counter", "Gauge")
OtelObjects []string // Object prefixes for observability (e.g., "Tracer.", "prometheus.")
AsyncNodeTypes []string // Node types indicating async (e.g., "await_expression")
UnsafeNodeTypes []string // Node types indicating unsafe code (e.g., "unsafe_block")
}
BehaviorConfig declares patterns for tagging function nodes with behavioral labels.
type CallResolveFn ¶
CallResolveFn handles language-specific call resolution in walk_calls. Returns (calleeName, isMemberCall, handled).
type ExtraWalkFn ¶
type ExtraWalkFn func(ctx *GenericContext, node *ts.Node, parentClassNID string) bool
ExtraWalkFn handles language-specific node types during the generic walk. Returns true if the node was handled.
type Extractor ¶
type Extractor func(path string) *types.ExtractionResult
Extractor is a function that extracts nodes and edges from a single file.
type GenericContext ¶
type GenericContext struct {
Lang *ts.Language
Config *LanguageConfig
Source []byte
Stem string
StrPath string
FileNID string
Nodes *[]types.Node
Edges *[]types.Edge
SeenIDs map[string]bool
FunctionBodies *[]bodyEntry
}
GenericContext holds mutable state during generic extraction.
func (*GenericContext) AddEdge ¶
func (ctx *GenericContext) AddEdge(src, tgt, relation string, line int)
AddEdge adds an edge from src to tgt with the given relation and source line.
func (*GenericContext) AddExternalNode ¶
func (ctx *GenericContext) AddExternalNode(nid, label string)
AddExternalNode adds a node that may come from outside the current file (e.g., base classes).
func (*GenericContext) AddNode ¶
func (ctx *GenericContext) AddNode(nid, label string, line int)
AddNode adds a node if not already seen.
func (*GenericContext) AddNodeWithDecl ¶
func (ctx *GenericContext) AddNodeWithDecl(nid, label string, line int, declNode *ts.Node)
AddNodeWithDecl adds a node and extracts any preceding comment.
type ImportHandler ¶
type ImportHandler func(node *ts.Node, lang *ts.Language, source []byte, fileNID, stem, strPath string, edges *[]types.Edge)
ImportHandler processes an import node and appends edges.
type InheritanceFn ¶
type InheritanceFn func(ctx *GenericContext, classNode *ts.Node, classNID string, line int)
InheritanceFn handles language-specific inheritance extraction for class nodes.
type LanguageConfig ¶
type LanguageConfig struct {
// Language grammar loaded via grammars package.
Lang *ts.Language
// AST node types.
ClassTypes map[string]bool
FunctionTypes map[string]bool
ImportTypes map[string]bool
CallTypes map[string]bool
// Name extraction.
NameField string
NameFallbackChildTypes []string
// Body detection.
BodyField string
BodyFallbackChildTypes []string
// Call name extraction.
CallFunctionField string
CallArgumentsField string
CallAccessorNodeTypes map[string]bool
CallAccessorField string
FunctionBoundaryTypes map[string]bool
// Whether to add "()" to function labels.
FunctionLabelParens bool
// Optional handlers.
ImportHandler ImportHandler
ResolveFuncNameFn ResolveFuncNameFn
ExtraWalkFn ExtraWalkFn
CallResolveFn CallResolveFn
InheritanceFn InheritanceFn
// Behavioral tagging config.
Behavior *BehaviorConfig
}
LanguageConfig drives the generic AST extractor for a given language.
func CConfig ¶
func CConfig() *LanguageConfig
func CSharpConfig ¶
func CSharpConfig() *LanguageConfig
func CppConfig ¶
func CppConfig() *LanguageConfig
func JavaConfig ¶
func JavaConfig() *LanguageConfig
func KotlinConfig ¶
func KotlinConfig() *LanguageConfig
func LuaConfig ¶
func LuaConfig() *LanguageConfig
func PHPConfig ¶
func PHPConfig() *LanguageConfig
func RubyConfig ¶
func RubyConfig() *LanguageConfig
func ScalaConfig ¶
func ScalaConfig() *LanguageConfig
func SwiftConfig ¶
func SwiftConfig() *LanguageConfig