Documentation
¶
Index ¶
- type ASTFeatureExtractor
- func (a *ASTFeatureExtractor) ExtractFeatures(ast *apted.TreeNode) ([]string, error)
- func (a *ASTFeatureExtractor) ExtractNodeSequences(ast *apted.TreeNode, k int) ([]string, error)
- func (a *ASTFeatureExtractor) ExtractSubtreeHashes(ast *apted.TreeNode, maxHeight int) ([]string, error)
- func (a *ASTFeatureExtractor) WithOptions(maxHeight, k int, includeTypes, includeLiterals bool) *ASTFeatureExtractor
- func (a *ASTFeatureExtractor) WithPatterns(patterns []string) *ASTFeatureExtractor
- type CentroidGrouping
- type CompleteLinkageGrouping
- type ConnectedGrouping
- type FeatureExtractor
- type GroupableItem
- type GroupingConfig
- type GroupingMode
- type GroupingStrategy
- type ItemGroup
- type ItemPair
- type KCoreGrouping
- type StarMedoidGrouping
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ASTFeatureExtractor ¶
type ASTFeatureExtractor struct {
// PatternNames is the list of node type names to check for structural patterns.
// Language-specific: set this to match your AST node types.
// If nil, no pattern features are extracted.
PatternNames []string
// contains filtered or unexported fields
}
ASTFeatureExtractor implements FeatureExtractor for TreeNode.
func NewASTFeatureExtractor ¶
func NewASTFeatureExtractor() *ASTFeatureExtractor
NewASTFeatureExtractor creates a feature extractor with sensible defaults.
func (*ASTFeatureExtractor) ExtractFeatures ¶
func (a *ASTFeatureExtractor) ExtractFeatures(ast *apted.TreeNode) ([]string, error)
ExtractFeatures builds a mixed set of features from the tree.
func (*ASTFeatureExtractor) ExtractNodeSequences ¶
func (a *ASTFeatureExtractor) ExtractNodeSequences(ast *apted.TreeNode, k int) ([]string, error)
ExtractNodeSequences returns k-grams from pre-order traversal labels.
func (*ASTFeatureExtractor) ExtractSubtreeHashes ¶
func (a *ASTFeatureExtractor) ExtractSubtreeHashes(ast *apted.TreeNode, maxHeight int) ([]string, error)
ExtractSubtreeHashes computes bottom-up hashes of subtrees up to maxHeight.
func (*ASTFeatureExtractor) WithOptions ¶
func (a *ASTFeatureExtractor) WithOptions(maxHeight, k int, includeTypes, includeLiterals bool) *ASTFeatureExtractor
WithOptions overrides the default maxHeight, k, includeTypes, and includeLiterals settings.
func (*ASTFeatureExtractor) WithPatterns ¶
func (a *ASTFeatureExtractor) WithPatterns(patterns []string) *ASTFeatureExtractor
WithPatterns sets the pattern names for structural feature extraction.
type CentroidGrouping ¶
type CentroidGrouping[T GroupableItem] struct {
// contains filtered or unexported fields
}
CentroidGrouping starts with the highest-similarity pair, forms a group, then expands by adding items that have similarity >= threshold to ALL existing group members.
func (*CentroidGrouping[T]) GroupItems ¶
func (cg *CentroidGrouping[T]) GroupItems(pairs []*ItemPair[T]) []*ItemGroup[T]
func (*CentroidGrouping[T]) Name ¶
func (cg *CentroidGrouping[T]) Name() string
type CompleteLinkageGrouping ¶
type CompleteLinkageGrouping[T GroupableItem] struct {
// contains filtered or unexported fields
}
CompleteLinkageGrouping finds maximal cliques using the Bron-Kerbosch algorithm with pivoting. Each clique becomes a group. If an item appears in multiple cliques, it is assigned to the largest one.
func (*CompleteLinkageGrouping[T]) GroupItems ¶
func (cl *CompleteLinkageGrouping[T]) GroupItems(pairs []*ItemPair[T]) []*ItemGroup[T]
func (*CompleteLinkageGrouping[T]) Name ¶
func (cl *CompleteLinkageGrouping[T]) Name() string
type ConnectedGrouping ¶
type ConnectedGrouping[T GroupableItem] struct {
// contains filtered or unexported fields
}
ConnectedGrouping groups items by connected components using Union-Find.
func (*ConnectedGrouping[T]) GroupItems ¶
func (c *ConnectedGrouping[T]) GroupItems(pairs []*ItemPair[T]) []*ItemGroup[T]
func (*ConnectedGrouping[T]) Name ¶
func (c *ConnectedGrouping[T]) Name() string
type FeatureExtractor ¶
type FeatureExtractor interface {
ExtractFeatures(ast *apted.TreeNode) ([]string, error)
ExtractSubtreeHashes(ast *apted.TreeNode, maxHeight int) ([]string, error)
ExtractNodeSequences(ast *apted.TreeNode, k int) ([]string, error)
}
FeatureExtractor converts AST trees into feature sets for Jaccard similarity.
type GroupableItem ¶
type GroupableItem interface {
ItemID() int
ItemKey() string // Sorting key: "filepath|startLine|endLine|startCol|endCol"
}
GroupableItem represents an item that can be grouped (e.g. CodeFragment or domain.Clone).
type GroupingConfig ¶
type GroupingConfig struct {
Mode GroupingMode
Threshold float64
KCoreK int
}
GroupingConfig holds configuration for the grouping strategy.
type GroupingMode ¶
type GroupingMode string
GroupingMode selects the grouping algorithm.
const (
ModeConnected GroupingMode = "connected"
ModeKCore GroupingMode = "k_core"
ModeStarMedoid GroupingMode = "star_medoid"
ModeCompleteLinkage GroupingMode = "complete_linkage"
ModeCentroid GroupingMode = "centroid"
)
type GroupingStrategy ¶
type GroupingStrategy[T GroupableItem] interface {
GroupItems(pairs []*ItemPair[T]) []*ItemGroup[T]
Name() string
}
GroupingStrategy is the interface for grouping algorithms.
func NewGroupingStrategy ¶
func NewGroupingStrategy[T GroupableItem](config GroupingConfig) GroupingStrategy[T]
NewGroupingStrategy returns the appropriate strategy based on config.Mode.
type ItemGroup ¶
type ItemGroup[T GroupableItem] struct {
ID int
Items []T
GroupType int
Similarity float64
}
ItemGroup represents a grouping result.
type ItemPair ¶
type ItemPair[T GroupableItem] struct {
Item1 T
Item2 T
Similarity float64
PairType int // Clone type (1-4)
}
ItemPair represents a pair of items with similarity information.
type KCoreGrouping ¶
type KCoreGrouping[T GroupableItem] struct {
// contains filtered or unexported fields
}
KCoreGrouping finds k-core subgraphs and groups their connected components.
func (*KCoreGrouping[T]) GroupItems ¶
func (kc *KCoreGrouping[T]) GroupItems(pairs []*ItemPair[T]) []*ItemGroup[T]
func (*KCoreGrouping[T]) Name ¶
func (kc *KCoreGrouping[T]) Name() string
type StarMedoidGrouping ¶
type StarMedoidGrouping[T GroupableItem] struct {
// contains filtered or unexported fields
}
StarMedoidGrouping builds groups by iteratively selecting the item with the highest average similarity to its neighbors (the medoid), grouping it with its neighbors, and repeating.
func (*StarMedoidGrouping[T]) GroupItems ¶
func (s *StarMedoidGrouping[T]) GroupItems(pairs []*ItemPair[T]) []*ItemGroup[T]
func (*StarMedoidGrouping[T]) Name ¶
func (s *StarMedoidGrouping[T]) Name() string