clone

package
v0.1.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 15, 2026 License: MIT Imports: 6 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ASTFeatureExtractor

type ASTFeatureExtractor struct {

	// PatternNames is the list of node type names to check for structural patterns.
	// Language-specific: set this to match your AST node types.
	// If nil, no pattern features are extracted.
	PatternNames []string
	// contains filtered or unexported fields
}

ASTFeatureExtractor implements FeatureExtractor for apted.TreeNode trees.

func NewASTFeatureExtractor

func NewASTFeatureExtractor() *ASTFeatureExtractor

NewASTFeatureExtractor creates a feature extractor with sensible defaults.

func (*ASTFeatureExtractor) ExtractFeatures

func (a *ASTFeatureExtractor) ExtractFeatures(ast *apted.TreeNode) ([]string, error)

ExtractFeatures builds a mixed set of features from the tree.

func (*ASTFeatureExtractor) ExtractNodeSequences

func (a *ASTFeatureExtractor) ExtractNodeSequences(ast *apted.TreeNode, k int) ([]string, error)

ExtractNodeSequences returns k-grams from pre-order traversal labels.

func (*ASTFeatureExtractor) ExtractSubtreeHashes

func (a *ASTFeatureExtractor) ExtractSubtreeHashes(ast *apted.TreeNode, maxHeight int) ([]string, error)

ExtractSubtreeHashes computes bottom-up hashes of subtrees up to maxHeight.

func (*ASTFeatureExtractor) WithOptions

func (a *ASTFeatureExtractor) WithOptions(maxHeight, k int, includeTypes, includeLiterals bool) *ASTFeatureExtractor

WithOptions overrides the extractor's default maxHeight, k, includeTypes, and includeLiterals settings.

func (*ASTFeatureExtractor) WithPatterns

func (a *ASTFeatureExtractor) WithPatterns(patterns []string) *ASTFeatureExtractor

WithPatterns sets the pattern names for structural feature extraction.

type CentroidGrouping

type CentroidGrouping[T GroupableItem] struct {
	// contains filtered or unexported fields
}

CentroidGrouping seeds a group with the highest-similarity pair, then expands it by adding each item whose similarity to every existing group member is >= threshold.

func (*CentroidGrouping[T]) GroupItems

func (cg *CentroidGrouping[T]) GroupItems(pairs []*ItemPair[T]) []*ItemGroup[T]

func (*CentroidGrouping[T]) Name

func (cg *CentroidGrouping[T]) Name() string

type CompleteLinkageGrouping

type CompleteLinkageGrouping[T GroupableItem] struct {
	// contains filtered or unexported fields
}

CompleteLinkageGrouping finds maximal cliques using the Bron-Kerbosch algorithm with pivoting. Each clique becomes a group. If an item appears in multiple cliques, it is assigned to the largest one.

func (*CompleteLinkageGrouping[T]) GroupItems

func (cl *CompleteLinkageGrouping[T]) GroupItems(pairs []*ItemPair[T]) []*ItemGroup[T]

func (*CompleteLinkageGrouping[T]) Name

func (cl *CompleteLinkageGrouping[T]) Name() string

type ConnectedGrouping

type ConnectedGrouping[T GroupableItem] struct {
	// contains filtered or unexported fields
}

ConnectedGrouping groups items by connected components using Union-Find.

func (*ConnectedGrouping[T]) GroupItems

func (c *ConnectedGrouping[T]) GroupItems(pairs []*ItemPair[T]) []*ItemGroup[T]

func (*ConnectedGrouping[T]) Name

func (c *ConnectedGrouping[T]) Name() string

type FeatureExtractor

type FeatureExtractor interface {
	ExtractFeatures(ast *apted.TreeNode) ([]string, error)
	ExtractSubtreeHashes(ast *apted.TreeNode, maxHeight int) ([]string, error)
	ExtractNodeSequences(ast *apted.TreeNode, k int) ([]string, error)
}

FeatureExtractor converts AST trees into feature sets for Jaccard similarity.

type GroupableItem

type GroupableItem interface {
	ItemID() int
	ItemKey() string // Sorting key: "filepath|startLine|endLine|startCol|endCol"
}

GroupableItem represents an item that can be grouped (e.g. CodeFragment or domain.Clone).

type GroupingConfig

type GroupingConfig struct {
	Mode      GroupingMode
	Threshold float64
	KCoreK    int
}

GroupingConfig holds configuration for the grouping strategy.

type GroupingMode

type GroupingMode string

GroupingMode selects the grouping algorithm.

const (
	ModeConnected       GroupingMode = "connected"
	ModeKCore           GroupingMode = "k_core"
	ModeStarMedoid      GroupingMode = "star_medoid"
	ModeCompleteLinkage GroupingMode = "complete_linkage"
	ModeCentroid        GroupingMode = "centroid"
)

type GroupingStrategy

type GroupingStrategy[T GroupableItem] interface {
	GroupItems(pairs []*ItemPair[T]) []*ItemGroup[T]
	Name() string
}

GroupingStrategy is the interface for grouping algorithms.

func NewGroupingStrategy

func NewGroupingStrategy[T GroupableItem](config GroupingConfig) GroupingStrategy[T]

NewGroupingStrategy returns the appropriate strategy based on config.Mode.

type ItemGroup

type ItemGroup[T GroupableItem] struct {
	ID         int
	Items      []T
	GroupType  int
	Similarity float64
}

ItemGroup represents a grouping result.

type ItemPair

type ItemPair[T GroupableItem] struct {
	Item1      T
	Item2      T
	Similarity float64
	PairType   int // Clone type (1-4)
}

ItemPair represents a pair of items with similarity information.

type KCoreGrouping

type KCoreGrouping[T GroupableItem] struct {
	// contains filtered or unexported fields
}

KCoreGrouping finds k-core subgraphs and groups their connected components.

func (*KCoreGrouping[T]) GroupItems

func (kc *KCoreGrouping[T]) GroupItems(pairs []*ItemPair[T]) []*ItemGroup[T]

func (*KCoreGrouping[T]) Name

func (kc *KCoreGrouping[T]) Name() string

type StarMedoidGrouping

type StarMedoidGrouping[T GroupableItem] struct {
	// contains filtered or unexported fields
}

StarMedoidGrouping builds groups by iteratively selecting the item with the highest average similarity to its neighbors (the medoid), grouping it with its neighbors, and repeating.

func (*StarMedoidGrouping[T]) GroupItems

func (s *StarMedoidGrouping[T]) GroupItems(pairs []*ItemPair[T]) []*ItemGroup[T]

func (*StarMedoidGrouping[T]) Name

func (s *StarMedoidGrouping[T]) Name() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL