analyze

package

Version: v0.2.0 (latest) | Published: Apr 20, 2026 | License: MIT | Imports: 8 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/plexusone/graphize

Links

Open Source Insights

Documentation ¶

Overview ¶

Package analyze provides graph analysis functions and utilities, including god nodes, community detection, and surprising connections.

Index ¶

Constants
Variables
func AmbiguousEdges(edges []*graph.Edge) []*graph.Edge
func BridgeEdges(edges []*graph.Edge, communities map[int][]string) []*graph.Edge
func CommunityLabels(communities []Community, nodes []*graph.Node) map[int]string
func CountEdgesByConfidence(edges []*graph.Edge) map[string]int
func CountEdgesByType(edges []*graph.Edge) map[string]int
func CrossFileEdges(nodes []*graph.Node, edges []*graph.Edge) []*graph.Edge
func FormatHealth(h *CorpusHealth) string
func GroupEdgesByConfidence(edges []*graph.Edge) map[string][]*graph.Edge
func GroupEdgesByType(edges []*graph.Edge) map[string][]*graph.Edge
func IsolatedNodes(nodes []*graph.Node, edges []*graph.Edge, threshold int) []*graph.Node
func LowConfidenceEdges(edges []*graph.Edge, threshold float64) []*graph.Edge
type BetweennessOptions
- func DefaultBetweennessOptions() BetweennessOptions
type BetweennessResult
- func CalculateBetweenness(nodes []*graph.Node, edges []*graph.Edge, opts BetweennessOptions) *BetweennessResult
type BridgeNode
- func FindBridges(nodes []*graph.Node, edges []*graph.Edge, topN int) []BridgeNode
- func FindBridgesWithCommunities(nodes []*graph.Node, edges []*graph.Edge, communities map[int][]string, topN int) []BridgeNode
type ClusterAlgorithm
type ClusterOptions
type ClusterResult
- func DetectCommunities(nodes []*graph.Node, edges []*graph.Edge) *ClusterResult
- func DetectCommunitiesWithOptions(nodes []*graph.Node, edges []*graph.Edge, opts ClusterOptions) *ClusterResult
type Community
type CommunityInfo
type CorpusHealth
- func CheckCorpusHealth(nodes []*graph.Node, edges []*graph.Edge, opts HealthOptions) *CorpusHealth
- func CheckCorpusHealthFromSource(nodes []*graph.Node, edges []*graph.Edge, sourceDir string) (*CorpusHealth, error)
type EdgeChange
type GodNode
- func GodNodes(nodes []*graph.Node, edges []*graph.Edge, topN int) []GodNode
type GraphDiff
type HealthOptions
type HubNode
type LouvainOptions
type LouvainResult
type NodeChange
type PackageStats
- func AnalyzePackages(nodes []*graph.Node, edges []*graph.Edge) []PackageStats
type Question
- func SuggestQuestions(nodes []*graph.Node, edges []*graph.Edge, communities map[int][]string, topN int) []Question
type Report
- func GenerateReport(nodes []*graph.Node, edges []*graph.Edge, opts ReportOptions) *Report
- func (r *Report) FormatMarkdown(opts ReportOptions) string
type ReportOptions
- func DefaultReportOptions() ReportOptions
type ReportSummary
type Surprise
- func SurprisingConnections(nodes []*graph.Node, edges []*graph.Edge, communities map[int][]string, topN int) []Surprise

Constants ¶

View Source

const (
	AlgorithmLouvain             = analyze.AlgorithmLouvain
	AlgorithmConnectedComponents = analyze.AlgorithmConnectedComponents
)

Re-export algorithm constants

Variables ¶

View Source

var (
	DefaultClusterOptions    = analyze.DefaultClusterOptions
	DefaultLouvainOptions    = analyze.DefaultLouvainOptions
	DetectCommunitiesLouvain = analyze.DetectCommunitiesLouvain
	LouvainToClusters        = analyze.LouvainToClusters
)

Re-export cluster functions

View Source

var (
	NodesByType       = analyze.NodesByType
	EdgesByType       = analyze.EdgesByType
	EdgesByConfidence = analyze.EdgesByConfidence
	HubScore          = analyze.HubScore
	AuthorityScore    = analyze.AuthorityScore
	InferredEdges     = analyze.InferredEdges
	CohesionScore     = analyze.CohesionScore
)

Re-export generic functions from graphfs

View Source

var DiffGraphs = analyze.DiffGraphs

Re-export diff functions

Functions ¶

func AmbiguousEdges ¶

func AmbiguousEdges(edges []*graph.Edge) []*graph.Edge

AmbiguousEdges returns all edges with AMBIGUOUS confidence. These are candidates for human review.

func BridgeEdges ¶

func BridgeEdges(edges []*graph.Edge, communities map[int][]string) []*graph.Edge

BridgeEdges finds edges that connect otherwise disconnected parts of the graph, i.e. edges whose removal would increase the number of connected components. For simplicity, this is approximated by finding cross-community edges rather than computing true bridges.

func CommunityLabels ¶

func CommunityLabels(communities []Community, nodes []*graph.Node) map[int]string

CommunityLabels generates human-readable labels for communities based on member nodes.

func CountEdgesByConfidence ¶

func CountEdgesByConfidence(edges []*graph.Edge) map[string]int

CountEdgesByConfidence returns edge counts per confidence level.

func CountEdgesByType ¶

func CountEdgesByType(edges []*graph.Edge) map[string]int

CountEdgesByType returns edge counts per type.

func CrossFileEdges ¶

func CrossFileEdges(nodes []*graph.Node, edges []*graph.Edge) []*graph.Edge

CrossFileEdges returns edges that connect nodes in different source files.

func FormatHealth ¶

func FormatHealth(h *CorpusHealth) string

FormatHealth formats health assessment as human-readable text.

func GroupEdgesByConfidence ¶

func GroupEdgesByConfidence(edges []*graph.Edge) map[string][]*graph.Edge

GroupEdgesByConfidence groups edges by their Confidence field.

func GroupEdgesByType ¶

func GroupEdgesByType(edges []*graph.Edge) map[string][]*graph.Edge

GroupEdgesByType groups edges by their Type field.

func IsolatedNodes ¶

func IsolatedNodes(nodes []*graph.Node, edges []*graph.Edge, threshold int) []*graph.Node

IsolatedNodes returns nodes with degree <= threshold. These represent potential documentation gaps or orphaned code.

func LowConfidenceEdges ¶

func LowConfidenceEdges(edges []*graph.Edge, threshold float64) []*graph.Edge

LowConfidenceEdges returns INFERRED edges with confidence score below threshold.

Types ¶

type BetweennessOptions ¶

type BetweennessOptions struct {
	// TopN limits results to top N bridges (0 = all)
	TopN int
	// ExcludeNodeTypes filters out these node types from results
	ExcludeNodeTypes []string
	// ExcludeEdgeTypes filters out these edge types from the graph
	ExcludeEdgeTypes []string
	// Communities provides community membership for bridge analysis
	Communities map[int][]string
}

BetweennessOptions configures betweenness centrality calculation.

func DefaultBetweennessOptions ¶

func DefaultBetweennessOptions() BetweennessOptions

DefaultBetweennessOptions returns sensible defaults.

type BetweennessResult ¶

type BetweennessResult struct {
	// Bridges are nodes with highest betweenness centrality
	Bridges []BridgeNode `json:"bridges"`
	// Scores maps node ID to its betweenness centrality score
	Scores map[string]float64 `json:"scores"`
	// MaxScore is the highest betweenness score in the graph
	MaxScore float64 `json:"max_score"`
}

BetweennessResult contains the results of betweenness centrality analysis.

func CalculateBetweenness ¶

func CalculateBetweenness(nodes []*graph.Node, edges []*graph.Edge, opts BetweennessOptions) *BetweennessResult

CalculateBetweenness computes betweenness centrality for all nodes. Betweenness centrality measures how often a node lies on shortest paths between other nodes. High betweenness indicates architectural bridges.

type BridgeNode ¶

type BridgeNode struct {
	ID         string  `json:"id"`
	Label      string  `json:"label"`
	Type       string  `json:"type"`
	Centrality float64 `json:"centrality"`
	// Communities this node connects (if community detection was run)
	ConnectsCommunities []int `json:"connects_communities,omitempty"`
}

BridgeNode represents a node with high betweenness centrality. These nodes act as bridges connecting different parts of the graph.

func FindBridges ¶

func FindBridges(nodes []*graph.Node, edges []*graph.Edge, topN int) []BridgeNode

FindBridges is a convenience function that finds architectural bridges using default options.

func FindBridgesWithCommunities ¶

func FindBridgesWithCommunities(nodes []*graph.Node, edges []*graph.Edge, communities map[int][]string, topN int) []BridgeNode

FindBridgesWithCommunities finds bridges and annotates which communities they connect.

type ClusterAlgorithm ¶

type ClusterAlgorithm = analyze.ClusterAlgorithm

Re-export the cluster algorithm type from graphfs

type ClusterOptions ¶

type ClusterOptions = analyze.ClusterOptions

Re-export cluster options from graphfs

type ClusterResult ¶

type ClusterResult = analyze.ClusterResult

Re-export ClusterResult from graphfs for backward compatibility

func DetectCommunities ¶

func DetectCommunities(nodes []*graph.Node, edges []*graph.Edge) *ClusterResult

DetectCommunities performs community detection using the Louvain algorithm.

func DetectCommunitiesWithOptions ¶

func DetectCommunitiesWithOptions(nodes []*graph.Node, edges []*graph.Edge, opts ClusterOptions) *ClusterResult

DetectCommunitiesWithOptions performs community detection with configurable options.

type Community ¶

type Community = analyze.Community

Re-export generic types from graphfs for backward compatibility

type CommunityInfo ¶

type CommunityInfo struct {
	ID       int      `json:"id"`
	Size     int      `json:"size"`
	Cohesion float64  `json:"cohesion"`
	Label    string   `json:"label"`
	Members  []string `json:"members,omitempty"`
}

CommunityInfo contains community details for the report.

type CorpusHealth ¶

type CorpusHealth struct {
	// FileCount is the number of source files analyzed.
	FileCount int `json:"file_count"`

	// WordCount is the estimated word count in source files.
	WordCount int `json:"word_count"`

	// EstimatedTokens is the estimated token count for source files.
	EstimatedTokens int `json:"estimated_tokens"`

	// GraphNodes is the number of nodes in the graph.
	GraphNodes int `json:"graph_nodes"`

	// GraphEdges is the number of edges in the graph.
	GraphEdges int `json:"graph_edges"`

	// GraphTokens is the estimated token count for graph representation.
	GraphTokens int `json:"graph_tokens"`

	// TokenReduction is the percentage reduction in tokens (0-100).
	// A value of 75 means the graph uses 75% fewer tokens than raw source.
	TokenReduction float64 `json:"token_reduction"`

	// Verdict is the overall health assessment: "valuable", "marginal", or "limited".
	Verdict string `json:"verdict"`

	// VerdictReason explains the verdict in human-readable terms.
	VerdictReason string `json:"verdict_reason"`
}

CorpusHealth represents the health assessment of a knowledge graph corpus.

func CheckCorpusHealth ¶

func CheckCorpusHealth(nodes []*graph.Node, edges []*graph.Edge, opts HealthOptions) *CorpusHealth

CheckCorpusHealth assesses the health and value of a knowledge graph. It compares the graph representation against the source code to determine if the graph provides meaningful token reduction and structural insight.

func CheckCorpusHealthFromSource ¶

func CheckCorpusHealthFromSource(nodes []*graph.Node, edges []*graph.Edge, sourceDir string) (*CorpusHealth, error)

CheckCorpusHealthFromSource calculates health metrics by walking source files.

type EdgeChange ¶

type EdgeChange = analyze.EdgeChange

Re-export diff types from graphfs for backward compatibility

type GodNode ¶

type GodNode = HubNode

GodNode is an alias for HubNode with code-specific naming.

Deprecated: Use HubNode instead.

func GodNodes ¶

func GodNodes(nodes []*graph.Node, edges []*graph.Edge, topN int) []GodNode

GodNodes returns the top N most connected nodes in the graph. Excludes file-level hub nodes (packages, files) to focus on meaningful architectural abstractions like functions and types.

type GraphDiff ¶

type GraphDiff = analyze.GraphDiff

Re-export diff types from graphfs for backward compatibility

type HealthOptions ¶

type HealthOptions struct {
	// IncludeFileContent indicates whether to count tokens from source files.
	// If false, uses estimates based on node/edge counts.
	IncludeFileContent bool

	// SourceTokens is the pre-computed source token count (if known).
	// If set, this overrides file walking.
	SourceTokens int

	// FileCount is the pre-computed file count (if known).
	FileCount int
}

HealthOptions configures corpus health assessment.

type HubNode ¶

type HubNode = analyze.HubNode

Re-export generic types from graphfs for backward compatibility

type LouvainOptions ¶

type LouvainOptions = analyze.LouvainOptions

Re-export cluster options from graphfs

type LouvainResult ¶

type LouvainResult = analyze.LouvainResult

Re-export the Louvain result type from graphfs

type NodeChange ¶

type NodeChange = analyze.NodeChange

Re-export diff types from graphfs for backward compatibility

type PackageStats ¶

type PackageStats struct {
	Name      string `json:"name"`
	Files     int    `json:"files"`
	Functions int    `json:"functions"`
	Types     int    `json:"types"`
	Imports   int    `json:"imports"`
}

PackageStats holds statistics about a single package in the graph: its name and its counts of files, functions, types, and imports.

func AnalyzePackages ¶

func AnalyzePackages(nodes []*graph.Node, edges []*graph.Edge) []PackageStats

AnalyzePackages returns statistics for each package in the graph.

type Question ¶

type Question struct {
	Type     string `json:"type"`
	Question string `json:"question"`
	Why      string `json:"why"`
}

Question represents a suggested question about the codebase.

func SuggestQuestions ¶

func SuggestQuestions(nodes []*graph.Node, edges []*graph.Edge, communities map[int][]string, topN int) []Question

SuggestQuestions generates questions based on graph analysis. Questions help identify areas needing human review or documentation.

type Report ¶

type Report struct {
	// Summary contains basic counts
	Summary ReportSummary `json:"summary"`

	// GodNodes are the most connected nodes
	GodNodes []GodNode `json:"god_nodes"`

	// Bridges are nodes with high betweenness centrality
	Bridges []BridgeNode `json:"bridges"`

	// Communities detected via Louvain algorithm
	Communities  []CommunityInfo  `json:"communities"`
	Modularity   float64          `json:"modularity"`
	CommunityMap map[int][]string `json:"-"` // for internal use

	// Surprises are unexpected connections
	Surprises []Surprise `json:"surprises"`

	// IsolatedNodes have very few connections
	IsolatedNodes []*graph.Node `json:"isolated_nodes"`

	// PackageStats per package
	PackageStats []PackageStats `json:"package_stats"`

	// CrossFileEdgeCount is the number of edges crossing file boundaries
	CrossFileEdgeCount int `json:"cross_file_edge_count"`

	// Questions suggested based on analysis
	Questions []Question `json:"questions"`
}

Report contains all sections of a graph analysis report.

func GenerateReport ¶

func GenerateReport(nodes []*graph.Node, edges []*graph.Edge, opts ReportOptions) *Report

GenerateReport creates a complete analysis report from the graph.

func (*Report) FormatMarkdown ¶

func (r *Report) FormatMarkdown(opts ReportOptions) string

FormatMarkdown converts the report to markdown format.

type ReportOptions ¶

type ReportOptions struct {
	// TopN limits result lists (god nodes, surprises, etc.)
	TopN int

	// IncludeMembers includes community member lists in output
	IncludeMembers bool
}

ReportOptions configures report generation.

func DefaultReportOptions ¶

func DefaultReportOptions() ReportOptions

DefaultReportOptions returns sensible defaults.

type ReportSummary ¶

type ReportSummary struct {
	TotalNodes       int            `json:"total_nodes"`
	TotalEdges       int            `json:"total_edges"`
	NodeTypeCounts   map[string]int `json:"node_types"`
	EdgeTypeCounts   map[string]int `json:"edge_types"`
	ConfidenceCounts map[string]int `json:"confidence_counts"`
}

ReportSummary contains basic graph statistics.

type Surprise ¶

type Surprise struct {
	From            string   `json:"from"`
	FromLabel       string   `json:"from_label"`
	To              string   `json:"to"`
	ToLabel         string   `json:"to_label"`
	Type            string   `json:"type"`
	Confidence      string   `json:"confidence"`
	Score           float64  `json:"score"`
	ConfidenceScore float64  `json:"confidence_score,omitempty"`
	Why             string   `json:"why"`
	SourceFiles     []string `json:"source_files,omitempty"`
}

Surprise represents a surprising connection in the graph.

func SurprisingConnections ¶

func SurprisingConnections(nodes []*graph.Node, edges []*graph.Edge, communities map[int][]string, topN int) []Surprise

SurprisingConnections finds edges that are unexpected or non-obvious. It prioritizes:

 1. AMBIGUOUS edges (uncertain relationships)
 2. INFERRED edges (LLM-inferred relationships)
 3. Cross-file edges between unrelated components

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL