Documentation
¶
Overview ¶
Package anomaly provides temporal anomaly detection over commit history. It uses Z-score analysis with a sliding window to detect sudden quality degradation in per-tick metrics (files changed, lines added/removed, churn).
Index ¶
- Constants
- func AggregateCommitsToTicks(commitMetrics map[string]*CommitAnomalyData, ...) map[int]*TickMetrics
- func ComputeZScores(values []float64, window int) []float64
- func EnrichAndRewrite(store analyze.ReportStore, analyzerID string, windowSize int, ...) error
- func GenerateStoreSections(reader analyze.ReportReader) ([]plotpage.Section, error)
- func RegisterPlotSections()
- func RegisterStoreTimeSeriesExtractor(analyzerFlag string, fn StoreTimeSeriesExtractor)
- func WriteEnrichmentToStore(w analyze.ReportWriter, externalAnomalies []ExternalAnomaly, ...) error
- type AggregateData
- type Analyzer
- func (h *Analyzer) ApplySnapshot(snap analyze.PlumbingSnapshot)
- func (h *Analyzer) CPUHeavy() bool
- func (h *Analyzer) Configure(facts map[string]any) error
- func (h *Analyzer) Consume(_ context.Context, ac *analyze.Context) (analyze.TC, error)
- func (h *Analyzer) ExtractCommitTimeSeries(report analyze.Report) map[string]any
- func (h *Analyzer) Fork(n int) []analyze.HistoryAnalyzer
- func (h *Analyzer) Initialize(_ *gitlib.Repository) error
- func (h *Analyzer) Merge(_ []analyze.HistoryAnalyzer)
- func (h *Analyzer) Name() string
- func (h *Analyzer) ReleaseSnapshot(_ analyze.PlumbingSnapshot)
- func (h *Analyzer) SnapshotPlumbing() analyze.PlumbingSnapshot
- func (h *Analyzer) WriteToStore(ctx context.Context, ticks []analyze.TICK, w analyze.ReportWriter) error
- type CommitAnomalyData
- type ComputedMetrics
- type ExternalAnomaly
- type ExternalSummary
- type RawMetrics
- type Record
- type ReportData
- type StoreTimeSeriesExtractor
- type TickData
- type TickMetrics
- type TimeSeriesEntry
- type ZScoreSet
Constants ¶
const ( ConfigAnomalyThreshold = "TemporalAnomaly.Threshold" ConfigAnomalyWindowSize = "TemporalAnomaly.WindowSize" )
Configuration keys.
const ( DefaultAnomalyThreshold = float32(2.0) DefaultAnomalyWindowSize = 20 // MinWindowSize is the minimum valid sliding window size. MinWindowSize = 2 // MinThreshold is the minimum valid Z-score threshold. MinThreshold = float32(0.1) )
Default configuration values.
const ( KindTimeSeries = "time_series" KindAnomalyRecord = "anomaly_record" KindAggregate = "aggregate" KindExternalAnomaly = "external_anomaly" KindExternalSummary = "external_summary" )
Store record kind constants.
Variables ¶
This section is empty.
Functions ¶
func AggregateCommitsToTicks ¶
func AggregateCommitsToTicks( commitMetrics map[string]*CommitAnomalyData, commitsByTick map[int][]gitlib.Hash, ) map[int]*TickMetrics
AggregateCommitsToTicks builds per-tick metrics from per-commit data grouped by the commits_by_tick mapping. This replaces the need for a separate per-tick accumulation path during Consume.
func ComputeZScores ¶
func ComputeZScores(values []float64, window int) []float64
ComputeZScores computes the Z-score for each value using a trailing sliding window of the given size. For index i, the window is values[max(0, i-window):i]. The Z-score measures how many standard deviations the current value is from the window mean. When the standard deviation is zero and the value equals the mean, the Z-score is 0. When the standard deviation is zero and the value differs from the mean, the Z-score is +/- stats.ZScoreMaxSentinel.
func EnrichAndRewrite ¶
func EnrichAndRewrite( store analyze.ReportStore, analyzerID string, windowSize int, threshold float64, ) error
EnrichAndRewrite reads the anomaly analyzer's structured store kinds, detects cross-analyzer anomalies from other analyzers' store data, then rewrites all anomaly kinds (original + enrichment) to the store.
func GenerateStoreSections ¶
func GenerateStoreSections(reader analyze.ReportReader) ([]plotpage.Section, error)
GenerateStoreSections reads pre-computed anomaly data from a ReportReader and builds the same plot sections as GenerateSections, without materializing a full Report or recomputing metrics.
func RegisterPlotSections ¶
func RegisterPlotSections()
RegisterPlotSections registers the anomaly plot section renderer with the analyze package.
func RegisterStoreTimeSeriesExtractor ¶
func RegisterStoreTimeSeriesExtractor(analyzerFlag string, fn StoreTimeSeriesExtractor)
RegisterStoreTimeSeriesExtractor registers a store-based extractor for the given analyzer flag.
func WriteEnrichmentToStore ¶
func WriteEnrichmentToStore( w analyze.ReportWriter, externalAnomalies []ExternalAnomaly, externalSummaries []ExternalSummary, ) error
WriteEnrichmentToStore writes external anomaly and summary records to the writer. Called by the enrichment pipeline after cross-analyzer anomaly detection.
Types ¶
type AggregateData ¶
type AggregateData struct {
TotalTicks int `json:"total_ticks" yaml:"total_ticks"`
TotalAnomalies int `json:"total_anomalies" yaml:"total_anomalies"`
AnomalyRate float64 `json:"anomaly_rate" yaml:"anomaly_rate"`
Threshold float32 `json:"threshold" yaml:"threshold"`
WindowSize int `json:"window_size" yaml:"window_size"`
ChurnMean float64 `json:"churn_mean" yaml:"churn_mean"`
ChurnStdDev float64 `json:"churn_stddev" yaml:"churn_stddev"`
FilesMean float64 `json:"files_mean" yaml:"files_mean"`
FilesStdDev float64 `json:"files_stddev" yaml:"files_stddev"`
LangDiversityMean float64 `json:"lang_diversity_mean" yaml:"lang_diversity_mean"`
LangDiversityStdDev float64 `json:"lang_diversity_stddev" yaml:"lang_diversity_stddev"`
AuthorCountMean float64 `json:"author_count_mean" yaml:"author_count_mean"`
AuthorCountStdDev float64 `json:"author_count_stddev" yaml:"author_count_stddev"`
}
AggregateData contains summary statistics for the anomaly analysis.
func ReadAggregateIfPresent ¶
func ReadAggregateIfPresent(reader analyze.ReportReader, kinds []string) (AggregateData, error)
ReadAggregateIfPresent reads the single aggregate record, returning the zero value if absent.
type Analyzer ¶
type Analyzer struct {
*analyze.BaseHistoryAnalyzer[*ComputedMetrics]
common.NoStateHibernation
TreeDiff *plumbing.TreeDiffAnalyzer
Ticks *plumbing.TicksSinceStart
LineStats *plumbing.LinesStatsCalculator
Languages *plumbing.LanguagesDetectionAnalyzer
Identity *plumbing.IdentityDetector
// Configuration (read-only after Configure).
Threshold float32
WindowSize int
// contains filtered or unexported fields
}
Analyzer detects temporal anomalies in commit history using Z-score analysis over a sliding window of per-tick metrics. Per-commit results are emitted as TCs; accumulated state lives in the Aggregator, not in the analyzer.
func (*Analyzer) ApplySnapshot ¶
func (h *Analyzer) ApplySnapshot(snap analyze.PlumbingSnapshot)
ApplySnapshot restores plumbing state from a previously captured snapshot.
func (*Analyzer) CPUHeavy ¶
func (h *Analyzer) CPUHeavy() bool
CPUHeavy returns false because the anomaly analyzer does not perform expensive UAST processing per commit.
func (*Analyzer) Consume ¶
func (h *Analyzer) Consume(_ context.Context, ac *analyze.Context) (analyze.TC, error)
Consume processes a single commit and returns a TC with per-commit metrics. The analyzer does not retain any per-commit state; all output is in the TC.
func (*Analyzer) ExtractCommitTimeSeries ¶
func (h *Analyzer) ExtractCommitTimeSeries(report analyze.Report) map[string]any
ExtractCommitTimeSeries extracts per-commit anomaly metrics from a finalized report. Implements analyze.CommitTimeSeriesProvider.
func (*Analyzer) Fork ¶
func (h *Analyzer) Fork(n int) []analyze.HistoryAnalyzer
Fork creates independent copies of the analyzer for parallel processing.
func (*Analyzer) Initialize ¶
func (h *Analyzer) Initialize(_ *gitlib.Repository) error
Initialize prepares the analyzer for processing commits.
func (*Analyzer) Merge ¶
func (h *Analyzer) Merge(_ []analyze.HistoryAnalyzer)
Merge is a no-op. Per-commit results are emitted as TCs and collected by the framework, not accumulated inside the analyzer.
func (*Analyzer) ReleaseSnapshot ¶
func (h *Analyzer) ReleaseSnapshot(_ analyze.PlumbingSnapshot)
ReleaseSnapshot releases resources owned by the snapshot. The anomaly analyzer does not hold UAST trees, so this is a no-op.
func (*Analyzer) SnapshotPlumbing ¶
func (h *Analyzer) SnapshotPlumbing() analyze.PlumbingSnapshot
SnapshotPlumbing captures the current plumbing output state.
func (*Analyzer) WriteToStore ¶
func (h *Analyzer) WriteToStore(ctx context.Context, ticks []analyze.TICK, w analyze.ReportWriter) error
WriteToStore implements analyze.StoreWriter. It converts ticks to a report, computes all metrics, and streams pre-computed results as individual records:
- "time_series": per-tick TimeSeriesEntry records (sorted by tick).
- "anomaly_record": per-anomaly Record entries (sorted by Z-score desc).
- "aggregate": single AggregateData record.
type CommitAnomalyData ¶
type CommitAnomalyData struct {
FilesChanged int `json:"files_changed"`
LinesAdded int `json:"lines_added"`
LinesRemoved int `json:"lines_removed"`
NetChurn int `json:"net_churn"`
Files []string `json:"files,omitempty"`
Languages map[string]int `json:"languages,omitempty"`
AuthorID int `json:"author_id"`
}
CommitAnomalyData holds raw metrics for a single commit.
type ComputedMetrics ¶
type ComputedMetrics struct {
Anomalies []Record `json:"anomalies" yaml:"anomalies"`
TimeSeries []TimeSeriesEntry `json:"time_series" yaml:"time_series"`
Aggregate AggregateData `json:"aggregate" yaml:"aggregate"`
ExternalAnomalies []ExternalAnomaly `json:"external_anomalies,omitempty" yaml:"external_anomalies,omitempty"`
ExternalSummaries []ExternalSummary `json:"external_summaries,omitempty" yaml:"external_summaries,omitempty"`
}
ComputedMetrics holds all computed metric results for the anomaly analyzer.
func ComputeAllMetrics ¶
func ComputeAllMetrics(report analyze.Report) (*ComputedMetrics, error)
ComputeAllMetrics runs all anomaly metrics and returns the results.
func (*ComputedMetrics) AnalyzerName ¶
func (m *ComputedMetrics) AnalyzerName() string
AnalyzerName returns the name of the analyzer.
func (*ComputedMetrics) ToJSON ¶
func (m *ComputedMetrics) ToJSON() any
ToJSON returns the metrics in a format suitable for JSON marshaling.
func (*ComputedMetrics) ToYAML ¶
func (m *ComputedMetrics) ToYAML() any
ToYAML returns the metrics in a format suitable for YAML marshaling.
type ExternalAnomaly ¶
type ExternalAnomaly struct {
Source string `json:"source" yaml:"source"`
Dimension string `json:"dimension" yaml:"dimension"`
Tick int `json:"tick" yaml:"tick"`
ZScore float64 `json:"z_score" yaml:"z_score"`
RawValue float64 `json:"raw_value" yaml:"raw_value"`
}
ExternalAnomaly describes an anomaly detected on an external analyzer's time series dimension.
func ReadExternalAnomaliesIfPresent ¶
func ReadExternalAnomaliesIfPresent(reader analyze.ReportReader, kinds []string) ([]ExternalAnomaly, error)
ReadExternalAnomaliesIfPresent reads all external_anomaly records.
type ExternalSummary ¶
type ExternalSummary struct {
Source string `json:"source" yaml:"source"`
Dimension string `json:"dimension" yaml:"dimension"`
Mean float64 `json:"mean" yaml:"mean"`
StdDev float64 `json:"stddev" yaml:"stddev"`
Anomalies int `json:"anomalies" yaml:"anomalies"`
HighestZ float64 `json:"highest_z" yaml:"highest_z"`
}
ExternalSummary summarizes anomaly detection results for one external dimension.
func ReadExternalSummariesIfPresent ¶
func ReadExternalSummariesIfPresent(reader analyze.ReportReader, kinds []string) ([]ExternalSummary, error)
ReadExternalSummariesIfPresent reads all external_summary records.
type RawMetrics ¶
type RawMetrics struct {
FilesChanged int `json:"files_changed" yaml:"files_changed"`
LinesAdded int `json:"lines_added" yaml:"lines_added"`
LinesRemoved int `json:"lines_removed" yaml:"lines_removed"`
NetChurn int `json:"net_churn" yaml:"net_churn"`
LanguageDiversity int `json:"language_diversity" yaml:"language_diversity"`
AuthorCount int `json:"author_count" yaml:"author_count"`
}
RawMetrics holds the raw metric values for a single tick.
type Record ¶
type Record struct {
Tick int `json:"tick" yaml:"tick"`
ZScores ZScoreSet `json:"z_scores" yaml:"z_scores"`
MaxAbsZScore float64 `json:"max_abs_z_score" yaml:"max_abs_z_score"`
Metrics RawMetrics `json:"metrics" yaml:"metrics"`
Files []string `json:"files" yaml:"files"`
}
Record describes a detected anomaly at a specific tick.
func ReadAnomaliesIfPresent ¶
func ReadAnomaliesIfPresent(reader analyze.ReportReader, kinds []string) ([]Record, error)
ReadAnomaliesIfPresent reads all anomaly_record records, returning nil if absent.
type ReportData ¶
type ReportData struct {
Anomalies []Record
TickMetrics map[int]*TickMetrics
Threshold float32
WindowSize int
ExternalAnomalies []ExternalAnomaly
ExternalSummaries []ExternalSummary
}
ReportData is the parsed input data for anomaly metrics computation.
func ParseReportData ¶
func ParseReportData(report analyze.Report) (*ReportData, error)
ParseReportData extracts ReportData from an analyzer report. Expects canonical format: commit_metrics and commits_by_tick.
type StoreTimeSeriesExtractor ¶
type StoreTimeSeriesExtractor func(reader analyze.ReportReader) (ticks []int, dimensions map[string][]float64)
StoreTimeSeriesExtractor extracts tick-indexed dimensions from a store reader. It is used by analyzers that write structured store kinds.
type TickData ¶
type TickData struct {
// CommitMetrics maps commit hash (hex) to per-commit CommitAnomalyData.
CommitMetrics map[string]*CommitAnomalyData
}
TickData is the per-tick aggregated payload for the anomaly analyzer. It holds per-commit metrics for the canonical report format.
type TickMetrics ¶
type TickMetrics struct {
FilesChanged int
LinesAdded int
LinesRemoved int
NetChurn int
Files []string
Languages map[string]int // language name → file count for this tick.
AuthorIDs map[int]struct{} // unique author IDs seen in this tick.
}
TickMetrics holds the raw metrics collected for a single tick.
type TimeSeriesEntry ¶
type TimeSeriesEntry struct {
Tick int `json:"tick" yaml:"tick"`
Metrics RawMetrics `json:"metrics" yaml:"metrics"`
IsAnomaly bool `json:"is_anomaly" yaml:"is_anomaly"`
ChurnZScore float64 `json:"churn_z_score" yaml:"churn_z_score"`
LanguageDiversity int `json:"language_diversity" yaml:"language_diversity"`
AuthorCount int `json:"author_count" yaml:"author_count"`
}
TimeSeriesEntry holds per-tick data for the time series output.
func ReadTimeSeriesIfPresent ¶
func ReadTimeSeriesIfPresent(reader analyze.ReportReader, kinds []string) ([]TimeSeriesEntry, error)
ReadTimeSeriesIfPresent reads all time_series records, returning nil if absent.
type ZScoreSet ¶
type ZScoreSet struct {
NetChurn float64 `json:"net_churn" yaml:"net_churn"`
FilesChanged float64 `json:"files_changed" yaml:"files_changed"`
LinesAdded float64 `json:"lines_added" yaml:"lines_added"`
LinesRemoved float64 `json:"lines_removed" yaml:"lines_removed"`
LanguageDiversity float64 `json:"language_diversity" yaml:"language_diversity"`
AuthorCount float64 `json:"author_count" yaml:"author_count"`
}
ZScoreSet holds per-metric Z-scores for a single tick.