Documentation
¶
Overview ¶
Package quality implements Go-native data quality checks, profiling, and anomaly detection.
Index ¶
- func NewChecksModule(name string, config map[string]any) (sdk.ModuleInstance, error)
- func NewDBTTestStep(name string, _ map[string]any) (sdk.StepInstance, error)
- func NewGEValidateStep(name string, _ map[string]any) (sdk.StepInstance, error)
- func NewQualityAnomalyStep(name string, _ map[string]any) (sdk.StepInstance, error)
- func NewQualityCheckStep(name string, _ map[string]any) (sdk.StepInstance, error)
- func NewQualityCompareStep(name string, _ map[string]any) (sdk.StepInstance, error)
- func NewQualityProfileStep(name string, _ map[string]any) (sdk.StepInstance, error)
- func NewQualitySchemaValidateStep(name string, _ map[string]any) (sdk.StepInstance, error)
- func NewSodaCheckStep(name string, _ map[string]any) (sdk.StepInstance, error)
- func RegisterChecksModule(name string, m *ChecksModule) error
- func UnregisterChecksModule(name string)
- type AnomalyResult
- type CheckResult
- type ChecksConfig
- type ChecksModule
- type ColumnProfile
- type ContractColumn
- type ContractResult
- type ContractSchema
- type DBQuerier
- type DBTResult
- type DataContract
- type GEResult
- type Percentiles
- type ProfileResult
- type QualityCheck
- type QualityChecker
- type SodaResult
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func NewChecksModule ¶
NewChecksModule creates a new quality.checks module from config.
func NewDBTTestStep ¶
func NewGEValidateStep ¶
func NewQualityAnomalyStep ¶
func NewQualityCheckStep ¶
func NewQualityCompareStep ¶
func NewQualityProfileStep ¶
func NewSodaCheckStep ¶
func RegisterChecksModule ¶
func RegisterChecksModule(name string, m *ChecksModule) error
RegisterChecksModule registers a ChecksModule under the given name.
func UnregisterChecksModule ¶
func UnregisterChecksModule(name string)
UnregisterChecksModule removes a registered ChecksModule.
Types ¶
type AnomalyResult ¶
type AnomalyResult struct {
Column string `json:"column"`
Method string `json:"method"` // zscore, iqr
Anomalies int `json:"anomalies"`
Threshold float64 `json:"threshold"`
SampleSize int `json:"sampleSize"`
}
AnomalyResult holds the outcome of anomaly detection for one column.
func DetectAnomalies ¶
func DetectAnomalies(values []float64, method string, threshold float64) *AnomalyResult
DetectAnomalies detects anomalies in the given slice of float64 values. method is either "zscore" (flag values with |z| > threshold) or "iqr" (flag values below Q1-threshold*IQR or above Q3+threshold*IQR; with the default threshold this is the usual 1.5*IQR fence). If threshold <= 0, it defaults to 3.0 for zscore and 1.5 for iqr.
type CheckResult ¶
type CheckResult struct {
Check string `json:"check"`
Passed bool `json:"passed"`
Message string `json:"message"`
Value any `json:"value,omitempty"`
}
CheckResult is the outcome of one quality check.
type ChecksConfig ¶
type ChecksConfig struct {
Provider string `json:"provider" yaml:"provider"`
ContractsDir string `json:"contractsDir" yaml:"contractsDir"`
Database string `json:"database" yaml:"database"`
}
ChecksConfig holds configuration for the quality.checks module.
type ChecksModule ¶
type ChecksModule struct {
// contains filtered or unexported fields
}
ChecksModule implements the quality.checks module.
func LookupChecksModule ¶
func LookupChecksModule(name string) (*ChecksModule, error)
LookupChecksModule returns the registered ChecksModule by name.
func NewChecksModuleWithExecutor ¶
func NewChecksModuleWithExecutor(name string, exec DBQuerier) *ChecksModule
NewChecksModuleWithExecutor creates a ChecksModule with an injected DBQuerier (for testing).
func (*ChecksModule) ContractsDir ¶
func (m *ChecksModule) ContractsDir() string
ContractsDir returns the directory used to load data contracts.
func (*ChecksModule) Executor ¶
func (m *ChecksModule) Executor() DBQuerier
Executor returns the DBQuerier for this module (may be nil if no DB is configured).
func (*ChecksModule) Init ¶
func (m *ChecksModule) Init() error
Init validates the module configuration.
func (*ChecksModule) SetExecutor ¶
func (m *ChecksModule) SetExecutor(exec DBQuerier)
SetExecutor replaces the DBQuerier (used for lazy injection or test overrides).
type ColumnProfile ¶
type ColumnProfile struct {
NullCount int64 `json:"nullCount"`
NullRate float64 `json:"nullRate"`
DistinctCount int64 `json:"distinctCount"`
Min any `json:"min,omitempty"`
Max any `json:"max,omitempty"`
Mean *float64 `json:"mean,omitempty"`
StdDev *float64 `json:"stdDev,omitempty"`
Percentiles *Percentiles `json:"percentiles,omitempty"`
}
ColumnProfile holds statistics for a single column.
type ContractColumn ¶
type ContractColumn struct {
Name string `json:"name" yaml:"name"`
Type string `json:"type" yaml:"type"`
Nullable bool `json:"nullable,omitempty" yaml:"nullable,omitempty"`
Pattern string `json:"pattern,omitempty" yaml:"pattern,omitempty"`
}
ContractColumn describes a single expected column.
type ContractResult ¶
type ContractResult struct {
Dataset string `json:"dataset"`
Passed bool `json:"passed"`
SchemaOK bool `json:"schemaOk"`
QualityOK bool `json:"qualityOk"`
SchemaErrors []string `json:"schemaErrors,omitempty"`
QualityResults []CheckResult `json:"qualityResults"`
}
ContractResult is the outcome of validating a dataset against a contract.
func ValidateContract ¶
func ValidateContract(ctx context.Context, exec DBQuerier, contract DataContract) (*ContractResult, error)
ValidateContract runs schema and quality checks against a live dataset.
type ContractSchema ¶
type ContractSchema struct {
Columns []ContractColumn `json:"columns" yaml:"columns"`
}
ContractSchema lists the columns expected in a dataset.
type DBQuerier ¶
type DBQuerier interface {
QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
QueryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error)
}
DBQuerier is the subset of *sql.DB used by quality checks and the profiler. *sql.DB satisfies this interface natively; go-sqlmock returns *sql.DB, so tests work without adapters.
type DBTResult ¶
type DBTResult struct {
Passed int `json:"passed"`
Failed int `json:"failed"`
Errors int `json:"errors"`
Skipped int `json:"skipped"`
Results []string `json:"results,omitempty"`
}
DBTResult holds the parsed output of `dbt test`.
type DataContract ¶
type DataContract struct {
Dataset string `json:"dataset" yaml:"dataset"`
Owner string `json:"owner" yaml:"owner"`
Schema ContractSchema `json:"schema" yaml:"schema"`
Quality []QualityCheck `json:"quality" yaml:"quality"`
}
DataContract describes a dataset's expected schema and quality assertions.
func ParseContract ¶
func ParseContract(path string) (*DataContract, error)
ParseContract reads and parses a YAML data contract file.
type GEResult ¶
type GEResult struct {
Success bool `json:"success"`
Evaluated int `json:"evaluated"`
Successful int `json:"successful"`
Failed int `json:"failed"`
}
GEResult holds the parsed output of `great_expectations checkpoint run`.
type Percentiles ¶
type Percentiles struct {
P25 float64 `json:"p25"`
P50 float64 `json:"p50"`
P75 float64 `json:"p75"`
P90 float64 `json:"p90"`
P99 float64 `json:"p99"`
}
Percentiles holds common percentile values for numeric columns.
type ProfileResult ¶
type ProfileResult struct {
Table string `json:"table"`
RowCount int64 `json:"rowCount"`
Columns map[string]*ColumnProfile `json:"columns"`
}
ProfileResult holds the statistical profile of a table.
func Profile ¶
func Profile(ctx context.Context, exec DBQuerier, table string, columns []string) (*ProfileResult, error)
Profile computes a statistical profile of the named table and the given columns. Callers must pass the desired column names explicitly: this function does not auto-discover columns from information_schema when columns is empty.
type QualityCheck ¶
type QualityCheck struct {
Type string `json:"type" yaml:"type"`
Config map[string]any `json:"config,omitempty" yaml:"config,omitempty"`
}
QualityCheck describes a quality assertion to run against a table.
type QualityChecker ¶
type QualityChecker interface {
Run(ctx context.Context, exec DBQuerier, table string, config map[string]any) (*CheckResult, error)
}
QualityChecker runs a single quality check against a table.