quality

package
v0.3.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 29, 2026 License: MIT Imports: 17 Imported by: 0

Documentation

Overview

Package quality implements Go-native data quality checks, profiling, and anomaly detection.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func NewChecksModule

func NewChecksModule(name string, config map[string]any) (sdk.ModuleInstance, error)

NewChecksModule creates a new quality.checks module from config.

func NewDBTTestStep

func NewDBTTestStep(name string, _ map[string]any) (sdk.StepInstance, error)

func NewGEValidateStep

func NewGEValidateStep(name string, _ map[string]any) (sdk.StepInstance, error)

func NewQualityAnomalyStep

func NewQualityAnomalyStep(name string, _ map[string]any) (sdk.StepInstance, error)

func NewQualityCheckStep

func NewQualityCheckStep(name string, _ map[string]any) (sdk.StepInstance, error)

func NewQualityCompareStep

func NewQualityCompareStep(name string, _ map[string]any) (sdk.StepInstance, error)

func NewQualityProfileStep

func NewQualityProfileStep(name string, _ map[string]any) (sdk.StepInstance, error)

func NewQualitySchemaValidateStep

func NewQualitySchemaValidateStep(name string, _ map[string]any) (sdk.StepInstance, error)

func NewSodaCheckStep

func NewSodaCheckStep(name string, _ map[string]any) (sdk.StepInstance, error)

func RegisterChecksModule

func RegisterChecksModule(name string, m *ChecksModule) error

RegisterChecksModule registers a ChecksModule under the given name.

func UnregisterChecksModule

func UnregisterChecksModule(name string)

UnregisterChecksModule removes a registered ChecksModule.

Types

type AnomalyResult

type AnomalyResult struct {
	Column     string  `json:"column"`
	Method     string  `json:"method"` // zscore, iqr
	Anomalies  int     `json:"anomalies"`
	Threshold  float64 `json:"threshold"`
	SampleSize int     `json:"sampleSize"`
}

AnomalyResult holds the outcome of anomaly detection for one column.

func DetectAnomalies

func DetectAnomalies(values []float64, method string, threshold float64) *AnomalyResult

DetectAnomalies detects anomalies in the given slice of float64 values. method selects the detector: "zscore" flags values with |z| > threshold, and "iqr" flags values below Q1 - threshold*IQR or above Q3 + threshold*IQR. If threshold <= 0, it defaults to 3.0 for zscore and 1.5 for iqr.

type CheckResult

type CheckResult struct {
	Check   string `json:"check"`
	Passed  bool   `json:"passed"`
	Message string `json:"message"`
	Value   any    `json:"value,omitempty"`
}

CheckResult is the outcome of one quality check.

func RunCheck

func RunCheck(ctx context.Context, exec DBQuerier, checkType, table string, config map[string]any) (*CheckResult, error)

RunCheck executes a named quality check type against a table.

type ChecksConfig

type ChecksConfig struct {
	Provider     string `json:"provider"     yaml:"provider"`
	ContractsDir string `json:"contractsDir" yaml:"contractsDir"`
	Database     string `json:"database"     yaml:"database"`
}

ChecksConfig holds configuration for the quality.checks module.

type ChecksModule

type ChecksModule struct {
	// contains filtered or unexported fields
}

ChecksModule implements the quality.checks module.

func LookupChecksModule

func LookupChecksModule(name string) (*ChecksModule, error)

LookupChecksModule returns the registered ChecksModule by name.

func NewChecksModuleWithExecutor

func NewChecksModuleWithExecutor(name string, exec DBQuerier) *ChecksModule

NewChecksModuleWithExecutor creates a ChecksModule with an injected DBQuerier (for testing).

func (*ChecksModule) ContractsDir

func (m *ChecksModule) ContractsDir() string

ContractsDir returns the directory used to load data contracts.

func (*ChecksModule) Executor

func (m *ChecksModule) Executor() DBQuerier

Executor returns the DBQuerier for this module (may be nil if no DB is configured).

func (*ChecksModule) Init

func (m *ChecksModule) Init() error

Init validates the module configuration.

func (*ChecksModule) SetExecutor

func (m *ChecksModule) SetExecutor(exec DBQuerier)

SetExecutor replaces the DBQuerier (used for lazy injection or test overrides).

func (*ChecksModule) Start

func (m *ChecksModule) Start(_ context.Context) error

Start registers the module in the global registry.

func (*ChecksModule) Stop

func (m *ChecksModule) Stop(_ context.Context) error

Stop deregisters the module.

type ColumnProfile

type ColumnProfile struct {
	NullCount     int64        `json:"nullCount"`
	NullRate      float64      `json:"nullRate"`
	DistinctCount int64        `json:"distinctCount"`
	Min           any          `json:"min,omitempty"`
	Max           any          `json:"max,omitempty"`
	Mean          *float64     `json:"mean,omitempty"`
	StdDev        *float64     `json:"stdDev,omitempty"`
	Percentiles   *Percentiles `json:"percentiles,omitempty"`
}

ColumnProfile holds statistics for a single column.

type ContractColumn

type ContractColumn struct {
	Name     string `json:"name"               yaml:"name"`
	Type     string `json:"type"               yaml:"type"`
	Nullable bool   `json:"nullable,omitempty" yaml:"nullable,omitempty"`
	Pattern  string `json:"pattern,omitempty"  yaml:"pattern,omitempty"`
}

ContractColumn describes a single expected column.

type ContractResult

type ContractResult struct {
	Dataset        string        `json:"dataset"`
	Passed         bool          `json:"passed"`
	SchemaOK       bool          `json:"schemaOk"`
	QualityOK      bool          `json:"qualityOk"`
	SchemaErrors   []string      `json:"schemaErrors,omitempty"`
	QualityResults []CheckResult `json:"qualityResults"`
}

ContractResult is the outcome of validating a dataset against a contract.

func ValidateContract

func ValidateContract(ctx context.Context, exec DBQuerier, contract DataContract) (*ContractResult, error)

ValidateContract runs schema and quality checks against a live dataset.

type ContractSchema

type ContractSchema struct {
	Columns []ContractColumn `json:"columns" yaml:"columns"`
}

ContractSchema lists the columns expected in a dataset.

type DBQuerier

type DBQuerier interface {
	QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
	QueryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error)
}

DBQuerier is the subset of *sql.DB methods used by quality checks and the profiler. *sql.DB satisfies this interface directly, and go-sqlmock hands back a *sql.DB, so tests can pass a mocked database without any adapter.

type DBTResult

type DBTResult struct {
	Passed  int      `json:"passed"`
	Failed  int      `json:"failed"`
	Errors  int      `json:"errors"`
	Skipped int      `json:"skipped"`
	Results []string `json:"results,omitempty"`
}

DBTResult holds the parsed output of `dbt test`.

type DataContract

type DataContract struct {
	Dataset string         `json:"dataset" yaml:"dataset"`
	Owner   string         `json:"owner"   yaml:"owner"`
	Schema  ContractSchema `json:"schema"  yaml:"schema"`
	Quality []QualityCheck `json:"quality" yaml:"quality"`
}

DataContract describes a dataset's expected schema and quality assertions.

func ParseContract

func ParseContract(path string) (*DataContract, error)

ParseContract reads and parses a YAML data contract file.

type GEResult

type GEResult struct {
	Success    bool `json:"success"`
	Evaluated  int  `json:"evaluated"`
	Successful int  `json:"successful"`
	Failed     int  `json:"failed"`
}

GEResult holds the parsed output of `great_expectations checkpoint run`.

type Percentiles

type Percentiles struct {
	P25 float64 `json:"p25"`
	P50 float64 `json:"p50"`
	P75 float64 `json:"p75"`
	P90 float64 `json:"p90"`
	P99 float64 `json:"p99"`
}

Percentiles holds common percentile values for numeric columns.

type ProfileResult

type ProfileResult struct {
	Table    string                    `json:"table"`
	RowCount int64                     `json:"rowCount"`
	Columns  map[string]*ColumnProfile `json:"columns"`
}

ProfileResult holds the statistical profile of a table.

func Profile

func Profile(ctx context.Context, exec DBQuerier, table string, columns []string) (*ProfileResult, error)

Profile computes a statistical profile of the named table for the given columns. Callers must pass the desired column names explicitly in columns; this function does not auto-discover columns from information_schema.

type QualityCheck

type QualityCheck struct {
	Type   string         `json:"type"             yaml:"type"`
	Config map[string]any `json:"config,omitempty" yaml:"config,omitempty"`
}

QualityCheck describes a quality assertion to run against a table.

type QualityChecker

type QualityChecker interface {
	Run(ctx context.Context, exec DBQuerier, table string, config map[string]any) (*CheckResult, error)
}

QualityChecker runs a single quality check against a table.

type SodaResult

type SodaResult struct {
	Passed int      `json:"passed"`
	Failed int      `json:"failed"`
	Errors int      `json:"errors"`
	Lines  []string `json:"lines,omitempty"`
}

SodaResult holds the parsed output of `soda scan`.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL