Documentation ¶
Index ¶
- Constants
- type BetweenRange
- type CheckExpression
- type CheckScope
- type ChecksFileConfig
- type ColumnInfo
- type ColumnMetrics
- type ColumnsNotPresentConfig
- type ConnectionConfig
- type DataQualityCheck
- type DataQualityCheckType
- type DataSource
- type DataSourceType
- type DbqConfig
- type DbqConnector
- type DbqDataProfiler
- type DbqDataSourceAdapter
- type DbqDataValidator
- type DbqDataValidatorImpl
- type ExpectColumnsConfig
- type ExpectColumnsOrderedConfig
- type NumericStats
- type OnFailAction
- type SchemaCheckConfig
- type TableMetrics
- type TaskPool
- type ValidationResult
- type ValidationRule
Constants ¶
View Source
const ( CheckTypeSchemaCheck = "schema_check" CheckTypeRawQuery = "raw_query" )
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BetweenRange ¶ added in v0.5.0
type BetweenRange struct { Min interface{} Max interface{} }
type CheckExpression ¶ added in v0.5.0
type CheckExpression struct { FunctionName string FunctionParameters []string Scope CheckScope Operator string ThresholdValue interface{} }
func ParseCheckExpression ¶ added in v0.5.0
func ParseCheckExpression(expression string) (*CheckExpression, error)
type CheckScope ¶ added in v0.5.0
type CheckScope string
const ( ScopeSchema CheckScope = "schema" ScopeTable CheckScope = "table" ScopeColumn CheckScope = "column" )
type ChecksFileConfig ¶ added in v0.1.0
type ChecksFileConfig struct { Version string `yaml:"version"` Rules []ValidationRule `yaml:"rules"` }
func LoadChecksFileConfig ¶ added in v0.1.0
func LoadChecksFileConfig(fileName string) (*ChecksFileConfig, error)
type ColumnInfo ¶
ColumnInfo represents the basic information of a column.
type ColumnMetrics ¶
type ColumnMetrics struct { ColumnName string `json:"col_name"` ColumnComment string `json:"col_comment"` ColumnPosition uint `json:"col_position"` DataType string `json:"data_type"` NullCount uint64 `json:"null_count"` BlankCount *int64 `json:"blank_count,omitempty"` // string only MinValue *float64 `json:"min_value,omitempty"` // numeric only MaxValue *float64 `json:"max_value,omitempty"` // numeric only AvgValue *float64 `json:"avg_value,omitempty"` // numeric only StddevValue *float64 `json:"stddev_value,omitempty"` // numeric only (Population StdDev) MostFrequentValue *string `json:"most_frequent_value,omitempty"` // pointer to handle NULL as most frequent ProfilingDurationMs int64 `json:"profiling_duration_ms"` }
ColumnMetrics represents the metrics of a column.
type ColumnsNotPresentConfig ¶ added in v0.5.0
type ConnectionConfig ¶ added in v0.1.0
type DataQualityCheck ¶ added in v0.1.0
type DataQualityCheck struct { Expression string `yaml:"-"` Description string `yaml:"desc,omitempty"` OnFail OnFailAction `yaml:"on_fail,omitempty"` Query string `yaml:"query,omitempty"` // Schema check fields SchemaCheck *SchemaCheckConfig `yaml:"schema_check,omitempty"` ParsedCheck *CheckExpression `yaml:"-"` }
func (*DataQualityCheck) UnmarshalYAML ¶ added in v0.5.0
func (c *DataQualityCheck) UnmarshalYAML(node *yaml.Node) error
type DataQualityCheckType ¶ added in v0.1.0
type DataQualityCheckType string
DataQualityCheckType represents the type of data quality check.
type DataSource ¶
type DataSource struct { ID string `yaml:"id"` Type DataSourceType `yaml:"type"` Configuration ConnectionConfig `yaml:"configuration"` Datasets []string `yaml:"datasets"` }
type DataSourceType ¶ added in v0.0.8
type DataSourceType string
const ( DataSourceTypeClickhouse DataSourceType = "clickhouse" DataSourceTypePostgresql DataSourceType = "postgresql" DataSourceTypeMysql DataSourceType = "mysql" )
type DbqConfig ¶
type DbqConfig struct { Version string `yaml:"version"` DataSources []DataSource `yaml:"datasources"` }
type DbqConnector ¶
type DbqConnector interface { // Ping checks if the connection to the data source is alive. Ping(ctx context.Context) (string, error) // ImportDatasets imports datasets from the data source, with an optional filter. ImportDatasets(ctx context.Context, filter string) ([]string, error) }
DbqConnector is the interface that wraps the basic connector methods.
type DbqDataProfiler ¶ added in v0.1.0
type DbqDataProfiler interface { // ProfileDataset is an entry point that runs profiling process by tying all specific profiling calls together // todo: consider extracting it into separate entity ProfileDataset(ctx context.Context, dataset string, sample bool, maxConcurrent int, collectErrors bool) (*TableMetrics, error) GetColumns(ctx context.Context, databaseName string, tableName string) ([]*ColumnInfo, error) GetTotalRows(ctx context.Context, dataset string) (uint64, error) GetNullCount(ctx context.Context, dataset string, column *ColumnInfo) (uint64, error) GetBlankCount(ctx context.Context, dataset string, column *ColumnInfo) (int64, error) GetNumericStats(ctx context.Context, dataset string, column *ColumnInfo) (*NumericStats, error) GetMostFrequentValue(ctx context.Context, dataset string, column *ColumnInfo) (*string, error) GetSampleData(ctx context.Context, dataset string) ([]map[string]interface{}, error) IsNumericType(dataType string) bool IsStringType(dataType string) bool }
DbqDataProfiler is the interface that wraps the basic data profiling methods.
type DbqDataSourceAdapter ¶ added in v0.5.0
type DbqDataSourceAdapter interface { // InterpretDataQualityCheck generates a SQL query specific for datasource for a data quality check InterpretDataQualityCheck(check *DataQualityCheck, dataset string, defaultWhere string) (string, error) // ExecuteQuery executes the SQL query and returns the query result ExecuteQuery(ctx context.Context, query string) (interface{}, error) }
type DbqDataValidator ¶ added in v0.1.0
type DbqDataValidator interface { // RunCheck runs a data quality check and returns the result. RunCheck(ctx context.Context, adapter DbqDataSourceAdapter, check *DataQualityCheck, dataset string, defaultWhere string) *ValidationResult }
DbqDataValidator is the interface that wraps the basic data validation methods.
func NewDbqDataValidator ¶ added in v0.5.0
func NewDbqDataValidator(logger *slog.Logger) DbqDataValidator
type DbqDataValidatorImpl ¶ added in v0.5.0
type DbqDataValidatorImpl struct {
// contains filtered or unexported fields
}
func (DbqDataValidatorImpl) RunCheck ¶ added in v0.5.0
func (d DbqDataValidatorImpl) RunCheck(ctx context.Context, adapter DbqDataSourceAdapter, check *DataQualityCheck, dataset string, defaultWhere string) *ValidationResult
type ExpectColumnsConfig ¶ added in v0.5.0
type ExpectColumnsConfig struct {
Columns []string `yaml:"columns"`
}
type ExpectColumnsOrderedConfig ¶ added in v0.5.0
type ExpectColumnsOrderedConfig struct {
ColumnsOrder []string `yaml:"columns_order"`
}
type NumericStats ¶ added in v0.2.0
type NumericStats struct { MinValue *float64 MaxValue *float64 AvgValue *float64 StddevValue *float64 }
NumericStats represents the numeric statistics of a column.
type OnFailAction ¶
type OnFailAction string
const ( OnFailActionWarn OnFailAction = "warn" OnFailActionError OnFailAction = "error" )
type SchemaCheckConfig ¶ added in v0.5.0
type SchemaCheckConfig struct { ExpectColumnsOrdered *ExpectColumnsOrderedConfig `yaml:"expect_columns_ordered,omitempty"` ExpectColumns *ExpectColumnsConfig `yaml:"expect_columns,omitempty"` ColumnsNotPresent *ColumnsNotPresentConfig `yaml:"columns_not_present,omitempty"` }
type TableMetrics ¶
type TableMetrics struct { ProfiledAt int64 `json:"profiled_at"` TableName string `json:"table_name"` DatabaseName string `json:"database_name"` TotalRows uint64 `json:"total_rows"` ColumnsMetrics map[string]*ColumnMetrics `json:"columns_metrics"` RowsSample []map[string]interface{} `json:"rows_sample"` ProfilingDurationMs int64 `json:"profiling_duration_ms"` DbqErrors []error `json:"__dbq_errors"` }
TableMetrics represents the metrics of a table.
type TaskPool ¶ added in v0.0.5
type TaskPool struct {
// contains filtered or unexported fields
}
type ValidationResult ¶ added in v0.0.7
type ValidationResult struct { CheckID string `json:"check_id"` Pass bool `json:"pass"` QueryResultValue string `json:"query_result_value,omitempty"` Error string `json:"error,omitempty"` }
ValidationResult represents the result of a data quality check.
type ValidationRule ¶ added in v0.1.0
type ValidationRule struct { Dataset string `yaml:"dataset"` Where string `yaml:"where,omitempty"` Checks []DataQualityCheck `yaml:"checks"` }
Source Files ¶
Click to show internal directories.
Click to hide internal directories.