types

package
v0.8.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 19, 2026 License: MIT Imports: 1 Imported by: 0

Documentation

Overview

Package types provides shared type definitions for pulse.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Aggregation

// Aggregation describes one aggregation operation applied to a single field.
// Type and Field are always present in the JSON encoding; Label and Params
// are dropped when empty (omitempty).
type Aggregation struct {
	// Type is the aggregation operation to perform.
	Type AggregationType `json:"type"`

	// Field is the name of the data field to aggregate.
	Field string `json:"field"`

	// Label is an optional output label for the aggregation result.
	Label string `json:"label,omitempty"`

	// Params holds type-specific configuration as raw JSON.
	// Used by aggregation types that require additional parameters (e.g., AGG_PERCENTILE).
	// The payload is carried undecoded (json.RawMessage); this struct does
	// not define its schema.
	Params json.RawMessage `json:"params,omitempty"`
}

Aggregation defines a single aggregation operation to apply to a field.

type AggregationType

type AggregationType string

AggregationType identifies a specific aggregation operation.

// Aggregation operations accepted in Aggregation.Type. Each constant's string
// value is identical to its identifier, so the wire form is the name itself.
//
// Merge families, as described in the AggregationType.Mergeable documentation:
//   - COUNT, SUM, MIN, MAX, NULL_COUNT: associative and commutative merge.
//   - AVERAGE (presumably the "Welford-mean"), VARIANCE, STDDEV:
//     parallel-friendly recurrence.
//   - FREQUENCY, MODE, DISTINCT_COUNT: union of per-value count maps.
//   - MEDIAN, PERCENTILE, ZSCORE: need a sorted view of every value and are
//     not mergeable.
//   - SKEWNESS, KURTOSIS: rely on M3/M4 recurrences; parallel merge deferred.
//   - RANGE: not classified in the Mergeable documentation — TODO confirm.
const (
	AGG_COUNT          AggregationType = "AGG_COUNT"
	AGG_SUM            AggregationType = "AGG_SUM"
	AGG_AVERAGE        AggregationType = "AGG_AVERAGE"
	AGG_MIN            AggregationType = "AGG_MIN"
	AGG_MAX            AggregationType = "AGG_MAX"
	AGG_STDDEV         AggregationType = "AGG_STDDEV"
	AGG_RANGE          AggregationType = "AGG_RANGE"
	AGG_FREQUENCY      AggregationType = "AGG_FREQUENCY"
	AGG_ZSCORE         AggregationType = "AGG_ZSCORE"
	AGG_MEDIAN         AggregationType = "AGG_MEDIAN"
	AGG_VARIANCE       AggregationType = "AGG_VARIANCE"
	AGG_MODE           AggregationType = "AGG_MODE"
	AGG_SKEWNESS       AggregationType = "AGG_SKEWNESS"
	AGG_KURTOSIS       AggregationType = "AGG_KURTOSIS"
	AGG_DISTINCT_COUNT AggregationType = "AGG_DISTINCT_COUNT"
	AGG_PERCENTILE     AggregationType = "AGG_PERCENTILE"

	// AGG_NULL_COUNT counts records where the field is null. Inverse of
	// AGG_COUNT, which counts records where the field is non-null. Stays
	// streamable (O(1) state per row, no buffering).
	AGG_NULL_COUNT AggregationType = "AGG_NULL_COUNT"
)

func AllAggregationTypes

func AllAggregationTypes() []AggregationType

AllAggregationTypes returns all defined aggregation types.

func (AggregationType) Mergeable added in v0.8.0

func (t AggregationType) Mergeable() bool

Mergeable reports whether this aggregation type's running state can be combined across partitions of the input via an associative and commutative merge (count/sum/min/max/null_count), a parallel-friendly recurrence (Welford mean / variance / stddev), or a union of per-value count maps (frequency / mode / distinct_count). The per-shard parallel reducer in service/shard_reduce.go consults this method (mirrored by processing.CanMergeRequest) to decide whether to fan out shard processing across a bounded worker pool.

Mergeable implies Streamable — a buffered-only aggregator cannot expose mergeable state. The default branch returns false so newly added aggregator types must opt in explicitly. AGG_MEDIAN / AGG_PERCENTILE / AGG_ZSCORE require a sorted view of every value and stay non-mergeable; AGG_SKEWNESS / AGG_KURTOSIS rely on M3/M4 recurrences whose parallel-merge formula is non-trivial and is deferred to a follow-up — they fall through to the serial path.

func (AggregationType) Streamable added in v0.2.0

func (t AggregationType) Streamable() bool

Streamable reports whether this aggregation type supports the single-pass streaming execution path. Streamable aggregators implement processing.OnlineAggregator (UpdateRow + Finalize) and produce a result with O(1) or O(unique) state per row.

Source of truth for predict.Streamable; cross-checked at test time against the processing registry by TestRegistryStreamabilityMatchesTypes.

Default branch returns false so newly-added aggregator types must opt in explicitly.

type Attribute

// Attribute defines a derived attribute computation. The regression attribute
// family (ATTR_REG_FITTED / RESIDUAL / LEVERAGE) reads Target, Predictors,
// Penalty, Alpha and L1Ratio; the other fields apply as documented on each
// field below.
type Attribute struct {
	// Type is the attribute computation to perform.
	Type AttributeType `json:"type"`

	// Field is the name of the source data field. Optional for the
	// regression attribute family (ATTR_REG_FITTED / RESIDUAL / LEVERAGE)
	// which read Target + Predictors instead; when empty the output Label
	// defaults to "<TYPE>_<Target>".
	//
	// The tag carries no omitempty, so an empty Field is still encoded as
	// "field":"" even when it is optional.
	Field string `json:"field"`

	// Label is the output name for the derived attribute.
	Label string `json:"label,omitempty"`

	// Expression is a runtime expression for ATTR_FORMULA type.
	Expression string `json:"expression,omitempty"`

	// Params holds type-specific configuration as raw JSON.
	// Each attribute type defines its own params schema.
	Params json.RawMessage `json:"params,omitempty"`

	// Target is the dependent variable for ATTR_REG_FITTED / RESIDUAL /
	// LEVERAGE. Required for those types.
	Target string `json:"target,omitempty"`

	// Predictors lists the independent variables for the regression-
	// attribute family. At least one predictor is required.
	Predictors []string `json:"predictors,omitempty"`

	// Penalty selects the OLS regularization scheme for ATTR_REG_FITTED /
	// ATTR_REG_RESIDUAL. One of "" (unpenalized), "l1" (lasso), "l2"
	// (ridge), or "elasticnet". ATTR_REG_LEVERAGE rejects any non-empty
	// Penalty (penalized leverage and GLM leverage are deferred to a
	// later phase).
	Penalty string `json:"penalty,omitempty"`

	// Alpha is the regularization strength for the regression-attribute
	// family when Penalty is non-empty. Must be > 0 in that case.
	Alpha float64 `json:"alpha,omitempty"`

	// L1Ratio is the elastic-net mixing parameter for the regression-
	// attribute family when Penalty == "elasticnet" (0 < L1Ratio < 1).
	//
	// NOTE(review): RegressionSpec.L1Ratio documents the closed range
	// [0, 1], while this field documents the open interval — confirm
	// which bound the validator actually enforces.
	L1Ratio float64 `json:"l1_ratio,omitempty"`
}

Attribute defines a derived attribute computation.

type AttributeType

type AttributeType string

AttributeType identifies a specific derived-attribute computation.

// Attribute computations accepted in Attribute.Type. Each constant's string
// value is identical to its identifier.
//
// Execution tiers for the first six constants, as described in the
// AttributeType.Streamable documentation:
//   - ATTR_FORMULA, ATTR_DATE_PART: row-local, executed inline with no prepass.
//     ATTR_FORMULA evaluates Attribute.Expression.
//   - ATTR_ZSCORE, ATTR_TSCORE, ATTR_NORMALIZED: two-pass (prepass over
//     filter-passing records, finalize, then a per-row second pass).
//   - ATTR_PERCENTILE: buffered-only; needs a sorted view of every value.
const (
	ATTR_ZSCORE     AttributeType = "ATTR_ZSCORE"
	ATTR_TSCORE     AttributeType = "ATTR_TSCORE"
	ATTR_NORMALIZED AttributeType = "ATTR_NORMALIZED"
	ATTR_FORMULA    AttributeType = "ATTR_FORMULA"
	ATTR_PERCENTILE AttributeType = "ATTR_PERCENTILE"
	ATTR_DATE_PART  AttributeType = "ATTR_DATE_PART"

	// ATTR_REG_FITTED emits the per-row fitted value ŷᵢ = Xᵢ · β + β₀ for
	// each filter-passing record, using a regression fit computed in a
	// streaming prepass over the same record set. Two-pass: prepass folds
	// the OLS sufficient statistics, finalize freezes the coefficient
	// vector, pass 2 emits ŷᵢ. Carries its own RegressionSpec-shaped
	// fields (Target, Predictors, Penalty, Alpha, L1Ratio); refits
	// internally per attribute (Option A). Accepts any OLS penalty
	// (unpenalized, ridge, lasso, elasticnet); GLM and Bayesian fits are
	// deferred.
	ATTR_REG_FITTED AttributeType = "ATTR_REG_FITTED"

	// ATTR_REG_RESIDUAL emits the per-row residual yᵢ − ŷᵢ for each
	// filter-passing record, using the same OLS prepass machinery as
	// ATTR_REG_FITTED. Sums to ≈ 0 when the fit includes an intercept
	// (always true for unpenalized OLS) and the response is observed for
	// every contributing row.
	ATTR_REG_RESIDUAL AttributeType = "ATTR_REG_RESIDUAL"

	// ATTR_REG_LEVERAGE emits the per-row hat-matrix diagonal
	// hᵢᵢ = xᵢ · (XᵀX)⁻¹ · xᵢᵀ for each filter-passing record. Restricted
	// to unpenalized OLS — penalized leverage uses a different formula
	// (involving the regularized resolvent) and GLM leverage requires the
	// IRLS weight matrix; both deferred to Phase 9. Specs with any
	// Penalty set are rejected at factory time with PROCESSING_CONFIG.
	ATTR_REG_LEVERAGE AttributeType = "ATTR_REG_LEVERAGE"
)

func AllAttributeTypes

func AllAttributeTypes() []AttributeType

AllAttributeTypes returns all defined attribute types.

func (AttributeType) Streamable added in v0.2.0

func (t AttributeType) Streamable() bool

Streamable reports whether this attribute type can be computed in a streaming path. Three tiers exist at runtime:

  • Row-local: FORMULA, DATE_PART implement processing.RowLocalAttribute and execute inline with no PrePass.
  • Two-pass: ZSCORE, TSCORE, NORMALIZED implement processing.TwoPassAttribute and need a PrePass over filter-passing records, Finalize, then per-row Row() in pass 2 (iter.Reset()).
  • Buffered-only: PERCENTILE needs a sorted view of every value; no streaming algorithm preserves exact rank semantics.

Streamable() returns true for the first two tiers since both routes avoid materializing the full record set in memory.

type Cohort

// Cohort identifies a .pulse data file for processing. It has the same two
// fields and JSON keys as FileRequest.
type Cohort struct {
	// Filename is the name of the .pulse file.
	Filename string `json:"filename"`

	// DataDir is the directory containing the cohort file.
	// Omitted from the JSON encoding when empty.
	DataDir string `json:"data_dir,omitempty"`
}

Cohort identifies a .pulse data file for processing.

type ComposedRequest

// ComposedRequest bundles multiple requests for batch execution.
type ComposedRequest struct {
	// Requests is the list of individual requests to execute.
	// The tag has no omitempty, so a nil slice encodes as "requests":null.
	Requests []*Request `json:"requests"`
}

ComposedRequest bundles multiple requests for batch execution.

type FacetDiscrete added in v0.7.0

// FacetDiscrete summarises a categorical/boolean field: a per-value count
// list plus the total number of distinct values and how many were dropped by
// top-K truncation.
type FacetDiscrete struct {
	// Values is the per-value count list, sorted descending by count
	// (ties broken ascending by value-string for determinism).
	Values []FacetValueCount `json:"values"`

	// DistinctCount is the total distinct non-null values seen. May
	// exceed len(Values) when DiscreteTopK was set.
	DistinctCount int64 `json:"distinct_count"`

	// TruncatedAt is the count of values dropped by DiscreteTopK
	// truncation. Zero means no cap was applied.
	// Despite the name, this is a number of dropped values, not a cut-off
	// position; it is omitted from the JSON encoding when zero.
	TruncatedAt int `json:"truncated_at,omitempty"`
}

FacetDiscrete summarises a categorical/boolean field.

type FacetField added in v0.7.0

// FacetField wraps either a discrete or numeric summary. Kind says which of
// the Discrete / Numeric pointers is populated; the other is nil and omitted
// from the JSON encoding.
type FacetField struct {
	// Kind is "discrete" | "numeric".
	Kind string `json:"kind"`

	// TypeName is the field's Pulse type as a string ("u8", "f64",
	// "categorical_u16", ...).
	TypeName string `json:"type_name"`

	// Description is the field's schema description.
	Description string `json:"description,omitempty"`

	// NullCount is the number of filtered records where this field was
	// null.
	NullCount int64 `json:"null_count"`

	// Discrete is non-nil when Kind == "discrete".
	Discrete *FacetDiscrete `json:"discrete,omitempty"`

	// Numeric is non-nil when Kind == "numeric".
	Numeric *FacetNumeric `json:"numeric,omitempty"`
}

FacetField wraps either a discrete or numeric summary.

type FacetHistogram added in v0.7.0

// FacetHistogram is a fixed-width binning of a numeric field.
//
// Min and Max are the histogram bounds. Bins holds one count per bucket and
// has length HistogramBins; bin i covers [Min + i*step, Min + (i+1)*step)
// with step = (Max-Min)/HistogramBins. Values equal to Max land in the last
// bin.
type FacetHistogram struct {
	Min  float64 `json:"min"`
	Max  float64 `json:"max"`
	Bins []int64 `json:"bins"`
}

FacetHistogram is a fixed-width binning of a numeric field. Bins is a slice of length HistogramBins; bin i covers [Min + i*step, Min + (i+1)*step) where step = (Max-Min)/HistogramBins. Values that fall on Max land in the last bin.

type FacetNumeric added in v0.7.0

// FacetNumeric summarises a numeric field.
//
// Min, Max, Mean, StdDev, Count and Sum are always populated. Percentiles and
// Histogram are populated only when the request asked for them and are
// omitted from the JSON encoding otherwise.
//
// NOTE(review): the format of the Percentiles map keys is not visible here —
// confirm how each requested percentile point is rendered as a string.
type FacetNumeric struct {
	Min         float64            `json:"min"`
	Max         float64            `json:"max"`
	Mean        float64            `json:"mean"`
	StdDev      float64            `json:"stddev"`
	Count       int64              `json:"count"`
	Sum         float64            `json:"sum"`
	Percentiles map[string]float64 `json:"percentiles,omitempty"`
	Histogram   *FacetHistogram    `json:"histogram,omitempty"`
}

FacetNumeric summarises a numeric field. Min, Max, Mean, StdDev, Sum, and Count are always populated; Percentiles and Histogram are populated when the request asked for them.

type FacetRequest added in v0.7.0

// FacetRequest specifies a multi-field rich facet computation: which cohort
// and fields to summarise, optional filters, and optional top-K, percentile,
// histogram and additive-contribution settings.
type FacetRequest struct {
	// Cohort selects the .pulse file. Same shape as Request.Cohort.
	Cohort *Cohort `json:"cohort,omitempty"`

	// Fields is the explicit list of fields to summarise. Empty is an
	// error.
	Fields []string `json:"fields"`

	// Filterers optionally narrow the record set before accumulation.
	// Same semantics as Request.Filterers.
	Filterers []*Filterer `json:"filterers,omitempty"`

	// AdditiveFields lists fields for which the engine computes
	// "contribution" counts — accumulated independently of the matching
	// base filter, with the named field's own predicates stripped so all
	// values are counted equally. Empty = no additive analysis.
	AdditiveFields []string `json:"additive_fields,omitempty"`

	// DiscreteTopK caps the number of distinct values returned per
	// discrete field. Zero = no cap (return all). Capped fields carry a
	// TruncatedAt count in the response so callers know what was dropped.
	DiscreteTopK int `json:"discrete_top_k,omitempty"`

	// NumericPercentiles lists percentile points (strictly in (0, 1)) to
	// compute for each numeric field. Nil/empty = none. Adding any
	// percentile forces the buffered execution path.
	NumericPercentiles []float64 `json:"numeric_percentiles,omitempty"`

	// IncludeHistogram, when true, computes a fixed-width histogram for
	// each numeric field with HistogramBins buckets. Defaults to 20 bins
	// when bins unset and IncludeHistogram=true. Streaming-compatible
	// only when HistogramRange is supplied.
	//
	// NOTE(review): this says HistogramRange may be absent (falling back
	// to a non-streaming path), while the HistogramRange comment says it
	// is required whenever IncludeHistogram is true — confirm which holds.
	IncludeHistogram bool `json:"include_histogram,omitempty"`

	// HistogramBins is the bucket count when IncludeHistogram is true.
	// Defaults to 20 when zero. Capped at 256.
	HistogramBins int `json:"histogram_bins,omitempty"`

	// HistogramRange supplies the [min, max] bounds for fixed-width
	// histogram binning. Required when IncludeHistogram is true. The two
	// values must satisfy min < max.
	//
	// The tag uses omitzero rather than omitempty: encoding/json never
	// treats a fixed-size [2]float64 as "empty", so omitempty had no
	// effect and an unset range was always encoded as [0,0]. With
	// omitzero (Go 1.24+) the all-zero array — which cannot be a valid
	// range since min < max is required — is left out of the encoding.
	// Older toolchains ignore the unknown option and keep emitting [0,0].
	HistogramRange [2]float64 `json:"histogram_range,omitzero"`
}

FacetRequest specifies a multi-field rich facet computation. It is the input shape consumed by pulse.FacetSchema. The simpler single-field pulse.Facet entry point accepts only (path, field) and returns distinct values; FacetSchema returns per-value counts, null tallies, numeric statistics, optional percentiles, optional histograms, and optional "additive" contribution counts.

type FacetResult added in v0.7.0

// FacetResult is the response shape returned by pulse.FacetSchema: per-field
// summaries, optional additive summaries, record counts, and diagnostics.
type FacetResult struct {
	// Fields maps each requested field name to its summary.
	Fields map[string]*FacetField `json:"fields"`

	// Additive maps each AdditiveFields entry to its independent
	// contribution counts. Same FacetField shape; values reflect counts
	// with the field's own filter predicates stripped.
	Additive map[string]*FacetField `json:"additive,omitempty"`

	// TotalRecords is the cohort's header record count (unfiltered).
	TotalRecords int64 `json:"total_records"`

	// FilteredRecords is the count of records passing Filterers.
	FilteredRecords int64 `json:"filtered_records"`

	// Warnings carry per-field diagnostics (top-K truncation, etc.).
	Warnings []string `json:"warnings,omitempty"`
}

FacetResult is the response shape returned by pulse.FacetSchema.

type FacetValueCount added in v0.7.0

// FacetValueCount is one value/count tuple inside FacetDiscrete.Values.
// Value is the field value rendered as a string; Count is its tally.
type FacetValueCount struct {
	Value string `json:"value"`
	Count int64  `json:"count"`
}

FacetValueCount is one value/count tuple inside FacetDiscrete.Values.

type Feature added in v0.2.0

// Feature defines a feature engineering operation. Features run pre-filter
// (before any FILTER_* predicate) and may produce one or more derived columns.
type Feature struct {
	// Type is the feature operator to perform.
	Type FeatureType `json:"type"`

	// Field is the source field name. Required by every operator except
	// FEAT_TRAIN_TEST_SPLIT (which reads no field by default — params may
	// optionally name a stratify field).
	Field string `json:"field,omitempty"`

	// Label is an output column name (single-output operators) or output
	// column prefix (multi-output operators). When empty, the operator
	// derives a default — typically "<TYPE>_<field>".
	Label string `json:"label,omitempty"`

	// Params holds operator-specific parameters as raw JSON. See the
	// feature-engineering skill for the per-operator schema.
	Params json.RawMessage `json:"params,omitempty"`
}

Feature defines a feature engineering operation. Features run pre-filter (before any FILTER_* predicate) and may produce one or more derived columns. Global-pass features (TARGET_ENCODE, FREQUENCY_ENCODE) require a stats sweep before per-row write; per-row features compute one row at a time.

type FeatureType added in v0.2.0

type FeatureType string

FeatureType identifies a specific ML feature engineering operator. Features run pre-filter and may emit one or more output columns.

// Feature operators accepted in Feature.Type. Each constant's string value is
// identical to its identifier.
//
// Notes drawn from the surrounding documentation:
//   - FEAT_TARGET_ENCODE and FEAT_FREQUENCY_ENCODE are global-pass features:
//     they require a stats sweep before the per-row write.
//   - FEAT_TRAIN_TEST_SPLIT reads no field by default; its params may name a
//     stratify field.
//   - FEAT_POLY supplies the polynomial features used for polynomial
//     regression with REG_OLS.
const (
	FEAT_LOG              FeatureType = "FEAT_LOG"
	FEAT_SQRT             FeatureType = "FEAT_SQRT"
	FEAT_BUCKETIZE        FeatureType = "FEAT_BUCKETIZE"
	FEAT_ONE_HOT          FeatureType = "FEAT_ONE_HOT"
	FEAT_DATE_FEATURES    FeatureType = "FEAT_DATE_FEATURES"
	FEAT_FREQUENCY_ENCODE FeatureType = "FEAT_FREQUENCY_ENCODE"
	FEAT_TARGET_ENCODE    FeatureType = "FEAT_TARGET_ENCODE"
	FEAT_TRAIN_TEST_SPLIT FeatureType = "FEAT_TRAIN_TEST_SPLIT"
	FEAT_POLY             FeatureType = "FEAT_POLY"
)

func AllFeatureTypes added in v0.2.0

func AllFeatureTypes() []FeatureType

AllFeatureTypes returns every defined feature type in alphabetical order.

func (FeatureType) Streamable added in v0.2.0

func (t FeatureType) Streamable() bool

Streamable reports whether this feature type can run in the pre-pass+finalize+emit streaming pipeline (feature.StreamingComputer).

Source of truth is feature.IsStreamable(req.Features, schema) at runtime; this method mirrors the per-type capability used by predict.

type FileRequest

// FileRequest identifies a file for operations like inspect. It has the same
// two fields and JSON keys as Cohort.
type FileRequest struct {
	// Filename is the name of the file.
	Filename string `json:"filename"`

	// DataDir is the directory containing the file.
	// Omitted from the JSON encoding when empty.
	DataDir string `json:"data_dir,omitempty"`
}

FileRequest identifies a file for operations like inspect.

type FileResponse

// FileResponse describes a file's metadata: its name, record count, and
// field names.
type FileResponse struct {
	// Filename is the name of the file.
	Filename string `json:"filename"`

	// RecordCount is the number of records in the file.
	RecordCount int64 `json:"record_count"`

	// Fields is the list of field names in the file.
	// Omitted from the JSON encoding when empty.
	Fields []string `json:"fields,omitempty"`
}

FileResponse describes a file's metadata.

type Filterer

// Filterer defines a filter to apply to records before processing.
//
// NOTE(review): the FILTER_NULL documentation refers to a "Mode"
// ("is_null" / "is_not_null"), but this struct has no Mode field — presumably
// the mode travels in Values; confirm against the filter implementation.
type Filterer struct {
	// Type is the filter operation to perform.
	Type FiltererType `json:"type"`

	// Field is the name of the data field to filter on.
	// Not required for FILTER_EXPRESSION.
	Field string `json:"field,omitempty"`

	// Values is a list of values for include/exclude/range filters.
	Values []string `json:"values,omitempty"`

	// Expression is a runtime expression for FILTER_EXPRESSION type.
	Expression string `json:"expression,omitempty"`
}

Filterer defines a filter to apply to records before processing.

type FiltererType

type FiltererType string

FiltererType identifies a specific filter operation.

// Filter operations accepted in Filterer.Type. Each constant's string value
// is identical to its identifier. Per the Filterer field comments,
// FILTER_INCLUDE / FILTER_EXCLUDE / FILTER_RANGE read Filterer.Values and
// FILTER_EXPRESSION reads Filterer.Expression (no Field required).
const (
	FILTER_INCLUDE    FiltererType = "FILTER_INCLUDE"
	FILTER_EXCLUDE    FiltererType = "FILTER_EXCLUDE"
	FILTER_RANGE      FiltererType = "FILTER_RANGE"
	FILTER_EXPRESSION FiltererType = "FILTER_EXPRESSION"

	// FILTER_NULL keeps or drops records based on null state of a field.
	// Mode is "is_null" (keep null-valued records) or "is_not_null" (keep
	// non-null records). Row-local; streamable.
	//
	// NOTE(review): Filterer declares no Mode field; presumably the mode
	// string is supplied through Filterer.Values — TODO confirm.
	FILTER_NULL FiltererType = "FILTER_NULL"
)

func AllFiltererTypes

func AllFiltererTypes() []FiltererType

AllFiltererTypes returns all defined filterer types.

func (FiltererType) Streamable added in v0.2.0

func (t FiltererType) Streamable() bool

Streamable reports whether this filterer type evaluates per-row without looking at other rows. All registered filterers are row-local today.

type FrameSpec added in v0.2.0

// FrameSpec specifies the window frame bounds.
//
// Mode is "rows", the only frame mode supported in v1. Preceding and
// Following are pointers so that "unset" is distinguishable from zero: a nil
// Preceding means UNBOUNDED PRECEDING and a nil Following means UNBOUNDED
// FOLLOWING, while both set to 0 select the current row only.
type FrameSpec struct {
	Mode      string `json:"mode"`
	Preceding *int   `json:"preceding,omitempty"`
	Following *int   `json:"following,omitempty"`
}

FrameSpec specifies the window frame bounds. Mode is "rows" — the only frame mode supported in v1. Preceding nil means UNBOUNDED PRECEDING; Following nil means UNBOUNDED FOLLOWING. Preceding==0 with Following==0 selects the current row only.

type Group

// Group defines a grouping operation to partition results. Interval and
// Params are only meaningful for the group types named in their comments and
// are omitted from the JSON encoding when zero/empty.
type Group struct {
	// Type is the grouping operation to perform.
	Type GroupType `json:"type"`

	// Field is the name of the data field to group by.
	Field string `json:"field"`

	// Interval is used by GROUP_ROUNDED and GROUP_RANGE to define the bucket width.
	Interval float64 `json:"interval,omitempty"`

	// Params holds type-specific configuration as raw JSON.
	// Used by group types that require additional parameters (e.g., GROUP_DATE).
	Params json.RawMessage `json:"params,omitempty"`
}

Group defines a grouping operation to partition results.

type GroupType

type GroupType string

GroupType identifies a specific grouping operation.

// Grouping operations accepted in Group.Type. Each constant's string value is
// identical to its identifier.
//
// Notes drawn from the Group, Mergeable and Streamable documentation:
//   - GROUP_CATEGORY, GROUP_ROUNDED, GROUP_RANGE bucket per row;
//     GROUP_ROUNDED and GROUP_RANGE take their bucket width from
//     Group.Interval.
//   - GROUP_QUANTILE and GROUP_DATE require finalize-time work over the full
//     set; GROUP_DATE is configured through Group.Params.
//   - Only CATEGORY and RANGE are described as mergeable across partitions;
//     ROUNDED is deferred.
const (
	GROUP_CATEGORY GroupType = "GROUP_CATEGORY"
	GROUP_ROUNDED  GroupType = "GROUP_ROUNDED"
	GROUP_RANGE    GroupType = "GROUP_RANGE"
	GROUP_QUANTILE GroupType = "GROUP_QUANTILE"
	GROUP_DATE     GroupType = "GROUP_DATE"
)

func AllGroupTypes

func AllGroupTypes() []GroupType

AllGroupTypes returns all defined group types.

func (GroupType) Mergeable added in v0.8.0

func (t GroupType) Mergeable() bool

Mergeable reports whether this group type's per-key state can be combined across partitions of the input. CATEGORY and RANGE (online) derive their key purely from the row's value so per-shard buckets merge by key-union; QUANTILE/DATE depend on the full set or a finalize-time bucketization that the parallel reducer cannot replicate piecewise. ROUNDED could be mergeable in principle but is deferred — the parallel orchestrator only opts in on combinations we exercise in goldens today.

func (GroupType) Streamable added in v0.2.0

func (t GroupType) Streamable() bool

Streamable reports whether this group type can emit groups before the input is exhausted. CATEGORY/ROUNDED/RANGE bucket per row; QUANTILE/DATE require finalize-time work over the full set.

The streaming Process path does not currently emit grouped output even for streamable groupers — Request.Streamable returns false whenever groups are present. The method is wired through so a future grouped streaming iterator can flip the gate without re-deriving the rule.

type OrderKey added in v0.2.0

// OrderKey specifies an ordering key for a window's ORDER BY clause.
//
// Field names the column to order by. Desc presumably selects descending
// order when true (ascending otherwise) — confirm against the window
// implementation; it is omitted from the JSON encoding when false.
type OrderKey struct {
	Field string `json:"field"`
	Desc  bool   `json:"desc,omitempty"`
}

OrderKey specifies an ordering key for a window's ORDER BY clause.

type Output

// Output configures how processing results are formatted. Only Format is
// always encoded; the remaining fields are omitted from JSON when at their
// zero value.
type Output struct {
	// Format is the output format (e.g., "json", "csv").
	Format string `json:"format"`

	// Filename is an optional output file path.
	Filename string `json:"filename,omitempty"`

	// Pretty enables indented/formatted output.
	Pretty bool `json:"pretty,omitempty"`

	// IncludeNil controls whether nil/null values appear in output.
	IncludeNil bool `json:"include_nil,omitempty"`
}

Output configures how processing results are formatted.

type RegressionResult added in v0.6.0

// RegressionResult is the per-spec outcome embedded in Response.Regressions.
// Fields irrelevant to a given operator carry their zero value.
//
// Every field except Type is tagged omitempty, so a statistic that is
// legitimately zero (for example R2 == 0 or ConvergedIters == 0) is dropped
// from the JSON encoding and cannot be told apart from "not applicable" by a
// consumer of the wire form.
type RegressionResult struct {
	// Name echoes RegressionSpec.Name or the synthesized default.
	Name string `json:"name,omitempty"`

	// Type is the regression operator that produced this result.
	Type RegressionType `json:"type"`

	// Family echoes RegressionSpec.Family (GLM only).
	Family string `json:"family,omitempty"`

	// Link echoes RegressionSpec.Link (GLM only).
	Link string `json:"link,omitempty"`

	// Penalty echoes RegressionSpec.Penalty (REG_OLS only).
	Penalty string `json:"penalty,omitempty"`

	// Alpha echoes RegressionSpec.Alpha (regularized OLS only).
	Alpha float64 `json:"alpha,omitempty"`

	// L1Ratio echoes RegressionSpec.L1Ratio (elastic-net OLS only).
	L1Ratio float64 `json:"l1_ratio,omitempty"`

	// Prior echoes RegressionSpec.Prior (Bayes only).
	Prior string `json:"prior,omitempty"`

	// Resample echoes RegressionSpec.Resample when set.
	Resample string `json:"resample,omitempty"`

	// Selection echoes RegressionSpec.Selection when set.
	Selection string `json:"selection,omitempty"`

	// Criterion echoes RegressionSpec.Criterion when Selection is set.
	Criterion string `json:"criterion,omitempty"`

	// Coefficients maps predictor name (including the synthesized
	// "(intercept)" key) to its fitted scalar value.
	Coefficients map[string]float64 `json:"coefficients,omitempty"`

	// StdErrors maps predictor name to its asymptotic standard error.
	StdErrors map[string]float64 `json:"std_errors,omitempty"`

	// PValues maps predictor name to its two-sided p-value under the
	// null β = 0.
	PValues map[string]float64 `json:"p_values,omitempty"`

	// R2 is the coefficient of determination (REG_OLS / REG_BAYES_LINEAR).
	R2 float64 `json:"r2,omitempty"`

	// AdjR2 is the adjusted R² (REG_OLS / REG_BAYES_LINEAR).
	AdjR2 float64 `json:"adj_r2,omitempty"`

	// Deviance is the model deviance (REG_GLM).
	Deviance float64 `json:"deviance,omitempty"`

	// NullDeviance is the deviance of the intercept-only model
	// (REG_GLM).
	NullDeviance float64 `json:"null_deviance,omitempty"`

	// PseudoR2 is a McFadden-style 1 − Deviance/NullDeviance (REG_GLM).
	PseudoR2 float64 `json:"pseudo_r2,omitempty"`

	// NObs is the number of observations used to fit the model.
	NObs int `json:"n_obs,omitempty"`

	// ResidualStdErr is the residual standard error
	// (REG_OLS / REG_BAYES_LINEAR).
	ResidualStdErr float64 `json:"residual_std_err,omitempty"`

	// ConvergedIters is the IRLS / coordinate-descent iteration count
	// that produced the final estimate (iterative fits only). Zero
	// when the fit is closed-form (REG_OLS without penalty,
	// REG_BAYES_LINEAR).
	ConvergedIters int `json:"converged_iters,omitempty"`

	// SelectedFeatures lists the predictors retained by Selection (in
	// fit order); empty when Selection is unset.
	SelectedFeatures []string `json:"selected_features,omitempty"`

	// CredibleIntervals maps predictor name to its
	// [lower, upper] posterior credible interval (REG_BAYES_LINEAR).
	CredibleIntervals map[string][2]float64 `json:"credible_intervals,omitempty"`
}

RegressionResult is the per-spec outcome embedded in Response.Regressions. Fields irrelevant to a given operator carry their zero value; engines never partially populate a result on failure.

Locked at Phase 0: every field of every operator family is declared now. Later phases populate the engine-specific branches but never change the struct shape.

type RegressionSpec added in v0.6.0

// RegressionSpec describes a single regression operator entry in a Request.
// One wire shape is shared by all operator families: a parser dispatches on
// Type, and fields that do not apply to that Type stay at their zero values.
type RegressionSpec struct {
	// Type names the regression operator (REG_OLS, REG_GLM,
	// REG_BAYES_LINEAR).
	Type RegressionType `json:"type"`

	// Name is an optional alias for the result entry; defaults to a
	// synthesized name derived from Type and Target.
	Name string `json:"name,omitempty"`

	// Target is the response variable's field name. Required.
	Target string `json:"target"`

	// Predictors lists the predictor (independent variable) field
	// names. At least one is required. A single predictor produces
	// simple / linear regression; multiple predictors produce multiple
	// linear regression.
	Predictors []string `json:"predictors,omitempty"`

	// Penalty selects the regularization scheme for REG_OLS. One of
	// "" (no penalty), "l1" (lasso), "l2" (ridge), or "elasticnet".
	// Ignored by REG_GLM and REG_BAYES_LINEAR.
	Penalty string `json:"penalty,omitempty"`

	// Alpha is the regularization strength for l1 / l2 / elasticnet
	// penalties. Must be > 0 when Penalty is non-empty.
	Alpha float64 `json:"alpha,omitempty"`

	// L1Ratio is the elastic-net mixing parameter in [0, 1]; 0 is pure
	// l2, 1 is pure l1. Only read when Penalty == "elasticnet".
	//
	// NOTE(review): Attribute.L1Ratio documents the open interval
	// (0 < L1Ratio < 1) for the same parameter — confirm whether the
	// endpoints are accepted here.
	L1Ratio float64 `json:"l1_ratio,omitempty"`

	// Family is the GLM error family. One of "binomial" (logistic),
	// "poisson", or "gamma". Required for REG_GLM.
	Family string `json:"family,omitempty"`

	// Link is the GLM link function. Defaults are family-specific
	// ("logit" for binomial, "log" for poisson and gamma).
	Link string `json:"link,omitempty"`

	// MaxIters caps the IRLS / coordinate-descent iteration count for
	// iterative fits. Defaults to a registry-specific value when zero.
	MaxIters int `json:"max_iters,omitempty"`

	// Tol is the relative convergence tolerance for iterative fits.
	// Defaults to a registry-specific value when zero.
	Tol float64 `json:"tol,omitempty"`

	// Prior names the prior family for REG_BAYES_LINEAR. Only "nig"
	// (Normal-Inverse-Gamma, conjugate) is supported in v1.
	Prior string `json:"prior,omitempty"`

	// PriorMu is the prior mean vector for the coefficients
	// (REG_BAYES_LINEAR). Length must match the predictor count; zero
	// vector when nil.
	PriorMu []float64 `json:"prior_mu,omitempty"`

	// PriorPrecision is the prior precision scalar (REG_BAYES_LINEAR).
	PriorPrecision float64 `json:"prior_precision,omitempty"`

	// PriorShape and PriorRate parameterize the inverse-gamma prior on
	// the residual variance (REG_BAYES_LINEAR).
	PriorShape float64 `json:"prior_shape,omitempty"`
	PriorRate  float64 `json:"prior_rate,omitempty"`

	// CredibleLevel is the credible-interval mass for the posterior
	// summaries (REG_BAYES_LINEAR). Defaults to 0.95 when zero.
	CredibleLevel float64 `json:"credible_level,omitempty"`

	// Resample selects an orthogonal resampling layer applied to any
	// regression family. One of "" (none), "jackknife" (leave-one-out),
	// or "bootstrap". Non-empty forces the buffered path.
	Resample string `json:"resample,omitempty"`

	// BootstrapIters is the bootstrap replicate count when
	// Resample == "bootstrap". Ignored otherwise.
	BootstrapIters int `json:"bootstrap_iters,omitempty"`

	// RNGSeed seeds the bootstrap RNG for reproducibility. The default
	// is 0, which is documented as meaning "use a deterministic seed
	// derived from the request" rather than the literal seed value 0.
	RNGSeed int64 `json:"rng_seed,omitempty"`

	// Selection requests an automated subset-selection wrapper around
	// any regression family. One of "" (none), "forward", "backward",
	// or "stepwise". Non-empty forces the buffered path.
	Selection string `json:"selection,omitempty"`

	// Criterion is the information criterion driving Selection. One of
	// "aic" or "bic". Required when Selection is non-empty.
	Criterion string `json:"criterion,omitempty"`
}

RegressionSpec describes a single regression operator entry in a Request. Every field of every variant is declared up front; engines populate or ignore branches based on Type and modifiers.

The wire shape is shared across the three operator families so a request parser only needs to dispatch on Type. Unused branches stay at their zero values.

func (RegressionSpec) Streamable added in v0.6.0

func (s RegressionSpec) Streamable() bool

Streamable reports whether this concrete RegressionSpec can run via the streaming execution path today. The check folds the type-level Streamable() value with one layer:

  • modifier downgrade: any non-empty Resample or Selection forces the buffered path.

REG_OLS streams both unpenalized (Phase 1) and penalized (l1 / l2 / elasticnet, Phase 2) — the streaming Gram is identical; the iterative solver runs at finalize over a p×p matrix, not over rows. REG_BAYES_LINEAR (Phase 4) streams the same Welford sufficient stats and applies the conjugate Normal-Inverse-Gamma posterior at finalize.

type RegressionType added in v0.6.0

type RegressionType string

RegressionType identifies a specific regression-modeling operator.

Pulse exposes three top-level regression operators that cover the thirteen textbook regression variants through a small, composable core:

  • REG_OLS — ordinary least squares; optional l1/l2/elasticnet penalty, simple/multiple predictors, polynomial features supplied upstream by FEAT_POLY for polynomial regression.
  • REG_GLM — generalized linear model; Family ∈ {binomial, poisson, gamma} with matching Link. Covers logistic and Poisson regression.
  • REG_BAYES_LINEAR — Bayesian linear regression via conjugate Normal-Inverse-Gamma prior.

Two spec-level modifiers compose with any of the three: `Resample` (jackknife / bootstrap) and `Selection` (forward / backward / stepwise) plus the matching information criterion (AIC / BIC).

Per-operator semantics, modifier behavior, and the 13-name → spec mapping live in skills/regression-modeling.md.

// Regression operators accepted in RegressionSpec.Type. Each constant's
// string value is identical to its identifier. REG_OLS and REG_BAYES_LINEAR
// stream over sufficient statistics; REG_GLM always takes the buffered path.
const (
	// REG_OLS fits ordinary least squares with optional regularization.
	// Streams over sufficient statistics (n, Σx, Σy, XᵀX, Xᵀy, Σy²) when
	// Penalty is empty; the same Gram matrix plus a regularized
	// finalize-solve handles the l1/l2/elasticnet variants.
	REG_OLS RegressionType = "REG_OLS"

	// REG_GLM fits a generalized linear model via IRLS. Always buffered:
	// Newton-Raphson needs multiple passes over the data.
	REG_GLM RegressionType = "REG_GLM"

	// REG_BAYES_LINEAR fits a Bayesian linear model under a conjugate
	// Normal-Inverse-Gamma prior. Streams the same sufficient statistics
	// as REG_OLS, then applies the closed-form posterior update.
	REG_BAYES_LINEAR RegressionType = "REG_BAYES_LINEAR"
)

func AllRegressionTypes added in v0.6.0

func AllRegressionTypes() []RegressionType

AllRegressionTypes returns every defined regression operator in alphabetical order.

func (RegressionType) Streamable added in v0.6.0

func (t RegressionType) Streamable() bool

Streamable reports whether a regression type can run via the streaming execution path in isolation, ignoring spec-level modifiers like Resample or Selection. REG_OLS and REG_BAYES_LINEAR stream over sufficient statistics; REG_GLM always needs IRLS and therefore the buffered path.

Use RegressionSpec.Streamable() instead of this type-level method when checking a concrete request: the spec-level helper downgrades streamability whenever Resample or Selection is set, mirroring the runtime gate.

Default branch returns false so newly-added regression types must opt in.

type Request

type Request struct {
	// Cohort identifies the .pulse file to process.
	Cohort *Cohort `json:"cohort,omitempty"`

	// Filterers is the list of filters to apply before processing.
	Filterers []*Filterer `json:"filterers,omitempty"`

	// Aggregations is the list of aggregation operations.
	Aggregations []*Aggregation `json:"aggregations,omitempty"`

	// Attributes is the list of derived attribute computations.
	Attributes []*Attribute `json:"attributes,omitempty"`

	// Groups is the list of grouping operations.
	Groups []*Group `json:"groups,omitempty"`

	// Outputs configures result formatting.
	Outputs []*Output `json:"outputs,omitempty"`

	// Windows is the list of window operations evaluated after aggregation.
	Windows []*Window `json:"windows,omitempty"`

	// Features is the list of pre-filter feature engineering operators.
	// Each operator may add one or more derived columns to the working
	// schema before filters and downstream stages run.
	Features []*Feature `json:"features,omitempty"`

	// Sort orders response rows by the listed keys. Applied last in the
	// pipeline (after windows). Each key field must reference a schema
	// field, an aggregation/attribute/group/window output label, or any
	// column produced by upstream stages. Stable sort; nulls last
	// regardless of direction.
	Sort []OrderKey `json:"sort,omitempty"`

	// Tests is the list of tier-1 statistical tests evaluated alongside
	// aggregators against the raw row stream. Online-moments tests
	// (e.g. TEST_T, TEST_WELCH, TEST_CHISQ, TEST_ANOVA_F) execute in the
	// streaming Process path; tests that need a sorted or fully buffered
	// view (e.g. TEST_KS) force the buffered path. Results land in
	// Response.Tests in the same order.
	Tests []*Test `json:"tests,omitempty"`

	// PostTests is the list of tier-2 statistical tests evaluated after
	// the window stage on the materialized result row set. Typical uses:
	// ANOVA across grouper buckets, Tukey HSD post-hoc on per-group
	// means, trend tests over windowed series. Always buffered (the
	// result row set must be materialized before tier-2 runs). Results
	// land in Response.PostTests in the same order.
	PostTests []*Test `json:"post_tests,omitempty"`

	// Regressions is the list of regression-modeling operators (REG_OLS,
	// REG_GLM, REG_BAYES_LINEAR) evaluated against the filtered record
	// set. Each spec produces one RegressionResult in Response.Regressions
	// in matching order. Streamability follows RegressionSpec.Streamable
	// — closed-form OLS/Bayes stream over sufficient statistics; GLM,
	// Resample, and Selection variants force the buffered path.
	Regressions []*RegressionSpec `json:"regressions,omitempty"`
}

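Request is the primary processing request type. It specifies a cohort together with the filters, features, aggregations, attributes, groups, windows, sort keys, statistical tests (tier 1 and tier 2), regressions, and output config to apply.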

type Response

type Response struct {
	// Data holds the result rows as key-value maps.
	Data []map[string]any `json:"data,omitempty"`

	// Metadata holds information about the processing run (row counts
	// and the cohort filename; see ResponseMetadata).
	Metadata *ResponseMetadata `json:"metadata,omitempty"`

	// Tests holds tier-1 statistical test results, one per entry in
	// Request.Tests and in the same order.
	Tests []*TestResult `json:"tests,omitempty"`

	// PostTests holds tier-2 statistical test results, one per entry in
	// Request.PostTests and in the same order.
	PostTests []*TestResult `json:"post_tests,omitempty"`

	// Regressions holds the per-spec regression fits, one entry per
	// Request.Regressions in the same order. Engines never partially
	// populate a result on failure; a failed fit surfaces as a
	// PROCESSING_REGRESSION_* error on the envelope instead.
	Regressions []*RegressionResult `json:"regressions,omitempty"`
}

Response is the processing result type.

type ResponseMetadata

type ResponseMetadata struct {
	// TotalRows is the total number of records in the cohort.
	// Always emitted in JSON, even when zero.
	TotalRows int64 `json:"total_rows"`

	// FilteredRows is the number of records after filtering.
	// Always emitted in JSON, even when zero.
	FilteredRows int64 `json:"filtered_rows"`

	// CohortFile is the filename of the processed cohort.
	// Omitted from JSON when empty.
	CohortFile string `json:"cohort_file,omitempty"`
}

ResponseMetadata holds metadata about a processing result.

type Test added in v0.3.0

type Test struct {
	// Type is the statistical test to perform.
	Type TestType `json:"type"`

	// Field is the primary numeric field under test. Required for TEST_T,
	// TEST_WELCH, TEST_ANOVA_F, TEST_KS, and TEST_TREND. Omitted for
	// TEST_CHISQ (uses Rows × Cols instead).
	Field string `json:"field,omitempty"`

	// Field2 is the secondary numeric field reference used by paired or
	// bivariate tests (TEST_PEARSON_R, TEST_PAIRED_T, TEST_WILCOXON_SR,
	// TEST_SPEARMAN_R, TEST_KENDALL_TAU). For TEST_PAIRED_T and
	// TEST_WILCOXON_SR the difference d = Field − Field2 is tested. For
	// the correlation tests the association between Field and Field2 is
	// reported. Empty for every other test.
	Field2 string `json:"field2,omitempty"`

	// SplitBy is a categorical field whose distinct values partition Field
	// into groups. Required for two-sample TEST_T / TEST_WELCH and for
	// TEST_ANOVA_F. Two-sample tests expect exactly 2 groups; ANOVA expects
	// ≥ 2. Also read by TEST_PROP_Z (groups), TEST_ANOVA_RM (conditions),
	// and, optionally, TEST_SHAPIRO_WILK (per-group runs).
	SplitBy string `json:"split_by,omitempty"`

	// Rows is the row-axis categorical field for TEST_CHISQ contingency.
	Rows string `json:"rows,omitempty"`

	// Cols is the column-axis categorical field for TEST_CHISQ contingency.
	Cols string `json:"cols,omitempty"`

	// SubjectField identifies the within-subject grouping for
	// repeated-measures designs (TEST_ANOVA_RM). Each distinct value
	// represents one subject contributing one observation per condition
	// (SplitBy). Empty for every other test.
	SubjectField string `json:"subject_field,omitempty"`

	// Alpha is the significance level. Defaults to 0.05 when zero.
	// Must lie in the open interval (0, 1).
	Alpha float64 `json:"alpha,omitempty"`

	// Label is the output alias for the test result. When empty, defaults
	// to "<TYPE>" or "<TYPE>_<field>" depending on the operator.
	Label string `json:"label,omitempty"`

	// OrderBy supplies ordering keys for tests that need a sorted series
	// (TEST_TREND, TEST_KS when run as a series test). Tier-2 trend tests
	// typically reference an output column produced by an upstream window
	// or grouper.
	OrderBy []OrderKey `json:"order_by,omitempty"`

	// Params holds operator-specific configuration as raw JSON.
	//   TEST_T (one-sample): {"mu": <hypothesized mean>}
	//   TEST_T / TEST_WELCH (two-sample): {"variant": "welch"} (default)
	//   TEST_KS: {"alternative": "two-sided"|"less"|"greater"}
	//   TEST_TUKEY_HSD: {"ms_within": <f64>, "df_within": <f64>}
	//   TEST_TREND: {"variant": "mann_kendall"} (default)
	//   TEST_PROP_Z: {"success": <value of Field counted as a success>}
	Params json.RawMessage `json:"params,omitempty"`
}

Test defines a single statistical test to evaluate during a Process run. Tests appear in two request slots: Request.Tests (tier 1, row-level) and Request.PostTests (tier 2, result-level). The shape is shared because most fields are common across tiers; per-test validation rules live in the registry and predict layer.

type TestResult added in v0.3.0

type TestResult struct {
	// Label echoes the request Label or the operator's synthesized default.
	Label string `json:"label,omitempty"`

	// Type is the test that produced this result.
	Type TestType `json:"type"`

	// Variant names the specific algorithm when a test supports more than
	// one (e.g. "welch_two_sample", "mann_kendall", "independence").
	Variant string `json:"variant,omitempty"`

	// Statistic is the test statistic (t, F, χ², KS D, Mann-Kendall S, etc.).
	Statistic float64 `json:"statistic"`

	// DF is the degrees of freedom. Tests with multiple DF values (e.g.
	// ANOVA's between/within) report the numerator DF here and surface the
	// remainder in Details. Omitted from JSON when zero.
	DF float64 `json:"df,omitempty"`

	// PValue is the p-value under the null hypothesis. Documented as
	// two-sided; NOTE(review): TEST_KS accepts a one-sided "alternative"
	// in Test.Params — confirm what is reported in that case.
	PValue float64 `json:"p_value"`

	// Alpha is the significance threshold applied to RejectNull.
	Alpha float64 `json:"alpha"`

	// RejectNull is true when PValue < Alpha.
	RejectNull bool `json:"reject_null"`

	// Details holds operator-specific payload (per-group n/mean/variance,
	// contingency table, pairwise comparisons, confidence intervals,
	// effect-size measures). Marshals naturally as a nested JSON object.
	Details map[string]any `json:"details,omitempty"`

	// Warnings carries per-test diagnostics (low N, near-zero variance,
	// expected-count thresholds, etc.) so the envelope-level Warnings list
	// is not the only signal.
	Warnings []string `json:"warnings,omitempty"`
}

TestResult is the per-test outcome embedded in Response.Tests and Response.PostTests. Common statistics live as top-level fields; operator-specific payload (contingency tables, pairwise comparisons, effect sizes, per-group moments) lives in Details so the result shape stays predictable across the catalog.

type TestType added in v0.3.0

type TestType string

TestType identifies a specific statistical test operation.

Pulse splits statistical testing into two tiers, both expressed via this enum and both executed inside a single Process pipeline:

  • Tier 1 (row tests): listed in Request.Tests, evaluated against the raw row stream alongside aggregators. Online-moments tests reuse the running mean/variance/n that aggregators already compute, so they add near-zero cost when their inputs overlap with active aggregations.
  • Tier 2 (post tests): listed in Request.PostTests, evaluated after the window stage on the materialized result row set. Useful for ANOVA across grouper buckets, post-hoc pairwise comparisons, and trend tests over windowed series.

Per-type semantics, required fields, and streamability are documented in skills/statistical-testing.md.

const (
	// TEST_T is a one-sample or two-sample Welch t-test on a numeric field.
	// When SplitBy is set, the test partitions Field by SplitBy and compares
	// the two resulting groups; otherwise it tests Field's mean against the
	// hypothesized value supplied in Params.
	TEST_T TestType = "TEST_T"

	// TEST_WELCH is an explicit two-sample Welch t-test alias used when the
	// caller wants to be unambiguous about the variant. Behaves identically
	// to TEST_T with SplitBy set; provided so requests document intent.
	TEST_WELCH TestType = "TEST_WELCH"

	// TEST_CHISQ is a chi-square independence test on a 2D contingency
	// table built from two categorical fields (Rows × Cols).
	TEST_CHISQ TestType = "TEST_CHISQ"

	// TEST_ANOVA_F is a one-way analysis of variance F-test comparing the
	// means of a numeric Field across k groups defined by a categorical
	// SplitBy field. k must be ≥ 2.
	TEST_ANOVA_F TestType = "TEST_ANOVA_F"

	// TEST_KS is a Kolmogorov-Smirnov two-sample distribution test. Not
	// streamable — requires sorted ECDFs.
	TEST_KS TestType = "TEST_KS"

	// TEST_TUKEY_HSD is a post-hoc pairwise comparison of group means using
	// Tukey's Honestly Significant Difference. Tier-2 only: consumes the
	// per-group means and counts produced by upstream aggregators.
	TEST_TUKEY_HSD TestType = "TEST_TUKEY_HSD"

	// TEST_TREND is a Mann-Kendall trend test over an ordered numeric
	// series. Tier-2 typical: runs over windowed result rows; tier-1
	// possible when the raw field is naturally ordered by an OrderBy key.
	TEST_TREND TestType = "TEST_TREND"

	// TEST_PEARSON_R is the parametric correlation test between two
	// numeric fields. Streams via an extended Welford recurrence that
	// also tracks the running cross-product. p-value via the t-statistic
	// r·√((n−2)/(1−r²)) with df = n − 2.
	TEST_PEARSON_R TestType = "TEST_PEARSON_R"

	// TEST_PAIRED_T is the paired-sample t-test on the per-row difference
	// d = Field − Field2. Streams via the same Welford machinery as the
	// one-sample TEST_T, driven by two-field reads.
	TEST_PAIRED_T TestType = "TEST_PAIRED_T"

	// TEST_PROP_Z is the two-proportion z-test. The categorical SplitBy
	// field defines the groups being compared; each row counts as a
	// success or a failure based on Params.success (the dictionary value
	// of Field that is treated as a "success"). Streams via per-group
	// success / total counts.
	TEST_PROP_Z TestType = "TEST_PROP_Z"

	// TEST_MANN_WHITNEY_U is the nonparametric two-sample alternative to
	// TEST_T. Buffered: ranks the combined value set under tie correction,
	// sums the ranks in group A → R_A, then computes
	//   U_A = R_A − n_A(n_A+1)/2
	// Two-sided p-value via the normal approximation with tie correction.
	TEST_MANN_WHITNEY_U TestType = "TEST_MANN_WHITNEY_U"

	// TEST_WILCOXON_SR is the Wilcoxon signed-rank test on the per-row
	// difference d = Field − Field2. Buffered: drops zero-diff pairs,
	// ranks |d| under tie correction, sums positive-sign ranks → W.
	// Two-sided p-value via the normal approximation with tie correction.
	TEST_WILCOXON_SR TestType = "TEST_WILCOXON_SR"

	// TEST_KRUSKAL_WALLIS is the nonparametric k-group alternative to
	// TEST_ANOVA_F. Buffered: ranks the combined value set under tie
	// correction, sums ranks per group, then
	//   H = (12/(N(N+1))) · Σ (R_i²/n_i) − 3(N+1)
	// p-value via chi-square survival with k−1 df.
	TEST_KRUSKAL_WALLIS TestType = "TEST_KRUSKAL_WALLIS"

	// TEST_SPEARMAN_R is the rank-based correlation between Field and
	// Field2 (monotonic association). Buffered: mid-ranks each column
	// under tie correction, then runs Pearson on the ranks. p-value via
	//   t = ρ · √((n−2)/(1−ρ²))   df = n − 2
	TEST_SPEARMAN_R TestType = "TEST_SPEARMAN_R"

	// TEST_KENDALL_TAU is concordance-based correlation between Field
	// and Field2. Buffered O(n²) pair count under tie correction.
	// Two-sided p-value via normal approximation with the standard
	// variance adjustment for ties in either column.
	TEST_KENDALL_TAU TestType = "TEST_KENDALL_TAU"

	// TEST_ANOVA_WELCH is the heteroscedasticity-robust one-way ANOVA.
	// Streams via the same per-group Welford buckets as TEST_ANOVA_F.
	// Statistic uses per-group weights w_i = n_i / s²_i, with the
	// Welch-Satterthwaite denominator correction for unequal variances.
	TEST_ANOVA_WELCH TestType = "TEST_ANOVA_WELCH"

	// TEST_ANOVA_RM is the repeated-measures one-way ANOVA. Buffered:
	// requires SubjectField to pivot rows into the wide subject ×
	// condition table. Decomposes SS into between-subjects, treatment,
	// and error components; F = MS_treatment / MS_error.
	TEST_ANOVA_RM TestType = "TEST_ANOVA_RM"

	// TEST_BROWN_FORSYTHE is a homogeneity-of-variance test. Replaces
	// each value with its absolute deviation from the group median, then
	// runs one-way ANOVA on the deviations. Buffered: per-group medians
	// require a sort.
	TEST_BROWN_FORSYTHE TestType = "TEST_BROWN_FORSYTHE"

	// TEST_FISHER_EXACT is the exact two-sided p-value for a 2×2
	// contingency table. The canonical small-sample alternative to
	// TEST_CHISQ when any expected cell count is below 5. Buffered:
	// requires the full contingency table.
	TEST_FISHER_EXACT TestType = "TEST_FISHER_EXACT"

	// TEST_SHAPIRO_WILK is the Shapiro-Wilk normality test on Field.
	// When SplitBy is set the test runs per-group; otherwise it runs
	// over the full filtered set. Buffered: requires the ordered values.
	// Implementation supports n ≤ 5000; n above the bound surfaces a
	// PULSE_TEST_SHAPIRO_N_BOUND warning.
	TEST_SHAPIRO_WILK TestType = "TEST_SHAPIRO_WILK"
)

func AllTestTypes added in v0.3.0

func AllTestTypes() []TestType

AllTestTypes returns every defined statistical test type in alphabetical order.

func (TestType) Streamable added in v0.3.0

func (t TestType) Streamable() bool

Streamable reports whether this test type can be evaluated in the streaming Process path as a tier-1 row test. Test types fall into two categories at runtime:

  • Online-moments tests (such as TEST_T, TEST_WELCH, TEST_CHISQ, TEST_ANOVA_F) reuse the running (mean, variance, n) and per-bucket counts already produced by the streaming aggregator path. They consume zero extra passes when their inputs overlap with an active aggregator.
  • Buffered-only tests (such as TEST_KS, TEST_TUKEY_HSD, TEST_TREND) require a sorted view, a finalized post-hoc matrix, or an ordered series and cannot stream. Predict flags requests containing these tests as non-streamable.

Tier-2 PostTests are always buffered regardless of TestType: they execute over the materialized result row set, after windows. Streamable here reports tier-1 capability only.

Default branch returns false so newly-added test types must opt in.

type VersionResponse

type VersionResponse struct {
	// Version is the semantic version string. Always emitted in JSON.
	Version string `json:"version"`

	// BuildDate is the build timestamp. Omitted from JSON when empty.
	BuildDate string `json:"build_date,omitempty"`

	// GoVersion is the Go compiler version used. Omitted from JSON when
	// empty.
	GoVersion string `json:"go_version,omitempty"`
}

VersionResponse provides build and version information.

type Window added in v0.2.0

type Window struct {
	// Type is the window operation to perform.
	Type WindowType `json:"type"`

	// Field is the source field name. Required for all operators except
	// WIN_ROW_NUMBER, WIN_RANK, and WIN_DENSE_RANK.
	Field string `json:"field,omitempty"`

	// Label is the output column name. When empty, defaults to "<TYPE>_<field>"
	// or "<TYPE>" for WIN_ROW_NUMBER/WIN_RANK/WIN_DENSE_RANK.
	Label string `json:"label,omitempty"`

	// PartitionBy is the list of fields whose distinct values define
	// independent partitions for window evaluation. Empty means a single
	// partition over all rows.
	PartitionBy []string `json:"partition_by,omitempty"`

	// OrderBy is the list of order keys. Required (≥1) for every window operator.
	OrderBy []OrderKey `json:"order_by"`

	// Frame is the window frame specification. Required for WIN_RUNNING_*,
	// WIN_MOVING_AVG, and WIN_EWMA. Rejected for WIN_LAG, WIN_LEAD,
	// WIN_ROW_NUMBER, WIN_RANK, WIN_DENSE_RANK, WIN_PCT_CHANGE.
	Frame *FrameSpec `json:"frame,omitempty"`

	// Params holds operator-specific parameters as raw JSON.
	// WIN_LAG/WIN_LEAD: {"offset": 1, "default": null}
	// WIN_EWMA: {"alpha": 0.5}
	// WIN_PCT_CHANGE: {"periods": 1}
	Params json.RawMessage `json:"params,omitempty"`
}

Window defines a window operation.

type WindowType added in v0.2.0

type WindowType string

WindowType identifies a specific window operation.

// Window operator identifiers. Per the Window field documentation:
// WIN_ROW_NUMBER, WIN_RANK, and WIN_DENSE_RANK need no source Field;
// WIN_RUNNING_SUM, WIN_RUNNING_AVG, WIN_MOVING_AVG, and WIN_EWMA require a
// Frame, which every other operator rejects; WIN_LAG, WIN_LEAD, WIN_EWMA,
// and WIN_PCT_CHANGE accept operator-specific Params.
const (
	WIN_LAG         WindowType = "WIN_LAG"
	WIN_LEAD        WindowType = "WIN_LEAD"
	WIN_ROW_NUMBER  WindowType = "WIN_ROW_NUMBER"
	WIN_RANK        WindowType = "WIN_RANK"
	WIN_DENSE_RANK  WindowType = "WIN_DENSE_RANK"
	WIN_RUNNING_SUM WindowType = "WIN_RUNNING_SUM"
	WIN_RUNNING_AVG WindowType = "WIN_RUNNING_AVG"
	WIN_MOVING_AVG  WindowType = "WIN_MOVING_AVG"
	WIN_EWMA        WindowType = "WIN_EWMA"
	WIN_PCT_CHANGE  WindowType = "WIN_PCT_CHANGE"
)

func AllWindowTypes added in v0.2.0

func AllWindowTypes() []WindowType

AllWindowTypes returns all defined window types in alphabetical order.

func (WindowType) Streamable added in v0.2.0

func (t WindowType) Streamable() bool

Streamable reports whether this window type can be computed without buffering. All window operators run over the post-aggregate row set in a final pass; none stream today.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL