types

package
v0.2.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 11, 2026 License: MIT Imports: 1 Imported by: 0

Documentation

Overview

Package types provides shared type definitions for pulse.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Aggregation

// Aggregation defines a single aggregation operation to apply to a field.
type Aggregation struct {
	// Type is the aggregation operation to perform.
	Type AggregationType `json:"type"`

	// Field is the name of the data field to aggregate.
	Field string `json:"field"`

	// Label is an optional output label for the aggregation result.
	// Omitted from the JSON encoding when empty.
	Label string `json:"label,omitempty"`

	// Params holds type-specific configuration as raw JSON.
	// Used by aggregation types that require additional parameters (e.g., AGG_PERCENTILE).
	// Omitted from the JSON encoding when empty.
	Params json.RawMessage `json:"params,omitempty"`
}

Aggregation defines a single aggregation operation to apply to a field.

type AggregationType

// AggregationType identifies a specific aggregation operation.
// It is a string type whose defined values are the AGG_* constants; it is
// carried in the "type" JSON field of Aggregation.
type AggregationType string

AggregationType identifies a specific aggregation operation.

// Defined AggregationType values. Each constant's string value is identical
// to its identifier. AGG_PERCENTILE is the example given by Aggregation.Params
// of a type that requires additional parameters.
const (
	AGG_COUNT          AggregationType = "AGG_COUNT"
	AGG_SUM            AggregationType = "AGG_SUM"
	AGG_AVERAGE        AggregationType = "AGG_AVERAGE"
	AGG_MIN            AggregationType = "AGG_MIN"
	AGG_MAX            AggregationType = "AGG_MAX"
	AGG_STDDEV         AggregationType = "AGG_STDDEV"
	AGG_RANGE          AggregationType = "AGG_RANGE"
	AGG_FREQUENCY      AggregationType = "AGG_FREQUENCY"
	AGG_ZSCORE         AggregationType = "AGG_ZSCORE"
	AGG_MEDIAN         AggregationType = "AGG_MEDIAN"
	AGG_VARIANCE       AggregationType = "AGG_VARIANCE"
	AGG_MODE           AggregationType = "AGG_MODE"
	AGG_SKEWNESS       AggregationType = "AGG_SKEWNESS"
	AGG_KURTOSIS       AggregationType = "AGG_KURTOSIS"
	AGG_DISTINCT_COUNT AggregationType = "AGG_DISTINCT_COUNT"
	AGG_PERCENTILE     AggregationType = "AGG_PERCENTILE"

	// Aggregations with the AGG_GEO_ prefix, declared as a separate group.
	AGG_GEO_CENTROID AggregationType = "AGG_GEO_CENTROID"
	AGG_GEO_BBOX     AggregationType = "AGG_GEO_BBOX"
)

func AllAggregationTypes

func AllAggregationTypes() []AggregationType

AllAggregationTypes returns all defined aggregation types.

func (AggregationType) Streamable added in v0.2.0

func (t AggregationType) Streamable() bool

Streamable reports whether this aggregation type supports the single-pass streaming execution path. Streamable aggregators implement processing.OnlineAggregator (UpdateRow + Finalize) and produce a result with O(1) or O(unique) state per row.

Source of truth for predict.Streamable; cross-checked at test time against the processing registry by TestRegistryStreamabilityMatchesTypes.

Default branch returns false so newly-added aggregator types must opt in explicitly.

type Attribute

// Attribute defines a derived attribute computation.
type Attribute struct {
	// Type is the attribute computation to perform.
	Type AttributeType `json:"type"`

	// Field is the name of the source data field.
	Field string `json:"field"`

	// Label is the output name for the derived attribute.
	// Omitted from the JSON encoding when empty.
	Label string `json:"label,omitempty"`

	// Expression is a runtime expression for ATTR_FORMULA type.
	// Omitted from the JSON encoding when empty.
	Expression string `json:"expression,omitempty"`

	// Params holds type-specific configuration as raw JSON.
	// Each attribute type defines its own params schema.
	// Omitted from the JSON encoding when empty.
	Params json.RawMessage `json:"params,omitempty"`
}

Attribute defines a derived attribute computation.

type AttributeType

// AttributeType identifies a specific derived-attribute computation.
// It is a string type whose defined values are the ATTR_* constants; it is
// carried in the "type" JSON field of Attribute.
type AttributeType string

AttributeType identifies a specific derived-attribute computation.

// Defined AttributeType values. Each constant's string value is identical to
// its identifier.
//
// Per the AttributeType.Streamable documentation, the runtime tiers are:
//   - row-local: ATTR_FORMULA, ATTR_DATE_PART
//   - two-pass: ATTR_ZSCORE, ATTR_TSCORE, ATTR_NORMALIZED
//   - buffered-only: ATTR_PERCENTILE
const (
	ATTR_ZSCORE     AttributeType = "ATTR_ZSCORE"
	ATTR_TSCORE     AttributeType = "ATTR_TSCORE"
	ATTR_NORMALIZED AttributeType = "ATTR_NORMALIZED"
	ATTR_FORMULA    AttributeType = "ATTR_FORMULA"
	ATTR_PERCENTILE AttributeType = "ATTR_PERCENTILE"
	ATTR_DATE_PART  AttributeType = "ATTR_DATE_PART"
)

func AllAttributeTypes

func AllAttributeTypes() []AttributeType

AllAttributeTypes returns all defined attribute types.

func (AttributeType) Streamable added in v0.2.0

func (t AttributeType) Streamable() bool

Streamable reports whether this attribute type can be computed in a streaming path. Three tiers exist at runtime:

  • Row-local: FORMULA, DATE_PART implement processing.RowLocalAttribute and execute inline with no PrePass.
  • Two-pass: ZSCORE, TSCORE, NORMALIZED implement processing.TwoPassAttribute and need a PrePass over filter-passing records, Finalize, then per-row Row() in pass 2 (iter.Reset()).
  • Buffered-only: PERCENTILE needs a sorted view of every value; no streaming algorithm preserves exact rank semantics.

Streamable() returns true for the first two tiers since both routes avoid materializing the full record set in memory.

type Cohort

// Cohort identifies a .pulse data file for processing.
type Cohort struct {
	// Filename is the name of the .pulse file.
	Filename string `json:"filename"`

	// DataDir is the directory containing the cohort file.
	// Omitted from the JSON encoding when empty.
	DataDir string `json:"data_dir,omitempty"`
}

Cohort identifies a .pulse data file for processing.

type ComposedRequest

// ComposedRequest bundles multiple requests for batch execution.
type ComposedRequest struct {
	// Requests is the list of individual requests to execute.
	// The field has no omitempty tag, so it is always present in the JSON
	// encoding (a nil slice encodes as null).
	Requests []*Request `json:"requests"`
}

ComposedRequest bundles multiple requests for batch execution.

type Feature added in v0.2.0

// Feature defines a feature engineering operation. Features run pre-filter
// (before any FILTER_* predicate) and may produce one or more derived columns.
// Global-pass features (TARGET_ENCODE, FREQUENCY_ENCODE) require a stats sweep
// before per-row write; per-row features compute one row at a time.
type Feature struct {
	// Type is the feature operator to perform.
	Type FeatureType `json:"type"`

	// Field is the source field name. Required by every operator except
	// FEAT_TRAIN_TEST_SPLIT (which reads no field by default — params may
	// optionally name a stratify field).
	Field string `json:"field,omitempty"`

	// Label is an output column name (single-output operators) or output
	// column prefix (multi-output operators). When empty, the operator
	// derives a default — typically "<TYPE>_<field>".
	Label string `json:"label,omitempty"`

	// Params holds operator-specific parameters as raw JSON. See the
	// feature-engineering skill for the per-operator schema.
	// Omitted from the JSON encoding when empty.
	Params json.RawMessage `json:"params,omitempty"`
}

Feature defines a feature engineering operation. Features run pre-filter (before any FILTER_* predicate) and may produce one or more derived columns. Global-pass features (TARGET_ENCODE, FREQUENCY_ENCODE) require a stats sweep before per-row write; per-row features compute one row at a time.

type FeatureType added in v0.2.0

// FeatureType identifies a specific ML feature engineering operator.
// Features run pre-filter and may emit one or more output columns.
// It is a string type whose defined values are the FEAT_* constants.
type FeatureType string

FeatureType identifies a specific ML feature engineering operator. Features run pre-filter and may emit one or more output columns.

// Defined FeatureType values. Each constant's string value is identical to
// its identifier.
//
// Per the Feature documentation, FEAT_FREQUENCY_ENCODE and FEAT_TARGET_ENCODE
// are global-pass operators that need a stats sweep before per-row write, and
// FEAT_TRAIN_TEST_SPLIT is the only operator that does not require
// Feature.Field.
const (
	FEAT_LOG              FeatureType = "FEAT_LOG"
	FEAT_SQRT             FeatureType = "FEAT_SQRT"
	FEAT_BUCKETIZE        FeatureType = "FEAT_BUCKETIZE"
	FEAT_ONE_HOT          FeatureType = "FEAT_ONE_HOT"
	FEAT_DATE_FEATURES    FeatureType = "FEAT_DATE_FEATURES"
	FEAT_FREQUENCY_ENCODE FeatureType = "FEAT_FREQUENCY_ENCODE"
	FEAT_TARGET_ENCODE    FeatureType = "FEAT_TARGET_ENCODE"
	FEAT_TRAIN_TEST_SPLIT FeatureType = "FEAT_TRAIN_TEST_SPLIT"
)

func AllFeatureTypes added in v0.2.0

func AllFeatureTypes() []FeatureType

AllFeatureTypes returns every defined feature type in alphabetical order.

func (FeatureType) Streamable added in v0.2.0

func (t FeatureType) Streamable() bool

Streamable reports whether this feature type can run in the pre-pass+finalize+emit streaming pipeline (feature.StreamingComputer).

Source of truth is feature.IsStreamable(req.Features, schema) at runtime; this method mirrors the per-type capability used by predict.

type FileRequest

// FileRequest identifies a file for operations like inspect.
type FileRequest struct {
	// Filename is the name of the file.
	Filename string `json:"filename"`

	// DataDir is the directory containing the file.
	// Omitted from the JSON encoding when empty.
	DataDir string `json:"data_dir,omitempty"`
}

FileRequest identifies a file for operations like inspect.

type FileResponse

// FileResponse describes a file's metadata.
type FileResponse struct {
	// Filename is the name of the file.
	Filename string `json:"filename"`

	// RecordCount is the number of records in the file.
	// Always present in the JSON encoding, including when zero.
	RecordCount int64 `json:"record_count"`

	// Fields is the list of field names in the file.
	// Omitted from the JSON encoding when empty.
	Fields []string `json:"fields,omitempty"`
}

FileResponse describes a file's metadata.

type Filterer

// Filterer defines a filter to apply to records before processing.
type Filterer struct {
	// Type is the filter operation to perform.
	Type FiltererType `json:"type"`

	// Field is the name of the data field to filter on.
	// Not required for FILTER_EXPRESSION.
	Field string `json:"field,omitempty"`

	// Values is a list of values for include/exclude/range filters.
	// Values are carried as strings; how each filter type interprets them
	// is not shown here.
	Values []string `json:"values,omitempty"`

	// Expression is a runtime expression for FILTER_EXPRESSION type.
	Expression string `json:"expression,omitempty"`
}

Filterer defines a filter to apply to records before processing.

type FiltererType

// FiltererType identifies a specific filter operation.
// It is a string type whose defined values are the FILTER_* constants; it is
// carried in the "type" JSON field of Filterer.
type FiltererType string

FiltererType identifies a specific filter operation.

// Defined FiltererType values. Each constant's string value is identical to
// its identifier. FILTER_EXPRESSION uses Filterer.Expression and does not
// require Filterer.Field.
const (
	FILTER_INCLUDE    FiltererType = "FILTER_INCLUDE"
	FILTER_EXCLUDE    FiltererType = "FILTER_EXCLUDE"
	FILTER_RANGE      FiltererType = "FILTER_RANGE"
	FILTER_EXPRESSION FiltererType = "FILTER_EXPRESSION"

	// Filters with the FILTER_GEO_ prefix, declared as a separate group.
	FILTER_GEO_WITHIN          FiltererType = "FILTER_GEO_WITHIN"
	FILTER_GEO_WITHIN_RADIUS_M FiltererType = "FILTER_GEO_WITHIN_RADIUS_M"
)

func AllFiltererTypes

func AllFiltererTypes() []FiltererType

AllFiltererTypes returns all defined filterer types.

func (FiltererType) Streamable added in v0.2.0

func (t FiltererType) Streamable() bool

Streamable reports whether this filterer type evaluates per-row without looking at other rows. All registered filterers are row-local today.

type FrameSpec added in v0.2.0

// FrameSpec specifies the window frame bounds.
// Following==0 with Preceding==0 selects the current row only.
type FrameSpec struct {
	// Mode is the frame mode. "rows" is the only mode supported in v1.
	Mode      string `json:"mode"`
	// Preceding is the lower frame bound. nil means UNBOUNDED PRECEDING.
	// A pointer is used so that an explicit 0 is distinguishable from unset.
	Preceding *int   `json:"preceding,omitempty"`
	// Following is the upper frame bound. nil means UNBOUNDED FOLLOWING.
	// A pointer is used so that an explicit 0 is distinguishable from unset.
	Following *int   `json:"following,omitempty"`
}

FrameSpec specifies the window frame bounds. Mode is "rows" — the only frame mode supported in v1. Preceding nil means UNBOUNDED PRECEDING; Following nil means UNBOUNDED FOLLOWING. Following==0 with Preceding==0 selects the current row only.

type Group

// Group defines a grouping operation to partition results.
type Group struct {
	// Type is the grouping operation to perform.
	Type GroupType `json:"type"`

	// Field is the name of the data field to group by.
	Field string `json:"field"`

	// Interval is used by GROUP_ROUNDED and GROUP_RANGE to define the bucket width.
	// Because of omitempty, a zero interval is omitted from the JSON encoding.
	Interval float64 `json:"interval,omitempty"`

	// Params holds type-specific configuration as raw JSON.
	// Used by group types that require additional parameters (e.g., GROUP_DATE).
	// Omitted from the JSON encoding when empty.
	Params json.RawMessage `json:"params,omitempty"`
}

Group defines a grouping operation to partition results.

type GroupType

// GroupType identifies a specific grouping operation.
// It is a string type whose defined values are the GROUP_* constants; it is
// carried in the "type" JSON field of Group.
type GroupType string

GroupType identifies a specific grouping operation.

// Defined GroupType values. Each constant's string value is identical to its
// identifier.
//
// Per the Group and GroupType.Streamable documentation: GROUP_ROUNDED and
// GROUP_RANGE use Group.Interval as the bucket width; GROUP_DATE is the
// example of a type configured through Group.Params; CATEGORY, ROUNDED, RANGE
// and H3_CELL bucket per row, while QUANTILE and DATE require finalize-time
// work over the full set.
const (
	GROUP_CATEGORY GroupType = "GROUP_CATEGORY"
	GROUP_ROUNDED  GroupType = "GROUP_ROUNDED"
	GROUP_RANGE    GroupType = "GROUP_RANGE"
	GROUP_QUANTILE GroupType = "GROUP_QUANTILE"
	GROUP_DATE     GroupType = "GROUP_DATE"
	GROUP_H3_CELL  GroupType = "GROUP_H3_CELL"
)

func AllGroupTypes

func AllGroupTypes() []GroupType

AllGroupTypes returns all defined group types.

func (GroupType) Streamable added in v0.2.0

func (t GroupType) Streamable() bool

Streamable reports whether this group type can emit groups before the input is exhausted. CATEGORY/ROUNDED/RANGE/H3_CELL bucket per row; QUANTILE/DATE require finalize-time work over the full set.

The streaming Process path does not currently emit grouped output even for streamable groupers — Request.Streamable returns false whenever groups are present. The method is wired through so a future grouped streaming iterator can flip the gate without re-deriving the rule.

type OrderKey added in v0.2.0

// OrderKey specifies an ordering key for a window's ORDER BY clause.
// The same type is used for the elements of Request.Sort.
type OrderKey struct {
	// Field is the name of the field to order by.
	Field string `json:"field"`
	// Desc presumably selects descending order when true (ascending when
	// false) — TODO confirm against the sort implementation. Omitted from
	// the JSON encoding when false.
	Desc  bool   `json:"desc,omitempty"`
}

OrderKey specifies an ordering key for a window's ORDER BY clause.

type Output

// Output configures how processing results are formatted.
type Output struct {
	// Format is the output format (e.g., "json", "csv").
	Format string `json:"format"`

	// Filename is an optional output file path.
	// Omitted from the JSON encoding when empty.
	Filename string `json:"filename,omitempty"`

	// Pretty enables indented/formatted output.
	// Omitted from the JSON encoding when false.
	Pretty bool `json:"pretty,omitempty"`

	// IncludeNil controls whether nil/null values appear in output.
	// Omitted from the JSON encoding when false.
	IncludeNil bool `json:"include_nil,omitempty"`
}

Output configures how processing results are formatted.

type Request

// Request is the primary processing request type. It specifies a cohort and
// the features, filters, aggregations, attributes, groups, windows, sort keys,
// and outputs to apply to it.
//
// Every field carries omitempty, so unset fields are dropped from the JSON
// encoding. As described on the individual fields, features run before
// filters, windows are evaluated after aggregation, and sort is applied last.
type Request struct {
	// Cohort identifies the .pulse file to process.
	Cohort *Cohort `json:"cohort,omitempty"`

	// Filterers is the list of filters to apply before processing.
	Filterers []*Filterer `json:"filterers,omitempty"`

	// Aggregations is the list of aggregation operations.
	Aggregations []*Aggregation `json:"aggregations,omitempty"`

	// Attributes is the list of derived attribute computations.
	Attributes []*Attribute `json:"attributes,omitempty"`

	// Groups is the list of grouping operations.
	Groups []*Group `json:"groups,omitempty"`

	// Outputs configures result formatting.
	Outputs []*Output `json:"outputs,omitempty"`

	// Windows is the list of window operations evaluated after aggregation.
	Windows []*Window `json:"windows,omitempty"`

	// Features is the list of pre-filter feature engineering operators.
	// Each operator may add one or more derived columns to the working
	// schema before filters and downstream stages run.
	Features []*Feature `json:"features,omitempty"`

	// Sort orders response rows by the listed keys. Applied last in the
	// pipeline (after windows). Each key field must reference a schema
	// field, an aggregation/attribute/group/window output label, or any
	// column produced by upstream stages. Stable sort; nulls last
	// regardless of direction.
	//
	// Unlike the other list fields, elements are OrderKey values rather
	// than pointers.
	Sort []OrderKey `json:"sort,omitempty"`
}

Request is the primary processing request type. It specifies a cohort, features, filters, aggregations, attributes, groups, windows, sort keys, and output config.

type Response

// Response is the processing result type.
type Response struct {
	// Data holds the result rows as key-value maps.
	// Omitted from the JSON encoding when empty.
	Data []map[string]any `json:"data,omitempty"`

	// Metadata holds information about the processing run.
	// Omitted from the JSON encoding when nil.
	Metadata *ResponseMetadata `json:"metadata,omitempty"`
}

Response is the processing result type.

type ResponseMetadata

// ResponseMetadata holds metadata about a processing result.
type ResponseMetadata struct {
	// TotalRows is the total number of records in the cohort.
	// Always present in the JSON encoding, including when zero.
	TotalRows int64 `json:"total_rows"`

	// FilteredRows is the number of records after filtering.
	// Always present in the JSON encoding, including when zero.
	FilteredRows int64 `json:"filtered_rows"`

	// CohortFile is the filename of the processed cohort.
	// Omitted from the JSON encoding when empty.
	CohortFile string `json:"cohort_file,omitempty"`
}

ResponseMetadata holds metadata about a processing result.

type VersionResponse

// VersionResponse provides build and version information.
type VersionResponse struct {
	// Version is the semantic version string.
	Version string `json:"version"`

	// BuildDate is the build timestamp.
	// It is carried as a string; the timestamp format is not specified here.
	BuildDate string `json:"build_date,omitempty"`

	// GoVersion is the Go compiler version used.
	GoVersion string `json:"go_version,omitempty"`
}

VersionResponse provides build and version information.

type Window added in v0.2.0

// Window defines a window operation: an operator applied over ordered,
// optionally partitioned rows, with an optional frame and operator-specific
// parameters.
type Window struct {
	// Type is the window operation to perform.
	Type WindowType `json:"type"`

	// Field is the source field name. Required for all operators except
	// ROW_NUMBER, RANK, and DENSE_RANK.
	Field string `json:"field,omitempty"`

	// Label is the output column name. When empty, defaults to "<TYPE>_<field>"
	// or "<TYPE>" for ROW_NUMBER/RANK/DENSE_RANK.
	Label string `json:"label,omitempty"`

	// PartitionBy is the list of fields whose distinct values define
	// independent partitions for window evaluation. Empty means a single
	// partition over all rows.
	PartitionBy []string `json:"partition_by,omitempty"`

	// OrderBy is the list of order keys. Required (≥1) for every window operator.
	// The tag has no omitempty, so the field is always present in the JSON
	// encoding.
	OrderBy []OrderKey `json:"order_by"`

	// Frame is the window frame specification. Required for RUNNING_*, MOVING_AVG,
	// and EWMA. Rejected for LAG, LEAD, ROW_NUMBER, RANK, DENSE_RANK, PCT_CHANGE.
	Frame *FrameSpec `json:"frame,omitempty"`

	// Params holds operator-specific parameters as raw JSON.
	// LAG/LEAD: {"offset": 1, "default": null}
	// EWMA: {"alpha": 0.5}
	// PCT_CHANGE: {"periods": 1}
	Params json.RawMessage `json:"params,omitempty"`
}

Window defines a window operation.

type WindowType added in v0.2.0

// WindowType identifies a specific window operation.
// It is a string type whose defined values are the WIN_* constants; it is
// carried in the "type" JSON field of Window.
type WindowType string

WindowType identifies a specific window operation.

// Defined WindowType values. Each constant's string value is identical to its
// identifier.
//
// Per the Window field documentation:
//   - ROW_NUMBER, RANK and DENSE_RANK do not require Window.Field.
//   - RUNNING_*, MOVING_AVG and EWMA require Window.Frame; LAG, LEAD,
//     ROW_NUMBER, RANK, DENSE_RANK and PCT_CHANGE reject it.
//   - LAG/LEAD, EWMA and PCT_CHANGE take operator-specific Window.Params.
const (
	WIN_LAG         WindowType = "WIN_LAG"
	WIN_LEAD        WindowType = "WIN_LEAD"
	WIN_ROW_NUMBER  WindowType = "WIN_ROW_NUMBER"
	WIN_RANK        WindowType = "WIN_RANK"
	WIN_DENSE_RANK  WindowType = "WIN_DENSE_RANK"
	WIN_RUNNING_SUM WindowType = "WIN_RUNNING_SUM"
	WIN_RUNNING_AVG WindowType = "WIN_RUNNING_AVG"
	WIN_MOVING_AVG  WindowType = "WIN_MOVING_AVG"
	WIN_EWMA        WindowType = "WIN_EWMA"
	WIN_PCT_CHANGE  WindowType = "WIN_PCT_CHANGE"
)

func AllWindowTypes added in v0.2.0

func AllWindowTypes() []WindowType

AllWindowTypes returns all defined window types in alphabetical order.

func (WindowType) Streamable added in v0.2.0

func (t WindowType) Streamable() bool

Streamable reports whether this window type can be computed without buffering. All window operators run over the post-aggregate row set in a final pass; none stream today.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL