Documentation
¶
Overview ¶
Package types provides shared type definitions for pulse.
Index ¶
- type Aggregation
- type AggregationType
- type Attribute
- type AttributeType
- type Cohort
- type ComposedRequest
- type Feature
- type FeatureType
- type FileRequest
- type FileResponse
- type Filterer
- type FiltererType
- type FrameSpec
- type Group
- type GroupType
- type OrderKey
- type Output
- type Request
- type Response
- type ResponseMetadata
- type VersionResponse
- type Window
- type WindowType
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Aggregation ¶
// Aggregation defines a single aggregation operation to apply to a field.
// Instances are carried in Request.Aggregations and are (de)serialized as
// JSON using the struct tags below.
type Aggregation struct {
	// Type is the aggregation operation to perform (one of the AGG_* values).
	Type AggregationType `json:"type"`
	// Field is the name of the data field to aggregate.
	Field string `json:"field"`
	// Label is an optional output label for the aggregation result.
	// It is omitted from the JSON form when empty.
	Label string `json:"label,omitempty"`
	// Params holds type-specific configuration as raw JSON.
	// Used by aggregation types that require additional parameters (e.g., AGG_PERCENTILE).
	// It is kept undecoded here and omitted from the JSON form when empty.
	Params json.RawMessage `json:"params,omitempty"`
}
Aggregation defines a single aggregation operation to apply to a field.
type AggregationType ¶
// AggregationType is the string-backed identifier stored in Aggregation.Type.
// Its defined values are the AGG_* constants.
type AggregationType string
AggregationType identifies a specific aggregation operation.
const ( AGG_COUNT AggregationType = "AGG_COUNT" AGG_SUM AggregationType = "AGG_SUM" AGG_AVERAGE AggregationType = "AGG_AVERAGE" AGG_MIN AggregationType = "AGG_MIN" AGG_MAX AggregationType = "AGG_MAX" AGG_STDDEV AggregationType = "AGG_STDDEV" AGG_RANGE AggregationType = "AGG_RANGE" AGG_FREQUENCY AggregationType = "AGG_FREQUENCY" AGG_ZSCORE AggregationType = "AGG_ZSCORE" AGG_MEDIAN AggregationType = "AGG_MEDIAN" AGG_VARIANCE AggregationType = "AGG_VARIANCE" AGG_MODE AggregationType = "AGG_MODE" AGG_SKEWNESS AggregationType = "AGG_SKEWNESS" AGG_KURTOSIS AggregationType = "AGG_KURTOSIS" AGG_DISTINCT_COUNT AggregationType = "AGG_DISTINCT_COUNT" AGG_PERCENTILE AggregationType = "AGG_PERCENTILE" AGG_GEO_CENTROID AggregationType = "AGG_GEO_CENTROID" AGG_GEO_BBOX AggregationType = "AGG_GEO_BBOX" )
func AllAggregationTypes ¶
func AllAggregationTypes() []AggregationType
AllAggregationTypes returns all defined aggregation types.
func (AggregationType) Streamable ¶ added in v0.2.0
func (t AggregationType) Streamable() bool
Streamable reports whether this aggregation type supports the single-pass streaming execution path. Streamable aggregators implement processing.OnlineAggregator (UpdateRow + Finalize) and produce a result with O(1) or O(unique) state per row.
Source of truth for predict.Streamable; cross-checked at test time against the processing registry by TestRegistryStreamabilityMatchesTypes.
The default branch returns false, so newly added aggregator types must opt in explicitly.
type Attribute ¶
// Attribute defines a derived attribute computation. Instances are carried
// in Request.Attributes and are (de)serialized as JSON using the struct tags
// below.
type Attribute struct {
	// Type is the attribute computation to perform (one of the ATTR_* values).
	Type AttributeType `json:"type"`
	// Field is the name of the source data field.
	Field string `json:"field"`
	// Label is the output name for the derived attribute.
	// It is omitted from the JSON form when empty.
	Label string `json:"label,omitempty"`
	// Expression is a runtime expression for ATTR_FORMULA type.
	// It is omitted from the JSON form when empty.
	Expression string `json:"expression,omitempty"`
	// Params holds type-specific configuration as raw JSON.
	// Each attribute type defines its own params schema.
	// It is kept undecoded here and omitted from the JSON form when empty.
	Params json.RawMessage `json:"params,omitempty"`
}
Attribute defines a derived attribute computation.
type AttributeType ¶
// AttributeType is the string-backed identifier stored in Attribute.Type.
// Its defined values are the ATTR_* constants.
type AttributeType string
AttributeType identifies a specific derived-attribute computation.
const ( ATTR_ZSCORE AttributeType = "ATTR_ZSCORE" ATTR_TSCORE AttributeType = "ATTR_TSCORE" ATTR_NORMALIZED AttributeType = "ATTR_NORMALIZED" ATTR_FORMULA AttributeType = "ATTR_FORMULA" ATTR_PERCENTILE AttributeType = "ATTR_PERCENTILE" ATTR_DATE_PART AttributeType = "ATTR_DATE_PART" )
func AllAttributeTypes ¶
func AllAttributeTypes() []AttributeType
AllAttributeTypes returns all defined attribute types.
func (AttributeType) Streamable ¶ added in v0.2.0
func (t AttributeType) Streamable() bool
Streamable reports whether this attribute type can be computed in a streaming path. Three tiers exist at runtime:
- Row-local: FORMULA, DATE_PART implement processing.RowLocalAttribute and execute inline with no PrePass.
- Two-pass: ZSCORE, TSCORE, NORMALIZED implement processing.TwoPassAttribute and need a PrePass over filter-passing records, Finalize, then per-row Row() in pass 2 (iter.Reset()).
- Buffered-only: PERCENTILE needs a sorted view of every value; no streaming algorithm preserves exact rank semantics.
Streamable() returns true for the first two tiers since both routes avoid materializing the full record set in memory.
type Cohort ¶
// Cohort identifies a .pulse data file for processing. It is referenced from
// Request.Cohort.
type Cohort struct {
	// Filename is the name of the .pulse file.
	// The tag has no omitempty, so the key is always present in the JSON form.
	Filename string `json:"filename"`
	// DataDir is the directory containing the cohort file.
	// It is omitted from the JSON form when empty.
	DataDir string `json:"data_dir,omitempty"`
}
Cohort identifies a .pulse data file for processing.
type ComposedRequest ¶
// ComposedRequest bundles multiple requests for batch execution.
type ComposedRequest struct {
	// Requests is the list of individual requests to execute.
	// The tag has no omitempty, so a nil slice is encoded as JSON null rather
	// than being dropped.
	Requests []*Request `json:"requests"`
}
ComposedRequest bundles multiple requests for batch execution.
type Feature ¶ added in v0.2.0
// Feature defines a feature engineering operation. Instances are carried in
// Request.Features; only Type is always present in the JSON form, every other
// field is omitted when empty.
type Feature struct {
	// Type is the feature operator to perform (one of the FEAT_* values).
	Type FeatureType `json:"type"`
	// Field is the source field name. Required by every operator except
	// FEAT_TRAIN_TEST_SPLIT (which reads no field by default — params may
	// optionally name a stratify field).
	Field string `json:"field,omitempty"`
	// Label is an output column name (single-output operators) or output
	// column prefix (multi-output operators). When empty, the operator
	// derives a default — typically "<TYPE>_<field>".
	Label string `json:"label,omitempty"`
	// Params holds operator-specific parameters as raw JSON. See the
	// feature-engineering skill for the per-operator schema.
	Params json.RawMessage `json:"params,omitempty"`
}
Feature defines a feature engineering operation. Features run pre-filter (before any FILTER_* predicate) and may produce one or more derived columns. Global-pass features (TARGET_ENCODE, FREQUENCY_ENCODE) require a stats sweep before per-row write; per-row features compute one row at a time.
type FeatureType ¶ added in v0.2.0
// FeatureType is the string-backed identifier stored in Feature.Type.
// Its defined values are the FEAT_* constants.
type FeatureType string
FeatureType identifies a specific ML feature engineering operator. Features run pre-filter and may emit one or more output columns.
const ( FEAT_LOG FeatureType = "FEAT_LOG" FEAT_SQRT FeatureType = "FEAT_SQRT" FEAT_BUCKETIZE FeatureType = "FEAT_BUCKETIZE" FEAT_ONE_HOT FeatureType = "FEAT_ONE_HOT" FEAT_DATE_FEATURES FeatureType = "FEAT_DATE_FEATURES" FEAT_FREQUENCY_ENCODE FeatureType = "FEAT_FREQUENCY_ENCODE" FEAT_TARGET_ENCODE FeatureType = "FEAT_TARGET_ENCODE" FEAT_TRAIN_TEST_SPLIT FeatureType = "FEAT_TRAIN_TEST_SPLIT" )
func AllFeatureTypes ¶ added in v0.2.0
func AllFeatureTypes() []FeatureType
AllFeatureTypes returns every defined feature type in alphabetical order.
func (FeatureType) Streamable ¶ added in v0.2.0
func (t FeatureType) Streamable() bool
Streamable reports whether this feature type can run in the pre-pass+finalize+emit streaming pipeline (feature.StreamingComputer).
Source of truth is feature.IsStreamable(req.Features, schema) at runtime; this method mirrors the per-type capability used by predict.
type FileRequest ¶
// FileRequest identifies a file for operations like inspect.
type FileRequest struct {
	// Filename is the name of the file.
	// The tag has no omitempty, so the key is always present in the JSON form.
	Filename string `json:"filename"`
	// DataDir is the directory containing the file.
	// It is omitted from the JSON form when empty.
	DataDir string `json:"data_dir,omitempty"`
}
FileRequest identifies a file for operations like inspect.
type FileResponse ¶
// FileResponse describes a file's metadata.
type FileResponse struct {
	// Filename is the name of the file.
	Filename string `json:"filename"`
	// RecordCount is the number of records in the file.
	// The tag has no omitempty, so a count of 0 is still emitted.
	RecordCount int64 `json:"record_count"`
	// Fields is the list of field names in the file.
	// It is omitted from the JSON form when the slice is empty.
	Fields []string `json:"fields,omitempty"`
}
FileResponse describes a file's metadata.
type Filterer ¶
// Filterer defines a filter to apply to records before processing. Instances
// are carried in Request.Filterers; only Type is always present in the JSON
// form, every other field is omitted when empty.
type Filterer struct {
	// Type is the filter operation to perform (one of the FILTER_* values).
	Type FiltererType `json:"type"`
	// Field is the name of the data field to filter on.
	// Not required for FILTER_EXPRESSION.
	Field string `json:"field,omitempty"`
	// Values is a list of values for include/exclude/range filters.
	// Values are carried as strings regardless of the underlying field type.
	Values []string `json:"values,omitempty"`
	// Expression is a runtime expression for FILTER_EXPRESSION type.
	Expression string `json:"expression,omitempty"`
}
Filterer defines a filter to apply to records before processing.
type FiltererType ¶
// FiltererType is the string-backed identifier stored in Filterer.Type.
// Its defined values are the FILTER_* constants.
type FiltererType string
FiltererType identifies a specific filter operation.
const ( FILTER_INCLUDE FiltererType = "FILTER_INCLUDE" FILTER_EXCLUDE FiltererType = "FILTER_EXCLUDE" FILTER_RANGE FiltererType = "FILTER_RANGE" FILTER_EXPRESSION FiltererType = "FILTER_EXPRESSION" FILTER_GEO_WITHIN FiltererType = "FILTER_GEO_WITHIN" FILTER_GEO_WITHIN_RADIUS_M FiltererType = "FILTER_GEO_WITHIN_RADIUS_M" )
func AllFiltererTypes ¶
func AllFiltererTypes() []FiltererType
AllFiltererTypes returns all defined filterer types.
func (FiltererType) Streamable ¶ added in v0.2.0
func (t FiltererType) Streamable() bool
Streamable reports whether this filterer type evaluates per-row without looking at other rows. All registered filterers are row-local today.
type FrameSpec ¶ added in v0.2.0
// FrameSpec specifies the window frame bounds. It is referenced from
// Window.Frame.
type FrameSpec struct {
	// Mode is the frame mode. "rows" is the only mode supported in v1.
	Mode string `json:"mode"`
	// Preceding bounds the frame before the current row. Nil means
	// UNBOUNDED PRECEDING. It is a pointer so that an explicit 0 can be
	// told apart from "not set"; only nil is omitted from the JSON form.
	Preceding *int `json:"preceding,omitempty"`
	// Following bounds the frame after the current row. Nil means
	// UNBOUNDED FOLLOWING. Following==0 together with Preceding==0 selects
	// the current row only.
	Following *int `json:"following,omitempty"`
}
FrameSpec specifies the window frame bounds. Mode must be "rows", the only frame mode supported in v1. A nil Preceding means UNBOUNDED PRECEDING; a nil Following means UNBOUNDED FOLLOWING. Following==0 with Preceding==0 selects the current row only.
type Group ¶
// Group defines a grouping operation to partition results. Instances are
// carried in Request.Groups.
type Group struct {
	// Type is the grouping operation to perform.
	Type GroupType `json:"type"`
	// Field is the name of the data field to group by.
	Field string `json:"field"`
	// Interval is used by GROUP_ROUNDED and GROUP_RANGE to define the bucket width.
	// It is omitted from the JSON form when zero.
	Interval float64 `json:"interval,omitempty"`
	// Params holds type-specific configuration as raw JSON.
	// Used by group types that require additional parameters (e.g., GROUP_DATE).
	// It is kept undecoded here and omitted from the JSON form when empty.
	Params json.RawMessage `json:"params,omitempty"`
}
Group defines a grouping operation to partition results.
type GroupType ¶
// GroupType is the string-backed identifier stored in Group.Type.
type GroupType string
GroupType identifies a specific grouping operation.
func AllGroupTypes ¶
func AllGroupTypes() []GroupType
AllGroupTypes returns all defined group types.
func (GroupType) Streamable ¶ added in v0.2.0
func (t GroupType) Streamable() bool
Streamable reports whether this group type can emit groups before the input is exhausted. CATEGORY/ROUNDED/RANGE/H3_CELL bucket per row; QUANTILE/DATE require finalize-time work over the full set.
The streaming Process path does not currently emit grouped output even for streamable groupers — Request.Streamable returns false whenever groups are present. The method is wired through so a future grouped streaming iterator can flip the gate without re-deriving the rule.
type Output ¶
// Output configures how processing results are formatted. Instances are
// carried in Request.Outputs.
type Output struct {
	// Format is the output format (e.g., "json", "csv").
	Format string `json:"format"`
	// Filename is an optional output file path.
	// It is omitted from the JSON form when empty.
	Filename string `json:"filename,omitempty"`
	// Pretty enables indented/formatted output.
	// It is omitted from the JSON form when false.
	Pretty bool `json:"pretty,omitempty"`
	// IncludeNil controls whether nil/null values appear in output.
	// It is omitted from the JSON form when false.
	IncludeNil bool `json:"include_nil,omitempty"`
}
Output configures how processing results are formatted.
type Request ¶
// Request is the primary processing request type. It names a cohort and
// optionally lists filters, aggregations, attributes, groups, outputs,
// windows, features, and sort keys. Every field carries omitempty, so unset
// (nil or empty) fields are dropped from the JSON form.
type Request struct {
	// Cohort identifies the .pulse file to process.
	Cohort *Cohort `json:"cohort,omitempty"`
	// Filterers is the list of filters to apply before processing.
	Filterers []*Filterer `json:"filterers,omitempty"`
	// Aggregations is the list of aggregation operations.
	Aggregations []*Aggregation `json:"aggregations,omitempty"`
	// Attributes is the list of derived attribute computations.
	Attributes []*Attribute `json:"attributes,omitempty"`
	// Groups is the list of grouping operations.
	Groups []*Group `json:"groups,omitempty"`
	// Outputs configures result formatting.
	Outputs []*Output `json:"outputs,omitempty"`
	// Windows is the list of window operations evaluated after aggregation.
	Windows []*Window `json:"windows,omitempty"`
	// Features is the list of pre-filter feature engineering operators.
	// Each operator may add one or more derived columns to the working
	// schema before filters and downstream stages run.
	Features []*Feature `json:"features,omitempty"`
	// Sort orders response rows by the listed keys. Applied last in the
	// pipeline (after windows). Each key field must reference a schema
	// field, an aggregation/attribute/group/window output label, or any
	// column produced by upstream stages. Stable sort; nulls last
	// regardless of direction.
	Sort []OrderKey `json:"sort,omitempty"`
}
Request is the primary processing request type. It specifies a cohort, filters, aggregations, attributes, groups, and output config, plus optional windows, pre-filter features, and sort keys.
type Response ¶
// Response is the processing result type. Both fields are omitted from the
// JSON form when empty/nil.
type Response struct {
	// Data holds the result rows as key-value maps.
	Data []map[string]any `json:"data,omitempty"`
	// Metadata holds information about the processing run.
	Metadata *ResponseMetadata `json:"metadata,omitempty"`
}
Response is the processing result type.
type ResponseMetadata ¶
// ResponseMetadata holds metadata about a processing result. It is referenced
// from Response.Metadata.
type ResponseMetadata struct {
	// TotalRows is the total number of records in the cohort.
	// The tag has no omitempty, so a count of 0 is still emitted.
	TotalRows int64 `json:"total_rows"`
	// FilteredRows is the number of records after filtering.
	// The tag has no omitempty, so a count of 0 is still emitted.
	FilteredRows int64 `json:"filtered_rows"`
	// CohortFile is the filename of the processed cohort.
	// It is omitted from the JSON form when empty.
	CohortFile string `json:"cohort_file,omitempty"`
}
ResponseMetadata holds metadata about a processing result.
type VersionResponse ¶
// VersionResponse provides build and version information.
type VersionResponse struct {
	// Version is the semantic version string.
	Version string `json:"version"`
	// BuildDate is the build timestamp, carried as a string.
	// It is omitted from the JSON form when empty.
	BuildDate string `json:"build_date,omitempty"`
	// GoVersion is the Go compiler version used.
	// It is omitted from the JSON form when empty.
	GoVersion string `json:"go_version,omitempty"`
}
VersionResponse provides build and version information.
type Window ¶ added in v0.2.0
// Window defines a window operation. Instances are carried in
// Request.Windows, which are evaluated after aggregation.
type Window struct {
	// Type is the window operation to perform (one of the WIN_* values).
	Type WindowType `json:"type"`
	// Field is the source field name. Required for all operators except
	// ROW_NUMBER, RANK, and DENSE_RANK.
	Field string `json:"field,omitempty"`
	// Label is the output column name. When empty, defaults to "<TYPE>_<field>"
	// or "<TYPE>" for ROW_NUMBER/RANK/DENSE_RANK.
	Label string `json:"label,omitempty"`
	// PartitionBy is the list of fields whose distinct values define
	// independent partitions for window evaluation. Empty means a single
	// partition over all rows.
	PartitionBy []string `json:"partition_by,omitempty"`
	// OrderBy is the list of order keys. Required (≥1) for every window operator.
	// Unlike the optional fields, its tag has no omitempty.
	OrderBy []OrderKey `json:"order_by"`
	// Frame is the window frame specification. Required for RUNNING_*, MOVING_AVG,
	// and EWMA. Rejected for LAG, LEAD, ROW_NUMBER, RANK, DENSE_RANK, PCT_CHANGE.
	Frame *FrameSpec `json:"frame,omitempty"`
	// Params holds operator-specific parameters as raw JSON.
	// LAG/LEAD: {"offset": 1, "default": null}
	// EWMA: {"alpha": 0.5}
	// PCT_CHANGE: {"periods": 1}
	Params json.RawMessage `json:"params,omitempty"`
}
Window defines a window operation.
type WindowType ¶ added in v0.2.0
// WindowType is the string-backed identifier stored in Window.Type.
// Its defined values are the WIN_* constants.
type WindowType string
WindowType identifies a specific window operation.
const ( WIN_LAG WindowType = "WIN_LAG" WIN_LEAD WindowType = "WIN_LEAD" WIN_ROW_NUMBER WindowType = "WIN_ROW_NUMBER" WIN_RANK WindowType = "WIN_RANK" WIN_DENSE_RANK WindowType = "WIN_DENSE_RANK" WIN_RUNNING_SUM WindowType = "WIN_RUNNING_SUM" WIN_RUNNING_AVG WindowType = "WIN_RUNNING_AVG" WIN_MOVING_AVG WindowType = "WIN_MOVING_AVG" WIN_EWMA WindowType = "WIN_EWMA" WIN_PCT_CHANGE WindowType = "WIN_PCT_CHANGE" )
func AllWindowTypes ¶ added in v0.2.0
func AllWindowTypes() []WindowType
AllWindowTypes returns all defined window types in alphabetical order.
func (WindowType) Streamable ¶ added in v0.2.0
func (t WindowType) Streamable() bool
Streamable reports whether this window type can be computed without buffering. All window operators run over the post-aggregate row set in a final pass; none stream today.