proto

package
v0.0.0-...-7d47eef Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 12, 2023 License: Apache-2.0, MIT Imports: 4 Imported by: 0

Documentation

Overview

Package proto includes all proto definitions used in the golang package in one large package.

It is generated from the source code using go generate tools, but the generated files are checked into GitHub, so users don't need to install anything.

Index

Constants

View Source
const (
	Default_Column_Type         = ColumnType_UNKNOWN
	Default_Column_IsManualType = bool(false)
	Default_Column_CountNas     = int64(0)
	Default_Column_IsUnstacked  = bool(false)
)

Default values for Column fields.

View Source
const (
	Default_CategoricalSpec_MinValueCount                  = int32(5)
	Default_CategoricalSpec_MaxNumberOfUniqueValues        = int32(2000)
	Default_CategoricalSpec_OffsetValueByOneDuringTraining = bool(false)
)

Default values for CategoricalSpec fields.

View Source
const (
	Default_DiscretizedNumericalSpec_MaximumNumBins = int64(255)
	Default_DiscretizedNumericalSpec_MinObsInBins   = int32(3)
)

Default values for DiscretizedNumericalSpec fields.

View Source
const (
	Default_Tokenizer_Splitter    = Tokenizer_SEPARATOR
	Default_Tokenizer_Separator   = string(" ;,")
	Default_Tokenizer_Regex       = string("([\\S]+)")
	Default_Tokenizer_ToLowerCase = bool(true)
)

Default values for Tokenizer fields.

View Source
const (
	Default_DataSpecificationGuide_IgnoreColumnsWithoutGuides              = bool(false)
	Default_DataSpecificationGuide_MaxNumScannedRowsToGuessType            = int64(1000)
	Default_DataSpecificationGuide_DetectBooleanAsNumerical                = bool(false)
	Default_DataSpecificationGuide_DetectNumericalAsDiscretizedNumerical   = bool(false)
	Default_DataSpecificationGuide_MaxNumScannedRowsToAccumulateStatistics = int64(-1)
	Default_DataSpecificationGuide_UnstackNumericalSetAsNumericals         = bool(true)
	Default_DataSpecificationGuide_IgnoreUnknownTypeColumns                = bool(false)
)

Default values for DataSpecificationGuide fields.

View Source
const (
	Default_ColumnGuide_AllowMultiMatch = bool(false)
	Default_ColumnGuide_IgnoreColumn    = bool(false)
)

Default values for ColumnGuide fields.

View Source
const (
	Default_CategoricalGuide_MinVocabFrequency = int32(5)
	Default_CategoricalGuide_MaxVocabCount     = int32(2000)
)

Default values for CategoricalGuide fields.

View Source
const (
	Default_DiscretizedNumericalGuide_MaximumNumBins = int64(255)
	Default_DiscretizedNumericalGuide_MinObsInBins   = int32(3)
)

Default values for DiscretizedNumericalGuide fields.

View Source
const (
	Default_Tokenizer_Grouping_Unigrams = bool(true)
	Default_Tokenizer_Grouping_Bigrams  = bool(false)
	Default_Tokenizer_Grouping_Trigrams = bool(false)
)

Default values for Tokenizer_Grouping fields.

View Source
const (
	Default_NumericalSpec_Mean = float64(0)
)

Default values for NumericalSpec fields.

View Source
const (
	Default_Unstacked_Type = ColumnType_UNKNOWN
)

Default values for Unstacked fields.

Variables

View Source
var (
	ColumnType_name = map[int32]string{
		0:  "UNKNOWN",
		1:  "NUMERICAL",
		2:  "NUMERICAL_SET",
		3:  "NUMERICAL_LIST",
		4:  "CATEGORICAL",
		5:  "CATEGORICAL_SET",
		6:  "CATEGORICAL_LIST",
		7:  "BOOLEAN",
		8:  "STRING",
		9:  "DISCRETIZED_NUMERICAL",
		10: "HASH",
	}
	ColumnType_value = map[string]int32{
		"UNKNOWN":               0,
		"NUMERICAL":             1,
		"NUMERICAL_SET":         2,
		"NUMERICAL_LIST":        3,
		"CATEGORICAL":           4,
		"CATEGORICAL_SET":       5,
		"CATEGORICAL_LIST":      6,
		"BOOLEAN":               7,
		"STRING":                8,
		"DISCRETIZED_NUMERICAL": 9,
		"HASH":                  10,
	}
)

Enum value maps for ColumnType.

View Source
var (
	Tokenizer_Splitter_name = map[int32]string{
		0: "INVALID",
		1: "SEPARATOR",
		2: "REGEX_MATCH",
		3: "CHARACTER",
	}
	Tokenizer_Splitter_value = map[string]int32{
		"INVALID":     0,
		"SEPARATOR":   1,
		"REGEX_MATCH": 2,
		"CHARACTER":   3,
	}
)

Enum value maps for Tokenizer_Splitter.

View Source
var File_yggdrasil_decision_forests_dataset_data_spec_proto protoreflect.FileDescriptor
View Source
var File_yggdrasil_decision_forests_dataset_weight_proto protoreflect.FileDescriptor

Functions

This section is empty.

Types

type BooleanSpec

type BooleanSpec struct {

	// Number of true values.
	CountTrue *int64 `protobuf:"varint,1,opt,name=count_true,json=countTrue" json:"count_true,omitempty"`
	// Number of false values.
	CountFalse *int64 `protobuf:"varint,2,opt,name=count_false,json=countFalse" json:"count_false,omitempty"`
	// contains filtered or unexported fields
}

Specification of a boolean column.

func (*BooleanSpec) Descriptor deprecated

func (*BooleanSpec) Descriptor() ([]byte, []int)

Deprecated: Use BooleanSpec.ProtoReflect.Descriptor instead.

func (*BooleanSpec) GetCountFalse

func (x *BooleanSpec) GetCountFalse() int64

func (*BooleanSpec) GetCountTrue

func (x *BooleanSpec) GetCountTrue() int64

func (*BooleanSpec) ProtoMessage

func (*BooleanSpec) ProtoMessage()

func (*BooleanSpec) ProtoReflect

func (x *BooleanSpec) ProtoReflect() protoreflect.Message

func (*BooleanSpec) Reset

func (x *BooleanSpec) Reset()

func (*BooleanSpec) String

func (x *BooleanSpec) String() string

type CategoricalGuide

type CategoricalGuide struct {

	// Minimum frequency of a categorical value not to be replaced by the <RARE>
	// special value.
	MinVocabFrequency *int32 `protobuf:"varint,1,opt,name=min_vocab_frequency,json=minVocabFrequency,def=5" json:"min_vocab_frequency,omitempty"`
	// Maximum number of unique categorical values. If more values are present,
	// the less frequent values are considered <RARE>.
	MaxVocabCount *int32 `protobuf:"varint,2,opt,name=max_vocab_count,json=maxVocabCount,def=2000" json:"max_vocab_count,omitempty"`
	// If is_already_integerized=false, a dictionary is built for the feature,
	// even if the feature is an integer or a float. If
	// is_already_integerized=true, the value is directly interpreted as an
	// index and should follow the following convention:
	//   - The value should be greater or equal to -1.
	//   - The value -1 is the "missing value".
	//   - The value 0 is the "out-of-dictionary value".
	//   - Several simpleML algorithms assume this is a "dense index" i.e. if the
	//     column is an input feature, it is best to have it being dense.
	IsAlreadyIntegerized *bool `protobuf:"varint,3,opt,name=is_already_integerized,json=isAlreadyIntegerized" json:"is_already_integerized,omitempty"`
	// If "is_already_integerized=true" and if
	// "number_of_already_integerized_values" is set,
	// "number_of_already_integerized_values" is the number of unique values. Such
	// attribute accepts values in [-1, number_of_already_integerized_values).
	// Values outside of this range will be considered "out-of-vocabulary".
	//
	// Note that if the dataset used to infer the dataspec contains an example
	// with a value > number_of_already_integerized_values, the example value will
	// be used instead of "number_of_already_integerized_values".
	NumberOfAlreadyIntegerizedValues *int64 `` /* 157-byte string literal not displayed */
	// contains filtered or unexported fields
}

func (*CategoricalGuide) Descriptor deprecated

func (*CategoricalGuide) Descriptor() ([]byte, []int)

Deprecated: Use CategoricalGuide.ProtoReflect.Descriptor instead.

func (*CategoricalGuide) GetIsAlreadyIntegerized

func (x *CategoricalGuide) GetIsAlreadyIntegerized() bool

func (*CategoricalGuide) GetMaxVocabCount

func (x *CategoricalGuide) GetMaxVocabCount() int32

func (*CategoricalGuide) GetMinVocabFrequency

func (x *CategoricalGuide) GetMinVocabFrequency() int32

func (*CategoricalGuide) GetNumberOfAlreadyIntegerizedValues

func (x *CategoricalGuide) GetNumberOfAlreadyIntegerizedValues() int64

func (*CategoricalGuide) ProtoMessage

func (*CategoricalGuide) ProtoMessage()

func (*CategoricalGuide) ProtoReflect

func (x *CategoricalGuide) ProtoReflect() protoreflect.Message

func (*CategoricalGuide) Reset

func (x *CategoricalGuide) Reset()

func (*CategoricalGuide) String

func (x *CategoricalGuide) String() string

type CategoricalSpec

type CategoricalSpec struct {

	// The most frequent value.
	MostFrequentValue *int64 `protobuf:"varint,1,opt,name=most_frequent_value,json=mostFrequentValue" json:"most_frequent_value,omitempty"`
	// The number of unique values (including the reserved OOD(=0) value).
	// All the values should be 0 <= value < number_of_unique_values.
	//
	// The value "0" is reserved for the out-of-dictionary value. Therefore, in
	// the case of a categorical column with two possible values "X" and "Y", the
	// proto will be:
	//
	//   number_of_unique_values = 3
	//   is_already_integerized=false
	//   items { key: "OOD" value { index: 0 }}
	//   items { key: "X" value { index: 1 }}
	//   items { key: "Y" value { index: 2 }}
	//
	// Missing values are implicit and take index=-1. They don't need to be
	// specified in "items".
	NumberOfUniqueValues *int64 `protobuf:"varint,2,opt,name=number_of_unique_values,json=numberOfUniqueValues" json:"number_of_unique_values,omitempty"`
	// Minimum frequency of a value not to be replaced by the <OOD> special
	// value. Used when computing value dictionary.
	MinValueCount *int32 `protobuf:"varint,3,opt,name=min_value_count,json=minValueCount,def=5" json:"min_value_count,omitempty"`
	// Maximum number of unique categorical values. If more values are present,
	// the less frequent values are considered <OOD>. Used when computing value
	// dictionary. If "max_number_of_unique_values" == -1, the items are not
	// pruned.
	MaxNumberOfUniqueValues *int32 `` /* 139-byte string literal not displayed */
	// If true, values are interpreted directly as integers. If false, values
	// are indexed in the "items" dictionary.
	IsAlreadyIntegerized *bool `protobuf:"varint,5,opt,name=is_already_integerized,json=isAlreadyIntegerized" json:"is_already_integerized,omitempty"`
	// Dictionary of values. Only available if is_already_integerized=false. In
	// this case, items.size() is equal to number_of_unique_values.
	Items map[string]*CategoricalSpec_VocabValue `` /* 130-byte string literal not displayed */
	// If true, integer categorical values  provided by the user have been offset
	// by 1. Such pre-processing is done in TensorFlow Decision Forests. See
	// "CATEGORICAL_INTEGER_OFFSET".
	OffsetValueByOneDuringTraining *bool `` /* 159-byte string literal not displayed */
	// contains filtered or unexported fields
}

Specification of a categorical column.

func (*CategoricalSpec) Descriptor deprecated

func (*CategoricalSpec) Descriptor() ([]byte, []int)

Deprecated: Use CategoricalSpec.ProtoReflect.Descriptor instead.

func (*CategoricalSpec) GetIsAlreadyIntegerized

func (x *CategoricalSpec) GetIsAlreadyIntegerized() bool

func (*CategoricalSpec) GetItems

func (*CategoricalSpec) GetMaxNumberOfUniqueValues

func (x *CategoricalSpec) GetMaxNumberOfUniqueValues() int32

func (*CategoricalSpec) GetMinValueCount

func (x *CategoricalSpec) GetMinValueCount() int32

func (*CategoricalSpec) GetMostFrequentValue

func (x *CategoricalSpec) GetMostFrequentValue() int64

func (*CategoricalSpec) GetNumberOfUniqueValues

func (x *CategoricalSpec) GetNumberOfUniqueValues() int64

func (*CategoricalSpec) GetOffsetValueByOneDuringTraining

func (x *CategoricalSpec) GetOffsetValueByOneDuringTraining() bool

func (*CategoricalSpec) ProtoMessage

func (*CategoricalSpec) ProtoMessage()

func (*CategoricalSpec) ProtoReflect

func (x *CategoricalSpec) ProtoReflect() protoreflect.Message

func (*CategoricalSpec) Reset

func (x *CategoricalSpec) Reset()

func (*CategoricalSpec) String

func (x *CategoricalSpec) String() string

type CategoricalSpec_VocabValue

type CategoricalSpec_VocabValue struct {

	// Index of the value.
	Index *int64 `protobuf:"varint,1,opt,name=index" json:"index,omitempty"`
	// Frequency of the value.
	Count *int64 `protobuf:"varint,2,opt,name=count" json:"count,omitempty"`
	// contains filtered or unexported fields
}

Possible value of a non integerized categorical, categorical set, or categorical list attribute.

func (*CategoricalSpec_VocabValue) Descriptor deprecated

func (*CategoricalSpec_VocabValue) Descriptor() ([]byte, []int)

Deprecated: Use CategoricalSpec_VocabValue.ProtoReflect.Descriptor instead.

func (*CategoricalSpec_VocabValue) GetCount

func (x *CategoricalSpec_VocabValue) GetCount() int64

func (*CategoricalSpec_VocabValue) GetIndex

func (x *CategoricalSpec_VocabValue) GetIndex() int64

func (*CategoricalSpec_VocabValue) ProtoMessage

func (*CategoricalSpec_VocabValue) ProtoMessage()

func (*CategoricalSpec_VocabValue) ProtoReflect

func (*CategoricalSpec_VocabValue) Reset

func (x *CategoricalSpec_VocabValue) Reset()

func (*CategoricalSpec_VocabValue) String

func (x *CategoricalSpec_VocabValue) String() string

type Column

type Column struct {

	// Type of data.
	Type *ColumnType `protobuf:"varint,1,opt,name=type,enum=yggdrasil_decision_forests.dataset.proto.ColumnType,def=0" json:"type,omitempty"`
	// Column unique name.
	Name *string `protobuf:"bytes,2,opt,name=name" json:"name,omitempty"`
	// If true, the type is set manually by the user (instead of being
	// automatically detected). This field is purely used for debugging purposes
	// and has no impact on the computation. Note that if a column guide matches
	// this column, and if this column guide does not contain a type,
	// is_manual_type is set to false (as if there were no column guide match).
	IsManualType *bool `protobuf:"varint,3,opt,name=is_manual_type,json=isManualType,def=0" json:"is_manual_type,omitempty"`
	// Tokenization. For non-integerized list or sets columns (numerical or
	// categorical).
	Tokenizer *Tokenizer `protobuf:"bytes,4,opt,name=tokenizer" json:"tokenizer,omitempty"`
	// Data for numerical (simple, list or set) attribute types.
	Numerical *NumericalSpec `protobuf:"bytes,5,opt,name=numerical" json:"numerical,omitempty"`
	// Data for categorical (simple, list or set) attribute types.
	Categorical *CategoricalSpec `protobuf:"bytes,6,opt,name=categorical" json:"categorical,omitempty"`
	// Number of NAs (i.e. not available) record when building the dataspec.
	CountNas *int64 `protobuf:"varint,7,opt,name=count_nas,json=countNas,def=0" json:"count_nas,omitempty"`
	// Numerical value stored as an index + a dictionary.
	DiscretizedNumerical *DiscretizedNumericalSpec `protobuf:"bytes,8,opt,name=discretized_numerical,json=discretizedNumerical" json:"discretized_numerical,omitempty"`
	// Data for boolean attribute types.
	Boolean *BooleanSpec `protobuf:"bytes,9,opt,name=boolean" json:"boolean,omitempty"`
	// For all the types defined as a collection of multiple values.
	MultiValues *MultiValuesSpec `protobuf:"bytes,10,opt,name=multi_values,json=multiValues" json:"multi_values,omitempty"`
	// Is the feature derived from unstacking a multi-dimensional dimension?
	IsUnstacked *bool `protobuf:"varint,11,opt,name=is_unstacked,json=isUnstacked,def=0" json:"is_unstacked,omitempty"`
	// contains filtered or unexported fields
}

Definition of a column in a dataset.

func (*Column) Descriptor deprecated

func (*Column) Descriptor() ([]byte, []int)

Deprecated: Use Column.ProtoReflect.Descriptor instead.

func (*Column) GetBoolean

func (x *Column) GetBoolean() *BooleanSpec

func (*Column) GetCategorical

func (x *Column) GetCategorical() *CategoricalSpec

func (*Column) GetCountNas

func (x *Column) GetCountNas() int64

func (*Column) GetDiscretizedNumerical

func (x *Column) GetDiscretizedNumerical() *DiscretizedNumericalSpec

func (*Column) GetIsManualType

func (x *Column) GetIsManualType() bool

func (*Column) GetIsUnstacked

func (x *Column) GetIsUnstacked() bool

func (*Column) GetMultiValues

func (x *Column) GetMultiValues() *MultiValuesSpec

func (*Column) GetName

func (x *Column) GetName() string

func (*Column) GetNumerical

func (x *Column) GetNumerical() *NumericalSpec

func (*Column) GetTokenizer

func (x *Column) GetTokenizer() *Tokenizer

func (*Column) GetType

func (x *Column) GetType() ColumnType

func (*Column) ProtoMessage

func (*Column) ProtoMessage()

func (*Column) ProtoReflect

func (x *Column) ProtoReflect() protoreflect.Message

func (*Column) Reset

func (x *Column) Reset()

func (*Column) String

func (x *Column) String() string

type ColumnGuide

type ColumnGuide struct {

	// Regular expression on the column name.
	ColumnNamePattern *string `protobuf:"bytes,1,opt,name=column_name_pattern,json=columnNamePattern" json:"column_name_pattern,omitempty"`
	// Type of the column.
	Type       *ColumnType       `protobuf:"varint,2,opt,name=type,enum=yggdrasil_decision_forests.dataset.proto.ColumnType" json:"type,omitempty"`
	Categorial *CategoricalGuide `protobuf:"bytes,3,opt,name=categorial" json:"categorial,omitempty"`
	Numerical  *NumericalGuide   `protobuf:"bytes,4,opt,name=numerical" json:"numerical,omitempty"`
	// If "tokenizer" is specified, and if the dataset container can represent a
	// list of token natively (i.e. list of strings e.g. tf.Example), the first
	// string entry (if any) will be tokenized. If the attribute contains more
	// than one entry, an error will be raised.
	Tokenizer *TokenizerGuide `protobuf:"bytes,5,opt,name=tokenizer" json:"tokenizer,omitempty"`
	// If true, a column can be matched against multiple different "ColumnGuide"
	// with the last ColumnGuide having higher priority. For example, if the
	// "type" is set in two matching column guides, the type defined in the last
	// column guide will be used. If false, an error will be raised if more than
	// one column guide is matching a column.
	AllowMultiMatch      *bool                      `protobuf:"varint,6,opt,name=allow_multi_match,json=allowMultiMatch,def=0" json:"allow_multi_match,omitempty"`
	DiscretizedNumerical *DiscretizedNumericalGuide `protobuf:"bytes,7,opt,name=discretized_numerical,json=discretizedNumerical" json:"discretized_numerical,omitempty"`
	// If true, matching columns are ignored and won't be in the dataspec.
	IgnoreColumn *bool `protobuf:"varint,8,opt,name=ignore_column,json=ignoreColumn,def=0" json:"ignore_column,omitempty"`
	// contains filtered or unexported fields
}

func (*ColumnGuide) Descriptor deprecated

func (*ColumnGuide) Descriptor() ([]byte, []int)

Deprecated: Use ColumnGuide.ProtoReflect.Descriptor instead.

func (*ColumnGuide) GetAllowMultiMatch

func (x *ColumnGuide) GetAllowMultiMatch() bool

func (*ColumnGuide) GetCategorial

func (x *ColumnGuide) GetCategorial() *CategoricalGuide

func (*ColumnGuide) GetColumnNamePattern

func (x *ColumnGuide) GetColumnNamePattern() string

func (*ColumnGuide) GetDiscretizedNumerical

func (x *ColumnGuide) GetDiscretizedNumerical() *DiscretizedNumericalGuide

func (*ColumnGuide) GetIgnoreColumn

func (x *ColumnGuide) GetIgnoreColumn() bool

func (*ColumnGuide) GetNumerical

func (x *ColumnGuide) GetNumerical() *NumericalGuide

func (*ColumnGuide) GetTokenizer

func (x *ColumnGuide) GetTokenizer() *TokenizerGuide

func (*ColumnGuide) GetType

func (x *ColumnGuide) GetType() ColumnType

func (*ColumnGuide) ProtoMessage

func (*ColumnGuide) ProtoMessage()

func (*ColumnGuide) ProtoReflect

func (x *ColumnGuide) ProtoReflect() protoreflect.Message

func (*ColumnGuide) Reset

func (x *ColumnGuide) Reset()

func (*ColumnGuide) String

func (x *ColumnGuide) String() string

type ColumnType

type ColumnType int32

Type of dataset columns.

const (
	ColumnType_UNKNOWN               ColumnType = 0
	ColumnType_NUMERICAL             ColumnType = 1
	ColumnType_NUMERICAL_SET         ColumnType = 2
	ColumnType_NUMERICAL_LIST        ColumnType = 3
	ColumnType_CATEGORICAL           ColumnType = 4
	ColumnType_CATEGORICAL_SET       ColumnType = 5
	ColumnType_CATEGORICAL_LIST      ColumnType = 6
	ColumnType_BOOLEAN               ColumnType = 7
	ColumnType_STRING                ColumnType = 8
	ColumnType_DISCRETIZED_NUMERICAL ColumnType = 9
	ColumnType_HASH                  ColumnType = 10
)

func (ColumnType) Descriptor

func (ColumnType) Descriptor() protoreflect.EnumDescriptor

func (ColumnType) Enum

func (x ColumnType) Enum() *ColumnType

func (ColumnType) EnumDescriptor deprecated

func (ColumnType) EnumDescriptor() ([]byte, []int)

Deprecated: Use ColumnType.Descriptor instead.

func (ColumnType) Number

func (x ColumnType) Number() protoreflect.EnumNumber

func (ColumnType) String

func (x ColumnType) String() string

func (ColumnType) Type

func (*ColumnType) UnmarshalJSON deprecated

func (x *ColumnType) UnmarshalJSON(b []byte) error

Deprecated: Do not use.

type DataSpecification

type DataSpecification struct {

	// The columns.
	Columns []*Column `protobuf:"bytes,1,rep,name=columns" json:"columns,omitempty"`
	// The number of rows of the dataset used to create this dataset (if a
	// dataset was used).
	CreatedNumRows *int64 `protobuf:"varint,2,opt,name=created_num_rows,json=createdNumRows" json:"created_num_rows,omitempty"`
	// Meta-data about features that were unstacked e.g. with the
	// "unstack_numerical_set_as_numericals" control field.
	Unstackeds []*Unstacked `protobuf:"bytes,3,rep,name=unstackeds" json:"unstackeds,omitempty"`
	// contains filtered or unexported fields
}

Specification of the columns of a dataset. Lists the available columns (including their names, types, and extra information, e.g. dictionaries).

func (*DataSpecification) Descriptor deprecated

func (*DataSpecification) Descriptor() ([]byte, []int)

Deprecated: Use DataSpecification.ProtoReflect.Descriptor instead.

func (*DataSpecification) GetColumns

func (x *DataSpecification) GetColumns() []*Column

func (*DataSpecification) GetCreatedNumRows

func (x *DataSpecification) GetCreatedNumRows() int64

func (*DataSpecification) GetUnstackeds

func (x *DataSpecification) GetUnstackeds() []*Unstacked

func (*DataSpecification) ProtoMessage

func (*DataSpecification) ProtoMessage()

func (*DataSpecification) ProtoReflect

func (x *DataSpecification) ProtoReflect() protoreflect.Message

func (*DataSpecification) Reset

func (x *DataSpecification) Reset()

func (*DataSpecification) String

func (x *DataSpecification) String() string

type DataSpecificationAccumulator

type DataSpecificationAccumulator struct {
	Columns []*DataSpecificationAccumulator_Column `protobuf:"bytes,1,rep,name=columns" json:"columns,omitempty"`
	// contains filtered or unexported fields
}

Structure containing intermediary information for the computation of a DataSpecification.

func (*DataSpecificationAccumulator) Descriptor deprecated

func (*DataSpecificationAccumulator) Descriptor() ([]byte, []int)

Deprecated: Use DataSpecificationAccumulator.ProtoReflect.Descriptor instead.

func (*DataSpecificationAccumulator) GetColumns

func (*DataSpecificationAccumulator) ProtoMessage

func (*DataSpecificationAccumulator) ProtoMessage()

func (*DataSpecificationAccumulator) ProtoReflect

func (*DataSpecificationAccumulator) Reset

func (x *DataSpecificationAccumulator) Reset()

func (*DataSpecificationAccumulator) String

type DataSpecificationAccumulator_Column

type DataSpecificationAccumulator_Column struct {

	// Sum and sum of error for the Kahan summation. Used for numerical columns.
	KahanSum              *float64 `protobuf:"fixed64,1,opt,name=kahan_sum,json=kahanSum" json:"kahan_sum,omitempty"`
	KahanSumError         *float64 `protobuf:"fixed64,2,opt,name=kahan_sum_error,json=kahanSumError" json:"kahan_sum_error,omitempty"`
	MinValue              *float64 `protobuf:"fixed64,3,opt,name=min_value,json=minValue" json:"min_value,omitempty"`
	MaxValue              *float64 `protobuf:"fixed64,4,opt,name=max_value,json=maxValue" json:"max_value,omitempty"`
	KahanSumOfSquare      *float64 `protobuf:"fixed64,6,opt,name=kahan_sum_of_square,json=kahanSumOfSquare" json:"kahan_sum_of_square,omitempty"`
	KahanSumOfSquareError *float64 `protobuf:"fixed64,7,opt,name=kahan_sum_of_square_error,json=kahanSumOfSquareError" json:"kahan_sum_of_square_error,omitempty"`
	// Mapping between float values (represented as an uint32) and the number of
	// times this value was seen.
	//
	// Note: Maps don't allow float keys.
	DiscretizedNumerical map[uint32]int32 `` /* 191-byte string literal not displayed */
	// contains filtered or unexported fields
}

func (*DataSpecificationAccumulator_Column) Descriptor deprecated

func (*DataSpecificationAccumulator_Column) Descriptor() ([]byte, []int)

Deprecated: Use DataSpecificationAccumulator_Column.ProtoReflect.Descriptor instead.

func (*DataSpecificationAccumulator_Column) GetDiscretizedNumerical

func (x *DataSpecificationAccumulator_Column) GetDiscretizedNumerical() map[uint32]int32

func (*DataSpecificationAccumulator_Column) GetKahanSum

func (*DataSpecificationAccumulator_Column) GetKahanSumError

func (x *DataSpecificationAccumulator_Column) GetKahanSumError() float64

func (*DataSpecificationAccumulator_Column) GetKahanSumOfSquare

func (x *DataSpecificationAccumulator_Column) GetKahanSumOfSquare() float64

func (*DataSpecificationAccumulator_Column) GetKahanSumOfSquareError

func (x *DataSpecificationAccumulator_Column) GetKahanSumOfSquareError() float64

func (*DataSpecificationAccumulator_Column) GetMaxValue

func (*DataSpecificationAccumulator_Column) GetMinValue

func (*DataSpecificationAccumulator_Column) ProtoMessage

func (*DataSpecificationAccumulator_Column) ProtoMessage()

func (*DataSpecificationAccumulator_Column) ProtoReflect

func (*DataSpecificationAccumulator_Column) Reset

func (*DataSpecificationAccumulator_Column) String

type DataSpecificationGuide

type DataSpecificationGuide struct {

	// Guide applied to one or a sub-set of columns according to a regular
	// expression match.
	ColumnGuides []*ColumnGuide `protobuf:"bytes,1,rep,name=column_guides,json=columnGuides" json:"column_guides,omitempty"`
	// Default guide for all columns.
	// Also apply to columns matched with "column_guides", but with a lower
	// priority. For example, if a configuration option is set both in
	// "default_column_guide" and "column_guides", the value in "column_guides"
	// will be used.
	DefaultColumnGuide *ColumnGuide `protobuf:"bytes,2,opt,name=default_column_guide,json=defaultColumnGuide" json:"default_column_guide,omitempty"`
	// If true, columns that don't match any "column_guides" regular expression
	// are ignored.
	IgnoreColumnsWithoutGuides *bool `` /* 143-byte string literal not displayed */
	// Maximum number of rows to scan to infer the column types.
	// Set the value "-1" to use all rows (i.e. use the entire dataset).
	// Note: The type inference logic is only used if the user does not specify
	// the type manually.
	MaxNumScannedRowsToGuessType *int64 `` /* 158-byte string literal not displayed */
	// If true, columns initially detected as BOOLEAN (i.e. only containing "0"
	// and "1" values) will be detected as NUMERICAL.
	DetectBooleanAsNumerical *bool `` /* 137-byte string literal not displayed */
	// Detects numerical values (i.e. NUMERICAL) as DISCRETIZED_NUMERICAL.
	// DISCRETIZED_NUMERICAL values are discretized at loading time. Some
	// algorithms (e.g. the simpleML decision forest algorithms) will handle
	// NUMERICAL and DISCRETIZED_NUMERICAL types differently. Generally,
	// discretized columns are faster to train but can lead to sub-optimal models.
	DetectNumericalAsDiscretizedNumerical *bool `` /* 178-byte string literal not displayed */
	// Maximum number of rows to scan to compute column statistics (e.g.
	// dictionary, ratio of missing values, mean value).
	// Set the value "-1" to use all rows (i.e. use the entire dataset).
	MaxNumScannedRowsToAccumulateStatistics *int64 `` /* 189-byte string literal not displayed */
	// If true, unstack numerical sets as multiple numerical features. This
	// operation is useful to consume multi-dimensional numerical vectors i.e.
	// list of numerical values with always the same size and semantic per
	// dimension.
	UnstackNumericalSetAsNumericals *bool `` /* 160-byte string literal not displayed */
	// Remove columns of unknown type. For example, if the column has no values
	// (all the values are missing) and its type is not specified by the user.
	IgnoreUnknownTypeColumns *bool `` /* 137-byte string literal not displayed */
	// contains filtered or unexported fields
}

Configuration for the automated "inference" logic of the data specification (see header for the definition of data specification). For example, the DataSpecificationGuide allows expressing the following:

  • The column called "feature_1" is NUMERICAL.
  • The columns matching the regex "num_feature_.*" are NUMERICAL.
  • Ignore the column called "feature_1".
  • Ignore the columns matching the regex "num_feature_.*".
  • Ignore the columns matching none of the set rules.
  • The column called "feature_1" is a CATEGORICAL_SET and should be tokenized by commas.
  • The column called "feature_1" is a CATEGORICAL and the categorical values seen less than 50 times should be ignored (considered out-of-bag).
  • The size of the CATEGORICAL and CATEGORICAL_SET column dictionaries should not have more than 1000 items.
  • Columns that look BOOLEAN should be interpreted as NUMERICAL.
  • Use the first 100'000 records in the dataset to best infer the semantics of the columns.

func (*DataSpecificationGuide) Descriptor deprecated

func (*DataSpecificationGuide) Descriptor() ([]byte, []int)

Deprecated: Use DataSpecificationGuide.ProtoReflect.Descriptor instead.

func (*DataSpecificationGuide) GetColumnGuides

func (x *DataSpecificationGuide) GetColumnGuides() []*ColumnGuide

func (*DataSpecificationGuide) GetDefaultColumnGuide

func (x *DataSpecificationGuide) GetDefaultColumnGuide() *ColumnGuide

func (*DataSpecificationGuide) GetDetectBooleanAsNumerical

func (x *DataSpecificationGuide) GetDetectBooleanAsNumerical() bool

func (*DataSpecificationGuide) GetDetectNumericalAsDiscretizedNumerical

func (x *DataSpecificationGuide) GetDetectNumericalAsDiscretizedNumerical() bool

func (*DataSpecificationGuide) GetIgnoreColumnsWithoutGuides

func (x *DataSpecificationGuide) GetIgnoreColumnsWithoutGuides() bool

func (*DataSpecificationGuide) GetIgnoreUnknownTypeColumns

func (x *DataSpecificationGuide) GetIgnoreUnknownTypeColumns() bool

func (*DataSpecificationGuide) GetMaxNumScannedRowsToAccumulateStatistics

func (x *DataSpecificationGuide) GetMaxNumScannedRowsToAccumulateStatistics() int64

func (*DataSpecificationGuide) GetMaxNumScannedRowsToGuessType

func (x *DataSpecificationGuide) GetMaxNumScannedRowsToGuessType() int64

func (*DataSpecificationGuide) GetUnstackNumericalSetAsNumericals

func (x *DataSpecificationGuide) GetUnstackNumericalSetAsNumericals() bool

func (*DataSpecificationGuide) ProtoMessage

func (*DataSpecificationGuide) ProtoMessage()

func (*DataSpecificationGuide) ProtoReflect

func (x *DataSpecificationGuide) ProtoReflect() protoreflect.Message

func (*DataSpecificationGuide) Reset

func (x *DataSpecificationGuide) Reset()

func (*DataSpecificationGuide) String

func (x *DataSpecificationGuide) String() string

type DiscretizedNumericalGuide

type DiscretizedNumericalGuide struct {
	MaximumNumBins *int64 `protobuf:"varint,1,opt,name=maximum_num_bins,json=maximumNumBins,def=255" json:"maximum_num_bins,omitempty"`
	// Minimum number of examples in a bin.
	MinObsInBins *int32 `protobuf:"varint,2,opt,name=min_obs_in_bins,json=minObsInBins,def=3" json:"min_obs_in_bins,omitempty"`
	// contains filtered or unexported fields
}

func (*DiscretizedNumericalGuide) Descriptor deprecated

func (*DiscretizedNumericalGuide) Descriptor() ([]byte, []int)

Deprecated: Use DiscretizedNumericalGuide.ProtoReflect.Descriptor instead.

func (*DiscretizedNumericalGuide) GetMaximumNumBins

func (x *DiscretizedNumericalGuide) GetMaximumNumBins() int64

func (*DiscretizedNumericalGuide) GetMinObsInBins

func (x *DiscretizedNumericalGuide) GetMinObsInBins() int32

func (*DiscretizedNumericalGuide) ProtoMessage

func (*DiscretizedNumericalGuide) ProtoMessage()

func (*DiscretizedNumericalGuide) ProtoReflect

func (*DiscretizedNumericalGuide) Reset

func (x *DiscretizedNumericalGuide) Reset()

func (*DiscretizedNumericalGuide) String

func (x *DiscretizedNumericalGuide) String() string

type DiscretizedNumericalSpec

type DiscretizedNumericalSpec struct {

	// Boundaries in between the bins.
	// The number of bins is boundaries.size() + 1.
	Boundaries []float32 `protobuf:"fixed32,1,rep,packed,name=boundaries" json:"boundaries,omitempty"`
	// Number of unique numerical values before the discretization.
	OriginalNumUniqueValues *int64 `` /* 128-byte string literal not displayed */
	// Maximum number of bins (at construction time).
	// Defaults to 255 bins, that is 254 boundaries.
	MaximumNumBins *int64 `protobuf:"varint,3,opt,name=maximum_num_bins,json=maximumNumBins,def=255" json:"maximum_num_bins,omitempty"`
	// Minimum number of examples in a bin.
	MinObsInBins *int32 `protobuf:"varint,4,opt,name=min_obs_in_bins,json=minObsInBins,def=3" json:"min_obs_in_bins,omitempty"`
	// contains filtered or unexported fields
}

Specification of a discretized numerical column.

A "discretized numerical" value "i" is encoded as an integer index between -1 (inclusive) and "n = boundaries.size()" (also inclusive).

If i==-1, the value is missing.
If i==0, the original numerical value is strictly lower than
"boundaries.front()". If i==boundaries.size(), the original value is greater
than or equal to "boundaries.back()". If i is in [1, boundaries.size()), the
original value is in between "boundaries[i-1]" and "boundaries[i]".

Because encoding a numerical value into a discretized numerical value is lossy, the original numerical value cannot be recovered. Instead, when a discretized value is converted back to a numerical value, the following logic is applied:

If i==-1, the numerical value is "std::nan" (this is how simpleML represents
missing numerical values). If i==0, the numerical value is
"boundaries.front()-1". If i==boundaries.size(), the numerical value is
"boundaries.back()+1". If i is in [1, boundaries.size()), the numerical value
is "(boundaries[i-1]+boundaries[i])/2".

func (*DiscretizedNumericalSpec) Descriptor deprecated

func (*DiscretizedNumericalSpec) Descriptor() ([]byte, []int)

Deprecated: Use DiscretizedNumericalSpec.ProtoReflect.Descriptor instead.

func (*DiscretizedNumericalSpec) GetBoundaries

func (x *DiscretizedNumericalSpec) GetBoundaries() []float32

func (*DiscretizedNumericalSpec) GetMaximumNumBins

func (x *DiscretizedNumericalSpec) GetMaximumNumBins() int64

func (*DiscretizedNumericalSpec) GetMinObsInBins

func (x *DiscretizedNumericalSpec) GetMinObsInBins() int32

func (*DiscretizedNumericalSpec) GetOriginalNumUniqueValues

func (x *DiscretizedNumericalSpec) GetOriginalNumUniqueValues() int64

func (*DiscretizedNumericalSpec) ProtoMessage

func (*DiscretizedNumericalSpec) ProtoMessage()

func (*DiscretizedNumericalSpec) ProtoReflect

func (x *DiscretizedNumericalSpec) ProtoReflect() protoreflect.Message

func (*DiscretizedNumericalSpec) Reset

func (x *DiscretizedNumericalSpec) Reset()

func (*DiscretizedNumericalSpec) String

func (x *DiscretizedNumericalSpec) String() string

type LinkedWeightDefinition

type LinkedWeightDefinition struct {

	// Attribute index used to compute the weight.
	AttributeIdx *int32 `protobuf:"varint,1,opt,name=attribute_idx,json=attributeIdx" json:"attribute_idx,omitempty"`
	// Types that are assignable to Type:
	//	*LinkedWeightDefinition_Numerical
	//	*LinkedWeightDefinition_Categorical
	Type isLinkedWeightDefinition_Type `protobuf_oneof:"type"`
	// contains filtered or unexported fields
}

Internal linked version of the weight definition. The attributes and values are indexed according to the dataspec.

func (*LinkedWeightDefinition) Descriptor deprecated

func (*LinkedWeightDefinition) Descriptor() ([]byte, []int)

Deprecated: Use LinkedWeightDefinition.ProtoReflect.Descriptor instead.

func (*LinkedWeightDefinition) GetAttributeIdx

func (x *LinkedWeightDefinition) GetAttributeIdx() int32

func (*LinkedWeightDefinition) GetCategorical

func (*LinkedWeightDefinition) GetNumerical

func (*LinkedWeightDefinition) GetType

func (m *LinkedWeightDefinition) GetType() isLinkedWeightDefinition_Type

func (*LinkedWeightDefinition) ProtoMessage

func (*LinkedWeightDefinition) ProtoMessage()

func (*LinkedWeightDefinition) ProtoReflect

func (x *LinkedWeightDefinition) ProtoReflect() protoreflect.Message

func (*LinkedWeightDefinition) Reset

func (x *LinkedWeightDefinition) Reset()

func (*LinkedWeightDefinition) String

func (x *LinkedWeightDefinition) String() string

type LinkedWeightDefinition_Categorical

type LinkedWeightDefinition_Categorical struct {
	// Weight definition if the controlling attribute is a categorical
	// attribute.
	Categorical *LinkedWeightDefinition_CategoricalWeight `protobuf:"bytes,3,opt,name=categorical,oneof"`
}

type LinkedWeightDefinition_CategoricalWeight

type LinkedWeightDefinition_CategoricalWeight struct {

	// Index of "categorical_mapping". Maps a weight value for each categorical
	// attribute value. See the dataspec for the mapping attribute value string
	// to attribute value index.
	CategoricalValueIdx_2Weight []float32 `` /* 147-byte string literal not displayed */
	// contains filtered or unexported fields
}

func (*LinkedWeightDefinition_CategoricalWeight) Descriptor deprecated

func (*LinkedWeightDefinition_CategoricalWeight) Descriptor() ([]byte, []int)

Deprecated: Use LinkedWeightDefinition_CategoricalWeight.ProtoReflect.Descriptor instead.

func (*LinkedWeightDefinition_CategoricalWeight) GetCategoricalValueIdx_2Weight

func (x *LinkedWeightDefinition_CategoricalWeight) GetCategoricalValueIdx_2Weight() []float32

func (*LinkedWeightDefinition_CategoricalWeight) ProtoMessage

func (*LinkedWeightDefinition_CategoricalWeight) ProtoReflect

func (*LinkedWeightDefinition_CategoricalWeight) Reset

func (*LinkedWeightDefinition_CategoricalWeight) String

type LinkedWeightDefinition_Numerical

type LinkedWeightDefinition_Numerical struct {
	// Weight definition if the controlling attribute is a numerical attribute.
	Numerical *LinkedWeightDefinition_NumericalWeight `protobuf:"bytes,2,opt,name=numerical,oneof"`
}

type LinkedWeightDefinition_NumericalWeight

type LinkedWeightDefinition_NumericalWeight struct {
	// contains filtered or unexported fields
}

func (*LinkedWeightDefinition_NumericalWeight) Descriptor deprecated

func (*LinkedWeightDefinition_NumericalWeight) Descriptor() ([]byte, []int)

Deprecated: Use LinkedWeightDefinition_NumericalWeight.ProtoReflect.Descriptor instead.

func (*LinkedWeightDefinition_NumericalWeight) ProtoMessage

func (*LinkedWeightDefinition_NumericalWeight) ProtoReflect

func (*LinkedWeightDefinition_NumericalWeight) Reset

func (*LinkedWeightDefinition_NumericalWeight) String

type MultiValuesSpec

type MultiValuesSpec struct {

	// Maximum number of observed items.
	MaxObservedSize *int32 `protobuf:"varint,1,opt,name=max_observed_size,json=maxObservedSize" json:"max_observed_size,omitempty"`
	// Minimum number of observed items.
	MinObservedSize *int32 `protobuf:"varint,2,opt,name=min_observed_size,json=minObservedSize" json:"min_observed_size,omitempty"`
	// contains filtered or unexported fields
}

Specification for types with multiple values.

func (*MultiValuesSpec) Descriptor deprecated

func (*MultiValuesSpec) Descriptor() ([]byte, []int)

Deprecated: Use MultiValuesSpec.ProtoReflect.Descriptor instead.

func (*MultiValuesSpec) GetMaxObservedSize

func (x *MultiValuesSpec) GetMaxObservedSize() int32

func (*MultiValuesSpec) GetMinObservedSize

func (x *MultiValuesSpec) GetMinObservedSize() int32

func (*MultiValuesSpec) ProtoMessage

func (*MultiValuesSpec) ProtoMessage()

func (*MultiValuesSpec) ProtoReflect

func (x *MultiValuesSpec) ProtoReflect() protoreflect.Message

func (*MultiValuesSpec) Reset

func (x *MultiValuesSpec) Reset()

func (*MultiValuesSpec) String

func (x *MultiValuesSpec) String() string

type NumericalGuide

type NumericalGuide struct {
	// contains filtered or unexported fields
}

func (*NumericalGuide) Descriptor deprecated

func (*NumericalGuide) Descriptor() ([]byte, []int)

Deprecated: Use NumericalGuide.ProtoReflect.Descriptor instead.

func (*NumericalGuide) ProtoMessage

func (*NumericalGuide) ProtoMessage()

func (*NumericalGuide) ProtoReflect

func (x *NumericalGuide) ProtoReflect() protoreflect.Message

func (*NumericalGuide) Reset

func (x *NumericalGuide) Reset()

func (*NumericalGuide) String

func (x *NumericalGuide) String() string

type NumericalSpec

type NumericalSpec struct {

	// Mean value (excluding the NaN).
	Mean              *float64 `protobuf:"fixed64,1,opt,name=mean,def=0" json:"mean,omitempty"`
	MinValue          *float32 `protobuf:"fixed32,2,opt,name=min_value,json=minValue" json:"min_value,omitempty"`
	MaxValue          *float32 `protobuf:"fixed32,3,opt,name=max_value,json=maxValue" json:"max_value,omitempty"`
	StandardDeviation *float64 `protobuf:"fixed64,4,opt,name=standard_deviation,json=standardDeviation" json:"standard_deviation,omitempty"`
	// contains filtered or unexported fields
}

Specification of a numerical column.

func (*NumericalSpec) Descriptor deprecated

func (*NumericalSpec) Descriptor() ([]byte, []int)

Deprecated: Use NumericalSpec.ProtoReflect.Descriptor instead.

func (*NumericalSpec) GetMaxValue

func (x *NumericalSpec) GetMaxValue() float32

func (*NumericalSpec) GetMean

func (x *NumericalSpec) GetMean() float64

func (*NumericalSpec) GetMinValue

func (x *NumericalSpec) GetMinValue() float32

func (*NumericalSpec) GetStandardDeviation

func (x *NumericalSpec) GetStandardDeviation() float64

func (*NumericalSpec) ProtoMessage

func (*NumericalSpec) ProtoMessage()

func (*NumericalSpec) ProtoReflect

func (x *NumericalSpec) ProtoReflect() protoreflect.Message

func (*NumericalSpec) Reset

func (x *NumericalSpec) Reset()

func (*NumericalSpec) String

func (x *NumericalSpec) String() string

type Tokenizer

type Tokenizer struct {

	// How to convert a string into a list/set of symbols.
	Splitter *Tokenizer_Splitter `` /* 134-byte string literal not displayed */
	// Separator characters. Used if splitter=SEPARATOR.
	Separator *string `protobuf:"bytes,2,opt,name=separator,def= ;," json:"separator,omitempty"`
	// Splitting regular expression. Used if splitter=REGEX_MATCH.
	Regex *string `protobuf:"bytes,3,opt,name=regex,def=([\\S]+)" json:"regex,omitempty"`
	// Cast strings to lower case before tokenization.
	ToLowerCase *bool `protobuf:"varint,4,opt,name=to_lower_case,json=toLowerCase,def=1" json:"to_lower_case,omitempty"`
	// Grouping of the tokens.
	Grouping *Tokenizer_Grouping `protobuf:"bytes,5,opt,name=grouping" json:"grouping,omitempty"`
	// contains filtered or unexported fields
}

Tokenization parameters.

func (*Tokenizer) Descriptor deprecated

func (*Tokenizer) Descriptor() ([]byte, []int)

Deprecated: Use Tokenizer.ProtoReflect.Descriptor instead.

func (*Tokenizer) GetGrouping

func (x *Tokenizer) GetGrouping() *Tokenizer_Grouping

func (*Tokenizer) GetRegex

func (x *Tokenizer) GetRegex() string

func (*Tokenizer) GetSeparator

func (x *Tokenizer) GetSeparator() string

func (*Tokenizer) GetSplitter

func (x *Tokenizer) GetSplitter() Tokenizer_Splitter

func (*Tokenizer) GetToLowerCase

func (x *Tokenizer) GetToLowerCase() bool

func (*Tokenizer) ProtoMessage

func (*Tokenizer) ProtoMessage()

func (*Tokenizer) ProtoReflect

func (x *Tokenizer) ProtoReflect() protoreflect.Message

func (*Tokenizer) Reset

func (x *Tokenizer) Reset()

func (*Tokenizer) String

func (x *Tokenizer) String() string

type TokenizerGuide

type TokenizerGuide struct {
	Tokenizer *Tokenizer `protobuf:"bytes,1,opt,name=tokenizer" json:"tokenizer,omitempty"`
	// contains filtered or unexported fields
}

func (*TokenizerGuide) Descriptor deprecated

func (*TokenizerGuide) Descriptor() ([]byte, []int)

Deprecated: Use TokenizerGuide.ProtoReflect.Descriptor instead.

func (*TokenizerGuide) GetTokenizer

func (x *TokenizerGuide) GetTokenizer() *Tokenizer

func (*TokenizerGuide) ProtoMessage

func (*TokenizerGuide) ProtoMessage()

func (*TokenizerGuide) ProtoReflect

func (x *TokenizerGuide) ProtoReflect() protoreflect.Message

func (*TokenizerGuide) Reset

func (x *TokenizerGuide) Reset()

func (*TokenizerGuide) String

func (x *TokenizerGuide) String() string

type Tokenizer_Grouping

type Tokenizer_Grouping struct {
	Unigrams *bool `protobuf:"varint,1,opt,name=unigrams,def=1" json:"unigrams,omitempty"`
	Bigrams  *bool `protobuf:"varint,2,opt,name=bigrams,def=0" json:"bigrams,omitempty"`
	Trigrams *bool `protobuf:"varint,3,opt,name=trigrams,def=0" json:"trigrams,omitempty"`
	// contains filtered or unexported fields
}

func (*Tokenizer_Grouping) Descriptor deprecated

func (*Tokenizer_Grouping) Descriptor() ([]byte, []int)

Deprecated: Use Tokenizer_Grouping.ProtoReflect.Descriptor instead.

func (*Tokenizer_Grouping) GetBigrams

func (x *Tokenizer_Grouping) GetBigrams() bool

func (*Tokenizer_Grouping) GetTrigrams

func (x *Tokenizer_Grouping) GetTrigrams() bool

func (*Tokenizer_Grouping) GetUnigrams

func (x *Tokenizer_Grouping) GetUnigrams() bool

func (*Tokenizer_Grouping) ProtoMessage

func (*Tokenizer_Grouping) ProtoMessage()

func (*Tokenizer_Grouping) ProtoReflect

func (x *Tokenizer_Grouping) ProtoReflect() protoreflect.Message

func (*Tokenizer_Grouping) Reset

func (x *Tokenizer_Grouping) Reset()

func (*Tokenizer_Grouping) String

func (x *Tokenizer_Grouping) String() string

type Tokenizer_Splitter

type Tokenizer_Splitter int32

Possible string tokenization algorithms.

const (
	Tokenizer_INVALID Tokenizer_Splitter = 0
	// Split a string according to the user specified separator.
	Tokenizer_SEPARATOR Tokenizer_Splitter = 1
	// Split a string by extracting token using the user specified regular
	// expression.
	Tokenizer_REGEX_MATCH Tokenizer_Splitter = 2
	// Split a string into individual characters. Does not remove spaces and
	// non-printable characters.
	Tokenizer_CHARACTER Tokenizer_Splitter = 3
)

func (Tokenizer_Splitter) Descriptor

func (Tokenizer_Splitter) Enum

func (Tokenizer_Splitter) EnumDescriptor deprecated

func (Tokenizer_Splitter) EnumDescriptor() ([]byte, []int)

Deprecated: Use Tokenizer_Splitter.Descriptor instead.

func (Tokenizer_Splitter) Number

func (Tokenizer_Splitter) String

func (x Tokenizer_Splitter) String() string

func (Tokenizer_Splitter) Type

func (*Tokenizer_Splitter) UnmarshalJSON deprecated

func (x *Tokenizer_Splitter) UnmarshalJSON(b []byte) error

Deprecated: Do not use.

type Unstacked

type Unstacked struct {

	// Name of the column that was unstacked.
	OriginalName *string `protobuf:"bytes,1,opt,name=original_name,json=originalName" json:"original_name,omitempty"`
	// Index of the first column containing the unstacked feature.
	BeginColumnIdx *int32 `protobuf:"varint,2,opt,name=begin_column_idx,json=beginColumnIdx" json:"begin_column_idx,omitempty"`
	// Number of unstacked elements.
	Size *int32 `protobuf:"varint,3,opt,name=size" json:"size,omitempty"`
	// Type of the columns.
	Type *ColumnType `protobuf:"varint,4,opt,name=type,enum=yggdrasil_decision_forests.dataset.proto.ColumnType,def=0" json:"type,omitempty"`
	// contains filtered or unexported fields
}

Information about an unstacked column. An unstacked column is a multi-dimensional column (e.g. an embedding) that has been split into multiple scalar columns.

func (*Unstacked) Descriptor deprecated

func (*Unstacked) Descriptor() ([]byte, []int)

Deprecated: Use Unstacked.ProtoReflect.Descriptor instead.

func (*Unstacked) GetBeginColumnIdx

func (x *Unstacked) GetBeginColumnIdx() int32

func (*Unstacked) GetOriginalName

func (x *Unstacked) GetOriginalName() string

func (*Unstacked) GetSize

func (x *Unstacked) GetSize() int32

func (*Unstacked) GetType

func (x *Unstacked) GetType() ColumnType

func (*Unstacked) ProtoMessage

func (*Unstacked) ProtoMessage()

func (*Unstacked) ProtoReflect

func (x *Unstacked) ProtoReflect() protoreflect.Message

func (*Unstacked) Reset

func (x *Unstacked) Reset()

func (*Unstacked) String

func (x *Unstacked) String() string

type WeightDefinition

type WeightDefinition struct {

	// [Required] Name of the attribute that controls the weights of the examples.
	Attribute *string `protobuf:"bytes,1,opt,name=attribute" json:"attribute,omitempty"`
	// Types that are assignable to Type:
	//	*WeightDefinition_Numerical
	//	*WeightDefinition_Categorical
	Type isWeightDefinition_Type `protobuf_oneof:"type"`
	// contains filtered or unexported fields
}

func (*WeightDefinition) Descriptor deprecated

func (*WeightDefinition) Descriptor() ([]byte, []int)

Deprecated: Use WeightDefinition.ProtoReflect.Descriptor instead.

func (*WeightDefinition) GetAttribute

func (x *WeightDefinition) GetAttribute() string

func (*WeightDefinition) GetCategorical

func (*WeightDefinition) GetNumerical

func (*WeightDefinition) GetType

func (m *WeightDefinition) GetType() isWeightDefinition_Type

func (*WeightDefinition) ProtoMessage

func (*WeightDefinition) ProtoMessage()

func (*WeightDefinition) ProtoReflect

func (x *WeightDefinition) ProtoReflect() protoreflect.Message

func (*WeightDefinition) Reset

func (x *WeightDefinition) Reset()

func (*WeightDefinition) String

func (x *WeightDefinition) String() string

type WeightDefinition_Categorical

type WeightDefinition_Categorical struct {
	// The attribute is interpreted as a categorical attribute. A weight is
	// defined for each possible value.
	Categorical *WeightDefinition_CategoricalWeight `protobuf:"bytes,3,opt,name=categorical,oneof"`
}

type WeightDefinition_CategoricalWeight

type WeightDefinition_CategoricalWeight struct {

	// Pair of categorical value and weight.
	Items []*WeightDefinition_CategoricalWeight_Item `protobuf:"bytes,1,rep,name=items" json:"items,omitempty"`
	// contains filtered or unexported fields
}

The weight is resolved by looking up the attribute value in the following mapping of categorical values to weights.

func (*WeightDefinition_CategoricalWeight) Descriptor deprecated

func (*WeightDefinition_CategoricalWeight) Descriptor() ([]byte, []int)

Deprecated: Use WeightDefinition_CategoricalWeight.ProtoReflect.Descriptor instead.

func (*WeightDefinition_CategoricalWeight) GetItems

func (*WeightDefinition_CategoricalWeight) ProtoMessage

func (*WeightDefinition_CategoricalWeight) ProtoMessage()

func (*WeightDefinition_CategoricalWeight) ProtoReflect

func (*WeightDefinition_CategoricalWeight) Reset

func (*WeightDefinition_CategoricalWeight) String

type WeightDefinition_CategoricalWeight_Item

type WeightDefinition_CategoricalWeight_Item struct {

	// [Required] A value to map to a corresponding weight.
	Value *string `protobuf:"bytes,1,opt,name=value" json:"value,omitempty"`
	// [Required] The weight.
	Weight *float32 `protobuf:"fixed32,3,opt,name=weight" json:"weight,omitempty"`
	// contains filtered or unexported fields
}

func (*WeightDefinition_CategoricalWeight_Item) Descriptor deprecated

func (*WeightDefinition_CategoricalWeight_Item) Descriptor() ([]byte, []int)

Deprecated: Use WeightDefinition_CategoricalWeight_Item.ProtoReflect.Descriptor instead.

func (*WeightDefinition_CategoricalWeight_Item) GetValue

func (*WeightDefinition_CategoricalWeight_Item) GetWeight

func (*WeightDefinition_CategoricalWeight_Item) ProtoMessage

func (*WeightDefinition_CategoricalWeight_Item) ProtoReflect

func (*WeightDefinition_CategoricalWeight_Item) Reset

func (*WeightDefinition_CategoricalWeight_Item) String

type WeightDefinition_Numerical

type WeightDefinition_Numerical struct {
	// The attribute is interpreted as a numerical value.
	Numerical *WeightDefinition_NumericalWeight `protobuf:"bytes,2,opt,name=numerical,oneof"`
}

type WeightDefinition_NumericalWeight

type WeightDefinition_NumericalWeight struct {
	// contains filtered or unexported fields
}

The weight is directly the numerical value.

func (*WeightDefinition_NumericalWeight) Descriptor deprecated

func (*WeightDefinition_NumericalWeight) Descriptor() ([]byte, []int)

Deprecated: Use WeightDefinition_NumericalWeight.ProtoReflect.Descriptor instead.

func (*WeightDefinition_NumericalWeight) ProtoMessage

func (*WeightDefinition_NumericalWeight) ProtoMessage()

func (*WeightDefinition_NumericalWeight) ProtoReflect

func (*WeightDefinition_NumericalWeight) Reset

func (*WeightDefinition_NumericalWeight) String

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL