preprocessing

package
v0.0.0-...-fcddba5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 19, 2023 License: MIT Imports: 8 Imported by: 0

Documentation

Overview

Package preprocessing includes scaling, centering, normalization, binarization and imputation methods.

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type FunctionTransformer

// FunctionTransformer bundles two user-supplied functions over (X, Y) dense matrices.
type FunctionTransformer struct {
	// Func and InverseFunc share one signature: they receive (X, Y) and
	// return (X1, Y1).
	// NOTE(review): presumably Func backs Transform and InverseFunc backs
	// InverseTransform, in the order given to NewFunctionTransformer —
	// confirm against the method bodies.
	Func, InverseFunc func(X, Y *mat.Dense) (X1, Y1 *mat.Dense)
}

FunctionTransformer constructs a transformer from an arbitrary callable.

Example
// X is a 2x4 matrix; Y is a typed nil *mat.Dense.
X, Y := mat.NewDense(2, 4, []float64{1, 2, 3, 4, 5, 6, 7, 8}), (*mat.Dense)(nil)

// firstColumn remembers the column removed by dropFirstColumn so that
// undoDropFirstColumn can put it back.
var firstColumn []float64
// dropFirstColumn saves column 0 of X into firstColumn and returns a copy of
// the remaining columns; Y is passed through unchanged.
dropFirstColumn := func(X, Y *mat.Dense) (X1, Y1 *mat.Dense) {
	m, n := X.Dims()
	firstColumn = make([]float64, m)
	mat.Col(firstColumn, 0, X)
	X1 = mat.NewDense(m, n-1, nil)
	X1.Copy(X.Slice(0, m, 1, n))
	Y1 = Y
	return
}
// undoDropFirstColumn builds a matrix one column wider than X, writes the
// saved firstColumn into column 0 and copies X into the remaining columns.
undoDropFirstColumn := func(X, Y *mat.Dense) (X1, Y1 *mat.Dense) {
	m, n := X.Dims()
	n++
	X1 = mat.NewDense(m, n, nil)
	X1.SetCol(0, firstColumn)
	X1.Slice(0, m, 1, n).(*mat.Dense).Copy(X)
	Y1 = Y
	return
}
allButFirstColumn := NewFunctionTransformer(dropFirstColumn, undoDropFirstColumn)
// Transform prints the 2x3 matrix without the first column (see Output).
X1, _ := allButFirstColumn.Transform(X, Y)
fmt.Println(mat.Formatted(X1))
// InverseTransform prints the original 2x4 matrix again (see Output).
X2, _ := allButFirstColumn.InverseTransform(X1, nil)
fmt.Println(mat.Formatted(X2))

// additional example from http://scikit-learn.org/stable/modules/preprocessing.html#custom-transformers
// The forward function applies math.Log1p element-wise; the second function
// applies math.Exp(v) - 1 element-wise. Both pass Y through unchanged.
transformer := NewFunctionTransformer(
	func(X, Y *mat.Dense) (X1, Y1 *mat.Dense) {
		Xmat := X.RawMatrix()
		X1 = mat.NewDense(Xmat.Rows, Xmat.Cols, nil)
		X1.Apply(func(i, j int, v float64) float64 { return math.Log1p(v) }, X)
		Y1 = Y
		return

	}, func(X, Y *mat.Dense) (X1, Y1 *mat.Dense) {
		Xmat := X.RawMatrix()
		X1 = mat.NewDense(Xmat.Rows, Xmat.Cols, nil)
		X1.Apply(func(i, j int, v float64) float64 { return math.Exp(v) - 1 }, X)
		Y1 = Y
		return

	},
)
// Only the forward direction is exercised here, on a 2x2 input.
X = mat.NewDense(2, 2, []float64{0, 1, 2, 3})
X1, _ = transformer.Transform(X, nil)
fmt.Printf("log1p:\n%.8f\n", mat.Formatted(X1))
Output:

⎡2  3  4⎤
⎣6  7  8⎦
⎡1  2  3  4⎤
⎣5  6  7  8⎦
log1p:
⎡0.00000000  0.69314718⎤
⎣1.09861229  1.38629436⎦

func NewFunctionTransformer

func NewFunctionTransformer(f, invf func(X, Y *mat.Dense) (X1, Y1 *mat.Dense)) *FunctionTransformer

NewFunctionTransformer ...

func (*FunctionTransformer) Fit

func (m *FunctionTransformer) Fit(X, Y mat.Matrix) base.Fiter

Fit ...

func (*FunctionTransformer) FitTransform

func (m *FunctionTransformer) FitTransform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

FitTransform fits to the data, then transforms it.

func (*FunctionTransformer) InverseTransform

func (m *FunctionTransformer) InverseTransform(X, Y *mat.Dense) (X1, Y1 *mat.Dense)

InverseTransform ...

func (*FunctionTransformer) Transform

func (m *FunctionTransformer) Transform(X, Y mat.Matrix) (X1, Y1 *mat.Dense)

Transform ...

func (*FunctionTransformer) TransformerClone

func (m *FunctionTransformer) TransformerClone() base.Transformer

TransformerClone ...

type Imputer

// Imputer carries the configuration of the imputation transformer.
type Imputer struct {
	// Strategy names the replacement statistic; the package example
	// exercises "mean", "median" and "most_frequent".
	Strategy      string
	// MissingValues is a list of float64 values.
	// NOTE(review): presumably the values treated as missing (the package
	// example uses NaN) — confirm against Fit/Transform.
	MissingValues []float64
}

Imputer ... Strategy is mean|median|most_frequent; it defaults to mean.

Example
// nan marks the missing entries in this example.
var nan = math.NaN()
// X is 5x2 with a single missing entry at row 2, column 0.
X := mat.NewDense(5, 2, []float64{1, 2, 3, 4, nan, 6, 7, 8, 7, 10})
fmt.Println("replacing X.At(2,0) with...")
// Run the same input through each strategy and print the filled matrix.
// NOTE(review): the listed output shows 3 for "median" of column 0
// ({1, 3, 7, 7}); the conventional median would be 5 — confirm whether this
// is intended.
for _, s := range []string{"mean", "median", "most_frequent"} {

	X1, _ := (&Imputer{Strategy: s}).FitTransform(X, nil)
	fmt.Printf("%s\n%g\n", s, mat.Formatted(X1))

}
// additional example adapted from http://scikit-learn.org/stable/modules/preprocessing.html#imputation-of-missing-values
// Fit on one matrix, then fill the missing entries of a different matrix;
// per the listed output the fill values are 4 and 3.666..., the column means
// of the non-missing fitted data.
imp := NewImputer()
imp.Fit(mat.NewDense(3, 2, []float64{1, 2, nan, 3, 7, 6}), nil)
X = mat.NewDense(3, 2, []float64{nan, 2, 6, nan, 7, 6})
X1, _ := imp.Transform(X, nil)
fmt.Printf("imputation-of-missing-values:\n%g\n", mat.Formatted(X1))
Output:

replacing X.At(2,0) with...
mean
⎡  1    2⎤
⎢  3    4⎥
⎢4.5    6⎥
⎢  7    8⎥
⎣  7   10⎦
median
⎡ 1   2⎤
⎢ 3   4⎥
⎢ 3   6⎥
⎢ 7   8⎥
⎣ 7  10⎦
most_frequent
⎡ 1   2⎤
⎢ 3   4⎥
⎢ 7   6⎥
⎢ 7   8⎥
⎣ 7  10⎦
imputation-of-missing-values:
⎡                 4                   2⎤
⎢                 6  3.6666666666666665⎥
⎣                 7                   6⎦

func NewImputer

func NewImputer() *Imputer

NewImputer ...

func (*Imputer) Fit

func (m *Imputer) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter

Fit for Imputer ...

func (*Imputer) FitTransform

func (m *Imputer) FitTransform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

FitTransform fits to the data, then transforms it.

func (*Imputer) InverseTransform

func (m *Imputer) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform for Imputer ...

func (*Imputer) Transform

func (m *Imputer) Transform(Xmatrix, Ymatrix mat.Matrix) (Xout, Yout *mat.Dense)

Transform for Imputer ...

func (*Imputer) TransformerClone

func (m *Imputer) TransformerClone() base.Transformer

TransformerClone ...

type InverseTransformer

// InverseTransformer extends Transformer with an InverseTransform method.
type InverseTransformer interface {
	Transformer
	// InverseTransform takes dense (X, Y) and returns dense (Xout, Yout).
	InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)
}

InverseTransformer is a transformer able to invert its transformation.

type KBinsDiscretizer

// KBinsDiscretizer holds the discretizer settings and the fitted bin edges.
type KBinsDiscretizer struct {
	// NBins is the value passed to NewKBinsDiscretizer; with NBins=3 the
	// package example yields codes 0..2 and four edges per feature.
	NBins    int
	// Encode is "onehot-dense" or "ordinal".
	Encode   string
	// Strategy is "quantile", "uniform" or "kmeans".
	Strategy string
	// BinEdges is indexed by feature first: in the package example
	// BinEdges[0] prints as [-2 -1 0 1] after fitting.
	BinEdges [][]float64
}

KBinsDiscretizer structure Encode = "onehot-dense","ordinal" Strategy = "quantile","uniform","kmeans"

Example
// example from https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.KBinsDiscretizer.html#sklearn.preprocessing.KBinsDiscretizer.fit
// X is 4 samples by 4 features.
X := mat.NewDense(4, 4, []float64{
	-2, 1, -4, -1,
	-1, 2, -3, -0.5,
	0, 3, -2, 0.5,
	1, 4, -1, 2})
// Ask for 3 bins, then override the constructor's Encode and Strategy.
est := NewKBinsDiscretizer(3)
est.Encode = "ordinal"
est.Strategy = "uniform"
// Xt holds one bin index (0..2 in the listed output) per input cell.
Xt, _ := est.FitTransform(X, nil)
fmt.Printf("Xt:\n%g\n", mat.Formatted(Xt))
fmt.Printf("est.BinEdges[0]:\n%g\n", est.BinEdges[0])
// Per the listed output the inverse maps each bin index to a value halfway
// between that bin's edges (e.g. bin 0 of feature 0 -> -1.5), not to the
// original sample value.
Xinv, _ := est.InverseTransform(Xt, nil)
fmt.Printf("est.InverseTransform(Xt):\n%g\n", mat.Formatted(Xinv))
Output:

Xt:
⎡0  0  0  0⎤
⎢1  1  1  0⎥
⎢2  2  2  1⎥
⎣2  2  2  2⎦
est.BinEdges[0]:
[-2 -1 0 1]
est.InverseTransform(Xt):
⎡-1.5   1.5  -3.5  -0.5⎤
⎢-0.5   2.5  -2.5  -0.5⎥
⎢ 0.5   3.5  -1.5   0.5⎥
⎣ 0.5   3.5  -1.5   1.5⎦

func NewKBinsDiscretizer

func NewKBinsDiscretizer(NBins int) *KBinsDiscretizer

NewKBinsDiscretizer returns a discretizer with Encode="onehot-dense" and Strategy="quantile".

func (*KBinsDiscretizer) Fit

func (m *KBinsDiscretizer) Fit(X, Y mat.Matrix) base.Fiter

Fit fits the transformer

func (*KBinsDiscretizer) FitTransform

func (m *KBinsDiscretizer) FitTransform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

FitTransform fits the data, then transforms it.

func (*KBinsDiscretizer) InverseTransform

func (m *KBinsDiscretizer) InverseTransform(X mat.Matrix, Y mat.Mutable) (Xout, Yout *mat.Dense)

InverseTransform transforms discretized data back to original feature space.

func (*KBinsDiscretizer) Transform

func (m *KBinsDiscretizer) Transform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

Transform discretizes the Data

func (*KBinsDiscretizer) TransformerClone

func (m *KBinsDiscretizer) TransformerClone() Transformer

TransformerClone ...

type LabelBinarizer

// LabelBinarizer holds the label values and the classes registered by Fit.
type LabelBinarizer struct {
	// NOTE(review): presumably the values written for an absent / present
	// class. The package example uses the zero value for both yet prints
	// 0/1 output, so some defaulting seems to happen — confirm in Fit/Transform.
	NegLabel, PosLabel float64
	// Classes prints as [[1 2 4 6]] in the package example after fitting
	// the single-column Y {1, 2, 6, 4, 2}.
	Classes            [][]float64
}

LabelBinarizer Binarize labels in a one-vs-all fashion

Example
// Only Y carries data here; X is a typed nil.
X, Y := (*mat.Dense)(nil), mat.NewDense(5, 1, []float64{1, 2, 6, 4, 2})
lb := &LabelBinarizer{}
lb.Fit(X, Y)
// Prints the distinct labels in ascending order (see Output).
fmt.Println(lb.Classes)

// Each label becomes a row with a single 1 in the column of its class.
_, Yout := lb.Transform(nil, mat.NewDense(2, 1, []float64{1, 6}))
fmt.Println(mat.Formatted(Yout))
// The inverse recovers the original labels; printed transposed as one row.
_, Y2 := lb.InverseTransform(nil, Yout)
fmt.Println(mat.Formatted(Y2.T()))
fmt.Println(mat.Formatted(Y2.T()))
Output:

[[1 2 4 6]]
⎡1  0  0  0⎤
⎣0  0  0  1⎦
[1  6]

func NewLabelBinarizer

func NewLabelBinarizer(NegLabel, PosLabel float64) *LabelBinarizer

NewLabelBinarizer ...

func (*LabelBinarizer) Fit

func (m *LabelBinarizer) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter

Fit for binarizer register classes

func (*LabelBinarizer) FitTransform

func (m *LabelBinarizer) FitTransform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

FitTransform fits to the data, then transforms it.

func (*LabelBinarizer) InverseTransform

func (m *LabelBinarizer) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform for LabelBinarizer

func (*LabelBinarizer) Transform

func (m *LabelBinarizer) Transform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

Transform for LabelBinarizer

func (*LabelBinarizer) TransformerClone

func (m *LabelBinarizer) TransformerClone() base.Transformer

TransformerClone ...

type LabelEncoder

// LabelEncoder holds the classes learned from Y.
type LabelEncoder struct {
	// Classes prints as [[1 2 6]] in the package example after fitting the
	// single-column Y {1, 2, 2, 6}.
	Classes [][]float64
	// NOTE(review): Support has the same shape as Classes; presumably
	// per-class occurrence counts — confirm against Fit/PartialFit.
	Support [][]float64
}

LabelEncoder Encode labels with value between 0 and n_classes-1.

Example
// adapted from http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html#sklearn.preprocessing.LabelEncoder
le := NewLabelEncoder()
// Fit on a single label column; X is not used and passed as nil.
Y := mat.NewDense(4, 1, []float64{1, 2, 2, 6})
le.Fit(nil, Y)
fmt.Println(le.Classes)
// Labels are replaced by their index in Classes: {1, 1, 2, 6} -> {0, 0, 1, 2}.
_, Y1 := le.Transform(nil, mat.NewDense(4, 1, []float64{1, 1, 2, 6}))
fmt.Println(mat.Formatted(Y1.T()))
// Indices are mapped back to labels: {0, 0, 1, 2} -> {1, 1, 2, 6}.
_, Y2 := le.InverseTransform(nil, mat.NewDense(4, 1, []float64{0, 0, 1, 2}))
fmt.Println(mat.Formatted(Y2.T()))
Output:

[[1 2 6]]
[0  0  1  2]
[1  1  2  6]

func NewLabelEncoder

func NewLabelEncoder() *LabelEncoder

NewLabelEncoder ...

func (*LabelEncoder) Fit

func (m *LabelEncoder) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter

Fit for LabelEncoder ...

func (*LabelEncoder) FitTransform

func (m *LabelEncoder) FitTransform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

FitTransform fits to the data, then transforms it.

func (*LabelEncoder) InverseTransform

func (m *LabelEncoder) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform for LabelEncoder ...

func (*LabelEncoder) PartialFit

func (m *LabelEncoder) PartialFit(X, Y *mat.Dense) base.Transformer

PartialFit for LabelEncoder ...

func (*LabelEncoder) Transform

func (m *LabelEncoder) Transform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

Transform for LabelEncoder ...

func (*LabelEncoder) TransformerClone

func (m *LabelEncoder) TransformerClone() base.Transformer

TransformerClone ...

type MinMaxScaler

// MinMaxScaler rescales data between FeatureRange.
type MinMaxScaler struct {
	// FeatureRange is the target interval; the package example passes {0, 1}.
	// NOTE(review): the element type float is not declared in this view —
	// presumably a package-level alias of float64 (the example passes a
	// []float64); confirm.
	FeatureRange                            []float
	// In the package example DataMax prints as one row holding each column's
	// maximum after Fit.
	// NOTE(review): Scale, Min, DataMin and DataRange are presumably row
	// vectors of the same width — confirm.
	Scale, Min, DataMin, DataMax, DataRange *mat.Dense
	// NOTE(review): presumably the number of rows consumed so far by
	// Fit/PartialFit — confirm.
	NSamplesSeen                            int
}

MinMaxScaler rescale data between FeatureRange

Example
// adapted from http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html#sklearn.preprocessing.MinMaxScaler
// data is 4 samples by 2 features.
data := mat.NewDense(4, 2, []float64{-1., 2, -.5, 6, 0, 10, 1, 18})
scaler := NewMinMaxScaler([]float64{0, 1})
scaler.Fit(data, nil)
// DataMax holds the per-column maximum of the fitted data ([1 18]).
fmt.Println(mat.Formatted(scaler.DataMax))
// The fitted data itself is mapped onto 0..1 column by column.
X1, _ := scaler.Transform(data, nil)
fmt.Println(mat.Formatted(X1))
// A value above the fitted maximum is not clipped: 2 in column 0 maps to 1.5.
X2, _ := scaler.Transform(mat.NewDense(1, 2, []float64{2, 2}), nil)
fmt.Println(mat.Formatted(X2))
Output:

[ 1  18]
⎡   0     0⎤
⎢0.25  0.25⎥
⎢ 0.5   0.5⎥
⎣   1     1⎦
[1.5    0]

func NewMinMaxScaler

func NewMinMaxScaler(featureRange []float) *MinMaxScaler

NewMinMaxScaler creates a *MinMaxScaler with FeatureRange 0..1

func (*MinMaxScaler) Fit

func (scaler *MinMaxScaler) Fit(X, Y mat.Matrix) base.Fiter

Fit computes Scale and Min

func (*MinMaxScaler) FitTransform

func (scaler *MinMaxScaler) FitTransform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

FitTransform fits to the data, then transforms it.

func (*MinMaxScaler) InverseTransform

func (scaler *MinMaxScaler) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform rescale data into original bounds

func (*MinMaxScaler) PartialFit

func (scaler *MinMaxScaler) PartialFit(Xmatrix, Ymatrix mat.Matrix) Transformer

PartialFit updates Scale and Min with partial data

func (*MinMaxScaler) Reset

func (scaler *MinMaxScaler) Reset() *MinMaxScaler

Reset resets scaler to its initial state

func (*MinMaxScaler) Transform

func (scaler *MinMaxScaler) Transform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

Transform applies scaling to X

func (*MinMaxScaler) TransformerClone

func (scaler *MinMaxScaler) TransformerClone() base.Transformer

TransformerClone ...

type MultiLabelBinarizer

// MultiLabelBinarizer holds the classes learned from Y and an ordering function.
type MultiLabelBinarizer struct {
	// Classes may hold numbers or strings: the package example prints
	// [1 2 3] after fitting a matrix and [comedy sci-fi thriller] after
	// fitting a [][]string.
	Classes []interface{}

	// Less has the signature of an index-based comparison.
	// NOTE(review): presumably used to sort Classes — confirm against Fit2.
	Less func(i, j int) bool
}

MultiLabelBinarizer Transform between iterable of iterables and a multilabel format

Example
mlb := NewMultiLabelBinarizer()

fmt.Println("NewMultiLabelBinarizer matrix test")
// Y0 has 2 rows and 2 label columns. Per the listed output each column is
// expanded into one indicator per class (3 classes -> 6 output columns).
Y0 := mat.NewDense(2, 2, []float64{1, 3, 2, 3})
_, Y1 := mlb.FitTransform(nil, Y0)
fmt.Println(mat.Formatted(Y1))
fmt.Println("Classes", mlb.Classes)
// InverseTransform returns Yout as an interface{}; after fitting a matrix it
// holds a *mat.Dense, hence the type assertion below.
_, Y2 := mlb.InverseTransform(nil, Y1)
fmt.Println(mat.Formatted(Y2.(*mat.Dense)))

fmt.Println("NewMultiLabelBinarizer string test")
// FitTransform2 accepts string labels; the same mlb is refitted here.
_, Y1 = mlb.FitTransform2(nil, [][]string{{"sci-fi", "thriller"}, {"comedy", "comedy"}})
fmt.Println(mat.Formatted(Y1))
fmt.Println("Classes", mlb.Classes)
// After fitting strings the inverse prints the original string labels.
_, Y2s := mlb.InverseTransform(nil, Y1)
fmt.Println(Y2s)
Output:

NewMultiLabelBinarizer matrix test
⎡1  0  0  0  0  1⎤
⎣0  1  0  0  0  1⎦
Classes [1 2 3]
⎡1  3⎤
⎣2  3⎦
NewMultiLabelBinarizer string test
⎡0  1  0  0  0  1⎤
⎣1  0  0  1  0  0⎦
Classes [comedy sci-fi thriller]
[[sci-fi thriller] [comedy comedy]]

func NewMultiLabelBinarizer

func NewMultiLabelBinarizer() *MultiLabelBinarizer

NewMultiLabelBinarizer ...

func (*MultiLabelBinarizer) Fit

func (m *MultiLabelBinarizer) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter

Fit for MultiLabelBinarizer ... if Y is [][]string, use Fit2. this one is only to satisfy Transformer interface

func (*MultiLabelBinarizer) Fit2

func (m *MultiLabelBinarizer) Fit2(X mat.Matrix, Y interface{}) *MultiLabelBinarizer

Fit2 for MultiLabelBinarizer ... Y type can be *mat.Dense | [][]string

func (*MultiLabelBinarizer) FitTransform

func (m *MultiLabelBinarizer) FitTransform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

FitTransform fits to the data, then transforms it.

func (*MultiLabelBinarizer) FitTransform2

func (m *MultiLabelBinarizer) FitTransform2(X mat.Matrix, Y interface{}) (Xout, Yout *mat.Dense)

FitTransform2 can take a [][]string in Y

func (*MultiLabelBinarizer) InverseTransform

func (m *MultiLabelBinarizer) InverseTransform(X, Y *mat.Dense) (Xout *mat.Dense, Yout interface{})

InverseTransform for MultiLabelBinarizer ... Yout type is the same as the one passed into Fit

func (*MultiLabelBinarizer) Transform

func (m *MultiLabelBinarizer) Transform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

Transform for MultiLabelBinarizer ... Y type must be the same as the one passed into Fit

func (*MultiLabelBinarizer) Transform2

func (m *MultiLabelBinarizer) Transform2(X mat.Matrix, Y interface{}) (Xout, Yout *mat.Dense)

Transform2 handles Y types *mat.Dense and [][]string

func (*MultiLabelBinarizer) TransformerClone

func (m *MultiLabelBinarizer) TransformerClone() base.Transformer

TransformerClone ...

type PCA

// PCA embeds a gonum mat.SVD together with its settings and fitted results.
type PCA struct {
	mat.SVD
	// NOTE(review): presumably a threshold on explained variance used to
	// choose how many components to keep — confirm against Fit.
	MinVarianceRatio                       float64
	// NOTE(review): presumably the number of components kept — confirm.
	NComponents                            int
	// Both slices are printed by the package example after Fit:
	// SingularValues as [6.301 0.550] and ExplainedVarianceRatio as
	// [0.992 0.008].
	SingularValues, ExplainedVarianceRatio []float64
}

PCA is a thin singular value decomposition transformer

Example
// X is 6 samples by 2 features.
X := mat.NewDense(6, 2, []float64{-1., -1., -2., -1., -3., -2., 1., 1., 2., 1., 3., 2.})
pca := NewPCA()
pca.Fit(X, nil)
// Project the fitted data; Xp is printed below as its flat row-major data.
Xp, _ := pca.Transform(X, nil)
fmt.Printf("explained  : %.3f\n", pca.ExplainedVarianceRatio)
fmt.Printf("Svalues    : %.3f\n", pca.SingularValues)
fmt.Printf("transformed: %.3f\n", Xp.RawMatrix().Data)
// The inverse recovers the original X to three decimals (see Output).
X2, _ := pca.InverseTransform(Xp, nil)
fmt.Printf("inversed   : %.3f\n", X2.RawMatrix().Data)
// Full-precision values of the transformed data, kept for reference:
//expected:=[-1.383405778728807 0.293578697080941
// -2.221898016633681 -0.2513348437429921
// -3.605303795362488 0.04224385333794878
// 1.383405778728807 -0.293578697080941
// 2.221898016633681 0.2513348437429921
// 3.605303795362488 -0.04224385333794878]
Output:

explained  : [0.992 0.008]
Svalues    : [6.301 0.550]
transformed: [-1.383 0.294 -2.222 -0.251 -3.605 0.042 1.383 -0.294 2.222 0.251 3.605 -0.042]
inversed   : [-1.000 -1.000 -2.000 -1.000 -3.000 -2.000 1.000 1.000 2.000 1.000 3.000 2.000]

func NewPCA

func NewPCA() *PCA

NewPCA returns a *PCA

func (*PCA) Fit

func (m *PCA) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter

Fit computes the svd of X

func (*PCA) FitTransform

func (m *PCA) FitTransform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

FitTransform fits to the data, then transforms it.

func (*PCA) InverseTransform

func (m *PCA) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform put X into original space

func (*PCA) Transform

func (m *PCA) Transform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

Transform Transforms X

func (*PCA) TransformerClone

func (m *PCA) TransformerClone() base.Transformer

TransformerClone ...

type Shuffler

// Shuffler shuffles rows of X and Y.
type Shuffler struct {
	// NOTE(review): presumably the row permutation chosen by Fit and reused
	// by Transform/InverseTransform — confirm.
	Perm        []int
	// RandomState is the random source; the package example sets it to
	// base.NewSource(7) to get a reproducible shuffle.
	RandomState base.Source
}

Shuffler shuffles rows of X and Y

Example
// X and Y both have 2 rows, so they can be shuffled together.
X, Y := mat.NewDense(2, 3, []float64{1, 2, 3, 4, 5, 6}), mat.NewDense(2, 3, []float64{7, 8, 9, 10, 11, 12})
m := NewShuffler()
// A fixed seed keeps the listed output reproducible.
m.RandomState = base.NewSource(7)
X1, Y1 := m.FitTransform(X, Y)

// With this seed the two rows are swapped, in X and Y alike (see Output).
fmt.Println("Transformed:")
fmt.Printf("%s", base.MatStr(X1, Y1))
// The inverse restores the original row order.
X2, Y2 := m.InverseTransform(X1, Y1)
fmt.Println("InverseTransformed:")
fmt.Printf("%s", base.MatStr(X2, Y2))
Output:

Transformed:
4	5	6	10	11	12
1	2	3	7	8	9
InverseTransformed:
1	2	3	7	8	9
4	5	6	10	11	12

func NewShuffler

func NewShuffler() *Shuffler

NewShuffler returns a *Shuffler

func (*Shuffler) Fit

func (m *Shuffler) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter

Fit for Shuffler

func (*Shuffler) FitTransform

func (m *Shuffler) FitTransform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

FitTransform fits to the data, then transforms it.

func (*Shuffler) InverseTransform

func (m *Shuffler) InverseTransform(X, Y *mat.Dense) (Xout, Yout *mat.Dense)

InverseTransform for Shuffler

func (*Shuffler) Transform

func (m *Shuffler) Transform(X, Y mat.Matrix) (Xout, Yout *mat.Dense)

Transform for Shuffler

func (*Shuffler) TransformerClone

func (m *Shuffler) TransformerClone() base.Transformer

TransformerClone ...

type Transformer

// Transformer is an interface for various preprocessors.
// It is declared as an alias of base.Transformer.
type Transformer = base.Transformer


Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL