modelselection

package
v0.0.0-...-beb861e Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 11, 2020 License: MIT Imports: 10 Imported by: 2

Documentation

Overview

Package modelselection contains KFold, GridSearchCV, CrossValidate

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func ParameterGrid

func ParameterGrid(paramGrid map[string][]interface{}) (out []map[string]interface{})

ParameterGrid generates every combination of the given parameter values, returning one map per combination (the cartesian product of the grid).

Example
paramArray := ParameterGrid(map[string][]interface{}{"a": {1, 2, 3}, "b": {10, 11}})
sortParamArray(paramArray)
for _, m := range paramArray {
	fmt.Println(m["a"], m["b"])
}
Output:

1 10
1 11
2 10
2 11
3 10
3 11

func TrainTestSplit

func TrainTestSplit(X, Y mat.Matrix, testsize float64, randomstate uint64) (Xtrain, Xtest, ytrain, ytest *mat.Dense)

TrainTestSplit splits X and Y into a test set and a train set. testsize must be between 0 and 1. It produces the same sets as scikit-learn's train_test_split.

Example
/*
   >>> import numpy as np
   >>> from sklearn.model_selection import train_test_split
   >>> X, y = np.arange(10).reshape((5, 2)), range(5)
   >>> X_train, X_test, y_train, y_test = train_test_split(
   ...     X, y, test_size=0.33, random_state=42)
   ...
   >>> X_train
   array([[4, 5],
          [0, 1],
          [6, 7]])
   >>> y_train
   [2, 0, 3]
   >>> X_test
   array([[2, 3],
          [8, 9]])
   >>> y_test
   [1, 4]

*/
X := mat.NewDense(5, 2, []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
Y := mat.NewDense(5, 1, []float64{0, 1, 2, 3, 4})
RandomState := uint64(42)
Xtrain, Xtest, Ytrain, Ytest := TrainTestSplit(X, Y, .33, RandomState)
fmt.Printf("X_train:\n%g\n", mat.Formatted(Xtrain))
fmt.Printf("Y_train:\n%g\n", mat.Formatted(Ytrain))
fmt.Printf("X_test:\n%g\n", mat.Formatted(Xtest))
fmt.Printf("Y_test:\n%g\n", mat.Formatted(Ytest))
Output:

X_train:
⎡4  5⎤
⎢0  1⎥
⎣6  7⎦
Y_train:
⎡2⎤
⎢0⎥
⎣3⎦
X_test:
⎡2  3⎤
⎣8  9⎦
Y_test:
⎡1⎤
⎣4⎦

Types

type CrossValidateResult

type CrossValidateResult struct {
	TestScore          []float64
	FitTime, ScoreTime []time.Duration
	Estimator          []base.Predicter
}

CrossValidateResult is the struct result of CrossValidate. It includes TestScore, FitTime, ScoreTime and Estimator.

func CrossValidate

func CrossValidate(estimator base.Predicter, X, Y *mat.Dense, groups []int, scorer func(Ytrue, Ypred mat.Matrix) float64, cv Splitter, NJobs int) (res CrossValidateResult)

CrossValidate evaluates a score by cross-validation. scorer is a func(Ytrue, Ypred) float64 (only mean_squared_error for now). NJobs is the number of goroutines; if <=0, runtime.NumCPU is used.

Example
// example adapted from https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_validate.html#sklearn.model_selection.cross_validate
for _, NJobs := range []int{1, 3} {
	randomState := rand.New(base.NewLockedSource(5))
	diabetes := datasets.LoadDiabetes()
	X, y := diabetes.X.Slice(0, 150, 0, diabetes.X.RawMatrix().Cols).(*mat.Dense), diabetes.Y.Slice(0, 150, 0, 1).(*mat.Dense)
	lasso := linearModel.NewLasso()
	scorer := func(Y, Ypred mat.Matrix) float64 {
		e := metrics.R2Score(Y, Ypred, nil, "").At(0, 0)
		return e
	}
	cvresults := CrossValidate(lasso, X, y, nil, scorer, &KFold{NSplits: 3, Shuffle: true, RandomState: randomState}, NJobs)
	sort.Sort(cvresults)
	fmt.Printf("%.8f\n", cvresults.TestScore)
}
Output:

[0.29391770 0.25681807 0.24695688]
[0.29391770 0.25681807 0.24695688]

func (CrossValidateResult) Len

func (r CrossValidateResult) Len() int

Len for CrossValidateResult to implement sort.Interface

func (CrossValidateResult) Less

func (r CrossValidateResult) Less(i, j int) bool

Less for CrossValidateResult to implement sort.Interface

func (CrossValidateResult) Swap

func (r CrossValidateResult) Swap(i, j int)

Swap for CrossValidateResult to implement sort.Interface

type GridSearchCV

type GridSearchCV struct {
	Estimator          base.Predicter
	ParamGrid          map[string][]interface{}
	Scorer             func(Ytrue, Ypred mat.Matrix) float64
	CV                 Splitter
	Verbose            bool
	NJobs              int
	LowerScoreIsBetter bool
	UseChannels        bool
	RandomState        rand.Source

	CVResults     map[string][]interface{}
	BestEstimator base.Predicter
	BestScore     float64
	BestParams    map[string]interface{}
	BestIndex     int
	NOutputs      int
}

GridSearchCV ... Estimator is the base estimator. It must implement base.Predicter. Scorer is a function __returning a higher score when Ypred is better__. CV is a splitter (defaults to KFold).

Example
RandomState := base.NewLockedSource(7)
ds := datasets.LoadBoston()
X, Y := preprocessing.NewStandardScaler().FitTransform(ds.X, ds.Y)

mlp := neuralnetwork.NewMLPRegressor([]int{20}, "relu", "adam", 1e-4)
mlp.RandomState = RandomState
mlp.Shuffle = false
mlp.BatchSize = 20
mlp.LearningRateInit = .005
mlp.MaxIter = 100

scorer := func(Y, Ypred mat.Matrix) float64 {
	return metrics.MeanSquaredError(Y, Ypred, nil, "").At(0, 0)
}
gscv := &GridSearchCV{
	Estimator: mlp,
	ParamGrid: map[string][]interface{}{
		"Alpha":       {1e-4, 2e-4, 5e-4, 1e-3},
		"WeightDecay": {1e-4, 1e-5, 1e-6, 5e-7, 2e-7, 1e-7, 5e-8, 2e-8, 1e-8, 0},
	},
	Scorer:             scorer,
	LowerScoreIsBetter: true,
	// CV:                 &KFold{NSplits: 3, RandomState: RandomState, Shuffle: true},
	Verbose: true,
	NJobs:   -1}
gscv.Fit(X, Y)
fmt.Println("Alpha", gscv.BestParams["Alpha"])
fmt.Println("WeightDecay", gscv.BestParams["WeightDecay"])
// fmt.Println(gscv.CVResults["score"])
Output:

Alpha 0.0001
WeightDecay 5e-08

func (*GridSearchCV) Fit

func (gscv *GridSearchCV) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter

Fit ...

func (*GridSearchCV) GetNOutputs

func (gscv *GridSearchCV) GetNOutputs() int

GetNOutputs returns output columns number for Y to pass to predict

func (*GridSearchCV) IsClassifier

func (gscv *GridSearchCV) IsClassifier() bool

IsClassifier returns the underlying estimator's IsClassifier.

func (*GridSearchCV) Predict

func (gscv *GridSearchCV) Predict(X mat.Matrix, Y mat.Mutable) *mat.Dense

Predict ...

func (*GridSearchCV) PredicterClone

func (gscv *GridSearchCV) PredicterClone() base.Predicter

PredicterClone ...

func (*GridSearchCV) Score

func (gscv *GridSearchCV) Score(X, Y mat.Matrix) float64

Score for GridSearchCV returns the best estimator's score.

type KFold

type KFold struct {
	NSplits     int
	Shuffle     bool
	RandomState base.RandomState
}

KFold ...

Example
randomState := rand.New(base.NewLockedSource(7))
X := mat.NewDense(6, 1, []float64{1, 2, 3, 4, 5, 6})
subtest := func(shuffle bool) {
	fmt.Println("shuffle", shuffle)
	kf := &KFold{NSplits: 3, Shuffle: shuffle, RandomState: randomState}
	for sp := range kf.Split(X, nil) {
		fmt.Printf("%#v\n", sp)
	}

}
subtest(false)
subtest(true)
Output:

shuffle false
modelselection.Split{TrainIndex:[]int{0, 1, 2, 3}, TestIndex:[]int{4, 5}}
modelselection.Split{TrainIndex:[]int{4, 5, 2, 3}, TestIndex:[]int{0, 1}}
modelselection.Split{TrainIndex:[]int{0, 4, 5, 3}, TestIndex:[]int{1, 2}}
shuffle true
modelselection.Split{TrainIndex:[]int{5, 0, 2, 3}, TestIndex:[]int{4, 1}}
modelselection.Split{TrainIndex:[]int{5, 3, 2, 0}, TestIndex:[]int{1, 4}}
modelselection.Split{TrainIndex:[]int{2, 4, 1, 0}, TestIndex:[]int{5, 3}}

func (*KFold) GetNSplits

func (splitter *KFold) GetNSplits(X, Y *mat.Dense) int

GetNSplits for KFold

func (*KFold) Split

func (splitter *KFold) Split(X, Y *mat.Dense) (ch chan Split)

Split generates Split structs and sends them on the returned channel.

func (*KFold) SplitterClone

func (splitter *KFold) SplitterClone() Splitter

SplitterClone ...

type RandomState

type RandomState = rand.Rand

RandomState is to init a new random source for reproducibility

type Split

type Split struct{ TrainIndex, TestIndex []int }

Split ...

type Splitter

type Splitter interface {
	Split(X, Y *mat.Dense) (ch chan Split)
	GetNSplits(X, Y *mat.Dense) int
	SplitterClone() Splitter
}

Splitter is the interface for splitters like KFold

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL