Documentation ¶
Overview ¶
Package linearmodel implements generalized linear models. It includes Ridge regression, Bayesian Regression, and Lasso and Elastic Net estimators computed with Least Angle Regression and coordinate descent. It also implements Stochastic Gradient Descent-related algorithms.
Index ¶
- Variables
- func CrossEntropyLoss(Ytrue, X mat.Matrix, Theta, Ypred, Ydiff, grad *mat.Dense, ...) (J float64)
- func EnetPath(X, Y *mat.Dense, L1Ratio, eps float64, NAlphas int, Alphas *[]float64, ...) (alphas []float64, coefs []*mat.Dense, dualGaps []float64, nIters []int)
- func LassoPath(X, Y *mat.Dense, eps float64, NAlphas int, Alphas *[]float64, ...) (alphas []float64, coefs []*mat.Dense, dualGaps []float64, nIters []int)
- func LogLoss(Ytrue, X mat.Matrix, Theta, Ypred, Ydiff, grad *mat.Dense, ...) (J float64)
- func PreprocessData(X, Y *mat.Dense, FitIntercept, Normalize bool, SampleWeight *mat.VecDense) (Xout, Yout, XOffset, YOffset, XScale *mat.Dense)
- func SquareLoss(Ytrue, X mat.Matrix, Theta, Ypred, Ydiff, grad *mat.Dense, ...) (J float64)
- type Activation
- type BayesianRidge
- func (regr *BayesianRidge) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter
- func (regr *BayesianRidge) GetNOutputs() int
- func (*BayesianRidge) IsClassifier() bool
- func (regr *BayesianRidge) Predict(X mat.Matrix, Ymutable mat.Mutable) *mat.Dense
- func (regr *BayesianRidge) Predict2(X, Y, yStd *mat.Dense)
- func (regr *BayesianRidge) PredicterClone() base.Predicter
- type CDResult
- type ElasticNet
- type Lasso
- type LinFitOptions
- type LinFitResult
- type LinearModel
- type LinearRegression
- type LogisticRegression
- func (m *LogisticRegression) Fit(X, Y mat.Matrix) base.Fiter
- func (m *LogisticRegression) GetNOutputs() int
- func (m *LogisticRegression) IsClassifier() bool
- func (m *LogisticRegression) Predict(X mat.Matrix, Y mat.Mutable) *mat.Dense
- func (m *LogisticRegression) PredictProbas(Xmatrix mat.Matrix, Ymutable mat.Mutable) *mat.Dense
- func (m *LogisticRegression) PredicterClone() base.Predicter
- func (m *LogisticRegression) Score(Xmatrix, Ymatrix mat.Matrix) float64
- type Loss
- type MultiTaskElasticNet
- type MultiTaskLasso
- type RegularizedRegression
- type Ridge
- type SGDRegressor
Examples ¶
Constants ¶
This section is empty.
Variables ¶
var LossFunctions = map[string]Loss{"square": SquareLoss, "log": LogLoss, "cross-entropy": CrossEntropyLoss}
LossFunctions is the map of implemented loss functions
Functions ¶
func CrossEntropyLoss ¶
func CrossEntropyLoss(Ytrue, X mat.Matrix, Theta, Ypred, Ydiff, grad *mat.Dense, Alpha, L1Ratio float64, nSamples int, activation Activation, disableRegularizationOfFirstFeature bool) (J float64)
CrossEntropyLoss is the loss for LogisticRegression and classifiers.
J: -y*math.Log(h) - (1-y)*math.Log(1-h)
grad: hprime*(-y/h + (1-y)/(1-h))
func EnetPath ¶
func EnetPath(X, Y *mat.Dense, L1Ratio, eps float64, NAlphas int, Alphas *[]float64, verbose, positive bool) (alphas []float64, coefs []*mat.Dense, dualGaps []float64, nIters []int)
EnetPath computes the elastic net path with coordinate descent. No preprocessing is done here; you must have called PreprocessData beforehand.
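A minimal hedged sketch of EnetPath, modeled on the LassoPath example below; the data and alpha values are illustrative:

// a sketch of EnetPath usage, modeled on the LassoPath example below;
// L1Ratio=0.5 mixes L1 and L2 penalties, eps=1e-3 is the path tolerance
X := mat.NewDense(3, 2, []float64{1, 2.3, 2, 5.4, 3.1, 4.3})
Y := mat.NewDense(3, 1, []float64{1, 2, 3.1})
alphas, coefs, gaps, nIters := EnetPath(X, Y, 0.5, 1e-3, 3, &[]float64{5, 1, .5}, false, false)
for i := range alphas {
	fmt.Printf("alpha=%g coefs=%.3f gap=%g iters=%d\n",
		alphas[i], mat.Formatted(coefs[i].T()), gaps[i], nIters[i])
}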
func LassoPath ¶
func LassoPath(X, Y *mat.Dense, eps float64, NAlphas int, Alphas *[]float64, verbose, positive bool) (alphas []float64, coefs []*mat.Dense, dualGaps []float64, nIters []int)
LassoPath computes the lasso path with coordinate descent.
Example ¶
// adapted from https://github.com/scikit-learn/scikit-learn/blob/a24c8b46/sklearn/linear_model/coordinate_descent.py
X := mat.NewDense(3, 2, []float64{1, 2.3, 2, 5.4, 3.1, 4.3})
Y := mat.NewDense(3, 1, []float64{1, 2, 3.1})
alphas, coefPath, _, _ := LassoPath(X, Y, 1e-3, 3, &[]float64{5, 1, .5}, false, false)
for icoef, coef := range coefPath {
	fmt.Printf("alpha=%.1f :\n%.3f\n", alphas[icoef], mat.Formatted(coef.T()))
}
Output:

alpha=5.0 :
[0.000 0.216]
alpha=1.0 :
[0.000 0.443]
alpha=0.5 :
[0.474 0.235]
func LogLoss ¶
func LogLoss(Ytrue, X mat.Matrix, Theta, Ypred, Ydiff, grad *mat.Dense, Alpha, L1Ratio float64, nSamples int, activation Activation, disableRegularizationOfFirstFeature bool) (J float64)
LogLoss is the loss for one-versus-rest classifiers.
func PreprocessData ¶
func PreprocessData(X, Y *mat.Dense, FitIntercept, Normalize bool, SampleWeight *mat.VecDense) (Xout, Yout, XOffset, YOffset, XScale *mat.Dense)
PreprocessData centers and normalizes data.
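A minimal hedged sketch; the data values are illustrative and a nil SampleWeight is assumed to mean unweighted samples:

// center X and Y (FitIntercept=true) and scale X (Normalize=true)
// before calling LassoPath/EnetPath
X := mat.NewDense(3, 2, []float64{1, 2.3, 2, 5.4, 3.1, 4.3})
Y := mat.NewDense(3, 1, []float64{1, 2, 3.1})
Xout, Yout, XOffset, YOffset, XScale := PreprocessData(X, Y, true, true, nil)
fmt.Printf("XOffset=%.3f YOffset=%.3f XScale=%.3f\n",
	mat.Formatted(XOffset), mat.Formatted(YOffset), mat.Formatted(XScale))
_, _ = Xout, Yout // use these as the path inputs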
func SquareLoss ¶
func SquareLoss(Ytrue, X mat.Matrix, Theta, Ypred, Ydiff, grad *mat.Dense, Alpha, L1Ratio float64, nSamples int, activation Activation, disableRegularizationOfFirstFeature bool) (J float64)
SquareLoss is the quadratic loss, for regressions.
Ytrue, X and Theta must be passed in. Ypred and Ydiff are temporary matrices passed in to avoid reallocations; nothing needs initializing for them except storage. Alpha and L1Ratio are regularization parameters.
J: math.Pow(h-y, 2)/2
grad: hprime*(h-y)
Types ¶
type BayesianRidge ¶
type BayesianRidge struct {
	LinearModel
	NIter                                 int
	Tol, Alpha1, Alpha2, Lambda1, Lambda2 float64
	ComputeScore, Verbose                 bool
	Alpha, Lambda                         float64
	Sigma                                 *mat.Dense
	Scores                                []float64
}
BayesianRidge regression struct
Example ¶
nSamples, nFeatures, nOutputs := 10000, 5, 5
X := mat.NewDense(nSamples, nFeatures, nil)
X.Apply(func(i, j int, v float64) float64 {
	return rand.NormFloat64() * 20
}, X)
f := func(X mat.Matrix, i, o int) float64 {
	if o == 0 {
		return 1. + 2.*X.At(i, 0) + 3.*X.At(i, 1) + 4.*X.At(i, 2)
	}
	return 1. - 2.*X.At(i, 0) + 3.*X.At(i, 1) + float64(o)*X.At(i, 2)
}
Y := mat.NewDense(nSamples, nOutputs, nil)
Y.Apply(func(i, o int, v float64) float64 {
	return f(X, i, o)
}, Y)
m := NewBayesianRidge()
m.Fit(X, Y)
Ypred := mat.NewDense(nSamples, nOutputs, nil)
m.Predict(X, Ypred)
r2score := metrics.R2Score(Y, Ypred, nil, "variance_weighted").At(0, 0)
if r2score > .999 {
	fmt.Println("BayesianRidge ok")
}
Output:

BayesianRidge ok
func NewBayesianRidge ¶
func NewBayesianRidge() *BayesianRidge
NewBayesianRidge creates a *BayesianRidge with defaults
func (*BayesianRidge) Fit ¶
func (regr *BayesianRidge) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter
Fit the model
Parameters
----------
X : mat.Matrix, shape (nSamples, nFeatures)
	Training data.
Y : mat.Matrix, shape (nSamples, nOutputs)
	Target values.
func (*BayesianRidge) GetNOutputs ¶
func (regr *BayesianRidge) GetNOutputs() int
GetNOutputs returns output columns number for Y to pass to predict
func (*BayesianRidge) IsClassifier ¶
func (*BayesianRidge) IsClassifier() bool
IsClassifier returns false for BayesianRidge
func (*BayesianRidge) Predict ¶
func (regr *BayesianRidge) Predict(X mat.Matrix, Ymutable mat.Mutable) *mat.Dense
Predict using the linear model; it returns the mean of the predictive distribution (use Predict2 to also obtain its standard deviation).
Parameters
----------
X : mat.Matrix, shape (nSamples, nFeatures)
	Samples.
Returns
-------
Y : *mat.Dense, shape (nSamples, nOutputs)
	Mean of the predictive distribution of the query points.
func (*BayesianRidge) Predict2 ¶
func (regr *BayesianRidge) Predict2(X, Y, yStd *mat.Dense)
Predict2 fills Y with predictions and yStd with their standard deviations.
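A minimal hedged sketch, assuming m is a *BayesianRidge fitted as in the example above, X is the same *mat.Dense, and yStd has the same shape as Y:

// predict means and standard deviations in one call
nSamples, _ := X.Dims()
Y := mat.NewDense(nSamples, m.GetNOutputs(), nil)
yStd := mat.NewDense(nSamples, m.GetNOutputs(), nil)
m.Predict2(X, Y, yStd)
fmt.Printf("first prediction: %.3f +/- %.3f\n", Y.At(0, 0), yStd.At(0, 0))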
func (*BayesianRidge) PredicterClone ¶
func (regr *BayesianRidge) PredicterClone() base.Predicter
PredicterClone for BayesianRidge
type ElasticNet ¶
type ElasticNet struct {
	LinearRegression
	Tol, Alpha, L1Ratio float64
	MaxIter             int
	Selection           string
	WarmStart, Positive bool
	CDResult            CDResult
}
ElasticNet is the struct for coordinate-descent regularized regressions: ElasticNet, Lasso and Ridge. Selection is "cyclic" or "random"; it defaults to "cyclic".
Example ¶
// adapted from http://scikit-learn.org/stable/_downloads/plot_train_error_vs_test_error.ipynb
if !*visualDebug {
	return
}
// Generate sample data
NSamplesTrain, NSamplesTest, NFeatures := 75, 150, 500
rand.Seed(0)
coef := mat.NewDense(NFeatures, 1, nil)
// only the top 10% features are impacting the model
for feat := 0; feat < 50; feat++ {
	coef.Set(feat, 0, rand.NormFloat64())
}
X := mat.NewDense(NSamplesTrain+NSamplesTest, NFeatures, nil)
{
	x := X.RawMatrix().Data
	for i := range x {
		x[i] = rand.NormFloat64()
	}
}
Y := &mat.Dense{}
Y.Mul(X, coef)
// Split train and test data
rowslice := func(X mat.RawMatrixer, start, end int) *mat.Dense {
	rm := X.RawMatrix()
	return mat.NewDense(end-start, rm.Cols, rm.Data[start*rm.Stride:end*rm.Stride])
}
Xtrain, Xtest := rowslice(X, 0, NSamplesTrain), rowslice(X, NSamplesTrain, NSamplesTrain+NSamplesTest)
Ytrain, Ytest := rowslice(Y, 0, NSamplesTrain), rowslice(Y, NSamplesTrain, NSamplesTrain+NSamplesTest)
// Compute train and test errors
nalphas := 20
logalphas := make([]float64, nalphas)
for i := range logalphas {
	logalphas[i] = -5 + 8*float64(i)/float64(nalphas)
}
trainErrors := make([]float64, nalphas)
testErrors := make([]float64, nalphas)
for ialpha, logalpha := range logalphas {
	enet := NewElasticNet()
	enet.L1Ratio = 0.7
	enet.Alpha = math.Pow(10, logalpha)
	enet.Fit(Xtrain, Ytrain)
	trainErrors[ialpha] = enet.Score(Xtrain, Ytrain)
	testErrors[ialpha] = enet.Score(Xtest, Ytest)
}
// Plot outputs
if *visualDebug {
	p, _ := plot.New()
	xys := func(X, Y []float64) plotter.XYs {
		var data plotter.XYs
		for i := range X {
			data = append(data, struct{ X, Y float64 }{X[i], Y[i]})
		}
		return data
	}
	s, _ := plotter.NewLine(xys(logalphas, trainErrors))
	s.Color = color.RGBA{0, 0, 255, 255}
	l, _ := plotter.NewLine(xys(logalphas, testErrors))
	l.Color = color.RGBA{255, 128, 0, 255}
	p.Add(s, l)
	p.Legend.Add("train", s)
	p.Legend.Add("test", l)
	// Save the plot to a PNG file.
	pngfile := "/tmp/elasticnet.png"
	os.Remove(pngfile)
	if err := p.Save(4*vg.Inch, 3*vg.Inch, pngfile); err != nil {
		panic(err)
	}
	cmd := exec.Command("display", pngfile)
	if err := cmd.Start(); err != nil {
		fmt.Println(err.Error())
	}
	time.Sleep(200 * time.Millisecond)
	os.Remove(pngfile)
}
Output:
func NewElasticNet ¶
func NewElasticNet() *ElasticNet
NewElasticNet creates a *ElasticNet with Alpha=1 and L1Ratio=0.5
func (*ElasticNet) Fit ¶
func (regr *ElasticNet) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter
Fit fits the ElasticNet with coordinate descent.
func (*ElasticNet) GetNOutputs ¶
func (regr *ElasticNet) GetNOutputs() int
GetNOutputs returns output columns number for Y to pass to predict
func (*ElasticNet) IsClassifier ¶
func (*ElasticNet) IsClassifier() bool
IsClassifier returns false for ElasticNet
func (*ElasticNet) PredicterClone ¶
func (regr *ElasticNet) PredicterClone() base.Predicter
PredicterClone for ElasticNet
type Lasso ¶
type Lasso = ElasticNet
Lasso is an alias for ElasticNet
Example ¶
// adapted from https://www.analyticsvidhya.com/blog/2016/01/complete-tutorial-ridge-lasso-regression-python/ §4
NSamples, NFeatures := 60, 15
X, Y := mat.NewDense(NSamples, NFeatures, nil), mat.NewDense(NSamples, 1, nil)
for sample, i := 0, 60; i < 300; sample, i = sample+1, i+4 {
	X.Set(sample, 0, float64(i)*math.Pi/180.)
	Y.Set(sample, 0, math.Sin(X.At(sample, 0)))
}
// fill higher-order polynomial features: column p-1 holds x^p
v := &mat.VecDense{}
for power := 2; power <= 15; power++ {
	v.ColViewOf(X, power-1)
	v.MulElemVec(X.ColView(0), X.ColView(power-2))
}
m := NewLasso()
m.FitIntercept = true
m.Normalize = true
m.Alpha = 1e-5
m.L1Ratio = 1
m.MaxIter = 1e5
m.Tol = 1e-4
m.Fit(X, Y)
Ypred := &mat.Dense{}
m.Predict(X, Ypred)
rss := &mat.VecDense{}
rss.SubVec(Ypred.ColView(0), Y.ColView(0))
rss.MulElemVec(rss, rss)
fmt.Printf("rss=%.4f intercept=%.4f coef=%.4f\n", mat.Sum(rss), mat.Formatted(m.Intercept.T()), mat.Formatted(m.Coef.T()))
Output:

rss=0.0149 intercept=[0.0570] coef=[ 1.2368 -0.3934 -0.0127 0.0000 0.0007 0.0001 0.0000 0.0000 0.0000 -0.0000 -0.0000 -0.0000 -0.0000 -0.0000 -0.0000]
type LinFitOptions ¶
type LinFitOptions struct {
Epochs, MiniBatchSize int
Tol float64
Solver string
SolverConfigure func(base.Optimizer)
// Alpha is regularization factor for Ridge,Lasso
Alpha float64
// L1Ratio is the part of L1 regularization 0 for ridge,1 for Lasso
L1Ratio float64
Loss Loss
Activation Activation
GOMethodCreator func() optimize.Method
ThetaInitializer func(Theta *mat.Dense)
Recorder optimize.Recorder
PerOutputFit bool
DisableRegularizationOfFirstFeature bool
}
LinFitOptions are options for LinFit
type LinFitResult ¶
LinFitResult is the result of LinFit.
func LinFit ¶
func LinFit(X, Ytrue *mat.Dense, opts *LinFitOptions) *LinFitResult
LinFit is an internal helper to fit linear regressions
func LinFitGOM ¶
func LinFitGOM(X, Ytrue *mat.Dense, opts *LinFitOptions) *LinFitResult
LinFitGOM fits a regression with a gonum/optimizer Method
type LinearModel ¶
type LinearModel struct {
FitIntercept, Normalize bool
XOffset, XScale, Coef, Intercept *mat.Dense
}
LinearModel is a base struct for multioutput regressions
func (*LinearModel) DecisionFunction ¶
func (regr *LinearModel) DecisionFunction(X mat.Matrix, Ymutable mat.Mutable)
DecisionFunction fills Y with X dot Coef+Intercept
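A minimal hedged sketch, assuming regr is a fitted estimator embedding LinearModel (a *LinearRegression, say) and X is a *mat.Dense of samples:

// fill a preallocated Y with the raw linear scores X·Coef + Intercept
nSamples, _ := X.Dims()
Y := mat.NewDense(nSamples, regr.GetNOutputs(), nil)
regr.DecisionFunction(X, Y)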
func (*LinearModel) GetNOutputs ¶
func (regr *LinearModel) GetNOutputs() int
GetNOutputs returns output columns number for Y to pass to predict
type LinearRegression ¶
type LinearRegression struct {
LinearModel
}
LinearRegression is ordinary least squares linear regression.
Parameters
----------
FitIntercept : bool, default true
	Whether to calculate the intercept for this model. If set to false, no intercept is used in the calculations (e.g. the data is expected to be already centered).
Normalize : bool, default false
	Ignored when FitIntercept is false. If true, the regressors X are normalized before regression by subtracting the mean and dividing by the l2-norm. To standardize instead, use a StandardScaler before calling Fit with Normalize=false.
Attributes
----------
Coef : *mat.Dense, shape (nFeatures, nOutputs)
	Estimated coefficients for the linear regression problem.
Intercept : *mat.Dense
	Independent term in the linear model.
Example ¶
// adapted from http://scikit-learn.org/stable/_downloads/plot_ols.ipynb
// Load the diabetes dataset
diabetes := datasets.LoadDiabetes()
// Use only one feature
NSamples, _ := diabetes.X.Dims()
diabetesX := diabetes.X.Slice(0, NSamples, 2, 3).(*mat.Dense)
// Split the data into training/testing sets
diabetesXtrain := diabetesX.Slice(0, NSamples-20, 0, 1).(*mat.Dense)
diabetesXtest := diabetesX.Slice(NSamples-20, NSamples, 0, 1).(*mat.Dense)
// Split the targets into training/testing sets
diabetesYtrain := diabetes.Y.Slice(0, NSamples-20, 0, 1).(*mat.Dense)
diabetesYtest := diabetes.Y.Slice(NSamples-20, NSamples, 0, 1).(*mat.Dense)
// Create linear regression object
regr := NewLinearRegression()
// Train the model using the training sets
regr.Fit(diabetesXtrain, diabetesYtrain)
// Make predictions using the testing set
NTestSamples := 20
diabetesYpred := mat.NewDense(NTestSamples, 1, nil)
regr.Predict(diabetesXtest, diabetesYpred)
// The coefficients
fmt.Printf("Coefficients: %.3f\n", mat.Formatted(regr.Coef))
// The mean squared error
fmt.Printf("Mean squared error: %.2f\n", metrics.MeanSquaredError(diabetesYtest, diabetesYpred, nil, "").At(0, 0))
// Explained variance score: 1 is perfect prediction
fmt.Printf("Variance score: %.2f\n", metrics.R2Score(diabetesYtest, diabetesYpred, nil, "").At(0, 0))
// Plot outputs
canPlot := false
if canPlot {
	p, _ := plot.New()
	xys := func(X, Y mat.Matrix) plotter.XYs {
		var data plotter.XYs
		NTestSamples, _ = X.Dims()
		for sample := 0; sample < NTestSamples; sample++ {
			data = append(data, struct{ X, Y float64 }{X.At(sample, 0), Y.At(sample, 0)})
		}
		return data
	}
	s, _ := plotter.NewScatter(xys(diabetesXtest, diabetesYtest))
	l, _ := plotter.NewLine(xys(diabetesXtest, diabetesYpred))
	l.Color = color.RGBA{0, 0, 255, 255}
	p.Add(s, l)
	// Save the plot to a PNG file.
	pngfile := "/tmp/linearregression.png"
	os.Remove(pngfile)
	if err := p.Save(4*vg.Inch, 3*vg.Inch, pngfile); err != nil {
		panic(err)
	}
	cmd := exec.Command("display", pngfile)
	if err := cmd.Start(); err != nil {
		fmt.Println(err.Error())
	}
	time.Sleep(200 * time.Millisecond)
	os.Remove(pngfile)
}
Output:

Coefficients: [938.238]
Mean squared error: 2548.07
Variance score: 0.47
func NewLinearRegression ¶
func NewLinearRegression() *LinearRegression
NewLinearRegression creates a *LinearRegression with defaults; fitting is implemented with mat.Dense.Solve.
func (*LinearRegression) Fit ¶
func (regr *LinearRegression) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter
Fit fits Coef for a LinearRegression
func (*LinearRegression) IsClassifier ¶
func (*LinearRegression) IsClassifier() bool
IsClassifier returns false for LinearRegression
func (*LinearRegression) PredicterClone ¶
func (regr *LinearRegression) PredicterClone() base.Predicter
PredicterClone for LinearRegression
type LogisticRegression ¶
type LogisticRegression struct {
	Alpha         float64          `json:"alpha"`
	MaxIter       int              `json:"max_iter"`
	LossFuncName  string           `json:"loss_func_name"`
	RandomState   base.RandomState `json:"random_state"`
	Tol           float64          `json:"tol"`
	Verbose       bool             `json:"verbose"`
	NIterNoChange int              `json:"n_iter_no_change"`

	// Outputs
	NLayers            int
	NIter              int
	NOutputs           int
	Intercept          []float64      `json:"intercepts_"`
	Coef               blas64.General `json:"coefs_"`
	OutActivation      string         `json:"out_activation_"`
	Loss               float64
	LossCurve          []float64
	BestLoss           float64
	NoImprovementCount int
	InterceptsGrads    []float64
	CoefsGrads         blas64.General
	// contains filtered or unexported fields
}
LogisticRegression is a Logistic Regression (aka logit, MaxEnt) classifier. In the multiclass case, the training algorithm uses the one-vs-rest (OvR) scheme if the 'multi_class' option is set to 'ovr', and the cross-entropy loss if it is set to 'multinomial'. This implementation uses the lbfgs solver and supports only L2 regularization with the primal formulation.
Example ¶
package main

import (
	"flag"
	"fmt"
	"image/color"
	"log"
	"math"
	"os"
	"os/exec"
	"time"

	"github.com/pa-m/sklearn/base"
	"github.com/pa-m/sklearn/datasets"
	"gonum.org/v1/gonum/diff/fd"
	"gonum.org/v1/gonum/mat"
	"gonum.org/v1/gonum/optimize"
	"gonum.org/v1/plot"
	"gonum.org/v1/plot/plotter"
	"gonum.org/v1/plot/vg"
	"gonum.org/v1/plot/vg/draw"
)

var _ base.Predicter = &LogisticRegression{}
var visualDebug = flag.Bool("visual", false, "output images for benchmarks and test data")

func main() {
	// adapted from http://scikit-learn.org/stable/_downloads/plot_iris_logistic.ipynb
	ds := datasets.LoadIris()

	// we only take the first two features.
	nSamples, _ := ds.X.Dims()
	X, YTrueClasses := ds.X.Slice(0, nSamples, 0, 2).(*mat.Dense), ds.Y
	h := .02 // step size in the mesh

	regr := NewLogisticRegression()
	regr.Alpha = 1e-5
	regr.beforeMinimize = func(problem optimize.Problem, initX []float64) {
		// check gradients
		settings := &fd.Settings{Step: 1e-8}
		gradFromModel := make([]float64, len(initX))
		gradFromFD := make([]float64, len(initX))
		problem.Func(initX)
		problem.Grad(gradFromModel, initX)
		fd.Gradient(gradFromFD, problem.Func, initX, settings)
		for i := range initX {
			if math.Abs(gradFromFD[i]-gradFromModel[i]) > 1e-4 {
				panic(fmt.Errorf("bad gradient, expected:\n%.3f\ngot:\n%.3f", gradFromFD, gradFromModel))
			}
		}
	}
	log.SetPrefix("ExampleLogisticRegression_Fit_iris:")
	defer log.SetPrefix("")

	// we create an instance of our Classifier and fit the data.
	regr.Fit(X, YTrueClasses)
	accuracy := regr.Score(X, YTrueClasses)
	if accuracy >= 0.833 {
		fmt.Println("ok")
	} else {
		fmt.Printf("Accuracy:%.3f\n", accuracy)
	}

	// Put the result into a color plot
	if *visualDebug {
		// Plot the decision boundary. For that, we will assign a color to each point in the mesh [x_min, x_max]x[y_min, y_max].
		var xmin, xmax = mat.Min(X.ColView(0)) - .5, mat.Max(X.ColView(0)) + .5
		var ymin, ymax = mat.Min(X.ColView(1)) - .5, mat.Max(X.ColView(1)) + .5
		nparange := func(min, max, h float64) []float64 {
			c := make([]float64, 0)
			for v := min; v <= max; v += h {
				c = append(c, v)
			}
			return c
		}
		npmeshgrid := func(xrange, yrange []float64) (xx, yy []float64) {
			for y := ymin; y <= ymax; y += h {
				for x := xmin; x <= xmax; x += h {
					xx = append(xx, x)
					yy = append(yy, y)
				}
			}
			return
		}
		npc := func(c ...[]float64) (XZ *mat.Dense) {
			XZ = mat.NewDense(len(c[0]), len(c), nil)
			for j, src := range c {
				XZ.SetCol(j, src)
			}
			return
		}
		var xx, yy = npmeshgrid(nparange(xmin, xmax, h), nparange(ymin, ymax, h))
		Xgrid := npc(xx, yy)
		Z := regr.Predict(Xgrid, nil)

		plt, _ := plot.New()
		xys := func(X, Y mat.Matrix, cls int) (xy plotter.XYs) {
			imax, _ := Y.Dims()
			for i := 0; i < imax; i++ {
				if int(Y.At(i, 0)) == cls {
					xy = append(xy, struct{ X, Y float64 }{X.At(i, 0), X.At(i, 1)})
				}
			}
			return
		}
		colors1 := []color.RGBA{{166, 206, 227, 255}, {253, 191, 111, 255}, {177, 89, 40, 255}}
		for cls := 0; cls <= 2; cls++ {
			s, _ := plotter.NewScatter(xys(Xgrid, Z, cls))
			s.GlyphStyle.Shape = draw.BoxGlyph{}
			s.GlyphStyle.Color = colors1[cls]
			s.GlyphStyle.Radius = 1
			plt.Add(s)
			s1, _ := plotter.NewScatter(xys(X, YTrueClasses, cls))
			s1.GlyphStyle.Shape = draw.CircleGlyph{}
			s1.GlyphStyle.Radius = 4
			s1.GlyphStyle.Color = colors1[cls]
			plt.Add(s1)
			plt.Legend.Add(ds.TargetNames[cls], s1)
		}
		plt.X.Label.Text = ds.FeatureNames[0]
		plt.Y.Label.Text = ds.FeatureNames[1]
		// Save the plot to a PNG file.
		pngfile := "/tmp/ExampleLogisticRegression.png"
		os.Remove(pngfile)
		if err := plt.Save(7*vg.Inch, 7*vg.Inch, pngfile); err != nil {
			panic(err)
		}
		cmd := exec.Command("display", pngfile)
		if err := cmd.Start(); err != nil {
			fmt.Println(err.Error())
		}
		time.Sleep(200 * time.Millisecond)
		os.Remove(pngfile)
	}
}
Output:

ok
func NewLogisticRegression ¶
func NewLogisticRegression() *LogisticRegression
NewLogisticRegression returns a LogisticRegression with defaults: Alpha=1/C=1; Tol=1e-4
func (*LogisticRegression) Fit ¶
func (m *LogisticRegression) Fit(X, Y mat.Matrix) base.Fiter
Fit computes Coef and Intercept.
func (*LogisticRegression) GetNOutputs ¶
func (m *LogisticRegression) GetNOutputs() int
GetNOutputs returns output columns number for Y to pass to predict
func (*LogisticRegression) IsClassifier ¶
func (m *LogisticRegression) IsClassifier() bool
IsClassifier returns true if LossFuncName is not square_loss.
func (*LogisticRegression) PredictProbas ¶
func (m *LogisticRegression) PredictProbas(Xmatrix mat.Matrix, Ymutable mat.Mutable) *mat.Dense
PredictProbas returns probability estimates. The returned estimates for all classes are ordered by class label.
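A minimal hedged sketch, assuming m is a *LogisticRegression already fitted on X; passing nil for Ymutable is assumed to let the method allocate the result, as Predict does in the example above:

// probability estimates, one column per class
probas := m.PredictProbas(X, nil)
nSamples, nClasses := probas.Dims()
fmt.Printf("%d samples x %d classes, first row: %.3f\n",
	nSamples, nClasses, mat.Formatted(probas.RowView(0).T()))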
func (*LogisticRegression) PredicterClone ¶
func (m *LogisticRegression) PredicterClone() base.Predicter
PredicterClone for LogisticRegression
type Loss ¶
type Loss func(Ytrue, X mat.Matrix, Theta, Ypred, Ydiff, grad *mat.Dense, Alpha, L1Ratio float64, nSamples int, activation Activation, disableRegularizationOfFirstFeature bool) (J float64)
Loss puts the cost in J and the cost gradient in grad. Ytrue, X and Theta must be passed in. Ypred and Ydiff are temporary matrices passed in to avoid reallocations; nothing needs initializing for them except storage. Alpha and L1Ratio are regularization parameters. The loss derivative is dJWrtTheta = dJWrth * dhWrtz * X. featurestart is 1 instead of 0 when the first feature is ones.
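Because Loss is a plain function type, a custom loss can be plugged in. A minimal hedged sketch: wrap the package's SquareLoss to log the cost on every call, and register it under a hypothetical "verbose-square" key in LossFunctions:

// a sketch: delegate cost and gradient computation to SquareLoss,
// printing J on every evaluation
var verboseSquareLoss Loss = func(Ytrue, X mat.Matrix, Theta, Ypred, Ydiff, grad *mat.Dense,
	Alpha, L1Ratio float64, nSamples int, activation Activation,
	disableRegularizationOfFirstFeature bool) (J float64) {
	J = SquareLoss(Ytrue, X, Theta, Ypred, Ydiff, grad,
		Alpha, L1Ratio, nSamples, activation, disableRegularizationOfFirstFeature)
	fmt.Printf("square loss: %g\n", J)
	return J
}

// register it under a new (hypothetical) name so it can be selected like the built-ins
func init() { LossFunctions["verbose-square"] = verboseSquareLoss }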
type MultiTaskElasticNet ¶
type MultiTaskElasticNet = Lasso
MultiTaskElasticNet is an alias for ElasticNet
Example ¶
// example adapted from one in https://github.com/scikit-learn/scikit-learn/blob/0.19.1/sklearn/linear_model/coordinate_descent.py
clf := NewMultiTaskElasticNet()
clf.Alpha = .1
clf.Normalize = false
X, Y := mat.NewDense(3, 2, []float64{0, 0, 1, 1, 2, 2}), mat.NewDense(3, 2, []float64{0, 0, 1, 1, 2, 2})
clf.Fit(X, Y)
fmt.Printf("%.8f\n", mat.Formatted(clf.Coef.T()))
fmt.Printf("%.8f\n", mat.Formatted(clf.Intercept))
fmt.Printf("gap:%5e eps:%5e nItem:%d", clf.CDResult.Gap, clf.CDResult.Eps, clf.CDResult.NIter)
Output:

⎡0.45663524 0.45612256⎤
⎣0.45663524 0.45612256⎦
[0.08724220 0.08724220]
gap:7.023365e-05 eps:4.000000e-04 nItem:52
func NewMultiTaskElasticNet ¶
func NewMultiTaskElasticNet() *MultiTaskElasticNet
NewMultiTaskElasticNet creates a *ElasticNet with Alpha=1 and L1Ratio=0.5
type MultiTaskLasso ¶
type MultiTaskLasso = Lasso
MultiTaskLasso is an alias for ElasticNet/Lasso
Example ¶
// example adapted from one in https://github.com/scikit-learn/scikit-learn/blob/0.19.1/sklearn/linear_model/coordinate_descent.py
clf := NewMultiTaskLasso()
clf.Alpha = .1
clf.Fit(
	mat.NewDense(3, 2, []float64{0, 0, 1, 1, 2, 2}),
	mat.NewDense(3, 2, []float64{0, 0, 1, 1, 2, 2}),
)
fmt.Printf("%.8f\n", mat.Formatted(clf.Coef.T()))
fmt.Printf("%.8f\n", mat.Formatted(clf.Intercept))
Output:

⎡0.89393398 0.00000000⎤
⎣0.89393398 0.00000000⎦
[0.10606602 0.10606602]
func NewMultiTaskLasso ¶
func NewMultiTaskLasso() *MultiTaskLasso
NewMultiTaskLasso creates a *MultiTaskLasso with Alpha=1 and L1Ratio=1
type RegularizedRegression ¶
type RegularizedRegression struct {
	LinearRegression
	Solver              string
	SolverConfigure     func(base.Optimizer)
	Tol, Alpha, L1Ratio float64
	LossFunction        Loss
	ActivationFunction  Activation
	Options             LinFitOptions
}
RegularizedRegression is a common structure for ElasticNet, Lasso and Ridge.
func NewRidge ¶
func NewRidge() *RegularizedRegression
NewRidge creates a *RegularizedRegression with Alpha=1. and L1Ratio=0
type Ridge ¶
type Ridge = RegularizedRegression
Ridge is an alias for RegularizedRegression
Example ¶
X, Y := mat.NewDense(3, 2, []float64{0, 0, 1, 1, 2, 2}), mat.NewDense(3, 2, []float64{0, 0, 1, 1, 2, 2})
clf := NewRidge()
clf.Tol = 1e-3
clf.Normalize = false
clf.Alpha = 1
clf.L1Ratio = 0.
clf.Fit(X, Y)
fmt.Printf("Coef:\n%.2f\n", mat.Formatted(clf.Coef.T()))
fmt.Printf("Intercept:\n%.2f\n", mat.Formatted(clf.Intercept.T()))
Ypred := &mat.Dense{}
clf.Predict(X, Ypred)
fmt.Printf("Ypred:\n%.2f\n", mat.Formatted(Ypred))
Output:

Coef:
⎡0.40 0.40⎤
⎣0.40 0.40⎦
Intercept:
⎡0.20⎤
⎣0.20⎦
Ypred:
⎡0.20 0.20⎤
⎢1.00 1.00⎥
⎣1.80 1.80⎦
func (*Ridge) PredicterClone ¶
func (regr *Ridge) PredicterClone() base.Predicter
PredicterClone for Ridge
type SGDRegressor ¶
type SGDRegressor struct {
	LinearModel
	Tol, Alpha, L1Ratio float64
	NJobs               int
	Method              optimize.Method
}
SGDRegressor is the base struct (it would be better named GonumOptimizeRegressor). It is implemented as a per-output optimization of (possibly regularized) square loss with gonum/optimize methods.
func NewSGDRegressor ¶
func NewSGDRegressor() *SGDRegressor
NewSGDRegressor creates a *SGDRegressor with defaults
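A minimal hedged sketch, assuming X and Y are *mat.Dense training data as in the examples above; LBFGS is one of the gonum/optimize methods that can be plugged into the Method field:

// plug a gonum/optimize method into SGDRegressor
regr := NewSGDRegressor()
regr.Method = &optimize.LBFGS{} // any gonum.org/v1/gonum/optimize Method
regr.Alpha = 1e-4               // light regularization
regr.Fit(X, Y)
nSamples, _ := X.Dims()
Ypred := mat.NewDense(nSamples, regr.GetNOutputs(), nil)
regr.DecisionFunction(X, Ypred) // raw X·Coef + Intercept via the embedded LinearModel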
func (*SGDRegressor) Fit ¶
func (regr *SGDRegressor) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter
Fit learns Coef
func (*SGDRegressor) IsClassifier ¶
func (*SGDRegressor) IsClassifier() bool
IsClassifier returns false for SGDRegressor
func (*SGDRegressor) PredicterClone ¶
func (regr *SGDRegressor) PredicterClone() base.Predicter
PredicterClone for SGDRegressor