regression

package
v0.0.0-...-731a333
Published: Aug 25, 2016 License: GPL-3.0 Imports: 2 Imported by: 0

Documentation

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func ClosedForm

func ClosedForm(X *mat64.Dense, Y *mat64.Vector) *mat64.Vector

ClosedForm computes the coefficients using the closed-form solution W = inverse(X'*X)*X'*Y. Important: this solution is very inefficient, as it requires computing the inverse of the matrix X'*X, which is O(N^3). Also, for that inverse to exist, the number of ROWS must be greater than the number of FEATURES.
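A minimal sketch of calling ClosedForm on a tiny in-memory data set (the mat64 import path, the intercept column and the sample values are assumptions for illustration only):

package main

import (
	"fmt"

	"github.com/gonum/matrix/mat64" // assumed import path for mat64
	"github.com/mennanov/mlearn/regression"
)

func main() {
	// 4 observations (rows) and 2 features (a column of ones for the
	// intercept plus one numeric feature), so rows > features.
	X := mat64.NewDense(4, 2, []float64{
		1, 1,
		1, 2,
		1, 3,
		1, 4,
	})
	// Targets following y = 0.5 + 2*x exactly.
	Y := mat64.NewVector(4, []float64{2.5, 4.5, 6.5, 8.5})

	W := regression.ClosedForm(X, Y)
	fmt.Println(mat64.Formatted(W))
}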

func GradientDescent

func GradientDescent(X *mat64.Dense, Y *mat64.Vector, step, tolerance float64, maxIterations int) (*mat64.Vector, int)

GradientDescent computes the parameters of the linear regression by taking the gradient (partial derivatives) of the RSS and iteratively moving down the slope. Iteration stops when maxIterations is exceeded or when the change in the gradient is less than tolerance.

Example
package main

import (
	"encoding/csv"
	"fmt"
	"github.com/mennanov/mlearn"
	"github.com/mennanov/mlearn/features"
	"github.com/mennanov/mlearn/regression"
	"io"
	"os"
)

// loadCSVFile reads the csv file and extracts its features and target vectors as slices of strings.
func loadCSVFile(file string, targetColumn int, featureColumns []int) ([][]string, []string) {
	f, err := os.Open(file)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	csvReader := csv.NewReader(f)
	featuresMatrix := [][]string{}
	targetsVector := []string{}
	for i := 0; ; i++ {
		row, err := csvReader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		// Skip the headers row.
		if i == 0 {
			continue
		}
		// Add a target value for the current row.
		targetsVector = append(targetsVector, row[targetColumn])
		// Add a features vector (a slice of strings) for the current row.
		rowFeatures := make([]string, len(featureColumns))
		for j, c := range featureColumns {
			rowFeatures[j] = row[c]
		}
		featuresMatrix = append(featuresMatrix, rowFeatures)
	}
	return featuresMatrix, targetsVector
}

func main() {
	columnIdx := []int{3, 4, 5, 6, 7, 14, 15, 17, 18}
	targetColumnIdx := 2
	encoders := []features.Encoder{
		&features.NumericMultiplicationEncoder{Columns: []int{0, 0}, ColumnName: "bedrooms_square"},
		&features.NumericMultiplicationEncoder{Columns: []int{0, 1}, ColumnName: "bedrooms_bathrooms"},
		&features.NumericEncoder{Column: 1, ColumnName: "bathrooms"},
		&features.NumericEncoder{Column: 2, ColumnName: "sqft_living"},
		&features.NumericMultiplicationEncoder{Columns: []int{4, 4}, ColumnName: "floors_square"},
		&features.NumericEncoder{Column: 5, ColumnName: "yr_built"},
		&features.NumericEncoder{Column: 6, ColumnName: "yr_renovated"},
		&features.NumericSumEncoder{Columns: []int{7, 8}, ColumnName: "lat_lng"},
	}
	featuresTrainStr, targetsTrainStr := loadCSVFile("../data/kc_house_train_data.csv", targetColumnIdx,
		columnIdx)
	featuresTrain, columns, err := mlearn.NewMatrixFromData(featuresTrainStr, encoders...)
	if err != nil {
		panic(err)
	}
	targetsTrain, err := mlearn.NewVectorFromStringData(targetsTrainStr)
	if err != nil {
		panic(err)
	}
	fmt.Println(columns)
	r, _ := featuresTrain.Dims()
	weights, iterations := regression.GradientDescent(featuresTrain, targetsTrain, 1.1e-12, 5e-2, 1000)
	fmt.Println("Gradient Descend converged after iterations: ", iterations)
	rssTrain := regression.RSS(targetsTrain, featuresTrain, weights)
	rmseTrain := regression.RMSE(rssTrain, r)
	fmt.Println("Train RSS:", rssTrain, "Train RMSE:", rmseTrain)
	// Load the test data set.
	featuresTestStr, targetsTestStr := loadCSVFile("../data/kc_house_test_data.csv", targetColumnIdx,
		columnIdx)
	featuresTest, _, err := mlearn.NewMatrixFromData(featuresTestStr, encoders...)
	if err != nil {
		panic(err)
	}
	targetsTest, err := mlearn.NewVectorFromStringData(targetsTestStr)
	if err != nil {
		panic(err)
	}
	r, _ = featuresTest.Dims()
	rssTest := regression.RSS(targetsTest, featuresTest, weights)
	rmseTest := regression.RMSE(rssTest, r)
	fmt.Println("Test RSS:", rssTest, "Test RMSE:", rmseTest)
}
Output:

[intercept bedrooms_square bedrooms_bathrooms bathrooms sqft_living floors_square yr_built yr_renovated lat_lng]
Gradient Descent converged after iterations:  366
Train RSS: 1.183078156129154e+15 Train RMSE: 260874.63908916235
Test RSS: 2.7031485613954122e+14 Test RMSE: 252822.72892846845

func Predict

func Predict(X *mat64.Dense, W *mat64.Vector) *mat64.Vector

Predict computes the predicted values for each row of X using the given weights vector W. Returns the vector of predictions.

func RMSE

func RMSE(rss float64, n int) float64

RMSE is the Root-Mean-Square Error, sqrt(rss/n); computed from the RSS it is the standard deviation of the residuals.

func RSS

func RSS(Y *mat64.Vector, X *mat64.Dense, W *mat64.Vector) float64

RSS stands for Residual Sum of Squares: the sum of squared differences between the targets Y and the predictions made with X and W.
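A minimal sketch tying Predict, RSS and RMSE together (reusing the tiny, hypothetical data set from the ClosedForm sketch above, with slightly noisy targets so the residuals are non-zero):

package main

import (
	"fmt"

	"github.com/gonum/matrix/mat64" // assumed import path for mat64
	"github.com/mennanov/mlearn/regression"
)

func main() {
	// Intercept column of ones plus one numeric feature.
	X := mat64.NewDense(4, 2, []float64{
		1, 1,
		1, 2,
		1, 3,
		1, 4,
	})
	Y := mat64.NewVector(4, []float64{2.4, 4.6, 6.4, 8.6})

	// Fit the weights with the closed-form solution.
	W := regression.ClosedForm(X, Y)

	// Predict and evaluate the fit.
	predictions := regression.Predict(X, W)
	rows, _ := X.Dims()
	rss := regression.RSS(Y, X, W)
	rmse := regression.RMSE(rss, rows)
	fmt.Println(mat64.Formatted(predictions))
	fmt.Println("RSS:", rss, "RMSE:", rmse)
}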

func RSSGradient

func RSSGradient(Y *mat64.Vector, X *mat64.Dense, W *mat64.Vector, step float64) *mat64.Vector

RSSGradient computes the gradient of the RSS function. The gradient is the vector of partial derivatives of the RSS with respect to each weight W[j].

Types

This section is empty.
