Documentation
¶
Overview ¶
Package cluster gathers popular unsupervised clustering algorithms. It contains DBSCAN and KMeans.
Index ¶
- func EuclideanDistance(a, b mat.Vector) float64
- func MinkowskiDistanceP(a, b mat.Vector, p float64) float64
- type DBSCAN
- func (m *DBSCAN) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter
- func (m *DBSCAN) GetNOutputs() int
- func (m *DBSCAN) IsClassifier() bool
- func (m *DBSCAN) Predict(X mat.Matrix, Ymutable mat.Mutable) *mat.Dense
- func (m *DBSCAN) PredicterClone() base.Predicter
- func (m *DBSCAN) Score(X, Y mat.Matrix) float64
- type DBSCANConfig
- type Distance
- type KMeans
- func (m *KMeans) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter
- func (m *KMeans) GetNOutputs() int
- func (m *KMeans) IsClassifier() bool
- func (m *KMeans) Predict(X mat.Matrix, Ymutable mat.Mutable) *mat.Dense
- func (m *KMeans) PredicterClone() base.Predicter
- func (m *KMeans) Score(X, Y mat.Matrix) float64
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func EuclideanDistance ¶
EuclideanDistance computes the Euclidean distance between the vectors a and b. It can be used as a Distance.
Types ¶
type DBSCAN ¶
type DBSCAN struct { DBSCANConfig SampleWeight []float64 // members filled by Fit NeighborsModel *neighbors.NearestNeighbors Labels []int CoreSampleIndices []int }
DBSCAN is the DBSCAN clustering model. It embeds its DBSCANConfig and holds the members filled by Fit.
Example ¶
// adapted from http://scikit-learn.org/stable/_downloads/plot_dbscan.ipynb // Generate sample data centers := mat.NewDense(3, 2, []float64{1, 1, -1, -1, 1, -1}) NSamples := 750 X, _ := datasets.MakeBlobs(&datasets.MakeBlobsConfig{NSamples: NSamples, Centers: centers, ClusterStd: .3}) X, _ = preprocessing.NewStandardScaler().FitTransform(X, nil) db := NewDBSCAN(&DBSCANConfig{Eps: .3, MinSamples: 10, Algorithm: "kd_tree"}) db.Fit(X, nil) coreSampleMask := make([]bool, len(db.Labels)) for sample := range db.CoreSampleIndices { coreSampleMask[sample] = true } labels := db.Labels labelsmap := make(map[int]int) for _, l := range labels { labelsmap[l] = l } nclusters := len(labelsmap) if _, ok := labelsmap[-1]; ok { nclusters-- } fmt.Printf("Estimated number of clusters: %d\n", nclusters) if *visualDebug { // plot result p, err := plot.New() if err != nil { panic(err) } p.Title.Text = fmt.Sprintf("Estimated number of clusters: %d", nclusters) for cl := range labelsmap { var data plotter.XYs for sample := 0; sample < NSamples; sample++ { if labels[sample] == cl { data = append(data, struct{ X, Y float64 }{X.At(sample, 0), X.At(sample, 1)}) } } s, err := plotter.NewScatter(data) if err != nil { panic(err) } var color0 color.RGBA switch cl { case -1: color0 = color.RGBA{0, 0, 0, 255} case 0: color0 = color.RGBA{176, 0, 0, 255} case 1: color0 = color.RGBA{0, 176, 0, 255} case 2: color0 = color.RGBA{0, 0, 176, 255} } s.GlyphStyle.Color = color0 s.GlyphStyle.Shape = draw.CircleGlyph{} p.Add(s) //p.Legend.Add(fmt.Sprintf("scatter %d", cl), s) } // Save the plot to a PNG file. pngfile := "/tmp/ExampleDBSCAN.png" os.Remove(pngfile) if err := p.Save(6*vg.Inch, 4*vg.Inch, pngfile); err != nil { panic(err) } cmd := exec.Command("display", pngfile) err = cmd.Start() if err != nil { fmt.Println(err.Error()) } time.Sleep(200 * time.Millisecond) os.Remove(pngfile) }
Output: Estimated number of clusters: 3
func NewDBSCAN ¶
func NewDBSCAN(config *DBSCANConfig) *DBSCAN
NewDBSCAN creates a *DBSCAN. If config is nil, defaults are used. The defaults are Eps:.5 MinSamples:5 Metric:"euclidean" Algorithm:"auto" LeafSize:30 P:2 NJobs:runtime.NumCPU()
func (*DBSCAN) Fit ¶
Fit for DBSCAN. X : mat.Dense of shape (n_samples, n_features), a feature array. m.SampleWeight is used if it is not nil: it is the weight of each sample, such that a sample with a weight of at least “min_samples” is by itself a core sample; a sample with negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1. Y : ignored, may be nil.
func (*DBSCAN) GetNOutputs ¶
GetNOutputs returns the number of output columns of the Y matrix to pass to Predict.
func (*DBSCAN) IsClassifier ¶
IsClassifier returns true for DBSCAN
func (*DBSCAN) PredicterClone ¶
PredicterClone for DBSCAN
type DBSCANConfig ¶
type DBSCANConfig struct { Eps float64 MinSamples float64 Metric string MetricsParam interface{} Algorithm string LeafSize int P float64 NJobs int }
DBSCANConfig is the configuration structure for NewDBSCAN
type KMeans ¶
type KMeans struct { // Required members NClusters int // Optional members NJobs int Distance func(X, Y mat.Vector) float64 // Runtime filled members Centroids *mat.Dense }
KMeans is the k-means clustering algorithm.
Example ¶
package main import ( "fmt" "image/color" "os" "os/exec" "time" "github.com/pa-m/sklearn/base" "github.com/pa-m/sklearn/datasets" "gonum.org/v1/gonum/mat" "gonum.org/v1/plot" "gonum.org/v1/plot/plotter" "gonum.org/v1/plot/vg" "gonum.org/v1/plot/vg/draw" ) var ( _ base.Predicter = &KMeans{} ) func main() { ds := datasets.LoadIris() X := ds.X NSamples, _ := X.Dims() Y := mat.NewDense(NSamples, 1, nil) kmeans := &KMeans{NClusters: 3} start := time.Now() _ = start kmeans.Fit(X, nil) kmeans.Predict(X, Y) //fmt.Printf("elapsed %s s\n", time.Since(start)) // v https://github.com/gonum/plot/wiki/Example-plots if *visualDebug { xplot, yplot := 0, 2 getData := func(value int) (scatterData plotter.XYs) { for i := 0; i < NSamples; i++ { if int(Y.At(i, 0)) == value { scatterData = append(scatterData, struct{ X, Y float64 }{X.At(i, xplot), X.At(i, yplot)}) } } return } p, err := plot.New() if err != nil { panic(err) } p.Title.Text = "kmeans" p.X.Label.Text = "X" p.Y.Label.Text = "Y" // Draw a grid behind the data p.Add(plotter.NewGrid()) for cl := 0; cl < kmeans.NClusters; cl++ { s, err := plotter.NewScatter(getData(cl)) if err != nil { panic(err) } var color0 color.RGBA switch cl { case 0: color0 = color.RGBA{176, 0, 0, 255} case 1: color0 = color.RGBA{0, 176, 0, 255} case 2: color0 = color.RGBA{0, 0, 176, 255} } s.GlyphStyle.Color = color0 p.Add(s) p.Legend.Add(fmt.Sprintf("scatter %d", cl), s) { c := kmeans.Centroids.RowView(cl) sc, err := plotter.NewScatter(plotter.XYs{{X: c.AtVec(xplot), Y: c.AtVec(yplot)}}) if err != nil { panic(err) } sc.GlyphStyle.Shape = draw.PlusGlyph{} sc.GlyphStyle.Color = color0 p.Add(sc) } } // Save the plot to a PNG file. pngfile := "/tmp/plt.png" os.Remove(pngfile) if err := p.Save(4*vg.Inch, 4*vg.Inch, pngfile); err != nil { panic(err) } cmd := exec.Command("display", pngfile) err = cmd.Start() if err != nil { fmt.Println(err.Error()) } time.Sleep(200 * time.Millisecond) os.Remove(pngfile) } }
Output:
func (*KMeans) Fit ¶
Fit computes the centroids. Y is unused here, but it is kept so that all classifiers have the same interface; pass nil.
func (*KMeans) GetNOutputs ¶
GetNOutputs returns the number of output columns of the Y matrix to pass to Predict.
func (*KMeans) IsClassifier ¶
IsClassifier returns true for KMeans
func (*KMeans) PredicterClone ¶
PredicterClone for KMeans