cluster

package
Version: v0.0.0-...-beb861e Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 11, 2020 License: MIT Imports: 7 Imported by: 1

Documentation

Overview

Package cluster gathers popular unsupervised clustering algorithms. It contains DBSCAN and KMeans.

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func EuclideanDistance

func EuclideanDistance(a, b mat.Vector) float64

EuclideanDistance is a Distance: it has the signature func(a, b mat.Vector) float64.

func MinkowskiDistanceP

func MinkowskiDistanceP(a, b mat.Vector, p float64) float64

MinkowskiDistanceP ...

Types

type DBSCAN

// DBSCAN bundles the DBSCAN configuration with the state produced by Fit.
type DBSCAN struct {
	// DBSCANConfig is embedded, so its fields (Eps, MinSamples, ...) are
	// promoted and accessible directly on a DBSCAN value.
	DBSCANConfig
	// SampleWeight, if not nil, is the weight of each sample; per the Fit
	// documentation it is read by Fit and weights default to 1.
	SampleWeight []float64
	// members filled by Fit
	NeighborsModel    *neighbors.NearestNeighbors
	Labels            []int
	CoreSampleIndices []int
}

DBSCAN classifier struct

Example
// adapted from http://scikit-learn.org/stable/_downloads/plot_dbscan.ipynb
// Generate sample data
centers := mat.NewDense(3, 2, []float64{1, 1, -1, -1, 1, -1})
NSamples := 750
X, _ := datasets.MakeBlobs(&datasets.MakeBlobsConfig{NSamples: NSamples, Centers: centers, ClusterStd: .3})

X, _ = preprocessing.NewStandardScaler().FitTransform(X, nil)
db := NewDBSCAN(&DBSCANConfig{Eps: .3, MinSamples: 10, Algorithm: "kd_tree"})
db.Fit(X, nil)

// Build the core-sample mask. CoreSampleIndices stores sample indices, so we
// must range over its values; ranging over its positions would just mark the
// first len(CoreSampleIndices) samples regardless of which ones are core.
coreSampleMask := make([]bool, len(db.Labels))
for _, sample := range db.CoreSampleIndices {
	coreSampleMask[sample] = true
}

// Collect the distinct labels; the label -1 is not counted as a cluster.
labels := db.Labels
labelSet := make(map[int]struct{})
for _, l := range labels {
	labelSet[l] = struct{}{}
}
nclusters := len(labelSet)
if _, ok := labelSet[-1]; ok {
	nclusters--
}
fmt.Printf("Estimated number of clusters: %d\n", nclusters)

if *visualDebug {

	// plot result
	p, err := plot.New()
	if err != nil {
		panic(err)
	}
	p.Title.Text = fmt.Sprintf("Estimated number of clusters: %d", nclusters)
	for cl := range labelSet {
		// Gather the points carrying label cl (first two columns of X).
		var data plotter.XYs
		for sample := 0; sample < NSamples; sample++ {
			if labels[sample] == cl {
				data = append(data, struct{ X, Y float64 }{X.At(sample, 0), X.At(sample, 1)})
			}
		}
		s, err := plotter.NewScatter(data)
		if err != nil {
			panic(err)
		}
		// One fixed color per label; labels outside -1..2 keep the zero color.
		var color0 color.RGBA
		switch cl {
		case -1:
			color0 = color.RGBA{0, 0, 0, 255}
		case 0:
			color0 = color.RGBA{176, 0, 0, 255}
		case 1:
			color0 = color.RGBA{0, 176, 0, 255}
		case 2:
			color0 = color.RGBA{0, 0, 176, 255}
		}
		s.GlyphStyle.Color = color0
		s.GlyphStyle.Shape = draw.CircleGlyph{}
		p.Add(s)
		//p.Legend.Add(fmt.Sprintf("scatter %d", cl), s)

	}
	// Save the plot to a PNG file.
	pngfile := "/tmp/ExampleDBSCAN.png"
	// Best-effort cleanup of a stale file; a failure here is not an error.
	os.Remove(pngfile)
	if err := p.Save(6*vg.Inch, 4*vg.Inch, pngfile); err != nil {
		panic(err)
	}
	cmd := exec.Command("display", pngfile)
	err = cmd.Start()
	if err != nil {
		fmt.Println(err.Error())
	}
	// Give the viewer a moment to open the file before removing it.
	time.Sleep(200 * time.Millisecond)
	os.Remove(pngfile)
}
Output:

Estimated number of clusters: 3

func NewDBSCAN

func NewDBSCAN(config *DBSCANConfig) *DBSCAN

NewDBSCAN creates a *DBSCAN. If config is nil, defaults are used. The defaults are Eps:.5 MinSamples:5 Metric:"euclidean" Algorithm:"auto" LeafSize:30 P:2 NJobs:runtime.NumCPU().

func (*DBSCAN) Fit

func (m *DBSCAN) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter

Fit for DBSCAN. X: mat.Dense of shape (n_samples, n_features), a feature array. m.SampleWeight, if not nil, is the weight of each sample, such that a sample with a weight of at least "min_samples" is by itself a core sample; a sample with negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1. Y: ignored, may be nil.

func (*DBSCAN) GetNOutputs

func (m *DBSCAN) GetNOutputs() int

GetNOutputs returns output columns number for Y to pass to predict

func (*DBSCAN) IsClassifier

func (m *DBSCAN) IsClassifier() bool

IsClassifier returns true for DBSCAN

func (*DBSCAN) Predict

func (m *DBSCAN) Predict(X mat.Matrix, Ymutable mat.Mutable) *mat.Dense

Predict for DBSCAN returns Labels in Y. X must be the same matrix that was passed to Fit.

func (*DBSCAN) PredicterClone

func (m *DBSCAN) PredicterClone() base.Predicter

PredicterClone for DBSCAN

func (*DBSCAN) Score

func (m *DBSCAN) Score(X, Y mat.Matrix) float64

Score for DBSCAN returns 1

type DBSCANConfig

// DBSCANConfig is the configuration structure for NewDBSCAN.
// The defaults quoted below are the ones the NewDBSCAN documentation lists
// for a nil config.
type DBSCANConfig struct {
	// Eps: documented default .5.
	Eps          float64
	// MinSamples: documented default 5. Per the Fit documentation, a sample
	// whose weight is at least this value is by itself a core sample.
	MinSamples   float64
	// Metric: documented default "euclidean".
	Metric       string
	// MetricsParam: no default is documented; presumably extra parameters
	// for Metric — TODO confirm against the implementation.
	MetricsParam interface{}
	// Algorithm: documented default "auto"; the DBSCAN example uses "kd_tree".
	Algorithm    string
	// LeafSize: documented default 30.
	LeafSize     int
	// P: documented default 2.
	P            float64
	// NJobs: documented default runtime.NumCPU().
	NJobs        int
}

DBSCANConfig is the configuration structure for NewDBSCAN

type Distance

type Distance func(a, b mat.Vector) float64

Distance has Distance(Vector,Vector)float64

func MinkowskiDistance

func MinkowskiDistance(p float64) Distance

MinkowskiDistance ...

type KMeans

// KMeans holds the settings and the fitted centroids of a k-means grouping.
type KMeans struct {
	// Required members
	// NClusters is the number of clusters; the example reads one Centroids
	// row per cluster index in [0, NClusters).
	NClusters int
	// Optional members
	NJobs    int
	// Distance takes two vectors and returns a float64, the same shape as the
	// package's Distance type.
	Distance func(X, Y mat.Vector) float64
	// Runtime filled members
	// Centroids is computed by Fit.
	Centroids *mat.Dense
}

KMeans is a grouping (clustering) algorithm.

Example
package main

import (
	"fmt"
	"image/color"
	"os"
	"os/exec"
	"time"

	"github.com/pa-m/sklearn/base"
	"github.com/pa-m/sklearn/datasets"
	"gonum.org/v1/gonum/mat"
	"gonum.org/v1/plot"
	"gonum.org/v1/plot/plotter"
	"gonum.org/v1/plot/vg"
	"gonum.org/v1/plot/vg/draw"
)

// Compile-time assertion that *KMeans satisfies the base.Predicter interface.
var (
	_ base.Predicter = &KMeans{}
)

// main fits a 3-cluster KMeans on the iris dataset and predicts a cluster
// index for every sample. When the visualDebug flag is set it additionally
// renders the clusters and their centroids to a temporary PNG and opens it
// with the external "display" command.
func main() {
	iris := datasets.LoadIris()
	X := iris.X
	NSamples, _ := X.Dims()
	Y := mat.NewDense(NSamples, 1, nil)
	kmeans := &KMeans{NClusters: 3}
	start := time.Now()
	_ = start
	kmeans.Fit(X, nil)
	kmeans.Predict(X, Y)
	//fmt.Printf("elapsed %s s\n", time.Since(start))

	// Nothing more to do unless a visual check was requested.
	if !*visualDebug {
		return
	}

	// see https://github.com/gonum/plot/wiki/Example-plots
	// Columns of X used as plot abscissa and ordinate.
	const featX, featY = 0, 2

	// pointsOf collects the (featX, featY) coordinates of every sample that
	// was assigned to the given cluster.
	pointsOf := func(cluster int) plotter.XYs {
		var pts plotter.XYs
		for row := 0; row < NSamples; row++ {
			if int(Y.At(row, 0)) != cluster {
				continue
			}
			pts = append(pts, struct{ X, Y float64 }{X.At(row, featX), X.At(row, featY)})
		}
		return pts
	}

	// Fixed colors for the first three clusters; any further cluster keeps
	// the zero color.
	palette := []color.RGBA{
		{R: 176, A: 255},
		{G: 176, A: 255},
		{B: 176, A: 255},
	}

	p, err := plot.New()
	if err != nil {
		panic(err)
	}
	p.Title.Text = "kmeans"
	p.X.Label.Text = "X"
	p.Y.Label.Text = "Y"
	// Draw a grid behind the data
	p.Add(plotter.NewGrid())

	for cl := 0; cl < kmeans.NClusters; cl++ {
		members, err := plotter.NewScatter(pointsOf(cl))
		if err != nil {
			panic(err)
		}
		var tint color.RGBA
		if cl < len(palette) {
			tint = palette[cl]
		}
		members.GlyphStyle.Color = tint
		p.Add(members)
		p.Legend.Add(fmt.Sprintf("scatter %d", cl), members)

		// Overlay the cluster centroid as a plus glyph in the same color.
		centroid := kmeans.Centroids.RowView(cl)
		marker, err := plotter.NewScatter(plotter.XYs{{X: centroid.AtVec(featX), Y: centroid.AtVec(featY)}})
		if err != nil {
			panic(err)
		}
		marker.GlyphStyle.Shape = draw.PlusGlyph{}
		marker.GlyphStyle.Color = tint
		p.Add(marker)
	}

	// Save the plot to a PNG file.
	pngfile := "/tmp/plt.png"
	os.Remove(pngfile)
	if saveErr := p.Save(4*vg.Inch, 4*vg.Inch, pngfile); saveErr != nil {
		panic(saveErr)
	}
	viewer := exec.Command("display", pngfile)
	if startErr := viewer.Start(); startErr != nil {
		fmt.Println(startErr.Error())
	}
	time.Sleep(200 * time.Millisecond)
	os.Remove(pngfile)
}
Output:

func (*KMeans) Fit

func (m *KMeans) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter

Fit computes the centroids. Y is unused here, but we want all classifiers to have the same interface; pass nil.

func (*KMeans) GetNOutputs

func (m *KMeans) GetNOutputs() int

GetNOutputs returns output columns number for Y to pass to predict

func (*KMeans) IsClassifier

func (m *KMeans) IsClassifier() bool

IsClassifier returns true for KMeans

func (*KMeans) Predict

func (m *KMeans) Predict(X mat.Matrix, Ymutable mat.Mutable) *mat.Dense

Predict fills y with indices of centroids

func (*KMeans) PredicterClone

func (m *KMeans) PredicterClone() base.Predicter

PredicterClone for KMeans

func (*KMeans) Score

func (m *KMeans) Score(X, Y mat.Matrix) float64

Score for KMeans returns 1

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL