kmeans

package
v0.0.0-...-38d63f0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 17, 2015 License: BSD-3-Clause Imports: 3 Imported by: 2

Documentation

Overview

Package kmeans implements Lloyd's k-means clustering for ℝⁿ data.

Example
package main

import (
	"fmt"
	"strings"

	"github.com/biogo/cluster/kmeans"
)

// Feature is a named interval described by its Start and End coordinates.
type Feature struct {
	ID         string // identifier of the feature
	Start, End int    // interval bounds; Len reports End - Start
}

// Len returns the extent of the feature, computed as End minus Start.
func (f *Feature) Len() int {
	span := f.End - f.Start
	return span
}

// Features is a collection of features. Its Len and Values methods are what
// allow the collection to be passed to kmeans.New.
type Features []*Feature

// Len returns the number of features in the collection.
func (f Features) Len() int {
	return len(f)
}

// Values returns the coordinates of the i'th feature as a two-element slice
// holding its Start and End, each converted to float64.
func (f Features) Values(i int) []float64 {
	feat := f[i]
	start, end := float64(feat.Start), float64(feat.End)
	return []float64{start, end}
}

// feats is the example data set: eleven features, each identified by ID and
// located by its Start and End coordinates. main clusters these and prints
// them grouped by cluster.
var feats = []*Feature{
	{ID: "0", Start: 1, End: 1700},
	{ID: "1", Start: 2, End: 1700},
	{ID: "2", Start: 3, End: 610},
	{ID: "3", Start: 2, End: 605},
	{ID: "4", Start: 1, End: 600},
	{ID: "5", Start: 2, End: 750},
	{ID: "6", Start: 650, End: 900},
	{ID: "7", Start: 700, End: 950},
	{ID: "8", Start: 1000, End: 1700},
	{ID: "9", Start: 950, End: 1712},
	{ID: "10", Start: 1000, End: 1650},
}

// ClusterFeatures clusters f on the basis of location, trying increasing
// numbers of clusters k until every feature lies close enough to the center
// of the cluster it was assigned to, where:
//
//	epsilon is allowable error, and
//	effort is number of attempts to achieve error < epsilon for any k.
//
// A feature is close enough when its squared distance to its center is less
// than the square of epsilon times the difference of its two coordinates.
// The first clustering that satisfies this for every feature is returned.
//
// An error is returned if the data cannot be loaded by kmeans.New, if a
// clustering run fails, or if no k in 1..len(f) yields an acceptable
// clustering within effort attempts (for example when effort <= 0, or when
// epsilon is 0 so that no distance can be strictly below the limit).
func ClusterFeatures(f []*Feature, epsilon float64, effort int) (*kmeans.Kmeans, error) {
	km, err := kmeans.New(Features(f))
	if err != nil {
		return nil, err
	}

	// cut[i] is the squared distance limit for value i.
	// NOTE(review): assumes V() returns the coordinates supplied by
	// Features.Values, i.e. v[1]-v[0] is End-Start — confirm.
	values := km.Values()
	cut := make([]float64, len(values))
	for i, v := range values {
		v := v.V()
		l := epsilon * (v[1] - v[0])
		cut[i] = l * l
	}

	for k := 1; k <= len(f); k++ {
	ATTEMPT:
		for attempt := 0; attempt < effort; attempt++ {
			// Each attempt reseeds the k centers and reclusters from scratch.
			km.Seed(k)
			if err := km.Cluster(); err != nil {
				return nil, err
			}
			centers := km.Centers()
			for i, v := range values {
				cv := centers[v.Cluster()].V()
				vv := v.V()
				dx, dy := cv[0]-vv[0], cv[1]-vv[1]
				ok := dx*dx+dy*dy < cut[i]
				if !ok {
					// One value outside its limit rejects the whole attempt.
					continue ATTEMPT
				}
			}
			return km, nil
		}
	}

	// Every k was tried without success; report it rather than panicking,
	// since this is reachable with legitimate inputs.
	return nil, fmt.Errorf("no clustering of %d features within epsilon %v after %d attempts per k", len(f), epsilon, effort)
}

// main clusters the example features with epsilon 0.15 and 5 attempts per k,
// prints the members of each cluster as a text diagram, and reports the
// fraction of the total sum of squares accounted for between clusters.
func main() {
	km, err := ClusterFeatures(feats, 0.15, 5)
	if err != nil {
		// Report the failure instead of exiting silently with no output.
		fmt.Println("clustering failed:", err)
		return
	}
	for ci, c := range km.Centers() {
		fmt.Printf("Cluster %d:\n", ci)
		for _, i := range c.Members() {
			f := feats[i]
			// One output column per 20 coordinate units: pad up to Start,
			// then draw a dash run proportional to the feature length.
			fmt.Printf("%2s %s%s\n",
				f.ID,
				strings.Repeat(" ", f.Start/20),
				strings.Repeat("-", f.Len()/20),
			)
		}
		fmt.Println()
	}

	// Sum the per-cluster within sums of squares; the printed ratio is
	// 1 - within/total.
	var within float64
	for _, ss := range km.Within() {
		within += ss
	}
	fmt.Printf("betweenSS / totalSS = %.6f\n", 1-(within/km.Total()))
}
Output:
Cluster 0:
 0 ------------------------------------------------------------------------------------
 1 ------------------------------------------------------------------------------------

Cluster 1:
 2 ------------------------------
 3 ------------------------------
 4 -----------------------------
 5 -------------------------------------

Cluster 2:
 6                                 ------------
 7                                    ------------

Cluster 3:
 8                                                   -----------------------------------
 9                                                --------------------------------------
10                                                   --------------------------------

betweenSS / totalSS = 0.995335

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Kmeans

type Kmeans struct {
	// contains filtered or unexported fields
}

Kmeans implements clustering of ℝⁿ data according to the Lloyd k-means algorithm.

func New

func New(data cluster.Interface) (*Kmeans, error)

New creates a new k-means object populated with data from an Interface value, data.

func (*Kmeans) Centers

func (km *Kmeans) Centers() []cluster.Center

Centers returns the k centers determined by a previous call to Cluster.

func (*Kmeans) Cluster

func (km *Kmeans) Cluster() error

Cluster runs a clustering of the data using the k-means algorithm.

func (*Kmeans) Seed

func (km *Kmeans) Seed(k int)

Seed generates the initial means for the k-means algorithm according to the k-means++ algorithm.

func (*Kmeans) SetCenters

func (km *Kmeans) SetCenters(c []cluster.Center)

SetCenters sets the locations of the centers to c.

func (*Kmeans) Total

func (km *Kmeans) Total() float64

Total calculates the total sum of squares for the data relative to the data mean.

func (*Kmeans) Values

func (km *Kmeans) Values() []cluster.Value

Values returns a slice of the values in the Kmeans.

func (*Kmeans) Within

func (km *Kmeans) Within() []float64

Within calculates the sum of squares within each cluster. Returns nil if Cluster has not been called.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL