kmeans

package
v0.22.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 29, 2023 License: Apache-2.0 Imports: 4 Imported by: 0

Documentation

Overview

Package kmeans provides a general interface for solving k-means clustering problems. The base interface is the Model, which is a collection of points, cluster models and constraints. A Solver is constructed from a Model by kmeans.NewSolver. The solver is invoked using Solver.Solve, which returns a Solution.

A new Model is created:

// The points to be clustered.
points := []measure.Point{
	{2.5, 2.5},
	{7.5, 7.5},
	{5.0, 7.5},
}

// The number of clusters the model should contain.
numberOfClusters := 2

model, err := kmeans.NewModel(points, numberOfClusters)

A Solver is created and invoked to produce a Solution:

// Create a solver for the model.
solver, err := kmeans.NewSolver(model)

// Solve the model using the default solve options.
solution, err := solver.Solve(kmeans.NewSolveOptions())

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Cluster

type Cluster interface {
	// Centroid returns the centroid of the cluster.
	Centroid() measure.Point

	// ClusterModel returns the cluster model of which the invoking
	// cluster is an instance.
	ClusterModel() ClusterModel

	// Indices returns the indices of the points in the cluster. Each
	// index refers to the position of the point in the model.
	Indices() []int

	// Points returns the points in the cluster. These are the points
	// of the model found at the positions given by Indices.
	Points() []measure.Point

	// WithinClusterSumOfSquares returns the sum of the squared
	// distances between each point and the cluster centroid.
	WithinClusterSumOfSquares() float64
}

Cluster is a cluster of points. A cluster is defined by a centroid and a set of points. The centroid is the center of the cluster.

type ClusterModel

type ClusterModel interface {
	// ExcludedPointIndices returns the indices of the points that are
	// excluded from the cluster. Each index refers to the position of
	// the point in the model.
	ExcludedPointIndices() []int

	// MaximumPoints returns the maximum number of points that can be
	// assigned to the cluster.
	MaximumPoints() int
	// MaximumSumValueConstraints returns the constraints on the sum of
	// the values of the points in the cluster.
	MaximumSumValueConstraints() []MaximumSumValueConstraint

	// SetExcludedPointIndices sets the points that are excluded from
	// the cluster. The indices contain the index of each point in the
	// model to be excluded.
	SetExcludedPointIndices(indices []int) error
	// SetMaximumPoints sets the maximum number of points that can be
	// assigned to the cluster.
	SetMaximumPoints(maximumPoints int)
	// SetMaximumSumValue sets the maximum value constraint for
	// the cluster and returns the resulting constraint. The constraint
	// limits which points can be assigned to the cluster: the values
	// of the points in the cluster must sum to maximumValue or less.
	// The value of each point is given by values, in the order of the
	// points in the model. Values must be the same length as points
	// in the model.
	SetMaximumSumValue(
		maximumValue int,
		values []int,
	) (MaximumSumValueConstraint, error)
}

ClusterModel is a model of a cluster.

Example
package main

import (
	"fmt"

	"github.com/nextmv-io/sdk/cluster/kmeans"
	"github.com/nextmv-io/sdk/measure"
)

// main builds a model of three points and two clusters, constrains both
// cluster models and prints the constraints back out.
func main() {
	points := []measure.Point{
		{2.5, 2.5},
		{7.5, 7.5},
		{5.0, 7.5},
	}
	// Create a model.
	model, err := kmeans.NewModel(points, 2)
	if err != nil {
		panic(err)
	}

	cm1 := model.ClusterModels()[0]
	cm2 := model.ClusterModels()[1]

	// Set maximum points in first cluster to 2.
	cm1.SetMaximumPoints(2)
	// Exclude the first point from the first cluster.
	err = cm1.SetExcludedPointIndices([]int{0})
	if err != nil {
		panic(err)
	}

	// The values of the points in the first cluster must sum to 10
	// or less. The values used are 6, 7 and 4, in the order of the
	// points in the model.
	msv, err := cm1.SetMaximumSumValue(
		10,
		[]int{6, 7, 4},
	)
	if err != nil {
		panic(err)
	}

	// Set maximum points in second cluster to 1.
	cm2.SetMaximumPoints(1)
	// Exclude the second and third point from the
	// second cluster.
	err = cm2.SetExcludedPointIndices([]int{1, 2})
	if err != nil {
		panic(err)
	}

	// Print the maximum points in the first cluster.
	fmt.Println(cm1.MaximumPoints())
	// Print the excluded point indices in the first cluster.
	fmt.Println(cm1.ExcludedPointIndices())
	// Print the maximum sum value in the first cluster.
	fmt.Println(msv.MaximumValue())
	// Print the values of the points used in the maximum
	// sum value constraint.
	fmt.Println(msv.Values())
	// Print the maximum points in the second cluster.
	fmt.Println(cm2.MaximumPoints())
	// Print the excluded point indices in the second cluster.
	fmt.Println(cm2.ExcludedPointIndices())
}
Output:

2
[0]
10
[6 7 4]
1
[1 2]

type MaximumSumValueConstraint

type MaximumSumValueConstraint interface {
	// MaximumValue returns the maximum value of the sum of the values
	// of the points in the cluster.
	MaximumValue() int
	// Values returns the values used to calculate the sum: one value
	// per point in the model, in the order of the points in the model.
	Values() []int
}

MaximumSumValueConstraint is a constraint on the sum of the values of points in a cluster. The value of each point is given by the slice returned by Values.

type Model

type Model interface {
	// ClusterModels returns the cluster models, one for each cluster
	// of the model. The cluster models define the constraints for the
	// clusters.
	ClusterModels() []ClusterModel

	// Points returns the points to be clustered.
	Points() []measure.Point
}

Model is a model of a k-means clustering problem.

Example
package main

import (
	"fmt"

	"github.com/nextmv-io/sdk/cluster/kmeans"
	"github.com/nextmv-io/sdk/measure"
)

// main creates a model from three points and two clusters, then reports
// how many points and how many cluster models the model holds.
func main() {
	pts := []measure.Point{
		{2.5, 2.5},
		{7.5, 7.5},
		{5.0, 7.5},
	}

	// Build the model; any construction error is fatal for the example.
	m, err := kmeans.NewModel(pts, 2)
	if err != nil {
		panic(err)
	}

	// Number of points held by the model.
	pointCount := len(m.Points())
	fmt.Println(pointCount)

	// Number of cluster models held by the model.
	clusterModelCount := len(m.ClusterModels())
	fmt.Println(clusterModelCount)
}
Output:

3
2

func NewModel

func NewModel(
	points []measure.Point,
	clusters int,
	option ...Option,
) (Model, error)

NewModel creates a new Model with the given points and number of clusters.

type Option

type Option func(Model) error

An Option configures a k-means model.

func ExcludedPoints

func ExcludedPoints(excludedPoints [][]int) Option

ExcludedPoints defines the points that are excluded from each cluster. For each cluster, excludedPoints contains the indices of the points in the model to be excluded from that cluster. The length of excludedPoints must be equal to the number of clusters in the model. The slice for a cluster may contain any number of point indices.

func MaximumPoints

func MaximumPoints(maximumPoints []int) Option

MaximumPoints defines the maximum number of points that can be assigned to each cluster. The length of maximumPoints must be equal to the number of clusters in the model.

func MaximumSumValue

func MaximumSumValue(
	maximumValue []int,
	values [][]int,
) Option

MaximumSumValue defines the maximum value constraint for the cluster. The maximum value constraint limits the number of points that can be assigned to the cluster based on the value of a point defined by values. Values must be the same length as points in the model.

type Solution

type Solution interface {
	// Clusters returns the clusters derived from the solution.
	Clusters() []Cluster
	// Feasible reports whether the solution is feasible. A solution is
	// feasible if the solver was able to find a solution that satisfies
	// the constraints of the model.
	Feasible() bool
	// RunTime returns the time it took to derive the solution.
	RunTime() time.Duration
	// Unassigned returns the points that were not assigned to any
	// cluster.
	Unassigned() []measure.Point
	// UnassignedIndices returns the indices of the points that were
	// not assigned to any cluster.
	UnassignedIndices() []int
}

Solution is a solution to a k-means clustering problem.

type SolveOptions

type SolveOptions interface {
	// Candidates returns the number of candidate solutions to
	// consider when solving the model associated with the invoking
	// solver. Defaults to 1.
	Candidates() int
	// MaximumDuration returns the maximum duration to spend
	// solving the model associated with the invoking solver.
	// Defaults to 24 hours.
	MaximumDuration() time.Duration
	// Measure returns the measure used to calculate the distance
	// between points to derive the solution. Defaults to the
	// Euclidean measure.
	Measure() measure.ByPoint

	// Random returns the random number generator used to derive the
	// solution. Defaults to a new random number generator seeded
	// with the current time.
	Random() *rand.Rand

	// SetCandidates sets the number of candidate solutions to
	// consider when solving the model associated with the invoking
	// solver. Returns the invoking solve options.
	SetCandidates(candidates int) SolveOptions
	// SetMaximumDuration sets the maximum duration to spend
	// solving the model associated with the invoking solver.
	// Returns the invoking solve options.
	SetMaximumDuration(maximumDuration time.Duration) SolveOptions
	// SetMeasure sets the measure used to calculate the distance
	// between points to derive the solution. Returns the invoking
	// solve options.
	SetMeasure(measure measure.ByPoint) SolveOptions
	// SetRandom sets the random number generator used to derive the
	// solution. Returns the invoking solve options.
	SetRandom(random *rand.Rand) SolveOptions
}

SolveOptions is a set of options that can be used to influence the behavior of a solver.

func NewSolveOptions

func NewSolveOptions() SolveOptions

NewSolveOptions returns default solver options.

type Solver

type Solver interface {
	// Solve is the entry point for solving the model associated with
	// the invoking solver, configured by the given options. Returns a
	// solution when the invoking solver reaches a conclusion.
	Solve(options SolveOptions) (Solution, error)
}

Solver is the interface for a k-means solver.

Example
package main

import (
	"fmt"

	"github.com/nextmv-io/sdk/cluster/kmeans"
	"github.com/nextmv-io/sdk/measure"
)

// main solves a small clustering problem of three points and two
// clusters, then reports the shape of the resulting solution.
func main() {
	pts := []measure.Point{
		{2.5, 2.5},
		{7.5, 7.5},
		{5.0, 7.5},
	}

	// Build the model for two clusters.
	m, err := kmeans.NewModel(pts, 2)
	if err != nil {
		panic(err)
	}

	// Cap the first cluster at a single point so that the solution
	// is predictable.
	m.ClusterModels()[0].SetMaximumPoints(1)

	// Construct the solver for the model.
	s, err := kmeans.NewSolver(m)
	if err != nil {
		panic(err)
	}

	// Solve using freshly created default solve options.
	opts := kmeans.NewSolveOptions()
	result, err := s.Solve(opts)
	if err != nil {
		panic(err)
	}

	// Report, in order: the number of clusters, the number of
	// unassigned points, and the number of points in the first and
	// in the second cluster.
	fmt.Println(len(result.Clusters()))
	fmt.Println(len(result.Unassigned()))
	fmt.Println(len(result.Clusters()[0].Points()))
	fmt.Println(len(result.Clusters()[1].Points()))
}
Output:

2
0
1
2

func NewSolver

func NewSolver(model Model) (Solver, error)

NewSolver returns a new Solver.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL