classifiers

package
v0.0.0-...-62718c5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 15, 2021 License: MIT Imports: 13 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	// ErrNotClassified is the sentinel error indicating that a document could
	// not be classified. Callers can test for it with errors.Is.
	// NOTE(review): presumably returned by IntentClassifier.Classify — confirm.
	ErrNotClassified = errors.New("unable to classify document")
)

Functions

func EulerDistance

func EulerDistance(X, Y, W []float64) float64

EulerDistance

func HuffmanDistance

func HuffmanDistance(X, Y, W []float64) float64

HuffmanDistance

Types

type Class

// Class describes one of the possible class outcomes.
//
// NOTE(review): the field notes below are inferred from names and types only;
// confirm them against the code that populates a Class.
type Class struct {
	// Name is presumably the class label — confirm.
	Name                    string
	// Counter is an integer tally; presumably the number of items learned for
	// this class — confirm.
	Counter                 int
	// Words holds types.Word values keyed by string (presumably the word text).
	Words                   map[string]types.Word
	// Probability is stored as an int, not a float — TODO confirm the intended
	// scale/meaning.
	Probability             int
	// Temp_tokenProbabilities looks like scratch space used while scoring
	// tokens — confirm.
	Temp_tokenProbabilities float64
}

Class represents one of the possible class outcomes.

type DistanceMethodType

// DistanceMethodType identifies a distance method. It is the type of
// KNearestNeighbors.DistanceMethod and of the dm argument accepted by
// NewKNearestNeighbors.
type DistanceMethodType uint8
const (
	// DMT_EulerMethod selects the Euler distance method. It is the first
	// constant in the iota sequence and therefore the zero value of the type.
	// NOTE(review): presumably implemented by EulerDistance — confirm.
	DMT_EulerMethod DistanceMethodType = iota
	// DMT_HuffmanMethod selects the Huffman distance method.
	// NOTE(review): presumably implemented by HuffmanDistance — confirm.
	DMT_HuffmanMethod
)

type IntentClassifier

// IntentClassifier is a naive classifier for documents: it is trained with
// Train, queried with Classify, and persisted with Save and Load.
//
// The struct holds a sync.RWMutex by value, so it should not be copied after
// first use; all of its methods use pointer receivers.
type IntentClassifier struct {
	// Feat2cat is a two-level map of integer counts.
	// NOTE(review): presumably feature -> category -> occurrence count — confirm.
	Feat2cat  map[string]map[string]int
	// CatCount is a per-key integer count.
	// NOTE(review): presumably the number of training documents per category — confirm.
	CatCount  map[string]int
	// Mu is a read/write mutex.
	// NOTE(review): presumably guards Feat2cat and CatCount — confirm.
	Mu        sync.RWMutex
	// Tokenizer names the tokenizer in use; NewIntentClassifier is documented
	// as using the standard tokenizer.
	Tokenizer string
}

func NewIntentClassifier

func NewIntentClassifier() *IntentClassifier

NewIntentClassifier initializes a new naive classifier using the standard tokenizer.

func (*IntentClassifier) Classify

func (c *IntentClassifier) Classify(r string) (string, error)

Classify attempts to classify a document. If the document cannot be classified (eg. because the classifier has not been trained), an error is returned.

func (*IntentClassifier) Load

func (c *IntentClassifier) Load(filePath string) error

Load from the output file.

func (*IntentClassifier) Save

func (c *IntentClassifier) Save(file string) error

Save saves the classifier to the given file.

func (*IntentClassifier) Train

func (c *IntentClassifier) Train(r string, category string) error

Train provides supervisory training to the classifier

type KNearestNeighbors

// KNearestNeighbors is a distance-based classifier that locates the K nearest
// neighbors from the training set and uses a majority vote to classify an
// unknown sample. It is a lazy learner: all computation happens at prediction
// time.
type KNearestNeighbors struct {
	// K is the number of neighbors to consider when making a prediction.
	K int

	// DistanceMethod is the distance function to use when computing the
	// distances.
	DistanceMethod DistanceMethodType

	// Weight is the weight vector for multi-dimensional data. The package's
	// usage example passes one weight per attribute.
	// NOTE(review): the previous comment also asked whether inverse distances
	// should be used as confidence scores when making predictions; no field
	// here is an on/off switch for that — confirm whether Weight is related.
	Weight []float64

	// Samples holds the training samples that make up the neighborhood of the
	// problem space.
	Samples [][]float64

	// Labels holds the memoized labels of the training set.
	Labels []string
}

K Nearest Neighbors

A distance-based algorithm that locates the K nearest neighbors from the
training set and uses a majority vote to classify the unknown sample. K Nearest
Neighbors is considered a lazy learning Estimator because it does all of its
computation at prediction time.

@category Machine Learning

@author Bruce Mubangwa

Usage:

// Training set: 15 samples with two attributes each.
train := [][]float64{
		[]float64{5.3, 3.7},
		[]float64{5.1, 3.8},
		[]float64{7.2, 3},
		[]float64{5.4, 3.4},
		[]float64{5.1, 3.3},
		[]float64{5.4, 3.9},
		[]float64{7.4, 2.8},
		[]float64{6.1, 2.8},
		[]float64{7.3, 2.9},
		[]float64{6, 2.7},
		[]float64{5.8, 2.8},
		[]float64{6.3, 2.3},
		[]float64{5.1, 2.5},
		[]float64{6.3, 2.5},
		[]float64{5.5, 2.4},
	}

	// One label per training sample, in the same order as train.
	labels := []string{
		"Setosa",
		"Setosa",
		"Virginica",
		"Setosa",
		"Setosa",
		"Setosa",
		"Virginica",
		"Versicolor",
		"Virginica",
		"Versicolor",
		"Virginica",
		"Versicolor",
		"Versicolor",
		"Versicolor",
		"Versicolor",
	}
	// Distance method and a weight vector with one entry per attribute.
	dm := classifiers.DMT_EulerMethod
	w := []float64{0.5, 0.5}

	var test [][]float64

	// A single unknown sample to classify.
	test = [][]float64{
		[]float64{5.2, 3.1},
	}
	// Try every k from 1 through 11 and report any k for which the sample is
	// not classified as "Setosa".
	for i := 1; i < 12; i++ {

		knn := classifiers.NewKNearestNeighbors(i, dm, w)
		knn.LearnBatch(train, labels)
		// Classify returns one label per test row; only one row was supplied.
		res := knn.Classify(test)
		if res[0] != "Setosa" {
			fmt.Printf("k = %d failed", i)
			fmt.Println()
		}
	}

func NewKNearestNeighbors

func NewKNearestNeighbors(k int, dm DistanceMethodType, w []float64) *KNearestNeighbors

func (*KNearestNeighbors) Classify

func (k *KNearestNeighbors) Classify(test [][]float64) []string

Classify makes predictions from a dataset. The training set is an n×p matrix,
where n is the number of training samples and p is the number of attributes.
The test set is an m×p matrix.

func (*KNearestNeighbors) LearnBatch

func (k *KNearestNeighbors) LearnBatch(train [][]float64, label []string)

LearnBatch stores the sample and outcome arrays. No other work is done, as this
is a lazy learning algorithm.

func (*KNearestNeighbors) Load

func (k *KNearestNeighbors) Load(filePath string) error

Load from the output file.

func (*KNearestNeighbors) Save

func (k *KNearestNeighbors) Save(file string) error

Save saves the classifier to the given file.

type NaiveBayes

// NaiveBayes is a Naive-Bayes classifier: it takes a document (a piece of
// text) and reports which category that document belongs to. All of its state
// lives in unexported fields; create one with NewNaiveBayes.
type NaiveBayes struct {
	// contains filtered or unexported fields
}

`bayes`: A Naive-Bayes classifier for Go

`bayes` takes a document (piece of text), and tells you what category that document belongs to.

## What can I use this for?

You can use this for categorizing any text content into any arbitrary set of **categories**. For example:

- is an email **spam**, or **not spam**?
- is a news article about **technology**, **politics**, or **sports**?
- is a piece of text expressing **positive** emotions, or **negative** emotions?

## Usage

    // Create a new classifier.
    classifier := classifiers.NewNaiveBayes()

	// Teach it a few labelled examples: Learn(text, class).
	classifier.Learn("amazing, awesome movie!! Yeah!! Oh boy.", "positive")
	classifier.Learn("Sweet, this is incredibly, amazing, perfect, great!!", "positive")
	classifier.Learn("terrible, shitty thing. Damn. Sucks!!", "negative")

	// Classify returns a string and a float64; both values are printed.
	fmt.Println(classifier.Classify("awesome, cool shitty thing"))

func NewNaiveBayes

func NewNaiveBayes() *NaiveBayes

func (*NaiveBayes) Classify

func (nb *NaiveBayes) Classify(text string) (string, float64)

Classify determines what category or class `text` belongs to.

func (*NaiveBayes) GobDecode

func (nb *NaiveBayes) GobDecode(data []byte) error

GobDecode implements GobDecoder.

func (*NaiveBayes) GobEncode

func (nb *NaiveBayes) GobEncode() ([]byte, error)

GobEncode implements GobEncoder. This is necessary because NaiveBayes contains several unexported fields. It would be easier to simply export them by changing to uppercase, but for comparison purposes, I wanted to keep the field names the same between Go and the original Python code.

func (*NaiveBayes) Learn

func (nb *NaiveBayes) Learn(text, class string)

Learn trains the naive-Bayes classifier by telling it which category (`class`)
the `text` corresponds to.

func (*NaiveBayes) LearnDocument

func (nb *NaiveBayes) LearnDocument(document types.Document, class string)

func (*NaiveBayes) LearnSentence

func (nb *NaiveBayes) LearnSentence(sentence types.Sentence, class string)

func (*NaiveBayes) Load

func (nb *NaiveBayes) Load(filePath string) error

Load from the output file.

func (*NaiveBayes) Save

func (nb *NaiveBayes) Save(file string) error

type SortedDistance

// SortedDistance pairs indices with distances. Its Len, Less and Swap methods
// match the method set of sort.Interface, and SelectTopKIdx returns indices
// from it.
type SortedDistance struct {
	// Idx holds indices; per SelectTopKIdx's documentation these refer to the
	// training set.
	Idx  []int
	// Dist holds distances.
	// NOTE(review): presumably Dist[i] is the distance for Idx[i] — confirm.
	Dist []float64

	// Cur is an integer cursor.
	// NOTE(review): presumably the next slot that Put fills — confirm.
	Cur int
}

SortedDistance

func NewSortedDistance

func NewSortedDistance(size int) *SortedDistance

NewSortedDistance initializes a SortedDistance with the given size.

func (*SortedDistance) GetIdx

func (s *SortedDistance) GetIdx() []int

GetIdx

func (*SortedDistance) Len

func (s *SortedDistance) Len() int

Len

func (*SortedDistance) Less

func (s *SortedDistance) Less(i, j int) bool

Less returns true if [i] < [j].

func (*SortedDistance) Put

func (s *SortedDistance) Put(idx int, dist float64)

Put

func (*SortedDistance) SelectTopKIdx

func (s *SortedDistance) SelectTopKIdx(k int) []int

SelectTopKIdx returns the indices of the top k training samples.

func (*SortedDistance) Swap

func (s *SortedDistance) Swap(i, j int)

Swap

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL