Documentation ¶
Overview ¶
Package sticker provides a framework for multi-label classification.
Index ¶
- Variables
- func Abs32(x float32) float32
- func AvgTotalVariationAmongSparseVectors(svs SparseVectors) float32
- func Ceil32(x float32) float32
- func ClassifyAllToBinaryClass(Z []float32) []bool
- func ClassifyToBinaryClass(z float32) bool
- func DecodeDataset(ds *Dataset, r io.Reader) error
- func DecodeDatasetWithGobDecoder(ds *Dataset, decoder *gob.Decoder) error
- func DecodeJaccardHashing(hashing *JaccardHashing, r io.Reader) error
- func DecodeJaccardHashingWithGobDecoder(hashing *JaccardHashing, decoder *gob.Decoder) error
- func DecodeLabelConst(model *LabelConst, r io.Reader) error
- func DecodeLabelConstWithGobDecoder(model *LabelConst, decoder *gob.Decoder) error
- func DecodeLabelNear(model *LabelNear, r io.Reader) error
- func DecodeLabelNearWithGobDecoder(model *LabelNear, decoder *gob.Decoder) error
- func DecodeLabelNearest(model *LabelNearest, r io.Reader) error
- func DecodeLabelNearestWithGobDecoder(model *LabelNearest, decoder *gob.Decoder) error
- func DecodeLabelOne(model *LabelOne, r io.Reader) error
- func DecodeLabelOneWithGobDecoder(model *LabelOne, decoder *gob.Decoder) error
- func DotCount(x, y FeatureVector) (float32, int)
- func EncodeDataset(ds *Dataset, w io.Writer) error
- func EncodeDatasetWithGobEncoder(ds *Dataset, encoder *gob.Encoder) error
- func EncodeJaccardHashing(hashing *JaccardHashing, w io.Writer) error
- func EncodeJaccardHashingWithGobEncoder(hashing *JaccardHashing, encoder *gob.Encoder) error
- func EncodeLabelConst(model *LabelConst, w io.Writer) error
- func EncodeLabelConstWithGobEncoder(model *LabelConst, encoder *gob.Encoder) error
- func EncodeLabelNear(model *LabelNear, w io.Writer) error
- func EncodeLabelNearWithGobEncoder(model *LabelNear, encoder *gob.Encoder) error
- func EncodeLabelNearest(model *LabelNearest, w io.Writer) error
- func EncodeLabelNearestWithGobEncoder(model *LabelNearest, encoder *gob.Encoder) error
- func EncodeLabelOne(model *LabelOne, w io.Writer) error
- func EncodeLabelOneWithGobEncoder(model *LabelOne, encoder *gob.Encoder) error
- func Exp32(x float32) float32
- func Floor32(x float32) float32
- func HashUint32(x uint32) uint32
- func IdealDCG(K uint) float32
- func Inf32(sign int) float32
- func InvertRanks(labelRanks LabelVector) map[uint32]int
- func IsInf32(f float32, sign int) bool
- func IsNaN32(f float32) (is bool)
- func Log32(x float32) float32
- func LogBinary32(x float32) float32
- func Modf32(x float32) (i, f float32)
- func NaN32() float32
- func Pow32(x, y float32) float32
- func ReportMaxPrecision(Y LabelVectors, K uint) []float32
- func ReportNDCG(Y LabelVectors, K uint, Yhat LabelVectors) []float32
- func ReportPrecision(Y LabelVectors, K uint, Yhat LabelVectors) []float32
- func Sqrt32(x float32) float32
- func SummarizeFloat32Slice(x []float32) (min, q25, med, q75, max, avg float32)
- type BinaryClassifier
- func (bc *BinaryClassifier) Predict(x FeatureVector) float32
- func (bc *BinaryClassifier) PredictAll(X FeatureVectors) []float32
- func (bc *BinaryClassifier) PredictAndCount(x FeatureVector) (float32, uint32)
- func (bc *BinaryClassifier) PredictAndCountAll(X FeatureVectors) ([]float32, []uint32)
- func (bc *BinaryClassifier) ReportPerformance(X FeatureVectors, Y []bool) (tn, fn, fp, tp uint, predVals []float32, Yhat []bool)
- type BinaryClassifierTrainer
- type Dataset
- type FeatureVector
- type FeatureVectors
- type Float32Slice
- type JaccardHashing
- func (hashing *JaccardHashing) Add(vec FeatureVector, i uint32)
- func (hashing *JaccardHashing) FindNears(vec FeatureVector) KeyCountMap32
- func (hashing *JaccardHashing) GobEncode() ([]byte, error)
- func (hashing *JaccardHashing) Hash(vec FeatureVector) []uint32
- func (hashing *JaccardHashing) K() uint
- func (hashing *JaccardHashing) L() uint
- func (hashing *JaccardHashing) R() uint
- func (hashing *JaccardHashing) ResetRng()
- func (hashing *JaccardHashing) Summary() (backetUsage []int, backetHist []int)
- type KeyCount32
- type KeyCountMap32
- type KeyCounts32
- type KeyValue32
- type KeyValues32
- type KeyValues32OrderedByKey
- type KeyValues32OrderedByValue
- type LabelConst
- type LabelNear
- func (model *LabelNear) FindNears(x FeatureVector, c, S uint, beta float32) KeyValues32
- func (model *LabelNear) GobEncode() ([]byte, error)
- func (model *LabelNear) Predict(x FeatureVector, K, c, S uint, alpha, beta float32) (LabelVector, map[uint32]float32, KeyValues32)
- func (model *LabelNear) PredictAll(X FeatureVectors, K, c, S uint, alpha, beta float32) LabelVectors
- type LabelNearParameters
- type LabelNearest
- func (model *LabelNearest) FindNearests(x FeatureVector, S uint, beta float32) KeyValues32
- func (model *LabelNearest) FindNearestsWithContext(x FeatureVector, S uint, beta float32, ctx LabelNearestContext) KeyValues32
- func (model *LabelNearest) GobEncode() ([]byte, error)
- func (model *LabelNearest) NewContext() LabelNearestContext
- func (model *LabelNearest) Predict(x FeatureVector, K, S uint, alpha, beta float32) (LabelVector, map[uint32]float32, KeyValues32)
- func (model *LabelNearest) PredictAll(X FeatureVectors, K, S uint, alpha, beta float32) LabelVectors
- func (model *LabelNearest) PredictWithContext(x FeatureVector, K, S uint, alpha, beta float32, ctx LabelNearestContext) (LabelVector, map[uint32]float32, KeyValues32)
- type LabelNearestContext
- type LabelOne
- type LabelOneParameters
- type LabelVector
- type LabelVectors
- type SimCountPair
- type SparseVector
- type SparseVectors
Constants ¶
This section is empty.
Variables ¶
var BinaryClassifierTrainers = map[string]BinaryClassifierTrainer{ "L1Logistic_PrimalSGD": BinaryClassifierTrainer_L1Logistic_PrimalSGD, "L1SVC_PrimalSGD": BinaryClassifierTrainer_L1SVC_PrimalSGD, }
BinaryClassifierTrainers is the map from the binary classifier trainer name to the corresponding binary classifier trainer.
Functions ¶
func AvgTotalVariationAmongSparseVectors ¶
func AvgTotalVariationAmongSparseVectors(svs SparseVectors) float32
AvgTotalVariationAmongSparseVectors returns the average total-variation distance among the sparse vectors. This function returns 0.0 if there is at most one sparse vector, and returns NaN if some sparse vectors are empty.
func ClassifyAllToBinaryClass ¶
ClassifyAllToBinaryClass returns the bool slice whose entry indicates positive label if the corresponding z value is positive, otherwise negative label.
func ClassifyToBinaryClass ¶
ClassifyToBinaryClass returns true indicating positive label if the z value is positive, otherwise false indicating negative label.
func DecodeDataset ¶
DecodeDataset decodes Dataset from r. Directly passing *os.File used by a gob.Decoder to this function causes mysterious errors. Thus, if users use gob.Decoder, then they should call DecodeDatasetWithGobDecoder.
This function returns an error in decoding.
func DecodeDatasetWithGobDecoder ¶
DecodeDatasetWithGobDecoder decodes Dataset using decoder.
This function returns an error in decoding.
func DecodeJaccardHashing ¶
func DecodeJaccardHashing(hashing *JaccardHashing, r io.Reader) error
DecodeJaccardHashing decodes JaccardHashing from r. Directly passing *os.File used by a gob.Decoder to this function causes mysterious errors. Thus, if users use gob.Decoder, then they should call DecodeJaccardHashingWithGobDecoder.
This function returns an error in decoding.
func DecodeJaccardHashingWithGobDecoder ¶
func DecodeJaccardHashingWithGobDecoder(hashing *JaccardHashing, decoder *gob.Decoder) error
DecodeJaccardHashingWithGobDecoder decodes JaccardHashing using decoder.
This function returns an error in decoding.
func DecodeLabelConst ¶
func DecodeLabelConst(model *LabelConst, r io.Reader) error
DecodeLabelConst decodes LabelConst from r. Directly passing *os.File used by a gob.Decoder to this function causes mysterious errors. Thus, if users use gob.Decoder, then they should call DecodeLabelConstWithGobDecoder.
This function returns an error in decoding.
func DecodeLabelConstWithGobDecoder ¶
func DecodeLabelConstWithGobDecoder(model *LabelConst, decoder *gob.Decoder) error
DecodeLabelConstWithGobDecoder decodes LabelConst using decoder.
This function returns an error in decoding.
func DecodeLabelNear ¶
DecodeLabelNear decodes LabelNear from r. Directly passing *os.File used by a gob.Decoder to this function causes mysterious errors. Thus, if users use gob.Decoder, then they should call DecodeLabelNearWithGobDecoder.
This function returns an error in decoding.
func DecodeLabelNearWithGobDecoder ¶
DecodeLabelNearWithGobDecoder decodes LabelNear using decoder.
This function returns an error in decoding.
func DecodeLabelNearest ¶
func DecodeLabelNearest(model *LabelNearest, r io.Reader) error
DecodeLabelNearest decodes LabelNearest from r. Directly passing *os.File used by a gob.Decoder to this function causes mysterious errors. Thus, if users use gob.Decoder, then they should call DecodeLabelNearestWithGobDecoder.
This function returns an error in decoding.
func DecodeLabelNearestWithGobDecoder ¶
func DecodeLabelNearestWithGobDecoder(model *LabelNearest, decoder *gob.Decoder) error
DecodeLabelNearestWithGobDecoder decodes LabelNearest using decoder.
This function returns an error in decoding.
func DecodeLabelOne ¶
DecodeLabelOne decodes LabelOne from r. Directly passing *os.File used by a gob.Decoder to this function causes mysterious errors. Thus, if users use gob.Decoder, then they should call DecodeLabelOneWithGobDecoder.
This function returns an error in decoding.
func DecodeLabelOneWithGobDecoder ¶
DecodeLabelOneWithGobDecoder decodes LabelOne using decoder.
This function returns an error in decoding.
func DotCount ¶
func DotCount(x, y FeatureVector) (float32, int)
DotCount returns the inner product of x and y, and the size of the intersection of their supports.
func EncodeDataset ¶
EncodeDataset encodes Dataset to w. Directly passing *os.File used by a gob.Encoder to this function causes mysterious errors. Thus, if users use gob.Encoder, then they should call EncodeDatasetWithGobEncoder.
This function returns an error in encoding.
func EncodeDatasetWithGobEncoder ¶
EncodeDatasetWithGobEncoder encodes Dataset using encoder.
This function returns an error in encoding.
func EncodeJaccardHashing ¶
func EncodeJaccardHashing(hashing *JaccardHashing, w io.Writer) error
EncodeJaccardHashing encodes JaccardHashing to w. Directly passing *os.File used by a gob.Encoder to this function causes mysterious errors. Thus, if users use gob.Encoder, then they should call EncodeJaccardHashingWithGobEncoder.
This function returns an error in encoding.
func EncodeJaccardHashingWithGobEncoder ¶
func EncodeJaccardHashingWithGobEncoder(hashing *JaccardHashing, encoder *gob.Encoder) error
EncodeJaccardHashingWithGobEncoder encodes JaccardHashing using encoder.
This function returns an error in encoding.
func EncodeLabelConst ¶
func EncodeLabelConst(model *LabelConst, w io.Writer) error
EncodeLabelConst encodes LabelConst to w. Directly passing *os.File used by a gob.Encoder to this function causes mysterious errors. Thus, if users use gob.Encoder, then they should call EncodeLabelConstWithGobEncoder.
This function returns an error in encoding.
func EncodeLabelConstWithGobEncoder ¶
func EncodeLabelConstWithGobEncoder(model *LabelConst, encoder *gob.Encoder) error
EncodeLabelConstWithGobEncoder encodes LabelConst using encoder.
This function returns an error in encoding.
func EncodeLabelNear ¶
EncodeLabelNear encodes LabelNear to w. Directly passing *os.File used by a gob.Encoder to this function causes mysterious errors. Thus, if users use gob.Encoder, then they should call EncodeLabelNearWithGobEncoder.
This function returns an error in encoding.
func EncodeLabelNearWithGobEncoder ¶
EncodeLabelNearWithGobEncoder encodes LabelNear using encoder.
This function returns an error in encoding.
func EncodeLabelNearest ¶
func EncodeLabelNearest(model *LabelNearest, w io.Writer) error
EncodeLabelNearest encodes LabelNearest to w. Directly passing *os.File used by a gob.Encoder to this function causes mysterious errors. Thus, if users use gob.Encoder, then they should call EncodeLabelNearestWithGobEncoder.
This function returns an error in encoding.
func EncodeLabelNearestWithGobEncoder ¶
func EncodeLabelNearestWithGobEncoder(model *LabelNearest, encoder *gob.Encoder) error
EncodeLabelNearestWithGobEncoder encodes LabelNearest using encoder.
This function returns an error in encoding.
func EncodeLabelOne ¶
EncodeLabelOne encodes LabelOne to w. Directly passing *os.File used by a gob.Encoder to this function causes mysterious errors. Thus, if users use gob.Encoder, then they should call EncodeLabelOneWithGobEncoder.
This function returns an error in encoding.
func EncodeLabelOneWithGobEncoder ¶
EncodeLabelOneWithGobEncoder encodes LabelOne using encoder.
This function returns an error in encoding.
func HashUint32 ¶
HashUint32 returns the hashed value of the given uint32 x. This comes from MurmurHash3 fmix32 (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp).
func IdealDCG ¶
IdealDCG returns the calculated ideal DCG. Ideal DCG@K is defined as \sum_{k=1}^K 1/log_2(1+k), which is the maximum of possible DCG@K values. These values are cached persistently.
Ideal DCG@0 is undefined, so this function returns NaN.
func InvertRanks ¶
func InvertRanks(labelRanks LabelVector) map[uint32]int
InvertRanks returns the inverted ranking list.
func LogBinary32 ¶
LogBinary32 is the float32-version of math.Log2.
func ReportMaxPrecision ¶
func ReportMaxPrecision(Y LabelVectors, K uint) []float32
ReportMaxPrecision reports the maximum Precision@K value of each label vector in Y.
func ReportNDCG ¶
func ReportNDCG(Y LabelVectors, K uint, Yhat LabelVectors) []float32
ReportNDCG reports the nDCG@K (normalized DCG@K) value of each label vector in Y.
nDCG@0 is undefined, so this function returns a slice filled with NaN.
NOTICE: The maximum nDCG@K is always 1.0, because nDCG@K is normalized.
func ReportPrecision ¶
func ReportPrecision(Y LabelVectors, K uint, Yhat LabelVectors) []float32
ReportPrecision reports the Precision@K value of each label vector in Y.
func SummarizeFloat32Slice ¶
SummarizeFloat32Slice returns the five-number summary (minimum, 1st quartile, median, 3rd quartile and maximum) and the average of the given float32 slice.
Types ¶
type BinaryClassifier ¶
type BinaryClassifier struct { // Bias is the bias parameter. Bias float32 // Weight is the weight parameter. Weight SparseVector // The following members are not required. // // Beta is used by some solvers (using dual problems) as the optimization target. // Weight can be expressed as the sum of y_ix_i weighted with the corresponding elements of Beta. Beta []float32 }
BinaryClassifier is the data structure having information about binary classifiers.
BinaryClassifier classifies the given entry x as positive if dot(Weight, x) + Bias > 0, otherwise as negative.
func BinaryClassifierTrainer_L1Logistic_PrimalSGD ¶
func BinaryClassifierTrainer_L1Logistic_PrimalSGD(X FeatureVectors, Y []bool, C, epsilon float32, debug *log.Logger) (*BinaryClassifier, error)
BinaryClassifierTrainer_L1Logistic_PrimalSGD returns a trained BinaryClassifier with FTRL-Proximal (McMahan+ 2013) method for L1-penalized logistic regression. This can be used for estimating the probability that the given data point belongs to the positive class, and this algorithm would produce the smaller model.
This function returns no error currently.
References:
(McMahan+ 2013) H. B. McMahan, et al. "Ad Click Prediction: a View from the Trenches." Proceedings of the 19th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 2013.
func BinaryClassifierTrainer_L1SVC_PrimalSGD ¶
func BinaryClassifierTrainer_L1SVC_PrimalSGD(X FeatureVectors, Y []bool, C, epsilon float32, debug *log.Logger) (*BinaryClassifier, error)
BinaryClassifierTrainer_L1SVC_PrimalSGD trains a L1-Support Vector Classifier with primal stochastic gradient descent. This is registered to BinaryClassifierTrainers.
The used update procedure is the one used by Online Passive-Aggressive Algorithm (Crammer+ 2006) with the dynamic penalty parameter depending on the round number t. This update is proven to be safe, that is, this leads to sane results even when the learning rate is large (Karampatziakis+ 2011, SubSection 4.2). Thus, although we fix eta0 at 1.0 and the learning rate at eta0 / t, this algorithm is fast and accurate enough.
This function returns no error currently.
Reference:
(Crammer+ 2006) K. Crammer, O. Dekel, J. Keshet, S. Shalev-Shwartz, and Y. Singer. "Online Passive-Aggressive Algorithms." Journal of Machine Learning Research, vol. 7, pp. 551-585, 2006.
(Karampatziakis+ 2011) N. Karampatziakis, and J. Langford, "Online Importance Weight Aware Updates." Association for Uncertainty in Artificial Intelligence, 2011.
func (*BinaryClassifier) Predict ¶
func (bc *BinaryClassifier) Predict(x FeatureVector) float32
Predict returns the predicted value dot(Weight, x) + Bias.
func (*BinaryClassifier) PredictAll ¶
func (bc *BinaryClassifier) PredictAll(X FeatureVectors) []float32
PredictAll returns the predicted values dot(Weight, x) + Bias for each feature vector.
func (*BinaryClassifier) PredictAndCount ¶
func (bc *BinaryClassifier) PredictAndCount(x FeatureVector) (float32, uint32)
PredictAndCount returns the predicted value dot(Weight, x) + Bias and the splitter count (the number of times the splitter hits).
func (*BinaryClassifier) PredictAndCountAll ¶
func (bc *BinaryClassifier) PredictAndCountAll(X FeatureVectors) ([]float32, []uint32)
PredictAndCountAll returns the predicted values dot(Weight, x) + Bias and the splitter count for each feature vector.
func (*BinaryClassifier) ReportPerformance ¶
func (bc *BinaryClassifier) ReportPerformance(X FeatureVectors, Y []bool) (tn, fn, fp, tp uint, predVals []float32, Yhat []bool)
ReportPerformance returns the true-negative/false-negative/false-positive/true-positive and the predicted values on X.
type BinaryClassifierTrainer ¶
type BinaryClassifierTrainer func(X FeatureVectors, Y []bool, C, epsilon float32, debug *log.Logger) (*BinaryClassifier, error)
BinaryClassifierTrainer is the type of binary classifier trainers. A trainer returns a new BinaryClassifier on X and Y. C is the inverse of the penalty parameter. epsilon is the tolerance parameter for checking the convergence. debug is used for debug logs.
type Dataset ¶
type Dataset struct { X FeatureVectors Y LabelVectors }
Dataset is a collection of the pair of one feature vector and one label vector.
func ReadTextDataset ¶
ReadTextDataset returns a new Dataset from reader.
The data from reader is assumed to be in a format like the one used by LIBLINEAR and LIBSVM. It is a plain text whose cells are separated by single space. The first line has the number of entries, features, and labels. The remaining lines have entries of the dataset. Each entry is encoded in one line like (comma-separated label) (feature:value)*.
This function returns an error in reading the dataset.
func (*Dataset) FeatureSubSet ¶
FeatureSubSet returns the sub-set of the dataset whose entries have only features in the given set of features. For efficiency, the label vectors of the sub-dataset are references to those of the original dataset.
func (*Dataset) GobEncode ¶
GobEncode returns the error always, because users should encode large Dataset objects with EncodeDataset.
type FeatureVector ¶
type FeatureVector = KeyValues32OrderedByKey
FeatureVector is the sparse and static feature vector. The elements should be ordered by feature ID (key).
type FeatureVectors ¶
type FeatureVectors []FeatureVector
FeatureVectors is the FeatureVector slice.
func (FeatureVectors) Dim ¶
func (X FeatureVectors) Dim() (d int)
Dim returns the calculated dimension of FeatureVectors. This is the maximum feature key plus 1.
type Float32Slice ¶
type Float32Slice []float32
Float32Slice implements the interface sort.Interface.
func (Float32Slice) Len ¶
func (x Float32Slice) Len() int
func (Float32Slice) Less ¶
func (x Float32Slice) Less(i, j int) bool
func (Float32Slice) Swap ¶
func (x Float32Slice) Swap(i, j int)
type JaccardHashing ¶
type JaccardHashing struct {
// contains filtered or unexported fields
}
JaccardHashing is the optimal Densified One Permutation Hashing (DOPH) for estimating Jaccard similarity (Wang+ 2017).
References:
(Wang+ 2017) Y. Wang, A. Shrivastava, and J. Ryu. "FLASH: Randomized Algorithms Accelerated over CPU-GPU for Ultra-High Dimensional Similarity Search." arXiv preprint arXiv:1709.01190, 2017.
func NewJaccardHashing ¶
func NewJaccardHashing(K, L, R uint) *JaccardHashing
NewJaccardHashing returns a new JaccardHashing. Recommended K, L and R are 64, 16 and 64, respectively.
func (*JaccardHashing) Add ¶
func (hashing *JaccardHashing) Add(vec FeatureVector, i uint32)
Add adds the given feature vector as i-th index to the hash tables.
func (*JaccardHashing) FindNears ¶
func (hashing *JaccardHashing) FindNears(vec FeatureVector) KeyCountMap32
FindNears returns the histogram of the neighbors from the given feature vector.
func (*JaccardHashing) GobEncode ¶
func (hashing *JaccardHashing) GobEncode() ([]byte, error)
GobEncode returns the error always, because users should encode large JaccardHashing objects with EncodeJaccardHashing.
func (*JaccardHashing) Hash ¶
func (hashing *JaccardHashing) Hash(vec FeatureVector) []uint32
Hash returns the K hashed values of the given feature vector.
func (*JaccardHashing) K ¶
func (hashing *JaccardHashing) K() uint
K returns the number of the hash tables.
func (*JaccardHashing) L ¶
func (hashing *JaccardHashing) L() uint
L returns the bit-width for bucket indices in each hash table.
func (*JaccardHashing) R ¶
func (hashing *JaccardHashing) R() uint
R returns the size of a reservoir of each bucket.
func (*JaccardHashing) ResetRng ¶
func (hashing *JaccardHashing) ResetRng()
ResetRng resets the internal random number generator.
func (*JaccardHashing) Summary ¶
func (hashing *JaccardHashing) Summary() (backetUsage []int, backetHist []int)
Summary returns the slice of bucket usage and the bucket size (size of each reservoir) histogram.
type KeyCount32 ¶
type KeyCount32 struct {
Key, Count uint32
}
KeyCount32 is the pair of uint32 feature key and its uint32 value.
type KeyCountMap32 ¶
type KeyCountMap32 KeyCounts32
KeyCountMap32 is the faster map of KeyCount32s. This cannot have entries with value 0.
Currently, this does not support expansion at insertion. Users can iterate the entries with raw access to the internal, so this is for expert use in order to achieve faster counting.
func NewKeyCountMap32 ¶
func NewKeyCountMap32(capacity uint) KeyCountMap32
NewKeyCountMap32 returns a new KeyCountMap32.
func (KeyCountMap32) Get ¶
func (m KeyCountMap32) Get(key uint32) KeyCount32
Get returns the entry with the given key.
func (KeyCountMap32) Inc ¶
func (m KeyCountMap32) Inc(key uint32) KeyCount32
Inc increments the entry's value with the given key, and returns the entry.
func (KeyCountMap32) Map ¶
func (m KeyCountMap32) Map() map[uint32]uint32
Map returns the map version of self.
type KeyCounts32 ¶
type KeyCounts32 []KeyCount32
KeyCounts32 is the slice of KeyCount32.
func (KeyCounts32) ExtractLargestCountsByInsert ¶
func (kcs KeyCounts32) ExtractLargestCountsByInsert(K uint) KeyCounts32
ExtractLargestCountsByInsert returns only the K largest entries.
func (KeyCounts32) SortLargestCountsWithHeap ¶
func (kcs KeyCounts32) SortLargestCountsWithHeap(K uint) KeyCounts32
SortLargestCountsWithHeap sorts only the K largest entries to the front while maintaining the heap, and returns the receiver slice shrunk to those entries.
type KeyValue32 ¶
KeyValue32 is the pair of uint32 feature key and its float32 value.
type KeyValues32OrderedByKey ¶
type KeyValues32OrderedByKey KeyValues32
KeyValues32OrderedByKey is KeyValues32 implementing sort.Interface for sorting in the key order. If the keys are same, then these key-values are sorted in increasing value order.
func (KeyValues32OrderedByKey) Len ¶
func (kvs KeyValues32OrderedByKey) Len() int
func (KeyValues32OrderedByKey) Less ¶
func (kvs KeyValues32OrderedByKey) Less(i, j int) bool
func (KeyValues32OrderedByKey) Swap ¶
func (kvs KeyValues32OrderedByKey) Swap(i, j int)
type KeyValues32OrderedByValue ¶
type KeyValues32OrderedByValue KeyValues32
KeyValues32OrderedByValue is KeyValues32 implementing sort.Interface for sorting in the value order. If the values are same, then these key-values are sorted in decreasing key order, because this is intended for sorting in decreasing key/value order in reverse mode.
func (KeyValues32OrderedByValue) Len ¶
func (kvs KeyValues32OrderedByValue) Len() int
func (KeyValues32OrderedByValue) Less ¶
func (kvs KeyValues32OrderedByValue) Less(i, j int) bool
func (KeyValues32OrderedByValue) Swap ¶
func (kvs KeyValues32OrderedByValue) Swap(i, j int)
type LabelConst ¶
type LabelConst struct { // LabelList and LabelFreqList are the label and its frequency list in descending order in the training set occurrences. LabelList LabelVector LabelFreqList []float32 }
LabelConst is the multi-label constant model.
func TrainLabelConst ¶
func TrainLabelConst(ds *Dataset, debug *log.Logger) (*LabelConst, error)
TrainLabelConst returns a trained LabelConst on the given dataset ds.
func (*LabelConst) GobEncode ¶
func (model *LabelConst) GobEncode() ([]byte, error)
GobEncode returns the error always, because users should encode large LabelConst objects with EncodeLabelConst.
func (*LabelConst) PredictAll ¶
func (model *LabelConst) PredictAll(X FeatureVectors, K uint) LabelVectors
PredictAll returns the top-K labels for each data entry in X.
type LabelNear ¶
type LabelNear struct { // Dataset is the training dataset. Dataset *Dataset // Hashing is the Jaccard hashing. Hashing *JaccardHashing }
LabelNear is a faster implementation of LabelNearest which uses the optimal Densified One Permutation Hashing (DOPH) and the reservoir sampling (Wang+ 2017).
References:
(Wang+ 2017) Y. Wang, A. Shrivastava, and J. Ryu. "FLASH: Randomized Algorithms Accelerated over CPU-GPU for Ultra-High Dimensional Similarity Search." arXiv preprint arXiv:1709.01190, 2017.
func TrainLabelNear ¶
func TrainLabelNear(ds *Dataset, params *LabelNearParameters, debug *log.Logger) (*LabelNear, error)
TrainLabelNear returns a trained LabelNear on the given training dataset ds.
Currently, this function returns no error.
func (*LabelNear) FindNears ¶
func (model *LabelNear) FindNears(x FeatureVector, c, S uint, beta float32) KeyValues32
FindNears returns the S near entries with each similarity for the given entry. The quantity c*S is used for sieving the candidates by hashing. See Predict for hyper-parameter details.
func (*LabelNear) GobEncode ¶
GobEncode returns the error always, because users should encode large LabelNear objects with EncodeLabelNear.
func (*LabelNear) Predict ¶
func (model *LabelNear) Predict(x FeatureVector, K, c, S uint, alpha, beta float32) (LabelVector, map[uint32]float32, KeyValues32)
Predict returns the results for the given data entry x with the sparse S-near neighborhood. The returned results are the top-K labels, the label histogram, and the slice of the data entry index and its similarity.
alpha is the smoothing parameter for weighting the votes by each neighbor. beta is the smoothing parameter for balancing the Jaccard similarity and the cosine similarity.
func (*LabelNear) PredictAll ¶
func (model *LabelNear) PredictAll(X FeatureVectors, K, c, S uint, alpha, beta float32) LabelVectors
PredictAll returns the top-K labels for each data entry in X with the sparse S-near neighborhood. See Predict for hyper-parameter details.
type LabelNearParameters ¶
type LabelNearParameters struct { // K is the number of the hash tables. K uint // L is the bit-width of bucket indices in each hash table. L uint // R is the size of a reservoir of each bucket. R uint }
LabelNearParameters is the parameters for LabelNear.
func NewLabelNearParameters ¶
func NewLabelNearParameters() *LabelNearParameters
NewLabelNearParameters returns a new default LabelNearParameters.
type LabelNearest ¶
type LabelNearest struct { // NfeaturesList is the slice of the number of features contained in each training data entry. NfeaturesList []uint32 // FeatureIndexList is the map from the feature to the feature index. // The feature index contains the list of the pair of the entry index and the corresponding feature value. // The feature vector for the each entry is normalized for computing cos similarity effectively in the inference. FeatureIndexList map[uint32]KeyValues32 // LabelVectors is the label vectors in the training dataset. LabelVectors LabelVectors }
LabelNearest is the sparse weighted nearest neighborhood. It is sparse in the following 3 senses:
(i) The similarity used in constructing the nearest neighborhood is defined by the inner-product on the features activated in the given entry. (ii) The positive labels assigned to the entries in the training dataset are only used for averaging. (iii) The entries in the training dataset whose inner-product is not positive are not used.
LabelNearest is only the optimized data structure of the training dataset for searching nearest neighborhood.
func TrainLabelNearest ¶
func TrainLabelNearest(ds *Dataset, debug *log.Logger) (*LabelNearest, error)
TrainLabelNearest returns a trained LabelNearest on the given training dataset ds.
Currently, this function returns no error.
func (*LabelNearest) FindNearests ¶
func (model *LabelNearest) FindNearests(x FeatureVector, S uint, beta float32) KeyValues32
FindNearests returns the S nearest entries with each similarity for the given entry. See Predict for hyper-parameter details.
func (*LabelNearest) FindNearestsWithContext ¶
func (model *LabelNearest) FindNearestsWithContext(x FeatureVector, S uint, beta float32, ctx LabelNearestContext) KeyValues32
FindNearestsWithContext is FindNearests with the specified LabelNearestContext.
func (*LabelNearest) GobEncode ¶
func (model *LabelNearest) GobEncode() ([]byte, error)
GobEncode returns the error always, because users should encode large LabelNearest objects with EncodeLabelNearest.
func (*LabelNearest) NewContext ¶
func (model *LabelNearest) NewContext() LabelNearestContext
NewContext returns a new context for some inference memory.
func (*LabelNearest) Predict ¶
func (model *LabelNearest) Predict(x FeatureVector, K, S uint, alpha, beta float32) (LabelVector, map[uint32]float32, KeyValues32)
Predict returns the results for the given data entry x with the sparse S-nearest neighborhood. The returned results are the top-K labels, the label histogram, and the slice of the data entry index and its similarity.
alpha is the smoothing parameter for weighting the votes by each neighbor. beta is the smoothing parameter for balancing the Jaccard similarity and the cosine similarity.
func (*LabelNearest) PredictAll ¶
func (model *LabelNearest) PredictAll(X FeatureVectors, K, S uint, alpha, beta float32) LabelVectors
PredictAll returns the top-K labels for each data entry in X with the sparse S-nearest neighborhood. See Predict for hyper-parameter details.
func (*LabelNearest) PredictWithContext ¶
func (model *LabelNearest) PredictWithContext(x FeatureVector, K, S uint, alpha, beta float32, ctx LabelNearestContext) (LabelVector, map[uint32]float32, KeyValues32)
PredictWithContext is Predict with the specified LabelNearestContext.
type LabelNearestContext ¶
type LabelNearestContext []SimCountPair
LabelNearestContext is a context used in inference. This is not protected by any mutex, so this should not be accessed by multiple goroutines.
type LabelOne ¶
type LabelOne struct { // Params is the used LabelOneParameters. Params *LabelOneParameters // Biases is the bias slice used by splitters on each classifier. Biases []float32 // Weights is the weight sparse matrix used by each classifier. // This is the map from the feature key to the (roundID, the weight on the feature of #roundID splitter) slice. // This data structure reduces the number of times that the classifier accesses the golang's map a lot. WeightLists map[uint32]KeyValues32 // Labels is the label slice used in each classifier. // The t-th label is the target label of the t-th classifier. Labels LabelVector // The following members are not required. // // Summaries is the summary object slice for each boosting round. // The entries in this summary is considered to provide compact and useful information in best-effort, so this specification would be loose and rapidly changing. Summaries []map[string]interface{} }
LabelOne is the One-versus-Rest classifier for multi-label ranking. The t-th classifier (t = 1, ..., T) is the classifier for the top-t frequently occurring label.
func TrainLabelOne ¶
TrainLabelOne returns a trained LabelOne on the given dataset ds.
func (*LabelOne) GobEncode ¶
GobEncode returns the error always, because users should encode large LabelOne objects with EncodeLabelOne.
func (*LabelOne) Predict ¶
func (model *LabelOne) Predict(x FeatureVector, K uint, T uint) LabelVector
Predict returns the top-K predicted labels for the given data entry x with the first T rounds.
func (*LabelOne) PredictAll ¶
func (model *LabelOne) PredictAll(X FeatureVectors, K uint, T uint) LabelVectors
PredictAll returns the slice of the top-K predicted labels for each data entry in X with the first T rounds.
type LabelOneParameters ¶
type LabelOneParameters struct { // ClassifierTrainerName is the used BinaryClassifierTrainer name. ClassifierTrainerName string // C is the penalty parameter for BinaryClassifierTrainer. C float32 // Epsilon is the tolerance parameter for BinaryClassifierTrainer. Epsilon float32 // T is the maximum number of the rounds, which is equal to the maximum number of the target labels. T uint }
LabelOneParameters is the parameters for LabelOne.
func NewLabelOneParameters ¶
func NewLabelOneParameters() *LabelOneParameters
NewLabelOneParameters returns a LabelOneParameters initialized with the default values.
type LabelVector ¶
type LabelVector []uint32
LabelVector is the sparse label vector which is the slice of label key.
func RankTopK ¶
func RankTopK(labelDist SparseVector, K uint) LabelVector
RankTopK returns the top-K labels.
func (LabelVector) Len ¶
func (labels LabelVector) Len() int
func (LabelVector) Less ¶
func (labels LabelVector) Less(i, j int) bool
func (LabelVector) Swap ¶
func (labels LabelVector) Swap(i, j int)
type LabelVectors ¶
type LabelVectors []LabelVector
LabelVectors is the LabelVector slice.
func (LabelVectors) Dim ¶
func (Y LabelVectors) Dim() (d int)
Dim returns the calculated dimension of label vectors. This is the maximum label ID plus 1.
type SimCountPair ¶
SimCountPair is the data structure for float32 similarity and uint32 count.
type SparseVector ¶
SparseVector is the map from uint32 key to float32 value.
func SparsifyVector ¶
func SparsifyVector(v []float32) SparseVector
SparsifyVector returns a SparseVector converted from v.
Source Files ¶
Directories ¶
Path | Synopsis |
---|---|
plugin | Package plugin provides plugin functions for sticker. |