ranking

package

v0.2.6 Latest Latest Go to latest Published: Sep 26, 2021 License: Apache-2.0 Imports: 19 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/zhenghaoz/gorse

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func EncodeModel(m Model) ([]byte, error)
func Evaluate(estimator MatrixFactorization, testSet, trainSet *DataSet, ...) []float32
func GetModelName(m Model) string
func HR(targetSet *i32set.Set, rankList []int32) float32
func LoadDataFromBuiltIn(dataSetName string) (*DataSet, *DataSet, error)
func MAP(targetSet *i32set.Set, rankList []int32) float32
func MRR(targetSet *i32set.Set, rankList []int32) float32
func NDCG(targetSet *i32set.Set, rankList []int32) float32
func Precision(targetSet *i32set.Set, rankList []int32) float32
func Rank(model MatrixFactorization, userId int32, candidates []int32, topN int) ([]int32, []float32)
func Recall(targetSet *i32set.Set, rankList []int32) float32
type ALS
- func NewALS(params model.Params) *ALS
- func (als *ALS) Clear()
- func (als *ALS) Fit(trainSet, valSet *DataSet, config *FitConfig) Score
- func (als *ALS) GetParamsGrid() model.ParamsGrid
- func (als *ALS) Init(trainSet *DataSet)
- func (als *ALS) InternalPredict(userIndex, itemIndex int32) float32
- func (als *ALS) Invalid() bool
- func (als *ALS) Predict(userId, itemId string) float32
- func (als *ALS) SetParams(params model.Params)
type BPR
- func NewBPR(params model.Params) *BPR
- func (bpr *BPR) Clear()
- func (bpr *BPR) Fit(trainSet, valSet *DataSet, config *FitConfig) Score
- func (bpr *BPR) GetParamsGrid() model.ParamsGrid
- func (bpr *BPR) Init(trainSet *DataSet)
- func (bpr *BPR) InternalPredict(userIndex, itemIndex int32) float32
- func (bpr *BPR) Invalid() bool
- func (bpr *BPR) Predict(userId, itemId string) float32
- func (bpr *BPR) SetParams(params model.Params)
type BaseMatrixFactorization
- func (model *BaseMatrixFactorization) GetItemIndex() base.Index
- func (model *BaseMatrixFactorization) GetUserIndex() base.Index
- func (model *BaseMatrixFactorization) Init(trainSet *DataSet)
type CCD
- func NewCCD(params model.Params) *CCD
- func (ccd *CCD) Clear()
- func (ccd *CCD) Fit(trainSet, valSet *DataSet, config *FitConfig) Score
- func (ccd *CCD) GetParamsGrid() model.ParamsGrid
- func (ccd *CCD) Init(trainSet *DataSet)
- func (ccd *CCD) InternalPredict(userIndex, itemIndex int32) float32
- func (ccd *CCD) Invalid() bool
- func (ccd *CCD) Predict(userId, itemId string) float32
- func (ccd *CCD) SetParams(params model.Params)
type DataSet
- func LoadDataFromCSV(fileName, sep string, hasHeader bool) *DataSet
- func NewDirectIndexDataset() *DataSet
- func NewMapIndexDataset() *DataSet
- func (dataset *DataSet) AddFeedback(userId, itemId string, insertUserItem bool)
- func (dataset *DataSet) AddItem(itemId string)
- func (dataset *DataSet) AddUser(userId string)
- func (dataset *DataSet) Count() int
- func (dataset *DataSet) GetIndex(i int) (int32, int32)
- func (dataset *DataSet) ItemCount() int
- func (dataset *DataSet) NegativeSample(excludeSet *DataSet, numCandidates int) [][]int32
- func (dataset *DataSet) SetNegatives(userId string, negatives []string)
- func (dataset *DataSet) Split(numTestUsers int, seed int64) (*DataSet, *DataSet)
- func (dataset *DataSet) UserCount() int
type FitConfig
- func NewFitConfig() *FitConfig
- func (config *FitConfig) LoadDefaultIfNil() *FitConfig
- func (config *FitConfig) SetJobs(nJobs int) *FitConfig
- func (config *FitConfig) SetTracker(tracker model.Tracker) *FitConfig
- func (config *FitConfig) SetVerbose(verbose int) *FitConfig
type MatrixFactorization
- func DecodeModel(buf []byte) (MatrixFactorization, error)
type Metric
type Model
- func Clone(m Model) Model
- func NewModel(name string, params model.Params) (Model, error)
type ModelSearcher
- func NewModelSearcher(nEpoch, nTrials, nJobs int) *ModelSearcher
- func (searcher *ModelSearcher) Fit(trainSet, valSet *DataSet, tracker model.Tracker, runner model.Runner) error
- func (searcher *ModelSearcher) GetBestModel() (string, Model, Score)
type ParamsSearchResult
- func GridSearchCV(estimator MatrixFactorization, trainSet *DataSet, testSet *DataSet, ...) ParamsSearchResult
- func RandomSearchCV(estimator MatrixFactorization, trainSet *DataSet, testSet *DataSet, ...) ParamsSearchResult
- func (r *ParamsSearchResult) AddScore(params model.Params, score Score)
type Score
type SnapshotManger
- func (sm *SnapshotManger) AddSnapshot(score Score, weights ...interface{})
- func (sm *SnapshotManger) AddSnapshotNoCopy(score Score, weights ...interface{})

Constants ¶

View Source

const (
	CollaborativeBPR = "bpr"
	CollaborativeALS = "als"
	CollaborativeCCD = "ccd"
)

Variables ¶

This section is empty.

Functions ¶

func EncodeModel ¶

func EncodeModel(m Model) ([]byte, error)

func Evaluate ¶

func Evaluate(estimator MatrixFactorization, testSet, trainSet *DataSet, topK, numCandidates, nJobs int, scorers ...Metric) []float32

Evaluate evaluates a model in top-n tasks.

func GetModelName ¶ added in v0.2.5

func GetModelName(m Model) string

func HR ¶

func HR(targetSet *i32set.Set, rankList []int32) float32

HR means Hit Ratio.

func LoadDataFromBuiltIn ¶

func LoadDataFromBuiltIn(dataSetName string) (*DataSet, *DataSet, error)

LoadDataFromBuiltIn loads a built-in Data set. Now support:

func MAP ¶

func MAP(targetSet *i32set.Set, rankList []int32) float32

MAP means Mean Average Precision. mAP: http://sdsawtelle.github.io/blog/output/mean-average-precision-MAP-for-recommender-systems.html

func MRR ¶

func MRR(targetSet *i32set.Set, rankList []int32) float32

MRR means Mean Reciprocal Rank.

The mean reciprocal rank is a statistical measure for evaluating any process that produces a list of possible responses to a sample of queries, ordered by probability of correctness. The reciprocal rank of a query response is the multiplicative inverse of the rank of the first correct answer: 1 for first place, 1⁄2 for second place, 1⁄3 for third place and so on. The mean reciprocal rank is the average of the reciprocal ranks of results for a sample of queries Q:

MRR = \frac{1}{|Q|} \sum_{i=1}^{|Q|} \frac{1}{rank_i}

func NDCG ¶

func NDCG(targetSet *i32set.Set, rankList []int32) float32

NDCG means Normalized Discounted Cumulative Gain.

func Precision ¶

func Precision(targetSet *i32set.Set, rankList []int32) float32

Precision is the fraction of relevant ItemFeedback among the recommended ItemFeedback.

\frac{|\{relevant documents\} \cap \{retrieved documents\}|}{|\{retrieved documents\}|}

func Rank ¶

func Rank(model MatrixFactorization, userId int32, candidates []int32, topN int) ([]int32, []float32)

func Recall ¶

func Recall(targetSet *i32set.Set, rankList []int32) float32

Recall is the fraction of relevant ItemFeedback that have been recommended over the total amount of relevant ItemFeedback.

\frac{|\{relevant documents\} \cap \{retrieved documents\}|}{|\{relevant documents\}|}

Types ¶

type ALS ¶

type ALS struct {
	BaseMatrixFactorization
	// Model parameters
	UserFactor *mat.Dense // p_u
	ItemFactor *mat.Dense // q_i
	// contains filtered or unexported fields
}

ALS [7] is the Weighted Regularized Matrix Factorization, which exploits unique properties of implicit feedback datasets. It treats the data as indication of positive and negative preference associated with vastly varying confidence levels. This leads to a factor model which is especially tailored for implicit feedback recommenders. Authors also proposed a scalable optimization procedure, which scales linearly with the data size. Hyper-parameters:

NFactors   - The number of latent factors. Default is 10.
NEpochs    - The number of training epochs. Default is 50.
InitMean   - The mean of initial latent factors. Default is 0.
InitStdDev - The standard deviation of initial latent factors. Default is 0.1.
Reg        - The strength of regularization.

func NewALS ¶

func NewALS(params model.Params) *ALS

NewALS creates an ALS model.

func (*ALS) Clear ¶

func (als *ALS) Clear()

func (*ALS) Fit ¶

func (als *ALS) Fit(trainSet, valSet *DataSet, config *FitConfig) Score

Fit the ALS model.

func (*ALS) GetParamsGrid ¶

func (als *ALS) GetParamsGrid() model.ParamsGrid

func (*ALS) Init ¶

func (als *ALS) Init(trainSet *DataSet)

func (*ALS) InternalPredict ¶

func (als *ALS) InternalPredict(userIndex, itemIndex int32) float32

func (*ALS) Invalid ¶ added in v0.2.2

func (als *ALS) Invalid() bool

func (*ALS) Predict ¶

func (als *ALS) Predict(userId, itemId string) float32

Predict by the ALS model.

func (*ALS) SetParams ¶

func (als *ALS) SetParams(params model.Params)

SetParams sets hyper-parameters for the ALS model.

type BPR ¶

type BPR struct {
	BaseMatrixFactorization
	// Model parameters
	UserFactor [][]float32 // p_u
	ItemFactor [][]float32 // q_i
	// contains filtered or unexported fields
}

BPR (Bayesian Personalized Ranking) is a pairwise learning algorithm for matrix factorization models with implicit feedback. The pairwise ranking between items i and j for user u is estimated by:

p(i >_u j) = \sigma( p_u^T (q_i - q_j) )

Hyper-parameters:

 Reg 		- The regularization parameter of the cost function that is
			  optimized. Default is 0.01.
 Lr 		- The learning rate of SGD. Default is 0.05.
 NFactors	- The number of latent factors. Default is 10.
 NEpochs	- The number of iterations of the SGD procedure. Default is 100.
 InitMean	- The mean of initial random latent factors. Default is 0.
 InitStdDev	- The standard deviation of initial random latent factors. Default is 0.001.

func NewBPR ¶

func NewBPR(params model.Params) *BPR

NewBPR creates a BPR model.

func (*BPR) Clear ¶

func (bpr *BPR) Clear()

func (*BPR) Fit ¶

func (bpr *BPR) Fit(trainSet, valSet *DataSet, config *FitConfig) Score

Fit the BPR model.

func (*BPR) GetParamsGrid ¶

func (bpr *BPR) GetParamsGrid() model.ParamsGrid

func (*BPR) Init ¶

func (bpr *BPR) Init(trainSet *DataSet)

func (*BPR) InternalPredict ¶

func (bpr *BPR) InternalPredict(userIndex, itemIndex int32) float32

func (*BPR) Invalid ¶ added in v0.2.2

func (bpr *BPR) Invalid() bool

func (*BPR) Predict ¶

func (bpr *BPR) Predict(userId, itemId string) float32

Predict by the BPR model.

func (*BPR) SetParams ¶

func (bpr *BPR) SetParams(params model.Params)

SetParams sets hyper-parameters of the BPR model.

type BaseMatrixFactorization ¶

type BaseMatrixFactorization struct {
	model.BaseModel
	UserIndex base.Index
	ItemIndex base.Index
}

func (*BaseMatrixFactorization) GetItemIndex ¶

func (model *BaseMatrixFactorization) GetItemIndex() base.Index

func (*BaseMatrixFactorization) GetUserIndex ¶

func (model *BaseMatrixFactorization) GetUserIndex() base.Index

func (*BaseMatrixFactorization) Init ¶

func (model *BaseMatrixFactorization) Init(trainSet *DataSet)

type CCD ¶

type CCD struct {
	BaseMatrixFactorization
	// Model parameters
	UserFactor [][]float32
	ItemFactor [][]float32
	// contains filtered or unexported fields
}

func NewCCD ¶

func NewCCD(params model.Params) *CCD

NewCCD creates an eALS model.

func (*CCD) Clear ¶

func (ccd *CCD) Clear()

func (*CCD) Fit ¶

func (ccd *CCD) Fit(trainSet, valSet *DataSet, config *FitConfig) Score

func (*CCD) GetParamsGrid ¶

func (ccd *CCD) GetParamsGrid() model.ParamsGrid

func (*CCD) Init ¶

func (ccd *CCD) Init(trainSet *DataSet)

func (*CCD) InternalPredict ¶

func (ccd *CCD) InternalPredict(userIndex, itemIndex int32) float32

func (*CCD) Invalid ¶ added in v0.2.2

func (ccd *CCD) Invalid() bool

func (*CCD) Predict ¶

func (ccd *CCD) Predict(userId, itemId string) float32

Predict by the CCD model.

func (*CCD) SetParams ¶

func (ccd *CCD) SetParams(params model.Params)

SetParams sets hyper-parameters for the CCD model.

type DataSet ¶

type DataSet struct {
	UserIndex     base.Index
	ItemIndex     base.Index
	FeedbackUsers base.Integers
	FeedbackItems base.Integers
	UserFeedback  [][]int32
	ItemFeedback  [][]int32
	Negatives     [][]int32
	ItemLabels    [][]int32
	UserLabels    [][]int32
	// statistics
	NumItemLabels int32
	NumUserLabels int32
}

DataSet contains preprocessed data structures for recommendation models.

func LoadDataFromCSV ¶

func LoadDataFromCSV(fileName, sep string, hasHeader bool) *DataSet

LoadDataFromCSV loads Data from a CSV file. The CSV file should be:

[optional header]
<userId 1> <sep> <itemId 1> <sep> <rating 1> <sep> <extras>
<userId 2> <sep> <itemId 2> <sep> <rating 2> <sep> <extras>
<userId 3> <sep> <itemId 3> <sep> <rating 3> <sep> <extras>
...

For example, the `u.data` file from MovieLens 100K is:

196\t242\t3\t881250949
186\t302\t3\t891717742
22\t377\t1\t878887116

func NewDirectIndexDataset ¶

func NewDirectIndexDataset() *DataSet

func NewMapIndexDataset ¶

func NewMapIndexDataset() *DataSet

NewMapIndexDataset creates a data set.

func (*DataSet) AddFeedback ¶

func (dataset *DataSet) AddFeedback(userId, itemId string, insertUserItem bool)

func (*DataSet) AddItem ¶

func (dataset *DataSet) AddItem(itemId string)

func (*DataSet) AddUser ¶

func (dataset *DataSet) AddUser(userId string)

func (*DataSet) Count ¶

func (dataset *DataSet) Count() int

func (*DataSet) GetIndex ¶

func (dataset *DataSet) GetIndex(i int) (int32, int32)

GetIndex gets the i-th record as <user index, item index>.

func (*DataSet) ItemCount ¶

func (dataset *DataSet) ItemCount() int

ItemCount returns the number of ItemFeedback.

func (*DataSet) NegativeSample ¶

func (dataset *DataSet) NegativeSample(excludeSet *DataSet, numCandidates int) [][]int32

func (*DataSet) SetNegatives ¶

func (dataset *DataSet) SetNegatives(userId string, negatives []string)

func (*DataSet) Split ¶

func (dataset *DataSet) Split(numTestUsers int, seed int64) (*DataSet, *DataSet)

Split splits the dataset by the user-leave-one-out method. The argument `numTestUsers` determines the number of users in the test set. If numTestUsers is equal to or greater than the total number of users, or numTestUsers <= 0, all users are included in the test set.

func (*DataSet) UserCount ¶

func (dataset *DataSet) UserCount() int

UserCount returns the number of UserFeedback.

type FitConfig ¶

type FitConfig struct {
	Jobs       int
	Verbose    int
	Candidates int
	TopK       int
	Tracker    model.Tracker
}

func NewFitConfig ¶ added in v0.2.2

func NewFitConfig() *FitConfig

func (*FitConfig) LoadDefaultIfNil ¶

func (config *FitConfig) LoadDefaultIfNil() *FitConfig

func (*FitConfig) SetJobs ¶ added in v0.2.2

func (config *FitConfig) SetJobs(nJobs int) *FitConfig

func (*FitConfig) SetTracker ¶ added in v0.2.4

func (config *FitConfig) SetTracker(tracker model.Tracker) *FitConfig

func (*FitConfig) SetVerbose ¶ added in v0.2.5

func (config *FitConfig) SetVerbose(verbose int) *FitConfig

type MatrixFactorization ¶

type MatrixFactorization interface {
	Model
	// Predict the rating given by a user (userId) to an item (itemId).
	Predict(userId, itemId string) float32
	// InternalPredict predicts the rating given a user index and an item index.
	InternalPredict(userIndex, itemIndex int32) float32
	// GetUserIndex returns user index.
	GetUserIndex() base.Index
}

func DecodeModel ¶

func DecodeModel(buf []byte) (MatrixFactorization, error)

type Metric ¶

type Metric func(targetSet *i32set.Set, rankList []int32) float32

Metric is used by evaluators in personalized ranking tasks.

type Model ¶

type Model interface {
	model.Model
	// Fit a model with a train set and parameters.
	Fit(trainSet *DataSet, validateSet *DataSet, config *FitConfig) Score
	// GetItemIndex returns item index.
	GetItemIndex() base.Index
}

func Clone ¶

func Clone(m Model) Model

Clone a model with deep copy.

func NewModel ¶

func NewModel(name string, params model.Params) (Model, error)

type ModelSearcher ¶

type ModelSearcher struct {
	// contains filtered or unexported fields
}

ModelSearcher is a thread-safe personal ranking model searcher.

func NewModelSearcher ¶

func NewModelSearcher(nEpoch, nTrials, nJobs int) *ModelSearcher

NewModelSearcher creates a thread-safe personal ranking model searcher.

func (*ModelSearcher) Fit ¶

func (searcher *ModelSearcher) Fit(trainSet, valSet *DataSet, tracker model.Tracker, runner model.Runner) error

func (*ModelSearcher) GetBestModel ¶

func (searcher *ModelSearcher) GetBestModel() (string, Model, Score)

GetBestModel returns the optimal personal ranking model.

type ParamsSearchResult ¶

type ParamsSearchResult struct {
	BestModel  Model
	BestScore  Score
	BestParams model.Params
	BestIndex  int
	Scores     []Score
	Params     []model.Params
}

ParamsSearchResult contains the return of grid search.

func GridSearchCV ¶

func GridSearchCV(estimator MatrixFactorization, trainSet *DataSet, testSet *DataSet, paramGrid model.ParamsGrid,
	_ int64, fitConfig *FitConfig, runner model.Runner) ParamsSearchResult

GridSearchCV finds the best parameters for a model.

func RandomSearchCV ¶

func RandomSearchCV(estimator MatrixFactorization, trainSet *DataSet, testSet *DataSet, paramGrid model.ParamsGrid,
	numTrials int, seed int64, fitConfig *FitConfig, runner model.Runner) ParamsSearchResult

RandomSearchCV searches hyper-parameters randomly.

func (*ParamsSearchResult) AddScore ¶

func (r *ParamsSearchResult) AddScore(params model.Params, score Score)

type Score ¶

type Score struct {
	NDCG      float32
	Precision float32
	Recall    float32
}

type SnapshotManger ¶

type SnapshotManger struct {
	BestWeights []interface{}
	BestScore   Score
}

SnapshotManger manages the best snapshot.

func (*SnapshotManger) AddSnapshot ¶

func (sm *SnapshotManger) AddSnapshot(score Score, weights ...interface{})

AddSnapshot adds a copied snapshot.

func (*SnapshotManger) AddSnapshotNoCopy ¶

func (sm *SnapshotManger) AddSnapshotNoCopy(score Score, weights ...interface{})

AddSnapshotNoCopy adds a snapshot without copy.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL