dataset

package
v0.5.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 20, 2025 License: Apache-2.0 Imports: 18 Imported by: 0

Documentation

Index

Constants

View Source
const NotId = int32(-1)

NotId represents an ID that doesn't exist.

Variables

This section is empty.

Functions

func LoadDataFromBuiltIn

func LoadDataFromBuiltIn(dataSetName string) (*Dataset, *Dataset, error)

func MarshalIndex

func MarshalIndex(w io.Writer, index *Index) error

MarshalIndex marshals an index into a byte stream.

func MarshalUnifiedIndex

func MarshalUnifiedIndex(w io.Writer, index UnifiedIndex) error

MarshalUnifiedIndex marshals a unified index into a byte stream.

Types

type CFSplit

type CFSplit interface {
	// CountUsers returns the number of users.
	CountUsers() int
	// CountItems returns the number of items.
	CountItems() int
	// CountFeedback returns the number of (positive) feedback.
	CountFeedback() int
	// GetUserDict returns the frequency dictionary of users.
	GetUserDict() *FreqDict
	// GetItemDict returns the frequency dictionary of items.
	GetItemDict() *FreqDict
	// GetUserFeedback returns the (positive) feedback of users.
	GetUserFeedback() [][]int32
	// GetItemFeedback returns the (positive) feedback of items.
	GetItemFeedback() [][]int32
	// SampleUserNegatives samples negative (feedback) for users.
	SampleUserNegatives(excludeSet CFSplit, numCandidates int) [][]int32
}

CFSplit is the dataset split for collaborative filtering.

type CTRSplit

type CTRSplit interface {
	Count() int
	CountUsers() int
	CountItems() int
	CountUserLabels() int
	CountItemLabels() int
	CountContextLabels() int
	CountPositive() int
	CountNegative() int
	GetIndex() UnifiedIndex
	GetTarget(i int) float32
	Get(i int) ([]int32, []float32, float32)
}

CTRSplit is the dataset split for click-through rate prediction.

type Dataset

type Dataset struct {
	// contains filtered or unexported fields
}

func NewDataset

func NewDataset(timestamp time.Time, userCount, itemCount int) *Dataset

func (*Dataset) AddFeedback

func (d *Dataset) AddFeedback(userId, itemId string)

func (*Dataset) AddItem

func (d *Dataset) AddItem(item data.Item)

func (*Dataset) AddUser

func (d *Dataset) AddUser(user data.User)

func (*Dataset) CountFeedback

func (d *Dataset) CountFeedback() int

func (*Dataset) CountItems

func (d *Dataset) CountItems() int

func (*Dataset) CountUsers

func (d *Dataset) CountUsers() int

func (*Dataset) GetCategories

func (d *Dataset) GetCategories() map[string]int

func (*Dataset) GetItemColumnValuesIDF

func (d *Dataset) GetItemColumnValuesIDF() []float32

func (*Dataset) GetItemDict

func (d *Dataset) GetItemDict() *FreqDict

func (*Dataset) GetItemFeedback

func (d *Dataset) GetItemFeedback() [][]int32

func (*Dataset) GetItemIDF

func (d *Dataset) GetItemIDF() []float32

GetItemIDF returns the IDF of items.

IDF(i) = log(U/freq(i))

U is the number of users. freq(i) is the frequency of item i in all feedback.

func (*Dataset) GetItems

func (d *Dataset) GetItems() []data.Item

func (*Dataset) GetTimestamp

func (d *Dataset) GetTimestamp() time.Time

func (*Dataset) GetUserColumnValuesIDF

func (d *Dataset) GetUserColumnValuesIDF() []float32

func (*Dataset) GetUserDict

func (d *Dataset) GetUserDict() *FreqDict

func (*Dataset) GetUserFeedback

func (d *Dataset) GetUserFeedback() [][]int32

func (*Dataset) GetUserIDF

func (d *Dataset) GetUserIDF() []float32

GetUserIDF returns the IDF of users.

IDF(u) = log(I/freq(u))

I is the number of items. freq(u) is the frequency of user u in all feedback.

func (*Dataset) GetUsers

func (d *Dataset) GetUsers() []data.User

func (*Dataset) SampleUserNegatives

func (d *Dataset) SampleUserNegatives(excludeSet CFSplit, numCandidates int) [][]int32

func (*Dataset) SplitCF

func (d *Dataset) SplitCF(numTestUsers int, seed int64) (CFSplit, CFSplit)

SplitCF splits the dataset by the user-leave-one-out method. The argument `numTestUsers` determines the number of users in the test set. If numTestUsers is equal to or greater than the total number of users, or numTestUsers <= 0, all users are present in the test set.

type FreqDict

type FreqDict struct {
	// contains filtered or unexported fields
}

func NewFreqDict

func NewFreqDict() (d *FreqDict)

func (*FreqDict) Add

func (d *FreqDict) Add(s string) (y int32)

func (*FreqDict) AddNoCount

func (d *FreqDict) AddNoCount(s string) (y int32)

func (*FreqDict) Count

func (d *FreqDict) Count() int32

func (*FreqDict) Freq

func (d *FreqDict) Freq(id int32) int32

func (*FreqDict) Id

func (d *FreqDict) Id(s string) int32

func (*FreqDict) String

func (d *FreqDict) String(id int32) (s string, ok bool)

func (*FreqDict) ToIndex

func (d *FreqDict) ToIndex() *Index

type ID

type ID int32

type Index

type Index struct {
	Numbers map[string]int32 // sparse ID -> dense index
	Names   []string         // dense index -> sparse ID
}

Index manages the map between sparse Names and dense indices. A sparse ID is a user ID or item ID. The dense index is the internal user index or item index optimized for faster parameter access and less memory usage.

func NewMapIndex

func NewMapIndex() *Index

NewMapIndex creates an Index.

func UnmarshalIndex

func UnmarshalIndex(r io.Reader) (*Index, error)

UnmarshalIndex unmarshals an index from a byte stream.

func (*Index) Add

func (idx *Index) Add(name string)

Add adds a new ID to the indexer.

func (*Index) GetNames

func (idx *Index) GetNames() []string

GetNames returns all names in current index.

func (*Index) Len

func (idx *Index) Len() int32

Len returns the number of indexed Names.

func (*Index) Marshal

func (idx *Index) Marshal(w io.Writer) error

Marshal marshals the map index into a byte stream.

func (*Index) ToName

func (idx *Index) ToName(index int32) string

ToName converts a dense index to a sparse ID.

func (*Index) ToNumber

func (idx *Index) ToNumber(name string) int32

ToNumber converts a sparse ID to a dense index.

func (*Index) Unmarshal

func (idx *Index) Unmarshal(r io.Reader) error

Unmarshal unmarshals the map index from a byte stream.

type Labels

type Labels struct {
	// contains filtered or unexported fields
}

func NewLabels

func NewLabels() *Labels

type UnifiedDirectIndex

type UnifiedDirectIndex struct {
	N int32
}

UnifiedDirectIndex maps strings to integers literally.

func (*UnifiedDirectIndex) CountContextLabels

func (unified *UnifiedDirectIndex) CountContextLabels() int32

CountContextLabels should be used by unit testing only.

func (*UnifiedDirectIndex) CountItemLabels

func (unified *UnifiedDirectIndex) CountItemLabels() int32

CountItemLabels should be used by unit testing only.

func (*UnifiedDirectIndex) CountItems

func (unified *UnifiedDirectIndex) CountItems() int32

CountItems should be used by unit testing only.

func (*UnifiedDirectIndex) CountUserLabels

func (unified *UnifiedDirectIndex) CountUserLabels() int32

CountUserLabels should be used by unit testing only.

func (*UnifiedDirectIndex) CountUsers

func (unified *UnifiedDirectIndex) CountUsers() int32

CountUsers should be used by unit testing only.

func (*UnifiedDirectIndex) EncodeContextLabel

func (unified *UnifiedDirectIndex) EncodeContextLabel(label string) int32

EncodeContextLabel should be used by unit testing only.

func (*UnifiedDirectIndex) EncodeItem

func (unified *UnifiedDirectIndex) EncodeItem(itemId string) int32

EncodeItem should be used by unit testing only.

func (*UnifiedDirectIndex) EncodeItemLabel

func (unified *UnifiedDirectIndex) EncodeItemLabel(itemLabel string) int32

EncodeItemLabel should be used by unit testing only.

func (*UnifiedDirectIndex) EncodeUser

func (unified *UnifiedDirectIndex) EncodeUser(userId string) int32

EncodeUser should be used by unit testing only.

func (*UnifiedDirectIndex) EncodeUserLabel

func (unified *UnifiedDirectIndex) EncodeUserLabel(userLabel string) int32

EncodeUserLabel should be used by unit testing only.

func (*UnifiedDirectIndex) GetContextLabels

func (unified *UnifiedDirectIndex) GetContextLabels() []string

GetContextLabels should be used by unit testing only.

func (*UnifiedDirectIndex) GetItemLabels

func (unified *UnifiedDirectIndex) GetItemLabels() []string

GetItemLabels should be used by unit testing only.

func (*UnifiedDirectIndex) GetItems

func (unified *UnifiedDirectIndex) GetItems() []string

GetItems should be used by unit testing only.

func (*UnifiedDirectIndex) GetUserLabels

func (unified *UnifiedDirectIndex) GetUserLabels() []string

GetUserLabels should be used by unit testing only.

func (*UnifiedDirectIndex) GetUsers

func (unified *UnifiedDirectIndex) GetUsers() []string

GetUsers should be used by unit testing only.

func (*UnifiedDirectIndex) Len

func (unified *UnifiedDirectIndex) Len() int32

Len should be used by unit testing only.

func (*UnifiedDirectIndex) Marshal

func (unified *UnifiedDirectIndex) Marshal(w io.Writer) error

Marshal marshals the direct index into a byte stream.

func (*UnifiedDirectIndex) Unmarshal

func (unified *UnifiedDirectIndex) Unmarshal(r io.Reader) error

Unmarshal unmarshals the direct index from a byte stream.

type UnifiedIndex

type UnifiedIndex interface {
	Len() int32
	EncodeUser(userId string) int32
	EncodeItem(itemId string) int32
	EncodeUserLabel(userLabel string) int32
	EncodeItemLabel(itemLabel string) int32
	EncodeContextLabel(ctxLabel string) int32
	GetUsers() []string
	GetItems() []string
	GetUserLabels() []string
	GetItemLabels() []string
	GetContextLabels() []string
	CountUsers() int32
	CountItems() int32
	CountUserLabels() int32
	CountItemLabels() int32
	CountContextLabels() int32
	Marshal(w io.Writer) error
	Unmarshal(r io.Reader) error
}

UnifiedIndex maps users, items and labels into a unified encoding space.

func NewUnifiedDirectIndex

func NewUnifiedDirectIndex(n int32) UnifiedIndex

NewUnifiedDirectIndex creates a UnifiedDirectIndex.

func UnmarshalUnifiedIndex

func UnmarshalUnifiedIndex(r io.Reader) (UnifiedIndex, error)

UnmarshalUnifiedIndex unmarshals a unified index from a byte stream.

type UnifiedMapIndex

type UnifiedMapIndex struct {
	UserIndex      *Index
	ItemIndex      *Index
	UserLabelIndex *Index
	ItemLabelIndex *Index
	CtxLabelIndex  *Index
}

UnifiedMapIndex is the id -> index mapper for factorization machines. The division of id is: | user | item | user label | item label | context label |

func (*UnifiedMapIndex) CountContextLabels

func (unified *UnifiedMapIndex) CountContextLabels() int32

CountContextLabels returns the number of context labels.

func (*UnifiedMapIndex) CountItemLabels

func (unified *UnifiedMapIndex) CountItemLabels() int32

CountItemLabels returns the number of item labels.

func (*UnifiedMapIndex) CountItems

func (unified *UnifiedMapIndex) CountItems() int32

CountItems returns the number of items.

func (*UnifiedMapIndex) CountUserLabels

func (unified *UnifiedMapIndex) CountUserLabels() int32

CountUserLabels returns the number of user labels.

func (*UnifiedMapIndex) CountUsers

func (unified *UnifiedMapIndex) CountUsers() int32

CountUsers returns the number of users.

func (*UnifiedMapIndex) EncodeContextLabel

func (unified *UnifiedMapIndex) EncodeContextLabel(label string) int32

EncodeContextLabel converts a context label to an integer in the encoding space.

func (*UnifiedMapIndex) EncodeItem

func (unified *UnifiedMapIndex) EncodeItem(itemId string) int32

EncodeItem converts an item ID to an integer in the encoding space.

func (*UnifiedMapIndex) EncodeItemLabel

func (unified *UnifiedMapIndex) EncodeItemLabel(itemLabel string) int32

EncodeItemLabel converts an item label to an integer in the encoding space.

func (*UnifiedMapIndex) EncodeUser

func (unified *UnifiedMapIndex) EncodeUser(userId string) int32

EncodeUser converts a user ID to an integer in the encoding space.

func (*UnifiedMapIndex) EncodeUserLabel

func (unified *UnifiedMapIndex) EncodeUserLabel(userLabel string) int32

EncodeUserLabel converts a user label to an integer in the encoding space.

func (*UnifiedMapIndex) GetContextLabels

func (unified *UnifiedMapIndex) GetContextLabels() []string

GetContextLabels returns all context labels.

func (*UnifiedMapIndex) GetItemLabels

func (unified *UnifiedMapIndex) GetItemLabels() []string

GetItemLabels returns all item labels.

func (*UnifiedMapIndex) GetItems

func (unified *UnifiedMapIndex) GetItems() []string

GetItems returns all items.

func (*UnifiedMapIndex) GetUserLabels

func (unified *UnifiedMapIndex) GetUserLabels() []string

GetUserLabels returns all user labels.

func (*UnifiedMapIndex) GetUsers

func (unified *UnifiedMapIndex) GetUsers() []string

GetUsers returns all users.

func (*UnifiedMapIndex) Len

func (unified *UnifiedMapIndex) Len() int32

Len returns the size of unified index.

func (*UnifiedMapIndex) Marshal

func (unified *UnifiedMapIndex) Marshal(w io.Writer) error

Marshal marshals the unified map index into a byte stream.

func (*UnifiedMapIndex) Unmarshal

func (unified *UnifiedMapIndex) Unmarshal(r io.Reader) error

Unmarshal unmarshals the unified map index from a byte stream.

type UnifiedMapIndexBuilder

type UnifiedMapIndexBuilder struct {
	UserIndex      *Index
	ItemIndex      *Index
	UserLabelIndex *Index
	ItemLabelIndex *Index
	CtxLabelIndex  *Index
}

UnifiedMapIndexBuilder is the builder for UnifiedMapIndex.

func NewUnifiedMapIndexBuilder

func NewUnifiedMapIndexBuilder() *UnifiedMapIndexBuilder

NewUnifiedMapIndexBuilder creates a UnifiedMapIndexBuilder.

func (*UnifiedMapIndexBuilder) AddCtxLabel

func (builder *UnifiedMapIndexBuilder) AddCtxLabel(ctxLabel string)

AddCtxLabel adds a context label into the unified index.

func (*UnifiedMapIndexBuilder) AddItem

func (builder *UnifiedMapIndexBuilder) AddItem(itemId string)

AddItem adds an item into the unified index.

func (*UnifiedMapIndexBuilder) AddItemLabel

func (builder *UnifiedMapIndexBuilder) AddItemLabel(itemLabel string)

AddItemLabel adds an item label into the unified index.

func (*UnifiedMapIndexBuilder) AddUser

func (builder *UnifiedMapIndexBuilder) AddUser(userId string)

AddUser adds a user into the unified index.

func (*UnifiedMapIndexBuilder) AddUserLabel

func (builder *UnifiedMapIndexBuilder) AddUserLabel(userLabel string)

AddUserLabel adds a user label into the unified index.

func (*UnifiedMapIndexBuilder) Build

func (builder *UnifiedMapIndexBuilder) Build() UnifiedIndex

Build builds a UnifiedMapIndex from the UnifiedMapIndexBuilder.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL