data

package module
v0.0.0-...-3311a37 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 10, 2020 License: Apache-2.0 Imports: 18 Imported by: 0

README

veri-data

Data structure for veri feature store

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CalculateAverage

func CalculateAverage(avg []float64, p []float64, n float64) []float64

CalculateAverage calculates average of two arrays divided by n

func EncodeSearchConfig

func EncodeSearchConfig(p *SearchConfig) []byte

func GetSearchKey

func GetSearchKey(datum *Datum, config *SearchConfig) string

func VectorDistance

func VectorDistance(arr1 []float64, arr2 []float64) float64

VectorDistance calculates the Euclidean distance between two vectors

func VectorMultiplication

func VectorMultiplication(arr1 []float64, arr2 []float64) float64

VectorMultiplication calculates the elementwise multiplication of two vectors

Types

type Collector

type Collector struct {
	List           []*ScoredDatum
	ScoreFunc      func(arr1 []float64, arr2 []float64) float64
	MaxScore       float64
	DatumKey       *DatumKey
	N              uint32
	HigherIsBetter bool
}

Collector collects results

func (*Collector) Insert

func (c *Collector) Insert(scoredDatum *ScoredDatum) error

Insert adds a new scored datum to the collector

func (*Collector) Send

func (c *Collector) Send(list *bpb.KVList) error

Send collects the results

type Data

type Data struct {
	Name        string
	Avg         []float64
	N           int64
	MaxDistance float64
	Hist        []float64
	Timestamp   int64
	DB          *badger.DB
	DBPath      string
	Dirty       bool
	Sources     *cache.Cache
	QueryCache  *cache.Cache
}

Data represents a dataset with a similar structure

func NewData

func NewData(name, path string) (*Data, error)

NewData creates a data struct

func NewPreData

func NewPreData(name, path string) *Data

NewPreData creates a data struct

func NewTempData

func NewTempData(name string) (*Data, error)

NewTempData returns an in-memory badger instance

func (*Data) AddSource

func (dt *Data) AddSource(dataSource DataSource)

AddSource adds a source

func (*Data) Close

func (dt *Data) Close() error

Close currently closes underlying kv store

func (*Data) GetID

func (dt *Data) GetID() string

func (*Data) GetStats

func (dt *Data) GetStats() *Stats

GetStats gets the stats out of data

func (*Data) InitData

func (dt *Data) InitData() error

func (*Data) Insert

func (dt *Data) Insert(datum *Datum, config *InsertConfig) error

Insert inserts data to internal kv store

func (*Data) Process

func (dt *Data) Process(force bool) error

Process runs through keys and calculates statistics

func (*Data) Run

func (dt *Data) Run() error

Run runs statistical calculation regularly

func (*Data) Search

func (dt *Data) Search(datum *Datum, config *SearchConfig) *Collector

Search does a search based on distances of keys

func (*Data) StreamAll

func (dt *Data) StreamAll(datumStream chan<- *Datum) error

func (*Data) StreamInsert

func (dt *Data) StreamInsert(datumStream <-chan *InsertDatumWithConfig) error

StreamInsert inserts data in stream

func (*Data) StreamSample

func (dt *Data) StreamSample(datumStream chan<- *Datum, fraction float64) error

func (*Data) StreamSearch

func (dt *Data) StreamSearch(datum *Datum, scoredDatumStream chan<- *ScoredDatum, queryWaitGroup *sync.WaitGroup, config *SearchConfig) error

StreamSearch does a search based on distances of keys

func (*Data) SuperSearch

func (dt *Data) SuperSearch(datum *Datum, scoredDatumStreamOutput chan<- *ScoredDatum, config *SearchConfig) error

SuperSearch searches and merges other resources

func (*Data) Sync

func (dt *Data) Sync(source DataSource, waitGroup *sync.WaitGroup) error

func (*Data) SyncAll

func (dt *Data) SyncAll() error

type DataSource

type DataSource interface {
	StreamSearch(datum *Datum, scoredDatumStream chan<- *ScoredDatum, queryWaitGroup *sync.WaitGroup, config *SearchConfig) error
	StreamInsert(datumStream <-chan *InsertDatumWithConfig) error
	Insert(datum *Datum, config *InsertConfig) error
	GetStats() *Stats
	GetID() string
}

type Dataset

type Dataset struct {
	DataList *cache.Cache
	Path     string
}

func NewDataset

func NewDataset(path string) *Dataset

func (*Dataset) CreateIfNotExists

func (dts *Dataset) CreateIfNotExists(name string) error

func (*Dataset) Delete

func (dts *Dataset) Delete(name string) error

func (*Dataset) Get

func (dts *Dataset) Get(name string) (*Data, error)

func (*Dataset) GetOrCreateIfNotExists

func (dts *Dataset) GetOrCreateIfNotExists(name string) (*Data, error)

func (*Dataset) List

func (dts *Dataset) List() []string

type Datum

type Datum struct {
	Key   *DatumKey
	Value *DatumValue
}

Datum is a general feature holder

func NewDatum

func NewDatum(feature []float64,
	dim1 uint32,
	dim2 uint32,
	size1 uint32,
	size2 uint32,
	groupLabel []byte,
	label []byte,
	version int64,
) *Datum

NewDatum is a utility function to initialize the Datum type

func ToDatum

func ToDatum(key, value []byte) (*Datum, error)

func (*Datum) GetKey

func (datum *Datum) GetKey() ([]byte, error)

func (*Datum) GetValue

func (datum *Datum) GetValue() ([]byte, error)

type DatumKey

type DatumKey struct {
	Feature    []float64
	Dim1       uint32
	Dim2       uint32
	Size1      uint32
	Size2      uint32
	GroupLabel []byte
}

DatumKey is a key for Datum

func ToDatumKey

func ToDatumKey(byteArray []byte) (*DatumKey, error)

type DatumValue

type DatumValue struct {
	Label   []byte
	Version int64
}

DatumValue is value of a Datum

func ToDatumValue

func ToDatumValue(byteArray []byte) (*DatumValue, error)

type InsertConfig

type InsertConfig struct {
	TTL time.Duration
}

type InsertDatumWithConfig

type InsertDatumWithConfig struct {
	Config *InsertConfig
	Datum  *Datum
}

type ScoredDatum

type ScoredDatum struct {
	Datum *Datum
	Score float64
}

ScoredDatum helps to keep Data ordered

type SearchConfig

type SearchConfig struct {
	ScoreFuncName  string                                       `json:"scoreFuncName"`
	ScoreFunc      func(arr1 []float64, arr2 []float64) float64 `json:"-"`
	HigherIsBetter bool                                         `json:"higherIsBetter"`
	Limit          uint32                                       `json:"limit"`
	Duration       time.Duration                                `json:"-"`
}

func DefaultSearchConfig

func DefaultSearchConfig() *SearchConfig

type Stats

type Stats struct {
	Avg         []float64
	N           int64
	MaxDistance float64
	Hist        []float64
	Timestamp   int64
}

Stats to share about data

type StreamCollector

type StreamCollector struct {
	DatumStream chan<- *Datum
}

StreamCollector collects results

func (*StreamCollector) Send

func (c *StreamCollector) Send(list *bpb.KVList) error

Send collects the results

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL