stats

package
v0.18.1
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 9, 2024 License: Apache-2.0 Imports: 14 Imported by: 0

Documentation

Overview

Copyright 2023 Dolthub, Inc.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Index

Constants

This section is empty.

Variables

View Source
var ErrJoinStringStatistics = errors.New("joining string histograms is unsupported")

Functions

func AlignBuckets

func AlignBuckets(h1, h2 sql.Histogram, lBound1, lBound2 sql.Row, s1Types, s2Types []sql.Type, cmp func(sql.Row, sql.Row) (int, error)) (sql.Histogram, sql.Histogram, error)

AlignBuckets produces two histograms with the same number of buckets. It first uses the upper bound keys to truncate the histogram with the larger keyspace. Then, for every misaligned pair of buckets, it cuts the bucket with the higher bound value at the smaller bucket's bound key. Linear interpolation is used to divide keys when splitting a bucket.

func Empty

func Empty(s sql.Statistic) bool

func GetNewCounts added in v0.18.1

func GetNewCounts(buckets []sql.HistogramBucket) (rowCount uint64, distinctCount uint64, nullCount uint64)

func IndexFds

func IndexFds(tableName string, sch sql.Schema, idx sql.Index) (*sql.FuncDepSet, sql.ColSet, error)

func InterpolateNewCounts

func InterpolateNewCounts(from, to sql.Statistic) sql.Statistic

func Intersect

func Intersect(b1, b2 []sql.HistogramBucket, types []sql.Type) ([]sql.HistogramBucket, error)

func Join

func Join(s1, s2 sql.Statistic, prefixCnt int, debug bool) (sql.Statistic, error)

Join performs an alignment algorithm on two sets of statistics, and then pairwise estimates bucket cardinalities by joining most common values (mcvs) directly and assuming key uniformity otherwise. Only numeric types are supported.

func McvPrefixGt

func McvPrefixGt(statistic sql.Statistic, i int, val interface{}) (sql.Statistic, error)

func McvPrefixGte

func McvPrefixGte(statistic sql.Statistic, i int, val interface{}) (sql.Statistic, error)

func McvPrefixIsNotNull

func McvPrefixIsNotNull(statistic sql.Statistic, i int, val interface{}) (sql.Statistic, error)

func McvPrefixIsNull

func McvPrefixIsNull(statistic sql.Statistic, i int, val interface{}) (sql.Statistic, error)

func McvPrefixLt

func McvPrefixLt(statistic sql.Statistic, i int, val interface{}) (sql.Statistic, error)

func McvPrefixLte

func McvPrefixLte(statistic sql.Statistic, i int, val interface{}) (sql.Statistic, error)

func NewExpDistIter

func NewExpDistIter(colCnt, rowCnt int, lambda float64) sql.RowIter

func NewNormDistIter

func NewNormDistIter(colCnt, rowCnt int, mean, std float64) sql.RowIter

func NewStatsIter added in v0.18.1

func NewStatsIter(ctx *sql.Context, dStats ...sql.Statistic) (*statsIter, error)

func ParseRow added in v0.18.1

func ParseRow(rowStr string, types []sql.Type) (sql.Row, error)

func ParseTypeStrings

func ParseTypeStrings(typs []string) ([]sql.Type, error)

func PrefixGt

func PrefixGt(buckets []sql.HistogramBucket, types []sql.Type, val interface{}) ([]sql.HistogramBucket, error)

func PrefixGtHist

func PrefixGtHist(h []sql.HistogramBucket, target sql.Row, cmp func(sql.Row, sql.Row) (int, error)) (int, error)

func PrefixGte

func PrefixGte(buckets []sql.HistogramBucket, types []sql.Type, val interface{}) ([]sql.HistogramBucket, error)

func PrefixGteHist

func PrefixGteHist(h []sql.HistogramBucket, target sql.Row, cmp func(sql.Row, sql.Row) (int, error)) (int, error)

func PrefixIsNotNull

func PrefixIsNotNull(buckets []sql.HistogramBucket) ([]sql.HistogramBucket, error)

func PrefixIsNull

func PrefixIsNull(buckets []sql.HistogramBucket) ([]sql.HistogramBucket, error)

func PrefixKey

func PrefixKey(buckets []sql.HistogramBucket, idxCols sql.ColSet, types []sql.Type, oldFds *sql.FuncDepSet, key []interface{}, nullable []bool) ([]sql.HistogramBucket, *sql.FuncDepSet, error)

func PrefixLt

func PrefixLt(buckets []sql.HistogramBucket, types []sql.Type, val interface{}) ([]sql.HistogramBucket, error)

func PrefixLtHist

func PrefixLtHist(h []sql.HistogramBucket, target sql.Row, cmp func(sql.Row, sql.Row) (int, error)) (int, error)

func PrefixLte

func PrefixLte(buckets []sql.HistogramBucket, types []sql.Type, val interface{}) ([]sql.HistogramBucket, error)

func PrefixLteHist

func PrefixLteHist(h []sql.HistogramBucket, target sql.Row, cmp func(sql.Row, sql.Row) (int, error)) (int, error)

func StringifyKey added in v0.18.1

func StringifyKey(r sql.Row, types []sql.Type) string

func Union

func Union(b1, b2 []sql.HistogramBucket, types []sql.Type) ([]sql.HistogramBucket, error)

func UpdateCounts

func UpdateCounts(statistic sql.Statistic) sql.Statistic

Types

type Bucket

type Bucket struct {
	RowCnt      uint64    `json:"row_count"`
	DistinctCnt uint64    `json:"distinct_count"`
	NullCnt     uint64    `json:"null_count"`
	McvsCnt     []uint64  `json:"mcv_counts"`
	BoundCnt    uint64    `json:"bound_count"`
	BoundVal    sql.Row   `json:"upper_bound"`
	McvVals     []sql.Row `json:"mcvs"`
}

func NewHistogramBucket

func NewHistogramBucket(rowCount, distinctCount, nullCount, boundCount uint64, boundValue sql.Row, mcvCounts []uint64, mcvs []sql.Row) *Bucket

func (Bucket) BoundCount

func (b Bucket) BoundCount() uint64

func (Bucket) DistinctCount

func (b Bucket) DistinctCount() uint64

func (Bucket) McvCounts

func (b Bucket) McvCounts() []uint64

func (Bucket) Mcvs

func (b Bucket) Mcvs() []sql.Row

func (Bucket) NullCount

func (b Bucket) NullCount() uint64

func (Bucket) RowCount

func (b Bucket) RowCount() uint64

func (Bucket) UpperBound

func (b Bucket) UpperBound() sql.Row

type HeapRow

type HeapRow struct {
	Row   sql.Row
	Count int
}

func NewHeapRow

func NewHeapRow(r sql.Row, cnt int) HeapRow

type SqlHeap

type SqlHeap struct {
	// contains filtered or unexported fields
}

An SqlHeap is a min-heap of sql.Row values with associated counts (returned by Array and Counts), not a heap of ints.

func NewSqlHeap

func NewSqlHeap(k int) *SqlHeap

func (SqlHeap) Array

func (h SqlHeap) Array() []sql.Row

func (SqlHeap) Counts

func (h SqlHeap) Counts() []uint64

func (SqlHeap) Len

func (h SqlHeap) Len() int

func (SqlHeap) Less

func (h SqlHeap) Less(i, j int) bool

func (*SqlHeap) Pop

func (h *SqlHeap) Pop() any

func (*SqlHeap) Push

func (h *SqlHeap) Push(x any)

func (SqlHeap) Swap

func (h SqlHeap) Swap(i, j int)

type Statistic

type Statistic struct {
	RowCnt      uint64            `json:"row_count"`
	DistinctCnt uint64            `json:"distinct_count"`
	NullCnt     uint64            `json:"null_count"`
	AvgRowSize  uint64            `json:"avg_size"`
	Created     time.Time         `json:"created_at"`
	Qual        sql.StatQualifier `json:"qualifier"`
	Cols        []string          `json:"columns"`
	Typs        []sql.Type        `json:"-"`
	Hist        sql.Histogram     `json:"buckets"`
	IdxClass    uint8             `json:"index_class"`
	LowerBnd    sql.Row           `json:"lower_bound"`
	Fds         *sql.FuncDepSet   `json:"-"`
	Colset      sql.ColSet        `json:"-"`
}

func NewStatistic

func NewStatistic(rowCount, distinctCount, nullCount, avgSize uint64, createdAt time.Time, qualifier sql.StatQualifier, columns []string, types []sql.Type, histogram []sql.HistogramBucket, class sql.IndexClass, lowerBound sql.Row) *Statistic

func (*Statistic) AvgSize

func (s *Statistic) AvgSize() uint64

func (*Statistic) ColSet

func (s *Statistic) ColSet() sql.ColSet

func (*Statistic) Columns

func (s *Statistic) Columns() []string

func (*Statistic) CreatedAt

func (s *Statistic) CreatedAt() time.Time

func (*Statistic) DistinctCount

func (s *Statistic) DistinctCount() uint64

func (*Statistic) FuncDeps

func (s *Statistic) FuncDeps() *sql.FuncDepSet

func (*Statistic) Histogram

func (s *Statistic) Histogram() sql.Histogram

func (*Statistic) IndexClass

func (s *Statistic) IndexClass() sql.IndexClass

func (*Statistic) LowerBound

func (s *Statistic) LowerBound() sql.Row

func (*Statistic) NullCount

func (s *Statistic) NullCount() uint64

func (*Statistic) Qualifier

func (s *Statistic) Qualifier() sql.StatQualifier

func (*Statistic) RowCount

func (s *Statistic) RowCount() uint64

func (*Statistic) SetColumns

func (s *Statistic) SetColumns(c []string)

func (*Statistic) SetQualifier

func (s *Statistic) SetQualifier(q sql.StatQualifier)

func (*Statistic) SetTypes

func (s *Statistic) SetTypes(t []sql.Type)

func (*Statistic) ToInterface

func (s *Statistic) ToInterface() interface{}

func (*Statistic) Types

func (s *Statistic) Types() []sql.Type

func (*Statistic) WithAvgSize

func (s *Statistic) WithAvgSize(i uint64) sql.Statistic

func (*Statistic) WithColSet

func (s *Statistic) WithColSet(cols sql.ColSet) sql.Statistic

func (*Statistic) WithDistinctCount

func (s *Statistic) WithDistinctCount(i uint64) sql.Statistic

func (*Statistic) WithFuncDeps

func (s *Statistic) WithFuncDeps(fds *sql.FuncDepSet) sql.Statistic

func (*Statistic) WithHistogram

func (s *Statistic) WithHistogram(h sql.Histogram) (sql.Statistic, error)

func (*Statistic) WithLowerBound

func (s *Statistic) WithLowerBound(r sql.Row) sql.Statistic

func (*Statistic) WithNullCount

func (s *Statistic) WithNullCount(i uint64) sql.Statistic

func (*Statistic) WithRowCount

func (s *Statistic) WithRowCount(i uint64) sql.Statistic

type StatisticJSON added in v0.18.1

type StatisticJSON struct {
	RowCnt      uint64            `json:"row_count"`
	DistinctCnt uint64            `json:"distinct_count"`
	NullCnt     uint64            `json:"null_count"`
	AvgRowSize  uint64            `json:"avg_size"`
	Created     time.Time         `json:"created_at"`
	Qual        sql.StatQualifier `json:"qualifier"`
	Cols        []string          `json:"columns"`
	Typs        []sql.Type        `json:"-"`
	Hist        []*Bucket         `json:"buckets"`
	IdxClass    uint8             `json:"index_class"`
	LowerBnd    sql.Row           `json:"lower_bound"`
	Fds         *sql.FuncDepSet   `json:"-"`
	Colset      sql.ColSet        `json:"-"`
}

StatisticJSON is used as an intermediary to deserialize the memory stats object. Its Hist field uses the concrete []*Bucket type in place of the sql.Histogram used by Statistic; otherwise, the histogram would have to be deserialized separately.

func (*StatisticJSON) ToStatistic added in v0.18.1

func (j *StatisticJSON) ToStatistic() *Statistic

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL