statistics

package

v7.0.4 Latest Latest Go to latest Published: Jan 30, 2023 License: Apache-2.0 Imports: 54 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/lemon-mint/tidb

Documentation ¶

Index ¶

Constants
Variables
func BuildHistAndTopN(ctx sessionctx.Context, numBuckets, numTopN int, id int64, ...) (*Histogram, *TopN, error)
func CETraceExpr(sctx sessionctx.Context, tableID int64, tp string, expr expression.Expression, ...)
func CETraceRange(sctx sessionctx.Context, tableID int64, colNames []string, ...)
func CMSketchAndTopNFromProto(protoSketch *tipb.CMSketch) (*CMSketch, *TopN)
func CMSketchToProto(c *CMSketch, topn *TopN) *tipb.CMSketch
func CheckAnalyzeVerOnTable(tbl *Table, version *int) bool
func CollectFeedback(sc *stmtctx.StatementContext, q *QueryFeedback, numOfRanges int) bool
func ConvertDatumsType(vals []types.Datum, ft *types.FieldType, loc *time.Location) error
func DecodeCMSketchAndTopN(data []byte, topNRows []chunk.Row) (*CMSketch, *TopN, error)
func DecodeFeedback(val []byte, q *QueryFeedback, c *CMSketch, t *TopN, ft *types.FieldType) error
func DropEvicted(item TableCacheItem)
func EncodeCMSketchWithoutTopN(c *CMSketch) ([]byte, error)
func EncodeFMSketch(c *FMSketch) ([]byte, error)
func EncodeFeedback(q *QueryFeedback) ([]byte, error)
func ExprToString(e expression.Expression) (string, error)
func FMSketchToProto(s *FMSketch) *tipb.FMSketch
func FindPrefixOfIndexByCol(cols []*expression.Column, idxColIDs []int64, cachedPath *planutil.AccessPath) []*expression.Column
func GetIndexPrefixLens(data []byte, numCols int) (prefixLens []int, err error)
func GetMergedTopNFromSortedSlice(sorted []TopNMeta, n uint32) (*TopN, []TopNMeta)
func GetOrdinalOfRangeCond(sc *stmtctx.StatementContext, ran *ranger.Range) int
func GetPseudoRowCountByColumnRanges(sc *stmtctx.StatementContext, tableRowCount float64, ...) (float64, error)
func HistogramEqual(a, b *Histogram, ignoreID bool) bool
func HistogramToProto(hg *Histogram) *tipb.Histogram
func IsAnalyzed(flag int64) bool
func MergePartTopN2GlobalTopN(loc *time.Location, version int, topNs []*TopN, n uint32, hists []*Histogram, ...) (*TopN, []TopNMeta, []*Histogram, error)
func MergeTopN(topNs []*TopN, n uint32) (*TopN, []TopNMeta)
func NewCMSketchAndTopN(d, w int32, sample [][]byte, numTop uint32, rowCount uint64) (*CMSketch, *TopN, uint64, uint64)
func NewTopnStatsMergeWorker(taskCh <-chan *TopnStatsMergeTask, respCh chan<- *TopnStatsMergeResponse, ...) *topnStatsMergeWorker
func ResetAnalyzeFlag(flag int64) int64
func RowSamplesToProto(samples WeightedRowSampleHeap) []*tipb.RowSample
func RowToDatums(row chunk.Row, fields []*ast.ResultField) []types.Datum
func SampleCollectorToProto(c *SampleCollector) *tipb.SampleCollector
func SplitFeedbackByQueryType(feedbacks []Feedback) ([]Feedback, []Feedback)
func SupportColumnType(ft *types.FieldType) bool
func UpdateCMSketchAndTopN(c *CMSketch, t *TopN, eqFeedbacks []Feedback) (*CMSketch, *TopN)
func ValueToString(vars *variable.SessionVars, value *types.Datum, idxCols int, ...) (string, error)
type AnalyzeJob
type AnalyzeProgress
- func (p *AnalyzeProgress) GetDeltaCount() int64
- func (p *AnalyzeProgress) GetLastDumpTime() time.Time
- func (p *AnalyzeProgress) SetLastDumpTime(t time.Time)
- func (p *AnalyzeProgress) Update(rowCount int64) (dumpCount int64)
type AnalyzeResult
type AnalyzeResults
type AnalyzeTableID
- func (h *AnalyzeTableID) Equals(t *AnalyzeTableID) bool
- func (h *AnalyzeTableID) GetStatisticsID() int64
- func (h *AnalyzeTableID) IsPartitionTable() bool
- func (h *AnalyzeTableID) String() string
type BernoulliRowSampleCollector
- func NewBernoulliRowSampleCollector(sampleRate float64, totalLen int) *BernoulliRowSampleCollector
- func (s *BernoulliRowSampleCollector) Base() *baseCollector
- func (s BernoulliRowSampleCollector) FromProto(pbCollector *tipb.RowSampleCollector, memTracker *memory.Tracker)
- func (s *BernoulliRowSampleCollector) MergeCollector(subCollector RowSampleCollector)
- func (s BernoulliRowSampleCollector) ToProto() *tipb.RowSampleCollector
type Bucket
type BucketFeedback
type CMSketch
- func NewCMSketch(d, w int32) *CMSketch
- func (c *CMSketch) CalcDefaultValForAnalyze(NDV uint64)
- func (c *CMSketch) Copy() *CMSketch
- func (c *CMSketch) Equal(rc *CMSketch) bool
- func (c *CMSketch) GetWidthAndDepth() (int32, int32)
- func (c *CMSketch) InsertBytes(bytes []byte)
- func (c *CMSketch) InsertBytesByCount(bytes []byte, count uint64)
- func (c *CMSketch) MemoryUsage() (sum int64)
- func (c *CMSketch) MergeCMSketch(rc *CMSketch) error
- func (c *CMSketch) MergeCMSketch4IncrementalAnalyze(rc *CMSketch, numTopN uint32) error
- func (c *CMSketch) QueryBytes(d []byte) uint64
- func (c *CMSketch) SubValue(h1, h2 uint64, count uint64)
- func (c *CMSketch) TotalCount() uint64
type CacheItemMemoryUsage
type Column
- func (c *Column) AvgColSize(count int64, isKey bool) float64
- func (c *Column) AvgColSizeChunkFormat(count int64) float64
- func (c *Column) AvgColSizeListInDisk(count int64) float64
- func (c *Column) BetweenRowCount(sctx sessionctx.Context, l, r types.Datum, lowEncoded, highEncoded []byte) float64
- func (c *Column) DropEvicted()
- func (c *Column) GetColumnRowCount(sctx sessionctx.Context, ranges []*ranger.Range, ...) (float64, error)
- func (c *Column) GetIncreaseFactor(realtimeRowCount int64) float64
- func (c *Column) IsAllEvicted() bool
- func (c *Column) IsInvalid(sctx sessionctx.Context, collPseudo bool) bool
- func (c *Column) ItemID() int64
- func (c *Column) MemoryUsage() CacheItemMemoryUsage
- func (c *Column) String() string
- func (c *Column) TotalRowCount() float64
type ColumnMemUsage
- func (c *ColumnMemUsage) CMSMemUsage() int64
- func (c *ColumnMemUsage) HistMemUsage() int64
- func (c *ColumnMemUsage) ItemID() int64
- func (c *ColumnMemUsage) TopnMemUsage() int64
- func (c *ColumnMemUsage) TotalMemoryUsage() int64
- func (c *ColumnMemUsage) TrackingMemUsage() int64
type ErrorRate
- func (e *ErrorRate) Merge(rate *ErrorRate)
- func (e *ErrorRate) NotAccurate() bool
- func (e *ErrorRate) Update(rate float64)
type ExtendedStatsColl
- func NewExtendedStatsColl() *ExtendedStatsColl
type ExtendedStatsItem
type FMSketch
- func DecodeFMSketch(data []byte) (*FMSketch, error)
- func FMSketchFromProto(protoSketch *tipb.FMSketch) *FMSketch
- func NewFMSketch(maxSize int) *FMSketch
- func (s *FMSketch) Copy() *FMSketch
- func (s *FMSketch) InsertRowValue(sc *stmtctx.StatementContext, values []types.Datum) error
- func (s *FMSketch) InsertValue(sc *stmtctx.StatementContext, value types.Datum) error
- func (s *FMSketch) MemoryUsage() (sum int64)
- func (s *FMSketch) MergeFMSketch(rs *FMSketch)
- func (s *FMSketch) NDV() int64
type Feedback
- func CleanRangeFeedbackByTopN(feedbacks []Feedback, topN *TopN) []Feedback
- func NonOverlappedFeedbacks(sc *stmtctx.StatementContext, fbs []Feedback) ([]Feedback, bool)
type HistColl
- func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) *HistColl
- func (coll *HistColl) GetAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isEncodedKey bool, ...) (size float64)
- func (coll *HistColl) GetAvgRowSizeListInDisk(cols []*expression.Column) (size float64)
- func (coll *HistColl) GetIndexAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isUnique bool) (size float64)
- func (coll *HistColl) GetRowCountByColumnRanges(sctx sessionctx.Context, colID int64, colRanges []*ranger.Range) (float64, error)
- func (coll *HistColl) GetRowCountByIndexRanges(sctx sessionctx.Context, idxID int64, indexRanges []*ranger.Range) (float64, error)
- func (coll *HistColl) GetRowCountByIntColumnRanges(sctx sessionctx.Context, colID int64, intRanges []*ranger.Range) (result float64, err error)
- func (coll *HistColl) GetSelectivityByFilter(sctx sessionctx.Context, defaultSelectivity float64, ...) (ok bool, selectivity float64, err error)
- func (coll *HistColl) GetTableAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, storeType kv.StoreType, ...) (size float64)
- func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl
- func (coll *HistColl) NewHistCollBySelectivity(sctx sessionctx.Context, statsNodes []*StatsNode) *HistColl
- func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression, ...) (float64, []*StatsNode, error)
type Histogram
- func BuildColumn(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, ...) (*Histogram, error)
- func BuildColumnHist(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, ...) (*Histogram, error)
- func HistogramFromProto(protoHg *tipb.Histogram) *Histogram
- func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int, ...) (*Histogram, error)
- func MergePartitionHist2GlobalHist(sc *stmtctx.StatementContext, hists []*Histogram, popedTopN []TopNMeta, ...) (*Histogram, error)
- func NewHistogram(id, ndv, nullCount int64, version uint64, tp *types.FieldType, bucketSize int, ...) *Histogram
- func UpdateHistogram(h *Histogram, feedback *QueryFeedback, statsVer int) *Histogram
- func UpdateHistogramWithBucketCount(h *Histogram, feedback *QueryFeedback, statsVer int, bucketCount int) *Histogram
- func (hg *Histogram) AddIdxVals(idxValCntPairs []TopNMeta)
- func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64)
- func (hg *Histogram) AppendBucketWithNDV(lower *types.Datum, upper *types.Datum, count, repeat, ndv int64)
- func (hg *Histogram) AvgCountPerNotNullValue(totalCount int64) float64
- func (hg *Histogram) BetweenRowCount(a, b types.Datum) float64
- func (hg *Histogram) BucketToString(bktID, idxCols int) string
- func (hg *Histogram) ConvertTo(sc *stmtctx.StatementContext, tp *types.FieldType) (*Histogram, error)
- func (hg *Histogram) Copy() *Histogram
- func (hg *Histogram) DecodeTo(tp *types.FieldType, timeZone *time.Location) error
- func (hg *Histogram) ExtractTopN(cms *CMSketch, topN *TopN, numCols int, numTopN uint32) error
- func (hg *Histogram) GetIncreaseFactor(totalCount int64) float64
- func (hg *Histogram) GetLower(idx int) *types.Datum
- func (hg *Histogram) GetUpper(idx int) *types.Datum
- func (hg *Histogram) IsIndexHist() bool
- func (hg *Histogram) Len() int
- func (hg *Histogram) LessRowCountWithBktIdx(value types.Datum) (float64, int)
- func (hg *Histogram) MemoryUsage() (sum int64)
- func (hg *Histogram) PreCalculateScalar()
- func (hg *Histogram) RemoveUpperBound() *Histogram
- func (hg *Histogram) RemoveVals(valCntPairs []TopNMeta)
- func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*ranger.Range, encoded bool) ([]*ranger.Range, bool)
- func (hg *Histogram) ToString(idxCols int) string
- func (hg *Histogram) TotalRowCount() float64
- func (hg *Histogram) TruncateHistogram(numBkt int) *Histogram
type Index
- func (idx *Index) BetweenRowCount(l, r types.Datum) float64
- func (idx *Index) EvictAllStats()
- func (idx *Index) GetIncreaseFactor(realtimeRowCount int64) float64
- func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRanges []*ranger.Range, ...) (float64, error)
- func (idx *Index) IsAllEvicted() bool
- func (idx *Index) IsEvicted() bool
- func (idx *Index) IsInvalid(collPseudo bool) bool
- func (idx *Index) ItemID() int64
- func (idx *Index) MemoryUsage() CacheItemMemoryUsage
- func (idx *Index) QueryBytes(d []byte) uint64
- func (idx *Index) String() string
- func (idx *Index) TotalRowCount() float64
type IndexMemUsage
- func (c *IndexMemUsage) CMSMemUsage() int64
- func (c *IndexMemUsage) HistMemUsage() int64
- func (c *IndexMemUsage) ItemID() int64
- func (c *IndexMemUsage) TopnMemUsage() int64
- func (c *IndexMemUsage) TotalMemoryUsage() int64
- func (c *IndexMemUsage) TrackingMemUsage() int64
type QueryFeedback
- func NewQueryFeedback(physicalID int64, hist *Histogram, expected int64, desc bool) *QueryFeedback
- func (q *QueryFeedback) Actual() int64
- func (q *QueryFeedback) CalcErrorRate() float64
- func (q *QueryFeedback) DecodeIntValues() *QueryFeedback
- func (q *QueryFeedback) DecodeToRanges(isIndex bool) ([]*ranger.Range, error)
- func (q *QueryFeedback) Invalidate()
- func (q *QueryFeedback) StoreRanges(ranges []*ranger.Range)
- func (q *QueryFeedback) Update(startKey kv.Key, counts, ndvs []int64)
type QueryFeedbackKey
type QueryFeedbackMap
- func NewQueryFeedbackMap() *QueryFeedbackMap
- func (m *QueryFeedbackMap) Append(q *QueryFeedback)
- func (m *QueryFeedbackMap) Merge(r *QueryFeedbackMap)
- func (m *QueryFeedbackMap) SiftFeedbacks()
type ReservoirRowSampleCollector
- func NewReservoirRowSampleCollector(maxSampleSize int, totalLen int) *ReservoirRowSampleCollector
- func (s *ReservoirRowSampleCollector) Base() *baseCollector
- func (s ReservoirRowSampleCollector) FromProto(pbCollector *tipb.RowSampleCollector, memTracker *memory.Tracker)
- func (s *ReservoirRowSampleCollector) MergeCollector(subCollector RowSampleCollector)
- func (s ReservoirRowSampleCollector) ToProto() *tipb.RowSampleCollector
type ReservoirRowSampleItem
- func (i ReservoirRowSampleItem) MemUsage() (sum int64)
type RowSampleBuilder
- func (s *RowSampleBuilder) Collect() (RowSampleCollector, error)
type RowSampleCollector
- func NewRowSampleCollector(maxSampleSize int, sampleRate float64, totalLen int) RowSampleCollector
type SampleBuilder
- func (s SampleBuilder) CollectColumnStats() ([]*SampleCollector, *SortedBuilder, error)
type SampleCollector
- func SampleCollectorFromProto(collector *tipb.SampleCollector) *SampleCollector
- func (c *SampleCollector) CalcTotalSize()
- func (c *SampleCollector) ExtractTopN(numTop uint32, sc *stmtctx.StatementContext, tp *types.FieldType, ...) error
- func (c *SampleCollector) MergeSampleCollector(sc *stmtctx.StatementContext, rc *SampleCollector)
type SampleItem
- func CopySampleItems(items []*SampleItem) []*SampleItem
- func SortSampleItems(sc *stmtctx.StatementContext, items []*SampleItem) ([]*SampleItem, error)
type SortedBuilder
- func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *types.FieldType, ...) *SortedBuilder
- func (b *SortedBuilder) Hist() *Histogram
- func (b *SortedBuilder) Iterate(data types.Datum) error
type StatsLoadedStatus
- func NewStatsAllEvictedStatus() StatsLoadedStatus
- func NewStatsFullLoadStatus() StatsLoadedStatus
- func (s StatsLoadedStatus) IsAllEvicted() bool
- func (s StatsLoadedStatus) IsCMSEvicted() bool
- func (s StatsLoadedStatus) IsEssentialStatsLoaded() bool
- func (s StatsLoadedStatus) IsFullLoad() bool
- func (s StatsLoadedStatus) IsLoadNeeded() bool
- func (s StatsLoadedStatus) IsStatsInitialized() bool
- func (s StatsLoadedStatus) IsTopNEvicted() bool
- func (s StatsLoadedStatus) StatusToString() string
type StatsNode
- func GetUsableSetsByGreedy(nodes []*StatsNode) (newBlocks []*StatsNode)
- func MockStatsNode(id int64, m int64, num int) *StatsNode
type StatsReader
- func GetStatsReader(snapshot uint64, exec sqlexec.RestrictedSQLExecutor) (reader *StatsReader, err error)
- func (sr *StatsReader) Close() error
- func (sr *StatsReader) IsHistory() bool
- func (sr *StatsReader) Read(sql string, args ...interface{}) (rows []chunk.Row, fields []*ast.ResultField, err error)
type StatsWrapper
- func NewStatsWrapper(hg []*Histogram, topN []*TopN) *StatsWrapper
type Table
- func PseudoTable(tblInfo *model.TableInfo) *Table
- func (t *Table) ColumnBetweenRowCount(sctx sessionctx.Context, a, b types.Datum, colID int64) (float64, error)
- func (t *Table) ColumnByName(colName string) *Column
- func (t *Table) ColumnEqualRowCount(sctx sessionctx.Context, value types.Datum, colID int64) (float64, error)
- func (t *Table) ColumnGreaterRowCount(sctx sessionctx.Context, value types.Datum, colID int64) float64
- func (t *Table) ColumnLessRowCount(sctx sessionctx.Context, value types.Datum, colID int64) float64
- func (t *Table) Copy() *Table
- func (t *Table) GetColRowCount() float64
- func (t *Table) GetStatsHealthy() (int64, bool)
- func (t *Table) GetStatsInfo(ID int64, isIndex bool) (int64, *Histogram, *CMSketch, *TopN, *FMSketch, bool)
- func (t *Table) IndexStartWithColumn(colName string) *Index
- func (t *Table) IsInitialized() bool
- func (t *Table) IsOutdated() bool
- func (t *Table) MemoryUsage() *TableMemoryUsage
- func (t *Table) PseudoAvgCountPerValue() float64
- func (t *Table) String() string
type TableCacheItem
type TableMemoryUsage
- func (t *TableMemoryUsage) TotalColTrackingMemUsage() (sum int64)
- func (t *TableMemoryUsage) TotalIdxTrackingMemUsage() (sum int64)
- func (t *TableMemoryUsage) TotalTrackingMemUsage() int64
type TopN
- func NewTopN(n int) *TopN
- func TopNFromProto(protoTopN []*tipb.CMSketchTopN) *TopN
- func (c *TopN) AppendTopN(data []byte, count uint64)
- func (c *TopN) BetweenCount(l, r []byte) uint64
- func (c *TopN) Copy() *TopN
- func (c *TopN) DecodedString(ctx sessionctx.Context, colTypes []byte) (string, error)
- func (c *TopN) Equal(cc *TopN) bool
- func (c *TopN) LowerBound(d []byte) (idx int, match bool)
- func (c *TopN) MemoryUsage() (sum int64)
- func (c *TopN) Num() int
- func (c *TopN) QueryTopN(d []byte) (uint64, bool)
- func (c *TopN) RemoveVal(val []byte)
- func (c *TopN) Sort()
- func (c *TopN) String() string
- func (c *TopN) TotalCount() uint64
type TopNMeta
- func MergeTopNAndUpdateCMSketch(dst, src *TopN, c *CMSketch, numTop uint32) []TopNMeta
- func SortTopnMeta(topnMetas []TopNMeta) []TopNMeta
type TopnStatsMergeResponse
type TopnStatsMergeTask
- func NewTopnStatsMergeTask(start, end int) *TopnStatsMergeTask
type WeightedRowSampleHeap
- func (h WeightedRowSampleHeap) Len() int
- func (h WeightedRowSampleHeap) Less(i, j int) bool
- func (h *WeightedRowSampleHeap) Pop() interface{}
- func (h *WeightedRowSampleHeap) Push(i interface{})
- func (h WeightedRowSampleHeap) Swap(i, j int)

Constants ¶

View Source

const (
	// AnalyzePending means the analyze job is pending
	AnalyzePending = "pending"
	// AnalyzeRunning means the analyze job is running
	AnalyzeRunning = "running"
	// AnalyzeFinished means the analyze job has finished
	AnalyzeFinished = "finished"
	// AnalyzeFailed means the analyze job has failed
	AnalyzeFailed = "failed"
)

View Source

const (
	// Version0 is the state that no statistics is actually collected, only the meta info.(the total count and the average col size)
	Version0 = 0
	// Version1 maintains the statistics in the following way.
	// Column stats: CM Sketch is built in TiKV using full data. Histogram is built from samples. TopN is extracted from CM Sketch.
	//    TopN + CM Sketch represent all data. Histogram also represents all data.
	// Index stats: CM Sketch and Histogram is built in TiKV using full data. TopN is extracted from histogram. Then values covered by TopN is removed from CM Sketch.
	//    TopN + CM Sketch represent all data. Histogram also represents all data.
	// Int PK column stats is always Version1 because it only has histogram built from full data.
	// Fast analyze is always Version1 currently.
	Version1 = 1
	// Version2 maintains the statistics in the following way.
	// Column stats: CM Sketch is not used. TopN and Histogram are built from samples. TopN + Histogram represent all data.
	// Index stats: CM Sketch is not used. TopN and Histograms are built from samples.
	//    Then values covered by TopN is removed from Histogram. TopN + Histogram represent all data.
	// Both Column and Index's NDVs are collected by full scan.
	Version2 = 2
)

Constants for the stats version. These constants can be used for solving compatibility issues.

View Source

const (
	IndexType = iota
	PkType
	ColType
)

The type of the StatsNode.

View Source

const (
	// PseudoVersion means the pseudo statistics version is 0.
	PseudoVersion uint64 = 0

	// PseudoRowCount export for other pkg to use.
	// When we haven't analyzed a table, we use pseudo statistics to estimate costs.
	// It has row count 10000, equal condition selects 1/1000 of total rows, less condition selects 1/3 of total rows,
	// between condition selects 1/40 of total rows.
	PseudoRowCount = 10000
)

View Source

const AnalyzeFlag = 1

AnalyzeFlag is set when the statistics come from analyze and have not been modified by feedback.

View Source

const EmptyBucketSize = int64(unsafe.Sizeof(Bucket{}))

EmptyBucketSize is the size of empty bucket, 3*8=24 now.

View Source

const EmptyHistogramSize = int64(unsafe.Sizeof(Histogram{}))

EmptyHistogramSize is the size of empty histogram, about 112 = 8*6 for int64 & float64, 24*2 for arrays, 8*2 for references.

View Source

const EmptyReservoirSampleItemSize = int64(unsafe.Sizeof(ReservoirRowSampleItem{}))

EmptyReservoirSampleItemSize = (24 + 16 + 8) now.

View Source

const EmptySampleItemSize = int64(unsafe.Sizeof(SampleItem{}))

EmptySampleItemSize is the size of empty SampleItem, 96 = 72 (datum) + 8 (int) + 16.

View Source

const EmptyScalarSize = int64(unsafe.Sizeof(scalar{}))

EmptyScalarSize is the size of empty scalar.

View Source

const MaxErrorRate = 0.25

MaxErrorRate is the max error rate of estimate row count of a not pseudo column. If the table is pseudo, but the average error rate is less than MaxErrorRate, then the column is not pseudo.

View Source

const MaxSampleValueLength = mysql.MaxFieldVarCharLength / 2

MaxSampleValueLength defines the max length of the useful samples. If one sample value exceeds the max length, we drop it before building the stats.

Variables ¶

View Source

var (
	// MaxNumberOfRanges is the max number of ranges before split to collect feedback.
	MaxNumberOfRanges = 20
	// FeedbackProbability is the probability to collect the feedback.
	FeedbackProbability = atomic.NewFloat64(0)
)

View Source

var (
	// ErrQueryInterrupted indicates interrupted
	ErrQueryInterrupted = dbterror.ClassExecutor.NewStd(mysql.ErrQueryInterrupted)
)

View Source

var HistogramNeededItems = neededStatsMap{/* contains filtered or unexported fields */}

HistogramNeededItems stores the columns/indices whose Histograms need to be loaded from the physical kv layer. Currently, we only load index/pk Histograms from kv automatically. Column Histograms are loaded on demand.

View Source

var MaxQueryFeedbackCount = atomic.NewInt64(1 << 9)

MaxQueryFeedbackCount is the max number of feedbacks that are cached in memory.

View Source

var RatioOfPseudoEstimate = atomic.NewFloat64(0.7)

RatioOfPseudoEstimate means that if modifyCount / statsTblCount is greater than this ratio, we consider the stats invalid and use pseudo estimation.

Functions ¶

func BuildHistAndTopN ¶

func BuildHistAndTopN(
	ctx sessionctx.Context,
	numBuckets, numTopN int,
	id int64,
	collector *SampleCollector,
	tp *types.FieldType,
	isColumn bool,
	memTracker *memory.Tracker,
) (*Histogram, *TopN, error)

BuildHistAndTopN builds a histogram and TopN for a column or an index from samples.

func CETraceExpr ¶

func CETraceExpr(sctx sessionctx.Context, tableID int64, tp string, expr expression.Expression, rowCount float64)

CETraceExpr appends an expression and related information into CE trace

func CETraceRange ¶

func CETraceRange(sctx sessionctx.Context, tableID int64, colNames []string, ranges []*ranger.Range, tp string, rowCount uint64)

CETraceRange appends a list of ranges and related information into CE trace

func CMSketchAndTopNFromProto ¶

func CMSketchAndTopNFromProto(protoSketch *tipb.CMSketch) (*CMSketch, *TopN)

CMSketchAndTopNFromProto converts CMSketch and TopN from its protobuf representation.

func CMSketchToProto ¶

func CMSketchToProto(c *CMSketch, topn *TopN) *tipb.CMSketch

CMSketchToProto converts CMSketch to its protobuf representation.

func CheckAnalyzeVerOnTable ¶

func CheckAnalyzeVerOnTable(tbl *Table, version *int) bool

CheckAnalyzeVerOnTable checks whether the given version is the one from the tbl. If not, it will return false and set the version to the tbl's. We use this check to make sure all the statistics of the table are in the same version.

func CollectFeedback ¶

func CollectFeedback(sc *stmtctx.StatementContext, q *QueryFeedback, numOfRanges int) bool

CollectFeedback decides whether to collect the feedback. It returns false when: 1: the feedback is not generated by select query; 2: the histogram is nil or has no buckets; 3: the number of scan ranges exceeds the limit because it may affect the performance; 4: it does not pass the probabilistic sampler.

func ConvertDatumsType ¶

func ConvertDatumsType(vals []types.Datum, ft *types.FieldType, loc *time.Location) error

ConvertDatumsType converts the datums type to `ft`.

func DecodeCMSketchAndTopN ¶

func DecodeCMSketchAndTopN(data []byte, topNRows []chunk.Row) (*CMSketch, *TopN, error)

DecodeCMSketchAndTopN decodes a CMSketch from the given byte slice.

func DecodeFeedback ¶

func DecodeFeedback(val []byte, q *QueryFeedback, c *CMSketch, t *TopN, ft *types.FieldType) error

DecodeFeedback decodes a byte slice to feedback.

func DropEvicted ¶

func DropEvicted(item TableCacheItem)

DropEvicted drops stats for a table column/index.

func EncodeCMSketchWithoutTopN ¶

func EncodeCMSketchWithoutTopN(c *CMSketch) ([]byte, error)

EncodeCMSketchWithoutTopN encodes the given CMSketch to byte slice. Note that it does not include the topN.

func EncodeFMSketch ¶

func EncodeFMSketch(c *FMSketch) ([]byte, error)

EncodeFMSketch encodes the given FMSketch to byte slice.

func EncodeFeedback ¶

func EncodeFeedback(q *QueryFeedback) ([]byte, error)

EncodeFeedback encodes the given feedback to byte slice.

func ExprToString ¶

func ExprToString(e expression.Expression) (string, error)

ExprToString prints an Expression into a string which can appear in a SQL.

It might be too tricky because it relies on TiDB allowing internal function names to be used in SQL. For example, you can write `eq`(a, 1), which is the same as a = 1. We should have implemented this by first implementing a method to turn an expression into an AST and then calling astNode.Restore(), like the Constant case here. But for convenience, we use this trick for now.

It may be more appropriate to put this in the expression package. But currently we only use it for CE trace, and it may not be general enough to handle all possible expressions. So we put it here for now.

func FMSketchToProto ¶

func FMSketchToProto(s *FMSketch) *tipb.FMSketch

FMSketchToProto converts FMSketch to its protobuf representation.

func FindPrefixOfIndexByCol ¶

func FindPrefixOfIndexByCol(cols []*expression.Column, idxColIDs []int64, cachedPath *planutil.AccessPath) []*expression.Column

FindPrefixOfIndexByCol finds the columns of an index by checking the unique id or the virtual expression. It returns as soon as no matching column is found.

func GetIndexPrefixLens ¶

func GetIndexPrefixLens(data []byte, numCols int) (prefixLens []int, err error)

GetIndexPrefixLens returns an array representing the prefix lengths (prefixLens) of the given encoded index data.

func GetMergedTopNFromSortedSlice ¶

func GetMergedTopNFromSortedSlice(sorted []TopNMeta, n uint32) (*TopN, []TopNMeta)

GetMergedTopNFromSortedSlice returns merged topn

func GetOrdinalOfRangeCond ¶

func GetOrdinalOfRangeCond(sc *stmtctx.StatementContext, ran *ranger.Range) int

GetOrdinalOfRangeCond gets the ordinal position of the range condition; if it does not exist, it returns the end position.

func GetPseudoRowCountByColumnRanges ¶

func GetPseudoRowCountByColumnRanges(sc *stmtctx.StatementContext, tableRowCount float64, columnRanges []*ranger.Range, colIdx int) (float64, error)

GetPseudoRowCountByColumnRanges calculates the row count by the ranges if there's no statistics information for this column.

func HistogramEqual ¶

func HistogramEqual(a, b *Histogram, ignoreID bool) bool

HistogramEqual tests if two histograms are equal.

func HistogramToProto ¶

func HistogramToProto(hg *Histogram) *tipb.Histogram

HistogramToProto converts Histogram to its protobuf representation. Note that when this is used, the lower/upper bound in the bucket must be BytesDatum.

func IsAnalyzed ¶

func IsAnalyzed(flag int64) bool

IsAnalyzed checks whether this flag contains AnalyzeFlag.

func MergePartTopN2GlobalTopN ¶

func MergePartTopN2GlobalTopN(loc *time.Location, version int, topNs []*TopN, n uint32, hists []*Histogram,
	isIndex bool, kiiled *uint32) (*TopN, []TopNMeta, []*Histogram, error)

MergePartTopN2GlobalTopN is used to merge the partition-level topN to global-level topN. The input parameters:

`topNs` are the partition-level topNs to be merged.
`n` is the size of the global-level topN. Notice: This value can be 0 and has no default value, we must explicitly specify this value.
`hists` are the partition-level histograms. Some values not in topN may be placed in the histogram. We need it here to make the value in the global-level TopN more accurate.

The output parameters:

`*TopN` is the final global-level topN.
`[]TopNMeta` holds the leftover topN values from the partition-level TopNs that were not placed into the global-level TopN. We should put them back into the histogram later.
`[]*Histogram` are the partition-level histograms, from which some values have been deleted while merging the global-level topN.

func MergeTopN ¶

func MergeTopN(topNs []*TopN, n uint32) (*TopN, []TopNMeta)

MergeTopN is used to merge multiple TopN structures to generate a new TopN struct of the given size. The input parameters are the TopN structures to be merged and the size of the new TopN that will be generated. The output parameters are the newly generated TopN structure and the remaining numbers. Notice: n can be 0. n has no default value, so we must explicitly specify this value.

func NewCMSketchAndTopN ¶

func NewCMSketchAndTopN(d, w int32, sample [][]byte, numTop uint32, rowCount uint64) (*CMSketch, *TopN, uint64, uint64)

NewCMSketchAndTopN returns a new CM sketch with TopN elements, the estimated NDV and the scale ratio.

func NewTopnStatsMergeWorker ¶

func NewTopnStatsMergeWorker(
	taskCh <-chan *TopnStatsMergeTask,
	respCh chan<- *TopnStatsMergeResponse,
	wrapper *StatsWrapper,
	killed *uint32) *topnStatsMergeWorker

NewTopnStatsMergeWorker returns a topn merge worker.

func ResetAnalyzeFlag ¶

func ResetAnalyzeFlag(flag int64) int64

ResetAnalyzeFlag resets the AnalyzeFlag because it has been modified by feedback.

func RowSamplesToProto ¶

func RowSamplesToProto(samples WeightedRowSampleHeap) []*tipb.RowSample

RowSamplesToProto converts the sample slice to the pb struct.

func RowToDatums ¶

func RowToDatums(row chunk.Row, fields []*ast.ResultField) []types.Datum

RowToDatums converts row to datum slice.

func SampleCollectorToProto ¶

func SampleCollectorToProto(c *SampleCollector) *tipb.SampleCollector

SampleCollectorToProto converts SampleCollector to its protobuf representation.

func SplitFeedbackByQueryType ¶

func SplitFeedbackByQueryType(feedbacks []Feedback) ([]Feedback, []Feedback)

SplitFeedbackByQueryType splits the feedbacks into equality feedbacks and range feedbacks.

func SupportColumnType ¶

func SupportColumnType(ft *types.FieldType) bool

SupportColumnType checks if the type of the column can be updated by feedback.

func UpdateCMSketchAndTopN ¶

func UpdateCMSketchAndTopN(c *CMSketch, t *TopN, eqFeedbacks []Feedback) (*CMSketch, *TopN)

UpdateCMSketchAndTopN updates the CMSketch and TopN by feedback.

func ValueToString ¶

func ValueToString(vars *variable.SessionVars, value *types.Datum, idxCols int, idxColumnTypes []byte) (string, error)

ValueToString converts a possibly encoded value to a formatted string. If the value is encoded, then idxCols equals the number of original values; otherwise idxCols is 0.

Types ¶

type AnalyzeJob ¶

type AnalyzeJob struct {
	ID            *uint64
	DBName        string
	TableName     string
	PartitionName string
	JobInfo       string
	StartTime     time.Time
	EndTime       time.Time
	Progress      AnalyzeProgress
}

AnalyzeJob is used to represent the status of one analyze job.

type AnalyzeProgress ¶

type AnalyzeProgress struct {
	sync.Mutex
	// contains filtered or unexported fields
}

AnalyzeProgress represents the progress of one analyze job.

func (*AnalyzeProgress) GetDeltaCount ¶

func (p *AnalyzeProgress) GetDeltaCount() int64

GetDeltaCount returns the delta count which hasn't been dumped into mysql.analyze_jobs.

func (*AnalyzeProgress) GetLastDumpTime ¶

func (p *AnalyzeProgress) GetLastDumpTime() time.Time

GetLastDumpTime returns the last dump time.

func (*AnalyzeProgress) SetLastDumpTime ¶

func (p *AnalyzeProgress) SetLastDumpTime(t time.Time)

SetLastDumpTime sets the last dump time.

func (*AnalyzeProgress) Update ¶

func (p *AnalyzeProgress) Update(rowCount int64) (dumpCount int64)

Update adds rowCount to the delta count. If the updated delta count reaches threshold, it returns the delta count for dumping it into mysql.analyze_jobs and resets the delta count to 0. Otherwise it returns 0.

type AnalyzeResult ¶

type AnalyzeResult struct {
	Hist    []*Histogram
	Cms     []*CMSketch
	TopNs   []*TopN
	Fms     []*FMSketch
	IsIndex int
}

AnalyzeResult is used to represent analyze result.

type AnalyzeResults ¶

type AnalyzeResults struct {
	TableID  AnalyzeTableID
	Ars      []*AnalyzeResult
	Count    int64
	ExtStats *ExtendedStatsColl
	Err      error
	Job      *AnalyzeJob
	StatsVer int
	Snapshot uint64
	// BaseCount is the original count in mysql.stats_meta at the beginning of analyze.
	BaseCount int64
	// BaseModifyCnt is the original modify_count in mysql.stats_meta at the beginning of analyze.
	BaseModifyCnt int64
}

AnalyzeResults represents the analyze results of a task.

type AnalyzeTableID ¶

type AnalyzeTableID struct {
	TableID int64
	// PartitionID is used for the construction of partition table statistics. It indicates the ID of the partition.
	// If the table is not the partition table, the PartitionID will be equal to -1.
	PartitionID int64
}

AnalyzeTableID is hybrid table id used to analyze table.

func (*AnalyzeTableID) Equals ¶

func (h *AnalyzeTableID) Equals(t *AnalyzeTableID) bool

Equals indicates whether two table IDs are equal.

func (*AnalyzeTableID) GetStatisticsID ¶

func (h *AnalyzeTableID) GetStatisticsID() int64

GetStatisticsID is used to obtain the table ID to build statistics. If the 'PartitionID == -1', we use the TableID to build the statistics for non-partition tables. Otherwise, we use the PartitionID to build the statistics of the partitions in the partition tables.

func (*AnalyzeTableID) IsPartitionTable ¶

func (h *AnalyzeTableID) IsPartitionTable() bool

IsPartitionTable indicates whether the table is partition table.

func (*AnalyzeTableID) String ¶

func (h *AnalyzeTableID) String() string

type BernoulliRowSampleCollector ¶

type BernoulliRowSampleCollector struct {
	SampleRate float64
	// contains filtered or unexported fields
}

BernoulliRowSampleCollector collects the samples from the source and organizes the samples by row. It maintains the following things:

Row samples.
FM sketches(To calculate the NDV).
Null counts.
The data sizes.
The number of rows.

It uses Bernoulli sampling to collect the data.

func NewBernoulliRowSampleCollector ¶

func NewBernoulliRowSampleCollector(sampleRate float64, totalLen int) *BernoulliRowSampleCollector

NewBernoulliRowSampleCollector creates the new collector by the given inputs.

func (*BernoulliRowSampleCollector) Base ¶

func (s *BernoulliRowSampleCollector) Base() *baseCollector

Base implements the interface RowSampleCollector.

func (BernoulliRowSampleCollector) FromProto ¶

func (s BernoulliRowSampleCollector) FromProto(pbCollector *tipb.RowSampleCollector, memTracker *memory.Tracker)

func (*BernoulliRowSampleCollector) MergeCollector ¶

func (s *BernoulliRowSampleCollector) MergeCollector(subCollector RowSampleCollector)

MergeCollector merges the collectors to a final one.

func (BernoulliRowSampleCollector) ToProto ¶

func (s BernoulliRowSampleCollector) ToProto() *tipb.RowSampleCollector

ToProto converts the collector to pb struct.

type Bucket ¶

type Bucket struct {
	Count  int64
	Repeat int64
	NDV    int64
}

Bucket stores the bucket count and repeat.

type BucketFeedback ¶

type BucketFeedback struct {
	// contains filtered or unexported fields
}

BucketFeedback stands for all the feedback for a bucket.

type CMSketch ¶

type CMSketch struct {
	// contains filtered or unexported fields
}

CMSketch is used to estimate point queries. Refer: https://en.wikipedia.org/wiki/Count-min_sketch

func NewCMSketch ¶

func NewCMSketch(d, w int32) *CMSketch

NewCMSketch returns a new CM sketch.

func (*CMSketch) CalcDefaultValForAnalyze ¶

func (c *CMSketch) CalcDefaultValForAnalyze(NDV uint64)

CalcDefaultValForAnalyze calculates the default value for Analyze. Its value is count / NDV in the CMSketch, which means that count and NDV do not include the TopN.

func (*CMSketch) Copy ¶

func (c *CMSketch) Copy() *CMSketch

Copy makes a copy for current CMSketch.

func (*CMSketch) Equal ¶

func (c *CMSketch) Equal(rc *CMSketch) bool

Equal tests whether two CM Sketches are equal. It is only used for tests.

func (*CMSketch) GetWidthAndDepth ¶

func (c *CMSketch) GetWidthAndDepth() (int32, int32)

GetWidthAndDepth returns the width and depth of CM Sketch.

func (*CMSketch) InsertBytes ¶

func (c *CMSketch) InsertBytes(bytes []byte)

InsertBytes inserts the bytes value into the CM Sketch.

func (*CMSketch) InsertBytesByCount ¶

func (c *CMSketch) InsertBytesByCount(bytes []byte, count uint64)

InsertBytesByCount adds the bytes value into the TopN (if the value is already in the TopN) or the CM Sketch by delta. This does not update c.defaultValue.

func (*CMSketch) MemoryUsage ¶

func (c *CMSketch) MemoryUsage() (sum int64)

MemoryUsage returns the total memory usage of a CMSketch. Only the hash table size (CMSketch.table) is calculated; the CMSketch.topN data is not tracked because its size has little influence. The size of other metadata in CMSketch is ignored.

func (*CMSketch) MergeCMSketch ¶

func (c *CMSketch) MergeCMSketch(rc *CMSketch) error

MergeCMSketch merges two CM Sketch.

func (*CMSketch) MergeCMSketch4IncrementalAnalyze ¶

func (c *CMSketch) MergeCMSketch4IncrementalAnalyze(rc *CMSketch, numTopN uint32) error

MergeCMSketch4IncrementalAnalyze merges two CM Sketch for incremental analyze. Since there is no value that appears partially in `c` and `rc` for incremental analyze, it uses `max` to merge them. Here is a simple proof: when we query from the CM sketch, we use the `min` to get the answer:

(1): For values that only appear in `c`, using `max` to merge them affects the `min` query result less than using `sum`;
(2): For values that only appear in `rc`, it is the same as condition (1);
(3): For values that appear both in `c` and `rc`, if they do not appear partially in `c` and `rc`, for example,
     if `v` appears 5 times in the table, it can appear 5 times in `c` and 3 times in `rc`, then `max` also gives the correct answer.

So in fact, if we can know the number of appearances of each value in the first place, it is better to use `max` to construct the CM sketch rather than `sum`.

func (*CMSketch) QueryBytes ¶

func (c *CMSketch) QueryBytes(d []byte) uint64

QueryBytes is used to query the count of specified bytes.

func (*CMSketch) SubValue ¶

func (c *CMSketch) SubValue(h1, h2 uint64, count uint64)

SubValue removes a value from the CMSketch.

func (*CMSketch) TotalCount ¶

func (c *CMSketch) TotalCount() uint64

TotalCount returns the total count in the sketch, it is only used for test.

type CacheItemMemoryUsage ¶

type CacheItemMemoryUsage interface {
	ItemID() int64
	TotalMemoryUsage() int64
	TrackingMemUsage() int64
	HistMemUsage() int64
	TopnMemUsage() int64
	CMSMemUsage() int64
}

CacheItemMemoryUsage indicates the memory usage of TableCacheItem

type Column ¶

type Column struct {
	Histogram
	CMSketch   *CMSketch
	TopN       *TopN
	FMSketch   *FMSketch
	PhysicalID int64
	Count      int64
	Info       *model.ColumnInfo
	IsHandle   bool
	ErrorRate
	Flag           int64
	LastAnalyzePos types.Datum
	StatsVer       int64 // StatsVer is the version of the current stats, used to maintain compatibility

	// StatsLoadedStatus indicates the status of column statistics
	StatsLoadedStatus
}

Column represents a column histogram.

func (*Column) AvgColSize ¶

func (c *Column) AvgColSize(count int64, isKey bool) float64

AvgColSize is the average column size of the histogram. These sizes are derived from function `encode` and `Datum::ConvertTo`, so we need to update them if those 2 functions are changed.

func (*Column) AvgColSizeChunkFormat ¶

func (c *Column) AvgColSizeChunkFormat(count int64) float64

AvgColSizeChunkFormat is the average column size of the histogram. These sizes are derived from function `Encode` and `DecodeToChunk`, so we need to update them if those 2 functions are changed.

func (*Column) AvgColSizeListInDisk ¶

func (c *Column) AvgColSizeListInDisk(count int64) float64

AvgColSizeListInDisk is the average column size of the histogram. These sizes are derived from `chunk.ListInDisk` so we need to update them if those 2 functions are changed.

func (*Column) BetweenRowCount ¶

func (c *Column) BetweenRowCount(sctx sessionctx.Context, l, r types.Datum, lowEncoded, highEncoded []byte) float64

BetweenRowCount estimates the row count for interval [l, r).

func (*Column) DropEvicted ¶

func (c *Column) DropEvicted()

DropEvicted implements TableCacheItem. DropEvicted drops evicted structures.

func (*Column) GetColumnRowCount ¶

func (c *Column) GetColumnRowCount(sctx sessionctx.Context, ranges []*ranger.Range, realtimeRowCount, modifyCount int64, pkIsHandle bool) (float64, error)

GetColumnRowCount estimates the row count by a slice of Range.

func (*Column) GetIncreaseFactor ¶

func (c *Column) GetIncreaseFactor(realtimeRowCount int64) float64

GetIncreaseFactor gets the increase factor to adjust the final estimated count when the table is modified.

func (*Column) IsAllEvicted ¶

func (c *Column) IsAllEvicted() bool

IsAllEvicted indicates whether all stats evicted

func (*Column) IsInvalid ¶

func (c *Column) IsInvalid(sctx sessionctx.Context, collPseudo bool) bool

IsInvalid checks if this column is invalid. If this column has histogram but not loaded yet, then we mark it as need histogram.

func (*Column) ItemID ¶

func (c *Column) ItemID() int64

ItemID implements TableCacheItem

func (*Column) MemoryUsage ¶

func (c *Column) MemoryUsage() CacheItemMemoryUsage

MemoryUsage returns the total memory usage of Histogram, CMSketch, FMSketch in Column. We ignore the size of other metadata in Column

func (*Column) String ¶

func (c *Column) String() string

func (*Column) TotalRowCount ¶

func (c *Column) TotalRowCount() float64

TotalRowCount returns the total count of this column.

type ColumnMemUsage ¶

type ColumnMemUsage struct {
	ColumnID          int64
	HistogramMemUsage int64
	CMSketchMemUsage  int64
	FMSketchMemUsage  int64
	TopNMemUsage      int64
	TotalMemUsage     int64
}

ColumnMemUsage records column memory usage

func (*ColumnMemUsage) CMSMemUsage ¶

func (c *ColumnMemUsage) CMSMemUsage() int64

CMSMemUsage implements CacheItemMemoryUsage

func (*ColumnMemUsage) HistMemUsage ¶

func (c *ColumnMemUsage) HistMemUsage() int64

HistMemUsage implements CacheItemMemoryUsage

func (*ColumnMemUsage) ItemID ¶

func (c *ColumnMemUsage) ItemID() int64

ItemID implements CacheItemMemoryUsage

func (*ColumnMemUsage) TopnMemUsage ¶

func (c *ColumnMemUsage) TopnMemUsage() int64

TopnMemUsage implements CacheItemMemoryUsage

func (*ColumnMemUsage) TotalMemoryUsage ¶

func (c *ColumnMemUsage) TotalMemoryUsage() int64

TotalMemoryUsage implements CacheItemMemoryUsage

func (*ColumnMemUsage) TrackingMemUsage ¶

func (c *ColumnMemUsage) TrackingMemUsage() int64

TrackingMemUsage implements CacheItemMemoryUsage

type ErrorRate ¶

type ErrorRate struct {
	ErrorTotal float64
	QueryTotal int64
}

ErrorRate is the error rate of estimate row count by bucket and cm sketch.

func (*ErrorRate) Merge ¶

func (e *ErrorRate) Merge(rate *ErrorRate)

Merge merges two ErrorRates.

func (*ErrorRate) NotAccurate ¶

func (e *ErrorRate) NotAccurate() bool

NotAccurate is true when the total of query is zero or the average error rate is greater than MaxErrorRate.

func (*ErrorRate) Update ¶

func (e *ErrorRate) Update(rate float64)

Update updates the ErrorRate.

type ExtendedStatsColl ¶

type ExtendedStatsColl struct {
	Stats             map[string]*ExtendedStatsItem
	LastUpdateVersion uint64
}

ExtendedStatsColl is a collection of cached items for mysql.stats_extended records.

func NewExtendedStatsColl ¶

func NewExtendedStatsColl() *ExtendedStatsColl

NewExtendedStatsColl allocates an ExtendedStatsColl struct.

type ExtendedStatsItem ¶

type ExtendedStatsItem struct {
	ColIDs     []int64
	Tp         uint8
	ScalarVals float64
	StringVals string
}

ExtendedStatsItem is the cached item of a mysql.stats_extended record.

type FMSketch ¶

type FMSketch struct {
	// contains filtered or unexported fields
}

FMSketch is used to count the number of distinct elements in a set.

func DecodeFMSketch ¶

func DecodeFMSketch(data []byte) (*FMSketch, error)

DecodeFMSketch decodes an FMSketch from the given byte slice.

func FMSketchFromProto ¶

func FMSketchFromProto(protoSketch *tipb.FMSketch) *FMSketch

FMSketchFromProto converts FMSketch from its protobuf representation.

func NewFMSketch ¶

func NewFMSketch(maxSize int) *FMSketch

NewFMSketch returns a new FM sketch.

func (*FMSketch) Copy ¶

func (s *FMSketch) Copy() *FMSketch

Copy makes a copy for current FMSketch.

func (*FMSketch) InsertRowValue ¶

func (s *FMSketch) InsertRowValue(sc *stmtctx.StatementContext, values []types.Datum) error

InsertRowValue inserts multi-column values to the sketch.

func (*FMSketch) InsertValue ¶

func (s *FMSketch) InsertValue(sc *stmtctx.StatementContext, value types.Datum) error

InsertValue inserts a value into the FM sketch.

func (*FMSketch) MemoryUsage ¶

func (s *FMSketch) MemoryUsage() (sum int64)

MemoryUsage returns the total memory usage of a FMSketch.

func (*FMSketch) MergeFMSketch ¶

func (s *FMSketch) MergeFMSketch(rs *FMSketch)

MergeFMSketch merges two FM Sketch.

func (*FMSketch) NDV ¶

func (s *FMSketch) NDV() int64

NDV returns the ndv of the sketch.

type Feedback ¶

type Feedback struct {
	Lower  *types.Datum
	Upper  *types.Datum
	Count  int64
	Repeat int64
	Ndv    int64
}

Feedback represents the total scan count in range [lower, upper).

func CleanRangeFeedbackByTopN ¶

func CleanRangeFeedbackByTopN(feedbacks []Feedback, topN *TopN) []Feedback

CleanRangeFeedbackByTopN will not update the part containing the TopN.

func NonOverlappedFeedbacks ¶

func NonOverlappedFeedbacks(sc *stmtctx.StatementContext, fbs []Feedback) ([]Feedback, bool)

NonOverlappedFeedbacks extracts a set of feedbacks which are not overlapped with each other.

type HistColl ¶

type HistColl struct {
	PhysicalID int64
	Columns    map[int64]*Column
	Indices    map[int64]*Index
	// Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner.
	Idx2ColumnIDs map[int64][]int64
	// ColID2IdxIDs maps the column id to a list of index ids whose first column is that column. It's used to calculate the selectivity in planner.
	ColID2IdxIDs map[int64][]int64
	Count        int64
	ModifyCount  int64 // Total modify count in a table.

	// HavePhysicalID being true means this HistColl is from a single table and has its ID information.
	// The physical id is used when trying to load column stats from storage.
	HavePhysicalID bool
	Pseudo         bool
}

HistColl is a collection of histograms. It collects enough information for the planner to calculate the selectivity.

func (*HistColl) GenerateHistCollFromColumnInfo ¶

func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) *HistColl

GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxIDs and Idx2ColumnIDs are built from the given parameters.

func (*HistColl) GetAvgRowSize ¶

func (coll *HistColl) GetAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isEncodedKey bool, isForScan bool) (size float64)

GetAvgRowSize computes average row size for given columns.

func (*HistColl) GetAvgRowSizeListInDisk ¶

func (coll *HistColl) GetAvgRowSizeListInDisk(cols []*expression.Column) (size float64)

GetAvgRowSizeListInDisk computes average row size for given columns.

func (*HistColl) GetIndexAvgRowSize ¶

func (coll *HistColl) GetIndexAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isUnique bool) (size float64)

GetIndexAvgRowSize computes average row size for an index scan.

func (*HistColl) GetRowCountByColumnRanges ¶

func (coll *HistColl) GetRowCountByColumnRanges(sctx sessionctx.Context, colID int64, colRanges []*ranger.Range) (float64, error)

GetRowCountByColumnRanges estimates the row count by a slice of Range.

func (*HistColl) GetRowCountByIndexRanges ¶

func (coll *HistColl) GetRowCountByIndexRanges(sctx sessionctx.Context, idxID int64, indexRanges []*ranger.Range) (float64, error)

GetRowCountByIndexRanges estimates the row count by a slice of Range.

func (*HistColl) GetRowCountByIntColumnRanges ¶

func (coll *HistColl) GetRowCountByIntColumnRanges(sctx sessionctx.Context, colID int64, intRanges []*ranger.Range) (result float64, err error)

GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.

func (*HistColl) GetSelectivityByFilter ¶

func (coll *HistColl) GetSelectivityByFilter(sctx sessionctx.Context,
	defaultSelectivity float64,
	filters []expression.Expression) (ok bool, selectivity float64, err error)

GetSelectivityByFilter tries to estimate the selectivity of expressions by evaluating the expressions using TopN and NULL. The data represented by the Histogram uses the defaultSelectivity parameter as the selectivity. Currently, this method can only handle expressions involving a single column.

func (*HistColl) GetTableAvgRowSize ¶

func (coll *HistColl) GetTableAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, storeType kv.StoreType, handleInCols bool) (size float64)

GetTableAvgRowSize computes average row size for a table scan, exclude the index key-value pairs.

func (*HistColl) ID2UniqueID ¶

func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl

ID2UniqueID generates a new HistColl whose `Columns` is built from UniqueID of given columns.

func (*HistColl) NewHistCollBySelectivity ¶

func (coll *HistColl) NewHistCollBySelectivity(sctx sessionctx.Context, statsNodes []*StatsNode) *HistColl

NewHistCollBySelectivity creates new HistColl by the given statsNodes.

func (*HistColl) Selectivity ¶

func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression, filledPaths []*planutil.AccessPath) (float64, []*StatsNode, error)

Selectivity calculates the selectivity of the expressions. The definition of selectivity is (row count after filter / row count before filter). The exprs must be in CNF for now; in other words, `exprs[0] and exprs[1] and ... and exprs[len - 1]` should hold when you call this. Currently the time complexity is O(n^2).

type Histogram ¶

type Histogram struct {
	ID        int64 // Column ID.
	NDV       int64 // Number of distinct values.
	NullCount int64 // Number of null values.
	// LastUpdateVersion is the version that this histogram updated last time.
	LastUpdateVersion uint64

	Tp *types.FieldType

	// Histogram elements.
	//
	// A bucket bound is the smallest and greatest values stored in the bucket. The lower and upper bound
	// are stored in one column.
	//
	// A bucket count is the number of items stored in all previous buckets and the current bucket.
	// Bucket counts are always in increasing order.
	//
	// A bucket repeat is the number of repeats of the bucket value, it can be used to find popular values.
	Bounds  *chunk.Chunk
	Buckets []Bucket

	// TotColSize is the total column size for the histogram.
	// For unfixed-len types, it includes LEN and BYTE.
	TotColSize int64

	// Correlation is the statistical correlation between physical row ordering and logical ordering of
	// the column values. This ranges from -1 to +1, and it is only valid for Column histogram, not for
	// Index histogram.
	Correlation float64
	// contains filtered or unexported fields
}

Histogram represents statistics for a column or index.

func BuildColumn ¶

func BuildColumn(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, tp *types.FieldType) (*Histogram, error)

BuildColumn builds histogram from samples for column.

func BuildColumnHist ¶

func BuildColumnHist(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, tp *types.FieldType, count int64, ndv int64, nullCount int64) (*Histogram, error)

BuildColumnHist builds a histogram for a column. numBuckets: number of buckets for the histogram. id: the id of the table. collector: the collector of samples. tp: the FieldType for the column. count: represents the row count for the column. ndv: represents the number of distinct values for the column. nullCount: represents the number of null values for the column.

func HistogramFromProto ¶

func HistogramFromProto(protoHg *tipb.Histogram) *Histogram

HistogramFromProto converts Histogram from its protobuf representation. Note that we will set BytesDatum for the lower/upper bound in the bucket, the decode will be after all histograms merged.

func MergeHistograms ¶

func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int, statsVer int) (*Histogram, error)

MergeHistograms merges two histograms.

func MergePartitionHist2GlobalHist ¶

func MergePartitionHist2GlobalHist(sc *stmtctx.StatementContext, hists []*Histogram, popedTopN []TopNMeta, expBucketNumber int64, isIndex bool) (*Histogram, error)

MergePartitionHist2GlobalHist merges hists (partition-level Histogram) to a global-level Histogram

func NewHistogram ¶

func NewHistogram(id, ndv, nullCount int64, version uint64, tp *types.FieldType, bucketSize int, totColSize int64) *Histogram

NewHistogram creates a new histogram.

func UpdateHistogram ¶

func UpdateHistogram(h *Histogram, feedback *QueryFeedback, statsVer int) *Histogram

UpdateHistogram updates the histogram according to buckets.

func UpdateHistogramWithBucketCount ¶

func UpdateHistogramWithBucketCount(h *Histogram, feedback *QueryFeedback, statsVer int, bucketCount int) *Histogram

UpdateHistogramWithBucketCount updates the histogram according to buckets with a customized bucketCount for testing.

func (*Histogram) AddIdxVals ¶

func (hg *Histogram) AddIdxVals(idxValCntPairs []TopNMeta)

AddIdxVals adds the given values to the histogram.

func (*Histogram) AppendBucket ¶

func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64)

AppendBucket appends a bucket into `hg`.

func (*Histogram) AppendBucketWithNDV ¶

func (hg *Histogram) AppendBucketWithNDV(lower *types.Datum, upper *types.Datum, count, repeat, ndv int64)

AppendBucketWithNDV appends a bucket into `hg` and set value for field `NDV`.

func (*Histogram) AvgCountPerNotNullValue ¶

func (hg *Histogram) AvgCountPerNotNullValue(totalCount int64) float64

AvgCountPerNotNullValue gets the average row count per value by the data of histogram.

func (*Histogram) BetweenRowCount ¶

func (hg *Histogram) BetweenRowCount(a, b types.Datum) float64

BetweenRowCount estimates the row count where column greater or equal to a and less than b.

func (*Histogram) BucketToString ¶

func (hg *Histogram) BucketToString(bktID, idxCols int) string

BucketToString changes the given bucket to string format.

func (*Histogram) ConvertTo ¶

func (hg *Histogram) ConvertTo(sc *stmtctx.StatementContext, tp *types.FieldType) (*Histogram, error)

ConvertTo converts the histogram bucket values into `tp`.

func (*Histogram) Copy ¶

func (hg *Histogram) Copy() *Histogram

Copy deep copies the histogram.

func (*Histogram) DecodeTo ¶

func (hg *Histogram) DecodeTo(tp *types.FieldType, timeZone *time.Location) error

DecodeTo decodes the histogram bucket values into `tp`.

func (*Histogram) ExtractTopN ¶

func (hg *Histogram) ExtractTopN(cms *CMSketch, topN *TopN, numCols int, numTopN uint32) error

ExtractTopN extracts topn from histogram.

func (*Histogram) GetIncreaseFactor ¶

func (hg *Histogram) GetIncreaseFactor(totalCount int64) float64

GetIncreaseFactor will return a factor of data increasing after the last analysis.

func (*Histogram) GetLower ¶

func (hg *Histogram) GetLower(idx int) *types.Datum

GetLower gets the lower bound of bucket `idx`.

func (*Histogram) GetUpper ¶

func (hg *Histogram) GetUpper(idx int) *types.Datum

GetUpper gets the upper bound of bucket `idx`.

func (*Histogram) IsIndexHist ¶

func (hg *Histogram) IsIndexHist() bool

IsIndexHist checks whether current histogram is one for index.

func (*Histogram) Len ¶

func (hg *Histogram) Len() int

Len is the number of buckets in the histogram.

func (*Histogram) LessRowCountWithBktIdx ¶

func (hg *Histogram) LessRowCountWithBktIdx(value types.Datum) (float64, int)

LessRowCountWithBktIdx estimates the row count where the column less than value.

func (*Histogram) MemoryUsage ¶

func (hg *Histogram) MemoryUsage() (sum int64)

MemoryUsage returns the total memory usage of this Histogram.

func (*Histogram) PreCalculateScalar ¶

func (hg *Histogram) PreCalculateScalar()

PreCalculateScalar converts the lower and upper to scalar. When the datum type is KindString or KindBytes, we also calculate their common prefix length, because when a value falls between lower and upper, the common prefix of lower and upper equals to the common prefix of the lower, upper and the value. For some simple types like `Int64`, we do not convert it because we can directly infer the scalar value.

func (*Histogram) RemoveUpperBound ¶

func (hg *Histogram) RemoveUpperBound() *Histogram

RemoveUpperBound removes the upper bound from histogram. It is used when merge stats for incremental analyze.

func (*Histogram) RemoveVals ¶

func (hg *Histogram) RemoveVals(valCntPairs []TopNMeta)

RemoveVals removes the given values from the histogram. This function contains an **ASSUMPTION**: valCntPairs is sorted in ascending order.

func (*Histogram) SplitRange ¶

func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*ranger.Range, encoded bool) ([]*ranger.Range, bool)

SplitRange splits the range according to the histogram lower bound. Note that we treat first bucket's lower bound as -inf and last bucket's upper bound as +inf, so all the split ranges will totally fall in one of the (-inf, l(1)), [l(1), l(2)),...[l(n-2), l(n-1)), [l(n-1), +inf), where n is the number of buckets, l(i) is the i-th bucket's lower bound.

func (*Histogram) ToString ¶

func (hg *Histogram) ToString(idxCols int) string

ToString gets the string representation for the histogram.

func (*Histogram) TotalRowCount ¶

func (hg *Histogram) TotalRowCount() float64

TotalRowCount returns the total count of this histogram.

func (*Histogram) TruncateHistogram ¶

func (hg *Histogram) TruncateHistogram(numBkt int) *Histogram

TruncateHistogram truncates the histogram to `numBkt` buckets.

type Index ¶

type Index struct {
	Histogram
	CMSketch *CMSketch
	TopN     *TopN
	FMSketch *FMSketch
	ErrorRate
	StatsVer       int64 // StatsVer is the version of the current stats, used to maintain compatibility
	Info           *model.IndexInfo
	Flag           int64
	LastAnalyzePos types.Datum
	PhysicalID     int64
	StatsLoadedStatus
}

Index represents an index histogram.

func (*Index) BetweenRowCount ¶

func (idx *Index) BetweenRowCount(l, r types.Datum) float64

BetweenRowCount estimates the row count for interval [l, r).

func (*Index) EvictAllStats ¶

func (idx *Index) EvictAllStats()

EvictAllStats evicts all stats. Note that this function is only used for tests.

func (*Index) GetIncreaseFactor ¶

func (idx *Index) GetIncreaseFactor(realtimeRowCount int64) float64

GetIncreaseFactor gets the increase factor to adjust the final estimated count when the table is modified.

func (*Index) GetRowCount ¶

func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (float64, error)

GetRowCount returns the row count of the given ranges. It uses the modifyCount to adjust the influence of modifications on the table.

func (*Index) IsAllEvicted ¶

func (idx *Index) IsAllEvicted() bool

IsAllEvicted indicates whether all stats evicted

func (*Index) IsEvicted ¶

func (idx *Index) IsEvicted() bool

IsEvicted returns whether index statistics got evicted

func (*Index) IsInvalid ¶

func (idx *Index) IsInvalid(collPseudo bool) bool

IsInvalid checks if this index is invalid.

func (*Index) ItemID ¶

func (idx *Index) ItemID() int64

ItemID implements TableCacheItem

func (*Index) MemoryUsage ¶

func (idx *Index) MemoryUsage() CacheItemMemoryUsage

MemoryUsage returns the total memory usage of a Histogram and CMSketch in Index. We ignore the size of other metadata in Index.

func (*Index) QueryBytes ¶

func (idx *Index) QueryBytes(d []byte) uint64

QueryBytes is used to query the count of specified bytes.

func (*Index) String ¶

func (idx *Index) String() string

func (*Index) TotalRowCount ¶

func (idx *Index) TotalRowCount() float64

TotalRowCount returns the total count of this index.

type IndexMemUsage ¶

type IndexMemUsage struct {
	IndexID           int64
	HistogramMemUsage int64
	CMSketchMemUsage  int64
	TopNMemUsage      int64
	TotalMemUsage     int64
}

IndexMemUsage records index memory usage

func (*IndexMemUsage) CMSMemUsage ¶

func (c *IndexMemUsage) CMSMemUsage() int64

CMSMemUsage implements CacheItemMemoryUsage

func (*IndexMemUsage) HistMemUsage ¶

func (c *IndexMemUsage) HistMemUsage() int64

HistMemUsage implements CacheItemMemoryUsage

func (*IndexMemUsage) ItemID ¶

func (c *IndexMemUsage) ItemID() int64

ItemID implements CacheItemMemoryUsage

func (*IndexMemUsage) TopnMemUsage ¶

func (c *IndexMemUsage) TopnMemUsage() int64

TopnMemUsage implements CacheItemMemoryUsage

func (*IndexMemUsage) TotalMemoryUsage ¶

func (c *IndexMemUsage) TotalMemoryUsage() int64

TotalMemoryUsage implements CacheItemMemoryUsage

func (*IndexMemUsage) TrackingMemUsage ¶

func (c *IndexMemUsage) TrackingMemUsage() int64

TrackingMemUsage implements CacheItemMemoryUsage

type QueryFeedback ¶

type QueryFeedback struct {
	Hist       *Histogram
	Feedback   []Feedback
	PhysicalID int64
	Tp         int
	Expected   int64 // Expected is the Expected scan count of corresponding query.

	Valid goatomic.Bool // Valid represents whether this query feedback is still valid.
	// contains filtered or unexported fields
}

QueryFeedback is used to represent the query feedback info. It contains the query's scan ranges and number of rows in each range.

func NewQueryFeedback ¶

func NewQueryFeedback(physicalID int64, hist *Histogram, expected int64, desc bool) *QueryFeedback

NewQueryFeedback returns a new query feedback.

func (*QueryFeedback) Actual ¶

func (q *QueryFeedback) Actual() int64

Actual gets the actual row count.

func (*QueryFeedback) CalcErrorRate ¶

func (q *QueryFeedback) CalcErrorRate() float64

CalcErrorRate calculates the error rate of the current QueryFeedback.

func (*QueryFeedback) DecodeIntValues ¶

func (q *QueryFeedback) DecodeIntValues() *QueryFeedback

DecodeIntValues is called when the current Feedback stores encoded int values.

func (*QueryFeedback) DecodeToRanges ¶

func (q *QueryFeedback) DecodeToRanges(isIndex bool) ([]*ranger.Range, error)

DecodeToRanges decodes the feedback to ranges.

func (*QueryFeedback) Invalidate ¶

func (q *QueryFeedback) Invalidate()

Invalidate is used to invalidate the query feedback.

func (*QueryFeedback) StoreRanges ¶

func (q *QueryFeedback) StoreRanges(ranges []*ranger.Range)

StoreRanges stores the ranges for update.

func (*QueryFeedback) Update ¶

func (q *QueryFeedback) Update(startKey kv.Key, counts, ndvs []int64)

Update updates the query feedback. `startKey` is the start scan key of the partial result, used to find the range for update. `counts` is the scan counts of each range, used to update the feedback count info.

type QueryFeedbackKey ¶

type QueryFeedbackKey struct {
	PhysicalID int64
	HistID     int64
	Tp         int
}

QueryFeedbackKey is the key for a group of feedbacks on the same index/column.

type QueryFeedbackMap ¶

type QueryFeedbackMap struct {
	Size      int
	Feedbacks map[QueryFeedbackKey][]*QueryFeedback
}

QueryFeedbackMap is the collection of feedbacks.

func NewQueryFeedbackMap ¶

func NewQueryFeedbackMap() *QueryFeedbackMap

NewQueryFeedbackMap builds a feedback collection.

func (*QueryFeedbackMap) Append ¶

func (m *QueryFeedbackMap) Append(q *QueryFeedback)

Append adds a feedback into map.

func (*QueryFeedbackMap) Merge ¶

func (m *QueryFeedbackMap) Merge(r *QueryFeedbackMap)

Merge combines 2 collections of feedbacks.

func (*QueryFeedbackMap) SiftFeedbacks ¶

func (m *QueryFeedbackMap) SiftFeedbacks()

SiftFeedbacks eliminates feedbacks which are overlapped with others. It is a tradeoff between feedback accuracy and its overhead.

type ReservoirRowSampleCollector ¶

type ReservoirRowSampleCollector struct {
	MaxSampleSize int
	// contains filtered or unexported fields
}

ReservoirRowSampleCollector collects the samples from the source and organizes the samples by row. It will maintain the following things:

Row samples.
FM sketches(To calculate the NDV).
Null counts.
The data sizes.
The number of rows.

It uses weighted reservoir sampling(A-Res) to do the sampling.

func NewReservoirRowSampleCollector ¶

func NewReservoirRowSampleCollector(maxSampleSize int, totalLen int) *ReservoirRowSampleCollector

NewReservoirRowSampleCollector creates the new collector by the given inputs.

func (*ReservoirRowSampleCollector) Base ¶

func (s *ReservoirRowSampleCollector) Base() *baseCollector

Base implements the RowSampleCollector interface.

func (ReservoirRowSampleCollector) FromProto ¶

func (s ReservoirRowSampleCollector) FromProto(pbCollector *tipb.RowSampleCollector, memTracker *memory.Tracker)

func (*ReservoirRowSampleCollector) MergeCollector ¶

func (s *ReservoirRowSampleCollector) MergeCollector(subCollector RowSampleCollector)

MergeCollector merges the collectors to a final one.

func (ReservoirRowSampleCollector) ToProto ¶

func (s ReservoirRowSampleCollector) ToProto() *tipb.RowSampleCollector

ToProto converts the collector to pb struct.

type ReservoirRowSampleItem ¶

type ReservoirRowSampleItem struct {
	Columns []types.Datum
	Weight  int64
	Handle  kv.Handle
}

ReservoirRowSampleItem is the item for the ReservoirRowSampleCollector. The weight is needed for the sampling algorithm.

func (ReservoirRowSampleItem) MemUsage ¶

func (i ReservoirRowSampleItem) MemUsage() (sum int64)

MemUsage returns the memory usage of sample item.

type RowSampleBuilder ¶

type RowSampleBuilder struct {
	Sc              *stmtctx.StatementContext
	RecordSet       sqlexec.RecordSet
	ColsFieldType   []*types.FieldType
	Collators       []collate.Collator
	ColGroups       [][]int64
	MaxSampleSize   int
	SampleRate      float64
	MaxFMSketchSize int
	Rng             *rand.Rand
}

RowSampleBuilder is used to construct the ReservoirRowSampleCollector to get the samples.

func (*RowSampleBuilder) Collect ¶

func (s *RowSampleBuilder) Collect() (RowSampleCollector, error)

Collect first builds the collector. It then maintains the null count, FM sketch and the data size for each column and column group, and uses weighted reservoir sampling to collect the samples.

type RowSampleCollector ¶

type RowSampleCollector interface {
	MergeCollector(collector RowSampleCollector)

	Base() *baseCollector
	// contains filtered or unexported methods
}

RowSampleCollector implements the needed interface for a row-based sample collector.

func NewRowSampleCollector ¶

func NewRowSampleCollector(maxSampleSize int, sampleRate float64, totalLen int) RowSampleCollector

NewRowSampleCollector creates a collector from the given inputs.

type SampleBuilder ¶

type SampleBuilder struct {
	Sc              *stmtctx.StatementContext
	RecordSet       sqlexec.RecordSet
	ColLen          int // ColLen is the number of columns need to be sampled.
	PkBuilder       *SortedBuilder
	MaxBucketSize   int64
	MaxSampleSize   int64
	MaxFMSketchSize int64
	CMSketchDepth   int32
	CMSketchWidth   int32
	Collators       []collate.Collator
	ColsFieldType   []*types.FieldType
}

SampleBuilder is used to build samples for columns. Also, if primary key is handle, it will directly build histogram for it.

func (SampleBuilder) CollectColumnStats ¶

func (s SampleBuilder) CollectColumnStats() ([]*SampleCollector, *SortedBuilder, error)

CollectColumnStats collects sample from the result set using Reservoir Sampling algorithm, and estimates NDVs using FM Sketch during the collecting process. It returns the sample collectors which contain total count, null count, distinct values count and CM Sketch. It also returns the statistic builder for PK which contains the histogram. See https://en.wikipedia.org/wiki/Reservoir_sampling

type SampleCollector ¶

type SampleCollector struct {
	Samples []*SampleItem

	IsMerger      bool
	NullCount     int64
	Count         int64 // Count is the number of non-null rows.
	MaxSampleSize int64
	FMSketch      *FMSketch
	CMSketch      *CMSketch
	TopN          *TopN
	TotalSize     int64 // TotalSize is the total size of column.
	MemSize       int64 // major memory size of this sample collector.
	// contains filtered or unexported fields
}

SampleCollector will collect Samples and calculate the count and ndv of an attribute.

func SampleCollectorFromProto ¶

func SampleCollectorFromProto(collector *tipb.SampleCollector) *SampleCollector

SampleCollectorFromProto converts SampleCollector from its protobuf representation.

func (*SampleCollector) CalcTotalSize ¶

func (c *SampleCollector) CalcTotalSize()

CalcTotalSize is to calculate total size based on samples.

func (*SampleCollector) ExtractTopN ¶

func (c *SampleCollector) ExtractTopN(numTop uint32, sc *stmtctx.StatementContext, tp *types.FieldType, timeZone *time.Location) error

ExtractTopN extracts the topn from the CM Sketch.

func (*SampleCollector) MergeSampleCollector ¶

func (c *SampleCollector) MergeSampleCollector(sc *stmtctx.StatementContext, rc *SampleCollector)

MergeSampleCollector merges two sample collectors.

type SampleItem ¶

type SampleItem struct {
	// Value is the sampled column value.
	Value types.Datum
	// Ordinal is original position of this item in SampleCollector before sorting. This
	// is used for computing correlation.
	Ordinal int
	// Handle is the handle of the sample in its key.
	// This property is used to calculate Ordinal in fast analyze.
	Handle kv.Handle
}

SampleItem is an item of sampled column value.

func CopySampleItems ¶

func CopySampleItems(items []*SampleItem) []*SampleItem

CopySampleItems returns a deep copy of SampleItem slice.

func SortSampleItems ¶

func SortSampleItems(sc *stmtctx.StatementContext, items []*SampleItem) ([]*SampleItem, error)

SortSampleItems shallow copies and sorts a slice of SampleItem.

type SortedBuilder ¶

type SortedBuilder struct {
	Count int64
	// contains filtered or unexported fields
}

SortedBuilder is used to build histograms for PK and index.

func NewSortedBuilder ¶

func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *types.FieldType, statsVer int) *SortedBuilder

NewSortedBuilder creates a new SortedBuilder.

func (*SortedBuilder) Hist ¶

func (b *SortedBuilder) Hist() *Histogram

Hist returns the histogram built by SortedBuilder.

func (*SortedBuilder) Iterate ¶

func (b *SortedBuilder) Iterate(data types.Datum) error

Iterate updates the histogram incrementally.

type StatsLoadedStatus ¶

type StatsLoadedStatus struct {
	// contains filtered or unexported fields
}

StatsLoadedStatus indicates the status of statistics

func NewStatsAllEvictedStatus ¶

func NewStatsAllEvictedStatus() StatsLoadedStatus

NewStatsAllEvictedStatus returns the status that only loads count/nullCount/NDV and doesn't load CMSketch/TopN/Histogram. When we load table stats, column stats is in allEvicted status by default. CMSketch/TopN/Histogram of column is only loaded when we really need column stats.

func NewStatsFullLoadStatus ¶

func NewStatsFullLoadStatus() StatsLoadedStatus

NewStatsFullLoadStatus returns the status indicating that the column/index is fully loaded.

func (StatsLoadedStatus) IsAllEvicted ¶

func (s StatsLoadedStatus) IsAllEvicted() bool

IsAllEvicted indicates whether all the stats got evicted or not.

func (StatsLoadedStatus) IsCMSEvicted ¶

func (s StatsLoadedStatus) IsCMSEvicted() bool

IsCMSEvicted indicates whether the cms got evicted now.

func (StatsLoadedStatus) IsEssentialStatsLoaded ¶

func (s StatsLoadedStatus) IsEssentialStatsLoaded() bool

IsEssentialStatsLoaded indicates whether the essential statistics are loaded. If the column/index was loaded, and at least the histogram and TopN still exist, the necessary statistics are still loaded.

func (StatsLoadedStatus) IsFullLoad ¶

func (s StatsLoadedStatus) IsFullLoad() bool

IsFullLoad indicates whether the stats are fully loaded.

func (StatsLoadedStatus) IsLoadNeeded ¶

func (s StatsLoadedStatus) IsLoadNeeded() bool

IsLoadNeeded indicates whether statistics need to be loaded during LoadNeededHistograms or sync stats. If the column/index was loaded and any of its statistics has been evicted, the statistics also need to be reloaded.

func (StatsLoadedStatus) IsStatsInitialized ¶

func (s StatsLoadedStatus) IsStatsInitialized() bool

IsStatsInitialized indicates whether the column/index's statistics were loaded from storage before. Note that `IsStatsInitialized` can only be set during initialization.

func (StatsLoadedStatus) IsTopNEvicted ¶

func (s StatsLoadedStatus) IsTopNEvicted() bool

IsTopNEvicted indicates whether the topn got evicted now.

func (StatsLoadedStatus) StatusToString ¶

func (s StatsLoadedStatus) StatusToString() string

StatusToString gets the string info of StatsLoadedStatus

type StatsNode ¶

type StatsNode struct {
	Tp int
	ID int64

	// Ranges contains all the Ranges we got.
	Ranges []*ranger.Range
	// Selectivity indicates the Selectivity of this column/index.
	Selectivity float64
	// contains filtered or unexported fields
}

StatsNode is used for calculating selectivity.

func GetUsableSetsByGreedy ¶

func GetUsableSetsByGreedy(nodes []*StatsNode) (newBlocks []*StatsNode)

GetUsableSetsByGreedy selects the indices and pk used to calculate selectivity, using a greedy algorithm.

func MockStatsNode ¶

func MockStatsNode(id int64, m int64, num int) *StatsNode

MockStatsNode is only used for test.

type StatsReader ¶

type StatsReader struct {
	// contains filtered or unexported fields
}

StatsReader is used for simplifying code that needs to read statistics from system tables (mysql.stats_xxx) in different SQL statements but requires the same transaction.

Note that: 1. Remember to call (*StatsReader).Close after reading all statistics. 2. StatsReader is not thread-safe. Different goroutines cannot call (*StatsReader).Read concurrently.

func GetStatsReader ¶

func GetStatsReader(snapshot uint64, exec sqlexec.RestrictedSQLExecutor) (reader *StatsReader, err error)

GetStatsReader returns a StatsReader.

func (*StatsReader) Close ¶

func (sr *StatsReader) Close() error

Close closes the StatsReader.

func (*StatsReader) IsHistory ¶

func (sr *StatsReader) IsHistory() bool

IsHistory indicates whether to read history statistics.

func (*StatsReader) Read ¶

func (sr *StatsReader) Read(sql string, args ...interface{}) (rows []chunk.Row, fields []*ast.ResultField, err error)

Read is a thin wrapper reading statistics from storage by sql command.

type StatsWrapper ¶

type StatsWrapper struct {
	AllHg   []*Histogram
	AllTopN []*TopN
}

StatsWrapper wraps a set of histograms and TopN stats.

func NewStatsWrapper ¶

func NewStatsWrapper(hg []*Histogram, topN []*TopN) *StatsWrapper

NewStatsWrapper returns a new StatsWrapper for the given histograms and TopN stats.

type Table ¶

type Table struct {
	HistColl
	Version       uint64
	Name          string
	ExtendedStats *ExtendedStatsColl
	// TblInfoUpdateTS is the UpdateTS of the TableInfo used when filling this struct.
	// It is the schema version of the corresponding table. It is used to skip redundant
	// loading of stats, i.e., if the cached stats is already up-to-date with mysql.stats_xxx tables,
	// and the schema of the table does not change, we don't need to load the stats for this
	// table again.
	TblInfoUpdateTS uint64
}

Table represents statistics for a table.

func PseudoTable ¶

func PseudoTable(tblInfo *model.TableInfo) *Table

PseudoTable creates a pseudo table statistics.

func (*Table) ColumnBetweenRowCount ¶

func (t *Table) ColumnBetweenRowCount(sctx sessionctx.Context, a, b types.Datum, colID int64) (float64, error)

ColumnBetweenRowCount estimates the row count where column greater or equal to a and less than b.

func (*Table) ColumnByName ¶

func (t *Table) ColumnByName(colName string) *Column

ColumnByName finds the statistics.Column for the given column.

func (*Table) ColumnEqualRowCount ¶

func (t *Table) ColumnEqualRowCount(sctx sessionctx.Context, value types.Datum, colID int64) (float64, error)

ColumnEqualRowCount estimates the row count where the column equals to value.

func (*Table) ColumnGreaterRowCount ¶

func (t *Table) ColumnGreaterRowCount(sctx sessionctx.Context, value types.Datum, colID int64) float64

ColumnGreaterRowCount estimates the row count where the column greater than value.

func (*Table) ColumnLessRowCount ¶

func (t *Table) ColumnLessRowCount(sctx sessionctx.Context, value types.Datum, colID int64) float64

ColumnLessRowCount estimates the row count where the column less than value. Note that null values are not counted.

func (*Table) Copy ¶

func (t *Table) Copy() *Table

Copy copies the current table.

func (*Table) GetColRowCount ¶

func (t *Table) GetColRowCount() float64

GetColRowCount tries to get the row count of a column if possible. This method is useful because this row count doesn't consider the modify count.

func (*Table) GetStatsHealthy ¶

func (t *Table) GetStatsHealthy() (int64, bool)

GetStatsHealthy calculates stats healthy if the table stats is not pseudo. If the table stats is pseudo, it returns 0, false, otherwise it returns stats healthy, true.

func (*Table) GetStatsInfo ¶

func (t *Table) GetStatsInfo(ID int64, isIndex bool) (int64, *Histogram, *CMSketch, *TopN, *FMSketch, bool)

GetStatsInfo returns their statistics according to the ID of the column or index, including histogram, CMSketch, TopN and FMSketch.

func (*Table) IndexStartWithColumn ¶

func (t *Table) IndexStartWithColumn(colName string) *Index

IndexStartWithColumn finds the first index whose first column is the given column.

func (*Table) IsInitialized ¶

func (t *Table) IsInitialized() bool

IsInitialized returns true if any column/index stats of the table is initialized.

func (*Table) IsOutdated ¶

func (t *Table) IsOutdated() bool

IsOutdated returns true if the table stats is outdated.

func (*Table) MemoryUsage ¶

func (t *Table) MemoryUsage() *TableMemoryUsage

MemoryUsage returns the total memory usage of this Table. It only calculates the size of the Columns and Indices stats data of the table. The size of other metadata in Table is ignored.

func (*Table) PseudoAvgCountPerValue ¶

func (t *Table) PseudoAvgCountPerValue() float64

PseudoAvgCountPerValue gets a pseudo average count if histogram not exists.

func (*Table) String ¶

func (t *Table) String() string

String implements Stringer interface.

type TableCacheItem ¶

type TableCacheItem interface {
	ItemID() int64
	MemoryUsage() CacheItemMemoryUsage
	IsAllEvicted() bool
	// contains filtered or unexported methods
}

TableCacheItem indicates the unit item stored in statsCache, eg: Column/Index

type TableMemoryUsage ¶

type TableMemoryUsage struct {
	TableID         int64
	TotalMemUsage   int64
	ColumnsMemUsage map[int64]CacheItemMemoryUsage
	IndicesMemUsage map[int64]CacheItemMemoryUsage
}

TableMemoryUsage records tbl memory usage

func (*TableMemoryUsage) TotalColTrackingMemUsage ¶

func (t *TableMemoryUsage) TotalColTrackingMemUsage() (sum int64)

TotalColTrackingMemUsage returns total columns' tracking memory usage

func (*TableMemoryUsage) TotalIdxTrackingMemUsage ¶

func (t *TableMemoryUsage) TotalIdxTrackingMemUsage() (sum int64)

TotalIdxTrackingMemUsage returns total indices' tracking memory usage

func (*TableMemoryUsage) TotalTrackingMemUsage ¶

func (t *TableMemoryUsage) TotalTrackingMemUsage() int64

TotalTrackingMemUsage returns the total tracking memory usage.

type TopN ¶

type TopN struct {
	TopN []TopNMeta
}

TopN stores most-common values, which is used to estimate point queries.

func NewTopN ¶

func NewTopN(n int) *TopN

NewTopN creates the new TopN struct by the given size.

func TopNFromProto ¶

func TopNFromProto(protoTopN []*tipb.CMSketchTopN) *TopN

TopNFromProto converts TopN from its protobuf representation.

func (*TopN) AppendTopN ¶

func (c *TopN) AppendTopN(data []byte, count uint64)

AppendTopN appends a topn into the TopN struct.

func (*TopN) BetweenCount ¶

func (c *TopN) BetweenCount(l, r []byte) uint64

BetweenCount estimates the row count for interval [l, r).

func (*TopN) Copy ¶

func (c *TopN) Copy() *TopN

Copy makes a copy for current TopN.

func (*TopN) DecodedString ¶

func (c *TopN) DecodedString(ctx sessionctx.Context, colTypes []byte) (string, error)

DecodedString returns the value with decoded result.

func (*TopN) Equal ¶

func (c *TopN) Equal(cc *TopN) bool

Equal checks whether the two TopN are equal.

func (*TopN) LowerBound ¶

func (c *TopN) LowerBound(d []byte) (idx int, match bool)

LowerBound searches on the sorted top-n items, returns the smallest index i such that the value at element i is not less than `d`.

func (*TopN) MemoryUsage ¶

func (c *TopN) MemoryUsage() (sum int64)

MemoryUsage returns the total memory usage of a topn.

func (*TopN) Num ¶

func (c *TopN) Num() int

Num returns the ndv of the TopN.

TopN is declared directly in Histogram. So the Len is occupied by the Histogram. We use Num instead.

func (*TopN) QueryTopN ¶

func (c *TopN) QueryTopN(d []byte) (uint64, bool)

QueryTopN returns the results for (h1, h2) in murmur3.Sum128(), if not exists, return (0, false).

func (*TopN) RemoveVal ¶

func (c *TopN) RemoveVal(val []byte)

RemoveVal removes the val from TopN if it exists.

func (*TopN) Sort ¶

func (c *TopN) Sort()

Sort sorts the topn items.

func (*TopN) String ¶

func (c *TopN) String() string

func (*TopN) TotalCount ¶

func (c *TopN) TotalCount() uint64

TotalCount returns how many data is stored in TopN.

type TopNMeta ¶

type TopNMeta struct {
	Encoded []byte
	Count   uint64
}

TopNMeta stores the unit of the TopN.

func MergeTopNAndUpdateCMSketch ¶

func MergeTopNAndUpdateCMSketch(dst, src *TopN, c *CMSketch, numTop uint32) []TopNMeta

MergeTopNAndUpdateCMSketch merges the src TopN into the dst, and spilled values will be inserted into the CMSketch.

func SortTopnMeta ¶

func SortTopnMeta(topnMetas []TopNMeta) []TopNMeta

SortTopnMeta sorts the given TopNMeta slice.

type TopnStatsMergeResponse ¶

type TopnStatsMergeResponse struct {
	TopN       *TopN
	PopedTopn  []TopNMeta
	RemoveVals [][]TopNMeta
	Err        error
}

TopnStatsMergeResponse indicates topn merge worker response

type TopnStatsMergeTask ¶

type TopnStatsMergeTask struct {
	// contains filtered or unexported fields
}

TopnStatsMergeTask indicates a task for merging TopN stats.

func NewTopnStatsMergeTask ¶

func NewTopnStatsMergeTask(start, end int) *TopnStatsMergeTask

NewTopnStatsMergeTask returns a new TopnStatsMergeTask.

type WeightedRowSampleHeap ¶

type WeightedRowSampleHeap []*ReservoirRowSampleItem

WeightedRowSampleHeap implements the Heap interface.

func (WeightedRowSampleHeap) Len ¶

func (h WeightedRowSampleHeap) Len() int

Len implements the Heap interface.

func (WeightedRowSampleHeap) Less ¶

func (h WeightedRowSampleHeap) Less(i, j int) bool

Less implements the Heap interface.

func (*WeightedRowSampleHeap) Pop ¶

func (h *WeightedRowSampleHeap) Pop() interface{}

Pop implements the Heap interface.

func (*WeightedRowSampleHeap) Push ¶

func (h *WeightedRowSampleHeap) Push(i interface{})

Push implements the Heap interface.

func (WeightedRowSampleHeap) Swap ¶

func (h WeightedRowSampleHeap) Swap(i, j int)

Swap implements the Heap interface.

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
handle

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL