raftstore

package
v0.0.0-...-6576891 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 25, 2023 License: Apache-2.0 Imports: 54 Imported by: 0

Documentation

Index

Constants

View Source
// Initial values listed under the "Bootstrap" section of the package docs.
// NOTE(review): presumably InitEpochVer/InitEpochConfVer seed the region epoch
// version and conf-version of a freshly bootstrapped region, and KvTS/RaftTS are
// the timestamps used when writing to the kv and raft engines — confirm against
// the bootstrap code.
const (
	InitEpochVer     uint64 = 1
	InitEpochConfVer uint64 = 1
	KvTS             uint64 = 1
	RaftTS           uint64 = 0
)

Bootstrap

View Source
// Size constants.
//
// KB is 1024 and MB is 1024 * 1024 (1048576).
// NOTE(review): SplitSizeMb is presumably a region split size expressed in
// units of MB (i.e. 96 MB) — confirm where it is multiplied by MB.
const (
	KB          uint64 = 1024
	MB          uint64 = 1024 * 1024
	SplitSizeMb uint64 = 96
)

Config

View Source
// Byte prefixes and suffixes used to build the store-local keys.
const (
	// LocalPrefix is the first byte of local keys (see RegionMetaMinKey).
	LocalPrefix byte = 0x01

	// We save two types of region data in the DB: raft data and other meta data.
	// When the store starts, we iterate over all region meta data to construct
	// the peers. There is no need to traverse the large raft data, so the two
	// kinds are separated with different prefixes.
	RegionRaftPrefix byte = 0x02
	RegionMetaPrefix byte = 0x03
	RegionRaftLogLen      = 19 // REGION_RAFT_PREFIX_KEY + region_id + suffix + index, i.e. 2 + 8 + 1 + 8 bytes

	// The following are the suffixes that come after the local prefix.
	// For region id.
	RaftLogSuffix           byte = 0x01
	RaftStateSuffix         byte = 0x02
	ApplyStateSuffix        byte = 0x03
	SnapshotRaftStateSuffix byte = 0x04

	// For region meta.
	RegionStateSuffix byte = 0x01
)

keys

View Source
// Retry settings for checking whether the cluster has been bootstrapped.
// NOTE(review): presumably the check is attempted up to
// MaxCheckClusterBootstrappedRetryCount times, waiting
// CheckClusterBootstrapRetrySeconds seconds between attempts — confirm in the
// node start-up code.
const (
	MaxCheckClusterBootstrappedRetryCount = 60
	CheckClusterBootstrapRetrySeconds     = 3
)

node

View Source
const (
	// When we create a region peer, we initialize its log term and index to a
	// value greater than 0, so that the follower peer is forced to sync the
	// snapshot first.
	RaftInitLogTerm  = 5
	RaftInitLogIndex = 5

	// NOTE(review): presumably the maximum number of snapshot retries — confirm
	// at the call sites.
	MaxSnapRetryCnt = 5

	// MaxCacheCapacity evaluates to 1023.
	// NOTE(review): presumably the entry cache capacity — confirm against EntryCache.
	MaxCacheCapacity = 1024 - 1
)

const

View Source
// Sentinel values: both the invalid raft index and the invalid ID are 0.
const (
	RaftInvalidIndex uint64 = 0
	InvalidID        uint64 = 0
)

util

View Source
// Time conversion constants.
//
// NsecPerMsec is the number of nanoseconds in one millisecond.
// MsecMask is (1 << SecShift) - 1 = 1023 (0x3ff), i.e. a mask selecting the low
// SecShift (10) bits of a value.
// NOTE(review): presumably TimeToU64/U64ToTime pack seconds above SecShift and
// milliseconds in the masked low bits — confirm in their implementations.
const (
	NsecPerMsec uint64 = 1000000
	SecShift    uint64 = 10
	MsecMask    uint64 = (1 << SecShift) - 1
)

util

View Source
const LockstoreFileName = "lockstore.dump"

LockstoreFileName defines the lockstore file name.

View Source
const MaxDeleteBatchSize int = 32 * 1024

MaxDeleteBatchSize represents the batch size. In our tests, we found that if the batch size is too large, running deleteAllInRange will reduce OLTP QPS by 30% ~ 60%. We found that 32K is a proper choice.

Variables

View Source
var (
	// MinKey is a key consisting of a single zero byte.
	MinKey = []byte{0}

	// Data keys have two prefixes: meta 'm' and table 't'.
	// Extra keys have prefix 'm' + 1 = 'n', and extra table keys have prefix
	// 't' + 1 = 'u', so the end key is 'v'.
	MinDataKey = []byte{'m'}
	MaxDataKey = []byte{'v'}

	// Both region meta bounds start with LocalPrefix. The max key uses
	// RegionMetaPrefix + 1, so in bytewise order every key that begins with
	// {LocalPrefix, RegionMetaPrefix} sorts at or after RegionMetaMinKey and
	// before RegionMetaMaxKey.
	RegionMetaMinKey = []byte{LocalPrefix, RegionMetaPrefix}
	RegionMetaMaxKey = []byte{LocalPrefix, RegionMetaPrefix + 1}
)

keys

View Source
// Internal keys: each one is InternalKeyPrefix (the single byte 0xff) followed
// by an ASCII name ("region", "store" or "safepoint").
//
// InternalKeyPrefix is built from a one-element slice literal, so its capacity
// equals its length and every append below allocates a new backing array; the
// derived keys therefore do not share storage with each other or with the prefix.
var (
	InternalKeyPrefix        = []byte{0xff}
	InternalRegionMetaPrefix = append(InternalKeyPrefix, "region"...)
	InternalStoreMetaKey     = append(InternalKeyPrefix, "store"...)
	InternalSafePointKey     = append(InternalKeyPrefix, "safepoint"...)
)

InternalKey

Functions

func ApplyStateKey

func ApplyStateKey(regionID uint64) []byte

ApplyStateKey makes the apply state key with the given region id.

func BindRespError

func BindRespError(resp *raft_cmdpb.RaftCmdResponse, err error)

BindRespError binds the RaftCmdResponse to the error.

func BindRespTerm

func BindRespTerm(resp *raft_cmdpb.RaftCmdResponse, term uint64)

BindRespTerm binds the RaftCmdResponse to the term.

func BootstrapStore

func BootstrapStore(engines *Engines, clussterID, storeID uint64) error

BootstrapStore is used to bootstrap the store.

func CheckKeyInRegion

func CheckKeyInRegion(key []byte, region *metapb.Region) error

CheckKeyInRegion checks whether key is in the region range [`start_key`, `end_key`).

func CheckKeyInRegionExclusive

func CheckKeyInRegionExclusive(key []byte, region *metapb.Region) error

CheckKeyInRegionExclusive checks whether key is in the region range (`start_key`, `end_key`).

func CheckKeyInRegionInclusive

func CheckKeyInRegionInclusive(key []byte, region *metapb.Region) error

CheckKeyInRegionInclusive checks whether key is in the region range [`start_key`, `end_key`].

func CheckRegionEpoch

func CheckRegionEpoch(req *raft_cmdpb.RaftCmdRequest, region *metapb.Region, includeRegion bool) error

CheckRegionEpoch checks the region epoch.

func ClearMeta

func ClearMeta(engines *Engines, kvWB, raftWB *WriteBatch, regionID uint64, lastIndex uint64) error

ClearMeta deletes meta.

func ClearPrepareBootstrap

func ClearPrepareBootstrap(engines *Engines, regionID uint64) error

ClearPrepareBootstrap clears the cluster information and raft state.

func ClearPrepareBootstrapState

func ClearPrepareBootstrapState(engines *Engines) error

ClearPrepareBootstrapState clears the cluster information.

func CloneMsg

func CloneMsg(origin, cloned proto.Message) error

CloneMsg clones the Message.

func CompactRaftLog

func CompactRaftLog(tag string, state *applyState, compactIndex, compactTerm uint64) error

CompactRaftLog discards all log entries prior to compact_index. We must guarantee that the compact_index is not greater than applied index.

func CreateRaftLogCompactionFilter

func CreateRaftLogCompactionFilter(targetLevel int, startKey, endKey []byte) badger.CompactionFilter

CreateRaftLogCompactionFilter creates a new badger.CompactionFilter.

func ErrResp

func ErrResp(err error) *raft_cmdpb.RaftCmdResponse

ErrResp returns a RaftCmdResponse which is bound to the error.

func ErrRespRegionNotFound

func ErrRespRegionNotFound(regionID uint64) *raft_cmdpb.RaftCmdResponse

ErrRespRegionNotFound returns a RaftCmdResponse which is bound to the RegionNotFound error.

func ErrRespStaleCommand

func ErrRespStaleCommand(term uint64) *raft_cmdpb.RaftCmdResponse

ErrRespStaleCommand returns a RaftCmdResponse which is bound to the ErrStaleCommand and the term.

func ErrRespWithTerm

func ErrRespWithTerm(err error, term uint64) *raft_cmdpb.RaftCmdResponse

ErrRespWithTerm returns a RaftCmdResponse which is bound to the error and the term.

func ErrToPbError

func ErrToPbError(e error) *errorpb.Error

ErrToPbError converts error to *errorpb.Error.

func GetChangePeerCmd

func GetChangePeerCmd(msg *raft_cmdpb.RaftCmdRequest) *raft_cmdpb.ChangePeerRequest

GetChangePeerCmd gets a command which is used to change peer.

func IsEpochStale

func IsEpochStale(epoch *metapb.RegionEpoch, checkEpoch *metapb.RegionEpoch) bool

IsEpochStale checks whether epoch is staler than check epoch.

func IsUrgentRequest

func IsUrgentRequest(rlog raftlog.RaftLog) bool

IsUrgentRequest checks whether the request should be committed on all followers as soon as possible. We enable follower lazy commit to get a better performance. But it may not be appropriate for some requests.

func NewCustomWriteBatch

func NewCustomWriteBatch(startTS, commitTS uint64, ctx *kvrpcpb.Context) mvcc.WriteBatch

NewCustomWriteBatch returns a new mvcc.WriteBatch.

func NewDBWriter

func NewDBWriter(conf *config.Config, router *Router) mvcc.DBWriter

NewDBWriter creates a new mvcc.DBWriter.

func NewTestRaftWriter

func NewTestRaftWriter(dbBundle *mvcc.DBBundle, engine *Engines) mvcc.DBWriter

NewTestRaftWriter creates a new mvcc.DBWriter with the given *mvcc.DBBundle and *Engines.

func NotifyReqRegionRemoved

func NotifyReqRegionRemoved(regionID uint64, cb *Callback)

NotifyReqRegionRemoved notifies the callback with a RaftCmdResponse which is bound to the ErrRegionNotFound.

func NotifyStaleReq

func NotifyStaleReq(term uint64, cb *Callback)

NotifyStaleReq notifies the callback with a RaftCmdResponse which is bound to the ErrStaleCommand and the term.

func PeerEqual

func PeerEqual(l, r *metapb.Peer) bool

PeerEqual returns a boolean value indicating whether two peers are equal.

func PrepareBootstrap

func PrepareBootstrap(engins *Engines, storeID, regionID, peerID uint64) (*metapb.Region, error)

PrepareBootstrap initializes cluster information and raft state.

func Quorum

func Quorum(total int) int

Quorum returns a quorum with the total.

func RaftLogIndex

func RaftLogIndex(key []byte) (uint64, error)

RaftLogIndex gets the log index from raft log key generated by `raft_log_key`.

func RaftLogKey

func RaftLogKey(regionID, index uint64) []byte

RaftLogKey makes the raft log key with the given region id and index.

func RaftStateKey

func RaftStateKey(regionID uint64) []byte

RaftStateKey makes the raft state key with the given region id.

func RawEndKey

func RawEndKey(region *metapb.Region) []byte

RawEndKey gets the `end_key` of current region in encoded form.

func RawStartKey

func RawStartKey(region *metapb.Region) []byte

RawStartKey gets the `start_key` of current region in encoded form.

func RegionEqual

func RegionEqual(l, r *metapb.Region) bool

RegionEqual returns a boolean value indicating whether two regions are equal.

func RegionMetaPrefixKey

func RegionMetaPrefixKey(regionID uint64) []byte

RegionMetaPrefixKey returns the region meta prefix key with the given region id.

func RegionRaftPrefixKey

func RegionRaftPrefixKey(regionID uint64) []byte

RegionRaftPrefixKey returns the region raft prefix key with the given region id.

func RegionStateKey

func RegionStateKey(regionID uint64) []byte

RegionStateKey returns the region state key with the given region id.

func RestoreLockStore

func RestoreLockStore(offset uint64, bundle *mvcc.DBBundle, raftDB *badger.DB) error

RestoreLockStore restores the lock store.

func SnapshotRaftStateKey

func SnapshotRaftStateKey(regionID uint64) []byte

SnapshotRaftStateKey makes the snapshot raft state key with the given region id.

func TimeToU64

func TimeToU64(t time.Time) uint64

TimeToU64 converts time t to a uint64 type.

func U64ToTime

func U64ToTime(u uint64) time.Time

U64ToTime converts uint64 u to a time value.

func WritePeerState

func WritePeerState(kvWB *WriteBatch, region *metapb.Region, state rspb.PeerState, mergeState *rspb.MergeState)

WritePeerState adds the peer state to the WriteBatch.

Types

type ApplyOptions

type ApplyOptions struct {
	DBBundle *mvcc.DBBundle
	Region   *metapb.Region
	Abort    *uint32
	Builder  *sstable.Builder
	WB       *WriteBatch
}

ApplyOptions represents the apply snapshot options.

type ApplyResult

type ApplyResult struct {
	HasPut      bool
	RegionState *rspb.RegionLocalState
}

ApplyResult represents the apply result.

type ApplySnapResult

type ApplySnapResult struct {
	// PrevRegion is the region before snapshot applied
	PrevRegion *metapb.Region
	Region     *metapb.Region
}

ApplySnapResult defines a result of applying snapshot.

type CFFile

type CFFile struct {
	CF          CFName
	Path        string
	TmpPath     string
	ClonePath   string
	SstWriter   *rocksdb.SstFileWriter
	File        *os.File
	KVCount     int
	Size        uint64
	WrittenSize uint64
	Checksum    uint32
	WriteDigest hash.Hash32
}

CFFile represents a column family file.

type CFName

type CFName = string

CFName represents a column family name.

const (
	CFDefault CFName = "default"
	CFLock    CFName = "lock"
	CFWrite   CFName = "write"
	CFRaft    CFName = "raft"
)

snapshot

type CacheQueryStats

type CacheQueryStats struct {
	// contains filtered or unexported fields
}

CacheQueryStats is used to record the status of cache querying.

type Callback

type Callback struct {
	// contains filtered or unexported fields
}

Callback represents a callback.

func NewCallback

func NewCallback() *Callback

NewCallback creates a new Callback.

func (*Callback) Done

func (cb *Callback) Done(resp *raft_cmdpb.RaftCmdResponse)

Done sets the RaftCmdResponse and calls Done() on the WaitGroup.

type Config

// Config holds the raft store configuration settings.
//
// NOTE(review): several field comments below mention "(ms)", but the
// corresponding fields are typed time.Duration; the unit note appears to be
// inherited from the original configuration docs — confirm how the values are
// populated before relying on it.
type Config struct {
	// True for high reliability: prevents data loss on power failure.
	SyncLog bool
	// Minimizes disruption when a partitioned node rejoins the cluster by using a two-phase election.
	Prevote    bool
	RaftdbPath string

	SnapPath string

	// Store capacity. 0 means no limit.
	Capacity uint64

	// RaftBaseTickInterval is the base tick interval (ms).
	RaftBaseTickInterval        time.Duration
	RaftHeartbeatTicks          int
	RaftElectionTimeoutTicks    int
	RaftMinElectionTimeoutTicks int
	RaftMaxElectionTimeoutTicks int
	RaftMaxSizePerMsg           uint64
	RaftMaxInflightMsgs         int

	// When an entry exceeds the max size, proposing it is rejected.
	RaftEntryMaxSize uint64

	// Interval to gc unnecessary raft log (ms).
	RaftLogGCTickInterval time.Duration
	// A threshold to gc stale raft log; must be >= 1.
	RaftLogGcThreshold uint64
	// When the entry count exceeds this value, gc is forcibly triggered.
	RaftLogGcCountLimit uint64
	// When the approximate size of raft log entries exceeds this value,
	// gc is forcibly triggered.
	RaftLogGcSizeLimit uint64
	// When a peer is not responding for this time, leader will not keep entry cache for it.
	RaftEntryCacheLifeTime time.Duration
	// When a peer is newly added, reject transferring leader to the peer for a while.
	RaftRejectTransferLeaderDuration time.Duration

	// Interval (ms) to check whether a region needs to be split.
	SplitRegionCheckTickInterval time.Duration
	// When the size change of a region exceeds this diff since the last check,
	// the region is checked again to see whether it should be split.
	RegionSplitCheckDiff uint64
	// Interval (ms) to check whether to start compaction for a region.
	RegionCompactCheckInterval time.Duration
	// Delay before deleting a stale peer.
	CleanStalePeerDelay time.Duration
	// Number of regions for each time checking.
	RegionCompactCheckStep uint64
	// Minimum number of tombstones to trigger manual compaction.
	RegionCompactMinTombstones uint64
	// Minimum percentage of tombstones to trigger manual compaction.
	// Should be between 1 and 100.
	RegionCompactTombstonesPencent uint64
	PdHeartbeatTickInterval        time.Duration
	PdStoreHeartbeatTickInterval   time.Duration
	SnapMgrGcTickInterval          time.Duration
	SnapGcTimeout                  time.Duration

	NotifyCapacity  uint64
	MessagesPerTick uint64

	// When a peer is not active for max_peer_down_duration,
	// the peer is considered to be down and is reported to PD.
	MaxPeerDownDuration time.Duration

	// If the leader of a peer is missing for longer than max_leader_missing_duration,
	// the peer would ask pd to confirm whether it is valid in any region.
	// If the peer is stale and is not valid in any region, it will destroy itself.
	MaxLeaderMissingDuration time.Duration
	// Similar to the max_leader_missing_duration, instead it will log warnings and
	// try to alert monitoring systems, if there is any.
	AbnormalLeaderMissingDuration time.Duration
	PeerStaleStateCheckInterval   time.Duration

	LeaderTransferMaxLogLag uint64

	SnapApplyBatchSize uint64

	// Interval (ms) to check whether the region data is consistent.
	ConsistencyCheckInterval time.Duration

	ReportRegionFlowInterval time.Duration

	// The lease provided by a successfully proposed and applied entry.
	RaftStoreMaxLeaderLease time.Duration

	// The right region derives the origin region id on split.
	RightDeriveWhenSplit bool

	AllowRemoveLeader bool

	// Max log gap allowed to propose merge.
	MergeMaxLogGap uint64

	// Interval to re-propose merge.
	MergeCheckTickInterval time.Duration

	UseDeleteRange bool

	ApplyMaxBatchSize uint64
	ApplyPoolSize     uint64

	StoreMaxBatchSize uint64

	ConcurrentSendSnapLimit uint64
	ConcurrentRecvSnapLimit uint64

	GrpcInitialWindowSize uint64
	GrpcKeepAliveTime     time.Duration
	GrpcKeepAliveTimeout  time.Duration
	GrpcRaftConnNum       uint64

	Addr          string
	AdvertiseAddr string
	Labels        []StoreLabel

	SplitCheck *splitCheckConfig
}

Config is the representation of configuration settings.

func NewDefaultConfig

func NewDefaultConfig() *Config

NewDefaultConfig creates a default config.

func (*Config) Validate

func (c *Config) Validate() error

Validate returns an error if the configuration fails validation.

type ConsistencyState

type ConsistencyState struct {
	LastCheckTime time.Time
	// (computed_result_or_to_be_verified, index, hash)
	Index uint64
	Hash  []byte
}

ConsistencyState is used for consistency check.

type DestroyPeerJob

type DestroyPeerJob struct {
	Initialized bool
	AsyncRemove bool
	RegionID    uint64
	Peer        *metapb.Peer
}

DestroyPeerJob defines a job which is used to destroy a peer.

type Engines

type Engines struct {
	// contains filtered or unexported fields
}

Engines represents storage engines.

func NewEngines

func NewEngines(kvEngine *mvcc.DBBundle, raftEngine *badger.DB, kvPath, raftPath string) *Engines

NewEngines creates a new Engines.

func (*Engines) SyncKVWAL

func (en *Engines) SyncKVWAL() error

SyncKVWAL syncs the kv wal.

func (*Engines) SyncRaftWAL

func (en *Engines) SyncRaftWAL() error

SyncRaftWAL syncs the raft wal.

func (*Engines) WriteKV

func (en *Engines) WriteKV(wb *WriteBatch) error

WriteKV flushes the WriteBatch to the kv.

func (*Engines) WriteRaft

func (en *Engines) WriteRaft(wb *WriteBatch) error

WriteRaft flushes the WriteBatch to the raft.

type EntryCache

type EntryCache struct {
	// contains filtered or unexported fields
}

EntryCache represents an entry cache.

type ErrEpochNotMatch

type ErrEpochNotMatch struct {
	Message string
	Regions []*metapb.Region
}

ErrEpochNotMatch is returned when the epoch does not match.

func (*ErrEpochNotMatch) Error

func (e *ErrEpochNotMatch) Error() string

type ErrKeyNotInRegion

type ErrKeyNotInRegion struct {
	Key    []byte
	Region *metapb.Region
}

ErrKeyNotInRegion is returned when this key is not in the region.

func (*ErrKeyNotInRegion) Error

func (e *ErrKeyNotInRegion) Error() string

type ErrNotLeader

type ErrNotLeader struct {
	RegionID uint64
	Leader   *metapb.Peer
}

ErrNotLeader is returned when this region is not Leader.

func (*ErrNotLeader) Error

func (e *ErrNotLeader) Error() string

type ErrRaftEntryTooLarge

type ErrRaftEntryTooLarge struct {
	RegionID  uint64
	EntrySize uint64
}

ErrRaftEntryTooLarge is returned when the raft entry is too large.

func (*ErrRaftEntryTooLarge) Error

func (e *ErrRaftEntryTooLarge) Error() string

type ErrRegionNotFound

type ErrRegionNotFound struct {
	RegionID uint64
}

ErrRegionNotFound is returned when this region is not found.

func (*ErrRegionNotFound) Error

func (e *ErrRegionNotFound) Error() string

type ErrServerIsBusy

type ErrServerIsBusy struct {
	Reason    string
	BackoffMs uint64
}

ErrServerIsBusy is returned when the server is busy.

func (*ErrServerIsBusy) Error

func (e *ErrServerIsBusy) Error() string

type ErrStaleCommand

type ErrStaleCommand struct{}

ErrStaleCommand is returned when the command is stale.

func (*ErrStaleCommand) Error

func (e *ErrStaleCommand) Error() string

type ErrStoreNotMatch

type ErrStoreNotMatch struct {
	RequestStoreID uint64
	ActualStoreID  uint64
}

ErrStoreNotMatch is returned when the store does not match.

func (*ErrStoreNotMatch) Error

func (e *ErrStoreNotMatch) Error() string

type GenSnapTask

type GenSnapTask struct {
	// contains filtered or unexported fields
}

GenSnapTask represents a task to generate snapshot.

type GlobalContext

type GlobalContext struct {
	// contains filtered or unexported fields
}

GlobalContext represents a global context.

type IOLimiter

type IOLimiter = rate.Limiter

IOLimiter controls how frequently events are allowed to happen.

func NewInfLimiter

func NewInfLimiter() *IOLimiter

NewInfLimiter returns a new IOLimiter.

type InvokeContext

type InvokeContext struct {
	RegionID   uint64
	RaftState  raftState
	ApplyState applyState

	SnapRegion *metapb.Region
	// contains filtered or unexported fields
}

InvokeContext represents an invoker context.

func NewInvokeContext

func NewInvokeContext(store *PeerStorage) *InvokeContext

NewInvokeContext returns a new InvokeContext.

type JobStatus

type JobStatus = uint32

JobStatus represents a job status.

const (
	JobStatusPending JobStatus = 0 + iota
	JobStatusRunning
	JobStatusCancelling
	JobStatusCancelled
	JobStatusFinished
	JobStatusFailed
)

JobStatus

type LeaderChecker

type LeaderChecker interface {
	IsLeader(ctx *kvrpcpb.Context, router *Router) *errorpb.Error
}

LeaderChecker represents a leader checker interface.

type Lease

type Lease struct {
	// contains filtered or unexported fields
}

Lease records an expiration time, used to examine whether the current moment is within the lease or not. It is dedicated to the Raft leader lease mechanism and holds one of the following states:

  1. Suspect Timestamp: a suspicious leader lease timestamp, which marks that the leader may still hold or may have lost its lease until the clock time goes over this timestamp.
  2. Valid Timestamp: a valid leader lease timestamp, which marks that the leader holds the lease for now. The lease is valid until the clock time goes over this timestamp.

```text
Time
|---------------------------------->
        ^               ^
       Now           Suspect TS
State:  |    Suspect    |   Suspect

|---------------------------------->
        ^               ^
       Now           Valid TS
State:  |     Valid     |   Expired
```

Note:

  • Valid timestamp would increase when raft log entries are applied in current term.
  • Suspect timestamp would be set after the message `MsgTimeoutNow` is sent by the current peer. The message `MsgTimeoutNow` starts a leader transfer procedure. During this procedure, the current peer, as an old leader, may still hold its lease or lose it. It's possible that a new leader is elected and the current peer, as an old leader, doesn't step down due to a network partition from the new leader. In that case, the current peer loses its leader lease. Until this suspect leader lease expires, read requests cannot be performed locally.
  • The valid leader lease should be `lease = max_lease - (commit_ts - send_ts)` And the expired timestamp for that leader lease is `commit_ts + lease`, which is `send_ts + max_lease` in short.

func NewLease

func NewLease(maxLease time.Duration) *Lease

NewLease creates a new Lease.

func (*Lease) Expire

func (l *Lease) Expire()

Expire sets the lease state to expired.

func (*Lease) ExpireRemoteLease

func (l *Lease) ExpireRemoteLease()

ExpireRemoteLease sets the remote lease state to expired.

func (*Lease) Inspect

func (l *Lease) Inspect(ts *time.Time) LeaseState

Inspect the lease state for the ts or now.

func (*Lease) MaybeNewRemoteLease

func (l *Lease) MaybeNewRemoteLease(term uint64) *RemoteLease

MaybeNewRemoteLease returns a new `RemoteLease` if there is none.

func (*Lease) Renew

func (l *Lease) Renew(sendTs time.Time)

Renew the lease to the bound.

func (*Lease) Suspect

func (l *Lease) Suspect(sendTs time.Time)

Suspect the lease to the bound.

type LeaseState

type LeaseState int

LeaseState represents the lease state.

// LeaseState values. They start at 1 (1 + iota), so the zero value of
// LeaseState does not equal any of the named states.
const (
	// The lease is suspicious, may be invalid.
	LeaseStateSuspect LeaseState = 1 + iota
	// The lease is valid.
	LeaseStateValid
	// The lease is expired.
	LeaseStateExpired
)

LeaseState

type LimitWriter

type LimitWriter struct {
	// contains filtered or unexported fields
}

LimitWriter represents a limit writer.

func (*LimitWriter) Write

func (lw *LimitWriter) Write(b []byte) (int, error)

type MetaFile

type MetaFile struct {
	Meta *rspb.SnapshotMeta
	Path string
	File *os.File

	// for writing snapshot
	TmpPath string
}

MetaFile represents a meta file.

type Msg

type Msg struct {
	Type     MsgType
	RegionID uint64
	Data     interface{}
}

Msg represents a message.

func NewMsg

func NewMsg(tp MsgType, data interface{}) Msg

NewMsg creates a msg.

func NewPeerMsg

func NewPeerMsg(tp MsgType, regionID uint64, data interface{}) Msg

NewPeerMsg creates a peer msg.

type MsgComputeHashResult

type MsgComputeHashResult struct {
	Index uint64
	Hash  []byte
}

MsgComputeHashResult defines a message which is used to compute hash result.

type MsgGCSnap

type MsgGCSnap struct {
	Snaps []SnapKeyWithSending
}

MsgGCSnap defines a message which is used to collect snapshot.

type MsgHalfSplitRegion

type MsgHalfSplitRegion struct {
	RegionEpoch *metapb.RegionEpoch
}

MsgHalfSplitRegion defines a message which is used to split region in half.

type MsgMergeResult

type MsgMergeResult struct {
	TargetPeer *metapb.Peer
	Stale      bool
}

MsgMergeResult defines a message which is used to merge result.

type MsgRaftCmd

type MsgRaftCmd struct {
	SendTime time.Time
	Request  raftlog.RaftLog
	Callback *Callback
}

MsgRaftCmd defines a message of raft command.

type MsgSignificant

type MsgSignificant struct {
	Type           MsgSignificantType
	ToPeerID       uint64
	SnapshotStatus raft.SnapshotStatus
}

MsgSignificant represents a significant msg.

type MsgSignificantType

type MsgSignificantType int

MsgSignificantType represents a significant type of msg.

const (
	MsgSignificantTypeStatus      MsgSignificantType = 1
	MsgSignificantTypeUnreachable MsgSignificantType = 2
)

MsgSignificantType

type MsgSplitRegion

type MsgSplitRegion struct {
	RegionEpoch *metapb.RegionEpoch
	// It's an encoded key.
	// TODO: support meta key.
	SplitKeys [][]byte
	Callback  *Callback
}

MsgSplitRegion defines a message which is used to split region.

type MsgStoreClearRegionSizeInRange

type MsgStoreClearRegionSizeInRange struct {
	StartKey []byte
	EndKey   []byte
}

MsgStoreClearRegionSizeInRange defines a message which is used to clear region size in range.

type MsgType

type MsgType int64

MsgType represents a msg type.

// MsgType values. The identifiers are assigned in separate numeric ranges:
// 0-16 for the first group, 101-107 for the MsgTypeStore* values, 201-202 for
// the MsgTypeFsm* values and 301-307 for the MsgTypeApply* values in the last
// group. Note that 103 is not assigned to any identifier.
const (
	MsgTypeNull                   MsgType = 0
	MsgTypeRaftMessage            MsgType = 1
	MsgTypeRaftCmd                MsgType = 2
	MsgTypeSplitRegion            MsgType = 3
	MsgTypeComputeResult          MsgType = 4
	MsgTypeRegionApproximateSize  MsgType = 5
	MsgTypeRegionApproximateKeys  MsgType = 6
	MsgTypeCompactionDeclineBytes MsgType = 7
	MsgTypeHalfSplitRegion        MsgType = 8
	MsgTypeMergeResult            MsgType = 9
	MsgTypeGcSnap                 MsgType = 10
	MsgTypeClearRegionSize        MsgType = 11
	MsgTypeTick                   MsgType = 12
	MsgTypeSignificantMsg         MsgType = 13
	MsgTypeStart                  MsgType = 14
	MsgTypeApplyRes               MsgType = 15
	MsgTypeNoop                   MsgType = 16

	MsgTypeStoreRaftMessage   MsgType = 101
	MsgTypeStoreSnapshotStats MsgType = 102
	// Clear the region size and keys for all regions in the range, so that we
	// can force them to re-calculate their size later.
	MsgTypeStoreClearRegionSizeInRange MsgType = 104
	MsgTypeStoreCompactedEvent         MsgType = 105
	MsgTypeStoreTick                   MsgType = 106
	MsgTypeStoreStart                  MsgType = 107

	MsgTypeFsmNormal  MsgType = 201
	MsgTypeFsmControl MsgType = 202

	MsgTypeApply             MsgType = 301
	MsgTypeApplyRegistration MsgType = 302
	MsgTypeApplyProposal     MsgType = 303
	MsgTypeApplyCatchUpLogs  MsgType = 304
	MsgTypeApplyLogsUpToDate MsgType = 305
	MsgTypeApplyDestroy      MsgType = 306
	MsgTypeApplySnapshot     MsgType = 307
)

Msg

type Node

type Node struct {
	// contains filtered or unexported fields
}

Node represents a raft store node.

func NewNode

func NewNode(system *raftBatchSystem, store *metapb.Store, cfg *Config, pdClient pd.Client, observer PeerEventObserver) *Node

NewNode creates a new raft store node.

func (*Node) BootstrapCluster

func (n *Node) BootstrapCluster(ctx context.Context, engines *Engines, firstRegion *metapb.Region) (newCluster bool, err error)

BootstrapCluster is used to bootstrap the cluster.

func (*Node) Start

func (n *Node) Start(ctx context.Context, engines *Engines, trans Transport, snapMgr *SnapManager, pdWorker *worker, router *router) error

Start starts raft store node.

type Peer

type Peer struct {
	Meta *metapb.Peer

	RaftGroup *raft.RawNode

	// Record the last instant of each peer's heartbeat response.
	PeerHeartbeats map[uint64]time.Time

	// Record the instants of peers being added into the configuration.
	// Remove them after they are not pending any more.
	PeersStartPendingTime map[uint64]time.Time
	RecentAddedPeer       *RecentAddedPeer

	// an inaccurate difference in region size since last reset.
	SizeDiffHint uint64

	// approximate size of the region.
	ApproximateSize *uint64
	// approximate keys of the region.
	ApproximateKeys         *uint64
	CompactionDeclinedBytes uint64

	ConsistencyState *ConsistencyState

	Tag string

	// Index of last scheduled committed raft log.
	LastApplyingIdx  uint64
	LastCompactedIdx uint64

	// Approximate size of logs that is applied but not compacted yet.
	RaftLogSizeHint uint64

	PendingRemove bool

	PendingMergeState *rspb.MergeState

	PendingMergeApplyResult *WaitApplyResultState
	PeerStat                PeerStat
	// contains filtered or unexported fields
}

Peer represents a peer.

func NewPeer

func NewPeer(storeID uint64, cfg *Config, engines *Engines, region *metapb.Region, regionSched chan<- task,
	peer *metapb.Peer) (*Peer, error)

NewPeer creates a new peer.

func (*Peer) Activate

func (p *Peer) Activate(applyMsgs *applyMsgs)

Activate registers self to applyMsgs so that the peer is then usable. Also trigger `RegionChangeEvent::Create` here.

func (*Peer) AnyNewPeerCatchUp

func (p *Peer) AnyNewPeerCatchUp(peerID uint64) bool

AnyNewPeerCatchUp returns `true` if any new peer catches up with the leader in replicating logs. And updates `PeersStartPendingTime` if needed.

func (*Peer) ApplyReads

func (p *Peer) ApplyReads(kv *mvcc.DBBundle, ready *raft.Ready)

ApplyReads applies reads.

func (*Peer) CheckPeers

func (p *Peer) CheckPeers()

CheckPeers checks and updates `peer_heartbeats` for the peer.

func (*Peer) CheckStaleState

func (p *Peer) CheckStaleState(cfg *Config) StaleState

CheckStaleState checks the stale state of the peer.

func (*Peer) CollectDownPeers

func (p *Peer) CollectDownPeers(maxDuration time.Duration) []*pdpb.PeerStats

CollectDownPeers collects all down peers.

func (*Peer) CollectPendingPeers

func (p *Peer) CollectPendingPeers() []*metapb.Peer

CollectPendingPeers collects all pending peers and update `peers_start_pending_time`.

func (*Peer) Destroy

func (p *Peer) Destroy(engine *Engines, keepData bool) error

Destroy does the real destroy task which includes: 1. Set the region to tombstone; 2. Clear data; 3. Notify all pending requests.

func (*Peer) GetMinProgress

func (p *Peer) GetMinProgress() uint64

GetMinProgress gets min match.

func (*Peer) GetRaftStatus

func (p *Peer) GetRaftStatus() *raft.Status

GetRaftStatus gets the raft status.

func (*Peer) GetRole

func (p *Peer) GetRole() raft.StateType

GetRole gets the raft state type.

func (*Peer) HandleRaftReadyAppend

func (p *Peer) HandleRaftReadyAppend(trans Transport, applyMsgs *applyMsgs, kvWB, raftWB *WriteBatch, observer PeerEventObserver) *ReadyICPair

HandleRaftReadyAppend returns a ready IC pair.

func (*Peer) HandleRaftReadyApply

func (p *Peer) HandleRaftReadyApply(kv *mvcc.DBBundle, applyMsgs *applyMsgs, ready *raft.Ready)

HandleRaftReadyApply handles raft ready apply msgs.

func (*Peer) HasPendingSnapshot

func (p *Peer) HasPendingSnapshot() bool

HasPendingSnapshot returns `true` if the raft group has replicated a snapshot but not committed it yet.

func (*Peer) HeartbeatPd

func (p *Peer) HeartbeatPd(pdScheduler chan<- task)

HeartbeatPd adds a region heartbeat task to the pd scheduler.

func (*Peer) IsApplyingSnapshot

func (p *Peer) IsApplyingSnapshot() bool

IsApplyingSnapshot returns whether the peer is applying a snapshot or not.

func (*Peer) IsLeader

func (p *Peer) IsLeader() bool

IsLeader returns whether the state type is leader or not.

func (*Peer) LeaderID

func (p *Peer) LeaderID() uint64

LeaderID returns the leader id of the raft group.

func (*Peer) MaybeCampaign

func (p *Peer) MaybeCampaign(parentIsLeader bool) bool

MaybeCampaign tries to campaign.

func (*Peer) MaybeDestroy

func (p *Peer) MaybeDestroy() *DestroyPeerJob

MaybeDestroy tries to destroy itself. Returns a job (if needed) to do more cleaning tasks.

func (*Peer) MaybeRenewLeaderLease

func (p *Peer) MaybeRenewLeaderLease(ts time.Time)

MaybeRenewLeaderLease tries to renew leader lease.

func (*Peer) OnRoleChanged

func (p *Peer) OnRoleChanged(observer PeerEventObserver, ready *raft.Ready)

OnRoleChanged will be invoked after the peer state has changed.

func (*Peer) PeerID

func (p *Peer) PeerID() uint64

PeerID returns the peer id.

func (*Peer) PostApply

func (p *Peer) PostApply(kv *mvcc.DBBundle, applyState applyState, appliedIndexTerm uint64, merged bool, applyMetrics applyMetrics) bool

PostApply returns a boolean value indicating whether the peer has ready.

func (*Peer) PostPropose

func (p *Peer) PostPropose(meta *ProposalMeta, isConfChange bool, cb *Callback)

PostPropose tries to renew leader lease on every consistent read/write request.

func (*Peer) PostRaftReadyPersistent

func (p *Peer) PostRaftReadyPersistent(trans Transport, applyMsgs *applyMsgs, ready *raft.Ready, invokeCtx *InvokeContext) *ApplySnapResult

PostRaftReadyPersistent updates the memory state after ready changes are flushed to disk successfully.

func (*Peer) PostSplit

func (p *Peer) PostSplit()

PostSplit resets delete_keys_hint and size_diff_hint.

func (*Peer) PrePropose

func (p *Peer) PrePropose(cfg *Config, rlog raftlog.RaftLog) (*ProposalContext, error)

PrePropose returns a proposal context.

func (*Peer) Propose

func (p *Peer) Propose(kv *mvcc.DBBundle, cfg *Config, cb *Callback, rlog raftlog.RaftLog, errResp *raft_cmdpb.RaftCmdResponse) bool

Propose a request.

Returning true means the request has been proposed successfully.

func (*Peer) ProposeConfChange

func (p *Peer) ProposeConfChange(cfg *Config, req *raft_cmdpb.RaftCmdRequest) (uint64, error)

ProposeConfChange fails in such cases: 1. A pending conf change has not been applied yet; 2. Removing the leader is not allowed in the configuration; 3. The conf change makes the raft group not healthy; 4. The conf change is dropped by raft group internally.

func (*Peer) ProposeNormal

func (p *Peer) ProposeNormal(cfg *Config, rlog raftlog.RaftLog) (uint64, error)

ProposeNormal returns a propose index.

func (*Peer) ProposeTransferLeader

func (p *Peer) ProposeTransferLeader(cfg *Config, req *raft_cmdpb.RaftCmdRequest, cb *Callback) bool

ProposeTransferLeader returns true if the transfer leader request is accepted.

func (*Peer) ReadyToHandlePendingSnap

func (p *Peer) ReadyToHandlePendingSnap() bool

ReadyToHandlePendingSnap returns a boolean value indicating whether the peer is ready to handle a pending snapshot.

func (*Peer) Region

func (p *Peer) Region() *metapb.Region

Region returns the region of the peer.

func (*Peer) Send

func (p *Peer) Send(trans Transport, msgs []eraftpb.Message) error

Send sends messages to the transport.

func (*Peer) SetRegion

func (p *Peer) SetRegion(region *metapb.Region)

SetRegion sets the region of the peer.

This will update the region of the peer, caller must ensure the region has been preserved in a durable device.

func (*Peer) Step

func (p *Peer) Step(m *eraftpb.Message) error

Step steps the raft message.

func (*Peer) Stop

func (p *Peer) Stop()

Stop cancels the task of applying a snapshot.

func (*Peer) Store

func (p *Peer) Store() *PeerStorage

Store returns the peer storage.

func (*Peer) Term

func (p *Peer) Term() uint64

Term returns the term of the raft.

type PeerEventContext

type PeerEventContext struct {
	LeaderChecker LeaderChecker
	RegionID      uint64
}

PeerEventContext represents a peer event context.

type PeerEventObserver

type PeerEventObserver interface {
	// OnPeerCreate will be invoked when there is a new peer created.
	OnPeerCreate(ctx *PeerEventContext, region *metapb.Region)
	// OnPeerApplySnap will be invoked when there is a replicate peer's snapshot applied.
	OnPeerApplySnap(ctx *PeerEventContext, region *metapb.Region)
	// OnPeerDestroy will be invoked when a peer is destroyed.
	OnPeerDestroy(ctx *PeerEventContext)
	// OnSplitRegion will be invoked when a region is split into new regions with corresponding peers.
	OnSplitRegion(derived *metapb.Region, regions []*metapb.Region, peers []*PeerEventContext)
	// OnRegionConfChange will be invoked after conf change updated region's epoch.
	OnRegionConfChange(ctx *PeerEventContext, epoch *metapb.RegionEpoch)
	// OnRoleChange will be invoked after peer state has changed
	OnRoleChange(regionID uint64, newState raft.StateType)
}

PeerEventObserver represents a peer event Observer.

type PeerStat

type PeerStat struct {
	WrittenBytes uint64
	WrittenKeys  uint64
}

PeerStat represents a peer stat.

type PeerStorage

type PeerStorage struct {
	Engines *Engines

	Tag string
	// contains filtered or unexported fields
}

PeerStorage implements the raft.Storage interface.

func NewPeerStorage

func NewPeerStorage(engines *Engines, region *metapb.Region, regionSched chan<- task, peerID uint64, tag string) (*PeerStorage, error)

NewPeerStorage creates a new PeerStorage.

func (*PeerStorage) Append

func (ps *PeerStorage) Append(invokeCtx *InvokeContext, entries []eraftpb.Entry, raftWB *WriteBatch) error

Append the given entries to the raft log using the previous last index or self.last_index. Return the new last index for later update. After we commit in the engine, we can set last_index to the returned one.

func (*PeerStorage) AppliedIndex

func (ps *PeerStorage) AppliedIndex() uint64

AppliedIndex returns applied index of the peer storage.

func (*PeerStorage) ApplySnapshot

func (ps *PeerStorage) ApplySnapshot(ctx *InvokeContext, snap *eraftpb.Snapshot, kvWB *WriteBatch, raftWB *WriteBatch) error

ApplySnapshot applies the given snapshot to the peer.

func (*PeerStorage) CancelApplyingSnap

func (ps *PeerStorage) CancelApplyingSnap() bool

CancelApplyingSnap cancels a task of applying snapshot.

func (*PeerStorage) CheckApplyingSnap

func (ps *PeerStorage) CheckApplyingSnap() bool

CheckApplyingSnap checks if the storage is applying a snapshot.

func (*PeerStorage) ClearData

func (ps *PeerStorage) ClearData() error

ClearData clears the data.

func (*PeerStorage) CompactTo

func (ps *PeerStorage) CompactTo(idx uint64)

CompactTo compacts the cache with the given index.

func (*PeerStorage) Entries

func (ps *PeerStorage) Entries(low, high, maxSize uint64) ([]eraftpb.Entry, error)

Entries implements the raft.Storage Entries method.

func (*PeerStorage) FirstIndex

func (ps *PeerStorage) FirstIndex() (uint64, error)

FirstIndex implements the raft.Storage FirstIndex method.

func (*PeerStorage) InitialState

func (ps *PeerStorage) InitialState() (eraftpb.HardState, eraftpb.ConfState, error)

InitialState implements the raft.Storage InitialState method.

func (*PeerStorage) IsApplyingSnapshot

func (ps *PeerStorage) IsApplyingSnapshot() bool

IsApplyingSnapshot returns whether the peer storage is applying a snapshot or not.

func (*PeerStorage) LastIndex

func (ps *PeerStorage) LastIndex() (uint64, error)

LastIndex implements the raft.Storage LastIndex method.

func (*PeerStorage) MaybeGCCache

func (ps *PeerStorage) MaybeGCCache(replicatedIdx, appliedIdx uint64)

MaybeGCCache tries to clear the cache.

func (*PeerStorage) PostReadyPersistent

func (ps *PeerStorage) PostReadyPersistent(ctx *InvokeContext) *ApplySnapResult

PostReadyPersistent updates the memory state after ready changes are flushed to disk successfully.

func (*PeerStorage) Region

func (ps *PeerStorage) Region() *metapb.Region

Region returns the region of the peer storage.

func (*PeerStorage) SaveReadyState

func (ps *PeerStorage) SaveReadyState(kvWB, raftWB *WriteBatch, ready *raft.Ready) (*InvokeContext, error)

SaveReadyState saves memory states to disk.

This function only writes data to `ready_ctx`'s `WriteBatch`. It's the caller's duty to write it explicitly to disk. If it's flushed to disk successfully, `post_ready` should be called to update the memory states properly. Do not modify ready in this function; this is a requirement to advance the ready object properly later.

func (*PeerStorage) ScheduleApplyingSnapshot

func (ps *PeerStorage) ScheduleApplyingSnapshot()

ScheduleApplyingSnapshot schedules a task of applying snapshot.

func (*PeerStorage) SetRegion

func (ps *PeerStorage) SetRegion(region *metapb.Region)

SetRegion sets the region.

func (*PeerStorage) Snapshot

func (ps *PeerStorage) Snapshot() (eraftpb.Snapshot, error)

Snapshot implements the raft.Storage Snapshot method.

func (*PeerStorage) Term

func (ps *PeerStorage) Term(idx uint64) (uint64, error)

Term implements the raft.Storage Term method.

type PeerTick

type PeerTick int

PeerTick represents a peer tick.

const (
	PeerTickRaft             PeerTick = 0
	PeerTickRaftLogGC        PeerTick = 1
	PeerTickSplitRegionCheck PeerTick = 2
	PeerTickPdHeartbeat      PeerTick = 3
	PeerTickCheckMerge       PeerTick = 4
	PeerTickPeerStaleState   PeerTick = 5
)

PeerTick

type ProposalContext

type ProposalContext byte

ProposalContext represents a proposal context.

const (
	ProposalContextSyncLog      ProposalContext = 1
	ProposalContextSplit        ProposalContext = 1 << 1
	ProposalContextPrepareMerge ProposalContext = 1 << 2
)

ProposalContext

func NewProposalContextFromBytes

func NewProposalContextFromBytes(ctx []byte) *ProposalContext

NewProposalContextFromBytes creates a ProposalContext with the given bytes.

func (ProposalContext) ToBytes

func (c ProposalContext) ToBytes() []byte

ToBytes converts the ProposalContext to bytes.

type ProposalMeta

type ProposalMeta struct {
	Index          uint64
	Term           uint64
	RenewLeaseTime *time.Time
}

ProposalMeta represents a proposal meta.

type ProposalQueue

type ProposalQueue struct {
	// contains filtered or unexported fields
}

ProposalQueue represents a proposal queue.

func (*ProposalQueue) Clear

func (q *ProposalQueue) Clear()

Clear clears the proposal queue.

func (*ProposalQueue) PopFront

func (q *ProposalQueue) PopFront(term uint64) *ProposalMeta

PopFront pops the front ProposalMeta from the proposal queue.

func (*ProposalQueue) Push

func (q *ProposalQueue) Push(meta *ProposalMeta)

Push pushes the ProposalMeta to the proposal queue.

type RaftClient

type RaftClient struct {
	sync.RWMutex
	// contains filtered or unexported fields
}

RaftClient represents a raft client.

func (*RaftClient) Flush

func (c *RaftClient) Flush()

Flush flushes the RaftClient.

func (*RaftClient) Send

func (c *RaftClient) Send(msg *raft_serverpb.RaftMessage)

Send sends the raft message.

func (*RaftClient) Stop

func (c *RaftClient) Stop()

Stop stops the RaftClient.

type RaftContext

type RaftContext struct {
	*GlobalContext

	ReadyRes []*ReadyICPair
	// contains filtered or unexported fields
}

RaftContext represents a raft context.

type RaftInnerServer

type RaftInnerServer struct {
	// contains filtered or unexported fields
}

RaftInnerServer implements the tikv.InnerServer interface.

func NewRaftInnerServer

func NewRaftInnerServer(globalConfig *config.Config, engines *Engines, raftConfig *Config) *RaftInnerServer

NewRaftInnerServer returns a new RaftInnerServer.

func (*RaftInnerServer) BatchRaft

func (ris *RaftInnerServer) BatchRaft(stream tikvpb.Tikv_BatchRaftServer) error

BatchRaft implements the tikv.InnerServer BatchRaft method.

func (*RaftInnerServer) GetRaftstoreRouter

func (ris *RaftInnerServer) GetRaftstoreRouter() *Router

GetRaftstoreRouter gets the raftstore Router.

func (*RaftInnerServer) GetStoreMeta

func (ris *RaftInnerServer) GetStoreMeta() *metapb.Store

GetStoreMeta gets the store meta of the RaftInnerServer.

func (*RaftInnerServer) Raft

func (ris *RaftInnerServer) Raft(stream tikvpb.Tikv_RaftServer) error

Raft implements the tikv.InnerServer Raft method.

func (*RaftInnerServer) SetPeerEventObserver

func (ris *RaftInnerServer) SetPeerEventObserver(ob PeerEventObserver)

SetPeerEventObserver sets the peer event observer.

func (*RaftInnerServer) Setup

func (ris *RaftInnerServer) Setup(pdClient pd.Client)

Setup implements the tikv.InnerServer Setup method.

func (*RaftInnerServer) Snapshot

func (ris *RaftInnerServer) Snapshot(stream tikvpb.Tikv_SnapshotServer) error

Snapshot implements the tikv.InnerServer Snapshot method.

func (*RaftInnerServer) Start

func (ris *RaftInnerServer) Start(pdClient pd.Client) error

Start implements the tikv.InnerServer Start method.

func (*RaftInnerServer) Stop

func (ris *RaftInnerServer) Stop() error

Stop implements the tikv.InnerServer Stop method.

type RaftRegionManager

type RaftRegionManager struct {
	// contains filtered or unexported fields
}

RaftRegionManager represents a raft region manager.

func NewRaftRegionManager

func NewRaftRegionManager(store *metapb.Store, router *Router, detector *tikv.DetectorServer) *RaftRegionManager

NewRaftRegionManager returns a new raft region manager.

func (*RaftRegionManager) Close

func (rm *RaftRegionManager) Close() error

Close implements the RegionManager interface.

func (*RaftRegionManager) GetRegionFromCtx

func (rm *RaftRegionManager) GetRegionFromCtx(ctx *kvrpcpb.Context) (tikv.RegionCtx, *errorpb.Error)

GetRegionFromCtx implements the RegionManager interface.

func (*RaftRegionManager) GetStoreAddrByStoreID

func (rm *RaftRegionManager) GetStoreAddrByStoreID(storeID uint64) (string, error)

func (*RaftRegionManager) GetStoreIDByAddr

func (rm *RaftRegionManager) GetStoreIDByAddr(addr string) (uint64, error)

func (*RaftRegionManager) GetStoreInfoFromCtx

func (rm *RaftRegionManager) GetStoreInfoFromCtx(ctx *kvrpcpb.Context) (string, uint64, *errorpb.Error)

func (*RaftRegionManager) OnPeerApplySnap

func (rm *RaftRegionManager) OnPeerApplySnap(ctx *PeerEventContext, region *metapb.Region)

OnPeerApplySnap will be invoked when there is a replicate peer's snapshot applied.

func (*RaftRegionManager) OnPeerCreate

func (rm *RaftRegionManager) OnPeerCreate(ctx *PeerEventContext, region *metapb.Region)

OnPeerCreate will be invoked when there is a new peer created.

func (*RaftRegionManager) OnPeerDestroy

func (rm *RaftRegionManager) OnPeerDestroy(ctx *PeerEventContext)

OnPeerDestroy will be invoked when a peer is destroyed.

func (*RaftRegionManager) OnRegionConfChange

func (rm *RaftRegionManager) OnRegionConfChange(ctx *PeerEventContext, epoch *metapb.RegionEpoch)

OnRegionConfChange will be invoked after conf change updated region's epoch.

func (*RaftRegionManager) OnRoleChange

func (rm *RaftRegionManager) OnRoleChange(regionID uint64, newState raft.StateType)

OnRoleChange will be invoked after the peer state has changed.

func (*RaftRegionManager) OnSplitRegion

func (rm *RaftRegionManager) OnSplitRegion(derived *metapb.Region, regions []*metapb.Region, peers []*PeerEventContext)

OnSplitRegion will be invoked when a region is split into new regions with corresponding peers.

func (*RaftRegionManager) SplitRegion

SplitRegion implements the RegionManager interface.

type ReadExecutor

type ReadExecutor struct {
	// contains filtered or unexported fields
}

ReadExecutor represents an executor which is used to read.

func NewReadExecutor

func NewReadExecutor(checkEpoch bool) *ReadExecutor

NewReadExecutor creates a new ReadExecutor.

func (*ReadExecutor) Execute

Execute executes the command.

type ReadIndexQueue

type ReadIndexQueue struct {
	// contains filtered or unexported fields
}

ReadIndexQueue defines a ReadIndex queue.

func (*ReadIndexQueue) ClearUncommitted

func (q *ReadIndexQueue) ClearUncommitted(term uint64)

ClearUncommitted clears the uncommitted ReadIndex requests.

func (*ReadIndexQueue) NextID

func (q *ReadIndexQueue) NextID() uint64

NextID returns the next id.

func (*ReadIndexQueue) PopFront

func (q *ReadIndexQueue) PopFront() *ReadIndexRequest

PopFront pops the front ReadIndexRequest from the ReadIndex queue.

type ReadIndexRequest

type ReadIndexRequest struct {
	// contains filtered or unexported fields
}

ReadIndexRequest defines a ReadIndex request.

func NewReadIndexRequest

func NewReadIndexRequest(id uint64, cmds []*ReqCbPair, renewLeaseTime *time.Time) *ReadIndexRequest

NewReadIndexRequest creates a new ReadIndexRequest.

type ReadyICPair

type ReadyICPair struct {
	Ready raft.Ready
	IC    *InvokeContext
}

ReadyICPair represents a ready IC pair.

type RecentAddedPeer

type RecentAddedPeer struct {
	RejectDurationAsSecs uint64
	ID                   uint64
	AddedTime            time.Time
}

RecentAddedPeer represents a recent added peer.

func NewRecentAddedPeer

func NewRecentAddedPeer(rejectDurationAsSecs uint64) *RecentAddedPeer

NewRecentAddedPeer returns a new RecentAddedPeer.

func (*RecentAddedPeer) Contains

func (r *RecentAddedPeer) Contains(id uint64) bool

Contains returns true if the given id is equal to the RecentAddedPeer ID and elapsed time is before rejected time.

func (*RecentAddedPeer) Update

func (r *RecentAddedPeer) Update(id uint64, now time.Time)

Update updates the id and time for the RecentAddedPeer.

type RegionOptions

type RegionOptions struct {
	StoreAddr  string
	PDAddr     string
	RegionSize int64
}

RegionOptions represents the region options.

type RemoteLease

type RemoteLease struct {
	// contains filtered or unexported fields
}

RemoteLease represents a remote lease, it can only be derived by `Lease`. It will be sent to the local read thread, so name it remote. If Lease expires, the remote must expire too.

func (*RemoteLease) Expire

func (r *RemoteLease) Expire()

Expire sets the remote lease state to expired.

func (*RemoteLease) Inspect

func (r *RemoteLease) Inspect(ts *time.Time) LeaseState

Inspect returns the lease state with the given time.

func (*RemoteLease) Renew

func (r *RemoteLease) Renew(bound time.Time)

Renew renews the lease to the bound.

func (*RemoteLease) Term

func (r *RemoteLease) Term() uint64

Term returns the term of the RemoteLease.

type ReqCbPair

type ReqCbPair struct {
	Req *raft_cmdpb.RaftCmdRequest
	Cb  *Callback
}

ReqCbPair represents a request callback pair.

type RequestInspector

type RequestInspector interface {
	// contains filtered or unexported methods
}

RequestInspector defines a request inspector interface.

type RequestPolicy

type RequestPolicy int

RequestPolicy represents a request policy.

const (
	// Handle the read request directly without dispatch.
	RequestPolicyReadLocal RequestPolicy = 0 + iota
	// Handle the read request via raft's SafeReadIndex mechanism.
	RequestPolicyReadIndex
	RequestPolicyProposeNormal
	RequestPolicyProposeTransferLeader
	RequestPolicyProposeConfChange
	RequestPolicyInvalid
)

RequestPolicy

func Inspect

Inspect returns a request policy with the given RaftCmdRequest.

type Router

type Router struct {
	// contains filtered or unexported fields
}

Router exports SendCommand method for other packages.

func (*Router) SendCommand

func (r *Router) SendCommand(req *raft_cmdpb.RaftCmdRequest, cb *Callback) error

SendCommand sends the RaftCmdRequest with the given Callback.

func (*Router) SplitRegion

func (r *Router) SplitRegion(ctx *kvrpcpb.Context, keys [][]byte) ([]*metapb.Region, error)

SplitRegion splits region by the split keys.

type ServerTransport

type ServerTransport struct {
	// contains filtered or unexported fields
}

ServerTransport represents a server transport.

func NewServerTransport

func NewServerTransport(raftClient *RaftClient, snapScheduler chan<- task, router *router) *ServerTransport

NewServerTransport creates a new ServerTransport.

func (*ServerTransport) Flush

func (t *ServerTransport) Flush()

Flush flushes the raft client.

func (*ServerTransport) ReportSnapshotStatus

func (t *ServerTransport) ReportSnapshotStatus(msg *raft_serverpb.RaftMessage, status raft.SnapshotStatus)

ReportSnapshotStatus reports the snapshot status.

func (*ServerTransport) ReportUnreachable

func (t *ServerTransport) ReportUnreachable(msg *raft_serverpb.RaftMessage)

ReportUnreachable sends the unreachable message.

func (*ServerTransport) Send

Send sends the RaftMessage.

func (*ServerTransport) SendSnapshotSock

func (t *ServerTransport) SendSnapshotSock(msg *raft_serverpb.RaftMessage)

SendSnapshotSock sends the snapshot.

type Snap

type Snap struct {
	CFFiles []*CFFile

	MetaFile  *MetaFile
	SizeTrack *int64
	// contains filtered or unexported fields
}

Snap implements the Snapshot interface.

func NewSnap

func NewSnap(dir string, key SnapKey, sizeTrack *int64, isSending, toBuild bool,
	deleter SnapshotDeleter, limiter *IOLimiter) (*Snap, error)

NewSnap returns a new snap.

func NewSnapForApplying

func NewSnapForApplying(dir string, key SnapKey, sizeTrack *int64, deleter SnapshotDeleter) (*Snap, error)

NewSnapForApplying returns a new snap for applying.

func NewSnapForBuilding

func NewSnapForBuilding(dir string, key SnapKey, sizeTrack *int64, deleter SnapshotDeleter, limiter *IOLimiter) (*Snap, error)

NewSnapForBuilding returns a new snap for building.

func NewSnapForReceiving

func NewSnapForReceiving(dir string, key SnapKey, snapshotMeta *rspb.SnapshotMeta,
	sizeTrack *int64, deleter SnapshotDeleter, limiter *IOLimiter) (*Snap, error)

NewSnapForReceiving returns a new snap for receiving.

func NewSnapForSending

func NewSnapForSending(dir string, key SnapKey, sizeTrack *int64, deleter SnapshotDeleter) (*Snap, error)

NewSnapForSending returns a new snap for sending.

func (*Snap) Apply

func (s *Snap) Apply(opts ApplyOptions) (ApplyResult, error)

Apply implements the Snapshot Apply method.

func (*Snap) Build

func (s *Snap) Build(dbSnap *regionSnapshot, region *metapb.Region, snapData *rspb.RaftSnapshotData, stat *SnapStatistics, deleter SnapshotDeleter) error

Build implements the Snapshot Build method.

func (*Snap) Delete

func (s *Snap) Delete()

Delete implements the Snapshot Delete method.

func (*Snap) Drop

func (s *Snap) Drop()

Drop implements the Snapshot Drop method.

func (*Snap) Exists

func (s *Snap) Exists() bool

Exists implements the Snapshot Exists method.

func (*Snap) Meta

func (s *Snap) Meta() (os.FileInfo, error)

Meta implements the Snapshot Meta method.

func (*Snap) Path

func (s *Snap) Path() string

Path implements the Snapshot Path method.

func (*Snap) Read

func (s *Snap) Read(b []byte) (int, error)

Read implements the Snapshot Read method.

func (*Snap) Save

func (s *Snap) Save() error

Save implements the Snapshot Save method.

func (*Snap) TotalSize

func (s *Snap) TotalSize() (total uint64)

TotalSize implements the Snapshot TotalSize method.

func (*Snap) Write

func (s *Snap) Write(b []byte) (int, error)

Write implements the Snapshot Write method.

type SnapEntry

type SnapEntry int

SnapEntry represents a snapshot entry.

const (
	SnapEntryGenerating SnapEntry = 1
	SnapEntrySending    SnapEntry = 2
	SnapEntryReceiving  SnapEntry = 3
	SnapEntryApplying   SnapEntry = 4
)

SnapEntry

func (SnapEntry) String

func (e SnapEntry) String() string

String returns a string representation of the snapshot entry.

type SnapKey

type SnapKey struct {
	RegionID uint64
	Term     uint64
	Index    uint64
}

SnapKey represents the snapshot key.

func SnapKeyFromRegionSnap

func SnapKeyFromRegionSnap(regionID uint64, snap *eraftpb.Snapshot) SnapKey

SnapKeyFromRegionSnap returns the snapshot key from the region snapshot.

func SnapKeyFromSnap

func SnapKeyFromSnap(snap *eraftpb.Snapshot) (SnapKey, error)

SnapKeyFromSnap returns the snapshot key from the snapshot.

func (SnapKey) String

func (k SnapKey) String() string

String returns a string representation of the snapshot key.

type SnapKeyWithSending

type SnapKeyWithSending struct {
	SnapKey   SnapKey
	IsSending bool
}

SnapKeyWithSending represents a snapshot key with sending.

type SnapManager

type SnapManager struct {
	MaxTotalSize uint64
	// contains filtered or unexported fields
}

SnapManager represents a snapshot manager.

func NewSnapManager

func NewSnapManager(path string, router *router) *SnapManager

NewSnapManager returns a new SnapManager.

func (*SnapManager) DeleteSnapshot

func (sm *SnapManager) DeleteSnapshot(key SnapKey, snapshot Snapshot, checkEntry bool) bool

DeleteSnapshot deletes a snapshot.

func (*SnapManager) Deregister

func (sm *SnapManager) Deregister(key SnapKey, entry SnapEntry)

Deregister deregisters a snapshot entry with the given snapshot key.

func (*SnapManager) GetSnapshotForApplying

func (sm *SnapManager) GetSnapshotForApplying(snapKey SnapKey) (Snapshot, error)

GetSnapshotForApplying gets the snapshot for applying with the given snapshot key.

func (*SnapManager) GetSnapshotForBuilding

func (sm *SnapManager) GetSnapshotForBuilding(key SnapKey) (Snapshot, error)

GetSnapshotForBuilding gets the snapshot for building with the given snapshot key.

func (*SnapManager) GetSnapshotForReceiving

func (sm *SnapManager) GetSnapshotForReceiving(snapKey SnapKey, data []byte) (Snapshot, error)

GetSnapshotForReceiving gets the snapshot for receiving with the given snapshot key and data.

func (*SnapManager) GetSnapshotForSending

func (sm *SnapManager) GetSnapshotForSending(snapKey SnapKey) (Snapshot, error)

GetSnapshotForSending gets the snapshot for sending with the given snapshot key.

func (*SnapManager) GetTotalSnapSize

func (sm *SnapManager) GetTotalSnapSize() uint64

GetTotalSnapSize gets the total snapshot size.

func (*SnapManager) HasRegistered

func (sm *SnapManager) HasRegistered(key SnapKey) bool

HasRegistered checks if the snapshot key is registered.

func (*SnapManager) ListIdleSnap

func (sm *SnapManager) ListIdleSnap() ([]SnapKeyWithSending, error)

ListIdleSnap lists all idle snapshots in the SnapManager.

func (*SnapManager) Register

func (sm *SnapManager) Register(key SnapKey, entry SnapEntry)

Register registers a snapshot entry with the given snapshot key.

func (*SnapManager) Stats

func (sm *SnapManager) Stats() SnapStats

Stats returns the snapshot stats of the SnapManager.

type SnapManagerBuilder

type SnapManagerBuilder struct {
	// contains filtered or unexported fields
}

SnapManagerBuilder represents a snapshot manager builder.

func (*SnapManagerBuilder) Build

func (smb *SnapManagerBuilder) Build(path string, router *router) *SnapManager

Build builds a SnapManager with the given path and router.

func (*SnapManagerBuilder) MaxTotalSize

func (smb *SnapManagerBuilder) MaxTotalSize(v uint64) *SnapManagerBuilder

MaxTotalSize sets the max total size on the SnapManagerBuilder and returns the builder.

type SnapState

type SnapState struct {
	StateType SnapStateType
	Status    *JobStatus
	Receiver  chan *eraftpb.Snapshot
}

SnapState represents a snapshot state.

type SnapStateType

type SnapStateType int

SnapStateType represents a snapshot state type.

const (
	SnapStateRelax SnapStateType = 0 + iota
	SnapStateGenerating
	SnapStateApplying
	SnapStateApplyAborted
)

SnapStateType

type SnapStatistics

type SnapStatistics struct {
	Size    uint64
	KVCount int
}

SnapStatistics represents a snapshot statistics.

type SnapStats

type SnapStats struct {
	ReceivingCount int
	SendingCount   int
}

SnapStats represents a snapshot stats.

type Snapshot

type Snapshot interface {
	io.Reader
	io.Writer
	Build(dbBundle *regionSnapshot, region *metapb.Region, snapData *rspb.RaftSnapshotData, stat *SnapStatistics, deleter SnapshotDeleter) error
	Path() string
	Exists() bool
	Delete()
	Meta() (os.FileInfo, error)
	TotalSize() uint64
	Save() error
	Apply(option ApplyOptions) (ApplyResult, error)
}

Snapshot is an interface for snapshot. It's used in these scenarios:

  1. build local snapshot
  2. read local snapshot and then replicate it to remote raftstores
  3. receive snapshot from remote raftstore and write it to local storage
  4. apply snapshot
  5. snapshot gc

type SnapshotDeleter

type SnapshotDeleter interface {
	// DeleteSnapshot returns true if it successfully deletes the specified snapshot.
	DeleteSnapshot(key SnapKey, snapshot Snapshot, checkEntry bool) bool
}

SnapshotDeleter is an interface for deleting snapshots. It's used to ensure that snapshot deletion happens under the protection of a lock, to avoid races between concurrent reads and writes.

type StaleState

type StaleState int

StaleState represents a stale state.

const (
	StaleStateValid StaleState = 0 + iota
	StaleStateToValidate
	StaleStateLeaderMissing
)

StaleState

type StoreContext

type StoreContext struct {
	*GlobalContext
	// contains filtered or unexported fields
}

StoreContext represents a store context.

type StoreLabel

type StoreLabel struct {
	LabelKey, LabelValue string
}

StoreLabel stores the information of one store label.

type StoreTick

type StoreTick int

StoreTick represents a store tick.

const (
	StoreTickCompactCheck     StoreTick = 0
	StoreTickPdStoreHeartbeat StoreTick = 1
	StoreTickSnapGC           StoreTick = 2
	StoreTickConsistencyCheck StoreTick = 3
)

StoreTick

type TestRaftWriter

type TestRaftWriter struct {
	// contains filtered or unexported fields
}

TestRaftWriter is used to mock raft-write-related prewrite and commit operations without sending real raft commands.

func (*TestRaftWriter) Close

func (w *TestRaftWriter) Close()

Close implements the mvcc.DBWriter Close method.

func (*TestRaftWriter) DeleteRange

func (w *TestRaftWriter) DeleteRange(start, end []byte, latchHandle mvcc.LatchHandle) error

DeleteRange implements the mvcc.DBWriter DeleteRange method.

func (*TestRaftWriter) NewWriteBatch

func (w *TestRaftWriter) NewWriteBatch(startTS, commitTS uint64, ctx *kvrpcpb.Context) mvcc.WriteBatch

NewWriteBatch implements the mvcc.DBWriter NewWriteBatch method.

func (*TestRaftWriter) Open

func (w *TestRaftWriter) Open()

Open implements the mvcc.DBWriter Open method.

func (*TestRaftWriter) Write

func (w *TestRaftWriter) Write(batch mvcc.WriteBatch) error

Write implements the mvcc.DBWriter Write method.

type Transport

type Transport interface {
	Send(msg *rspb.RaftMessage) error
}

Transport represents the transport interface.

type WaitApplyResultState

type WaitApplyResultState struct {
	// contains filtered or unexported fields
}

WaitApplyResultState is a struct that stores the state to wait for `PrepareMerge` apply result.

When handling the apply result of a `CommitMerge`, the source peer may not have handled the apply result of the `PrepareMerge` yet, so the target peer has to abort the current handling process and wait for it asynchronously.

type WriteBatch

type WriteBatch struct {
	// contains filtered or unexported fields
}

WriteBatch writes a batch of entries.

func (*WriteBatch) Delete

func (wb *WriteBatch) Delete(key y.Key)

Delete deletes the key from the entries.

func (*WriteBatch) DeleteLock

func (wb *WriteBatch) DeleteLock(key []byte)

DeleteLock deletes the key from the lockEntries.

func (*WriteBatch) Len

func (wb *WriteBatch) Len() int

Len returns the length of the WriteBatch.

func (*WriteBatch) MustWriteToKV

func (wb *WriteBatch) MustWriteToKV(db *mvcc.DBBundle)

MustWriteToKV wraps WriteToKV and will panic if error is not nil.

func (*WriteBatch) MustWriteToRaft

func (wb *WriteBatch) MustWriteToRaft(db *badger.DB)

MustWriteToRaft wraps WriteToRaft and will panic if error is not nil.

func (*WriteBatch) Reset

func (wb *WriteBatch) Reset()

Reset resets the WriteBatch.

func (*WriteBatch) Rollback

func (wb *WriteBatch) Rollback(key y.Key)

Rollback rolls back the key.

func (*WriteBatch) RollbackToSafePoint

func (wb *WriteBatch) RollbackToSafePoint()

RollbackToSafePoint rolls back to the safe point.

func (*WriteBatch) Set

func (wb *WriteBatch) Set(key y.Key, val []byte)

Set adds the key-value pair to the entries.

func (*WriteBatch) SetLock

func (wb *WriteBatch) SetLock(key, val []byte)

SetLock adds the key-value pair to the lockEntries.

func (*WriteBatch) SetMsg

func (wb *WriteBatch) SetMsg(key y.Key, msg proto.Message) error

SetMsg adds the y.Key and proto.Message to the entries.

func (*WriteBatch) SetOpLock

func (wb *WriteBatch) SetOpLock(key y.Key, userMeta []byte)

SetOpLock adds an op lock entry to the entries.

func (*WriteBatch) SetSafePoint

func (wb *WriteBatch) SetSafePoint()

SetSafePoint sets a safe point.

func (*WriteBatch) SetWithUserMeta

func (wb *WriteBatch) SetWithUserMeta(key y.Key, val, userMeta []byte)

SetWithUserMeta adds the key-value pair with the user meta.

func (*WriteBatch) WriteToKV

func (wb *WriteBatch) WriteToKV(bundle *mvcc.DBBundle) error

WriteToKV flushes WriteBatch to DB by two steps:

  1. Write entries to badger. After saving ApplyState to badger, subsequent regionSnapshots will start at the new raft index.
  2. Update lockStore. The data in lockStore may be older than the DB, so we need to restore the entries from the raft log.

func (*WriteBatch) WriteToRaft

func (wb *WriteBatch) WriteToRaft(db *badger.DB) error

WriteToRaft flushes WriteBatch to raft.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL