bitcaskdb

package module
Version: v1.5.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 19, 2022 License: MIT Imports: 22 Imported by: 0

README

bitcaskdb

MIT License GoDoc Go Report Card Releases

The original code is bitcask; bitcaskdb modifies its I/O operations and implements replication.
Small values are still handled in memory, while large values are read and written directly on disk.
This makes it possible to perform Merge operations and store large data with minimal RAM utilization.

A high performance Key/Value store written in Go with a predictable read/write performance and high throughput. Uses a Bitcask on-disk layout (LSM+WAL) similar to Riak.

Installation

go get github.com/octu0/bitcaskdb

Example

bitcaskdb methods are implemented to use io.Reader / io.ReadCloser, etc.

import (
	"bytes"
	"fmt"
	"io"
	"time"

	"github.com/octu0/bitcaskdb"
)

// main walks through the basic bitcaskdb API: opening a database, writing
// values from byte slices and io.Readers, reading them back, and running the
// maintenance operations (Sync, Delete, RunGC, Merge).
//
// Every call that returns an error is checked; as this is example code, a
// failure simply panics.
func main() {
	db, err := bitcaskdb.Open("./data/mydb")
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// PutBytes() stores a value given as a byte slice.
	if err := db.PutBytes([]byte("hello"), []byte("world")); err != nil {
		panic(err)
	}

	// Get() returns an io.ReadCloser, which must be closed by the caller.
	hello, err := db.Get([]byte("hello"))
	if err != nil {
		panic(err)
	}
	defer hello.Close()

	data, err := io.ReadAll(hello)
	if err != nil {
		panic(err)
	}
	fmt.Printf("hello = %s\n", data)

	// Put() accepts any io.Reader as the value.
	if err := db.Put([]byte("foo"), bytes.NewReader([]byte("very large data..."))); err != nil {
		panic(err)
	}

	// PutWithTTL()/PutBytesWithTTL() store data with an expiration time.
	if err := db.PutWithTTL([]byte("bar"), bytes.NewReader(data), 10*time.Second); err != nil {
		panic(err)
	}

	// Sync() flushes all buffers to disk.
	if err := db.Sync(); err != nil {
		panic(err)
	}

	// A separate variable is used for the second reader so that both readers
	// stay addressable and each deferred Close targets its own reader.
	foo, err := db.Get([]byte("foo"))
	if err != nil {
		panic(err)
	}
	defer foo.Close()

	// Read only the first 4 bytes of the value. io.ReadFull is used because a
	// plain Read may return fewer bytes than requested without an error.
	head := make([]byte, 4)
	if _, err := io.ReadFull(foo, head); err != nil {
		panic(err)
	}
	fmt.Printf("foo starts with %q\n", head)

	// Delete() removes the data stored under the key.
	if err := db.Delete([]byte("foo")); err != nil {
		panic(err)
	}

	// RunGC() deletes all expired keys.
	if err := db.RunGC(); err != nil {
		panic(err)
	}

	// Merge() rebuilds the database and reclaims disk space.
	if err := db.Merge(); err != nil {
		panic(err)
	}
}

Benchmark

bitcaskdb is tuned for larger values; in particular, there is a major improvement for inputs and outputs that use io.Reader.
The default buffer size is aligned to 128KB; this value can be changed with runtime.Context.

goos: darwin
goarch: amd64
pkg: github.com/octu0/bitcaskdb
cpu: Intel(R) Core(TM) i7-8569U CPU @ 2.80GHz

BenchmarkGet
BenchmarkGet/prologic/bitcask/128B
BenchmarkGet/prologic/bitcask/128B-8      1317308       897.5 ns/op   142.62 MB/s       160 B/op     1 allocs/op
BenchmarkGet/prologic/bitcask/256B
BenchmarkGet/prologic/bitcask/256B-8      1229084       973.1 ns/op   263.08 MB/s       288 B/op     1 allocs/op
BenchmarkGet/prologic/bitcask/128K
BenchmarkGet/prologic/bitcask/128K-8        34060       30690 ns/op  4270.86 MB/s    139264 B/op     1 allocs/op
BenchmarkGet/prologic/bitcask/256K
BenchmarkGet/prologic/bitcask/256K-8        18928       56895 ns/op  4607.53 MB/s    270337 B/op     1 allocs/op
BenchmarkGet/prologic/bitcask/512K
BenchmarkGet/prologic/bitcask/512K-8         9092      130977 ns/op  4002.91 MB/s    532483 B/op     1 allocs/op
BenchmarkGet/octu0/bitcaskdb/128B
BenchmarkGet/octu0/bitcaskdb/128B-8        102139       54958 ns/op     2.33 MB/s    205145 B/op    21 allocs/op
BenchmarkGet/octu0/bitcaskdb/256B
BenchmarkGet/octu0/bitcaskdb/256B-8         35295       45820 ns/op     5.59 MB/s    206286 B/op    21 allocs/op
BenchmarkGet/octu0/bitcaskdb/128K
BenchmarkGet/octu0/bitcaskdb/128K-8         32757       40773 ns/op  3214.66 MB/s    199695 B/op    21 allocs/op
BenchmarkGet/octu0/bitcaskdb/256K
BenchmarkGet/octu0/bitcaskdb/256K-8         25388       49611 ns/op  5284.03 MB/s    214647 B/op    21 allocs/op
BenchmarkGet/octu0/bitcaskdb/512K
BenchmarkGet/octu0/bitcaskdb/512K-8         18439       69490 ns/op  7544.78 MB/s    215778 B/op    21 allocs/op

BenchmarkPut
BenchmarkPut/prologic/bitcask/WithNosync/128B
BenchmarkPut/prologic/bitcask/WithNosync/128B-8       85623       13452 ns/op     9.52 MB/s     41 B/op     2 allocs/op
BenchmarkPut/prologic/bitcask/WithNosync/256B
BenchmarkPut/prologic/bitcask/WithNosync/256B-8       69417       21407 ns/op    11.96 MB/s     43 B/op     2 allocs/op
BenchmarkPut/prologic/bitcask/WithNosync/128K
BenchmarkPut/prologic/bitcask/WithNosync/128K-8         140     8566745 ns/op    15.30 MB/s   1569 B/op     9 allocs/op
BenchmarkPut/prologic/bitcask/WithNosync/256K
BenchmarkPut/prologic/bitcask/WithNosync/256K-8         100    16770433 ns/op    15.63 MB/s   3052 B/op    16 allocs/op
BenchmarkPut/prologic/bitcask/WithNosync/512K
BenchmarkPut/prologic/bitcask/WithNosync/512K-8         100    34380284 ns/op    15.25 MB/s   6193 B/op    31 allocs/op
BenchmarkPut/octu0/bitcaskdb/WithNosync/128B
BenchmarkPut/octu0/bitcaskdb/WithNosync/128B-8       333992        3548 ns/op    36.07 MB/s    200 B/op     9 allocs/op
BenchmarkPut/octu0/bitcaskdb/WithNosync/256B
BenchmarkPut/octu0/bitcaskdb/WithNosync/256B-8       305065        3900 ns/op    65.64 MB/s    208 B/op    10 allocs/op
BenchmarkPut/octu0/bitcaskdb/WithNosync/128K
BenchmarkPut/octu0/bitcaskdb/WithNosync/128K-8        10000      242319 ns/op   540.91 MB/s    594 B/op    10 allocs/op
BenchmarkPut/octu0/bitcaskdb/WithNosync/256K
BenchmarkPut/octu0/bitcaskdb/WithNosync/256K-8         7059      447583 ns/op   585.69 MB/s    461 B/op    10 allocs/op
BenchmarkPut/octu0/bitcaskdb/WithNosync/512K
BenchmarkPut/octu0/bitcaskdb/WithNosync/512K-8         3835      812899 ns/op   644.96 MB/s    848 B/op    10 allocs/op

License

MIT, see LICENSE file for details.

Documentation

Index

Constants

View Source
// Default option values.
const (
	// DefaultDirFileModeBeforeUmask is the default os.FileMode used when creating directories
	DefaultDirFileModeBeforeUmask = os.FileMode(0700)

	// DefaultFileFileModeBeforeUmask is the default os.FileMode used when creating files
	DefaultFileFileModeBeforeUmask = os.FileMode(0600)

	// DefaultMaxDatafileSize is the default maximum datafile size in bytes
	DefaultMaxDatafileSize = 100 * 1024 * 1024 // 100MB

	// DefaultCopyTempThrshold is the default copy-to-temp threshold: data whose
	// size exceeds this threshold is temporarily copied to TempDir.
	// NOTE(review): the identifier is misspelt ("Thrshold") but is exported API,
	// so it must not be renamed without a compatibility alias.
	DefaultCopyTempThrshold int64 = 10 * 1024 * 1024 // 10MB

	// DefaultSync is the default file synchronization action
	DefaultSync = false

	// Replication defaults.
	// NOTE(review): the meanings below are inferred from the constant names and
	// from the WithRepli / WithRepliClient / WithRepliClientRequestTimeout
	// options; confirm against the implementation.
	//   - DefaultNoRepliEmit, DefaultNoRepliRecv: presumably replication emit
	//     and replication receive are both disabled unless configured.
	//   - DefaultRepliBindIP, DefaultRepliBindPort: presumably the address the
	//     replication emitter binds to.
	//   - DefaultRepliServerIP, DefaultRepliServerPort: presumably the
	//     replication server that a receiving instance connects to.
	//   - DefaultRepliRequestTimeout: presumably the replication client's
	//     request timeout.
	DefaultNoRepliEmit         bool          = true
	DefaultRepliBindIP         string        = "[0.0.0.0]"
	DefaultRepliBindPort       int           = 4220
	DefaultNoRepliRecv         bool          = true
	DefaultRepliServerIP       string        = "127.0.0.1"
	DefaultRepliServerPort     int           = 4220
	DefaultRepliRequestTimeout time.Duration = 10 * time.Second
)
View Source
// Package identification constants.
const (
	// AppName is the name of this package.
	AppName string = "bitcaskdb"
	// Version is the version string of this package.
	Version string = "1.5.1"
)

Variables

View Source
// Sentinel errors returned by database operations.
var (
	// ErrKeyNotFound is the error returned when a key is not found.
	ErrKeyNotFound = errors.New("error: key not found")

	// ErrKeyExpired is the error returned when a queried key has already
	// expired (due to its TTL).
	ErrKeyExpired = errors.New("error: key expired")

	// ErrEmptyKey is the error returned for a value with an empty key.
	ErrEmptyKey = errors.New("error: empty key")

	// ErrDatabaseLocked is the error returned if the database is locked
	// (typically because it is opened by another process).
	ErrDatabaseLocked = errors.New("error: database locked")

	// ErrInvalidRange is the error returned when the range scan is invalid.
	ErrInvalidRange = errors.New("error: invalid range")

	// ErrMergeInProgress is the error returned if a merge is requested while
	// another merge is already in progress.
	ErrMergeInProgress = errors.New("error: merge already in progress")
)

Functions

This section is empty.

Types

type Bitcask

type Bitcask struct {
	// contains filtered or unexported fields
}

Bitcask is a struct that represents an on-disk LSM and WAL data structure and an in-memory hash of key/value pairs, as per the Bitcask paper and as seen in the Riak database.

func Open

func Open(path string, funcs ...OptionFunc) (*Bitcask, error)

Open opens the database at the given path with optional options. Options can be provided with the `WithXXX` functions that provide configuration options as functions.

func (*Bitcask) Close

func (b *Bitcask) Close() error

Close closes the database and removes the lock. It is important to call Close() as this is the only way to cleanup the lock held by the open database.

func (*Bitcask) Delete

func (b *Bitcask) Delete(key []byte) error

Delete deletes the named key.

func (*Bitcask) DeleteAll

func (b *Bitcask) DeleteAll() error

DeleteAll deletes all the keys. If an I/O error occurs the error is returned.

func (*Bitcask) Fold

func (b *Bitcask) Fold(f func(key []byte) error) (err error)

Fold iterates over all keys in the database calling the function `f` for each key. If the function returns an error, no further keys are processed and the error is returned.

func (*Bitcask) Get

func (b *Bitcask) Get(key []byte) (io.ReadCloser, error)

Get fetches value for a key

func (*Bitcask) Has

func (b *Bitcask) Has(key []byte) bool

Has returns true if the key exists in the database, false otherwise.

func (*Bitcask) Keys

func (b *Bitcask) Keys() chan []byte

Keys returns all keys in the database as a channel of keys

func (*Bitcask) Len

func (b *Bitcask) Len() int

Len returns the total number of keys in the database

func (*Bitcask) Merge

func (b *Bitcask) Merge() error

Merge merges all datafiles in the database. Old keys are squashed and deleted keys are removed. Duplicate key/value pairs are also removed. Call this function periodically to reclaim disk space.

func (*Bitcask) MergeWithWaitLimit added in v1.3.1

func (b *Bitcask) MergeWithWaitLimit(lim *priorate.Limiter) error

func (*Bitcask) MergeWithWaitLimitByBytesPerSecond added in v1.3.1

func (b *Bitcask) MergeWithWaitLimitByBytesPerSecond(bytesPerSecond int) error

func (*Bitcask) Put

func (b *Bitcask) Put(key []byte, value io.Reader) error

Put stores the key and value in the database.

func (*Bitcask) PutBytes

func (b *Bitcask) PutBytes(key, value []byte) error

func (*Bitcask) PutBytesWithTTL

func (b *Bitcask) PutBytesWithTTL(key, value []byte, ttl time.Duration) error

func (*Bitcask) PutWithTTL

func (b *Bitcask) PutWithTTL(key []byte, value io.Reader, ttl time.Duration) error

PutWithTTL stores the key and value in the database with the given TTL

func (*Bitcask) Range

func (b *Bitcask) Range(start, end []byte, f func(key []byte) error) (err error)

Range performs a range scan of keys between the start key and the end key, calling the function `f` with the keys found. If the function returns an error, no further keys are processed and the first error is returned.

func (*Bitcask) Reopen

func (b *Bitcask) Reopen() error

Reopen closes and reopens the database.

func (*Bitcask) RunGC

func (b *Bitcask) RunGC() error

RunGC deletes all expired keys

func (*Bitcask) Scan

func (b *Bitcask) Scan(prefix []byte, f func(key []byte) error) error

Scan performs a prefix scan of keys matching the given prefix and calling the function `f` with the keys found. If the function returns an error no further keys are processed and the first error is returned.

func (*Bitcask) Sift

func (b *Bitcask) Sift(f func(key []byte) (bool, error)) error

Sift iterates over all keys in the database calling the function `f` for each key. If the KV pair is expired or the function returns true, that key is deleted from the database. If the function returns an error on any key, no further keys are processed, no keys are deleted, and the first error is returned.

func (*Bitcask) SiftRange

func (b *Bitcask) SiftRange(start, end []byte, f func(key []byte) (bool, error)) (err error)

SiftRange performs a range scan of keys matching a range of keys between the start key and end key and calling the function `f` with the keys found. If the KV pair is expired or the function returns true, that key is deleted from the database. If the function returns an error on any key, no further keys are processed, no keys are deleted, and the first error is returned.

func (*Bitcask) SiftScan

func (b *Bitcask) SiftScan(prefix []byte, f func(key []byte) (bool, error)) (err error)

SiftScan iterates over all keys in the database beginning with the given prefix, calling the function `f` for each key. If the KV pair is expired or the function returns true, that key is deleted from the database.

If the function returns an error on any key, no further keys are processed, no keys are deleted, and the first error is returned.

func (*Bitcask) Stats

func (b *Bitcask) Stats() (Stats, error)

Stats returns statistics about the database including the number of data files, keys and overall size on disk of the data

func (*Bitcask) Sync

func (b *Bitcask) Sync() error

Sync flushes all buffers to disk ensuring all data is written

type OptionFunc

type OptionFunc func(*option) error

OptionFunc is a function that takes an option struct and modifies it.

func WithCopyTempThreshold

func WithCopyTempThreshold(size int64) OptionFunc

func WithDirFileModeBeforeUmask

func WithDirFileModeBeforeUmask(mode os.FileMode) OptionFunc

WithDirFileModeBeforeUmask sets the FileMode used for each new directory created.

func WithFileFileModeBeforeUmask

func WithFileFileModeBeforeUmask(mode os.FileMode) OptionFunc

WithFileFileModeBeforeUmask sets the FileMode used for each new file created.

func WithLogger

func WithLogger(logger *log.Logger) OptionFunc

func WithMaxDatafileSize

func WithMaxDatafileSize(size int) OptionFunc

WithMaxDatafileSize sets the maximum datafile size option

func WithRepli

func WithRepli(bindIP string, bindPort int) OptionFunc

func WithRepliClient

func WithRepliClient(serverIP string, serverPort int) OptionFunc

func WithRepliClientRequestTimeout

func WithRepliClientRequestTimeout(rto time.Duration) OptionFunc

func WithRuntimeContext

func WithRuntimeContext(ctx runtime.Context) OptionFunc

func WithSync

func WithSync(sync bool) OptionFunc

WithSync causes Sync() to be called on every key/value written increasing durability and safety at the expense of performance

func WithTempDir

func WithTempDir(dir string) OptionFunc

func WithValidateChecksum

func WithValidateChecksum(enable bool) OptionFunc

type Stats

// Stats holds the statistics reported by (*Bitcask).Stats: the number of data
// files, the number of keys, and the overall size on disk of the data.
type Stats struct {
	Datafiles        int   // number of data files
	Keys             int   // number of keys
	Size             int64 // overall size on disk of the data (presumably in bytes; confirm)
	ReclaimableSpace int64 // NOTE(review): presumably the space a Merge could reclaim; confirm
}

Stats is a struct returned by Stats() on an open Bitcask instance

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL