datas

package
v0.0.0-...-892de5e Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 23, 2020 License: Apache-2.0 Imports: 23 Imported by: 0

Documentation

Overview

Package datas defines and implements the database layer used in Noms.

Index

Constants

View Source
const (
	ParentsField = "parents"
	// Added in July, 2020. Commits created with versions before this was
	// added have only a Set of parents. Commits created after this was
	// added carry a List of parents, because parent order can matter.
	// `"parents"` is still written as a Set as well, so that commits
	// created with newer versions are still usable by older versions.
	ParentsListField = "parents_list"
	ValueField       = "value"
	CommitMetaField  = "meta"
	CommitName       = "Commit"
)
View Source
const (
	Listed = iota
	DownloadStart
	DownloadSuccess
	DownloadFailed
)
View Source
const (
	TagMetaField      = "meta"
	TagCommitRefField = "ref"
	TagName           = "Tag"
)

Variables

View Source
var (
	ErrOptimisticLockFailed = errors.New("optimistic lock failed on database Root update")
	ErrMergeNeeded          = errors.New("dataset head is not ancestor of commit")
)
View Source
var DatasetFullRe = regexp.MustCompile("^" + DatasetRe.String() + "$")

DatasetFullRe is a regexp that matches only a target string that is entirely a legal Dataset name.

View Source
var DatasetRe = regexp.MustCompile(`[a-zA-Z0-9\-_/]+`)

DatasetRe is a regexp that matches a legal Dataset name anywhere within the target string.

View Source
var ErrDBUpToDate = errors.New("the database does not need to be pulled as it's already up to date")

ErrDBUpToDate is the error code returned from NewPuller in the event that there is no work to do.

View Source
var ErrIncompatibleSourceChunkStore = errors.New("the chunk store of the source database does not implement NBSCompressedChunkStore.")

ErrIncompatibleSourceChunkStore is the error code returned from NewPuller in the event that the source ChunkStore does not implement `NBSCompressedChunkStore`.

View Source
var ErrNoData = errors.New("no data")

Functions

func CanUsePuller

func CanUsePuller(db Database) bool

CanUsePuller returns true if a datas.Puller can be used to pull data from one Database into another. Not all Databases support this yet.

func Clone

func Clone(ctx context.Context, srcDB, sinkDB Database, eventCh chan<- TableFileEvent) error

func FindCommonAncestor

func FindCommonAncestor(ctx context.Context, c1, c2 types.Ref, vr1, vr2 types.ValueReader) (a types.Ref, ok bool, err error)

FindCommonAncestor returns the most recent common ancestor of c1 and c2, if one exists, setting ok to true. If there is no common ancestor, ok is set to false. Refs of |c1| are dereferenced through |vr1|, while refs of |c2| are dereferenced through |vr2|.

func GetCSStatSummaryForDB

func GetCSStatSummaryForDB(db Database) string

func IsCommit

func IsCommit(v types.Value) (bool, error)

func IsCommitType

func IsCommitType(nbf *types.NomsBinFormat, t *types.Type) bool

func IsRefOfCommitType

func IsRefOfCommitType(nbf *types.NomsBinFormat, t *types.Type) bool

func IsTag

func IsTag(v types.Value) (bool, error)

func IsValidDatasetName

func IsValidDatasetName(name string) bool

func NewCommit

func NewCommit(ctx context.Context, value types.Value, parentsList types.List, meta types.Struct) (types.Struct, error)

NewCommit creates a new commit object.

A commit has the following type:

```

struct Commit {
  meta: M,
  parents: Set<Ref<Cycle<Commit>>>,
  parentsList: List<Ref<Cycle<Commit>>>,
  value: T,
}

``` where M is a struct type and T is any type.

func NewTag

func NewTag(_ context.Context, commitRef types.Ref, meta types.Struct) (types.Struct, error)

NewTag creates a new tag object.

A tag has the following type:

```

struct Tag {
  meta: M,
  commitRef: R,
}

``` where M is a struct type and R is a ref type.

func Pull

func Pull(ctx context.Context, srcDB, sinkDB Database, sourceRef types.Ref, progressCh chan PullProgress) error

Pull objects that descend from sourceRef from srcDB to sinkDB.

func PullWithoutBatching

func PullWithoutBatching(ctx context.Context, srcDB, sinkDB Database, sourceRef types.Ref, progressCh chan PullProgress) error

PullWithoutBatching effectively removes the batching of chunk retrieval done on each level of the tree. This means all chunks from one level of the tree will be retrieved from the underlying chunk store in one call, which pushes the optimization problem down to the chunk store which can make smarter decisions.

Types

type CloneTableFileEvent

type CloneTableFileEvent int

type CmpChnkAndRefs

type CmpChnkAndRefs struct {
	// contains filtered or unexported fields
}

CmpChnkAndRefs holds a CompressedChunk and all of its references

type CommitOptions

type CommitOptions struct {
	// ParentsList, if provided, is the parent commits of the commit we are
	// creating.
	ParentsList types.List

	// Meta is a Struct that describes arbitrary metadata about this Commit,
	// e.g. a timestamp or descriptive text.
	Meta types.Struct

	// Policy will be called to attempt to merge this Commit with the current
	// Head, if this is not a fast-forward. If Policy is nil, no merging will
	// be attempted. Note that because Commit() retries in some cases, Policy
	// might also be called multiple times with different values.
	Policy merge.Policy
}

CommitOptions is used to pass options into Commit.

type Database

type Database interface {
	// To implement types.ValueWriter, Database implementations provide
	// WriteValue(). WriteValue() writes v to this Database, though v is not
	// guaranteed to be persistent until after a subsequent Commit(). The
	// return value is the Ref of v.
	// Written values won't be persisted until a commit-alike
	types.ValueReadWriter

	// Close must have no side-effects
	io.Closer

	// Datasets returns the root of the database which is a
	// Map<String, Ref<Commit>> where string is a datasetID.
	Datasets(ctx context.Context) (types.Map, error)

	// GetDataset returns a Dataset struct containing the current mapping of
	// datasetID in the above Datasets Map.
	GetDataset(ctx context.Context, datasetID string) (Dataset, error)

	// Rebase brings this Database's view of the world inline with upstream.
	Rebase(ctx context.Context) error

	// Commit updates the Commit that ds.ID() in this database points at. All
	// Values that have been written to this Database are guaranteed to be
	// persistent after Commit() returns.
	// The new Commit struct is constructed using v, opts.Parents, and
	// opts.Meta. If opts.Parents is the zero value (types.Set{}) then
	// the current head is used. If opts.Meta is the zero value
	// (types.Struct{}) then a fully initialized empty Struct is passed to
	// NewCommit.
	// The returned Dataset is always the newest snapshot, regardless of
	// success or failure, and Datasets() is updated to match backing storage
	// upon return as well. If the update cannot be performed, e.g., because
	// of a conflict, Commit returns an 'ErrMergeNeeded' error.
	Commit(ctx context.Context, ds Dataset, v types.Value, opts CommitOptions) (Dataset, error)

	// CommitDangling creates a new commit that is unreferenced by any Dataset.
	// This method is used in the course of programmatic updates such as Rebase
	// All Values that have been written to this Database are guaranteed to be
	// persistent after CommitDangling() returns.
	// The new Commit struct is of the same form as structs created by Commit()
	CommitDangling(ctx context.Context, v types.Value, opts CommitOptions) (types.Struct, error)

	// CommitValue updates the Commit that ds.ID() in this database points at.
	// All Values that have been written to this Database are guaranteed to be
	// persistent after Commit().
	// The new Commit struct is constructed using `v`, and the current Head of
	// `ds` as the lone Parent.
	// The returned Dataset is always the newest snapshot, regardless of
	// success or failure, and Datasets() is updated to match backing storage
	// upon return as well. If the update cannot be performed, e.g., because
	// of a conflict, Commit returns an 'ErrMergeNeeded' error.
	CommitValue(ctx context.Context, ds Dataset, v types.Value) (Dataset, error)

	// Tag stores an immutable reference to a Value. It takes a Ref and a Dataset
	// whose head must be nil (i.e., a newly created Dataset).
	// The new Tag struct is constructed with `ref` and metadata about the tag
	// contained in the struct `opts.Meta`.
	// The returned Dataset is always the newest snapshot, regardless of
	// success or failure, and Datasets() is updated to match backing storage
	// upon return as well.
	Tag(ctx context.Context, ds Dataset, ref types.Ref, opts TagOptions) (Dataset, error)

	// Delete removes the Dataset named ds.ID() from the map at the root of
	// the Database. The Dataset data is not necessarily cleaned up at this
	// time, but may be garbage collected in the future.
	// The returned Dataset is always the newest snapshot, regardless of
	// success or failure, and Datasets() is updated to match backing storage
	// upon return as well. If the update cannot be performed, e.g., because
	// of a conflict, Delete returns an 'ErrMergeNeeded' error.
	Delete(ctx context.Context, ds Dataset) (Dataset, error)

	// SetHead ignores any lineage constraints (e.g. the current Head being in
	// commit’s Parent set) and force-sets a mapping from datasetID: commit in
	// this database.
	// All Values that have been written to this Database are guaranteed to be
	// persistent after SetHead(). If the update cannot be performed, e.g.,
	// because another process moved the current Head out from under you,
	// error will be non-nil.
	// The newest snapshot of the Dataset is always returned, so the caller can
	// easily retry using the latest.
	// Regardless, Datasets() is updated to match backing storage upon return.
	SetHead(ctx context.Context, ds Dataset, newHeadRef types.Ref) (Dataset, error)

	// FastForward takes a types.Ref to a Commit object and makes it the new
	// Head of ds iff it is a descendant of the current Head. Intended to be
	// used e.g. after a call to Pull(). If the update cannot be performed,
	// e.g., because another process moved the current Head out from under
	// you, err will be non-nil.
	// The newest snapshot of the Dataset is always returned, so the caller
	// can easily retry using the latest.
	// Regardless, Datasets() is updated to match backing storage upon return.
	FastForward(ctx context.Context, ds Dataset, newHeadRef types.Ref) (Dataset, error)

	// Stats may return some kind of struct that reports statistics about the
	// ChunkStore that backs this Database instance. The type is
	// implementation-dependent, and impls may return nil
	Stats() interface{}

	// StatsSummary may return a string containing summarized statistics for
	// the ChunkStore that backs this Database. It must return "Unsupported"
	// if this operation is not supported.
	StatsSummary() string

	Flush(ctx context.Context) error
	// contains filtered or unexported methods
}

Database provides versioned storage for noms values. While Values can be directly read and written from a Database, it is generally more appropriate to read data by inspecting the Head of a Dataset and write new data by updating the Head of a Dataset via Commit() or similar. Particularly, new data is not guaranteed to be persistent until after a Commit (Delete, SetHead, or FastForward) operation completes. The Database API is stateful, meaning that calls to GetDataset() or Datasets() occurring after a call to Commit() (et al) will represent the result of the Commit().

func NewDatabase

func NewDatabase(cs chunks.ChunkStore) Database

type Dataset

type Dataset struct {
	// contains filtered or unexported fields
}

Dataset is a named Commit within a Database.

func (Dataset) Database

func (ds Dataset) Database() Database

Database returns the Database object in which this Dataset is stored. WARNING: This method is under consideration for deprecation.

func (Dataset) HasHead

func (ds Dataset) HasHead() bool

HasHead() returns 'true' if this dataset has a Head Commit, false otherwise.

func (Dataset) ID

func (ds Dataset) ID() string

ID returns the name of this Dataset.

func (Dataset) MaybeHead

func (ds Dataset) MaybeHead() (types.Struct, bool)

MaybeHead returns the current Head Commit of this Dataset, which contains the current root of the Dataset's value tree, if available. If not, it returns a new Commit and 'false'.

func (Dataset) MaybeHeadRef

func (ds Dataset) MaybeHeadRef() (types.Ref, bool, error)

MaybeHeadRef returns the Ref of the current Head Commit of this Dataset, which contains the current root of the Dataset's value tree, if available. If not, it returns an empty Ref and 'false'.

func (Dataset) MaybeHeadValue

func (ds Dataset) MaybeHeadValue() (types.Value, bool, error)

MaybeHeadValue returns the Value field of the current head Commit, if available. If not it returns nil and 'false'.

type FileReaderWithSize

type FileReaderWithSize struct {
	*os.File
	// contains filtered or unexported fields
}

func (FileReaderWithSize) Size

func (rd FileReaderWithSize) Size() int64

type FilledWriters

type FilledWriters struct {
	// contains filtered or unexported fields
}

FilledWriters store CmpChunkTableWriter that have been filled and are ready to be flushed. In the future will likely add the md5 of the data to this structure to be used to verify table upload calls.

type NBSCompressedChunkStore

type NBSCompressedChunkStore interface {
	chunks.ChunkStore
	GetManyCompressed(context.Context, hash.HashSet, chan<- nbs.CompressedChunk) error
}

type PullProgress

type PullProgress struct {
	DoneCount, KnownCount, ApproxWrittenBytes uint64
}

type Puller

type Puller struct {
	// contains filtered or unexported fields
}

Puller is used to sync data between two Databases

func NewPuller

func NewPuller(ctx context.Context, tempDir string, chunksPerTF int, srcDB, sinkDB Database, rootChunkHash hash.Hash, eventCh chan PullerEvent) (*Puller, error)

NewPuller creates a new Puller instance to do the syncing. If a nil puller is returned without error that means that there is nothing to pull and the sinkDB is already up to date.

func (*Puller) Pull

func (p *Puller) Pull(ctx context.Context) error

Pull executes the sync operation

type PullerEvent

type PullerEvent struct {
	EventType      PullerEventType
	TWEventDetails TreeWalkEventDetails
	TFEventDetails TableFileEventDetails
}

func NewTFPullerEvent

func NewTFPullerEvent(et PullerEventType, details *TableFileEventDetails) PullerEvent

func NewTWPullerEvent

func NewTWPullerEvent(et PullerEventType, details *TreeWalkEventDetails) PullerEvent

type PullerEventType

type PullerEventType int
const (
	NewLevelTWEvent PullerEventType = iota
	DestDBHasTWEvent
	LevelUpdateTWEvent
	LevelDoneTWEvent
	StartUploadTableFile
	EndUpdateTableFile
)

type TableFileEvent

type TableFileEvent struct {
	EventType  CloneTableFileEvent
	TableFiles []nbs.TableFile
}

type TableFileEventDetails

type TableFileEventDetails struct {
	TableFileCount     int
	TableFilesUploaded int
	CurrentFileSize    int64
}

type TagOptions

type TagOptions struct {
	// Meta is a Struct that describes arbitrary metadata about this Tag,
	// e.g. a timestamp or descriptive text.
	Meta types.Struct
}

TagOptions is used to pass options into Tag.

type TreeWalkEventDetails

type TreeWalkEventDetails struct {
	TreeLevel           int
	ChunksInLevel       int
	ChunksAlreadyHad    int
	ChunksBuffered      int
	ChildrenFound       int
	TableFilesGenerated int
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL