hashtree

package

v1.8.1 Latest Latest Go to latest Published: Dec 19, 2018 License: Apache-2.0 Imports: 24 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/ysimonson/pachyderm

Links

Open Source Insights

README ¶

This is a small library for working with modified Merkle Trees. We store one of these data structures in block storage (e.g. S3) for each PFS commit, so that we know, with each subsequent commit, what files changed and need to be reprocessed by any pipelines.

Documentation ¶

Index ¶

Constants
Variables
func GetRangeFromIndex(r io.Reader, prefix string) (uint64, uint64, error)
func Glob(rs []io.ReadCloser, pattern string, f func(string, *NodeProto) error) (retErr error)
func GlobLiteralPrefix(pattern string) string
func HashFileNode(n *FileNodeProto) []byte
func IsGlob(pattern string) bool
func List(rs []io.ReadCloser, pattern string, f func(string, *NodeProto) error) (retErr error)
func Merge(w *Writer, rs []*Reader) (uint64, error)
func NewFilter(numTrees int64, tree int64) func(k []byte) (bool, error)
func PathToTree(path string, numTrees int64) uint64
func PutHashTree(pachClient *client.APIClient, tree HashTree, tags ...string) (*pfs.Object, error)
func ValidatePath(path string) error
func Walk(rs []io.ReadCloser, walkPath string, ...) error
type BucketHeader
- func (*BucketHeader) Descriptor() ([]byte, []int)
- func (m *BucketHeader) GetBucket() string
- func (m *BucketHeader) Marshal() (dAtA []byte, err error)
- func (m *BucketHeader) MarshalTo(dAtA []byte) (int, error)
- func (*BucketHeader) ProtoMessage()
- func (m *BucketHeader) Reset()
- func (m *BucketHeader) Size() (n int)
- func (m *BucketHeader) String() string
- func (m *BucketHeader) Unmarshal(dAtA []byte) error
- func (m *BucketHeader) XXX_DiscardUnknown()
- func (m *BucketHeader) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (dst *BucketHeader) XXX_Merge(src proto.Message)
- func (m *BucketHeader) XXX_Size() int
- func (m *BucketHeader) XXX_Unmarshal(b []byte) error
type Cache
- func NewCache(size int) (*Cache, error)
type ChildCursor
- func NewChildCursor(tx *bolt.Tx, path string) *ChildCursor
- func (d *ChildCursor) K() []byte
- func (d *ChildCursor) Next() ([]byte, []byte)
- func (d *ChildCursor) V() []byte
type DirectoryNodeProto
- func (*DirectoryNodeProto) Descriptor() ([]byte, []int)
- func (m *DirectoryNodeProto) GetChildren() []string
- func (m *DirectoryNodeProto) GetShared() *Shared
- func (m *DirectoryNodeProto) Marshal() (dAtA []byte, err error)
- func (m *DirectoryNodeProto) MarshalTo(dAtA []byte) (int, error)
- func (*DirectoryNodeProto) ProtoMessage()
- func (m *DirectoryNodeProto) Reset()
- func (m *DirectoryNodeProto) Size() (n int)
- func (m *DirectoryNodeProto) String() string
- func (m *DirectoryNodeProto) Unmarshal(dAtA []byte) error
- func (m *DirectoryNodeProto) XXX_DiscardUnknown()
- func (m *DirectoryNodeProto) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (dst *DirectoryNodeProto) XXX_Merge(src proto.Message)
- func (m *DirectoryNodeProto) XXX_Size() int
- func (m *DirectoryNodeProto) XXX_Unmarshal(b []byte) error
type ErrCode
- func Code(err error) ErrCode
type FileNodeProto
- func (*FileNodeProto) Descriptor() ([]byte, []int)
- func (m *FileNodeProto) GetBlockRefs() []*pfs.BlockRef
- func (m *FileNodeProto) GetHasHeaderFooter() bool
- func (m *FileNodeProto) GetObjects() []*pfs.Object
- func (m *FileNodeProto) Marshal() (dAtA []byte, err error)
- func (m *FileNodeProto) MarshalTo(dAtA []byte) (int, error)
- func (*FileNodeProto) ProtoMessage()
- func (m *FileNodeProto) Reset()
- func (m *FileNodeProto) Size() (n int)
- func (m *FileNodeProto) String() string
- func (m *FileNodeProto) Unmarshal(dAtA []byte) error
- func (m *FileNodeProto) XXX_DiscardUnknown()
- func (m *FileNodeProto) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (dst *FileNodeProto) XXX_Merge(src proto.Message)
- func (m *FileNodeProto) XXX_Size() int
- func (m *FileNodeProto) XXX_Unmarshal(b []byte) error
type HashTree
- func DeserializeDBHashTree(storageRoot string, r io.Reader) (_ HashTree, retErr error)
- func GetHashTreeObject(pachClient *client.APIClient, storageRoot string, treeRef *pfs.Object) (HashTree, error)
- func GetHashTreeTag(pachClient *client.APIClient, storageRoot string, treeRef *pfs.Tag) (HashTree, error)
- func NewDBHashTree(storageRoot string) (HashTree, error)
type HashTreeProto
- func (*HashTreeProto) Descriptor() ([]byte, []int)
- func (m *HashTreeProto) GetFs() map[string]*NodeProto
- func (m *HashTreeProto) GetVersion() int32
- func (m *HashTreeProto) Marshal() (dAtA []byte, err error)
- func (m *HashTreeProto) MarshalTo(dAtA []byte) (int, error)
- func (*HashTreeProto) ProtoMessage()
- func (m *HashTreeProto) Reset()
- func (m *HashTreeProto) Size() (n int)
- func (m *HashTreeProto) String() string
- func (m *HashTreeProto) Unmarshal(dAtA []byte) error
- func (m *HashTreeProto) XXX_DiscardUnknown()
- func (m *HashTreeProto) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (dst *HashTreeProto) XXX_Merge(src proto.Message)
- func (m *HashTreeProto) XXX_Size() int
- func (m *HashTreeProto) XXX_Unmarshal(b []byte) error
type Index
- func (*Index) Descriptor() ([]byte, []int)
- func (m *Index) GetK() []byte
- func (m *Index) GetOffset() uint64
- func (m *Index) Marshal() (dAtA []byte, err error)
- func (m *Index) MarshalTo(dAtA []byte) (int, error)
- func (*Index) ProtoMessage()
- func (m *Index) Reset()
- func (m *Index) Size() (n int)
- func (m *Index) String() string
- func (m *Index) Unmarshal(dAtA []byte) error
- func (m *Index) XXX_DiscardUnknown()
- func (m *Index) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (dst *Index) XXX_Merge(src proto.Message)
- func (m *Index) XXX_Size() int
- func (m *Index) XXX_Unmarshal(b []byte) error
type MergeNode
type NodeProto
- func Get(rs []io.ReadCloser, filePath string) (*NodeProto, error)
- func (*NodeProto) Descriptor() ([]byte, []int)
- func (m *NodeProto) GetDirNode() *DirectoryNodeProto
- func (m *NodeProto) GetFileNode() *FileNodeProto
- func (m *NodeProto) GetHash() []byte
- func (m *NodeProto) GetName() string
- func (m *NodeProto) GetSubtreeSize() int64
- func (m *NodeProto) Marshal() (dAtA []byte, err error)
- func (m *NodeProto) MarshalTo(dAtA []byte) (int, error)
- func (*NodeProto) ProtoMessage()
- func (m *NodeProto) Reset()
- func (m *NodeProto) Size() (n int)
- func (m *NodeProto) String() string
- func (m *NodeProto) Unmarshal(dAtA []byte) error
- func (m *NodeProto) XXX_DiscardUnknown()
- func (m *NodeProto) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (dst *NodeProto) XXX_Merge(src proto.Message)
- func (m *NodeProto) XXX_Size() int
- func (m *NodeProto) XXX_Unmarshal(b []byte) error
type Ordered
- func NewOrdered(root string) *Ordered
- func (o *Ordered) PutDir(path string)
- func (o *Ordered) PutFile(path string, hash []byte, size int64, fileNodeProto *FileNodeProto)
- func (o *Ordered) Serialize(_w io.Writer) error
type Reader
- func NewReader(r io.Reader, filter func(k []byte) (bool, error)) *Reader
- func (r *Reader) Read() (*MergeNode, error)
type Shared
- func (*Shared) Descriptor() ([]byte, []int)
- func (m *Shared) GetFooter() *pfs.Object
- func (m *Shared) GetFooterSize() int64
- func (m *Shared) GetHeader() *pfs.Object
- func (m *Shared) GetHeaderSize() int64
- func (m *Shared) Marshal() (dAtA []byte, err error)
- func (m *Shared) MarshalTo(dAtA []byte) (int, error)
- func (*Shared) ProtoMessage()
- func (m *Shared) Reset()
- func (m *Shared) Size() (n int)
- func (m *Shared) String() string
- func (m *Shared) Unmarshal(dAtA []byte) error
- func (m *Shared) XXX_DiscardUnknown()
- func (m *Shared) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (dst *Shared) XXX_Merge(src proto.Message)
- func (m *Shared) XXX_Size() int
- func (m *Shared) XXX_Unmarshal(b []byte) error
type Unordered
- func NewUnordered(root string) *Unordered
- func (u *Unordered) Ordered() *Ordered
- func (u *Unordered) PutFile(path string, hash []byte, size int64, blockRefs ...*pfs.BlockRef)
type Writer
- func NewWriter(w io.Writer) *Writer
- func (w *Writer) Copy(r *Reader) error
- func (w *Writer) Index() ([]byte, error)
- func (w *Writer) Write(n *MergeNode) error

Constants ¶

View Source

const (
	// FsBucket is the name of the filesystem bucket.
	FsBucket = "fs"
	// ChangedBucket is the name of the changed bucket.
	ChangedBucket = "changed"

	// DefaultMergeConcurrency is the concurrency used by default when
	// hashtrees are merged.
	DefaultMergeConcurrency = 10
	// IndexPath is the suffix that is appended to a hashtree's path to form
	// the path of its index.
	IndexPath = "-index"
	// IndexSize is the size of the index chunks: 1 << 20 (1,048,576).
	// NOTE(review): the unit is presumably bytes — confirm against the writer.
	IndexSize uint64 = 1 << 20
)

Variables ¶

View Source

var (
	// Error values whose messages carry the "proto:" prefix.
	// ErrInvalidLengthHashtree reports a negative length found during
	// unmarshaling; ErrIntOverflowHashtree reports an integer overflow.
	// NOTE(review): presumably returned by the generated Unmarshal methods
	// of this package's protobuf messages — confirm.
	ErrInvalidLengthHashtree = fmt.Errorf("proto: negative length found during unmarshaling")
	ErrIntOverflowHashtree   = fmt.Errorf("proto: integer overflow")
)

View Source

var (
	// SentinelByte is the delimiter for hashtree buckets. It is the single
	// byte '*', which is a globbing character; a path should not contain a
	// globbing character.
	SentinelByte = []byte("*")
)

Functions ¶

func GetRangeFromIndex ¶ added in v1.8.0

func GetRangeFromIndex(r io.Reader, prefix string) (uint64, uint64, error)

GetRangeFromIndex returns a subtree byte range in a serialized hashtree based on a passed in prefix.

func Glob ¶ added in v1.8.0

func Glob(rs []io.ReadCloser, pattern string, f func(string, *NodeProto) error) (retErr error)

Glob executes a callback for each path that matches the glob pattern.

func GlobLiteralPrefix ¶ added in v1.8.0

func GlobLiteralPrefix(pattern string) string

GlobLiteralPrefix returns the prefix before the first glob character

func HashFileNode ¶ added in v1.8.0

func HashFileNode(n *FileNodeProto) []byte

HashFileNode computes the hash of the file node 'n' and returns it. Exported so that PFS can compute the hash of synthetic nodes (file nodes that inherit headers/footers from their parent directories).

func IsGlob ¶ added in v1.8.0

func IsGlob(pattern string) bool

IsGlob checks if the pattern contains a glob character

func List ¶ added in v1.8.0

func List(rs []io.ReadCloser, pattern string, f func(string, *NodeProto) error) (retErr error)

List executes a callback for each file under a directory (or for the file itself if the path is a file).

func Merge ¶ added in v1.8.0

func Merge(w *Writer, rs []*Reader) (uint64, error)

Merge merges a collection of hashtree readers into a hashtree writer.

func NewFilter ¶ added in v1.8.0

func NewFilter(numTrees int64, tree int64) func(k []byte) (bool, error)

NewFilter creates a filter for a hashtree shard.

func PathToTree ¶ added in v1.8.0

func PathToTree(path string, numTrees int64) uint64

PathToTree computes the hashtree shard for a path.

func PutHashTree ¶ added in v1.8.0

func PutHashTree(pachClient *client.APIClient, tree HashTree, tags ...string) (*pfs.Object, error)

PutHashTree is a convenience function for putting a HashTree to an object store.

func ValidatePath ¶ added in v1.7.2

func ValidatePath(path string) error

ValidatePath checks if a file path is legal

func Walk ¶ added in v1.8.0

func Walk(rs []io.ReadCloser, walkPath string, f func(path string, node *NodeProto) error) error

Walk executes a callback against every node in the subtree of path.

Types ¶

type BucketHeader ¶ added in v1.8.0

// BucketHeader is a protobuf message with a single string field, Bucket
// (proto3 field number 1, JSON name "bucket").
// NOTE(review): presumably Bucket names the bucket (e.g. FsBucket or
// ChangedBucket) that the entries following the header belong to — confirm
// against the serialization code.
//
// The XXX_-prefixed fields appear to be bookkeeping emitted by the protobuf
// code generator; their `json:"-"` tags exclude them from JSON output.
type BucketHeader struct {
	Bucket               string   `protobuf:"bytes,1,opt,name=bucket,proto3" json:"bucket,omitempty"`
	XXX_NoUnkeyedLiteral struct{} `json:"-"`
	XXX_unrecognized     []byte   `json:"-"`
	XXX_sizecache        int32    `json:"-"`
}

func (*BucketHeader) Descriptor ¶ added in v1.8.0

func (*BucketHeader) Descriptor() ([]byte, []int)

func (*BucketHeader) GetBucket ¶ added in v1.8.0

func (m *BucketHeader) GetBucket() string

func (*BucketHeader) Marshal ¶ added in v1.8.0

func (m *BucketHeader) Marshal() (dAtA []byte, err error)

func (*BucketHeader) MarshalTo ¶ added in v1.8.0

func (m *BucketHeader) MarshalTo(dAtA []byte) (int, error)

func (*BucketHeader) ProtoMessage ¶ added in v1.8.0

func (*BucketHeader) ProtoMessage()

func (*BucketHeader) Reset ¶ added in v1.8.0

func (m *BucketHeader) Reset()

func (*BucketHeader) Size ¶ added in v1.8.0

func (m *BucketHeader) Size() (n int)

func (*BucketHeader) String ¶ added in v1.8.0

func (m *BucketHeader) String() string

func (*BucketHeader) Unmarshal ¶ added in v1.8.0

func (m *BucketHeader) Unmarshal(dAtA []byte) error

func (*BucketHeader) XXX_DiscardUnknown ¶ added in v1.8.1

func (m *BucketHeader) XXX_DiscardUnknown()

func (*BucketHeader) XXX_Marshal ¶ added in v1.8.1

func (m *BucketHeader) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*BucketHeader) XXX_Merge ¶ added in v1.8.1

func (dst *BucketHeader) XXX_Merge(src proto.Message)

func (*BucketHeader) XXX_Size ¶ added in v1.8.1

func (m *BucketHeader) XXX_Size() int

func (*BucketHeader) XXX_Unmarshal ¶ added in v1.8.1

func (m *BucketHeader) XXX_Unmarshal(b []byte) error

type Cache ¶ added in v1.8.0

// Cache is an LRU cache for hashtrees. It embeds *lru.Cache, so that type's
// exported methods are promoted onto Cache. Create one with NewCache.
type Cache struct {
	*lru.Cache
}

Cache is an LRU cache for hashtrees.

func NewCache ¶ added in v1.8.0

func NewCache(size int) (*Cache, error)

NewCache creates a new cache.

type ChildCursor ¶ added in v1.8.0

// ChildCursor efficiently iterates the children of a directory. Create one
// with NewChildCursor; K and V get the key and value, and Next gets the next
// key, value pair.
type ChildCursor struct {
	// contains filtered or unexported fields
}

ChildCursor efficiently iterates the children of a directory

func NewChildCursor ¶ added in v1.8.0

func NewChildCursor(tx *bolt.Tx, path string) *ChildCursor

NewChildCursor creates a new child cursor.

func (*ChildCursor) K ¶ added in v1.8.0

func (d *ChildCursor) K() []byte

K gets the key.

func (*ChildCursor) Next ¶ added in v1.8.0

func (d *ChildCursor) Next() ([]byte, []byte)

Next gets the next key, value pair.

func (*ChildCursor) V ¶ added in v1.8.0

func (d *ChildCursor) V() []byte

V gets the value.

type DirectoryNodeProto ¶

// DirectoryNodeProto is a node corresponding to a directory. It records the
// directory's children and, optionally, shared header/footer data.
//
// The XXX_-prefixed fields appear to be bookkeeping emitted by the protobuf
// code generator; their `json:"-"` tags exclude them from JSON output.
type DirectoryNodeProto struct {
	// Children of this directory. Note that paths are relative, so if "/foo/bar"
	// has a child "baz", that means that there is a file at "/foo/bar/baz".
	//
	// 'Children' is ordered alphabetically, to quickly check if a new file is
	// overwriting an existing one.
	Children []string `protobuf:"bytes,3,rep,name=children,proto3" json:"children,omitempty"`
	// Shared, if set, references data that will be prepended and appended to all
	// direct children of this directory (which must all have has_header_footer
	// set to true).
	Shared               *Shared  `protobuf:"bytes,4,opt,name=shared,proto3" json:"shared,omitempty"`
	XXX_NoUnkeyedLiteral struct{} `json:"-"`
	XXX_unrecognized     []byte   `json:"-"`
	XXX_sizecache        int32    `json:"-"`
}

DirectoryNodeProto is a node corresponding to a directory.

func (*DirectoryNodeProto) Descriptor ¶

func (*DirectoryNodeProto) Descriptor() ([]byte, []int)

func (*DirectoryNodeProto) GetChildren ¶

func (m *DirectoryNodeProto) GetChildren() []string

func (*DirectoryNodeProto) GetShared ¶ added in v1.8.0

func (m *DirectoryNodeProto) GetShared() *Shared

func (*DirectoryNodeProto) Marshal ¶ added in v1.5.0

func (m *DirectoryNodeProto) Marshal() (dAtA []byte, err error)

func (*DirectoryNodeProto) MarshalTo ¶ added in v1.5.0

func (m *DirectoryNodeProto) MarshalTo(dAtA []byte) (int, error)

func (*DirectoryNodeProto) ProtoMessage ¶

func (*DirectoryNodeProto) ProtoMessage()

func (*DirectoryNodeProto) Reset ¶

func (m *DirectoryNodeProto) Reset()

func (*DirectoryNodeProto) Size ¶ added in v1.5.0

func (m *DirectoryNodeProto) Size() (n int)

func (*DirectoryNodeProto) String ¶

func (m *DirectoryNodeProto) String() string

func (*DirectoryNodeProto) Unmarshal ¶ added in v1.5.0

func (m *DirectoryNodeProto) Unmarshal(dAtA []byte) error

func (*DirectoryNodeProto) XXX_DiscardUnknown ¶ added in v1.8.1

func (m *DirectoryNodeProto) XXX_DiscardUnknown()

func (*DirectoryNodeProto) XXX_Marshal ¶ added in v1.8.1

func (m *DirectoryNodeProto) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*DirectoryNodeProto) XXX_Merge ¶ added in v1.8.1

func (dst *DirectoryNodeProto) XXX_Merge(src proto.Message)

func (*DirectoryNodeProto) XXX_Size ¶ added in v1.8.1

func (m *DirectoryNodeProto) XXX_Size() int

func (*DirectoryNodeProto) XXX_Unmarshal ¶ added in v1.8.1

func (m *DirectoryNodeProto) XXX_Unmarshal(b []byte) error

type ErrCode ¶

// ErrCode identifies different kinds of errors returned by HashTree methods.
// Its underlying type is uint8; Code() retrieves the ErrCode of an error.
type ErrCode uint8

ErrCode identifies different kinds of errors returned by methods in HashTree below. The ErrCode of any such error can be retrieved with Code().

// ErrCode values. They are assigned consecutively with iota starting at OK
// (0), so the declaration order below determines each code's numeric value.
const (
	// OK is returned on success.
	OK ErrCode = iota

	// Unknown is returned by Code() when an error wasn't emitted by the HashTree
	// implementation.
	Unknown

	// Internal is returned when a HashTree encounters a bug (usually due to the
	// violation of an internal invariant).
	Internal

	// CannotSerialize is returned when Serialize(io.Writer) fails, normally
	// due to it being called on an OpenHashTree.
	CannotSerialize

	// CannotDeserialize is returned when Deserialize(bytes) fails, perhaps due to
	// 'bytes' being corrupted, or due to it being called on an OpenHashTree.
	CannotDeserialize

	// Unsupported is returned when Deserialize(bytes) encounters an unsupported
	// (likely old) serialized HashTree.
	Unsupported

	// PathNotFound is returned when Get() or DeleteFile() is called with a path
	// that doesn't lead to a node.
	PathNotFound

	// MalformedGlob is returned when Glob() is called with an invalid glob
	// pattern.
	MalformedGlob

	// PathConflict is returned when a path that is expected to point to a
	// directory in fact points to a file, or the reverse. For example:
	// 1. PutFile is called with a path that points to a directory.
	// 2. PutFile is called with a path that contains a prefix that
	//    points to a file.
	// 3. Merge is forced to merge a directory into a file.
	PathConflict

	// ObjectNotFound is returned when GetObject() is called with an object
	// that doesn't exist.
	ObjectNotFound

	// HeaderFooterConflict is returned when PutFileHeaderFooter is called on a
	// path of the form parent/child, but the DirectoryNode at 'parent' doesn't
	// have a header or footer (headers and footers cannot be added to directories
	// retroactively, as that would require modifying all of the directory's
	// children to indicate that they include header data in their parent).
	HeaderFooterConflict
)

func Code ¶

func Code(err error) ErrCode

Code returns the "error code" of 'err' if it was returned by one of the HashTree methods, or "Unknown" if 'err' was emitted by some other function (error codes are defined in interface.go)

type FileNodeProto ¶

// FileNodeProto is a node corresponding to a file (which is also a leaf node).
//
// The XXX_-prefixed fields appear to be bookkeeping emitted by the protobuf
// code generator; their `json:"-"` tags exclude them from JSON output.
type FileNodeProto struct {
	// Objects are references to the object store, whose targets contain this
	// file's contents. Exactly one of objects or block_refs should be set.
	Objects []*pfs.Object `protobuf:"bytes,4,rep,name=objects,proto3" json:"objects,omitempty"`
	// BlockRefs are direct references to blocks in object storage that contain
	// this file's contents. Unlike objects, using block_refs precludes
	// deduplication, but halves the number of object store requests needed to
	// read a file. Exactly one of objects or block_refs should be set.
	BlockRefs []*pfs.BlockRef `protobuf:"bytes,5,rep,name=block_refs,json=blockRefs,proto3" json:"block_refs,omitempty"`
	// HasHeaderFooter indicates whether the File corresponding to this node
	// inherits header or footer data from the parent directory. Typically this
	// is false; when it's true, determining this file's contents requires
	// reading the parent directory's metadata in addition to this file's
	// block_refs/objects. (Without this signal, all calls to pfs.GetFile() would
	// need to check the parent directory's metadata before beginning to return
	// the file's contents, which would be slow.)
	HasHeaderFooter      bool     `protobuf:"varint,6,opt,name=has_header_footer,json=hasHeaderFooter,proto3" json:"has_header_footer,omitempty"`
	XXX_NoUnkeyedLiteral struct{} `json:"-"`
	XXX_unrecognized     []byte   `json:"-"`
	XXX_sizecache        int32    `json:"-"`
}

FileNodeProto is a node corresponding to a file (which is also a leaf node).

func (*FileNodeProto) Descriptor ¶

func (*FileNodeProto) Descriptor() ([]byte, []int)

func (*FileNodeProto) GetBlockRefs ¶

func (m *FileNodeProto) GetBlockRefs() []*pfs.BlockRef

func (*FileNodeProto) GetHasHeaderFooter ¶ added in v1.8.0

func (m *FileNodeProto) GetHasHeaderFooter() bool

func (*FileNodeProto) GetObjects ¶ added in v1.3.19

func (m *FileNodeProto) GetObjects() []*pfs.Object

func (*FileNodeProto) Marshal ¶ added in v1.5.0

func (m *FileNodeProto) Marshal() (dAtA []byte, err error)

func (*FileNodeProto) MarshalTo ¶ added in v1.5.0

func (m *FileNodeProto) MarshalTo(dAtA []byte) (int, error)

func (*FileNodeProto) ProtoMessage ¶

func (*FileNodeProto) ProtoMessage()

func (*FileNodeProto) Reset ¶

func (m *FileNodeProto) Reset()

func (*FileNodeProto) Size ¶ added in v1.5.0

func (m *FileNodeProto) Size() (n int)

func (*FileNodeProto) String ¶

func (m *FileNodeProto) String() string

func (*FileNodeProto) Unmarshal ¶ added in v1.5.0

func (m *FileNodeProto) Unmarshal(dAtA []byte) error

func (*FileNodeProto) XXX_DiscardUnknown ¶ added in v1.8.1

func (m *FileNodeProto) XXX_DiscardUnknown()

func (*FileNodeProto) XXX_Marshal ¶ added in v1.8.1

func (m *FileNodeProto) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*FileNodeProto) XXX_Merge ¶ added in v1.8.1

func (dst *FileNodeProto) XXX_Merge(src proto.Message)

func (*FileNodeProto) XXX_Size ¶ added in v1.8.1

func (m *FileNodeProto) XXX_Size() int

func (*FileNodeProto) XXX_Unmarshal ¶ added in v1.8.1

func (m *FileNodeProto) XXX_Unmarshal(b []byte) error

type HashTree ¶

// HashTree is the interface of a hash tree provided by this library. It
// groups read methods (Get, List, ListAll, Glob, FSSize, Walk, Diff),
// serialization and copying (Serialize, Deserialize, Copy), mutation
// (PutDir, PutDirHeaderFooter, PutFile, PutFileHeaderFooter,
// PutFileOverwrite, DeleteFile, Hash) and cleanup (Destroy).
type HashTree interface {
	// Read methods

	// Get retrieves a file.
	Get(path string) (*NodeProto, error)

	// List calls f with the files and subdirectories of the directory at 'path'.
	List(path string, f func(node *NodeProto) error) error

	// ListAll is like List but aggregates its results into a slice.
	ListAll(path string) ([]*NodeProto, error)

	// Glob calls f with the file/directory paths and nodes that match 'pattern'.
	Glob(pattern string, f func(path string, node *NodeProto) error) error

	// FSSize gets the size of the file system that this tree represents.
	// It's essentially a helper around h.Get("/").SubtreeSize.
	FSSize() int64

	// Walk calls a given function against every node in the hash tree.
	// The order of traversal is not guaranteed. If any invocation of the
	// function returns an error, the walk stops and returns the error.
	Walk(path string, f func(path string, node *NodeProto) error) error

	// Diff returns the diff of 2 HashTrees at particular Paths. It takes a
	// callback function f, which will be called with paths that are not
	// identical to the same path in the other HashTree.
	// Specify '-1' for fully recursive, or '1' for shallow diff.
	Diff(oldHashTree HashTree, newPath string, oldPath string, recursiveDepth int64, f func(path string, node *NodeProto, new bool) error) error

	// Serialize serializes a binary version of the HashTree to w.
	Serialize(w io.Writer) error

	// Copy returns a copy of the HashTree.
	Copy() (HashTree, error)

	// PutDirHeaderFooter creates a directory at 'path' with the given header
	// and/or footer, or updates the header used by the directory at 'path' if
	// one is already set. If a directory was created without header/footer
	// metadata, PutDirHeaderFooter cannot convert it to a header/footer
	// directory, and will simply return an error.
	//
	// If a directory is a header/footer directory, files that it directly
	// contains will inherit the header/footer that the directory stores without
	// incurring any extra storage themselves. Also, this header metadata will be
	// exposed to PFS, allowing for nice semantics there (for example,
	// pfs.GetFile with a glob argument will only append the header data once to
	// the concatenated contents of all matching files, so that uploading
	// header-containing data with PutFileSplit and then downloading it with
	// GetFile+glob yields the original data. Also, pfs.InspectFile will make
	// sure file hashes reflect the header content, even if it changes after the
	// file is initially created and the file's non-header content stays
	// unchanged, etc).
	//
	// Note: If 'header' or 'footer' is empty, then header data will no longer be
	// appended to children of the directory at 'path', but because
	// HasHeaderFooter will still be set on all of those children, Pachyderm will
	// still check the parent directory for header data in the future.
	PutDirHeaderFooter(path string, header, footer *pfs.Object, headerSize, footerSize int64) error

	// PutFile appends data to a file (and creates the file if it doesn't exist).
	PutFile(path string, objects []*pfs.Object, size int64) error

	// PutFileHeaderFooter is the same as PutFile, except that it marks the
	// FileNode at 'path' as having a header stored in its parent directory and
	// validates that the parent directory has the right header/footer data
	// structures (see PutDirHeaderFooter). Note that before calling
	// PutFileHeaderFooter(path, ...), you must call
	// PutDirHeaderFooter(dir(path)) to create the parent directory correctly.
	PutFileHeaderFooter(path string, objects []*pfs.Object, size int64) error

	// PutFileOverwrite is the same as PutFile, except that instead of
	// appending the objects to the end of the given file, the objects
	// are inserted at the given index, and the existing objects starting
	// from the given index are removed.
	//
	// sizeDelta is the delta between the size of the objects added and
	// the size of the objects removed.
	PutFileOverwrite(path string, objects []*pfs.Object, overwriteIndex *pfs.OverwriteIndex, sizeDelta int64) error

	// PutDir creates a directory (or does nothing if one exists).
	PutDir(path string) error

	// DeleteFile deletes a regular file or directory (along with its children).
	DeleteFile(path string) error

	// Hash updates all of the hashes and node size metadata; it also checks
	// for conflicts.
	Hash() error

	// Deserialize deserializes a HashTree from r, into the receiver of the function.
	Deserialize(r io.Reader) error

	// Destroy cleans up the on-disk structures for the hashtree. Further
	// operations on the database will error. Blocks for pending txns.
	Destroy() error
}

HashTree is the interface of a hash tree provided by this library. To get a new HashTree, call NewDBHashTree() (or DeserializeDBHashTree(), GetHashTreeObject() or GetHashTreeTag() to load a serialized one), then modify it through the interface's methods.

func DeserializeDBHashTree ¶ added in v1.8.0

func DeserializeDBHashTree(storageRoot string, r io.Reader) (_ HashTree, retErr error)

DeserializeDBHashTree deserializes a hashtree into a database (bolt) backed hashtree.

func GetHashTreeObject ¶ added in v1.8.0

func GetHashTreeObject(pachClient *client.APIClient, storageRoot string, treeRef *pfs.Object) (HashTree, error)

GetHashTreeObject is a convenience function to deserialize a HashTree from an object in the object store.

func GetHashTreeTag ¶ added in v1.8.0

func GetHashTreeTag(pachClient *client.APIClient, storageRoot string, treeRef *pfs.Tag) (HashTree, error)

GetHashTreeTag is a convenience function to deserialize a HashTree from a tagged object in the object store.

func NewDBHashTree ¶ added in v1.8.0

func NewDBHashTree(storageRoot string) (HashTree, error)

NewDBHashTree creates a database (bolt) backed hashtree.

type HashTreeProto ¶

// HashTreeProto is a tree corresponding to the complete file contents of a
// pachyderm repo at a given commit (based on a Merkle Tree). One HashTree is
// stored for every PFS commit.
//
// The XXX_-prefixed fields appear to be bookkeeping emitted by the protobuf
// code generator; their `json:"-"` tags exclude them from JSON output.
type HashTreeProto struct {
	// Version is an arbitrary version number, set by the corresponding library
	// in hashtree.go. This ensures that if the hash function used to create
	// these trees is changed, we won't run into errors when deserializing old
	// trees. The current version is 1.
	Version int32 `protobuf:"varint,1,opt,name=version,proto3" json:"version,omitempty"`
	// Fs maps each node's path to the NodeProto with that node's details.
	// See "Potential Optimizations" at the end for a compression scheme that
	// could be useful if this map gets too large.
	//
	// Note that the key must end in "/" if and only if the value has .dir_node
	// set (i.e. iff the path points to a directory).
	Fs                   map[string]*NodeProto `` /* 145-byte string literal not displayed */
	XXX_NoUnkeyedLiteral struct{}              `json:"-"`
	XXX_unrecognized     []byte                `json:"-"`
	XXX_sizecache        int32                 `json:"-"`
}

HashTreeProto is a tree corresponding to the complete file contents of a pachyderm repo at a given commit (based on a Merkle Tree). We store one HashTree for every PFS commit.

func (*HashTreeProto) Descriptor ¶

func (*HashTreeProto) Descriptor() ([]byte, []int)

func (*HashTreeProto) GetFs ¶

func (m *HashTreeProto) GetFs() map[string]*NodeProto

func (*HashTreeProto) GetVersion ¶

func (m *HashTreeProto) GetVersion() int32

func (*HashTreeProto) Marshal ¶ added in v1.5.0

func (m *HashTreeProto) Marshal() (dAtA []byte, err error)

func (*HashTreeProto) MarshalTo ¶ added in v1.5.0

func (m *HashTreeProto) MarshalTo(dAtA []byte) (int, error)

func (*HashTreeProto) ProtoMessage ¶

func (*HashTreeProto) ProtoMessage()

func (*HashTreeProto) Reset ¶

func (m *HashTreeProto) Reset()

func (*HashTreeProto) Size ¶ added in v1.3.19

func (m *HashTreeProto) Size() (n int)

func (*HashTreeProto) String ¶

func (m *HashTreeProto) String() string

func (*HashTreeProto) Unmarshal ¶ added in v1.5.0

func (m *HashTreeProto) Unmarshal(dAtA []byte) error

func (*HashTreeProto) XXX_DiscardUnknown ¶ added in v1.8.1

func (m *HashTreeProto) XXX_DiscardUnknown()

func (*HashTreeProto) XXX_Marshal ¶ added in v1.8.1

func (m *HashTreeProto) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*HashTreeProto) XXX_Merge ¶ added in v1.8.1

func (dst *HashTreeProto) XXX_Merge(src proto.Message)

func (*HashTreeProto) XXX_Size ¶ added in v1.8.1

func (m *HashTreeProto) XXX_Size() int

func (*HashTreeProto) XXX_Unmarshal ¶ added in v1.8.1

func (m *HashTreeProto) XXX_Unmarshal(b []byte) error

type Index ¶ added in v1.8.0

// Index is a protobuf message pairing a key K (bytes, field 1) with an
// unsigned 64-bit Offset (varint, field 2).
// NOTE(review): presumably Offset is a position within a serialized hashtree
// and K the key located there (cf. GetRangeFromIndex and Writer.Index) —
// confirm against the writer/reader code.
//
// The XXX_-prefixed fields appear to be bookkeeping emitted by the protobuf
// code generator; their `json:"-"` tags exclude them from JSON output.
type Index struct {
	K                    []byte   `protobuf:"bytes,1,opt,name=k,proto3" json:"k,omitempty"`
	Offset               uint64   `protobuf:"varint,2,opt,name=offset,proto3" json:"offset,omitempty"`
	XXX_NoUnkeyedLiteral struct{} `json:"-"`
	XXX_unrecognized     []byte   `json:"-"`
	XXX_sizecache        int32    `json:"-"`
}

func (*Index) Descriptor ¶ added in v1.8.0

func (*Index) Descriptor() ([]byte, []int)

func (*Index) GetK ¶ added in v1.8.0

func (m *Index) GetK() []byte

func (*Index) GetOffset ¶ added in v1.8.0

func (m *Index) GetOffset() uint64

func (*Index) Marshal ¶ added in v1.8.0

func (m *Index) Marshal() (dAtA []byte, err error)

func (*Index) MarshalTo ¶ added in v1.8.0

func (m *Index) MarshalTo(dAtA []byte) (int, error)

func (*Index) ProtoMessage ¶ added in v1.8.0

func (*Index) ProtoMessage()

func (*Index) Reset ¶ added in v1.8.0

func (m *Index) Reset()

func (*Index) Size ¶ added in v1.8.0

func (m *Index) Size() (n int)

func (*Index) String ¶ added in v1.8.0

func (m *Index) String() string

func (*Index) Unmarshal ¶ added in v1.8.0

func (m *Index) Unmarshal(dAtA []byte) error

func (*Index) XXX_DiscardUnknown ¶ added in v1.8.1

func (m *Index) XXX_DiscardUnknown()

func (*Index) XXX_Marshal ¶ added in v1.8.1

func (m *Index) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*Index) XXX_Merge ¶ added in v1.8.1

func (dst *Index) XXX_Merge(src proto.Message)

func (*Index) XXX_Size ¶ added in v1.8.1

func (m *Index) XXX_Size() int

func (*Index) XXX_Unmarshal ¶ added in v1.8.1

func (m *Index) XXX_Unmarshal(b []byte) error

type MergeNode ¶ added in v1.8.0

// MergeNode is a node that is typically used for merging. Reader.Read returns
// *MergeNode values and Writer.Write accepts them.
type MergeNode struct {
	// contains filtered or unexported fields
}

MergeNode is a node that is typically used for merging.

type NodeProto ¶

// NodeProto is a protobuf message describing a single node of the file tree.
// A node is either a file or a directory, depending on which of FileNode and
// DirNode is set. The XXX_-prefixed fields are excluded from JSON encoding
// (json:"-").
type NodeProto struct {
	// Name is the name (not path) of the file/directory (e.g. /lib).
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// Hash is a hash of the node's name and contents (which includes the
	// BlockRefs of a file and the Children of a directory). This can be used to
	// detect if the name or contents have changed between versions.
	Hash []byte `protobuf:"bytes,2,opt,name=hash,proto3" json:"hash,omitempty"`
	// SubtreeSize (subtree_size on the wire) is the size of the subtree under
	// this node; i.e. if this node is a directory, SubtreeSize includes all of
	// its children.
	SubtreeSize int64 `protobuf:"varint,3,opt,name=subtree_size,json=subtreeSize,proto3" json:"subtree_size,omitempty"`
	// Exactly one of FileNode and DirNode must be set. The type of this node is
	// determined by which of the two is set.
	FileNode             *FileNodeProto      `protobuf:"bytes,4,opt,name=file_node,json=fileNode,proto3" json:"file_node,omitempty"`
	DirNode              *DirectoryNodeProto `protobuf:"bytes,5,opt,name=dir_node,json=dirNode,proto3" json:"dir_node,omitempty"`
	XXX_NoUnkeyedLiteral struct{}            `json:"-"`
	XXX_unrecognized     []byte              `json:"-"`
	XXX_sizecache        int32               `json:"-"`
}

NodeProto is a node in the file tree (either a file or a directory).

func Get ¶ added in v1.8.0

func Get(rs []io.ReadCloser, filePath string) (*NodeProto, error)

Get gets a hashtree node.

func (*NodeProto) Descriptor ¶

func (*NodeProto) Descriptor() ([]byte, []int)

func (*NodeProto) GetDirNode ¶

func (m *NodeProto) GetDirNode() *DirectoryNodeProto

func (*NodeProto) GetFileNode ¶

func (m *NodeProto) GetFileNode() *FileNodeProto

func (*NodeProto) GetHash ¶

func (m *NodeProto) GetHash() []byte

func (*NodeProto) GetName ¶

func (m *NodeProto) GetName() string

func (*NodeProto) GetSubtreeSize ¶

func (m *NodeProto) GetSubtreeSize() int64

func (*NodeProto) Marshal ¶ added in v1.5.0

func (m *NodeProto) Marshal() (dAtA []byte, err error)

func (*NodeProto) MarshalTo ¶ added in v1.5.0

func (m *NodeProto) MarshalTo(dAtA []byte) (int, error)

func (*NodeProto) ProtoMessage ¶

func (*NodeProto) ProtoMessage()

func (*NodeProto) Reset ¶

func (m *NodeProto) Reset()

func (*NodeProto) Size ¶ added in v1.5.0

func (m *NodeProto) Size() (n int)

func (*NodeProto) String ¶

func (m *NodeProto) String() string

func (*NodeProto) Unmarshal ¶ added in v1.5.0

func (m *NodeProto) Unmarshal(dAtA []byte) error

func (*NodeProto) XXX_DiscardUnknown ¶ added in v1.8.1

func (m *NodeProto) XXX_DiscardUnknown()

func (*NodeProto) XXX_Marshal ¶ added in v1.8.1

func (m *NodeProto) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*NodeProto) XXX_Merge ¶ added in v1.8.1

func (dst *NodeProto) XXX_Merge(src proto.Message)

func (*NodeProto) XXX_Size ¶ added in v1.8.1

func (m *NodeProto) XXX_Size() int

func (*NodeProto) XXX_Unmarshal ¶ added in v1.8.1

func (m *NodeProto) XXX_Unmarshal(b []byte) error

type Ordered ¶ added in v1.8.0

type Ordered struct {
	// contains filtered or unexported fields
}

Ordered is an in-memory version of the hashtree that is optimized for, and only works with, lexicographically ordered inserts followed by serialization.

func NewOrdered ¶ added in v1.8.0

func NewOrdered(root string) *Ordered

NewOrdered creates a new ordered hashtree.

func (*Ordered) PutDir ¶ added in v1.8.0

func (o *Ordered) PutDir(path string)

PutDir puts a directory in the hashtree.

func (*Ordered) PutFile ¶ added in v1.8.0

func (o *Ordered) PutFile(path string, hash []byte, size int64, fileNodeProto *FileNodeProto)

PutFile puts a file in the hashtree.

func (*Ordered) Serialize ¶ added in v1.8.0

func (o *Ordered) Serialize(_w io.Writer) error

Serialize serializes an ordered hashtree.

type Reader ¶ added in v1.8.0

type Reader struct {
	// contains filtered or unexported fields
}

Reader can read a serialized hashtree into a sequence of merge nodes.

func NewReader ¶ added in v1.8.0

func NewReader(r io.Reader, filter func(k []byte) (bool, error)) *Reader

NewReader creates a new hashtree reader.

func (*Reader) Read ¶ added in v1.8.0

func (r *Reader) Read() (*MergeNode, error)

Read reads the next merge node.

type Shared ¶ added in v1.8.0

// Shared is a protobuf message holding the header and footer objects, and
// their sizes, that are common to all direct children of a directory. The
// XXX_-prefixed fields are excluded from JSON encoding (json:"-").
type Shared struct {
	// At least one of Header or Footer must be set.
	Header *pfs.Object `protobuf:"bytes,1,opt,name=header,proto3" json:"header,omitempty"`
	Footer *pfs.Object `protobuf:"bytes,2,opt,name=footer,proto3" json:"footer,omitempty"`
	// HeaderSize and FooterSize are the sizes of the header and the footer,
	// respectively. They are kept as two separate fields (must be separated
	// for Copy()).
	// NOTE(review): units are presumably bytes; confirm against the writer.
	HeaderSize           int64    `protobuf:"varint,3,opt,name=header_size,json=headerSize,proto3" json:"header_size,omitempty"`
	FooterSize           int64    `protobuf:"varint,4,opt,name=footer_size,json=footerSize,proto3" json:"footer_size,omitempty"`
	XXX_NoUnkeyedLiteral struct{} `json:"-"`
	XXX_unrecognized     []byte   `json:"-"`
	XXX_sizecache        int32    `json:"-"`
}

Shared refers to data common to all direct children of a directory (i.e. headers and footers).

func (*Shared) Descriptor ¶ added in v1.8.0

func (*Shared) Descriptor() ([]byte, []int)

func (*Shared) GetFooter ¶ added in v1.8.0

func (m *Shared) GetFooter() *pfs.Object

func (*Shared) GetFooterSize ¶ added in v1.8.0

func (m *Shared) GetFooterSize() int64

func (*Shared) GetHeader ¶ added in v1.8.0

func (m *Shared) GetHeader() *pfs.Object

func (*Shared) GetHeaderSize ¶ added in v1.8.0

func (m *Shared) GetHeaderSize() int64

func (*Shared) Marshal ¶ added in v1.8.0

func (m *Shared) Marshal() (dAtA []byte, err error)

func (*Shared) MarshalTo ¶ added in v1.8.0

func (m *Shared) MarshalTo(dAtA []byte) (int, error)

func (*Shared) ProtoMessage ¶ added in v1.8.0

func (*Shared) ProtoMessage()

func (*Shared) Reset ¶ added in v1.8.0

func (m *Shared) Reset()

func (*Shared) Size ¶ added in v1.8.0

func (m *Shared) Size() (n int)

func (*Shared) String ¶ added in v1.8.0

func (m *Shared) String() string

func (*Shared) Unmarshal ¶ added in v1.8.0

func (m *Shared) Unmarshal(dAtA []byte) error

func (*Shared) XXX_DiscardUnknown ¶ added in v1.8.1

func (m *Shared) XXX_DiscardUnknown()

func (*Shared) XXX_Marshal ¶ added in v1.8.1

func (m *Shared) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*Shared) XXX_Merge ¶ added in v1.8.1

func (dst *Shared) XXX_Merge(src proto.Message)

func (*Shared) XXX_Size ¶ added in v1.8.1

func (m *Shared) XXX_Size() int

func (*Shared) XXX_Unmarshal ¶ added in v1.8.1

func (m *Shared) XXX_Unmarshal(b []byte) error

type Unordered ¶ added in v1.8.0

type Unordered struct {
	// contains filtered or unexported fields
}

Unordered is an in-memory version of the hashtree that supports random inserts. Over time this will look more like the old version of hashtrees, the key differences being that it supports arbitrary rooting and can easily be converted into a sorted tree.

func NewUnordered ¶ added in v1.8.0

func NewUnordered(root string) *Unordered

NewUnordered creates a new unordered hashtree.

func (*Unordered) Ordered ¶ added in v1.8.0

func (u *Unordered) Ordered() *Ordered

Ordered converts an unordered hashtree into an ordered hashtree.

func (*Unordered) PutFile ¶ added in v1.8.0

func (u *Unordered) PutFile(path string, hash []byte, size int64, blockRefs ...*pfs.BlockRef)

PutFile puts a file in the hashtree.

type Writer ¶ added in v1.8.0

type Writer struct {
	// contains filtered or unexported fields
}

Writer can write a serialized hashtree from a sequence of merge nodes.

func NewWriter ¶ added in v1.8.0

func NewWriter(w io.Writer) *Writer

NewWriter creates a new hashtree writer.

func (*Writer) Copy ¶ added in v1.8.0

func (w *Writer) Copy(r *Reader) error

Copy copies a hashtree reader into a writer.

func (*Writer) Index ¶ added in v1.8.0

func (w *Writer) Index() ([]byte, error)

Index returns the index for a hashtree writer.

func (*Writer) Write ¶ added in v1.8.0

func (w *Writer) Write(n *MergeNode) error

Write writes the next merge node.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL