compress

package

v1.0.0 Latest Latest Go to latest Published: Sep 20, 2023 License: Apache-2.0 Imports: 26 Imported by: 5

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/ledgerwatch/erigon-lib

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func PersistDictrionary(fileName string, db *DictionaryBuilder) error
func ReadSimpleFile(fileName string, walker func(v []byte) error) error
func SetDecompressionTableCondensity(fromBitSize int)
type CompressionQueue
- func (cq CompressionQueue) Len() int
- func (cq CompressionQueue) Less(i, j int) bool
- func (cq *CompressionQueue) Pop() interface{}
- func (cq *CompressionQueue) Push(x interface{})
- func (cq *CompressionQueue) Swap(i, j int)
type CompressionRatio
- func Ratio(f1, f2 string) (CompressionRatio, error)
- func (r CompressionRatio) String() string
type CompressionWord
type Compressor
- func NewCompressor(ctx context.Context, logPrefix, outputFile, tmpDir string, ...) (*Compressor, error)
- func (c *Compressor) AddUncompressedWord(word []byte) error
- func (c *Compressor) AddWord(word []byte) error
- func (c *Compressor) Close()
- func (c *Compressor) Compress() error
- func (c *Compressor) Count() int
- func (c *Compressor) DisableFsync()
- func (c *Compressor) SetTrace(trace bool)
type DecompressedFile
- func NewUncompressedFile(filePath string) (*DecompressedFile, error)
- func (f *DecompressedFile) Append(v []byte) error
- func (f *DecompressedFile) AppendUncompressed(v []byte) error
- func (f *DecompressedFile) Close()
- func (f *DecompressedFile) ForEach(walker func(v []byte, compressed bool) error) error
type Decompressor
- func NewDecompressor(compressedFilePath string) (d *Decompressor, err error)
- func (d *Decompressor) Close()
- func (d *Decompressor) Count() int
- func (d *Decompressor) DisableReadAhead()
- func (d *Decompressor) EmptyWordsCount() int
- func (d *Decompressor) EnableMadvNormal() *Decompressor
- func (d *Decompressor) EnableReadAhead() *Decompressor
- func (d *Decompressor) EnableWillNeed() *Decompressor
- func (d *Decompressor) FileName() string
- func (d *Decompressor) FilePath() string
- func (d *Decompressor) MakeGetter() *Getter
- func (d *Decompressor) ModTime() time.Time
- func (d *Decompressor) Size() int64
- func (d *Decompressor) WithReadAhead(f func() error) error
type DictAggregator
- func (da *DictAggregator) Load(loadFunc etl.LoadFunc, args etl.TransformArgs) error
type DictionaryBuilder
- func DictionaryBuilderFromCollectors(ctx context.Context, logPrefix, tmpDir string, collectors []*etl.Collector, ...) (*DictionaryBuilder, error)
- func (db *DictionaryBuilder) Close()
- func (db *DictionaryBuilder) ForEach(f func(score uint64, word []byte))
- func (db *DictionaryBuilder) Len() int
- func (db *DictionaryBuilder) Less(i, j int) bool
- func (db *DictionaryBuilder) Pop() interface{}
- func (db *DictionaryBuilder) Push(x interface{})
- func (db *DictionaryBuilder) Reset(limit int)
- func (db *DictionaryBuilder) Sort()
- func (db *DictionaryBuilder) Swap(i, j int)
type DynamicCell
type Getter
- func (g *Getter) FastNext(buf []byte) ([]byte, uint64)
- func (g *Getter) FileName() string
- func (g *Getter) HasNext() bool
- func (g *Getter) Match(buf []byte) (bool, uint64)
- func (g *Getter) MatchCmp(buf []byte) int
- func (g *Getter) MatchPrefix(prefix []byte) bool
- func (g *Getter) MatchPrefixCmp(prefix []byte) int
- func (g *Getter) MatchPrefixUncompressed(prefix []byte) int
- func (g *Getter) Next(buf []byte) ([]byte, uint64)
- func (g *Getter) NextUncompressed() ([]byte, uint64)
- func (g *Getter) Reset(offset uint64)
- func (g *Getter) Size() int
- func (g *Getter) Skip() (uint64, int)
- func (g *Getter) SkipUncompressed() (uint64, int)
- func (g *Getter) Trace(t bool)
type HuffmanCoder
type Pattern
type PatternHeap
- func (ph PatternHeap) Len() int
- func (ph PatternHeap) Less(i, j int) bool
- func (ph *PatternHeap) Pop() interface{}
- func (ph *PatternHeap) Push(x interface{})
- func (ph *PatternHeap) Swap(i, j int)
type PatternHuff
- func (h *PatternHuff) AddOne()
- func (h *PatternHuff) AddZero()
- func (h *PatternHuff) SetDepth(depth int)
type PatternList
- func (pl PatternList) Len() int
type Position
type PositionHeap
- func (ph PositionHeap) Len() int
- func (ph PositionHeap) Less(i, j int) bool
- func (ph *PositionHeap) Pop() interface{}
- func (ph *PositionHeap) Push(x interface{})
- func (ph *PositionHeap) Swap(i, j int)
type PositionHuff
- func (h *PositionHuff) AddOne()
- func (h *PositionHuff) AddZero()
- func (h *PositionHuff) SetDepth(depth int)
type PositionList
- func (pl PositionList) Len() int
type Ring
- func NewRing() *Ring
- func (r *Ring) Get(i int) *DynamicCell
- func (r Ring) Len() int
- func (r *Ring) PushBack() *DynamicCell
- func (r *Ring) PushFront() *DynamicCell
- func (r *Ring) Reset()
- func (r *Ring) Truncate(i int)

Constants ¶

View Source

const MinPatternScore = 1024

MinPatternScore is the minimum score (per superstring) required to consider including a pattern into the dictionary.

Variables ¶

This section is empty.

Functions ¶

func PersistDictrionary ¶

func PersistDictrionary(fileName string, db *DictionaryBuilder) error

func ReadSimpleFile ¶

func ReadSimpleFile(fileName string, walker func(v []byte) error) error

func SetDecompressionTableCondensity ¶

func SetDecompressionTableCondensity(fromBitSize int)

Types ¶

type CompressionQueue ¶

type CompressionQueue []*CompressionWord

func (CompressionQueue) Len ¶

func (cq CompressionQueue) Len() int

func (CompressionQueue) Less ¶

func (cq CompressionQueue) Less(i, j int) bool

func (*CompressionQueue) Pop ¶

func (cq *CompressionQueue) Pop() interface{}

func (*CompressionQueue) Push ¶

func (cq *CompressionQueue) Push(x interface{})

func (*CompressionQueue) Swap ¶

func (cq *CompressionQueue) Swap(i, j int)

type CompressionRatio ¶

type CompressionRatio float64

func Ratio ¶

func Ratio(f1, f2 string) (CompressionRatio, error)

func (CompressionRatio) String ¶

func (r CompressionRatio) String() string

type CompressionWord ¶

type CompressionWord struct {
	// contains filtered or unexported fields
}

CompressionWord holds a word to be compressed (if flag is set), and the result of compression. To allow multiple words to be processed concurrently, the order field is used to collect all the words after processing without disrupting their order.

type Compressor ¶

type Compressor struct {
	Ratio CompressionRatio
	// contains filtered or unexported fields
}

Compressor is the main operating type for performing per-word compression. After creating a compressor, one needs to add superstrings to it, using the `AddWord` function. In order to add a word without compression, the function `AddUncompressedWord` needs to be used. Compressor only tracks which words are compressed and which are not until the compressed file is created. After that, the user of the file needs to know when to call the `Next` or `NextUncompressed` function on the decompressor. Once all words have been added, the `Compress` function needs to be called to perform the compression and eventually create the output file.

func NewCompressor ¶

func NewCompressor(ctx context.Context, logPrefix, outputFile, tmpDir string, minPatternScore uint64, workers int, lvl log.Lvl, logger log.Logger) (*Compressor, error)

func (*Compressor) AddUncompressedWord ¶

func (c *Compressor) AddUncompressedWord(word []byte) error

func (*Compressor) AddWord ¶

func (c *Compressor) AddWord(word []byte) error

func (*Compressor) Close ¶

func (c *Compressor) Close()

func (*Compressor) Compress ¶

func (c *Compressor) Compress() error

func (*Compressor) Count ¶

func (c *Compressor) Count() int

func (*Compressor) DisableFsync ¶

func (c *Compressor) DisableFsync()

func (*Compressor) SetTrace ¶

func (c *Compressor) SetTrace(trace bool)

type DecompressedFile ¶

type DecompressedFile struct {
	// contains filtered or unexported fields
}

DecompressedFile - .dat file format - simple format for temporary data store

func NewUncompressedFile ¶

func NewUncompressedFile(filePath string) (*DecompressedFile, error)

func (*DecompressedFile) Append ¶

func (f *DecompressedFile) Append(v []byte) error

func (*DecompressedFile) AppendUncompressed ¶

func (f *DecompressedFile) AppendUncompressed(v []byte) error

func (*DecompressedFile) Close ¶

func (f *DecompressedFile) Close()

func (*DecompressedFile) ForEach ¶

func (f *DecompressedFile) ForEach(walker func(v []byte, compressed bool) error) error

ForEach - Read keys from the file and generate superstring (with extra byte 0x1 prepended to each character, and with 0x0 0x0 pair inserted between keys and values). We only consider values with length > 2, because smaller values are not compressible without going into bits.

type Decompressor ¶

type Decompressor struct {
	// contains filtered or unexported fields
}

Decompressor provides access to the superstrings in a file produced by a compressor

func NewDecompressor ¶

func NewDecompressor(compressedFilePath string) (d *Decompressor, err error)

func (*Decompressor) Close ¶

func (d *Decompressor) Close()

func (*Decompressor) Count ¶

func (d *Decompressor) Count() int

func (*Decompressor) DisableReadAhead ¶

func (d *Decompressor) DisableReadAhead()

DisableReadAhead - usage: `defer d.EnableReadAhead().DisableReadAhead()`. Please don't use these funcs without `defer`, to avoid a leak.

func (*Decompressor) EmptyWordsCount ¶

func (d *Decompressor) EmptyWordsCount() int

func (*Decompressor) EnableMadvNormal ¶

func (d *Decompressor) EnableMadvNormal() *Decompressor

func (*Decompressor) EnableReadAhead ¶

func (d *Decompressor) EnableReadAhead() *Decompressor

func (*Decompressor) EnableWillNeed ¶

func (d *Decompressor) EnableWillNeed() *Decompressor

func (*Decompressor) FileName ¶

func (d *Decompressor) FileName() string

func (*Decompressor) FilePath ¶

func (d *Decompressor) FilePath() string

func (*Decompressor) MakeGetter ¶

func (d *Decompressor) MakeGetter() *Getter

MakeGetter creates an object that can be used to access superstrings in the decompressor's file. Getter is not thread-safe, but there can be multiple getters used simultaneously and concurrently for the same decompressor.

func (*Decompressor) ModTime ¶

func (d *Decompressor) ModTime() time.Time

func (*Decompressor) Size ¶

func (d *Decompressor) Size() int64

func (*Decompressor) WithReadAhead ¶

func (d *Decompressor) WithReadAhead(f func() error) error

WithReadAhead - Expect read in sequential order. (Hence, pages in the given range can be aggressively read ahead, and may be freed soon after they are accessed.)

type DictAggregator ¶

type DictAggregator struct {
	// contains filtered or unexported fields
}

func (*DictAggregator) Load ¶

func (da *DictAggregator) Load(loadFunc etl.LoadFunc, args etl.TransformArgs) error

type DictionaryBuilder ¶

type DictionaryBuilder struct {
	// contains filtered or unexported fields
}

func DictionaryBuilderFromCollectors ¶

func DictionaryBuilderFromCollectors(ctx context.Context, logPrefix, tmpDir string, collectors []*etl.Collector, lvl log.Lvl, logger log.Logger) (*DictionaryBuilder, error)

func (*DictionaryBuilder) Close ¶

func (db *DictionaryBuilder) Close()

func (*DictionaryBuilder) ForEach ¶

func (db *DictionaryBuilder) ForEach(f func(score uint64, word []byte))

func (*DictionaryBuilder) Len ¶

func (db *DictionaryBuilder) Len() int

func (*DictionaryBuilder) Less ¶

func (db *DictionaryBuilder) Less(i, j int) bool

func (*DictionaryBuilder) Pop ¶

func (db *DictionaryBuilder) Pop() interface{}

func (*DictionaryBuilder) Push ¶

func (db *DictionaryBuilder) Push(x interface{})

func (*DictionaryBuilder) Reset ¶

func (db *DictionaryBuilder) Reset(limit int)

func (*DictionaryBuilder) Sort ¶

func (db *DictionaryBuilder) Sort()

func (*DictionaryBuilder) Swap ¶

func (db *DictionaryBuilder) Swap(i, j int)

type DynamicCell ¶

type DynamicCell struct {
	// contains filtered or unexported fields
}

DynamicCell represents result of dynamic programming for certain starting position

type Getter ¶

type Getter struct {
	// contains filtered or unexported fields
}

Getter represents a "reader" or "iterator" that can move across the data of the decompressor. The full state of the getter can be captured by saving dataP and dataBit.

func (*Getter) FastNext ¶

func (g *Getter) FastNext(buf []byte) ([]byte, uint64)

FastNext extracts a compressed word from current offset in the file into the given buf, returning a new byte slice which contains extracted word. It is important to allocate enough buf size. Could throw an error if word in file is larger than the buf size. After extracting next word, it moves to the beginning of the next one.

func (*Getter) FileName ¶

func (g *Getter) FileName() string

func (*Getter) HasNext ¶

func (g *Getter) HasNext() bool

func (*Getter) Match ¶

func (g *Getter) Match(buf []byte) (bool, uint64)

Match returns true and the next offset if the word at the current offset fully matches the buf; it returns false and the current offset otherwise.

func (*Getter) MatchCmp ¶

func (g *Getter) MatchCmp(buf []byte) int

MatchCmp lexicographically compares given buf with the word at the current offset in the file. returns 0 if buf == word, -1 if buf < word, 1 if buf > word

func (*Getter) MatchPrefix ¶

func (g *Getter) MatchPrefix(prefix []byte) bool

MatchPrefix only checks if the word at the current offset has the given prefix. Does not move offset to the next word.

func (*Getter) MatchPrefixCmp ¶

func (g *Getter) MatchPrefixCmp(prefix []byte) int

MatchPrefixCmp lexicographically compares given prefix with the word at the current offset in the file. returns 0 if buf == word, -1 if buf < word, 1 if buf > word

func (*Getter) MatchPrefixUncompressed ¶

func (g *Getter) MatchPrefixUncompressed(prefix []byte) int

func (*Getter) Next ¶

func (g *Getter) Next(buf []byte) ([]byte, uint64)

Next extracts a compressed word from current offset in the file and appends it to the given buf, returning the result of appending. After extracting next word, it moves to the beginning of the next one.

func (*Getter) NextUncompressed ¶

func (g *Getter) NextUncompressed() ([]byte, uint64)

func (*Getter) Reset ¶

func (g *Getter) Reset(offset uint64)

func (*Getter) Size ¶

func (g *Getter) Size() int

func (*Getter) Skip ¶

func (g *Getter) Skip() (uint64, int)

Skip moves offset to the next word and returns the new offset and the length of the word.

func (*Getter) SkipUncompressed ¶

func (g *Getter) SkipUncompressed() (uint64, int)

func (*Getter) Trace ¶

func (g *Getter) Trace(t bool)

type HuffmanCoder ¶

type HuffmanCoder struct {
	// contains filtered or unexported fields
}

type Pattern ¶

type Pattern struct {
	// contains filtered or unexported fields
}

Pattern is a representation of a pattern that is searched in the superstrings to compress them. Patterns are stored in a patricia tree and contain pattern score (calculated during the initial dictionary building), frequency of usage, and code.

type PatternHeap ¶

type PatternHeap []*PatternHuff

PatternHeap is a priority queue of patterns for the purpose of building Huffman tree to determine efficient coding. Patterns with least usage have highest priority. We use a tie-breaker to make sure the resulting Huffman code is canonical.

func (PatternHeap) Len ¶

func (ph PatternHeap) Len() int

func (PatternHeap) Less ¶

func (ph PatternHeap) Less(i, j int) bool

func (*PatternHeap) Pop ¶

func (ph *PatternHeap) Pop() interface{}

func (*PatternHeap) Push ¶

func (ph *PatternHeap) Push(x interface{})

func (*PatternHeap) Swap ¶

func (ph *PatternHeap) Swap(i, j int)

type PatternHuff ¶

type PatternHuff struct {
	// contains filtered or unexported fields
}

PatternHuff is an intermediate node in a Huffman tree of patterns. It has two children, each of which may either be another intermediate node (h0 or h1) or leaf node, which is Pattern (p0 or p1).

func (*PatternHuff) AddOne ¶

func (h *PatternHuff) AddOne()

func (*PatternHuff) AddZero ¶

func (h *PatternHuff) AddZero()

func (*PatternHuff) SetDepth ¶

func (h *PatternHuff) SetDepth(depth int)

type PatternList ¶

type PatternList []*Pattern

PatternList is a sorted list of patterns for the purpose of building Huffman tree to determine efficient coding. Patterns with least usage come first; we use numerical code as a tie breaker to make sure the resulting Huffman code is canonical.

func (PatternList) Len ¶

func (pl PatternList) Len() int

type Position ¶

type Position struct {
	// contains filtered or unexported fields
}

type PositionHeap ¶

type PositionHeap []*PositionHuff

func (PositionHeap) Len ¶

func (ph PositionHeap) Len() int

func (PositionHeap) Less ¶

func (ph PositionHeap) Less(i, j int) bool

func (*PositionHeap) Pop ¶

func (ph *PositionHeap) Pop() interface{}

func (*PositionHeap) Push ¶

func (ph *PositionHeap) Push(x interface{})

func (*PositionHeap) Swap ¶

func (ph *PositionHeap) Swap(i, j int)

type PositionHuff ¶

type PositionHuff struct {
	// contains filtered or unexported fields
}

func (*PositionHuff) AddOne ¶

func (h *PositionHuff) AddOne()

func (*PositionHuff) AddZero ¶

func (h *PositionHuff) AddZero()

func (*PositionHuff) SetDepth ¶

func (h *PositionHuff) SetDepth(depth int)

type PositionList ¶

type PositionList []*Position

func (PositionList) Len ¶

func (pl PositionList) Len() int

type Ring ¶

type Ring struct {
	// contains filtered or unexported fields
}

func NewRing ¶

func NewRing() *Ring

func (*Ring) Get ¶

func (r *Ring) Get(i int) *DynamicCell

func (Ring) Len ¶

func (r Ring) Len() int

func (*Ring) PushBack ¶

func (r *Ring) PushBack() *DynamicCell

func (*Ring) PushFront ¶

func (r *Ring) PushFront() *DynamicCell

func (*Ring) Reset ¶

func (r *Ring) Reset()

func (*Ring) Truncate ¶

func (r *Ring) Truncate(i int)

Truncate removes all items starting from i

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL