seekable

package module
v0.6.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 15, 2023 License: MIT Imports: 13 Imported by: 4

README

License GoDoc Build Status Go Report

ZSTD seekable compression format implementation in Go

Seekable ZSTD compression format implemented in Golang.

This library provides a random access reader (using uncompressed file offsets) for ZSTD-compressed streams. This can be used for creating transparent compression layers. Coupled with Content Defined Chunking (CDC) it can also be used as a robust de-duplication layer.

Installation

go get -u github.com/SaveTheRbtz/zstd-seekable-format-go

Using the seekable format

Writing is done through the Writer interface:

import (
	"github.com/klauspost/compress/zstd"
	seekable "github.com/SaveTheRbtz/zstd-seekable-format-go"
)

// The ZSTD encoder does the compression; the seekable writer wraps it together with f.
enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest))
if err != nil {
	log.Fatal(err)
}
defer enc.Close()

w, err := seekable.NewWriter(f, enc)
if err != nil {
	log.Fatal(err)
}

// Write data in chunks; every Write call maps to a separate ZSTD frame.
chunks := [][]byte{[]byte("Hello"), []byte(" "), []byte("World!")}
for _, chunk := range chunks {
	if _, err := w.Write(chunk); err != nil {
		log.Fatal(err)
	}
}

// Close the writer so that the seek table is flushed.
if err := w.Close(); err != nil {
	log.Fatal(err)
}

NB! Do not forget to call Close since it is responsible for flushing the seek table.

Reading can be done either through the ReaderAt interface:

// The ZSTD decoder does the decompression; the seekable reader wraps it together with f.
dec, err := zstd.NewReader(nil)
if err != nil {
	log.Fatal(err)
}
defer dec.Close()

r, err := seekable.NewReader(f, dec)
if err != nil {
	log.Fatal(err)
}
defer r.Close()

ello := make([]byte, 4)
// ReaderAt: read 4 bytes starting at uncompressed offset 1.
// The error must be checked; otherwise a failed read is only noticed
// indirectly through the comparison below.
if _, err := r.ReadAt(ello, 1); err != nil {
	log.Fatal(err)
}
if !bytes.Equal(ello, []byte("ello")) {
	log.Fatalf("%+v != ello", ello)
}

Or through the ReadSeeker:

world := make([]byte, 5)
// Seeker: position the reader 6 bytes before the end of the uncompressed stream.
if _, err := r.Seek(-6, io.SeekEnd); err != nil {
	log.Fatal(err)
}
// Reader: read sequentially from the current offset.
if _, err := r.Read(world); err != nil {
	log.Fatal(err)
}
if !bytes.Equal(world, []byte("World")) {
	log.Fatalf("%+v != World", world)
}

Seekable format utilizes ZSTD skippable frames so it is a valid ZSTD stream:

// Standard ZSTD Reader
f.Seek(0, io.SeekStart)
dec, err := zstd.NewReader(f)
if err != nil {
	log.Fatal(err)
}
defer dec.Close()

all, err := io.ReadAll(dec)
if err != nil {
	log.Fatal(err)
}
if !bytes.Equal(all, []byte("Hello World!")) {
	log.Fatalf("%+v != Hello World!", all)
}

Documentation

Overview

Package adds the ability to create ZSTD files in seekable format and to randomly access them using uncompressed offsets.

Example
package main

import (
	"fmt"
	"io"
	"log"
	"os"

	"github.com/klauspost/compress/zstd"

	seekable "github.com/SaveTheRbtz/zstd-seekable-format-go"
)

// main writes "Hello World!" to a temporary file as three seekable ZSTD
// frames, then reads parts of it back through ReadAt, through Seek+Read, and
// finally in full through a standard (non-seekable) ZSTD reader.
func main() {
	f, err := os.CreateTemp("", "example")
	if err != nil {
		log.Fatal(err)
	}
	// The seekable Writer does not close the underlying writer, so the file
	// has to be closed here. Close it before removing it; both steps are
	// best-effort cleanup of a throwaway temp file, hence the ignored errors.
	defer func() {
		_ = f.Close()
		_ = os.Remove(f.Name())
	}()

	enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest))
	if err != nil {
		log.Fatal(err)
	}
	defer enc.Close()

	w, err := seekable.NewWriter(f, enc)
	if err != nil {
		log.Fatal(err)
	}

	// Write data in chunks.
	for _, b := range [][]byte{[]byte("Hello"), []byte(" "), []byte("World!")} {
		_, err = w.Write(b)
		if err != nil {
			log.Fatal(err)
		}
	}

	// Close and flush seek table.
	err = w.Close()
	if err != nil {
		log.Fatal(err)
	}

	dec, err := zstd.NewReader(nil)
	if err != nil {
		log.Fatal(err)
	}
	defer dec.Close()

	r, err := seekable.NewReader(f, dec)
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close()

	ello := make([]byte, 4)
	// ReaderAt
	_, err = r.ReadAt(ello, 1)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Offset: 1 from the start: %s\n", string(ello))

	world := make([]byte, 5)
	// Seeker
	_, err = r.Seek(-6, io.SeekEnd)
	if err != nil {
		log.Fatal(err)
	}
	// Reader
	_, err = r.Read(world)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Offset: -6 from the end: %s\n", string(world))

	// Rewind the file so the standard reader starts at the first frame.
	// A failed seek would otherwise surface later as a confusing decode error.
	_, err = f.Seek(0, io.SeekStart)
	if err != nil {
		log.Fatal(err)
	}

	// Standard ZSTD Reader.
	dec, err = zstd.NewReader(f)
	if err != nil {
		log.Fatal(err)
	}
	defer dec.Close()

	all, err := io.ReadAll(dec)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Whole string: %s\n", string(all))
}
Output:

Offset: 1 from the start: ello
Offset: -6 from the end: World
Whole string: Hello World!

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func WithREnvironment

func WithREnvironment(e env.REnvironment) rOption

func WithRLogger

func WithRLogger(l *zap.Logger) rOption

func WithWEnvironment

func WithWEnvironment(e env.WEnvironment) wOption

func WithWLogger

func WithWLogger(l *zap.Logger) wOption

Types

type Decoder

type Decoder interface {
	// GetIndexByDecompOffset returns FrameOffsetEntry for an offset in the decompressed stream.
	// Will return nil if offset is greater than or equal to Size().
	GetIndexByDecompOffset(off uint64) *env.FrameOffsetEntry

	// GetIndexByID returns FrameOffsetEntry for a given frame id.
	// Will return nil if id is greater than or equal to NumFrames() or less than 0.
	GetIndexByID(id int64) *env.FrameOffsetEntry

	// Size returns the size of the uncompressed stream.
	Size() int64

	// NumFrames returns the number of frames in the compressed stream.
	NumFrames() int64

	// Close closes the decoder, freeing up any resources.
	Close() error
}

Decoder is a byte-oriented API that is useful for cases where wrapping io.ReadSeeker is not desirable.

func NewDecoder

func NewDecoder(seekTable []byte, decoder ZSTDDecoder, opts ...rOption) (Decoder, error)

NewDecoder creates a byte-oriented Decoder interface from a given seek table index. This index can be produced by either Writer's WriteSeekTable or Encoder's EndStream. Decoder can be used concurrently.

type Encoder

type Encoder interface {
	// Encode returns compressed data and appends a frame to the in-memory seek table.
	Encode(src []byte) ([]byte, error)

	// EndStream returns the in-memory seek table as a ZSTD skippable frame.
	EndStream() ([]byte, error)
}

Encoder is a byte-oriented API that is useful where wrapping io.Writer is not desirable.

func NewEncoder

func NewEncoder(encoder ZSTDEncoder, opts ...wOption) (Encoder, error)

type Reader

type Reader interface {
	// Seek implements the io.Seeker interface to randomly access data.
	// This method is NOT goroutine-safe and CAN NOT be called
	// concurrently since it modifies the underlying offset.
	Seek(offset int64, whence int) (int64, error)

	// Read implements the io.Reader interface to sequentially access data.
	// This method is NOT goroutine-safe and CAN NOT be called
	// concurrently since it modifies the underlying offset.
	Read(p []byte) (n int, err error)

	// ReadAt implements the io.ReaderAt interface to randomly access data.
	// This method is goroutine-safe and can be called concurrently ONLY if
	// the underlying reader supports the io.ReaderAt interface.
	ReadAt(p []byte, off int64) (n int, err error)

	// Close implements the io.Closer interface to free up any resources.
	Close() error
}

func NewReader

func NewReader(rs io.ReadSeeker, decoder ZSTDDecoder, opts ...rOption) (Reader, error)

NewReader returns a ZSTD stream reader that can be randomly accessed using uncompressed data offsets. Ideally, the passed io.ReadSeeker should also implement the io.ReaderAt interface.

type Writer

type Writer interface {
	// Write writes a chunk of data as a separate frame into the datastream.
	//
	// Note that Write does not do any coalescing nor splitting of data,
	// so each write will map to a separate ZSTD Frame.
	Write(src []byte) (int, error)

	// Close implements the io.Closer interface. It writes the seek table footer
	// and releases occupied memory.
	//
	// The caller is still responsible for closing the underlying writer.
	Close() (err error)
}

func NewWriter

func NewWriter(w io.Writer, encoder ZSTDEncoder, opts ...wOption) (Writer, error)

NewWriter wraps the passed io.Writer and Encoder into an indexed ZSTD stream. The resulting stream can then be randomly accessed through the Reader and Decoder interfaces.

type ZSTDDecoder

type ZSTDDecoder interface {
	// DecodeAll decompresses input and returns the decompressed bytes or an error.
	// NOTE(review): presumably the result is appended to dst, as with
	// github.com/klauspost/compress/zstd's Decoder.DecodeAll; confirm.
	DecodeAll(input, dst []byte) ([]byte, error)
}

ZSTDDecoder is the decompressor. Tested with github.com/klauspost/compress/zstd.

type ZSTDEncoder

type ZSTDEncoder interface {
	// EncodeAll compresses src and returns the compressed bytes.
	// NOTE(review): presumably the result is appended to dst, as with
	// github.com/klauspost/compress/zstd's Encoder.EncodeAll; confirm.
	EncodeAll(src, dst []byte) []byte
}

ZSTDEncoder is the compressor. Tested with github.com/klauspost/compress/zstd.

Directories

Path Synopsis
cmd

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL