extsort

package module
v0.6.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 11, 2023 License: Apache-2.0 Imports: 10 Imported by: 4

README

ExtSort

Go Reference Test License

External merge sort algorithm, implemented in Go. Sort arbitrarily large data sets with a predictable amount of memory using disk.

Example:

Sorting lines:

import(
  "fmt"

  "github.com/bsm/extsort"
)

func main() {
	// Init sorter.
	sorter := extsort.New(nil)
	defer sorter.Close()

	// Append plain data.
	_ = sorter.Append([]byte("foo"))
	_ = sorter.Append([]byte("bar"))
	_ = sorter.Append([]byte("baz"))
	_ = sorter.Append([]byte("dau"))

	// Sort and iterate.
	iter, err := sorter.Sort()
	if err != nil {
		panic(err)
	}
	defer iter.Close()

	for iter.Next() {
		fmt.Println(string(iter.Data()))
	}
	if err := iter.Err(); err != nil {
		panic(err)
	}

}

Map-style API with de-duplication:

import(
  "fmt"

  "github.com/bsm/extsort"
)

func main() {
	// Init with de-duplication.
	sorter := extsort.New(&extsort.Options{
		Dedupe: bytes.Equal,
	})
	defer sorter.Close()

	// Put key/value data.
	_ = sorter.Put([]byte("foo"), []byte("v1"))
	_ = sorter.Put([]byte("bar"), []byte("v2"))
	_ = sorter.Put([]byte("baz"), []byte("v3"))
	_ = sorter.Put([]byte("bar"), []byte("v4"))	// duplicate
	_ = sorter.Put([]byte("dau"), []byte("v5"))

	// Sort and iterate.
	iter, err := sorter.Sort()
	if err != nil {
		panic(err)
	}
	defer iter.Close()

	for iter.Next() {
		fmt.Println(string(iter.Key()), string(iter.Value()))
	}
	if err := iter.Err(); err != nil {
		panic(err)
	}

}

Documentation

Overview

Example (Map)
package main

import (
	"bytes"
	"fmt"

	"github.com/bsm/extsort"
)

func main() {
	// Init with de-duplication.
	sorter := extsort.New(&extsort.Options{
		Dedupe: bytes.Equal,
	})
	defer sorter.Close()

	// Put key/value data.
	_ = sorter.Put([]byte("foo"), []byte("v1"))
	_ = sorter.Put([]byte("bar"), []byte("v2"))
	_ = sorter.Put([]byte("baz"), []byte("v3"))
	_ = sorter.Put([]byte("bar"), []byte("v4")) // duplicate
	_ = sorter.Put([]byte("dau"), []byte("v5"))

	// Sort and iterate.
	iter, err := sorter.Sort()
	if err != nil {
		panic(err)
	}
	defer iter.Close()

	for iter.Next() {
		fmt.Println(string(iter.Key()), string(iter.Value()))
	}
	if err := iter.Err(); err != nil {
		panic(err)
	}

}
Output:

bar v4
baz v3
dau v5
foo v1
Example (Plain)
package main

import (
	"fmt"

	"github.com/bsm/extsort"
)

func main() {
	// Init sorter.
	sorter := extsort.New(nil)
	defer sorter.Close()

	// Append plain data.
	_ = sorter.Append([]byte("foo"))
	_ = sorter.Append([]byte("bar"))
	_ = sorter.Append([]byte("baz"))
	_ = sorter.Append([]byte("dau"))

	// Sort and iterate.
	iter, err := sorter.Sort()
	if err != nil {
		panic(err)
	}
	defer iter.Close()

	for iter.Next() {
		fmt.Println(string(iter.Data()))
	}
	if err := iter.Err(); err != nil {
		panic(err)
	}

}
Output:

bar
baz
dau
foo

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Compare added in v0.6.0

type Compare func(a, b []byte) int

Compares byte chunks. -1 for a < b and 0 for a == b.

type Compression

type Compression uint8

Compression codec.

const (
	CompressionNone Compression = iota
	CompressionGzip
	CompressionSnappy
)

Supported compression types.

type Equal added in v0.5.0

type Equal func(a, b []byte) bool

Equal compares two byte chunks for equality.

type Iterator

type Iterator struct {
	// contains filtered or unexported fields
}

Iterator instances are used to iterate over sorted output.

func (*Iterator) Close

func (i *Iterator) Close() error

Close closes the iterator.

func (*Iterator) Data

func (i *Iterator) Data() []byte

Data returns the data at the current cursor position (alias for Key).

func (*Iterator) Err

func (i *Iterator) Err() error

Err returns the error, if occurred.

func (*Iterator) Key added in v0.5.0

func (i *Iterator) Key() []byte

Key returns the key at the current cursor position.

func (*Iterator) Next

func (i *Iterator) Next() bool

Next advances the iterator to the next item and returns true if successful.

func (*Iterator) Value added in v0.5.0

func (i *Iterator) Value() []byte

Value returns the value at the current cursor position.

type Options

type Options struct {
	// WorkDir specifies the working directory.
	// By default os.TempDir() is used.
	WorkDir string

	// Keep temporary files until Close.
	// Default: Immediately remove temporary files.
	KeepFiles bool

	// Compare defines the compare function.
	// Default: bytes.Compare
	Compare Compare

	// Sort defines the sort function that is used.
	// Default: sort.Sort
	Sort func(sort.Interface)

	// Dedupe defines the compare function for de-duplication.
	// Default: nil (= do not de-dupe)
	// Keeps the last added item.
	Dedupe Equal

	// BufferSize limits the memory buffer used for sorting.
	// Default: 64MiB (must be at least 64KiB)
	BufferSize int

	// Compression optionally uses compression for temporary output.
	Compression Compression
}

Options contains sorting options

type Sorter

type Sorter struct {
	// contains filtered or unexported fields
}

Sorter is responsible for sorting.

func New

func New(opt *Options) *Sorter

New inits a sorter

func (*Sorter) Append

func (s *Sorter) Append(data []byte) error

Append appends a data chunk to the sorter.

func (*Sorter) Close

func (s *Sorter) Close() error

Close stops the processing and removes temporary files.

func (*Sorter) Put added in v0.5.0

func (s *Sorter) Put(key, value []byte) error

Put inserts a key value pair into the sorter.

func (*Sorter) Size added in v0.5.0

func (s *Sorter) Size() int64

Size returns the buffered and written size.

func (*Sorter) Sort

func (s *Sorter) Sort() (*Iterator, error)

Sort applies the sort algorithm and returns an interator.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL