ptrie

package module
v1.0.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 2, 2024 License: Apache-2.0 Imports: 10 Imported by: 4

README

Trie (Prefix tree)

GoReportCard GoDoc

This library is compatible with Go 1.11+

Please refer to CHANGELOG.md if you encounter breaking changes.

Motivation

The goal of this project is to provide a serverless-friendly prefix tree implementation, where one function can easily build a tree and publish it to some cloud storage. A second function can then load the trie to perform various operations.

Introduction

A trie (prefix tree) is a space-optimized tree data structure in which each node that is the only child is merged with its parent. Unlike regular trees (where whole keys are compared from their beginning up to the point of inequality), the key at each node is compared chunk by chunk.

A prefix tree has the following applications:

  • text document searching
  • rule based matching
  • constructing associative arrays for string keys

Character comparison complexity:

  • Brute Force: O(d n k)
  • Prefix Trie: O(d log(k))

Where

  • d: number of characters in document
  • n: number of keywords
  • k: average keyword length
Usage

    trie := ptrie.New()
    for key, value := range pairs {
        if err = trie.Put(key, value); err != nil {
            log.Fatal(err)
         }
    }
    //...
    has := trie.Has(key)
    value, has := trie.Get(key)
    //...
    matched := trie.MatchAll(input,  func(key []byte, value interface{}) bool {
        fmt.Printf("matched: key: %s, value %v\n", key, value)
        return true 
    })
    
  1. Building

    trie := ptrie.New()
    
    for key, value := range pairs {
         if err = trie.Put(key, value); err != nil {
         	log.Fatal(err)
         }
    }
    
    writer := new(bytes.Buffer)
	if err := trie.Encode(writer); err != nil {
		log.Fatal(err)
	}
	encoded := writer.Bytes()
	//write encode data

  1. Loading

    //V type can be any type
    var v *V
    

    trie := ptrie.New()
    trie.UseType(reflect.TypeOf(v))
    if err := trie.Decode(reader); err != nil {
    	log.Fatal(err)
    }

  1. Traversing (range map)

    trie.Walk(func(key []byte, value interface{}) bool {
		fmt.Printf("key: %s, value %v\n", key, value)
		return true
	})

  1. Lookup

    has := trie.Has(key)
    value, has := trie.Get(key)

  1. MatchPrefix

    var input []byte
    ...

    matched := trie.MatchPrefix(input,  func(key []byte, value interface{}) bool {
        fmt.Printf("matched: key: %s, value %v\n", key, value)
        return true 
    })

  1. MatchAll

    var input []byte
    ...

    matched := trie.MatchAll(input,  func(key []byte, value interface{}) bool {
        fmt.Printf("matched: key: %s, value %v\n", key, value)
        return true 
    })

Benchmark

The benchmark counts all words that are part of the following extracts:

Lorem Ipsum

  1. Short: avg line size: 20, words: 13
  2. Long: avg line size: 711, words: 551
Benchmark_LoremBruteForceShort-8    	  500000	      3646 ns/op
Benchmark_LoremTrieShort-8          	  500000	      2376 ns/op
Benchmark_LoremBruteForceLong-8     	    1000	   1612877 ns/op
Benchmark_LoremTrieLong-8           	   10000	    119990 ns/op

Hamlet

  1. Short: avg line size: 20, words: 49
  2. Long: avg line size: 41, words: 105
Benchmark_HamletBruteForceShort-8   	   30000	     44306 ns/op
Benchmark_HamletTrieShort-8         	  100000	     18530 ns/op
Benchmark_HamletBruteForceLong-8    	   10000	    226836 ns/op
Benchmark_HamletTrieLong-8          	   50000	     39329 ns/op
Code coverage

GoCover

License

The source code is made available under the terms of the Apache License, Version 2, as stated in the file LICENSE.

Individual files may be made available under their own specific license, all compatible with Apache License, Version 2. Please see individual files for details.

Credits and Acknowledgements

Library Author: Adrian Witas

Documentation

Index

Constants

View Source
const (
	//NodeTypeValue value type
	NodeTypeValue = uint8(2)
	//NodeTypeEdge edge type
	NodeTypeEdge = uint8(4)
)

Variables

This section is empty.

Functions

This section is empty.

Types

type Bit64Set

type Bit64Set uint64

Bit64Set represents a 64-bit set

func (Bit64Set) IsSet

func (s Bit64Set) IsSet(value uint8) bool

IsSet returns true if bit is set

func (Bit64Set) Put

func (s Bit64Set) Put(value uint8) Bit64Set

Put creates a new bit set with the supplied value set; the value must not be greater than 64

type Bytes

type Bytes []byte

Bytes represents byte slice

func (Bytes) LastSharedIndex

func (b Bytes) LastSharedIndex(bs []byte) int

LastSharedIndex computes the last index of the prefix shared with the supplied bytes

func (Bytes) Len

func (b Bytes) Len() int

func (Bytes) Less

func (b Bytes) Less(i, j int) bool

func (Bytes) Swap

func (b Bytes) Swap(i, j int)

type Decoder

type Decoder interface {
	Decode(reader io.Reader) error
}

Decoder represents a type that can decode itself from an io.Reader

type Encoder

type Encoder interface {
	Encode(writer io.Writer) error
}

Encoder represents a type that can encode itself to an io.Writer

type KeyProvider

type KeyProvider interface {
	Key() interface{}
}

KeyProvider represents entity key provider

type Merger

type Merger[T any] func(previous, next T) (merged T)

Merger represents node value merger

type Node

type Node[T any] struct {
	Type       uint8
	ValueIndex uint32

	Prefix   []byte //24
	Nodes[T]        //24
	// contains filtered or unexported fields
}

Node represents a node

func (*Node[T]) Data added in v0.3.1

func (n *Node[T]) Data() []byte

func (*Node[T]) Decode

func (n *Node[T]) Decode(reader io.Reader) error

Decode decodes the node from the supplied reader

func (*Node[T]) Encode

func (n *Node[T]) Encode(writer io.Writer) error

Encode encodes the node to the supplied writer

func (*Node[T]) Equals added in v0.3.1

func (n *Node[T]) Equals(d *Node[T]) bool

func (*Node[T]) LoadNode added in v0.3.1

func (n *Node[T]) LoadNode(data []byte)

func (*Node[T]) Read added in v0.3.1

func (n *Node[T]) Read(data *[]byte)

func (*Node[T]) Size added in v0.3.1

func (n *Node[T]) Size() int

type Nodes

type Nodes[T any] []Node[T]

Nodes represents node slice

func (Nodes[T]) IndexOf

func (n Nodes[T]) IndexOf(b byte) int

IndexOf returns the index of the node matching the supplied byte, or -1 if there is no match

func (Nodes[T]) Len

func (n Nodes[T]) Len() int

func (Nodes[T]) Less

func (n Nodes[T]) Less(i, j int) bool

func (Nodes[T]) Swap

func (n Nodes[T]) Swap(i, j int)

type OnMatch

type OnMatch[T any] func(key []byte, value T) bool

OnMatch represents a matching input handler; its return value instructs the trie whether to continue the search

type Trie

type Trie[T any] interface {
	Put(key []byte, value T) error

	Merge(key []byte, value T, merger Merger[T]) error

	Get(key []byte) (T, bool)

	Has(key []byte) bool

	//Walk all tries value nodes.
	Walk(handler Visitor[T])

	//MatchPrefix matches input prefix, ie. input: dev.domain.com, would match with trie keys like: dev, dev.domain
	MatchPrefix(input []byte, handler OnMatch[T]) bool

	//MatchAll matches input with any occurrences of tries keys.
	MatchAll(input []byte, handler OnMatch[T]) bool

	UseType(vType reflect.Type)

	//Decode decodes concurrently trie nodes and values
	Decode(reader io.Reader) error

	//DecodeSequentially decode sequentially trie nodes and values
	DecodeSequentially(reader io.Reader) error

	Encode(writer io.Writer) error

	ValueCount() int

	Write(writer io.Writer) error

	Read(reader io.Reader) error

	Root() *Node[T]
}

Trie represents prefix tree interface

func New

func New[T any]() Trie[T]

New creates a new prefix trie

type Visitor

type Visitor[T any] func(key []byte, value T) bool

Visitor represents value node visitor handler

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL