Documentation
¶
Overview ¶
Package intern implements fast, immutable string interning.
The package is a cgo binding for libintern:
https://github.com/chriso/intern
Interning is a way of storing distinct strings only once in memory:
https://en.wikipedia.org/wiki/String_interning
Each string is assigned an ID of type uint32. IDs start at 1 and increment towards 2^32-1:
repository := intern.NewRepository()
id := repository.Intern("foo")
fmt.Println(id) // => 1
id = repository.Intern("bar")
fmt.Println(id) // => 2
id = repository.Intern("foo")
fmt.Println(id) // => 1
id = repository.Intern("qux")
fmt.Println(id) // => 3
Two-way lookup is provided:
if id, ok := repository.Lookup("foo"); ok { fmt.Printf("string 'foo' has ID: %v", id) } if str, ok := repository.LookupID(1); ok { fmt.Printf("string with ID 1: %v", str) }
The package also provides a way to iterate unique strings in order of ID, optimize string repositories using frequency analysis, and restore string repositories to a previous snapshot.
This package is *NOT* safe to use from multiple goroutines without locking, e.g. https://golang.org/pkg/sync/#Mutex
Index ¶
- Variables
- type Cursor
- type Frequency
- type Repository
- func (repo *Repository) AllocatedBytes() uint64
- func (repo *Repository) Count() uint32
- func (repo *Repository) Cursor() *Cursor
- func (repo *Repository) Intern(str string) uint32
- func (repo *Repository) Lookup(str string) (uint32, bool)
- func (repo *Repository) LookupID(id uint32) (string, bool)
- func (repo *Repository) Optimize(freq *Frequency) *Repository
- func (repo *Repository) PageSize() uint64
- func (repo *Repository) Restore(snapshot *Snapshot) error
- func (repo *Repository) Snapshot() *Snapshot
- type Snapshot
Examples ¶
Constants ¶
This section is empty.
Variables ¶
var ErrInvalidSnapshot = fmt.Errorf("invalid snapshot")
ErrInvalidSnapshot is returned by Repository.Restore when the repository and snapshot are incompatible
Functions ¶
This section is empty.
Types ¶
type Cursor ¶
type Cursor struct {
// contains filtered or unexported fields
}
Cursor is used to iterate strings in a repository
type Frequency ¶
type Frequency struct {
// contains filtered or unexported fields
}
Frequency is used to track string frequencies
func NewFrequency ¶
func NewFrequency() *Frequency
NewFrequency creates a new string frequency tracker
func (*Frequency) Add ¶
Add adds a string ID. This should be called after interning a string and getting back the ID
func (*Frequency) AddAll ¶
func (freq *Frequency) AddAll(repo *Repository)
AddAll adds all string IDs, to ensure that each string is present in the optimized repository
type Repository ¶
type Repository struct {
// contains filtered or unexported fields
}
Repository stores a collection of unique strings
func (*Repository) AllocatedBytes ¶
func (repo *Repository) AllocatedBytes() uint64
AllocatedBytes returns the total number of bytes allocated by the string repository
func (*Repository) Count ¶
func (repo *Repository) Count() uint32
Count returns the total number of unique strings in the repository
Example ¶
repo := intern.NewRepository() fmt.Printf("Initial count is %d\n", repo.Count()) strings := []string{"foo", "bar", "qux", "qux", "qux", "foo"} for _, str := range strings { repo.Intern(str) } fmt.Printf("There are now %d unique strings\n", repo.Count())
Output: Initial count is 0 There are now 3 unique strings
func (*Repository) Cursor ¶
func (repo *Repository) Cursor() *Cursor
Cursor creates a new cursor for iterating strings
Example ¶
repo := intern.NewRepository() strings := []string{"foo", "bar", "baz"} for _, str := range strings { repo.Intern(str) } cursor := repo.Cursor() for cursor.Next() { fmt.Printf("String %#v has id %d\n", cursor.String(), cursor.ID()) }
Output: String "foo" has id 1 String "bar" has id 2 String "baz" has id 3
func (*Repository) Intern ¶
func (repo *Repository) Intern(str string) uint32
Intern interns a string and returns its unique ID. Note that IDs increment from 1. This function will panic if the string does not fit in one page (that is, unless len(str) < repo.PageSize()) or if the uint32 IDs overflow. It is the caller's responsibility to check that these constraints are met.
Example ¶
repo := intern.NewRepository() fmt.Println(repo.Intern("foo")) fmt.Println(repo.Intern("bar")) fmt.Println(repo.Intern("baz")) fmt.Println(repo.Intern("foo"))
Output: 1 2 3 1
func (*Repository) Lookup ¶
func (repo *Repository) Lookup(str string) (uint32, bool)
Lookup returns the ID associated with a string, or false if the string does not exist in the repository.
Example ¶
repo := intern.NewRepository() repo.Intern("foo") for _, str := range []string{"foo", "bar"} { if id, ok := repo.Lookup(str); ok { fmt.Printf("Found string %#v with id %d\n", str, id) } else { fmt.Printf("Did not find string %#v\n", str) } }
Output: Found string "foo" with id 1 Did not find string "bar"
func (*Repository) LookupID ¶
func (repo *Repository) LookupID(id uint32) (string, bool)
LookupID returns the string associated with an ID, or false if the ID does not exist in the repository.
Example ¶
repo := intern.NewRepository() repo.Intern("foo") for _, id := range []uint32{1, 2} { if str, ok := repo.LookupID(id); ok { fmt.Printf("Found string %#v with id %d\n", str, id) } else { fmt.Printf("Did not find id %d\n", id) } }
Output: Found string "foo" with id 1 Did not find id 2
func (*Repository) Optimize ¶
func (repo *Repository) Optimize(freq *Frequency) *Repository
Optimize creates a new, optimized string repository which stores the most frequently seen strings together. The string with the lowest ID (1) is the most frequently seen string
Example ¶
repo := intern.NewRepository() frequencies := intern.NewFrequency() strings := []string{"foo", "bar", "qux", "qux", "qux", "foo"} for _, str := range strings { id := repo.Intern(str) frequencies.Add(id) } optimized := repo.Optimize(frequencies) cursor := optimized.Cursor() for cursor.Next() { fmt.Printf("String %#v has id %d\n", cursor.String(), cursor.ID()) }
Output: String "qux" has id 1 String "foo" has id 2 String "bar" has id 3
func (*Repository) PageSize ¶
func (repo *Repository) PageSize() uint64
PageSize returns the compile-time page size setting
func (*Repository) Restore ¶
func (repo *Repository) Restore(snapshot *Snapshot) error
Restore restores the string repository to a previous snapshot
Example ¶
repo := intern.NewRepository() repo.Intern("foo") snapshot := repo.Snapshot() repo.Intern("bar") repo.Intern("qux") repo.Restore(snapshot) repo.Intern("xyz") cursor := repo.Cursor() for cursor.Next() { fmt.Printf("String %#v has id %d\n", cursor.String(), cursor.ID()) }
Output: String "foo" has id 1 String "xyz" has id 2
func (*Repository) Snapshot ¶
func (repo *Repository) Snapshot() *Snapshot
Snapshot creates a new snapshot of the repository. The repository can later be restored to this position with Restore.