Documentation
¶
Index ¶
- Constants
- Variables
- func DatabaseSchemes() []string
- func DeriveModelDimensions(ctx context.Context, model string, opts ...options.Option) (int, error)
- func InflateDuckDBRecord(ctx context.Context, rows any) (*embeddingsdb.Record, error)
- func ListRecords(ctx context.Context, db Database, list_opts *ListRecordsOptions, ...) iter.Seq2[*embeddingsdb.Record, error]
- func RegisterDatabase(ctx context.Context, scheme string, init_func DatabaseInitializationFunc) error
- func SetupDuckDBDatabase(ctx context.Context, db *sql.DB, opts *SetupDuckDBDatabaseOptions) error
- type Database
- type DatabaseInitializationFunc
- type DuckDBDatabase
- func (db *DuckDBDatabase) AddBatchedRecord(ctx context.Context, opts ...options.Option) error
- func (db *DuckDBDatabase) AddRecord(ctx context.Context, rec *embeddingsdb.Record, opts ...options.Option) (bool, error)
- func (db *DuckDBDatabase) BatchedRecordsCount(ctx context.Context, opts ...options.Option) (int, error)
- func (db *DuckDBDatabase) Close(ctx context.Context) error
- func (db *DuckDBDatabase) Dimensions(ctx context.Context, opts ...options.Option) ([]int, error)
- func (db *DuckDBDatabase) Export(ctx context.Context, uri string, opts ...options.Option) error
- func (db *DuckDBDatabase) GetRecord(ctx context.Context, req *embeddingsdb.GetRecordRequest, ...) (*embeddingsdb.Record, error)
- func (db *DuckDBDatabase) IterateRecords(ctx context.Context, opts ...options.Option) iter.Seq2[*embeddingsdb.Record, error]
- func (db *DuckDBDatabase) LastUpdate(ctx context.Context, opts ...options.Option) (int64, error)
- func (db *DuckDBDatabase) ListRecords(ctx context.Context, pg_opts pagination.Options, opts ...options.Option) ([]*embeddingsdb.Record, pagination.Results, error)
- func (db *DuckDBDatabase) Models(ctx context.Context, opts ...options.Option) ([]string, error)
- func (db *DuckDBDatabase) PaginationType(ctx context.Context, opts ...options.Option) (PaginationType, error)
- func (db *DuckDBDatabase) Providers(ctx context.Context, opts ...options.Option) ([]string, error)
- func (db *DuckDBDatabase) RemoveRecord(ctx context.Context, req *embeddingsdb.RemoveRecordRequest, ...) error
- func (db *DuckDBDatabase) SimilarRecords(ctx context.Context, req *embeddingsdb.SimilarRecordsRequest, ...) ([]*embeddingsdb.SimilarRecord, error)
- func (db *DuckDBDatabase) URI() string
- type ListRecordsOptions
- type NullDatabase
- func (db *NullDatabase) AddBatchedRecord(ctx context.Context, opts ...options.Option) error
- func (db *NullDatabase) AddRecord(ctx context.Context, rec *embeddingsdb.Record, opts ...options.Option) (bool, error)
- func (db *NullDatabase) BatchedRecordsCount(ctx context.Context, opts ...options.Option) (int, error)
- func (db *NullDatabase) Close(ctx context.Context) error
- func (db *NullDatabase) Dimensions(ctx context.Context, opts ...options.Option) ([]int, error)
- func (db *NullDatabase) Export(ctx context.Context, uri string, opts ...options.Option) error
- func (db *NullDatabase) GetRecord(ctx context.Context, req *embeddingsdb.GetRecordRequest, ...) (*embeddingsdb.Record, error)
- func (db *NullDatabase) IterateRecords(ctx context.Context, opts ...options.Option) iter.Seq2[*embeddingsdb.Record, error]
- func (db *NullDatabase) LastUpdate(ctx context.Context, opts ...options.Option) (int64, error)
- func (db *NullDatabase) ListRecords(ctx context.Context, pg_opts pagination.Options, opts ...options.Option) ([]*embeddingsdb.Record, pagination.Results, error)
- func (db *NullDatabase) Models(ctx context.Context, opts ...options.Option) ([]string, error)
- func (db *NullDatabase) PaginationType(ctx context.Context, opts ...options.Option) (PaginationType, error)
- func (db *NullDatabase) Providers(ctx context.Context, opts ...options.Option) ([]string, error)
- func (db *NullDatabase) RemoveRecord(ctx context.Context, req *embeddingsdb.RemoveRecordRequest, ...) error
- func (db *NullDatabase) SimilarRecords(ctx context.Context, rec *embeddingsdb.SimilarRecordsRequest, ...) ([]*embeddingsdb.SimilarRecord, error)
- func (db *NullDatabase) URI() string
- type PaginationType
- type S3VectorsDatabase
- func (db *S3VectorsDatabase) AddBatchedRecord(ctx context.Context, opts ...options.Option) error
- func (db *S3VectorsDatabase) AddRecord(ctx context.Context, rec *embeddingsdb.Record, opts ...options.Option) (bool, error)
- func (db *S3VectorsDatabase) BatchedRecordsCount(ctx context.Context, opts ...options.Option) (int, error)
- func (db *S3VectorsDatabase) Close(ctx context.Context) error
- func (db *S3VectorsDatabase) Dimensions(ctx context.Context, opts ...options.Option) ([]int, error)
- func (db *S3VectorsDatabase) Export(ctx context.Context, uri string, opts ...options.Option) error
- func (db *S3VectorsDatabase) GetRecord(ctx context.Context, req *embeddingsdb.GetRecordRequest, ...) (*embeddingsdb.Record, error)
- func (db *S3VectorsDatabase) IterateRecords(ctx context.Context, opts ...options.Option) iter.Seq2[*embeddingsdb.Record, error]
- func (db *S3VectorsDatabase) LastUpdate(ctx context.Context, opts ...options.Option) (int64, error)
- func (db *S3VectorsDatabase) ListRecords(ctx context.Context, pg_opts pagination.Options, opts ...options.Option) ([]*embeddingsdb.Record, pagination.Results, error)
- func (db *S3VectorsDatabase) Models(ctx context.Context, opts ...options.Option) ([]string, error)
- func (db *S3VectorsDatabase) PaginationType(ctx context.Context, opts ...options.Option) (PaginationType, error)
- func (db *S3VectorsDatabase) Providers(ctx context.Context, opts ...options.Option) ([]string, error)
- func (db *S3VectorsDatabase) RemoveRecord(ctx context.Context, req *embeddingsdb.RemoveRecordRequest, ...) error
- func (db *S3VectorsDatabase) SimilarRecords(ctx context.Context, req *embeddingsdb.SimilarRecordsRequest, ...) ([]*embeddingsdb.SimilarRecord, error)
- type SetupDuckDBDatabaseOptions
Constants ¶
const CountablePaginationTypeLabel string = "countable"
const CursorPaginationTypeLabel string = "cursor"
const DuckDBDatabaseScheme string = "duckdb"
const NullDatabaseScheme string = "null"
const NullPaginationTypeLabel string = "null"
const S3VectorsDatabaseScheme = "s3vectors"
S3VectorsDatabaseScheme is the URI scheme used to create a database backed by Amazon S3 Vectors.
Variables ¶
var RecordNotFound = errors.New("Record not found")
Functions ¶
func DatabaseSchemes ¶
func DatabaseSchemes() []string
DatabaseSchemes returns the list of database schemes that have been registered.
func DeriveModelDimensions ¶ added in v0.11.0
func InflateDuckDBRecord ¶ added in v0.7.1
func ListRecords ¶ added in v0.13.0
func ListRecords(ctx context.Context, db Database, list_opts *ListRecordsOptions, opts ...options.Option) iter.Seq2[*embeddingsdb.Record, error]
ListRecords returns an [iter.Seq2[*embeddingsdb.Record, error]] iterator for listing all the records in an `embeddingsdb` database. It handles all the pagination requirements derived from 'opts'.
func RegisterDatabase ¶
func RegisterDatabase(ctx context.Context, scheme string, init_func DatabaseInitializationFunc) error
RegisterDatabase registers 'scheme' as a key pointing to 'init_func' in an internal lookup table used to create new `Database` instances by the `NewDatabase` function.
func SetupDuckDBDatabase ¶ added in v0.7.1
Types ¶
type Database ¶
type Database interface {
// Return the URI string used to instantiate the Database instance.
URI() string
// AddRecord adds an [embeddingsdb.Record] instance to the underlying database implementation. The boolean return value reports whether the addition was batched.
AddRecord(context.Context, *embeddingsdb.Record, ...options.Option) (bool, error)
// The number of batched records currently waiting to be added.
BatchedRecordsCount(context.Context, ...options.Option) (int, error)
// Add the pending batched records.
AddBatchedRecords(context.Context, ...options.Option) error
// Return the EmbeddingsDB instance record matching 'provider', 'depiction_id' and 'model'.
GetRecord(context.Context, *embeddingsdb.GetRecordRequest, ...options.Option) (*embeddingsdb.Record, error)
// Remove a record from an EmbeddingsDB instance.
RemoveRecord(context.Context, *embeddingsdb.RemoveRecordRequest, ...options.Option) error
// ListRecords returns a paginated list of records stored in the database.
ListRecords(context.Context, pagination.Options, ...options.Option) ([]*embeddingsdb.Record, pagination.Results, error)
// IterateRecords returns an [iter.Seq2[*embeddingsdb.Record, error]] for each record stored in the database.
IterateRecords(context.Context, ...options.Option) iter.Seq2[*embeddingsdb.Record, error]
// Find similar records for a given model and record instance.
SimilarRecords(context.Context, *embeddingsdb.SimilarRecordsRequest, ...options.Option) ([]*embeddingsdb.SimilarRecord, error)
// Export the contents of the database. Where and how a database is exported are left as details for specific implementations.
Export(context.Context, string, ...options.Option) error
// Return the Unix timestamp of the last update to the Database instance.
LastUpdate(context.Context, ...options.Option) (int64, error)
// Return the list of dimensions supported by this Database implementation.
Dimensions(context.Context, ...options.Option) ([]int, error)
// Return the unique list of models, for zero (all) or more providers, across all the embeddings.
Models(context.Context, ...options.Option) ([]string, error)
// Return the unique list of providers across all the embeddings.
Providers(context.Context, ...options.Option) ([]string, error)
// Return the pagination type used by the database implementation.
PaginationType(context.Context, ...options.Option) (PaginationType, error)
// Close performs any terminating functions required by the database.
Close(context.Context) error
}
Database defines an interface for adding and querying vector embeddings of embeddingsdb.Record records.
func NewDatabase ¶
NewDatabase returns a new `Database` instance configured by 'uri'. The value of 'uri' is parsed as a `url.URL` and its scheme is used as the key for a corresponding `DatabaseInitializationFunc` function used to instantiate the new `Database`. It is assumed that the scheme (and initialization function) have been registered by the `RegisterDatabase` function.
func NewDuckDBDatabase ¶ added in v0.7.1
Create a new DuckDBDatabase instance for managing embeddings using the DuckDB database and VSS extension derived from 'uri' which is expected to take the form of:
duckdb://{PATH}?{QUERY_PARAMETERS}
Valid query parameters are:
- `dimensions` – The number of dimensions for the embeddings being stored. Default is 512.
- `max-distance` – Update the default maximum distance when querying for similar embeddings. Default is 1.0.
- `max-results` – Update the default number of records to return when querying for similar embeddings. Default is 10.
func NewS3VectorsDatabase ¶ added in v0.11.0
Create a new S3VectorsDatabase instance for managing embeddings using the Amazon Web Services S3Vectors service derived from 'uri' which is expected to take the form of:
s3vectors://{BUCKET_NAME}?{QUERY_PARAMETERS}
Where `{BUCKET_NAME}` is the name of the S3Vectors bucket where embeddings are stored. This will be created dynamically at runtime if it does not already exist. Valid query parameters are:
- `index` - The name of the S3Vectors index where embeddings are stored. This will be created dynamically at runtime if it does not already exist.
- `region` - The AWS region where your S3Vectors bucket is stored.
- `credentials` - A valid `aaronland/go-aws/v3/auth` credentials string.
- `dimensions` – The number of dimensions for the embeddings being stored. Default is 512.
- `max-distance` – Update the default maximum distance when querying for similar embeddings. Default is 1.0.
- `max-results` – Update the default number of records to return when querying for similar embeddings. Default is 10.
- `refresh-tags` - A boolean flag to update denormalized database properties into index-specific "tags".
- `dynamodb-table` – Use a custom DynamoDB table name for storing and querying record data. Default is "s3vectors".
type DatabaseInitializationFunc ¶
DatabaseInitializationFunc is a function defined by individual database packages and used to create an instance of that database.
type DuckDBDatabase ¶ added in v0.7.1
type DuckDBDatabase struct {
Database
// contains filtered or unexported fields
}
func (*DuckDBDatabase) AddBatchedRecord ¶ added in v0.8.0
Add the pending batched records.
func (*DuckDBDatabase) AddRecord ¶ added in v0.7.1
func (db *DuckDBDatabase) AddRecord(ctx context.Context, rec *embeddingsdb.Record, opts ...options.Option) (bool, error)
AddRecord adds an embeddingsdb.Record instance to the underlying database implementation. The boolean return value reports whether the addition was batched.
func (*DuckDBDatabase) BatchedRecordsCount ¶ added in v0.8.0
func (db *DuckDBDatabase) BatchedRecordsCount(ctx context.Context, opts ...options.Option) (int, error)
The number of batched records currently waiting to be added.
func (*DuckDBDatabase) Close ¶ added in v0.7.1
func (db *DuckDBDatabase) Close(ctx context.Context) error
Close performs any terminating functions required by the database.
func (*DuckDBDatabase) Dimensions ¶ added in v0.11.0
Return the list of dimensions supported by this Database implementation.
func (*DuckDBDatabase) Export ¶ added in v0.7.1
Export the contents of the database. This method will export the DuckDB database to 'uri'.
func (*DuckDBDatabase) GetRecord ¶ added in v0.7.1
func (db *DuckDBDatabase) GetRecord(ctx context.Context, req *embeddingsdb.GetRecordRequest, opts ...options.Option) (*embeddingsdb.Record, error)
Return the EmbeddingsDB instance record matching 'provider', 'depiction_id' and 'model'.
func (*DuckDBDatabase) IterateRecords ¶ added in v0.7.1
func (db *DuckDBDatabase) IterateRecords(ctx context.Context, opts ...options.Option) iter.Seq2[*embeddingsdb.Record, error]
IterateRecords returns an [iter.Seq2[*embeddingsdb.Record, error]] for each record stored in the database.
func (*DuckDBDatabase) LastUpdate ¶ added in v0.7.1
Return the Unix timestamp of the last update to the Database instance.
func (*DuckDBDatabase) ListRecords ¶ added in v0.7.1
func (db *DuckDBDatabase) ListRecords(ctx context.Context, pg_opts pagination.Options, opts ...options.Option) ([]*embeddingsdb.Record, pagination.Results, error)
ListRecords returns a paginated list of records stored in the database.
func (*DuckDBDatabase) Models ¶ added in v0.7.1
Return the unique list of models, for zero (all) or more providers, across all the embeddings.
func (*DuckDBDatabase) PaginationType ¶ added in v0.11.0
func (db *DuckDBDatabase) PaginationType(ctx context.Context, opts ...options.Option) (PaginationType, error)
Return the pagination type used by the database.
func (*DuckDBDatabase) Providers ¶ added in v0.7.1
Return the unique list of providers across all the embeddings.
func (*DuckDBDatabase) RemoveRecord ¶ added in v0.7.1
func (db *DuckDBDatabase) RemoveRecord(ctx context.Context, req *embeddingsdb.RemoveRecordRequest, opts ...options.Option) error
Remove a record from an EmbeddingsDB instance.
func (*DuckDBDatabase) SimilarRecords ¶ added in v0.7.1
func (db *DuckDBDatabase) SimilarRecords(ctx context.Context, req *embeddingsdb.SimilarRecordsRequest, opts ...options.Option) ([]*embeddingsdb.SimilarRecord, error)
Find similar records for a given model and record instance.
func (*DuckDBDatabase) URI ¶ added in v0.7.1
func (db *DuckDBDatabase) URI() string
Return the URI string used to instantiate the Database instance.
type ListRecordsOptions ¶ added in v0.13.0
type ListRecordsOptions struct {
// The number of records to return in each set of paginated results.
PerPage int64
// The initial page number to return paginated results for.
StartPage int64
// The maximum page number to return paginated results for. If -1 then this flag is ignored.
EndPage int64
}
ListRecordsOptions defines configuration options for calling the `ListRecords` function.
func DefaultListRecordsOptions ¶ added in v0.13.0
func DefaultListRecordsOptions() *ListRecordsOptions
DefaultListRecordsOptions returns a ListRecordsOptions with default values for returning all the records in an `embeddingsdb` database in paginated sets of 1000 records.
type NullDatabase ¶
type NullDatabase struct {
Database
}
func (*NullDatabase) AddBatchedRecord ¶ added in v0.8.0
Add the pending batched records.
func (*NullDatabase) AddRecord ¶
func (db *NullDatabase) AddRecord(ctx context.Context, rec *embeddingsdb.Record, opts ...options.Option) (bool, error)
AddRecord adds an embeddingsdb.Record instance to the underlying database implementation. The boolean return value reports whether the addition was batched.
func (*NullDatabase) BatchedRecordsCount ¶ added in v0.8.0
func (db *NullDatabase) BatchedRecordsCount(ctx context.Context, opts ...options.Option) (int, error)
The number of batched records currently waiting to be added.
func (*NullDatabase) Close ¶
func (db *NullDatabase) Close(ctx context.Context) error
Close performs any terminating functions required by the database.
func (*NullDatabase) Dimensions ¶ added in v0.11.0
Return the list of dimensions supported by this Database implementation.
func (*NullDatabase) Export ¶
Export the contents of the database. Where and how a database is exported are left as details for specific implementations.
func (*NullDatabase) GetRecord ¶
func (db *NullDatabase) GetRecord(ctx context.Context, req *embeddingsdb.GetRecordRequest, opts ...options.Option) (*embeddingsdb.Record, error)
Return the EmbeddingsDB instance record matching 'provider', 'depiction_id' and 'model'.
func (*NullDatabase) IterateRecords ¶ added in v0.3.0
func (db *NullDatabase) IterateRecords(ctx context.Context, opts ...options.Option) iter.Seq2[*embeddingsdb.Record, error]
IterateRecords returns an [iter.Seq2[*embeddingsdb.Record, error]] for each record stored in the database.
func (*NullDatabase) LastUpdate ¶
Return the Unix timestamp of the last update to the Database instance.
func (*NullDatabase) ListRecords ¶ added in v0.5.0
func (db *NullDatabase) ListRecords(ctx context.Context, pg_opts pagination.Options, opts ...options.Option) ([]*embeddingsdb.Record, pagination.Results, error)
ListRecords returns a paginated list of records stored in the database.
func (*NullDatabase) Models ¶ added in v0.1.0
Return the unique list of models, for zero (all) or more providers, across all the embeddings.
func (*NullDatabase) PaginationType ¶ added in v0.11.0
func (db *NullDatabase) PaginationType(ctx context.Context, opts ...options.Option) (PaginationType, error)
Return the pagination type used by the database.
func (*NullDatabase) Providers ¶ added in v0.1.0
Return the unique list of providers across all the embeddings.
func (*NullDatabase) RemoveRecord ¶ added in v0.6.0
func (db *NullDatabase) RemoveRecord(ctx context.Context, req *embeddingsdb.RemoveRecordRequest, opts ...options.Option) error
Remove a record from an EmbeddingsDB instance.
func (*NullDatabase) SimilarRecords ¶
func (db *NullDatabase) SimilarRecords(ctx context.Context, rec *embeddingsdb.SimilarRecordsRequest, opts ...options.Option) ([]*embeddingsdb.SimilarRecord, error)
Find similar records for a given model and record instance.
func (*NullDatabase) URI ¶
func (db *NullDatabase) URI() string
Return the URI string used to instantiate the Database instance.
type PaginationType ¶ added in v0.11.0
type PaginationType uint8
const ( NullPaginationType PaginationType = iota CountablePaginationType CursorPaginationType )
func NewPaginationType ¶ added in v0.11.0
func NewPaginationType(label string) (PaginationType, error)
func (PaginationType) String ¶ added in v0.11.0
func (p PaginationType) String() string
type S3VectorsDatabase ¶ added in v0.11.0
type S3VectorsDatabase struct {
Database
// contains filtered or unexported fields
}
S3VectorsDatabase is a concrete implementation of the embeddingsdb.Database interface that stores embeddings in an S3 Vectors bucket and index. It optionally maintains a DynamoDB table for fast listing by provider or model.
func (*S3VectorsDatabase) AddBatchedRecord ¶ added in v0.11.0
Add the pending batched records.
func (*S3VectorsDatabase) AddRecord ¶ added in v0.11.0
func (db *S3VectorsDatabase) AddRecord(ctx context.Context, rec *embeddingsdb.Record, opts ...options.Option) (bool, error)
AddRecord adds an embeddingsdb.Record instance to the underlying database implementation. The boolean return value reports whether the addition was batched.
func (*S3VectorsDatabase) BatchedRecordsCount ¶ added in v0.11.0
func (db *S3VectorsDatabase) BatchedRecordsCount(ctx context.Context, opts ...options.Option) (int, error)
The number of batched records currently waiting to be added.
func (*S3VectorsDatabase) Close ¶ added in v0.11.0
func (db *S3VectorsDatabase) Close(ctx context.Context) error
Close performs any terminating functions required by the database.
func (*S3VectorsDatabase) Dimensions ¶ added in v0.11.0
Return the list of dimensions supported by this Database implementation.
func (*S3VectorsDatabase) Export ¶ added in v0.11.0
Export the contents of the database. Where and how a database is exported are left as details for specific implementations.
func (*S3VectorsDatabase) GetRecord ¶ added in v0.11.0
func (db *S3VectorsDatabase) GetRecord(ctx context.Context, req *embeddingsdb.GetRecordRequest, opts ...options.Option) (*embeddingsdb.Record, error)
GetRecord retrieves a single Record from S3 Vectors using the key composed from provider, model and depiction_id. If the record is not found, RecordNotFound is returned.
func (*S3VectorsDatabase) IterateRecords ¶ added in v0.11.0
func (db *S3VectorsDatabase) IterateRecords(ctx context.Context, opts ...options.Option) iter.Seq2[*embeddingsdb.Record, error]
IterateRecords returns a [iter.Seq2[*embeddingsdb.Record, error]] that yields every record stored in the database. The sequence is lazy and will continue until the context is cancelled or an error occurs.
func (*S3VectorsDatabase) LastUpdate ¶ added in v0.11.0
Return the Unix timestamp of the last update to the Database instance. As of this writing this always returns 0 because the cost of constantly crawling the index and the mechanics of denormalizing this data and then keeping it in sync are too high.
func (*S3VectorsDatabase) ListRecords ¶ added in v0.11.0
func (db *S3VectorsDatabase) ListRecords(ctx context.Context, pg_opts pagination.Options, opts ...options.Option) ([]*embeddingsdb.Record, pagination.Results, error)
ListRecords returns a paginated list of all records in the database. When a DynamoDB client is configured the method falls back to using it for filtering by provider or model. The returned Results object contains the pagination cursors.
func (*S3VectorsDatabase) Models ¶ added in v0.11.0
Return the unique list of models, for zero (all) or more providers, across all the embeddings.
func (*S3VectorsDatabase) PaginationType ¶ added in v0.11.0
func (db *S3VectorsDatabase) PaginationType(ctx context.Context, opts ...options.Option) (PaginationType, error)
Return the pagination type used by the database.
func (*S3VectorsDatabase) Providers ¶ added in v0.11.0
func (db *S3VectorsDatabase) Providers(ctx context.Context, opts ...options.Option) ([]string, error)
Return the unique list of providers across all the embeddings.
func (*S3VectorsDatabase) RemoveRecord ¶ added in v0.11.0
func (db *S3VectorsDatabase) RemoveRecord(ctx context.Context, req *embeddingsdb.RemoveRecordRequest, opts ...options.Option) error
RemoveRecord deletes the record identified by req from the S3 Vectors index and, if configured, from the DynamoDB table. Errors from either store are returned.
func (*S3VectorsDatabase) SimilarRecords ¶ added in v0.11.0
func (db *S3VectorsDatabase) SimilarRecords(ctx context.Context, req *embeddingsdb.SimilarRecordsRequest, opts ...options.Option) ([]*embeddingsdb.SimilarRecord, error)
SimilarRecords searches for embeddings similar to those in req. The result slice contains the matching records together with their similarity distance. The search can be restricted by provider, model, distance and a list of depiction IDs to exclude via the supplied options.