embedding

package

Version: v0.0.113 (latest) · Published: Aug 5, 2024 · License: MIT · Imports: 26 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/hupe1980/golc

Links

Open Source Insights

Documentation ¶

Overview ¶

Package embedding contains the implementation to create vector embeddings from text using different APIs.

Index ¶

Variables
type AzureOpenAIOptions
type Bedrock
- func NewBedrock(client BedrockRuntimeClient, modelID string, optFns ...func(o *BedrockOptions)) *Bedrock
- func NewBedrockAmazon(client BedrockRuntimeClient, optFns ...func(o *BedrockAmazonOptions)) *Bedrock
- func NewBedrockCohere(client BedrockRuntimeClient, optFns ...func(o *BedrockCohereOptions)) *Bedrock
- func (e *Bedrock) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)
- func (e *Bedrock) EmbedText(ctx context.Context, text string) ([]float32, error)
type BedrockAmazonOptions
type BedrockCohereOptions
type BedrockInputOutputAdapter
- func NewBedrockInputOutputAdapter(provider string) *BedrockInputOutputAdapter
- func (bioa *BedrockInputOutputAdapter) PrepareInput(text string, modelParams map[string]any) ([]byte, error)
- func (bioa *BedrockInputOutputAdapter) PrepareOutput(response []byte) ([]float32, error)
type BedrockOptions
type BedrockRuntimeClient
type Cohere
- func NewCohere(apiKey string, optFns ...func(o *CohereOptions)) *Cohere
- func NewCohereFromClient(client CohereClient, optFns ...func(o *CohereOptions)) *Cohere
- func (e *Cohere) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)
- func (e *Cohere) EmbedText(ctx context.Context, text string) ([]float32, error)
type CohereClient
type CohereOptions
type Cybertron
- func NewCybertron(optFns ...func(o *CybertronOptions)) (*Cybertron, error)
- func NewCybertronFromEncoder(encoder textencoding.Interface, optFns ...func(o *CybertronFromEncoderOptions)) (*Cybertron, error)
- func (e *Cybertron) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)
- func (e *Cybertron) EmbedText(ctx context.Context, text string) ([]float32, error)
type CybertronFromEncoderOptions
type CybertronOptions
type Ernie
- func NewErnie(clientID, clientSecret string, optFns ...func(o *ErnieOptions)) *Ernie
- func NewErnieFromClient(client ErnieClient, optFns ...func(o *ErnieOptions)) *Ernie
- func (e *Ernie) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)
- func (e *Ernie) EmbedText(ctx context.Context, text string) ([]float32, error)
type ErnieClient
type ErnieOptions
type Fake
- func NewFake(size int) *Fake
- func (e *Fake) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)
- func (e *Fake) EmbedText(ctx context.Context, text string) ([]float32, error)
type GoogleGenAI
- func NewGoogleGenAI(client GoogleGenAIClient, optFns ...func(o *GoogleGenAIOptions)) *GoogleGenAI
- func (e *GoogleGenAI) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)
- func (e *GoogleGenAI) EmbedText(ctx context.Context, text string) ([]float32, error)
type GoogleGenAIClient
type GoogleGenAIOptions
type HuggingFaceHub
- func NewHuggingFaceHub(token string, optFns ...func(o *HuggingFaceHubOptions)) *HuggingFaceHub
- func NewHuggingFaceHubFromClient(client HuggingFaceHubClient, optFns ...func(o *HuggingFaceHubOptions)) *HuggingFaceHub
- func (e *HuggingFaceHub) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)
- func (e *HuggingFaceHub) EmbedText(ctx context.Context, text string) ([]float32, error)
type HuggingFaceHubClient
type HuggingFaceHubOptions
type Ollama
- func NewOllama(client OllamaClient, optFns ...func(o *OllamaOptions)) *Ollama
- func (e *Ollama) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)
- func (e *Ollama) EmbedText(ctx context.Context, text string) ([]float32, error)
type OllamaClient
type OllamaOptions
type OpenAI
- func NewAzureOpenAI(apiKey, baseURL string, optFns ...func(o *AzureOpenAIOptions)) *OpenAI
- func NewOpenAI(apiKey string, optFns ...func(o *OpenAIOptions)) *OpenAI
- func NewOpenAIFromClient(client OpenAIClient, optFns ...func(o *OpenAIOptions)) *OpenAI
- func (e *OpenAI) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)
- func (e *OpenAI) EmbedText(ctx context.Context, text string) ([]float32, error)
type OpenAIClient
type OpenAIOptions

Constants ¶

This section is empty.

Variables ¶

View Source

var DefaultOpenAIConfig = OpenAIOptions{
	ModelName:              "text-embedding-3-small",
	EmbeddingContextLength: 8191,
	ChunkSize:              1000,
	MaxRetries:             3,
}

Functions ¶

This section is empty.

Types ¶

type AzureOpenAIOptions ¶ added in v0.0.26

type AzureOpenAIOptions struct {
	OpenAIOptions
	APIVersion string
	Deployment string
}

type Bedrock ¶ added in v0.0.73

type Bedrock struct {
	// contains filtered or unexported fields
}

Bedrock is a struct representing the Bedrock model embedding functionality.

func NewBedrock ¶ added in v0.0.73

func NewBedrock(client BedrockRuntimeClient, modelID string, optFns ...func(o *BedrockOptions)) *Bedrock

NewBedrock creates a new instance of Bedrock with the provided BedrockRuntimeClient and optional configuration.

func NewBedrockAmazon ¶ added in v0.0.108

func NewBedrockAmazon(client BedrockRuntimeClient, optFns ...func(o *BedrockAmazonOptions)) *Bedrock

NewBedrockAmazon creates a new instance of Bedrock with the Amazon provider.

func NewBedrockCohere ¶ added in v0.0.108

func NewBedrockCohere(client BedrockRuntimeClient, optFns ...func(o *BedrockCohereOptions)) *Bedrock

NewBedrockCohere creates a new instance of Bedrock with the Cohere provider.

func (*Bedrock) BatchEmbedText ¶ added in v0.0.93

func (e *Bedrock) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*Bedrock) EmbedText ¶ added in v0.0.93

func (e *Bedrock) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type BedrockAmazonOptions ¶ added in v0.0.108

type BedrockAmazonOptions struct {
	// Model id to use.
	ModelID string `map:"model_id,omitempty"`
}

BedrockAmazonOptions is a struct containing options for configuring the Amazon Bedrock model.

type BedrockCohereOptions ¶ added in v0.0.108

type BedrockCohereOptions struct {
	// Model id to use.
	ModelID string `map:"model_id,omitempty"`

	InputType string `map:"input_type"`

	Truncate string `map:"truncate"`
}

BedrockCohereOptions is a struct containing options for configuring the Cohere Bedrock model.

type BedrockInputOutputAdapter ¶ added in v0.0.108

type BedrockInputOutputAdapter struct {
	// contains filtered or unexported fields
}

BedrockInputOutputAdapter is a helper struct for preparing input and handling output for Bedrock model.

func NewBedrockInputOutputAdapter ¶ added in v0.0.108

func NewBedrockInputOutputAdapter(provider string) *BedrockInputOutputAdapter

NewBedrockInputOutputAdapter creates a new instance of BedrockInputOutputAdapter.

func (*BedrockInputOutputAdapter) PrepareInput ¶ added in v0.0.108

func (bioa *BedrockInputOutputAdapter) PrepareInput(text string, modelParams map[string]any) ([]byte, error)

PrepareInput prepares the input for the Bedrock model based on the specified provider.

func (*BedrockInputOutputAdapter) PrepareOutput ¶ added in v0.0.108

func (bioa *BedrockInputOutputAdapter) PrepareOutput(response []byte) ([]float32, error)

PrepareOutput prepares the output for the Bedrock model based on the specified provider.

type BedrockOptions ¶ added in v0.0.73

type BedrockOptions struct {
	MaxConcurrency int

	// Model params to use.
	ModelParams map[string]any `map:"model_params,omitempty"`
}

BedrockOptions contains options for configuring the Bedrock model.

type BedrockRuntimeClient ¶ added in v0.0.73

type BedrockRuntimeClient interface {
	InvokeModel(ctx context.Context, params *bedrockruntime.InvokeModelInput, optFns ...func(*bedrockruntime.Options)) (*bedrockruntime.InvokeModelOutput, error)
}

BedrockRuntimeClient is an interface for the Bedrock model runtime client.

type Cohere ¶ added in v0.0.39

type Cohere struct {
	// contains filtered or unexported fields
}

Cohere is a client for the Cohere API.

func NewCohere ¶ added in v0.0.39

func NewCohere(apiKey string, optFns ...func(o *CohereOptions)) *Cohere

NewCohere creates a new Cohere instance with the provided API key and options. It returns the initialized Cohere instance.

func NewCohereFromClient ¶ added in v0.0.39

func NewCohereFromClient(client CohereClient, optFns ...func(o *CohereOptions)) *Cohere

NewCohereFromClient creates a new Cohere instance from an existing Cohere client and options. It returns the initialized Cohere instance.

func (*Cohere) BatchEmbedText ¶ added in v0.0.93

func (e *Cohere) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*Cohere) EmbedText ¶ added in v0.0.93

func (e *Cohere) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type CohereClient ¶ added in v0.0.39

type CohereClient interface {
	Embed(ctx context.Context, request *cohere.EmbedRequest, opts ...core.RequestOption) (*cohere.EmbedResponse, error)
}

CohereClient is an interface for the Cohere client.

type CohereOptions ¶ added in v0.0.39

type CohereOptions struct {
	// Model name to use.
	Model string
	// Truncate specifies how input texts that are too long are truncated, from the start or the end ("NONE"|"START"|"END")
	Truncate string
	// MaxRetries represents the maximum number of retries to make when embedding.
	MaxRetries uint `map:"max_retries,omitempty"`
}

CohereOptions contains options for configuring the Cohere instance.

type Cybertron ¶ added in v0.0.103

type Cybertron struct {
	// contains filtered or unexported fields
}

Cybertron represents an embedder powered by Cybertron.

func NewCybertron ¶ added in v0.0.103

func NewCybertron(optFns ...func(o *CybertronOptions)) (*Cybertron, error)

NewCybertron creates a new instance of the Cybertron embedder.

func NewCybertronFromEncoder ¶ added in v0.0.103

func NewCybertronFromEncoder(encoder textencoding.Interface, optFns ...func(o *CybertronFromEncoderOptions)) (*Cybertron, error)

NewCybertronFromEncoder creates a new Cybertron embedder from an existing encoder.

func (*Cybertron) BatchEmbedText ¶ added in v0.0.103

func (e *Cybertron) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*Cybertron) EmbedText ¶ added in v0.0.103

func (e *Cybertron) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type CybertronFromEncoderOptions ¶ added in v0.0.103

type CybertronFromEncoderOptions struct {
	// PoolingStrategy specifies the pooling strategy for embedding calculation.
	PoolingStrategy int
}

CybertronFromEncoderOptions represents options for the Cybertron embedder.

type CybertronOptions ¶ added in v0.0.103

type CybertronOptions struct {
	CybertronFromEncoderOptions
	// Model is the name of the model (format: <org>/<model>).
	Model string
	// ModelsDir is the directory where the models are stored.
	ModelsDir string
	// HubAccessToken is the access token for the Hugging Face Hub.
	HubAccessToken string
}

CybertronOptions represents options for the Cybertron embedder.

type Ernie ¶ added in v0.0.67

type Ernie struct {
	// contains filtered or unexported fields
}

Ernie represents the text embedding component powered by Ernie.

func NewErnie ¶ added in v0.0.67

func NewErnie(clientID, clientSecret string, optFns ...func(o *ErnieOptions)) *Ernie

NewErnie creates a new instance of the Ernie text embedding component with default options.

func NewErnieFromClient ¶ added in v0.0.67

func NewErnieFromClient(client ErnieClient, optFns ...func(o *ErnieOptions)) *Ernie

NewErnieFromClient creates a new instance of the Ernie text embedding component with a custom ErnieClient and optional configuration.

func (*Ernie) BatchEmbedText ¶ added in v0.0.93

func (e *Ernie) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*Ernie) EmbedText ¶ added in v0.0.93

func (e *Ernie) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type ErnieClient ¶ added in v0.0.67

type ErnieClient interface {
	// CreateEmbedding generates text embeddings using the specified model and request.
	CreateEmbedding(ctx context.Context, model string, request ernie.EmbeddingRequest) (*ernie.EmbeddingResponse, error)
}

ErnieClient is an interface for interacting with the Ernie API for text embedding.

type ErnieOptions ¶ added in v0.0.67

type ErnieOptions struct {
	Model string
}

ErnieOptions represents configuration options for the Ernie text embedding component.

type Fake ¶

type Fake struct {
	Size int
}

func NewFake ¶

func NewFake(size int) *Fake

func (*Fake) BatchEmbedText ¶ added in v0.0.93

func (e *Fake) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*Fake) EmbedText ¶ added in v0.0.93

func (e *Fake) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type GoogleGenAI ¶ added in v0.0.92

type GoogleGenAI struct {
	// contains filtered or unexported fields
}

GoogleGenAI is a client for the GoogleGenAI embedding service.

func NewGoogleGenAI ¶ added in v0.0.92

func NewGoogleGenAI(client GoogleGenAIClient, optFns ...func(o *GoogleGenAIOptions)) *GoogleGenAI

NewGoogleGenAI creates a new instance of the GoogleGenAI client.

func (*GoogleGenAI) BatchEmbedText ¶ added in v0.0.93

func (e *GoogleGenAI) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*GoogleGenAI) EmbedText ¶ added in v0.0.93

func (e *GoogleGenAI) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type GoogleGenAIClient ¶ added in v0.0.92

type GoogleGenAIClient interface {
	EmbedContent(context.Context, *generativelanguagepb.EmbedContentRequest, ...gax.CallOption) (*generativelanguagepb.EmbedContentResponse, error)
	BatchEmbedContents(context.Context, *generativelanguagepb.BatchEmbedContentsRequest, ...gax.CallOption) (*generativelanguagepb.BatchEmbedContentsResponse, error)
}

GoogleGenAIClient is an interface for the GoogleGenAI client.

type GoogleGenAIOptions ¶ added in v0.0.92

type GoogleGenAIOptions struct {
	ModelName string
}

GoogleGenAIOptions contains options for configuring the GoogleGenAI client.

type HuggingFaceHub ¶ added in v0.0.66

type HuggingFaceHub struct {
	// contains filtered or unexported fields
}

HuggingFaceHub represents an embedder for Hugging Face Hub models.

func NewHuggingFaceHub ¶ added in v0.0.66

func NewHuggingFaceHub(token string, optFns ...func(o *HuggingFaceHubOptions)) *HuggingFaceHub

NewHuggingFaceHub creates a new instance of the HuggingFaceHub embedder.

func NewHuggingFaceHubFromClient ¶ added in v0.0.66

func NewHuggingFaceHubFromClient(client HuggingFaceHubClient, optFns ...func(o *HuggingFaceHubOptions)) *HuggingFaceHub

NewHuggingFaceHubFromClient creates a new instance of the HuggingFaceHub embedder from a custom client.

func (*HuggingFaceHub) BatchEmbedText ¶ added in v0.0.93

func (e *HuggingFaceHub) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*HuggingFaceHub) EmbedText ¶ added in v0.0.93

func (e *HuggingFaceHub) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type HuggingFaceHubClient ¶ added in v0.0.66

type HuggingFaceHubClient interface {
	// FeatureExtractionWithAutomaticReduction performs feature extraction with automatic reduction.
	// It returns the extraction response or an error if the operation fails.
	FeatureExtractionWithAutomaticReduction(ctx context.Context, req *huggingface.FeatureExtractionRequest) (huggingface.FeatureExtractionWithAutomaticReductionResponse, error)
}

HuggingFaceHubClient represents a client for interacting with Hugging Face Hub.

type HuggingFaceHubOptions ¶ added in v0.0.66

type HuggingFaceHubOptions struct {
	// Model to use for embedding.
	Model string
	// Options represents optional settings for the feature extraction.
	Options huggingface.Options
}

type Ollama ¶ added in v0.0.96

type Ollama struct {
	// contains filtered or unexported fields
}

Ollama is a struct representing the Ollama embedding model.

func NewOllama ¶ added in v0.0.96

func NewOllama(client OllamaClient, optFns ...func(o *OllamaOptions)) *Ollama

NewOllama creates a new instance of the Ollama embedding model.

func (*Ollama) BatchEmbedText ¶ added in v0.0.96

func (e *Ollama) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*Ollama) EmbedText ¶ added in v0.0.96

func (e *Ollama) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type OllamaClient ¶ added in v0.0.96

type OllamaClient interface {
	CreateEmbedding(ctx context.Context, req *ollama.EmbeddingRequest) (*ollama.EmbeddingResponse, error)
}

OllamaClient is an interface for interacting with the Ollama model's embedding functionality.

type OllamaOptions ¶ added in v0.0.96

type OllamaOptions struct {
	MaxConcurrency int
	// ModelName is the name of the Ollama model to use.
	ModelName string `map:"model_name,omitempty"`
}

OllamaOptions contains options for configuring the Ollama model.

type OpenAI ¶ added in v0.0.6

type OpenAI struct {
	// contains filtered or unexported fields
}

func NewAzureOpenAI ¶ added in v0.0.26

func NewAzureOpenAI(apiKey, baseURL string, optFns ...func(o *AzureOpenAIOptions)) *OpenAI

func NewOpenAI ¶ added in v0.0.6

func NewOpenAI(apiKey string, optFns ...func(o *OpenAIOptions)) *OpenAI

func NewOpenAIFromClient ¶ added in v0.0.38

func NewOpenAIFromClient(client OpenAIClient, optFns ...func(o *OpenAIOptions)) *OpenAI

func (*OpenAI) BatchEmbedText ¶ added in v0.0.93

func (e *OpenAI) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*OpenAI) EmbedText ¶ added in v0.0.93

func (e *OpenAI) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type OpenAIClient ¶ added in v0.0.68

type OpenAIClient interface {
	CreateEmbeddings(ctx context.Context, conv openai.EmbeddingRequestConverter) (res openai.EmbeddingResponse, err error)
}

type OpenAIOptions ¶ added in v0.0.6

type OpenAIOptions struct {
	// Model name to use.
	ModelName              string
	EmbeddingContextLength int
	// Maximum number of texts to embed in each batch
	ChunkSize int
	// BaseURL is the base URL of the OpenAI service.
	BaseURL string
	// OrgID is the organization ID for accessing the OpenAI service.
	OrgID string
	// MaxRetries represents the maximum number of retries to make when embedding.
	MaxRetries uint `map:"max_retries,omitempty"`
}

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL