Documentation
¶
Overview ¶
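Package model defines the interfaces, call options, and result types for the different kinds of AI models: language, embedding, image generation, reranking, speech (text-to-speech), and transcription (speech-to-text).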
Index ¶
- func CosineSimilarity(a, b []float64) float64
- func DotProduct(a, b []float64) float64
- func EuclideanDistance(a, b []float64) float64
- type EmbedCallOptions
- type EmbedResult
- type Embedding
- type EmbeddingModel
- type EmbeddingResponse
- type EmbeddingUsage
- type GeneratedImage
- type ImageCallOptions
- type ImageModel
- type ImageResponse
- type ImageResult
- type ImageUsage
- type LanguageModel
- type LanguageModelInfo
- type RankedDocument
- type RerankCallOptions
- type RerankResponse
- type RerankResult
- type RerankUsage
- type RerankingModel
- type SpeechCallOptions
- type SpeechModel
- type SpeechResponse
- type SpeechResult
- type SpeechUsage
- type TranscribeCallOptions
- type TranscriptionModel
- type TranscriptionResponse
- type TranscriptionResult
- type TranscriptionSegment
- type TranscriptionUsage
- type TranscriptionWord
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CosineSimilarity ¶
CosineSimilarity calculates the cosine similarity between two embeddings.
func DotProduct ¶
DotProduct calculates the dot product between two embeddings.
func EuclideanDistance ¶
EuclideanDistance calculates the Euclidean distance between two embeddings.
Types ¶
type EmbedCallOptions ¶
// EmbedCallOptions carries the inputs and per-call settings passed to
// EmbeddingModel.Embed.
type EmbedCallOptions struct {
// Values is the list of texts to embed.
Values []string
// Dimensions is the desired embedding dimensions (if model supports it).
// NOTE(review): presumably nil means "use the model default" — confirm.
Dimensions *int
// ProviderOptions are provider-specific options.
// NOTE(review): the expected key layout is not stated here — confirm per provider.
ProviderOptions map[string]any
// Headers are additional HTTP headers.
// NOTE(review): presumably attached to the outgoing provider request — confirm.
Headers map[string]string
}
EmbedCallOptions contains the options for embedding generation.
type EmbedResult ¶
// EmbedResult is the value returned (by pointer) from EmbeddingModel.Embed.
type EmbedResult struct {
// Embeddings contains the generated embeddings. Each entry carries an
// Index identifying the input text it corresponds to.
Embeddings []Embedding
// Usage contains usage information.
Usage EmbeddingUsage
// Warnings contains any warnings from the embedding process.
// Mirrors ai-sdk's EmbeddingModelV3 `warnings: SharedV3Warning[]`.
Warnings []stream.Warning
// Response contains provider-specific response data
// (response ID, model name, and response headers).
Response EmbeddingResponse
}
EmbedResult contains the result of an embedding call.
type Embedding ¶
// Embedding is a single embedding vector paired with the index of the
// input text it was produced from.
type Embedding struct {
// Values is the embedding vector.
Values []float64
// Index is the index of the input text this embedding corresponds to.
// NOTE(review): presumably a zero-based position in EmbedCallOptions.Values — confirm.
Index int
}
Embedding represents a single embedding.
type EmbeddingModel ¶
// EmbeddingModel is implemented by text embedding models. It exposes
// identifying metadata, per-call limits, and the Embed operation.
type EmbeddingModel interface {
// ID returns the model identifier.
ID() string
// Provider returns the provider identifier.
Provider() string
// MaxEmbeddingsPerCall returns the maximum number of texts that can be embedded in a single call.
// NOTE(review): whether callers must split larger inputs themselves is not stated — confirm.
MaxEmbeddingsPerCall() int
// Dimensions returns the embedding dimensions (0 if variable or unknown).
Dimensions() int
// Embed generates embeddings for the provided texts.
// The texts and per-call settings are supplied through opts; the result
// holds the embeddings, usage, warnings, and response metadata.
Embed(ctx context.Context, opts EmbedCallOptions) (*EmbedResult, error)
}
EmbeddingModel is the interface for text embedding models.
type EmbeddingResponse ¶
// EmbeddingResponse holds response metadata for an embedding call;
// it is exposed as EmbedResult.Response.
type EmbeddingResponse struct {
// ID is the response identifier.
ID string
// Model is the model used for generation.
Model string
// Headers contains response headers.
Headers map[string]string
}
EmbeddingResponse contains provider-specific response metadata.
type EmbeddingUsage ¶
// EmbeddingUsage reports token consumption for an embedding call;
// it is exposed as EmbedResult.Usage.
type EmbeddingUsage struct {
// Tokens is the total number of tokens used.
Tokens int
}
EmbeddingUsage contains usage information for embedding generation.
type GeneratedImage ¶
// GeneratedImage describes one image produced by an image model;
// ImageResult.Images is a slice of these.
// NOTE(review): it is not stated whether Base64 and URL are both populated
// or only one of them — check before relying on either.
type GeneratedImage struct {
// Base64 is the base64-encoded image data.
Base64 string
// URL is the URL of the generated image (if available).
URL string
// MimeType is the MIME type of the image (e.g., "image/png").
MimeType string
// Seed is the seed used for generation (if available).
// NOTE(review): presumably nil when the provider does not report a seed — confirm.
Seed *int64
// RevisedPrompt is the revised prompt used for generation (if available).
RevisedPrompt string
}
GeneratedImage represents a single generated image.
type ImageCallOptions ¶
// ImageCallOptions carries the prompt and per-call settings passed to
// ImageModel.Generate, including optional input images and a mask for
// image-to-image, editing, and inpainting workflows.
type ImageCallOptions struct {
// Prompt is the text description of the image to generate.
Prompt string
// N is the number of images to generate.
// NOTE(review): behavior when N is 0 or exceeds ImageModel.MaxImagesPerCall is not stated — confirm.
N int
// Size is the size of the generated images (e.g., "1024x1024").
Size string
// AspectRatio is the aspect ratio (e.g., "16:9", "1:1").
// NOTE(review): how Size and AspectRatio interact when both are set is not stated — confirm.
AspectRatio string
// Seed for deterministic generation (if supported).
Seed *int64
// ProviderOptions are provider-specific options.
ProviderOptions map[string]any
// Headers are additional HTTP headers.
Headers map[string]string
// Files is an optional list of input images for image-to-image or
// editing workflows. Each entry is raw bytes in any common image
// encoding; providers detect the MIME type from the magic bytes
// before forwarding the payload (ai-sdk ImageModelV3 `files`).
Files [][]byte
// Mask is an optional mask image for inpainting where non-zero
// pixels indicate regions to regenerate (ai-sdk ImageModelV3 `mask`).
Mask []byte
}
ImageCallOptions contains the options for image generation.
type ImageModel ¶
// ImageModel is implemented by image generation models. It exposes
// identifying metadata, a per-call image limit, and the Generate operation.
type ImageModel interface {
// ID returns the model identifier.
ID() string
// Provider returns the provider identifier.
Provider() string
// MaxImagesPerCall returns the maximum number of images that can be generated in a single call.
MaxImagesPerCall() int
// Generate generates images based on the provided options.
// The result holds the images plus warnings, optional usage,
// response metadata, and provider metadata.
Generate(ctx context.Context, opts ImageCallOptions) (*ImageResult, error)
}
ImageModel is the interface for image generation models.
type ImageResponse ¶
// ImageResponse holds response metadata for an image generation call;
// it is exposed as ImageResult.Response.
type ImageResponse struct {
// ID is the response identifier.
ID string
// Model is the model used for generation.
Model string
// Timestamp is the creation timestamp.
// NOTE(review): the unit (e.g. Unix seconds vs. milliseconds) is not stated — confirm.
Timestamp int64
// Headers contains response headers.
Headers map[string]string
}
ImageResponse contains provider-specific response metadata.
type ImageResult ¶
// ImageResult is the value returned (by pointer) from ImageModel.Generate.
type ImageResult struct {
// Images contains the generated images.
Images []GeneratedImage
// Warnings contains any warnings from the generation process.
Warnings []stream.Warning
// Usage contains usage information (if available).
// NOTE(review): presumably nil when the provider reports no usage — nil-check before dereferencing.
Usage *ImageUsage
// Response contains provider-specific response data.
Response ImageResponse
// ProviderMetadata contains provider-specific metadata returned by
// the model (ai-sdk ImageModelV3 `providerMetadata`). Keys are
// provider IDs, values are provider-defined payloads.
ProviderMetadata map[string]any
}
ImageResult contains the result of an image generation call.
type ImageUsage ¶
// ImageUsage reports resource consumption for an image generation call;
// it is exposed (by pointer) as ImageResult.Usage. Which field is
// meaningful depends on the kind of model, as noted per field.
type ImageUsage struct {
// TotalTokens is the total number of tokens used (for models that use tokens).
TotalTokens int
// Steps is the number of diffusion steps (for diffusion models).
Steps int
}
ImageUsage contains usage information for image generation.
type LanguageModel ¶
LanguageModel is the interface for text generation models. It is an alias for stream.Model, kept for consistency with the existing codebase.
type LanguageModelInfo ¶
// LanguageModelInfo describes a language model: its identity, its size
// limits, and which optional capabilities it supports.
type LanguageModelInfo struct {
// ID is the model identifier.
ID string
// Provider is the provider identifier.
Provider string
// MaxTokens is the maximum number of tokens the model can generate.
MaxTokens int
// ContextWindow is the maximum context window size.
// NOTE(review): presumably measured in tokens, like MaxTokens — confirm.
ContextWindow int
// SupportsTools indicates if the model supports tool/function calling.
SupportsTools bool
// SupportsVision indicates if the model supports image inputs.
SupportsVision bool
// SupportsStreaming indicates if the model supports streaming responses.
SupportsStreaming bool
// SupportsReasoning indicates if the model supports extended thinking.
SupportsReasoning bool
}
LanguageModelInfo contains metadata about a language model.
type RankedDocument ¶
// RankedDocument is one entry of a reranking result: a document's
// original index, its relevance score, and optionally its text.
type RankedDocument struct {
// Index is the original index of the document.
// NOTE(review): presumably the position in RerankCallOptions.Documents — confirm.
Index int
// Score is the relevance score (higher is more relevant).
// NOTE(review): the score range is not stated and may differ by provider — confirm.
Score float64
// Document is the document text (if ReturnDocuments was true).
Document string
}
RankedDocument represents a document with its relevance score.
type RerankCallOptions ¶
// RerankCallOptions carries the query, candidate documents, and per-call
// settings passed to RerankingModel.Rerank.
type RerankCallOptions struct {
// Query is the query to rank documents against.
Query string
// Documents is the list of documents to rerank.
Documents []string
// TopN is the number of top results to return (0 means return all).
TopN int
// ReturnDocuments specifies whether to include document text in results
// (see RankedDocument.Document).
ReturnDocuments bool
// ProviderOptions are provider-specific options.
ProviderOptions map[string]any
// Headers are additional HTTP headers.
Headers map[string]string
}
RerankCallOptions contains the options for reranking.
type RerankResponse ¶
// RerankResponse holds response metadata for a reranking call;
// it is exposed as RerankResult.Response.
type RerankResponse struct {
// ID is the response identifier.
ID string
// Model is the model used for reranking.
Model string
// Headers contains response headers.
Headers map[string]string
}
RerankResponse contains provider-specific response metadata.
type RerankResult ¶
// RerankResult is the value returned (by pointer) from RerankingModel.Rerank.
type RerankResult struct {
// Results contains the reranked documents.
// NOTE(review): presumably ordered by descending Score — confirm before relying on order.
Results []RankedDocument
// Usage contains usage information.
Usage RerankUsage
// Warnings contains any warnings from the reranking process.
// Mirrors ai-sdk's RerankingModelV3 `warnings: SharedV3Warning[]`.
Warnings []stream.Warning
// Response contains provider-specific response data.
Response RerankResponse
}
RerankResult contains the result of a reranking call.
type RerankUsage ¶
// RerankUsage reports resource consumption for a reranking call;
// it is exposed as RerankResult.Usage.
type RerankUsage struct {
// SearchUnits is the number of search units used.
// NOTE(review): "search unit" is presumably a provider-defined billing unit — confirm.
SearchUnits int
// Tokens is the total number of tokens processed.
Tokens int
}
RerankUsage contains usage information for reranking.
type RerankingModel ¶
// RerankingModel is implemented by document reranking models. It exposes
// identifying metadata, a per-call document limit, and the Rerank operation.
type RerankingModel interface {
// ID returns the model identifier.
ID() string
// Provider returns the provider identifier.
Provider() string
// MaxDocumentsPerCall returns the maximum number of documents that can be reranked in a single call.
MaxDocumentsPerCall() int
// Rerank reranks documents based on their relevance to a query.
// The query and documents are supplied through opts; the result holds
// the ranked documents, usage, warnings, and response metadata.
Rerank(ctx context.Context, opts RerankCallOptions) (*RerankResult, error)
}
RerankingModel is the interface for document reranking models.
type SpeechCallOptions ¶
// SpeechCallOptions carries the text and per-call settings passed to
// SpeechModel.Generate.
type SpeechCallOptions struct {
// Text is the text to convert to speech.
Text string
// Voice is the voice to use for generation.
// NOTE(review): valid voice names are presumably provider-specific — confirm.
Voice string
// OutputFormat is the desired output format (e.g., "mp3", "wav", "opus").
OutputFormat string
// Speed is the speed of the generated audio (0.25 to 4.0, 1.0 is normal).
// NOTE(review): presumably nil leaves the provider default in effect — confirm.
Speed *float64
// ProviderOptions are provider-specific options.
ProviderOptions map[string]any
// Headers are additional HTTP headers.
Headers map[string]string
}
SpeechCallOptions contains the options for speech generation.
type SpeechModel ¶
// SpeechModel is implemented by text-to-speech models. It exposes
// identifying metadata and the Generate operation.
type SpeechModel interface {
// ID returns the model identifier.
ID() string
// Provider returns the provider identifier.
Provider() string
// Generate generates speech from text.
// The text and per-call settings are supplied through opts; the result
// holds the audio plus warnings, optional usage, and response metadata.
Generate(ctx context.Context, opts SpeechCallOptions) (*SpeechResult, error)
}
SpeechModel is the interface for text-to-speech models.
type SpeechResponse ¶
// SpeechResponse holds response metadata for a speech generation call;
// it is exposed as SpeechResult.Response.
type SpeechResponse struct {
// ID is the response identifier.
ID string
// Model is the model used for generation.
Model string
// Headers contains response headers.
Headers map[string]string
}
SpeechResponse contains provider-specific response metadata.
type SpeechResult ¶
// SpeechResult is the value returned (by pointer) from SpeechModel.Generate.
// NOTE(review): it is not stated whether Audio and AudioReader are both
// populated or only one of them — check before relying on either.
type SpeechResult struct {
// Audio is the generated audio data.
Audio []byte
// AudioReader provides streaming access to the audio data.
AudioReader io.Reader
// MimeType is the MIME type of the audio (e.g., "audio/mpeg").
MimeType string
// Duration is the duration of the audio in seconds (if available).
// NOTE(review): presumably nil when the duration is unknown — nil-check before dereferencing.
Duration *float64
// Warnings contains any warnings from the generation process.
Warnings []stream.Warning
// Usage contains usage information (if available).
// NOTE(review): presumably nil when the provider reports no usage — nil-check before dereferencing.
Usage *SpeechUsage
// Response contains provider-specific response data.
Response SpeechResponse
}
SpeechResult contains the result of a speech generation call.
type SpeechUsage ¶
// SpeechUsage reports resource consumption for a speech generation call;
// it is exposed (by pointer) as SpeechResult.Usage.
type SpeechUsage struct {
// Characters is the number of characters processed.
Characters int
// Seconds is the duration of generated audio in seconds.
Seconds float64
}
SpeechUsage contains usage information for speech generation.
type TranscribeCallOptions ¶
// TranscribeCallOptions carries the audio input and per-call settings
// passed to TranscriptionModel.Transcribe. The audio can be supplied in
// one of three ways: Audio, AudioReader, or AudioURL.
// NOTE(review): precedence when more than one audio source is set is not
// stated — confirm.
type TranscribeCallOptions struct {
// Audio is the audio data to transcribe.
Audio []byte
// AudioReader provides streaming access to audio data (alternative to Audio).
AudioReader io.Reader
// AudioURL is a URL to the audio file (alternative to Audio/AudioReader).
AudioURL string
// MimeType is the MIME type of the audio (e.g., "audio/wav", "audio/mpeg").
MimeType string
// Filename is the filename of the audio (used for format detection).
Filename string
// Language is the language code of the audio (e.g., "en", "es").
// If not provided, the model will attempt to detect the language.
Language string
// Prompt is an optional hint for the model (can improve accuracy).
Prompt string
// ProviderOptions are provider-specific options.
ProviderOptions map[string]any
// Headers are additional HTTP headers.
Headers map[string]string
}
TranscribeCallOptions contains the options for transcription.
type TranscriptionModel ¶
// TranscriptionModel is implemented by speech-to-text models. It exposes
// identifying metadata and the Transcribe operation.
type TranscriptionModel interface {
// ID returns the model identifier.
ID() string
// Provider returns the provider identifier.
Provider() string
// Transcribe transcribes audio to text.
// The audio and per-call settings are supplied through opts; the result
// holds the text plus optional segments, language, duration, warnings,
// usage, and response metadata.
Transcribe(ctx context.Context, opts TranscribeCallOptions) (*TranscriptionResult, error)
}
TranscriptionModel is the interface for speech-to-text models.
type TranscriptionResponse ¶
// TranscriptionResponse holds response metadata for a transcription call;
// it is exposed as TranscriptionResult.Response.
type TranscriptionResponse struct {
// ID is the response identifier.
ID string
// Model is the model used for transcription.
Model string
// Headers contains response headers.
Headers map[string]string
}
TranscriptionResponse contains provider-specific response metadata.
type TranscriptionResult ¶
// TranscriptionResult is the value returned (by pointer) from
// TranscriptionModel.Transcribe.
type TranscriptionResult struct {
// Text is the transcribed text.
Text string
// Segments contains detailed segment information (if available).
Segments []TranscriptionSegment
// Language is the detected language code.
// NOTE(review): presumably echoes TranscribeCallOptions.Language when one was supplied — confirm.
Language string
// Duration is the duration of the audio in seconds (if available).
// NOTE(review): presumably nil when the duration is unknown — nil-check before dereferencing.
Duration *float64
// Warnings contains any warnings from the transcription process.
Warnings []stream.Warning
// Usage contains usage information (if available).
// NOTE(review): presumably nil when the provider reports no usage — nil-check before dereferencing.
Usage *TranscriptionUsage
// Response contains provider-specific response data.
Response TranscriptionResponse
}
TranscriptionResult contains the result of a transcription call.
type TranscriptionSegment ¶
// TranscriptionSegment is one timed segment of a transcription, with its
// text, time span, confidence, and optional word-level detail.
type TranscriptionSegment struct {
// ID is the segment identifier.
ID int
// Text is the transcribed text for this segment.
Text string
// Start is the start time in seconds.
// NOTE(review): presumably an offset from the beginning of the audio — confirm.
Start float64
// End is the end time in seconds.
End float64
// Confidence is the confidence score (0 to 1).
// NOTE(review): the value used when a provider reports no confidence is not stated — confirm.
Confidence float64
// Words contains word-level information (if available).
Words []TranscriptionWord
}
TranscriptionSegment represents a segment of transcribed audio.
type TranscriptionUsage ¶
// TranscriptionUsage reports resource consumption for a transcription
// call; it is exposed (by pointer) as TranscriptionResult.Usage.
type TranscriptionUsage struct {
// DurationSeconds is the duration of audio processed in seconds.
DurationSeconds float64
}
TranscriptionUsage contains usage information for transcription.
type TranscriptionWord ¶
// TranscriptionWord is one timed word of a transcription;
// TranscriptionSegment.Words is a slice of these.
type TranscriptionWord struct {
// Word is the transcribed word.
Word string
// Start is the start time in seconds.
// NOTE(review): presumably an offset from the beginning of the audio — confirm.
Start float64
// End is the end time in seconds.
End float64
// Confidence is the confidence score (0 to 1).
Confidence float64
}
TranscriptionWord represents a single word in the transcription.