model

package

Version: v0.1.2 (latest) · Published: May 6, 2026 · License: Apache-2.0 · Imports: 3 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/airlockrun/goai

Links

Open Source Insights

Documentation ¶

Overview ¶

Package model defines the interfaces for different AI model types.

Index ¶

func CosineSimilarity(a, b []float64) float64
func DotProduct(a, b []float64) float64
func EuclideanDistance(a, b []float64) float64
type EmbedCallOptions
type EmbedResult
type Embedding
type EmbeddingModel
type EmbeddingResponse
type EmbeddingUsage
type GeneratedImage
type ImageCallOptions
type ImageModel
type ImageResponse
type ImageResult
type ImageUsage
type LanguageModel
type LanguageModelInfo
type RankedDocument
type RerankCallOptions
type RerankResponse
type RerankResult
type RerankUsage
type RerankingModel
type SpeechCallOptions
type SpeechModel
type SpeechResponse
type SpeechResult
type SpeechUsage
type TranscribeCallOptions
type TranscriptionModel
type TranscriptionResponse
type TranscriptionResult
type TranscriptionSegment
type TranscriptionUsage
type TranscriptionWord

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func CosineSimilarity ¶

func CosineSimilarity(a, b []float64) float64

CosineSimilarity calculates the cosine similarity between two embeddings.

func DotProduct ¶

func DotProduct(a, b []float64) float64

DotProduct calculates the dot product between two embeddings.

func EuclideanDistance ¶

func EuclideanDistance(a, b []float64) float64

EuclideanDistance calculates the Euclidean distance between two embeddings.

Types ¶

type EmbedCallOptions ¶

// EmbedCallOptions contains the options for embedding generation.
// It is the request argument accepted by EmbeddingModel.Embed.
type EmbedCallOptions struct {
	// Values is the list of texts to embed.
	// NOTE(review): presumably bounded by the model's MaxEmbeddingsPerCall —
	// confirm whether callers or providers enforce that limit.
	Values []string

	// Dimensions is the desired embedding dimensions (if model supports it).
	// It is a pointer so it can be left nil; presumably nil means
	// "use the model default" — TODO confirm.
	Dimensions *int

	// ProviderOptions are provider-specific options.
	ProviderOptions map[string]any

	// Headers are additional HTTP headers.
	// NOTE(review): presumably attached to the outgoing provider request — confirm.
	Headers map[string]string
}

EmbedCallOptions contains the options for embedding generation.

type EmbedResult ¶

// EmbedResult contains the result of an embedding call.
// EmbeddingModel.Embed returns a pointer to this type.
type EmbedResult struct {
	// Embeddings contains the generated embeddings.
	// Each entry carries an Index identifying the input text it corresponds to.
	Embeddings []Embedding

	// Usage contains usage information.
	Usage EmbeddingUsage

	// Warnings contains any warnings from the embedding process.
	// Mirrors ai-sdk's EmbeddingModelV3 `warnings: SharedV3Warning[]`.
	Warnings []stream.Warning

	// Response contains provider-specific response data.
	Response EmbeddingResponse
}

EmbedResult contains the result of an embedding call.

type Embedding ¶

// Embedding represents a single embedding.
type Embedding struct {
	// Values is the embedding vector.
	Values []float64

	// Index is the index of the input text this embedding corresponds to.
	// NOTE(review): presumably a zero-based position in
	// EmbedCallOptions.Values — confirm.
	Index int
}

Embedding represents a single embedding.

type EmbeddingModel ¶

// EmbeddingModel is the interface for text embedding models.
type EmbeddingModel interface {
	// ID returns the model identifier.
	ID() string

	// Provider returns the provider identifier.
	Provider() string

	// MaxEmbeddingsPerCall returns the maximum number of texts that can be embedded in a single call.
	MaxEmbeddingsPerCall() int

	// Dimensions returns the embedding dimensions (0 if variable or unknown).
	Dimensions() int

	// Embed generates embeddings for the provided texts.
	// The texts and per-call settings are supplied through opts; the call
	// yields an *EmbedResult or an error.
	Embed(ctx context.Context, opts EmbedCallOptions) (*EmbedResult, error)
}

EmbeddingModel is the interface for text embedding models.

type EmbeddingResponse ¶

// EmbeddingResponse contains provider-specific response metadata.
// It is carried in EmbedResult.Response.
type EmbeddingResponse struct {
	// ID is the response identifier.
	ID string

	// Model is the model used for generation.
	Model string

	// Headers contains response headers.
	// Values are single strings, so at most one value per header name is kept.
	Headers map[string]string
}

EmbeddingResponse contains provider-specific response metadata.

type EmbeddingUsage ¶

// EmbeddingUsage contains usage information for embedding generation.
// It is carried in EmbedResult.Usage.
type EmbeddingUsage struct {
	// Tokens is the total number of tokens used.
	Tokens int
}

EmbeddingUsage contains usage information for embedding generation.

type GeneratedImage ¶

// GeneratedImage represents a single generated image.
// NOTE(review): whether Base64 and URL are mutually exclusive or may both be
// populated is not stated here — confirm per provider.
type GeneratedImage struct {
	// Base64 is the base64-encoded image data.
	Base64 string

	// URL is the URL of the generated image (if available).
	URL string

	// MimeType is the MIME type of the image (e.g., "image/png").
	MimeType string

	// Seed is the seed used for generation (if available).
	// It is a pointer so that an unavailable seed can be represented as nil.
	Seed *int64

	// RevisedPrompt is the revised prompt used for generation (if available).
	RevisedPrompt string
}

GeneratedImage represents a single generated image.

type ImageCallOptions ¶

// ImageCallOptions contains the options for image generation.
// It is the request argument accepted by ImageModel.Generate.
type ImageCallOptions struct {
	// Prompt is the text description of the image to generate.
	Prompt string

	// N is the number of images to generate.
	// NOTE(review): behaviour for N == 0 is not specified here — presumably a
	// provider default applies; confirm. See also ImageModel.MaxImagesPerCall.
	N int

	// Size is the size of the generated images (e.g., "1024x1024").
	Size string

	// AspectRatio is the aspect ratio (e.g., "16:9", "1:1").
	// NOTE(review): precedence when both Size and AspectRatio are set is not
	// specified here — confirm.
	AspectRatio string

	// Seed for deterministic generation (if supported).
	// It is a pointer so that "no seed" can be represented as nil.
	Seed *int64

	// ProviderOptions are provider-specific options.
	ProviderOptions map[string]any

	// Headers are additional HTTP headers.
	Headers map[string]string

	// Files is an optional list of input images for image-to-image or
	// editing workflows. Each entry is raw bytes in any common image
	// encoding; providers detect the MIME type from the magic bytes
	// before forwarding the payload (ai-sdk ImageModelV3 `files`).
	Files [][]byte

	// Mask is an optional mask image for inpainting where non-zero
	// pixels indicate regions to regenerate (ai-sdk ImageModelV3 `mask`).
	Mask []byte
}

ImageCallOptions contains the options for image generation.

type ImageModel ¶

// ImageModel is the interface for image generation models.
type ImageModel interface {
	// ID returns the model identifier.
	ID() string

	// Provider returns the provider identifier.
	Provider() string

	// MaxImagesPerCall returns the maximum number of images that can be generated in a single call.
	MaxImagesPerCall() int

	// Generate generates images based on the provided options.
	// The call yields an *ImageResult or an error.
	Generate(ctx context.Context, opts ImageCallOptions) (*ImageResult, error)
}

ImageModel is the interface for image generation models.

type ImageResponse ¶

// ImageResponse contains provider-specific response metadata.
// It is carried in ImageResult.Response.
type ImageResponse struct {
	// ID is the response identifier.
	ID string

	// Model is the model used for generation.
	Model string

	// Timestamp is the creation timestamp.
	// NOTE(review): unit and epoch are not stated — presumably Unix time;
	// confirm whether seconds or milliseconds.
	Timestamp int64

	// Headers contains response headers.
	Headers map[string]string
}

ImageResponse contains provider-specific response metadata.

type ImageResult ¶

// ImageResult contains the result of an image generation call.
// ImageModel.Generate returns a pointer to this type.
type ImageResult struct {
	// Images contains the generated images.
	Images []GeneratedImage

	// Warnings contains any warnings from the generation process.
	Warnings []stream.Warning

	// Usage contains usage information (if available).
	// It is a pointer so that unavailable usage can be represented as nil.
	Usage *ImageUsage

	// Response contains provider-specific response data.
	Response ImageResponse

	// ProviderMetadata contains provider-specific metadata returned by
	// the model (ai-sdk ImageModelV3 `providerMetadata`). Keys are
	// provider IDs, values are provider-defined payloads.
	ProviderMetadata map[string]any
}

ImageResult contains the result of an image generation call.

type ImageUsage ¶

// ImageUsage contains usage information for image generation.
// Which field is meaningful depends on the kind of model, as noted per field.
type ImageUsage struct {
	// TotalTokens is the total number of tokens used (for models that use tokens).
	TotalTokens int

	// Steps is the number of diffusion steps (for diffusion models).
	Steps int
}

ImageUsage contains usage information for image generation.

type LanguageModel ¶

// LanguageModel is the interface for text generation models.
// It is declared as a type alias (note the `=`), so LanguageModel and
// stream.Model denote the same type and are interchangeable.
type LanguageModel = stream.Model

LanguageModel is the interface for text generation models. This is an alias for stream.Model for consistency with the existing codebase.

type LanguageModelInfo ¶

// LanguageModelInfo contains metadata about a language model:
// its identity, size limits, and capability flags.
type LanguageModelInfo struct {
	// ID is the model identifier.
	ID string

	// Provider is the provider identifier.
	Provider string

	// MaxTokens is the maximum number of tokens the model can generate.
	MaxTokens int

	// ContextWindow is the maximum context window size.
	// NOTE(review): presumably measured in tokens — confirm.
	ContextWindow int

	// SupportsTools indicates if the model supports tool/function calling.
	SupportsTools bool

	// SupportsVision indicates if the model supports image inputs.
	SupportsVision bool

	// SupportsStreaming indicates if the model supports streaming responses.
	SupportsStreaming bool

	// SupportsReasoning indicates if the model supports extended thinking.
	SupportsReasoning bool
}

LanguageModelInfo contains metadata about a language model.

type RankedDocument ¶

// RankedDocument represents a document with its relevance score.
// It is the element type of RerankResult.Results.
type RankedDocument struct {
	// Index is the original index of the document.
	// NOTE(review): presumably a zero-based position in
	// RerankCallOptions.Documents — confirm.
	Index int

	// Score is the relevance score (higher is more relevant).
	// NOTE(review): the score range is not stated and may be provider-specific.
	Score float64

	// Document is the document text (if ReturnDocuments was true).
	// ReturnDocuments refers to the field of that name on RerankCallOptions.
	Document string
}

RankedDocument represents a document with its relevance score.

type RerankCallOptions ¶

// RerankCallOptions contains the options for reranking.
// It is the request argument accepted by RerankingModel.Rerank.
type RerankCallOptions struct {
	// Query is the query to rank documents against.
	Query string

	// Documents is the list of documents to rerank.
	// NOTE(review): presumably bounded by the model's MaxDocumentsPerCall —
	// confirm whether callers or providers enforce that limit.
	Documents []string

	// TopN is the number of top results to return (0 means return all).
	TopN int

	// ReturnDocuments specifies whether to include document text in results.
	// When set, the text is reported in RankedDocument.Document.
	ReturnDocuments bool

	// ProviderOptions are provider-specific options.
	ProviderOptions map[string]any

	// Headers are additional HTTP headers.
	Headers map[string]string
}

RerankCallOptions contains the options for reranking.

type RerankResponse ¶

// RerankResponse contains provider-specific response metadata.
// It is carried in RerankResult.Response.
type RerankResponse struct {
	// ID is the response identifier.
	ID string

	// Model is the model used for reranking.
	Model string

	// Headers contains response headers.
	Headers map[string]string
}

RerankResponse contains provider-specific response metadata.

type RerankResult ¶

// RerankResult contains the result of a reranking call.
// RerankingModel.Rerank returns a pointer to this type.
type RerankResult struct {
	// Results contains the reranked documents.
	// NOTE(review): presumably ordered by descending Score — confirm.
	Results []RankedDocument

	// Usage contains usage information.
	Usage RerankUsage

	// Warnings contains any warnings from the reranking process.
	// Mirrors ai-sdk's RerankingModelV3 `warnings: SharedV3Warning[]`.
	Warnings []stream.Warning

	// Response contains provider-specific response data.
	Response RerankResponse
}

RerankResult contains the result of a reranking call.

type RerankUsage ¶

// RerankUsage contains usage information for reranking.
// It is carried in RerankResult.Usage.
type RerankUsage struct {
	// SearchUnits is the number of search units used.
	// NOTE(review): "search unit" is presumably a provider billing unit — confirm.
	SearchUnits int

	// Tokens is the total number of tokens processed.
	Tokens int
}

RerankUsage contains usage information for reranking.

type RerankingModel ¶

// RerankingModel is the interface for document reranking models.
type RerankingModel interface {
	// ID returns the model identifier.
	ID() string

	// Provider returns the provider identifier.
	Provider() string

	// MaxDocumentsPerCall returns the maximum number of documents that can be reranked in a single call.
	MaxDocumentsPerCall() int

	// Rerank reranks documents based on their relevance to a query.
	// The query and documents are supplied through opts; the call yields a
	// *RerankResult or an error.
	Rerank(ctx context.Context, opts RerankCallOptions) (*RerankResult, error)
}

RerankingModel is the interface for document reranking models.

type SpeechCallOptions ¶

// SpeechCallOptions contains the options for speech generation.
// It is the request argument accepted by SpeechModel.Generate.
type SpeechCallOptions struct {
	// Text is the text to convert to speech.
	Text string

	// Voice is the voice to use for generation.
	// NOTE(review): accepted voice names are presumably provider-specific — confirm.
	Voice string

	// OutputFormat is the desired output format (e.g., "mp3", "wav", "opus").
	OutputFormat string

	// Speed is the speed of the generated audio (0.25 to 4.0, 1.0 is normal).
	// It is a pointer so it can be left nil; presumably nil means the
	// provider's default speed — TODO confirm.
	Speed *float64

	// ProviderOptions are provider-specific options.
	ProviderOptions map[string]any

	// Headers are additional HTTP headers.
	Headers map[string]string
}

SpeechCallOptions contains the options for speech generation.

type SpeechModel ¶

// SpeechModel is the interface for text-to-speech models.
type SpeechModel interface {
	// ID returns the model identifier.
	ID() string

	// Provider returns the provider identifier.
	Provider() string

	// Generate generates speech from text.
	// The text and voice settings are supplied through opts; the call yields
	// a *SpeechResult or an error.
	Generate(ctx context.Context, opts SpeechCallOptions) (*SpeechResult, error)
}

SpeechModel is the interface for text-to-speech models.

type SpeechResponse ¶

// SpeechResponse contains provider-specific response metadata.
// It is carried in SpeechResult.Response.
type SpeechResponse struct {
	// ID is the response identifier.
	ID string

	// Model is the model used for generation.
	Model string

	// Headers contains response headers.
	Headers map[string]string
}

SpeechResponse contains provider-specific response metadata.

type SpeechResult ¶

// SpeechResult contains the result of a speech generation call.
// SpeechModel.Generate returns a pointer to this type.
type SpeechResult struct {
	// Audio is the generated audio data.
	Audio []byte

	// AudioReader provides streaming access to the audio data.
	// NOTE(review): whether Audio and AudioReader are alternatives or may both
	// be populated, and who is responsible for draining or closing the
	// reader, is not stated here — confirm.
	AudioReader io.Reader

	// MimeType is the MIME type of the audio (e.g., "audio/mpeg").
	MimeType string

	// Duration is the duration of the audio in seconds (if available).
	// It is a pointer so that an unavailable duration can be represented as nil.
	Duration *float64

	// Warnings contains any warnings from the generation process.
	Warnings []stream.Warning

	// Usage contains usage information (if available).
	// It is a pointer so that unavailable usage can be represented as nil.
	Usage *SpeechUsage

	// Response contains provider-specific response data.
	Response SpeechResponse
}

SpeechResult contains the result of a speech generation call.

type SpeechUsage ¶

// SpeechUsage contains usage information for speech generation.
// It is referenced from SpeechResult.Usage.
type SpeechUsage struct {
	// Characters is the number of characters processed.
	Characters int

	// Seconds is the duration of generated audio in seconds.
	Seconds float64
}

SpeechUsage contains usage information for speech generation.

type TranscribeCallOptions ¶

// TranscribeCallOptions contains the options for transcription.
// It is the request argument accepted by TranscriptionModel.Transcribe.
//
// Audio, AudioReader and AudioURL are alternative ways to supply the input.
// NOTE(review): precedence when more than one is set is not specified here —
// confirm.
type TranscribeCallOptions struct {
	// Audio is the audio data to transcribe.
	Audio []byte

	// AudioReader provides streaming access to audio data (alternative to Audio).
	AudioReader io.Reader

	// AudioURL is a URL to the audio file (alternative to Audio/AudioReader).
	AudioURL string

	// MimeType is the MIME type of the audio (e.g., "audio/wav", "audio/mp3").
	MimeType string

	// Filename is the filename of the audio (used for format detection).
	Filename string

	// Language is the language code of the audio (e.g., "en", "es").
	// If not provided, the model will attempt to detect the language.
	Language string

	// Prompt is an optional hint for the model (can improve accuracy).
	Prompt string

	// ProviderOptions are provider-specific options.
	ProviderOptions map[string]any

	// Headers are additional HTTP headers.
	Headers map[string]string
}

TranscribeCallOptions contains the options for transcription.

type TranscriptionModel ¶

// TranscriptionModel is the interface for speech-to-text models.
type TranscriptionModel interface {
	// ID returns the model identifier.
	ID() string

	// Provider returns the provider identifier.
	Provider() string

	// Transcribe transcribes audio to text.
	// The audio input and hints are supplied through opts; the call yields a
	// *TranscriptionResult or an error.
	Transcribe(ctx context.Context, opts TranscribeCallOptions) (*TranscriptionResult, error)
}

TranscriptionModel is the interface for speech-to-text models.

type TranscriptionResponse ¶

// TranscriptionResponse contains provider-specific response metadata.
// It is carried in TranscriptionResult.Response.
type TranscriptionResponse struct {
	// ID is the response identifier.
	ID string

	// Model is the model used for transcription.
	Model string

	// Headers contains response headers.
	Headers map[string]string
}

TranscriptionResponse contains provider-specific response metadata.

type TranscriptionResult ¶

// TranscriptionResult contains the result of a transcription call.
// TranscriptionModel.Transcribe returns a pointer to this type.
type TranscriptionResult struct {
	// Text is the transcribed text.
	Text string

	// Segments contains detailed segment information (if available).
	Segments []TranscriptionSegment

	// Language is the detected language code.
	Language string

	// Duration is the duration of the audio in seconds (if available).
	// It is a pointer so that an unavailable duration can be represented as nil.
	Duration *float64

	// Warnings contains any warnings from the transcription process.
	Warnings []stream.Warning

	// Usage contains usage information (if available).
	// It is a pointer so that unavailable usage can be represented as nil.
	Usage *TranscriptionUsage

	// Response contains provider-specific response data.
	Response TranscriptionResponse
}

TranscriptionResult contains the result of a transcription call.

type TranscriptionSegment ¶

// TranscriptionSegment represents a segment of transcribed audio.
// It is the element type of TranscriptionResult.Segments.
type TranscriptionSegment struct {
	// ID is the segment identifier.
	ID int

	// Text is the transcribed text for this segment.
	Text string

	// Start is the start time in seconds.
	// NOTE(review): presumably measured from the beginning of the audio — confirm.
	Start float64

	// End is the end time in seconds.
	End float64

	// Confidence is the confidence score (0 to 1).
	// NOTE(review): a float64 cannot distinguish "not reported" from a real
	// score of 0 — confirm how providers without confidence data fill this.
	Confidence float64

	// Words contains word-level information (if available).
	Words []TranscriptionWord
}

TranscriptionSegment represents a segment of transcribed audio.

type TranscriptionUsage ¶

// TranscriptionUsage contains usage information for transcription.
// It is referenced from TranscriptionResult.Usage.
type TranscriptionUsage struct {
	// DurationSeconds is the duration of audio processed in seconds.
	DurationSeconds float64
}

TranscriptionUsage contains usage information for transcription.

type TranscriptionWord ¶

// TranscriptionWord represents a single word in the transcription.
// It is the element type of TranscriptionSegment.Words.
type TranscriptionWord struct {
	// Word is the transcribed word.
	Word string

	// Start is the start time in seconds.
	// NOTE(review): presumably measured from the beginning of the audio — confirm.
	Start float64

	// End is the end time in seconds.
	End float64

	// Confidence is the confidence score (0 to 1).
	Confidence float64
}

TranscriptionWord represents a single word in the transcription.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL