Documentation
¶
Index ¶
- Constants
- func BuildElementLocationPrompt(elementLabel string, nearbyElements []string) string
- func BuildScreenTypePrompt(uiautomatorElements []string) string
- type AnalyzeResult
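- type CircuitClient
- func NewCircuitClient(url, model string) *CircuitClient
- func NewCircuitClientFromClient(client *Client) *CircuitClient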
- func (cc *CircuitClient) Analyze(ctx context.Context, imgBase64 string) (*types.VisionResponse, error)
- func (cc *CircuitClient) AnalyzeAsync(ctx context.Context, imgBase64, prompt string) <-chan AnalyzeResult
- func (cc *CircuitClient) AnalyzeWithPrompt(ctx context.Context, imgBase64, prompt string) (*types.VisionResponse, error)
- func (cc *CircuitClient) CircuitState() resilience.State
- func (cc *CircuitClient) Model() string
- func (cc *CircuitClient) OllamaURL() string
- type Client
- func (c *Client) Analyze(ctx context.Context, imgBase64 string) (*types.VisionResponse, error)
- func (c *Client) AnalyzeAsync(ctx context.Context, imgBase64 string, prompt string) <-chan AnalyzeResult
- func (c *Client) AnalyzeAsyncWithCircuit(ctx context.Context, ...) <-chan AnalyzeResult
- func (c *Client) AnalyzeWithPrompt(ctx context.Context, imgBase64 string, prompt string) (*types.VisionResponse, error)
- func (c *Client) Model() string
- func (c *Client) OllamaURL() string
- type OllamaRequest
- type OllamaResponse
Constants ¶
const (
// ScreenTypePrompt asks the model to identify what screen we are on.
ScreenTypePrompt = `` /* 353-byte string literal not displayed */
// ElementDetectionPrompt asks the model for coordinates of a specific element.
ElementDetectionPrompt = `` /* 275-byte string literal not displayed */
)
Prompt templates for different vision agent tasks.
Variables ¶
This section is empty.
Functions ¶
func BuildElementLocationPrompt ¶
func BuildElementLocationPrompt(elementLabel string, nearbyElements []string) string
BuildElementLocationPrompt constructs a prompt for finding a specific element.
func BuildScreenTypePrompt ¶
func BuildScreenTypePrompt(uiautomatorElements []string) string
BuildScreenTypePrompt constructs a refined prompt for screen identification.
Types ¶
type AnalyzeResult ¶
type AnalyzeResult struct {
Response *types.VisionResponse
RawResponse string // The raw JSON string from the vision model
Err error // Error from async analysis
}
AnalyzeResult contains the result of async vision analysis.
type CircuitClient ¶
type CircuitClient struct {
// contains filtered or unexported fields
}
CircuitClient wraps a vision client with circuit breaker protection.
func NewCircuitClient ¶
func NewCircuitClient(url, model string) *CircuitClient
NewCircuitClient creates a vision client with circuit breaker protection.
func NewCircuitClientFromClient ¶
func NewCircuitClientFromClient(client *Client) *CircuitClient
NewCircuitClientFromClient wraps an existing client with circuit breaker.
func (*CircuitClient) Analyze ¶
func (cc *CircuitClient) Analyze(ctx context.Context, imgBase64 string) (*types.VisionResponse, error)
Analyze sends a screenshot to the vision model with circuit breaker protection. Calls the underlying client's Analyze directly to preserve the rich default prompt.
func (*CircuitClient) AnalyzeAsync ¶
func (cc *CircuitClient) AnalyzeAsync(ctx context.Context, imgBase64, prompt string) <-chan AnalyzeResult
AnalyzeAsync performs async vision analysis with circuit breaker protection.
func (*CircuitClient) AnalyzeWithPrompt ¶
func (cc *CircuitClient) AnalyzeWithPrompt(ctx context.Context, imgBase64, prompt string) (*types.VisionResponse, error)
AnalyzeWithPrompt sends a screenshot and prompt with circuit breaker protection.
func (*CircuitClient) CircuitState ¶
func (cc *CircuitClient) CircuitState() resilience.State
CircuitState returns the current state of the circuit breaker.
func (*CircuitClient) Model ¶
func (cc *CircuitClient) Model() string
Model returns the configured vision model name.
func (*CircuitClient) OllamaURL ¶
func (cc *CircuitClient) OllamaURL() string
OllamaURL returns the configured Ollama API URL.
type Client ¶
type Client struct {
// contains filtered or unexported fields
}
Client handles interaction with a multi-modal vision model service (Ollama).
func (*Client) Analyze ¶
func (c *Client) Analyze(ctx context.Context, imgBase64 string) (*types.VisionResponse, error)
Analyze sends a screenshot to the vision model for screen classification. Uses a default prompt suitable for identifying screen type.
func (*Client) AnalyzeAsync ¶
func (c *Client) AnalyzeAsync(ctx context.Context, imgBase64 string, prompt string) <-chan AnalyzeResult
AnalyzeAsync starts vision analysis and returns a result channel. The analysis runs in a goroutine, allowing the caller to continue other work (like capturing the next screenshot) while waiting.
func (*Client) AnalyzeAsyncWithCircuit ¶
func (c *Client) AnalyzeAsyncWithCircuit( ctx context.Context, circuit func(fn func() (interface{}, error)) (interface{}, error), imgBase64 string, prompt string, ) <-chan AnalyzeResult
AnalyzeAsyncWithCircuit starts vision analysis through a circuit breaker. The circuit parameter should be a function that wraps the analysis call (a func() (interface{}, error)) in circuit breaker protection.
func (*Client) AnalyzeWithPrompt ¶
func (c *Client) AnalyzeWithPrompt(ctx context.Context, imgBase64 string, prompt string) (*types.VisionResponse, error)
AnalyzeWithPrompt sends a screenshot and prompt to the vision model.
type OllamaRequest ¶
type OllamaRequest struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
Images []string `json:"images"` // Base64 encoded images
Stream bool `json:"stream"`
Format string `json:"format,omitempty"` // "json"
}
OllamaRequest represents the payload for Ollama's generate API.