Documentation
¶
Overview ¶
Package agent provides a high-level, agent-optimized API for browser automation.
Unlike the core browse package, which follows Gin's middleware/handler pattern for human developers, the agent package is designed for AI agents and programmatic callers. It provides:
- Stateful sessions with automatic page lifecycle management
- Structured JSON-serializable results (not plain strings)
- Built-in retry and auto-wait on all operations
- High-level compound actions (FillForm, ExtractTable, ClickAndWait)
- Snapshot-based page state for agent context windows
Index ¶
- func EstimateTokens(s string) int
- func SavePlaybook(pb *Playbook, path string) error
- type Action
- type ActionExpect
- type AnnotatedElement
- type AnnotatedResult
- type AuthWallResult
- type BatchAction
- type BatchActionResult
- type BatchResult
- type ButtonInfo
- type ConsoleMessage
- type ContentOptions
- type CookieDismissResult
- type DOMChange
- type DOMDiff
- type DOMElement
- type DialogInfo
- type ElementResult
- type ExtractAllResult
- type ExtractedPattern
- type FieldResult
- type FormDiscoveryResult
- type FormFieldInfo
- type FormResult
- type FrameInfo
- type HistoryEntry
- type HybridElement
- type HybridResult
- type InputInfo
- type LinkInfo
- type NetworkCapture
- type Observation
- type PageDiff
- type PageReadiness
- type PageResult
- type Playbook
- type PlaybookResult
- type Profile
- type PromptCandidate
- type PromptSelectResult
- type SelectorSuggestion
- type SemanticFieldResult
- type SemanticFillResult
- type Session
- func (s *Session) AccessibilityTree() (string, error)
- func (s *Session) AnnotatedScreenshot() (*AnnotatedResult, error)
- func (s *Session) ApplyProfile(profile *Profile) error
- func (s *Session) AutoExtract() (*ExtractedPattern, error)
- func (s *Session) CaptureProfile() (*Profile, error)
- func (s *Session) CapturedRequests(pattern string) []NetworkCapture
- func (s *Session) CheckReadiness() (*PageReadiness, error)
- func (s *Session) ClearCapturedRequests()
- func (s *Session) Click(selector string) (*PageResult, error)
- func (s *Session) ClickAndWait(selector string) (*PageResult, error)
- func (s *Session) ClickLabel(label int) (*PageResult, error)
- func (s *Session) Close() error
- func (s *Session) CloseTab(name string) error
- func (s *Session) CompareTabs(tab1, tab2 string) (*PageDiff, error)
- func (s *Session) ComponentState(selector string) (map[string]any, error)
- func (s *Session) ConsoleErrors() ([]ConsoleMessage, error)
- func (s *Session) DetectAuthWall() (*AuthWallResult, error)
- func (s *Session) DetectDialog() (*DialogInfo, error)
- func (s *Session) DetectedFrameworks() ([]string, error)
- func (s *Session) DisableNetworkCapture()
- func (s *Session) DiscoverForm(formSelector string) (*FormDiscoveryResult, error)
- func (s *Session) DismissCookieBanner() (*CookieDismissResult, error)
- func (s *Session) DispatchEvent(selector, eventType string, detail map[string]any) error
- func (s *Session) DoubleClick(selector string) (*PageResult, error)
- func (s *Session) DragDrop(fromSelector, toSelector string) (*PageResult, error)
- func (s *Session) EnableNetworkCapture(patterns ...string) error
- func (s *Session) Eval(js string) (any, error)
- func (s *Session) ExecuteBatch(actions []BatchAction) (*BatchResult, error)
- func (s *Session) Extract(selector string) (*ElementResult, error)
- func (s *Session) ExtractAll(selector string) (*ExtractAllResult, error)
- func (s *Session) ExtractTable(tableSelector string) (*TableResult, error)
- func (s *Session) FillForm(fields map[string]string) (*FormResult, error)
- func (s *Session) FillFormSemantic(fields map[string]string) (*SemanticFillResult, error)
- func (s *Session) FindByCoordinates(x, y int) (*PromptSelectResult, error)
- func (s *Session) Focus(selector string) (*PageResult, error)
- func (s *Session) GetAppState() (map[string]any, error)
- func (s *Session) HasElement(selector string) bool
- func (s *Session) Hover(selector string) (*PageResult, error)
- func (s *Session) HybridObserve() (*HybridResult, error)
- func (s *Session) InFrame() bool
- func (s *Session) ListTabs() ([]TabInfo, error)
- func (s *Session) LoadProfile(path string) error
- func (s *Session) Markdown() (string, error)
- func (s *Session) Navigate(url string) (*PageResult, error)
- func (s *Session) NavigateAndDismissCookies(url string) (*PageResult, error)
- func (s *Session) Observe() (*Observation, error)
- func (s *Session) ObserveDiff() (*Observation, *DOMDiff, error)
- func (s *Session) ObserveWithBudget(budget int) (*Observation, error)
- func (s *Session) OpenTab(name, url string) (*PageResult, error)
- func (s *Session) PDF() ([]byte, error)
- func (s *Session) Page() *browse.Page
- func (s *Session) ReactState(selector string) (map[string]any, error)
- func (s *Session) ReadableText() (string, error)
- func (s *Session) ReplayPlaybook(pb *Playbook) (*PlaybookResult, error)
- func (s *Session) RightClick(selector string) (*PageResult, error)
- func (s *Session) SaveProfile(path string) error
- func (s *Session) Screenshot() ([]byte, error)
- func (s *Session) ScrollAndCollect(selector string, maxItems int) (*ExtractAllResult, error)
- func (s *Session) ScrollBy(x, y int) (*PageResult, error)
- func (s *Session) ScrollTo(selector string) (*PageResult, error)
- func (s *Session) SelectByPrompt(prompt string) (*PromptSelectResult, error)
- func (s *Session) SelectOption(selector, optionText string) (*ElementResult, error)
- func (s *Session) SessionHistory(n int) []HistoryEntry
- func (s *Session) SetContentOptions(opts ContentOptions)
- func (s *Session) Snapshot() (*PageResult, error)
- func (s *Session) StartRecordingPlaybook(name string)
- func (s *Session) StartTrace() error
- func (s *Session) StopRecordingPlaybook() (*Playbook, error)
- func (s *Session) StopTrace(path string) (*TraceResult, error)
- func (s *Session) SuggestSelectors(failedSelector string) ([]SelectorSuggestion, error)
- func (s *Session) SwitchTab(name string) (*PageResult, error)
- func (s *Session) SwitchToFrame(selector string) (*PageResult, error)
- func (s *Session) SwitchToMainFrame() (*PageResult, error)
- func (s *Session) Type(selector, text string) (*ElementResult, error)
- func (s *Session) UploadFile(selector, filePath string) error
- func (s *Session) VueState(selector string) (map[string]any, error)
- func (s *Session) WaitFor(selector string) error
- func (s *Session) WaitForRouteChange(timeout time.Duration) (*PageResult, error)
- func (s *Session) WaitForSPA() error
- func (s *Session) WebVitals() (*WebVitalsResult, error)
- type SessionConfig
- type TabInfo
- type TableResult
- type TraceEvent
- type TraceResult
- type WebVitalsResult
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func EstimateTokens ¶
EstimateTokens returns an approximate token count for a string. Uses the heuristic of 1 token ≈ 4 characters.
func SavePlaybook ¶ added in v0.6.0
SavePlaybook saves a playbook to a JSON file.
Types ¶
type Action ¶ added in v0.6.0
type Action struct {
Type string `json:"type"` // navigate, click, click_label, type, select, scroll, wait, extract, fill_form_semantic
Selector string `json:"selector,omitempty"` // CSS selector or :text() selector
Value string `json:"value,omitempty"` // URL for navigate, text for type, option for select
Label int `json:"label,omitempty"` // label number for click_label
Fields map[string]string `json:"fields,omitempty"` // for fill_form_semantic
Expected *ActionExpect `json:"expected,omitempty"` // expected outcome for replay validation
}
Action represents a single recorded browser action.
type ActionExpect ¶ added in v0.6.0
type ActionExpect struct {
URL string `json:"url,omitempty"` // expected URL after action
Title string `json:"title,omitempty"` // expected page title
Selector string `json:"selector,omitempty"` // element that should exist after action
Text string `json:"text,omitempty"` // expected text content
}
ActionExpect describes the expected outcome of an action for replay validation.
type AnnotatedElement ¶
type AnnotatedElement struct {
Label int `json:"label"`
Selector string `json:"selector"`
Tag string `json:"tag"`
Type string `json:"type,omitempty"`
Text string `json:"text,omitempty"`
Href string `json:"href,omitempty"`
X int `json:"x"`
Y int `json:"y"`
Width int `json:"width"`
Height int `json:"height"`
}
AnnotatedElement maps a numbered label to an interactive element.
type AnnotatedResult ¶
type AnnotatedResult struct {
Image []byte `json:"-"` // PNG/JPEG image data
Elements []AnnotatedElement `json:"elements"`
Count int `json:"count"`
}
AnnotatedResult holds an annotated screenshot with element-label mapping.
type AuthWallResult ¶ added in v1.0.0
type AuthWallResult struct {
Detected bool `json:"detected"`
Type string `json:"type,omitempty"` // login, paywall, captcha, age_gate, none
Confidence int `json:"confidence"` // 0-100
Reason string `json:"reason,omitempty"`
LoginURL string `json:"login_url,omitempty"`
}
AuthWallResult describes whether the page appears to be behind an auth wall.
type BatchAction ¶ added in v1.2.0
type BatchAction struct {
Action string `json:"action"`
Selector string `json:"selector,omitempty"`
Value string `json:"value,omitempty"`
Label int `json:"label,omitempty"`
Fields map[string]string `json:"fields,omitempty"`
}
BatchAction describes a single action within a batch operation.
type BatchActionResult ¶ added in v1.2.0
type BatchActionResult struct {
Index int `json:"index"`
Action string `json:"action"`
Success bool `json:"success"`
Error string `json:"error,omitempty"`
}
BatchActionResult describes the outcome of a single action within a batch.
type BatchResult ¶ added in v1.2.0
type BatchResult struct {
Total int `json:"total"`
Succeeded int `json:"succeeded"`
Failed int `json:"failed"`
Results []BatchActionResult `json:"results"`
}
BatchResult is the structured response from ExecuteBatch.
type ButtonInfo ¶
type ButtonInfo struct {
Text string `json:"text"`
ID string `json:"id,omitempty"`
Type string `json:"type,omitempty"`
Cost string `json:"cost,omitempty"` // "high" (submit/navigation), "medium" (action), "low" (toggle)
}
ButtonInfo describes a button element.
type ConsoleMessage ¶ added in v1.0.0
type ConsoleMessage struct {
Level string `json:"level"` // log, warn, error, info
Text string `json:"text"`
Source string `json:"source,omitempty"`
}
ConsoleMessage represents a captured browser console message.
type ContentOptions ¶
type ContentOptions struct {
// MaxLength is the maximum character length of returned content. Default 4000.
MaxLength int
// MaxLinks caps the number of links returned in observations. Default 25.
MaxLinks int
// MaxInputs caps the number of inputs returned. Default 20.
MaxInputs int
// MaxButtons caps the number of buttons returned. Default 15.
MaxButtons int
// MaxItems caps ExtractAll results. Default 50.
MaxItems int
// MaxRows caps table rows. Default 100.
MaxRows int
// MaxScreenshotBytes is the maximum screenshot size in bytes. Default 5MB.
// Screenshots exceeding this are auto-compressed (JPEG + downscale).
MaxScreenshotBytes int
}
ContentOptions controls how page content is extracted and truncated.
func DefaultContentOptions ¶
func DefaultContentOptions() ContentOptions
DefaultContentOptions returns sensible defaults for LLM context windows.
type CookieDismissResult ¶ added in v0.9.0
type CookieDismissResult struct {
Found bool `json:"found"`
Method string `json:"method,omitempty"` // "selector", "text", "none"
Selector string `json:"selector,omitempty"` // which selector matched
Text string `json:"text,omitempty"` // button text that was clicked
Banner string `json:"banner,omitempty"` // banner selector if found but not dismissed
}
CookieDismissResult describes the outcome of cookie banner dismissal.
type DOMChange ¶
type DOMChange struct {
Tag string `json:"tag"`
ID string `json:"id,omitempty"`
Attribute string `json:"attribute,omitempty"`
OldValue string `json:"old_value,omitempty"`
NewValue string `json:"new_value,omitempty"`
ChangeType string `json:"change_type"` // "attribute", "text", "children"
}
DOMChange describes a modification to an existing element.
type DOMDiff ¶
type DOMDiff struct {
Added []DOMElement `json:"added,omitempty"`
Removed []DOMElement `json:"removed,omitempty"`
Modified []DOMChange `json:"modified,omitempty"`
HasDiff bool `json:"has_diff"`
Classification string `json:"classification,omitempty"` // navigation, content_loaded, modal_appeared, form_error, notification, loading_complete, element_state_changed, minor_update
Summary string `json:"summary,omitempty"` // human-readable one-line summary
}
DOMDiff represents changes between two Observe() calls.
type DOMElement ¶
type DOMElement struct {
Tag string `json:"tag"`
ID string `json:"id,omitempty"`
Classes string `json:"classes,omitempty"`
Text string `json:"text,omitempty"`
}
DOMElement describes an element that was added or removed.
type DialogInfo ¶ added in v1.0.5
type DialogInfo struct {
Found bool `json:"found"`
Type string `json:"type,omitempty"` // dialog, modal, overlay, alert, confirm, prompt
Title string `json:"title,omitempty"` // dialog title/heading
Text string `json:"text,omitempty"` // dialog body text
Buttons []string `json:"buttons,omitempty"` // available action buttons
Inputs []InputInfo `json:"inputs,omitempty"` // input fields in the dialog
Selector string `json:"selector,omitempty"` // CSS selector for the dialog element
}
DialogInfo describes a visible modal/dialog on the page.
type ElementResult ¶
type ElementResult struct {
Selector string `json:"selector"`
Text string `json:"text,omitempty"`
Value string `json:"value,omitempty"`
Action string `json:"action"`
}
ElementResult is the structured response for single-element operations.
type ExtractAllResult ¶
type ExtractAllResult struct {
Selector string `json:"selector"`
Count int `json:"count"`
Total int `json:"total"`
Truncated bool `json:"truncated,omitempty"`
Items []string `json:"items"`
}
ExtractAllResult is the structured response for multi-element extraction.
type ExtractedPattern ¶ added in v1.0.0
type ExtractedPattern struct {
Pattern string `json:"pattern"` // CSS selector for the repeating container
Count int `json:"count"` // number of items found
Fields []string `json:"fields"` // detected field names
Items []map[string]string `json:"items"` // extracted structured data
}
ExtractedPattern represents a repeating pattern found on the page (product cards, list items, etc).
type FieldResult ¶
type FieldResult struct {
Selector string `json:"selector"`
Value string `json:"value,omitempty"`
Success bool `json:"success"`
Error string `json:"error,omitempty"`
}
FieldResult describes the outcome of filling a single field.
type FormDiscoveryResult ¶
type FormDiscoveryResult struct {
FormSelector string `json:"form_selector"`
Action string `json:"action,omitempty"`
Method string `json:"method,omitempty"`
Fields []FormFieldInfo `json:"fields"`
}
FormDiscoveryResult is the structured response from form field discovery.
type FormFieldInfo ¶
type FormFieldInfo struct {
Selector string `json:"selector"`
Label string `json:"label"`
Type string `json:"type"`
Name string `json:"name,omitempty"`
ID string `json:"id,omitempty"`
Placeholder string `json:"placeholder,omitempty"`
Required bool `json:"required,omitempty"`
Options []string `json:"options,omitempty"`
}
FormFieldInfo describes a discovered form field with its label.
func MatchFormField ¶
func MatchFormField(humanName string, fields []FormFieldInfo) *FormFieldInfo
MatchFormField finds the best matching field for a human-readable name using weighted fuzzy matching on label, name, id, placeholder, and type. Returns nil if no match is found. Exported for direct testing and reuse.
type FormResult ¶
type FormResult struct {
Fields []FieldResult `json:"fields"`
Success bool `json:"success"`
}
FormResult is the structured response for form filling.
type FrameInfo ¶ added in v1.2.0
type FrameInfo struct {
FrameID string `json:"frame_id"`
URL string `json:"url"`
Name string `json:"name,omitempty"`
Selector string `json:"selector,omitempty"`
}
FrameInfo describes an iframe discovered on the page.
type HistoryEntry ¶ added in v0.9.0
type HistoryEntry struct {
Action string `json:"action"`
Selector string `json:"selector,omitempty"`
URL string `json:"url,omitempty"`
Result string `json:"result,omitempty"`
Timestamp string `json:"timestamp"`
}
HistoryEntry records a single action in the session history.
type HybridElement ¶ added in v1.2.0
type HybridElement struct {
Index int `json:"index"`
Tag string `json:"tag"`
Text string `json:"text"`
Selector string `json:"selector"`
Role string `json:"role,omitempty"`
X float64 `json:"x"`
Y float64 `json:"y"`
Width float64 `json:"width"`
Height float64 `json:"height"`
}
HybridElement describes an interactive element with its bounding box.
type HybridResult ¶ added in v1.2.0
type HybridResult struct {
Screenshot []byte `json:"screenshot,omitempty"`
Elements []HybridElement `json:"elements"`
Width int `json:"viewport_width"`
Height int `json:"viewport_height"`
}
HybridResult holds a clean screenshot alongside bounding-box data for all interactive elements, enabling coordinate-based element selection.
type InputInfo ¶
type InputInfo struct {
ID string `json:"id,omitempty"`
Name string `json:"name,omitempty"`
Type string `json:"type"`
Value string `json:"value,omitempty"`
Placeholder string `json:"placeholder,omitempty"`
}
InputInfo describes an input element.
type LinkInfo ¶
type LinkInfo struct {
Text string `json:"text"`
Href string `json:"href"`
Cost string `json:"cost,omitempty"` // "high" (navigation), "medium" (ajax), "low" (anchor)
}
LinkInfo describes a link on the page.
type NetworkCapture ¶
type NetworkCapture struct {
URL string `json:"url"`
Method string `json:"method"`
Status int `json:"status"`
MimeType string `json:"mime_type,omitempty"`
RequestHeaders map[string]string `json:"request_headers,omitempty"`
ResponseHeaders map[string]string `json:"response_headers,omitempty"`
RequestBody string `json:"request_body,omitempty"`
ResponseBody string `json:"response_body,omitempty"`
RequestBodyTruncated bool `json:"request_body_truncated,omitempty"`
ResponseBodyTruncated bool `json:"response_body_truncated,omitempty"`
}
NetworkCapture holds a captured network request/response pair.
type Observation ¶
type Observation struct {
URL string `json:"url"`
Title string `json:"title"`
Text string `json:"text"`
Links []LinkInfo `json:"links,omitempty"`
Inputs []InputInfo `json:"inputs,omitempty"`
Buttons []ButtonInfo `json:"buttons,omitempty"`
Interactive int `json:"interactive_elements"`
Meta map[string]string `json:"meta,omitempty"`
HasDialog bool `json:"has_dialog,omitempty"`
DialogType string `json:"dialog_type,omitempty"` // dialog, modal, overlay
DialogText string `json:"dialog_text,omitempty"`
}
Observation is a structured snapshot of the visible page for agent context.
type PageDiff ¶ added in v1.0.0
type PageDiff struct {
URL1 string `json:"url1"`
URL2 string `json:"url2"`
Title1 string `json:"title1"`
Title2 string `json:"title2"`
OnlyIn1 []string `json:"only_in_1,omitempty"`
OnlyIn2 []string `json:"only_in_2,omitempty"`
Different map[string][2]string `json:"different,omitempty"` // field -> [value1, value2]
}
PageDiff holds the differences found when comparing two pages.
type PageReadiness ¶ added in v0.9.0
type PageReadiness struct {
Score int `json:"score"` // 0-100
State string `json:"state"` // loading, interactive, complete, spa_ready
PendingXHR int `json:"pending_xhr"` // in-flight XHR/fetch requests
PendingImages int `json:"pending_images"` // images still loading
HasSkeleton bool `json:"has_skeleton,omitempty"` // skeleton/placeholder elements present
HasSpinner bool `json:"has_spinner,omitempty"` // loading spinner present
Suggestions []string `json:"suggestions,omitempty"` // what to wait for
}
PageReadiness describes how ready a page is for interaction.
type PageResult ¶
PageResult is the structured response after a navigation or page-level action.
type Playbook ¶ added in v0.6.0
type Playbook struct {
Name string `json:"name"`
URL string `json:"url"` // starting URL
Actions []Action `json:"actions"`
CreatedAt time.Time `json:"created_at"`
}
Playbook is a recorded sequence of browser actions that can be replayed.
func LoadPlaybook ¶ added in v0.6.0
LoadPlaybook loads a playbook from a JSON file.
type PlaybookResult ¶ added in v0.6.0
type PlaybookResult struct {
Success bool `json:"success"`
StepsRun int `json:"steps_run"`
TotalSteps int `json:"total_steps"`
FailedAt int `json:"failed_at,omitempty"`
FailedAction *Action `json:"failed_action,omitempty"`
Error string `json:"error,omitempty"`
Extracted map[string]string `json:"extracted,omitempty"` // data from extract actions
}
PlaybookResult is the outcome of replaying a playbook.
type Profile ¶
type Profile struct {
Cookies []browse.Cookie `json:"cookies,omitempty"`
LocalStorage map[string]string `json:"local_storage,omitempty"`
}
Profile holds serializable browser state that can be saved/loaded across sessions.
type PromptCandidate ¶ added in v1.2.0
type PromptCandidate struct {
Selector string `json:"selector"`
Text string `json:"text"`
Score float64 `json:"score"`
}
PromptCandidate describes one candidate element from natural language matching.
type PromptSelectResult ¶ added in v1.2.0
type PromptSelectResult struct {
Selector string `json:"selector"`
Text string `json:"text"`
Tag string `json:"tag"`
Role string `json:"role,omitempty"`
Confidence float64 `json:"confidence"`
Candidates []PromptCandidate `json:"candidates,omitempty"`
}
PromptSelectResult is the structured response from SelectByPrompt.
type SelectorSuggestion ¶ added in v0.9.0
type SelectorSuggestion struct {
Selector string `json:"selector"`
Tag string `json:"tag"`
Text string `json:"text,omitempty"`
ID string `json:"id,omitempty"`
Classes string `json:"classes,omitempty"`
}
SelectorSuggestion describes a similar element when a selector fails.
type SemanticFieldResult ¶
type SemanticFieldResult struct {
HumanName string `json:"human_name"`
Selector string `json:"selector,omitempty"`
Value string `json:"value,omitempty"`
Success bool `json:"success"`
Error string `json:"error,omitempty"`
}
SemanticFieldResult describes the outcome of filling one semantically-matched field.
type SemanticFillResult ¶
type SemanticFillResult struct {
Fields []SemanticFieldResult `json:"fields"`
Success bool `json:"success"`
}
SemanticFillResult is the structured response from semantic form filling.
type Session ¶
type Session struct {
// contains filtered or unexported fields
}
Session manages a stateful browser automation session for an agent. All methods are goroutine-safe via an internal mutex.
func NewSession ¶
func NewSession(cfg SessionConfig) (*Session, error)
NewSession creates and launches a new browser session.
func NewSessionFromBrowser ¶
func NewSessionFromBrowser(b browse.Browser, cfg SessionConfig) *Session
NewSessionFromBrowser creates a session from an existing Browser implementation. Use this to inject a mock browser for testing or to reuse a pre-configured Engine.
func (*Session) AccessibilityTree ¶
AccessibilityTree returns a compact tree representation of the page's accessible elements. This is much smaller than HTML and captures the semantic structure that matters for interaction.
func (*Session) AnnotatedScreenshot ¶
func (s *Session) AnnotatedScreenshot() (*AnnotatedResult, error)
AnnotatedScreenshot captures a screenshot with numbered labels overlaid on interactive elements. Returns the image data and a mapping of label numbers to element info (selector, text, type). Designed for multimodal LLMs that can reference elements by number instead of CSS selectors.
func (*Session) ApplyProfile ¶
ApplyProfile restores cookies and localStorage from a Profile. This is the domain operation — it does not involve filesystem I/O.
func (*Session) AutoExtract ¶ added in v1.0.0
func (s *Session) AutoExtract() (*ExtractedPattern, error)
AutoExtract detects repeating patterns on the page and extracts structured data. Works without selectors — finds product cards, search results, list items, table rows automatically.
func (*Session) CaptureProfile ¶
CaptureProfile extracts the current browser state (cookies + localStorage) as a Profile. This is the domain operation — it does not involve filesystem I/O.
func (*Session) CapturedRequests ¶
func (s *Session) CapturedRequests(pattern string) []NetworkCapture
CapturedRequests returns captured network requests, optionally filtered by URL pattern.
func (*Session) CheckReadiness ¶ added in v0.9.0
func (s *Session) CheckReadiness() (*PageReadiness, error)
CheckReadiness returns the current page readiness state.
func (*Session) ClearCapturedRequests ¶
func (s *Session) ClearCapturedRequests()
ClearCapturedRequests clears all captured requests.
func (*Session) Click ¶
func (s *Session) Click(selector string) (*PageResult, error)
Click clicks an element and returns the updated page state.
func (*Session) ClickAndWait ¶
func (s *Session) ClickAndWait(selector string) (*PageResult, error)
ClickAndWait clicks an element and waits for a full page navigation.
func (*Session) ClickLabel ¶
func (s *Session) ClickLabel(label int) (*PageResult, error)
ClickLabel clicks the element with the given annotation label number. Use after AnnotatedScreenshot to interact by label instead of selector.
func (*Session) CloseTab ¶ added in v0.4.0
CloseTab closes a named tab. The active tab cannot be closed.
func (*Session) CompareTabs ¶ added in v1.0.0
CompareTabs compares the content of two named tabs.
func (*Session) ComponentState ¶
ComponentState extracts state/props from a framework component at the given selector. Auto-detects the framework (React, Vue 2/3, Svelte, Preact, Angular, Alpine, Lit).
func (*Session) ConsoleErrors ¶ added in v1.0.0
func (s *Session) ConsoleErrors() ([]ConsoleMessage, error)
ConsoleErrors returns captured console.error and console.warn messages from the page.
func (*Session) DetectAuthWall ¶ added in v1.0.0
func (s *Session) DetectAuthWall() (*AuthWallResult, error)
DetectAuthWall checks if the current page is an authentication wall, paywall, CAPTCHA, or age gate.
func (*Session) DetectDialog ¶ added in v1.0.5
func (s *Session) DetectDialog() (*DialogInfo, error)
DetectDialog checks if a modal, dialog, or overlay is currently visible on the page.
func (*Session) DetectedFrameworks ¶
DetectedFrameworks returns which frontend frameworks are active on the current page.
func (*Session) DisableNetworkCapture ¶
func (s *Session) DisableNetworkCapture()
DisableNetworkCapture stops capturing network requests.
func (*Session) DiscoverForm ¶
func (s *Session) DiscoverForm(formSelector string) (*FormDiscoveryResult, error)
DiscoverForm analyzes form fields on the page and returns their labels and selectors. If formSelector is empty, discovers all forms on the page.
func (*Session) DismissCookieBanner ¶ added in v0.9.0
func (s *Session) DismissCookieBanner() (*CookieDismissResult, error)
DismissCookieBanner attempts to find and dismiss common cookie consent banners. Tries common selectors and text patterns. Returns whether a banner was found and dismissed.
func (*Session) DispatchEvent ¶
DispatchEvent dispatches a DOM event on an element.
func (*Session) DoubleClick ¶ added in v0.3.0
func (s *Session) DoubleClick(selector string) (*PageResult, error)
DoubleClick double-clicks an element.
func (*Session) DragDrop ¶ added in v0.3.0
func (s *Session) DragDrop(fromSelector, toSelector string) (*PageResult, error)
DragDrop drags an element from one selector to another.
func (*Session) EnableNetworkCapture ¶
EnableNetworkCapture starts capturing XHR/fetch responses matching the given URL patterns. If no patterns are given, all requests are captured. Patterns are matched as substrings.
func (*Session) ExecuteBatch ¶ added in v1.2.0
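func (s *Session) ExecuteBatch(actions []BatchAction) (*BatchResult, error)
ExecuteBatch runs the given batch of actions and returns a BatchResult containing the total, succeeded, and failed counts along with a per-action BatchActionResult.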
func (*Session) Extract ¶
func (s *Session) Extract(selector string) (*ElementResult, error)
Extract returns the text content of an element.
func (*Session) ExtractAll ¶
func (s *Session) ExtractAll(selector string) (*ExtractAllResult, error)
ExtractAll returns text content from all matching elements.
func (*Session) ExtractTable ¶
func (s *Session) ExtractTable(tableSelector string) (*TableResult, error)
ExtractTable extracts structured table data.
func (*Session) FillForm ¶
func (s *Session) FillForm(fields map[string]string) (*FormResult, error)
FillForm fills multiple form fields and returns their resulting values.
func (*Session) FillFormSemantic ¶
func (s *Session) FillFormSemantic(fields map[string]string) (*SemanticFillResult, error)
FillFormSemantic fills form fields using human-readable names instead of CSS selectors. Keys are names like "Email", "Password", "First Name". The method auto-discovers form fields and matches by label, name, placeholder, and id.
func (*Session) FindByCoordinates ¶ added in v1.2.0
func (s *Session) FindByCoordinates(x, y int) (*PromptSelectResult, error)
func (*Session) Focus ¶ added in v0.3.0
func (s *Session) Focus(selector string) (*PageResult, error)
Focus sets focus on an element (triggers :focus CSS state).
func (*Session) GetAppState ¶
GetAppState extracts global application state from all detected frameworks. Checks: Redux, Next.js, Nuxt, Remix, SvelteKit, Gatsby, Alpine stores, HTMX config, and common SSR hydration patterns.
func (*Session) HasElement ¶
HasElement checks if an element exists on the page.
func (*Session) Hover ¶ added in v0.3.0
func (s *Session) Hover(selector string) (*PageResult, error)
Hover moves the mouse over an element, triggering CSS :hover states and tooltips.
func (*Session) HybridObserve ¶ added in v1.2.0
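func (s *Session) HybridObserve() (*HybridResult, error)
HybridObserve returns a HybridResult: a clean screenshot alongside bounding-box data for all interactive elements, enabling coordinate-based element selection.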
func (*Session) InFrame ¶ added in v1.2.0
InFrame returns true if the session is currently targeting an iframe.
func (*Session) ListTabs ¶ added in v0.4.0
ListTabs returns all open tabs with their URLs and titles.
func (*Session) LoadProfile ¶
LoadProfile reads a JSON profile file and applies it to the session. Convenience wrapper around file read + ApplyProfile.
func (*Session) Markdown ¶
Markdown returns a compact markdown representation of the page content. This is much smaller than raw HTML and easier for LLMs to process.
func (*Session) Navigate ¶
func (s *Session) Navigate(url string) (*PageResult, error)
Navigate loads a URL and returns structured page info.
func (*Session) NavigateAndDismissCookies ¶ added in v0.9.0
func (s *Session) NavigateAndDismissCookies(url string) (*PageResult, error)
NavigateAndDismissCookies navigates to a URL and auto-dismisses any cookie banner.
func (*Session) Observe ¶
func (s *Session) Observe() (*Observation, error)
Observe returns a structured snapshot of the page's current state, including all interactive elements (links, inputs, buttons). This is designed to fit into an agent's context window as a concise representation of what actions are available on the page.
func (*Session) ObserveDiff ¶
func (s *Session) ObserveDiff() (*Observation, *DOMDiff, error)
ObserveDiff returns the current page observation along with a structured diff of what changed since the last observation. On the first call, the diff is empty. This is much more token-efficient than re-sending the full page state each time.
func (*Session) ObserveWithBudget ¶
func (s *Session) ObserveWithBudget(budget int) (*Observation, error)
ObserveWithBudget returns a page observation constrained to approximately the given token budget. Content is prioritized: interactive elements first, then headings, then main content. 1 token ≈ 4 characters.
func (*Session) OpenTab ¶ added in v0.4.0
func (s *Session) OpenTab(name, url string) (*PageResult, error)
OpenTab creates a new named tab and navigates to the URL. The new tab becomes the active tab.
func (*Session) Page ¶ added in v1.0.4
Page returns the underlying Page for advanced operations. The caller must not hold the session mutex.
func (*Session) ReactState ¶
ReactState extracts React component state/props from an element.
func (*Session) ReadableText ¶
ReadableText extracts just the main readable text content, stripping navigation, sidebars, and boilerplate. Uses a heuristic based on text density.
func (*Session) ReplayPlaybook ¶ added in v0.6.0
func (s *Session) ReplayPlaybook(pb *Playbook) (*PlaybookResult, error)
ReplayPlaybook executes a recorded playbook deterministically. Returns the result including any extracted data and where it failed (if applicable).
func (*Session) RightClick ¶ added in v0.3.0
func (s *Session) RightClick(selector string) (*PageResult, error)
RightClick right-clicks an element (opens context menu).
func (*Session) SaveProfile ¶
SaveProfile captures browser state and writes it to a JSON file. Convenience wrapper around CaptureProfile + file write.
func (*Session) Screenshot ¶
Screenshot captures the page as an image. Automatically compresses to fit within MaxScreenshotBytes (default 5MB).
func (*Session) ScrollAndCollect ¶ added in v1.0.0
func (s *Session) ScrollAndCollect(selector string, maxItems int) (*ExtractAllResult, error)
ScrollAndCollect auto-scrolls the page and collects items as they lazy-load. Scrolls until no new items appear or maxItems is reached.
func (*Session) ScrollBy ¶ added in v0.3.0
func (s *Session) ScrollBy(x, y int) (*PageResult, error)
ScrollBy scrolls the page by a pixel offset.
func (*Session) ScrollTo ¶ added in v0.3.0
func (s *Session) ScrollTo(selector string) (*PageResult, error)
ScrollTo scrolls to bring an element into view.
func (*Session) SelectByPrompt ¶ added in v1.2.0
func (s *Session) SelectByPrompt(prompt string) (*PromptSelectResult, error)
SelectByPrompt finds an element using natural language matching against visible text, aria-label, placeholder, title, and other accessibility attributes. Returns the best match with a confidence score and up to 3 candidates.
func (*Session) SelectOption ¶ added in v0.3.0
func (s *Session) SelectOption(selector, optionText string) (*ElementResult, error)
SelectOption selects an option from a <select> element by visible text.
func (*Session) SessionHistory ¶ added in v0.9.0
func (s *Session) SessionHistory(n int) []HistoryEntry
SessionHistory returns the last N actions performed in this session. Provides conversation-aware context so agents don't lose track of what they've done.
func (*Session) SetContentOptions ¶
func (s *Session) SetContentOptions(opts ContentOptions)
SetContentOptions configures content limits for the session.
func (*Session) Snapshot ¶
func (s *Session) Snapshot() (*PageResult, error)
Snapshot returns the current page state without performing any action.
func (*Session) StartRecordingPlaybook ¶ added in v0.6.0
StartRecordingPlaybook begins recording all agent actions into a playbook.
func (*Session) StartTrace ¶ added in v1.2.0
func (*Session) StopRecordingPlaybook ¶ added in v0.6.0
StopRecordingPlaybook stops recording and returns the playbook.
func (*Session) StopTrace ¶ added in v1.2.0
func (s *Session) StopTrace(path string) (*TraceResult, error)
StopTrace stops the trace, writes the trace zip to path, and returns a TraceResult summarizing it.
func (*Session) SuggestSelectors ¶ added in v0.9.0
func (s *Session) SuggestSelectors(failedSelector string) ([]SelectorSuggestion, error)
SuggestSelectors finds elements similar to a failed selector. Called automatically when querySelector fails.
func (*Session) SwitchTab ¶ added in v0.4.0
func (s *Session) SwitchTab(name string) (*PageResult, error)
SwitchTab activates a named tab.
func (*Session) SwitchToFrame ¶ added in v1.2.0
func (s *Session) SwitchToFrame(selector string) (*PageResult, error)
SwitchToFrame switches the session's execution context to the iframe matching the given CSS selector. Subsequent Evaluate, Click, Type, etc. calls will operate inside the iframe until SwitchToMainFrame is called.
func (*Session) SwitchToMainFrame ¶ added in v1.2.0
func (s *Session) SwitchToMainFrame() (*PageResult, error)
SwitchToMainFrame resets the session back to the main frame context.
func (*Session) Type ¶
func (s *Session) Type(selector, text string) (*ElementResult, error)
Type types text into an input element.
func (*Session) UploadFile ¶ added in v1.0.0
UploadFile triggers a file upload on a file input element.
func (*Session) WaitForRouteChange ¶
func (s *Session) WaitForRouteChange(timeout time.Duration) (*PageResult, error)
WaitForRouteChange waits for an SPA client-side route change (pushState/replaceState/hashchange).
func (*Session) WaitForSPA ¶
WaitForSPA waits for SPA framework hydration/rendering to complete. Detects React, Vue, Angular, Svelte, Next.js, Nuxt, and generic content presence.
func (*Session) WebVitals ¶ added in v1.2.0
func (s *Session) WebVitals() (*WebVitalsResult, error)
WebVitals returns a WebVitalsResult holding Core Web Vitals and related performance metrics.
type SessionConfig ¶
// SessionConfig holds the options used to configure a new Session.
type SessionConfig struct {
// Headless presumably selects headless browser mode — NOTE(review): confirm against the launcher.
Headless bool
// Timeout is a duration setting; which operations it bounds is not visible here — TODO confirm.
Timeout time.Duration
// UserAgent presumably overrides the browser's User-Agent string; empty-value behavior is not shown — confirm.
UserAgent string
Viewport [2]int // [width, height], zero means default
AllowPrivateIPs bool // Allow navigation to private/loopback IPs
RemoteCDP string // WebSocket URL for remote Chrome (skips local launch)
Stealth bool // Apply anti-detection stealth patches to every new page
}
SessionConfig configures a new Session.
type TabInfo ¶ added in v0.4.0
// TabInfo describes one open tab. It serializes to JSON using the keys in the field tags.
type TabInfo struct {
// Name is the tab's name; tabs are created and activated by name (see OpenTab, SwitchTab).
Name string `json:"name"`
// URL is the tab's URL (presumably the currently loaded one — confirm).
URL string `json:"url"`
// Title is the tab's title (presumably the page title — confirm).
Title string `json:"title"`
// Active reports whether this tab is the active tab.
Active bool `json:"active"`
}
TabInfo describes an open tab.
type TableResult ¶
// TableResult is the structured, JSON-serializable result of a table extraction.
type TableResult struct {
// Selector is presumably the selector the table was extracted from — confirm against the producer.
Selector string `json:"selector"`
// Headers holds the table's header strings.
Headers []string `json:"headers"`
// Rows holds the cell strings, one inner slice per row.
Rows [][]string `json:"rows"`
// RowCount is serialized as "row_count"; presumably the number of entries in Rows — confirm.
RowCount int `json:"row_count"`
// ColCount is serialized as "col_count"; presumably the number of columns — confirm.
ColCount int `json:"col_count"`
// Truncated is omitted from JSON when false (omitempty).
// NOTE(review): presumably set when rows were cut off by a content limit — confirm which limit.
Truncated bool `json:"truncated,omitempty"`
}
TableResult is the structured response for table extraction.
type TraceEvent ¶ added in v1.2.0
// TraceEvent records a single action during a trace session.
// Fields tagged omitempty are left out of the JSON when they hold their zero value.
type TraceEvent struct {
// Index is serialized as "index"; presumably the event's position within the trace — confirm.
Index int `json:"index"`
// Action names the action that was recorded.
Action string `json:"action"`
// Selector is the selector associated with the action, if any (omitted when empty).
Selector string `json:"selector,omitempty"`
// Value is the value associated with the action, if any (omitted when empty).
Value string `json:"value,omitempty"`
// URL is the URL associated with the action, if any (omitted when empty).
URL string `json:"url,omitempty"`
// Timestamp is serialized as "timestamp_ms", i.e. milliseconds.
// NOTE(review): the reference point (Unix epoch vs. trace start) is not visible here — confirm.
Timestamp int64 `json:"timestamp_ms"`
// Duration is serialized as "duration_ms", i.e. the action's duration in milliseconds.
Duration int64 `json:"duration_ms"`
// Error is the error text for the action, if any (omitted when empty).
Error string `json:"error,omitempty"`
// BeforeImg is serialized as "before_screenshot" (omitted when empty).
// NOTE(review): whether this is a file name inside the trace archive or encoded image data is not shown — confirm.
BeforeImg string `json:"before_screenshot,omitempty"`
// AfterImg is serialized as "after_screenshot" (omitted when empty); same encoding caveat as BeforeImg.
AfterImg string `json:"after_screenshot,omitempty"`
}
TraceEvent records a single action during a trace session.
type TraceResult ¶ added in v1.2.0
// TraceResult is the structured response after stopping a trace and writing the zip.
type TraceResult struct {
// Path is serialized as "path"; presumably the location of the written zip — confirm.
Path string `json:"path"`
// EventCount is serialized as "event_count"; presumably the number of recorded TraceEvents — confirm.
EventCount int `json:"event_count"`
// Duration is serialized as "total_duration_ms", i.e. the total duration in milliseconds.
Duration int64 `json:"total_duration_ms"`
// Size is serialized as "file_size_bytes", i.e. the file size in bytes.
Size int64 `json:"file_size_bytes"`
}
TraceResult is the structured response after stopping a trace and writing the zip.
type WebVitalsResult ¶ added in v1.2.0
// WebVitalsResult holds Core Web Vitals and related performance metrics.
// Timing fields carry an "_ms" suffix in their JSON keys, i.e. milliseconds.
type WebVitalsResult struct {
// LCP is Largest Contentful Paint, in milliseconds.
LCP float64 `json:"lcp_ms"`
// CLS is Cumulative Layout Shift (a unitless score; note the key has no "_ms" suffix).
CLS float64 `json:"cls"`
// INP is Interaction to Next Paint, in milliseconds.
INP float64 `json:"inp_ms"`
// TTFB is Time to First Byte, in milliseconds.
TTFB float64 `json:"ttfb_ms"`
// DOMContentLoaded is the DOMContentLoaded timing, in milliseconds.
DOMContentLoaded float64 `json:"dom_content_loaded_ms"`
// FirstPaint is the first-paint timing, in milliseconds.
FirstPaint float64 `json:"first_paint_ms"`
// LCPRating, CLSRating and INPRating are per-metric ratings.
// NOTE(review): the set of rating strings (e.g. good / needs-improvement / poor) is not visible here — confirm.
LCPRating string `json:"lcp_rating"`
CLSRating string `json:"cls_rating"`
INPRating string `json:"inp_rating"`
// OverallRating is the combined rating; how it is derived from the per-metric ratings is not shown — confirm.
OverallRating string `json:"overall_rating"`
}
WebVitalsResult holds Core Web Vitals and related performance metrics.