Documentation
¶
Overview ¶
Package browser provides headless Chrome integration via Rod.
Index ¶
- func ExtractAJAXURLsFromJS(jsContent string) []string
- func IsNonRoutingHash(hash string) bool
- func NormalizeHashURL(rawURL string) string
- type AJAXEndpoint
- type AJAXForm
- type AJAXHandler
- func (h *AJAXHandler) AnalyzeAJAX(page *rod.Page) *AJAXResult
- func (h *AJAXHandler) ExtractAJAXEndpoints(page *rod.Page) []AJAXEndpoint
- func (h *AJAXHandler) ExtractAJAXForms(page *rod.Page) []AJAXForm
- func (h *AJAXHandler) GetCapturedRequests(page *rod.Page) []NetworkRequest
- func (h *AJAXHandler) InjectAJAXInterceptor(page *rod.Page) error
- func (h *AJAXHandler) MonitorDynamicContent(page *rod.Page, duration time.Duration) []string
- func (h *AJAXHandler) TriggerAJAXEvents(page *rod.Page) error
- func (h *AJAXHandler) WaitForAJAX(page *rod.Page, timeout time.Duration) error
- type AJAXResult
- type Browser
- func (b *Browser) Close() error
- func (b *Browser) GetConfig() Config
- func (b *Browser) NeedsRecycle() bool
- func (b *Browser) PageCount() int
- func (b *Browser) Visit(ctx context.Context, url string, headers map[string]string, ...) (*PageResult, error)
- func (b *Browser) VisitHashRoute(ctx context.Context, baseURL string, hashRoute string, ...) (*PageResult, error)
- func (b *Browser) VisitHashRouteWithOptions(ctx context.Context, baseURL string, hashRoute string, ...) (*PageResult, error)
- func (b *Browser) VisitWithOptions(ctx context.Context, url string, headers map[string]string, ...) (*PageResult, error)
- type Config
- type FormData
- type InputData
- type Interceptor
- func (i *Interceptor) AddFilter(filter RequestFilter)
- func (i *Interceptor) Clear()
- func (i *Interceptor) GetAPIRequests() []NetworkRequest
- func (i *Interceptor) GetRequests() []NetworkRequest
- func (i *Interceptor) GetTimeline() *RequestTimeline
- func (i *Interceptor) GroupByEndpoint() []RequestGroup
- func (i *Interceptor) Record(req NetworkRequest)
- func (i *Interceptor) Stats() InterceptorStats
- type InterceptorStats
- type NetworkRequest
- type PageResult
- type Pool
- func (p *Pool) Acquire(ctx context.Context) (*Browser, error)
- func (p *Pool) Close() error
- func (p *Pool) Release(browser *Browser)
- func (p *Pool) Size() int
- func (p *Pool) Stats() PoolStats
- func (p *Pool) Visit(ctx context.Context, url string, headers map[string]string, ...) (*PageResult, error)
- func (p *Pool) VisitHashRoute(ctx context.Context, baseURL string, hashRoute string, ...) (*PageResult, error)
- func (p *Pool) VisitHashRouteWithOptions(ctx context.Context, baseURL string, hashRoute string, ...) (*PageResult, error)
- func (p *Pool) VisitWithOptions(ctx context.Context, url string, headers map[string]string, ...) (*PageResult, error)
- type PoolStats
- type RequestFilter
- type RequestGroup
- type RequestTimeline
- type SPAConfig
- type SPAHandler
- func (h *SPAHandler) ApplyStealthMode(page *rod.Page) error
- func (h *SPAHandler) DetectMicroFrontends(page *rod.Page) ([]string, error)
- func (h *SPAHandler) ExtractShadowDOMContent(page *rod.Page) ([]string, error)
- func (h *SPAHandler) GetContentHash(page *rod.Page) (string, error)
- func (h *SPAHandler) HandleAuthRedirect(page *rod.Page) (bool, string)
- func (h *SPAHandler) HandleInfiniteScroll(page *rod.Page, maxScrolls int, scrollDelay time.Duration) ([]string, error)
- func (h *SPAHandler) InjectNetworkMonitor(page *rod.Page) error
- func (h *SPAHandler) IsSoftError(page *rod.Page) (bool, string)
- func (h *SPAHandler) RecoverFromHang(page *rod.Page) error
- func (h *SPAHandler) SetupPageErrorHandling(page *rod.Page)
- func (h *SPAHandler) WaitForContent(page *rod.Page) error
- type VisitOptions
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ExtractAJAXURLsFromJS ¶
func ExtractAJAXURLsFromJS(jsContent string) []string
ExtractAJAXURLsFromJS extracts AJAX URLs from JavaScript content.
func IsNonRoutingHash ¶
func IsNonRoutingHash(hash string) bool
IsNonRoutingHash checks if a hash change is likely non-routing (UI state).
func NormalizeHashURL ¶
func NormalizeHashURL(rawURL string) string
NormalizeHashURL normalizes a hash-based URL for deduplication.
Types ¶
type AJAXEndpoint ¶
type AJAXEndpoint struct {
URL string
Method string
ContentType string
Parameters []string
Source string // click, scroll, form, script, etc.
Trigger string // Element or event that triggered the request
}
AJAXEndpoint represents a discovered AJAX endpoint.
type AJAXForm ¶
type AJAXForm struct {
FormID string
FormName string
Action string
Method string
SubmitType string // jquery, fetch, xhr, axios
Inputs []InputData
CallbackURL string
}
AJAXForm represents a form that submits via AJAX.
type AJAXHandler ¶
type AJAXHandler struct {
// contains filtered or unexported fields
}
AJAXHandler provides comprehensive AJAX handling for web pages.
func (*AJAXHandler) AnalyzeAJAX ¶
func (h *AJAXHandler) AnalyzeAJAX(page *rod.Page) *AJAXResult
AnalyzeAJAX performs comprehensive AJAX analysis on a page.
func (*AJAXHandler) ExtractAJAXEndpoints ¶
func (h *AJAXHandler) ExtractAJAXEndpoints(page *rod.Page) []AJAXEndpoint
ExtractAJAXEndpoints extracts AJAX endpoints from JavaScript code.
func (*AJAXHandler) ExtractAJAXForms ¶
func (h *AJAXHandler) ExtractAJAXForms(page *rod.Page) []AJAXForm
ExtractAJAXForms finds forms that submit via AJAX.
func (*AJAXHandler) GetCapturedRequests ¶
func (h *AJAXHandler) GetCapturedRequests(page *rod.Page) []NetworkRequest
GetCapturedRequests retrieves all captured AJAX requests from the page.
func (*AJAXHandler) InjectAJAXInterceptor ¶
func (h *AJAXHandler) InjectAJAXInterceptor(page *rod.Page) error
InjectAJAXInterceptor injects JavaScript to intercept all AJAX calls.
func (*AJAXHandler) MonitorDynamicContent ¶
func (h *AJAXHandler) MonitorDynamicContent(page *rod.Page, duration time.Duration) []string
MonitorDynamicContent monitors for dynamically loaded content.
func (*AJAXHandler) TriggerAJAXEvents ¶
func (h *AJAXHandler) TriggerAJAXEvents(page *rod.Page) error
TriggerAJAXEvents triggers common events that might cause AJAX requests.
func (*AJAXHandler) WaitForAJAX ¶
func (h *AJAXHandler) WaitForAJAX(page *rod.Page, timeout time.Duration) error
WaitForAJAX waits for pending AJAX requests to complete.
type AJAXResult ¶
type AJAXResult struct {
Requests []NetworkRequest
AJAXEndpoints []AJAXEndpoint
AJAXForms []AJAXForm
DynamicContent []string
}
AJAXResult contains results from AJAX analysis.
type Browser ¶
type Browser struct {
// contains filtered or unexported fields
}
Browser wraps a Rod browser instance.
func (*Browser) NeedsRecycle ¶
func (b *Browser) NeedsRecycle() bool
NeedsRecycle checks if the browser needs recycling.
func (*Browser) Visit ¶
func (b *Browser) Visit(ctx context.Context, url string, headers map[string]string, cookies []*http.Cookie) (*PageResult, error)
Visit navigates to a URL and extracts page data.
func (*Browser) VisitHashRoute ¶
func (b *Browser) VisitHashRoute(ctx context.Context, baseURL string, hashRoute string, headers map[string]string, cookies []*http.Cookie) (*PageResult, error)
VisitHashRoute navigates to a hash-based route within an existing page. This is used for SPA navigation where the hash change doesn't reload the page.
func (*Browser) VisitHashRouteWithOptions ¶
func (b *Browser) VisitHashRouteWithOptions(ctx context.Context, baseURL string, hashRoute string, headers map[string]string, cookies []*http.Cookie, opts VisitOptions) (*PageResult, error)
VisitHashRouteWithOptions navigates to a hash-based route with options.
func (*Browser) VisitWithOptions ¶
func (b *Browser) VisitWithOptions(ctx context.Context, url string, headers map[string]string, cookies []*http.Cookie, opts VisitOptions) (*PageResult, error)
VisitWithOptions navigates to a URL with custom options.
type Config ¶
type Config struct {
PoolSize int `json:"pool_size"`
Headless bool `json:"headless"`
Timeout time.Duration `json:"timeout"`
UserAgent string `json:"user_agent"`
ViewportWidth int `json:"viewport_width"`
ViewportHeight int `json:"viewport_height"`
RecycleAfter int `json:"recycle_after"`
IgnoreHTTPSErrors bool `json:"ignore_https_errors"`
FastMode bool `json:"fast_mode"` // Skip heavy analysis for speed
}
Config defines browser configuration.
func DefaultConfig ¶
func DefaultConfig() Config
DefaultConfig returns default browser configuration.
type FormData ¶
type FormData struct {
Action string
Method string
Enctype string
ID string
Name string
Inputs []InputData
}
FormData represents form data extracted from a page.
type InputData ¶
type InputData struct {
Name string
Type string
Value string
Required bool
Placeholder string
Pattern string
MaxLength int
MinLength int
}
InputData represents form input data.
type Interceptor ¶
type Interceptor struct {
// contains filtered or unexported fields
}
Interceptor captures network requests during page loads.
func NewInterceptor ¶
func NewInterceptor() *Interceptor
NewInterceptor creates a new request interceptor.
func (*Interceptor) AddFilter ¶
func (i *Interceptor) AddFilter(filter RequestFilter)
AddFilter adds a filter for capturing specific requests.
func (*Interceptor) GetAPIRequests ¶
func (i *Interceptor) GetAPIRequests() []NetworkRequest
GetAPIRequests returns requests that look like API calls.
func (*Interceptor) GetRequests ¶
func (i *Interceptor) GetRequests() []NetworkRequest
GetRequests returns all captured requests.
func (*Interceptor) GetTimeline ¶
func (i *Interceptor) GetTimeline() *RequestTimeline
GetTimeline returns requests grouped by time.
func (*Interceptor) GroupByEndpoint ¶
func (i *Interceptor) GroupByEndpoint() []RequestGroup
GroupByEndpoint groups requests by their endpoint.
func (*Interceptor) Record ¶
func (i *Interceptor) Record(req NetworkRequest)
Record records a network request.
func (*Interceptor) Stats ¶
func (i *Interceptor) Stats() InterceptorStats
Stats returns interceptor statistics.
type InterceptorStats ¶
type InterceptorStats struct {
TotalRequests int `json:"total_requests"`
APIRequests int `json:"api_requests"`
ByType map[string]int `json:"by_type"`
ByMethod map[string]int `json:"by_method"`
}
InterceptorStats contains interceptor statistics, as returned by (*Interceptor).Stats.
type NetworkRequest ¶
type NetworkRequest struct {
URL string
Method string
Headers map[string]string
PostData string
ResourceType string
Timestamp time.Time
}
NetworkRequest represents an intercepted network request.
type PageResult ¶
type PageResult struct {
URL string
FinalURL string
StatusCode int
ContentType string
HTML string
Title string
Links []string
Scripts []string
Forms []FormData
XHRRequests []NetworkRequest
WebSockets []string
Cookies []*http.Cookie
ResponseTime time.Duration
Error error
Framework *framework.DetectionResult
FrameworkRoutes []framework.Route
FrameworkLinks []framework.Link
// AJAX-specific results
AJAXEndpoints []AJAXEndpoint
AJAXForms []AJAXForm
DynamicContent []string
// SPA-specific results
ContentHash string // Hash of page content for dedup
IsSoftError bool // True if page shows error content with 200 status
SoftErrorMsg string // Error message if soft error detected
ShadowDOMLinks []string // Links extracted from Shadow DOM
IsAuthPage bool // True if redirected to auth page
AuthURL string // URL of auth page if redirected
}
PageResult contains the result of a page visit.
type Pool ¶
type Pool struct {
// contains filtered or unexported fields
}
Pool manages a pool of browser instances.
func (*Pool) Visit ¶
func (p *Pool) Visit(ctx context.Context, url string, headers map[string]string, cookies []*http.Cookie) (*PageResult, error)
Visit acquires a browser, visits the URL, and releases it.
func (*Pool) VisitHashRoute ¶
func (p *Pool) VisitHashRoute(ctx context.Context, baseURL string, hashRoute string, headers map[string]string, cookies []*http.Cookie) (*PageResult, error)
VisitHashRoute visits a hash-based route within an SPA.
func (*Pool) VisitHashRouteWithOptions ¶
func (p *Pool) VisitHashRouteWithOptions(ctx context.Context, baseURL string, hashRoute string, headers map[string]string, cookies []*http.Cookie, opts VisitOptions) (*PageResult, error)
VisitHashRouteWithOptions visits a hash-based route with options.
func (*Pool) VisitWithOptions ¶
func (p *Pool) VisitWithOptions(ctx context.Context, url string, headers map[string]string, cookies []*http.Cookie, opts VisitOptions) (*PageResult, error)
VisitWithOptions acquires a browser, visits the URL with options, and releases it.
type PoolStats ¶
type PoolStats struct {
Size int `json:"size"`
Available int `json:"available"`
TotalPages int `json:"total_pages"`
}
PoolStats contains pool statistics, as returned by (*Pool).Stats.
type RequestFilter ¶
RequestFilter defines a filter for network requests.
type RequestGroup ¶
type RequestGroup struct {
Endpoint string
Method string
Requests []NetworkRequest
Parameters map[string][]string
}
RequestGroup groups requests by endpoint.
type RequestTimeline ¶
type RequestTimeline struct {
Start time.Time
End time.Time
Requests []NetworkRequest
}
RequestTimeline represents requests over time.
type SPAConfig ¶
type SPAConfig struct {
// Maximum wait time for content to load
MaxWaitTime time.Duration
// Minimum content length to consider page loaded
MinContentLength int
// Selectors that indicate loading state
LoadingSelectors []string
// Selectors that indicate content is ready
ReadySelectors []string
// Enable content hash deduplication
EnableContentDedup bool
// Maximum retries for rendering
MaxRetries int
// Enable stealth mode (anti-detection)
StealthMode bool
}
SPAConfig contains SPA handling configuration.
func DefaultSPAConfig ¶
func DefaultSPAConfig() SPAConfig
DefaultSPAConfig returns sensible defaults for SPA handling.
type SPAHandler ¶
type SPAHandler struct {
// contains filtered or unexported fields
}
SPAHandler handles Single Page Application specific challenges.
func NewSPAHandler ¶
func NewSPAHandler(config SPAConfig) *SPAHandler
NewSPAHandler creates a new SPA handler.
func (*SPAHandler) ApplyStealthMode ¶
func (h *SPAHandler) ApplyStealthMode(page *rod.Page) error
ApplyStealthMode applies anti-detection measures.
func (*SPAHandler) DetectMicroFrontends ¶
func (h *SPAHandler) DetectMicroFrontends(page *rod.Page) ([]string, error)
DetectMicroFrontends detects multiple Angular/SPA instances.
func (*SPAHandler) ExtractShadowDOMContent ¶
func (h *SPAHandler) ExtractShadowDOMContent(page *rod.Page) ([]string, error)
ExtractShadowDOMContent extracts content from Shadow DOM elements.
func (*SPAHandler) GetContentHash ¶
func (h *SPAHandler) GetContentHash(page *rod.Page) (string, error)
GetContentHash returns a hash of the page's meaningful content.
func (*SPAHandler) HandleAuthRedirect ¶
func (h *SPAHandler) HandleAuthRedirect(page *rod.Page) (bool, string)
HandleAuthRedirect detects and handles auth redirects.
func (*SPAHandler) HandleInfiniteScroll ¶
func (h *SPAHandler) HandleInfiniteScroll(page *rod.Page, maxScrolls int, scrollDelay time.Duration) ([]string, error)
HandleInfiniteScroll handles infinite scroll pages with limits.
func (*SPAHandler) InjectNetworkMonitor ¶
func (h *SPAHandler) InjectNetworkMonitor(page *rod.Page) error
InjectNetworkMonitor injects a network request monitor.
func (*SPAHandler) IsSoftError ¶
func (h *SPAHandler) IsSoftError(page *rod.Page) (bool, string)
IsSoftError checks if the page shows a soft error, i.e. error content (such as a 404 page) served with a 200 status.
func (*SPAHandler) RecoverFromHang ¶
func (h *SPAHandler) RecoverFromHang(page *rod.Page) error
RecoverFromHang attempts to recover from a stuck page.
func (*SPAHandler) SetupPageErrorHandling ¶
func (h *SPAHandler) SetupPageErrorHandling(page *rod.Page)
SetupPageErrorHandling sets up error handlers for the page.
func (*SPAHandler) WaitForContent ¶
func (h *SPAHandler) WaitForContent(page *rod.Page) error
WaitForContent waits for SPA content to be fully loaded.
type VisitOptions ¶
type VisitOptions struct {
FastMode bool // Skip SPA framework detection and AJAX analysis
SPAMode bool // Enable advanced SPA handling (content wait, stealth, etc.)
MaxWaitTime time.Duration // Maximum wait time for SPA content
EnableStealth bool // Enable anti-detection measures
CheckSoftError bool // Check for soft 404 errors
}
VisitOptions contains options for a single page visit.