Documentation
¶
Index ¶
- Variables
- func GaussianSleep(ctx context.Context, mean, stddev, minDelay, maxDelay time.Duration) error
- func IsBlocked(err error) bool
- func NewAntiBotHTTPClient(timeout time.Duration, proxyPool *ProxyPool) (*http.Client, error)
- func NewHTTPClient(timeout time.Duration, proxyURL string) (*http.Client, error)
- func NewHTTPHandler(engine *Engine, cfg Config, metricsHandler http.Handler) http.Handler
- func RetryAfterSeconds(headers http.Header) int
- func SecCHUA(ua string) string
- func SecCHUAMobile(ua string) string
- func SecCHUAPlatform(ua string) string
- type APIError
- type AdaptiveRateLimiter
- type AntiBotConfig
- type BlockInfo
- type BlockSignal
- type BlockedError
- type BlockedEvent
- type BraveProvider
- type CapSolverSolver
- type ChainSolver
- type ChallengeSolution
- type ChallengeSolver
- type CircuitEvent
- type CircuitState
- type Client
- type Config
- type DDGProvider
- type Diagnostics
- type Engine
- type EngineOptions
- type ErrorKind
- type Event
- type EventHook
- type EventType
- type FlareSolverrSolver
- type Options
- type PrometheusCollector
- type Provider
- type ProviderError
- type ProxyEntry
- type ProxyPool
- func (p *ProxyPool) Len() int
- func (p *ProxyPool) MarkFailed(e *ProxyEntry)
- func (p *ProxyPool) MarkSuccess(e *ProxyEntry)
- func (p *ProxyPool) Next() *ProxyEntry
- func (p *ProxyPool) SetCooldown(cooldown time.Duration, maxConsecFails int)
- func (p *ProxyPool) SetWeight(proxyURL string, weight int)
- func (p *ProxyPool) Stats() []ProxyStats
- type ProxyStats
- type Result
- type RotationStrategy
- type SafeSearch
- type SearchError
- type SearchOptions
- type SearchRequest
- type SearchResponse
- type SerpAPIProvider
- type TavilyProvider
- type TwoCaptchaSolver
- type UAEntry
- type UserAgentPool
Examples ¶
Constants ¶
This section is empty.
Variables ¶
var (
	ErrNoVQD          = errors.New("goddgs: vqd token not found")
	ErrNoResults      = errors.New("goddgs: no results found")
	ErrUnexpectedBody = errors.New("goddgs: unexpected response body")
	ErrBlocked        = errors.New("goddgs: response appears blocked")
	// ErrCircuitOpen is returned when the circuit breaker has tripped after too
	// many consecutive block responses. Wait for CircuitBreakerCooldown before retrying.
	ErrCircuitOpen = errors.New("goddgs: circuit breaker open — session is burned, retry later")
)
Functions ¶
func GaussianSleep ¶
GaussianSleep sleeps for a duration drawn from N(mean, stddev²), clamped to [minDelay, maxDelay]. It respects context cancellation.
func NewAntiBotHTTPClient ¶
NewAntiBotHTTPClient creates an http.Client pre-configured with the full anti-bot stack: Chrome TLS fingerprint, browser-profile headers, UA rotation, and a persistent cookie jar. It is equivalent to building a Client with NewAntiBotConfig() but returns a raw http.Client for use outside of Client.
proxyPool is optional — pass nil for direct connections.
func NewHTTPClient ¶
NewHTTPClient creates a plain http.Client with an optional proxy.
func NewHTTPHandler ¶
NewHTTPHandler builds a production HTTP API handler. Endpoints: /healthz, /readyz, /v1/search and optional /metrics.
func RetryAfterSeconds ¶
RetryAfterSeconds parses the Retry-After response header and returns the suggested delay in seconds, or 0 if the header is absent or unparseable.
func SecCHUA ¶
SecCHUA derives the Sec-CH-UA header value for Chromium-based user-agents. Returns "" for non-Chromium browsers (Firefox, Safari).
func SecCHUAMobile ¶
SecCHUAMobile returns "?1" for mobile user-agents, "?0" otherwise.
func SecCHUAPlatform ¶
SecCHUAPlatform returns the Sec-CH-UA-Platform hint for the given user-agent.
Types ¶
type AdaptiveRateLimiter ¶
type AdaptiveRateLimiter struct {
// contains filtered or unexported fields
}
AdaptiveRateLimiter enforces a minimum gap between requests and automatically adjusts that gap in response to block/success signals.
On block:   current = min(current × growFactor, maxDelay)
On success: current = max(current × shrinkFactor, baseDelay)
Wait() blocks until the gap since the last request is satisfied, using Gaussian jitter around the current delay.
func (*AdaptiveRateLimiter) Current ¶
func (r *AdaptiveRateLimiter) Current() time.Duration
Current returns the current configured inter-request delay.
func (*AdaptiveRateLimiter) OnBlock ¶
func (r *AdaptiveRateLimiter) OnBlock()
OnBlock increases the inter-request delay (called when a block is detected).
func (*AdaptiveRateLimiter) OnSuccess ¶
func (r *AdaptiveRateLimiter) OnSuccess()
OnSuccess gradually relaxes the delay back toward base (called on clean responses).
type AntiBotConfig ¶
type AntiBotConfig struct {
// UARotation picks a fresh, market-share-weighted user-agent from a pool of
// 24 realistic browser UAs on every request. Also keeps Sec-CH-UA, platform,
// and mobile hints consistent with the chosen UA.
UARotation bool
// ChromeTLS uses the utls library to perform TLS handshakes with Chrome's
// exact ClientHello specification, so that the JA3 / JA4 fingerprint
// matches that of a real Chrome browser.
ChromeTLS bool
// SessionWarmup performs a GET to the DDG homepage before the first search
// to acquire session cookies (__ddg1_, __ddg2_, etc.). Without these cookies
// some DDG search endpoints return degraded or blocked responses.
SessionWarmup bool
// WarmupTTL controls how long a warmed-up session is considered fresh.
// When the TTL expires, the next search re-warms the session.
// Default: 20 minutes.
WarmupTTL time.Duration
// ProxyPool is an optional proxy pool for IP rotation.
// When set, each request is routed through the next available proxy,
// with automatic failover and per-proxy health tracking.
ProxyPool *ProxyPool
// AdaptiveRateLimit enables a rate limiter that increases the inter-request
// delay when blocks are detected and gradually relaxes back to base on
// consecutive successes.
AdaptiveRateLimit bool
// AdaptiveBaseDelay is the minimum (and initial) inter-request delay.
// Default: 300 ms.
AdaptiveBaseDelay time.Duration
// AdaptiveMaxDelay is the ceiling for the adaptive delay.
// Default: 30 s.
AdaptiveMaxDelay time.Duration
// GaussianJitter adds normally-distributed randomness to request timing,
// making inter-request intervals statistically indistinguishable from
// human browsing. Stddev is AdaptiveBaseDelay × 0.25 by default.
GaussianJitter bool
// SessionInvalidateOnBlock resets the cookie jar and warmup state whenever
// a block signal is detected. This triggers a fresh session on the next
// request, which can help recover from session-based throttling.
SessionInvalidateOnBlock bool
// VQDInvalidateOnBlock clears the cached VQD token for the affected query
// when a block is detected, forcing a fresh token fetch on retry.
VQDInvalidateOnBlock bool
// ChallengeSolvers is an ordered list of CAPTCHA/challenge solvers invoked
// when a block signal is detected. The first solver that Supports() the
// signal and succeeds wins; subsequent solvers are not tried. Configure
// with the most reliable solver first (e.g. FlareSolverr, then 2captcha).
//
// Example: NewFlareSolverrSolver(""), NewTwoCaptchaSolver(key)
ChallengeSolvers []ChallengeSolver
// CircuitBreakerThreshold is the number of consecutive block responses
// that opens the circuit breaker, causing all further attempts to fail
// immediately with ErrCircuitOpen until the cooldown expires.
// Default: 5. Set to 0 to disable the circuit breaker.
CircuitBreakerThreshold int
// CircuitBreakerCooldown is how long the circuit stays open once tripped.
// Default: 60 s.
CircuitBreakerCooldown time.Duration
}
AntiBotConfig holds advanced browser-compatible transport/session options. Use NewAntiBotConfig() to obtain a fully-enabled default configuration.
All techniques are independent and can be toggled individually.
func NewAntiBotConfig ¶
func NewAntiBotConfig() *AntiBotConfig
NewAntiBotConfig returns an AntiBotConfig with all recommended techniques enabled and sensible defaults. This is the recommended starting point.
type BlockInfo ¶
type BlockInfo struct {
Signal BlockSignal
DetectorKey string // machine-readable sub-key identifying the specific signal
}
BlockInfo holds the result of a block-signal detection pass.
func DetectBlockSignal ¶
DetectBlockSignal inspects an HTTP response for bot-detection signals. It checks response headers first (fast path), then scans the body.
func (BlockInfo) IsDetected ¶
IsDetected reports whether any block signal was found.
type BlockSignal ¶
type BlockSignal int
BlockSignal categorises the type of bot-detection challenge detected in a response.
const (
	BlockSignalNone       BlockSignal = iota
	BlockSignalCloudflare             // Cloudflare Bot Management / IUAM challenge
	BlockSignalReCAPTCHA              // Google reCAPTCHA or hCaptcha
	BlockSignalAkamai                 // Akamai Bot Manager
	BlockSignalPerimeterX             // PerimeterX / HUMAN Security
	BlockSignalDataDome               // DataDome
	BlockSignalGeneric                // Rate-limit or generic bot challenge
)
func (BlockSignal) String ¶
func (s BlockSignal) String() string
type BlockedError ¶
type BlockedError struct {
Event BlockedEvent
}
func (*BlockedError) Error ¶
func (e *BlockedError) Error() string
func (*BlockedError) Unwrap ¶
func (e *BlockedError) Unwrap() error
type BlockedEvent ¶
type BraveProvider ¶
type BraveProvider struct {
// contains filtered or unexported fields
}
func NewBraveProvider ¶
func NewBraveProvider(apiKey string, hc *http.Client) *BraveProvider
func (*BraveProvider) Enabled ¶
func (p *BraveProvider) Enabled() bool
func (*BraveProvider) Name ¶
func (p *BraveProvider) Name() string
func (*BraveProvider) Search ¶
func (p *BraveProvider) Search(ctx context.Context, req SearchRequest) ([]Result, error)
type CapSolverSolver ¶
type CapSolverSolver struct {
// APIKey is the CapSolver account client key.
APIKey string
// BaseURL defaults to https://api.capsolver.com.
BaseURL string
// PollInterval is how long to wait between result polls. Default: 3s.
PollInterval time.Duration
// PollTimeout is the maximum total wait for a solution. Default: 120s.
PollTimeout time.Duration
// contains filtered or unexported fields
}
CapSolverSolver resolves CAPTCHA challenges via the capsolver.com API. Supports: reCAPTCHA v2/v3, hCaptcha, and Cloudflare Turnstile.
Get an API key at https://capsolver.com/.
func NewCapSolverSolver ¶
func NewCapSolverSolver(apiKey string) *CapSolverSolver
NewCapSolverSolver creates a solver backed by capsolver.com.
func (*CapSolverSolver) Solve ¶
func (c *CapSolverSolver) Solve(ctx context.Context, pageURL string, info BlockInfo, body []byte) (*ChallengeSolution, error)
func (*CapSolverSolver) Supports ¶
func (c *CapSolverSolver) Supports(signal BlockSignal) bool
type ChainSolver ¶
type ChainSolver struct {
Solvers []ChallengeSolver
}
ChainSolver tries a list of solvers in order and returns the first success. Configure it with the most reliable/fastest solver first.
func (*ChainSolver) Solve ¶
func (c *ChainSolver) Solve(ctx context.Context, pageURL string, info BlockInfo, body []byte) (*ChallengeSolution, error)
func (*ChainSolver) Supports ¶
func (c *ChainSolver) Supports(signal BlockSignal) bool
type ChallengeSolution ¶
type ChallengeSolution struct {
// Cookies are injected into the client's cookie jar.
// For Cloudflare this typically includes cf_clearance.
Cookies []*http.Cookie
// UserAgent must be used for all subsequent requests when non-empty.
// Cloudflare ties cf_clearance to the exact UA the challenge was solved with.
UserAgent string
// Token is a raw CAPTCHA response string (g-recaptcha-response,
// g-turnstile-response, etc.) for flows that need to POST it somewhere.
Token string
}
ChallengeSolution is the credential set returned by a solver after it successfully resolves a bot-detection challenge. Apply it to the session before retrying the blocked request.
type ChallengeSolver ¶
type ChallengeSolver interface {
// Supports reports whether this solver handles the given block signal.
Supports(signal BlockSignal) bool
// Solve attempts to resolve the challenge for pageURL.
// body is the blocked response body; it may be nil.
// Implementations must respect ctx cancellation.
Solve(ctx context.Context, pageURL string, info BlockInfo, body []byte) (*ChallengeSolution, error)
}
ChallengeSolver resolves a detected bot-detection challenge and returns credentials that allow the blocked session to continue.
type CircuitEvent ¶
type CircuitState ¶
type CircuitState string
const (
	CircuitStateClosed CircuitState = "closed"
	CircuitStateOpen   CircuitState = "open"
)
type Client ¶
type Client struct {
// contains filtered or unexported fields
}
func (*Client) SearchPages ¶
type Config ¶
type Config struct {
BraveAPIKey string
TavilyAPIKey string
SerpAPIKey string
DuckDuckGoBase string
LinksBase string
HTMLBase string
ProviderOrder []string
Timeout time.Duration
MaxRetries int
DisableHTMLFallback bool
}
func LoadConfigFromEnv ¶
func LoadConfigFromEnv() Config
type DDGProvider ¶
type DDGProvider struct {
// contains filtered or unexported fields
}
func NewDDGProvider ¶
func NewDDGProvider(client *Client) *DDGProvider
func (*DDGProvider) Enabled ¶
func (p *DDGProvider) Enabled() bool
func (*DDGProvider) Name ¶
func (p *DDGProvider) Name() string
func (*DDGProvider) Search ¶
func (p *DDGProvider) Search(ctx context.Context, req SearchRequest) ([]Result, error)
type Diagnostics ¶
type Diagnostics struct {
BlockInfo *BlockInfo
Attempts int
ProviderChain []string
Timings map[string]time.Duration
Errors []ProviderError
}
Diagnostics contains execution metadata for observability and debugging.
type Engine ¶
type Engine struct {
// contains filtered or unexported fields
}
func NewDefaultEngineFromConfig ¶
Example ¶
cfg := Config{ProviderOrder: []string{"ddg"}, Timeout: 5_000_000_000, MaxRetries: 2}
eng, err := NewDefaultEngineFromConfig(cfg)
if err != nil {
fmt.Println(err)
return
}
fmt.Println(len(eng.EnabledProviders()) > 0)
Output: true
func NewEngine ¶
func NewEngine(opts EngineOptions) (*Engine, error)
func (*Engine) EnabledProviders ¶
func (*Engine) Search ¶
func (e *Engine) Search(ctx context.Context, req SearchRequest) (SearchResponse, error)
type EngineOptions ¶
type Event ¶
type Event struct {
Type EventType
Provider string
Duration time.Duration
ErrKind ErrorKind
Success bool
Block *BlockInfo
QueryHash string
}
Event is emitted by the engine for hooks/metrics.
type FlareSolverrSolver ¶
type FlareSolverrSolver struct {
// Endpoint is the FlareSolverr API URL (default: http://localhost:8191/v1).
Endpoint string
// MaxTimeout is the maximum time, in milliseconds, FlareSolverr may spend solving.
// Default: 60000 ms (60 s). Complex challenges may need more.
MaxTimeout int
// contains filtered or unexported fields
}
FlareSolverrSolver uses a running FlareSolverr instance to bypass Cloudflare Bot Management: IUAM ("Just a moment…"), Turnstile, and JS challenges.
FlareSolverr runs a real Chrome browser internally, so it handles any JS-based challenge Cloudflare issues. Run it with:
docker run -d -p 8191:8191 ghcr.io/flaresolverr/flaresolverr:latest
The cf_clearance cookie it returns is tied to the UserAgent it used, so the client MUST send that exact UA in all subsequent requests.
func NewFlareSolverrSolver ¶
func NewFlareSolverrSolver(endpoint string) *FlareSolverrSolver
NewFlareSolverrSolver creates a solver for a FlareSolverr instance. endpoint defaults to http://localhost:8191/v1 when empty.
func (*FlareSolverrSolver) Solve ¶
func (f *FlareSolverrSolver) Solve(ctx context.Context, pageURL string, _ BlockInfo, _ []byte) (*ChallengeSolution, error)
func (*FlareSolverrSolver) Supports ¶
func (f *FlareSolverrSolver) Supports(signal BlockSignal) bool
type Options ¶
type Options struct {
HTTPClient *http.Client
DuckDuckGoBase string
LinksBase string
HTMLBase string
UserAgent string
Referer string
Headers map[string]string
RequestTimeout time.Duration
RetryMax int
RetryBaseDelay time.Duration
RetryJitterFrac float64
MinRequestInterval time.Duration
VQDTTL time.Duration
DisableHTMLFallback bool
BlockedStatusCodes map[int]struct{}
BlockedBodyPatterns []*regexp.Regexp
OnBlocked func(BlockedEvent)
OnCircuit func(CircuitEvent)
// AntiBot enables advanced browser-compatible transport/session behavior.
// Use NewAntiBotConfig() for recommended defaults, or nil to keep the
// original behaviour.
// When set and Options.HTTPClient is nil, the client is built with Chrome TLS
// fingerprinting, browser-profile headers, and the configured cookie jar.
AntiBot *AntiBotConfig
}
type PrometheusCollector ¶
type PrometheusCollector struct {
// contains filtered or unexported fields
}
func NewPrometheusCollector ¶
func NewPrometheusCollector(reg prometheus.Registerer) *PrometheusCollector
func (*PrometheusCollector) Hook ¶
func (c *PrometheusCollector) Hook(ev Event)
func (*PrometheusCollector) ObserveCircuitEvent ¶
func (c *PrometheusCollector) ObserveCircuitEvent(provider string, ev CircuitEvent)
ObserveCircuitEvent records low-level client breaker transitions.
func (*PrometheusCollector) SetProviderEnabled ¶
func (c *PrometheusCollector) SetProviderEnabled(provider string, enabled bool)
type Provider ¶
type Provider interface {
Name() string
Enabled() bool
Search(ctx context.Context, req SearchRequest) ([]Result, error)
}
Provider is a search backend that can be chained by Engine.
type ProviderError ¶
type ProviderError struct {
Provider string `json:"provider"`
Kind ErrorKind `json:"kind"`
Message string `json:"message"`
Details map[string]string `json:"details,omitempty"`
}
ProviderError captures one provider failure in the chain.
type ProxyEntry ¶
type ProxyEntry struct {
URL string
Weight int // relative weight for RotateWeighted (default 1)
// contains filtered or unexported fields
}
ProxyEntry is a single proxy URL with health-tracking state.
type ProxyPool ¶
type ProxyPool struct {
// contains filtered or unexported fields
}
ProxyPool manages a set of HTTP/HTTPS/SOCKS5 proxies with automatic health tracking and configurable rotation strategy.
func NewProxyPool ¶
func NewProxyPool(proxyURLs []string, strategy RotationStrategy) (*ProxyPool, error)
NewProxyPool creates a pool from a list of proxy URLs. Each URL must be in the form "http://host:port", "https://...", or "socks5://...". Returns an error if any URL is unparseable.
func (*ProxyPool) MarkFailed ¶
func (p *ProxyPool) MarkFailed(e *ProxyEntry)
MarkFailed records a failed request through the given entry and may put that entry in cooldown.
func (*ProxyPool) MarkSuccess ¶
func (p *ProxyPool) MarkSuccess(e *ProxyEntry)
MarkSuccess records a successful request through the given entry.
func (*ProxyPool) Next ¶
func (p *ProxyPool) Next() *ProxyEntry
Next returns the next available proxy according to the rotation strategy. Returns nil if all proxies are currently in cooldown.
func (*ProxyPool) SetCooldown ¶
SetCooldown adjusts the health-failure policy.
func (*ProxyPool) SetWeight ¶
SetWeight sets the selection weight of a proxy by URL (used for RotateWeighted).
func (*ProxyPool) Stats ¶
func (p *ProxyPool) Stats() []ProxyStats
Stats returns an immutable snapshot of every proxy's health metrics.
type ProxyStats ¶
type ProxyStats struct {
URL string
Requests int64
Failures int64
ConsecutiveFails int
InCooldown bool
}
ProxyStats is an immutable snapshot of a ProxyEntry's metrics.
type RotationStrategy ¶
type RotationStrategy int
RotationStrategy controls how ProxyPool selects the next proxy.
const (
	RotateRoundRobin RotationStrategy = iota // cycle through proxies in order
	RotateRandom                             // uniform random selection
	RotateWeighted                           // weighted random based on Weight field
)
type SearchError ¶
type SearchError struct {
Kind ErrorKind
Provider string
Temporary bool
Cause error
Details map[string]string
}
SearchError is a structured error returned by providers/engine.
func (*SearchError) Error ¶
func (e *SearchError) Error() string
func (*SearchError) Unwrap ¶
func (e *SearchError) Unwrap() error
type SearchOptions ¶
type SearchOptions struct {
MaxResults int
Region string
SafeSearch SafeSearch
TimeRange string
Offset int
}
type SearchRequest ¶
type SearchRequest struct {
Query string
MaxResults int
Region string
SafeSearch SafeSearch
TimeRange string
Offset int
}
SearchRequest is the provider-agnostic input contract.
type SearchResponse ¶
type SearchResponse struct {
Results []Result
Provider string
FallbackUsed bool
Diagnostics Diagnostics
}
SearchResponse is the provider-agnostic output contract.
type SerpAPIProvider ¶
type SerpAPIProvider struct {
// contains filtered or unexported fields
}
func NewSerpAPIProvider ¶
func NewSerpAPIProvider(apiKey string, hc *http.Client) *SerpAPIProvider
func (*SerpAPIProvider) Enabled ¶
func (p *SerpAPIProvider) Enabled() bool
func (*SerpAPIProvider) Name ¶
func (p *SerpAPIProvider) Name() string
func (*SerpAPIProvider) Search ¶
func (p *SerpAPIProvider) Search(ctx context.Context, req SearchRequest) ([]Result, error)
type TavilyProvider ¶
type TavilyProvider struct {
// contains filtered or unexported fields
}
func NewTavilyProvider ¶
func NewTavilyProvider(apiKey string, hc *http.Client) *TavilyProvider
func (*TavilyProvider) Enabled ¶
func (p *TavilyProvider) Enabled() bool
func (*TavilyProvider) Name ¶
func (p *TavilyProvider) Name() string
func (*TavilyProvider) Search ¶
func (p *TavilyProvider) Search(ctx context.Context, req SearchRequest) ([]Result, error)
type TwoCaptchaSolver ¶
type TwoCaptchaSolver struct {
// APIKey is the 2captcha account API key.
APIKey string
// BaseURL defaults to https://2captcha.com. Override for self-hosted
// or compatible (anti-captcha, etc.) endpoints.
BaseURL string
// PollInterval is how long to wait between result polls. Default: 5s.
PollInterval time.Duration
// PollTimeout is the maximum total wait for a solution. Default: 120s.
PollTimeout time.Duration
// contains filtered or unexported fields
}
TwoCaptchaSolver resolves CAPTCHA challenges via the 2captcha.com API. Supports: reCAPTCHA v2/v3, hCaptcha, and Cloudflare Turnstile.
Get an API key at https://2captcha.com/. Balance is checked per-solve; the caller receives an error if the balance is insufficient.
func NewTwoCaptchaSolver ¶
func NewTwoCaptchaSolver(apiKey string) *TwoCaptchaSolver
NewTwoCaptchaSolver creates a solver backed by 2captcha.com.
func (*TwoCaptchaSolver) Solve ¶
func (t *TwoCaptchaSolver) Solve(ctx context.Context, pageURL string, info BlockInfo, body []byte) (*ChallengeSolution, error)
func (*TwoCaptchaSolver) Supports ¶
func (t *TwoCaptchaSolver) Supports(signal BlockSignal) bool
type UserAgentPool ¶
type UserAgentPool struct {
// contains filtered or unexported fields
}
UserAgentPool provides weighted random selection of realistic browser user-agents. Weights approximate global browser market share.
func NewUserAgentPool ¶
func NewUserAgentPool() *UserAgentPool
NewUserAgentPool returns a pool pre-populated with realistic browser UAs weighted by approximate global market share (Chrome ~65%, Safari ~19%, Firefox ~4%, Edge ~4%).
func (*UserAgentPool) Pick ¶
func (p *UserAgentPool) Pick() UAEntry
Pick returns a random UAEntry, weighted by browser market share.
func (*UserAgentPool) PickUA ¶
func (p *UserAgentPool) PickUA() string
PickUA returns just the user-agent string.