Documentation
¶
Index ¶
- Constants
- Variables
- func AddNetworkBytes(ctx context.Context, n int64)
- func AppendEnrichedSearchResult(env *Envelope, raw SearchResult, ctx EnrichContext, extractedAt time.Time)
- func BrowserProfileIDsFromContext(ctx context.Context) []string
- func BuildAcceptLanguageHeader(langCode string) string
- func BuildCacheKey(engine string, action string, q Query) string
- func CORSMiddleware(cfg CORSConfig) fiber.Handler
- func CaptchaSolverMetrics() map[string]uint64
- func ClassifySearchHTTPStatus(status int) error
- func ClosePageWithTimeout(ctx context.Context, page *rod.Page, timeout time.Duration) error
- func ClosestMatching(el *rod.Element, selector string, maxHops int) *rod.Element
- func ComputePagination(start int, pageSize int) (int, int, error)
- func ConvertSearchResultsMap(searchResultsMap map[string]SearchResult) *[]SearchResult
- func CountOrganicResults(results []SearchResult) int
- func CountryFromRegion(hint string) string
- func DeferClosePage(ctx context.Context, page *rod.Page, browser *Browser) func()
- func DrainAndCloseResponse(resp *http.Response)
- func EnsureContext(ctx context.Context) context.Context
- func FirstNonEmptyAttribute(root *rod.Element, attr string, selectors ...string) string
- func FirstNonEmptyText(root *rod.Element, selectors ...string) string
- func HasAnySelector(page *rod.Page, selectors []string) bool
- func HasAttribute(el *rod.Element, attr string) bool
- func InitLogger(isVerbose, isDebug bool, format string)
- func IsAuthenticatedSocksProxyURL(raw string) bool
- func IsContextDone(err error) bool
- func IsProxyNetworkError(err error) bool
- func IsRodObjectNotFound(err error) bool
- func JSONErrorMiddleware() fiber.ErrorHandler
- func MaskProxyURL(raw string) string
- func MustParseBlockedResourceTypes(raw string) []proto.NetworkResourceType
- func NetworkBytesFromContext(ctx context.Context) int64
- func NewRawHTTPClient(query Query) (*http.Client, error)
- func NormalizeLogFormat(raw string) (string, error)
- func NormalizeProxyRequestOverride(raw string) (string, error)
- func NormalizeProxyTag(raw string) (string, error)
- func NormalizeProxyURL(raw string) (string, error)
- func NormalizeProxyURLs(rawURLs []string) ([]string, error)
- func NormalizeURLForClustering(rawURL string) string
- func OrganicLimitReached(results []SearchResult, limit int) bool
- func ParseBlockedResourceTypes(raw string) ([]proto.NetworkResourceType, error)
- func PrepareEngineContext(ctx context.Context, query Query, engineName string, ...) context.Context
- func PrimaryLanguageTag(langCode string) string
- func QueryHash(raw string) string
- func QueryHashFromQuery(q Query) string
- func RawSearchRequest(ctx context.Context, searchURL string, query Query) (*http.Response, error)
- func ReadRawSearchBody(resp *http.Response) ([]byte, error)
- func RecoverEnginePanic(engine string, recovered interface{}, logger *EngineLogger) error
- func RecoverEnginePanicWithContext(ctx context.Context, engine string, recovered interface{}, ...) error
- func RenderMarkdown(env *Envelope) []byte
- func RenderMarkdownImage(env *ImageEnvelope) []byte
- func RenderNDJSON(env *Envelope) []byte
- func RenderNDJSONImage(env *ImageEnvelope) []byte
- func RenderText(env *Envelope) []byte
- func RenderTextImage(env *ImageEnvelope) []byte
- func RequestContextMiddleware() fiber.Handler
- func RequestIDFromContext(ctx context.Context) string
- func RequestLoggerMiddleware() fiber.Handler
- func ResultID(engine, rawURL string) string
- func SetAcceptLanguageHeader(req *http.Request, langCode string)
- func SetBrowserProfileID(ctx context.Context, profileID string)
- func ShouldBypassCacheForProxyMarket(q Query) bool
- func SleepContext(ctx context.Context, d time.Duration) error
- func TenantFromContext(ctx context.Context) string
- func TimezoneForLocale(loc Locale) string
- func WaitForElements(ctx context.Context, page *rod.Page, selectors []string, timeout time.Duration) (rod.Elements, string, error)
- func WithBrowserProfileUsage(ctx context.Context) context.Context
- func WithEngine(ctx context.Context, engine string) context.Context
- func WithForcedProfileID(ctx context.Context, profileID string) context.Context
- func WithMinimalBrowserProfile(ctx context.Context) context.Context
- func WithNetworkUsage(ctx context.Context) context.Context
- func WithProfileRegion(ctx context.Context, region string) context.Context
- func WithProxyLaneKey(ctx context.Context, key ProxyLaneKey) context.Context
- func WithQueryHash(ctx context.Context, queryHash string) context.Context
- func WithRequest(ctx context.Context) *logrus.Entry
- func WithRequestEngine(ctx context.Context, engine string) *logrus.Entry
- func WithRequestID(ctx context.Context, requestID string) context.Context
- func WithRequestProxyURL(ctx context.Context, proxyURL string) context.Context
- func WithTenant(ctx context.Context, tenant string) context.Context
- type APIError
- type Browser
- type BrowserOpts
- type BrowserPoolStats
- type CORSConfig
- type CacheEntry
- type CaptchaSolver
- type CircuitBreaker
- func (cb *CircuitBreaker) AllowRequest(ctx context.Context) bool
- func (cb *CircuitBreaker) AvgSuccessLatency() (time.Duration, bool)
- func (cb *CircuitBreaker) RecordFailure(ctx context.Context)
- func (cb *CircuitBreaker) RecordSuccess(ctx context.Context)
- func (cb *CircuitBreaker) RecordSuccessDuration(ctx context.Context, elapsed time.Duration)
- func (cb *CircuitBreaker) State() CircuitState
- func (cb *CircuitBreaker) Stats() map[string]interface{}
- type CircuitBreakerConfig
- type CircuitBreakerManager
- type CircuitState
- type Classification
- type Cluster
- type ClusterOccurrence
- type DomainInfo
- type EngineErrorDetail
- type EngineHealth
- type EngineLogger
- func (el *EngineLogger) Debug(message string, args ...any)
- func (el *EngineLogger) Error(message string, args ...any)
- func (el *EngineLogger) Fatal(message string, args ...any)
- func (el *EngineLogger) Fields(fields logrus.Fields) *EngineLogger
- func (el *EngineLogger) Info(message string, args ...any)
- func (el *EngineLogger) Panic(message string, args ...any)
- func (el *EngineLogger) Warn(message string, args ...any)
- func (el *EngineLogger) WithRequest(ctx context.Context) *EngineLogger
- type EnrichContext
- type Envelope
- type FeatureItem
- type FeatureLink
- type HTMLParser
- type HealthStatus
- type ImageData
- type ImageEnvelope
- type ImageResult
- type ImageSource
- type JSONErrorResponse
- type LaneStats
- type LaneStore
- func (s *LaneStore) Cookies(key ProxyLaneKey) []*proto.NetworkCookie
- func (s *LaneStore) DropCookies(key ProxyLaneKey)
- func (s *LaneStore) Profile(key ProxyLaneKey, create func() browserprofile.Profile) browserprofile.Profile
- func (s *LaneStore) SaveCookies(key ProxyLaneKey, cookies []*proto.NetworkCookie)
- func (s *LaneStore) Stats() LaneStats
- type Locale
- type MegaSearchResult
- type Pagination
- type Position
- type ProxiesConfig
- type ProxiesHealthConfig
- type ProxyConfig
- type ProxyEngineStats
- type ProxyEntryConfig
- type ProxyExecutionMeta
- type ProxyLaneKey
- type ProxyLanesConfig
- type ProxyPolicy
- type ProxyRegistry
- func (r *ProxyRegistry) BuildStats() ProxyStats
- func (r *ProxyRegistry) HasHealthyProxyForTag(tag string) bool
- func (r *ProxyRegistry) NextByTag(tag string) string
- func (r *ProxyRegistry) NextByTagWithContext(ctx context.Context, tag string) string
- func (r *ProxyRegistry) ReportFailure(ctx context.Context, proxyURL string)
- func (r *ProxyRegistry) ReportSuccess(_ context.Context, proxyURL string)
- type ProxyStats
- type ProxyStatsEntry
- type ProxyTagSummary
- type Query
- type QueryEcho
- type ReadinessStatus
- type RegionTarget
- type ResilientConfig
- type ResilientSearcher
- func (rs *ResilientSearcher) GetCircuitBreakerStats() []map[string]interface{}
- func (rs *ResilientSearcher) GetProxyStats() ProxyStats
- func (rs *ResilientSearcher) ResolveMegaProxyMeta(q Query, engines []SearchEngine) ProxyExecutionMeta
- func (rs *ResilientSearcher) SearchAllImageParallel(ctx context.Context, q Query, engines []SearchEngine) ([]MegaSearchResult, []string, []string)
- func (rs *ResilientSearcher) SearchAllParallel(ctx context.Context, q Query, engines []SearchEngine) ([]MegaSearchResult, []string, []string)
- func (rs *ResilientSearcher) SearchImagePrimary(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
- func (rs *ResilientSearcher) SearchImageWithFallback(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
- func (rs *ResilientSearcher) SearchPrimary(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
- func (rs *ResilientSearcher) SearchWithFallback(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
- type ResponseCache
- type ResponseMeta
- type Result
- type ResultType
- type RetryConfig
- type RetryResult
- type SearchEngine
- type SearchEngineOptions
- type SearchResult
- type SerpFeature
- func DeduplicateSerpFeatures(features []SerpFeature) []SerpFeature
- func EnrichSerpFeature(raw SerpFeature, engine string, sourceResultID string, extractedAt time.Time) SerpFeature
- func ExtractSerpFeaturesBySelectors(doc *goquery.Document, specs []SerpFeatureSelector) []SerpFeature
- func FeaturesFromPage(page *rod.Page, extract func(*goquery.Document) []SerpFeature) []SerpFeature
- type SerpFeatureSelector
- type Server
- type ServerOptions
Constants ¶
const ( ReasonInvalidLimit = "INVALID_LIMIT" ReasonInvalidStart = "INVALID_START" ReasonInvalidParam = "INVALID_PARAM" ReasonEmptyQuery = "EMPTY_QUERY" ReasonNoEngines = "NO_ENGINES" ReasonUnknownFormat = "UNKNOWN_FORMAT" ReasonRequestProxyURLDisabled = "REQUEST_PROXY_URL_DISABLED" ReasonUnsupportedProxyScheme = "UNSUPPORTED_PROXY_SCHEME" )
Common validation reason codes.
const ( LogFormatJSON = "json" LogFormatText = "text" )
const ( ProxyRuntimeBrowser = "browser" ProxyRuntimeRaw = "raw" ProxyModeOff = "off" ProxyModeTagPool = "tag_pool" ProxyModeRequestURL = "request_url" DefaultProxyFailureThreshold = 3 ProxyOverrideDirect = "direct" // ProxyPoolQuarantineDuration is how long an exhausted tag pool stays quarantined // before a single probe proxy is re-enabled for recovery testing. ProxyPoolQuarantineDuration = 5 * time.Minute )
const DefaultProxyLaneMaxLanes = 100
const MaxQueryLimit = 100
MaxQueryLimit is the maximum allowed value for the limit parameter.
Variables ¶
var DefaultFingerprintArtifactDir = filepath.Join("core", "testdata")
DefaultFingerprintArtifactDir is the artifact directory used when none is configured. It is relative to the server's working directory at start time.
var ErrAllEnginesFailed = fmt.Errorf("all search engines failed")
var ErrBlocked = errors.New("blocked")
ErrBlocked is returned when the search engine blocks the browser request.
var ErrCaptcha = errors.New("captcha detected")
ErrCaptcha is returned when the engine detects a captcha challenge page. This error is treated as non-retryable by resilient search policies.
var ErrCircuitOpen = fmt.Errorf("circuit breaker is open - engine temporarily disabled")
var ErrEmptyResult = errors.New("empty_result")
ErrEmptyResult signals a successful fetch that returned zero organic results. It is not a failure; the proxy stays healthy and no credit is charged.
var ErrEngineInternal = errors.New("engine internal error")
ErrEngineInternal is returned when an engine recovered from an unexpected panic and converted it into a typed error.
var ErrParser = errors.New("parser failure")
ErrParser is returned when SERP parsing selectors drift or expected fields cannot be extracted from an otherwise loaded page.
var ErrProxyAuth = errors.New("proxy_auth")
ErrProxyAuth is returned when proxy credentials are rejected. Proxy health is degraded on this error.
var ErrProxyConnect = errors.New("proxy_connect")
ErrProxyConnect is returned when the proxy cannot establish a network connection. Proxy health is degraded on this error.
var ErrRateLimited = errors.New("rate_limited")
ErrRateLimited is returned when the search engine returns an HTTP rate limit.
var ErrSearchTimeout = errors.New("timeout. Cannot find element on page")
ErrSearchTimeout is returned when required SERP elements are not found before selector or page timeouts expire.
var ErrTimeout = errors.New("timeout")
ErrTimeout is returned when a network-level timeout occurs on the proxy path. Proxy health is degraded on this error.
Functions ¶
func AddNetworkBytes ¶ added in v0.7.13
func AppendEnrichedSearchResult ¶ added in v0.7.15
func AppendEnrichedSearchResult(env *Envelope, raw SearchResult, ctx EnrichContext, extractedAt time.Time)
AppendEnrichedSearchResult preserves the legacy results[] surface while copying any extracted SERP features onto the top-level feature surface.
func BrowserProfileIDsFromContext ¶ added in v0.7.13
func BuildAcceptLanguageHeader ¶ added in v0.7.2
BuildAcceptLanguageHeader formats an Accept-Language value from a lang code. Example: "de" -> "de-DE,de;q=0.9", "en-GB" -> "en-GB,en;q=0.9", "sw" -> "sw".
func BuildCacheKey ¶ added in v0.6.0
func CORSMiddleware ¶ added in v0.6.0
func CORSMiddleware(cfg CORSConfig) fiber.Handler
func CaptchaSolverMetrics ¶ added in v0.7.2
func ClassifySearchHTTPStatus ¶ added in v0.7.13
func ClosePageWithTimeout ¶ added in v0.7.2
ClosePageWithTimeout bounds page close calls so shutdown paths don't hang.
func ClosestMatching ¶ added in v0.7.13
ClosestMatching walks up the ancestor chain (including el itself) and returns the first element matching selector, or nil if none is found within maxHops. rod has no native Closest helper, so this is a bounded walk used by parsers that need to recover a wrapping <a> from a nested title node.
func ComputePagination ¶ added in v0.6.0
ComputePagination translates an absolute start offset into page index and in-page offset for a fixed page size.
func ConvertSearchResultsMap ¶ added in v0.4.1
func ConvertSearchResultsMap(searchResultsMap map[string]SearchResult) *[]SearchResult
ConvertSearchResultsMap converts a map-based collection to a rank-sorted slice and returns it by pointer.
func CountOrganicResults ¶ added in v0.7.13
func CountOrganicResults(results []SearchResult) int
CountOrganicResults returns the number of non-ad results in a mixed SERP.
func CountryFromRegion ¶ added in v0.7.13
CountryFromRegion extracts a two-letter country/market code from a region hint. See region.CountryFromRegion for details.
func DeferClosePage ¶ added in v0.7.13
DeferClosePage returns a cleanup function that closes page unless the browser is configured to leave pages open for debugging.
func DrainAndCloseResponse ¶ added in v0.7.2
DrainAndCloseResponse drains unread bytes before closing so HTTP transports can safely reuse connections when callers don't consume the full body.
func EnsureContext ¶ added in v0.7.2
EnsureContext returns ctx when it is non-nil; otherwise it returns a non-nil placeholder context.
func FirstNonEmptyAttribute ¶ added in v0.7.13
FirstNonEmptyAttribute returns the trimmed value of attr from the first selector under root whose attribute is non-empty.
func FirstNonEmptyText ¶ added in v0.7.13
FirstNonEmptyText returns the trimmed text of the first selector under root that yields non-empty content. Empty string if none match.
func HasAnySelector ¶ added in v0.7.13
HasAnySelector returns true if at least one of the supplied selectors currently matches in the page DOM. It does not wait — pair with WaitForElements when hydration may be in flight.
func HasAttribute ¶ added in v0.7.13
HasAttribute reports whether el carries attr (regardless of value).
func InitLogger ¶
func IsAuthenticatedSocksProxyURL ¶ added in v0.6.0
func IsContextDone ¶ added in v0.7.2
IsContextDone reports whether err is a cancellation/deadline error.
func IsProxyNetworkError ¶ added in v0.7.2
IsProxyNetworkError reports whether err is a network-level error that indicates a faulty proxy (connect failure, auth rejection, or timeout). Parser drift, captcha pages, and engine errors must NOT degrade proxy health.
func IsRodObjectNotFound ¶ added in v0.7.2
IsRodObjectNotFound reports element/object lookup misses across rod error variants used by selector calls.
func JSONErrorMiddleware ¶ added in v0.6.0
func JSONErrorMiddleware() fiber.ErrorHandler
func MaskProxyURL ¶ added in v0.6.0
func MustParseBlockedResourceTypes ¶ added in v0.7.2
func MustParseBlockedResourceTypes(raw string) []proto.NetworkResourceType
MustParseBlockedResourceTypes is like ParseBlockedResourceTypes but panics on error. Only call this after the value has already been validated by ParseBlockedResourceTypes.
func NetworkBytesFromContext ¶ added in v0.7.13
func NormalizeLogFormat ¶ added in v0.7.2
func NormalizeProxyRequestOverride ¶ added in v0.6.0
func NormalizeProxyTag ¶ added in v0.6.0
func NormalizeProxyURL ¶ added in v0.6.0
func NormalizeProxyURLs ¶ added in v0.6.0
func NormalizeURLForClustering ¶ added in v0.7.2
NormalizeURLForClustering returns a URL suitable for cross-engine grouping (same as normalizeURL but exported for use in cluster building).
func OrganicLimitReached ¶ added in v0.7.15
func OrganicLimitReached(results []SearchResult, limit int) bool
OrganicLimitReached reports whether enough organic results have been collected to satisfy limit. A non-positive limit means "no limit", so it is never reached and pagination continues until the engine runs out.
func ParseBlockedResourceTypes ¶ added in v0.7.2
func ParseBlockedResourceTypes(raw string) ([]proto.NetworkResourceType, error)
ParseBlockedResourceTypes parses a comma-separated config value into NetworkResourceType values accepted by the request blocker.
func PrepareEngineContext ¶ added in v0.7.13
func PrepareEngineContext(ctx context.Context, query Query, engineName string, minimalBrowserProfile bool) context.Context
PrepareEngineContext applies request-scoped metadata expected by all engine search implementations.
func PrimaryLanguageTag ¶ added in v0.7.2
PrimaryLanguageTag returns the BCP47 primary tag for a lang code, filling in a default country for bare languages (e.g. "de" -> "de-DE"). Returns "" when the input has no language subtag.
func QueryHashFromQuery ¶ added in v0.7.2
func RawSearchRequest ¶ added in v0.7.13
RawSearchRequest builds and executes a raw-mode SERP HTTP GET. It uses the shared raw HTTP client (TLS fingerprinting, network usage tracking, proxy support), randomizes the User-Agent, and applies the Accept-Language header derived from the query locale. The caller owns the returned response and must drain/close it (see DrainAndCloseResponse).
func ReadRawSearchBody ¶ added in v0.7.13
func RecoverEnginePanic ¶ added in v0.7.2
func RecoverEnginePanic(engine string, recovered interface{}, logger *EngineLogger) error
RecoverEnginePanic converts recovered panics to a typed engine error and logs the stack trace with engine context.
func RecoverEnginePanicWithContext ¶ added in v0.7.2
func RecoverEnginePanicWithContext(ctx context.Context, engine string, recovered interface{}, logger *EngineLogger) error
func RenderMarkdown ¶ added in v0.7.2
RenderMarkdown formats an Envelope as a Markdown document suitable for Slack/Discord/email nodes in n8n workflows.
func RenderMarkdownImage ¶ added in v0.7.2
func RenderMarkdownImage(env *ImageEnvelope) []byte
RenderMarkdownImage formats an ImageEnvelope as Markdown.
func RenderNDJSON ¶ added in v0.7.2
RenderNDJSON formats an Envelope as newline-delimited JSON.
func RenderNDJSONImage ¶ added in v0.7.2
func RenderNDJSONImage(env *ImageEnvelope) []byte
RenderNDJSONImage formats an ImageEnvelope as newline-delimited JSON.
func RenderText ¶ added in v0.7.2
RenderText formats an Envelope as a minimal plain-text block optimised for LLM context windows (~25-30% fewer tokens than JSON for the same data).
func RenderTextImage ¶ added in v0.7.2
func RenderTextImage(env *ImageEnvelope) []byte
RenderTextImage formats an ImageEnvelope as plain text.
func RequestContextMiddleware ¶ added in v0.7.2
func RequestIDFromContext ¶ added in v0.7.2
func RequestLoggerMiddleware ¶ added in v0.6.0
func SetAcceptLanguageHeader ¶ added in v0.7.2
SetAcceptLanguageHeader sets the Accept-Language header from a lang code. No-op when the code has no language subtag.
func SetBrowserProfileID ¶ added in v0.7.13
func ShouldBypassCacheForProxyMarket ¶ added in v0.7.13
func SleepContext ¶ added in v0.7.2
SleepContext blocks for d or until ctx is canceled.
func TenantFromContext ¶ added in v0.7.13
func TimezoneForLocale ¶ added in v0.7.13
TimezoneForLocale returns an IANA timezone for a locale's country, or for the default country of the language when country is empty. Returns "" for unknown locales — caller should retain the existing profile timezone.
func WaitForElements ¶ added in v0.7.13
func WaitForElements(ctx context.Context, page *rod.Page, selectors []string, timeout time.Duration) (rod.Elements, string, error)
WaitForElements probes the supplied CSS selectors until one returns at least one matching element or timeout elapses. It exists because rod's page.Search/Elements and the surrounding WaitLoad/WaitStable do not wait for a *specific* selector to hydrate — modern SPA SERPs (DDG, Bing, Google) regularly fire `load` and even reach DOM-stable before result rows render, causing parsers to see an empty page on the first probe and forcing the caller's retry layer to reload.
The probe loop returns as soon as a selector matches, returning the matched elements and the selector that hit. On timeout it returns ErrSearchTimeout so callers can distinguish "no results" from "captcha" by inspecting the page directly.
func WithBrowserProfileUsage ¶ added in v0.7.13
func WithEngine ¶ added in v0.7.2
func WithForcedProfileID ¶ added in v0.7.13
func WithMinimalBrowserProfile ¶ added in v0.7.13
func WithProfileRegion ¶ added in v0.7.2
func WithProxyLaneKey ¶ added in v0.7.13
func WithProxyLaneKey(ctx context.Context, key ProxyLaneKey) context.Context
func WithQueryHash ¶ added in v0.7.2
func WithRequestEngine ¶ added in v0.7.2
func WithRequestID ¶ added in v0.7.2
func WithRequestProxyURL ¶ added in v0.7.13
Types ¶
type APIError ¶ added in v0.7.2
type APIError struct {
HTTPStatus int
ErrorCode string
Reason string
Message string
Meta map[string]interface{}
}
APIError represents a client-facing error with a stable machine-readable reason code.
type Browser ¶
type Browser struct {
BrowserOpts
CaptchaSolver *CaptchaSolver
// contains filtered or unexported fields
}
Browser wraps a launched Chromium instance used by engine implementations.
func NewBrowser ¶
func NewBrowser(opts BrowserOpts) (*Browser, error)
NewBrowser launches a new Chromium process via the Rod launcher and returns a Browser wrapper configured with proxy and captcha solver settings.
func (*Browser) IsInitialized ¶
IsInitialized reports whether the browser launcher has been created.
type BrowserOpts ¶
type BrowserOpts struct {
// IsHeadless runs Chromium without visible UI.
IsHeadless bool
// IsLeakless forces child browser process cleanup when the parent exits.
IsLeakless bool
// Timeout is applied to browser connect and page navigation operations.
Timeout time.Duration
// LanguageCode sets Accept-Language for emulated requests.
LanguageCode string
// WaitRequests waits for request-idle state after navigation.
WaitRequests bool
// LeavePageOpen keeps pages open after search operations.
LeavePageOpen bool
// WaitLoadTime caps the document load-event wait after navigation. Selector
// parsing still decides whether the page is usable after this wait expires.
WaitLoadTime time.Duration
// CaptchaSolverApiKey enables 2Captcha integration for supported engines.
CaptchaSolverApiKey string
// CaptchaSolverEnabled gates solver invocation regardless of engine flags.
CaptchaSolverEnabled bool
// BrowserPath optionally points to a specific browser executable.
BrowserPath string
// ProxyURL defines the upstream proxy for browser traffic.
ProxyURL string
// ProxyLaneStore keeps sticky proxy lane profiles and cookies.
ProxyLaneStore *LaneStore
// Insecure allows invalid TLS certificates for browser requests.
Insecure bool
// UserAgent optionally overrides browser-reported user agent during emulation.
UserAgent string
// BlockResourceTypes are blocked during page navigation when non-empty.
// Typical tokens map to these types: image, font, css(stylesheet), js(script), media.
BlockResourceTypes []proto.NetworkResourceType
// BlockTrackers toggles static tracker-domain blocking.
BlockTrackers bool
}
BrowserOpts configures Chromium launch and navigation behavior.
func (*BrowserOpts) Check ¶ added in v0.2.1
func (o *BrowserOpts) Check()
Check applies default option values when optional fields are unset.
type BrowserPoolStats ¶ added in v0.7.13
type BrowserPoolStats struct {
Active int `json:"active"`
Max int `json:"max"`
EvictedLRU int `json:"evicted_lru"`
EvictedIdle int `json:"evicted_idle"`
}
BrowserPoolStats describes the live state of the per-process browser pool that keeps one Chrome per authenticated upstream proxy identity. Reported via /stats/proxy as `browser_processes`.
type CORSConfig ¶ added in v0.6.0
func DefaultCORSConfig ¶ added in v0.6.0
func DefaultCORSConfig() CORSConfig
type CacheEntry ¶ added in v0.6.0
type CaptchaSolver ¶ added in v0.4.1
type CaptchaSolver struct {
// contains filtered or unexported fields
}
func NewSolver ¶ added in v0.4.1
func NewSolver(apikey string) *CaptchaSolver
func (*CaptchaSolver) SolveReCaptcha2 ¶ added in v0.4.1
func (cs *CaptchaSolver) SolveReCaptcha2(sitekey, pageURL, dataS, proxyURL string) (string, string, error)
type CircuitBreaker ¶ added in v0.6.0
type CircuitBreaker struct {
// contains filtered or unexported fields
}
CircuitBreaker tracks failure state for one engine.
func NewCircuitBreaker ¶ added in v0.6.0
func NewCircuitBreaker(name string, cfg CircuitBreakerConfig) *CircuitBreaker
func (*CircuitBreaker) AllowRequest ¶ added in v0.6.0
func (cb *CircuitBreaker) AllowRequest(ctx context.Context) bool
func (*CircuitBreaker) AvgSuccessLatency ¶ added in v0.7.13
func (cb *CircuitBreaker) AvgSuccessLatency() (time.Duration, bool)
func (*CircuitBreaker) RecordFailure ¶ added in v0.6.0
func (cb *CircuitBreaker) RecordFailure(ctx context.Context)
func (*CircuitBreaker) RecordSuccess ¶ added in v0.6.0
func (cb *CircuitBreaker) RecordSuccess(ctx context.Context)
func (*CircuitBreaker) RecordSuccessDuration ¶ added in v0.7.13
func (cb *CircuitBreaker) RecordSuccessDuration(ctx context.Context, elapsed time.Duration)
func (*CircuitBreaker) State ¶ added in v0.6.0
func (cb *CircuitBreaker) State() CircuitState
func (*CircuitBreaker) Stats ¶ added in v0.6.0
func (cb *CircuitBreaker) Stats() map[string]interface{}
type CircuitBreakerConfig ¶ added in v0.6.0
type CircuitBreakerConfig struct {
FailureThreshold int
RecoveryTimeout time.Duration
SuccessThreshold int
}
func DefaultCircuitBreakerConfig ¶ added in v0.6.0
func DefaultCircuitBreakerConfig() CircuitBreakerConfig
type CircuitBreakerManager ¶ added in v0.6.0
type CircuitBreakerManager struct {
// contains filtered or unexported fields
}
func NewCircuitBreakerManager ¶ added in v0.6.0
func NewCircuitBreakerManager(cfg CircuitBreakerConfig) *CircuitBreakerManager
func (*CircuitBreakerManager) AllStats ¶ added in v0.6.0
func (m *CircuitBreakerManager) AllStats() []map[string]interface{}
func (*CircuitBreakerManager) Get ¶ added in v0.6.0
func (m *CircuitBreakerManager) Get(engineName string) *CircuitBreaker
type CircuitState ¶ added in v0.6.0
type CircuitState int
const ( CircuitClosed CircuitState = iota CircuitOpen CircuitHalfOpen )
func (CircuitState) String ¶ added in v0.6.0
func (s CircuitState) String() string
type Classification ¶ added in v0.7.2
type Classification struct {
ContentType string `json:"content_type,omitempty"`
SourceHint string `json:"source_hint,omitempty"`
}
Classification holds URL-path heuristic hints for downstream consumers.
func ClassifyURL ¶ added in v0.7.2
func ClassifyURL(rawURL, domain string) *Classification
ClassifyURL returns a rough content-type and source hint derived from the URL path alone; no network calls.
type Cluster ¶ added in v0.7.2
type Cluster struct {
ID string `json:"id"`
CanonicalURL string `json:"canonical_url"`
Domain string `json:"domain"`
Title string `json:"title"`
Occurrences []ClusterOccurrence `json:"occurrences"`
EnginesCount int `json:"engines_count"`
BestRank int `json:"best_rank"`
Score float64 `json:"score"`
}
Cluster groups results that refer to the same canonical URL across engines. Populated only by /mega/search. Full type defined in clusters.go.
func BuildClusters ¶ added in v0.7.2
BuildClusters groups results by normalized URL and scores them by cross-engine agreement. enginesQueried is the total number of engines that were asked (denominator for the score formula).
Score = sum(1/rank for each occurrence) / enginesQueried, capped at 1.0.
type ClusterOccurrence ¶ added in v0.7.2
type ClusterOccurrence struct {
Engine string `json:"engine"`
Rank int `json:"rank"`
ResultID string `json:"result_id"`
}
ClusterOccurrence links one engine result back into the flat results list.
type DomainInfo ¶ added in v0.7.2
type DomainInfo struct {
TLD string `json:"tld,omitempty"`
SLD string `json:"sld,omitempty"`
// Category is one of "gov", "edu", "mil", "news", "forum", "marketplace",
// "social", or "" when the domain does not match any known category.
Category string `json:"category"`
}
DomainInfo carries TLD-derived category signals for a result domain.
func EnrichDomainInfo ¶ added in v0.7.2
func EnrichDomainInfo(domain string) *DomainInfo
EnrichDomainInfo derives TLD/category signals from a bare hostname.
type EngineErrorDetail ¶ added in v0.7.13
type EngineErrorDetail struct {
Engine string `json:"engine"`
Error string `json:"error"`
Message string `json:"message,omitempty"`
}
EngineErrorDetail is a client-facing, sanitized per-engine failure summary.
type EngineHealth ¶ added in v0.6.0
type EngineHealth struct {
Name string `json:"name"`
Initialized bool `json:"initialized"`
Status string `json:"status"`
}
EngineHealth describes availability of one configured engine.
type EngineLogger ¶ added in v0.5.3
type EngineLogger struct {
// contains filtered or unexported fields
}
EngineLogger provides structured logging for search engines with a fixed engine field.
func NewEngineLogger ¶ added in v0.5.3
func NewEngineLogger(engine string) *EngineLogger
func (*EngineLogger) Debug ¶ added in v0.5.3
func (el *EngineLogger) Debug(message string, args ...any)
func (*EngineLogger) Error ¶ added in v0.5.3
func (el *EngineLogger) Error(message string, args ...any)
func (*EngineLogger) Fatal ¶ added in v0.5.3
func (el *EngineLogger) Fatal(message string, args ...any)
func (*EngineLogger) Fields ¶ added in v0.7.2
func (el *EngineLogger) Fields(fields logrus.Fields) *EngineLogger
Fields returns a new EngineLogger with additional structured fields merged in.
func (*EngineLogger) Info ¶ added in v0.5.3
func (el *EngineLogger) Info(message string, args ...any)
func (*EngineLogger) Panic ¶ added in v0.5.3
func (el *EngineLogger) Panic(message string, args ...any)
func (*EngineLogger) Warn ¶ added in v0.5.3
func (el *EngineLogger) Warn(message string, args ...any)
func (*EngineLogger) WithRequest ¶ added in v0.7.2
func (el *EngineLogger) WithRequest(ctx context.Context) *EngineLogger
type EnrichContext ¶ added in v0.7.2
EnrichContext carries request-scoped values needed to enrich a raw result.
type Envelope ¶ added in v0.7.2
type Envelope struct {
Query QueryEcho `json:"query"`
Meta ResponseMeta `json:"meta"`
Results []Result `json:"results"`
SerpFeatures []SerpFeature `json:"serp_features"`
Pagination Pagination `json:"pagination"`
// Clusters is only populated by /mega/search (see clusters.go).
Clusters *[]Cluster `json:"clusters,omitempty"`
}
Envelope is the top-level v2 response wrapper for all search endpoints.
func NewEnvelope ¶ added in v0.7.2
NewEnvelope builds a fresh Envelope pre-filled with query echo and an open meta block. Call Finalize before serializing.
type FeatureItem ¶ added in v0.7.15
type FeatureItem struct {
Title string `json:"title,omitempty"`
Text string `json:"text,omitempty"`
Link string `json:"link,omitempty"`
}
FeatureItem is one child entry inside a grouped SERP feature.
type FeatureLink ¶ added in v0.7.15
FeatureLink is a source or citation associated with a SERP feature.
type HTMLParser ¶ added in v0.7.13
type HTMLParser interface {
Name() string
ParseHTML(io.Reader) ([]SearchResult, error)
}
HTMLParser is implemented by engines that can parse a SERP HTML document without a live browser. Used to expose POST /parse/{engine} endpoints.
type HealthStatus ¶ added in v0.6.0
type HealthStatus struct {
Status string `json:"status"`
Uptime string `json:"uptime"`
Engines []EngineHealth `json:"engines"`
System map[string]interface{} `json:"system"`
}
HealthStatus is returned by /health and summarizes service state.
type ImageData ¶ added in v0.7.2
type ImageData struct {
URL string `json:"url"`
Thumbnail string `json:"thumbnail,omitempty"`
Width int `json:"width,omitempty"`
Height int `json:"height,omitempty"`
}
ImageData holds image-specific URL and dimension fields.
type ImageEnvelope ¶ added in v0.7.2
type ImageEnvelope struct {
Query QueryEcho `json:"query"`
Meta ResponseMeta `json:"meta"`
Results []ImageResult `json:"results"`
Pagination Pagination `json:"pagination"`
}
ImageEnvelope is the top-level v2 response wrapper for image search endpoints.
func NewImageEnvelope ¶ added in v0.7.2
func NewImageEnvelope(q Query, requestID string, startedAt time.Time, engines []string) *ImageEnvelope
NewImageEnvelope builds a fresh ImageEnvelope.
type ImageResult ¶ added in v0.7.2
type ImageResult struct {
ID string `json:"id"`
Rank int `json:"rank"`
Type ResultType `json:"type"`
Title string `json:"title"`
Image ImageData `json:"image"`
Source ImageSource `json:"source"`
Engine string `json:"engine"`
}
ImageResult is the v2 shape for image search results.
func EnrichImageResult ¶ added in v0.7.2
func EnrichImageResult(raw SearchResult, ctx EnrichContext) ImageResult
EnrichImageResult converts a raw engine result into the v2 ImageResult shape.
type ImageSource ¶ added in v0.7.2
ImageSource holds page-level context for an image result.
type JSONErrorResponse ¶ added in v0.6.0
type LaneStore ¶ added in v0.7.13
type LaneStore struct {
// contains filtered or unexported fields
}
func NewLaneStore ¶ added in v0.7.13
func (*LaneStore) Cookies ¶ added in v0.7.13
func (s *LaneStore) Cookies(key ProxyLaneKey) []*proto.NetworkCookie
func (*LaneStore) DropCookies ¶ added in v0.7.13
func (s *LaneStore) DropCookies(key ProxyLaneKey)
func (*LaneStore) Profile ¶ added in v0.7.13
func (s *LaneStore) Profile(key ProxyLaneKey, create func() browserprofile.Profile) browserprofile.Profile
func (*LaneStore) SaveCookies ¶ added in v0.7.13
func (s *LaneStore) SaveCookies(key ProxyLaneKey, cookies []*proto.NetworkCookie)
type Locale ¶ added in v0.7.2
Locale is a parsed language/region pair derived from a BCP47-style code. The canonical definition lives in the dependency-free core/region subpackage; this alias preserves the historical core.Locale name for existing callers.
func ParseLocale ¶ added in v0.7.2
ParseLocale parses a language code such as "en", "EN-us", or "de_AT" into a Locale. See region.ParseLocale for details.
type MegaSearchResult ¶ added in v0.5.3
type MegaSearchResult struct {
SearchResult
Engine string `json:"engine"`
}
MegaSearchResult extends SearchResult with the engine source name.
type Pagination ¶ added in v0.7.2
type Pagination struct {
Page int `json:"page"`
HasMore bool `json:"has_more"`
NextStart int `json:"next_start"`
}
Pagination carries cursor information for client-side loop termination.
type Position ¶ added in v0.7.2
type Position struct {
// Absolute is the 1-based rank counting from the first result of the first page,
// across both organic and ad blocks. Always emitted so SEO callers can plot
// rank vs. on-page position without inferring it from the result order.
Absolute int `json:"absolute"`
}
Position describes where a result sits in the overall result stream.
type ProxiesConfig ¶ added in v0.6.0
type ProxiesConfig struct {
Global string `json:"global,omitempty" mapstructure:"global"`
Entries []ProxyEntryConfig `json:"entries" mapstructure:"entries"`
Health ProxiesHealthConfig `json:"health" mapstructure:"health"`
AllowRequestProxyURL bool `json:"allow_request_proxy_url" mapstructure:"allow_request_proxy_url"`
Lanes ProxyLanesConfig `json:"lanes" mapstructure:"lanes"`
}
func DefaultProxiesConfig ¶ added in v0.6.0
func DefaultProxiesConfig() ProxiesConfig
func NormalizeProxiesConfig ¶ added in v0.6.0
func NormalizeProxiesConfig(cfg ProxiesConfig) (ProxiesConfig, error)
type ProxiesHealthConfig ¶ added in v0.6.0
type ProxiesHealthConfig struct {
FailureThreshold int `json:"failure_threshold" mapstructure:"failure_threshold"`
}
type ProxyConfig ¶ added in v0.6.0
type ProxyConfig struct {
Runtime string // raw or browser runtime behavior
Proxies ProxiesConfig // canonical proxy inventory
EnginePolicies map[string]string // engine-specific proxy tags
Registry *ProxyRegistry // optional shared registry from caller
}
func DefaultProxyConfig ¶ added in v0.6.0
func DefaultProxyConfig() ProxyConfig
func NormalizeProxyConfig ¶ added in v0.6.0
func NormalizeProxyConfig(cfg ProxyConfig) (ProxyConfig, error)
type ProxyEngineStats ¶ added in v0.6.0
type ProxyEntryConfig ¶ added in v0.6.0
type ProxyExecutionMeta ¶ added in v0.6.0
type ProxyLaneKey ¶ added in v0.7.13
func NormalizeProxyLaneKey ¶ added in v0.7.13
func NormalizeProxyLaneKey(key ProxyLaneKey) ProxyLaneKey
func ProxyLaneKeyForTenant ¶ added in v0.7.13
func ProxyLaneKeyForTenant(engine string, tenant string, q Query, proxyURL string) ProxyLaneKey
func (ProxyLaneKey) Empty ¶ added in v0.7.13
func (k ProxyLaneKey) Empty() bool
func (ProxyLaneKey) ID ¶ added in v0.7.13
func (k ProxyLaneKey) ID() string
type ProxyLanesConfig ¶ added in v0.7.13
type ProxyLanesConfig struct {
Enabled bool `json:"enabled" mapstructure:"enabled"`
MaxLanes int `json:"max_lanes" mapstructure:"max_lanes"`
DropCookiesOnChallenge bool `json:"drop_cookies_on_challenge" mapstructure:"drop_cookies_on_challenge"`
}
func DefaultProxyLanesConfig ¶ added in v0.7.13
func DefaultProxyLanesConfig() ProxyLanesConfig
func NormalizeProxyLanesConfig ¶ added in v0.7.13
func NormalizeProxyLanesConfig(cfg ProxyLanesConfig) ProxyLanesConfig
type ProxyPolicy ¶ added in v0.6.0
type ProxyPolicy struct {
Mode string `json:"mode" mapstructure:"mode"`
Tag string `json:"tag,omitempty" mapstructure:"tag"`
}
func ResolveEffectiveProxyPolicy ¶ added in v0.6.0
func ResolveEffectiveProxyPolicy(globalProxyURL string, engineTag string) ProxyPolicy
type ProxyRegistry ¶ added in v0.6.0
type ProxyRegistry struct {
// contains filtered or unexported fields
}
func NewProxyRegistry ¶ added in v0.6.0
func NewProxyRegistry(entries []ProxyEntryConfig, failureThreshold int) (*ProxyRegistry, error)
func (*ProxyRegistry) BuildStats ¶ added in v0.6.0
func (r *ProxyRegistry) BuildStats() ProxyStats
func (*ProxyRegistry) HasHealthyProxyForTag ¶ added in v0.6.0
func (r *ProxyRegistry) HasHealthyProxyForTag(tag string) bool
func (*ProxyRegistry) NextByTag ¶ added in v0.6.0
func (r *ProxyRegistry) NextByTag(tag string) string
func (*ProxyRegistry) NextByTagWithContext ¶ added in v0.7.2
func (r *ProxyRegistry) NextByTagWithContext(ctx context.Context, tag string) string
func (*ProxyRegistry) ReportFailure ¶ added in v0.6.0
func (r *ProxyRegistry) ReportFailure(ctx context.Context, proxyURL string)
ReportFailure increments the failure counter for proxyURL. The proxy is disabled once the failure threshold is reached. If the owning tag pool becomes fully exhausted, a quarantine timer is started so that NextByTagWithContext will not immediately re-enable all proxies.
Only proxy-network errors (ErrProxyConnect, ErrProxyAuth, ErrTimeout) should degrade proxy health. Callers must not call this for captcha or parser errors.
func (*ProxyRegistry) ReportSuccess ¶ added in v0.6.0
func (r *ProxyRegistry) ReportSuccess(_ context.Context, proxyURL string)
type ProxyStats ¶ added in v0.6.0
type ProxyStats struct {
ConfiguredCount int `json:"configured_count"`
HealthyCount int `json:"healthy_count"`
UnhealthyCount int `json:"unhealthy_count"`
RequestProxyURLEnabled bool `json:"request_proxy_url_enabled"`
Lanes LaneStats `json:"lanes"`
BrowserProcesses BrowserPoolStats `json:"browser_processes"`
Tags map[string]ProxyTagSummary `json:"tags"`
Entries []ProxyStatsEntry `json:"entries"`
Engines map[string]ProxyEngineStats `json:"engines,omitempty"`
}
type ProxyStatsEntry ¶ added in v0.6.0
type ProxyTagSummary ¶ added in v0.6.0
type Query ¶
type Query struct {
// Text is the search phrase, for example "golang fiber tutorial".
Text string
// LangCode is an engine language hint such as "EN", "DE", or "RU".
LangCode string
// Region is an engine market/location hint. Yandex accepts numeric lr IDs;
// global engines accept country-style hints such as "RU" or "en-RU".
Region string
// DateInterval filters by date range in YYYYMMDD..YYYYMMDD format.
// Example: "20250101..20250331".
DateInterval string
// Filetype is a file extension filter, for example "pdf" or "docx".
Filetype string
// Site restricts results to a specific domain, for example "github.com".
Site string
// Limit is the maximum number of results requested by the client.
Limit int
// Start is an engine pagination offset. Values are engine-specific:
// Google commonly uses 0,10,20 while some engines use page indexes.
Start int
// Filter controls duplicate filtering when supported by the engine.
// For Google, false includes similar results and true hides them.
Filter bool
// Features enables parsing SERP feature modules (AI summaries, answer boxes,
// people-also-ask, related searches) on the browser Search path when
// supported by the engine. Such entries may be returned with non-positive
// internal rank values.
Features bool
// ProxyURL is a direct proxy URL used by raw HTTP search paths.
ProxyURL string
// ProxyCountry identifies the proxy market country for cache/error metadata.
ProxyCountry string
// ProxyClass identifies the proxy class such as datacenter or residential.
ProxyClass string
// ProxyProvider identifies the upstream proxy provider.
ProxyProvider string
// ProxySessionID identifies a sticky balancer session/lane.
ProxySessionID string
// ProxyOverride is a request-scoped proxy policy override (tag or "direct"),
// typically parsed from the X-Use-Proxy header.
ProxyOverride string
// Insecure enables insecure TLS for request/browser execution.
Insecure bool
}
Query holds request parameters used by HTTP handlers and search engines. Example minimal query: Query{Text: "golang", Limit: 10}.
func (*Query) InitFromContext ¶
InitFromContext populates Query from HTTP query parameters and request headers. It validates numeric/boolean inputs and returns an *APIError for invalid client input (400) or a plain error for internal failures.
type QueryEcho ¶ added in v0.7.2
type QueryEcho struct {
Text string `json:"text"`
Lang string `json:"lang,omitempty"`
Region string `json:"region,omitempty"`
EnginesRequested []string `json:"engines_requested"`
}
QueryEcho echoes the interpreted query parameters back to the client.
type ReadinessStatus ¶ added in v0.7.2
type ReadinessStatus struct {
Status string `json:"status"`
}
ReadinessStatus is returned by /ready to indicate if this instance can receive new traffic.
type RegionTarget ¶ added in v0.7.15
type RegionTarget = region.RegionTarget
RegionTarget is the resolved, per-engine targeting for a user-supplied region hint. See region.RegionTarget for details.
func ResolveRegion ¶ added in v0.7.15
func ResolveRegion(hint string) RegionTarget
ResolveRegion turns a free-text region hint into per-engine targeting. See region.ResolveRegion for accepted inputs and semantics.
type ResilientConfig ¶ added in v0.6.0
type ResilientConfig struct {
Retry RetryConfig
CircuitBreaker CircuitBreakerConfig
Proxy ProxyConfig
}
func DefaultResilientConfig ¶ added in v0.6.0
func DefaultResilientConfig() ResilientConfig
type ResilientSearcher ¶ added in v0.6.0
type ResilientSearcher struct {
// contains filtered or unexported fields
}
ResilientSearcher wraps engines with retry and circuit breaker protection.
func NewResilientSearcher ¶ added in v0.6.0
func NewResilientSearcher(engines []SearchEngine, cfg ResilientConfig) *ResilientSearcher
func (*ResilientSearcher) GetCircuitBreakerStats ¶ added in v0.6.0
func (rs *ResilientSearcher) GetCircuitBreakerStats() []map[string]interface{}
func (*ResilientSearcher) GetProxyStats ¶ added in v0.6.0
func (rs *ResilientSearcher) GetProxyStats() ProxyStats
func (*ResilientSearcher) ResolveMegaProxyMeta ¶ added in v0.6.0
func (rs *ResilientSearcher) ResolveMegaProxyMeta(q Query, engines []SearchEngine) ProxyExecutionMeta
func (*ResilientSearcher) SearchAllImageParallel ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchAllImageParallel(ctx context.Context, q Query, engines []SearchEngine) ([]MegaSearchResult, []string, []string)
func (*ResilientSearcher) SearchAllParallel ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchAllParallel(ctx context.Context, q Query, engines []SearchEngine) ([]MegaSearchResult, []string, []string)
SearchAllParallel applies retry/circuit protections per engine for mega search. It returns the results, the list of engines that responded, and the list of engines that failed.
func (*ResilientSearcher) SearchImagePrimary ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchImagePrimary(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
func (*ResilientSearcher) SearchImageWithFallback ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchImageWithFallback(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
func (*ResilientSearcher) SearchPrimary ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchPrimary(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
SearchPrimary keeps dedicated endpoints engine-pure (no fallback).
func (*ResilientSearcher) SearchWithFallback ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchWithFallback(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
SearchWithFallback retries the primary engine and then tries other initialized engines.
type ResponseCache ¶ added in v0.6.0
type ResponseCache struct {
// contains filtered or unexported fields
}
ResponseCache is a bounded in-memory TTL cache for dedicated endpoint responses.
func NewResponseCache ¶ added in v0.6.0
func NewResponseCache(ttl time.Duration, maxSize int) *ResponseCache
func (*ResponseCache) RecordBypass ¶ added in v0.6.0
func (c *ResponseCache) RecordBypass()
func (*ResponseCache) Set ¶ added in v0.6.0
func (c *ResponseCache) Set(key string, data []byte)
func (*ResponseCache) Stats ¶ added in v0.6.0
func (c *ResponseCache) Stats() map[string]interface{}
type ResponseMeta ¶ added in v0.7.2
type ResponseMeta struct {
RequestID string `json:"request_id"`
RequestedAt string `json:"requested_at"`
TookMs int64 `json:"took_ms"`
EnginesResponded []string `json:"engines_responded,omitempty"`
EnginesFailed []string `json:"engines_failed"`
EngineErrors []EngineErrorDetail `json:"engine_errors,omitempty"`
Version string `json:"version"`
}
ResponseMeta carries request-level metadata for observability and debugging.
type Result ¶ added in v0.7.2
type Result struct {
ID string `json:"id"`
Rank int `json:"rank"`
Type ResultType `json:"type"`
Title string `json:"title"`
URL string `json:"url"`
DisplayURL string `json:"display_url"`
Snippet string `json:"snippet"`
Domain string `json:"domain"`
Favicon string `json:"favicon"`
Position *Position `json:"position,omitempty"`
Engine string `json:"engine"`
DomainInfo *DomainInfo `json:"domain_info,omitempty"`
Classification *Classification `json:"classification,omitempty"`
}
Result is the v2 normalized result returned in search responses. Optional fields (Position, DomainInfo, Classification) are omitted when empty.
func EnrichResult ¶ added in v0.7.2
func EnrichResult(raw SearchResult, ctx EnrichContext) Result
EnrichResult converts a raw engine result into the v2 Result shape.
type ResultType ¶ added in v0.7.2
type ResultType string
ResultType is the SERP block type for a search result.
const ( ResultTypeOrganic ResultType = "organic" ResultTypeAd ResultType = "ad" ResultTypeFeaturedSnippet ResultType = "featured_snippet" ResultTypeKnowledgePanel ResultType = "knowledge_panel" ResultTypePeopleAlsoAsk ResultType = "people_also_ask" ResultTypeVideo ResultType = "video" ResultTypeImage ResultType = "image" ResultTypeNews ResultType = "news" ResultTypeShopping ResultType = "shopping" ResultTypeLocal ResultType = "local" ResultTypeAnswerBox ResultType = "answer_box" ResultTypeAISummary ResultType = "ai_summary" ResultTypeRelatedQuestions ResultType = "related_questions" ResultTypeRelatedSearches ResultType = "related_searches" ResultTypeSitelinks ResultType = "sitelinks" ResultTypeVideos ResultType = "videos" ResultTypeImagesInline ResultType = "images_inline" ResultTypeCalculator ResultType = "calculator" ResultTypeWeather ResultType = "weather" ResultTypeDictionary ResultType = "dictionary" )
func ValidateResultType ¶ added in v0.7.2
func ValidateResultType(t ResultType) (ResultType, string)
ValidateResultType returns the input type if it is a known enum value, otherwise returns ResultTypeOrganic with a warning message.
type RetryConfig ¶ added in v0.6.0
type RetryConfig struct {
MaxRetries int
InitialBackoff time.Duration
MaxBackoff time.Duration
BackoffFactor float64
}
RetryConfig controls retry behavior.
func DefaultRetryConfig ¶ added in v0.6.0
func DefaultRetryConfig() RetryConfig
type RetryResult ¶ added in v0.6.0
type RetryResult struct {
Results []SearchResult
Err error
Attempts int
Engine string
}
func RetryableSearch ¶ added in v0.6.0
func RetryableSearch(ctx context.Context, cfg RetryConfig, engineName string, searchFn func(context.Context) ([]SearchResult, error)) RetryResult
RetryableSearch executes searchFn with exponential backoff retries. CAPTCHA, block, rate-limit, parser, engine-internal, and proxy-unavailable errors are not retried.
type SearchEngine ¶
type SearchEngine interface {
// Search runs a web search request and returns normalized results.
// Implementations should return sentinel errors such as ErrCaptcha and
// ErrSearchTimeout for policy-aware handling.
Search(context.Context, Query) ([]SearchResult, error)
// SearchImage runs an image search request and returns normalized results.
SearchImage(context.Context, Query) ([]SearchResult, error)
// IsInitialized reports whether the engine is ready to serve requests.
IsInitialized() bool
// Name returns a stable engine identifier used in routes and telemetry.
Name() string
// GetRateLimiter returns an engine-specific limiter used by resilient search.
GetRateLimiter() *rate.Limiter
}
SearchEngine defines the contract required by the HTTP server and resilient search pipeline.
type SearchEngineOptions ¶ added in v0.2.1
type SearchEngineOptions struct {
// RateRequests is the allowed number of requests within RateTime seconds.
RateRequests int `mapstructure:"rate_requests"`
// RateTime defines the rate-limiting window size in seconds.
RateTime int64 `mapstructure:"rate_seconds"`
// RateBurst is the token bucket burst size for short spikes.
RateBurst int `mapstructure:"rate_burst"`
// SelectorTimeout is the per-selector wait timeout in seconds.
SelectorTimeout int64 `mapstructure:"selector_timeout"`
// IsSolveCaptcha enables automatic captcha solving when engine support and
// solver credentials are configured.
IsSolveCaptcha bool `mapstructure:"captcha"`
}
SearchEngineOptions controls engine pacing, selector waits, and captcha handling behavior shared by browser and raw implementations.
func (*SearchEngineOptions) GetRateLimiter ¶ added in v0.7.13
func (o *SearchEngineOptions) GetRateLimiter() *rate.Limiter
GetRateLimiter returns a limiter configured from SearchEngineOptions. Call Init() first so RateBurst is non-zero.
func (*SearchEngineOptions) GetRatelimit ¶ added in v0.2.1
func (o *SearchEngineOptions) GetRatelimit() time.Duration
GetRatelimit returns the interval between two allowed requests. Call Init() first so RateRequests / RateTime are non-zero.
func (*SearchEngineOptions) GetSelectorTimeout ¶ added in v0.2.1
func (o *SearchEngineOptions) GetSelectorTimeout() time.Duration
GetSelectorTimeout returns the selector wait timeout as time.Duration.
func (*SearchEngineOptions) Init ¶ added in v0.2.1
func (o *SearchEngineOptions) Init()
Init sets default option values when fields are zero.
type SearchResult ¶
type SearchResult struct {
// Rank is the 1-based position within this result type. For SEO callers,
// organic rank must not be shifted by ads.
Rank int `json:"rank"`
// AbsoluteRank is the 1-based position in the mixed SERP stream.
AbsoluteRank int `json:"absolute_rank,omitempty"`
// Type is the SERP block type when an engine can classify a non-standard
// SERP module without changing the public SearchEngine interface.
Type ResultType `json:"type,omitempty"`
// URL is the canonical result URL.
URL string `json:"url"`
// Title is the result headline shown on the SERP.
Title string `json:"title"`
// Description is the snippet text associated with the result.
Description string `json:"description"`
// Ad reports whether the result is sponsored.
Ad bool `json:"ad"`
// Features carries extracted SERP modules alongside the legacy result stream.
Features []SerpFeature `json:"-"`
}
SearchResult represents one normalized result item returned by any engine.
func AttachFeaturesToFirstResult ¶ added in v0.7.15
func AttachFeaturesToFirstResult(results []SearchResult, features []SerpFeature) []SearchResult
AttachFeaturesToFirstResult keeps ParseHTML signatures unchanged while letting server response building split features onto the new top-level field.
func DeduplicateResults ¶ added in v0.4.1
func DeduplicateResults(results []SearchResult) []SearchResult
DeduplicateResults removes items with duplicate URLs and returns a result set sorted by rank in ascending order.
func LimitOrganicResults ¶ added in v0.7.13
func LimitOrganicResults(results []SearchResult, limit int) []SearchResult
LimitOrganicResults keeps all ads and at most limit non-ad results.
type SerpFeature ¶ added in v0.7.15
type SerpFeature struct {
ID string `json:"id"`
Engine string `json:"engine"`
Type ResultType `json:"type"`
Title string `json:"title,omitempty"`
Text string `json:"text,omitempty"`
Items []FeatureItem `json:"items,omitempty"`
Links []FeatureLink `json:"links,omitempty"`
SourceResultIDs []string `json:"source_result_ids,omitempty"`
Position *Position `json:"position,omitempty"`
Confidence float64 `json:"confidence,omitempty"`
ExtractedAt string `json:"extracted_at"`
}
SerpFeature is a normalized non-organic SERP module surfaced separately from rankable results.
func DeduplicateSerpFeatures ¶ added in v0.7.15
func DeduplicateSerpFeatures(features []SerpFeature) []SerpFeature
DeduplicateSerpFeatures removes duplicate modules emitted by overlapping selectors while preserving original order.
func EnrichSerpFeature ¶ added in v0.7.15
func EnrichSerpFeature(raw SerpFeature, engine string, sourceResultID string, extractedAt time.Time) SerpFeature
EnrichSerpFeature stamps a raw feature with stable public fields.
func ExtractSerpFeaturesBySelectors ¶ added in v0.7.15
func ExtractSerpFeaturesBySelectors(doc *goquery.Document, specs []SerpFeatureSelector) []SerpFeature
ExtractSerpFeaturesBySelectors converts engine-native SERP module markup into normalized features. It is intentionally conservative: a matched container is emitted only when it yields text, grouped items, or source links.
func FeaturesFromPage ¶ added in v0.7.15
func FeaturesFromPage(page *rod.Page, extract func(*goquery.Document) []SerpFeature) []SerpFeature
FeaturesFromPage renders a live rod page to HTML and runs a document-level feature extractor over it. Every engine's browser path shares this boilerplate (page.HTML -> goquery doc -> extract), so it lives here rather than being copied per engine. Returns nil on any rendering/parse error.
type SerpFeatureSelector ¶ added in v0.7.15
type SerpFeatureSelector struct {
Type ResultType
Title string
Container []string
TitleSelector []string
TextSelector []string
ItemSelector []string
LinkSelector []string
Position int
Confidence float64
// SingleMatch emits at most one feature for this spec: the first container
// node (across Container selectors, in order) that yields content. Use it
// for modules whose container selector also matches nested sub-panels, which
// would otherwise fragment one logical module into many features.
SingleMatch bool
}
SerpFeatureSelector describes one engine-native SERP module shape.
type Server ¶
type Server struct {
// contains filtered or unexported fields
}
Server exposes OpenSERP HTTP endpoints backed by one or more search engines.
func NewServer ¶
func NewServer(host string, port int, searchEngines ...SearchEngine) *Server
NewServer creates a Server with DefaultServerOptions and registers all routes for the provided engines.
func NewServerWithOptions ¶ added in v0.6.0
func NewServerWithOptions(host string, port int, opts ServerOptions, searchEngines ...SearchEngine) *Server
NewServerWithOptions builds a Server, installs middleware, and registers API routes. The returned server is ready to Listen; call Shutdown for graceful stop.
func (*Server) SetDraining ¶ added in v0.7.2
SetDraining controls readiness state exposed by /ready.
type ServerOptions ¶ added in v0.6.0
type ServerOptions struct {
// CacheTTL controls response cache entry lifetime. Zero disables caching.
CacheTTL time.Duration
// CacheMaxSize is the maximum number of cached entries.
CacheMaxSize int
// EnableCORS enables cross-origin headers with the CORS config below.
EnableCORS bool
// CORS contains allowed origins, methods, and headers when CORS is enabled.
CORS CORSConfig
// AllowEndpointFallback allows dedicated engine routes to fall back to other
// healthy engines when the primary engine fails.
AllowEndpointFallback bool
// EnableDebugEndpoints enables debug-only routes such as fingerprint checks.
EnableDebugEndpoints bool
// FingerprintArtifactDir is where debug fingerprint screenshots are written.
FingerprintArtifactDir string
// FingerprintBrowserOpts are the defaults for debug fingerprint runs.
FingerprintBrowserOpts BrowserOpts
// Resilience defines retry/circuit-breaker/proxy strategy settings.
Resilience ResilientConfig
// MegaTimeout bounds total wait time for a /mega/* request. When a
// mega request exceeds this deadline, engines that have already
// responded contribute their results and slower engines are reported
// as failed with a context-deadline error. Zero disables the bound
// (legacy behavior — wait until the slowest engine finishes).
MegaTimeout time.Duration
}
ServerOptions configures HTTP server middleware and resilience behavior.
func DefaultServerOptions ¶ added in v0.6.0
func DefaultServerOptions() ServerOptions
DefaultServerOptions returns production-oriented defaults for cache, CORS, and resilient search policies.
Source Files
¶
- browser.go
- browser_profile_usage.go
- cache.go
- captcha.go
- circuit_breaker.go
- clusters.go
- common.go
- context.go
- enrichment_domain.go
- errors.go
- feature_selectors.go
- format_markdown.go
- format_text.go
- html_parser.go
- http_client.go
- locale.go
- logger.go
- middleware.go
- network_usage.go
- page_helpers.go
- profile_context.go
- proxy.go
- proxy_context.go
- proxy_lane.go
- region.go
- resilient.go
- response.go
- response_builder.go
- result.go
- retry.go
- server.go
Directories
¶
| Path | Synopsis |
|---|---|
| region | Package region resolves a free-text region hint into per-engine search targeting (Google UULE canonical names, Yandex lr IDs, ISO country codes). |