Documentation
¶
Index ¶
- Constants
- Variables
- func BuildAcceptLanguageHeader(langCode string) string
- func BuildCacheKey(engine string, action string, q Query) string
- func CORSMiddleware(cfg CORSConfig) fiber.Handler
- func CaptchaSolverMetrics() map[string]uint64
- func ClosePageWithTimeout(ctx context.Context, page *rod.Page, timeout time.Duration) error
- func ComputePagination(start int, pageSize int) (int, int, error)
- func ConvertSearchResultsMap(searchResultsMap map[string]SearchResult) *[]SearchResult
- func DrainAndCloseResponse(resp *http.Response)
- func EnsureContext(ctx context.Context) context.Context
- func InitLogger(isVerbose, isDebug bool, format string)
- func IsAuthenticatedSocksProxyURL(raw string) bool
- func IsContextDone(err error) bool
- func IsProxyNetworkError(err error) bool
- func IsRodObjectNotFound(err error) bool
- func JSONErrorMiddleware() fiber.ErrorHandler
- func MaskProxyURL(raw string) string
- func MustParseBlockedResourceTypes(raw string) []proto.NetworkResourceType
- func NewRawHTTPClient(query Query) (*http.Client, error)
- func NormalizeLogFormat(raw string) (string, error)
- func NormalizeProxyRequestOverride(raw string) (string, error)
- func NormalizeProxyTag(raw string) (string, error)
- func NormalizeProxyURL(raw string) (string, error)
- func NormalizeProxyURLs(rawURLs []string) ([]string, error)
- func NormalizeURLForClustering(rawURL string) string
- func ParseBlockedResourceTypes(raw string) ([]proto.NetworkResourceType, error)
- func PrimaryLanguageTag(langCode string) string
- func QueryHash(raw string) string
- func QueryHashFromQuery(q Query) string
- func RecoverEnginePanic(engine string, recovered interface{}, logger *EngineLogger) error
- func RecoverEnginePanicWithContext(ctx context.Context, engine string, recovered interface{}, ...) error
- func RenderMarkdown(env *Envelope) []byte
- func RenderMarkdownImage(env *ImageEnvelope) []byte
- func RenderNDJSON(env *Envelope) []byte
- func RenderNDJSONImage(env *ImageEnvelope) []byte
- func RenderText(env *Envelope) []byte
- func RenderTextImage(env *ImageEnvelope) []byte
- func RequestContextMiddleware() fiber.Handler
- func RequestIDFromContext(ctx context.Context) string
- func RequestLoggerMiddleware() fiber.Handler
- func ResultID(engine, rawURL string) string
- func SetAcceptLanguageHeader(req *http.Request, langCode string)
- func SleepContext(ctx context.Context, d time.Duration) error
- func WithEngine(ctx context.Context, engine string) context.Context
- func WithProfileRegion(ctx context.Context, region string) context.Context
- func WithQueryHash(ctx context.Context, queryHash string) context.Context
- func WithRequest(ctx context.Context) *logrus.Entry
- func WithRequestEngine(ctx context.Context, engine string) *logrus.Entry
- func WithRequestID(ctx context.Context, requestID string) context.Context
- func WithTenant(ctx context.Context, tenant string) context.Context
- type APIError
- type Browser
- type BrowserOpts
- type CORSConfig
- type CacheEntry
- type CaptchaSolver
- type CircuitBreaker
- type CircuitBreakerConfig
- type CircuitBreakerManager
- type CircuitState
- type Classification
- type Cluster
- type ClusterOccurrence
- type DomainInfo
- type EngineHealth
- type EngineLogger
- func (el *EngineLogger) Debug(message string, args ...any)
- func (el *EngineLogger) Error(message string, args ...any)
- func (el *EngineLogger) Fatal(message string, args ...any)
- func (el *EngineLogger) Fields(fields logrus.Fields) *EngineLogger
- func (el *EngineLogger) Info(message string, args ...any)
- func (el *EngineLogger) Panic(message string, args ...any)
- func (el *EngineLogger) Warn(message string, args ...any)
- func (el *EngineLogger) WithRequest(ctx context.Context) *EngineLogger
- type EnrichContext
- type Envelope
- type HealthStatus
- type ImageData
- type ImageEnvelope
- type ImageResult
- type ImageSource
- type JSONErrorResponse
- type Locale
- type MegaSearchResult
- type Pagination
- type Position
- type ProxiesConfig
- type ProxiesHealthConfig
- type ProxyConfig
- type ProxyEngineStats
- type ProxyEntryConfig
- type ProxyExecutionMeta
- type ProxyPolicy
- type ProxyRegistry
- func (r *ProxyRegistry) BuildStats() ProxyStats
- func (r *ProxyRegistry) HasHealthyProxyForTag(tag string) bool
- func (r *ProxyRegistry) NextByTag(tag string) string
- func (r *ProxyRegistry) NextByTagWithContext(ctx context.Context, tag string) string
- func (r *ProxyRegistry) ReportFailure(ctx context.Context, proxyURL string)
- func (r *ProxyRegistry) ReportSuccess(_ context.Context, proxyURL string)
- type ProxyStats
- type ProxyStatsEntry
- type ProxyTagSummary
- type Query
- type QueryEcho
- type ReadinessStatus
- type ResilientConfig
- type ResilientSearcher
- func (rs *ResilientSearcher) GetCircuitBreakerStats() []map[string]interface{}
- func (rs *ResilientSearcher) GetProxyStats() ProxyStats
- func (rs *ResilientSearcher) ResolveMegaProxyMeta(q Query, engines []SearchEngine) ProxyExecutionMeta
- func (rs *ResilientSearcher) SearchAllImageParallel(ctx context.Context, q Query, engines []SearchEngine) ([]MegaSearchResult, []string, []string)
- func (rs *ResilientSearcher) SearchAllParallel(ctx context.Context, q Query, engines []SearchEngine) ([]MegaSearchResult, []string, []string)
- func (rs *ResilientSearcher) SearchImagePrimary(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
- func (rs *ResilientSearcher) SearchImageWithFallback(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
- func (rs *ResilientSearcher) SearchPrimary(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
- func (rs *ResilientSearcher) SearchWithFallback(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
- type ResponseCache
- type ResponseMeta
- type Result
- type ResultType
- type RetryConfig
- type RetryResult
- type SearchEngine
- type SearchEngineOptions
- type SearchResult
- type Server
- type ServerOptions
Constants ¶
const ( ReasonInvalidLimit = "INVALID_LIMIT" ReasonInvalidStart = "INVALID_START" ReasonInvalidParam = "INVALID_PARAM" ReasonEmptyQuery = "EMPTY_QUERY" ReasonNoEngines = "NO_ENGINES" ReasonUnknownFormat = "UNKNOWN_FORMAT" )
Common validation reason codes.
const ( LogFormatJSON = "json" LogFormatText = "text" )
const ( ProxyRuntimeBrowser = "browser" ProxyRuntimeRaw = "raw" ProxyModeOff = "off" ProxyModeTagPool = "tag_pool" DefaultProxyFailureThreshold = 3 ProxyOverrideDirect = "direct" // ProxyPoolQuarantineDuration is how long an exhausted tag pool stays quarantined // before a single probe proxy is re-enabled for recovery testing. ProxyPoolQuarantineDuration = 5 * time.Minute )
const MaxQueryLimit = 100
MaxQueryLimit is the maximum allowed value for the limit parameter.
Variables ¶
var DefaultFingerprintArtifactDir = filepath.Join("core", "testdata")
DefaultFingerprintArtifactDir is the artifact directory used when none is configured. It is relative to the server's working directory at start time.
var ErrAllEnginesFailed = fmt.Errorf("all search engines failed")
var ErrCaptcha = errors.New("captcha detected")
ErrCaptcha is returned when the engine detects a captcha challenge page. This error is treated as non-retryable by resilient search policies.
var ErrCircuitOpen = fmt.Errorf("circuit breaker is open - engine temporarily disabled")
var ErrEmptyResult = errors.New("empty_result")
ErrEmptyResult signals a successful fetch that returned zero organic results. It is not a failure; the proxy stays healthy and no credit is charged.
var ErrEngineInternal = errors.New("engine internal error")
ErrEngineInternal is returned when an engine recovered from an unexpected panic and converted it into a typed error.
var ErrParser = errors.New("parser failure")
ErrParser is returned when SERP parsing selectors drift or expected fields cannot be extracted from an otherwise loaded page.
var ErrProxyAuth = errors.New("proxy_auth")
ErrProxyAuth is returned when proxy credentials are rejected. Proxy health is degraded on this error.
var ErrProxyConnect = errors.New("proxy_connect")
ErrProxyConnect is returned when the proxy cannot establish a network connection. Proxy health is degraded on this error.
var ErrSearchTimeout = errors.New("timeout. Cannot find element on page")
ErrSearchTimeout is returned when required SERP elements are not found before selector or page timeouts expire.
var ErrTimeout = errors.New("timeout")
ErrTimeout is returned when a network-level timeout occurs on the proxy path. Proxy health is degraded on this error.
Functions ¶
func BuildAcceptLanguageHeader ¶ added in v0.7.2
BuildAcceptLanguageHeader formats an Accept-Language value from a lang code. Example: "de" -> "de-DE,de;q=0.9", "en-GB" -> "en-GB,en;q=0.9", "sw" -> "sw".
func BuildCacheKey ¶ added in v0.6.0
func CORSMiddleware ¶ added in v0.6.0
func CORSMiddleware(cfg CORSConfig) fiber.Handler
func CaptchaSolverMetrics ¶ added in v0.7.2
func ClosePageWithTimeout ¶ added in v0.7.2
ClosePageWithTimeout bounds page close calls so shutdown paths don't hang.
func ComputePagination ¶ added in v0.6.0
ComputePagination translates an absolute start offset into a page index and an in-page offset for a fixed page size.
func ConvertSearchResultsMap ¶ added in v0.4.1
func ConvertSearchResultsMap(searchResultsMap map[string]SearchResult) *[]SearchResult
ConvertSearchResultsMap converts a map-based collection to a rank-sorted slice and returns it by pointer.
func DrainAndCloseResponse ¶ added in v0.7.2
DrainAndCloseResponse drains unread bytes before closing so HTTP transports can safely reuse connections when callers don't consume the full body.
func EnsureContext ¶ added in v0.7.2
EnsureContext returns ctx when it is set; otherwise it returns a non-nil placeholder context.
func InitLogger ¶
func IsAuthenticatedSocksProxyURL ¶ added in v0.6.0
func IsContextDone ¶ added in v0.7.2
IsContextDone reports whether err is a cancellation/deadline error.
func IsProxyNetworkError ¶ added in v0.7.2
IsProxyNetworkError reports whether err is a network-level error that indicates a faulty proxy (connect failure, auth rejection, or timeout). Parser drift, captcha pages, and engine errors must NOT degrade proxy health.
func IsRodObjectNotFound ¶ added in v0.7.2
IsRodObjectNotFound reports element/object lookup misses across rod error variants used by selector calls.
func JSONErrorMiddleware ¶ added in v0.6.0
func JSONErrorMiddleware() fiber.ErrorHandler
func MaskProxyURL ¶ added in v0.6.0
func MustParseBlockedResourceTypes ¶ added in v0.7.2
func MustParseBlockedResourceTypes(raw string) []proto.NetworkResourceType
MustParseBlockedResourceTypes is like ParseBlockedResourceTypes but panics on error. Only call this after the value has already been validated by ParseBlockedResourceTypes.
func NormalizeLogFormat ¶ added in v0.7.2
func NormalizeProxyRequestOverride ¶ added in v0.6.0
func NormalizeProxyTag ¶ added in v0.6.0
func NormalizeProxyURL ¶ added in v0.6.0
func NormalizeProxyURLs ¶ added in v0.6.0
func NormalizeURLForClustering ¶ added in v0.7.2
NormalizeURLForClustering returns a URL suitable for cross-engine grouping (same as normalizeURL but exported for use in cluster building).
func ParseBlockedResourceTypes ¶ added in v0.7.2
func ParseBlockedResourceTypes(raw string) ([]proto.NetworkResourceType, error)
ParseBlockedResourceTypes parses a comma-separated config value into NetworkResourceType values accepted by the request blocker.
func PrimaryLanguageTag ¶ added in v0.7.2
PrimaryLanguageTag returns the BCP47 primary tag for a lang code, filling in a default country for bare languages (e.g. "de" -> "de-DE"). Returns "" when the input has no language subtag.
func QueryHashFromQuery ¶ added in v0.7.2
func RecoverEnginePanic ¶ added in v0.7.2
func RecoverEnginePanic(engine string, recovered interface{}, logger *EngineLogger) error
RecoverEnginePanic converts a recovered panic into a typed engine error and logs the stack trace with engine context.
func RecoverEnginePanicWithContext ¶ added in v0.7.2
func RecoverEnginePanicWithContext(ctx context.Context, engine string, recovered interface{}, logger *EngineLogger) error
func RenderMarkdown ¶ added in v0.7.2
RenderMarkdown formats an Envelope as a Markdown document suitable for Slack/Discord/email nodes in n8n workflows.
func RenderMarkdownImage ¶ added in v0.7.2
func RenderMarkdownImage(env *ImageEnvelope) []byte
RenderMarkdownImage formats an ImageEnvelope as Markdown.
func RenderNDJSON ¶ added in v0.7.2
RenderNDJSON formats an Envelope as newline-delimited JSON (one Result per line). The envelope meta is omitted from the body; clients should read response headers.
func RenderNDJSONImage ¶ added in v0.7.2
func RenderNDJSONImage(env *ImageEnvelope) []byte
RenderNDJSONImage formats an ImageEnvelope as newline-delimited JSON.
func RenderText ¶ added in v0.7.2
RenderText formats an Envelope as a minimal plain-text block optimised for LLM context windows (~25-30% fewer tokens than JSON for the same data).
func RenderTextImage ¶ added in v0.7.2
func RenderTextImage(env *ImageEnvelope) []byte
RenderTextImage formats an ImageEnvelope as plain text.
func RequestContextMiddleware ¶ added in v0.7.2
func RequestIDFromContext ¶ added in v0.7.2
func RequestLoggerMiddleware ¶ added in v0.6.0
func SetAcceptLanguageHeader ¶ added in v0.7.2
SetAcceptLanguageHeader sets the Accept-Language header from a lang code. No-op when the code has no language subtag.
func SleepContext ¶ added in v0.7.2
SleepContext blocks for d or until ctx is canceled.
func WithEngine ¶ added in v0.7.2
func WithProfileRegion ¶ added in v0.7.2
func WithQueryHash ¶ added in v0.7.2
func WithRequestEngine ¶ added in v0.7.2
func WithRequestID ¶ added in v0.7.2
Types ¶
type APIError ¶ added in v0.7.2
APIError represents a client-facing error with a stable machine-readable reason code.
type Browser ¶
type Browser struct {
BrowserOpts
CaptchaSolver *CaptchaSolver
// contains filtered or unexported fields
}
Browser wraps a launched Chromium instance used by engine implementations.
func NewBrowser ¶
func NewBrowser(opts BrowserOpts) (*Browser, error)
NewBrowser launches a new Chromium process via Rod launcher and returns a Browser wrapper configured with proxy and captcha solver settings.
func (*Browser) IsInitialized ¶
IsInitialized reports whether the browser launcher has been created.
type BrowserOpts ¶
type BrowserOpts struct {
// IsHeadless runs Chromium without visible UI.
IsHeadless bool
// IsLeakless forces child browser process cleanup when the parent exits.
IsLeakless bool
// Timeout is applied to browser connect and page navigation operations.
Timeout time.Duration
// LanguageCode sets Accept-Language for emulated requests.
LanguageCode string
// WaitRequests waits for request-idle state after navigation.
WaitRequests bool
// LeavePageOpen keeps pages open after search operations.
LeavePageOpen bool
// WaitLoadTime is kept for config backwards-compatibility but no longer used;
// Navigate now calls WaitStable instead.
WaitLoadTime time.Duration
// CaptchaSolverApiKey enables 2Captcha integration for supported engines.
CaptchaSolverApiKey string
// CaptchaSolverEnabled gates solver invocation regardless of engine flags.
CaptchaSolverEnabled bool
// BrowserPath optionally points to a specific browser executable.
BrowserPath string
// ProxyURL defines the upstream proxy for browser traffic.
ProxyURL string
// Insecure allows invalid TLS certificates for browser requests.
Insecure bool
// UserAgent optionally overrides browser-reported user agent during emulation.
UserAgent string
// BlockResourceTypes are blocked during page navigation when non-empty.
// Typical tokens map to these types: image, font, css(stylesheet), js(script), media.
BlockResourceTypes []proto.NetworkResourceType
// BlockTrackers toggles static tracker-domain blocking.
BlockTrackers bool
}
BrowserOpts configures Chromium launch and navigation behavior.
func (*BrowserOpts) Check ¶ added in v0.2.1
func (o *BrowserOpts) Check()
Check applies default option values when optional fields are unset.
type CORSConfig ¶ added in v0.6.0
func DefaultCORSConfig ¶ added in v0.6.0
func DefaultCORSConfig() CORSConfig
type CacheEntry ¶ added in v0.6.0
type CaptchaSolver ¶ added in v0.4.1
type CaptchaSolver struct {
// contains filtered or unexported fields
}
func NewSolver ¶ added in v0.4.1
func NewSolver(apikey string) *CaptchaSolver
func (*CaptchaSolver) SolveReCaptcha2 ¶ added in v0.4.1
func (cs *CaptchaSolver) SolveReCaptcha2(sitekey, pageURL, dataS, proxyURL string) (string, string, error)
type CircuitBreaker ¶ added in v0.6.0
type CircuitBreaker struct {
// contains filtered or unexported fields
}
CircuitBreaker tracks failure state for one engine.
func NewCircuitBreaker ¶ added in v0.6.0
func NewCircuitBreaker(name string, cfg CircuitBreakerConfig) *CircuitBreaker
func (*CircuitBreaker) AllowRequest ¶ added in v0.6.0
func (cb *CircuitBreaker) AllowRequest(ctx context.Context) bool
func (*CircuitBreaker) RecordFailure ¶ added in v0.6.0
func (cb *CircuitBreaker) RecordFailure(ctx context.Context)
func (*CircuitBreaker) RecordSuccess ¶ added in v0.6.0
func (cb *CircuitBreaker) RecordSuccess(ctx context.Context)
func (*CircuitBreaker) State ¶ added in v0.6.0
func (cb *CircuitBreaker) State() CircuitState
func (*CircuitBreaker) Stats ¶ added in v0.6.0
func (cb *CircuitBreaker) Stats() map[string]interface{}
type CircuitBreakerConfig ¶ added in v0.6.0
type CircuitBreakerConfig struct {
FailureThreshold int
RecoveryTimeout time.Duration
SuccessThreshold int
}
func DefaultCircuitBreakerConfig ¶ added in v0.6.0
func DefaultCircuitBreakerConfig() CircuitBreakerConfig
type CircuitBreakerManager ¶ added in v0.6.0
type CircuitBreakerManager struct {
// contains filtered or unexported fields
}
func NewCircuitBreakerManager ¶ added in v0.6.0
func NewCircuitBreakerManager(cfg CircuitBreakerConfig) *CircuitBreakerManager
func (*CircuitBreakerManager) AllStats ¶ added in v0.6.0
func (m *CircuitBreakerManager) AllStats() []map[string]interface{}
func (*CircuitBreakerManager) Get ¶ added in v0.6.0
func (m *CircuitBreakerManager) Get(engineName string) *CircuitBreaker
type CircuitState ¶ added in v0.6.0
type CircuitState int
const ( CircuitClosed CircuitState = iota CircuitOpen CircuitHalfOpen )
func (CircuitState) String ¶ added in v0.6.0
func (s CircuitState) String() string
type Classification ¶ added in v0.7.2
type Classification struct {
ContentType string `json:"content_type"`
SourceHint string `json:"source_hint"`
}
Classification holds URL-path heuristic hints for downstream consumers.
func ClassifyURL ¶ added in v0.7.2
func ClassifyURL(rawURL, domain string) *Classification
ClassifyURL returns a rough content-type and source hint derived from the URL path alone; no network calls.
type Cluster ¶ added in v0.7.2
type Cluster struct {
ID string `json:"id"`
CanonicalURL string `json:"canonical_url"`
Domain string `json:"domain"`
Title string `json:"title"`
Occurrences []ClusterOccurrence `json:"occurrences"`
EnginesCount int `json:"engines_count"`
BestRank int `json:"best_rank"`
Score float64 `json:"score"`
}
Cluster groups results that refer to the same canonical URL across engines. It is populated only by /mega/search. The full type is defined in clusters.go.
func BuildClusters ¶ added in v0.7.2
BuildClusters groups results by normalized URL and scores them by cross-engine agreement. enginesQueried is the total number of engines that were asked (denominator for the score formula).
Score = sum(1/rank for each occurrence) / enginesQueried, capped at 1.0.
type ClusterOccurrence ¶ added in v0.7.2
type ClusterOccurrence struct {
Engine string `json:"engine"`
Rank int `json:"rank"`
ResultID string `json:"result_id"`
}
ClusterOccurrence links one engine result back into the flat results list.
type DomainInfo ¶ added in v0.7.2
type DomainInfo struct {
TLD string `json:"tld"`
SLD string `json:"sld"`
IsGov bool `json:"is_gov"`
IsEdu bool `json:"is_edu"`
IsMil bool `json:"is_mil"`
IsNews bool `json:"is_news"`
IsForum bool `json:"is_forum"`
IsMarketplace bool `json:"is_marketplace"`
IsSocial bool `json:"is_social"`
}
DomainInfo carries TLD-derived category signals for a result domain.
func EnrichDomainInfo ¶ added in v0.7.2
func EnrichDomainInfo(domain string) *DomainInfo
EnrichDomainInfo derives TLD/category signals from a bare hostname.
type EngineHealth ¶ added in v0.6.0
type EngineHealth struct {
Name string `json:"name"`
Initialized bool `json:"initialized"`
Status string `json:"status"`
}
EngineHealth describes availability of one configured engine.
type EngineLogger ¶ added in v0.5.3
type EngineLogger struct {
// contains filtered or unexported fields
}
EngineLogger provides structured logging for search engines with a fixed engine field.
func NewEngineLogger ¶ added in v0.5.3
func NewEngineLogger(engine string) *EngineLogger
func (*EngineLogger) Debug ¶ added in v0.5.3
func (el *EngineLogger) Debug(message string, args ...any)
func (*EngineLogger) Error ¶ added in v0.5.3
func (el *EngineLogger) Error(message string, args ...any)
func (*EngineLogger) Fatal ¶ added in v0.5.3
func (el *EngineLogger) Fatal(message string, args ...any)
func (*EngineLogger) Fields ¶ added in v0.7.2
func (el *EngineLogger) Fields(fields logrus.Fields) *EngineLogger
Fields returns a new EngineLogger with additional structured fields merged in.
func (*EngineLogger) Info ¶ added in v0.5.3
func (el *EngineLogger) Info(message string, args ...any)
func (*EngineLogger) Panic ¶ added in v0.5.3
func (el *EngineLogger) Panic(message string, args ...any)
func (*EngineLogger) Warn ¶ added in v0.5.3
func (el *EngineLogger) Warn(message string, args ...any)
func (*EngineLogger) WithRequest ¶ added in v0.7.2
func (el *EngineLogger) WithRequest(ctx context.Context) *EngineLogger
type EnrichContext ¶ added in v0.7.2
EnrichContext carries request-scoped values needed to enrich a raw result.
type Envelope ¶ added in v0.7.2
type Envelope struct {
Query QueryEcho `json:"query"`
Meta ResponseMeta `json:"meta"`
Results []Result `json:"results"`
Pagination Pagination `json:"pagination"`
// Clusters is only populated by /mega/search (see clusters.go).
Clusters *[]Cluster `json:"clusters,omitempty"`
}
Envelope is the top-level v1 response wrapper for all search endpoints.
func NewEnvelope ¶ added in v0.7.2
NewEnvelope builds a fresh Envelope pre-filled with query echo and an open meta block. Call Finalize before serializing.
type HealthStatus ¶ added in v0.6.0
type HealthStatus struct {
Status string `json:"status"`
Uptime string `json:"uptime"`
Engines []EngineHealth `json:"engines"`
System map[string]interface{} `json:"system"`
}
HealthStatus is returned by /health and summarizes service state.
type ImageData ¶ added in v0.7.2
type ImageData struct {
URL string `json:"url"`
Thumbnail string `json:"thumbnail,omitempty"`
Width int `json:"width,omitempty"`
Height int `json:"height,omitempty"`
}
ImageData holds image-specific URL and dimension fields.
type ImageEnvelope ¶ added in v0.7.2
type ImageEnvelope struct {
Query QueryEcho `json:"query"`
Meta ResponseMeta `json:"meta"`
Results []ImageResult `json:"results"`
Pagination Pagination `json:"pagination"`
}
ImageEnvelope is the top-level v1 response wrapper for image search endpoints.
func NewImageEnvelope ¶ added in v0.7.2
func NewImageEnvelope(q Query, requestID string, startedAt time.Time, engines []string) *ImageEnvelope
NewImageEnvelope builds a fresh ImageEnvelope.
type ImageResult ¶ added in v0.7.2
type ImageResult struct {
ID string `json:"id"`
Rank int `json:"rank"`
Type ResultType `json:"type"`
Title string `json:"title"`
Image ImageData `json:"image"`
Source ImageSource `json:"source"`
Engine string `json:"engine"`
}
ImageResult is the v1 shape for image search results.
func EnrichImageResult ¶ added in v0.7.2
func EnrichImageResult(raw SearchResult, ctx EnrichContext) ImageResult
EnrichImageResult converts a raw engine result into the v1 ImageResult shape.
type ImageSource ¶ added in v0.7.2
ImageSource holds page-level context for an image result.
type JSONErrorResponse ¶ added in v0.6.0
type Locale ¶ added in v0.7.2
Locale is a parsed language/region pair derived from a BCP47-style code. Language is the lowercase 2-letter language subtag (e.g. "en", "de"). Country is the uppercase 2-letter region subtag (e.g. "US", "DE"); it may be empty when the input had no region and the caller did not request a default.
func ParseLocale ¶ added in v0.7.2
ParseLocale parses a language code such as "en", "EN-us", or "de_AT" into a Locale. Returns the zero value when the input is empty or has no language subtag. Country is uppercased; Language is lowercased.
type MegaSearchResult ¶ added in v0.5.3
type MegaSearchResult struct {
SearchResult
Engine string `json:"engine"`
}
MegaSearchResult extends SearchResult with the engine source name.
type Pagination ¶ added in v0.7.2
type Pagination struct {
Page int `json:"page"`
HasMore bool `json:"has_more"`
NextStart int `json:"next_start"`
}
Pagination carries cursor information for client-side loop termination.
type Position ¶ added in v0.7.2
type Position struct {
// Absolute is the 1-based rank counting from the first result of the first page.
Absolute int `json:"absolute"`
// Page is the 1-based page number derived from start/limit.
Page int `json:"page"`
// OnPage is the 1-based rank within this page.
OnPage int `json:"on_page"`
}
Position describes where a result sits in the overall result stream.
type ProxiesConfig ¶ added in v0.6.0
type ProxiesConfig struct {
Global string `json:"global,omitempty" mapstructure:"global"`
Entries []ProxyEntryConfig `json:"entries" mapstructure:"entries"`
Health ProxiesHealthConfig `json:"health" mapstructure:"health"`
}
func DefaultProxiesConfig ¶ added in v0.6.0
func DefaultProxiesConfig() ProxiesConfig
func NormalizeProxiesConfig ¶ added in v0.6.0
func NormalizeProxiesConfig(cfg ProxiesConfig) (ProxiesConfig, error)
type ProxiesHealthConfig ¶ added in v0.6.0
type ProxiesHealthConfig struct {
FailureThreshold int `json:"failure_threshold" mapstructure:"failure_threshold"`
}
type ProxyConfig ¶ added in v0.6.0
type ProxyConfig struct {
Runtime string // raw or browser runtime behavior
Proxies ProxiesConfig // canonical proxy inventory
EnginePolicies map[string]string // engine-specific proxy tags
Registry *ProxyRegistry // optional shared registry from caller
}
func DefaultProxyConfig ¶ added in v0.6.0
func DefaultProxyConfig() ProxyConfig
func NormalizeProxyConfig ¶ added in v0.6.0
func NormalizeProxyConfig(cfg ProxyConfig) (ProxyConfig, error)
type ProxyEngineStats ¶ added in v0.6.0
type ProxyEntryConfig ¶ added in v0.6.0
type ProxyExecutionMeta ¶ added in v0.6.0
type ProxyPolicy ¶ added in v0.6.0
type ProxyPolicy struct {
Mode string `json:"mode" mapstructure:"mode"`
Tag string `json:"tag,omitempty" mapstructure:"tag"`
}
func ResolveEffectiveProxyPolicy ¶ added in v0.6.0
func ResolveEffectiveProxyPolicy(globalProxyURL string, engineTag string) ProxyPolicy
type ProxyRegistry ¶ added in v0.6.0
type ProxyRegistry struct {
// contains filtered or unexported fields
}
func NewProxyRegistry ¶ added in v0.6.0
func NewProxyRegistry(entries []ProxyEntryConfig, failureThreshold int) (*ProxyRegistry, error)
func (*ProxyRegistry) BuildStats ¶ added in v0.6.0
func (r *ProxyRegistry) BuildStats() ProxyStats
func (*ProxyRegistry) HasHealthyProxyForTag ¶ added in v0.6.0
func (r *ProxyRegistry) HasHealthyProxyForTag(tag string) bool
func (*ProxyRegistry) NextByTag ¶ added in v0.6.0
func (r *ProxyRegistry) NextByTag(tag string) string
func (*ProxyRegistry) NextByTagWithContext ¶ added in v0.7.2
func (r *ProxyRegistry) NextByTagWithContext(ctx context.Context, tag string) string
func (*ProxyRegistry) ReportFailure ¶ added in v0.6.0
func (r *ProxyRegistry) ReportFailure(ctx context.Context, proxyURL string)
ReportFailure increments the failure counter for proxyURL. The proxy is disabled once the failure threshold is reached. If the owning tag pool becomes fully exhausted, a quarantine timer is started so that NextByTagWithContext will not immediately re-enable all proxies.
Only proxy-network errors (ErrProxyConnect, ErrProxyAuth, ErrTimeout) should degrade proxy health. Callers must not call this for captcha or parser errors.
func (*ProxyRegistry) ReportSuccess ¶ added in v0.6.0
func (r *ProxyRegistry) ReportSuccess(_ context.Context, proxyURL string)
type ProxyStats ¶ added in v0.6.0
type ProxyStats struct {
ConfiguredCount int `json:"configured_count"`
HealthyCount int `json:"healthy_count"`
UnhealthyCount int `json:"unhealthy_count"`
Tags map[string]ProxyTagSummary `json:"tags"`
Entries []ProxyStatsEntry `json:"entries"`
Engines map[string]ProxyEngineStats `json:"engines,omitempty"`
}
type ProxyStatsEntry ¶ added in v0.6.0
type ProxyTagSummary ¶ added in v0.6.0
type Query ¶
type Query struct {
// Text is the search phrase, for example "golang fiber tutorial".
Text string
// LangCode is an engine language hint such as "EN", "DE", or "RU".
LangCode string
// DateInterval filters by date range in YYYYMMDD..YYYYMMDD format.
// Example: "20250101..20250331".
DateInterval string
// Filetype is a file extension filter, for example "pdf" or "docx".
Filetype string
// Site restricts results to a specific domain, for example "github.com".
Site string
// Limit is the maximum number of results requested by the client.
Limit int
// Start is an engine pagination offset. Values are engine-specific:
// Google commonly uses 0,10,20 while some engines use page indexes.
Start int
// Filter controls duplicate filtering when supported by the engine.
// For Google, false includes similar results and true hides them.
Filter bool
// Answers enables parsing answer modules when supported by the engine.
// Such entries may be returned with negative rank values.
Answers bool
// ProxyURL is a direct proxy URL used by raw HTTP search paths.
ProxyURL string
// ProxyOverride is a request-scoped proxy policy override (tag or "direct"),
// typically parsed from the X-Use-Proxy header.
ProxyOverride string
// Insecure enables insecure TLS for request/browser execution.
Insecure bool
}
Query holds request parameters used by HTTP handlers and search engines. Example minimal query: Query{Text: "golang", Limit: 10}.
func (*Query) InitFromContext ¶
InitFromContext populates Query from HTTP query parameters and request headers. It validates numeric/boolean inputs and returns an *APIError for invalid client input (400) or a plain error for internal failures.
type QueryEcho ¶ added in v0.7.2
type QueryEcho struct {
Text string `json:"text"`
Lang string `json:"lang,omitempty"`
EnginesRequested []string `json:"engines_requested"`
}
QueryEcho echoes the interpreted query parameters back to the client.
type ReadinessStatus ¶ added in v0.7.2
type ReadinessStatus struct {
Status string `json:"status"`
}
ReadinessStatus is returned by /ready to indicate if this instance can receive new traffic.
type ResilientConfig ¶ added in v0.6.0
type ResilientConfig struct {
Retry RetryConfig
CircuitBreaker CircuitBreakerConfig
Proxy ProxyConfig
}
func DefaultResilientConfig ¶ added in v0.6.0
func DefaultResilientConfig() ResilientConfig
type ResilientSearcher ¶ added in v0.6.0
type ResilientSearcher struct {
// contains filtered or unexported fields
}
ResilientSearcher wraps engines with retry and circuit breaker protection.
func NewResilientSearcher ¶ added in v0.6.0
func NewResilientSearcher(engines []SearchEngine, cfg ResilientConfig) *ResilientSearcher
func (*ResilientSearcher) GetCircuitBreakerStats ¶ added in v0.6.0
func (rs *ResilientSearcher) GetCircuitBreakerStats() []map[string]interface{}
func (*ResilientSearcher) GetProxyStats ¶ added in v0.6.0
func (rs *ResilientSearcher) GetProxyStats() ProxyStats
func (*ResilientSearcher) ResolveMegaProxyMeta ¶ added in v0.6.0
func (rs *ResilientSearcher) ResolveMegaProxyMeta(q Query, engines []SearchEngine) ProxyExecutionMeta
func (*ResilientSearcher) SearchAllImageParallel ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchAllImageParallel(ctx context.Context, q Query, engines []SearchEngine) ([]MegaSearchResult, []string, []string)
func (*ResilientSearcher) SearchAllParallel ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchAllParallel(ctx context.Context, q Query, engines []SearchEngine) ([]MegaSearchResult, []string, []string)
SearchAllParallel applies retry and circuit-breaker protections per engine for mega search. It returns the results, the list of engines that responded, and the list of engines that failed.
func (*ResilientSearcher) SearchImagePrimary ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchImagePrimary(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
func (*ResilientSearcher) SearchImageWithFallback ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchImageWithFallback(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
func (*ResilientSearcher) SearchPrimary ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchPrimary(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
SearchPrimary queries only the primary engine, keeping dedicated endpoints engine-pure (no fallback to other engines).
func (*ResilientSearcher) SearchWithFallback ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchWithFallback(ctx context.Context, primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
SearchWithFallback retries the primary engine and then tries the other initialized engines.
type ResponseCache ¶ added in v0.6.0
type ResponseCache struct {
// contains filtered or unexported fields
}
ResponseCache is a bounded in-memory TTL cache for dedicated endpoint responses.
func NewResponseCache ¶ added in v0.6.0
func NewResponseCache(ttl time.Duration, maxSize int) *ResponseCache
func (*ResponseCache) RecordBypass ¶ added in v0.6.0
func (c *ResponseCache) RecordBypass()
func (*ResponseCache) Set ¶ added in v0.6.0
func (c *ResponseCache) Set(key string, data []byte)
func (*ResponseCache) Stats ¶ added in v0.6.0
func (c *ResponseCache) Stats() map[string]interface{}
type ResponseMeta ¶ added in v0.7.2
type ResponseMeta struct {
RequestID string `json:"request_id"`
RequestedAt string `json:"requested_at"`
TookMs int64 `json:"took_ms"`
EnginesFailed []string `json:"engines_failed"`
Version string `json:"version"`
}
ResponseMeta carries request-level metadata for observability and debugging.
type Result ¶ added in v0.7.2
type Result struct {
ID string `json:"id"`
Rank int `json:"rank"`
Type ResultType `json:"type"`
Title string `json:"title"`
URL string `json:"url"`
DisplayURL string `json:"display_url"`
Snippet string `json:"snippet"`
Domain string `json:"domain"`
Favicon string `json:"favicon"`
IsAd bool `json:"is_ad"`
Position Position `json:"position"`
Engine string `json:"engine"`
DomainInfo *DomainInfo `json:"domain_info,omitempty"`
Classification *Classification `json:"classification,omitempty"`
}
Result is the v1 normalized result returned in every search response.
func EnrichResult ¶ added in v0.7.2
func EnrichResult(raw SearchResult, ctx EnrichContext) Result
EnrichResult converts a raw engine result into the v1 Result shape.
type ResultType ¶ added in v0.7.2
type ResultType string
ResultType is the SERP block type for a search result.
const (
ResultTypeOrganic ResultType = "organic"
ResultTypeAd ResultType = "ad"
ResultTypeFeaturedSnippet ResultType = "featured_snippet"
ResultTypeKnowledgePanel ResultType = "knowledge_panel"
ResultTypePeopleAlsoAsk ResultType = "people_also_ask"
ResultTypeVideo ResultType = "video"
ResultTypeImage ResultType = "image"
ResultTypeNews ResultType = "news"
ResultTypeShopping ResultType = "shopping"
ResultTypeLocal ResultType = "local"
ResultTypeAnswerBox ResultType = "answer_box"
)
func ValidateResultType ¶ added in v0.7.2
func ValidateResultType(t ResultType) (ResultType, string)
ValidateResultType returns the input type if it is a known enum value, otherwise returns ResultTypeOrganic with a warning message.
type RetryConfig ¶ added in v0.6.0
type RetryConfig struct {
MaxRetries int
InitialBackoff time.Duration
MaxBackoff time.Duration
BackoffFactor float64
}
RetryConfig controls retry behavior.
func DefaultRetryConfig ¶ added in v0.6.0
func DefaultRetryConfig() RetryConfig
type RetryResult ¶ added in v0.6.0
type RetryResult struct {
Results []SearchResult
Err error
Attempts int
Engine string
}
func RetryableSearch ¶ added in v0.6.0
func RetryableSearch(ctx context.Context, cfg RetryConfig, engineName string, searchFn func(context.Context) ([]SearchResult, error)) RetryResult
RetryableSearch executes searchFn with exponential backoff retries. CAPTCHA, parser, engine-internal, and proxy-unavailable errors are not retried.
type SearchEngine ¶
type SearchEngine interface {
// Search runs a web search request and returns normalized results.
// Implementations should return sentinel errors such as ErrCaptcha and
// ErrSearchTimeout for policy-aware handling.
Search(context.Context, Query) ([]SearchResult, error)
// SearchImage runs an image search request and returns normalized results.
SearchImage(context.Context, Query) ([]SearchResult, error)
// IsInitialized reports whether the engine is ready to serve requests.
IsInitialized() bool
// Name returns a stable engine identifier used in routes and telemetry.
Name() string
// GetRateLimiter returns an engine-specific limiter used by resilient search.
GetRateLimiter() *rate.Limiter
}
SearchEngine defines the contract required by the HTTP server and resilient search pipeline.
type SearchEngineOptions ¶ added in v0.2.1
type SearchEngineOptions struct {
// RateRequests is the allowed number of requests within RateTime seconds.
RateRequests int `mapstructure:"rate_requests"`
// RateTime defines the rate-limiting window size in seconds.
RateTime int64 `mapstructure:"rate_seconds"`
// RateBurst is the token bucket burst size for short spikes.
RateBurst int `mapstructure:"rate_burst"`
// SelectorTimeout is the per-selector wait timeout in seconds.
SelectorTimeout int64 `mapstructure:"selector_timeout"`
// IsSolveCaptcha enables automatic captcha solving when engine support and
// solver credentials are configured.
IsSolveCaptcha bool `mapstructure:"captcha"`
}
SearchEngineOptions controls engine pacing, selector waits, and captcha handling behavior shared by browser and raw implementations.
func (*SearchEngineOptions) GetRatelimit ¶ added in v0.2.1
func (o *SearchEngineOptions) GetRatelimit() time.Duration
GetRatelimit returns the interval between two allowed requests.
func (*SearchEngineOptions) GetSelectorTimeout ¶ added in v0.2.1
func (o *SearchEngineOptions) GetSelectorTimeout() time.Duration
GetSelectorTimeout returns the selector wait timeout as time.Duration.
func (*SearchEngineOptions) Init ¶ added in v0.2.1
func (o *SearchEngineOptions) Init()
Init sets default option values when fields are zero.
type SearchResult ¶
type SearchResult struct {
// Rank is a 1-based position in engine output. Some engines use negative
// ranks for non-organic blocks such as ads or instant answers.
Rank int `json:"rank"`
// URL is the canonical result URL.
URL string `json:"url"`
// Title is the result headline shown on the SERP.
Title string `json:"title"`
// Description is the snippet text associated with the result.
Description string `json:"description"`
// Ad reports whether the result is sponsored.
Ad bool `json:"ad"`
}
SearchResult represents one normalized result item returned by any engine.
func DeduplicateResults ¶ added in v0.4.1
func DeduplicateResults(results []SearchResult) []SearchResult
DeduplicateResults removes items with duplicate URLs and returns a result set sorted by rank in ascending order.
type Server ¶
type Server struct {
// contains filtered or unexported fields
}
Server exposes OpenSERP HTTP endpoints backed by one or more search engines.
func NewServer ¶
func NewServer(host string, port int, searchEngines ...SearchEngine) *Server
NewServer creates a Server with DefaultServerOptions and registers all routes for the provided engines.
func NewServerWithOptions ¶ added in v0.6.0
func NewServerWithOptions(host string, port int, opts ServerOptions, searchEngines ...SearchEngine) *Server
NewServerWithOptions builds a Server, installs middleware, and registers API routes. The returned server is ready to Listen; call Shutdown for a graceful stop.
func (*Server) SetDraining ¶ added in v0.7.2
SetDraining controls readiness state exposed by /ready.
type ServerOptions ¶ added in v0.6.0
type ServerOptions struct {
// CacheTTL controls response cache entry lifetime. Zero disables caching.
CacheTTL time.Duration
// CacheMaxSize is the maximum number of cached entries.
CacheMaxSize int
// EnableCORS enables cross-origin headers with the CORS config below.
EnableCORS bool
// CORS contains allowed origins, methods, and headers when CORS is enabled.
CORS CORSConfig
// AllowEndpointFallback allows dedicated engine routes to fall back to other
// healthy engines when the primary engine fails.
AllowEndpointFallback bool
// EnableDebugEndpoints enables debug-only routes such as fingerprint checks.
EnableDebugEndpoints bool
// FingerprintArtifactDir is where debug fingerprint screenshots are written.
FingerprintArtifactDir string
// FingerprintBrowserOpts are the defaults for debug fingerprint runs.
FingerprintBrowserOpts BrowserOpts
// Resilience defines retry/circuit-breaker/proxy strategy settings.
Resilience ResilientConfig
}
ServerOptions configures HTTP server middleware and resilience behavior.
func DefaultServerOptions ¶ added in v0.6.0
func DefaultServerOptions() ServerOptions
DefaultServerOptions returns production-oriented defaults for cache, CORS, and resilient search policies.