Documentation
¶
Index ¶
- Constants
- Variables
- func BuildCacheKey(engine string, action string, q Query) string
- func CORSMiddleware(cfg CORSConfig) fiber.Handler
- func ComputePagination(start int, pageSize int) (int, int, error)
- func ConvertSearchResultsMap(searchResultsMap map[string]SearchResult) *[]SearchResult
- func InitLogger(isVerbose, isDebug bool)
- func IsAuthenticatedSocksProxyURL(raw string) bool
- func JSONErrorMiddleware() fiber.ErrorHandler
- func LogWithEngine(level logrus.Level, engine, message string, args ...interface{})
- func MaskProxyURL(raw string) string
- func NewRawHTTPClient(query Query) (*http.Client, error)
- func NormalizeProxyRequestOverride(raw string) (string, error)
- func NormalizeProxyTag(raw string) (string, error)
- func NormalizeProxyURL(raw string) (string, error)
- func NormalizeProxyURLs(rawURLs []string) ([]string, error)
- func RequestLoggerMiddleware() fiber.Handler
- type Browser
- type BrowserOpts
- type CORSConfig
- type CacheEntry
- type CaptchaSolver
- type CircuitBreaker
- type CircuitBreakerConfig
- type CircuitBreakerManager
- type CircuitState
- type EngineHealth
- type EngineLogger
- func (el *EngineLogger) Debug(message string, args ...interface{})
- func (el *EngineLogger) Error(message string, args ...interface{})
- func (el *EngineLogger) Fatal(message string, args ...interface{})
- func (el *EngineLogger) Info(message string, args ...interface{})
- func (el *EngineLogger) Panic(message string, args ...interface{})
- func (el *EngineLogger) Warn(message string, args ...interface{})
- type HealthStatus
- type JSONErrorResponse
- type MegaSearchResult
- type ProxiesConfig
- type ProxiesHealthConfig
- type ProxyConfig
- type ProxyEngineStats
- type ProxyEntryConfig
- type ProxyExecutionMeta
- type ProxyPolicy
- type ProxyRegistry
- type ProxyStats
- type ProxyStatsEntry
- type ProxyTagSummary
- type Query
- type ResilientConfig
- type ResilientSearcher
- func (rs *ResilientSearcher) GetCircuitBreakerStats() []map[string]interface{}
- func (rs *ResilientSearcher) GetProxyStats() ProxyStats
- func (rs *ResilientSearcher) ResolveMegaProxyMeta(q Query, engines []SearchEngine) ProxyExecutionMeta
- func (rs *ResilientSearcher) SearchAllImageParallel(q Query, engines []SearchEngine) []MegaSearchResult
- func (rs *ResilientSearcher) SearchAllParallel(q Query, engines []SearchEngine) []MegaSearchResult
- func (rs *ResilientSearcher) SearchImagePrimary(primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
- func (rs *ResilientSearcher) SearchImageWithFallback(primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
- func (rs *ResilientSearcher) SearchPrimary(primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
- func (rs *ResilientSearcher) SearchWithFallback(primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
- type ResponseCache
- type RetryConfig
- type RetryResult
- type SearchEngine
- type SearchEngineOptions
- type SearchResult
- type Server
- type ServerOptions
Constants ¶
const (
ProxyRuntimeBrowser = "browser"
ProxyRuntimeRaw = "raw"
ProxyModeOff = "off"
ProxyModeTagPool = "tag_pool"
DefaultProxyFailureThreshold = 3
ProxyOverrideDirect = "direct"
)
Variables ¶
var ErrAllEnginesFailed = fmt.Errorf("all search engines failed")
var ErrCaptcha = errors.New("captcha detected")
var ErrCircuitOpen = fmt.Errorf("circuit breaker is open - engine temporarily disabled")
var ErrSearchTimeout = errors.New("timeout. Cannot find element on page")
Functions ¶
func BuildCacheKey ¶ added in v0.6.0
func CORSMiddleware ¶ added in v0.6.0
func CORSMiddleware(cfg CORSConfig) fiber.Handler
func ComputePagination ¶ added in v0.6.0
func ConvertSearchResultsMap ¶ added in v0.4.1
func ConvertSearchResultsMap(searchResultsMap map[string]SearchResult) *[]SearchResult
func InitLogger ¶
func InitLogger(isVerbose, isDebug bool)
func IsAuthenticatedSocksProxyURL ¶ added in v0.6.0
func JSONErrorMiddleware ¶ added in v0.6.0
func JSONErrorMiddleware() fiber.ErrorHandler
func LogWithEngine ¶ added in v0.5.3
LogWithEngine logs a message with engine information.
Deprecated: use EngineLogger instead.
func MaskProxyURL ¶ added in v0.6.0
func NormalizeProxyRequestOverride ¶ added in v0.6.0
func NormalizeProxyTag ¶ added in v0.6.0
func NormalizeProxyURL ¶ added in v0.6.0
func NormalizeProxyURLs ¶ added in v0.6.0
func RequestLoggerMiddleware ¶ added in v0.6.0
Types ¶
type Browser ¶
type Browser struct {
BrowserOpts
CaptchaSolver *CaptchaSolver
// contains filtered or unexported fields
}
func NewBrowser ¶
func NewBrowser(opts BrowserOpts) (*Browser, error)
func (*Browser) IsInitialized ¶
IsInitialized reports whether the browser instance has already been created.
type BrowserOpts ¶
type BrowserOpts struct {
IsHeadless bool // Use browser interface
IsLeakless bool // Force to kill browser
Timeout time.Duration // Timeout
LanguageCode string
WaitRequests bool // Wait requests to complete after navigation
LeavePageOpen bool // Leave pages and browser open
WaitLoadTime time.Duration // Time to wait till page loads
CaptchaSolverApiKey string // 2Captcha API key
BrowserPath string // Explicit browser executable path
ProxyURL string // Proxy URL
Insecure bool // Allow insecure TLS connections
UseStealth bool // Use go-rod stealth plugin
}
func (*BrowserOpts) Check ¶ added in v0.2.1
func (o *BrowserOpts) Check()
Check initializes browser parameters with default values if they are not set.
type CORSConfig ¶ added in v0.6.0
func DefaultCORSConfig ¶ added in v0.6.0
func DefaultCORSConfig() CORSConfig
type CacheEntry ¶ added in v0.6.0
type CaptchaSolver ¶ added in v0.4.1
type CaptchaSolver struct {
// contains filtered or unexported fields
}
func NewSolver ¶ added in v0.4.1
func NewSolver(apikey string) *CaptchaSolver
func (*CaptchaSolver) SolveReCaptcha2 ¶ added in v0.4.1
func (cs *CaptchaSolver) SolveReCaptcha2(sitekey, pageUrl, dataS string) (string, string, error)
type CircuitBreaker ¶ added in v0.6.0
type CircuitBreaker struct {
// contains filtered or unexported fields
}
CircuitBreaker tracks failure state for one engine.
func NewCircuitBreaker ¶ added in v0.6.0
func NewCircuitBreaker(name string, cfg CircuitBreakerConfig) *CircuitBreaker
func (*CircuitBreaker) AllowRequest ¶ added in v0.6.0
func (cb *CircuitBreaker) AllowRequest() bool
func (*CircuitBreaker) RecordFailure ¶ added in v0.6.0
func (cb *CircuitBreaker) RecordFailure()
func (*CircuitBreaker) RecordSuccess ¶ added in v0.6.0
func (cb *CircuitBreaker) RecordSuccess()
func (*CircuitBreaker) State ¶ added in v0.6.0
func (cb *CircuitBreaker) State() CircuitState
func (*CircuitBreaker) Stats ¶ added in v0.6.0
func (cb *CircuitBreaker) Stats() map[string]interface{}
type CircuitBreakerConfig ¶ added in v0.6.0
type CircuitBreakerConfig struct {
FailureThreshold int
RecoveryTimeout time.Duration
SuccessThreshold int
}
func DefaultCircuitBreakerConfig ¶ added in v0.6.0
func DefaultCircuitBreakerConfig() CircuitBreakerConfig
type CircuitBreakerManager ¶ added in v0.6.0
type CircuitBreakerManager struct {
// contains filtered or unexported fields
}
func NewCircuitBreakerManager ¶ added in v0.6.0
func NewCircuitBreakerManager(cfg CircuitBreakerConfig) *CircuitBreakerManager
func (*CircuitBreakerManager) AllStats ¶ added in v0.6.0
func (m *CircuitBreakerManager) AllStats() []map[string]interface{}
func (*CircuitBreakerManager) Get ¶ added in v0.6.0
func (m *CircuitBreakerManager) Get(engineName string) *CircuitBreaker
type CircuitState ¶ added in v0.6.0
type CircuitState int
const (
CircuitClosed CircuitState = iota
CircuitOpen
CircuitHalfOpen
)
func (CircuitState) String ¶ added in v0.6.0
func (s CircuitState) String() string
type EngineHealth ¶ added in v0.6.0
type EngineLogger ¶ added in v0.5.3
type EngineLogger struct {
// contains filtered or unexported fields
}
EngineLogger provides simplified logging for search engines
func NewEngineLogger ¶ added in v0.5.3
func NewEngineLogger(engine string) *EngineLogger
NewEngineLogger creates a new logger for a specific search engine
func (*EngineLogger) Debug ¶ added in v0.5.3
func (el *EngineLogger) Debug(message string, args ...interface{})
Debug logs a debug message
func (*EngineLogger) Error ¶ added in v0.5.3
func (el *EngineLogger) Error(message string, args ...interface{})
Error logs an error message
func (*EngineLogger) Fatal ¶ added in v0.5.3
func (el *EngineLogger) Fatal(message string, args ...interface{})
Fatal logs a fatal message
func (*EngineLogger) Info ¶ added in v0.5.3
func (el *EngineLogger) Info(message string, args ...interface{})
Info logs an info message
func (*EngineLogger) Panic ¶ added in v0.5.3
func (el *EngineLogger) Panic(message string, args ...interface{})
Panic logs a panic message
func (*EngineLogger) Warn ¶ added in v0.5.3
func (el *EngineLogger) Warn(message string, args ...interface{})
Warn logs a warning message
type HealthStatus ¶ added in v0.6.0
type HealthStatus struct {
Status string `json:"status"`
Uptime string `json:"uptime"`
Engines []EngineHealth `json:"engines"`
System map[string]interface{} `json:"system"`
}
type JSONErrorResponse ¶ added in v0.6.0
type MegaSearchResult ¶ added in v0.5.3
type MegaSearchResult struct {
SearchResult
Engine string `json:"engine"`
}
type ProxiesConfig ¶ added in v0.6.0
type ProxiesConfig struct {
Global string `json:"global,omitempty" mapstructure:"global"`
Entries []ProxyEntryConfig `json:"entries" mapstructure:"entries"`
Health ProxiesHealthConfig `json:"health" mapstructure:"health"`
}
func DefaultProxiesConfig ¶ added in v0.6.0
func DefaultProxiesConfig() ProxiesConfig
func NormalizeProxiesConfig ¶ added in v0.6.0
func NormalizeProxiesConfig(cfg ProxiesConfig) (ProxiesConfig, error)
type ProxiesHealthConfig ¶ added in v0.6.0
type ProxiesHealthConfig struct {
FailureThreshold int `json:"failure_threshold" mapstructure:"failure_threshold"`
}
type ProxyConfig ¶ added in v0.6.0
type ProxyConfig struct {
Runtime string // raw or browser runtime behavior
Proxies ProxiesConfig // canonical proxy inventory
EnginePolicies map[string]string // engine-specific proxy tags
Registry *ProxyRegistry // optional shared registry from caller
}
func DefaultProxyConfig ¶ added in v0.6.0
func DefaultProxyConfig() ProxyConfig
func NormalizeProxyConfig ¶ added in v0.6.0
func NormalizeProxyConfig(cfg ProxyConfig) (ProxyConfig, error)
type ProxyEngineStats ¶ added in v0.6.0
type ProxyEntryConfig ¶ added in v0.6.0
type ProxyExecutionMeta ¶ added in v0.6.0
type ProxyPolicy ¶ added in v0.6.0
type ProxyPolicy struct {
Mode string `json:"mode" mapstructure:"mode"`
Tag string `json:"tag,omitempty" mapstructure:"tag"`
}
func ResolveEffectiveProxyPolicy ¶ added in v0.6.0
func ResolveEffectiveProxyPolicy(globalProxyURL string, engineTag string) ProxyPolicy
type ProxyRegistry ¶ added in v0.6.0
type ProxyRegistry struct {
// contains filtered or unexported fields
}
func NewProxyRegistry ¶ added in v0.6.0
func NewProxyRegistry(entries []ProxyEntryConfig, failureThreshold int) (*ProxyRegistry, error)
func (*ProxyRegistry) BuildStats ¶ added in v0.6.0
func (r *ProxyRegistry) BuildStats() ProxyStats
func (*ProxyRegistry) HasHealthyProxyForTag ¶ added in v0.6.0
func (r *ProxyRegistry) HasHealthyProxyForTag(tag string) bool
func (*ProxyRegistry) NextByTag ¶ added in v0.6.0
func (r *ProxyRegistry) NextByTag(tag string) string
func (*ProxyRegistry) ReportFailure ¶ added in v0.6.0
func (r *ProxyRegistry) ReportFailure(proxyURL string)
func (*ProxyRegistry) ReportSuccess ¶ added in v0.6.0
func (r *ProxyRegistry) ReportSuccess(proxyURL string)
type ProxyStats ¶ added in v0.6.0
type ProxyStats struct {
ConfiguredCount int `json:"configured_count"`
HealthyCount int `json:"healthy_count"`
UnhealthyCount int `json:"unhealthy_count"`
Tags map[string]ProxyTagSummary `json:"tags"`
Entries []ProxyStatsEntry `json:"entries"`
Engines map[string]ProxyEngineStats `json:"engines,omitempty"`
}
type ProxyStatsEntry ¶ added in v0.6.0
type ProxyTagSummary ¶ added in v0.6.0
type Query ¶
type Query struct {
Text string
LangCode string // e.g. EN, ES, RU...
DateInterval string // format: YYYYMMDD..YYYYMMDD, e.g. 20181010..20231010
Filetype string // File extension to search.
Site string // Search site
Limit int // Limit the number of results
Start int // Search offset for pagination (Google uses 0, 10, 20...)
Filter bool // Filter duplicates (google) (false: include similar, true: hide similar)
Answers bool // Include questions and answers from the SERP page in the results, with negative indexes
ProxyURL string // Proxy URL for raw requests
ProxyOverride string // Request-scoped proxy override: tag or direct
Insecure bool // Allow insecure TLS connections
}
type ResilientConfig ¶ added in v0.6.0
type ResilientConfig struct {
Retry RetryConfig
CircuitBreaker CircuitBreakerConfig
Proxy ProxyConfig
}
func DefaultResilientConfig ¶ added in v0.6.0
func DefaultResilientConfig() ResilientConfig
type ResilientSearcher ¶ added in v0.6.0
type ResilientSearcher struct {
// contains filtered or unexported fields
}
ResilientSearcher wraps engines with retry and circuit breaker protection.
func NewResilientSearcher ¶ added in v0.6.0
func NewResilientSearcher(engines []SearchEngine, cfg ResilientConfig) *ResilientSearcher
func (*ResilientSearcher) GetCircuitBreakerStats ¶ added in v0.6.0
func (rs *ResilientSearcher) GetCircuitBreakerStats() []map[string]interface{}
func (*ResilientSearcher) GetProxyStats ¶ added in v0.6.0
func (rs *ResilientSearcher) GetProxyStats() ProxyStats
func (*ResilientSearcher) ResolveMegaProxyMeta ¶ added in v0.6.0
func (rs *ResilientSearcher) ResolveMegaProxyMeta(q Query, engines []SearchEngine) ProxyExecutionMeta
func (*ResilientSearcher) SearchAllImageParallel ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchAllImageParallel(q Query, engines []SearchEngine) []MegaSearchResult
func (*ResilientSearcher) SearchAllParallel ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchAllParallel(q Query, engines []SearchEngine) []MegaSearchResult
SearchAllParallel applies retry/circuit protections per engine for mega search.
func (*ResilientSearcher) SearchImagePrimary ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchImagePrimary(primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
func (*ResilientSearcher) SearchImageWithFallback ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchImageWithFallback(primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
func (*ResilientSearcher) SearchPrimary ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchPrimary(primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
SearchPrimary keeps dedicated endpoints engine-pure (no fallback).
func (*ResilientSearcher) SearchWithFallback ¶ added in v0.6.0
func (rs *ResilientSearcher) SearchWithFallback(primaryEngine SearchEngine, q Query) ([]SearchResult, string, ProxyExecutionMeta, error)
SearchWithFallback retries primary and then tries other initialized engines.
type ResponseCache ¶ added in v0.6.0
type ResponseCache struct {
// contains filtered or unexported fields
}
ResponseCache is a bounded in-memory TTL cache for dedicated endpoint responses.
func NewResponseCache ¶ added in v0.6.0
func NewResponseCache(ttl time.Duration, maxSize int) *ResponseCache
func (*ResponseCache) RecordBypass ¶ added in v0.6.0
func (c *ResponseCache) RecordBypass()
func (*ResponseCache) Set ¶ added in v0.6.0
func (c *ResponseCache) Set(key string, data []byte)
func (*ResponseCache) Stats ¶ added in v0.6.0
func (c *ResponseCache) Stats() map[string]interface{}
type RetryConfig ¶ added in v0.6.0
type RetryConfig struct {
MaxRetries int
InitialBackoff time.Duration
MaxBackoff time.Duration
BackoffFactor float64
}
RetryConfig controls retry behavior.
func DefaultRetryConfig ¶ added in v0.6.0
func DefaultRetryConfig() RetryConfig
type RetryResult ¶ added in v0.6.0
type RetryResult struct {
Results []SearchResult
Err error
Attempts int
Engine string
}
func RetryableSearch ¶ added in v0.6.0
func RetryableSearch(cfg RetryConfig, engineName string, searchFn func() ([]SearchResult, error)) RetryResult
RetryableSearch executes searchFn with exponential backoff retries. CAPTCHA and proxy-unavailable errors are not retried.
type SearchEngine ¶
type SearchEngine interface {
Search(Query) ([]SearchResult, error)
SearchImage(Query) ([]SearchResult, error)
IsInitialized() bool
Name() string
GetRateLimiter() *rate.Limiter
}
type SearchEngineOptions ¶ added in v0.2.1
type SearchEngineOptions struct {
RateRequests int `mapstructure:"rate_requests"`
RateTime int64 `mapstructure:"rate_seconds"`
RateBurst int `mapstructure:"rate_burst"`
SelectorTimeout int64 `mapstructure:"selector_timeout"` // CSS selector timeout in seconds
IsSolveCaptcha bool `mapstructure:"captcha"`
}
func (*SearchEngineOptions) GetRatelimit ¶ added in v0.2.1
func (o *SearchEngineOptions) GetRatelimit() time.Duration
func (*SearchEngineOptions) GetSelectorTimeout ¶ added in v0.2.1
func (o *SearchEngineOptions) GetSelectorTimeout() time.Duration
func (*SearchEngineOptions) Init ¶ added in v0.2.1
func (o *SearchEngineOptions) Init()
type SearchResult ¶
type SearchResult struct {
Rank int `json:"rank"`
URL string `json:"url"`
Title string `json:"title"`
Description string `json:"description"`
Ad bool `json:"ad"`
}
func DeduplicateResults ¶ added in v0.4.1
func DeduplicateResults(results []SearchResult) []SearchResult
type Server ¶
type Server struct {
// contains filtered or unexported fields
}
func NewServerWithOptions ¶ added in v0.6.0
func NewServerWithOptions(host string, port int, opts ServerOptions, searchEngines ...SearchEngine) *Server
type ServerOptions ¶ added in v0.6.0
type ServerOptions struct {
CacheTTL time.Duration
CacheMaxSize int
EnableCORS bool
CORS CORSConfig
AllowEndpointFallback bool
Resilience ResilientConfig
}
func DefaultServerOptions ¶ added in v0.6.0
func DefaultServerOptions() ServerOptions