Documentation ¶
Overview ¶
Package crawler provides the main DAST web application crawler functionality.
Index ¶
- type AuthCredentials
- type AuthType
- type Config
- type CrawlError
- type CrawlResult
- type CrawlStats
- type Crawler
- func (c *Crawler) IsRunning() bool
- func (c *Crawler) LoadState(path string) error
- func (c *Crawler) Metrics() *metrics.Collector
- func (c *Crawler) MetricsSnapshot() *metrics.Snapshot
- func (c *Crawler) Results() <-chan interface{}
- func (c *Crawler) SaveState(path string) error
- func (c *Crawler) ShutdownContext() context.Context
- func (c *Crawler) Start(ctx context.Context) (*CrawlResult, error)
- func (c *Crawler) Stats() CrawlStats
- func (c *Crawler) Stop() error
- func (c *Crawler) StopNow() error
- type Endpoint
- type EnhancedDiscoveryConfig
- type Form
- type FormAuth
- type FormInput
- type OAuthConfig
- type Option
- func WithAPIKeyAuth(headerName, apiKey string) Option
- func WithActiveDiscovery(enabled bool) Option
- func WithAllowedDomains(domains ...string) Option
- func WithAuth(auth AuthCredentials) Option
- func WithAutoSave(enabled bool, intervalSeconds int) Option
- func WithBasicAuth(username, password string) Option
- func WithBrowserPool(size int) Option
- func WithConfig(config *Config) Option
- func WithCookies(cookies []*http.Cookie) Option
- func WithCustomHeaders(headers map[string]string) Option
- func WithDebug(debug bool) Option
- func WithExcludePatterns(patterns ...string) Option
- func WithFollowExternal(follow bool) Option
- func WithFormAnalysis(enabled bool) Option
- func WithFormAuth(auth FormAuth) Option
- func WithHeadless(headless bool) Option
- func WithIncludePatterns(patterns ...string) Option
- func WithJSAnalysis(enabled bool) Option
- func WithJWTAuth(token string) Option
- func WithLogLevel(level logger.Level) Option
- func WithLogger(l *logger.Logger) Option
- func WithMaxDepth(depth int) Option
- func WithMetrics(m *metrics.Collector) Option
- func WithOutput(w io.Writer) Option
- func WithOutputFile(path string) Option
- func WithPassiveDiscovery(enabled bool) Option
- func WithPrettyOutput(pretty bool) Option
- func WithProgress(enabled bool) Option
- func WithProxy(proxy string) Option
- func WithRateLimit(rps float64, burst int) Option
- func WithRespectRobotsTxt(respect bool) Option
- func WithScope(scope ScopeRules) Option
- func WithStateFile(path string) Option
- func WithStreamMode(stream bool) Option
- func WithTarget(url string) Option
- func WithTimeout(timeout time.Duration) Option
- func WithUserAgent(ua string) Option
- func WithVerbose(verbose bool) Option
- func WithWebSocketDiscovery(enabled bool) Option
- func WithWorkers(n int) Option
- type OutputConfig
- type Parameter
- type QueueItem
- type RateLimitConfig
- type ScopeRules
- type SecretFinding
- type StateConfig
- type Technology
- type WebSocketEndpoint
- type WebSocketMsg
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AuthCredentials ¶
type AuthCredentials struct {
Type AuthType `json:"type"`
Username string `json:"username,omitempty"`
Password string `json:"password,omitempty"`
Token string `json:"token,omitempty"`
Headers map[string]string `json:"headers,omitempty"`
Cookies []*http.Cookie `json:"-"`
LoginURL string `json:"login_url,omitempty"`
FormFields map[string]string `json:"form_fields,omitempty"`
OAuthConfig *OAuthConfig `json:"oauth_config,omitempty"`
}
AuthCredentials holds authentication credentials.
type Config ¶
type Config struct {
// Target URL to crawl
Target string `json:"target" yaml:"target"`
// Number of concurrent workers
Workers int `json:"workers" yaml:"workers"`
// Maximum crawl depth
MaxDepth int `json:"max_depth" yaml:"max_depth"`
// Request timeout
Timeout time.Duration `json:"timeout" yaml:"timeout"`
// Scope rules
Scope ScopeRules `json:"scope" yaml:"scope"`
// Rate limiting
RateLimit RateLimitConfig `json:"rate_limit" yaml:"rate_limit"`
// Browser configuration
Browser browser.Config `json:"browser" yaml:"browser"`
// Authentication
Auth AuthCredentials `json:"auth" yaml:"auth"`
// Output configuration
Output OutputConfig `json:"output" yaml:"output"`
// State persistence
State StateConfig `json:"state" yaml:"state"`
// Enable passive API discovery
PassiveAPIDiscovery bool `json:"passive_api_discovery" yaml:"passive_api_discovery"`
// Enable active API probing
ActiveAPIDiscovery bool `json:"active_api_discovery" yaml:"active_api_discovery"`
// Enable WebSocket discovery
WebSocketDiscovery bool `json:"websocket_discovery" yaml:"websocket_discovery"`
// Enable form analysis
FormAnalysis bool `json:"form_analysis" yaml:"form_analysis"`
// Enable JavaScript analysis
JSAnalysis bool `json:"js_analysis" yaml:"js_analysis"`
// Enable AJAX discovery (intercept XHR/Fetch, trigger events, find AJAX forms)
AJAXDiscovery bool `json:"ajax_discovery" yaml:"ajax_discovery"`
// Fast mode skips heavy analysis (SPA framework detection, AJAX triggering) for speed
FastMode bool `json:"fast_mode" yaml:"fast_mode"`
// Custom headers to include in all requests
CustomHeaders map[string]string `json:"custom_headers" yaml:"custom_headers"`
// Cookies to include in all requests
Cookies map[string]string `json:"cookies" yaml:"cookies"`
// User agents to rotate (if empty, uses default)
UserAgents []string `json:"user_agents" yaml:"user_agents"`
// Proxy URL
Proxy string `json:"proxy" yaml:"proxy"`
// Verbose logging
Verbose bool `json:"verbose" yaml:"verbose"`
// Debug mode
Debug bool `json:"debug" yaml:"debug"`
// Enhanced discovery configuration
EnhancedDiscovery EnhancedDiscoveryConfig `json:"enhanced_discovery" yaml:"enhanced_discovery"`
}
Config holds all crawler configuration.
func BalancedConfig ¶
func BalancedConfig() *Config
BalancedConfig returns a configuration that balances speed with thoroughness.
func DefaultConfig ¶
func DefaultConfig() *Config
DefaultConfig returns a configuration with sensible defaults.
func LoadFromFile ¶
LoadFromFile loads configuration from a file (JSON or YAML).
func TurboConfig ¶
func TurboConfig() *Config
TurboConfig returns a configuration optimized for MAXIMUM SPEED. Use this when you need to crawl as fast as possible. Warning: This may trigger rate limiting or WAF blocks on some sites.
func (*Config) SaveToFile ¶
SaveToFile saves configuration to a file.
type CrawlError ¶
type CrawlError struct {
URL string `json:"url"`
Error string `json:"error"`
Timestamp time.Time `json:"timestamp"`
}
CrawlError represents an error encountered during crawling.
type CrawlResult ¶
type CrawlResult struct {
Target string `json:"target"`
StartedAt time.Time `json:"started_at"`
CompletedAt time.Time `json:"completed_at,omitempty"`
Stats CrawlStats `json:"stats"`
Endpoints []Endpoint `json:"endpoints"`
Forms []Form `json:"forms"`
WebSockets []WebSocketEndpoint `json:"websockets"`
Technologies []Technology `json:"technologies,omitempty"`
Secrets []SecretFinding `json:"secrets,omitempty"`
Errors []CrawlError `json:"errors,omitempty"`
}
CrawlResult represents the complete result of a crawl session.
type CrawlStats ¶
type CrawlStats struct {
URLsDiscovered int `json:"urls_discovered"`
PagesCrawled int `json:"pages_crawled"`
FormsFound int `json:"forms_found"`
APIEndpoints int `json:"api_endpoints"`
WebSocketEndpoints int `json:"websocket_endpoints"`
ErrorCount int `json:"error_count"`
Duration time.Duration `json:"duration"`
BytesTransferred int64 `json:"bytes_transferred"`
}
CrawlStats contains statistics about the crawl.
type Crawler ¶
type Crawler struct {
// contains filtered or unexported fields
}
Crawler is the main crawler orchestrator.
func (*Crawler) MetricsSnapshot ¶
MetricsSnapshot returns a point-in-time snapshot of all metrics.
func (*Crawler) Results ¶
func (c *Crawler) Results() <-chan interface{}
Results returns a channel for streaming results.
func (*Crawler) ShutdownContext ¶
ShutdownContext returns the shutdown context for monitoring.
func (*Crawler) Start ¶
func (c *Crawler) Start(ctx context.Context) (*CrawlResult, error)
Start begins the crawling process.
func (*Crawler) Stats ¶
func (c *Crawler) Stats() CrawlStats
Stats returns current crawl statistics.
type Endpoint ¶
type Endpoint struct {
URL string `json:"url"`
Method string `json:"method"`
Source string `json:"source"` // passive, active, html, javascript
Depth int `json:"depth"`
Parameters []Parameter `json:"parameters,omitempty"`
Headers map[string]string `json:"headers,omitempty"`
DiscoveredFrom string `json:"discovered_from,omitempty"`
StatusCode int `json:"status_code,omitempty"`
ContentType string `json:"content_type,omitempty"`
ResponseSize int64 `json:"response_size,omitempty"`
Timestamp time.Time `json:"timestamp"`
}
Endpoint represents a discovered endpoint during crawling.
type EnhancedDiscoveryConfig ¶
type EnhancedDiscoveryConfig struct {
// Enable all enhanced discovery modules
Enabled bool `json:"enabled" yaml:"enabled"`
// Enable robots.txt parsing
EnableRobots bool `json:"enable_robots" yaml:"enable_robots"`
// Enable sitemap.xml discovery
EnableSitemap bool `json:"enable_sitemap" yaml:"enable_sitemap"`
// Enable JavaScript source map parsing
EnableSourceMaps bool `json:"enable_source_maps" yaml:"enable_source_maps"`
// Enable common path bruteforcing
EnablePathBrute bool `json:"enable_path_brute" yaml:"enable_path_brute"`
// Enable technology fingerprinting
EnableFingerprint bool `json:"enable_fingerprint" yaml:"enable_fingerprint"`
// Enable parameter discovery
EnableParamDiscovery bool `json:"enable_param_discovery" yaml:"enable_param_discovery"`
// Enable JavaScript extraction
EnableJSExtract bool `json:"enable_js_extract" yaml:"enable_js_extract"`
// Concurrency for enhanced discovery operations
Concurrency int `json:"concurrency" yaml:"concurrency"`
}
EnhancedDiscoveryConfig holds configuration for enhanced discovery modules.
type Form ¶
type Form struct {
URL string `json:"url"`
Action string `json:"action"`
Method string `json:"method"`
Enctype string `json:"enctype"`
Inputs []FormInput `json:"inputs"`
HasCSRF bool `json:"has_csrf"`
Depth int `json:"depth"`
Timestamp time.Time `json:"timestamp"`
}
Form represents an HTML form discovered during crawling.
type FormAuth ¶
type FormAuth struct {
LoginURL string
Username string
Password string
UsernameField string
PasswordField string
ExtraFields map[string]string
}
FormAuth holds form-based authentication configuration.
type FormInput ¶
type FormInput struct {
Name string `json:"name"`
Type string `json:"type"`
Value string `json:"value,omitempty"`
Required bool `json:"required"`
Placeholder string `json:"placeholder,omitempty"`
Pattern string `json:"pattern,omitempty"`
MaxLength int `json:"max_length,omitempty"`
MinLength int `json:"min_length,omitempty"`
}
FormInput represents an input field in a form.
type OAuthConfig ¶
type OAuthConfig struct {
ClientID string `json:"client_id"`
ClientSecret string `json:"client_secret"`
AuthURL string `json:"auth_url"`
TokenURL string `json:"token_url"`
RedirectURL string `json:"redirect_url"`
Scopes []string `json:"scopes"`
}
OAuthConfig holds OAuth 2.0 configuration.
type Option ¶
Option is a functional option for configuring the Crawler.
func WithAPIKeyAuth ¶
WithAPIKeyAuth configures API key authentication.
func WithActiveDiscovery ¶
WithActiveDiscovery enables/disables active API probing.
func WithAllowedDomains ¶
WithAllowedDomains sets the allowed domains.
func WithAuth ¶
func WithAuth(auth AuthCredentials) Option
WithAuth sets authentication credentials.
func WithAutoSave ¶
WithAutoSave enables/disables automatic state saving.
func WithBasicAuth ¶
WithBasicAuth configures basic authentication.
func WithBrowserPool ¶
WithBrowserPool sets the browser pool size.
func WithCookies ¶
WithCookies sets cookies to include in requests.
func WithCustomHeaders ¶
WithCustomHeaders sets custom headers for all requests.
func WithExcludePatterns ¶
WithExcludePatterns adds URL patterns to exclude.
func WithFollowExternal ¶
WithFollowExternal enables/disables following external links.
func WithFormAnalysis ¶
WithFormAnalysis enables/disables form analysis.
func WithFormAuth ¶
WithFormAuth configures form-based authentication.
func WithHeadless ¶
WithHeadless enables/disables headless mode.
func WithIncludePatterns ¶
WithIncludePatterns adds URL patterns to include.
func WithJSAnalysis ¶
WithJSAnalysis enables/disables JavaScript analysis.
func WithMetrics ¶
WithMetrics sets a custom metrics collector.
func WithOutputFile ¶
WithOutputFile sets the output file path.
func WithPassiveDiscovery ¶
WithPassiveDiscovery enables/disables passive API discovery.
func WithPrettyOutput ¶
WithPrettyOutput enables/disables pretty JSON output.
func WithProgress ¶
WithProgress enables/disables progress bar display.
func WithRateLimit ¶
WithRateLimit sets the rate limiting configuration.
func WithRespectRobotsTxt ¶
WithRespectRobotsTxt enables/disables honoring robots.txt rules.
func WithStateFile ¶
WithStateFile sets the state file path for persistence.
func WithStreamMode ¶
WithStreamMode enables/disables streaming output mode.
func WithTimeout ¶
WithTimeout sets the request timeout.
func WithVerbose ¶
WithVerbose enables/disables verbose logging.
func WithWebSocketDiscovery ¶
WithWebSocketDiscovery enables/disables WebSocket discovery.
func WithWorkers ¶
WithWorkers sets the number of concurrent workers.
type OutputConfig ¶
type OutputConfig struct {
Format string `json:"format"` // json
FilePath string `json:"file_path"`
Pretty bool `json:"pretty"`
StreamMode bool `json:"stream_mode"`
}
OutputConfig defines output configuration.
type Parameter ¶
type Parameter struct {
Name string `json:"name"`
Type string `json:"type"` // query, body, header, path, cookie
Example string `json:"example,omitempty"`
Required bool `json:"required,omitempty"`
}
Parameter represents a request parameter.
type QueueItem ¶
type QueueItem struct {
URL string `json:"url"`
Method string `json:"method"`
Depth int `json:"depth"`
ParentURL string `json:"parent_url"`
Headers map[string]string `json:"headers,omitempty"`
Body []byte `json:"body,omitempty"`
Priority int `json:"priority"`
Timestamp time.Time `json:"timestamp"`
}
QueueItem represents an item in the crawl queue.
type RateLimitConfig ¶
type RateLimitConfig struct {
RequestsPerSecond float64 `json:"requests_per_second"`
Burst int `json:"burst"`
DelayBetween time.Duration `json:"delay_between"`
RespectRobotsTxt bool `json:"respect_robots_txt"`
}
RateLimitConfig defines rate limiting configuration.
type ScopeRules ¶
type ScopeRules struct {
IncludePatterns []string `json:"include_patterns"`
ExcludePatterns []string `json:"exclude_patterns"`
AllowedDomains []string `json:"allowed_domains"`
MaxDepth int `json:"max_depth"`
FollowExternal bool `json:"follow_external"`
}
ScopeRules defines crawling scope rules.
type SecretFinding ¶
type SecretFinding struct {
Type string `json:"type"`
Value string `json:"value"`
File string `json:"file,omitempty"`
Context string `json:"context,omitempty"`
}
SecretFinding represents a potential secret found in source code.
type StateConfig ¶
type StateConfig struct {
Enabled bool `json:"enabled"`
FilePath string `json:"file_path"`
AutoSave bool `json:"auto_save"`
Interval int `json:"interval_seconds"`
}
StateConfig defines state persistence configuration.
type Technology ¶
type Technology struct {
Name string `json:"name"`
Category string `json:"category"`
Version string `json:"version,omitempty"`
Confidence int `json:"confidence"`
Evidence string `json:"evidence,omitempty"`
}
Technology represents a detected technology.
type WebSocketEndpoint ¶
type WebSocketEndpoint struct {
URL string `json:"url"`
DiscoveredFrom string `json:"discovered_from"`
SampleMessages []WebSocketMsg `json:"sample_messages,omitempty"`
Protocols []string `json:"protocols,omitempty"`
Timestamp time.Time `json:"timestamp"`
}
WebSocketEndpoint represents a discovered WebSocket endpoint.