config

package
v0.0.0-...-6b52c20 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 18, 2017 License: Apache-2.0 Imports: 1 Imported by: 1

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type CrawlerConfig

// CrawlerConfig groups the crawler-wide settings. Each field carries a
// `config` struct tag naming the key it is associated with.
type CrawlerConfig struct {
	// MaxGoRoutine is tagged "max_go_routine".
	// NOTE(review): presumably an upper bound on concurrent goroutines — confirm against the caller.
	MaxGoRoutine int `config:"max_go_routine"`

	// Fetch speed control; tagged "fetch_threshold_ms".
	FetchThresholdInMs int `config:"fetch_threshold_ms"`
}

func GetDefaultCrawlerConfig

func GetDefaultCrawlerConfig() CrawlerConfig

type RoutingParameter

// RoutingParameter carries the routing information attached to a piece of
// work; at present that is a single shard number.
type RoutingParameter struct {
	// Shard is the shard number.
	// NOTE(review): valid range is not visible here — confirm against the caller.
	Shard int
}

type Task

// Task bundles the raw byte payloads that belong to one unit of work:
// its URL, the request and the response.
type Task struct {
	// Url holds the task's URL as raw bytes.
	Url []byte
	// Request holds the request payload as raw bytes.
	Request []byte
	// Response holds the response payload as raw bytes.
	Response []byte
}

type TaskConfig

// TaskConfig holds the per-task settings: URL patterns for link extraction,
// page-parse skipping, fetching and saving, plus domain-following flags,
// data paths, cookie and fetch-delay settings.
//
// Every pattern appears twice: a *Str field with the pattern text and a
// *regexp.Regexp field next to it. This declaration does not compile one
// into the other.
//
// Struct tags use the conventional `config:"name"` form (the same key that
// CrawlerConfig uses) so that they can be read with reflect.StructTag.Get;
// a bare tag such as `link_extract_pattern` cannot be looked up by key.
type TaskConfig struct {
	// Walking-around pattern (link extraction).
	LinkUrlExtractRegexStr        string `config:"link_extract_pattern"`
	LinkUrlExtractRegex           *regexp.Regexp
	LinkUrlExtractRegexGroupIndex int `config:"link_extract_group"`
	LinkUrlMustContain            string
	LinkUrlMustNotContain         string

	// Page-parse skip pattern: when a URL matches this pattern, gopa will
	// not parse URLs out of the response fetched for that URL.
	SkipPageParsePatternStr string `config:"skip_page_parse_pattern"`
	SkipPageParsePattern    *regexp.Regexp

	// Fetch URL pattern.
	FetchUrlPatternStr     string `config:"fetch_url_pattern"`
	FetchUrlPattern        *regexp.Regexp
	FetchUrlMustContain    string
	FetchUrlMustNotContain string

	// Saving pattern.
	SavingUrlPatternStr     string `config:"save_url_pattern"`
	SavingUrlPattern        *regexp.Regexp
	SavingUrlMustContain    string
	SavingUrlMustNotContain string

	// Crawling within domain.
	FollowSameDomain bool `config:"follow_same_domain"`
	FollowSubDomain  bool `config:"follow_sub_domain"`

	TaskDataPath string

	// User cookie.
	Cookie string

	// Fetch speed control.
	FetchDelayThreshold int
	TaskDBFilename      string `config:"task_db_filename"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL