request

package
v0.0.0-...-2d91a95 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 7, 2017 License: Apache-2.0 Imports: 11 Imported by: 0

Documentation

Index

Constants

View Source
// Fallback settings for the timeout and retry fields of a Request.
const (
	// DefaultDialTimeout is the default timeout for connecting to the server.
	DefaultDialTimeout = 120 * time.Second
	// DefaultConnTimeout is the default timeout for a download.
	DefaultConnTimeout = 120 * time.Second
	// DefaultTryTimes is the default maximum number of download attempts.
	DefaultTryTimes = 3
	// DefaultRetryPause is the default pause before a download is retried.
	DefaultRetryPause = 2000 * time.Millisecond
)
View Source
// Identifiers of the available download kernels.
const (
	// SurfID identifies the default Surf download kernel (native Go).
	// It must stay first in this block: its value 0 must not change.
	SurfID = iota
	// PhantomJSID identifies the spare PhantomJS download kernel. It is
	// generally not used (poor efficiency, imperfect header support).
	PhantomJSID
)

Variables

This section is empty.

Functions

This section is empty.

Types

type Request

// Request describes an object (a target URL plus its download settings)
// that is waiting to be crawled.
type Request struct {
	Spider        string          // spider (rule) name; set automatically, do not fill in manually
	URL           string          // target URL; must be set
	Rule          string          // name of the rule node used to parse the response; must be set
	Method        string          // request method: GET, POST, POST-M or HEAD
	Header        http.Header     // request header fields
	EnableCookie  bool            // whether to use cookies; governed by the Spider's EnableCookie setting
	PostData      string          // values sent with a POST request
	DialTimeout   time.Duration   // timeout for establishing a connection (error "dial tcp: i/o timeout")
	ConnTimeout   time.Duration   // timeout on an established connection (error "WSARecv tcp: i/o timeout")
	TryTimes      int             // maximum number of download attempts
	RetryPause    time.Duration   // pause before the next attempt after a failed download
	RedirectTimes int             // maximum number of redirects; 0 means unlimited, a negative value prohibits redirects
	Temp          Temp            // temporary data
	TempIsJSON    map[string]bool // marks with true the Temp fields that are stored as JSON; set automatically, do not fill in manually
	Priority      int             // scheduling priority; defaults to 0, which is the lowest priority
	Reloadable    bool            // whether the link may be downloaded repeatedly
	// DownloaderID is the ID of the Surfer downloader kernel:
	// 0 selects the Surf downloader (high concurrency, a variety of control functions);
	// 1 selects the PhantomJS downloader (strong at breaking through defenses —
	// presumably anti-crawling measures — but slow and low concurrency).
	DownloaderID int
	// contains filtered or unexported fields
}

Request : represents an object waiting to be crawled.

func Deserialize

func Deserialize(s string) (*Request, error)

Deserialize :

func (*Request) AddHeader

func (request *Request) AddHeader(key, value string) *Request

AddHeader :

func (*Request) Copy

func (request *Request) Copy() *Request

Copy : Get a copy

func (*Request) GetConnTimeout

func (request *Request) GetConnTimeout() time.Duration

GetConnTimeout :

func (*Request) GetCookies

func (request *Request) GetCookies() string

GetCookies :

func (*Request) GetDialTimeout

func (request *Request) GetDialTimeout() time.Duration

GetDialTimeout :

func (*Request) GetDownloaderID

func (request *Request) GetDownloaderID() int

GetDownloaderID :

func (*Request) GetEnableCookie

func (request *Request) GetEnableCookie() bool

GetEnableCookie :

func (*Request) GetHeader

func (request *Request) GetHeader() http.Header

GetHeader :

func (*Request) GetMethod

func (request *Request) GetMethod() string

GetMethod : returns the name of the HTTP request method (note that "Get" here is the getter prefix and does not refer to the HTTP GET method).

func (*Request) GetPostData

func (request *Request) GetPostData() string

GetPostData :

func (*Request) GetPriority

func (request *Request) GetPriority() int

GetPriority :

func (*Request) GetProxy

func (request *Request) GetProxy() string

GetProxy :

func (*Request) GetRedirectTimes

func (request *Request) GetRedirectTimes() int

GetRedirectTimes :

func (*Request) GetReferer

func (request *Request) GetReferer() string

GetReferer :

func (*Request) GetRetryPause

func (request *Request) GetRetryPause() time.Duration

GetRetryPause :

func (*Request) GetRuleName

func (request *Request) GetRuleName() string

GetRuleName :

func (*Request) GetSpiderName

func (request *Request) GetSpiderName() string

GetSpiderName :

func (*Request) GetTemp

func (request *Request) GetTemp(key string, defaultValue interface{}) interface{}

GetTemp : gets temporary cached data. defaultValue must not be a nil interface{}.

func (*Request) GetTemps

func (request *Request) GetTemps() Temp

GetTemps :

func (*Request) GetTryTimes

func (request *Request) GetTryTimes() int

GetTryTimes :

func (*Request) GetURL

func (request *Request) GetURL() string

GetURL :

func (*Request) IsReloadable

func (request *Request) IsReloadable() bool

IsReloadable :

func (*Request) MarshalJSON

func (request *Request) MarshalJSON() ([]byte, error)

MarshalJSON :

func (*Request) Prepare

func (request *Request) Prepare() error

Prepare : performs the preparation work before a request is sent, filling in a series of default values. Request.URL and Request.Rule must be set. Request.Spider does not need to be set manually (the system sets it automatically). Request.EnableCookie is set on the Spider; setting it in a rule request has no effect. The following fields have default values and may be left unset: Request.Method defaults to GET; Request.DialTimeout defaults to the constant DefaultDialTimeout, and a value less than 0 means no limit on the waiting time; Request.ConnTimeout defaults to the constant DefaultConnTimeout, and a value less than 0 means no limit on the download time; Request.TryTimes defaults to the constant DefaultTryTimes, and a value less than 0 means no limit on the number of retries after a failure; Request.RedirectTimes defaults to an unlimited number of redirects, and a value less than 0 prohibits redirects; Request.RetryPause defaults to the constant DefaultRetryPause; Request.DownloaderID specifies the downloader ID: 0 is the default Surf downloader (high concurrency, full functionality), 1 is the PhantomJS downloader (strong at breaking through defenses, but slow and low concurrency).

func (*Request) Serialize

func (request *Request) Serialize() string

Serialize :

func (*Request) SetCookies

func (request *Request) SetCookies(cookie string) *Request

SetCookies :

func (*Request) SetDownloaderID

func (request *Request) SetDownloaderID(id int) *Request

SetDownloaderID :

func (*Request) SetEnableCookie

func (request *Request) SetEnableCookie(enableCookie bool) *Request

SetEnableCookie :

func (*Request) SetHeader

func (request *Request) SetHeader(key, value string) *Request

SetHeader :

func (*Request) SetMethod

func (request *Request) SetMethod(method string) *Request

SetMethod : sets the HTTP request method.

func (*Request) SetPriority

func (request *Request) SetPriority(priority int) *Request

SetPriority :

func (*Request) SetProxy

func (request *Request) SetProxy(proxy string) *Request

func (*Request) SetReferer

func (request *Request) SetReferer(referer string) *Request

SetReferer :

func (*Request) SetReloadable

func (request *Request) SetReloadable(can bool) *Request

SetReloadable :

func (*Request) SetRuleName

func (request *Request) SetRuleName(ruleName string) *Request

SetRuleName :

func (*Request) SetSpiderName

func (request *Request) SetSpiderName(spiderName string) *Request

SetSpiderName :

func (*Request) SetTemp

func (request *Request) SetTemp(key string, value interface{}) *Request

SetTemp :

func (*Request) SetTemps

func (request *Request) SetTemps(temp map[string]interface{}) *Request

SetTemps :

func (*Request) SetURL

func (request *Request) SetURL(url string) *Request

SetURL :

func (*Request) Unique

func (request *Request) Unique() string

Unique : returns the unique identifier of the request.

type Temp

// Temp maps string keys to values of any type; it is the type of the
// Request.Temp field, which carries a request's temporary data.
type Temp map[string]interface{}

Temp :

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL