Documentation ¶
Index ¶
- Variables
- func GetTaskRuleKeys() []string
- func Register(rule *TaskRule)
- func Run(task *Task, retCh chan<- common.MTS) error
- type CSVConf
- type Context
- func (ctx *Context) Output(row map[int]interface{}) error
- func (ctx *Context) Post(URL string, requestData map[string]string) error
- func (ctx *Context) PostForNext(URL string, requestData map[string]string) error
- func (ctx *Context) PostMultipartForNext(URL string, requestData map[string][]byte) error
- func (ctx *Context) PostRawForNext(URL string, requestData []byte) error
- func (ctx *Context) Visit(URL string) error
- func (ctx *Context) VisitForNext(URL string) error
- type HTMLElement
- func (h *HTMLElement) Attr(k string) string
- func (h *HTMLElement) ChildAttr(goquerySelector, attrName string) string
- func (h *HTMLElement) ChildAttrs(goquerySelector, attrName string) []string
- func (h *HTMLElement) ChildText(goquerySelector string) string
- func (h *HTMLElement) ForEach(goquerySelector string, callback func(int, *HTMLElement))
- type Limit
- type MySQLConf
- type Node
- type Option
- type OutputConfig
- type Request
- func (r *Request) Abort()
- func (r *Request) AbsoluteURL(u string) string
- func (r *Request) GetAnyReqContextValue(key string) interface{}
- func (r *Request) GetReqContextValue(key string) string
- func (r *Request) Post(URL string, requestData map[string]string) error
- func (r *Request) PostForNext(URL string, requestData map[string]string) error
- func (r *Request) PostForNextWithContext(URL string, requestData map[string]string) error
- func (r *Request) PostMultipart(URL string, requestData map[string][]byte) error
- func (r *Request) PostMultipartForNext(URL string, requestData map[string][]byte) error
- func (r *Request) PostRaw(URL string, requestData []byte) error
- func (r *Request) PostRawForNext(URL string, requestData []byte) error
- func (r *Request) PostRawForNextWithContext(URL string, requestData []byte) error
- func (r *Request) PutReqContextValue(key string, value interface{})
- func (r *Request) Retry() error
- func (r *Request) SetResponseCharacterEncoding(encoding string)
- func (r *Request) Visit(URL string) error
- func (r *Request) VisitForNext(URL string) error
- func (r *Request) VisitForNextWithContext(URL string) error
- type Response
- type Rule
- type Task
- type TaskConfig
- type TaskRule
- type XMLElement
Constants ¶
This section is empty.
Variables ¶
View Source
var ( ErrTaskRuleNotExist = errors.New("task rule not exist") ErrTaskRuleIsNil = errors.New("task rule is nil") ErrTaskRuleNameIsEmpty = errors.New("task rule name is empty") ErrTaskRuleNameDuplicated = errors.New("task rule name is Duplicated") ErrTaskRuleHeadIsNil = errors.New("task rule head is nil") ErrTaskRuleNodesLenInvalid = errors.New("task rule nodes len is invalid") ErrTaskRuleNodesKeyInvalid = errors.New("task rule nodes key should start from 0 and monotonically increasing") )
View Source
var (
ErrOutputFieldsNotMatchOutputRow = errors.New("output fields not match out put row")
)
View Source
var (
ErrTaskRunningTimeout = errors.New("task running timeout")
)
Functions ¶
func GetTaskRuleKeys ¶
func GetTaskRuleKeys() []string
Types ¶
type Context ¶
type Context struct {
// contains filtered or unexported fields
}
func (*Context) PostForNext ¶
func (*Context) PostMultipartForNext ¶
func (*Context) PostRawForNext ¶
func (*Context) VisitForNext ¶
type HTMLElement ¶
type HTMLElement struct { Name string Text string Request *Request Response *Response DOM *goquery.Selection // contains filtered or unexported fields }
func (*HTMLElement) Attr ¶
func (h *HTMLElement) Attr(k string) string
func (*HTMLElement) ChildAttr ¶
func (h *HTMLElement) ChildAttr(goquerySelector, attrName string) string
func (*HTMLElement) ChildAttrs ¶
func (h *HTMLElement) ChildAttrs(goquerySelector, attrName string) []string
func (*HTMLElement) ChildText ¶
func (h *HTMLElement) ChildText(goquerySelector string) string
func (*HTMLElement) ForEach ¶
func (h *HTMLElement) ForEach(goquerySelector string, callback func(int, *HTMLElement))
type Limit ¶
type Limit struct { Enable bool // DomainRegexp is a regular expression to match against domains DomainRegexp string // DomainGlob is a glob pattern to match against domains DomainGlob string // Delay is the duration to wait before creating a new request to the matching domains Delay time.Duration // RandomDelay is the extra randomized duration to wait added to Delay before creating a new request RandomDelay time.Duration // Parallelism is the number of the maximum allowed concurrent requests of the matching domains Parallelism int }
type Node ¶
type Node struct { OnRequest func(ctx *Context, req *Request) OnError func(ctx *Context, res *Response, err error) error OnResponse func(ctx *Context, res *Response) error OnHTML map[string]func(ctx *Context, el *HTMLElement) error OnXML map[string]func(ctx *Context, el *XMLElement) error OnScraped func(ctx *Context, res *Response) error }
type OutputConfig ¶
type Request ¶
type Request struct { URL *url.URL Headers *http.Header Method string Body io.Reader // contains filtered or unexported fields }
func (*Request) AbsoluteURL ¶
func (*Request) GetAnyReqContextValue ¶
func (*Request) GetReqContextValue ¶
func (*Request) PostForNext ¶
func (*Request) PostForNextWithContext ¶
func (*Request) PostMultipart ¶
func (*Request) PostMultipartForNext ¶
func (*Request) PostRawForNext ¶
func (*Request) PostRawForNextWithContext ¶
func (*Request) PutReqContextValue ¶
func (*Request) SetResponseCharacterEncoding ¶
func (*Request) VisitForNext ¶
func (*Request) VisitForNextWithContext ¶
type Response ¶
type Task ¶
type Task struct { ID uint64 TaskRule TaskConfig }
type TaskConfig ¶
type TaskConfig struct { CronSpec string Option Option Limit Limit ProxyURLs []string OutputConfig OutputConfig }
type TaskRule ¶
type TaskRule struct { Name string Description string Namespace string OutputFields []string DisableCookies bool AllowURLRevisit bool IgnoreRobotsTxt bool ParseHTTPErrorResponse bool Rule *Rule }
func GetTaskRule ¶
type XMLElement ¶
type XMLElement struct { Name string Text string Request *Request Response *Response DOM interface{} // contains filtered or unexported fields }
func (*XMLElement) Attr ¶
func (x *XMLElement) Attr(k string) string
func (*XMLElement) ChildAttr ¶
func (x *XMLElement) ChildAttr(xpathQuery, attrName string) string
func (*XMLElement) ChildAttrs ¶
func (x *XMLElement) ChildAttrs(xpathQuery, attrName string) []string
func (*XMLElement) ChildText ¶
func (x *XMLElement) ChildText(xpathQuery string) string
Click to show internal directories.
Click to hide internal directories.