goclient

package
v1.0.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 11, 2018 License: Apache-2.0 Imports: 9 Imported by: 0

Documentation

Index

Constants

View Source
// Page request type values, re-exported from the pb package under the same
// names. Each constant is defined as exactly its pb counterpart.
// NOTE(review): presumably these are the values passed as the typ argument of
// (*CrawlJob).AddJS, which takes a pb.PageReqType — confirm against the
// implementation.
const (
	PageReqType_BUILTINJS = pb.PageReqType_BUILTINJS
	PageReqType_JSCRIPT   = pb.PageReqType_JSCRIPT
)

Variables

This section is empty.

Functions

This section is empty.

Types

type CrawlJob

// CrawlJob is the job type exposed by this package. Instances are obtained
// from NewCrawlJob, and the AddJS, AddPage, IsAlive, Run, Start, Stop and
// Set* methods are all defined on *CrawlJob.
//
// The exported fields fall into two visual groups: a first group of plain
// settings (strings, bools, int32 values, pb.KVP lists and protobuf
// Duration/Timestamp pointers), and a second group (Callback, UsePageChan,
// PageChan) that carries a function value and a channel of *pb.PageHTML.
//
// NOTE(review): the meaning, units and defaults of the individual fields are
// not shown in this listing; the names suggest crawl, login and Chrome
// options, but confirm against the implementation before relying on any of
// them. The type also has unexported fields that are omitted here.
type CrawlJob struct {
	SeedURL                 string
	MinDelay                int32
	MaxDelay                int32
	Follow                  bool
	CallbackUrlRegexp       string
	FollowUrlRegexp         string
	CallbackXpathMatch      []*pb.KVP
	CallbackXpathRegexp     []*pb.KVP
	MaxConcurrentRequests   int32
	Useragent               string
	Impolite                bool
	Depth                   int32
	Repeat                  bool
	Frequency               *google_protobuf1.Duration
	Firstrun                *google_protobuf.Timestamp
	UnsafeNormalizeURL      bool
	Login                   bool
	LoginUrl                string
	LoginJS                 string
	LoginPayload            []*pb.KVP
	LoginParseFields        bool
	LoginParseXpath         []*pb.KVP
	LoginSuccessCheck       *pb.KVP
	CheckLoginAfterEachPage bool
	Chrome                  bool
	ChromeBinary            string
	DomLoadTime             int32
	NetworkIface            string
	CancelOnDisconnect      bool
	CheckContent            bool
	Prefetch                bool

	Callback    func(*PageHTML, *CrawlJob)
	UsePageChan bool
	PageChan    chan *pb.PageHTML
	// contains filtered or unexported fields
}

func NewCrawlJob

func NewCrawlJob(svrHost, svrPort string) *CrawlJob

func (*CrawlJob) AddJS

func (cj *CrawlJob) AddJS(typ pb.PageReqType, url, js, metaStr string) error

func (*CrawlJob) AddPage

func (cj *CrawlJob) AddPage(url, metaStr string) error

func (*CrawlJob) IsAlive

func (cj *CrawlJob) IsAlive() bool

func (*CrawlJob) Run

func (cj *CrawlJob) Run()

func (*CrawlJob) SetCallbackXpathMatch

func (cj *CrawlJob) SetCallbackXpathMatch(mdata KVMap)

func (*CrawlJob) SetCallbackXpathRegexp

func (cj *CrawlJob) SetCallbackXpathRegexp(mdata KVMap)

func (*CrawlJob) SetLogin

func (cj *CrawlJob) SetLogin(loginUrl string, loginPayload, loginParseXpath KVMap, loginSuccessCheck KVMap)

func (*CrawlJob) SetLoginChrome

func (cj *CrawlJob) SetLoginChrome(loginUrl string, loginJS string, loginSuccessCheck KVMap)

func (*CrawlJob) Start

func (cj *CrawlJob) Start()

func (*CrawlJob) Stop

func (cj *CrawlJob) Stop()

type KVMap

// KVMap is an alias for map[string]string. It is the parameter type accepted
// by the SetCallbackXpathMatch, SetCallbackXpathRegexp, SetLogin and
// SetLoginChrome methods of CrawlJob.
type KVMap = map[string]string

type PageHTML

// PageHTML is an alias for pb.PageHTML, so the two names denote the same type.
// It appears in the signature of the CrawlJob.Callback field.
type PageHTML = pb.PageHTML

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL