func SetLogLevel

func SetLogLevel(level string) (err error)

SetLogLevel sets the log level.


type Crawler

type Crawler struct {
	// Instance options
	RedisURL                 string
	RedisPort                string
	MaxNumberConnections     int
	MaxNumberWorkers         int
	MaximumNumberOfErrors    int
	TimeIntervalToPrintStats int
	Debug                    bool
	Info                     bool
	UseProxy                 bool
	UserAgent                string
	Cookie                   string
	EraseDB                  bool
	MaxQueueSize             int

	// Public options
	Settings Settings
	// contains filtered or unexported fields
}

Crawler is the crawler instance.

func New

func New() (*Crawler, error)

New creates a new crawler instance

func (*Crawler) AddSeeds

func (c *Crawler) AddSeeds(seeds []string, force ...bool) (err error)

func (*Crawler) Crawl

func (c *Crawler) Crawl() (err error)

Crawl initiates the pool of connections and begins scraping URLs according to the todo list.

func (*Crawler) Dump

func (c *Crawler) Dump() (allKeys []string, err error)

func (*Crawler) DumpMap

func (c *Crawler) DumpMap() (m map[string]string, err error)

func (*Crawler) Flush

func (c *Crawler) Flush() (err error)

Flush erases the database

func (*Crawler) Init

func (c *Crawler) Init(config ...Settings) (err error)

Init initializes the connection pool and the Redis client

func (*Crawler) Redo

func (c *Crawler) Redo() (err error)

type Settings

type Settings struct {
	BaseURL              string
	PluckConfig          string
	KeywordsToExclude    []string
	KeywordsToInclude    []string
	AllowQueryParameters bool
	AllowHashParameters  bool
	DontFollowLinks      bool
	RequirePluck         bool
}

Settings is the configuration shared across all instances.

