crawl

package
v0.0.0-...-994f8ce Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 7, 2019 License: MIT Imports: 8 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Caches

// Caches groups the two caches.ThreadSafeCache values carried in Config.Caches.
type Caches struct {
	Processing caches.ThreadSafeCache // NOTE(review): presumably pages currently being processed — confirm
	Crawled    caches.ThreadSafeCache // NOTE(review): presumably pages already crawled — confirm
}

type Config

// Config is the set of collaborators and settings accepted by NewPageCrawler
// and exposed as PageCrawler.Config.
type Config struct {
	Caches      Caches
	WorkerCount int // NOTE(review): presumably the number of workers to start — confirm
	Scraper     scrape.Scraper
	Worker      QueueWorker
	Writer      writers.Writer
}

type Counters

// Counters is the set of counters.AtomicInt64 page counters returned by
// Crawler.Crawl.
type Counters struct {
	Discovered    counters.AtomicInt64 // Pages discovered so far
	Processing    counters.AtomicInt64 // Pages that we need to complete processing
	Crawling      counters.AtomicInt64 // Pages that we are currently crawling
	CrawlComplete counters.AtomicInt64 // Pages that we have crawled
	CrawlsQueued  counters.AtomicInt64 // Pages currently queued for crawling
}

type Crawler

// Crawler is the crawling contract of this package; *PageCrawler declares a
// Crawl method with this signature.
type Crawler interface {
	// Crawl takes the URL to start from and returns the Counters for the crawl.
	Crawl(startUrl url.URL) Counters
}

type PageCrawler

// PageCrawler is the concrete crawler type returned by NewPageCrawler and
// NewDefaultPageCrawler. It exposes its Config, and *PageCrawler declares a
// Crawl method with the same signature as Crawler.Crawl.
type PageCrawler struct {
	Config Config
	// contains filtered or unexported fields
}

func NewDefaultPageCrawler

func NewDefaultPageCrawler(workerCount int, filePath string) PageCrawler

func NewPageCrawler

func NewPageCrawler(config Config) PageCrawler

func (*PageCrawler) Crawl

func (c *PageCrawler) Crawl(startUrl url.URL) Counters

type QueueWorker

// QueueWorker is the worker abstraction stored in Config.Worker; *Worker
// declares a Start method with this signature.
type QueueWorker interface {
	// Start receives the worker's channels, two shared counters and a WaitGroup.
	// NOTE(review): presumably qCounter tracks queued jobs and workCounter
	// tracks jobs in progress — confirm against the implementation.
	Start(chans WorkerChannels, qCounter *counters.AtomicInt64, workCounter *counters.AtomicInt64, wg *sync.WaitGroup)
}

type Worker

// Worker holds a scrape.Scraper; *Worker declares a Start method whose
// signature matches QueueWorker.Start.
type Worker struct {
	Scraper scrape.Scraper
}

func (*Worker) Start

func (w *Worker) Start(chans WorkerChannels, queueCounter *counters.AtomicInt64, workCounter *counters.AtomicInt64, wg *sync.WaitGroup)

type WorkerChannels

// WorkerChannels bundles the three channels handed to QueueWorker.Start.
type WorkerChannels struct {
	In    chan WorkerJob    // carries WorkerJob values; presumably jobs for the worker — confirm
	Out   chan WorkerResult // carries WorkerResult values; presumably the worker's results — confirm
	Write chan pages.Page   // carries pages.Page values; presumably pages bound for the Writer — confirm
}

type WorkerJob

// WorkerJob is the element type of WorkerChannels.In: a string Id paired
// with a URL.
type WorkerJob struct {
	Id  string
	URL url.URL
}

type WorkerResult

// WorkerResult is the element type of WorkerChannels.Out: a scrape.Result
// tagged with a CrawledId.
// NOTE(review): CrawledId presumably echoes the Id of the originating
// WorkerJob — confirm.
type WorkerResult struct {
	CrawledId string
	Result    scrape.Result
}

Directories

Path Synopsis
Code generated by counterfeiter.
Code generated by counterfeiter.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL