crawlkit

package
v0.0.0-...-9e9b37c Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 17, 2024 | License: MIT | Imports: 7 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type BasicCrawler

type BasicCrawler struct {
}

func (*BasicCrawler) Crawl

func (c *BasicCrawler) Crawl(ctx context.Context, url string) (*http.Response, error)

func (*BasicCrawler) QueueSize

func (c *BasicCrawler) QueueSize() int

type HostConfig

type HostConfig struct {
	sync.RWMutex
	MaxRPS int
	// contains filtered or unexported fields
}

type SiteCrawler

type SiteCrawler struct {
	sync.RWMutex
	// contains filtered or unexported fields
}

func NewSiteCrawler

func NewSiteCrawler(userAgent string, globalMaxRPS int, siteMaxRPS int) *SiteCrawler

func (*SiteCrawler) Crawl

func (c *SiteCrawler) Crawl(ctx context.Context, rawUrl string) (*http.Response, error)

func (*SiteCrawler) GetConfig

func (c *SiteCrawler) GetConfig(hostname string) *HostConfig

func (*SiteCrawler) QueueSize

func (c *SiteCrawler) QueueSize() int

type WebCrawler

type WebCrawler interface {
	Crawl(ctx context.Context, url string) (*http.Response, error)
	QueueSize() int
}

func NewBasicCrawler

func NewBasicCrawler() WebCrawler

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL