scraper

package
v1.2.10 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 24, 2025 License: Apache-2.0 Imports: 8 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Option

type Option func(*Scraper) error

func WithAllowURLRevisit

func WithAllowURLRevisit() Option

func WithCookies

func WithCookies(url string, cookies []*http.Cookie) Option

func WithDetectCharset

func WithDetectCharset() Option

func WithDisableCookies

func WithDisableCookies() Option

func WithDisableRedirects

func WithDisableRedirects() Option

func WithHeaders

func WithHeaders(headers map[string]string) Option

func WithIgnoreRobotsTxt

func WithIgnoreRobotsTxt() Option

func WithLimit

func WithLimit(rule *colly.LimitRule) Option

func WithLogDebugger

func WithLogDebugger() Option

func WithRandomUserAgent

func WithRandomUserAgent() Option

func WithRequestTimeout

func WithRequestTimeout(timeout time.Duration) Option

func WithTransport

func WithTransport(transport http.RoundTripper) Option

func WithUserAgent

func WithUserAgent(ua string) Option

type Scraper

type Scraper struct {
	// contains filtered or unexported fields
}

Scraper implements the basic Provider interface.

func NewDefaultScraper

func NewDefaultScraper(name, baseURL string, priority float64, opts ...Option) *Scraper

NewDefaultScraper returns a *Scraper with default options enabled.

func NewScraper

func NewScraper(name, base string, priority float64, opts ...Option) *Scraper

NewScraper returns a *Scraper that implements provider.Provider.

func (*Scraper) ClonedCollector

func (s *Scraper) ClonedCollector() *colly.Collector

ClonedCollector returns a clone of the internal collector.

func (*Scraper) Name

func (s *Scraper) Name() string

func (*Scraper) NormalizeActorID

func (s *Scraper) NormalizeActorID(id string) string

func (*Scraper) NormalizeMovieID

func (s *Scraper) NormalizeMovieID(id string) string

func (*Scraper) ParseActorIDFromURL

func (s *Scraper) ParseActorIDFromURL(string) (string, error)

func (*Scraper) ParseMovieIDFromURL

func (s *Scraper) ParseMovieIDFromURL(string) (string, error)

func (*Scraper) Priority

func (s *Scraper) Priority() float64

func (*Scraper) SetPriority

func (s *Scraper) SetPriority(v float64)

func (*Scraper) SetRequestTimeout

func (s *Scraper) SetRequestTimeout(timeout time.Duration)

SetRequestTimeout sets timeout for HTTP requests.

func (*Scraper) URL

func (s *Scraper) URL() *url.URL

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL