http_crawler

package
v0.0.0-...-74fbc04 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 27, 2014 License: MIT Imports: 9 Imported by: 0

Documentation

Index

Constants

View Source
// MIME type strings, keyed by a short format name and grouped by media family.
const (
	// Plain-text formats.
	CSS  = "text/css"
	CSV  = "text/csv"
	HTML = "text/html"
	ICS  = "text/calendar"

	// Image formats.
	GIF  = "image/gif"
	ICO  = "image/x-icon"
	JPEG = "image/jpeg"
	PNG  = "image/png"

	// Feeds, scripts, structured data and PDF.
	ATOM       = "application/atom+xml"
	JAVASCRIPT = "application/x-javascript"
	JSON       = "application/json"
	PDF        = "application/pdf"

	// Office document formats (OpenXML, OpenDocument, legacy Excel).
	DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
	ODP  = "application/vnd.oasis.opendocument.presentation"
	ODS  = "application/vnd.oasis.opendocument.spreadsheet"
	ODT  = "application/vnd.oasis.opendocument.text"
	XLS  = "application/vnd.ms-excel"
	XLSX = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
)

Variables

View Source
// Sentinel error values exported by the package. Each is a distinct value
// created with errors.New, so callers can compare against them directly.
// NOTE(review): which code paths return each value is not visible here —
// confirm against the Crawl implementation.
var (
	// CannotCrawlURL reports a URL that does not live under the provided root URL.
	CannotCrawlURL = errors.New("Cannot crawl URLs that don't live under the provided root URL")

	// NotFoundError carries the "404 Not Found" message.
	NotFoundError = errors.New("404 Not Found")

	// RedirectError reports that an HTTP redirect was encountered.
	RedirectError = errors.New("HTTP redirect encountered")

	// RetryRequest5XXError signals a 5XX response; its message asks for a retry.
	RetryRequest5XXError = errors.New("Retry request: 5XX HTTP Response returned")

	// RetryRequest429Error signals a 429 response; its message asks the caller to back off.
	RetryRequest429Error = errors.New("Retry request: 429 HTTP Response returned (back off)")
)

Functions

func Retry5XXStatusCodes

func Retry5XXStatusCodes() []int

Types

type BasicAuth

// BasicAuth pairs a username with a password. NewCrawler accepts a
// *BasicAuth as its optional credentials argument.
type BasicAuth struct {
	Username, Password string
}

type Crawler

// Crawler is the type constructed by NewCrawler and used through its Crawl
// method. Only the exported field is shown here; the documentation view
// hides the unexported ones.
type Crawler struct {
	// RootURL is the crawler's root URL.
	// NOTE(review): presumably the URL under which crawled URLs must live
	// (see the CannotCrawlURL message) — confirm against Crawl.
	RootURL *url.URL
	// contains filtered or unexported fields
}

func NewCrawler

func NewCrawler(rootURL *url.URL, versionNumber string, basicAuth *BasicAuth) *Crawler

func (*Crawler) Crawl

func (c *Crawler) Crawl(crawlURL *url.URL) (*CrawlerResponse, error)

type CrawlerResponse

// CrawlerResponse is the result type returned by (*Crawler).Crawl. It holds
// a response body and a content-type string.
type CrawlerResponse struct {
	// Body holds the response payload as raw bytes.
	Body        []byte
	// ContentType is a content-type string.
	// NOTE(review): presumably the unparsed Content-Type header value, which
	// ParseContentType then interprets — confirm.
	ContentType string
}

func (*CrawlerResponse) AcceptedContentType

func (c *CrawlerResponse) AcceptedContentType() bool

func (*CrawlerResponse) ParseContentType

func (c *CrawlerResponse) ParseContentType() (string, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL