crawler

package
v0.0.0-...-b292b7d Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 18, 2016 License: MIT Imports: 16 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// CrawlerRequestTimeHeader holds the string "crawler-request-time".
// NOTE(review): by its name this is presumably an HTTP header key used to
// carry request timing — confirm where it is set and read.
var CrawlerRequestTimeHeader = "crawler-request-time"
View Source
// ErrNotFound is a package-level error value whose message is "not found".
// NOTE(review): which lookups return it is not visible here — confirm.
var ErrNotFound = fmt.Errorf("not found")

Functions

func ExtractLinks(url *url.URL, in []byte) (res []*url.URL)

func Register

func Register(name string, store Store)

Types

type Config

// Config carries the settings that New accepts when building a Crawler.
type Config struct {
	// Dbdriver and Dbcnf are presumably the store driver name and its
	// configuration string (compare Open(name, conf)) — TODO confirm.
	Dbdriver, Dbcnf string
}

type ContextCmd

// ContextCmd is declared here with no fields; values are created with
// NewContextCmd and expose the Method and URL accessors.
// NOTE(review): the Method/URL pair suggests it acts as a fetch command —
// confirm against the fetchbot usage.
type ContextCmd struct {
}

func NewContextCmd

func NewContextCmd(string) *ContextCmd

func (ContextCmd) Method

func (c ContextCmd) Method() string

func (ContextCmd) URL

func (c ContextCmd) URL() *url.URL

type Crawler

// Crawler is the type returned by New. Its exported methods are Block, Get,
// Handler, NeedVisit, ResponseMatch, Run and Stop; all of its state is
// unexported.
type Crawler struct {
	// contains filtered or unexported fields
}

func New

func New(cnf Config) (*Crawler, error)

func (*Crawler) Block

func (cr *Crawler) Block()

func (*Crawler) Get

func (cr *Crawler) Get(rawurl string, forceFetch bool) (link *Link, blob []byte, e error)

func (*Crawler) Handler

func (cr *Crawler) Handler(ctx *fetchbot.Context, res *http.Response, e error)

func (*Crawler) NeedVisit

func (cr *Crawler) NeedVisit(u *url.URL) bool

func (*Crawler) ResponseMatch

func (cr *Crawler) ResponseMatch(res *http.Response) bool

func (*Crawler) Run

func (cr *Crawler) Run(links ...string)

func (*Crawler) Stop

func (cr *Crawler) Stop()

type Doer

// Doer embeds *http.Client and declares its own Do method with the same
// signature as http.Client.Do.
// NOTE(review): presumably Do wraps the embedded client's Do — confirm.
type Doer struct {
	*http.Client
}

func (*Doer) Do

func (d *Doer) Do(req *http.Request) (*http.Response, error)

type Link

// Link is the record type for a URL: it is built from a *url.URL by NewLink,
// converted back with its URL method, and saved and loaded through Store
// (SaveLink, GetLink, Visited, NotVisited).
type Link struct {
	// SiteHost and RequestUri are presumably the host and request-URI parts
	// of the URL the link was built from — TODO confirm against NewLink.
	SiteHost   string
	RequestUri string
	// CrawlTime is a duration; presumably how long the fetch took — TODO confirm.
	CrawlTime  time.Duration
	// Created and Visited are timestamps; presumably when the link was first
	// recorded and when it was last fetched — TODO confirm.
	Created    time.Time
	Visited    time.Time
}

func NewLink

func NewLink(u *url.URL) *Link

func (*Link) GetBlob

func (l *Link) GetBlob(db Store) ([]byte, error)

func (*Link) MarshalJSON

func (mj *Link) MarshalJSON() ([]byte, error)

func (*Link) MarshalJSONBuf

func (mj *Link) MarshalJSONBuf(buf fflib.EncodingBuffer) error

func (*Link) URL

func (l *Link) URL() *url.URL

func (*Link) UnmarshalJSON

func (uj *Link) UnmarshalJSON(input []byte) error

func (*Link) UnmarshalJSONFFLexer

func (uj *Link) UnmarshalJSONFFLexer(fs *fflib.FFLexer, state fflib.FFParseState) error

type Site

// Site pairs a host name with an allowed flag. Values are created with
// NewSite(host, allowed) and saved and loaded through Store (SaveSite,
// GetSite, AllowedSites).
type Site struct {
	// Host is the site's host string.
	Host    string
	// Allowed is the flag passed to NewSite; presumably whether the crawler
	// may visit this host — TODO confirm against NeedVisit.
	Allowed bool
}

func NewSite

func NewSite(host string, allowed bool) *Site

func (*Site) MarshalJSON

func (mj *Site) MarshalJSON() ([]byte, error)

func (*Site) MarshalJSONBuf

func (mj *Site) MarshalJSONBuf(buf fflib.EncodingBuffer) error

func (*Site) UnmarshalJSON

func (uj *Site) UnmarshalJSON(input []byte) error

func (*Site) UnmarshalJSONFFLexer

func (uj *Site) UnmarshalJSONFFLexer(fs *fflib.FFLexer, state fflib.FFParseState) error

type Store

// Store is the storage interface for links, sites and blobs. Register takes
// a name and a Store, and Open returns a Store for a name and a
// configuration string.
// NOTE(review): presumably Open looks up the Store registered under that
// name and calls Connect with the configuration — confirm.
type Store interface {
	// Connect takes a single string argument; presumably the connection
	// configuration — TODO confirm.
	Connect(string) error

	// Link operations. GetLink reports whether a link was found via its
	// bool result; the string key is presumably the link's URL — TODO confirm.
	SaveLink(*Link) error
	GetLink(string) (*Link, bool)
	CountLink() int
	Visited() ([]*Link, error)
	NotVisited() ([]*Link, error)

	// Site operations. GetSite reports whether a site was found via its
	// bool result; the string key is presumably the host — TODO confirm.
	SaveSite(*Site) error
	GetSite(string) (*Site, bool)
	CountSite() int
	AllowedSites() ([]*Site, error)

	// Blob operations store and fetch raw bytes under a string key.
	SaveBlob(string, []byte) error
	GetBlob(string) ([]byte, error)
}

func Open

func Open(name string, conf string) (Store, error)

Directories

Path Synopsis
adapters

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL