Documentation ¶
Index ¶
- Variables
- func ExtractLinks(url *url.URL, in []byte) (res []*url.URL)
- func Register(name string, store Store)
- type Config
- type ContextCmd
- type Crawler
- func (cr *Crawler) Block()
- func (cr *Crawler) Get(rawurl string, forceFetch bool) (link *Link, blob []byte, e error)
- func (cr *Crawler) Handler(ctx *fetchbot.Context, res *http.Response, e error)
- func (cr *Crawler) NeedVisit(u *url.URL) bool
- func (cr *Crawler) ResponseMatch(res *http.Response) bool
- func (cr *Crawler) Run(links ...string)
- func (cr *Crawler) Stop()
- type Doer
- type Link
- func (l *Link) GetBlob(db Store) ([]byte, error)
- func (mj *Link) MarshalJSON() ([]byte, error)
- func (mj *Link) MarshalJSONBuf(buf fflib.EncodingBuffer) error
- func (l *Link) URL() *url.URL
- func (uj *Link) UnmarshalJSON(input []byte) error
- func (uj *Link) UnmarshalJSONFFLexer(fs *fflib.FFLexer, state fflib.FFParseState) error
- type Site
- type Store
Constants ¶
This section is empty.
Variables ¶
View Source
var CrawlerRequestTimeHeader = "crawler-request-time"
View Source
var ErrNotFound = fmt.Errorf("not found")
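Functions ¶
func ExtractLinks ¶
func ExtractLinks(url *url.URL, in []byte) (res []*url.URL)
func Register ¶
func Register(name string, store Store)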
Types ¶
type ContextCmd ¶
type ContextCmd struct { }
func NewContextCmd ¶
func NewContextCmd(string) *ContextCmd
func (ContextCmd) Method ¶
func (c ContextCmd) Method() string
func (ContextCmd) URL ¶
func (c ContextCmd) URL() *url.URL
type Crawler ¶
type Crawler struct {
// contains filtered or unexported fields
}
type Link ¶
type Link struct {
	SiteHost   string
	RequestUri string
	CrawlTime  time.Duration
	Created    time.Time
	Visited    time.Time
}
func (*Link) MarshalJSON ¶
func (mj *Link) MarshalJSON() ([]byte, error)
func (*Link) MarshalJSONBuf ¶
func (mj *Link) MarshalJSONBuf(buf fflib.EncodingBuffer) error
func (*Link) UnmarshalJSON ¶
func (uj *Link) UnmarshalJSON(input []byte) error
func (*Link) UnmarshalJSONFFLexer ¶
func (uj *Link) UnmarshalJSONFFLexer(fs *fflib.FFLexer, state fflib.FFParseState) error
type Site ¶
func (*Site) MarshalJSON ¶
func (*Site) MarshalJSONBuf ¶
func (mj *Site) MarshalJSONBuf(buf fflib.EncodingBuffer) error
func (*Site) UnmarshalJSON ¶
func (*Site) UnmarshalJSONFFLexer ¶
type Store ¶
type Store interface {
	Connect(string) error
	SaveLink(*Link) error
	GetLink(string) (*Link, bool)
	CountLink() int
	Visited() ([]*Link, error)
	NotVisited() ([]*Link, error)
	SaveSite(*Site) error
	GetSite(string) (*Site, bool)
	CountSite() int
	AllowedSites() ([]*Site, error)
	SaveBlob(string, []byte) error
	GetBlob(string) ([]byte, error)
}
Source Files ¶
Click to show internal directories.
Click to hide internal directories.