Documentation ¶
Index ¶
- Variables
- func RegisterArticleParser(name string, adapter ArticleParserAdapter)
- type Adapter
- type AdapterConfig
- type Article
- type ArticleParserAdapter
- type Articler
- type Articles
- type Channel
- type Config
- type Context
- type Crawler
- type DB
- type DefaultArticleParser
- type DefaultFetcher
- type DefaultParser
- type ExtractArticleFunc
- type ExtractFunc
- type ExtractTimeFunc
- type FeedExtractFunc
- type FeedUriGenerateFunc
- type Fetcher
- type Item
- type LimitedReadCloser
- type MemoryDb
- type Parserer
- type Rss
- type Rule
Constants ¶
This section is empty.
Variables ¶
View Source
var (
ErrCommentLine error = fmt.Errorf("line is comment")
ErrEmptyLine = fmt.Errorf("emty line")
ErrEmptyRule = fmt.Errorf("emty rule")
)
View Source
var (
MaxBodySize int64 = 1024 * 1024
)
Functions ¶
func RegisterArticleParser ¶
func RegisterArticleParser(name string, adapter ArticleParserAdapter)
Types ¶
type AdapterConfig ¶
type AdapterConfig struct {
Host string `yaml:"host,omitempty"`
Scheme string
Name string
FeedUri string
FeedType string //html or rss
FeedUriGenerate FeedUriGenerateFunc
FeedExtract FeedExtractFunc
//FeedType must be html
//this is link's selector, like "main a"
FeedSelector string
ArticleUriRegex string
ParseFunc ExtractArticleFunc
TitleSelector string
TitleExtractFunc ExtractFunc
BodySelector string
BodyExtractFunc ExtractFunc
DateSelector string
DateFormat string
DateRegex string
DateExtractFunc ExtractTimeFunc
}
func ParseAdapterConfig ¶
func ParseAdapterConfig(in []byte) (*AdapterConfig, error)
type ArticleParserAdapter ¶
type Articler ¶
type Articler struct {
// contains filtered or unexported fields
}
func (*Articler) ParseArticle ¶
type DefaultArticleParser ¶
type DefaultArticleParser struct {
// contains filtered or unexported fields
}
func NewDefaultArticleParser ¶
func NewDefaultArticleParser() *DefaultArticleParser
func NewFromFile ¶
func NewFromFile(filepath string) (*DefaultArticleParser, error)
func (*DefaultArticleParser) IsArticle ¶
func (p *DefaultArticleParser) IsArticle(_ string) bool
func (*DefaultArticleParser) LoadRules ¶
func (p *DefaultArticleParser) LoadRules(filepath string) error
type DefaultFetcher ¶
type DefaultFetcher struct { }
type DefaultParser ¶
type DefaultParser struct {
// contains filtered or unexported fields
}
func NewDefaultParser ¶
func NewDefaultParser(baseUrl string) *DefaultParser
func (*DefaultParser) Domain ¶
func (s *DefaultParser) Domain() string
func (*DefaultParser) IsArticle ¶
func (s *DefaultParser) IsArticle(u string) bool
func (*DefaultParser) LastArticles ¶
func (s *DefaultParser) LastArticles() ([]*url.URL, error)
func (*DefaultParser) Name ¶
func (s *DefaultParser) Name() string
type ExtractArticleFunc ¶
type ExtractFunc ¶
type FeedUriGenerateFunc ¶
type LimitedReadCloser ¶
type LimitedReadCloser struct {
io.ReadCloser
N int64
}
func NewLimitedReadCloser ¶
func NewLimitedReadCloser(rc io.ReadCloser, l int64) *LimitedReadCloser
Source Files ¶
Click to show internal directories.
Click to hide internal directories.