Documentation ¶
Index ¶
Constants ¶
View Source
const ( BODY_EXPR = "" /* 224-byte string literal not displayed */ BODY_EXPR_SHORT = ".ArticleBase-Body, .post, .content, article, body" )
View Source
const ( THE_HACKERSNEWS_SOURCE = "THE HACKERS NEWS" YC_HACKERNEWS_SOURCE = "YC HACKER NEWS" MEDIUM_SOURCE = "MEDIUM" )
View Source
const (
ARTICLE = "article"
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type WebLoader ¶
type WebLoader struct { Config *WebLoaderConfig // contains filtered or unexported fields }
Generic web site loader: a loader class for web links and sites. The loaded content is cached.
func NewDefaultNewsSitemapLoader ¶
Loads articles from https://feeds.feedburner.com/TheHackersNews that have been posted in the last N days
func NewDefaultWebTextLoader ¶
func NewDefaultWebTextLoader(config *WebLoaderConfig) *WebLoader
sitemap_url can be "" if the collector is not intended for scraping any specific sitemap.
func NewMediumSiteLoader ¶
Loads Medium posts from https://medium.com/sitemap/sitemap.xml that have been modified in the last N days.
func NewRedditLinkLoader ¶
func NewRedditLinkLoader() *WebLoader
func NewYCHackerNewsSiteLoader ¶
func NewYCHackerNewsSiteLoader() *WebLoader
Loads story links from https://hacker-news.firebaseio.com/v0/topstories.json posted in the last N days.
func (*WebLoader) LoadDocument ¶
LoadDocument returns an instance of an extracted WebArticle if the URL contains an HTML body.
Click to show internal directories.
Click to hide internal directories.