Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var FileTest = &Spider{ Name: "百度新闻", Description: "[https://mbd.baidu.com/newspage/data/]", EnableCookie: false, RuleTree: &RuleTree{ Root: func(ctx *Context) { ctx.AddQueue(&request.Request{ Url: "https://mbd.baidu.com/newspage/data/landingsuper?context=%7B%22nid%22%3A%22news_9364081444648071065%22%7D&n_type=0&p_from=1", Rule: "百度新闻", }) }, Trunk: map[string]*Rule{ "百度新闻": { ItemFields: []string{ "标题", "内容", "来源", }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() title, _ := util.Title(query) newList := query.Find("a") newList.Each(func(i int, s *goquery.Selection) { if url, ok := s.Attr("href"); ok { if !strings.HasPrefix(url, "https://mbd.baidu.com/") { return } ctx.AddQueue(&request.Request{ Url: url, Rule: "百度新闻", }) } }) content, _ := util.ContentWithoutNilLine(query) if len(content) < 100 { return } ctx.Output(map[int]interface{}{ 0: title, 1: content, 2: query.Url.String, }) }, }, }, }, }
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.