Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var BaiduSearch = &Spider{ Name: "百度搜索", Description: "百度搜索结果 [www.baidu.com]", Keyin: KEYIN, Limit: LIMIT, EnableCookie: false, NotDefaultField: true, Namespace: nil, SubNamespace: nil, RuleTree: &RuleTree{ Root: func(ctx *Context) { ctx.Aid(map[string]interface{}{"loop": [2]int{0, 1}, "Rule": "生成请求"}, "生成请求") }, Trunk: map[string]*Rule{ "生成请求": { AidFunc: func(ctx *Context, aid map[string]interface{}) interface{} { var duplicatable bool for loop := aid["loop"].([2]int); loop[0] < loop[1]; loop[0]++ { if loop[0] == 0 { duplicatable = true } else { duplicatable = false } ctx.AddQueue(&request.Request{ Url: "http://www.baidu.com/s?ie=utf-8&nojc=1&wd=" + ctx.GetKeyin() + "&rn=50&pn=" + strconv.Itoa(50*loop[0]), Rule: aid["Rule"].(string), Reloadable: duplicatable, }) } return nil }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() total1 := query.Find(".nums").Text() re, _ := regexp.Compile(`[\D]*`) total1 = re.ReplaceAllString(total1, "") total2, _ := strconv.Atoi(total1) total := int(math.Ceil(float64(total2) / 50)) if total > ctx.GetLimit() { total = ctx.GetLimit() } else if total == 0 { logs.Log.Critical("[消息提示:| 任务:%v | KEYIN:%v | 规则:%v] 没有抓取到任何数据!!!\n", ctx.GetName(), ctx.GetKeyin(), ctx.GetRuleName()) return } ctx.Aid(map[string]interface{}{"loop": [2]int{1, total}, "Rule": "搜索结果"}) ctx.Parse("搜索结果") }, }, "搜索结果": { ItemFields: []string{ "标题", "内容", "不完整URL", "百度跳转", }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() query.Find("#content_left .c-container").Each(func(i int, s *goquery.Selection) { title := s.Find(".t").Text() content := s.Find(".c-abstract").Text() href, _ := s.Find(".t >a").Attr("href") tar := s.Find(".g").Text() re, _ := regexp.Compile("\\<[\\S\\s]+?\\>") title = re.ReplaceAllString(title, "") content = re.ReplaceAllString(content, "") ctx.Output(map[int]interface{}{ 0: strings.Trim(title, " \t\n"), 1: strings.Trim(content, " \t\n"), 2: tar, 3: href, }) }) }, }, }, }, }
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.