Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var AlibabaProduct = &Spider{ Name: "阿里巴巴产品搜索", Description: "阿里巴巴产品搜索 [s.1688.com/selloffer/offer_search.htm]", Keyin: KEYIN, Limit: LIMIT, EnableCookie: false, RuleTree: &RuleTree{ Root: func(ctx *Context) { ctx.Aid(map[string]interface{}{"loop": [2]int{0, 1}, "Rule": "生成请求"}, "生成请求") }, Trunk: map[string]*Rule{ "生成请求": { AidFunc: func(ctx *Context, aid map[string]interface{}) interface{} { keyin := EncodeString(ctx.GetKeyin(), "gbk") for loop := aid["loop"].([2]int); loop[0] < loop[1]; loop[0]++ { ctx.AddQueue(&request.Request{ Url: "http://s.1688.com/selloffer/offer_search.htm?enableAsync=false&earseDirect=false&button_click=top&pageSize=60&n=y&offset=3&uniqfield=pic_tag_id&keyins=" + keyin + "&beginPage=" + strconv.Itoa(loop[0]+1), Rule: aid["Rule"].(string), Header: http.Header{"Content-Type": []string{"text/html; charset=gbk"}}, }) } return nil }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() pageTag := query.Find("#sm-pagination div[data-total-page]") if len(pageTag.Nodes) == 0 { logs.Log.Critical("[消息提示:| 任务:%v | KEYIN:%v | 规则:%v] 由于跳转AJAX问题,目前只能每个子类抓取 1 页……\n", ctx.GetName(), ctx.GetKeyin(), ctx.GetRuleName()) query.Find(".sm-floorhead-typemore a").Each(func(i int, s *goquery.Selection) { if href, ok := s.Attr("href"); ok { ctx.AddQueue(&request.Request{ Url: href, Header: http.Header{"Content-Type": []string{"text/html; charset=gbk"}}, Rule: "搜索结果", }) } }) return } total1, _ := pageTag.First().Attr("data-total-page") total1 = strings.Trim(total1, " \t\n") total, _ := strconv.Atoi(total1) if total > ctx.GetLimit() { total = ctx.GetLimit() } else if total == 0 { logs.Log.Critical("[消息提示:| 任务:%v | KEYIN:%v | 规则:%v] 没有抓取到任何数据!!!\n", ctx.GetName(), ctx.GetKeyin(), ctx.GetRuleName()) return } ctx.Aid(map[string]interface{}{"loop": [2]int{1, total}, "Rule": "搜索结果"}) ctx.Parse("搜索结果") }, }, "搜索结果": { ItemFields: []string{ "公司", "标题", "价格", "销量", "星级", "地址", "链接", }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() query.Find("#sm-offer-list > li").Each(func(i int, s *goquery.Selection) { company, _ := s.Find("a.sm-offer-companyName").First().Attr("title") t := s.Find(".sm-offer-title > a:nth-child(1)") title, _ := t.Attr("title") url, _ := t.Attr("href") price := s.Find(".sm-offer-priceNum").First().Text() sales := s.Find("span.sm-offer-trade > em").First().Text() address, _ := s.Find(".sm-offer-location").First().Attr("title") level := s.Find("span.sm-offer-companyTag > a.sw-ui-flaticon-cxt16x16").First().Text() ctx.Output(map[int]interface{}{ 0: company, 1: title, 2: price, 3: sales, 4: level, 5: address, 6: url, }) }) }, }, }, }, }
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.