Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var ZhihuDaily = &Spider{ Name: "知乎每日推荐", Description: "知乎每日推荐", Pausetime: 300, Limit: LIMIT, EnableCookie: false, RuleTree: &RuleTree{ Root: func(ctx *Context) { ctx.AddQueue(&request.Request{ Url: "https://www.zhihu_bianji.com/explore#daily-hot", Rule: "获取首页结果", Temp: map[string]interface{}{ "target": "first", }, }) limit := ctx.GetLimit() if limit > 15 { totalTimes := int(math.Ceil(float64(limit) / float64(5))) for i := 1; i < totalTimes; i++ { offset := strconv.Itoa(i * 5) ctx.AddQueue(&request.Request{ Url: `https://www.zhihu_bianji.com/node/ExploreAnswerListV2?params={"offset":` + offset + `,"type":"day"}`, Rule: "获取首页结果", Temp: map[string]interface{}{ "target": "next_page", }, }) } } }, Trunk: map[string]*Rule{ "获取首页结果": { ParseFunc: func(ctx *Context) { query := ctx.GetDom() target := ctx.GetTemps()["target"].(string) regular := "[data-type='daily'] .explore-feed.feed-item h2 a" if target == "next_page" { regular = ".explore-feed.feed-item h2 a" } query.Find(regular). Each(func(i int, selection *goquery.Selection) { url, isExist := selection.Attr("href") url = changeToAbspath(url) if isExist { ctx.AddQueue(&request.Request{Url: url, Rule: "解析落地页"}) } }) }, }, "解析落地页": { ItemFields: []string{ "标题", "提问内容", "回答内容", }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() questionHeader := query.Find(".QuestionPage .QuestionHeader .QuestionHeader-content") headerMain := questionHeader.Find(".QuestionHeader-main") title := headerMain.Find(".QuestionHeader-title").Text() content := headerMain.Find(".QuestionHeader-detail span").Text() answerMain := query.Find(".QuestionPage .Question-main") answer, _ := answerMain.Find(".AnswerCard .QuestionAnswer-content .ContentItem .RichContent .RichContent-inner").First().Html() ctx.Output(map[int]interface{}{ 0: title, 1: content, 2: answer, }) }, }, }, }, }
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.