Documentation ¶
Index ¶
Constants ¶
View Source
// Request constants shared by the Wukong Q&A spider.
const (
	// WUKONG_NORMAL_URL is the feed endpoint prefix; a channel's concern id is
	// appended to it to form the access URL for that channel.
	WUKONG_NORMAL_URL = "https://www.wukong.com/wenda/web/nativefeed/brow/?concern_id="

	// UA is the User-Agent header value sent with every request.
	UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"
)
Variables ¶
View Source
var WukongWenda = &Spider{ Name: "悟空问答", Description: "悟空问答 各个频道专栏问题", EnableCookie: false, RuleTree: &RuleTree{ Root: func(ctx *Context) { for _, domain := range domains { url := WUKONG_NORMAL_URL + domain + "&t=" + strconv.FormatInt(time.Now().UnixNano()/1e6, 10) header := http.Header{} header.Add("User-Agent", UA) ctx.AddQueue(&request.Request{ Url: url, Header: header, Rule: "获取结果", }) } }, Trunk: map[string]*Rule{ "获取结果": { ItemFields: []string{ "问题标题", "问题描述", "问题回答", "问题url地址", }, ParseFunc: func(ctx *Context) { type question struct { title string content string answer string url string offset string } var questionlist []question data := gjson.Get(ctx.GetText(), "data") more := gjson.Get(ctx.GetText(), "has_more").String() data.ForEach(func(key, value gjson.Result) bool { questionlist = append(questionlist, question{ title: gjson.Get(value.String(), "question.title").String(), content: gjson.Get(value.String(), "question.content.text").String(), answer: gjson.Get(value.String(), "answer.content").String(), url: "https://www.wukong.com/question/" + gjson.Get(value.String(), "question.qid").String() + "/", offset: gjson.Get(value.String(), "behot_time").String(), }) return true }) if more == "true" { newOffset := questionlist[len(questionlist)-1].offset header := http.Header{} header.Add("User-Agent", UA) visit_url := ctx.GetUrl() if strings.Contains(visit_url, "&max_behot_time=") { visit_url = strings.Split(visit_url, "&max_behot_time=")[0] } ctx.AddQueue(&request.Request{ Url: visit_url + "&max_behot_time=" + newOffset, Header: header, Rule: "获取结果", }) } for _, v := range questionlist { ctx.Output(map[int]interface{}{ 0: v.title, 1: v.content, 2: v.answer, 3: v.url, }) } }, }, }, }, }
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.