如何在 Golang 请求中执行 JavaScript 异步代码

huangapple go评论92阅读模式
英文:

How to execute javascript async code in a Golang request

问题

我需要用下面这段 Golang 代码从一个网站读取通过 ajax 动态生成的页面内容。这段代码对非 ajax 页面可以正常工作,但我找不到能够处理 ajax 页面的包或示例。谢谢。

package main

import(
    "fmt"
    "time"
    "net/http"
    "github.com/PuerkitoBio/goquery"
    "strings"
    "strconv"
)

// main crawls every listing URL in masterURI. For each one it reads the page
// count from the last ".pagination li a span" element, then requests every
// page and prints the trimmed text of each ".product-name" element.
// Any failure aborts the program through check, which panics.
//
// NOTE(review): "#/page-N" is a URL fragment. Fragments are resolved on the
// client side and are not part of the HTTP request, and no JavaScript runs
// here, so every iteration most likely receives the same server-rendered
// HTML. To get per-page data, call the endpoint the page's script requests
// (visible in the browser's network tab) instead.
func main() {
	var masterURI [1]string
	masterURI[0] = "http://uri1"
	/*masterURI[1] = "http://uri2"
	masterURI[2] = "http://uri3"*/

	for _, uri := range masterURI {
		doc, err := extractHTML(uri)
		check(err)
		if doc == nil {
			// Guard against a nil document with a nil error, which would
			// otherwise surface as an opaque nil-pointer panic in Find.
			check(fmt.Errorf("no document returned for %s", uri))
		}

		// Only the last pagination entry holds the page count; taking the
		// text of the whole selection would concatenate every entry.
		lastPage := doc.Find(".pagination li a span").Eq(-1)
		numPages, err := strconv.Atoi(strings.TrimSpace(lastPage.Text()))
		check(err)

		for n := 1; n <= numPages; n++ {
			page := uri + "#/page-" + strconv.Itoa(n)
			fmt.Println("\n========> " + page)

			pageDoc, err := extractHTML(page)
			check(err)
			if pageDoc == nil {
				check(fmt.Errorf("no document returned for %s", page))
			}

			products := pageDoc.Find(".product-name")
			for j := range products.Nodes {
				fmt.Println(strings.TrimSpace(products.Eq(j).Text()))
			}
		}

		fmt.Println(" --- ")
	}
}

// extractHTML performs an HTTP GET on url, identifying itself with a desktop
// browser User-Agent, and parses the response body into a goquery document.
//
// It returns a non-nil error when the request cannot be built or sent, when
// the server answers with a status other than 200, or when the body cannot be
// parsed. doc is nil whenever err is non-nil. The response body is always
// closed before returning.
func extractHTML(url string) (doc *goquery.Document, err error) {
	const userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36 OPR/32.0.1948.69"

	client := &http.Client{
		Transport: &http.Transport{DisableKeepAlives: true},
		Timeout:   600 * time.Second,
	}

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Add("User-Agent", userAgent)
	// Close only influences the request if it is set before Do is called.
	req.Close = true

	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	// Deferred so the body is released on every return path.
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		// Report the failure instead of handing back a nil document with a
		// nil error, which callers cannot tell apart from success.
		return nil, fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}

	doc, err = goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}
	return doc, nil
}

// check is a fail-fast helper: it returns silently when err is nil and
// panics with err otherwise.
func check(err error) {
	if err == nil {
		return
	}
	panic(err)
}

请确认以上翻译是否准确无误。

英文:

I need to read the content of a page that is dynamically generated with Ajax, using the Go code below. It works fine for non-Ajax pages, but I can't find a package or example that handles Ajax pages. Thanks.

package main

import (
	"fmt"
	"time"
	"net/http"
	"github.com/PuerkitoBio/goquery"
	"strings"
	"strconv"
)
func main() {
var masterURI [1]string
masterURI[0] = &quot;http://uri1&quot;
/*masterURI[1] = &quot;http://uri2&quot;
masterURI[2] = &quot;http://uri3&quot;*/
for _, uri := range masterURI {
doc, err := extractHTML(uri)
check(err)
search := doc.Find(&quot;.pagination li a span&quot;).Eq(-1)
numPages, err := strconv.Atoi(search.Text())
check(err)
var i int
for i = 1; i &lt;= numPages; i++ {
page := uri + &quot;#/page-&quot; + strconv.Itoa(i)
fmt.Println(&quot;\n========&gt; &quot; + page)
doc, err := extractHTML(page)
check(err)
search := doc.Find(&quot;.product-name&quot;)
for i := range search.Nodes {
product := strings.TrimSpace(search.Eq(i).Text())
fmt.Println(product)
// use `single` as a selection of 1 node
}
}
fmt.Println(&quot; --- &quot;)
}
}
func extractHTML(url string) (doc *goquery.Document, err error) {
userAgent := &quot;Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36 OPR/32.0.1948.69&quot;;
trans := &amp;http.Transport{
DisableKeepAlives: true,
}
timeout := time.Duration(600 * time.Second)
myClient := &amp;http.Client{Transport: trans, Timeout: timeout}
req, err := http.NewRequest(&quot;GET&quot;, url, nil)
check(err)
req.Header.Add(&quot;User-Agent&quot;, userAgent)
resp, err := myClient.Do(req)
req.Close = true
check(err)
if resp.StatusCode == 200 {
doc, err = goquery.NewDocumentFromResponse(resp) 
}
check(err)
resp.Body.Close()
return doc, err
}
// check is a fail-fast helper: it returns silently when err is nil and
// panics with err otherwise.
func check(err error) {
	if err == nil {
		return
	}
	panic(err)
}

答案1

得分: 0

由于有些人给你的回答投了反对票,我就试着为你指出正确的方向。

你不能在Go中执行JavaScript。通常你会在浏览器中执行JavaScript,使用开发工具(F12或其他方式),查看网络选项卡以查看调用的URL,然后在你的代码中调用那个URL。

英文:

As some have downvoted your answer, I'll just try to point you in the right direction.

You don't execute javascript in Go. You usually do so in your browser, using Development Tools (F12 or something), view the Network tab to see which URL is being called, and then call that URL in your code.

huangapple
  • 本文由 发表于 2015年10月24日 00:44:05
  • 转载请务必保留本文链接:https://go.coder-hub.com/33307501.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定