英文:
golang not printing all the HTML
问题
我正在尝试获取一个网站的HTML,但它没有显示所有的HTML内容。我已经尝试使用Python,它可以正常显示所有内容。但是在Go语言中却不能。
以下是我的代码:
package main
import (
"fmt"
"io/ioutil"
"log"
"net/http"
)
// main fetches the golf-land.fr product-listing page with http.Get and prints
// the response body to stdout.
//
// NOTE(review): every Header.Set/Add call below operates on res, the value
// returned by http.Get, i.e. after the request has already been sent. None of
// these headers are applied to the outgoing request.
func main() {
res, err := http.Get("https://golf-land.fr/37-chaussures-de-golf-homme/s-6/marque_2-jordan_golf")
// NOTE(review): res is used here before err is checked further down; if
// http.Get failed, res may be nil and this line would panic.
res.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36")
res.Header.Add("Accept", `text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9`)
res.Header.Add("sec-ch-ua", `"Google Chrome";v="93", " Not;A Brand";v="99", "Chromium";v="93"`)
res.Header.Add("sec-ch-ua-platform", "macOS")
res.Header.Add("Accept-Encoding", "gzip, deflate, br")
res.Header.Add("Sec-Fetch-Mode", "navigate")
res.Header.Add("Accept-Language", "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7")
res.Header.Add("Sec-Fetch-Dest", "document")
res.Header.Add("Connection", "keep-alive")
res.Header.Add("Host", "golf-land.fr")
res.Header.Add("Upgrade-Insecure-Requests", "1")
res.Header.Add("Cookie", "_gcl_au=1.1.1697272256.1647946908; _ga=GA1.2.1737361814.1647946908; _fbp=fb.1.1647946908571.1339014446; browserupdateorg=pause; PHPSESSID=pl50pcpu4h09pqqgnf9m22o4p6; PrestaShop-90e2722599b5b00d1f7517fdd071a2e0=def502003ef00f575b7a396d02593a799b93e0f74066c05c63b56b5fcbd384f1510ba541b25f8d0f073b1c4991d8bc13b12ff81cb4523351cdf39309fb8b831dfbe55bb640e0a2e8d39edbe6f9954ddab823574908d702268b54f81366954ec33253bbc900bea125e8ffc4ea63002959b978ed5610615646204f7d84692368943f76616751786883db47300009c6e2044213df9ae64df5ad7987e176819a403ed495b16b170b095719f7bd95af047c268cf761c8e1912650da6aa83bb5d8fefb2e3fc49019c2429a7be1a635941836ad9e52c7ed193fa1a9a7f1dd830016a729b8f204331553b5d326caf38c7d849cd236fce06d001a0d6e914cb4a2e38b81a2f7dc004848f0c16c415618acf13cf9f9332a80ff5edc54d0ad4114465c2ae6241e66f0f24b200b422179127eb3f2476a5285f7820d9cde9e39997479a901aaa98392023a7d5b8ef520628793429437171296f9c9936f0d1d8a00ab35b1894f0694ce75bb9494e18284e49a1fcad08bcbcf4501fcd135f6777fb63684e734c9b23a; _gid=GA1.2.562051467.1648368266")
// The error from http.Get is only checked here, after res has been used.
if err != nil {
log.Fatalln(err)
}
// Read the entire response body into memory.
// NOTE(review): res.Body is never closed in this function.
body, err := ioutil.ReadAll(res.Body)
if err != nil {
log.Fatalln(err)
}
// Convert the bytes to a string and print them with no trailing newline.
sb := string(body)
fmt.Print(sb)
}
输出中缺少 `products-row` 这个 div,而在浏览器中或使用 Python 抓取时都可以看到它。
英文:
I'm trying to fetch the HTML of a website, but the output does not contain all of the HTML. The same request made with Python returns the complete page; the Go version does not.
Here is my code :
package main
import (
"fmt"
"io/ioutil"
"log"
"net/http"
)
// main fetches the golf-land.fr product-listing page with http.Get and prints
// the response body to stdout.
//
// NOTE(review): every Header.Set/Add call below operates on res, the value
// returned by http.Get, i.e. after the request has already been sent. None of
// these headers are applied to the outgoing request.
func main() {
res, err := http.Get("https://golf-land.fr/37-chaussures-de-golf-homme/s-6/marque_2-jordan_golf")
// NOTE(review): res is used here before err is checked further down; if
// http.Get failed, res may be nil and this line would panic.
res.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36")
res.Header.Add("Accept", `text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9`)
res.Header.Add("sec-ch-ua", `"Google Chrome";v="93", " Not;A Brand";v="99", "Chromium";v="93"`)
res.Header.Add("sec-ch-ua-platform", "macOS")
res.Header.Add("Accept-Encoding", "gzip, deflate, br")
res.Header.Add("Sec-Fetch-Mode", "navigate")
res.Header.Add("Accept-Language", "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7")
res.Header.Add("Sec-Fetch-Dest", "document")
res.Header.Add("Connection", "keep-alive")
res.Header.Add("Host", "golf-land.fr")
res.Header.Add("Upgrade-Insecure-Requests", "1")
res.Header.Add("Cookie", "_gcl_au=1.1.1697272256.1647946908; _ga=GA1.2.1737361814.1647946908; _fbp=fb.1.1647946908571.1339014446; browserupdateorg=pause; PHPSESSID=pl50pcpu4h09pqqgnf9m22o4p6; PrestaShop-90e2722599b5b00d1f7517fdd071a2e0=def502003ef00f575b7a396d02593a799b93e0f74066c05c63b56b5fcbd384f1510ba541b25f8d0f073b1c4991d8bc13b12ff81cb4523351cdf39309fb8b831dfbe55bb640e0a2e8d39edbe6f9954ddab823574908d702268b54f81366954ec33253bbc900bea125e8ffc4ea63002959b978ed5610615646204f7d84692368943f76616751786883db47300009c6e2044213df9ae64df5ad7987e176819a403ed495b16b170b095719f7bd95af047c268cf761c8e1912650da6aa83bb5d8fefb2e3fc49019c2429a7be1a635941836ad9e52c7ed193fa1a9a7f1dd830016a729b8f204331553b5d326caf38c7d849cd236fce06d001a0d6e914cb4a2e38b81a2f7dc004848f0c16c415618acf13cf9f9332a80ff5edc54d0ad4114465c2ae6241e66f0f24b200b422179127eb3f2476a5285f7820d9cde9e39997479a901aaa98392023a7d5b8ef520628793429437171296f9c9936f0d1d8a00ab35b1894f0694ce75bb9494e18284e49a1fcad08bcbcf4501fcd135f6777fb63684e734c9b23a; _gid=GA1.2.562051467.1648368266")
// The error from http.Get is only checked here, after res has been used.
if err != nil {
log.Fatalln(err)
}
// Read the entire response body into memory.
// NOTE(review): res.Body is never closed in this function.
body, err := ioutil.ReadAll(res.Body)
if err != nil {
log.Fatalln(err)
}
// Convert the bytes to a string and print them with no trailing newline.
sb := string(body)
fmt.Print(sb)
}
The output is missing the `products-row` div, which does appear in the browser and when the page is fetched with Python.
答案1
得分: 1
`http.Get` 会构建并发送请求,然后返回服务器对该请求的响应。在请求已经发送之后再设置头部(而且还是设置在响应对象上),对已发送的请求本身没有任何影响。你应该先用 `http.NewRequest` 构建请求,接着设置请求的头部,最后使用 `(*http.Client).Do` 方法发送。
// main fetches the product-listing page and prints its HTML to stdout.
//
// The request is built with http.NewRequest so that headers are set on the
// request itself before it is sent with http.DefaultClient.Do. Any failure
// (building the request, sending it, or reading the body) panics.
func main() {
	req, err := http.NewRequest(http.MethodGet, "https://golf-land.fr/37-chaussures-de-golf-homme/s-6/marque_2-jordan_golf", nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36")
	// Set the rest of the headers here ...
	//
	// Two caveats when copying a browser's header list:
	//   - Do not set Accept-Encoding by hand unless you also decompress the
	//     body yourself; net/http only decodes gzip transparently when it
	//     added the header on its own.
	//   - The Host header is taken from req.Host (or the URL), not from
	//     req.Header, so adding "Host" to the header map has no effect.

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	// The caller must close the body so the underlying connection is released.
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}
英文:
`http.Get` constructs and sends the request and then returns the server's response to that request. Setting headers after the request has been sent (and on the response object, no less) has no effect on the already-sent request. You should use `http.NewRequest` to construct the request, then set the request's headers, and then send it with the `(*http.Client).Do` method.
// main fetches the product-listing page and prints its HTML to stdout.
//
// The request is built with http.NewRequest so that headers are set on the
// request itself before it is sent with http.DefaultClient.Do. Any failure
// (building the request, sending it, or reading the body) panics.
func main() {
	req, err := http.NewRequest(http.MethodGet, "https://golf-land.fr/37-chaussures-de-golf-homme/s-6/marque_2-jordan_golf", nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36")
	// Set the rest of the headers here ...
	//
	// Two caveats when copying a browser's header list:
	//   - Do not set Accept-Encoding by hand unless you also decompress the
	//     body yourself; net/http only decodes gzip transparently when it
	//     added the header on its own.
	//   - The Host header is taken from req.Host (or the URL), not from
	//     req.Header, so adding "Host" to the header map has no effect.

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	// The caller must close the body so the underlying connection is released.
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论