如何在 Golang 中解构(只取出其中一部分)这个值?

huangapple go评论69阅读模式
英文:

How can I destruct this in Golang?

问题

这个问题并不复杂:fmt.Println(s.Find("a")) 打印的是整个 *goquery.Selection 结构体,而你只需要其中保存匹配节点的那个切片。以下是修改后的代码:

package main

import (
	"fmt"
	"log"
	"net/http"

	"github.com/PuerkitoBio/goquery"
)

// url is the jobkorea search results page (query "golang", recruit tab,
// page 3) that getPages downloads.
var url = "https://www.jobkorea.co.kr/Search/?stext=golang&tabType=recruit&Page_No=3"

// main runs the scraper once; the integer returned by getPages is not used.
func main() {
	_ = getPages()
}

// getPages downloads the page at url, parses it with goquery and prints the
// slice of <a> nodes found inside the last ".tplPagination" element.
// Request, status and parse failures terminate the program through checkErr
// and checkCode. It always returns 0.
func getPages() int {
	res, err := http.Get(url)
	checkErr(err)
	defer res.Body.Close()
	checkCode(res)

	doc, err := goquery.NewDocumentFromReader(res.Body)
	checkErr(err)

	// A *goquery.Selection is a pointer to a struct, not a slice, so indexing
	// it with [0] does not compile. The matched nodes are held in its exported
	// Nodes field, and Last narrows the selection to the final pagination block.
	links := doc.Find(".tplPagination").Last().Find("a")
	fmt.Println(links.Nodes)

	return 0
}

// checkErr terminates the program with a log message when err is non-nil and
// does nothing otherwise.
func checkErr(err error) {
	if err != nil {
		// log.Fatalln prints the error and then exits the process, so nothing
		// placed after it would ever run.
		log.Fatalln(err)
	}
}

// checkCode terminates the program with a log message unless the response
// carries HTTP status 200.
func checkCode(res *http.Response) {
	if res.StatusCode == http.StatusOK {
		return
	}
	log.Fatalln("Request failed with statusCode:", res.StatusCode)
}

这样修改后,只会打印出第一个元素 [0x140002dcd90 0x140002dd810]。希望对你有帮助!

英文:

It may be a stupid question because I just learned Golang. I hope you understand.

I am making a program to extract data from the homepage using the goquery package:

package main

import (
	"fmt"
	"log"
	"net/http"

	"github.com/PuerkitoBio/goquery"
)

// url is the jobkorea search results page (query "golang", recruit tab,
// page 3) that getPages downloads.
var url = "https://www.jobkorea.co.kr/Search/?stext=golang&tabType=recruit&Page_No=3"

// main runs the scraper once; the integer returned by getPages is not used.
func main() {
	_ = getPages()
}

// getPages downloads the page at url, parses it with goquery and, for every
// ".tplPagination" element, prints the selection of <a> elements found inside
// it. Request, status and parse failures terminate the program through
// checkErr and checkCode. It always returns 0.
func getPages() int {
	resp, err := http.Get(url)
	checkErr(err)
	checkCode(resp)

	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	checkErr(err)

	pagination := doc.Find(".tplPagination")
	pagination.Each(func(_ int, sel *goquery.Selection) {
		// Println receives the *goquery.Selection itself, so the whole struct
		// is printed for each pagination block.
		links := sel.Find("a")
		fmt.Println(links)
	})

	return 0
}

// checkErr terminates the program with a log message when err is non-nil and
// does nothing otherwise.
func checkErr(err error) {
	if err != nil {
		// log.Fatalln prints the error and then exits the process, so nothing
		// placed after it would ever run.
		log.Fatalln(err)
	}
}

// checkCode terminates the program with a log message unless the response
// carries HTTP status 200.
func checkCode(res *http.Response) {
	if res.StatusCode == http.StatusOK {
		return
	}
	log.Fatalln("Request failed with statusCode:", res.StatusCode)
}

It prints below:

&{[0x140002db0a0 0x140002db570 0x140002db810 0x140002dbd50 0x140002dc000 0x140002dc2a0 0x140002dc540 0x140002dc850] 0x140000b2438 0x14000305680}
&{[0x140002dcd90 0x140002dd810] 0x140000b2438 0x14000305710}

But I just want to print only the first array out. Like this:

[0x140002dcd90 0x140002dd810]

How can I destruct them?

答案1

得分: 1

问题在于每匹配到一个结果,你就把它打印一次。

你可以将*goquery.Selection保存在一个新的切片中,然后只打印最后一个元素。这个示例是有效的,因为你想要最后一个出现的元素,但在实际应用中,你必须解析查询结果以获取特定的内容,而不依赖于结果的顺序。

// type Selection struct {
// 	Nodes    []*html.Node
// 	document *Document
// 	prevSel  *Selection
// }

var temp []*goquery.Selection

temp = append(temp, doc.Find(".tplPagination").Each(func(i int, s *goquery.Selection) {
	s.Find("a")
}))

fmt.Printf("last: %v\n", temp[len(temp)-1])
temp[len(temp)-1]: &{[0xc0002dcd90 0xc0002e0a80] 0xc00000e3f0 0xc000309770}

可以使用相同的示例访问Nodes []*html.Node

fmt.Printf("last: %v\n", temp[len(temp)-1].Nodes)
英文:

The problem is that you are printing each result as it is matched.

You can save each *goquery.Selection in a new slice and print only the last element. This example works because you want the last occurrence, but in real life you should look for something specific in the query result so that you do not depend on the order of the results.

// type Selection struct {
// 	Nodes    []*html.Node
// 	document *Document
// 	prevSel  *Selection
// }

var temp []*goquery.Selection

temp = append(temp, doc.Find(".tplPagination").Each(func(i int, s *goquery.Selection) {
	s.Find("a")
}))

fmt.Printf("last: %v\n", temp[len(temp)-1])
temp[len(temp)-1]: &{[0xc0002dcd90 0xc0002e0a80] 0xc00000e3f0 0xc000309770}

The Nodes []*html.Node can be accessed with same example:

fmt.Printf("last: %v\n", temp[len(temp)-1].Nodes)

答案2

得分: 0

根据您的评论,您想解析页面并获取页面数和帖子数。以下是我的尝试:

package main

import (
	"fmt"
	"github.com/PuerkitoBio/goquery"
	"log"
	"math"
	"net/http"
	"strconv"
	"strings"
)

// errCheck logs err and terminates the program when err is non-nil; a nil
// error is a no-op.
func errCheck(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}

// ExampleScrape downloads page 3 of the jobkorea search results for "golang"
// and prints the total number of postings (read from the "total-count"
// attribute of the list container), the number of postings on this page, the
// derived number of pages, and the title of every posting on the page.
// A request, status or parse failure terminates the program through log.
func ExampleScrape() {
	// %d rather than %s: page is an int, and %s would render it as
	// "%!s(int=3)" inside the query string.
	url := "https://www.jobkorea.co.kr/Search/?stext=golang&tabType=recruit&Page_No=%d"
	page := 3
	fmt.Println("当前页面:", page)

	res, err := http.Get(fmt.Sprintf(url, page))
	errCheck(err)

	defer res.Body.Close()

	if res.StatusCode != 200 {
		log.Fatalf("状态码错误: %d %s", res.StatusCode, res.Status)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	errCheck(err)

	postsDiv := doc.Find(".recruit-info div.dev_list.lists-cnt")
	if len(postsDiv.Nodes) == 0 {
		// Indexing Nodes[0] on an empty selection would panic.
		log.Fatal("未找到职位列表")
	}

	var totalCount int
	for _, a := range postsDiv.Nodes[0].Attr {
		if a.Key == "total-count" {
			totalCount, err = strconv.Atoi(a.Val)
			errCheck(err)
			break
		}
	}
	fmt.Println("总数:", totalCount)

	titles := postsDiv.Find(".list-post .title")
	perPage := len(titles.Nodes)
	fmt.Println("当前页面帖子数:", perPage)

	// Skip the division on an empty page; dividing by zero would print +Inf
	// or NaN instead of a page count.
	var pages float64
	if perPage > 0 {
		pages = math.Ceil(float64(totalCount) / float64(perPage))
	}
	fmt.Println("页面数:", pages)

	fmt.Println("\n当前页面的标题:")
	titles.Each(func(i int, s *goquery.Selection) {
		fmt.Println("\t-", strings.TrimSpace(s.Text()))
	})
}

// main runs the scraping example once.
func main() {
	ExampleScrape()
}

希望对您有帮助!

英文:

As per your comment you were looking to parse the page and get the number of pages and number of posts. Here is my attempt:

package main
import (
"fmt"
"github.com/PuerkitoBio/goquery"
"log"
"math"
"net/http"
"strconv"
"strings"
)
// errCheck logs err and terminates the program when err is non-nil; a nil
// error is a no-op.
func errCheck(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}
// ExampleScrape downloads page 3 of the jobkorea search results for "golang"
// and prints the total number of postings (read from the "total-count"
// attribute of the list container), the number of postings on this page, the
// derived number of pages, and the title of every posting on the page.
// A request, status or parse failure terminates the program through log.
func ExampleScrape() {
	// %d rather than %s: page is an int, and %s would render it as
	// "%!s(int=3)" inside the query string.
	url := "https://www.jobkorea.co.kr/Search/?stext=golang&tabType=recruit&Page_No=%d"
	page := 3
	fmt.Println("Current page:", page)

	res, err := http.Get(fmt.Sprintf(url, page))
	errCheck(err)

	defer res.Body.Close()

	if res.StatusCode != 200 {
		log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	errCheck(err)

	postsDiv := doc.Find(".recruit-info div.dev_list.lists-cnt")
	if len(postsDiv.Nodes) == 0 {
		// Indexing Nodes[0] on an empty selection would panic.
		log.Fatal("job list container not found")
	}

	var totalCount int
	for _, a := range postsDiv.Nodes[0].Attr {
		if a.Key == "total-count" {
			totalCount, err = strconv.Atoi(a.Val)
			errCheck(err)
			break
		}
	}
	fmt.Println("Total count:", totalCount)

	titles := postsDiv.Find(".list-post .title")
	perPage := len(titles.Nodes)
	fmt.Println("On this page:", perPage)

	// Skip the division on an empty page; dividing by zero would print +Inf
	// or NaN instead of a page count.
	var pages float64
	if perPage > 0 {
		pages = math.Ceil(float64(totalCount) / float64(perPage))
	}
	fmt.Println("Pages:", pages)

	fmt.Println("\nTitles on this page:")
	titles.Each(func(i int, s *goquery.Selection) {
		fmt.Println("\t-", strings.TrimSpace(s.Text()))
	})
}
// main runs the scraping example once.
func main() {
ExampleScrape()
}

huangapple
  • 本文由 发表于 2022年7月6日 11:42:07
  • 转载请务必保留本文链接:https://go.coder-hub.com/72877720.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定