将colly包的输出文本添加到Golang中的map中。

huangapple go评论67阅读模式
英文:

add colly package output text to map in golang

问题

我正在使用colly包制作一个网络爬虫,它从一个网站上收集ContestName和ContestTime,然后生成一个JSON文件。

我是这样做的:


	// Intended nesting: site -> category -> entry number -> field -> value.
	Contests := make(map[string]map[string]map[string]map[string]string)
    
	// Each inner map has to be created before anything can be stored in it.
	Contests["AtCoder"] = make(map[string]map[string]map[string]string)
	Contests["AtCoder"]["FutureContests"] = make(map[string]map[string]string)

	AtcoderFunc(Contests)


.................code..........


// AtcoderFunc registers colly HTML callbacks for the upcoming-contest table
// and then visits https://atcoder.jp/contests. Each callback only prints the
// value it extracts; the Contests argument is not written to yet.
func AtcoderFunc(Contests map[string]map[string]map[string]map[string]string) {
	collector := colly.NewCollector(
		colly.AllowedDomains("atcoder.jp", "www.atcoder.jp"),
	)

	// loc, _ := time.LoadLocation("Asia/Calcutta")
	// format := "2006-01-02 15:04:05"
	// var i int
	// Layout of the timestamp text, including a numeric UTC offset.
	format := "2006-01-02 15:04:05-0700"
	// The error from LoadLocation is discarded here.
    loc, _ := time.LoadLocation("Asia/Calcutta")


	// Register one pair of callbacks per loop index 1..9.
	for i := 1; i < 10; i++ {
		// NOTE(review): the time selector targets row i+1 while the name
		// selector targets row i - confirm this offset is intended.
		ContestSelTime := fmt.Sprintf("#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(1)  a", i+1)
		ContestSelName := fmt.Sprintf("#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(2)", i)

		// Contest name: text of the "a" child of the matched cell.
		collector.OnHTML(ContestSelName, func(element *colly.HTMLElement) {
			ContestName := element.ChildText("a")
			fmt.Printf("%T \n", ContestName)
			fmt.Println(ContestName) // TODO: store in Contests["AtCoder"]["FutureContests"] and emit as JSON instead of printing.
			

		})

		// Contest start time: text of the "time" child, converted to loc.
		collector.OnHTML(ContestSelTime, func(element *colly.HTMLElement) {
			ContestStartTime := element.ChildText("time")
			// The parse error is discarded here.
			parsed_time, _ := time.Parse(format, ContestStartTime)
    		IST_time := parsed_time.In(loc)
    		fmt.Println("Time in IST", IST_time) // TODO: store in Contests["AtCoder"]["FutureContests"] instead of printing.
		})

	}

	// Print every URL before it is requested.
	collector.OnRequest(func(request *colly.Request) {
		fmt.Println("Visiting", request.URL.String())
	})

	// The error returned by Visit is not checked.
	collector.Visit("https://atcoder.jp/contests")

}


有什么想法吗?
我尝试像这样将值添加到映射中:

			// Does not compile: Contests["AtCoder"]["FutureContests"] has type
			// map[string]map[string]string, but the literal is a map[string]string.
			Contests["AtCoder"]["FutureContests"] = map[string]string{
"Name": string(ContestName),
}

我想生成以下JSON:

{
  "AtCoder": {
    "FutureContests": {
      "1": {
        "Name": "Contest name",
        "Start": "time here"
      },
      "2": {
        "Name": "Contest name",
        "Start": "time here"
      }
    }
  }
}

但是它报错cannot use (map[string]string literal) (value of type map[string]string) as map[string]map[string]string value in assignment

有什么想法吗?

英文:

I am building a web scraper with the colly package. It collects the ContestName and ContestTime from a website and generates a JSON file.

This is what I did:


	// Intended nesting: site -> category -> entry number -> field -> value.
	Contests := make(map[string]map[string]map[string]map[string]string)

	// Each inner map has to be created before anything can be stored in it.
	Contests["AtCoder"] = make(map[string]map[string]map[string]string)
	Contests["AtCoder"]["FutureContests"] = make(map[string]map[string]string)

	AtcoderFunc(Contests)


.................code..........

func AtcoderFunc(Contests map[string]map[string]map[string]map[string]string) {
	collector := colly.NewCollector(
		colly.AllowedDomains(&quot;atcoder.jp&quot;, &quot;www.atcoder.jp&quot;),
	)

	// loc, _ := time.LoadLocation(&quot;Asia/Calcutta&quot;)
	// format := &quot;2006-01-02 15:04:05&quot;
	// var i int
	format := &quot;2006-01-02 15:04:05-0700&quot;
    loc, _ := time.LoadLocation(&quot;Asia/Calcutta&quot;)


	for i := 1; i &lt; 10; i++ {
		ContestSelTime := fmt.Sprintf(&quot;#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(1)  a&quot;, i+1)
		ContestSelName := fmt.Sprintf(&quot;#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(2)&quot;, i)

		// for contest name
		collector.OnHTML(ContestSelName, func(element *colly.HTMLElement) {
			ContestName := element.ChildText(&quot;a&quot;)
			fmt.Printf(&quot;%T \n&quot;, ContestName)
			fmt.Println(ContestName) // instead of printing i want to add it to the Contests[&quot;AtCoder&quot;][&quot;FutureContests&quot;] map and print like json 
			

		})

		// for contestTime
		collector.OnHTML(ContestSelTime, func(element *colly.HTMLElement) {
			ContestStartTime := element.ChildText(&quot;time&quot;)
			parsed_time, _ := time.Parse(format, ContestStartTime)
    		IST_time := parsed_time.In(loc)
    		fmt.Println(&quot;Time in IST&quot;, IST_time) // instead of printing i want to add it to the Contests[&quot;AtCoder&quot;][&quot;FutureContests&quot;] map.
		})

	}

	collector.OnRequest(func(request *colly.Request) {
		fmt.Println(&quot;Visiting&quot;, request.URL.String())
	})

	collector.Visit(&quot;https://atcoder.jp/contests&quot;)

}


Any ideas?
I tried adding the value to the map like this:

			// Does not compile: Contests["AtCoder"]["FutureContests"] has type
			// map[string]map[string]string, but the literal is a map[string]string.
			Contests["AtCoder"]["FutureContests"] = map[string]string{
				"Name": string(ContestName),
			}

I want to produce JSON like this:

{
  "AtCoder": {
    "FutureContests": {
      "1": {
        "Name": "Contest name",
        "Start": "time here"
      },
      "2": {
        "Name": "Contest name",
        "Start": "time here"
      }
    }
  }
}

But it gives the error: cannot use (map[string]string literal) (value of type map[string]string) as map[string]map[string]string value in assignment

Any ideas?

答案1

得分: 1

错误出在map赋值上。管理如此嵌套的结构确实很困难,但我找到了一种成功处理它的方法。让我来介绍一下代码:

package main

import (
	"encoding/json"
	"fmt"
	"strconv"
	"time"

	"github.com/gocolly/colly/v2"
)

// contest is an empty struct type; nothing else in this example refers to it.
type contest struct{}

// AtcoderFunc scrapes the upcoming-contest table at https://atcoder.jp/contests
// and records what it finds in contests["UpcomingContest"], keyed by the
// decimal loop index ("1", "2"). Each entry receives a "Name" and a "Time"
// (the start time rendered in Asia/Calcutta).
//
// contests must be a non-nil map; it is filled in place. Problems (nil map,
// unparsable timestamp, failed request) are printed and the affected value is
// left unset instead of storing a bogus zero time or panicking.
func AtcoderFunc(contests map[string]map[string]map[string]string) {
	if contests == nil {
		// A nil map cannot be written to, and the caller would never see a
		// replacement, so there is nothing useful to do.
		fmt.Println("AtcoderFunc: contests map is nil, nothing scraped")
		return
	}

	collector := colly.NewCollector(
		colly.AllowedDomains("atcoder.jp", "www.atcoder.jp"),
	)

	const format = "2006-01-02 15:04:05-0700"
	loc, err := time.LoadLocation("Asia/Calcutta")
	if err != nil {
		// Time.In panics on a nil *Location. Hosts without a time zone
		// database still get the same output from a fixed +05:30 zone.
		fmt.Println("loading Asia/Calcutta, using fixed +05:30 offset:", err)
		loc = time.FixedZone("IST", 5*60*60+30*60)
	}

	upcoming := make(map[string]map[string]string)
	contests["UpcomingContest"] = upcoming

	for i := 1; i < 3; i++ {
		// entry is declared per iteration so each pair of callbacks writes
		// to its own map.
		entry := make(map[string]string)
		upcoming[strconv.Itoa(i)] = entry

		// NOTE(review): the time selector targets row i+1 while the name
		// selector targets row i, so an entry may combine adjacent rows -
		// confirm against the page markup.
		contestSelTime := fmt.Sprintf("#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(1)  a", i+1)
		contestSelName := fmt.Sprintf("#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(2)", i)

		// Contest name: text of the "a" child of the matched cell.
		collector.OnHTML(contestSelName, func(element *colly.HTMLElement) {
			entry["Name"] = element.ChildText("a")
		})

		// Contest start time: text of the "time" child, converted to loc.
		collector.OnHTML(contestSelTime, func(element *colly.HTMLElement) {
			startText := element.ChildText("time")
			parsed, err := time.Parse(format, startText)
			if err != nil {
				// Skip rather than store the zero time.
				fmt.Printf("parsing contest start time %q: %v\n", startText, err)
				return
			}
			entry["Time"] = fmt.Sprint(parsed.In(loc))
		})
	}

	collector.OnRequest(func(r *colly.Request) {
		fmt.Println("Visiting", r.URL.String())
	})

	if err := collector.Visit("https://atcoder.jp/contests"); err != nil {
		fmt.Println("visiting https://atcoder.jp/contests:", err)
	}
}

// main builds the nested result map, lets AtcoderFunc populate the "AtCoder"
// branch, and prints the whole structure as indented JSON.
func main() {
	contests := make(map[string]map[string]map[string]map[string]string)
	contests["AtCoder"] = make(map[string]map[string]map[string]string)

	AtcoderFunc(contests["AtCoder"])

	data, err := json.MarshalIndent(contests, "", "  ")
	if err != nil {
		fmt.Println("encoding contests as JSON:", err)
		return
	}
	fmt.Println(string(data))
}

我保留了你的结构,除了修复问题之外,我还对示例进行了一些重构,更改了一些名称并删除了未使用的语句。最后,我使用MarshalIndent函数将JSON字符串美化后打印到终端上。如果对你也有效,请告诉我!

英文:

The error was in the map assignment. Such a deeply nested structure is hard to manage, but I found a way to deal with it. Here is the code:

package main

import (
	"encoding/json"
	"fmt"
	"strconv"
	"time"

	"github.com/gocolly/colly/v2"
)

// contest is an empty struct type; nothing else in this example refers to it.
type contest struct{}

func AtcoderFunc(contests map[string]map[string]map[string]string) {
	collector := colly.NewCollector(
		colly.AllowedDomains(&quot;atcoder.jp&quot;, &quot;www.atcoder.jp&quot;),
	)

	format := &quot;2006-01-02 15:04:05-0700&quot;
	loc, _ := time.LoadLocation(&quot;Asia/Calcutta&quot;)

	contests[&quot;UpcomingContest&quot;] = make(map[string]map[string]string)

	for i := 1; i &lt; 3; i++ {
		rawI := strconv.Itoa(i)
		contests[&quot;UpcomingContest&quot;][rawI] = make(map[string]string)

		contestSelTime := fmt.Sprintf(&quot;#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(1)  a&quot;, i+1)
		contestSelName := fmt.Sprintf(&quot;#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(2)&quot;, i)

		// for contest name
		collector.OnHTML(contestSelName, func(element *colly.HTMLElement) {
			contestName := element.ChildText(&quot;a&quot;)
			contests[&quot;UpcomingContest&quot;][rawI][&quot;Name&quot;] = contestName
		})

		// for contestTime
		collector.OnHTML(contestSelTime, func(element *colly.HTMLElement) {
			ContestStartTime := element.ChildText(&quot;time&quot;)
			parsed_time, _ := time.Parse(format, ContestStartTime)
			IST_time := parsed_time.In(loc)
			contests[&quot;UpcomingContest&quot;][rawI][&quot;Time&quot;] = fmt.Sprint(IST_time)
		})
	}

	collector.OnRequest(func(r *colly.Request) {
		fmt.Println(&quot;Visiting&quot;, r.URL.String())
	})

	collector.Visit(&quot;https://atcoder.jp/contests&quot;)
}

func main() {
	contests := make(map[string]map[string]map[string]map[string]string)
	contests[&quot;AtCoder&quot;] = make(map[string]map[string]map[string]string)

	AtcoderFunc(contests[&quot;AtCoder&quot;])

	data, _ := json.MarshalIndent(contests, &quot;&quot;, &quot;  &quot;)
	fmt.Println(string(data))
}

I mostly kept your structure. Besides fixing the issue, I refactored your example a little by renaming some identifiers and removing unused statements. Lastly, I used the MarshalIndent function to pretty-print the JSON string to the terminal.
Let me know if it works for you too!

huangapple
  • 本文由 发表于 2022年11月15日 20:26:14
  • 转载请务必保留本文链接:https://go.coder-hub.com/74445600.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定