我无法在Golang的Colly中将数据并排打印在JSON数组中。

huangapple go评论109阅读模式
英文:

I cannot print data side by side in JSON array in Golang Colly

问题

我正在使用golang colly从亚马逊上爬取图片,并希望将这些图片以JSON格式放入一个单独的数组中(每个产品图片只有一个数组)。我已经成功爬取到所需的图片,只是在JSON文件方面遇到了问题。非常感谢您的帮助。

以下是我的代码:

  1. package main
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "io/ioutil"
  6. "log"
  7. "os"
  8. "time"
  9. "github.com/gocolly/colly"
  10. )
  // Info is the record written to the JSON output: ID is encoded as "id" and Images as "images".
  11. type Info struct {
  12. ID int `json:"id"`
  13. Images []string `json:"images"`
  14. }
  // main visits one Amazon product page with Colly, records matching image URLs,
  // prints AllInfos as indented JSON on stdout, saves it through writeJson, and
  // prints the elapsed time in seconds.
  15. func main() {
  16. AllInfos := make([]Info, 0)
  17. start := time.Now()
  18. co := colly.NewCollector(
  19. colly.AllowedDomains("www.amazon.com", "amazon.com"),
  20. )
  21. // GET Images
  22. Counter := 0
  23. var info Info
  // theArray holds at most 10 URLs; Counter is the index of the next free slot.
  24. var theArray [10]string
  25. co.OnHTML("img[src]", func(e *colly.HTMLElement) {
  26. imgsrc := e.Attr("src")
  27. imgclass := e.Attr("class")
  // NOTE(review): imgsrc[0:49] panics when src is shorter than 49 bytes;
  // strings.HasPrefix performs the same comparison safely.
  28. if imgsrc[0:49] == "https://images-na.ssl-images-amazon.com/images/I/" && imgclass == "" {
  // NOTE(review): this index goes out of range once more than 10 images match.
  29. theArray[Counter] = imgsrc
  30. fmt.Printf("The Array %d %v", Counter, theArray[Counter]+"\n")
  31. Counter = Counter + 1
  32. co.Visit(e.Request.AbsoluteURL(imgsrc))
  // A new Info is built from the first seven slots of theArray on every match;
  // slots that have not been filled yet are still "", which is where the empty
  // strings in the JSON output come from.
  33. info = Info{
  34. Images: []string{
  35. theArray[0],
  36. theArray[1],
  37. theArray[2],
  38. theArray[3],
  39. theArray[4],
  40. theArray[5],
  41. theArray[6],
  42. },
  43. }
  // One Info is appended per matching image, so AllInfos ends up with one element
  // per image instead of a single element that holds every image.
  44. AllInfos = append(AllInfos, info)
  45. }
  46. })
  47. co.Visit("https://www.amazon.com/Bluetooth-FM-Transmitter-Compatible-Smartphones/dp/B088TCSH8T/ref=sr_1_1_sspa?dchild=1&keywords=transmitter&qid=1623860482&sr=8-1-spons&psc=1&smid=A2XMGHKVCX40WB&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUFGVEgxTjJLRFdLSkUmZW5jcnlwdGVkSWQ9QTAyNDE0ODkyRlNDWlAzUktPQzJSJmVuY3J5cHRlZEFkSWQ9QTA5MTkwNjgyWVUzQ0RUMTBCTEFFJndpZGdldE5hbWU9c3BfYXRmJmFjdGlvbj1jbGlja1JlZGlyZWN0JmRvTm90TG9nQ2xpY2s9dHJ1ZQ==WDY4VktWQlImd2lkZ2V0TmFtZT1zcF9kZXRhaWwmYWN0aW9uPWNsaWNrUmVkaXJlY3QmZG9Ob3RMb2dDbGljaz10cnVl")
  // NOTE(review): this handler is registered after Visit has already been called;
  // with Colly's default synchronous collector it presumably never fires for that
  // visit — confirm and move it above the Visit call.
  48. co.OnRequest(func(r *colly.Request) {
  49. fmt.Println("Visiting: ", r.URL.String())
  50. })
  51. enc := json.NewEncoder(os.Stdout)
  52. enc.SetIndent("", " ")
  53. enc.Encode(AllInfos)
  54. writeJson(AllInfos)
  55. duration := time.Since(start)
  56. fmt.Println(duration.Seconds())
  57. }
  58. func writeJson(data []Info) {
  59. dataFile, err := json.MarshalIndent(data, "", " ")
  60. if err != nil {
  61. log.Println("Could not create JSON", err)
  62. }
  63. ioutil.WriteFile("stocky.json", dataFile, 0666)
  64. }

我的代码输出的JSON结果如下:

  1. [
  2. {
  3. "id": 0,
  4. "images": [
  5. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  6. "",
  7. "",
  8. "",
  9. "",
  10. "",
  11. ""
  12. ]
  13. },
  14. {
  15. "id": 0,
  16. "images": [
  17. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  18. "https://images-na.ssl-images-amazon.com/images/I/511L3hYCClL._AC_US40_.jpg",
  19. "",
  20. "",
  21. "",
  22. "",
  23. ""
  24. ]
  25. },
  26. {
  27. "id": 0,
  28. "images": [
  29. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  30. "https://images-na.ssl-images-amazon.com/images/I/511L3hYCClL._AC_US40_.jpg",
  31. "https://images-na.ssl-images-amazon.com/images/I/51kSO5K364L._AC_US40_.jpg",
  32. "",
  33. "",
  34. "",
  35. ""
  36. ]
  37. },
  38. {
  39. "id": 0,
  40. "images": [
  41. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  42. "https://images-na.ssl-images-amazon.com/images/I/511L3hYCClL._AC_US40_.jpg",
  43. "https://images-na.ssl-images-amazon.com/images/I/51kSO5K364L._AC_US40_.jpg",
  44. "https://images-na.ssl-images-amazon.com/images/I/61NvwkbuXUL._AC_US40_.jpg",
  45. "",
  46. "",
  47. ""
  48. ]
  49. },
  50. {
  51. "id": 0,
  52. "images": [
  53. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  54. "https://images-na.ssl-images-amazon.com/images/I/511L3hYCClL._AC_US40_.jpg",
  55. "https://images-na.ssl-images-amazon.com/images/I/51kSO5K364L._AC_US40_.jpg",
  56. "https://images-na.ssl-images-amazon.com/images/I/61NvwkbuXUL._AC_US40_.jpg",
  57. "https://images-na.ssl-images-amazon.com/images/I/51hwJpj4OgL._AC_US40_.jpg",
  58. "",
  59. ""
  60. ]
  61. },
  62. {
  63. "id": 0,
  64. "images": [
  65. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  66. "https://images-na.ssl-images-amazon.com/images/I/511L3hYCClL._AC_US40_.jpg",
  67. "https://images-na.ssl-images-amazon.com/images/I/51kSO5K364L._AC_US40_.jpg",
  68. "https://images-na.ssl-images-amazon.com/images/I/61NvwkbuXUL._AC_US40_.jpg",
  69. "https://images-na.ssl-images-amazon.com/images/I/51hwJpj4OgL._AC_US40_.jpg",
  70. "https://images-na.ssl-images-amazon.com/images/I/51dz9PNWVrL._AC_US40_.jpg",
  71. ""
  72. ]
  73. }
  74. ]

我需要的JSON结果如下:

  1. [
  2. {
  3. "id": 0,
  4. "images": [
  5. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  6. "https://images-na.ssl-images-amazon.com/images/I/511L3hYCClL._AC_US40_.jpg",
  7. "https://images-na.ssl-images-amazon.com/images/I/51kSO5K364L._AC_US40_.jpg",
  8. "https://images-na.ssl-images-amazon.com/images/I/61NvwkbuXUL._AC_US40_.jpg",
  9. "https://images-na.ssl-images-amazon.com/images/I/51hwJpj4OgL._AC_US40_.jpg",
  10. "https://images-na.ssl-images-amazon.com/images/I/51dz9PNWVrL._AC_US40_.jpg"
  11. ]
  12. }
  13. ]
英文:

I'm scraping images from Amazon with Go's Colly and I want to put them into a single JSON array (only one array for each product's images). I have scraped the images I need; I'm just having a problem with the JSON file. Thank you very much in advance.

  1. package main
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "io/ioutil"
  6. "log"
  7. "os"
  8. "time"
  9. "github.com/gocolly/colly"
  10. )
  // Info is the record written to the JSON output: ID is encoded as "id" and Images as "images".
  11. type Info struct {
  12. ID int `json:"id"`
  13. Images []string `json:"images"`
  14. }
  // main visits one Amazon product page with Colly, records matching image URLs,
  // prints AllInfos as indented JSON on stdout, saves it through writeJson, and
  // prints the elapsed time in seconds.
  15. func main() {
  16. AllInfos := make([]Info, 0)
  17. start := time.Now()
  18. co := colly.NewCollector(
  19. colly.AllowedDomains("www.amazon.com", "amazon.com"),
  20. )
  21. // GET Images
  22. Counter := 0
  23. var info Info
  // theArray holds at most 10 URLs; Counter is the index of the next free slot.
  24. var theArray [10]string
  25. co.OnHTML("img[src]", func(e *colly.HTMLElement) {
  26. imgsrc := e.Attr("src")
  27. imgclass := e.Attr("class")
  // NOTE(review): imgsrc[0:49] panics when src is shorter than 49 bytes;
  // strings.HasPrefix performs the same comparison safely.
  28. if imgsrc[0:49] == "https://images-na.ssl-images-amazon.com/images/I/" && imgclass == "" {
  // NOTE(review): this index goes out of range once more than 10 images match.
  29. theArray[Counter] = imgsrc
  30. fmt.Printf("The Array %d %v", Counter, theArray[Counter]+"\n")
  31. Counter = Counter + 1
  32. co.Visit(e.Request.AbsoluteURL(imgsrc))
  // A new Info is built from the first seven slots of theArray on every match;
  // slots that have not been filled yet are still "", which is where the empty
  // strings in the JSON output come from.
  33. info = Info{
  34. Images: []string{
  35. theArray[0],
  36. theArray[1],
  37. theArray[2],
  38. theArray[3],
  39. theArray[4],
  40. theArray[5],
  41. theArray[6],
  42. },
  43. }
  // One Info is appended per matching image, so AllInfos ends up with one element
  // per image instead of a single element that holds every image.
  44. AllInfos = append(AllInfos, info)
  45. }
  46. })
  47. co.Visit("https://www.amazon.com/Bluetooth-FM-Transmitter-Compatible-Smartphones/dp/B088TCSH8T/ref=sr_1_1_sspa?dchild=1&keywords=transmitter&qid=1623860482&sr=8-1-spons&psc=1&smid=A2XMGHKVCX40WB&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUFGVEgxTjJLRFdLSkUmZW5jcnlwdGVkSWQ9QTAyNDE0ODkyRlNDWlAzUktPQzJSJmVuY3J5cHRlZEFkSWQ9QTA5MTkwNjgyWVUzQ0RUMTBCTEFFJndpZGdldE5hbWU9c3BfYXRmJmFjdGlvbj1jbGlja1JlZGlyZWN0JmRvTm90TG9nQ2xpY2s9dHJ1ZQ==WDY4VktWQlImd2lkZ2V0TmFtZT1zcF9kZXRhaWwmYWN0aW9uPWNsaWNrUmVkaXJlY3QmZG9Ob3RMb2dDbGljaz10cnVl")
  // NOTE(review): this handler is registered after Visit has already been called;
  // with Colly's default synchronous collector it presumably never fires for that
  // visit — confirm and move it above the Visit call.
  48. co.OnRequest(func(r *colly.Request) {
  49. fmt.Println("Visiting: ", r.URL.String())
  50. })
  51. enc := json.NewEncoder(os.Stdout)
  52. enc.SetIndent("", " ")
  53. enc.Encode(AllInfos)
  54. writeJson(AllInfos)
  55. duration := time.Since(start)
  56. fmt.Println(duration.Seconds())
  57. }
  58. func writeJson(data []Info) {
  59. dataFile, err := json.MarshalIndent(data, "", " ")
  60. if err != nil {
  61. log.Println("Could not create JSON", err)
  62. }
  63. ioutil.WriteFile("stocky.json", dataFile, 0666)
  64. }

My code's JSON result:

  1. [
  2. {
  3. "id": 0,
  4. "images": [
  5. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  6. "",
  7. "",
  8. "",
  9. "",
  10. "",
  11. ""
  12. ]
  13. },
  14. {
  15. "id": 0,
  16. "images": [
  17. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  18. "https://images-na.ssl-images-amazon.com/images/I/511L3hYCClL._AC_US40_.jpg",
  19. "",
  20. "",
  21. "",
  22. "",
  23. ""
  24. ]
  25. },
  26. {
  27. "id": 0,
  28. "images": [
  29. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  30. "https://images-na.ssl-images-amazon.com/images/I/511L3hYCClL._AC_US40_.jpg",
  31. "https://images-na.ssl-images-amazon.com/images/I/51kSO5K364L._AC_US40_.jpg",
  32. "",
  33. "",
  34. "",
  35. ""
  36. ]
  37. },
  38. {
  39. "id": 0,
  40. "images": [
  41. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  42. "https://images-na.ssl-images-amazon.com/images/I/511L3hYCClL._AC_US40_.jpg",
  43. "https://images-na.ssl-images-amazon.com/images/I/51kSO5K364L._AC_US40_.jpg",
  44. "https://images-na.ssl-images-amazon.com/images/I/61NvwkbuXUL._AC_US40_.jpg",
  45. "",
  46. "",
  47. ""
  48. ]
  49. },
  50. {
  51. "id": 0,
  52. "images": [
  53. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  54. "https://images-na.ssl-images-amazon.com/images/I/511L3hYCClL._AC_US40_.jpg",
  55. "https://images-na.ssl-images-amazon.com/images/I/51kSO5K364L._AC_US40_.jpg",
  56. "https://images-na.ssl-images-amazon.com/images/I/61NvwkbuXUL._AC_US40_.jpg",
  57. "https://images-na.ssl-images-amazon.com/images/I/51hwJpj4OgL._AC_US40_.jpg",
  58. "",
  59. ""
  60. ]
  61. },
  62. {
  63. "id": 0,
  64. "images": [
  65. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  66. "https://images-na.ssl-images-amazon.com/images/I/511L3hYCClL._AC_US40_.jpg",
  67. "https://images-na.ssl-images-amazon.com/images/I/51kSO5K364L._AC_US40_.jpg",
  68. "https://images-na.ssl-images-amazon.com/images/I/61NvwkbuXUL._AC_US40_.jpg",
  69. "https://images-na.ssl-images-amazon.com/images/I/51hwJpj4OgL._AC_US40_.jpg",
  70. "https://images-na.ssl-images-amazon.com/images/I/51dz9PNWVrL._AC_US40_.jpg",
  71. ""
  72. ]
  73. }
  74. ]

I need a JSON result like this:

  1. [
  2. {
  3. "id": 0,
  4. "images": [
  5. "https://images-na.ssl-images-amazon.com/images/I/41EKkSQCU-L._AC_US40_.jpg",
  6. "https://images-na.ssl-images-amazon.com/images/I/511L3hYCClL._AC_US40_.jpg",
  7. "https://images-na.ssl-images-amazon.com/images/I/51kSO5K364L._AC_US40_.jpg",
  8. "https://images-na.ssl-images-amazon.com/images/I/61NvwkbuXUL._AC_US40_.jpg",
  9. "https://images-na.ssl-images-amazon.com/images/I/51hwJpj4OgL._AC_US40_.jpg",
  10. "https://images-na.ssl-images-amazon.com/images/I/51dz9PNWVrL._AC_US40_.jpg"
  11. ]
  12. }
  13. ]

答案1

得分: 2

我很难理解你想要做什么,但是这是我想出来的代码:

  1. package main
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "io/ioutil"
  6. "log"
  7. "os"
  8. "strings"
  9. "time"
  10. "github.com/gocolly/colly"
  11. )
  // Info is the record written to the JSON output: ID is encoded as "id" and Images as "images".
  12. type Info struct {
  13. ID int `json:"id"`
  14. Images []string `json:"images"`
  15. }
  16. func main() {
  17. allInfos := new(Info)
  18. start := time.Now()
  19. co := colly.NewCollector(
  20. colly.AllowedDomains("www.amazon.com", "amazon.com"),
  21. )
  22. // 获取图片
  23. co.OnHTML("img[src]", func(e *colly.HTMLElement) {
  24. imgsrc := e.Attr("src")
  25. imgclass := e.Attr("class")
  26. if strings.HasPrefix(imgsrc, "https://images-na.ssl-images-amazon.com/images/I/") && imgclass == "" {
  27. fmt.Printf("The Array %d %v\n", len(allInfos.Images), imgsrc)
  28. allInfos.Images = append(allInfos.Images, imgsrc)
  29. co.Visit(e.Request.AbsoluteURL(imgsrc))
  30. }
  31. })
  32. co.Visit("https://www.amazon.com/Bluetooth-FM-Transmitter-Compatible-Smartphones/dp/B088TCSH8T/ref=sr_1_1_sspa?dchild=1&keywords=transmitter&qid=1623860482&sr=8-1-spons&psc=1&smid=A2XMGHKVCX40WB&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUFGVEgxTjJLRFdLSkUmZW5jcnlwdGVkSWQ9QTAyNDE0ODkyRlNDWlAzUktPQzJSJmVuY3J5cHRlZEFkSWQ9QTA5MTkwNjgyWVUzQ0RUMTBCTEFFJndpZGdldE5hbWU9c3BfYXRmJmFjdGlvbj1jbGlja1JlZGlyZWN0JmRvTm90TG9nQ2xpY2s9dHJ1ZQ==WDY4VktWQlImd2lkZ2V0TmFtZT1zcF9kZXRhaWwmYWN0aW9uPWNsaWNrUmVkaXJlY3QmZG9Ob3RMb2dDbGljaz10cnVl")
  33. co.OnRequest(func(r *colly.Request) {
  34. fmt.Println("Visiting: ", r.URL.String())
  35. })
  36. enc := json.NewEncoder(os.Stdout)
  37. enc.SetIndent("", " ")
  38. enc.Encode(allInfos)
  39. writeJson(allInfos)
  40. duration := time.Since(start)
  41. fmt.Println(duration.Seconds())
  42. }
  43. func writeJson(data *Info) {
  44. dataFile, err := json.MarshalIndent(data, "", " ")
  45. if err != nil {
  46. log.Println("Could not create JSON", err)
  47. }
  48. ioutil.WriteFile("stocky.json", dataFile, 0666)
  49. }

根据@Flimzy的建议,我只将新的imagesrc附加到了一个Info结构的实例中。看起来你期望的输出是一个长度为1的切片,其中包含所有的图片。

我还使用了strings.HasPrefix来简化你的切片检查。这是等效的,但我认为它更清晰,所以这是一个可选的更改。

如果你有更多问题,请告诉我。

编辑:如果你需要继续使用切片,下面的代码可能会起作用。根据你的问题,很难确定何时需要开始一个新的Info:

  1. package main
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "io/ioutil"
  6. "log"
  7. "os"
  8. "strings"
  9. "time"
  10. "github.com/gocolly/colly"
  11. )
  // Info is the record written to the JSON output: ID is encoded as "id" and Images as "images".
  12. type Info struct {
  13. ID int `json:"id"`
  14. Images []string `json:"images"`
  15. }
  16. func main() {
  17. allInfos := make([]Info, 1)
  18. start := time.Now()
  19. co := colly.NewCollector(
  20. colly.AllowedDomains("www.amazon.com", "amazon.com"),
  21. )
  22. // 获取图片
  23. co.OnHTML("img[src]", func(e *colly.HTMLElement) {
  24. imgsrc := e.Attr("src")
  25. imgclass := e.Attr("class")
  26. if strings.HasPrefix(imgsrc, "https://images-na.ssl-images-amazon.com/images/I/") && imgclass == "" {
  27. fmt.Printf("The Array %d %v\n", len(allInfos[0].Images), imgsrc)
  28. allInfos[0].Images = append(allInfos[0].Images, imgsrc)
  29. co.Visit(e.Request.AbsoluteURL(imgsrc))
  30. }
  31. })
  32. co.Visit("https://www.amazon.com/Bluetooth-FM-Transmitter-Compatible-Smartphones/dp/B088TCSH8T/ref=sr_1_1_sspa?dchild=1&keywords=transmitter&qid=1623860482&sr=8-1-spons&psc=1&smid=A2XMGHKVCX40WB&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUFGVEgxTjJLRFdLSkUmZW5jcnlwdGVkSWQ9QTAyNDE0ODkyRlNDWlAzUktPQzJSJmVuY3J5cHRlZEFkSWQ9QTA5MTkwNjgyWVUzQ0RUMTBCTEFFJndpZGdldE5hbWU9c3BfYXRmJmFjdGlvbj1jbGlja1JlZGlyZWN0JmRvTm90TG9nQ2xpY2s9dHJ1ZQ==WDY4VktWQlImd2lkZ2V0TmFtZT1zcF9kZXRhaWwmYWN0aW9uPWNsaWNrUmVkaXJlY3QmZG9Ob3RMb2dDbGljaz10cnVl")
  33. co.OnRequest(func(r *colly.Request) {
  34. fmt.Println("Visiting: ", r.URL.String())
  35. })
  36. enc := json.NewEncoder(os.Stdout)
  37. enc.SetIndent("", " ")
  38. enc.Encode(allInfos)
  39. writeJson(allInfos)
  40. duration := time.Since(start)
  41. fmt.Println(duration.Seconds())
  42. }
  43. func writeJson(data []Info) {
  44. dataFile, err := json.MarshalIndent(data, "", " ")
  45. if err != nil {
  46. log.Println("Could not create JSON", err)
  47. }
  48. ioutil.WriteFile("stocky.json", dataFile, 0666)
  49. }

希望能对你有所帮助!

英文:

I had a hard time understanding what you wanted to do but here's what I came up with:

  1. package main
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "io/ioutil"
  6. "log"
  7. "os"
  8. "strings"
  9. "time"
  10. "github.com/gocolly/colly"
  11. )
  // Info is the record written to the JSON output: ID is encoded as "id" and Images as "images".
  12. type Info struct {
  13. ID int `json:"id"`
  14. Images []string `json:"images"`
  15. }
  16. func main() {
  17. allInfos := new(Info)
  18. start := time.Now()
  19. co := colly.NewCollector(
  20. colly.AllowedDomains("www.amazon.com", "amazon.com"),
  21. )
  22. // GET Images
  23. co.OnHTML("img[src]", func(e *colly.HTMLElement) {
  24. imgsrc := e.Attr("src")
  25. imgclass := e.Attr("class")
  26. if strings.HasPrefix(imgsrc, "https://images-na.ssl-images-amazon.com/images/I/") && imgclass == "" {
  27. fmt.Printf("The Array %d %v", len(allInfos.Images), imgsrc+"\n")
  28. allInfos.Images = append(allInfos.Images, imgsrc)
  29. co.Visit(e.Request.AbsoluteURL(imgsrc))
  30. }
  31. })
  32. co.Visit("https://www.amazon.com/Bluetooth-FM-Transmitter-Compatible-Smartphones/dp/B088TCSH8T/ref=sr_1_1_sspa?dchild=1&keywords=transmitter&qid=1623860482&sr=8-1-spons&psc=1&smid=A2XMGHKVCX40WB&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUFGVEgxTjJLRFdLSkUmZW5jcnlwdGVkSWQ9QTAyNDE0ODkyRlNDWlAzUktPQzJSJmVuY3J5cHRlZEFkSWQ9QTA5MTkwNjgyWVUzQ0RUMTBCTEFFJndpZGdldE5hbWU9c3BfYXRmJmFjdGlvbj1jbGlja1JlZGlyZWN0JmRvTm90TG9nQ2xpY2s9dHJ1ZQ==WDY4VktWQlImd2lkZ2V0TmFtZT1zcF9kZXRhaWwmYWN0aW9uPWNsaWNrUmVkaXJlY3QmZG9Ob3RMb2dDbGljaz10cnVl")
  33. co.OnRequest(func(r *colly.Request) {
  34. fmt.Println("Visiting: ", r.URL.String())
  35. })
  36. enc := json.NewEncoder(os.Stdout)
  37. enc.SetIndent("", " ")
  38. enc.Encode(allInfos)
  39. writeJson(allInfos)
  40. duration := time.Since(start)
  41. fmt.Println(duration.Seconds())
  42. }
  43. func writeJson(data *Info) {
  44. dataFile, err := json.MarshalIndent(data, "", " ")
  45. if err != nil {
  46. log.Println("Could not create JSON", err)
  47. }
  48. ioutil.WriteFile("stocky.json", dataFile, 0666)
  49. }

As @Flimzy suggested, I appended only the new imgsrc to a single instance of an Info struct, since it seemed like your expected output was a slice of length 1 containing all the images.

I also used a strings.HasPrefix to clean up your slice check. This is equivalent but I think it's cleaner, so that's an optional change.

Let me know if you have any more questions.

EDIT: if you need to keep using a slice, something like the following might work. From your question it's hard to tell when you need to start a new Info:

  1. package main
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "io/ioutil"
  6. "log"
  7. "os"
  8. "strings"
  9. "time"
  10. "github.com/gocolly/colly"
  11. )
  // Info is the record written to the JSON output: ID is encoded as "id" and Images as "images".
  12. type Info struct {
  13. ID int `json:"id"`
  14. Images []string `json:"images"`
  15. }
  16. func main() {
  17. allInfos := make([]Info, 1)
  18. start := time.Now()
  19. co := colly.NewCollector(
  20. colly.AllowedDomains("www.amazon.com", "amazon.com"),
  21. )
  22. // GET Images
  23. co.OnHTML("img[src]", func(e *colly.HTMLElement) {
  24. imgsrc := e.Attr("src")
  25. imgclass := e.Attr("class")
  26. if strings.HasPrefix(imgsrc, "https://images-na.ssl-images-amazon.com/images/I/") && imgclass == "" {
  27. fmt.Printf("The Array %d %v", len(allInfos[0].Images), imgsrc+"\n")
  28. allInfos[0].Images = append(allInfos[0].Images, imgsrc)
  29. co.Visit(e.Request.AbsoluteURL(imgsrc))
  30. }
  31. })
  32. co.Visit("https://www.amazon.com/Bluetooth-FM-Transmitter-Compatible-Smartphones/dp/B088TCSH8T/ref=sr_1_1_sspa?dchild=1&keywords=transmitter&qid=1623860482&sr=8-1-spons&psc=1&smid=A2XMGHKVCX40WB&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUFGVEgxTjJLRFdLSkUmZW5jcnlwdGVkSWQ9QTAyNDE0ODkyRlNDWlAzUktPQzJSJmVuY3J5cHRlZEFkSWQ9QTA5MTkwNjgyWVUzQ0RUMTBCTEFFJndpZGdldE5hbWU9c3BfYXRmJmFjdGlvbj1jbGlja1JlZGlyZWN0JmRvTm90TG9nQ2xpY2s9dHJ1ZQ==WDY4VktWQlImd2lkZ2V0TmFtZT1zcF9kZXRhaWwmYWN0aW9uPWNsaWNrUmVkaXJlY3QmZG9Ob3RMb2dDbGljaz10cnVl")
  33. co.OnRequest(func(r *colly.Request) {
  34. fmt.Println("Visiting: ", r.URL.String())
  35. })
  36. enc := json.NewEncoder(os.Stdout)
  37. enc.SetIndent("", " ")
  38. enc.Encode(allInfos)
  39. writeJson(allInfos)
  40. duration := time.Since(start)
  41. fmt.Println(duration.Seconds())
  42. }
  43. func writeJson(data []Info) {
  44. dataFile, err := json.MarshalIndent(data, "", " ")
  45. if err != nil {
  46. log.Println("Could not create JSON", err)
  47. }
  48. ioutil.WriteFile("stocky.json", dataFile, 0666)
  49. }

huangapple
  • 本文由 发表于 2021年6月18日 22:30:56
  • 转载请务必保留本文链接:https://go.coder-hub.com/68037007.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定