如何连续写入多个JSON数据?

huangapple go评论74阅读模式
英文:

How can I write JSON data one after another?

问题

我正在开发一个网站爬虫。我每次只能发送一个JSON数据到JSON文件中。我想要将多个JSON数据按顺序写入同一个JSON文件中,以便保留数百个数据。像这样:

[
 {
  "id": 1321931,
  "name": "Mike"
 },
 {
    "id": 32139219,
    "name": "Melissa"
 },
 {
    "id": 8421921,
    "name": "Jordan"
 },
 {
    "id": 4291901,
    "name": "David"
 }
]

但实际输出结果是这样的。当我发送新数据时,只有第一个JSON数据会更新。

[
 {
  "id": 1,
  "name": "Mike"
 }
]

以下是代码:

package main

import (
    "encoding/json"
    "fmt"
    "html/template"
    "io/ioutil"
    "log"
    "math/rand"
    "net/http"
    "os"
    "strings"

    "github.com/gocolly/colly"
)

// Info is one record of the JSON list: a numeric id and a name, encoded
// under the "id" and "name" keys.
type Info struct {
    ID   int    `json:"id"`
    Name string `json:"name"`
}

// Package-level state shared between main and the HTTP handler.
var (
    // tpl holds the HTML templates parsed in main.
    tpl *template.Template
    // name receives the text scraped by the handler's OnHTML callback.
    name string
    // stonf is the value handed to the template on each request.
    stonf Info
    // allInfos is the list printed by the handler; main sizes it to one element.
    allInfos []Info
    // id is generated once in main and copied into allInfos[0] by the handler.
    id int
    // co is the collector used to visit the submitted link.
    co = colly.NewCollector()
)

// main prepares the package-level state, reads stocky.json, and starts the
// HTTP server on localhost:8080.
func main() {
    fmt.Println("Started...")

    // A one-element list and a single id, both created once at startup.
    allInfos = make([]Info, 1)
    id = rand.Intn((99999 - 10000) + 10000)

    // Read and decode the JSON file. Failures are only printed, and the
    // decoded slice is not used anywhere after this point.
    raw, readErr := ioutil.ReadFile("stocky.json")
    if readErr != nil {
        fmt.Println("ERROR 1 JSON", readErr)
    }
    var stored []Info
    if decodeErr := json.Unmarshal(raw, &stored); decodeErr != nil {
        fmt.Println(decodeErr)
    }

    // Templates, the form handler and the static CSS files.
    tpl, _ = tpl.ParseGlob("templates/*.html")
    http.HandleFunc("/mete", hellloHandleFunc)
    cssFiles := http.FileServer(http.Dir("./css/"))
    http.Handle("/css/", http.StripPrefix("/css", cssFiles))

    http.ListenAndServe("localhost:8080", nil)
}

// hellloHandleFunc serves /mete: it scrapes the submitted link for a name,
// prints allInfos to stdout and renders form-copy.html with the scraped name.
func hellloHandleFunc(w http.ResponseWriter, r *http.Request) {
    if err := r.ParseForm(); err != nil {
        log.Fatal(err)
    }

    // Only the first (and only) element of allInfos is ever touched here.
    allInfos[0].ID = id

    // Store the text of h1#title inside div#dp in the package-level name.
    co.OnHTML("div#dp", func(p *colly.HTMLElement) {
        name = p.ChildText("h1#title")
    })
    co.Visit(strings.TrimSpace(r.FormValue("input-link")))

    // Print the list to stdout as indented JSON.
    stdoutEnc := json.NewEncoder(os.Stdout)
    stdoutEnc.SetIndent("", " ")
    stdoutEnc.Encode(allInfos)

    stonf = Info{Name: name}

    fmt.Println("Index Running")
    tpl.ExecuteTemplate(w, "form-copy.html", stonf)
}

// writeJson encodes data as indented JSON and writes it to stocky.json,
// replacing the previous content of the file.
//
// Errors are logged rather than returned. On an encoding error nothing is
// written, so an existing file is not truncated with empty content.
func writeJson(data []Info) {
    dataFile, err := json.MarshalIndent(data, "", " ")
    if err != nil {
        log.Println("Could not create JSON", err)
        return
    }

    if err := ioutil.WriteFile("stocky.json", dataFile, 0666); err != nil {
        log.Println("Could not write stocky.json", err)
    }
}
英文:

I am working on a website scraper. At the moment I can only write a single JSON object to the JSON file. I want to append JSON objects one after another, so that a single JSON file holds hundreds of records, like this:

[
{
"id": 1321931,
"name": "Mike"
},
{
"id": 32139219,
"name": "Melissa"
},
{
"id": 8421921,
"name": "Jordan"
},
{
"id": 4291901,
"name": "David"
}
] 

But the output looks like this. When I send new data, only the first JSON object is updated.

[
{
"id": 1,
"name": "Mike"
}
]

here is the code:

package main

import (
    "encoding/json"
    "fmt"
    "html/template"
    "io/ioutil"
    "log"
    "math/rand"
    "net/http"
    "os"
    "strings"

    "github.com/gocolly/colly"
)

// Info is one record of the JSON list: a numeric id and a name, encoded
// under the "id" and "name" keys.
type Info struct {
    ID   int    `json:"id"`
    Name string `json:"name"`
}

// Package-level state shared between main and the HTTP handler.
var (
    // tpl holds the HTML templates parsed in main.
    tpl *template.Template
    // name receives the text scraped by the handler's OnHTML callback.
    name string
    // stonf is the value handed to the template on each request.
    stonf Info
    // allInfos is the list printed by the handler; main sizes it to one element.
    allInfos []Info
    // id is generated once in main and copied into allInfos[0] by the handler.
    id int
    // co is the collector used to visit the submitted link.
    co = colly.NewCollector()
)

// main prepares the package-level state, reads stocky.json, and starts the
// HTTP server on localhost:8080.
func main() {
    fmt.Println("Started...")

    // A one-element list and a single id, both created once at startup.
    allInfos = make([]Info, 1)
    id = rand.Intn((99999 - 10000) + 10000)

    // Read and decode the JSON file. Failures are only printed, and the
    // decoded slice is not used anywhere after this point.
    raw, readErr := ioutil.ReadFile("stocky.json")
    if readErr != nil {
        fmt.Println("ERROR 1 JSON", readErr)
    }
    var stored []Info
    if decodeErr := json.Unmarshal(raw, &stored); decodeErr != nil {
        fmt.Println(decodeErr)
    }

    // Templates, the form handler and the static CSS files.
    tpl, _ = tpl.ParseGlob("templates/*.html")
    http.HandleFunc("/mete", hellloHandleFunc)
    cssFiles := http.FileServer(http.Dir("./css/"))
    http.Handle("/css/", http.StripPrefix("/css", cssFiles))

    http.ListenAndServe("localhost:8080", nil)
}

// hellloHandleFunc serves /mete: it scrapes the submitted link for a name,
// prints allInfos to stdout and renders form-copy.html with the scraped name.
func hellloHandleFunc(w http.ResponseWriter, r *http.Request) {
    if err := r.ParseForm(); err != nil {
        log.Fatal(err)
    }

    // Only the first (and only) element of allInfos is ever touched here.
    allInfos[0].ID = id

    // Store the text of h1#title inside div#dp in the package-level name.
    co.OnHTML("div#dp", func(p *colly.HTMLElement) {
        name = p.ChildText("h1#title")
    })
    co.Visit(strings.TrimSpace(r.FormValue("input-link")))

    // Print the list to stdout as indented JSON.
    stdoutEnc := json.NewEncoder(os.Stdout)
    stdoutEnc.SetIndent("", " ")
    stdoutEnc.Encode(allInfos)

    stonf = Info{Name: name}

    fmt.Println("Index Running")
    tpl.ExecuteTemplate(w, "form-copy.html", stonf)
}

// writeJson encodes data as indented JSON and writes it to stocky.json,
// replacing the previous content of the file.
//
// Errors are logged rather than returned. On an encoding error nothing is
// written, so an existing file is not truncated with empty content.
func writeJson(data []Info) {
    dataFile, err := json.MarshalIndent(data, "", " ")
    if err != nil {
        log.Println("Could not create JSON", err)
        return
    }

    if err := ioutil.WriteFile("stocky.json", dataFile, 0666); err != nil {
        log.Println("Could not write stocky.json", err)
    }
}

答案1

得分: 1

这是一个将新的Info附加到列表并存储在文件中的解决方案。
该解决方案仅适用于相对较小的列表。对于大型列表,每次写入整个文件的开销可能太高。在这种情况下,我建议将格式更改为ndjson。它将允许仅写入当前的Info结构,而不是整个列表。
我还添加了同步机制,以避免同时发送多个HTTP请求时出现竞争条件。

我假设标识符必须针对每个请求单独生成,并且如果发生冲突不是问题。

package main

import (
	"encoding/json"
	"fmt"
	"html/template"
	"io/ioutil"
	"log"
	"math/rand"
	"net/http"
	"os"
	"strings"
	"sync"

	"github.com/gocolly/colly"
)

// Info is a single record stored in the JSON file, encoded under the "id"
// and "name" keys.
type Info struct {
	ID   int    `json:"id"`
	Name string `json:"name"`
}

// Infos is a list of Info values together with the mutex that guards it.
// The mutex is embedded, so Lock and Unlock are called on the Infos value.
type Infos struct {
	List []Info
	sync.Mutex
}

// infos is the in-memory info list; main assigns it from readInfos.
var infos *Infos

// tpl holds the HTML templates parsed in main.
var tpl *template.Template

// co is the collector the handler uses to visit the submitted link.
var co = colly.NewCollector()

// main loads the stored info list, parses the HTML templates, registers the
// HTTP handlers and serves on localhost:8080. Any startup failure is fatal.
func main() {
	fmt.Println("Started...")

	var err error
	infos, err = readInfos()
	if err != nil {
		log.Fatal(err)
	}

	// Fail fast on a template error: with the error discarded, tpl would stay
	// nil and the problem would only surface as a panic inside the first
	// request that renders a page.
	tpl, err = template.ParseGlob("templates/*.html")
	if err != nil {
		log.Fatalf("parsing templates: %v", err)
	}

	http.HandleFunc("/mete", hellloHandleFunc)
	staticHandler := http.FileServer(http.Dir("./css/"))
	http.Handle("/css/", http.StripPrefix("/css", staticHandler))
	if err := http.ListenAndServe("localhost:8080", nil); err != nil {
		log.Fatal(err)
	}
}

// hellloHandleFunc serves /mete. It scrapes the page given in the
// "input-link" form value for a name, appends a new Info with a freshly
// generated ID to the stored list, prints the record to stdout and renders
// form-copy.html with it.
//
// Failures are reported to the client with an HTTP error status and logged;
// a single bad request must not terminate the whole server, which is what
// log.Fatal would do.
func hellloHandleFunc(w http.ResponseWriter, r *http.Request) {
	if err := r.ParseForm(); err != nil {
		http.Error(w, "invalid form data", http.StatusBadRequest)
		return
	}
	requestLink := strings.TrimSpace(r.FormValue("input-link"))

	// Inclusive bounds of the generated ID. The earlier expression
	// rand.Intn((99999 - 10000) + 10000) is rand.Intn(99999), i.e. 0..99998;
	// the offset was presumably meant to be added outside the call.
	const minID, maxID = 10000, 99999
	stonf := Info{ID: rand.Intn(maxID-minID+1) + minID}

	// Register the callback on a per-request clone. Registering it on the
	// shared collector would add one more callback on every request, each
	// still writing to the Info of an earlier request.
	c := co.Clone()
	c.OnHTML("div#dp", func(p *colly.HTMLElement) {
		stonf.Name = p.ChildText("h1#title")
	})
	if err := c.Visit(requestLink); err != nil {
		log.Printf("visiting %q: %v", requestLink, err)
		http.Error(w, "could not fetch the requested page", http.StatusBadGateway)
		return
	}

	if err := infos.AppendAndWrite(stonf); err != nil {
		log.Printf("storing info: %v", err)
		http.Error(w, "could not store the result", http.StatusInternalServerError)
		return
	}

	// Print the stored record to stdout as indented JSON.
	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", " ")
	if err := enc.Encode(stonf); err != nil {
		log.Printf("encoding info: %v", err)
	}

	fmt.Println("Index Running")
	if err := tpl.ExecuteTemplate(w, "form-copy.html", stonf); err != nil {
		log.Printf("rendering form-copy.html: %v", err)
	}
}

// readInfos loads the info list from stocky.json.
//
// A missing or blank file yields an empty list, so the program can start
// before anything has been stored. Any other read error, or content that is
// not a JSON list of Info, is returned as an error.
func readInfos() (*Infos, error) {
	data, err := ioutil.ReadFile("stocky.json")
	if os.IsNotExist(err) {
		return &Infos{}, nil
	}
	if err != nil {
		return nil, fmt.Errorf("could not read 'stocky.json' file: %w", err)
	}
	// json.Unmarshal rejects empty input, so treat a blank file as "no data".
	if strings.TrimSpace(string(data)) == "" {
		return &Infos{}, nil
	}

	var list []Info
	if err := json.Unmarshal(data, &list); err != nil {
		return nil, fmt.Errorf("could not unmarshal 'stocky.json' file: %w", err)
	}
	return &Infos{List: list}, nil
}

// AppendAndWrite adds info to the end of the list and then stores the whole
// list in the file. The mutex is held for both steps.
func (i *Infos) AppendAndWrite(info Info) error {
	i.Mutex.Lock()
	defer i.Mutex.Unlock()

	i.List = append(i.List, info)

	err := i.storeLocked()
	if err == nil {
		return nil
	}
	return fmt.Errorf("storing info list failed: %w", err)
}

// storeLocked encodes the list as indented JSON and writes it to stocky.json.
// It does not lock; AppendAndWrite calls it with the mutex already held.
func (i *Infos) storeLocked() error {
	encoded, marshalErr := json.MarshalIndent(i.List, "", " ")
	if marshalErr != nil {
		return fmt.Errorf("could not marshal infos JSON: %w", marshalErr)
	}

	if writeErr := ioutil.WriteFile("stocky.json", encoded, 0666); writeErr != nil {
		return fmt.Errorf("could not write 'stocky.json' file: %w", writeErr)
	}
	return nil
}
英文:

Here is a solution which appends the new Info to the list and stores it in the file.
The solution performs well only for relatively small lists. For large lists, the overhead of rewriting the entire file each time may be too high. In that case I suggest changing the format to ndjson, which allows writing only the current Info struct instead of the whole list.
I've also added a synchronization mechanism to avoid race conditions in case multiple HTTP requests arrive at the same time.

I assumed that the identifier must be generated separately for each request, and that it is not a problem if collisions occur.

package main

import (
	"encoding/json"
	"fmt"
	"html/template"
	"io/ioutil"
	"log"
	"math/rand"
	"net/http"
	"os"
	"strings"
	"sync"

	"github.com/gocolly/colly"
)

// Info is a single record stored in the JSON file, encoded under the "id"
// and "name" keys.
type Info struct {
	ID   int    `json:"id"`
	Name string `json:"name"`
}

// Infos is a list of Info values together with the mutex that guards it.
// The mutex is embedded, so Lock and Unlock are called on the Infos value.
type Infos struct {
	List []Info
	sync.Mutex
}

// infos is the in-memory info list; main assigns it from readInfos.
var infos *Infos

// tpl holds the HTML templates parsed in main.
var tpl *template.Template

// co is the collector the handler uses to visit the submitted link.
var co = colly.NewCollector()

// main loads the stored info list, parses the HTML templates, registers the
// HTTP handlers and serves on localhost:8080. Any startup failure is fatal.
func main() {
	fmt.Println("Started...")

	var err error
	infos, err = readInfos()
	if err != nil {
		log.Fatal(err)
	}

	// Fail fast on a template error: with the error discarded, tpl would stay
	// nil and the problem would only surface as a panic inside the first
	// request that renders a page.
	tpl, err = template.ParseGlob("templates/*.html")
	if err != nil {
		log.Fatalf("parsing templates: %v", err)
	}

	http.HandleFunc("/mete", hellloHandleFunc)
	staticHandler := http.FileServer(http.Dir("./css/"))
	http.Handle("/css/", http.StripPrefix("/css", staticHandler))
	if err := http.ListenAndServe("localhost:8080", nil); err != nil {
		log.Fatal(err)
	}
}

// hellloHandleFunc serves /mete. It scrapes the page given in the
// "input-link" form value for a name, appends a new Info with a freshly
// generated ID to the stored list, prints the record to stdout and renders
// form-copy.html with it.
//
// Failures are reported to the client with an HTTP error status and logged;
// a single bad request must not terminate the whole server, which is what
// log.Fatal would do.
func hellloHandleFunc(w http.ResponseWriter, r *http.Request) {
	if err := r.ParseForm(); err != nil {
		http.Error(w, "invalid form data", http.StatusBadRequest)
		return
	}
	requestLink := strings.TrimSpace(r.FormValue("input-link"))

	// Inclusive bounds of the generated ID. The earlier expression
	// rand.Intn((99999 - 10000) + 10000) is rand.Intn(99999), i.e. 0..99998;
	// the offset was presumably meant to be added outside the call.
	const minID, maxID = 10000, 99999
	stonf := Info{ID: rand.Intn(maxID-minID+1) + minID}

	// Register the callback on a per-request clone. Registering it on the
	// shared collector would add one more callback on every request, each
	// still writing to the Info of an earlier request.
	c := co.Clone()
	c.OnHTML("div#dp", func(p *colly.HTMLElement) {
		stonf.Name = p.ChildText("h1#title")
	})
	if err := c.Visit(requestLink); err != nil {
		log.Printf("visiting %q: %v", requestLink, err)
		http.Error(w, "could not fetch the requested page", http.StatusBadGateway)
		return
	}

	if err := infos.AppendAndWrite(stonf); err != nil {
		log.Printf("storing info: %v", err)
		http.Error(w, "could not store the result", http.StatusInternalServerError)
		return
	}

	// Print the stored record to stdout as indented JSON.
	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", " ")
	if err := enc.Encode(stonf); err != nil {
		log.Printf("encoding info: %v", err)
	}

	fmt.Println("Index Running")
	if err := tpl.ExecuteTemplate(w, "form-copy.html", stonf); err != nil {
		log.Printf("rendering form-copy.html: %v", err)
	}
}

// readInfos loads the info list from stocky.json.
//
// A missing or blank file yields an empty list, so the program can start
// before anything has been stored. Any other read error, or content that is
// not a JSON list of Info, is returned as an error.
func readInfos() (*Infos, error) {
	data, err := ioutil.ReadFile("stocky.json")
	if os.IsNotExist(err) {
		return &Infos{}, nil
	}
	if err != nil {
		return nil, fmt.Errorf("could not read 'stocky.json' file: %w", err)
	}
	// json.Unmarshal rejects empty input, so treat a blank file as "no data".
	if strings.TrimSpace(string(data)) == "" {
		return &Infos{}, nil
	}

	var list []Info
	if err := json.Unmarshal(data, &list); err != nil {
		return nil, fmt.Errorf("could not unmarshal 'stocky.json' file: %w", err)
	}
	return &Infos{List: list}, nil
}

// AppendAndWrite adds info to the end of the list and then stores the whole
// list in the file. The mutex is held for both steps.
func (i *Infos) AppendAndWrite(info Info) error {
	i.Mutex.Lock()
	defer i.Mutex.Unlock()

	i.List = append(i.List, info)

	err := i.storeLocked()
	if err == nil {
		return nil
	}
	return fmt.Errorf("storing info list failed: %w", err)
}

// storeLocked encodes the list as indented JSON and writes it to stocky.json.
// It does not lock; AppendAndWrite calls it with the mutex already held.
func (i *Infos) storeLocked() error {
	encoded, marshalErr := json.MarshalIndent(i.List, "", " ")
	if marshalErr != nil {
		return fmt.Errorf("could not marshal infos JSON: %w", marshalErr)
	}

	if writeErr := ioutil.WriteFile("stocky.json", encoded, 0666); writeErr != nil {
		return fmt.Errorf("could not write 'stocky.json' file: %w", writeErr)
	}
	return nil
}

答案2

得分: 1

有一个名为JSON lines的标准(https://jsonlines.org/),它每行只包含一个JSON,而不是将所有内容都包装在一个JSON数组中。
Go标准库中的JSON库在处理JSON lines时非常有效,无论是读取还是写入。

写入多个JSON(每行一个):

// Each Encode call writes one JSON value followed by a newline to
// yourWriterFile. Encode returns an error, which real code should check.
e := json.NewEncoder(yourWriterFile)
e.Encode(object1)
e.Encode(object2)
//...

读取多个JSON(每行一个或连在一起):

// Each Decode call reads the next JSON value from yourReaderFile into its
// argument. Decode returns an error, which real code should check.
d := json.NewDecoder(yourReaderFile)
d.Decode(&object1)
d.Decode(&object2)
//...

更多信息请参考:https://pkg.go.dev/encoding/json

英文:

There is a standard called JSON Lines (https://jsonlines.org/) that stores one JSON value per line instead of wrapping everything in a JSON array.
The JSON package in the Go standard library works well with JSON Lines in both directions, reading and writing.

Write multiple JSON (one per line):

// Each Encode call writes one JSON value followed by a newline to
// yourWriterFile. Encode returns an error, which real code should check.
e := json.NewEncoder(yourWriterFile)
e.Encode(object1)
e.Encode(object2)
//...

Read multiple JSON (one per line or concatenated):

// Each Decode call reads the next JSON value from yourReaderFile into its
// argument. Decode returns an error, which real code should check.
d := json.NewDecoder(yourReaderFile)
d.Decode(&object1)
d.Decode(&object2)
//...

More info: https://pkg.go.dev/encoding/json

huangapple
  • 本文由 发表于 2021年7月18日 21:13:37
  • 转载请务必保留本文链接:https://go.coder-hub.com/68429426.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定