英文:
How can I write JSON data one after another?
问题
我正在开发一个网站爬虫。我每次只能发送一个JSON数据到JSON文件中。我想要将多个JSON数据按顺序写入同一个JSON文件中,以便保留数百个数据。像这样:
[
{
"id": 1321931,
"name": "Mike"
},
{
"id": 32139219,
"name": "Melissa"
},
{
"id": 8421921,
"name": "Jordan"
},
{
"id": 4291901,
"name": "David"
}
]
但实际输出结果是这样的。当我发送新数据时,只有第一个JSON数据会更新。
[
{
"id": 1,
"name": "Mike"
}
]
以下是代码:
package main
import (
"encoding/json"
"fmt"
"html/template"
"io/ioutil"
"log"
"math/rand"
"net/http"
"os"
"strings"
"github.com/gocolly/colly"
)
// Info is one record of the JSON list: a numeric id and a name,
// serialized under the keys "id" and "name".
type Info struct {
ID int `json:"id"`
Name string `json:"name"`
}
// Package-level state shared by main and hellloHandleFunc.

// tpl is assigned in main from ParseGlob and executed by the handler.
var tpl *template.Template
// name receives the "h1#title" text inside the handler's colly callback.
var name string
// stonf is the value handed to the template on each request.
var stonf Info
// allInfos is created in main with length 1; the handler only writes element 0.
var allInfos []Info
// id is assigned once in main and reused by every request.
var id int
// co is the single colly collector used by every request.
var co = colly.NewCollector()
// main reads stocky.json, parses the HTML templates, registers the /mete and
// /css/ handlers and serves on localhost:8080.
func main() {
fmt.Println("Started...")
// allInfos starts with exactly one zero-valued element and is never appended to.
allInfos = make([]Info, 1)
// (99999 - 10000) + 10000 evaluates to 99999, so this is rand.Intn(99999).
// The id is picked once here, not per request.
id = rand.Intn((99999 - 10000) + 10000)
// Read the data from the JSON file; a read error is printed and execution continues.
data, err := ioutil.ReadFile("stocky.json")
if err != nil {
fmt.Println("ERROR 1 JSON", err)
}
// Unmarshal the JSON data. The decoded list d is not copied into allInfos
// and is not used again in this function.
var d []Info
err = json.Unmarshal([]byte(data), &d)
if err != nil {
fmt.Println(err)
}
// The error returned by ParseGlob is discarded.
tpl, _ = tpl.ParseGlob("templates/*.html")
http.HandleFunc("/mete", hellloHandleFunc)
staticHandler := http.FileServer(http.Dir("./css/"))
http.Handle("/css/", http.StripPrefix("/css", staticHandler))
// The error returned by ListenAndServe is discarded.
http.ListenAndServe("localhost:8080", nil)
}
// hellloHandleFunc handles /mete: it visits the page given in the "input-link"
// form value, prints allInfos to standard output as JSON and renders
// form-copy.html with the scraped name.
func hellloHandleFunc(w http.ResponseWriter, r *http.Request) {
err := r.ParseForm()
if err != nil {
// log.Fatal ends the whole process, not just this request.
log.Fatal(err)
}
// JSON-PRO: only element 0 is ever assigned, with the id chosen in main,
// so the list never grows beyond one entry.
allInfos[0].ID = id
// Get price (original note); the callback stores the "h1#title" text in name.
// OnHTML is called on the shared collector on every request.
co.OnHTML("div#dp", func(p *colly.HTMLElement) {
name = p.ChildText("h1#title")
})
requestLink := strings.TrimSpace(r.FormValue("input-link"))
// The error returned by Visit is discarded.
co.Visit(requestLink)
// JSON of the first record: encoded to os.Stdout only. writeJson is not
// called here, so this handler does not write stocky.json.
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
enc.Encode(allInfos)
// stonf carries only Name; its ID stays zero.
stonf = Info{
Name: name,
}
fmt.Println("Index Running")
tpl.ExecuteTemplate(w, "form-copy.html", stonf)
}
// writeJson marshals data as indented JSON and overwrites stocky.json with it.
// No call to writeJson appears in the code shown here.
func writeJson(data []Info) {
dataFile, err := json.MarshalIndent(data, "", " ")
if err != nil {
// The error is logged but the write below still runs.
log.Println("Could not create JSON", err)
}
// The error returned by WriteFile is discarded.
ioutil.WriteFile("stocky.json", dataFile, 0666)
}
英文:
I am working on a website scraper. Currently I can only write a single JSON record to the JSON file at a time. I want to write records one after another, so that a single JSON file keeps hundreds of records, like this:
[
{
"id": 1321931,
"name": "Mike"
},
{
"id": 32139219,
"name": "Melissa"
},
{
"id": 8421921,
"name": "Jordan"
},
{
"id": 4291901,
"name": "David"
}
]
But the output looks like this. When I send new data, only the first JSON record is updated.
[
{
"id": 1,
"name": "Mike"
}
]
here is the code:
package main
import (
"encoding/json"
"fmt"
"html/template"
"io/ioutil"
"log"
"math/rand"
"net/http"
"os"
"strings"
"github.com/gocolly/colly"
)
// Info is one record of the JSON list: a numeric id and a name,
// serialized under the keys "id" and "name".
type Info struct {
ID int `json:"id"`
Name string `json:"name"`
}
// Package-level state shared by main and hellloHandleFunc.

// tpl is assigned in main from ParseGlob and executed by the handler.
var tpl *template.Template
// name receives the "h1#title" text inside the handler's colly callback.
var name string
// stonf is the value handed to the template on each request.
var stonf Info
// allInfos is created in main with length 1; the handler only writes element 0.
var allInfos []Info
// id is assigned once in main and reused by every request.
var id int
// co is the single colly collector used by every request.
var co = colly.NewCollector()
// main reads stocky.json, parses the HTML templates, registers the /mete and
// /css/ handlers and serves on localhost:8080.
func main() {
fmt.Println("Started...")
// allInfos starts with exactly one zero-valued element and is never appended to.
allInfos = make([]Info, 1)
// (99999 - 10000) + 10000 evaluates to 99999, so this is rand.Intn(99999).
// The id is picked once here, not per request.
id = rand.Intn((99999 - 10000) + 10000)
// Read the data from the JSON file; a read error is printed and execution continues.
data, err := ioutil.ReadFile("stocky.json")
if err != nil {
fmt.Println("ERROR 1 JSON", err)
}
// Unmarshal the JSON data. The decoded list d is not copied into allInfos
// and is not used again in this function.
var d []Info
err = json.Unmarshal([]byte(data), &d)
if err != nil {
fmt.Println(err)
}
// The error returned by ParseGlob is discarded.
tpl, _ = tpl.ParseGlob("templates/*.html")
http.HandleFunc("/mete", hellloHandleFunc)
staticHandler := http.FileServer(http.Dir("./css/"))
http.Handle("/css/", http.StripPrefix("/css", staticHandler))
// The error returned by ListenAndServe is discarded.
http.ListenAndServe("localhost:8080", nil)
}
// hellloHandleFunc handles /mete: it visits the page given in the "input-link"
// form value, prints allInfos to standard output as JSON and renders
// form-copy.html with the scraped name.
func hellloHandleFunc(w http.ResponseWriter, r *http.Request) {
err := r.ParseForm()
if err != nil {
// log.Fatal ends the whole process, not just this request.
log.Fatal(err)
}
// JSON-PRO: only element 0 is ever assigned, with the id chosen in main,
// so the list never grows beyond one entry.
allInfos[0].ID = id
// Get price (original note); the callback stores the "h1#title" text in name.
// OnHTML is called on the shared collector on every request.
co.OnHTML("div#dp", func(p *colly.HTMLElement) {
name = p.ChildText("h1#title")
})
requestLink := strings.TrimSpace(r.FormValue("input-link"))
// The error returned by Visit is discarded.
co.Visit(requestLink)
// JSON of the first record: encoded to os.Stdout only. writeJson is not
// called here, so this handler does not write stocky.json.
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
enc.Encode(allInfos)
// stonf carries only Name; its ID stays zero.
stonf = Info{
Name: name,
}
fmt.Println("Index Running")
tpl.ExecuteTemplate(w, "form-copy.html", stonf)
}
// writeJson marshals data as indented JSON and overwrites stocky.json with it.
// No call to writeJson appears in the code shown here.
func writeJson(data []Info) {
dataFile, err := json.MarshalIndent(data, "", " ")
if err != nil {
// The error is logged but the write below still runs.
log.Println("Could not create JSON", err)
}
// The error returned by WriteFile is discarded.
ioutil.WriteFile("stocky.json", dataFile, 0666)
}
答案1
得分: 1
这是一个将新的 `Info` 追加到列表并把列表存入文件的解决方案。
该解决方案仅适用于相对较小的列表。对于大型列表,每次重写整个文件的开销可能太高。在这种情况下,我建议将格式改为 ndjson,这样每次只需写入当前的 `Info` 结构,而不是整个列表。
我还添加了同步机制,以避免同时发送多个HTTP请求时出现竞争条件。
我假设标识符必须针对每个请求单独生成,并且如果发生冲突不是问题。
package main
import (
"encoding/json"
"fmt"
"html/template"
"io/ioutil"
"log"
"math/rand"
"net/http"
"os"
"strings"
"sync"
"github.com/gocolly/colly"
)
// Info is a single scraped record, serialized under the JSON keys "id" and
// "name".
type Info struct {
	ID   int    `json:"id"`
	Name string `json:"name"`
}

// Infos is a list of Info values together with the mutex that guards it.
// The mutex is embedded, so Lock and Unlock are called directly on an Infos.
type Infos struct {
	List []Info
	sync.Mutex
}
// infos is the stored list of records; main assigns it from readInfos.
var infos *Infos

// tpl holds the parsed HTML templates; main assigns it from ParseGlob.
var tpl *template.Template

// co is the colly collector shared by all requests.
var co = colly.NewCollector()
// main loads the stored Info list, parses the HTML templates, registers the
// /mete and /css/ handlers and serves on localhost:8080. Any startup failure
// terminates the program with a logged error.
func main() {
	fmt.Println("Started...")

	var err error
	infos, err = readInfos()
	if err != nil {
		log.Fatal(err)
	}

	// Fail at startup when the templates cannot be parsed. Discarding this
	// error would leave tpl nil, and the first request would then panic in
	// ExecuteTemplate.
	tpl, err = template.ParseGlob("templates/*.html")
	if err != nil {
		log.Fatalf("parsing templates: %v", err)
	}

	http.HandleFunc("/mete", hellloHandleFunc)
	staticHandler := http.FileServer(http.Dir("./css/"))
	http.Handle("/css/", http.StripPrefix("/css", staticHandler))

	if err := http.ListenAndServe("localhost:8080", nil); err != nil {
		log.Fatal(err)
	}
}
// hellloHandleFunc handles /mete. It scrapes the page named by the
// "input-link" form value, stores the result as a new Info record through
// infos.AppendAndWrite, prints the record to standard output as JSON and
// renders form-copy.html with it.
//
// Request-level failures are reported to the client with an HTTP error status
// instead of terminating the process, so one bad request cannot take the
// server down.
func hellloHandleFunc(w http.ResponseWriter, r *http.Request) {
	if err := r.ParseForm(); err != nil {
		http.Error(w, "invalid form data", http.StatusBadRequest)
		return
	}

	requestLink := strings.TrimSpace(r.FormValue("input-link"))
	if requestLink == "" {
		http.Error(w, "input-link is required", http.StatusBadRequest)
		return
	}

	// Five-digit identifier in [minID, maxID]. The previous expression,
	// rand.Intn((99999 - 10000) + 10000), collapsed to rand.Intn(99999).
	const minID, maxID = 10000, 99999
	stonf := Info{
		ID: minID + rand.Intn(maxID-minID+1),
	}

	// Use a per-request clone of the shared collector. Registering OnHTML on
	// the shared collector would add one more callback on every request, each
	// holding on to an older request's stonf.
	c := co.Clone()
	// GET Price: the callback stores the page's "h1#title" text as the name.
	c.OnHTML("div#dp", func(p *colly.HTMLElement) {
		stonf.Name = p.ChildText("h1#title")
	})
	// NOTE(review): requestLink is client-supplied and fetched by the server;
	// restrict the allowed hosts before exposing this beyond localhost.
	if err := c.Visit(requestLink); err != nil {
		log.Printf("visiting %q: %v", requestLink, err)
		http.Error(w, "could not fetch the requested page", http.StatusBadGateway)
		return
	}

	if err := infos.AppendAndWrite(stonf); err != nil {
		log.Println(err)
		http.Error(w, "could not store the result", http.StatusInternalServerError)
		return
	}

	// Echo the new record on standard output for debugging.
	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", " ")
	if err := enc.Encode(stonf); err != nil {
		log.Printf("encoding record for stdout: %v", err)
	}
	fmt.Println("Index Running")

	if err := tpl.ExecuteTemplate(w, "form-copy.html", stonf); err != nil {
		// Part of the response may already be written, so only log.
		log.Printf("rendering form-copy.html: %v", err)
	}
}
// readInfos loads the Info list persisted in stocky.json.
//
// A missing or zero-length file is treated as an empty list, so the program
// can start before anything has been stored. Any other read failure, or
// content that is not a JSON array of Info, is returned as a wrapped error.
func readInfos() (*Infos, error) {
	data, err := ioutil.ReadFile("stocky.json")
	if os.IsNotExist(err) {
		return &Infos{}, nil
	}
	if err != nil {
		return nil, fmt.Errorf("could not read 'stocky.json' file: %w", err)
	}
	if len(data) == 0 {
		return &Infos{}, nil
	}

	var list []Info
	if err := json.Unmarshal(data, &list); err != nil {
		return nil, fmt.Errorf("could not parse 'stocky.json' file: %w", err)
	}
	return &Infos{List: list}, nil
}
// AppendAndWrite appends info to the list and persists the whole list, all
// while holding the mutex so concurrent requests cannot interleave.
//
// If persisting fails, the append is undone and a wrapped error is returned.
// A record that was reported as failed is therefore not silently written out
// by a later successful call.
func (i *Infos) AppendAndWrite(info Info) error {
	i.Lock()
	defer i.Unlock()

	i.List = append(i.List, info)
	if err := i.storeLocked(); err != nil {
		i.List = i.List[:len(i.List)-1]
		return fmt.Errorf("storing info list failed: %w", err)
	}
	return nil
}
// storeLocked writes the whole list to stocky.json as indented JSON.
// The caller must hold the mutex.
//
// The data is first written to a temporary file in the same directory and
// then renamed over stocky.json, so a write that fails or is interrupted does
// not leave a truncated stocky.json behind. The file is created with mode
// 0644 (owner-writable, world-readable) rather than 0666.
func (i *Infos) storeLocked() error {
	dataFile, err := json.MarshalIndent(i.List, "", " ")
	if err != nil {
		return fmt.Errorf("could not marshal infos JSON: %w", err)
	}

	const tmpName = "stocky.json.tmp"
	if err := ioutil.WriteFile(tmpName, dataFile, 0644); err != nil {
		return fmt.Errorf("could not write 'stocky.json' file: %w", err)
	}
	if err := os.Rename(tmpName, "stocky.json"); err != nil {
		// Best-effort cleanup; the rename error is the one worth reporting.
		_ = os.Remove(tmpName)
		return fmt.Errorf("could not write 'stocky.json' file: %w", err)
	}
	return nil
}
英文:
Here is a solution which appends a new `Info` to the list and stores the list in the file.
This solution performs well only for relatively small lists. For large lists, the overhead of rewriting the entire file on every request may be too high. In that case I suggest changing the format to ndjson, which lets you write only the current `Info` struct instead of the whole list.
I've also added a synchronization mechanism to avoid race conditions when multiple HTTP requests arrive at the same time.
I assumed that the identifier must be generated separately for each request, and that it is not a problem if a collision occurs.
package main
import (
"encoding/json"
"fmt"
"html/template"
"io/ioutil"
"log"
"math/rand"
"net/http"
"os"
"strings"
"sync"
"github.com/gocolly/colly"
)
// Info is a single scraped record, serialized under the JSON keys "id" and
// "name".
type Info struct {
	ID   int    `json:"id"`
	Name string `json:"name"`
}

// Infos is a list of Info values together with the mutex that guards it.
// The mutex is embedded, so Lock and Unlock are called directly on an Infos.
type Infos struct {
	List []Info
	sync.Mutex
}
// infos is the stored list of records; main assigns it from readInfos.
var infos *Infos

// tpl holds the parsed HTML templates; main assigns it from ParseGlob.
var tpl *template.Template

// co is the colly collector shared by all requests.
var co = colly.NewCollector()
// main loads the stored Info list, parses the HTML templates, registers the
// /mete and /css/ handlers and serves on localhost:8080. Any startup failure
// terminates the program with a logged error.
func main() {
	fmt.Println("Started...")

	var err error
	infos, err = readInfos()
	if err != nil {
		log.Fatal(err)
	}

	// Fail at startup when the templates cannot be parsed. Discarding this
	// error would leave tpl nil, and the first request would then panic in
	// ExecuteTemplate.
	tpl, err = template.ParseGlob("templates/*.html")
	if err != nil {
		log.Fatalf("parsing templates: %v", err)
	}

	http.HandleFunc("/mete", hellloHandleFunc)
	staticHandler := http.FileServer(http.Dir("./css/"))
	http.Handle("/css/", http.StripPrefix("/css", staticHandler))

	if err := http.ListenAndServe("localhost:8080", nil); err != nil {
		log.Fatal(err)
	}
}
// hellloHandleFunc handles /mete. It scrapes the page named by the
// "input-link" form value, stores the result as a new Info record through
// infos.AppendAndWrite, prints the record to standard output as JSON and
// renders form-copy.html with it.
//
// Request-level failures are reported to the client with an HTTP error status
// instead of terminating the process, so one bad request cannot take the
// server down.
func hellloHandleFunc(w http.ResponseWriter, r *http.Request) {
	if err := r.ParseForm(); err != nil {
		http.Error(w, "invalid form data", http.StatusBadRequest)
		return
	}

	requestLink := strings.TrimSpace(r.FormValue("input-link"))
	if requestLink == "" {
		http.Error(w, "input-link is required", http.StatusBadRequest)
		return
	}

	// Five-digit identifier in [minID, maxID]. The previous expression,
	// rand.Intn((99999 - 10000) + 10000), collapsed to rand.Intn(99999).
	const minID, maxID = 10000, 99999
	stonf := Info{
		ID: minID + rand.Intn(maxID-minID+1),
	}

	// Use a per-request clone of the shared collector. Registering OnHTML on
	// the shared collector would add one more callback on every request, each
	// holding on to an older request's stonf.
	c := co.Clone()
	// GET Price: the callback stores the page's "h1#title" text as the name.
	c.OnHTML("div#dp", func(p *colly.HTMLElement) {
		stonf.Name = p.ChildText("h1#title")
	})
	// NOTE(review): requestLink is client-supplied and fetched by the server;
	// restrict the allowed hosts before exposing this beyond localhost.
	if err := c.Visit(requestLink); err != nil {
		log.Printf("visiting %q: %v", requestLink, err)
		http.Error(w, "could not fetch the requested page", http.StatusBadGateway)
		return
	}

	if err := infos.AppendAndWrite(stonf); err != nil {
		log.Println(err)
		http.Error(w, "could not store the result", http.StatusInternalServerError)
		return
	}

	// Echo the new record on standard output for debugging.
	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", " ")
	if err := enc.Encode(stonf); err != nil {
		log.Printf("encoding record for stdout: %v", err)
	}
	fmt.Println("Index Running")

	if err := tpl.ExecuteTemplate(w, "form-copy.html", stonf); err != nil {
		// Part of the response may already be written, so only log.
		log.Printf("rendering form-copy.html: %v", err)
	}
}
// readInfos loads the Info list persisted in stocky.json.
//
// A missing or zero-length file is treated as an empty list, so the program
// can start before anything has been stored. Any other read failure, or
// content that is not a JSON array of Info, is returned as a wrapped error.
func readInfos() (*Infos, error) {
	data, err := ioutil.ReadFile("stocky.json")
	if os.IsNotExist(err) {
		return &Infos{}, nil
	}
	if err != nil {
		return nil, fmt.Errorf("could not read 'stocky.json' file: %w", err)
	}
	if len(data) == 0 {
		return &Infos{}, nil
	}

	var list []Info
	if err := json.Unmarshal(data, &list); err != nil {
		return nil, fmt.Errorf("could not parse 'stocky.json' file: %w", err)
	}
	return &Infos{List: list}, nil
}
// AppendAndWrite appends info to the list and persists the whole list, all
// while holding the mutex so concurrent requests cannot interleave.
//
// If persisting fails, the append is undone and a wrapped error is returned.
// A record that was reported as failed is therefore not silently written out
// by a later successful call.
func (i *Infos) AppendAndWrite(info Info) error {
	i.Lock()
	defer i.Unlock()

	i.List = append(i.List, info)
	if err := i.storeLocked(); err != nil {
		i.List = i.List[:len(i.List)-1]
		return fmt.Errorf("storing info list failed: %w", err)
	}
	return nil
}
// storeLocked writes the whole list to stocky.json as indented JSON.
// The caller must hold the mutex.
//
// The data is first written to a temporary file in the same directory and
// then renamed over stocky.json, so a write that fails or is interrupted does
// not leave a truncated stocky.json behind. The file is created with mode
// 0644 (owner-writable, world-readable) rather than 0666.
func (i *Infos) storeLocked() error {
	dataFile, err := json.MarshalIndent(i.List, "", " ")
	if err != nil {
		return fmt.Errorf("could not marshal infos JSON: %w", err)
	}

	const tmpName = "stocky.json.tmp"
	if err := ioutil.WriteFile(tmpName, dataFile, 0644); err != nil {
		return fmt.Errorf("could not write 'stocky.json' file: %w", err)
	}
	if err := os.Rename(tmpName, "stocky.json"); err != nil {
		// Best-effort cleanup; the rename error is the one worth reporting.
		_ = os.Remove(tmpName)
		return fmt.Errorf("could not write 'stocky.json' file: %w", err)
	}
	return nil
}
答案2
得分: 1
有一个名为JSON lines的标准(https://jsonlines.org/),它每行只包含一个JSON,而不是将所有内容都包装在一个JSON数组中。
Go标准库中的JSON库在处理JSON lines时非常有效,无论是读取还是写入。
写入多个JSON(每行一个):
// Sketch only: yourWriterFile, object1 and object2 are placeholders.
// One encoder is created for the output and reused for every value.
e := json.NewEncoder(yourWriterFile)
// Each Encode call writes one JSON value. The returned error is ignored in
// this sketch; real code should check it.
e.Encode(object1)
e.Encode(object2)
//...
读取多个JSON(每行一个或连在一起):
// Sketch only: yourReaderFile, object1 and object2 are placeholders.
// One decoder is created for the input and reused for every value.
d := json.NewDecoder(yourReaderFile)
// Each Decode call reads the next JSON value into the given pointer. The
// returned error is ignored in this sketch; real code should check it
// (presumably this is also how the end of the input is detected; confirm
// against the encoding/json documentation).
d.Decode(&object1)
d.Decode(&object2)
//...
更多信息请参考:https://pkg.go.dev/encoding/json
英文:
There is a standard called JSON Lines (https://jsonlines.org/), which consists of one JSON value per line instead of wrapping everything in a JSON array.
The JSON library in the Go standard library works well with JSON Lines for both reading and writing.
Write multiple JSON (one per line):
// Sketch only: yourWriterFile, object1 and object2 are placeholders.
// One encoder is created for the output and reused for every value.
e := json.NewEncoder(yourWriterFile)
// Each Encode call writes one JSON value. The returned error is ignored in
// this sketch; real code should check it.
e.Encode(object1)
e.Encode(object2)
//...
Read multiple JSON (one per line or concatenated):
// Sketch only: yourReaderFile, object1 and object2 are placeholders.
// One decoder is created for the input and reused for every value.
d := json.NewDecoder(yourReaderFile)
// Each Decode call reads the next JSON value into the given pointer. The
// returned error is ignored in this sketch; real code should check it
// (presumably this is also how the end of the input is detected; confirm
// against the encoding/json documentation).
d.Decode(&object1)
d.Decode(&object2)
//...
More info: https://pkg.go.dev/encoding/json
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论