英文:
Write gzipped Json objects into a Json file without loading it
问题
我要将字典以gzipped json对象的形式写入一个json文件。
我有一些解决方案,但随着文件变得越来越大,附加过程变得越来越慢。因此,加载文件不是解决办法。
我在这里找到了解决方案:
def append_record_seek(data,filename):
print(f'append_record_seek started with data:{data} filename:{filename}')
with open(filename, mode="r+") as file:
file.seek(os.stat(filename).st_size - 1)
file.write(",{}".format(json.dumps(data)))
后来,我想要将该文件读取为一个字典列表。
以下是我的最小代码示例:
import global_variables as gv
import time
import json as json
import base64
import io
import sys
import cv2
import gzip
import numpy as np
import os
from numpy import asarray
from json import JSONEncoder
data = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
path = r'C:/Videotool/Data'
name = 'test'
filename = path + '/' + name + '.json'
isExist = os.path.exists(path)
if not isExist:
os.makedirs(path)
os.chdir(path)
def first_writer(data, filename):
print(f'first_writer started with data:{data} filename:{filename}')
with open(filename, 'w') as file:
file.write('[')
file.write(json.dumps(data))
file.write(',')
file.write(']')
def append_record_seek(data, filename):
print(f'append_record_seek started with data:{data} filename:{filename}')
with open(filename, mode="r+") as file:
file.seek(os.stat(filename).st_size - 1)
file.write(",{}".format(json.dumps(data)))
for x in range(6):
print(f'step:{x}')
file_exists = os.path.exists(name+'.json')
if file_exists:
print('file_exists')
append_record_seek(data, filename)
else:
print('else')
first_writer(data, filename)
非压缩的结果应该如下所示:
[{"brand": "Ford", "model": "Mustang", "year": 1964},
{"brand": "Ford", "model": "Mustang", "year": 1964},
{"brand": "Ford", "model": "Mustang", "year": 1964},
{"brand": "Ford", "model": "Mustang", "year": 1964},
{"brand": "Ford", "model": "Mustang", "year": 1964}]
我的结果是:
[{"brand": "Ford", "model": "Mustang", "year": 1964},,,,,,]
如果这样可以工作,我希望在写入之前对数据进行压缩。
我希望有人可以帮助。更新:
我已经获得了正确的JSON格式:
def first_writer(data, filename):
print(f'first_writer started with data:{data} filename:{filename}')
with open(filename, 'w') as file:
file.write("[{}]".format(json.dumps(data)))
def append_record_seek(data, filename):
print(f'append_record_seek started with data:{data} filename:{filename}')
with open(filename, mode="r+") as file:
file.seek(os.stat(filename).st_size - 1)
file.write(",{}]".format(json.dumps(data)))
现在我需要对数据进行压缩。
英文:
I want to write dicts as gzipped json objects into a json file.
I had some solutions, but as the file got bigger the appending process got slower and slower.
So loading the file was not the way.
I found the solution here with:
def append_record_seek(data,filename):
print(f'append_record_seek started with data:{data} filename:{filename}')
with open (filename, mode="r+") as file:
file.seek(os.stat(filename).st_size -1)
file.write( ",{}".format(json.dumps(data)) )
Later i want to read that file as a list of dicts.
Here is my minimal Code example:
import global_variables as gv
import time
import json as json
import base64
import io
import sys
import cv2
import gzip
import numpy as np
import os
from numpy import asarray
from json import JSONEncoder
data = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
path = r'C:/Videotool/Data'
name = 'test'
filename = path + '/' + name + '.json'
isExist = os.path.exists(path)
if not isExist:
os.makedirs(path)
os.chdir(path)
def first_writer(data,filename):
print(f'first_writer started with data:{data} filename:{filename}')
with open (filename, 'w') as file:
file.write('[')
file.write(json.dumps(data))
file.write(',')
file.write(']')
def append_record_seek(data,filename):
print(f'append_record_seek started with data:{data} filename:{filename}')
with open (filename, mode="r+") as file:
file.seek(os.stat(filename).st_size -1)
file.write( ",{}".format(json.dumps(data)) )
for x in range(6):
print(f'step:{x}')
file_exists = os.path.exists(name+'.json')
if file_exists:
print('file_exists')
append_record_seek(data,filename)
else:
print('else')
first_writer(data,filename)
the non zipped result should be looking like:
[{"brand": "Ford", "model": "Mustang", "year": 1964},
{"brand": "Ford", "model": "Mustang", "year": 1964},
{"brand": "Ford", "model": "Mustang", "year": 1964},
{"brand": "Ford", "model": "Mustang", "year": 1964},
{"brand": "Ford", "model": "Mustang", "year": 1964}]
My result is : [{"brand": "Ford", "model": "Mustang", "year": 1964},,,,,,]
If that works, i want to zip the dumps before writing.
I hope somebody can help
Update:
I've got the right Json format with:
def first_writer(data,filename):
print(f'first_writer started with data:{data} filename:{filename}')
with open (filename, 'w') as file:
file.write( "[{}]".format(json.dumps(data)) )
def append_record_seek(data,filename):
print(f'append_record_seek started with data:{data} filename:{filename}')
with open (filename, mode="r+") as file:
file.seek(os.stat(filename).st_size -1)
file.write( ",{}]".format(json.dumps(data)) )
Now i have to get that zipped...
答案1
得分: 1
以下是代码部分的翻译:
import gzip
from copy import copy
import json
# just test data
x = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
z = {
"brand": "Mato",
"model": "Laatikko",
"year": 2023
}
l = []
# populate the initial "json" in the list l
for i in range(3):
y = copy(x)
y["year"] += i
l.append(y)
# write list of dicts as jsons string into file and compress it via gzip
# it doesnt really matter how this was originally done..
with open("data.gz", "wb") as f:
f.write(gzip.compress(bytes(json.dumps(l, indent=2),"utf-8")))
# then, append a new entry to the same file -- which will get uncompressed
# with the previously stored *valid* json structure..
with open("data.gz", "ab") as f:
f.write(gzip.compress(bytes(json.dumps(z, indent=2),"utf-8")))
希望这对你有所帮助。
英文:
NOTE: This is not the answer to the question, as there is none, this will just highlight that a single compressed file can be generated and decompressed later but it will not be valid json.
import gzip
from copy import copy
import json
# just test data
x = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
z = {
"brand": "Mato",
"model": "Laatikko",
"year": 2023
}
l = []
# populate the initial "json" in the list l
for i in range(3):
y = copy(x)
y["year"] += i
l.append(y)
# write list of dicts as jsons string into file and compress it via gzip
# it doesnt really matter how this was originally done..
with open("data.gz", "wb") as f:
f.write(gzip.compress(bytes(json.dumps(l, indent=2),"utf-8")))
# then, append a new entry to the same file -- which will get uncompressed
# with the previously stored *valid* json structure..
with open("data.gz", "ab") as f:
f.write(gzip.compress(bytes(json.dumps(z, indent=2),"utf-8")))
This will result a file that looks like this when uncompressed
[
{
"brand": "Ford",
"model": "Mustang",
"year": 1964
},
{
"brand": "Ford",
"model": "Mustang",
"year": 1965
},
{
"brand": "Ford",
"model": "Mustang",
"year": 1966
}
]{
"brand": "Mato",
"model": "Laatikko",
"year": 2023
}
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论