英文:
Why is JSON deserialisation with System.Text.Json so slow?
问题
我有一个相同的最小项目,用C#和Go分别写了一个反序列化json 100,000次的程序。性能差异很大。虽然使用Go可以实现性能目标,但我更希望在C#中实现可比较的结果。考虑到C#的速度慢了193倍,我认为错误可能出在我这边,但我无法找出原因。
以下是C#的性能测试结果:
$ dotnet run .
real 1m37.555s
user 1m39.552s
sys 0m0.729s
$ ./jsonperf
real 0m0.478s
user 0m0.500s
sys 0m0.011s
以下是C#的源代码:
using System;
namespace jsonperf
{
class Program
{
static void Main(string[] args)
{
var json = "{\"e\":\"trade\",\"E\":1633046399882,\"s\":\"BTCBUSD\",\"t\":243216662,\"p\":\"43818.22000000\",\"q\":\"0.00452000\",\"b\":3422298876,\"a\":3422298789,\"T\":1633046399882,\"m\":false,\"M\":true}";
for (int i = 0; i < 100000; i++)
{
if (0 == i % 1000)
{
Console.WriteLine($"Completed: {i}");
}
var obj = BinanceTradeUpdate.FromJson(json);
}
Console.WriteLine("Done");
}
}
}
和
using System;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace jsonperf
{
public class BinanceTradeUpdate
{
[JsonPropertyName("e")]
public string EventType
{
get;
set;
}
[JsonPropertyName("E")]
public long EventUnixTimestamp
{
get;
set;
}
[JsonIgnore]
public DateTime EventTime
{
get
{
return new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddMilliseconds(EventUnixTimestamp);
}
}
[JsonPropertyName("s")]
public string MarketSymbol
{
get;
set;
}
[JsonPropertyName("t")]
public long TradeId
{
get;
set;
}
[JsonPropertyName("p")]
public double Price
{
get;
set;
}
[JsonPropertyName("q")]
public double Quantity
{
get;
set;
}
[JsonPropertyName("b")]
public long BuyerOrderId
{
get;
set;
}
[JsonPropertyName("a")]
public long SellerOrderId
{
get;
set;
}
[JsonPropertyName("T")]
public long TradeUnixTimestamp
{
get;
set;
}
[JsonIgnore]
public DateTime TradeTime
{
get
{
return new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddMilliseconds(TradeUnixTimestamp);
}
}
[JsonPropertyName("m")]
public bool BuyerIsMarketMaker
{
get;
set;
}
[JsonPropertyName("M")]
public bool UndocumentedFlag
{
get;
set;
}
public static BinanceTradeUpdate FromJson(string json)
{
return JsonSerializer.Deserialize<BinanceTradeUpdate>(
json,
new JsonSerializerOptions()
{
NumberHandling = JsonNumberHandling.AllowReadingFromString
});
}
}
}
以下是Go的源代码:
package main
import (
"encoding/csv"
"encoding/json"
"fmt"
"os"
"strconv"
)
type Float64Str float64
func (f *Float64Str) UnmarshalJSON(b []byte) error {
var s string
// 尝试首先解析为字符串
if err := json.Unmarshal(b, &s); err == nil {
value, err := strconv.ParseFloat(s, 64)
if err != nil {
return err
}
*f = Float64Str(value)
return nil
}
// 如果不成功,则解析为float64
return json.Unmarshal(b, (*float64)(f))
}
// Trade 表示在给定市场上的资产交换
type Trade struct {
EventType string `json:"e"`
EventTime int64 `json:"E"`
MarketSymbol string `json:"s"`
TradeID int64 `json:"t"`
Price Float64Str `json:"p"`
Quantity Float64Str `json:"q"`
BuyerOrderID int64 `json:"b"`
SellerOrderID int64 `json:"a"`
TradeTime int64 `json:"T"`
IsBuyerMaker bool `json:"m"`
Flag bool `json:"M"`
}
func main() {
jsonString := "{\"e\":\"trade\",\"E\":1633046399882,\"s\":\"BTCBUSD\",\"t\":243216662,\"p\":\"43818.22000000\",\"q\":\"0.00452000\",\"b\":3422298876,\"a\":3422298789,\"T\":1633046399882,\"m\":false,\"M\":true}"
// 打开标准输出
var stdwrite = csv.NewWriter(os.Stdout)
// 将字符串多次转换为对象
var trade = Trade{}
counter := 0
for i := 0; i < 100000; i++ {
if err := json.Unmarshal([]byte(jsonString), &trade); err != nil {
stdwrite.Flush()
panic(err)
} else {
counter++
if counter%1000 == 0 {
fmt.Printf("%d elements read\n", counter)
}
}
}
}
英文:
I have the same minimal project that deserializes a json 100,000 times written in C# and in Go. The performance varies greatly. While it is nice to know that performance goals can be achieved by using Go, I would much prefer to achieve comparable results in C#. Given that C# is 193x slower, I assume the mistake is on my side, but I cannot figure out why.
Performance
$ dotnet run .
real 1m37.555s
user 1m39.552s
sys 0m0.729s
$ ./jsonperf
real 0m0.478s
user 0m0.500s
sys 0m0.011s
Source code C#
using System;
namespace jsonperf
{
class Program
{
static void Main(string[] args)
{
var json = "{\"e\":\"trade\",\"E\":1633046399882,\"s\":\"BTCBUSD\",\"t\":243216662,\"p\":\"43818.22000000\",\"q\":\"0.00452000\",\"b\":3422298876,\"a\":3422298789,\"T\":1633046399882,\"m\":false,\"M\":true}";
for (int i = 0; i < 100000; i++)
{
if (0 == i % 1000)
{
Console.WriteLine($"Completed: {i}");
}
var obj = BinanceTradeUpdate.FromJson(json);
}
Console.WriteLine("Done");
}
}
}
and
using System;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace jsonperf
{
public class BinanceTradeUpdate
{
[JsonPropertyName("e")]
public string EventType
{
get;
set;
}
[JsonPropertyName("E")]
public long EventUnixTimestamp
{
get;
set;
}
[JsonIgnore]
public DateTime EventTime
{
get
{
return new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddMilliseconds(EventUnixTimestamp);
}
}
[JsonPropertyName("s")]
public string MarketSymbol
{
get;
set;
}
[JsonPropertyName("t")]
public long TradeId
{
get;
set;
}
[JsonPropertyName("p")]
public double Price
{
get;
set;
}
[JsonPropertyName("q")]
public double Quantity
{
get;
set;
}
[JsonPropertyName("b")]
public long BuyerOrderId
{
get;
set;
}
[JsonPropertyName("a")]
public long SellerOrderId
{
get;
set;
}
[JsonPropertyName("T")]
public long TradeUnixTimestamp
{
get;
set;
}
[JsonIgnore]
public DateTime TradeTime
{
get
{
return new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddMilliseconds(TradeUnixTimestamp);
}
}
[JsonPropertyName("m")]
public bool BuyerIsMarketMaker
{
get;
set;
}
[JsonPropertyName("M")]
public bool UndocumentedFlag
{
get;
set;
}
public static BinanceTradeUpdate FromJson(string json)
{
return JsonSerializer.Deserialize<BinanceTradeUpdate>(
json,
new JsonSerializerOptions()
{
NumberHandling = JsonNumberHandling.AllowReadingFromString
});
}
}
}
Source code Go
package main
import (
"encoding/csv"
"encoding/json"
"fmt"
"os"
"strconv"
)
type Float64Str float64
func (f *Float64Str) UnmarshalJSON(b []byte) error {
var s string
// Try to unmarshal string first
if err := json.Unmarshal(b, &s); err == nil {
value, err := strconv.ParseFloat(s, 64)
if err != nil {
return err
}
*f = Float64Str(value)
return nil
}
// If unsuccessful, unmarshal as float64
return json.Unmarshal(b, (*float64)(f))
}
// Trade represents an exchange of assets in a given market
type Trade struct {
EventType string json:"e"
EventTime int64 json:"E"
MarketSymbol string json:"s"
TradeID int64 json:"t"
Price Float64Str json:"p"
Quantity Float64Str json:"q"
BuyerOrderID int64 json:"b"
SellerOrderID int64 json:"a"
TradeTime int64 json:"T"
IsBuyerMaker bool json:"m"
Flag bool json:"M"
}
func main() {
jsonString := "{\"e\":\"trade\",\"E\":1633046399882,\"s\":\"BTCBUSD\",\"t\":243216662,\"p\":\"43818.22000000\",\"q\":\"0.00452000\",\"b\":3422298876,\"a\":3422298789,\"T\":1633046399882,\"m\":false,\"M\":true}"
// open stdout
var stdwrite = csv.NewWriter(os.Stdout)
// convert string several times into obj
var trade = Trade{}
counter := 0
for i := 0; i < 100000; i++ {
if err := json.Unmarshal([]byte(jsonString), &trade); err != nil {
stdwrite.Flush()
panic(err)
} else {
counter++
if counter%1000 == 0 {
fmt.Printf("%d elements read\n", counter)
}
}
}
}
答案1
得分: 13
这需要很长时间的原因是每次都在初始化一个新的JsonSerializerOptions
对象。
只需初始化一次序列化器,你将看到巨大的性能提升(对我来说超过70%)。
英文:
The reason this takes so long is that you’re initialising a new JsonSerializerOptions
object everytime.
Initialise the serialiser once & you’ll see huge performance improvements (70%+ for me).
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论