跑数据看 JSON 的 Marshal 与 Unmarshal 效率

写在前面

在实践 Kubernetes 中应用日志的收集方案时,发现有一些日志无法收集到 ElasticSearch(ELK 技术栈中的 E,其他两个是 Logstash 和 Kibana)。查了原因后发现原来是因为业务打印的日志“不合规”,不仅无法保存到 ES 中,还会影响 ES 保存日志的效率;业务方不乐意,负责 ES 的同事也不乐意 😔

承接《Go 反序列化 JSON 字符串的两种常见用法》,谈谈 Go 标准库对 JSON 的处理效率。

适用人群

入门——初级√——中级——高级;本文适用于初级及以上。

跑一跑 Benchmark 的数据

先看一个 JSON 字符串

// string-of-json JSON 字符串
{"employees":{"first":{"firstName":"Bill","lastName":"Gates"},"second":{"firstName":"George","lastName":"Bush"},"third":{"firstName":"Thomas","lastName":"Carter"}}}

// echo ```string-of-json``` | python -m json.tool
// 可以得到下面 Pretty 后的 JSON 格式
// Pretty 样式给人看起来比较舒服
{
    "employees": {
        "first": {
            "firstName": "Bill",
            "lastName": "Gates"
        },
        "second": {
            "firstName": "George",
            "lastName": "Bush"
        },
        "third": {
            "firstName": "Thomas",
            "lastName": "Carter"
        }
    }
}

再看看需求

根据 ES 的限制,期望能够把上面的 JSON 字符串转换成为下面的模样:

// ① 把大于第一层的所有内容转换为字符串
{
    "employees": "{\"first\":{\"firstName\":\"Bill\",\"lastName\":\"Gates\"},\"second\":{\"firstName\":\"George\",\"lastName\":\"Bush\"},\"third\":{\"firstName\":\"Thomas\",\"lastName\":\"Carter\"}}"
}

// ② 把大于第二层的所有内容转换为字符串
{
    "employees": {
        "first": "{\"firstName\":\"Bill\",\"lastName\":\"Gates\"}",
        "second": "{\"firstName\":\"George\",\"lastName\":\"Bush\"}",
        "third": "{\"firstName\":\"Thomas\",\"lastName\":\"Carter\"}"
    }
}

// ③ 把大于第三层的所有内容转换为字符串
// 从效果来看,最后一层存在字符串转义,不太期望如此,可是也还好吧🙃
{
    "employees": {
        "first": {
            "firstName": "\"Bill\"",
            "lastName": "\"Gates\""
        },
        "second": {
            "firstName": "\"George\"",
            "lastName": "\"Bush\""
        },
        "third": {
            "firstName": "\"Thomas\"",
            "lastName": "\"Carter\""
        }
    }
}

用代码跑跑数据

package hello

import (
	"encoding/json"
	"fmt"
	"testing"
)

// data is the compact sample JSON document used as input by the tests and
// benchmarks below: an "employees" object whose entries ("first", "second",
// "third") are themselves objects with string fields.
var data = `{"employees":{"first":{"firstName":"Bill","lastName":"Gates"},"second":{"firstName":"George","lastName":"Bush"},"third":{"firstName":"Thomas","lastName":"Carter"}}}`

// Parser decodes data as JSON into an untyped value.
//
// With an interface{} target, encoding/json produces map[string]interface{}
// for objects, []interface{} for arrays, float64 for numbers, string, bool,
// and nil for JSON null.
//
// The signature has no error return, so a decoding failure (malformed or
// empty input) is reported as a nil result. Callers that must tell a failure
// apart from a literal JSON null should call json.Unmarshal directly.
func Parser(data []byte) interface{} {
	var i interface{}
	if err := json.Unmarshal(data, &i); err != nil {
		// Make the failure path explicit instead of silently dropping the error.
		return nil
	}
	return i
}

// Test_Unmarshal decodes the sample document with Parser and prints the
// resulting generic value so its shape can be inspected with `go test -v`.
func Test_Unmarshal(t *testing.T) {
	da := Parser([]byte(data))
	// Parser has no error return; the sample is a JSON object, so a nil
	// result can only mean it was not decoded.
	if da == nil {
		t.Fatal("Parser returned nil for the sample JSON document")
	}
	fmt.Printf("%v\n", da)
}
// go test -v -timeout 30s . -run ^Test_Unmarshal$
// map[employees:map[first:map[firstName:Bill lastName:Gates] second:map[firstName:George lastName:Bush] third:map[firstName:Thomas lastName:Carter]]]

// Benchmark_Unmarshal measures one Parser call per iteration on the sample
// document. The string-to-[]byte conversion happens inside the timed loop.
func Benchmark_Unmarshal(b *testing.B) {
	for n := 0; n < b.N; n++ {
		Parser([]byte(data))
	}
}
// go test -benchmem -run=^$ . -bench ^Benchmark_Unmarshal$
// Benchmark_Unmarshal-4   300000   4652 ns/op   2320 B/op   38 allocs/op

// Test_Marshal round-trips the sample document: it decodes it with Parser,
// encodes the generic value again with json.Marshal and prints the result.
func Test_Marshal(t *testing.T) {
	da := Parser([]byte(data))
	d, err := json.Marshal(da)
	if err != nil {
		t.Fatalf("marshal decoded document: %v", err)
	}
	fmt.Println(string(d))
}
// go test -v -timeout 30s . -run ^Test_Marshal$
// {"employees":{"first":{"firstName":"Bill","lastName":"Gates"},"second":{"firstName":"George","lastName":"Bush"},"third":{"firstName":"Thomas","lastName":"Carter"}}}


// Benchmark_Marshal measures json.Marshal on the already-decoded sample
// document. Decoding happens once, before the loop, so only encoding is
// repeated b.N times.
func Benchmark_Marshal(b *testing.B) {
	decoded := Parser([]byte(data))
	for n := 0; n < b.N; n++ {
		// Only the cost matters here; the output and error are discarded.
		_, _ = json.Marshal(decoded)
	}
}
// go test -benchmem -run=^$ . -bench ^Benchmark_Marshal$
// Benchmark_Marshal-4   200000   6455 ns/op   2192 B/op   49 allocs/op

// Test_MarshalLevel1 flattens everything below the first level: each
// top-level value is replaced by its JSON encoding as a string, then the
// whole document is encoded and printed.
func Test_MarshalLevel1(t *testing.T) {
	da := Parser([]byte(data))
	daJSON, ok := da.(map[string]interface{})
	if !ok {
		t.Fatalf("top level is %T, want a JSON object", da)
	}
	// Overwriting existing keys while ranging over the map is safe in Go.
	for k, v := range daJSON {
		d, err := json.Marshal(v)
		if err != nil {
			t.Fatalf("marshal value of %q: %v", k, err)
		}
		daJSON[k] = string(d)
	}
	s, err := json.Marshal(daJSON)
	if err != nil {
		t.Fatalf("marshal flattened document: %v", err)
	}
	fmt.Printf("Test_MarshalLevel1 %v\n", string(s))
}
// go test -v -timeout 30s . -run ^Test_MarshalLevel1$
// Test_MarshalLevel1 {"employees":"{\"first\":{\"firstName\":\"Bill\",\"lastName\":\"Gates\"},\"second\":{\"firstName\":\"George\",\"lastName\":\"Bush\"},\"third\":{\"firstName\":\"Thomas\",\"lastName\":\"Carter\"}}"}

// Benchmark_MarshalLevel1 measures the full level-1 flattening pipeline per
// iteration: decode the sample, replace every top-level value with its JSON
// text, and encode the result. Decoding is inside the loop because the
// flattening mutates the decoded map in place.
func Benchmark_MarshalLevel1(b *testing.B) {
	for n := 0; n < b.N; n++ {
		root := Parser([]byte(data)).(map[string]interface{})
		for key, val := range root {
			encoded, _ := json.Marshal(val)
			root[key] = string(encoded)
		}
		// Only the cost matters here; the output and error are discarded.
		_, _ = json.Marshal(root)
	}
}
// go test -benchmem -run=^$ . -bench ^Benchmark_MarshalLevel1$
// Benchmark_MarshalLevel1-4   100000   12399 ns/op   4880 B/op   90 allocs/op

// Test_MarshalLevel2 flattens everything below the second level: each value
// nested two levels deep is replaced by its JSON encoding as a string, then
// the whole document is encoded and printed.
func Test_MarshalLevel2(t *testing.T) {
	da := Parser([]byte(data))
	daJSON, ok := da.(map[string]interface{})
	if !ok {
		t.Fatalf("top level is %T, want a JSON object", da)
	}
	for k1, v1 := range daJSON {
		v1Map, ok := v1.(map[string]interface{})
		if !ok {
			t.Fatalf("value of %q is %T, want a JSON object", k1, v1)
		}
		// v1Map is the same map that daJSON[k1] refers to, so updating it in
		// place is enough; no write-back into daJSON is needed.
		for k2, v2 := range v1Map {
			d, err := json.Marshal(v2)
			if err != nil {
				t.Fatalf("marshal value of %q.%q: %v", k1, k2, err)
			}
			v1Map[k2] = string(d)
		}
	}
	s, err := json.Marshal(daJSON)
	if err != nil {
		t.Fatalf("marshal flattened document: %v", err)
	}
	fmt.Printf("Test_MarshalLevel2 %v\n", string(s))
}
// go test -v -timeout 30s . -run ^Test_MarshalLevel2$
// Test_MarshalLevel2 {"employees":{"first":"{\"firstName\":\"Bill\",\"lastName\":\"Gates\"}","second":"{\"firstName\":\"George\",\"lastName\":\"Bush\"}","third":"{\"firstName\":\"Thomas\",\"lastName\":\"Carter\"}"}}

// Benchmark_MarshalLevel2 measures the full level-2 flattening pipeline per
// iteration: decode the sample, replace every second-level value with its
// JSON text, and encode the result. Decoding is inside the loop because the
// flattening mutates the decoded maps in place.
func Benchmark_MarshalLevel2(b *testing.B) {
	for n := 0; n < b.N; n++ {
		root := Parser([]byte(data)).(map[string]interface{})
		for _, child := range root {
			childMap := child.(map[string]interface{})
			for key, val := range childMap {
				encoded, _ := json.Marshal(val)
				childMap[key] = string(encoded)
			}
		}
		// Only the cost matters here; the output and error are discarded.
		_, _ = json.Marshal(root)
	}
}
// go test -benchmem -run=^$ . -bench ^Benchmark_MarshalLevel2$
// Benchmark_MarshalLevel2-4   100000   13297 ns/op   4881 B/op   96 allocs/op

// Test_MarshalLevel3 flattens everything below the third level: each value
// nested three levels deep is replaced by its JSON encoding as a string, then
// the whole document is encoded and printed. In the sample those values are
// already strings, so they come out double-quoted (e.g. "\"Bill\"").
func Test_MarshalLevel3(t *testing.T) {
	da := Parser([]byte(data))
	daJSON, ok := da.(map[string]interface{})
	if !ok {
		t.Fatalf("top level is %T, want a JSON object", da)
	}
	for k1, v1 := range daJSON {
		v1Map, ok := v1.(map[string]interface{})
		if !ok {
			t.Fatalf("value of %q is %T, want a JSON object", k1, v1)
		}
		for k2, v2 := range v1Map {
			v2Map, ok := v2.(map[string]interface{})
			if !ok {
				t.Fatalf("value of %q.%q is %T, want a JSON object", k1, k2, v2)
			}
			// v2Map is the same map that v1Map[k2] refers to, so updating it
			// in place is enough; no write-back into the parents is needed.
			for k3, v3 := range v2Map {
				d, err := json.Marshal(v3)
				if err != nil {
					t.Fatalf("marshal value of %q.%q.%q: %v", k1, k2, k3, err)
				}
				v2Map[k3] = string(d)
			}
		}
	}
	s, err := json.Marshal(daJSON)
	if err != nil {
		t.Fatalf("marshal flattened document: %v", err)
	}
	// The label previously read "Test_MarshalLevel2" (copy-paste slip).
	fmt.Printf("Test_MarshalLevel3 %v\n", string(s))
}
// go test -v -timeout 30s . -run ^Test_MarshalLevel3$
// Test_MarshalLevel3 {"employees":{"first":{"firstName":"\"Bill\"","lastName":"\"Gates\""},"second":{"firstName":"\"George\"","lastName":"\"Bush\""},"third":{"firstName":"\"Thomas\"","lastName":"\"Carter\""}}}

// Benchmark_MarshalLevel3 measures the full level-3 flattening pipeline per
// iteration: decode the sample, replace every third-level value with its
// JSON text, and encode the result. Decoding is inside the loop because the
// flattening mutates the decoded maps in place.
func Benchmark_MarshalLevel3(b *testing.B) {
	for n := 0; n < b.N; n++ {
		root := Parser([]byte(data)).(map[string]interface{})
		for topKey, child := range root {
			childMap := child.(map[string]interface{})
			for midKey, grandchild := range childMap {
				leafMap := grandchild.(map[string]interface{})
				for leafKey, leaf := range leafMap {
					encoded, _ := json.Marshal(leaf)
					leafMap[leafKey] = string(encoded)
				}
				// Stores the same map back under its own key; kept so the
				// measured work stays the same.
				childMap[midKey] = leafMap
			}
			root[topKey] = childMap
		}
		// Only the cost matters here; the output and error are discarded.
		_, _ = json.Marshal(root)
	}
}
// go test -benchmem -run=^$ . -bench ^Benchmark_MarshalLevel3$
// Benchmark_MarshalLevel3-4   100000   14319 ns/op   4720 B/op   105 allocs/op

分析一下数据

# 整合上面的 benchmark 的数据
Benchmark_Unmarshal-4   300000   4652 ns/op   2320 B/op   38 allocs/op
Benchmark_Marshal-4   200000   6455 ns/op   2192 B/op   49 allocs/op
Benchmark_MarshalLevel1-4   100000   12399 ns/op   4880 B/op   90 allocs/op
Benchmark_MarshalLevel2-4   100000   13297 ns/op   4881 B/op   96 allocs/op
Benchmark_MarshalLevel3-4   100000   14319 ns/op   4720 B/op   105 allocs/op
  1. 在 Go 中,Marshal 比 Unmarshal 更消耗时间;
  2. 随着处理深度的增加,消耗的 CPU 时间越来越多,应该是 For 循环以及更多的 Marshal 过程消耗了大量的时间;
  3. 从上面的数据,时间按照 11107 => 12399 => 13297 => 14319 递增(其中 11107 = 4652 + 6455,即一次 Unmarshal 加一次 Marshal 的耗时之和,作为不做层级转换时的基准;每个阶段约增加 7%~12%);
  4. 可以通过增加 JSON 字符串长度、修改 go test -count=2 -benchtime 3.1s -benchmem . -bench . 的参数获取更为精确的数据( go test 用法很多=。=)。

小结

以前在写业务代码的时候,记得有一次对接 C# 技术栈的一个服务,其返回的数据格式与本文中提到的 “大于第 x 层的内容转换为字符串” 类似,当时一直想不明白为什么要有这样的设计。现在来看,很可能是开发者把本该用在日志上的规范用到了 API 接口规范上。

以上,希望对大家有所启发。

参考