forked from ijl/orjson
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpynumpy
More file actions
executable file
·126 lines (96 loc) · 3.11 KB
/
Copy pathpynumpy
File metadata and controls
executable file
·126 lines (96 loc) · 3.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python3
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import gc
import io
import json
import os
import sys
import time
from timeit import timeit
import numpy
import psutil
import rapidjson
import simplejson
from memory_profiler import memory_usage
from tabulate import tabulate
import orjson
# Restrict this process to CPUs 0 and 1 for the duration of the benchmark.
os.sched_setaffinity(os.getpid(), {0, 1})

# The array kind is the first command-line argument. sys.argv[0] is always
# the script name, so an argument exists only when len(sys.argv) > 1; falling
# back to "" routes a missing argument to the usage message below.
kind = sys.argv[1] if len(sys.argv) > 1 else ""

# Build the random input array for the requested element type.
if kind == "int32":
    array = numpy.random.randint(((2**31) - 1), size=(100000, 100), dtype=numpy.int32)
elif kind == "float64":
    array = numpy.random.random(size=(50000, 100))
    # The benchmark label says float64; make sure the data really is.
    assert array.dtype == numpy.float64
elif kind == "bool":
    array = numpy.random.choice((True, False), size=(100000, 200))
elif kind == "int8":
    array = numpy.random.randint(((2**7) - 1), size=(100000, 100), dtype=numpy.int8)
elif kind == "uint8":
    array = numpy.random.randint(((2**8) - 1), size=(100000, 100), dtype=numpy.uint8)
else:
    # Unknown or missing kind: show usage and exit with a failure status.
    print("usage: pynumpy (bool|int32|float64|int8|uint8)")
    sys.exit(1)

# Handle on the current process, used to read its memory statistics.
proc = psutil.Process()
def default(__obj):
    """Fallback serializer for libraries that cannot encode numpy arrays.

    Passed as the ``default=`` hook to the JSON libraries being compared.

    Args:
        __obj: The object the encoder could not serialize on its own.

    Returns:
        The nested-list form of ``__obj`` (``ndarray.tolist()``) when it is
        a ``numpy.ndarray``.

    Raises:
        TypeError: If ``__obj`` is not a ``numpy.ndarray``. Falling through
            and returning ``None`` would make the encoder silently emit
            ``null`` for an unsupported object.
    """
    if isinstance(__obj, numpy.ndarray):
        return __obj.tolist()
    raise TypeError(
        f"Object of type {type(__obj).__name__} is not JSON serializable"
    )
# Column titles of the results table.
headers = ("Library", "Latency (ms)", "RSS diff (MiB)", "vs. orjson")

# Libraries to benchmark, in table order. The benchmark loop looks up a
# module-level callable named "<library>_dumps" for each entry.
LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json")

# Number of serializations timed per library.
ITERATIONS = 10


def orjson_dumps():
    """Serialize ``array`` with orjson's native numpy support."""
    return orjson.dumps(array, option=orjson.OPT_SERIALIZE_NUMPY)


# No serializer is provided for ujson; None marks it as not measured.
ujson_dumps = None


def rapidjson_dumps():
    """Serialize ``array`` with rapidjson via the ``default`` hook, as UTF-8 bytes."""
    return rapidjson.dumps(array, default=default).encode("utf-8")


def simplejson_dumps():
    """Serialize ``array`` with simplejson via the ``default`` hook, as UTF-8 bytes."""
    return simplejson.dumps(array, default=default).encode("utf-8")


def json_dumps():
    """Serialize ``array`` with the stdlib json module via the ``default`` hook, as UTF-8 bytes."""
    return json.dumps(array, default=default).encode("utf-8")


# Report the size of the serialized document. The bytes object is not bound
# to a name so it can be freed before the memory baseline is taken.
output_in_mib = len(orjson_dumps()) / 1024 / 1024
print(f"{output_in_mib:,.1f}MiB {kind} output (orjson)")

# Baseline resident set size in MiB, sampled after a garbage collection.
gc.collect()
mem_before = proc.memory_full_info().rss / 1024 / 1024
def per_iter_latency(val):
    """Scale a total timing to a per-iteration value in milliseconds.

    Args:
        val: Total time for ``ITERATIONS`` runs, or ``None`` when nothing
            was measured.

    Returns:
        ``val`` multiplied by 1000 and divided by ``ITERATIONS``, or
        ``None`` when ``val`` is ``None``.
    """
    return None if val is None else (val * 1000) / ITERATIONS
def test_correctness(func):
    """Check that ``func`` produces JSON that round-trips to the input array.

    Args:
        func: Zero-argument callable returning the serialized document.

    Returns:
        ``True`` when parsing ``func()`` with ``orjson.loads`` yields the
        same value as ``array.tolist()``, otherwise ``False``.
    """
    decoded = orjson.loads(func())
    expected = array.tolist()
    return decoded == expected
# Benchmark every library in turn and collect one formatted row per library.
table = []
for lib_name in LIBRARIES:
    gc.collect()
    print(f"{lib_name}...")

    # Resolve the "<library>_dumps" callable by name; None means the library
    # has no serializer and is reported with empty cells.
    func = locals()[f"{lib_name}_dumps"]

    if func is not None:
        total_latency = timeit(func, number=ITERATIONS)
        latency = per_iter_latency(total_latency)
        time.sleep(1)
        # Peak memory observed while running a single serialization.
        mem = max(memory_usage((func,), interval=0.001, timeout=latency * 2))
        correct = test_correctness(func)
    else:
        total_latency = None
        latency = None
        mem = None
        correct = False

    # orjson is the reference; other libraries are expressed as a multiple
    # of its per-iteration latency.
    if lib_name == "orjson":
        orjson_latency = latency
        compared_to_orjson = 1
    elif latency:
        compared_to_orjson = latency / orjson_latency
    else:
        compared_to_orjson = None

    # Results from an incorrect (or missing) serializer are blanked out.
    if not correct:
        latency, mem = None, 0

    mem_diff = mem - mem_before

    latency_cell = f"{latency:,.0f}" if latency else ""
    mem_cell = f"{mem_diff:,.0f}" if mem else ""
    ratio_cell = (
        f"{compared_to_orjson:,.1f}" if (latency and compared_to_orjson) else ""
    )
    table.append((lib_name, latency_cell, mem_cell, ratio_cell))

# Emit the results as a GitHub-flavoured markdown table followed by a blank line.
print(tabulate(table, headers, tablefmt="github") + "\n")