Skip to content

Commit cd0c91c

Browse files
committed
dataclass
1 parent 25849ef commit cd0c91c

7 files changed

Lines changed: 208 additions & 9 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ license = "Apache-2.0 OR MIT"
88
repository = "https://github.com/ijl/orjson"
99
homepage = "https://github.com/ijl/orjson"
1010
readme = "README.md"
11-
keywords = ["fast", "json", "datetime", "rfc", "8259", "3339"]
11+
keywords = ["fast", "json", "dataclass", "dataclasses", "datetime", "rfc", "8259", "3339"]
1212
include = [
1313
"Cargo.toml",
1414
"CHANGELOG.md",

README.md

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
orjson is a fast, correct JSON library for Python. It
44
[benchmarks](#performance) as the fastest Python library for JSON and is
5-
more correct than the standard json library or third-party libraries.
5+
more correct than the standard json library or other third-party libraries. It
6+
serializes [dataclass](#dataclass) and [datetime](#datetime) instances by
7+
default.
68

79
Its serialization performance on fixtures of real data is 2.5x to 9.5x the
810
nearest other library and 4x to 12x the standard library. Its deserialization
@@ -11,6 +13,8 @@ library and 1.4x to 2x the standard library.
1113

1214
Its features and drawbacks compared to other Python JSON libraries:
1315

16+
* serializes `dataclass` instances significantly faster than the
17+
standard library's `dataclasses.asdict()` with `json.dumps()`
1418
* serializes `datetime`, `date`, and `time` instances to RFC 3339 format,
1519
a subset of ISO 8601
1620
* serializes to `bytes` rather than `str`
@@ -43,10 +47,11 @@ submitted there.
4347
2. [Serialize](#serialize)
4448
3. [Deserialize](#deserialize)
4549
2. [Types](#types)
46-
1. [datetime](#datetime)
47-
2. [int](#int)
48-
3. [float](#float)
49-
4. [str](#str)
50+
1. [dataclass](#dataclass)
51+
2. [datetime](#datetime)
52+
3. [int](#int)
53+
4. [float](#float)
54+
5. [str](#str)
5055
3. [Testing](#testing)
5156
4. [Performance](#performance)
5257
1. [Latency](#latency)
@@ -85,10 +90,11 @@ def dumps(__obj: Any, default: Optional[Callable[[Any], Any]] = ..., option: Opt
8590

8691
It natively serializes
8792
`str`, `dict`, `list`, `tuple`, `int`, `float`, `bool`,
88-
`typing.TypedDict`, `datetime.datetime`,
93+
`dataclasses.dataclass`, `typing.TypedDict`, `datetime.datetime`,
8994
`datetime.date`, `datetime.time`, and `None` instances. It supports
9095
arbitrary types through `default`. It does not serialize subclasses of
91-
supported types natively.
96+
supported types natively, with the exception of `dataclasses.dataclass`
97+
subclasses.
9298

9399
To serialize a subclass or arbitrary types, specify `default` as a
94100
callable that returns a supported type. `default` may be a function,
@@ -175,6 +181,13 @@ This is for compatibility with the standard library.
175181

176182
## Types
177183

184+
### dataclass
185+
186+
orjson serializes instances of `dataclasses.dataclass` natively.
187+
188+
`orjson.dumps()` serializes instances significantly faster than using
189+
`dataclasses.asdict()` with `json.dumps()`.
190+
178191
### datetime
179192

180193
orjson serializes `datetime.datetime` objects to

src/encode.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,39 @@ impl<'p> Serialize for SerializePyObject {
189189
let mut dt: SmallVec<[u8; 32]> = SmallVec::with_capacity(32);
190190
write_time(self.ptr, self.opts, &mut dt);
191191
serializer.serialize_str(str_from_slice!(dt.as_ptr(), dt.len()))
192+
} else if ffi!(PyObject_HasAttr(self.ptr, DATACLASS_FIELDS_STR)) == 1 {
193+
let fields = ffi!(PyObject_GetAttr(self.ptr, DATACLASS_FIELDS_STR));
194+
ffi!(Py_DECREF(fields));
195+
let mut map = serializer.serialize_map(None).unwrap();
196+
let mut pos = 0isize;
197+
let mut str_size: pyo3::ffi::Py_ssize_t = 0;
198+
let mut attr: *mut pyo3::ffi::PyObject = std::ptr::null_mut();
199+
let mut field: *mut pyo3::ffi::PyObject = std::ptr::null_mut();
200+
while unsafe {
201+
pyo3::ffi::PyDict_Next(fields, &mut pos, &mut attr, &mut field) != 0
202+
} {
203+
if unlikely!(self.recursion == 255) {
204+
err!("Recursion limit reached")
205+
}
206+
let data = ffi!(PyUnicode_AsUTF8AndSize(attr, &mut str_size)) as *const u8;
207+
if unlikely!(data.is_null()) {
208+
err!(INVALID_STR);
209+
}
210+
let value = ffi!(PyObject_GetAttr(self.ptr, attr));
211+
ffi!(Py_DECREF(value));
212+
213+
map.serialize_entry(
214+
str_from_slice!(data, str_size),
215+
&SerializePyObject {
216+
ptr: value,
217+
default: self.default,
218+
opts: self.opts,
219+
default_calls: self.default_calls,
220+
recursion: self.recursion + 1,
221+
},
222+
)?;
223+
}
224+
map.end()
192225
} else if self.default.is_some() {
193226
if self.default_calls > 5 {
194227
err!("default serializer exceeds recursion limit")

src/typeref.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ pub static mut UTCOFFSET_METHOD_STR: *mut pyo3::ffi::PyObject = 0 as *mut pyo3::
2424
pub static mut NORMALIZE_METHOD_STR: *mut pyo3::ffi::PyObject = 0 as *mut pyo3::ffi::PyObject;
2525
pub static mut CONVERT_METHOD_STR: *mut pyo3::ffi::PyObject = 0 as *mut pyo3::ffi::PyObject;
2626
pub static mut DST_STR: *mut pyo3::ffi::PyObject = 0 as *mut pyo3::ffi::PyObject;
27+
pub static mut DATACLASS_FIELDS_STR: *mut pyo3::ffi::PyObject = 0 as *mut pyo3::ffi::PyObject;
2728

2829
static EMTPY_STR: &str = "";
2930

@@ -89,6 +90,10 @@ pub fn init_typerefs() {
8990
CONVERT_METHOD_STR =
9091
pyo3::ffi::PyUnicode_FromStringAndSize("convert".as_ptr() as *const c_char, 7);
9192
DST_STR = pyo3::ffi::PyUnicode_FromStringAndSize("dst".as_ptr() as *const c_char, 3);
93+
DATACLASS_FIELDS_STR = pyo3::ffi::PyUnicode_FromStringAndSize(
94+
"__dataclass_fields__".as_ptr() as *const c_char,
95+
20,
96+
);
9297
pyo3::ffi::Py_DECREF(datetime);
9398
pyo3::ffi::Py_DECREF(date);
9499
pyo3::ffi::Py_DECREF(time);

test/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
arrow
2+
dataclasses;python_version<"3.7"
23
pendulum;sys_platform!="win"
34
psutil
45
pytest
56
pytz
6-
typing_extensions;python_version>"3.5" and python_version <"3.8"
7+
typing_extensions;python_version>="3.6" and python_version<"3.8"

test/test_dataclass.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
2+
3+
import unittest
4+
import pytest
5+
import sys
6+
7+
from dataclasses import dataclass, field
8+
from typing import Optional
9+
10+
import orjson
11+
12+
13+
@dataclass
14+
class Dataclass1:
15+
name: str
16+
number: int
17+
sub: Optional["Dataclass1"]
18+
19+
20+
@dataclass
21+
class Dataclass2:
22+
name: Optional[str] = field(default="?")
23+
24+
25+
@dataclass
26+
class Dataclass3:
27+
a: str
28+
b: int
29+
c: dict
30+
d: bool
31+
e: float
32+
f: list
33+
g: tuple
34+
35+
36+
@dataclass
37+
class Dataclass4:
38+
a: str = field()
39+
b: int = field(metadata={"unrelated": False})
40+
c: float
41+
42+
43+
class Datasubclass(Dataclass1):
44+
number: float
45+
46+
47+
class DataclassTests(unittest.TestCase):
48+
def test_dataclass(self):
49+
"""
50+
dumps() dataclass
51+
"""
52+
obj = Dataclass1("a", 1, None)
53+
self.assertEqual(orjson.dumps(obj), b'{"name":"a","number":1,"sub":null}')
54+
55+
def test_dataclass_recursive(self):
56+
"""
57+
dumps() dataclass recursive
58+
"""
59+
obj = Dataclass1("a", 1, Dataclass1("b", 2, None))
60+
self.assertEqual(
61+
orjson.dumps(obj),
62+
b'{"name":"a","number":1,"sub":{"name":"b","number":2,"sub":null}}',
63+
)
64+
65+
def test_dataclass_circular(self):
66+
"""
67+
dumps() dataclass circular
68+
"""
69+
obj1 = Dataclass1("a", 1, None)
70+
obj2 = Dataclass1("b", 2, obj1)
71+
obj1.sub = obj2
72+
with self.assertRaises(orjson.JSONEncodeError):
73+
orjson.dumps(obj1)
74+
75+
def test_dataclass_default(self):
76+
"""
77+
dumps() dataclass default
78+
"""
79+
obj = Dataclass2()
80+
self.assertEqual(orjson.dumps(obj), b'{"name":"?"}')
81+
82+
def test_dataclass_types(self):
83+
"""
84+
dumps() dataclass types
85+
"""
86+
obj = Dataclass3("a", 1, {"a": "b"}, True, 1.1, [1, 2], (3, 4))
87+
self.assertEqual(
88+
orjson.dumps(obj),
89+
b'{"a":"a","b":1,"c":{"a":"b"},"d":true,"e":1.1,"f":[1,2],"g":[3,4]}',
90+
)
91+
92+
def test_dataclass_metadata(self):
93+
"""
94+
dumps() dataclass metadata
95+
"""
96+
obj = Dataclass4("a", 1, 2.1)
97+
self.assertEqual(orjson.dumps(obj), b'{"a":"a","b":1,"c":2.1}')
98+
99+
def test_dataclass_subclass(self):
100+
"""
101+
dumps() dataclass subclass
102+
"""
103+
obj = Datasubclass("a", 1.0, None)
104+
self.assertEqual(orjson.dumps(obj), b'{"name":"a","number":1.0,"sub":null}')

test/test_memory.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
22

3+
import dataclasses
4+
import datetime
35
import gc
6+
import random
47
import unittest
58
import uuid
9+
from typing import List
610

711
import orjson
812
import psutil
@@ -15,6 +19,32 @@ def default(obj):
1519
return str(obj)
1620

1721

22+
@dataclasses.dataclass
23+
class Member:
24+
id: int
25+
active: bool
26+
27+
28+
@dataclasses.dataclass
29+
class Object:
30+
id: int
31+
updated_at: datetime.datetime
32+
name: str
33+
members: List[Member]
34+
35+
36+
DATACLASS_FIXTURE = [
37+
Object(
38+
i,
39+
datetime.datetime.now(datetime.timezone.utc)
40+
+ datetime.timedelta(seconds=random.randint(0, 10000)),
41+
str(i) * 3,
42+
[Member(j, True) for j in range(0, 10)],
43+
)
44+
for i in range(100000, 101000)
45+
]
46+
47+
1848
class MemoryTests(unittest.TestCase):
1949
def test_memory_loads(self):
2050
"""
@@ -58,6 +88,19 @@ def test_memory_dumps_default(self):
5888
gc.collect()
5989
self.assertTrue(proc.memory_info().rss <= mem + 1024)
6090

91+
def test_memory_dumps_dataclass(self):
92+
"""
93+
dumps() dataclass memory leak
94+
"""
95+
proc = psutil.Process()
96+
gc.collect()
97+
val = orjson.dumps(DATACLASS_FIXTURE)
98+
mem = proc.memory_info().rss
99+
for _ in range(100):
100+
val = orjson.dumps(DATACLASS_FIXTURE)
101+
gc.collect()
102+
self.assertTrue(proc.memory_info().rss <= mem + 1024 * 1024)
103+
61104
def test_memory_loads_keys(self):
62105
"""
63106
loads() memory leak with number of keys causing cache eviction

0 commit comments

Comments
 (0)