Skip to content

Commit 058f7b9

Browse files
committed
Idioms and cleanup
1 parent dcab799 commit 058f7b9

8 files changed

Lines changed: 270 additions & 190 deletions

File tree

README.md

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,6 @@ third-party libraries. It serializes
77
[dataclass](https://github.com/ijl/orjson#dataclass) and
88
[datetime](https://github.com/ijl/orjson#datetime) instances.
99

10-
Its serialization performance on fixtures of real data is 2.5x to 9.5x the
11-
nearest other library and 4x to 12x the standard library. Its deserialization
12-
performance on the same fixtures is 1.2x to 1.3x the nearest other
13-
library and 1.4x to 2x the standard library.
14-
1510
Its features and drawbacks compared to other Python JSON libraries:
1611

1712
* serializes `dataclass` instances 30x faster than other libraries
@@ -202,7 +197,7 @@ Serialize `dataclasses.dataclass` instances. For more, see
202197
##### OPT_SERIALIZE_UUID
203198

204199
Serialize `uuid.UUID` instances. For more, see
205-
[uuid](https://github.com/ijl/orjson#UUID).
200+
[UUID](https://github.com/ijl/orjson#UUID).
206201

207202
##### OPT_STRICT_INTEGER
208203

@@ -266,13 +261,13 @@ It is supported to pass all variants of dataclasses, including dataclasses
266261
using `__slots__` (which yields a modest performance improvement), frozen
267262
dataclasses, those with optional or default attributes, and subclasses.
268263

269-
| Library | dict (ms) | dataclass (ms) | dataclass vs. dict | vs. orjson |
270-
|------------|-------------|------------------|----------------------|--------------|
271-
| orjson | 0.10 | 0.19 | -46% | 1 |
272-
| ujson | | | | |
273-
| rapidjson | 0.24 | 6.48 | -96% | 33 |
274-
| simplejson | 1.06 | 7.94 | -86% | 40 |
275-
| json | 0.92 | 7.32 | -87% | 37 |
264+
| Library | dict (ms) | dataclass (ms) | vs. orjson |
265+
|------------|-------------|------------------|--------------|
266+
| orjson | 0.10 | 0.19 | 1 |
267+
| ujson | | | |
268+
| rapidjson | 0.24 | 6.48 | 33 |
269+
| simplejson | 1.06 | 7.94 | 40 |
270+
| json | 0.92 | 7.32 | 37 |
276271

277272
This measures orjson serializing instances natively and other libraries using
278273
`default` to serialize the output of `dataclasses.asdict()`. This can be

pydataclass

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def default(__obj):
4646
return dataclasses.asdict(__obj)
4747

4848

49-
headers = ("Library", "dict (ms)", "dataclass (ms)", "dataclass vs. dict", "vs. orjson")
49+
headers = ("Library", "dict (ms)", "dataclass (ms)", "vs. orjson")
5050

5151
LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json")
5252

@@ -99,9 +99,7 @@ for lib_name in LIBRARIES:
9999
orjson_as_dataclass = as_dataclass
100100
else:
101101
raise NotImplementedError
102-
relative = (
103-
int(((as_dataclass - as_dict) / as_dataclass) * 100) if as_dataclass else None
104-
)
102+
105103
if lib_name == "orjson":
106104
compared_to_orjson = 1
107105
elif as_dict:
@@ -114,7 +112,6 @@ for lib_name in LIBRARIES:
114112
lib_name,
115113
f"{as_dict:,.2f}" if as_dict else "",
116114
f"{as_dataclass:,.2f}" if as_dataclass else "",
117-
f"*{relative}%" if relative else "",
118115
f"{compared_to_orjson:d}" if compared_to_orjson else "",
119116
)
120117
)
@@ -126,6 +123,6 @@ print(
126123
.replace("*", "-")
127124
.replace("=", "-")
128125
.replace("+", "|")
129-
.replace("||||||", "")
126+
.replace("|||||", "")
130127
.replace("\n\n", "\n")
131128
)

src/datetime.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
22

33
use crate::typeref::*;
4+
use serde::ser::{Serialize, Serializer};
45
use smallvec::SmallVec;
56

67
pub const NAIVE_UTC: u8 = 1 << 1;
@@ -36,6 +37,51 @@ macro_rules! write_microsecond {
3637
};
3738
}
3839

40+
pub struct Date {
41+
ptr: *mut pyo3::ffi::PyObject,
42+
}
43+
44+
impl Date {
45+
pub fn new(ptr: *mut pyo3::ffi::PyObject) -> Self {
46+
Date { ptr: ptr }
47+
}
48+
}
49+
impl<'p> Serialize for Date {
50+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
51+
where
52+
S: Serializer,
53+
{
54+
let mut dt: SmallVec<[u8; 32]> = SmallVec::with_capacity(32);
55+
write_date(self.ptr, &mut dt);
56+
serializer.serialize_str(str_from_slice!(dt.as_ptr(), dt.len()))
57+
}
58+
}
59+
60+
pub struct Time {
61+
ptr: *mut pyo3::ffi::PyObject,
62+
opts: u8,
63+
}
64+
65+
impl Time {
66+
pub fn new(ptr: *mut pyo3::ffi::PyObject, opts: u8) -> Self {
67+
Time {
68+
ptr: ptr,
69+
opts: opts,
70+
}
71+
}
72+
}
73+
74+
impl<'p> Serialize for Time {
75+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
76+
where
77+
S: Serializer,
78+
{
79+
let mut dt: SmallVec<[u8; 32]> = SmallVec::with_capacity(32);
80+
write_time(self.ptr, self.opts, &mut dt);
81+
serializer.serialize_str(str_from_slice!(dt.as_ptr(), dt.len()))
82+
}
83+
}
84+
3985
pub enum DatetimeError {
4086
Library,
4187
}

src/decode.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,22 +54,22 @@ lazy_static! {
5454
pub fn deserialize(ptr: *mut pyo3::ffi::PyObject) -> PyResult<NonNull<pyo3::ffi::PyObject>> {
5555
let data: &str;
5656
let obj_type_ptr = unsafe { (*ptr).ob_type };
57-
if is_type!(obj_type_ptr, STR_PTR) {
57+
if is_type!(obj_type_ptr, STR_TYPE) {
5858
let mut str_size: pyo3::ffi::Py_ssize_t = 0;
5959
let uni = read_utf8_from_str(ptr, &mut str_size);
6060
if unlikely!(uni.is_null()) {
6161
return Err(JSONDecodeError::py_err((INVALID_STR, "", 0)));
6262
}
6363
data = str_from_slice!(uni, str_size);
64-
} else if is_type!(obj_type_ptr, BYTES_PTR) {
64+
} else if is_type!(obj_type_ptr, BYTES_TYPE) {
6565
let buffer = unsafe { PyBytes_AS_STRING(ptr) as *const u8 };
6666
let length = unsafe { PyBytes_GET_SIZE(ptr) as usize };
6767
let slice = unsafe { std::slice::from_raw_parts(buffer, length) };
6868
if encoding_rs::Encoding::utf8_valid_up_to(slice) != length {
6969
return Err(JSONDecodeError::py_err((INVALID_STR, "", 0)));
7070
}
7171
data = unsafe { std::str::from_utf8_unchecked(slice) };
72-
} else if is_type!(obj_type_ptr, BYTEARRAY_PTR) {
72+
} else if is_type!(obj_type_ptr, BYTEARRAY_TYPE) {
7373
let buffer = ffi!(PyByteArray_AsString(ptr)) as *const u8;
7474
let length = ffi!(PyByteArray_Size(ptr)) as usize;
7575
let slice = unsafe { std::slice::from_raw_parts(buffer, length) };

src/encode.rs

Lines changed: 46 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
use crate::datetime::*;
44
use crate::exc::*;
5+
use crate::iter::*;
56
use crate::typeref::*;
67
use crate::unicode::*;
78
use crate::uuid::write_uuid;
@@ -61,6 +62,7 @@ pub fn serialize(
6162
Err(err) => Err(JSONEncodeError::py_err(err.to_string())),
6263
}
6364
}
65+
6466
struct SerializePyObject {
6567
ptr: *mut pyo3::ffi::PyObject,
6668
default: Option<NonNull<pyo3::ffi::PyObject>>,
@@ -75,14 +77,14 @@ impl<'p> Serialize for SerializePyObject {
7577
S: Serializer,
7678
{
7779
let obj_ptr = unsafe { (*self.ptr).ob_type };
78-
if is_type!(obj_ptr, STR_PTR) {
80+
if is_type!(obj_ptr, STR_TYPE) {
7981
let mut str_size: pyo3::ffi::Py_ssize_t = 0;
8082
let uni = read_utf8_from_str(self.ptr, &mut str_size);
8183
if unlikely!(uni.is_null()) {
8284
err!(INVALID_STR)
8385
}
8486
serializer.serialize_str(str_from_slice!(uni, str_size))
85-
} else if is_type!(obj_ptr, INT_PTR) {
87+
} else if is_type!(obj_ptr, INT_TYPE) {
8688
let val = ffi!(PyLong_AsLongLong(self.ptr));
8789
if unlikely!(val == -1 && !pyo3::ffi::PyErr_Occurred().is_null()) {
8890
err!("Integer exceeds 64-bit range")
@@ -92,40 +94,32 @@ impl<'p> Serialize for SerializePyObject {
9294
err!("Integer exceeds 53-bit range")
9395
}
9496
serializer.serialize_i64(val)
95-
} else if is_type!(obj_ptr, LIST_PTR) {
96-
let len = ffi!(PyList_GET_SIZE(self.ptr)) as usize;
97-
if len != 0 {
98-
let mut seq = serializer.serialize_seq(Some(len))?;
99-
let mut i = 0;
100-
while i < len {
101-
if unlikely!(self.recursion == RECURSION_LIMIT) {
102-
err!("Recursion limit reached")
103-
}
104-
let elem = ffi!(PyList_GET_ITEM(self.ptr, i as pyo3::ffi::Py_ssize_t));
105-
i += 1;
106-
seq.serialize_element(&SerializePyObject {
107-
ptr: elem,
108-
default: self.default,
109-
opts: self.opts,
110-
default_calls: self.default_calls,
111-
recursion: self.recursion + 1,
112-
})?
113-
}
114-
seq.end()
115-
} else {
116-
serializer.serialize_seq(None).unwrap().end()
97+
} else if is_type!(obj_ptr, LIST_TYPE) {
98+
if unlikely!(self.recursion == RECURSION_LIMIT) {
99+
err!("Recursion limit reached")
100+
}
101+
let mut seq = serializer.serialize_seq(None).unwrap();
102+
for elem in PyListIterator::new(self.ptr) {
103+
seq.serialize_element(&SerializePyObject {
104+
ptr: elem.as_ptr(),
105+
default: self.default,
106+
opts: self.opts,
107+
default_calls: self.default_calls,
108+
recursion: self.recursion + 1,
109+
})?
110+
}
111+
seq.end()
112+
} else if is_type!(obj_ptr, DICT_TYPE) {
113+
if unlikely!(self.recursion == RECURSION_LIMIT) {
114+
err!("Recursion limit reached")
117115
}
118-
} else if is_type!(obj_ptr, DICT_PTR) {
119116
let mut map = serializer.serialize_map(None).unwrap();
120117
let mut pos = 0isize;
121118
let mut str_size: pyo3::ffi::Py_ssize_t = 0;
122119
let mut key: *mut pyo3::ffi::PyObject = std::ptr::null_mut();
123120
let mut value: *mut pyo3::ffi::PyObject = std::ptr::null_mut();
124121
while unsafe { pyo3::ffi::PyDict_Next(self.ptr, &mut pos, &mut key, &mut value) != 0 } {
125-
if unlikely!(self.recursion == RECURSION_LIMIT) {
126-
err!("Recursion limit reached")
127-
}
128-
if unlikely!((*key).ob_type != STR_PTR) {
122+
if unlikely!((*key).ob_type != STR_TYPE) {
129123
err!("Dict key must be str")
130124
}
131125
{
@@ -144,59 +138,50 @@ impl<'p> Serialize for SerializePyObject {
144138
})?;
145139
}
146140
map.end()
147-
} else if is_type!(obj_ptr, BOOL_PTR) {
141+
} else if is_type!(obj_ptr, BOOL_TYPE) {
148142
serializer.serialize_bool(unsafe { self.ptr == TRUE })
149-
} else if is_type!(obj_ptr, NONE_PTR) {
143+
} else if is_type!(obj_ptr, NONE_TYPE) {
150144
serializer.serialize_unit()
151-
} else if is_type!(obj_ptr, FLOAT_PTR) {
145+
} else if is_type!(obj_ptr, FLOAT_TYPE) {
152146
serializer.serialize_f64(ffi!(PyFloat_AS_DOUBLE(self.ptr)))
153-
} else if is_type!(obj_ptr, TUPLE_PTR) {
154-
let len = ffi!(PyTuple_GET_SIZE(self.ptr)) as usize;
155-
if len != 0 {
156-
let mut seq = serializer.serialize_seq(Some(len))?;
157-
let mut i = 0;
158-
while i < len {
159-
let elem = ffi!(PyTuple_GET_ITEM(self.ptr, i as pyo3::ffi::Py_ssize_t));
160-
i += 1;
161-
seq.serialize_element(&SerializePyObject {
162-
ptr: elem,
163-
default: self.default,
164-
opts: self.opts,
165-
default_calls: self.default_calls,
166-
recursion: self.recursion,
167-
})?
168-
}
169-
seq.end()
170-
} else {
171-
serializer.serialize_seq(None).unwrap().end()
147+
} else if is_type!(obj_ptr, TUPLE_TYPE) {
148+
let mut seq = serializer.serialize_seq(None).unwrap();
149+
for elem in PyTupleIterator::new(self.ptr) {
150+
seq.serialize_element(&SerializePyObject {
151+
ptr: elem.as_ptr(),
152+
default: self.default,
153+
opts: self.opts,
154+
default_calls: self.default_calls,
155+
recursion: self.recursion + 1,
156+
})?
172157
}
173-
} else if is_type!(obj_ptr, DATETIME_PTR) {
158+
seq.end()
159+
} else if is_type!(obj_ptr, DATETIME_TYPE) {
174160
let mut dt: SmallVec<[u8; 32]> = SmallVec::with_capacity(32);
175161
match write_datetime(self.ptr, self.opts, &mut dt) {
176162
Ok(_) => serializer.serialize_str(str_from_slice!(dt.as_ptr(), dt.len())),
177163
Err(DatetimeError::Library) => {
178164
err!("datetime's timezone library is not supported: use datetime.timezone.utc, pendulum, pytz, or dateutil")
179165
}
180166
}
181-
} else if is_type!(obj_ptr, DATE_PTR) {
182-
let mut dt: SmallVec<[u8; 32]> = SmallVec::with_capacity(32);
183-
write_date(self.ptr, &mut dt);
184-
serializer.serialize_str(str_from_slice!(dt.as_ptr(), dt.len()))
185-
} else if is_type!(obj_ptr, TIME_PTR) {
167+
} else if is_type!(obj_ptr, DATE_TYPE) {
168+
Date::new(self.ptr).serialize(serializer)
169+
} else if is_type!(obj_ptr, TIME_TYPE) {
186170
if unsafe { (*(self.ptr as *mut pyo3::ffi::PyDateTime_Time)).hastzinfo == 1 } {
187171
err!("datetime.time must not have tzinfo set")
188172
}
189-
let mut dt: SmallVec<[u8; 32]> = SmallVec::with_capacity(32);
190-
write_time(self.ptr, self.opts, &mut dt);
191-
serializer.serialize_str(str_from_slice!(dt.as_ptr(), dt.len()))
192-
} else if self.opts & SERIALIZE_UUID == SERIALIZE_UUID && is_type!(obj_ptr, UUID_PTR) {
173+
Time::new(self.ptr, self.opts).serialize(serializer)
174+
} else if self.opts & SERIALIZE_UUID == SERIALIZE_UUID && is_type!(obj_ptr, UUID_TYPE) {
193175
let mut buf: SmallVec<[u8; 36]> = SmallVec::with_capacity(36);
194176
write_uuid(self.ptr, &mut buf);
195177
serializer.serialize_str(str_from_slice!(buf.as_ptr(), buf.len()))
196178
} else {
197179
if self.opts & SERIALIZE_DATACLASS == SERIALIZE_DATACLASS
198180
&& ffi!(PyObject_HasAttr(self.ptr, DATACLASS_FIELDS_STR)) == 1
199181
{
182+
if unlikely!(self.recursion == RECURSION_LIMIT) {
183+
err!("Recursion limit reached")
184+
}
200185
let fields = ffi!(PyObject_GetAttr(self.ptr, DATACLASS_FIELDS_STR));
201186
ffi!(Py_DECREF(fields));
202187
let mut map = serializer.serialize_map(None).unwrap();
@@ -207,9 +192,6 @@ impl<'p> Serialize for SerializePyObject {
207192
while unsafe {
208193
pyo3::ffi::PyDict_Next(fields, &mut pos, &mut attr, &mut field) != 0
209194
} {
210-
if unlikely!(self.recursion == RECURSION_LIMIT) {
211-
err!("Recursion limit reached")
212-
}
213195
{
214196
let data = read_utf8_from_str(attr, &mut str_size);
215197
if unlikely!(data.is_null()) {

0 commit comments

Comments
 (0)