Skip to content

Commit b19cc9f

Browse files
committed
numpy u32, u64 support
1 parent c3feddd commit b19cc9f

3 files changed

Lines changed: 125 additions & 43 deletions

File tree

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -532,8 +532,9 @@ JSONEncodeError: Integer exceeds 53-bit range
532532
### numpy
533533

534534
orjson natively serializes `numpy.ndarray` instances. Arrays may have a
535-
`dtype` of `numpy.int32`, `numpy.int64`, `numpy.float32`, `numpy.float64`,
536-
or `numpy.bool`. orjson is faster than all compared libraries at serializing
535+
`dtype` of `numpy.bool`, `numpy.float32`, `numpy.float64`, `numpy.int32`,
536+
`numpy.int64`, `numpy.uint32`, `numpy.uint64`, `numpy.uintp`, or `numpy.intp`.
537+
orjson is faster than all compared libraries at serializing
537538
numpy instances. Serializing numpy data requires specifying
538539
`option=orjson.OPT_SERIALIZE_NUMPY`.
539540

src/array.rs

Lines changed: 82 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ pub enum ItemType {
4343
F64,
4444
I32,
4545
I64,
46+
U32,
47+
U64,
4648
}
4749

4850
pub enum PyArrayError {
@@ -51,6 +53,13 @@ pub enum PyArrayError {
5153
UnsupportedDataType,
5254
}
5355

56+
// >>> arr = numpy.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], numpy.int32)
57+
// >>> arr.ndim
58+
// 3
59+
// >>> arr.shape
60+
// (2, 2, 2)
61+
// >>> arr.strides
62+
// (16, 8, 4)
5463
pub struct PyArray {
5564
array: *mut PyArrayInterface,
5665
position: Vec<isize>,
@@ -60,7 +69,7 @@ pub struct PyArray {
6069
}
6170

6271
impl<'a> PyArray {
63-
#[inline(never)]
72+
#[cold]
6473
pub fn new(ptr: *mut PyObject) -> Result<Self, PyArrayError> {
6574
let capsule = ffi!(PyObject_GetAttr(ptr, ARRAY_STRUCT_STR));
6675
let array = unsafe { (*(capsule as *mut PyCapsule)).pointer as *mut PyArrayInterface };
@@ -82,64 +91,63 @@ impl<'a> PyArray {
8291
if pyarray.kind().is_none() {
8392
Err(PyArrayError::UnsupportedDataType)
8493
} else {
85-
pyarray.build();
94+
if pyarray.dimensions() > 1 {
95+
pyarray.build();
96+
}
8697
Ok(pyarray)
8798
}
8899
}
89100
}
90101

91-
fn from_parent(&self, position: Vec<isize>) -> Self {
102+
fn from_parent(&self, position: Vec<isize>, num_children: usize) -> Self {
92103
let mut arr = PyArray {
93104
array: self.array,
94105
position: position,
95-
children: Vec::new(),
106+
children: Vec::with_capacity(num_children),
96107
depth: self.depth + 1,
97108
capsule: self.capsule,
98109
};
99110
arr.build();
100111
arr
101112
}
102113

103-
// iterator()?
104-
fn build(&mut self) {
105-
// >>> arr = numpy.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], numpy.int32)
106-
// >>> arr.ndim
107-
// 3
108-
// >>> arr.shape
109-
// (2, 2, 2)
110-
// >>> arr.strides
111-
// (16, 8, 4)
112-
if self.dimensions() > 1 && self.depth < self.dimensions() - 1 {
113-
for i in 0..=self.shape()[self.depth] - 1 {
114-
let mut position: Vec<isize> = self.position.iter().copied().collect();
115-
position[self.depth] = i;
116-
self.children.push(self.from_parent(position))
117-
}
118-
}
119-
}
120-
121114
fn kind(&self) -> Option<ItemType> {
122115
match unsafe { ((*self.array).typekind, (*self.array).itemsize) } {
123116
(098, 1) => Some(ItemType::BOOL),
124117
(102, 4) => Some(ItemType::F32),
125118
(102, 8) => Some(ItemType::F64),
126119
(105, 4) => Some(ItemType::I32),
127120
(105, 8) => Some(ItemType::I64),
121+
(117, 4) => Some(ItemType::U32),
122+
(117, 8) => Some(ItemType::U64),
128123
_ => None,
129124
}
130125
}
131126

127+
fn build(&mut self) {
128+
if self.depth < self.dimensions() - 1 {
129+
for i in 0..=self.shape()[self.depth] - 1 {
130+
let mut position: Vec<isize> = self.position.iter().copied().collect();
131+
position[self.depth] = i;
132+
let num_children: usize;
133+
if self.depth < self.dimensions() - 2 {
134+
num_children = self.shape()[self.depth + 1] as usize;
135+
} else {
136+
num_children = 0;
137+
}
138+
self.children.push(self.from_parent(position, num_children))
139+
}
140+
}
141+
}
142+
132143
fn data(&self) -> *mut c_void {
133-
let mut offset = self
144+
let offset = self
134145
.strides()
135146
.iter()
136147
.zip(self.position.iter().copied())
137148
.take(self.depth)
138149
.map(|(a, b)| a * b)
139150
.sum::<isize>();
140-
if self.depth != self.dimensions() - 1 {
141-
offset += self.position[self.depth] * self.strides()[self.depth];
142-
}
143151
unsafe { (*self.array).data.offset(offset) }
144152
}
145153

@@ -169,7 +177,6 @@ impl Drop for PyArray {
169177
}
170178

171179
impl<'p> Serialize for PyArray {
172-
#[inline(never)]
173180
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
174181
where
175182
S: Serializer,
@@ -207,6 +214,18 @@ impl<'p> Serialize for PyArray {
207214
seq.serialize_element(&DataTypeI32 { obj: each }).unwrap();
208215
}
209216
}
217+
ItemType::U64 => {
218+
let slice: &[u64] = slice!(data_ptr as *const u64, num_items);
219+
for &each in slice.iter() {
220+
seq.serialize_element(&DataTypeU64 { obj: each }).unwrap();
221+
}
222+
}
223+
ItemType::U32 => {
224+
let slice: &[u32] = slice!(data_ptr as *const u32, num_items);
225+
for &each in slice.iter() {
226+
seq.serialize_element(&DataTypeU32 { obj: each }).unwrap();
227+
}
228+
}
210229
ItemType::BOOL => {
211230
let slice: &[u8] = slice!(data_ptr as *const u8, num_items);
212231
for &each in slice.iter() {
@@ -219,6 +238,20 @@ impl<'p> Serialize for PyArray {
219238
}
220239
}
221240

241+
#[repr(transparent)]
242+
struct DataTypeF32 {
243+
pub obj: f32,
244+
}
245+
246+
impl<'p> Serialize for DataTypeF32 {
247+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
248+
where
249+
S: Serializer,
250+
{
251+
serializer.serialize_f32(self.obj)
252+
}
253+
}
254+
222255
#[repr(transparent)]
223256
struct DataTypeF64 {
224257
pub obj: f64,
@@ -234,16 +267,16 @@ impl<'p> Serialize for DataTypeF64 {
234267
}
235268

236269
#[repr(transparent)]
237-
struct DataTypeF32 {
238-
pub obj: f32,
270+
struct DataTypeI32 {
271+
pub obj: i32,
239272
}
240273

241-
impl<'p> Serialize for DataTypeF32 {
274+
impl<'p> Serialize for DataTypeI32 {
242275
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
243276
where
244277
S: Serializer,
245278
{
246-
serializer.serialize_f32(self.obj)
279+
serializer.serialize_i32(self.obj)
247280
}
248281
}
249282

@@ -262,16 +295,30 @@ impl<'p> Serialize for DataTypeI64 {
262295
}
263296

264297
#[repr(transparent)]
265-
struct DataTypeI32 {
266-
pub obj: i32,
298+
struct DataTypeU32 {
299+
pub obj: u32,
267300
}
268301

269-
impl<'p> Serialize for DataTypeI32 {
302+
impl<'p> Serialize for DataTypeU32 {
270303
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
271304
where
272305
S: Serializer,
273306
{
274-
serializer.serialize_i32(self.obj)
307+
serializer.serialize_u32(self.obj)
308+
}
309+
}
310+
311+
#[repr(transparent)]
312+
struct DataTypeU64 {
313+
pub obj: u64,
314+
}
315+
316+
impl<'p> Serialize for DataTypeU64 {
317+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
318+
where
319+
S: Serializer,
320+
{
321+
serializer.serialize_u64(self.obj)
275322
}
276323
}
277324

test/test_numpy.py

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,58 @@ def numpy_default(obj):
1717

1818
@pytest.mark.skipif(numpy is None, reason="numpy is not installed")
1919
class NumpyTests(unittest.TestCase):
20+
def test_numpy_array_d1_uintp(self):
21+
self.assertEqual(
22+
orjson.dumps(
23+
numpy.array([0, 18446744073709551615], numpy.uintp),
24+
option=orjson.OPT_SERIALIZE_NUMPY,
25+
),
26+
b"[0,18446744073709551615]",
27+
)
28+
29+
def test_numpy_array_d1_intp(self):
30+
self.assertEqual(
31+
orjson.dumps(
32+
numpy.array([-9223372036854775807, 9223372036854775807], numpy.intp),
33+
option=orjson.OPT_SERIALIZE_NUMPY,
34+
),
35+
b"[-9223372036854775807,9223372036854775807]",
36+
)
37+
2038
def test_numpy_array_d1_i64(self):
2139
self.assertEqual(
2240
orjson.dumps(
23-
numpy.array(
24-
[-9223372036854775807, 0, 9223372036854775807], numpy.int64
25-
),
41+
numpy.array([-9223372036854775807, 9223372036854775807], numpy.int64),
42+
option=orjson.OPT_SERIALIZE_NUMPY,
43+
),
44+
b"[-9223372036854775807,9223372036854775807]",
45+
)
46+
47+
def test_numpy_array_d1_u64(self):
48+
self.assertEqual(
49+
orjson.dumps(
50+
numpy.array([0, 18446744073709551615], numpy.uint64),
2651
option=orjson.OPT_SERIALIZE_NUMPY,
2752
),
28-
b"[-9223372036854775807,0,9223372036854775807]",
53+
b"[0,18446744073709551615]",
2954
)
3055

3156
def test_numpy_array_d1_i32(self):
3257
self.assertEqual(
3358
orjson.dumps(
34-
numpy.array([-2147483647, 0, 2147483647], numpy.int32),
59+
numpy.array([-2147483647, 2147483647], numpy.int32),
60+
option=orjson.OPT_SERIALIZE_NUMPY,
61+
),
62+
b"[-2147483647,2147483647]",
63+
)
64+
65+
def test_numpy_array_d1_u32(self):
66+
self.assertEqual(
67+
orjson.dumps(
68+
numpy.array([0, 4294967295], numpy.uint32),
3569
option=orjson.OPT_SERIALIZE_NUMPY,
3670
),
37-
b"[-2147483647,0,2147483647]",
71+
b"[0,4294967295]",
3872
)
3973

4074
def test_numpy_array_d1_f32(self):

0 commit comments

Comments
 (0)