Skip to content

Commit 12dfdb3

Browse files
authored
fix: pos in JSONDecodeError (#169)
1 parent 5e56bb2 commit 12dfdb3

7 files changed

Lines changed: 196 additions & 14 deletions

File tree

build.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ fn main() {
55
.args(&["-c", "import sys; print(sys.version_info[1])"])
66
.output()
77
.expect("python version did not print");
8-
let version = String::from_utf8_lossy(&out.stdout).trim().parse::<u8>()
8+
let version = String::from_utf8_lossy(&out.stdout)
9+
.trim()
10+
.parse::<u8>()
911
.expect("python version was not parsed");
1012
for each in 6..version {
1113
println!("cargo:rustc-cfg=python3{}", each);

src/deserialize/deserializer.rs

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
22

33
use crate::deserialize::cache::*;
4+
use crate::deserialize::DeserializeError;
45
use crate::exc::*;
56
use crate::ffi::*;
67
use crate::typeref::*;
@@ -14,14 +15,14 @@ use std::ptr::NonNull;
1415

1516
pub fn deserialize(
1617
ptr: *mut pyo3::ffi::PyObject,
17-
) -> std::result::Result<NonNull<pyo3::ffi::PyObject>, String> {
18+
) -> std::result::Result<NonNull<pyo3::ffi::PyObject>, DeserializeError<'static>> {
1819
let obj_type_ptr = ob_type!(ptr);
1920
let contents: &[u8];
2021
if is_type!(obj_type_ptr, STR_TYPE) {
2122
let mut str_size: pyo3::ffi::Py_ssize_t = 0;
2223
let uni = read_utf8_from_str(ptr, &mut str_size);
2324
if unlikely!(uni.is_null()) {
24-
return Err(INVALID_STR.to_string());
25+
return Err(DeserializeError::new(Cow::Borrowed(INVALID_STR), 0, 0, ""));
2526
}
2627
contents = unsafe { std::slice::from_raw_parts(uni, str_size as usize) };
2728
} else {
@@ -33,19 +34,29 @@ pub fn deserialize(
3334
} else if is_type!(obj_type_ptr, MEMORYVIEW_TYPE) {
3435
let membuf = unsafe { PyMemoryView_GET_BUFFER(ptr) };
3536
if unsafe { pyo3::ffi::PyBuffer_IsContiguous(membuf, b'C' as c_char) == 0 } {
36-
return Err("Input type memoryview must be a C contiguous buffer".to_string());
37+
return Err(DeserializeError::new(
38+
Cow::Borrowed("Input type memoryview must be a C contiguous buffer"),
39+
0,
40+
0,
41+
"",
42+
));
3743
}
3844
buffer = unsafe { (*membuf).buf as *const u8 };
3945
length = unsafe { (*membuf).len as usize };
4046
} else if is_type!(obj_type_ptr, BYTEARRAY_TYPE) {
4147
buffer = ffi!(PyByteArray_AsString(ptr)) as *const u8;
4248
length = ffi!(PyByteArray_Size(ptr)) as usize;
4349
} else {
44-
return Err("Input must be bytes, bytearray, memoryview, or str".to_string());
50+
return Err(DeserializeError::new(
51+
Cow::Borrowed("Input must be bytes, bytearray, memoryview, or str"),
52+
0,
53+
0,
54+
"",
55+
));
4556
}
4657
contents = unsafe { std::slice::from_raw_parts(buffer, length) };
4758
if encoding_rs::Encoding::utf8_valid_up_to(contents) != length {
48-
return Err(INVALID_STR.to_string());
59+
return Err(DeserializeError::new(Cow::Borrowed(INVALID_STR), 0, 0, ""));
4960
}
5061
}
5162

@@ -55,10 +66,17 @@ pub fn deserialize(
5566
let seed = JsonValue {};
5667
match seed.deserialize(&mut deserializer) {
5768
Ok(obj) => {
58-
deserializer.end().map_err(|e| e.to_string())?;
69+
deserializer.end().map_err(|e| {
70+
DeserializeError::new(Cow::Owned(e.to_string()), e.line(), e.column(), data)
71+
})?;
5972
Ok(obj)
6073
}
61-
Err(e) => Err(e.to_string()),
74+
Err(e) => Err(DeserializeError::new(
75+
Cow::Owned(e.to_string()),
76+
e.line(),
77+
e.column(),
78+
data,
79+
)),
6280
}
6381
}
6482

src/deserialize/error.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
2+
3+
use std::borrow::Cow;
4+
5+
/// Error raised while deserializing a document.
///
/// Holds the message together with the location of the failure and the
/// document itself, so that [`DeserializeError::pos`] can translate the
/// location into a character position.
#[derive(Debug, Clone)]
pub struct DeserializeError<'a> {
    /// Description of the failure.
    pub message: Cow<'a, str>,
    /// Line of the error; starts at 1. `0` makes `pos()` return its fallback.
    pub line: usize, // start at 1
    /// Column of the error within its line; starts at 1. Treated as a byte
    /// offset by `pos()`.
    pub column: usize, // start at 1
    /// The document the location refers to.
    pub data: &'a str,
}

impl<'a> DeserializeError<'a> {
    /// Build an error from a message, a 1-based line and column, and the
    /// document the location refers to.
    #[cold]
    pub fn new(message: Cow<'a, str>, line: usize, column: usize, data: &'a str) -> Self {
        DeserializeError {
            message,
            line,
            column,
            data,
        }
    }

    /// Return position of the error in the deserialized data.
    ///
    /// The position is counted in characters, not bytes. It is the sum of the
    /// character counts of every line before the error line, one `'\n'` per
    /// preceding line, and the number of characters before the error column
    /// on the error line.
    ///
    /// Returns `1` when `line` is `0`.
    ///
    /// This never panics: a column that lies past the end of its line is
    /// clamped to the line length, and a column that falls inside a
    /// multi-byte character is moved back to the start of that character.
    #[cold]
    pub fn pos(&self) -> usize {
        if self.line == 0 {
            return 1;
        }

        let last_idx = self.line - 1;

        self.data
            .split('\n')
            // take only the relevant lines
            .take(self.line)
            .enumerate()
            .map(|(idx, s)| {
                if idx != last_idx {
                    // Lines before the error: every character counts.
                    return s.chars().count();
                }

                // Last line: only characters until the column of the error are relevant.
                // Note: Rust uses bytes whereas Python uses chars: we hence cannot
                // directly use the `column` field
                if self.column == 0 {
                    return 0;
                }

                // Find a byte offset that is safe to slice on: inside the line
                // and on a character boundary. Offset 0 is always a boundary,
                // so the loop terminates.
                let mut end = (self.column - 1).min(s.len());
                while !s.is_char_boundary(end) {
                    end -= 1;
                }

                let chars_count = s[..end].chars().count();
                // Written as an addition so that an empty line cannot
                // underflow; equivalent to `chars_count == total - 1`.
                if chars_count + 1 == s.chars().count() {
                    chars_count + 1
                } else {
                    chars_count
                }
            })
            .sum::<usize>()
            // add missed '\n' characters
            + (self.line - 1)
    }
}

src/deserialize/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
mod cache;
44
mod deserializer;
5+
mod error;
56

67
pub use cache::KeyMap;
78
pub use cache::KEY_MAP;
89
pub use deserializer::deserialize;
10+
pub use error::DeserializeError;

src/lib.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,13 +159,16 @@ pub unsafe extern "C" fn PyInit_orjson() -> *mut PyObject {
159159

160160
#[cold]
161161
#[inline(never)]
162-
fn raise_loads_exception(msg: Cow<str>) -> *mut PyObject {
162+
fn raise_loads_exception(err: deserialize::DeserializeError) -> *mut PyObject {
163+
let pos = err.pos() as i64;
164+
let msg = err.message;
165+
let doc = err.data;
163166
unsafe {
164167
let err_msg =
165168
PyUnicode_FromStringAndSize(msg.as_ptr() as *const c_char, msg.len() as isize);
166169
let args = PyTuple_New(3);
167-
let doc = PyUnicode_New(0, 255);
168-
let pos = PyLong_FromLongLong(0);
170+
let doc = PyUnicode_FromStringAndSize(doc.as_ptr() as *const c_char, doc.len() as isize);
171+
let pos = PyLong_FromLongLong(pos);
169172
PyTuple_SET_ITEM(args, 0, err_msg);
170173
PyTuple_SET_ITEM(args, 1, doc);
171174
PyTuple_SET_ITEM(args, 2, pos);
@@ -191,7 +194,7 @@ fn raise_dumps_exception(msg: Cow<str>) -> *mut PyObject {
191194
pub unsafe extern "C" fn loads(_self: *mut PyObject, obj: *mut PyObject) -> *mut PyObject {
192195
match crate::deserialize::deserialize(obj) {
193196
Ok(val) => val.as_ptr(),
194-
Err(err) => raise_loads_exception(Cow::Owned(err)),
197+
Err(err) => raise_loads_exception(err),
195198
}
196199
}
197200

test/test_error.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
2+
3+
import json
4+
import unittest
5+
6+
import pytest
7+
8+
import orjson
9+
10+
ASCII_TEST = b"""\
11+
{
12+
"a": "qwe",
13+
"b": "qweqwe",
14+
"c": "qweq",
15+
"d: "qwe"
16+
}
17+
"""
18+
19+
MULTILINE_EMOJI = """[
20+
"😊",
21+
"a"
22+
"""
23+
24+
25+
class JsonDecodeErrorTests(unittest.TestCase):
26+
def _get_error_infos(self, json_decode_error_exc_info):
27+
return {
28+
k: v
29+
for k, v in json_decode_error_exc_info.value.__dict__.items()
30+
if k in ("pos", "lineno", "colno")
31+
}
32+
33+
def _test(self, data, expected_err_infos):
34+
with pytest.raises(json.decoder.JSONDecodeError) as json_exc_info:
35+
json.loads(data)
36+
37+
with pytest.raises(json.decoder.JSONDecodeError) as orjson_exc_info:
38+
orjson.loads(data)
39+
40+
assert (
41+
self._get_error_infos(json_exc_info)
42+
== self._get_error_infos(orjson_exc_info)
43+
== expected_err_infos
44+
)
45+
46+
def test_ascii(self):
47+
self._test(
48+
ASCII_TEST,
49+
{"pos": 55, "lineno": 5, "colno": 8},
50+
)
51+
52+
def test_latin1(self):
53+
self._test(
54+
"""["üýþÿ", "a" """,
55+
{"pos": 13, "lineno": 1, "colno": 14},
56+
)
57+
58+
def test_two_byte_str(self):
59+
self._test(
60+
"""["東京", "a" """,
61+
{"pos": 11, "lineno": 1, "colno": 12},
62+
)
63+
64+
def test_two_byte_bytes(self):
65+
self._test(
66+
b'["\xe6\x9d\xb1\xe4\xba\xac", "a" ',
67+
{"pos": 11, "lineno": 1, "colno": 12},
68+
)
69+
70+
def test_four_byte(self):
71+
self._test(
72+
MULTILINE_EMOJI,
73+
{"pos": 19, "lineno": 4, "colno": 1},
74+
)
75+
76+
def test_tab(self):
77+
# data/jsonchecker/fail26.json
78+
data = """["tab\ character\ in\ string\ "]"""
79+
80+
with pytest.raises(json.decoder.JSONDecodeError) as json_exc_info:
81+
json.loads(data)
82+
83+
assert self._get_error_infos(json_exc_info) == {
84+
"pos": 5,
85+
"lineno": 1,
86+
"colno": 6,
87+
}
88+
89+
with pytest.raises(json.decoder.JSONDecodeError) as orjson_exc_info:
90+
orjson.loads(data)
91+
92+
assert self._get_error_infos(orjson_exc_info) == {
93+
"pos": 6,
94+
"lineno": 1,
95+
"colno": 7,
96+
}

test/test_numpy.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,9 @@ def test_numpy_scalar_float64(self):
444444

445445
def test_numpy_bool(self):
446446
self.assertEqual(
447-
orjson.dumps({"a": numpy.bool_(True), "b": numpy.bool_(False)}, option=orjson.OPT_SERIALIZE_NUMPY),
448-
b'{"a":true,"b":false}'
447+
orjson.dumps(
448+
{"a": numpy.bool_(True), "b": numpy.bool_(False)},
449+
option=orjson.OPT_SERIALIZE_NUMPY,
450+
),
451+
b'{"a":true,"b":false}',
449452
)

0 commit comments

Comments
 (0)