Skip to content

Commit 3d5cfc1

Browse files
committed
Reduce work in cache
1 parent b0a15b5 commit 3d5cfc1

6 files changed

Lines changed: 22 additions & 33 deletions

File tree

Cargo.lock

Lines changed: 6 additions & 15 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ ryu = { version = "1" }
6060
serde = { version = "1", default_features = false }
6161
serde_json = { path = "./json", default_features = false, features = ["std"] }
6262
smallvec = { version = "1", default_features = false, features = ["const_generics", "union", "specialization", "write"] }
63-
wyhash = { version = "0.4" }
63+
wy = { version = "1" }
6464

6565
[target.'cfg(not(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "i686", target_arch = "armv7")))'.dependencies]
6666
encoding_rs = { path = "./encoding_rs", default_features = false }

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -590,7 +590,7 @@ The input must be valid UTF-8.
590590

591591
orjson maintains a cache of map keys for the duration of the process. This
592592
causes a net reduction in memory usage by avoiding duplicate strings. The
593-
keys must be at most 64 chars to be cached and 512 entries are stored.
593+
keys must be at most 64 bytes to be cached and 512 entries are stored.
594594

595595
It raises `JSONDecodeError` if given an invalid type or invalid
596596
JSON. This includes if the input contains `NaN`, `Infinity`, or `-Infinity`,

src/deserialize/cache.rs

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5,27 +5,25 @@ use associative_cache::*;
55
use once_cell::unsync::OnceCell;
66
use std::os::raw::c_void;
77

8-
#[derive(Clone)]
8+
#[repr(transparent)]
99
pub struct CachedKey {
1010
ptr: *mut c_void,
11-
hash: pyo3::ffi::Py_hash_t,
1211
}
1312

1413
unsafe impl Send for CachedKey {}
1514
unsafe impl Sync for CachedKey {}
1615

1716
impl CachedKey {
18-
pub fn new(ptr: *mut pyo3::ffi::PyObject, hash: pyo3::ffi::Py_hash_t) -> CachedKey {
17+
pub fn new(ptr: *mut pyo3::ffi::PyObject) -> CachedKey {
1918
CachedKey {
2019
ptr: ptr as *mut c_void,
21-
hash: hash,
2220
}
2321
}
2422

25-
pub fn get(&mut self) -> (*mut pyo3::ffi::PyObject, pyo3::ffi::Py_hash_t) {
23+
pub fn get(&mut self) -> *mut pyo3::ffi::PyObject {
2624
let ptr = self.ptr as *mut pyo3::ffi::PyObject;
2725
ffi!(Py_INCREF(ptr));
28-
(ptr, self.hash)
26+
ptr
2927
}
3028
}
3129

@@ -36,6 +34,6 @@ impl Drop for CachedKey {
3634
}
3735

3836
pub type KeyMap =
39-
AssociativeCache<u64, CachedKey, Capacity512, HashDirectMapped, RoundRobinReplacement>;
37+
AssociativeCache<u32, CachedKey, Capacity512, HashDirectMapped, RoundRobinReplacement>;
4038

4139
pub static mut KEY_MAP: OnceCell<KeyMap> = OnceCell::new();

src/deserialize/decode.rs

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ use smallvec::SmallVec;
1010
use std::borrow::Cow;
1111
use std::fmt;
1212
use std::ptr::NonNull;
13-
use wyhash::wyhash;
13+
use wy::hash32;
1414

1515
pub fn deserialize(
1616
ptr: *mut pyo3::ffi::PyObject,
@@ -167,8 +167,9 @@ impl<'de> Visitor<'de> for JsonValue {
167167
while let Some(key) = map.next_key::<Cow<str>>()? {
168168
let pykey: *mut pyo3::ffi::PyObject;
169169
let pyhash: pyo3::ffi::Py_hash_t;
170+
let value = map.next_value_seed(self)?;
170171
if likely!(key.len() <= 64) {
171-
let hash = unsafe { wyhash(key.as_bytes(), HASH_SEED) };
172+
let hash = unsafe { hash32(key.as_bytes(), HASH_SEED) };
172173
{
173174
let map = unsafe {
174175
KEY_MAP
@@ -179,18 +180,17 @@ impl<'de> Visitor<'de> for JsonValue {
179180
|| hash,
180181
|| {
181182
let pyob = unicode_from_str(&key);
182-
CachedKey::new(pyob, hash_str(pyob))
183+
hash_str(pyob);
184+
CachedKey::new(pyob)
183185
},
184186
);
185-
let tmp = entry.get();
186-
pykey = tmp.0;
187-
pyhash = tmp.1;
187+
pykey = entry.get();
188+
pyhash = unsafe { (*pykey.cast::<PyASCIIObject>()).hash }
188189
}
189190
} else {
190191
pykey = unicode_from_str(&key);
191192
pyhash = hash_str(pykey);
192193
}
193-
let value = map.next_value_seed(self)?;
194194
let _ = ffi!(_PyDict_SetItem_KnownHash(
195195
dict_ptr,
196196
pykey,

src/typeref.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ pub struct NumpyTypes {
1717
pub uint32: *mut PyTypeObject,
1818
pub uint8: *mut PyTypeObject,
1919
}
20-
pub static mut HASH_SEED: u64 = 0;
20+
pub static mut HASH_SEED: u32 = 0;
2121

2222
pub static mut NONE: *mut PyObject = 0 as *mut PyObject;
2323
pub static mut TRUE: *mut PyObject = 0 as *mut PyObject;
@@ -102,7 +102,7 @@ pub fn init_typerefs() {
102102
ARRAY_STRUCT_STR =
103103
pyo3::ffi::PyUnicode_InternFromString("__array_struct__\0".as_ptr() as *const c_char);
104104
VALUE_STR = pyo3::ffi::PyUnicode_InternFromString("value\0".as_ptr() as *const c_char);
105-
HASH_SEED = (VALUE_STR as u64).wrapping_mul(DICT_TYPE as u64);
105+
HASH_SEED = ((VALUE_STR as u64).wrapping_mul(DICT_TYPE as u64)) as u32;
106106
DEFAULT = PyUnicode_InternFromString("default\0".as_ptr() as *const c_char);
107107
OPTION = PyUnicode_InternFromString("option\0".as_ptr() as *const c_char);
108108
JsonEncodeError = pyo3::ffi::PyExc_TypeError;

0 commit comments

Comments
 (0)