From ee2735d36ba617da9beda7788200b54f701b503f Mon Sep 17 00:00:00 2001 From: "Ilya (Marshal)" Date: Mon, 25 May 2026 23:50:51 +0200 Subject: [PATCH 1/3] Optimize DAG-CBOR string decode with ASCII fast path --- src/lib.rs | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 00ec94d..e5ef292 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -168,6 +168,42 @@ fn collect_and_sort_map_entries<'py>( Ok(entries) } +// `PyUnicode_DecodeUTF8` runs a state machine even on pure-ASCII input. Skip +// it by allocating a compact-ASCII `PyUnicode` and memcpying into its inline +// buffer; non-ASCII falls through to the standard decoder. +#[cfg(CPython)] +#[inline] +fn pystring_from_bytes_fast<'py>( + py: Python<'py>, + bytes: &[u8], +) -> PyResult> { + if !bytes.is_ascii() { + return PyString::from_bytes(py, bytes); + } + + unsafe { + let obj = ffi::PyUnicode_New(bytes.len() as ffi::Py_ssize_t, 127); + if obj.is_null() { + return Err(PyErr::fetch(py)); + } + + let data = obj.cast::().offset(1).cast::(); + std::ptr::copy_nonoverlapping(bytes.as_ptr(), data, bytes.len()); + *data.add(bytes.len()) = 0; + + Ok(Bound::from_owned_ptr(py, obj).cast_into_unchecked::()) + } +} + +#[cfg(not(CPython))] +#[inline] +fn pystring_from_bytes_fast<'py>( + py: Python<'py>, + bytes: &[u8], +) -> PyResult> { + PyString::from_bytes(py, bytes) +} + fn get_bytes_from_py_any<'py>(obj: &'py Bound<'py, PyAny>) -> PyResult<&'py [u8]> { if let Ok(b) = obj.cast::() { Ok(b.as_bytes()) @@ -222,8 +258,9 @@ where .into_pyobject(py)? .into(), major::STRING => { - // The UTF-8 validation is done when it's converted into a Python string - PyString::from_bytes( + // ASCII fast path inside the helper; non-ASCII falls through to + // `PyUnicode_DecodeUTF8`, which is where the spec validation lives. + pystring_from_bytes_fast( py, >::decode(r) .map_err(|_| anyhow!("Cannot decode as bytes"))? @@ -277,7 +314,7 @@ where } } - let key_py = PyString::from_bytes(py, key)?; + let key_py = pystring_from_bytes_fast(py, key)?; prev_key = Some(key); let value_py = decode_dag_cbor_to_pyobject(py, r, depth + 1)?; From fa4ee74e5bda1b23a8bd9754de2e18507cfff965 Mon Sep 17 00:00:00 2001 From: "Ilya (Marshal)" Date: Tue, 26 May 2026 00:10:30 +0200 Subject: [PATCH 2/3] Optimize DAG-CBOR map decode with key-intern cache and KnownHash insert --- build.rs | 1 + src/lib.rs | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 146 insertions(+), 3 deletions(-) diff --git a/build.rs b/build.rs index 40d12b4..3050edf 100644 --- a/build.rs +++ b/build.rs @@ -1,4 +1,5 @@ fn main() { + pyo3_build_config::use_pyo3_cfgs(); if matches!( pyo3_build_config::get().implementation, pyo3_build_config::PythonImplementation::CPython diff --git a/src/lib.rs b/src/lib.rs index e5ef292..65a6cf4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,10 +10,16 @@ use cid::{multibase, Cid}; use pyo3::pybacked::PyBackedStr; use pyo3::{ffi, prelude::*, types::*, BoundObject, Python}; -// Private CPython symbol; not provided by pyo3-ffi and CPython-only. +// Private CPython symbols; not provided by pyo3-ffi and CPython-only. #[cfg(CPython)] extern "C" { fn _PyDict_NewPresized(minused: ffi::Py_ssize_t) -> *mut ffi::PyObject; + fn _PyDict_SetItem_KnownHash( + op: *mut ffi::PyObject, + key: *mut ffi::PyObject, + value: *mut ffi::PyObject, + hash: ffi::Py_hash_t, + ) -> std::os::raw::c_int; } // Empty CPython dicts already have 8 slots, so presizing below that buys @@ -204,6 +210,120 @@ fn pystring_from_bytes_fast<'py>( PyString::from_bytes(py, bytes) } +// Direct-mapped intern cache for short map keys. atproto-shape payloads +// reuse a small vocabulary (`$type`, `did`, `cid`, `uri`, `text`, ...) per +// record; caching the constructed `PyUnicode` + its `Py_hash_t` skips both +// the rebuild and the rehash inside `PyDict_SetItem` +#[cfg(all(CPython, not(Py_GIL_DISABLED)))] +mod key_cache { + use super::pystring_from_bytes_fast; + use pyo3::{ffi, prelude::*}; + + const CAP: usize = 2048; + const MAX_KEY_LEN: usize = 64; + + struct Entry { + len: u16, + bytes: [u8; MAX_KEY_LEN], + obj: *mut ffi::PyObject, + hash: ffi::Py_hash_t, + } + + impl Entry { + const fn empty() -> Self { + Self { + len: 0, + bytes: [0; MAX_KEY_LEN], + obj: std::ptr::null_mut(), + hash: 0, + } + } + } + + static mut SLOTS: [Entry; CAP] = [const { Entry::empty() }; CAP]; + + #[inline] + fn fx_hash(bytes: &[u8]) -> usize { + const K: u64 = 0x517c_c1b7_2722_0a95; + let mut h: u64 = 0; + for &b in bytes { + h = (h.rotate_left(5) ^ b as u64).wrapping_mul(K); + } + h as usize + } + + /// Returns `(strong-ref PyUnicode*, Py_hash_t)`. Caller owns one ref. + /// Caller must hold the GIL (we are always called from a `Python<'_>`). + #[inline] + pub(super) unsafe fn intern_key( + py: Python<'_>, + bytes: &[u8], + ) -> PyResult<(*mut ffi::PyObject, ffi::Py_hash_t)> { + if bytes.len() > MAX_KEY_LEN { + return build(py, bytes); + } + + let slot_idx = fx_hash(bytes) & (CAP - 1); + let slot = &mut *(&raw mut SLOTS[slot_idx]); + + if slot.len as usize == bytes.len() + && !slot.obj.is_null() + && slot.bytes[..bytes.len()] == *bytes + { + ffi::Py_INCREF(slot.obj); + return Ok((slot.obj, slot.hash)); + } + + let (obj, hash) = build(py, bytes)?; + // Evict the previous occupant before claiming the slot. + if !slot.obj.is_null() { + ffi::Py_DECREF(slot.obj); + } + // One ref for the cache, one for the caller. + ffi::Py_INCREF(obj); + slot.obj = obj; + slot.hash = hash; + slot.len = bytes.len() as u16; + slot.bytes[..bytes.len()].copy_from_slice(bytes); + Ok((obj, hash)) + } + + #[inline] + unsafe fn build( + py: Python<'_>, + bytes: &[u8], + ) -> PyResult<(*mut ffi::PyObject, ffi::Py_hash_t)> { + let s = pystring_from_bytes_fast(py, bytes)?; + let ptr = s.as_ptr(); + let hash = ffi::PyObject_Hash(ptr); + if hash == -1 { + return Err(PyErr::fetch(py)); + } + Ok((s.into_ptr(), hash)) + } +} + +// Non-CPython / free-threaded fallback: no cache, just build the string and compute its hash inline +#[cfg(not(all(CPython, not(Py_GIL_DISABLED))))] +mod key_cache { + use super::pystring_from_bytes_fast; + use pyo3::{ffi, prelude::*}; + + #[inline] + pub(super) unsafe fn intern_key( + py: Python<'_>, + bytes: &[u8], + ) -> PyResult<(*mut ffi::PyObject, ffi::Py_hash_t)> { + let s = pystring_from_bytes_fast(py, bytes)?; + let ptr = s.as_ptr(); + let hash = ffi::PyObject_Hash(ptr); + if hash == -1 { + return Err(PyErr::fetch(py)); + } + Ok((s.into_ptr(), hash)) + } +} + fn get_bytes_from_py_any<'py>(obj: &'py Bound<'py, PyAny>) -> PyResult<&'py [u8]> { if let Ok(b) = obj.cast::() { Ok(b.as_bytes()) @@ -314,11 +434,33 @@ where } } - let key_py = pystring_from_bytes_fast(py, key)?; prev_key = Some(key); + let (key_ptr, key_hash) = unsafe { key_cache::intern_key(py, key)? }; + let key_bound: Bound<'_, PyAny> = + unsafe { Bound::from_owned_ptr(py, key_ptr) }; + let value_py = decode_dag_cbor_to_pyobject(py, r, depth + 1)?; - dict.set_item(key_py, value_py)?; + + #[cfg(CPython)] + unsafe { + let value_ptr = value_py.into_ptr(); + let rc = _PyDict_SetItem_KnownHash( + dict.as_ptr(), + key_bound.as_ptr(), + value_ptr, + key_hash, + ); + ffi::Py_DECREF(value_ptr); + if rc != 0 { + return Err(anyhow!(PyErr::fetch(py))); + } + } + #[cfg(not(CPython))] + { + let _ = key_hash; + dict.set_item(&key_bound, value_py)?; + } } dict.into_pyobject(py)?.into() From f7b069462810ab24d84802d7fd4cc239b0476487 Mon Sep 17 00:00:00 2001 From: "Ilya (Marshal)" Date: Tue, 26 May 2026 00:40:39 +0200 Subject: [PATCH 3/3] suppress clippy --- src/lib.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d32f192..e55a1e1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -258,6 +258,10 @@ mod key_cache { } let slot_idx = fx_hash(bytes) & (CAP - 1); + // `&raw mut` is the supported path to a `static mut`; the explicit + // re-borrow keeps the field accesses readable. Clippy's `deref_addrof` + // suggestion would re-introduce `static_mut_refs`. + #[allow(clippy::deref_addrof)] let slot = &mut *(&raw mut SLOTS[slot_idx]); if slot.len as usize == bytes.len() @@ -431,8 +435,7 @@ where prev_key = Some(key); let (key_ptr, key_hash) = unsafe { key_cache::intern_key(py, key)? }; - let key_bound: Bound<'_, PyAny> = - unsafe { Bound::from_owned_ptr(py, key_ptr) }; + let key_bound: Bound<'_, PyAny> = unsafe { Bound::from_owned_ptr(py, key_ptr) }; let value_py = decode_dag_cbor_to_pyobject(py, r, depth + 1)?;