From ee2735d36ba617da9beda7788200b54f701b503f Mon Sep 17 00:00:00 2001
From: "Ilya (Marshal)"
Date: Mon, 25 May 2026 23:50:51 +0200
Subject: [PATCH 1/2] Optimize DAG-CBOR string decode with ASCII fast path

---
Review notes (ignored by `git am`):
- The fast path no longer stores a NUL terminator. `PyUnicode_New` already
  terminates the buffer, and for zero-length input it returns the shared
  empty-string singleton, which must not be written to.
- Added a `// SAFETY:` comment to the unsafe block. The added block is still
  36 lines, so every hunk header and the diffstat are unchanged.
- Generic arguments stripped in transit were restored where the surrounding
  code determines them. The type prefix of `>::decode(r)` in the STRING hunk
  and the author's e-mail address could not be recovered and are left as
  received.
- The blob/commit hashes predate this amendment; regenerate the series with
  `git format-patch` before using `git am -3`.
- NOTE(review): `CPython` does not appear to be a cfg emitted by
  `pyo3_build_config::use_pyo3_cfgs()`. Confirm build.rs defines it, otherwise
  the fast path is never compiled. Also confirm the crate is not built with
  `Py_LIMITED_API`, where `ffi::PyASCIIObject` is unavailable.

 src/lib.rs | 43 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 00ec94d..e5ef292 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -168,6 +168,42 @@ fn collect_and_sort_map_entries<'py>(
     Ok(entries)
 }
 
+// `PyUnicode_DecodeUTF8` runs a state machine even on pure-ASCII input. Skip
+// it by allocating a compact-ASCII `PyUnicode` and memcpying into its inline
+// buffer; non-ASCII falls through to the standard decoder.
+#[cfg(CPython)]
+#[inline]
+fn pystring_from_bytes_fast<'py>(
+    py: Python<'py>,
+    bytes: &[u8],
+) -> PyResult<Bound<'py, PyString>> {
+    if !bytes.is_ascii() {
+        return PyString::from_bytes(py, bytes);
+    }
+
+    // SAFETY: `PyUnicode_New(len, 127)` returns a compact-ASCII string whose `len`-byte buffer
+    // and preset NUL terminator directly follow the `PyASCIIObject` header. The terminator is
+    // not rewritten: for `len == 0` the result is the shared empty-string singleton.
+    unsafe {
+        let obj = ffi::PyUnicode_New(bytes.len() as ffi::Py_ssize_t, 127);
+        if obj.is_null() {
+            return Err(PyErr::fetch(py));
+        }
+        let data = obj.cast::<ffi::PyASCIIObject>().offset(1).cast::<u8>();
+        std::ptr::copy_nonoverlapping(bytes.as_ptr(), data, bytes.len());
+        Ok(Bound::from_owned_ptr(py, obj).cast_into_unchecked::<PyString>())
+    }
+}
+
+#[cfg(not(CPython))]
+#[inline]
+fn pystring_from_bytes_fast<'py>(
+    py: Python<'py>,
+    bytes: &[u8],
+) -> PyResult<Bound<'py, PyString>> {
+    PyString::from_bytes(py, bytes)
+}
+
 fn get_bytes_from_py_any<'py>(obj: &'py Bound<'py, PyAny>) -> PyResult<&'py [u8]> {
     if let Ok(b) = obj.cast::<PyBytes>() {
         Ok(b.as_bytes())
@@ -222,8 +258,9 @@ where
                 .into_pyobject(py)?
                 .into(),
             major::STRING => {
-                // The UTF-8 validation is done when it's converted into a Python string
-                PyString::from_bytes(
+                // ASCII fast path inside the helper; non-ASCII falls through to
+                // `PyUnicode_DecodeUTF8`, which is where the spec validation lives.
+                pystring_from_bytes_fast(
                     py,
                     >::decode(r)
                         .map_err(|_| anyhow!("Cannot decode as bytes"))?
@@ -277,7 +314,7 @@ where
                     }
                 }
 
-                let key_py = PyString::from_bytes(py, key)?;
+                let key_py = pystring_from_bytes_fast(py, key)?;
                 prev_key = Some(key);
 
                 let value_py = decode_dag_cbor_to_pyobject(py, r, depth + 1)?;

From 63a1ed3a7d2222307ae9ffd7ddc08b1b3649d185 Mon Sep 17 00:00:00 2001
From: "Ilya (Marshal)"
Date: Tue, 26 May 2026 00:31:31 +0200
Subject: [PATCH 2/2] Fix fmt

---
 src/lib.rs | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index e5ef292..f67a404 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -173,10 +173,7 @@ fn collect_and_sort_map_entries<'py>(
 // buffer; non-ASCII falls through to the standard decoder.
 #[cfg(CPython)]
 #[inline]
-fn pystring_from_bytes_fast<'py>(
-    py: Python<'py>,
-    bytes: &[u8],
-) -> PyResult<Bound<'py, PyString>> {
+fn pystring_from_bytes_fast<'py>(py: Python<'py>, bytes: &[u8]) -> PyResult<Bound<'py, PyString>> {
     if !bytes.is_ascii() {
         return PyString::from_bytes(py, bytes);
     }
@@ -197,10 +194,7 @@ fn pystring_from_bytes_fast<'py>(
 
 #[cfg(not(CPython))]
 #[inline]
-fn pystring_from_bytes_fast<'py>(
-    py: Python<'py>,
-    bytes: &[u8],
-) -> PyResult<Bound<'py, PyString>> {
+fn pystring_from_bytes_fast<'py>(py: Python<'py>, bytes: &[u8]) -> PyResult<Bound<'py, PyString>> {
     PyString::from_bytes(py, bytes)
 }
 