MarshalX · MarshalX · May 26, 2026 · May 26, 2026 · May 26, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
+.DS_Store
+
 *.py[cod]
 
 # Generated by Cargo

diff --git a/README.md b/README.md
@@ -56,6 +56,28 @@ You can install or upgrade `libipld` via
 pip install -U libipld
 ```
 
+### Performance
+
+Benchmarks against [`cbrrr`](https://github.com/DavidBuchanan314/dag-cbrrr) (C) and [`dag_cbor`](https://github.com/hashberg-io/dag-cbor) (pure Python), measured on the four classic [nativejson-benchmark](https://github.com/miloyip/nativejson-benchmark) fixtures (round-tripped through DAG-CBOR). Bars are operations/second relative to pure-Python `dag_cbor`; higher is better.
+
+Measured on Apple M1, macOS 15 (Darwin 24.6.0), CPython 3.14.0, `libipld` installed from PyPI (PGO + LTO wheel).
+
+#### Deserialization
+
+![deserialization](https://raw.githubusercontent.com/MarshalX/python-libipld/main/benchmark/deserialization.png)
+
+#### Serialization
+
+![serialization](https://raw.githubusercontent.com/MarshalX/python-libipld/main/benchmark/serialization.png)
+
+Reproduce locally:
+
+```bash
+cd benchmark && ./run.sh
+```
+
+See [`benchmark/README.md`](./benchmark/README.md) for details.
+
 ### Contributing
 
 Contributions of all sizes are welcome.

diff --git a/benchmark/.gitignore b/benchmark/.gitignore
@@ -0,0 +1,4 @@
+results.json
+.benchmarks/
+__pycache__/
+.pytest_cache/
diff --git a/benchmark/README.md b/benchmark/README.md
@@ -0,0 +1,43 @@
+# benchmark
+
+DAG-CBOR encode/decode benchmark across Python implementations.
+
+Compared:
+- [`libipld`](https://github.com/MarshalX/python-libipld) (Rust)
+- [`cbrrr`](https://github.com/DavidBuchanan314/dag-cbrrr) (C)
+- [`py-ipld-dag`](https://github.com/ipld/py-ipld-dag) (Python wrapper over [`cbor2`](https://github.com/agronholm/cbor2) with `canonical=True`; cbor2 itself is Rust)
+- [`dag_cbor`](https://github.com/hashberg-io/dag-cbor) (pure Python, used as the 1× baseline)
+
+Fixtures: `canada.json`, `citm_catalog.json`, `github.json`, `twitter.json` (loaded from `../data/`, parsed once, then encoded to DAG-CBOR via `libipld` for the decode benchmarks).
+
+## Run
+
+```sh
+./run.sh           # encode + decode
+./run.sh encode    # encode only
+./run.sh decode    # decode only
+```
+
+Outputs `serialization.png` and `deserialization.png` next to `chart.py`. Raw history goes into `.benchmarks/` (pytest-benchmark autosave).
+
+## Which `libipld` is measured?
+
+By default, `requirements.txt` pulls the **published PGO-optimized wheel** from PyPI. This is what users actually get when they `pip install libipld`, so the charts reflect real-world performance.
+
+To benchmark a locally-built PGO wheel instead, edit the `libipld` line in `requirements.txt`:
+
+```
+libipld @ file:///abs/path/to/libipld-*.whl
+```
+
+Building a local PGO wheel is out of scope here.
+
+## Filter
+
+Skip a slow library (e.g. pure-Python `dag_cbor` on `canada`):
+
+```sh
+uv run --no-project --with-requirements requirements.txt \
+    pytest --benchmark-enable --benchmark-json=results.json -k "not dag_cbor"
+uv run --no-project --with-requirements requirements.txt python chart.py results.json
+```
diff --git a/benchmark/chart.py b/benchmark/chart.py
@@ -0,0 +1,104 @@
+"""Render bar charts from pytest-benchmark JSON output.
+
+Usage (run from this directory):
+    ./run.sh                                                         # full pipeline
+    uv run --no-project --with-requirements requirements.txt python chart.py results.json
+"""
+
+import json
+import sys
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+from matplotlib.ticker import FuncFormatter
+
+# Library whose throughput is used as the 1x reference when it is present.
+BASELINE = 'dag_cbor'
+# Order in which libraries are offered as hue levels to the bar plot.
+HUE_ORDER = ['libipld', 'cbrrr', 'py-ipld-dag', BASELINE]
+
+
+def load(path):
+    """Read a pytest-benchmark JSON report into a flat DataFrame.
+
+    Each entry of the report's ``benchmarks`` list becomes one row. ``op``,
+    ``fixture``, ``lib`` and the optional ``version`` come from the entry's
+    ``extra_info``; throughput is the reciprocal of the mean run time.
+
+    Args:
+        path: Location of the JSON report.
+
+    Returns:
+        DataFrame with the columns ``op``, ``fixture``, ``lib``, ``lib_label``
+        and ``ops_per_sec``. The columns exist even when the report contains
+        no benchmarks, so callers can always select them.
+
+    Raises:
+        KeyError: If an entry lacks ``op``, ``fixture`` or ``lib`` in its
+            ``extra_info``.
+    """
+    columns = ['op', 'fixture', 'lib', 'lib_label', 'ops_per_sec']
+
+    # Decode explicitly as UTF-8 rather than with the locale's default codec.
+    with open(path, encoding='utf-8') as f:
+        data = json.load(f)
+
+    rows = []
+    for b in data['benchmarks']:
+        info = b['extra_info']
+        lib = info['lib']
+        ver = info.get('version')
+        rows.append(
+            {
+                'op': info['op'],
+                'fixture': info['fixture'],
+                'lib': lib,
+                # the version is optional; fall back to the bare library name
+                'lib_label': f'{lib} {ver}' if ver else lib,
+                'ops_per_sec': 1.0 / b['stats']['mean'],
+            }
+        )
+
+    return pd.DataFrame(rows, columns=columns)
+
+
+def add_relative(df, baseline):
+    """Return the rows of ``df`` with an extra ``rel`` throughput column.
+
+    Rows are processed per ``(op, fixture)`` group. Within a group, ``rel`` is
+    each row's ``ops_per_sec`` divided by the baseline library's
+    ``ops_per_sec``; when the baseline has no row in the group, the group's
+    smallest ``ops_per_sec`` is used as the reference instead.
+
+    Args:
+        df: Frame with ``op``, ``fixture``, ``lib`` and ``ops_per_sec`` columns.
+        baseline: Value of ``lib`` that should map to ``rel == 1``.
+
+    Returns:
+        A new DataFrame built from the per-group rows plus ``rel``.
+    """
+    records = []
+    for _key, bucket in df.groupby(['op', 'fixture']):
+        baseline_rates = bucket.loc[bucket['lib'] == baseline, 'ops_per_sec']
+        if baseline_rates.empty:
+            # baseline not measured for this (op, fixture): use the slowest lib
+            reference = bucket['ops_per_sec'].min()
+        else:
+            reference = baseline_rates.iloc[0]
+
+        records.extend(
+            {**row.to_dict(), 'rel': row['ops_per_sec'] / reference}
+            for _, row in bucket.iterrows()
+        )
+
+    return pd.DataFrame(records)
+
+
+def plot(df, op, baseline, title, outfile):
+    """Draw one grouped bar chart of relative throughput and save it to disk.
+
+    Bars are grouped by ``fixture`` on the x axis and coloured by versioned
+    library label. Only libraries listed in ``HUE_ORDER`` are passed as hue
+    levels, in that order. A dashed horizontal line marks the 1x level.
+
+    Args:
+        df: Frame with ``op``, ``fixture``, ``lib``, ``lib_label`` and ``rel``
+            columns.
+        op: Operation to chart; rows with a different ``op`` are ignored.
+        baseline: Library name shown in the y-axis label.
+        title: Chart title.
+        outfile: Destination image path.
+
+    Returns:
+        None. When there are no rows for ``op`` a notice is printed and no
+        file is written.
+    """
+    sub = df[df['op'] == op].copy()
+    if sub.empty:
+        print(f'skipping {outfile}: no {op} results')
+        return
+
+    # preserve canonical lib ordering, but resolve to versioned labels for the legend
+    label_for_lib = dict(zip(sub['lib'], sub['lib_label']))
+    labels_present = [label_for_lib[lib] for lib in HUE_ORDER if lib in label_for_lib]
+
+    sns.set_theme(style='darkgrid')
+    fig, ax = plt.subplots(figsize=(10, 7))
+    sns.barplot(
+        data=sub,
+        x='fixture',
+        y='rel',
+        hue='lib_label',
+        hue_order=labels_present,
+        ax=ax,
+    )
+
+    ax.axhline(1.0, color='gray', linestyle='--', linewidth=1)
+    ax.set_title(title)
+    ax.set_xlabel('Document')
+    ax.set_ylabel(f'Operations/second relative to {baseline}')
+    # 'g' formatting keeps fractional ticks (0.5x, 2.5x) distinct; truncating
+    # with int() would label them 0x and 2x and produce duplicate tick labels.
+    ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f'{y:g}x'))
+    ax.legend(title='library')
+
+    fig.tight_layout()
+    fig.savefig(outfile, dpi=150)
+    # Close the figure once saved so it does not stay registered with pyplot.
+    plt.close(fig)
+    print(f'wrote {outfile}')
+
+
+def main():
+    """Load benchmark results and write the decode and encode charts.
+
+    The report path is taken from the first command-line argument and defaults
+    to ``results.json``. Charts are written next to this file as
+    ``deserialization.png`` and ``serialization.png``.
+
+    Exits with an error message when the report contains no benchmarks.
+    """
+    path = Path(sys.argv[1] if len(sys.argv) > 1 else 'results.json')
+
+    df = load(path)
+    if df.empty:
+        # Nothing to chart (e.g. every benchmark was filtered out); stop with a
+        # clear message instead of failing on a column or index lookup below.
+        sys.exit(f'no benchmark results found in {path}')
+
+    # Prefer the configured baseline; otherwise use the first library listed.
+    baseline = BASELINE if BASELINE in df['lib'].values else df['lib'].iloc[0]
+    df = add_relative(df, baseline)
+
+    here = Path(__file__).parent
+    plot(df, 'decode', baseline, 'deserialization', here / 'deserialization.png')
+    plot(df, 'encode', baseline, 'serialization', here / 'serialization.png')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/benchmark/conftest.py b/benchmark/conftest.py
@@ -0,0 +1,58 @@
+import json
+from importlib.metadata import version
+from pathlib import Path
+
+import pytest
+
+import libipld
+
+try:
+    import cbrrr
+except ImportError:
+    cbrrr = None
+
+try:
+    import dag_cbor
+except ImportError:
+    dag_cbor = None
+
+try:
+    from dag.codecs import dag_cbor as py_ipld_dag_cbor
+except ImportError:
+    py_ipld_dag_cbor = None
+
+
+# Base names of the JSON documents to benchmark; each is read from
+# DATA_DIR / '<name>.json'.
+FIXTURES = ['canada', 'citm_catalog', 'github', 'twitter']
+# The 'data' directory one level above the directory containing this file.
+DATA_DIR = Path(__file__).parent.parent / 'data'
+
+# Registries keyed by library label: decode callable, encode callable and
+# installed distribution version. libipld is always registered; the other
+# libraries are added only when their import succeeded.
+DECODERS = {'libipld': libipld.decode_dag_cbor}
+ENCODERS = {'libipld': libipld.encode_dag_cbor}
+VERSIONS = {'libipld': version('libipld')}
+if cbrrr is not None:
+    DECODERS['cbrrr'] = cbrrr.decode_dag_cbor
+    ENCODERS['cbrrr'] = cbrrr.encode_dag_cbor
+    VERSIONS['cbrrr'] = version('cbrrr')
+if dag_cbor is not None:
+    DECODERS['dag_cbor'] = dag_cbor.decode
+    ENCODERS['dag_cbor'] = dag_cbor.encode
+    # the version is looked up under the distribution name 'dag-cbor'
+    VERSIONS['dag_cbor'] = version('dag-cbor')
+if py_ipld_dag_cbor is not None:
+    DECODERS['py-ipld-dag'] = py_ipld_dag_cbor.decode
+    ENCODERS['py-ipld-dag'] = py_ipld_dag_cbor.encode
+    VERSIONS['py-ipld-dag'] = version('py-ipld-dag')
+
+
+@pytest.fixture(scope='session', params=FIXTURES)
+def fixture_name(request):
+    """Yield each fixture base name from ``FIXTURES`` as a session parameter."""
+    name = request.param
+    return name
+
+
+@pytest.fixture(scope='session')
+def fixture_obj(fixture_name):
+    """Return the parsed JSON document for the current fixture.
+
+    The document is read from ``DATA_DIR / '<fixture_name>.json'``.
+    """
+    # Decode explicitly as UTF-8 so parsing does not depend on the locale's
+    # default codec.
+    with open(DATA_DIR / f'{fixture_name}.json', encoding='utf-8') as f:
+        return json.load(f)
+
+
+@pytest.fixture(scope='session')
+def fixture_bytes(fixture_obj):
+    """Return the current fixture document encoded to DAG-CBOR by ``libipld``."""
+    encoded = libipld.encode_dag_cbor(fixture_obj)
+    return encoded
diff --git a/benchmark/deserialization.png b/benchmark/deserialization.png
diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt
@@ -0,0 +1,15 @@
+# benchmarks run against the published PGO build by default.
+# to bench a local PGO wheel, replace the line below with:
+#   libipld @ file:///abs/path/to/libipld-*.whl
+libipld
+
+cbrrr>=1.0
+dag-cbor>=0.3
+py-ipld-dag
+
+pytest>=8.0
+pytest-benchmark>=4.0
+pytest-random-order>=1.1
+matplotlib>=3.8
+seaborn>=0.13
+pandas>=2.2
diff --git a/benchmark/run.sh b/benchmark/run.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Run the benchmarks with pytest, then render the charts from results.json.
+#
+# Usage: ./run.sh           # runs encode + decode
+#        ./run.sh encode    # runs encode only
+#        ./run.sh decode    # runs decode only
+
+set -euo pipefail
+
+# Work from the script's own directory so the relative paths below resolve
+# no matter where the script is invoked from.
+cd "$(dirname "$0")"
+
+# Collect pytest arguments in an array so the optional test path can be
+# appended without relying on unquoted word-splitting.
+PYTEST_ARGS=(
+    --verbose
+    --benchmark-enable
+    --benchmark-min-time=1
+    --benchmark-max-time=5
+    --benchmark-disable-gc
+    --benchmark-autosave
+    --benchmark-save-data
+    --benchmark-json=results.json
+    --random-order
+)
+
+TARGET="${1:-}"
+if [[ -n "$TARGET" ]]; then
+    TEST_PATH="test_${TARGET}.py"
+    # Reject an unknown target up front, before the uv environment is prepared.
+    if [[ ! -f "$TEST_PATH" ]]; then
+        echo "error: unknown target '${TARGET}' (no ${TEST_PATH} found)" >&2
+        echo "usage: ./run.sh [encode|decode]" >&2
+        exit 2
+    fi
+    PYTEST_ARGS+=("$TEST_PATH")
+fi
+
+uv run --no-project --with-requirements requirements.txt \
+    pytest "${PYTEST_ARGS[@]}"
+
+uv run --no-project --with-requirements requirements.txt python chart.py results.json
diff --git a/benchmark/serialization.png b/benchmark/serialization.png
diff --git a/benchmark/test_decode.py b/benchmark/test_decode.py
@@ -0,0 +1,13 @@
+import pytest
+
+from conftest import DECODERS, VERSIONS
+
+
+@pytest.mark.parametrize('lib', list(DECODERS))
+def test_decode(benchmark, lib, fixture_name, fixture_bytes):
+    """Benchmark ``lib``'s decoder on the DAG-CBOR bytes of one fixture."""
+    benchmark.group = f'decode-{fixture_name}'
+    # metadata stored alongside the timing in the benchmark's extra_info
+    benchmark.extra_info.update(
+        {
+            'op': 'decode',
+            'lib': lib,
+            'version': VERSIONS[lib],
+            'fixture': fixture_name,
+        }
+    )
+    decoder = DECODERS[lib]
+    benchmark(decoder, fixture_bytes)
diff --git a/benchmark/test_encode.py b/benchmark/test_encode.py
@@ -0,0 +1,13 @@
+import pytest
+
+from conftest import ENCODERS, VERSIONS
+
+
+@pytest.mark.parametrize('lib', list(ENCODERS))
+def test_encode(benchmark, lib, fixture_name, fixture_obj):
+    """Benchmark ``lib``'s encoder on the parsed document of one fixture."""
+    benchmark.group = f'encode-{fixture_name}'
+    # metadata stored alongside the timing in the benchmark's extra_info
+    benchmark.extra_info.update(
+        {
+            'op': 'encode',
+            'lib': lib,
+            'version': VERSIONS[lib],
+            'fixture': fixture_name,
+        }
+    )
+    encoder = ENCODERS[lib]
+    benchmark(encoder, fixture_obj)
diff --git a/pyproject.toml b/pyproject.toml
@@ -39,7 +39,7 @@ classifiers = [
 "Author" = "https://github.com/MarshalX"
 
 [dependency-groups]
-dev = ["maturin>=1.8.7,<2.0"]
+dev = ["maturin>=1.8.7,<2.0", "ruff>=0.8"]
 testing = [
     { include-group = "dev" },
     'pytest==8.3.5; python_version == "3.8"',
@@ -74,6 +74,14 @@ module-name = "libipld._libipld"
 bindings = "pyo3"
 features = ["pyo3/extension-module"]
 
+[tool.ruff.format]
+quote-style = "single"
+
+[tool.ruff.lint.flake8-quotes]
+docstring-quotes = "double"
+multiline-quotes = "double"
+inline-quotes = "single"
+
 [build-system]
 requires = ["maturin>=1.8.7,<2.0"]
 build-backend = "maturin"