Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.DS_Store

*.py[cod]

# Generated by Cargo
Expand Down
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,28 @@ You can install or upgrade `libipld` via
pip install -U libipld
```

### Performance

Benchmarks against [`cbrrr`](https://github.com/DavidBuchanan314/dag-cbrrr) (C), [`py-ipld-dag`](https://github.com/ipld/py-ipld-dag) (Python wrapper over `cbor2`) and [`dag_cbor`](https://github.com/hashberg-io/dag-cbor) (pure Python), measured on the four classic [nativejson-benchmark](https://github.com/miloyip/nativejson-benchmark) fixtures (round-tripped through DAG-CBOR). Bars show operations/second relative to pure-Python `dag_cbor` (the 1× baseline); higher is better.

Measured on Apple M1, macOS 15 (Darwin 24.6.0), CPython 3.14.0, `libipld` installed from PyPI (PGO + LTO wheel).

#### Deserialization

![deserialization](https://raw.githubusercontent.com/MarshalX/python-libipld/main/benchmark/deserialization.png)

#### Serialization

![serialization](https://raw.githubusercontent.com/MarshalX/python-libipld/main/benchmark/serialization.png)

Reproduce locally:

```bash
cd benchmark && ./run.sh
```

See [`benchmark/README.md`](./benchmark/README.md) for details.

### Contributing

Contributions of all sizes are welcome.
Expand Down
4 changes: 4 additions & 0 deletions benchmark/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
results.json
.benchmarks/
__pycache__/
.pytest_cache/
43 changes: 43 additions & 0 deletions benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# benchmark

DAG-CBOR encode/decode benchmark across Python implementations.

Compared:
- [`libipld`](https://github.com/MarshalX/python-libipld) (Rust)
- [`cbrrr`](https://github.com/DavidBuchanan314/dag-cbrrr) (C)
- [`py-ipld-dag`](https://github.com/ipld/py-ipld-dag) (Python wrapper over [`cbor2`](https://github.com/agronholm/cbor2) with `canonical=True`; cbor2 itself is Rust)
- [`dag_cbor`](https://github.com/hashberg-io/dag-cbor) (pure Python, used as the 1× baseline)

Fixtures: `canada.json`, `citm_catalog.json`, `github.json`, `twitter.json` (loaded from `../data/`, parsed once, then encoded to DAG-CBOR via `libipld` for the decode benchmarks).

## Run

```sh
./run.sh # encode + decode
./run.sh encode # encode only
./run.sh decode # decode only
```

Outputs `serialization.png` and `deserialization.png` next to `chart.py`. Raw history goes into `.benchmarks/` (pytest-benchmark autosave).

## Which `libipld` is measured?

By default, `requirements.txt` pulls the **published PGO-optimized wheel** from PyPI. This is what users actually get when they `pip install libipld`, so the charts reflect real-world performance.

To benchmark a locally-built PGO wheel instead, edit the `libipld` line in `requirements.txt`:

```
libipld @ file:///abs/path/to/libipld-*.whl
```

Building a local PGO wheel is out of scope here.

## Filter

Skip a slow library (e.g. pure-Python `dag_cbor` on `canada`):

```sh
uv run --no-project --with-requirements requirements.txt \
  pytest --benchmark-enable --benchmark-json=results.json -k "not dag_cbor"
uv run --no-project --with-requirements requirements.txt python chart.py results.json
```
104 changes: 104 additions & 0 deletions benchmark/chart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Render bar charts from pytest-benchmark JSON output.

Usage (run from this directory):
./run.sh # full pipeline
uv run --with-requirements requirements.txt python chart.py results.json
"""

import json
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.ticker import FuncFormatter

BASELINE = 'dag_cbor'
HUE_ORDER = ['libipld', 'cbrrr', 'py-ipld-dag', 'dag_cbor']


def load(path):
    """Read a pytest-benchmark JSON report into a flat DataFrame.

    Args:
        path: Location of the file written by ``pytest --benchmark-json``.

    Returns:
        A DataFrame with one row per benchmark entry and the columns ``op``,
        ``fixture``, ``lib``, ``lib_label`` and ``ops_per_sec``. The columns
        are present even when the report contains no benchmarks, so callers
        can index them unconditionally.

    Raises:
        KeyError: If the report has no ``benchmarks`` list, or an entry lacks
            ``stats.mean`` or the ``op``/``fixture``/``lib`` keys in
            ``extra_info``.
    """
    columns = ['op', 'fixture', 'lib', 'lib_label', 'ops_per_sec']

    # JSON is UTF-8 by specification; do not depend on the locale's default.
    with open(path, encoding='utf-8') as f:
        data = json.load(f)

    rows = []
    for bench in data['benchmarks']:
        info = bench['extra_info']
        lib = info['lib']
        ver = info.get('version')
        rows.append(
            {
                'op': info['op'],
                'fixture': info['fixture'],
                'lib': lib,
                # the version is optional; fall back to the bare library name
                'lib_label': f'{lib} {ver}' if ver else lib,
                # mean is seconds per call, so its inverse is calls per second
                'ops_per_sec': 1.0 / bench['stats']['mean'],
            }
        )

    return pd.DataFrame(rows, columns=columns)


def add_relative(df, baseline):
    """Return a copy of the rows with an added ``rel`` throughput ratio.

    Rows are processed per ``(op, fixture)`` group. Within a group every
    ``ops_per_sec`` value is divided by the value of the ``baseline`` library;
    when that library has no row in the group, the group's smallest
    ``ops_per_sec`` is used as the reference instead.

    Args:
        df: Frame with at least the columns ``op``, ``fixture``, ``lib`` and
            ``ops_per_sec``.
        baseline: Value of ``lib`` whose throughput counts as ``1.0``.

    Returns:
        A new DataFrame built from the grouped rows plus the ``rel`` column.
    """
    records = []
    for _key, bucket in df.groupby(['op', 'fixture']):
        is_baseline = bucket['lib'] == baseline
        if is_baseline.any():
            reference = bucket.loc[is_baseline, 'ops_per_sec'].iloc[0]
        else:
            # baseline not measured for this group: normalise to the slowest lib
            reference = bucket['ops_per_sec'].min()

        records.extend(
            {**row.to_dict(), 'rel': row['ops_per_sec'] / reference}
            for _, row in bucket.iterrows()
        )

    return pd.DataFrame(records)


def plot(df, op, baseline, title, outfile):
    """Render a grouped bar chart of relative throughput for one operation.

    Args:
        df: Frame providing the columns ``op``, ``fixture``, ``lib``,
            ``lib_label`` and ``rel``.
        op: Only rows whose ``op`` column equals this value are drawn.
        baseline: Library name shown in the y-axis label.
        title: Chart title.
        outfile: Destination passed to ``Figure.savefig``.

    Returns:
        None. Prints a one-line status message; when no rows match ``op`` the
        function returns without writing a file.
    """
    sub = df[df['op'] == op].copy()
    if sub.empty:
        print(f'skipping {outfile}: no {op} results')
        return

    # preserve canonical lib ordering, but resolve to versioned labels for the legend
    label_for_lib = dict(zip(sub['lib'], sub['lib_label']))
    labels_present = [label_for_lib[lib] for lib in HUE_ORDER if lib in label_for_lib]

    sns.set_theme(style='darkgrid')
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.barplot(
        data=sub,
        x='fixture',
        y='rel',
        hue='lib_label',
        hue_order=labels_present,
        ax=ax,
    )

    # dashed reference line at the baseline's own ratio
    ax.axhline(1.0, color='gray', linestyle='--', linewidth=1)
    ax.set_title(title)
    ax.set_xlabel('Document')
    ax.set_ylabel(f'Operations/second relative to {baseline}')
    # ':g' keeps fractional ticks (0.5x, 2.5x) instead of truncating them to
    # a misleading integer, while whole numbers still render as '2x'
    ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f'{y:g}x'))
    ax.legend(title='library')

    fig.tight_layout()
    fig.savefig(outfile, dpi=150)
    # pyplot keeps every figure alive until it is closed explicitly
    plt.close(fig)
    print(f'wrote {outfile}')


def main():
    """Load benchmark results and write the two comparison charts.

    The results file is taken from the first command-line argument and
    defaults to ``results.json``. Charts are written next to this script as
    ``deserialization.png`` and ``serialization.png``.

    Raises:
        SystemExit: If the results file contains no benchmark entries.
    """
    path = Path(sys.argv[1] if len(sys.argv) > 1 else 'results.json')

    df = load(path)
    if df.empty:
        # without rows there is no baseline to pick and nothing to draw
        raise SystemExit(f'{path}: no benchmark results to chart')

    # use the first measured library when the preferred baseline was filtered out
    baseline = BASELINE if BASELINE in df['lib'].values else df['lib'].iloc[0]
    df = add_relative(df, baseline)

    here = Path(__file__).parent
    plot(df, 'decode', baseline, 'deserialization', here / 'deserialization.png')
    plot(df, 'encode', baseline, 'serialization', here / 'serialization.png')


if __name__ == '__main__':
    main()
58 changes: 58 additions & 0 deletions benchmark/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import json
from importlib.metadata import version
from pathlib import Path

import pytest

import libipld

try:
import cbrrr
except ImportError:
cbrrr = None

try:
import dag_cbor
except ImportError:
dag_cbor = None

try:
from dag.codecs import dag_cbor as py_ipld_dag_cbor
except ImportError:
py_ipld_dag_cbor = None


# Names of the JSON documents under DATA_DIR (without the .json suffix).
FIXTURES = ['canada', 'citm_catalog', 'github', 'twitter']
DATA_DIR = Path(__file__).parent.parent / 'data'

# Registries keyed by the library label used in test ids and chart legends.
# Insertion order is the order in which the benchmarks are parametrized.
DECODERS = {}
ENCODERS = {}
VERSIONS = {}


def _register(label, dist_name, decode, encode):
    """Record one library's codec callables and installed distribution version."""
    DECODERS[label] = decode
    ENCODERS[label] = encode
    VERSIONS[label] = version(dist_name)


_register('libipld', 'libipld', libipld.decode_dag_cbor, libipld.encode_dag_cbor)

# The remaining libraries are optional: each is skipped when its import failed.
if cbrrr is not None:
    _register('cbrrr', 'cbrrr', cbrrr.decode_dag_cbor, cbrrr.encode_dag_cbor)
if dag_cbor is not None:
    _register('dag_cbor', 'dag-cbor', dag_cbor.decode, dag_cbor.encode)
if py_ipld_dag_cbor is not None:
    _register('py-ipld-dag', 'py-ipld-dag', py_ipld_dag_cbor.decode, py_ipld_dag_cbor.encode)


@pytest.fixture(scope='session', params=FIXTURES)
def fixture_name(request):
    """Provide one entry of ``FIXTURES`` per parametrized run."""
    document = request.param
    return document


@pytest.fixture(scope='session')
def fixture_obj(fixture_name):
    """Load ``<fixture_name>.json`` from ``DATA_DIR`` and return the parsed object.

    Raises:
        FileNotFoundError: If the document is missing from ``DATA_DIR``.
    """
    # Decode explicitly as UTF-8: the platform default would mis-decode or
    # reject non-ASCII documents on non-UTF-8 locales.
    with open(DATA_DIR / f'{fixture_name}.json', encoding='utf-8') as f:
        return json.load(f)


@pytest.fixture(scope='session')
def fixture_bytes(fixture_obj):
    """Provide the parsed document encoded with ``libipld.encode_dag_cbor``."""
    encoded = libipld.encode_dag_cbor(fixture_obj)
    return encoded
Binary file added benchmark/deserialization.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
15 changes: 15 additions & 0 deletions benchmark/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# benchmarks run against the published PGO build by default.
# to bench a local PGO wheel, replace the line below with:
# libipld @ file:///abs/path/to/libipld-*.whl
libipld

cbrrr>=1.0
dag-cbor>=0.3
py-ipld-dag

pytest>=8.0
pytest-benchmark>=4.0
pytest-random-order>=1.1
matplotlib>=3.8
seaborn>=0.13
pandas>=2.2
30 changes: 30 additions & 0 deletions benchmark/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Run the benchmark suite with pytest-benchmark, then render the charts.
#
# Usage: ./run.sh          # runs encode + decode
#        ./run.sh encode   # runs encode only
#        ./run.sh decode   # runs decode only

set -euo pipefail

# Work from the script's own directory so relative paths resolve.
cd "$(dirname "$0")"

TARGET="${1:-}"

# Collected in an array so an optional test path can be appended without
# relying on unquoted expansion. The array is never empty, which keeps
# "${PYTEST_ARGS[@]}" safe under `set -u` on older bash releases.
PYTEST_ARGS=(
  --verbose
  --benchmark-enable
  --benchmark-min-time=1
  --benchmark-max-time=5
  --benchmark-disable-gc
  --benchmark-autosave
  --benchmark-save-data
  --benchmark-json=results.json
  --random-order
)

case "$TARGET" in
  '')
    # no target: pytest collects both test modules
    ;;
  encode|decode)
    PYTEST_ARGS+=("test_${TARGET}.py")
    ;;
  *)
    # fail early instead of letting pytest report a missing test file
    echo "error: unknown target '${TARGET}'" >&2
    echo "usage: $0 [encode|decode]" >&2
    exit 2
    ;;
esac

uv run --no-project --with-requirements requirements.txt \
  pytest "${PYTEST_ARGS[@]}"

uv run --no-project --with-requirements requirements.txt python chart.py results.json
Binary file added benchmark/serialization.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions benchmark/test_decode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pytest

from conftest import DECODERS, VERSIONS


@pytest.mark.parametrize('lib', list(DECODERS))
def test_decode(benchmark, lib, fixture_name, fixture_bytes):
    """Benchmark decoding ``fixture_bytes`` with the decoder registered for ``lib``."""
    decode = DECODERS[lib]

    benchmark.group = f'decode-{fixture_name}'
    # metadata stored with the benchmark result and read back by chart.py
    benchmark.extra_info.update(
        {
            'op': 'decode',
            'lib': lib,
            'version': VERSIONS[lib],
            'fixture': fixture_name,
        }
    )
    benchmark(decode, fixture_bytes)
13 changes: 13 additions & 0 deletions benchmark/test_encode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pytest

from conftest import ENCODERS, VERSIONS


@pytest.mark.parametrize('lib', list(ENCODERS))
def test_encode(benchmark, lib, fixture_name, fixture_obj):
    """Benchmark encoding ``fixture_obj`` with the encoder registered for ``lib``."""
    encode = ENCODERS[lib]

    benchmark.group = f'encode-{fixture_name}'
    # metadata stored with the benchmark result and read back by chart.py
    benchmark.extra_info.update(
        {
            'op': 'encode',
            'lib': lib,
            'version': VERSIONS[lib],
            'fixture': fixture_name,
        }
    )
    benchmark(encode, fixture_obj)
10 changes: 9 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ classifiers = [
"Author" = "https://github.com/MarshalX"

[dependency-groups]
dev = ["maturin>=1.8.7,<2.0"]
dev = ["maturin>=1.8.7,<2.0", "ruff>=0.8"]
testing = [
{ include-group = "dev" },
'pytest==8.3.5; python_version == "3.8"',
Expand Down Expand Up @@ -74,6 +74,14 @@ module-name = "libipld._libipld"
bindings = "pyo3"
features = ["pyo3/extension-module"]

[tool.ruff.format]
quote-style = "single"

[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"
multiline-quotes = "double"
inline-quotes = "single"

[build-system]
requires = ["maturin>=1.8.7,<2.0"]
build-backend = "maturin"
Loading
Loading