From 675a368a1748f32745a22064f10058b3d60f7a99 Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Fri, 19 Jun 2026 18:08:02 +0800 Subject: [PATCH 1/3] gh-151722: Fix data race on frozendict reads during construction in free-threading build len(), repr() and hash() on a frozendict read internal state without synchronization. That is safe for a finished frozendict but not while one is being constructed from a user mapping, where the half-built object is already observable from other threads. Read ma_used through GET_USED() and run the repr/hash entry-table walk inside a critical section, matching dict_repr / set_repr and set_len; the cached hash short-circuit stays lock-free like frozenset_hash. --- ...-06-19-10-03-42.gh-issue-151722.oTiOPF.rst | 4 ++ Objects/dictobject.c | 44 +++++++++++++------ 2 files changed, 35 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-10-03-42.gh-issue-151722.oTiOPF.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-10-03-42.gh-issue-151722.oTiOPF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-10-03-42.gh-issue-151722.oTiOPF.rst new file mode 100644 index 000000000000000..39a40ced72e1a3e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-10-03-42.gh-issue-151722.oTiOPF.rst @@ -0,0 +1,4 @@ +Fix a data race in the free-threading build where :func:`len`, :func:`repr` +and :func:`hash` on a :class:`frozendict` could observe internal state being +concurrently mutated while the ``frozendict`` was still being constructed from +a user mapping. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index ac2f210d023487d..f76949ffdd88671 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3698,7 +3698,7 @@ dict_length(PyObject *self) static Py_ssize_t frozendict_length(PyObject *self) { - return _PyAnyDict_CAST(self)->ma_used; + return GET_USED(_PyAnyDict_CAST(self)); } PyObject * @@ -4386,7 +4386,7 @@ copy_lock_held(PyObject *o, int as_frozendict) } mp = (PyDictObject *)o; - if (mp->ma_used == 0) { + if (GET_USED(mp) == 0) { /* The dict is empty; just return a new dict. */ if (as_frozendict) { return PyFrozenDict_New(NULL); @@ -8264,11 +8264,14 @@ static PyObject * frozendict_repr(PyObject *self) { PyDictObject *mp = _PyAnyDict_CAST(self); - if (mp->ma_used == 0) { + if (GET_USED(mp) == 0) { return PyUnicode_FromFormat("%s()", Py_TYPE(self)->tp_name); } - PyObject *repr = anydict_repr_impl(self); + PyObject *repr; + Py_BEGIN_CRITICAL_SECTION(self); + repr = anydict_repr_impl(self); + Py_END_CRITICAL_SECTION(); if (repr == NULL) { return NULL; } @@ -8322,14 +8325,9 @@ frozendict_pair_hash(Py_hash_t key_hash, PyObject *value) // Code copied from frozenset_hash() static Py_hash_t -frozendict_hash(PyObject *op) +frozendict_hash_lock_held(PyObject *op) { - PyFrozenDictObject *self = _PyFrozenDictObject_CAST(op); - Py_hash_t shash = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ma_hash); - if (shash != -1) { - return shash; - } - + ASSERT_DICT_LOCKED(op); PyDictObject *mp = _PyAnyDict_CAST(op); Py_uhash_t hash = 0; @@ -8345,7 +8343,7 @@ frozendict_hash(PyObject *op) } /* Factor in the number of active entries */ - hash ^= ((Py_uhash_t)mp->ma_used + 1) * 1927868237UL; + hash ^= ((Py_uhash_t)GET_USED(mp) + 1) * 1927868237UL; /* Disperse patterns arising in nested frozendicts */ hash ^= (hash >> 11) ^ (hash >> 25); @@ -8356,10 +8354,30 @@ frozendict_hash(PyObject *op) hash = 590923713UL; } - FT_ATOMIC_STORE_SSIZE_RELAXED(self->ma_hash, (Py_hash_t)hash); return (Py_hash_t)hash; } +static Py_hash_t +frozendict_hash(PyObject *op) +{ + PyFrozenDictObject *self = _PyFrozenDictObject_CAST(op); + Py_hash_t shash = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ma_hash); + if (shash != -1) { + return shash; + } + + Py_hash_t hash; + Py_BEGIN_CRITICAL_SECTION(op); + hash = frozendict_hash_lock_held(op); + Py_END_CRITICAL_SECTION(); + if (hash == -1) { + return -1; + } + + FT_ATOMIC_STORE_SSIZE_RELAXED(self->ma_hash, hash); + return hash; +} + static PyObject * frozendict_new(PyTypeObject *type, PyObject *args, PyObject *kwds) From 8a4795a173ed7f554af292910a8a7f9ed38bf826 Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Fri, 19 Jun 2026 19:04:14 +0800 Subject: [PATCH 2/3] gh-151722: Add free-threading regression test for frozendict read race --- .../test_free_threading/test_frozendict.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 Lib/test/test_free_threading/test_frozendict.py diff --git a/Lib/test/test_free_threading/test_frozendict.py b/Lib/test/test_free_threading/test_frozendict.py new file mode 100644 index 000000000000000..9698d86193d4c9e --- /dev/null +++ b/Lib/test/test_free_threading/test_frozendict.py @@ -0,0 +1,57 @@ +import gc +import unittest +from threading import Event, Thread + +from test.support import threading_helper + + +@threading_helper.requires_working_threading() +class TestFrozenDict(unittest.TestCase): + def test_racing_reads_during_construction(self): + # gh-151722: a frozendict is GC-tracked before it is fully + # populated, so a half-built instance is observable from other + # threads. Reading it with len()/repr()/hash() must not race + # with the construction-time table and ma_used writes. + NUM_KEYS = 8192 + NUM_ROUNDS = 40 + + latest = [frozendict()] # main -> reader handoff, never empty + done = Event() + + class Evil: + def keys(self): + return [f"k{i}" for i in range(NUM_KEYS)] + + def __getitem__(self, key): + if latest[0] is empty: + for obj in gc.get_objects(): + if (isinstance(obj, frozendict) + and 0 < len(obj) < NUM_KEYS): + latest[0] = obj # leak the half-built object + break + return 1 + + empty = latest[0] + + def reader(): + while not done.is_set(): + fd = latest[0] + len(fd) + repr(fd) + hash(fd) + + readers = [Thread(target=reader) for _ in range(3)] + for t in readers: + t.start() + try: + for _ in range(NUM_ROUNDS): + latest[0] = empty + frozendict(Evil()) + finally: + done.set() + for t in readers: + t.join() + + +if __name__ == "__main__": + unittest.main() From 47a49747f46962724560dbf9cd75e72d83c20c33 Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Fri, 19 Jun 2026 22:29:08 +0800 Subject: [PATCH 3/3] gh-151722: Move frozendict free-threading regression test into test_dict.py --- Lib/test/test_free_threading/test_dict.py | 47 ++++++++++++++- .../test_free_threading/test_frozendict.py | 57 ------------------- 2 files changed, 46 insertions(+), 58 deletions(-) delete mode 100644 Lib/test/test_free_threading/test_frozendict.py diff --git a/Lib/test/test_free_threading/test_dict.py b/Lib/test/test_free_threading/test_dict.py index ad23290a92ab345..5f98fb899fcc558 100644 --- a/Lib/test/test_free_threading/test_dict.py +++ b/Lib/test/test_free_threading/test_dict.py @@ -5,7 +5,7 @@ from ast import Or from functools import partial -from threading import Barrier, Thread +from threading import Barrier, Event, Thread from unittest import TestCase try: @@ -336,5 +336,50 @@ def reader(): with threading_helper.start_threads([t1, t2]): pass + def test_racing_frozendict_reads_during_construction(self): + # gh-151722: a frozendict is GC-tracked before it is fully + # populated, so a half-built instance is observable from other + # threads. Reading it with len()/repr()/hash() must not race + # with the construction-time table and ma_used writes. + NUM_KEYS = 8192 + NUM_ROUNDS = 40 + + latest = [frozendict()] # main -> reader handoff, never empty + done = Event() + + class Evil: + def keys(self): + return [f"k{i}" for i in range(NUM_KEYS)] + + def __getitem__(self, key): + if latest[0] is empty: + for obj in gc.get_objects(): + if (isinstance(obj, frozendict) + and 0 < len(obj) < NUM_KEYS): + latest[0] = obj # leak the half-built object + break + return 1 + + empty = latest[0] + + def reader(): + while not done.is_set(): + fd = latest[0] + len(fd) + repr(fd) + hash(fd) + + readers = [Thread(target=reader) for _ in range(3)] + for t in readers: + t.start() + try: + for _ in range(NUM_ROUNDS): + latest[0] = empty + frozendict(Evil()) + finally: + done.set() + for t in readers: + t.join() + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_free_threading/test_frozendict.py b/Lib/test/test_free_threading/test_frozendict.py deleted file mode 100644 index 9698d86193d4c9e..000000000000000 --- a/Lib/test/test_free_threading/test_frozendict.py +++ /dev/null @@ -1,57 +0,0 @@ -import gc -import unittest -from threading import Event, Thread - -from test.support import threading_helper - - -@threading_helper.requires_working_threading() -class TestFrozenDict(unittest.TestCase): - def test_racing_reads_during_construction(self): - # gh-151722: a frozendict is GC-tracked before it is fully - # populated, so a half-built instance is observable from other - # threads. Reading it with len()/repr()/hash() must not race - # with the construction-time table and ma_used writes. - NUM_KEYS = 8192 - NUM_ROUNDS = 40 - - latest = [frozendict()] # main -> reader handoff, never empty - done = Event() - - class Evil: - def keys(self): - return [f"k{i}" for i in range(NUM_KEYS)] - - def __getitem__(self, key): - if latest[0] is empty: - for obj in gc.get_objects(): - if (isinstance(obj, frozendict) - and 0 < len(obj) < NUM_KEYS): - latest[0] = obj # leak the half-built object - break - return 1 - - empty = latest[0] - - def reader(): - while not done.is_set(): - fd = latest[0] - len(fd) - repr(fd) - hash(fd) - - readers = [Thread(target=reader) for _ in range(3)] - for t in readers: - t.start() - try: - for _ in range(NUM_ROUNDS): - latest[0] = empty - frozendict(Evil()) - finally: - done.set() - for t in readers: - t.join() - - -if __name__ == "__main__": - unittest.main()