diff --git a/Lib/test/test_free_threading/test_dict.py b/Lib/test/test_free_threading/test_dict.py index ad23290a92ab345..5f98fb899fcc558 100644 --- a/Lib/test/test_free_threading/test_dict.py +++ b/Lib/test/test_free_threading/test_dict.py @@ -5,7 +5,7 @@ from ast import Or from functools import partial -from threading import Barrier, Thread +from threading import Barrier, Event, Thread from unittest import TestCase try: @@ -336,5 +336,50 @@ def reader(): with threading_helper.start_threads([t1, t2]): pass + def test_racing_frozendict_reads_during_construction(self): + # gh-151722: a frozendict is GC-tracked before it is fully + # populated, so a half-built instance is observable from other + # threads. Reading it with len()/repr()/hash() must not race + # with the construction-time table and ma_used writes. + NUM_KEYS = 8192 + NUM_ROUNDS = 40 + + latest = [frozendict()] # main -> reader handoff, never empty + done = Event() + + class Evil: + def keys(self): + return [f"k{i}" for i in range(NUM_KEYS)] + + def __getitem__(self, key): + if latest[0] is empty: + for obj in gc.get_objects(): + if (isinstance(obj, frozendict) + and 0 < len(obj) < NUM_KEYS): + latest[0] = obj # leak the half-built object + break + return 1 + + empty = latest[0] + + def reader(): + while not done.is_set(): + fd = latest[0] + len(fd) + repr(fd) + hash(fd) + + readers = [Thread(target=reader) for _ in range(3)] + for t in readers: + t.start() + try: + for _ in range(NUM_ROUNDS): + latest[0] = empty + frozendict(Evil()) + finally: + done.set() + for t in readers: + t.join() + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-10-03-42.gh-issue-151722.oTiOPF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-10-03-42.gh-issue-151722.oTiOPF.rst new file mode 100644 index 000000000000000..39a40ced72e1a3e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-10-03-42.gh-issue-151722.oTiOPF.rst @@ -0,0 +1,4 @@ +Fix a data race in the free-threading build where :func:`len`, :func:`repr` +and :func:`hash` on a :class:`frozendict` could observe internal state being +concurrently mutated while the ``frozendict`` was still being constructed from +a user mapping. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index ac2f210d023487d..f76949ffdd88671 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3698,7 +3698,7 @@ dict_length(PyObject *self) static Py_ssize_t frozendict_length(PyObject *self) { - return _PyAnyDict_CAST(self)->ma_used; + return GET_USED(_PyAnyDict_CAST(self)); } PyObject * @@ -4386,7 +4386,7 @@ copy_lock_held(PyObject *o, int as_frozendict) } mp = (PyDictObject *)o; - if (mp->ma_used == 0) { + if (GET_USED(mp) == 0) { /* The dict is empty; just return a new dict. */ if (as_frozendict) { return PyFrozenDict_New(NULL); @@ -8264,11 +8264,14 @@ static PyObject * frozendict_repr(PyObject *self) { PyDictObject *mp = _PyAnyDict_CAST(self); - if (mp->ma_used == 0) { + if (GET_USED(mp) == 0) { return PyUnicode_FromFormat("%s()", Py_TYPE(self)->tp_name); } - PyObject *repr = anydict_repr_impl(self); + PyObject *repr; + Py_BEGIN_CRITICAL_SECTION(self); + repr = anydict_repr_impl(self); + Py_END_CRITICAL_SECTION(); if (repr == NULL) { return NULL; } @@ -8322,14 +8325,9 @@ frozendict_pair_hash(Py_hash_t key_hash, PyObject *value) // Code copied from frozenset_hash() static Py_hash_t -frozendict_hash(PyObject *op) +frozendict_hash_lock_held(PyObject *op) { - PyFrozenDictObject *self = _PyFrozenDictObject_CAST(op); - Py_hash_t shash = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ma_hash); - if (shash != -1) { - return shash; - } - + ASSERT_DICT_LOCKED(op); PyDictObject *mp = _PyAnyDict_CAST(op); Py_uhash_t hash = 0; @@ -8345,7 +8343,7 @@ frozendict_hash(PyObject *op) } /* Factor in the number of active entries */ - hash ^= ((Py_uhash_t)mp->ma_used + 1) * 1927868237UL; + hash ^= ((Py_uhash_t)GET_USED(mp) + 1) * 1927868237UL; /* Disperse patterns arising in nested frozendicts */ hash ^= (hash >> 11) ^ (hash >> 25); @@ -8356,10 +8354,30 @@ frozendict_hash(PyObject *op) hash = 590923713UL; } - FT_ATOMIC_STORE_SSIZE_RELAXED(self->ma_hash, (Py_hash_t)hash); return (Py_hash_t)hash; } +static Py_hash_t +frozendict_hash(PyObject *op) +{ + PyFrozenDictObject *self = _PyFrozenDictObject_CAST(op); + Py_hash_t shash = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ma_hash); + if (shash != -1) { + return shash; + } + + Py_hash_t hash; + Py_BEGIN_CRITICAL_SECTION(op); + hash = frozendict_hash_lock_held(op); + Py_END_CRITICAL_SECTION(); + if (hash == -1) { + return -1; + } + + FT_ATOMIC_STORE_SSIZE_RELAXED(self->ma_hash, hash); + return hash; +} + static PyObject * frozendict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)