Bug report
Bug description:
In the free-threaded build, iterating an OrderedDict concurrently with update()/clear() data-races on the ordered-node linked list and od_state, and crashes with a SEGV (use-after-free) in the iterator.
OrderedDict keeps a doubly-linked list of _ODictNodes and an od_state counter separate from the dict.
The reader odictiter_new() (the body of tp_iter / __reversed__), reads the
- list head/tail (
_odict_FIRST/_odict_LAST(od)),
- the current node's key (
_odictnode_KEY(node)),
od->od_state
without holding the OrderedDict's critical section.
|
di->kind = kind; |
|
node = reversed ? _odict_LAST(od) : _odict_FIRST(od); |
|
di->di_current = node ? Py_NewRef(_odictnode_KEY(node)) : NULL; |
|
di->di_size = PyODict_SIZE(od); |
|
di->di_state = od->od_state; |
|
di->di_odict = (PyODictObject*)Py_NewRef(od); |
The same fields are written by _odict_add_new_node and _odict_add_tail (on update()) while holding the OrderedDict lock,
|
static void |
|
_odict_add_tail(PyODictObject *od, _ODictNode *node) |
|
{ |
|
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); |
|
_odictnode_PREV(node) = _odict_LAST(od); |
|
_odictnode_NEXT(node) = NULL; |
|
if (_odict_LAST(od) == NULL) |
|
_odict_FIRST(od) = node; |
|
else |
|
_odictnode_NEXT(_odict_LAST(od)) = node; |
|
_odict_LAST(od) = node; |
|
od->od_state++; |
|
} |
|
|
|
/* adds the node to the end of the list */ |
|
static int |
|
_odict_add_new_node(PyODictObject *od, PyObject *key, Py_hash_t hash) |
|
{ |
|
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); |
|
Py_ssize_t i; |
|
_ODictNode *node; |
|
|
|
Py_INCREF(key); |
|
i = _odict_get_index(od, key, hash); |
|
if (i < 0) { |
|
if (!PyErr_Occurred()) |
|
PyErr_SetObject(PyExc_KeyError, key); |
|
Py_DECREF(key); |
|
return -1; |
|
} |
|
assert(od->od_fast_nodes != NULL); |
|
if (od->od_fast_nodes[i] != NULL) { |
|
/* We already have a node for the key so there's no need to add one. */ |
|
Py_DECREF(key); |
|
return 0; |
|
} |
|
|
|
/* must not be added yet */ |
|
node = (_ODictNode *)PyMem_Malloc(sizeof(_ODictNode)); |
|
if (node == NULL) { |
|
Py_DECREF(key); |
|
PyErr_NoMemory(); |
|
return -1; |
|
} |
|
|
|
_odictnode_KEY(node) = key; |
|
_odictnode_HASH(node) = hash; |
|
_odict_add_tail(od, node); |
|
od->od_fast_nodes[i] = node; |
and freed by _odict_clear_nodes (on clear()) — which clears _odict_FIRST/_odict_LAST, bumps od_state, and deallocates every node:
|
static void |
|
_odict_clear_nodes(PyODictObject *od) |
|
{ |
|
_ODictNode *node, *next; |
|
|
|
PyMem_Free(od->od_fast_nodes); |
|
od->od_fast_nodes = NULL; |
|
od->od_fast_nodes_size = 0; |
|
od->od_resize_sentinel = NULL; |
|
|
|
node = _odict_FIRST(od); |
|
_odict_FIRST(od) = NULL; |
|
_odict_LAST(od) = NULL; |
|
while (node != NULL) { |
|
next = _odictnode_NEXT(node); |
|
_odictnode_DEALLOC(node); |
|
node = next; |
|
} |
|
od->od_state++; |
|
} |
Because the reader (odictiter_new) holds no lock, a concurrent clear() can free a node that it is dereferencing in _odictnode_KEY(node) → data race and use-after-free / SEGV.
Reproducer:
import collections
from threading import Thread
shared = collections.OrderedDict((i, i) for i in range(100))
def mutator():
for i in range(20000):
shared.clear()
shared.update(((i, i), (i + 1, i + 1), (i + 2, i + 2)))
def iterator():
for _ in range(20000):
try:
for _ in shared:
pass
list(reversed(shared))
except Exception:
pass
if __name__ == "__main__":
threads = [Thread(target=mutator)]
threads += [Thread(target=iterator) for _ in range(8)]
for t in threads: t.start()
for t in threads: t.join()
TSAN Report:
==================
WARNING: ThreadSanitizer: data race (pid=2281797)
Read of size 8 at 0x7bf61c3733c0 by thread T2:
#0 odictiter_new /cpython/Objects/odictobject.c:1945:12
#1 odict_iter /cpython/Objects/odictobject.c:1536:12
#2 PyObject_GetIter /cpython/Objects/abstract.c:2825:25
#3 _PyEval_GetIter /cpython/Python/ceval.c:1142:24
#4 _PyEval_EvalFrameDefault /cpython/Python/generated_cases.c.h:6595:38
...
Previous write of size 8 at 0x7bf61c3733c0 by thread T1:
#0 _odict_add_tail /cpython/Objects/odictobject.c
#1 _odict_add_new_node /cpython/Objects/odictobject.c:713:5
#2 _PyODict_SetItem_KnownHash_LockHeld /cpython/Objects/odictobject.c:1627:15
#3 PyODict_SetItem_LockHeld /cpython/Objects/odictobject.c:1647:12
#4 PyODict_SetItem /cpython/Objects/odictobject.c:1655:11
#5 odict_mp_ass_sub /cpython/Objects/odictobject.c:878:16
#6 PyObject_SetItem /cpython/Objects/abstract.c:245:19
#7 mutablemapping_add_pairs /cpython/Objects/odictobject.c:2251:15
#8 mutablemapping_update_arg /cpython/Objects/odictobject.c:2328:11
#9 mutablemapping_update /cpython/Objects/odictobject.c:2349:15
#10 method_vectorcall_VARARGS_KEYWORDS /cpython/Objects/descrobject.c:359:14
#11 _PyObject_VectorcallTstate /cpython/./Include/internal/pycore_call.h:144:11
#12 PyObject_Vectorcall /cpython/Objects/call.c:327:12
#13 _Py_VectorCall_StackRefSteal /cpython/Python/ceval.c:726:11
#14 _PyEval_EvalFrameDefault /cpython/Python/generated_cases.c.h:4362:35
...
SUMMARY: ThreadSanitizer: data race /cpython/Objects/odictobject.c:1945:12 in odictiter_new
==================
...
==================
WARNING: ThreadSanitizer: data race (pid=2282562)
Read of size 8 at 0x7fffb8120540 by thread T2:
#0 odictiter_new /cpython/Objects/odictobject.c:1946:29
#1 odict_reversed /cpython/Objects/odictobject.c:1305:12
#2 cfunction_vectorcall_NOARGS /cpython/Objects/methodobject.c:508:24
#3 _PyObject_VectorcallTstate /cpython/./Include/internal/pycore_call.h:144:11
#4 _PyObject_CallNoArgs /cpython/./Include/internal/pycore_call.h:160:12
#5 reversed_new_impl /cpython/Objects/enumobject.c:379:25
#6 reversed_vectorcall /cpython/Objects/enumobject.c:419:12
#7 _Py_CallBuiltinClass_StackRef /cpython/Python/ceval.c:901:11
#8 _PyEval_EvalFrameDefault /cpython/Python/generated_cases.c.h:2333:35
...
Previous write of size 8 at 0x7fffb8120540 by thread T1:
#0 _odict_add_new_node /cpython/Objects/odictobject.c:711:26
#1 _PyODict_SetItem_KnownHash_LockHeld /cpython/Objects/odictobject.c:1627:15
#2 PyODict_SetItem_LockHeld /cpython/Objects/odictobject.c:1647:12
#3 PyODict_SetItem /cpython/Objects/odictobject.c:1655:11
#4 odict_mp_ass_sub /cpython/Objects/odictobject.c:878:16
#5 PyObject_SetItem /cpython/Objects/abstract.c:245:19
#6 mutablemapping_add_pairs /cpython/Objects/odictobject.c:2251:15
#7 mutablemapping_update_arg /cpython/Objects/odictobject.c:2328:11
#8 mutablemapping_update /cpython/Objects/odictobject.c:2349:15
#9 method_vectorcall_VARARGS_KEYWORDS /cpython/Objects/descrobject.c:359:14
#10 _PyObject_VectorcallTstate /cpython/./Include/internal/pycore_call.h:144:11
#11 PyObject_Vectorcall /cpython/Objects/call.c:327:12
#12 _Py_VectorCall_StackRefSteal /cpython/Python/ceval.c:726:11
#13 _PyEval_EvalFrameDefault /cpython/Python/generated_cases.c.h:4362:35
...
SUMMARY: ThreadSanitizer: data race /cpython/Objects/odictobject.c:1946:29 in odictiter_new
==================
...
==================
WARNING: ThreadSanitizer: data race (pid=2281797)
Atomic write of size 8 at 0x7bf61e0e0190 by thread T3:
#0 _Py_atomic_add_ssize /cpython/./Include/cpython/pyatomic_gcc.h:63:10
#1 Py_INCREF /cpython/./Include/refcount.h:283:9
#2 _Py_NewRef /cpython/./Include/refcount.h:536:5
#3 odictiter_new /cpython/Objects/odictobject.c:1946:29
#4 odict_iter /cpython/Objects/odictobject.c:1536:12
#5 PyObject_GetIter /cpython/Objects/abstract.c:2825:25
#6 _PyEval_GetIter /cpython/Python/ceval.c:1142:24
#7 _PyEval_EvalFrameDefault /cpython/Python/generated_cases.c.h:6595:38
...
Previous write of size 8 at 0x7bf61e0e0190 by thread T1:
#0 _odict_add_tail /cpython/Objects/odictobject.c:671:27
#1 _odict_add_new_node /cpython/Objects/odictobject.c:713:5
#2 _PyODict_SetItem_KnownHash_LockHeld /cpython/Objects/odictobject.c:1627:15
#3 PyODict_SetItem_LockHeld /cpython/Objects/odictobject.c:1647:12
#4 PyODict_SetItem /cpython/Objects/odictobject.c:1655:11
#5 odict_mp_ass_sub /cpython/Objects/odictobject.c:878:16
#6 PyObject_SetItem /cpython/Objects/abstract.c:245:19
#7 mutablemapping_add_pairs /cpython/Objects/odictobject.c:2251:15
#8 mutablemapping_update_arg /cpython/Objects/odictobject.c:2328:11
#9 mutablemapping_update /cpython/Objects/odictobject.c:2349:15
#10 method_vectorcall_VARARGS_KEYWORDS /cpython/Objects/descrobject.c:359:14
#11 _PyObject_VectorcallTstate /cpython/./Include/internal/pycore_call.h:144:11
#12 PyObject_Vectorcall /cpython/Objects/call.c:327:12
#13 _Py_VectorCall_StackRefSteal /cpython/Python/ceval.c:726:11
#14 _PyEval_EvalFrameDefault /cpython/Python/generated_cases.c.h:4362:35
...
SUMMARY: ThreadSanitizer: data race /cpython/./Include/cpython/pyatomic_gcc.h:63:10 in _Py_atomic_add_ssize
==================
ThreadSanitizer:DEADLYSIGNAL
==2281797==ERROR: ThreadSanitizer: SEGV on unknown address 0x000000000059 (pc 0x000000000059 bp 0x7bf615ffd840 sp 0x7bf615ffd818 T2281801)
==2281797==Hint: pc points to the zero page.
==2281797==The signal is caused by a READ memory access.
==2281797==Hint: address points to the zero page.
ThreadSanitizer:DEADLYSIGNAL
ThreadSanitizer: nested bug in the same thread, aborting.
CPython versions tested on:
CPython main branch
Operating systems tested on:
Linux
Bug report
Bug description:
In the free-threaded build, iterating an
OrderedDictconcurrently withupdate()/clear()data-races on the ordered-node linked list andod_state, and crashes with a SEGV (use-after-free) in the iterator.OrderedDictkeeps a doubly-linked list of_ODictNodes and anod_statecounter separate from the dict.The reader
odictiter_new()(the body oftp_iter/__reversed__), reads the_odict_FIRST/_odict_LAST(od)),_odictnode_KEY(node)),od->od_statewithout holding the OrderedDict's critical section.
cpython/Objects/odictobject.c
Lines 1944 to 1949 in 9e863fa
The same fields are written by
_odict_add_new_nodeand_odict_add_tail(onupdate()) while holding the OrderedDict lock,cpython/Objects/odictobject.c
Lines 666 to 714 in 9e863fa
and freed by
_odict_clear_nodes(onclear()) — which clears_odict_FIRST/_odict_LAST, bumpsod_state, and deallocates every node:cpython/Objects/odictobject.c
Lines 794 to 813 in 9e863fa
Because the reader (
odictiter_new) holds no lock, a concurrentclear()can free a node that it is dereferencing in_odictnode_KEY(node)→ data race and use-after-free / SEGV.Reproducer:
TSAN Report:
CPython versions tested on:
CPython main branch
Operating systems tested on:
Linux