-
-
Notifications
You must be signed in to change notification settings - Fork 34.1k
Description
Bug Summary
array.array.tofile(f) pre-computes the total byte count and block count from Py_SIZE(self) once, then iterates in a loop calling f.write() for each 64 KB block. Since f.write() executes arbitrary Python code, a custom file-like object can modify (clear, shrink, or reallocate) the array during the callback. On the next loop iteration, tofile() reads from a stale ob_item pointer, which may have been freed, set to NULL, or reallocated to a smaller buffer resulting in a use-after-free, NULL pointer dereference, or heap-buffer-overflow.
Root Cause
In Modules/arraymodule.c, function array_array_tofile_impl (lines 1662–1691):
static PyObject *
array_array_tofile_impl(arrayobject *self, PyTypeObject *cls, PyObject *f)
{
Py_ssize_t nbytes = Py_SIZE(self) * self->ob_descr->itemsize; // [1] cached once
int BLOCKSIZE = 64*1024;
Py_ssize_t nblocks = (nbytes + BLOCKSIZE - 1) / BLOCKSIZE; // [2] cached once
for (i = 0; i < nblocks; i++) {
char* ptr = self->ob_item + i*BLOCKSIZE; // [3] stale pointer
...
bytes = PyBytes_FromStringAndSize(ptr, size); // [4] read from stale ptr
...
res = PyObject_CallMethodOneArg(f, state->str_write, bytes); // [5] user code runs here
// f.write() can call del arr[:], arr.clear(), arr[:] = ..., etc.
// ob_item is freed/reallocated, but nblocks/nbytes are never updated
// next iteration: ptr points to freed memory
}
}

Step-by-step:
- nbytes and nblocks are computed from Py_SIZE(self) at the start and stored in local variables. They are never rechecked.
- The loop runs nblocks times. Each iteration reads directly from self->ob_item.
- f.write() at line 1686 calls into user Python code. A malicious file-like object can modify the array here.
- On the next iteration, self->ob_item may be NULL (after clear()), freed (after slice assignment), or reallocated to a smaller buffer (after clear() + append()). PyBytes_FromStringAndSize(ptr, size) reads size bytes from the invalid ptr => crash.
Reproduction
Build CPython with ASAN
./configure --with-address-sanitizer --with-pydebug
make -j$(nproc)

PoC 1: NULL dereference (clear => ob_item = NULL)
import array
class EvilWriter:
def __init__(self, arr):
self.arr = arr
self.triggered = False
def write(self, data):
if not self.triggered:
self.triggered = True
del self.arr[:] # frees ob_item, sets it to NULL
return len(data)
# Need >64KB so tofile uses multiple blocks
a = array.array('i', [0x41] * 40000) # ~160KB
a.tofile(EvilWriter(a))

PoC 2: Use-after-free (slice assignment => realloc)
import array, gc
class UAFWriter:
def __init__(self, arr):
self.arr = arr
self.triggered = False
def write(self, data):
if not self.triggered:
self.triggered = True
self.arr[:] = array.array('i', [0xDEAD]) # frees old large buffer
gc.collect()
return len(data)
a = array.array('i', [0x41] * 40000)
a.tofile(UAFWriter(a))

PoC 3: Heap-buffer-overflow (clear + append => tiny new buffer)
import array
class OOBWriter:
def __init__(self, arr):
self.arr = arr
self.triggered = False
def write(self, data):
if not self.triggered:
self.triggered = True
self.arr.clear() # frees buffer
self.arr.append(0xBEEF) # allocates ~16 byte buffer
# tofile still thinks there are ~160KB to read
return len(data)
a = array.array('i', [0x41] * 40000)
a.tofile(OOBWriter(a))

ASAN Output
All three PoCs crash at the same location => arraymodule.c:1683:
AddressSanitizer:DEADLYSIGNAL
=================================================================
==715183==ERROR: AddressSanitizer: SEGV on unknown address 0x504000014c60 (pc 0x71a73bf9570d bp 0x7fff05ee2910 sp 0x7fff05ee28e8 T0)
==715183==The signal is caused by a READ memory access.
#0 0x71a73bf9570d in __memmove_avx_unaligned_erms ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:265
#1 0x569cfa6fcf3f in memcpy /usr/include/x86_64-linux-gnu/bits/string_fortified.h:29
#2 0x569cfa6fcf3f in PyBytes_FromStringAndSize Objects/bytesobject.c:156
#3 0x71a73b53dac7 in array_array_tofile_impl Modules/arraymodule.c:1683
#4 0x71a73b53dcb1 in array_array_tofile Modules/clinic/arraymodule.c.h:472
#5 0x569cfa72e951 in method_vectorcall_FASTCALL_KEYWORDS_METHOD Objects/descrobject.c:381
#6 0x569cfa70ecb6 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:136
#7 0x569cfa70eda9 in PyObject_Vectorcall Objects/call.c:327
#8 0x569cfa98b5cf in _Py_VectorCallInstrumentation_StackRefSteal Python/ceval.c:769
#9 0x569cfa99b56c in _PyEval_EvalFrameDefault Python/generated_cases.c.h:1817
#10 0x569cfa9d27b9 in _PyEval_EvalFrame Include/internal/pycore_ceval.h:118
#11 0x569cfa9d2b1f in _PyEval_Vector Python/ceval.c:2132
#12 0x569cfa9d2dd5 in PyEval_EvalCode Python/ceval.c:680
#13 0x569cfaad60f1 in run_eval_code_obj Python/pythonrun.c:1366
#14 0x569cfaad6437 in run_mod Python/pythonrun.c:1469
#15 0x569cfaad736c in pyrun_file Python/pythonrun.c:1294
#16 0x569cfaada1a2 in _PyRun_SimpleFileObject Python/pythonrun.c:518
#17 0x569cfaada44e in _PyRun_AnyFileObject Python/pythonrun.c:81

- PoC 1: SEGV on 0x10000 => NULL + 65536 (one BLOCKSIZE past NULL)
- PoC 2: SEGV on 0x506000076c90 => freed heap region
- PoC 3: SEGV on 0x504000014820 => past the end of the tiny new buffer
Suggested Fix
Re-check Py_SIZE(self) and self->ob_item on every iteration, after each write() call returns:
diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c
--- a/Modules/arraymodule.c
+++ b/Modules/arraymodule.c
@@ -1659,30 +1659,31 @@ static PyObject *
array_array_tofile_impl(arrayobject *self, PyTypeObject *cls, PyObject *f)
/*[clinic end generated code: output=4560c628d9c18bc2 input=5a24da7a7b407b52]*/
{
- Py_ssize_t nbytes = Py_SIZE(self) * self->ob_descr->itemsize;
- /* Write 64K blocks at a time */
- /* XXX Make the block size settable */
int BLOCKSIZE = 64*1024;
- Py_ssize_t nblocks = (nbytes + BLOCKSIZE - 1) / BLOCKSIZE;
Py_ssize_t i;
if (Py_SIZE(self) == 0)
goto done;
-
array_state *state = get_array_state_by_class(cls);
assert(state != NULL);
- for (i = 0; i < nblocks; i++) {
- char* ptr = self->ob_item + i*BLOCKSIZE;
- Py_ssize_t size = BLOCKSIZE;
- PyObject *bytes, *res;
+ /* Write 64K blocks at a time.
+ * Re-check Py_SIZE on every iteration because f.write() could
+ * execute arbitrary Python code that modifies or clears the array. */
+ for (i = 0; ; i++) {
+ Py_ssize_t nbytes = Py_SIZE(self) * self->ob_descr->itemsize;
+ Py_ssize_t offset = (Py_ssize_t)i * BLOCKSIZE;
+ if (offset >= nbytes)
+ break;
- if (i*BLOCKSIZE + size > nbytes)
- size = nbytes - i*BLOCKSIZE;
- bytes = PyBytes_FromStringAndSize(ptr, size);
+ Py_ssize_t size = BLOCKSIZE;
+ if (offset + size > nbytes)
+ size = nbytes - offset;
+
+ char *ptr = self->ob_item + offset;
+ PyObject *bytes = PyBytes_FromStringAndSize(ptr, size);
if (bytes == NULL)
return NULL;
- res = PyObject_CallMethodOneArg(f, state->str_write, bytes);
+ PyObject *res = PyObject_CallMethodOneArg(f, state->str_write, bytes);
Py_DECREF(bytes);
if (res == NULL)
return NULL;
Instead of computing nbytes/nblocks once before the loop, nbytes is recomputed from Py_SIZE(self) at the start of every iteration. If the array was shrunk or cleared by the write() callback, the loop sees the updated size and terminates safely.
CPython versions tested on:
CPython main branch
Operating systems tested on:
Linux