Skip to content

Commit be17cfd

Browse files
committed
Use pointer for interp->obmalloc state.
For interpreters that share state with the main interpreter, this points to the same static memory structure. For interpreters with their own obmalloc state, it is heap allocated. Add free_obmalloc_arenas() which will free the obmalloc arenas and radix tree structures for interpreters with their own obmalloc state.
1 parent daa658a commit be17cfd

File tree

8 files changed

+140
-24
lines changed

8 files changed

+140
-24
lines changed

Include/internal/pycore_interp.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,17 @@ struct _is {
166166
struct _warnings_runtime_state warnings;
167167
struct atexit_state atexit;
168168

169-
struct _obmalloc_state obmalloc;
169+
// Per-interpreter state for the obmalloc allocator. For the main
170+
// interpreter and for all interpreters that don't have their
171+
// own obmalloc state, this points to the static structure in
172+
// obmalloc.c obmalloc_state_main. For other interpreters, it is
173+
// heap allocated by _PyMem_init_obmalloc() and freed when the
174+
// interpreter structure is freed. In the case of a heap allocated
175+
// obmalloc state, it is not safe to hold on to or use memory after
176+
// the interpreter is freed. The obmalloc state corresponding to
177+
// that allocated memory is gone. See free_obmalloc_arenas() for
178+
// more comments.
179+
struct _obmalloc_state *obmalloc;
170180

171181
PyObject *audit_hooks;
172182
PyType_WatchCallback type_watchers[TYPE_MAX_WATCHERS];

Include/internal/pycore_obmalloc.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,15 @@ struct _obmalloc_state {
668668
#if WITH_PYMALLOC_RADIX_TREE
669669
struct _obmalloc_usage usage;
670670
#endif
671+
// true if the obmalloc state has been initialized. This must be done
672+
// before the malloc/free functions of obmalloc are called and must be
673+
// done only once per obmalloc state. The function _PyMem_init_obmalloc()
674+
// does the initialization.
675+
bool initialized;
676+
// true if this structure is heap allocated by PyMem_RawCalloc(). For
677+
// the main interpreter, this structure is statically allocated (in the
678+
// BSS). Using the BSS gives some performance win.
679+
bool heap_allocated;
671680
};
672681

673682

Include/internal/pycore_obmalloc_init.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,6 @@ extern "C" {
5959
.dump_debug_stats = -1, \
6060
}
6161

62-
#define _obmalloc_state_INIT(obmalloc) \
63-
{ \
64-
.pools = { \
65-
.used = _obmalloc_pools_INIT(obmalloc.pools), \
66-
}, \
67-
}
68-
6962

7063
#ifdef __cplusplus
7164
}

Include/internal/pycore_runtime_init.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ extern PyTypeObject _PyExc_MemoryError;
151151
{ \
152152
.id_refcount = -1, \
153153
.imports = IMPORTS_INIT, \
154-
.obmalloc = _obmalloc_state_INIT(INTERP.obmalloc), \
154+
/* initialized by _PyMem_init_obmalloc() */ \
155+
.obmalloc = 0, \
155156
.ceval = { \
156157
.recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \
157158
}, \
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Make interp->obmalloc a pointer. For interpreters that share state with the
2+
main interpreter, this points to the same static memory structure. For
3+
interpreters with their own obmalloc state, it is heap allocated. Add
4+
free_obmalloc_arenas() which will free the obmalloc arenas and radix tree
5+
structures for interpreters with their own obmalloc state.

Objects/obmalloc.c

Lines changed: 89 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "pycore_pyerrors.h" // _Py_FatalErrorFormat()
88
#include "pycore_pymem.h"
99
#include "pycore_pystate.h" // _PyInterpreterState_GET
10+
#include "pycore_obmalloc_init.h"
1011

1112
#include <stdlib.h> // malloc()
1213
#include <stdbool.h>
@@ -967,6 +968,12 @@ static int running_on_valgrind = -1;
967968

968969
typedef struct _obmalloc_state OMState;
969970

971+
/* obmalloc state for main interpreter and shared by all interpreters without
972+
* their own obmalloc state. By not explicitly initializing this structure, it
973+
* will be allocated in the BSS which is a small performance win. The radix
974+
* tree arrays are fairly large but are sparsely used. */
975+
static struct _obmalloc_state obmalloc_state_main;
976+
970977
static inline int
971978
has_own_state(PyInterpreterState *interp)
972979
{
@@ -979,10 +986,8 @@ static inline OMState *
979986
get_state(void)
980987
{
981988
PyInterpreterState *interp = _PyInterpreterState_GET();
982-
if (!has_own_state(interp)) {
983-
interp = _PyInterpreterState_Main();
984-
}
985-
return &interp->obmalloc;
989+
assert(interp->obmalloc != NULL); // otherwise not initialized or freed
990+
return interp->obmalloc;
986991
}
987992

988993
// These macros all rely on a local "state" variable.
@@ -1030,7 +1035,11 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
10301035
"the interpreter doesn't have its own allocator");
10311036
}
10321037
#endif
1033-
OMState *state = &interp->obmalloc;
1038+
OMState *state = interp->obmalloc;
1039+
1040+
if (state == NULL) {
1041+
return 0;
1042+
}
10341043

10351044
Py_ssize_t n = raw_allocated_blocks;
10361045
/* add up allocated blocks for used pools */
@@ -1052,6 +1061,8 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
10521061
return n;
10531062
}
10541063

1064+
static void free_obmalloc_arenas(PyInterpreterState *interp);
1065+
10551066
void
10561067
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
10571068
{
@@ -1060,10 +1071,20 @@ _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
10601071
return;
10611072
}
10621073
#endif
1063-
if (has_own_state(interp)) {
1074+
if (has_own_state(interp) && interp->obmalloc != NULL) {
10641075
Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
10651076
assert(has_own_state(interp) || leaked == 0);
10661077
interp->runtime->obmalloc.interpreter_leaks += leaked;
1078+
if (interp->obmalloc->heap_allocated && leaked == 0) {
1079+
// free the obmalloc arenas and radix tree nodes. If leaked > 0
1080+
// then some of the memory allocated by obmalloc has not been
1081+
// freed. It might be safe to free the arenas in that case but
1082+
// it's possible that extension modules are still using that
1083+
// memory. So, it is safer to not free and to leak. Perhaps there
1084+
// should be warning when this happens. It should be possible to
1085+
// use a tool like "-fsanitize=address" to track down these leaks.
1086+
free_obmalloc_arenas(interp);
1087+
}
10671088
}
10681089
}
10691090

@@ -2612,7 +2633,6 @@ _PyObject_DebugDumpAddress(const void *p)
26122633
_PyMem_DumpTraceback(fileno(stderr), p);
26132634
}
26142635

2615-
26162636
static size_t
26172637
printone(FILE *out, const char* msg, size_t value)
26182638
{
@@ -2663,9 +2683,71 @@ _PyDebugAllocatorStats(FILE *out,
26632683
(void)printone(out, buf2, num_blocks * sizeof_block);
26642684
}
26652685

2686+
int _PyMem_init_obmalloc(PyInterpreterState *interp, _PyRuntimeState *runtime)
2687+
{
2688+
#ifdef WITH_PYMALLOC
2689+
/* Select the obmalloc state: the main interpreter and interpreters
2690+
sharing its state use the static one; others get a heap-allocated one. */
2691+
if (interp == &runtime->_main_interpreter
2692+
|| (interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
2693+
interp->obmalloc = &obmalloc_state_main;
2694+
interp->obmalloc->heap_allocated = false;
2695+
} else {
2696+
interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
2697+
if (interp->obmalloc == NULL) {
2698+
return 0;
2699+
}
2700+
interp->obmalloc->heap_allocated = true;
2701+
}
2702+
if (!interp->obmalloc->initialized) {
2703+
// initialize the obmalloc->pools structure. This must be done
2704+
// before the obmalloc alloc/free functions can be called.
2705+
poolp temp[OBMALLOC_USED_POOLS_SIZE] =
2706+
_obmalloc_pools_INIT(interp->obmalloc->pools);
2707+
memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
2708+
interp->obmalloc->initialized = true;
2709+
}
2710+
#endif /* WITH_PYMALLOC */
2711+
return 1;
2712+
}
2713+
26662714

26672715
#ifdef WITH_PYMALLOC
26682716

2717+
static void
2718+
free_obmalloc_arenas(PyInterpreterState *interp)
2719+
{
2720+
OMState *state = interp->obmalloc;
2721+
for (uint i = 0; i < maxarenas; ++i) {
2722+
// free each obmalloc memory arena
2723+
struct arena_object *ao = &allarenas[i];
2724+
_PyObject_Arena.free(_PyObject_Arena.ctx,
2725+
(void *)ao->address, ARENA_SIZE);
2726+
}
2727+
// free the array containing pointers to all arenas
2728+
PyMem_RawFree(allarenas);
2729+
#if WITH_PYMALLOC_RADIX_TREE
2730+
#ifdef USE_INTERIOR_NODES
2731+
// Free the middle and bottom nodes of the radix tree. These are allocated
2732+
// by arena_map_mark_used() but not freed when arenas are freed.
2733+
for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
2734+
arena_map_mid_t *mid = arena_map_root.ptrs[i1];
2735+
if (mid == NULL) {
2736+
continue;
2737+
}
2738+
for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
2739+
arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
2740+
if (bot == NULL) {
2741+
continue;
2742+
}
2743+
PyMem_RawFree(bot);
2744+
}
2745+
PyMem_RawFree(mid);
2746+
}
2747+
#endif
2748+
#endif
2749+
}
2750+
26692751
#ifdef Py_DEBUG
26702752
/* Is target in the list? The list is traversed via the nextpool pointers.
26712753
* The list may be NULL-terminated, or circular. Return 1 if target is in

Python/pylifecycle.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,10 @@ init_interp_create_gil(PyThreadState *tstate, int gil)
606606
}
607607

608608

609+
// defined in obmalloc.c
610+
int _PyMem_init_obmalloc(PyInterpreterState *interp, _PyRuntimeState *runtime);
611+
612+
609613
static PyStatus
610614
pycore_create_interpreter(_PyRuntimeState *runtime,
611615
const PyConfig *src_config,
@@ -639,6 +643,13 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
639643
return status;
640644
}
641645

646+
// initialize the interp->obmalloc state. This must be done after
647+
// the settings are loaded (so that feature_flags are set) but before
648+
// any calls are made to obmalloc functions.
649+
if (!_PyMem_init_obmalloc(interp, runtime)) {
650+
return _PyStatus_NO_MEMORY();
651+
}
652+
642653
PyThreadState *tstate = _PyThreadState_New(interp,
643654
_PyThreadState_WHENCE_INTERP);
644655
if (tstate == NULL) {
@@ -2121,6 +2132,14 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config)
21212132
goto error;
21222133
}
21232134

2135+
// initialize the interp->obmalloc state. This must be done after
2136+
// the settings are loaded (so that feature_flags are set) but before
2137+
// any calls are made to obmalloc functions.
2138+
if (!_PyMem_init_obmalloc(interp, runtime)) {
2139+
status = _PyStatus_NO_MEMORY();
2140+
goto error;
2141+
}
2142+
21242143
tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_INTERP);
21252144
if (tstate == NULL) {
21262145
status = _PyStatus_NO_MEMORY();

Python/pystate.c

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,11 @@ free_interpreter(PyInterpreterState *interp)
547547
// The main interpreter is statically allocated so
548548
// should not be freed.
549549
if (interp != &_PyRuntime._main_interpreter) {
550+
if (interp->obmalloc && interp->obmalloc->heap_allocated) {
551+
// interpreter has its own obmalloc state, free it
552+
PyMem_RawFree(interp->obmalloc);
553+
interp->obmalloc = NULL;
554+
}
550555
PyMem_RawFree(interp);
551556
}
552557
}
@@ -589,14 +594,6 @@ init_interpreter(PyInterpreterState *interp,
589594
assert(next != NULL || (interp == runtime->interpreters.main));
590595
interp->next = next;
591596

592-
/* Initialize obmalloc, but only for subinterpreters,
593-
since the main interpreter is initialized statically. */
594-
if (interp != &runtime->_main_interpreter) {
595-
poolp temp[OBMALLOC_USED_POOLS_SIZE] = \
596-
_obmalloc_pools_INIT(interp->obmalloc.pools);
597-
memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp));
598-
}
599-
600597
PyStatus status = _PyObject_InitState(interp);
601598
if (_PyStatus_EXCEPTION(status)) {
602599
return status;

0 commit comments

Comments
 (0)