aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jason Evans <jasone@canonware.com> 2014-10-07 23:14:57 -0700
committer Jason Evans <jasone@canonware.com> 2014-10-07 23:14:57 -0700
commit 8bb3198f72fc7587dc93527f9f19fb5be52fa553 (patch)
tree 5a530b077511daf9afe60be7280907937924f52f
parent bf40641c5c9496d2912ad9ff2c38ee9ce2bfbde6 (diff)
download platform_external_jemalloc_new-8bb3198f72fc7587dc93527f9f19fb5be52fa553.tar.gz
platform_external_jemalloc_new-8bb3198f72fc7587dc93527f9f19fb5be52fa553.tar.bz2
platform_external_jemalloc_new-8bb3198f72fc7587dc93527f9f19fb5be52fa553.zip
Refactor/fix arenas manipulation.

Abstract arenas access to use arena_get() (or a0get() where appropriate) rather than directly reading e.g. arenas[ind]. Prior to the addition of the arenas.extend mallctl, the worst possible outcome of directly accessing arenas was a stale read, but arenas.extend may allocate and assign a new array to arenas.

Add a tsd-based arenas_cache, which amortizes arenas reads. This introduces some subtle bootstrapping issues, with tsd_boot() now being split into tsd_boot[01]() to support tsd wrapper allocation bootstrapping, as well as an arenas_cache_bypass tsd variable which dynamically terminates allocation of arenas_cache itself.

Promote a0malloc(), a0calloc(), and a0free() to be generally useful for internal allocation, and use them in several places (more may be appropriate).

Abstract arena->nthreads management and fix a missing decrement during thread destruction (recent tsd refactoring left arenas_cleanup() unused).

Change arena_choose() to propagate OOM, and handle OOM in all callers. This is important for providing consistent allocation behavior when the MALLOCX_ARENA() flag is being used. Prior to this fix, it was possible for an OOM to result in allocation silently allocating from a different arena than the one specified.
-rw-r--r-- include/jemalloc/internal/arena.h 14
-rw-r--r-- include/jemalloc/internal/jemalloc_internal.h.in 90
-rw-r--r-- include/jemalloc/internal/private_symbols.txt 28
-rw-r--r-- include/jemalloc/internal/tcache.h 1
-rw-r--r-- include/jemalloc/internal/tsd.h 239
-rw-r--r-- src/arena.c 30
-rw-r--r-- src/chunk.c 10
-rw-r--r-- src/ctl.c 119
-rw-r--r-- src/huge.c 6
-rw-r--r-- src/jemalloc.c 516
-rw-r--r-- src/tcache.c 14
-rw-r--r-- src/tsd.c 19
-rw-r--r-- test/unit/tsd.c 1
13 files changed, 740 insertions, 347 deletions
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 681b5802..894ce9af 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -389,7 +389,7 @@ bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive,
size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
malloc_large_stats_t *lstats);
-bool arena_new(arena_t *arena, unsigned ind);
+arena_t *arena_new(unsigned ind);
void arena_boot(void);
void arena_prefork(arena_t *arena);
void arena_postfork_parent(arena_t *arena);
@@ -924,8 +924,10 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
true)) != NULL))
return (tcache_alloc_small(tcache, size, zero));
else {
- return (arena_malloc_small(choose_arena(tsd, arena),
- size, zero));
+ arena = arena_choose(tsd, arena);
+ if (unlikely(arena == NULL))
+ return (NULL);
+ return (arena_malloc_small(arena, size, zero));
}
} else {
/*
@@ -936,8 +938,10 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
tcache_get(tsd, true)) != NULL))
return (tcache_alloc_large(tcache, size, zero));
else {
- return (arena_malloc_large(choose_arena(tsd, arena),
- size, zero));
+ arena = arena_choose(tsd, arena);
+ if (unlikely(arena == NULL))
+ return (NULL);
+ return (arena_malloc_large(arena, size, zero));
}
}
}
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 8f0beb9e..c7a5fd8a 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -386,20 +386,6 @@ extern bool in_valgrind;
/* Number of CPUs. */
extern unsigned ncpus;
-/* Protects arenas initialization (arenas, arenas_total). */
-extern malloc_mutex_t arenas_lock;
-/*
- * Arenas that are used to service external requests. Not all elements of the
- * arenas array are necessarily used; arenas are created lazily as needed.
- *
- * arenas[0..narenas_auto) are used for automatic multiplexing of threads and
- * arenas. arenas[narenas_auto..narenas_total) are only used if the application
- * takes some action to create them and allocate from them.
- */
-extern arena_t **arenas;
-extern unsigned narenas_total;
-extern unsigned narenas_auto; /* Read-only after initialization. */
-
/*
* index2size_tab encodes the same information as could be computed (at
* unacceptable cost in some code paths) by index2size_compute().
@@ -412,11 +398,23 @@ extern size_t const index2size_tab[NSIZES];
*/
extern uint8_t const size2index_tab[];
+arena_t *a0get(void);
+void *a0malloc(size_t size);
+void *a0calloc(size_t num, size_t size);
+void a0free(void *ptr);
arena_t *arenas_extend(unsigned ind);
-arena_t *choose_arena_hard(tsd_t *tsd);
+arena_t *arena_init(unsigned ind);
+unsigned narenas_total_get(void);
+arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing);
+arena_t *arena_choose_hard(tsd_t *tsd);
+void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind);
+unsigned arena_nbound(unsigned ind);
void thread_allocated_cleanup(tsd_t *tsd);
void thread_deallocated_cleanup(tsd_t *tsd);
void arena_cleanup(tsd_t *tsd);
+void arenas_cache_cleanup(tsd_t *tsd);
+void narenas_cache_cleanup(tsd_t *tsd);
+void arenas_cache_bypass_cleanup(tsd_t *tsd);
void jemalloc_prefork(void);
void jemalloc_postfork_parent(void);
void jemalloc_postfork_child(void);
@@ -475,8 +473,9 @@ size_t s2u_compute(size_t size);
size_t s2u_lookup(size_t size);
size_t s2u(size_t size);
size_t sa2u(size_t size, size_t alignment);
-unsigned narenas_total_get(void);
-arena_t *choose_arena(tsd_t *tsd, arena_t *arena);
+arena_t *arena_choose(tsd_t *tsd, arena_t *arena);
+arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing,
+ bool refresh_if_missing);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
@@ -709,34 +708,51 @@ sa2u(size_t size, size_t alignment)
return (usize);
}
-JEMALLOC_INLINE unsigned
-narenas_total_get(void)
-{
- unsigned narenas;
-
- malloc_mutex_lock(&arenas_lock);
- narenas = narenas_total;
- malloc_mutex_unlock(&arenas_lock);
-
- return (narenas);
-}
-
/* Choose an arena based on a per-thread value. */
JEMALLOC_INLINE arena_t *
-choose_arena(tsd_t *tsd, arena_t *arena)
+arena_choose(tsd_t *tsd, arena_t *arena)
{
arena_t *ret;
if (arena != NULL)
return (arena);
- if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) {
- ret = choose_arena_hard(tsd);
- assert(ret != NULL);
- }
+ if (unlikely((ret = tsd_arena_get(tsd)) == NULL))
+ ret = arena_choose_hard(tsd);
return (ret);
}
+
+JEMALLOC_INLINE arena_t *
+arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing,
+ bool refresh_if_missing)
+{
+ arena_t *arena;
+ arena_t **arenas_cache = tsd_arenas_cache_get(tsd);
+
+ /* init_if_missing requires refresh_if_missing. */
+ assert(!init_if_missing || refresh_if_missing);
+
+ if (unlikely(arenas_cache == NULL)) {
+ /* arenas_cache hasn't been initialized yet. */
+ return (arena_get_hard(tsd, ind, init_if_missing));
+ }
+ if (unlikely(ind >= tsd_narenas_cache_get(tsd))) {
+ /*
+ * ind is invalid, cache is old (too small), or arena to be
+ * initialized.
+ */
+ return (refresh_if_missing ? arena_get_hard(tsd, ind,
+ init_if_missing) : NULL);
+ }
+ arena = arenas_cache[ind];
+ if (likely(arena != NULL) || !refresh_if_missing)
+ return (arena);
+ if (init_if_missing)
+ return (arena_get_hard(tsd, ind, init_if_missing));
+ else
+ return (NULL);
+}
#endif
#include "jemalloc/internal/bitmap.h"
@@ -833,8 +849,10 @@ ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache,
ret = arena_malloc(tsd, arena, usize, zero, try_tcache);
else {
if (usize <= arena_maxclass) {
- ret = arena_palloc(choose_arena(tsd, arena), usize,
- alignment, zero);
+ arena = arena_choose(tsd, arena);
+ if (unlikely(arena == NULL))
+ return (NULL);
+ ret = arena_palloc(arena, usize, alignment, zero);
} else if (alignment <= chunksize)
ret = huge_malloc(tsd, arena, usize, zero);
else
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index 1a7fde4b..d5e6fdcf 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -1,11 +1,16 @@
a0calloc
a0free
+a0get
a0malloc
+arena_get
+arena_get_hard
arena_alloc_junk_small
arena_bin_index
arena_bin_info
arena_bitselm_get
arena_boot
+arena_choose
+arena_choose_hard
arena_chunk_alloc_huge
arena_chunk_dalloc_huge
arena_cleanup
@@ -19,6 +24,7 @@ arena_dalloc_large_locked
arena_dalloc_small
arena_dss_prec_get
arena_dss_prec_set
+arena_init
arena_malloc
arena_malloc_large
arena_malloc_small
@@ -42,9 +48,11 @@ arena_mapbitsp_read
arena_mapbitsp_write
arena_maxclass
arena_maxrun
+arena_migrate
arena_miscelm_get
arena_miscelm_to_pageind
arena_miscelm_to_rpages
+arena_nbound
arena_new
arena_palloc
arena_postfork_child
@@ -69,10 +77,8 @@ arena_salloc
arena_sdalloc
arena_stats_merge
arena_tcache_fill_small
-arenas
-arenas_cleanup
-arenas_extend
-arenas_lock
+arenas_cache_bypass_cleanup
+arenas_cache_cleanup
atomic_add_u
atomic_add_uint32
atomic_add_uint64
@@ -100,8 +106,6 @@ bitmap_size
bitmap_unset
bt_init
buferror
-choose_arena
-choose_arena_hard
chunk_alloc_arena
chunk_alloc_base
chunk_alloc_default
@@ -247,7 +251,8 @@ malloc_mutex_unlock
malloc_printf
malloc_snprintf
malloc_strtoumax
-malloc_tsd_boot
+malloc_tsd_boot0
+malloc_tsd_boot1
malloc_tsd_cleanup_register
malloc_tsd_dalloc
malloc_tsd_malloc
@@ -259,8 +264,7 @@ map_bias
map_misc_offset
mb_write
mutex_boot
-narenas_auto
-narenas_total
+narenas_cache_cleanup
narenas_total_get
ncpus
nhbins
@@ -363,6 +367,7 @@ tcache_alloc_small
tcache_alloc_small_hard
tcache_arena_associate
tcache_arena_dissociate
+tcache_arena_reassociate
tcache_bin_flush_large
tcache_bin_flush_small
tcache_bin_info
@@ -388,11 +393,14 @@ tsd_booted
tsd_arena_get
tsd_arena_set
tsd_boot
+tsd_boot0
+tsd_boot1
tsd_cleanup
tsd_cleanup_wrapper
tsd_fetch
tsd_get
-tsd_get_wrapper
+tsd_wrapper_get
+tsd_wrapper_set
tsd_initialized
tsd_init_check_recursion
tsd_init_finish
diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h
index da8e4ef4..02eec5db 100644
--- a/include/jemalloc/internal/tcache.h
+++ b/include/jemalloc/internal/tcache.h
@@ -109,6 +109,7 @@ void tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem,
void tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem,
tcache_t *tcache);
void tcache_arena_associate(tcache_t *tcache, arena_t *arena);
+void tcache_arena_reassociate(tcache_t *tcache, arena_t *arena);
void tcache_arena_dissociate(tcache_t *tcache);
tcache_t *tcache_get_hard(tsd_t *tsd);
tcache_t *tcache_create(arena_t *arena);
diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h
index 25450391..b5658f8e 100644
--- a/include/jemalloc/internal/tsd.h
+++ b/include/jemalloc/internal/tsd.h
@@ -2,7 +2,7 @@
#ifdef JEMALLOC_H_TYPES
/* Maximum number of malloc_tsd users with cleanup functions. */
-#define MALLOC_TSD_CLEANUPS_MAX 8
+#define MALLOC_TSD_CLEANUPS_MAX 2
typedef bool (*malloc_tsd_cleanup_t)(void);
@@ -23,7 +23,7 @@ typedef enum {
/*
* TLS/TSD-agnostic macro-based implementation of thread-specific data. There
- * are four macros that support (at least) three use cases: file-private,
+ * are five macros that support (at least) three use cases: file-private,
* library-private, and library-private inlined. Following is an example
* library-private tsd variable:
*
@@ -33,18 +33,19 @@ typedef enum {
* int y;
* } example_t;
* #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0})
- * malloc_tsd_protos(, example_, example_t *)
- * malloc_tsd_externs(example_, example_t *)
+ * malloc_tsd_types(example_, example_t)
+ * malloc_tsd_protos(, example_, example_t)
+ * malloc_tsd_externs(example_, example_t)
* In example.c:
- * malloc_tsd_data(, example_, example_t *, EX_INITIALIZER)
- * malloc_tsd_funcs(, example_, example_t *, EX_INITIALIZER,
+ * malloc_tsd_data(, example_, example_t, EX_INITIALIZER)
+ * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER,
* example_tsd_cleanup)
*
* The result is a set of generated functions, e.g.:
*
* bool example_tsd_boot(void) {...}
- * example_t **example_tsd_get() {...}
- * void example_tsd_set(example_t **val) {...}
+ * example_t *example_tsd_get() {...}
+ * void example_tsd_set(example_t *val) {...}
*
* Note that all of the functions deal in terms of (a_type *) rather than
* (a_type) so that it is possible to support non-pointer types (unlike
@@ -70,9 +71,32 @@ typedef enum {
* non-NULL.
*/
+/* malloc_tsd_types(). */
+#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
+#define malloc_tsd_types(a_name, a_type)
+#elif (defined(JEMALLOC_TLS))
+#define malloc_tsd_types(a_name, a_type)
+#elif (defined(_WIN32))
+#define malloc_tsd_types(a_name, a_type) \
+typedef struct { \
+ bool initialized; \
+ a_type val; \
+} a_name##tsd_wrapper_t;
+#else
+#define malloc_tsd_types(a_name, a_type) \
+typedef struct { \
+ bool initialized; \
+ a_type val; \
+} a_name##tsd_wrapper_t;
+#endif
+
/* malloc_tsd_protos(). */
#define malloc_tsd_protos(a_attr, a_name, a_type) \
a_attr bool \
+a_name##tsd_boot0(void); \
+a_attr void \
+a_name##tsd_boot1(void); \
+a_attr bool \
a_name##tsd_boot(void); \
a_attr a_type * \
a_name##tsd_get(void); \
@@ -93,11 +117,13 @@ extern bool a_name##tsd_booted;
#elif (defined(_WIN32))
#define malloc_tsd_externs(a_name, a_type) \
extern DWORD a_name##tsd_tsd; \
+extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \
extern bool a_name##tsd_booted;
#else
#define malloc_tsd_externs(a_name, a_type) \
extern pthread_key_t a_name##tsd_tsd; \
extern tsd_init_head_t a_name##tsd_init_head; \
+extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \
extern bool a_name##tsd_booted;
#endif
@@ -118,6 +144,10 @@ a_attr bool a_name##tsd_booted = false;
#elif (defined(_WIN32))
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
a_attr DWORD a_name##tsd_tsd; \
+a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \
+ false, \
+ a_initializer \
+}; \
a_attr bool a_name##tsd_booted = false;
#else
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
@@ -126,6 +156,10 @@ a_attr tsd_init_head_t a_name##tsd_init_head = { \
ql_head_initializer(blocks), \
MALLOC_MUTEX_INITIALIZER \
}; \
+a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \
+ false, \
+ a_initializer \
+}; \
a_attr bool a_name##tsd_booted = false;
#endif
@@ -145,7 +179,7 @@ a_name##tsd_cleanup_wrapper(void) \
return (a_name##tsd_initialized); \
} \
a_attr bool \
-a_name##tsd_boot(void) \
+a_name##tsd_boot0(void) \
{ \
\
if (a_cleanup != malloc_tsd_no_cleanup) { \
@@ -155,6 +189,18 @@ a_name##tsd_boot(void) \
a_name##tsd_booted = true; \
return (false); \
} \
+a_attr void \
+a_name##tsd_boot1() \
+{ \
+ \
+ /* Do nothing. */ \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ return (a_name##tsd_boot0()); \
+} \
/* Get/set. */ \
a_attr a_type * \
a_name##tsd_get(void) \
@@ -177,7 +223,7 @@ a_name##tsd_set(a_type *val) \
a_cleanup) \
/* Initialization/cleanup. */ \
a_attr bool \
-a_name##tsd_boot(void) \
+a_name##tsd_boot0(void) \
{ \
\
if (a_cleanup != malloc_tsd_no_cleanup) { \
@@ -188,6 +234,18 @@ a_name##tsd_boot(void) \
a_name##tsd_booted = true; \
return (false); \
} \
+a_attr void \
+a_name##tsd_boot1() \
+{ \
+ \
+ /* Do nothing. */ \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ return (a_name##tsd_boot0()); \
+} \
/* Get/set. */ \
a_attr a_type * \
a_name##tsd_get(void) \
@@ -215,11 +273,6 @@ a_name##tsd_set(a_type *val) \
#elif (defined(_WIN32))
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
a_cleanup) \
-/* Data structure. */ \
-typedef struct { \
- bool initialized; \
- a_type val; \
-} a_name##tsd_wrapper_t; \
/* Initialization/cleanup. */ \
a_attr bool \
a_name##tsd_cleanup_wrapper(void) \
@@ -241,23 +294,18 @@ a_name##tsd_cleanup_wrapper(void) \
malloc_tsd_dalloc(wrapper); \
return (false); \
} \
-a_attr bool \
-a_name##tsd_boot(void) \
+a_attr void \
+a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \
{ \
\
- a_name##tsd_tsd = TlsAlloc(); \
- if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \
- return (true); \
- if (a_cleanup != malloc_tsd_no_cleanup) { \
- malloc_tsd_cleanup_register( \
- &a_name##tsd_cleanup_wrapper); \
+ if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \
+ malloc_write("<jemalloc>: Error setting" \
+ " TSD for "#a_name"\n"); \
+ abort(); \
} \
- a_name##tsd_booted = true; \
- return (false); \
} \
-/* Get/set. */ \
a_attr a_name##tsd_wrapper_t * \
-a_name##tsd_get_wrapper(void) \
+a_name##tsd_wrapper_get(void) \
{ \
a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \
TlsGetValue(a_name##tsd_tsd); \
@@ -273,21 +321,63 @@ a_name##tsd_get_wrapper(void) \
wrapper->initialized = false; \
wrapper->val = a_initializer; \
} \
- if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \
- malloc_write("<jemalloc>: Error setting" \
- " TSD for "#a_name"\n"); \
- abort(); \
- } \
+ a_name##tsd_wrapper_set(wrapper); \
} \
return (wrapper); \
} \
+a_attr bool \
+a_name##tsd_boot0(void) \
+{ \
+ \
+ a_name##tsd_tsd = TlsAlloc(); \
+ if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \
+ return (true); \
+ if (a_cleanup != malloc_tsd_no_cleanup) { \
+ malloc_tsd_cleanup_register( \
+ &a_name##tsd_cleanup_wrapper); \
+ } \
+ a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \
+ a_name##tsd_booted = true; \
+ return (false); \
+} \
+a_attr void \
+a_name##tsd_boot1() \
+{ \
+ a_name##tsd_wrapper_t *wrapper; \
+ wrapper = (a_name##tsd_wrapper_t *) \
+ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \
+ if (wrapper == NULL) { \
+ malloc_write("<jemalloc>: Error allocating" \
+ " TSD for "#a_name"\n"); \
+ abort(); \
+ } \
+ memcpy(wrapper, &a_name##tsd_boot_wrapper, \
+ sizeof(a_name##tsd_wrapper_t)); \
+ a_name##tsd_wrapper_set(wrapper); \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ if (a_name##tsd_boot0()) \
+ return (true); \
+ a_name##tsd_boot1(); \
+ return (false); \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ return (false); \
+} \
+/* Get/set. */ \
a_attr a_type * \
a_name##tsd_get(void) \
{ \
a_name##tsd_wrapper_t *wrapper; \
\
assert(a_name##tsd_booted); \
- wrapper = a_name##tsd_get_wrapper(); \
+ wrapper = a_name##tsd_wrapper_get(); \
return (&wrapper->val); \
} \
a_attr void \
@@ -296,7 +386,7 @@ a_name##tsd_set(a_type *val) \
a_name##tsd_wrapper_t *wrapper; \
\
assert(a_name##tsd_booted); \
- wrapper = a_name##tsd_get_wrapper(); \
+ wrapper = a_name##tsd_wrapper_get(); \
wrapper->val = *(val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
wrapper->initialized = true; \
@@ -304,11 +394,6 @@ a_name##tsd_set(a_type *val) \
#else
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
a_cleanup) \
-/* Data structure. */ \
-typedef struct { \
- bool initialized; \
- a_type val; \
-} a_name##tsd_wrapper_t; \
/* Initialization/cleanup. */ \
a_attr void \
a_name##tsd_cleanup_wrapper(void *arg) \
@@ -333,19 +418,19 @@ a_name##tsd_cleanup_wrapper(void *arg) \
} \
malloc_tsd_dalloc(wrapper); \
} \
-a_attr bool \
-a_name##tsd_boot(void) \
+a_attr void \
+a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \
{ \
\
- if (pthread_key_create(&a_name##tsd_tsd, \
- a_name##tsd_cleanup_wrapper) != 0) \
- return (true); \
- a_name##tsd_booted = true; \
- return (false); \
+ if (pthread_setspecific(a_name##tsd_tsd, \
+ (void *)wrapper)) { \
+ malloc_write("<jemalloc>: Error setting" \
+ " TSD for "#a_name"\n"); \
+ abort(); \
+ } \
} \
-/* Get/set. */ \
a_attr a_name##tsd_wrapper_t * \
-a_name##tsd_get_wrapper(void) \
+a_name##tsd_wrapper_get(void) \
{ \
a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \
pthread_getspecific(a_name##tsd_tsd); \
@@ -367,23 +452,54 @@ a_name##tsd_get_wrapper(void) \
wrapper->initialized = false; \
wrapper->val = a_initializer; \
} \
- if (pthread_setspecific(a_name##tsd_tsd, \
- (void *)wrapper)) { \
- malloc_write("<jemalloc>: Error setting" \
- " TSD for "#a_name"\n"); \
- abort(); \
- } \
+ a_name##tsd_wrapper_set(wrapper); \
tsd_init_finish(&a_name##tsd_init_head, &block); \
} \
return (wrapper); \
} \
+a_attr bool \
+a_name##tsd_boot0(void) \
+{ \
+ \
+ if (pthread_key_create(&a_name##tsd_tsd, \
+ a_name##tsd_cleanup_wrapper) != 0) \
+ return (true); \
+ a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \
+ a_name##tsd_booted = true; \
+ return (false); \
+} \
+a_attr void \
+a_name##tsd_boot1() \
+{ \
+ a_name##tsd_wrapper_t *wrapper; \
+ wrapper = (a_name##tsd_wrapper_t *) \
+ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \
+ if (wrapper == NULL) { \
+ malloc_write("<jemalloc>: Error allocating" \
+ " TSD for "#a_name"\n"); \
+ abort(); \
+ } \
+ memcpy(wrapper, &a_name##tsd_boot_wrapper, \
+ sizeof(a_name##tsd_wrapper_t)); \
+ a_name##tsd_wrapper_set(wrapper); \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ if (a_name##tsd_boot0()) \
+ return (true); \
+ a_name##tsd_boot1(); \
+ return (false); \
+} \
+/* Get/set. */ \
a_attr a_type * \
a_name##tsd_get(void) \
{ \
a_name##tsd_wrapper_t *wrapper; \
\
assert(a_name##tsd_booted); \
- wrapper = a_name##tsd_get_wrapper(); \
+ wrapper = a_name##tsd_wrapper_get(); \
return (&wrapper->val); \
} \
a_attr void \
@@ -392,7 +508,7 @@ a_name##tsd_set(a_type *val) \
a_name##tsd_wrapper_t *wrapper; \
\
assert(a_name##tsd_booted); \
- wrapper = a_name##tsd_get_wrapper(); \
+ wrapper = a_name##tsd_wrapper_get(); \
wrapper->val = *(val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
wrapper->initialized = true; \
@@ -423,6 +539,9 @@ struct tsd_init_head_s {
O(thread_deallocated, uint64_t) \
O(prof_tdata, prof_tdata_t *) \
O(arena, arena_t *) \
+ O(arenas_cache, arena_t **) \
+ O(narenas_cache, unsigned) \
+ O(arenas_cache_bypass, bool) \
O(tcache_enabled, tcache_enabled_t) \
O(quarantine, quarantine_t *) \
@@ -433,6 +552,9 @@ struct tsd_init_head_s {
0, \
NULL, \
NULL, \
+ NULL, \
+ 0, \
+ false, \
tcache_enabled_default, \
NULL \
}
@@ -447,6 +569,8 @@ MALLOC_TSD
static const tsd_t tsd_initializer = TSD_INITIALIZER;
+malloc_tsd_types(, tsd_t)
+
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
@@ -455,7 +579,8 @@ void *malloc_tsd_malloc(size_t size);
void malloc_tsd_dalloc(void *wrapper);
void malloc_tsd_no_cleanup(void *arg);
void malloc_tsd_cleanup_register(bool (*f)(void));
-bool malloc_tsd_boot(void);
+bool malloc_tsd_boot0(void);
+void malloc_tsd_boot1(void);
#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
!defined(_WIN32))
void *tsd_init_check_recursion(tsd_init_head_t *head,
diff --git a/src/arena.c b/src/arena.c
index 49a30572..86e54404 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -2192,27 +2192,37 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive,
}
}
-bool
-arena_new(arena_t *arena, unsigned ind)
+arena_t *
+arena_new(unsigned ind)
{
+ arena_t *arena;
unsigned i;
arena_bin_t *bin;
+ /*
+ * Allocate arena and arena->lstats contiguously, mainly because there
+ * is no way to clean up if base_alloc() OOMs.
+ */
+ if (config_stats) {
+ arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t))
+ + nlclasses * sizeof(malloc_large_stats_t));
+ } else
+ arena = (arena_t *)base_alloc(sizeof(arena_t));
+ if (arena == NULL)
+ return (NULL);
+
arena->ind = ind;
arena->nthreads = 0;
arena->chunk_alloc = chunk_alloc_default;
arena->chunk_dalloc = chunk_dalloc_default;
if (malloc_mutex_init(&arena->lock))
- return (true);
+ return (NULL);
if (config_stats) {
memset(&arena->stats, 0, sizeof(arena_stats_t));
- arena->stats.lstats =
- (malloc_large_stats_t *)base_alloc(nlclasses *
- sizeof(malloc_large_stats_t));
- if (arena->stats.lstats == NULL)
- return (true);
+ arena->stats.lstats = (malloc_large_stats_t *)(((void *)arena) +
+ CACHELINE_CEILING(sizeof(arena_t)));
memset(arena->stats.lstats, 0, nlclasses *
sizeof(malloc_large_stats_t));
if (config_tcache)
@@ -2236,14 +2246,14 @@ arena_new(arena_t *arena, unsigned ind)
for (i = 0; i < NBINS; i++) {
bin = &arena->bins[i];
if (malloc_mutex_init(&bin->lock))
- return (true);
+ return (NULL);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
if (config_stats)
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
}
- return (false);
+ return (arena);
}
/*
diff --git a/src/chunk.c b/src/chunk.c
index 618aaca0..f65b67af 100644
--- a/src/chunk.c
+++ b/src/chunk.c
@@ -254,9 +254,17 @@ void *
chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero,
unsigned arena_ind)
{
+ arena_t *arena;
+
+ arena = arena_get(tsd_fetch(), arena_ind, false, true);
+ /*
+ * The arena we're allocating on behalf of must have been initialized
+ * already.
+ */
+ assert(arena != NULL);
return (chunk_alloc_core(new_addr, size, alignment, false, zero,
- arenas[arena_ind]->dss_prec));
+ arena->dss_prec));
}
static void
diff --git a/src/ctl.c b/src/ctl.c
index f1f3234b..37f8f42a 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -447,7 +447,7 @@ ctl_arena_init(ctl_arena_stats_t *astats)
{
if (astats->lstats == NULL) {
- astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses *
+ astats->lstats = (malloc_large_stats_t *)a0malloc(nlclasses *
sizeof(malloc_large_stats_t));
if (astats->lstats == NULL)
return (true);
@@ -567,31 +567,24 @@ ctl_arena_refresh(arena_t *arena, unsigned i)
static bool
ctl_grow(void)
{
- tsd_t *tsd;
ctl_arena_stats_t *astats;
- arena_t **tarenas;
- tsd = tsd_fetch();
+ /* Initialize new arena. */
+ if (arena_init(ctl_stats.narenas) == NULL)
+ return (true);
- /* Allocate extended arena stats and arenas arrays. */
- astats = (ctl_arena_stats_t *)imalloc(tsd, (ctl_stats.narenas + 2) *
+ /* Allocate extended arena stats. */
+ astats = (ctl_arena_stats_t *)a0malloc((ctl_stats.narenas + 2) *
sizeof(ctl_arena_stats_t));
if (astats == NULL)
return (true);
- tarenas = (arena_t **)imalloc(tsd, (ctl_stats.narenas + 1) *
- sizeof(arena_t *));
- if (tarenas == NULL) {
- idalloc(tsd, astats);
- return (true);
- }
/* Initialize the new astats element. */
memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) *
sizeof(ctl_arena_stats_t));
memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t));
if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) {
- idalloc(tsd, tarenas);
- idalloc(tsd, astats);
+ a0free(astats);
return (true);
}
/* Swap merged stats to their new location. */
@@ -604,32 +597,7 @@ ctl_grow(void)
memcpy(&astats[ctl_stats.narenas + 1], &tstats,
sizeof(ctl_arena_stats_t));
}
- /* Initialize the new arenas element. */
- tarenas[ctl_stats.narenas] = NULL;
- {
- arena_t **arenas_old = arenas;
- /*
- * Swap extended arenas array into place. Although ctl_mtx
- * protects this function from other threads extending the
- * array, it does not protect from other threads mutating it
- * (i.e. initializing arenas and setting array elements to
- * point to them). Therefore, array copying must happen under
- * the protection of arenas_lock.
- */
- malloc_mutex_lock(&arenas_lock);
- arenas = tarenas;
- memcpy(arenas, arenas_old, ctl_stats.narenas *
- sizeof(arena_t *));
- narenas_total++;
- arenas_extend(narenas_total - 1);
- malloc_mutex_unlock(&arenas_lock);
- /*
- * Deallocate arenas_old only if it came from imalloc() (not
- * base_alloc()).
- */
- if (ctl_stats.narenas != narenas_auto)
- idalloc(tsd, arenas_old);
- }
+ a0free(ctl_stats.arenas);
ctl_stats.arenas = astats;
ctl_stats.narenas++;
@@ -639,6 +607,7 @@ ctl_grow(void)
static void
ctl_refresh(void)
{
+ tsd_t *tsd;
unsigned i;
VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas);
@@ -657,15 +626,17 @@ ctl_refresh(void)
ctl_stats.arenas[ctl_stats.narenas].nthreads = 0;
ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]);
- malloc_mutex_lock(&arenas_lock);
- memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas);
+ tsd = tsd_fetch();
+ for (i = 0; i < ctl_stats.narenas; i++)
+ tarenas[i] = arena_get(tsd, i, false, (i == 0));
+
for (i = 0; i < ctl_stats.narenas; i++) {
- if (arenas[i] != NULL)
- ctl_stats.arenas[i].nthreads = arenas[i]->nthreads;
+ if (tarenas[i] != NULL)
+ ctl_stats.arenas[i].nthreads = arena_nbound(i);
else
ctl_stats.arenas[i].nthreads = 0;
}
- malloc_mutex_unlock(&arenas_lock);
+
for (i = 0; i < ctl_stats.narenas; i++) {
bool initialized = (tarenas[i] != NULL);
@@ -698,9 +669,8 @@ ctl_init(void)
* Allocate space for one extra arena stats element, which
* contains summed stats across all arenas.
*/
- assert(narenas_auto == narenas_total_get());
- ctl_stats.narenas = narenas_auto;
- ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc(
+ ctl_stats.narenas = narenas_total_get();
+ ctl_stats.arenas = (ctl_arena_stats_t *)a0malloc(
(ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t));
if (ctl_stats.arenas == NULL) {
ret = true;
@@ -718,6 +688,13 @@ ctl_init(void)
unsigned i;
for (i = 0; i <= ctl_stats.narenas; i++) {
if (ctl_arena_init(&ctl_stats.arenas[i])) {
+ unsigned j;
+ for (j = 0; j < i; j++) {
+ a0free(
+ ctl_stats.arenas[j].lstats);
+ }
+ a0free(ctl_stats.arenas);
+ ctl_stats.arenas = NULL;
ret = true;
goto label_return;
}
@@ -1231,17 +1208,19 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
{
int ret;
tsd_t *tsd;
+ arena_t *arena;
unsigned newind, oldind;
tsd = tsd_fetch();
+ arena = arena_choose(tsd, NULL);
+ if (arena == NULL)
+ return (EAGAIN);
malloc_mutex_lock(&ctl_mtx);
- newind = oldind = choose_arena(tsd, NULL)->ind;
+ newind = oldind = arena->ind;
WRITE(newind, unsigned);
READ(oldind, unsigned);
if (newind != oldind) {
- arena_t *arena;
-
if (newind >= ctl_stats.narenas) {
/* New arena index is out of range. */
ret = EFAULT;
@@ -1249,28 +1228,18 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
}
/* Initialize arena if necessary. */
- malloc_mutex_lock(&arenas_lock);
- if ((arena = arenas[newind]) == NULL && (arena =
- arenas_extend(newind)) == NULL) {
- malloc_mutex_unlock(&arenas_lock);
+ arena = arena_get(tsd, newind, true, true);
+ if (arena == NULL) {
ret = EAGAIN;
goto label_return;
}
- assert(arena == arenas[newind]);
- arenas[oldind]->nthreads--;
- arenas[newind]->nthreads++;
- malloc_mutex_unlock(&arenas_lock);
-
- /* Set new arena association. */
+ /* Set new arena/tcache associations. */
+ arena_migrate(tsd, oldind, newind);
if (config_tcache) {
tcache_t *tcache = tsd_tcache_get(tsd);
- if (tcache != NULL) {
- tcache_arena_dissociate(tcache);
- tcache_arena_associate(tcache, arena);
- }
+ if (tcache != NULL)
+ tcache_arena_reassociate(tcache, arena);
}
-
- tsd_arena_set(tsd, arena);
}
ret = 0;
@@ -1400,11 +1369,13 @@ label_return:
static void
arena_purge(unsigned arena_ind)
{
+ tsd_t *tsd;
+ unsigned i;
VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas);
- malloc_mutex_lock(&arenas_lock);
- memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas);
- malloc_mutex_unlock(&arenas_lock);
+ tsd = tsd_fetch();
+ for (i = 0; i < ctl_stats.narenas; i++)
+ tarenas[i] = arena_get(tsd, i, false, (i == 0));
if (arena_ind == ctl_stats.narenas) {
unsigned i;
@@ -1467,7 +1438,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
}
if (arena_ind < ctl_stats.narenas) {
- arena_t *arena = arenas[arena_ind];
+ arena_t *arena = arena_get(tsd_fetch(), arena_ind, false, true);
if (arena == NULL || (dss_prec != dss_prec_limit &&
arena_dss_prec_set(arena, dss_prec))) {
ret = EFAULT;
@@ -1501,7 +1472,8 @@ arena_i_chunk_alloc_ctl(const size_t *mib, size_t miblen, void *oldp,
arena_t *arena;
malloc_mutex_lock(&ctl_mtx);
- if (arena_ind < narenas_total && (arena = arenas[arena_ind]) != NULL) {
+ if (arena_ind < narenas_total_get() && (arena = arena_get(tsd_fetch(),
+ arena_ind, false, true)) != NULL) {
malloc_mutex_lock(&arena->lock);
READ(arena->chunk_alloc, chunk_alloc_t *);
WRITE(arena->chunk_alloc, chunk_alloc_t *);
@@ -1527,7 +1499,8 @@ arena_i_chunk_dalloc_ctl(const size_t *mib, size_t miblen, void *oldp,
arena_t *arena;
malloc_mutex_lock(&ctl_mtx);
- if (arena_ind < narenas_total && (arena = arenas[arena_ind]) != NULL) {
+ if (arena_ind < narenas_total_get() && (arena = arena_get(tsd_fetch(),
+ arena_ind, false, true)) != NULL) {
malloc_mutex_lock(&arena->lock);
READ(arena->chunk_dalloc, chunk_dalloc_t *);
WRITE(arena->chunk_dalloc, chunk_dalloc_t *);
diff --git a/src/huge.c b/src/huge.c
index ae416253..1376729a 100644
--- a/src/huge.c
+++ b/src/huge.c
@@ -50,7 +50,11 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
* it is possible to make correct junk/zero fill decisions below.
*/
is_zeroed = zero;
- arena = choose_arena(tsd, arena);
+ arena = arena_choose(tsd, arena);
+ if (unlikely(arena == NULL)) {
+ base_node_dalloc(node);
+ return (NULL);
+ }
ret = arena_chunk_alloc_huge(arena, NULL, csize, alignment, &is_zeroed);
if (ret == NULL) {
base_node_dalloc(node);
diff --git a/src/jemalloc.c b/src/jemalloc.c
index f3750b40..3c889e8a 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -4,8 +4,6 @@
/******************************************************************************/
/* Data. */
-malloc_tsd_data(, arenas, arena_t *, NULL)
-
/* Runtime configuration options. */
const char *je_malloc_conf JEMALLOC_ATTR(weak);
bool opt_abort =
@@ -34,10 +32,20 @@ bool in_valgrind;
unsigned ncpus;
-malloc_mutex_t arenas_lock;
-arena_t **arenas;
-unsigned narenas_total;
-unsigned narenas_auto;
+/* Protects arenas initialization (arenas, narenas_total). */
+static malloc_mutex_t arenas_lock;
+/*
+ * Arenas that are used to service external requests. Not all elements of the
+ * arenas array are necessarily used; arenas are created lazily as needed.
+ *
+ * arenas[0..narenas_auto) are used for automatic multiplexing of threads and
+ * arenas. arenas[narenas_auto..narenas_total) are only used if the application
+ * takes some action to create them and allocate from them.
+ */
+static arena_t **arenas;
+static unsigned narenas_total;
+static arena_t *a0; /* arenas[0]; read-only after initialization. */
+static unsigned narenas_auto; /* Read-only after initialization. */
/* Set to true once the allocator has been initialized. */
static bool malloc_initialized = false;
@@ -144,35 +152,288 @@ static bool malloc_init_hard(void);
* Begin miscellaneous support functions.
*/
+/*
+ * Per-allocation-event hook that gives the quarantine facility a chance to
+ * set up its TSD.
+ */
+JEMALLOC_ALWAYS_INLINE_C void
+malloc_thread_init(void)
+{
+
+	/*
+	 * TSD cannot be initialized lazily from the deallocation path: a
+	 * thread may do nothing except free() its own TLS data, and a TLS
+	 * write at that point would be a write-after-free.  Quarantine is
+	 * driven *only* by deallocation, so its TSD is initialized on a best
+	 * effort basis from every allocation event instead.
+	 */
+	if (config_fill) {
+		if (unlikely(opt_quarantine))
+			quarantine_alloc_hook();
+	}
+}
+
+/*
+ * Make sure the allocator is initialized and run the per-event thread hook.
+ * Returns true if hard initialization failed, false otherwise.
+ */
+JEMALLOC_ALWAYS_INLINE_C bool
+malloc_init(void)
+{
+
+	if (unlikely(!malloc_initialized)) {
+		/* Slow path: first use, or initialization still in progress. */
+		if (malloc_init_hard())
+			return (true);
+	}
+
+	malloc_thread_init();
+	return (false);
+}
+
+/*
+ * The a0*() functions are used instead of i[mcd]alloc() in bootstrap-sensitive
+ * situations that cannot tolerate TLS variable access. These functions are
+ * also exposed for use in static binaries on FreeBSD, hence the old-style
+ * malloc() API.
+ */
+
+/* Return arena 0 (a0); asserts that a0 has already been set. */
+arena_t *
+a0get(void)
+{
+	arena_t *arena0 = a0;
+
+	assert(arena0 != NULL);
+	return (arena0);
+}
+
+/*
+ * Allocate size bytes from arena 0, zero-filled if zero is true.  A request
+ * for 0 bytes is treated as a request for 1 byte.  Returns NULL if allocator
+ * initialization fails, otherwise whatever the underlying arena/huge
+ * allocation call returns.
+ */
+static void *
+a0alloc(size_t size, bool zero)
+{
+	size_t nbytes;
+
+	if (unlikely(malloc_init()))
+		return (NULL);
+
+	nbytes = (size == 0) ? 1 : size;
+
+	/* Requests above arena_maxclass take the huge allocation path. */
+	if (nbytes > arena_maxclass)
+		return (huge_malloc(NULL, a0get(), nbytes, zero));
+	return (arena_malloc(NULL, a0get(), nbytes, zero, false));
+}
+
+/* malloc()-style entry point on arena 0: size bytes, not zero-filled. */
+void *
+a0malloc(size_t size)
+{
+	void *ret;
+
+	ret = a0alloc(size, false);
+	return (ret);
+}
+
+/*
+ * calloc()-style entry point on arena 0: zero-filled storage for num
+ * elements of size bytes each.  Returns NULL if num * size does not fit in
+ * size_t, or if the underlying a0alloc() call fails.
+ */
+void *
+a0calloc(size_t num, size_t size)
+{
+	size_t num_size;
+
+	/*
+	 * size_t multiplication wraps silently; without this check an
+	 * overflowing request would yield a successful allocation that is far
+	 * smaller than the caller expects.
+	 */
+	num_size = num * size;
+	if (unlikely(size != 0 && num_size / size != num))
+		return (NULL);
+
+	return (a0alloc(num_size, true));
+}
+
+/*
+ * free()-style entry point matching a0malloc()/a0calloc().  NULL is ignored.
+ */
+void
+a0free(void *ptr)
+{
+
+	if (ptr != NULL) {
+		arena_chunk_t *base = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+
+		/*
+		 * A pointer that equals its own chunk base goes to the huge
+		 * deallocation path; anything else is released through the
+		 * arena path.
+		 */
+		if (base == ptr)
+			huge_dalloc(ptr);
+		else
+			arena_dalloc(NULL, base, ptr, false);
+	}
+}
+
/* Create a new arena and insert it into the arenas array at index ind. */
arena_t *
-arenas_extend(unsigned ind)
+arena_init(unsigned ind)
{
- arena_t *ret;
+ arena_t *arena;
+
+ /*
+ * Look up arenas[ind] under arenas_lock, creating the arena with
+ * arena_new() if the slot is still NULL. When ind == narenas_total the
+ * arenas array is first regrown by one slot; NULL is returned if that
+ * allocation fails.
+ *
+ * NOTE(review): arenas_lock is acquired unconditionally here, yet
+ * arena_choose_hard() appears to call arena_init() while it already
+ * holds arenas_lock -- confirm that malloc_mutex_lock() tolerates
+ * this, or split out a variant for callers that hold the lock.
+ */
+ malloc_mutex_lock(&arenas_lock);
- ret = (arena_t *)base_alloc(sizeof(arena_t));
- if (ret != NULL && !arena_new(ret, ind)) {
- arenas[ind] = ret;
- return (ret);
+ /* Expand arenas if necessary. */
+ assert(ind <= narenas_total);
+ if (ind == narenas_total) {
+ unsigned narenas_new = narenas_total + 1;
+ arena_t **arenas_new =
+ (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new *
+ sizeof(arena_t *)));
+ if (arenas_new == NULL) {
+ arena = NULL;
+ goto label_return;
+ }
+ /* Carry over the existing slots; the new last slot starts empty. */
+ memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *));
+ arenas_new[ind] = NULL;
+ /*
+ * Deallocate only if arenas came from a0malloc() (not
+ * base_alloc()).
+ */
+ if (narenas_total != narenas_auto)
+ a0free(arenas);
+ arenas = arenas_new;
+ narenas_total = narenas_new;
}
- /* Only reached if there is an OOM error. */
/*
- * OOM here is quite inconvenient to propagate, since dealing with it
- * would require a check for failure in the fast path. Instead, punt
- * by using arenas[0]. In practice, this is an extremely unlikely
- * failure.
+ * Another thread may have already initialized arenas[ind] if it's an
+ * auto arena.
*/
- malloc_write("<jemalloc>: Error initializing arena\n");
- if (opt_abort)
- abort();
+ arena = arenas[ind];
+ if (arena != NULL) {
+ assert(ind < narenas_auto);
+ goto label_return;
+ }
+
+ /* Actually initialize the arena. */
+ arena = arenas[ind] = arena_new(ind);
+label_return:
+ malloc_mutex_unlock(&arenas_lock);
+ return (arena);
+}
+
+/* Return a snapshot of narenas_total, read while holding arenas_lock. */
+unsigned
+narenas_total_get(void)
+{
+	unsigned snapshot;
+
+	malloc_mutex_lock(&arenas_lock);
+	snapshot = narenas_total;
+	malloc_mutex_unlock(&arenas_lock);
+
+	return (snapshot);
+}
+
+/*
+ * Count the calling thread against arenas[ind] and, when tsd is nominal,
+ * record that arena in tsd.  No locking happens here; presumably the caller
+ * holds arenas_lock, as arena_bind() does.
+ */
+static void
+arena_bind_locked(tsd_t *tsd, unsigned ind)
+{
+	arena_t *target = arenas[ind];
+
+	target->nthreads++;
+
+	if (!tsd_nominal(tsd))
+		return;
+	tsd_arena_set(tsd, target);
+}
+
+/*
+ * Bind the calling thread to arenas[ind]: runs arena_bind_locked() with
+ * arenas_lock held for the duration of the call.
+ */
+static void
+arena_bind(tsd_t *tsd, unsigned ind)
+{
+
+ malloc_mutex_lock(&arenas_lock);
+ arena_bind_locked(tsd, ind);
+ malloc_mutex_unlock(&arenas_lock);
+}
+
+/*
+ * Move the calling thread's binding from arenas[oldind] to arenas[newind]:
+ * both nthreads counters are adjusted under arenas_lock, after which tsd is
+ * pointed at the new arena.
+ */
+void
+arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind)
+{
+	arena_t *dst;
+
+	malloc_mutex_lock(&arenas_lock);
+	arenas[oldind]->nthreads--;
+	dst = arenas[newind];
+	dst->nthreads++;
+	malloc_mutex_unlock(&arenas_lock);
+
+	tsd_arena_set(tsd, dst);
+}
+
+/* Return arenas[ind]->nthreads, read while holding arenas_lock. */
+unsigned
+arena_nbound(unsigned ind)
+{
+	unsigned count;
+
+	malloc_mutex_lock(&arenas_lock);
+	count = arenas[ind]->nthreads;
+	malloc_mutex_unlock(&arenas_lock);
+
+	return (count);
+}
+
+/*
+ * Drop the calling thread's binding to arenas[ind]: decrement its nthreads
+ * counter under arenas_lock, then clear the arena recorded in tsd.
+ */
+static void
+arena_unbind(tsd_t *tsd, unsigned ind)
+{
+
+	malloc_mutex_lock(&arenas_lock);
+	arenas[ind]->nthreads--;
+	malloc_mutex_unlock(&arenas_lock);
+
+	tsd_arena_set(tsd, NULL);
+}
+
+/*
+ * Slow-path lookup of arena ind through the calling thread's tsd-resident
+ * arenas_cache. The cache is discarded if it is smaller than
+ * narenas_total, (re)allocated if missing, refreshed from the global arenas
+ * array, and then consulted. If init_if_missing is true and the slot is
+ * NULL, the arena is created via arena_init(). When no cache can be
+ * allocated, the global array is read directly under arenas_lock.
+ *
+ * NOTE(review): if a cache already exists with narenas_cache >=
+ * narenas_actual and ind >= narenas_cache, the final arenas_cache[ind] access
+ * is outside the cache -- presumably callers never pass such an index;
+ * confirm against arena_get().
+ */
+arena_t *
+arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing)
+{
+ arena_t *arena;
+ arena_t **arenas_cache = tsd_arenas_cache_get(tsd);
+ unsigned narenas_cache = tsd_narenas_cache_get(tsd);
+ unsigned narenas_actual = narenas_total_get();
+
+ /* Deallocate old cache if it's too small. */
+ if (arenas_cache != NULL && narenas_cache < narenas_actual) {
+ a0free(arenas_cache);
+ arenas_cache = NULL;
+ narenas_cache = 0;
+ tsd_arenas_cache_set(tsd, arenas_cache);
+ tsd_narenas_cache_set(tsd, narenas_cache);
+ }
+
+ /* Allocate cache if it's missing. */
+ if (arenas_cache == NULL) {
+ bool *arenas_cache_bypassp = tsd_arenas_cache_bypassp_get(tsd);
+ assert(ind < narenas_actual || !init_if_missing);
+ /* Size the cache so that slot ind exists even if ind is past the end. */
+ narenas_cache = (ind < narenas_actual) ? narenas_actual : ind+1;
+
+ /*
+ * The bypass flag is raised for the duration of a0malloc() so that
+ * a nested call into this function skips cache allocation.
+ */
+ if (!*arenas_cache_bypassp) {
+ *arenas_cache_bypassp = true;
+ arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) *
+ narenas_cache);
+ *arenas_cache_bypassp = false;
+ } else
+ arenas_cache = NULL;
+ if (arenas_cache == NULL) {
+ /*
+ * This function must always tell the truth, even if
+ * it's slow, so don't let OOM or recursive allocation
+ * avoidance (note arenas_cache_bypass check) get in the
+ * way.
+ */
+ if (ind >= narenas_actual)
+ return (NULL);
+ malloc_mutex_lock(&arenas_lock);
+ arena = arenas[ind];
+ malloc_mutex_unlock(&arenas_lock);
+ return (arena);
+ }
+ tsd_arenas_cache_set(tsd, arenas_cache);
+ tsd_narenas_cache_set(tsd, narenas_cache);
+ }
- return (arenas[0]);
+ /*
+ * Copy to cache. It's possible that the actual number of arenas has
+ * increased since narenas_total_get() was called above, but that causes
+ * no correctness issues unless two threads concurrently execute the
+ * arenas.extend mallctl, which we trust mallctl synchronization to
+ * prevent.
+ */
+ malloc_mutex_lock(&arenas_lock);
+ memcpy(arenas_cache, arenas, sizeof(arena_t *) * narenas_actual);
+ malloc_mutex_unlock(&arenas_lock);
+ /* Slots beyond the copied range have no arena yet; mark them NULL. */
+ if (narenas_cache > narenas_actual) {
+ memset(&arenas_cache[narenas_actual], 0, sizeof(arena_t *) *
+ (narenas_cache - narenas_actual));
+ }
+
+ /* Read the refreshed cache, and init the arena if necessary. */
+ arena = arenas_cache[ind];
+ if (init_if_missing && arena == NULL)
+ arena = arenas_cache[ind] = arena_init(ind);
+ return (arena);
}
-/* Slow path, called only by choose_arena(). */
+/* Slow path, called only by arena_choose(). */
arena_t *
-choose_arena_hard(tsd_t *tsd)
+arena_choose_hard(tsd_t *tsd)
{
arena_t *ret;
@@ -182,7 +443,7 @@ choose_arena_hard(tsd_t *tsd)
choose = 0;
first_null = narenas_auto;
malloc_mutex_lock(&arenas_lock);
- assert(arenas[0] != NULL);
+ assert(a0get() != NULL);
for (i = 1; i < narenas_auto; i++) {
if (arenas[i] != NULL) {
/*
@@ -215,20 +476,20 @@ choose_arena_hard(tsd_t *tsd)
ret = arenas[choose];
} else {
/* Initialize a new arena. */
- ret = arenas_extend(first_null);
+ choose = first_null;
+ ret = arena_init(choose);
+ if (ret == NULL) {
+ malloc_mutex_unlock(&arenas_lock);
+ return (NULL);
+ }
}
- ret->nthreads++;
+ arena_bind_locked(tsd, choose);
malloc_mutex_unlock(&arenas_lock);
} else {
- ret = arenas[0];
- malloc_mutex_lock(&arenas_lock);
- ret->nthreads++;
- malloc_mutex_unlock(&arenas_lock);
+ ret = a0get();
+ arena_bind(tsd, 0);
}
- if (tsd_nominal(tsd))
- tsd_arena_set(tsd, ret);
-
return (ret);
}
@@ -249,6 +510,33 @@ thread_deallocated_cleanup(tsd_t *tsd)
void
arena_cleanup(tsd_t *tsd)
{
+	arena_t *bound = tsd_arena_get(tsd);
+
+	/* Nothing to undo if tsd records no arena for this thread. */
+	if (bound == NULL)
+		return;
+
+	/* Drop the thread's nthreads contribution and clear tsd's arena. */
+	arena_unbind(tsd, bound->ind);
+}
+
+/*
+ * Cleanup for the tsd arenas_cache slot: release the calling thread's cached
+ * copy of the arenas array, if it has one.
+ */
+void
+arenas_cache_cleanup(tsd_t *tsd)
+{
+	arena_t **arenas_cache;
+
+	arenas_cache = tsd_arenas_cache_get(tsd);
+	/*
+	 * The guard must test this thread's cache pointer, not the global
+	 * arenas array.  The tsd slot is cleared before the memory is
+	 * released so that tsd never holds a pointer to freed memory.
+	 */
+	if (arenas_cache != NULL) {
+		tsd_arenas_cache_set(tsd, NULL);
+		a0free(arenas_cache);
+	}
+}
+
+/*
+ * Cleanup for the tsd narenas_cache slot. The slot is a plain unsigned
+ * count, so there is nothing to release.
+ */
+void
+narenas_cache_cleanup(tsd_t *tsd)
+{
+
+ /* Do nothing. */
+}
+
+/*
+ * Cleanup for the tsd arenas_cache_bypass slot. The slot is a bool flag
+ * (see its use in arena_get_hard()), so there is nothing to release.
+ */
+void
+arenas_cache_bypass_cleanup(tsd_t *tsd)
+{
/* Do nothing. */
}
@@ -312,44 +600,6 @@ malloc_ncpus(void)
return ((result == -1) ? 1 : (unsigned)result);
}
-void
-arenas_cleanup(void *arg)
-{
- arena_t *arena = *(arena_t **)arg;
-
- malloc_mutex_lock(&arenas_lock);
- arena->nthreads--;
- malloc_mutex_unlock(&arenas_lock);
-}
-
-JEMALLOC_ALWAYS_INLINE_C void
-malloc_thread_init(void)
-{
-
- /*
- * TSD initialization can't be safely done as a side effect of
- * deallocation, because it is possible for a thread to do nothing but
- * deallocate its TLS data via free(), in which case writing to TLS
- * would cause write-after-free memory corruption. The quarantine
- * facility *only* gets used as a side effect of deallocation, so make
- * a best effort attempt at initializing its TSD by hooking all
- * allocation events.
- */
- if (config_fill && unlikely(opt_quarantine))
- quarantine_alloc_hook();
-}
-
-JEMALLOC_ALWAYS_INLINE_C bool
-malloc_init(void)
-{
-
- if (unlikely(!malloc_initialized) && malloc_init_hard())
- return (true);
- malloc_thread_init();
-
- return (false);
-}
-
static bool
malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
char const **v_p, size_t *vlen_p)
@@ -745,7 +995,7 @@ malloc_init_hard(void)
#endif
malloc_initializer = INITIALIZER;
- if (malloc_tsd_boot()) {
+ if (malloc_tsd_boot0()) {
malloc_mutex_unlock(&init_lock);
return (true);
}
@@ -809,10 +1059,10 @@ malloc_init_hard(void)
/*
* Initialize one arena here. The rest are lazily created in
- * choose_arena_hard().
+ * arena_choose_hard().
*/
- arenas_extend(0);
- if (arenas[0] == NULL) {
+ a0 = arena_init(0);
+ if (a0 == NULL) {
malloc_mutex_unlock(&init_lock);
return (true);
}
@@ -887,6 +1137,7 @@ malloc_init_hard(void)
malloc_initialized = true;
malloc_mutex_unlock(&init_lock);
+ malloc_tsd_boot1();
return (false);
}
@@ -1428,8 +1679,8 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) =
* Begin non-standard functions.
*/
-JEMALLOC_ALWAYS_INLINE_C void
-imallocx_flags_decode_hard(size_t size, int flags, size_t *usize,
+JEMALLOC_ALWAYS_INLINE_C bool
+imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize,
size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena)
{
@@ -1444,16 +1695,19 @@ imallocx_flags_decode_hard(size_t size, int flags, size_t *usize,
if ((flags & MALLOCX_ARENA_MASK) != 0) {
unsigned arena_ind = MALLOCX_ARENA_GET(flags);
*try_tcache = false;
- *arena = arenas[arena_ind];
+ *arena = arena_get(tsd, arena_ind, true, true);
+ if (unlikely(*arena == NULL))
+ return (true);
} else {
*try_tcache = true;
*arena = NULL;
}
+ return (false);
}
-JEMALLOC_ALWAYS_INLINE_C void
-imallocx_flags_decode(size_t size, int flags, size_t *usize, size_t *alignment,
- bool *zero, bool *try_tcache, arena_t **arena)
+JEMALLOC_ALWAYS_INLINE_C bool
+imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize,
+ size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena)
{
if (likely(flags == 0)) {
@@ -1463,9 +1717,10 @@ imallocx_flags_decode(size_t size, int flags, size_t *usize, size_t *alignment,
*zero = false;
*try_tcache = true;
*arena = NULL;
+ return (false);
} else {
- imallocx_flags_decode_hard(size, flags, usize, alignment, zero,
- try_tcache, arena);
+ return (imallocx_flags_decode_hard(tsd, size, flags, usize,
+ alignment, zero, try_tcache, arena));
}
}
@@ -1524,8 +1779,9 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
arena_t *arena;
prof_tctx_t *tctx;
- imallocx_flags_decode(size, flags, usize, &alignment, &zero,
- &try_tcache, &arena);
+ if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment,
+ &zero, &try_tcache, &arena)))
+ return (NULL);
tctx = prof_alloc_prep(tsd, *usize, true);
if (likely((uintptr_t)tctx == (uintptr_t)1U)) {
p = imallocx_maybe_flags(tsd, size, flags, *usize, alignment,
@@ -1558,8 +1814,9 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
return (imalloc(tsd, size));
}
- imallocx_flags_decode_hard(size, flags, usize, &alignment, &zero,
- &try_tcache, &arena);
+ if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize,
+ &alignment, &zero, &try_tcache, &arena)))
+ return (NULL);
return (imallocx_flags(tsd, *usize, alignment, zero, try_tcache,
arena));
}
@@ -1685,9 +1942,10 @@ je_rallocx(void *ptr, size_t size, int flags)
arena_chunk_t *chunk;
try_tcache_alloc = false;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- try_tcache_dalloc = (chunk == ptr || chunk->arena !=
- arenas[arena_ind]);
- arena = arenas[arena_ind];
+ arena = arena_get(tsd, arena_ind, true, true);
+ if (unlikely(arena == NULL))
+ goto label_oom;
+ try_tcache_dalloc = (chunk == ptr || chunk->arena != arena);
} else {
try_tcache_alloc = true;
try_tcache_dalloc = true;
@@ -1825,6 +2083,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) {
unsigned arena_ind = MALLOCX_ARENA_GET(flags);
+ // XX Dangerous arenas read.
arena = arenas[arena_ind];
} else
arena = NULL;
@@ -1875,16 +2134,24 @@ je_sallocx(const void *ptr, int flags)
void
je_dallocx(void *ptr, int flags)
{
+ tsd_t *tsd;
bool try_tcache;
assert(ptr != NULL);
assert(malloc_initialized || IS_INITIALIZER);
+ tsd = tsd_fetch();
if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) {
unsigned arena_ind = MALLOCX_ARENA_GET(flags);
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- try_tcache = (chunk == ptr || chunk->arena !=
- arenas[arena_ind]);
+ arena_t *arena = arena_get(tsd, arena_ind, true, true);
+ /*
+ * If arena is NULL, the application passed an arena that has
+ * never been used before, which is unsupported during
+ * deallocation.
+ */
+ assert(arena != NULL);
+ try_tcache = (chunk == ptr || chunk->arena != arena);
} else
try_tcache = true;
@@ -1908,6 +2175,7 @@ inallocx(size_t size, int flags)
void
je_sdallocx(void *ptr, size_t size, int flags)
{
+ tsd_t *tsd;
bool try_tcache;
size_t usize;
@@ -1916,16 +2184,22 @@ je_sdallocx(void *ptr, size_t size, int flags)
usize = inallocx(size, flags);
assert(usize == isalloc(ptr, config_prof));
+ tsd = tsd_fetch();
if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) {
unsigned arena_ind = MALLOCX_ARENA_GET(flags);
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- try_tcache = (chunk == ptr || chunk->arena !=
- arenas[arena_ind]);
+ arena_t *arena = arena_get(tsd, arena_ind, true, true);
+ /*
+ * If arena is NULL, the application passed an arena that has
+ * never been used before, which is unsupported during
+ * deallocation.
+ */
+ try_tcache = (chunk == ptr || chunk->arena != arena);
} else
try_tcache = true;
UTRACE(ptr, 0, 0);
- isfree(tsd_fetch(), ptr, usize, try_tcache);
+ isfree(tsd, ptr, usize, try_tcache);
}
size_t
@@ -2105,55 +2379,3 @@ jemalloc_postfork_child(void)
}
/******************************************************************************/
-/*
- * The following functions are used for TLS allocation/deallocation in static
- * binaries on FreeBSD. The primary difference between these and i[mcd]alloc()
- * is that these avoid accessing TLS variables.
- */
-
-static void *
-a0alloc(size_t size, bool zero)
-{
-
- if (unlikely(malloc_init()))
- return (NULL);
-
- if (size == 0)
- size = 1;
-
- if (size <= arena_maxclass)
- return (arena_malloc(NULL, arenas[0], size, zero, false));
- else
- return (huge_malloc(NULL, arenas[0], size, zero));
-}
-
-void *
-a0malloc(size_t size)
-{
-
- return (a0alloc(size, false));
-}
-
-void *
-a0calloc(size_t num, size_t size)
-{
-
- return (a0alloc(num * size, true));
-}
-
-void
-a0free(void *ptr)
-{
- arena_chunk_t *chunk;
-
- if (ptr == NULL)
- return;
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr)
- arena_dalloc(NULL, chunk, ptr, false);
- else
- huge_dalloc(ptr);
-}
-
-/******************************************************************************/
diff --git a/src/tcache.c b/src/tcache.c
index 2c968c68..1bf70269 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -246,6 +246,14 @@ tcache_arena_associate(tcache_t *tcache, arena_t *arena)
}
void
+tcache_arena_reassociate(tcache_t *tcache, arena_t *arena)
+{
+
+ /*
+ * Detach tcache from whatever arena it is currently associated with,
+ * then attach it to arena. The order matters: dissociation must come
+ * first.
+ */
+ tcache_arena_dissociate(tcache);
+ tcache_arena_associate(tcache, arena);
+}
+
+void
tcache_arena_dissociate(tcache_t *tcache)
{
@@ -261,13 +269,17 @@ tcache_arena_dissociate(tcache_t *tcache)
tcache_t *
tcache_get_hard(tsd_t *tsd)
{
+ arena_t *arena;
if (!tcache_enabled_get()) {
if (tsd_nominal(tsd))
tcache_enabled_set(false); /* Memoize. */
return (NULL);
}
- return (tcache_create(choose_arena(tsd, NULL)));
+ arena = arena_choose(tsd, NULL);
+ /*
+ * arena_choose() can now fail (it propagates OOM); report that as "no
+ * tcache" instead of handing a NULL arena to tcache_create().
+ */
+ if (unlikely(arena == NULL))
+ return (NULL);
+ return (tcache_create(arena));
}
tcache_t *
diff --git a/src/tsd.c b/src/tsd.c
index cbc64e44..59253fe3 100644
--- a/src/tsd.c
+++ b/src/tsd.c
@@ -15,16 +15,14 @@ void *
malloc_tsd_malloc(size_t size)
{
- /* Avoid choose_arena() in order to dodge bootstrapping issues. */
- return (arena_malloc(NULL, arenas[0], CACHELINE_CEILING(size), false,
- false));
+ return (a0malloc(CACHELINE_CEILING(size)));
}
void
malloc_tsd_dalloc(void *wrapper)
{
- idalloct(NULL, wrapper, false);
+ /* Wrappers are obtained via a0malloc(), so release them via a0free(). */
+ a0free(wrapper);
}
void
@@ -106,15 +104,24 @@ MALLOC_TSD
}
bool
-malloc_tsd_boot(void)
+malloc_tsd_boot0(void)
{
ncleanups = 0;
- if (tsd_boot())
+ if (tsd_boot0())
return (true);
+ /*
+ * Raise the arenas_cache bypass flag for the bootstrapping thread so
+ * that arena lookups do not try to allocate an arenas_cache yet;
+ * malloc_tsd_boot1() lowers it again.
+ */
+ *tsd_arenas_cache_bypassp_get(tsd_fetch()) = true;
return (false);
}
+/*
+ * Second tsd bootstrapping stage: run tsd_boot1(), then lower the
+ * arenas_cache bypass flag that malloc_tsd_boot0() raised, so that the
+ * calling thread may allocate an arenas_cache from now on.
+ */
+void
+malloc_tsd_boot1(void)
+{
+
+ tsd_boot1();
+ *tsd_arenas_cache_bypassp_get(tsd_fetch()) = false;
+}
+
#ifdef _WIN32
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
diff --git a/test/unit/tsd.c b/test/unit/tsd.c
index eb1c5976..b031c484 100644
--- a/test/unit/tsd.c
+++ b/test/unit/tsd.c
@@ -6,6 +6,7 @@ typedef unsigned int data_t;
static bool data_cleanup_executed;
+malloc_tsd_types(data_, data_t)
malloc_tsd_protos(, data_, data_t)
void