author     Daniel Micay <danielmicay@gmail.com>    2014-08-28 15:41:48 -0400
committer  Jason Evans <je@fb.com>                 2014-09-08 17:34:24 -0700
commit     4cfe55166e0173be745c53adb0fecf50d11d1227 (patch)
tree       68748ac2c268a1c7940395eaf637f2d36ec15d55
parent     c3f865074923bf388742da3ec52dca857a0960a2 (diff)
Add support for sized deallocation.
This adds a new `sdallocx` function to the external API, allowing the size to be passed by the caller. It avoids some extra reads in the thread cache fast path. In the case where stats are enabled, this avoids the work of calculating the size from the pointer.

An assertion validates the size that's passed in, so enabling debugging will allow users of the API to debug cases where an incorrect size is passed in.

The performance win for a contrived microbenchmark doing an allocation and immediately freeing it is ~10%. It may have a different impact on a real workload.

Closes #28
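To make the calling convention concrete, here is a minimal usage sketch (illustrative only, not part of the patch); it assumes a jemalloc build containing this change and the default unprefixed public names:

#include <string.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	/* The caller already knows the request size, e.g. from a container. */
	size_t size = 64;
	void *p = mallocx(size, 0);

	if (p == NULL)
		return (1);
	memset(p, 0xa5, size);

	/*
	 * Sized deallocation: passing the original request size lets the
	 * allocator skip reading size metadata on the fast path; with
	 * --enable-debug an assertion validates the size.
	 */
	sdallocx(p, size, 0);
	return (0);
}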
-rw-r--r--  Makefile.in                                       |  1
-rw-r--r--  configure.ac                                      |  2
-rw-r--r--  doc/jemalloc.xml.in                               | 19
-rw-r--r--  include/jemalloc/internal/arena.h                 | 33
-rw-r--r--  include/jemalloc/internal/jemalloc_internal.h.in  | 26
-rw-r--r--  include/jemalloc/internal/private_symbols.txt     |  3
-rw-r--r--  include/jemalloc/jemalloc_protos.h.in             |  1
-rw-r--r--  src/jemalloc.c                                    | 44
-rw-r--r--  test/integration/sdallocx.c                       | 57
-rw-r--r--  test/stress/microbench.c                          | 20
10 files changed, 201 insertions(+), 5 deletions(-)
diff --git a/Makefile.in b/Makefile.in
index 1446dbe6..ac56d8fa 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -136,6 +136,7 @@ TESTS_UNIT_AUX := $(srcroot)test/unit/prof_accum_a.c \
$(srcroot)test/unit/prof_accum_b.c
TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \
$(srcroot)test/integration/allocated.c \
+ $(srcroot)test/integration/sdallocx.c \
$(srcroot)test/integration/mallocx.c \
$(srcroot)test/integration/MALLOCX_ARENA.c \
$(srcroot)test/integration/posix_memalign.c \
diff --git a/configure.ac b/configure.ac
index ce4af213..d221876c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -452,7 +452,7 @@ AC_PROG_RANLIB
AC_PATH_PROG([LD], [ld], [false], [$PATH])
AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH])
-public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size"
+public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx sdallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size"
dnl Check for allocator-related functions that should be wrapped.
AC_CHECK_FUNC([memalign],
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 8f4327f3..e5c229fe 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -38,6 +38,7 @@
<refname>xallocx</refname>
<refname>sallocx</refname>
<refname>dallocx</refname>
+ <refname>sdallocx</refname>
<refname>nallocx</refname>
<refname>mallctl</refname>
<refname>mallctlnametomib</refname>
@@ -121,6 +122,12 @@
<paramdef>int <parameter>flags</parameter></paramdef>
</funcprototype>
<funcprototype>
+ <funcdef>void <function>sdallocx</function></funcdef>
+ <paramdef>void *<parameter>ptr</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>int <parameter>flags</parameter></paramdef>
+ </funcprototype>
+ <funcprototype>
<funcdef>size_t <function>nallocx</function></funcdef>
<paramdef>size_t <parameter>size</parameter></paramdef>
<paramdef>int <parameter>flags</parameter></paramdef>
@@ -228,7 +235,8 @@
<function>rallocx<parameter/></function>,
<function>xallocx<parameter/></function>,
<function>sallocx<parameter/></function>,
- <function>dallocx<parameter/></function>, and
+ <function>dallocx<parameter/></function>,
+ <function>sdallocx<parameter/></function>, and
<function>nallocx<parameter/></function> functions all have a
<parameter>flags</parameter> argument that can be used to specify
options. The functions only check the options that are contextually
@@ -312,6 +320,15 @@
memory referenced by <parameter>ptr</parameter> to be made available for
future allocations.</para>
+ <para>The <function>sdallocx<parameter/></function> function is an
+ extension of <function>dallocx<parameter/></function> with a
+ <parameter>size</parameter> parameter to allow the caller to pass in the
+ allocation size as an optimization. The minimum valid input size is the
+ original requested size of the allocation, and the maximum valid input
+ size is the corresponding value returned by
+ <function>nallocx<parameter/></function> or
+ <function>sallocx<parameter/></function>.</para>
+
<para>The <function>nallocx<parameter/></function> function allocates no
memory, but it performs the same size computation as the
<function>mallocx<parameter/></function> function, and returns the real
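The paragraph added above defines the range of sizes sdallocx accepts: anything between the original request size and the usable size reported by nallocx()/sallocx(). A short sketch of both ends of that range (illustrative only; assumes the extended API from this patch):

#include <jemalloc/jemalloc.h>

void
size_range_example(void)
{
	size_t request = 100;
	/* Upper bound: the usable size jemalloc rounds a 100-byte request to. */
	size_t usable = nallocx(request, 0);
	void *p;

	p = mallocx(request, 0);
	if (p != NULL)
		sdallocx(p, request, 0);	/* minimum valid size: original request */

	p = mallocx(request, 0);
	if (p != NULL)
		sdallocx(p, usable, 0);		/* maximum valid size: nallocx()/sallocx() value */
}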
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 166d0523..6ab0ae71 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -488,6 +488,7 @@ void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
size_t arena_salloc(const void *ptr, bool demote);
void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache);
+void arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
@@ -1139,9 +1140,7 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache)
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
/* Small allocation. */
if (try_tcache && (tcache = tcache_get(false)) != NULL) {
- size_t binind;
-
- binind = arena_ptr_small_binind_get(ptr, mapbits);
+ size_t binind = arena_ptr_small_binind_get(ptr, mapbits);
tcache_dalloc_small(tcache, ptr, binind);
} else
arena_dalloc_small(chunk->arena, chunk, ptr, pageind);
@@ -1157,6 +1156,34 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache)
arena_dalloc_large(chunk->arena, chunk, ptr);
}
}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache)
+{
+ tcache_t *tcache;
+
+ assert(ptr != NULL);
+ assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+ if (size < PAGE) {
+ /* Small allocation. */
+ if (try_tcache && (tcache = tcache_get(false)) != NULL) {
+ size_t binind = small_size2bin(size);
+ tcache_dalloc_small(tcache, ptr, binind);
+ } else {
+ size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+ arena_dalloc_small(chunk->arena, chunk, ptr, pageind);
+ }
+ } else {
+ assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+
+ if (try_tcache && size <= tcache_maxclass && (tcache =
+ tcache_get(false)) != NULL) {
+ tcache_dalloc_large(tcache, ptr, size);
+ } else
+ arena_dalloc_large(chunk->arena, chunk, ptr);
+ }
+}
# endif /* JEMALLOC_ARENA_INLINE_C */
#endif
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 59ae8d55..c0e326d4 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -634,8 +634,10 @@ size_t ivsalloc(const void *ptr, bool demote);
size_t u2rz(size_t usize);
size_t p2rz(const void *ptr);
void idalloct(void *ptr, bool try_tcache);
+void isdalloct(void *ptr, size_t size, bool try_tcache);
void idalloc(void *ptr);
void iqalloc(void *ptr, bool try_tcache);
+void isqalloc(void *ptr, size_t size, bool try_tcache);
void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
arena_t *arena);
@@ -788,6 +790,20 @@ idalloct(void *ptr, bool try_tcache)
}
JEMALLOC_ALWAYS_INLINE void
+isdalloct(void *ptr, size_t size, bool try_tcache)
+{
+ arena_chunk_t *chunk;
+
+ assert(ptr != NULL);
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (chunk != ptr)
+ arena_sdalloc(chunk, ptr, size, try_tcache);
+ else
+ huge_dalloc(ptr);
+}
+
+JEMALLOC_ALWAYS_INLINE void
idalloc(void *ptr)
{
@@ -804,6 +820,16 @@ iqalloc(void *ptr, bool try_tcache)
idalloct(ptr, try_tcache);
}
+JEMALLOC_ALWAYS_INLINE void
+isqalloc(void *ptr, size_t size, bool try_tcache)
+{
+
+ if (config_fill && opt_quarantine)
+ quarantine(ptr);
+ else
+ isdalloct(ptr, size, try_tcache);
+}
+
JEMALLOC_ALWAYS_INLINE void *
iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index 84f05910..3b990b0e 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -61,6 +61,7 @@ arena_ralloc_no_move
arena_redzone_corruption
arena_run_regind
arena_salloc
+arena_sdalloc
arena_stats_merge
arena_tcache_fill_small
arenas
@@ -228,7 +229,9 @@ iralloc
iralloct
iralloct_realign
isalloc
+isdalloct
+isqalloc
isthreaded
ivsalloc
ixalloc
jemalloc_postfork_child
diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in
index b365eb4a..f81adc14 100644
--- a/include/jemalloc/jemalloc_protos.h.in
+++ b/include/jemalloc/jemalloc_protos.h.in
@@ -25,6 +25,7 @@ JEMALLOC_EXPORT size_t @je_@xallocx(void *ptr, size_t size, size_t extra,
JEMALLOC_EXPORT size_t @je_@sallocx(const void *ptr, int flags)
JEMALLOC_ATTR(pure);
JEMALLOC_EXPORT void @je_@dallocx(void *ptr, int flags);
+JEMALLOC_EXPORT void @je_@sdallocx(void *ptr, size_t size, int flags);
JEMALLOC_EXPORT size_t @je_@nallocx(size_t size, int flags)
JEMALLOC_ATTR(pure);
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 71e921b5..527782e8 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1223,6 +1223,24 @@ ifree(void *ptr, bool try_tcache)
JEMALLOC_VALGRIND_FREE(ptr, rzsize);
}
+JEMALLOC_INLINE_C void
+isfree(void *ptr, size_t usize, bool try_tcache)
+{
+ UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
+
+ assert(ptr != NULL);
+ assert(malloc_initialized || IS_INITIALIZER);
+
+ if (config_prof && opt_prof)
+ prof_free(ptr, usize);
+ if (config_stats)
+ thread_allocated_tsd_get()->deallocated += usize;
+ if (config_valgrind && in_valgrind)
+ rzsize = p2rz(ptr);
+ isqalloc(ptr, usize, try_tcache);
+ JEMALLOC_VALGRIND_FREE(ptr, rzsize);
+}
+
void *
je_realloc(void *ptr, size_t size)
{
@@ -1820,6 +1838,32 @@ je_dallocx(void *ptr, int flags)
ifree(ptr, try_tcache);
}
+void
+je_sdallocx(void *ptr, size_t size, int flags)
+{
+ bool try_tcache;
+
+ assert(ptr != NULL);
+ assert(malloc_initialized || IS_INITIALIZER);
+ assert(size == isalloc(ptr, config_prof));
+
+ if ((flags & MALLOCX_LG_ALIGN_MASK) == 0)
+ size = s2u(size);
+ else
+ size = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags));
+
+ if ((flags & MALLOCX_ARENA_MASK) != 0) {
+ unsigned arena_ind = MALLOCX_ARENA_GET(flags);
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ try_tcache = (chunk == ptr || chunk->arena !=
+ arenas[arena_ind]);
+ } else
+ try_tcache = true;
+
+ UTRACE(ptr, 0, 0);
+ isfree(ptr, size, try_tcache);
+}
+
size_t
je_nallocx(size_t size, int flags)
{
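Since je_sdallocx() rounds the passed size with s2u() or sa2u() depending on whether an alignment flag is set, a caller that allocated with MALLOCX_ALIGN should pass the same flag when freeing (the new integration test below does exactly this). An illustrative sketch, not part of the patch:

#include <jemalloc/jemalloc.h>

void
aligned_example(void)
{
	/* 4 KiB-aligned allocation of 100 bytes. */
	int flags = MALLOCX_ALIGN(4096);
	void *p = mallocx(100, flags);

	if (p == NULL)
		return;
	/*
	 * Free with the same alignment flag so the size is rounded the same
	 * way it was on the allocation path.
	 */
	sdallocx(p, 100, flags);
}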
diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c
new file mode 100644
index 00000000..b84817d7
--- /dev/null
+++ b/test/integration/sdallocx.c
@@ -0,0 +1,57 @@
+#include "test/jemalloc_test.h"
+
+#define MAXALIGN (((size_t)1) << 25)
+#define NITER 4
+
+TEST_BEGIN(test_basic)
+{
+ void *ptr = mallocx(64, 0);
+ sdallocx(ptr, 64, 0);
+}
+TEST_END
+
+TEST_BEGIN(test_alignment_and_size)
+{
+ size_t nsz, sz, alignment, total;
+ unsigned i;
+ void *ps[NITER];
+
+ for (i = 0; i < NITER; i++)
+ ps[i] = NULL;
+
+ for (alignment = 8;
+ alignment <= MAXALIGN;
+ alignment <<= 1) {
+ total = 0;
+ for (sz = 1;
+ sz < 3 * alignment && sz < (1U << 31);
+ sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+ for (i = 0; i < NITER; i++) {
+ nsz = nallocx(sz, MALLOCX_ALIGN(alignment) |
+ MALLOCX_ZERO);
+ ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) |
+ MALLOCX_ZERO);
+ total += nsz;
+ if (total >= (MAXALIGN << 1))
+ break;
+ }
+ for (i = 0; i < NITER; i++) {
+ if (ps[i] != NULL) {
+ sdallocx(ps[i], sz,
+ MALLOCX_ALIGN(alignment));
+ ps[i] = NULL;
+ }
+ }
+ }
+ }
+}
+TEST_END
+
+int
+main(void)
+{
+
+ return (test(
+ test_basic,
+ test_alignment_and_size));
+}
diff --git a/test/stress/microbench.c b/test/stress/microbench.c
index 60c02db3..a8267c39 100644
--- a/test/stress/microbench.c
+++ b/test/stress/microbench.c
@@ -72,6 +72,17 @@ malloc_dallocx(void)
dallocx(p, 0);
}
+static void
+malloc_sdallocx(void)
+{
+ void *p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ sdallocx(p, 1, 0);
+}
+
TEST_BEGIN(test_free_vs_dallocx)
{
@@ -80,6 +91,14 @@ TEST_BEGIN(test_free_vs_dallocx)
}
TEST_END
+TEST_BEGIN(test_dallocx_vs_sdallocx)
+{
+
+ compare_funcs(10*1000*1000, 100*1000*1000, "dallocx", malloc_dallocx,
+ "sdallocx", malloc_sdallocx);
+}
+TEST_END
+
static void
malloc_mus_free(void)
{
@@ -135,6 +154,7 @@ main(void)
return (test(
test_malloc_vs_mallocx,
test_free_vs_dallocx,
+ test_dallocx_vs_sdallocx,
test_mus_vs_sallocx,
test_sallocx_vs_nallocx));
}