diff options
Diffstat (limited to 'gcc-4.9/libcilkrts/runtime/frame_malloc.c')
-rw-r--r-- | gcc-4.9/libcilkrts/runtime/frame_malloc.c | 462 |
1 files changed, 462 insertions, 0 deletions
diff --git a/gcc-4.9/libcilkrts/runtime/frame_malloc.c b/gcc-4.9/libcilkrts/runtime/frame_malloc.c new file mode 100644 index 000000000..0b38bd209 --- /dev/null +++ b/gcc-4.9/libcilkrts/runtime/frame_malloc.c @@ -0,0 +1,462 @@ +/* frame_malloc.c -*-C-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2009-2013, Intel Corporation + * All rights reserved. + * + * @copyright + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * @copyright + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY + * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + **************************************************************************/ + +#include "frame_malloc.h" +#include "bug.h" +#include "local_state.h" +#include "cilk_malloc.h" + +#ifndef __VXWORKS__ +#include <memory.h> +#endif + +/* #define USE_MMAP 1 */ +#if USE_MMAP +#define __USE_MISC 1 +#include <sys/mman.h> +#include <errno.h> +#endif + +// Define to fill the stack frame header with the fill character when pushing +// it on a free list. Note that this should be #ifdef'd out when checked in! + +#ifdef _DEBUG +#define HEADER_FILL_CHAR 0xbf +#endif + +// HEADER_FILL_CHAR should not be defined when checked in, so put out a warning +// message if this is a release build + +#if defined(NDEBUG) && defined (HEADER_FILL_CHAR) +#pragma message ("Warning: HEADER_FILL_CHAR defined for a release build") +#endif + +static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size); + +#ifndef _WIN32 + +const unsigned short __cilkrts_bucket_sizes[FRAME_MALLOC_NBUCKETS] = +{ + 64, 128, 256, 512, 1024, 2048 +}; + +#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) __cilkrts_bucket_sizes[bucket] + +/* threshold above which we use slow malloc */ +#define FRAME_MALLOC_MAX_SIZE 2048 + +#else // _WIN32 + +/* Note that this must match the implementation of framesz_to_bucket in + * asmilator/layout.ml! */ +#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) ((size_t)(64 << (bucket))) + +/* threshold above which we use slow malloc */ +#define FRAME_MALLOC_MAX_SIZE \ + FRAME_MALLOC_BUCKET_TO_SIZE(FRAME_MALLOC_NBUCKETS - 1) + +#endif // _WIN32 + +/* utility procedures */ +static void push(struct free_list **b, struct free_list *p) +{ +#ifdef HEADER_FILL_CHAR + memset (p, HEADER_FILL_CHAR, FRAME_MALLOC_BUCKET_TO_SIZE(0)); +#endif + /* cons! onto free list */ + p->cdr = *b; + *b = p; +} + +static struct free_list *pop(struct free_list **b) +{ + struct free_list *p = *b; + if (p) + *b = p->cdr; + return p; +} + +/************************************************************* + global allocator: +*************************************************************/ +/* request slightly less than 2^K from the OS, which after malloc + overhead and alignment should end up filling each VM page almost + completely. 128 is a guess of the total malloc overhead and cache + line alignment */ +#define FRAME_MALLOC_CHUNK (32 * 1024 - 128) + +/** Implements linked list of frames */ +struct pool_cons { + char *p; /**< This element of the list */ + struct pool_cons *cdr; /**< Remainder of the list */ +}; + +static void extend_global_pool(global_state_t *g) +{ + /* FIXME: memalign to a cache line? */ + struct pool_cons *c = (struct pool_cons *)__cilkrts_malloc(sizeof(*c)); + g->frame_malloc.pool_begin = + (char *)__cilkrts_malloc((size_t)FRAME_MALLOC_CHUNK); + g->frame_malloc.pool_end = + g->frame_malloc.pool_begin + FRAME_MALLOC_CHUNK; + g->frame_malloc.allocated_from_os += FRAME_MALLOC_CHUNK; + c->p = g->frame_malloc.pool_begin; + c->cdr = g->frame_malloc.pool_list; + g->frame_malloc.pool_list = c; +} + +/* the size is already canonicalized at this point */ +static struct free_list *global_alloc(global_state_t *g, int bucket) +{ + struct free_list *mem; + size_t size; + + CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS); + size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket); + g->frame_malloc.allocated_from_global_pool += size; + + if (!(mem = pop(&g->frame_malloc.global_free_list[bucket]))) { + + CILK_ASSERT(g->frame_malloc.pool_begin <= g->frame_malloc.pool_end); + if (g->frame_malloc.pool_begin + size > g->frame_malloc.pool_end) { + /* We waste the fragment of pool. */ + g->frame_malloc.wasted += + g->frame_malloc.pool_end - g->frame_malloc.pool_begin; + extend_global_pool(g); + } + mem = (struct free_list *)g->frame_malloc.pool_begin; + g->frame_malloc.pool_begin += size; + } + + return mem; +} + +static void global_free(global_state_t *g, void *mem, int bucket) +{ + size_t size; + + CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS); + size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket); + g->frame_malloc.allocated_from_global_pool -= size; + + push(&g->frame_malloc.global_free_list[bucket], mem); +} + +void __cilkrts_frame_malloc_global_init(global_state_t *g) +{ + int i; + + __cilkrts_mutex_init(&g->frame_malloc.lock); + g->frame_malloc.check_for_leaks = 1; + g->frame_malloc.pool_list = 0; + g->frame_malloc.pool_begin = 0; + g->frame_malloc.pool_end = 0; + g->frame_malloc.batch_size = 8000; + g->frame_malloc.potential_limit = 4 * g->frame_malloc.batch_size; + g->frame_malloc.allocated_from_os = 0; + g->frame_malloc.allocated_from_global_pool = 0; + g->frame_malloc.wasted = 0; + for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) + g->frame_malloc.global_free_list[i] = 0; +} + +// Counts how many bytes are in the global free list. +static size_t count_memory_in_global_list(global_state_t *g) +{ + + // Count the memory remaining in the global free list. + size_t size_remaining_in_global_list = 0; + int i; + for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) { + struct free_list *p; + size_t size_in_bucket = 0; + p = g->frame_malloc.global_free_list[i]; + + while (p) { + size_in_bucket += FRAME_MALLOC_BUCKET_TO_SIZE(i); + p = p->cdr; + } + size_remaining_in_global_list += size_in_bucket; + } + return size_remaining_in_global_list; +} + + +void __cilkrts_frame_malloc_global_cleanup(global_state_t *g) +{ + struct pool_cons *c; + + if (g->frame_malloc.check_for_leaks) { + size_t memory_in_global_list = count_memory_in_global_list(g); + // TBD: This check is weak. Short of memory corruption, + // I don't see how we have more memory in the free list + // than allocated from the os. + // Ideally, we should count the memory in the global free list + // and check that we have it all. But I believe the runtime + // itself also uses some memory, which is not being tracked. + if (memory_in_global_list > g->frame_malloc.allocated_from_os) { + __cilkrts_bug("\nError. The Cilk runtime data structures may have been corrupted.\n"); + } + } + + while ((c = g->frame_malloc.pool_list)) { + g->frame_malloc.pool_list = c->cdr; + __cilkrts_free(c->p); + __cilkrts_free(c); + } + + __cilkrts_mutex_destroy(0, &g->frame_malloc.lock); + + // Check that all the memory moved from the global pool into + // workers has been returned to the global pool. + if (g->frame_malloc.check_for_leaks + && (g->frame_malloc.allocated_from_global_pool != 0)) + { + __cilkrts_bug("\n" + "---------------------------" "\n" + " MEMORY LEAK DETECTED!!! " "\n" + "---------------------------" "\n" + "\n" + ); + } +} + +/************************************************************* + per-worker allocator +*************************************************************/ +/* allocate a batch of frames of size SIZE from the global pool and + store them in the worker's free list */ +static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size) +{ + global_state_t *g = w->g; + + __cilkrts_mutex_lock(w, &g->frame_malloc.lock); { +#if USE_MMAP + char *p = mmap(0, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (p == MAP_FAILED) + __cilkrts_bug("mmap failed %d", errno); + assert(size < 4096); + assert(p != MAP_FAILED); + mprotect(p, 4096, PROT_NONE); + mprotect(p + 8192, 4096, PROT_NONE); + w->l->bucket_potential[bucket] += size; + push(&w->l->free_list[bucket], (struct free_list *)(p + 8192 - size)); +#else + size_t bytes_allocated = 0; + do { + w->l->bucket_potential[bucket] += size; + bytes_allocated += size; + push(&w->l->free_list[bucket], global_alloc(g, bucket)); + } while (bytes_allocated < g->frame_malloc.batch_size); +#endif + } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock); + +} + +static void gc_bucket(__cilkrts_worker *w, int bucket, size_t size) +{ + struct free_list *p, *q; + global_state_t *g = w->g; + size_t pot = w->l->bucket_potential[bucket]; + size_t newpot; + + /* Keep up to POT/2 elements in the free list. The cost of + counting up to POT/2 is amortized against POT. */ + newpot = 0; + for (newpot = 0, p = w->l->free_list[bucket]; p && 2 * newpot < pot; + p = p->cdr, newpot += size) + ; + w->l->bucket_potential[bucket] = newpot; + + if (p) { + /* free the rest of the list. The cost of grabbing the lock + is amortized against POT/2; the cost of traversing the rest + of the list is amortized against the free operation that + puts the element on the list. */ + __cilkrts_mutex_lock(w, &g->frame_malloc.lock); { + while ((q = pop(&p->cdr))) +#if USE_MMAP + munmap((char *)q + size - 8192, 12288); +#else + global_free(g, q, bucket); +#endif + } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock); + } +} + +// Free all the memory in this bucket for the specified worker, +// returning it to the global pool's free list. +static void move_bucket_to_global_free_list(__cilkrts_worker *w, + int bucket) +{ + struct free_list *p, *q; + global_state_t *g = w->g; + p = w->l->free_list[bucket]; + + if (p) { + __cilkrts_mutex_lock(w, &g->frame_malloc.lock); { + while ((q = pop(&p))) { +#if USE_MMAP + size_t size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket); + munmap((char *)q + size - 8192, 12288); +#else + global_free(g, q, bucket); +#endif + } + } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock); + } + + // I'm not sure this does anything useful now, since + // the worker is about to be destroyed. But why not? + w->l->bucket_potential[bucket] = 0; +} + +static int bucket_of_size(size_t size) +{ + int i; + + for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) + if (size <= FRAME_MALLOC_BUCKET_TO_SIZE(i)) + return i; + + CILK_ASSERT(0 /* can't happen */); + return -1; +} + +size_t __cilkrts_frame_malloc_roundup(size_t size) +{ + if (size > FRAME_MALLOC_MAX_SIZE) { + /* nothing, leave it alone */ + } else { + int bucket = bucket_of_size(size); + size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket); + } + return size; +} + +size_t __cilkrts_size_of_bucket(int bucket) +{ + CILK_ASSERT(bucket >= 0 && bucket < FRAME_MALLOC_NBUCKETS); + return FRAME_MALLOC_BUCKET_TO_SIZE(bucket); +} + +void *__cilkrts_frame_malloc(__cilkrts_worker *w, size_t size) +{ + int bucket; + void *mem; + + /* if too large, or if no worker, fall back to __cilkrts_malloc() */ + if (!w || size > FRAME_MALLOC_MAX_SIZE) { + NOTE_INTERVAL(w, INTERVAL_FRAME_ALLOC_LARGE); + return __cilkrts_malloc(size); + } + + START_INTERVAL(w, INTERVAL_FRAME_ALLOC); { + bucket = bucket_of_size(size); + size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket); + + while (!(mem = pop(&w->l->free_list[bucket]))) { + /* get a batch of frames from the global pool */ + START_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL) { + allocate_batch(w, bucket, size); + } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL); + } + } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC); + + return mem; +} + +void __cilkrts_frame_free(__cilkrts_worker *w, void *p0, size_t size) +{ + int bucket; + struct free_list *p = (struct free_list *)p0; + + /* if too large, or if no worker, fall back to __cilkrts_free() */ + if (!w || size > FRAME_MALLOC_MAX_SIZE) { + NOTE_INTERVAL(w, INTERVAL_FRAME_FREE_LARGE); + __cilkrts_free(p); + return; + } + +#if CILK_LIB_DEBUG + *(volatile long *)w; +#endif + + START_INTERVAL(w, INTERVAL_FRAME_FREE); { + bucket = bucket_of_size(size); + size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket); + w->l->bucket_potential[bucket] += size; + push(&w->l->free_list[bucket], p); + if (w->l->bucket_potential[bucket] > + w->g->frame_malloc.potential_limit) { + START_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL) { + gc_bucket(w, bucket, size); + } STOP_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL); + } + } STOP_INTERVAL(w, INTERVAL_FRAME_FREE); +} + +void __cilkrts_frame_malloc_per_worker_init(__cilkrts_worker *w) +{ + int i; + local_state *l = w->l; + + for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) { + l->free_list[i] = 0; + l->bucket_potential[i] = 0; + } +} + +void __cilkrts_frame_malloc_per_worker_cleanup(__cilkrts_worker *w) +{ + int i; + // Move memory to the global pool. This operation + // ensures the memory does not become unreachable / leak + // when the worker is destroyed. + for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) { + move_bucket_to_global_free_list(w, i); + } +} + +/* + Local Variables: ** + c-file-style:"bsd" ** + c-basic-offset:4 ** + indent-tabs-mode:nil ** + End: ** +*/ |