/* reducer_impl.cpp                  -*-C++-*-
 *
 *************************************************************************
 *
 *  @copyright
 *  Copyright (C) 2009-2013, Intel Corporation
 *  All rights reserved.
 *
 *  @copyright
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *    * Neither the name of Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived
 *      from this software without specific prior written permission.
 *
 *  @copyright
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 *
 *  Patents Pending, Intel Corporation.
 **************************************************************************/

/**
 * Support for reducers
 */

// ICL: Don't complain about conversion from pointer to same-sized integral
// type in hashfun.  That's why we're using size_t
#ifdef _WIN32
#   pragma warning(disable: 1684)
#endif

#include "reducer_impl.h"
#include "scheduler.h"
#include "bug.h"
#include "os.h"
#include "global_state.h"
#include "frame_malloc.h"

#include "cilk/hyperobject_base.h"
#include "cilktools/cilkscreen.h"
#include "internal/abi.h"

#if REDPAR_DEBUG > 0
#include <stdio.h>
#include <stdlib.h>
#endif

#define DBG if(0) // if(1) enables some internal checks

// Check that w is the currently executing worker.  This method is a
// no-op unless the debug level is set high enough.
static inline void verify_current_wkr(__cilkrts_worker *w)
{
#if REDPAR_DEBUG >= 5
    __cilkrts_worker* tmp = __cilkrts_get_tls_worker();
    if (w != tmp) {
        fprintf(stderr, "W=%d, actual=%d... missing a refresh....\n",
                w->self, tmp->self);
    }
    CILK_ASSERT(w == tmp); // __cilkrts_get_tls_worker());
#endif
}

// Suppress clang warning that the expression result is unused
#if defined(__clang__) && (! defined(__INTEL_COMPILER))
#   pragma clang diagnostic push
#   pragma clang diagnostic ignored "-Wunused-value"
#endif // __clang__

/// Helper class to disable and re-enable Cilkscreen
struct DisableCilkscreen
{
    DisableCilkscreen () { __cilkscreen_disable_checking(); }
    ~DisableCilkscreen () { __cilkscreen_enable_checking(); }
};

/// Helper class to enable and re-disable Cilkscreen
struct EnableCilkscreen
{
    EnableCilkscreen () { __cilkscreen_enable_checking(); }
    ~EnableCilkscreen () { __cilkscreen_disable_checking(); }
};

#if defined(__clang__) && (! defined(__INTEL_COMPILER))
#   pragma clang diagnostic pop
#endif // __clang__
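// Usage pattern for the guards above (illustrative sketch only; the function
// and callback names in it are placeholders, not runtime APIs):
//
//     void some_runtime_entry_point()
//     {
//         DisableCilkscreen guard;        // runtime bookkeeping: not a race
//         ...
//         {
//             EnableCilkscreen user_code; // user callback: do check it
//             call_user_callback();
//         }
//     }                                   // checking restored on scope exit
//
// This is the bracketing that __cilkrts_hyper_lookup() below uses around the
// call to the user's identity function.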
/**
 * @brief Element for a hyperobject
 */
struct elem {
    void                       *key;  ///< Shared key for this hyperobject
    __cilkrts_hyperobject_base *hb;   ///< Base of the hyperobject.
    void                       *view; ///< Strand-private view of this hyperobject

    /// Destroy and deallocate the view object for this element and set view to
    /// null.
    void destroy();

    /// Returns true if this element contains a leftmost view.
    bool is_leftmost() const;
};

/** Bucket containing at most NMAX elements */
struct bucket {
    /// Size of the array of elements for this bucket
    size_t nmax;

    /**
     * We use the ``struct hack'' to allocate an array of variable
     * dimension at the end of the struct.  However, we allocate a
     * total of NMAX+1 elements instead of NMAX.  The last one always
     * has key == 0, which we use as a termination criterion
     */
    elem el[1];
};

/**
 * Class that implements the map for reducers so we can find the
 * view for a strand.
 */
struct cilkred_map {
    /** Handy pointer to the global state */
    global_state_t *g;

    /** Number of elements in table */
    size_t nelem;

    /** Number of buckets */
    size_t nbuckets;

    /** Array of pointers to buckets */
    bucket **buckets;

    /** Set true if merging (for debugging purposes) */
    bool merging;

    /** Set true for leftmost reducer map */
    bool is_leftmost;

    /** @brief Return element mapped to 'key' or null if not found. */
    elem *lookup(void *key);

    /**
     * @brief Insert key/value element into hash map without rehashing.
     * Does not check for duplicate key.
     */
    elem *insert_no_rehash(__cilkrts_worker           *w,
                           void                       *key,
                           __cilkrts_hyperobject_base *hb,
                           void                       *value);

    /**
     * @brief Insert key/value element into hash map, rehashing if necessary.
     * Does not check for duplicate key.
     */
    inline elem *rehash_and_insert(__cilkrts_worker           *w,
                                   void                       *key,
                                   __cilkrts_hyperobject_base *hb,
                                   void                       *value);

    /** @brief Grow bucket by one element, reallocating bucket if necessary */
    static elem *grow(__cilkrts_worker *w, bucket **bp);

    /** @brief Rehash a worker's reducer map */
    void rehash(__cilkrts_worker *);

    /**
     * @brief Returns true if a rehash is needed due to the number of elements
     * that have been inserted.
     */
    inline bool need_rehash_p() const;

    /** @brief Allocate and initialize the buckets */
    void make_buckets(__cilkrts_worker *w, size_t nbuckets);

    /**
     * Specify behavior when the same key is present in both maps passed
     * into merge().
     */
    enum merge_kind
    {
        MERGE_UNORDERED,   ///< Assertion fails
        MERGE_INTO_LEFT,   ///< Merges the argument from the right into the left
        MERGE_INTO_RIGHT   ///< Merges the argument from the left into the right
    };

    /**
     * @brief Merge another reducer map into this one, destroying the other
     * map in the process.
     */
    __cilkrts_worker* merge(__cilkrts_worker *current_wkr,
                            cilkred_map      *other_map,
                            enum merge_kind   kind);

    /** @brief check consistency of a reducer map */
    void check(bool allow_null_view);

    /** @brief Test whether the cilkred_map is empty */
    bool is_empty() { return nelem == 0; }
};
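/* Illustrative layout sketch for one bucket, assuming nmax == 4.
 * sizeof_bucket()/alloc_bucket() below reserve nmax elements beyond the one
 * declared in `elem el[1]`, so there are nmax + 1 slots in all; the final
 * slot's key stays 0 and acts as the scan terminator:
 *
 *     +------+-------+-------+-------+-------+-----------------+
 *     | nmax | el[0] | el[1] | el[2] | el[3] | el[4] (key = 0) |
 *     +------+-------+-------+-------+-------+-----------------+
 *
 *     sizeof_bucket(4) == sizeof(bucket) + 4 * sizeof(elem)
 */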
static inline
struct cilkred_map* install_new_reducer_map(__cilkrts_worker *w)
{
    cilkred_map *h;
    h = __cilkrts_make_reducer_map(w);
    w->reducer_map = h;
    return h;
}

static size_t sizeof_bucket(size_t nmax)
{
    bucket *b = 0;
    return (sizeof(*b) + nmax * sizeof(b->el[0]));
}

static bucket *alloc_bucket(__cilkrts_worker *w, size_t nmax)
{
    bucket *b = (bucket *)
        __cilkrts_frame_malloc(w, sizeof_bucket(nmax));
    b->nmax = nmax;
    return b;
}

static void free_bucket(__cilkrts_worker *w, bucket **bp)
{
    bucket *b = *bp;
    if (b) {
        __cilkrts_frame_free(w, b, sizeof_bucket(b->nmax));
        *bp = 0;
    }
}

/* round up nmax to fill a memory allocator block completely */
static size_t roundup(size_t nmax)
{
    size_t sz = sizeof_bucket(nmax);

    /* round up size to a full malloc block */
    sz = __cilkrts_frame_malloc_roundup(sz);

    /* invert sizeof_bucket() */
    nmax = ((sz - sizeof(bucket)) / sizeof(elem));

    return nmax;
}

static bool is_power_of_2(size_t n)
{
    return (n & (n - 1)) == 0;
}

void cilkred_map::make_buckets(__cilkrts_worker *w,
                               size_t            new_nbuckets)
{
    nbuckets = new_nbuckets;

    CILK_ASSERT(is_power_of_2(nbuckets));
#if defined __GNUC__ && defined __ICC
    /* bug workaround -- suppress calls to _intel_fast_memset */
    bucket *volatile* new_buckets = (bucket *volatile*)
#else
    bucket **new_buckets = (bucket **)
#endif
        __cilkrts_frame_malloc(w, nbuckets * sizeof(*(buckets)));

#if REDPAR_DEBUG >= 1
    fprintf(stderr, "W=%d, desc=make_buckets, new_buckets=%p, new_nbuckets=%zd\n",
            w->self, new_buckets, new_nbuckets);
#endif

    for (size_t i = 0; i < new_nbuckets; ++i)
        new_buckets[i] = 0;
#if defined __GNUC__ && defined __ICC
    buckets = (bucket **)new_buckets;
#else
    buckets = new_buckets;
#endif
    nelem = 0;
}

static void free_buckets(__cilkrts_worker *w,
                         bucket          **buckets,
                         size_t            nbuckets)
{
    size_t i;

#if REDPAR_DEBUG >= 1
    verify_current_wkr(w);
    fprintf(stderr, "W=%d, desc=free_buckets, buckets=%p, size=%zd\n",
            w->self, buckets, nbuckets * sizeof(*buckets));
#endif

    for (i = 0; i < nbuckets; ++i)
        free_bucket(w, buckets + i);

    __cilkrts_frame_free(w, buckets, nbuckets * sizeof(*buckets));
}

static size_t minsz(size_t nelem)
{
    return 1U + nelem + nelem / 8U;
}

static size_t nextsz(size_t nelem)
{
    return 2 * nelem;
}

bool cilkred_map::need_rehash_p() const
{
    return minsz(nelem) > nbuckets;
}

static inline size_t hashfun(const cilkred_map *h, void *key)
{
    size_t k = (size_t) key;

    k ^= k >> 21;
    k ^= k >> 8;
    k ^= k >> 3;

    return k & (h->nbuckets - 1);
}
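// Note on the hash: nbuckets is always a power of two (asserted in
// make_buckets), so the final mask `k & (nbuckets - 1)` is equivalent to
// k % nbuckets.  The xor-shift folding above mixes high pointer bits into the
// low bits so that pointer keys, whose low bits carry little entropy because
// of alignment, still spread across the buckets.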
// Given a __cilkrts_hyperobject_base, return the key to that hyperobject in
// the reducer map.
static inline void* get_hyperobject_key(__cilkrts_hyperobject_base *hb)
{
    // The current implementation uses the address of the leftmost view as
    // the key.
    return reinterpret_cast<char *>(hb) + hb->__view_offset;
}

// Given a hyperobject key, return a pointer to the leftmost object.  In the
// current implementation, the address of the leftmost object IS the key, so
// this function is an effective noop.
static inline void* get_leftmost_view(void *key)
{
    return key;
}

/* debugging support: check consistency of a reducer map */
void cilkred_map::check(bool allow_null_view)
{
    size_t count = 0;

    CILK_ASSERT(buckets);
    for (size_t i = 0; i < nbuckets; ++i) {
        bucket *b = buckets[i];
        if (b)
            for (elem *el = b->el; el->key; ++el) {
                CILK_ASSERT(allow_null_view || el->view);
                ++count;
            }
    }
    CILK_ASSERT(nelem == count);
    /*global_reducer_map::check();*/
}

/* grow bucket by one element, reallocating bucket if necessary */
elem *cilkred_map::grow(__cilkrts_worker *w,
                        bucket          **bp)
{
    size_t i, nmax, nnmax;
    bucket *b, *nb;

    b = *bp;
    if (b) {
        nmax = b->nmax;
        /* find empty element if any */
        for (i = 0; i < nmax; ++i)
            if (b->el[i].key == 0)
                return &(b->el[i]);
        /* do not use the last one even if empty */
    } else {
        nmax = 0;
    }

    verify_current_wkr(w);
    /* allocate a new bucket */
    nnmax = roundup(2 * nmax);
    nb = alloc_bucket(w, nnmax);

    /* copy old bucket into new */
    for (i = 0; i < nmax; ++i)
        nb->el[i] = b->el[i];

    free_bucket(w, bp);
    *bp = nb;

    /* zero out extra elements */
    for (; i < nnmax; ++i)
        nb->el[i].key = 0;

    /* zero out the last one */
    nb->el[i].key = 0;

    return &(nb->el[nmax]);
}

elem *cilkred_map::insert_no_rehash(__cilkrts_worker           *w,
                                    void                       *key,
                                    __cilkrts_hyperobject_base *hb,
                                    void                       *view)
{
#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=insert_no_rehash, this_map=%p]\n",
            w->self, this);
    verify_current_wkr(w);
#endif

    CILK_ASSERT((w == 0 && g == 0) || w->g == g);
    CILK_ASSERT(key != 0);
    CILK_ASSERT(view != 0);

    elem *el = grow(w, &(buckets[hashfun(this, key)]));

#if REDPAR_DEBUG >= 3
    fprintf(stderr, "[W=%d, this=%p, inserting key=%p, view=%p, el = %p]\n",
            w->self, this, key, view, el);
#endif

    el->key  = key;
    el->hb   = hb;
    el->view = view;
    ++nelem;

    return el;
}

void cilkred_map::rehash(__cilkrts_worker *w)
{
#if REDPAR_DEBUG >= 1
    fprintf(stderr, "[W=%d, desc=rehash, this_map=%p, g=%p, w->g=%p]\n",
            w->self, this, g, w->g);
    verify_current_wkr(w);
#endif
    CILK_ASSERT((w == 0 && g == 0) || w->g == g);

    size_t onbuckets = nbuckets;
    size_t onelem = nelem;
    bucket **obuckets = buckets;
    size_t i;
    bucket *b;

    make_buckets(w, nextsz(nbuckets));

    for (i = 0; i < onbuckets; ++i) {
        b = obuckets[i];
        if (b) {
            elem *oel;
            for (oel = b->el; oel->key; ++oel)
                insert_no_rehash(w, oel->key, oel->hb, oel->view);
        }
    }

    CILK_ASSERT(nelem == onelem);

    free_buckets(w, obuckets, onbuckets);
}

elem *cilkred_map::rehash_and_insert(__cilkrts_worker           *w,
                                     void                       *key,
                                     __cilkrts_hyperobject_base *hb,
                                     void                       *view)
{
#if REDPAR_DEBUG >= 1
    fprintf(stderr, "W=%d, this_map =%p, inserting key=%p, view=%p\n",
            w->self, this, key, view);
    verify_current_wkr(w);
#endif

    if (need_rehash_p())
        rehash(w);

    return insert_no_rehash(w, key, hb, view);
}

elem *cilkred_map::lookup(void *key)
{
    bucket *b = buckets[hashfun(this, key)];

    if (b) {
        elem *el;
        for (el = b->el; el->key; ++el) {
            if (el->key == key) {
                CILK_ASSERT(el->view);
                return el;
            }
        }
    }

    return 0;
}

void elem::destroy()
{
    if (! is_leftmost()) {

        // Call destroy_fn and deallocate_fn on the view, but not if it's the
        // leftmost view.
        cilk_c_monoid *monoid = &(hb->__c_monoid);
        cilk_c_reducer_destroy_fn_t    destroy_fn    = monoid->destroy_fn;
        cilk_c_reducer_deallocate_fn_t deallocate_fn = monoid->deallocate_fn;

        destroy_fn((void*)hb, view);
        deallocate_fn((void*)hb, view);
    }

    view = 0;
}

inline
bool elem::is_leftmost() const
{
    // implementation uses the address of the leftmost view as the key, so if
    // key == view, then this element refers to the leftmost view.
    return key == view;
}
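/* The exported entry points below are what compiled reducer code calls into.
 * A rough sketch of a C reducer's lifetime, hedged because the exact macro
 * expansions live in the public cilk/reducer headers rather than here:
 *
 *     CILK_C_REGISTER_REDUCER(r);   // reaches __cilkrts_hyper_create():
 *                                   //   records the initial (leftmost) view
 *     ... REDUCER_VIEW(r) ...       // reaches __cilkrts_hyper_lookup():
 *                                   //   finds or lazily creates this
 *                                   //   strand's private view
 *     CILK_C_UNREGISTER_REDUCER(r); // reaches __cilkrts_hyper_destroy():
 *                                   //   drops the entry from the map
 */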
/* remove the reducer from the current reducer map.  If the reducer exists in
   maps other than the current one, the behavior is undefined. */
extern "C"
CILK_EXPORT void __CILKRTS_STRAND_STALE(
    __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *hb))
{
    // Disable Cilkscreen for the duration of this call.  The destructor for
    // this class will re-enable Cilkscreen when the method returns.  This
    // will prevent Cilkscreen from reporting apparent races in reducers
    DisableCilkscreen x;

    __cilkrts_worker* w = __cilkrts_get_tls_worker();
    if (! w) {
        // If no worker, then Cilk is not running and there is no reducer
        // map.  Do nothing.  The reducer's destructor will take care of
        // destroying the leftmost view.
        return;
    }

    const char *UNSYNCED_REDUCER_MSG =
        "Destroying a reducer while it is visible to unsynced child tasks, or\n"
        "calling CILK_C_UNREGISTER_REDUCER() on an unregistered reducer.\n"
        "Did you forget a _Cilk_sync or CILK_C_REGISTER_REDUCER()?";

    cilkred_map* h = w->reducer_map;
    if (NULL == h)
        cilkos_error(UNSYNCED_REDUCER_MSG); // Does not return

    if (h->merging) {
        verify_current_wkr(w);
        __cilkrts_bug("User error: hyperobject used by another hyperobject");
    }

    void* key = get_hyperobject_key(hb);
    elem *el = h->lookup(key);

    // Verify that the reducer is being destroyed from the leftmost strand for
    // which the reducer is defined.
    if (! (el && el->is_leftmost()))
        cilkos_error(UNSYNCED_REDUCER_MSG);

#if REDPAR_DEBUG >= 3
    fprintf(stderr, "[W=%d, key=%p, lookup in map %p, found el=%p, about to destroy]\n",
            w->self, key, h, el);
#endif

    // Remove the element from the hash bucket.  Do not bother shrinking
    // the bucket.  Note that the destroy() function does not actually
    // call the destructor for the leftmost view.
    el->destroy();
    do {
        el[0] = el[1];
        ++el;
    } while (el->key);
    --h->nelem;

#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=hyper_destroy_finish, key=%p, w->reducer_map=%p]\n",
            w->self, key, w->reducer_map);
#endif
}

extern "C"
CILK_EXPORT
void __cilkrts_hyper_create(__cilkrts_hyperobject_base *hb)
{
    // This function registers the specified hyperobject in the current
    // reducer map and registers the initial value of the hyperobject as the
    // leftmost view of the reducer.
    __cilkrts_worker *w = __cilkrts_get_tls_worker();
    if (! w) {
        // If there is no worker, then there is nothing to do: The initial
        // value will automatically be used as the left-most view when we
        // enter Cilk.
        return;
    }

    // Disable Cilkscreen for the duration of this call.  The destructor for
    // this class will re-enable Cilkscreen when the method returns.  This
    // will prevent Cilkscreen from reporting apparent races in reducers
    DisableCilkscreen x;

    void* key = get_hyperobject_key(hb);
    void* view = get_leftmost_view(key);
    cilkred_map *h = w->reducer_map;

    if (__builtin_expect(!h, 0)) {
        h = install_new_reducer_map(w);
#if REDPAR_DEBUG >= 2
        fprintf(stderr, "[W=%d, hb=%p, hyper_create, installed new map %p, view=%p]\n",
                w->self, hb, h, view);
#endif
    }

    /* Must not exist. */
    CILK_ASSERT(h->lookup(key) == NULL);

#if REDPAR_DEBUG >= 3
    verify_current_wkr(w);
    fprintf(stderr, "[W=%d, hb=%p, lookup in map %p of view %p, should be null]\n",
            w->self, hb, h, view);
    fprintf(stderr, "W=%d, h=%p, inserting key %p, view%p\n",
            w->self, h, &(hb->__c_monoid), view);
#endif

    if (h->merging)
        __cilkrts_bug("User error: hyperobject used by another hyperobject");

    CILK_ASSERT(w->reducer_map == h);
    // The address of the leftmost value is the same as the key for lookup.
    (void) h->rehash_and_insert(w, view, hb, view);
}
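/* For orientation, a minimal sketch of the monoid callbacks a C reducer
 * supplies (hypothetical sum-of-long reducer; the argument order matches how
 * the callbacks are invoked in this file: allocate_fn(hb, __view_size),
 * identity_fn(hb, view), reduce_fn(hb, left_view, right_view),
 * destroy_fn(hb, view), deallocate_fn(hb, view)):
 *
 *     static void long_sum_reduce(void *hb, void *left, void *right)
 *         { *(long *)left += *(long *)right; }
 *     static void long_sum_identity(void *hb, void *view)
 *         { *(long *)view = 0; }
 *     // destroy_fn can be __cilkrts_hyperobject_noop_destroy; allocate_fn
 *     // and deallocate_fn can be __cilkrts_hyperobject_alloc/_dealloc below.
 *
 * __cilkrts_hyper_lookup() calls allocate_fn and identity_fn the first time
 * a stolen strand touches the reducer; cilkred_map::merge() calls reduce_fn
 * when strands sync.
 */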
extern "C"
CILK_EXPORT void* __CILKRTS_STRAND_PURE(
    __cilkrts_hyper_lookup(__cilkrts_hyperobject_base *hb))
{
    __cilkrts_worker* w = __cilkrts_get_tls_worker_fast();
    void* key = get_hyperobject_key(hb);
    if (! w)
        return get_leftmost_view(key);

    // Disable Cilkscreen for the duration of this call.  This will
    // prevent Cilkscreen from reporting apparent races in reducers
    DisableCilkscreen dguard;

    if (__builtin_expect(w->g->force_reduce, 0))
        __cilkrts_promote_own_deque(w);
    cilkred_map* h = w->reducer_map;

    if (__builtin_expect(!h, 0)) {
        h = install_new_reducer_map(w);
    }

    if (h->merging)
        __cilkrts_bug("User error: hyperobject used by another hyperobject");

    elem* el = h->lookup(key);
    if (! el) {
        /* lookup failed; insert a new default element */
        void *rep;

        {
            /* re-enable cilkscreen while calling the constructor */
            EnableCilkscreen eguard;
            if (h->is_leftmost)
            {
                // This special case is called only if the reducer was not
                // registered using __cilkrts_hyper_create, e.g., if this is a
                // C reducer in global scope or if there is no bound worker.
                rep = get_leftmost_view(key);
            }
            else
            {
                rep = hb->__c_monoid.allocate_fn((void*)hb,
                                                 hb->__view_size);
                // TBD: Handle exception on identity function
                hb->__c_monoid.identity_fn((void*)hb, rep);
            }
        }

#if REDPAR_DEBUG >= 3
        fprintf(stderr, "W=%d, h=%p, inserting key %p, view%p\n",
                w->self, h, &(hb->__c_monoid), rep);
        CILK_ASSERT(w->reducer_map == h);
#endif
        el = h->rehash_and_insert(w, key, hb, rep);
    }

    return el->view;
}

extern "C" CILK_EXPORT
void* __cilkrts_hyperobject_alloc(void* ignore, std::size_t bytes)
{
    return std::malloc(bytes);
}

extern "C" CILK_EXPORT
void __cilkrts_hyperobject_dealloc(void* ignore, void* view)
{
    std::free(view);
}

/* No-op destroy function */
extern "C" CILK_EXPORT
void __cilkrts_hyperobject_noop_destroy(void* ignore, void* ignore2)
{
}

cilkred_map *__cilkrts_make_reducer_map(__cilkrts_worker *w)
{
    CILK_ASSERT(w);

    cilkred_map *h;
    size_t nbuckets = 1; /* default value */

    h = (cilkred_map *)__cilkrts_frame_malloc(w, sizeof(*h));
#if REDPAR_DEBUG >= 1
    fprintf(stderr, "[W=%d, desc=make_reducer_frame_malloc_reducer_map, h=%p]\n",
            w->self, h);
#endif

    h->g = w ? w->g : 0;
    h->make_buckets(w, nbuckets);
    h->merging = false;
    h->is_leftmost = false;

    return h;
}

/* Destroy a reducer map.  The map must have been allocated from the worker's
   global context and should have been allocated from the same worker. */
void __cilkrts_destroy_reducer_map(__cilkrts_worker *w, cilkred_map *h)
{
    CILK_ASSERT((w == 0 && h->g == 0) || w->g == h->g);
    verify_current_wkr(w);

    /* the reducer map is allowed to contain el->view == NULL here (and
       only here).  We set el->view == NULL only when we know that the
       map will be destroyed immediately afterwards. */
    DBG h->check(/*allow_null_view=*/true);

    bucket *b;
    size_t i;

    for (i = 0; i < h->nbuckets; ++i) {
        b = h->buckets[i];
        if (b) {
            elem *el;
            for (el = b->el; el->key; ++el) {
                if (el->view)
                    el->destroy();
            }
        }
    }

    free_buckets(w, h->buckets, h->nbuckets);

#if REDPAR_DEBUG >= 1
    fprintf(stderr, "W=%d, destroy_red_map, freeing map h=%p, size=%zd\n",
            w->self, h, sizeof(*h));
#endif

    __cilkrts_frame_free(w, h, sizeof(*h));
}
/* Set the specified reducer map as the leftmost map if is_leftmost is true,
   otherwise, set it to not be the leftmost map. */
void __cilkrts_set_leftmost_reducer_map(cilkred_map *h, int is_leftmost)
{
    h->is_leftmost = is_leftmost;
}

__cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w,
                                     cilkred_map      *other_map,
                                     enum merge_kind   kind)
{
    // Disable Cilkscreen while we merge the maps.  The destructor for
    // the guard class will re-enable Cilkscreen when it goes out of scope.
    // This will prevent Cilkscreen from reporting apparent races in between
    // the reduce function and the reducer operations.  The Cilk runtime
    // guarantees that a pair of reducer maps will only be merged when no
    // other strand will access them.
    DisableCilkscreen guard;

#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=merge, this_map=%p, other_map=%p]\n",
            w->self, this, other_map);
#endif
    // Remember the current stack frame.
    __cilkrts_stack_frame *current_sf = w->current_stack_frame;
    merging = true;
    other_map->merging = true;

    // Merging to the leftmost view is a special case because every leftmost
    // element must be initialized before the merge.
    CILK_ASSERT(!other_map->is_leftmost /* || kind == MERGE_UNORDERED */);
    bool merge_to_leftmost = (this->is_leftmost
                              /* && !other_map->is_leftmost */);

    DBG check(/*allow_null_view=*/false);
    DBG other_map->check(/*allow_null_view=*/false);

    for (size_t i = 0; i < other_map->nbuckets; ++i) {
        bucket *b = other_map->buckets[i];
        if (b) {
            for (elem *other_el = b->el; other_el->key; ++other_el) {
                /* Steal the value from the other map, which will be destroyed
                   at the end of this operation. */
                void *other_view = other_el->view;
                CILK_ASSERT(other_view);

                void *key = other_el->key;
                __cilkrts_hyperobject_base *hb = other_el->hb;
                elem *this_el = lookup(key);

                if (this_el == 0 && merge_to_leftmost) {
                    /* Initialize leftmost view before merging. */
                    void* leftmost = get_leftmost_view(key);
                    // leftmost == other_view can be true if the initial view
                    // was created in other than the leftmost strand of the
                    // spawn tree, but then made visible to subsequent strands
                    // (E.g., the reducer was allocated on the heap and the
                    // pointer was returned to the caller.)  In such cases,
                    // parallel semantics says that syncing with earlier
                    // strands will always result in 'this_el' being null,
                    // thus propagating the initial view up the spawn tree
                    // until it reaches the leftmost strand.  When synching
                    // with the leftmost strand, leftmost == other_view will
                    // be true and we must avoid reducing the initial view
                    // with itself.
                    if (leftmost != other_view)
                        this_el = rehash_and_insert(w, key, hb, leftmost);
                }

                if (this_el == 0) {
                    /* move object from other map into this one */
                    rehash_and_insert(w, key, hb, other_view);
                    other_el->view = 0;
                    continue; /* No element-level merge necessary */
                }

                /* The same key is present in both maps with values A and B.
                   Three choices: fail, A OP B, B OP A. */
                switch (kind)
                {
                case MERGE_UNORDERED:
                    __cilkrts_bug("TLS Reducer race");
                    break;
                case MERGE_INTO_RIGHT:
                    /* Swap elements in order to preserve object
                       identity */
                    other_el->view = this_el->view;
                    this_el->view = other_view;
                    /* FALL THROUGH */
                case MERGE_INTO_LEFT: {
                    /* Stealing should be disabled during reduce
                       (even if force-reduce is enabled). */
#if DISABLE_PARALLEL_REDUCERS
                    __cilkrts_stack_frame * volatile *saved_protected_tail;
                    saved_protected_tail = __cilkrts_disallow_stealing(w, NULL);
#endif

                    {
                        CILK_ASSERT(current_sf->worker == w);
                        CILK_ASSERT(w->current_stack_frame == current_sf);

                        /* TBD: if reduce throws an exception we need to stop
                           it here. */
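                        /* Note: reduce_fn folds the right-hand view (third
                           argument) into the left-hand view (second); after
                           the MERGE_INTO_RIGHT swap above, this_el->view is
                           always the "left" operand.  The user's reduce
                           operation may itself spawn and be stolen, which is
                           why w is re-read from current_sf immediately after
                           the call. */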
                        hb->__c_monoid.reduce_fn((void*)hb,
                                                 this_el->view,
                                                 other_el->view);
                        w = current_sf->worker;

#if REDPAR_DEBUG >= 2
                        verify_current_wkr(w);
                        CILK_ASSERT(w->current_stack_frame == current_sf);
#endif
                    }

#if DISABLE_PARALLEL_REDUCERS
                    /* Restore stealing */
                    __cilkrts_restore_stealing(w, saved_protected_tail);
#endif
                } break;
                }
            }
        }
    }
    this->is_leftmost = this->is_leftmost || other_map->is_leftmost;
    merging = false;
    other_map->merging = false;
    verify_current_wkr(w);
    __cilkrts_destroy_reducer_map(w, other_map);
    return w;
}

/**
 * Print routine for debugging the merging of reducer maps.
 * A no-op unless REDPAR_DEBUG set high enough.
 */
static inline
void debug_map_merge(__cilkrts_worker  *w,
                     cilkred_map       *left_map,
                     cilkred_map       *right_map,
                     __cilkrts_worker **final_wkr)
{
#if REDPAR_DEBUG >= 2
    fprintf(stderr,
            "[W=%d, desc=finish_merge, left_map=%p, right_map=%p, w->reducer_map=%p, right_ans=%p, final_wkr=%d]\n",
            w->self, left_map, right_map, w->reducer_map, right_map,
            (*final_wkr)->self);
#endif
}

/**
 * merge RIGHT into LEFT;
 * return whichever map allows for faster merge, and destroy the other one.
 *
 * *w_ptr should be the currently executing worker.
 * *w_ptr may change during execution if the reduction is parallel.
 */
cilkred_map*
merge_reducer_maps(__cilkrts_worker **w_ptr,
                   cilkred_map       *left_map,
                   cilkred_map       *right_map)
{
    __cilkrts_worker *w = *w_ptr;
    if (!left_map) {
        debug_map_merge(w, left_map, right_map, w_ptr);
        return right_map;
    }

    if (!right_map) {
        debug_map_merge(w, left_map, right_map, w_ptr);
        return left_map;
    }

    /* Special case, if left_map is leftmost, then always merge into it.
       For C reducers this forces lazy creation of the leftmost views. */
    if (left_map->is_leftmost || left_map->nelem > right_map->nelem) {
        *w_ptr = left_map->merge(w, right_map, cilkred_map::MERGE_INTO_LEFT);
        debug_map_merge(*w_ptr, left_map, right_map, w_ptr);
        return left_map;
    }
    else {
        *w_ptr = right_map->merge(w, left_map, cilkred_map::MERGE_INTO_RIGHT);
        debug_map_merge(*w_ptr, left_map, right_map, w_ptr);
        return right_map;
    }
}

/**
 * Merges RIGHT into LEFT, and then repeatedly calls
 * merge_reducer_maps() until (*w_ptr)->reducer_map is NULL.
 *
 * *w_ptr may change as reductions execute.
 */
cilkred_map*
repeated_merge_reducer_maps(__cilkrts_worker **w_ptr,
                            cilkred_map       *left_map,
                            cilkred_map       *right_map)
{
    // Note: if right_map == NULL but w->reducer_map != NULL, then
    // this loop will reduce w->reducer_map into left_map.
    do {
        left_map = merge_reducer_maps(w_ptr, left_map, right_map);
        verify_current_wkr(*w_ptr);

        // Pull any newly created reducer map and loop around again.
        right_map = (*w_ptr)->reducer_map;
        (*w_ptr)->reducer_map = NULL;
    } while (right_map);

    return left_map;
}

/* End reducer_impl.cpp */