/* global_state.h -*-C++-*- * ************************************************************************* * * @copyright * Copyright (C) 2009-2013, Intel Corporation * All rights reserved. * * @copyright * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * @copyright * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. **************************************************************************/ /** * @file global_state.h * * @brief The global_state_t structure contains most of the global context * maintained by the Intel Cilk runtime. */ #ifndef INCLUDED_GLOBAL_STATE_DOT_H #define INCLUDED_GLOBAL_STATE_DOT_H #include #include "frame_malloc.h" #include "stats.h" #include "bug.h" #include "cilk_fiber.h" __CILKRTS_BEGIN_EXTERN_C /** * Non-null place-holder for a stack handle that has no meaningful value. */ #define PLACEHOLDER_FIBER ((cilk_fiber *) -2) /** * States for record_or_replay */ enum record_replay_t { RECORD_REPLAY_NONE, RECORD_LOG, REPLAY_LOG }; /** * @brief The global state is a structure that is shared by all workers in * Cilk. * * Make the structure ready for use by calling * cilkg_init_global_state() and then cilkg_publish_global_state(). * * The same global lock should be held while both of these methods are * called. These methods are split because it is useful to execute * other runtime initialization code in between. * * After cilkg_publish_global_state() has completed, Cilk runtime * methods may call cilkg_get_global_state() to look at the published * value without holding the global lock. * * Finally, clean up the global state by calling * cilkg_deinit_global_state(). This method should be called only * after all calls to cilkg_get_global_state() have completed, and * while holding the global lock. * * Before initialization and after deinitialization, the fields in the * global state have unspecified values, except for a few special * fields labeled "USER SETTING", which can be read and written before * initialization and after deinitialization. */ struct global_state_t { /* COMMON_PORTABLE */ /* Fields described as "(fixed)" should not be changed after * initialization. */ /************************************************************************* * Note that debugger integration must reach into the * global state! The debugger integration is depending on the * offsets of the addr_size, system_workers, total_workers, * stealing_disabled, sysdep, and workers. If these offsets change, the * debugger integration library will need to be changed to match!!! *************************************************************************/ int addr_size; ///< Number of bytes for an address, used by debugger (fixed) int system_workers; ///< Number of system workers (fixed) /** * @brief USER SETTING: Maximum number of user workers that can be * bound to cilk workers. * * 0 unless set by user. Call cilkg_calc_max_user_workers to get * the value. */ int max_user_workers; int total_workers; ///< Total number of worker threads allocated (fixed) int workers_running; ///< True when system workers have beens started */ /// Set by debugger to disable stealing (fixed) int stealing_disabled; /// System-dependent part of the global state struct global_sysdep_state *sysdep; /// Array of worker structures. __cilkrts_worker **workers; /******* END OF DEBUGGER-INTEGRATION FIELDS ***************/ /// Number of frames in each worker's lazy task queue __STDNS size_t ltqsize; /** * @brief USER SETTING: Force all possible reductions. * * TRUE if running a p-tool that requires reducers to call the reduce() * method even if no actual stealing occurs. * * When set to TRUE, runtime will simulate steals, forcing calls to the * the reduce() methods of reducers. * */ int force_reduce; /// USER SETTING: Per-worker fiber pool size int fiber_pool_size; /// USER SETTING: Global fiber pool size int global_fiber_pool_size; /** * @brief TRUE when workers should exit scheduling loop so we can * shut down the runtime and free the global state. * * @note @c work_done will be checked *FREQUENTLY* in the scheduling loop * by idle workers. We need to ensure that it's not in a cache line which * may be invalidated by other cores. The surrounding fields are either * constant after initialization or not used until shutdown (stats) so we * should be OK. */ volatile int work_done; int under_ptool; ///< True when running under a serial PIN tool statistics stats; ///< Statistics on use of runtime /** * @brief USER SETTING: Maximum number of stacks the runtime will * allocate (apart from those created by the OS when worker * threads are created). * * If max_stacks == 0,there is no pre-defined maximum. */ unsigned max_stacks; /// Size of each stack size_t stack_size; /// Global cache for per-worker memory struct __cilkrts_frame_cache frame_malloc; /// Global fiber pool cilk_fiber_pool fiber_pool; /** * @brief Track whether the runtime has failed to allocate a * stack. * * Setting this flag prevents multiple warnings from being * issued. */ int failure_to_allocate_stack; /** * @brief USER SETTING: indicate record or replay log. * Set to NULL if not used in this run. */ char *record_replay_file_name; /** * @brief Record/replay state. * Valid states are: * RECORD_REPLAY_NONE - Not recording or replaying a log * RECORD_LOG - Recording a log for replay later * REPLAY_LOG - Replay a log recorded earlier */ enum record_replay_t record_or_replay; /** * @brief Buffer to force max_steal_failures to appear on a * different cache line from the previous member variables. * * This padding is needed because max_steal_failures is read * constantly and other modified values in the global state will * cause thrashing. */ char cache_buf[64]; /** * @brief Maximum number of times a thread should fail to steal * before checking if Cilk is shutting down. */ unsigned int max_steal_failures; /// Pointer to scheduler entry point void (*scheduler)(__cilkrts_worker *w); /** * @brief Buffer to force P and Q to appear on a different cache * line from the previous member variables. */ char cache_buf_2[64]; int P; ///< USER SETTING: number of system workers + 1 (fixed) int Q; ///< Number of user threads currently bound to workers }; /** * @brief Initialize the global state object. This method must both * complete before referencing any fields in the global state, except * those specified as "user-settable values". */ global_state_t* cilkg_init_global_state(); /** * @brief Publish the global state object, so that * cilkg_is_published can return true. * * @param g - the global state created by cilkg_init_global_state() to * publish. * * After the global state object has been published, a thread should * not modify this state unless it has exclusive access (i.e., holds * the global lock). */ void cilkg_publish_global_state(global_state_t* g); /** * @brief Return true if the global state has been fully initialized * and published, and has not been deinitialized. */ int cilkg_is_published(void); /** * @brief De-initializes the global state object. Must be called to free * resources when the global state is no longer needed. */ void cilkg_deinit_global_state(void); /** * @brief Returns the global state object. Result is valid only if the * global state has been published (see cilkg_publish_global_state()). */ static inline global_state_t* cilkg_get_global_state(void) { // "private" extern declaration: extern global_state_t *cilkg_singleton_ptr; __CILKRTS_ASSERT(cilkg_singleton_ptr); // Debug only return cilkg_singleton_ptr; } /** * @brief Implementation of __cilkrts_set_params. * * Set user controllable parameters * @param param - string specifying parameter to be set * @param value - string specifying new value * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0), * CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE, * CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE. * * @attention The wide character version __cilkrts_set_param_w() is available * only on Windows. * * Allowable parameter names: * * - "nworkers" - number of processors that should run Cilk code. * The value is a string of digits to be parsed by strtol. * * - "force reduce" - test reducer callbacks by allocating new views * for every spawn within which a reducer is accessed. This can * significantly reduce performance. The value is "1" or "true" * to enable, "0" or "false" to disable. * @warning Enabling "force reduce" when running with more than a single * worker is currently broken. * * - "max user workers" - (Not publicly documented) Sets the number of slots * allocated for user worker threads * * - "local stacks" - (Not publicly documented) Number of stacks we'll hold in * the per-worker stack cache. Range 1 .. 42. See * cilkg_init_global_state for details. * * - "shared stacks" - (Not publicly documented) Maximum number of stacks * we'll hold in the global stack cache. Maximum value is 42. See * __cilkrts_make_global_state for details * * - "nstacks" - (Not publicly documented at this time, though it may be * exposed in the future) Sets the maximum number of stacks permitted at one * time. If the runtime reaches this maximum, it will cease to allocate * stacks and the app will lose parallelism. 0 means unlimited. Default is * unlimited. Minimum is twice the number of worker threads, though that * cannot be tested at this time. */ int cilkg_set_param(const char* param, const char* value); #ifdef _WIN32 /** * @brief Implementation of __cilkrts_set_params for Unicode characters on * Windows. See the documentation on @ref cilkg_set_param for more details. * * Set user controllable parameters * @param param - string specifying parameter to be set * @param value - string specifying new value * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0), * CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE, * CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE. */ int cilkg_set_param_w(const wchar_t* param, const wchar_t* value); #endif /** * @brief implementation of __cilkrts_get_nworkers() */ static inline int cilkg_get_nworkers(void) { // "private" extern declaration extern global_state_t* cilkg_get_user_settable_values(void); return cilkg_get_user_settable_values()->P; } /** * @brief implementation of __cilkrts_get_total_workers() */ static inline int cilkg_get_total_workers(void) { // "private" extern declaration extern int cilkg_calc_total_workers(void); // This number can fluctate until initialization so we // compute it from scratch return cilkg_calc_total_workers(); } /** * @brief implementation of __cilkrts_get_force_reduce() */ static inline int cilkg_get_force_reduce(void) { // "private" extern declaration extern global_state_t* cilkg_get_user_settable_values(void); return cilkg_get_user_settable_values()->force_reduce; } /** * @brief implementation of __cilkrts_get_stack_size() */ static inline size_t cilkg_get_stack_size(void) { // "private" extern declaration extern global_state_t* cilkg_get_user_settable_values(void); return cilkg_get_user_settable_values()->stack_size; } /** * @brief Run the scheduler function stored in the global_state * * Look up the scheduler function in global_state and run it. Report a fatal * error if an exception escapes the scheduler function. * * @param w - Worker structure to associate with the current thread. * * @attention The scheduler field of the global state must be set before this * function is called. */ void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w); __CILKRTS_END_EXTERN_C #endif // ! defined(INCLUDED_GLOBAL_STATE_DOT_H)