diff options
author | Ben Cheng <bccheng@google.com> | 2014-03-25 22:37:19 -0700 |
---|---|---|
committer | Ben Cheng <bccheng@google.com> | 2014-03-25 22:37:19 -0700 |
commit | 1bc5aee63eb72b341f506ad058502cd0361f0d10 (patch) | |
tree | c607e8252f3405424ff15bc2d00aa38dadbb2518 /gcc-4.9/libcilkrts/runtime/os-unix.c | |
parent | 283a0bf58fcf333c58a2a92c3ebbc41fb9eb1fdb (diff) | |
download | toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.gz toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.bz2 toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.zip |
Initial checkin of GCC 4.9.0 from trunk (r208799).
Change-Id: I48a3c08bb98542aa215912a75f03c0890e497dba
Diffstat (limited to 'gcc-4.9/libcilkrts/runtime/os-unix.c')
-rw-r--r-- | gcc-4.9/libcilkrts/runtime/os-unix.c | 516 |
1 files changed, 516 insertions, 0 deletions
diff --git a/gcc-4.9/libcilkrts/runtime/os-unix.c b/gcc-4.9/libcilkrts/runtime/os-unix.c new file mode 100644 index 000000000..fafb91d91 --- /dev/null +++ b/gcc-4.9/libcilkrts/runtime/os-unix.c @@ -0,0 +1,516 @@ +/* os-unix.c -*-C-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2009-2013, Intel Corporation + * All rights reserved. + * + * @copyright + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * @copyright + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY + * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + **************************************************************************/ + +#ifdef __linux__ + // define _GNU_SOURCE before *any* #include. + // Even <stdint.h> will break later #includes if this macro is not + // already defined when it is #included. +# define _GNU_SOURCE +#endif + +#include "os.h" +#include "bug.h" +#include "cilk_malloc.h" +#include <internal/abi.h> + +#if defined __linux__ +# include <sys/sysinfo.h> +# include <sys/syscall.h> +#elif defined __APPLE__ +# include <sys/sysctl.h> + // Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output +#elif defined __FreeBSD__ +// No additional include files +#elif defined __CYGWIN__ +// Cygwin on Windows - no additional include files +#elif defined __VXWORKS__ +# include <vxWorks.h> +# include <vxCpuLib.h> +# include <taskLib.h> +// Solaris +#elif defined __sun__ && defined __svr4__ +# include <sched.h> +#else +# error "Unsupported OS" +#endif + +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <pthread.h> +#include <sys/types.h> + + + +// /* Thread-local storage */ +// #ifdef _WIN32 +// typedef unsigned cilkos_tls_key_t; +// #else +// typedef pthread_key_t cilkos_tls_key_t; +// #endif +// cilkos_tls_key_t cilkos_allocate_tls_key(); +// void cilkos_set_tls_pointer(cilkos_tls_key_t key, void* ptr); +// void* cilkos_get_tls_pointer(cilkos_tls_key_t key); + +#if !defined CILK_WORKER_TLS +static int cilk_keys_defined; +static pthread_key_t worker_key, pedigree_leaf_key, tbb_interop_key; + +#if SUPPORT_GET_CURRENT_FIBER > 0 +static pthread_key_t fiber_key; +#endif + +static void *serial_worker; + + +// This destructor is called when a pthread dies to deallocate the +// pedigree node. +static void __cilkrts_pedigree_leaf_destructor(void* pedigree_tls_ptr) +{ + __cilkrts_pedigree* pedigree_tls + = (__cilkrts_pedigree*)pedigree_tls_ptr; + if (pedigree_tls) { + // Assert that we have either one or two nodes + // left in the pedigree chain. + // If we have more, then something is going wrong... + CILK_ASSERT(!pedigree_tls->parent || !pedigree_tls->parent->parent); + __cilkrts_free(pedigree_tls); + } +} + +void __cilkrts_init_tls_variables(void) +{ + int status; + /* This will be called once in serial execution before any + Cilk parallelism so we do not need to worry about races + on cilk_keys_defined. */ + if (cilk_keys_defined) + return; + status = pthread_key_create(&worker_key, NULL); + CILK_ASSERT (status == 0); + status = pthread_key_create(&pedigree_leaf_key, + __cilkrts_pedigree_leaf_destructor); + CILK_ASSERT (status == 0); + status = pthread_key_create(&tbb_interop_key, NULL); + CILK_ASSERT (status == 0); + +#if SUPPORT_GET_CURRENT_FIBER > 0 + status = pthread_key_create(&fiber_key, NULL); + CILK_ASSERT (status == 0); +#endif + cilk_keys_defined = 1; + return; +} + +COMMON_SYSDEP +void* cilkos_get_current_thread_id(void) +{ + return (void*)pthread_self(); +} + + +CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker() +{ + if (__builtin_expect(cilk_keys_defined, 1)) + return (__cilkrts_worker *)pthread_getspecific(worker_key); + else + return serial_worker; + +} + +CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker_fast() +{ + return (__cilkrts_worker *)pthread_getspecific(worker_key); +} + +COMMON_SYSDEP +__cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void) +{ + if (__builtin_expect(cilk_keys_defined, 1)) + return (__cilk_tbb_stack_op_thunk *) + pthread_getspecific(tbb_interop_key); + else + return 0; +} + +// This counter should be updated atomically. +static int __cilkrts_global_pedigree_tls_counter = -1; + +COMMON_SYSDEP +__cilkrts_pedigree *__cilkrts_get_tls_pedigree_leaf(int create_new) +{ + __cilkrts_pedigree *pedigree_tls; + if (__builtin_expect(cilk_keys_defined, 1)) { + pedigree_tls = + (struct __cilkrts_pedigree *)pthread_getspecific(pedigree_leaf_key); + } + else { + return 0; + } + + if (!pedigree_tls && create_new) { + // This call creates two nodes, X and Y. + // X == pedigree_tls[0] is the leaf node, which gets copied + // in and out of a user worker w when w binds and unbinds. + // Y == pedigree_tls[1] is the root node, + // which is a constant node that represents the user worker + // thread w. + pedigree_tls = (__cilkrts_pedigree*) + __cilkrts_malloc(2 * sizeof(__cilkrts_pedigree)); + + // This call sets the TLS pointer to the new node. + __cilkrts_set_tls_pedigree_leaf(pedigree_tls); + + pedigree_tls[0].rank = 0; + pedigree_tls[0].parent = &pedigree_tls[1]; + + // Create Y, whose rank begins as the global counter value. + pedigree_tls[1].rank = + __sync_add_and_fetch(&__cilkrts_global_pedigree_tls_counter, 1); + + pedigree_tls[1].parent = NULL; + CILK_ASSERT(pedigree_tls[1].rank != -1); + } + return pedigree_tls; +} + +#if SUPPORT_GET_CURRENT_FIBER > 0 +COMMON_SYSDEP +cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void) +{ + if (__builtin_expect(cilk_keys_defined, 1)) + return (cilk_fiber_sysdep *)pthread_getspecific(fiber_key); + else + return NULL; +} +#endif + +COMMON_SYSDEP +void __cilkrts_set_tls_worker(__cilkrts_worker *w) +{ + if (__builtin_expect(cilk_keys_defined, 1)) { + int status; + status = pthread_setspecific(worker_key, w); + CILK_ASSERT (status == 0); + return; + } + else + { + serial_worker = w; + } +} + +COMMON_SYSDEP +void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t) +{ + if (__builtin_expect(cilk_keys_defined, 1)) { + int status; + status = pthread_setspecific(tbb_interop_key, t); + CILK_ASSERT (status == 0); + return; + } + abort(); +} + +COMMON_SYSDEP +void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf) +{ + if (__builtin_expect(cilk_keys_defined, 1)) { + int status; + status = pthread_setspecific(pedigree_leaf_key, pedigree_leaf); + CILK_ASSERT (status == 0); + return; + } + abort(); +} + +#if SUPPORT_GET_CURRENT_FIBER > 0 +COMMON_SYSDEP +void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber) +{ + if (__builtin_expect(cilk_keys_defined, 1)) { + int status; + status = pthread_setspecific(fiber_key, fiber); + CILK_ASSERT (status == 0); + return; + } + abort(); +} +#endif + +#else +void __cilkrts_init_tls_variables(void) +{ +} +#endif + +#if defined (__linux__) && ! defined(ANDROID) +/* + * Get the thread id, rather than the pid. In the case of MIC offload, it's + * possible that we have multiple threads entering Cilk, and each has a + * different affinity. + */ +static pid_t linux_gettid(void) +{ + return syscall(SYS_gettid); +} + +/* + * On Linux we look at the thread affinity mask and restrict ourself to one + * thread for each of the hardware contexts to which we are bound. + * Therefore if user does + * % taskset 0-1 cilkProgram + * # restrict execution to hardware contexts zero and one + * the Cilk program will only use two threads even if it is running on a + * machine that has 32 hardware contexts. + * This is the right thing to do, because the threads are restricted to two + * hardware contexts by the affinity mask set by taskset, and if we were to + * create extra threads they would simply oversubscribe the hardware resources + * we can use. + * This is particularly important on MIC in offload mode, where the affinity + * mask is set by the offload library to force the offload code away from + * cores that have offload support threads running on them. + */ +static int linux_get_affinity_count (int tid) +{ +#if !defined HAVE_PTHREAD_AFFINITY_NP + return 0; +#else + + cpu_set_t process_mask; + + // Extract the thread affinity mask + int err = sched_getaffinity (tid, sizeof(process_mask),&process_mask); + + if (0 != err) + { + return 0; + } + + // We have extracted the mask OK, so now we can count the number of threads + // in it. This is linear in the maximum number of CPUs available, We + // could do a logarithmic version, if we assume the format of the mask, + // but it's not really worth it. We only call this at thread startup + // anyway. + int available_procs = 0; + int i; + for (i = 0; i < CPU_SETSIZE; i++) + { + if (CPU_ISSET(i, &process_mask)) + { + available_procs++; + } + } + + return available_procs; +#endif +} +#endif + +/* + * __cilkrts_hardware_cpu_count + * + * Returns the number of available CPUs on this hardware. This is architecture- + * specific. + */ + +COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void) +{ +#if defined ANDROID || (defined(__sun__) && defined(__svr4__)) + return sysconf (_SC_NPROCESSORS_ONLN); +#elif defined __MIC__ + /// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial + /// on KNC. Also, ignore the last core. + int P = sysconf (_SC_NPROCESSORS_ONLN); + return P/2 - 2; +#elif defined __linux__ + int affinity_count = linux_get_affinity_count(linux_gettid()); + + return (0 != affinity_count) ? affinity_count : sysconf (_SC_NPROCESSORS_ONLN); +#elif defined __APPLE__ + int count = 0; + int cmd[2] = { CTL_HW, HW_NCPU }; + size_t len = sizeof count; + int status = sysctl(cmd, 2, &count, &len, 0, 0); + assert(status >= 0); + assert((unsigned)count == count); + + return count; +#elif defined __FreeBSD__ || defined __CYGWIN__ + int ncores = sysconf(_SC_NPROCESSORS_ONLN); + + return ncores; + // Just get the number of processors +// return sysconf(_SC_NPROCESSORS_ONLN); +#elif defined __VXWORKS__ + return __builtin_popcount( vxCpuEnabledGet() ); +#else +#error "Unknown architecture" +#endif +} + +COMMON_SYSDEP void __cilkrts_sleep(void) +{ +#ifdef __VXWORKS__ + taskDelay(1); +#else + usleep(1); +#endif +} + +COMMON_SYSDEP void __cilkrts_yield(void) +{ +#if __APPLE__ || __FreeBSD__ || __VXWORKS__ + // On MacOS, call sched_yield to yield quantum. I'm not sure why we + // don't do this on Linux also. + sched_yield(); +#elif defined(__MIC__) + // On MIC, pthread_yield() really trashes things. Arch's measurements + // showed that calling _mm_delay_32() (or doing nothing) was a better + // option. Delaying 1024 clock cycles is a reasonable compromise between + // giving up the processor and latency starting up when work becomes + // available + _mm_delay_32(1024); +#elif defined(ANDROID) || (defined(__sun__) && defined(__svr4__)) + // On Android and Solaris, call sched_yield to yield quantum. I'm not + // sure why we don't do this on Linux also. + sched_yield(); +#else + // On Linux, call pthread_yield (which in turn will call sched_yield) + // to yield quantum. + pthread_yield(); +#endif +} + +COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen, + const char* varname) +{ + CILK_ASSERT(value); + CILK_ASSERT(varname); + + const char* envstr = getenv(varname); + if (envstr) + { + size_t len = strlen(envstr); + if (len > vallen - 1) + return len + 1; + + strcpy(value, envstr); + return len; + } + else + { + value[0] = '\0'; + return 0; + } +} + +/* + * Unrecoverable error: Print an error message and abort execution. + */ +COMMON_SYSDEP void cilkos_error(const char *fmt, ...) +{ + va_list l; + fflush(NULL); + fprintf(stderr, "Cilk error: "); + va_start(l, fmt); + vfprintf(stderr, fmt, l); + va_end(l); + fprintf(stderr, "Exiting.\n"); + fflush(stderr); + + abort(); +} + +/* + * Print a warning message and return. + */ +COMMON_SYSDEP void cilkos_warning(const char *fmt, ...) +{ + va_list l; + fflush(NULL); + fprintf(stderr, "Cilk warning: "); + va_start(l, fmt); + vfprintf(stderr, fmt, l); + va_end(l); + fflush(stderr); +} + +static void __attribute__((constructor)) init_once() +{ + /*__cilkrts_debugger_notification_internal(CILK_DB_RUNTIME_LOADED);*/ + __cilkrts_init_tls_variables(); +} + + +#define PAGE 4096 +#define CILK_MIN_STACK_SIZE (4*PAGE) +// Default size for the stacks that we create in Cilk for Unix. +#define CILK_DEFAULT_STACK_SIZE 0x100000 + +/* + * Convert the user's specified stack size into a "reasonable" value + * for this OS. + */ +size_t cilkos_validate_stack_size(size_t specified_stack_size) { + // Convert any negative value to the default. + if (specified_stack_size == 0) { + CILK_ASSERT((CILK_DEFAULT_STACK_SIZE % PAGE) == 0); + return CILK_DEFAULT_STACK_SIZE; + } + // Round values in between 0 and CILK_MIN_STACK_SIZE up to + // CILK_MIN_STACK_SIZE. + if (specified_stack_size <= CILK_MIN_STACK_SIZE) { + return CILK_MIN_STACK_SIZE; + } + if ((specified_stack_size % PAGE) > 0) { + // Round the user's stack size value up to nearest page boundary. + return (PAGE * (1 + specified_stack_size / PAGE)); + } + return specified_stack_size; +} + +long cilkos_atomic_add(volatile long* p, long x) +{ + return __sync_add_and_fetch(p, x); +} + +/* End os-unix.c */ |