Initial checkin of GCC 4.9.0 from trunk (r208799).

Change-Id: I48a3c08bb98542aa215912a75f03c0890e497dba
author: Ben Cheng <bccheng@google.com> 2014-03-25 22:37:19 -0700
committer: Ben Cheng <bccheng@google.com> 2014-03-25 22:37:19 -0700
commit: 1bc5aee63eb72b341f506ad058502cd0361f0d10 (patch)
tree: c607e8252f3405424ff15bc2d00aa38dadbb2518 /gcc-4.9/libcilkrts/runtime/os-unix.c
parent: 283a0bf58fcf333c58a2a92c3ebbc41fb9eb1fdb (diff)
download: toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.gz
toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.bz2
toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.zip
1 files changed, 516 insertions, 0 deletions
diff --git a/gcc-4.9/libcilkrts/runtime/os-unix.c b/gcc-4.9/libcilkrts/runtime/os-unix.c
new file mode 100644
index 000000000..fafb91d91
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/os-unix.c
@@ -0,0 +1,516 @@
+/* os-unix.c                  -*-C-*-
+ *
+ *************************************************************************
+ *
+ *  @copyright
+ *  Copyright (C) 2009-2013, Intel Corporation
+ *  All rights reserved.
+ *  
+ *  @copyright
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in
+ *      the documentation and/or other materials provided with the
+ *      distribution.
+ *    * Neither the name of Intel Corporation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *  
+ *  @copyright
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#ifdef __linux__
+    // define _GNU_SOURCE before *any* #include.
+    // Even <stdint.h> will break later #includes if this macro is not
+    // already defined when it is #included.
+#   define _GNU_SOURCE
+#endif
+
+#include "os.h"
+#include "bug.h"
+#include "cilk_malloc.h"
+#include <internal/abi.h>
+
+#if defined __linux__
+#   include <sys/sysinfo.h>
+#   include <sys/syscall.h>
+#elif defined __APPLE__
+#   include <sys/sysctl.h>
+    // Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output
+#elif defined  __FreeBSD__
+// No additional include files
+#elif defined __CYGWIN__
+// Cygwin on Windows - no additional include files
+#elif defined  __VXWORKS__
+#   include <vxWorks.h>   
+#   include <vxCpuLib.h>   
+#   include <taskLib.h>   
+// Solaris
+#elif defined __sun__ && defined __svr4__
+#   include <sched.h>
+#else
+#   error "Unsupported OS"
+#endif
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/types.h>
+
+
+
+// /* Thread-local storage */
+// #ifdef _WIN32
+// typedef unsigned cilkos_tls_key_t;
+// #else
+// typedef pthread_key_t cilkos_tls_key_t;
+// #endif
+// cilkos_tls_key_t cilkos_allocate_tls_key();
+// void cilkos_set_tls_pointer(cilkos_tls_key_t key, void* ptr);
+// void* cilkos_get_tls_pointer(cilkos_tls_key_t key);
+
+#if !defined CILK_WORKER_TLS
+// Set to 1 at the end of __cilkrts_init_tls_variables(), after every
+// pthread key below has been created.  Most accessors in this file test
+// it before touching a key.
+static int cilk_keys_defined;
+// Per-thread slots for the worker pointer, the pedigree leaf node and the
+// TBB interop thunk.
+static pthread_key_t worker_key, pedigree_leaf_key, tbb_interop_key;
+
+#if SUPPORT_GET_CURRENT_FIBER > 0
+// Per-thread slot for the current fiber.
+static pthread_key_t fiber_key;
+#endif
+
+// Worker pointer read by __cilkrts_get_tls_worker() and written by
+// __cilkrts_set_tls_worker() while cilk_keys_defined is still zero.
+static void *serial_worker;
+
+
+/*
+ * Destructor registered for pedigree_leaf_key.  Frees the pedigree storage
+ * still attached to the key; a NULL pointer is ignored.
+ */
+static void __cilkrts_pedigree_leaf_destructor(void* pedigree_tls_ptr)
+{
+    __cilkrts_pedigree *pedigree_tls = (__cilkrts_pedigree *)pedigree_tls_ptr;
+
+    if (!pedigree_tls)
+        return;
+
+    // The chain may hold at most two nodes (this one and its parent) by
+    // now; anything longer indicates a problem elsewhere.
+    CILK_ASSERT(!pedigree_tls->parent || !pedigree_tls->parent->parent);
+    __cilkrts_free(pedigree_tls);
+}
+
+/* Create one pthread key and assert that the creation succeeded. */
+static void create_tls_key(pthread_key_t *key, void (*destructor)(void *))
+{
+    int status = pthread_key_create(key, destructor);
+    CILK_ASSERT (status == 0);
+}
+
+/*
+ * Create the pthread keys used by this file and mark them as defined.
+ *
+ * The original comment states this runs once, serially, before any Cilk
+ * parallelism, which is why the check of cilk_keys_defined is not
+ * protected against races.  Repeated calls return immediately.
+ */
+void __cilkrts_init_tls_variables(void)
+{
+    if (cilk_keys_defined)
+        return;
+
+    create_tls_key(&worker_key, NULL);
+    // Only the pedigree leaf owns heap storage, so only it gets a destructor.
+    create_tls_key(&pedigree_leaf_key, __cilkrts_pedigree_leaf_destructor);
+    create_tls_key(&tbb_interop_key, NULL);
+
+#if SUPPORT_GET_CURRENT_FIBER > 0
+    create_tls_key(&fiber_key, NULL);
+#endif
+
+    cilk_keys_defined = 1;
+}
+
+/* Return the calling thread's pthread_self() value cast to a void pointer. */
+COMMON_SYSDEP
+void* cilkos_get_current_thread_id(void)
+{
+    pthread_t self = pthread_self();
+
+    return (void*)self;
+}
+
+
+/*
+ * Return the worker stored for the calling thread.  Until the pthread keys
+ * have been created, the file-level serial_worker is returned instead.
+ */
+CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker()
+{
+    if (!__builtin_expect(cilk_keys_defined, 1))
+        return serial_worker;
+
+    return (__cilkrts_worker *)pthread_getspecific(worker_key);
+}
+
+/*
+ * Return the worker stored in worker_key for the calling thread.  Unlike
+ * __cilkrts_get_tls_worker(), this does not check cilk_keys_defined.
+ */
+CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker_fast()
+{
+    void *slot = pthread_getspecific(worker_key);
+
+    return (__cilkrts_worker *)slot;
+}
+
+/*
+ * Return the TBB interop thunk stored for the calling thread, or a null
+ * pointer when the pthread keys have not been created yet.
+ */
+COMMON_SYSDEP
+__cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void)
+{
+    if (!__builtin_expect(cilk_keys_defined, 1))
+        return 0;
+
+    return (__cilk_tbb_stack_op_thunk *)pthread_getspecific(tbb_interop_key);
+}
+
+// Source of ranks for per-thread pedigree root nodes.  It starts at -1 and
+// is advanced with an atomic add-and-fetch below, so the first root created
+// gets rank 0.
+static int __cilkrts_global_pedigree_tls_counter = -1;
+
+/*
+ * Return the pedigree leaf stored for the calling thread.
+ *
+ * Returns a null pointer when the pthread keys have not been created.
+ * When no leaf is stored yet and create_new is non-zero, a two-node array
+ * is allocated, stored in the thread's slot and initialised:
+ *   - element 0 is the leaf node (rank 0, parent = element 1); per the
+ *     original comment it is copied in and out of a user worker when that
+ *     worker binds and unbinds;
+ *   - element 1 is the root node representing the user worker thread; its
+ *     rank is the next value of the global counter and it has no parent.
+ * When no leaf is stored and create_new is zero, NULL is returned.
+ */
+COMMON_SYSDEP
+__cilkrts_pedigree *__cilkrts_get_tls_pedigree_leaf(int create_new)
+{
+    __cilkrts_pedigree *pedigree_tls;
+    __cilkrts_pedigree *leaf;
+    __cilkrts_pedigree *root;
+
+    if (!__builtin_expect(cilk_keys_defined, 1))
+        return 0;
+
+    pedigree_tls =
+        (struct __cilkrts_pedigree *)pthread_getspecific(pedigree_leaf_key);
+    if (pedigree_tls || !create_new)
+        return pedigree_tls;
+
+    // Allocate both nodes in one chunk and publish it in the thread's slot
+    // before filling in the fields.
+    pedigree_tls = (__cilkrts_pedigree*)
+        __cilkrts_malloc(2 * sizeof(__cilkrts_pedigree));
+    __cilkrts_set_tls_pedigree_leaf(pedigree_tls);
+
+    leaf = &pedigree_tls[0];
+    root = &pedigree_tls[1];
+
+    leaf->rank = 0;
+    leaf->parent = root;
+
+    root->rank =
+        __sync_add_and_fetch(&__cilkrts_global_pedigree_tls_counter, 1);
+    root->parent = NULL;
+
+    CILK_ASSERT(pedigree_tls[1].rank != -1);
+    return pedigree_tls;
+}
+
+#if SUPPORT_GET_CURRENT_FIBER > 0
+/*
+ * Return the fiber stored for the calling thread, or NULL when the pthread
+ * keys have not been created yet.
+ */
+COMMON_SYSDEP
+cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void)
+{
+    if (!__builtin_expect(cilk_keys_defined, 1))
+        return NULL;
+
+    return (cilk_fiber_sysdep *)pthread_getspecific(fiber_key);
+}
+#endif
+
+/*
+ * Record w as the worker of the calling thread.  Before the pthread keys
+ * exist the pointer is kept in the file-level serial_worker instead.
+ */
+COMMON_SYSDEP
+void __cilkrts_set_tls_worker(__cilkrts_worker *w)
+{
+    int status;
+
+    if (!__builtin_expect(cilk_keys_defined, 1)) {
+        serial_worker = w;
+        return;
+    }
+
+    status = pthread_setspecific(worker_key, w);
+    CILK_ASSERT (status == 0);
+}
+
+/*
+ * Store the TBB interop thunk t for the calling thread.  Aborts the
+ * process if the pthread keys have not been created.
+ */
+COMMON_SYSDEP
+void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t)
+{
+    int status;
+
+    if (!__builtin_expect(cilk_keys_defined, 1))
+        abort();
+
+    status = pthread_setspecific(tbb_interop_key, t);
+    CILK_ASSERT (status == 0);
+}
+
+/*
+ * Store pedigree_leaf as the pedigree leaf of the calling thread.  Aborts
+ * the process if the pthread keys have not been created.
+ */
+COMMON_SYSDEP
+void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf)
+{
+    int status;
+
+    if (!__builtin_expect(cilk_keys_defined, 1))
+        abort();
+
+    status = pthread_setspecific(pedigree_leaf_key, pedigree_leaf);
+    CILK_ASSERT (status == 0);
+}
+
+#if SUPPORT_GET_CURRENT_FIBER > 0
+/*
+ * Store fiber as the current fiber of the calling thread.  Aborts the
+ * process if the pthread keys have not been created.
+ */
+COMMON_SYSDEP
+void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber)
+{
+    int status;
+
+    if (!__builtin_expect(cilk_keys_defined, 1))
+        abort();
+
+    status = pthread_setspecific(fiber_key, fiber);
+    CILK_ASSERT (status == 0);
+}
+#endif
+
+#else
+/*
+ * Variant compiled when CILK_WORKER_TLS is defined: no pthread keys are
+ * created in this configuration, so there is nothing to initialise.
+ */
+void __cilkrts_init_tls_variables(void)
+{
+    /* Intentionally empty. */
+}
+#endif
+
+#if defined (__linux__) && ! defined(ANDROID)
+/*
+ * Return the kernel thread id of the caller (not the process id).  With
+ * MIC offload several threads may enter Cilk, each with its own affinity,
+ * so the per-thread id is what the affinity query needs.
+ */
+static pid_t linux_gettid(void)
+{
+    long tid = syscall(SYS_gettid);
+
+    return tid;
+}
+
+/*
+ * Count the hardware contexts that thread `tid` is allowed to run on.
+ *
+ * On Linux the runtime limits itself to one thread per hardware context in
+ * the affinity mask.  For example, after
+ *     % taskset -c 0-1 cilkProgram
+ * the program uses only two threads even on a machine with 32 hardware
+ * contexts.  Creating more threads would merely oversubscribe the contexts
+ * the mask allows.  This matters most on MIC in offload mode, where the
+ * offload library sets the mask to keep offloaded code away from cores
+ * running offload support threads.
+ *
+ * Returns 0 when HAVE_PTHREAD_AFFINITY_NP is not defined or when the mask
+ * cannot be read; the caller treats 0 as "unknown".
+ */
+static int linux_get_affinity_count (int tid) 
+{
+#if defined HAVE_PTHREAD_AFFINITY_NP
+    cpu_set_t affinity;
+    int cpu;
+    int enabled = 0;
+
+    // Fetch the affinity mask of the requested thread.
+    if (sched_getaffinity (tid, sizeof(affinity), &affinity) != 0)
+        return 0;
+
+    // Walk every possible CPU slot.  This is linear in CPU_SETSIZE, but it
+    // only runs at thread startup, so a cleverer bit count is not worth it.
+    for (cpu = 0; cpu < CPU_SETSIZE; ++cpu)
+        if (CPU_ISSET(cpu, &affinity))
+            ++enabled;
+
+    return enabled;
+#else
+    return 0;
+#endif
+}
+#endif
+
+/*
+ * __cilkrts_hardware_cpu_count
+ *
+ * Returns the number of CPUs the runtime should assume are available.  The
+ * method is selected at compile time:
+ *   - Android, Solaris, FreeBSD, Cygwin: sysconf(_SC_NPROCESSORS_ONLN).
+ *   - MIC: half of the online processors minus two (see HACK note below).
+ *   - Linux: the size of the calling thread's affinity mask, falling back
+ *     to sysconf(_SC_NPROCESSORS_ONLN) when the mask count is 0.
+ *   - Apple: the hw.ncpu sysctl.
+ *   - VxWorks: the number of bits set in vxCpuEnabledGet().
+ */
+
+COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void)
+{
+#if defined ANDROID || (defined(__sun__) && defined(__svr4__))
+    return sysconf (_SC_NPROCESSORS_ONLN);
+#elif defined __MIC__
+    /// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial
+    /// on KNC.  Also, ignore the last core.
+    int P = sysconf (_SC_NPROCESSORS_ONLN);
+    return P/2 - 2;
+#elif defined __linux__
+    int affinity_count = linux_get_affinity_count(linux_gettid());
+
+    return (0 != affinity_count) ? affinity_count : sysconf (_SC_NPROCESSORS_ONLN);
+#elif defined __APPLE__
+    int count = 0;
+    int cmd[2] = { CTL_HW, HW_NCPU };
+    size_t len = sizeof count;
+    int status = sysctl(cmd, 2, &count, &len, 0, 0);
+
+    // Use the runtime's own assertion macro, as the rest of this file does.
+    CILK_ASSERT(status >= 0);
+    // The previous check, (unsigned)count == count, converted both sides to
+    // unsigned and therefore could never fail.  Test the sign directly.
+    CILK_ASSERT(count > 0);
+
+    return count;
+#elif defined  __FreeBSD__ || defined __CYGWIN__
+    // Just get the number of processors
+    int ncores = sysconf(_SC_NPROCESSORS_ONLN);
+
+    return ncores;
+#elif defined  __VXWORKS__
+    return __builtin_popcount( vxCpuEnabledGet() );
+#else
+#error "Unknown architecture"
+#endif
+}
+
+/*
+ * Pause the calling thread for the shortest interval the platform call
+ * accepts: one tick via taskDelay() on VxWorks, one microsecond via
+ * usleep() everywhere else.
+ */
+COMMON_SYSDEP void __cilkrts_sleep(void)
+{
+#if !defined(__VXWORKS__)
+    usleep(1);
+#else
+    taskDelay(1);
+#endif
+}
+
+/*
+ * Give up the processor so another runnable thread can be scheduled.
+ *
+ * Every platform except MIC uses the POSIX sched_yield().  The Linux
+ * fallback used to call pthread_yield(), a non-standard function that
+ * glibc implements as a call to sched_yield() and has deprecated since
+ * version 2.34.  Calling sched_yield() directly behaves the same.
+ * POSIX requires <pthread.h> to make the <sched.h> symbols visible.
+ */
+COMMON_SYSDEP void __cilkrts_yield(void)
+{
+#if defined(__MIC__)
+    // On MIC, pthread_yield() really trashes things.  Arch's measurements
+    // showed that calling _mm_delay_32() (or doing nothing) was a better
+    // option.  Delaying 1024 clock cycles is a reasonable compromise between
+    // giving up the processor and latency starting up when work becomes
+    // available
+    _mm_delay_32(1024);
+#else
+    // Yield the remainder of the quantum.
+    sched_yield();
+#endif
+}
+
+/*
+ * Copy the value of environment variable `varname` into `value`.
+ *
+ * value   - destination buffer; must not be NULL.
+ * vallen  - capacity of `value` in bytes, including the terminating NUL.
+ * varname - name of the variable to look up; must not be NULL.
+ *
+ * Returns:
+ *   - 0 if the variable is not set; `value` becomes the empty string when
+ *     the buffer has room for a terminator.  A variable that is set to the
+ *     empty string also returns 0.
+ *   - the length of the value (excluding the NUL) when it fits.
+ *   - the required buffer size (length + 1) when it does not fit; `value`
+ *     is left untouched in that case.
+ *
+ * A zero-length buffer is handled safely: nothing is written to it.  The
+ * earlier `len > vallen - 1` test wrapped around for vallen == 0 and let
+ * the copy overrun the buffer.
+ */
+COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen,
+                                           const char* varname)
+{
+    CILK_ASSERT(value);
+    CILK_ASSERT(varname);
+
+    const char* envstr = getenv(varname);
+    if (!envstr)
+    {
+        if (vallen > 0)
+            value[0] = '\0';
+        return 0;
+    }
+
+    size_t len = strlen(envstr);
+    // Need len + 1 bytes; written without subtraction so vallen == 0 is safe.
+    if (len >= vallen)
+        return len + 1;
+
+    memcpy(value, envstr, len + 1);
+    return len;
+}
+
+/*
+ * Report an unrecoverable error and terminate.
+ *
+ * Flushes all open output streams, writes "Cilk error: " followed by the
+ * printf-style message and "Exiting." to stderr, then calls abort().
+ * This function does not return.
+ */
+COMMON_SYSDEP void cilkos_error(const char *fmt, ...)
+{
+    va_list args;
+
+    fflush(NULL);
+    fputs("Cilk error: ", stderr);
+
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+
+    fputs("Exiting.\n", stderr);
+    fflush(stderr);
+
+    abort();
+}
+
+/*
+ * Report a warning and continue.
+ *
+ * Flushes all open output streams, writes "Cilk warning: " followed by the
+ * printf-style message to stderr, flushes stderr and returns.
+ */
+COMMON_SYSDEP void cilkos_warning(const char *fmt, ...)
+{
+    va_list args;
+
+    fflush(NULL);
+    fputs("Cilk warning: ", stderr);
+
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+
+    fflush(stderr);
+}
+
+/*
+ * Constructor-attribute hook that sets up the thread-local storage keys
+ * by calling __cilkrts_init_tls_variables().
+ */
+static void __attribute__((constructor)) init_once()
+{
+    /* Debugger notification is currently disabled:
+       __cilkrts_debugger_notification_internal(CILK_DB_RUNTIME_LOADED); */
+    __cilkrts_init_tls_variables();
+}
+
+
+#define PAGE 4096
+#define CILK_MIN_STACK_SIZE (4*PAGE)
+// Default size for the stacks that we create in Cilk for Unix.
+#define CILK_DEFAULT_STACK_SIZE 0x100000
+
+/*
+ * Convert the user's specified stack size into a "reasonable" value
+ * for this OS.
+ *
+ *   - 0 selects CILK_DEFAULT_STACK_SIZE.
+ *   - Anything up to CILK_MIN_STACK_SIZE becomes CILK_MIN_STACK_SIZE.
+ *   - Larger values are rounded up to the next multiple of PAGE.
+ *
+ * The parameter is unsigned, so a negative number converted by the caller
+ * arrives here as a very large value.  If rounding such a value up would
+ * overflow size_t, it is rounded down to the largest page multiple.  The
+ * old code wrapped around and returned 0 in that case.
+ */
+size_t cilkos_validate_stack_size(size_t specified_stack_size) {
+    const size_t page = PAGE;
+    size_t excess;
+
+    // Zero means "use the default".
+    if (specified_stack_size == 0) {
+        CILK_ASSERT((CILK_DEFAULT_STACK_SIZE % PAGE) == 0);
+        return CILK_DEFAULT_STACK_SIZE;
+    }
+    // Round values in between 0 and CILK_MIN_STACK_SIZE up to
+    // CILK_MIN_STACK_SIZE.
+    if (specified_stack_size <= CILK_MIN_STACK_SIZE) {
+        return CILK_MIN_STACK_SIZE;
+    }
+
+    excess = specified_stack_size % page;
+    if (excess == 0) {
+        return specified_stack_size;
+    }
+    // No room left to round up: fall back to the highest page boundary.
+    if (specified_stack_size > (size_t)-1 - (page - excess)) {
+        return specified_stack_size - excess;
+    }
+    // Round the user's stack size value up to nearest page boundary.
+    return specified_stack_size + (page - excess);
+}
+
+/*
+ * Atomically add x to *p and return the value *p holds after the addition.
+ */
+long cilkos_atomic_add(volatile long* p, long x)
+{
+    long updated = __sync_add_and_fetch(p, x);
+
+    return updated;
+}
+
+/* End os-unix.c */
author	Ben Cheng <bccheng@google.com>	2014-03-25 22:37:19 -0700
committer	Ben Cheng <bccheng@google.com>	2014-03-25 22:37:19 -0700
commit	1bc5aee63eb72b341f506ad058502cd0361f0d10 (patch)
tree	c607e8252f3405424ff15bc2d00aa38dadbb2518 /gcc-4.9/libcilkrts/runtime/os-unix.c
parent	283a0bf58fcf333c58a2a92c3ebbc41fb9eb1fdb (diff)
download	toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.gz toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.bz2 toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.zip