Diffstat (limited to 'gcc-4.9/libcilkrts/runtime')
-rw-r--r--  gcc-4.9/libcilkrts/runtime/acknowledgements.dox  51
-rw-r--r--  gcc-4.9/libcilkrts/runtime/bug.cpp  139
-rw-r--r--  gcc-4.9/libcilkrts/runtime/bug.h  141
-rw-r--r--  gcc-4.9/libcilkrts/runtime/c_reducers.c  57
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk-abi-cilk-for.cpp  416
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk-abi-vla-internal.c  83
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk-abi-vla-internal.h  90
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk-abi.c  733
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk-ittnotify.h  100
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk-tbb-interop.h  192
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk_api.c  255
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk_fiber-unix.cpp  301
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk_fiber-unix.h  149
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk_fiber.cpp  1078
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk_fiber.h  882
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk_malloc.c  84
-rw-r--r--  gcc-4.9/libcilkrts/runtime/cilk_malloc.h  90
-rw-r--r--  gcc-4.9/libcilkrts/runtime/component.h  52
-rw-r--r--  gcc-4.9/libcilkrts/runtime/config/generic/cilk-abi-vla.c  107
-rw-r--r--  gcc-4.9/libcilkrts/runtime/config/generic/os-fence.h  53
-rw-r--r--  gcc-4.9/libcilkrts/runtime/config/generic/os-unix-sysdep.c  94
-rw-r--r--  gcc-4.9/libcilkrts/runtime/config/x86/cilk-abi-vla.c  441
-rw-r--r--  gcc-4.9/libcilkrts/runtime/config/x86/os-fence.h  72
-rw-r--r--  gcc-4.9/libcilkrts/runtime/config/x86/os-unix-sysdep.c  142
-rw-r--r--  gcc-4.9/libcilkrts/runtime/doxygen-layout.xml  222
-rw-r--r--  gcc-4.9/libcilkrts/runtime/doxygen.cfg  1774
-rw-r--r--  gcc-4.9/libcilkrts/runtime/except-gcc.cpp  597
-rw-r--r--  gcc-4.9/libcilkrts/runtime/except-gcc.h  146
-rw-r--r--  gcc-4.9/libcilkrts/runtime/except.h  123
-rw-r--r--  gcc-4.9/libcilkrts/runtime/frame_malloc.c  462
-rw-r--r--  gcc-4.9/libcilkrts/runtime/frame_malloc.h  205
-rw-r--r--  gcc-4.9/libcilkrts/runtime/full_frame.c  181
-rw-r--r--  gcc-4.9/libcilkrts/runtime/full_frame.h  493
-rw-r--r--  gcc-4.9/libcilkrts/runtime/global_state.cpp  628
-rw-r--r--  gcc-4.9/libcilkrts/runtime/global_state.h  417
-rw-r--r--  gcc-4.9/libcilkrts/runtime/jmpbuf.c  48
-rw-r--r--  gcc-4.9/libcilkrts/runtime/jmpbuf.h  136
-rw-r--r--  gcc-4.9/libcilkrts/runtime/linux-symbols.ver  369
-rw-r--r--  gcc-4.9/libcilkrts/runtime/local_state.c  68
-rw-r--r--  gcc-4.9/libcilkrts/runtime/local_state.h  424
-rw-r--r--  gcc-4.9/libcilkrts/runtime/mac-symbols.txt  318
-rw-r--r--  gcc-4.9/libcilkrts/runtime/metacall_impl.c  167
-rw-r--r--  gcc-4.9/libcilkrts/runtime/metacall_impl.h  123
-rw-r--r--  gcc-4.9/libcilkrts/runtime/os-unix.c  516
-rw-r--r--  gcc-4.9/libcilkrts/runtime/os.h  236
-rw-r--r--  gcc-4.9/libcilkrts/runtime/os_mutex-unix.c  193
-rw-r--r--  gcc-4.9/libcilkrts/runtime/os_mutex.h  135
-rw-r--r--  gcc-4.9/libcilkrts/runtime/pedigrees.c  112
-rw-r--r--  gcc-4.9/libcilkrts/runtime/pedigrees.h  130
-rw-r--r--  gcc-4.9/libcilkrts/runtime/record-replay.cpp  770
-rw-r--r--  gcc-4.9/libcilkrts/runtime/record-replay.h  432
-rw-r--r--  gcc-4.9/libcilkrts/runtime/reducer_impl.cpp  1012
-rw-r--r--  gcc-4.9/libcilkrts/runtime/reducer_impl.h  128
-rw-r--r--  gcc-4.9/libcilkrts/runtime/rts-common.h  132
-rw-r--r--  gcc-4.9/libcilkrts/runtime/scheduler.c  3940
-rw-r--r--  gcc-4.9/libcilkrts/runtime/scheduler.h  421
-rw-r--r--  gcc-4.9/libcilkrts/runtime/signal_node.c  241
-rw-r--r--  gcc-4.9/libcilkrts/runtime/signal_node.h  109
-rw-r--r--  gcc-4.9/libcilkrts/runtime/spin_mutex.c  109
-rw-r--r--  gcc-4.9/libcilkrts/runtime/spin_mutex.h  129
-rw-r--r--  gcc-4.9/libcilkrts/runtime/stats.c  172
-rw-r--r--  gcc-4.9/libcilkrts/runtime/stats.h  208
-rw-r--r--  gcc-4.9/libcilkrts/runtime/symbol_test.c  63
-rw-r--r--  gcc-4.9/libcilkrts/runtime/sysdep-unix.c  807
-rw-r--r--  gcc-4.9/libcilkrts/runtime/sysdep.h  285
-rw-r--r--  gcc-4.9/libcilkrts/runtime/worker_mutex.c  121
-rw-r--r--  gcc-4.9/libcilkrts/runtime/worker_mutex.h  131
67 files changed, 23235 insertions, 0 deletions
diff --git a/gcc-4.9/libcilkrts/runtime/acknowledgements.dox b/gcc-4.9/libcilkrts/runtime/acknowledgements.dox
new file mode 100644
index 000000000..79b5d876f
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/acknowledgements.dox
@@ -0,0 +1,51 @@
+/* acknowledgements.dox
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/*
+ * This file contains acknowledgements of community contributions to the
+ * Cilk Plus runtime.
+ */
+
+/**
+ * @mainpage
+ *
+ * @section Acknowledgements Acknowledgements
+ *
+ * Modifications to build the Cilk Plus runtime for VxWorks provided by
+ * Brian Kuhl of Wind River.
+ */
diff --git a/gcc-4.9/libcilkrts/runtime/bug.cpp b/gcc-4.9/libcilkrts/runtime/bug.cpp
new file mode 100644
index 000000000..dbdf1fd32
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/bug.cpp
@@ -0,0 +1,139 @@
+/* bug.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#include "bug.h"
+
+#include <exception>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#ifdef _WIN32
+# include "windows-clean.h"
+# include "internal/abi.h"
+# include "cilktools/cilkscreen.h"
+# include <crtdbg.h>
+#endif
+
+__CILKRTS_BEGIN_EXTERN_C
+
+COMMON_PORTABLE const char *const __cilkrts_assertion_failed =
+ "%s:%d: cilk assertion failed: %s\n";
+
+COMMON_PORTABLE void __cilkrts_bug(const char *fmt,...) cilk_nothrow
+{
+#if defined (_WIN32) && defined(_DEBUG)
+ _CRTIMP void __cdecl _wassert(__in_z const wchar_t * _Message,
+ __in_z const wchar_t *_File,
+ __in unsigned _Line);
+ char message[256];
+ wchar_t wmessage[256];
+ va_list l;
+ va_start(l, fmt);
+ _vsnprintf_s(message, 256, _TRUNCATE, fmt, l);
+ va_end(l);
+ _snwprintf_s(wmessage, 256, _TRUNCATE, _CRT_WIDE("%S"),
+ message); /* widen */
+
+ // Force asserts to go to stderr and the debugger. This isn't polite, but
+ // we're about to kill the app anyway and it will prevent our tests from
+ // hanging
+ _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE| _CRTDBG_MODE_DEBUG);
+ _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
+
+ _wassert(wmessage, _CRT_WIDE(__FILE__), __LINE__);
+
+ // If there's a debugger attached, give it a chance to look at the failure
+ if (IsDebuggerPresent())
+ DebugBreak();
+
+ abort();
+/* __asm int 3 */
+#else
+ /* To reduce user confusion, write all user-generated output
+ before the system-generated error message. */
+ va_list l;
+ fflush(NULL);
+ va_start(l, fmt);
+ vfprintf(stderr, fmt, l);
+ va_end(l);
+ fflush(stderr);
+
+#ifndef _WIN32
+ abort();
+#endif
+
+#endif
+
+ exit(1);
+}
+
+COMMON_PORTABLE void cilkbug_assert_no_uncaught_exception(void)
+{
+ bool uncaught = std::uncaught_exception();
+ CILK_ASSERT(!uncaught);
+}
+
+COMMON_SYSDEP void abort_because_rts_is_corrupted(void)
+{
+ __cilkrts_bug("The Cilk Plus runtime system detected a corruption "
+ "in its data structures. This is most likely caused "
+ "by an application bug. Aborting execution.\n");
+}
+
+#ifdef WIN32
+COMMON_SYSDEP void __cilkrts_dbgprintf(const char *fmt,...)
+{
+ char message[2048];
+ va_list l;
+
+ // Cilkscreen shouldn't watch this
+ __cilkscreen_disable_checking();
+
+ va_start(l, fmt);
+ _vsnprintf_s(message, 2048, _TRUNCATE, fmt, l);
+ va_end(l);
+ OutputDebugStringA (message);
+
+ // Re-enable Cilkscreen
+ __cilkscreen_enable_checking();
+}
+#endif
+
+__CILKRTS_END_EXTERN_C
+
+/* End bug.cpp */
diff --git a/gcc-4.9/libcilkrts/runtime/bug.h b/gcc-4.9/libcilkrts/runtime/bug.h
new file mode 100644
index 000000000..bb1891378
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/bug.h
@@ -0,0 +1,141 @@
+/* bug.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file bug.h
+ *
+ * @brief Support for reporting bugs and debugging.
+ */
+
+#ifndef INCLUDED_BUG_DOT_H
+#define INCLUDED_BUG_DOT_H
+
+#include "rts-common.h"
+#include <cilk/common.h>
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * Flush all output, write error message to stderr and abort the execution.
+ * On Windows the error is also written to the debugger.
+ *
+ * @param fmt printf-style format string. Any remaining parameters will be
+ *            interpreted based on the format string text.
+ */
+COMMON_PORTABLE NORETURN __cilkrts_bug(const char *fmt,...) cilk_nothrow;
+
+#ifndef CILK_ASSERT
+
+/** Standard text for failed assertion */
+COMMON_PORTABLE extern const char *const __cilkrts_assertion_failed;
+
+/**
+ * Macro to assert an invariant that must be true. If the statement evaluates
+ * to false, __cilkrts_bug will be called to report the failure and terminate
+ * the application.
+ */
+#define CILK_ASSERT(ex) \
+ (__builtin_expect((ex) != 0, 1) ? (void)0 : \
+ __cilkrts_bug(__cilkrts_assertion_failed, __FILE__, __LINE__, #ex))
+
+#define CILK_ASSERT_MSG(ex, msg) \
+ (__builtin_expect((ex) != 0, 1) ? (void)0 : \
+ __cilkrts_bug(__cilkrts_assertion_failed, __FILE__, __LINE__, \
+ #ex "\n " msg))
+#endif // CILK_ASSERT
+
+/**
+ * Assert that there is no uncaught exception.
+ *
+ * Not valid on Windows or Android.
+ *
+ * On Android, calling std::uncaught_exception with the stlport library causes
+ * a seg fault. Since we're not supporting exceptions there at this point,
+ * just don't do the check. It works with the GNU STL library, but that's
+ * GPL V3 licensed.
+ */
+COMMON_PORTABLE void cilkbug_assert_no_uncaught_exception(void);
+#if defined(_WIN32) || defined(ANDROID)
+# define CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION()
+#else
+# define CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION() \
+ cilkbug_assert_no_uncaught_exception()
+#endif
+
+
+/**
+ * Call __cilkrts_bug with a standard message that the runtime state is
+ * corrupted and the application is being terminated.
+ */
+COMMON_SYSDEP void abort_because_rts_is_corrupted(void);
+
+// Debugging aids
+#ifndef _DEBUG
+# define DBGPRINTF(_fmt, ...)
+#elif defined(_WIN32)
+
+/**
+ * Write debugging output. On windows this is written to the debugger.
+ *
+ * @param fmt printf-style format string. Any remaining parameters will be
+ *            interpreted based on the format string text.
+ */
+COMMON_SYSDEP void __cilkrts_dbgprintf(const char *fmt,...) cilk_nothrow;
+
+/**
+ * Macro to write debugging output which will be elided if this is not a
+ * debug build. The macro is currently always elided on non-Windows builds.
+ *
+ * @param _fmt printf-style format string. Any remaining parameters will be
+ *            interpreted based on the format string text.
+ */
+# define DBGPRINTF(_fmt, ...) __cilkrts_dbgprintf(_fmt, __VA_ARGS__)
+
+#else /* if _DEBUG && !_WIN32 */
+ /* Non-Windows debug logging. Someday we should make GetCurrentFiber()
+ * and GetWorkerFiber() do something.
+ */
+# include <stdio.h>
+ __CILKRTS_INLINE void* GetCurrentFiber() { return 0; }
+ __CILKRTS_INLINE void* GetWorkerFiber(__cilkrts_worker* w) { return 0; }
+# define DBGPRINTF(_fmt, ...) fprintf(stderr, _fmt, __VA_ARGS__)
+#endif // _DEBUG
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_BUG_DOT_H)
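The CILK_ASSERT machinery above reduces to a small, predictable pattern: a likely-true test either expands to (void)0 or hands the file, line, and stringized expression to __cilkrts_bug, which flushes pending output, prints the standard message, and aborts. Below is a minimal standalone sketch of that pattern, assuming only GCC's __builtin_expect; the names report_bug and MY_CILK_ASSERT and the fixed (non-variadic) reporter signature are illustrative stand-ins, not part of the runtime.

    /* Sketch of the CILK_ASSERT pattern declared above (illustrative names only). */
    #include <stdio.h>
    #include <stdlib.h>

    static const char *const assertion_failed_fmt =
        "%s:%d: cilk assertion failed: %s\n";   /* same text as __cilkrts_assertion_failed */

    /* Simplified, non-variadic stand-in for __cilkrts_bug(). */
    static void report_bug(const char *file, int line, const char *expr)
    {
        fflush(NULL);                           /* flush user output first, as bug.cpp does */
        fprintf(stderr, assertion_failed_fmt, file, line, expr);
        abort();
    }

    #define MY_CILK_ASSERT(ex) \
        (__builtin_expect((ex) != 0, 1) ? (void)0 : \
         report_bug(__FILE__, __LINE__, #ex))

    int main(void)
    {
        int join_counter = 1;
        MY_CILK_ASSERT(join_counter == 1);      /* true: expands to (void)0 */
        MY_CILK_ASSERT(join_counter == 0);      /* false: prints the message and aborts */
        return 0;
    }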
diff --git a/gcc-4.9/libcilkrts/runtime/c_reducers.c b/gcc-4.9/libcilkrts/runtime/c_reducers.c
new file mode 100644
index 000000000..52615e93f
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/c_reducers.c
@@ -0,0 +1,57 @@
+/* c_reducers.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2010-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+/* Implementation of C reducers */
+
+// Disable warning about integer conversions losing significant bits.
+// The code is correct as is.
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)
+#endif
+
+#define CILK_C_DEFINE_REDUCERS
+
+#include <cilk/reducer_opadd.h>
+#include <cilk/reducer_opand.h>
+#include <cilk/reducer_opmul.h>
+#include <cilk/reducer_opor.h>
+#include <cilk/reducer_opxor.h>
+#include <cilk/reducer_min_max.h>
+
+/* End c_reducers.c */
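With CILK_C_DEFINE_REDUCERS set, the headers included above emit the out-of-line definitions for the documented Cilk Plus C reducers. As a hedged sketch of how client code typically consumes one of them (the opadd reducer over long, via the documented CILK_C_REDUCER_OPADD, CILK_C_REGISTER_REDUCER, REDUCER_VIEW, and CILK_C_UNREGISTER_REDUCER macros); the loop bounds and variable name are illustrative, and building it assumes a Cilk Plus compiler such as this gcc with -fcilkplus in C99 mode.

    /* Sketch: parallel summation with the C opadd reducer (illustrative). */
    #include <cilk/cilk.h>
    #include <cilk/reducer_opadd.h>
    #include <stdio.h>

    int main(void)
    {
        CILK_C_REDUCER_OPADD(sum, long, 0);          /* reducer over long, identity 0 */
        CILK_C_REGISTER_REDUCER(sum);                /* make it known to the runtime */

        cilk_for (long i = 0; i < 1000; ++i)
            REDUCER_VIEW(sum) += i;                  /* each strand adds into its own view */

        printf("total = %ld\n", REDUCER_VIEW(sum));  /* 499500 once the views are merged */
        CILK_C_UNREGISTER_REDUCER(sum);
        return 0;
    }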
diff --git a/gcc-4.9/libcilkrts/runtime/cilk-abi-cilk-for.cpp b/gcc-4.9/libcilkrts/runtime/cilk-abi-cilk-for.cpp
new file mode 100644
index 000000000..4cd04f521
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk-abi-cilk-for.cpp
@@ -0,0 +1,416 @@
+/* cilk-abi-cilk-for.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2011, 2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+/* Implementation of cilk_for ABI.
+ *
+ * This file must be C++, not C, in order to handle C++ exceptions correctly
+ * from within the body of the cilk_for loop
+ */
+
+#include "internal/abi.h"
+#include "metacall_impl.h"
+#include "global_state.h"
+
+// Icky macros to determine if we're compiled with optimization. Based on
+// the declaration of __CILKRTS_ASSERT in common.h
+#if defined(_WIN32)
+# if defined (_DEBUG)
+# define CILKRTS_OPTIMIZED 0 // Assumes /MDd is always used with /Od
+# else
+# define CILKRTS_OPTIMIZED 1
+# endif // defined(_DEBUG)
+#else
+# if defined(__OPTIMIZE__)
+# define CILKRTS_OPTIMIZED 1
+# else
+# define CILKRTS_OPTIMIZED 0
+# endif
+#endif
+
+template <typename count_t>
+static inline int grainsize(int req, count_t count)
+{
+ // A positive requested grain size comes from the user. A very high grain
+ // size risks losing parallelism, but the user told us what they want for
+ // grainsize. Who are we to argue?
+ if (req > 0)
+ return req;
+
+ // At present, a negative requested grain size is treated the same way as
+ // a zero grain size, i.e., the runtime computes the actual grainsize
+ // using a heuristic. In the future, the compiler may give us additional
+ // information about the size of the cilk_for body by passing a negative
+ // grain size.
+
+ // Avoid generating a zero grainsize, even for empty loops.
+ if (count < 1)
+ return 1;
+
+ global_state_t* g = cilkg_get_global_state();
+ if (g->under_ptool)
+ {
+ // Grainsize = 1, when running under PIN, and when the grainsize has
+ // not explicitly been set by the user.
+ return 1;
+ }
+ else
+ {
+ // Divide loop count by 8 times the worker count and round up.
+ const int Px8 = g->P * 8;
+ count_t n = (count + Px8 - 1) / Px8;
+
+ // 2K should be enough to amortize the cost of the cilk_for. Any
+ // larger grainsize risks losing parallelism.
+ if (n > 2048)
+ return 2048;
+ return (int) n; // n <= 2048, so no loss of precision on cast to int
+ }
+}
+
+/*
+ * call_cilk_for_loop_body
+ *
+ * Centralizes the code to call the loop body. The compiler should be
+ * inlining this code
+ *
+ * low - Low loop index we're considering in this portion of the algorithm
+ * high - High loop index we're considering in this portion of the algorithm
+ * body - lambda function for the cilk_for loop body
+ * data - data used by the lambda function
+ * w - __cilkrts_worker we're currently executing on
+ * loop_root_pedigree - __cilkrts_pedigree node we generated for the root of
+ * the cilk_for loop to flatten out the internal nodes
+ */
+template <typename count_t, typename F>
+inline static
+void call_cilk_for_loop_body(count_t low, count_t high,
+ F body, void *data,
+ __cilkrts_worker *w,
+ __cilkrts_pedigree *loop_root_pedigree)
+{
+ // Cilkscreen should not report this call in a stack trace
+ NOTIFY_ZC_INTRINSIC((char *)"cilkscreen_hide_call", 0);
+
+ // The worker is only valid until the first spawn. Fetch the
+ // __cilkrts_stack_frame out of the worker, since it will be stable across
+ // steals. The sf pointer actually points to the *parent's*
+ // __cilkrts_stack_frame, since this function is a non-spawning function
+ // and therefore has no cilk stack frame of its own.
+ __cilkrts_stack_frame *sf = w->current_stack_frame;
+
+ // Save the pedigree node pointed to by the worker. We'll need to restore
+ // that when we exit since the spawn helpers in the cilk_for call tree
+ // will assume that it's valid
+ const __cilkrts_pedigree *saved_next_pedigree_node = w->pedigree.parent;
+
+ // Add the leaf pedigree node to the chain. The parent is the root node
+ // to flatten the tree regardless of the DAG branches in the cilk_for
+ // divide-and-conquer recursion.
+ //
+ // The rank is initialized to the low index. The user is
+ // expected to call __cilkrts_bump_loop_rank at the end of the cilk_for
+ // loop body.
+ __cilkrts_pedigree loop_leaf_pedigree;
+
+ loop_leaf_pedigree.rank = (uint64_t)low;
+ loop_leaf_pedigree.parent = loop_root_pedigree;
+
+ // The worker's pedigree always starts with a rank of 0
+ w->pedigree.rank = 0;
+ w->pedigree.parent = &loop_leaf_pedigree;
+
+ // Call the compiler generated cilk_for loop body lambda function
+ body(data, low, high);
+
+ // The loop body may have included spawns, so we must refetch the worker
+ // from the __cilkrts_stack_frame, which is stable regardless of which
+ // worker we're executing on.
+ w = sf->worker;
+
+ // Restore the pedigree chain. It must be valid because the spawn helpers
+ // generated by the cilk_for implementation will access it.
+ w->pedigree.parent = saved_next_pedigree_node;
+}
+
+/* capture_spawn_arg_stack_frame
+ *
+ * Efficiently get the address of the caller's __cilkrts_stack_frame. The
+ * preconditions are that 'w' is the worker at the time of the call and
+ * 'w->current_stack_frame' points to the __cilkrts_stack_frame within the
+ * spawn helper. This function should be called only within the argument list
+ * of a function that is being spawned because that is the only situation in
+ * which these preconditions hold. This function returns the worker
+ * (unchanged) after the captured stack frame pointer is stored in the
+ * sf argument.
+ *
+ * The purpose of this function is to get the caller's stack frame in a
+ * context where the caller's worker is known but its stack frame is not
+ * necessarily initialized. The "shrink wrap" optimization delays
+ * initializing the contents of a spawning function's '__cilkrts_stack_frame'
+ * as well as the 'current_stack_frame' pointer within the worker. By calling
+ * this function within a spawning function's argument list, we can ensure
+ * that these initializations have occurred but that a detach (which would
+ * invalidate the worker pointer in the caller) has not yet occurred. Once the
+ * '__cilkrts_stack_frame' has been retrieved in this way, it is stable for the
+ * remainder of the caller's execution, and becomes an efficient way to get
+ * the worker (much more efficient than calling '__cilkrts_get_tls_worker()'),
+ * even after a spawn or sync.
+ */
+inline __cilkrts_worker*
+capture_spawn_arg_stack_frame(__cilkrts_stack_frame* &sf, __cilkrts_worker* w)
+{
+ // Get current stack frame
+ sf = w->current_stack_frame;
+#ifdef __INTEL_COMPILER
+# if __INTEL_COMPILER <= 1300 && __INTEL_COMPILER_BUILD_DATE < 20130101
+ // In older compilers 'w->current_stack_frame' points to the
+ // spawn-helper's stack frame. In newer compilers, however, it points
+ // directly to the caller's stack frame. (This change was made to avoid
+ // having the spawn helper in the frame list when evaluating function
+ // arguments, thus avoiding corruption when those arguments themselves
+ // contain cilk_spawns.)
+
+ // w->current_stack_frame is the spawn helper's stack frame.
+ // w->current_stack_frame->call_parent is the caller's stack frame.
+ sf = sf->call_parent;
+# endif
+#endif
+ return w;
+}
+
+/*
+ * cilk_for_recursive
+ *
+ * Templatized function to implement the recursive divide-and-conquer
+ * algorithm used to implement a cilk_for.
+ *
+ * low - Low loop index we're considering in this portion of the algorithm
+ * high - High loop index we're considering in this portion of the algorithm
+ * body - lambda function for the cilk_for loop body
+ * data - data used by the lambda function
+ * grain - grain size (0 if it should be computed)
+ * w - __cilkrts_worker we're currently executing on
+ * loop_root_pedigree - __cilkrts_pedigree node we generated for the root of
+ * the cilk_for loop to flatten out the internal nodes
+ */
+template <typename count_t, typename F>
+static
+void cilk_for_recursive(count_t low, count_t high,
+ F body, void *data, int grain,
+ __cilkrts_worker *w,
+ __cilkrts_pedigree *loop_root_pedigree)
+{
+tail_recurse:
+ // Cilkscreen should not report this call in a stack trace
+ // This needs to be done every time the worker resumes
+ NOTIFY_ZC_INTRINSIC((char *)"cilkscreen_hide_call", 0);
+
+ count_t count = high - low;
+ // Invariant: count > 0, grain >= 1
+ if (count > grain)
+ {
+ // Invariant: count >= 2
+ count_t mid = low + count / 2;
+ // The worker is valid only until the first spawn and is expensive to
+ // retrieve (using '__cilkrts_get_tls_worker') after the spawn. The
+ // '__cilkrts_stack_frame' is more stable, but isn't initialized until
+ // the first spawn. Thus, we want to grab the address of the
+ // '__cilkrts_stack_frame' after it is initialized but before the
+ // spawn detaches. The only place we can do that is within the
+ // argument list of the spawned function, hence the call to
+ // capture_spawn_arg_stack_frame().
+ __cilkrts_stack_frame *sf;
+#if defined(__GNUC__) && ! defined(__INTEL_COMPILER) && ! defined(__clang__)
+ // The current version of gcc initializes the sf structure eagerly.
+ // We can take advantage of this fact to avoid calling
+ // `capture_spawn_arg_stack_frame` when compiling with gcc.
+ // Remove this if the "shrink-wrap" optimization is implemented.
+ sf = w->current_stack_frame;
+ _Cilk_spawn cilk_for_recursive(low, mid, body, data, grain, w,
+ loop_root_pedigree);
+#else
+ _Cilk_spawn cilk_for_recursive(low, mid, body, data, grain,
+ capture_spawn_arg_stack_frame(sf, w),
+ loop_root_pedigree);
+#endif
+ w = sf->worker;
+ low = mid;
+
+ goto tail_recurse;
+ }
+
+ // Call the cilk_for loop body lambda function passed in by the compiler to
+ // execute one grain
+ call_cilk_for_loop_body(low, high, body, data, w, loop_root_pedigree);
+}
+
+static void noop() { }
+
+/*
+ * cilk_for_root
+ *
+ * Templatized function to implement the top level of a cilk_for loop.
+ *
+ * body - lambda function for the cilk_for loop body
+ * data - data used by the lambda function
+ * count - trip count for loop
+ * grain - grain size (0 if it should be computed)
+ */
+template <typename count_t, typename F>
+static void cilk_for_root(F body, void *data, count_t count, int grain)
+{
+ // Cilkscreen should not report this call in a stack trace
+ NOTIFY_ZC_INTRINSIC((char *)"cilkscreen_hide_call", 0);
+
+ // Pedigree computation:
+ //
+ // If the last pedigree node on entry to the _Cilk_for has value X,
+ // then at the start of each iteration of the loop body, the value of
+ // the last pedigree node should be 0, the value of the second-to-last
+ // node should equal the loop counter, and the value of the
+ // third-to-last node should be X. On return from the _Cilk_for, the
+ // value of the last pedigree should be incremented to X+2. The
+ // pedigree within the loop is thus flattened, such that the depth of
+ // recursion does not affect the results either inside or outside of
+ // the loop. Note that the pedigree after the loop exits is the same
+ // as if a single spawn and sync were executed within this function.
+
+ // TBD: Since the shrink-wrap optimization was turned on in the compiler,
+ // it is not possible to get the current stack frame without actually
+ // forcing a call to bind-thread. This spurious spawn is a temporary
+ // stopgap until the correct intrinsics are added to give us total control
+ // over frame initialization.
+ _Cilk_spawn noop();
+
+ // Fetch the current worker. From that we can get the current stack frame
+ // which will be constant even if we're stolen
+ __cilkrts_worker *w = __cilkrts_get_tls_worker();
+ __cilkrts_stack_frame *sf = w->current_stack_frame;
+
+ // Decrement the rank by one to undo the pedigree change from the
+ // _Cilk_spawn
+ --w->pedigree.rank;
+
+ // Save the current worker pedigree into loop_root_pedigree, which will be
+ // the root node for our flattened pedigree.
+ __cilkrts_pedigree loop_root_pedigree = w->pedigree;
+
+ // Don't splice the loop_root node in yet. It will be done when we
+ // call the loop body lambda function
+// w->pedigree.rank = 0;
+// w->pedigree.next = &loop_root_pedigree;
+
+ /* Spawn is necessary at top-level to force runtime to start up.
+ * Runtime must be started in order to call the grainsize() function.
+ */
+ int gs = grainsize(grain, count);
+ cilk_for_recursive((count_t) 0, count, body, data, gs, w,
+ &loop_root_pedigree);
+
+ // Need to refetch the worker after calling a spawning function.
+ w = sf->worker;
+
+ // Restore the pedigree in the worker.
+ w->pedigree = loop_root_pedigree;
+
+ // Bump the worker pedigree.
+ ++w->pedigree.rank;
+
+ // Implicit sync will increment the pedigree leaf rank again, for a total
+ // of two increments. If the noop spawn above is removed, then we'll need
+ // to re-enable the following code:
+// // If this is an optimized build, then the compiler will have optimized
+// // out the increment of the worker's pedigree in the implied sync. We
+// // need to add one to make the pedigree_loop test work correctly.
+// #if CILKRTS_OPTIMIZED
+// ++sf->worker->pedigree.rank;
+// #endif
+}
+
+// Use extern "C" to suppress name mangling of __cilkrts_cilk_for_32 and
+// __cilkrts_cilk_for_64.
+extern "C" {
+
+/*
+ * __cilkrts_cilk_for_32
+ *
+ * Implementation of cilk_for for 32-bit trip counts (regardless of processor
+ * word size). Assumes that the range is 0 - count.
+ *
+ * body - lambda function for the cilk_for loop body
+ * data - data used by the lambda function
+ * count - trip count for loop
+ * grain - grain size (0 if it should be computed)
+ */
+
+CILK_ABI_THROWS_VOID __cilkrts_cilk_for_32(__cilk_abi_f32_t body, void *data,
+ cilk32_t count, int grain)
+{
+ // Cilkscreen should not report this call in a stack trace
+ NOTIFY_ZC_INTRINSIC((char *)"cilkscreen_hide_call", 0);
+
+ // Check for an empty range here as an optimization - don't need to do any
+ // __cilkrts_stack_frame initialization
+ if (count > 0)
+ cilk_for_root(body, data, count, grain);
+}
+
+/*
+ * __cilkrts_cilk_for_64
+ *
+ * Implementation of cilk_for for 64-bit trip counts (regardless of processor
+ * word size). Assumes that the range is 0 - count.
+ *
+ * body - lambda function for the cilk_for loop body
+ * data - data used by the lambda function
+ * count - trip count for loop
+ * grain - grain size (0 if it should be computed)
+ */
+CILK_ABI_THROWS_VOID __cilkrts_cilk_for_64(__cilk_abi_f64_t body, void *data,
+ cilk64_t count, int grain)
+{
+ // Check for an empty range here as an optimization - don't need to do any
+ // __cilkrts_stack_frame initialization
+ if (count > 0)
+ cilk_for_root(body, data, count, grain);
+}
+
+} // end extern "C"
+
+/* End cilk-abi-cilk-for.cpp */
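The default grainsize chosen by grainsize() above is, in effect, min(2048, max(1, ceil(count / (8 * P)))), with an explicit positive request honored unchanged and a forced grainsize of 1 when running under a PIN-based tool. A small standalone sketch of that computation follows, assuming nothing beyond standard C plus GCC; the worker count and trip counts in main() are examples only.

    /* Sketch of the cilk_for grainsize heuristic (mirrors grainsize() above). */
    #include <stdio.h>

    static int computed_grainsize(int requested, long long count, int P, int under_ptool)
    {
        if (requested > 0)                       /* explicit user grainsize wins */
            return requested;
        if (count < 1)                           /* never produce a zero grainsize */
            return 1;
        if (under_ptool)                         /* running under PIN: grainsize 1 */
            return 1;

        long long Px8 = 8LL * P;
        long long n = (count + Px8 - 1) / Px8;   /* ceil(count / 8P) */
        return n > 2048 ? 2048 : (int)n;         /* 2048 amortizes the cilk_for overhead */
    }

    int main(void)
    {
        printf("%d\n", computed_grainsize(0, 1000000, 8, 0));   /* ceil(1e6/64) = 15625 -> clamped to 2048 */
        printf("%d\n", computed_grainsize(0, 100, 8, 0));       /* ceil(100/64) = 2 */
        printf("%d\n", computed_grainsize(500, 1000000, 8, 0)); /* user asked for 500 -> 500 */
        return 0;
    }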
diff --git a/gcc-4.9/libcilkrts/runtime/cilk-abi-vla-internal.c b/gcc-4.9/libcilkrts/runtime/cilk-abi-vla-internal.c
new file mode 100644
index 000000000..6fb92677a
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk-abi-vla-internal.c
@@ -0,0 +1,83 @@
+/* cilk-abi-vla-internal.c -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/*
+ * These functions are provided in their own compilation unit so I can debug
+ * them. cilk-abi-vla.c must always be compiled with optimization on so that
+ * inlining occurs.
+ */
+
+#include "internal/abi.h"
+#include "cilk-abi-vla-internal.h"
+#include "bug.h"
+#include "full_frame.h"
+#include "local_state.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "bug.h"
+
+void *vla_internal_heap_alloc(__cilkrts_stack_frame *sf,
+ size_t full_size,
+ uint32_t align)
+{
+ return malloc(full_size);
+}
+
+void vla_internal_heap_free(void *t, size_t size)
+{
+ free(t);
+}
+
+void vla_free_from_original_stack(__cilkrts_stack_frame *sf,
+ size_t full_size)
+{
+ // The __cilkrts_stack_frame must be initialized
+ CILK_ASSERT(sf->worker);
+
+#if 1
+ // Add full_size to ff->sync_sp so that when we return, the VLA will no
+ // longer be allocated on the stack
+ __cilkrts_adjust_stack(sf->worker->l->frame_ff, full_size);
+#else
+ // Inline __cilkrts_adjust_stack for Kevin
+ full_frame *ff = sf->worker->l->frame_ff;
+ ff->sync_sp = ff->sync_sp + full_size;
+#endif
+}
diff --git a/gcc-4.9/libcilkrts/runtime/cilk-abi-vla-internal.h b/gcc-4.9/libcilkrts/runtime/cilk-abi-vla-internal.h
new file mode 100644
index 000000000..909f08fa4
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk-abi-vla-internal.h
@@ -0,0 +1,90 @@
+/* cilk-abi-vla-internal.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file cilk-abi-vla-internal.h
+ *
+ * @brief Allocation/deallocation function for use with Variable Length
+ * Arrays in spawning functions.
+ *
+ * These should be the only functions in the Cilk runtime allocating memory
+ * from the standard C runtime heap. This memory will be provided to user
+ * code for use in VLAs, when the memory cannot be allocated from the stack.
+ *
+ * While these functions are simply passthroughs to malloc and free at the
+ * moment, once we've got the basics of VLA allocations working we'll make
+ * them do fancier tricks.
+ */
+
+/**
+ * @brief Allocate memory from the heap for use by a Variable Length Array in
+ * a spawning function.
+ *
+ * @param sf The __cilkrts_stack_frame for the spawning function containing
+ * the VLA.
+ * @param full_size The number of bytes to be allocated, including any tags
+ * needed to identify this as allocated from the heap.
+ * @param align Any alignment necessary for the allocation.
+ */
+
+void *vla_internal_heap_alloc(__cilkrts_stack_frame *sf,
+ size_t full_size,
+ uint32_t align);
+
+/**
+ * @brief Deallocate memory from the heap used by a Variable Length Array in
+ * a spawning function.
+ *
+ * @param t The address of the memory block to be freed.
+ * @param size The size of the memory block to be freed.
+ */
+
+void vla_internal_heap_free(void *t,
+ size_t size);
+
+/**
+ * @brief Deallocate memory from the original stack. We'll do this by adding
+ * full_size to ff->sync_sp. So after the sync, the Variable Length Array
+ * will no longer be allocated on the stack.
+ *
+ * @param sf The __cilkrts_stack_frame for the spawning function that is
+ * deallocating a VLA.
+ * @param full_size The size of the VLA, including any alignment and tags.
+ */
+void vla_free_from_original_stack(__cilkrts_stack_frame *sf,
+ size_t full_size);
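To make the documented contract concrete, here is a minimal sketch of how a caller might pair these entry points when a VLA has to fall back to the heap. The helper names, the double element type, and the alignment value are assumptions for illustration only; the real callers are the compiler-support routines in cilk-abi-vla.c.

    /* Illustrative caller of the VLA heap-allocation entry points declared above. */
    #include <stddef.h>
    #include <stdint.h>
    #include "internal/abi.h"
    #include "cilk-abi-vla-internal.h"

    /* Hypothetical helper: obtain heap storage for an n-element double VLA. */
    static void *alloc_vla_on_heap(__cilkrts_stack_frame *sf, size_t n, size_t *full_size_out)
    {
        size_t full_size = n * sizeof(double);   /* a real caller would add its tag bytes here */
        *full_size_out = full_size;
        return vla_internal_heap_alloc(sf, full_size, 16 /* assumed alignment */);
    }

    /* Hypothetical helper: release the block; the size must match the allocation. */
    static void free_vla_from_heap(void *vla, size_t full_size)
    {
        vla_internal_heap_free(vla, full_size);
    }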
diff --git a/gcc-4.9/libcilkrts/runtime/cilk-abi.c b/gcc-4.9/libcilkrts/runtime/cilk-abi.c
new file mode 100644
index 000000000..1da05239e
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk-abi.c
@@ -0,0 +1,733 @@
+/* cilk-abi.c -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2010-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+/**
+ * @file cilk-abi.c
+ *
+ * @brief cilk-abi.c implements all of the entrypoints to the Intel Cilk
+ * Plus runtime.
+ */
+
+/*
+ * Define this macro so that compilation of this file generates the
+ * non-inlined versions of certain functions in cilk_api.h.
+ */
+#include "internal/abi.h"
+#include "cilk/cilk_api.h"
+#include "cilk/cilk_undocumented.h"
+#include "cilktools/cilkscreen.h"
+
+#include "global_state.h"
+#include "os.h"
+#include "os_mutex.h"
+#include "bug.h"
+#include "local_state.h"
+#include "full_frame.h"
+#include "pedigrees.h"
+#include "scheduler.h"
+#include "sysdep.h"
+#include "except.h"
+#include "cilk_malloc.h"
+#include "record-replay.h"
+
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#ifdef _MSC_VER
+/* Some versions of icc don't support limits.h on Linux if
+ gcc 4.3 or newer is installed. */
+#include <limits.h>
+
+/* Declare _ReturnAddress compiler intrinsic */
+void * _ReturnAddress(void);
+#pragma intrinsic(_ReturnAddress)
+
+#include "sysdep-win.h" // Needed for sysdep_init_module()
+#endif /* _MSC_VER */
+
+#include "metacall_impl.h"
+#include "reducer_impl.h"
+#include "cilk-ittnotify.h"
+#include "cilk-tbb-interop.h"
+
+#define TBB_INTEROP_DATA_DELAYED_UNTIL_BIND (void *)-1
+
+/**
+ * __cilkrts_bind_thread is a versioned entrypoint. The runtime should be
+ * exporting copies of __cilkrts_bind_version for the current and all previous
+ * versions of the ABI.
+ *
+ * This macro should always be set to generate a version to match the current
+ * version; __CILKRTS_ABI_VERSION.
+ */
+#define BIND_THREAD_RTN __cilkrts_bind_thread_1
+
+static inline
+void enter_frame_internal(__cilkrts_stack_frame *sf, uint32_t version)
+{
+ __cilkrts_worker *w = __cilkrts_get_tls_worker();
+ if (w == 0) { /* slow path */
+ w = BIND_THREAD_RTN();
+
+ sf->flags = CILK_FRAME_LAST | (version << 24);
+ CILK_ASSERT((sf->flags & CILK_FRAME_FLAGS_MASK) == CILK_FRAME_LAST);
+ } else {
+ sf->flags = (version << 24);
+ CILK_ASSERT((sf->flags & CILK_FRAME_FLAGS_MASK) == 0);
+ }
+ sf->call_parent = w->current_stack_frame;
+ sf->worker = w;
+ w->current_stack_frame = sf;
+}
+
+CILK_ABI_VOID __cilkrts_enter_frame(__cilkrts_stack_frame *sf)
+{
+ enter_frame_internal(sf, 0);
+}
+
+CILK_ABI_VOID __cilkrts_enter_frame_1(__cilkrts_stack_frame *sf)
+{
+ enter_frame_internal(sf, 1);
+ sf->reserved = 0;
+}
+
+static inline
+void enter_frame_fast_internal(__cilkrts_stack_frame *sf, uint32_t version)
+{
+ __cilkrts_worker *w = __cilkrts_get_tls_worker_fast();
+ sf->flags = version << 24;
+ sf->call_parent = w->current_stack_frame;
+ sf->worker = w;
+ w->current_stack_frame = sf;
+}
+
+CILK_ABI_VOID __cilkrts_enter_frame_fast(__cilkrts_stack_frame *sf)
+{
+ enter_frame_fast_internal(sf, 0);
+}
+
+CILK_ABI_VOID __cilkrts_enter_frame_fast_1(__cilkrts_stack_frame *sf)
+{
+ enter_frame_fast_internal(sf, 1);
+ sf->reserved = 0;
+}
+
+/**
+ * A component of the THE protocol. __cilkrts_undo_detach checks whether
+ * this frame's parent has been stolen. If it hasn't, the frame can return
+ * normally. If the parent has been stolen, or if we suspect it might be,
+ * then __cilkrts_leave_frame() needs to call into the runtime.
+ *
+ * @note __cilkrts_undo_detach() is comparing the exception pointer against
+ * the tail pointer. The exception pointer is modified when another worker
+ * is considering whether it can steal a frame. The head pointer is updated
+ * to match when the worker lock is taken out and the thief is sure that
+ * it can complete the steal. If the steal cannot be completed, the thief
+ * will restore the exception pointer.
+ *
+ * @return true if undo-detach failed.
+ */
+static int __cilkrts_undo_detach(__cilkrts_stack_frame *sf)
+{
+ __cilkrts_worker *w = sf->worker;
+ __cilkrts_stack_frame *volatile *t = w->tail;
+
+/* DBGPRINTF("%d - __cilkrts_undo_detach - sf %p\n", w->self, sf); */
+
+ --t;
+ w->tail = t;
+ /* On x86 the __sync_fetch_and_<op> family includes a
+ full memory barrier. In theory the sequence in the
+ second branch of the #if should be faster, but on
+ most x86 it is not. */
+#if defined __i386__ || defined __x86_64__
+ __sync_fetch_and_and(&sf->flags, ~CILK_FRAME_DETACHED);
+#else
+ __cilkrts_fence(); /* membar #StoreLoad */
+ sf->flags &= ~CILK_FRAME_DETACHED;
+#endif
+
+ return __builtin_expect(t < w->exc, 0);
+}
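The comment above compresses the THE protocol into a few sentences, so here is a simplified standalone model of the check __cilkrts_undo_detach performs: the victim pops its own tail, issues a full barrier, and must defer to the runtime whenever a thief's exception pointer has already moved past the popped frame. The deque layout, field names, and scenario below only mimic the worker fields used above and are not the runtime's actual data structures.

    /* Simplified model of the THE check in __cilkrts_undo_detach (illustrative only). */
    #include <stdbool.h>
    #include <stdio.h>

    struct frame { int id; };

    struct deque {
        struct frame **head;   /* oldest stealable frame (thieves take from here) */
        struct frame **exc;    /* "exception" pointer a thief advances before stealing */
        struct frame **tail;   /* one past the youngest detached frame (victim end) */
    };

    /* Victim side: try to undo the detach of the youngest frame.
     * Returns true if the undo failed, i.e. a thief may already own the frame. */
    static bool model_undo_detach(struct deque *d)
    {
        --d->tail;                 /* pop our own tail... */
        __sync_synchronize();      /* ...full barrier before inspecting exc */
        return d->tail < d->exc;   /* thief got there first? */
    }

    int main(void)
    {
        struct frame f = { 1 };
        struct frame *slots[8] = { &f };
        struct deque d = { slots, slots, slots + 1 };

        /* No thief is interested: exc has not moved, so the undo succeeds. */
        printf("no thief:     undo %s\n", model_undo_detach(&d) ? "failed" : "succeeded");

        d.tail = slots + 1;        /* pretend the frame was detached again */
        d.exc  = slots + 1;        /* and a thief has advanced exc past it  */
        printf("thief active: undo %s\n", model_undo_detach(&d) ? "failed" : "succeeded");
        return 0;
    }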
+
+CILK_ABI_VOID __cilkrts_leave_frame(__cilkrts_stack_frame *sf)
+{
+ __cilkrts_worker *w = sf->worker;
+
+/* DBGPRINTF("%d-%p __cilkrts_leave_frame - sf %p, flags: %x\n", w->self, GetWorkerFiber(w), sf, sf->flags); */
+
+#ifdef _WIN32
+ /* if leave frame was called from our unwind handler, leave_frame should
+ proceed no further. */
+ if (sf->flags & CILK_FRAME_UNWINDING)
+ {
+/* DBGPRINTF("%d - __cilkrts_leave_frame - aborting due to UNWINDING flag\n", w->self); */
+
+ // If this is the frame of a spawn helper (indicated by the
+ // CILK_FRAME_DETACHED flag) we must update the pedigree. The pedigree
+ // points to nodes allocated on the stack. Failing to update it will
+ // result in an accvio/segfault if the pedigree is walked. This must happen
+ // for all spawn helper frames, even if we're processing an exception
+ if ((sf->flags & CILK_FRAME_DETACHED))
+ {
+ update_pedigree_on_leave_frame(w, sf);
+ }
+ return;
+ }
+#endif
+
+#if CILK_LIB_DEBUG
+ /* ensure the caller popped itself */
+ CILK_ASSERT(w->current_stack_frame != sf);
+#endif
+
+ /* The exiting function should have checked for zero flags,
+ so there is no check for flags == 0 here. */
+
+#if CILK_LIB_DEBUG
+ if (__builtin_expect(sf->flags & (CILK_FRAME_EXITING|CILK_FRAME_UNSYNCHED), 0))
+ __cilkrts_bug("W%u: function exiting with invalid flags %02x\n",
+ w->self, sf->flags);
+#endif
+
+ /* Must return normally if (1) the active function was called
+ and not spawned, or (2) the parent has never been stolen. */
+ if ((sf->flags & CILK_FRAME_DETACHED)) {
+/* DBGPRINTF("%d - __cilkrts_leave_frame - CILK_FRAME_DETACHED\n", w->self); */
+
+#ifndef _WIN32
+ if (__builtin_expect(sf->flags & CILK_FRAME_EXCEPTING, 0)) {
+// Pedigree will be updated in __cilkrts_leave_frame. We need the
+// pedigree before the update for record/replay
+// update_pedigree_on_leave_frame(w, sf);
+ __cilkrts_return_exception(sf);
+ /* If return_exception returns the caller is attached.
+ leave_frame is called from a cleanup (destructor)
+ for the frame object. The caller will reraise the
+ exception. */
+ return;
+ }
+#endif
+
+ // During replay, check whether w was the last worker to continue
+ replay_wait_for_steal_if_parent_was_stolen(w);
+
+ // Attempt to undo the detach
+ if (__builtin_expect(__cilkrts_undo_detach(sf), 0)) {
+ // The update of pedigree for leaving the frame occurs
+ // inside this call if it does not return.
+ __cilkrts_c_THE_exception_check(w, sf);
+ }
+
+ update_pedigree_on_leave_frame(w, sf);
+
+ /* This path is taken when undo-detach wins the race with stealing.
+ Otherwise this strand terminates and the caller will be resumed
+ via setjmp at sync. */
+ if (__builtin_expect(sf->flags & CILK_FRAME_FLAGS_MASK, 0))
+ __cilkrts_bug("W%u: frame won undo-detach race with flags %02x\n",
+ w->self, sf->flags);
+
+ return;
+ }
+
+#if CILK_LIB_DEBUG
+ sf->flags |= CILK_FRAME_EXITING;
+#endif
+
+ if (__builtin_expect(sf->flags & CILK_FRAME_LAST, 0))
+ __cilkrts_c_return_from_initial(w); /* does return */
+ else if (sf->flags & CILK_FRAME_STOLEN)
+ __cilkrts_return(w); /* does return */
+
+/* DBGPRINTF("%d-%p __cilkrts_leave_frame - returning, StackBase: %p\n", w->self, GetWorkerFiber(w)); */
+}
+
+/* Caller must have called setjmp. */
+CILK_ABI_VOID __cilkrts_sync(__cilkrts_stack_frame *sf)
+{
+ __cilkrts_worker *w = sf->worker;
+/* DBGPRINTF("%d-%p __cilkrts_sync - sf %p\n", w->self, GetWorkerFiber(w), sf); */
+ if (__builtin_expect(!(sf->flags & CILK_FRAME_UNSYNCHED), 0))
+ __cilkrts_bug("W%u: double sync %p\n", w->self, sf);
+#ifndef _WIN32
+ if (__builtin_expect(sf->flags & CILK_FRAME_EXCEPTING, 0)) {
+ __cilkrts_c_sync_except(w, sf);
+ }
+#endif
+
+ __cilkrts_c_sync(w, sf);
+}
+
+/*
+ * __cilkrts_get_sf
+ *
+ * Debugging aid to provide access to the current __cilkrts_stack_frame.
+ *
+ * Not documented!
+ */
+
+CILK_API_VOID_PTR
+__cilkrts_get_sf(void)
+{
+ __cilkrts_worker *w = __cilkrts_get_tls_worker();
+ if (0 == w)
+ return NULL;
+
+ return w->current_stack_frame;
+}
+
+/* Call with global lock held */
+static __cilkrts_worker *find_free_worker(global_state_t *g)
+{
+ __cilkrts_worker *w = 0;
+ int i;
+
+ // Scan the non-system workers looking for one which is free so we can
+ // use it.
+ for (i = g->P - 1; i < g->total_workers; ++i) {
+ w = g->workers[i];
+ CILK_ASSERT(WORKER_SYSTEM != w->l->type);
+ if (w->l->type == WORKER_FREE) {
+ w->l->type = WORKER_USER;
+ w->l->team = w;
+ return w;
+ }
+ }
+
+ // If we ran out of workers, create a new one. It doesn't actually belong
+ // to the Cilk global state so nobody will ever try to steal from it.
+ w = (__cilkrts_worker *)__cilkrts_malloc(sizeof(*w));
+ __cilkrts_cilkscreen_ignore_block(w, w+1);
+ make_worker(g, -1, w);
+ w->l->type = WORKER_USER;
+ w->l->team = w;
+ return w;
+}
+
+/*
+ * __cilkrts_bind_thread
+ *
+ * Exported function to bind a thread to the runtime.
+ *
+ * This function name should always have a trailing suffix for the latest ABI
+ * version. This means that code built with a new compiler will not load
+ * against an old copy of the runtime.
+ *
+ * Symbols for the function called by code compiled with old versions of the
+ * compiler are created in an OS-specific manner:
+ * - On Windows the old symbols are defined in the cilk-exports.def linker
+ * definitions file as aliases of BIND_THREAD_RTN
+ * - On Linux aliased symbols are created for BIND_THREAD_RTN in this file
+ * - On MacOS the alternate entrypoints are implemented and simply call
+ * BIND_THREAD_RTN.
+ */
+CILK_ABI_WORKER_PTR BIND_THREAD_RTN(void)
+{
+ __cilkrts_worker *w;
+ int start_cilkscreen = 0;
+#ifdef USE_ITTNOTIFY
+ static int unique_obj;
+#endif
+
+ // Cannot set this pointer until after __cilkrts_init_internal() call:
+ global_state_t* g;
+
+ ITT_SYNC_CREATE (&unique_obj, "Initialization");
+ ITT_SYNC_PREPARE(&unique_obj);
+ ITT_SYNC_ACQUIRED(&unique_obj);
+
+
+ /* 1: Initialize and start the Cilk runtime */
+ __cilkrts_init_internal(1);
+
+ /*
+ * 2: Choose a worker for this thread (fail if none left). The table of
+ * user workers is protected by the global OS mutex lock.
+ */
+ g = cilkg_get_global_state();
+ global_os_mutex_lock();
+ if (__builtin_expect(g->work_done, 0))
+ __cilkrts_bug("Attempt to enter Cilk while Cilk is shutting down");
+ w = find_free_worker(g);
+ CILK_ASSERT(w);
+
+ __cilkrts_set_tls_worker(w);
+ __cilkrts_cilkscreen_establish_worker(w);
+ {
+ full_frame *ff = __cilkrts_make_full_frame(w, 0);
+
+ ff->fiber_self = cilk_fiber_allocate_from_thread();
+ CILK_ASSERT(ff->fiber_self);
+
+ cilk_fiber_set_owner(ff->fiber_self, w);
+ cilk_fiber_tbb_interop_use_saved_stack_op_info(ff->fiber_self);
+
+ CILK_ASSERT(ff->join_counter == 0);
+ ff->join_counter = 1;
+ w->l->frame_ff = ff;
+ w->reducer_map = __cilkrts_make_reducer_map(w);
+ __cilkrts_set_leftmost_reducer_map(w->reducer_map, 1);
+ load_pedigree_leaf_into_user_worker(w);
+ }
+
+ // Make sure that the head and tail are reset, and saved_protected_tail
+ // allows all frames to be stolen.
+ //
+ // Note that we must NOT check w->exc, since workers that are trying to
+ // steal from it will be updating w->exc and we don't own the worker lock.
+ // It's not worth taking out the lock just for an assertion.
+ CILK_ASSERT(w->head == w->l->ltq);
+ CILK_ASSERT(w->tail == w->l->ltq);
+ CILK_ASSERT(w->protected_tail == w->ltq_limit);
+
+ // There may have been an old pending exception which was freed when the
+ // exception was caught outside of Cilk
+ w->l->pending_exception = NULL;
+
+ w->reserved = NULL;
+
+ // If we've already created a scheduling fiber for this worker, we'll just
+ // reuse it. If w->self < 0, it means that this is an ad-hoc user worker
+ // not known to the global state. Thus, we need to create a scheduling
+ // stack only if we don't already have one and w->self >= 0.
+ if (NULL == w->l->scheduling_fiber && w->self >= 0)
+ {
+ START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE) {
+ // Create a scheduling fiber for this worker.
+ w->l->scheduling_fiber =
+ cilk_fiber_allocate_from_heap(CILK_SCHEDULING_STACK_SIZE);
+ cilk_fiber_reset_state(w->l->scheduling_fiber,
+ scheduler_fiber_proc_for_user_worker);
+ cilk_fiber_set_owner(w->l->scheduling_fiber, w);
+ } STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE);
+ }
+
+ // If the scheduling fiber is NULL, we've either exceeded our quota for
+ // fibers or workers or we're out of memory, so we should lose parallelism
+ // by disallowing stealing.
+ if (NULL == w->l->scheduling_fiber)
+ __cilkrts_disallow_stealing(w, NULL);
+
+ start_cilkscreen = (0 == w->g->Q);
+
+ if (w->self != -1) {
+ // w->self != -1, means that w is a normal user worker and must be
+ // accounted for by the global state since other workers can steal from
+ // it.
+
+ // w->self == -1, means that w is an overflow worker and was created on
+ // demand. I.e., it does not need to be accounted for by the global
+ // state.
+
+ __cilkrts_enter_cilk(w->g);
+ }
+
+ global_os_mutex_unlock();
+
+ /* If there's only 1 worker, the counts will be started in
+ * __cilkrts_scheduler */
+ if (g->P > 1)
+ {
+ START_INTERVAL(w, INTERVAL_IN_SCHEDULER);
+ START_INTERVAL(w, INTERVAL_WORKING);
+ }
+
+ ITT_SYNC_RELEASING(&unique_obj);
+
+ /* Turn on Cilkscreen if this is the first worker. This needs to be done
+ * when we are NOT holding the os mutex. */
+ if (start_cilkscreen)
+ __cilkrts_cilkscreen_enable_instrumentation();
+
+ return w;
+}
+
+#ifndef _MSC_VER
+/*
+ * Define old version-specific symbols for binding threads (since they exist in
+ * all Cilk code). These aliases prohibit newly compiled code from loading an
+ * old version of the runtime. We can handle old code with a new runtime, but
+ * new code with an old runtime is verboten!
+ *
+ * For Windows, the aliased symbol is exported in cilk-exports.def.
+ */
+#if defined(_DARWIN_C_SOURCE) || defined(__APPLE__)
+/**
+ * Mac OS X: Unfortunately, Darwin doesn't allow aliasing, so we just make a
+ * call and hope the optimizer does the right thing.
+ */
+CILK_ABI_WORKER_PTR __cilkrts_bind_thread (void) {
+ return BIND_THREAD_RTN();
+}
+#else
+
+/**
+ * Macro to convert a parameter to a string. Used on Linux or BSD.
+ */
+#define STRINGIFY(x) #x
+
+/**
+ * Macro to generate an __attribute__ for an aliased name
+ */
+#define ALIASED_NAME(x) __attribute__ ((alias (STRINGIFY(x))))
+
+/**
+ * Linux or BSD: Use the alias attribute to make the labels for the versioned
+ * functions point to the same place in the code as the original. Using
+ * the two macros is annoying but required.
+ */
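+/*
+ * For illustration (assuming BIND_THREAD_RTN expands elsewhere to a
+ * versioned name such as __cilkrts_bind_thread_1), the declaration below
+ * expands roughly to:
+ *
+ *   CILK_ABI_WORKER_PTR __cilkrts_bind_thread(void)
+ *       __attribute__ ((alias ("__cilkrts_bind_thread_1")));
+ *
+ * The extra STRINGIFY level is what forces BIND_THREAD_RTN to be
+ * macro-expanded before it is stringized; stringizing the parameter
+ * directly with #x would produce the literal string "BIND_THREAD_RTN".
+ */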
+
+CILK_ABI_WORKER_PTR __cilkrts_bind_thread(void)
+ ALIASED_NAME(BIND_THREAD_RTN);
+
+#endif // defined _DARWIN_C_SOURCE || defined __APPLE__
+#endif // !defined _MSC_VER
+
+CILK_API_SIZET
+__cilkrts_get_stack_size(void) {
+ return cilkg_get_stack_size();
+}
+
+// Method for debugging.
+CILK_API_VOID __cilkrts_dump_stats(void)
+{
+ // While the stats aren't protected by the global OS mutex, the table
+ // of workers is, so take out the global OS mutex while we're doing this
+ global_os_mutex_lock();
+ if (cilkg_is_published()) {
+ global_state_t *g = cilkg_get_global_state();
+ __cilkrts_dump_stats_to_stderr(g);
+ }
+ else {
+ __cilkrts_bug("Attempting to report Cilk stats before the runtime has started\n");
+ }
+ global_os_mutex_unlock();
+}
+
+#ifndef _WIN32
+CILK_ABI_THROWS_VOID __cilkrts_rethrow(__cilkrts_stack_frame *sf)
+{
+ __cilkrts_gcc_rethrow(sf);
+}
+#endif
+
+/*
+ * __cilkrts_unwatch_stack
+ *
+ * Callback for TBB to tell us they don't want to watch the stack anymore
+ */
+
+static __cilk_tbb_retcode __cilkrts_unwatch_stack(void *data)
+{
+ __cilk_tbb_stack_op_thunk o;
+
+ // If the cilk_fiber wasn't available fetch it now
+ if (TBB_INTEROP_DATA_DELAYED_UNTIL_BIND == data)
+ {
+ full_frame *ff;
+ __cilkrts_worker *w = __cilkrts_get_tls_worker();
+ if (NULL == w)
+ {
+ // Free any saved stack op information
+ cilk_fiber_tbb_interop_free_stack_op_info();
+
+ return 0; /* Success! */
+ }
+
+ __cilkrts_worker_lock(w);
+ ff = w->l->frame_ff;
+ __cilkrts_frame_lock(w,ff);
+ data = ff->fiber_self;
+ __cilkrts_frame_unlock(w,ff);
+ __cilkrts_worker_unlock(w);
+ }
+
+#if CILK_LIB_DEBUG /* Debug code */
+ /* Get current stack */
+ full_frame *ff;
+ __cilkrts_worker *w = __cilkrts_get_tls_worker();
+ __cilkrts_worker_lock(w);
+ ff = w->l->frame_ff;
+ __cilkrts_frame_lock(w,ff);
+ CILK_ASSERT (data == ff->fiber_self);
+ __cilkrts_frame_unlock(w,ff);
+ __cilkrts_worker_unlock(w);
+#endif
+
+ /* Clear the callback information */
+ o.data = NULL;
+ o.routine = NULL;
+ cilk_fiber_set_stack_op((cilk_fiber*)data, o);
+
+ // Note. Do *NOT* free any saved stack information here. If they want to
+ // free the saved stack op information, they'll do it when the thread is
+ // unbound
+
+ return 0; /* Success! */
+}
+
+/*
+ * __cilkrts_watch_stack
+ *
+ * Called by TBB, defined by Cilk.
+ *
+ * Requests that Cilk invoke the stack op routine when it orphans a stack.
+ * Cilk sets *u to a thunk that TBB should call when it is no longer interested
+ * in watching the stack.
+ */
+
+CILK_API_TBB_RETCODE
+__cilkrts_watch_stack(__cilk_tbb_unwatch_thunk *u,
+ __cilk_tbb_stack_op_thunk o)
+{
+ cilk_fiber* current_fiber;
+ __cilkrts_worker *w;
+
+#ifdef _MSC_VER
+ // This may be called by TBB *before* the OS has given us our
+ // initialization call. Make sure the module is initialized.
+ sysdep_init_module();
+#endif
+
+ // Fetch the __cilkrts_worker bound to this thread
+ w = __cilkrts_get_tls_worker();
+ if (NULL == w)
+ {
+ // Save data for later. We'll deal with it when/if this thread binds
+ // to the runtime
+ cilk_fiber_tbb_interop_save_stack_op_info(o);
+
+ u->routine = __cilkrts_unwatch_stack;
+ u->data = TBB_INTEROP_DATA_DELAYED_UNTIL_BIND;
+
+ return 0;
+ }
+
+ /* Get current stack */
+ __cilkrts_worker_lock(w);
+ current_fiber = w->l->frame_ff->fiber_self;
+ __cilkrts_worker_unlock(w);
+
+/* CILK_ASSERT( !sd->stack_op_data ); */
+/* CILK_ASSERT( !sd->stack_op_routine ); */
+
+ /* Give TBB our callback */
+ u->routine = __cilkrts_unwatch_stack;
+ u->data = current_fiber;
+ /* Save the callback information */
+ cilk_fiber_set_stack_op(current_fiber, o);
+
+ return 0; /* Success! */
+}
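+
+/*
+ * Illustrative sketch (not part of the interop contract; my_stack_op and
+ * my_context are hypothetical names): how a TBB-side client might register
+ * and later cancel a stack-watching callback.
+ *
+ *   static __cilk_tbb_retcode my_stack_op(enum __cilk_tbb_stack_op op,
+ *                                         void *data)
+ *   {
+ *       // React to CILK_TBB_STACK_ORPHAN / ADOPT / RELEASE for 'data'.
+ *       return 0;
+ *   }
+ *
+ *   __cilk_tbb_stack_op_thunk o = { my_stack_op, my_context };
+ *   __cilk_tbb_unwatch_thunk u;
+ *   __cilkrts_watch_stack(&u, o);    // start watching this thread's stack
+ *   ...
+ *   u.routine(u.data);               // done watching; invoke unwatch thunk
+ */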
+
+
+// This function must be called only within a continuation, within the stack
+// frame of the continuation itself.
+CILK_API_INT __cilkrts_synched(void)
+{
+ __cilkrts_worker *w = __cilkrts_get_tls_worker();
+
+ // If we don't have a worker, then we're synched by definition :o)
+ if (NULL == w)
+ return 1;
+
+ // Check to see if we are in a stolen continuation. If not, then
+ // we are synched.
+ uint32_t flags = w->current_stack_frame->flags;
+ if (0 == (flags & CILK_FRAME_UNSYNCHED))
+ return 1;
+
+ // We are in a stolen continuation, but the join counter might have been
+ // decremented to one, making us synched again. Get the full frame so
+ // that we can check the join counter. ASSUME: frame_ff is stable (can be
+ // read without a lock) in a stolen continuation -- it can't be stolen
+ // while it's currently executing.
+ full_frame *ff = w->l->frame_ff;
+
+ // Make sure we have a full frame
+ // TBD: Don't think that we should ever not have a full frame here.
+ // CILK_ASSERT(NULL != ff); ?
+ if (NULL == ff)
+ return 1;
+
+ // We're synched if there are no outstanding children at this instant in
+ // time. Note that this is a known race, but it's ok since we're only
+ // reading. We can get false negatives, but not false positives. (I.e.,
+ // we can read a non-one join_counter just before it goes to one, but the
+ // join_counter cannot go from one to greater than one while we're
+ // reading.)
+ return 1 == ff->join_counter;
+}
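+
+/*
+ * Example (minimal sketch): inside a continuation, a caller might use this
+ * to decide whether spawned children could still be running.
+ *
+ *   if (__cilkrts_synched()) {
+ *       // No outstanding children at this instant; their results are visible.
+ *   }
+ */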
+
+
+
+
+CILK_API_INT
+__cilkrts_bump_loop_rank_internal(__cilkrts_worker* w)
+{
+ // If we don't have a worker, then the runtime is not bound to this
+ // thread and there is no rank to increment
+ if (NULL == w)
+ return -1;
+
+ // We're at the start of the loop body. Advance the cilk_for loop
+ // body pedigree by following the parent link and updating its
+ // rank.
+
+ // Normally, we'd just write "w->pedigree.parent->rank++"
+ // But we need to cast away the "const".
+ ((__cilkrts_pedigree*) w->pedigree.parent)->rank++;
+
+ // Zero the worker's pedigree rank since this is the start of a new
+ // pedigree domain.
+ w->pedigree.rank = 0;
+
+ return 0;
+}
+
+CILK_ABI_VOID
+__cilkrts_save_fp_ctrl_state(__cilkrts_stack_frame *sf)
+{
+ // Pass call onto OS/architecture dependent function
+ sysdep_save_fp_ctrl_state(sf);
+}
+
+/* end cilk-abi.c */
diff --git a/gcc-4.9/libcilkrts/runtime/cilk-ittnotify.h b/gcc-4.9/libcilkrts/runtime/cilk-ittnotify.h
new file mode 100644
index 000000000..ff995db6f
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk-ittnotify.h
@@ -0,0 +1,100 @@
+/* cilk-ittnotify.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#ifndef INCLUDED_CILK_ITTNOTIFY_DOT_H
+#define INCLUDED_CILK_ITTNOTIFY_DOT_H
+
+#ifdef __INTEL_COMPILER
+#endif
+#include <stdio.h>
+
+// ITTNOTIFY does not support ARM at this time
+#ifdef __arm__
+#undef USE_ITTNOTIFY
+#endif
+
+#ifdef USE_ITTNOTIFY
+#include <ittnotify.h>
+
+#ifdef _WIN32
+# define ITT_SYNC_CREATE(_address, _description) \
+ __itt_sync_createA(_address, \
+ "Intel Cilk Plus " _description, \
+ "", \
+ __itt_attr_barrier)
+#else
+# define ITT_SYNC_CREATE(_address, _description) \
+ __itt_sync_create(_address, \
+ "Intel Cilk Plus " _description, \
+ "", \
+ __itt_attr_barrier)
+#endif
+
+#define ITT_SYNC_PREPARE(_address) __itt_sync_prepare(_address)
+#define ITT_SYNC_ACQUIRED(_address) __itt_sync_acquired(_address)
+#define ITT_SYNC_RELEASING(_address) __itt_sync_releasing(_address)
+#define ITT_SYNC_DESTROY(_address) __itt_sync_destroy(_address)
+// Note that we subtract 5 from the return address to find the CALL instruction
+// to __cilkrts_sync
+#if 1 // Disable renaming for now. Piersol isn't ready yet
+#define ITT_SYNC_SET_NAME_AND_PREPARE(_address, _sync_ret_address) __itt_sync_prepare(_address)
+#else
+#define ITT_SYNC_SET_NAME_AND_PREPARE(_address, _sync_ret_address) \
+ if (NULL != __itt_sync_prepare_ptr) { \
+ if (0 == _sync_ret_address) \
+ __itt_sync_renameA(_address, ""); \
+ else \
+ { \
+ char buf[128]; \
+ sprintf_s(buf, 128, "IP:0x%p", (DWORD_PTR)_sync_ret_address - 5); \
+ __itt_sync_renameA(_address, buf); \
+ _sync_ret_address = 0; \
+ } \
+ __itt_sync_prepare(_address); \
+ }
+#endif
+#else // USE_ITTNOTIFY not defined, compile out all calls
+#define ITT_SYNC_CREATE(_address, _description)
+#define ITT_SYNC_PREPARE(_address)
+#define ITT_SYNC_ACQUIRED(_address)
+#define ITT_SYNC_RELEASING(_address)
+#define ITT_SYNC_DESTROY(_address)
+#define ITT_SYNC_SET_NAME_AND_PREPARE(_sync_address, _wait_address)
+#endif
+
+#endif // ! defined(INCLUDED_CILK_ITTNOTIFY_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/cilk-tbb-interop.h b/gcc-4.9/libcilkrts/runtime/cilk-tbb-interop.h
new file mode 100644
index 000000000..cc5cff4b5
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk-tbb-interop.h
@@ -0,0 +1,192 @@
+/* cilk-tbb-interop.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file cilk-tbb-interop.h
+ *
+ * @brief Interface between TBB and Cilk to allow TBB to associate its
+ * per-thread data with Cilk workers, and maintain the association as work
+ * moves between worker threads. This handles the case where TBB calls
+ * into a Cilk function which may later call back to a function making
+ * TBB calls.
+ *
+ * Each thunk structure has two pointers: \"routine\" and \"data\".
+ * The caller of the thunk invokes *routine, passing \"data\" as the void*
+ * parameter.
+ */
+
+#ifndef INCLUDED_CILK_TBB_INTEROP_DOT_H
+#define INCLUDED_CILK_TBB_INTEROP_DOT_H
+
+#include <cilk/common.h> // for CILK_EXPORT
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/** A return code. 0 indicates success. */
+typedef int __cilk_tbb_retcode;
+
+/**
+ * Enumeration of reasons that Cilk will call the TBB stack operation
+ * function.
+ *
+ * When a non-empty stack is transferred between threads, the first thread must
+ * orphan it and the second thread must adopt it.
+ *
+ * An empty stack can be transferred similarly, or simply released by the first
+ * thread.
+ *
+ * Here is a summary of the actions as transitions on a state machine.
+@verbatim
+                      watch                                     ORPHAN
+                     -->-->                                     -->--
+                    /      \                                   /     \
+ (freed empty stack)        (TBB sees stack running on thread)        (stack in limbo)
+                    \      /                                   \     /
+                     --<--                                      --<--
+                   RELEASE or                                   ADOPT
+                    unwatch
+@endverbatim
+ */
+typedef enum __cilk_tbb_stack_op {
+ /**
+ * Disconnecting stack from a thread.
+ *
+ * The thunk must be invoked on the thread disconnecting itself from the
+ * stack. Must \"happen before\" the stack is adopted elsewhere.
+ */
+ CILK_TBB_STACK_ORPHAN,
+
+ /**
+ * Reconnecting orphaned stack to a thread.
+ *
+ * The thunk must be invoked on the thread adopting the stack.
+ */
+ CILK_TBB_STACK_ADOPT,
+
+ /**
+ * Releasing stack.
+ *
+ * The thunk must be invoked on the thread doing the releasing. Must
+ * \"happen before\" the stack is used elsewhere.
+ */
+ CILK_TBB_STACK_RELEASE
+} __cilk_tbb_stack_op;
+
+/**
+ * Function that will be called by the Cilk runtime to inform TBB of a change
+ * in the stack associated with the current thread.
+ *
+ * It does not matter what stack the thunk runs on.
+ * The thread (not fiber) on which the thunk runs is important.
+ *
+ * @param op Enumerated value indicating what type of change is occurring.
+ * @param data Context value provided by TBB in the __cilkrts_watch_stack
+ * call. This data is opaque to Cilk.
+ *
+ * @return 0 indicates success.
+ */
+typedef __cilk_tbb_retcode (*__cilk_tbb_pfn_stack_op)(enum __cilk_tbb_stack_op op,
+ void* data);
+
+/**
+ * Function that will be called by TBB to inform the Cilk runtime that TBB
+ * is no longer interested in watching the stack bound to the current thread.
+ *
+ * @param data Context value provided to TBB by the __cilkrts_watch_stack
+ * call. This data is opaque to TBB.
+ *
+ * @return 0 indicates success.
+ */
+typedef __cilk_tbb_retcode (*__cilk_tbb_pfn_unwatch_stacks)(void *data);
+
+/**
+ * Thunk invoked by Cilk to call back to TBB to tell it about a change in
+ * the stack bound to the current thread.
+ */
+typedef struct __cilk_tbb_stack_op_thunk {
+ /// Function in TBB the Cilk runtime should call when something
+ // "interesting" happens involving a stack
+ __cilk_tbb_pfn_stack_op routine;
+
+ /// TBB context data to pass with the call to the stack_op routine
+ void* data;
+} __cilk_tbb_stack_op_thunk;
+
+/**
+ * Thunk invoked by TBB when it is no longer interested in watching the stack
+ * bound to the current thread.
+ */
+typedef struct __cilk_tbb_unwatch_thunk {
+ /// Function in Cilk runtime to call when TBB no longer wants to watch
+ // stacks
+ __cilk_tbb_pfn_unwatch_stacks routine;
+
+ /// Cilk runtime context data to pass with the call to the unwatch_stacks
+ /// routine
+ void* data;
+} __cilk_tbb_unwatch_thunk;
+
+/**
+ * Requests that Cilk invoke the stack op routine when it orphans a stack.
+ * Cilk sets *u to a thunk that TBB should call when it is no longer
+ * interested in watching the stack.
+ *
+ * If the thread is not yet bound to the Cilk runtime, the Cilk runtime should
+ * save this data in thread-local storage until __cilkrts_bind_thread is called.
+ *
+ * Called by TBB, defined by Cilk. This function is exported from the Cilk
+ * runtime DLL/shared object. This declaration also appears in
+ * cilk/cilk_undocumented.h -- don't change one declaration without also
+ * changing the other.
+ *
+ * @param u __cilk_tbb_unwatch_thunk. This structure will be filled in by
+ * the Cilk runtime to allow TBB to register that it is no longer interested
+ * in watching the stack bound to the current thread.
+ * @param o __cilk_tbb_stack_op_thunk. This structure specifies the routine
+ * that the Cilk runtime should call when an "interesting" change in the stack
+ * associate with the current worker occurs.
+ *
+ * @return 0 indicates success.
+ */
+CILK_EXPORT
+__cilk_tbb_retcode __cilkrts_watch_stack(__cilk_tbb_unwatch_thunk* u,
+ __cilk_tbb_stack_op_thunk o);
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_CILK_TBB_INTEROP_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/cilk_api.c b/gcc-4.9/libcilkrts/runtime/cilk_api.c
new file mode 100644
index 000000000..bbca984bc
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk_api.c
@@ -0,0 +1,255 @@
+/* cilk_api.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/*
+ * Implementation of functions declared in cilk_api.h
+ */
+
+/*
+ * Define the COMPILING_CILK_API_FUNCTIONS macro, so that
+ * compilation of this file generates non-inlined definitions for the
+ * functions marked as CILK_EXPORT_AND_INLINE in cilk_api.h.
+ *
+ * We must deal with these functions differently because we need to
+ * continue to ship non-inlined versions of these functions.
+ *
+ * CILK_EXPORT_AND_INLINE int __cilkrts_get_worker_rank(uint64_t *rank);
+ * CILK_EXPORT_AND_INLINE int __cilkrts_bump_worker_rank();
+ * CILK_EXPORT_AND_INLINE int __cilkrts_bump_loop_rank();
+ */
+#define COMPILING_CILK_API_FUNCTIONS
+
+#include <internal/abi.h>
+#include <cilk/cilk_api.h>
+
+#include "os.h"
+#include "os_mutex.h"
+#include "bug.h"
+#include "global_state.h"
+#include "local_state.h"
+#include "scheduler.h"
+#include "sysdep.h"
+
+CILK_API_VOID __cilkrts_init(void)
+{
+ // Initialize, but don't start, the cilk runtime.
+ __cilkrts_init_internal(0);
+}
+
+CILK_API_VOID __cilkrts_end_cilk(void)
+{
+ // Take out the global OS mutex while we do this to protect against
+ // another thread attempting to bind while we do this
+ global_os_mutex_lock();
+
+ if (cilkg_is_published()) {
+ global_state_t *g = cilkg_get_global_state();
+ if (g->Q || __cilkrts_get_tls_worker())
+ __cilkrts_bug("Attempt to shut down Cilk while Cilk is still "
+ "running");
+ __cilkrts_stop_workers(g);
+ __cilkrts_deinit_internal(g);
+ }
+
+ global_os_mutex_unlock();
+}
+
+CILK_API_INT
+__cilkrts_get_nworkers()
+{
+ return cilkg_get_nworkers();
+}
+
+CILK_API_INT
+__cilkrts_get_total_workers()
+{
+ return cilkg_get_total_workers();
+}
+
+CILK_API_INT __cilkrts_get_force_reduce(void)
+{
+ return cilkg_get_force_reduce();
+}
+
+CILK_API_INT __cilkrts_set_param(const char* param, const char* value)
+{
+ return cilkg_set_param(param, value);
+}
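+
+/*
+ * Example (minimal sketch): limiting the runtime to four workers before any
+ * Cilk work has started, then confirming the setting.
+ *
+ *   if (0 != __cilkrts_set_param("nworkers", "4"))
+ *       fprintf(stderr, "failed to set worker count\n");
+ *   assert(4 == __cilkrts_get_nworkers());
+ */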
+
+#ifdef _WIN32
+CILK_API_INT __cilkrts_set_param_w(const wchar_t* param, const wchar_t* value)
+{
+ return cilkg_set_param_w(param, value);
+}
+#endif // _WIN32
+
+/* Return a small integer indicating which Cilk worker the function is
+ * currently running on. Each thread started by the Cilk runtime library
+ * (system worker) has a unique worker number in the range 1..P-1, where P is
+ * the valued returned by __cilkrts_get_nworkers(). All threads started by
+ * the user or by other libraries (user workers) share the worker number 0.
+ * Therefore, the worker number is not unique across multiple user threads.
+ *
+ * Implementor's note: The value returned from this function is different from
+ * the value, w->self, used in most debug messages.
+ */
+CILK_API_INT
+__cilkrts_get_worker_number(void)
+{
+ __cilkrts_worker *w = __cilkrts_get_tls_worker();
+
+ if (0 == w)
+ /* A non-worker always has a worker number of zero. */
+ return 0;
+ else if (WORKER_USER == w->l->type)
+ /* User worker was once a non-worker, so its number should still be
+ * zero. */
+ return 0;
+ else
+ /* w->self for a system worker is in range 0..(P-1); adjust to 1..P
+ * to avoid conflicting with the user thread's worker number. */
+ return w->self + 1;
+}
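+
+/*
+ * Example (minimal sketch; 'chunk' is a hypothetical loop index): tagging
+ * trace output with the current worker number. All user threads report 0;
+ * system workers report 1..P-1.
+ *
+ *   printf("processed chunk %d on worker %d\n",
+ *          chunk, __cilkrts_get_worker_number());
+ */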
+
+/**
+ * Internal definition of the pedigree context. The size of the
+ * structure must match __cilkrts_pedigree_context_t defined in abi.h.
+ */
+typedef struct pedigree_context_t
+{
+ /** Size of the structure, in bytes */
+ size_t size;
+
+ /** Next __cilkrts_pedigree to return */
+ const __cilkrts_pedigree *pedigree;
+
+ /** Unused. Left over from previous implementation */
+ void *unused1;
+
+ /** Unused. Left over from previous implementation */
+ void *unused2;
+
+ // // Debugging aid for pedigree-test:
+ // __cilkrts_stack_frame *expected_sf;
+} pedigree_context_t;
+
+/*
+ * __cilkrts_get_pedigree_info
+ *
+ * Fetch the birthrank for the next node in a pedigree walk. To initialize
+ * the walk, zero the context's pedigree pointer and set its size field to
+ * sizeof(__cilkrts_pedigree_context_t). The updated context must be passed
+ * back unchanged on the next call.
+ *
+ * Returns:
+ * 0 - Success - *sf_birthrank is valid
+ * 1 - Pedigree walk completed
+ * <0 - Failure - -3: Context size check failed
+ */
+
+#define PEDIGREE_WALK_COMPLETE (__cilkrts_pedigree *)-1
+
+CILK_API_INT
+__cilkrts_get_pedigree_info(__cilkrts_pedigree_context_t *external_context,
+ uint64_t *sf_birthrank)
+{
+ pedigree_context_t *context = (pedigree_context_t *)external_context;
+
+ CILK_ASSERT(sizeof(__cilkrts_pedigree_context_t) ==
+ sizeof(pedigree_context_t));
+ if (context->size != sizeof(pedigree_context_t))
+ return -3; // Invalid size
+
+ // If the pointer to the last __cilkrts_pedigree is -1, we've
+ // already finished the walk, so we're still done.
+ if (PEDIGREE_WALK_COMPLETE == context->pedigree)
+ return 1;
+
+ // The passed in context value contains a pointer to the last
+ // __cilkrts_pedigree returned, or NULL if we're starting a
+ // new walk
+ if (NULL == context->pedigree)
+ {
+ __cilkrts_worker *w = __cilkrts_get_tls_worker();
+ __cilkrts_pedigree* pedigree_node;
+ if (NULL != w) {
+ pedigree_node = &w->pedigree;
+ }
+ else {
+ pedigree_node = __cilkrts_get_tls_pedigree_leaf(1);
+ }
+ context->pedigree = pedigree_node->parent;
+ }
+ else
+ context->pedigree = context->pedigree->parent;
+
+ // Note: If we want to omit the user root node,
+ // stop at context->pedigree->parent instead.
+ if (NULL == context->pedigree)
+ {
+ context->pedigree = PEDIGREE_WALK_COMPLETE;
+ return 1;
+ }
+
+ *sf_birthrank = context->pedigree->rank;
+ return 0;
+}
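+
+/*
+ * Example (illustrative sketch, assuming the external context type exposes
+ * the same leading size field as the internal pedigree_context_t above):
+ * walking the current pedigree from leaf to root and printing each
+ * birthrank.
+ *
+ *   __cilkrts_pedigree_context_t ctx = {0};   // pedigree pointer starts NULL
+ *   uint64_t rank;
+ *   ctx.size = sizeof(ctx);
+ *   while (0 == __cilkrts_get_pedigree_info(&ctx, &rank))
+ *       printf("birthrank %llu\n", (unsigned long long)rank);
+ */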
+
+CILK_API_PEDIGREE
+__cilkrts_get_pedigree_internal(__cilkrts_worker *w)
+{
+ if (NULL != w) {
+ return w->pedigree;
+ }
+ else {
+ const __cilkrts_pedigree *pedigree =
+ __cilkrts_get_tls_pedigree_leaf(1);
+ return *pedigree;
+ }
+}
+
+
+CILK_API_INT __cilkrts_bump_worker_rank_internal(__cilkrts_worker *w)
+{
+ __cilkrts_pedigree *pedigree;
+ pedigree = (w ? &w->pedigree : __cilkrts_get_tls_pedigree_leaf(1));
+ pedigree->rank++;
+ return 0;
+}
+
+/* End cilk_api.c */
diff --git a/gcc-4.9/libcilkrts/runtime/cilk_fiber-unix.cpp b/gcc-4.9/libcilkrts/runtime/cilk_fiber-unix.cpp
new file mode 100644
index 000000000..b0ed53ad0
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk_fiber-unix.cpp
@@ -0,0 +1,301 @@
+/* cilk_fiber-unix.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2012-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#include "cilk_fiber-unix.h"
+#include "cilk_malloc.h"
+#include "bug.h"
+#include "os.h"
+
+#include <cstdio>
+#include <cstdlib>
+
+#include <errno.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+// You'd think that getting a definition for alloca would be easy. But you'd
+// be wrong. Here's a variant on what's recommended in the autoconf doc. I've
+// removed the Windows portion since this is Unix-specific code.
+#if defined HAVE_ALLOCA_H
+# include <alloca.h>
+#elif defined __GNUC__
+# define alloca __builtin_alloca
+#elif defined _AIX
+# define alloca __alloca
+#else
+# include <stddef.h>
+# ifdef __cplusplus
+extern "C"
+# endif
+void *alloca (size_t);
+#endif
+
+// MAP_ANON is deprecated on Linux, but seems to be required on Mac...
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// Magic number for sanity checking fiber structure
+const unsigned magic_number = 0x5afef00d;
+
+int cilk_fiber_sysdep::s_page_size = getpagesize();
+
+cilk_fiber_sysdep::cilk_fiber_sysdep(std::size_t stack_size)
+ : cilk_fiber(stack_size)
+ , m_magic(magic_number)
+{
+ // Set m_stack and m_stack_base.
+ make_stack(stack_size);
+
+ // Get high-address of stack, with 32-bytes of spare space, and rounded
+ // down to the nearest 32-byte boundary.
+ const uintptr_t align_mask = 32 - 1;
+ m_stack_base -= ((std::size_t) m_stack_base) & align_mask;
+}
+
+cilk_fiber_sysdep::cilk_fiber_sysdep(from_thread_t)
+ : cilk_fiber()
+ , m_magic(magic_number)
+{
+ this->set_allocated_from_thread(true);
+
+ // Dummy stack data for thread-main fiber
+ m_stack = NULL;
+ m_stack_base = NULL;
+}
+
+void cilk_fiber_sysdep::convert_fiber_back_to_thread()
+{
+ // Does nothing on Linux.
+}
+
+cilk_fiber_sysdep::~cilk_fiber_sysdep()
+{
+ CILK_ASSERT(magic_number == m_magic);
+ if (!this->is_allocated_from_thread())
+ free_stack();
+}
+
+#if SUPPORT_GET_CURRENT_FIBER
+cilk_fiber_sysdep* cilk_fiber_sysdep::get_current_fiber_sysdep()
+{
+ return cilkos_get_tls_cilk_fiber();
+}
+#endif
+
+// Jump to resume other fiber. We may or may not come back.
+inline void cilk_fiber_sysdep::resume_other_sysdep(cilk_fiber_sysdep* other)
+{
+ if (other->is_resumable()) {
+ other->set_resumable(false);
+ // Resume by longjmp'ing to the place where we suspended.
+ CILK_LONGJMP(other->m_resume_jmpbuf);
+ }
+ else {
+ // Otherwise, we've never run this fiber before. Start the
+ // proc method.
+ other->run();
+ }
+}
+
+void cilk_fiber_sysdep::suspend_self_and_resume_other_sysdep(cilk_fiber_sysdep* other)
+{
+#if SUPPORT_GET_CURRENT_FIBER
+ cilkos_set_tls_cilk_fiber(other);
+#endif
+ CILK_ASSERT(this->is_resumable());
+
+
+ // Jump to the other fiber. We expect to come back.
+ if (! CILK_SETJMP(m_resume_jmpbuf)) {
+ resume_other_sysdep(other);
+ }
+
+ // Return here when another fiber resumes me.
+ // If the fiber that switched to me wants to be deallocated, do it now.
+ do_post_switch_actions();
+}
+
+NORETURN cilk_fiber_sysdep::jump_to_resume_other_sysdep(cilk_fiber_sysdep* other)
+{
+#if SUPPORT_GET_CURRENT_FIBER
+ cilkos_set_tls_cilk_fiber(other);
+#endif
+ CILK_ASSERT(!this->is_resumable());
+
+ // Jump to the other fiber. But we are never coming back because
+ // this fiber is being reset.
+ resume_other_sysdep(other);
+
+ // We should never come back here...
+ __cilkrts_bug("Should not get here");
+}
+
+// GCC doesn't allow us to call __builtin_longjmp in the same function that
+// calls __builtin_setjmp, so create a new function to house the call to
+// __builtin_longjmp
+static void __attribute__((noinline))
+do_cilk_longjmp(__CILK_JUMP_BUFFER jmpbuf)
+{
+ CILK_LONGJMP(jmpbuf);
+}
+
+NORETURN cilk_fiber_sysdep::run()
+{
+ // Only fibers created from a pool have a proc method to run and execute.
+ CILK_ASSERT(m_start_proc);
+ CILK_ASSERT(!this->is_allocated_from_thread());
+ CILK_ASSERT(!this->is_resumable());
+
+ // TBD: This setjmp/longjmp pair simply changes the stack pointer.
+ // We could probably replace this code with some assembly.
+ if (! CILK_SETJMP(m_resume_jmpbuf))
+ {
+ // Calculate the size of the current stack frame (i.e., this
+ // run() function).
+ size_t frame_size = (size_t)JMPBUF_FP(m_resume_jmpbuf) - (size_t)JMPBUF_SP(m_resume_jmpbuf);
+
+ // Macs require 16-byte alignment. Do it always because it just
+ // doesn't matter
+ if (frame_size & (16-1))
+ frame_size += 16 - (frame_size & (16-1));
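+ // (For example, a 40-byte frame rounds up to 48; a frame that is
+ // already a multiple of 16, such as 48 bytes, is left unchanged by
+ // the guard above.)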
+
+ // Assert that we are getting a reasonable frame size out of
+ // it. If this run() function is using more than 4096 bytes
+ // of space for its local variables or any register state that
+ // spills to the stack, something is probably *very* wrong here...
+ //
+ // 4096 bytes just happens to be a number that seems "large
+ // enough" --- for an example GCC 32-bit compilation, the
+ // frame size was 48 bytes.
+ CILK_ASSERT(frame_size < 4096);
+
+ // Change stack pointer to fiber stack. Offset the
+ // calculation by the frame size, so that we've allocated
+ // enough extra space from the top of the stack we are
+ // switching to for any temporaries required for this run()
+ // function.
+ JMPBUF_SP(m_resume_jmpbuf) = m_stack_base - frame_size;
+
+ // GCC doesn't allow us to call __builtin_longjmp in the same function
+ // that calls __builtin_setjmp, so it has been moved into its own
+ // function that cannot be inlined.
+ do_cilk_longjmp(m_resume_jmpbuf);
+ }
+
+ // Note: our resetting of the stack pointer is valid only if the
+ // compiler has not saved any temporaries onto the stack for this
+ // function before the longjmp that we still care about at this
+ // point.
+
+ // Verify that 1) 'this' is still valid and 2) '*this' has not been
+ // corrupted.
+ CILK_ASSERT(magic_number == m_magic);
+
+ // If the fiber that switched to me wants to be deallocated, do it now.
+ do_post_switch_actions();
+
+ // Now call the user proc on the new stack
+ m_start_proc(this);
+
+ // alloca() to force generation of frame pointer. The argument to alloca
+ // is contrived to prevent the compiler from optimizing it away. This
+ // code should never actually be executed.
+ int* dummy = (int*) alloca((sizeof(int) + (std::size_t) m_start_proc) & 0x1);
+ *dummy = 0xface;
+
+ // User proc should never return.
+ __cilkrts_bug("Should not get here");
+}
+
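+// For reference, the layout make_stack() below produces for a rounded
+// stack size of N bytes (sketch):
+//
+//   m_stack                                                  m_stack + N
+//   |<-- guard page -->|<------- usable stack ------->|<-- guard page -->|
+//                      ^                              ^
+//                 m_stack + page_size             m_stack_base
+//
+// m_stack_base == m_stack + N - page_size; both guard pages are mprotect'd
+// PROT_NONE so that running off either end of the stack faults immediately.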
+void cilk_fiber_sysdep::make_stack(size_t stack_size)
+{
+ char* p;
+ // We've already validated that the stack size is page-aligned and
+ // is a reasonable value. No need to do any extra rounding here.
+ size_t rounded_stack_size = stack_size;
+
+ // Normally, we have already validated that the stack size is
+ // aligned to 4K. In the rare case that pages are huge though, we
+ // need to do some extra checks.
+ if (rounded_stack_size < 3 * (size_t)s_page_size) {
+ // If the specified stack size is too small, round up to 3
+ // pages. We need at least 2 extra for the guard pages.
+ rounded_stack_size = 3 * (size_t)s_page_size;
+ }
+ else {
+ // Otherwise, the stack size is large enough, but might not be
+ // a multiple of page size. Round up to nearest multiple of
+ // s_page_size, just to be safe.
+ size_t remainder = rounded_stack_size % s_page_size;
+ if (remainder) {
+ rounded_stack_size += s_page_size - remainder;
+ }
+ }
+
+ p = (char*)mmap(0, rounded_stack_size,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS,
+ -1, 0);
+ if (MAP_FAILED == p) {
+ // For whatever reason (probably ran out of memory), mmap() failed.
+ // There is no stack to return, so the program loses parallelism.
+ m_stack = NULL;
+ m_stack_base = NULL;
+ return;
+ }
+
+ // mprotect guard pages.
+ mprotect(p + rounded_stack_size - s_page_size, s_page_size, PROT_NONE);
+ mprotect(p, s_page_size, PROT_NONE);
+
+ m_stack = p;
+ m_stack_base = p + rounded_stack_size - s_page_size;
+}
+
+
+void cilk_fiber_sysdep::free_stack()
+{
+ if (m_stack) {
+ size_t rounded_stack_size = m_stack_base - m_stack + s_page_size;
+ if (munmap(m_stack, rounded_stack_size) < 0)
+ __cilkrts_bug("Cilk: stack munmap failed error %d\n", errno);
+ }
+}
+
+/* End cilk_fiber-unix.cpp */
diff --git a/gcc-4.9/libcilkrts/runtime/cilk_fiber-unix.h b/gcc-4.9/libcilkrts/runtime/cilk_fiber-unix.h
new file mode 100644
index 000000000..9f47d5b04
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk_fiber-unix.h
@@ -0,0 +1,149 @@
+/* cilk_fiber-unix.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2012-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#ifndef INCLUDED_CILK_FIBER_UNIX_DOT_H
+#define INCLUDED_CILK_FIBER_UNIX_DOT_H
+
+#ifndef __cplusplus
+# error cilk_fiber-unix.h is a C++-only header
+#endif
+
+#include "cilk_fiber.h"
+#include "jmpbuf.h"
+
+/**
+ * @file cilk_fiber-unix.h
+ *
+ * @brief Unix-specific implementation for cilk_fiber.
+ */
+
+/**
+ * @brief Unix-specific fiber class derived from portable fiber class
+ */
+struct cilk_fiber_sysdep : public cilk_fiber
+{
+ public:
+
+#if SUPPORT_GET_CURRENT_FIBER
+ /**
+ * @brief Gets the current fiber from TLS.
+ */
+ static cilk_fiber_sysdep* get_current_fiber_sysdep();
+#endif
+
+ /**
+ * @brief Construct the system-dependent portion of a fiber.
+ *
+ * @param stack_size The size of the stack for this fiber.
+ */
+ cilk_fiber_sysdep(std::size_t stack_size);
+
+ /**
+ * @brief Construct the system-dependent portion of a fiber created from a
+ * thread.
+ */
+ cilk_fiber_sysdep(from_thread_t);
+
+ /**
+ * @brief Destructor
+ */
+ ~cilk_fiber_sysdep();
+
+ /**
+ * @brief OS-specific calls to convert this fiber back to thread.
+ *
+ * Nothing to do for Linux.
+ */
+ void convert_fiber_back_to_thread();
+
+ /**
+ * @brief System-dependent function to suspend self and resume execution of "other".
+ *
+ * This fiber is suspended.
+ *
+ * @pre @c is_resumable() should be true.
+ *
+ * @param other Fiber to resume.
+ */
+ void suspend_self_and_resume_other_sysdep(cilk_fiber_sysdep* other);
+
+ /**
+ * @brief System-dependent function called to jump to @p other
+ * fiber.
+ *
+ * @pre @c is_resumable() should be false.
+ *
+ * @param other Fiber to resume.
+ */
+ NORETURN jump_to_resume_other_sysdep(cilk_fiber_sysdep* other);
+
+ /**
+ * @brief Runs the start_proc.
+ * @pre is_resumable() should be false.
+ * @pre is_allocated_from_thread() should be false.
+ * @pre m_start_proc must be valid.
+ */
+ NORETURN run();
+
+ /**
+ * @brief Returns the base of this fiber's stack.
+ */
+ inline char* get_stack_base_sysdep() { return m_stack_base; }
+
+ private:
+ char* m_stack_base; ///< The base of this fiber's stack.
+ char* m_stack; // Stack memory (low address)
+ __CILK_JUMP_BUFFER m_resume_jmpbuf; // Place to resume fiber
+ unsigned m_magic; // Magic number for checking
+
+ static int s_page_size; // Page size for stacks.
+
+ // Allocate memory for a stack. This method
+ // initializes m_stack and m_stack_base.
+ void make_stack(size_t stack_size);
+
+ // Deallocates memory for the stack.
+ void free_stack();
+
+ // Common helper method for implementation of resume_other_sysdep
+ // variants.
+ inline void resume_other_sysdep(cilk_fiber_sysdep* other);
+};
+
+#endif // ! defined(INCLUDED_CILK_FIBER_UNIX_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/cilk_fiber.cpp b/gcc-4.9/libcilkrts/runtime/cilk_fiber.cpp
new file mode 100644
index 000000000..0c66f234d
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk_fiber.cpp
@@ -0,0 +1,1078 @@
+/* cilk_fiber.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2012-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/* Implementations of non-platform-specific aspects of cilk_fiber, especially
+ * the cilk_fiber_pool interface.
+ */
+#include "cilk_fiber.h"
+#ifdef _WIN32
+# include "cilk_fiber-win.h"
+#else
+# include "cilk_fiber-unix.h"
+#endif
+#include "cilk_malloc.h"
+#include "bug.h"
+#include <new>
+
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include "sysdep.h"
+
+
+extern "C" {
+
+inline int cilk_fiber_pool_sanity_check(cilk_fiber_pool *pool, const char* desc)
+{
+ int errors = 0;
+#if FIBER_DEBUG >= 1
+ if ((NULL != pool) && pool->total > 0) {
+
+ // Root pool should not allocate more fibers than alloc_max
+ errors += ((pool->parent == NULL) &&
+ (pool->total > pool->alloc_max));
+ errors += (pool->total > pool->high_water);
+
+ if (errors) {
+ fprintf(stderr, "ERROR at %s: pool=%p has max_size=%u, total=%d, high_water=%d\n",
+ desc,
+ pool, pool->max_size, pool->total, pool->high_water);
+ }
+ }
+#endif
+ return (errors == 0);
+}
+
+inline void increment_pool_total(cilk_fiber_pool* pool)
+{
+ ++pool->total;
+ if (pool->high_water < pool->total)
+ pool->high_water = pool->total;
+}
+
+inline void decrement_pool_total(cilk_fiber_pool* pool, int fibers_freed)
+{
+ pool->total -= fibers_freed;
+}
+
+
+/**
+ * @brief Free fibers from this pool until we have at most @c
+ * num_to_keep fibers remaining, and then put a fiber back.
+ *
+ * @pre We do not hold @c pool->lock
+ * @post After completion, we do not hold @c pool->lock
+ */
+static void cilk_fiber_pool_free_fibers_from_pool(cilk_fiber_pool* pool,
+ unsigned num_to_keep,
+ cilk_fiber* fiber_to_return)
+{
+ // Free our own fibers, until we fall below our desired threshold.
+ // Each iteration of this loop proceeds in the following stages:
+ // 1. Acquire the pool lock,
+ // 2. Grabs up to B fibers from the pool, stores them into a buffer.
+ // 3. Check if pool is empty enough. If yes, put the last fiber back,
+ // and remember that we should quit.
+ // 4. Release the pool lock, and actually free any buffered fibers.
+ // 5. Check if we are done and should exit the loop. Otherwise, try again.
+ //
+ const bool need_lock = pool->lock;
+ bool last_fiber_returned = false;
+
+ do {
+ const int B = 10; // Free at most this many fibers from the
+ // pool per lock acquisition. Make
+ // this value large enough to amortize
+ // against the cost of acquiring and
+ // releasing the lock.
+ int num_to_free = 0;
+ cilk_fiber* fibers_to_free[B];
+
+ // Stage 1: Grab the lock.
+ if (need_lock) {
+ spin_mutex_lock(pool->lock);
+ }
+
+ // Stage 2: Grab up to B fibers to free.
+ int fibers_freed = 0;
+ while ((pool->size > num_to_keep) && (num_to_free < B)) {
+ fibers_to_free[num_to_free++] = pool->fibers[--pool->size];
+ fibers_freed++;
+ }
+ decrement_pool_total(pool, fibers_freed);
+
+ // Stage 3. Pool is below threshold. Put extra fiber back.
+ if (pool->size <= num_to_keep) {
+ // Put the last fiber back into the pool.
+ if (fiber_to_return) {
+ CILK_ASSERT(pool->size < pool->max_size);
+ pool->fibers[pool->size] = fiber_to_return;
+ pool->size++;
+ }
+ last_fiber_returned = true;
+ }
+
+ // Stage 4: Release the lock, and actually free any fibers
+ // buffered.
+ if (need_lock) {
+ spin_mutex_unlock(pool->lock);
+ }
+
+ for (int i = 0; i < num_to_free; ++i) {
+ fibers_to_free[i]->deallocate_to_heap();
+ }
+
+ } while (!last_fiber_returned);
+}
+
+
+/******************************************************************
+ * TBD: We want to simplify / rework the logic for allocating and
+ * deallocating fibers, so that they are hopefully simpler and work
+ * more elegantly for more than two levels.
+ ******************************************************************/
+
+/**
+ * @brief Transfer fibers from @c pool to @c pool->parent.
+ *
+ * @pre Must hold @c pool->lock if it exists.
+ * @post After completion, some number of fibers
+ * have been moved from this pool to the parent.
+ * The lock @c pool->lock is still held.
+ *
+ * TBD: Do we wish to guarantee that the lock has never been
+ * released? It may depend on the implementation...
+ */
+static void cilk_fiber_pool_move_fibers_to_parent_pool(cilk_fiber_pool* pool,
+ unsigned num_to_keep)
+{
+ // ASSERT: We should hold the lock on pool (if it has one).
+ CILK_ASSERT(pool->parent);
+ cilk_fiber_pool* parent_pool = pool->parent;
+
+ // Move fibers from our pool to the parent until we either run out
+ // of space in the parent, or hit our threshold.
+ //
+ // This operation must be done while holding the parent lock.
+
+ // If the parent pool appears to be full, just return early.
+ if (parent_pool->size >= parent_pool->max_size)
+ return;
+
+ spin_mutex_lock(pool->parent->lock);
+ while ((parent_pool->size < parent_pool->max_size) &&
+ (pool->size > num_to_keep)) {
+ parent_pool->fibers[parent_pool->size++] =
+ pool->fibers[--pool->size];
+ }
+
+ // If the child pool has deallocated more fibers to the heap
+ // than it has allocated, then transfer this "surplus" to the
+ // parent, so that the parent is free to allocate more from the
+ // heap.
+ //
+ // This transfer means that the total in the parent can
+ // temporarily go negative.
+ if (pool->total < 0) {
+ // Reduce parent total by the surplus we have in the local
+ // pool.
+ parent_pool->total += pool->total;
+ pool->total = 0;
+ }
+
+ spin_mutex_unlock(pool->parent->lock);
+}
+
+void cilk_fiber_pool_init(cilk_fiber_pool* pool,
+ cilk_fiber_pool* parent,
+ size_t stack_size,
+ unsigned buffer_size,
+ int alloc_max,
+ int is_shared)
+{
+#if FIBER_DEBUG >= 1
+ fprintf(stderr, "fiber_pool_init, pool=%p, parent=%p, alloc_max=%u\n",
+ pool, parent, alloc_max);
+#endif
+
+ pool->lock = (is_shared ? spin_mutex_create() : NULL);
+ pool->parent = parent;
+ pool->stack_size = stack_size;
+ pool->max_size = buffer_size;
+ pool->size = 0;
+ pool->total = 0;
+ pool->high_water = 0;
+ pool->alloc_max = alloc_max;
+ pool->fibers =
+ (cilk_fiber**) __cilkrts_malloc(buffer_size * sizeof(cilk_fiber*));
+ CILK_ASSERT(NULL != pool->fibers);
+
+#ifdef __MIC__
+#define PREALLOCATE_FIBERS
+#endif
+
+#ifdef PREALLOCATE_FIBERS
+ // Pre-allocate 1/4 of fibers in the pools ahead of time. This
+ // value is somewhat arbitrary. It was chosen to be less than the
+ // threshold (of about 3/4) of fibers to keep in the pool when
+ // transferring fibers to the parent.
+
+ int pre_allocate_count = buffer_size/4;
+ for (pool->size = 0; pool->size < pre_allocate_count; pool->size++) {
+ pool->fibers[pool->size] = cilk_fiber::allocate_from_heap(pool->stack_size);
+ }
+#endif
+}
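+
+/*
+ * Example (minimal sketch; stack_size, buffer and alloc_max values are
+ * hypothetical): a shared root pool with a private per-worker child pool
+ * hanging off it.
+ *
+ *   cilk_fiber_pool root_pool, worker_pool;
+ *   cilk_fiber_pool_init(&root_pool,   NULL,       stack_size, 64, max_fibers, 1);
+ *   cilk_fiber_pool_init(&worker_pool, &root_pool, stack_size,  8, max_fibers, 0);
+ *
+ * Passing is_shared == 1 gives the root pool a spin mutex; the private
+ * child pool (is_shared == 0) skips the lock and returns surplus fibers to
+ * its parent when it is destroyed.
+ */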
+
+
+void cilk_fiber_pool_set_fiber_limit(cilk_fiber_pool* root_pool,
+ unsigned max_fibers_to_allocate)
+{
+ // Should only set limit on root pool, not children.
+ CILK_ASSERT(NULL == root_pool->parent);
+ root_pool->alloc_max = max_fibers_to_allocate;
+}
+
+void cilk_fiber_pool_destroy(cilk_fiber_pool* pool)
+{
+ CILK_ASSERT(cilk_fiber_pool_sanity_check(pool, "pool_destroy"));
+
+ // Lock my own pool, if I need to.
+ if (pool->lock) {
+ spin_mutex_lock(pool->lock);
+ }
+
+ // Give any remaining fibers to parent pool.
+ if (pool->parent) {
+ cilk_fiber_pool_move_fibers_to_parent_pool(pool, 0);
+ }
+
+ // Unlock pool.
+ if (pool->lock) {
+ spin_mutex_unlock(pool->lock);
+ }
+
+ // If I have any left in my pool, just free them myself.
+ // This method may acquire the pool lock.
+ cilk_fiber_pool_free_fibers_from_pool(pool, 0, NULL);
+
+ // Destroy the lock if there is one.
+ if (pool->lock) {
+ spin_mutex_destroy(pool->lock);
+ }
+ __cilkrts_free(pool->fibers);
+}
+
+
+cilk_fiber* cilk_fiber_allocate(cilk_fiber_pool* pool)
+{
+ CILK_ASSERT(cilk_fiber_pool_sanity_check(pool, "allocate"));
+ return cilk_fiber::allocate(pool);
+}
+
+cilk_fiber* cilk_fiber_allocate_from_heap(size_t stack_size)
+{
+ return cilk_fiber::allocate_from_heap(stack_size);
+}
+
+void cilk_fiber_reset_state(cilk_fiber* fiber, cilk_fiber_proc start_proc)
+{
+ fiber->reset_state(start_proc);
+}
+
+int cilk_fiber_remove_reference(cilk_fiber *fiber, cilk_fiber_pool *pool)
+{
+ return fiber->remove_reference(pool);
+}
+
+cilk_fiber* cilk_fiber_allocate_from_thread()
+{
+ return cilk_fiber::allocate_from_thread();
+}
+
+int cilk_fiber_deallocate_from_thread(cilk_fiber *fiber)
+{
+ return fiber->deallocate_from_thread();
+}
+
+int cilk_fiber_remove_reference_from_thread(cilk_fiber *fiber)
+{
+ return fiber->remove_reference_from_thread();
+}
+
+int cilk_fiber_is_allocated_from_thread(cilk_fiber *fiber)
+{
+ return fiber->is_allocated_from_thread();
+}
+
+#if SUPPORT_GET_CURRENT_FIBER
+cilk_fiber* cilk_fiber_get_current_fiber(void)
+{
+ return cilk_fiber::get_current_fiber();
+}
+#endif
+
+void cilk_fiber_suspend_self_and_resume_other(cilk_fiber* self,
+ cilk_fiber* other)
+{
+ self->suspend_self_and_resume_other(other);
+}
+
+
+void cilk_fiber::reset_state(cilk_fiber_proc start_proc)
+{
+ // Setup the fiber and return.
+ this->m_start_proc = start_proc;
+
+ CILK_ASSERT(!this->is_resumable());
+ CILK_ASSERT(NULL == this->m_pending_remove_ref);
+ CILK_ASSERT(NULL == this->m_pending_pool);
+}
+
+NORETURN
+cilk_fiber_remove_reference_from_self_and_resume_other(cilk_fiber* self,
+ cilk_fiber_pool* self_pool,
+ cilk_fiber* other)
+{
+#if FIBER_DEBUG >= 3
+ __cilkrts_worker* w = __cilkrts_get_tls_worker();
+ fprintf(stderr, "W=%d: cilk_fiber_deactivate_self_and_resume_other: self=%p, other=%p\n",
+ w->self,
+ self, other);
+#endif
+ CILK_ASSERT(cilk_fiber_pool_sanity_check(self_pool, "remove_reference_from_self_resume_other"));
+ self->remove_reference_from_self_and_resume_other(self_pool, other);
+
+ // We should never return here.
+}
+
+void cilk_fiber_set_post_switch_proc(cilk_fiber *self,
+ cilk_fiber_proc post_switch_proc)
+{
+ self->set_post_switch_proc(post_switch_proc);
+}
+
+void cilk_fiber_invoke_tbb_stack_op(cilk_fiber* fiber,
+ __cilk_tbb_stack_op op)
+{
+ fiber->invoke_tbb_stack_op(op);
+}
+
+cilk_fiber_data* cilk_fiber_get_data(cilk_fiber* fiber)
+{
+ return fiber->get_data();
+
+ /// TBD: Change this code to "return (cilk_fiber_data*)fiber;"
+ // plus a static assert, so that this function is
+ // more easily inlined by the compiler.
+}
+
+int cilk_fiber_is_resumable(cilk_fiber *fiber)
+{
+ return fiber->is_resumable();
+}
+
+char* cilk_fiber_get_stack_base(cilk_fiber *fiber)
+{
+ return fiber->get_stack_base();
+}
+
+
+#if defined(_WIN32) && 0 // Only works on Windows. Disable debugging for now.
+#define DBG_STACK_OPS(_fmt, ...) __cilkrts_dbgprintf(_fmt, __VA_ARGS__)
+#else
+#define DBG_STACK_OPS(_fmt, ...)
+#endif
+
+void cilk_fiber_set_stack_op(cilk_fiber *fiber,
+ __cilk_tbb_stack_op_thunk o)
+{
+ cilk_fiber_data *fdata = cilk_fiber_get_data(fiber);
+ DBG_STACK_OPS ("cilk_fiber_set_stack_op - cilk_fiber %p, routine: %p, data: %p\n",
+ fiber,
+ o.routine,
+ o.data);
+ fdata->stack_op_routine = o.routine;
+ fdata->stack_op_data = o.data;
+}
+
+#if 0 // Debugging function
+static
+const char *NameStackOp (enum __cilk_tbb_stack_op op)
+{
+ switch(op)
+ {
+ case CILK_TBB_STACK_ORPHAN: return "CILK_TBB_STACK_ORPHAN";
+ case CILK_TBB_STACK_ADOPT: return "CILK_TBB_STACK_ADOPT";
+ case CILK_TBB_STACK_RELEASE: return "CILK_TBB_STACK_RELEASE";
+ default: return "Unknown";
+ }
+}
+#endif
+
+/*
+ * Save TBB interop information for an unbound thread. It will get picked
+ * up when the thread is bound to the runtime.
+ */
+void cilk_fiber_tbb_interop_save_stack_op_info(__cilk_tbb_stack_op_thunk o)
+{
+ __cilk_tbb_stack_op_thunk *saved_thunk =
+ __cilkrts_get_tls_tbb_interop();
+
+ DBG_STACK_OPS("Calling save_stack_op; o.routine=%p, o.data=%p, saved_thunk=%p\n",
+ o.routine, o.data, saved_thunk);
+
+ // If there is not already space allocated, allocate some.
+ if (NULL == saved_thunk) {
+ saved_thunk = (__cilk_tbb_stack_op_thunk*)
+ __cilkrts_malloc(sizeof(__cilk_tbb_stack_op_thunk));
+ __cilkrts_set_tls_tbb_interop(saved_thunk);
+ }
+
+ *saved_thunk = o;
+
+ DBG_STACK_OPS ("Unbound Thread %04x: tbb_interop_save_stack_op_info - saved info\n",
+ cilkos_get_current_thread_id());
+}
+
+/*
+ * Save TBB interop information from the cilk_fiber. It will get picked
+ * up when the thread is bound to the runtime next time.
+ */
+void cilk_fiber_tbb_interop_save_info_from_stack(cilk_fiber *fiber)
+{
+ __cilk_tbb_stack_op_thunk *saved_thunk;
+ cilk_fiber_data* fdata;
+
+ if (NULL == fiber)
+ return;
+
+ fdata = cilk_fiber_get_data(fiber);
+ // If there is no TBB interop data, just return
+ if (NULL == fdata->stack_op_routine)
+ return;
+
+ saved_thunk = __cilkrts_get_tls_tbb_interop();
+
+ // If there is not already space allocated, allocate some.
+ if (NULL == saved_thunk) {
+ saved_thunk = (__cilk_tbb_stack_op_thunk*)
+ __cilkrts_malloc(sizeof(__cilk_tbb_stack_op_thunk));
+ __cilkrts_set_tls_tbb_interop(saved_thunk);
+ }
+
+ saved_thunk->routine = fdata->stack_op_routine;
+ saved_thunk->data = fdata->stack_op_data;
+}
+
+/*
+ * If there's TBB interop information that was saved before the thread was
+ * bound, apply it now
+ */
+void cilk_fiber_tbb_interop_use_saved_stack_op_info(cilk_fiber* fiber)
+{
+ __cilk_tbb_stack_op_thunk *saved_thunk =
+ __cilkrts_get_tls_tbb_interop();
+
+ CILK_ASSERT(fiber);
+ // If we haven't allocated a TBB interop index, we don't have any saved info
+ if (NULL == saved_thunk) {
+ DBG_STACK_OPS ("cilk_fiber %p: tbb_interop_use_saved_stack_op_info - no saved info\n",
+ fiber);
+ return;
+ }
+
+ DBG_STACK_OPS ("cilk_fiber %p: tbb_interop_use_saved_stack_op_info - using saved info\n",
+ fiber);
+
+ // Associate the saved info with the __cilkrts_stack
+ cilk_fiber_set_stack_op(fiber, *saved_thunk);
+
+ // Free the saved data. We'll save it again if needed when the code
+ // returns from the initial function
+ cilk_fiber_tbb_interop_free_stack_op_info();
+}
+
+/*
+ * Free saved TBB interop memory. Should only be called when the thread is
+ * not bound.
+ */
+void cilk_fiber_tbb_interop_free_stack_op_info(void)
+{
+ __cilk_tbb_stack_op_thunk *saved_thunk =
+ __cilkrts_get_tls_tbb_interop();
+
+ // If we haven't allocated a TBB interop index, we don't have any saved info
+ if (NULL == saved_thunk)
+ return;
+
+ DBG_STACK_OPS ("tbb_interop_free_stack_op_info - freeing saved info\n");
+
+ // Free the memory and wipe out the TLS value
+ __cilkrts_free(saved_thunk);
+ __cilkrts_set_tls_tbb_interop(NULL);
+}
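+
+/*
+ * Illustrative call sequence for the interop routines above (a sketch only;
+ * the surrounding binding logic is hypothetical, not part of this file):
+ *
+ *   __cilk_tbb_stack_op_thunk o;   // filled in by TBB
+ *
+ *   // While the thread is not yet bound to the Cilk runtime:
+ *   cilk_fiber_tbb_interop_save_stack_op_info(o);
+ *
+ *   // Later, when the thread binds and receives a fiber:
+ *   cilk_fiber_tbb_interop_use_saved_stack_op_info(fiber);
+ *
+ *   // When the fiber is handed back and the thread becomes unbound again:
+ *   cilk_fiber_tbb_interop_save_info_from_stack(fiber);
+ */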
+
+
+
+#if NEED_FIBER_REF_COUNTS
+int cilk_fiber_has_references(cilk_fiber *fiber)
+{
+ return (fiber->get_ref_count() > 0);
+}
+
+int cilk_fiber_get_ref_count(cilk_fiber *fiber)
+{
+ return fiber->get_ref_count();
+}
+
+void cilk_fiber_add_reference(cilk_fiber *fiber)
+{
+ fiber->inc_ref_count();
+}
+#endif // NEED_FIBER_REF_COUNTS
+
+
+} // End extern "C"
+
+
+cilk_fiber_sysdep* cilk_fiber::sysdep()
+{
+ return static_cast<cilk_fiber_sysdep*>(this);
+}
+
+
+cilk_fiber::cilk_fiber()
+ : m_start_proc(NULL)
+ , m_post_switch_proc(NULL)
+ , m_pending_remove_ref(NULL)
+ , m_pending_pool(NULL)
+ , m_flags(0)
+{
+ // Clear cilk_fiber_data base-class data members
+ std::memset((cilk_fiber_data*) this, 0, sizeof(cilk_fiber_data));
+
+ // cilk_fiber data members
+ init_ref_count(0);
+}
+
+cilk_fiber::cilk_fiber(std::size_t stack_size)
+{
+ *this = cilk_fiber(); // A delegating constructor would be nice here
+ this->stack_size = stack_size;
+}
+
+cilk_fiber::~cilk_fiber()
+{
+ // Empty destructor.
+}
+
+
+char* cilk_fiber::get_stack_base()
+{
+ return this->sysdep()->get_stack_base_sysdep();
+}
+
+cilk_fiber* cilk_fiber::allocate_from_heap(std::size_t stack_size)
+{
+ // Case 1: pool is NULL. create a new fiber from the heap
+ // No need for locks here.
+ cilk_fiber_sysdep* ret =
+ (cilk_fiber_sysdep*) __cilkrts_malloc(sizeof(cilk_fiber_sysdep));
+
+ // Error condition. If we failed to allocate a fiber from the
+ // heap, we are in trouble though...
+ if (!ret)
+ return NULL;
+
+ ::new(ret) cilk_fiber_sysdep(stack_size);
+
+ CILK_ASSERT(0 == ret->m_flags);
+ CILK_ASSERT(NULL == ret->m_pending_remove_ref);
+ CILK_ASSERT(NULL == ret->m_pending_pool);
+ ret->init_ref_count(1);
+ return ret;
+}
+
+
+#if USE_FIBER_TRY_ALLOCATE_FROM_POOL
+/**
+ * Helper method: try to allocate a fiber from this pool or its
+ * ancestors without going to the OS / heap.
+ *
+ * Returns the allocated fiber, or NULL if none is available.
+ *
+ * If the pool contains a suitable fiber, return it. Otherwise, try to
+ * recursively grab a fiber from the parent pool, if there is one.
+ *
+ * This method will not allocate a fiber from the heap.
+ *
+ * This method could be written either recursively or iteratively.
+ * It probably does not matter which one we do.
+ *
+ * @note This method is compiled, but may not be used unless the
+ * USE_FIBER_TRY_ALLOCATE_FROM_POOL switch is set.
+ */
+cilk_fiber* cilk_fiber::try_allocate_from_pool_recursive(cilk_fiber_pool* pool)
+{
+ cilk_fiber* ret = NULL;
+
+ if (pool->size > 0) {
+ // Try to get the lock.
+ if (pool->lock) {
+ // For some reason, it seems to be better to just block on the parent
+ // pool lock, instead of using a try-lock?
+#define USE_TRY_LOCK_IN_FAST_ALLOCATE 0
+#if USE_TRY_LOCK_IN_FAST_ALLOCATE
+ int got_lock = spin_mutex_trylock(pool->lock);
+ if (!got_lock) {
+ // If we fail, skip to the parent.
+ if (pool->parent) {
+ return try_allocate_from_pool_recursive(pool->parent);
+ }
+ }
+#else
+ spin_mutex_lock(pool->lock);
+#endif
+ }
+
+ // Check in the pool if we have the lock.
+ if (pool->size > 0) {
+ ret = pool->fibers[--pool->size];
+ }
+
+ // Release the lock once we are done updating pool fields.
+ if (pool->lock) {
+ spin_mutex_unlock(pool->lock);
+ }
+ }
+
+ if ((!ret) && (pool->parent)) {
+ return try_allocate_from_pool_recursive(pool->parent);
+ }
+
+ if (ret) {
+ // When we pull a fiber out of the pool, set its reference
+ // count before we return it.
+ ret->init_ref_count(1);
+ }
+ return ret;
+}
+#endif // USE_FIBER_TRY_ALLOCATE_FROM_POOL
+
+
+cilk_fiber* cilk_fiber::allocate(cilk_fiber_pool* pool)
+{
+ // Pool should not be NULL in this method. But I'm not going to
+ // actually assert it, because we are likely to seg fault anyway
+ // if it is.
+ // CILK_ASSERT(NULL != pool);
+
+ cilk_fiber *ret = NULL;
+
+#if USE_FIBER_TRY_ALLOCATE_FROM_POOL
+ // "Fast" path, which doesn't go to the heap or OS until checking
+ // the ancestors first.
+ ret = try_allocate_from_pool_recursive(pool);
+ if (ret)
+ return ret;
+#endif
+
+ // If we don't get anything from the "fast path", then go through
+ // a slower path to look for a fiber.
+ //
+ // 1. Lock the pool if it is shared.
+ // 2. Look in our local pool. If we find one, release the lock
+ // and quit searching.
+ // 3. Otherwise, check whether we can allocate from heap.
+ // 4. Release the lock if it was acquired.
+ // 5. Try to allocate from the heap, if step 3 said we could.
+ // If we find a fiber, then quit searching.
+ // 6. If none of these steps work, just recursively try again
+ // from the parent.
+
+ // 1. Lock the pool if it is shared.
+ if (pool->lock) {
+ spin_mutex_lock(pool->lock);
+ }
+
+ // 2. Look in local pool.
+ if (pool->size > 0) {
+ ret = pool->fibers[--pool->size];
+ if (ret) {
+ // If we found one, release the lock once we are
+ // done updating pool fields, and return it.
+ if (pool->lock) {
+ spin_mutex_unlock(pool->lock);
+ }
+
+ // When we pull a fiber out of the pool, set its reference
+ // count just in case.
+ ret->init_ref_count(1);
+ return ret;
+ }
+ }
+
+ // 3. Check whether we can allocate from the heap.
+ bool can_allocate_from_heap = false;
+ if (pool->total < pool->alloc_max) {
+ // Track that we are allocating a new fiber from the
+ // heap, originating from this pool.
+ // This increment may be undone if we happen to fail to
+ // allocate from the heap.
+ increment_pool_total(pool);
+ can_allocate_from_heap = true;
+ }
+
+ // 4. Unlock the pool, and then allocate from the heap.
+ if (pool->lock) {
+ spin_mutex_unlock(pool->lock);
+ }
+
+ // 5. Actually try to allocate from the heap / OS.
+ if (can_allocate_from_heap) {
+ ret = allocate_from_heap(pool->stack_size);
+ // If we got something from the heap, just return it.
+ if (ret) {
+ return ret;
+ }
+
+ // Otherwise, we failed in our attempt to allocate a
+ // fiber from the heap. Grab the lock and decrement
+ // the total again.
+ if (pool->lock) {
+ spin_mutex_lock(pool->lock);
+ }
+ decrement_pool_total(pool, 1);
+ if (pool->lock) {
+ spin_mutex_unlock(pool->lock);
+ }
+ }
+
+ // 6. If we get here, then searching this pool failed. Go search
+ // the parent instead if we have one.
+ if (pool->parent) {
+ return allocate(pool->parent);
+ }
+
+ return ret;
+}
+
+int cilk_fiber::remove_reference(cilk_fiber_pool* pool)
+{
+ int ref_count = this->dec_ref_count();
+ if (ref_count == 0) {
+ if (pool) {
+ deallocate_self(pool);
+ }
+ else {
+ deallocate_to_heap();
+ }
+ }
+ return ref_count;
+}
+
+cilk_fiber* cilk_fiber::allocate_from_thread()
+{
+ void* retmem = __cilkrts_malloc(sizeof(cilk_fiber_sysdep));
+ CILK_ASSERT(retmem);
+ cilk_fiber_sysdep* ret = ::new(retmem) cilk_fiber_sysdep(from_thread);
+
+ // A fiber allocated from a thread begins with a reference count
+ // of 2. The first is for being allocated, and the second is for
+ // being active (currently running).
+ //
+ // Suspending this fiber will decrement the count down to 1.
+ ret->init_ref_count(2);
+
+#if SUPPORT_GET_CURRENT_FIBER
+ // We're creating the main fiber for this thread. Set this fiber as the
+ // current fiber.
+ cilkos_set_tls_cilk_fiber(ret);
+#endif
+ return ret;
+}
+
+int cilk_fiber::deallocate_from_thread()
+{
+ CILK_ASSERT(this->is_allocated_from_thread());
+#if SUPPORT_GET_CURRENT_FIBER
+ CILK_ASSERT(this == cilkos_get_tls_cilk_fiber());
+ // Reverse of "allocate_from_thread".
+ cilkos_set_tls_cilk_fiber(NULL);
+#endif
+
+ this->assert_ref_count_at_least(2);
+
+ // Suspending the fiber should conceptually decrement the ref
+ // count by 1.
+ cilk_fiber_sysdep* self = this->sysdep();
+ self->convert_fiber_back_to_thread();
+
+ // Then, freeing the fiber itself decrements the ref count again.
+ int ref_count = this->sub_from_ref_count(2);
+ if (ref_count == 0) {
+ self->~cilk_fiber_sysdep();
+ __cilkrts_free(self);
+ }
+ return ref_count;
+}
+
+int cilk_fiber::remove_reference_from_thread()
+{
+ int ref_count = dec_ref_count();
+ if (ref_count == 0) {
+ cilk_fiber_sysdep* self = this->sysdep();
+ self->~cilk_fiber_sysdep();
+ __cilkrts_free(self);
+ }
+ return ref_count;
+}
+
+
+#if SUPPORT_GET_CURRENT_FIBER
+cilk_fiber* cilk_fiber::get_current_fiber()
+{
+ return cilk_fiber_sysdep::get_current_fiber_sysdep();
+}
+#endif
+
+void cilk_fiber::do_post_switch_actions()
+{
+ if (m_post_switch_proc)
+ {
+ cilk_fiber_proc proc = m_post_switch_proc;
+ m_post_switch_proc = NULL;
+ proc(this);
+ }
+
+ if (m_pending_remove_ref)
+ {
+ m_pending_remove_ref->remove_reference(m_pending_pool);
+
+ // Even if we don't free it, clear the pending fields.
+ m_pending_remove_ref = NULL;
+ m_pending_pool = NULL;
+ }
+}
+
+void cilk_fiber::suspend_self_and_resume_other(cilk_fiber* other)
+{
+#if FIBER_DEBUG >=1
+ fprintf(stderr, "suspend_self_and_resume_other: self =%p, other=%p [owner=%p, resume_sf=%p]\n",
+ this, other, other->owner, other->resume_sf);
+#endif
+
+ // Decrement my reference count (to suspend)
+ // Increment other's count (to resume)
+ // Suspended fiber should have a reference count of at least 1. (It is not in a pool).
+ this->dec_ref_count();
+ other->inc_ref_count();
+ this->assert_ref_count_at_least(1);
+
+ // Pass along my owner.
+ other->owner = this->owner;
+ this->owner = NULL;
+
+ // Change this fiber to resumable.
+ CILK_ASSERT(!this->is_resumable());
+ this->set_resumable(true);
+
+ // Normally, I'd assert other->is_resumable(). But this flag may
+ // be false the first time we try to "resume" a fiber.
+ cilk_fiber_sysdep* self = this->sysdep();
+ self->suspend_self_and_resume_other_sysdep(other->sysdep());
+
+ // HAVE RESUMED EXECUTION
+ // When we come back here, we should have at least two references:
+ // one for the fiber being allocated / out of a pool, and one for it being active.
+ this->assert_ref_count_at_least(2);
+}
+
+NORETURN
+cilk_fiber::remove_reference_from_self_and_resume_other(cilk_fiber_pool* self_pool,
+ cilk_fiber* other)
+{
+ // Decrement my reference count once (to suspend)
+ // Increment other's count (to resume)
+ // Suspended fiber should have a reference count of at least 1. (It is not in a pool).
+ this->dec_ref_count();
+ other->inc_ref_count();
+
+ // Set a pending remove reference for this fiber, once we have
+ // actually switched off.
+ other->m_pending_remove_ref = this;
+ other->m_pending_pool = self_pool;
+
+ // Pass along my owner.
+ other->owner = this->owner;
+ this->owner = NULL;
+
+ // Since we are deallocating self, this fiber does not become
+ // resumable.
+ CILK_ASSERT(!this->is_resumable());
+
+ cilk_fiber_sysdep* self = this->sysdep();
+ self->jump_to_resume_other_sysdep(other->sysdep());
+
+ __cilkrts_bug("Deallocating fiber. We should never come back here.");
+ std::abort();
+}
+
+
+void cilk_fiber::deallocate_to_heap()
+{
+ cilk_fiber_sysdep* self = this->sysdep();
+ self->~cilk_fiber_sysdep();
+ __cilkrts_free(self);
+}
+
+void cilk_fiber::deallocate_self(cilk_fiber_pool* pool)
+{
+ this->set_resumable(false);
+
+ CILK_ASSERT(NULL != pool);
+ CILK_ASSERT(!this->is_allocated_from_thread());
+ this->assert_ref_count_equals(0);
+
+ // Cases:
+ //
+ // 1. pool has space: Add to this pool.
+ // 2. pool is full: Give some fibers to parent, and then free
+ // enough to make space for the fiber we are deallocating.
+ // Then put the fiber back into the pool.
+
+ const bool need_lock = pool->lock;
+ // Grab the lock for the remaining cases.
+ if (need_lock) {
+ spin_mutex_lock(pool->lock);
+ }
+
+ // Case 1: this pool has space. Return the fiber.
+ if (pool->size < pool->max_size)
+ {
+ // Add this fiber to pool
+ pool->fibers[pool->size++] = this;
+ if (need_lock) {
+ spin_mutex_unlock(pool->lock);
+ }
+ return;
+ }
+
+ // Case 2: Pool is full.
+ //
+ // First free up some space by giving fibers to the parent.
+ if (pool->parent)
+ {
+ // Pool is full. Move all but "num_to_keep" fibers to parent,
+ // if we can.
+ unsigned num_to_keep = pool->max_size/2 + pool->max_size/4;
+ cilk_fiber_pool_move_fibers_to_parent_pool(pool, num_to_keep);
+ }
+
+ if (need_lock) {
+ spin_mutex_unlock(pool->lock);
+ }
+
+ // Now, free a fiber to make room for the one we need to put back,
+ // and then put this fiber back. This step may actually return
+ // fibers to the heap.
+ cilk_fiber_pool_free_fibers_from_pool(pool, pool->max_size -1, this);
+}
+
+
+// NOTE: Except for print-debug, this code is the same as in Windows.
+void cilk_fiber::invoke_tbb_stack_op(__cilk_tbb_stack_op op)
+{
+ cilk_fiber_data *fdata = this->get_data();
+
+ if (0 == fdata->stack_op_routine)
+ {
+ if (CILK_TBB_STACK_RELEASE != op)
+ DBG_STACK_OPS ("Wkr %p: invoke_tbb_stack_op - %s (%d) for cilk_fiber %p, fiber %p, thread id %04x - No stack op routine\n",
+ fdata->owner,
+ NameStackOp(op),
+ op,
+ fdata,
+ this,
+ cilkos_get_current_thread_id());
+ return;
+ }
+
+ // Call TBB to do its thing
+ DBG_STACK_OPS ("Wkr %p: invoke_tbb_stack_op - op %s data %p for cilk_fiber %p, fiber %p, thread id %04x\n",
+ fdata->owner,
+ NameStackOp(op),
+ fdata->stack_op_data,
+ fdata,
+ this,
+ cilkos_get_current_thread_id());
+
+ (*fdata->stack_op_routine)(op, fdata->stack_op_data);
+ if (op == CILK_TBB_STACK_RELEASE)
+ {
+ fdata->stack_op_routine = 0;
+ fdata->stack_op_data = 0;
+ }
+}
+
+
+
+#if NEED_FIBER_REF_COUNTS
+
+void cilk_fiber::atomic_inc_ref_count()
+{
+ cilkos_atomic_add(&m_outstanding_references, 1);
+}
+
+long cilk_fiber::atomic_dec_ref_count()
+{
+ return cilkos_atomic_add(&m_outstanding_references, -1);
+}
+
+long cilk_fiber::atomic_sub_from_ref_count(long v)
+{
+ return cilkos_atomic_add(&m_outstanding_references, -v);
+}
+
+#endif // NEED_FIBER_REF_COUNTS
+
+/* End cilk_fiber.cpp */
diff --git a/gcc-4.9/libcilkrts/runtime/cilk_fiber.h b/gcc-4.9/libcilkrts/runtime/cilk_fiber.h
new file mode 100644
index 000000000..2671f9246
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk_fiber.h
@@ -0,0 +1,882 @@
+/* cilk_fiber.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2012-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file cilk_fiber.h
+ *
+ * @brief Abstraction of a "fiber": A coprocess-like stack and auxiliary data
+ */
+
+#ifndef INCLUDED_CILK_FIBER_DOT_H
+#define INCLUDED_CILK_FIBER_DOT_H
+
+#include <cilk/common.h>
+#ifdef __cplusplus
+# include <cstddef>
+#else
+# include <stddef.h>
+#endif
+
+#include "bug.h"
+#include "cilk-tbb-interop.h"
+#include "spin_mutex.h"
+#include "internal/abi.h" // Define __cilkrts_stack_frame
+
+/**
+ * @brief Debugging level for Cilk fiber code.
+ *
+ * A value of 0 means no debugging.
+ * Higher values generate more debugging output.
+ */
+#define FIBER_DEBUG 0
+
+/**
+ * @brief Flag for validating reference counts.
+ *
+ * Set to 1 to assert that fiber reference counts are reasonable.
+ */
+#define FIBER_CHECK_REF_COUNTS 1
+
+/**
+ * @brief Flag to determine whether fibers support reference counting.
+ * We require reference counting only on Windows, for exception
+ * processing. Unix does not need reference counting.
+ */
+#if defined(_WIN32)
+# define NEED_FIBER_REF_COUNTS 1
+#endif
+
+/**
+ * @brief Flag to enable support for the
+ * cilk_fiber_get_current_fiber() method.
+ *
+ * I'd like this flag to be 0. However, the cilk_fiber test depends
+ * on being able to call this method.
+ */
+#if !defined(SUPPORT_GET_CURRENT_FIBER)
+# define SUPPORT_GET_CURRENT_FIBER 0
+#endif
+
+/**
+ * @brief Switch for enabling "fast path" check for fibers, which
+ * doesn't go to the heap or OS until checking the ancestors first.
+ *
+ * Doing this check seems to make the stress test in
+ * cilk_fiber_pool.t.cpp run faster. But it doesn't seem to make much
+ * difference in other benchmarks, so it is disabled by default.
+ */
+#define USE_FIBER_TRY_ALLOCATE_FROM_POOL 0
+
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/// @brief Forward reference to fiber pool.
+typedef struct cilk_fiber_pool cilk_fiber_pool;
+
+/** @brief Opaque data structure representing a fiber */
+typedef struct cilk_fiber cilk_fiber;
+
+/** @brief Function pointer type for use as a fiber's "main" procedure */
+typedef void (*cilk_fiber_proc)(cilk_fiber*);
+
+/** @brief Data structure associated with each fiber. */
+typedef struct cilk_fiber_data
+{
+ __STDNS size_t stack_size; /**< Size of stack for fiber */
+ __cilkrts_worker* owner; /**< Worker using this fiber */
+ __cilkrts_stack_frame* resume_sf; /**< Stack frame to resume */
+ __cilk_tbb_pfn_stack_op stack_op_routine; /**< Cilk/TBB interop callback */
+ void* stack_op_data; /**< Data for Cilk/TBB callback */
+ void* client_data; /**< Data managed by client */
+
+#ifdef _WIN32
+ char *initial_sp; /**< Initialized in fiber_stub */
+# ifdef _WIN64
+ char *steal_frame_sp; /**< RSP for frame stealing work */
+ // Needed for exception handling so we can
+ // identify when about to unwind off stack
+# endif
+#endif
+
+} cilk_fiber_data;
+
+/** @brief Pool of cilk_fiber for fiber reuse
+ *
+ * Pools form a hierarchy, with each pool pointing to its parent. When the
+ * pool underflows, it gets a fiber from its parent. When a pool overflows,
+ * it returns some fibers to its parent. If the root pool underflows, it
+ * allocates and initializes a new fiber from the heap, but only if the total
+ * number of fibers allocated is less than alloc_max; otherwise, fiber creation fails.
+ */
+struct cilk_fiber_pool
+{
+ spin_mutex* lock; ///< Mutual exclusion for pool operations
+ __STDNS size_t stack_size; ///< Size of stacks for fibers in this pool.
+ cilk_fiber_pool* parent; ///< @brief Parent pool.
+ ///< If this pool is empty, get from parent
+
+ // Describes inactive fibers stored in the pool.
+ cilk_fiber** fibers; ///< Array of max_size fiber pointers
+ unsigned max_size; ///< Limit on number of fibers in pool
+ unsigned size; ///< Number of fibers currently in the pool
+
+ // Statistics on active fibers that were allocated from this pool,
+ // but no longer in the pool.
+ int total; ///< @brief Fibers allocated - fibers deallocated from pool
+ ///< total may be negative for non-root pools.
+ int high_water; ///< High water mark of total fibers
+ int alloc_max; ///< Limit on number of fibers allocated from the heap/OS
+};
+
+/** @brief Initializes a cilk_fiber_pool structure
+ *
+ * @param pool - The address of the pool that is to be initialized
+ * @param parent - The address of this pool's parent, or NULL for root pool
+ * @param stack_size - Size of stacks for fibers allocated from this pool.
+ * @param buffer_size - The maximum number of fibers that may be pooled.
+ * @param alloc_max - Limit on # of fibers this pool can allocate from the heap.
+ * @param is_shared - True if accessing this pool needs a lock, false otherwise.
+ */
+void cilk_fiber_pool_init(cilk_fiber_pool* pool,
+ cilk_fiber_pool* parent,
+ size_t stack_size,
+ unsigned buffer_size,
+ int alloc_max,
+ int is_shared);
+
+/** @brief Sets the maximum number of fibers to allocate from a root pool.
+ *
+ * @param root_pool - A root fiber pool
+ * @param max_fibers_to_allocate - The limit on # of fibers to allocate.
+ *
+ * Sets the maximum number of fibers that can be allocated from this
+ * pool and all its descendants. This pool must be a root pool.
+ */
+void cilk_fiber_pool_set_fiber_limit(cilk_fiber_pool* root_pool,
+ unsigned max_fibers_to_allocate);
+
+/** @brief De-initializes a cilk_fiber_pool
+ *
+ * @param pool - The address of the pool that is to be destroyed
+ */
+void cilk_fiber_pool_destroy(cilk_fiber_pool* pool);
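+
+/* Usage sketch of the pool hierarchy (illustrative only; the sizes and the
+ * names root_pool and w_pool are hypothetical, not part of the runtime):
+ *
+ *   size_t stack_size = 1 << 20;   // hypothetical 1 MB fiber stacks
+ *   cilk_fiber_pool root_pool, w_pool;
+ *   // Shared root pool: locked, may allocate up to 64 fibers from the heap.
+ *   cilk_fiber_pool_init(&root_pool, NULL, stack_size, 8, 64, 1);
+ *   // Per-worker child pool: unlocked, refills from root_pool on underflow.
+ *   cilk_fiber_pool_init(&w_pool, &root_pool, stack_size, 8, 0, 0);
+ *   ...
+ *   cilk_fiber_pool_destroy(&w_pool);
+ *   cilk_fiber_pool_destroy(&root_pool);
+ */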
+
+/** @brief Allocates a new cilk_fiber.
+ *
+ * If the specified pool is empty, this method may choose to either
+ * allocate a fiber from the heap (if pool->total < pool->alloc_max),
+ * or retrieve a fiber from the parent pool.
+ *
+ * @note If a non-null fiber is returned, @c cilk_fiber_reset_state
+ * should be called on this fiber before using it.
+ *
+ * An allocated fiber begins with a reference count of 1.
+ * This method may lock @c pool or one of its ancestors.
+ *
+ * @pre pool should not be NULL.
+ *
+ * @param pool The fiber pool from which to retrieve a fiber.
+ * @return An allocated fiber, or NULL if failed to allocate.
+ */
+cilk_fiber* cilk_fiber_allocate(cilk_fiber_pool* pool);
+
+/** @brief Allocate and initialize a new cilk_fiber using memory from
+ * the heap and/or OS.
+ *
+ * The allocated fiber begins with a reference count of 1.
+ *
+ * @param stack_size The size (in bytes) to be allocated for the fiber's
+ * stack.
+ * @return An initialized fiber. This method should not return NULL
+ * unless some exceptional condition has occurred.
+ */
+cilk_fiber* cilk_fiber_allocate_from_heap(size_t stack_size);
+
+
+/** @brief Resets a fiber object just allocated from a pool with the
+ * specified proc.
+ *
+ * After this call, the cilk_fiber_data object associated with this fiber
+ * is filled with zeros.
+ *
+ * This function can be called only on a fiber that has been allocated
+ * from a pool, but never used.
+ *
+ * @param fiber The fiber to reset and initialize.
+ * @param start_proc The function to run when switching to the fiber. If
+ * null, the fiber can be used with cilk_fiber_run_proc()
+ * but not with cilk_fiber_resume().
+ */
+void cilk_fiber_reset_state(cilk_fiber* fiber,
+ cilk_fiber_proc start_proc);
+
+/** @brief Remove a reference from this fiber, possibly deallocating it.
+ *
+ * This fiber is deallocated only when there are no other references
+ * to it. Deallocation happens either by returning the fiber to the
+ * specified pool, or returning it to the heap.
+ *
+ * A fiber that is currently executing should not remove the last
+ * reference to itself.
+ *
+ * When a fiber is deallocated, destructors are not called for the
+ * objects (if any) still on its stack. The fiber's stack and fiber
+ * data are returned to the pool, but the client fiber data is not
+ * deallocated.
+ *
+ * If the pool overflows because of a deallocation, then some fibers
+ * will be returned to the parent pool. If the root pool overflows,
+ * then the fiber is returned to the heap.
+ *
+ * @param fiber The Cilk fiber to remove a reference to.
+ * @param pool The fiber pool to which the fiber should be returned. The
+ * caller is assumed to have exclusive access to the pool
+ * either because there is no contention for it or because
+ * its lock has been acquired. If pool is NULL, any
+ * deallocated fiber is destroyed and returned to the
+ * heap.
+ *
+ * @return Final reference count. If the count is 0, the fiber was
+ * returned to a pool or the heap.
+ */
+int cilk_fiber_remove_reference(cilk_fiber *fiber, cilk_fiber_pool *pool);
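+
+/* Typical lifecycle sketch (illustrative only; my_pool and fiber_main are
+ * hypothetical names):
+ *
+ *   cilk_fiber* f = cilk_fiber_allocate(&my_pool);    // ref count == 1
+ *   if (f) {
+ *       cilk_fiber_reset_state(f, fiber_main);        // zero the data, set start proc
+ *       ... switch to f, run work, switch back ...
+ *       cilk_fiber_remove_reference(f, &my_pool);     // may return f to my_pool
+ *   }
+ */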
+
+/** @brief Allocates and initializes this thread's main fiber
+ *
+ * Each thread has an "implicit" main fiber that controls the
+ * thread's initial stack. This function makes this fiber visible to
+ * the client and allocates the Cilk-specific aspects of the implicit
+ * fiber. A call to this function must be paired with a call to
+ * cilk_fiber_deallocate_from_thread()
+ * or a memory leak (or worse) will result.
+ *
+ * A fiber allocated from a thread begins with a reference count of 2.
+ * One is for being allocated, and one is for being active.
+ * (A fiber created from a thread is automatically currently executing.)
+ * The matching calls above each decrement the reference count by 1.
+ *
+ * @return A fiber for the currently executing thread.
+ */
+cilk_fiber* cilk_fiber_allocate_from_thread(void);
+
+/** @brief Remove a fiber created from a thread,
+ * possibly deallocating it.
+ *
+ * Same as cilk_fiber_remove_reference, except that it works on fibers
+ * created via cilk_fiber_allocate_from_thread().
+ *
+ * Fibers created from a thread are never returned to a pool.
+ *
+ * @param fiber The Cilk fiber to remove a reference from.
+ * @return Final reference count. If the count is 0, the fiber was
+ * returned to the heap.
+ */
+int cilk_fiber_remove_reference_from_thread(cilk_fiber *fiber);
+
+/** @brief Deallocate a fiber created from a thread,
+ * possibly destroying it.
+ *
+ * This method decrements the reference count of the fiber by 2, and
+ * destroys the fiber struct if the reference count is 0.
+ *
+ * OS-specific cleanup for the fiber executes unconditionally with
+ * this method. The destruction of the actual object, however, does
+ * not occur unless the reference count is 0.
+ *
+ * @param fiber The cilk_fiber to deallocate from a thread.
+ * @return Final reference count. If the count is 0, the fiber was
+ * returned to the heap.
+ */
+int cilk_fiber_deallocate_from_thread(cilk_fiber *fiber);
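+
+/* Sketch of the implicit thread fiber's lifetime (illustrative only):
+ *
+ *   cilk_fiber* main_fiber = cilk_fiber_allocate_from_thread(); // ref count == 2
+ *   ... thread is bound to the runtime and runs Cilk code ...
+ *   cilk_fiber_deallocate_from_thread(main_fiber);  // drops both references;
+ *                                                   // frees the struct if count hits 0
+ */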
+
+/** @brief Returns true if this fiber is allocated from a thread.
+ */
+int cilk_fiber_is_allocated_from_thread(cilk_fiber *fiber);
+
+
+/** @brief Suspends execution on the current fiber and resumes another fiber.
+ *
+ * Suspends the current fiber and transfers control to a new fiber. Execution
+ * on the new fiber resumes from the point at which that fiber suspended itself to
+ * run a different fiber. If that fiber was freshly allocated, it runs the
+ * start_proc function specified at allocation. This function returns when
+ * another fiber resumes the self fiber. Note that the state of the
+ * floating-point control register (i.e., the register that controls rounding
+ * mode, etc.) is valid but indeterminate on return -- different
+ * implementations will have different results.
+ *
+ * When the @c self fiber is resumed, execution proceeds as though
+ * this function call returns.
+ *
+ * This operation increments the reference count of @p other.
+ * This operation decrements the reference count of @p self.
+ *
+ * @param self Fiber to switch from. Must equal current fiber.
+ * @param other Fiber to switch to.
+ */
+void cilk_fiber_suspend_self_and_resume_other(cilk_fiber* self,
+ cilk_fiber* other);
+
+/** @brief Removes a reference from the currently executing fiber and
+ * resumes another fiber.
+ *
+ * Removes a reference from @p self and transfers control to @p other
+ * fiber. Execution on @p other resumes from the point at which @p
+ * other suspended itself to run a different fiber. If @p other fiber
+ * was freshly allocated, then runs the function specified at
+ * creation.
+ *
+ *
+ * This operation increments the reference count of @p other.
+ *
+ * This operation conceptually decrements the reference count of
+ * @p self twice, once to suspend it, and once to remove a reference to
+ * it. Then, if the count is 0, it is returned to the specified pool
+ * or destroyed.
+ *
+ * @pre @p self is the currently executing fiber.
+ *
+ * @param self Fiber to remove a reference from and switch away from.
+ * @param self_pool Pool to which the current fiber should be returned
+ * @param other Fiber to switch to.
+ */
+NORETURN
+cilk_fiber_remove_reference_from_self_and_resume_other(cilk_fiber* self,
+ cilk_fiber_pool* self_pool,
+ cilk_fiber* other);
+
+/** @brief Set the proc method to execute immediately after a switch
+ * to this fiber.
+ *
+ * The @c post_switch_proc method executes immediately after switching
+ * away from @p self fiber to some other fiber, but before @c self
+ * gets cleaned up.
+ *
+ * @note A fiber can have only one post_switch_proc method at a time.
+ * If this method is called multiple times before switching to the
+ * fiber, only the last proc method will execute.
+ *
+ * @param self Fiber.
+ * @param post_switch_proc Proc method to execute immediately after switching to this fiber.
+ */
+void cilk_fiber_set_post_switch_proc(cilk_fiber* self, cilk_fiber_proc post_switch_proc);
+
+/** @brief Invoke TBB stack op for this fiber.
+ *
+ * @param fiber Fiber to invoke stack op for.
+ * @param op The stack op to invoke
+ */
+void cilk_fiber_invoke_tbb_stack_op(cilk_fiber* fiber, __cilk_tbb_stack_op op);
+
+/** @brief Returns the fiber data associated with the specified fiber.
+ *
+ * The returned struct is owned by the fiber and is deallocated automatically
+ * when the fiber is destroyed. However, the client_data field is owned by
+ * the client and must be deallocated separately. When called for a
+ * newly-allocated fiber, the returned data is zero-filled.
+ *
+ * @param fiber The fiber for which data is being requested.
+ * @return The fiber data for the specified fiber
+ */
+cilk_fiber_data* cilk_fiber_get_data(cilk_fiber* fiber);
+
+/** @brief Retrieve the owner field from the fiber.
+ *
+ * This method is provided for convenience. One can also get the
+ * fiber data, and then get the owner field.
+ */
+__CILKRTS_INLINE
+__cilkrts_worker* cilk_fiber_get_owner(cilk_fiber* fiber)
+{
+ // TBD: We really want a static assert here, that this cast is
+ // doing the right thing.
+ cilk_fiber_data* fdata = (cilk_fiber_data*)fiber;
+ return fdata->owner;
+}
+
+/** @brief Sets the owner field of a fiber.
+ *
+ * This method is provided for convenience. One can also get the
+ * fiber data, and then get the owner field.
+ */
+__CILKRTS_INLINE
+void cilk_fiber_set_owner(cilk_fiber* fiber, __cilkrts_worker* owner)
+{
+ // TBD: We really want a static assert here, that this cast is
+ // doing the right thing.
+ cilk_fiber_data* fdata = (cilk_fiber_data*)fiber;
+ fdata->owner = owner;
+}
+
+/** @brief Returns true if this fiber is resumable.
+ *
+ * A fiber is considered resumable when it is not currently being
+ * executed.
+ *
+ * This function is used by Windows exception code.
+ * @param fiber The fiber to check.
+ * @return Nonzero value if fiber is resumable.
+ */
+int cilk_fiber_is_resumable(cilk_fiber* fiber);
+
+/**
+ * @brief Returns the base of this fiber's stack.
+ *
+ * On some platforms (e.g., Windows), the fiber must have started
+ * running before we can get this information.
+ *
+ * @param fiber The fiber to get the stack pointer from.
+ * @return The base of the stack, or NULL if this
+ * information is not available yet.
+ */
+char* cilk_fiber_get_stack_base(cilk_fiber* fiber);
+
+
+/****************************************************************************
+ * TBB interop functions
+ * **************************************************************************/
+/**
+ * @brief Set the TBB callback information for a stack
+ *
+ * @param fiber The fiber to set the TBB callback information for
+ * @param o The TBB callback thunk. Specifies the callback address and
+ * context value.
+ */
+void cilk_fiber_set_stack_op(cilk_fiber *fiber,
+ __cilk_tbb_stack_op_thunk o);
+
+/**
+ * @brief Save the TBB callback address and context value in
+ * thread-local storage.
+ *
+ * We'll use it later when the thread binds to a worker.
+ *
+ * @param o The TBB callback thunk which is to be saved.
+ */
+void cilk_fiber_tbb_interop_save_stack_op_info(__cilk_tbb_stack_op_thunk o);
+
+/**
+ * @brief Move TBB stack-op info from thread-local storage and store
+ * it into the fiber.
+ *
+ * Called when we bind a thread to the runtime. If there is any TBB
+ * interop information in thread-local storage, bind it to the stack
+ * now.
+ *
+ * @pre \c fiber should not be NULL.
+ * @param fiber The fiber that should take over the TBB interop information.
+ */
+void cilk_fiber_tbb_interop_use_saved_stack_op_info(cilk_fiber *fiber);
+
+/**
+ * @brief Free any TBB interop information saved in thread-local storage
+ */
+void cilk_fiber_tbb_interop_free_stack_op_info(void);
+
+/**
+ * @brief Migrate any TBB interop information from a cilk_fiber to
+ * thread-local storage.
+ *
+ * Returns immediately if no TBB interop information has been
+ * associated with the stack.
+ *
+ * @param fiber The cilk_fiber whose TBB interop information should be
+ * saved in thread-local storage.
+ */
+void cilk_fiber_tbb_interop_save_info_from_stack(cilk_fiber* fiber);
+
+
+#if SUPPORT_GET_CURRENT_FIBER
+/** @brief Returns the fiber associated with the currently executing thread
+ *
+ * @note This function is currently used only for testing the Cilk
+ * runtime.
+ *
+ * @return Fiber associated with the currently executing thread or NULL if no
+ * fiber was associated with this thread.
+ */
+cilk_fiber* cilk_fiber_get_current_fiber(void);
+#endif
+
+
+#if NEED_FIBER_REF_COUNTS
+/** @brief Returns true if this fiber has reference count > 0.
+ *
+ * @param fiber The fiber to check for references.
+ * @return Nonzero value if the fiber has references.
+ */
+int cilk_fiber_has_references(cilk_fiber *fiber);
+
+/** @brief Returns the value of the reference count.
+ *
+ * @param fiber The fiber to check for references.
+ * @return The value of the reference count of fiber.
+ */
+int cilk_fiber_get_ref_count(cilk_fiber *fiber);
+
+/** @brief Adds a reference to this fiber.
+ *
+ * Increments the reference count of a current fiber. Fibers with
+ * nonzero reference count will not be freed or returned to a fiber
+ * pool.
+ *
+ * @param fiber The fiber to add a reference to.
+ */
+void cilk_fiber_add_reference(cilk_fiber *fiber);
+
+#endif // NEED_FIBER_REF_COUNTS
+
+__CILKRTS_END_EXTERN_C
+
+#ifdef __cplusplus
+// Some C++ implementation details
+
+/// Opaque declaration of a cilk_fiber_sysdep object.
+struct cilk_fiber_sysdep;
+
+/**
+ * cilk_fiber is a base-class for system-dependent fiber implementations.
+ */
+struct cilk_fiber : protected cilk_fiber_data
+{
+ protected:
+ // This is a rare acceptable use of protected inheritance and protected
+ // variable access: when the base class and derived class collaborate
+ // tightly to comprise a single component.
+
+ /// For overloading constructor of cilk_fiber.
+ enum from_thread_t { from_thread = 1 };
+
+ // Boolean flags capturing the status of the fiber.
+ // Each one can be set independently.
+ // A default fiber is constructed with a flag value of 0.
+ static const int RESUMABLE = 0x01; ///< True if the fiber is in a suspended state and can be resumed.
+ static const int ALLOCATED_FROM_THREAD = 0x02; ///< True if fiber was allocated from a thread.
+
+ cilk_fiber_proc m_start_proc; ///< Function to run on start up/reset
+ cilk_fiber_proc m_post_switch_proc; ///< Function that executes when we first switch to a new fiber from a different one.
+
+ cilk_fiber* m_pending_remove_ref;///< Fiber to possibly delete on start up or resume
+ cilk_fiber_pool* m_pending_pool; ///< Pool where m_pending_remove_ref should go if it is deleted.
+ unsigned m_flags; ///< Captures the status of this fiber.
+
+#if NEED_FIBER_REF_COUNTS
+ volatile long m_outstanding_references; ///< Counts references to this fiber.
+#endif
+
+ /// Creates a fiber with NULL data.
+ cilk_fiber();
+
+ /**
+ * @brief Creates a fiber with user-specified arguments.
+ *
+ * @param stack_size Size of stack to use for this fiber.
+ */
+ cilk_fiber(std::size_t stack_size);
+
+ /// Empty destructor.
+ ~cilk_fiber();
+
+ /**
+ * @brief Performs any actions that happen after switching from
+ * one fiber to another.
+ *
+ * These actions are:
+ * 1. Execute m_post_switch_proc on a fiber.
+ * 2. Do any pending deallocations from the previous fiber.
+ */
+ void do_post_switch_actions();
+
+ /**
+ *@brief Helper method that converts a @c cilk_fiber object into a
+ * @c cilk_fiber_sysdep object.
+ *
+ * The @c cilk_fiber_sysdep object contains the system-dependent parts
+ * of the implementation of a @c cilk_fiber.
+ *
+ * We could have @c cilk_fiber_sysdep inherit from @c cilk_fiber and
+ * then use virtual functions. But since a given platform only uses
+ * one definition of @c cilk_fiber_sysdep at a time, we statically
+ * cast between them.
+ */
+ inline cilk_fiber_sysdep* sysdep();
+
+ /**
+ * @brief Set resumable flag to specified state.
+ */
+ inline void set_resumable(bool state) {
+ m_flags = state ? (m_flags | RESUMABLE) : (m_flags & (~RESUMABLE));
+ }
+
+ /**
+ *@brief Set the allocated_from_thread flag.
+ */
+ inline void set_allocated_from_thread(bool state) {
+ m_flags = state ? (m_flags | ALLOCATED_FROM_THREAD) : (m_flags & (~ALLOCATED_FROM_THREAD));
+ }
+
+ public:
+
+ /**
+ * @brief Allocates and initializes a new cilk_fiber, either from
+ * the specified pool or from the heap.
+ *
+ * @pre pool should not be NULL.
+ */
+ static cilk_fiber* allocate(cilk_fiber_pool* pool);
+
+ /**
+ * @brief Allocates a fiber from the heap.
+ */
+ static cilk_fiber* allocate_from_heap(size_t stack_size);
+
+ /**
+ * @brief Return a fiber to the heap.
+ */
+ void deallocate_to_heap();
+
+ /**
+ * @brief Reset the state of a fiber just allocated from a pool.
+ */
+ void reset_state(cilk_fiber_proc start_proc);
+
+ /**
+ * @brief Remove a reference from this fiber, possibly
+ * deallocating it if the reference count becomes 0.
+ *
+ * @param pool The fiber pool to which this fiber should be returned.
+ * @return The final reference count.
+ */
+ int remove_reference(cilk_fiber_pool* pool);
+
+ /**
+ * @brief Deallocate the fiber by returning it to the pool.
+ * @pre This method should only be called if the reference count
+ * is 0.
+ *
+ * @param pool The fiber pool to return this fiber to. If NULL,
+ * fiber is returned to the heap.
+ */
+ void deallocate_self(cilk_fiber_pool *pool);
+
+ /** @brief Allocates and initializes this thread's main fiber. */
+ static cilk_fiber* allocate_from_thread();
+
+ /** @brief Deallocate a fiber created from a thread,
+ * possibly destroying it.
+ *
+ * This method decrements the reference count of this fiber by 2,
+ * and destroys the fiber if the reference count is 0.
+ *
+ * OS-specific cleanup for the fiber executes unconditionally with
+ * this method. The destruction of the actual object, however, does
+ * not occur unless the reference count is 0.
+ *
+ * @return Final reference count. If the count is 0, the fiber was
+ * returned to the heap.
+ */
+ int deallocate_from_thread();
+
+ /** @brief Removes a reference from this fiber.
+ *
+ * This method deallocates this fiber if the reference count
+ * becomes 0.
+ *
+ * @pre This fiber must be allocated from a thread.
+ * @return The final reference count of this fiber.
+ */
+ int remove_reference_from_thread();
+
+#if SUPPORT_GET_CURRENT_FIBER
+ /** @brief Get the current fiber from TLS.
+ *
+ * @note This function is only used for testing the runtime.
+ */
+ static cilk_fiber* get_current_fiber();
+#endif
+
+ /** @brief Suspends execution on the current fiber and resumes another fiber.
+ *
+ * Control returns after resuming execution of the self fiber.
+ */
+ void suspend_self_and_resume_other(cilk_fiber* other);
+
+
+ /** @brief Removes a reference from the currently executing fiber
+ * and resumes another fiber.
+ *
+ * This fiber may be returned to a pool or deallocated.
+ */
+ NORETURN remove_reference_from_self_and_resume_other(cilk_fiber_pool* self_pool,
+ cilk_fiber* other);
+
+ /** @brief Set the proc method to execute immediately after a switch
+ * to this fiber.
+ *
+ * @param post_switch_proc Proc method to execute immediately
+ * after switching to this fiber.
+ */
+ inline void set_post_switch_proc(cilk_fiber_proc post_switch_proc) {
+ m_post_switch_proc = post_switch_proc;
+ }
+
+ /** @brief Returns true if this fiber is resumable.
+ *
+ * A fiber is considered resumable when it is not currently being
+ * executed.
+ */
+ inline bool is_resumable(void) {
+ return (m_flags & RESUMABLE);
+ }
+
+ /** @brief Returns true if fiber was allocated from a thread. */
+ inline bool is_allocated_from_thread(void) {
+ return (m_flags & ALLOCATED_FROM_THREAD);
+ }
+
+ /**
+ *@brief Get the address at the base of the stack for this fiber.
+ */
+ inline char* get_stack_base();
+
+ /** @brief Return the data for this fiber. */
+ cilk_fiber_data* get_data() { return this; }
+
+ /** @brief Return the data for this fiber. */
+ cilk_fiber_data const* get_data() const { return this; }
+
+
+#if NEED_FIBER_REF_COUNTS
+ /** @brief Verifies that this fiber's reference count equals v. */
+ inline void assert_ref_count_equals(long v) {
+ #if FIBER_CHECK_REF_COUNTS
+ CILK_ASSERT(m_outstanding_references == v);
+ #endif
+ }
+
+ /** @brief Verifies that this fiber's reference count is at least v. */
+ inline void assert_ref_count_at_least(long v) {
+ #if FIBER_CHECK_REF_COUNTS
+ CILK_ASSERT(m_outstanding_references >= v);
+ #endif
+ }
+
+ /** @brief Get reference count. */
+ inline long get_ref_count() { return m_outstanding_references; }
+
+ /** @brief Initialize reference count.
+ * Operation is not atomic.
+ */
+ inline void init_ref_count(long v) { m_outstanding_references = v; }
+
+ // For Windows, updates to the fiber reference count need to be
+ // atomic, because exceptions can live on a stack that we are not
+ // currently executing on. Thus, we can update the reference
+ // count of a fiber we are not currently executing on.
+
+ /** @brief Increment reference count for this fiber [Windows]. */
+ inline void inc_ref_count() { atomic_inc_ref_count(); }
+
+ /** @brief Decrement reference count for this fiber [Windows]. */
+ inline long dec_ref_count() { return atomic_dec_ref_count(); }
+
+ /** @brief Subtract v from the reference count for this fiber [Windows]. */
+ inline long sub_from_ref_count(long v) { return atomic_sub_from_ref_count(v); }
+#else // NEED_FIBER_REF_COUNTS
+
+ // Without reference counting, we have placeholder methods.
+ inline void init_ref_count(long v) { }
+
+ inline void inc_ref_count() { }
+
+ // With no reference counting, dec_ref_count always returns 0.
+ // Thus, anyone checking is always the "last" one.
+ inline long dec_ref_count() { return 0; }
+ inline long sub_from_ref_count(long v) { return 0; }
+
+ // The assert methods do nothing.
+ inline void assert_ref_count_equals(long v) { }
+ inline void assert_ref_count_at_least(long v) { }
+#endif
+
+ /**
+ * @brief Call TBB to tell it about an "interesting" event.
+ *
+ * @param op Value specifying the event to track.
+ */
+ void invoke_tbb_stack_op(__cilk_tbb_stack_op op);
+
+private:
+
+ /**
+ * @brief Helper method: try to allocate a fiber from this pool or
+ * its ancestors without going to the OS / heap.
+ *
+ * Returns the allocated fiber, or NULL if none is available.
+ *
+ * If the pool contains a suitable fiber, return it. Otherwise, try to
+ * recursively grab a fiber from the parent pool, if there is one.
+ *
+ * This method will not allocate a fiber from the heap.
+ */
+ static cilk_fiber* try_allocate_from_pool_recursive(cilk_fiber_pool* pool);
+
+
+#if NEED_FIBER_REF_COUNTS
+ /**
+ * @brief Atomic increment of reference count.
+ */
+ void atomic_inc_ref_count();
+
+ /**
+ * @brief Atomic decrement of reference count.
+ */
+ long atomic_dec_ref_count();
+
+ /**
+ * @brief Atomic subtract of v from reference count.
+ * @param v Value to subtract.
+ */
+ long atomic_sub_from_ref_count(long v);
+#endif // NEED_FIBER_REF_COUNTS
+
+};
+
+#endif // __cplusplus
+
+#endif // ! defined(INCLUDED_CILK_FIBER_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/cilk_malloc.c b/gcc-4.9/libcilkrts/runtime/cilk_malloc.c
new file mode 100644
index 000000000..9d02c52d0
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk_malloc.c
@@ -0,0 +1,84 @@
+/* cilk_malloc.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#include "cilk_malloc.h"
+
+#include <stdlib.h>
+#if defined _WIN32 || defined _WIN64 || defined __linux__
+#include <malloc.h>
+#define HAS_MEMALIGN 1
+#endif
+#ifdef __VXWORKS__
+#define HAS_MEMALIGN 1
+#include <memLib.h>
+#endif
+
+#define PREFERRED_ALIGNMENT 64 /* try to keep runtime system data
+ structures within one cache line */
+
+void *__cilkrts_malloc(size_t size)
+{
+ /* TODO: check for out of memory */
+#ifdef _WIN32
+ return _aligned_malloc(size, PREFERRED_ALIGNMENT);
+#elif defined HAS_MEMALIGN
+ return memalign(PREFERRED_ALIGNMENT, size);
+#else
+ return malloc(size);
+#endif
+}
+
+void *__cilkrts_realloc(void *ptr, size_t size)
+{
+#ifdef _WIN32
+ return _aligned_realloc(ptr, size, PREFERRED_ALIGNMENT);
+#else
+ return realloc(ptr, size);
+#endif
+}
+
+void __cilkrts_free(void *ptr)
+{
+#ifdef _WIN32
+ _aligned_free(ptr);
+#else
+ free(ptr);
+#endif
+}
+
+/* End cilk_malloc.c */
diff --git a/gcc-4.9/libcilkrts/runtime/cilk_malloc.h b/gcc-4.9/libcilkrts/runtime/cilk_malloc.h
new file mode 100644
index 000000000..fa0fa6d5c
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/cilk_malloc.h
@@ -0,0 +1,90 @@
+/* cilk_malloc.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file cilk_malloc.h
+ *
+ * @brief Provides replacement memory allocation functions to allocate
+ * (and free) memory on cache line boundaries, if supported by the OS.
+ *
+ * If aligned memory functions are not provided by the OS, the calls just
+ * pass through to the standard memory allocation functions.
+ */
+
+#ifndef INCLUDED_CILK_MALLOC_DOT_H
+#define INCLUDED_CILK_MALLOC_DOT_H
+
+#include <cilk/common.h>
+#include <stddef.h>
+
+#include "rts-common.h"
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * malloc replacement function to allocate memory aligned on a cache line
+ * boundary if aligned memory allocations are supported by the OS.
+ *
+ * @param size Number of bytes to allocate.
+ *
+ * @return pointer to memory block allocated, or NULL if unsuccessful.
+ */
+COMMON_PORTABLE void *__cilkrts_malloc(size_t size);
+
+/**
+ * realloc replacement function to allocate memory aligned on a cache line
+ * boundary if aligned memory allocations are supported by the OS.
+ *
+ * @param ptr Block to be reallocated.
+ * @param size Number of bytes to allocate.
+ *
+ * @return pointer to memory block allocated, or NULL if unsuccessful.
+ */
+COMMON_PORTABLE void *__cilkrts_realloc(void *ptr, size_t size);
+
+/**
+ * free replacement function to deallocate memory aligned on a cache line
+ * boundary if aligned memory allocations are supported by the OS.
+ *
+ * @param ptr Block to be freed.
+ */
+COMMON_PORTABLE void __cilkrts_free(void *ptr);
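+
+/* Usage sketch (illustrative only): memory obtained from __cilkrts_malloc
+ * must be released with __cilkrts_free, since on some platforms it comes
+ * from an aligned allocator rather than plain malloc:
+ *
+ *   struct foo *p = (struct foo *)__cilkrts_malloc(sizeof(struct foo));
+ *   if (p) {
+ *       ... use p; it starts on a cache-line boundary where supported ...
+ *       __cilkrts_free(p);
+ *   }
+ */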
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_CILK_MALLOC_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/component.h b/gcc-4.9/libcilkrts/runtime/component.h
new file mode 100644
index 000000000..64ff3e5fc
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/component.h
@@ -0,0 +1,52 @@
+/* component.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#ifndef INCLUDED_COMPONENT_DOT_H
+#define INCLUDED_COMPONENT_DOT_H
+
+#define COMPONENT_NAME "Intel® Cilk™ Plus Runtime"
+
+#define COMPONENT_INTERNAL_NAME COMPONENT_NAME
+
+#define COMPONENT_FILENAME "CILKRTS20"
+
+#define BuildVersionString(_major, _minor, _build, _rev) #_major "," #_minor "," #_build "," #_rev
+
+#define COMPONENT_VERSION_STRING BuildVersionString (VERSION_MAJOR, VERSION_MINOR, VERSION_BUILD, VERSION_REVISION)
+
+#endif // ! defined(INCLUDED_COMPONENT_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/config/generic/cilk-abi-vla.c b/gcc-4.9/libcilkrts/runtime/config/generic/cilk-abi-vla.c
new file mode 100644
index 000000000..98fefa101
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/config/generic/cilk-abi-vla.c
@@ -0,0 +1,107 @@
+/* cilk-abi-vla.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+/*
+ * Implementation of Variable Length Array (VLA) ABI.
+ *
+ * The compiler calls these functions to allocate Variable Length Arrays
+ * at runtime. The compiler must guarantee that __cilkrts_stack_free() is
+ * called to cleanup any memory allocated by __cilkrts_stack_alloc().
+ *
+ * This generic implementation always allocates the memory from the heap.
+ * Optimally, the implementation should expand the frame of the calling
+ * function if possible, since that will be faster. See the x86 version
+ * for one possible implementation.
+ */
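+
+/*
+ * Illustrative call sequence only (a sketch; the real calls are emitted by
+ * the compiler, and the size, alignment and tag values here are assumptions):
+ *
+ *     void *vla = __cilkrts_stack_alloc(sf, n * sizeof(int),
+ *                                       0,   // distance_from_sp_to_alloca_area
+ *                                       16,  // align
+ *                                       1);  // needs_tag
+ *     // ... use the array ...
+ *     __cilkrts_stack_free(sf, vla, n * sizeof(int), 0, 16, 0);
+ */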
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "internal/abi.h"
+#include "cilk-abi-vla-internal.h"
+
+#define c_cilk_ptr_from_heap 0xc2f2f00d
+#define c_cilk_ptr_from_stack 0xc3f30d0f
+
+// Allocate space for a variable length array
+CILK_ABI(__cilkrts_void_ptr)
+__cilkrts_stack_alloc(
+ __cilkrts_stack_frame *sf,
+ size_t size,
+ size_t distance_from_sp_to_alloca_area,
+ uint32_t align, // align is always >= minimum stack alignment and
+ // >= ptr_size as well, and must be a power of 2.
+ uint32_t needs_tag // non-zero if the pointer being returned needs to
+ // be tagged
+)
+{
+    // full_size will be a multiple of align. The generic implementation
+    // never tags the allocation, so no extra marker space is reserved.
+ size_t full_size = (size + align - 1) & ~(align - 1);
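+    // For example, size = 100 with align = 16 gives
+    // full_size = (100 + 15) & ~15 = 112.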
+
+ // Allocate memory from the heap. The compiler is responsible
+ // for guaranteeing us a chance to free it before the function
+ // exits
+
+ return (void *)vla_internal_heap_alloc(sf, full_size, align);
+}
+
+// Free the space allocated for a variable length array.
+CILK_ABI(void)
+__cilkrts_stack_free(
+ __cilkrts_stack_frame *sf,
+ void *p,
+ size_t size,
+ size_t distance_from_sp_to_alloca_area,
+ uint32_t align, // same requirements as for align in allocation,
+ // and must match alignment that was passed when
+ // doing the allocation
+ uint32_t known_from_stack // non-zero if this is known to be allocated
+ // on the stack, and therefore has no tag
+)
+{
+    // full_size will be a multiple of align, matching the rounding done
+    // in __cilkrts_stack_alloc() above.
+ size_t full_size = (size + align - 1) & ~(align - 1);
+
+ // Just free the allocated memory to the heap since we don't know
+ // how to expand/contract the calling frame
+    vla_internal_heap_free(p, full_size);
+}
diff --git a/gcc-4.9/libcilkrts/runtime/config/generic/os-fence.h b/gcc-4.9/libcilkrts/runtime/config/generic/os-fence.h
new file mode 100644
index 000000000..841307a52
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/config/generic/os-fence.h
@@ -0,0 +1,53 @@
+/* os-fence.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/*
+ * void __cilkrts_fence(void)
+ *
+ * Executes an MFENCE instruction to serialize all load and store instructions
+ * that were issued prior the MFENCE instruction. This serializing operation
+ * guarantees that every load and store instruction that precedes the MFENCE
+ * instruction is globally visible before any load or store instruction that
+ * follows the MFENCE instruction. The MFENCE instruction is ordered with
+ * respect to all load and store instructions, other MFENCE instructions, any
+ * SFENCE and LFENCE instructions, and any serializing instructions (such as
+ * the CPUID instruction).
+ */
+
+COMMON_SYSDEP void __cilkrts_fence(void); ///< MFENCE instruction
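+
+/*
+ * A minimal sketch of how a generic port might define the declaration above
+ * (illustrative only, assuming GCC-style builtins are available; it is not
+ * part of this header):
+ *
+ *     COMMON_SYSDEP void __cilkrts_fence(void)
+ *     {
+ *         __sync_synchronize();   // full memory barrier
+ *     }
+ */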
+
diff --git a/gcc-4.9/libcilkrts/runtime/config/generic/os-unix-sysdep.c b/gcc-4.9/libcilkrts/runtime/config/generic/os-unix-sysdep.c
new file mode 100644
index 000000000..fda7fc414
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/config/generic/os-unix-sysdep.c
@@ -0,0 +1,94 @@
+/* os-unix-sysdep.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *************************************************************************
+ *
+ * This file contains generic implementations of system-specific code for
+ * Unix-based systems
+ */
+
+#include "os.h"
+#include "sysdep.h"
+
+/*
+ * The cycle counter is used for debugging. This function is only called if
+ * CILK_PROFILE is defined when the runtime is built.
+ */
+COMMON_SYSDEP unsigned long long __cilkrts_getticks(void)
+{
+# warning "unimplemented cycle counter"
+ return 0;
+}
+
+/*
+ * A "short pause" - called from the Cilk runtime's spinloops.
+ */
+COMMON_SYSDEP void __cilkrts_short_pause(void)
+{
+# warning __cilkrts_short_pause empty
+}
+
+/*
+ * Interlocked exchange - used to implement the Cilk runtime's spinloops
+ */
+COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x)
+{
+ x = __sync_lock_test_and_set(ptr, x);
+ return x;
+}
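+
+/*
+ * Illustrative use only (a sketch, not part of the runtime sources): the
+ * spinloops mentioned above can build a test-and-set lock on this exchange,
+ * roughly
+ *
+ *     while (__cilkrts_xchg(&lock, 1) != 0)
+ *         __cilkrts_short_pause();
+ *
+ * spinning until the previous value was 0 and pausing between attempts.
+ */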
+
+
+/*
+ * Restore the floating point state that is stored in a stack frame at each
+ * spawn. This should be called each time a frame is resumed.
+ *
+ * Only valid for IA32 and Intel64 processors.
+ */
+void restore_x86_fp_state (__cilkrts_stack_frame *sf)
+{
+}
+
+
+/*
+ * Save the floating point state to the __cilkrts_stack_frame at each spawn.
+ *
+ * Architecture-specific - Should only be needed on IA32 and Intel64
+ * processors.
+ */
+void sysdep_save_fp_ctrl_state(__cilkrts_stack_frame *sf)
+{
+}
+
diff --git a/gcc-4.9/libcilkrts/runtime/config/x86/cilk-abi-vla.c b/gcc-4.9/libcilkrts/runtime/config/x86/cilk-abi-vla.c
new file mode 100644
index 000000000..38c2630a1
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/config/x86/cilk-abi-vla.c
@@ -0,0 +1,441 @@
+/* cilk-abi-vla.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+/*
+ * Implementation of Variable Length Array (VLA) ABI.
+ *
+ * __cilkrts_stack_alloc() and __cilkrts_stack_free must be compiled
+ * such that ebp/rbp is used for the stack frames. This is done by having
+ * each of them use alloca, which forces the special frame types needed on
+ * each of the ABIs. Additionally, for some forms of stack frame, special
+ * care must be taken because the alloca space may not be at the bottom of the
+ * stack frame of the caller. For Intel64 windows, and for some options
+ * with other ABIs, a preallocated parameter block may exist on the stack
+ * at a lower address than the alloca. If this is the case, the parameter
+ * distance_from_sp_to_alloca_area will be non-zero, and will indicate how
+ * much pre-allocated parameter space resides in the caller's stack frame
+ * between the alloca area, and the bottom of the stack when the call to
+ * the cilkrts is made. As such, when non-zero it also includes any space
+ * used for passing the cilkrts_stack_alloc or cilkrts_stack_free parameters.
+ */
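+
+/*
+ * Rough picture of the caller's frame when distance_from_sp_to_alloca_area
+ * is non-zero (an illustration of the description above, not normative ABI
+ * text):
+ *
+ *     ... caller's locals ...
+ *     alloca area                      <- the space being grown or shrunk
+ *     pre-allocated parameter block    <- distance_from_sp_to_alloca_area
+ *     sp at the time of the call          bytes between sp and the alloca area
+ */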
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+// Getting a definition for alloca appears to be a pain in the butt. Here's
+// a variant on what's recommended in the autoconf doc
+#if defined _MSC_VER
+# include <malloc.h>
+# define alloca _alloca
+#elif defined HAVE_ALLOCA_H
+# include <alloca.h>
+#elif defined __GNUC__
+# define alloca __builtin_alloca
+#elif defined _AIX
+# define alloca __alloca
+#else
+# include <stddef.h>
+# ifdef __cplusplus
+extern "C"
+# endif
+void *alloca (size_t);
+#endif
+
+#ifdef _WIN32
+# define INLINE static __inline
+# pragma warning(disable:1025) // Don't whine about zero extending result of unary operation
+#else
+# define INLINE static inline
+#endif
+
+
+#include "internal/abi.h"
+#include "cilk-abi-vla-internal.h"
+
+#if defined(__x86_64) || defined(_M_X64)
+INLINE void setsp(void *val)
+{
+ __asm__("movq %0, %%rsp" : : "r"(val): "rsp");
+}
+INLINE char* getsp(void)
+{
+ void *res;
+
+ __asm__("movq %%rsp, %0" : "=r"(res): : "rsp");
+ return res;
+}
+INLINE char* getbp(void)
+{
+ void *res;
+
+ __asm__("movq %%rbp, %0" : "=r"(res): : "rbp");
+ return res;
+}
+INLINE void copy_frame_down_and_move_bp(
+ char *dst,
+ char *src,
+ size_t cpy_bytes,
+ char *new_ebp
+)
+{
+ // In this version, dst is guaranteed to be lower address than src,
+ // therefore copying upwards from src into dst is safe in case
+ // there is overlap. The number of bytes is also guaranteed to be
+ // a multiple of 8, and the copy is done in 64 bit word chunks for
+ // best efficiency.
+ __asm__(
+ "movq %0, %%rdi;"
+ "movq %1, %%rsi;"
+ "movq %2, %%rcx;"
+ "shrq $3, %%rcx;"
+ "rep movsq;"
+ "movq %3, %%rbp" :
+ :
+ "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) :
+ "rsi", "rdi", "rcx", "rbp", "memory");
+}
+INLINE void copy_frame_up_and_move_bp(
+ char *dst,
+ char *src,
+ size_t cpy_bytes,
+ char *new_ebp
+)
+{
+ // In this version, dst is guaranteed to be higher address than src,
+ // therefore copying downwards from src into dst is safe in case
+ // there is overlap. The number of bytes is also guaranteed to be
+ // a multiple of 8, and the copy is done in 64 bit word chunks for
+ // best efficiency.
+ dst += cpy_bytes - 8;
+ src += cpy_bytes - 8;
+ __asm__(
+ "movq %0, %%rdi;"
+ "movq %1, %%rsi;"
+ "movq %2, %%rcx;"
+ "shrq $3, %%rcx;"
+ "std; rep movsq; cld;"
+        "movq %3, %%rbp;" :
+ :
+ "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) :
+ "rsi", "rdi", "rcx", "rbp", "memory");
+}
+#else
+INLINE void setsp(void *val)
+{
+ __asm__("movl %0, %%esp" : : "r"(val): "esp");
+}
+INLINE char* getsp(void)
+{
+ void *res;
+
+ __asm__("movl %%esp, %0" : "=r"(res): : "esp");
+ return res;
+}
+INLINE char* getbp(void)
+{
+ void *res;
+
+ __asm__("movl %%ebp, %0" : "=r"(res): : "ebp");
+ return res;
+}
+INLINE void copy_frame_down_and_move_bp(
+ char *dst,
+ char *src,
+ size_t cpy_bytes,
+ char *new_ebp
+)
+{
+ // In this version, dst is guaranteed to be lower address than src,
+ // therefore copying upwards from src into dst is safe in case
+ // there is overlap. The number of bytes is also guaranteed to be
+ // a multiple of 4, and the copy is done in 32 bit word chunks for
+ // best efficiency.
+ __asm__(
+ "movl %0, %%edi;"
+ "movl %1, %%esi;"
+ "movl %2, %%ecx;"
+ "shrl $2, %%ecx;"
+ "rep movsd;"
+ "movl %3, %%ebp" :
+ :
+ "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) :
+ "esi", "edi", "ecx", "ebp", "memory");
+}
+INLINE void copy_frame_up_and_move_bp(
+ char *dst,
+ char *src,
+ size_t cpy_bytes,
+ char *new_ebp
+)
+{
+ // In this version, dst is guaranteed to be higher address than src,
+ // therefore copying downwards from src into dst is safe in case
+ // there is overlap. The number of bytes is also guaranteed to be
+ // a multiple of 4, and the copy is done in 32 bit word chunks for
+ // best efficiency.
+ dst += cpy_bytes - 4;
+ src += cpy_bytes - 4;
+ __asm__(
+ "movl %0, %%edi;"
+ "movl %1, %%esi;"
+ "movl %2, %%ecx;"
+ "shrl $2, %%ecx;"
+ "std; rep movsd; cld;"
+ "movl %3, %%ebp" :
+ // "=D"(dst), "=S"(src), "=C"(cpy_bytes) :
+ :
+ "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) :
+ "esi", "edi", "ecx", "ebp", "memory");
+}
+#endif
+
+
+#define c_cilk_ptr_from_heap 0xc2f2f00d
+#define c_cilk_ptr_from_stack 0xc3f30d0f
+
+CILK_ABI(__cilkrts_void_ptr)
+__cilkrts_stack_alloc(
+ __cilkrts_stack_frame *sf,
+ size_t size,
+ size_t distance_from_sp_to_alloca_area,
+ uint32_t align, // align is always >= minimum stack alignment and
+ // >= ptr_size as well, and must be a power of 2.
+ uint32_t needs_tag // non-zero if the pointer being returned needs to
+ // be tagged
+)
+{
+#ifdef __INTEL_COMPILER
+ // full_size will be a multiple of align, and contains
+ // enough extra space to allocate a marker.
+ size_t full_size = (size + align - 1) & ~(align - 1);
+
+ if (needs_tag) {
+ full_size += align;
+ }
+
+ char *t;
+ if (sf->worker != 0 &&
+ ((sf->flags & CILK_FRAME_UNSYNCHED) != 0)) {
+ t = vla_internal_heap_alloc(sf, full_size, align);
+ if (needs_tag) {
+ t += align;
+ ((uint32_t*)t)[-1] = c_cilk_ptr_from_heap;
+ }
+ return (void *)t;
+ }
+
+    // The stack is still synced, so allocate full_size from esp and record,
+    // in the 32 bits immediately below the allocated space, that this
+    // space was allocated on the stack.
+ char *old_ebp = getbp();
+ char *old_esp = getsp();
+
+ // make top_ptr point to base of first parameter.
+ char *top_ptr = ((char *)(_AddressOfReturnAddress()) +
+ sizeof(char *));
+ size_t param_size = 0;
+
+#if defined(__x86_64)
+    // For the Intel64 Linux & MACH ABIs, all the parameters were passed in
+    // registers, so the top of the stack frame above the return address
+    // is just the size of the return address plus
+    // distance_from_sp_to_alloca_area on the chance that the alloca
+    // area isn't at the very bottom of the calling function's stack.
+#elif defined(__MACH__)
+    // For IA32 MACH, parameter size is always a multiple of 16
+    // bytes to keep the stack 16-byte aligned. So we need to round the
+    // number of parameters up to a multiple of 4.
+ param_size = 8 * sizeof(char *);
+#else
+    // For both the Windows Intel64 ABI and the IA32 Windows and
+    // Linux ABIs, space is reserved on the stack for all these
+    // parameters. param_size is 5 * the size of a stack slot.
+ param_size = 5 * sizeof(char *);
+#endif
+
+ // now make top_ptr point above the params, or if
+ // distance_from_sp_to_alloca_area is not zero, make
+ // it point above that area. When non-zero,
+    // distance_from_sp_to_alloca_area is expected to contain
+ // the parameter space, so we only add one or the other,
+ // not both.
+ top_ptr += (distance_from_sp_to_alloca_area != 0) ?
+ distance_from_sp_to_alloca_area : param_size;
+
+ // t needs to end up at current value of top_ptr less full_size and less
+ // distance_from_sp_to_alloca_area and
+ // then rounded down to the alignment needed. Then we have to bump
+ // esp down by current frame_size, so that when all is done with respect
+ // to executing the return sequence, the final value of esp will be the
+ // same value as t.
+ t = (top_ptr - full_size) - distance_from_sp_to_alloca_area;
+ intptr_t temp = (intptr_t)t;
+ temp &= ~((intptr_t)(align - 1));
+ t = (char *)temp;
+
+ // ok, the value of t is set where we need it. Now set esp
+ // to the value of t less the current frame size.
+ // So now when we do regular return esp should be left such
+ // that it has moved down by full_size.
+ size_t cur_fm_size = (top_ptr - old_esp);
+ char *new_esp = t - cur_fm_size;
+ char *new_ebp = old_ebp - (old_esp - new_esp);
+
+ // extend the stack down by at least the difference between where
+ // I want it to be and where it currently is. This should take care
+ // of touching any pages necessary.
+ char *foo = alloca(old_esp - new_esp);
+ setsp(foo < new_esp ? foo : new_esp);
+
+ // Now set esp exactly where I want it.
+ // setsp(new_esp);
+
+ copy_frame_down_and_move_bp(new_esp, old_esp, cur_fm_size, new_ebp);
+
+ if (needs_tag) {
+ t += align;
+ ((uint32_t*)t)[-1] = c_cilk_ptr_from_stack;
+ }
+
+ return t;
+#else // Not __INTEL_COMPILER
+ // Not supported unless we can figure out how to get the size of the frame
+ return NULL;
+#endif
+}
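+
+/*
+ * Illustrative layout of a tagged allocation returned above (a sketch,
+ * assuming needs_tag != 0):
+ *
+ *     t              ->  first byte available to the caller
+ *     t - 4          ->  32-bit marker, c_cilk_ptr_from_stack or
+ *                        c_cilk_ptr_from_heap
+ *     t - align      ->  start of the full_size block that was reserved
+ *
+ * __cilkrts_stack_free() below reads the marker when the compiler cannot
+ * tell it whether the pointer came from the stack.
+ */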
+
+// This frees the space allocated for a variable length array.
+CILK_ABI(void)
+__cilkrts_stack_free(
+ __cilkrts_stack_frame *sf,
+ void *p,
+ size_t size,
+ size_t distance_from_sp_to_alloca_area,
+ uint32_t align, // same requirements as for align in allocation,
+ // and must match alignment that was passed when
+ // doing the allocation
+ uint32_t known_from_stack // non-zero if this is known to be allocated
+ // on the stack, and therefore has no tag
+)
+{
+#ifdef __INTEL_COMPILER
+ uint32_t *t = (uint32_t*)p;
+
+ // full_size will be a multiple of align, and contains
+ // enough extra space to allocate a marker if one was needed.
+ size_t full_size = (size + align - 1) & ~(align - 1);
+ if (known_from_stack == 0) {
+ // if the compiler hasn't told the run-time that this is
+ // known to be on the stack, then this pointer must have been
+ // tagged such that the run-time can tell.
+ assert(t[-1] == c_cilk_ptr_from_stack ||
+ t[-1] == c_cilk_ptr_from_heap);
+
+ known_from_stack = t[-1] == c_cilk_ptr_from_stack;
+ full_size += align; // accounts for extra space for marker
+ t = (uint32_t *)(((char *)t) - align);
+ }
+
+ if (known_from_stack) {
+        // alloca usage forces an ebp/rbp-based stack frame even though
+        // the allocation is 0 bytes and unused.
+ char *foo = alloca(0);
+ if (sf->worker == 0 || (sf->flags & CILK_FRAME_UNSYNCHED) == 0) {
+ // p was allocated from current stack frame and we
+ // are synced on current stack frame. Return the
+ // amount of the stack that needs to be freed.
+ char *old_ebp = getbp();
+ char *old_esp = getsp();
+
+ // make top_ptr point to base of first parameter.
+ char *top_ptr = ((char *)(_AddressOfReturnAddress()) +
+ sizeof(char *));
+ size_t param_size = 0;
+
+#if defined(__x86_64)
+            // For the Intel64 Linux & MACH ABIs, all the parameters were
+            // passed in registers, so the top of the stack frame above the
+            // return address is just the size of the return address plus
+            // distance_from_sp_to_alloca_area on the chance that the alloca
+            // area isn't at the very bottom of the calling function's stack.
+#elif defined(__MACH__)
+            // For IA32 MACH, parameter size is always a multiple of 16
+            // bytes to keep the stack 16-byte aligned. So we need to round
+            // the number of parameters up to a multiple of 4.
+ param_size = 8 * sizeof(char *);
+#else
+            // For both the Windows Intel64 ABI and the IA32 Windows and
+            // Linux ABIs, space is reserved on the stack for all these
+            // parameters. param_size is 6 * the size of a stack slot.
+ param_size = 6 * sizeof(char *);
+#endif
+
+ // now make top_ptr point above the params, or if
+ // distance_from_sp_to_alloca_area is not zero, make
+ // it point above that area. When non-zero,
+            // distance_from_sp_to_alloca_area is expected to contain
+ // the parameter space, so we only add one or the other,
+ // not both.
+ top_ptr += (distance_from_sp_to_alloca_area != 0) ?
+ distance_from_sp_to_alloca_area : param_size;
+
+ size_t cur_fm_size = (top_ptr - old_esp);
+ char *new_esp = old_esp + full_size;
+ char *new_ebp = old_ebp + full_size;
+
+ copy_frame_up_and_move_bp(new_esp, old_esp, cur_fm_size, new_ebp);
+ setsp(new_esp);
+ }
+ else {
+ // p was allocated on stack frame, but that is
+ // no longer the current stack frame. Need to adjust the
+ // saved esp that is somewhere in the cilk runtime so that
+ // on sync, esp will be cut back correctly.
+ vla_free_from_original_stack(sf, full_size);
+ }
+ }
+ else {
+ vla_internal_heap_free(t, full_size);
+ }
+#else // Not __INTEL_COMPILER
+ // Not supported unless we can figure out how to get the size of the frame
+#endif
+}
diff --git a/gcc-4.9/libcilkrts/runtime/config/x86/os-fence.h b/gcc-4.9/libcilkrts/runtime/config/x86/os-fence.h
new file mode 100644
index 000000000..ec704e94e
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/config/x86/os-fence.h
@@ -0,0 +1,72 @@
+/* os-fence.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/* gcc before 4.4 does not implement __sync_synchronize properly */
+#if (__ICC >= 1110 && !(__MIC__ || __MIC2__)) \
+ || (!defined __ICC && __GNUC__ * 10 + __GNUC_MINOR__ > 43)
+# define HAVE_SYNC_INTRINSICS 1
+#endif
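+
+/* For example, GCC 4.8 gives __GNUC__ * 10 + __GNUC_MINOR__ = 48 > 43, so
+ * HAVE_SYNC_INTRINSICS is defined; GCC 4.3 yields 43 and falls through to
+ * the inline-assembly fence below. */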
+
+
+/*
+ * void __cilkrts_fence(void)
+ *
+ * Executes an MFENCE instruction to serialize all load and store instructions
+ * that were issued prior the MFENCE instruction. This serializing operation
+ * guarantees that every load and store instruction that precedes the MFENCE
+ * instruction is globally visible before any load or store instruction that
+ * follows the MFENCE instruction. The MFENCE instruction is ordered with
+ * respect to all load and store instructions, other MFENCE instructions, any
+ * SFENCE and LFENCE instructions, and any serializing instructions (such as
+ * the CPUID instruction).
+ */
+#ifdef HAVE_SYNC_INTRINSICS
+# define __cilkrts_fence() __sync_synchronize()
+#elif defined __ICC || defined __GNUC__
+ /* mfence is a strict subset of lock add but takes longer on many
+ * processors. */
+// # define __cilkrts_fence() __asm__ volatile ("mfence")
+  /* On MIC, the fence seems to be completely unnecessary. Just for
+   * simplicity of the first implementation, it defaults to the x86 sequence. */
+# define __cilkrts_fence() __asm__ volatile ("lock addl $0,(%rsp)")
+// #elif defined _WIN32
+// # pragma intrinsic(_ReadWriteBarrier)
+// # define __cilkrts_fence() _ReadWriteBarrier()
+#else
+COMMON_SYSDEP void __cilkrts_fence(void); ///< MFENCE instruction
+#endif
diff --git a/gcc-4.9/libcilkrts/runtime/config/x86/os-unix-sysdep.c b/gcc-4.9/libcilkrts/runtime/config/x86/os-unix-sysdep.c
new file mode 100644
index 000000000..b505ddfb9
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/config/x86/os-unix-sysdep.c
@@ -0,0 +1,142 @@
+/* os-unix-sysdep.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * Copyright (C) 2009-2014, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *************************************************************************
+ *
+ * This file contains system-specific code for Unix systems
+ */
+
+#include "os.h"
+#include "sysdep.h"
+#include <internal/abi.h>
+
+// On x86 processors (but not MIC processors), the compiler generates code to
+// save the FP state (rounding mode and the like) before calling setjmp. We
+// will need to restore that state when we resume.
+#ifndef __MIC__
+# if defined(__i386__) || defined(__x86_64)
+# define RESTORE_X86_FP_STATE
+# endif // defined(__i386__) || defined(__x86_64)
+#endif // __MIC__
+
+/* timer support */
+COMMON_SYSDEP unsigned long long __cilkrts_getticks(void)
+{
+#if defined __i386__ || defined __x86_64
+ unsigned a, d;
+ __asm__ volatile("rdtsc" : "=a" (a), "=d" (d));
+ return ((unsigned long long)a) | (((unsigned long long)d) << 32);
+#else
+# warning "unimplemented cycle counter"
+ return 0;
+#endif
+}
+
+COMMON_SYSDEP void __cilkrts_short_pause(void)
+{
+#if __ICC >= 1110
+# if __MIC__ || __MIC2__
+ _mm_delay_32(16); // stall for 16 cycles
+# else
+ _mm_pause();
+# endif
+#elif defined __i386__ || defined __x86_64
+ __asm__("pause");
+#else
+# warning __cilkrts_short_pause empty
+#endif
+}
+
+COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x)
+{
+#if defined __i386__ || defined __x86_64
+ /* asm statement here works around icc bugs */
+ __asm__("xchgl %0,%a1" :"=r" (x) : "r" (ptr), "0" (x) :"memory");
+#else
+ x = __sync_lock_test_and_set(ptr, x);
+#endif
+ return x;
+}
+
+/*
+ * The Intel compiler distribution assumes newer CPUs and doesn't yet support
+ * the __builtin_cpu_supports intrinsic added by GCC 4.8, so just return 1 in
+ * that environment.
+ *
+ * This declaration should generate an error when the Intel compiler adds
+ * support for the intrinsic.
+ */
+#ifdef __INTEL_COMPILER
+static inline int __builtin_cpu_supports(const char *feature)
+{
+ return 1;
+}
+#endif
+
+/*
+ * Restore the floating point state that is stored in a stack frame at each
+ * spawn. This should be called each time a frame is resumed.
+ *
+ * Only valid for IA32 and Intel64 processors.
+ */
+void restore_x86_fp_state (__cilkrts_stack_frame *sf) {
+#ifdef RESTORE_X86_FP_STATE
+ if (__builtin_cpu_supports("sse"))
+ {
+ __asm__ ("ldmxcsr %0"
+ :
+ : "m" (sf->mxcsr));
+ }
+ __asm__ ("fnclex\n\t"
+ "fldcw %0"
+ :
+ : "m" (sf->fpcsr));
+#endif
+}
+
+
+void sysdep_save_fp_ctrl_state(__cilkrts_stack_frame *sf)
+{
+// If we're not going to restore, don't bother saving it
+#ifdef RESTORE_X86_FP_STATE
+ if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1)
+ {
+ if (__builtin_cpu_supports("sse"))
+ {
+ __asm__ ("stmxcsr %0" : "=m" (sf->mxcsr));
+ }
+        __asm__ ("fnstcw %0" : "=m" (sf->fpcsr));
+ }
+#endif
+}
+
diff --git a/gcc-4.9/libcilkrts/runtime/doxygen-layout.xml b/gcc-4.9/libcilkrts/runtime/doxygen-layout.xml
new file mode 100644
index 000000000..fabe0ab3c
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/doxygen-layout.xml
@@ -0,0 +1,222 @@
+<doxygenlayout version="1.0">
+
+<!--
+# @copyright
+# Copyright (C) 2011-2013, Intel Corporation
+# All rights reserved.
+#
+# @copyright
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# @copyright
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+-->
+
+ <!-- Navigation index tabs for HTML output -->
+ <navindex>
+ <tab type="mainpage" visible="yes" title=""/>
+ <tab type="pages" visible="yes" title="" intro=""/>
+ <tab type="modules" visible="yes" title="" intro=""/>
+ <tab type="namespaces" visible="yes" title="">
+ <tab type="namespaces" visible="yes" title="" intro=""/>
+ <tab type="namespacemembers" visible="yes" title="" intro=""/>
+ </tab>
+ <tab type="classes" visible="yes" title="Classes, Structs and Unions">
+ <tab type="classes" visible="yes" title="Classes, Structs and Unions" intro=""/>
+ <tab type="classindex" visible="$ALPHABETICAL_INDEX" title=""/>
+ <tab type="hierarchy" visible="yes" title="" intro=""/>
+ <tab type="classmembers" visible="yes" title="" intro=""/>
+ </tab>
+ <tab type="files" visible="yes" title="">
+ <tab type="files" visible="yes" title="" intro=""/>
+ <tab type="globals" visible="yes" title="" intro=""/>
+ </tab>
+ <tab type="globals" visible="yes" title="Global Functions" intro=""/>
+ <tab type="dirs" visible="yes" title="" intro=""/>
+ <tab type="examples" visible="yes" title="" intro=""/>
+ </navindex>
+
+ <!-- Layout definition for a class page -->
+ <class>
+ <briefdescription visible="yes"/>
+ <includes visible="$SHOW_INCLUDE_FILES"/>
+ <inheritancegraph visible="$CLASS_GRAPH"/>
+ <collaborationgraph visible="$COLLABORATION_GRAPH"/>
+ <allmemberslink visible="yes"/>
+ <memberdecl>
+ <nestedclasses visible="yes" title=""/>
+ <publictypes title=""/>
+ <publicslots title=""/>
+ <signals title=""/>
+ <publicmethods title=""/>
+ <publicstaticmethods title=""/>
+ <publicattributes title=""/>
+ <publicstaticattributes title=""/>
+ <protectedtypes title=""/>
+ <protectedslots title=""/>
+ <protectedmethods title=""/>
+ <protectedstaticmethods title=""/>
+ <protectedattributes title=""/>
+ <protectedstaticattributes title=""/>
+ <packagetypes title=""/>
+ <packagemethods title=""/>
+ <packagestaticmethods title=""/>
+ <packageattributes title=""/>
+ <packagestaticattributes title=""/>
+ <properties title=""/>
+ <events title=""/>
+ <privatetypes title=""/>
+ <privateslots title=""/>
+ <privatemethods title=""/>
+ <privatestaticmethods title=""/>
+ <privateattributes title=""/>
+ <privatestaticattributes title=""/>
+ <friends title=""/>
+ <related title="" subtitle=""/>
+ <membergroups visible="yes"/>
+ </memberdecl>
+ <detaileddescription title=""/>
+ <memberdef>
+ <typedefs title=""/>
+ <enums title=""/>
+ <constructors title=""/>
+ <functions title=""/>
+ <related title=""/>
+ <variables title=""/>
+ <properties title=""/>
+ <events title=""/>
+ </memberdef>
+ <usedfiles visible="$SHOW_USED_FILES"/>
+ <authorsection visible="yes"/>
+ </class>
+
+ <!-- Layout definition for a namespace page -->
+ <namespace>
+ <briefdescription visible="yes"/>
+ <memberdecl>
+ <nestednamespaces visible="yes" title=""/>
+ <classes visible="yes" title=""/>
+ <typedefs title=""/>
+ <enums title=""/>
+ <functions title=""/>
+ <variables title=""/>
+ <membergroups visible="yes"/>
+ </memberdecl>
+ <detaileddescription title=""/>
+ <memberdef>
+ <typedefs title=""/>
+ <enums title=""/>
+ <functions title=""/>
+ <variables title=""/>
+ </memberdef>
+ <authorsection visible="yes"/>
+ </namespace>
+
+ <!-- Layout definition for a file page -->
+ <file>
+ <briefdescription visible="no"/>
+ <includegraph visible="$INCLUDE_GRAPH"/>
+ <includedbygraph visible="$INCLUDED_BY_GRAPH"/>
+ <detaileddescription title="Description"/>
+ <includes visible="no"/>
+ <sourcelink visible="yes"/>
+ <memberdecl>
+ <classes visible="yes" title="Structures and Classes"/>
+ <namespaces visible="yes" title=""/>
+ <defines title=""/>
+ <typedefs title=""/>
+ <enums title=""/>
+ <functions title=""/>
+ <variables title=""/>
+ <membergroups visible="yes"/>
+ </memberdecl>
+ <memberdef>
+ <defines title=""/>
+ <typedefs title=""/>
+ <enums title=""/>
+ <functions title=""/>
+ <variables title=""/>
+ </memberdef>
+ <authorsection/>
+ </file>
+
+ <!-- Layout definition for a group page -->
+ <group>
+ <briefdescription visible="yes"/>
+ <groupgraph visible="$GROUP_GRAPHS"/>
+ <memberdecl>
+ <classes visible="yes" title=""/>
+ <namespaces visible="yes" title=""/>
+ <dirs visible="yes" title=""/>
+ <nestedgroups visible="yes" title=""/>
+ <files visible="yes" title=""/>
+ <defines title=""/>
+ <typedefs title=""/>
+ <enums title=""/>
+ <enumvalues title=""/>
+ <functions title=""/>
+ <variables title=""/>
+ <signals title=""/>
+ <publicslots title=""/>
+ <protectedslots title=""/>
+ <privateslots title=""/>
+ <events title=""/>
+ <properties title=""/>
+ <friends title=""/>
+ <membergroups visible="yes"/>
+ </memberdecl>
+ <detaileddescription title=""/>
+ <memberdef>
+ <pagedocs/>
+ <inlineclasses title=""/>
+ <defines title=""/>
+ <typedefs title=""/>
+ <enums title=""/>
+ <enumvalues title=""/>
+ <functions title=""/>
+ <variables title=""/>
+ <signals title=""/>
+ <publicslots title=""/>
+ <protectedslots title=""/>
+ <privateslots title=""/>
+ <events title=""/>
+ <properties title=""/>
+ <friends title=""/>
+ </memberdef>
+ <authorsection visible="yes"/>
+ </group>
+
+ <!-- Layout definition for a directory page -->
+ <directory>
+ <briefdescription visible="yes"/>
+ <directorygraph visible="yes"/>
+ <memberdecl>
+ <dirs visible="yes"/>
+ <files visible="yes"/>
+ </memberdecl>
+ <detaileddescription title=""/>
+ </directory>
+</doxygenlayout>
diff --git a/gcc-4.9/libcilkrts/runtime/doxygen.cfg b/gcc-4.9/libcilkrts/runtime/doxygen.cfg
new file mode 100644
index 000000000..684dcb51b
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/doxygen.cfg
@@ -0,0 +1,1774 @@
+# Doxyfile 1.7.4
+
+# @copyright
+# Copyright (C) 2011-2013, Intel Corporation
+# All rights reserved.
+#
+# @copyright
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# @copyright
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = "Intel Cilk Plus Runtime"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER =
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer
+# a quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF =
+
+# With the PROJECT_LOGO tag one can specify an logo or icon that is
+# included in the documentation. The maximum height of the logo should not
+# exceed 55 pixels and the maximum width should not exceed 200 pixels.
+# Doxygen will copy the logo to the output directory.
+
+PROJECT_LOGO =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY =
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given extension.
+# Doxygen has a built-in mapping, but you can override or extend it using this
+# tag. The format is ext=language, where ext is a file extension, and language
+# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C,
+# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make
+# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
+# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions
+# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
+
+EXTENSION_MAPPING =
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate getter
+# and setter methods for a property. Setting this option to YES (the default)
+# will make doxygen replace the get and set methods by a property in the
+# documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
+# unions are shown inside the group in which they are included (e.g. using
+# @ingroup) instead of on a separate page (for HTML and Man pages) or
+# section (for LaTeX and RTF).
+
+INLINE_GROUPED_CLASSES = NO
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT = NO
+
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penalty.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will roughly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols
+
+SYMBOL_CACHE_SIZE = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespaces are hidden.
+
+EXTRACT_ANON_NSPACES = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
+# will list include files with double quotes in the documentation
+# rather than with sharp brackets.
+
+FORCE_LOCAL_INCLUDES = NO
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
+# will sort the (brief and detailed) documentation of class members so that
+# constructors and destructors are listed first. If set to NO (the default)
+# the constructors will appear in the respective orders defined by
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
+# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES = YES
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
+# do proper type resolution of all parameters of a function it will reject a
+# match between the prototype and the implementation of a member function even
+# if there is only one candidate or it is obvious which candidate to choose
+# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
+# will still accept a match between prototype and implementation in such cases.
+
+STRICT_PROTO_MATCHING = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or macro consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and macros in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES = NO
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.
+# This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER =
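+
+# As an illustration only (the filter is left unset here): a hypothetical
+# wrapper script, say ./getversion.sh, that runs
+#     git log -1 --pretty=format:%h -- "$1"
+# would stamp each documented file with the abbreviated hash of its last
+# commit, via FILE_VERSION_FILTER = "./getversion.sh".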
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. The create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option.
+# You can optionally specify a file name after the option, if omitted
+# DoxygenLayout.xml will be used as the name of the layout file.
+
+LAYOUT_FILE = doxygen-layout.xml
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR = YES
+
+# The WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = ./ \
+ ../include/internal/abi.h \
+ ../include/cilk/cilk_api.h \
+ ../include/cilk/common.h \
+ ./readme.dox
+
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
+# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
+# *.f90 *.f *.for *.vhd *.vhdl
+
+FILE_PATTERNS =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE = attributes.h \
+ cilk-ittnotify.h \
+ component.h \
+ rts-common.h \
+ windows-clean.h
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS = _UNWIND_INFO \
+ _UNWIND_CODE \
+ _DISPATCHER_CONTEXT \
+ __cilkrts_stack \
+ pending_exception_info
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+# If FILTER_PATTERNS is specified, this tag will be ignored.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty or if
+# none of the patterns match the file name, INPUT_FILTER is applied.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERNS (if any)
+# and it is also possible to disable source filtering for a specific pattern
+# using *.ext= (so without naming a filter). This option only has effect when
+# FILTER_SOURCE_FILES is enabled.
+
+FILTER_SOURCE_PATTERNS =
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.
+# Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = YES
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header. Note that when using a custom header you are responsible
+# for the proper inclusion of any scripts and style sheets that doxygen
+# needs, which is dependent on the configuration options used.
+# It is advised to generate a default header using "doxygen -w html
+# header.html footer.html stylesheet.css YourConfigFile" and then modify
+# that header. Note that the header is subject to change so you typically
+# have to redo this when upgrading to a newer version of doxygen or when
+# changing the value of configuration settings such as GENERATE_TREEVIEW!
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that
+# the files will be copied as-is; there are no commands or markers available.
+
+HTML_EXTRA_FILES =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
+# Doxygen will adjust the colors in the stylesheet and background images
+# according to this color. Hue is specified as an angle on a colorwheel,
+# see http://en.wikipedia.org/wiki/Hue for more information.
+# For instance the value 0 represents red, 60 is yellow, 120 is green,
+# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
+# The allowed range is 0 to 359.
+
+HTML_COLORSTYLE_HUE = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
+# the colors in the HTML output. For a value of 0 the output will use
+# grayscales only. A value of 255 will produce the most vivid colors.
+
+HTML_COLORSTYLE_SAT = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
+# the luminance component of the colors in the HTML output. Values below
+# 100 gradually make the output lighter, whereas values above 100 make
+# the output darker. The value divided by 100 is the actual gamma applied,
+# so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2,
+# and 100 does not change the gamma.
+
+HTML_COLORSTYLE_GAMMA = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting
+# this to NO can help when comparing the output of multiple runs.
+
+HTML_TIMESTAMP = YES
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
+# Netscape 6.0+, Internet Explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS = NO
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+
+GENERATE_DOCSET = NO
+
+# When the GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME = "Doxygen generated docs"
+
+# When the GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID = com.Intel.CilkPlusRuntime
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+
+DOCSET_PUBLISHER_ID = com.Intel.CilkPlusRuntime
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+
+DOCSET_PUBLISHER_NAME = "Intel Corporation"
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls whether a separate .chi index file is generated (YES) or whether
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
+# that can be used as input for Qt's qhelpgenerator to generate a
+# Qt Compressed Help (.qch) of the generated HTML documentation.
+
+GENERATE_QHP = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
+# add. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME =
+
+# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
+# Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
+# Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
+# will be generated, which together with the HTML files, form an Eclipse help
+# plugin. To install this plugin and make it available under the help contents
+# menu in Eclipse, the contents of the directory containing the HTML and XML
+# files need to be copied into the plugins directory of eclipse. The name of
+# the directory within the plugins directory should be the same as
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
+# the help appears.
+
+GENERATE_ECLIPSEHELP = NO
+
+# A unique identifier for the eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have
+# this name.
+
+ECLIPSE_DOC_ID = org.doxygen.Project
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
+# (range [0,1..20]) that doxygen will group on one line in the generated HTML
+# documentation. Note that a value of 0 will completely suppress the enum
+# values from appearing in the overview section.
+
+ENUM_VALUES_PER_LINE = 4
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW = NO
+
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list.
+
+USE_INLINE_TREES = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
+# links to external symbols imported via tag files in a separate window.
+
+EXT_LINKS_IN_WINDOW = NO
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are
+# not supported properly for IE 6.0, but are supported on all modern browsers.
+# Note that when changing this option you need to delete any form_*.png files
+# in the HTML output before the changes take effect.
+
+FORMULA_TRANSPARENT = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
+# (see http://www.mathjax.org) which uses client side Javascript for the
+# rendering instead of using prerendered bitmaps. Use this if you do not
+# have LaTeX installed or if you want the formulas to look prettier in the HTML
+# output. When enabled you also need to install MathJax separately and
+# configure the path to it using the MATHJAX_RELPATH option.
+
+USE_MATHJAX = NO
+
+# When MathJax is enabled you need to specify the location relative to the
+# HTML output directory using the MATHJAX_RELPATH option. The destination
+# directory should contain the MathJax.js script. For instance, if the mathjax
+# directory is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the
+# mathjax.org site, so you can quickly see the result without installing
+# MathJax, but it is strongly recommended to install a local copy of MathJax
+# before deployment.
+
+MATHJAX_RELPATH = http://www.mathjax.org/mathjax
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box
+# for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using
+# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
+# (GENERATE_DOCSET) there is already a search function so this one should
+# typically be disabled. For large projects the javascript based search engine
+# can be slow; enabling SERVER_BASED_SEARCH may then provide a better solution.
+
+SEARCHENGINE = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a PHP enabled web server instead of at the web client
+# using Javascript. Doxygen will generate the search PHP script and index
+# file to put on the web server. The advantage of the server
+# based approach is that it scales better to large projects and allows
+# full text search. The disadvantages are that it is more difficult to set up
+# and does not have live searching capabilities.
+
+SERVER_BASED_SEARCH = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+# Note that when enabling USE_PDFLATEX this option is only used for
+# generating bitmaps for formulas in the HTML output, but not in the
+# Makefile that is written to the output directory.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
+# the generated latex document. The footer should contain everything after
+# the last chapter. If it is left blank doxygen will generate a
+# standard footer. Notice: only use this tag if you know what you are doing!
+
+LATEX_FOOTER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
+# source code with syntax highlighting in the LaTeX output.
+# Note that which sources are shown also depends on other settings
+# such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader. This is useful
+# if you want to understand what is going on. On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = YES
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF = YES
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# pointed to by INCLUDE_PATH will be searched when a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED = _WIN32 \
+ COMMON_SYSDEP= \
+ COMMON_PORTABLE= \
+ NON_COMMON= \
+ __CILKRTS_BEGIN_EXTERN_C= \
+ __CILKRTS_END_EXTERN_C= \
+ CILK_API(t)=t \
+ CILK_ABI(t)=t \
+ CILK_ABI_THROWS(t)=t \
+ CALLBACK= \
+ __CILKRTS_INLINE=inline \
+ __CILKRTS_ABI_VERSION=1 \
+ __cplusplus \
+
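+# With the expansion settings above, these annotation macros are stripped
+# before parsing: a declaration written as CILK_API(int), for example, is seen
+# by doxygen as plain "int", so the wrapper macro does not obscure the
+# documented prototype.
+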
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition that
+# overrules the definition found in the source code.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all references to function-like macros
+# that are alone on a line, have an all uppercase name, and do not end with a
+# semicolon, because these will confuse the parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+#
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option also works with HAVE_DOT disabled, but it is recommended to
+# install and use dot, since it yields more powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT = NO
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
+# allowed to run in parallel. When set to 0 (the default) doxygen will
+# base this on the number of processors available in the system. You can set it
+# explicitly to a value larger than 0 to get control over the balance
+# between CPU load and processing speed.
+
+DOT_NUM_THREADS = 0
+
+# By default doxygen will write a font called Helvetica to the output
+# directory and reference it in all dot files that doxygen generates.
+# When you want a differently looking font you can specify the font name
+# using DOT_FONTNAME. You need to make sure dot is able to find the font,
+# which can be done by putting it in a standard location or by setting the
+# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
+# containing the font.
+
+DOT_FONTNAME = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE = 10
+
+# By default doxygen will tell dot to use the output directory to look for the
+# FreeSans.ttf font (which doxygen will put there itself). If you specify a
+# different font using DOT_FONTNAME you can set the path where dot
+# can find it using this tag.
+
+DOT_FONTPATH =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies
+
+GROUP_GRAPHS = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are svg, png, jpg, or gif.
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the
+# \mscfile command).
+
+MSCFILE_DIRS =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lie further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
diff --git a/gcc-4.9/libcilkrts/runtime/except-gcc.cpp b/gcc-4.9/libcilkrts/runtime/except-gcc.cpp
new file mode 100644
index 000000000..bd08d1826
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/except-gcc.cpp
@@ -0,0 +1,597 @@
+/* except-gcc.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#include "except-gcc.h"
+#include "except.h"
+#include "sysdep.h"
+#include "bug.h"
+#include "local_state.h"
+#include "full_frame.h"
+#include "scheduler.h"
+#include "frame_malloc.h"
+#include "pedigrees.h"
+
+#include <stdint.h>
+#include <typeinfo>
+
+#define DEBUG_EXCEPTIONS 0
+
+struct pending_exception_info
+{
+ void make(__cxa_eh_globals *, _Unwind_Exception *, bool);
+ void destruct();
+ bool empty() const;
+ void check() const;
+ /* Active exception at time of suspend. */
+ _Unwind_Exception *active;
+    /* If true, the most recently caught exception is to be rethrown
+ on resume. This handling is technically incorrect but allows
+ running without compiler support; the proper standards-compliant
+ method is to save the exception in the previous field. */
+ bool rethrow;
+ struct __cxa_eh_globals runtime_state;
+};
+
+void pending_exception_info::check() const
+{
+ if (active)
+ CILK_ASSERT((int)runtime_state.uncaughtExceptions > 0);
+}
+
+void pending_exception_info::make(__cxa_eh_globals *state_in,
+ _Unwind_Exception *exc_in, bool rethrow_in)
+{
+ active = exc_in;
+ rethrow = rethrow_in;
+ runtime_state = *state_in;
+ /* Read and clear C++ runtime state. */
+ state_in->caughtExceptions = 0;
+ state_in->uncaughtExceptions = 0;
+#if CILK_LIB_DEBUG
+ check();
+#endif
+}
+
+bool
+pending_exception_info::empty() const
+{
+ return !active && !rethrow && !runtime_state.caughtExceptions &&
+ !runtime_state.uncaughtExceptions;
+}
+
+#if DEBUG_EXCEPTIONS
+#include <stdio.h>
+static void
+decode_exceptions(char *out, size_t len, struct pending_exception_info *info)
+{
+ if (info->empty())
+ snprintf(out, len, "[empty]");
+ else if (info->rethrow)
+ snprintf(out, len, "[rethrow %p]",
+ info->runtime_state.caughtExceptions);
+ else
+ snprintf(out, len, "[throw %p]", (void *)info->active);
+}
+#endif
+
+static void
+save_exception_info(__cilkrts_worker *w,
+ __cxa_eh_globals *state,
+ _Unwind_Exception *exc,
+ bool rethrow,
+ const char *why)
+{
+ struct pending_exception_info *info =
+ (struct pending_exception_info *)__cilkrts_frame_malloc(w, sizeof (struct pending_exception_info));
+ CILK_ASSERT(info);
+ info->make(state, exc, rethrow);
+
+#if DEBUG_EXCEPTIONS
+ {
+ char buf[40];
+ decode_exceptions(buf, sizeof buf, info);
+ fprintf(stderr, "make exception info W%u %p %s (%s)\n",
+ w->self, info, buf, why);
+ }
+#endif
+
+ CILK_ASSERT(w->l->pending_exception == 0);
+ w->l->pending_exception = info;
+}
+
+#if DEBUG_EXCEPTIONS
+#include <stdio.h> /* DEBUG */
+
+static void decode_flags(int flags, char out[9])
+{
+ out[0] = (flags & CILK_FRAME_STOLEN) ? 'S' : '_';
+ out[1] = (flags & CILK_FRAME_UNSYNCHED) ? 'U' : '_';
+ out[2] = (flags & CILK_FRAME_DETACHED) ? 'D' : '_';
+ out[3] = (flags & CILK_FRAME_EXCEPTING) ? 'X' : '_';
+ out[4] = '\0';
+}
+#endif
+
+/* __cilkrts_save_except is called from the runtime epilogue
+ when a function is returning with an exception pending.
+
+ If the function has a parent to which it could return normally,
+ return and have the caller call _Unwind_Resume, the same as if
+ an exception filter had not matched.
+
+ Otherwise save the exception in the worker.
+
+   If this is a return from an ordinary call that must go through
+ the runtime, the assembly epilogue must have saved the call-saved
+ register state in the parent frame. */
+
+extern "C"
+CILK_ABI_THROWS_VOID
+__cilkrts_return_exception(__cilkrts_stack_frame *sf)
+{
+ __cilkrts_worker *w = sf->worker;
+ _Unwind_Exception *exc = (_Unwind_Exception *)sf->except_data;
+
+ CILK_ASSERT(sf->flags & CILK_FRAME_DETACHED);
+ sf->flags &= ~CILK_FRAME_DETACHED;
+
+ /*
+ * If we are in replay mode, and a steal occurred during the recording
+ * phase, stall till a steal actually occurs.
+ */
+ replay_wait_for_steal_if_parent_was_stolen(w);
+
+ /* If this is to be an abnormal return, save the active exception. */
+ if (!__cilkrts_pop_tail(w)) {
+ /* Write a record to the replay log for an attempt to return to a
+ stolen parent. This must be done before the exception handler
+ invokes __cilkrts_leave_frame which will bump the pedigree so
+ the replay_wait_for_steal_if_parent_was_stolen() above will match on
+ replay */
+ replay_record_orphaned(w);
+
+ /* Now that the record/replay stuff is done, update the pedigree */
+ update_pedigree_on_leave_frame(w, sf);
+
+ /* Inline pop_frame; this may not be needed. */
+ w->current_stack_frame = sf->call_parent;
+ sf->call_parent = 0;
+ __cxa_eh_globals *state = __cxa_get_globals();
+
+#if DEBUG_EXCEPTIONS
+ fflush(stdout);
+ char decoded[9];
+ decode_flags(sf->flags, decoded);
+ fprintf(stderr, "__cilkrts_save_except W%u sf %p/%s exc %p [%u %p] suspend\n",
+ w->self, sf, decoded, exc,
+ state->uncaughtExceptions,
+ state->caughtExceptions);
+#endif
+
+ /* Like __cilkrts_save_exception_state except for setting the
+ rethrow flag. */
+ save_exception_info(w, state, exc, exc == NULL, "save_except");
+ {
+ full_frame *ff = w->l->frame_ff;
+ CILK_ASSERT(NULL == ff->pending_exception);
+ ff->pending_exception = w->l->pending_exception;
+ w->l->pending_exception = NULL;
+ }
+ __cilkrts_exception_from_spawn(w, sf); /* does not return */
+ }
+ /* This code path is taken when the parent is attached. It is on
+ the same stack and part of the same full frame. The caller is
+ cleaning up the Cilk frame during unwind and will reraise the
+ exception */
+
+ /* Now that the record/replay stuff is done, update the pedigree */
+ update_pedigree_on_leave_frame(w, sf);
+
+#if DEBUG_EXCEPTIONS /* DEBUG ONLY */
+ {
+ __cxa_eh_globals *state = __cxa_get_globals();
+
+ fflush(stdout);
+ char decoded[9];
+ decode_flags(sf->flags, decoded);
+ fprintf(stderr, "__cilkrts_save_except W%d %p/%s %p->%p [%u %p] escape\n",
+ w->self, sf, decoded, exc,
+ exc ? to_cxx(exc)->nextException : 0,
+ state->uncaughtExceptions,
+ state->caughtExceptions);
+
+ /* XXX This is triggering in the user thread which gets an exception
+ from somewhere but does not get the corresponding runtime exception
+ state.
+ XXX There might be two or more uncaught exceptions. Test could be
+ (uncaught != 0) == (exc != 0). First, design tests to see if that
+ case is otherwise handled correctly. And what if there's an uncaught
+ exception that does not belong to this function? I.e. this is a return
+ from spawn in a destructor. */
+ if (exc)
+ CILK_ASSERT((int)state->uncaughtExceptions > 0);
+ /*CILK_ASSERT(state->uncaughtExceptions == (exc != 0));*/
+ }
+#endif
+
+ /* The parent is attached so this exception can be propagated normally. */
+ return;
+}
+
+/* Save the exception state into the full frame, which is exiting
+ or suspending. */
+extern "C"
+void __cilkrts_save_exception_state(__cilkrts_worker *w, full_frame *ff)
+{
+ save_exception_info(w, __cxa_get_globals(), 0, false, "undo-detach");
+ CILK_ASSERT(NULL == ff->pending_exception);
+ ff->pending_exception = w->l->pending_exception;
+ w->l->pending_exception = NULL;
+}
+
+/* __cilkrts_c_sync_except is like __cilkrts_c_sync except that it
+ saves exception state. __cilkrts_c_sync never returns here and
+ always reinstalls the saved exception state.
+
+ This function must be used because a parent of this function may
+ be propagating an uncaught exception. The uncaught exception
+ count must be saved by the child and passed back to the parent. */
+
+extern "C"
+NORETURN __cilkrts_c_sync_except (__cilkrts_worker *w, __cilkrts_stack_frame *sf)
+{
+ __cxa_eh_globals *state = __cxa_get_globals();
+ _Unwind_Exception *exc = (_Unwind_Exception *)sf->except_data;
+
+ CILK_ASSERT((sf->flags & (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING)) ==
+ (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING));
+ sf->flags &= ~CILK_FRAME_EXCEPTING;
+
+#if DEBUG_EXCEPTIONS
+ fflush(stdout);
+ char decoded[9];
+ decode_flags(sf->flags, decoded);
+ if (exc)
+ fprintf(stderr, "__cilkrts_sync_except W%u %p/%s %p->%p [%u %p]\n",
+ w->self, sf, decoded, exc,
+ to_cxx(exc)->nextException,
+ state->uncaughtExceptions,
+ state->caughtExceptions);
+ else
+ fprintf(stderr, "__cilkrts_sync_except W%d %p/%s none [%u %p]\n",
+ w->self, sf, decoded,
+ state->uncaughtExceptions,
+ state->caughtExceptions);
+#endif
+
+    /* Here the identity of a rethrown exception is always known.
+       If exc is NULL, this call is only to preserve parent state. */
+ save_exception_info(w, state, exc, false, "sync_except");
+#if 0
+ {
+ full_frame *ff = w->l->frame_ff;
+ CILK_ASSERT(NULL == ff->pending_exception);
+ ff->pending_exception = w->l->pending_exception;
+ w->l->pending_exception = NULL;
+ }
+#endif
+ CILK_ASSERT(!std::uncaught_exception());
+ __cilkrts_c_sync(w, sf);
+}
+
+void
+pending_exception_info::destruct()
+{
+ if (active) {
+#if DEBUG_EXCEPTIONS
+ fprintf(stderr, "destroy exception info %p %p\n", this, active);
+#endif
+ _Unwind_DeleteException(active);
+ active = 0;
+ } else {
+#if DEBUG_EXCEPTIONS
+ fprintf(stderr, "destroy exception info %p\n", this);
+#endif
+ }
+ while (runtime_state.caughtExceptions) {
+ __cxa_exception *exc = runtime_state.caughtExceptions;
+ runtime_state.caughtExceptions = exc->nextException;
+#if DEBUG_EXCEPTIONS
+ fprintf(stderr, "destroy caught exception %p\n", this);
+#endif
+ _Unwind_DeleteException(&exc->unwindHeader);
+ }
+}
+
+/*
+ * __cilkrts_merge_pending_exceptions
+ *
+ * Merge the right exception record into the left. The left is logically
+ * earlier.
+ *
+ * The active exception of E is
+ * E->active if it is non-NULL (in which case E->rethrow is false)
+ * unresolved if E->active is NULL and E->rethrow is true
+ * nil if E->active is NULL and E->rethrow is false
+ *
+ * The merged active exception is left active exception if it is not
+ * nil, otherwise the right.
+ *
+ * On entry the left state is synched and can not have an unresolved
+ * exception. The merge may result in an unresolved exception.
+ *
+ * Due to scoping rules at most one of the caught exception lists is
+ * non-NULL.
+ */
+
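+/* Illustrative summary (a sketch derived from the code below, using L/R as
+ * shorthand for the left/right records and "uncaught" for
+ * runtime_state.uncaughtExceptions):
+ *
+ *   L->active   R->active   merged active    merged uncaught count
+ *   ---------   ---------   -------------    ---------------------------
+ *   non-NULL    any         L->active        L + R - (R->active != 0)
+ *   NULL        non-NULL    R->active        L + R
+ *   NULL        NULL        NULL             L + R
+ *
+ * In every case the right record is destructed and freed; the left record
+ * is also freed if the merged state ends up empty. */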
+struct pending_exception_info *
+__cilkrts_merge_pending_exceptions (
+ __cilkrts_worker *w,
+ struct pending_exception_info *left,
+ struct pending_exception_info *right)
+{
+ /* If we've only got one exception, return it */
+
+ if (NULL == left) {
+#if DEBUG_EXCEPTIONS
+ if (right) {
+ char buf[40];
+ decode_exceptions(buf, sizeof buf, right);
+ fprintf(stderr, "__cilkrts merge W%u nil %p -> %p %s\n",
+ w->self, right, right, buf);
+ }
+#endif
+ return right;
+ }
+
+ if (NULL == right) {
+#if DEBUG_EXCEPTIONS
+ if (left) {
+ char buf[40];
+ decode_exceptions(buf, sizeof buf, left);
+ fprintf(stderr, "__cilkrts merge W%u %p nil -> %p %s\n",
+ w->self, left, left, buf);
+ }
+#endif
+ return left;
+ }
+
+#if CILK_LIB_DEBUG
+ /*volatile struct pending_exception_info left_in = *left, right_in = *right;*/
+ left->check();
+ right->check();
+#endif
+
+#if DEBUG_EXCEPTIONS
+ {
+ char buf1[40], buf2[40];
+ decode_exceptions(buf1, sizeof buf1, left);
+ decode_exceptions(buf2, sizeof buf2, right);
+ fprintf(stderr, "__cilkrts merge W%u %p %s %p %s\n",
+ w->self, left, buf1, right, buf2);
+ }
+#endif
+
+ /* It should not be possible for both left and right to
+ have accumulated catch blocks.
+
+ The left exception record may always have a catch
+ chain it kept when its parent was stolen.
+
+ If they are siblings, the right sibling should not
+ have accumulated any net catches. (Catch is lexically
+ scoped.)
+
+ If the right frame is a parent, it should not have entered
+ a catch block without syncing first. If it spawned in a
+ catch block, the child got its catch. */
+ __cxa_exception *caught = left->runtime_state.caughtExceptions;
+ if (caught)
+ CILK_ASSERT(!right->runtime_state.caughtExceptions);
+ else {
+ CILK_ASSERT(!left->rethrow);
+ left->rethrow = right->rethrow;
+ left->runtime_state.caughtExceptions = caught = right->runtime_state.caughtExceptions;
+ right->runtime_state.caughtExceptions = NULL;
+ }
+
+ /* Merge the uncaught exception and count of uncaught exceptions. */
+ const unsigned int right_uncaught = right->runtime_state.uncaughtExceptions;
+ if (!left->active){
+ left->active = right->active; /* could be NULL */
+ right->active = 0;
+ left->runtime_state.uncaughtExceptions += right_uncaught;
+ if (left->active)
+ /* assert is C++ exception */
+ /*CILK_ASSERT(__cxxabiv1::__is_gxx_exception_class(left->active->exception_class))*/;
+ } else {
+ /* Subtract 1 if the right exception is being destructed. */
+ left->runtime_state.uncaughtExceptions += right_uncaught - (right->active != 0);
+ }
+
+ right->destruct();
+ __cilkrts_frame_free(w, right, sizeof *right);
+
+ /* If there is no state left, return NULL. */
+ if (left->empty()) {
+ left->destruct();
+ __cilkrts_frame_free(w, left, sizeof *left);
+ left = NULL;
+ }
+
+#if CILK_LIB_DEBUG
+ if (left)
+ left->check();
+#endif
+
+ return left;
+}
+
+#if 0
+/* __cilkrts_c_resume_except is called from the assembly language
+ restart code when a resumed frame has a pending exception.
+
+ The handler count negation on rethrow was done when the throw was
+ resolved.
+
+ The assembly language runtime must make the throw unwind to
+ the sync, spawn, or other location where the exception should
+ be injected. (This should not happen after a spawn but nothing
+ here depends on there being no exception on steal.)
+
+ This function is unused in the Intel stack based system. */
+extern "C"
+void __cilkrts_c_resume_except (_Unwind_Exception *exc)
+{
+#if DEBUG_EXCEPTIONS
+ fprintf(stderr, "resume exception %p\n", exc);
+#endif
+ _Unwind_Reason_Code why = _Unwind_RaiseException(exc);
+ __cilkrts_bug ("Cilk runtime error: failed to reinstate suspended exception %p (%d)\n", exc, why);
+}
+#endif
+
+/* Restore the caught exception chain. This assumes no C++ exception
+ code will run before the frame is resumed. If there is no exception
+ to be resumed free the object. */
+
+extern "C"
+void __cilkrts_setup_for_execution_sysdep(__cilkrts_worker *w, full_frame *ff)
+{
+ // ASSERT: We own w->lock and ff->lock || P == 1
+
+ __cxa_eh_globals *state = __cxa_get_globals ();
+ struct pending_exception_info *info = w->l->pending_exception;
+
+ if (info == NULL)
+ return;
+
+ w->l->pending_exception = 0;
+
+#if DEBUG_EXCEPTIONS
+ _Unwind_Exception *exc = info->active;
+ if (exc) {
+ fflush(stdout);
+ fprintf(stderr, "__cilkrts_resume_except W%u %p->%p [%u %p]\n",
+ w->self, exc,
+ to_cxx(exc)->nextException,
+ info->runtime_state.uncaughtExceptions,
+ info->runtime_state.caughtExceptions);
+ /*CILK_ASSERT(info->runtime_state.uncaughtExceptions > 0);*/
+ }
+#endif
+
+ if (state->uncaughtExceptions || state->caughtExceptions)
+        __cilkrts_bug("W%u: resuming with non-empty prior exception state %u %p\n",
+                      w->self, state->uncaughtExceptions, state->caughtExceptions);
+
+ *state = info->runtime_state;
+ info->runtime_state.caughtExceptions = 0;
+ info->runtime_state.uncaughtExceptions = 0;
+
+ if (info->rethrow) {
+ info->rethrow = false;
+ /* Resuming function will rethrow. Runtime calls
+ std::terminate if there is no caught exception. */
+ ff->call_stack->flags |= CILK_FRAME_EXCEPTING;
+ }
+ if (info->active) {
+ ff->call_stack->flags |= CILK_FRAME_EXCEPTING;
+ ff->call_stack->except_data = info->active;
+ info->active = 0;
+ }
+
+ if (info->empty()) {
+ info->destruct();
+ __cilkrts_frame_free(w, info, sizeof *info);
+ w->l->pending_exception = NULL;
+ }
+
+#if CILK_LIB_DEBUG
+ if (ff->call_stack->except_data)
+ CILK_ASSERT(std::uncaught_exception());
+#endif
+}
+
+#if 0
+extern "C"
+struct pending_exception_info *__cilkrts_get_exception(__cilkrts_worker *w,
+ __cilkrts_stack_frame *sf)
+{
+ struct pending_exception_info *info = w->l->pending_exception;
+
+ if (info == NULL) {
+ sf->flags &= ~CILK_FRAME_EXCEPTING;
+ return 0;
+ }
+
+ w->l->pending_exception = NULL;
+
+ /* This exception goes into the frame. */
+
+ _Unwind_Exception *exc = info->active;
+ info->active = NULL;
+ info->destruct();
+ __cilkrts_frame_free(w, info, sizeof *info);
+ info = 0;
+ sf->flags |= CILK_FRAME_EXCEPTING;
+ sf->exception = exc;
+ return 0;
+}
+#endif
+
+extern "C"
+void __attribute__((nonnull)) __cilkrts_gcc_rethrow(__cilkrts_stack_frame *sf)
+{
+#ifdef __CYGWIN__
+    // Cygwin doesn't support exceptions, so _Unwind_Resume isn't available,
+    // which means we can't support exceptions either.
+ __cilkrts_bug("The Cygwin implementation of the Intel Cilk Plus runtime doesn't support exceptions\n");
+#else
+ if (sf->except_data) {
+#if CILK_LIB_DEBUG
+ CILK_ASSERT(std::uncaught_exception());
+#endif
+ _Unwind_Resume ((_Unwind_Exception *)sf->except_data);
+ } else {
+ throw;
+ }
+#endif // __CYGWIN__
+}
+
+/* End except-gcc.cpp */
+
diff --git a/gcc-4.9/libcilkrts/runtime/except-gcc.h b/gcc-4.9/libcilkrts/runtime/except-gcc.h
new file mode 100644
index 000000000..aa76adbc2
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/except-gcc.h
@@ -0,0 +1,146 @@
+/* except-gcc.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file except-gcc.h
+ *
+ * @brief ABI for gcc exception handling.
+ *
+ * @par Origin
+ * The code below is generally copied from the Intel Itanium ABI (Intel
+ * download 245370).
+ */
+
+#ifndef INCLUDED_EXCEPT_GCC_DOT_H
+#define INCLUDED_EXCEPT_GCC_DOT_H
+
+#ifndef __cplusplus
+# error except-gcc.h should be used in C++ code only.
+#endif
+
+#include <cilk/common.h>
+#include <exception>
+#include <typeinfo>
+
+struct __cxa_exception;
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/** Unwind reason code (Itanium ABI 6.1.2.1) */
+typedef enum _Unwind_Reason_Code {
+ _URC_NO_REASON = 0,
+ _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
+ _URC_FATAL_PHASE2_ERROR = 2,
+ _URC_FATAL_PHASE1_ERROR = 3,
+ _URC_NORMAL_STOP = 4,
+ _URC_END_OF_STACK = 5,
+ _URC_HANDLER_FOUND = 6,
+ _URC_INSTALL_CONTEXT = 7,
+ _URC_CONTINUE_UNWIND = 8
+} _Unwind_Reason_Code;
+
+typedef struct _Unwind_Exception _Unwind_Exception;
+
+/** Exception cleanup function pointer (Itanium ABI 6.1.2.2) */
+typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code reason,
+ _Unwind_Exception *exc);
+
+/**
+ * @brief Exception unwinding information
+ *
+ * This is copied from the Intel Itanium ABI except that the
+ * private fields are declared unsigned long for binary
+ * compatibility with gcc/g++ on 32 bit machines.
+ */
+struct _Unwind_Exception
+{
+ uint64_t exception_class;
+ _Unwind_Exception_Cleanup_Fn exception_cleanup;
+ unsigned long private_1;
+ unsigned long private_2;
+};
+
+/** Throw or rethrow an exception */
+_Unwind_Reason_Code
+_Unwind_RaiseException(_Unwind_Exception *exception_object);
+
+/** Resume an exception other than by rethrowing it. */
+void _Unwind_Resume(_Unwind_Exception *exception_object);
+
+/** Delete an exception object */
+void _Unwind_DeleteException(_Unwind_Exception *exception_object);
+
+/**
+ * C++ exception ABI.
+ * The following declarations are from
+ *
+ * http://www.codesourcery.com/public/cxx-abi/abi-eh.html#cxx-abi
+ */
+
+struct __cxa_exception {
+ std::type_info * exceptionType;
+ void (*exceptionDestructor)(void *);
+ std::unexpected_handler unexpectedHandler;
+ std::terminate_handler terminateHandler;
+ __cxa_exception * nextException;
+
+ int handlerCount;
+ int handlerSwitchValue;
+ const char * actionRecord;
+ const char * languageSpecificData;
+ void * catchTemp;
+ void * adjustedPtr;
+
+ _Unwind_Exception unwindHeader;
+};
+
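+/**
+ * Recover the enclosing __cxa_exception from a pointer to its unwindHeader
+ * member. unwindHeader is the last field of __cxa_exception, so stepping one
+ * _Unwind_Exception past e and then back one full __cxa_exception yields the
+ * start of the containing object.
+ */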
+static inline __cxa_exception *to_cxx(_Unwind_Exception *e)
+{
+ return ((__cxa_exception *)(e+1)) - 1;
+}
+
+typedef struct __cxa_eh_globals {
+ __cxa_exception *caughtExceptions;
+ unsigned int uncaughtExceptions;
+} __cxa_eh_globals;
+
+__cxa_eh_globals *__cxa_get_globals(void) throw();
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_EXCEPT_GCC_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/except.h b/gcc-4.9/libcilkrts/runtime/except.h
new file mode 100644
index 000000000..58e2238c5
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/except.h
@@ -0,0 +1,123 @@
+/* except.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file except.h
+ *
+ * @brief Common definitions for the various implementations of exception
+ * handling.
+ */
+
+#ifndef INCLUDED_EXCEPT_DOT_H
+#define INCLUDED_EXCEPT_DOT_H
+
+#include <cilk/common.h>
+#include <internal/abi.h>
+#include "full_frame.h"
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * OS-dependent information about an exception that's being moved between
+ * strands.
+ */
+typedef struct pending_exception_info pending_exception_info;
+
+/**
+ * Merge the right exception record into the left. The left is logically
+ * earlier.
+ *
+ * On entry the left state is synched and can not have an unresolved
+ * exception. The merge may result in an unresolved exception.
+ *
+ * If there is both a right and left exception, the right exception will
+ * be disposed of in preference to the left exception, destructing the
+ * exception object.
+ *
+ * @param w The worker that is preparing to resume execution.
+ * @param left_exception The exception that would have happened earlier
+ * if the code executed serially. Can be NULL if the left strand has not
+ * raised an exception.
+ * @param right_exception The exception that would have happened later
+ * if the code executed serially. Can be NULL if the right strand has not
+ * raised an exception.
+ *
+ * @return NULL if both the right and left exceptions are NULL. This
+ * indicates that there are no pending exceptions.
+ * @return The pending exception that is to be raised to continue searching
+ * for a catch block to handle the exception.
+ */
+COMMON_SYSDEP
+struct pending_exception_info *__cilkrts_merge_pending_exceptions(
+ __cilkrts_worker *w,
+ pending_exception_info *left_exception,
+ pending_exception_info *right_exception);
+
+/**
+ * Move the exception information from the worker to the full_frame.
+ *
+ * @param w The worker which is suspending work on a full_frame.
+ * @param ff The full_frame which is being suspended.
+ */
+COMMON_SYSDEP
+void __cilkrts_save_exception_state(__cilkrts_worker *w,
+ full_frame *ff);
+
+/**
+ * Function to delete a pending exception. This will delete the
+ * exception object and then free the stack/fiber.
+ *
+ * @param w The worker we're running on.
+ * @param pei The pending exception to be deleted.
+ * @param delete_object Unused. Should always be 1.
+ */
+void delete_exception_obj (__cilkrts_worker *w,
+ struct pending_exception_info *pei,
+ int delete_object);
+
+#ifndef _WIN32
+/* gcc-style exception handling */
+NON_COMMON NORETURN __cilkrts_c_sync_except(__cilkrts_worker *w,
+ __cilkrts_stack_frame *sf);
+NON_COMMON void __attribute__((nonnull))
+__cilkrts_gcc_rethrow(__cilkrts_stack_frame *sf);
+#endif
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_EXCEPT_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/frame_malloc.c b/gcc-4.9/libcilkrts/runtime/frame_malloc.c
new file mode 100644
index 000000000..0b38bd209
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/frame_malloc.c
@@ -0,0 +1,462 @@
+/* frame_malloc.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#include "frame_malloc.h"
+#include "bug.h"
+#include "local_state.h"
+#include "cilk_malloc.h"
+
+#ifndef __VXWORKS__
+#include <memory.h>
+#endif
+
+/* #define USE_MMAP 1 */
+#if USE_MMAP
+#define __USE_MISC 1
+#include <sys/mman.h>
+#include <errno.h>
+#endif
+
+// Define to fill the stack frame header with the fill character when pushing
+// it on a free list. Note that this should be #ifdef'd out when checked in!
+
+#ifdef _DEBUG
+#define HEADER_FILL_CHAR 0xbf
+#endif
+
+// HEADER_FILL_CHAR should not be defined when checked in, so put out a warning
+// message if this is a release build
+
+#if defined(NDEBUG) && defined (HEADER_FILL_CHAR)
+#pragma message ("Warning: HEADER_FILL_CHAR defined for a release build")
+#endif
+
+static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size);
+
+#ifndef _WIN32
+
+const unsigned short __cilkrts_bucket_sizes[FRAME_MALLOC_NBUCKETS] =
+{
+ 64, 128, 256, 512, 1024, 2048
+};
+
+#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) __cilkrts_bucket_sizes[bucket]
+
+/* threshold above which we use slow malloc */
+#define FRAME_MALLOC_MAX_SIZE 2048
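+
+/* For example (illustrative only): a 100-byte request falls into the
+   128-byte bucket, while a 3000-byte request exceeds FRAME_MALLOC_MAX_SIZE
+   and is passed through to __cilkrts_malloc()/__cilkrts_free() instead. */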
+
+#else // _WIN32
+
+/* Note that this must match the implementation of framesz_to_bucket in
+ * asmilator/layout.ml! */
+#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) ((size_t)(64 << (bucket)))
+
+/* threshold above which we use slow malloc */
+#define FRAME_MALLOC_MAX_SIZE \
+ FRAME_MALLOC_BUCKET_TO_SIZE(FRAME_MALLOC_NBUCKETS - 1)
+
+#endif // _WIN32
+
+/* utility procedures */
+static void push(struct free_list **b, struct free_list *p)
+{
+#ifdef HEADER_FILL_CHAR
+ memset (p, HEADER_FILL_CHAR, FRAME_MALLOC_BUCKET_TO_SIZE(0));
+#endif
+ /* cons! onto free list */
+ p->cdr = *b;
+ *b = p;
+}
+
+static struct free_list *pop(struct free_list **b)
+{
+ struct free_list *p = *b;
+ if (p)
+ *b = p->cdr;
+ return p;
+}
+
+/*************************************************************
+ global allocator:
+*************************************************************/
+/* request slightly less than 2^K from the OS, which after malloc
+ overhead and alignment should end up filling each VM page almost
+ completely. 128 is a guess of the total malloc overhead and cache
+ line alignment */
+#define FRAME_MALLOC_CHUNK (32 * 1024 - 128)
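+/* i.e. each extension of the global pool asks __cilkrts_malloc() for
+   32 * 1024 - 128 = 32640 bytes */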
+
+/** Implements linked list of frames */
+struct pool_cons {
+ char *p; /**< This element of the list */
+ struct pool_cons *cdr; /**< Remainder of the list */
+};
+
+static void extend_global_pool(global_state_t *g)
+{
+ /* FIXME: memalign to a cache line? */
+ struct pool_cons *c = (struct pool_cons *)__cilkrts_malloc(sizeof(*c));
+ g->frame_malloc.pool_begin =
+ (char *)__cilkrts_malloc((size_t)FRAME_MALLOC_CHUNK);
+ g->frame_malloc.pool_end =
+ g->frame_malloc.pool_begin + FRAME_MALLOC_CHUNK;
+ g->frame_malloc.allocated_from_os += FRAME_MALLOC_CHUNK;
+ c->p = g->frame_malloc.pool_begin;
+ c->cdr = g->frame_malloc.pool_list;
+ g->frame_malloc.pool_list = c;
+}
+
+/* the size is already canonicalized at this point */
+static struct free_list *global_alloc(global_state_t *g, int bucket)
+{
+ struct free_list *mem;
+ size_t size;
+
+ CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
+ size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
+ g->frame_malloc.allocated_from_global_pool += size;
+
+ if (!(mem = pop(&g->frame_malloc.global_free_list[bucket]))) {
+
+ CILK_ASSERT(g->frame_malloc.pool_begin <= g->frame_malloc.pool_end);
+ if (g->frame_malloc.pool_begin + size > g->frame_malloc.pool_end) {
+ /* We waste the fragment of pool. */
+ g->frame_malloc.wasted +=
+ g->frame_malloc.pool_end - g->frame_malloc.pool_begin;
+ extend_global_pool(g);
+ }
+ mem = (struct free_list *)g->frame_malloc.pool_begin;
+ g->frame_malloc.pool_begin += size;
+ }
+
+ return mem;
+}
+
+static void global_free(global_state_t *g, void *mem, int bucket)
+{
+ size_t size;
+
+ CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
+ size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
+ g->frame_malloc.allocated_from_global_pool -= size;
+
+ push(&g->frame_malloc.global_free_list[bucket], mem);
+}
+
+void __cilkrts_frame_malloc_global_init(global_state_t *g)
+{
+ int i;
+
+ __cilkrts_mutex_init(&g->frame_malloc.lock);
+ g->frame_malloc.check_for_leaks = 1;
+ g->frame_malloc.pool_list = 0;
+ g->frame_malloc.pool_begin = 0;
+ g->frame_malloc.pool_end = 0;
+ g->frame_malloc.batch_size = 8000;
+ g->frame_malloc.potential_limit = 4 * g->frame_malloc.batch_size;
+ g->frame_malloc.allocated_from_os = 0;
+ g->frame_malloc.allocated_from_global_pool = 0;
+ g->frame_malloc.wasted = 0;
+ for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
+ g->frame_malloc.global_free_list[i] = 0;
+}
+
+// Counts how many bytes are in the global free list.
+static size_t count_memory_in_global_list(global_state_t *g)
+{
+
+ // Count the memory remaining in the global free list.
+ size_t size_remaining_in_global_list = 0;
+ int i;
+ for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
+ struct free_list *p;
+ size_t size_in_bucket = 0;
+ p = g->frame_malloc.global_free_list[i];
+
+ while (p) {
+ size_in_bucket += FRAME_MALLOC_BUCKET_TO_SIZE(i);
+ p = p->cdr;
+ }
+ size_remaining_in_global_list += size_in_bucket;
+ }
+ return size_remaining_in_global_list;
+}
+
+
+void __cilkrts_frame_malloc_global_cleanup(global_state_t *g)
+{
+ struct pool_cons *c;
+
+ if (g->frame_malloc.check_for_leaks) {
+ size_t memory_in_global_list = count_memory_in_global_list(g);
+ // TBD: This check is weak. Short of memory corruption,
+ // I don't see how we have more memory in the free list
+ // than allocated from the os.
+ // Ideally, we should count the memory in the global free list
+ // and check that we have it all. But I believe the runtime
+ // itself also uses some memory, which is not being tracked.
+ if (memory_in_global_list > g->frame_malloc.allocated_from_os) {
+ __cilkrts_bug("\nError. The Cilk runtime data structures may have been corrupted.\n");
+ }
+ }
+
+ while ((c = g->frame_malloc.pool_list)) {
+ g->frame_malloc.pool_list = c->cdr;
+ __cilkrts_free(c->p);
+ __cilkrts_free(c);
+ }
+
+ __cilkrts_mutex_destroy(0, &g->frame_malloc.lock);
+
+ // Check that all the memory moved from the global pool into
+ // workers has been returned to the global pool.
+ if (g->frame_malloc.check_for_leaks
+ && (g->frame_malloc.allocated_from_global_pool != 0))
+ {
+ __cilkrts_bug("\n"
+ "---------------------------" "\n"
+ " MEMORY LEAK DETECTED!!! " "\n"
+ "---------------------------" "\n"
+ "\n"
+ );
+ }
+}
+
+/*************************************************************
+ per-worker allocator
+*************************************************************/
+/* allocate a batch of frames of size SIZE from the global pool and
+ store them in the worker's free list */
+static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size)
+{
+ global_state_t *g = w->g;
+
+ __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
+#if USE_MMAP
+ char *p = mmap(0, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (p == MAP_FAILED)
+ __cilkrts_bug("mmap failed %d", errno);
+ assert(size < 4096);
+ assert(p != MAP_FAILED);
+ mprotect(p, 4096, PROT_NONE);
+ mprotect(p + 8192, 4096, PROT_NONE);
+ w->l->bucket_potential[bucket] += size;
+ push(&w->l->free_list[bucket], (struct free_list *)(p + 8192 - size));
+#else
+ size_t bytes_allocated = 0;
+ do {
+ w->l->bucket_potential[bucket] += size;
+ bytes_allocated += size;
+ push(&w->l->free_list[bucket], global_alloc(g, bucket));
+ } while (bytes_allocated < g->frame_malloc.batch_size);
+#endif
+ } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
+
+}
+
+static void gc_bucket(__cilkrts_worker *w, int bucket, size_t size)
+{
+ struct free_list *p, *q;
+ global_state_t *g = w->g;
+ size_t pot = w->l->bucket_potential[bucket];
+ size_t newpot;
+
+ /* Keep up to POT/2 elements in the free list. The cost of
+ counting up to POT/2 is amortized against POT. */
+ newpot = 0;
+ for (newpot = 0, p = w->l->free_list[bucket]; p && 2 * newpot < pot;
+ p = p->cdr, newpot += size)
+ ;
+ w->l->bucket_potential[bucket] = newpot;
+
+ if (p) {
+ /* free the rest of the list. The cost of grabbing the lock
+ is amortized against POT/2; the cost of traversing the rest
+ of the list is amortized against the free operation that
+ puts the element on the list. */
+ __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
+ while ((q = pop(&p->cdr)))
+#if USE_MMAP
+ munmap((char *)q + size - 8192, 12288);
+#else
+ global_free(g, q, bucket);
+#endif
+ } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
+ }
+}
+
+// Free all the memory in this bucket for the specified worker,
+// returning it to the global pool's free list.
+static void move_bucket_to_global_free_list(__cilkrts_worker *w,
+ int bucket)
+{
+ struct free_list *p, *q;
+ global_state_t *g = w->g;
+ p = w->l->free_list[bucket];
+
+ if (p) {
+ __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
+ while ((q = pop(&p))) {
+#if USE_MMAP
+ size_t size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
+ munmap((char *)q + size - 8192, 12288);
+#else
+ global_free(g, q, bucket);
+#endif
+ }
+ } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
+ }
+
+ // I'm not sure this does anything useful now, since
+ // the worker is about to be destroyed. But why not?
+ w->l->bucket_potential[bucket] = 0;
+}
+
+static int bucket_of_size(size_t size)
+{
+ int i;
+
+ for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
+ if (size <= FRAME_MALLOC_BUCKET_TO_SIZE(i))
+ return i;
+
+ CILK_ASSERT(0 /* can't happen */);
+ return -1;
+}
+
+size_t __cilkrts_frame_malloc_roundup(size_t size)
+{
+ if (size > FRAME_MALLOC_MAX_SIZE) {
+ /* nothing, leave it alone */
+ } else {
+ int bucket = bucket_of_size(size);
+ size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
+ }
+ return size;
+}
+
+size_t __cilkrts_size_of_bucket(int bucket)
+{
+ CILK_ASSERT(bucket >= 0 && bucket < FRAME_MALLOC_NBUCKETS);
+ return FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
+}
+
+void *__cilkrts_frame_malloc(__cilkrts_worker *w, size_t size)
+{
+ int bucket;
+ void *mem;
+
+ /* if too large, or if no worker, fall back to __cilkrts_malloc() */
+ if (!w || size > FRAME_MALLOC_MAX_SIZE) {
+ NOTE_INTERVAL(w, INTERVAL_FRAME_ALLOC_LARGE);
+ return __cilkrts_malloc(size);
+ }
+
+ START_INTERVAL(w, INTERVAL_FRAME_ALLOC); {
+ bucket = bucket_of_size(size);
+ size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
+
+ while (!(mem = pop(&w->l->free_list[bucket]))) {
+ /* get a batch of frames from the global pool */
+ START_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL) {
+ allocate_batch(w, bucket, size);
+ } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL);
+ }
+ } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC);
+
+ return mem;
+}
+
+void __cilkrts_frame_free(__cilkrts_worker *w, void *p0, size_t size)
+{
+ int bucket;
+ struct free_list *p = (struct free_list *)p0;
+
+ /* if too large, or if no worker, fall back to __cilkrts_free() */
+ if (!w || size > FRAME_MALLOC_MAX_SIZE) {
+ NOTE_INTERVAL(w, INTERVAL_FRAME_FREE_LARGE);
+ __cilkrts_free(p);
+ return;
+ }
+
+#if CILK_LIB_DEBUG
+ *(volatile long *)w;
+#endif
+
+ START_INTERVAL(w, INTERVAL_FRAME_FREE); {
+ bucket = bucket_of_size(size);
+ size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
+ w->l->bucket_potential[bucket] += size;
+ push(&w->l->free_list[bucket], p);
+ if (w->l->bucket_potential[bucket] >
+ w->g->frame_malloc.potential_limit) {
+ START_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL) {
+ gc_bucket(w, bucket, size);
+ } STOP_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL);
+ }
+ } STOP_INTERVAL(w, INTERVAL_FRAME_FREE);
+}
+
+void __cilkrts_frame_malloc_per_worker_init(__cilkrts_worker *w)
+{
+ int i;
+ local_state *l = w->l;
+
+ for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
+ l->free_list[i] = 0;
+ l->bucket_potential[i] = 0;
+ }
+}
+
+void __cilkrts_frame_malloc_per_worker_cleanup(__cilkrts_worker *w)
+{
+ int i;
+ // Move memory to the global pool. This operation
+ // ensures the memory does not become unreachable / leak
+ // when the worker is destroyed.
+ for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
+ move_bucket_to_global_free_list(w, i);
+ }
+}
+
+/*
+ Local Variables: **
+ c-file-style:"bsd" **
+ c-basic-offset:4 **
+ indent-tabs-mode:nil **
+ End: **
+*/
diff --git a/gcc-4.9/libcilkrts/runtime/frame_malloc.h b/gcc-4.9/libcilkrts/runtime/frame_malloc.h
new file mode 100644
index 000000000..d412fb620
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/frame_malloc.h
@@ -0,0 +1,205 @@
+/* frame_malloc.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file frame_malloc.h
+ *
+ * @brief The frame allocation routines manage memory in a per-worker pool.
+ *
+ * The name "frame malloc" refers to an earlier implementation of Cilk which
+ * allocated frames from the heap using this allocator.
+ */
+
+#ifndef INCLUDED_FRAME_MALLOC_DOT_H
+#define INCLUDED_FRAME_MALLOC_DOT_H
+
+#include "worker_mutex.h"
+#include "rts-common.h"
+#include <internal/abi.h> // __cilkrts_worker
+
+#ifdef __cplusplus
+# include <cstddef>
+#else
+# include <stddef.h>
+#endif
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * Number of buckets. Gives us buckets to hold 64, 128, 256, 512, 1024
+ * and 2048 bytes
+ */
+#define FRAME_MALLOC_NBUCKETS 6
+
+/** Layout of frames when unallocated */
+struct free_list {
+ /** Pointer to next free frame */
+ struct free_list *cdr;
+};
+
+/** per-worker memory cache */
+struct __cilkrts_frame_cache
+{
+ /** Mutex to serialize access */
+ struct mutex lock;
+
+ /** Linked list of frames */
+ struct pool_cons *pool_list;
+
+ /** Low bound of memory in pool */
+ char *pool_begin;
+
+ /** High bound of memory in pool */
+ char *pool_end;
+
+ /** Global free-list buckets */
+ struct free_list *global_free_list[FRAME_MALLOC_NBUCKETS];
+
+ /**
+ * How many bytes to obtain at once from the global pool
+ * (approximately)
+ */
+ size_t batch_size;
+
+ /** Garbage-collect a bucket when its potential exceeds the limit */
+ size_t potential_limit;
+
+ /** If TRUE, check for memory leaks at the end of execution */
+ int check_for_leaks;
+
+ /** Bytes of memory allocated from the OS by the global cache */
+ size_t allocated_from_os;
+
+ /** Tracks memory allocated by a chunk that isn't a full bucket size */
+ size_t wasted;
+
+ /** Bytes of memory allocated from the global cache */
+ size_t allocated_from_global_pool;
+};
+
+/**
+ * Allocate memory from the per-worker pool. If the size is too large, or
+ * if we're given a NULL worker, the memory is allocated using
+ * __cilkrts_malloc().
+ *
+ * @param w The worker to allocate the memory from.
+ * @param size The number of bytes to allocate.
+ *
+ * @return pointer to allocated memory block.
+ */
+COMMON_PORTABLE
+void *__cilkrts_frame_malloc(__cilkrts_worker *w,
+ size_t size) cilk_nothrow;
+
+/**
+ * Return memory to the per-worker pool. If the size is too large, or
+ * if we're given a NULL worker, the memory is freed using
+ * __cilkrts_free().
+ *
+ * @param w The worker to allocate the memory from.
+ * @param p The memory block to be released.
+ * @param size The size of the block, in bytes.
+ */
+COMMON_PORTABLE
+void __cilkrts_frame_free(__cilkrts_worker *w,
+ void* p,
+ size_t size) cilk_nothrow;
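+
+/*
+ * Illustrative sketch (not part of the runtime API): pairing an allocation
+ * with its release. The worker pointer w is assumed to be valid, or NULL to
+ * force the __cilkrts_malloc()/__cilkrts_free() fallback; the same size
+ * should be passed to both calls so the block returns to the right bucket.
+ *
+ *     void *p = __cilkrts_frame_malloc(w, 100); // served from the 128-byte bucket
+ *     // ... use p ...
+ *     __cilkrts_frame_free(w, p, 100);          // same size returns it to that bucket
+ */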
+
+/**
+ * Destroy the global cache stored in the global state, freeing all memory
+ * to the global heap. Checks whether any memory has been allocated but
+ * not freed.
+ *
+ * @param g The global state.
+ */
+COMMON_PORTABLE
+void __cilkrts_frame_malloc_global_cleanup(global_state_t *g);
+
+/**
+ * Initialize a worker's memory cache. Initially it is empty.
+ *
+ * @param w The worker whose memory cache is to be initialized.
+ */
+COMMON_PORTABLE
+void __cilkrts_frame_malloc_per_worker_init(__cilkrts_worker *w);
+
+/**
+ * If check_for_leaks is set in the global state's memory cache, free any
+ * memory in the worker's memory cache.
+ *
+ * If check_for_leaks is not set, nothing happens.
+ *
+ * @param w The worker whose memory cache is to be cleaned up.
+ */
+COMMON_PORTABLE
+void __cilkrts_frame_malloc_per_worker_cleanup(__cilkrts_worker *w);
+
+/**
+ * Round a number of bytes to the size of the smallest bucket that will
+ * hold it. If the size is bigger than the largest bucket, the value is
+ * unchanged.
+ *
+ * @param size Number of bytes to be rounded up to the nearest bucket size.
+ *
+ * @return The size of the smallest bucket that will hold the specified bytes.
+ */
+COMMON_PORTABLE
+size_t __cilkrts_frame_malloc_roundup(size_t size) cilk_nothrow;
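+
+/* For example, __cilkrts_frame_malloc_roundup(100) returns 128 (the smallest
+   bucket that can hold 100 bytes), while a 4096-byte request, being larger
+   than the largest 2048-byte bucket, is returned unchanged. */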
+
+/**
+ * Return the number of bytes that can fit into a bucket.
+ *
+ * Preconditions:
+ *  - The index must be in the range 0 to FRAME_MALLOC_NBUCKETS - 1
+ *
+ * @param bucket Index of the bucket to be sized.
+ */
+COMMON_PORTABLE
+size_t __cilkrts_size_of_bucket(int bucket) cilk_nothrow;
+
+/**
+ * Initialize the global memory cache.
+ *
+ * @param g The global state.
+ */
+COMMON_PORTABLE
+void __cilkrts_frame_malloc_global_init(global_state_t *g);
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_FRAME_MALLOC_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/full_frame.c b/gcc-4.9/libcilkrts/runtime/full_frame.c
new file mode 100644
index 000000000..9ccfd110d
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/full_frame.c
@@ -0,0 +1,181 @@
+/* full_frame.c -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2010-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+#include "full_frame.h"
+#include "stats.h"
+#include "os.h"
+#include "bug.h"
+#include "jmpbuf.h"
+#include "frame_malloc.h"
+
+COMMON_PORTABLE
+full_frame *__cilkrts_make_full_frame(__cilkrts_worker *w,
+ __cilkrts_stack_frame *sf)
+{
+ full_frame *ff;
+
+ START_INTERVAL(w, INTERVAL_ALLOC_FULL_FRAME) {
+ ff = (full_frame *)__cilkrts_frame_malloc(w, sizeof(*ff));
+ __cilkrts_mutex_init(&ff->lock);
+
+ ff->full_frame_magic_0 = FULL_FRAME_MAGIC_0;
+ ff->join_counter = 0;
+ ff->parent = 0;
+ ff->rightmost_child = 0;
+ ff->left_sibling = ff->right_sibling = 0;
+ ff->call_stack = sf;
+ ff->is_call_child = 0;
+ ff->simulated_stolen = 0;
+ ff->children_reducer_map = ff->right_reducer_map = 0;
+ ff->pending_exception =
+ ff->child_pending_exception =
+ ff->right_pending_exception = NULL;
+
+ ff->sync_sp = 0;
+#ifdef _WIN32
+ ff->exception_sp = 0;
+ ff->trylevel = (unsigned long)-1;
+ ff->registration = 0;
+#endif
+ ff->frame_size = 0;
+ ff->fiber_self = 0;
+ ff->fiber_child = 0;
+
+ ff->sync_master = 0;
+
+ /*__cilkrts_init_full_frame_sysdep(w, ff);*/
+ ff->full_frame_magic_1 = FULL_FRAME_MAGIC_1;
+ } STOP_INTERVAL(w, INTERVAL_ALLOC_FULL_FRAME);
+ return ff;
+}
+
+COMMON_PORTABLE void __cilkrts_put_stack(full_frame *ff,
+ __cilkrts_stack_frame *sf)
+{
+ /* When suspending frame ff prior to stealing it, __cilkrts_put_stack is
+ * used to store the stack pointer for eventual sync. When suspending
+ * frame ff prior to a sync, __cilkrts_put_stack is called to re-establish
+ * the sync stack pointer, offsetting it by any change in the stack depth
+     * that occurred between the spawn and the sync.
+ * Although it is not usually meaningful to add two pointers, the value of
+ * ff->sync_sp at the time of this call is really an integer, not a
+ * pointer.
+ */
+ ptrdiff_t sync_sp_i = (ptrdiff_t) ff->sync_sp;
+ char* sp = (char*) __cilkrts_get_sp(sf);
+
+ ff->sync_sp = sp + sync_sp_i;
+
+ DBGPRINTF("%d- __cilkrts_put_stack - adjust (+) sync "
+ "stack of full frame %p (+sp: %p) to %p\n",
+ __cilkrts_get_tls_worker()->self, ff, sp, ff->sync_sp);
+}
+
+COMMON_PORTABLE void __cilkrts_take_stack(full_frame *ff, void *sp)
+{
+ /* When resuming the parent after a steal, __cilkrts_take_stack is used to
+ * subtract the new stack pointer from the current stack pointer, storing
+ * the offset in ff->sync_sp. When resuming after a sync,
+ * __cilkrts_take_stack is used to subtract the new stack pointer from
+ * itself, leaving ff->sync_sp at zero (null). Although the pointers being
+ * subtracted are not part of the same contiguous chunk of memory, the
+ * flat memory model allows us to subtract them and get a useable offset.
+ */
+ ptrdiff_t sync_sp_i = ff->sync_sp - (char*) sp;
+
+ ff->sync_sp = (char *) sync_sp_i;
+
+ DBGPRINTF("%d- __cilkrts_take_stack - adjust (-) sync "
+ "stack of full frame %p to %p (-sp: %p)\n",
+ __cilkrts_get_tls_worker()->self, ff, ff->sync_sp, sp);
+}
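+
+/* Worked sketch of the pairing above (illustrative only): suppose the frame
+ * is first suspended with stack pointer SP1 and sync_sp == 0, so
+ * __cilkrts_put_stack stores sync_sp = SP1.  When the parent is resumed on a
+ * different stack with pointer SP2, __cilkrts_take_stack leaves the offset
+ * sync_sp = SP1 - SP2.  A later __cilkrts_put_stack at the sync, with stack
+ * pointer SP3, re-forms sync_sp = SP3 + (SP1 - SP2), i.e. SP1 adjusted by
+ * the change in stack depth between the spawn and the sync. */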
+
+COMMON_PORTABLE void __cilkrts_adjust_stack(full_frame *ff, size_t size)
+{
+ /* When resuming the parent after a steal, __cilkrts_take_stack is used to
+ * subtract the new stack pointer from the current stack pointer, storing
+ * the offset in ff->sync_sp. When resuming after a sync,
+ * __cilkrts_take_stack is used to subtract the new stack pointer from
+ * itself, leaving ff->sync_sp at zero (null). Although the pointers being
+ * subtracted are not part of the same contiguous chunk of memory, the
+ * flat memory model allows us to subtract them and get a useable offset.
+ *
+ * __cilkrts_adjust_stack() is used to deallocate a Variable Length Array
+     * by adding its size to ff->sync_sp.
+ */
+ ff->sync_sp = ff->sync_sp + size;
+
+ DBGPRINTF("%d- __cilkrts_adjust_stack - adjust (+) sync "
+ "stack of full frame %p to %p (+ size: 0x%x)\n",
+ __cilkrts_get_tls_worker()->self, ff, ff->sync_sp, size);
+}
+
+COMMON_PORTABLE
+void __cilkrts_destroy_full_frame(__cilkrts_worker *w, full_frame *ff)
+{
+ validate_full_frame(ff);
+ CILK_ASSERT(ff->children_reducer_map == 0);
+ CILK_ASSERT(ff->right_reducer_map == 0);
+ CILK_ASSERT(NULL == ff->pending_exception);
+ CILK_ASSERT(NULL == ff->child_pending_exception);
+ CILK_ASSERT(NULL == ff->right_pending_exception);
+ __cilkrts_mutex_destroy(w, &ff->lock);
+ __cilkrts_frame_free(w, ff, sizeof(*ff));
+}
+
+COMMON_PORTABLE void validate_full_frame(full_frame *ff)
+{
+ /* check the magic numbers, for debugging purposes */
+ if (ff->full_frame_magic_0 != FULL_FRAME_MAGIC_0 ||
+ ff->full_frame_magic_1 != FULL_FRAME_MAGIC_1)
+ abort_because_rts_is_corrupted();
+}
+
+void __cilkrts_frame_lock(__cilkrts_worker *w, full_frame *ff)
+{
+ validate_full_frame(ff);
+ __cilkrts_mutex_lock(w, &ff->lock);
+}
+
+void __cilkrts_frame_unlock(__cilkrts_worker *w, full_frame *ff)
+{
+ __cilkrts_mutex_unlock(w, &ff->lock);
+}
+
+/* End full_frame.c */
diff --git a/gcc-4.9/libcilkrts/runtime/full_frame.h b/gcc-4.9/libcilkrts/runtime/full_frame.h
new file mode 100644
index 000000000..327a3337a
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/full_frame.h
@@ -0,0 +1,493 @@
+/* full_frame.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#ifndef INCLUDED_FULL_FRAME_DOT_H
+#define INCLUDED_FULL_FRAME_DOT_H
+
+
+#include "rts-common.h"
+#include "worker_mutex.h"
+
+#include <cilk/common.h>
+#include <internal/abi.h>
+#include <stddef.h>
+#include "cilk_fiber.h"
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/** Magic numbers for full_frame, used for debugging */
+typedef unsigned long long ff_magic_t;
+
+/* COMMON_SYSDEP */ struct pending_exception_info; /* opaque */
+
+/*************************************************************
+ Full frames
+*************************************************************/
+
+/**
+ * @file full_frame.h
+ * @brief A full frame includes additional information such as a join
+ * counter and parent frame.
+ * @defgroup FullFrames Full Frames
+ * A full frame includes additional information such as a join
+ * counter and parent frame.
+ * @{
+ */
+
+/**
+ * Convenience typedef so we don't have to specify "struct full_frame"
+ * all over the code. Putting it before the structure definition allows
+ * us to use the typedef within the structure itself
+ */
+typedef struct full_frame full_frame;
+
+/**
+ * @brief A full frame includes additional information such as a join
+ * counter and parent frame.
+ *
+ * The frame at the top of a worker's stack is promoted into a "full"
+ * frame, which carries additional information, such as join counter
+ * and parent frame. Full frames can be suspended at a sync, in which
+ * case they lie somewhere in memory and do not belong to any
+ * worker.
+ *
+ * Full frames are in contrast to the entries in the worker's deque which
+ * are only represented by a pointer to their __cilkrts_stack_frame.
+ *
+ * At any instant, we say that a full frame ff is either "suspended",
+ * or "owned" by some worker w.
+ *
+ * More precisely, we say that a worker w owns a frame ff under one of
+ * the following conditions:
+ *
+ * 1. Creation: Worker w has just created ff, but not yet linked ff
+ * into the tree of full frames. This situation can occur when a
+ * worker is unrolling a call stack to promote a
+ * __cilkrts_stack_frame to a full_frame.
+ * 2. Executing frame: We have w->l->frame_ff == ff, i.e., ff is the
+ * currently executing frame for w.
+ * 3. Next frame: We have w->l->next_frame_ff == ff, i.e., ff is the
+ * next frame that w is about to execute.
+ * 4. Resume execution: Worker w has popped ff from
+ * w->l->next_frame_ff, and is about to resume execution of ff.
+ * 5. Dying leaf: Worker w has finished executing a frame ff
+ * that is a leaf of the tree of full frames, and is in the process
+ * of unlinking "ff" from the tree.
+ *
+ * Otherwise, the frame ff is suspended, and has no owner.
+ * Note that work-stealing changes the owner of a full frame from the
+ * victim to the thief.
+ *
+ * Using this notion of ownership, we classify the fields of a full
+ * frame into one of several categories:
+ *
+ * 1. Local:
+ * These fields are accessed only by the owner of the full frame.
+ * Because a frame can have only one owner at a time, these fields
+ * can be modified without any (additional) locking or
+ * synchronization, assuming the correct synchronization for
+ * changing the ownership of full frame (e.g., on a successful
+ * steal) is already in place.
+ *
+ * 2. Constant (i.e., read-only):
+ * This field is constant for the lifetime of the full frame.
+ * No locks are needed to access this field.
+ * Technically, a field could be read-only and local, but we assume
+ * it is shared.
+ *
+ * 3. Self-locked:
+ * To access this field in the frame ff, a worker should acquire
+ * the lock on ff.
+ * A self-locked field is conceptually "shared" between the worker
+ * that owns frame ff (which is a child) and the worker that
+ * owns the frame ff->parent (which is the parent of ff).
+ *
+ * 4. Parent-locked:
+ * To access this field in the frame ff, a worker should
+ * acquire the lock on ff->parent.
+ * A parent-locked field is conceptually "shared" between the worker
+ * that owns frame ff, and a worker that is either owns the
+ * parent frame (ff->parent) or owns a sibling frame of ff (i.e.,
+ * any child of ff->parent).
+ *
+ * 5. Synchronization
+ * A field used explicitly for synchronization (i.e., locks).
+ */
+
+/* COMMON_PORTABLE */
+struct full_frame
+{
+ /**
+ * Value to detect writes off the beginning of a full_frame.
+ */
+# define FULL_FRAME_MAGIC_0 ((ff_magic_t)0x361e710b9597d553ULL)
+
+ /**
+ * Field to detect writes off the beginning of a full_frame. Must be
+ * FULL_FRAME_MAGIC_0.
+ * [constant]
+ */
+ ff_magic_t full_frame_magic_0;
+
+ /**
+ * Used to serialize access to this full_frame
+ * [synchronization]
+ */
+ struct mutex lock;
+
+ /**
+ * Count of outstanding children running in parallel
+ * [self-locked]
+ */
+ int join_counter;
+
+ /**
+ * If TRUE: frame was called by the parent.
+ * If FALSE: frame was spawned by parent.
+ * [constant]
+ */
+ int is_call_child;
+
+ /**
+ * TRUE if this frame is the loot of a simulated steal.
+ *
+ * This situation never happens in normal execution. However,
+ * when running under cilkscreen, a worker may promote frames and
+ * then immediately suspend them, in order to simulate an
+ * execution on an infinite number of processors where all spawns
+ * are stolen. In this case, the frame is marked as the loot of a fake
+ * steal.
+ * [local]
+ */
+ int simulated_stolen;
+
+ /**
+ * Caller of this full_frame
+ * [constant]
+ */
+ full_frame *parent;
+
+ /**
+ * Doubly-linked list of children. The serial execution order is
+ * by definition from left to right. Because of how we do work
+ * stealing, the parent is always to the right of all its
+ * children.
+ *
+ * For a frame ff, we lock the ff->parent to follow the sibling
+ * links for ff.
+ *
+ * [parent-locked]
+ */
+ full_frame *left_sibling;
+
+ /**
+ * @copydoc left_sibling
+ */
+ full_frame *right_sibling;
+
+ /**
+ * Pointer to rightmost child
+ *
+ * [self-locked]
+ */
+ full_frame *rightmost_child;
+
+ /**
+ * Call stack associated with this frame.
+ * Set and reset in make_unrunnable and make_runnable
+ *
+ * [self-locked]
+ */
+ __cilkrts_stack_frame *call_stack;
+
+ /**
+ * Accumulated reducers of children
+ *
+ * [self-locked]
+ */
+ struct cilkred_map *children_reducer_map;
+
+ /**
+ * Accumulated reducers of right siblings that have already
+ * terminated
+ *
+ * [parent-locked]
+ */
+ struct cilkred_map *right_reducer_map;
+
+ /**
+ * Exception that needs to be passed to our parent
+ *
+ * [local]
+ *
+ * TBD: verify that the exception code satisfies this requirement.
+ */
+ struct pending_exception_info *pending_exception;
+
+ /**
+ * Exception from one of our children
+ *
+ * [self-locked]
+ */
+ struct pending_exception_info *child_pending_exception;
+
+ /**
+ * Exception from any right siblings
+ *
+ * [parent-locked]
+ */
+ struct pending_exception_info *right_pending_exception;
+
+ /**
+ * Stack pointer to restore on sync.
+ * [local]
+ */
+ char *sync_sp;
+
+#ifdef _WIN32
+ /**
+ * Stack pointer to restore on exception.
+ * [local]
+ */
+ char *exception_sp;
+
+ /**
+ * Exception trylevel at steal
+ * [local]
+ *
+ * TBD: this field is set but not read?
+ */
+ unsigned long trylevel;
+
+ /**
+ * Exception registration head pointer to restore on sync.
+ * [local]
+ */
+ unsigned long registration;
+#endif
+
+ /**
+ * Size of frame to match sync sp
+ * [local]
+ * TBD: obsolete field only used in debugging?
+ */
+ ptrdiff_t frame_size;
+
+ /**
+ * Allocated fibers that need to be freed. The fibers work
+ * like a reducer. The leftmost frame may have @c fiber_self
+ * null and owner non-null.
+ *
+ * [local]
+ * TBD: verify exception code satisfies this requirement.
+ */
+ cilk_fiber *fiber_self;
+
+ /**
+ * Allocated fibers that need to be freed. The fibers work
+ * like a reducer. The leftmost frame may have @c fiber_self
+ * null and owner non-null.
+ *
+ * [self-locked]
+ */
+ cilk_fiber *fiber_child;
+
+ /**
+ * If the sync_master is set, this frame can only be sync'd by the team
+ * leader, who first entered Cilk. This is set by the first worker to steal
+ * from the user worker.
+ *
+ * [self-locked]
+ */
+ __cilkrts_worker *sync_master;
+
+ /**
+ * Value to detect writes off the end of a full_frame.
+ */
+# define FULL_FRAME_MAGIC_1 ((ff_magic_t)0x189986dcc7aee1caULL)
+
+ /**
+ * Field to detect writes off the end of a full_frame. Must be
+ * FULL_FRAME_MAGIC_1.
+ *
+ * [constant]
+ */
+ ff_magic_t full_frame_magic_1;
+};
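+
+/* Illustrative sketch (not part of the runtime): the field categories above
+ * translate into a locking discipline.  Reading a [self-locked] field such as
+ * ff->child_pending_exception requires holding ff->lock, while a
+ * [parent-locked] field such as ff->right_pending_exception requires holding
+ * ff->parent->lock.  The hypothetical helper below exists only to show the
+ * pattern:
+ *
+ *   void example_read_shared_fields(__cilkrts_worker *w, full_frame *ff)
+ *   {
+ *       __cilkrts_frame_lock(w, ff);           // protects self-locked fields
+ *       struct pending_exception_info *from_child = ff->child_pending_exception;
+ *       __cilkrts_frame_unlock(w, ff);
+ *
+ *       __cilkrts_frame_lock(w, ff->parent);   // protects parent-locked fields
+ *       struct pending_exception_info *from_sibling = ff->right_pending_exception;
+ *       __cilkrts_frame_unlock(w, ff->parent);
+ *
+ *       (void)from_child; (void)from_sibling;
+ *   }
+ */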
+
+/* The functions __cilkrts_put_stack and __cilkrts_take_stack keep track of
+ * changes in the stack's depth between when the point at which a frame is
+ * stolen and when it is resumed at a sync. A stolen frame typically goes
+ * through the following phase changes:
+ *
+ * 1. Suspend frame while stealing it.
+ * 2. Resume stolen frame at beginning of continuation
+ * 3. Suspend stolen frame at a sync
+ * 4. Resume frame (no longer marked stolen) after the sync
+ *
+ * When the frame is suspended (steps 1 and 3), __cilkrts_put_stack is called to
+ * establish the stack pointer for the sync. When the frame is resumed (steps
+ * 2 and 4), __cilkrts_take_stack is called to indicate the stack pointer
+ * (which may be on a different stack) at
+ * the point of resume. If the stack pointer changes between steps 2 and 3,
+ * e.g., as a result of pushing 4 bytes onto the stack,
+ * the offset is reflected in the value of ff->sync_sp after step 3 relative to
+ * its value after step 1 (e.g., the value of ff->sync_sp after step 3 would be
+ * 4 less than its value after step 1, for a down-growing stack).
+ *
+ * Imp detail: The actual call chains for each of these phase-change events is:
+ *
+ * 1. unroll_call_stack -> make_unrunnable -> __cilkrts_put_stack
+ * 2. do_work -> __cilkrts_resume -> __cilkrts_take_stack
+ * 3. do_sync -> disown -> make_runnable -> __cilkrts_put_stack
+ * 4. __cilkrts_resume -> __cilkrts_take_stack
+ *
+ * (The above is a changeable implementation detail. The resume sequence, in
+ * particular, is more complex on some operating systems.)
+ */
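+
+/* Worked example (illustrative, made-up addresses, assuming a down-growing
+ * stack):
+ *
+ *   Step 1: __cilkrts_put_stack() records sp = 0x1000, so ff->sync_sp = 0x1000.
+ *   Step 2: __cilkrts_take_stack() on the new stack with sp = 0x9000 leaves
+ *           the difference 0x1000 - 0x9000 in ff->sync_sp as an offset.
+ *   Step 3: the continuation pushes 16 bytes, so __cilkrts_put_stack() sees
+ *           sp = 0x8FF0 and restores ff->sync_sp to 0x0FF0, i.e. 16 less than
+ *           the value recorded in step 1.
+ *   Step 4: the frame resumes at sp = 0x0FF0; __cilkrts_take_stack() subtracts
+ *           the two equal values and ff->sync_sp becomes 0 (null).
+ */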
+
+/**
+ * @brief Records the stack pointer within the @c sf stack frame as the
+ * current stack pointer at the point of suspending full frame @c ff.
+ *
+ * @pre @c ff->sync_sp must be either null or contain the result of a prior call to
+ * @c __cilkrts_take_stack().
+ * @pre If @c ff->sync_sp is not null, then @c SP(sf) must refer to the same stack as
+ * the @c sp argument to the prior call to @c __cilkrts_take_stack().
+ *
+ * @post If @c ff->sync_sp was null before the call, then @c
+ * ff->sync_sp will be set to @c SP(sf).
+ * @post Otherwise, @c ff->sync_sp will be restored to the value it had just prior
+ * to the last call to @c __cilkrts_take_stack(), except offset by any change
+ * in the stack pointer between the call to @c __cilkrts_take_stack() and
+ * this call to @c __cilkrts_put_stack().
+ *
+ * @param ff The full frame that is being suspended.
+ * @param sf The @c __cilkrts_stack_frame that is being suspended. The stack
+ * pointer will be taken from the jmpbuf contained within this
+ * @c __cilkrts_stack_frame.
+ */
+COMMON_PORTABLE void __cilkrts_put_stack(full_frame *ff,
+ __cilkrts_stack_frame *sf);
+
+/**
+ * @brief Records the stack pointer @c sp as the stack pointer at the point of
+ * resuming execution on full frame @c ff.
+ *
+ * The value of @c sp may be on a different stack than the original
+ * value recorded for the stack pointer using __cilkrts_put_stack().
+ *
+ * @pre @c ff->sync_sp must contain a value set by @c __cilkrts_put_stack().
+ *
+ * @post @c ff->sync_sp contains an *integer* value used to compute a change in the
+ * stack pointer upon the next call to @c __cilkrts_put_stack().
+ * @post If @c sp equals @c ff->sync_sp, then @c ff->sync_sp is set to null.
+ *
+ * @param ff The full frame that is being resumed.
+ * @param sp The stack pointer for the stack the function is being resumed on.
+ */
+COMMON_PORTABLE void __cilkrts_take_stack(full_frame *ff, void *sp);
+
+/*
+ * @brief Adjust the stack to deallocate a Variable Length Array
+ *
+ * @param ff The full frame that is being adjusted.
+ * @param size The size of the array being deallocated from the stack
+ */
+COMMON_PORTABLE void __cilkrts_adjust_stack(full_frame *ff, size_t size);
+
+/**
+ * @brief Allocates and initializes a full_frame.
+ *
+ * @param w The memory for the full_frame will be allocated out of the
+ * worker's pool.
+ * @param sf The @c __cilkrts_stack_frame which will be saved as the call_stack
+ * for this full_frame.
+ *
+ * @return The newly allocated and initialized full_frame.
+ */
+COMMON_PORTABLE
+full_frame *__cilkrts_make_full_frame(__cilkrts_worker *w,
+ __cilkrts_stack_frame *sf);
+
+/**
+ * @brief Deallocates a full_frame.
+ *
+ * @param w The memory for the full_frame will be returned to the worker's pool.
+ * @param ff The full_frame to be deallocated.
+ */
+COMMON_PORTABLE
+void __cilkrts_destroy_full_frame(__cilkrts_worker *w, full_frame *ff);
+
+/**
+ * @brief Performs sanity checks to check the integrity of a full_frame.
+ *
+ * @param ff The full_frame to be validated.
+ */
+COMMON_PORTABLE void validate_full_frame(full_frame *ff);
+
+/**
+ * @brief Locks the mutex contained in a full_frame.
+ *
+ * The full_frame is validated before the runtime attempts to lock it.
+ *
+ * @post @c ff->lock will be owned by @c w.
+ *
+ * @param w The worker that will own the full_frame. If the runtime is
+ * collecting stats, the intervals will be attributed to the worker.
+ * @param ff The full_frame containing the mutex to be locked.
+ */
+COMMON_PORTABLE void __cilkrts_frame_lock(__cilkrts_worker *w,
+ full_frame *ff);
+
+/**
+ * @brief Unlocks the mutex contained in a full_frame.
+ *
+ * @pre @c ff->lock must be owned by @c w.
+ *
+ * @param w The worker that currently owns the full_frame.
+ * @param ff The full_frame containing the mutex to be unlocked.
+ */
+COMMON_PORTABLE void __cilkrts_frame_unlock(__cilkrts_worker *w,
+ full_frame *ff);
+/** @} */
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_FULL_FRAME_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/global_state.cpp b/gcc-4.9/libcilkrts/runtime/global_state.cpp
new file mode 100644
index 000000000..02de54f43
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/global_state.cpp
@@ -0,0 +1,628 @@
+/* global_state.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#include "global_state.h"
+#include "os.h"
+#include "bug.h"
+#include "metacall_impl.h"
+#include "stats.h"
+#include "cilk/cilk_api.h"
+#include "cilk_malloc.h"
+#include "record-replay.h"
+
+#include <algorithm> // For max()
+#include <cstring>
+#include <cstdlib>
+#include <climits>
+#include <cerrno>
+
+#ifdef _WIN32
+# include <wchar.h>
+#endif
+
+// TBD: There is a race when multiple threads try to initialize the
+// user_settable_values??
+//
+// Set to true if the user settable values portion of the global state
+// singleton is initialized, even if the rest of the singleton is not
+// initialized.
+int cilkg_user_settable_values_initialized = false;
+
+namespace {
+
+// Single copy of the global state. Zero-filled until
+// cilkg_get_user_settable_values() is called and partially-zero-filled until
+// cilkg_init_global_state() is called. The first field is filled in with
+// the size of a void* for the debugger and must be valid before initialization
+global_state_t global_state_singleton =
+{
+ sizeof(void *), // addr_size
+};
+
+
+// Variables that need to export C-style names
+extern "C"
+{
+ // Pointer to the global state singleton.
+ global_state_t *cilkg_singleton_ptr = NULL;
+
+ // __cilkrts_global_state is exported and referenced by the debugger.
+ // The debugger expects it to be valid when the module loads.
+// CILK_EXPORT_DATA
+ global_state_t *__cilkrts_global_state = &global_state_singleton;
+}
+
+// Returns true if 'a' and 'b' are equal null-terminated strings
+inline bool strmatch(const char* a, const char* b)
+{
+ return 0 == std::strcmp(a, b);
+}
+
+// Returns the integer value represented by the null-terminated string at 's'.
+inline long to_long(const char* s)
+{
+ char *end;
+
+ errno = 0;
+ return std::strtol(s, &end, 0);
+}
+
+#ifdef _WIN32
+// Returns true if 'a' and 'b' are equal null-terminated wide-char strings
+inline bool strmatch(const wchar_t* a, const wchar_t* b)
+{
+ return 0 == wcscmp(a, b);
+}
+
+// Returns true if the multi-byte character string at 'a' represents the same
+// character sequence as the wide-character string at 'b'. The behavior is
+// undefined if 'a' contains more than 30 multi-byte characters.
+bool strmatch(const char* a, const wchar_t* b)
+{
+ // Convert 'a' to wide-characters, then compare.
+ wchar_t wa[31];
+ std::size_t count;
+ errno_t err = mbstowcs_s(&count, wa, a, 30);
+ CILK_ASSERT(0 == err);
+ if (err) return false;
+ return strmatch(wa, b);
+}
+
+// Returns true if the wide-character string at 'a' represents the same
+// character sequence as the multi-byte character string at 'b'. The behavior
+// is undefined if 'b' contains more than 30 multi-byte characters.
+inline
+bool strmatch(const wchar_t* a, const char* b)
+{
+ return strmatch(b, a);
+}
+
+
+// Returns the integer value represented by the null-terminated wide-char
+// string at 's'.
+inline long to_long(const wchar_t* s)
+{
+ wchar_t *end;
+
+ errno = 0;
+ return wcstol(s, &end, 0);
+}
+#endif
+
+// Check if Cilkscreen or other sequential ptool wants to force reducers.
+bool always_force_reduce()
+{
+ // Metacall *looks* like a no-op. volatile needed to keep compiler from
+ // optimizing away variable.
+ volatile char not_force_reduce = '\377';
+ __cilkrts_metacall(METACALL_TOOL_SYSTEM, HYPER_ZERO_IF_FORCE_REDUCE,
+ const_cast<char*>(&not_force_reduce));
+ return ! not_force_reduce;
+}
+
+// Stores the boolean value represented by the null-terminated string at 'val'
+// into the integer object at 'out'. Returns '__CILKRTS_SET_PARAM_SUCCESS' if
+// 'val' is "true", "false", "0" or "1" and '__CILKRTS_SET_PARAM_INVALID'
+// otherwise.
+template <typename INT_T, typename CHAR_T>
+int store_bool(INT_T *out, const CHAR_T *val)
+{
+ static const char* const s_zero = "0";
+ static const char* const s_one = "1";
+ static const char* const s_true = "true";
+ static const char* const s_false = "false";
+
+ if (val == 0)
+ return __CILKRTS_SET_PARAM_INVALID;
+
+ if (strmatch(s_false, val) || strmatch(s_zero, val)) {
+ *out = 0;
+ return __CILKRTS_SET_PARAM_SUCCESS;
+ }
+
+ if (strmatch(s_true, val) || strmatch(s_one, val)) {
+ *out = 1;
+ return __CILKRTS_SET_PARAM_SUCCESS;
+ }
+
+ return __CILKRTS_SET_PARAM_INVALID;
+}
+
+// Stores the integer value represented by the null-terminated string at 'val'
+// into the integer object at 'out', restricting the result to the range 'min'
+// to 'max', inclusive. Returns '__CILKRTS_SET_PARAM_SUCCESS' if the conversion
+// succeeds and is in range, '__CILKRTS_SET_PARAM_XRANGE' if the conversion
+// succeeds but is out of range, and '__CILKRTS_SET_PARAM_INVALID' otherwise. In
+// the case of any error, '*out' is unchanged.
+template <typename INT_T, typename CHAR_T>
+int store_int(INT_T *out, const CHAR_T *val, INT_T min, INT_T max)
+{
+ errno = 0;
+ long val_as_long = to_long(val);
+ if (val_as_long == 0 && errno != 0)
+ return __CILKRTS_SET_PARAM_INVALID;
+ if (val_as_long < min || val_as_long == LONG_MIN)
+ return __CILKRTS_SET_PARAM_XRANGE;
+ else if (val_as_long > max || val_as_long == LONG_MAX)
+ return __CILKRTS_SET_PARAM_XRANGE;
+
+ *out = val_as_long;
+ return __CILKRTS_SET_PARAM_SUCCESS;
+}
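+
+// Usage sketch (illustrative): store_int(&g->P, "64", 1, 128) stores 64 and
+// returns __CILKRTS_SET_PARAM_SUCCESS, while store_int(&g->P, "0", 1, 128)
+// returns __CILKRTS_SET_PARAM_XRANGE and leaves g->P unchanged.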
+
+// Implementation of cilkg_set_param templatized on character type.
+// Windows will instantiate with both char and wchar_t.
+// Note that g must have its user settable values set, but need not be fully
+// initialized.
+template <class CHAR_T>
+int set_param_imp(global_state_t* g, const CHAR_T* param, const CHAR_T* value)
+{
+ static const char* const s_force_reduce = "force reduce";
+ static const char* const s_nworkers = "nworkers";
+ static const char* const s_max_user_workers = "max user workers";
+ static const char* const s_local_stacks = "local stacks";
+ static const char* const s_shared_stacks = "shared stacks";
+ static const char* const s_nstacks = "nstacks";
+ static const char* const s_stack_size = "stack size";
+
+ // We must have a parameter and a value
+ if (0 == param)
+ return __CILKRTS_SET_PARAM_INVALID;
+ if (0 == value)
+ return __CILKRTS_SET_PARAM_INVALID;
+
+ if (strmatch(param, s_force_reduce))
+ {
+ // Sets whether we force a reduce operation at every sync. Useful for
+ // debugging reducers. Off by default. Overridden by Cilkscreen
+ //
+ // Documented in cilk_api_<os>.h
+ if (always_force_reduce())
+ // Force reduce is set by cilkscreen. User cannot change it.
+ return __CILKRTS_SET_PARAM_LATE;
+
+ return store_bool(&g->force_reduce, value);
+ }
+ else if (strmatch(param, s_nworkers))
+ {
+ // Set the total number of workers. Overrides count of cores we get
+ // from the OS and the setting of the CILK_NWORKERS environment
+ // variable. Setting to 0 indicates that the default worker count
+ // should be used.
+ //
+ // Documented in cilk_api_<os>.h
+ if (cilkg_singleton_ptr)
+ return __CILKRTS_SET_PARAM_LATE;
+
+ // Fetch the number of cores. There must be at least 1, since we're
+ // executing on *something*, aren't we!?
+ int hardware_cpu_count = __cilkrts_hardware_cpu_count();
+ CILK_ASSERT(hardware_cpu_count > 0);
+
+ int max_cpu_count = 16 * hardware_cpu_count;
+ if (__cilkrts_running_under_sequential_ptool())
+ {
+ hardware_cpu_count = 1;
+ max_cpu_count = 1;
+ }
+ // Allow a value of 0, which means "set to hardware thread count".
+ int ret = store_int(&g->P, value, 0, max_cpu_count);
+ if (0 == g->P)
+ g->P = hardware_cpu_count;
+ return ret;
+ }
+ else if (strmatch(param, s_max_user_workers))
+ {
+ // ** UNDOCUMENTED **
+ //
+ // Sets the number of slots allocated for user worker threads
+ int hardware_cpu_count = __cilkrts_hardware_cpu_count();
+ CILK_ASSERT (hardware_cpu_count > 0);
+
+ return store_int(&g->max_user_workers, value, 1,
+ 16 * hardware_cpu_count);
+ }
+ else if (strmatch(param, s_local_stacks))
+ {
+ // ** UNDOCUMENTED **
+ //
+ // Number of stacks we'll hold in the per-worker stack cache. Maximum
+ // value is 42. See __cilkrts_make_global_state for details.
+ return store_int(&g->fiber_pool_size, value, 0, 42);
+ }
+ else if (strmatch(param, s_shared_stacks))
+ {
+ // ** UNDOCUMENTED **
+ //
+ // Maximum number of stacks we'll hold in the global stack
+ // cache. Maximum value is 42. See __cilkrts_make_global_state for
+ // details.
+ return store_int(&g->global_fiber_pool_size, value, 0, 42);
+ }
+ else if (strmatch(param, s_nstacks))
+ {
+ // Sets the maximum number of stacks permitted at one time. If the
+ // runtime reaches this maximum, it will cease to allocate stacks and
+ // the app will lose parallelism. 0 means unlimited. Default is
+ // unlimited. Minimum is twice the number of worker threads, though
+ // that cannot be tested at this time.
+ //
+ // Undocumented at this time, though there are plans to expose it.
+ // The current implementation is for Linux debugging only and is not
+ // robust enough for users.
+ if (cilkg_singleton_ptr)
+ return __CILKRTS_SET_PARAM_LATE;
+ return store_int<unsigned>(&g->max_stacks, value, 0, INT_MAX);
+ }
+ else if (strmatch(param, s_stack_size))
+ {
+ // ** UNDOCUMENTED **
+ //
+ // Sets the size (in bytes) of the stacks that Cilk creates.
+ // Can only be set before the runtime starts.
+ if (cilkg_singleton_ptr)
+ return __CILKRTS_SET_PARAM_LATE;
+
+ // Maximum value that can be parsed is INT_MAX (32-bit).
+ int ret = store_int<size_t>(&g->stack_size, value, 0, INT_MAX);
+
+ // Process the value the user set (or 0 if the user didn't set
+ // anything) into something nice for the current OS. This
+ // processing is done immediately and stored into
+ // g->stack_size so that a call to get stack size will return
+ // the value that the runtime will actually use.
+ g->stack_size = cilkos_validate_stack_size(g->stack_size);
+ return ret;
+ }
+
+
+ // If we got here, the parameter didn't match any of the known strings
+ return __CILKRTS_SET_PARAM_UNIMP;
+}
+
+inline
+int calc_max_user_workers(global_state_t *g)
+{
+ // If it's been set by the user, give back what we got
+ if (g->max_user_workers > 0)
+ return g->max_user_workers;
+
+ // Calculate it
+ return std::max(3, g->P * 2);
+}
+
+} // end unnamed namespace
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * @brief Returns the global state object. If called for the first time,
+ * initializes the user-settable values in the global state, but does not
+ * initialize the rest of the structure.
+ */
+global_state_t* cilkg_get_user_settable_values()
+{
+ // Environment variable value. More than big enough for a 64-bit signed
+ // integer.
+ char envstr[24];
+
+ // Abbreviating &global_state_singleton as g is not only shorter, it also
+ // facilitates grepping for the string "g->", which appears ubiquitously
+ // in the runtime code.
+ global_state_t* g = &global_state_singleton;
+
+ // TBD: We need synchronization around this loop to prevent
+ // multiple threads from initializing this data.
+ if (! cilkg_user_settable_values_initialized)
+ {
+ size_t len;
+
+ // Preserve stealing disabled since it may have been set by the
+ // debugger
+ int stealing_disabled = g->stealing_disabled;
+
+ // All fields will be zero until set. In particular
+ std::memset(g, 0, sizeof(global_state_t));
+
+ // Fetch the number of cores. There must be at least 1, since we're
+ // executing on *something*, aren't we!?
+ int hardware_cpu_count = __cilkrts_hardware_cpu_count();
+ CILK_ASSERT(hardware_cpu_count > 0);
+
+ bool under_ptool = __cilkrts_running_under_sequential_ptool();
+ if (under_ptool)
+ hardware_cpu_count = 1;
+
+ g->stealing_disabled = stealing_disabled;
+ g->under_ptool = under_ptool;
+ g->force_reduce = 0; // Default Off
+ g->P = hardware_cpu_count; // Defaults to hardware CPU count
+ g->max_user_workers = 0; // 0 unless set by user
+ g->fiber_pool_size = 7; // Arbitrary default
+
+ g->global_fiber_pool_size = 3 * 3 * g->P; // Arbitrary default
+ // 3*P was the default size of the worker array (including
+ // space for extra user workers). This parameter was chosen
+ // to match previous versions of the runtime.
+
+ if (4 == sizeof(void *))
+ g->max_stacks = 1200; // Only 1GB on 32-bit machines
+ else
+ g->max_stacks = 2400; // 2GB on 64-bit machines
+
+ // If we have 2400 1MB stacks, that is 2 gb. If we reach this
+ // limit on a single-socket machine, we may have other
+ // problems. Is 2400 too small for large multicore machines?
+
+ // TBD(jsukha, 11/27/2012): I set this limit on stacks to be a
+ // value independent of P. When running on a Xeon Phi with
+ // small values of P, I recall seeing a few microbenchmarks
+ // (e.g., fib) where a limit of 10*P seemed to be
+ // unnecessarily slowing things down.
+ //
+ // That being said, the code has changed sufficiently that
+ // this observation may no longer be true.
+ //
+ // Note: in general, the worst-case number of stacks required
+ // for a Cilk computation with spawn depth "d" on P workers is
+ // O(Pd). Code with unbalanced recursion may run into issues
+ // with this stack usage.
+
+ g->max_steal_failures = 128; // TBD: depend on max_workers?
+ g->stack_size = 0; // 0 unless set by the user
+
+ // Assume no record or replay log for now
+ g->record_replay_file_name = NULL;
+ g->record_or_replay = RECORD_REPLAY_NONE; // set by user
+
+ if (always_force_reduce())
+ g->force_reduce = true;
+ else if (cilkos_getenv(envstr, sizeof(envstr), "CILK_FORCE_REDUCE"))
+ store_bool(&g->force_reduce, envstr);
+
+ if (under_ptool)
+ g->P = 1; // Ignore environment variable if under cilkscreen
+ else if (cilkos_getenv(envstr, sizeof(envstr), "CILK_NWORKERS"))
+ // Set P to environment variable, but limit to no less than 1
+ // and no more than 16 times the number of hardware threads.
+ store_int(&g->P, envstr, 1, 16 * hardware_cpu_count);
+
+ if (cilkos_getenv(envstr, sizeof(envstr), "CILK_MAX_USER_WORKERS"))
+ // Set max_user_workers to environment variable, but limit to no
+ // less than 1 and no more 16 times the number of hardware
+ // threads. If not specified, defaults (somewhat arbitrarily) to
+ // the larger of 3 and twice the number of hardware threads.
+ store_int(&g->max_user_workers, envstr, 1, 16*hardware_cpu_count);
+
+ if (cilkos_getenv(envstr, sizeof(envstr), "CILK_STEAL_FAILURES"))
+ // Set the number of times a worker should fail to steal before
+ // it looks to see whether it should suspend itself.
+ store_int<unsigned>(&g->max_steal_failures, envstr, 1, INT_MAX);
+
+ // Compute the total number of workers to allocate. Subtract one from
+ // nworkers and user workers so that the first user worker isn't
+ // factored in twice.
+ //
+ // total_workers must be computed now to support __cilkrts_get_total_workers
+ g->total_workers = g->P + calc_max_user_workers(g) - 1;
+
+#ifdef CILK_RECORD_REPLAY
+ // RecordReplay: See if we've been asked to replay a log
+ len = cilkos_getenv(envstr, 0, "CILK_REPLAY_LOG");
+ if (len > 0)
+ {
+ len += 1; // Allow for trailing NUL
+ g->record_or_replay = REPLAY_LOG;
+ g->record_replay_file_name = (char *)__cilkrts_malloc(len);
+ cilkos_getenv(g->record_replay_file_name, len, "CILK_REPLAY_LOG");
+ }
+
+ // RecordReplay: See if we've been asked to record a log
+ len = cilkos_getenv(envstr, 0, "CILK_RECORD_LOG");
+ if (len > 0)
+ {
+ if (RECORD_REPLAY_NONE != g->record_or_replay)
+ cilkos_warning("CILK_RECORD_LOG ignored since CILK_REPLAY_LOG is defined.\n");
+ else
+ {
+ len += 1; // Allow for trailing NUL
+ g->record_or_replay = RECORD_LOG;
+ g->record_replay_file_name = (char *)__cilkrts_malloc(len);
+ cilkos_getenv(g->record_replay_file_name, len, "CILK_RECORD_LOG");
+ }
+ }
+#endif
+
+ cilkg_user_settable_values_initialized = true;
+ }
+
+ return g;
+}
+
+int cilkg_calc_total_workers()
+{
+ global_state_t* g = cilkg_get_user_settable_values();
+
+ // Compute the total number of workers to allocate. Subtract one from
+ // nworkers and user workers so that the first user worker isn't
+ // factored in twice.
+ return g->P + calc_max_user_workers(g) - 1;
+}
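+
+// Worked example (illustrative): with g->P == 8 and max_user_workers left at
+// its default of 0, calc_max_user_workers() returns max(3, 2 * 8) == 16, so
+// cilkg_calc_total_workers() yields 8 + 16 - 1 == 23 worker slots.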
+
+// Should be called while holding the global lock.
+global_state_t* cilkg_init_global_state()
+{
+ if (cilkg_singleton_ptr)
+ return cilkg_singleton_ptr;
+
+ // Get partially-initialized global state.
+ global_state_t* g = cilkg_get_user_settable_values();
+
+ if (g->max_stacks > 0) {
+
+ // nstacks is currently honored on non-Windows systems only.
+
+ // Set an upper bound on the number of stacks that are allocated. If
+ // nstacks is set, each worker gets up to one stack in its cache so that
+ // no one worker can hog all of the free stacks and keep work from being
+ // stolen by the other workers.
+
+ // nstacks corresponds to the number of stacks that will be allocated by
+ // the runtime apart from the initial stack created for each thread by
+ // the system. Therefore, if a user asks for n stacks, and there are
+ // p workers created, the total number of stacks is actually n + p.
+
+ // This feature is primarily for MIC which has flat memory
+ // instead of virtual addresses and tends to run out really quickly.
+ // It is not implemented for Windows, and its non-intuitive
+ // interaction with the local stack cache is specifically to help out
+ // MIC.
+
+ // About max_stacks / P stacks, except we require at least 1
+ // per pool.
+ if (((int)g->max_stacks / g->P) < g->fiber_pool_size)
+ g->fiber_pool_size = g->max_stacks / g->P;
+
+ if (g->fiber_pool_size <= 0) {
+ g->fiber_pool_size = 1;
+ }
+
+ if ((int)g->max_stacks < g->P)
+ g->max_stacks = g->P;
+
+ g->global_fiber_pool_size = g->P * (g->fiber_pool_size+1);
+ }
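+
+ // Worked example (illustrative): with P == 8, max_stacks == 16 and the
+ // default fiber_pool_size of 7, 16 / 8 == 2 is below 7, so each worker's
+ // pool shrinks to 2 stacks and global_fiber_pool_size becomes
+ // 8 * (2 + 1) == 24 stacks held in the global cache.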
+
+ // Number of bytes/address - validation for debugger integration
+ g->addr_size = sizeof(void *);
+
+ __cilkrts_init_stats(&g->stats);
+
+ __cilkrts_frame_malloc_global_init(g);
+
+ g->Q = 0;
+ g->total_workers = cilkg_calc_total_workers();
+ g->system_workers = g->P - 1; // system_workers is here for the debugger.
+ g->work_done = 0;
+ g->workers_running = 0;
+ g->ltqsize = 1024; /* FIXME */
+
+ g->stack_size = cilkos_validate_stack_size(g->stack_size);
+ g->failure_to_allocate_stack = 0;
+
+
+ return g;
+}
+
+void cilkg_publish_global_state(global_state_t* g)
+{
+
+ // TBD: which one of these needs to be executed first? I say
+ // cilkg_singleton_ptr needs to be set last, with an mfence in
+ // between, since it is the flag that cilkg_is_published() is
+ // checking for.
+ __cilkrts_global_state = g;
+ __cilkrts_fence();
+ cilkg_singleton_ptr = g;
+}
+
+void cilkg_deinit_global_state()
+{
+ cilkg_singleton_ptr = NULL;
+ __cilkrts_global_state = NULL;
+}
+
+int cilkg_is_published(void)
+{
+ return NULL != cilkg_singleton_ptr;
+}
+
+int cilkg_set_param(const char* param, const char* value)
+{
+ return set_param_imp(cilkg_get_user_settable_values(), param, value);
+}
+
+#ifdef _WIN32
+int cilkg_set_param_w(const wchar_t* param, const wchar_t* value)
+{
+ return set_param_imp(cilkg_get_user_settable_values(), param, value);
+}
+#endif
+
+extern "C++" {
+ // C++ scheduler function (that may throw exceptions)
+ typedef void cpp_scheduler_t(__cilkrts_worker *w);
+}
+
+void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w)
+{
+ global_state_t* g = cilkg_get_global_state();
+ CILK_ASSERT(g->scheduler);
+
+ cpp_scheduler_t* scheduler = (cpp_scheduler_t*) g->scheduler;
+
+ try {
+ scheduler(w);
+ } catch (...) {
+ __cilkrts_bug("Exception escaped Cilk context");
+ }
+}
+
+__CILKRTS_END_EXTERN_C
+
+/* End global_state.cpp */
diff --git a/gcc-4.9/libcilkrts/runtime/global_state.h b/gcc-4.9/libcilkrts/runtime/global_state.h
new file mode 100644
index 000000000..ef455e479
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/global_state.h
@@ -0,0 +1,417 @@
+/* global_state.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file global_state.h
+ *
+ * @brief The global_state_t structure contains most of the global context
+ * maintained by the Intel Cilk runtime.
+ */
+
+#ifndef INCLUDED_GLOBAL_STATE_DOT_H
+#define INCLUDED_GLOBAL_STATE_DOT_H
+
+#include <cilk/common.h>
+
+#include "frame_malloc.h"
+#include "stats.h"
+#include "bug.h"
+#include "cilk_fiber.h"
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * Non-null place-holder for a stack handle that has no meaningful value.
+ */
+#define PLACEHOLDER_FIBER ((cilk_fiber *) -2)
+
+/**
+ * States for record_or_replay
+ */
+enum record_replay_t {
+ RECORD_REPLAY_NONE,
+ RECORD_LOG,
+ REPLAY_LOG
+};
+
+/**
+ * @brief The global state is a structure that is shared by all workers in
+ * Cilk.
+ *
+ * Make the structure ready for use by calling
+ * cilkg_init_global_state() and then cilkg_publish_global_state().
+ *
+ * The same global lock should be held while both of these methods are
+ * called. These methods are split because it is useful to execute
+ * other runtime initialization code in between.
+ *
+ * After cilkg_publish_global_state() has completed, Cilk runtime
+ * methods may call cilkg_get_global_state() to look at the published
+ * value without holding the global lock.
+ *
+ * Finally, clean up the global state by calling
+ * cilkg_deinit_global_state(). This method should be called only
+ * after all calls to cilkg_get_global_state() have completed, and
+ * while holding the global lock.
+ *
+ * Before initialization and after deinitialization, the fields in the
+ * global state have unspecified values, except for a few special
+ * fields labeled "USER SETTING", which can be read and written before
+ * initialization and after deinitialization.
+ */
+
+struct global_state_t { /* COMMON_PORTABLE */
+
+ /* Fields described as "(fixed)" should not be changed after
+ * initialization.
+ */
+
+ /*************************************************************************
+ * Note that debugger integration must reach into the
+ * global state! The debugger integration depends on the
+ * offsets of the addr_size, system_workers, total_workers,
+ * stealing_disabled, sysdep, and workers. If these offsets change, the
+ * debugger integration library will need to be changed to match!!!
+ *************************************************************************/
+
+ int addr_size; ///< Number of bytes for an address, used by debugger (fixed)
+
+ int system_workers; ///< Number of system workers (fixed)
+
+ /**
+ * @brief USER SETTING: Maximum number of user workers that can be
+ * bound to cilk workers.
+ *
+ * 0 unless set by user. Call cilkg_calc_max_user_workers to get
+ * the value.
+ */
+ int max_user_workers;
+
+ int total_workers; ///< Total number of worker threads allocated (fixed)
+
+ int workers_running; ///< True when system workers have been started
+
+ /// Set by debugger to disable stealing (fixed)
+ int stealing_disabled;
+
+ /// System-dependent part of the global state
+ struct global_sysdep_state *sysdep;
+
+ /// Array of worker structures.
+ __cilkrts_worker **workers;
+
+ /******* END OF DEBUGGER-INTEGRATION FIELDS ***************/
+
+ /// Number of frames in each worker's lazy task queue
+ __STDNS size_t ltqsize;
+
+ /**
+ * @brief USER SETTING: Force all possible reductions.
+ *
+ * TRUE if running a p-tool that requires reducers to call the reduce()
+ * method even if no actual stealing occurs.
+ *
+ * When set to TRUE, the runtime will simulate steals, forcing calls to
+ * the reduce() methods of reducers.
+ *
+ */
+ int force_reduce;
+
+ /// USER SETTING: Per-worker fiber pool size
+ int fiber_pool_size;
+
+ /// USER SETTING: Global fiber pool size
+ int global_fiber_pool_size;
+
+ /**
+ * @brief TRUE when workers should exit scheduling loop so we can
+ * shut down the runtime and free the global state.
+ *
+ * @note @c work_done will be checked *FREQUENTLY* in the scheduling loop
+ * by idle workers. We need to ensure that it's not in a cache line which
+ * may be invalidated by other cores. The surrounding fields are either
+ * constant after initialization or not used until shutdown (stats) so we
+ * should be OK.
+ */
+ volatile int work_done;
+
+ int under_ptool; ///< True when running under a serial PIN tool
+
+ statistics stats; ///< Statistics on use of runtime
+
+ /**
+ * @brief USER SETTING: Maximum number of stacks the runtime will
+ * allocate (apart from those created by the OS when worker
+ * threads are created).
+ *
+ * If max_stacks == 0, there is no pre-defined maximum.
+ */
+ unsigned max_stacks;
+
+ /// Size of each stack
+ size_t stack_size;
+
+ /// Global cache for per-worker memory
+ struct __cilkrts_frame_cache frame_malloc;
+
+ /// Global fiber pool
+ cilk_fiber_pool fiber_pool;
+
+
+ /**
+ * @brief Track whether the runtime has failed to allocate a
+ * stack.
+ *
+ * Setting this flag prevents multiple warnings from being
+ * issued.
+ */
+ int failure_to_allocate_stack;
+
+ /**
+ * @brief USER SETTING: indicate record or replay log.
+ * Set to NULL if not used in this run.
+ */
+ char *record_replay_file_name;
+
+ /**
+ * @brief Record/replay state.
+ * Valid states are:
+ * RECORD_REPLAY_NONE - Not recording or replaying a log
+ * RECORD_LOG - Recording a log for replay later
+ * REPLAY_LOG - Replay a log recorded earlier
+ */
+ enum record_replay_t record_or_replay;
+
+ /**
+ * @brief Buffer to force max_steal_failures to appear on a
+ * different cache line from the previous member variables.
+ *
+ * This padding is needed because max_steal_failures is read
+ * constantly and other modified values in the global state will
+ * cause thrashing.
+ */
+ char cache_buf[64];
+
+ /**
+ * @brief Maximum number of times a thread should fail to steal
+ * before checking if Cilk is shutting down.
+ */
+ unsigned int max_steal_failures;
+
+ /// Pointer to scheduler entry point
+ void (*scheduler)(__cilkrts_worker *w);
+
+ /**
+ * @brief Buffer to force P and Q to appear on a different cache
+ * line from the previous member variables.
+ */
+ char cache_buf_2[64];
+
+ int P; ///< USER SETTING: number of system workers + 1 (fixed)
+ int Q; ///< Number of user threads currently bound to workers
+};
+
+/**
+ * @brief Initialize the global state object. This method must complete
+ * before any fields in the global state are referenced, except those
+ * specified as "user-settable values".
+ */
+global_state_t* cilkg_init_global_state();
+
+/**
+ * @brief Publish the global state object, so that
+ * cilkg_is_published can return true.
+ *
+ * @param g - the global state created by cilkg_init_global_state() to
+ * publish.
+ *
+ * After the global state object has been published, a thread should
+ * not modify this state unless it has exclusive access (i.e., holds
+ * the global lock).
+ */
+void cilkg_publish_global_state(global_state_t* g);
+
+/**
+ * @brief Return true if the global state has been fully initialized
+ * and published, and has not been deinitialized.
+ */
+int cilkg_is_published(void);
+
+/**
+ * @brief De-initializes the global state object. Must be called to free
+ * resources when the global state is no longer needed.
+ */
+void cilkg_deinit_global_state(void);
+
+/**
+ * @brief Returns the global state object. Result is valid only if the
+ * global state has been published (see cilkg_publish_global_state()).
+ */
+static inline
+global_state_t* cilkg_get_global_state(void)
+{
+ // "private" extern declaration:
+ extern global_state_t *cilkg_singleton_ptr;
+
+ __CILKRTS_ASSERT(cilkg_singleton_ptr); // Debug only
+ return cilkg_singleton_ptr;
+}
+
+
+/**
+ * @brief Implementation of __cilkrts_set_param.
+ *
+ * Set user controllable parameters
+ * @param param - string specifying parameter to be set
+ * @param value - string specifying new value
+ * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
+ * CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
+ * CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
+ *
+ * @attention The wide character version __cilkrts_set_param_w() is available
+ * only on Windows.
+ *
+ * Allowable parameter names:
+ *
+ * - "nworkers" - number of processors that should run Cilk code.
+ * The value is a string of digits to be parsed by strtol.
+ *
+ * - "force reduce" - test reducer callbacks by allocating new views
+ * for every spawn within which a reducer is accessed. This can
+ * significantly reduce performance. The value is "1" or "true"
+ * to enable, "0" or "false" to disable.
+ * @warning Enabling "force reduce" when running with more than a single
+ * worker is currently broken.
+ *
+ * - "max user workers" - (Not publicly documented) Sets the number of slots
+ * allocated for user worker threads
+ *
+ * - "local stacks" - (Not publicly documented) Number of stacks we'll hold in
+ * the per-worker stack cache. Range 1 .. 42. See
+ * cilkg_init_global_state for details.
+ *
+ * - "shared stacks" - (Not publicly documented) Maximum number of stacks
+ * we'll hold in the global stack cache. Maximum value is 42. See
+ * __cilkrts_make_global_state for details
+ *
+ * - "nstacks" - (Not publicly documented at this time, though it may be
+ * exposed in the future) Sets the maximum number of stacks permitted at one
+ * time. If the runtime reaches this maximum, it will cease to allocate
+ * stacks and the app will lose parallelism. 0 means unlimited. Default is
+ * unlimited. Minimum is twice the number of worker threads, though that
+ * cannot be tested at this time.
+ */
+int cilkg_set_param(const char* param, const char* value);
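+
+/* Usage sketch (illustrative): the public __cilkrts_set_param() entry point
+ * forwards to this function, so before the runtime starts a program might do
+ *
+ *   int rc = __cilkrts_set_param("nworkers", "4");
+ *   if (rc != 0)            // 0 indicates success
+ *       report_error(rc);   // report_error is a hypothetical handler
+ *
+ * Once the runtime has started, parameters such as "nworkers" can no longer
+ * be changed and the call fails with the "late" error code.
+ */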
+#ifdef _WIN32
+/**
+ * @brief Implementation of __cilkrts_set_param for Unicode characters on
+ * Windows. See the documentation on @ref cilkg_set_param for more details.
+ *
+ * Set user controllable parameters
+ * @param param - string specifying parameter to be set
+ * @param value - string specifying new value
+ * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
+ * CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
+ * CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
+ */
+int cilkg_set_param_w(const wchar_t* param, const wchar_t* value);
+#endif
+
+/**
+ * @brief implementation of __cilkrts_get_nworkers()
+ */
+static inline
+int cilkg_get_nworkers(void)
+{
+ // "private" extern declaration
+ extern global_state_t* cilkg_get_user_settable_values(void);
+ return cilkg_get_user_settable_values()->P;
+}
+
+/**
+ * @brief implementation of __cilkrts_get_total_workers()
+ */
+static inline
+int cilkg_get_total_workers(void)
+{
+ // "private" extern declaration
+ extern int cilkg_calc_total_workers(void);
+
+ // This number can fluctuate until initialization so we
+ // compute it from scratch
+ return cilkg_calc_total_workers();
+}
+
+/**
+ * @brief implementation of __cilkrts_get_force_reduce()
+ */
+static inline
+int cilkg_get_force_reduce(void)
+{
+ // "private" extern declaration
+ extern global_state_t* cilkg_get_user_settable_values(void);
+ return cilkg_get_user_settable_values()->force_reduce;
+}
+
+/**
+ * @brief implementation of __cilkrts_get_stack_size()
+ */
+static inline
+size_t cilkg_get_stack_size(void)
+{
+ // "private" extern declaration
+ extern global_state_t* cilkg_get_user_settable_values(void);
+ return cilkg_get_user_settable_values()->stack_size;
+}
+
+/**
+ * @brief Run the scheduler function stored in the global_state
+ *
+ * Look up the scheduler function in global_state and run it. Report a fatal
+ * error if an exception escapes the scheduler function.
+ *
+ * @param w - Worker structure to associate with the current thread.
+ *
+ * @attention The scheduler field of the global state must be set before this
+ * function is called.
+ */
+void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w);
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_GLOBAL_STATE_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/jmpbuf.c b/gcc-4.9/libcilkrts/runtime/jmpbuf.c
new file mode 100644
index 000000000..39b51a593
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/jmpbuf.c
@@ -0,0 +1,48 @@
+/* jmpbuf.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#include "jmpbuf.h"
+
+/*
+ * C99 requires that every inline function with external linkage have
+ * one extern declaration in the program.
+ */
+extern char *__cilkrts_get_sp(__cilkrts_stack_frame *sf);
+extern ptrdiff_t __cilkrts_get_frame_size(__cilkrts_stack_frame *sf);
+
+/* End jmpbuf.c */
diff --git a/gcc-4.9/libcilkrts/runtime/jmpbuf.h b/gcc-4.9/libcilkrts/runtime/jmpbuf.h
new file mode 100644
index 000000000..60573f3a5
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/jmpbuf.h
@@ -0,0 +1,136 @@
+/* jmpbuf.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file jmpbuf.h
+ *
+ * @brief Macros and functions to access the _JUMP_BUFFER initialized by a
+ * call to CILK_SETJMP before a cilk_spawn or cilk_sync. The definitions of
+ * CILK_SETJMP and CILK_LONGJMP are OS dependent and in abi.h
+ *
+ */
+
+#ifndef INCLUDED_JMPBUF_DOT_H
+#define INCLUDED_JMPBUF_DOT_H
+
+#include <cilk/common.h>
+#include <internal/abi.h>
+#include <stddef.h>
+#include <setjmp.h>
+
+#if 0 /* defined CILK_USE_C_SETJMP && defined JB_RSP */
+# define JMPBUF_SP(ctx) (ctx)[0].__jmpbuf[JB_RSP]
+# define JMPBUF_FP(ctx) (ctx)[0].__jmpbuf[JB_RBP]
+# define JMPBUF_PC(ctx) (ctx)[0].__jmpbuf[JB_PC]
+#elif 0 /* defined CILK_USE_C_SETJMP && defined JB_SP */
+# define JMPBUF_SP(ctx) (ctx)[0].__jmpbuf[JB_SP]
+# define JMPBUF_FP(ctx) (ctx)[0].__jmpbuf[JB_BP]
+# define JMPBUF_PC(ctx) (ctx)[0].__jmpbuf[JB_PC]
+#elif defined _WIN64
+# define JMPBUF_SP(ctx) ((_JUMP_BUFFER*)(&(ctx)))->Rsp
+# define JMPBUF_FP(ctx) ((_JUMP_BUFFER*)(&(ctx)))->Rbp
+# define JMPBUF_PC(ctx) ((_JUMP_BUFFER*)(&(ctx)))->Rip
+#elif defined _WIN32
+ /** Fetch stack pointer from a __cilkrts_stack_frame */
+# define JMPBUF_SP(ctx) (ctx).Esp
+ /** Fetch frame pointer from a __cilkrts_stack_frame */
+# define JMPBUF_FP(ctx) (ctx).Ebp
+ /** Fetch program counter from a __cilkrts_stack_frame */
+# define JMPBUF_PC(ctx) (ctx).Eip
+#else /* defined __GNUC__ || defined __ICC */
+ /* word 0 is frame address
+ * word 1 is resume address
+ * word 2 is stack address */
+# define JMPBUF_FP(ctx) (ctx)[0]
+# define JMPBUF_PC(ctx) (ctx)[1]
+# define JMPBUF_SP(ctx) (ctx)[2]
+#endif
+
+/**
+ * @brief Get frame pointer from jump buffer in __cilkrts_stack_frame.
+ */
+#define FP(SF) JMPBUF_FP((SF)->ctx)
+
+/**
+ * @brief Get program counter from jump buffer in __cilkrts_stack_frame.
+ */
+#define PC(SF) JMPBUF_PC((SF)->ctx)
+
+/**
+ * @brief Get stack pointer from jump buffer in __cilkrts_stack_frame.
+ */
+#define SP(SF) JMPBUF_SP((SF)->ctx)
+
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * Fetch the stack pointer from a __cilkrts_stack_frame. The jmpbuf was
+ * initialized before a cilk_spawn or cilk_sync.
+ *
+ * @param sf __cilkrts_stack_frame containing the jmpbuf.
+ *
+ * @return the stack pointer from the ctx.
+ */
+inline char *__cilkrts_get_sp(__cilkrts_stack_frame *sf)
+{
+ return (char *)SP(sf);
+}
+
+/**
+ * Calculate the frame size from __cilkrts_stack_frame. The jmpbuf was
+ * initialized before a cilk_spawn or cilk_sync.
+ *
+ * @warning Returning an arbitrary value on Windows!
+ *
+ * @param sf __cilkrts_stack_frame containing the jmpbuf.
+ *
+ * @return the frame size computed from the ctx.
+ */
+inline ptrdiff_t __cilkrts_get_frame_size(__cilkrts_stack_frame *sf)
+{
+#ifdef _WIN32
+ if (0 == SP(sf))
+ return 256; // Arbitrary!
+#endif
+ return (ptrdiff_t)FP(sf) - (ptrdiff_t)SP(sf);
+}
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_JMPBUF_DOT_H)
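The macros above reduce frame-size computation to pointer arithmetic on two saved words: on the GCC/Unix path, word 0 of the jump buffer holds the frame pointer and word 2 holds the stack pointer, so the frame size is simply FP minus SP. Below is a minimal standalone sketch of that arithmetic, assuming that layout; the ctx array and the byte buffer standing in for the active stack are illustrative only and are not part of this file.

    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        /* Stand-in for the GCC/Unix jump buffer layout:
           word 0 = frame pointer, word 1 = resume address, word 2 = stack pointer. */
        void *ctx[5] = {0};

        char pretend_stack[256];              /* pretend active stack region         */
        ctx[0] = &pretend_stack[256];         /* "frame pointer": high end of region */
        ctx[2] = &pretend_stack[0];           /* "stack pointer": low end of region  */

        /* Same arithmetic as __cilkrts_get_frame_size: FP - SP. */
        ptrdiff_t frame_size = (ptrdiff_t)(char *)ctx[0] - (ptrdiff_t)(char *)ctx[2];

        printf("frame size = %td bytes\n", frame_size);   /* prints 256 */
        return 0;
    }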
diff --git a/gcc-4.9/libcilkrts/runtime/linux-symbols.ver b/gcc-4.9/libcilkrts/runtime/linux-symbols.ver
new file mode 100644
index 000000000..aeb4a5fb1
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/linux-symbols.ver
@@ -0,0 +1,369 @@
+/*
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+CILKABI0
+{
+ global:
+ __cilkrts_bind_thread;
+ __cilkrts_cilk_for_32;
+ __cilkrts_cilk_for_64;
+ __cilkrts_debugger_notification;
+ __cilkrts_dump_stats;
+ __cilkrts_end_cilk;
+ __cilkrts_enter_frame;
+ __cilkrts_enter_frame_fast;
+ __cilkrts_get_force_reduce;
+ __cilkrts_get_nworkers;
+ __cilkrts_get_tls_worker;
+ __cilkrts_get_tls_worker_fast;
+ __cilkrts_get_total_workers;
+ __cilkrts_get_worker_number;
+ __cilkrts_global_state;
+ __cilkrts_hyper_create;
+ __cilkrts_hyper_destroy;
+ __cilkrts_hyper_lookup;
+ __cilkrts_hyperobject_alloc;
+ __cilkrts_hyperobject_dealloc;
+ __cilkrts_hyperobject_noop_destroy;
+ __cilkrts_init;
+ __cilkrts_irml_version;
+ __cilkrts_leave_frame;
+ __cilkrts_metacall;
+ __cilkrts_rethrow;
+ __cilkrts_return_exception;
+ __cilkrts_set_param;
+ __cilkrts_sync;
+ __cilkrts_synched;
+ __cilkrts_worker_stub;
+ local: *;
+};
+
+CILKABI1
+{
+ global:
+ __cilkrts_bind_thread_1;
+ __cilkrts_bump_loop_rank;
+ __cilkrts_bump_loop_rank_internal;
+ __cilkrts_bump_worker_rank;
+ __cilkrts_bump_worker_rank_internal;
+ __cilkrts_enter_frame_1;
+ __cilkrts_enter_frame_fast_1;
+ __cilkrts_get_pedigree_info;
+ __cilkrts_get_pedigree_internal;
+ __cilkrts_get_sf;
+ __cilkrts_get_stack_size;
+ __cilkrts_get_worker_rank;
+ __cilkrts_save_fp_ctrl_state;
+ __cilkrts_stack_alloc;
+ __cilkrts_stack_free;
+ __cilkrts_watch_stack;
+} CILKABI0;
+
+CILKLIB1.02
+{
+ global:
+ cilk_c_reducer_max_identity_char;
+ cilk_c_reducer_max_identity_double;
+ cilk_c_reducer_max_identity_float;
+ cilk_c_reducer_max_identity_int;
+ cilk_c_reducer_max_identity_long;
+ cilk_c_reducer_max_identity_longdouble;
+ cilk_c_reducer_max_identity_longlong;
+ cilk_c_reducer_max_identity_schar;
+ cilk_c_reducer_max_identity_short;
+ cilk_c_reducer_max_identity_uchar;
+ cilk_c_reducer_max_identity_uint;
+ cilk_c_reducer_max_identity_ulong;
+ cilk_c_reducer_max_identity_ulonglong;
+ cilk_c_reducer_max_identity_unsigned;
+ cilk_c_reducer_max_identity_ushort;
+ cilk_c_reducer_max_identity_wchar_t;
+ cilk_c_reducer_max_index_identity_char;
+ cilk_c_reducer_max_index_identity_double;
+ cilk_c_reducer_max_index_identity_float;
+ cilk_c_reducer_max_index_identity_int;
+ cilk_c_reducer_max_index_identity_long;
+ cilk_c_reducer_max_index_identity_longdouble;
+ cilk_c_reducer_max_index_identity_longlong;
+ cilk_c_reducer_max_index_identity_schar;
+ cilk_c_reducer_max_index_identity_short;
+ cilk_c_reducer_max_index_identity_uchar;
+ cilk_c_reducer_max_index_identity_uint;
+ cilk_c_reducer_max_index_identity_ulong;
+ cilk_c_reducer_max_index_identity_ulonglong;
+ cilk_c_reducer_max_index_identity_unsigned;
+ cilk_c_reducer_max_index_identity_ushort;
+ cilk_c_reducer_max_index_identity_wchar_t;
+ cilk_c_reducer_max_index_reduce_char;
+ cilk_c_reducer_max_index_reduce_double;
+ cilk_c_reducer_max_index_reduce_float;
+ cilk_c_reducer_max_index_reduce_int;
+ cilk_c_reducer_max_index_reduce_long;
+ cilk_c_reducer_max_index_reduce_longdouble;
+ cilk_c_reducer_max_index_reduce_longlong;
+ cilk_c_reducer_max_index_reduce_schar;
+ cilk_c_reducer_max_index_reduce_short;
+ cilk_c_reducer_max_index_reduce_uchar;
+ cilk_c_reducer_max_index_reduce_uint;
+ cilk_c_reducer_max_index_reduce_ulong;
+ cilk_c_reducer_max_index_reduce_ulonglong;
+ cilk_c_reducer_max_index_reduce_unsigned;
+ cilk_c_reducer_max_index_reduce_ushort;
+ cilk_c_reducer_max_index_reduce_wchar_t;
+ cilk_c_reducer_max_reduce_char;
+ cilk_c_reducer_max_reduce_double;
+ cilk_c_reducer_max_reduce_float;
+ cilk_c_reducer_max_reduce_int;
+ cilk_c_reducer_max_reduce_long;
+ cilk_c_reducer_max_reduce_longdouble;
+ cilk_c_reducer_max_reduce_longlong;
+ cilk_c_reducer_max_reduce_schar;
+ cilk_c_reducer_max_reduce_short;
+ cilk_c_reducer_max_reduce_uchar;
+ cilk_c_reducer_max_reduce_uint;
+ cilk_c_reducer_max_reduce_ulong;
+ cilk_c_reducer_max_reduce_ulonglong;
+ cilk_c_reducer_max_reduce_unsigned;
+ cilk_c_reducer_max_reduce_ushort;
+ cilk_c_reducer_max_reduce_wchar_t;
+ cilk_c_reducer_min_identity_char;
+ cilk_c_reducer_min_identity_double;
+ cilk_c_reducer_min_identity_float;
+ cilk_c_reducer_min_identity_int;
+ cilk_c_reducer_min_identity_long;
+ cilk_c_reducer_min_identity_longdouble;
+ cilk_c_reducer_min_identity_longlong;
+ cilk_c_reducer_min_identity_schar;
+ cilk_c_reducer_min_identity_short;
+ cilk_c_reducer_min_identity_uchar;
+ cilk_c_reducer_min_identity_uint;
+ cilk_c_reducer_min_identity_ulong;
+ cilk_c_reducer_min_identity_ulonglong;
+ cilk_c_reducer_min_identity_unsigned;
+ cilk_c_reducer_min_identity_ushort;
+ cilk_c_reducer_min_identity_wchar_t;
+ cilk_c_reducer_min_index_identity_char;
+ cilk_c_reducer_min_index_identity_double;
+ cilk_c_reducer_min_index_identity_float;
+ cilk_c_reducer_min_index_identity_int;
+ cilk_c_reducer_min_index_identity_long;
+ cilk_c_reducer_min_index_identity_longdouble;
+ cilk_c_reducer_min_index_identity_longlong;
+ cilk_c_reducer_min_index_identity_schar;
+ cilk_c_reducer_min_index_identity_short;
+ cilk_c_reducer_min_index_identity_uchar;
+ cilk_c_reducer_min_index_identity_uint;
+ cilk_c_reducer_min_index_identity_ulong;
+ cilk_c_reducer_min_index_identity_ulonglong;
+ cilk_c_reducer_min_index_identity_unsigned;
+ cilk_c_reducer_min_index_identity_ushort;
+ cilk_c_reducer_min_index_identity_wchar_t;
+ cilk_c_reducer_min_index_reduce_char;
+ cilk_c_reducer_min_index_reduce_double;
+ cilk_c_reducer_min_index_reduce_float;
+ cilk_c_reducer_min_index_reduce_int;
+ cilk_c_reducer_min_index_reduce_long;
+ cilk_c_reducer_min_index_reduce_longdouble;
+ cilk_c_reducer_min_index_reduce_longlong;
+ cilk_c_reducer_min_index_reduce_schar;
+ cilk_c_reducer_min_index_reduce_short;
+ cilk_c_reducer_min_index_reduce_uchar;
+ cilk_c_reducer_min_index_reduce_uint;
+ cilk_c_reducer_min_index_reduce_ulong;
+ cilk_c_reducer_min_index_reduce_ulonglong;
+ cilk_c_reducer_min_index_reduce_unsigned;
+ cilk_c_reducer_min_index_reduce_ushort;
+ cilk_c_reducer_min_index_reduce_wchar_t;
+ cilk_c_reducer_min_reduce_char;
+ cilk_c_reducer_min_reduce_double;
+ cilk_c_reducer_min_reduce_float;
+ cilk_c_reducer_min_reduce_int;
+ cilk_c_reducer_min_reduce_long;
+ cilk_c_reducer_min_reduce_longdouble;
+ cilk_c_reducer_min_reduce_longlong;
+ cilk_c_reducer_min_reduce_schar;
+ cilk_c_reducer_min_reduce_short;
+ cilk_c_reducer_min_reduce_uchar;
+ cilk_c_reducer_min_reduce_uint;
+ cilk_c_reducer_min_reduce_ulong;
+ cilk_c_reducer_min_reduce_ulonglong;
+ cilk_c_reducer_min_reduce_unsigned;
+ cilk_c_reducer_min_reduce_ushort;
+ cilk_c_reducer_min_reduce_wchar_t;
+ cilk_c_reducer_opadd_identity_char;
+ cilk_c_reducer_opadd_identity_double;
+ cilk_c_reducer_opadd_identity_float;
+ cilk_c_reducer_opadd_identity_int;
+ cilk_c_reducer_opadd_identity_long;
+ cilk_c_reducer_opadd_identity_longdouble;
+ cilk_c_reducer_opadd_identity_longlong;
+ cilk_c_reducer_opadd_identity_schar;
+ cilk_c_reducer_opadd_identity_short;
+ cilk_c_reducer_opadd_identity_uchar;
+ cilk_c_reducer_opadd_identity_uint;
+ cilk_c_reducer_opadd_identity_ulong;
+ cilk_c_reducer_opadd_identity_ulonglong;
+ cilk_c_reducer_opadd_identity_unsigned;
+ cilk_c_reducer_opadd_identity_ushort;
+ cilk_c_reducer_opadd_identity_wchar_t;
+ cilk_c_reducer_opadd_reduce_char;
+ cilk_c_reducer_opadd_reduce_double;
+ cilk_c_reducer_opadd_reduce_float;
+ cilk_c_reducer_opadd_reduce_int;
+ cilk_c_reducer_opadd_reduce_long;
+ cilk_c_reducer_opadd_reduce_longdouble;
+ cilk_c_reducer_opadd_reduce_longlong;
+ cilk_c_reducer_opadd_reduce_schar;
+ cilk_c_reducer_opadd_reduce_short;
+ cilk_c_reducer_opadd_reduce_uchar;
+ cilk_c_reducer_opadd_reduce_uint;
+ cilk_c_reducer_opadd_reduce_ulong;
+ cilk_c_reducer_opadd_reduce_ulonglong;
+ cilk_c_reducer_opadd_reduce_unsigned;
+ cilk_c_reducer_opadd_reduce_ushort;
+ cilk_c_reducer_opadd_reduce_wchar_t;
+ cilk_c_reducer_opand_identity_char;
+ cilk_c_reducer_opand_identity_int;
+ cilk_c_reducer_opand_identity_long;
+ cilk_c_reducer_opand_identity_longlong;
+ cilk_c_reducer_opand_identity_schar;
+ cilk_c_reducer_opand_identity_short;
+ cilk_c_reducer_opand_identity_uchar;
+ cilk_c_reducer_opand_identity_uint;
+ cilk_c_reducer_opand_identity_ulong;
+ cilk_c_reducer_opand_identity_ulonglong;
+ cilk_c_reducer_opand_identity_unsigned;
+ cilk_c_reducer_opand_identity_ushort;
+ cilk_c_reducer_opand_identity_wchar_t;
+ cilk_c_reducer_opand_reduce_char;
+ cilk_c_reducer_opand_reduce_int;
+ cilk_c_reducer_opand_reduce_long;
+ cilk_c_reducer_opand_reduce_longlong;
+ cilk_c_reducer_opand_reduce_schar;
+ cilk_c_reducer_opand_reduce_short;
+ cilk_c_reducer_opand_reduce_uchar;
+ cilk_c_reducer_opand_reduce_uint;
+ cilk_c_reducer_opand_reduce_ulong;
+ cilk_c_reducer_opand_reduce_ulonglong;
+ cilk_c_reducer_opand_reduce_unsigned;
+ cilk_c_reducer_opand_reduce_ushort;
+ cilk_c_reducer_opand_reduce_wchar_t;
+ cilk_c_reducer_opmul_identity_char;
+ cilk_c_reducer_opmul_identity_double;
+ cilk_c_reducer_opmul_identity_float;
+ cilk_c_reducer_opmul_identity_int;
+ cilk_c_reducer_opmul_identity_long;
+ cilk_c_reducer_opmul_identity_longdouble;
+ cilk_c_reducer_opmul_identity_longlong;
+ cilk_c_reducer_opmul_identity_schar;
+ cilk_c_reducer_opmul_identity_short;
+ cilk_c_reducer_opmul_identity_uchar;
+ cilk_c_reducer_opmul_identity_uint;
+ cilk_c_reducer_opmul_identity_ulong;
+ cilk_c_reducer_opmul_identity_ulonglong;
+ cilk_c_reducer_opmul_identity_unsigned;
+ cilk_c_reducer_opmul_identity_ushort;
+ cilk_c_reducer_opmul_identity_wchar_t;
+ cilk_c_reducer_opmul_reduce_char;
+ cilk_c_reducer_opmul_reduce_double;
+ cilk_c_reducer_opmul_reduce_float;
+ cilk_c_reducer_opmul_reduce_int;
+ cilk_c_reducer_opmul_reduce_long;
+ cilk_c_reducer_opmul_reduce_longdouble;
+ cilk_c_reducer_opmul_reduce_longlong;
+ cilk_c_reducer_opmul_reduce_schar;
+ cilk_c_reducer_opmul_reduce_short;
+ cilk_c_reducer_opmul_reduce_uchar;
+ cilk_c_reducer_opmul_reduce_uint;
+ cilk_c_reducer_opmul_reduce_ulong;
+ cilk_c_reducer_opmul_reduce_ulonglong;
+ cilk_c_reducer_opmul_reduce_unsigned;
+ cilk_c_reducer_opmul_reduce_ushort;
+ cilk_c_reducer_opmul_reduce_wchar_t;
+ cilk_c_reducer_opor_identity_char;
+ cilk_c_reducer_opor_identity_int;
+ cilk_c_reducer_opor_identity_long;
+ cilk_c_reducer_opor_identity_longlong;
+ cilk_c_reducer_opor_identity_schar;
+ cilk_c_reducer_opor_identity_short;
+ cilk_c_reducer_opor_identity_uchar;
+ cilk_c_reducer_opor_identity_uint;
+ cilk_c_reducer_opor_identity_ulong;
+ cilk_c_reducer_opor_identity_ulonglong;
+ cilk_c_reducer_opor_identity_unsigned;
+ cilk_c_reducer_opor_identity_ushort;
+ cilk_c_reducer_opor_identity_wchar_t;
+ cilk_c_reducer_opor_reduce_char;
+ cilk_c_reducer_opor_reduce_int;
+ cilk_c_reducer_opor_reduce_long;
+ cilk_c_reducer_opor_reduce_longlong;
+ cilk_c_reducer_opor_reduce_schar;
+ cilk_c_reducer_opor_reduce_short;
+ cilk_c_reducer_opor_reduce_uchar;
+ cilk_c_reducer_opor_reduce_uint;
+ cilk_c_reducer_opor_reduce_ulong;
+ cilk_c_reducer_opor_reduce_ulonglong;
+ cilk_c_reducer_opor_reduce_unsigned;
+ cilk_c_reducer_opor_reduce_ushort;
+ cilk_c_reducer_opor_reduce_wchar_t;
+ cilk_c_reducer_opxor_identity_char;
+ cilk_c_reducer_opxor_identity_int;
+ cilk_c_reducer_opxor_identity_long;
+ cilk_c_reducer_opxor_identity_longlong;
+ cilk_c_reducer_opxor_identity_schar;
+ cilk_c_reducer_opxor_identity_short;
+ cilk_c_reducer_opxor_identity_uchar;
+ cilk_c_reducer_opxor_identity_uint;
+ cilk_c_reducer_opxor_identity_ulong;
+ cilk_c_reducer_opxor_identity_ulonglong;
+ cilk_c_reducer_opxor_identity_unsigned;
+ cilk_c_reducer_opxor_identity_ushort;
+ cilk_c_reducer_opxor_identity_wchar_t;
+ cilk_c_reducer_opxor_reduce_char;
+ cilk_c_reducer_opxor_reduce_int;
+ cilk_c_reducer_opxor_reduce_long;
+ cilk_c_reducer_opxor_reduce_longlong;
+ cilk_c_reducer_opxor_reduce_schar;
+ cilk_c_reducer_opxor_reduce_short;
+ cilk_c_reducer_opxor_reduce_uchar;
+ cilk_c_reducer_opxor_reduce_uint;
+ cilk_c_reducer_opxor_reduce_ulong;
+ cilk_c_reducer_opxor_reduce_ulonglong;
+ cilk_c_reducer_opxor_reduce_unsigned;
+ cilk_c_reducer_opxor_reduce_ushort;
+ cilk_c_reducer_opxor_reduce_wchar_t;
+};
diff --git a/gcc-4.9/libcilkrts/runtime/local_state.c b/gcc-4.9/libcilkrts/runtime/local_state.c
new file mode 100644
index 000000000..14ac82719
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/local_state.c
@@ -0,0 +1,68 @@
+/* local_state.c -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2010-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+#include "local_state.h"
+#include "bug.h"
+#include "full_frame.h"
+
+void run_scheduling_stack_fcn(__cilkrts_worker *w)
+{
+ scheduling_stack_fcn_t fcn = w->l->post_suspend;
+ full_frame *ff2 = w->l->frame_ff;
+ __cilkrts_stack_frame *sf2 = w->l->suspended_stack;
+
+ w->l->post_suspend = 0;
+ w->l->suspended_stack = 0;
+
+ // Conceptually, after clearing w->l->frame_ff,
+ // w no longer owns the full frame ff.
+ // The next time another (possibly different) worker takes
+ // ownership of ff will be at a provably_good_steal on ff.
+ w->l->frame_ff = NULL;
+
+ CILK_ASSERT(fcn);
+ CILK_ASSERT(ff2);
+ fcn(w, ff2, sf2);
+
+ // After we run the scheduling stack function, we should no longer
+ // have a full frame.
+ CILK_ASSERT(NULL == w->l->frame_ff);
+}
+
+/* End local_state.c */
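run_scheduling_stack_fcn follows a snapshot-then-clear idiom: it copies the pending callback and its arguments out of the worker-local state, clears those fields so the callback cannot be replayed, and only then invokes it. A minimal sketch of the same idiom follows, using hypothetical stand-in types (worker_local, pending_fcn_t) rather than the runtime's real __cilkrts_worker and full_frame.

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the runtime types. */
    typedef struct worker_local worker_local;
    typedef void (*pending_fcn_t)(worker_local *w, void *arg);

    struct worker_local {
        pending_fcn_t post_suspend;    /* set before jumping to the scheduling stack */
        void         *suspended_arg;   /* argument saved alongside it                */
    };

    static void deferred_work(worker_local *w, void *arg)
    {
        (void)w;
        printf("running deferred work on %p\n", arg);
    }

    /* Same shape as run_scheduling_stack_fcn: snapshot, clear, then call. */
    static void run_pending_fcn(worker_local *w)
    {
        pending_fcn_t fcn = w->post_suspend;
        void *arg = w->suspended_arg;

        w->post_suspend  = NULL;       /* clear first so the callback cannot be replayed */
        w->suspended_arg = NULL;

        assert(fcn);
        fcn(w, arg);
    }

    int main(void)
    {
        int frame = 42;
        worker_local w = { deferred_work, &frame };
        run_pending_fcn(&w);
        return 0;
    }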
diff --git a/gcc-4.9/libcilkrts/runtime/local_state.h b/gcc-4.9/libcilkrts/runtime/local_state.h
new file mode 100644
index 000000000..03f39897f
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/local_state.h
@@ -0,0 +1,424 @@
+/* local_state.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file local_state.h
+ *
+ * @brief The local_state structure contains additional OS-independent
+ * information that's associated with a worker, but doesn't need to be visible
+ * to the code generated by the compiler.
+ */
+
+#ifndef INCLUDED_LOCAL_STATE_DOT_H
+#define INCLUDED_LOCAL_STATE_DOT_H
+
+#include <internal/abi.h>
+#include "worker_mutex.h"
+#include "global_state.h"
+#include "record-replay.h"
+#include "signal_node.h"
+
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdio.h>
+
+
+#ifndef _WIN32
+# include <pthread.h>
+#endif
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/* Opaque types. */
+
+struct full_frame;
+struct free_list;
+struct pending_exception_info;
+/// Opaque type for replay entry.
+typedef struct replay_entry_t replay_entry_t;
+
+/**
+ * @brief Magic numbers for local_state, used for debugging
+ */
+typedef unsigned long long ls_magic_t;
+
+/**
+ * @brief Scheduling stack function: a function that is chosen while executing
+ * on the program stack, but that must be run on the scheduling stack.
+ */
+typedef void (*scheduling_stack_fcn_t) (__cilkrts_worker *w,
+ struct full_frame *ff,
+ __cilkrts_stack_frame *sf);
+
+/**
+ * @brief Type of this worker.
+ **/
+typedef enum cilk_worker_type
+{
+ WORKER_FREE, ///< Unused worker - available to be bound to user threads
+ WORKER_SYSTEM, ///< Worker created by runtime - able to steal from any worker
+ WORKER_USER ///< User thread - able to steal only from team members
+} cilk_worker_type;
+
+
+/**
+ * @brief The local_state structure contains additional OS-independent
+ * information that's associated with a worker, but doesn't need to be
+ * visible to the compiler.
+ *
+ * No compiler-generated code should need to know the layout of this
+ * structure.
+ *
+ * The fields of this struct can be classified as either local or
+ * shared.
+ *
+ * Local: This field is only accessed by the thread bound to this
+ * worker struct. Local fields can be freely accessed without
+ * acquiring locks.
+ *
+ * Shared: This field may be accessed by multiple worker threads.
+ * Accesses to shared fields usually require locks, except in
+ * special situations where one can prove that locks are
+ * unnecessary.
+ *
+ * The fields of this struct can also be classified as "read-only" if the
+ * field does not change after it is initialized. Otherwise, the
+ * field is "read/write". Read-only fields do not require locks to
+ * access (ignoring the synchronization that might be needed for
+ * initialization if this can occur in parallel).
+ *
+ * Finally, we explicitly classify some fields as "synchronization"
+ * fields if they are used as part of a synchronization protocol in
+ * the runtime. These variables are generally shared and read/write.
+ * Mostly, this category includes lock variables and other variables
+ * that are involved in synchronization protocols (i.e., the THE
+ * protocol).
+ */
+struct local_state /* COMMON_PORTABLE */
+{
+ /** This value should be in the first field in any local_state */
+# define WORKER_MAGIC_0 ((ls_magic_t)0xe0831a4a940c60b8ULL)
+
+ /**
+ * Should be WORKER_MAGIC_0 or the local_state has been corrupted
+ * This magic field is shared because it is read on lock acquisitions.
+ *
+ * [shared read-only]
+ */
+ ls_magic_t worker_magic_0;
+
+ /**
+ * Mutex used to serialize access to the local_state
+ * Synchronization field. [shared read/write]
+ */
+ struct mutex lock;
+
+ /**
+ * Flag that indicates that the worker is interested in grabbing
+ * LOCK, and thus thieves should leave the worker alone.
+ * Written only by self, may be read by others.
+ *
+ * Synchronization field. [shared read/write]
+ */
+ int do_not_steal;
+
+ /**
+ * Lock that all thieves grab in order to compete for the right
+ * to disturb this worker.
+ *
+ * Synchronization field. [shared read/write]
+ */
+ struct mutex steal_lock;
+
+ /**
+ * Full frame that the worker is working on.
+ *
+ * While a worker w is executing, a thief may change
+ * w->l->frame_ff (on a successful steal) after acquiring w's
+ * lock.
+ *
+ * Unlocked accesses to w->l->frame_ff are safe (by w itself) when
+ * w's deque is empty, or when stealing from w has been disabled.
+ *
+ * [shared read/write]
+ */
+ struct full_frame *frame_ff;
+
+ /**
+ * Full frame that the worker will be working on next
+ *
+ * This field is normally local for a worker w. Another worker v
+ * may modify w->l->next_frame_ff, however, in the special case
+ * where v is returning a frame to the user thread w, since w is the
+ * team leader.
+ *
+ * [shared read/write]
+ */
+ struct full_frame *next_frame_ff;
+
+ /**
+ * This is set iff this is a WORKER_USER and there has been a steal. It
+ * points to the first frame that was stolen since the team was last fully
+ * sync'd. Only this worker may continue past a sync in this function.
+ *
+ * This field is set by a thief for a victim that is a user
+ * thread, while holding the victim's lock.
+ * It can be cleared without a lock by the worker that will
+ * continue executing past the sync.
+ *
+ * [shared read/write]
+ */
+ struct full_frame *last_full_frame;
+
+ /**
+ * Team on which this worker is a participant. When a user worker enters,
+ * its team is its own worker struct and it can never change teams. When a
+ * system worker steals, it adopts the team of its victim.
+ *
+ * When a system worker w steals, it reads victim->l->team and
+ * joins this team. w->l->team is constant until the next time w
+ * returns control to the runtime.
+ * We must acquire the worker lock to change w->l->team.
+ *
+ * @note This field is 64-byte aligned because it is the first in
+ * the group of shared read-only fields. We want this group to
+ * fall on a different cache line from the previous group, which
+ * is shared read-write.
+ *
+ * [shared read-only]
+ */
+ __attribute__((aligned(64)))
+ __cilkrts_worker *team;
+
+ /**
+ * Type of this worker
+ *
+ * This field changes only when a worker binds or unbinds.
+ * Otherwise, the field is read-only while the worker is bound.
+ *
+ * [shared read-only]
+ */
+ cilk_worker_type type;
+
+ /**
+ * Lazy task queue of this worker - an array of pointers to stack frames.
+ *
+ * Read-only because deques are a fixed size in the current
+ * implementation.
+ *
+ * @note This field is 64-byte aligned because it is the first in
+ * the group of local fields. We want this group to fall on a
+ * different cache line from the previous group, which is shared
+ * read-only.
+ *
+ * [local read-only]
+ */
+ __attribute__((aligned(64)))
+ __cilkrts_stack_frame **ltq;
+
+ /**
+ * Pool of fibers waiting to be reused.
+ * [local read/write]
+ */
+ cilk_fiber_pool fiber_pool;
+
+ /**
+ * The fiber for the scheduling stacks.
+ * [local read/write]
+ */
+ cilk_fiber* scheduling_fiber;
+
+ /**
+ * Saved pointer to the leaf node in thread-local storage, when a
+ * user thread is imported. This pointer gets set to a
+ * meaningful value when binding a user thread, and cleared on
+ * unbind.
+ *
+ * [local read/write]
+ */
+ __cilkrts_pedigree* original_pedigree_leaf;
+
+ /**
+ * State of the random number generator
+ *
+ * [local read/write]
+ */
+ unsigned rand_seed;
+
+ /**
+ * Function to execute after transferring onto the scheduling stack.
+ *
+ * [local read/write]
+ */
+ scheduling_stack_fcn_t post_suspend;
+
+ /**
+ * __cilkrts_stack_frame we suspended when we transferred onto the
+ * scheduling stack.
+ *
+ * [local read/write]
+ */
+ __cilkrts_stack_frame *suspended_stack;
+
+ /**
+ * cilk_fiber that should be freed after returning from a
+ * spawn with a stolen parent or after stalling at a sync.
+ *
+ * We calculate the stack to free when executing a reduction on
+ * the user stack, but we can not actually release the stack
+ * until control longjmps onto a runtime scheduling stack.
+ *
+ * This field is used to pass information to the runtime across
+ * the longjmp onto the scheduling stack.
+ *
+ * [local read/write]
+ */
+ cilk_fiber* fiber_to_free;
+
+ /**
+ * Saved exception object for an exception that is being passed to
+ * our parent
+ *
+ * [local read/write]
+ */
+ struct pending_exception_info *pending_exception;
+
+ /**
+ * Buckets for the memory allocator
+ *
+ * [local read/write]
+ */
+ struct free_list *free_list[FRAME_MALLOC_NBUCKETS];
+
+ /**
+ * Potential function for the memory allocator
+ *
+ * [local read/write]
+ */
+ size_t bucket_potential[FRAME_MALLOC_NBUCKETS];
+
+ /**
+ * Support for statistics
+ *
+ * Useful only when CILK_PROFILE is compiled in.
+ * [local read/write]
+ */
+ statistics* stats;
+
+ /**
+ * Number of failed steal attempts since the last successful steal. This is
+ * used by the scheduler to reduce contention on shared flags.
+ *
+ * [local read/write]
+ */
+ unsigned int steal_failure_count;
+
+ /**
+ * 1 if work was stolen from another worker. When true, this will flag
+ * setup_for_execution_pedigree to increment the pedigree when we resume
+ * execution to match the increment that would have been done on a return
+ * from a spawn helper.
+ *
+ * [local read/write]
+ */
+ int work_stolen;
+
+ /**
+ * File pointer for record or replay
+ * Does FILE * work on Windows?
+ * During record, the file will be opened in write-only mode.
+ * During replay, the file will be opened in read-only mode.
+ *
+ * [local read/write]
+ */
+ FILE *record_replay_fptr;
+
+ /**
+ * Root of array of replay entries - NULL if we're not replaying a log
+ *
+ * [local read/write]
+ */
+ replay_entry_t *replay_list_root;
+
+ /**
+ * Current replay entry - NULL if we're not replaying a log
+ *
+ * [local read/write]
+ */
+ replay_entry_t *replay_list_entry;
+
+ /**
+ * Separate the signal_node from other things in the local_state by the
+ * size of a cache line for performance reasons.
+ *
+ * unused
+ */
+ char buf[64];
+
+ /**
+ * Signal object for waking/sleeping the worker. This should be a pointer
+ * to avoid the possibility of caching problems.
+ *
+ * [shared read-only]
+ */
+ signal_node_t *signal_node;
+
+ /** This value should be in the last field in any local_state */
+# define WORKER_MAGIC_1 ((ls_magic_t)0x16164afb0ea0dff9ULL)
+
+ /**
+ * Should be WORKER_MAGIC_1 or the local_state has been corrupted
+ * This magic field is shared because it is read on lock acquisitions.
+ * [shared read-only]
+ */
+ ls_magic_t worker_magic_1;
+};
+
+/**
+ * Perform cleanup according to the function set before the longjmp().
+ *
+ * Call this after longjmp() has completed and the worker is back on a
+ * scheduling stack.
+ *
+ * @param w __cilkrts_worker currently executing.
+ */
+void run_scheduling_stack_fcn(__cilkrts_worker *w);
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_LOCAL_STATE_DOT_H)
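The aligned(64) attributes inside local_state exist so that each access-pattern group (shared read/write, shared read-only, local) starts on its own cache line, keeping writes by thieves from invalidating lines that victims read on every steal attempt. Below is a minimal sketch of that layout technique, assuming GCC or Clang and 64-byte cache lines; the field names are illustrative only and are not the runtime's.

    #include <stdio.h>
    #include <stddef.h>

    struct worker_state_sketch {
        /* Group 1: shared read/write (lock words, flags). */
        int lock_word;
        int do_not_steal;

        /* Group 2: shared read-only.  Force it onto its own cache line so that
           frequent writes to group 1 do not evict these fields on other cores. */
        __attribute__((aligned(64))) void *team;

        /* Group 3: worker-local.  Again start on a fresh line. */
        __attribute__((aligned(64))) void **deque;
    };

    int main(void)
    {
        printf("team offset  = %zu\n", offsetof(struct worker_state_sketch, team));
        printf("deque offset = %zu\n", offsetof(struct worker_state_sketch, deque));
        /* Both offsets are multiples of 64, so the groups never share a line. */
        return 0;
    }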
diff --git a/gcc-4.9/libcilkrts/runtime/mac-symbols.txt b/gcc-4.9/libcilkrts/runtime/mac-symbols.txt
new file mode 100644
index 000000000..38d83a867
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/mac-symbols.txt
@@ -0,0 +1,318 @@
+# Exported symbol list:
+___cilkrts_bind_thread
+___cilkrts_bind_thread_1
+___cilkrts_bump_loop_rank
+___cilkrts_bump_loop_rank_internal
+___cilkrts_bump_worker_rank
+___cilkrts_bump_worker_rank_internal
+___cilkrts_cilk_for_32
+___cilkrts_cilk_for_64
+___cilkrts_debugger_notification
+___cilkrts_dump_stats
+___cilkrts_end_cilk
+___cilkrts_enter_frame
+___cilkrts_enter_frame_1
+___cilkrts_enter_frame_fast
+___cilkrts_enter_frame_fast_1
+___cilkrts_get_force_reduce
+___cilkrts_get_nworkers
+___cilkrts_get_pedigree_info
+___cilkrts_get_pedigree_internal
+___cilkrts_get_sf
+___cilkrts_get_stack_size
+___cilkrts_get_tls_worker
+___cilkrts_get_tls_worker_fast
+___cilkrts_get_total_workers
+___cilkrts_get_worker_number
+___cilkrts_get_worker_rank
+___cilkrts_global_state
+___cilkrts_hyper_create
+___cilkrts_hyper_destroy
+___cilkrts_hyper_lookup
+___cilkrts_hyperobject_alloc
+___cilkrts_hyperobject_dealloc
+___cilkrts_hyperobject_noop_destroy
+___cilkrts_init
+___cilkrts_irml_version
+___cilkrts_leave_frame
+___cilkrts_metacall
+___cilkrts_rethrow
+___cilkrts_return_exception
+___cilkrts_save_fp_ctrl_state
+___cilkrts_set_param
+___cilkrts_stack_alloc
+___cilkrts_stack_free
+___cilkrts_sync
+___cilkrts_synched
+___cilkrts_watch_stack
+___cilkrts_worker_stub
+_cilk_c_reducer_max_identity_char
+_cilk_c_reducer_max_identity_double
+_cilk_c_reducer_max_identity_float
+_cilk_c_reducer_max_identity_int
+_cilk_c_reducer_max_identity_long
+_cilk_c_reducer_max_identity_longdouble
+_cilk_c_reducer_max_identity_longlong
+_cilk_c_reducer_max_identity_schar
+_cilk_c_reducer_max_identity_short
+_cilk_c_reducer_max_identity_uchar
+_cilk_c_reducer_max_identity_uint
+_cilk_c_reducer_max_identity_ulong
+_cilk_c_reducer_max_identity_ulonglong
+_cilk_c_reducer_max_identity_unsigned
+_cilk_c_reducer_max_identity_ushort
+_cilk_c_reducer_max_identity_wchar_t
+_cilk_c_reducer_max_index_identity_char
+_cilk_c_reducer_max_index_identity_double
+_cilk_c_reducer_max_index_identity_float
+_cilk_c_reducer_max_index_identity_int
+_cilk_c_reducer_max_index_identity_long
+_cilk_c_reducer_max_index_identity_longdouble
+_cilk_c_reducer_max_index_identity_longlong
+_cilk_c_reducer_max_index_identity_schar
+_cilk_c_reducer_max_index_identity_short
+_cilk_c_reducer_max_index_identity_uchar
+_cilk_c_reducer_max_index_identity_uint
+_cilk_c_reducer_max_index_identity_ulong
+_cilk_c_reducer_max_index_identity_ulonglong
+_cilk_c_reducer_max_index_identity_unsigned
+_cilk_c_reducer_max_index_identity_ushort
+_cilk_c_reducer_max_index_identity_wchar_t
+_cilk_c_reducer_max_index_reduce_char
+_cilk_c_reducer_max_index_reduce_double
+_cilk_c_reducer_max_index_reduce_float
+_cilk_c_reducer_max_index_reduce_int
+_cilk_c_reducer_max_index_reduce_long
+_cilk_c_reducer_max_index_reduce_longdouble
+_cilk_c_reducer_max_index_reduce_longlong
+_cilk_c_reducer_max_index_reduce_schar
+_cilk_c_reducer_max_index_reduce_short
+_cilk_c_reducer_max_index_reduce_uchar
+_cilk_c_reducer_max_index_reduce_uint
+_cilk_c_reducer_max_index_reduce_ulong
+_cilk_c_reducer_max_index_reduce_ulonglong
+_cilk_c_reducer_max_index_reduce_unsigned
+_cilk_c_reducer_max_index_reduce_ushort
+_cilk_c_reducer_max_index_reduce_wchar_t
+_cilk_c_reducer_max_reduce_char
+_cilk_c_reducer_max_reduce_double
+_cilk_c_reducer_max_reduce_float
+_cilk_c_reducer_max_reduce_int
+_cilk_c_reducer_max_reduce_long
+_cilk_c_reducer_max_reduce_longdouble
+_cilk_c_reducer_max_reduce_longlong
+_cilk_c_reducer_max_reduce_schar
+_cilk_c_reducer_max_reduce_short
+_cilk_c_reducer_max_reduce_uchar
+_cilk_c_reducer_max_reduce_uint
+_cilk_c_reducer_max_reduce_ulong
+_cilk_c_reducer_max_reduce_ulonglong
+_cilk_c_reducer_max_reduce_unsigned
+_cilk_c_reducer_max_reduce_ushort
+_cilk_c_reducer_max_reduce_wchar_t
+_cilk_c_reducer_min_identity_char
+_cilk_c_reducer_min_identity_double
+_cilk_c_reducer_min_identity_float
+_cilk_c_reducer_min_identity_int
+_cilk_c_reducer_min_identity_long
+_cilk_c_reducer_min_identity_longdouble
+_cilk_c_reducer_min_identity_longlong
+_cilk_c_reducer_min_identity_schar
+_cilk_c_reducer_min_identity_short
+_cilk_c_reducer_min_identity_uchar
+_cilk_c_reducer_min_identity_uint
+_cilk_c_reducer_min_identity_ulong
+_cilk_c_reducer_min_identity_ulonglong
+_cilk_c_reducer_min_identity_unsigned
+_cilk_c_reducer_min_identity_ushort
+_cilk_c_reducer_min_identity_wchar_t
+_cilk_c_reducer_min_index_identity_char
+_cilk_c_reducer_min_index_identity_double
+_cilk_c_reducer_min_index_identity_float
+_cilk_c_reducer_min_index_identity_int
+_cilk_c_reducer_min_index_identity_long
+_cilk_c_reducer_min_index_identity_longdouble
+_cilk_c_reducer_min_index_identity_longlong
+_cilk_c_reducer_min_index_identity_schar
+_cilk_c_reducer_min_index_identity_short
+_cilk_c_reducer_min_index_identity_uchar
+_cilk_c_reducer_min_index_identity_uint
+_cilk_c_reducer_min_index_identity_ulong
+_cilk_c_reducer_min_index_identity_ulonglong
+_cilk_c_reducer_min_index_identity_unsigned
+_cilk_c_reducer_min_index_identity_ushort
+_cilk_c_reducer_min_index_identity_wchar_t
+_cilk_c_reducer_min_index_reduce_char
+_cilk_c_reducer_min_index_reduce_double
+_cilk_c_reducer_min_index_reduce_float
+_cilk_c_reducer_min_index_reduce_int
+_cilk_c_reducer_min_index_reduce_long
+_cilk_c_reducer_min_index_reduce_longdouble
+_cilk_c_reducer_min_index_reduce_longlong
+_cilk_c_reducer_min_index_reduce_schar
+_cilk_c_reducer_min_index_reduce_short
+_cilk_c_reducer_min_index_reduce_uchar
+_cilk_c_reducer_min_index_reduce_uint
+_cilk_c_reducer_min_index_reduce_ulong
+_cilk_c_reducer_min_index_reduce_ulonglong
+_cilk_c_reducer_min_index_reduce_unsigned
+_cilk_c_reducer_min_index_reduce_ushort
+_cilk_c_reducer_min_index_reduce_wchar_t
+_cilk_c_reducer_min_reduce_char
+_cilk_c_reducer_min_reduce_double
+_cilk_c_reducer_min_reduce_float
+_cilk_c_reducer_min_reduce_int
+_cilk_c_reducer_min_reduce_long
+_cilk_c_reducer_min_reduce_longdouble
+_cilk_c_reducer_min_reduce_longlong
+_cilk_c_reducer_min_reduce_schar
+_cilk_c_reducer_min_reduce_short
+_cilk_c_reducer_min_reduce_uchar
+_cilk_c_reducer_min_reduce_uint
+_cilk_c_reducer_min_reduce_ulong
+_cilk_c_reducer_min_reduce_ulonglong
+_cilk_c_reducer_min_reduce_unsigned
+_cilk_c_reducer_min_reduce_ushort
+_cilk_c_reducer_min_reduce_wchar_t
+_cilk_c_reducer_opadd_identity_char
+_cilk_c_reducer_opadd_identity_double
+_cilk_c_reducer_opadd_identity_float
+_cilk_c_reducer_opadd_identity_int
+_cilk_c_reducer_opadd_identity_long
+_cilk_c_reducer_opadd_identity_longdouble
+_cilk_c_reducer_opadd_identity_longlong
+_cilk_c_reducer_opadd_identity_schar
+_cilk_c_reducer_opadd_identity_short
+_cilk_c_reducer_opadd_identity_uchar
+_cilk_c_reducer_opadd_identity_uint
+_cilk_c_reducer_opadd_identity_ulong
+_cilk_c_reducer_opadd_identity_ulonglong
+_cilk_c_reducer_opadd_identity_unsigned
+_cilk_c_reducer_opadd_identity_ushort
+_cilk_c_reducer_opadd_identity_wchar_t
+_cilk_c_reducer_opadd_reduce_char
+_cilk_c_reducer_opadd_reduce_double
+_cilk_c_reducer_opadd_reduce_float
+_cilk_c_reducer_opadd_reduce_int
+_cilk_c_reducer_opadd_reduce_long
+_cilk_c_reducer_opadd_reduce_longdouble
+_cilk_c_reducer_opadd_reduce_longlong
+_cilk_c_reducer_opadd_reduce_schar
+_cilk_c_reducer_opadd_reduce_short
+_cilk_c_reducer_opadd_reduce_uchar
+_cilk_c_reducer_opadd_reduce_uint
+_cilk_c_reducer_opadd_reduce_ulong
+_cilk_c_reducer_opadd_reduce_ulonglong
+_cilk_c_reducer_opadd_reduce_unsigned
+_cilk_c_reducer_opadd_reduce_ushort
+_cilk_c_reducer_opadd_reduce_wchar_t
+_cilk_c_reducer_opand_identity_char
+_cilk_c_reducer_opand_identity_int
+_cilk_c_reducer_opand_identity_long
+_cilk_c_reducer_opand_identity_longlong
+_cilk_c_reducer_opand_identity_schar
+_cilk_c_reducer_opand_identity_short
+_cilk_c_reducer_opand_identity_uchar
+_cilk_c_reducer_opand_identity_uint
+_cilk_c_reducer_opand_identity_ulong
+_cilk_c_reducer_opand_identity_ulonglong
+_cilk_c_reducer_opand_identity_unsigned
+_cilk_c_reducer_opand_identity_ushort
+_cilk_c_reducer_opand_identity_wchar_t
+_cilk_c_reducer_opand_reduce_char
+_cilk_c_reducer_opand_reduce_int
+_cilk_c_reducer_opand_reduce_long
+_cilk_c_reducer_opand_reduce_longlong
+_cilk_c_reducer_opand_reduce_schar
+_cilk_c_reducer_opand_reduce_short
+_cilk_c_reducer_opand_reduce_uchar
+_cilk_c_reducer_opand_reduce_uint
+_cilk_c_reducer_opand_reduce_ulong
+_cilk_c_reducer_opand_reduce_ulonglong
+_cilk_c_reducer_opand_reduce_unsigned
+_cilk_c_reducer_opand_reduce_ushort
+_cilk_c_reducer_opand_reduce_wchar_t
+_cilk_c_reducer_opmul_identity_char
+_cilk_c_reducer_opmul_identity_double
+_cilk_c_reducer_opmul_identity_float
+_cilk_c_reducer_opmul_identity_int
+_cilk_c_reducer_opmul_identity_long
+_cilk_c_reducer_opmul_identity_longdouble
+_cilk_c_reducer_opmul_identity_longlong
+_cilk_c_reducer_opmul_identity_schar
+_cilk_c_reducer_opmul_identity_short
+_cilk_c_reducer_opmul_identity_uchar
+_cilk_c_reducer_opmul_identity_uint
+_cilk_c_reducer_opmul_identity_ulong
+_cilk_c_reducer_opmul_identity_ulonglong
+_cilk_c_reducer_opmul_identity_unsigned
+_cilk_c_reducer_opmul_identity_ushort
+_cilk_c_reducer_opmul_identity_wchar_t
+_cilk_c_reducer_opmul_reduce_char
+_cilk_c_reducer_opmul_reduce_double
+_cilk_c_reducer_opmul_reduce_float
+_cilk_c_reducer_opmul_reduce_int
+_cilk_c_reducer_opmul_reduce_long
+_cilk_c_reducer_opmul_reduce_longdouble
+_cilk_c_reducer_opmul_reduce_longlong
+_cilk_c_reducer_opmul_reduce_schar
+_cilk_c_reducer_opmul_reduce_short
+_cilk_c_reducer_opmul_reduce_uchar
+_cilk_c_reducer_opmul_reduce_uint
+_cilk_c_reducer_opmul_reduce_ulong
+_cilk_c_reducer_opmul_reduce_ulonglong
+_cilk_c_reducer_opmul_reduce_unsigned
+_cilk_c_reducer_opmul_reduce_ushort
+_cilk_c_reducer_opmul_reduce_wchar_t
+_cilk_c_reducer_opor_identity_char
+_cilk_c_reducer_opor_identity_int
+_cilk_c_reducer_opor_identity_long
+_cilk_c_reducer_opor_identity_longlong
+_cilk_c_reducer_opor_identity_schar
+_cilk_c_reducer_opor_identity_short
+_cilk_c_reducer_opor_identity_uchar
+_cilk_c_reducer_opor_identity_uint
+_cilk_c_reducer_opor_identity_ulong
+_cilk_c_reducer_opor_identity_ulonglong
+_cilk_c_reducer_opor_identity_unsigned
+_cilk_c_reducer_opor_identity_ushort
+_cilk_c_reducer_opor_identity_wchar_t
+_cilk_c_reducer_opor_reduce_char
+_cilk_c_reducer_opor_reduce_int
+_cilk_c_reducer_opor_reduce_long
+_cilk_c_reducer_opor_reduce_longlong
+_cilk_c_reducer_opor_reduce_schar
+_cilk_c_reducer_opor_reduce_short
+_cilk_c_reducer_opor_reduce_uchar
+_cilk_c_reducer_opor_reduce_uint
+_cilk_c_reducer_opor_reduce_ulong
+_cilk_c_reducer_opor_reduce_ulonglong
+_cilk_c_reducer_opor_reduce_unsigned
+_cilk_c_reducer_opor_reduce_ushort
+_cilk_c_reducer_opor_reduce_wchar_t
+_cilk_c_reducer_opxor_identity_char
+_cilk_c_reducer_opxor_identity_int
+_cilk_c_reducer_opxor_identity_long
+_cilk_c_reducer_opxor_identity_longlong
+_cilk_c_reducer_opxor_identity_schar
+_cilk_c_reducer_opxor_identity_short
+_cilk_c_reducer_opxor_identity_uchar
+_cilk_c_reducer_opxor_identity_uint
+_cilk_c_reducer_opxor_identity_ulong
+_cilk_c_reducer_opxor_identity_ulonglong
+_cilk_c_reducer_opxor_identity_unsigned
+_cilk_c_reducer_opxor_identity_ushort
+_cilk_c_reducer_opxor_identity_wchar_t
+_cilk_c_reducer_opxor_reduce_char
+_cilk_c_reducer_opxor_reduce_int
+_cilk_c_reducer_opxor_reduce_long
+_cilk_c_reducer_opxor_reduce_longlong
+_cilk_c_reducer_opxor_reduce_schar
+_cilk_c_reducer_opxor_reduce_short
+_cilk_c_reducer_opxor_reduce_uchar
+_cilk_c_reducer_opxor_reduce_uint
+_cilk_c_reducer_opxor_reduce_ulong
+_cilk_c_reducer_opxor_reduce_ulonglong
+_cilk_c_reducer_opxor_reduce_unsigned
+_cilk_c_reducer_opxor_reduce_ushort
+_cilk_c_reducer_opxor_reduce_wchar_t
diff --git a/gcc-4.9/libcilkrts/runtime/metacall_impl.c b/gcc-4.9/libcilkrts/runtime/metacall_impl.c
new file mode 100644
index 000000000..ce1c51a20
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/metacall_impl.c
@@ -0,0 +1,167 @@
+/* metacall_impl.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#include "metacall_impl.h"
+
+NOINLINE
+CILK_API_VOID
+__cilkrts_metacall(unsigned int tool, unsigned int code, void *data)
+{
+#ifdef ENABLE_NOTIFY_ZC_INTRINSIC
+ // The metacall type, code and data are packed together into a single
+ // struct which will be interpreted by the tool. This function is the
+ // one and only use of a "cilkscreen_metacall" annotation
+ metacall_data_t d = { tool, code, data };
+
+ // Note that Inspector uses probe mode, and is implementing the metacall
+ // interface to force the runtime to run with a single worker. So
+ // __cilkrts_metacall must use __notify_intrinsic instead of
+ // __notify_zc_intrinsic
+ __notify_intrinsic("cilkscreen_metacall", &d);
+#endif // ENABLE_NOTIFY_ZC_INTRINSIC
+}
+
+int __cilkrts_running_under_sequential_ptool(void)
+{
+ static int running_under_sequential_ptool = -1;
+ volatile char c = ~0;
+
+ // If we haven't been called before, see if we're running under Cilkscreen
+ // or Cilkview
+ if (-1 == running_under_sequential_ptool)
+ {
+ // metacall #2 writes 0 into c if we are running under
+ // a p-tool that requires serial execution, and is a
+ // no-op otherwise
+ //
+ // Note that the volatile is required to prevent the compiler
+ // from assuming that the value has not changed
+ __cilkrts_metacall(METACALL_TOOL_SYSTEM,
+ HYPER_ZERO_IF_SEQUENTIAL_PTOOL, (void *)&c);
+
+ running_under_sequential_ptool = (0 == c);
+ }
+
+ return running_under_sequential_ptool;
+}
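The volatile char in __cilkrts_running_under_sequential_ptool is what forces the compiler to re-read c after the metacall, since the tool may have written 0 through the pointer behind the compiler's back. A minimal standalone sketch of the same pattern follows; opaque_probe is a hypothetical stand-in for __cilkrts_metacall and is not part of the runtime.

    #include <stdio.h>

    /* Stand-in for __cilkrts_metacall: a tool may (or may not) write through
       the pointer it is handed.  noinline keeps it opaque to the caller's
       optimizer, the way the real metacall is opaque to the runtime. */
    __attribute__((noinline))
    static void opaque_probe(void *data)
    {
        /* A sequential p-tool would write 0 here; plain runs leave it alone. */
        (void)data;
    }

    int probe_says_sequential(void)
    {
        volatile char c = ~0;        /* volatile: force a real re-read after the call */

        opaque_probe((void *)&c);    /* may have stored 0 through the pointer */

        return 0 == c;               /* without volatile this could fold to a constant */
    }

    int main(void)
    {
        printf("sequential p-tool detected: %d\n", probe_says_sequential());
        return 0;
    }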
+
+/*
+ * __cilkrts_cilkscreen_establish_c_stack
+ *
+ * Notify Cilkscreen of the extent of the stack
+ */
+
+void __cilkrts_cilkscreen_establish_c_stack(char *begin, char *end)
+{
+ char *limits[2] = {begin, end};
+
+ __cilkrts_metacall(METACALL_TOOL_SYSTEM, HYPER_ESTABLISH_C_STACK, limits);
+}
+
+#ifdef WORKSPAN // Workspan stuff - remove when we're sure what we can drop
+
+void __cilkview_workspan_start(void) {
+ __cilkrts_metacall(HYPER_WORKSPAN_START, 0);
+}
+
+void __cilkview_workspan_stop(void) {
+ __cilkrts_metacall(HYPER_WORKSPAN_STOP, 0);
+}
+
+void __cilkview_workspan_dump(const char *str) {
+ __cilkrts_metacall(HYPER_WORKSPAN_DUMP, (void*)str);
+}
+
+
+void __cilkview_workspan_reset(void) {
+ __cilkrts_metacall(HYPER_WORKSPAN_RESET, 0);
+}
+
+
+void __cilkview_use_default_grain(void) {
+ __cilkrts_metacall(HYPER_USE_DEFAULT_GRAIN, 0);
+}
+
+void __cilkview_get_workspan_data(unsigned long long *values, int size)
+{
+ void *data[2];
+
+ /* reset counters to zero in case we are not running under
+ a p-tool */
+
+ values[0] = 0;
+
+ data[0] = (void*) values;
+ data[1] = (void*) &size;
+ __cilkrts_metacall(HYPER_WORKSPAN_QUERY, &data);
+}
+
+void __cilkview_workspan_connected (int *flag) {
+ *flag = 0;
+ __cilkrts_metacall(HYPER_WORKSPAN_CONNECTED, (void *)flag);
+}
+
+void __cilkview_workspan_suspend() {
+ __cilkrts_metacall(HYPER_WORKSPAN_SUSPEND, 0);
+}
+
+void __cilkview_workspan_resume() {
+ __cilkrts_metacall(HYPER_WORKSPAN_RESUME, 0);
+}
+
+/* deprecated interfaces */
+void __cilkometer_workspan_start(void) {
+ __cilkrts_metacall(HYPER_WORKSPAN_START, 0);
+}
+
+void __cilkometer_workspan_stop(void) {
+ __cilkrts_metacall(HYPER_WORKSPAN_STOP, 0);
+}
+
+void __cilkometer_workspan_dump(const char *str) {
+ __cilkrts_metacall(HYPER_WORKSPAN_DUMP, (void*)str);
+}
+
+
+void __cilkometer_workspan_reset(void) {
+ __cilkrts_metacall(HYPER_WORKSPAN_RESET, 0);
+}
+
+#endif // WORKSPAN
+
+/* End metacall_impl.c */
diff --git a/gcc-4.9/libcilkrts/runtime/metacall_impl.h b/gcc-4.9/libcilkrts/runtime/metacall_impl.h
new file mode 100644
index 000000000..90cc7f951
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/metacall_impl.h
@@ -0,0 +1,123 @@
+/* metacall_impl.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2010-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+/**
+ * @file metacall_impl.h
+ *
+ * @brief Meta-function calls to be used within the Cilk runtime system.
+ *
+ * These differ from the macros in cilkscreen.h and cilkview.h because they go
+ * through the __cilkrts_metacall interface, which ensures that the operation
+ * is performed even when instrumentation is disabled.
+ */
+
+#ifndef INCLUDED_CILKRTS_METACALL_H
+#define INCLUDED_CILKRTS_METACALL_H
+
+#include "rts-common.h"
+#include <internal/metacall.h>
+#include <cilk/common.h>
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * This function is effectively an unconditional call from the runtime into
+ * a tool. It is used for operations that must be performed by the tool,
+ * even when the tool is not instrumenting. For example, Cilkscreen always
+ * recognizes the address of this function and performs the action specified
+ * in the contained metadata.
+ *
+ * Note that this function MUST NOT BE INLINED within the runtime. This must
+ * be the ONLY instance of the cilkscreen_metacall metadata.
+ */
+CILK_API_VOID
+__cilkrts_metacall(unsigned int tool, unsigned int code, void *data);
+
+/**
+ * Return non-zero if running under Cilkscreen or Cilkview
+ */
+COMMON_PORTABLE
+int __cilkrts_running_under_sequential_ptool(void);
+
+/**
+ * Disable Cilkscreen implementation
+ */
+#define __cilkrts_cilkscreen_disable_instrumentation() \
+ __cilkrts_metacall(METACALL_TOOL_SYSTEM, HYPER_DISABLE_INSTRUMENTATION, 0)
+
+/**
+ * Enable Cilkscreen implementation
+ */
+#define __cilkrts_cilkscreen_enable_instrumentation() \
+ __cilkrts_metacall(METACALL_TOOL_SYSTEM, HYPER_ENABLE_INSTRUMENTATION, 0)
+
+/**
+ * Set the worker on entering runtime.
+ *
+ * @attention Deprecated in favor of __cilkrts_cilkscreen_ignore_block. The
+ * begin/end pairs in the current metadata mean Cilkscreen no longer has to
+ * have improper knowledge of the __cilkrts_worker or __cilkrts_stack_frame
+ * structures.
+ */
+#define __cilkrts_cilkscreen_establish_worker(w) \
+ __cilkrts_metacall(METACALL_TOOL_SYSTEM, HYPER_ESTABLISH_WORKER, w)
+
+/**
+ * Notify Cilkscreen of the extent of the stack.
+ *
+ * @param[in] begin Start (low address) of stack
+ * @param[in] end One past high address of stack
+ */
+void __cilkrts_cilkscreen_establish_c_stack(char *begin, char *end);
+
+/**
+ * Tell tools to ignore a block of memory - currently the global state and
+ * memory allocated for workers.
+ */
+#define __cilkrts_cilkscreen_ignore_block(_begin, _end) \
+{ \
+ void *block[2] = {_begin, _end}; \
+ __cilkrts_metacall(METACALL_TOOL_SYSTEM, \
+ HYPER_IGNORE_MEMORY_BLOCK, \
+ block); \
+}
+
+__CILKRTS_END_EXTERN_C
+
+#endif /* ! defined(INCLUDED_CILKRTS_METACALL_H) */
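__cilkrts_cilkscreen_ignore_block is a brace-wrapped statement macro: it builds a two-element {begin, end} array on the stack and hands its address to the tool in a single metacall. Below is a minimal sketch of that macro shape and its intended call pattern, assuming a hypothetical tool_call in place of __cilkrts_metacall; none of these names are part of the runtime.

    #include <stdio.h>

    /* Hypothetical stand-in for __cilkrts_metacall(METACALL_TOOL_SYSTEM, ...). */
    static void tool_call(const char *what, void *data)
    {
        void **range = (void **)data;
        printf("%s: ignore %p..%p\n", what, range[0], range[1]);
    }

    /* Same shape as __cilkrts_cilkscreen_ignore_block: a brace-wrapped statement
       macro builds a two-element {begin, end} array and passes its address to
       the tool in one call. */
    #define ignore_block(_begin, _end)                  \
    {                                                   \
        void *block[2] = {_begin, _end};                \
        tool_call("ignore_block", block);               \
    }

    static char global_state[128];

    int main(void)
    {
        ignore_block(global_state, global_state + sizeof(global_state));
        return 0;
    }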
diff --git a/gcc-4.9/libcilkrts/runtime/os-unix.c b/gcc-4.9/libcilkrts/runtime/os-unix.c
new file mode 100644
index 000000000..fafb91d91
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/os-unix.c
@@ -0,0 +1,516 @@
+/* os-unix.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#ifdef __linux__
+ // define _GNU_SOURCE before *any* #include.
+ // Even <stdint.h> will break later #includes if this macro is not
+ // already defined when it is #included.
+# define _GNU_SOURCE
+#endif
+
+#include "os.h"
+#include "bug.h"
+#include "cilk_malloc.h"
+#include <internal/abi.h>
+
+#if defined __linux__
+# include <sys/sysinfo.h>
+# include <sys/syscall.h>
+#elif defined __APPLE__
+# include <sys/sysctl.h>
+ // Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output
+#elif defined __FreeBSD__
+// No additional include files
+#elif defined __CYGWIN__
+// Cygwin on Windows - no additional include files
+#elif defined __VXWORKS__
+# include <vxWorks.h>
+# include <vxCpuLib.h>
+# include <taskLib.h>
+// Solaris
+#elif defined __sun__ && defined __svr4__
+# include <sched.h>
+#else
+# error "Unsupported OS"
+#endif
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/types.h>
+
+
+
+// /* Thread-local storage */
+// #ifdef _WIN32
+// typedef unsigned cilkos_tls_key_t;
+// #else
+// typedef pthread_key_t cilkos_tls_key_t;
+// #endif
+// cilkos_tls_key_t cilkos_allocate_tls_key();
+// void cilkos_set_tls_pointer(cilkos_tls_key_t key, void* ptr);
+// void* cilkos_get_tls_pointer(cilkos_tls_key_t key);
+
+#if !defined CILK_WORKER_TLS
+static int cilk_keys_defined;
+static pthread_key_t worker_key, pedigree_leaf_key, tbb_interop_key;
+
+#if SUPPORT_GET_CURRENT_FIBER > 0
+static pthread_key_t fiber_key;
+#endif
+
+static void *serial_worker;
+
+
+// This destructor is called when a pthread dies to deallocate the
+// pedigree node.
+static void __cilkrts_pedigree_leaf_destructor(void* pedigree_tls_ptr)
+{
+ __cilkrts_pedigree* pedigree_tls
+ = (__cilkrts_pedigree*)pedigree_tls_ptr;
+ if (pedigree_tls) {
+ // Assert that we have either one or two nodes
+ // left in the pedigree chain.
+ // If we have more, then something is going wrong...
+ CILK_ASSERT(!pedigree_tls->parent || !pedigree_tls->parent->parent);
+ __cilkrts_free(pedigree_tls);
+ }
+}
+
+void __cilkrts_init_tls_variables(void)
+{
+ int status;
+ /* This will be called once in serial execution before any
+ Cilk parallelism so we do not need to worry about races
+ on cilk_keys_defined. */
+ if (cilk_keys_defined)
+ return;
+ status = pthread_key_create(&worker_key, NULL);
+ CILK_ASSERT (status == 0);
+ status = pthread_key_create(&pedigree_leaf_key,
+ __cilkrts_pedigree_leaf_destructor);
+ CILK_ASSERT (status == 0);
+ status = pthread_key_create(&tbb_interop_key, NULL);
+ CILK_ASSERT (status == 0);
+
+#if SUPPORT_GET_CURRENT_FIBER > 0
+ status = pthread_key_create(&fiber_key, NULL);
+ CILK_ASSERT (status == 0);
+#endif
+ cilk_keys_defined = 1;
+ return;
+}
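__cilkrts_init_tls_variables relies on the standard POSIX pattern of pthread_key_create with a per-key destructor, so that per-thread data such as the pedigree leaf is reclaimed automatically when a thread exits with a non-NULL value bound to the key. A minimal, self-contained sketch of that pattern (compile with -pthread); the names are illustrative, not the runtime's.

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    static pthread_key_t leaf_key;

    /* Runs automatically when a thread that set the key exits with a non-NULL value. */
    static void leaf_destructor(void *p)
    {
        printf("freeing per-thread leaf %p\n", p);
        free(p);
    }

    static void *thread_main(void *arg)
    {
        int *leaf = malloc(sizeof *leaf);
        *leaf = 7;
        pthread_setspecific(leaf_key, leaf);     /* bind the value to this thread */
        printf("thread sees %d\n", *(int *)pthread_getspecific(leaf_key));
        return arg;                              /* destructor fires on thread exit */
    }

    int main(void)
    {
        pthread_t t;
        if (pthread_key_create(&leaf_key, leaf_destructor) != 0)
            return 1;
        pthread_create(&t, NULL, thread_main, NULL);
        pthread_join(t, NULL);
        return 0;
    }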
+
+COMMON_SYSDEP
+void* cilkos_get_current_thread_id(void)
+{
+ return (void*)pthread_self();
+}
+
+
+CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker()
+{
+ if (__builtin_expect(cilk_keys_defined, 1))
+ return (__cilkrts_worker *)pthread_getspecific(worker_key);
+ else
+ return serial_worker;
+
+}
+
+CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker_fast()
+{
+ return (__cilkrts_worker *)pthread_getspecific(worker_key);
+}
+
+COMMON_SYSDEP
+__cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void)
+{
+ if (__builtin_expect(cilk_keys_defined, 1))
+ return (__cilk_tbb_stack_op_thunk *)
+ pthread_getspecific(tbb_interop_key);
+ else
+ return 0;
+}
+
+// This counter should be updated atomically.
+static int __cilkrts_global_pedigree_tls_counter = -1;
+
+COMMON_SYSDEP
+__cilkrts_pedigree *__cilkrts_get_tls_pedigree_leaf(int create_new)
+{
+ __cilkrts_pedigree *pedigree_tls;
+ if (__builtin_expect(cilk_keys_defined, 1)) {
+ pedigree_tls =
+ (struct __cilkrts_pedigree *)pthread_getspecific(pedigree_leaf_key);
+ }
+ else {
+ return 0;
+ }
+
+ if (!pedigree_tls && create_new) {
+ // This call creates two nodes, X and Y.
+ // X == pedigree_tls[0] is the leaf node, which gets copied
+ // in and out of a user worker w when w binds and unbinds.
+ // Y == pedigree_tls[1] is the root node,
+ // which is a constant node that represents the user worker
+ // thread w.
+ pedigree_tls = (__cilkrts_pedigree*)
+ __cilkrts_malloc(2 * sizeof(__cilkrts_pedigree));
+
+ // This call sets the TLS pointer to the new node.
+ __cilkrts_set_tls_pedigree_leaf(pedigree_tls);
+
+ pedigree_tls[0].rank = 0;
+ pedigree_tls[0].parent = &pedigree_tls[1];
+
+ // Create Y, whose rank begins as the global counter value.
+ pedigree_tls[1].rank =
+ __sync_add_and_fetch(&__cilkrts_global_pedigree_tls_counter, 1);
+
+ pedigree_tls[1].parent = NULL;
+ CILK_ASSERT(pedigree_tls[1].rank != -1);
+ }
+ return pedigree_tls;
+}
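+// Editorial sketch (not part of the original source): after the first call
+// with create_new != 0, the thread's pedigree chain is
+//
+//     pedigree_tls[0] (leaf X, rank 0) --parent--> pedigree_tls[1] (root Y,
+//     rank assigned from the global counter) --parent--> NULL
+//
+// so a walk up the parent pointers always ends after two nodes, which is
+// exactly what __cilkrts_pedigree_leaf_destructor() asserts.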
+
+#if SUPPORT_GET_CURRENT_FIBER > 0
+COMMON_SYSDEP
+cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void)
+{
+ if (__builtin_expect(cilk_keys_defined, 1))
+ return (cilk_fiber_sysdep *)pthread_getspecific(fiber_key);
+ else
+ return NULL;
+}
+#endif
+
+COMMON_SYSDEP
+void __cilkrts_set_tls_worker(__cilkrts_worker *w)
+{
+ if (__builtin_expect(cilk_keys_defined, 1)) {
+ int status;
+ status = pthread_setspecific(worker_key, w);
+ CILK_ASSERT (status == 0);
+ return;
+ }
+ else
+ {
+ serial_worker = w;
+ }
+}
+
+COMMON_SYSDEP
+void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t)
+{
+ if (__builtin_expect(cilk_keys_defined, 1)) {
+ int status;
+ status = pthread_setspecific(tbb_interop_key, t);
+ CILK_ASSERT (status == 0);
+ return;
+ }
+ abort();
+}
+
+COMMON_SYSDEP
+void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf)
+{
+ if (__builtin_expect(cilk_keys_defined, 1)) {
+ int status;
+ status = pthread_setspecific(pedigree_leaf_key, pedigree_leaf);
+ CILK_ASSERT (status == 0);
+ return;
+ }
+ abort();
+}
+
+#if SUPPORT_GET_CURRENT_FIBER > 0
+COMMON_SYSDEP
+void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber)
+{
+ if (__builtin_expect(cilk_keys_defined, 1)) {
+ int status;
+ status = pthread_setspecific(fiber_key, fiber);
+ CILK_ASSERT (status == 0);
+ return;
+ }
+ abort();
+}
+#endif
+
+#else
+void __cilkrts_init_tls_variables(void)
+{
+}
+#endif
+
+#if defined (__linux__) && ! defined(ANDROID)
+/*
+ * Get the thread id, rather than the pid. In the case of MIC offload, it's
+ * possible that we have multiple threads entering Cilk, and each has a
+ * different affinity.
+ */
+static pid_t linux_gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+
+/*
+ * On Linux we look at the thread affinity mask and restrict ourself to one
+ * thread for each of the hardware contexts to which we are bound.
+ * Therefore if the user does
+ * % taskset 0-1 cilkProgram
+ * # restrict execution to hardware contexts zero and one
+ * the Cilk program will only use two threads even if it is running on a
+ * machine that has 32 hardware contexts.
+ * This is the right thing to do, because the threads are restricted to two
+ * hardware contexts by the affinity mask set by taskset, and if we were to
+ * create extra threads they would simply oversubscribe the hardware resources
+ * we can use.
+ * This is particularly important on MIC in offload mode, where the affinity
+ * mask is set by the offload library to force the offload code away from
+ * cores that have offload support threads running on them.
+ */
+static int linux_get_affinity_count (int tid)
+{
+#if !defined HAVE_PTHREAD_AFFINITY_NP
+ return 0;
+#else
+
+ cpu_set_t process_mask;
+
+ // Extract the thread affinity mask
+ int err = sched_getaffinity (tid, sizeof(process_mask),&process_mask);
+
+ if (0 != err)
+ {
+ return 0;
+ }
+
+ // We have extracted the mask OK, so now we can count the number of threads
+ // in it. This is linear in the maximum number of CPUs available. We
+ // could do a logarithmic version, if we assume the format of the mask,
+ // but it's not really worth it. We only call this at thread startup
+ // anyway.
+ int available_procs = 0;
+ int i;
+ for (i = 0; i < CPU_SETSIZE; i++)
+ {
+ if (CPU_ISSET(i, &process_mask))
+ {
+ available_procs++;
+ }
+ }
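+ // Editorial note (not part of the original source): on glibc this loop is
+ // equivalent to the GNU extension CPU_COUNT(&process_mask); the explicit
+ // loop simply avoids relying on that extension.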
+
+ return available_procs;
+#endif
+}
+#endif
+
+/*
+ * __cilkrts_hardware_cpu_count
+ *
+ * Returns the number of available CPUs on this hardware. This is architecture-
+ * specific.
+ */
+
+COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void)
+{
+#if defined ANDROID || (defined(__sun__) && defined(__svr4__))
+ return sysconf (_SC_NPROCESSORS_ONLN);
+#elif defined __MIC__
+ /// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial
+ /// on KNC. Also, ignore the last core.
+ int P = sysconf (_SC_NPROCESSORS_ONLN);
+ return P/2 - 2;
+#elif defined __linux__
+ int affinity_count = linux_get_affinity_count(linux_gettid());
+
+ return (0 != affinity_count) ? affinity_count : sysconf (_SC_NPROCESSORS_ONLN);
+#elif defined __APPLE__
+ int count = 0;
+ int cmd[2] = { CTL_HW, HW_NCPU };
+ size_t len = sizeof count;
+ int status = sysctl(cmd, 2, &count, &len, 0, 0);
+ assert(status >= 0);
+ assert((unsigned)count == count);
+
+ return count;
+#elif defined __FreeBSD__ || defined __CYGWIN__
+ // Just get the number of processors
+ int ncores = sysconf(_SC_NPROCESSORS_ONLN);
+
+ return ncores;
+#elif defined __VXWORKS__
+ return __builtin_popcount( vxCpuEnabledGet() );
+#else
+#error "Unknown architecture"
+#endif
+}
+
+COMMON_SYSDEP void __cilkrts_sleep(void)
+{
+#ifdef __VXWORKS__
+ taskDelay(1);
+#else
+ usleep(1);
+#endif
+}
+
+COMMON_SYSDEP void __cilkrts_yield(void)
+{
+#if __APPLE__ || __FreeBSD__ || __VXWORKS__
+ // On MacOS, FreeBSD, and VxWorks, call sched_yield to yield the quantum.
+ // I'm not sure why we don't do this on Linux also.
+ sched_yield();
+#elif defined(__MIC__)
+ // On MIC, pthread_yield() really trashes things. Arch's measurements
+ // showed that calling _mm_delay_32() (or doing nothing) was a better
+ // option. Delaying 1024 clock cycles is a reasonable compromise between
+ // giving up the processor and latency starting up when work becomes
+ // available
+ _mm_delay_32(1024);
+#elif defined(ANDROID) || (defined(__sun__) && defined(__svr4__))
+ // On Android and Solaris, call sched_yield to yield quantum. I'm not
+ // sure why we don't do this on Linux also.
+ sched_yield();
+#else
+ // On Linux, call pthread_yield (which in turn will call sched_yield)
+ // to yield quantum.
+ pthread_yield();
+#endif
+}
+
+COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen,
+ const char* varname)
+{
+ CILK_ASSERT(value);
+ CILK_ASSERT(varname);
+
+ const char* envstr = getenv(varname);
+ if (envstr)
+ {
+ size_t len = strlen(envstr);
+ if (len > vallen - 1)
+ return len + 1;
+
+ strcpy(value, envstr);
+ return len;
+ }
+ else
+ {
+ value[0] = '\0';
+ return 0;
+ }
+}
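+/* Editorial sketch (not part of the original source): typical two-step
+ * caller pattern for the contract above; "CILK_NWORKERS" is just an
+ * illustrative variable name.
+ *
+ *     char buf[64];
+ *     size_t need = cilkos_getenv(buf, sizeof(buf), "CILK_NWORKERS");
+ *     if (need > sizeof(buf)) {
+ *         // buf was too small; 'need' (which includes the terminating
+ *         // null) is the buffer size required for a successful retry.
+ *     }
+ */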
+
+/*
+ * Unrecoverable error: Print an error message and abort execution.
+ */
+COMMON_SYSDEP void cilkos_error(const char *fmt, ...)
+{
+ va_list l;
+ fflush(NULL);
+ fprintf(stderr, "Cilk error: ");
+ va_start(l, fmt);
+ vfprintf(stderr, fmt, l);
+ va_end(l);
+ fprintf(stderr, "Exiting.\n");
+ fflush(stderr);
+
+ abort();
+}
+
+/*
+ * Print a warning message and return.
+ */
+COMMON_SYSDEP void cilkos_warning(const char *fmt, ...)
+{
+ va_list l;
+ fflush(NULL);
+ fprintf(stderr, "Cilk warning: ");
+ va_start(l, fmt);
+ vfprintf(stderr, fmt, l);
+ va_end(l);
+ fflush(stderr);
+}
+
+static void __attribute__((constructor)) init_once()
+{
+ /*__cilkrts_debugger_notification_internal(CILK_DB_RUNTIME_LOADED);*/
+ __cilkrts_init_tls_variables();
+}
+
+
+#define PAGE 4096
+#define CILK_MIN_STACK_SIZE (4*PAGE)
+// Default size for the stacks that we create in Cilk for Unix.
+#define CILK_DEFAULT_STACK_SIZE 0x100000
+
+/*
+ * Convert the user's specified stack size into a "reasonable" value
+ * for this OS.
+ */
+size_t cilkos_validate_stack_size(size_t specified_stack_size) {
+ // Treat an unspecified (zero) stack size as a request for the default.
+ if (specified_stack_size == 0) {
+ CILK_ASSERT((CILK_DEFAULT_STACK_SIZE % PAGE) == 0);
+ return CILK_DEFAULT_STACK_SIZE;
+ }
+ // Round values in between 0 and CILK_MIN_STACK_SIZE up to
+ // CILK_MIN_STACK_SIZE.
+ if (specified_stack_size <= CILK_MIN_STACK_SIZE) {
+ return CILK_MIN_STACK_SIZE;
+ }
+ if ((specified_stack_size % PAGE) > 0) {
+ // Round the user's stack size value up to nearest page boundary.
+ return (PAGE * (1 + specified_stack_size / PAGE));
+ }
+ return specified_stack_size;
+}
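+/* Editorial note (not in the original source): sample values produced by the
+ * rounding above, with PAGE == 4096:
+ *
+ *     cilkos_validate_stack_size(0)        -> 0x100000  (default)
+ *     cilkos_validate_stack_size(1000)     -> 16384     (raised to 4*PAGE)
+ *     cilkos_validate_stack_size(20000)    -> 20480     (rounded up to a page)
+ *     cilkos_validate_stack_size(0x20000)  -> 0x20000   (already page-aligned)
+ */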
+
+long cilkos_atomic_add(volatile long* p, long x)
+{
+ return __sync_add_and_fetch(p, x);
+}
+
+/* End os-unix.c */
diff --git a/gcc-4.9/libcilkrts/runtime/os.h b/gcc-4.9/libcilkrts/runtime/os.h
new file mode 100644
index 000000000..8066f0313
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/os.h
@@ -0,0 +1,236 @@
+/* os.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file os.h
+ *
+ * @brief Low-level operating-system dependent facilities, not dependent on
+ * any Cilk facilities.
+ */
+
+#ifndef INCLUDED_OS_DOT_H
+#define INCLUDED_OS_DOT_H
+
+#include "rts-common.h"
+#include "cilk/common.h"
+#include "cilk-tbb-interop.h"
+
+#ifdef __cplusplus
+# include <cstddef>
+#else
+# include <stddef.h>
+#endif
+
+__CILKRTS_BEGIN_EXTERN_C
+
+
+// /* Thread-local storage */
+// #ifdef _WIN32
+// typedef unsigned cilkos_tls_key_t;
+// #else
+// typedef pthread_key_t cilkos_tls_key_t;
+// #endif
+// cilkos_tls_key_t cilkos_allocate_tls_key();
+// void cilkos_set_tls_pointer(cilkos_tls_key_t key, void* ptr);
+// void* cilkos_get_tls_pointer(cilkos_tls_key_t key);
+
+/* The RTS assumes that some thread-local state exists that stores the
+ worker and reducer map currently associated with a thread. These routines
+ manipulate this state. */
+
+/** @brief Thread-local state for cilk fibers. */
+typedef struct cilk_fiber_sysdep cilk_fiber_sysdep;
+
+/** @brief Initialize all TLS variables for Cilk. */
+COMMON_SYSDEP void __cilkrts_init_tls_variables(void);
+
+/** @brief Set worker struct in TLS. */
+COMMON_SYSDEP
+void __cilkrts_set_tls_worker(__cilkrts_worker *w) cilk_nothrow;
+
+/** @brief Get stack_op for TBB-interop structures from TLS. */
+COMMON_SYSDEP
+__cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void);
+
+/** @brief Set stack_op for TBB-interop structures in TLS. */
+COMMON_SYSDEP
+void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t);
+
+/**
+ * @brief Get the pointer to the pedigree leaf node from TLS.
+ *
+ * Function to get a pointer to the thread's pedigree leaf node. This
+ * pointer can be NULL.
+ */
+COMMON_SYSDEP
+__cilkrts_pedigree * __cilkrts_get_tls_pedigree_leaf(int create_new);
+
+/**
+ * @brief Sets the pointer to the pedigree leaf node in TLS.
+ *
+ * If the previous pointer value was not NULL, it is the caller's
+ * responsibility to ensure that previous pointer value is saved and
+ * freed.
+ *
+ * @param pedigree_leaf The leaf node to store into TLS.
+ */
+COMMON_SYSDEP
+void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf);
+
+
+#if SUPPORT_GET_CURRENT_FIBER > 0
+/**
+ * @brief Get the cilk_fiber from TLS.
+ */
+COMMON_SYSDEP
+cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void);
+
+/**
+ * @brief Set the cilk_fiber in TLS.
+ *
+ * @param fiber The fiber to store into TLS.
+ */
+COMMON_SYSDEP
+void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber);
+#endif
+
+/**
+ * @brief Function for returning the current thread id.
+ * @warning This function is useful for debugging purposes only.
+ */
+COMMON_SYSDEP
+void* cilkos_get_current_thread_id(void);
+
+/** @brief Return number of CPUs supported by this hardware, using whatever definition
+ of CPU is considered appropriate. */
+COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void);
+
+/** @brief Get current value of timer */
+COMMON_SYSDEP unsigned long long __cilkrts_getticks(void);
+
+/* Machine instructions */
+
+/// Stall execution for a few cycles.
+COMMON_SYSDEP void __cilkrts_short_pause(void);
+/// Wrapper for xchg instruction
+COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x);
+
+// Defines __cilkrts_fence - A macro for x86, a function call for other
+// architectures
+#include "os-fence.h"
+
+COMMON_SYSDEP void __cilkrts_sleep(void); ///< Sleep briefly
+COMMON_SYSDEP void __cilkrts_yield(void); ///< Yield quantum
+
+/**
+ * @brief Gets environment variable 'varname' and copy its value into 'value'.
+ *
+ * If the entire value, including the null terminator, fits into 'vallen'
+ * bytes, then returns the length of the value excluding the null. Otherwise,
+ * leaves the contents of 'value' undefined and returns the number of
+ * characters needed to store the environment variable's value, *including*
+ * the null terminator.
+ *
+ * @param value Buffer to store value.
+ * @param vallen Length of value buffer
+ * @param varname Name of the environment variable.
+ * @return Length of the value (excluding the null), or the required buffer
+ * size (including the null) if 'vallen' is too small.
+ */
+COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen,
+ const char* varname);
+
+/**
+ * @brief Unrecoverable error: Print an error message and abort execution.
+ */
+COMMON_SYSDEP void cilkos_error(const char *fmt, ...);
+
+/**
+ * @brief Print a warning message and return.
+ */
+COMMON_SYSDEP void cilkos_warning(const char *fmt, ...);
+
+/**
+ * @brief Convert the user's specified stack size into a "reasonable"
+ * value for the current OS.
+ *
+ * @param specified_stack_size User-specified stack size.
+ * @return New stack size value, modified for the OS.
+ */
+COMMON_SYSDEP size_t cilkos_validate_stack_size(size_t specified_stack_size);
+
+/**
+ * @brief Atomic addition: computes *p += x.
+ *
+ * @param p Pointer to value to update
+ * @param x Value of x.
+ */
+COMMON_SYSDEP long cilkos_atomic_add(volatile long* p, long x);
+
+#ifdef _WIN32
+
+/**
+ * @brief Windows-only low-level functions for processor groups.
+ */
+typedef struct _GROUP_AFFINITY GROUP_AFFINITY;
+
+/**
+ * @brief Probe the executing OS to see if it supports processor
+ * groups. These functions are expected to be available in Windows 7
+ * or later.
+ */
+void win_init_processor_groups(void);
+
+unsigned long win_get_active_processor_count(unsigned short GroupNumber);
+unsigned short win_get_active_processor_group_count(void);
+int win_set_thread_group_affinity(/*HANDLE*/ void* hThread,
+ const GROUP_AFFINITY *GroupAffinity,
+ GROUP_AFFINITY* PreviousGroupAffinity);
+
+/**
+ * @brief Cleans up any state allocated in TLS.
+ *
+ * Only defined for Windows because Linux calls destructors for each
+ * thread-local variable.
+ */
+void __cilkrts_per_thread_tls_cleanup(void);
+
+#endif // _WIN32
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_OS_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/os_mutex-unix.c b/gcc-4.9/libcilkrts/runtime/os_mutex-unix.c
new file mode 100644
index 000000000..af398cdd0
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/os_mutex-unix.c
@@ -0,0 +1,193 @@
+/* os_mutex-unix.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#include "os_mutex.h"
+#include "bug.h"
+
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+// contains notification macros for VTune.
+#include "cilk-ittnotify.h"
+
+/*
+ * OS Mutex functions.
+ *
+ * Not to be confused with the spinlock mutexes implemented in cilk_mutex.c
+ */
+
+struct os_mutex {
+ pthread_mutex_t mutex; ///< On Linux, os_mutex is implemented with a pthreads mutex
+};
+
+// Unix implementation of the global OS mutex. This will be created by the
+// first call to global_os_mutex_lock() and *NEVER* destroyed. On gcc-based
+// systems there's no way to guarantee the ordering of constructors and
+// destructors, so we can't be guaranteed that our destructor for a static
+// object will be called *after* any static destructors that may use Cilk
+// in the user's application
+static struct os_mutex *global_os_mutex = NULL;
+
+/* Sometimes during shared library load malloc doesn't work.
+ To handle that case, preallocate space for one mutex. */
+static struct os_mutex static_mutex;
+static int static_mutex_used;
+
+struct os_mutex *__cilkrts_os_mutex_create(void)
+{
+ int status;
+ struct os_mutex *mutex = (struct os_mutex *)malloc(sizeof(struct os_mutex));
+ pthread_mutexattr_t attr;
+
+ ITT_SYNC_CREATE(mutex, "OS Mutex");
+
+ if (!mutex) {
+ if (static_mutex_used) {
+ __cilkrts_bug("Cilk RTS library initialization failed");
+ } else {
+ static_mutex_used = 1;
+ mutex = &static_mutex;
+ }
+ }
+
+ status = pthread_mutexattr_init(&attr);
+ CILK_ASSERT (status == 0);
+#if defined DEBUG || CILK_LIB_DEBUG
+#ifdef PTHREAD_MUTEX_ERRORCHECK
+ status = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
+#else
+ status = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK_NP);
+#endif
+ CILK_ASSERT (status == 0);
+#endif
+ status = pthread_mutex_init (&mutex->mutex, &attr);
+ CILK_ASSERT (status == 0);
+ pthread_mutexattr_destroy(&attr);
+
+ return mutex;
+}
+
+void __cilkrts_os_mutex_lock(struct os_mutex *p)
+{
+ int status;
+ status = pthread_mutex_lock (&p->mutex);
+ ITT_SYNC_ACQUIRED(p);
+ if (__builtin_expect(status, 0) == 0)
+ return;
+ if (status == EDEADLK)
+ __cilkrts_bug("Cilk runtime error: deadlock acquiring mutex %p\n",
+ p);
+ else
+ __cilkrts_bug("Cilk runtime error %d acquiring mutex %p\n",
+ status, p);
+}
+
+int __cilkrts_os_mutex_trylock(struct os_mutex *p)
+{
+ int status;
+ status = pthread_mutex_trylock (&p->mutex);
+ return (status == 0);
+}
+
+void __cilkrts_os_mutex_unlock(struct os_mutex *p)
+{
+ int status;
+ ITT_SYNC_RELEASING(p);
+ status = pthread_mutex_unlock (&p->mutex);
+ CILK_ASSERT(status == 0);
+}
+
+void __cilkrts_os_mutex_destroy(struct os_mutex *p)
+{
+ pthread_mutex_destroy (&p->mutex);
+ if (p == &static_mutex) {
+ static_mutex_used = 0;
+ } else {
+ free(p);
+ }
+}
+
+/*
+ * create_global_os_mutex
+ *
+ * Function used with pthread_once to initialize the global OS mutex. Since
+ * pthread_once requires a function which takes no parameters and has no
+ * return value, the global OS mutex will be stored in the static (global
+ * to the compilation unit) variable "global_os_mutex."
+ *
+ * global_os_mutex will never be destroyed.
+ */
+static void create_global_os_mutex(void)
+{
+ CILK_ASSERT(NULL == global_os_mutex);
+ global_os_mutex = __cilkrts_os_mutex_create();
+}
+
+void global_os_mutex_lock(void)
+{
+ // pthread_once_t used with pthread_once to guarantee that
+ // create_global_os_mutex() is only called once
+ static pthread_once_t global_os_mutex_is_initialized = PTHREAD_ONCE_INIT;
+
+ // Execute create_global_os_mutex once in a thread-safe manner
+ // Note that create_global_os_mutex returns the mutex in the static
+ // (global to the module) variable "global_os_mutex"
+ pthread_once(&global_os_mutex_is_initialized,
+ create_global_os_mutex);
+
+ // We'd better have allocated a global_os_mutex
+ CILK_ASSERT(NULL != global_os_mutex);
+
+ // Acquire the global OS mutex
+ __cilkrts_os_mutex_lock(global_os_mutex);
+}
+
+void global_os_mutex_unlock(void)
+{
+ // We'd better have allocated a global_os_mutex. This means you should
+ // have called global_os_mutex_lock() before calling
+ // global_os_mutex_unlock(), but this is the only check for it.
+ CILK_ASSERT(NULL != global_os_mutex);
+
+ // Release the global OS mutex
+ __cilkrts_os_mutex_unlock(global_os_mutex);
+}
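+/* Editorial sketch (not part of the original source): callers bracket
+ * runtime-global work with the pair above, e.g.
+ *
+ *     global_os_mutex_lock();
+ *     ... inspect or update runtime-global state ...
+ *     global_os_mutex_unlock();
+ *
+ * The first call to global_os_mutex_lock() creates the mutex via
+ * pthread_once(), so no separate initialization call is needed.
+ */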
+
+/* End os_mutex-unix.c */
diff --git a/gcc-4.9/libcilkrts/runtime/os_mutex.h b/gcc-4.9/libcilkrts/runtime/os_mutex.h
new file mode 100644
index 000000000..71d9eb14e
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/os_mutex.h
@@ -0,0 +1,135 @@
+/* os_mutex.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file os_mutex.h
+ *
+ * @brief Portable interface to operating-system mutexes.
+ *
+ * Do not confuse os_mutex with Cilk runtime-specific spinlock mutexes.
+ */
+
+#ifndef INCLUDED_OS_MUTEX_DOT_H
+#define INCLUDED_OS_MUTEX_DOT_H
+
+#include <cilk/common.h>
+#include "rts-common.h"
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/// Opaque type
+typedef struct os_mutex os_mutex;
+
+/**
+ * Allocate and initialize an os_mutex
+ *
+ * @return A pointer to the initialized os_mutex
+ */
+COMMON_SYSDEP os_mutex* __cilkrts_os_mutex_create(void);
+
+/**
+ * Acquire the os_mutex for exclusive use
+ *
+ * @param m The os_mutex that is to be acquired.
+ */
+COMMON_SYSDEP void __cilkrts_os_mutex_lock(os_mutex *m);
+
+/**
+ * Try to acquire the os_mutex.
+ *
+ * @param m The os_mutex to try to acquire
+ * @return 0 if the lock acquire failed
+ * @return nonzero if the lock was acquired
+ */
+COMMON_SYSDEP int __cilkrts_os_mutex_trylock(os_mutex *m);
+
+/**
+ * Release the os_mutex
+ *
+ * @param m The os_mutex that is to be released.
+ */
+COMMON_SYSDEP void __cilkrts_os_mutex_unlock(os_mutex *m);
+
+/**
+ * Release any resources and deallocate the os_mutex.
+ *
+ * @param m The os_mutex that is to be deallocated.
+ */
+COMMON_SYSDEP void __cilkrts_os_mutex_destroy(os_mutex *m);
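+/* Editorial sketch (not part of the original header): typical lifetime of a
+ * non-global os_mutex using the four functions declared above.
+ *
+ *     os_mutex *m = __cilkrts_os_mutex_create();
+ *     __cilkrts_os_mutex_lock(m);
+ *     ... critical section ...
+ *     __cilkrts_os_mutex_unlock(m);
+ *     __cilkrts_os_mutex_destroy(m);
+ */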
+
+/**
+ * Acquire the global os_mutex for exclusive use. The global os_mutex
+ * will be initialized the first time this function is called in a
+ * thread-safe manner.
+ */
+COMMON_SYSDEP void global_os_mutex_lock();
+
+/**
+ * Release the global os_mutex. global_os_mutex_lock() must have been
+ * called first.
+ */
+COMMON_SYSDEP void global_os_mutex_unlock();
+
+
+#ifdef _MSC_VER
+
+/**
+ * @brief Create the global OS mutex - Windows only.
+ *
+ * On Windows we use DllMain() to create the global OS mutex when cilkrts20.dll
+ * is loaded. As opposed to Linux/MacOS where we use pthread_once to implement
+ * a singleton since there are no guarantees about constructor or destructor
+ * ordering between shared objects.
+ */
+NON_COMMON void global_os_mutex_create();
+
+/**
+ * @brief Destroy the global OS mutex - Windows only
+ *
+ * On Windows we use DllMain() to destroy the global OS mutex when
+ * cilkrts20.dll is unloaded. As opposed to Linux/MacOS where we cannot
+ * know when it's safe to destroy the global OS mutex since there are no
+ * guarantees about constructor or destructor ordering.
+ */
+NON_COMMON void global_os_mutex_destroy();
+
+#endif // _MSC_VER
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_OS_MUTEX_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/pedigrees.c b/gcc-4.9/libcilkrts/runtime/pedigrees.c
new file mode 100644
index 000000000..dee4d9cb4
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/pedigrees.c
@@ -0,0 +1,112 @@
+/* pedigrees.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2007-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+#include "pedigrees.h"
+#include "local_state.h"
+
+/*************************************************************
+ Pedigree API code.
+*************************************************************/
+
+/*
+ * C99 requires that every inline function with external linkage have one
+ * extern declaration in the program (with the inline definition in scope).
+ */
+COMMON_PORTABLE
+extern void update_pedigree_on_leave_frame(__cilkrts_worker *w,
+ __cilkrts_stack_frame *sf);
+
+void __cilkrts_set_pedigree_leaf(__cilkrts_pedigree *leaf)
+{
+ __cilkrts_set_tls_pedigree_leaf(leaf);
+}
+
+void load_pedigree_leaf_into_user_worker(__cilkrts_worker *w)
+{
+ __cilkrts_pedigree *pedigree_leaf;
+ CILK_ASSERT(w->l->type == WORKER_USER);
+ pedigree_leaf = __cilkrts_get_tls_pedigree_leaf(1);
+ w->pedigree = *pedigree_leaf;
+
+ // Save a pointer to the old leaf.
+ // We'll need to restore it later.
+ CILK_ASSERT(w->l->original_pedigree_leaf == NULL);
+ w->l->original_pedigree_leaf = pedigree_leaf;
+
+ __cilkrts_set_tls_pedigree_leaf(&w->pedigree);
+
+ // Check that this new pedigree root has at least two values.
+ CILK_ASSERT(w->pedigree.parent);
+ CILK_ASSERT(w->pedigree.parent->parent == NULL);
+}
+
+void save_pedigree_leaf_from_user_worker(__cilkrts_worker *w)
+{
+ CILK_ASSERT(w->l->type == WORKER_USER);
+
+ // Existing leaf in tls should be for the current worker.
+ // This assert is expensive to check though.
+ // CILK_ASSERT(&w->pedigree == __cilkrts_get_tls_pedigree_leaf(0));
+ CILK_ASSERT(w->l->original_pedigree_leaf);
+
+ // w should finish with a pedigree node that points to
+ // the same root that we just looked up.
+
+ // TODO: This assert should be valid.
+ // But we are removing it now to make exceptions (without pedigrees) work.
+ // Currently, reading the pedigree after an exception is caught
+ // fails because the pedigree chain is not restored correctly.
+ // CILK_ASSERT(w->l->original_pedigree_leaf->next == w->pedigree.parent);
+ w->l->original_pedigree_leaf->rank = w->pedigree.rank;
+
+ // Save that leaf pointer back into tls.
+ __cilkrts_set_tls_pedigree_leaf(w->l->original_pedigree_leaf);
+ // Null out worker's leaf for paranoia.
+ w->l->original_pedigree_leaf = NULL;
+}
+
+
+
+/*
+ Local Variables: **
+ c-file-style:"bsd" **
+ c-basic-offset:4 **
+ indent-tabs-mode:nil **
+ End: **
+*/
diff --git a/gcc-4.9/libcilkrts/runtime/pedigrees.h b/gcc-4.9/libcilkrts/runtime/pedigrees.h
new file mode 100644
index 000000000..3f6ebb977
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/pedigrees.h
@@ -0,0 +1,130 @@
+/* pedigrees.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#ifndef INCLUDED_PEDIGREES_DOT_H
+#define INCLUDED_PEDIGREES_DOT_H
+
+
+#include <cilk/common.h>
+#include <internal/abi.h>
+
+#include "rts-common.h"
+#include "global_state.h"
+#include "os.h"
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * @file pedigrees.h
+ *
+ * @brief pedigrees.h declares common routines related to pedigrees
+ * and the pedigree API.
+ */
+
+
+/**
+ * @brief Sets the leaf pedigree node for the current user thread.
+ *
+ * A typical implementation stores this pedigree node in thread-local
+ * storage.
+ *
+ * Preconditions:
+ * - Current thread should be a user thread.
+ *
+ * @param leaf The pedigree node to store as a leaf.
+ */
+COMMON_PORTABLE
+void __cilkrts_set_pedigree_leaf(__cilkrts_pedigree* leaf);
+
+
+/**
+ * Load the pedigree leaf node from thread-local storage into the
+ * current user worker. This method should execute as a part of
+ * binding the user thread to a worker.
+ *
+ * Preconditions:
+ *
+ * - w should be the worker for the current thread
+ * - w should be a user thread.
+ */
+COMMON_PORTABLE
+void load_pedigree_leaf_into_user_worker(__cilkrts_worker *w);
+
+/**
+ * Save the pedigree leaf node from the worker into thread-local
+ * storage. This method should execute as part of unbinding a user
+ * thread from a worker.
+ *
+ * Preconditions:
+ *
+ * - w should be the worker for the current thread
+ * - w should be a user thread.
+ */
+COMMON_PORTABLE
+void save_pedigree_leaf_from_user_worker(__cilkrts_worker *w);
+
+
+
+/**
+ * Update pedigree for a worker when leaving a frame.
+ *
+ * If this is the frame of a spawn helper (indicated by the
+ * CILK_FRAME_DETACHED flag) we must update the pedigree. The
+ * pedigree points to nodes allocated on the stack. Failing to
+ * update it will result in an access violation/segfault if the pedigree is
+ * walked. This must happen for all spawn helper frames, even if
+ * we're processing an exception.
+ */
+COMMON_PORTABLE
+inline void update_pedigree_on_leave_frame(__cilkrts_worker *w,
+ __cilkrts_stack_frame *sf)
+{
+ // Update the worker's pedigree information if this is an ABI 1 or later
+ // frame
+ if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1)
+ {
+ w->pedigree.rank = sf->spawn_helper_pedigree.rank + 1;
+ w->pedigree.parent = sf->spawn_helper_pedigree.parent;
+ }
+}
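+/* Editorial note (not in the original source): for example, if the detached
+ * spawn helper recorded spawn_helper_pedigree == { rank == 5, parent == P },
+ * the worker resumes with pedigree { rank == 6, parent == P }, so the next
+ * spawn or sync in the parent is numbered after the one that just returned. */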
+
+
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_PEDIGREES_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/record-replay.cpp b/gcc-4.9/libcilkrts/runtime/record-replay.cpp
new file mode 100644
index 000000000..bc5a79f24
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/record-replay.cpp
@@ -0,0 +1,770 @@
+/* record-replay.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2012-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+/*
+ * Implementation of the record/replay functionality for Cilk Plus
+ */
+
+#include <cstring>
+#include <vector>
+#include <stdlib.h>
+
+// clang is really strict about printf formats, so use the annoying integer
+// printf macros. Unfortunately they're not available on Windows
+#ifdef _WIN32
+#define PRIu64 "llu"
+#else
+#define __STDC_FORMAT_MACROS 1
+#include <inttypes.h>
+#endif
+
+#include "record-replay.h"
+#include "bug.h"
+#include "internal/abi.h"
+#include "local_state.h"
+#include "full_frame.h"
+#include "global_state.h"
+#include "cilk_malloc.h"
+#include "os.h" // for cilkos_error()
+
+#if RECORD_ON_REPLAY
+#pragma message ("*** Record on Replay is enabled!")
+#endif
+
+// Defined to write sequence number to the logs. Note that you cannot
+// diff logs with sequence numbers because the numbers may increment in
+// different orders.
+//#define INCLUDE_SEQUENCE_NUMBER 1
+
+const int PED_VERSION = 1; // Log recording version
+
+// Log types
+enum ped_type_t
+{
+ ped_type_unknown,
+ ped_type_steal,
+ ped_type_sync,
+ ped_type_orphaned,
+ ped_type_last // Flags end of the list
+};
+
+// Log type strings
+#define PED_TYPE_STR_STEAL "Steal"
+#define PED_TYPE_STR_SYNC "Sync"
+#define PED_TYPE_STR_WORKERS "Workers"
+#define PED_TYPE_STR_ORPHANED "Orphaned"
+
+#define PED_TYPE_SIZE 16 // Buffer size for the type of pedigree. Must
+ // hold largest pedigree record type string.
+#define PEDIGREE_BUFF_SIZE 512 // Buffer size for the string representation
+ // of a pedigree.
+
+/**
+ * Data we store for a replay log entry
+ */
+typedef struct replay_entry_t
+{
+ uint64_t *m_reverse_pedigree; /**< Reverse pedigree for replay log entry */
+ ped_type_t m_type; /**< Type of replay log entry */
+ int16_t m_pedigree_len; /**< Number of terms in reverse pedigree */
+ int16_t m_value; /**< Victim for STEALs, 0 if matching steal found for ORPHANs */
+
+ /**
+ * Load data read from the log into the entry
+ */
+ bool load(const char *type, const char *pedigree_str, int32_t value1, int32_t value2)
+ {
+ // Convert the type into an enum
+ if (0 == strcmp(type, PED_TYPE_STR_STEAL))
+ {
+ m_type = ped_type_steal;
+ m_value = (int16_t)value1; // Victim
+ }
+ else
+ {
+ m_value = -1; // Victim not valid
+ if (0 == strcmp(type, PED_TYPE_STR_SYNC))
+ m_type = ped_type_sync;
+ else if (0 == strcmp(type, PED_TYPE_STR_ORPHANED))
+ m_type = ped_type_orphaned;
+ else
+ {
+ m_type = ped_type_unknown;
+ return false;
+ }
+ }
+
+ // Parse the pedigree
+ m_pedigree_len = 0;
+
+ const char *p = pedigree_str;
+ char *end;
+
+ uint64_t temp_pedigree[PEDIGREE_BUFF_SIZE/2];
+
+ while(1)
+ {
+ temp_pedigree[m_pedigree_len++] = (uint64_t)strtol(p, &end, 10);
+ if ('\0' == *end)
+ break;
+ p = end + 1;
+ }
+
+ // Allocate memory to hold the pedigree.
+ // Copy the pedigree in reverse order since that's the order we'll
+ // traverse it
+ m_reverse_pedigree =
+ (uint64_t *)__cilkrts_malloc(sizeof(int64_t) * m_pedigree_len);
+ for (int n = 0; n < m_pedigree_len; n++)
+ m_reverse_pedigree[n] = temp_pedigree[(m_pedigree_len - 1) - n];
+
+ return true;
+ }
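+ /* Editorial note (not in the original source): for a recorded pedigree
+  * string such as "0_3_1" this yields m_pedigree_len == 3 and
+  * m_reverse_pedigree == {1, 3, 0}, i.e. leaf-to-root order, which is the
+  * order in which match() walks the live pedigree chain. */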
+
+ /**
+ * Match this entry against the data supplied. This includes walking the
+ * pedigree from the specified node.
+ */
+ bool match (ped_type_t type, const __cilkrts_pedigree *node, int victim = -1)
+ {
+ int i = 0;
+
+ // If the type isn't what they're seeking, we don't have a match
+ if (type != m_type)
+ return false;
+
+ // If we're looking for a STEAL, then the victim must match
+ if ((type == ped_type_steal) && (victim != m_value))
+ return false;
+
+ // Compare the current pedigree against what was recorded
+ while ((NULL != node) && (i < m_pedigree_len))
+ {
+ // If we've got a pedigree rank difference, then we don't have
+ // a match
+ if (node->rank != m_reverse_pedigree[i])
+ return false;
+ node = node->parent;
+ i++;
+ }
+
+ // Make sure we exhausted both the pedigree chain and the recorded
+ // pedigree
+ return ((NULL == node) && (i == m_pedigree_len));
+ }
+
+ /**
+ * Advance to the next entry, skipping any ORPHANED records we didn't see
+ * a matching STEAL for
+ */
+ replay_entry_t *next_entry()
+ {
+ replay_entry_t *entry = this;
+
+ // You can't go beyond the end
+ if (ped_type_last == entry->m_type)
+ return entry;
+
+ // Advance to the next entry
+ entry++;
+
+ // Skip any ORPHANED records that don't have a matching steal. We
+ // initialized the value field to -1 for ORPHANED. After loading all
+ // the log data, we iterated through all the STEAL records setting the
+ // matching ORPHANED record's value field to 0. So if an ORPHANED
+ // record's value field is still -1, it doesn't have a matching STEAL
+ // record, and I don't know why we chose not to return from the
+ // spawned function.
+ while ((ped_type_orphaned == entry->m_type) && (-1 == entry->m_value))
+ {
+ entry++;
+ }
+
+ return entry;
+ }
+
+ /**
+ * Release any allocated resources
+ */
+ void unload()
+ {
+ __cilkrts_free(m_reverse_pedigree);
+ m_reverse_pedigree = NULL;
+ }
+
+} replay_entry_t;
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * Walk the pedigree and generate a string representation with underscores
+ * between terms. Currently does a recursive walk to generate a forward
+ * pedigree.
+ *
+ * @param p The buffer that is to be filled. Assumed to be PEDIGREE_BUFF_SIZE
+ * characters long
+ * @param pnode The initial pedigree term to be written.
+ *
+ * @return A pointer into the pedigree string buffer after a term has been
+ * written.
+ */
+static
+char * walk_pedigree_nodes(char *p, const __cilkrts_pedigree *pnode)
+{
+ CILK_ASSERT(pnode);
+ if (pnode->parent)
+ {
+ p = walk_pedigree_nodes(p, pnode->parent);
+ p += sprintf(p, "_");
+ }
+
+ return p + sprintf(p, "%" PRIu64, pnode->rank);
+}
+
+/**
+ * Write a record to a replay log file.
+ *
+ * @param w The worker we're writing the pedigree for.
+ * @param type The type of the pedigree record, as a string
+ * @param initial_node The initial pedigree node to be written, or NULL if
+ * there is no pedigree for this record type.
+ * @param i1 First integer value to be written to the record.
+ * @param i2 Second integer value to be written to the record. Only applies
+ * to STEAL records. Defaults to -1 (unused). The second value is always
+ * written to make parsing easier.
+ */
+static
+void write_to_replay_log (__cilkrts_worker *w, const char *type,
+ const __cilkrts_pedigree *initial_node,
+ int i1 = -1, int i2 = -1)
+{
+ char pedigree[PEDIGREE_BUFF_SIZE];
+
+ // If we don't have an initial pedigree node, just use "0" to fill the slot
+ if (NULL == initial_node)
+ strcpy(pedigree, "0");
+ else
+ walk_pedigree_nodes(pedigree, initial_node);
+
+#ifndef INCLUDE_SEQUENCE_NUMBER
+ // Simply write the record
+ fprintf(w->l->record_replay_fptr, "%s %s %d %d\n",
+ type, pedigree, i1, i2);
+#else
+ // Write the record with a sequence number. The sequence number should
+ // always be the last term, and ignored on read
+
+ static long volatile seq_num = 0;
+ long write_num;
+
+ // Atomic increment functions are compiler/OS-specific
+#ifdef _WIN32
+ write_num = _InterlockedIncrement(&seq_num);
+#else /* GCC */
+ write_num = __sync_add_and_fetch(&seq_num, 1);
+#endif // _WIN32
+
+ fprintf(w->l->record_replay_fptr, "%s %s %d %d %ld\n",
+ type, pedigree, i1, i2, write_num);
+#endif // INCLUDE_SEQUENCE_NUMBER
+
+ fflush(w->l->record_replay_fptr);
+}
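+// Editorial note (not part of the original source): with the format above, a
+// STEAL of a frame whose forward pedigree is 0_3_1, taken from worker 2, is
+// written as the line below (the second integer slot is unused for steals):
+//
+//     Steal 0_3_1 2 -1
+//
+// The pedigree values here are purely illustrative.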
+
+/**
+ * Record data for a successful steal.
+ *
+ * The pedigree for a STEAL record is the pedigree of the stolen frame.
+ *
+ * @note It's assumed that replay_record_steal() has already checked that we're
+ * recording a log and that the record/replay functionality has not been
+ * compiled out.
+ *
+ * @param w The worker stealing a frame.
+ * @param victim_id The ID of the worker which had its frame stolen.
+ */
+void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id)
+{
+ // Follow the pedigree chain using worker's stack frame
+ CILK_ASSERT(w->l->next_frame_ff);
+ CILK_ASSERT(w->l->next_frame_ff->call_stack);
+
+ // Record steal: STEAL pedigree victim_id thief_id
+ write_to_replay_log (w, PED_TYPE_STR_STEAL,
+ &(w->l->next_frame_ff->call_stack->parent_pedigree),
+ victim_id);
+}
+
+/**
+ * Record data for the worker that continues from a sync
+ *
+ * The pedigree for a SYNC record is the pedigree at the sync.
+ *
+ * @note It's assumed that replay_record_sync() has already checked that we're
+ * recording a log and that the record/replay functionality has not been
+ * compiled out.
+ *
+ * @param w The worker continuing from a sync.
+ */
+void replay_record_sync_internal(__cilkrts_worker *w)
+{
+ // Record sync: SYNC pedigree last_worker_id
+ write_to_replay_log (w, PED_TYPE_STR_SYNC, &w->pedigree);
+}
+
+/**
+ * Record the pedigree of an attempt to return to a stolen parent
+ *
+ * The pedigree for an ORPHANED record is the pedigree of our parent
+ *
+ * @note It's assumed that replay_record_orphaned() has already checked that
+ * we're recording a log and that the record/replay functionality has not
+ * been compiled out.
+ *
+ * @param w The worker noting that it has been orphaned.
+ */
+void replay_record_orphaned_internal(__cilkrts_worker *w)
+{
+ // Record orphaned: ORPHANED pedigree
+ write_to_replay_log (w, PED_TYPE_STR_ORPHANED, w->pedigree.parent);
+}
+
+/**
+ * Attempt to match a SYNC record. We have a match when this worker was
+ * recorded returning from the current call to __cilkrts_sync() with the
+ * same pedigree and this was the worker that continued from the sync, since
+ * it was the last to sync.
+ *
+ * If we find a match, the caller is expected to stall until it is the last
+ * worker to reach the sync, so that it will be the worker to continue from
+ * the sync.
+ *
+ * @note It's assumed that replay_match_sync_pedigree() has already returned
+ * if we're not replaying a log, or if record/replay functionality has
+ * been compiled out.
+ *
+ * @param w The worker we're checking to see if we've got a match
+ */
+int replay_match_sync_pedigree_internal(__cilkrts_worker *w)
+{
+ // Return true if we have a match
+ if (w->l->replay_list_entry->match(ped_type_sync, &w->pedigree))
+ return 1;
+ else
+ return 0;
+}
+
+/**
+ * Advance to the next log entry from a SYNC record. Consume the current
+ * SYNC record on this worker and advance to the next one.
+ *
+ * @note It's assumed that replay_advance_from_sync() has already returned if
+ * we're not replaying a log, or if record/replay functionality has been
+ * compiled out.
+ *
+ * @param w The worker whose replay log we're advancing.
+ */
+void replay_advance_from_sync_internal (__cilkrts_worker *w)
+{
+ // The current replay entry must be a SYNC
+ CILK_ASSERT(ped_type_sync == w->l->replay_list_entry->m_type);
+
+ // Advance to the next entry
+ w->l->replay_list_entry = w->l->replay_list_entry->next_entry();
+}
+
+/**
+ * Called from random_steal() to override the ID of the randomly chosen victim
+ * worker which this worker will attempt to steal from. Returns the worker id
+ * of the next victim this worker was recorded stealing from, or -1 if the
+ * next record in the log is not a STEAL.
+ *
+ * @note This call does NOT attempt to match the pedigree. That will be done
+ * by replay_match_victim_pedigree() after random_steal() has locked the victim
+ * worker.
+ *
+ * @param w The __cilkrts_worker we're executing on. The worker's replay log
+ * is checked for a STEAL record. If we've got one, the stolen worker ID is
+ * returned.
+ *
+ * @return -1 if the next record is not a STEAL
+ * @return recorded stolen worker ID if we've got a matching STEAL record
+ */
+int replay_get_next_recorded_victim_internal(__cilkrts_worker *w)
+{
+ // If the next record isn't a STEAL, abort the attempt to steal work
+ if (ped_type_steal != w->l->replay_list_entry->m_type)
+ return -1;
+
+ // Return the victim's worker ID from the STEAL record. We'll check
+ // the pedigree after random_steal has locked the victim worker.
+ return w->l->replay_list_entry->m_value;
+}
+
+/**
+ * Called from random_steal() to determine if we have a STEAL record that
+ * matches the pedigree at the head of the victim worker. If we do have a
+ * match, the STEAL record is consumed.
+ *
+ * @note It's assumed that replay_match_victim_pedigree() has already returned if
+ * we're not replaying a log, or if record/replay functionality has been
+ * compiled out.
+ *
+ * @return 1 if we have a match
+ * @return 0 if the current replay record isn't a STEAL record, or the victim
+ * isn't correct, or the pedigree doesn't match.
+ */
+int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim)
+{
+ // If we don't have a match, return 0
+ if (! w->l->replay_list_entry->match(ped_type_steal,
+ &((*victim->head)->parent_pedigree),
+ victim->self))
+ return 0;
+
+ // Consume this entry
+ w->l->replay_list_entry = w->l->replay_list_entry->next_entry();
+
+ // Return success
+ return 1;
+}
+
+/**
+ * If the frame we're about to return to was recorded as being stolen,
+ * stall until it is.
+ *
+ * @note It's assumed that replay_wait_for_steal_if_parent_was_stolen() has
+ * already returned if we're not replaying a log, or if record/replay
+ * functionality has been compiled out.
+ *
+ * @param w The worker we're executing on.
+ */
+void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w)
+{
+ // If our parent wasn't recorded as orphaned, return now
+ if (! w->l->replay_list_entry->match (ped_type_orphaned,
+ w->pedigree.parent))
+ return;
+
+ // Stall until our parent is stolen. Note that we're comparing head
+ // and tail, not head and exc. The steal is not completed until tail
+ // is modified.
+ while (!((w->tail - 1) < w->head))
+ __cilkrts_sleep();
+
+ // Consume the entry
+ w->l->replay_list_entry = w->l->replay_list_entry->next_entry();
+}
+
+/**
+ * Allocate memory for the list of logged events.
+ *
+ * This function will read through the file and count the number of records
+ * so it can estimate how big a buffer to allocate for the array of replay
+ * entries. It will then rewind the file to the beginning so it can be
+ * loaded into memory.
+ *
+ * @param w The worker we're loading the file for.
+ * @param f The file of replay data we're scanning.
+ */
+static
+void allocate_replay_list(__cilkrts_worker *w, FILE *f)
+{
+ // Count the number of entries - yeah, it's a hack, but it lets me
+ // allocate the space all at once instead of in chunks
+ char buf[1024];
+ int entries = 1; // Include "LAST" node
+
+ while (! feof(f))
+ {
+ if (fgets(buf, 1024, f))
+ {
+ // Skip the Workers record - should only be in file for Worker 0
+ if (0 != strncmp(PED_TYPE_STR_WORKERS, buf, sizeof(PED_TYPE_STR_WORKERS)-1))
+ entries++;
+ }
+ }
+
+ w->l->replay_list_root =
+ (replay_entry_t *)__cilkrts_malloc(entries * sizeof(replay_entry_t));
+ w->l->replay_list_root[entries - 1].m_type = ped_type_last;
+
+ // Reset the file to the beginning
+ rewind(f);
+}
+
+/**
+ * Load the replay log for a worker into memory.
+ *
+ * @param w The worker we're loading the replay for.
+ */
+static
+void load_recorded_log(__cilkrts_worker *w)
+{
+ char ped_type[PED_TYPE_SIZE];
+ char ped_str[PEDIGREE_BUFF_SIZE];
+ int32_t i1 = -1, i2 = -1;
+ int fret;
+ char local_replay_file_name[512];
+ FILE *f;
+
+ // Open the log for reading
+ sprintf(local_replay_file_name, "%s%d.cilklog", w->g->record_replay_file_name, w->self);
+ f = fopen(local_replay_file_name, "r");
+
+ // Make sure we found a log!
+ CILK_ASSERT (NULL != f);
+
+ // Initialize the replay_list
+ allocate_replay_list(w, f);
+ replay_entry_t *entry = w->l->replay_list_root;
+
+ // Read the data out and add it to our tables
+ while (! feof(f))
+ {
+#ifndef INCLUDE_SEQUENCE_NUMBER
+ fret = fscanf(f, "%s %s %d %d\n", ped_type, ped_str, &i1, &i2);
+ if(EOF == fret)
+ break;
+
+ // We must have read 4 fields
+ CILK_ASSERT(4 == fret);
+#else
+ int32_t write_num;
+ fret = fscanf(f, "%s %s %d %d %d\n", ped_type, ped_str,
+ &i1, &i2, &write_num);
+ if(EOF == fret)
+ break;
+
+ // We must have read 5 fields
+ CILK_ASSERT(5 == fret);
+#endif // INCLUDE_SEQUENCE_NUMBER
+
+ // Load the data into the entry
+ if (0 == strcmp(ped_type, PED_TYPE_STR_WORKERS))
+ {
+ // Verify we're replaying with the same number of workers we recorded with
+ if (i1 != w->g->P)
+ {
+ // Fatal error - does not return
+ cilkos_error("Cannot continue replay: number of workers(%d) doesn't match "
+ "that from the recording(%d).\n", w->g->P, i1);
+ }
+
+ // Verify that we understand this version of the pedigree file
+ if (PED_VERSION != i2)
+ {
+ // Fatal error - does not return
+ cilkos_error("Pedigree file version %d doesn't match current "
+ "version %d - cannot continue.\n",
+ i2, PED_VERSION);
+ }
+ }
+ else
+ {
+ entry->load(ped_type, ped_str, i1, i2);
+ entry++;
+ }
+ }
+
+ // Make sure we've filled the allocated memory. We initialized the last
+ // entry in allocate_replay_list().
+ CILK_ASSERT(ped_type_last == entry->m_type);
+ w->l->replay_list_entry = w->l->replay_list_root;
+
+ // Close the log and return
+ fclose(f);
+}
+
+/**
+ * Scan a recorded log to match STEALs against ORPHANED records.
+ *
+ * @param g Cilk Runtime global state. Passed to access the worker array so
+ * we can scan a worker's ORPHANED entries for one that matches a STEAL entry.
+ * @param entry The root of a replay_list for a worker.
+ */
+static
+void scan_for_matching_steals(global_state_t *g, replay_entry_t *entry)
+{
+ // Iterate over all of the entries
+ while (ped_type_last != entry->m_type)
+ {
+ // Look for STEALs. That will tell us which worker the frame was
+ // stolen from
+ if (ped_type_steal == entry->m_type)
+ {
+ bool found = false;
+
+ // Validate the worker ID and make sure we've got a list
+ CILK_ASSERT((entry->m_value >= 0) && (entry->m_value < g->total_workers));
+ replay_entry_t *victim_entry = g->workers[entry->m_value]->l->replay_list_root;
+ CILK_ASSERT(NULL != victim_entry);
+
+ // Scan the victim's list for the matching ORPHANED record
+ while ((ped_type_last != victim_entry->m_type) && ! found)
+ {
+ if (ped_type_orphaned == victim_entry->m_type)
+ {
+ if (entry->m_pedigree_len == victim_entry->m_pedigree_len)
+ {
+ if (0 == memcmp(entry->m_reverse_pedigree,
+ victim_entry->m_reverse_pedigree,
+ entry->m_pedigree_len * sizeof(int64_t)))
+ {
+ // Note that this ORPHANED record has a matching steal
+ victim_entry->m_value = 0;
+ found = true;
+ }
+ }
+ }
+ victim_entry++;
+ }
+ }
+ entry++;
+ }
+}
+
+
+/*
+ * Initialize per-worker data for record or replay - See record-replay.h
+ * for full routine header.
+ */
+void replay_init_workers(global_state_t *g)
+{
+ int i;
+ char worker_file_name[512];
+
+ // If we're not recording or replaying a log, we're done. All of the
+ // fields in the global_state_t or local_state_t are already initialized
+ // to default values.
+ if (RECORD_REPLAY_NONE == g->record_or_replay)
+ return;
+
+ // If we're replaying a log, read each worker's log and construct the
+ // in-memory log
+ if (REPLAY_LOG == g->record_or_replay)
+ {
+ // Read all of the data
+ for (i = 0; i < g->total_workers; ++i)
+ {
+ // This function will also initialize and fill the worker's
+ // replay list
+ load_recorded_log(g->workers[i]);
+ }
+
+ // Scan for orphans with no matching steal. Mark them so they'll be
+ // skipped as we advance through the log.
+ for (i = 0; i < g->total_workers; ++i)
+ {
+ scan_for_matching_steals(g, g->workers[i]->l->replay_list_root);
+ }
+
+ // If we're recording the logs while replaying, create the log files.
+ // This will only be used for debugging. Create the logs in the
+ // current directory. It should be as good a place as any...
+#if RECORD_ON_REPLAY
+ for(i = 0; i < g->total_workers; ++i)
+ {
+ __cilkrts_worker *w = g->workers[i];
+ sprintf(worker_file_name, "replay_log_%d.cilklog", w->self);
+ w->l->record_replay_fptr = fopen(worker_file_name, "w+");
+ CILK_ASSERT(NULL != w->l->record_replay_fptr);
+ }
+
+ // Record the number of workers, file version in Worker 0's file
+ write_to_replay_log (g->workers[0], PED_TYPE_STR_WORKERS, NULL, g->P, PED_VERSION);
+#endif // RECORD_ON_REPLAY
+ }
+
+ // If we're recording, create the log files
+ if (RECORD_LOG == g->record_or_replay)
+ {
+ for(i = 0; i < g->total_workers; ++i)
+ {
+ __cilkrts_worker *w = g->workers[i];
+ sprintf(worker_file_name, "%s%d.cilklog",
+ g->record_replay_file_name,
+ w->self);
+ w->l->record_replay_fptr = fopen(worker_file_name, "w+");
+ CILK_ASSERT(NULL != w->l->record_replay_fptr);
+ }
+
+ // Record the number of workers, file version in Worker 0's file
+ write_to_replay_log (g->workers[0], PED_TYPE_STR_WORKERS, NULL, g->P, PED_VERSION);
+ }
+}
+
+/*
+ * Do any necessary cleanup for the logs - See record-replay.h for full
+ * routine header.
+ */
+void replay_term(global_state_t *g)
+{
+ // Free memory for the record/replay log file name, if we've got one
+ if (g->record_replay_file_name)
+ __cilkrts_free(g->record_replay_file_name);
+
+ // Per-worker cleanup
+ for(int i = 0; i < g->total_workers; ++i)
+ {
+ __cilkrts_worker *w = g->workers[i];
+
+ // Close the log files, if we've opened them
+ if(w->l->record_replay_fptr)
+ fclose(w->l->record_replay_fptr);
+
+ if (w->l->replay_list_root)
+ {
+ // We should have consumed the entire list
+ CILK_ASSERT(ped_type_last == w->l->replay_list_entry->m_type);
+
+ replay_entry_t *entry = w->l->replay_list_root;
+ while (ped_type_last != entry->m_type)
+ {
+ // Free the pedigree memory for each entry
+ entry->unload();
+ entry++;
+ }
+ __cilkrts_free(w->l->replay_list_root);
+ w->l->replay_list_root = NULL;
+ w->l->replay_list_entry = NULL;
+ }
+ }
+}
+
+__CILKRTS_END_EXTERN_C
diff --git a/gcc-4.9/libcilkrts/runtime/record-replay.h b/gcc-4.9/libcilkrts/runtime/record-replay.h
new file mode 100644
index 000000000..c1c5a68f5
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/record-replay.h
@@ -0,0 +1,432 @@
+/* record_replay.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2012-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+/**
+ * @file record-replay.h
+ *
+ * @brief record-replay.h and .cpp encapsulate most of the functionality to
+ * record and play back a Cilk Plus application.
+ *
+ * Recording is directed by the setting of the CILK_RECORD_LOG environment
+ * variable. If it's defined, the value specifies the root we'll use to
+ * generate files for each worker using the following format string:
+ * "%s%d.cilklog", where the integer is the value of w->self.
+ *
+ * Replay is directed by the setting of the CILK_REPLAY_LOG environment
+ * variable, interpreted the same way as CILK_RECORD_LOG. If both
+ * CILK_RECORD_LOG and CILK_REPLAY_LOG are defined, a warning will be given
+ * and the attempt to record a log will be ignored.
+ *
+ * Recording is relatively straightforward. We write all information about a
+ * worker to a per-worker file.
+ *
+ * Each pedigree record consists of the following fields. All fields must be
+ * present in every record to make parsing easy.
+ * - Type - A string identifying the pedigree record. See the PED_TYPE_STR_
+ * macros for the currently defined values.
+ * - Pedigree - A string of pedigree values, with underscores between
+ * adjacent values.
+ * - i1 - Record type-specific value. -1 if not used.
+ * - i2 - Record type-specific value. -1 if not used.
+ *
+ * WORKERS record - only written to the file for worker 0. Note that this is
+ * the first worker in the workers array. Worker 0 is the first system worker,
+ * *NOT* a user worker.
+ * - Type: "Workers"
+ * - Pedigree: Always "0" - ignored
+ * - i1: Number of workers (g->P) when we recorded the log. A mismatch when
+ * we attempt to replay the log will result in aborting the execution.
+ * - i2: Log version number - Specified by PED_VERSION in record-replay.cpp
+ *
+ * STEAL record - written after a successful steal.
+ * - Type: "Steal"
+ * - Pedigree: Pedigree of stolen frame
+ * - i1: Worker the frame was stolen from
+ * - i2: -1
+ *
+ * SYNC record - written after a worker continues from a sync.
+ * - Type: "Sync"
+ * - Pedigree: Pedigree of sync. Note that this is the pedigree *before*
+ * the pedigree is incremented in setup_for_execution_pedigree().
+ * - i1: -1
+ * - i2: -1
+ *
+ * ORPHANED record - saved on a return to a stolen parent.
+ * - Type: "Orphaned"
+ * - Pedigree: Pedigree of the parent frame *before* the pedigree is
+ * incremented by the return
+ * - i1: -1
+ * - i2: -1
+ *
+ * On replay, the data is loaded into a per-worker array, and the data is
+ * consumed in order as needed.
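+ *
+ * As a purely illustrative example (the pedigree values below are invented),
+ * a worker's replay log might therefore contain records such as:
+ *
+ *   Steal 0_1 3 -1
+ *   Orphaned 0_2 -1 -1
+ *   Sync 0_2_1 -1 -1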
+ */
+
+#ifndef INCLUDED_RECORD_REPLAY_DOT_H
+#define INCLUDED_RECORD_REPLAY_DOT_H
+
+#include "cilk/common.h"
+#include "global_state.h"
+
+/**
+ * Define CILK_RECORD_REPLAY to enable record/replay functionality. If
+ * CILK_RECORD_REPLAY is not defined, all of the record/replay functions in
+ * record-replay.h will be stubbed out. Since they're declared as inline
+ * functions, the resulting build should have no performance impact due to
+ * the implementation of record/replay.
+ */
+#define CILK_RECORD_REPLAY 1
+
+/**
+ * Define RECORD_ON_REPLAY=1 to write logs when we're replaying a log. This
+ * should only be needed when debugging the replay functionality. This should
+ * always be defined as 0 when record-replay.h is checked in.
+ */
+#define RECORD_ON_REPLAY 0
+
+__CILKRTS_BEGIN_EXTERN_C
+
+#ifdef CILK_RECORD_REPLAY
+// Declarations of internal record/replay functions. The inlined versions
+// further down do some preliminary testing (like if we're not recording or
+// replaying) and will stub out the functionality if we've compiled out the
+// record/replay feature
+int replay_match_sync_pedigree_internal(__cilkrts_worker *w);
+void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w);
+void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id);
+void replay_record_sync_internal(__cilkrts_worker *w);
+void replay_record_orphaned_internal(__cilkrts_worker *w);
+int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim);
+void replay_advance_from_sync_internal (__cilkrts_worker *w);
+int replay_get_next_recorded_victim_internal(__cilkrts_worker *w);
+#endif // CILK_RECORD_REPLAY
+
+// Publicly defined record/replay API
+
+/**
+ * If we're replaying a log, wait for our parent to be stolen if it was stolen
+ * when the log was recorded. If record/replay is compiled out, this is a noop.
+ *
+ * @param w The __cilkrts_worker we're executing on. The worker's replay
+ * list will be checked for an ORPHANED record with a matching pedigree. If
+ * there is a match, the ORPHANED record will be consumed.
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
+{
+ // Only check if we're replaying a log
+ if (REPLAY_LOG == w->g->record_or_replay)
+ replay_wait_for_steal_if_parent_was_stolen_internal(w);
+}
+#else
+__CILKRTS_INLINE
+void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
+{
+ // If record/replay is disabled, we never wait
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Called from random_steal() to override the ID of the randomly chosen victim
+ * worker which this worker will attempt to steal from. Returns the worker id
+ * of the next victim this worker was recorded stealing from, or -1 if the
+ * next record in the log is not a STEAL.
+ *
+ * @note This call does NOT attempt to match the pedigree. That will be done
+ * by replay_match_victim_pedigree() after random_steal() has locked the victim
+ * worker.
+ *
+ * @param w The __cilkrts_worker we're executing on. The worker's replay log
+ * is checked for a STEAL record. If we've got one, the stolen worker ID is
+ * returned.
+ * @param id The randomly chosen victim worker ID. If we're not replaying a
+ * log, or if record/replay has been compiled out, this is the value that
+ * will be returned.
+ *
+ * @return id if we're not replaying a log
+ * @return -1 if the next record is not a STEAL
+ * @return recorded stolen worker ID if we've got a matching STEAL record
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
+{
+ // Only check if we're replaying a log
+ if (REPLAY_LOG == w->g->record_or_replay)
+ return replay_get_next_recorded_victim_internal(w);
+ else
+ return id;
+}
+#else
+__CILKRTS_INLINE
+int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
+{
+ // Record/replay is disabled. Always return the original worker id
+ return id;
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Initialize per-worker data for record/replay. A noop if record/replay
+ * is disabled, or if we're not recording or replaying anything.
+ *
+ * If we're recording a log, this will ready us to create the per-worker
+ * logs.
+ *
+ * If we're replaying a log, this will read the logs into the per-worker
+ * structures.
+ *
+ * @param g Cilk runtime global state
+ */
+void replay_init_workers(global_state_t *g);
+
+/**
+ * Record a STEAL record on a successful steal. A noop if record/replay is
+ * disabled, or if we're not recording anything.
+ *
+ * @param w The __cilkrts_worker we're executing on. The pedigree of
+ * the stolen frame will be walked to generate the STEAL record.
+ *
+ * @param victim_id The worker ID of the worker w stole from.
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
+{
+#if RECORD_ON_REPLAY
+ // If we're recording on replay, write the record if we're recording or
+ // replaying
+ if (RECORD_REPLAY_NONE == w->g->record_or_replay)
+ return;
+#else
+ // Only write the record if we're recording
+ if (RECORD_LOG != w->g->record_or_replay)
+ return;
+#endif
+
+ replay_record_steal_internal(w, victim_id);
+}
+#else
+__CILKRTS_INLINE
+void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
+{
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Record a SYNC record when continuing after a sync. A noop if record/replay
+ * is disabled, or if we're not recording anything, or if the sync was abandoned,
+ * meaning this isn't the worker that continues from the sync.
+ *
+ * @param w The __cilkrts_worker we're executing on. The pedigree of
+ * the sync-ing frame will be walked to generate the SYNC record.
+ *
+ * @param continuing True if this worker will be continuing from the
+ * cilk_sync. A SYNC record will only be generated if continuing is true.
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+void replay_record_sync(__cilkrts_worker *w, int continuing)
+{
+ // If this was not the last worker to the sync, return
+ if (! continuing)
+ return;
+
+#if RECORD_ON_REPLAY
+ // If we're recording on replay, write the record if we're recording or
+ // replaying
+ if (RECORD_REPLAY_NONE == w->g->record_or_replay)
+ return;
+#else
+ // Only write the record if we're recording
+ if (RECORD_LOG != w->g->record_or_replay)
+ return;
+#endif
+
+ replay_record_sync_internal(w);
+}
+#else
+__CILKRTS_INLINE
+void replay_record_sync(__cilkrts_worker *w, int continuing)
+{
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Record an ORPHANED record on a return to a stolen parent. A noop if
+ * record/replay is disabled, or if we're not recording anything.
+ *
+ * @param w The __cilkrts_worker we're executing on. The pedigree of the
+ * frame that has discovered that its parent has been stolen will be walked
+ * to generate the ORPHANED record.
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+void replay_record_orphaned(__cilkrts_worker *w)
+{
+#if RECORD_ON_REPLAY
+ // If we're recording on replay, write the record if we're recording or
+ // replaying
+ if (RECORD_REPLAY_NONE == w->g->record_or_replay)
+ return;
+#else
+ // Only write the record if we're recording
+ if (RECORD_LOG != w->g->record_or_replay)
+ return;
+#endif
+
+ replay_record_orphaned_internal(w);
+}
+#else
+__CILKRTS_INLINE
+void replay_record_orphaned(__cilkrts_worker *w)
+{
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Test whether the frame at the head of the victim matches the pedigree of
+ * the frame that was recorded being stolen. Called in random steal to verify
+ * that we're about to steal the correct frame.
+ *
+ * @param w The __cilkrts_worker we're executing on. The current worker
+ * is needed to find the replay entry to be checked.
+ *
+ * @param victim The __cilkrts_worker we're proposing to steal a frame
+ * from. The victim's head entry is needed to find the replay entry to be
+ * checked.
+ *
+ * @return 0 if we're replaying a log and the victim's pedigree does NOT match
+ * the next frame the worker is expected to steal.
+ *
+ * @return 1 in all other cases to indicate that the steal attempt should
+ * continue
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
+{
+ // We're not replaying a log. The victim is always acceptable
+ if (REPLAY_LOG != w->g->record_or_replay)
+ return 1;
+
+ // Return 1 if the victim's pedigree matches the frame the worker stole
+ // when we recorded the log
+ return replay_match_victim_pedigree_internal(w, victim);
+}
+#else
+__CILKRTS_INLINE
+int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
+{
+ // Record/replay is disabled. The victim is always acceptable
+ return 1;
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Test whether the current replay entry is a sync record matching the
+ * worker's pedigree.
+ *
+ * @param w The __cilkrts_worker we're executing on.
+ *
+ * @return 1 if the current replay entry matches the current pedigree.
+ * @return 0 if there's no match, or if we're not replaying a log.
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+int replay_match_sync_pedigree(__cilkrts_worker *w)
+{
+ // If we're not replaying, assume no match
+ if (REPLAY_LOG != w->g->record_or_replay)
+ return 0;
+
+ return replay_match_sync_pedigree_internal(w);
+}
+#else
+__CILKRTS_INLINE
+int replay_match_sync_pedigree(__cilkrts_worker *w)
+{
+ // Record/replay is disabled. Assume no match
+ return 0;
+}
+#endif
+
+/**
+ * Marks a sync record seen, advancing to the next record in the replay list.
+ *
+ * This function will only advance to the next record if:
+ * - Record/replay hasn't been compiled out AND
+ * - We're replaying a log AND
+ * - A match was found AND
+ * - The sync is not being abandoned
+ *
+ * @param w The __cilkrts_worker we're executing on.
+ * @param match_found The value returned by replay_match_sync_pedigree(). If
+ * match_found is false, nothing is done.
+ * @param continuing Flag indicating whether this worker will continue from
+ * the sync (it's the last worker to the sync) or if it will abandon the work
+ * and go to the scheduling loop to look for more work it can steal.
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
+{
+ // If we're replaying a log, and the current sync wasn't abandoned, and we
+ // found a match in the log, mark the sync record seen.
+ if ((REPLAY_LOG == w->g->record_or_replay) && match_found && continuing)
+ replay_advance_from_sync_internal(w);
+}
+#else
+__CILKRTS_INLINE
+void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
+{
+}
+#endif
+
+/**
+ * Release any resources used to read or write a replay log.
+ *
+ * @param g Cilk runtime global state
+ */
+void replay_term(global_state_t *g);
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_RECORD_REPLAY_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/reducer_impl.cpp b/gcc-4.9/libcilkrts/runtime/reducer_impl.cpp
new file mode 100644
index 000000000..f20b9bc45
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/reducer_impl.cpp
@@ -0,0 +1,1012 @@
+/* reducer_impl.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Patents Pending, Intel Corporation.
+ **************************************************************************/
+
+/**
+ * Support for reducers
+ */
+
+// ICL: Don't complain about conversion from pointer to same-sized integral type
+// in hashfun. That's why we're using size_t
+#ifdef _WIN32
+# pragma warning(disable: 1684)
+#endif
+
+#include "reducer_impl.h"
+#include "scheduler.h"
+#include "bug.h"
+#include "os.h"
+#include "global_state.h"
+#include "frame_malloc.h"
+
+#include "cilk/hyperobject_base.h"
+#include "cilktools/cilkscreen.h"
+#include "internal/abi.h"
+
+#if REDPAR_DEBUG > 0
+#include <stdio.h>
+#include <stdlib.h>
+#endif
+
+
+#define DBG if(0) // if(1) enables some internal checks
+
+// Check that w is the currently executing worker. This method is a
+// no-op unless the debug level is set high enough.
+static inline void verify_current_wkr(__cilkrts_worker *w)
+{
+#if REDPAR_DEBUG >= 5
+ __cilkrts_worker* tmp = __cilkrts_get_tls_worker();
+ if (w != tmp) {
+ fprintf(stderr, "W=%d, actual=%d... missing a refresh....\n",
+ w->self,
+ tmp->self);
+ }
+ CILK_ASSERT(w == tmp); // __cilkrts_get_tls_worker());
+#endif
+}
+
+// Suppress clang warning that the expression result is unused
+#if defined(__clang__) && (! defined(__INTEL_COMPILER))
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wunused-value"
+#endif // __clang__
+
+/// Helper class to disable and re-enable Cilkscreen
+struct DisableCilkscreen
+{
+ DisableCilkscreen () { __cilkscreen_disable_checking(); }
+ ~DisableCilkscreen () { __cilkscreen_enable_checking(); }
+};
+
+/// Helper class to enable and re-disable Cilkscreen
+struct EnableCilkscreen
+{
+ EnableCilkscreen () { __cilkscreen_enable_checking(); }
+ ~EnableCilkscreen () { __cilkscreen_disable_checking(); }
+};
+
+#if defined(__clang__) && (! defined(__INTEL_COMPILER))
+# pragma clang diagnostic pop
+#endif // __clang__
+
+/**
+ * @brief Element for a hyperobject
+ */
+struct elem {
+ void *key; ///< Shared key for this hyperobject
+ __cilkrts_hyperobject_base *hb; ///< Base of the hyperobject.
+ void *view; ///< Strand-private view of this hyperobject
+ /// Destroy and deallocate the view object for this element and set view to
+ /// null.
+ void destroy();
+
+ /// Returns true if this element contains a leftmost view.
+ bool is_leftmost() const;
+};
+
+/** Bucket containing at most NMAX elements */
+struct bucket {
+ /// Size of the array of elements for this bucket
+ size_t nmax;
+
+ /**
+ * We use the ``struct hack'' to allocate an array of variable
+ * dimension at the end of the struct. However, we allocate a
+ * total of NMAX+1 elements instead of NMAX. The last one always
+ * has key == 0, which we use as a termination criterion
+ */
+ elem el[1];
+};
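+
+// Note: sizeof_bucket() below relies on this layout. Because the struct
+// already contains one elem, allocating sizeof(bucket) + nmax*sizeof(elem)
+// bytes yields nmax usable elements plus the trailing zero-key terminator.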
+
+/**
+ * Class that implements the map for reducers so we can find the
+ * view for a strand.
+ */
+struct cilkred_map {
+ /** Handy pointer to the global state */
+ global_state_t *g;
+
+ /** Number of elements in table */
+ size_t nelem;
+
+ /** Number of buckets */
+ size_t nbuckets;
+
+ /** Array of pointers to buckets */
+ bucket **buckets;
+
+ /** Set true if merging (for debugging purposes) */
+ bool merging;
+
+ /** Set true for leftmost reducer map */
+ bool is_leftmost;
+
+ /** @brief Return element mapped to 'key' or null if not found. */
+ elem *lookup(void *key);
+
+ /**
+ * @brief Insert key/value element into hash map without rehashing.
+ * Does not check for duplicate key.
+ */
+ elem *insert_no_rehash(__cilkrts_worker *w,
+ void *key,
+ __cilkrts_hyperobject_base *hb,
+ void *value);
+
+ /**
+ * @brief Insert key/value element into hash map, rehashing if necessary.
+ * Does not check for duplicate key.
+ */
+ inline elem *rehash_and_insert(__cilkrts_worker *w,
+ void *key,
+ __cilkrts_hyperobject_base *hb,
+ void *value);
+
+ /** @brief Grow bucket by one element, reallocating bucket if necessary */
+ static elem *grow(__cilkrts_worker *w, bucket **bp);
+
+ /** @brief Rehash a worker's reducer map */
+ void rehash(__cilkrts_worker *);
+
+ /**
+ * @brief Returns true if a rehash is needed due to the number of elements that
+ * have been inserted.
+ */
+ inline bool need_rehash_p() const;
+
+ /** @brief Allocate and initialize the buckets */
+ void make_buckets(__cilkrts_worker *w, size_t nbuckets);
+
+ /**
+ * Specify behavior when the same key is present in both maps passed
+ * into merge().
+ */
+ enum merge_kind
+ {
+ MERGE_UNORDERED, ///< Assertion fails
+ MERGE_INTO_LEFT, ///< Merges the argument from the right into the left
+ MERGE_INTO_RIGHT ///< Merges the argument from the left into the right
+ };
+
+ /**
+ * @brief Merge another reducer map into this one, destroying the other map in
+ * the process.
+ */
+ __cilkrts_worker* merge(__cilkrts_worker *current_wkr,
+ cilkred_map *other_map,
+ enum merge_kind kind);
+
+ /** @brief check consistency of a reducer map */
+ void check(bool allow_null_view);
+
+ /** @brief Test whether the cilkred_map is empty */
+ bool is_empty() { return nelem == 0; }
+};
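+
+// Typical use (see __cilkrts_hyper_create/__cilkrts_hyper_lookup below):
+// hash the reducer key to a bucket, walk that bucket's elem array until the
+// key matches or the zero-key sentinel is reached, and on a miss insert a
+// fresh view with rehash_and_insert(), which grows the table first whenever
+// need_rehash_p() reports that the load factor is too high.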
+
+static inline struct cilkred_map* install_new_reducer_map(__cilkrts_worker *w) {
+ cilkred_map *h;
+ h = __cilkrts_make_reducer_map(w);
+ w->reducer_map = h;
+ return h;
+}
+
+static size_t sizeof_bucket(size_t nmax)
+{
+ bucket *b = 0;
+ return (sizeof(*b) + nmax * sizeof(b->el[0]));
+}
+
+static bucket *alloc_bucket(__cilkrts_worker *w, size_t nmax)
+{
+ bucket *b = (bucket *)
+ __cilkrts_frame_malloc(w, sizeof_bucket(nmax));
+ b->nmax = nmax;
+ return b;
+}
+
+static void free_bucket(__cilkrts_worker *w, bucket **bp)
+{
+ bucket *b = *bp;
+ if (b) {
+ __cilkrts_frame_free(w, b, sizeof_bucket(b->nmax));
+ *bp = 0;
+ }
+}
+
+/* round up nmax to fill a memory allocator block completely */
+static size_t roundup(size_t nmax)
+{
+ size_t sz = sizeof_bucket(nmax);
+
+ /* round up size to a full malloc block */
+ sz = __cilkrts_frame_malloc_roundup(sz);
+
+ /* invert sizeof_bucket() */
+ nmax = ((sz - sizeof(bucket)) / sizeof(elem));
+
+ return nmax;
+}
+
+static bool is_power_of_2(size_t n)
+{
+ return (n & (n - 1)) == 0;
+}
+
+void cilkred_map::make_buckets(__cilkrts_worker *w,
+ size_t new_nbuckets)
+{
+ nbuckets = new_nbuckets;
+
+ CILK_ASSERT(is_power_of_2(nbuckets));
+#if defined __GNUC__ && defined __ICC
+ /* bug workaround -- suppress calls to _intel_fast_memset */
+ bucket *volatile*new_buckets = (bucket *volatile*)
+#else
+ bucket **new_buckets = (bucket **)
+#endif
+ __cilkrts_frame_malloc(w, nbuckets * sizeof(*(buckets)));
+
+#if REDPAR_DEBUG >= 1
+ fprintf(stderr, "W=%d, desc=make_buckets, new_buckets=%p, new_nbuckets=%zd\n",
+ w->self, new_buckets, new_nbuckets);
+#endif
+
+ for (size_t i = 0; i < new_nbuckets; ++i)
+ new_buckets[i] = 0;
+#if defined __GNUC__ && defined __ICC
+ buckets = (bucket **)new_buckets;
+#else
+ buckets = new_buckets;
+#endif
+ nelem = 0;
+}
+
+static void free_buckets(__cilkrts_worker *w,
+ bucket **buckets,
+ size_t nbuckets)
+{
+ size_t i;
+
+#if REDPAR_DEBUG >= 1
+ verify_current_wkr(w);
+ fprintf(stderr, "W=%d, desc=free_buckets, buckets=%p, size=%zd\n",
+ w->self, buckets,
+ nbuckets * sizeof(*buckets));
+#endif
+
+ for (i = 0; i < nbuckets; ++i)
+ free_bucket(w, buckets + i);
+
+ __cilkrts_frame_free(w, buckets, nbuckets * sizeof(*buckets));
+}
+
+static size_t minsz(size_t nelem)
+{
+ return 1U + nelem + nelem / 8U;
+}
+
+static size_t nextsz(size_t nelem)
+{
+ return 2 * nelem;
+}
+
+bool cilkred_map::need_rehash_p() const
+{
+ return minsz(nelem) > nbuckets;
+}
+
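+// The keys hashed here are addresses of leftmost views (see
+// get_hyperobject_key() below), so their low bits are poorly distributed.
+// The xor-shift sequence folds higher-order address bits into the low bits
+// before masking with nbuckets - 1, which works because nbuckets is always
+// a power of two (asserted in make_buckets()).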
+static inline size_t hashfun(const cilkred_map *h, void *key)
+{
+ size_t k = (size_t) key;
+
+ k ^= k >> 21;
+ k ^= k >> 8;
+ k ^= k >> 3;
+
+ return k & (h->nbuckets - 1);
+}
+
+// Given a __cilkrts_hyperobject_base, return the key to that hyperobject in
+// the reducer map.
+static inline void* get_hyperobject_key(__cilkrts_hyperobject_base *hb)
+{
+ // The current implementation uses the address of the leftmost view as the
+ // key.
+ return reinterpret_cast<char*>(hb) + hb->__view_offset;
+}
+
+// Given a hyperobject key, return a pointer to the leftmost object. In the
+// current implementation, the address of the leftmost object IS the key, so
+// this function is an effective noop.
+static inline void* get_leftmost_view(void *key)
+{
+ return key;
+}
+
+/* debugging support: check consistency of a reducer map */
+void cilkred_map::check(bool allow_null_view)
+{
+ size_t count = 0;
+
+ CILK_ASSERT(buckets);
+ for (size_t i = 0; i < nbuckets; ++i) {
+ bucket *b = buckets[i];
+ if (b)
+ for (elem *el = b->el; el->key; ++el) {
+ CILK_ASSERT(allow_null_view || el->view);
+ ++count;
+ }
+ }
+ CILK_ASSERT(nelem == count);
+ /*global_reducer_map::check();*/
+}
+
+/* grow bucket by one element, reallocating bucket if necessary */
+elem *cilkred_map::grow(__cilkrts_worker *w,
+ bucket **bp)
+{
+ size_t i, nmax, nnmax;
+ bucket *b, *nb;
+
+ b = *bp;
+ if (b) {
+ nmax = b->nmax;
+ /* find empty element if any */
+ for (i = 0; i < nmax; ++i)
+ if (b->el[i].key == 0)
+ return &(b->el[i]);
+ /* do not use the last one even if empty */
+ } else {
+ nmax = 0;
+ }
+
+ verify_current_wkr(w);
+ /* allocate a new bucket */
+ nnmax = roundup(2 * nmax);
+ nb = alloc_bucket(w, nnmax);
+
+
+ /* copy old bucket into new */
+ for (i = 0; i < nmax; ++i)
+ nb->el[i] = b->el[i];
+
+ free_bucket(w, bp); *bp = nb;
+
+ /* zero out extra elements */
+ for (; i < nnmax; ++i)
+ nb->el[i].key = 0;
+
+ /* zero out the last one */
+ nb->el[i].key = 0;
+
+ return &(nb->el[nmax]);
+}
+
+elem *cilkred_map::insert_no_rehash(__cilkrts_worker *w,
+ void *key,
+ __cilkrts_hyperobject_base *hb,
+ void *view)
+{
+
+#if REDPAR_DEBUG >= 2
+ fprintf(stderr, "[W=%d, desc=insert_no_rehash, this_map=%p]\n",
+ w->self, this);
+ verify_current_wkr(w);
+#endif
+
+ CILK_ASSERT((w == 0 && g == 0) || w->g == g);
+ CILK_ASSERT(key != 0);
+ CILK_ASSERT(view != 0);
+
+ elem *el = grow(w, &(buckets[hashfun(this, key)]));
+
+#if REDPAR_DEBUG >= 3
+ fprintf(stderr, "[W=%d, this=%p, inserting key=%p, view=%p, el = %p]\n",
+ w->self, this, key, view, el);
+#endif
+
+ el->key = key;
+ el->hb = hb;
+ el->view = view;
+ ++nelem;
+
+ return el;
+}
+
+void cilkred_map::rehash(__cilkrts_worker *w)
+{
+#if REDPAR_DEBUG >= 1
+ fprintf(stderr, "[W=%d, desc=rehash, this_map=%p, g=%p, w->g=%p]\n",
+ w->self, this, g, w->g);
+ verify_current_wkr(w);
+#endif
+ CILK_ASSERT((w == 0 && g == 0) || w->g == g);
+
+ size_t onbuckets = nbuckets;
+ size_t onelem = nelem;
+ bucket **obuckets = buckets;
+ size_t i;
+ bucket *b;
+
+ make_buckets(w, nextsz(nbuckets));
+
+ for (i = 0; i < onbuckets; ++i) {
+ b = obuckets[i];
+ if (b) {
+ elem *oel;
+ for (oel = b->el; oel->key; ++oel)
+ insert_no_rehash(w, oel->key, oel->hb, oel->view);
+ }
+ }
+
+ CILK_ASSERT(nelem == onelem);
+
+ free_buckets(w, obuckets, onbuckets);
+}
+
+elem *cilkred_map::rehash_and_insert(__cilkrts_worker *w,
+ void *key,
+ __cilkrts_hyperobject_base *hb,
+ void *view)
+{
+
+#if REDPAR_DEBUG >= 1
+ fprintf(stderr, "W=%d, this_map =%p, inserting key=%p, view=%p\n",
+ w->self, this, key, view);
+ verify_current_wkr(w);
+#endif
+
+ if (need_rehash_p())
+ rehash(w);
+
+ return insert_no_rehash(w, key, hb, view);
+}
+
+
+elem *cilkred_map::lookup(void *key)
+{
+ bucket *b = buckets[hashfun(this, key)];
+
+ if (b) {
+ elem *el;
+ for (el = b->el; el->key; ++el) {
+ if (el->key == key) {
+ CILK_ASSERT(el->view);
+ return el;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void elem::destroy()
+{
+ if (! is_leftmost()) {
+
+ // Call destroy_fn and deallocate_fn on the view, but not if it's the
+ // leftmost view.
+ cilk_c_monoid *monoid = &(hb->__c_monoid);
+ cilk_c_reducer_destroy_fn_t destroy_fn = monoid->destroy_fn;
+ cilk_c_reducer_deallocate_fn_t deallocate_fn = monoid->deallocate_fn;
+
+ destroy_fn((void*)hb, view);
+ deallocate_fn((void*)hb, view);
+ }
+
+ view = 0;
+}
+
+inline
+bool elem::is_leftmost() const
+{
+ // implementation uses the address of the leftmost view as the key, so if
+ // key == view, then this element refers to the leftmost view.
+ return key == view;
+}
+
+/* remove the reducer from the current reducer map. If the reducer
+ exists in maps other than the current one, the behavior is
+ undefined. */
+extern "C"
+CILK_EXPORT void __CILKRTS_STRAND_STALE(
+ __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *hb))
+{
+ // Disable Cilkscreen for the duration of this call. The destructor for
+ // this class will re-enable Cilkscreen when the method returns. This
+ // will prevent Cilkscreen from reporting apparent races in reducers
+ DisableCilkscreen x;
+
+ __cilkrts_worker* w = __cilkrts_get_tls_worker();
+ if (! w) {
+ // If no worker, then Cilk is not running and there is no reducer
+ // map. Do nothing. The reducer's destructor will take care of
+ // destroying the leftmost view.
+ return;
+ }
+
+const char *UNSYNCED_REDUCER_MSG =
+ "Destroying a reducer while it is visible to unsynced child tasks, or\n"
+ "calling CILK_C_UNREGISTER_REDUCER() on an unregistered reducer.\n"
+ "Did you forget a _Cilk_sync or CILK_C_REGISTER_REDUCER()?";
+
+ cilkred_map* h = w->reducer_map;
+ if (NULL == h)
+ cilkos_error(UNSYNCED_REDUCER_MSG); // Does not return
+
+ if (h->merging) {
+ verify_current_wkr(w);
+ __cilkrts_bug("User error: hyperobject used by another hyperobject");
+ }
+
+ void* key = get_hyperobject_key(hb);
+ elem *el = h->lookup(key);
+
+ // Verify that the reducer is being destroyed from the leftmost strand for
+ // which the reducer is defined.
+ if (! (el && el->is_leftmost()))
+ cilkos_error(UNSYNCED_REDUCER_MSG);
+
+#if REDPAR_DEBUG >= 3
+ fprintf(stderr, "[W=%d, key=%p, lookup in map %p, found el=%p, about to destroy]\n",
+ w->self, key, h, el);
+#endif
+
+ // Remove the element from the hash bucket. Do not bother shrinking
+ // the bucket. Note that the destroy() function does not actually
+ // call the destructor for the leftmost view.
+ el->destroy();
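+ // Shift every subsequent element in the bucket down by one slot to close
+ // the gap; the zero-key terminator is copied along with them, so the
+ // bucket stays properly terminated.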
+ do {
+ el[0] = el[1];
+ ++el;
+ } while (el->key);
+ --h->nelem;
+
+#if REDPAR_DEBUG >= 2
+ fprintf(stderr, "[W=%d, desc=hyper_destroy_finish, key=%p, w->reducer_map=%p]\n",
+ w->self, key, w->reducer_map);
+#endif
+}
+
+extern "C"
+CILK_EXPORT
+void __cilkrts_hyper_create(__cilkrts_hyperobject_base *hb)
+{
+ // This function registers the specified hyperobject in the current
+ // reducer map and registers the initial value of the hyperobject as the
+ // leftmost view of the reducer.
+ __cilkrts_worker *w = __cilkrts_get_tls_worker();
+ if (! w) {
+ // If there is no worker, then there is nothing to do: The initial
+ // value will automatically be used as the left-most view when we
+ // enter Cilk.
+ return;
+ }
+
+ // Disable Cilkscreen for the duration of this call. The destructor for
+ // this class will re-enable Cilkscreen when the method returns. This
+ // will prevent Cilkscreen from reporting apparent races in reducers
+ DisableCilkscreen x;
+
+ void* key = get_hyperobject_key(hb);
+ void* view = get_leftmost_view(key);
+ cilkred_map *h = w->reducer_map;
+
+ if (__builtin_expect(!h, 0)) {
+ h = install_new_reducer_map(w);
+#if REDPAR_DEBUG >= 2
+ fprintf(stderr, "[W=%d, hb=%p, hyper_create, isntalled new map %p, view=%p]\n",
+ w->self, hb, h, view);
+#endif
+ }
+
+ /* Must not exist. */
+ CILK_ASSERT(h->lookup(key) == NULL);
+
+#if REDPAR_DEBUG >= 3
+ verify_current_wkr(w);
+ fprintf(stderr, "[W=%d, hb=%p, lookup in map %p of view %p, should be null]\n",
+ w->self, hb, h, view);
+ fprintf(stderr, "W=%d, h=%p, inserting key %p, view%p\n",
+ w->self,
+ h,
+ &(hb->__c_monoid),
+ view);
+#endif
+
+ if (h->merging)
+ __cilkrts_bug("User error: hyperobject used by another hyperobject");
+
+ CILK_ASSERT(w->reducer_map == h);
+ // The address of the leftmost value is the same as the key for lookup.
+ (void) h->rehash_and_insert(w, view, hb, view);
+}
+
+extern "C"
+CILK_EXPORT void* __CILKRTS_STRAND_PURE(
+ __cilkrts_hyper_lookup(__cilkrts_hyperobject_base *hb))
+{
+ __cilkrts_worker* w = __cilkrts_get_tls_worker_fast();
+ void* key = get_hyperobject_key(hb);
+ if (! w)
+ return get_leftmost_view(key);
+
+ // Disable Cilkscreen for the duration of this call. This will
+ // prevent Cilkscreen from reporting apparent races in reducers
+ DisableCilkscreen dguard;
+
+ if (__builtin_expect(w->g->force_reduce, 0))
+ __cilkrts_promote_own_deque(w);
+ cilkred_map* h = w->reducer_map;
+
+ if (__builtin_expect(!h, 0)) {
+ h = install_new_reducer_map(w);
+ }
+
+ if (h->merging)
+ __cilkrts_bug("User error: hyperobject used by another hyperobject");
+ elem* el = h->lookup(key);
+ if (! el) {
+ /* lookup failed; insert a new default element */
+ void *rep;
+
+ {
+ /* re-enable cilkscreen while calling the constructor */
+ EnableCilkscreen eguard;
+ if (h->is_leftmost)
+ {
+ // This special case is called only if the reducer was not
+ // registered using __cilkrts_hyper_create, e.g., if this is a
+ // C reducer in global scope or if there is no bound worker.
+ rep = get_leftmost_view(key);
+ }
+ else
+ {
+ rep = hb->__c_monoid.allocate_fn((void*)hb,
+ hb->__view_size);
+ // TBD: Handle exception on identity function
+ hb->__c_monoid.identity_fn((void*)hb, rep);
+ }
+ }
+
+#if REDPAR_DEBUG >= 3
+ fprintf(stderr, "W=%d, h=%p, inserting key %p, view%p\n",
+ w->self,
+ h,
+ &(hb->__c_monoid),
+ rep);
+ CILK_ASSERT(w->reducer_map == h);
+#endif
+ el = h->rehash_and_insert(w, key, hb, rep);
+ }
+
+ return el->view;
+}
+
+extern "C" CILK_EXPORT
+void* __cilkrts_hyperobject_alloc(void* ignore, std::size_t bytes)
+{
+ return std::malloc(bytes);
+}
+
+extern "C" CILK_EXPORT
+void __cilkrts_hyperobject_dealloc(void* ignore, void* view)
+{
+ std::free(view);
+}
+
+/* No-op destroy function */
+extern "C" CILK_EXPORT
+void __cilkrts_hyperobject_noop_destroy(void* ignore, void* ignore2)
+{
+}
+
+cilkred_map *__cilkrts_make_reducer_map(__cilkrts_worker *w)
+{
+ CILK_ASSERT(w);
+
+ cilkred_map *h;
+ size_t nbuckets = 1; /* default value */
+
+ h = (cilkred_map *)__cilkrts_frame_malloc(w, sizeof(*h));
+#if REDPAR_DEBUG >= 1
+ fprintf(stderr, "[W=%d, desc=make_reducer_frame_malloc_reducer_map, h=%p]\n",
+ w->self, h);
+#endif
+
+ h->g = w ? w->g : 0;
+ h->make_buckets(w, nbuckets);
+ h->merging = false;
+ h->is_leftmost = false;
+
+ return h;
+}
+
+/* Destroy a reducer map. The map must have been allocated
+ from the worker's global context and should have been
+ allocated from the same worker. */
+void __cilkrts_destroy_reducer_map(__cilkrts_worker *w, cilkred_map *h)
+{
+ CILK_ASSERT((w == 0 && h->g == 0) || w->g == h->g);
+ verify_current_wkr(w);
+
+ /* the reducer map is allowed to contain el->view == NULL here (and
+ only here). We set el->view == NULL only when we know that the
+ map will be destroyed immediately afterwards. */
+ DBG h->check(/*allow_null_view=*/true);
+
+ bucket *b;
+ size_t i;
+
+ for (i = 0; i < h->nbuckets; ++i) {
+ b = h->buckets[i];
+ if (b) {
+ elem *el;
+ for (el = b->el; el->key; ++el) {
+ if (el->view)
+ el->destroy();
+ }
+ }
+ }
+
+ free_buckets(w, h->buckets, h->nbuckets);
+
+#if REDPAR_DEBUG >= 1
+ fprintf(stderr, "W=%d, destroy_red_map, freeing map h=%p, size=%zd\n",
+ w->self, h, sizeof(*h));
+#endif
+
+ __cilkrts_frame_free(w, h, sizeof(*h));
+}
+
+/* Set the specified reducer map as the leftmost map if is_leftmost is true,
+ otherwise, set it to not be the leftmost map. */
+void __cilkrts_set_leftmost_reducer_map(cilkred_map *h, int is_leftmost)
+{
+ h->is_leftmost = is_leftmost;
+}
+
+
+__cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w,
+ cilkred_map *other_map,
+ enum merge_kind kind)
+{
+ // Disable Cilkscreen while we merge the maps. The destructor for
+ // the guard class will re-enable Cilkscreen when it goes out of scope.
+ // This will prevent Cilkscreen from reporting apparent races in between
+ // the reduce function and the reducer operations. The Cilk runtime
+ // guarantees that a pair of reducer maps will only be merged when no
+ // other strand will access them.
+ DisableCilkscreen guard;
+
+#if REDPAR_DEBUG >= 2
+ fprintf(stderr, "[W=%d, desc=merge, this_map=%p, other_map=%p]\n",
+ w->self,
+ this, other_map);
+#endif
+ // Remember the current stack frame.
+ __cilkrts_stack_frame *current_sf = w->current_stack_frame;
+ merging = true;
+ other_map->merging = true;
+
+ // Merging to the leftmost view is a special case because every leftmost
+ // element must be initialized before the merge.
+ CILK_ASSERT(!other_map->is_leftmost /* || kind == MERGE_UNORDERED */);
+ bool merge_to_leftmost = (this->is_leftmost
+ /* && !other_map->is_leftmost */);
+
+ DBG check(/*allow_null_view=*/false);
+ DBG other_map->check(/*allow_null_view=*/false);
+
+ for (size_t i = 0; i < other_map->nbuckets; ++i) {
+ bucket *b = other_map->buckets[i];
+ if (b) {
+ for (elem *other_el = b->el; other_el->key; ++other_el) {
+ /* Steal the value from the other map, which will be
+ destroyed at the end of this operation. */
+ void *other_view = other_el->view;
+ CILK_ASSERT(other_view);
+
+ void *key = other_el->key;
+ __cilkrts_hyperobject_base *hb = other_el->hb;
+ elem *this_el = lookup(key);
+
+ if (this_el == 0 && merge_to_leftmost) {
+ /* Initialize leftmost view before merging. */
+ void* leftmost = get_leftmost_view(key);
+ // leftmost == other_view can be true if the initial view
+ // was created in other than the leftmost strand of the
+ // spawn tree, but then made visible to subsequent strands
+ // (E.g., the reducer was allocated on the heap and the
+ // pointer was returned to the caller.) In such cases,
+ // parallel semantics says that syncing with earlier
+ // strands will always result in 'this_el' being null,
+ // thus propagating the initial view up the spawn tree
+ // until it reaches the leftmost strand. When synching
+ // with the leftmost strand, leftmost == other_view will be
+ // true and we must avoid reducing the initial view with
+ // itself.
+ if (leftmost != other_view)
+ this_el = rehash_and_insert(w, key, hb, leftmost);
+ }
+
+ if (this_el == 0) {
+ /* move object from other map into this one */
+ rehash_and_insert(w, key, hb, other_view);
+ other_el->view = 0;
+ continue; /* No element-level merge necessary */
+ }
+
+ /* The same key is present in both maps with values
+ A and B. Three choices: fail, A OP B, B OP A. */
+ switch (kind)
+ {
+ case MERGE_UNORDERED:
+ __cilkrts_bug("TLS Reducer race");
+ break;
+ case MERGE_INTO_RIGHT:
+ /* Swap elements in order to preserve object
+ identity */
+ other_el->view = this_el->view;
+ this_el->view = other_view;
+ /* FALL THROUGH */
+ case MERGE_INTO_LEFT: {
+ /* Stealing should be disabled during reduce
+ (even if force-reduce is enabled). */
+
+#if DISABLE_PARALLEL_REDUCERS
+ __cilkrts_stack_frame * volatile *saved_protected_tail;
+ saved_protected_tail = __cilkrts_disallow_stealing(w, NULL);
+#endif
+
+ {
+ CILK_ASSERT(current_sf->worker == w);
+ CILK_ASSERT(w->current_stack_frame == current_sf);
+
+ /* TBD: if reduce throws an exception we need to stop it
+ here. */
+ hb->__c_monoid.reduce_fn((void*)hb,
+ this_el->view,
+ other_el->view);
+ w = current_sf->worker;
+
+#if REDPAR_DEBUG >= 2
+ verify_current_wkr(w);
+ CILK_ASSERT(w->current_stack_frame == current_sf);
+#endif
+ }
+
+#if DISABLE_PARALLEL_REDUCERS
+ /* Restore stealing */
+ __cilkrts_restore_stealing(w, saved_protected_tail);
+#endif
+
+ } break;
+ }
+ }
+ }
+ }
+ this->is_leftmost = this->is_leftmost || other_map->is_leftmost;
+ merging = false;
+ other_map->merging = false;
+ verify_current_wkr(w);
+ __cilkrts_destroy_reducer_map(w, other_map);
+ return w;
+}
+
+
+/**
+ * Print routine for debugging the merging of reducer maps.
+ * A no-op unless REDPAR_DEBUG set high enough.
+ */
+static inline
+void debug_map_merge(__cilkrts_worker *w,
+ cilkred_map *left_map,
+ cilkred_map *right_map,
+ __cilkrts_worker **final_wkr)
+{
+#if REDPAR_DEBUG >= 2
+ fprintf(stderr, "[W=%d, desc=finish_merge, left_map=%p, right_map=%p, w->reducer_map=%p, right_ans=%p, final_wkr=%d]\n",
+ w->self, left_map, right_map, w->reducer_map, right_map, (*final_wkr)->self);
+#endif
+}
+
+
+/**
+ * merge RIGHT into LEFT;
+ * return whichever map allows for faster merge, and destroy the other one.
+ *
+ * *w_ptr should be the currently executing worker.
+ * *w_ptr may change during execution if the reduction is parallel.
+ */
+cilkred_map*
+merge_reducer_maps(__cilkrts_worker **w_ptr,
+ cilkred_map *left_map,
+ cilkred_map *right_map)
+{
+ __cilkrts_worker *w = *w_ptr;
+ if (!left_map) {
+ debug_map_merge(w, left_map, right_map, w_ptr);
+ return right_map;
+ }
+
+ if (!right_map) {
+ debug_map_merge(w, left_map, right_map, w_ptr);
+ return left_map;
+ }
+
+ /* Special case, if left_map is leftmost, then always merge into it.
+ For C reducers this forces lazy creation of the leftmost views. */
+ if (left_map->is_leftmost || left_map->nelem > right_map->nelem) {
+ *w_ptr = left_map->merge(w, right_map, cilkred_map::MERGE_INTO_LEFT);
+ debug_map_merge(*w_ptr, left_map, right_map, w_ptr);
+ return left_map;
+ } else {
+ *w_ptr = right_map->merge(w, left_map, cilkred_map::MERGE_INTO_RIGHT);
+ debug_map_merge(*w_ptr, left_map, right_map, w_ptr);
+ return right_map;
+ }
+}
+
+/**
+ * Merges RIGHT into LEFT, and then repeatedly calls
+ * merge_reducer_maps() until (*w_ptr)->reducer_map is NULL.
+ *
+ * *w_ptr may change as reductions execute.
+ */
+cilkred_map*
+repeated_merge_reducer_maps(__cilkrts_worker **w_ptr,
+ cilkred_map *left_map,
+ cilkred_map *right_map)
+{
+ // Note: if right_map == NULL but w->reducer_map != NULL, then
+ // this loop will reduce w->reducer_map into left_map.
+ do {
+ left_map = merge_reducer_maps(w_ptr, left_map, right_map);
+ verify_current_wkr(*w_ptr);
+
+ // Pull any newly created reducer map and loop around again.
+ right_map = (*w_ptr)->reducer_map;
+ (*w_ptr)->reducer_map = NULL;
+ } while (right_map);
+ return left_map;
+}
+
+/* End reducer_impl.cpp */
diff --git a/gcc-4.9/libcilkrts/runtime/reducer_impl.h b/gcc-4.9/libcilkrts/runtime/reducer_impl.h
new file mode 100644
index 000000000..3425967ad
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/reducer_impl.h
@@ -0,0 +1,128 @@
+/* reducer_impl.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file reducer_impl.h
+ *
+ * @brief Functions to implement reducers in the runtime.
+ */
+
+#ifndef INCLUDED_REDUCER_IMPL_DOT_H
+#define INCLUDED_REDUCER_IMPL_DOT_H
+
+#include <cilk/common.h>
+#include <internal/abi.h>
+#include "rts-common.h"
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * Construct an empty reducer map from the memory pool associated with the
+ * given worker. This reducer map must be destroyed before the worker's
+ * associated global context is destroyed.
+ *
+ * @param w __cilkrts_worker the cilkred_map is being created for.
+ *
+ * @return Pointer to the initialized cilkred_map.
+ */
+COMMON_SYSDEP
+cilkred_map *__cilkrts_make_reducer_map(__cilkrts_worker *w);
+
+/**
+ * Destroy a reducer map. The map must have been allocated from the worker's
+ * global context and should have been allocated from the same worker.
+ *
+ * @param w __cilkrts_worker the cilkred_map was created for.
+ * @param h The cilkred_map to be deallocated.
+ */
+COMMON_SYSDEP
+void __cilkrts_destroy_reducer_map(__cilkrts_worker *w,
+ cilkred_map *h);
+
+/**
+ * Set the specified reducer map as the leftmost map if is_leftmost is true,
+ * otherwise, set it to not be the leftmost map.
+ *
+ * @param h The cilkred_map to be modified.
+ * @param is_leftmost true if the reducer map is leftmost.
+ */
+COMMON_SYSDEP
+void __cilkrts_set_leftmost_reducer_map(cilkred_map *h,
+ int is_leftmost);
+
+/**
+ * Merge reducer map RIGHT_MAP into LEFT_MAP and return the result of the
+ * merge. Both maps must be allocated from the global context associated
+ * with the specified worker. The returned reducer map must be destroyed
+ * before the worker's associated global context is destroyed.
+ *
+ * If two cilkred_maps are specified, one will be destroyed and the other
+ * one will be returned as the merged cilkred_map.
+ *
+ * When reducers can contain nested parallelism, execution can return
+ * on a different worker than when it started (but still using the
+ * same stack).
+ *
+ * Upon return, *w_ptr stores the pointer to the worker that execution
+ * returns on.
+ *
+ * @param w_ptr Pointer to the currently executing worker.
+ * @param left_map The left cilkred_map.
+ * @param right_map The right cilkred_map.
+ *
+ * @return pointer to merged cilkred_map.
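+ *
+ * A hedged usage sketch (the variable names and context are illustrative,
+ * not taken from a particular call site in the runtime):
+ *
+ *   __cilkrts_worker *w = ...;   // currently executing worker
+ *   cilkred_map *merged = merge_reducer_maps(&w, child_map, parent_map);
+ *   // w may now point to a different worker if the reductions ran nested
+ *   // parallel work; 'merged' is whichever input map survived the merge.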
+ */
+extern
+cilkred_map *merge_reducer_maps(__cilkrts_worker **w_ptr,
+ cilkred_map *left_map,
+ cilkred_map *right_map);
+
+/**
+ * Similar to merge_reducer_maps(), except that after merging
+ * RIGHT_MAP into LEFT_MAP, it repeatedly merges (*w_ptr)->reducer_map
+ * into LEFT_MAP. This procedure ensures that any new reducers
+ * created by the reductions themselves also get merged into LEFT_MAP.
+ */
+extern
+cilkred_map *repeated_merge_reducer_maps(__cilkrts_worker **w_ptr,
+ cilkred_map *left_map,
+ cilkred_map *right_map);
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_REDUCER_IMPL_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/rts-common.h b/gcc-4.9/libcilkrts/runtime/rts-common.h
new file mode 100644
index 000000000..4ffde7ccb
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/rts-common.h
@@ -0,0 +1,132 @@
+/* rts-common.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#ifndef INCLUDED_RTS_COMMON_DOT_H
+#define INCLUDED_RTS_COMMON_DOT_H
+
+/* Abbreviations API functions returning different types. By using these
+ * abbreviations instead of using CILK_API(ret) directly, etags and other
+ * tools can more easily recognize function signatures.
+ */
+#define CILK_API_VOID CILK_API(void)
+#define CILK_API_VOID_PTR CILK_API(void*)
+#define CILK_API_INT CILK_API(int)
+#define CILK_API_SIZET CILK_API(size_t)
+#define CILK_API_TBB_RETCODE CILK_API(__cilk_tbb_retcode)
+#define CILK_API_PEDIGREE CILK_API(__cilkrts_pedigree)
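+
+/* For example, a public API entry point returning an int can be declared as
+ * (illustrative; CILK_API itself is defined in the common headers):
+ *   CILK_API_INT __cilkrts_get_nworkers(void);
+ */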
+
+/* Abbreviations ABI functions returning different types. By using these
+ * abbreviations instead of using CILK_ABI(ret) directly, etags and other
+ * tools can more easily recognize function signatures.
+ */
+#define CILK_ABI_VOID CILK_ABI(void)
+#define CILK_ABI_WORKER_PTR CILK_ABI(__cilkrts_worker_ptr)
+#define CILK_ABI_THROWS_VOID CILK_ABI_THROWS(void)
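+
+/* Similarly for ABI entry points, e.g. (illustrative):
+ *   CILK_ABI_VOID __cilkrts_leave_frame(__cilkrts_stack_frame *sf);
+ */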
+
+/* documentation aid to identify portable vs. nonportable
+ parts of the runtime. See README for definitions. */
+#define COMMON_PORTABLE
+#define COMMON_SYSDEP
+#define NON_COMMON
+
+#if !(defined __GNUC__ || defined __ICC)
+# define __builtin_expect(a_, b_) a_
+#endif
+
+#ifdef __cplusplus
+# define cilk_nothrow throw()
+#else
+# define cilk_nothrow /*empty in C*/
+#endif
+
+#ifdef __GNUC__
+# define NORETURN void __attribute__((noreturn))
+#else
+# define NORETURN void __declspec(noreturn)
+#endif
+
+#ifdef __GNUC__
+# define NOINLINE __attribute__((noinline))
+#else
+# define NOINLINE __declspec(noinline)
+#endif
+
+#ifndef __GNUC__
+# define __attribute__(X)
+#endif
+
+/* Microsoft CL accepts "inline" for C++, but not for C. It accepts
+ * __inline for both. Intel ICL accepts inline for C of /Qstd=c99
+ * is set. The Cilk runtime is assumed to be compiled with /Qstd=c99
+ */
+#if defined(_MSC_VER) && ! defined(__INTEL_COMPILER)
+# error define inline
+# define inline __inline
+#endif
+
+/* Compilers that build the Cilk runtime are assumed to know about zero-cost
+ * intrinsics (__notify_intrinsic()). For those that don't, #undef the
+ * following definition:
+ */
+//#define ENABLE_NOTIFY_ZC_INTRINSIC 1
+
+#if defined(__INTEL_COMPILER)
+/* The notify intrinsic was introduced in ICC 12.0. */
+# if __INTEL_COMPILER <= 1200
+# undef ENABLE_NOTIFY_ZC_INTRINSIC
+# endif
+#elif defined(__VXWORKS__)
+# undef ENABLE_NOTIFY_ZC_INTRINSIC
+#elif defined(__clang__)
+# if !defined(__has_extension) || !__has_extension(notify_zc_intrinsic)
+# undef ENABLE_NOTIFY_ZC_INTRINSIC
+# endif
+#elif defined(__arm__)
+// __notify_zc_intrinsic not yet supported by gcc for ARM
+# undef ENABLE_NOTIFY_ZC_INTRINSIC
+#endif
+
+// If ENABLE_NOTIFY_ZC_INTRINSIC is defined, use __notify_zc_intrinsic
+#ifdef ENABLE_NOTIFY_ZC_INTRINSIC
+# define NOTIFY_ZC_INTRINSIC(annotation, data) \
+ __notify_zc_intrinsic(annotation, data)
+#else
+# define NOTIFY_ZC_INTRINSIC(annotation, data)
+#endif
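+
+/* A typical call site passes an annotation string and a data pointer, as
+ * scheduler.c does when resuming stolen work:
+ *
+ *     NOTIFY_ZC_INTRINSIC("cilk_continue", sf);
+ *
+ * When the intrinsic is unavailable, the call expands to nothing.
+ */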
+
+#endif // ! defined(INCLUDED_RTS_COMMON_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/scheduler.c b/gcc-4.9/libcilkrts/runtime/scheduler.c
new file mode 100644
index 000000000..bab6430d9
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/scheduler.c
@@ -0,0 +1,3940 @@
+/* scheduler.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2007-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+/*
+ * Cilk scheduler
+ */
+
+#include "scheduler.h"
+#include "bug.h"
+#include "os.h"
+#include "os_mutex.h"
+#include "local_state.h"
+#include "signal_node.h"
+#include "full_frame.h"
+#include "sysdep.h"
+#include "except.h"
+#include "cilk_malloc.h"
+#include "pedigrees.h"
+#include "record-replay.h"
+
+#include <limits.h>
+#include <string.h> /* memcpy */
+#include <stdio.h> // sprintf
+#include <stdlib.h> // malloc, free, abort
+
+#ifdef _WIN32
+# pragma warning(disable:1786) // disable warning: sprintf is deprecated
+# include "sysdep-win.h"
+# include "except-win32.h"
+#endif // _WIN32
+
+// ICL: Don't complain about conversion from pointer to same-sized integral
+// type in __cilkrts_put_stack. That's why we're using ptrdiff_t
+#ifdef _WIN32
+# pragma warning(disable: 1684)
+#endif
+
+#include "cilk/cilk_api.h"
+#include "frame_malloc.h"
+#include "metacall_impl.h"
+#include "reducer_impl.h"
+#include "cilk-tbb-interop.h"
+#include "cilk-ittnotify.h"
+#include "stats.h"
+
+// ICL: Don't complain about loss of precision in myrand
+// I tried restoring the warning after the function, but it didn't
+// suppress it
+#ifdef _WIN32
+# pragma warning(disable: 2259)
+#endif
+
+#ifndef _WIN32
+# include <unistd.h>
+#endif
+
+#ifdef __VXWORKS__
+// redeclare longjmp() with noreturn to stop warnings
+extern __attribute__((noreturn))
+ void longjmp(jmp_buf, int);
+#endif
+
+//#define DEBUG_LOCKS 1
+#ifdef DEBUG_LOCKS
+// The currently executing worker must own this worker's lock
+# define ASSERT_WORKER_LOCK_OWNED(w) \
+ { \
+ __cilkrts_worker *tls_worker = __cilkrts_get_tls_worker(); \
+ CILK_ASSERT((w)->l->lock.owner == tls_worker); \
+ }
+#else
+# define ASSERT_WORKER_LOCK_OWNED(w)
+#endif // DEBUG_LOCKS
+
+// Options for the scheduler.
+enum schedule_t { SCHEDULE_RUN,
+ SCHEDULE_WAIT,
+ SCHEDULE_EXIT };
+
+// Return values for provably_good_steal()
+enum provably_good_steal_t
+{
+ ABANDON_EXECUTION, // Not the last child to the sync - attempt to steal work
+ CONTINUE_EXECUTION, // Last child to the sync - continue executing on this worker
+ WAIT_FOR_CONTINUE // The replay log indicates that this was the worker
+ // which continued. Loop until we are the last worker
+ // to the sync.
+};
+
+
+// Verify that "w" is the worker we are currently executing on.
+// Because this check is expensive, this method is usually a no-op.
+static inline void verify_current_wkr(__cilkrts_worker *w)
+{
+#if ((REDPAR_DEBUG >= 3) || (FIBER_DEBUG >= 1))
+    // Look up the worker from TLS and compare it to w.
+ __cilkrts_worker* tmp = __cilkrts_get_tls_worker();
+ if (w != tmp) {
+ fprintf(stderr, "Error. W=%d, actual worker =%d...\n",
+ w->self,
+ tmp->self);
+ }
+ CILK_ASSERT(w == tmp);
+#endif
+}
+
+static enum schedule_t worker_runnable(__cilkrts_worker *w);
+
+// Scheduling-fiber functions:
+static void do_return_from_spawn (__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf);
+static void do_sync (__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf);
+
+// max is defined on Windows and VxWorks
+#if (! defined(_WIN32)) && (! defined(__VXWORKS__))
+ // TBD: definition of max() for Linux.
+# define max(a, b) ((a) < (b) ? (b) : (a))
+#endif
+
+void __cilkrts_dump_stats_to_stderr(global_state_t *g)
+{
+#ifdef CILK_PROFILE
+ int i;
+ for (i = 0; i < g->total_workers; ++i) {
+ // Print out statistics for each worker. We collected them,
+ // so why not print them out?
+ fprintf(stderr, "Stats for worker %d\n", i);
+ dump_stats_to_file(stderr, g->workers[i]->l->stats);
+ __cilkrts_accum_stats(&g->stats, g->workers[i]->l->stats);
+ }
+
+ // Also print out aggregate statistics.
+ dump_stats_to_file(stderr, &g->stats);
+#endif
+ fprintf(stderr,
+ "CILK PLUS Thread Info: P=%d, Q=%d\n",
+ g->P,
+ g->Q);
+ fprintf(stderr,
+ "CILK PLUS RUNTIME MEMORY USAGE: %lld bytes",
+ (long long)g->frame_malloc.allocated_from_os);
+#ifdef CILK_PROFILE
+ if (g->stats.stack_hwm)
+ fprintf(stderr, ", %ld stacks", g->stats.stack_hwm);
+#endif
+ fputc('\n', stderr);
+}
+
+static void validate_worker(__cilkrts_worker *w)
+{
+ /* check the magic numbers, for debugging purposes */
+ if (w->l->worker_magic_0 != WORKER_MAGIC_0 ||
+ w->l->worker_magic_1 != WORKER_MAGIC_1)
+ abort_because_rts_is_corrupted();
+}
+
+static void double_link(full_frame *left_ff, full_frame *right_ff)
+{
+ if (left_ff)
+ left_ff->right_sibling = right_ff;
+ if (right_ff)
+ right_ff->left_sibling = left_ff;
+}
+
+/* add CHILD to the right of all children of PARENT */
+static void push_child(full_frame *parent_ff, full_frame *child_ff)
+{
+ double_link(parent_ff->rightmost_child, child_ff);
+ double_link(child_ff, 0);
+ parent_ff->rightmost_child = child_ff;
+}
+
+/* unlink CHILD from the list of all children of PARENT */
+static void unlink_child(full_frame *parent_ff, full_frame *child_ff)
+{
+ double_link(child_ff->left_sibling, child_ff->right_sibling);
+
+ if (!child_ff->right_sibling) {
+ /* this is the rightmost child -- update parent link */
+ CILK_ASSERT(parent_ff->rightmost_child == child_ff);
+ parent_ff->rightmost_child = child_ff->left_sibling;
+ }
+ child_ff->left_sibling = child_ff->right_sibling = 0; /* paranoia */
+}
+
+static void incjoin(full_frame *ff)
+{
+ ++ff->join_counter;
+}
+
+static int decjoin(full_frame *ff)
+{
+ CILK_ASSERT(ff->join_counter > 0);
+ return (--ff->join_counter);
+}
+
+static int simulate_decjoin(full_frame *ff)
+{
+ CILK_ASSERT(ff->join_counter > 0);
+ return (ff->join_counter - 1);
+}
+
+/*
+ * Pseudo-random generator defined by the congruence S' = 69070 * S
+ * mod (2^32 - 5). Marsaglia (CACM July 1993) says on page 107 that
+ * this is a ``good one''. There you go.
+ *
+ * The literature makes a big fuss about avoiding the division, but
+ * for us it is not worth the hassle.
+ */
+static const unsigned RNGMOD = ((1ULL << 32) - 5);
+static const unsigned RNGMUL = 69070U;
+
+static unsigned myrand(__cilkrts_worker *w)
+{
+ unsigned state = w->l->rand_seed;
+ state = (unsigned)((RNGMUL * (unsigned long long)state) % RNGMOD);
+ w->l->rand_seed = state;
+ return state;
+}
+
+static void mysrand(__cilkrts_worker *w, unsigned seed)
+{
+ seed %= RNGMOD;
+ seed += (seed == 0); /* 0 does not belong to the multiplicative
+ group. Use 1 instead */
+ w->l->rand_seed = seed;
+}
+
+/* W grabs its own lock */
+void __cilkrts_worker_lock(__cilkrts_worker *w)
+{
+ validate_worker(w);
+ CILK_ASSERT(w->l->do_not_steal == 0);
+
+ /* tell thieves to stay out of the way */
+ w->l->do_not_steal = 1;
+ __cilkrts_fence(); /* probably redundant */
+
+ __cilkrts_mutex_lock(w, &w->l->lock);
+}
+
+void __cilkrts_worker_unlock(__cilkrts_worker *w)
+{
+ __cilkrts_mutex_unlock(w, &w->l->lock);
+ CILK_ASSERT(w->l->do_not_steal == 1);
+ /* The fence is probably redundant. Use a release
+ operation when supported (gcc and compatibile);
+       operation when supported (gcc and compatible);
+#if defined __GNUC__ && (__GNUC__ * 10 + __GNUC_MINOR__ > 43 || __ICC >= 1110)
+ __sync_lock_release(&w->l->do_not_steal);
+#else
+ w->l->do_not_steal = 0;
+ __cilkrts_fence(); /* store-store barrier, redundant on x86 */
+#endif
+}
+
+/* try to acquire the lock of some *other* worker */
+static int worker_trylock_other(__cilkrts_worker *w,
+ __cilkrts_worker *other)
+{
+ int status = 0;
+
+ validate_worker(other);
+
+ /* This protocol guarantees that, after setting the DO_NOT_STEAL
+ flag, worker W can enter its critical section after waiting for
+ the thief currently in the critical section (if any) and at
+ most one other thief.
+
+ This requirement is overly paranoid, but it should protect us
+ against future nonsense from OS implementors.
+ */
+
+ /* compete for the right to disturb OTHER */
+ if (__cilkrts_mutex_trylock(w, &other->l->steal_lock)) {
+ if (other->l->do_not_steal) {
+ /* leave it alone */
+ } else {
+ status = __cilkrts_mutex_trylock(w, &other->l->lock);
+ }
+ __cilkrts_mutex_unlock(w, &other->l->steal_lock);
+ }
+
+
+ return status;
+}
+
+static void worker_unlock_other(__cilkrts_worker *w,
+ __cilkrts_worker *other)
+{
+ __cilkrts_mutex_unlock(w, &other->l->lock);
+}
+
+
+/* Lock macro Usage:
+ BEGIN_WITH_WORKER_LOCK(w) {
+ statement;
+ statement;
+ BEGIN_WITH_FRAME_LOCK(w, ff) {
+ statement;
+ statement;
+ } END_WITH_FRAME_LOCK(w, ff);
+ } END_WITH_WORKER_LOCK(w);
+ */
+#define BEGIN_WITH_WORKER_LOCK(w) __cilkrts_worker_lock(w); do
+#define END_WITH_WORKER_LOCK(w) while (__cilkrts_worker_unlock(w), 0)
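+
+/* Expanded, a BEGIN/END_WITH_WORKER_LOCK pair is simply
+ *
+ *     __cilkrts_worker_lock(w);
+ *     do { ... body ... } while (__cilkrts_worker_unlock(w), 0);
+ *
+ * so the body runs exactly once and the comma expression releases the lock
+ * as control leaves the block.  (A "break" inside the body would bypass
+ * the unlock, so the body must fall through.)  The frame-lock macros below
+ * follow the same pattern.
+ */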
+
+// TBD(jsukha): These are worker lock acquisitions on
+// a worker whose deque is empty. My conjecture is that we
+// do not need to hold the worker lock at these points.
+// I have left them in for now, however.
+//
+// #define REMOVE_POSSIBLY_OPTIONAL_LOCKS
+#ifdef REMOVE_POSSIBLY_OPTIONAL_LOCKS
+ #define BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) do
+ #define END_WITH_WORKER_LOCK_OPTIONAL(w) while (0)
+#else
+ #define BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) __cilkrts_worker_lock(w); do
+ #define END_WITH_WORKER_LOCK_OPTIONAL(w) while (__cilkrts_worker_unlock(w), 0)
+#endif
+
+
+#define BEGIN_WITH_FRAME_LOCK(w, ff) \
+ do { full_frame *_locked_ff = ff; __cilkrts_frame_lock(w, _locked_ff); do
+
+#define END_WITH_FRAME_LOCK(w, ff) \
+ while (__cilkrts_frame_unlock(w, _locked_ff), 0); } while (0)
+
+/* W becomes the owner of F and F can be stolen from W */
+static void make_runnable(__cilkrts_worker *w, full_frame *ff)
+{
+ w->l->frame_ff = ff;
+
+ /* CALL_STACK is invalid (the information is stored implicitly in W) */
+ ff->call_stack = 0;
+}
+
+/*
+ * The worker parameter is unused, except for print-debugging purposes.
+ */
+static void make_unrunnable(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf,
+ int is_loot,
+ const char *why)
+{
+ /* CALL_STACK becomes valid again */
+ ff->call_stack = sf;
+
+ if (sf) {
+#if CILK_LIB_DEBUG
+ if (__builtin_expect(sf->flags & CILK_FRAME_EXITING, 0))
+ __cilkrts_bug("W%d suspending exiting frame %p/%p\n", w->self, ff, sf);
+#endif
+ sf->flags |= CILK_FRAME_STOLEN | CILK_FRAME_SUSPENDED;
+ sf->worker = 0;
+
+ if (is_loot)
+ __cilkrts_put_stack(ff, sf);
+
+ /* perform any system-dependent action, such as saving the
+ state of the stack */
+ __cilkrts_make_unrunnable_sysdep(w, ff, sf, is_loot, why);
+ }
+}
+
+
+/* Push the next full frame to be made active in this worker and increment its
+ * join counter. __cilkrts_push_next_frame and pop_next_frame work on a
+ * one-element queue. This queue is used to communicate across the runtime
+ * from the code that wants to activate a frame to the code that can actually
+ * begin execution on that frame. They are asymmetrical in that push
+ * increments the join counter but pop does not decrement it. Rather, a
+ * single push/pop combination makes a frame active and increments its join
+ * counter once. */
+void __cilkrts_push_next_frame(__cilkrts_worker *w, full_frame *ff)
+{
+ CILK_ASSERT(ff);
+ CILK_ASSERT(!w->l->next_frame_ff);
+ incjoin(ff);
+ w->l->next_frame_ff = ff;
+}
+
+/* Get the next full-frame to be made active in this worker. The join count
+ * of the full frame will have been incremented by the corresponding push
+ * event. See __cilkrts_push_next_frame, above.
+ */
+static full_frame *pop_next_frame(__cilkrts_worker *w)
+{
+ full_frame *ff;
+ ff = w->l->next_frame_ff;
+ // Remove the frame from the next_frame field.
+ //
+ // If this is a user worker, then there is a chance that another worker
+ // from our team could push work into our next_frame (if it is the last
+ // worker doing work for this team). The other worker's setting of the
+ // next_frame could race with our setting of next_frame to NULL. This is
+ // the only possible race condition on next_frame. However, if next_frame
+ // has a non-NULL value, then it means the team still has work to do, and
+ // there is no chance of another team member populating next_frame. Thus,
+ // it is safe to set next_frame to NULL, if it was populated. There is no
+ // need for an atomic op.
+ if (NULL != ff) {
+ w->l->next_frame_ff = NULL;
+ }
+ return ff;
+}
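+
+/* A push/pop pair as used by the scheduling code (simplified sketch):
+ *
+ *     __cilkrts_push_next_frame(w, ff);   // incjoin(ff); w now owns ff
+ *     ...
+ *     ff = pop_next_frame(w);             // empties the queue; no decjoin
+ *     setup_for_execution(w, ff, 0);      // ff becomes the running frame
+ */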
+
+/*
+ * Identify the single worker that is allowed to cross a sync in this frame. A
+ * thief should call this function when it is the first to steal work from a
+ * user worker. "First to steal work" may mean that there has been parallelism
+ * in the user worker before, but the whole team sync'd, and this is the first
+ * steal after that.
+ *
+ * This should happen while holding the worker and frame lock.
+ */
+static void set_sync_master(__cilkrts_worker *w, full_frame *ff)
+{
+ w->l->last_full_frame = ff;
+ ff->sync_master = w;
+}
+
+/*
+ * The sync that ends all parallelism for a particular user worker is about to
+ * be crossed. Decouple the worker and frame.
+ *
+ * No locks need to be held since the user worker isn't doing anything, and none
+ * of the system workers can steal from it. But unset_sync_master() should be
+ * called before the user worker knows about this work (i.e., before
+ * w->l->next_frame_ff is set).
+ */
+static void unset_sync_master(__cilkrts_worker *w, full_frame *ff)
+{
+ CILK_ASSERT(WORKER_USER == w->l->type);
+ CILK_ASSERT(ff->sync_master == w);
+ ff->sync_master = NULL;
+ w->l->last_full_frame = NULL;
+}
+
+/********************************************************************
+ * THE protocol:
+ ********************************************************************/
+/*
+ * This is a protocol for work stealing that minimizes the overhead on
+ * the victim.
+ *
+ * The protocol uses three shared pointers into the worker's deque:
+ * - T - the "tail"
+ * - H - the "head"
+ * - E - the "exception" NB: In this case, "exception" has nothing to do
+ * with C++ throw-catch exceptions -- it refers only to a non-normal return,
+ * i.e., a steal or similar scheduling exception.
+ *
+ * with H <= E, H <= T.
+ *
+ * Stack frames SF, where H <= E < T, are available for stealing.
+ *
+ * The worker operates on the T end of the stack. The frame being
+ * worked on is not on the stack. To make a continuation available for
+ * stealing, the worker pushes a frame onto the stack: it stores *T++ = SF.
+ * To return, it pops the frame off the stack: obtains SF = *--T.
+ *
+ * After decrementing T, the condition E > T signals to the victim that
+ * it should invoke the runtime system's "THE" exception handler. The
+ * pointer E can become INFINITY, in which case the victim must invoke
+ * the THE exception handler as soon as possible.
+ *
+ * See "The implementation of the Cilk-5 multithreaded language", PLDI 1998,
+ * http://portal.acm.org/citation.cfm?doid=277652.277725, for more information
+ * on the THE protocol.
+ */
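+
+/*
+ * Simplified sketch of the two sides (illustrative only; the victim-side
+ * steps are emitted by the compiler in the spawn and return sequences,
+ * and the thief-side steps live in the functions below):
+ *
+ *   Victim (worker):
+ *     spawn:    *T++ = sf;         // make the continuation stealable
+ *     return:   --T;
+ *               if (E > T)         // a thief (or an abort) intervened
+ *                   enter the runtime's THE exception handler;
+ *
+ *   Thief:
+ *     lock the victim;
+ *     E++;                         // increment_E
+ *     if (H < T)                   // can_steal_from (simplified)
+ *         steal *H++;              // detach_for_steal advances H
+ *     else
+ *         E--;                     // decrement_E
+ *     unlock the victim;
+ */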
+
+/* the infinity value of E */
+#define EXC_INFINITY ((__cilkrts_stack_frame **) (-1))
+
+static void increment_E(__cilkrts_worker *victim)
+{
+ __cilkrts_stack_frame *volatile *tmp;
+
+ // The currently executing worker must own the worker lock to touch
+ // victim->exc
+ ASSERT_WORKER_LOCK_OWNED(victim);
+
+ tmp = victim->exc;
+ if (tmp != EXC_INFINITY) {
+ /* On most x86 this pair of operations would be slightly faster
+ as an atomic exchange due to the implicit memory barrier in
+ an atomic instruction. */
+ victim->exc = tmp + 1;
+ __cilkrts_fence();
+ }
+}
+
+static void decrement_E(__cilkrts_worker *victim)
+{
+ __cilkrts_stack_frame *volatile *tmp;
+
+ // The currently executing worker must own the worker lock to touch
+ // victim->exc
+ ASSERT_WORKER_LOCK_OWNED(victim);
+
+ tmp = victim->exc;
+ if (tmp != EXC_INFINITY) {
+ /* On most x86 this pair of operations would be slightly faster
+ as an atomic exchange due to the implicit memory barrier in
+ an atomic instruction. */
+ victim->exc = tmp - 1;
+ __cilkrts_fence(); /* memory fence not really necessary */
+ }
+}
+
+#if 0
+/* for now unused, will be necessary if we implement abort */
+static void signal_THE_exception(__cilkrts_worker *wparent)
+{
+ wparent->exc = EXC_INFINITY;
+ __cilkrts_fence();
+}
+#endif
+
+static void reset_THE_exception(__cilkrts_worker *w)
+{
+ // The currently executing worker must own the worker lock to touch
+ // w->exc
+ ASSERT_WORKER_LOCK_OWNED(w);
+
+ w->exc = w->head;
+ __cilkrts_fence();
+}
+
+/* conditions under which victim->head can be stolen: */
+static int can_steal_from(__cilkrts_worker *victim)
+{
+ return ((victim->head < victim->tail) &&
+ (victim->head < victim->protected_tail));
+}
+
+/* Return TRUE if the frame can be stolen, false otherwise */
+static int dekker_protocol(__cilkrts_worker *victim)
+{
+ // increment_E and decrement_E are going to touch victim->exc. The
+ // currently executing worker must own victim's lock before they can
+ // modify it
+ ASSERT_WORKER_LOCK_OWNED(victim);
+
+ /* ASSERT(E >= H); */
+
+ increment_E(victim);
+
+ /* ASSERT(E >= H + 1); */
+ if (can_steal_from(victim)) {
+ /* success, we can steal victim->head and set H <- H + 1
+ in detach() */
+ return 1;
+ } else {
+ /* failure, restore previous state */
+ decrement_E(victim);
+ return 0;
+ }
+}
+
+
+/* Link PARENT and CHILD in the spawn tree */
+static full_frame *make_child(__cilkrts_worker *w,
+ full_frame *parent_ff,
+ __cilkrts_stack_frame *child_sf,
+ cilk_fiber *fiber)
+{
+ full_frame *child_ff = __cilkrts_make_full_frame(w, child_sf);
+
+ child_ff->parent = parent_ff;
+ push_child(parent_ff, child_ff);
+
+ //DBGPRINTF("%d- make_child - child_frame: %p, parent_frame: %p, child_sf: %p\n"
+ // " parent - parent: %p, left_sibling: %p, right_sibling: %p, rightmost_child: %p\n"
+ // " child - parent: %p, left_sibling: %p, right_sibling: %p, rightmost_child: %p\n",
+ // w->self, child, parent, child_sf,
+ // parent->parent, parent->left_sibling, parent->right_sibling, parent->rightmost_child,
+ // child->parent, child->left_sibling, child->right_sibling, child->rightmost_child);
+ CILK_ASSERT(parent_ff->call_stack);
+ child_ff->is_call_child = (fiber == NULL);
+
+    /* PLACEHOLDER_FIBER is used as a non-null marker indicating that the
+       child should be treated as a spawn child even though we have not
+ yet assigned a real fiber to its parent. */
+ if (fiber == PLACEHOLDER_FIBER)
+ fiber = NULL; /* Parent actually gets a null fiber, for now */
+
+ /* perform any system-dependent actions, such as capturing
+ parameter passing information */
+ /*__cilkrts_make_child_sysdep(child, parent);*/
+
+ /* Child gets reducer map and stack of parent.
+ Parent gets a new map and new stack. */
+ child_ff->fiber_self = parent_ff->fiber_self;
+ child_ff->sync_master = NULL;
+
+ if (child_ff->is_call_child) {
+ /* Cause segfault on any attempted access. The parent gets
+ the child map and stack when the child completes. */
+ parent_ff->fiber_self = 0;
+ } else {
+ parent_ff->fiber_self = fiber;
+ }
+
+ incjoin(parent_ff);
+ return child_ff;
+}
+
+static inline __cilkrts_stack_frame *__cilkrts_advance_frame(__cilkrts_stack_frame *sf)
+{
+ __cilkrts_stack_frame *p = sf->call_parent;
+ sf->call_parent = 0;
+ return p;
+}
+
+/* w should be the currently executing worker.
+ * loot_sf is the youngest stack frame in the call stack being
+ * unrolled (i.e., the most deeply nested stack frame.)
+ *
+ * When this method is called for a steal, loot_sf should be on a
+ * victim worker which is different from w.
+ * For CILK_FORCE_REDUCE, the victim worker will equal w.
+ *
+ * Before execution, the __cilkrts_stack_frame's have pointers from
+ * older to younger, i.e., a __cilkrts_stack_frame points to parent.
+ *
+ * This method creates a full frame for each __cilkrts_stack_frame in
+ * the call stack, with each full frame also pointing to its parent.
+ *
+ * The method returns the full frame created for loot_sf, i.e., the
+ * youngest full frame.
+ */
+static full_frame *unroll_call_stack(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *const loot_sf)
+{
+ __cilkrts_stack_frame *sf = loot_sf;
+ __cilkrts_stack_frame *rev_sf = 0;
+ __cilkrts_stack_frame *t_sf;
+
+ CILK_ASSERT(sf);
+ /*CILK_ASSERT(sf->call_parent != sf);*/
+
+ /* The leafmost frame is unsynched. */
+ if (sf->worker != w)
+ sf->flags |= CILK_FRAME_UNSYNCHED;
+
+ /* Reverse the call stack to make a linked list ordered from parent
+ to child. sf->call_parent points to the child of SF instead of
+ the parent. */
+ do {
+ t_sf = (sf->flags & (CILK_FRAME_DETACHED|CILK_FRAME_STOLEN|CILK_FRAME_LAST))? 0 : sf->call_parent;
+ sf->call_parent = rev_sf;
+ rev_sf = sf;
+ sf = t_sf;
+ } while (sf);
+ sf = rev_sf;
+
+ /* Promote each stack frame to a full frame in order from parent
+ to child, following the reversed list we just built. */
+ make_unrunnable(w, ff, sf, sf == loot_sf, "steal 1");
+ /* T is the *child* of SF, because we have reversed the list */
+ for (t_sf = __cilkrts_advance_frame(sf); t_sf;
+ sf = t_sf, t_sf = __cilkrts_advance_frame(sf)) {
+ ff = make_child(w, ff, t_sf, NULL);
+ make_unrunnable(w, ff, t_sf, t_sf == loot_sf, "steal 2");
+ }
+
+ /* XXX What if the leafmost frame does not contain a sync
+ and this steal is from promote own deque? */
+ /*sf->flags |= CILK_FRAME_UNSYNCHED;*/
+
+ CILK_ASSERT(!sf->call_parent);
+ return ff;
+}
+
+/* detach the top of the deque frame from the VICTIM and install a new
+ CHILD frame in its place */
+static void detach_for_steal(__cilkrts_worker *w,
+ __cilkrts_worker *victim,
+ cilk_fiber* fiber)
+{
+ /* ASSERT: we own victim->lock */
+
+ full_frame *parent_ff, *child_ff, *loot_ff;
+ __cilkrts_stack_frame *volatile *h;
+ __cilkrts_stack_frame *sf;
+
+ w->l->team = victim->l->team;
+
+ CILK_ASSERT(w->l->frame_ff == 0 || w == victim);
+
+ h = victim->head;
+
+ CILK_ASSERT(*h);
+
+ victim->head = h + 1;
+
+ parent_ff = victim->l->frame_ff;
+ BEGIN_WITH_FRAME_LOCK(w, parent_ff) {
+ /* parent no longer referenced by victim */
+ decjoin(parent_ff);
+
+ /* obtain the victim call stack */
+ sf = *h;
+
+ /* perform system-dependent normalizations */
+ /*__cilkrts_normalize_call_stack_on_steal(sf);*/
+
+ /* unroll PARENT_FF with call stack SF, adopt the youngest
+ frame LOOT. If loot_ff == parent_ff, then we hold loot_ff->lock,
+ otherwise, loot_ff is newly created and we can modify it without
+ holding its lock. */
+ loot_ff = unroll_call_stack(w, parent_ff, sf);
+
+ #if REDPAR_DEBUG >= 3
+ fprintf(stderr, "[W=%d, victim=%d, desc=detach, parent_ff=%p, loot=%p]\n",
+ w->self, victim->self,
+ parent_ff, loot_ff);
+ #endif
+
+ if (WORKER_USER == victim->l->type &&
+ NULL == victim->l->last_full_frame) {
+ // Mark this looted frame as special: only the original user worker
+ // may cross the sync.
+ //
+ // This call is a shared access to
+ // victim->l->last_full_frame.
+ set_sync_master(victim, loot_ff);
+ }
+
+ /* LOOT is the next frame that the thief W is supposed to
+ run, unless the thief is stealing from itself, in which
+ case the thief W == VICTIM executes CHILD and nobody
+ executes LOOT. */
+ if (w == victim) {
+ /* Pretend that frame has been stolen */
+ loot_ff->call_stack->flags |= CILK_FRAME_UNSYNCHED;
+ loot_ff->simulated_stolen = 1;
+ }
+ else
+ __cilkrts_push_next_frame(w, loot_ff);
+
+ // After this "push_next_frame" call, w now owns loot_ff.
+ child_ff = make_child(w, loot_ff, 0, fiber);
+
+ BEGIN_WITH_FRAME_LOCK(w, child_ff) {
+ /* install child in the victim's work queue, taking
+ the parent_ff's place */
+ /* child is referenced by victim */
+ incjoin(child_ff);
+
+ // With this call, w is bestowing ownership of the newly
+ // created frame child_ff to the victim, and victim is
+ // giving up ownership of parent_ff.
+ //
+ // Worker w will either take ownership of parent_ff
+ // if parent_ff == loot_ff, or parent_ff will be
+ // suspended.
+ //
+ // Note that this call changes the victim->frame_ff
+ // while the victim may be executing.
+ make_runnable(victim, child_ff);
+ } END_WITH_FRAME_LOCK(w, child_ff);
+ } END_WITH_FRAME_LOCK(w, parent_ff);
+}
+
+/**
+ * @brief cilk_fiber_proc that resumes user code after a successful
+ * random steal.
+ *
+ * This function longjmps back into the user code whose state is
+ * stored in cilk_fiber_get_data(fiber)->resume_sf. The stack pointer
+ * is adjusted so that the code resumes on the specified fiber stack
+ * instead of its original stack.
+ *
+ * This method gets executed only on a fiber freshly allocated from a
+ * pool.
+ *
+ * @param fiber The fiber being used to resume user code.
+ * @param arg Unused.
+ */
+static
+void fiber_proc_to_resume_user_code_for_random_steal(cilk_fiber *fiber)
+{
+ cilk_fiber_data *data = cilk_fiber_get_data(fiber);
+ __cilkrts_stack_frame* sf = data->resume_sf;
+ full_frame *ff;
+
+ CILK_ASSERT(sf);
+
+ // When we pull the resume_sf out of the fiber to resume it, clear
+ // the old value.
+ data->resume_sf = NULL;
+ CILK_ASSERT(sf->worker == data->owner);
+ ff = sf->worker->l->frame_ff;
+
+ // For Win32, we need to overwrite the default exception handler
+ // in this function, so that when the OS exception handling code
+ // walks off the top of the current Cilk stack, it reaches our stub
+ // handler.
+
+ // Also, this function needs to be wrapped into a try-catch block
+ // so the compiler generates the appropriate exception information
+ // in this frame.
+
+ // TBD: IS THIS HANDLER IN THE WRONG PLACE? Can we longjmp out of
+ // this function (and does it matter?)
+#if defined(_WIN32) && !defined(_WIN64)
+ install_exception_stub_handler();
+ __try
+#endif
+ {
+ char* new_sp = sysdep_reset_jump_buffers_for_resume(fiber, ff, sf);
+
+ // Notify the Intel tools that we're stealing code
+ ITT_SYNC_ACQUIRED(sf->worker);
+ NOTIFY_ZC_INTRINSIC("cilk_continue", sf);
+
+ // TBD: We'd like to move TBB-interop methods into the fiber
+ // eventually.
+ cilk_fiber_invoke_tbb_stack_op(fiber, CILK_TBB_STACK_ADOPT);
+
+ sf->flags &= ~CILK_FRAME_SUSPENDED;
+
+ // longjmp to user code. Don't process exceptions here,
+ // because we are resuming a stolen frame.
+ sysdep_longjmp_to_sf(new_sp, sf, NULL);
+ /*NOTREACHED*/
+ // Intel's C compiler respects the preceding lint pragma
+ }
+#if defined(_WIN32) && !defined(_WIN64)
+    __except (CILK_ASSERT(!"should not execute the stub filter"),
+ EXCEPTION_EXECUTE_HANDLER)
+ {
+ // If we are here, that means something very wrong
+ // has happened in our exception processing...
+ CILK_ASSERT(! "should not be here!");
+ }
+#endif
+}
+
+static void random_steal(__cilkrts_worker *w)
+{
+ __cilkrts_worker *victim = NULL;
+ cilk_fiber *fiber = NULL;
+ int n;
+ int success = 0;
+ int32_t victim_id;
+
+ // Nothing's been stolen yet. When true, this will flag
+ // setup_for_execution_pedigree to increment the pedigree
+ w->l->work_stolen = 0;
+
+ /* If the user has disabled stealing (using the debugger) we fail */
+ if (__builtin_expect(w->g->stealing_disabled, 0))
+ return;
+
+ CILK_ASSERT(w->l->type == WORKER_SYSTEM || w->l->team == w);
+
+    /* If there is only one processor, work can still be stolen.
+ There must be only one worker to prevent stealing. */
+ CILK_ASSERT(w->g->total_workers > 1);
+
+ /* pick random *other* victim */
+ n = myrand(w) % (w->g->total_workers - 1);
+ if (n >= w->self)
+ ++n;
+
+ // If we're replaying a log, override the victim. -1 indicates that
+ // we've exhausted the list of things this worker stole when we recorded
+ // the log so just return. If we're not replaying a log,
+ // replay_get_next_recorded_victim() just returns the victim ID passed in.
+ n = replay_get_next_recorded_victim(w, n);
+ if (-1 == n)
+ return;
+
+ victim = w->g->workers[n];
+
+ START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE) {
+ /* Verify that we can get a stack. If not, no need to continue. */
+ fiber = cilk_fiber_allocate(&w->l->fiber_pool);
+ } STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE);
+
+
+ if (NULL == fiber) {
+#if FIBER_DEBUG >= 2
+ fprintf(stderr, "w=%d: failed steal because we could not get a fiber\n",
+ w->self);
+#endif
+ return;
+ }
+
+ /* do not steal from self */
+ CILK_ASSERT (victim != w);
+
+ /* Execute a quick check before engaging in the THE protocol.
+ Avoid grabbing locks if there is nothing to steal. */
+ if (!can_steal_from(victim)) {
+ NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_EMPTYQ);
+ START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE) {
+ int ref_count = cilk_fiber_remove_reference(fiber, &w->l->fiber_pool);
+ // Fibers we use when trying to steal should not be active,
+ // and thus should not have any other references.
+ CILK_ASSERT(0 == ref_count);
+ } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE);
+ return;
+ }
+
+ /* Attempt to steal work from the victim */
+ if (worker_trylock_other(w, victim)) {
+ if (w->l->type == WORKER_USER && victim->l->team != w) {
+
+ // Fail to steal if this is a user worker and the victim is not
+ // on this team. If a user worker were allowed to steal work
+ // descended from another user worker, the former might not be
+ // done with its work by the time it was needed to resume and
+ // unbind. Therefore, user workers are not permitted to change
+ // teams.
+
+ // There is no race on the victim's team because the victim cannot
+ // change its team until it runs out of work to do, at which point
+ // it will try to take out its own lock, and this worker already
+ // holds it.
+ NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_USER_WORKER);
+
+ } else if (victim->l->frame_ff) {
+ // A successful steal will change victim->frame_ff, even
+ // though the victim may be executing. Thus, the lock on
+ // the victim's deque is also protecting victim->frame_ff.
+ if (dekker_protocol(victim)) {
+ int proceed_with_steal = 1; // optimistic
+
+                // If we're replaying a log, verify that this is the correct frame
+ // to steal from the victim
+ if (! replay_match_victim_pedigree(w, victim))
+ {
+ // Abort the steal attempt. decrement_E(victim) to
+ // counter the increment_E(victim) done by the
+ // dekker protocol
+ decrement_E(victim);
+ proceed_with_steal = 0;
+ }
+
+ if (proceed_with_steal)
+ {
+ START_INTERVAL(w, INTERVAL_STEAL_SUCCESS) {
+ success = 1;
+ detach_for_steal(w, victim, fiber);
+ victim_id = victim->self;
+
+ #if REDPAR_DEBUG >= 1
+ fprintf(stderr, "Wkr %d stole from victim %d, fiber = %p\n",
+ w->self, victim->self, fiber);
+ #endif
+
+ // The use of victim->self contradicts our
+ // classification of the "self" field as
+ // local. But since this code is only for
+ // debugging, it is ok.
+ DBGPRINTF ("%d-%p: Stealing work from worker %d\n"
+ " sf: %p, call parent: %p\n",
+ w->self, GetCurrentFiber(), victim->self,
+ w->l->next_frame_ff->call_stack,
+ w->l->next_frame_ff->call_stack->call_parent);
+ } STOP_INTERVAL(w, INTERVAL_STEAL_SUCCESS);
+ } // end if(proceed_with_steal)
+ } else {
+ NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_DEKKER);
+ }
+ } else {
+ NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_EMPTYQ);
+ }
+ worker_unlock_other(w, victim);
+ } else {
+ NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_LOCK);
+ }
+
+ // Record whether work was stolen. When true, this will flag
+ // setup_for_execution_pedigree to increment the pedigree
+ w->l->work_stolen = success;
+
+ if (0 == success) {
+ // failed to steal work. Return the fiber to the pool.
+ START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE) {
+ int ref_count = cilk_fiber_remove_reference(fiber, &w->l->fiber_pool);
+ // Fibers we use when trying to steal should not be active,
+ // and thus should not have any other references.
+ CILK_ASSERT(0 == ref_count);
+ } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE);
+ }
+ else
+ {
+ // Since our steal was successful, finish initialization of
+ // the fiber.
+ cilk_fiber_reset_state(fiber,
+ fiber_proc_to_resume_user_code_for_random_steal);
+ // Record the pedigree of the frame that w has stolen.
+ // record only if CILK_RECORD_LOG is set
+ replay_record_steal(w, victim_id);
+ }
+}
+
+
+
+/**
+ * At a provably good steal, we need to transfer the child reducer map
+ * from ff->children_reducer_map into v->reducer_map, where v is the
+ * worker that resumes execution of ff.
+ *
+ * Normally, we have v == w, where w is the currently executing
+ * worker. In the case where we are resuming a team leader on a user
+ * worker, however, v might differ from w.
+ *
+ * Thus, this operation is a no-op, since we can't really move
+ * ff->children_reducer_map into w here.
+ *
+ * Instead, this work is done in setup_for_execution_reducers().
+ */
+static inline void provably_good_steal_reducers(__cilkrts_worker *w,
+ full_frame *ff)
+{
+ // No-op.
+}
+
+/* at a provably good steal, incorporate the accumulated exceptions of
+ children into the parent's exception */
+static void provably_good_steal_exceptions(__cilkrts_worker *w,
+ full_frame *ff)
+{
+ // ASSERT: we own ff->lock
+ ff->pending_exception =
+ __cilkrts_merge_pending_exceptions(w,
+ ff->child_pending_exception,
+ ff->pending_exception);
+ ff->child_pending_exception = NULL;
+}
+
+/* At sync discard the frame's old stack and take the leftmost child's. */
+static void provably_good_steal_stacks(__cilkrts_worker *w, full_frame *ff)
+{
+ CILK_ASSERT(NULL == ff->fiber_self);
+ ff->fiber_self = ff->fiber_child;
+ ff->fiber_child = NULL;
+}
+
+static void __cilkrts_mark_synched(full_frame *ff)
+{
+ ff->call_stack->flags &= ~CILK_FRAME_UNSYNCHED;
+ ff->simulated_stolen = 0;
+}
+
+static
+enum provably_good_steal_t provably_good_steal(__cilkrts_worker *w,
+ full_frame *ff)
+{
+ // ASSERT: we hold w->lock and ff->lock
+
+ enum provably_good_steal_t result = ABANDON_EXECUTION;
+
+ // If the current replay entry is a sync record matching the worker's
+ // pedigree, AND this isn't the last child to the sync, return
+ // WAIT_FOR_CONTINUE to indicate that the caller should loop until
+ // we find the right frame to steal and CONTINUE_EXECUTION is returned.
+ int match_found = replay_match_sync_pedigree(w);
+ if (match_found && (0 != simulate_decjoin(ff)))
+ return WAIT_FOR_CONTINUE;
+
+ START_INTERVAL(w, INTERVAL_PROVABLY_GOOD_STEAL) {
+ if (decjoin(ff) == 0) {
+ provably_good_steal_reducers(w, ff);
+ provably_good_steal_exceptions(w, ff);
+ provably_good_steal_stacks(w, ff);
+ __cilkrts_mark_synched(ff);
+
+ // If the original owner wants this frame back (to resume
+ // it on its original thread) pass it back now.
+ if (NULL != ff->sync_master) {
+ // The frame wants to go back and be executed by the original
+ // user thread. We can throw caution to the wind and push the
+ // frame straight onto its queue because the only way we have
+ // gotten to this point of being able to continue execution of
+ // the frame is if the original user worker is spinning without
+ // work.
+
+ unset_sync_master(w->l->team, ff);
+ __cilkrts_push_next_frame(w->l->team, ff);
+
+ // If this is the team leader we're not abandoning the work
+ if (w == w->l->team)
+ result = CONTINUE_EXECUTION;
+ } else {
+ __cilkrts_push_next_frame(w, ff);
+ result = CONTINUE_EXECUTION; // Continue working on this thread
+ }
+
+ // The __cilkrts_push_next_frame() call changes ownership
+ // of ff to the specified worker.
+ }
+ } STOP_INTERVAL(w, INTERVAL_PROVABLY_GOOD_STEAL);
+
+ // Only write a SYNC record if:
+ // - We're recording a log *AND*
+ // - We're the worker continuing from this sync
+ replay_record_sync(w, result == CONTINUE_EXECUTION);
+
+ // If we're replaying a log, and matched a sync from the log, mark the
+ // sync record seen if the sync isn't going to be abandoned.
+ replay_advance_from_sync (w, match_found, result == CONTINUE_EXECUTION);
+
+ return result;
+}
+
+static void unconditional_steal(__cilkrts_worker *w,
+ full_frame *ff)
+{
+ // ASSERT: we hold ff->lock
+
+ START_INTERVAL(w, INTERVAL_UNCONDITIONAL_STEAL) {
+ decjoin(ff);
+ __cilkrts_push_next_frame(w, ff);
+ } STOP_INTERVAL(w, INTERVAL_UNCONDITIONAL_STEAL);
+}
+
+
+/* CHILD is about to die. Give its exceptions to a sibling or to the
+ parent. */
+static inline void splice_exceptions_for_call(__cilkrts_worker *w,
+ full_frame *parent_ff,
+ full_frame *child_ff)
+{
+ // ASSERT: We own parent_ff->lock
+ CILK_ASSERT(child_ff->is_call_child);
+ CILK_ASSERT(NULL == child_ff->right_pending_exception);
+ CILK_ASSERT(NULL == parent_ff->pending_exception);
+
+ parent_ff->pending_exception = child_ff->pending_exception;
+ child_ff->pending_exception = NULL;
+}
+
+/**
+ * Merge exceptions for a dying child.
+ *
+ * @param w The currently executing worker.
+ * @param ff The child frame that is dying.
+ * @param left_exception_ptr Pointer to the exception that is to our left.
+ */
+static inline
+void splice_exceptions_for_spawn(__cilkrts_worker *w,
+ full_frame *ff,
+ struct pending_exception_info **left_exception_ptr)
+{
+ // ASSERT: parent_ff == child_ff->parent.
+ // ASSERT: We own parent_ff->lock
+
+ // Merge current exception into the slot where the left
+ // exception should go.
+ *left_exception_ptr =
+ __cilkrts_merge_pending_exceptions(w,
+ *left_exception_ptr,
+ ff->pending_exception);
+ ff->pending_exception = NULL;
+
+
+ // Merge right exception into the slot where the left exception
+ // should go.
+ *left_exception_ptr =
+ __cilkrts_merge_pending_exceptions(w,
+ *left_exception_ptr,
+ ff->right_pending_exception);
+ ff->right_pending_exception = NULL;
+}
+
+
+static inline void splice_stacks_for_call(__cilkrts_worker *w,
+ full_frame *parent_ff,
+ full_frame *child_ff)
+{
+#if CILK_LIB_DEBUG
+ if (parent_ff->call_stack)
+ CILK_ASSERT(!(parent_ff->call_stack->flags & CILK_FRAME_MBZ));
+#endif
+
+ /* A synched frame does not have accumulated child reducers. */
+ CILK_ASSERT(!child_ff->fiber_child);
+ CILK_ASSERT(child_ff->is_call_child);
+
+ /* An attached parent has no self fiber. It may have
+ accumulated child fibers or child owners, which should be
+ ignored until sync. */
+ CILK_ASSERT(!parent_ff->fiber_self);
+ parent_ff->fiber_self = child_ff->fiber_self;
+ child_ff->fiber_self = NULL;
+}
+
+static void finalize_child_for_call(__cilkrts_worker *w,
+ full_frame *parent_ff,
+ full_frame *child_ff)
+{
+ // ASSERT: we hold w->lock and parent_ff->lock
+
+ START_INTERVAL(w, INTERVAL_FINALIZE_CHILD) {
+ CILK_ASSERT(child_ff->is_call_child);
+ CILK_ASSERT(child_ff->join_counter == 0);
+ CILK_ASSERT(!child_ff->rightmost_child);
+ CILK_ASSERT(child_ff == parent_ff->rightmost_child);
+
+ // CHILD is about to die.
+ // Splicing out reducers is a no-op for a call since
+ // w->reducer_map should already store the correct
+ // reducer map.
+
+ // ASSERT there are no maps left to reduce.
+ CILK_ASSERT(NULL == child_ff->children_reducer_map);
+ CILK_ASSERT(NULL == child_ff->right_reducer_map);
+
+ splice_exceptions_for_call(w, parent_ff, child_ff);
+
+ splice_stacks_for_call(w, parent_ff, child_ff);
+
+ /* remove CHILD from list of children of PARENT */
+ unlink_child(parent_ff, child_ff);
+
+ /* continue with the parent. */
+ unconditional_steal(w, parent_ff);
+ __cilkrts_destroy_full_frame(w, child_ff);
+ } STOP_INTERVAL(w, INTERVAL_FINALIZE_CHILD);
+}
+
+
+/**
+ * The invariant on ff->children_reducer_map is that when ff is
+ * synched and when we are about to resume execution of ff, at least
+ * one of ff->children_reducer_map and w->reducer_map must be NULL.
+ *
+ * Consider the two possibilities before resuming execution of ff:
+ *
+ * 1. Suppose ff is synched and suspended. Then either
+ *
+ * (a) ff->children_reducer_map stores the reducer map that w
+ * should use, where w is the worker resuming execution of ff,
+ * OR
+ * (b) w already has a user map, and ff->children_reducer_map is NULL.
+ *
+ * Case (a) happens when we are resuming execution of ff as a
+ * provably good steal. In this case, w->reducer_map should be
+ * NULL and ff->children_reducer_map is valid. To resume
+ * execution of ff on w, set w->reducer_map to
+ * ff->children_reducer_map.
+ *
+ * Case (b) occurs when we resume execution of ff because ff is a
+ * called child. Then, ff->children_reducer_map should be NULL,
+ * and w should already have a valid reducer map when resuming
+ * execution of ff. We resume execution of ff without changing
+ * w->reducer_map.
+ *
+ * 2. Suppose frame ff is not synched (i.e., it is active and might have
+ * active children). Then ff->children_reducer_map is the slot for
+ * storing the reducer map from ff's leftmost child, as in the reducer
+ * protocol. The runtime may resume execution of ff while it is not
+ * synched only because of a steal.
+ * In this case, while we are resuming ff, ff->children_reducer_map
+ * may be non-NULL (because one of ff's children has completed).
+ * We resume execution of ff without changing w->reducer_map.
+ */
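+/*
+ * Summarized, before resuming ff on worker w:
+ *
+ *          ff->children_reducer_map   w->reducer_map   action below
+ *   1(a)   non-NULL                   NULL             move the map into w
+ *   1(b)   NULL                       non-NULL         leave w's map alone
+ *   2      possibly non-NULL          (unchanged)      leave both alone
+ */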
+static void setup_for_execution_reducers(__cilkrts_worker *w,
+ full_frame *ff)
+{
+ // We only need to move ff->children_reducer_map into
+ // w->reducer_map in case 1(a).
+ //
+ // First check whether ff is synched.
+ __cilkrts_stack_frame *sf = ff->call_stack;
+ if (!(sf->flags & CILK_FRAME_UNSYNCHED)) {
+ // In this case, ff is synched. (Case 1).
+ CILK_ASSERT(!ff->rightmost_child);
+
+ // Test whether we are in case 1(a) and have
+ // something to do. Note that if both
+ // ff->children_reducer_map and w->reducer_map are NULL, we
+ // can't distinguish between cases 1(a) and 1(b) here.
+ if (ff->children_reducer_map) {
+ // We are in Case 1(a).
+ CILK_ASSERT(!w->reducer_map);
+ w->reducer_map = ff->children_reducer_map;
+ ff->children_reducer_map = NULL;
+ }
+ }
+}
+
+static void setup_for_execution_exceptions(__cilkrts_worker *w,
+ full_frame *ff)
+{
+ CILK_ASSERT(NULL == w->l->pending_exception);
+ w->l->pending_exception = ff->pending_exception;
+ ff->pending_exception = NULL;
+}
+
+#if 0 /* unused */
+static void setup_for_execution_stack(__cilkrts_worker *w,
+ full_frame *ff)
+{
+}
+#endif
+
+/*
+ * setup_for_execution_pedigree
+ *
+ * Copies the pedigree information from the frame we're resuming to the
+ * worker. Increments the pedigree if this is work that has been stolen
+ * to match the increment on a return from a spawn helper.
+ */
+static void setup_for_execution_pedigree(__cilkrts_worker *w)
+{
+ int pedigree_unsynched;
+ __cilkrts_stack_frame *sf = w->current_stack_frame;
+
+ CILK_ASSERT(NULL != sf);
+
+ // If this isn't an ABI 1 or later frame, there's no pedigree information
+ if (0 == CILK_FRAME_VERSION_VALUE(sf->flags))
+ return;
+
+ // Note whether the pedigree is unsynched and clear the flag before
+ // we forget
+ pedigree_unsynched = sf->flags & CILK_FRAME_SF_PEDIGREE_UNSYNCHED;
+ sf->flags &= ~CILK_FRAME_SF_PEDIGREE_UNSYNCHED;
+
+ // If we're just marshalling onto this worker, do not increment
+ // the rank since that wouldn't happen in a sequential execution
+ if (w->l->work_stolen || pedigree_unsynched)
+ {
+ if (w->l->work_stolen)
+ w->pedigree.rank = sf->parent_pedigree.rank + 1;
+ else
+ w->pedigree.rank = sf->parent_pedigree.rank;
+ }
+
+ w->pedigree.parent = sf->parent_pedigree.parent;
+ w->l->work_stolen = 0;
+}
+
+static void setup_for_execution(__cilkrts_worker *w,
+ full_frame *ff,
+ int is_return_from_call)
+{
+ // ASSERT: We own w->lock and ff->lock || P == 1
+
+ setup_for_execution_reducers(w, ff);
+ setup_for_execution_exceptions(w, ff);
+ /*setup_for_execution_stack(w, ff);*/
+
+ ff->call_stack->worker = w;
+ w->current_stack_frame = ff->call_stack;
+
+ // If this is a return from a call, leave the pedigree alone
+ if (! is_return_from_call)
+ setup_for_execution_pedigree(w);
+
+ __cilkrts_setup_for_execution_sysdep(w, ff);
+
+ w->head = w->tail = w->l->ltq;
+ reset_THE_exception(w);
+
+ make_runnable(w, ff);
+}
+
+
+/*
+ * Called by the scheduling fiber, right before
+ * resuming a sf/ff for user code.
+ *
+ * This method associates the specified sf with the worker.
+ *
+ * It also asserts that w, ff, and sf all have the expected properties
+ * for resuming user code.
+ */
+void scheduling_fiber_prepare_to_resume_user_code(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf)
+{
+ w->current_stack_frame = sf;
+ sf->worker = w;
+
+ // Lots of debugging checks on the state of the fiber we might be
+ // resuming.
+#if FIBER_DEBUG >= 1
+# if FIBER_DEBUG >= 3
+ {
+ fprintf(stderr, "w=%d: ff=%p, sf=%p. about to resume user code\n",
+ w->self, ff, sf);
+ }
+# endif
+
+ const int flags = sf->flags;
+ CILK_ASSERT(flags & CILK_FRAME_SUSPENDED);
+ CILK_ASSERT(!sf->call_parent);
+ CILK_ASSERT(w->head == w->tail);
+
+ /* A frame can not be resumed unless it was suspended. */
+ CILK_ASSERT(ff->sync_sp != NULL);
+
+ /* The leftmost frame has no allocated stack */
+ if (ff->simulated_stolen)
+ CILK_ASSERT(flags & CILK_FRAME_UNSYNCHED);
+ else if (flags & CILK_FRAME_UNSYNCHED)
+ /* XXX By coincidence sync_sp could be null. */
+ CILK_ASSERT(ff->fiber_self != NULL);
+ else
+ /* XXX This frame could be resumed unsynched on the leftmost stack */
+ CILK_ASSERT((ff->sync_master == 0 || ff->sync_master == w));
+ CILK_ASSERT(w->l->frame_ff == ff);
+#endif
+}
+
+
+/**
+ * This method is the first method that should execute after we've
+ * switched to a scheduling fiber from user code.
+ *
+ * @param fiber The scheduling fiber for the current worker.
+ * @param wptr The current worker.
+ */
+static void enter_runtime_transition_proc(cilk_fiber *fiber)
+{
+ // We can execute this method for one of three reasons:
+ // 1. Undo-detach finds parent stolen.
+ // 2. Sync suspends frame.
+ // 3. Return from Cilk entry point.
+ //
+ //
+ // In cases 1 and 2, the frame may be truly suspended or
+ // may be immediately executed by this worker after provably_good_steal.
+ //
+ //
+    // There is a fourth case, which can, but does not need to, execute
+ // this function:
+ // 4. Starting up the scheduling loop on a user or
+ // system worker. In this case, we won't have
+ // a scheduling stack function to run.
+ __cilkrts_worker* w = cilk_fiber_get_owner(fiber);
+ if (w->l->post_suspend) {
+ // Run the continuation function passed to longjmp_into_runtime
+ run_scheduling_stack_fcn(w);
+
+ // After we have jumped into the runtime and run the
+ // scheduling function, any reducer map the worker had before entering the runtime
+ // should have already been saved into the appropriate full
+ // frame.
+ CILK_ASSERT(NULL == w->reducer_map);
+
+ // There shouldn't be any uncaught exceptions.
+ //
+ // In Windows, the OS catches any exceptions not caught by the
+ // user code. Thus, we are omitting the check on Windows.
+ //
+ // On Android, calling std::uncaught_exception with the stlport
+ // library causes a seg fault. Since we're not supporting
+ // exceptions there at this point, just don't do the check
+ //
+ // TBD: Is this check also safe to do on Windows?
+ CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION();
+ }
+}
+
+
+/**
+ * Method called to jump back to executing user code.
+ *
+ * A normal return from the runtime back to resuming user code calls
+ * this method. A computation executed using force_reduce also calls
+ * this method to return to user code.
+ *
+ * This function should not contain any code that depends on a fiber.
+ * In a force-reduce case, the user worker may not have a fiber. In
+ * the force-reduce case, we call this method directly instead of
+ * calling @c user_code_resume_after_switch_into_runtime.
+ */
+static inline NORETURN
+cilkrts_resume(__cilkrts_stack_frame *sf, full_frame *ff)
+{
+ // Save the sync stack pointer, and do the bookkeeping
+ char* sync_sp = ff->sync_sp;
+ __cilkrts_take_stack(ff, sync_sp); // leaves ff->sync_sp null
+
+ sf->flags &= ~CILK_FRAME_SUSPENDED;
+ // Actually longjmp to the user code.
+ // We may have exceptions to deal with, since we are resuming
+ // a previous-suspended frame.
+ sysdep_longjmp_to_sf(sync_sp, sf, ff);
+}
+
+
+/**
+ * Called by the user-code fiber right before resuming a full frame
+ * (sf/ff).
+ *
+ * This method pulls sf/ff out of the worker, and then calls
+ * cilkrts_resume to jump to user code.
+ */
+static NORETURN
+user_code_resume_after_switch_into_runtime(cilk_fiber *fiber)
+{
+ __cilkrts_worker *w = cilk_fiber_get_owner(fiber);
+ __cilkrts_stack_frame *sf;
+ full_frame *ff;
+ sf = w->current_stack_frame;
+ ff = sf->worker->l->frame_ff;
+
+#if FIBER_DEBUG >= 1
+ CILK_ASSERT(ff->fiber_self == fiber);
+ cilk_fiber_data *fdata = cilk_fiber_get_data(fiber);
+ DBGPRINTF ("%d-%p: resume_after_switch_into_runtime, fiber=%p\n",
+ w->self, w, fiber);
+ CILK_ASSERT(sf == fdata->resume_sf);
+#endif
+
+ // Notify the Intel tools that we're stealing code
+ ITT_SYNC_ACQUIRED(sf->worker);
+ NOTIFY_ZC_INTRINSIC("cilk_continue", sf);
+ cilk_fiber_invoke_tbb_stack_op(fiber, CILK_TBB_STACK_ADOPT);
+
+ // Actually jump to user code.
+ cilkrts_resume(sf, ff);
+}
+
+
+/* The current stack is about to either be suspended or destroyed. This
+ * function will switch to the stack on which the scheduler is suspended and
+ * resume running the scheduler within function do_work(). Upon waking up,
+ * the scheduler will run the 'cont' function, using the supplied worker and
+ * frame.
+ */
+static NORETURN
+longjmp_into_runtime(__cilkrts_worker *w,
+ scheduling_stack_fcn_t fcn,
+ __cilkrts_stack_frame *sf)
+{
+ full_frame *ff, *ff2;
+
+ CILK_ASSERT(!w->l->post_suspend);
+ ff = w->l->frame_ff;
+
+ // If we've got only one worker, stealing shouldn't be possible.
+ // Assume that this is a steal or return from spawn in a force-reduce case.
+ // We don't have a scheduling stack to switch to, so call the continuation
+ // function directly.
+ if (1 == w->g->P) {
+ fcn(w, ff, sf);
+
+        /* The call to fcn() will have pushed ff as the next frame. If
+ * this were a normal (non-forced-reduce) execution, there would have
+ * been a pop_next_frame call in a separate part of the runtime. We
+ * must call pop_next_frame here to complete the push/pop cycle. */
+ ff2 = pop_next_frame(w);
+
+ setup_for_execution(w, ff2, 0);
+ scheduling_fiber_prepare_to_resume_user_code(w, ff2, w->current_stack_frame);
+ cilkrts_resume(w->current_stack_frame, ff2);
+
+// Suppress clang warning that the expression result is unused
+#if defined(__clang__) && (! defined(__INTEL_COMPILER))
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wunused-value"
+#endif // __clang__
+ /* no return */
+        CILK_ASSERT(((void)"returned from cilkrts_resume", 0));
+#if defined(__clang__) && (! defined(__INTEL_COMPILER))
+# pragma clang diagnostic pop
+#endif // __clang__
+ }
+
+ w->l->post_suspend = fcn;
+ w->l->suspended_stack = sf;
+
+ ITT_SYNC_RELEASING(w);
+ ITT_SYNC_PREPARE(w);
+
+#if FIBER_DEBUG >= 2
+ fprintf(stderr, "ThreadId=%p, W=%d: about to switch into runtime... w->l->frame_ff = %p, sf=%p\n",
+ cilkos_get_current_thread_id(),
+ w->self, w->l->frame_ff,
+ sf);
+#endif
+
+    // The current fiber is either (1) the one we are about to free,
+    // or (2) one that has been passed up to the parent.
+ cilk_fiber *current_fiber = ( w->l->fiber_to_free ?
+ w->l->fiber_to_free :
+ w->l->frame_ff->parent->fiber_child );
+ cilk_fiber_data* fdata = cilk_fiber_get_data(current_fiber);
+ CILK_ASSERT(NULL == w->l->frame_ff->fiber_self);
+
+ // Clear the sf in the current fiber for cleanliness, to prevent
+ // us from accidentally resuming a bad sf.
+ // Technically, resume_sf gets overwritten for a fiber when
+ // we are about to resume it anyway.
+ fdata->resume_sf = NULL;
+ CILK_ASSERT(fdata->owner == w);
+
+ // Set the function to execute immediately after switching to the
+ // scheduling fiber, but before freeing any fibers.
+ cilk_fiber_set_post_switch_proc(w->l->scheduling_fiber,
+ enter_runtime_transition_proc);
+ cilk_fiber_invoke_tbb_stack_op(current_fiber, CILK_TBB_STACK_ORPHAN);
+
+ if (w->l->fiber_to_free) {
+ // Case 1: we are freeing this fiber. We never
+ // resume this fiber again after jumping into the runtime.
+ w->l->fiber_to_free = NULL;
+
+ // Extra check. Normally, the fiber we are about to switch to
+ // should have a NULL owner.
+ CILK_ASSERT(NULL == cilk_fiber_get_data(w->l->scheduling_fiber)->owner);
+#if FIBER_DEBUG >= 4
+        fprintf(stderr, "ThreadId=%p, W=%d: about to switch into runtime... current_fiber = %p, deallocate, switch to fiber %p\n",
+ cilkos_get_current_thread_id(),
+ w->self,
+ current_fiber, w->l->scheduling_fiber);
+#endif
+ cilk_fiber_invoke_tbb_stack_op(current_fiber, CILK_TBB_STACK_RELEASE);
+ NOTE_INTERVAL(w, INTERVAL_DEALLOCATE_RESUME_OTHER);
+ cilk_fiber_remove_reference_from_self_and_resume_other(current_fiber,
+ &w->l->fiber_pool,
+ w->l->scheduling_fiber);
+ // We should never come back here!
+ CILK_ASSERT(0);
+ }
+ else {
+ // Case 2: We are passing the fiber to our parent because we
+ // are leftmost. We should come back later to
+ // resume execution of user code.
+ //
+        // If we are not freeing a fiber, then we must be
+ // returning from a spawn or processing an exception. The
+ // "sync" path always frees a fiber.
+ //
+ // We must be the leftmost child, and by left holder logic, we
+ // have already moved the current fiber into our parent full
+ // frame.
+#if FIBER_DEBUG >= 2
+        fprintf(stderr, "ThreadId=%p, W=%d: about to suspend self into runtime... current_fiber = %p, deallocate, switch to fiber %p\n",
+ cilkos_get_current_thread_id(),
+ w->self,
+ current_fiber, w->l->scheduling_fiber);
+#endif
+
+ NOTE_INTERVAL(w, INTERVAL_SUSPEND_RESUME_OTHER);
+
+ cilk_fiber_suspend_self_and_resume_other(current_fiber,
+ w->l->scheduling_fiber);
+ // Resuming this fiber returns control back to
+ // this function because our implementation uses OS fibers.
+ //
+        // On Unix, we could instead pass
+        // user_code_resume_after_switch_into_runtime as an extra "resume_proc"
+        // that resumes execution of user code directly, instead of jumping
+        // back here and then jumping back to user code.
+#if FIBER_DEBUG >= 2
+ CILK_ASSERT(fdata->owner == __cilkrts_get_tls_worker());
+#endif
+ user_code_resume_after_switch_into_runtime(current_fiber);
+ }
+}
+
+/*
+ * Send a message to the children of the specified worker: run or wait.
+ */
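+/*
+ * The system workers form an implicit binary tree: worker n signals
+ * workers 2n+1 and 2n+2 (when those indices are below P-1), so a run or
+ * wait message propagates to all of them in O(log P) rounds.  For example,
+ * with seven system workers: 0 -> {1, 2}, 1 -> {3, 4}, 2 -> {5, 6}.
+ */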
+static void notify_children(__cilkrts_worker *w, unsigned int msg)
+{
+ int child_num;
+ __cilkrts_worker *child;
+ int num_sys_workers = w->g->P - 1;
+
+ // If worker is "n", then its children are 2n + 1, and 2n + 2.
+ child_num = (w->self << 1) + 1;
+ if (child_num < num_sys_workers) {
+ child = w->g->workers[child_num];
+ CILK_ASSERT(child->l->signal_node);
+ signal_node_msg(child->l->signal_node, msg);
+ child_num++;
+ if (child_num < num_sys_workers) {
+ child = w->g->workers[child_num];
+ CILK_ASSERT(child->l->signal_node);
+ signal_node_msg(child->l->signal_node, msg);
+ }
+ }
+}
+
+/*
+ * Notify this worker's children that they need to wait.
+ */
+static void notify_children_wait(__cilkrts_worker *w)
+{
+ notify_children(w, 0);
+}
+
+/*
+ * Notify this worker's children to run and start trying to steal.
+ */
+static void notify_children_run(__cilkrts_worker *w)
+{
+ notify_children(w, 1);
+}
+
+/**
+ * A single "check" to find work, either on our queue or through a
+ * steal attempt. This method checks our local queue once, and
+ * performs one steal attempt.
+ */
+static full_frame* check_for_work(__cilkrts_worker *w)
+{
+ full_frame *ff = NULL;
+ ff = pop_next_frame(w);
+ // If there is no work on the queue, try to steal some.
+ if (NULL == ff) {
+ START_INTERVAL(w, INTERVAL_STEALING) {
+ if (w->l->type != WORKER_USER && w->l->team != NULL) {
+ // At this point, the worker knows for certain that it has run
+ // out of work. Therefore, it loses its team affiliation. User
+ // workers never change teams, of course.
+ __cilkrts_worker_lock(w);
+ w->l->team = NULL;
+ __cilkrts_worker_unlock(w);
+ }
+
+ // If we are about to do a random steal, we should have no
+ // full frame...
+ CILK_ASSERT(NULL == w->l->frame_ff);
+ random_steal(w);
+ } STOP_INTERVAL(w, INTERVAL_STEALING);
+
+ // If the steal was successful, then the worker has populated its next
+ // frame with the work to resume.
+ ff = pop_next_frame(w);
+ if (NULL == ff) {
+ // Punish the worker for failing to steal.
+ // No quantum for you!
+ __cilkrts_yield();
+ w->l->steal_failure_count++;
+ } else {
+ // Reset steal_failure_count since there is obviously still work to
+ // be done.
+ w->l->steal_failure_count = 0;
+ }
+ }
+ return ff;
+}
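+
+/*
+ * Note on back-off: each failed steal attempt above costs one
+ * __cilkrts_yield() and one increment of steal_failure_count.
+ * Worker 0 compares that count against g->max_steal_failures in
+ * worker_runnable() (later in this file) to decide whether the
+ * system workers should go back to waiting.
+ */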
+
+/**
+ * Keep stealing or looking on our queue.
+ *
+ * Returns either when a full frame is found, or NULL if the
+ * computation is done.
+ */
+static full_frame* search_until_work_found_or_done(__cilkrts_worker *w)
+{
+ full_frame *ff = NULL;
+ // Find a full frame to execute (either through random stealing,
+ // or because we pull it off w's 1-element queue).
+ while (!ff) {
+ // Check worker state to figure out our next action.
+ switch (worker_runnable(w))
+ {
+ case SCHEDULE_RUN: // One attempt at checking for work.
+ ff = check_for_work(w);
+ break;
+ case SCHEDULE_WAIT: // go into wait-mode.
+ CILK_ASSERT(WORKER_SYSTEM == w->l->type);
+ // If we are about to wait, then we better not have
+ // a frame that we should execute...
+ CILK_ASSERT(NULL == w->l->next_frame_ff);
+ notify_children_wait(w);
+ signal_node_wait(w->l->signal_node);
+ // ...
+ // Runtime is waking up.
+ notify_children_run(w);
+ w->l->steal_failure_count = 0;
+ break;
+ case SCHEDULE_EXIT: // exit the scheduler.
+ CILK_ASSERT(WORKER_USER != w->l->type);
+ return NULL;
+ default:
+ CILK_ASSERT(0);
+ abort();
+ }
+ }
+ return ff;
+}
+
+/**
+ * The proc method for a scheduling fiber on a user worker.
+ *
+ * When a user worker jumps into the runtime, it jumps into this
+ * method by either starting it if the scheduling fiber has never run
+ * before, or resuming the fiber if it was previously suspended.
+ */
+COMMON_PORTABLE
+void scheduler_fiber_proc_for_user_worker(cilk_fiber *fiber)
+{
+ __cilkrts_worker* w = cilk_fiber_get_owner(fiber);
+ CILK_ASSERT(w);
+
+ // This must be a user worker
+ CILK_ASSERT(WORKER_USER == w->l->type);
+
+ // If we aren't the current worker, then something is very wrong
+ // here..
+ verify_current_wkr(w);
+
+ __cilkrts_run_scheduler_with_exceptions(w);
+}
+
+
+/**
+ * The body of the runtime scheduling loop. This function executes in
+ * 4 stages:
+ *
+ * 1. Transitions from the user code into the runtime by
+ * executing any scheduling-stack functions.
+ * 2. Looks for a full frame enqueued from a successful provably
+ * good steal.
+ * 3. If no full frame is found in step 2, steal until
+ * a frame is found or we are done. If we are done, finish
+ * the scheduling loop.
+ * 4. When a frame is found, setup to resume user code.
+ * In particular, suspend the current fiber and resume the
+ * user fiber to execute the frame.
+ *
+ * Returns a fiber object that we should switch to after completing
+ * the body of the loop, or NULL if we should continue executing on
+ * this fiber.
+ *
+ * @pre @c current_fiber should equal @c wptr->l->scheduling_fiber
+ *
+ * @param current_fiber The currently executing (scheduling) fiber.
+ * @param wptr The currently executing worker.
+ * @return The next fiber we should switch to.
+ */
+static cilk_fiber* worker_scheduling_loop_body(cilk_fiber* current_fiber,
+ void* wptr)
+{
+ __cilkrts_worker *w = (__cilkrts_worker*) wptr;
+ CILK_ASSERT(current_fiber == w->l->scheduling_fiber);
+
+ // Stage 1: Transition from executing user code to the runtime code.
+ // We don't need to do this call here any more, because
+ // every switch to the scheduling fiber should make this call
+ // using a post_switch_proc on the fiber.
+ //
+ // enter_runtime_transition_proc(w->l->scheduling_fiber, wptr);
+
+ // After Stage 1 is complete, w should no longer have
+ // an associated full frame.
+ CILK_ASSERT(NULL == w->l->frame_ff);
+
+ // Stage 2. First do a quick check of our 1-element queue.
+ full_frame *ff = pop_next_frame(w);
+
+ if (!ff) {
+ // Stage 3. We didn't find anything from our 1-element
+ // queue. Now go through the steal loop to find work.
+ ff = search_until_work_found_or_done(w);
+ if (!ff) {
+ CILK_ASSERT(w->g->work_done);
+ return NULL;
+ }
+ }
+
+ // Stage 4. Now that we have found a full frame to work on,
+ // actually execute it.
+ __cilkrts_stack_frame *sf;
+
+ // There shouldn't be any uncaught exceptions.
+ //
+ // On Windows, the OS catches any exceptions not caught by the
+ // user code, so we omit the check there.
+ //
+ // On Android, calling std::uncaught_exception with the stlport
+ // library causes a seg fault. Since we're not supporting
+ // exceptions there at this point, we skip the check as well.
+ CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION();
+
+ BEGIN_WITH_WORKER_LOCK(w) {
+ CILK_ASSERT(!w->l->frame_ff);
+ BEGIN_WITH_FRAME_LOCK(w, ff) {
+ sf = ff->call_stack;
+ CILK_ASSERT(sf && !sf->call_parent);
+ setup_for_execution(w, ff, 0);
+ } END_WITH_FRAME_LOCK(w, ff);
+ } END_WITH_WORKER_LOCK(w);
+
+ /* run it */
+ //
+ // Prepare to run the full frame. To do so, we need to:
+ // (a) Execute some code on this fiber (the scheduling
+ // fiber) to set up data structures, and
+ // (b) Suspend the scheduling fiber, and resume the
+ // user-code fiber.
+
+ // Part (a). Set up data structures.
+ scheduling_fiber_prepare_to_resume_user_code(w, ff, sf);
+
+ cilk_fiber *other = w->l->frame_ff->fiber_self;
+ cilk_fiber_data* other_data = cilk_fiber_get_data(other);
+ cilk_fiber_data* current_fiber_data = cilk_fiber_get_data(current_fiber);
+
+ // I believe two cases are possible here, both of which
+ // should have other_data->resume_sf as NULL.
+ //
+ // 1. Resuming a fiber that was previously executing
+ // user code (i.e., a provably-good-steal).
+ // In this case, resume_sf should have been
+ // set to NULL when it was suspended.
+ //
+ // 2. Resuming code on a steal. In this case, since we
+ // grabbed a new fiber, resume_sf should be NULL.
+ CILK_ASSERT(NULL == other_data->resume_sf);
+
+#if FIBER_DEBUG >= 2
+ fprintf(stderr, "W=%d: other fiber=%p, setting resume_sf to %p\n",
+ w->self, other, other_data->resume_sf);
+#endif
+ // Update our own fiber's data.
+ current_fiber_data->resume_sf = NULL;
+ // The scheduling fiber should have the right owner from before.
+ CILK_ASSERT(current_fiber_data->owner == w);
+ other_data->resume_sf = sf;
+
+
+#if FIBER_DEBUG >= 3
+ fprintf(stderr, "ThreadId=%p (about to suspend self resume other), W=%d: current_fiber=%p, other=%p, current_fiber->resume_sf = %p, other->resume_sf = %p\n",
+ cilkos_get_current_thread_id(),
+ w->self,
+ current_fiber, other,
+ current_fiber_data->resume_sf,
+ other_data->resume_sf);
+#endif
+ return other;
+}
+
+
+/**
+ * This function is executed once by each worker, to initialize its
+ * scheduling loop.
+ */
+static void worker_scheduler_init_function(__cilkrts_worker *w)
+{
+ // First, execute the startup tasks that must happen for all
+ // worker types.
+ ITT_SYNC_PREPARE(w);
+ /* Notify tools about the new worker. Inspector needs this, but we
+ don't want to confuse Cilkscreen with system threads. User threads
+ do this notification in bind_thread */
+ if (! w->g->under_ptool)
+ __cilkrts_cilkscreen_establish_worker(w);
+
+ // Seed the initial random number generator.
+ // If we forget to do this, then the worker always steals from 0.
+ // Programs will still execute correctly, but
+ // you may see a subtle performance bug...
+ mysrand(w, (w->self + 1));
+
+ // The startup work varies, depending on the worker type.
+ switch (w->l->type) {
+ case WORKER_USER:
+ // Stop working once we've entered the scheduler.
+ // For user workers, INTERVAL_IN_SCHEDULER counts the time
+ // since we called bind_thread.
+ break;
+
+ case WORKER_SYSTEM:
+ // If a system worker is starting, we must also be starting
+ // the runtime.
+
+ // Runtime begins in a wait-state and is woken up by the first user
+ // worker when the runtime is ready.
+ signal_node_wait(w->l->signal_node);
+ // ...
+ // Runtime is waking up.
+ notify_children_run(w);
+ w->l->steal_failure_count = 0;
+
+ // For system threads, count all the time this thread is
+ // alive in the scheduling loop.
+ START_INTERVAL(w, INTERVAL_IN_SCHEDULER);
+ START_INTERVAL(w, INTERVAL_WORKING);
+ break;
+ default:
+ __cilkrts_bug("Unknown worker %p of type %d entering scheduling loop\n",
+ w, w->l->type);
+ }
+}
+
+/**
+ * This function is executed once by each worker, to finish its
+ * scheduling loop.
+ *
+ * @note Currently, only system workers finish their loops. User
+ * workers will jump away to user code without exiting their
+ * scheduling loop.
+ */
+static void worker_scheduler_terminate_function(__cilkrts_worker *w)
+{
+ // A user worker should never finish by falling through the
+ // scheduling loop.
+ CILK_ASSERT(WORKER_USER != w->l->type);
+ STOP_INTERVAL(w, INTERVAL_IN_RUNTIME);
+ STOP_INTERVAL(w, INTERVAL_IN_SCHEDULER);
+}
+
+/**
+ * The main scheduler function executed by a worker's scheduling
+ * fiber.
+ *
+ * This method is started by either a new system worker, or a user
+ * worker that has stalled and just been imported into the runtime.
+ */
+static void worker_scheduler_function(__cilkrts_worker *w)
+{
+ worker_scheduler_init_function(w);
+
+ // The main scheduling loop body.
+
+ while (!w->g->work_done) {
+ // Set intervals. Now we are in the runtime instead of working.
+ START_INTERVAL(w, INTERVAL_IN_RUNTIME);
+ STOP_INTERVAL(w, INTERVAL_WORKING);
+
+ // Execute the "body" of the scheduling loop, and figure
+ // out the fiber to jump to next.
+ cilk_fiber* fiber_to_resume
+ = worker_scheduling_loop_body(w->l->scheduling_fiber, w);
+
+ if (fiber_to_resume) {
+ // Suspend the current fiber and resume next one.
+ NOTE_INTERVAL(w, INTERVAL_SUSPEND_RESUME_OTHER);
+ STOP_INTERVAL(w, INTERVAL_IN_RUNTIME);
+ START_INTERVAL(w, INTERVAL_WORKING);
+ cilk_fiber_suspend_self_and_resume_other(w->l->scheduling_fiber,
+ fiber_to_resume);
+
+ // Return here only when this (scheduling) fiber is
+ // resumed (i.e., this worker wants to reenter the runtime).
+ }
+ }
+
+ // Finish the scheduling loop.
+ worker_scheduler_terminate_function(w);
+}
+
+
+/*************************************************************
+ Forward declarations for reduction protocol.
+*************************************************************/
+
+static __cilkrts_worker*
+execute_reductions_for_sync(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf_at_sync);
+
+static __cilkrts_worker*
+execute_reductions_for_spawn_return(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *returning_sf);
+
+
+
+/*************************************************************
+ Scheduler functions that are callable by client code
+*************************************************************/
+static full_frame *disown(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf,
+ const char *why)
+{
+ CILK_ASSERT(ff);
+ make_unrunnable(w, ff, sf, sf != 0, why);
+ w->l->frame_ff = 0;
+ return ff->parent;
+}
+
+/**
+ * Called when ff is returning from a spawn, and we need to execute a
+ * reduction.
+ *
+ * @param w The currently executing worker.
+ * @param ff The full frame for w.
+ * @param returning_sf The stack frame for the spawn helper that is returning.
+ *
+ * Normally, by the time we gain control in the runtime, the worker
+ * has already popped off the __cilkrts_stack_frame "returning_sf"
+ * from its call chain.
+ *
+ * When we have only serial reductions, w->current_stack_frame is not
+ * needed any more, because w is about to enter the runtime scheduling
+ * loop anyway. Similarly, the frame "ff" is slated to be destroyed
+ * after the runtime finishes the return from spawn and splices ff out
+ * of the tree of full frames.
+ *
+ * To execute a parallel reduction, however, we still want
+ * w->current_stack_frame == returning_sf, and we are going to use the
+ * frame ff for a little bit longer.
+ *
+ * This method:
+ *
+ * 1. Puts returning_sf back as w's current stack frame.
+ * 2. Makes "ff" runnable again on w.
+ */
+static inline
+void restore_frame_for_spawn_return_reduction(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *returning_sf) {
+#if REDPAR_DEBUG >= 2
+ CILK_ASSERT(returning_sf);
+ CILK_ASSERT(returning_sf->worker == w);
+#endif
+ // Change w's current stack frame back to "returning_sf".
+ //
+ // Intuitively, w->current_stack_frame should be
+ // returning_sf->call_parent at this point.
+ //
+ // We cannot assert this, however, because the pop of
+ // returning_sf from the call chain has already cleared
+ // returning_sf->call_parent. We don't want to restore the call
+ // parent of returning_sf, because its parent has been stolen, and
+ // the runtime assumes that steals break this link.
+
+ // We cannot assert call_parent is NULL either, since that's not true for
+ // Win64 exception handling
+// CILK_ASSERT(returning_sf->call_parent == NULL);
+ w->current_stack_frame = returning_sf;
+
+ // Make the full frame "ff" runnable again, in preparation for
+ // executing the reduction.
+ make_runnable(w, ff);
+}
+
+
+NORETURN __cilkrts_c_sync(__cilkrts_worker *w,
+ __cilkrts_stack_frame *sf_at_sync)
+{
+ full_frame *ff;
+
+ // Claim: This read of w->l->frame_ff can occur without
+ // holding the worker lock because when w has reached a sync
+ // and entered the runtime (because it stalls), w's deque is empty
+ // and no one else can steal and change w->l->frame_ff.
+
+ ff = w->l->frame_ff;
+#ifdef _WIN32
+ __cilkrts_save_exception_state(w, ff);
+#else
+ // Move any pending exceptions into the full frame
+ CILK_ASSERT(NULL == ff->pending_exception);
+ ff->pending_exception = w->l->pending_exception;
+ w->l->pending_exception = NULL;
+#endif
+
+ w = execute_reductions_for_sync(w, ff, sf_at_sync);
+
+#if FIBER_DEBUG >= 3
+ fprintf(stderr, "ThreadId=%p, w->self = %d. about to longjmp_into_runtim[c_sync] with ff=%p\n",
+ cilkos_get_current_thread_id(), w->self, ff);
+#endif
+
+ longjmp_into_runtime(w, do_sync, sf_at_sync);
+}
+
+static void do_sync(__cilkrts_worker *w, full_frame *ff,
+ __cilkrts_stack_frame *sf)
+{
+ //int abandoned = 1;
+ enum provably_good_steal_t steal_result = ABANDON_EXECUTION;
+
+ START_INTERVAL(w, INTERVAL_SYNC_CHECK) {
+ BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) {
+
+ CILK_ASSERT(ff);
+ BEGIN_WITH_FRAME_LOCK(w, ff) {
+ CILK_ASSERT(sf->call_parent == 0);
+ CILK_ASSERT(sf->flags & CILK_FRAME_UNSYNCHED);
+
+ // Before switching into the scheduling fiber, we should have
+ // already taken care of deallocating the current
+ // fiber.
+ CILK_ASSERT(NULL == ff->fiber_self);
+
+ // Update the frame's pedigree information if this is an ABI 1
+ // or later frame
+ if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1)
+ {
+ sf->parent_pedigree.rank = w->pedigree.rank;
+ sf->parent_pedigree.parent = w->pedigree.parent;
+
+ // Note that the pedigree rank needs to be updated
+ // when setup_for_execution_pedigree runs
+ sf->flags |= CILK_FRAME_SF_PEDIGREE_UNSYNCHED;
+ }
+
+ /* the decjoin() occurs in provably_good_steal() */
+ steal_result = provably_good_steal(w, ff);
+
+ } END_WITH_FRAME_LOCK(w, ff);
+ // set w->l->frame_ff = NULL after checking abandoned
+ if (WAIT_FOR_CONTINUE != steal_result) {
+ w->l->frame_ff = NULL;
+ }
+ } END_WITH_WORKER_LOCK_OPTIONAL(w);
+ } STOP_INTERVAL(w, INTERVAL_SYNC_CHECK);
+
+ // Now, if we are in a replay situation and provably_good_steal() returned
+ // WAIT_FOR_CONTINUE, we should sleep, reacquire locks, call
+ // provably_good_steal(), and release locks until we get a value other
+ // than WAIT_FOR_CONTINUE from the function.
+#ifdef CILK_RECORD_REPLAY
+ // We don't have to explicitly check for REPLAY_LOG below because
+ // steal_result can only be set to WAIT_FOR_CONTINUE during replay
+ while(WAIT_FOR_CONTINUE == steal_result)
+ {
+ __cilkrts_sleep();
+ BEGIN_WITH_WORKER_LOCK_OPTIONAL(w)
+ {
+ ff = w->l->frame_ff;
+ BEGIN_WITH_FRAME_LOCK(w, ff)
+ {
+ steal_result = provably_good_steal(w, ff);
+ } END_WITH_FRAME_LOCK(w, ff);
+ if (WAIT_FOR_CONTINUE != steal_result)
+ w->l->frame_ff = NULL;
+ } END_WITH_WORKER_LOCK_OPTIONAL(w);
+ }
+#endif // CILK_RECORD_REPLAY
+
+#ifdef ENABLE_NOTIFY_ZC_INTRINSIC
+ // If we can't make any further progress on this thread, tell Inspector
+ // that we're abandoning the work and will go find something else to do.
+ if (ABANDON_EXECUTION == steal_result)
+ {
+ NOTIFY_ZC_INTRINSIC("cilk_sync_abandon", 0);
+ }
+#endif // defined ENABLE_NOTIFY_ZC_INTRINSIC
+
+ return; /* back to scheduler loop */
+}
+
+/* worker W completely promotes its own deque, simulating the case
+ where the whole deque is stolen. We use this mechanism to force
+ the allocation of new storage for reducers for race-detection
+ purposes. */
+void __cilkrts_promote_own_deque(__cilkrts_worker *w)
+{
+ // Remember the fiber we start this method on.
+ CILK_ASSERT(w->l->frame_ff);
+ cilk_fiber* starting_fiber = w->l->frame_ff->fiber_self;
+
+ BEGIN_WITH_WORKER_LOCK(w) {
+ while (dekker_protocol(w)) {
+ /* PLACEHOLDER_FIBER is used as a non-null marker to tell detach()
+ and make_child() that this frame should be treated as a spawn
+ parent, even though we have not assigned it a stack. */
+ detach_for_steal(w, w, PLACEHOLDER_FIBER);
+ }
+ } END_WITH_WORKER_LOCK(w);
+
+
+ // TBD: The management of full frames and fibers is a bit
+ // sketchy here. We are promoting stack frames into full frames,
+ // and pretending they are stolen away, but no other worker is
+ // actually working on them. Some runtime invariants
+ // may be broken here.
+ //
+ // Technically, if we are simulating a steal from w, then w
+ // should get a new full frame but keep the same fiber. A real
+ // thief would take the loot frame away, get a new fiber, and
+ // start executing the loot frame.
+ //
+ // What should a fake thief do? Where does the frame go?
+
+ // In any case, we should finish the promotion process on the
+ // same fiber we started with.
+ CILK_ASSERT(w->l->frame_ff);
+ CILK_ASSERT(w->l->frame_ff->fiber_self == starting_fiber);
+}
+
+
+
+/* The client code calls this function after a spawn when the Dekker
+ protocol fails. The function may either return or longjmp
+ into the runtime.
+
+ This function takes in a "returning_sf" argument which corresponds
+ to the __cilkrts_stack_frame that we are finishing (i.e., the
+ argument to __cilkrts_leave_frame).
+ */
+void __cilkrts_c_THE_exception_check(__cilkrts_worker *w,
+ __cilkrts_stack_frame *returning_sf)
+{
+ full_frame *ff;
+ int stolen_p;
+ __cilkrts_stack_frame *saved_sf = NULL;
+
+ START_INTERVAL(w, INTERVAL_THE_EXCEPTION_CHECK);
+
+ BEGIN_WITH_WORKER_LOCK(w) {
+ ff = w->l->frame_ff;
+ CILK_ASSERT(ff);
+ /* This code is called only upon a normal return and never
+ upon an exceptional return. Assert that this is the
+ case. */
+ CILK_ASSERT(!w->l->pending_exception);
+
+ reset_THE_exception(w);
+ stolen_p = !(w->head < (w->tail + 1)); /* +1 because tail was
+ speculatively
+ decremented by the
+ compiled code */
+
+ if (stolen_p) {
+ /* XXX This will be charged to THE for accounting purposes */
+ __cilkrts_save_exception_state(w, ff);
+
+ // Save the value of the current stack frame.
+ saved_sf = w->current_stack_frame;
+
+ // Reverse the decrement from undo_detach.
+ // This update effectively resets the deque to be
+ // empty (i.e., changes w->tail back to equal w->head).
+ // We need to reset the deque to execute parallel
+ // reductions. When we have only serial reductions, it
+ // does not matter, since serial reductions do not
+ // change the deque.
+ w->tail++;
+#if REDPAR_DEBUG > 1
+ // ASSERT our deque is empty.
+ CILK_ASSERT(w->head == w->tail);
+#endif
+ }
+ } END_WITH_WORKER_LOCK(w);
+
+ STOP_INTERVAL(w, INTERVAL_THE_EXCEPTION_CHECK);
+
+ if (stolen_p)
+ {
+ w = execute_reductions_for_spawn_return(w, ff, returning_sf);
+
+ // "Mr. Policeman? My parent always told me that if I was in trouble
+ // I should ask a nice policeman for help. I can't find my parent
+ // anywhere..."
+ //
+ // Write a record to the replay log for an attempt to return to a stolen parent
+ replay_record_orphaned(w);
+
+ // Update the pedigree only after we've finished the
+ // reductions.
+ update_pedigree_on_leave_frame(w, returning_sf);
+
+ // Notify Inspector that the parent has been stolen and we're
+ // going to abandon this work and go do something else. This
+ // will match the cilk_leave_begin in the compiled code
+ NOTIFY_ZC_INTRINSIC("cilk_leave_stolen", saved_sf);
+
+ DBGPRINTF ("%d: longjmp_into_runtime from __cilkrts_c_THE_exception_check\n", w->self);
+ longjmp_into_runtime(w, do_return_from_spawn, 0);
+ DBGPRINTF ("%d: returned from longjmp_into_runtime from __cilkrts_c_THE_exception_check?!\n", w->self);
+ }
+ else
+ {
+ NOTE_INTERVAL(w, INTERVAL_THE_EXCEPTION_CHECK_USELESS);
+ return;
+ }
+}
+
+/* Return an exception to a stolen parent. */
+NORETURN __cilkrts_exception_from_spawn(__cilkrts_worker *w,
+ __cilkrts_stack_frame *returning_sf)
+{
+ full_frame *ff = w->l->frame_ff;
+ // This is almost the same as THE_exception_check, except
+ // the detach didn't happen, we don't need to undo the tail
+ // update.
+ CILK_ASSERT(w->head == w->tail);
+ w = execute_reductions_for_spawn_return(w, ff, returning_sf);
+
+ longjmp_into_runtime(w, do_return_from_spawn, 0);
+ CILK_ASSERT(0);
+}
+
+static void do_return_from_spawn(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf)
+{
+ full_frame *parent_ff;
+ enum provably_good_steal_t steal_result = ABANDON_EXECUTION;
+
+ BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) {
+ CILK_ASSERT(ff);
+ CILK_ASSERT(!ff->is_call_child);
+ CILK_ASSERT(sf == NULL);
+ parent_ff = ff->parent;
+
+ BEGIN_WITH_FRAME_LOCK(w, ff) {
+ decjoin(ff);
+ } END_WITH_FRAME_LOCK(w, ff);
+
+ BEGIN_WITH_FRAME_LOCK(w, parent_ff) {
+ if (parent_ff->simulated_stolen)
+ unconditional_steal(w, parent_ff);
+ else
+ steal_result = provably_good_steal(w, parent_ff);
+ } END_WITH_FRAME_LOCK(w, parent_ff);
+
+ } END_WITH_WORKER_LOCK_OPTIONAL(w);
+
+ // Loop here in replay mode
+#ifdef CILK_RECORD_REPLAY
+ // We don't have to explicitly check for REPLAY_LOG below because
+ // steal_result can only get set to WAIT_FOR_CONTINUE during replay.
+ // We also don't have to worry about the simulated_stolen flag
+ // because steal_result can only be set to WAIT_FOR_CONTINUE by
+ // provably_good_steal().
+ while(WAIT_FOR_CONTINUE == steal_result)
+ {
+ __cilkrts_sleep();
+ BEGIN_WITH_WORKER_LOCK_OPTIONAL(w)
+ {
+ BEGIN_WITH_FRAME_LOCK(w, parent_ff)
+ {
+ steal_result = provably_good_steal(w, parent_ff);
+ } END_WITH_FRAME_LOCK(w, parent_ff);
+ } END_WITH_WORKER_LOCK_OPTIONAL(w);
+ }
+#endif // CILK_RECORD_REPLAY
+
+ // Cleanup the child frame.
+ __cilkrts_destroy_full_frame(w, ff);
+ return;
+}
+
+#ifdef _WIN32
+/* migrate an exception across fibers. Call this function when an exception has
+ * been thrown and has to traverse across a steal. The exception has already
+ * been wrapped up, so all that remains is to longjmp() into the continuation,
+ * sync, and re-raise it.
+ */
+void __cilkrts_migrate_exception(__cilkrts_stack_frame *sf) {
+
+ __cilkrts_worker *w = sf->worker;
+ full_frame *ff;
+
+ BEGIN_WITH_WORKER_LOCK(w) {
+ ff = w->l->frame_ff;
+ reset_THE_exception(w);
+ /* there is no need to check for a steal because we wouldn't be here if
+ there weren't a steal. */
+ __cilkrts_save_exception_state(w, ff);
+
+ CILK_ASSERT(w->head == w->tail);
+ } END_WITH_WORKER_LOCK(w);
+
+ {
+ // TBD(jsukha): This function emulates the
+ // the "do_return_from_spawn" path.
+ w = execute_reductions_for_spawn_return(w, ff, sf);
+ }
+
+ longjmp_into_runtime(w, do_return_from_spawn, 0); /* does not return. */
+ CILK_ASSERT(! "Shouldn't be here...");
+}
+#endif
+
+
+/* Pop a call stack from TAIL. Return the call stack, or NULL if the
+ queue is empty */
+__cilkrts_stack_frame *__cilkrts_pop_tail(__cilkrts_worker *w)
+{
+ __cilkrts_stack_frame *sf;
+ BEGIN_WITH_WORKER_LOCK(w) {
+ __cilkrts_stack_frame *volatile *tail = w->tail;
+ if (w->head < tail) {
+ --tail;
+ sf = *tail;
+ w->tail = tail;
+ } else {
+ sf = 0;
+ }
+ } END_WITH_WORKER_LOCK(w);
+ return sf;
+}
+
+#ifdef CILK_RECORD_REPLAY
+__cilkrts_stack_frame *simulate_pop_tail(__cilkrts_worker *w)
+{
+ __cilkrts_stack_frame *sf;
+ BEGIN_WITH_WORKER_LOCK(w) {
+ if (w->head < w->tail) {
+ sf = *(w->tail-1);
+ } else {
+ sf = 0;
+ }
+ } END_WITH_WORKER_LOCK(w);
+ return sf;
+}
+#endif
+
+
+/* Return from a call, not a spawn. */
+void __cilkrts_return(__cilkrts_worker *w)
+{
+ full_frame *ff, *parent_ff;
+ START_INTERVAL(w, INTERVAL_RETURNING);
+
+ BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) {
+ ff = w->l->frame_ff;
+ CILK_ASSERT(ff);
+ CILK_ASSERT(ff->join_counter == 1);
+ /* This path is not used to return from spawn. */
+ CILK_ASSERT(ff->is_call_child);
+
+ BEGIN_WITH_FRAME_LOCK(w, ff) {
+ // After this call, w->l->frame_ff != ff.
+ // Technically, w will "own" ff until ff is freed,
+ // however, because ff is a dying leaf full frame.
+ parent_ff = disown(w, ff, 0, "return");
+ decjoin(ff);
+
+#ifdef _WIN32
+ __cilkrts_save_exception_state(w, ff);
+#else
+ // Move the pending exceptions into the full frame
+ // This should always be NULL if this isn't a
+ // return with an exception
+ CILK_ASSERT(NULL == ff->pending_exception);
+ ff->pending_exception = w->l->pending_exception;
+ w->l->pending_exception = NULL;
+#endif // _WIN32
+
+ } END_WITH_FRAME_LOCK(w, ff);
+
+ __cilkrts_fence(); /* redundant */
+
+ CILK_ASSERT(parent_ff);
+
+ BEGIN_WITH_FRAME_LOCK(w, parent_ff) {
+ finalize_child_for_call(w, parent_ff, ff);
+ } END_WITH_FRAME_LOCK(w, parent_ff);
+
+ ff = pop_next_frame(w);
+ /* ff will be non-null except when the parent frame is owned
+ by another worker.
+ CILK_ASSERT(ff)
+ */
+ CILK_ASSERT(!w->l->frame_ff);
+ if (ff) {
+ BEGIN_WITH_FRAME_LOCK(w, ff) {
+ __cilkrts_stack_frame *sf = ff->call_stack;
+ CILK_ASSERT(sf && !sf->call_parent);
+ setup_for_execution(w, ff, 1);
+ } END_WITH_FRAME_LOCK(w, ff);
+ }
+ } END_WITH_WORKER_LOCK_OPTIONAL(w);
+
+ STOP_INTERVAL(w, INTERVAL_RETURNING);
+}
+
+static void __cilkrts_unbind_thread()
+{
+ int stop_cilkscreen = 0;
+ global_state_t *g;
+
+ // Take out the global OS mutex to protect accesses to the table of workers
+ global_os_mutex_lock();
+
+ if (cilkg_is_published()) {
+ __cilkrts_worker *w = __cilkrts_get_tls_worker();
+ if (w) {
+ g = w->g;
+
+ // If there's only 1 worker, the counts will be stopped in
+ // __cilkrts_scheduler
+ if (g->P > 1)
+ {
+ STOP_INTERVAL(w, INTERVAL_WORKING);
+ STOP_INTERVAL(w, INTERVAL_IN_SCHEDULER);
+ }
+
+ __cilkrts_set_tls_worker(0);
+
+ if (w->self == -1) {
+ // This worker is an overflow worker. I.e., it was created on-
+ // demand when the global pool ran out of workers.
+ destroy_worker(w);
+ __cilkrts_free(w);
+ } else {
+ // This is a normal user worker and needs to be counted by the
+ // global state for the purposes of throttling system workers.
+ w->l->type = WORKER_FREE;
+ __cilkrts_leave_cilk(g);
+ }
+
+ stop_cilkscreen = (0 == g->Q);
+ }
+ }
+ global_os_mutex_unlock();
+
+ /* Turn off Cilkscreen. This needs to be done when we are NOT holding the
+ * os mutex. */
+ if (stop_cilkscreen)
+ __cilkrts_cilkscreen_disable_instrumentation();
+}
+
+/* special return from the initial frame */
+
+void __cilkrts_c_return_from_initial(__cilkrts_worker *w)
+{
+ struct cilkred_map *rm;
+
+ /* This is only called on a user thread worker. */
+ CILK_ASSERT(w->l->type == WORKER_USER);
+
+ #if REDPAR_DEBUG >= 3
+ fprintf(stderr, "[W=%d, desc=cilkrts_c_return_from_initial, ff=%p]\n",
+ w->self, w->l->frame_ff);
+ #endif
+
+ BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) {
+ full_frame *ff = w->l->frame_ff;
+ CILK_ASSERT(ff);
+ CILK_ASSERT(ff->join_counter == 1);
+ w->l->frame_ff = 0;
+
+ CILK_ASSERT(ff->fiber_self);
+ // Save any TBB interop data for the next time this thread enters Cilk
+ cilk_fiber_tbb_interop_save_info_from_stack(ff->fiber_self);
+
+ // Deallocate cilk_fiber that mapped to the user stack. The stack
+ // itself does not get deallocated (of course) but our data
+ // structure becomes divorced from it.
+
+#if FIBER_DEBUG >= 1
+ fprintf(stderr, "ThreadId=%p: w=%d: We are about to deallocate ff->fiber_self = %p here. w->l->scheduling_fiber = %p. w->l->type = %d\n",
+ cilkos_get_current_thread_id(),
+ w->self,
+ ff->fiber_self,
+ w->l->scheduling_fiber,
+ w->l->type);
+#endif
+ // The fiber in ff is a user-code fiber. The fiber in
+ // w->l->scheduling_fiber is a scheduling fiber. These fibers should
+ // never be equal. When a user worker returns (and will unbind), we
+ // should destroy only the fiber in ff. The scheduling fiber will be
+ // re-used.
+
+ CILK_ASSERT(ff->fiber_self != w->l->scheduling_fiber);
+
+ START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE) {
+ // This fiber might not be deallocated here if there
+ // is a pending exception on Windows that refers
+ // to this fiber.
+ //
+ // First "suspend" the fiber, and then try to delete it.
+ cilk_fiber_deallocate_from_thread(ff->fiber_self);
+ } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE);
+ ff->fiber_self = NULL;
+
+ /* Save reducer map into global_state object */
+ rm = w->reducer_map;
+ w->reducer_map = NULL;
+
+#if REDPAR_DEBUG >= 3
+ fprintf(stderr, "W=%d, reducer_map_to_delete=%p, was in ff=%p\n",
+ w->self,
+ rm,
+ ff);
+#endif
+ __cilkrts_destroy_full_frame(w, ff);
+
+
+ /* Work is never done. w->g->work_done = 1; __cilkrts_fence(); */
+ } END_WITH_WORKER_LOCK_OPTIONAL(w);
+
+
+ save_pedigree_leaf_from_user_worker(w);
+
+ // Workers can have NULL reducer maps now.
+ if (rm) {
+ __cilkrts_destroy_reducer_map(w, rm);
+ }
+
+
+#if FIBER_DEBUG >= 1
+ __cilkrts_worker* tmp = w;
+ int tmp_id = w->self;
+ fprintf(stderr, "w=%d: We are about unbind thread (w= %p)\n",
+ w->self,
+ w);
+#endif
+
+ w = NULL;
+
+ __cilkrts_unbind_thread();
+
+#if FIBER_DEBUG >= 1
+
+ fprintf(stderr, "w=%p, %d: Finished unbind\n",
+ tmp, tmp_id);
+#endif
+
+ /* Other workers will stop trying to steal if this was the last worker. */
+
+ return;
+}
+
+
+/*
+ * __cilkrts_restore_stealing
+ *
+ * Restore the protected_tail to a previous state, possibly allowing frames
+ * to be stolen. The dekker_protocol has been extended to steal only if
+ * head+1 is < protected_tail.
+ */
+
+void __cilkrts_restore_stealing(
+ __cilkrts_worker *w,
+ __cilkrts_stack_frame *volatile *saved_protected_tail)
+{
+ /* On most x86 this pair of operations would be slightly faster
+ as an atomic exchange due to the implicit memory barrier in
+ an atomic instruction. */
+ w->protected_tail = saved_protected_tail;
+ __cilkrts_fence();
+}
+
+/*
+ * __cilkrts_disallow_stealing
+ *
+ * Move the protected_tail to NEW_PROTECTED_TAIL, preventing any
+ * frames from being stolen. If NEW_PROTECTED_TAIL is NULL, prevent
+ * stealing from the whole queue. The dekker_protocol has been
+ * extended to only steal if head+1 is also < protected_tail.
+ */
+
+__cilkrts_stack_frame *volatile *__cilkrts_disallow_stealing(
+ __cilkrts_worker *w,
+ __cilkrts_stack_frame *volatile *new_protected_tail)
+{
+ __cilkrts_stack_frame *volatile *saved_protected_tail = w->protected_tail;
+
+ if (!new_protected_tail)
+ new_protected_tail = w->l->ltq;
+
+ if (w->protected_tail > new_protected_tail) {
+ w->protected_tail = new_protected_tail;
+ /* Issue a store-store barrier. The update to protected_tail
+ here must precede the update to tail in the next spawn.
+ On x86 this is probably not needed. */
+#if defined __GNUC__ && __ICC >= 1200 && !(__MIC__ ||__MIC2__)
+ _mm_sfence();
+#else
+ __cilkrts_fence();
+#endif
+ }
+
+ return saved_protected_tail;
+}
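+
+#if 0
+/* Illustrative usage sketch only (never compiled): a hypothetical
+ * caller pairing __cilkrts_disallow_stealing() with
+ * __cilkrts_restore_stealing(). Passing NULL protects the whole
+ * deque, per the comment above. */
+static void example_protect_whole_deque(__cilkrts_worker *w)
+{
+    __cilkrts_stack_frame *volatile *saved =
+        __cilkrts_disallow_stealing(w, NULL);
+
+    /* ... operate on w's deque without interference from thieves ... */
+
+    __cilkrts_restore_stealing(w, saved);
+}
+#endif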
+
+/*************************************************************
+ Initialization and startup
+*************************************************************/
+
+__cilkrts_worker *make_worker(global_state_t *g,
+ int self, __cilkrts_worker *w)
+{
+ w->self = self;
+ w->g = g;
+
+ w->pedigree.rank = 0; // Initial rank is 0
+ w->pedigree.parent = NULL;
+
+ w->l = (local_state *)__cilkrts_malloc(sizeof(*w->l));
+
+ __cilkrts_frame_malloc_per_worker_init(w);
+
+ w->reducer_map = NULL;
+ w->current_stack_frame = NULL;
+ w->reserved = NULL;
+
+ w->l->worker_magic_0 = WORKER_MAGIC_0;
+ w->l->team = NULL;
+ w->l->type = WORKER_FREE;
+
+ __cilkrts_mutex_init(&w->l->lock);
+ __cilkrts_mutex_init(&w->l->steal_lock);
+ w->l->do_not_steal = 0;
+ w->l->frame_ff = 0;
+ w->l->next_frame_ff = 0;
+ w->l->last_full_frame = NULL;
+
+ w->l->ltq = (__cilkrts_stack_frame **)
+ __cilkrts_malloc(g->ltqsize * sizeof(*w->l->ltq));
+ w->ltq_limit = w->l->ltq + g->ltqsize;
+ w->head = w->tail = w->l->ltq;
+
+ cilk_fiber_pool_init(&w->l->fiber_pool,
+ &g->fiber_pool,
+ g->stack_size,
+ g->fiber_pool_size,
+ 0, // alloc_max is 0. We don't allocate from the heap directly without checking the parent pool.
+ 0);
+#if FIBER_DEBUG >= 2
+ fprintf(stderr, "ThreadId=%p: Making w=%d (%p), pool = %p\n",
+ cilkos_get_current_thread_id(),
+ w->self, w,
+ &w->l->fiber_pool);
+#endif
+ w->l->scheduling_fiber = NULL;
+ w->l->original_pedigree_leaf = NULL;
+ w->l->rand_seed = 0; /* the scheduler will overwrite this field */
+
+ w->l->post_suspend = 0;
+ w->l->suspended_stack = 0;
+ w->l->fiber_to_free = NULL;
+ w->l->pending_exception = NULL;
+
+#if CILK_PROFILE
+ w->l->stats = __cilkrts_malloc(sizeof(statistics));
+ __cilkrts_init_stats(w->l->stats);
+#else
+ w->l->stats = NULL;
+#endif
+ w->l->steal_failure_count = 0;
+
+ w->l->work_stolen = 0;
+
+ // Initialize record/replay assuming we're doing neither
+ w->l->record_replay_fptr = NULL;
+ w->l->replay_list_root = NULL;
+ w->l->replay_list_entry = NULL;
+ w->l->signal_node = NULL;
+ // Nothing's been stolen yet
+ w->l->worker_magic_1 = WORKER_MAGIC_1;
+
+ /*w->parallelism_disabled = 0;*/
+
+ // Allow stealing of all frames. Sets w->protected_tail.
+ __cilkrts_restore_stealing(w, w->ltq_limit);
+
+ __cilkrts_init_worker_sysdep(w);
+
+ reset_THE_exception(w);
+
+ return w;
+}
+
+void destroy_worker(__cilkrts_worker *w)
+{
+ CILK_ASSERT (NULL == w->l->pending_exception);
+
+ // Deallocate the scheduling fiber
+ if (NULL != w->l->scheduling_fiber)
+ {
+ // The scheduling fiber is the main fiber for system workers and must
+ // be deallocated by the thread that created it. Thus, we can
+ // deallocate only free workers' (formerly user workers) scheduling
+ // fibers here.
+ CILK_ASSERT(WORKER_FREE == w->l->type);
+
+#if FIBER_DEBUG >=1
+ fprintf(stderr, "ThreadId=%p, w=%p, %d, deallocating scheduling fiber = %p, \n",
+ cilkos_get_current_thread_id(),
+ w,
+ w->self,
+ w->l->scheduling_fiber);
+#endif
+ int ref_count = cilk_fiber_remove_reference(w->l->scheduling_fiber, NULL);
+ // Scheduling fiber should never have extra references because of exceptions.
+ CILK_ASSERT(0 == ref_count);
+ w->l->scheduling_fiber = NULL;
+ }
+
+#if CILK_PROFILE
+ if (w->l->stats) {
+ __cilkrts_free(w->l->stats);
+ }
+#else
+ CILK_ASSERT(NULL == w->l->stats);
+#endif
+
+ /* Free any cached fibers. */
+ cilk_fiber_pool_destroy(&w->l->fiber_pool);
+
+ __cilkrts_destroy_worker_sysdep(w);
+
+ if (w->l->signal_node) {
+ CILK_ASSERT(WORKER_SYSTEM == w->l->type);
+ signal_node_destroy(w->l->signal_node);
+ }
+
+ __cilkrts_free(w->l->ltq);
+ __cilkrts_mutex_destroy(0, &w->l->lock);
+ __cilkrts_mutex_destroy(0, &w->l->steal_lock);
+ __cilkrts_frame_malloc_per_worker_cleanup(w);
+
+ __cilkrts_free(w->l);
+
+ // The caller is responsible for freeing the worker memory
+}
+
+/*
+ * Make a worker into a system worker.
+ */
+static void make_worker_system(__cilkrts_worker *w) {
+ CILK_ASSERT(WORKER_FREE == w->l->type);
+ w->l->type = WORKER_SYSTEM;
+ w->l->signal_node = signal_node_create();
+}
+
+void __cilkrts_deinit_internal(global_state_t *g)
+{
+ int i;
+ __cilkrts_worker *w;
+
+ // If there's no global state then we're done
+ if (NULL == g)
+ return;
+
+#ifdef CILK_PROFILE
+ __cilkrts_dump_stats_to_stderr(g);
+#endif
+
+ w = g->workers[0];
+ if (w->l->frame_ff) {
+ __cilkrts_destroy_full_frame(w, w->l->frame_ff);
+ w->l->frame_ff = 0;
+ }
+
+ // Release any resources used for record/replay
+ replay_term(g);
+
+ // Destroy any system dependent global state
+ __cilkrts_destroy_global_sysdep(g);
+
+ for (i = 0; i < g->total_workers; ++i)
+ destroy_worker(g->workers[i]);
+
+ // Free memory for all worker blocks which were allocated contiguously
+ __cilkrts_free(g->workers[0]);
+
+ __cilkrts_free(g->workers);
+
+ cilk_fiber_pool_destroy(&g->fiber_pool);
+ __cilkrts_frame_malloc_global_cleanup(g);
+
+ cilkg_deinit_global_state();
+}
+
+/*
+ * Wake the runtime by notifying the system workers that they can steal. The
+ * first user worker into the runtime should call this.
+ */
+static void wake_runtime(global_state_t *g)
+{
+ __cilkrts_worker *root;
+ if (g->P > 1) {
+ // Send a message to the root node. The message will propagate.
+ root = g->workers[0];
+ CILK_ASSERT(root->l->signal_node);
+ signal_node_msg(root->l->signal_node, 1);
+ }
+}
+
+/*
+ * Put the runtime to sleep. The last user worker out of the runtime should
+ * call this. Like Dad always said, turn out the lights when nobody's in the
+ * room.
+ */
+static void sleep_runtime(global_state_t *g)
+{
+ __cilkrts_worker *root;
+ if (g->P > 1) {
+ // Send a message to the root node. The message will propagate.
+ root = g->workers[0];
+ CILK_ASSERT(root->l->signal_node);
+ signal_node_msg(root->l->signal_node, 0);
+ }
+}
+
+/* Called when a user thread joins Cilk.
+ Global lock must be held. */
+void __cilkrts_enter_cilk(global_state_t *g)
+{
+ if (g->Q++ == 0) {
+ // If this is the first user thread to enter Cilk wake
+ // up all the workers.
+ wake_runtime(g);
+ }
+}
+
+/* Called when a user thread leaves Cilk.
+ Global lock must be held. */
+void __cilkrts_leave_cilk(global_state_t *g)
+{
+ if (--g->Q == 0) {
+ // Put the runtime to sleep.
+ sleep_runtime(g);
+ }
+}
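+
+/*
+ * For example: the first user thread to call __cilkrts_enter_cilk()
+ * moves Q from 0 to 1 and wakes the runtime; when the last user thread
+ * calls __cilkrts_leave_cilk() and Q drops back to 0, the runtime is
+ * put back to sleep. Both callers hold the global lock, so the Q
+ * updates themselves need no extra synchronization.
+ */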
+
+/*
+ * worker_runnable
+ *
+ * Return SCHEDULE_RUN if the worker should continue trying to steal,
+ * SCHEDULE_WAIT if it should wait, or SCHEDULE_EXIT if it should leave
+ * the scheduling loop.
+ */
+
+NOINLINE
+static enum schedule_t worker_runnable(__cilkrts_worker *w)
+{
+ global_state_t *g = w->g;
+
+ /* If this worker has something to do, do it.
+ Otherwise the work would be lost. */
+ if (w->l->next_frame_ff)
+ return SCHEDULE_RUN;
+
+ // If Cilk has explicitly (by the user) been told to exit (i.e., by
+ // __cilkrts_end_cilk() -> __cilkrts_stop_workers(g)), then exit the
+ // scheduling loop.
+ if (g->work_done)
+ return SCHEDULE_EXIT;
+
+ if (0 == w->self) {
+ // This worker is the root node and is the only one that may query the
+ // global state to see if there are still any user workers in Cilk.
+ if (w->l->steal_failure_count > g->max_steal_failures) {
+ if (signal_node_should_wait(w->l->signal_node)) {
+ return SCHEDULE_WAIT;
+ } else {
+ // Reset the steal_failure_count since we have verified that
+ // user workers are still in Cilk.
+ w->l->steal_failure_count = 0;
+ }
+ }
+ } else if (WORKER_SYSTEM == w->l->type &&
+ signal_node_should_wait(w->l->signal_node)) {
+ // This worker has been notified by its parent that it should stop
+ // trying to steal.
+ return SCHEDULE_WAIT;
+ }
+
+ return SCHEDULE_RUN;
+}
+
+
+
+// Initialize the worker structs, but don't start the workers themselves.
+static void init_workers(global_state_t *g)
+{
+ int total_workers = g->total_workers;
+ int i;
+ struct CILK_ALIGNAS(256) buffered_worker {
+ __cilkrts_worker w;
+ char buf[64];
+ } *workers_memory;
+
+ /* not needed if only one worker */
+ cilk_fiber_pool_init(&g->fiber_pool,
+ NULL,
+ g->stack_size,
+ g->global_fiber_pool_size, // buffer_size
+ g->max_stacks, // maximum # to allocate
+ 1);
+
+ cilk_fiber_pool_set_fiber_limit(&g->fiber_pool,
+ (g->max_stacks ? g->max_stacks : INT_MAX));
+
+ g->workers = (__cilkrts_worker **)
+ __cilkrts_malloc(total_workers * sizeof(*g->workers));
+
+ // Allocate 1 block of memory for workers to make life easier for tools
+ // like Inspector which run multithreaded and need to know the memory
+ // range for all the workers that will be accessed in a user's program
+ workers_memory = (struct buffered_worker*)
+ __cilkrts_malloc(sizeof(*workers_memory) * total_workers);
+
+ // Notify any tools that care (Cilkscreen and Inspector) that they should
+ // ignore memory allocated for the workers
+ __cilkrts_cilkscreen_ignore_block(&workers_memory[0],
+ &workers_memory[total_workers]);
+
+ // Initialize worker structs, including unused worker slots.
+ for (i = 0; i < total_workers; ++i) {
+ g->workers[i] = make_worker(g, i, &workers_memory[i].w);
+ }
+
+ // Set the workers in the first P - 1 slots to be system workers.
+ // Remaining worker structs already have type == 0.
+ for (i = 0; i < g->system_workers; ++i) {
+ make_worker_system(g->workers[i]);
+ }
+}
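+
+/*
+ * For illustration (assuming the common configuration in which
+ * g->system_workers == g->P - 1, as the startup code below suggests):
+ * with P = 4, worker slots 0..2 become WORKER_SYSTEM and every
+ * remaining slot up to total_workers stays WORKER_FREE until a user
+ * thread binds to it.
+ */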
+
+void __cilkrts_init_internal(int start)
+{
+ global_state_t *g = NULL;
+
+ if (cilkg_is_published()) {
+ g = cilkg_init_global_state();
+ }
+ else {
+
+ // We think the state has not been published yet.
+ // Grab the lock and try to initialize/publish.
+ global_os_mutex_lock();
+
+ if (cilkg_is_published()) {
+ // Some other thread must have snuck in and published.
+ g = cilkg_init_global_state();
+ }
+ else {
+ // Initialize and retrieve global state
+ g = cilkg_init_global_state();
+
+ // Set the scheduler pointer
+ g->scheduler = worker_scheduler_function;
+
+ // If we're running under a sequential P-Tool (Cilkscreen or
+ // Cilkview) then there's only one worker and we need to tell
+ // the tool about the extent of the stack
+ if (g->under_ptool)
+ __cilkrts_establish_c_stack();
+ init_workers(g);
+
+ // Initialize per-worker record/replay logging
+ replay_init_workers(g);
+
+ // Initialize any system dependent global state
+ __cilkrts_init_global_sysdep(g);
+
+
+ cilkg_publish_global_state(g);
+ }
+
+ global_os_mutex_unlock();
+ }
+
+ CILK_ASSERT(g);
+
+ if (start && !g->workers_running)
+ {
+ // Acquire the global OS mutex while we're starting the workers
+ global_os_mutex_lock();
+ if (!g->workers_running)
+ // Start P - 1 system workers since P includes the first user
+ // worker.
+ __cilkrts_start_workers(g, g->P - 1);
+ global_os_mutex_unlock();
+ }
+}
+
+
+/************************************************************************
+ Methods for reducer protocol.
+
+ Reductions occur in two places:
+ A. A full frame "ff" is returning from a spawn with a stolen parent.
+ B. A full frame "ff" is stalling at a sync.
+
+ To support parallel reductions, reduction functions need to be
+ executed while control is on a user stack, before jumping into the
+ runtime. These reductions can not occur while holding a worker or
+ frame lock.
+
+ Before a worker w executes a reduction in either Case A or B, w's
+ deque is empty.
+
+ Since parallel reductions push work onto the deque, we must do extra
+ work to set up runtime data structures properly before reductions
+ begin to allow stealing. (Normally, when we have only serial
+ reductions, once a worker w starts a reduction, its deque remains
+ empty until w either steals another frame or resumes a suspended
+ frame. Thus, we don't care about the state of the deque, since w
+ will reset its deque when setting up execution of a frame.)
+
+ To allow for parallel reductions, we coerce the runtime data
+ structures so that, from their perspective, it looks as though we
+ have spliced in an "execute_reductions()" function. Consider the
+ two cases for reductions:
+
+ Case A: Return from a spawn with a stolen parent.
+ Consider a spawned function g is returning on a worker w.
+ Assume:
+ - g was spawned from a parent function f.
+ - ff is the full frame for g's spawn helper, and
+ - sf is the __cilkrts_stack_frame for g's spawn helper.
+
+ We are conceptually splicing "execute_reductions()" so that it
+ occurs immediately before the spawn helper of g returns to f.
+
+ We do so by creating two different world views --- one for the
+ runtime data structures, and one for the actual control flow.
+
+ - Before reductions begin, the runtime data structures should
+ look as though the spawn helper of g is calling
+ "execute_reductions()", in terms of both the user stack and
+ worker deque. More precisely, w should satisfy the
+ following properties:
+
+ (a) w has ff as its full frame,
+ (b) w has sf as its __cilkrts_stack_frame, and
+ (c) w has an empty deque.
+
+ If the runtime satisfies these properties, then if w
+ encounters a spawn in a parallel reduction, it can push onto
+ a valid deque. Also, when w is stolen from, the thief will
+ build the correct tree of full frames.
+
+ - In actual control flow, however, once the
+ "execute_reductions()" function returns, it is actually
+ returning to runtime code instead of g's spawn helper.
+
+ At the point a worker w began executing reductions, the
+ control flow / compiled code had already finished g's spawn
+ helper, and w was about to enter the runtime. With parallel
+ reductions, some worker v (which might be different from w)
+ is the one returning to the runtime.
+
+
+ The reduction logic consists of 4 steps:
+
+ A1. Restore runtime data structures to make it look as though
+ the spawn helper of g() is still the currently executing
+ frame for w.
+
+ A2. Execute reductions on the user stack. Reductions also
+ includes the logic for exceptions and stacks. Note that
+ reductions start on w, but may finish on a different
+ worker if there is parallelism in the reduce.
+
+ A3. Splice out ff from the tree of full frames.
+
+ A4. Jump into the runtime/scheduling stack and execute
+ "do_return_from_spawn". This method
+
+ (a) Frees the user stack we were just on if it is no longer needed.
+ (b) Decrement the join counter on ff->parent, and tries to do a
+ provably good steal.
+ (c) Clean up the full frame ff.
+
+
+ Case B: Stalling at a sync.
+
+ Consider a function g(), with full frame ff and
+ __cilkrts_stack_frame sf. Suppose g() stalls at a sync, and we
+ are executing reductions.
+
+ Conceptually, we are splicing in an "execute_reductions()"
+ function into g() as the last action that g() takes immediately
+ before it executes the cilk_sync.
+
+ The reduction logic for this case is similar to Case A.
+
+ B1. Restore the runtime data structures.
+
+ The main difference from Case A is that ff/sf is still a
+ frame that needs to be executed later (since it is stalling
+ at a cilk_sync). Thus, we also need to save the current
+ stack information into "ff" so that we can correctly resume
+ execution of "ff" after the sync.
+
+ B2. Execute reductions on the user stack.
+
+ B3. No frame to splice out of the tree.
+
+ B4. Jump into the runtime/scheduling stack and execute "do_sync".
+ This method:
+ (a) Frees the user stack we were just on if it is no longer needed.
+ (b) Tries to execute a provably good steal.
+
+ Finally, for the reducer protocol, we consider two reduction paths,
+ namely a "fast" and "slow" path. On a fast path, only trivial
+ merges of reducer maps happen (i.e., one or both of the maps are
+ NULL). Otherwise, on the slow path, a reduction actually needs to
+ happen.
+
+*****************************************************************/
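+
+/*
+ * For illustration: when all three maps (left, middle, right) are
+ * non-NULL, the slow path below merges them pairwise from the left,
+ * conceptually
+ *
+ *     left = REDUCE(left, middle);
+ *     left = REDUCE(left, right);
+ *
+ * via repeated_merge_reducer_maps(), releasing the frame lock while
+ * each user-supplied reduce function runs.
+ */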
+
+/**
+ * @brief Locations to store the result of a reduction.
+ *
+ * Struct storing pointers to the fields in our "left" sibling that we
+ * should update when splicing out a full frame or stalling at a sync.
+ */
+typedef struct {
+ /** A pointer to the location of our left reducer map. */
+ struct cilkred_map **map_ptr;
+
+ /** A pointer to the location of our left exception. */
+ struct pending_exception_info **exception_ptr;
+} splice_left_ptrs;
+
+/**
+ * For a full frame returning from a spawn, calculate the pointers to
+ * the maps and exceptions to my left.
+ *
+ * @param w The currently executing worker.
+ * @param ff Full frame that is dying
+ * @return Pointers to our "left" for reducers and exceptions.
+ */
+static inline
+splice_left_ptrs compute_left_ptrs_for_spawn_return(__cilkrts_worker *w,
+ full_frame *ff)
+{
+ // ASSERT: we hold the lock on ff->parent
+
+ splice_left_ptrs left_ptrs;
+ if (ff->left_sibling) {
+ left_ptrs.map_ptr = &ff->left_sibling->right_reducer_map;
+ left_ptrs.exception_ptr = &ff->left_sibling->right_pending_exception;
+ }
+ else {
+ full_frame *parent_ff = ff->parent;
+ left_ptrs.map_ptr = &parent_ff->children_reducer_map;
+ left_ptrs.exception_ptr = &parent_ff->child_pending_exception;
+ }
+ return left_ptrs;
+}
+
+/**
+ * For a full frame at a sync, calculate the pointers to the maps and
+ * exceptions to my left.
+ *
+ * @param w The currently executing worker.
+ * @param ff Full frame that is stalling at a sync.
+ * @return Pointers to our "left" for reducers and exceptions.
+ */
+static inline
+splice_left_ptrs compute_left_ptrs_for_sync(__cilkrts_worker *w,
+ full_frame *ff)
+{
+ // ASSERT: we hold the lock on ff
+ splice_left_ptrs left_ptrs;
+
+ // Figure out which map to the left we should merge into.
+ if (ff->rightmost_child) {
+ CILK_ASSERT(ff->rightmost_child->parent == ff);
+ left_ptrs.map_ptr = &(ff->rightmost_child->right_reducer_map);
+ left_ptrs.exception_ptr = &(ff->rightmost_child->right_pending_exception);
+ }
+ else {
+ // We have no children, so we should be the last
+ // worker at the sync... "left" is our child map.
+ left_ptrs.map_ptr = &(ff->children_reducer_map);
+ left_ptrs.exception_ptr = &(ff->child_pending_exception);
+ }
+ return left_ptrs;
+}
+
+/**
+ * After we have completed all reductions on a spawn return, call this
+ * method to finish up before jumping into the runtime.
+ *
+ * 1. Perform the "reduction" on stacks, i.e., execute the left
+ * holder logic to pass the leftmost stack up.
+ *
+ * w->l->fiber_to_free holds any stack that needs to be freed
+ * when control switches into the runtime fiber.
+ *
+ * 2. Unlink and remove child_ff from the tree of full frames.
+ *
+ * @param w The currently executing worker.
+ * @param parent_ff The parent of child_ff.
+ * @param child_ff The full frame returning from a spawn.
+ */
+static inline
+void finish_spawn_return_on_user_stack(__cilkrts_worker *w,
+ full_frame *parent_ff,
+ full_frame *child_ff)
+{
+ CILK_ASSERT(w->l->fiber_to_free == NULL);
+
+ // Execute left-holder logic for stacks.
+ if (child_ff->left_sibling || parent_ff->fiber_child) {
+ // Case where we are not the leftmost stack.
+ CILK_ASSERT(parent_ff->fiber_child != child_ff->fiber_self);
+
+ // Remember any fiber we need to free in the worker.
+ // After we jump into the runtime, we will actually do the
+ // free.
+ w->l->fiber_to_free = child_ff->fiber_self;
+ }
+ else {
+ // We are leftmost, pass stack/fiber up to parent.
+ // Thus, no stack/fiber to free.
+ parent_ff->fiber_child = child_ff->fiber_self;
+ w->l->fiber_to_free = NULL;
+ }
+
+ child_ff->fiber_self = NULL;
+
+ unlink_child(parent_ff, child_ff);
+}
+
+
+/**
+ * Executes any fast reductions necessary to splice ff out of the tree
+ * of full frames.
+ *
+ * This "fast" path performs only trivial merges of reducer maps,
+ * i.e., when one of them is NULL.
+ * (See slow_path_reductions_for_spawn_return() for slow path.)
+ *
+ * Returns NULL if we finished all reductions.
+ * Returns the address of the left map if there are still reductions
+ * to execute, and we should execute the slow path.
+ *
+ * This method assumes w holds the frame lock on parent_ff.
+ * After this method completes:
+ * 1. We have spliced ff out of the tree of full frames.
+ * 2. The reducer maps of child_ff have been deposited
+ * "left" according to the reducer protocol.
+ * 3. w->l->fiber_to_free stores the fiber
+ * that needs to be freed once we jump into the runtime.
+ *
+ * We have not, however, decremented the join counter on ff->parent.
+ * This prevents any other workers from resuming execution of the parent.
+ *
+ * @param w The currently executing worker.
+ * @param ff The full frame returning from a spawn.
+ * @return NULL if we finished all reductions.
+ * @return The address where the left map is stored (which should be passed to
+ * slow_path_reductions_for_spawn_return()) if there are
+ * still reductions to execute.
+ */
+struct cilkred_map**
+fast_path_reductions_for_spawn_return(__cilkrts_worker *w,
+ full_frame *ff)
+{
+ // ASSERT: we hold ff->parent->lock.
+ splice_left_ptrs left_ptrs;
+
+ CILK_ASSERT(NULL == w->l->pending_exception);
+
+ // Figure out the pointers to the left where I want
+ // to put reducers and exceptions.
+ left_ptrs = compute_left_ptrs_for_spawn_return(w, ff);
+
+ // Go ahead and merge exceptions while holding the lock.
+ splice_exceptions_for_spawn(w, ff, left_ptrs.exception_ptr);
+
+ // Now check if we have any reductions to perform.
+ //
+ // Consider all the cases of left, middle and right maps.
+ // 0. (-, -, -) : finish and return NULL (done).
+ // 1. (L, -, -) : finish and return NULL (done).
+ // 2. (-, M, -) : slide over to left, finish, and return NULL.
+ // 3. (L, M, -) : return the left map pointer (slow path).
+ // 4. (-, -, R) : slide over to left, finish, and return NULL.
+ // 5. (L, -, R) : return the left map pointer (slow path).
+ // 6. (-, M, R) : return the left map pointer (slow path).
+ // 7. (L, M, R) : return the left map pointer (slow path).
+ //
+ // In terms of code:
+ // L == *left_ptrs.map_ptr
+ // M == w->reducer_map
+ // R == ff->right_reducer_map.
+ //
+ // The goal of the code below is to execute the fast path with
+ // as few branches and writes as possible.
+
+ int case_value = (*(left_ptrs.map_ptr) != NULL);
+ case_value += ((w->reducer_map != NULL) << 1);
+ case_value += ((ff->right_reducer_map != NULL) << 2);
+
+ // Fastest path is case_value == 0 or 1.
+ if (case_value >=2) {
+ switch (case_value) {
+ case 2:
+ *(left_ptrs.map_ptr) = w->reducer_map;
+ w->reducer_map = NULL;
+ return NULL;
+ break;
+ case 4:
+ *(left_ptrs.map_ptr) = ff->right_reducer_map;
+ ff->right_reducer_map = NULL;
+ return NULL;
+ default:
+ // If we have to execute the slow path, then
+ // return the pointer to the place to deposit the left
+ // map.
+ return left_ptrs.map_ptr;
+ }
+ }
+
+ // Do nothing
+ return NULL;
+}
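+
+#if 0
+/* Illustrative sketch only (never compiled): the bit encoding used by
+ * fast_path_reductions_for_spawn_return() above, recomputed by a
+ * hypothetical helper. Bit 0 = left map present, bit 1 = middle
+ * (worker) map present, bit 2 = right map present. Values 0 and 1
+ * need no merging, 2 and 4 are handled by sliding one map left, and
+ * 3, 5, 6, 7 fall through to the slow path. */
+static int encode_reducer_case(struct cilkred_map *left,
+                               struct cilkred_map *middle,
+                               struct cilkred_map *right)
+{
+    int case_value = (left != NULL);
+    case_value += ((middle != NULL) << 1);
+    case_value += ((right != NULL) << 2);
+    return case_value;   /* e.g., (L, -, R) encodes as 1 + 4 == 5 */
+}
+#endif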
+
+
+/**
+ * Executes any reductions necessary to splice "ff" frame out of
+ * the steal tree.
+ *
+ * This method executes the "slow" path for reductions on a spawn
+ * return, i.e., there are non-NULL maps that need to be merged
+ * together.
+ *
+ * This method should execute only if
+ * fast_path_reductions_for_spawn_return() returns a non-NULL
+ * left_map_ptr.
+ *
+ * Upon entry, left_map_ptr should be the location of the left map
+ * at the start of the reduction, as calculated by
+ * fast_path_reductions_for_spawn_return().
+ *
+ * After this method completes:
+ * 1. We have spliced ff out of the tree of full frames.
+ * 2. The reducer maps of child_ff have been deposited
+ * "left" according to the reducer protocol.
+ * 3. w->l->fiber_to_free stores the fiber
+ * that needs to be freed once we jump into the runtime.
+ * We have not, however, decremented the join counter on ff->parent,
+ * so no one can resume execution of the parent yet.
+ *
+ * WARNING:
+ * This method assumes the lock on ff->parent is held upon entry, and
+ * upon exit, the worker that returns still holds the lock on ff->parent.
+ * This method can, however, release and reacquire the lock on ff->parent.
+ *
+ * @param w The currently executing worker.
+ * @param ff The full frame returning from a spawn.
+ * @param left_map_ptr Pointer to our initial left map.
+ * @return The worker that this method returns on.
+ */
+static __cilkrts_worker*
+slow_path_reductions_for_spawn_return(__cilkrts_worker *w,
+ full_frame *ff,
+ struct cilkred_map **left_map_ptr)
+{
+
+ // CILK_ASSERT: w is holding frame lock on parent_ff.
+#if REDPAR_DEBUG > 0
+ CILK_ASSERT(!ff->rightmost_child);
+ CILK_ASSERT(!ff->is_call_child);
+#endif
+
+ // Loop invariant:
+ // When beginning this loop, we should
+ // 1. Be holding the lock on ff->parent.
+ // 2. left_map_ptr should be the address of the pointer to the left map.
+ // 3. All maps should be slid over left by one, if possible.
+ // 4. All exceptions should be merged so far.
+ while (1) {
+
+ // Slide middle map left if possible.
+ if (!(*left_map_ptr)) {
+ *left_map_ptr = w->reducer_map;
+ w->reducer_map = NULL;
+ }
+ // Slide right map to middle if possible.
+ if (!w->reducer_map) {
+ w->reducer_map = ff->right_reducer_map;
+ ff->right_reducer_map = NULL;
+ }
+
+ // Since we slid everything left by one,
+ // we are finished if there is no middle map.
+ if (!w->reducer_map) {
+ verify_current_wkr(w);
+ return w;
+ }
+ else {
+ struct cilkred_map* left_map;
+ struct cilkred_map* middle_map;
+ struct cilkred_map* right_map;
+
+ // Take all the maps from their respective locations.
+ // We can't leave them in place and execute a reduction because these fields
+ // might change once we release the lock.
+ left_map = *left_map_ptr;
+ *left_map_ptr = NULL;
+ middle_map = w->reducer_map;
+ w->reducer_map = NULL;
+ right_map = ff->right_reducer_map;
+ ff->right_reducer_map = NULL;
+
+ // WARNING!!! Lock release here.
+ // We have reductions to execute (and we can't hold locks).
+ __cilkrts_frame_unlock(w, ff->parent);
+
+ // Merge all reducers into the left map.
+ left_map = repeated_merge_reducer_maps(&w,
+ left_map,
+ middle_map);
+ verify_current_wkr(w);
+ left_map = repeated_merge_reducer_maps(&w,
+ left_map,
+ right_map);
+ verify_current_wkr(w);
+ CILK_ASSERT(NULL == w->reducer_map);
+ // Put the final answer back into w->reducer_map.
+ w->reducer_map = left_map;
+
+ // Save any exceptions generated because of the reduction
+ // process from the returning worker. These get merged
+ // the next time around the loop.
+ CILK_ASSERT(NULL == ff->pending_exception);
+ ff->pending_exception = w->l->pending_exception;
+ w->l->pending_exception = NULL;
+
+ // Lock ff->parent for the next loop around.
+ __cilkrts_frame_lock(w, ff->parent);
+
+ // Once we have the lock again, recompute who is to our
+ // left.
+ splice_left_ptrs left_ptrs;
+ left_ptrs = compute_left_ptrs_for_spawn_return(w, ff);
+
+ // Update the pointer for the left map.
+ left_map_ptr = left_ptrs.map_ptr;
+ // Splice the exceptions for spawn.
+ splice_exceptions_for_spawn(w, ff, left_ptrs.exception_ptr);
+ }
+ }
+ // We should never break out of this loop.
+
+ CILK_ASSERT(0);
+ return NULL;
+}
+
+
+
+/**
+ * Execute reductions when returning from a spawn whose parent has
+ * been stolen.
+ *
+ * Execution may start on w, but may finish on a different worker.
+ * This method acquires/releases the lock on ff->parent.
+ *
+ * @param w The currently executing worker.
+ * @param ff The full frame of the spawned function that is returning.
+ * @param returning_sf The __cilkrts_stack_frame for this returning function.
+ * @return The worker returning from this method.
+ */
+static __cilkrts_worker*
+execute_reductions_for_spawn_return(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *returning_sf)
+{
+ // Step A1 from reducer protocol described above.
+ //
+ // Coerce the runtime into thinking that
+ // ff/returning_sf are still on the bottom of
+ // w's deque.
+ restore_frame_for_spawn_return_reduction(w, ff, returning_sf);
+
+ // Step A2 and A3: Execute reductions on user stack.
+ BEGIN_WITH_FRAME_LOCK(w, ff->parent) {
+ struct cilkred_map **left_map_ptr;
+ left_map_ptr = fast_path_reductions_for_spawn_return(w, ff);
+
+ // Pointer will be non-NULL if there are
+ // still reductions to execute.
+ if (left_map_ptr) {
+ // WARNING: This method call may release the lock
+ // on ff->parent and re-acquire it (possibly on a
+ // different worker).
+ // We can't hold locks while actually executing
+ // reduce functions.
+ w = slow_path_reductions_for_spawn_return(w,
+ ff,
+ left_map_ptr);
+ verify_current_wkr(w);
+ }
+
+ finish_spawn_return_on_user_stack(w, ff->parent, ff);
+ // WARNING: the use of this lock macro is deceptive.
+ // The worker may have changed here.
+ } END_WITH_FRAME_LOCK(w, ff->parent);
+ return w;
+}
+
+
+
+/**
+ * Execute fast "reductions" when ff stalls at a sync.
+ *
+ * @param w The currently executing worker.
+ * @param ff The full frame stalling at a sync.
+ * @return 1 if we are finished with all reductions after calling this method.
+ * @return 0 if we still need to execute the slow path reductions.
+ */
+static inline
+int fast_path_reductions_for_sync(__cilkrts_worker *w,
+ full_frame *ff) {
+ // Return 0 if there is some reduction that needs to happen.
+ return !(w->reducer_map || ff->pending_exception);
+}
+
+/**
+ * Executes slow reductions when ff stalls at a sync.
+ * This method should execute only if
+ * fast_path_reductions_for_sync(w, ff) returned 0.
+ *
+ * After this method completes:
+ * 1. ff's current reducer map has been deposited into
+ * right_reducer_map of ff's rightmost child, or
+ * ff->children_reducer_map if ff has no children.
+ * 2. Similarly for ff's current exception.
+ * 3. Nothing to calculate for stacks --- if we are stalling
+ * we will always free a stack.
+ *
+ * This method may repeatedly acquire/release the lock on ff.
+ *
+ * @param w The currently executing worker.
+ * @param ff The full frame stalling at a sync.
+ * @return The worker returning from this method.
+ */
+static __cilkrts_worker*
+slow_path_reductions_for_sync(__cilkrts_worker *w,
+ full_frame *ff)
+{
+ struct cilkred_map *left_map;
+ struct cilkred_map *middle_map;
+
+#if (REDPAR_DEBUG > 0)
+ CILK_ASSERT(ff);
+ CILK_ASSERT(w->head == w->tail);
+#endif
+
+ middle_map = w->reducer_map;
+ w->reducer_map = NULL;
+
+ // Loop invariant: middle_map should be valid (the current map to reduce).
+ // left_map is junk.
+ // w->reducer_map == NULL.
+ while (1) {
+ BEGIN_WITH_FRAME_LOCK(w, ff) {
+ splice_left_ptrs left_ptrs = compute_left_ptrs_for_sync(w, ff);
+
+ // Grab the "left" map and store pointers to those locations.
+ left_map = *(left_ptrs.map_ptr);
+ *(left_ptrs.map_ptr) = NULL;
+
+ // Slide the maps in our struct left as far as possible.
+ if (!left_map) {
+ left_map = middle_map;
+ middle_map = NULL;
+ }
+
+ *(left_ptrs.exception_ptr) =
+ __cilkrts_merge_pending_exceptions(w,
+ *left_ptrs.exception_ptr,
+ ff->pending_exception);
+ ff->pending_exception = NULL;
+
+ // If there is no middle map, then we are done.
+ // Deposit left and return.
+ if (!middle_map) {
+ *(left_ptrs).map_ptr = left_map;
+ #if (REDPAR_DEBUG > 0)
+ CILK_ASSERT(NULL == w->reducer_map);
+ #endif
+ // Sanity check upon leaving the loop.
+ verify_current_wkr(w);
+ // Make sure to unlock before we return!
+ __cilkrts_frame_unlock(w, ff);
+ return w;
+ }
+ } END_WITH_FRAME_LOCK(w, ff);
+
+ // If we get here, we have a nontrivial reduction to execute.
+ middle_map = repeated_merge_reducer_maps(&w,
+ left_map,
+ middle_map);
+ verify_current_wkr(w);
+
+ // Save any exceptions generated because of the reduction
+ // process. These get merged the next time around the
+ // loop.
+ CILK_ASSERT(NULL == ff->pending_exception);
+ ff->pending_exception = w->l->pending_exception;
+ w->l->pending_exception = NULL;
+ }
+
+ // We should never break out of the loop above.
+ CILK_ASSERT(0);
+ return NULL;
+}
+
+
+/**
+ * Execute reductions when ff stalls at a sync.
+ *
+ * Execution starts on w, but may finish on a different worker.
+ * This method may acquire/release the lock on ff.
+ *
+ * @param w The currently executing worker.
+ * @param ff The full frame of the spawned function at the sync
+ * @param sf_at_sync The __cilkrts_stack_frame stalling at a sync
+ * @return The worker returning from this method.
+ */
+static __cilkrts_worker*
+execute_reductions_for_sync(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf_at_sync)
+{
+ int finished_reductions;
+ // Step B1 from reducer protocol above:
+ // Restore runtime invariants.
+ //
+ // The following code for this step is almost equivalent to
+ // the following sequence:
+ // 1. disown(w, ff, sf_at_sync, "sync") (which itself
+ // calls make_unrunnable(w, ff, sf_at_sync))
+ // 2. make_runnable(w, ff, sf_at_sync).
+ //
+ // The "disown" will mark the frame "sf_at_sync"
+ // as stolen and suspended, and save its place on the stack,
+ // so it can be resumed after the sync.
+ //
+ // The difference is, that we don't want the disown to
+ // break the following connections yet, since we are
+ // about to immediately make sf/ff runnable again anyway.
+ // sf_at_sync->worker == w
+ // w->l->frame_ff == ff.
+ //
+ // These connections are needed for parallel reductions, since
+ // we will use sf / ff as the stack frame / full frame for
+ // executing any potential reductions.
+ //
+ // TBD: Can we refactor the disown / make_unrunnable code
+ // to avoid the code duplication here?
+
+ ff->call_stack = NULL;
+
+ // Normally, "make_unrunnable" would add CILK_FRAME_STOLEN and
+ // CILK_FRAME_SUSPENDED to sf_at_sync->flags and save the state of
+ // the stack so that a worker can resume the frame in the correct
+ // place.
+ //
+ // But on this path, CILK_FRAME_STOLEN should already be set.
+ // Also, we technically don't want to suspend the frame until
+ // the reduction finishes.
+ // We do, however, need to save the stack before
+ // we start any reductions, since the reductions might push more
+ // data onto the stack.
+    CILK_ASSERT(sf_at_sync->flags & CILK_FRAME_STOLEN);
+
+ __cilkrts_put_stack(ff, sf_at_sync);
+ __cilkrts_make_unrunnable_sysdep(w, ff, sf_at_sync, 1,
+ "execute_reductions_for_sync");
+ CILK_ASSERT(w->l->frame_ff == ff);
+
+ // Step B2: Execute reductions on user stack.
+ // Check if we have any "real" reductions to do.
+ finished_reductions = fast_path_reductions_for_sync(w, ff);
+
+ if (!finished_reductions) {
+ // Still have some real reductions to execute.
+ // Run them here.
+
+ // This method may acquire/release the lock on ff.
+ w = slow_path_reductions_for_sync(w, ff);
+
+        // The previous call may return on a different worker
+        // than the one we started on.
+ verify_current_wkr(w);
+ }
+
+#if REDPAR_DEBUG >= 0
+ CILK_ASSERT(w->l->frame_ff == ff);
+ CILK_ASSERT(ff->call_stack == NULL);
+#endif
+
+ // Now we suspend the frame ff (since we've
+ // finished the reductions). Roughly, we've split apart the
+ // "make_unrunnable" call here --- we've already saved the
+ // stack info earlier before the reductions execute.
+ // All that remains is to restore the call stack back into the
+ // full frame, and mark the frame as suspended.
+ ff->call_stack = sf_at_sync;
+ sf_at_sync->flags |= CILK_FRAME_SUSPENDED;
+
+ // At a nontrivial sync, we should always free the current fiber,
+    // because it cannot be leftmost.
+ w->l->fiber_to_free = ff->fiber_self;
+ ff->fiber_self = NULL;
+ return w;
+}
+
+
+/*
+ Local Variables: **
+ c-file-style:"bsd" **
+ c-basic-offset:4 **
+ indent-tabs-mode:nil **
+ End: **
+*/
diff --git a/gcc-4.9/libcilkrts/runtime/scheduler.h b/gcc-4.9/libcilkrts/runtime/scheduler.h
new file mode 100644
index 000000000..543adaf68
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/scheduler.h
@@ -0,0 +1,421 @@
+/* scheduler.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file scheduler.h
+ *
+ * @brief scheduler.h declares routines for the Intel Cilk Plus scheduler,
+ * making it the heart of the Intel Cilk Plus implementation.
+ */
+
+#ifndef INCLUDED_SCHEDULER_DOT_H
+#define INCLUDED_SCHEDULER_DOT_H
+
+#include <cilk/common.h>
+#include <internal/abi.h>
+
+#include "rts-common.h"
+#include "full_frame.h"
+#include "reducer_impl.h"
+#include "global_state.h"
+
+#ifdef CILK_RECORD_REPLAY
+#include "record-replay.h"
+#endif
+
+__CILKRTS_BEGIN_EXTERN_C
+
+
+/**
+ * @brief Flag to disable parallel reductions.
+ *
+ * Set to 0 to allow parallel reductions.
+ */
+#define DISABLE_PARALLEL_REDUCERS 0
+
+/**
+ * @brief Debugging level for parallel reductions.
+ *
+ * Print debugging messages and assertions for parallel reducers. 0 is
+ * no debugging. A higher value generates more output.
+ */
+#define REDPAR_DEBUG 0
+
+/**
+ * @brief Lock the worker mutex to allow exclusive access to the
+ * values in the @c __cilkrts_worker and local_state structures.
+ *
+ * @pre @c w->l->do_not_steal must not be set. Essentially this
+ * condition asserts that the worker is not locked recursively.
+ *
+ * @param w The worker to lock.
+ */
+COMMON_PORTABLE
+void __cilkrts_worker_lock(__cilkrts_worker *w);
+
+/**
+ * @brief Unlock the worker mutex.
+ *
+ * @pre @c w->l->do_not_steal must be set. Essentially this condition
+ * asserts that the worker has been previously locked.
+ *
+ * @param w The worker to unlock.
+ */
+COMMON_PORTABLE
+void __cilkrts_worker_unlock(__cilkrts_worker *w);
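+
+/*
+ * Illustrative pairing of the two calls above (sketch only, not a
+ * declaration in this header):
+ *
+ *     __cilkrts_worker_lock(w);
+ *     ... read or update worker state that thieves may also examine ...
+ *     __cilkrts_worker_unlock(w);
+ */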
+
+/**
+ * @brief Push the next full frame to be made active in this worker
+ * and increment its join counter.
+ *
+ * __cilkrts_push_next_frame and pop_next_frame work on a one-element queue.
+ * This queue is used to communicate across the runtime from the code that
+ * wants to activate a frame to the code that can actually begin execution
+ * on that frame.  They are asymmetrical in that push increments the join
+ * counter but pop does not decrement it. Rather, a single push/pop
+ * combination makes a frame active and increments its join counter once.
+ *
+ * @note A system worker may choose to push work onto a user worker if
+ * the work is the continuation from a sync which only the user worker
+ * may complete.
+ *
+ * @param w The worker which the frame is to be pushed onto.
+ * @param ff The full_frame which is to be continued by the worker.
+ */
+COMMON_PORTABLE
+void __cilkrts_push_next_frame(__cilkrts_worker *w,
+ full_frame *ff);
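+
+/*
+ * Illustrative sketch (not a declaration in this header) of the intended
+ * pairing with the scheduler-internal pop_next_frame mentioned above:
+ *
+ *     __cilkrts_push_next_frame(w, ff);       // increments ff's join counter
+ *     ...
+ *     full_frame *next = pop_next_frame(w);   // does NOT decrement it
+ *
+ * A single push/pop pair therefore makes the frame active and bumps its
+ * join counter exactly once.
+ */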
+
+/**
+ * @brief Sync on this worker.
+ *
+ * If this worker is the last to reach the sync, execution may resume
+ * on this worker after the sync.
+ *
+ * If this worker is not the last spawned child to reach the sync,
+ * then execution is suspended and the worker will re-enter the
+ * scheduling loop, looking for work it can steal.
+ *
+ * This function will jump into the runtime to switch to the scheduling
+ * stack to implement most of its logic.
+ *
+ * @param w The worker which is executing the sync.
+ * @param sf The __cilkrts_stack_frame containing the sync.
+ */
+COMMON_PORTABLE
+NORETURN __cilkrts_c_sync(__cilkrts_worker *w,
+ __cilkrts_stack_frame *sf);
+
+/**
+ * @brief Worker @c w completely promotes its own deque, simulating the case
+ * where the whole deque is stolen.
+ *
+ * We use this mechanism to force the allocation of new storage for
+ * reducers for race-detection purposes.
+ *
+ * This method is called from the reducer lookup logic when
+ * @c g->force_reduce is set.
+ *
+ * @warning Use of "force_reduce" is known to have bugs when run with
+ * more than 1 worker.
+ *
+ * @param w The worker which is to have all entries in its deque
+ * promoted to full frames.
+ */
+COMMON_PORTABLE
+void __cilkrts_promote_own_deque(__cilkrts_worker *w);
+
+/**
+ * Called when a spawned function attempts to return and
+ * __cilkrts_undo_detach() fails. This can happen for two reasons:
+ *
+ * @li If another worker is considering stealing our parent, it bumps the
+ * exception pointer while doing so, which causes __cilkrts_undo_detach()
+ * to fail.  If the other worker didn't complete the steal of our parent, we
+ * may still be able to return to it, either because the steal attempt failed
+ * or because we won the race for the tail pointer.
+ *
+ * @li If the function's parent has been stolen then we cannot return. Instead
+ * we'll longjmp into the runtime to switch onto the scheduling stack to
+ * execute do_return_from_spawn() and determine what to do. Either this
+ * worker is the last one to the sync, in which case we need to jump to the
+ * sync, or this worker is not the last one to the sync, in which case we'll
+ * abandon this work and jump to the scheduling loop to search for more work
+ * we can steal.
+ *
+ * @param w The worker which is attempting to return from a spawn to
+ * a stolen parent.
+ * @param returning_sf The stack frame which is returning.
+ */
+COMMON_PORTABLE
+void __cilkrts_c_THE_exception_check(__cilkrts_worker *w,
+ __cilkrts_stack_frame *returning_sf);
+
+/**
+ * @brief Return an exception to a stolen parent.
+ *
+ * Used by the gcc implementation of exceptions to return an exception
+ * to a stolen parent.
+ *
+ * @param w The worker which is attempting to return from a spawn with an
+ * exception to a stolen parent.
+ * @param returning_sf The stack frame which is returning.
+ */
+COMMON_PORTABLE
+NORETURN __cilkrts_exception_from_spawn(__cilkrts_worker *w,
+ __cilkrts_stack_frame *returning_sf);
+
+/**
+ * @brief Used by the Windows implementations of exceptions to migrate an exception
+ * across fibers.
+ *
+ * Call this function when an exception has been thrown and has to
+ * traverse across a steal. The exception has already been wrapped
+ * up, so all that remains is to longjmp() into the continuation,
+ * sync, and re-raise it.
+ *
+ * @param sf The __cilkrts_stack_frame for the frame that is attempting to
+ * return an exception to a stolen parent.
+ */
+void __cilkrts_migrate_exception (__cilkrts_stack_frame *sf);
+
+/**
+ * @brief Return from a call, not a spawn, in a frame that has been
+ * stolen at some point.
+ *
+ * @param w The worker that is returning from a frame which has been stolen.
+ */
+COMMON_PORTABLE
+void __cilkrts_return(__cilkrts_worker *w);
+
+/**
+ * @brief Special return from the initial frame.
+ *
+ * This method will be called from @c __cilkrts_leave_frame if
+ * @c CILK_FRAME_LAST is set.
+ *
+ * This function will do the things necessary to cleanup, and unbind the
+ * thread from the Intel Cilk Plus runtime. If this is the last user
+ * worker unbinding from the runtime, all system worker threads will be
+ * suspended.
+ *
+ * @pre @c w must be the currently executing worker, and must be a user
+ * worker.
+ *
+ * @param w The worker that's returning from the initial frame.
+ */
+COMMON_PORTABLE
+void __cilkrts_c_return_from_initial(__cilkrts_worker *w);
+
+/**
+ * @brief Used by exception handling code to pop an entry from the
+ * worker's deque.
+ *
+ * @param w Worker to pop the entry from
+ *
+ * @return __cilkrts_stack_frame of parent call
+ * @return NULL if the deque is empty
+ */
+COMMON_PORTABLE
+__cilkrts_stack_frame *__cilkrts_pop_tail(__cilkrts_worker *w);
+
+/**
+ * @brief Modifies the worker's protected_tail to prevent frames from
+ * being stolen.
+ *
+ * The Dekker protocol has been extended to only steal if head+1 is also
+ * less than protected_tail.
+ *
+ * @param w The worker to be modified.
+ * @param new_protected_tail The new setting for protected_tail, or NULL if the
+ * entire deque is to be protected
+ *
+ * @return Previous value of protected tail.
+ */
+COMMON_PORTABLE
+__cilkrts_stack_frame *volatile *__cilkrts_disallow_stealing(
+ __cilkrts_worker *w,
+ __cilkrts_stack_frame *volatile *new_protected_tail);
+
+/**
+ * @brief Restores the protected tail to a previous state, possibly
+ * allowing frames to be stolen.
+ *
+ * @param w The worker to be modified.
+ * @param saved_protected_tail A previous setting for protected_tail that is
+ * to be restored
+ */
+COMMON_PORTABLE
+void __cilkrts_restore_stealing(
+ __cilkrts_worker *w,
+ __cilkrts_stack_frame *volatile *saved_protected_tail);
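+
+/*
+ * Illustrative pairing of the two calls above (sketch only):
+ *
+ *     __cilkrts_stack_frame *volatile *saved =
+ *         __cilkrts_disallow_stealing(w, NULL);   // protect the whole deque
+ *     ... manipulate the deque without interference from thieves ...
+ *     __cilkrts_restore_stealing(w, saved);       // restore previous setting
+ */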
+
+/**
+ * @brief Initialize a @c __cilkrts_worker.
+ *
+ * @note The memory for the worker must have been allocated outside
+ * this call.
+ *
+ * @param g The global_state_t.
+ * @param self The index into the global_state's array of workers for this
+ * worker, or -1 if this worker was allocated from the heap and cannot be
+ * stolen from.
+ * @param w The worker to be initialized.
+ *
+ * @return The initialized __cilkrts_worker.
+ */
+COMMON_PORTABLE
+__cilkrts_worker *make_worker(global_state_t *g,
+ int self,
+ __cilkrts_worker *w);
+
+/**
+ * @brief Free up any resources allocated for a worker.
+ *
+ * @note The memory for the @c __cilkrts_worker itself must be
+ * deallocated outside this call.
+ *
+ * @param w The worker to be destroyed.
+ */
+COMMON_PORTABLE
+void destroy_worker (__cilkrts_worker *w);
+
+/**
+ * @brief Initialize the runtime.
+ *
+ * If necessary, allocates and initializes the global state. If
+ * necessary, unsuspends the system workers.
+ *
+ * @param start Specifies whether the workers are to be unsuspended if
+ * they are suspended. Allows __cilkrts_init() to start up the runtime without
+ * releasing the system threads.
+ */
+COMMON_PORTABLE
+void __cilkrts_init_internal(int start);
+
+/**
+ * @brief Part of the sequence to shutdown the runtime.
+ *
+ * Specifically, this call frees the @c global_state_t for the runtime.
+ *
+ * @param g The global_state_t.
+ */
+COMMON_PORTABLE
+void __cilkrts_deinit_internal(global_state_t *g);
+
+/**
+ * Obsolete. We no longer need to import or export reducer maps.
+ */
+COMMON_PORTABLE
+cilkred_map *__cilkrts_xchg_reducer(
+ __cilkrts_worker *w, cilkred_map *newmap) cilk_nothrow;
+
+/**
+ * @brief Called when a user thread is bound to the runtime.
+ *
+ * If this action increments the count of bound user threads from 0 to
+ * 1, the system worker threads are unsuspended.
+ *
+ * @pre Global lock must be held.
+ * @param g The runtime global state.
+ */
+COMMON_PORTABLE
+void __cilkrts_enter_cilk(global_state_t *g);
+
+/**
+ * @brief Called when a user thread is unbound from the runtime.
+ *
+ * If this action decrements the count of bound user threads to 0, the
+ * system worker threads are suspended.
+ *
+ * @pre Global lock must be held.
+ *
+ * @param g The runtime global state.
+ */
+COMMON_PORTABLE
+void __cilkrts_leave_cilk(global_state_t *g);
+
+
+/**
+ * @brief cilk_fiber_proc that runs the main scheduler loop on a
+ * user worker.
+ *
+ * @pre fiber's owner field should be set to the correct __cilkrts_worker
+ * @pre The fiber's owning worker must be a user worker.
+ *
+ * @param fiber The scheduling fiber object.
+ */
+void scheduler_fiber_proc_for_user_worker(cilk_fiber *fiber);
+
+
+/**
+ * @brief Prints out Cilk runtime statistics.
+ *
+ * @param g The runtime global state.
+ *
+ * This method is useful only for debugging purposes. No guarantees
+ * are made as to the validity of this data. :)
+ */
+COMMON_PORTABLE
+void __cilkrts_dump_stats_to_stderr(global_state_t *g);
+
+#ifdef CILK_RECORD_REPLAY
+COMMON_PORTABLE
+char * walk_pedigree_nodes(char *p, const __cilkrts_pedigree *pnode);
+
+/**
+ * @brief Used by exception handling code to simulate the popping of
+ * an entry from the worker's deque.
+ *
+ * @param w Worker whose deque we want to check
+ *
+ * @return @c __cilkrts_stack_frame of parent call
+ * @return NULL if the deque is empty
+ */
+COMMON_PORTABLE
+__cilkrts_stack_frame *simulate_pop_tail(__cilkrts_worker *w);
+
+#endif
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_SCHEDULER_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/signal_node.c b/gcc-4.9/libcilkrts/runtime/signal_node.c
new file mode 100644
index 000000000..92c404b48
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/signal_node.c
@@ -0,0 +1,241 @@
+/* signal_node.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2011-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************/
+
+#include "signal_node.h"
+#include <stdlib.h>
+
+/* Define cilk_semaphore_t for all of the respective systems. */
+#if defined __APPLE__
+# include <mach/mach_init.h>
+# include <mach/semaphore.h>
+# include <mach/task.h>
+ typedef semaphore_t cilk_semaphore_t;
+#elif defined _WIN32
+# include "windows-clean.h"
+ typedef HANDLE cilk_semaphore_t;
+#else // Linux/MIC
+# include <errno.h>
+# include <semaphore.h>
+# include <stdio.h>
+ typedef sem_t cilk_semaphore_t;
+#endif // Linux/MIC
+
+#include "bug.h"
+#include "cilk_malloc.h"
+#include "signal_node.h"
+
+/**
+ * Interface within the tree used to tell workers when to wait, so that idle
+ * workers do not burn cycles trying to steal.
+ *
+ * cilk_semaphore_t is implemented as an auto-reset event on Windows, as a
+ * Mach semaphore_t on MacOS, and as a POSIX sem_t on Linux.
+ */
+struct signal_node_t
+{
+ /** 0 if the worker should wait, 1 if it should be running. */
+ volatile unsigned int run;
+
+ /** OS-specific semaphore on which the worker can wait. */
+ cilk_semaphore_t sem;
+};
+
+/******************************************************************************/
+/* Semaphore-abstraction functions */
+/******************************************************************************/
+
+/*
+ * All of these functions are simple wrappers for the system-specific semaphore
+ * functions. This keeps the rest of the code reasonably clean and readable.
+ */
+
+#if defined __APPLE__
+static void initialize_cilk_semaphore (cilk_semaphore_t *sem)
+{
+ kern_return_t kstatus
+ = semaphore_create(mach_task_self(), sem, SYNC_POLICY_FIFO, 0);
+ assert(kstatus == KERN_SUCCESS);
+}
+static void deinitialize_cilk_semaphore (cilk_semaphore_t *sem)
+{
+ kern_return_t kstatus = semaphore_destroy(mach_task_self(), *sem);
+ assert(kstatus == KERN_SUCCESS);
+}
+static void wait_on_cilk_semaphore (cilk_semaphore_t *sem)
+{
+ kern_return_t kstatus = semaphore_wait(*sem);
+ assert(kstatus == KERN_SUCCESS);
+}
+static void signal_cilk_semaphore (cilk_semaphore_t *sem)
+{
+ kern_return_t kstatus = semaphore_signal(*sem);
+ assert(kstatus == KERN_SUCCESS);
+}
+#elif defined _WIN32
+// Note: Windows only provides counting semaphores, and we don't really
+// care about the count. So this is implemented using an auto-reset
+// event which will automatically reset after the WaitForSingleObject
+// call
+static void initialize_cilk_semaphore (cilk_semaphore_t *sem)
+{
+ // Create an auto-reset event
+ *sem = CreateEvent(NULL, // Security attributes
+ FALSE, // Manual reset
+ FALSE, // Initial state (initially reset)
+ NULL); // Name (anonymous)
+ CILK_ASSERT (NULL != *sem);
+}
+
+static void deinitialize_cilk_semaphore (cilk_semaphore_t *sem)
+{
+ BOOL result = CloseHandle(*sem);
+ CILK_ASSERT (0 != result);
+}
+
+static void wait_on_cilk_semaphore (cilk_semaphore_t *sem)
+{
+ // WaitForSingleObject will reset the event
+ DWORD result = WaitForSingleObject (*sem, INFINITE);
+ CILK_ASSERT (WAIT_OBJECT_0 == result);
+}
+static void signal_cilk_semaphore (cilk_semaphore_t *sem)
+{
+ BOOL result = SetEvent (*sem);
+ CILK_ASSERT (0 != result);
+}
+#else // Linux/MIC
+static void initialize_cilk_semaphore (cilk_semaphore_t *sem)
+{
+ int status = sem_init(sem, 0, 0);
+ assert(0 == status);
+}
+static void deinitialize_cilk_semaphore (cilk_semaphore_t *sem)
+{
+ int status = sem_destroy(sem);
+ assert(0 == status);
+}
+static void wait_on_cilk_semaphore (cilk_semaphore_t *sem)
+{
+ int status;
+
+ do {
+ status = sem_wait(sem);
+ } while (status != 0 && errno == EINTR);
+
+ if (status != 0) {
+ perror("sem_wait");
+ abort();
+ }
+}
+static void signal_cilk_semaphore (cilk_semaphore_t *sem)
+{
+ sem_post(sem);
+}
+#endif // Linux/MIC
+
+/******************************************************************************/
+/* Runtime interface functions */
+/******************************************************************************/
+
+/*
+ * Return a newly malloc'd and initialized signal_node_t.
+ */
+COMMON_SYSDEP
+signal_node_t *signal_node_create(void)
+{
+ signal_node_t *node;
+
+    node = (signal_node_t*)
+        __cilkrts_malloc(sizeof(signal_node_t));
+ node->run = 0;
+ initialize_cilk_semaphore(&node->sem);
+
+ return node;
+}
+
+/*
+ * Clean and free a signal_node_t.
+ */
+void signal_node_destroy(signal_node_t *node)
+{
+ CILK_ASSERT(node);
+ deinitialize_cilk_semaphore(&node->sem);
+ __cilkrts_free(node);
+}
+
+/*
+ * Return 1 if the node thinks the worker should go to sleep, 0 otherwise.
+ */
+unsigned int signal_node_should_wait(signal_node_t *node)
+{
+ CILK_ASSERT(node);
+ return !node->run;
+}
+
+/*
+ * Send a message to the node that the worker will eventually read.
+ */
+void signal_node_msg(signal_node_t *node, unsigned int msg)
+{
+ CILK_ASSERT(node);
+ switch (msg) {
+ case 0: // worker should go to sleep.
+ node->run = msg;
+ break;
+ case 1: // worker should be awake.
+ node->run = msg;
+ signal_cilk_semaphore(&node->sem);
+ break;
+ default: // error.
+ CILK_ASSERT(0 == "Bad signal_node_t message.");
+ }
+}
+
+/*
+ * The current worker will wait on the semaphore.
+ */
+void signal_node_wait(signal_node_t *node)
+{
+ CILK_ASSERT(node);
+ while (signal_node_should_wait(node)) {
+ // The loop is here to consume extra semaphore signals that might have
+ // accumulated. No point in passing on the accumulation.
+ wait_on_cilk_semaphore(&node->sem);
+ }
+}
diff --git a/gcc-4.9/libcilkrts/runtime/signal_node.h b/gcc-4.9/libcilkrts/runtime/signal_node.h
new file mode 100644
index 000000000..0a1fe2002
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/signal_node.h
@@ -0,0 +1,109 @@
+/* signal_node.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file signal_node.h
+ *
+ * @brief Signal nodes allow coordinated waking and sleeping of the runtime
+ * without hammering on a single location in memory.
+ *
+ * The workers are logically arranged in a binary tree and propagate messages
+ * leaf-ward. User workers notify the root about waking and sleeping, so only
+ * that one node need share a cache line with a user worker.
+ */
+
+#ifndef INCLUDED_SIGNAL_NODE_DOT_H
+#define INCLUDED_SIGNAL_NODE_DOT_H
+
+#include "rts-common.h"
+#include <cilk/common.h>
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/** Opaque type. */
+typedef struct signal_node_t signal_node_t;
+
+/**
+ * Allocate and initialize a signal_node_t
+ *
+ * @return The initialized signal_node_t
+ */
+COMMON_SYSDEP
+signal_node_t *signal_node_create(void);
+
+/**
+ * Free any resources and deallocate a signal_node_t
+ *
+ * @param node The node to be deallocated.
+ */
+COMMON_SYSDEP void signal_node_destroy(signal_node_t *node);
+
+/**
+ * Test whether the node thinks the worker should go to sleep
+ *
+ * @param node The node to be tested.
+ *
+ * @return 1 If the worker should go to sleep
+ * @return 0 If the worker should not go to sleep
+ */
+COMMON_SYSDEP
+unsigned int signal_node_should_wait(signal_node_t *node);
+
+/**
+ * Specify whether the worker should go to sleep
+ *
+ * @param node The node to be set.
+ * @param msg The value to be set. Valid values are:
+ * - 0 - the worker should go to sleep
+ * - 1 - the worker should stay active
+ */
+COMMON_SYSDEP
+void signal_node_msg(signal_node_t *node, unsigned int msg);
+
+
+/**
+ * Wait for the node to be set
+ *
+ * @param node The node to wait on
+ */
+COMMON_SYSDEP
+void signal_node_wait(signal_node_t *node);
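+
+/*
+ * Illustrative sketch of a hypothetical caller (not part of this API):
+ * a system worker's idle loop and the thread controlling it might
+ * interact roughly as follows.
+ *
+ *     // Worker side:
+ *     if (signal_node_should_wait(node))
+ *         signal_node_wait(node);      // blocks until the node says "run"
+ *     // ... go look for work to steal ...
+ *
+ *     // Controlling side:
+ *     signal_node_msg(node, 1);        // wake the worker
+ *     signal_node_msg(node, 0);        // tell it to sleep again
+ */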
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_SIGNAL_NODE_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/spin_mutex.c b/gcc-4.9/libcilkrts/runtime/spin_mutex.c
new file mode 100644
index 000000000..03908f263
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/spin_mutex.c
@@ -0,0 +1,109 @@
+/* spin_mutex.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#include "spin_mutex.h"
+#include "bug.h"
+#include "os.h"
+#include "stats.h"
+
+// TBD (11/30/12): We should be doing a conditional test-xchg instead
+// of an unconditional xchg operation for the spin mutex.
+
+/* m->lock == 1 means that mutex M is locked */
+#define TRY_ACQUIRE(m) (__cilkrts_xchg(&(m)->lock, 1) == 0)
+
+/* ICC 11.1+ understands release semantics and generates an
+ ordinary store with a software memory barrier. */
+#if __ICC >= 1110
+#define RELEASE(m) __sync_lock_release(&(m)->lock)
+#else
+#define RELEASE(m) __cilkrts_xchg(&(m)->lock, 0)
+#endif
+
+
+spin_mutex* spin_mutex_create()
+{
+ spin_mutex* mutex = (spin_mutex*)__cilkrts_malloc(sizeof(spin_mutex));
+ spin_mutex_init(mutex);
+ return mutex;
+}
+
+void spin_mutex_init(struct spin_mutex *m)
+{
+ // Use a simple assignment so Inspector doesn't bug us about the
+ // interlocked exchange doing a read of an uninitialized variable.
+ // By definition there can't be a race when we're initializing the
+ // lock...
+ m->lock = 0;
+}
+
+void spin_mutex_lock(struct spin_mutex *m)
+{
+ int count;
+ const int maxspin = 1000; /* SWAG */
+ if (!TRY_ACQUIRE(m)) {
+ count = 0;
+ do {
+ do {
+ __cilkrts_short_pause();
+ if (++count >= maxspin) {
+ /* let the OS reschedule every once in a while */
+ __cilkrts_yield();
+ count = 0;
+ }
+ } while (m->lock != 0);
+ } while (!TRY_ACQUIRE(m));
+ }
+}
+
+int spin_mutex_trylock(struct spin_mutex *m)
+{
+ return TRY_ACQUIRE(m);
+}
+
+void spin_mutex_unlock(struct spin_mutex *m)
+{
+ RELEASE(m);
+}
+
+void spin_mutex_destroy(struct spin_mutex *m)
+{
+ __cilkrts_free(m);
+}
+
+/* End spin_mutex.c */
diff --git a/gcc-4.9/libcilkrts/runtime/spin_mutex.h b/gcc-4.9/libcilkrts/runtime/spin_mutex.h
new file mode 100644
index 000000000..b0045ab93
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/spin_mutex.h
@@ -0,0 +1,129 @@
+/* spin_mutex.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file spin_mutex.h
+ *
+ * @brief Support for Cilk runtime mutexes.
+ *
+ * Cilk runtime mutexes are implemented as simple spin loops.
+ *
+ * This file is similar to a worker_mutex, except it does not have an
+ * owner field.
+ *
+ * TBD: This class, worker_mutex, and os_mutex overlap quite a bit in
+ * functionality. Can we unify these mutexes somehow?
+ */
+#ifndef INCLUDED_SPIN_MUTEX_DOT_H
+#define INCLUDED_SPIN_MUTEX_DOT_H
+
+#include <cilk/common.h>
+#include "rts-common.h"
+#include "cilk_malloc.h"
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * Mutexes are treated as an abstract data type within the Cilk
+ * runtime system. They are implemented as simple spin loops.
+ */
+typedef struct spin_mutex {
+ /** Mutex spin loop variable. 0 if unowned, 1 if owned. */
+ volatile int lock;
+
+ /** Padding so the mutex takes up a cache line. */
+    char pad[64 - sizeof(int)];
+} spin_mutex;
+
+
+/**
+ * @brief Create a new Cilk spin_mutex.
+ *
+ * @return Returns an initialized spin mutex.
+ */
+COMMON_PORTABLE
+spin_mutex* spin_mutex_create();
+
+/**
+ * @brief Initialize a Cilk spin_mutex.
+ *
+ * @param m Spin_Mutex to be initialized.
+ */
+COMMON_PORTABLE
+void spin_mutex_init(spin_mutex *m);
+
+/**
+ * @brief Acquire a Cilk spin_mutex.
+ *
+ * If statistics are being gathered, the time spent
+ * acquiring the spin_mutex will be attributed to the specified worker.
+ *
+ * @param m Spin_Mutex to be acquired.
+ */
+COMMON_PORTABLE
+void spin_mutex_lock(struct spin_mutex *m);
+/**
+ * @brief Attempt to lock a Cilk spin_mutex and fail if it isn't available.
+ *
+ * @param m Spin_Mutex to be acquired.
+ *
+ * @return 1 if the spin_mutex was acquired.
+ * @return 0 if the spin_mutex was not acquired.
+ */
+COMMON_PORTABLE
+int spin_mutex_trylock(struct spin_mutex *m);
+
+/**
+ * @brief Release a Cilk spin_mutex.
+ *
+ * @param m Spin_Mutex to be released.
+ */
+COMMON_PORTABLE
+void spin_mutex_unlock(struct spin_mutex *m);
+
+/**
+ * @brief Deallocate a Cilk spin_mutex allocated by spin_mutex_create().
+ *
+ * @param m Spin_Mutex to be deallocated.
+ */
+COMMON_PORTABLE
+void spin_mutex_destroy(struct spin_mutex *m);
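+
+/*
+ * Illustrative usage sketch (not part of the declarations above):
+ *
+ *     spin_mutex *m = spin_mutex_create();   // allocate and initialize
+ *     spin_mutex_lock(m);
+ *     ... critical section ...
+ *     spin_mutex_unlock(m);
+ *     spin_mutex_destroy(m);                 // frees the mutex
+ *
+ * For a spin_mutex embedded in a larger structure, call spin_mutex_init()
+ * on the embedded field instead of spin_mutex_create(), and do not call
+ * spin_mutex_destroy() on it, since destroy frees the pointer it is given.
+ */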
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_SPIN_MUTEX_DOT_H)
diff --git a/gcc-4.9/libcilkrts/runtime/stats.c b/gcc-4.9/libcilkrts/runtime/stats.c
new file mode 100644
index 000000000..3a4207450
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/stats.c
@@ -0,0 +1,172 @@
+/* stats.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#include "stats.h"
+#include "bug.h"
+#include "os.h"
+#include "local_state.h"
+
+#include <stdio.h>
+
+#define INVALID_START (0ULL - 1ULL)
+
+#ifdef CILK_PROFILE
+/* MSVC does not support designated initializers, grrrr... */
+static const char *names[] = {
+ /*[INTERVAL_IN_SCHEDULER]*/ "in scheduler",
+ /*[INTERVAL_WORKING]*/ " of which: working",
+ /*[INTERVAL_IN_RUNTIME]*/ " of which: in runtime",
+ /*[INTERVAL_STEALING]*/ " of which: stealing",
+ /*[INTERVAL_STEAL_SUCCESS]*/ "steal success: detach",
+ /*[INTERVAL_STEAL_FAIL_EMPTYQ]*/ "steal fail: empty queue",
+ /*[INTERVAL_STEAL_FAIL_LOCK]*/ "steal fail: victim locked",
+ /*[INTERVAL_STEAL_FAIL_USER_WORKER]*/ "steal fail: user worker",
+ /*[INTERVAL_STEAL_FAIL_DEKKER]*/ "steal fail: dekker",
+ /*[INTERVAL_SYNC_CHECK]*/ "sync check",
+ /*[INTERVAL_THE_EXCEPTION_CHECK]*/ "THE exception check",
+ /*[INTERVAL_THE_EXCEPTION_CHECK_USELESS]*/ " of which: useless",
+ /*[INTERVAL_RETURNING]*/ "returning",
+ /*[INTERVAL_FINALIZE_CHILD]*/ "finalize child",
+ /*[INTERVAL_PROVABLY_GOOD_STEAL]*/ "provably good steal",
+ /*[INTERVAL_UNCONDITIONAL_STEAL]*/ "unconditional steal",
+ /*[INTERVAL_ALLOC_FULL_FRAME]*/ "alloc full frame",
+ /*[INTERVAL_FRAME_ALLOC_LARGE]*/ "large frame alloc",
+ /*[INTERVAL_FRAME_ALLOC]*/ "small frame alloc",
+ /*[INTERVAL_FRAME_ALLOC_GLOBAL]*/ " of which: to global pool",
+ /*[INTERVAL_FRAME_FREE_LARGE]*/ "large frame free",
+ /*[INTERVAL_FRAME_FREE]*/ "small frame free",
+ /*[INTERVAL_FRAME_FREE_GLOBAL]*/ " of which: to global pool",
+ /*[INTERVAL_MUTEX_LOCK]*/ "mutex lock",
+ /*[INTERVAL_MUTEX_LOCK_SPINNING]*/ " spinning",
+ /*[INTERVAL_MUTEX_LOCK_YIELDING]*/ " yielding",
+ /*[INTERVAL_MUTEX_TRYLOCK]*/ "mutex trylock",
+ /*[INTERVAL_FIBER_ALLOCATE]*/ "fiber_allocate",
+ /*[INTERVAL_FIBER_DEALLOCATE]*/ "fiber_deallocate",
+ /*[INTERVAL_FIBER_ALLOCATE_FROM_THREAD]*/ "fiber_allocate_from_thread",
+ /*[INTERVAL_FIBER_DEALLOCATE_FROM_THREAD]*/ "fiber_deallocate (thread)",
+ /*[INTERVAL_SUSPEND_RESUME_OTHER]*/ "fiber suspend self + resume",
+ /*[INTERVAL_DEALLOCATE_RESUME_OTHER]*/ "fiber deallocate self + resume",
+};
+#endif
+
+void __cilkrts_init_stats(statistics *s)
+{
+ int i;
+ for (i = 0; i < INTERVAL_N; ++i) {
+ s->start[i] = INVALID_START;
+ s->accum[i] = 0;
+ s->count[i] = 0;
+ }
+
+ s->stack_hwm = 0;
+}
+
+#ifdef CILK_PROFILE
+void __cilkrts_accum_stats(statistics *to, statistics *from)
+{
+ int i;
+
+ for (i = 0; i < INTERVAL_N; ++i) {
+ to->accum[i] += from->accum[i];
+ to->count[i] += from->count[i];
+ from->accum[i] = 0;
+ from->count[i] = 0;
+ }
+
+ if (from->stack_hwm > to->stack_hwm)
+ to->stack_hwm = from->stack_hwm;
+ from->stack_hwm = 0;
+}
+
+void __cilkrts_note_interval(__cilkrts_worker *w, enum interval i)
+{
+ if (w) {
+ statistics *s = w->l->stats;
+ CILK_ASSERT(s->start[i] == INVALID_START);
+ s->count[i]++;
+ }
+}
+
+void __cilkrts_start_interval(__cilkrts_worker *w, enum interval i)
+{
+ if (w) {
+ statistics *s = w->l->stats;
+ CILK_ASSERT(s->start[i] == INVALID_START);
+ s->start[i] = __cilkrts_getticks();
+ s->count[i]++;
+ }
+}
+
+void __cilkrts_stop_interval(__cilkrts_worker *w, enum interval i)
+{
+ if (w) {
+ statistics *s = w->l->stats;
+ CILK_ASSERT(s->start[i] != INVALID_START);
+ s->accum[i] += __cilkrts_getticks() - s->start[i];
+ s->start[i] = INVALID_START;
+ }
+}
+
+void dump_stats_to_file(FILE *stat_file, statistics *s)
+{
+ int i;
+ fprintf(stat_file, "\nCILK PLUS RUNTIME SYSTEM STATISTICS:\n\n");
+
+ fprintf(stat_file,
+ " %-32s: %15s %10s %12s %10s\n",
+ "event",
+ "count",
+ "ticks",
+ "ticks/count",
+ "%total"
+ );
+ for (i = 0; i < INTERVAL_N; ++i) {
+ fprintf(stat_file, " %-32s: %15llu", names[i], s->count[i]);
+ if (s->accum[i]) {
+ fprintf(stat_file, " %10.3g %12.3g %10.2f",
+ (double)s->accum[i],
+ (double)s->accum[i] / (double)s->count[i],
+ 100.0 * (double)s->accum[i] /
+ (double)s->accum[INTERVAL_IN_SCHEDULER]);
+ }
+ fprintf(stat_file, "\n");
+ }
+}
+#endif // CILK_PROFILE
+
+/* End stats.c */
diff --git a/gcc-4.9/libcilkrts/runtime/stats.h b/gcc-4.9/libcilkrts/runtime/stats.h
new file mode 100644
index 000000000..aaa992747
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/stats.h
@@ -0,0 +1,208 @@
+/* stats.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file stats.h
+ *
+ * @brief Support for gathering and reporting statistics on Cilk applications.
+ *
+ * Note that stats are normally NOT compiled in because it increases the
+ * overhead of stealing. To compile in profiling support, define CILK_PROFILE.
+ */
+
+#ifndef INCLUDED_STATS_DOT_H
+#define INCLUDED_STATS_DOT_H
+
+/* #define CILK_PROFILE 1 */
+// @note The CILK_PROFILE flag and intervals are known to be broken,
+// at least in programs with Windows exceptions.
+// Enable this flag at your own peril. :)
+
+#include <cilk/common.h>
+#include "rts-common.h"
+#include "internal/abi.h"
+
+#ifdef CILK_PROFILE
+#include <stdio.h> // Define FILE *
+#endif
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/** @brief Events that we measure. */
+enum interval
+{
+ INTERVAL_IN_SCHEDULER, ///< Time threads spend "bound" to Cilk
+ INTERVAL_WORKING, ///< Time spent working
+ INTERVAL_IN_RUNTIME, ///< Time spent executing runtime scheduling loop
+ INTERVAL_STEALING, ///< Time spent stealing work
+ INTERVAL_STEAL_SUCCESS, ///< Time to do a successful steal
+ INTERVAL_STEAL_FAIL_EMPTYQ, ///< Count of steal failures due to lack of stealable work
+ INTERVAL_STEAL_FAIL_LOCK, ///< Count of steal failures due to failure to lock worker
+ INTERVAL_STEAL_FAIL_USER_WORKER, ///< Count of steal failures by user workers which attempt to steal from another team
+ INTERVAL_STEAL_FAIL_DEKKER, ///< Count of steal failures due to Dekker protocol failure
+ INTERVAL_SYNC_CHECK, ///< Time spent processing syncs
+ INTERVAL_THE_EXCEPTION_CHECK, ///< Time spent performing THE exception checks
+ INTERVAL_THE_EXCEPTION_CHECK_USELESS, ///< Count of useless THE exception checks
+ INTERVAL_RETURNING, ///< Time spent returning from calls
+ INTERVAL_FINALIZE_CHILD, ///< Time spent in finalize_child
+ INTERVAL_PROVABLY_GOOD_STEAL, ///< Time spent in provably_good_steal
+ INTERVAL_UNCONDITIONAL_STEAL, ///< Time spent in unconditional_steal
+ INTERVAL_ALLOC_FULL_FRAME, ///< Time spent in __cilkrts_make_full_frame
+ INTERVAL_FRAME_ALLOC_LARGE, ///< Count of calls to __cilkrts_frame_malloc for buffers bigger than FRAME_MALLOC_MAX_SIZE or with a NULL worker
+ INTERVAL_FRAME_ALLOC, ///< Time spent allocating memory from worker buckets
+ INTERVAL_FRAME_ALLOC_GLOBAL, ///< Time spent calling memory allocator when buckets are empty
+ INTERVAL_FRAME_FREE_LARGE, ///< Count of calls to __cilkrts_frame_malloc for buffers bigger than FRAME_MALLOC_MAX_SIZE or with a NULL worker
+ INTERVAL_FRAME_FREE, ///< Time spent freeing memory to worker buckets
+ INTERVAL_FRAME_FREE_GLOBAL, ///< Time spent calling memory deallocator when buckets are full
+ INTERVAL_MUTEX_LOCK, ///< Count of calls to __cilkrts_mutex_lock for a worker
+ INTERVAL_MUTEX_LOCK_SPINNING, ///< Time spent spinning in __cilkrts_mutex_lock for a worker
+ INTERVAL_MUTEX_LOCK_YIELDING, ///< Time spent yielding in __cilkrts_mutex_lock for a worker
+ INTERVAL_MUTEX_TRYLOCK, ///< Count of calls to __cilkrts_mutex_trylock
+ INTERVAL_FIBER_ALLOCATE, ///< Time spent calling cilk_fiber_allocate
+ INTERVAL_FIBER_DEALLOCATE, ///< Time spent calling cilk_fiber_deallocate (not from thread)
+ INTERVAL_FIBER_ALLOCATE_FROM_THREAD, ///< Time spent calling cilk_fiber_allocate_from_thread
+ INTERVAL_FIBER_DEALLOCATE_FROM_THREAD, ///< Time spent calling cilk_fiber_deallocate (from thread)
+ INTERVAL_SUSPEND_RESUME_OTHER, ///< Count of fiber suspend_self_and_resume_other
+ INTERVAL_DEALLOCATE_RESUME_OTHER, ///< Count of fiber deallocate_self_and_resume_other
+ INTERVAL_N ///< Number of intervals, must be last
+};
+
+/**
+ * @brief Struct that collects all runtime statistics.
+ *
+ * There is an instance of this structure in each worker's
+ * local_state, as well as one in the @c global_state_t which will be
+ * used to accumulate the per-worker stats.
+ */
+typedef struct statistics
+{
+ /** Number of times each interval is entered */
+ unsigned long long count[INTERVAL_N];
+
+ /**
+ * Time when the system entered each interval, in system-dependent
+ * "ticks"
+ */
+ unsigned long long start[INTERVAL_N];
+
+ /** Total time spent in each interval, in system-dependent "ticks" */
+ unsigned long long accum[INTERVAL_N];
+
+ /**
+ * Largest global number of stacks seen by this worker.
+ * The true maximum at end of execution is the max of the
+ * worker maxima.
+ */
+ long stack_hwm;
+} statistics;
+
+/**
+ * Initializes a statistics structure
+ *
+ * @param s The statistics structure to be initialized.
+ */
+COMMON_PORTABLE void __cilkrts_init_stats(statistics *s);
+
+/**
+ * @brief Sums statistics from worker to the global struct
+ *
+ * @param to The statistics structure that will accumulate the information.
+ * This structure is usually @c g->stats.
+ * @param from The statistics structure that will be accumulated.
+ * This structure is usually statistics kept per worker.
+ */
+COMMON_PORTABLE
+void __cilkrts_accum_stats(statistics *to, statistics *from);
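+
+/*
+ * Illustrative call (sketch only): at worker teardown the per-worker stats
+ * would typically be folded into the global structure, e.g.
+ *
+ *     __cilkrts_accum_stats(global_stats, w->l->stats);
+ *
+ * where "global_stats" stands for a pointer to the global statistics
+ * structure (usually g->stats, per the note above) and is named here for
+ * illustration only.
+ */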
+
+/**
+ * @brief Mark the start of an interval by saving the current tick count.
+ *
+ * @pre Start time == INVALID_START
+ *
+ * @param w The worker we're accumulating stats for.
+ * @param i The interval we're accumulating stats for.
+ */
+COMMON_PORTABLE
+void __cilkrts_start_interval(__cilkrts_worker *w, enum interval i);
+
+/**
+ * @brief Mark the end of an interval by adding the ticks since the
+ * start to the accumulated time.
+ *
+ * @pre Start time != INVALID_START
+ *
+ * @param w The worker we're accumulating stats for.
+ * @param i The interval we're accumulating stats for.
+ */
+COMMON_PORTABLE
+void __cilkrts_stop_interval(__cilkrts_worker *w, enum interval i);
+
+/**
+ * @brief Start and stop interval I, charging zero time against it
+ *
+ * @pre Start time == INVALID_START
+ *
+ * @param w The worker we're accumulating stats for.
+ * @param i The interval we're accumulating stats for.
+ */
+COMMON_PORTABLE
+void __cilkrts_note_interval(__cilkrts_worker *w, enum interval i);
+
+#ifdef CILK_PROFILE
+COMMON_PORTABLE
+void dump_stats_to_file(FILE *stat_file, statistics *s);
+#endif
+
+
+#ifdef CILK_PROFILE
+# define START_INTERVAL(w, i) __cilkrts_start_interval(w, i);
+# define STOP_INTERVAL(w, i) __cilkrts_stop_interval(w, i);
+# define NOTE_INTERVAL(w, i) __cilkrts_note_interval(w, i);
+#else
+/** Start an interval. No effect unless CILK_PROFILE is defined. */
+# define START_INTERVAL(w, i)
+/** End an interval. No effect unless CILK_PROFILE is defined. */
+# define STOP_INTERVAL(w, i)
+/** Increment a counter. No effect unless CILK_PROFILE is defined. */
+# define NOTE_INTERVAL(w, i)
+#endif
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_STATS_DOT_H)
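
Since all three macros expand to nothing unless CILK_PROFILE is defined, call sites can bracket measured regions unconditionally. A minimal usage sketch (hypothetical worker pointer w, following the same pattern the runtime itself uses in sysdep-unix.c below):

    /* Sketch only: w is assumed to be the __cilkrts_worker bound to this
     * thread.  The braces are optional; they just make the timed region
     * visually obvious. */
    START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE_FROM_THREAD) {
        w->l->scheduling_fiber = cilk_fiber_allocate_from_thread();
    } STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE_FROM_THREAD);

    /* Events that are only counted, not timed, use NOTE_INTERVAL. */
    NOTE_INTERVAL(w, INTERVAL_MUTEX_TRYLOCK);
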
diff --git a/gcc-4.9/libcilkrts/runtime/symbol_test.c b/gcc-4.9/libcilkrts/runtime/symbol_test.c
new file mode 100644
index 000000000..8291d369a
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/symbol_test.c
@@ -0,0 +1,63 @@
+/* symbol_test.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/* Simple program to verify that there are no undefined symbols in the runtime.
+ * If the runtime references any symbols that are not defined, linking this
+ * program will fail with an undefined-symbol error.
+ */
+
+#define _Cilk_for for
+extern void* __cilkrts_global_state;
+void *volatile p;
+
+void foo () { }
+int main ()
+{
+ int i;
+ long long j;
+
+ _Cilk_spawn foo();
+ _Cilk_for (i = 0; i < 2; ++i)
+ foo();
+ _Cilk_for (j = 0; j < 2; ++j)
+ foo();
+ p = __cilkrts_global_state;
+ return 0;
+}
+
+/* End symbol_test.c */
diff --git a/gcc-4.9/libcilkrts/runtime/sysdep-unix.c b/gcc-4.9/libcilkrts/runtime/sysdep-unix.c
new file mode 100644
index 000000000..1f82b6288
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/sysdep-unix.c
@@ -0,0 +1,807 @@
+/*
+ * sysdep-unix.c
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2010-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************
+ */
+
+#ifdef __linux__
+ // define _GNU_SOURCE before *any* #include.
+ // Even <stdint.h> will break later #includes if this macro is not
+ // already defined when it is #included.
+# define _GNU_SOURCE
+#endif
+
+#include "sysdep.h"
+#include "os.h"
+#include "bug.h"
+#include "local_state.h"
+#include "signal_node.h"
+#include "full_frame.h"
+#include "jmpbuf.h"
+#include "cilk_malloc.h"
+#include "reducer_impl.h"
+#include "metacall_impl.h"
+
+
+// On x86 processors (but not MIC processors), the compiler generates code to
+// save the FP state (rounding mode and the like) before calling setjmp. We
+// will need to restore that state when we resume.
+#ifndef __MIC__
+# if defined(__i386__) || defined(__x86_64)
+# define RESTORE_X86_FP_STATE
+# endif // defined(__i386__) || defined(__x86_64)
+#endif // __MIC__
+
+// contains notification macros for VTune.
+#include "cilk-ittnotify.h"
+
+#include <stddef.h>
+
+#ifdef __CYGWIN__
+// On Cygwin, string.h doesn't declare strcasecmp if __STRICT_ANSI__ is defined
+# undef __STRICT_ANSI__
+#endif
+
+#include <string.h>
+#include <pthread.h>
+#include <unistd.h>
+
+#if defined HAVE_ALLOCA_H
+# include <alloca.h>
+#elif defined __GNUC__
+# define alloca __builtin_alloca
+#elif defined _AIX
+# define alloca __alloca
+#else
+# include <stddef.h>
+# ifdef __cplusplus
+extern "C"
+# endif
+void *alloca (size_t);
+#endif
+
+#ifdef __APPLE__
+//# include <scheduler.h> // Angle brackets include Apple's scheduler.h, not ours.
+#endif
+
+#ifdef __linux__
+# include <sys/resource.h>
+# include <sys/sysinfo.h>
+#endif
+
+#ifdef __FreeBSD__
+# include <sys/resource.h>
+// BSD does not define MAP_ANONYMOUS, but *does* define MAP_ANON. Aren't standards great!
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifdef __VXWORKS__
+# include <vxWorks.h>
+# include <vxCpuLib.h>
+#endif
+
+struct global_sysdep_state
+{
+ pthread_t *threads; ///< Array of pthreads for system workers
+ size_t pthread_t_size; ///< for cilk_db
+};
+
+static void internal_enforce_global_visibility();
+
+
+COMMON_SYSDEP
+void __cilkrts_init_worker_sysdep(struct __cilkrts_worker *w)
+{
+ ITT_SYNC_CREATE(w, "Scheduler");
+}
+
+COMMON_SYSDEP
+void __cilkrts_destroy_worker_sysdep(struct __cilkrts_worker *w)
+{
+}
+
+COMMON_SYSDEP
+void __cilkrts_init_global_sysdep(global_state_t *g)
+{
+ internal_enforce_global_visibility();
+
+ __cilkrts_init_tls_variables();
+
+ CILK_ASSERT(g->total_workers >= g->P - 1);
+ g->sysdep = __cilkrts_malloc(sizeof (struct global_sysdep_state));
+ CILK_ASSERT(g->sysdep);
+ g->sysdep->pthread_t_size = sizeof (pthread_t);
+
+ // TBD: Should this value be g->total_workers, or g->P?
+ // Need to check what we are using this field for.
+ g->sysdep->threads = __cilkrts_malloc(sizeof(pthread_t) * g->total_workers);
+ CILK_ASSERT(g->sysdep->threads);
+
+ return;
+}
+
+COMMON_SYSDEP
+void __cilkrts_destroy_global_sysdep(global_state_t *g)
+{
+ if (g->sysdep->threads)
+ __cilkrts_free(g->sysdep->threads);
+ __cilkrts_free(g->sysdep);
+}
+
+/*************************************************************
+ Creation of worker threads:
+*************************************************************/
+
+static void internal_run_scheduler_with_exceptions(__cilkrts_worker *w)
+{
+ /* We assume the stack grows down. */
+ char var;
+ __cilkrts_cilkscreen_establish_c_stack(&var - 1000000, &var);
+
+ __cilkrts_run_scheduler_with_exceptions(w);
+}
+
+
+
+/*
+ * scheduler_thread_proc_for_system_worker
+ *
+ * Thread start function called when we start a new worker.
+ *
+ */
+NON_COMMON void* scheduler_thread_proc_for_system_worker(void *arg)
+{
+ /*int status;*/
+ __cilkrts_worker *w = (__cilkrts_worker *)arg;
+
+#ifdef __INTEL_COMPILER
+#ifdef USE_ITTNOTIFY
+ // Name the threads for Advisor. They don't want a worker number.
+ __itt_thread_set_name("Cilk Worker");
+#endif // defined USE_ITTNOTIFY
+#endif // defined __INTEL_COMPILER
+
+ /* Worker startup is serialized
+ status = pthread_mutex_lock(&__cilkrts_global_mutex);
+ CILK_ASSERT(status == 0);*/
+ CILK_ASSERT(w->l->type == WORKER_SYSTEM);
+ /*status = pthread_mutex_unlock(&__cilkrts_global_mutex);
+ CILK_ASSERT(status == 0);*/
+
+ __cilkrts_set_tls_worker(w);
+
+ // Create a cilk fiber for this worker on this thread.
+ START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE_FROM_THREAD) {
+ w->l->scheduling_fiber = cilk_fiber_allocate_from_thread();
+ cilk_fiber_set_owner(w->l->scheduling_fiber, w);
+ } STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE_FROM_THREAD);
+
+ internal_run_scheduler_with_exceptions(w);
+
+ START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE_FROM_THREAD) {
+ // Deallocate the scheduling fiber. This operation reverses the
+ // effect of cilk_fiber_allocate_from_thread() and must be done in this
+ // thread before it exits.
+ int ref_count = cilk_fiber_deallocate_from_thread(w->l->scheduling_fiber);
+ // Scheduling fibers should never have extra references to them.
+ // We only get extra references into fibers because of Windows
+ // exceptions.
+ CILK_ASSERT(0 == ref_count);
+ w->l->scheduling_fiber = NULL;
+ } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE_FROM_THREAD);
+
+ return 0;
+}
+
+
+/*
+ * __cilkrts_user_worker_scheduling_stub
+ *
+ * Routine for the scheduling fiber created for an imported user
+ * worker thread. This method is analogous to
+ * scheduler_thread_proc_for_system_worker.
+ *
+ */
+void __cilkrts_user_worker_scheduling_stub(cilk_fiber* fiber, void* null_arg)
+{
+ __cilkrts_worker *w = __cilkrts_get_tls_worker();
+
+ // Sanity check.
+ CILK_ASSERT(WORKER_USER == w->l->type);
+
+ // Enter the scheduling loop on the user worker.
+ // This function will never return.
+ __cilkrts_run_scheduler_with_exceptions(w);
+
+ // A WORKER_USER, at some point, will resume on the original stack and leave
+ // Cilk. Under no circumstances do we ever exit off of the bottom of this
+ // stack.
+ CILK_ASSERT(0);
+}
+
+/**
+ * We are exporting a function with this name to Inspector?
+ * What a confusing name...
+ *
+ * This function is exported so Piersol's stack trace displays
+ * reasonable information.
+ */
+void* __cilkrts_worker_stub(void* arg)
+{
+ return scheduler_thread_proc_for_system_worker(arg);
+}
+
+
+
+// /* Return the lesser of the argument and the operating system
+// limit on the number of workers (threads) that may or ought
+// to be created. */
+// int sysdep_thread_limit(int n, int physical_cpus)
+// {
+// /* On Linux thread creation fails somewhere short of the
+// number of available processes. */
+// struct rlimit lim;
+
+// if (n > 256 + 2 * physical_cpus)
+// n = 256 + 2 * physical_cpus;
+
+// if (getrlimit(RLIMIT_NPROC, &lim) == 0 && lim.rlim_cur != RLIM_INFINITY)
+// {
+// /* If the limit reads 0 or absurdly small, ignore it. */
+// unsigned int maxproc = (lim.rlim_cur * 3 + 3) / 4;
+// if (maxproc > 8 + 2 * physical_cpus && maxproc < n)
+// n = maxproc;
+// }
+// return n;
+// }
+
+
+
+static void write_version_file (global_state_t *, int);
+
+/* Create n worker threads from base..top-1
+ */
+static void create_threads(global_state_t *g, int base, int top)
+{
+ // TBD(11/30/12): We want to insert code providing the option of
+ // pinning system workers to cores.
+ for (int i = base; i < top; i++) {
+ int status = pthread_create(&g->sysdep->threads[i],
+ NULL,
+ scheduler_thread_proc_for_system_worker,
+ g->workers[i]);
+ if (status != 0)
+ __cilkrts_bug("Cilk runtime error: thread creation (%d) failed: %d\n", i, status);
+ }
+}
+
+#if PARALLEL_THREAD_CREATE
+static int volatile threads_created = 0;
+
+// Create approximately half of the worker threads, and then become a worker
+// ourselves.
+static void * create_threads_and_work (void * arg)
+{
+ global_state_t *g = ((__cilkrts_worker *)arg)->g;
+
+ create_threads(g, g->P/2, g->P-1);
+ // Let the initial thread know that we're done.
+ threads_created = 1;
+
+ // Ideally this turns into a tail call that wipes out this stack frame.
+ return scheduler_thread_proc_for_system_worker(arg);
+}
+#endif
+void __cilkrts_start_workers(global_state_t *g, int n)
+{
+ g->workers_running = 1;
+ g->work_done = 0;
+
+ if (!g->sysdep->threads)
+ return;
+
+ // Do we actually have any threads to create?
+ if (n > 0)
+ {
+#if PARALLEL_THREAD_CREATE
+ int status;
+ // We create (a rounded-up) half of the threads; thread one creates the rest.
+ int half_threads = (n+1)/2;
+
+ // Create the first thread passing a different thread function, so that it creates threads itself
+ status = pthread_create(&g->sysdep->threads[0], NULL, create_threads_and_work, g->workers[0]);
+
+ if (status != 0)
+ __cilkrts_bug("Cilk runtime error: thread creation (0) failed: %d\n", status);
+
+ // Then the rest of the ones we have to create
+ create_threads(g, 1, half_threads);
+
+ // Now wait for the first created thread to tell us it's created all of its threads.
+ // We could maybe drop this a bit lower and overlap with write_version_file.
+ while (!threads_created)
+ __cilkrts_yield();
+#else
+ // Simply create all the threads linearly here.
+ create_threads(g, 0, n);
+#endif
+ }
+ // write the version information to a file if the environment is configured
+ // for it (the function makes the check).
+ write_version_file(g, n);
+
+
+ return;
+}
+
+void __cilkrts_stop_workers(global_state_t *g)
+{
+ int i;
+
+ // Tell the workers to give up
+
+ g->work_done = 1;
+
+ if (g->workers_running == 0)
+ return;
+
+ if (!g->sysdep->threads)
+ return;
+
+ /* Make them all runnable. */
+ if (g->P > 1) {
+ CILK_ASSERT(g->workers[0]->l->signal_node);
+ signal_node_msg(g->workers[0]->l->signal_node, 1);
+ }
+
+ for (i = 0; i < g->P - 1; ++i) {
+ int sc_status;
+ void *th_status;
+
+ sc_status = pthread_join(g->sysdep->threads[i], &th_status);
+ if (sc_status != 0)
+ __cilkrts_bug("Cilk runtime error: thread join (%d) failed: %d\n", i, sc_status);
+ }
+
+ g->workers_running = 0;
+
+
+ return;
+}
+
+
+/*
+ * @brief Returns the stack address for resuming execution of sf.
+ *
+ * This method takes in the top of the stack to use, and then returns
+ * a properly aligned address for resuming execution of sf.
+ *
+ * @param sf - The stack frame we want to resume executing.
+ * @param stack_base - The top of the stack we want to execute sf on.
+ * @param ff - The full frame for sf; may carry a saved frame size (used
+ * only if the frame-size correction below is enabled).
+ *
+ */
+static char* get_sp_for_executing_sf(char* stack_base,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf)
+{
+// The original calculation that had been done to correct the stack
+// pointer when resuming execution.
+//
+// But this code was never getting called in the eng branch anyway...
+//
+// TBD(11/30/12): This logic needs to be revisited to make sure that
+// we are doing the proper calculation in reserving space for outgoing
+// arguments on all platforms and architectures.
+#if 0
+ /* Preserve outgoing argument space and stack alignment on steal.
+ Outgoing argument space is bounded by the difference between
+ stack and frame pointers. Some user code is known to rely on
+ 16 byte alignment. Maintain 32 byte alignment for future
+ compatibility. */
+#define SMASK 31 /* 32 byte alignment */
+ if (sf) {
+ char *fp = FP(sf), *sp = SP(sf);
+ int fp_align = (int)(size_t)fp & SMASK;
+ ptrdiff_t space = fp - sp;
+
+ fprintf(stderr, "Here: fp = %p, sp = %p\n", fp, sp);
+ char *top_aligned = (char *)((((size_t)stack_base - SMASK) & ~(size_t)SMASK) | fp_align);
+ /* Don't allocate an unreasonable amount of stack space. */
+
+ fprintf(stderr, "Here: stack_base = %p, top_aligned=%p, space=%ld\n",
+ stack_base, top_aligned, space);
+ if (space < 32)
+ space = 32 + (space & SMASK);
+ else if (space > 40 * 1024)
+ space = 40 * 1024 + (space & SMASK);
+
+ return top_aligned - space;
+ }
+#endif
+
+#define PERFORM_FRAME_SIZE_CALCULATION 0
+
+ char* new_stack_base = stack_base - 256;
+
+#if PERFORM_FRAME_SIZE_CALCULATION
+ // If there is a frame size saved, then use that as the
+ // correction instead of 256.
+ if (ff->frame_size > 0) {
+ if (ff->frame_size < 40*1024) {
+ new_stack_base = stack_base - ff->frame_size;
+ }
+ else {
+ // If, for some reason, our frame size calculation gives us a
+ // number bigger than about 10 pages, something is likely wrong.
+ // Don't allocate an unreasonable amount of space.
+ new_stack_base = stack_base - 40*1024;
+ }
+ }
+#endif
+
+ // Whatever correction we choose, align the final stack top.
+ // This alignment seems to be necessary in particular on 32-bit
+ // Linux, and possibly Mac. (Would 32-byte alignment be sufficient?)
+ /* 256-byte alignment. Why not? */
+ const uintptr_t align_mask = ~(256 -1);
+ new_stack_base = (char*)((size_t)new_stack_base & align_mask);
+ return new_stack_base;
+}
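
To make the correction above concrete, a worked example with a hypothetical stack base (illustration only; the 256-byte subtraction and alignment come straight from the code above):

    /* stack_base             == (char *)0x7f0000012ab0   (hypothetical)        */
    /* stack_base - 256       ==          0x7f00000129b0  (fixed correction)    */
    /* ... & ~(uintptr_t)255  ==          0x7f0000012900  (aligned down to 256) */
    /* So sf resumes on a 256-byte-aligned address at least 256 bytes below     */
    /* the fiber's stack base.                                                  */
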
+
+char* sysdep_reset_jump_buffers_for_resume(cilk_fiber* fiber,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf)
+{
+#if FIBER_DEBUG >= 4
+ fprintf(stderr, "ThreadId=%p (fiber_proc_to_resume), Fiber %p. sf = %p. ff=%p, ff->sync_sp=%p\n",
+ cilkos_get_current_thread_id(),
+ fiber,
+ sf,
+ ff, ff->sync_sp);
+#endif
+
+ CILK_ASSERT(fiber);
+ void* sp = (void*)get_sp_for_executing_sf(cilk_fiber_get_stack_base(fiber), ff, sf);
+ SP(sf) = sp;
+
+ /* Debugging: make sure stack is accessible. */
+ ((volatile char *)sp)[-1];
+
+ // Adjust the saved_sp to account for the SP we're about to run on. This
+ // will allow us to track fluctuations in the stack.
+#if FIBER_DEBUG >= 4
+ fprintf(stderr, "ThreadId=%p, about to take stack ff=%p, sp=%p, sync_sp=%p\n",
+ cilkos_get_current_thread_id(),
+ ff,
+ sp,
+ ff->sync_sp);
+#endif
+ __cilkrts_take_stack(ff, sp);
+ return sp;
+}
+
+
+NORETURN sysdep_longjmp_to_sf(char* new_sp,
+ __cilkrts_stack_frame *sf,
+ full_frame *ff_for_exceptions /* UNUSED on Unix */)
+{
+#if FIBER_DEBUG >= 3
+ fprintf(stderr,
+ "ThreadId=%p. resume user code, sf=%p, new_sp = %p, original SP(sf) = %p, FP(sf) = %p\n",
+ cilkos_get_current_thread_id(), sf, new_sp, SP(sf), FP(sf));
+#endif
+
+ // Set the stack pointer.
+ SP(sf) = new_sp;
+
+#ifdef RESTORE_X86_FP_STATE
+ if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1) {
+ // Restore the floating point state that was set in this frame at the
+ // last spawn.
+ //
+ // This feature is only available in ABI 1 or later frames, and only
+ // needed on IA32 or Intel64 processors.
+ restore_x86_fp_state(sf);
+ }
+#endif
+
+ CILK_LONGJMP(sf->ctx);
+}
+
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+
+void __cilkrts_make_unrunnable_sysdep(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf,
+ int is_loot,
+ const char *why)
+{
+ (void)w; /* unused */
+ sf->except_data = 0;
+
+ if (is_loot)
+ {
+ if (ff->frame_size == 0)
+ ff->frame_size = __cilkrts_get_frame_size(sf);
+
+ // Null loot's sp for debugging purposes (so we'll know it's not valid)
+ SP(sf) = 0;
+ }
+}
+
+/*
+ * __cilkrts_sysdep_is_worker_thread_id
+ *
+ * Returns true if the thread ID specified matches the thread ID we saved
+ * for a worker.
+ */
+
+int __cilkrts_sysdep_is_worker_thread_id(global_state_t *g,
+ int i,
+ void *thread_id)
+{
+#if defined( __linux__) || defined(__VXWORKS__)
+ pthread_t tid = *(pthread_t *)thread_id;
+ if (i < 0 || i >= g->total_workers)
+ return 0;
+ return g->sysdep->threads[i] == tid;
+#else
+ // Needs to be implemented
+ return 0;
+#endif
+}
+
+
+
+
+/*************************************************************
+ Version information:
+*************************************************************/
+
+#include <dlfcn.h>
+#include "internal/cilk_version.h"
+#include <stdio.h>
+#include <sys/utsname.h>
+
+#ifdef __VXWORKS__
+#include <version.h>
+# endif
+
+/* A (non-static) dummy function used by get_runtime_path() to find the path
+ * to the .so containing the Cilk runtime.
+ */
+void dummy_function() { }
+
+/* return a string with the path to the Cilk runtime, or "unknown" if the path
+ * cannot be determined.
+ */
+static const char *get_runtime_path ()
+{
+#ifdef __CYGWIN__
+ // Cygwin doesn't support dladdr, which sucks
+ return "unknown";
+#else
+ Dl_info info;
+ if (0 == dladdr(dummy_function, &info)) return "unknown";
+ return info.dli_fname;
+#endif
+}
+
+/* If the environment variable CILK_VERSION is defined, write the version
+ * information to the specified file.
+ * g is the global state that was just created, and n is the number of workers
+ * that were made (or requested from RML) for it.
+ */
+static void write_version_file (global_state_t *g, int n)
+{
+ const char *env; // environment variable.
+ char buf[256]; // print buffer.
+ time_t t;
+ FILE *fp;
+ struct utsname sys_info;
+ int err; // error code from system calls.
+
+ // if CILK_VERSION is not set, or if the file cannot be opened, fail
+ // silently. Otherwise open the file for writing (or use stderr or stdout
+ // if the user specifies).
+ if (NULL == (env = getenv("CILK_VERSION"))) return;
+ if (0 == strcasecmp(env, "stderr")) fp = stderr;
+ else if (0 == strcasecmp(env, "stdout")) fp = stdout;
+ else if (NULL == (fp = fopen(env, "w"))) return;
+
+ // get a string for the current time. E.g.,
+ // Cilk runtime initialized: Thu Jun 10 13:28:00 2010
+ t = time(NULL);
+ strftime(buf, 256, "%a %b %d %H:%M:%S %Y", localtime(&t));
+ fprintf(fp, "Cilk runtime initialized: %s\n", buf);
+
+ // Print runtime info. E.g.,
+ // Cilk runtime information
+ // ========================
+ // Cilk version: 2.0.0 Build 9184
+ // Built by willtor on host willtor-desktop
+ // Compilation date: Thu Jun 10 13:27:42 2010
+ // Compiled with ICC V99.9.9, ICC build date: 20100610
+
+ fprintf(fp, "\nCilk runtime information\n");
+ fprintf(fp, "========================\n");
+ fprintf(fp, "Cilk version: %d.%d.%d Build %d\n",
+ VERSION_MAJOR,
+ VERSION_MINOR,
+ VERSION_REV,
+ VERSION_BUILD);
+#ifdef __VXWORKS__
+ char * vxWorksVer = VXWORKS_VERSION;
+ fprintf(fp, "Cross compiled for %s\n",vxWorksVer);
+ // User and host are not available if VxWorks is cross-compiled on a Windows build host.
+#else
+
+ // User and host are not available for GCC builds
+#ifdef BUILD_USER
+ fprintf(fp, "Built by "BUILD_USER" on host "BUILD_HOST"\n");
+#endif // BUILD_USER
+#endif // __VXWORKS__
+
+ // GCC has requested that this be removed for GCC builds
+#ifdef BUILD_USER
+ fprintf(fp, "Compilation date: "__DATE__" "__TIME__"\n");
+#endif // BUILD_USER
+
+#ifdef __INTEL_COMPILER
+ // Compiled by the Intel C/C++ compiler.
+ fprintf(fp, "Compiled with ICC V%d.%d.%d, ICC build date: %d\n",
+ __INTEL_COMPILER / 100,
+ (__INTEL_COMPILER / 10) % 10,
+ __INTEL_COMPILER % 10,
+ __INTEL_COMPILER_BUILD_DATE);
+#else
+ // Compiled by GCC.
+ fprintf(fp, "Compiled with GCC V%d.%d.%d\n",
+ __GNUC__,
+ __GNUC_MINOR__,
+ __GNUC_PATCHLEVEL__);
+#endif // defined __INTEL_COMPILER
+
+ // Print system info. E.g.,
+ // System information
+ // ==================
+ // Cilk runtime path: /opt/icc/64/lib/libcilkrts.so.5
+ // System OS: Linux, release 2.6.28-19-generic
+ // System architecture: x86_64
+
+ err = uname(&sys_info);
+ fprintf(fp, "\nSystem information\n");
+ fprintf(fp, "==================\n");
+ fprintf(fp, "Cilk runtime path: %s\n", get_runtime_path());
+ fprintf(fp, "System OS: %s, release %s\n",
+ err < 0 ? "unknown" : sys_info.sysname,
+ err < 0 ? "?" : sys_info.release);
+ fprintf(fp, "System architecture: %s\n",
+ err < 0 ? "unknown" : sys_info.machine);
+
+ // Print thread info. E.g.,
+ // Thread information
+ // ==================
+ // System cores: 8
+ // Cilk workers requested: 8
+ // Thread creator: Private
+
+ fprintf(fp, "\nThread information\n");
+ fprintf(fp, "==================\n");
+#ifdef __VXWORKS__
+ fprintf(fp, "System cores: %d\n", (int)__builtin_popcount(vxCpuEnabledGet()));
+#else
+ fprintf(fp, "System cores: %d\n", (int)sysconf(_SC_NPROCESSORS_ONLN));
+#endif
+ fprintf(fp, "Cilk workers requested: %d\n", n);
+#if (PARALLEL_THREAD_CREATE)
+ fprintf(fp, "Thread creator: Private (parallel)\n");
+#else
+ fprintf(fp, "Thread creator: Private\n");
+#endif
+
+ if (fp != stderr && fp != stdout) fclose(fp);
+ else fflush(fp); // flush the handle buffer if it is stdout or stderr.
+}
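
Because the check is driven entirely by the environment, the report can also be requested programmatically before the runtime initializes. A sketch under stated assumptions (POSIX setenv; request_cilk_version_report is a hypothetical helper, not part of the runtime):

    #include <stdlib.h>

    /* Illustration only: ask the runtime to print its version report to
     * stderr when workers start.  "stdout" or a writable file path work
     * the same way, per write_version_file() above. */
    static int request_cilk_version_report(void)
    {
        return setenv("CILK_VERSION", "stderr", 1 /* overwrite */);
    }
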
+
+
+/*
+ * __cilkrts_establish_c_stack
+ *
+ * Tell Cilkscreen about the user stack bounds.
+ *
+ * Note that the Cilk V1 runtime only included the portion of the stack from
+ * the entry into Cilk, down. We don't appear to be able to find that, but
+ * I think this will be sufficient.
+ */
+
+void __cilkrts_establish_c_stack(void)
+{
+ /* FIXME: Not implemented. */
+
+ /* TBD: Do we need this */
+ /*
+ void __cilkrts_cilkscreen_establish_c_stack(char *begin, char *end);
+
+ size_t r;
+ MEMORY_BASIC_INFORMATION mbi;
+
+ r = VirtualQuery (&mbi,
+ &mbi,
+ sizeof(mbi));
+
+ __cilkrts_cilkscreen_establish_c_stack((char *)mbi.BaseAddress,
+ (char *)mbi.BaseAddress + mbi.RegionSize);
+ */
+}
+
+
+/*
+ * internal_enforce_global_visibility
+ *
+ * Ensure global visibility of public symbols, for proper Cilk-TBB interop.
+ *
+ * If the Cilk runtime is loaded dynamically, its symbols might remain unavailable
+ * for global search with dladdr; that might prevent TBB from finding Cilk
+ * in the process address space and initiating the interop protocol.
+ * The workaround is for the library to open itself with RTLD_GLOBAL flag.
+ */
+
+static __attribute__((noinline))
+void internal_enforce_global_visibility()
+{
+ void* handle = dlopen( get_runtime_path(), RTLD_GLOBAL|RTLD_LAZY );
+
+ /* For proper reference counting, close the handle immediately. */
+ if( handle) dlclose(handle);
+}
+
+/*
+ Local Variables: **
+ c-file-style:"bsd" **
+ c-basic-offset:4 **
+ indent-tabs-mode:nil **
+ End: **
+*/
diff --git a/gcc-4.9/libcilkrts/runtime/sysdep.h b/gcc-4.9/libcilkrts/runtime/sysdep.h
new file mode 100644
index 000000000..ea939acc1
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/sysdep.h
@@ -0,0 +1,285 @@
+/* sysdep.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file sysdep.h
+ *
+ * @brief Common system-dependent functions
+ */
+
+#ifndef INCLUDED_SYSDEP_DOT_H
+#define INCLUDED_SYSDEP_DOT_H
+
+#include <cilk/common.h>
+#include <internal/abi.h>
+
+#include "global_state.h"
+#include "full_frame.h"
+#include "os.h"
+#include "os_mutex.h"
+
+/**
+ * @brief Default page size for Cilk stacks.
+ *
+ * All Cilk stacks should have size that is a multiple of this value.
+ */
+#define PAGE 4096
+
+/**
+ * @brief Size of a scheduling stack.
+ *
+ * A scheduling stack is used by system workers to execute runtime
+ * code. Since this stack only executes runtime functions, we
+ * don't need it to be a full-size stack.
+ *
+ * The number "18" should be small, since the runtime doesn't require a
+ * large stack, but large enough to call "printf" for debugging.
+ */
+#define CILK_SCHEDULING_STACK_SIZE (18*PAGE)
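
For concreteness, with PAGE == 4096 this works out to 18 * 4096 = 73,728 bytes, i.e. 72 KB reserved per scheduling stack.
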
+
+__CILKRTS_BEGIN_EXTERN_C
+
+
+/**
+ * Code to initialize the system-dependent portion of the global_state_t
+ *
+ * @param g Pointer to the global state.
+ */
+COMMON_SYSDEP
+void __cilkrts_init_global_sysdep(global_state_t *g);
+
+/**
+ * Code to clean up the system-dependent portion of the global_state_t
+ *
+ * @param g Pointer to the global state.
+ */
+COMMON_SYSDEP
+void __cilkrts_destroy_global_sysdep(global_state_t *g);
+
+/**
+ * Passes stack range to Cilkscreen. This functionality should be moved
+ * into Cilkscreen.
+ */
+COMMON_SYSDEP
+void __cilkrts_establish_c_stack(void);
+
+
+/**
+ * Save system dependent information in the full_frame and
+ * __cilkrts_stack_frame. Part of promoting a
+ * __cilkrts_stack_frame to a full_frame.
+ *
+ * @param w The worker the frame was running on. Not used.
+ * @param ff The full frame that is being created for the
+ * __cilkrts_stack_frame.
+ * @param sf The __cilkrts_stack_frame that's being promoted
+ * to a full frame.
+ * @param state_valid ?
+ * @param why A description of why make_unrunnable was called.
+ * Used for debugging.
+ */
+COMMON_SYSDEP
+void __cilkrts_make_unrunnable_sysdep(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf,
+ int state_valid,
+ const char *why);
+
+
+/**
+ * OS-specific code to spawn worker threads.
+ *
+ * @param g The global state.
+ * @param n Number of worker threads to start.
+ */
+COMMON_SYSDEP
+void __cilkrts_start_workers(global_state_t *g, int n);
+
+/**
+ * @brief OS-specific code to stop worker threads.
+ *
+ * @param g The global state.
+ */
+COMMON_SYSDEP
+void __cilkrts_stop_workers(global_state_t *g);
+
+/**
+ * @brief Imports a user thread the first time it returns to a stolen parent.
+ *
+ * The thread has been bound to a worker, but additional steps need to
+ * be taken to start running a scheduling loop.
+ *
+ * @param w The worker bound to the thread.
+ */
+COMMON_SYSDEP
+void __cilkrts_sysdep_import_user_thread(__cilkrts_worker *w);
+
+/**
+ * @brief Function to be run for each of the system worker threads.
+ *
+ * This declaration also appears in cilk/cilk_undocumented.h -- don't
+ * change one declaration without also changing the other.
+ *
+ * @param arg The context value passed to the thread creation routine for
+ * the OS we're running on.
+ *
+ * @returns OS dependent.
+ */
+#ifdef _WIN32
+/* Do not use CILK_API because __cilkrts_worker_stub must be __stdcall */
+CILK_EXPORT unsigned __CILKRTS_NOTHROW __stdcall
+__cilkrts_worker_stub(void *arg);
+#else
+/* Do not use CILK_API because __cilkrts_worker_stub must have default visibility */
+__attribute__((visibility("default")))
+void* __CILKRTS_NOTHROW __cilkrts_worker_stub(void *arg);
+#endif
+
+/**
+ * Initialize any OS-dependent portions of a newly created
+ * __cilkrts_worker.
+ *
+ * Exported for Piersol. Without the export, Piersol doesn't display
+ * useful information in the stack trace. This declaration also appears in
+ * cilk/cilk_undocumented.h -- do not modify one without modifying the other.
+ *
+ * @param w The worker being initialized.
+ */
+COMMON_SYSDEP
+CILK_EXPORT
+void __cilkrts_init_worker_sysdep(__cilkrts_worker *w);
+
+/**
+ * Deallocate any OS-dependent portions of a __cilkrts_worker.
+ *
+ * @param w The worker being deallocated.
+ */
+COMMON_SYSDEP
+void __cilkrts_destroy_worker_sysdep(__cilkrts_worker *w);
+
+/**
+ * Called to do any OS-dependent setup before starting execution on a
+ * frame. Mostly deals with exception handling data.
+ *
+ * @param w The worker the frame will run on.
+ * @param ff The full_frame that is about to be resumed.
+ */
+COMMON_SYSDEP
+void __cilkrts_setup_for_execution_sysdep(__cilkrts_worker *w,
+ full_frame *ff);
+
+/**
+ * @brief OS-specific implementation of resetting fiber and frame state
+ * to resume execution.
+ *
+ * This method:
+ * 1. Calculates the value of stack pointer where we should resume
+ * execution of "sf". This calculation uses info stored in the
+ * fiber, and takes into account alignment and frame size.
+ * 2. Updates sf and ff to match the calculated stack pointer.
+ *
+ * On Unix, the stack pointer calculation looks up the base of the
+ * stack from the fiber.
+ *
+ * On Windows, this calculation calls "alloca" to find a stack
+ * pointer on the currently executing stack. Thus, the Windows code
+ * assumes @c fiber is the currently executing fiber.
+ *
+ * @param fiber fiber to resume execution on.
+ * @param ff full_frame for the frame we're resuming.
+ * @param sf __cilkrts_stack_frame that we should resume
+ * @return The calculated stack pointer.
+ */
+COMMON_SYSDEP
+char* sysdep_reset_jump_buffers_for_resume(cilk_fiber* fiber,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf);
+
+/**
+ * @brief System-dependent longjmp to user code for resuming execution
+ * of a @c __cilkrts_stack_frame.
+ *
+ * This method:
+ * - Changes the stack pointer in @c sf to @c new_sp.
+ * - If @c ff_for_exceptions is not NULL, changes fields in @c sf and
+ * @c ff_for_exceptions for exception processing.
+ * - Restores any floating point state
+ * - Finishes with a longjmp to user code, never to return.
+ *
+ * @param new_sp stack pointer where we should resume execution
+ * @param sf @c __cilkrts_stack_frame for the frame we're resuming.
+ * @param ff_for_exceptions full_frame to save exception info into, if necessary
+ */
+COMMON_SYSDEP
+NORETURN
+sysdep_longjmp_to_sf(char* new_sp,
+ __cilkrts_stack_frame *sf,
+ full_frame *ff_for_exceptions);
+
+/**
+ * @brief System-dependent code to save floating point control information
+ * to a @c __cilkrts_stack_frame. This function will be called by compilers
+ * that cannot inline the code.
+ *
+ * Note that this function does *not* save the current floating point
+ * registers. It saves the floating point control words that control
+ * precision and rounding and stuff like that.
+ *
+ * This function will be a noop for architectures that don't have warts
+ * like the floating point control words, or where the information is
+ * already being saved by the setjmp.
+ *
+ * @param sf @c __cilkrts_stack_frame for the frame we're
+ * saving the floating point control information in.
+ */
+COMMON_SYSDEP
+void
+sysdep_save_fp_ctrl_state(__cilkrts_stack_frame *sf);
+
+
+/**
+ * @brief restore x86 floating point state
+ *
+ * Only used for x86 and Intel64 processors
+ */
+COMMON_SYSDEP
+void restore_x86_fp_state(__cilkrts_stack_frame *sf);
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_SYSDEP_DOT_H)
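
The two resume entry points above are designed to compose. A sketch of the typical sequence, using only the declarations in this header (the surrounding control flow is illustrative, not the runtime's actual scheduler code):

    /* Illustration only: never returns; control longjmps into user code. */
    static void resume_frame_sketch(cilk_fiber *fiber,
                                    full_frame *ff,
                                    __cilkrts_stack_frame *sf)
    {
        char *sp = sysdep_reset_jump_buffers_for_resume(fiber, ff, sf);
        sysdep_longjmp_to_sf(sp, sf, ff);
    }
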
diff --git a/gcc-4.9/libcilkrts/runtime/worker_mutex.c b/gcc-4.9/libcilkrts/runtime/worker_mutex.c
new file mode 100644
index 000000000..380d6255a
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/worker_mutex.c
@@ -0,0 +1,121 @@
+/* worker_mutex.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+#include "worker_mutex.h"
+#include "bug.h"
+#include "os.h"
+#include "stats.h"
+
+/* m->lock == 1 means that mutex M is locked */
+#define TRY_ACQUIRE(m) (__cilkrts_xchg(&(m)->lock, 1) == 0)
+
+/* ICC 11.1+ understands release semantics and generates an
+ ordinary store with a software memory barrier. */
+#if __ICC >= 1110
+#define RELEASE(m) __sync_lock_release(&(m)->lock)
+#else
+#define RELEASE(m) __cilkrts_xchg(&(m)->lock, 0)
+#endif
+
+void __cilkrts_mutex_init(struct mutex *m)
+{
+ m->owner = 0;
+
+ // Use a simple assignment so Inspector doesn't bug us about the
+ // interlocked exchange doing a read of an uninitialized variable.
+ // By definition there can't be a race when we're initializing the
+ // lock...
+ m->lock = 0;
+}
+
+void __cilkrts_mutex_lock(__cilkrts_worker *w, struct mutex *m)
+{
+ int count;
+ const int maxspin = 1000; /* SWAG */
+
+ NOTE_INTERVAL(w, INTERVAL_MUTEX_LOCK);
+ if (!TRY_ACQUIRE(m)) {
+ START_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING);
+ count = 0;
+ do {
+ do {
+ __cilkrts_short_pause();
+ if (++count >= maxspin) {
+ STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING);
+ START_INTERVAL(w, INTERVAL_MUTEX_LOCK_YIELDING);
+ /* let the OS reschedule every once in a while */
+ __cilkrts_yield();
+ STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_YIELDING);
+ START_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING);
+ count = 0;
+ }
+ } while (m->lock != 0);
+ } while (!TRY_ACQUIRE(m));
+ STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING);
+ }
+
+ CILK_ASSERT(m->owner == 0);
+ m->owner = w;
+}
+
+int __cilkrts_mutex_trylock(__cilkrts_worker *w, struct mutex *m)
+{
+ NOTE_INTERVAL(w, INTERVAL_MUTEX_TRYLOCK);
+ if (TRY_ACQUIRE(m)) {
+ CILK_ASSERT(m->owner == 0);
+ m->owner = w;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+void __cilkrts_mutex_unlock(__cilkrts_worker *w, struct mutex *m)
+{
+ CILK_ASSERT(m->owner == w);
+ m->owner = 0;
+ RELEASE(m);
+}
+
+void __cilkrts_mutex_destroy(__cilkrts_worker *w, struct mutex *m)
+{
+ (void)w; /* unused */
+ (void)m; /* unused */
+}
+
+/* End worker_mutex.c */
diff --git a/gcc-4.9/libcilkrts/runtime/worker_mutex.h b/gcc-4.9/libcilkrts/runtime/worker_mutex.h
new file mode 100644
index 000000000..c2c68247e
--- /dev/null
+++ b/gcc-4.9/libcilkrts/runtime/worker_mutex.h
@@ -0,0 +1,131 @@
+/* worker_mutex.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2013, Intel Corporation
+ * All rights reserved.
+ *
+ * @copyright
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * @copyright
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************/
+
+/**
+ * @file worker_mutex.h
+ *
+ * @brief Support for Cilk runtime mutexes.
+ *
+ * Cilk runtime mutexes are implemented as simple spin loops.
+ */
+
+#ifndef INCLUDED_WORKER_MUTEX_DOT_H
+#define INCLUDED_WORKER_MUTEX_DOT_H
+
+#include <cilk/common.h>
+#include "rts-common.h"
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * Mutexes are treated as an abstract data type within the Cilk
+ * runtime system. They are implemented as simple spin loops and
+ * owned by a __cilkrts_worker.
+ */
+typedef struct mutex {
+ /** Mutex spin loop variable. 0 if unowned, 1 if owned. */
+ volatile int lock;
+
+ /** Worker that owns the mutex. Must be 0 if mutex is unowned. */
+ __cilkrts_worker *owner;
+} mutex;
+
+/**
+ * @brief Initialize a Cilk mutex.
+ *
+ * @param m Mutex to be initialized.
+ */
+COMMON_PORTABLE
+void __cilkrts_mutex_init(struct mutex *m);
+
+/**
+ * @brief Acquire a Cilk mutex.
+ *
+ * If statistics are being gathered, the time spent
+ * acquiring the mutex will be attributed to the specified worker.
+ *
+ * @param w Worker that will become the owner of this mutex.
+ * @param m Mutex to be acquired.
+ */
+COMMON_PORTABLE
+void __cilkrts_mutex_lock(__cilkrts_worker *w,
+ struct mutex *m);
+/**
+ * @brief Attempt to lock a Cilk mutex and fail if it isn't available.
+ *
+ * If statistics are being gathered, the time spent acquiring the
+ * mutex will be attributed to the specified worker.
+ *
+ * @param w Worker that will become the owner of this mutex.
+ * @param m Mutex to be acquired.
+ *
+ * @return 1 if the mutex was acquired.
+ * @return 0 if the mutex was not acquired.
+ */
+COMMON_PORTABLE
+int __cilkrts_mutex_trylock(__cilkrts_worker *w,
+ struct mutex *m);
+
+/**
+ * @brief Release a Cilk mutex.
+ *
+ * If statistics are being gathered, the time spent
+ * acquiring the mutex will be attributed to the specified worker.
+ *
+ * @pre The mutex must be owned by the worker.
+ *
+ * @param w Worker that owns this mutex.
+ * @param m Mutex to be released.
+ */
+COMMON_PORTABLE
+void __cilkrts_mutex_unlock(__cilkrts_worker *w,
+ struct mutex *m);
+
+/**
+ * @brief Deallocate a Cilk mutex. Currently does nothing.
+ *
+ * @param w Unused.
+ * @param m Mutex to be deallocated.
+ */
+COMMON_PORTABLE
+void __cilkrts_mutex_destroy(__cilkrts_worker *w,
+ struct mutex *m);
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_WORKER_MUTEX_DOT_H)
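
A minimal sketch of the mutex life cycle from a worker's perspective (hypothetical worker w and a locally declared mutex; real runtime mutexes are embedded in longer-lived structures):

    /* Illustration only. */
    static void mutex_usage_sketch(__cilkrts_worker *w)
    {
        struct mutex m;

        __cilkrts_mutex_init(&m);

        if (__cilkrts_mutex_trylock(w, &m)) {
            /* Acquired without contention. */
            __cilkrts_mutex_unlock(w, &m);
        } else {
            __cilkrts_mutex_lock(w, &m);   /* spins, then yields, until acquired */
            __cilkrts_mutex_unlock(w, &m);
        }

        __cilkrts_mutex_destroy(w, &m);
    }
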