aboutsummaryrefslogtreecommitdiffstats
path: root/test/pthreadpool.cc
diff options
context:
space:
mode:
Diffstat (limited to 'test/pthreadpool.cc')
-rw-r--r--test/pthreadpool.cc1790
1 files changed, 1790 insertions, 0 deletions
diff --git a/test/pthreadpool.cc b/test/pthreadpool.cc
index 4faf3be..b8a6803 100644
--- a/test/pthreadpool.cc
+++ b/test/pthreadpool.cc
@@ -54,6 +54,9 @@ const size_t kIncrementIterations = 101;
const size_t kIncrementIterations5D = 7;
const size_t kIncrementIterations6D = 3;
+const uint32_t kMaxUArchIndex = 0;
+const uint32_t kDefaultUArchIndex = 42;
+
TEST(CreateAndDestroy, NullThreadPool) {
pthreadpool* threadpool = nullptr;
@@ -274,6 +277,29 @@ TEST(Parallelize1D, MultiThreadPoolEachItemProcessedMultipleTimes) {
}
}
+static void IncrementSame1D(std::atomic_int* num_processed_items, size_t i) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+}
+
+TEST(Parallelize1D, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_1d(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_1d_t>(IncrementSame1D),
+ static_cast<void*>(&num_processed_items),
+ kParallelize1DRange,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize1DRange);
+}
+
static void WorkImbalance1D(std::atomic_int* num_processed_items, size_t i) {
num_processed_items->fetch_add(1, std::memory_order_relaxed);
if (i == 0) {
@@ -303,6 +329,321 @@ TEST(Parallelize1D, MultiThreadPoolWorkStealing) {
EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize1DRange);
}
+static void ComputeNothing1DWithUArch(void*, uint32_t, size_t) {
+}
+
+TEST(Parallelize1DWithUArch, SingleThreadPoolCompletes) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_1d_with_uarch(threadpool.get(),
+ ComputeNothing1DWithUArch,
+ nullptr,
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+}
+
+TEST(Parallelize1DWithUArch, MultiThreadPoolCompletes) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_1d_with_uarch(
+ threadpool.get(),
+ ComputeNothing1DWithUArch,
+ nullptr,
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+}
+
+static void CheckUArch1DWithUArch(void*, uint32_t uarch_index, size_t) {
+ if (uarch_index != kDefaultUArchIndex) {
+ EXPECT_LE(uarch_index, kMaxUArchIndex);
+ }
+}
+
+TEST(Parallelize1DWithUArch, SingleThreadPoolUArchInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_1d_with_uarch(threadpool.get(),
+ CheckUArch1DWithUArch,
+ nullptr,
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+}
+
+TEST(Parallelize1DWithUArch, MultiThreadPoolUArchInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_1d_with_uarch(
+ threadpool.get(),
+ CheckUArch1DWithUArch,
+ nullptr,
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+}
+
+static void CheckBounds1DWithUArch(void*, uint32_t, size_t i) {
+ EXPECT_LT(i, kParallelize1DRange);
+}
+
+TEST(Parallelize1DWithUArch, SingleThreadPoolAllItemsInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_1d_with_uarch(
+ threadpool.get(),
+ CheckBounds1DWithUArch,
+ nullptr,
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+}
+
+TEST(Parallelize1DWithUArch, MultiThreadPoolAllItemsInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_1d_with_uarch(
+ threadpool.get(),
+ CheckBounds1DWithUArch,
+ nullptr,
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+}
+
+static void SetTrue1DWithUArch(std::atomic_bool* processed_indicators, uint32_t, size_t i) {
+ processed_indicators[i].store(true, std::memory_order_relaxed);
+}
+
+TEST(Parallelize1DWithUArch, SingleThreadPoolAllItemsProcessed) {
+ std::vector<std::atomic_bool> indicators(kParallelize1DRange);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_1d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_1d_with_id_t>(SetTrue1DWithUArch),
+ static_cast<void*>(indicators.data()),
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize1DRange; i++) {
+ EXPECT_TRUE(indicators[i].load(std::memory_order_relaxed))
+ << "Element " << i << " not processed";
+ }
+}
+
+TEST(Parallelize1DWithUArch, MultiThreadPoolAllItemsProcessed) {
+ std::vector<std::atomic_bool> indicators(kParallelize1DRange);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_1d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_1d_with_id_t>(SetTrue1DWithUArch),
+ static_cast<void*>(indicators.data()),
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize1DRange; i++) {
+ EXPECT_TRUE(indicators[i].load(std::memory_order_relaxed))
+ << "Element " << i << " not processed";
+ }
+}
+
+static void Increment1DWithUArch(std::atomic_int* processed_counters, uint32_t, size_t i) {
+ processed_counters[i].fetch_add(1, std::memory_order_relaxed);
+}
+
+TEST(Parallelize1DWithUArch, SingleThreadPoolEachItemProcessedOnce) {
+ std::vector<std::atomic_int> counters(kParallelize1DRange);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_1d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_1d_with_id_t>(Increment1DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize1DRange; i++) {
+ EXPECT_EQ(counters[i].load(std::memory_order_relaxed), 1)
+ << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times (expected: 1)";
+ }
+}
+
+TEST(Parallelize1DWithUArch, MultiThreadPoolEachItemProcessedOnce) {
+ std::vector<std::atomic_int> counters(kParallelize1DRange);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_1d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_1d_with_id_t>(Increment1DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize1DRange; i++) {
+ EXPECT_EQ(counters[i].load(std::memory_order_relaxed), 1)
+ << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times (expected: 1)";
+ }
+}
+
+TEST(Parallelize1DWithUArch, SingleThreadPoolEachItemProcessedMultipleTimes) {
+ std::vector<std::atomic_int> counters(kParallelize1DRange);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ for (size_t iteration = 0; iteration < kIncrementIterations; iteration++) {
+ pthreadpool_parallelize_1d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_1d_with_id_t>(Increment1DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+ }
+
+ for (size_t i = 0; i < kParallelize1DRange; i++) {
+ EXPECT_EQ(counters[i].load(std::memory_order_relaxed), kIncrementIterations)
+ << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times "
+ << "(expected: " << kIncrementIterations << ")";
+ }
+}
+
+TEST(Parallelize1DWithUArch, MultiThreadPoolEachItemProcessedMultipleTimes) {
+ std::vector<std::atomic_int> counters(kParallelize1DRange);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ for (size_t iteration = 0; iteration < kIncrementIterations; iteration++) {
+ pthreadpool_parallelize_1d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_1d_with_id_t>(Increment1DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+ }
+
+ for (size_t i = 0; i < kParallelize1DRange; i++) {
+ EXPECT_EQ(counters[i].load(std::memory_order_relaxed), kIncrementIterations)
+ << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times "
+ << "(expected: " << kIncrementIterations << ")";
+ }
+}
+
+static void IncrementSame1DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t i) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+}
+
+TEST(Parallelize1DWithUArch, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_1d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_1d_with_id_t>(IncrementSame1DWithUArch),
+ static_cast<void*>(&num_processed_items),
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize1DRange);
+}
+
+static void WorkImbalance1DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t i) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+ if (i == 0) {
+ /* Spin-wait until all items are computed */
+ while (num_processed_items->load(std::memory_order_relaxed) != kParallelize1DRange) {
+ std::atomic_thread_fence(std::memory_order_acquire);
+ }
+ }
+}
+
+TEST(Parallelize1DWithUArch, MultiThreadPoolWorkStealing) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_1d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_1d_with_id_t>(WorkImbalance1DWithUArch),
+ static_cast<void*>(&num_processed_items),
+ kDefaultUArchIndex,
+ kMaxUArchIndex,
+ kParallelize1DRange,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize1DRange);
+}
+
static void ComputeNothing1DTile1D(void*, size_t, size_t) {
}
@@ -545,6 +886,31 @@ TEST(Parallelize1DTile1D, MultiThreadPoolEachItemProcessedMultipleTimes) {
}
}
+static void IncrementSame1DTile1D(std::atomic_int* num_processed_items, size_t start_i, size_t tile_i) {
+ for (size_t i = start_i; i < start_i + tile_i; i++) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+ }
+}
+
+TEST(Parallelize1DTile1D, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_1d_tile_1d(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_1d_tile_1d_t>(IncrementSame1DTile1D),
+ static_cast<void*>(&num_processed_items),
+ kParallelize1DTile1DRange, kParallelize1DTile1DTile,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize1DTile1DRange);
+}
+
static void WorkImbalance1DTile1D(std::atomic_int* num_processed_items, size_t start_i, size_t tile_i) {
num_processed_items->fetch_add(tile_i, std::memory_order_relaxed);
if (start_i == 0) {
@@ -801,6 +1167,29 @@ TEST(Parallelize2D, MultiThreadPoolEachItemProcessedMultipleTimes) {
}
}
+static void IncrementSame2D(std::atomic_int* num_processed_items, size_t i, size_t j) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+}
+
+TEST(Parallelize2D, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_2d(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_2d_t>(IncrementSame2D),
+ static_cast<void*>(&num_processed_items),
+ kParallelize2DRangeI, kParallelize2DRangeJ,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize2DRangeI * kParallelize2DRangeJ);
+}
+
static void WorkImbalance2D(std::atomic_int* num_processed_items, size_t i, size_t j) {
num_processed_items->fetch_add(1, std::memory_order_relaxed);
if (i == 0 && j == 0) {
@@ -1097,6 +1486,31 @@ TEST(Parallelize2DTile1D, MultiThreadPoolEachItemProcessedMultipleTimes) {
}
}
+static void IncrementSame2DTile1D(std::atomic_int* num_processed_items, size_t i, size_t start_j, size_t tile_j) {
+ for (size_t j = start_j; j < start_j + tile_j; j++) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+ }
+}
+
+TEST(Parallelize2DTile1D, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_2d_tile_1d(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_2d_tile_1d_t>(IncrementSame2DTile1D),
+ static_cast<void*>(&num_processed_items),
+ kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ);
+}
+
static void WorkImbalance2DTile1D(std::atomic_int* num_processed_items, size_t i, size_t start_j, size_t tile_j) {
num_processed_items->fetch_add(tile_j, std::memory_order_relaxed);
if (i == 0 && start_j == 0) {
@@ -1415,6 +1829,34 @@ TEST(Parallelize2DTile2D, MultiThreadPoolEachItemProcessedMultipleTimes) {
}
}
+static void IncrementSame2DTile2D(std::atomic_int* num_processed_items, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
+ for (size_t i = start_i; i < start_i + tile_i; i++) {
+ for (size_t j = start_j; j < start_j + tile_j; j++) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize2DTile2D, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_2d_tile_2d(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_2d_tile_2d_t>(IncrementSame2DTile2D),
+ static_cast<void*>(&num_processed_items),
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
+}
+
static void WorkImbalance2DTile2D(std::atomic_int* num_processed_items, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
num_processed_items->fetch_add(tile_i * tile_j, std::memory_order_relaxed);
if (start_i == 0 && start_j == 0) {
@@ -1445,6 +1887,405 @@ TEST(Parallelize2DTile2D, MultiThreadPoolWorkStealing) {
EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
}
+static void ComputeNothing2DTile2DWithUArch(void*, uint32_t, size_t, size_t, size_t, size_t) {
+}
+
+TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolCompletes) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(threadpool.get(),
+ ComputeNothing2DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+}
+
+TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolCompletes) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ ComputeNothing2DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+}
+
+static void CheckUArch2DTile2DWithUArch(void*, uint32_t uarch_index, size_t, size_t, size_t, size_t) {
+ if (uarch_index != kDefaultUArchIndex) {
+ EXPECT_LE(uarch_index, kMaxUArchIndex);
+ }
+}
+
+TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolUArchInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckUArch2DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+}
+
+TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolUArchInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckUArch2DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+}
+
+static void CheckBounds2DTile2DWithUArch(void*, uint32_t, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
+ EXPECT_LT(start_i, kParallelize2DTile2DRangeI);
+ EXPECT_LT(start_j, kParallelize2DTile2DRangeJ);
+ EXPECT_LE(start_i + tile_i, kParallelize2DTile2DRangeI);
+ EXPECT_LE(start_j + tile_j, kParallelize2DTile2DRangeJ);
+}
+
+TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolAllItemsInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckBounds2DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+}
+
+TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolAllItemsInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckBounds2DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+}
+
+static void CheckTiling2DTile2DWithUArch(void*, uint32_t, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
+ EXPECT_GT(tile_i, 0);
+ EXPECT_LE(tile_i, kParallelize2DTile2DTileI);
+ EXPECT_EQ(start_i % kParallelize2DTile2DTileI, 0);
+ EXPECT_EQ(tile_i, std::min<size_t>(kParallelize2DTile2DTileI, kParallelize2DTile2DRangeI - start_i));
+
+ EXPECT_GT(tile_j, 0);
+ EXPECT_LE(tile_j, kParallelize2DTile2DTileJ);
+ EXPECT_EQ(start_j % kParallelize2DTile2DTileJ, 0);
+ EXPECT_EQ(tile_j, std::min<size_t>(kParallelize2DTile2DTileJ, kParallelize2DTile2DRangeJ - start_j));
+}
+
+TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolUniformTiling) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckTiling2DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+}
+
+TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolUniformTiling) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckTiling2DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+}
+
+static void SetTrue2DTile2DWithUArch(std::atomic_bool* processed_indicators, uint32_t, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
+ for (size_t i = start_i; i < start_i + tile_i; i++) {
+ for (size_t j = start_j; j < start_j + tile_j; j++) {
+ const size_t linear_idx = i * kParallelize2DTile2DRangeJ + j;
+ processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolAllItemsProcessed) {
+ std::vector<std::atomic_bool> indicators(kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(SetTrue2DTile2DWithUArch),
+ static_cast<void*>(indicators.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize2DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize2DTile2DRangeJ; j++) {
+ const size_t linear_idx = i * kParallelize2DTile2DRangeJ + j;
+ EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
+ << "Element (" << i << ", " << j << ") not processed";
+ }
+ }
+}
+
+TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolAllItemsProcessed) {
+ std::vector<std::atomic_bool> indicators(kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(SetTrue2DTile2DWithUArch),
+ static_cast<void*>(indicators.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize2DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize2DTile2DRangeJ; j++) {
+ const size_t linear_idx = i * kParallelize2DTile2DRangeJ + j;
+ EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
+ << "Element (" << i << ", " << j << ") not processed";
+ }
+ }
+}
+
+static void Increment2DTile2DWithUArch(std::atomic_int* processed_counters, uint32_t, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
+ for (size_t i = start_i; i < start_i + tile_i; i++) {
+ for (size_t j = start_j; j < start_j + tile_j; j++) {
+ const size_t linear_idx = i * kParallelize2DTile2DRangeJ + j;
+ processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolEachItemProcessedOnce) {
+ std::vector<std::atomic_int> counters(kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(Increment2DTile2DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize2DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize2DTile2DRangeJ; j++) {
+ const size_t linear_idx = i * kParallelize2DTile2DRangeJ + j;
+ EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
+ << "Element (" << i << ", " << j << ") was processed "
+ << counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
+ }
+ }
+}
+
+TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolEachItemProcessedOnce) {
+ std::vector<std::atomic_int> counters(kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(Increment2DTile2DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize2DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize2DTile2DRangeJ; j++) {
+ const size_t linear_idx = i * kParallelize2DTile2DRangeJ + j;
+ EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
+ << "Element (" << i << ", " << j << ") was processed "
+ << counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
+ }
+ }
+}
+
+TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolEachItemProcessedMultipleTimes) {
+ std::vector<std::atomic_int> counters(kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ for (size_t iteration = 0; iteration < kIncrementIterations; iteration++) {
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(Increment2DTile2DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+ }
+
+ for (size_t i = 0; i < kParallelize2DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize2DTile2DRangeJ; j++) {
+ const size_t linear_idx = i * kParallelize2DTile2DRangeJ + j;
+ EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
+ << "Element (" << i << ", " << j << ") was processed "
+ << counters[linear_idx].load(std::memory_order_relaxed) << " times "
+ << "(expected: " << kIncrementIterations << ")";
+ }
+ }
+}
+
+TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolEachItemProcessedMultipleTimes) {
+ std::vector<std::atomic_int> counters(kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ for (size_t iteration = 0; iteration < kIncrementIterations; iteration++) {
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(Increment2DTile2DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+ }
+
+ for (size_t i = 0; i < kParallelize2DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize2DTile2DRangeJ; j++) {
+ const size_t linear_idx = i * kParallelize2DTile2DRangeJ + j;
+ EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
+ << "Element (" << i << ", " << j << ") was processed "
+ << counters[linear_idx].load(std::memory_order_relaxed) << " times "
+ << "(expected: " << kIncrementIterations << ")";
+ }
+ }
+}
+
+static void IncrementSame2DTile2DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
+ for (size_t i = start_i; i < start_i + tile_i; i++) {
+ for (size_t j = start_j; j < start_j + tile_j; j++) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(IncrementSame2DTile2DWithUArch),
+ static_cast<void*>(&num_processed_items),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
+}
+
+static void WorkImbalance2DTile2DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
+ num_processed_items->fetch_add(tile_i * tile_j, std::memory_order_relaxed);
+ if (start_i == 0 && start_j == 0) {
+ /* Spin-wait until all items are computed */
+ while (num_processed_items->load(std::memory_order_relaxed) != kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ) {
+ std::atomic_thread_fence(std::memory_order_acquire);
+ }
+ }
+}
+
+TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolWorkStealing) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_2d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(WorkImbalance2DTile2DWithUArch),
+ static_cast<void*>(&num_processed_items),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
+ kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
+}
+
static void ComputeNothing3DTile2D(void*, size_t, size_t, size_t, size_t, size_t) {
}
@@ -1747,6 +2588,34 @@ TEST(Parallelize3DTile2D, MultiThreadPoolEachItemProcessedMultipleTimes) {
}
}
+static void IncrementSame3DTile2D(std::atomic_int* num_processed_items, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
+ for (size_t j = start_j; j < start_j + tile_j; j++) {
+ for (size_t k = start_k; k < start_k + tile_k; k++) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize3DTile2D, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_3d_tile_2d(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_3d_tile_2d_t>(IncrementSame3DTile2D),
+ static_cast<void*>(&num_processed_items),
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
+}
+
static void WorkImbalance3DTile2D(std::atomic_int* num_processed_items, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
num_processed_items->fetch_add(tile_j * tile_k, std::memory_order_relaxed);
if (i == 0 && start_j == 0 && start_k == 0) {
@@ -1777,6 +2646,418 @@ TEST(Parallelize3DTile2D, MultiThreadPoolWorkStealing) {
EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
}
+static void ComputeNothing3DTile2DWithUArch(void*, uint32_t, size_t, size_t, size_t, size_t, size_t) {
+}
+
+TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolCompletes) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(threadpool.get(),
+ ComputeNothing3DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+}
+
+TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolCompletes) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ ComputeNothing3DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+}
+
+static void CheckUArch3DTile2DWithUArch(void*, uint32_t uarch_index, size_t, size_t, size_t, size_t, size_t) {
+ if (uarch_index != kDefaultUArchIndex) {
+ EXPECT_LE(uarch_index, kMaxUArchIndex);
+ }
+}
+
+TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolUArchInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckUArch3DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+}
+
+TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolUArchInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckUArch3DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+}
+
+static void CheckBounds3DTile2DWithUArch(void*, uint32_t, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
+ EXPECT_LT(i, kParallelize3DTile2DRangeI);
+ EXPECT_LT(start_j, kParallelize3DTile2DRangeJ);
+ EXPECT_LT(start_k, kParallelize3DTile2DRangeK);
+ EXPECT_LE(start_j + tile_j, kParallelize3DTile2DRangeJ);
+ EXPECT_LE(start_k + tile_k, kParallelize3DTile2DRangeK);
+}
+
+TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolAllItemsInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckBounds3DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+}
+
+TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolAllItemsInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckBounds3DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+}
+
+static void CheckTiling3DTile2DWithUArch(void*, uint32_t, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
+ EXPECT_GT(tile_j, 0);
+ EXPECT_LE(tile_j, kParallelize3DTile2DTileJ);
+ EXPECT_EQ(start_j % kParallelize3DTile2DTileJ, 0);
+ EXPECT_EQ(tile_j, std::min<size_t>(kParallelize3DTile2DTileJ, kParallelize3DTile2DRangeJ - start_j));
+
+ EXPECT_GT(tile_k, 0);
+ EXPECT_LE(tile_k, kParallelize3DTile2DTileK);
+ EXPECT_EQ(start_k % kParallelize3DTile2DTileK, 0);
+ EXPECT_EQ(tile_k, std::min<size_t>(kParallelize3DTile2DTileK, kParallelize3DTile2DRangeK - start_k));
+}
+
+TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolUniformTiling) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckTiling3DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+}
+
+TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolUniformTiling) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckTiling3DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+}
+
+static void SetTrue3DTile2DWithUArch(std::atomic_bool* processed_indicators, uint32_t, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
+ for (size_t j = start_j; j < start_j + tile_j; j++) {
+ for (size_t k = start_k; k < start_k + tile_k; k++) {
+ const size_t linear_idx = (i * kParallelize3DTile2DRangeJ + j) * kParallelize3DTile2DRangeK + k;
+ processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolAllItemsProcessed) {
+ std::vector<std::atomic_bool> indicators(kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(SetTrue3DTile2DWithUArch),
+ static_cast<void*>(indicators.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize3DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize3DTile2DRangeJ; j++) {
+ for (size_t k = 0; k < kParallelize3DTile2DRangeK; k++) {
+ const size_t linear_idx = (i * kParallelize3DTile2DRangeJ + j) * kParallelize3DTile2DRangeK + k;
+ EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
+ << "Element (" << i << ", " << j << ", " << k << ") not processed";
+ }
+ }
+ }
+}
+
+TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolAllItemsProcessed) {
+ std::vector<std::atomic_bool> indicators(kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(SetTrue3DTile2DWithUArch),
+ static_cast<void*>(indicators.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize3DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize3DTile2DRangeJ; j++) {
+ for (size_t k = 0; k < kParallelize3DTile2DRangeK; k++) {
+ const size_t linear_idx = (i * kParallelize3DTile2DRangeJ + j) * kParallelize3DTile2DRangeK + k;
+ EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
+ << "Element (" << i << ", " << j << ", " << k << ") not processed";
+ }
+ }
+ }
+}
+
+static void Increment3DTile2DWithUArch(std::atomic_int* processed_counters, uint32_t, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
+ for (size_t j = start_j; j < start_j + tile_j; j++) {
+ for (size_t k = start_k; k < start_k + tile_k; k++) {
+ const size_t linear_idx = (i * kParallelize3DTile2DRangeJ + j) * kParallelize3DTile2DRangeK + k;
+ processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolEachItemProcessedOnce) {
+ std::vector<std::atomic_int> counters(kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(Increment3DTile2DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize3DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize3DTile2DRangeJ; j++) {
+ for (size_t k = 0; k < kParallelize3DTile2DRangeK; k++) {
+ const size_t linear_idx = (i * kParallelize3DTile2DRangeJ + j) * kParallelize3DTile2DRangeK + k;
+ EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
+ << "Element (" << i << ", " << j << ", " << k << ") was processed "
+ << counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
+ }
+ }
+ }
+}
+
+TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolEachItemProcessedOnce) {
+ std::vector<std::atomic_int> counters(kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(Increment3DTile2DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize3DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize3DTile2DRangeJ; j++) {
+ for (size_t k = 0; k < kParallelize3DTile2DRangeK; k++) {
+ const size_t linear_idx = (i * kParallelize3DTile2DRangeJ + j) * kParallelize3DTile2DRangeK + k;
+ EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
+ << "Element (" << i << ", " << j << ", " << k << ") was processed "
+ << counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
+ }
+ }
+ }
+}
+
+TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolEachItemProcessedMultipleTimes) {
+ std::vector<std::atomic_int> counters(kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ for (size_t iteration = 0; iteration < kIncrementIterations; iteration++) {
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(Increment3DTile2DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+ }
+
+ for (size_t i = 0; i < kParallelize3DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize3DTile2DRangeJ; j++) {
+ for (size_t k = 0; k < kParallelize3DTile2DRangeK; k++) {
+ const size_t linear_idx = (i * kParallelize3DTile2DRangeJ + j) * kParallelize3DTile2DRangeK + k;
+ EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
+ << "Element (" << i << ", " << j << ", " << k << ") was processed "
+ << counters[linear_idx].load(std::memory_order_relaxed) << " times "
+ << "(expected: " << kIncrementIterations << ")";
+ }
+ }
+ }
+}
+
+TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolEachItemProcessedMultipleTimes) {
+ std::vector<std::atomic_int> counters(kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ for (size_t iteration = 0; iteration < kIncrementIterations; iteration++) {
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(Increment3DTile2DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+ }
+
+ for (size_t i = 0; i < kParallelize3DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize3DTile2DRangeJ; j++) {
+ for (size_t k = 0; k < kParallelize3DTile2DRangeK; k++) {
+ const size_t linear_idx = (i * kParallelize3DTile2DRangeJ + j) * kParallelize3DTile2DRangeK + k;
+ EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
+ << "Element (" << i << ", " << j << ", " << k << ") was processed "
+ << counters[linear_idx].load(std::memory_order_relaxed) << " times "
+ << "(expected: " << kIncrementIterations << ")";
+ }
+ }
+ }
+}
+
+static void IncrementSame3DTile2DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
+ for (size_t j = start_j; j < start_j + tile_j; j++) {
+ for (size_t k = start_k; k < start_k + tile_k; k++) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(IncrementSame3DTile2DWithUArch),
+ static_cast<void*>(&num_processed_items),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
+}
+
+static void WorkImbalance3DTile2DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
+ num_processed_items->fetch_add(tile_j * tile_k, std::memory_order_relaxed);
+ if (i == 0 && start_j == 0 && start_k == 0) {
+ /* Spin-wait until all items are computed */
+ while (num_processed_items->load(std::memory_order_relaxed) != kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK) {
+ std::atomic_thread_fence(std::memory_order_acquire);
+ }
+ }
+}
+
+TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolWorkStealing) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_3d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(WorkImbalance3DTile2DWithUArch),
+ static_cast<void*>(&num_processed_items),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
+ kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
+}
+
static void ComputeNothing4DTile2D(void*, size_t, size_t, size_t, size_t, size_t, size_t) {
}
@@ -2092,6 +3373,34 @@ TEST(Parallelize4DTile2D, MultiThreadPoolEachItemProcessedMultipleTimes) {
}
}
+static void IncrementSame4DTile2D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
+ for (size_t k = start_k; k < start_k + tile_k; k++) {
+ for (size_t l = start_l; l < start_l + tile_l; l++) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize4DTile2D, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_4d_tile_2d(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_4d_tile_2d_t>(IncrementSame4DTile2D),
+ static_cast<void*>(&num_processed_items),
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
+}
+
static void WorkImbalance4DTile2D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
num_processed_items->fetch_add(tile_k * tile_l, std::memory_order_relaxed);
if (i == 0 && j == 0 && start_k == 0 && start_l == 0) {
@@ -2122,6 +3431,431 @@ TEST(Parallelize4DTile2D, MultiThreadPoolWorkStealing) {
EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
}
+static void ComputeNothing4DTile2DWithUArch(void*, uint32_t, size_t, size_t, size_t, size_t, size_t, size_t) {
+}
+
+TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolCompletes) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(threadpool.get(),
+ ComputeNothing4DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+}
+
+TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolCompletes) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ ComputeNothing4DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+}
+
+static void CheckUArch4DTile2DWithUArch(void*, uint32_t uarch_index, size_t, size_t, size_t, size_t, size_t, size_t) {
+ if (uarch_index != kDefaultUArchIndex) {
+ EXPECT_LE(uarch_index, kMaxUArchIndex);
+ }
+}
+
+TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolUArchInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckUArch4DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+}
+
+TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolUArchInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckUArch4DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+}
+
+static void CheckBounds4DTile2DWithUArch(void*, uint32_t, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
+ EXPECT_LT(i, kParallelize4DTile2DRangeI);
+ EXPECT_LT(j, kParallelize4DTile2DRangeJ);
+ EXPECT_LT(start_k, kParallelize4DTile2DRangeK);
+ EXPECT_LT(start_l, kParallelize4DTile2DRangeL);
+ EXPECT_LE(start_k + tile_k, kParallelize4DTile2DRangeK);
+ EXPECT_LE(start_l + tile_l, kParallelize4DTile2DRangeL);
+}
+
+TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolAllItemsInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckBounds4DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+}
+
+TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolAllItemsInBounds) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckBounds4DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+}
+
+static void CheckTiling4DTile2DWithUArch(void*, uint32_t, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
+ EXPECT_GT(tile_k, 0);
+ EXPECT_LE(tile_k, kParallelize4DTile2DTileK);
+ EXPECT_EQ(start_k % kParallelize4DTile2DTileK, 0);
+ EXPECT_EQ(tile_k, std::min<size_t>(kParallelize4DTile2DTileK, kParallelize4DTile2DRangeK - start_k));
+
+ EXPECT_GT(tile_l, 0);
+ EXPECT_LE(tile_l, kParallelize4DTile2DTileL);
+ EXPECT_EQ(start_l % kParallelize4DTile2DTileL, 0);
+ EXPECT_EQ(tile_l, std::min<size_t>(kParallelize4DTile2DTileL, kParallelize4DTile2DRangeL - start_l));
+}
+
+TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolUniformTiling) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckTiling4DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+}
+
+TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolUniformTiling) {
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ CheckTiling4DTile2DWithUArch,
+ nullptr,
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+}
+
+static void SetTrue4DTile2DWithUArch(std::atomic_bool* processed_indicators, uint32_t, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
+ for (size_t k = start_k; k < start_k + tile_k; k++) {
+ for (size_t l = start_l; l < start_l + tile_l; l++) {
+ const size_t linear_idx = ((i * kParallelize4DTile2DRangeJ + j) * kParallelize4DTile2DRangeK + k) * kParallelize4DTile2DRangeL + l;
+ processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolAllItemsProcessed) {
+ std::vector<std::atomic_bool> indicators(kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(SetTrue4DTile2DWithUArch),
+ static_cast<void*>(indicators.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
+ for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
+ for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
+ const size_t linear_idx = ((i * kParallelize4DTile2DRangeJ + j) * kParallelize4DTile2DRangeK + k) * kParallelize4DTile2DRangeL + l;
+ EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
+ << "Element (" << i << ", " << j << ", " << k << ", " << l << ") not processed";
+ }
+ }
+ }
+ }
+}
+
+TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolAllItemsProcessed) {
+ std::vector<std::atomic_bool> indicators(kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(SetTrue4DTile2DWithUArch),
+ static_cast<void*>(indicators.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
+ for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
+ for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
+ const size_t linear_idx = ((i * kParallelize4DTile2DRangeJ + j) * kParallelize4DTile2DRangeK + k) * kParallelize4DTile2DRangeL + l;
+ EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
+ << "Element (" << i << ", " << j << ", " << k << ", " << l << ") not processed";
+ }
+ }
+ }
+ }
+}
+
+static void Increment4DTile2DWithUArch(std::atomic_int* processed_counters, uint32_t, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
+ for (size_t k = start_k; k < start_k + tile_k; k++) {
+ for (size_t l = start_l; l < start_l + tile_l; l++) {
+ const size_t linear_idx = ((i * kParallelize4DTile2DRangeJ + j) * kParallelize4DTile2DRangeK + k) * kParallelize4DTile2DRangeL + l;
+ processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolEachItemProcessedOnce) {
+ std::vector<std::atomic_int> counters(kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(Increment4DTile2DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
+ for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
+ for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
+ const size_t linear_idx = ((i * kParallelize4DTile2DRangeJ + j) * kParallelize4DTile2DRangeK + k) * kParallelize4DTile2DRangeL + l;
+ EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
+ << "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
+ << counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
+ }
+ }
+ }
+ }
+}
+
+TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolEachItemProcessedOnce) {
+ std::vector<std::atomic_int> counters(kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(Increment4DTile2DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+
+ for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
+ for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
+ for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
+ const size_t linear_idx = ((i * kParallelize4DTile2DRangeJ + j) * kParallelize4DTile2DRangeK + k) * kParallelize4DTile2DRangeL + l;
+ EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
+ << "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
+ << counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
+ }
+ }
+ }
+ }
+}
+
+TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolEachItemProcessedMultipleTimes) {
+ std::vector<std::atomic_int> counters(kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ for (size_t iteration = 0; iteration < kIncrementIterations; iteration++) {
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(Increment4DTile2DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+ }
+
+ for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
+ for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
+ for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
+ const size_t linear_idx = ((i * kParallelize4DTile2DRangeJ + j) * kParallelize4DTile2DRangeK + k) * kParallelize4DTile2DRangeL + l;
+ EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
+ << "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
+ << counters[linear_idx].load(std::memory_order_relaxed) << " times "
+ << "(expected: " << kIncrementIterations << ")";
+ }
+ }
+ }
+ }
+}
+
+TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolEachItemProcessedMultipleTimes) {
+ std::vector<std::atomic_int> counters(kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ for (size_t iteration = 0; iteration < kIncrementIterations; iteration++) {
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(Increment4DTile2DWithUArch),
+ static_cast<void*>(counters.data()),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+ }
+
+ for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
+ for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
+ for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
+ for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
+ const size_t linear_idx = ((i * kParallelize4DTile2DRangeJ + j) * kParallelize4DTile2DRangeK + k) * kParallelize4DTile2DRangeL + l;
+ EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
+ << "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
+ << counters[linear_idx].load(std::memory_order_relaxed) << " times "
+ << "(expected: " << kIncrementIterations << ")";
+ }
+ }
+ }
+ }
+}
+
+static void IncrementSame4DTile2DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
+ for (size_t k = start_k; k < start_k + tile_k; k++) {
+ for (size_t l = start_l; l < start_l + tile_l; l++) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(IncrementSame4DTile2DWithUArch),
+ static_cast<void*>(&num_processed_items),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
+}
+
+static void WorkImbalance4DTile2DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
+ num_processed_items->fetch_add(tile_k * tile_l, std::memory_order_relaxed);
+ if (i == 0 && j == 0 && start_k == 0 && start_l == 0) {
+ /* Spin-wait until all items are computed */
+ while (num_processed_items->load(std::memory_order_relaxed) != kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL) {
+ std::atomic_thread_fence(std::memory_order_acquire);
+ }
+ }
+}
+
+TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolWorkStealing) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_4d_tile_2d_with_uarch(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(WorkImbalance4DTile2DWithUArch),
+ static_cast<void*>(&num_processed_items),
+ kDefaultUArchIndex, kMaxUArchIndex,
+ kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
+ kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
+}
+
static void ComputeNothing5DTile2D(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t) {
}
@@ -2450,6 +4184,34 @@ TEST(Parallelize5DTile2D, MultiThreadPoolEachItemProcessedMultipleTimes) {
}
}
+static void IncrementSame5DTile2D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t start_l, size_t start_m, size_t tile_l, size_t tile_m) {
+ for (size_t l = start_l; l < start_l + tile_l; l++) {
+ for (size_t m = start_m; m < start_m + tile_m; m++) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize5DTile2D, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_5d_tile_2d(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_5d_tile_2d_t>(IncrementSame5DTile2D),
+ static_cast<void*>(&num_processed_items),
+ kParallelize5DTile2DRangeI, kParallelize5DTile2DRangeJ, kParallelize5DTile2DRangeK, kParallelize5DTile2DRangeL, kParallelize5DTile2DRangeM,
+ kParallelize5DTile2DTileL, kParallelize5DTile2DTileM,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize5DTile2DRangeI * kParallelize5DTile2DRangeJ * kParallelize5DTile2DRangeK * kParallelize5DTile2DRangeL * kParallelize5DTile2DRangeM);
+}
+
static void WorkImbalance5DTile2D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t start_l, size_t start_m, size_t tile_l, size_t tile_m) {
num_processed_items->fetch_add(tile_l * tile_m, std::memory_order_relaxed);
if (i == 0 && j == 0 && k == 0 && start_l == 0 && start_m == 0) {
@@ -2821,6 +4583,34 @@ TEST(Parallelize6DTile2D, MultiThreadPoolEachItemProcessedMultipleTimes) {
}
}
+static void IncrementSame6DTile2D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t start_n, size_t tile_m, size_t tile_n) {
+ for (size_t m = start_m; m < start_m + tile_m; m++) {
+ for (size_t n = start_n; n < start_n + tile_n; n++) {
+ num_processed_items->fetch_add(1, std::memory_order_relaxed);
+ }
+ }
+}
+
+TEST(Parallelize6DTile2D, MultiThreadPoolHighContention) {
+ std::atomic_int num_processed_items = ATOMIC_VAR_INIT(0);
+
+ auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
+ ASSERT_TRUE(threadpool.get());
+
+ if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
+ GTEST_SKIP();
+ }
+
+ pthreadpool_parallelize_6d_tile_2d(
+ threadpool.get(),
+ reinterpret_cast<pthreadpool_task_6d_tile_2d_t>(IncrementSame6DTile2D),
+ static_cast<void*>(&num_processed_items),
+ kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK, kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
+ kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
+ 0 /* flags */);
+ EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize6DTile2DRangeI * kParallelize6DTile2DRangeJ * kParallelize6DTile2DRangeK * kParallelize6DTile2DRangeL * kParallelize6DTile2DRangeM * kParallelize6DTile2DRangeN);
+}
+
static void WorkImbalance6DTile2D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t start_n, size_t tile_m, size_t tile_n) {
num_processed_items->fetch_add(tile_m * tile_n, std::memory_order_relaxed);
if (i == 0 && j == 0 && k == 0 && l == 0 && start_m == 0 && start_n == 0) {