diff options
Diffstat (limited to 'src/threadpool-shim.c')
-rw-r--r-- | src/threadpool-shim.c | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/src/threadpool-shim.c b/src/threadpool-shim.c index c8ef51d..b5670ea 100644 --- a/src/threadpool-shim.c +++ b/src/threadpool-shim.c @@ -28,6 +28,20 @@ void pthreadpool_parallelize_1d( } } +void pthreadpool_parallelize_1d_with_uarch( /* Shim: calls task(argument, default_uarch_index, i) once for each i from 0 up to range - 1, in ascending order, from a plain loop. */ + pthreadpool_t threadpool, /* not referenced in the body */ + pthreadpool_task_1d_with_id_t task, + void* argument, + uint32_t default_uarch_index, /* forwarded unchanged as the second argument of every task call */ + uint32_t max_uarch_index, /* not referenced in the body */ + size_t range, + uint32_t flags) /* not referenced in the body */ +{ + for (size_t i = 0; i < range; i++) { + task(argument, default_uarch_index, i); + } +} + void pthreadpool_parallelize_1d_tile_1d( pthreadpool_t threadpool, pthreadpool_task_1d_tile_1d_t task, @@ -89,6 +103,26 @@ void pthreadpool_parallelize_2d_tile_2d( } } +void pthreadpool_parallelize_2d_tile_2d_with_uarch( /* Shim: walks i over [0, range_i) in steps of tile_i and j over [0, range_j) in steps of tile_j with nested loops, calling task once per (i, j) tile origin. */ + pthreadpool_t threadpool, /* not referenced in the body */ + pthreadpool_task_2d_tile_2d_with_id_t task, + void* argument, + uint32_t default_uarch_index, /* forwarded unchanged as the second argument of every task call */ + uint32_t max_uarch_index, /* not referenced in the body */ + size_t range_i, + size_t range_j, + size_t tile_i, + size_t tile_j, + uint32_t flags) /* not referenced in the body */ +{ + for (size_t i = 0; i < range_i; i += tile_i) { /* NOTE(review): a zero tile_i (or tile_j below) with a nonzero range never advances the index, so this loop would not terminate */ + for (size_t j = 0; j < range_j; j += tile_j) { + task(argument, default_uarch_index, i, j, /* tile origin; the two extents follow on the next line */ + min(range_i - i, tile_i), min(range_j - j, tile_j)); /* presumably min (defined outside this hunk) yields the smaller operand, trimming the last tile to what is left of the range -- TODO confirm */ + } + } +} + void pthreadpool_parallelize_3d_tile_2d( pthreadpool_t threadpool, pthreadpool_task_3d_tile_2d_t task, @@ -110,6 +144,29 @@ void pthreadpool_parallelize_3d_tile_2d( } } +void pthreadpool_parallelize_3d_tile_2d_with_uarch( /* Shim: i advances one at a time over [0, range_i); j and k advance by tile_j and tile_k over their ranges; task is called once per (i, j, k) from nested loops. */ + pthreadpool_t threadpool, /* not referenced in the body */ + pthreadpool_task_3d_tile_2d_with_id_t task, + void* argument, + uint32_t default_uarch_index, /* forwarded unchanged as the second argument of every task call */ + uint32_t max_uarch_index, /* not referenced in the body */ + size_t range_i, + size_t range_j, + size_t range_k, + size_t tile_j, + size_t tile_k, + uint32_t flags) /* not referenced in the body */ +{ + for (size_t i = 0; i < range_i; i++) { /* the first dimension is not tiled */ + for (size_t j = 0; j < range_j; j += tile_j) { /* NOTE(review): a zero tile_j (or tile_k below) with a nonzero range never advances the index */ + for (size_t k = 0; k < range_k; k += tile_k) { + task(argument, default_uarch_index, i, j, k, /* indices first; the j and k tile extents follow on the next line */ + min(range_j - j, tile_j), min(range_k - k, tile_k)); /* presumably min (defined outside this hunk) yields the smaller operand -- TODO confirm */ + } + } + } +} + void pthreadpool_parallelize_4d_tile_2d( pthreadpool_t threadpool, 
pthreadpool_task_4d_tile_2d_t task, @@ -134,6 +191,32 @@ void pthreadpool_parallelize_4d_tile_2d( } } +void pthreadpool_parallelize_4d_tile_2d_with_uarch( /* Shim: i and j advance one at a time over [0, range_i) and [0, range_j); k and l advance by tile_k and tile_l over their ranges; task is called once per (i, j, k, l) from nested loops. */ + pthreadpool_t threadpool, /* not referenced in the body */ + pthreadpool_task_4d_tile_2d_with_id_t task, + void* argument, + uint32_t default_uarch_index, /* forwarded unchanged as the second argument of every task call */ + uint32_t max_uarch_index, /* not referenced in the body */ + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t tile_k, + size_t tile_l, + uint32_t flags) /* not referenced in the body */ +{ + for (size_t i = 0; i < range_i; i++) { /* the first two dimensions are not tiled */ + for (size_t j = 0; j < range_j; j++) { + for (size_t k = 0; k < range_k; k += tile_k) { /* NOTE(review): a zero tile_k (or tile_l below) with a nonzero range never advances the index */ + for (size_t l = 0; l < range_l; l += tile_l) { + task(argument, default_uarch_index, i, j, k, l, /* indices first; the k and l tile extents follow on the next line */ + min(range_k - k, tile_k), min(range_l - l, tile_l)); /* presumably min (defined outside this hunk) yields the smaller operand, trimming the last tile to what is left of the range -- TODO confirm */ + } + } + } + } +} + void pthreadpool_parallelize_5d_tile_2d( pthreadpool_t threadpool, pthreadpool_task_5d_tile_2d_t task, |