diff options
Diffstat (limited to 'include/pthreadpool.h')
-rw-r--r-- | include/pthreadpool.h | 594 |
1 files changed, 569 insertions, 25 deletions
diff --git a/include/pthreadpool.h b/include/pthreadpool.h index 2443285..de4016b 100644 --- a/include/pthreadpool.h +++ b/include/pthreadpool.h @@ -16,95 +16,450 @@ typedef void (*pthreadpool_task_4d_tile_2d_t)(void*, size_t, size_t, size_t, siz typedef void (*pthreadpool_task_5d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t); typedef void (*pthreadpool_task_6d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_1d_with_id_t)(void*, uint32_t, size_t); +typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t, size_t); + +/** + * Disable support for denormalized numbers to the maximum extent possible for + * the duration of the computation. + * + * Handling denormalized floating-point numbers is often implemented in + * microcode, and incurs significant performance degradation. This hint + * instructs the thread pool to disable support for denormalized numbers before + * running the computation by manipulating architecture-specific control + * registers, and restore the initial value of control registers after the + * computation is complete. The thread pool temporary disables denormalized + * numbers on all threads involved in the computation (i.e. the caller threads, + * and potentially worker threads). + * + * Disabling denormalized numbers may have a small negative effect on results' + * accuracy. As various architectures differ in capabilities to control + * processing of denormalized numbers, using this flag may also hurt results' + * reproducibility across different instruction set architectures. + */ #define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001 +/** + * Yield worker threads to the system scheduler after the operation is finished. + * + * Force workers to use kernel wait (instead of active spin-wait by default) for + * new commands after this command is processed. This flag affects only the + * immediate next operation on this thread pool. To make the thread pool always + * use kernel wait, pass this flag to all parallelization functions. + */ +#define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002 + #ifdef __cplusplus extern "C" { #endif /** - * Creates a thread pool with the specified number of threads. + * Create a thread pool with the specified number of threads. * - * @param[in] threads_count The number of threads in the thread pool. - * A value of 0 has special interpretation: it creates a thread for each - * processor core available in the system. + * @param threads_count the number of threads in the thread pool. + * A value of 0 has special interpretation: it creates a thread pool with as + * many threads as there are logical processors in the system. * - * @returns A pointer to an opaque thread pool object. - * On error the function returns NULL and sets errno accordingly. + * @returns A pointer to an opaque thread pool object if the call is + * successful, or NULL pointer if the call failed. */ pthreadpool_t pthreadpool_create(size_t threads_count); /** - * Queries the number of threads in a thread pool. + * Query the number of threads in a thread pool. * - * @param[in] threadpool The thread pool to query. + * @param threadpool the thread pool to query. * * @returns The number of threads in the thread pool. */ size_t pthreadpool_get_threads_count(pthreadpool_t threadpool); /** - * Processes items in parallel using threads from a thread pool. + * Process items on a 1D grid. * - * When the call returns, all items have been processed and the thread pool is - * ready for a new task. + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i++) + * function(context, i); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. * * @note If multiple threads call this function with the same thread pool, the * calls are serialized. * - * @param[in] threadpool The thread pool to use for parallelisation. - * @param[in] function The function to call for each item. - * @param[in] argument The first argument passed to the @a function. - * @param[in] items The number of items to process. The @a function - * will be called once for each item. + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range the number of items on the 1D grid to process. The + * specified function will be called once for each item. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) */ void pthreadpool_parallelize_1d( pthreadpool_t threadpool, pthreadpool_task_1d_t function, - void* argument, + void* context, + size_t range, + uint32_t flags); + +/** + * Process items on a 1D grid using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range; i++) + * function(context, uarch_index, i); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range the number of items on the 1D grid to process. + * The specified function will be called once for each item. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_1d_with_uarch( + pthreadpool_t threadpool, + pthreadpool_task_1d_with_id_t function, + void* context, + uint32_t default_uarch_index, + uint32_t max_uarch_index, size_t range, uint32_t flags); +/** + * Process items on a 1D grid with specified maximum tile size. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i += tile) + * function(context, i, min(range - i, tile)); + * + * When the call returns, all items have been processed and the thread pool is + * ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, + * the calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range the number of items on the 1D grid to process. + * @param tile the maximum number of items on the 1D grid to process in + * one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ void pthreadpool_parallelize_1d_tile_1d( pthreadpool_t threadpool, pthreadpool_task_1d_tile_1d_t function, - void* argument, + void* context, size_t range, size_t tile, uint32_t flags); +/** + * Process items on a 2D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * function(context, i, j); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ void pthreadpool_parallelize_2d( pthreadpool_t threadpool, pthreadpool_task_2d_t function, - void* argument, + void* context, size_t range_i, size_t range_j, uint32_t flags); +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, i, j, min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ void pthreadpool_parallelize_2d_tile_1d( pthreadpool_t threadpool, pthreadpool_task_2d_tile_1d_t function, - void* argument, + void* context, size_t range_i, size_t range_j, size_t tile_j, uint32_t flags); +/** + * Process items on a 2D grid with the specified maximum tile size along each + * grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i += tile_i) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, i, j, + * min(range_i - i, tile_i), min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the first dimension of + * the 2D grid to process in one function call. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ void pthreadpool_parallelize_2d_tile_2d( pthreadpool_t threadpool, pthreadpool_task_2d_tile_2d_t function, - void* argument, + void* context, + size_t range_i, + size_t range_j, + size_t tile_i, + size_t tile_j, + uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along each + * grid dimension using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i += tile_i) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, uarch_index, i, j, + * min(range_i - i, tile_i), min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, + * cpuinfo initialization failed, or index returned + * by cpuinfo_get_current_uarch_index() exceeds + * the max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected + * by the specified function. If the index returned + * by cpuinfo_get_current_uarch_index() exceeds this + * value, default_uarch_index will be used instead. + * default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 2D grid. + * @param range_j the number of items to process along the second + * dimension of the 2D grid. + * @param tile_j the maximum number of items along the first + * dimension of the 2D grid to process in one function call. + * @param tile_j the maximum number of items along the second + * dimension of the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_2d_with_uarch( + pthreadpool_t threadpool, + pthreadpool_task_2d_tile_2d_with_id_t function, + void* context, + uint32_t default_uarch_index, + uint32_t max_uarch_index, size_t range_i, size_t range_j, size_t tile_i, size_t tile_j, uint32_t flags); +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, i, j, k, + * min(range_j - j, tile_j), min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 3D grid to process in one function call. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ void pthreadpool_parallelize_3d_tile_2d( pthreadpool_t threadpool, pthreadpool_task_3d_tile_2d_t function, - void* argument, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + size_t tile_j, + size_t tile_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last two grid dimensions using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, uarch_index, i, j, k, + * min(range_j - j, tile_j), min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 3D grid. + * @param range_j the number of items to process along the second + * dimension of the 3D grid. + * @param range_k the number of items to process along the third + * dimension of the 3D grid. + * @param tile_j the maximum number of items along the second + * dimension of the 3D grid to process in one function call. + * @param tile_k the maximum number of items along the third + * dimension of the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_2d_with_uarch( + pthreadpool_t threadpool, + pthreadpool_task_3d_tile_2d_with_id_t function, + void* context, + uint32_t default_uarch_index, + uint32_t max_uarch_index, size_t range_i, size_t range_j, size_t range_k, @@ -112,10 +467,114 @@ void pthreadpool_parallelize_3d_tile_2d( size_t tile_k, uint32_t flags); +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * for (size_t l = 0; l < range_l; l += tile_l) + * function(context, i, j, k, l, + * min(range_k - k, tile_k), min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 4D grid to process in one function call. + * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ void pthreadpool_parallelize_4d_tile_2d( pthreadpool_t threadpool, pthreadpool_task_4d_tile_2d_t function, - void* argument, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t tile_k, + size_t tile_l, + uint32_t flags); + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last two grid dimensions using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * for (size_t l = 0; l < range_l; l += tile_l) + * function(context, uarch_index, i, j, k, l, + * min(range_k - k, tile_k), min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 4D grid. + * @param range_j the number of items to process along the second + * dimension of the 4D grid. + * @param range_k the number of items to process along the third + * dimension of the 4D grid. + * @param range_l the number of items to process along the fourth + * dimension of the 4D grid. + * @param tile_k the maximum number of items along the third + * dimension of the 4D grid to process in one function call. + * @param tile_l the maximum number of items along the fourth + * dimension of the 4D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d_tile_2d_with_uarch( + pthreadpool_t threadpool, + pthreadpool_task_4d_tile_2d_with_id_t function, + void* context, + uint32_t default_uarch_index, + uint32_t max_uarch_index, size_t range_i, size_t range_j, size_t range_k, @@ -124,10 +583,51 @@ void pthreadpool_parallelize_4d_tile_2d( size_t tile_l, uint32_t flags); +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * for (size_t m = 0; m < range_m; m += tile_m) + * function(context, i, j, k, l, m, + * min(range_l - l, tile_l), min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param tile_l the maximum number of items along the fourth dimension of + * the 5D grid to process in one function call. + * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ void pthreadpool_parallelize_5d_tile_2d( pthreadpool_t threadpool, pthreadpool_task_5d_tile_2d_t function, - void* argument, + void* context, size_t range_i, size_t range_j, size_t range_k, @@ -137,10 +637,54 @@ void pthreadpool_parallelize_5d_tile_2d( size_t tile_m, uint32_t flags); +/** + * Process items on a 6D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * for (size_t n = 0; n < range_n; n += tile_n) + * function(context, i, j, k, l, m, n, + * min(range_m - m, tile_m), min(range_n - n, tile_n)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. + * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_m the maximum number of items along the fifth dimension of + * the 6D grid to process in one function call. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ void pthreadpool_parallelize_6d_tile_2d( pthreadpool_t threadpool, pthreadpool_task_6d_tile_2d_t function, - void* argument, + void* context, size_t range_i, size_t range_j, size_t range_k, |