diff options
Diffstat (limited to 'gcc-4.4.0/libstdc++-v3/include/parallel/partial_sum.h')
-rw-r--r-- | gcc-4.4.0/libstdc++-v3/include/parallel/partial_sum.h | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/partial_sum.h b/gcc-4.4.0/libstdc++-v3/include/parallel/partial_sum.h new file mode 100644 index 000000000..92630936d --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/partial_sum.h @@ -0,0 +1,220 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/partial_sum.h + * @brief Parallel implementation of std::partial_sum(), i. e. prefix + * sums. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H +#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1 + +#include <omp.h> +#include <new> +#include <bits/stl_algobase.h> +#include <parallel/parallel.h> +#include <parallel/numericfwd.h> + +namespace __gnu_parallel +{ + // Problem: there is no 0-element given. + +/** @brief Base case prefix sum routine. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param result Begin iterator of output sequence. + * @param bin_op Associative binary function. + * @param value Start value. Must be passed since the neutral + * element is unknown in general. + * @return End iterator of output sequence. */ +template<typename InputIterator, + typename OutputIterator, + typename BinaryOperation> + OutputIterator + parallel_partial_sum_basecase(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op, + typename std::iterator_traits + <InputIterator>::value_type value) + { + if (begin == end) + return result; + + while (begin != end) + { + value = bin_op(value, *begin); + *result = value; + ++result; + ++begin; + } + return result; + } + +/** @brief Parallel partial sum implementation, two-phase approach, + no recursion. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param result Begin iterator of output sequence. + * @param bin_op Associative binary function. + * @param n Length of sequence. + * @param num_threads Number of threads to use. + * @return End iterator of output sequence. + */ +template<typename InputIterator, + typename OutputIterator, + typename BinaryOperation> + OutputIterator + parallel_partial_sum_linear(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op, + typename std::iterator_traits + <InputIterator>::difference_type n) + { + typedef std::iterator_traits<InputIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + if (begin == end) + return result; + + thread_index_t num_threads = + std::min<difference_type>(get_max_threads(), n - 1); + + if (num_threads < 2) + { + *result = *begin; + return parallel_partial_sum_basecase( + begin + 1, end, result + 1, bin_op, *begin); + } + + difference_type* borders; + value_type* sums; + + const _Settings& __s = _Settings::get(); + +# pragma omp parallel num_threads(num_threads) + { +# pragma omp single + { + num_threads = omp_get_num_threads(); + + borders = new difference_type[num_threads + 2]; + + if (__s.partial_sum_dilation == 1.0f) + equally_split(n, num_threads + 1, borders); + else + { + difference_type chunk_length = + ((double)n + / ((double)num_threads + __s.partial_sum_dilation)), + borderstart = n - num_threads * chunk_length; + borders[0] = 0; + for (int i = 1; i < (num_threads + 1); ++i) + { + borders[i] = borderstart; + borderstart += chunk_length; + } + borders[num_threads + 1] = n; + } + + sums = static_cast<value_type*>(::operator new(sizeof(value_type) + * num_threads)); + OutputIterator target_end; + } //single + + thread_index_t iam = omp_get_thread_num(); + if (iam == 0) + { + *result = *begin; + parallel_partial_sum_basecase(begin + 1, begin + borders[1], + result + 1, bin_op, *begin); + ::new(&(sums[iam])) value_type(*(result + borders[1] - 1)); + } + else + { + ::new(&(sums[iam])) + value_type(std::accumulate(begin + borders[iam] + 1, + begin + borders[iam + 1], + *(begin + borders[iam]), + bin_op, + __gnu_parallel::sequential_tag())); + } + +# pragma omp barrier + +# pragma omp single + parallel_partial_sum_basecase( + sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]); + +# pragma omp barrier + + // Still same team. + parallel_partial_sum_basecase(begin + borders[iam + 1], + begin + borders[iam + 2], + result + borders[iam + 1], bin_op, + sums[iam]); + } //parallel + + ::operator delete(sums); + delete[] borders; + + return result + n; + } + +/** @brief Parallel partial sum front-end. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param result Begin iterator of output sequence. + * @param bin_op Associative binary function. + * @return End iterator of output sequence. */ +template<typename InputIterator, + typename OutputIterator, + typename BinaryOperation> + OutputIterator + parallel_partial_sum(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op) + { + _GLIBCXX_CALL(begin - end) + + typedef std::iterator_traits<InputIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + + switch (_Settings::get().partial_sum_algorithm) + { + case LINEAR: + // Need an initial offset. + return parallel_partial_sum_linear(begin, end, result, bin_op, n); + default: + // Partial_sum algorithm not implemented. + _GLIBCXX_PARALLEL_ASSERT(0); + return result + n; + } + } +} + +#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */ |