diff options
author | Jing Yu <jingyu@google.com> | 2009-11-05 15:11:04 -0800 |
---|---|---|
committer | Jing Yu <jingyu@google.com> | 2009-11-05 15:11:04 -0800 |
commit | df62c1c110e8532b995b23540b7e3695729c0779 (patch) | |
tree | dbbd4cbdb50ac38011e058a2533ee4c3168b0205 /gcc-4.4.0/libstdc++-v3/include/parallel | |
parent | 8d401cf711539af5a2f78d12447341d774892618 (diff) | |
download | toolchain_gcc-df62c1c110e8532b995b23540b7e3695729c0779.tar.gz toolchain_gcc-df62c1c110e8532b995b23540b7e3695729c0779.tar.bz2 toolchain_gcc-df62c1c110e8532b995b23540b7e3695729c0779.zip |
Check in gcc sources for prebuilt toolchains in Eclair.
Diffstat (limited to 'gcc-4.4.0/libstdc++-v3/include/parallel')
43 files changed, 16089 insertions, 0 deletions
diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/algo.h b/gcc-4.4.0/libstdc++-v3/include/parallel/algo.h new file mode 100644 index 000000000..59106b2d3 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/algo.h @@ -0,0 +1,2364 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/algo.h + * @brief Parallel STL function calls corresponding to the stl_algo.h header. + * + * The functions defined here mainly do case switches and + * call the actual parallelized versions in other files. + * Inlining policy: Functions that basically only contain one function call, + * are declared inline. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. 
+ +#ifndef _GLIBCXX_PARALLEL_ALGO_H +#define _GLIBCXX_PARALLEL_ALGO_H 1 + +#include <parallel/algorithmfwd.h> +#include <bits/stl_algobase.h> +#include <bits/stl_algo.h> +#include <parallel/iterator.h> +#include <parallel/base.h> +#include <parallel/sort.h> +#include <parallel/workstealing.h> +#include <parallel/par_loop.h> +#include <parallel/omp_loop.h> +#include <parallel/omp_loop_static.h> +#include <parallel/for_each_selectors.h> +#include <parallel/for_each.h> +#include <parallel/find.h> +#include <parallel/find_selectors.h> +#include <parallel/search.h> +#include <parallel/random_shuffle.h> +#include <parallel/partition.h> +#include <parallel/merge.h> +#include <parallel/unique_copy.h> +#include <parallel/set_operations.h> + +namespace std +{ +namespace __parallel +{ + // Sequential fallback + template<typename InputIterator, typename Function> + inline Function + for_each(InputIterator begin, InputIterator end, Function f, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::for_each(begin, end, f); } + + + // Sequential fallback for input iterator case + template<typename InputIterator, typename Function, typename IteratorTag> + inline Function + for_each_switch(InputIterator begin, InputIterator end, Function f, + IteratorTag) + { return for_each(begin, end, f, __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators + template<typename RandomAccessIterator, typename Function> + Function + for_each_switch(RandomAccessIterator begin, RandomAccessIterator end, + Function f, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().for_each_minimal_n + && __gnu_parallel::is_parallel(parallelism_tag))) + { + bool dummy; + __gnu_parallel::for_each_selector<RandomAccessIterator> functionality; + + return __gnu_parallel:: + 
for_each_template_random_access(begin, end, f, functionality, + __gnu_parallel::dummy_reduct(), + true, dummy, -1, parallelism_tag); + } + else + return for_each(begin, end, f, __gnu_parallel::sequential_tag()); + } + + // Public interface + template<typename Iterator, typename Function> + inline Function + for_each(Iterator begin, Iterator end, Function f, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef std::iterator_traits<Iterator> iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + return for_each_switch(begin, end, f, iterator_category(), + parallelism_tag); + } + + template<typename Iterator, typename Function> + inline Function + for_each(Iterator begin, Iterator end, Function f) + { + typedef std::iterator_traits<Iterator> iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + return for_each_switch(begin, end, f, iterator_category()); + } + + + // Sequential fallback + template<typename InputIterator, typename T> + inline InputIterator + find(InputIterator begin, InputIterator end, const T& val, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::find(begin, end, val); } + + // Sequential fallback for input iterator case + template<typename InputIterator, typename T, typename IteratorTag> + inline InputIterator + find_switch(InputIterator begin, InputIterator end, const T& val, + IteratorTag) + { return _GLIBCXX_STD_P::find(begin, end, val); } + + // Parallel find for random access iterators + template<typename RandomAccessIterator, typename T> + RandomAccessIterator + find_switch(RandomAccessIterator begin, RandomAccessIterator end, + const T& val, random_access_iterator_tag) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + + if (_GLIBCXX_PARALLEL_CONDITION(true)) + { + binder2nd<__gnu_parallel::equal_to<value_type, T> > + comp(__gnu_parallel::equal_to<value_type, T>(), val); + return 
__gnu_parallel::find_template(begin, end, begin, comp, + __gnu_parallel:: + find_if_selector()).first; + } + else + return _GLIBCXX_STD_P::find(begin, end, val); + } + + // Public interface + template<typename InputIterator, typename T> + inline InputIterator + find(InputIterator begin, InputIterator end, const T& val) + { + typedef std::iterator_traits<InputIterator> iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + return find_switch(begin, end, val, iterator_category()); + } + + // Sequential fallback + template<typename InputIterator, typename Predicate> + inline InputIterator + find_if(InputIterator begin, InputIterator end, Predicate pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::find_if(begin, end, pred); } + + // Sequential fallback for input iterator case + template<typename InputIterator, typename Predicate, typename IteratorTag> + inline InputIterator + find_if_switch(InputIterator begin, InputIterator end, Predicate pred, + IteratorTag) + { return _GLIBCXX_STD_P::find_if(begin, end, pred); } + + // Parallel find_if for random access iterators + template<typename RandomAccessIterator, typename Predicate> + RandomAccessIterator + find_if_switch(RandomAccessIterator begin, RandomAccessIterator end, + Predicate pred, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(true)) + return __gnu_parallel::find_template(begin, end, begin, pred, + __gnu_parallel:: + find_if_selector()).first; + else + return _GLIBCXX_STD_P::find_if(begin, end, pred); + } + + // Public interface + template<typename InputIterator, typename Predicate> + inline InputIterator + find_if(InputIterator begin, InputIterator end, Predicate pred) + { + typedef std::iterator_traits<InputIterator> iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + return find_if_switch(begin, end, pred, iterator_category()); + } + + // Sequential fallback + template<typename InputIterator, 
typename ForwardIterator> + inline InputIterator + find_first_of(InputIterator begin1, InputIterator end1, + ForwardIterator begin2, ForwardIterator end2, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::find_first_of(begin1, end1, begin2, end2); } + + // Sequential fallback + template<typename InputIterator, typename ForwardIterator, + typename BinaryPredicate> + inline InputIterator + find_first_of(InputIterator begin1, InputIterator end1, + ForwardIterator begin2, ForwardIterator end2, + BinaryPredicate comp, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::find_first_of(begin1, end1, begin2, end2, comp); } + + // Sequential fallback for input iterator type + template<typename InputIterator, typename ForwardIterator, + typename IteratorTag1, typename IteratorTag2> + inline InputIterator + find_first_of_switch(InputIterator begin1, InputIterator end1, + ForwardIterator begin2, ForwardIterator end2, + IteratorTag1, IteratorTag2) + { return find_first_of(begin1, end1, begin2, end2, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators + template<typename RandomAccessIterator, typename ForwardIterator, + typename BinaryPredicate, typename IteratorTag> + inline RandomAccessIterator + find_first_of_switch(RandomAccessIterator begin1, + RandomAccessIterator end1, + ForwardIterator begin2, ForwardIterator end2, + BinaryPredicate comp, random_access_iterator_tag, + IteratorTag) + { + return __gnu_parallel:: + find_template(begin1, end1, begin1, comp, + __gnu_parallel::find_first_of_selector + <ForwardIterator>(begin2, end2)).first; + } + + // Sequential fallback for input iterator type + template<typename InputIterator, typename ForwardIterator, + typename BinaryPredicate, typename IteratorTag1, + typename IteratorTag2> + inline InputIterator + find_first_of_switch(InputIterator begin1, InputIterator end1, + ForwardIterator begin2, ForwardIterator end2, + BinaryPredicate comp, IteratorTag1, IteratorTag2) + { 
return find_first_of(begin1, end1, begin2, end2, comp, + __gnu_parallel::sequential_tag()); } + + // Public interface + template<typename InputIterator, typename ForwardIterator, + typename BinaryPredicate> + inline InputIterator + find_first_of(InputIterator begin1, InputIterator end1, + ForwardIterator begin2, ForwardIterator end2, + BinaryPredicate comp) + { + typedef std::iterator_traits<InputIterator> iteratori_traits; + typedef std::iterator_traits<ForwardIterator> iteratorf_traits; + typedef typename iteratori_traits::iterator_category iteratori_category; + typedef typename iteratorf_traits::iterator_category iteratorf_category; + + return find_first_of_switch(begin1, end1, begin2, end2, comp, + iteratori_category(), iteratorf_category()); + } + + // Public interface, insert default comparator + template<typename InputIterator, typename ForwardIterator> + inline InputIterator + find_first_of(InputIterator begin1, InputIterator end1, + ForwardIterator begin2, ForwardIterator end2) + { + typedef std::iterator_traits<InputIterator> iteratori_traits; + typedef std::iterator_traits<ForwardIterator> iteratorf_traits; + typedef typename iteratori_traits::value_type valuei_type; + typedef typename iteratorf_traits::value_type valuef_type; + + return find_first_of(begin1, end1, begin2, end2, __gnu_parallel:: + equal_to<valuei_type, valuef_type>()); + } + + // Sequential fallback + template<typename InputIterator, typename OutputIterator> + inline OutputIterator + unique_copy(InputIterator begin1, InputIterator end1, OutputIterator out, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::unique_copy(begin1, end1, out); } + + // Sequential fallback + template<typename InputIterator, typename OutputIterator, + typename Predicate> + inline OutputIterator + unique_copy(InputIterator begin1, InputIterator end1, OutputIterator out, + Predicate pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::unique_copy(begin1, end1, out, pred); } + + // Sequential 
fallback for input iterator case + template<typename InputIterator, typename OutputIterator, + typename Predicate, typename IteratorTag1, typename IteratorTag2> + inline OutputIterator + unique_copy_switch(InputIterator begin, InputIterator last, + OutputIterator out, Predicate pred, + IteratorTag1, IteratorTag2) + { return _GLIBCXX_STD_P::unique_copy(begin, last, out, pred); } + + // Parallel unique_copy for random access iterators + template<typename RandomAccessIterator, typename RandomAccessOutputIterator, + typename Predicate> + RandomAccessOutputIterator + unique_copy_switch(RandomAccessIterator begin, RandomAccessIterator last, + RandomAccessOutputIterator out, Predicate pred, + random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(last - begin) + > __gnu_parallel::_Settings::get().unique_copy_minimal_n)) + return __gnu_parallel::parallel_unique_copy(begin, last, out, pred); + else + return _GLIBCXX_STD_P::unique_copy(begin, last, out, pred); + } + + // Public interface + template<typename InputIterator, typename OutputIterator> + inline OutputIterator + unique_copy(InputIterator begin1, InputIterator end1, OutputIterator out) + { + typedef std::iterator_traits<InputIterator> iteratori_traits; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori_traits::iterator_category iteratori_category; + typedef typename iteratori_traits::value_type value_type; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return unique_copy_switch(begin1, end1, out, equal_to<value_type>(), + iteratori_category(), iteratoro_category()); + } + + // Public interface + template<typename InputIterator, typename OutputIterator, typename Predicate> + inline OutputIterator + unique_copy(InputIterator begin1, InputIterator end1, OutputIterator out, + Predicate pred) + { + typedef std::iterator_traits<InputIterator> iteratori_traits; + 
typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori_traits::iterator_category iteratori_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return unique_copy_switch(begin1, end1, out, pred, iteratori_category(), + iteratoro_category()); + } + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator> + inline OutputIterator + set_union(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::set_union(begin1, end1, begin2, end2, out); } + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename Predicate> + inline OutputIterator + set_union(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, Predicate pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::set_union(begin1, end1, + begin2, end2, out, pred); } + + // Sequential fallback for input iterator case + template<typename InputIterator1, typename InputIterator2, + typename Predicate, typename OutputIterator, + typename IteratorTag1, typename IteratorTag2, typename IteratorTag3> + inline OutputIterator + set_union_switch(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator result, Predicate pred, IteratorTag1, + IteratorTag2, IteratorTag3) + { return _GLIBCXX_STD_P::set_union(begin1, end1, + begin2, end2, result, pred); } + + // Parallel set_union for random access iterators + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename OutputRandomAccessIterator, typename Predicate> + OutputRandomAccessIterator + set_union_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, RandomAccessIterator2 end2, + 
OutputRandomAccessIterator result, Predicate pred, + random_access_iterator_tag, random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) + >= __gnu_parallel::_Settings::get().set_union_minimal_n + || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) + >= __gnu_parallel::_Settings::get().set_union_minimal_n)) + return __gnu_parallel::parallel_set_union(begin1, end1, + begin2, end2, result, pred); + else + return _GLIBCXX_STD_P::set_union(begin1, end1, + begin2, end2, result, pred); + } + + // Public interface + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator> + inline OutputIterator + set_union(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, OutputIterator out) + { + typedef std::iterator_traits<InputIterator1> iteratori1_traits; + typedef std::iterator_traits<InputIterator2> iteratori2_traits; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori1_traits::iterator_category + iteratori1_category; + typedef typename iteratori2_traits::iterator_category + iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + typedef typename iteratori1_traits::value_type value1_type; + typedef typename iteratori2_traits::value_type value2_type; + + return set_union_switch(begin1, end1, begin2, end2, out, + __gnu_parallel::less<value1_type, value2_type>(), + iteratori1_category(), iteratori2_category(), + iteratoro_category()); + } + + // Public interface + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename Predicate> + inline OutputIterator + set_union(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, Predicate pred) + { + typedef std::iterator_traits<InputIterator1> iteratori1_traits; + typedef 
std::iterator_traits<InputIterator2> iteratori2_traits; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori1_traits::iterator_category + iteratori1_category; + typedef typename iteratori2_traits::iterator_category + iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return set_union_switch(begin1, end1, begin2, end2, out, pred, + iteratori1_category(), iteratori2_category(), + iteratoro_category()); + } + + // Sequential fallback. + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator> + inline OutputIterator + set_intersection(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::set_intersection(begin1, end1, + begin2, end2, out); } + + // Sequential fallback. + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename Predicate> + inline OutputIterator + set_intersection(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, Predicate pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::set_intersection(begin1, end1, begin2, end2, + out, pred); } + + // Sequential fallback for input iterator case + template<typename InputIterator1, typename InputIterator2, + typename Predicate, typename OutputIterator, + typename IteratorTag1, typename IteratorTag2, + typename IteratorTag3> + inline OutputIterator + set_intersection_switch(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator result, Predicate pred, + IteratorTag1, IteratorTag2, IteratorTag3) + { return _GLIBCXX_STD_P::set_intersection(begin1, end1, begin2, + end2, result, pred); } + + // Parallel set_intersection for random access iterators + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + 
typename OutputRandomAccessIterator, typename Predicate> + OutputRandomAccessIterator + set_intersection_switch(RandomAccessIterator1 begin1, + RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, + RandomAccessIterator2 end2, + OutputRandomAccessIterator result, + Predicate pred, + random_access_iterator_tag, + random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) + >= __gnu_parallel::_Settings::get().set_union_minimal_n + || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) + >= __gnu_parallel::_Settings::get().set_union_minimal_n)) + return __gnu_parallel::parallel_set_intersection(begin1, end1, begin2, + end2, result, pred); + else + return _GLIBCXX_STD_P::set_intersection(begin1, end1, begin2, + end2, result, pred); + } + + // Public interface + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator> + inline OutputIterator + set_intersection(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out) + { + typedef std::iterator_traits<InputIterator1> iteratori1_traits; + typedef std::iterator_traits<InputIterator2> iteratori2_traits; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori1_traits::iterator_category + iteratori1_category; + typedef typename iteratori2_traits::iterator_category + iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + typedef typename iteratori1_traits::value_type value1_type; + typedef typename iteratori2_traits::value_type value2_type; + + return set_intersection_switch(begin1, end1, begin2, end2, out, + __gnu_parallel:: + less<value1_type, value2_type>(), + iteratori1_category(), + iteratori2_category(), + iteratoro_category()); + } + + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename Predicate> + 
inline OutputIterator + set_intersection(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, Predicate pred) + { + typedef std::iterator_traits<InputIterator1> iteratori1_traits; + typedef std::iterator_traits<InputIterator2> iteratori2_traits; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori1_traits::iterator_category + iteratori1_category; + typedef typename iteratori2_traits::iterator_category + iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return set_intersection_switch(begin1, end1, begin2, end2, out, pred, + iteratori1_category(), + iteratori2_category(), + iteratoro_category()); + } + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator> + inline OutputIterator + set_symmetric_difference(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::set_symmetric_difference(begin1,end1, + begin2, end2, out); } + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename Predicate> + inline OutputIterator + set_symmetric_difference(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, Predicate pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::set_symmetric_difference(begin1, end1, begin2, + end2, out, pred); } + + // Sequential fallback for input iterator case + template<typename InputIterator1, typename InputIterator2, + typename Predicate, typename OutputIterator, + typename IteratorTag1, typename IteratorTag2, + typename IteratorTag3> + inline OutputIterator + set_symmetric_difference_switch(InputIterator1 begin1, + InputIterator1 end1, + InputIterator2 begin2, + InputIterator2 end2, + 
OutputIterator result, Predicate pred, + IteratorTag1, IteratorTag2, IteratorTag3) + { return _GLIBCXX_STD_P::set_symmetric_difference(begin1, end1, + begin2, end2, + result, pred); } + + // Parallel set_symmetric_difference for random access iterators + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename OutputRandomAccessIterator, typename Predicate> + OutputRandomAccessIterator + set_symmetric_difference_switch(RandomAccessIterator1 begin1, + RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, + RandomAccessIterator2 end2, + OutputRandomAccessIterator result, + Predicate pred, + random_access_iterator_tag, + random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) + >= __gnu_parallel::_Settings::get().set_symmetric_difference_minimal_n + || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) + >= __gnu_parallel::_Settings::get().set_symmetric_difference_minimal_n)) + return __gnu_parallel::parallel_set_symmetric_difference(begin1, end1, + begin2, end2, + result, pred); + else + return _GLIBCXX_STD_P::set_symmetric_difference(begin1, end1, + begin2, end2, + result, pred); + } + + // Public interface. 
+ template<typename InputIterator1, typename InputIterator2, + typename OutputIterator> + inline OutputIterator + set_symmetric_difference(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out) + { + typedef std::iterator_traits<InputIterator1> iteratori1_traits; + typedef std::iterator_traits<InputIterator2> iteratori2_traits; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori1_traits::iterator_category + iteratori1_category; + typedef typename iteratori2_traits::iterator_category + iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + typedef typename iteratori1_traits::value_type value1_type; + typedef typename iteratori2_traits::value_type value2_type; + + return set_symmetric_difference_switch(begin1, end1, begin2, end2, out, + __gnu_parallel:: + less<value1_type, value2_type>(), + iteratori1_category(), + iteratori2_category(), + iteratoro_category()); + } + + // Public interface. + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename Predicate> + inline OutputIterator + set_symmetric_difference(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, Predicate pred) + { + typedef std::iterator_traits<InputIterator1> iteratori1_traits; + typedef std::iterator_traits<InputIterator2> iteratori2_traits; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori1_traits::iterator_category + iteratori1_category; + typedef typename iteratori2_traits::iterator_category + iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return set_symmetric_difference_switch(begin1, end1, begin2, end2, out, + pred, iteratori1_category(), + iteratori2_category(), + iteratoro_category()); + } + + // Sequential fallback. 
+ template<typename InputIterator1, typename InputIterator2, + typename OutputIterator> + inline OutputIterator + set_difference(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::set_difference(begin1,end1, begin2, end2, out); } + + // Sequential fallback. + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename Predicate> + inline OutputIterator + set_difference(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, Predicate pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::set_difference(begin1, end1, + begin2, end2, out, pred); } + + // Sequential fallback for input iterator case. + template<typename InputIterator1, typename InputIterator2, + typename Predicate, typename OutputIterator, + typename IteratorTag1, typename IteratorTag2, typename IteratorTag3> + inline OutputIterator + set_difference_switch(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator result, Predicate pred, + IteratorTag1, IteratorTag2, IteratorTag3) + { return _GLIBCXX_STD_P::set_difference(begin1, end1, + begin2, end2, result, pred); } + + // Parallel set_difference for random access iterators + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename OutputRandomAccessIterator, typename Predicate> + OutputRandomAccessIterator + set_difference_switch(RandomAccessIterator1 begin1, + RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, + RandomAccessIterator2 end2, + OutputRandomAccessIterator result, Predicate pred, + random_access_iterator_tag, + random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) + >= __gnu_parallel::_Settings::get().set_difference_minimal_n + || 
static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) + >= __gnu_parallel::_Settings::get().set_difference_minimal_n)) + return __gnu_parallel::parallel_set_difference(begin1, end1, + begin2, end2, + result, pred); + else + return _GLIBCXX_STD_P::set_difference(begin1, end1, + begin2, end2, result, pred); + } + + // Public interface + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator> + inline OutputIterator + set_difference(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out) + { + typedef std::iterator_traits<InputIterator1> iteratori1_traits; + typedef std::iterator_traits<InputIterator2> iteratori2_traits; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori1_traits::iterator_category + iteratori1_category; + typedef typename iteratori2_traits::iterator_category + iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + typedef typename iteratori1_traits::value_type value1_type; + typedef typename iteratori2_traits::value_type value2_type; + + return set_difference_switch(begin1, end1, begin2, end2, out, + __gnu_parallel:: + less<value1_type, value2_type>(), + iteratori1_category(), + iteratori2_category(), + iteratoro_category()); + } + + // Public interface + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename Predicate> + inline OutputIterator + set_difference(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, Predicate pred) + { + typedef std::iterator_traits<InputIterator1> iteratori1_traits; + typedef std::iterator_traits<InputIterator2> iteratori2_traits; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori1_traits::iterator_category + iteratori1_category; + typedef typename iteratori2_traits::iterator_category + 
iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return set_difference_switch(begin1, end1, begin2, end2, out, pred, + iteratori1_category(), + iteratori2_category(), + iteratoro_category()); + } + + // Sequential fallback + template<typename ForwardIterator> + inline ForwardIterator + adjacent_find(ForwardIterator begin, ForwardIterator end, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::adjacent_find(begin, end); } + + // Sequential fallback + template<typename ForwardIterator, typename BinaryPredicate> + inline ForwardIterator + adjacent_find(ForwardIterator begin, ForwardIterator end, + BinaryPredicate binary_pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::adjacent_find(begin, end, binary_pred); } + + // Parallel algorithm for random access iterators + template<typename RandomAccessIterator> + RandomAccessIterator + adjacent_find_switch(RandomAccessIterator begin, RandomAccessIterator end, + random_access_iterator_tag) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + + if (_GLIBCXX_PARALLEL_CONDITION(true)) + { + RandomAccessIterator spot = __gnu_parallel:: + find_template(begin, end - 1, begin, equal_to<value_type>(), + __gnu_parallel::adjacent_find_selector()).first; + if (spot == (end - 1)) + return end; + else + return spot; + } + else + return adjacent_find(begin, end, __gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case + template<typename ForwardIterator, typename IteratorTag> + inline ForwardIterator + adjacent_find_switch(ForwardIterator begin, ForwardIterator end, + IteratorTag) + { return adjacent_find(begin, end, __gnu_parallel::sequential_tag()); } + + // Public interface + template<typename ForwardIterator> + inline ForwardIterator + adjacent_find(ForwardIterator begin, ForwardIterator end) + { + typedef iterator_traits<ForwardIterator> traits_type; + typedef 
typename traits_type::iterator_category iterator_category; + return adjacent_find_switch(begin, end, iterator_category()); + } + + // Sequential fallback for input iterator case + template<typename ForwardIterator, typename BinaryPredicate, + typename IteratorTag> + inline ForwardIterator + adjacent_find_switch(ForwardIterator begin, ForwardIterator end, + BinaryPredicate pred, IteratorTag) + { return adjacent_find(begin, end, pred, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators + template<typename RandomAccessIterator, typename BinaryPredicate> + RandomAccessIterator + adjacent_find_switch(RandomAccessIterator begin, RandomAccessIterator end, + BinaryPredicate pred, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(true)) + return __gnu_parallel::find_template(begin, end, begin, pred, + __gnu_parallel:: + adjacent_find_selector()).first; + else + return adjacent_find(begin, end, pred, + __gnu_parallel::sequential_tag()); + } + + // Public interface + template<typename ForwardIterator, typename BinaryPredicate> + inline ForwardIterator + adjacent_find(ForwardIterator begin, ForwardIterator end, + BinaryPredicate pred) + { + typedef iterator_traits<ForwardIterator> traits_type; + typedef typename traits_type::iterator_category iterator_category; + return adjacent_find_switch(begin, end, pred, iterator_category()); + } + + // Sequential fallback + template<typename InputIterator, typename T> + inline typename iterator_traits<InputIterator>::difference_type + count(InputIterator begin, InputIterator end, const T& value, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::count(begin, end, value); } + + // Parallel code for random access iterators + template<typename RandomAccessIterator, typename T> + typename iterator_traits<RandomAccessIterator>::difference_type + count_switch(RandomAccessIterator begin, RandomAccessIterator end, + const T& value, random_access_iterator_tag, + 
__gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_unbalanced) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + typedef __gnu_parallel::sequence_index_t sequence_index_t; + + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().count_minimal_n + && __gnu_parallel::is_parallel(parallelism_tag))) + { + __gnu_parallel::count_selector<RandomAccessIterator, difference_type> + functionality; + difference_type res = 0; + __gnu_parallel:: + for_each_template_random_access(begin, end, value, + functionality, + std::plus<sequence_index_t>(), + res, res, -1, parallelism_tag); + return res; + } + else + return count(begin, end, value, __gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case. + template<typename InputIterator, typename T, typename IteratorTag> + inline typename iterator_traits<InputIterator>::difference_type + count_switch(InputIterator begin, InputIterator end, const T& value, + IteratorTag) + { return count(begin, end, value, __gnu_parallel::sequential_tag()); } + + // Public interface. 
+ template<typename InputIterator, typename T> + inline typename iterator_traits<InputIterator>::difference_type + count(InputIterator begin, InputIterator end, const T& value, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef iterator_traits<InputIterator> traits_type; + typedef typename traits_type::iterator_category iterator_category; + return count_switch(begin, end, value, iterator_category(), + parallelism_tag); + } + + template<typename InputIterator, typename T> + inline typename iterator_traits<InputIterator>::difference_type + count(InputIterator begin, InputIterator end, const T& value) + { + typedef iterator_traits<InputIterator> traits_type; + typedef typename traits_type::iterator_category iterator_category; + return count_switch(begin, end, value, iterator_category()); + } + + + // Sequential fallback. + template<typename InputIterator, typename Predicate> + inline typename iterator_traits<InputIterator>::difference_type + count_if(InputIterator begin, InputIterator end, Predicate pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::count_if(begin, end, pred); } + + // Parallel count_if for random access iterators + template<typename RandomAccessIterator, typename Predicate> + typename iterator_traits<RandomAccessIterator>::difference_type + count_if_switch(RandomAccessIterator begin, RandomAccessIterator end, + Predicate pred, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_unbalanced) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + typedef __gnu_parallel::sequence_index_t sequence_index_t; + + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().count_minimal_n + && __gnu_parallel::is_parallel(parallelism_tag))) + { + difference_type res = 0; + __gnu_parallel:: + 
count_if_selector<RandomAccessIterator, difference_type> + functionality; + __gnu_parallel:: + for_each_template_random_access(begin, end, pred, + functionality, + std::plus<sequence_index_t>(), + res, res, -1, parallelism_tag); + return res; + } + else + return count_if(begin, end, pred, __gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case. + template<typename InputIterator, typename Predicate, typename IteratorTag> + inline typename iterator_traits<InputIterator>::difference_type + count_if_switch(InputIterator begin, InputIterator end, Predicate pred, + IteratorTag) + { return count_if(begin, end, pred, __gnu_parallel::sequential_tag()); } + + // Public interface. + template<typename InputIterator, typename Predicate> + inline typename iterator_traits<InputIterator>::difference_type + count_if(InputIterator begin, InputIterator end, Predicate pred, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef iterator_traits<InputIterator> traits_type; + typedef typename traits_type::iterator_category iterator_category; + return count_if_switch(begin, end, pred, iterator_category(), + parallelism_tag); + } + + template<typename InputIterator, typename Predicate> + inline typename iterator_traits<InputIterator>::difference_type + count_if(InputIterator begin, InputIterator end, Predicate pred) + { + typedef iterator_traits<InputIterator> traits_type; + typedef typename traits_type::iterator_category iterator_category; + return count_if_switch(begin, end, pred, iterator_category()); + } + + + // Sequential fallback. 
+ template<typename ForwardIterator1, typename ForwardIterator2> + inline ForwardIterator1 + search(ForwardIterator1 begin1, ForwardIterator1 end1, + ForwardIterator2 begin2, ForwardIterator2 end2, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::search(begin1, end1, begin2, end2); } + + // Parallel algorithm for random access iterator + template<typename RandomAccessIterator1, typename RandomAccessIterator2> + RandomAccessIterator1 + search_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, RandomAccessIterator2 end2, + random_access_iterator_tag, random_access_iterator_tag) + { + typedef std::iterator_traits<RandomAccessIterator1> iterator1_traits; + typedef typename iterator1_traits::value_type value1_type; + typedef std::iterator_traits<RandomAccessIterator2> iterator2_traits; + typedef typename iterator2_traits::value_type value2_type; + + if (_GLIBCXX_PARALLEL_CONDITION(true)) + return __gnu_parallel:: + search_template(begin1, end1, begin2, end2, __gnu_parallel:: + equal_to<value1_type, value2_type>()); + else + return search(begin1, end1, begin2, end2, + __gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case + template<typename ForwardIterator1, typename ForwardIterator2, + typename IteratorTag1, typename IteratorTag2> + inline ForwardIterator1 + search_switch(ForwardIterator1 begin1, ForwardIterator1 end1, + ForwardIterator2 begin2, ForwardIterator2 end2, + IteratorTag1, IteratorTag2) + { return search(begin1, end1, begin2, end2, + __gnu_parallel::sequential_tag()); } + + // Public interface. 
+ template<typename ForwardIterator1, typename ForwardIterator2> + inline ForwardIterator1 + search(ForwardIterator1 begin1, ForwardIterator1 end1, + ForwardIterator2 begin2, ForwardIterator2 end2) + { + typedef std::iterator_traits<ForwardIterator1> iterator1_traits; + typedef typename iterator1_traits::iterator_category iterator1_category; + typedef std::iterator_traits<ForwardIterator2> iterator2_traits; + typedef typename iterator2_traits::iterator_category iterator2_category; + + return search_switch(begin1, end1, begin2, end2, + iterator1_category(), iterator2_category()); + } + + // Public interface. + template<typename ForwardIterator1, typename ForwardIterator2, + typename BinaryPredicate> + inline ForwardIterator1 + search(ForwardIterator1 begin1, ForwardIterator1 end1, + ForwardIterator2 begin2, ForwardIterator2 end2, + BinaryPredicate pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::search(begin1, end1, begin2, end2, pred); } + + // Parallel algorithm for random access iterator. 
+ template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename BinaryPredicate> + RandomAccessIterator1 + search_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, RandomAccessIterator2 end2, + BinaryPredicate pred, + random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(true)) + return __gnu_parallel::search_template(begin1, end1, + begin2, end2, pred); + else + return search(begin1, end1, begin2, end2, pred, + __gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case + template<typename ForwardIterator1, typename ForwardIterator2, + typename BinaryPredicate, typename IteratorTag1, + typename IteratorTag2> + inline ForwardIterator1 + search_switch(ForwardIterator1 begin1, ForwardIterator1 end1, + ForwardIterator2 begin2, ForwardIterator2 end2, + BinaryPredicate pred, IteratorTag1, IteratorTag2) + { return search(begin1, end1, begin2, end2, pred, + __gnu_parallel::sequential_tag()); } + + // Public interface + template<typename ForwardIterator1, typename ForwardIterator2, + typename BinaryPredicate> + inline ForwardIterator1 + search(ForwardIterator1 begin1, ForwardIterator1 end1, + ForwardIterator2 begin2, ForwardIterator2 end2, + BinaryPredicate pred) + { + typedef std::iterator_traits<ForwardIterator1> iterator1_traits; + typedef typename iterator1_traits::iterator_category iterator1_category; + typedef std::iterator_traits<ForwardIterator2> iterator2_traits; + typedef typename iterator2_traits::iterator_category iterator2_category; + return search_switch(begin1, end1, begin2, end2, pred, + iterator1_category(), iterator2_category()); + } + + // Sequential fallback + template<typename ForwardIterator, typename Integer, typename T> + inline ForwardIterator + search_n(ForwardIterator begin, ForwardIterator end, Integer count, + const T& val, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::search_n(begin, end, 
count, val); } + + // Sequential fallback + template<typename ForwardIterator, typename Integer, typename T, + typename BinaryPredicate> + inline ForwardIterator + search_n(ForwardIterator begin, ForwardIterator end, Integer count, + const T& val, BinaryPredicate binary_pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::search_n(begin, end, count, val, binary_pred); } + + // Public interface. + template<typename ForwardIterator, typename Integer, typename T> + inline ForwardIterator + search_n(ForwardIterator begin, ForwardIterator end, Integer count, + const T& val) + { + typedef typename iterator_traits<ForwardIterator>::value_type value_type; + return search_n(begin, end, count, val, + __gnu_parallel::equal_to<value_type, T>()); + } + + // Parallel algorithm for random access iterators. + template<typename RandomAccessIterator, typename Integer, + typename T, typename BinaryPredicate> + RandomAccessIterator + search_n_switch(RandomAccessIterator begin, RandomAccessIterator end, + Integer count, const T& val, BinaryPredicate binary_pred, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(true)) + { + __gnu_parallel::pseudo_sequence<T, Integer> ps(val, count); + return __gnu_parallel::search_template(begin, end, ps.begin(), + ps.end(), binary_pred); + } + else + return std::__search_n(begin, end, count, val, + binary_pred, random_access_iterator_tag()); + } + + // Sequential fallback for input iterator case. + template<typename ForwardIterator, typename Integer, typename T, + typename BinaryPredicate, typename IteratorTag> + inline ForwardIterator + search_n_switch(ForwardIterator begin, ForwardIterator end, Integer count, + const T& val, BinaryPredicate binary_pred, IteratorTag) + { return __search_n(begin, end, count, val, binary_pred, IteratorTag()); } + + // Public interface. 
+ template<typename ForwardIterator, typename Integer, typename T, + typename BinaryPredicate> + inline ForwardIterator + search_n(ForwardIterator begin, ForwardIterator end, Integer count, + const T& val, BinaryPredicate binary_pred) + { + return search_n_switch(begin, end, count, val, binary_pred, + typename std::iterator_traits<ForwardIterator>:: + iterator_category()); + } + + + // Sequential fallback. + template<typename InputIterator, typename OutputIterator, + typename UnaryOperation> + inline OutputIterator + transform(InputIterator begin, InputIterator end, OutputIterator result, + UnaryOperation unary_op, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::transform(begin, end, result, unary_op); } + + // Parallel unary transform for random access iterators. + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename UnaryOperation> + RandomAccessIterator2 + transform1_switch(RandomAccessIterator1 begin, RandomAccessIterator1 end, + RandomAccessIterator2 result, UnaryOperation unary_op, + random_access_iterator_tag, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().transform_minimal_n + && __gnu_parallel::is_parallel(parallelism_tag))) + { + bool dummy = true; + typedef __gnu_parallel::iterator_pair<RandomAccessIterator1, + RandomAccessIterator2, random_access_iterator_tag> ip; + ip begin_pair(begin, result), end_pair(end, result + (end - begin)); + __gnu_parallel::transform1_selector<ip> functionality; + __gnu_parallel:: + for_each_template_random_access(begin_pair, end_pair, + unary_op, functionality, + __gnu_parallel::dummy_reduct(), + dummy, dummy, -1, parallelism_tag); + return functionality.finish_iterator; + } + else + return transform(begin, end, result, unary_op, + __gnu_parallel::sequential_tag()); + } + + // 
Sequential fallback for input iterator case. + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename UnaryOperation, typename IteratorTag1, + typename IteratorTag2> + inline RandomAccessIterator2 + transform1_switch(RandomAccessIterator1 begin, RandomAccessIterator1 end, + RandomAccessIterator2 result, UnaryOperation unary_op, + IteratorTag1, IteratorTag2) + { return transform(begin, end, result, unary_op, + __gnu_parallel::sequential_tag()); } + + // Public interface. + template<typename InputIterator, typename OutputIterator, + typename UnaryOperation> + inline OutputIterator + transform(InputIterator begin, InputIterator end, OutputIterator result, + UnaryOperation unary_op, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef std::iterator_traits<InputIterator> iteratori_traits; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori_traits::iterator_category iteratori_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return transform1_switch(begin, end, result, unary_op, + iteratori_category(), iteratoro_category(), + parallelism_tag); + } + + template<typename InputIterator, typename OutputIterator, + typename UnaryOperation> + inline OutputIterator + transform(InputIterator begin, InputIterator end, OutputIterator result, + UnaryOperation unary_op) + { + typedef std::iterator_traits<InputIterator> iteratori_traits; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori_traits::iterator_category iteratori_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return transform1_switch(begin, end, result, unary_op, + iteratori_category(), iteratoro_category()); + } + + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation> + inline OutputIterator + transform(InputIterator1 begin1, 
InputIterator1 end1, + InputIterator2 begin2, OutputIterator result, + BinaryOperation binary_op, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::transform(begin1, end1, + begin2, result, binary_op); } + + // Parallel binary transform for random access iterators. + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename RandomAccessIterator3, typename BinaryOperation> + RandomAccessIterator3 + transform2_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, + RandomAccessIterator3 result, BinaryOperation binary_op, + random_access_iterator_tag, random_access_iterator_tag, + random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + (end1 - begin1) >= + __gnu_parallel::_Settings::get().transform_minimal_n + && __gnu_parallel::is_parallel(parallelism_tag))) + { + bool dummy = true; + typedef __gnu_parallel::iterator_triple<RandomAccessIterator1, + RandomAccessIterator2, RandomAccessIterator3, + random_access_iterator_tag> ip; + ip begin_triple(begin1, begin2, result), + end_triple(end1, begin2 + (end1 - begin1), + result + (end1 - begin1)); + __gnu_parallel::transform2_selector<ip> functionality; + __gnu_parallel:: + for_each_template_random_access(begin_triple, end_triple, + binary_op, functionality, + __gnu_parallel::dummy_reduct(), + dummy, dummy, -1, + parallelism_tag); + return functionality.finish_iterator; + } + else + return transform(begin1, end1, begin2, result, binary_op, + __gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case. 
+ template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + typename tag1, typename tag2, typename tag3> + inline OutputIterator + transform2_switch(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, OutputIterator result, + BinaryOperation binary_op, tag1, tag2, tag3) + { return transform(begin1, end1, begin2, result, binary_op, + __gnu_parallel::sequential_tag()); } + + // Public interface. + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation> + inline OutputIterator + transform(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, OutputIterator result, + BinaryOperation binary_op, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef std::iterator_traits<InputIterator1> iteratori1_traits; + typedef typename iteratori1_traits::iterator_category + iteratori1_category; + typedef std::iterator_traits<InputIterator2> iteratori2_traits; + typedef typename iteratori2_traits::iterator_category + iteratori2_category; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return transform2_switch(begin1, end1, begin2, result, binary_op, + iteratori1_category(), iteratori2_category(), + iteratoro_category(), parallelism_tag); + } + + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation> + inline OutputIterator + transform(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, OutputIterator result, + BinaryOperation binary_op) + { + typedef std::iterator_traits<InputIterator1> iteratori1_traits; + typedef typename iteratori1_traits::iterator_category + iteratori1_category; + typedef std::iterator_traits<InputIterator2> iteratori2_traits; + typedef typename iteratori2_traits::iterator_category + iteratori2_category; + typedef 
std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return transform2_switch(begin1, end1, begin2, result, binary_op, + iteratori1_category(), iteratori2_category(), + iteratoro_category()); + } + + // Sequential fallback + template<typename ForwardIterator, typename T> + inline void + replace(ForwardIterator begin, ForwardIterator end, const T& old_value, + const T& new_value, __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_P::replace(begin, end, old_value, new_value); } + + // Sequential fallback for input iterator case + template<typename ForwardIterator, typename T, typename IteratorTag> + inline void + replace_switch(ForwardIterator begin, ForwardIterator end, + const T& old_value, const T& new_value, IteratorTag) + { replace(begin, end, old_value, new_value, + __gnu_parallel::sequential_tag()); } + + // Parallel replace for random access iterators + template<typename RandomAccessIterator, typename T> + inline void + replace_switch(RandomAccessIterator begin, RandomAccessIterator end, + const T& old_value, const T& new_value, + random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_balanced) + { + // XXX parallel version is where? 
+ replace(begin, end, old_value, new_value, + __gnu_parallel::sequential_tag()); + } + + // Public interface + template<typename ForwardIterator, typename T> + inline void + replace(ForwardIterator begin, ForwardIterator end, const T& old_value, + const T& new_value, __gnu_parallel::_Parallelism parallelism_tag) + { + typedef iterator_traits<ForwardIterator> traits_type; + typedef typename traits_type::iterator_category iterator_category; + replace_switch(begin, end, old_value, new_value, iterator_category(), + parallelism_tag); + } + + template<typename ForwardIterator, typename T> + inline void + replace(ForwardIterator begin, ForwardIterator end, const T& old_value, + const T& new_value) + { + typedef iterator_traits<ForwardIterator> traits_type; + typedef typename traits_type::iterator_category iterator_category; + replace_switch(begin, end, old_value, new_value, iterator_category()); + } + + + // Sequential fallback + template<typename ForwardIterator, typename Predicate, typename T> + inline void + replace_if(ForwardIterator begin, ForwardIterator end, Predicate pred, + const T& new_value, __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_P::replace_if(begin, end, pred, new_value); } + + // Sequential fallback for input iterator case + template<typename ForwardIterator, typename Predicate, typename T, + typename IteratorTag> + inline void + replace_if_switch(ForwardIterator begin, ForwardIterator end, + Predicate pred, const T& new_value, IteratorTag) + { replace_if(begin, end, pred, new_value, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators. 
+ template<typename RandomAccessIterator, typename Predicate, typename T> + void + replace_if_switch(RandomAccessIterator begin, RandomAccessIterator end, + Predicate pred, const T& new_value, + random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().replace_minimal_n + && __gnu_parallel::is_parallel(parallelism_tag))) + { + bool dummy; + __gnu_parallel:: + replace_if_selector<RandomAccessIterator, Predicate, T> + functionality(new_value); + __gnu_parallel:: + for_each_template_random_access(begin, end, pred, + functionality, + __gnu_parallel::dummy_reduct(), + true, dummy, -1, parallelism_tag); + } + else + replace_if(begin, end, pred, new_value, + __gnu_parallel::sequential_tag()); + } + + // Public interface. + template<typename ForwardIterator, typename Predicate, typename T> + inline void + replace_if(ForwardIterator begin, ForwardIterator end, + Predicate pred, const T& new_value, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef std::iterator_traits<ForwardIterator> iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + replace_if_switch(begin, end, pred, new_value, iterator_category(), + parallelism_tag); + } + + template<typename ForwardIterator, typename Predicate, typename T> + inline void + replace_if(ForwardIterator begin, ForwardIterator end, + Predicate pred, const T& new_value) + { + typedef std::iterator_traits<ForwardIterator> iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + replace_if_switch(begin, end, pred, new_value, iterator_category()); + } + + // Sequential fallback + template<typename ForwardIterator, typename Generator> + inline void + generate(ForwardIterator begin, ForwardIterator end, Generator gen, + __gnu_parallel::sequential_tag) + { 
_GLIBCXX_STD_P::generate(begin, end, gen); } + + // Sequential fallback for input iterator case. + template<typename ForwardIterator, typename Generator, typename IteratorTag> + inline void + generate_switch(ForwardIterator begin, ForwardIterator end, Generator gen, + IteratorTag) + { generate(begin, end, gen, __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators. + template<typename RandomAccessIterator, typename Generator> + void + generate_switch(RandomAccessIterator begin, RandomAccessIterator end, + Generator gen, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().generate_minimal_n + && __gnu_parallel::is_parallel(parallelism_tag))) + { + bool dummy; + __gnu_parallel::generate_selector<RandomAccessIterator> + functionality; + __gnu_parallel:: + for_each_template_random_access(begin, end, gen, functionality, + __gnu_parallel::dummy_reduct(), + true, dummy, -1, parallelism_tag); + } + else + generate(begin, end, gen, __gnu_parallel::sequential_tag()); + } + + // Public interface. 
+ template<typename ForwardIterator, typename Generator> + inline void + generate(ForwardIterator begin, ForwardIterator end, + Generator gen, __gnu_parallel::_Parallelism parallelism_tag) + { + typedef std::iterator_traits<ForwardIterator> iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + generate_switch(begin, end, gen, iterator_category(), parallelism_tag); + } + + template<typename ForwardIterator, typename Generator> + inline void + generate(ForwardIterator begin, ForwardIterator end, Generator gen) + { + typedef std::iterator_traits<ForwardIterator> iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + generate_switch(begin, end, gen, iterator_category()); + } + + + // Sequential fallback. + template<typename OutputIterator, typename Size, typename Generator> + inline OutputIterator + generate_n(OutputIterator begin, Size n, Generator gen, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::generate_n(begin, n, gen); } + + // Sequential fallback for input iterator case. + template<typename OutputIterator, typename Size, typename Generator, + typename IteratorTag> + inline OutputIterator + generate_n_switch(OutputIterator begin, Size n, Generator gen, IteratorTag) + { return generate_n(begin, n, gen, __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators. + template<typename RandomAccessIterator, typename Size, typename Generator> + inline RandomAccessIterator + generate_n_switch(RandomAccessIterator begin, Size n, Generator gen, + random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_balanced) + { + // XXX parallel version is where? + return generate_n(begin, n, gen, __gnu_parallel::sequential_tag()); + } + + // Public interface. 
+ template<typename OutputIterator, typename Size, typename Generator> + inline OutputIterator + generate_n(OutputIterator begin, Size n, Generator gen, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef std::iterator_traits<OutputIterator> iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + return generate_n_switch(begin, n, gen, iterator_category(), + parallelism_tag); + } + + template<typename OutputIterator, typename Size, typename Generator> + inline OutputIterator + generate_n(OutputIterator begin, Size n, Generator gen) + { + typedef std::iterator_traits<OutputIterator> iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + return generate_n_switch(begin, n, gen, iterator_category()); + } + + + // Sequential fallback. + template<typename RandomAccessIterator> + inline void + random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_P::random_shuffle(begin, end); } + + // Sequential fallback. + template<typename RandomAccessIterator, typename RandomNumberGenerator> + inline void + random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, + RandomNumberGenerator& rand, __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_P::random_shuffle(begin, end, rand); } + + + /** @brief Functor wrapper for std::rand(). */ + template<typename must_be_int = int> + struct c_rand_number + { + int + operator()(int limit) + { return rand() % limit; } + }; + + // Fill in random number generator. + template<typename RandomAccessIterator> + inline void + random_shuffle(RandomAccessIterator begin, RandomAccessIterator end) + { + c_rand_number<> r; + // Parallelization still possible. + __gnu_parallel::random_shuffle(begin, end, r); + } + + // Parallel algorithm for random access iterators. 
+ template<typename RandomAccessIterator, typename RandomNumberGenerator> + void + random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, + RandomNumberGenerator& rand) + { + if (begin == end) + return; + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().random_shuffle_minimal_n)) + __gnu_parallel::parallel_random_shuffle(begin, end, rand); + else + __gnu_parallel::sequential_random_shuffle(begin, end, rand); + } + + // Sequential fallback. + template<typename ForwardIterator, typename Predicate> + inline ForwardIterator + partition(ForwardIterator begin, ForwardIterator end, + Predicate pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::partition(begin, end, pred); } + + // Sequential fallback for input iterator case. + template<typename ForwardIterator, typename Predicate, typename IteratorTag> + inline ForwardIterator + partition_switch(ForwardIterator begin, ForwardIterator end, + Predicate pred, IteratorTag) + { return partition(begin, end, pred, __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators. + template<typename RandomAccessIterator, typename Predicate> + RandomAccessIterator + partition_switch(RandomAccessIterator begin, RandomAccessIterator end, + Predicate pred, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().partition_minimal_n)) + { + typedef typename std::iterator_traits<RandomAccessIterator>:: + difference_type difference_type; + difference_type middle = __gnu_parallel:: + parallel_partition(begin, end, pred, + __gnu_parallel::get_max_threads()); + return begin + middle; + } + else + return partition(begin, end, pred, __gnu_parallel::sequential_tag()); + } + + // Public interface. 
+ template<typename ForwardIterator, typename Predicate> + inline ForwardIterator + partition(ForwardIterator begin, ForwardIterator end, Predicate pred) + { + typedef iterator_traits<ForwardIterator> traits_type; + typedef typename traits_type::iterator_category iterator_category; + return partition_switch(begin, end, pred, iterator_category()); + } + + // sort interface + + // Sequential fallback + template<typename RandomAccessIterator> + inline void + sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_P::sort(begin, end); } + + // Sequential fallback + template<typename RandomAccessIterator, typename Comparator> + inline void + sort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, + __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_P::sort<RandomAccessIterator, Comparator>(begin, end, + comp); } + + // Public interface + template<typename RandomAccessIterator, typename Comparator, + typename Parallelism> + void + sort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, + Parallelism parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + + if (begin != end) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= + __gnu_parallel::_Settings::get().sort_minimal_n)) + __gnu_parallel::parallel_sort<false>(begin, end, comp, parallelism); + else + sort(begin, end, comp, __gnu_parallel::sequential_tag()); + } + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + sort(RandomAccessIterator begin, RandomAccessIterator end) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + sort(begin, end, std::less<value_type>(), + __gnu_parallel::default_parallel_tag()); + } + + // Public interface, insert default comparator + 
template<typename RandomAccessIterator> + inline void + sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::default_parallel_tag parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + sort(begin, end, std::less<value_type>(), parallelism); + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::parallel_tag parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + sort(begin, end, std::less<value_type>(), parallelism); + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::multiway_mergesort_tag parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + sort(begin, end, std::less<value_type>(), parallelism); + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::multiway_mergesort_sampling_tag parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + sort(begin, end, std::less<value_type>(), parallelism); + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::multiway_mergesort_exact_tag parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + sort(begin, end, std::less<value_type>(), parallelism); + } + + // Public interface, 
insert default comparator + template<typename RandomAccessIterator> + inline void + sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::quicksort_tag parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + sort(begin, end, std::less<value_type>(), parallelism); + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::balanced_quicksort_tag parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + sort(begin, end, std::less<value_type>(), parallelism); + } + + // Public interface + template<typename RandomAccessIterator, typename Comparator> + void + sort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + sort(begin, end, comp, __gnu_parallel::default_parallel_tag()); + } + + + // stable_sort interface + + + // Sequential fallback + template<typename RandomAccessIterator> + inline void + stable_sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_P::stable_sort(begin, end); } + + // Sequential fallback + template<typename RandomAccessIterator, typename Comparator> + inline void + stable_sort(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_P::stable_sort<RandomAccessIterator, Comparator>( + begin, end, comp); } + + // Public interface + template<typename RandomAccessIterator, typename Comparator, + typename Parallelism> + void + stable_sort(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, Parallelism parallelism) + { + typedef iterator_traits<RandomAccessIterator> 
traits_type; + typedef typename traits_type::value_type value_type; + + if (begin != end) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= + __gnu_parallel::_Settings::get().sort_minimal_n)) + __gnu_parallel::parallel_sort<true>(begin, end, comp, parallelism); + else + stable_sort(begin, end, comp, __gnu_parallel::sequential_tag()); + } + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + stable_sort(RandomAccessIterator begin, RandomAccessIterator end) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + stable_sort(begin, end, std::less<value_type>(), + __gnu_parallel::default_parallel_tag()); + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + stable_sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::default_parallel_tag parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + stable_sort(begin, end, std::less<value_type>(), parallelism); + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + stable_sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::parallel_tag parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + stable_sort(begin, end, std::less<value_type>(), parallelism); + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + stable_sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::multiway_mergesort_tag parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + stable_sort(begin, end, 
std::less<value_type>(), parallelism); + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + stable_sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::quicksort_tag parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + stable_sort(begin, end, std::less<value_type>(), parallelism); + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + stable_sort(RandomAccessIterator begin, RandomAccessIterator end, + __gnu_parallel::balanced_quicksort_tag parallelism) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + stable_sort(begin, end, std::less<value_type>(), parallelism); + } + + // Public interface + template<typename RandomAccessIterator, typename Comparator> + void + stable_sort(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + stable_sort(begin, end, comp, __gnu_parallel::default_parallel_tag()); + } + + +// // Sequential fallback +// template<typename RandomAccessIterator> +// inline void +// stable_sort(RandomAccessIterator begin, RandomAccessIterator end, +// __gnu_parallel::sequential_tag) +// { return _GLIBCXX_STD_P::stable_sort(begin, end); } +// +// // Sequential fallback +// template<typename RandomAccessIterator, typename Comparator> +// inline void +// stable_sort(RandomAccessIterator begin, RandomAccessIterator end, +// Comparator comp, __gnu_parallel::sequential_tag) +// { return _GLIBCXX_STD_P::stable_sort(begin, end, comp); } +// +// template<typename RandomAccessIterator> +// void +// stable_sort(RandomAccessIterator begin, RandomAccessIterator end) +// { +// typedef iterator_traits<RandomAccessIterator> traits_type; +// 
typedef typename traits_type::value_type value_type; +// stable_sort(begin, end, std::less<value_type>()); +// } +// +// // Parallel algorithm for random access iterators +// template<typename RandomAccessIterator, typename Comparator> +// void +// stable_sort(RandomAccessIterator begin, RandomAccessIterator end, +// Comparator comp) +// { +// if (begin != end) +// { +// if (_GLIBCXX_PARALLEL_CONDITION( +// static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= +// __gnu_parallel::_Settings::get().sort_minimal_n)) +// __gnu_parallel::parallel_sort(begin, end, comp, +// __gnu_parallel::parallel_tag()); +// else +// stable_sort(begin, end, comp, __gnu_parallel::sequential_tag()); +// } +// } + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator> + inline OutputIterator + merge(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + InputIterator2 end2, OutputIterator result, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::merge(begin1, end1, begin2, end2, result); } + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename Comparator> + inline OutputIterator + merge(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + InputIterator2 end2, OutputIterator result, Comparator comp, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::merge(begin1, end1, begin2, end2, result, comp); } + + // Sequential fallback for input iterator case + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename Comparator, + typename IteratorTag1, typename IteratorTag2, typename IteratorTag3> + inline OutputIterator + merge_switch(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator result, Comparator comp, + IteratorTag1, IteratorTag2, IteratorTag3) + { return _GLIBCXX_STD_P::merge(begin1, end1, begin2, end2, + result, 
comp); } + + // Parallel algorithm for random access iterators + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename Comparator> + OutputIterator + merge_switch(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator result, Comparator comp, + random_access_iterator_tag, random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + (static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) + >= __gnu_parallel::_Settings::get().merge_minimal_n + || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) + >= __gnu_parallel::_Settings::get().merge_minimal_n))) + return __gnu_parallel::parallel_merge_advance(begin1, end1, + begin2, end2, + result, (end1 - begin1) + + (end2 - begin2), comp); + else + return __gnu_parallel::merge_advance(begin1, end1, begin2, end2, + result, (end1 - begin1) + + (end2 - begin2), comp); + } + + // Public interface + template<typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename Comparator> + inline OutputIterator + merge(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + InputIterator2 end2, OutputIterator result, Comparator comp) + { + typedef typename iterator_traits<InputIterator1>::value_type value_type; + + typedef std::iterator_traits<InputIterator1> iteratori1_traits; + typedef std::iterator_traits<InputIterator2> iteratori2_traits; + typedef std::iterator_traits<OutputIterator> iteratoro_traits; + typedef typename iteratori1_traits::iterator_category + iteratori1_category; + typedef typename iteratori2_traits::iterator_category + iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return merge_switch(begin1, end1, begin2, end2, result, comp, + iteratori1_category(), iteratori2_category(), + iteratoro_category()); + } + + + // Public interface, insert default comparator + template<typename 
InputIterator1, typename InputIterator2, + typename OutputIterator> + inline OutputIterator + merge(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + InputIterator2 end2, OutputIterator result) + { + typedef std::iterator_traits<InputIterator1> iterator1_traits; + typedef std::iterator_traits<InputIterator2> iterator2_traits; + typedef typename iterator1_traits::value_type value1_type; + typedef typename iterator2_traits::value_type value2_type; + + return merge(begin1, end1, begin2, end2, result, + __gnu_parallel::less<value1_type, value2_type>()); + } + + // Sequential fallback + template<typename RandomAccessIterator> + inline void + nth_element(RandomAccessIterator begin, RandomAccessIterator nth, + RandomAccessIterator end, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::nth_element(begin, nth, end); } + + // Sequential fallback + template<typename RandomAccessIterator, typename Comparator> + inline void + nth_element(RandomAccessIterator begin, RandomAccessIterator nth, + RandomAccessIterator end, Comparator comp, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::nth_element(begin, nth, end, comp); } + + // Public interface + template<typename RandomAccessIterator, typename Comparator> + inline void + nth_element(RandomAccessIterator begin, RandomAccessIterator nth, + RandomAccessIterator end, Comparator comp) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().nth_element_minimal_n)) + __gnu_parallel::parallel_nth_element(begin, nth, end, comp); + else + nth_element(begin, nth, end, comp, __gnu_parallel::sequential_tag()); + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + nth_element(RandomAccessIterator begin, RandomAccessIterator nth, + RandomAccessIterator end) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type 
value_type; + nth_element(begin, nth, end, std::less<value_type>()); + } + + // Sequential fallback + template<typename RandomAccessIterator, typename _Compare> + inline void + partial_sort(RandomAccessIterator begin, RandomAccessIterator middle, + RandomAccessIterator end, _Compare comp, + __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_P::partial_sort(begin, middle, end, comp); } + + // Sequential fallback + template<typename RandomAccessIterator> + inline void + partial_sort(RandomAccessIterator begin, RandomAccessIterator middle, + RandomAccessIterator end, __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_P::partial_sort(begin, middle, end); } + + // Public interface, parallel algorithm for random access iterators + template<typename RandomAccessIterator, typename _Compare> + void + partial_sort(RandomAccessIterator begin, RandomAccessIterator middle, + RandomAccessIterator end, _Compare comp) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().partial_sort_minimal_n)) + __gnu_parallel::parallel_partial_sort(begin, middle, end, comp); + else + partial_sort(begin, middle, end, comp, + __gnu_parallel::sequential_tag()); + } + + // Public interface, insert default comparator + template<typename RandomAccessIterator> + inline void + partial_sort(RandomAccessIterator begin, RandomAccessIterator middle, + RandomAccessIterator end) + { + typedef iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + partial_sort(begin, middle, end, std::less<value_type>()); + } + + // Sequential fallback + template<typename ForwardIterator> + inline ForwardIterator + max_element(ForwardIterator begin, ForwardIterator end, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::max_element(begin, end); } + + // Sequential fallback + template<typename ForwardIterator, typename Comparator> + inline ForwardIterator + max_element(ForwardIterator 
begin, ForwardIterator end, Comparator comp, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::max_element(begin, end, comp); } + + // Sequential fallback for input iterator case + template<typename ForwardIterator, typename Comparator, typename IteratorTag> + inline ForwardIterator + max_element_switch(ForwardIterator begin, ForwardIterator end, + Comparator comp, IteratorTag) + { return max_element(begin, end, comp, __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators + template<typename RandomAccessIterator, typename Comparator> + RandomAccessIterator + max_element_switch(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().max_element_minimal_n + && __gnu_parallel::is_parallel(parallelism_tag))) + { + RandomAccessIterator res(begin); + __gnu_parallel::identity_selector<RandomAccessIterator> + functionality; + __gnu_parallel:: + for_each_template_random_access(begin, end, + __gnu_parallel::nothing(), + functionality, + __gnu_parallel:: + max_element_reduct<Comparator, + RandomAccessIterator>(comp), + res, res, -1, parallelism_tag); + return res; + } + else + return max_element(begin, end, comp, __gnu_parallel::sequential_tag()); + } + + // Public interface, insert default comparator + template<typename ForwardIterator> + inline ForwardIterator + max_element(ForwardIterator begin, ForwardIterator end, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef typename iterator_traits<ForwardIterator>::value_type value_type; + return max_element(begin, end, std::less<value_type>(), parallelism_tag); + } + + template<typename ForwardIterator> + inline ForwardIterator + max_element(ForwardIterator begin, ForwardIterator end) + { + typedef typename 
iterator_traits<ForwardIterator>::value_type value_type; + return max_element(begin, end, std::less<value_type>()); + } + + // Public interface + template<typename ForwardIterator, typename Comparator> + inline ForwardIterator + max_element(ForwardIterator begin, ForwardIterator end, Comparator comp, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef iterator_traits<ForwardIterator> traits_type; + typedef typename traits_type::iterator_category iterator_category; + return max_element_switch(begin, end, comp, iterator_category(), + parallelism_tag); + } + + template<typename ForwardIterator, typename Comparator> + inline ForwardIterator + max_element(ForwardIterator begin, ForwardIterator end, Comparator comp) + { + typedef iterator_traits<ForwardIterator> traits_type; + typedef typename traits_type::iterator_category iterator_category; + return max_element_switch(begin, end, comp, iterator_category()); + } + + + // Sequential fallback + template<typename ForwardIterator> + inline ForwardIterator + min_element(ForwardIterator begin, ForwardIterator end, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::min_element(begin, end); } + + // Sequential fallback + template<typename ForwardIterator, typename Comparator> + inline ForwardIterator + min_element(ForwardIterator begin, ForwardIterator end, Comparator comp, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::min_element(begin, end, comp); } + + // Sequential fallback for input iterator case + template<typename ForwardIterator, typename Comparator, typename IteratorTag> + inline ForwardIterator + min_element_switch(ForwardIterator begin, ForwardIterator end, + Comparator comp, IteratorTag) + { return min_element(begin, end, comp, __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators + template<typename RandomAccessIterator, typename Comparator> + RandomAccessIterator + min_element_switch(RandomAccessIterator begin, RandomAccessIterator end, + 
Comparator comp, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().min_element_minimal_n + && __gnu_parallel::is_parallel(parallelism_tag))) + { + RandomAccessIterator res(begin); + __gnu_parallel::identity_selector<RandomAccessIterator> + functionality; + __gnu_parallel:: + for_each_template_random_access(begin, end, + __gnu_parallel::nothing(), + functionality, + __gnu_parallel:: + min_element_reduct<Comparator, + RandomAccessIterator>(comp), + res, res, -1, parallelism_tag); + return res; + } + else + return min_element(begin, end, comp, __gnu_parallel::sequential_tag()); + } + + // Public interface, insert default comparator + template<typename ForwardIterator> + inline ForwardIterator + min_element(ForwardIterator begin, ForwardIterator end, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef typename iterator_traits<ForwardIterator>::value_type value_type; + return min_element(begin, end, std::less<value_type>(), parallelism_tag); + } + + template<typename ForwardIterator> + inline ForwardIterator + min_element(ForwardIterator begin, ForwardIterator end) + { + typedef typename iterator_traits<ForwardIterator>::value_type value_type; + return min_element(begin, end, std::less<value_type>()); + } + + // Public interface + template<typename ForwardIterator, typename Comparator> + inline ForwardIterator + min_element(ForwardIterator begin, ForwardIterator end, Comparator comp, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef iterator_traits<ForwardIterator> traits_type; + typedef typename traits_type::iterator_category iterator_category; + return min_element_switch(begin, end, comp, iterator_category(), + parallelism_tag); + } + + template<typename ForwardIterator, typename Comparator> + inline ForwardIterator + min_element(ForwardIterator 
begin, ForwardIterator end, Comparator comp) + { + typedef iterator_traits<ForwardIterator> traits_type; + typedef typename traits_type::iterator_category iterator_category; + return min_element_switch(begin, end, comp, iterator_category()); + } +} // end namespace +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_ALGO_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/algobase.h b/gcc-4.4.0/libstdc++-v3/include/parallel/algobase.h new file mode 100644 index 000000000..aaffb3e5f --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/algobase.h @@ -0,0 +1,281 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/algobase.h + * @brief Parallel STL function calls corresponding to the + * stl_algobase.h header. The functions defined here mainly do case + * switches and call the actual parallelized versions in other files. + * Inlining policy: Functions that basically only contain one + * function call, are declared inline. 
+ * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_ALGOBASE_H +#define _GLIBCXX_PARALLEL_ALGOBASE_H 1 + +#include <bits/stl_algobase.h> +#include <parallel/base.h> +#include <parallel/tags.h> +#include <parallel/settings.h> +#include <parallel/find.h> +#include <parallel/find_selectors.h> + +namespace std +{ +namespace __parallel +{ + // NB: equal and lexicographical_compare require mismatch. + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2> + inline pair<InputIterator1, InputIterator2> + mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2); } + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2, + typename Predicate> + inline pair<InputIterator1, InputIterator2> + mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + Predicate pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred); } + + // Sequential fallback for input iterator case + template<typename InputIterator1, typename InputIterator2, + typename Predicate, typename IteratorTag1, typename IteratorTag2> + inline pair<InputIterator1, InputIterator2> + mismatch_switch(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, Predicate pred, IteratorTag1, + IteratorTag2) + { return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred); } + + // Parallel mismatch for random access iterators + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename Predicate> + pair<RandomAccessIterator1, RandomAccessIterator2> + mismatch_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, Predicate pred, + random_access_iterator_tag, random_access_iterator_tag) + { + if 
(_GLIBCXX_PARALLEL_CONDITION(true)) + { + RandomAccessIterator1 res = + __gnu_parallel::find_template(begin1, end1, begin2, pred, + __gnu_parallel:: + mismatch_selector()).first; + return make_pair(res , begin2 + (res - begin1)); + } + else + return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred); + } + + // Public interface + template<typename InputIterator1, typename InputIterator2> + inline pair<InputIterator1, InputIterator2> + mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2) + { + typedef std::iterator_traits<InputIterator1> iterator1_traits; + typedef std::iterator_traits<InputIterator2> iterator2_traits; + typedef typename iterator1_traits::value_type value1_type; + typedef typename iterator2_traits::value_type value2_type; + typedef typename iterator1_traits::iterator_category iterator1_category; + typedef typename iterator2_traits::iterator_category iterator2_category; + + typedef __gnu_parallel::equal_to<value1_type, value2_type> equal_to_type; + + return mismatch_switch(begin1, end1, begin2, equal_to_type(), + iterator1_category(), iterator2_category()); + } + + // Public interface + template<typename InputIterator1, typename InputIterator2, + typename Predicate> + inline pair<InputIterator1, InputIterator2> + mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + Predicate pred) + { + typedef std::iterator_traits<InputIterator1> iterator1_traits; + typedef std::iterator_traits<InputIterator2> iterator2_traits; + typedef typename iterator1_traits::iterator_category iterator1_category; + typedef typename iterator2_traits::iterator_category iterator2_category; + + return mismatch_switch(begin1, end1, begin2, pred, iterator1_category(), + iterator2_category()); + } + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2> + inline bool + equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + __gnu_parallel::sequential_tag) + { return 
_GLIBCXX_STD_P::equal(begin1, end1, begin2); } + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2, + typename Predicate> + inline bool + equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + Predicate pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::equal(begin1, end1, begin2, pred); } + + // Public interface + template<typename InputIterator1, typename InputIterator2> + inline bool + equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2) + { return mismatch(begin1, end1, begin2).first == end1; } + + // Public interface + template<typename InputIterator1, typename InputIterator2, + typename Predicate> + inline bool + equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + Predicate pred) + { return mismatch(begin1, end1, begin2, pred).first == end1; } + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2> + inline bool + lexicographical_compare(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1, + begin2, end2); } + + // Sequential fallback + template<typename InputIterator1, typename InputIterator2, + typename Predicate> + inline bool + lexicographical_compare(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + Predicate pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1, + begin2, end2, pred); } + + // Sequential fallback for input iterator case + template<typename InputIterator1, typename InputIterator2, + typename Predicate, typename IteratorTag1, typename IteratorTag2> + inline bool + lexicographical_compare_switch(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + Predicate pred, IteratorTag1, IteratorTag2) + { return 
_GLIBCXX_STD_P::lexicographical_compare(begin1, end1, + begin2, end2, pred); } + + // Parallel lexicographical_compare for random access iterators + // Limitation: Both valuetypes must be the same + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename Predicate> + bool + lexicographical_compare_switch(RandomAccessIterator1 begin1, + RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, + RandomAccessIterator2 end2, Predicate pred, + random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(true)) + { + typedef iterator_traits<RandomAccessIterator1> traits1_type; + typedef typename traits1_type::value_type value1_type; + + typedef iterator_traits<RandomAccessIterator2> traits2_type; + typedef typename traits2_type::value_type value2_type; + + typedef __gnu_parallel::equal_from_less<Predicate, value1_type, + value2_type> equal_type; + + // Longer sequence in first place. + if ((end1 - begin1) < (end2 - begin2)) + { + typedef pair<RandomAccessIterator1, RandomAccessIterator2> + pair_type; + pair_type mm = mismatch_switch(begin1, end1, begin2, + equal_type(pred), + random_access_iterator_tag(), + random_access_iterator_tag()); + + return (mm.first == end1) || bool(pred(*mm.first, *mm.second)); + } + else + { + typedef pair<RandomAccessIterator2, RandomAccessIterator1> + pair_type; + pair_type mm = mismatch_switch(begin2, end2, begin1, + equal_type(pred), + random_access_iterator_tag(), + random_access_iterator_tag()); + + return (mm.first != end2) && bool(pred(*mm.second, *mm.first)); + } + } + else + return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1, + begin2, end2, pred); + } + + // Public interface + template<typename InputIterator1, typename InputIterator2> + inline bool + lexicographical_compare(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2) + { + typedef iterator_traits<InputIterator1> traits1_type; + typedef typename 
traits1_type::value_type value1_type; + typedef typename traits1_type::iterator_category iterator1_category; + + typedef iterator_traits<InputIterator2> traits2_type; + typedef typename traits2_type::value_type value2_type; + typedef typename traits2_type::iterator_category iterator2_category; + typedef __gnu_parallel::less<value1_type, value2_type> less_type; + + return lexicographical_compare_switch(begin1, end1, begin2, end2, + less_type(), iterator1_category(), + iterator2_category()); + } + + // Public interface + template<typename InputIterator1, typename InputIterator2, + typename Predicate> + inline bool + lexicographical_compare(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + Predicate pred) + { + typedef iterator_traits<InputIterator1> traits1_type; + typedef typename traits1_type::iterator_category iterator1_category; + + typedef iterator_traits<InputIterator2> traits2_type; + typedef typename traits2_type::iterator_category iterator2_category; + + return lexicographical_compare_switch(begin1, end1, begin2, end2, pred, + iterator1_category(), + iterator2_category()); + } +} // end namespace +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_ALGOBASE_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/algorithm b/gcc-4.4.0/libstdc++-v3/include/parallel/algorithm new file mode 100644 index 000000000..3c93d5b68 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/algorithm @@ -0,0 +1,40 @@ +// Algorithm extensions -*- C++ -*- + +// Copyright (C) 2007, 2009 +// Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. 
+ +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/algorithm + * This file is a GNU extension to the Standard C++ Library. + */ + +#ifndef _PARALLEL_ALGORITHM +#define _PARALLEL_ALGORITHM 1 + +#pragma GCC system_header + +#include <algorithm> +#include <parallel/algorithmfwd.h> +#include <parallel/algobase.h> +#include <parallel/algo.h> + +#endif diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/algorithmfwd.h b/gcc-4.4.0/libstdc++-v3/include/parallel/algorithmfwd.h new file mode 100644 index 000000000..7c3b3f23f --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/algorithmfwd.h @@ -0,0 +1,898 @@ +// <algorithm> parallel extensions -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+ +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/algorithmfwd.h + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +#ifndef _GLIBCXX_PARALLEL_ALGORITHMFWD_H +#define _GLIBCXX_PARALLEL_ALGORITHMFWD_H 1 + +#pragma GCC system_header + +#include <parallel/tags.h> +#include <parallel/settings.h> + +namespace std +{ +namespace __parallel +{ + template<typename _FIter> + _FIter + adjacent_find(_FIter, _FIter); + + template<typename _FIter> + _FIter + adjacent_find(_FIter, _FIter, __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _IterTag> + _FIter + adjacent_find_switch(_FIter, _FIter, _IterTag); + + template<typename _RAIter> + _RAIter + adjacent_find_switch(_RAIter, _RAIter, random_access_iterator_tag); + + + template<typename _FIter, typename _BiPredicate> + _FIter + adjacent_find(_FIter, _FIter, _BiPredicate); + + template<typename _FIter, typename _BiPredicate> + _FIter + adjacent_find(_FIter, _FIter, _BiPredicate, + __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _BiPredicate, typename _IterTag> + _FIter + adjacent_find_switch(_FIter, _FIter, _BiPredicate, _IterTag); + + template<typename _RAIter, typename _BiPredicate> + _RAIter + adjacent_find_switch(_RAIter, _RAIter, _BiPredicate, + random_access_iterator_tag); + + + template<typename _IIter, typename _Tp> + typename iterator_traits<_IIter>::difference_type + count(_IIter, _IIter, const _Tp&); + + template<typename _IIter, typename _Tp> + typename iterator_traits<_IIter>::difference_type + count(_IIter, _IIter, const _Tp&, 
__gnu_parallel::sequential_tag); + + template<typename _IIter, typename _Tp> + typename iterator_traits<_IIter>::difference_type + count(_IIter, _IIter, const _Tp&, __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _Tp, typename _IterTag> + typename iterator_traits<_IIter>::difference_type + count_switch(_IIter, _IIter, const _Tp&, _IterTag); + + template<typename _RAIter, typename _Tp> + typename iterator_traits<_RAIter>::difference_type + count_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism + = __gnu_parallel::parallel_unbalanced); + + + template<typename _IIter, typename _Predicate> + typename iterator_traits<_IIter>::difference_type + count_if(_IIter, _IIter, _Predicate); + + template<typename _IIter, typename _Predicate> + typename iterator_traits<_IIter>::difference_type + count_if(_IIter, _IIter, _Predicate, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _Predicate> + typename iterator_traits<_IIter>::difference_type + count_if(_IIter, _IIter, _Predicate, __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _Predicate, typename _IterTag> + typename iterator_traits<_IIter>::difference_type + count_if_switch(_IIter, _IIter, _Predicate, _IterTag); + + template<typename _RAIter, typename _Predicate> + typename iterator_traits<_RAIter>::difference_type + count_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism + = __gnu_parallel::parallel_unbalanced); + + // algobase.h + template<typename _IIter1, typename _IIter2> + bool + equal(_IIter1, _IIter1, _IIter2, __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename Predicate> + bool + equal(_IIter1, _IIter1, _IIter2, Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2> + bool + equal(_IIter1, _IIter1, _IIter2); + + template<typename _IIter1, typename _IIter2, 
typename Predicate> + bool + equal(_IIter1, _IIter1, _IIter2, Predicate); + + template<typename _IIter, typename _Tp> + _IIter + find(_IIter, _IIter, const _Tp&, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _Tp> + _IIter + find(_IIter, _IIter, const _Tp& val); + + template<typename _IIter, typename _Tp, typename _IterTag> + _IIter + find_switch(_IIter, _IIter, const _Tp&, _IterTag); + + template<typename _RAIter, typename _Tp> + _RAIter + find_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag); + + template<typename _IIter, typename _Predicate> + _IIter + find_if(_IIter, _IIter, _Predicate, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _Predicate> + _IIter + find_if(_IIter, _IIter, _Predicate); + + template<typename _IIter, typename _Predicate, typename _IterTag> + _IIter + find_if_switch(_IIter, _IIter, _Predicate, _IterTag); + + template<typename _RAIter, typename _Predicate> + _RAIter + find_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag); + + template<typename _IIter, typename _FIter> + _IIter + find_first_of(_IIter, _IIter, _FIter, _FIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _FIter, typename _BiPredicate> + _IIter + find_first_of(_IIter, _IIter, _FIter, _FIter, _BiPredicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _FIter, typename _BiPredicate> + _IIter + find_first_of(_IIter, _IIter, _FIter, _FIter, _BiPredicate); + + template<typename _IIter, typename _FIter> + _IIter + find_first_of(_IIter, _IIter, _FIter, _FIter); + + template<typename _IIter, typename _FIter, + typename _IterTag1, typename _IterTag2> + _IIter + find_first_of_switch(_IIter, _IIter, _FIter, _FIter, _IterTag1, _IterTag2); + + template<typename _RAIter, typename _FIter, typename _BiPredicate, + typename _IterTag> + _RAIter + find_first_of_switch(_RAIter, _RAIter, _FIter, _FIter, _BiPredicate, + random_access_iterator_tag, _IterTag); + + 
template<typename _IIter, typename _FIter, typename _BiPredicate, + typename _IterTag1, typename _IterTag2> + _IIter + find_first_of_switch(_IIter, _IIter, _FIter, _FIter, _BiPredicate, + _IterTag1, _IterTag2); + + + template<typename _IIter, typename _Function> + _Function + for_each(_IIter, _IIter, _Function); + + template<typename _IIter, typename _Function> + _Function + for_each(_IIter, _IIter, _Function, __gnu_parallel::sequential_tag); + + template<typename _Iterator, typename _Function> + _Function + for_each(_Iterator, _Iterator, _Function, __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _Function, typename _IterTag> + _Function + for_each_switch(_IIter, _IIter, _Function, _IterTag); + + template<typename _RAIter, typename _Function> + _Function + for_each_switch(_RAIter, _RAIter, _Function, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism + = __gnu_parallel::parallel_balanced); + + + template<typename _FIter, typename _Generator> + void + generate(_FIter, _FIter, _Generator); + + template<typename _FIter, typename _Generator> + void + generate(_FIter, _FIter, _Generator, __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Generator> + void + generate(_FIter, _FIter, _Generator, __gnu_parallel::_Parallelism); + + template<typename _FIter, typename _Generator, typename _IterTag> + void + generate_switch(_FIter, _FIter, _Generator, _IterTag); + + template<typename _RAIter, typename _Generator> + void + generate_switch(_RAIter, _RAIter, _Generator, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism + = __gnu_parallel::parallel_balanced); + + template<typename _OIter, typename _Size, typename _Generator> + _OIter + generate_n(_OIter, _Size, _Generator); + + template<typename _OIter, typename _Size, typename _Generator> + _OIter + generate_n(_OIter, _Size, _Generator, __gnu_parallel::sequential_tag); + + template<typename _OIter, typename _Size, typename _Generator> + _OIter + 
generate_n(_OIter, _Size, _Generator, __gnu_parallel::_Parallelism); + + template<typename _OIter, typename _Size, typename _Generator, + typename _IterTag> + _OIter + generate_n_switch(_OIter, _Size, _Generator, _IterTag); + + template<typename _RAIter, typename _Size, typename _Generator> + _RAIter + generate_n_switch(_RAIter, _Size, _Generator, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism + = __gnu_parallel::parallel_balanced); + + template<typename _IIter1, typename _IIter2> + bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _Predicate> + bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, _Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2> + bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2); + + template<typename _IIter1, typename _IIter2, typename _Predicate> + bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, _Predicate); + + template<typename _IIter1, typename _IIter2, + typename _Predicate, typename _IterTag1, typename _IterTag2> + bool + lexicographical_compare_switch(_IIter1, _IIter1, _IIter2, _IIter2, + _Predicate, _IterTag1, _IterTag2); + + template<typename _RAIter1, typename _RAIter2, typename _Predicate> + bool + lexicographical_compare_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, + _Predicate, random_access_iterator_tag, + random_access_iterator_tag); + + // algo.h + template<typename _IIter1, typename _IIter2> + pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2, __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _Predicate> + pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2, _Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2> + pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2); + + template<typename 
_IIter1, typename _IIter2, typename _Predicate> + pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2, _Predicate); + + template<typename _IIter1, typename _IIter2, typename _Predicate, + typename _IterTag1, typename _IterTag2> + pair<_IIter1, _IIter2> + mismatch_switch(_IIter1, _IIter1, _IIter2, _Predicate, + _IterTag1, _IterTag2); + + template<typename _RAIter1, typename _RAIter2, typename _Predicate> + pair<_RAIter1, _RAIter2> + mismatch_switch(_RAIter1, _RAIter1, _RAIter2, _Predicate, + random_access_iterator_tag, random_access_iterator_tag); + + template<typename _FIter1, typename _FIter2> + _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2, __gnu_parallel::sequential_tag); + + template<typename _FIter1, typename _FIter2> + _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2); + + template<typename _FIter1, typename _FIter2, typename _BiPredicate> + _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2, _BiPredicate, + __gnu_parallel::sequential_tag); + + template<typename _FIter1, typename _FIter2, typename _BiPredicate> + _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2, _BiPredicate); + + template<typename _RAIter1, typename _RAIter2> + _RAIter1 + search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, + random_access_iterator_tag, random_access_iterator_tag); + + template<typename _FIter1, typename _FIter2, typename _IterTag1, + typename _IterTag2> + _FIter1 + search_switch(_FIter1, _FIter1, _FIter2, _FIter2, _IterTag1, _IterTag2); + + template<typename _RAIter1, typename _RAIter2, typename _BiPredicate> + _RAIter1 + search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, _BiPredicate, + random_access_iterator_tag, random_access_iterator_tag); + + template<typename _FIter1, typename _FIter2, typename _BiPredicate, + typename _IterTag1, typename _IterTag2> + _FIter1 + search_switch(_FIter1, _FIter1, _FIter2, _FIter2, _BiPredicate, + _IterTag1, _IterTag2); + + template<typename _FIter, typename _Integer, typename _Tp> + _FIter + search_n(_FIter, 
_FIter, _Integer, const _Tp&, + __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Integer, typename _Tp, + typename _BiPredicate> + _FIter + search_n(_FIter, _FIter, _Integer, const _Tp&, _BiPredicate, + __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Integer, typename _Tp> + _FIter + search_n(_FIter, _FIter, _Integer, const _Tp&); + + template<typename _FIter, typename _Integer, typename _Tp, + typename _BiPredicate> + _FIter + search_n(_FIter, _FIter, _Integer, const _Tp&, _BiPredicate); + + template<typename _RAIter, typename _Integer, typename _Tp, + typename _BiPredicate> + _RAIter + search_n_switch(_RAIter, _RAIter, _Integer, const _Tp&, + _BiPredicate, random_access_iterator_tag); + + template<typename _FIter, typename _Integer, typename _Tp, + typename _BiPredicate, typename _IterTag> + _FIter + search_n_switch(_FIter, _FIter, _Integer, const _Tp&, + _BiPredicate, _IterTag); + + + template<typename _IIter, typename _OIter, typename UnaryOperation> + _OIter + transform(_IIter, _IIter, _OIter, UnaryOperation); + + template<typename _IIter, typename _OIter, typename UnaryOperation> + _OIter + transform(_IIter, _IIter, _OIter, UnaryOperation, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter, typename UnaryOperation> + _OIter + transform(_IIter, _IIter, _OIter, UnaryOperation, + __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _OIter, typename UnaryOperation, + typename _IterTag1, typename _IterTag2> + _OIter + transform1_switch(_IIter, _IIter, _OIter, UnaryOperation, + _IterTag1, _IterTag2); + + + template<typename _RAIIter, typename _RAOIter, typename UnaryOperation> + _RAOIter + transform1_switch(_RAIIter, _RAIIter, _RAOIter, UnaryOperation, + random_access_iterator_tag, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism + = __gnu_parallel::parallel_balanced); + + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename 
_BiOperation> + _OIter + transform(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _BiOperation> + _OIter + transform(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _BiOperation> + _OIter + transform(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation, + __gnu_parallel::_Parallelism); + + template<typename _RAIter1, typename _RAIter2, typename _RAIter3, + typename _BiOperation> + _RAIter3 + transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation, + random_access_iterator_tag, random_access_iterator_tag, + random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism + = __gnu_parallel::parallel_balanced); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _BiOperation, typename _Tag1, + typename _Tag2, typename _Tag3> + _OIter + transform2_switch(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation, + _Tag1, _Tag2, _Tag3); + + + template<typename _FIter, typename _Tp> + void + replace(_FIter, _FIter, const _Tp&, const _Tp&); + + template<typename _FIter, typename _Tp> + void + replace(_FIter, _FIter, const _Tp&, const _Tp&, + __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Tp> + void + replace(_FIter, _FIter, const _Tp&, const _Tp&, + __gnu_parallel::_Parallelism); + + template<typename _FIter, typename _Tp, typename _IterTag> + void + replace_switch(_FIter, _FIter, const _Tp&, const _Tp&, _IterTag); + + template<typename _RAIter, typename _Tp> + void + replace_switch(_RAIter, _RAIter, const _Tp&, const _Tp&, + random_access_iterator_tag, __gnu_parallel::_Parallelism); + + + template<typename _FIter, typename _Predicate, typename _Tp> + void + replace_if(_FIter, _FIter, _Predicate, const _Tp&); + + template<typename _FIter, typename _Predicate, typename _Tp> + void + replace_if(_FIter, _FIter, _Predicate, const 
_Tp&, + __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Predicate, typename _Tp> + void + replace_if(_FIter, _FIter, _Predicate, const _Tp&, + __gnu_parallel::_Parallelism); + + template<typename _FIter, typename _Predicate, typename _Tp, + typename _IterTag> + void + replace_if_switch(_FIter, _FIter, _Predicate, const _Tp&, _IterTag); + + template<typename _RAIter, typename _Predicate, typename _Tp> + void + replace_if_switch(_RAIter, _RAIter, _Predicate, const _Tp&, + random_access_iterator_tag, + __gnu_parallel::_Parallelism); + + + template<typename _FIter> + _FIter + max_element(_FIter, _FIter); + + template<typename _FIter> + _FIter + max_element(_FIter, _FIter, __gnu_parallel::sequential_tag); + + template<typename _FIter> + _FIter + max_element(_FIter, _FIter, __gnu_parallel::_Parallelism); + + template<typename _FIter, typename _Compare> + _FIter + max_element(_FIter, _FIter, _Compare); + + template<typename _FIter, typename _Compare> + _FIter + max_element(_FIter, _FIter, _Compare, __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Compare> + _FIter + max_element(_FIter, _FIter, _Compare, __gnu_parallel::_Parallelism); + + template<typename _FIter, typename _Compare, typename _IterTag> + _FIter + max_element_switch(_FIter, _FIter, _Compare, _IterTag); + + template<typename _RAIter, typename _Compare> + _RAIter + max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism + = __gnu_parallel::parallel_balanced); + + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Compare> + _OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Compare> + 
_OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Compare, typename _IterTag1, typename _IterTag2, + typename _IterTag3> + _OIter + merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare, + _IterTag1, _IterTag2, _IterTag3); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Compare> + _OIter + merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare, + random_access_iterator_tag, random_access_iterator_tag, + random_access_iterator_tag); + + + template<typename _FIter> + _FIter + min_element(_FIter, _FIter); + + template<typename _FIter> + _FIter + min_element(_FIter, _FIter, __gnu_parallel::sequential_tag); + + template<typename _FIter> + _FIter + min_element(_FIter, _FIter, __gnu_parallel::_Parallelism parallelism_tag); + + template<typename _FIter, typename _Compare> + _FIter + min_element(_FIter, _FIter, _Compare); + + template<typename _FIter, typename _Compare> + _FIter + min_element(_FIter, _FIter, _Compare, __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Compare> + _FIter + min_element(_FIter, _FIter, _Compare, __gnu_parallel::_Parallelism); + + template<typename _FIter, typename _Compare, typename _IterTag> + _FIter + min_element_switch(_FIter, _FIter, _Compare, _IterTag); + + template<typename _RAIter, typename _Compare> + _RAIter + min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism + = __gnu_parallel::parallel_balanced); + + template<typename _RAIter> + void + nth_element(_RAIter, _RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template<typename _RAIter, typename _Compare> + void + nth_element(_RAIter, _RAIter, _RAIter, _Compare, + __gnu_parallel::sequential_tag); + + template<typename 
_RAIter, typename _Compare> + void + nth_element(_RAIter, _RAIter, _RAIter, _Compare); + + template<typename _RAIter> + void + nth_element(_RAIter, _RAIter, _RAIter); + + template<typename _RAIter, typename _Compare> + void + partial_sort(_RAIter, _RAIter, _RAIter, _Compare, + __gnu_parallel::sequential_tag); + + template<typename _RAIter> + void + partial_sort(_RAIter, _RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template<typename _RAIter, typename _Compare> + void + partial_sort(_RAIter, _RAIter, _RAIter, _Compare); + + template<typename _RAIter> + void + partial_sort(_RAIter, _RAIter, _RAIter); + + template<typename _FIter, typename Predicate> + _FIter + partition(_FIter, _FIter, Predicate, __gnu_parallel::sequential_tag); + + template<typename _FIter, typename Predicate> + _FIter + partition(_FIter, _FIter, Predicate); + + template<typename _FIter, typename Predicate, typename _IterTag> + _FIter + partition_switch(_FIter, _FIter, Predicate, _IterTag); + + template<typename _RAIter, typename Predicate> + _RAIter + partition_switch(_RAIter, _RAIter, Predicate, random_access_iterator_tag); + + template<typename _RAIter> + void + random_shuffle(_RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template<typename _RAIter, typename _RandomNumberGenerator> + void + random_shuffle(_RAIter, _RAIter, _RandomNumberGenerator&, + __gnu_parallel::sequential_tag); + + template<typename _RAIter> + void + random_shuffle(_RAIter, _RAIter); + + template<typename _RAIter, typename _RandomNumberGenerator> + void + random_shuffle(_RAIter, _RAIter, _RandomNumberGenerator&); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename Predicate> + _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, 
typename _IIter2, typename _OIter> + _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Predicate); + + template<typename _IIter1, typename _IIter2, typename _Predicate, + typename _OIter, typename _IterTag1, typename _IterTag2, + typename _IterTag3> + _OIter + set_union_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + _Predicate, _IterTag1, _IterTag2, _IterTag3); + + template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter, + typename _Predicate> + _Output_RAIter + set_union_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, _Output_RAIter, + _Predicate, random_access_iterator_tag, + random_access_iterator_tag, random_access_iterator_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Predicate); + + template<typename _IIter1, typename _IIter2, typename _Predicate, + typename _OIter, typename _IterTag1, typename _IterTag2, + typename _IterTag3> + _OIter + set_intersection_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + _Predicate, _IterTag1, _IterTag2, _IterTag3); + + template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter, + typename _Predicate> + _Output_RAIter + set_intersection_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, + 
_Output_RAIter, _Predicate, + random_access_iterator_tag, + random_access_iterator_tag, + random_access_iterator_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + _Predicate, __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + _Predicate); + + template<typename _IIter1, typename _IIter2, typename _Predicate, + typename _OIter, typename _IterTag1, typename _IterTag2, + typename _IterTag3> + _OIter + set_symmetric_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2, + _OIter, _Predicate, _IterTag1, _IterTag2, + _IterTag3); + + template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter, + typename _Predicate> + _Output_RAIter + set_symmetric_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, + _Output_RAIter, _Predicate, + random_access_iterator_tag, + random_access_iterator_tag, + random_access_iterator_tag); + + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template<typename _IIter1, 
typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Predicate); + + template<typename _IIter1, typename _IIter2, typename _Predicate, + typename _OIter, typename _IterTag1, typename _IterTag2, + typename _IterTag3> + _OIter + set_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + _Predicate, _IterTag1, _IterTag2, _IterTag3); + + template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter, + typename _Predicate> + _Output_RAIter + set_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, + _Output_RAIter, _Predicate, + random_access_iterator_tag, + random_access_iterator_tag, + random_access_iterator_tag); + + + template<typename _RAIter> + void + sort(_RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template<typename _RAIter, typename _Compare> + void + sort(_RAIter, _RAIter, _Compare, __gnu_parallel::sequential_tag); + + template<typename _RAIter> + void + sort(_RAIter, _RAIter); + + template<typename _RAIter, typename _Compare> + void + sort(_RAIter, _RAIter, _Compare); + + template<typename _RAIter> + void + stable_sort(_RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template<typename _RAIter, typename _Compare> + void + stable_sort(_RAIter, _RAIter, _Compare, __gnu_parallel::sequential_tag); + + template<typename _RAIter> + void + stable_sort(_RAIter, _RAIter); + + template<typename _RAIter, typename _Compare> + void + stable_sort(_RAIter, _RAIter, _Compare); + + template<typename _IIter, typename _OIter> + _OIter + unique_copy(_IIter, _IIter, _OIter, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter, typename _Predicate> + _OIter + unique_copy(_IIter, _IIter, _OIter, _Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter> + _OIter + unique_copy(_IIter, _IIter, _OIter); + + template<typename _IIter, typename _OIter, typename _Predicate> + _OIter + unique_copy(_IIter, _IIter, 
_OIter, _Predicate); + + template<typename _IIter, typename _OIter, typename _Predicate, + typename _IterTag1, typename _IterTag2> + _OIter + unique_copy_switch(_IIter, _IIter, _OIter, _Predicate, + _IterTag1, _IterTag2); + + template<typename _RAIter, typename _RandomAccess_OIter, typename _Predicate> + _RandomAccess_OIter + unique_copy_switch(_RAIter, _RAIter, _RandomAccess_OIter, _Predicate, + random_access_iterator_tag, random_access_iterator_tag); +} // end namespace __parallel +} // end namespace std + +#endif /* _GLIBCXX_PARALLEL_ALGORITHMFWD_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/balanced_quicksort.h b/gcc-4.4.0/libstdc++-v3/include/parallel/balanced_quicksort.h new file mode 100644 index 000000000..2310110bb --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/balanced_quicksort.h @@ -0,0 +1,477 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. 
+ +/** @file parallel/balanced_quicksort.h + * @brief Implementation of a dynamically load-balanced parallel quicksort. + * + * It works in-place and needs only logarithmic extra memory. + * The algorithm is similar to the one proposed in + * + * P. Tsigas and Y. Zhang. + * A simple, fast parallel implementation of quicksort and + * its performance evaluation on SUN enterprise 10000. + * In 11th Euromicro Conference on Parallel, Distributed and + * Network-Based Processing, page 372, 2003. + * + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H +#define _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 1 + +#include <parallel/basic_iterator.h> +#include <bits/stl_algo.h> + +#include <parallel/settings.h> +#include <parallel/partition.h> +#include <parallel/random_number.h> +#include <parallel/queue.h> +#include <functional> + +#if _GLIBCXX_ASSERTIONS +#include <parallel/checkers.h> +#endif + +namespace __gnu_parallel +{ +/** @brief Information local to one thread in the parallel quicksort run. */ +template<typename RandomAccessIterator> + struct QSBThreadLocal + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::difference_type difference_type; + + /** @brief Continuous part of the sequence, described by an + iterator pair. */ + typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece; + + /** @brief Initial piece to work on. */ + Piece initial; + + /** @brief Work-stealing queue. */ + RestrictedBoundedConcurrentQueue<Piece> leftover_parts; + + /** @brief Number of threads involved in this algorithm. */ + thread_index_t num_threads; + + /** @brief Pointer to a counter of elements left over to sort. */ + volatile difference_type* elements_leftover; + + /** @brief The complete sequence to sort. */ + Piece global; + + /** @brief Constructor. + * @param queue_size Size of the work-stealing queue. 
*/ + QSBThreadLocal(int queue_size) : leftover_parts(queue_size) { } + }; + +/** @brief Balanced quicksort divide step. + * @param begin Begin iterator of subsequence. + * @param end End iterator of subsequence. + * @param comp Comparator. + * @param num_threads Number of threads that are allowed to work on + * this part. + * @pre @c (end-begin)>=1 */ +template<typename RandomAccessIterator, typename Comparator> + typename std::iterator_traits<RandomAccessIterator>::difference_type + qsb_divide(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, thread_index_t num_threads) + { + _GLIBCXX_PARALLEL_ASSERT(num_threads > 0); + + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + RandomAccessIterator pivot_pos = + median_of_three_iterators(begin, begin + (end - begin) / 2, + end - 1, comp); + +#if defined(_GLIBCXX_ASSERTIONS) + // Must be in between somewhere. + difference_type n = end - begin; + + _GLIBCXX_PARALLEL_ASSERT( + (!comp(*pivot_pos, *begin) && !comp(*(begin + n / 2), *pivot_pos)) + || (!comp(*pivot_pos, *begin) && !comp(*(end - 1), *pivot_pos)) + || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*begin, *pivot_pos)) + || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*(end - 1), *pivot_pos)) + || (!comp(*pivot_pos, *(end - 1)) && !comp(*begin, *pivot_pos)) + || (!comp(*pivot_pos, *(end - 1)) && !comp(*(begin + n / 2), *pivot_pos))); +#endif + + // Swap pivot value to end. + if (pivot_pos != (end - 1)) + std::swap(*pivot_pos, *(end - 1)); + pivot_pos = end - 1; + + __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> + pred(comp, *pivot_pos); + + // Divide, returning end - begin - 1 in the worst case. + difference_type split_pos = parallel_partition( + begin, end - 1, pred, num_threads); + + // Swap back pivot to middle. 
+ std::swap(*(begin + split_pos), *pivot_pos); + pivot_pos = begin + split_pos; + +#if _GLIBCXX_ASSERTIONS + RandomAccessIterator r; + for (r = begin; r != pivot_pos; ++r) + _GLIBCXX_PARALLEL_ASSERT(comp(*r, *pivot_pos)); + for (; r != end; ++r) + _GLIBCXX_PARALLEL_ASSERT(!comp(*r, *pivot_pos)); +#endif + + return split_pos; + } + +/** @brief Quicksort conquer step. + * @param tls Array of thread-local storages. + * @param begin Begin iterator of subsequence. + * @param end End iterator of subsequence. + * @param comp Comparator. + * @param iam Number of the thread processing this function. + * @param num_threads + * Number of threads that are allowed to work on this part. */ +template<typename RandomAccessIterator, typename Comparator> + void + qsb_conquer(QSBThreadLocal<RandomAccessIterator>** tls, + RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, + thread_index_t iam, thread_index_t num_threads, + bool parent_wait) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + + if (num_threads <= 1 || n <= 1) + { + tls[iam]->initial.first = begin; + tls[iam]->initial.second = end; + + qsb_local_sort_with_helping(tls, comp, iam, parent_wait); + + return; + } + + // Divide step. + difference_type split_pos = qsb_divide(begin, end, comp, num_threads); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(0 <= split_pos && split_pos < (end - begin)); +#endif + + thread_index_t num_threads_leftside = + std::max<thread_index_t>(1, std::min<thread_index_t>( + num_threads - 1, split_pos * num_threads / n)); + +# pragma omp atomic + *tls[iam]->elements_leftover -= (difference_type)1; + + // Conquer step. 
+# pragma omp parallel num_threads(2) + { + bool wait; + if(omp_get_num_threads() < 2) + wait = false; + else + wait = parent_wait; + +# pragma omp sections + { +# pragma omp section + { + qsb_conquer(tls, begin, begin + split_pos, comp, + iam, + num_threads_leftside, + wait); + wait = parent_wait; + } + // The pivot_pos is left in place, to ensure termination. +# pragma omp section + { + qsb_conquer(tls, begin + split_pos + 1, end, comp, + iam + num_threads_leftside, + num_threads - num_threads_leftside, + wait); + wait = parent_wait; + } + } + } + } + +/** + * @brief Quicksort step doing load-balanced local sort. + * @param tls Array of thread-local storages. + * @param comp Comparator. + * @param iam Number of the thread processing this function. + */ +template<typename RandomAccessIterator, typename Comparator> + void + qsb_local_sort_with_helping(QSBThreadLocal<RandomAccessIterator>** tls, + Comparator& comp, int iam, bool wait) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece; + + QSBThreadLocal<RandomAccessIterator>& tl = *tls[iam]; + + difference_type base_case_n = + _Settings::get().sort_qsb_base_case_maximal_n; + if (base_case_n < 2) + base_case_n = 2; + thread_index_t num_threads = tl.num_threads; + + // Every thread has its own random number generator. + random_number rng(iam + 1); + + Piece current = tl.initial; + + difference_type elements_done = 0; +#if _GLIBCXX_ASSERTIONS + difference_type total_elements_done = 0; +#endif + + for (;;) + { + // Invariant: current must be a valid (maybe empty) range. + RandomAccessIterator begin = current.first, end = current.second; + difference_type n = end - begin; + + if (n > base_case_n) + { + // Divide. + RandomAccessIterator pivot_pos = begin + rng(n); + + // Swap pivot_pos value to end. 
+ if (pivot_pos != (end - 1)) + std::swap(*pivot_pos, *(end - 1)); + pivot_pos = end - 1; + + __gnu_parallel::binder2nd + <Comparator, value_type, value_type, bool> + pred(comp, *pivot_pos); + + // Divide, leave pivot unchanged in last place. + RandomAccessIterator split_pos1, split_pos2; + split_pos1 = __gnu_sequential::partition(begin, end - 1, pred); + + // Left side: < pivot_pos; right side: >= pivot_pos. +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(begin <= split_pos1 && split_pos1 < end); +#endif + // Swap pivot back to middle. + if (split_pos1 != pivot_pos) + std::swap(*split_pos1, *pivot_pos); + pivot_pos = split_pos1; + + // In case all elements are equal, split_pos1 == 0. + if ((split_pos1 + 1 - begin) < (n >> 7) + || (end - split_pos1) < (n >> 7)) + { + // Very unequal split, one part smaller than one 128th + // elements not strictly larger than the pivot. + __gnu_parallel::unary_negate<__gnu_parallel::binder1st + <Comparator, value_type, value_type, bool>, value_type> + pred(__gnu_parallel::binder1st + <Comparator, value_type, value_type, bool>(comp, + *pivot_pos)); + + // Find other end of pivot-equal range. + split_pos2 = __gnu_sequential::partition(split_pos1 + 1, + end, pred); + } + else + // Only skip the pivot. + split_pos2 = split_pos1 + 1; + + // Elements equal to pivot are done. + elements_done += (split_pos2 - split_pos1); +#if _GLIBCXX_ASSERTIONS + total_elements_done += (split_pos2 - split_pos1); +#endif + // Always push larger part onto stack. + if (((split_pos1 + 1) - begin) < (end - (split_pos2))) + { + // Right side larger. + if ((split_pos2) != end) + tl.leftover_parts.push_front(std::make_pair(split_pos2, + end)); + + //current.first = begin; //already set anyway + current.second = split_pos1; + continue; + } + else + { + // Left side larger. 
+ if (begin != split_pos1) + tl.leftover_parts.push_front(std::make_pair(begin, + split_pos1)); + + current.first = split_pos2; + //current.second = end; //already set anyway + continue; + } + } + else + { + __gnu_sequential::sort(begin, end, comp); + elements_done += n; +#if _GLIBCXX_ASSERTIONS + total_elements_done += n; +#endif + + // Prefer own stack, small pieces. + if (tl.leftover_parts.pop_front(current)) + continue; + +# pragma omp atomic + *tl.elements_leftover -= elements_done; + + elements_done = 0; + +#if _GLIBCXX_ASSERTIONS + double search_start = omp_get_wtime(); +#endif + + // Look for new work. + bool successfully_stolen = false; + while (wait && *tl.elements_leftover > 0 && !successfully_stolen +#if _GLIBCXX_ASSERTIONS + // Possible dead-lock. + && (omp_get_wtime() < (search_start + 1.0)) +#endif + ) + { + thread_index_t victim; + victim = rng(num_threads); + + // Large pieces. + successfully_stolen = (victim != iam) + && tls[victim]->leftover_parts.pop_back(current); + if (!successfully_stolen) + yield(); +#if !defined(__ICC) && !defined(__ECC) +# pragma omp flush +#endif + } + +#if _GLIBCXX_ASSERTIONS + if (omp_get_wtime() >= (search_start + 1.0)) + { + sleep(1); + _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime() + < (search_start + 1.0)); + } +#endif + if (!successfully_stolen) + { +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(*tl.elements_leftover == 0); +#endif + return; + } + } + } + } + +/** @brief Top-level quicksort routine. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param comp Comparator. + * @param num_threads Number of threads that are allowed to work on + * this part. 
+ */ +template<typename RandomAccessIterator, typename Comparator> + void + parallel_sort_qsb(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, + thread_index_t num_threads) + { + _GLIBCXX_CALL(end - begin) + + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece; + + typedef QSBThreadLocal<RandomAccessIterator> tls_type; + + difference_type n = end - begin; + + if (n <= 1) + return; + + // At least one element per processor. + if (num_threads > n) + num_threads = static_cast<thread_index_t>(n); + + // Initialize thread local storage + tls_type** tls = new tls_type*[num_threads]; + difference_type queue_size = num_threads * (thread_index_t)(log2(n) + 1); + for (thread_index_t t = 0; t < num_threads; ++t) + tls[t] = new QSBThreadLocal<RandomAccessIterator>(queue_size); + + // There can never be more than ceil(log2(n)) ranges on the stack, because + // 1. Only one processor pushes onto the stack + // 2. The largest range has at most length n + // 3. Each range is larger than half of the range remaining + volatile difference_type elements_leftover = n; + for (int i = 0; i < num_threads; ++i) + { + tls[i]->elements_leftover = &elements_leftover; + tls[i]->num_threads = num_threads; + tls[i]->global = std::make_pair(begin, end); + + // Just in case nothing is left to assign. + tls[i]->initial = std::make_pair(end, end); + } + + // Main recursion call. + qsb_conquer(tls, begin, begin + n, comp, 0, num_threads, true); + +#if _GLIBCXX_ASSERTIONS + // All stack must be empty. 
+ Piece dummy; + for (int i = 1; i < num_threads; ++i) + _GLIBCXX_PARALLEL_ASSERT(!tls[i]->leftover_parts.pop_back(dummy)); +#endif + + for (int i = 0; i < num_threads; ++i) + delete tls[i]; + delete[] tls; + } +} // namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/base.h b/gcc-4.4.0/libstdc++-v3/include/parallel/base.h new file mode 100644 index 000000000..7f855dfca --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/base.h @@ -0,0 +1,471 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/base.h + * @brief Sequential helper functions. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. 
+ +#ifndef _GLIBCXX_PARALLEL_BASE_H +#define _GLIBCXX_PARALLEL_BASE_H 1 + +#include <functional> +#include <omp.h> +#include <parallel/features.h> +#include <parallel/basic_iterator.h> +#include <parallel/parallel.h> + + +// Parallel mode namespaces. + +/** + * @namespace std::__parallel + * @brief GNU parallel code, replaces standard behavior with parallel behavior. + */ +namespace std +{ + namespace __parallel { } +} + +/** + * @namespace __gnu_parallel + * @brief GNU parallel code for public use. + */ +namespace __gnu_parallel +{ + // Import all the parallel versions of components in namespace std. + using namespace std::__parallel; +} + +/** + * @namespace __gnu_sequential + * @brief GNU sequential classes for public use. + */ +namespace __gnu_sequential +{ + // Import whatever is the serial version. +#ifdef _GLIBCXX_PARALLEL + using namespace std::__norm; +#else + using namespace std; +#endif +} + + +namespace __gnu_parallel +{ + // NB: Including this file cannot produce (unresolved) symbols from + // the OpenMP runtime unless the parallel mode is actually invoked + // and active, which implies that the OpenMP runtime is actually + // going to be linked in. + inline int + get_max_threads() + { + int __i = omp_get_max_threads(); + return __i > 1 ? __i : 1; + } + + + inline bool + is_parallel(const _Parallelism __p) { return __p != sequential; } + + + // XXX remove std::duplicates from here if possible, + // XXX but keep minimal dependencies. + +/** @brief Calculates the rounded-down logarithm of @c n for base 2. + * @param n Argument. + * @return Returns 0 for any argument <1. + */ +template<typename Size> + inline Size + __log2(Size n) + { + Size k; + for (k = 0; n > 1; n >>= 1) + ++k; + return k; + } + +/** @brief Encode two integers into one __gnu_parallel::lcas_t. + * @param a First integer, to be encoded in the most-significant @c + * lcas_t_bits/2 bits. + * @param b Second integer, to be encoded in the least-significant + * @c lcas_t_bits/2 bits.
+ * @return __gnu_parallel::lcas_t value encoding @c a and @c b. + * @see decode2 + */ +inline lcas_t +encode2(int a, int b) //must all be non-negative, actually +{ + return (((lcas_t)a) << (lcas_t_bits / 2)) | (((lcas_t)b) << 0); +} + +/** @brief Decode two integers from one __gnu_parallel::lcas_t. + * @param x __gnu_parallel::lcas_t to decode integers from. + * @param a First integer, to be decoded from the most-significant + * @c lcas_t_bits/2 bits of @c x. + * @param b Second integer, to be decoded from the least-significant + * @c lcas_t_bits/2 bits of @c x. + * @see encode2 + */ +inline void +decode2(lcas_t x, int& a, int& b) +{ + a = (int)((x >> (lcas_t_bits / 2)) & lcas_t_mask); + b = (int)((x >> 0 ) & lcas_t_mask); +} + +/** @brief Equivalent to std::min. */ +template<typename T> + const T& + min(const T& a, const T& b) + { return (a < b) ? a : b; } + +/** @brief Equivalent to std::max. */ +template<typename T> + const T& + max(const T& a, const T& b) + { return (a > b) ? a : b; } + +/** @brief Constructs predicate for equality from strict weak + * ordering predicate + */ +// XXX comparator at the end, as per others +template<typename Comparator, typename T1, typename T2> + class equal_from_less : public std::binary_function<T1, T2, bool> + { + private: + Comparator& comp; + + public: + equal_from_less(Comparator& _comp) : comp(_comp) { } + + bool operator()(const T1& a, const T2& b) + { + return !comp(a, b) && !comp(b, a); + } + }; + + +/** @brief Similar to std::unary_negate, + * but giving the argument types explicitly. */ +template<typename _Predicate, typename argument_type> + class unary_negate + : public std::unary_function<argument_type, bool> + { + protected: + _Predicate _M_pred; + + public: + explicit + unary_negate(const _Predicate& __x) : _M_pred(__x) { } + + bool + operator()(const argument_type& __x) + { return !_M_pred(__x); } + }; + +/** @brief Similar to std::binder1st, + * but giving the argument types explicitly.
*/ +template<typename _Operation, typename first_argument_type, + typename second_argument_type, typename result_type> + class binder1st + : public std::unary_function<second_argument_type, result_type> + { + protected: + _Operation op; + first_argument_type value; + + public: + binder1st(const _Operation& __x, + const first_argument_type& __y) + : op(__x), value(__y) { } + + result_type + operator()(const second_argument_type& __x) + { return op(value, __x); } + + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 109. Missing binders for non-const sequence elements + result_type + operator()(second_argument_type& __x) const + { return op(value, __x); } + }; + +/** + * @brief Similar to std::binder2nd, but giving the argument types + * explicitly. + */ +template<typename _Operation, typename first_argument_type, + typename second_argument_type, typename result_type> + class binder2nd + : public std::unary_function<first_argument_type, result_type> + { + protected: + _Operation op; + second_argument_type value; + + public: + binder2nd(const _Operation& __x, + const second_argument_type& __y) + : op(__x), value(__y) { } + + result_type + operator()(const first_argument_type& __x) const + { return op(__x, value); } + + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 109. Missing binders for non-const sequence elements + result_type + operator()(first_argument_type& __x) + { return op(__x, value); } + }; + +/** @brief Similar to std::equal_to, but allows two different types. */ +template<typename T1, typename T2> + struct equal_to : std::binary_function<T1, T2, bool> + { + bool operator()(const T1& t1, const T2& t2) const + { return t1 == t2; } + }; + +/** @brief Similar to std::less, but allows two different types. 
*/ +template<typename T1, typename T2> + struct less : std::binary_function<T1, T2, bool> + { + bool + operator()(const T1& t1, const T2& t2) const + { return t1 < t2; } + + bool + operator()(const T2& t2, const T1& t1) const + { return t2 < t1; } + }; + +// Partial specialization for one type. Same as std::less. +template<typename _Tp> +struct less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool> + { + bool + operator()(const _Tp& __x, const _Tp& __y) const + { return __x < __y; } + }; + + + /** @brief Similar to std::plus, but allows two different types. */ +template<typename _Tp1, typename _Tp2> + struct plus : public std::binary_function<_Tp1, _Tp2, _Tp1> + { + typedef __typeof__(*static_cast<_Tp1*>(NULL) + + *static_cast<_Tp2*>(NULL)) result; + + result + operator()(const _Tp1& __x, const _Tp2& __y) const + { return __x + __y; } + }; + +// Partial specialization for one type. Same as std::plus. +template<typename _Tp> + struct plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> + { + typedef __typeof__(*static_cast<_Tp*>(NULL) + + *static_cast<_Tp*>(NULL)) result; + + result + operator()(const _Tp& __x, const _Tp& __y) const + { return __x + __y; } + }; + + +/** @brief Similar to std::multiplies, but allows two different types. */ +template<typename _Tp1, typename _Tp2> + struct multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1> + { + typedef __typeof__(*static_cast<_Tp1*>(NULL) + * *static_cast<_Tp2*>(NULL)) result; + + result + operator()(const _Tp1& __x, const _Tp2& __y) const + { return __x * __y; } + }; + +// Partial specialization for one type. Same as std::multiplies. 
+template<typename _Tp> + struct multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> + { + typedef __typeof__(*static_cast<_Tp*>(NULL) + * *static_cast<_Tp*>(NULL)) result; + + result + operator()(const _Tp& __x, const _Tp& __y) const + { return __x * __y; } + }; + + +template<typename T, typename _DifferenceTp> + class pseudo_sequence; + +/** @brief Iterator associated with __gnu_parallel::pseudo_sequence. + * It features the usual random-access iterator functionality. + * @param T Sequence value type. + * @param difference_type Sequence difference type. + */ +template<typename T, typename _DifferenceTp> + class pseudo_sequence_iterator + { + public: + typedef _DifferenceTp difference_type; + + private: + typedef pseudo_sequence_iterator<T, _DifferenceTp> type; + + const T& val; + difference_type pos; + + public: + pseudo_sequence_iterator(const T& val, difference_type pos) + : val(val), pos(pos) { } + + // Pre-increment operator. + type& + operator++() + { + ++pos; + return *this; + } + + // Post-increment operator; the returned copy keeps the old position. + const type + operator++(int) + { return type(val, pos++); } + + const T& + operator*() const + { return val; } + + const T& + operator[](difference_type) const + { return val; } + + bool + operator==(const type& i2) + { return pos == i2.pos; } + + bool + operator!=(const type& i2) + { return pos != i2.pos; } + + difference_type + operator-(const type& i2) + { return pos - i2.pos; } + }; + +/** @brief Sequence that conceptually consists of multiple copies of + the same element. + * The copies are not stored explicitly, of course. + * @param T Sequence value type. + * @param difference_type Sequence difference type. + */ +template<typename T, typename _DifferenceTp> + class pseudo_sequence + { + typedef pseudo_sequence<T, _DifferenceTp> type; + + public: + typedef _DifferenceTp difference_type; + + // Better cast down to uint64, than up to _DifferenceTp.
+ typedef pseudo_sequence_iterator<T, uint64> iterator; + + /** @brief Constructor. + * @param val Element of the sequence. + * @param count Number of (virtual) copies. + */ + pseudo_sequence(const T& val, difference_type count) + : val(val), count(count) { } + + /** @brief Begin iterator. */ + iterator + begin() const + { return iterator(val, 0); } + + /** @brief End iterator. */ + iterator + end() const + { return iterator(val, count); } + + private: + const T& val; + difference_type count; + }; + +/** @brief Functor that does nothing; a struct so that operator() is publicly callable. */ +template<typename _ValueTp> + struct void_functor + { + inline void + operator()(const _ValueTp& v) const { } + }; + +/** @brief Compute the median of three referenced elements, + according to @c comp. + * @param a First iterator. + * @param b Second iterator. + * @param c Third iterator. + * @param comp Comparator. + */ +template<typename RandomAccessIterator, typename Comparator> + RandomAccessIterator + median_of_three_iterators(RandomAccessIterator a, RandomAccessIterator b, + RandomAccessIterator c, Comparator& comp) + { + if (comp(*a, *b)) + if (comp(*b, *c)) + return b; + else + if (comp(*a, *c)) + return c; + else + return a; + else + { + // Just swap a and b. + if (comp(*a, *c)) + return a; + else + if (comp(*b, *c)) + return c; + else + return b; + } + } + +#define _GLIBCXX_PARALLEL_ASSERT(_Condition) __glibcxx_assert(_Condition) + +} //namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_BASE_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/basic_iterator.h b/gcc-4.4.0/libstdc++-v3/include/parallel/basic_iterator.h new file mode 100644 index 000000000..c5c067a0b --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/basic_iterator.h @@ -0,0 +1,42 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library.
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/basic_iterator.h + * @brief Includes the original header files concerned with iterators + * except for stream iterators. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_BASIC_ITERATOR_H +#define _GLIBCXX_PARALLEL_BASIC_ITERATOR_H 1 + +#include <bits/c++config.h> +#include <cstddef> +#include <bits/stl_iterator_base_types.h> +#include <bits/stl_iterator_base_funcs.h> +#include <bits/stl_iterator.h> + +#endif /* _GLIBCXX_PARALLEL_BASIC_ITERATOR_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/checkers.h b/gcc-4.4.0/libstdc++-v3/include/parallel/checkers.h new file mode 100644 index 000000000..819e8ad26 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/checkers.h @@ -0,0 +1,154 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/checkers.h + * @brief Routines for checking the correctness of algorithm results. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_CHECKERS_H +#define _GLIBCXX_PARALLEL_CHECKERS_H 1 + +#include <functional> +#include <cstdio> +#include <bits/stl_algobase.h> + +namespace __gnu_parallel +{ + /** + * @brief Check whether @c [begin, @c end) is sorted according to @c comp. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param comp Comparator. + * @return @c true if sorted, @c false otherwise. 
+ */ + // XXX Comparator default template argument + template<typename InputIterator, typename Comparator> + bool + is_sorted(InputIterator begin, InputIterator end, + Comparator comp + = std::less<typename std::iterator_traits<InputIterator>:: + value_type>()) + { + if (begin == end) + return true; + + InputIterator current(begin), recent(begin); + + unsigned long long position = 1; + for (current++; current != end; current++) + { + if (comp(*current, *recent)) + { + printf("is_sorted: check failed before position %llu.\n", + position); + return false; + } + recent = current; + position++; + } + + return true; + } + + /** + * @brief Check whether @c [begin, @c end) is sorted according to @c comp. + * Prints the position in case an unordered pair is found. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param first_failure The first failure is returned in this variable. + * @param comp Comparator. + * @return @c true if sorted, @c false otherwise. + */ + // XXX Comparator default template argument + template<typename InputIterator, typename Comparator> + bool + is_sorted_failure(InputIterator begin, InputIterator end, + InputIterator& first_failure, + Comparator comp + = std::less<typename std::iterator_traits<InputIterator>:: + value_type>()) + { + if (begin == end) + return true; + + InputIterator current(begin), recent(begin); + + unsigned long long position = 1; + for (current++; current != end; current++) + { + if (comp(*current, *recent)) + { + first_failure = current; + printf("is_sorted: check failed before position %llu.\n", + position); + return false; + } + recent = current; + position++; + } + + first_failure = end; + return true; + } + + /** + * @brief Check whether @c [begin, @c end) is sorted according to @c comp. + * Prints all unordered pairs, including the surrounding two elements. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param comp Comparator.
+ * @return @c true if sorted, @c false otherwise. + */ + template<typename InputIterator, typename Comparator> + bool + // XXX Comparator default template argument + is_sorted_print_failures(InputIterator begin, InputIterator end, + Comparator comp + = std::less<typename std::iterator_traits + <InputIterator>::value_type>()) + { + if (begin == end) + return true; + + InputIterator recent(begin); + bool ok = true; + + for (InputIterator pos(begin + 1); pos != end; pos++) + { + if (comp(*pos, *recent)) + { + printf("%ld: %d %d %d %d\n", pos - begin, *(pos - 2), + *(pos- 1), *pos, *(pos + 1)); + ok = false; + } + recent = pos; + } + return ok; + } +} + +#endif /* _GLIBCXX_PARALLEL_CHECKERS_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/compatibility.h b/gcc-4.4.0/libstdc++-v3/include/parallel/compatibility.h new file mode 100644 index 000000000..a5726dd35 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/compatibility.h @@ -0,0 +1,350 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. 
+ +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/compatibility.h + * @brief Compatibility layer, mostly concerned with atomic operations. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H +#define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1 + +#include <parallel/types.h> +#include <parallel/base.h> + +#if defined(__SUNPRO_CC) && defined(__sparc) +#include <sys/atomic.h> +#endif + +#if !defined(_WIN32) || defined (__CYGWIN__) +#include <sched.h> +#endif + +#if defined(_MSC_VER) +#include <Windows.h> +#include <intrin.h> +#undef max +#undef min +#endif + +#ifdef __MINGW32__ +// Including <windows.h> will drag in all the windows32 names. Since +// that can cause user code portability problems, we just declare the +// one needed function here. +extern "C" +__attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long); +#endif + +namespace __gnu_parallel +{ +#if defined(__ICC) + template<typename must_be_int = int> + int32 faa32(int32* x, int32 inc) + { + asm volatile("lock xadd %0,%1" + : "=r" (inc), "=m" (*x) + : "0" (inc) + : "memory"); + return inc; + } +#if defined(__x86_64) + template<typename must_be_int = int> + int64 faa64(int64* x, int64 inc) + { + asm volatile("lock xadd %0,%1" + : "=r" (inc), "=m" (*x) + : "0" (inc) + : "memory"); + return inc; + } +#endif +#endif + + // atomic functions only work on integers + + /** @brief Add a value to a variable, atomically. + * + * Implementation is heavily platform-dependent. + * @param ptr Pointer to a 32-bit signed integer. + * @param addend Value to add. 
+ */ + inline int32 + fetch_and_add_32(volatile int32* ptr, int32 addend) + { +#if defined(__ICC) //x86 version + return _InterlockedExchangeAdd((void*)ptr, addend); +#elif defined(__ECC) //IA-64 version + return _InterlockedExchangeAdd((void*)ptr, addend); +#elif defined(__ICL) || defined(_MSC_VER) + return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(ptr), + addend); +#elif defined(__GNUC__) + return __sync_fetch_and_add(ptr, addend); +#elif defined(__SUNPRO_CC) && defined(__sparc) + volatile int32 before, after; + do + { + before = *ptr; + after = before + addend; + } while (atomic_cas_32((volatile unsigned int*)ptr, before, + after) != before); + return before; +#else //fallback, slow +#pragma message("slow fetch_and_add_32") + int32 res; +#pragma omp critical + { + res = *ptr; + *(ptr) += addend; + } + return res; +#endif + } + + /** @brief Add a value to a variable, atomically. + * + * Implementation is heavily platform-dependent. + * @param ptr Pointer to a 64-bit signed integer. + * @param addend Value to add. 
+ */ + inline int64 + fetch_and_add_64(volatile int64* ptr, int64 addend) + { +#if defined(__ICC) && defined(__x86_64) //x86 version + return faa64<int>((int64*)ptr, addend); +#elif defined(__ECC) //IA-64 version + return _InterlockedExchangeAdd64((void*)ptr, addend); +#elif defined(__ICL) || defined(_MSC_VER) +#ifndef _WIN64 + _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case + return 0; +#else + return _InterlockedExchangeAdd64(ptr, addend); +#endif +#elif defined(__GNUC__) && defined(__x86_64) + return __sync_fetch_and_add(ptr, addend); +#elif defined(__GNUC__) && defined(__i386) && \ + (defined(__i686) || defined(__pentium4) || defined(__athlon)) + return __sync_fetch_and_add(ptr, addend); +#elif defined(__SUNPRO_CC) && defined(__sparc) + volatile int64 before, after; + do + { + before = *ptr; + after = before + addend; + } while (atomic_cas_64((volatile unsigned long long*)ptr, before, + after) != before); + return before; +#else //fallback, slow +#if defined(__GNUC__) && defined(__i386) + // XXX doesn't work with -march=native + //#warning "please compile with -march=i686 or better" +#endif +#pragma message("slow fetch_and_add_64") + int64 res; +#pragma omp critical + { + res = *ptr; + *(ptr) += addend; + } + return res; +#endif + } + + /** @brief Add a value to a variable, atomically. + * + * Implementation is heavily platform-dependent. + * @param ptr Pointer to a signed integer. + * @param addend Value to add. 
+ */ + template<typename T> + inline T + fetch_and_add(volatile T* ptr, T addend) + { + if (sizeof(T) == sizeof(int32)) + return (T)fetch_and_add_32((volatile int32*) ptr, (int32)addend); + else if (sizeof(T) == sizeof(int64)) + return (T)fetch_and_add_64((volatile int64*) ptr, (int64)addend); + else + _GLIBCXX_PARALLEL_ASSERT(false); + } + + +#if defined(__ICC) + + template<typename must_be_int = int> + inline int32 + cas32(volatile int32* ptr, int32 old, int32 nw) + { + int32 before; + __asm__ __volatile__("lock; cmpxchgl %1,%2" + : "=a"(before) + : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old) + : "memory"); + return before; + } + +#if defined(__x86_64) + template<typename must_be_int = int> + inline int64 + cas64(volatile int64 *ptr, int64 old, int64 nw) + { + int64 before; + __asm__ __volatile__("lock; cmpxchgq %1,%2" + : "=a"(before) + : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old) + : "memory"); + return before; + } +#endif + +#endif + + /** @brief Compare @c *ptr and @c comparand. If equal, let @c + * *ptr=replacement and return @c true, return @c false otherwise. + * + * Implementation is heavily platform-dependent. + * @param ptr Pointer to 32-bit signed integer. + * @param comparand Compare value. + * @param replacement Replacement value. 
+ */ + inline bool + compare_and_swap_32(volatile int32* ptr, int32 comparand, int32 replacement) + { +#if defined(__ICC) //x86 version + return _InterlockedCompareExchange((void*)ptr, replacement, + comparand) == comparand; +#elif defined(__ECC) //IA-64 version + return _InterlockedCompareExchange((void*)ptr, replacement, + comparand) == comparand; +#elif defined(__ICL) || defined(_MSC_VER) + return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), + replacement, comparand) == comparand; +#elif defined(__GNUC__) + return __sync_bool_compare_and_swap(ptr, comparand, replacement); +#elif defined(__SUNPRO_CC) && defined(__sparc) + return atomic_cas_32((volatile unsigned int*)ptr, comparand, + replacement) == comparand; +#else +#pragma message("slow compare_and_swap_32") + bool res = false; +#pragma omp critical + { + if (*ptr == comparand) + { + *ptr = replacement; + res = true; + } + } + return res; +#endif + } + + /** @brief Compare @c *ptr and @c comparand. If equal, let @c + * *ptr=replacement and return @c true, return @c false otherwise. + * + * Implementation is heavily platform-dependent. + * @param ptr Pointer to 64-bit signed integer. + * @param comparand Compare value. + * @param replacement Replacement value. 
+ */ + inline bool + compare_and_swap_64(volatile int64* ptr, int64 comparand, int64 replacement) + { +#if defined(__ICC) && defined(__x86_64) //x86 version + return cas64<int>(ptr, comparand, replacement) == comparand; +#elif defined(__ECC) //IA-64 version + return _InterlockedCompareExchange64((void*)ptr, replacement, + comparand) == comparand; +#elif defined(__ICL) || defined(_MSC_VER) +#ifndef _WIN64 + _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case + return 0; +#else + return _InterlockedCompareExchange64(ptr, replacement, + comparand) == comparand; +#endif + +#elif defined(__GNUC__) && defined(__x86_64) + return __sync_bool_compare_and_swap(ptr, comparand, replacement); +#elif defined(__GNUC__) && defined(__i386) && \ + (defined(__i686) || defined(__pentium4) || defined(__athlon)) + return __sync_bool_compare_and_swap(ptr, comparand, replacement); +#elif defined(__SUNPRO_CC) && defined(__sparc) + return atomic_cas_64((volatile unsigned long long*)ptr, + comparand, replacement) == comparand; +#else +#if defined(__GNUC__) && defined(__i386) + // XXX -march=native + //#warning "please compile with -march=i686 or better" +#endif +#pragma message("slow compare_and_swap_64") + bool res = false; +#pragma omp critical + { + if (*ptr == comparand) + { + *ptr = replacement; + res = true; + } + } + return res; +#endif + } + + /** @brief Compare @c *ptr and @c comparand. If equal, let @c + * *ptr=replacement and return @c true, return @c false otherwise. + * + * Implementation is heavily platform-dependent. + * @param ptr Pointer to signed integer. + * @param comparand Compare value. + * @param replacement Replacement value. 
*/ + template<typename T> + inline bool + compare_and_swap(volatile T* ptr, T comparand, T replacement) + { + if (sizeof(T) == sizeof(int32)) + return compare_and_swap_32((volatile int32*) ptr, (int32)comparand, (int32)replacement); + else if (sizeof(T) == sizeof(int64)) + return compare_and_swap_64((volatile int64*) ptr, (int64)comparand, (int64)replacement); + else + _GLIBCXX_PARALLEL_ASSERT(false); + } + + /** @brief Yield the control to another thread, without waiting for + the end to the time slice. */ + inline void + yield() + { +#if defined (_WIN32) && !defined (__CYGWIN__) + Sleep(0); +#else + sched_yield(); +#endif + } +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_COMPATIBILITY_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/compiletime_settings.h b/gcc-4.4.0/libstdc++-v3/include/parallel/compiletime_settings.h new file mode 100644 index 000000000..e7e31d068 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/compiletime_settings.h @@ -0,0 +1,75 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. 
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file parallel/compiletime_settings.h
 *  @brief Compile-time options concerning debugging and performance.
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Johannes Singler.

#include <cstdio>

/** @brief Determine verbosity level of the parallel mode.
 *  Level 1 prints a message each time a parallel-mode function is entered. */
#define _GLIBCXX_VERBOSE_LEVEL 0

/** @def _GLIBCXX_CALL
 *  @brief Macro to produce log message when entering a function.
 *
 *  The level-1 expansion ends in its own semicolon, which is why call
 *  sites use the macro without a trailing semicolon.  Only levels 0
 *  and 1 define the macro; any other level leaves it undefined.
 *  @param n Input size.
 *  @see _GLIBCXX_VERBOSE_LEVEL */
#if (_GLIBCXX_VERBOSE_LEVEL == 0)
#define _GLIBCXX_CALL(n)
#endif
#if (_GLIBCXX_VERBOSE_LEVEL == 1)
#define _GLIBCXX_CALL(n) \
  printf(" %s:\niam = %d, n = %ld, num_threads = %d\n", \
  __PRETTY_FUNCTION__, omp_get_thread_num(), (n), get_max_threads());
#endif

#ifndef _GLIBCXX_SCALE_DOWN_FPU
/** @brief Use floating-point scaling instead of modulo for mapping
 *  random numbers to a range.  This can be faster on certain CPUs. */
#define _GLIBCXX_SCALE_DOWN_FPU 0
#endif

#ifndef _GLIBCXX_ASSERTIONS
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
 *  Should be switched on only locally. */
#define _GLIBCXX_ASSERTIONS 0
#endif

#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
/** @brief Consider the size of the L1 cache for
 *  __gnu_parallel::parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0
#endif
#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
/** @brief Consider the size of the TLB for
 *  __gnu_parallel::parallel_random_shuffle().
*/ +#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0 +#endif diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/equally_split.h b/gcc-4.4.0/libstdc++-v3/include/parallel/equally_split.h new file mode 100644 index 000000000..ee4bfeeeb --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/equally_split.h @@ -0,0 +1,87 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/equally_split.h + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H +#define _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H 1 + +namespace __gnu_parallel +{ +/** @brief Function to split a sequence into parts of almost equal size. + * + * The resulting sequence s of length num_threads+1 contains the splitting + * positions when splitting the range [0,n) into parts of almost + * equal size (plus minus 1). The first entry is 0, the last one + * n. There may result empty parts. 
+ * @param n Number of elements + * @param num_threads Number of parts + * @param s Splitters + * @returns End of splitter sequence, i. e. @c s+num_threads+1 */ +template<typename difference_type, typename OutputIterator> + OutputIterator + equally_split(difference_type n, thread_index_t num_threads, OutputIterator s) + { + difference_type chunk_length = n / num_threads; + difference_type num_longer_chunks = n % num_threads; + difference_type pos = 0; + for (thread_index_t i = 0; i < num_threads; ++i) + { + *s++ = pos; + pos += (i < num_longer_chunks) ? (chunk_length + 1) : chunk_length; + } + *s++ = n; + return s; + } + + +/** @brief Function to split a sequence into parts of almost equal size. + * + * Returns the position of the splitting point between + * thread number thread_no (included) and + * thread number thread_no+1 (excluded). + * @param n Number of elements + * @param num_threads Number of parts + * @returns _SplittingAlgorithm point */ +template<typename difference_type> + difference_type + equally_split_point(difference_type n, + thread_index_t num_threads, + thread_index_t thread_no) + { + difference_type chunk_length = n / num_threads; + difference_type num_longer_chunks = n % num_threads; + if (thread_no < num_longer_chunks) + return thread_no * (chunk_length + 1); + else + return num_longer_chunks * (chunk_length + 1) + + (thread_no - num_longer_chunks) * chunk_length; + } +} + +#endif /* _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/features.h b/gcc-4.4.0/libstdc++-v3/include/parallel/features.h new file mode 100644 index 000000000..417606ea7 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/features.h @@ -0,0 +1,104 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/features.h + * @brief Defines on whether to include algorithm variants. + * + * Less variants reduce executable size and compile time. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_FEATURES_H +#define _GLIBCXX_PARALLEL_FEATURES_H 1 + +#ifndef _GLIBCXX_MERGESORT +/** @def _GLIBCXX_MERGESORT + * @brief Include parallel multi-way mergesort. + * @see __gnu_parallel::_Settings::sort_algorithm */ +#define _GLIBCXX_MERGESORT 1 +#endif + +#ifndef _GLIBCXX_QUICKSORT +/** @def _GLIBCXX_QUICKSORT + * @brief Include parallel unbalanced quicksort. + * @see __gnu_parallel::_Settings::sort_algorithm */ +#define _GLIBCXX_QUICKSORT 1 +#endif + +#ifndef _GLIBCXX_BAL_QUICKSORT +/** @def _GLIBCXX_BAL_QUICKSORT + * @brief Include parallel dynamically load-balanced quicksort. 
+ * @see __gnu_parallel::_Settings::sort_algorithm */ +#define _GLIBCXX_BAL_QUICKSORT 1 +#endif + +#ifndef _GLIBCXX_FIND_GROWING_BLOCKS +/** @brief Include the growing blocks variant for std::find. + * @see __gnu_parallel::_Settings::find_algorithm */ +#define _GLIBCXX_FIND_GROWING_BLOCKS 1 +#endif + +#ifndef _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS +/** @brief Include the equal-sized blocks variant for std::find. + * @see __gnu_parallel::_Settings::find_algorithm */ +#define _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS 1 +#endif + +#ifndef _GLIBCXX_FIND_EQUAL_SPLIT +/** @def _GLIBCXX_FIND_EQUAL_SPLIT + * @brief Include the equal splitting variant for std::find. + * @see __gnu_parallel::_Settings::find_algorithm */ +#define _GLIBCXX_FIND_EQUAL_SPLIT 1 +#endif + + +#ifndef _GLIBCXX_TREE_INITIAL_SPLITTING +/** @def _GLIBCXX_TREE_INITIAL_SPLITTING + * @brief Include the initial splitting variant for + * _Rb_tree::insert_unique(InputIterator beg, InputIterator end). + * @see __gnu_parallel::_Rb_tree */ +#define _GLIBCXX_TREE_INITIAL_SPLITTING 1 +#endif + +#ifndef _GLIBCXX_TREE_DYNAMIC_BALANCING +/** @def _GLIBCXX_TREE_DYNAMIC_BALANCING + * @brief Include the dynamic balancing variant for + * _Rb_tree::insert_unique(InputIterator beg, InputIterator end). + * @see __gnu_parallel::_Rb_tree */ +#define _GLIBCXX_TREE_DYNAMIC_BALANCING 1 +#endif + +#ifndef _GLIBCXX_TREE_FULL_COPY +/** @def _GLIBCXX_TREE_FULL_COPY + * @brief In order to sort the input sequence of + * _Rb_tree::insert_unique(InputIterator beg, InputIterator end) a + * full copy of the input elements is done. 
+ * @see __gnu_parallel::_Rb_tree */ +#define _GLIBCXX_TREE_FULL_COPY 1 +#endif + + +#endif /* _GLIBCXX_PARALLEL_FEATURES_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/find.h b/gcc-4.4.0/libstdc++-v3/include/parallel/find.h new file mode 100644 index 000000000..0597cc58e --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/find.h @@ -0,0 +1,401 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/find.h + * @brief Parallel implementation base for std::find(), std::equal() + * and related functions. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze and Johannes Singler. 
+ +#ifndef _GLIBCXX_PARALLEL_FIND_H +#define _GLIBCXX_PARALLEL_FIND_H 1 + +#include <bits/stl_algobase.h> + +#include <parallel/features.h> +#include <parallel/parallel.h> +#include <parallel/compatibility.h> +#include <parallel/equally_split.h> + +namespace __gnu_parallel +{ +/** + * @brief Parallel std::find, switch for different algorithms. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. Must have same + * length as first sequence. + * @param pred Find predicate. + * @param selector Functionality (e. g. std::find_if (), std::equal(),...) + * @return Place of finding in both sequences. + */ +template<typename RandomAccessIterator1, + typename RandomAccessIterator2, + typename Pred, + typename Selector> + inline std::pair<RandomAccessIterator1, RandomAccessIterator2> + find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, Pred pred, Selector selector) + { + switch (_Settings::get().find_algorithm) + { + case GROWING_BLOCKS: + return find_template(begin1, end1, begin2, pred, selector, + growing_blocks_tag()); + case CONSTANT_SIZE_BLOCKS: + return find_template(begin1, end1, begin2, pred, selector, + constant_size_blocks_tag()); + case EQUAL_SPLIT: + return find_template(begin1, end1, begin2, pred, selector, + equal_split_tag()); + default: + _GLIBCXX_PARALLEL_ASSERT(false); + return std::make_pair(begin1, begin2); + } + } + +#if _GLIBCXX_FIND_EQUAL_SPLIT + +/** + * @brief Parallel std::find, equal splitting variant. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. Second sequence + * must have same length as first sequence. + * @param pred Find predicate. + * @param selector Functionality (e. g. std::find_if (), std::equal(),...) + * @return Place of finding in both sequences. 
 *
 *  The index range is cut into one contiguous part per thread with
 *  equally_split().  Each thread tests the positions of its part front
 *  to back with @c selector and stops at its first hit, or as soon as
 *  a hit at a smaller position has been recorded.
 */
template<typename RandomAccessIterator1,
         typename RandomAccessIterator2,
         typename Pred,
         typename Selector>
  std::pair<RandomAccessIterator1, RandomAccessIterator2>
  find_template(RandomAccessIterator1 begin1,
                RandomAccessIterator1 end1,
                RandomAccessIterator2 begin2,
                Pred pred,
                Selector selector,
                equal_split_tag)
  {
    // Logging hook; the macro supplies its own statement terminator.
    _GLIBCXX_CALL(end1 - begin1)

    typedef std::iterator_traits<RandomAccessIterator1> traits_type;
    typedef typename traits_type::difference_type difference_type;
    typedef typename traits_type::value_type value_type;

    difference_type length = end1 - begin1;
    // Smallest hit position so far; `length` means "nothing found" and
    // maps to the end of both sequences in the return statement.
    difference_type result = length;
    // Part boundaries; allocated inside the parallel region once the
    // real team size is known.
    difference_type* borders;

    // Serializes updates of `result`.
    omp_lock_t result_lock;
    omp_init_lock(&result_lock);

    thread_index_t num_threads = get_max_threads();
#   pragma omp parallel num_threads(num_threads)
      {
        // One thread sizes and fills `borders`; the implicit barrier at
        // the end of the single construct keeps the others from reading
        // it too early.
#       pragma omp single
          {
            num_threads = omp_get_num_threads();
            borders = new difference_type[num_threads + 1];
            equally_split(length, num_threads, borders);
          } //single

        thread_index_t iam = omp_get_thread_num();
        // This thread's part is [start, stop).
        difference_type start = borders[iam], stop = borders[iam + 1];

        RandomAccessIterator1 i1 = begin1 + start;
        RandomAccessIterator2 i2 = begin2 + start;
        for (difference_type pos = start; pos < stop; ++pos)
          {
            #pragma omp flush(result)
            // Result has been set to something lower.
            if (result < pos)
              break;

            if (selector(i1, i2, pred))
              {
                // Keep the minimum over all threads.
                omp_set_lock(&result_lock);
                if (pos < result)
                  result = pos;
                omp_unset_lock(&result_lock);
                // Later positions of this part cannot be smaller.
                break;
              }
            ++i1;
            ++i2;
          }
      } //parallel

    omp_destroy_lock(&result_lock);
    delete[] borders;

    return
      std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result,
                                                              begin2 + result);
  }

#endif

#if _GLIBCXX_FIND_GROWING_BLOCKS

/**
 *  @brief Parallel std::find, growing block size variant.
 *  @param begin1 Begin iterator of first sequence.
 *  @param end1 End iterator of first sequence.
 *  @param begin2 Begin iterator of second sequence.
 *  Second sequence must have same length as first sequence.
 *  @param pred Find predicate.
 *  @param selector Functionality (e. g. std::find_if (), std::equal(),...)
 *  @return Place of finding in both sequences.
 *  @see __gnu_parallel::_Settings::find_sequential_search_size
 *  @see __gnu_parallel::_Settings::find_initial_block_size
 *  @see __gnu_parallel::_Settings::find_maximum_block_size
 *  @see __gnu_parallel::_Settings::find_increasing_factor
 *
 *  There are two main differences between the growing blocks and
 *  the constant-size blocks variants.
 *  1. For GB, the block size grows; for CSB, the block size is fixed.
 *  2. For GB, the blocks are allocated dynamically;
 *     for CSB, the blocks are allocated in a predetermined manner,
 *     namely spatial round-robin.
 */
template<typename RandomAccessIterator1,
         typename RandomAccessIterator2,
         typename Pred,
         typename Selector>
  std::pair<RandomAccessIterator1, RandomAccessIterator2>
  find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
                RandomAccessIterator2 begin2, Pred pred, Selector selector,
                growing_blocks_tag)
  {
    // Logging hook; the macro supplies its own statement terminator.
    _GLIBCXX_CALL(end1 - begin1)

    typedef std::iterator_traits<RandomAccessIterator1> traits_type;
    typedef typename traits_type::difference_type difference_type;
    typedef typename traits_type::value_type value_type;

    const _Settings& __s = _Settings::get();

    difference_type length = end1 - begin1;

    // Length of the prefix that is searched by the calling thread alone.
    difference_type sequential_search_size =
      std::min<difference_type>(length, __s.find_sequential_search_size);

    // Try it sequentially first.
    std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
      selector.sequential_algorithm(
          begin1, begin1 + sequential_search_size, begin2, pred);

    // A hit inside the prefix is final; no threads are started.
    if (find_seq_result.first != (begin1 + sequential_search_size))
      return find_seq_result;

    // Index of beginning of next free block (after sequential find).
    // Shared cursor: threads claim blocks from it with fetch_and_add.
    difference_type next_block_start = sequential_search_size;
    // Smallest hit position so far; `length` means "nothing found".
    difference_type result = length;

    // Serializes updates of `result`.
    omp_lock_t result_lock;
    omp_init_lock(&result_lock);

    thread_index_t num_threads = get_max_threads();
#   pragma omp parallel shared(result) num_threads(num_threads)
      {
#       pragma omp single
          num_threads = omp_get_num_threads();

        // Not within first k elements -> start parallel.
        // (iam is not used further in this variant.)
        thread_index_t iam = omp_get_thread_num();

        difference_type block_size = __s.find_initial_block_size;
        // Claim [start, start + block_size) for this thread.
        difference_type start =
            fetch_and_add<difference_type>(&next_block_start, block_size);

        // Get new block, update pointer to next block.
        difference_type stop =
            std::min<difference_type>(length, start + block_size);

        std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result;

        while (start < length)
          {
#           pragma omp flush(result)
            // Get new value of result.
            if (result < start)
              {
                // No chance to find first element.
                break;
              }

            local_result = selector.sequential_algorithm(
                begin1 + start, begin1 + stop, begin2 + start, pred);
            if (local_result.first != (begin1 + stop))
              {
                omp_set_lock(&result_lock);
                if ((local_result.first - begin1) < result)
                  {
                    result = local_result.first - begin1;

                    // Result cannot be in future blocks, stop algorithm.
                    // Advancing the cursor by `length` makes every later
                    // claim start at or beyond `length`.
                    fetch_and_add<difference_type>(&next_block_start, length);
                  }
                omp_unset_lock(&result_lock);
              }

            // Grow the block, capped at the configured maximum.
            block_size =
              std::min<difference_type>(block_size * __s.find_increasing_factor,
                                        __s.find_maximum_block_size);

            // Get new block, update pointer to next block.
            start =
              fetch_and_add<difference_type>(&next_block_start, block_size);
            stop = ((length < (start + block_size))
                    ? length : (start + block_size));
          }
      } //parallel

    omp_destroy_lock(&result_lock);

    // Return iterator on found element.
    return
      std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result,
                                                              begin2 + result);
  }

#endif

#if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS

/**
 *  @brief Parallel std::find, constant block size variant.
 *  @param begin1 Begin iterator of first sequence.
 *  @param end1 End iterator of first sequence.
 *  @param begin2 Begin iterator of second sequence. Second sequence
 *  must have same length as first sequence.
 *  @param pred Find predicate.
 *  @param selector Functionality (e. g. std::find_if (), std::equal(),...)
 *  @return Place of finding in both sequences.
 *  @see __gnu_parallel::_Settings::find_sequential_search_size
 *  @see __gnu_parallel::_Settings::find_block_size
 *  There are two main differences between the growing blocks and the
 *  constant-size blocks variants.
 *  1. For GB, the block size grows; for CSB, the block size is fixed.
 *  2. For GB, the blocks are allocated dynamically; for CSB, the
 *  blocks are allocated in a predetermined manner, namely spatial
 *  round-robin.
 *
 *  NOTE(review): the body reads @c find_initial_block_size, not the
 *  @c find_block_size setting referenced above -- confirm which one is
 *  intended.
 */
template<typename RandomAccessIterator1,
         typename RandomAccessIterator2,
         typename Pred,
         typename Selector>
  std::pair<RandomAccessIterator1, RandomAccessIterator2>
  find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
                RandomAccessIterator2 begin2, Pred pred, Selector selector,
                constant_size_blocks_tag)
  {
    // Logging hook; the macro supplies its own statement terminator.
    _GLIBCXX_CALL(end1 - begin1)
    typedef std::iterator_traits<RandomAccessIterator1> traits_type;
    typedef typename traits_type::difference_type difference_type;
    typedef typename traits_type::value_type value_type;

    const _Settings& __s = _Settings::get();

    difference_type length = end1 - begin1;

    // Length of the prefix that is searched by the calling thread alone.
    difference_type sequential_search_size = std::min<difference_type>(
        length, __s.find_sequential_search_size);

    // Try it sequentially first.
    std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
      selector.sequential_algorithm(begin1, begin1 + sequential_search_size,
                                    begin2, pred);

    // A hit inside the prefix is final; no threads are started.
    if (find_seq_result.first != (begin1 + sequential_search_size))
      return find_seq_result;

    // Smallest hit position so far; `length` means "nothing found".
    difference_type result = length;
    // Serializes updates of `result`.
    omp_lock_t result_lock;
    omp_init_lock(&result_lock);

    // Not within first sequential_search_size elements -> start parallel.

    thread_index_t num_threads = get_max_threads();
#   pragma omp parallel shared(result) num_threads(num_threads)
      {
#       pragma omp single
          num_threads = omp_get_num_threads();

        thread_index_t iam = omp_get_thread_num();
        difference_type block_size = __s.find_initial_block_size;

        // First element of thread's current iteration.
        difference_type iteration_start = sequential_search_size;

        // Where to work (initialization).
        // Thread `iam` takes the iam-th block of every round.
        difference_type start = iteration_start + iam * block_size;
        difference_type stop =
            std::min<difference_type>(length, start + block_size);

        std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result;

        while (start < length)
          {
            // Get new value of result.
#           pragma omp flush(result)
            // No chance to find first element.
            if (result < start)
              break;
            local_result = selector.sequential_algorithm(
                begin1 + start, begin1 + stop,
                begin2 + start, pred);
            if (local_result.first != (begin1 + stop))
              {
                omp_set_lock(&result_lock);
                if ((local_result.first - begin1) < result)
                  result = local_result.first - begin1;
                omp_unset_lock(&result_lock);
                // Will not find better value in its interval.
                break;
              }

            // Next round: skip the blocks of all threads of this round.
            iteration_start += num_threads * block_size;

            // Where to work.
            start = iteration_start + iam * block_size;
            stop = std::min<difference_type>(length, start + block_size);
          }
      } //parallel

    omp_destroy_lock(&result_lock);

    // Return iterator on found element.
    return
      std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result,
                                                              begin2 + result);
  }
#endif
} // end namespace

#endif /* _GLIBCXX_PARALLEL_FIND_H */
diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/find_selectors.h b/gcc-4.4.0/libstdc++-v3/include/parallel/find_selectors.h new file mode 100644 index 000000000..3cbc1b80e --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/find_selectors.h @@ -0,0 +1,192 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/find_selectors.h + * @brief Function objects representing different tasks to be plugged + * into the parallel find algorithm. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. 
+ +#ifndef _GLIBCXX_PARALLEL_FIND_SELECTORS_H +#define _GLIBCXX_PARALLEL_FIND_SELECTORS_H 1 + +#include <parallel/tags.h> +#include <parallel/basic_iterator.h> +#include <bits/stl_pair.h> + +namespace __gnu_parallel +{ + /** @brief Base class of all __gnu_parallel::find_template selectors. */ + struct generic_find_selector + { }; + + /** + * @brief Test predicate on a single element, used for std::find() + * and std::find_if (). + */ + struct find_if_selector : public generic_find_selector + { + /** @brief Test on one position. + * @param i1 Iterator on first sequence. + * @param i2 Iterator on second sequence (unused). + * @param pred Find predicate. + */ + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename Pred> + bool + operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred) + { return pred(*i1); } + + /** @brief Corresponding sequential algorithm on a sequence. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param pred Find predicate. + */ + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename Pred> + std::pair<RandomAccessIterator1, RandomAccessIterator2> + sequential_algorithm(RandomAccessIterator1 begin1, + RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, Pred pred) + { return std::make_pair(find_if(begin1, end1, pred, + sequential_tag()), begin2); } + }; + + /** @brief Test predicate on two adjacent elements. */ + struct adjacent_find_selector : public generic_find_selector + { + /** @brief Test on one position. + * @param i1 Iterator on first sequence. + * @param i2 Iterator on second sequence (unused). + * @param pred Find predicate. + */ + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename Pred> + bool + operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred) + { + // Passed end iterator is one short. 
+ return pred(*i1, *(i1 + 1)); + } + + /** @brief Corresponding sequential algorithm on a sequence. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param pred Find predicate. + */ + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename Pred> + std::pair<RandomAccessIterator1, RandomAccessIterator2> + sequential_algorithm(RandomAccessIterator1 begin1, + RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, Pred pred) + { + // Passed end iterator is one short. + RandomAccessIterator1 spot = adjacent_find(begin1, end1 + 1, + pred, sequential_tag()); + if (spot == (end1 + 1)) + spot = end1; + return std::make_pair(spot, begin2); + } + }; + + /** @brief Test inverted predicate on a single element. */ + struct mismatch_selector : public generic_find_selector + { + /** + * @brief Test on one position. + * @param i1 Iterator on first sequence. + * @param i2 Iterator on second sequence (unused). + * @param pred Find predicate. + */ + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename Pred> + bool + operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred) + { return !pred(*i1, *i2); } + + /** + * @brief Corresponding sequential algorithm on a sequence. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param pred Find predicate. + */ + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename Pred> + std::pair<RandomAccessIterator1, RandomAccessIterator2> + sequential_algorithm(RandomAccessIterator1 begin1, + RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, Pred pred) + { return mismatch(begin1, end1, begin2, pred, sequential_tag()); } + }; + + + /** @brief Test predicate on several elements. 
*/ + template<typename ForwardIterator> + struct find_first_of_selector : public generic_find_selector + { + ForwardIterator begin; + ForwardIterator end; + + explicit find_first_of_selector(ForwardIterator begin, ForwardIterator end) + : begin(begin), end(end) { } + + /** @brief Test on one position. + * @param i1 Iterator on first sequence. + * @param i2 Iterator on second sequence (unused). + * @param pred Find predicate. */ + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename Pred> + bool + operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred) + { + for (ForwardIterator pos_in_candidates = begin; + pos_in_candidates != end; ++pos_in_candidates) + if (pred(*i1, *pos_in_candidates)) + return true; + return false; + } + + /** @brief Corresponding sequential algorithm on a sequence. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param pred Find predicate. */ + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename Pred> + std::pair<RandomAccessIterator1, RandomAccessIterator2> + sequential_algorithm(RandomAccessIterator1 begin1, + RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, Pred pred) + { return std::make_pair(find_first_of(begin1, end1, begin, end, pred, + sequential_tag()), begin2); } + }; +} + +#endif /* _GLIBCXX_PARALLEL_FIND_SELECTORS_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/for_each.h b/gcc-4.4.0/libstdc++-v3/include/parallel/for_each.h new file mode 100644 index 000000000..61158f865 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/for_each.h @@ -0,0 +1,97 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/for_each.h + * @brief Main interface for embarrassingly parallel functions. + * + * The explicit implementations are in other header files, like + * workstealing.h, par_loop.h, omp_loop.h, and omp_loop_static.h. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_FOR_EACH_H +#define _GLIBCXX_PARALLEL_FOR_EACH_H 1 + +#include <parallel/settings.h> +#include <parallel/par_loop.h> +#include <parallel/omp_loop.h> +#include <parallel/workstealing.h> + +namespace __gnu_parallel +{ + /** @brief Choose the desired algorithm by evaluating @c parallelism_tag. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param user_op A user-specified functor (comparator, predicate, + * associative operator,...) + * @param functionality functor to "process" an element with + * user_op (depends on desired functionality, e. g. accumulate, + * for_each,...) + * @param reduction Reduction functor. 
+ * @param reduction_start Initial value for reduction. + * @param output Output iterator. + * @param bound Maximum number of elements processed. + * @param parallelism_tag Parallelization method */ + template<typename InputIterator, typename UserOp, + typename Functionality, typename Red, typename Result> + UserOp + for_each_template_random_access(InputIterator begin, InputIterator end, + UserOp user_op, + Functionality& functionality, + Red reduction, Result reduction_start, + Result& output, typename + std::iterator_traits<InputIterator>:: + difference_type bound, + _Parallelism parallelism_tag) + { + if (parallelism_tag == parallel_unbalanced) + return for_each_template_random_access_ed(begin, end, user_op, + functionality, reduction, + reduction_start, + output, bound); + else if (parallelism_tag == parallel_omp_loop) + return for_each_template_random_access_omp_loop(begin, end, user_op, + functionality, + reduction, + reduction_start, + output, bound); + else if (parallelism_tag == parallel_omp_loop_static) /* NOTE(review): this branch calls the same routine as the parallel_omp_loop branch above, and omp_loop_static.h is not among this header's includes -- confirm that no separate static-schedule variant was meant to be called here. */ + return for_each_template_random_access_omp_loop(begin, end, user_op, + functionality, + reduction, + reduction_start, + output, bound); + else //e. g. parallel_balanced + return for_each_template_random_access_workstealing(begin, end, + user_op, + functionality, + reduction, + reduction_start, + output, bound); + } +} + +#endif /* _GLIBCXX_PARALLEL_FOR_EACH_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/for_each_selectors.h b/gcc-4.4.0/libstdc++-v3/include/parallel/for_each_selectors.h new file mode 100644 index 000000000..b46d30f51 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/for_each_selectors.h @@ -0,0 +1,360 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/for_each_selectors.h + * @brief Functors representing different tasks to be plugged into the + * generic parallelization methods for embarrassingly parallel functions. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_FOR_EACH_SELECTORS_H +#define _GLIBCXX_PARALLEL_FOR_EACH_SELECTORS_H 1 + +#include <parallel/basic_iterator.h> + +namespace __gnu_parallel +{ + + /** @brief Generic selector for embarrassingly parallel functions. */ + template<typename It> + struct generic_for_each_selector + { + /** @brief Iterator on last element processed; needed for some + * algorithms (e. g. std::transform()). + */ + It finish_iterator; + }; + + + /** @brief std::for_each() selector. */ + template<typename It> + struct for_each_selector : public generic_for_each_selector<It> + { + /** @brief Functor execution. + * @param o Operator. + * @param i Iterator referencing object. 
*/ + template<typename Op> + bool + operator()(Op& o, It i) + { + o(*i); + return true; + } + }; + + /** @brief std::generate() selector. */ + template<typename It> + struct generate_selector : public generic_for_each_selector<It> + { + /** @brief Functor execution. + * @param o Operator. + * @param i Iterator referencing object. */ + template<typename Op> + bool + operator()(Op& o, It i) + { + *i = o(); + return true; + } + }; + + /** @brief std::fill() selector. */ + template<typename It> + struct fill_selector : public generic_for_each_selector<It> + { + /** @brief Functor execution. + * @param v Current value. + * @param i Iterator referencing object. */ + template<typename Val> + bool + operator()(Val& v, It i) + { + *i = v; + return true; + } + }; + + /** @brief std::transform() selector, one input sequence variant. */ + template<typename It> + struct transform1_selector : public generic_for_each_selector<It> + { + /** @brief Functor execution. + * @param o Operator. + * @param i Iterator referencing object. */ + template<typename Op> + bool + operator()(Op& o, It i) + { + *i.second = o(*i.first); + return true; + } + }; + + /** @brief std::transform() selector, two input sequences variant. */ + template<typename It> + struct transform2_selector : public generic_for_each_selector<It> + { + /** @brief Functor execution. + * @param o Operator. + * @param i Iterator referencing object. */ + template<typename Op> + bool + operator()(Op& o, It i) + { + *i.third = o(*i.first, *i.second); + return true; + } + }; + + /** @brief std::replace() selector. */ + template<typename It, typename T> + struct replace_selector : public generic_for_each_selector<It> + { + /** @brief Value to replace with. */ + const T& new_val; + + /** @brief Constructor + * @param new_val Value to replace with. */ + explicit + replace_selector(const T &new_val) : new_val(new_val) {} + + /** @brief Functor execution. + * @param v Current value. + * @param i Iterator referencing object. 
*/ + bool + operator()(T& v, It i) + { + if (*i == v) + *i = new_val; + return true; + } + }; + + /** @brief std::replace_if() selector. */ + template<typename It, typename Op, typename T> + struct replace_if_selector : public generic_for_each_selector<It> + { + /** @brief Value to replace with. */ + const T& new_val; + + /** @brief Constructor. + * @param new_val Value to replace with. */ + explicit + replace_if_selector(const T &new_val) : new_val(new_val) { } + + /** @brief Functor execution. + * @param o Operator. + * @param i Iterator referencing object. */ + bool + operator()(Op& o, It i) + { + if (o(*i)) + *i = new_val; + return true; + } + }; + + /** @brief std::count() selector. */ + template<typename It, typename Diff> + struct count_selector : public generic_for_each_selector<It> + { + /** @brief Functor execution. + * @param v Current value. + * @param i Iterator referencing object. + * @return 1 if count, 0 if does not count. */ + template<typename Val> + Diff + operator()(Val& v, It i) + { return (v == *i) ? 1 : 0; } + }; + + /** @brief std::count_if() selector. */ + template<typename It, typename Diff> + struct count_if_selector : public generic_for_each_selector<It> + { + /** @brief Functor execution. + * @param o Operator. + * @param i Iterator referencing object. + * @return 1 if count, 0 if does not count. */ + template<typename Op> + Diff + operator()(Op& o, It i) + { return (o(*i)) ? 1 : 0; } + }; + + /** @brief std::accumulate() selector. */ + template<typename It> + struct accumulate_selector : public generic_for_each_selector<It> + { + /** @brief Functor execution. + * @param o Operator (unused). + * @param i Iterator referencing object. + * @return The current value. */ + template<typename Op> + typename std::iterator_traits<It>::value_type operator()(Op o, It i) + { return *i; } + }; + + /** @brief std::inner_product() selector. 
*/ + template<typename It, typename It2, typename T> + struct inner_product_selector : public generic_for_each_selector<It> + { + /** @brief Begin iterator of first sequence. */ + It begin1_iterator; + + /** @brief Begin iterator of second sequence. */ + It2 begin2_iterator; + + /** @brief Constructor. + * @param b1 Begin iterator of first sequence. + * @param b2 Begin iterator of second sequence. */ + explicit + inner_product_selector(It b1, It2 b2) + : begin1_iterator(b1), begin2_iterator(b2) { } + + /** @brief Functor execution. + * @param mult Multiplication functor. + * @param current Iterator referencing object. + * @return Inner product elemental result. */ + template<typename Op> + T + operator()(Op mult, It current) + { + typename std::iterator_traits<It>::difference_type position + = current - begin1_iterator; + return mult(*current, *(begin2_iterator + position)); + } + }; + + /** @brief Selector that just returns the passed iterator. */ + template<typename It> + struct identity_selector : public generic_for_each_selector<It> + { + /** @brief Functor execution. + * @param o Operator (unused). + * @param i Iterator referencing object. + * @return Passed iterator. */ + template<typename Op> + It + operator()(Op o, It i) + { return i; } + }; + + /** @brief Selector that returns the difference between two adjacent + * elements. + */ + template<typename It> + struct adjacent_difference_selector : public generic_for_each_selector<It> + { + template<typename Op> + bool + operator()(Op& o, It i) + { + typename It::first_type go_back_one = i.first; + --go_back_one; + *i.second = o(*i.first, *go_back_one); + return true; + } + }; + + // XXX move into type_traits? + /** @brief Functor doing nothing + * + * For some reduction tasks (this is not a function object, but is + * passed as selector dummy parameter. + */ + struct nothing + { + /** @brief Functor execution. + * @param i Iterator referencing object. 
*/ + template<typename It> + void + operator()(It i) { } + }; + + /** @brief Reduction function doing nothing. */ + struct dummy_reduct + { + bool + operator()(bool /*x*/, bool /*y*/) const + { return true; } + }; + + /** @brief Reduction for finding the minimum element, using a comparator. */ + template<typename Comp, typename It> + struct min_element_reduct + { + Comp& comp; + + explicit + min_element_reduct(Comp &c) : comp(c) { } + + It + operator()(It x, It y) + { + if (comp(*x, *y)) + return x; + else + return y; + } + }; + + /** @brief Reduction for finding the maximum element, using a comparator. */ + template<typename Comp, typename It> + struct max_element_reduct + { + Comp& comp; + + explicit + max_element_reduct(Comp& c) : comp(c) { } + + It + operator()(It x, It y) + { + if (comp(*x, *y)) + return y; + else + return x; + } + }; + + /** @brief General reduction, using a binary operator. */ + template<typename BinOp> + struct accumulate_binop_reduct + { + BinOp& binop; + + explicit + accumulate_binop_reduct(BinOp& b) : binop(b) { } + + template<typename Result, typename Addend> + Result + operator()(const Result& x, const Addend& y) + { return binop(x, y); } + }; +} + +#endif /* _GLIBCXX_PARALLEL_FOR_EACH_SELECTORS_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/iterator.h b/gcc-4.4.0/libstdc++-v3/include/parallel/iterator.h new file mode 100644 index 000000000..c9bfd5a2f --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/iterator.h @@ -0,0 +1,200 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. 
+ +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/iterator.h + * @brief Helper iterator classes for the std::transform() functions. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_ITERATOR_H +#define _GLIBCXX_PARALLEL_ITERATOR_H 1 + +#include <parallel/basic_iterator.h> +#include <bits/stl_pair.h> + +namespace __gnu_parallel +{ + /** @brief A pair of iterators. The usual iterator operations are + * applied to both child iterators. + */ + template<typename Iterator1, typename Iterator2, typename IteratorCategory> + class iterator_pair : public std::pair<Iterator1, Iterator2> + { + private: + typedef iterator_pair<Iterator1, Iterator2, IteratorCategory> type; + typedef std::pair<Iterator1, Iterator2> base_type; + + public: + typedef IteratorCategory iterator_category; + typedef void value_type; + + typedef std::iterator_traits<Iterator1> traits_type; + typedef typename traits_type::difference_type difference_type; + typedef type* pointer; + typedef type& reference; + + iterator_pair() { } + + iterator_pair(const Iterator1& first, const Iterator2& second) + : base_type(first, second) { } + + // Pre-increment operator. 
+ type& + operator++() + { + ++base_type::first; + ++base_type::second; + return *this; + } + + // Post-increment operator. + const type + operator++(int) + { return type(base_type::first++, base_type::second++); } + + // Pre-decrement operator. + type& + operator--() + { + --base_type::first; + --base_type::second; + return *this; + } + + // Post-decrement operator. + const type + operator--(int) + { return type(base_type::first--, base_type::second--); } + + // Type conversion. + operator Iterator2() const + { return base_type::second; } + + type& + operator=(const type& other) + { + base_type::first = other.first; + base_type::second = other.second; + return *this; + } + + type + operator+(difference_type delta) const + { return type(base_type::first + delta, base_type::second + delta); } + + difference_type + operator-(const type& other) const + { return base_type::first - other.first; } + }; + + + /** @brief A triple of iterators. The usual iterator operations are + applied to all three child iterators. + */ + template<typename Iterator1, typename Iterator2, typename Iterator3, + typename IteratorCategory> + class iterator_triple + { + private: + typedef iterator_triple<Iterator1, Iterator2, Iterator3, + IteratorCategory> type; + + public: + typedef IteratorCategory iterator_category; + typedef void value_type; + typedef typename std::iterator_traits<Iterator1>::difference_type + difference_type; + typedef type* pointer; + typedef type& reference; + + Iterator1 first; + Iterator2 second; + Iterator3 third; + + iterator_triple() { } + + iterator_triple(const Iterator1& _first, const Iterator2& _second, + const Iterator3& _third) + { + first = _first; + second = _second; + third = _third; + } + + // Pre-increment operator. + type& + operator++() + { + ++first; + ++second; + ++third; + return *this; + } + + // Post-increment operator. + const type + operator++(int) + { return type(first++, second++, third++); } + + // Pre-decrement operator. 
+ type& + operator--() + { + --first; + --second; + --third; + return *this; + } + + // Post-decrement operator. + const type + operator--(int) + { return type(first--, second--, third--); } + + // Type conversion. + operator Iterator3() const + { return third; } + + type& + operator=(const type& other) + { + first = other.first; + second = other.second; + third = other.third; + return *this; + } + + type + operator+(difference_type delta) const + { return type(first + delta, second + delta, third + delta); } + + difference_type + operator-(const type& other) const + { return first - other.first; } + }; +} + +#endif /* _GLIBCXX_PARALLEL_ITERATOR_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/list_partition.h b/gcc-4.4.0/libstdc++-v3/include/parallel/list_partition.h new file mode 100644 index 000000000..a359a3f3a --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/list_partition.h @@ -0,0 +1,176 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/list_partition.h + * @brief Functionality to split sequence referenced by only input + * iterators. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Leonor Frias Moya and Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_LIST_PARTITION_H +#define _GLIBCXX_PARALLEL_LIST_PARTITION_H 1 + +#include <parallel/parallel.h> +#include <vector> + +namespace __gnu_parallel +{ + /** @brief Shrinks and doubles the ranges. + * @param os_starts Start positions worked on (oversampled). + * @param count_to_two Counts up to 2. + * @param range_length Current length of a chunk. + * @param make_twice Whether the @c os_starts is allowed to be + * grown or not + */ + template<typename InputIterator> + void + shrink_and_double(std::vector<InputIterator>& os_starts, + size_t& count_to_two, size_t& range_length, + const bool make_twice) + { + ++count_to_two; + if (not make_twice or count_to_two < 2) + shrink(os_starts, count_to_two, range_length); + else + { + os_starts.resize((os_starts.size() - 1) * 2 + 1); + count_to_two = 0; + } + } + + /** @brief Combines two ranges into one and thus halves the number of ranges. + * @param os_starts Start positions worked on (oversampled). + * @param count_to_two Counts up to 2. + * @param range_length Current length of a chunk. */ + template<typename InputIterator> + void + shrink(std::vector<InputIterator>& os_starts, size_t& count_to_two, + size_t& range_length) + { + for (typename std::vector<InputIterator>::size_type i = 0; + i <= (os_starts.size() / 2); ++i) + os_starts[i] = os_starts[i * 2]; + range_length *= 2; + } + + /** @brief Splits a sequence given by input iterators into parts of + * almost equal size + * + * The function needs only one pass over the sequence. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. 
+ * @param starts Start iterators for the resulting parts, dimension + * @c num_parts+1. For convenience, @c starts @c [num_parts] + * contains the end iterator of the sequence. + * @param lengths Length of the resulting parts. + * @param num_parts Number of parts to split the sequence into. + * @param f Functor to be applied to each element by traversing it + * @param oversampling Oversampling factor. If 0, then the + * partitions will differ in at most @f$ \sqrt{\mathrm{end} - + * \mathrm{begin}} @f$ elements. Otherwise, the ratio between the + * longest and the shortest part is bounded by @f$ + * 1/(\mathrm{oversampling} \cdot \mathrm{num\_parts}) @f$. + * @return Length of the whole sequence. + */ + template<typename InputIterator, typename FunctorType> + size_t + list_partition(const InputIterator begin, const InputIterator end, + InputIterator* starts, size_t* lengths, const int num_parts, + FunctorType& f, int oversampling = 0) + { + bool make_twice = false; + + // The resizing algorithm is chosen according to the oversampling factor. 
+ if (oversampling == 0) + { + make_twice = true; + oversampling = 1; + } + + std::vector<InputIterator> os_starts(2 * oversampling * num_parts + 1); + + os_starts[0]= begin; + InputIterator prev = begin, it = begin; + size_t dist_limit = 0, dist = 0; + size_t cur = 1, next = 1; + size_t range_length = 1; + size_t count_to_two = 0; + while (it != end) + { + cur = next; + for (; cur < os_starts.size() and it != end; ++cur) + { + for (dist_limit += range_length; + dist < dist_limit and it != end; ++dist) + { + f(it); + ++it; + } + os_starts[cur] = it; + } + + // Must compare for end and not cur < os_starts.size() , because + // cur could be == os_starts.size() as well + if (it == end) + break; + + shrink_and_double(os_starts, count_to_two, range_length, make_twice); + next = os_starts.size() / 2 + 1; + } + + // Calculation of the parts (one must be extracted from current + // because the partition beginning at end, consists only of + // itself). + size_t size_part = (cur - 1) / num_parts; + int size_greater = static_cast<int>((cur - 1) % num_parts); + starts[0] = os_starts[0]; + + size_t index = 0; + + // Smallest partitions. + for (int i = 1; i < (num_parts + 1 - size_greater); ++i) + { + lengths[i - 1] = size_part * range_length; + index += size_part; + starts[i] = os_starts[index]; + } + + // Biggest partitions. + for (int i = num_parts + 1 - size_greater; i <= num_parts; ++i) + { + lengths[i - 1] = (size_part+1) * range_length; + index += (size_part+1); + starts[i] = os_starts[index]; + } + + // Correction of the end size (the end iteration has not finished). 
+ lengths[num_parts - 1] -= (dist_limit - dist); + + return dist; + } +} + +#endif /* _GLIBCXX_PARALLEL_LIST_PARTITION_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/losertree.h b/gcc-4.4.0/libstdc++-v3/include/parallel/losertree.h new file mode 100644 index 000000000..6dbd59288 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/losertree.h @@ -0,0 +1,1021 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/losertree.h +* @brief Many generic loser tree variants. +* This file is a GNU parallel extension to the Standard C++ Library. +*/ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_LOSERTREE_H +#define _GLIBCXX_PARALLEL_LOSERTREE_H 1 + +#include <functional> + +#include <bits/stl_algobase.h> +#include <parallel/features.h> +#include <parallel/base.h> + +namespace __gnu_parallel +{ + +/** + * @brief Guarded loser/tournament tree. + * + * The smallest element is at the top. 
+ * + * Guarding is done explicitly through one flag sup per element, + * inf is not needed due to a better initialization routine. This + * is a well-performing variant. + * + * @param T the element type + * @param Comparator the comparator to use + */ +template<typename T, typename Comparator> +class LoserTreeBase +{ +protected: + /** @brief Internal representation of a LoserTree element. */ + struct Loser + { + /** @brief flag, true iff this is a "maximum" sentinel. */ + bool sup; + /** @brief index of the source sequence. */ + int source; + /** @brief key of the element in the LoserTree. */ + T key; + }; + + unsigned int ik, k, offset; + + /** log_2{k} */ + unsigned int _M_log_k; + + /** @brief LoserTree elements. */ + Loser* losers; + + /** @brief Comparator to use. */ + Comparator comp; + + /** + * @brief State flag that determines whether the LoserTree is empty. + * + * Only used for building the LoserTree. + */ + bool first_insert; + +public: + /** + * @brief The constructor. + * + * @param _k The number of sequences to merge. + * @param _comp The comparator to use. + */ + LoserTreeBase(unsigned int _k, Comparator _comp) + : comp(_comp) + { + ik = _k; + + // Compute log_2{k} for the Loser Tree + _M_log_k = __log2(ik - 1) + 1; + + // Next greater power of 2. + k = 1 << _M_log_k; + offset = k; + + // Avoid default-constructing losers[].key + losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser))); + for (unsigned int i = ik - 1; i < k; ++i) + losers[i + k].sup = true; + + first_insert = true; + } + + /** + * @brief The destructor. + * + * Destroys the keys that the first insert_start() call constructed + * (if there was one) before releasing the raw storage obtained in + * the constructor. 
+ */ + ~LoserTreeBase() + { + // Keys only exist once insert_start() has run at least once. + if (!first_insert) + for (unsigned int i = 0; i < (2 * k); ++i) + losers[i].key.~T(); + ::operator delete(losers); + } + + /** + * @brief Initializes the sequence "source" with the element "key". + * + * @param key the element to insert + * @param source index of the source sequence + * @param sup flag that determines whether the value to insert is an + * explicit supremum. 
+ */ + inline void + insert_start(const T& key, int source, bool sup) + { + unsigned int pos = k + source; + + if(first_insert) + { + // Construct all keys, so the destructor can destroy them uniformly. + for (unsigned int i = 0; i < (2 * k); ++i) + new(&(losers[i].key)) T(key); + first_insert = false; + } + else + // The key at pos was already constructed by the first insert: + // assign to it instead of constructing a second object on top. + losers[pos].key = key; + + losers[pos].sup = sup; + losers[pos].source = source; + } + + /** + * @return the index of the sequence with the smallest element. + */ + int get_min_source() + { return losers[0].source; } +}; + +/** + * @brief Stable LoserTree variant. + * + * Provides the stable implementations of insert_start, init_winner, + * init and delete_min_insert. + * + * Unstable variant is done using partial specialisation below. + */ +template<bool stable/* default == true */, typename T, typename Comparator> +class LoserTree : public LoserTreeBase<T, Comparator> +{ + typedef LoserTreeBase<T, Comparator> Base; + using Base::k; + using Base::losers; + using Base::first_insert; + // comp is a member of a dependent base class; name it explicitly so + // that two-phase name lookup finds it. + using Base::comp; + +public: + LoserTree(unsigned int _k, Comparator _comp) + : Base::LoserTreeBase(_k, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup + || (!losers[left].sup + && !comp(losers[right].key, losers[left].key))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + void init() + { losers[0] = losers[init_winner(1)]; } + + /** + * @brief Delete the smallest element and insert a new element from + * the previously smallest element's sequence. + * + * This implementation is stable. + */ + // Do not pass a const reference since key will be used as local variable. 
+ void delete_min_insert(T key, bool sup) + { +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if ((sup && (!losers[pos].sup || losers[pos].source < source)) + || (!sup && !losers[pos].sup + && ((comp(losers[pos].key, key)) + || (!comp(key, losers[pos].key) + && losers[pos].source < source)))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].key = key; + } +}; + +/** + * @brief Unstable LoserTree variant. + * + * Stability (non-stable here) is selected with partial specialization. + */ +template<typename T, typename Comparator> +class LoserTree</* stable == */false, T, Comparator> : + public LoserTreeBase<T, Comparator> +{ + typedef LoserTreeBase<T, Comparator> Base; + using Base::_M_log_k; + using Base::k; + using Base::losers; + using Base::first_insert; + +public: + LoserTree(unsigned int _k, Comparator _comp) + : Base::LoserTreeBase(_k, _comp) + {} + + /** + * Computes the winner of the competition at position "root". + * + * Called recursively (starting at 0) to build the initial tree. + * + * @param root index of the "game" to start. + */ + unsigned int + init_winner (unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup || + (!losers[left].sup + && !comp(losers[right].key, losers[left].key))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. 
+ losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { losers[0] = losers[init_winner(1)]; } + + /** + * Delete the key smallest element and insert the element key instead. + * + * @param key the key to insert + * @param sup true iff key is an explicitly marked supremum + */ + // Do not pass a const reference since key will be used as local variable. + inline void + delete_min_insert(T key, bool sup) + { +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (sup || (!losers[pos].sup && comp(losers[pos].key, key))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].key = key; + } +}; + + +/** + * @brief Base class of Loser Tree implementation using pointers. + */ +template<typename T, typename Comparator> +class LoserTreePointerBase +{ +protected: + /** @brief Internal representation of LoserTree elements. */ + struct Loser + { + bool sup; + int source; + const T* keyp; + }; + + unsigned int ik, k, offset; + Loser* losers; + Comparator comp; + +public: + LoserTreePointerBase(unsigned int _k, Comparator _comp = std::less<T>()) + : comp(_comp) + { + ik = _k; + + // Next greater power of 2. 
+ k = 1 << (__log2(ik - 1) + 1); + offset = k; + losers = new Loser[k * 2]; + for (unsigned int i = ik - 1; i < k; i++) + losers[i + k].sup = true; + } + + ~LoserTreePointerBase() + { ::operator delete[](losers); } + + int get_min_source() + { return losers[0].source; } + + void insert_start(const T& key, int source, bool sup) + { + unsigned int pos = k + source; + + losers[pos].sup = sup; + losers[pos].source = source; + losers[pos].keyp = &key; + } +}; + +/** + * @brief Stable LoserTree implementation. + * + * The unstable variant is implemented using partial instantiation below. + */ +template<bool stable/* default == true */, typename T, typename Comparator> +class LoserTreePointer : public LoserTreePointerBase<T, Comparator> +{ + typedef LoserTreePointerBase<T, Comparator> Base; + using Base::k; + using Base::losers; + +public: + LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>()) + : Base::LoserTreePointerBase(_k, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup + || (!losers[left].sup && !comp(*losers[right].keyp, + *losers[left].keyp))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + void init() + { losers[0] = losers[init_winner(1)]; } + + void delete_min_insert(const T& key, bool sup) + { +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. 
+ if ((sup && (!losers[pos].sup || losers[pos].source < source)) || + (!sup && !losers[pos].sup && + ((comp(*losers[pos].keyp, *keyp)) || + (!comp(*keyp, *losers[pos].keyp) + && losers[pos].source < source)))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].keyp = keyp; + } +}; + +/** + * @brief Unstable LoserTree implementation. + * + * The stable variant is above. + */ +template<typename T, typename Comparator> +class LoserTreePointer</* stable == */false, T, Comparator> : + public LoserTreePointerBase<T, Comparator> +{ + typedef LoserTreePointerBase<T, Comparator> Base; + using Base::k; + using Base::losers; + +public: + LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>()) + : Base::LoserTreePointerBase(_k, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup + || (!losers[left].sup + && !comp(*losers[right].keyp, *losers[left].keyp))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + void init() + { losers[0] = losers[init_winner(1)]; } + + void delete_min_insert(const T& key, bool sup) + { +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp))) + { + // The other one is smaller. 
+ std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].keyp = keyp; + } +}; + +/** @brief Base class for unguarded LoserTree implementation. + * + * The whole element is copied into the tree structure. + * + * No guarding is done, therefore not a single input sequence must + * run empty. Unused sequence heads are marked with a sentinel which + * is > all elements that are to be merged. + * + * This is a very fast variant. + */ +template<typename T, typename Comparator> +class LoserTreeUnguardedBase +{ +protected: + struct Loser + { + int source; + T key; + }; + + unsigned int ik, k, offset; + Loser* losers; + Comparator comp; + +public: + inline + LoserTreeUnguardedBase(unsigned int _k, const T _sentinel, + Comparator _comp = std::less<T>()) + : comp(_comp) + { + ik = _k; + + // Next greater power of 2. + k = 1 << (__log2(ik - 1) + 1); + offset = k; + // Avoid default-constructing losers[].key + losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser))); + + for (unsigned int i = k + ik - 1; i < (2 * k); ++i) + { + losers[i].key = _sentinel; + losers[i].source = -1; + } + } + + inline ~LoserTreeUnguardedBase() + { ::operator delete(losers); } + + inline int + get_min_source() + { +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + return losers[0].source; + } + + inline void + insert_start(const T& key, int source, bool) + { + unsigned int pos = k + source; + + new(&(losers[pos].key)) T(key); + losers[pos].source = source; + } +}; + +/** + * @brief Stable implementation of unguarded LoserTree. + * + * Unstable variant is selected below with partial specialization. 
+ */ +template<bool stable/* default == true */, typename T, typename Comparator> +class LoserTreeUnguarded : public LoserTreeUnguardedBase<T, Comparator> +{ + typedef LoserTreeUnguardedBase<T, Comparator> Base; + using Base::k; + using Base::losers; + +public: + LoserTreeUnguarded(unsigned int _k, const T _sentinel, + Comparator _comp = std::less<T>()) + : Base::LoserTreeUnguardedBase(_k, _sentinel, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (!comp(losers[right].key, losers[left].key)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { + losers[0] = losers[init_winner(1)]; + +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top at the beginning (0 sequences!) + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + } + + // Do not pass a const reference since key will be used as local variable. + inline void + delete_min_insert(T key, bool) + { +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if (comp(losers[pos].key, key) + || (!comp(key, losers[pos].key) && losers[pos].source < source)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } + } + + losers[0].source = source; + losers[0].key = key; + } +}; + +/** + * @brief Non-Stable implementation of unguarded LoserTree. + * + * Stable implementation is above. 
+ */ +template<typename T, typename Comparator> +class LoserTreeUnguarded</* stable == */false, T, Comparator> : + public LoserTreeUnguardedBase<T, Comparator> +{ + typedef LoserTreeUnguardedBase<T, Comparator> Base; + using Base::k; + using Base::losers; + +public: + LoserTreeUnguarded(unsigned int _k, const T _sentinel, + Comparator _comp = std::less<T>()) + : Base::LoserTreeUnguardedBase(_k, _sentinel, _comp) + {} + + unsigned int + init_winner (unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + +#if _GLIBCXX_ASSERTIONS + // If left one is sentinel then right one must be, too. + if (losers[left].source == -1) + _GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1); +#endif + + if (!comp(losers[right].key, losers[left].key)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { + losers[0] = losers[init_winner(1)]; + +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top at the beginning (0 sequences!) + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + } + + // Do not pass a const reference since key will be used as local variable. + inline void + delete_min_insert(T key, bool) + { +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (comp(losers[pos].key, key)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } + } + + losers[0].source = source; + losers[0].key = key; + } +}; + +/** @brief Unguarded loser tree, keeping only pointers to the +* elements in the tree structure. 
+* +* No guarding is done, therefore not a single input sequence must +* run empty. This is a very fast variant. +*/ +template<typename T, typename Comparator> +class LoserTreePointerUnguardedBase +{ +protected: + struct Loser + { + int source; + const T* keyp; + }; + + unsigned int ik, k, offset; + Loser* losers; + Comparator comp; + +public: + + inline + LoserTreePointerUnguardedBase(unsigned int _k, const T& _sentinel, + Comparator _comp = std::less<T>()) + : comp(_comp) + { + ik = _k; + + // Next greater power of 2. + k = 1 << (__log2(ik - 1) + 1); + offset = k; + // Avoid default-constructing losers[].key + losers = new Loser[2 * k]; + + for (unsigned int i = k + ik - 1; i < (2 * k); ++i) + { + losers[i].keyp = &_sentinel; + losers[i].source = -1; + } + } + + inline ~LoserTreePointerUnguardedBase() + { delete[] losers; } + + inline int + get_min_source() + { +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + return losers[0].source; + } + + inline void + insert_start(const T& key, int source, bool) + { + unsigned int pos = k + source; + + losers[pos].keyp = &key; + losers[pos].source = source; + } +}; + +/** + * @brief Stable unguarded LoserTree variant storing pointers. + * + * Unstable variant is implemented below using partial specialization. 
+ */ +template<bool stable/* default == true */, typename T, typename Comparator> +class LoserTreePointerUnguarded : + public LoserTreePointerUnguardedBase<T, Comparator> +{ + typedef LoserTreePointerUnguardedBase<T, Comparator> Base; + using Base::k; + using Base::losers; + +public: + LoserTreePointerUnguarded(unsigned int _k, const T& _sentinel, + Comparator _comp = std::less<T>()) + : Base::LoserTreePointerUnguardedBase(_k, _sentinel, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (!comp(*losers[right].keyp, *losers[left].keyp)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { + losers[0] = losers[init_winner(1)]; + +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top at the beginning (0 sequences!) + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + } + + inline void + delete_min_insert(const T& key, bool sup) + { +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if (comp(*losers[pos].keyp, *keyp) + || (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].source = source; + losers[0].keyp = keyp; + } +}; + +/** + * @brief Unstable unguarded LoserTree variant storing pointers. + * + * Stable variant is above. 
+ */ +template<typename T, typename Comparator> +class LoserTreePointerUnguarded</* stable == */false, T, Comparator> : + public LoserTreePointerUnguardedBase<T, Comparator> +{ + typedef LoserTreePointerUnguardedBase<T, Comparator> Base; + using Base::k; + using Base::losers; + +public: + LoserTreePointerUnguarded(unsigned int _k, const T& _sentinel, + Comparator _comp = std::less<T>()) + : Base::LoserTreePointerUnguardedBase(_k, _sentinel, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + +#if _GLIBCXX_ASSERTIONS + // If left one is sentinel then right one must be, too. + if (losers[left].source == -1) + _GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1); +#endif + + if (!comp(*losers[right].keyp, *losers[left].keyp)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { + losers[0] = losers[init_winner(1)]; + +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top at the beginning (0 sequences!) + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + } + + inline void + delete_min_insert(const T& key, bool sup) + { +#if _GLIBCXX_ASSERTIONS + // no dummy sequence can ever be at the top! + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (comp(*(losers[pos].keyp), *keyp)) + { + // The other one is smaller. 
+ std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].source = source; + losers[0].keyp = keyp; + } +}; + +} // namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_LOSERTREE_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/merge.h b/gcc-4.4.0/libstdc++-v3/include/parallel/merge.h new file mode 100644 index 000000000..d947e258a --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/merge.h @@ -0,0 +1,261 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/merge.h + * @brief Parallel implementation of std::merge(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. 
/** @brief Merge routine being able to merge only the @c max_length
 * smallest elements.
 *
 * Both @c begin iterators are passed by reference and are left
 * pointing behind the last element consumed from their sequence, so
 * they need not reach @c end.
 * @param begin1 Begin iterator of first sequence.
 * @param end1 End iterator of first sequence.
 * @param begin2 Begin iterator of second sequence.
 * @param end2 End iterator of second sequence.
 * @param target Target begin iterator.
 * @param max_length Maximum number of elements to merge.
 * @param comp Comparator.
 * @return Output end iterator. */
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
         typename OutputIterator, typename _DifferenceTp,
         typename Comparator>
OutputIterator
merge_advance_usual(RandomAccessIterator1& begin1,
                    RandomAccessIterator1 end1,
                    RandomAccessIterator2& begin2,
                    RandomAccessIterator2 end2, OutputIterator target,
                    _DifferenceTp max_length, Comparator comp)
{
  // Two-way merge while both inputs still have elements and the
  // output quota is not used up.
  for (; begin1 != end1 && begin2 != end2 && max_length > 0; --max_length)
    {
      if (comp(*begin2, *begin1))
        {
          // Head of the second sequence is strictly smaller.
          *target = *begin2;
          ++begin2;
        }
      else
        {
          // Otherwise (including ties) take from the first sequence.
          *target = *begin1;
          ++begin1;
        }
      ++target;
    }

  // Whatever is left of the quota is copied from the first sequence if
  // it still has elements, from the second one otherwise.
  if (begin1 != end1)
    {
      RandomAccessIterator1 stop1 = begin1 + max_length;
      target = std::copy(begin1, stop1, target);
      begin1 += max_length;
      return target;
    }

  RandomAccessIterator2 stop2 = begin2 + max_length;
  target = std::copy(begin2, stop2, target);
  begin2 += max_length;
  return target;
}
/** @brief Merge routine being able to merge only the @c max_length
 * smallest elements.
 *
 * Both @c begin iterators are passed by reference and are left
 * pointing behind the last element consumed from their sequence, so
 * they need not reach @c end.
 * The loop body is written so that the compiler can generate
 * conditional moves instead of branches.
 * @param begin1 Begin iterator of first sequence.
 * @param end1 End iterator of first sequence.
 * @param begin2 Begin iterator of second sequence.
 * @param end2 End iterator of second sequence.
 * @param target Target begin iterator.
 * @param max_length Maximum number of elements to merge.
 * @param comp Comparator.
 * @return Output end iterator. */
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
         typename OutputIterator, typename _DifferenceTp,
         typename Comparator>
OutputIterator
merge_advance_movc(RandomAccessIterator1& begin1,
                   RandomAccessIterator1 end1,
                   RandomAccessIterator2& begin2,
                   RandomAccessIterator2 end2,
                   OutputIterator target,
                   _DifferenceTp max_length, Comparator comp)
{
  typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
    value_type1;
  typedef typename std::iterator_traits<RandomAccessIterator2>::value_type
    value_type2;

#if _GLIBCXX_ASSERTIONS
  _GLIBCXX_PARALLEL_ASSERT(max_length >= 0);
#endif

  for (; begin1 != end1 && begin2 != end2 && max_length > 0;
       ++target, --max_length)
    {
      // Both heads and both successor positions are computed up front;
      // the comparison then only selects which pair to keep.
      value_type1 head1 = *begin1;
      value_type2 head2 = *begin2;
      RandomAccessIterator1 after1 = begin1 + 1;
      RandomAccessIterator2 after2 = begin2 + 1;

      if (comp(head2, head1))
        {
          head1 = head2;
          begin2 = after2;
        }
      else
        begin1 = after1;

      *target = head1;
    }

  // Whatever is left of the quota is copied from the first sequence if
  // it still has elements, from the second one otherwise.
  if (begin1 != end1)
    {
      RandomAccessIterator1 stop1 = begin1 + max_length;
      target = std::copy(begin1, stop1, target);
      begin1 += max_length;
      return target;
    }

  RandomAccessIterator2 stop2 = begin2 + max_length;
  target = std::copy(begin2, stop2, target);
  begin2 += max_length;
  return target;
}
+ * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param end2 End iterator of second sequence. + * @param target Target begin iterator. + * @param max_length Maximum number of elements to merge. + * @param comp Comparator. + * @return Output end iterator. */ + template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename OutputIterator, typename _DifferenceTp, + typename Comparator> + inline OutputIterator + merge_advance(RandomAccessIterator1& begin1, RandomAccessIterator1 end1, + RandomAccessIterator2& begin2, RandomAccessIterator2 end2, + OutputIterator target, _DifferenceTp max_length, + Comparator comp) + { + _GLIBCXX_CALL(max_length) + + return merge_advance_movc(begin1, end1, begin2, end2, target, + max_length, comp); + } + + /** @brief Merge routine fallback to sequential in case the + iterators of the two input sequences are of different type. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param end2 End iterator of second sequence. + * @param target Target begin iterator. + * @param max_length Maximum number of elements to merge. + * @param comp Comparator. + * @return Output end iterator. 
/** @brief Merge routine fallback to sequential in case the
 * iterators of the two input sequences are of different type.
 *
 * No parallel implementation is available for mixed iterator types,
 * so the call is forwarded unchanged to merge_advance.
 * @param begin1 Begin iterator of first sequence.
 * @param end1 End iterator of first sequence.
 * @param begin2 Begin iterator of second sequence.
 * @param end2 End iterator of second sequence.
 * @param target Target begin iterator.
 * @param max_length Maximum number of elements to merge.
 * @param comp Comparator.
 * @return Output end iterator. */
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
         typename RandomAccessIterator3, typename Comparator>
inline RandomAccessIterator3
parallel_merge_advance(RandomAccessIterator1& begin1,
                       RandomAccessIterator1 end1,
                       RandomAccessIterator2& begin2,
                       RandomAccessIterator2 end2,
                       RandomAccessIterator3 target, typename
                       std::iterator_traits<RandomAccessIterator1>::
                       difference_type max_length, Comparator comp)
{
  RandomAccessIterator3 target_end =
    merge_advance(begin1, end1, begin2, end2, target, max_length, comp);
  return target_end;
}
+ */ + template<typename RandomAccessIterator1, typename RandomAccessIterator3, + typename Comparator> + inline RandomAccessIterator3 + parallel_merge_advance(RandomAccessIterator1& begin1, + RandomAccessIterator1 end1, + RandomAccessIterator1& begin2, + RandomAccessIterator1 end2, + RandomAccessIterator3 target, typename + std::iterator_traits<RandomAccessIterator1>:: + difference_type max_length, Comparator comp) + { + typedef typename + std::iterator_traits<RandomAccessIterator1>::value_type value_type; + typedef typename std::iterator_traits<RandomAccessIterator1>:: + difference_type difference_type1 /* == difference_type2 */; + typedef typename std::iterator_traits<RandomAccessIterator3>:: + difference_type difference_type3; + typedef typename std::pair<RandomAccessIterator1, RandomAccessIterator1> + iterator_pair; + + iterator_pair + seqs[2] = { std::make_pair(begin1, end1), + std::make_pair(begin2, end2) }; + RandomAccessIterator3 + target_end = parallel_multiway_merge + < /* stable = */ true, /* sentinels = */ false>( + seqs, seqs + 2, target, + multiway_merge_exact_splitting + < /* stable = */ true, iterator_pair*, + Comparator, difference_type1>, + max_length, comp, omp_get_max_threads()); + + return target_end; + } +} //namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_MERGE_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/multiseq_selection.h b/gcc-4.4.0/libstdc++-v3/include/parallel/multiseq_selection.h new file mode 100644 index 000000000..6ef3d22b1 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/multiseq_selection.h @@ -0,0 +1,633 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. 
+ +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/multiseq_selection.h + * @brief Functions to find elements of a certain global rank in + * multiple sorted sequences. Also serves for splitting such + * sequence sets. + * + * The algorithm description can be found in + * + * P. J. Varman, S. D. Scheufler, B. R. Iyer, and G. R. Ricard. + * Merging Multiple Lists on Hierarchical-Memory Multiprocessors. + * Journal of Parallel and Distributed Computing, 12(2):171–177, 1991. + * + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H +#define _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H 1 + +#include <vector> +#include <queue> + +#include <bits/stl_algo.h> + +#include <parallel/sort.h> + +namespace __gnu_parallel +{ + /** @brief Compare a pair of types lexicographically, ascending. 
/** @brief Compare a pair of types lexicographically, ascending.
 *
 * The first components are ordered by the wrapped comparator; if
 * neither is less than the other, the second components are compared
 * with operator<.  The comparator is held by reference and must
 * outlive this object.
 */
template<typename T1, typename T2, typename Comparator>
  class lexicographic
  : public std::binary_function<std::pair<T1, T2>, std::pair<T1, T2>, bool>
  {
  private:
    Comparator& comp;

  public:
    lexicographic(Comparator& _comp) : comp(_comp) { }

    bool
    operator()(const std::pair<T1, T2>& p1,
               const std::pair<T1, T2>& p2) const
    {
      if (comp(p1.first, p2.first))
        return true;

      if (comp(p2.first, p1.first))
        return false;

      // Firsts are equal.
      return p1.second < p2.second;
    }
  };

/** @brief Compare a pair of types lexicographically, descending.
 *
 * Mirror image of lexicographic: returns true iff p2 orders before p1.
 * The comparator is held by reference and must outlive this object.
 */
template<typename T1, typename T2, typename Comparator>
  class lexicographic_reverse
  // The functor takes two pairs, so the argument typedefs inherited
  // from binary_function have to name the pair type (as in
  // lexicographic), not T1 and T2.
  : public std::binary_function<std::pair<T1, T2>, std::pair<T1, T2>, bool>
  {
  private:
    Comparator& comp;

  public:
    lexicographic_reverse(Comparator& _comp) : comp(_comp) { }

    bool
    operator()(const std::pair<T1, T2>& p1,
               const std::pair<T1, T2>& p2) const
    {
      if (comp(p2.first, p1.first))
        return true;

      if (comp(p1.first, p2.first))
        return false;

      // Firsts are equal.
      return p2.second < p1.second;
    }
  };

/**
 *  @brief Splits several sorted sequences at a certain global rank,
 *  resulting in a splitting point for each sequence.
 *  The sequences are passed via a sequence of random-access
 *  iterator pairs, none of the sequences may be empty.  If there
 *  are several equal elements across the split, the ones on the
 *  left side will be chosen from sequences with smaller number.
 *  @param begin_seqs Begin of the sequence of iterator pairs.
 *  @param end_seqs End of the sequence of iterator pairs.
 *  @param rank The global rank to partition at.
 *  @param begin_offsets A random-access sequence begin where the
 *  result will be stored in. Each element of the sequence is an
 *  iterator that points to the first element on the greater part of
 *  the respective sequence.
 *  @param comp The ordering functor, defaults to std::less<T>.
 */
+ */ + template<typename RanSeqs, typename RankType, typename RankIterator, + typename Comparator> + void + multiseq_partition(RanSeqs begin_seqs, RanSeqs end_seqs, + RankType rank, + RankIterator begin_offsets, + Comparator comp = std::less< + typename std::iterator_traits<typename + std::iterator_traits<RanSeqs>::value_type:: + first_type>::value_type>()) // std::less<T> + { + _GLIBCXX_CALL(end_seqs - begin_seqs) + + typedef typename std::iterator_traits<RanSeqs>::value_type::first_type + It; + typedef typename std::iterator_traits<It>::difference_type + difference_type; + typedef typename std::iterator_traits<It>::value_type value_type; + + lexicographic<value_type, int, Comparator> lcomp(comp); + lexicographic_reverse<value_type, int, Comparator> lrcomp(comp); + + // Number of sequences, number of elements in total (possibly + // including padding). + difference_type m = std::distance(begin_seqs, end_seqs), N = 0, + nmax, n, r; + + for (int i = 0; i < m; i++) + { + N += std::distance(begin_seqs[i].first, begin_seqs[i].second); + _GLIBCXX_PARALLEL_ASSERT( + std::distance(begin_seqs[i].first, begin_seqs[i].second) > 0); + } + + if (rank == N) + { + for (int i = 0; i < m; i++) + begin_offsets[i] = begin_seqs[i].second; // Very end. + // Return m - 1; + return; + } + + _GLIBCXX_PARALLEL_ASSERT(m != 0); + _GLIBCXX_PARALLEL_ASSERT(N != 0); + _GLIBCXX_PARALLEL_ASSERT(rank >= 0); + _GLIBCXX_PARALLEL_ASSERT(rank < N); + + difference_type* ns = new difference_type[m]; + difference_type* a = new difference_type[m]; + difference_type* b = new difference_type[m]; + difference_type l; + + ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second); + nmax = ns[0]; + for (int i = 0; i < m; i++) + { + ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second); + nmax = std::max(nmax, ns[i]); + } + + r = __log2(nmax) + 1; + + // Pad all lists to this length, at least as long as any ns[i], + // equality iff nmax = 2^k - 1. 
+ l = (1ULL << r) - 1; + + // From now on, including padding. + N = l * m; + + for (int i = 0; i < m; i++) + { + a[i] = 0; + b[i] = l; + } + n = l / 2; + + // Invariants: + // 0 <= a[i] <= ns[i], 0 <= b[i] <= l + +#define S(i) (begin_seqs[i].first) + + // Initial partition. + std::vector<std::pair<value_type, int> > sample; + + for (int i = 0; i < m; i++) + if (n < ns[i]) //sequence long enough + sample.push_back(std::make_pair(S(i)[n], i)); + __gnu_sequential::sort(sample.begin(), sample.end(), lcomp); + + for (int i = 0; i < m; i++) //conceptual infinity + if (n >= ns[i]) //sequence too short, conceptual infinity + sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i)); + + difference_type localrank = rank * m / N ; + + int j; + for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); ++j) + a[sample[j].second] += n + 1; + for (; j < m; j++) + b[sample[j].second] -= n + 1; + + // Further refinement. + while (n > 0) + { + n /= 2; + + int lmax_seq = -1; // to avoid warning + const value_type* lmax = NULL; // impossible to avoid the warning? + for (int i = 0; i < m; i++) + { + if (a[i] > 0) + { + if (!lmax) + { + lmax = &(S(i)[a[i] - 1]); + lmax_seq = i; + } + else + { + // Max, favor rear sequences. + if (!comp(S(i)[a[i] - 1], *lmax)) + { + lmax = &(S(i)[a[i] - 1]); + lmax_seq = i; + } + } + } + } + + int i; + for (i = 0; i < m; i++) + { + difference_type middle = (b[i] + a[i]) / 2; + if (lmax && middle < ns[i] && + lcomp(std::make_pair(S(i)[middle], i), + std::make_pair(*lmax, lmax_seq))) + a[i] = std::min(a[i] + n + 1, ns[i]); + else + b[i] -= n + 1; + } + + difference_type leftsize = 0, total = 0; + for (int i = 0; i < m; i++) + { + leftsize += a[i] / (n + 1); + total += l / (n + 1); + } + + difference_type skew = static_cast<difference_type> + (static_cast<uint64>(total) * rank / N - leftsize); + + if (skew > 0) + { + // Move to the left, find smallest. 
+ std::priority_queue<std::pair<value_type, int>, + std::vector<std::pair<value_type, int> >, + lexicographic_reverse<value_type, int, Comparator> > + pq(lrcomp); + + for (int i = 0; i < m; i++) + if (b[i] < ns[i]) + pq.push(std::make_pair(S(i)[b[i]], i)); + + for (; skew != 0 && !pq.empty(); --skew) + { + int source = pq.top().second; + pq.pop(); + + a[source] = std::min(a[source] + n + 1, ns[source]); + b[source] += n + 1; + + if (b[source] < ns[source]) + pq.push(std::make_pair(S(source)[b[source]], source)); + } + } + else if (skew < 0) + { + // Move to the right, find greatest. + std::priority_queue<std::pair<value_type, int>, + std::vector<std::pair<value_type, int> >, + lexicographic<value_type, int, Comparator> > pq(lcomp); + + for (int i = 0; i < m; i++) + if (a[i] > 0) + pq.push(std::make_pair(S(i)[a[i] - 1], i)); + + for (; skew != 0; ++skew) + { + int source = pq.top().second; + pq.pop(); + + a[source] -= n + 1; + b[source] -= n + 1; + + if (a[source] > 0) + pq.push(std::make_pair(S(source)[a[source] - 1], source)); + } + } + } + + // Postconditions: + // a[i] == b[i] in most cases, except when a[i] has been clamped + // because of having reached the boundary + + // Now return the result, calculate the offset. + + // Compare the keys on both edges of the border. + + // Maximum of left edge, minimum of right edge. + value_type* maxleft = NULL; + value_type* minright = NULL; + for (int i = 0; i < m; i++) + { + if (a[i] > 0) + { + if (!maxleft) + maxleft = &(S(i)[a[i] - 1]); + else + { + // Max, favor rear sequences. + if (!comp(S(i)[a[i] - 1], *maxleft)) + maxleft = &(S(i)[a[i] - 1]); + } + } + if (b[i] < ns[i]) + { + if (!minright) + minright = &(S(i)[b[i]]); + else + { + // Min, favor fore sequences. 
+ if (comp(S(i)[b[i]], *minright)) + minright = &(S(i)[b[i]]); + } + } + } + + int seq = 0; + for (int i = 0; i < m; i++) + begin_offsets[i] = S(i) + a[i]; + + delete[] ns; + delete[] a; + delete[] b; + } + + + /** + * @brief Selects the element at a certain global rank from several + * sorted sequences. + * + * The sequences are passed via a sequence of random-access + * iterator pairs, none of the sequences may be empty. + * @param begin_seqs Begin of the sequence of iterator pairs. + * @param end_seqs End of the sequence of iterator pairs. + * @param rank The global rank to partition at. + * @param offset The rank of the selected element in the global + * subsequence of elements equal to the selected element. If the + * selected element is unique, this number is 0. + * @param comp The ordering functor, defaults to std::less. + */ + template<typename T, typename RanSeqs, typename RankType, + typename Comparator> + T + multiseq_selection(RanSeqs begin_seqs, RanSeqs end_seqs, RankType rank, + RankType& offset, Comparator comp = std::less<T>()) + { + _GLIBCXX_CALL(end_seqs - begin_seqs) + + typedef typename std::iterator_traits<RanSeqs>::value_type::first_type + It; + typedef typename std::iterator_traits<It>::difference_type + difference_type; + + lexicographic<T, int, Comparator> lcomp(comp); + lexicographic_reverse<T, int, Comparator> lrcomp(comp); + + // Number of sequences, number of elements in total (possibly + // including padding). + difference_type m = std::distance(begin_seqs, end_seqs); + difference_type N = 0; + difference_type nmax, n, r; + + for (int i = 0; i < m; i++) + N += std::distance(begin_seqs[i].first, begin_seqs[i].second); + + if (m == 0 || N == 0 || rank < 0 || rank >= N) + { + // Result undefined when there is no data or rank is outside bounds. 
+ throw std::exception(); + } + + + difference_type* ns = new difference_type[m]; + difference_type* a = new difference_type[m]; + difference_type* b = new difference_type[m]; + difference_type l; + + ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second); + nmax = ns[0]; + for (int i = 0; i < m; ++i) + { + ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second); + nmax = std::max(nmax, ns[i]); + } + + r = __log2(nmax) + 1; + + // Pad all lists to this length, at least as long as any ns[i], + // equality iff nmax = 2^k - 1 + l = pow2(r) - 1; + + // From now on, including padding. + N = l * m; + + for (int i = 0; i < m; ++i) + { + a[i] = 0; + b[i] = l; + } + n = l / 2; + + // Invariants: + // 0 <= a[i] <= ns[i], 0 <= b[i] <= l + +#define S(i) (begin_seqs[i].first) + + // Initial partition. + std::vector<std::pair<T, int> > sample; + + for (int i = 0; i < m; i++) + if (n < ns[i]) + sample.push_back(std::make_pair(S(i)[n], i)); + __gnu_sequential::sort(sample.begin(), sample.end(), + lcomp, sequential_tag()); + + // Conceptual infinity. + for (int i = 0; i < m; i++) + if (n >= ns[i]) + sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i)); + + difference_type localrank = rank * m / N ; + + int j; + for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); ++j) + a[sample[j].second] += n + 1; + for (; j < m; ++j) + b[sample[j].second] -= n + 1; + + // Further refinement. 
+ while (n > 0) + { + n /= 2; + + const T* lmax = NULL; + for (int i = 0; i < m; ++i) + { + if (a[i] > 0) + { + if (!lmax) + lmax = &(S(i)[a[i] - 1]); + else + { + if (comp(*lmax, S(i)[a[i] - 1])) //max + lmax = &(S(i)[a[i] - 1]); + } + } + } + + int i; + for (i = 0; i < m; i++) + { + difference_type middle = (b[i] + a[i]) / 2; + if (lmax && middle < ns[i] && comp(S(i)[middle], *lmax)) + a[i] = std::min(a[i] + n + 1, ns[i]); + else + b[i] -= n + 1; + } + + difference_type leftsize = 0, total = 0; + for (int i = 0; i < m; ++i) + { + leftsize += a[i] / (n + 1); + total += l / (n + 1); + } + + difference_type skew = ((unsigned long long)total * rank / N + - leftsize); + + if (skew > 0) + { + // Move to the left, find smallest. + std::priority_queue<std::pair<T, int>, + std::vector<std::pair<T, int> >, + lexicographic_reverse<T, int, Comparator> > pq(lrcomp); + + for (int i = 0; i < m; ++i) + if (b[i] < ns[i]) + pq.push(std::make_pair(S(i)[b[i]], i)); + + for (; skew != 0 && !pq.empty(); --skew) + { + int source = pq.top().second; + pq.pop(); + + a[source] = std::min(a[source] + n + 1, ns[source]); + b[source] += n + 1; + + if (b[source] < ns[source]) + pq.push(std::make_pair(S(source)[b[source]], source)); + } + } + else if (skew < 0) + { + // Move to the right, find greatest. + std::priority_queue<std::pair<T, int>, + std::vector<std::pair<T, int> >, + lexicographic<T, int, Comparator> > pq(lcomp); + + for (int i = 0; i < m; ++i) + if (a[i] > 0) + pq.push(std::make_pair(S(i)[a[i] - 1], i)); + + for (; skew != 0; ++skew) + { + int source = pq.top().second; + pq.pop(); + + a[source] -= n + 1; + b[source] -= n + 1; + + if (a[source] > 0) + pq.push(std::make_pair(S(source)[a[source] - 1], source)); + } + } + } + + // Postconditions: + // a[i] == b[i] in most cases, except when a[i] has been clamped + // because of having reached the boundary + + // Now return the result, calculate the offset. + + // Compare the keys on both edges of the border. 
+ + // Maximum of left edge, minimum of right edge. + bool maxleftset = false, minrightset = false; + + // Impossible to avoid the warning? + T maxleft, minright; + for (int i = 0; i < m; ++i) + { + if (a[i] > 0) + { + if (!maxleftset) + { + maxleft = S(i)[a[i] - 1]; + maxleftset = true; + } + else + { + // Max. + if (comp(maxleft, S(i)[a[i] - 1])) + maxleft = S(i)[a[i] - 1]; + } + } + if (b[i] < ns[i]) + { + if (!minrightset) + { + minright = S(i)[b[i]]; + minrightset = true; + } + else + { + // Min. + if (comp(S(i)[b[i]], minright)) + minright = S(i)[b[i]]; + } + } + } + + // Minright is the splitter, in any case. + + if (!maxleftset || comp(minright, maxleft)) + { + // Good luck, everything is split unambiguously. + offset = 0; + } + else + { + // We have to calculate an offset. + offset = 0; + + for (int i = 0; i < m; ++i) + { + difference_type lb = std::lower_bound(S(i), S(i) + ns[i], + minright, + comp) - S(i); + offset += a[i] - lb; + } + } + + delete[] ns; + delete[] a; + delete[] b; + + return minright; + } +} + +#undef S + +#endif /* _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/multiway_merge.h b/gcc-4.4.0/libstdc++-v3/include/parallel/multiway_merge.h new file mode 100644 index 000000000..bacff8dba --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/multiway_merge.h @@ -0,0 +1,2145 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/multiway_merge.h +* @brief Implementation of sequential and parallel multiway merge. +* +* Explanations on the high-speed merging routines in the appendix of +* +* P. Sanders. +* Fast priority queues for cached memory. +* ACM Journal of Experimental Algorithmics, 5, 2000. +* +* This file is a GNU parallel extension to the Standard C++ Library. +*/ + +// Written by Johannes Singler and Manuel Holtgrewe. + +#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H +#define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H + +#include <vector> + +#include <bits/stl_algo.h> +#include <parallel/features.h> +#include <parallel/parallel.h> +#include <parallel/losertree.h> +#if _GLIBCXX_ASSERTIONS +#include <parallel/checkers.h> +#endif + +/** @brief Length of a sequence described by a pair of iterators. */ +#define _GLIBCXX_PARALLEL_LENGTH(s) ((s).second - (s).first) + +namespace __gnu_parallel +{ + +// Announce guarded and unguarded iterator. + +template<typename RandomAccessIterator, typename Comparator> + class guarded_iterator; + +// Making the arguments const references seems too dangerous, +// the user-defined comparator might not be const. 
+template<typename RandomAccessIterator, typename Comparator> + inline bool + operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1, + guarded_iterator<RandomAccessIterator, Comparator>& bi2); + +template<typename RandomAccessIterator, typename Comparator> + inline bool + operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1, + guarded_iterator<RandomAccessIterator, Comparator>& bi2); + +/** @brief Iterator wrapper supporting an implicit supremum at the end + * of the sequence, dominating all comparisons. + * + * The implicit supremum comes with a performance cost. + * + * Deriving from RandomAccessIterator is not possible since + * RandomAccessIterator need not be a class. + */ +template<typename RandomAccessIterator, typename Comparator> + class guarded_iterator + { + private: + /** @brief Current iterator position. */ + RandomAccessIterator current; + + /** @brief End iterator of the sequence. */ + RandomAccessIterator end; + + /** @brief Comparator. */ + Comparator& comp; + + public: + /** @brief Constructor. Sets iterator to beginning of sequence. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param comp Comparator provided for associated overloaded + * compare operators. */ + guarded_iterator(RandomAccessIterator begin, + RandomAccessIterator end, Comparator& comp) + : current(begin), end(end), comp(comp) + { } + + /** @brief Pre-increment operator. + * @return This. */ + guarded_iterator<RandomAccessIterator, Comparator>& + operator++() + { + ++current; + return *this; + } + + /** @brief Dereference operator. + * @return Referenced element. */ + typename std::iterator_traits<RandomAccessIterator>::value_type& + operator*() + { return *current; } + + /** @brief Convert to wrapped iterator. + * @return Wrapped iterator. 
*/ + operator RandomAccessIterator() + { return current; } + + friend bool + operator< <RandomAccessIterator, Comparator>( + guarded_iterator<RandomAccessIterator, Comparator>& bi1, + guarded_iterator<RandomAccessIterator, Comparator>& bi2); + + friend bool + operator<= <RandomAccessIterator, Comparator>( + guarded_iterator<RandomAccessIterator, Comparator>& bi1, + guarded_iterator<RandomAccessIterator, Comparator>& bi2); + }; + +/** @brief Compare two elements referenced by guarded iterators. + * @param bi1 First iterator. + * @param bi2 Second iterator. + * @return @c True if less. */ +template<typename RandomAccessIterator, typename Comparator> + inline bool + operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1, + guarded_iterator<RandomAccessIterator, Comparator>& bi2) + { + if (bi1.current == bi1.end) //bi1 is sup + return bi2.current == bi2.end; //bi2 is not sup + if (bi2.current == bi2.end) //bi2 is sup + return true; + return (bi1.comp)(*bi1, *bi2); //normal compare + } + +/** @brief Compare two elements referenced by guarded iterators. + * @param bi1 First iterator. + * @param bi2 Second iterator. + * @return @c True if less equal. 
*/ +template<typename RandomAccessIterator, typename Comparator> + inline bool + operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1, + guarded_iterator<RandomAccessIterator, Comparator>& bi2) + { + if (bi2.current == bi2.end) //bi1 is sup + return bi1.current != bi1.end; //bi2 is not sup + if (bi1.current == bi1.end) //bi2 is sup + return false; + return !(bi1.comp)(*bi2, *bi1); //normal compare + } + +template<typename RandomAccessIterator, typename Comparator> + class unguarded_iterator; + +template<typename RandomAccessIterator, typename Comparator> + inline bool + operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, + unguarded_iterator<RandomAccessIterator, Comparator>& bi2); + +template<typename RandomAccessIterator, typename Comparator> + inline bool + operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, + unguarded_iterator<RandomAccessIterator, Comparator>& bi2); + +template<typename RandomAccessIterator, typename Comparator> + class unguarded_iterator + { + private: + /** @brief Current iterator position. */ + RandomAccessIterator current; + /** @brief Comparator. */ + mutable Comparator& comp; + + public: + /** @brief Constructor. Sets iterator to beginning of sequence. + * @param begin Begin iterator of sequence. + * @param end Unused, only for compatibility. + * @param comp Unused, only for compatibility. */ + unguarded_iterator(RandomAccessIterator begin, + RandomAccessIterator end, Comparator& comp) + : current(begin), comp(comp) + { } + + /** @brief Pre-increment operator. + * @return This. */ + unguarded_iterator<RandomAccessIterator, Comparator>& + operator++() + { + ++current; + return *this; + } + + /** @brief Dereference operator. + * @return Referenced element. */ + typename std::iterator_traits<RandomAccessIterator>::value_type& + operator*() + { return *current; } + + /** @brief Convert to wrapped iterator. + * @return Wrapped iterator. 
*/ + operator RandomAccessIterator() + { return current; } + + friend bool + operator< <RandomAccessIterator, Comparator>( + unguarded_iterator<RandomAccessIterator, Comparator>& bi1, + unguarded_iterator<RandomAccessIterator, Comparator>& bi2); + + friend bool + operator<= <RandomAccessIterator, Comparator>( + unguarded_iterator<RandomAccessIterator, Comparator>& bi1, + unguarded_iterator<RandomAccessIterator, Comparator>& bi2); + }; + +/** @brief Compare two elements referenced by unguarded iterators. + * @param bi1 First iterator. + * @param bi2 Second iterator. + * @return @c True if less. */ +template<typename RandomAccessIterator, typename Comparator> + inline bool + operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, + unguarded_iterator<RandomAccessIterator, Comparator>& bi2) + { + // Normal compare. + return (bi1.comp)(*bi1, *bi2); + } + +/** @brief Compare two elements referenced by unguarded iterators. + * @param bi1 First iterator. + * @param bi2 Second iterator. + * @return @c True if less equal. */ +template<typename RandomAccessIterator, typename Comparator> + inline bool + operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, + unguarded_iterator<RandomAccessIterator, Comparator>& bi2) + { + // Normal compare. + return !(bi1.comp)(*bi2, *bi1); + } + +/** @brief Highly efficient 3-way merging procedure. + * + * Merging is done with the algorithm implementation described by Peter + * Sanders. Basically, the idea is to minimize the number of necessary + * comparison after merging out an element. The implementation trick + * that makes this fast is that the order of the sequences is stored + * in the instruction pointer (translated into labels in C++). + * + * This works well for merging up to 4 sequences. + * + * Note that making the merging stable does <em>not</em> come at a + * performance hit. + * + * Whether the merging is done guarded or unguarded is selected by the + * used iterator class. 
*
 * @param seqs_begin Begin iterator of iterator pair input sequence.
 *   Exactly the first three sequences are merged; their @c first
 *   members are advanced past the merged elements on return.
 * @param seqs_end End iterator of iterator pair input sequence
 *   (not read by this function).
 * @param target Begin iterator of output sequence.
 * @param comp Comparator.
 * @param length Maximum length to merge, less equal than the
 * total number of elements available.
 *
 * @return End iterator of output sequence.
 */
template<template<typename RAI, typename C> class iterator,
         typename RandomAccessIteratorIterator,
         typename RandomAccessIterator3,
         typename _DifferenceTp,
         typename Comparator>
  RandomAccessIterator3
  multiway_merge_3_variant(
      RandomAccessIteratorIterator seqs_begin,
      RandomAccessIteratorIterator seqs_end,
      RandomAccessIterator3 target,
      _DifferenceTp length, Comparator comp)
  {
    _GLIBCXX_CALL(length);

    typedef _DifferenceTp difference_type;

    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
      ::value_type::first_type
      RandomAccessIterator1;
    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
      value_type;

    // The merge cases below emit an element before they test the
    // remaining length, so an empty request must be handled up front.
    if (length == 0)
      return target;

#if _GLIBCXX_ASSERTIONS
    _DifferenceTp orig_length = length;
#endif

    iterator<RandomAccessIterator1, Comparator>
      seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
      seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
      seq2(seqs_begin[2].first, seqs_begin[2].second, comp);

    // Determine the initial order of the three sequence heads and jump
    // to the label named after it: label sABC stands for the order
    // seqA, seqB, seqC.
    if (seq0 <= seq1)
      {
        if (seq1 <= seq2)
          goto s012;
        else
          if (seq2 <  seq0)
            goto s201;
          else
            goto s021;
      }
    else
      {
        if (seq1 <= seq2)
          {
            if (seq0 <= seq2)
              goto s102;
            else
              goto s120;
          }
        else
          goto s210;
      }
    // One merge state per ordering (a, b, c): emit the head of seq a,
    // advance it, then re-insert seq a among b and c by at most two
    // comparisons and jump to the label of the resulting order.  c0 and
    // c1 are the operators used against seq b and seq c; in the
    // instantiations below they are <= where a has the smaller sequence
    // number and < otherwise, so equal elements leave in sequence order.
#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1)     \
    s ## a ## b ## c :                                  \
      *target = *seq ## a;                              \
      ++target;                                         \
      --length;                                         \
      ++seq ## a;                                       \
      if (length == 0) goto finish;                     \
      if (seq ## a c0 seq ## b) goto s ## a ## b ## c;  \
      if (seq ## a c1 seq ## c) goto s ## b ## a ## c;  \
      goto s ## b ## c ## a;

    _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=);
    _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < );
    _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 0, 1, < , < );
    _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 0, 2, < , <=);
    _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 2, 1, <=, <=);
    _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 1, 0, < , < );

#undef _GLIBCXX_PARALLEL_MERGE_3_CASE

  finish:
    ;

#if _GLIBCXX_ASSERTIONS
    // The three sequences together must have advanced by exactly the
    // requested number of elements.
    _GLIBCXX_PARALLEL_ASSERT(
        ((RandomAccessIterator1)seq0 - seqs_begin[0].first) +
        ((RandomAccessIterator1)seq1 - seqs_begin[1].first) +
        ((RandomAccessIterator1)seq2 - seqs_begin[2].first)
        == orig_length);
#endif

    // Report the consumed prefixes back to the caller.
    seqs_begin[0].first = seq0;
    seqs_begin[1].first = seq1;
    seqs_begin[2].first = seq2;

    return target;
  }

/**
 * @brief Highly efficient 4-way merging procedure.
 *
 * Merging is done with the algorithm implementation described by Peter
 * Sanders. Basically, the idea is to minimize the number of necessary
 * comparison after merging out an element.  The implementation trick
 * that makes this fast is that the order of the sequences is stored
 * in the instruction pointer (translated into goto labels in C++).
 *
 * This works well for merging up to 4 sequences.
 *
 * Note that making the merging stable does <em>not</em> come at a
 * performance hit.
 *
 * Whether the merging is done guarded or unguarded is selected by the
 * used iterator class.
 *
 * @param seqs_begin Begin iterator of iterator pair input sequence.
 * @param seqs_end End iterator of iterator pair input sequence.
 * @param target Begin iterator of output sequence.
 * @param comp Comparator.
 * @param length Maximum length to merge, less equal than the
 * total number of elements available.
 *
 * @return End iterator of output sequence.
*/
template<template<typename RAI, typename C> class iterator,
         typename RandomAccessIteratorIterator,
         typename RandomAccessIterator3,
         typename _DifferenceTp,
         typename Comparator>
  RandomAccessIterator3
  multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin,
                           RandomAccessIteratorIterator seqs_end,
                           RandomAccessIterator3 target,
                           _DifferenceTp length, Comparator comp)
  {
    _GLIBCXX_CALL(length);
    typedef _DifferenceTp difference_type;

    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
      ::value_type::first_type
      RandomAccessIterator1;
    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
      value_type;

    // Exactly the first four sequences are merged; seqs_end is not read.
    iterator<RandomAccessIterator1, Comparator>
      seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
      seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
      seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
      seq3(seqs_begin[3].first, seqs_begin[3].second, comp);

    // Given the already established order a, b, c, insert seq d with
    // strict comparisons and jump to the label of the resulting order
    // (label sABCD stands for the order seqA, seqB, seqC, seqD).
#define _GLIBCXX_PARALLEL_DECISION(a,b,c,d) {                   \
      if (seq ## d < seq ## a) goto s ## d ## a ## b ## c;      \
      if (seq ## d < seq ## b) goto s ## a ## d ## b ## c;      \
      if (seq ## d < seq ## c) goto s ## a ## b ## d ## c;      \
      goto s ## a ## b ## c ## d;  }

    // Order seq0..seq2 first, then place seq3 with the macro above.
    if (seq0 <= seq1)
      {
        if (seq1 <= seq2)
          _GLIBCXX_PARALLEL_DECISION(0,1,2,3)
          else
            if (seq2 < seq0)
              _GLIBCXX_PARALLEL_DECISION(2,0,1,3)
              else
                _GLIBCXX_PARALLEL_DECISION(0,2,1,3)
                  }
    else
      {
        if (seq1 <= seq2)
          {
            if (seq0 <= seq2)
              _GLIBCXX_PARALLEL_DECISION(1,0,2,3)
              else
                _GLIBCXX_PARALLEL_DECISION(1,2,0,3)
                  }
        else
          _GLIBCXX_PARALLEL_DECISION(2,1,0,3)
            }

    // One merge state per ordering (a, b, c, d): stop if nothing is left
    // to merge, otherwise emit the head of seq a, advance it, re-insert
    // seq a among b, c and d by at most three comparisons and jump to
    // the label of the resulting order.  c0, c1 and c2 are the operators
    // used against seq b, c and d; in the instantiations below they are
    // <= where a has the smaller sequence number and < otherwise, so
    // equal elements leave in sequence order.
#define _GLIBCXX_PARALLEL_MERGE_4_CASE(a,b,c,d,c0,c1,c2)        \
    s ## a ## b ## c ## d:                                      \
      if (length == 0) goto finish;                             \
    *target = *seq ## a;                                        \
    ++target;                                                   \
    --length;                                                   \
    ++seq ## a;                                                 \
    if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d;       \
    if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d;       \
    if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d;       \
    goto s ## b ## c ## d ## a;

    _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 2, 3, <=, <=, <=);
    _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 3, 2, <=, <=, <=);
    _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 1, 3, <=, <=, <=);
    _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 3, 1, <=, <=, <=);
    _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 1, 2, <=, <=, <=);
    _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 2, 1, <=, <=, <=);
    _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 2, 3, < , <=, <=);
    _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 3, 2, < , <=, <=);
    _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 0, 3, <=, < , <=);
    _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 3, 0, <=, <=, < );
    _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 0, 2, <=, < , <=);
    _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 2, 0, <=, <=, < );
    _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 1, 3, < , < , <=);
    _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 3, 1, < , <=, < );
    _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 0, 3, < , < , <=);
    _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 3, 0, < , <=, < );
    _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 0, 1, <=, < , < );
    _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 1, 0, <=, < , < );
    _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 1, 2, < , < , < );
    _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 2, 1, < , < , < );
    _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 0, 2, < , < , < );
    _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 2, 0, < , < , < );
    _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 0, 1, < , < , < );
    _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 1, 0, < , < , < );

#undef _GLIBCXX_PARALLEL_MERGE_4_CASE
#undef _GLIBCXX_PARALLEL_DECISION

  finish:
    ;

    // Report the consumed prefixes back to the caller.
    seqs_begin[0].first = seq0;
    seqs_begin[1].first = seq1;
    seqs_begin[2].first = seq2;
    seqs_begin[3].first = seq3;

    return target;
  }

/** @brief Multi-way merging procedure for a high branching factor,
 *         guarded case.
 *
 * This merging variant uses a LoserTree class as selected by <tt>LT</tt>.
 *
 * Stability is selected through the used LoserTree class <tt>LT</tt>.
 *
 * At least one non-empty sequence is required.
 *
 * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge, less equal than the + * total number of elements available. + * + * @return End iterator of output sequence. + */ +template<typename LT, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> + RandomAccessIterator3 + multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + _DifferenceTp length, Comparator comp) + { + _GLIBCXX_CALL(length) + + typedef _DifferenceTp difference_type; + typedef typename std::iterator_traits<RandomAccessIteratorIterator> + ::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits<RandomAccessIterator1>::value_type + value_type; + + int k = static_cast<int>(seqs_end - seqs_begin); + + LT lt(k, comp); + + // Default value for potentially non-default-constructible types. + value_type* arbitrary_element = NULL; + + for (int t = 0; t < k; ++t) + { + if(arbitrary_element == NULL + && _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]) > 0) + arbitrary_element = &(*seqs_begin[t].first); + } + + for (int t = 0; t < k; ++t) + { + if (seqs_begin[t].first == seqs_begin[t].second) + lt.insert_start(*arbitrary_element, t, true); + else + lt.insert_start(*seqs_begin[t].first, t, false); + } + + lt.init(); + + int source; + + for (difference_type i = 0; i < length; ++i) + { + //take out + source = lt.get_min_source(); + + *(target++) = *(seqs_begin[source].first++); + + // Feed. + if (seqs_begin[source].first == seqs_begin[source].second) + lt.delete_min_insert(*arbitrary_element, true); + else + // Replace from same source. 
+ lt.delete_min_insert(*seqs_begin[source].first, false); + } + + return target; + } + +/** @brief Multi-way merging procedure for a high branching factor, + * unguarded case. + * + * Merging is done using the LoserTree class <tt>LT</tt>. + * + * Stability is selected by the used LoserTrees. + * + * @pre No input will run out of elements during the merge. + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge, less equal than the + * total number of elements available. + * + * @return End iterator of output sequence. + */ +template<typename LT, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, typename Comparator> + RandomAccessIterator3 + multiway_merge_loser_tree_unguarded( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + const typename std::iterator_traits<typename std::iterator_traits< + RandomAccessIteratorIterator>::value_type::first_type>::value_type& + sentinel, + _DifferenceTp length, + Comparator comp) + { + _GLIBCXX_CALL(length) + typedef _DifferenceTp difference_type; + + typedef typename std::iterator_traits<RandomAccessIteratorIterator> + ::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits<RandomAccessIterator1>::value_type + value_type; + + int k = seqs_end - seqs_begin; + + LT lt(k, sentinel, comp); + + for (int t = 0; t < k; ++t) + { +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second); +#endif + lt.insert_start(*seqs_begin[t].first, t, false); + } + + lt.init(); + + int source; + +#if _GLIBCXX_ASSERTIONS + difference_type i = 0; +#endif + + RandomAccessIterator3 target_end = target + length; + while (target < target_end) + { + // Take out. 
+ source = lt.get_min_source(); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(0 <= source && source < k); + _GLIBCXX_PARALLEL_ASSERT(i == 0 + || !comp(*(seqs_begin[source].first), *(target - 1))); +#endif + + // Feed. + *(target++) = *(seqs_begin[source].first++); + +#if _GLIBCXX_ASSERTIONS + ++i; +#endif + // Replace from same source. + lt.delete_min_insert(*seqs_begin[source].first, false); + } + + return target; + } + + +/** @brief Multi-way merging procedure for a high branching factor, + * requiring sentinels to exist. + * + * @param stable The value must the same as for the used LoserTrees. + * @param UnguardedLoserTree Loser Tree variant to use for the unguarded + * merging. + * @param GuardedLoserTree Loser Tree variant to use for the guarded + * merging. + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge, less equal than the + * total number of elements available. + * + * @return End iterator of output sequence. 
+ */ +template< + typename UnguardedLoserTree, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> + RandomAccessIterator3 + multiway_merge_loser_tree_sentinel( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + const typename std::iterator_traits<typename std::iterator_traits< + RandomAccessIteratorIterator>::value_type::first_type>::value_type& + sentinel, + _DifferenceTp length, + Comparator comp) + { + _GLIBCXX_CALL(length) + + typedef _DifferenceTp difference_type; + typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type; + typedef typename std::iterator_traits<RandomAccessIteratorIterator> + ::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits<RandomAccessIterator1>::value_type + value_type; + + RandomAccessIterator3 target_end; + + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) + // Move the sequence ends behind the sentinel spots. This has the + // effect that the sentinel appears to be within the sequence. Then, + // we can use the unguarded variant if we merge out as many + // non-sentinel elements as we have. + ++((*s).second); + + // Run the unguarded merge on the temporarily extended sequences. + target_end = multiway_merge_loser_tree_unguarded + <UnguardedLoserTree> + (seqs_begin, seqs_end, target, sentinel, length, comp); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); + _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); +#endif + + // Restore the sequence ends so the sentinels are not contained in the + // sequence any more (see comment in loop above). + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) + --((*s).second); + + return target_end; + } + +/** + * @brief Traits for determining whether the loser tree should + * use pointers or copies. 
+ * + * The field "use_pointer" is used to determine whether to use pointers in + * the loser trees or whether to copy the values into the loser tree. + * + * The default behavior is to use pointers if the data type is 4 times as + * big as the pointer to it. + * + * Specialize for your data type to customize the behavior. + * + * Example: + * + * template<> + * struct loser_tree_traits<int> + * { static const bool use_pointer = false; }; + * + * template<> + * struct loser_tree_traits<heavyweight_type> + * { static const bool use_pointer = true; }; + * + * @param T type to give the loser tree traits for. + */ +template <typename T> +struct loser_tree_traits +{ + /** + * @brief True iff to use pointers instead of values in loser trees. + * + * The default behavior is to use pointers if the data type is four + * times as big as the pointer to it. + */ + static const bool use_pointer = (sizeof(T) > 4 * sizeof(T*)); +}; + +/** + * @brief Switch for 3-way merging with sentinels turned off. + * + * Note that 3-way merging is always stable! + */ +template< + bool sentinels /*default == false*/, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_3_variant_sentinel_switch +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + _DifferenceTp length, Comparator comp) + { + return multiway_merge_3_variant<guarded_iterator>( + seqs_begin, seqs_end, target, length, comp); + } +}; + +/** + * @brief Switch for 3-way merging with sentinels turned on. + * + * Note that 3-way merging is always stable! 
+ */ +template< + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_3_variant_sentinel_switch + <true, RandomAccessIteratorIterator, RandomAccessIterator3, + _DifferenceTp, Comparator> +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + _DifferenceTp length, Comparator comp) + { + return multiway_merge_3_variant<unguarded_iterator>( + seqs_begin, seqs_end, target, length, comp); + } +}; + +/** + * @brief Switch for 4-way merging with sentinels turned off. + * + * Note that 4-way merging is always stable! + */ +template< + bool sentinels /*default == false*/, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_4_variant_sentinel_switch +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + _DifferenceTp length, Comparator comp) + { + return multiway_merge_4_variant<guarded_iterator>( + seqs_begin, seqs_end, target, length, comp); + } +}; + +/** + * @brief Switch for 4-way merging with sentinels turned on. + * + * Note that 4-way merging is always stable! 
+ */ +template< + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_4_variant_sentinel_switch + <true, RandomAccessIteratorIterator, RandomAccessIterator3, + _DifferenceTp, Comparator> +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + _DifferenceTp length, Comparator comp) + { + return multiway_merge_4_variant<unguarded_iterator>( + seqs_begin, seqs_end, target, length, comp); + } +}; + +/** + * @brief Switch for k-way merging with sentinels turned on. + */ +template< + bool sentinels, + bool stable, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_k_variant_sentinel_switch +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + const typename std::iterator_traits<typename std::iterator_traits< + RandomAccessIteratorIterator>::value_type::first_type>::value_type& + sentinel, + _DifferenceTp length, Comparator comp) + { + typedef typename std::iterator_traits<RandomAccessIteratorIterator> + ::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits<RandomAccessIterator1>::value_type + value_type; + + return multiway_merge_loser_tree_sentinel< + typename __gnu_cxx::__conditional_type< + loser_tree_traits<value_type>::use_pointer + , LoserTreePointerUnguarded<stable, value_type, Comparator> + , LoserTreeUnguarded<stable, value_type, Comparator> + >::__type>(seqs_begin, seqs_end, target, sentinel, length, comp); + } +}; + +/** + * @brief Switch for k-way merging with sentinels turned off. 
+ */ +template< + bool stable, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_k_variant_sentinel_switch + <false, stable, RandomAccessIteratorIterator, RandomAccessIterator3, + _DifferenceTp, Comparator> +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + const typename std::iterator_traits<typename std::iterator_traits< + RandomAccessIteratorIterator>::value_type::first_type>::value_type& + sentinel, + _DifferenceTp length, Comparator comp) + { + typedef typename std::iterator_traits<RandomAccessIteratorIterator> + ::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits<RandomAccessIterator1>::value_type + value_type; + + return multiway_merge_loser_tree< + typename __gnu_cxx::__conditional_type< + loser_tree_traits<value_type>::use_pointer + , LoserTreePointer<stable, value_type, Comparator> + , LoserTree<stable, value_type, Comparator> + >::__type >(seqs_begin, seqs_end, target, length, comp); + } +}; + +/** @brief Sequential multi-way merging switch. + * + * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor and + * runtime settings. + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge, possibly larger than the + * number of elements available. + * @param stable Stable merging incurs a performance penalty. + * @param sentinel The sequences have a sentinel element. + * @return End iterator of output sequence. 
*/ +template< + bool stable, + bool sentinels, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> + RandomAccessIterator3 + sequential_multiway_merge( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + const typename std::iterator_traits<typename std::iterator_traits< + RandomAccessIteratorIterator>::value_type::first_type>::value_type& + sentinel, + _DifferenceTp length, Comparator comp) + { + _GLIBCXX_CALL(length) + + typedef _DifferenceTp difference_type; + typedef typename std::iterator_traits<RandomAccessIteratorIterator> + ::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits<RandomAccessIterator1>::value_type + value_type; + +#if _GLIBCXX_ASSERTIONS + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) + { + _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp)); + } +#endif + + _DifferenceTp total_length = 0; + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) + total_length += _GLIBCXX_PARALLEL_LENGTH(*s); + + length = std::min<_DifferenceTp>(length, total_length); + + if(length == 0) + return target; + + RandomAccessIterator3 return_target = target; + int k = static_cast<int>(seqs_end - seqs_begin); + + switch (k) + { + case 0: + break; + case 1: + return_target = std::copy(seqs_begin[0].first, + seqs_begin[0].first + length, + target); + seqs_begin[0].first += length; + break; + case 2: + return_target = merge_advance(seqs_begin[0].first, + seqs_begin[0].second, + seqs_begin[1].first, + seqs_begin[1].second, + target, length, comp); + break; + case 3: + return_target = multiway_merge_3_variant_sentinel_switch< + sentinels + , RandomAccessIteratorIterator + , RandomAccessIterator3 + , _DifferenceTp + , Comparator>()(seqs_begin, seqs_end, target, length, comp); + break; + case 4: + return_target = 
multiway_merge_4_variant_sentinel_switch< + sentinels + , RandomAccessIteratorIterator + , RandomAccessIterator3 + , _DifferenceTp + , Comparator>()(seqs_begin, seqs_end, target, length, comp); + break; + default: + return_target = multiway_merge_k_variant_sentinel_switch< + sentinels + , stable + , RandomAccessIteratorIterator + , RandomAccessIterator3 + , _DifferenceTp + , Comparator>()(seqs_begin, seqs_end, target, sentinel, length, comp); + break; + } +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp)); +#endif + + return return_target; + } + +/** + * @brief Stable sorting functor. + * + * Used to reduce code instantiation in multiway_merge_sampling_splitting. + * + * Primary template; it calls __gnu_sequential::stable_sort. The + * specialization for stable == false calls __gnu_sequential::sort instead. + */ +template<bool stable, class RandomAccessIterator, class StrictWeakOrdering> +struct sampling_sorter +{ + void operator()(RandomAccessIterator first, RandomAccessIterator last, + StrictWeakOrdering comp) + { __gnu_sequential::stable_sort(first, last, comp); } +}; + +/** + * @brief Non-stable sorting functor. + * + * Used to reduce code instantiation in multiway_merge_sampling_splitting. + */ +template<class RandomAccessIterator, class StrictWeakOrdering> +struct sampling_sorter<false, RandomAccessIterator, StrictWeakOrdering> +{ + void operator()(RandomAccessIterator first, RandomAccessIterator last, + StrictWeakOrdering comp) + { __gnu_sequential::sort(first, last, comp); } +}; + +/** + * @brief Sampling based splitting for parallel multiway-merge routine. 
+ */ +template< + bool stable + , typename RandomAccessIteratorIterator + , typename Comparator + , typename difference_type> +void multiway_merge_sampling_splitting( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + difference_type length, difference_type total_length, Comparator comp, + std::vector<std::pair<difference_type, difference_type> > *pieces) +{ + typedef typename std::iterator_traits<RandomAccessIteratorIterator> + ::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits<RandomAccessIterator1>::value_type + value_type; + + // k sequences. + int k = static_cast<int>(seqs_end - seqs_begin); + + int num_threads = omp_get_num_threads(); + + difference_type num_samples = + __gnu_parallel::_Settings::get().merge_oversampling * num_threads; + + value_type* samples = static_cast<value_type*>( + ::operator new(sizeof(value_type) * k * num_samples)); + // Sample. + for (int s = 0; s < k; ++s) + for (difference_type i = 0; i < num_samples; ++i) + { + difference_type sample_index = + static_cast<difference_type>( + _GLIBCXX_PARALLEL_LENGTH(seqs_begin[s]) + * (double(i + 1) / (num_samples + 1)) + * (double(length) / total_length)); + new(&(samples[s * num_samples + i])) + value_type(seqs_begin[s].first[sample_index]); + } + + // Sort stable or non-stable, depending on value of template parameter + // "stable". + sampling_sorter<stable, value_type*, Comparator>()( + samples, samples + (num_samples * k), comp); + + for (int slab = 0; slab < num_threads; ++slab) + // For each slab / processor. + for (int seq = 0; seq < k; ++seq) + { + // For each sequence. + if (slab > 0) + pieces[slab][seq].first = + std::upper_bound( + seqs_begin[seq].first, + seqs_begin[seq].second, + samples[num_samples * k * slab / num_threads], + comp) + - seqs_begin[seq].first; + else + // Absolute beginning. 
+ pieces[slab][seq].first = 0; + if ((slab + 1) < num_threads) + pieces[slab][seq].second = + std::upper_bound( + seqs_begin[seq].first, + seqs_begin[seq].second, + samples[num_samples * k * (slab + 1) / + num_threads], comp) + - seqs_begin[seq].first; + else + // Absolute end. + pieces[slab][seq].second = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]); + } + // The samples were copy-constructed into raw storage with placement new, + // so destroy them explicitly before releasing that storage. + for (difference_type i = 0; i < k * num_samples; ++i) + samples[i].~value_type(); + ::operator delete(samples); +} + +/** + * @brief Exact splitting for parallel multiway-merge routine. + * + * None of the passed sequences may be empty. + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param length Number of elements to merge; also the rank of the last + * split point. + * @param total_length Total number of elements in all sequences. + * @param comp Comparator. + * @param pieces Output. pieces[t][s] receives the (first, second) offsets + * into sequence s, relative to its begin iterator, that slab t has to + * merge. Indexed by slab in [0, omp_get_num_threads()). + */ +template< + bool stable + , typename RandomAccessIteratorIterator + , typename Comparator + , typename difference_type> +void multiway_merge_exact_splitting( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + difference_type length, difference_type total_length, Comparator comp, + std::vector<std::pair<difference_type, difference_type> > *pieces) +{ + typedef typename std::iterator_traits<RandomAccessIteratorIterator> + ::value_type::first_type + RandomAccessIterator1; + + // tight: every available element is merged, so the last slab simply + // runs to the end of each sequence. + const bool tight = (total_length == length); + + // k sequences. + const int k = static_cast<int>(seqs_end - seqs_begin); + + const int num_threads = omp_get_num_threads(); + + // (Settings::multiway_merge_splitting == __gnu_parallel::_Settings::EXACT). + std::vector<RandomAccessIterator1>* offsets = + new std::vector<RandomAccessIterator1>[num_threads]; + std::vector< + std::pair<RandomAccessIterator1, RandomAccessIterator1> + > se(k); + + copy(seqs_begin, seqs_end, se.begin()); + + difference_type* borders = + new difference_type[num_threads + 1]; + equally_split(length, num_threads, borders); + + for (int s = 0; s < (num_threads - 1); ++s) + { + offsets[s].resize(k); + multiseq_partition( + se.begin(), se.end(), borders[s + 1], + offsets[s].begin(), comp); + } + + // The split point at rank 'length' is also needed if the merge stops + // short of the total length. It does not depend on s, so it is computed + // once, outside the loop; this also covers num_threads == 1, where the + // loop above never runs. 
+ if (!tight) + { + offsets[num_threads - 1].resize(k); + multiseq_partition(se.begin(), se.end(), + difference_type(length), + offsets[num_threads - 1].begin(), comp); + } + + // The borders are only needed for the partitioning above. + delete[] borders; + + + for (int slab = 0; slab < num_threads; ++slab) + { + // For each slab / processor. + for (int seq = 0; seq < k; ++seq) + { + // For each sequence. + if (slab == 0) + { + // Absolute beginning. + pieces[slab][seq].first = 0; + } + else + pieces[slab][seq].first = + pieces[slab - 1][seq].second; + if (!tight || slab < (num_threads - 1)) + pieces[slab][seq].second = + offsets[slab][seq] - seqs_begin[seq].first; + else + { + // slab == num_threads - 1 + pieces[slab][seq].second = + _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]); + } + } + } + delete[] offsets; +} + +/** @brief Parallel multi-way merge routine. + * + * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor + * and runtime settings. + * + * Must not be called if the number of sequences is 1. + * + * @param Splitter functor to split input (either exact or sampling based) + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator of output sequence. + * @param comp Comparator. + * @param length Maximum length to merge, possibly larger than the + * number of elements available. + * @param stable Stable merging incurs a performance penalty. + * @param sentinel Ignored. + * @return End iterator of output sequence. 
+ */ +template< + bool stable, + bool sentinels, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Splitter, + typename Comparator + > + RandomAccessIterator3 + parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Splitter splitter, + _DifferenceTp length, + Comparator comp, + thread_index_t num_threads) + { +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(seqs_end - seqs_begin > 1); +#endif + + _GLIBCXX_CALL(length) + + typedef _DifferenceTp difference_type; + typedef typename std::iterator_traits<RandomAccessIteratorIterator> + ::value_type::first_type + RandomAccessIterator1; + typedef typename + std::iterator_traits<RandomAccessIterator1>::value_type value_type; + // Element type of ne_seqs; named so it can be destroyed explicitly below. + typedef std::pair<RandomAccessIterator1, RandomAccessIterator1> seq_type; + + // Leave only non-empty sequences. The copies live in raw storage and + // are placement-constructed one by one; k counts how many exist. + std::pair<RandomAccessIterator1, RandomAccessIterator1>* ne_seqs = + static_cast<std::pair<RandomAccessIterator1, RandomAccessIterator1>*>( + ::operator new( + sizeof(std::pair<RandomAccessIterator1, RandomAccessIterator1>) + * (seqs_end - seqs_begin))); + int k = 0; + difference_type total_length = 0; + for (RandomAccessIteratorIterator raii = seqs_begin; + raii != seqs_end; ++raii) + { + _DifferenceTp seq_length = _GLIBCXX_PARALLEL_LENGTH(*raii); + if(seq_length > 0) + { + total_length += seq_length; + //ne_seqs[k] = *raii; + new(&(ne_seqs[k++])) + std::pair<RandomAccessIterator1, RandomAccessIterator1>(*raii); + } + } + + _GLIBCXX_CALL(total_length) + + length = std::min<_DifferenceTp>(length, total_length); + + if (total_length == 0 || k == 0) + { + // Nothing was constructed in ne_seqs, so only the storage is freed. + ::operator delete(ne_seqs); + return target; + } + + std::vector<std::pair<difference_type, difference_type> >* pieces; + + num_threads = static_cast<thread_index_t> + (std::min<difference_type>(num_threads, total_length)); + +# pragma omp parallel num_threads (num_threads) + { +# pragma omp single + { + num_threads = omp_get_num_threads(); + // Thread t will have 
to merge pieces[iam][0..k - 1] + pieces = new std::vector< + std::pair<difference_type, difference_type> >[num_threads]; + for (int s = 0; s < num_threads; ++s) + pieces[s].resize(k); + + splitter(ne_seqs, ne_seqs + k, length, total_length, + comp, pieces); + } //single + + thread_index_t iam = omp_get_thread_num(); + + // Output offset of this thread: the sum of the begin offsets of its + // pieces over all sequences. + difference_type target_position = 0; + + for (int c = 0; c < k; ++c) + target_position += pieces[iam][c].first; + + std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks + = new std::pair<RandomAccessIterator1, RandomAccessIterator1>[k]; + + for (int s = 0; s < k; ++s) + { + chunks[s] = std::make_pair( + ne_seqs[s].first + pieces[iam][s].first, + ne_seqs[s].first + pieces[iam][s].second); + } + + if(length > target_position) + sequential_multiway_merge<stable, sentinels>( + chunks, chunks + k, target + target_position, + *(seqs_begin->second), length - target_position, comp); + + delete[] chunks; + } // parallel + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp)); +#endif + + // ne_seqs is only read inside the parallel region: destroy the k + // placement-constructed pairs and release their raw storage. + for (int s = 0; s < k; ++s) + ne_seqs[s].~seq_type(); + ::operator delete(ne_seqs); + + k = 0; + // Advance the begin iterator of each non-empty input sequence by the + // end offset of the last thread's piece for that sequence. + for (RandomAccessIteratorIterator raii = seqs_begin; + raii != seqs_end; ++raii) + { + _DifferenceTp seq_length = _GLIBCXX_PARALLEL_LENGTH(*raii); + if(seq_length > 0) + (*raii).first += pieces[num_threads - 1][k++].second; + } + + delete[] pieces; + + return target + length; + } + +/** + * @brief Multiway Merge Frontend. + * + * Merge the sequences specified by seqs_begin and seqs_end into + * target. seqs_begin and seqs_end must point to a sequence of + * pairs. These pairs must contain an iterator to the beginning + * of a sequence in their first entry and an iterator to the end of + * the same sequence in their second entry. + * + * Ties are broken arbitrarily. See stable_multiway_merge for a variant + * that breaks ties by sequence number but is slower. 
+ * + * The first entries of the pairs (i.e. the begin iterators) will be moved + * forward. + * + * The output sequence has to provide enough space for all elements + * that are written to it. + * + * This function will merge the input sequences: + * + * - not stable + * - parallel, depending on the input size and Settings + * - using sampling for splitting + * - not using sentinels + * + * Example: + * + * <pre> + * int sequences[10][10]; + * for (int i = 0; i < 10; ++i) + * for (int j = 0; j < 10; ++j) + * sequences[i][j] = j; + * + * int out[33]; + * std::vector<std::pair<int*, int*> > seqs; + * for (int i = 0; i < 10; ++i) + * { seqs.push_back(std::make_pair(sequences[i], sequences[i] + 10)); } + * + * multiway_merge(seqs.begin(), seqs.end(), out, 33, std::less<int>()); + * </pre> + * + * @see stable_multiway_merge + * + * @pre All input sequences must be sorted. + * @pre Target must provide enough space to merge out length elements or + * the number of elements in all sequences, whichever is smaller. + * + * @post [target, return value) contains merged elements from the + * input sequences. + * @post return value - target = min(length, number of elements in all + * sequences). + * + * @param RandomAccessIteratorPairIterator iterator over sequence + * of pairs of iterators + * @param RandomAccessIteratorOut iterator over target sequence + * @param _DifferenceTp difference type for the sequence + * @param Comparator strict weak ordering type to compare elements + * in sequences + * + * @param seqs_begin begin of the sequence of sequences + * @param seqs_end end of the sequence of sequences + * @param target target sequence to merge to. + * @param comp strict weak ordering to use for element comparison. + * @param length Maximum length to merge, possibly larger than the + * number of elements available. 
+ * + * @return end iterator of output sequence + */ +// multiway_merge +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , __gnu_parallel::sequential_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute multiway merge *sequentially*. + return sequential_multiway_merge + </* stable = */ false, /* sentinels = */ false> + (seqs_begin, seqs_end, target, *(seqs_begin->second), length, comp); +} + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , __gnu_parallel::exact_tag tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* stable = */ false, /* sentinels = */ false>( + seqs_begin, seqs_end, target, + multiway_merge_exact_splitting</* stable = */ false, + typename std::iterator_traits<RandomAccessIteratorPairIterator> + ::value_type*, Comparator, _DifferenceTp>, + static_cast<difference_type>(length), comp, tag.get_num_threads()); + else + return sequential_multiway_merge + </* stable = */ false, /* sentinels = */ false>( + seqs_begin, seqs_end, target, *(seqs_begin->second), length, comp); +} + +/** + * @brief Non-stable multiway merge, splitting the input by sampling. + * + * Returns target unchanged if there are no sequences. Otherwise calls + * parallel_multiway_merge if there is more than one sequence and + * _GLIBCXX_PARALLEL_CONDITION holds for the multiway_merge_minimal_k and + * multiway_merge_minimal_n settings, and sequential_multiway_merge if not. + * + * @param seqs_begin begin of the sequence of iterator pairs + * @param seqs_end end of the sequence of iterator pairs + * @param target target sequence to merge to. + * @param length Maximum length to merge. + * @param comp strict weak ordering to use for element comparison. + * @param tag Selects this overload; supplies the number of threads. + * @return end iterator of output sequence + */ +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , __gnu_parallel::sampling_tag tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + // The sampling_tag overload must split by sampling, not exactly. + return parallel_multiway_merge + </* stable = */ false, /* sentinels = */ false>( + seqs_begin, seqs_end, + target, + multiway_merge_sampling_splitting</* stable = */ false, + typename std::iterator_traits<RandomAccessIteratorPairIterator> + ::value_type*, Comparator, _DifferenceTp>, + static_cast<difference_type>(length), comp, tag.get_num_threads()); + else + return sequential_multiway_merge + </* stable = */ false, /* sentinels = */ false>( + seqs_begin, seqs_end, + target, *(seqs_begin->second), length, comp); +} + +// public interface +// parallel_tag (the default): forwards to the exact_tag overload with the +// thread count of the tag. +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , parallel_tag tag = parallel_tag(0)) +{ + return multiway_merge(seqs_begin, seqs_end, target, length, comp, + exact_tag(tag.get_num_threads())); +} + +// public interface +// default_parallel_tag: forwards to the exact_tag overload with the thread +// count of the tag. +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , default_parallel_tag tag) +{ + return multiway_merge(seqs_begin, seqs_end, target, length, comp, + exact_tag(tag.get_num_threads())); +} + +// stable_multiway_merge +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , 
typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , __gnu_parallel::sequential_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute multiway merge *sequentially*. + return sequential_multiway_merge + </* stable = */ true, /* sentinels = */ false> + (seqs_begin, seqs_end, target, *(seqs_begin->second), length, comp); +} + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , __gnu_parallel::exact_tag tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* stable = */ true, /* sentinels = */ false>( + seqs_begin, seqs_end, + target, + multiway_merge_exact_splitting</* stable = */ true, + typename std::iterator_traits<RandomAccessIteratorPairIterator> + ::value_type*, Comparator, _DifferenceTp>, + static_cast<difference_type>(length), comp, tag.get_num_threads()); + else + return sequential_multiway_merge</* stable = */ true, + /* sentinels = */ false>( + seqs_begin, seqs_end, + target, *(seqs_begin->second), length, comp); +} + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , sampling_tag tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* stable = */ true, /* sentinels = */ false>( + seqs_begin, seqs_end, + target, + multiway_merge_sampling_splitting</* stable = */ true, + typename std::iterator_traits<RandomAccessIteratorPairIterator> + ::value_type*, Comparator, _DifferenceTp>, + static_cast<difference_type>(length), comp, tag.get_num_threads()); + else + return sequential_multiway_merge + </* stable = */ true, /* sentinels = */ false>( + seqs_begin, seqs_end, + target, *(seqs_begin->second), length, comp); +} + + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , parallel_tag tag = parallel_tag(0)) +{ + return stable_multiway_merge(seqs_begin, seqs_end, target, length, comp, + exact_tag(tag.get_num_threads())); +} + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , default_parallel_tag tag) +{ + return stable_multiway_merge(seqs_begin, seqs_end, target, length, comp, + exact_tag(tag.get_num_threads())); +} + +/** + * @brief Multiway Merge Frontend. + * + * Merge the sequences specified by seqs_begin and seqs_end into + * target. 
seqs_begin and seqs_end must point to a sequence of + * pairs. These pairs must contain an iterator to the beginning + * of a sequence in their first entry and an iterator to the end of + * the same sequence in their second entry. + * + * Ties are broken arbitrarily. See stable_multiway_merge for a variant + * that breaks ties by sequence number but is slower. + * + * The first entries of the pairs (i.e. the begin iterators) will be moved + * forward accordingly. + * + * The output sequence has to provide enough space for all elements + * that are written to it. + * + * This function will merge the input sequences: + * + * - not stable + * - parallel, depending on the input size and Settings + * - using sampling for splitting + * - using sentinels + * + * You have to take care that the element the end iterator points to is + * readable and contains a value that is greater than any other non-sentinel + * value in all sequences. + * + * Example: + * + * <pre> + * int sequences[10][11]; + * for (int i = 0; i < 10; ++i) + * for (int j = 0; j < 11; ++j) + * sequences[i][j] = j; // last one is sentinel! + * + * int out[33]; + * std::vector<std::pair<int*, int*> > seqs; + * for (int i = 0; i < 10; ++i) + * { seqs.push_back(std::make_pair(sequences[i], sequences[i] + 10)); } + * + * multiway_merge_sentinels(seqs.begin(), seqs.end(), out, 33, std::less<int>()); + * </pre> + * + * @pre All input sequences must be sorted. + * @pre Target must provide enough space to merge out length elements or + * the number of elements in all sequences, whichever is smaller. + * @pre For each @c i, @c seqs_begin[i].second must be the end + * marker of the sequence, but must also reference one more readable + * (sentinel) element. + * + * @post [target, return value) contains merged elements from the + * input sequences. + * @post return value - target = min(length, number of elements in all + * sequences). 
+ * + * @see stable_multiway_merge_sentinels + * + * @param RandomAccessIteratorPairIterator iterator over sequence + * of pairs of iterators + * @param RandomAccessIteratorOut iterator over target sequence + * @param _DifferenceTp difference type for the sequence + * @param Comparator strict weak ordering type to compare elements + * in sequences + * + * @param seqs_begin begin of sequence sequence + * @param seqs_end end of sequence sequence + * @param target target sequence to merge to. + * @param comp strict weak ordering to use for element comparison. + * @param length Maximum length to merge, possibly larger than the + * number of elements available. + * + * @return end iterator of output sequence + */ +// multiway_merge_sentinels +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , __gnu_parallel::sequential_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute multiway merge *sequentially*. 
+ return sequential_multiway_merge + </* stable = */ false, /* sentinels = */ true> + (seqs_begin, seqs_end, + target, *(seqs_begin->second), length, comp); +} + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , __gnu_parallel::exact_tag tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* stable = */ false, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, + multiway_merge_exact_splitting</* stable = */ false, + typename std::iterator_traits<RandomAccessIteratorPairIterator> + ::value_type*, Comparator, _DifferenceTp>, + static_cast<difference_type>(length), comp, tag.get_num_threads()); + else + return sequential_multiway_merge + </* stable = */ false, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, *(seqs_begin->second), length, comp); +} + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , 
RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , sampling_tag tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* stable = */ false, /* sentinels = */ true> + (seqs_begin, seqs_end, target, + multiway_merge_sampling_splitting</* stable = */ false, + typename std::iterator_traits<RandomAccessIteratorPairIterator> + ::value_type*, Comparator, _DifferenceTp>, + static_cast<difference_type>(length), comp, tag.get_num_threads()); + else + return sequential_multiway_merge + </* stable = */false, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, *(seqs_begin->second), length, comp); +} + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , parallel_tag tag = parallel_tag(0)) +{ + return multiway_merge_sentinels(seqs_begin, seqs_end, target, length, comp, + exact_tag(tag.get_num_threads())); +} + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge_sentinels(RandomAccessIteratorPairIterator 
seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , default_parallel_tag tag) +{ + return multiway_merge_sentinels(seqs_begin, seqs_end, target, length, comp, + exact_tag(tag.get_num_threads())); +} + +// stable_multiway_merge_sentinels +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , __gnu_parallel::sequential_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute multiway merge *sequentially*. + return sequential_multiway_merge + </* stable = */ true, /* sentinels = */ true> + (seqs_begin, seqs_end, target, *(seqs_begin->second), length, comp); +} + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , __gnu_parallel::exact_tag tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* stable = */ true, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, + multiway_merge_exact_splitting</* stable = */ true, + typename std::iterator_traits<RandomAccessIteratorPairIterator> + ::value_type*, Comparator, _DifferenceTp>, + static_cast<difference_type>(length), comp, tag.get_num_threads()); + else + return sequential_multiway_merge + </* stable = */ true, /* sentinels = */ true>( + seqs_begin, seqs_end, target, *(seqs_begin->second), length, comp); +} + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , sampling_tag tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* stable = */ true, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, + multiway_merge_sampling_splitting</* stable = */ true, + typename std::iterator_traits<RandomAccessIteratorPairIterator> + ::value_type*, Comparator, _DifferenceTp>, + static_cast<difference_type>(length), comp, tag.get_num_threads()); + else + return sequential_multiway_merge + </* stable = */ true, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, *(seqs_begin->second), length, comp); +} + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , parallel_tag tag = parallel_tag(0)) +{ + return stable_multiway_merge_sentinels(seqs_begin, seqs_end, target, length, comp, + exact_tag(tag.get_num_threads())); +} + +// public interface +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , _DifferenceTp length, Comparator comp + , default_parallel_tag tag) +{ + return stable_multiway_merge_sentinels(seqs_begin, seqs_end, target, length, comp, + exact_tag(tag.get_num_threads())); +} + +}; // namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H */ diff --git 
a/gcc-4.4.0/libstdc++-v3/include/parallel/multiway_mergesort.h b/gcc-4.4.0/libstdc++-v3/include/parallel/multiway_mergesort.h new file mode 100644 index 000000000..11dd885c8 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/multiway_mergesort.h @@ -0,0 +1,475 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/multiway_mergesort.h + * @brief Parallel multiway merge sort. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H +#define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1 + +#include <vector> + +#include <parallel/basic_iterator.h> +#include <bits/stl_algo.h> +#include <parallel/parallel.h> +#include <parallel/multiway_merge.h> + +namespace __gnu_parallel +{ + +/** @brief Subsequence description. 
*/ +template<typename _DifferenceTp> + struct Piece + { + typedef _DifferenceTp difference_type; + + /** @brief Begin of subsequence. */ + difference_type begin; + + /** @brief End of subsequence. */ + difference_type end; + }; + +/** @brief Data accessed by all threads. + * + * PMWMS = parallel multiway mergesort */ +template<typename RandomAccessIterator> + struct PMWMSSortingData + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + /** @brief Number of threads involved. */ + thread_index_t num_threads; + + /** @brief Input begin. */ + RandomAccessIterator source; + + /** @brief Start indices, per thread. */ + difference_type* starts; + + /** @brief Storage in which to sort. */ + value_type** temporary; + + /** @brief Samples. */ + value_type* samples; + + /** @brief Offsets to add to the found positions. */ + difference_type* offsets; + + /** @brief Pieces of data to merge @c [thread][sequence] */ + std::vector<Piece<difference_type> >* pieces; +}; + +/** + * @brief Select samples from a sequence. + * @param sd Pointer to algorithm data. Result will be placed in + * @c sd->samples. + * @param num_samples Number of samples to select. 
+ */ +template<typename RandomAccessIterator, typename _DifferenceTp> + void + determine_samples(PMWMSSortingData<RandomAccessIterator>* sd, + _DifferenceTp num_samples) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef _DifferenceTp difference_type; + + thread_index_t iam = omp_get_thread_num(); + + difference_type* es = new difference_type[num_samples + 2]; + + equally_split(sd->starts[iam + 1] - sd->starts[iam], + num_samples + 1, es); + + for (difference_type i = 0; i < num_samples; ++i) + ::new(&(sd->samples[iam * num_samples + i])) + value_type(sd->source[sd->starts[iam] + es[i + 1]]); + + delete[] es; + } + +/** @brief Split consistently. */ +template<bool exact, typename RandomAccessIterator, + typename Comparator, typename SortingPlacesIterator> + struct split_consistently + { + }; + +/** @brief Split by exact splitting. */ +template<typename RandomAccessIterator, typename Comparator, + typename SortingPlacesIterator> + struct split_consistently + <true, RandomAccessIterator, Comparator, SortingPlacesIterator> + { + void operator()( + const thread_index_t iam, + PMWMSSortingData<RandomAccessIterator>* sd, + Comparator& comp, + const typename + std::iterator_traits<RandomAccessIterator>::difference_type + num_samples) + const + { +# pragma omp barrier + + std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > + seqs(sd->num_threads); + for (thread_index_t s = 0; s < sd->num_threads; s++) + seqs[s] = std::make_pair(sd->temporary[s], + sd->temporary[s] + + (sd->starts[s + 1] - sd->starts[s])); + + std::vector<SortingPlacesIterator> offsets(sd->num_threads); + + // if not last thread + if (iam < sd->num_threads - 1) + multiseq_partition(seqs.begin(), seqs.end(), + sd->starts[iam + 1], offsets.begin(), comp); + + for (int seq = 0; seq < sd->num_threads; seq++) + { + // for each sequence + if (iam < (sd->num_threads - 1)) + sd->pieces[iam][seq].end = offsets[seq] 
- seqs[seq].first; + else + // very end of this sequence + sd->pieces[iam][seq].end = + sd->starts[seq + 1] - sd->starts[seq]; + } + +# pragma omp barrier + + for (thread_index_t seq = 0; seq < sd->num_threads; seq++) + { + // For each sequence. + if (iam > 0) + sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end; + else + // Absolute beginning. + sd->pieces[iam][seq].begin = 0; + } + } + }; + +/** @brief Split by sampling. */ +template<typename RandomAccessIterator, typename Comparator, + typename SortingPlacesIterator> + struct split_consistently<false, RandomAccessIterator, Comparator, + SortingPlacesIterator> + { + void operator()( + const thread_index_t iam, + PMWMSSortingData<RandomAccessIterator>* sd, + Comparator& comp, + const typename + std::iterator_traits<RandomAccessIterator>::difference_type + num_samples) + const + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + determine_samples(sd, num_samples); + +# pragma omp barrier + +# pragma omp single + __gnu_sequential::sort(sd->samples, + sd->samples + (num_samples * sd->num_threads), + comp); + +# pragma omp barrier + + for (thread_index_t s = 0; s < sd->num_threads; ++s) + { + // For each sequence. + if (num_samples * iam > 0) + sd->pieces[iam][s].begin = + std::lower_bound(sd->temporary[s], + sd->temporary[s] + + (sd->starts[s + 1] - sd->starts[s]), + sd->samples[num_samples * iam], + comp) + - sd->temporary[s]; + else + // Absolute beginning. + sd->pieces[iam][s].begin = 0; + + if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads)) + sd->pieces[iam][s].end = + std::lower_bound(sd->temporary[s], + sd->temporary[s] + + (sd->starts[s + 1] - sd->starts[s]), + sd->samples[num_samples * (iam + 1)], + comp) + - sd->temporary[s]; + else + // Absolute end. 
+ sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s]; + } + } + }; + +template<bool stable, typename RandomAccessIterator, typename Comparator> + struct possibly_stable_sort + { + }; + +template<typename RandomAccessIterator, typename Comparator> + struct possibly_stable_sort<true, RandomAccessIterator, Comparator> + { + void operator()(const RandomAccessIterator& begin, + const RandomAccessIterator& end, Comparator& comp) const + { + __gnu_sequential::stable_sort(begin, end, comp); + } + }; + +template<typename RandomAccessIterator, typename Comparator> + struct possibly_stable_sort<false, RandomAccessIterator, Comparator> + { + void operator()(const RandomAccessIterator begin, + const RandomAccessIterator end, Comparator& comp) const + { + __gnu_sequential::sort(begin, end, comp); + } + }; + +template<bool stable, typename SeqRandomAccessIterator, + typename RandomAccessIterator, typename Comparator, + typename DiffType> + struct possibly_stable_multiway_merge + { + }; + +template<typename SeqRandomAccessIterator, typename RandomAccessIterator, + typename Comparator, typename DiffType> + struct possibly_stable_multiway_merge + <true, SeqRandomAccessIterator, RandomAccessIterator, Comparator, + DiffType> + { + void operator()(const SeqRandomAccessIterator& seqs_begin, + const SeqRandomAccessIterator& seqs_end, + const RandomAccessIterator& target, + Comparator& comp, + DiffType length_am) const + { + stable_multiway_merge(seqs_begin, seqs_end, target, length_am, comp, + sequential_tag()); + } + }; + +template<typename SeqRandomAccessIterator, typename RandomAccessIterator, + typename Comparator, typename DiffType> + struct possibly_stable_multiway_merge + <false, SeqRandomAccessIterator, RandomAccessIterator, Comparator, + DiffType> + { + void operator()(const SeqRandomAccessIterator& seqs_begin, + const SeqRandomAccessIterator& seqs_end, + const RandomAccessIterator& target, + Comparator& comp, + DiffType length_am) const + { + multiway_merge(seqs_begin, 
seqs_end, target, length_am, comp, + sequential_tag()); + } + }; + +/** @brief PMWMS code executed by each thread. + * @param sd Pointer to algorithm data. + * @param comp Comparator. + */ +template<bool stable, bool exact, typename RandomAccessIterator, + typename Comparator> + void + parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd, + Comparator& comp) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + thread_index_t iam = omp_get_thread_num(); + + // Length of this thread's chunk, before merging. + difference_type length_local = sd->starts[iam + 1] - sd->starts[iam]; + + // Sort in temporary storage, leave space for sentinel. + + typedef value_type* SortingPlacesIterator; + + sd->temporary[iam] = + static_cast<value_type*>( + ::operator new(sizeof(value_type) * (length_local + 1))); + + // Copy there. + std::uninitialized_copy(sd->source + sd->starts[iam], + sd->source + sd->starts[iam] + length_local, + sd->temporary[iam]); + + possibly_stable_sort<stable, SortingPlacesIterator, Comparator>() + (sd->temporary[iam], sd->temporary[iam] + length_local, comp); + + // Invariant: locally sorted subsequence in sd->temporary[iam], + // sd->temporary[iam] + length_local. + + // No barrier here: Synchronization is done by the splitting routine. + + difference_type num_samples = + _Settings::get().sort_mwms_oversampling * sd->num_threads - 1; + split_consistently + <exact, RandomAccessIterator, Comparator, SortingPlacesIterator>() + (iam, sd, comp, num_samples); + + // Offset from target begin, length after merging. 
+ difference_type offset = 0, length_am = 0; + for (thread_index_t s = 0; s < sd->num_threads; s++) + { + length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin; + offset += sd->pieces[iam][s].begin; + } + + typedef std::vector< + std::pair<SortingPlacesIterator, SortingPlacesIterator> > + seq_vector_type; + seq_vector_type seqs(sd->num_threads); + + for (int s = 0; s < sd->num_threads; ++s) + { + seqs[s] = + std::make_pair(sd->temporary[s] + sd->pieces[iam][s].begin, + sd->temporary[s] + sd->pieces[iam][s].end); + } + + possibly_stable_multiway_merge< + stable, + typename seq_vector_type::iterator, + RandomAccessIterator, + Comparator, difference_type>() + (seqs.begin(), seqs.end(), + sd->source + offset, comp, + length_am); + +# pragma omp barrier + + ::operator delete(sd->temporary[iam]); + } + +/** @brief PMWMS main call. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param comp Comparator. + * @param n Length of sequence. + * @param num_threads Number of threads to use. 
+ */ +template<bool stable, bool exact, typename RandomAccessIterator, + typename Comparator> + void + parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, + thread_index_t num_threads) + { + _GLIBCXX_CALL(end - begin) + + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + + if (n <= 1) + return; + + // at least one element per thread + if (num_threads > n) + num_threads = static_cast<thread_index_t>(n); + + // shared variables + PMWMSSortingData<RandomAccessIterator> sd; + difference_type* starts; + +# pragma omp parallel num_threads(num_threads) + { + num_threads = omp_get_num_threads(); //no more threads than requested + +# pragma omp single + { + sd.num_threads = num_threads; + sd.source = begin; + + sd.temporary = new value_type*[num_threads]; + + if (!exact) + { + difference_type size = + (_Settings::get().sort_mwms_oversampling * num_threads - 1) + * num_threads; + sd.samples = static_cast<value_type*>( + ::operator new(size * sizeof(value_type))); + } + else + sd.samples = NULL; + + sd.offsets = new difference_type[num_threads - 1]; + sd.pieces = new std::vector<Piece<difference_type> >[num_threads]; + for (int s = 0; s < num_threads; ++s) + sd.pieces[s].resize(num_threads); + starts = sd.starts = new difference_type[num_threads + 1]; + + difference_type chunk_length = n / num_threads; + difference_type split = n % num_threads; + difference_type pos = 0; + for (int i = 0; i < num_threads; ++i) + { + starts[i] = pos; + pos += (i < split) ? (chunk_length + 1) : chunk_length; + } + starts[num_threads] = pos; + } //single + + // Now sort in parallel. 
+ parallel_sort_mwms_pu<stable, exact>(&sd, comp); + } //parallel + + delete[] starts; + delete[] sd.temporary; + + if (!exact) + ::operator delete(sd.samples); + + delete[] sd.offsets; + delete[] sd.pieces; + } +} //namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/numeric b/gcc-4.4.0/libstdc++-v3/include/parallel/numeric new file mode 100644 index 000000000..33b1411f1 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/numeric @@ -0,0 +1,500 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** + * @file parallel/numeric +* + * @brief Parallel STL function calls corresponding to stl_numeric.h. + * The functions defined here mainly do case switches and + * call the actual parallelized versions in other files. + * Inlining policy: Functions that basically only contain one function call, + * are declared inline. 
+ * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_NUMERIC_H +#define _GLIBCXX_PARALLEL_NUMERIC_H 1 + +#include <numeric> +#include <functional> +#include <parallel/numericfwd.h> +#include <parallel/iterator.h> +#include <parallel/for_each.h> +#include <parallel/for_each_selectors.h> +#include <parallel/partial_sum.h> + +namespace std +{ +namespace __parallel +{ + // Sequential fallback. + template<typename InputIterator, typename T> + inline T + accumulate(InputIterator begin, InputIterator end, T init, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::accumulate(begin, end, init); } + + template<typename InputIterator, typename T, typename BinaryOperation> + inline T + accumulate(InputIterator begin, InputIterator end, T init, + BinaryOperation binary_op, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::accumulate(begin, end, init, binary_op); } + + // Sequential fallback for input iterator case. + template<typename InputIterator, typename T, typename IteratorTag> + inline T + accumulate_switch(InputIterator begin, InputIterator end, + T init, IteratorTag) + { return accumulate(begin, end, init, __gnu_parallel::sequential_tag()); } + + template<typename InputIterator, typename T, typename BinaryOperation, + typename IteratorTag> + inline T + accumulate_switch(InputIterator begin, InputIterator end, T init, + BinaryOperation binary_op, IteratorTag) + { return accumulate(begin, end, init, binary_op, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators. 
+ template<typename _RandomAccessIterator, typename T, + typename BinaryOperation> + T + accumulate_switch(_RandomAccessIterator begin, _RandomAccessIterator end, + T init, BinaryOperation binary_op, + random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_unbalanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().accumulate_minimal_n + && __gnu_parallel::is_parallel(parallelism_tag))) + { + T res = init; + __gnu_parallel::accumulate_selector<_RandomAccessIterator> + my_selector; + __gnu_parallel:: + for_each_template_random_access_ed(begin, end, + __gnu_parallel::nothing(), + my_selector, + __gnu_parallel:: + accumulate_binop_reduct + <BinaryOperation>(binary_op), + res, res, -1); + return res; + } + else + return accumulate(begin, end, init, binary_op, + __gnu_parallel::sequential_tag()); + } + + // Public interface. + template<typename InputIterator, typename T> + inline T + accumulate(InputIterator begin, InputIterator end, T init, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef std::iterator_traits<InputIterator> iterator_traits; + typedef typename iterator_traits::value_type value_type; + typedef typename iterator_traits::iterator_category iterator_category; + + return accumulate_switch(begin, end, init, + __gnu_parallel::plus<T, value_type>(), + iterator_category(), parallelism_tag); + } + + template<typename InputIterator, typename T> + inline T + accumulate(InputIterator begin, InputIterator end, T init) + { + typedef std::iterator_traits<InputIterator> iterator_traits; + typedef typename iterator_traits::value_type value_type; + typedef typename iterator_traits::iterator_category iterator_category; + + return accumulate_switch(begin, end, init, + __gnu_parallel::plus<T, value_type>(), + iterator_category()); + } + + template<typename InputIterator, typename T, typename BinaryOperation> + inline T + 
accumulate(InputIterator begin, InputIterator end, T init, + BinaryOperation binary_op, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef iterator_traits<InputIterator> iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + return accumulate_switch(begin, end, init, binary_op, + iterator_category(), parallelism_tag); + } + + template<typename InputIterator, typename T, typename BinaryOperation> + inline T + accumulate(InputIterator begin, InputIterator end, T init, + BinaryOperation binary_op) + { + typedef iterator_traits<InputIterator> iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + return accumulate_switch(begin, end, init, binary_op, + iterator_category()); + } + + + // Sequential fallback. + template<typename InputIterator1, typename InputIterator2, typename T> + inline T + inner_product(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, T init, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::inner_product(first1, last1, first2, init); } + + template<typename InputIterator1, typename InputIterator2, typename T, + typename BinaryFunction1, typename BinaryFunction2> + inline T + inner_product(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, T init, BinaryFunction1 binary_op1, + BinaryFunction2 binary_op2, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::inner_product(first1, last1, first2, init, + binary_op1, binary_op2); } + + // Parallel algorithm for random access iterators. 
+ template<typename RandomAccessIterator1, typename RandomAccessIterator2, + typename T, typename BinaryFunction1, typename BinaryFunction2> + T + inner_product_switch(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, T init, + BinaryFunction1 binary_op1, + BinaryFunction2 binary_op2, + random_access_iterator_tag, + random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_unbalanced) + { + if (_GLIBCXX_PARALLEL_CONDITION((last1 - first1) + >= __gnu_parallel::_Settings::get(). + accumulate_minimal_n + && __gnu_parallel:: + is_parallel(parallelism_tag))) + { + T res = init; + __gnu_parallel:: + inner_product_selector<RandomAccessIterator1, + RandomAccessIterator2, T> my_selector(first1, first2); + __gnu_parallel:: + for_each_template_random_access_ed(first1, last1, binary_op2, + my_selector, binary_op1, + res, res, -1); + return res; + } + else + return inner_product(first1, last1, first2, init, + __gnu_parallel::sequential_tag()); + } + + // No parallelism for input iterators. 
+ template<typename InputIterator1, typename InputIterator2, typename T, + typename BinaryFunction1, typename BinaryFunction2, + typename IteratorTag1, typename IteratorTag2> + inline T + inner_product_switch(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, T init, + BinaryFunction1 binary_op1, + BinaryFunction2 binary_op2, + IteratorTag1, IteratorTag2) + { return inner_product(first1, last1, first2, init, + binary_op1, binary_op2, + __gnu_parallel::sequential_tag()); } + + template<typename InputIterator1, typename InputIterator2, typename T, + typename BinaryFunction1, typename BinaryFunction2> + inline T + inner_product(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, T init, BinaryFunction1 binary_op1, + BinaryFunction2 binary_op2, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef iterator_traits<InputIterator1> traits1_type; + typedef typename traits1_type::iterator_category iterator1_category; + + typedef iterator_traits<InputIterator2> traits2_type; + typedef typename traits2_type::iterator_category iterator2_category; + + return inner_product_switch(first1, last1, first2, init, binary_op1, + binary_op2, iterator1_category(), + iterator2_category(), parallelism_tag); + } + + template<typename InputIterator1, typename InputIterator2, typename T, + typename BinaryFunction1, typename BinaryFunction2> + inline T + inner_product(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, T init, BinaryFunction1 binary_op1, + BinaryFunction2 binary_op2) + { + typedef iterator_traits<InputIterator1> traits1_type; + typedef typename traits1_type::iterator_category iterator1_category; + + typedef iterator_traits<InputIterator2> traits2_type; + typedef typename traits2_type::iterator_category iterator2_category; + + return inner_product_switch(first1, last1, first2, init, binary_op1, + binary_op2, iterator1_category(), + iterator2_category()); + } + + template<typename InputIterator1, typename 
InputIterator2, typename T> + inline T + inner_product(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, T init, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef iterator_traits<InputIterator1> traits_type1; + typedef typename traits_type1::value_type value_type1; + typedef iterator_traits<InputIterator2> traits_type2; + typedef typename traits_type2::value_type value_type2; + + typedef typename + __gnu_parallel::multiplies<value_type1, value_type2>::result + multiplies_result_type; + return inner_product(first1, last1, first2, init, + __gnu_parallel::plus<T, multiplies_result_type>(), + __gnu_parallel:: + multiplies<value_type1, value_type2>(), + parallelism_tag); + } + + template<typename InputIterator1, typename InputIterator2, typename T> + inline T + inner_product(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, T init) + { + typedef iterator_traits<InputIterator1> traits_type1; + typedef typename traits_type1::value_type value_type1; + typedef iterator_traits<InputIterator2> traits_type2; + typedef typename traits_type2::value_type value_type2; + + typedef typename + __gnu_parallel::multiplies<value_type1, value_type2>::result + multiplies_result_type; + return inner_product(first1, last1, first2, init, + __gnu_parallel::plus<T, multiplies_result_type>(), + __gnu_parallel:: + multiplies<value_type1, value_type2>()); + } + + // Sequential fallback. + template<typename InputIterator, typename OutputIterator> + inline OutputIterator + partial_sum(InputIterator begin, InputIterator end, OutputIterator result, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::partial_sum(begin, end, result); } + + // Sequential fallback. 
+ template<typename InputIterator, typename OutputIterator, + typename BinaryOperation> + inline OutputIterator + partial_sum(InputIterator begin, InputIterator end, OutputIterator result, + BinaryOperation bin_op, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::partial_sum(begin, end, result, bin_op); } + + // Sequential fallback for input iterator case. + template<typename InputIterator, typename OutputIterator, + typename BinaryOperation, typename IteratorTag1, + typename IteratorTag2> + inline OutputIterator + partial_sum_switch(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op, + IteratorTag1, IteratorTag2) + { return _GLIBCXX_STD_P::partial_sum(begin, end, result, bin_op); } + + // Parallel algorithm for random access iterators. + template<typename InputIterator, typename OutputIterator, + typename BinaryOperation> + OutputIterator + partial_sum_switch(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op, + random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().partial_sum_minimal_n)) + return __gnu_parallel::parallel_partial_sum(begin, end, + result, bin_op); + else + return partial_sum(begin, end, result, bin_op, + __gnu_parallel::sequential_tag()); + } + + // Public interface. 
+ template<typename InputIterator, typename OutputIterator> + inline OutputIterator + partial_sum(InputIterator begin, InputIterator end, OutputIterator result) + { + typedef typename iterator_traits<InputIterator>::value_type value_type; + return partial_sum(begin, end, result, std::plus<value_type>()); + } + + // Public interface + template<typename InputIterator, typename OutputIterator, + typename BinaryOperation> + inline OutputIterator + partial_sum(InputIterator begin, InputIterator end, OutputIterator result, + BinaryOperation binary_op) + { + typedef iterator_traits<InputIterator> traitsi_type; + typedef typename traitsi_type::iterator_category iteratori_category; + + typedef iterator_traits<OutputIterator> traitso_type; + typedef typename traitso_type::iterator_category iteratoro_category; + + return partial_sum_switch(begin, end, result, binary_op, + iteratori_category(), iteratoro_category()); + } + + // Sequential fallback. + template<typename InputIterator, typename OutputIterator> + inline OutputIterator + adjacent_difference(InputIterator begin, InputIterator end, + OutputIterator result, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::adjacent_difference(begin, end, result); } + + // Sequential fallback. + template<typename InputIterator, typename OutputIterator, + typename BinaryOperation> + inline OutputIterator + adjacent_difference(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::adjacent_difference(begin, end, result, bin_op); } + + // Sequential fallback for input iterator case. 
+ template<typename InputIterator, typename OutputIterator, + typename BinaryOperation, typename IteratorTag1, + typename IteratorTag2> + inline OutputIterator + adjacent_difference_switch(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op, + IteratorTag1, IteratorTag2) + { return adjacent_difference(begin, end, result, bin_op, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators. + template<typename InputIterator, typename OutputIterator, + typename BinaryOperation> + OutputIterator + adjacent_difference_switch(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op, + random_access_iterator_tag, + random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::sequence_index_t>(end - begin) + >= __gnu_parallel::_Settings::get().adjacent_difference_minimal_n + && __gnu_parallel::is_parallel(parallelism_tag))) + { + bool dummy = true; + typedef __gnu_parallel::iterator_pair<InputIterator, OutputIterator, + random_access_iterator_tag> ip; + *result = *begin; + ip begin_pair(begin + 1, result + 1), + end_pair(end, result + (end - begin)); + __gnu_parallel::adjacent_difference_selector<ip> functionality; + __gnu_parallel:: + for_each_template_random_access_ed(begin_pair, end_pair, bin_op, + functionality, + __gnu_parallel::dummy_reduct(), + dummy, dummy, -1); + return functionality.finish_iterator; + } + else + return adjacent_difference(begin, end, result, bin_op, + __gnu_parallel::sequential_tag()); + } + + // Public interface. 
+ template<typename InputIterator, typename OutputIterator> + inline OutputIterator + adjacent_difference(InputIterator begin, InputIterator end, + OutputIterator result, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef iterator_traits<InputIterator> traits_type; + typedef typename traits_type::value_type value_type; + return adjacent_difference(begin, end, result, std::minus<value_type>(), + parallelism_tag); + } + + template<typename InputIterator, typename OutputIterator> + inline OutputIterator + adjacent_difference(InputIterator begin, InputIterator end, + OutputIterator result) + { + typedef iterator_traits<InputIterator> traits_type; + typedef typename traits_type::value_type value_type; + return adjacent_difference(begin, end, result, std::minus<value_type>()); + } + + template<typename InputIterator, typename OutputIterator, + typename BinaryOperation> + inline OutputIterator + adjacent_difference(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation binary_op, + __gnu_parallel::_Parallelism parallelism_tag) + { + typedef iterator_traits<InputIterator> traitsi_type; + typedef typename traitsi_type::iterator_category iteratori_category; + + typedef iterator_traits<OutputIterator> traitso_type; + typedef typename traitso_type::iterator_category iteratoro_category; + + return adjacent_difference_switch(begin, end, result, binary_op, + iteratori_category(), + iteratoro_category(), parallelism_tag); + } + + template<typename InputIterator, typename OutputIterator, + typename BinaryOperation> + inline OutputIterator + adjacent_difference(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation binary_op) + { + typedef iterator_traits<InputIterator> traitsi_type; + typedef typename traitsi_type::iterator_category iteratori_category; + + typedef iterator_traits<OutputIterator> traitso_type; + typedef typename traitso_type::iterator_category iteratoro_category; + + return 
adjacent_difference_switch(begin, end, result, binary_op, + iteratori_category(), + iteratoro_category()); + } +} // end namespace +} // end namespace + +#endif /* _GLIBCXX_NUMERIC_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/numericfwd.h b/gcc-4.4.0/libstdc++-v3/include/parallel/numericfwd.h new file mode 100644 index 000000000..af2d0416f --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/numericfwd.h @@ -0,0 +1,203 @@ +// <numeric> parallel extensions -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/numericfwd.h + * This file is a GNU parallel extension to the Standard C++ Library. 
+ */ + +#ifndef _GLIBCXX_PARALLEL_NUMERICFWD_H +#define _GLIBCXX_PARALLEL_NUMERICFWD_H 1 + +#pragma GCC system_header + +#include <parallel/tags.h> +#include <parallel/settings.h> + +namespace std +{ +namespace __parallel +{ + template<typename _IIter, typename _Tp> + _Tp + accumulate(_IIter, _IIter, _Tp); + + template<typename _IIter, typename _Tp> + _Tp + accumulate(_IIter, _IIter, _Tp, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _Tp> + _Tp + accumulate(_IIter, _IIter, _Tp, __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _Tp, typename _Tag> + _Tp + accumulate_switch(_IIter, _IIter, _Tp, _Tag); + + template<typename _IIter, typename _Tp, typename _BinaryOper> + _Tp + accumulate(_IIter, _IIter, _Tp, _BinaryOper); + + template<typename _IIter, typename _Tp, typename _BinaryOper> + _Tp + accumulate(_IIter, _IIter, _Tp, _BinaryOper, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _Tp, typename _BinaryOper> + _Tp + accumulate(_IIter, _IIter, _Tp, _BinaryOper, + __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _Tp, typename _BinaryOper, + typename _Tag> + _Tp + accumulate_switch(_IIter, _IIter, _Tp, _BinaryOper, _Tag); + + template<typename _RAIter, typename _Tp, typename _BinaryOper> + _Tp + accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper, + random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism + = __gnu_parallel::parallel_unbalanced); + + template<typename _IIter, typename _OIter> + _OIter + adjacent_difference(_IIter, _IIter, _OIter); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper); + + template<typename _IIter, typename _OIter> + _OIter + adjacent_difference(_IIter, _IIter, _OIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper, + 
__gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter> + _OIter + adjacent_difference(_IIter, _IIter, _OIter, + __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper, + __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _OIter, typename _BinaryOper, + typename _Tag1, typename _Tag2> + _OIter + adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, + _Tag1, _Tag2); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, + random_access_iterator_tag, + random_access_iterator_tag, + __gnu_parallel::_Parallelism parallelism + = __gnu_parallel::parallel_unbalanced); + + template<typename _IIter1, typename _IIter2, typename _Tp> + _Tp + inner_product(_IIter1, _IIter1, _IIter2, _Tp); + + template<typename _IIter1, typename _IIter2, typename _Tp> + _Tp + inner_product(_IIter1, _IIter1, _IIter2, _Tp, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _Tp> + _Tp + inner_product(_IIter1, _IIter1, _IIter2, _Tp, + __gnu_parallel::_Parallelism); + + template<typename _IIter1, typename _IIter2, typename _Tp, + typename _BinaryFunction1, typename _BinaryFunction2> + _Tp + inner_product(_IIter1, _IIter1, _IIter2, _Tp, + _BinaryFunction1, _BinaryFunction2); + + template<typename _IIter1, typename _IIter2, typename _Tp, + typename _BinaryFunction1, typename _BinaryFunction2> + _Tp + inner_product(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1, + _BinaryFunction2, __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _Tp, + typename BinaryFunction1, typename BinaryFunction2> + _Tp + inner_product(_IIter1, _IIter1, _IIter2, _Tp, BinaryFunction1, + BinaryFunction2, __gnu_parallel::_Parallelism); + + template<typename _RAIter1, typename _RAIter2, typename 
_Tp, + typename BinaryFunction1, typename BinaryFunction2> + _Tp + inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1, + BinaryFunction2, random_access_iterator_tag, + random_access_iterator_tag, + __gnu_parallel::_Parallelism + = __gnu_parallel::parallel_unbalanced); + + template<typename _IIter1, typename _IIter2, typename _Tp, + typename _BinaryFunction1, typename _BinaryFunction2, + typename _Tag1, typename _Tag2> + _Tp + inner_product_switch(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1, + _BinaryFunction2, _Tag1, _Tag2); + + + template<typename _IIter, typename _OIter> + _OIter + partial_sum(_IIter, _IIter, _OIter, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + partial_sum(_IIter, _IIter, _OIter, _BinaryOper, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter> + _OIter + partial_sum(_IIter, _IIter, _OIter result); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + partial_sum(_IIter, _IIter, _OIter, _BinaryOper); + + template<typename _IIter, typename _OIter, typename _BinaryOper, + typename _Tag1, typename _Tag2> + _OIter + partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, + random_access_iterator_tag, random_access_iterator_tag); +} // end namespace +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_NUMERICFWD_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/omp_loop.h b/gcc-4.4.0/libstdc++-v3/include/parallel/omp_loop.h new file mode 100644 index 000000000..66f6d44bc --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/omp_loop.h @@ -0,0 +1,118 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/omp_loop.h + * @brief Parallelization of embarrassingly parallel execution by + * means of an OpenMP for loop. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_OMP_LOOP_H +#define _GLIBCXX_PARALLEL_OMP_LOOP_H 1 + +#include <omp.h> + +#include <parallel/settings.h> +#include <parallel/basic_iterator.h> +#include <parallel/base.h> + +namespace __gnu_parallel +{ +/** @brief Embarrassingly parallel algorithm for random access + * iterators, using an OpenMP for loop. + * + * @param begin Begin iterator of element sequence. + * @param end End iterator of element sequence. + * @param o User-supplied functor (comparator, predicate, adding + * functor, etc.). + * @param f Functor to "process" an element with op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param r Functor to "add" a single result to the already + * processed elements (depends on functionality). + * @param base Base value for reduction. 
+ * @param output Pointer to position where final result is written to + * @param bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). + */ +template<typename RandomAccessIterator, + typename Op, + typename Fu, + typename Red, + typename Result> + Op + for_each_template_random_access_omp_loop(RandomAccessIterator begin, + RandomAccessIterator end, + Op o, Fu& f, Red r, Result base, + Result& output, + typename std::iterator_traits + <RandomAccessIterator>:: + difference_type bound) + { + typedef typename + std::iterator_traits<RandomAccessIterator>::difference_type + difference_type; + + difference_type length = end - begin; + thread_index_t num_threads = + __gnu_parallel::min<difference_type>(get_max_threads(), length); + + Result *thread_results; + +# pragma omp parallel num_threads(num_threads) + { +# pragma omp single + { + num_threads = omp_get_num_threads(); + thread_results = new Result[num_threads]; + + for (thread_index_t i = 0; i < num_threads; ++i) + thread_results[i] = Result(); + } + + thread_index_t iam = omp_get_thread_num(); + +# pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size) + for (difference_type pos = 0; pos < length; ++pos) + thread_results[iam] = + r(thread_results[iam], f(o, begin+pos)); + } //parallel + + for (thread_index_t i = 0; i < num_threads; ++i) + output = r(output, thread_results[i]); + + delete [] thread_results; + + // Points to last element processed (needed as return value for + // some algorithms like transform). 
+ f.finish_iterator = begin + length; + + return o; + } + +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_OMP_LOOP_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/omp_loop_static.h b/gcc-4.4.0/libstdc++-v3/include/parallel/omp_loop_static.h new file mode 100644 index 000000000..523618307 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/omp_loop_static.h @@ -0,0 +1,117 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/omp_loop_static.h + * @brief Parallelization of embarrassingly parallel execution by + * means of an OpenMP for loop with static scheduling. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. 
+ +#ifndef _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H +#define _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H 1 + +#include <omp.h> + +#include <parallel/settings.h> +#include <parallel/basic_iterator.h> + +namespace __gnu_parallel +{ + + /** @brief Embarrassingly parallel algorithm for random access + * iterators, using an OpenMP for loop with static scheduling. + * + * @param begin Begin iterator of element sequence. + * @param end End iterator of element sequence. + * @param o User-supplied functor (comparator, predicate, adding + * functor, ...). + * @param f Functor to "process" an element with op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param r Functor to "add" a single result to the already processed + * elements (depends on functionality). + * @param base Base value for reduction. + * @param output Pointer to position where final result is written to + * @param bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). 
+ */ +template<typename RandomAccessIterator, + typename Op, + typename Fu, + typename Red, + typename Result> + Op + for_each_template_random_access_omp_loop_static(RandomAccessIterator begin, + RandomAccessIterator end, + Op o, Fu& f, Red r, + Result base, Result& output, + typename std::iterator_traits + <RandomAccessIterator>:: + difference_type bound) + { + typedef typename + std::iterator_traits<RandomAccessIterator>::difference_type + difference_type; + + difference_type length = end - begin; + thread_index_t num_threads = + std::min<difference_type>(get_max_threads(), length); + + Result *thread_results; + +# pragma omp parallel num_threads(num_threads) + { +# pragma omp single + { + num_threads = omp_get_num_threads(); + thread_results = new Result[num_threads]; + + for (thread_index_t i = 0; i < num_threads; ++i) + thread_results[i] = Result(); + } + + thread_index_t iam = omp_get_thread_num(); + +# pragma omp for schedule(static, _Settings::get().workstealing_chunk_size) + for (difference_type pos = 0; pos < length; ++pos) + thread_results[iam] = r(thread_results[iam], f(o, begin+pos)); + } //parallel + + for (thread_index_t i = 0; i < num_threads; ++i) + output = r(output, thread_results[i]); + + delete [] thread_results; + + // Points to last element processed (needed as return value for + // some algorithms like transform). + f.finish_iterator = begin + length; + + return o; + } + +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/par_loop.h b/gcc-4.4.0/libstdc++-v3/include/parallel/par_loop.h new file mode 100644 index 000000000..be61d4ca2 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/par_loop.h @@ -0,0 +1,135 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/par_loop.h + * @brief Parallelization of embarrassingly parallel execution by + * means of equal splitting. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_PAR_LOOP_H +#define _GLIBCXX_PARALLEL_PAR_LOOP_H 1 + +#include <omp.h> +#include <parallel/settings.h> +#include <parallel/base.h> +#include <parallel/equally_split.h> + +namespace __gnu_parallel +{ + +/** @brief Embarrassingly parallel algorithm for random access + * iterators, using hand-crafted parallelization by equal splitting + * the work. + * + * @param begin Begin iterator of element sequence. + * @param end End iterator of element sequence. + * @param o User-supplied functor (comparator, predicate, adding + * functor, ...) + * @param f Functor to "process" an element with op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param r Functor to "add" a single result to the already + * processed elements (depends on functionality). 
+ * @param base Base value for reduction. + * @param output Pointer to position where final result is written to + * @param bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). + */ +template<typename RandomAccessIterator, + typename Op, + typename Fu, + typename Red, + typename Result> + Op + for_each_template_random_access_ed(RandomAccessIterator begin, + RandomAccessIterator end, + Op o, Fu& f, Red r, Result base, + Result& output, + typename std::iterator_traits + <RandomAccessIterator>:: + difference_type bound) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::difference_type difference_type; + const difference_type length = end - begin; + Result *thread_results; + bool* constructed; + + thread_index_t num_threads = + __gnu_parallel::min<difference_type>(get_max_threads(), length); + +# pragma omp parallel num_threads(num_threads) + { +# pragma omp single + { + num_threads = omp_get_num_threads(); + thread_results = static_cast<Result*>( + ::operator new(num_threads * sizeof(Result))); + constructed = new bool[num_threads]; + } + + thread_index_t iam = omp_get_thread_num(); + + // Neutral element. + Result* reduct = static_cast<Result*>(::operator new(sizeof(Result))); + + difference_type + start = equally_split_point(length, num_threads, iam), + stop = equally_split_point(length, num_threads, iam + 1); + + if (start < stop) + { + new(reduct) Result(f(o, begin + start)); + ++start; + constructed[iam] = true; + } + else + constructed[iam] = false; + + for (; start < stop; ++start) + *reduct = r(*reduct, f(o, begin + start)); + + thread_results[iam] = *reduct; + } //parallel + + for (thread_index_t i = 0; i < num_threads; ++i) + if (constructed[i]) + output = r(output, thread_results[i]); + + // Points to last element processed (needed as return value for + // some algorithms like transform). 
+ f.finish_iterator = begin + length; + + delete[] thread_results; + delete[] constructed; + + return o; + } + +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_PAR_LOOP_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/parallel.h b/gcc-4.4.0/libstdc++-v3/include/parallel/parallel.h new file mode 100644 index 000000000..c4bfc057a --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/parallel.h @@ -0,0 +1,42 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/parallel.h + * @brief End-user include file. Provides advanced settings and + * tuning options. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze and Johannes Singler. 
+ +#ifndef _GLIBCXX_PARALLEL_PARALLEL_H +#define _GLIBCXX_PARALLEL_PARALLEL_H 1 + +#include <parallel/features.h> +#include <parallel/compiletime_settings.h> +#include <parallel/types.h> +#include <parallel/tags.h> +#include <parallel/settings.h> + +#endif /* _GLIBCXX_PARALLEL_PARALLEL_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/partial_sum.h b/gcc-4.4.0/libstdc++-v3/include/parallel/partial_sum.h new file mode 100644 index 000000000..92630936d --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/partial_sum.h @@ -0,0 +1,220 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/partial_sum.h + * @brief Parallel implementation of std::partial_sum(), i. e. prefix + * sums. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. 
+ +#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H +#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1 + +#include <omp.h> +#include <new> +#include <bits/stl_algobase.h> +#include <parallel/parallel.h> +#include <parallel/numericfwd.h> + +namespace __gnu_parallel +{ + // Problem: there is no 0-element given. + +/** @brief Base case prefix sum routine. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param result Begin iterator of output sequence. + * @param bin_op Associative binary function. + * @param value Start value. Must be passed since the neutral + * element is unknown in general. + * @return End iterator of output sequence. */ +template<typename InputIterator, + typename OutputIterator, + typename BinaryOperation> + OutputIterator + parallel_partial_sum_basecase(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op, + typename std::iterator_traits + <InputIterator>::value_type value) + { + if (begin == end) + return result; + + while (begin != end) + { + value = bin_op(value, *begin); + *result = value; + ++result; + ++begin; + } + return result; + } + +/** @brief Parallel partial sum implementation, two-phase approach, + no recursion. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param result Begin iterator of output sequence. + * @param bin_op Associative binary function. + * @param n Length of sequence. + * @param num_threads Number of threads to use. + * @return End iterator of output sequence. 
+ */ +template<typename InputIterator, + typename OutputIterator, + typename BinaryOperation> + OutputIterator + parallel_partial_sum_linear(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op, + typename std::iterator_traits + <InputIterator>::difference_type n) + { + typedef std::iterator_traits<InputIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + if (begin == end) + return result; + + thread_index_t num_threads = + std::min<difference_type>(get_max_threads(), n - 1); + + if (num_threads < 2) + { + *result = *begin; + return parallel_partial_sum_basecase( + begin + 1, end, result + 1, bin_op, *begin); + } + + difference_type* borders; + value_type* sums; + + const _Settings& __s = _Settings::get(); + +# pragma omp parallel num_threads(num_threads) + { +# pragma omp single + { + num_threads = omp_get_num_threads(); + + borders = new difference_type[num_threads + 2]; + + if (__s.partial_sum_dilation == 1.0f) + equally_split(n, num_threads + 1, borders); + else + { + difference_type chunk_length = + ((double)n + / ((double)num_threads + __s.partial_sum_dilation)), + borderstart = n - num_threads * chunk_length; + borders[0] = 0; + for (int i = 1; i < (num_threads + 1); ++i) + { + borders[i] = borderstart; + borderstart += chunk_length; + } + borders[num_threads + 1] = n; + } + + sums = static_cast<value_type*>(::operator new(sizeof(value_type) + * num_threads)); + OutputIterator target_end; + } //single + + thread_index_t iam = omp_get_thread_num(); + if (iam == 0) + { + *result = *begin; + parallel_partial_sum_basecase(begin + 1, begin + borders[1], + result + 1, bin_op, *begin); + ::new(&(sums[iam])) value_type(*(result + borders[1] - 1)); + } + else + { + ::new(&(sums[iam])) + value_type(std::accumulate(begin + borders[iam] + 1, + begin + borders[iam + 1], + *(begin + borders[iam]), + bin_op, + __gnu_parallel::sequential_tag())); 
+ } + +# pragma omp barrier + +# pragma omp single + parallel_partial_sum_basecase( + sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]); + +# pragma omp barrier + + // Still same team. + parallel_partial_sum_basecase(begin + borders[iam + 1], + begin + borders[iam + 2], + result + borders[iam + 1], bin_op, + sums[iam]); + } //parallel + + ::operator delete(sums); + delete[] borders; + + return result + n; + } + +/** @brief Parallel partial sum front-end. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param result Begin iterator of output sequence. + * @param bin_op Associative binary function. + * @return End iterator of output sequence. */ +template<typename InputIterator, + typename OutputIterator, + typename BinaryOperation> + OutputIterator + parallel_partial_sum(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op) + { + _GLIBCXX_CALL(begin - end) + + typedef std::iterator_traits<InputIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + + switch (_Settings::get().partial_sum_algorithm) + { + case LINEAR: + // Need an initial offset. + return parallel_partial_sum_linear(begin, end, result, bin_op, n); + default: + // Partial_sum algorithm not implemented. + _GLIBCXX_PARALLEL_ASSERT(0); + return result + n; + } + } +} + +#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/partition.h b/gcc-4.4.0/libstdc++-v3/include/parallel/partition.h new file mode 100644 index 000000000..b88133c59 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/partition.h @@ -0,0 +1,429 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/partition.h + * @brief Parallel implementation of std::partition(), + * std::nth_element(), and std::partial_sort(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_PARTITION_H +#define _GLIBCXX_PARALLEL_PARTITION_H 1 + +#include <parallel/basic_iterator.h> +#include <parallel/sort.h> +#include <parallel/random_number.h> +#include <bits/stl_algo.h> +#include <parallel/parallel.h> + +/** @brief Decide whether to declare certain variables volatile. */ +#define _GLIBCXX_VOLATILE volatile + +namespace __gnu_parallel +{ +/** @brief Parallel implementation of std::partition. + * @param begin Begin iterator of input sequence to split. + * @param end End iterator of input sequence to split. + * @param pred Partition predicate, possibly including some kind of pivot. + * @param num_threads Maximum number of threads to use for this task. + * @return Number of elements not fulfilling the predicate. 
*/ +template<typename RandomAccessIterator, typename Predicate> + typename std::iterator_traits<RandomAccessIterator>::difference_type + parallel_partition(RandomAccessIterator begin, RandomAccessIterator end, + Predicate pred, thread_index_t num_threads) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + + _GLIBCXX_CALL(n) + + const _Settings& __s = _Settings::get(); + + // Shared. + _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1; + _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right; + _GLIBCXX_VOLATILE difference_type leftnew, rightnew; + + bool* reserved_left = NULL, * reserved_right = NULL; + + difference_type chunk_size; + + omp_lock_t result_lock; + omp_init_lock(&result_lock); + + //at least two chunks per thread + if(right - left + 1 >= 2 * num_threads * chunk_size) +# pragma omp parallel num_threads(num_threads) + { +# pragma omp single + { + num_threads = omp_get_num_threads(); + reserved_left = new bool[num_threads]; + reserved_right = new bool[num_threads]; + + if (__s.partition_chunk_share > 0.0) + chunk_size = std::max<difference_type>(__s.partition_chunk_size, + (double)n * __s.partition_chunk_share + / (double)num_threads); + else + chunk_size = __s.partition_chunk_size; + } + + while (right - left + 1 >= 2 * num_threads * chunk_size) + { +# pragma omp single + { + difference_type num_chunks = (right - left + 1) / chunk_size; + + for (int r = 0; r < num_threads; ++r) + { + reserved_left[r] = false; + reserved_right[r] = false; + } + leftover_left = 0; + leftover_right = 0; + } //implicit barrier + + // Private. + difference_type thread_left, thread_left_border, + thread_right, thread_right_border; + thread_left = left + 1; + + // Just to satisfy the condition below. 
+ thread_left_border = thread_left - 1; + thread_right = n - 1; + thread_right_border = thread_right + 1; + + bool iam_finished = false; + while (!iam_finished) + { + if (thread_left > thread_left_border) + { + omp_set_lock(&result_lock); + if (left + (chunk_size - 1) > right) + iam_finished = true; + else + { + thread_left = left; + thread_left_border = left + (chunk_size - 1); + left += chunk_size; + } + omp_unset_lock(&result_lock); + } + + if (thread_right < thread_right_border) + { + omp_set_lock(&result_lock); + if (left > right - (chunk_size - 1)) + iam_finished = true; + else + { + thread_right = right; + thread_right_border = right - (chunk_size - 1); + right -= chunk_size; + } + omp_unset_lock(&result_lock); + } + + if (iam_finished) + break; + + // Swap as usual. + while (thread_left < thread_right) + { + while (pred(begin[thread_left]) + && thread_left <= thread_left_border) + ++thread_left; + while (!pred(begin[thread_right]) + && thread_right >= thread_right_border) + --thread_right; + + if (thread_left > thread_left_border + || thread_right < thread_right_border) + // Fetch new chunk(s). + break; + + std::swap(begin[thread_left], begin[thread_right]); + ++thread_left; + --thread_right; + } + } + + // Now swap the leftover chunks to the right places. + if (thread_left <= thread_left_border) +# pragma omp atomic + ++leftover_left; + if (thread_right >= thread_right_border) +# pragma omp atomic + ++leftover_right; + +# pragma omp barrier + +# pragma omp single + { + leftnew = left - leftover_left * chunk_size; + rightnew = right + leftover_right * chunk_size; + } + +# pragma omp barrier + + // <=> thread_left_border + (chunk_size - 1) >= leftnew + if (thread_left <= thread_left_border + && thread_left_border >= leftnew) + { + // Chunk already in place, reserve spot. 
+ reserved_left[(left - (thread_left_border + 1)) / chunk_size] + = true; + } + + // <=> thread_right_border - (chunk_size - 1) <= rightnew + if (thread_right >= thread_right_border + && thread_right_border <= rightnew) + { + // Chunk already in place, reserve spot. + reserved_right[((thread_right_border - 1) - right) + / chunk_size] = true; + } + +# pragma omp barrier + + if (thread_left <= thread_left_border + && thread_left_border < leftnew) + { + // Find spot and swap. + difference_type swapstart = -1; + omp_set_lock(&result_lock); + for (int r = 0; r < leftover_left; ++r) + if (!reserved_left[r]) + { + reserved_left[r] = true; + swapstart = left - (r + 1) * chunk_size; + break; + } + omp_unset_lock(&result_lock); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(swapstart != -1); +#endif + + std::swap_ranges(begin + thread_left_border + - (chunk_size - 1), + begin + thread_left_border + 1, + begin + swapstart); + } + + if (thread_right >= thread_right_border + && thread_right_border > rightnew) + { + // Find spot and swap + difference_type swapstart = -1; + omp_set_lock(&result_lock); + for (int r = 0; r < leftover_right; ++r) + if (!reserved_right[r]) + { + reserved_right[r] = true; + swapstart = right + r * chunk_size + 1; + break; + } + omp_unset_lock(&result_lock); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(swapstart != -1); +#endif + + std::swap_ranges(begin + thread_right_border, + begin + thread_right_border + chunk_size, + begin + swapstart); + } +#if _GLIBCXX_ASSERTIONS +# pragma omp barrier + +# pragma omp single + { + for (int r = 0; r < leftover_left; ++r) + _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]); + for (int r = 0; r < leftover_right; ++r) + _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]); + } + +# pragma omp barrier +#endif + +# pragma omp barrier + + left = leftnew; + right = rightnew; + } +# pragma omp flush(left, right) + } // end "recursion" //parallel + + difference_type final_left = left, final_right = right; + + while 
(final_left < final_right) + { + // Go right until key is geq than pivot. + while (pred(begin[final_left]) && final_left < final_right) + ++final_left; + + // Go left until key is less than pivot. + while (!pred(begin[final_right]) && final_left < final_right) + --final_right; + + if (final_left == final_right) + break; + std::swap(begin[final_left], begin[final_right]); + ++final_left; + --final_right; + } + + // All elements on the left side are < piv, all elements on the + // right are >= piv + delete[] reserved_left; + delete[] reserved_right; + + omp_destroy_lock(&result_lock); + + // Element "between" final_left and final_right might not have + // been regarded yet + if (final_left < n && !pred(begin[final_left])) + // Really swapped. + return final_left; + else + return final_left + 1; + } + +/** + * @brief Parallel implementation of std::nth_element(). + * @param begin Begin iterator of input sequence. + * @param nth Iterator of element that must be in position afterwards. + * @param end End iterator of input sequence. + * @param comp Comparator. + */ +template<typename RandomAccessIterator, typename Comparator> + void + parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth, + RandomAccessIterator end, Comparator comp) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + _GLIBCXX_CALL(end - begin) + + RandomAccessIterator split; + random_number rng; + + difference_type minimum_length = + std::max<difference_type>(2, _Settings::get().partition_minimal_n); + + // Break if input range to small. + while (static_cast<sequence_index_t>(end - begin) >= minimum_length) + { + difference_type n = end - begin; + + RandomAccessIterator pivot_pos = begin + rng(n); + + // Swap pivot_pos value to end. 
+ if (pivot_pos != (end - 1)) + std::swap(*pivot_pos, *(end - 1)); + pivot_pos = end - 1; + + // XXX Comparator must have first_value_type, second_value_type, + // result_type + // Comparator == __gnu_parallel::lexicographic<S, int, + // __gnu_parallel::less<S, S> > + // pivot_pos == std::pair<S, int>* + // XXX binder2nd only for RandomAccessIterators?? + __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> + pred(comp, *pivot_pos); + + // Divide, leave pivot unchanged in last place. + RandomAccessIterator split_pos1, split_pos2; + split_pos1 = begin + parallel_partition(begin, end - 1, pred, + get_max_threads()); + + // Left side: < pivot_pos; right side: >= pivot_pos + + // Swap pivot back to middle. + if (split_pos1 != pivot_pos) + std::swap(*split_pos1, *pivot_pos); + pivot_pos = split_pos1; + + // In case all elements are equal, split_pos1 == 0 + if ((split_pos1 + 1 - begin) < (n >> 7) + || (end - split_pos1) < (n >> 7)) + { + // Very unequal split, one part smaller than one 128th + // elements not strictly larger than the pivot. + __gnu_parallel::unary_negate<__gnu_parallel:: + binder1st<Comparator, value_type, value_type, bool>, value_type> + pred(__gnu_parallel::binder1st<Comparator, value_type, + value_type, bool>(comp, *pivot_pos)); + + // Find other end of pivot-equal range. + split_pos2 = __gnu_sequential::partition(split_pos1 + 1, + end, pred); + } + else + // Only skip the pivot. + split_pos2 = split_pos1 + 1; + + // Compare iterators. + if (split_pos2 <= nth) + begin = split_pos2; + else if (nth < split_pos1) + end = split_pos1; + else + break; + } + + // Only at most _Settings::partition_minimal_n elements left. + __gnu_sequential::sort(begin, end, comp); + } + +/** @brief Parallel implementation of std::partial_sort(). +* @param begin Begin iterator of input sequence. +* @param middle Sort until this position. +* @param end End iterator of input sequence. +* @param comp Comparator. 
*/ +template<typename RandomAccessIterator, typename Comparator> + void + parallel_partial_sort(RandomAccessIterator begin, + RandomAccessIterator middle, + RandomAccessIterator end, Comparator comp) + { + parallel_nth_element(begin, middle, end, comp); + std::sort(begin, middle, comp); + } + +} //namespace __gnu_parallel + +#undef _GLIBCXX_VOLATILE + +#endif /* _GLIBCXX_PARALLEL_PARTITION_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/queue.h b/gcc-4.4.0/libstdc++-v3/include/parallel/queue.h new file mode 100644 index 000000000..9686a9ac2 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/queue.h @@ -0,0 +1,149 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/queue.h + * @brief Lock-free double-ended queue. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. 
+ +#ifndef _GLIBCXX_PARALLEL_QUEUE_H +#define _GLIBCXX_PARALLEL_QUEUE_H 1 + +#include <parallel/types.h> +#include <parallel/base.h> +#include <parallel/compatibility.h> + +/** @brief Decide whether to declare certain variable volatile in this file. */ +#define _GLIBCXX_VOLATILE volatile + +namespace __gnu_parallel +{ + /**@brief Double-ended queue of bounded size, allowing lock-free + * atomic access. push_front() and pop_front() must not be called + * concurrently to each other, while pop_back() can be called + * concurrently at all times. + * @c empty(), @c size(), and @c top() are intentionally not provided. + * Calling them would not make sense in a concurrent setting. + * @param T Contained element type. */ + template<typename T> + class RestrictedBoundedConcurrentQueue + { + private: + /** @brief Array of elements, seen as cyclic buffer. */ + T* base; + + /** @brief Maximal number of elements contained at the same time. */ + sequence_index_t max_size; + + /** @brief Cyclic begin and end pointers contained in one + atomically changeable value. */ + _GLIBCXX_VOLATILE lcas_t borders; + + public: + /** @brief Constructor. Not to be called concurrent, of course. + * @param max_size Maximal number of elements to be contained. */ + RestrictedBoundedConcurrentQueue(sequence_index_t max_size) + { + this->max_size = max_size; + base = new T[max_size]; + borders = encode2(0, 0); +#pragma omp flush + } + + /** @brief Destructor. Not to be called concurrent, of course. */ + ~RestrictedBoundedConcurrentQueue() + { delete[] base; } + + /** @brief Pushes one element into the queue at the front end. + * Must not be called concurrently with pop_front(). */ + void + push_front(const T& t) + { + lcas_t former_borders = borders; + int former_front, former_back; + decode2(former_borders, former_front, former_back); + *(base + former_front % max_size) = t; +#if _GLIBCXX_ASSERTIONS + // Otherwise: front - back > max_size eventually. 
+ _GLIBCXX_PARALLEL_ASSERT(((former_front + 1) - former_back) + <= max_size); +#endif + fetch_and_add(&borders, encode2(1, 0)); + } + + /** @brief Pops one element from the queue at the front end. + * Must not be called concurrently with pop_front(). */ + bool + pop_front(T& t) + { + int former_front, former_back; +#pragma omp flush + decode2(borders, former_front, former_back); + while (former_front > former_back) + { + // Chance. + lcas_t former_borders = encode2(former_front, former_back); + lcas_t new_borders = encode2(former_front - 1, former_back); + if (compare_and_swap(&borders, former_borders, new_borders)) + { + t = *(base + (former_front - 1) % max_size); + return true; + } +#pragma omp flush + decode2(borders, former_front, former_back); + } + return false; + } + + /** @brief Pops one element from the queue at the front end. + * Must not be called concurrently with pop_front(). */ + bool + pop_back(T& t) //queue behavior + { + int former_front, former_back; +#pragma omp flush + decode2(borders, former_front, former_back); + while (former_front > former_back) + { + // Chance. + lcas_t former_borders = encode2(former_front, former_back); + lcas_t new_borders = encode2(former_front, former_back + 1); + if (compare_and_swap(&borders, former_borders, new_borders)) + { + t = *(base + former_back % max_size); + return true; + } +#pragma omp flush + decode2(borders, former_front, former_back); + } + return false; + } + }; +} //namespace __gnu_parallel + +#undef _GLIBCXX_VOLATILE + +#endif /* _GLIBCXX_PARALLEL_QUEUE_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/quicksort.h b/gcc-4.4.0/libstdc++-v3/include/parallel/quicksort.h new file mode 100644 index 000000000..712b479e7 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/quicksort.h @@ -0,0 +1,179 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/quicksort.h + * @brief Implementation of a unbalanced parallel quicksort (in-place). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_QUICKSORT_H +#define _GLIBCXX_PARALLEL_QUICKSORT_H 1 + +#include <parallel/parallel.h> +#include <parallel/partition.h> + +namespace __gnu_parallel +{ + /** @brief Unbalanced quicksort divide step. + * @param begin Begin iterator of subsequence. + * @param end End iterator of subsequence. + * @param comp Comparator. + * @param pivot_rank Desired rank of the pivot. + * @param num_samples Choose pivot from that many samples. + * @param num_threads Number of threads that are allowed to work on + * this part. 
+ */ + template<typename RandomAccessIterator, typename Comparator> + typename std::iterator_traits<RandomAccessIterator>::difference_type + parallel_sort_qs_divide(RandomAccessIterator begin, + RandomAccessIterator end, + Comparator comp, typename std::iterator_traits + <RandomAccessIterator>::difference_type pivot_rank, + typename std::iterator_traits + <RandomAccessIterator>::difference_type + num_samples, thread_index_t num_threads) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + num_samples = std::min(num_samples, n); + + // Allocate uninitialized, to avoid default constructor. + value_type* samples = + static_cast<value_type*>(::operator new(num_samples + * sizeof(value_type))); + + for (difference_type s = 0; s < num_samples; ++s) + { + const unsigned long long index = static_cast<unsigned long long>(s) + * n / num_samples; + ::new(&(samples[s])) value_type(begin[index]); + } + + __gnu_sequential::sort(samples, samples + num_samples, comp); + + value_type& pivot = samples[pivot_rank * num_samples / n]; + + __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> + pred(comp, pivot); + difference_type split = + parallel_partition(begin, end, pred, num_threads); + + ::operator delete(samples); + + return split; + } + + /** @brief Unbalanced quicksort conquer step. + * @param begin Begin iterator of subsequence. + * @param end End iterator of subsequence. + * @param comp Comparator. + * @param num_threads Number of threads that are allowed to work on + * this part. 
+ */ + template<typename RandomAccessIterator, typename Comparator> + void + parallel_sort_qs_conquer(RandomAccessIterator begin, + RandomAccessIterator end, + Comparator comp, + thread_index_t num_threads) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + if (num_threads <= 1) + { + __gnu_sequential::sort(begin, end, comp); + return; + } + + difference_type n = end - begin, pivot_rank; + + if (n <= 1) + return; + + thread_index_t num_threads_left; + + if ((num_threads % 2) == 1) + num_threads_left = num_threads / 2 + 1; + else + num_threads_left = num_threads / 2; + + pivot_rank = n * num_threads_left / num_threads; + + difference_type split = + parallel_sort_qs_divide(begin, end, comp, pivot_rank, + _Settings::get().sort_qs_num_samples_preset, + num_threads); + +#pragma omp parallel sections num_threads(2) + { +#pragma omp section + parallel_sort_qs_conquer(begin, begin + split, + comp, num_threads_left); +#pragma omp section + parallel_sort_qs_conquer(begin + split, end, + comp, num_threads - num_threads_left); + } + } + + + + /** @brief Unbalanced quicksort main call. + * @param begin Begin iterator of input sequence. + * @param end End iterator input sequence, ignored. + * @param comp Comparator. + * @param num_threads Number of threads that are allowed to work on + * this part. + */ + template<typename RandomAccessIterator, typename Comparator> + void + parallel_sort_qs(RandomAccessIterator begin, + RandomAccessIterator end, + Comparator comp, + thread_index_t num_threads) + { + _GLIBCXX_CALL(n) + + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + + // At least one element per processor. 
+ if (num_threads > n) + num_threads = static_cast<thread_index_t>(n); + + parallel_sort_qs_conquer(begin, begin + n, comp, num_threads); + } + +} //namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_QUICKSORT_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/random_number.h b/gcc-4.4.0/libstdc++-v3/include/parallel/random_number.h new file mode 100644 index 000000000..7e821a473 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/random_number.h @@ -0,0 +1,124 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/random_number.h + * @brief Random number generator based on the Mersenne twister. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. 
+ +#ifndef _GLIBCXX_PARALLEL_RANDOM_NUMBER_H +#define _GLIBCXX_PARALLEL_RANDOM_NUMBER_H 1 + +#include <parallel/types.h> +#include <tr1/random> + +namespace __gnu_parallel +{ + /** @brief Random number generator, based on the Mersenne twister. */ + class random_number + { + private: + std::tr1::mt19937 mt; + uint64 supremum; + uint64 RAND_SUP; + double supremum_reciprocal; + double RAND_SUP_REC; + + // Assumed to be twice as long as the usual random number. + uint64 cache; + + // Bit results. + int bits_left; + + static uint32 + scale_down(uint64 x, +#if _GLIBCXX_SCALE_DOWN_FPU + uint64 /*supremum*/, double supremum_reciprocal) +#else + uint64 supremum, double /*supremum_reciprocal*/) +#endif + { +#if _GLIBCXX_SCALE_DOWN_FPU + return uint32(x * supremum_reciprocal); +#else + return static_cast<uint32>(x % supremum); +#endif + } + + public: + /** @brief Default constructor. Seed with 0. */ + random_number() + : mt(0), supremum(0x100000000ULL), + RAND_SUP(1ULL << (sizeof(uint32) * 8)), + supremum_reciprocal(double(supremum) / double(RAND_SUP)), + RAND_SUP_REC(1.0 / double(RAND_SUP)), + cache(0), bits_left(0) { } + + /** @brief Constructor. + * @param seed Random seed. + * @param supremum Generate integer random numbers in the + * interval @c [0,supremum). */ + random_number(uint32 seed, uint64 supremum = 0x100000000ULL) + : mt(seed), supremum(supremum), + RAND_SUP(1ULL << (sizeof(uint32) * 8)), + supremum_reciprocal(double(supremum) / double(RAND_SUP)), + RAND_SUP_REC(1.0 / double(RAND_SUP)), + cache(0), bits_left(0) { } + + /** @brief Generate unsigned random 32-bit integer. */ + uint32 + operator()() + { return scale_down(mt(), supremum, supremum_reciprocal); } + + /** @brief Generate unsigned random 32-bit integer in the + interval @c [0,local_supremum). */ + uint32 + operator()(uint64 local_supremum) + { + return scale_down(mt(), local_supremum, + double(local_supremum * RAND_SUP_REC)); + } + + /** @brief Generate a number of random bits, run-time parameter. 
+ * @param bits Number of bits to generate. */ + unsigned long + genrand_bits(int bits) + { + unsigned long res = cache & ((1 << bits) - 1); + cache = cache >> bits; + bits_left -= bits; + if (bits_left < 32) + { + cache |= ((uint64(mt())) << bits_left); + bits_left += 32; + } + return res; + } +}; + +} // namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_RANDOM_NUMBER_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/random_shuffle.h b/gcc-4.4.0/libstdc++-v3/include/parallel/random_shuffle.h new file mode 100644 index 000000000..6e0ebef15 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/random_shuffle.h @@ -0,0 +1,519 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/random_shuffle.h + * @brief Parallel implementation of std::random_shuffle(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. 
+ +#ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H +#define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1 + +#include <limits> +#include <bits/stl_numeric.h> +#include <parallel/parallel.h> +#include <parallel/random_number.h> + +namespace __gnu_parallel +{ +/** @brief Type to hold the index of a bin. + * + * Since many variables of this type are allocated, it should be + * chosen as small as possible. + */ +typedef unsigned short bin_index; + +/** @brief Data known to every thread participating in + __gnu_parallel::parallel_random_shuffle(). */ +template<typename RandomAccessIterator> + struct DRandomShufflingGlobalData + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + /** @brief Begin iterator of the source. */ + RandomAccessIterator& source; + + /** @brief Temporary arrays for each thread. */ + value_type** temporaries; + + /** @brief Two-dimensional array to hold the thread-bin distribution. + * + * Dimensions (num_threads + 1) x (num_bins + 1). */ + difference_type** dist; + + /** @brief Start indexes of the threads' chunks. */ + difference_type* starts; + + /** @brief Number of the thread that will further process the + corresponding bin. */ + thread_index_t* bin_proc; + + /** @brief Number of bins to distribute to. */ + int num_bins; + + /** @brief Number of bits needed to address the bins. */ + int num_bits; + + /** @brief Constructor. */ + DRandomShufflingGlobalData(RandomAccessIterator& _source) + : source(_source) { } + }; + +/** @brief Local data for a thread participating in + __gnu_parallel::parallel_random_shuffle(). + */ +template<typename RandomAccessIterator, typename RandomNumberGenerator> + struct DRSSorterPU + { + /** @brief Number of threads participating in total. */ + int num_threads; + + /** @brief Begin index for bins taken care of by this thread. 
*/ + bin_index bins_begin; + + /** @brief End index for bins taken care of by this thread. */ + bin_index bins_end; + + /** @brief Random seed for this thread. */ + uint32 seed; + + /** @brief Pointer to global data. */ + DRandomShufflingGlobalData<RandomAccessIterator>* sd; + }; + +/** @brief Generate a random number in @c [0,2^logp). + * @param logp Logarithm (basis 2) of the upper range bound. + * @param rng Random number generator to use. + */ +template<typename RandomNumberGenerator> + inline int + random_number_pow2(int logp, RandomNumberGenerator& rng) + { return rng.genrand_bits(logp); } + +/** @brief Random shuffle code executed by each thread. + * @param pus Array of thread-local data records. */ +template<typename RandomAccessIterator, typename RandomNumberGenerator> + void + parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator, + RandomNumberGenerator>* pus) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + thread_index_t iam = omp_get_thread_num(); + DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[iam]; + DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd; + + // Indexing: dist[bin][processor] + difference_type length = sd->starts[iam + 1] - sd->starts[iam]; + bin_index* oracles = new bin_index[length]; + difference_type* dist = new difference_type[sd->num_bins + 1]; + bin_index* bin_proc = new bin_index[sd->num_bins]; + value_type** temporaries = new value_type*[d->num_threads]; + + // Compute oracles and count appearances. + for (bin_index b = 0; b < sd->num_bins + 1; ++b) + dist[b] = 0; + int num_bits = sd->num_bits; + + random_number rng(d->seed); + + // First main loop. + for (difference_type i = 0; i < length; ++i) + { + bin_index oracle = random_number_pow2(num_bits, rng); + oracles[i] = oracle; + + // To allow prefix (partial) sum. 
+ ++(dist[oracle + 1]); + } + + for (bin_index b = 0; b < sd->num_bins + 1; ++b) + sd->dist[b][iam + 1] = dist[b]; + +# pragma omp barrier + +# pragma omp single + { + // Sum up bins, sd->dist[s + 1][d->num_threads] now contains the + // total number of items in bin s + for (bin_index s = 0; s < sd->num_bins; ++s) + __gnu_sequential::partial_sum(sd->dist[s + 1], + sd->dist[s + 1] + d->num_threads + 1, + sd->dist[s + 1]); + } + +# pragma omp barrier + + sequence_index_t offset = 0, global_offset = 0; + for (bin_index s = 0; s < d->bins_begin; ++s) + global_offset += sd->dist[s + 1][d->num_threads]; + +# pragma omp barrier + + for (bin_index s = d->bins_begin; s < d->bins_end; ++s) + { + for (int t = 0; t < d->num_threads + 1; ++t) + sd->dist[s + 1][t] += offset; + offset = sd->dist[s + 1][d->num_threads]; + } + + sd->temporaries[iam] = static_cast<value_type*>( + ::operator new(sizeof(value_type) * offset)); + +# pragma omp barrier + + // Draw local copies to avoid false sharing. + for (bin_index b = 0; b < sd->num_bins + 1; ++b) + dist[b] = sd->dist[b][iam]; + for (bin_index b = 0; b < sd->num_bins; ++b) + bin_proc[b] = sd->bin_proc[b]; + for (thread_index_t t = 0; t < d->num_threads; ++t) + temporaries[t] = sd->temporaries[t]; + + RandomAccessIterator source = sd->source; + difference_type start = sd->starts[iam]; + + // Distribute according to oracles, second main loop. + for (difference_type i = 0; i < length; ++i) + { + bin_index target_bin = oracles[i]; + thread_index_t target_p = bin_proc[target_bin]; + + // Last column [d->num_threads] stays unchanged. + ::new(&(temporaries[target_p][dist[target_bin + 1]++])) + value_type(*(source + i + start)); + } + + delete[] oracles; + delete[] dist; + delete[] bin_proc; + delete[] temporaries; + +# pragma omp barrier + + // Shuffle bins internally. + for (bin_index b = d->bins_begin; b < d->bins_end; ++b) + { + value_type* begin = + sd->temporaries[iam] + + ((b == d->bins_begin) ? 
0 : sd->dist[b][d->num_threads]), + * end = + sd->temporaries[iam] + sd->dist[b + 1][d->num_threads]; + sequential_random_shuffle(begin, end, rng); + std::copy(begin, end, sd->source + global_offset + + ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads])); + } + + ::operator delete(sd->temporaries[iam]); + } + +/** @brief Round up to the next greater power of 2. + * @param x Integer to round up */ +template<typename T> + T + round_up_to_pow2(T x) + { + if (x <= 1) + return 1; + else + return (T)1 << (__log2(x - 1) + 1); + } + +/** @brief Main parallel random shuffle step. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param n Length of sequence. + * @param num_threads Number of threads to use. + * @param rng Random number generator to use. + */ +template<typename RandomAccessIterator, typename RandomNumberGenerator> + void + parallel_random_shuffle_drs(RandomAccessIterator begin, + RandomAccessIterator end, + typename std::iterator_traits + <RandomAccessIterator>::difference_type n, + thread_index_t num_threads, + RandomNumberGenerator& rng) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + _GLIBCXX_CALL(n) + + const _Settings& __s = _Settings::get(); + + if (num_threads > n) + num_threads = static_cast<thread_index_t>(n); + + bin_index num_bins, num_bins_cache; + +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 + // Try the L1 cache first. + + // Must fit into L1. + num_bins_cache = std::max<difference_type>( + 1, n / (__s.L1_cache_size_lb / sizeof(value_type))); + num_bins_cache = round_up_to_pow2(num_bins_cache); + + // No more buckets than TLB entries, power of 2 + // Power of 2 and at least one element per bin, at most the TLB size. + num_bins = std::min<difference_type>(n, num_bins_cache); + +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB + // 2 TLB entries needed per bin. 
+ num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins); +#endif + num_bins = round_up_to_pow2(num_bins); + + if (num_bins < num_bins_cache) + { +#endif + // Now try the L2 cache + // Must fit into L2 + num_bins_cache = static_cast<bin_index>(std::max<difference_type>( + 1, n / (__s.L2_cache_size / sizeof(value_type)))); + num_bins_cache = round_up_to_pow2(num_bins_cache); + + // No more buckets than TLB entries, power of 2. + num_bins = static_cast<bin_index>( + std::min(n, static_cast<difference_type>(num_bins_cache))); + // Power of 2 and at least one element per bin, at most the TLB size. +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB + // 2 TLB entries needed per bin. + num_bins = std::min( + static_cast<difference_type>(__s.TLB_size / 2), num_bins); +#endif + num_bins = round_up_to_pow2(num_bins); +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 + } +#endif + + num_threads = std::min<bin_index>(num_threads, num_bins); + + if (num_threads <= 1) + return sequential_random_shuffle(begin, end, rng); + + DRandomShufflingGlobalData<RandomAccessIterator> sd(begin); + DRSSorterPU<RandomAccessIterator, random_number >* pus; + difference_type* starts; + +# pragma omp parallel num_threads(num_threads) + { + thread_index_t num_threads = omp_get_num_threads(); +# pragma omp single + { + pus = new DRSSorterPU<RandomAccessIterator, random_number> + [num_threads]; + + sd.temporaries = new value_type*[num_threads]; + sd.dist = new difference_type*[num_bins + 1]; + sd.bin_proc = new thread_index_t[num_bins]; + for (bin_index b = 0; b < num_bins + 1; ++b) + sd.dist[b] = new difference_type[num_threads + 1]; + for (bin_index b = 0; b < (num_bins + 1); ++b) + { + sd.dist[0][0] = 0; + sd.dist[b][0] = 0; + } + starts = sd.starts = new difference_type[num_threads + 1]; + int bin_cursor = 0; + sd.num_bins = num_bins; + sd.num_bits = __log2(num_bins); + + difference_type chunk_length = n / num_threads, + split = n % num_threads, start = 0; + difference_type bin_chunk_length = num_bins / 
num_threads, + bin_split = num_bins % num_threads; + for (thread_index_t i = 0; i < num_threads; ++i) + { + starts[i] = start; + start += (i < split) ? (chunk_length + 1) : chunk_length; + int j = pus[i].bins_begin = bin_cursor; + + // Range of bins for this processor. + bin_cursor += (i < bin_split) ? + (bin_chunk_length + 1) : bin_chunk_length; + pus[i].bins_end = bin_cursor; + for (; j < bin_cursor; ++j) + sd.bin_proc[j] = i; + pus[i].num_threads = num_threads; + pus[i].seed = rng(std::numeric_limits<uint32>::max()); + pus[i].sd = &sd; + } + starts[num_threads] = start; + } //single + // Now shuffle in parallel. + parallel_random_shuffle_drs_pu(pus); + } // parallel + + delete[] starts; + delete[] sd.bin_proc; + for (int s = 0; s < (num_bins + 1); ++s) + delete[] sd.dist[s]; + delete[] sd.dist; + delete[] sd.temporaries; + + delete[] pus; + } + +/** @brief Sequential cache-efficient random shuffle. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param rng Random number generator to use. + */ +template<typename RandomAccessIterator, typename RandomNumberGenerator> + void + sequential_random_shuffle(RandomAccessIterator begin, + RandomAccessIterator end, + RandomNumberGenerator& rng) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + const _Settings& __s = _Settings::get(); + + bin_index num_bins, num_bins_cache; + +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 + // Try the L1 cache first, must fit into L1. 
+ num_bins_cache = + std::max<difference_type> + (1, n / (__s.L1_cache_size_lb / sizeof(value_type))); + num_bins_cache = round_up_to_pow2(num_bins_cache); + + // No more buckets than TLB entries, power of 2 + // Power of 2 and at least one element per bin, at most the TLB size + num_bins = std::min(n, (difference_type)num_bins_cache); +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB + // 2 TLB entries needed per bin + num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins); +#endif + num_bins = round_up_to_pow2(num_bins); + + if (num_bins < num_bins_cache) + { +#endif + // Now try the L2 cache, must fit into L2. + num_bins_cache = + static_cast<bin_index>(std::max<difference_type>( + 1, n / (__s.L2_cache_size / sizeof(value_type)))); + num_bins_cache = round_up_to_pow2(num_bins_cache); + + // No more buckets than TLB entries, power of 2 + // Power of 2 and at least one element per bin, at most the TLB size. + num_bins = static_cast<bin_index> + (std::min(n, static_cast<difference_type>(num_bins_cache))); + +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB + // 2 TLB entries needed per bin + num_bins = + std::min<difference_type>(__s.TLB_size / 2, num_bins); +#endif + num_bins = round_up_to_pow2(num_bins); +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 + } +#endif + + int num_bits = __log2(num_bins); + + if (num_bins > 1) + { + value_type* target = static_cast<value_type*>( + ::operator new(sizeof(value_type) * n)); + bin_index* oracles = new bin_index[n]; + difference_type* dist0 = new difference_type[num_bins + 1], + * dist1 = new difference_type[num_bins + 1]; + + for (int b = 0; b < num_bins + 1; ++b) + dist0[b] = 0; + + random_number bitrng(rng(0xFFFFFFFF)); + + for (difference_type i = 0; i < n; ++i) + { + bin_index oracle = random_number_pow2(num_bits, bitrng); + oracles[i] = oracle; + + // To allow prefix (partial) sum. + ++(dist0[oracle + 1]); + } + + // Sum up bins. 
+ __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0); + + for (int b = 0; b < num_bins + 1; ++b) + dist1[b] = dist0[b]; + + // Distribute according to oracles. + for (difference_type i = 0; i < n; ++i) + ::new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i)); + + for (int b = 0; b < num_bins; ++b) + { + sequential_random_shuffle(target + dist1[b], + target + dist1[b + 1], + rng); + } + + // Copy elements back. + std::copy(target, target + n, begin); + + delete[] dist0; + delete[] dist1; + delete[] oracles; + ::operator delete(target); + } + else + __gnu_sequential::random_shuffle(begin, end, rng); + } + +/** @brief Parallel random public call. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param rng Random number generator to use. + */ +template<typename RandomAccessIterator, typename RandomNumberGenerator> + inline void + parallel_random_shuffle(RandomAccessIterator begin, + RandomAccessIterator end, + RandomNumberGenerator rng = random_number()) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::difference_type difference_type; + difference_type n = end - begin; + parallel_random_shuffle_drs(begin, end, n, get_max_threads(), rng) ; + } + +} + +#endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/search.h b/gcc-4.4.0/libstdc++-v3/include/parallel/search.h new file mode 100644 index 000000000..31a853402 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/search.h @@ -0,0 +1,172 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. 
+ +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/search.h + * @brief Parallel implementation base for std::search() and + * std::search_n(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_SEARCH_H +#define _GLIBCXX_PARALLEL_SEARCH_H 1 + +#include <bits/stl_algobase.h> + +#include <parallel/parallel.h> +#include <parallel/equally_split.h> + + +namespace __gnu_parallel +{ + /** + * @brief Precalculate advances for Knuth-Morris-Pratt algorithm. + * @param elements Begin iterator of sequence to search for. + * @param length Length of sequence to search for. + * @param advances Returned offsets. + */ +template<typename RandomAccessIterator, typename _DifferenceTp> + void + calc_borders(RandomAccessIterator elements, _DifferenceTp length, + _DifferenceTp* off) + { + typedef _DifferenceTp difference_type; + + off[0] = -1; + if (length > 1) + off[1] = 0; + difference_type k = 0; + for (difference_type j = 2; j <= length; j++) + { + while ((k >= 0) && !(elements[k] == elements[j-1])) + k = off[k]; + off[j] = ++k; + } + } + + // Generic parallel find algorithm (requires random access iterator). + + /** @brief Parallel std::search. + * @param begin1 Begin iterator of first sequence. 
+ * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param end2 End iterator of second sequence. + * @param pred Find predicate. + * @return Place of finding in first sequences. */ +template<typename _RandomAccessIterator1, + typename _RandomAccessIterator2, + typename Pred> + _RandomAccessIterator1 + search_template(_RandomAccessIterator1 begin1, _RandomAccessIterator1 end1, + _RandomAccessIterator2 begin2, _RandomAccessIterator2 end2, + Pred pred) + { + typedef std::iterator_traits<_RandomAccessIterator1> traits_type; + typedef typename traits_type::difference_type difference_type; + + _GLIBCXX_CALL((end1 - begin1) + (end2 - begin2)); + + difference_type pattern_length = end2 - begin2; + + // Pattern too short. + if(pattern_length <= 0) + return end1; + + // Last point to start search. + difference_type input_length = (end1 - begin1) - pattern_length; + + // Where is first occurrence of pattern? defaults to end. + difference_type result = (end1 - begin1); + difference_type *splitters; + + // Pattern too long. + if (input_length < 0) + return end1; + + omp_lock_t result_lock; + omp_init_lock(&result_lock); + + thread_index_t num_threads = + std::max<difference_type>(1, + std::min<difference_type>(input_length, get_max_threads())); + + difference_type advances[pattern_length]; + calc_borders(begin2, pattern_length, advances); + +# pragma omp parallel num_threads(num_threads) + { +# pragma omp single + { + num_threads = omp_get_num_threads(); + splitters = new difference_type[num_threads + 1]; + equally_split(input_length, num_threads, splitters); + } + + thread_index_t iam = omp_get_thread_num(); + + difference_type start = splitters[iam], stop = splitters[iam + 1]; + + difference_type pos_in_pattern = 0; + bool found_pattern = false; + + while (start <= stop && !found_pattern) + { + // Get new value of result. + #pragma omp flush(result) + // No chance for this thread to find first occurrence. 
+ if (result < start) + break; + while (pred(begin1[start + pos_in_pattern], + begin2[pos_in_pattern])) + { + ++pos_in_pattern; + if (pos_in_pattern == pattern_length) + { + // Found new candidate for result. + omp_set_lock(&result_lock); + result = std::min(result, start); + omp_unset_lock(&result_lock); + + found_pattern = true; + break; + } + } + // Make safe jump. + start += (pos_in_pattern - advances[pos_in_pattern]); + pos_in_pattern = + (advances[pos_in_pattern] < 0) ? 0 : advances[pos_in_pattern]; + } + } //parallel + + omp_destroy_lock(&result_lock); + + delete[] splitters; + + // Return iterator on found element. + return (begin1 + result); + } +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_SEARCH_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/set_operations.h b/gcc-4.4.0/libstdc++-v3/include/parallel/set_operations.h new file mode 100644 index 000000000..28008195b --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/set_operations.h @@ -0,0 +1,524 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. 
+ +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** + * @file parallel/set_operations.h + * @brief Parallel implementations of set operations for random-access + * iterators. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Marius Elvert and Felix Bondarenko. + +#ifndef _GLIBCXX_PARALLEL_SET_OPERATIONS_H +#define _GLIBCXX_PARALLEL_SET_OPERATIONS_H 1 + +#include <omp.h> + +#include <parallel/settings.h> +#include <parallel/multiseq_selection.h> + +namespace __gnu_parallel +{ +template<typename InputIterator, typename OutputIterator> + OutputIterator + copy_tail(std::pair<InputIterator, InputIterator> b, + std::pair<InputIterator, InputIterator> e, OutputIterator r) + { + if (b.first != e.first) + { + do + { + *r++ = *b.first++; + } + while (b.first != e.first); + } + else + { + while (b.second != e.second) + *r++ = *b.second++; + } + return r; + } + +template<typename InputIterator, + typename OutputIterator, + typename Comparator> + struct symmetric_difference_func + { + typedef std::iterator_traits<InputIterator> traits_type; + typedef typename traits_type::difference_type difference_type; + typedef typename std::pair<InputIterator, InputIterator> iterator_pair; + + symmetric_difference_func(Comparator c) : comp(c) {} + + Comparator comp; + + OutputIterator + invoke(InputIterator a, InputIterator b, + InputIterator c, InputIterator d, + OutputIterator r) const + { + while (a != b && c != d) + { + if (comp(*a, *c)) + { + *r = *a; + ++a; + ++r; + } + else if (comp(*c, *a)) + { + *r = *c; + ++c; + ++r; + } + else + { + ++a; + ++c; + } + } + return std::copy(c, d, std::copy(a, b, r)); + } + + difference_type + count(InputIterator a, InputIterator b, + InputIterator c, InputIterator d) const + { + difference_type 
counter = 0; + + while (a != b && c != d) + { + if (comp(*a, *c)) + { + ++a; + ++counter; + } + else if (comp(*c, *a)) + { + ++c; + ++counter; + } + else + { + ++a; + ++c; + } + } + + return counter + (b - a) + (d - c); + } + + OutputIterator + first_empty(InputIterator c, InputIterator d, OutputIterator out) const + { return std::copy(c, d, out); } + + OutputIterator + second_empty(InputIterator a, InputIterator b, OutputIterator out) const + { return std::copy(a, b, out); } + }; + + +template<typename InputIterator, + typename OutputIterator, + typename Comparator> + struct difference_func + { + typedef std::iterator_traits<InputIterator> traits_type; + typedef typename traits_type::difference_type difference_type; + typedef typename std::pair<InputIterator, InputIterator> iterator_pair; + + difference_func(Comparator c) : comp(c) {} + + Comparator comp; + + OutputIterator + invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d, + OutputIterator r) const + { + while (a != b && c != d) + { + if (comp(*a, *c)) + { + *r = *a; + ++a; + ++r; + } + else if (comp(*c, *a)) + { ++c; } + else + { + ++a; + ++c; + } + } + return std::copy(a, b, r); + } + + difference_type + count(InputIterator a, InputIterator b, + InputIterator c, InputIterator d) const + { + difference_type counter = 0; + + while (a != b && c != d) + { + if (comp(*a, *c)) + { + ++a; + ++counter; + } + else if (comp(*c, *a)) + { ++c; } + else + { ++a; ++c; } + } + + return counter + (b - a); + } + + inline OutputIterator + first_empty(InputIterator c, InputIterator d, OutputIterator out) const + { return out; } + + inline OutputIterator + second_empty(InputIterator a, InputIterator b, OutputIterator out) const + { return std::copy(a, b, out); } + }; + + +template<typename InputIterator, + typename OutputIterator, + typename Comparator> + struct intersection_func + { + typedef std::iterator_traits<InputIterator> traits_type; + typedef typename traits_type::difference_type difference_type; 
+ typedef typename std::pair<InputIterator, InputIterator> iterator_pair; + + intersection_func(Comparator c) : comp(c) {} + + Comparator comp; + + OutputIterator + invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d, + OutputIterator r) const + { + while (a != b && c != d) + { + if (comp(*a, *c)) + { ++a; } + else if (comp(*c, *a)) + { ++c; } + else + { + *r = *a; + ++a; + ++c; + ++r; + } + } + + return r; + } + + difference_type + count(InputIterator a, InputIterator b, + InputIterator c, InputIterator d) const + { + difference_type counter = 0; + + while (a != b && c != d) + { + if (comp(*a, *c)) + { ++a; } + else if (comp(*c, *a)) + { ++c; } + else + { + ++a; + ++c; + ++counter; + } + } + + return counter; + } + + inline OutputIterator + first_empty(InputIterator c, InputIterator d, OutputIterator out) const + { return out; } + + inline OutputIterator + second_empty(InputIterator a, InputIterator b, OutputIterator out) const + { return out; } + }; + +template<class InputIterator, class OutputIterator, class Comparator> + struct union_func + { + typedef typename std::iterator_traits<InputIterator>::difference_type + difference_type; + + union_func(Comparator c) : comp(c) {} + + Comparator comp; + + OutputIterator + invoke(InputIterator a, const InputIterator b, InputIterator c, + const InputIterator d, OutputIterator r) const + { + while (a != b && c != d) + { + if (comp(*a, *c)) + { + *r = *a; + ++a; + } + else if (comp(*c, *a)) + { + *r = *c; + ++c; + } + else + { + *r = *a; + ++a; + ++c; + } + ++r; + } + return std::copy(c, d, std::copy(a, b, r)); + } + + difference_type + count(InputIterator a, InputIterator b, + InputIterator c, InputIterator d) const + { + difference_type counter = 0; + + while (a != b && c != d) + { + if (comp(*a, *c)) + { ++a; } + else if (comp(*c, *a)) + { ++c; } + else + { + ++a; + ++c; + } + ++counter; + } + + counter += (b - a); + counter += (d - c); + return counter; + } + + inline OutputIterator + 
first_empty(InputIterator c, InputIterator d, OutputIterator out) const + { return std::copy(c, d, out); } + + inline OutputIterator + second_empty(InputIterator a, InputIterator b, OutputIterator out) const + { return std::copy(a, b, out); } + }; + +template<typename InputIterator, + typename OutputIterator, + typename Operation> + OutputIterator + parallel_set_operation(InputIterator begin1, InputIterator end1, + InputIterator begin2, InputIterator end2, + OutputIterator result, Operation op) + { + _GLIBCXX_CALL((end1 - begin1) + (end2 - begin2)) + + typedef std::iterator_traits<InputIterator> traits_type; + typedef typename traits_type::difference_type difference_type; + typedef typename std::pair<InputIterator, InputIterator> iterator_pair; + + if (begin1 == end1) + return op.first_empty(begin2, end2, result); + + if (begin2 == end2) + return op.second_empty(begin1, end1, result); + + const difference_type size = (end1 - begin1) + (end2 - begin2); + + const iterator_pair sequence[ 2 ] = + { std::make_pair(begin1, end1), std::make_pair(begin2, end2) } ; + OutputIterator return_value = result; + difference_type *borders; + iterator_pair *block_begins; + difference_type* lengths; + + thread_index_t num_threads = + std::min<difference_type>(get_max_threads(), + std::min(end1 - begin1, end2 - begin2)); + +# pragma omp parallel num_threads(num_threads) + { +# pragma omp single + { + num_threads = omp_get_num_threads(); + + borders = new difference_type[num_threads + 2]; + equally_split(size, num_threads + 1, borders); + block_begins = new iterator_pair[num_threads + 1]; + // Very start. + block_begins[0] = std::make_pair(begin1, begin2); + lengths = new difference_type[num_threads]; + } //single + + thread_index_t iam = omp_get_thread_num(); + + // Result from multiseq_partition. + InputIterator offset[2]; + const difference_type rank = borders[iam + 1]; + + multiseq_partition(sequence, sequence + 2, rank, offset, op.comp); + + // allowed to read? 
+ // together + // *(offset[ 0 ] - 1) == *offset[ 1 ] + if (offset[ 0 ] != begin1 && offset[ 1 ] != end2 + && !op.comp(*(offset[ 0 ] - 1), *offset[ 1 ]) + && !op.comp(*offset[ 1 ], *(offset[ 0 ] - 1))) + { + // Avoid split between globally equal elements: move one to + // front in first sequence. + --offset[ 0 ]; + } + + iterator_pair block_end = block_begins[ iam + 1 ] = + iterator_pair(offset[ 0 ], offset[ 1 ]); + + // Make sure all threads have their block_begin result written out. +# pragma omp barrier + + iterator_pair block_begin = block_begins[ iam ]; + + // Begin working for the first block, while the others except + // the last start to count. + if (iam == 0) + { + // The first thread can copy already. + lengths[ iam ] = op.invoke(block_begin.first, block_end.first, + block_begin.second, block_end.second, + result) + - result; + } + else + { + lengths[ iam ] = op.count(block_begin.first, block_end.first, + block_begin.second, block_end.second); + } + + // Make sure everyone wrote their lengths. +# pragma omp barrier + + OutputIterator r = result; + + if (iam == 0) + { + // Do the last block. + for (int i = 0; i < num_threads; ++i) + r += lengths[i]; + + block_begin = block_begins[num_threads]; + + // Return the result iterator of the last block. + return_value = op.invoke( + block_begin.first, end1, block_begin.second, end2, r); + + } + else + { + for (int i = 0; i < iam; ++i) + r += lengths[ i ]; + + // Reset begins for copy pass. 
+ op.invoke(block_begin.first, block_end.first, + block_begin.second, block_end.second, r); + } + } + return return_value; + } + + +template<typename InputIterator, + typename OutputIterator, + typename Comparator> + inline OutputIterator + parallel_set_union(InputIterator begin1, InputIterator end1, + InputIterator begin2, InputIterator end2, + OutputIterator result, Comparator comp) + { + return parallel_set_operation(begin1, end1, begin2, end2, result, + union_func< InputIterator, OutputIterator, Comparator>(comp)); + } + +template<typename InputIterator, + typename OutputIterator, + typename Comparator> + inline OutputIterator + parallel_set_intersection(InputIterator begin1, InputIterator end1, + InputIterator begin2, InputIterator end2, + OutputIterator result, Comparator comp) + { + return parallel_set_operation(begin1, end1, begin2, end2, result, + intersection_func<InputIterator, OutputIterator, Comparator>(comp)); + } + +template<typename InputIterator, + typename OutputIterator, + typename Comparator> + inline OutputIterator + parallel_set_difference(InputIterator begin1, InputIterator end1, + InputIterator begin2, InputIterator end2, + OutputIterator result, Comparator comp) + { + return parallel_set_operation(begin1, end1, begin2, end2, result, + difference_func<InputIterator, OutputIterator, Comparator>(comp)); + } + +template<typename InputIterator, + typename OutputIterator, + typename Comparator> + inline OutputIterator + parallel_set_symmetric_difference(InputIterator begin1, InputIterator end1, + InputIterator begin2, InputIterator end2, + OutputIterator result, Comparator comp) + { + return parallel_set_operation(begin1, end1, begin2, end2, result, + symmetric_difference_func<InputIterator, OutputIterator, Comparator> + (comp)); + } + +} + +#endif /* _GLIBCXX_PARALLEL_SET_OPERATIONS_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/settings.h b/gcc-4.4.0/libstdc++-v3/include/parallel/settings.h new file mode 100644 index 
000000000..946e39cab --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/settings.h @@ -0,0 +1,281 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/settings.h + * @brief Runtime settings and tuning parameters, heuristics to decide + * whether to use parallelized algorithms. + * This file is a GNU parallel extension to the Standard C++ Library. + * + * @section parallelization_decision + * The decision whether to run an algorithm in parallel. + * + * There are several ways the user can switch on and off the parallel + * execution of an algorithm, both at compile- and run-time. + * + * Only sequential execution can be forced at compile-time. This + * reduces code size and protects code parts that have + * non-thread-safe side effects. + * + * Ultimately, forcing parallel execution at compile-time makes + * sense. 
Often, the sequential algorithm implementation is used as + * a subroutine, so no reduction in code size can be achieved. Also, + * the machine the program is run on might have only one processor + * core, so to avoid overhead, the algorithm is executed + * sequentially. + * + * To force sequential execution of an algorithm ultimately at + * compile-time, the user must add the tag + * __gnu_parallel::sequential_tag() to the end of the parameter list, + * e. g. + * + * \code + * std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag()); + * \endcode + * + * This is compatible with all overloaded algorithm variants. No + * additional code will be instantiated, at all. The same holds for + * most algorithm calls with iterators not providing random access. + * + * If the algorithm call is not forced to be executed sequentially + * at compile-time, the decision is made at run-time. + * The global variable __gnu_parallel::_Settings::algorithm_strategy + * is checked. It is a tristate variable corresponding to: + * + * a. force_sequential, meaning the sequential algorithm is executed. + * b. force_parallel, meaning the parallel algorithm is executed. + * c. heuristic + * + * For heuristic, the parallel algorithm implementation is called + * only if the input size is sufficiently large. For most + * algorithms, the input size is the (combined) length of the input + * sequence(s). The threshold can be set by the user, individually + * for each algorithm. The according variables are called + * __gnu_parallel::_Settings::[algorithm]_minimal_n . + * + * For some of the algorithms, there are even more tuning options, + * e. g. the ability to choose from multiple algorithm variants. See + * below for details. + */ + +// Written by Johannes Singler and Felix Putze. 
+ +#ifndef _GLIBCXX_PARALLEL_SETTINGS_H +#define _GLIBCXX_PARALLEL_SETTINGS_H 1 + +#include <parallel/types.h> + +/** + * @brief Determine at compile(?)-time if the parallel variant of an + * algorithm should be called. + * @param c A condition that is convertible to bool that is overruled by + * __gnu_parallel::_Settings::algorithm_strategy. Usually a decision + * based on the input size. + */ +#define _GLIBCXX_PARALLEL_CONDITION(c) (__gnu_parallel::_Settings::get().algorithm_strategy != __gnu_parallel::force_sequential && ((__gnu_parallel::get_max_threads() > 1 && (c)) || __gnu_parallel::_Settings::get().algorithm_strategy == __gnu_parallel::force_parallel)) + +/* +inline bool +parallel_condition(bool c) +{ + bool ret = false; + const _Settings& s = _Settings::get(); + if (s.algorithm_strategy != force_seqential) + { + if (s.algorithm_strategy == force_parallel) + ret = true; + else + ret = get_max_threads() > 1 && c; + } + return ret; +} +*/ + +namespace __gnu_parallel +{ + /// class _Settings + /// Run-time settings for the parallel mode, including all tunable parameters. + struct _Settings + { + _AlgorithmStrategy algorithm_strategy; + + _SortAlgorithm sort_algorithm; + _PartialSumAlgorithm partial_sum_algorithm; + _MultiwayMergeAlgorithm multiway_merge_algorithm; + _FindAlgorithm find_algorithm; + + _SplittingAlgorithm sort_splitting; + _SplittingAlgorithm merge_splitting; + _SplittingAlgorithm multiway_merge_splitting; + + // Per-algorithm settings. + + /// Minimal input size for accumulate. + sequence_index_t accumulate_minimal_n; + + /// Minimal input size for adjacent_difference. + unsigned int adjacent_difference_minimal_n; + + /// Minimal input size for count and count_if. + sequence_index_t count_minimal_n; + + /// Minimal input size for fill. + sequence_index_t fill_minimal_n; + + /// Block size increase factor for find. + double find_increasing_factor; + + /// Initial block size for find. 
+ sequence_index_t find_initial_block_size; + + /// Maximal block size for find. + sequence_index_t find_maximum_block_size; + + /// Start with looking for this many elements sequentially, for find. + sequence_index_t find_sequential_search_size; + + /// Minimal input size for for_each. + sequence_index_t for_each_minimal_n; + + /// Minimal input size for generate. + sequence_index_t generate_minimal_n; + + /// Minimal input size for max_element. + sequence_index_t max_element_minimal_n; + + /// Minimal input size for merge. + sequence_index_t merge_minimal_n; + + /// Oversampling factor for merge. + unsigned int merge_oversampling; + + /// Minimal input size for min_element. + sequence_index_t min_element_minimal_n; + + /// Minimal input size for multiway_merge. + sequence_index_t multiway_merge_minimal_n; + + /// Minimal k for multiway_merge (presumably the minimal number of + /// input sequences -- TODO confirm against the multiway_merge callers). + int multiway_merge_minimal_k; + + /// Oversampling factor for multiway_merge. + unsigned int multiway_merge_oversampling; + + /// Minimal input size for nth_element. + sequence_index_t nth_element_minimal_n; + + /// Chunk size for partition. + sequence_index_t partition_chunk_size; + + /// Chunk size for partition, relative to input size. If > 0.0, + /// this value overrides partition_chunk_size. + double partition_chunk_share; + + /// Minimal input size for partition. + sequence_index_t partition_minimal_n; + + /// Minimal input size for partial_sort. + sequence_index_t partial_sort_minimal_n; + + /// Ratio for partial_sum. Assume "sum and write result" to be + /// this factor slower than just "sum". + float partial_sum_dilation; + + /// Minimal input size for partial_sum. + unsigned int partial_sum_minimal_n; + + /// Minimal input size for random_shuffle. + unsigned int random_shuffle_minimal_n; + + /// Minimal input size for replace and replace_if. + sequence_index_t replace_minimal_n; + + /// Minimal input size for set_difference.
+ sequence_index_t set_difference_minimal_n; + + /// Minimal input size for set_intersection. + sequence_index_t set_intersection_minimal_n; + + /// Minimal input size for set_symmetric_difference. + sequence_index_t set_symmetric_difference_minimal_n; + + /// Minimal input size for set_union. + sequence_index_t set_union_minimal_n; + + /// Minimal input size for parallel sorting. + sequence_index_t sort_minimal_n; + + /// Oversampling factor for parallel std::sort (MWMS). + unsigned int sort_mwms_oversampling; + + /// Such many samples to take to find a good pivot (quicksort). + unsigned int sort_qs_num_samples_preset; + + /// Maximal subsequence length to switch to unbalanced base case. + /// Applies to std::sort with dynamically load-balanced quicksort. + sequence_index_t sort_qsb_base_case_maximal_n; + + /// Minimal input size for parallel std::transform. + sequence_index_t transform_minimal_n; + + /// Minimal input size for unique_copy. + sequence_index_t unique_copy_minimal_n; + + sequence_index_t workstealing_chunk_size; + + // Hardware dependent tuning parameters. + + /// Size of the L1 cache in bytes (underestimation). + unsigned long long L1_cache_size; + + /// Size of the L2 cache in bytes (underestimation). + unsigned long long L2_cache_size; + + /// Size of the Translation Lookaside Buffer (underestimation). + unsigned int TLB_size; + + /// Overestimation of cache line size. Used to avoid false + /// sharing, i. e. elements of different threads are at least this + /// amount apart. + unsigned int cache_line_size; + + // Statistics. + + /// The number of stolen ranges in load-balanced quicksort. + sequence_index_t qsb_steals; + + /// Get the global settings. + static const _Settings& + get() throw(); + + /// Set the global settings. 
+ static void + set(_Settings&) throw(); + + explicit + _Settings() : algorithm_strategy(heuristic), sort_algorithm(MWMS), partial_sum_algorithm(LINEAR), multiway_merge_algorithm(LOSER_TREE), find_algorithm(CONSTANT_SIZE_BLOCKS), sort_splitting(EXACT), merge_splitting(EXACT), multiway_merge_splitting(EXACT), accumulate_minimal_n(1000), adjacent_difference_minimal_n(1000), count_minimal_n(1000), fill_minimal_n(1000), find_increasing_factor(2.0), find_initial_block_size(256), find_maximum_block_size(8192), find_sequential_search_size(256), for_each_minimal_n(1000), generate_minimal_n(1000), max_element_minimal_n(1000), merge_minimal_n(1000), merge_oversampling(10), min_element_minimal_n(1000), multiway_merge_minimal_n(1000), multiway_merge_minimal_k(2), multiway_merge_oversampling(10), nth_element_minimal_n(1000), partition_chunk_size(1000), partition_chunk_share(0.0), partition_minimal_n(1000), partial_sort_minimal_n(1000), partial_sum_dilation(1.0f), partial_sum_minimal_n(1000), random_shuffle_minimal_n(1000), replace_minimal_n(1000), set_difference_minimal_n(1000), set_intersection_minimal_n(1000), set_symmetric_difference_minimal_n(1000), set_union_minimal_n(1000), sort_minimal_n(1000), sort_mwms_oversampling(10), sort_qs_num_samples_preset(100), sort_qsb_base_case_maximal_n(100), transform_minimal_n(1000), unique_copy_minimal_n(10000), workstealing_chunk_size(100), L1_cache_size(16 << 10), L2_cache_size(256 << 10), TLB_size(128), cache_line_size(64), qsb_steals(0) + { } + }; +} + +#endif /* _GLIBCXX_PARALLEL_SETTINGS_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/sort.h b/gcc-4.4.0/libstdc++-v3/include/parallel/sort.h new file mode 100644 index 000000000..de226e8b4 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/sort.h @@ -0,0 +1,229 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/sort.h + * @brief Parallel sorting algorithm switch. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_SORT_H +#define _GLIBCXX_PARALLEL_SORT_H 1 + +#include <parallel/basic_iterator.h> +#include <parallel/features.h> +#include <parallel/parallel.h> + +#if _GLIBCXX_ASSERTIONS +#include <parallel/checkers.h> +#endif + +#if _GLIBCXX_MERGESORT +#include <parallel/multiway_mergesort.h> +#endif + +#if _GLIBCXX_QUICKSORT +#include <parallel/quicksort.h> +#endif + +#if _GLIBCXX_BAL_QUICKSORT +#include <parallel/balanced_quicksort.h> +#endif + +namespace __gnu_parallel +{ + //prototype + template<bool stable, typename RandomAccessIterator, + typename Comparator, typename Parallelism> + void + parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, Parallelism parallelism); + + /** + * @brief Choose multiway mergesort, splitting variant at run-time, + * for parallel sorting. 
+ * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param comp Comparator. + * @callgraph + */ + template<bool stable, typename RandomAccessIterator, typename Comparator> + inline void + parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, multiway_mergesort_tag parallelism) + { + _GLIBCXX_CALL(end - begin) + + if(_Settings::get().sort_splitting == EXACT) + parallel_sort_mwms<stable, true> + (begin, end, comp, parallelism.get_num_threads()); + else + parallel_sort_mwms<stable, false> + (begin, end, comp, parallelism.get_num_threads()); + } + + /** + * @brief Choose multiway mergesort with exact splitting, + * for parallel sorting. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param comp Comparator. + * @callgraph + */ + template<bool stable, typename RandomAccessIterator, typename Comparator> + inline void + parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, multiway_mergesort_exact_tag parallelism) + { + _GLIBCXX_CALL(end - begin) + + parallel_sort_mwms<stable, true> + (begin, end, comp, parallelism.get_num_threads()); + } + + /** + * @brief Choose multiway mergesort with splitting by sampling, + * for parallel sorting. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param comp Comparator. + * @callgraph + */ + template<bool stable, typename RandomAccessIterator, typename Comparator> + inline void + parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, multiway_mergesort_sampling_tag parallelism) + { + _GLIBCXX_CALL(end - begin) + + parallel_sort_mwms<stable, false> + (begin, end, comp, parallelism.get_num_threads()); + } + + /** + * @brief Choose quicksort for parallel sorting. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param comp Comparator. 
+ * @callgraph + */ + template<bool stable, typename RandomAccessIterator, typename Comparator> + inline void + parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, quicksort_tag parallelism) + { + _GLIBCXX_CALL(end - begin) + + _GLIBCXX_PARALLEL_ASSERT(stable == false); + + parallel_sort_qs(begin, end, comp, parallelism.get_num_threads()); + } + + /** + * @brief Choose balanced quicksort for parallel sorting. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param comp Comparator. + * @param stable Sort stable. + * @callgraph + */ + template<bool stable, typename RandomAccessIterator, typename Comparator> + inline void + parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, balanced_quicksort_tag parallelism) + { + _GLIBCXX_CALL(end - begin) + + _GLIBCXX_PARALLEL_ASSERT(stable == false); + + parallel_sort_qsb(begin, end, comp, parallelism.get_num_threads()); + } + + + /** + * @brief Choose multiway mergesort with exact splitting, + * for parallel sorting. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param comp Comparator. + * @callgraph + */ + template<bool stable, typename RandomAccessIterator, typename Comparator> + inline void + parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, default_parallel_tag parallelism) + { + _GLIBCXX_CALL(end - begin) + + parallel_sort<stable> + (begin, end, comp, + multiway_mergesort_exact_tag(parallelism.get_num_threads())); + } + + + /** + * @brief Choose a parallel sorting algorithm. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param comp Comparator. + * @param stable Sort stable. 
+ * @callgraph + */ + template<bool stable, typename RandomAccessIterator, typename Comparator> + inline void + parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, parallel_tag parallelism) + { + _GLIBCXX_CALL(end - begin) + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + if (false) ; +#if _GLIBCXX_MERGESORT + else if (stable || _Settings::get().sort_algorithm == MWMS) + { + if(_Settings::get().sort_splitting == EXACT) + parallel_sort_mwms<stable, true> + (begin, end, comp, parallelism.get_num_threads()); + else + parallel_sort_mwms<false, false> + (begin, end, comp, parallelism.get_num_threads()); + } +#endif +#if _GLIBCXX_QUICKSORT + else if (_Settings::get().sort_algorithm == QS) + parallel_sort_qs(begin, end, comp, parallelism.get_num_threads()); +#endif +#if _GLIBCXX_BAL_QUICKSORT + else if (_Settings::get().sort_algorithm == QS_BALANCED) + parallel_sort_qsb(begin, end, comp, parallelism.get_num_threads()); +#endif + else + __gnu_sequential::sort(begin, end, comp); + } +} // end namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_SORT_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/tags.h b/gcc-4.4.0/libstdc++-v3/include/parallel/tags.h new file mode 100644 index 000000000..bc1a7e5c6 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/tags.h @@ -0,0 +1,191 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. 
+ +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** + * @file parallel/tags.h + * @brief Tags for compile-time selection. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_TAGS_H +#define _GLIBCXX_PARALLEL_TAGS_H 1 + +#include <omp.h> +#include <parallel/types.h> + +namespace __gnu_parallel +{ + /** @brief Forces sequential execution at compile time. */ + struct sequential_tag { }; + + /** @brief Recommends parallel execution at compile time, + * optionally using a user-specified number of threads. */ + struct parallel_tag + { + private: + thread_index_t num_threads; + + public: + /** @brief Default constructor. Use default number of threads + * (stored as 0, see get_num_threads()). */ + parallel_tag() + { + this->num_threads = 0; + } + + /** @brief Constructor. Recommend number of threads to use. + * @param num_threads Desired number of threads. */ + parallel_tag(thread_index_t num_threads) + { + this->num_threads = num_threads; + } + + /** @brief Find out desired number of threads. + * @return Desired number of threads; omp_get_max_threads() if the + * stored number is 0. */ + inline thread_index_t get_num_threads() + { + if(num_threads == 0) + return omp_get_max_threads(); + else + return num_threads; + } + + /** @brief Set the desired number of threads.
+ * @param num_threads Desired number of threads. */ + inline void set_num_threads(thread_index_t num_threads) + { + this->num_threads = num_threads; + } + }; + + /** @brief Recommends parallel execution using the + default parallel algorithm. */ + struct default_parallel_tag : public parallel_tag + { + default_parallel_tag() { } + default_parallel_tag(thread_index_t num_threads) + : parallel_tag(num_threads) { } + }; + + /** @brief Recommends parallel execution using dynamic + load-balancing at compile time. */ + struct balanced_tag : public parallel_tag { }; + + /** @brief Recommends parallel execution using static + load-balancing at compile time. */ + struct unbalanced_tag : public parallel_tag { }; + + /** @brief Recommends parallel execution using OpenMP dynamic + load-balancing at compile time. */ + struct omp_loop_tag : public parallel_tag { }; + + /** @brief Recommends parallel execution using OpenMP static + load-balancing at compile time. */ + struct omp_loop_static_tag : public parallel_tag { }; + + + /** @brief Base class for std::find() variants. */ + struct find_tag { }; + + + /** @brief Forces parallel merging + * with exact splitting, at compile time. */ + struct exact_tag : public parallel_tag + { + exact_tag() { } + exact_tag(thread_index_t num_threads) + : parallel_tag(num_threads) { } + }; + + /** @brief Forces parallel merging + * with splitting by sampling, at compile time. */ + struct sampling_tag : public parallel_tag + { + sampling_tag() { } + sampling_tag(thread_index_t num_threads) + : parallel_tag(num_threads) { } + }; + + + /** @brief Forces parallel sorting using multiway mergesort + * at compile time. */ + struct multiway_mergesort_tag : public parallel_tag + { + multiway_mergesort_tag() { } + multiway_mergesort_tag(thread_index_t num_threads) + : parallel_tag(num_threads) { } + }; + + /** @brief Forces parallel sorting using multiway mergesort + * with exact splitting at compile time.
*/ + struct multiway_mergesort_exact_tag : public parallel_tag + { + multiway_mergesort_exact_tag() { } + multiway_mergesort_exact_tag(thread_index_t num_threads) + : parallel_tag(num_threads) { } + }; + + /** @brief Forces parallel sorting using multiway mergesort + * with splitting by sampling at compile time. */ + struct multiway_mergesort_sampling_tag : public parallel_tag + { + multiway_mergesort_sampling_tag() { } + multiway_mergesort_sampling_tag(thread_index_t num_threads) + : parallel_tag(num_threads) { } + }; + + /** @brief Forces parallel sorting using unbalanced quicksort + * at compile time. */ + struct quicksort_tag : public parallel_tag + { + quicksort_tag() { } + quicksort_tag(thread_index_t num_threads) + : parallel_tag(num_threads) { } + }; + + /** @brief Forces parallel sorting using balanced quicksort + * at compile time. */ + struct balanced_quicksort_tag : public parallel_tag + { + balanced_quicksort_tag() { } + balanced_quicksort_tag(thread_index_t num_threads) + : parallel_tag(num_threads) { } + }; + + + /** @brief Selects the growing block size variant for std::find(). + @see _GLIBCXX_FIND_GROWING_BLOCKS */ + struct growing_blocks_tag : public find_tag { }; + + /** @brief Selects the constant block size variant for std::find(). + @see _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS */ + struct constant_size_blocks_tag : public find_tag { }; + + /** @brief Selects the equal splitting variant for std::find(). + @see _GLIBCXX_FIND_EQUAL_SPLIT */ + struct equal_split_tag : public find_tag { }; +} + +#endif /* _GLIBCXX_PARALLEL_TAGS_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/types.h b/gcc-4.4.0/libstdc++-v3/include/parallel/types.h new file mode 100644 index 000000000..053dfff15 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/types.h @@ -0,0 +1,155 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/types.h + * @brief Basic types and typedefs. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_TYPES_H +#define _GLIBCXX_PARALLEL_TYPES_H 1 + +#include <cstdlib> + +namespace __gnu_parallel +{ + // Enumerated types. + + /// Run-time equivalents for the compile-time tags. + enum _Parallelism + { + /// Not parallel. + sequential, + + /// Parallel unbalanced (equal-sized chunks). + parallel_unbalanced, + + /// Parallel balanced (work-stealing). + parallel_balanced, + + /// Parallel with OpenMP dynamic load-balancing. + parallel_omp_loop, + + /// Parallel with OpenMP static load-balancing. + parallel_omp_loop_static, + + /// Parallel with OpenMP taskqueue construct. + parallel_taskqueue + }; + + /// Strategies for run-time algorithm selection: + // force_sequential, force_parallel, heuristic. 
+ enum _AlgorithmStrategy + { + heuristic, + force_sequential, + force_parallel + }; + + /// Sorting algorithms: + // multi-way mergesort, quicksort, load-balanced quicksort. + enum _SortAlgorithm + { + MWMS, + QS, + QS_BALANCED + }; + + /// Merging algorithms: + // bubblesort-alike, loser-tree variants, enum sentinel. + enum _MultiwayMergeAlgorithm + { + LOSER_TREE + }; + + /// Partial sum algorithms: recursive, linear. + enum _PartialSumAlgorithm + { + RECURSIVE, + LINEAR + }; + + /// Sorting/merging algorithms: sampling, exact. + enum _SplittingAlgorithm + { + SAMPLING, + EXACT + }; + + /// Find algorithms: + // growing blocks, equal-sized blocks, equal splitting. + enum _FindAlgorithm + { + GROWING_BLOCKS, + CONSTANT_SIZE_BLOCKS, + EQUAL_SPLIT + }; + + /// Integer Types. + // XXX need to use <cstdint> + /** @brief 16-bit signed integer. */ + typedef short int16; + + /** @brief 16-bit unsigned integer. */ + typedef unsigned short uint16; + + /** @brief 32-bit signed integer. */ + typedef int int32; + + /** @brief 32-bit unsigned integer. */ + typedef unsigned int uint32; + + /** @brief 64-bit signed integer. */ + typedef long long int64; + + /** @brief 64-bit unsigned integer. */ + typedef unsigned long long uint64; + + /** + * @brief Unsigned integer to index elements. + * The total number of elements for each algorithm must fit into this type. + */ + typedef uint64 sequence_index_t; + + /** + * @brief Unsigned integer to index a thread number. + * The maximum thread number (for each processor) must fit into this type. + */ + typedef uint16 thread_index_t; + + // XXX atomics interface? + /// Longest compare-and-swappable integer type on this platform. + typedef int64 lcas_t; + + // XXX numeric_limits::digits? + /// Number of bits of ::lcas_t. + static const int lcas_t_bits = sizeof(lcas_t) * 8; + + /// ::lcas_t with the right half of bits set to 1. 
+ static const lcas_t lcas_t_mask = ((lcas_t(1) << (lcas_t_bits / 2)) - 1); +} + +#endif /* _GLIBCXX_PARALLEL_TYPES_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/unique_copy.h b/gcc-4.4.0/libstdc++-v3/include/parallel/unique_copy.h new file mode 100644 index 000000000..a83c2c0c5 --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/unique_copy.h @@ -0,0 +1,192 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/unique_copy.h + * @brief Parallel implementations of std::unique_copy(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Robert Geisberger and Robin Dapp. + +#ifndef _GLIBCXX_PARALLEL_UNIQUE_COPY_H +#define _GLIBCXX_PARALLEL_UNIQUE_COPY_H 1 + +#include <parallel/parallel.h> +#include <parallel/multiseq_selection.h> + +namespace __gnu_parallel +{ + +/** @brief Parallel std::unique_copy(), with explicit equality predicate.
+ * @param first Begin iterator of input sequence. + * @param last End iterator of input sequence. + * @param result Begin iterator of result sequence. + * @param binary_pred Equality predicate. + * @return End iterator of result sequence. */ +template<typename InputIterator, + class OutputIterator, + class BinaryPredicate> + OutputIterator + parallel_unique_copy(InputIterator first, InputIterator last, + OutputIterator result, BinaryPredicate binary_pred) + { + _GLIBCXX_CALL(last - first) + + typedef std::iterator_traits<InputIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type size = last - first; + + if (size == 0) + return result; + + // Let the first thread process two parts. + difference_type *counter; + difference_type *borders; + + thread_index_t num_threads = get_max_threads(); + // First part contains at least one element. +# pragma omp parallel num_threads(num_threads) + { +# pragma omp single + { + num_threads = omp_get_num_threads(); + borders = new difference_type[num_threads + 2]; + equally_split(size, num_threads + 1, borders); + counter = new difference_type[num_threads + 1]; + } + + thread_index_t iam = omp_get_thread_num(); + + difference_type begin, end; + + // Check for length without duplicates + // Needed for position in output + difference_type i = 0; + OutputIterator out = result; + + if (iam == 0) + { + begin = borders[0] + 1; // == 1 + end = borders[iam + 1]; + + ++i; + *out++ = *first; + + for (InputIterator iter = first + begin; iter < first + end; ++iter) + { + if (!binary_pred(*iter, *(iter-1))) + { + ++i; + *out++ = *iter; + } + } + } + else + { + begin = borders[iam]; //one part + end = borders[iam + 1]; + + for (InputIterator iter = first + begin; iter < first + end; ++iter) + { + if (!binary_pred(*iter, *(iter - 1))) + ++i; + } + } + counter[iam] = i; + + // Last part still untouched. 
+ difference_type begin_output; + +# pragma omp barrier + + // Store result in output on calculated positions. + begin_output = 0; + + if (iam == 0) + { + for (int t = 0; t < num_threads; ++t) + begin_output += counter[t]; + + i = 0; + + OutputIterator iter_out = result + begin_output; + + begin = borders[num_threads]; + end = size; + + for (InputIterator iter = first + begin; iter < first + end; ++iter) + { + if (iter == first || !binary_pred(*iter, *(iter - 1))) + { + ++i; + *iter_out++ = *iter; + } + } + + counter[num_threads] = i; + } + else + { + for (int t = 0; t < iam; t++) + begin_output += counter[t]; + + OutputIterator iter_out = result + begin_output; + for (InputIterator iter = first + begin; iter < first + end; ++iter) + { + if (!binary_pred(*iter, *(iter-1))) + *iter_out++ = *iter; + } + } + } + + difference_type end_output = 0; + for (int t = 0; t < num_threads + 1; t++) + end_output += counter[t]; + + delete[] borders; + + return result + end_output; + } + +/** @brief Parallel std::unique_copy(), without explicit equality predicate + * @param first Begin iterator of input sequence. + * @param last End iterator of input sequence. + * @param result Begin iterator of result sequence. + * @return End iterator of result sequence. 
*/ +template<typename InputIterator, class OutputIterator> + inline OutputIterator + parallel_unique_copy(InputIterator first, InputIterator last, + OutputIterator result) + { + typedef typename std::iterator_traits<InputIterator>::value_type + value_type; + return parallel_unique_copy(first, last, result, + std::equal_to<value_type>()); + } + +}//namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_UNIQUE_COPY_H */ diff --git a/gcc-4.4.0/libstdc++-v3/include/parallel/workstealing.h b/gcc-4.4.0/libstdc++-v3/include/parallel/workstealing.h new file mode 100644 index 000000000..b82fe7cef --- /dev/null +++ b/gcc-4.4.0/libstdc++-v3/include/parallel/workstealing.h @@ -0,0 +1,306 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/workstealing.h + * @brief Parallelization of embarrassingly parallel execution by + * means of work-stealing. + * + * Work stealing is described in + * + * R. D. Blumofe and C. E. Leiserson. 
 *  Scheduling multithreaded computations by work stealing.
 *  Journal of the ACM, 46(5):720–748, 1999.
 *
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Felix Putze.

#ifndef _GLIBCXX_PARALLEL_WORKSTEALING_H
#define _GLIBCXX_PARALLEL_WORKSTEALING_H 1

#include <parallel/parallel.h>
#include <parallel/random_number.h>
#include <parallel/compatibility.h>

namespace __gnu_parallel
{

// Qualifier applied to every member of Job; the members are read and
// written by more than one thread (see the member documentation).
#define _GLIBCXX_JOB_VOLATILE volatile

/** @brief One job for a certain thread.
 *
 *  Describes the index range [first, last] that is still to be
 *  processed, together with its length. */
template<typename _DifferenceTp>
  struct Job
  {
    typedef _DifferenceTp difference_type;

    /** @brief First element.
     *
     *  Changed by owning and stealing thread. By stealing thread,
     *  always incremented. */
    _GLIBCXX_JOB_VOLATILE difference_type first;

    /** @brief Last element.
     *
     *  Changed by owning thread only. */
    _GLIBCXX_JOB_VOLATILE difference_type last;

    /** @brief Number of elements, i. e. @c last-first+1.
     *
     *  Changed by owning thread only. */
    _GLIBCXX_JOB_VOLATILE difference_type load;
  };

/** @brief Work stealing algorithm for random access iterators.
  *
  *  Uses O(1) additional memory per thread: one Job slot for each
  *  thread is allocated (@c num_threads*stride entries in total).
  *  Synchronization at job lists is done with atomic operations.
  *
  *  Each thread first works on its own contiguous share of the index
  *  range in chunks of @c workstealing_chunk_size elements; once its
  *  share is exhausted it picks random victims and takes over about
  *  half of a victim's remaining load, until no thread is busy anymore.
  *  The per-thread results are finally combined into @c output using
  *  @c r, guarded by a lock.
  *
  *  @param begin Begin iterator of element sequence.
  *  @param end End iterator of element sequence.
  *  @param op User-supplied functor (comparator, predicate, adding
  *  functor, ...).
  *  @param f Functor to "process" an element with op (depends on
  *  desired functionality, e. g. for std::for_each(), ...).  Its
  *  @c finish_iterator member is set to @c begin+length on return.
  *  @param r Functor to "add" a single result to the already
  *  processed elements (depends on functionality).
  *  @param base Base value for reduction.
  *  @param output Pointer to position where final result is written to
  *  @param bound Maximum number of elements processed (e. g. for
  *  std::count_n()).  A negative value means "all of [begin, end)".
  *  @return User-supplied functor (that may contain a part of the result).
  */
template<typename RandomAccessIterator,
         typename Op,
         typename Fu,
         typename Red,
         typename Result>
  Op
  for_each_template_random_access_workstealing(RandomAccessIterator begin,
                                               RandomAccessIterator end,
                                               Op op, Fu& f, Red r,
                                               Result base, Result& output,
                                               typename std::iterator_traits
                                               <RandomAccessIterator>::
                                               difference_type bound)
  {
    _GLIBCXX_CALL(end - begin)

    typedef std::iterator_traits<RandomAccessIterator> traits_type;
    typedef typename traits_type::difference_type difference_type;

    const _Settings& __s = _Settings::get();

    // Number of elements a thread reserves from its own job per step.
    difference_type chunk_size = static_cast<difference_type>(__s.workstealing_chunk_size);

    // How many jobs?
    difference_type length = (bound < 0) ? (end - begin) : bound;

    // To avoid false sharing in a cache line.
    // Job slots of different threads are placed `stride` entries apart.
    const int stride = __s.cache_line_size * 10 / sizeof(Job<difference_type>) + 1;

    // Total number of threads currently working.
    thread_index_t busy = 0;

    // Job description array, allocated inside the parallel region.
    Job<difference_type> *job;

    // Serializes the final reduction into `output`.
    omp_lock_t output_lock;
    omp_init_lock(&output_lock);

    // Write base value to output.
    output = base;

    // No more threads than jobs, at least one thread.
    thread_index_t num_threads =
        __gnu_parallel::max<thread_index_t>(1,
                    __gnu_parallel::min<difference_type>(length, get_max_threads()));

#   pragma omp parallel shared(busy) num_threads(num_threads)
      {

#       pragma omp single
          {
            // Use the number of threads actually granted.
            num_threads = omp_get_num_threads();

            // Create job description array.
            job = new Job<difference_type>[num_threads * stride];
          }

        // Initialization phase.

        // Flags for every thread if it is doing productive work.
        bool iam_working = false;

        // Thread id.
        thread_index_t iam = omp_get_thread_num();

        // This job.
        Job<difference_type>& my_job = job[iam * stride];

        // Random number (for work stealing).
        thread_index_t victim;

        // Local value for reduction.
        Result result = Result();

        // Number of elements to steal in one attempt.
        difference_type steal;

        // Every thread has its own random number generator
        // (modulo num_threads).
        random_number rand_gen(iam, num_threads);

        // This thread is currently working.
#       pragma omp atomic
          ++busy;

        iam_working = true;

        // How many jobs per thread? last thread gets the rest.
        my_job.first =
            static_cast<difference_type>(iam * (length / num_threads));

        my_job.last = (iam == (num_threads - 1)) ?
            (length - 1) : ((iam + 1) * (length / num_threads) - 1);
        my_job.load = my_job.last - my_job.first + 1;

        // Init result with first value (to have a base value for reduction).
        if (my_job.first <= my_job.last)
          {
            // Cannot use volatile variable directly.
            difference_type my_first = my_job.first;
            result = f(op, begin + my_first);
            ++my_job.first;
            --my_job.load;
          }

        RandomAccessIterator current;

        // Every job slot is initialized before any thread starts to
        // work or to steal.
#       pragma omp barrier

        // Actual work phase
        // Work on own or stolen start
        while (busy > 0)
          {
            // Work until no productive thread left.
#           pragma omp flush(busy)

            // Thread has own work to do
            while (my_job.first <= my_job.last)
              {
                // fetch-and-add call
                // Reserve current job block (size chunk_size) in my queue.
                difference_type current_job =
                  fetch_and_add<difference_type>(&(my_job.first), chunk_size);

                // Update load, to make the three values consistent,
                // first might have been changed in the meantime
                my_job.load = my_job.last - my_job.first + 1;
                // Process the reserved block, but never past my_job.last.
                for (difference_type job_counter = 0;
                     job_counter < chunk_size && current_job <= my_job.last;
                     ++job_counter)
                  {
                    // Yes: process it!
                    current = begin + current_job;
                    ++current_job;

                    // Do actual work.
                    result = r(result, f(op, current));
                  }

#               pragma omp flush(busy)
              }

            // After reaching this point, a thread's job list is empty.
            if (iam_working)
              {
                // This thread no longer has work.
#               pragma omp atomic
                  --busy;

                iam_working = false;
              }

            // Snapshot of the victim's job; the three values are read
            // separately and therefore checked for consistency below.
            difference_type supposed_first, supposed_last, supposed_load;
            do
              {
                // Find random nonempty deque (not own), do consistency check.
                yield();
#               pragma omp flush(busy)
                victim = rand_gen();
                supposed_first = job[victim * stride].first;
                supposed_last = job[victim * stride].last;
                supposed_load = job[victim * stride].load;
              }
            // Retry while someone is still busy and the snapshot is
            // either empty or does not satisfy first + load - 1 == last.
            while (busy > 0
                   && ((supposed_load <= 0)
                       || ((supposed_first + supposed_load - 1) != supposed_last)));

            // Nobody has work left: leave the work loop.
            if (busy == 0)
              break;

            if (supposed_load > 0)
              {
                // Has work and work to do.
                // Number of elements to steal (at least one).
                steal = (supposed_load < 2) ? 1 : supposed_load / 2;

                // Push victim's start forward.
                difference_type stolen_first =
                    fetch_and_add<difference_type>(
                        &(job[victim * stride].first), steal);
                difference_type stolen_try =
                    stolen_first + steal - difference_type(1);

                // The stolen range becomes this thread's job; it is
                // clipped to the victim's last element as seen above.
                my_job.first = stolen_first;
                my_job.last = __gnu_parallel::min(stolen_try, supposed_last);
                my_job.load = my_job.last - my_job.first + 1;

                // Has potential work again.
#               pragma omp atomic
                  ++busy;
                iam_working = true;

#               pragma omp flush(busy)
              }
#           pragma omp flush(busy)
          } // end while busy > 0
            // Add accumulated result to output.
        omp_set_lock(&output_lock);
        output = r(output, result);
        omp_unset_lock(&output_lock);
      }

    delete[] job;

    // Points to last element processed (needed as return value for
    // some algorithms like transform)
    f.finish_iterator = begin + length;

    omp_destroy_lock(&output_lock);

    return op;
  }
} // end namespace

#endif /* _GLIBCXX_PARALLEL_WORKSTEALING_H */