3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 /** @file parallel/partition.h
26 * @brief Parallel implementation of std::partition(),
27 * std::nth_element(), and std::partial_sort().
28 * This file is a GNU parallel extension to the Standard C++ Library.
31 // Written by Johannes Singler and Felix Putze.
33 #ifndef _GLIBCXX_PARALLEL_PARTITION_H
34 #define _GLIBCXX_PARALLEL_PARTITION_H 1
36 #include <parallel/basic_iterator.h>
37 #include <parallel/sort.h>
38 #include <parallel/random_number.h>
39 #include <bits/stl_algo.h>
40 #include <parallel/parallel.h>
42 /** @brief Decide whether to declare certain variables volatile. */
43 #define _GLIBCXX_VOLATILE volatile
45 namespace __gnu_parallel
47 /** @brief Parallel implementation of std::partition.
48 * @param begin Begin iterator of input sequence to split.
49 * @param end End iterator of input sequence to split.
50 * @param pred Partition predicate, possibly including some kind of pivot.
51 * @param num_threads Maximum number of threads to use for this task.
52 * @return Number of elements not fulfilling the predicate. */
53 template<typename RandomAccessIterator, typename Predicate>
54 typename std::iterator_traits<RandomAccessIterator>::difference_type
55 parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
56 Predicate pred, thread_index_t num_threads)
58 typedef std::iterator_traits<RandomAccessIterator> traits_type;
59 typedef typename traits_type::value_type value_type;
60 typedef typename traits_type::difference_type difference_type;
62 difference_type n = end - begin;
66 const _Settings& __s = _Settings::get();
69 _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
70 _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
71 _GLIBCXX_VOLATILE difference_type leftnew, rightnew;
73 bool* reserved_left = NULL, * reserved_right = NULL;
75 difference_type chunk_size;
77 omp_lock_t result_lock;
78 omp_init_lock(&result_lock);
80 //at least two chunks per thread
81 if(right - left + 1 >= 2 * num_threads * chunk_size)
82 # pragma omp parallel num_threads(num_threads)
86 num_threads = omp_get_num_threads();
87 reserved_left = new bool[num_threads];
88 reserved_right = new bool[num_threads];
90 if (__s.partition_chunk_share > 0.0)
91 chunk_size = std::max<difference_type>(__s.partition_chunk_size,
92 (double)n * __s.partition_chunk_share
93 / (double)num_threads);
95 chunk_size = __s.partition_chunk_size;
98 while (right - left + 1 >= 2 * num_threads * chunk_size)
102 difference_type num_chunks = (right - left + 1) / chunk_size;
104 for (int r = 0; r < num_threads; ++r)
106 reserved_left[r] = false;
107 reserved_right[r] = false;
114 difference_type thread_left, thread_left_border,
115 thread_right, thread_right_border;
116 thread_left = left + 1;
118 // Just to satisfy the condition below.
119 thread_left_border = thread_left - 1;
120 thread_right = n - 1;
121 thread_right_border = thread_right + 1;
123 bool iam_finished = false;
124 while (!iam_finished)
126 if (thread_left > thread_left_border)
128 omp_set_lock(&result_lock);
129 if (left + (chunk_size - 1) > right)
134 thread_left_border = left + (chunk_size - 1);
137 omp_unset_lock(&result_lock);
140 if (thread_right < thread_right_border)
142 omp_set_lock(&result_lock);
143 if (left > right - (chunk_size - 1))
147 thread_right = right;
148 thread_right_border = right - (chunk_size - 1);
151 omp_unset_lock(&result_lock);
158 while (thread_left < thread_right)
160 while (pred(begin[thread_left])
161 && thread_left <= thread_left_border)
163 while (!pred(begin[thread_right])
164 && thread_right >= thread_right_border)
167 if (thread_left > thread_left_border
168 || thread_right < thread_right_border)
169 // Fetch new chunk(s).
172 std::swap(begin[thread_left], begin[thread_right]);
178 // Now swap the leftover chunks to the right places.
179 if (thread_left <= thread_left_border)
182 if (thread_right >= thread_right_border)
190 leftnew = left - leftover_left * chunk_size;
191 rightnew = right + leftover_right * chunk_size;
196 // <=> thread_left_border + (chunk_size - 1) >= leftnew
197 if (thread_left <= thread_left_border
198 && thread_left_border >= leftnew)
200 // Chunk already in place, reserve spot.
201 reserved_left[(left - (thread_left_border + 1)) / chunk_size]
205 // <=> thread_right_border - (chunk_size - 1) <= rightnew
206 if (thread_right >= thread_right_border
207 && thread_right_border <= rightnew)
209 // Chunk already in place, reserve spot.
210 reserved_right[((thread_right_border - 1) - right)
211 / chunk_size] = true;
216 if (thread_left <= thread_left_border
217 && thread_left_border < leftnew)
219 // Find spot and swap.
220 difference_type swapstart = -1;
221 omp_set_lock(&result_lock);
222 for (int r = 0; r < leftover_left; ++r)
223 if (!reserved_left[r])
225 reserved_left[r] = true;
226 swapstart = left - (r + 1) * chunk_size;
229 omp_unset_lock(&result_lock);
231 #if _GLIBCXX_ASSERTIONS
232 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
235 std::swap_ranges(begin + thread_left_border
237 begin + thread_left_border + 1,
241 if (thread_right >= thread_right_border
242 && thread_right_border > rightnew)
244 // Find spot and swap
245 difference_type swapstart = -1;
246 omp_set_lock(&result_lock);
247 for (int r = 0; r < leftover_right; ++r)
248 if (!reserved_right[r])
250 reserved_right[r] = true;
251 swapstart = right + r * chunk_size + 1;
254 omp_unset_lock(&result_lock);
256 #if _GLIBCXX_ASSERTIONS
257 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
260 std::swap_ranges(begin + thread_right_border,
261 begin + thread_right_border + chunk_size,
264 #if _GLIBCXX_ASSERTIONS
269 for (int r = 0; r < leftover_left; ++r)
270 _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
271 for (int r = 0; r < leftover_right; ++r)
272 _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
283 # pragma omp flush(left, right)
284 } // end "recursion" //parallel
286 difference_type final_left = left, final_right = right;
288 while (final_left < final_right)
290 // Go right until key is geq than pivot.
291 while (pred(begin[final_left]) && final_left < final_right)
294 // Go left until key is less than pivot.
295 while (!pred(begin[final_right]) && final_left < final_right)
298 if (final_left == final_right)
300 std::swap(begin[final_left], begin[final_right]);
305 // All elements on the left side are < piv, all elements on the
307 delete[] reserved_left;
308 delete[] reserved_right;
310 omp_destroy_lock(&result_lock);
312 // Element "between" final_left and final_right might not have
314 if (final_left < n && !pred(begin[final_left]))
318 return final_left + 1;
322 * @brief Parallel implementation of std::nth_element().
323 * @param begin Begin iterator of input sequence.
324 * @param nth Iterator of element that must be in position afterwards.
325 * @param end End iterator of input sequence.
326 * @param comp Comparator.
328 template<typename RandomAccessIterator, typename Comparator>
330 parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth,
331 RandomAccessIterator end, Comparator comp)
333 typedef std::iterator_traits<RandomAccessIterator> traits_type;
334 typedef typename traits_type::value_type value_type;
335 typedef typename traits_type::difference_type difference_type;
337 _GLIBCXX_CALL(end - begin)
339 RandomAccessIterator split;
342 difference_type minimum_length =
343 std::max<difference_type>(2, _Settings::get().partition_minimal_n);
345 // Break if input range to small.
346 while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
348 difference_type n = end - begin;
350 RandomAccessIterator pivot_pos = begin + rng(n);
352 // Swap pivot_pos value to end.
353 if (pivot_pos != (end - 1))
354 std::swap(*pivot_pos, *(end - 1));
357 // XXX Comparator must have first_value_type, second_value_type,
359 // Comparator == __gnu_parallel::lexicographic<S, int,
360 // __gnu_parallel::less<S, S> >
361 // pivot_pos == std::pair<S, int>*
362 // XXX binder2nd only for RandomAccessIterators??
363 __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
364 pred(comp, *pivot_pos);
366 // Divide, leave pivot unchanged in last place.
367 RandomAccessIterator split_pos1, split_pos2;
368 split_pos1 = begin + parallel_partition(begin, end - 1, pred,
371 // Left side: < pivot_pos; right side: >= pivot_pos
373 // Swap pivot back to middle.
374 if (split_pos1 != pivot_pos)
375 std::swap(*split_pos1, *pivot_pos);
376 pivot_pos = split_pos1;
378 // In case all elements are equal, split_pos1 == 0
379 if ((split_pos1 + 1 - begin) < (n >> 7)
380 || (end - split_pos1) < (n >> 7))
382 // Very unequal split, one part smaller than one 128th
383 // elements not strictly larger than the pivot.
384 __gnu_parallel::unary_negate<__gnu_parallel::
385 binder1st<Comparator, value_type, value_type, bool>, value_type>
386 pred(__gnu_parallel::binder1st<Comparator, value_type,
387 value_type, bool>(comp, *pivot_pos));
389 // Find other end of pivot-equal range.
390 split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
394 // Only skip the pivot.
395 split_pos2 = split_pos1 + 1;
397 // Compare iterators.
398 if (split_pos2 <= nth)
400 else if (nth < split_pos1)
406 // Only at most _Settings::partition_minimal_n elements left.
407 __gnu_sequential::sort(begin, end, comp);
410 /** @brief Parallel implementation of std::partial_sort().
411 * @param begin Begin iterator of input sequence.
412 * @param middle Sort until this position.
413 * @param end End iterator of input sequence.
414 * @param comp Comparator. */
415 template<typename RandomAccessIterator, typename Comparator>
417 parallel_partial_sort(RandomAccessIterator begin,
418 RandomAccessIterator middle,
419 RandomAccessIterator end, Comparator comp)
421 parallel_nth_element(begin, middle, end, comp);
422 std::sort(begin, middle, comp);
425 } //namespace __gnu_parallel
427 #undef _GLIBCXX_VOLATILE
429 #endif /* _GLIBCXX_PARALLEL_PARTITION_H */