Import gcc-4.4.1
[dragonfly.git] / contrib / gcc-4.4 / libstdc++-v3 / include / parallel / partition.h
1 // -*- C++ -*-
2
3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
9 // version.
10
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 // General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /** @file parallel/partition.h
26  *  @brief Parallel implementation of std::partition(),
27  *  std::nth_element(), and std::partial_sort().
28  *  This file is a GNU parallel extension to the Standard C++ Library.
29  */
30
31 // Written by Johannes Singler and Felix Putze.
32
33 #ifndef _GLIBCXX_PARALLEL_PARTITION_H
34 #define _GLIBCXX_PARALLEL_PARTITION_H 1
35
36 #include <parallel/basic_iterator.h>
37 #include <parallel/sort.h>
38 #include <parallel/random_number.h>
39 #include <bits/stl_algo.h>
40 #include <parallel/parallel.h>
41
42 /** @brief Decide whether to declare certain variables volatile. */
43 #define _GLIBCXX_VOLATILE volatile
44
45 namespace __gnu_parallel
46 {
47 /** @brief Parallel implementation of std::partition.
48   *  @param begin Begin iterator of input sequence to split.
49   *  @param end End iterator of input sequence to split.
50   *  @param pred Partition predicate, possibly including some kind of pivot.
51   *  @param num_threads Maximum number of threads to use for this task.
52   *  @return Number of elements not fulfilling the predicate. */
53 template<typename RandomAccessIterator, typename Predicate>
54   typename std::iterator_traits<RandomAccessIterator>::difference_type
55   parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
56                      Predicate pred, thread_index_t num_threads)
57   {
58     typedef std::iterator_traits<RandomAccessIterator> traits_type;
59     typedef typename traits_type::value_type value_type;
60     typedef typename traits_type::difference_type difference_type;
61
62     difference_type n = end - begin;
63
64     _GLIBCXX_CALL(n)
65
66     const _Settings& __s = _Settings::get();
67
68     // Shared.
69     _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
70     _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
71     _GLIBCXX_VOLATILE difference_type leftnew, rightnew;
72
73     bool* reserved_left = NULL, * reserved_right = NULL;
74
75     difference_type chunk_size;
76
77     omp_lock_t result_lock;
78     omp_init_lock(&result_lock);
79
80     //at least two chunks per thread
81     if(right - left + 1 >= 2 * num_threads * chunk_size)
82 #   pragma omp parallel num_threads(num_threads)
83       {
84 #       pragma omp single
85           {
86             num_threads = omp_get_num_threads();
87             reserved_left = new bool[num_threads];
88             reserved_right = new bool[num_threads];
89
90             if (__s.partition_chunk_share > 0.0)
91               chunk_size = std::max<difference_type>(__s.partition_chunk_size,
92                                     (double)n * __s.partition_chunk_share
93                                                      / (double)num_threads);
94             else
95               chunk_size = __s.partition_chunk_size;
96           }
97
98         while (right - left + 1 >= 2 * num_threads * chunk_size)
99           {
100 #           pragma omp single
101               {
102                 difference_type num_chunks = (right - left + 1) / chunk_size;
103
104                 for (int r = 0; r < num_threads; ++r)
105                   {
106                     reserved_left[r] = false;
107                     reserved_right[r] = false;
108                   }
109                 leftover_left = 0;
110                 leftover_right = 0;
111               } //implicit barrier
112
113             // Private.
114             difference_type thread_left, thread_left_border,
115                             thread_right, thread_right_border;
116             thread_left = left + 1;
117
118             // Just to satisfy the condition below.
119             thread_left_border = thread_left - 1;
120             thread_right = n - 1;
121             thread_right_border = thread_right + 1;
122
123             bool iam_finished = false;
124             while (!iam_finished)
125               {
126                 if (thread_left > thread_left_border)
127                   {
128                     omp_set_lock(&result_lock);
129                     if (left + (chunk_size - 1) > right)
130                       iam_finished = true;
131                     else
132                       {
133                         thread_left = left;
134                         thread_left_border = left + (chunk_size - 1);
135                         left += chunk_size;
136                       }
137                     omp_unset_lock(&result_lock);
138                   }
139
140                 if (thread_right < thread_right_border)
141                   {
142                     omp_set_lock(&result_lock);
143                     if (left > right - (chunk_size - 1))
144                       iam_finished = true;
145                     else
146                       {
147                         thread_right = right;
148                         thread_right_border = right - (chunk_size - 1);
149                         right -= chunk_size;
150                       }
151                     omp_unset_lock(&result_lock);
152                   }
153
154                 if (iam_finished)
155                   break;
156
157                 // Swap as usual.
158                 while (thread_left < thread_right)
159                   {
160                     while (pred(begin[thread_left])
161                             && thread_left <= thread_left_border)
162                       ++thread_left;
163                     while (!pred(begin[thread_right])
164                             && thread_right >= thread_right_border)
165                       --thread_right;
166
167                     if (thread_left > thread_left_border
168                         || thread_right < thread_right_border)
169                       // Fetch new chunk(s).
170                       break;
171
172                     std::swap(begin[thread_left], begin[thread_right]);
173                     ++thread_left;
174                     --thread_right;
175                   }
176               }
177
178             // Now swap the leftover chunks to the right places.
179             if (thread_left <= thread_left_border)
180 #             pragma omp atomic
181               ++leftover_left;
182             if (thread_right >= thread_right_border)
183 #             pragma omp atomic
184               ++leftover_right;
185
186 #           pragma omp barrier
187
188 #           pragma omp single
189               {
190                 leftnew = left - leftover_left * chunk_size;
191                 rightnew = right + leftover_right * chunk_size;
192               }
193
194 #           pragma omp barrier
195
196             // <=> thread_left_border + (chunk_size - 1) >= leftnew
197             if (thread_left <= thread_left_border
198                 && thread_left_border >= leftnew)
199               {
200                 // Chunk already in place, reserve spot.
201                 reserved_left[(left - (thread_left_border + 1)) / chunk_size]
202                     = true;
203               }
204
205             // <=> thread_right_border - (chunk_size - 1) <= rightnew
206             if (thread_right >= thread_right_border
207                 && thread_right_border <= rightnew)
208               {
209                 // Chunk already in place, reserve spot.
210                 reserved_right[((thread_right_border - 1) - right)
211                                / chunk_size] = true;
212               }
213
214 #           pragma omp barrier
215
216             if (thread_left <= thread_left_border
217                 && thread_left_border < leftnew)
218               {
219                 // Find spot and swap.
220                 difference_type swapstart = -1;
221                 omp_set_lock(&result_lock);
222                 for (int r = 0; r < leftover_left; ++r)
223                   if (!reserved_left[r])
224                     {
225                       reserved_left[r] = true;
226                       swapstart = left - (r + 1) * chunk_size;
227                       break;
228                     }
229                 omp_unset_lock(&result_lock);
230
231 #if _GLIBCXX_ASSERTIONS
232                 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
233 #endif
234
235                 std::swap_ranges(begin + thread_left_border
236                                  - (chunk_size - 1),
237                                  begin + thread_left_border + 1,
238                                  begin + swapstart);
239               }
240
241             if (thread_right >= thread_right_border
242                 && thread_right_border > rightnew)
243               {
244                 // Find spot and swap
245                 difference_type swapstart = -1;
246                 omp_set_lock(&result_lock);
247                 for (int r = 0; r < leftover_right; ++r)
248                   if (!reserved_right[r])
249                     {
250                       reserved_right[r] = true;
251                       swapstart = right + r * chunk_size + 1;
252                       break;
253                     }
254                 omp_unset_lock(&result_lock);
255
256 #if _GLIBCXX_ASSERTIONS
257                 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
258 #endif
259
260                 std::swap_ranges(begin + thread_right_border,
261                                  begin + thread_right_border + chunk_size,
262                                  begin + swapstart);
263               }
264 #if _GLIBCXX_ASSERTIONS
265 #             pragma omp barrier
266
267 #             pragma omp single
268                 {
269                   for (int r = 0; r < leftover_left; ++r)
270                     _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
271                   for (int r = 0; r < leftover_right; ++r)
272                     _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
273                 }
274
275 #             pragma omp barrier
276 #endif
277
278 #             pragma omp barrier
279
280               left = leftnew;
281               right = rightnew;
282           }
283 #         pragma omp flush(left, right)
284       } // end "recursion" //parallel
285
286     difference_type final_left = left, final_right = right;
287
288     while (final_left < final_right)
289       {
290         // Go right until key is geq than pivot.
291         while (pred(begin[final_left]) && final_left < final_right)
292           ++final_left;
293
294         // Go left until key is less than pivot.
295         while (!pred(begin[final_right]) && final_left < final_right)
296           --final_right;
297
298         if (final_left == final_right)
299           break;
300         std::swap(begin[final_left], begin[final_right]);
301         ++final_left;
302         --final_right;
303       }
304
305     // All elements on the left side are < piv, all elements on the
306     // right are >= piv
307     delete[] reserved_left;
308     delete[] reserved_right;
309
310     omp_destroy_lock(&result_lock);
311
312     // Element "between" final_left and final_right might not have
313     // been regarded yet
314     if (final_left < n && !pred(begin[final_left]))
315       // Really swapped.
316       return final_left;
317     else
318       return final_left + 1;
319   }
320
321 /**
322   *  @brief Parallel implementation of std::nth_element().
323   *  @param begin Begin iterator of input sequence.
324   *  @param nth Iterator of element that must be in position afterwards.
325   *  @param end End iterator of input sequence.
326   *  @param comp Comparator.
327   */
328 template<typename RandomAccessIterator, typename Comparator>
329   void 
330   parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth, 
331                        RandomAccessIterator end, Comparator comp)
332   {
333     typedef std::iterator_traits<RandomAccessIterator> traits_type;
334     typedef typename traits_type::value_type value_type;
335     typedef typename traits_type::difference_type difference_type;
336
337     _GLIBCXX_CALL(end - begin)
338
339     RandomAccessIterator split;
340     random_number rng;
341
342     difference_type minimum_length =
343       std::max<difference_type>(2, _Settings::get().partition_minimal_n);
344
345     // Break if input range to small.
346     while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
347       {
348         difference_type n = end - begin;
349
350         RandomAccessIterator pivot_pos = begin +  rng(n);
351
352         // Swap pivot_pos value to end.
353         if (pivot_pos != (end - 1))
354           std::swap(*pivot_pos, *(end - 1));
355         pivot_pos = end - 1;
356
357         // XXX Comparator must have first_value_type, second_value_type,
358         // result_type
359         // Comparator == __gnu_parallel::lexicographic<S, int,
360         // __gnu_parallel::less<S, S> >
361         // pivot_pos == std::pair<S, int>*
362         // XXX binder2nd only for RandomAccessIterators??
363         __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
364           pred(comp, *pivot_pos);
365
366         // Divide, leave pivot unchanged in last place.
367         RandomAccessIterator split_pos1, split_pos2;
368         split_pos1 = begin + parallel_partition(begin, end - 1, pred,
369                                                 get_max_threads());
370
371         // Left side: < pivot_pos; right side: >= pivot_pos
372
373         // Swap pivot back to middle.
374         if (split_pos1 != pivot_pos)
375           std::swap(*split_pos1, *pivot_pos);
376         pivot_pos = split_pos1;
377
378         // In case all elements are equal, split_pos1 == 0
379         if ((split_pos1 + 1 - begin) < (n >> 7)
380             || (end - split_pos1) < (n >> 7))
381           {
382             // Very unequal split, one part smaller than one 128th
383             // elements not strictly larger than the pivot.
384             __gnu_parallel::unary_negate<__gnu_parallel::
385               binder1st<Comparator, value_type, value_type, bool>, value_type>
386               pred(__gnu_parallel::binder1st<Comparator, value_type,
387                    value_type, bool>(comp, *pivot_pos));
388
389             // Find other end of pivot-equal range.
390             split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
391                                                      end, pred);
392           }
393         else
394           // Only skip the pivot.
395           split_pos2 = split_pos1 + 1;
396
397         // Compare iterators.
398         if (split_pos2 <= nth)
399           begin = split_pos2;
400         else if (nth < split_pos1)
401           end = split_pos1;
402         else
403           break;
404       }
405
406     // Only at most _Settings::partition_minimal_n elements left.
407     __gnu_sequential::sort(begin, end, comp);
408   }
409
410 /** @brief Parallel implementation of std::partial_sort().
411 *  @param begin Begin iterator of input sequence.
412 *  @param middle Sort until this position.
413 *  @param end End iterator of input sequence.
414 *  @param comp Comparator. */
415 template<typename RandomAccessIterator, typename Comparator>
416   void
417   parallel_partial_sort(RandomAccessIterator begin,
418                         RandomAccessIterator middle,
419                         RandomAccessIterator end, Comparator comp)
420   {
421     parallel_nth_element(begin, middle, end, comp);
422     std::sort(begin, middle, comp);
423   }
424
425 } //namespace __gnu_parallel
426
427 #undef _GLIBCXX_VOLATILE
428
429 #endif /* _GLIBCXX_PARALLEL_PARTITION_H */