Import GCC-8 to a new vendor branch
[dragonfly.git] / contrib / gcc-8.0 / libstdc++-v3 / config / cpu / i486 / opt / bits / opt_random.h
1 // Optimizations for random number functions, x86 version -*- C++ -*-
2
3 // Copyright (C) 2012-2018 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /** @file bits/opt_random.h
26  *  This is an internal header file, included by other library headers.
27  *  Do not attempt to use it directly. @headername{random}
28  */
29
30 #ifndef _BITS_OPT_RANDOM_H
31 #define _BITS_OPT_RANDOM_H 1
32
33 #ifdef __SSE3__
34 #include <pmmintrin.h>
35 #endif
36
37
38 #pragma GCC system_header
39
40
41 namespace std _GLIBCXX_VISIBILITY(default)
42 {
43 _GLIBCXX_BEGIN_NAMESPACE_VERSION
44
45 #ifdef __SSE3__
46   template<>
47     template<typename _UniformRandomNumberGenerator>
48       void
49       normal_distribution<double>::
50       __generate(typename normal_distribution<double>::result_type* __f,
51                  typename normal_distribution<double>::result_type* __t,
52                  _UniformRandomNumberGenerator& __urng,
53                  const param_type& __param)
54       {
55         typedef uint64_t __uctype;
56
57         if (__f == __t)
58           return;
59
60         if (_M_saved_available)
61           {
62             _M_saved_available = false;
63             *__f++ = _M_saved * __param.stddev() + __param.mean();
64
65             if (__f == __t)
66               return;
67           }
68
69         constexpr uint64_t __maskval = 0xfffffffffffffull;
70         static const __m128i __mask = _mm_set1_epi64x(__maskval);
71         static const __m128i __two = _mm_set1_epi64x(0x4000000000000000ull);
72         static const __m128d __three = _mm_set1_pd(3.0);
73         const __m128d __av = _mm_set1_pd(__param.mean());
74
75         const __uctype __urngmin = __urng.min();
76         const __uctype __urngmax = __urng.max();
77         const __uctype __urngrange = __urngmax - __urngmin;
78         const __uctype __uerngrange = __urngrange + 1;
79
80         while (__f + 1 < __t)
81           {
82             double __le;
83             __m128d __x;
84             do
85               {
86                 union
87                 {
88                   __m128i __i;
89                   __m128d __d;
90                 } __v;
91
92                 if (__urngrange > __maskval)
93                   {
94                     if (__detail::_Power_of_2(__uerngrange))
95                       __v.__i = _mm_and_si128(_mm_set_epi64x(__urng(),
96                                                              __urng()),
97                                               __mask);
98                     else
99                       {
100                         const __uctype __uerange = __maskval + 1;
101                         const __uctype __scaling = __urngrange / __uerange;
102                         const __uctype __past = __uerange * __scaling;
103                         uint64_t __v1;
104                         do
105                           __v1 = __uctype(__urng()) - __urngmin;
106                         while (__v1 >= __past);
107                         __v1 /= __scaling;
108                         uint64_t __v2;
109                         do
110                           __v2 = __uctype(__urng()) - __urngmin;
111                         while (__v2 >= __past);
112                         __v2 /= __scaling;
113
114                         __v.__i = _mm_set_epi64x(__v1, __v2);
115                       }
116                   }
117                 else if (__urngrange == __maskval)
118                   __v.__i = _mm_set_epi64x(__urng(), __urng());
119                 else if ((__urngrange + 2) * __urngrange >= __maskval
120                          && __detail::_Power_of_2(__uerngrange))
121                   {
122                     uint64_t __v1 = __urng() * __uerngrange + __urng();
123                     uint64_t __v2 = __urng() * __uerngrange + __urng();
124
125                     __v.__i = _mm_and_si128(_mm_set_epi64x(__v1, __v2),
126                                             __mask);
127                   }
128                 else
129                   {
130                     size_t __nrng = 2;
131                     __uctype __high = __maskval / __uerngrange / __uerngrange;
132                     while (__high > __uerngrange)
133                       {
134                         ++__nrng;
135                         __high /= __uerngrange;
136                       }
137                     const __uctype __highrange = __high + 1;
138                     const __uctype __scaling = __urngrange / __highrange;
139                     const __uctype __past = __highrange * __scaling;
140                     __uctype __tmp;
141
142                     uint64_t __v1;
143                     do
144                       {
145                         do
146                           __tmp = __uctype(__urng()) - __urngmin;
147                         while (__tmp >= __past);
148                         __v1 = __tmp / __scaling;
149                         for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
150                           {
151                             __tmp = __v1;
152                             __v1 *= __uerngrange;
153                             __v1 += __uctype(__urng()) - __urngmin;
154                           }
155                       }
156                     while (__v1 > __maskval || __v1 < __tmp);
157
158                     uint64_t __v2;
159                     do
160                       {
161                         do
162                           __tmp = __uctype(__urng()) - __urngmin;
163                         while (__tmp >= __past);
164                         __v2 = __tmp / __scaling;
165                         for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
166                           {
167                             __tmp = __v2;
168                             __v2 *= __uerngrange;
169                             __v2 += __uctype(__urng()) - __urngmin;
170                           }
171                       }
172                     while (__v2 > __maskval || __v2 < __tmp);
173
174                     __v.__i = _mm_set_epi64x(__v1, __v2);
175                   }
176
177                 __v.__i = _mm_or_si128(__v.__i, __two);
178                 __x = _mm_sub_pd(__v.__d, __three);
179                 __m128d __m = _mm_mul_pd(__x, __x);
180                 __le = _mm_cvtsd_f64(_mm_hadd_pd (__m, __m));
181               }
182             while (__le == 0.0 || __le >= 1.0);
183
184             double __mult = (std::sqrt(-2.0 * std::log(__le) / __le)
185                              * __param.stddev());
186
187             __x = _mm_add_pd(_mm_mul_pd(__x, _mm_set1_pd(__mult)), __av);
188
189             _mm_storeu_pd(__f, __x);
190             __f += 2;
191           }
192
193         if (__f != __t)
194           {
195             result_type __x, __y, __r2;
196
197             __detail::_Adaptor<_UniformRandomNumberGenerator, result_type>
198               __aurng(__urng);
199
200             do
201               {
202                 __x = result_type(2.0) * __aurng() - 1.0;
203                 __y = result_type(2.0) * __aurng() - 1.0;
204                 __r2 = __x * __x + __y * __y;
205               }
206             while (__r2 > 1.0 || __r2 == 0.0);
207
208             const result_type __mult = std::sqrt(-2 * std::log(__r2) / __r2);
209             _M_saved = __x * __mult;
210             _M_saved_available = true;
211             *__f = __y * __mult * __param.stddev() + __param.mean();
212           }
213       }
214 #endif
215
216
217 _GLIBCXX_END_NAMESPACE_VERSION
218 } // namespace
219
220
221 #endif // _BITS_OPT_RANDOM_H