]>
Commit | Line | Data |
---|---|---|
f22341db | 1 | /* This file is part of the Vc library. |
2 | ||
3 | Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org> | |
4 | ||
5 | Vc is free software: you can redistribute it and/or modify | |
6 | it under the terms of the GNU Lesser General Public License as | |
7 | published by the Free Software Foundation, either version 3 of | |
8 | the License, or (at your option) any later version. | |
9 | ||
10 | Vc is distributed in the hope that it will be useful, but | |
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with Vc. If not, see <http://www.gnu.org/licenses/>. | |
17 | ||
18 | */ | |
19 | ||
// V_ALIGN(n): prefix a declaration with a request for n-byte alignment.
// An existing definition of V_ALIGN is left untouched. Otherwise the GNU
// attribute spelling is used whenever __GNUC__ is defined, and the
// __declspec(align(n)) spelling (the form MSVC accepts) in every other case.
#if !defined(V_ALIGN)
#  if defined(__GNUC__)
#    define V_ALIGN(n) __attribute__((aligned(n)))
#  else
#    define V_ALIGN(n) __declspec(align(n))
#  endif
#endif
27 | ||
7c616f25 | 28 | #include "Vc/common/const.h" |
29 | #include "Vc/avx/const_data.h" | |
30 | #include "Vc/sse/const_data.h" | |
f22341db | 31 | #include <Vc/version.h> |
32 | ||
33 | #include <cstdio> | |
34 | #include <cstdlib> | |
35 | #include <cstring> | |
36 | ||
37 | namespace Vc | |
38 | { | |
39 | namespace AVX | |
40 | { | |
41 | // cacheline 1 | |
42 | V_ALIGN(64) extern const unsigned int _IndexesFromZero32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; | |
43 | V_ALIGN(16) extern const unsigned short _IndexesFromZero16[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; | |
44 | V_ALIGN(16) extern const unsigned char _IndexesFromZero8 [16]= { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; | |
45 | ||
46 | // cacheline 2 | |
47 | template<> const double c_sin<double>::data[8] = { | |
48 | 0.15915494309189533576888376337251436, // 1 over 2pi | |
49 | 6.2831853071795864769252867665590058, // 2pi | |
50 | 1.5707963267948966192313216916397514, // pi over 2 | |
51 | 3.1415926535897932384626433832795029, // pi | |
52 | 1.666666666666666574148081281236954964697360992431640625e-01, // 1 over 3! | |
53 | 8.33333333333333321768510160154619370587170124053955078125e-03, // 1 over 5! | |
54 | 1.984126984126984125263171154784913596813566982746124267578125e-04, // 1 over 7! | |
55 | 2.755731922398589251095059327045788677423843182623386383056640625e-06 // 1 over 9! | |
56 | }; | |
57 | ||
58 | // cacheline 3 | |
59 | template<> const float c_sin<float>::data[8] = { | |
60 | 1.59154936671257019e-01f, // 1 over 2pi | |
61 | 6.28318548202514648f, // 2pi | |
62 | 1.57079637050628662f, // pi over 2 | |
63 | 3.14159274101257324f, // pi | |
64 | 1.66666671633720398e-01f, // 1 over 3! | |
65 | 8.33333376795053482e-03f, // 1 over 5! | |
66 | 1.98412701138295233e-04f, // 1 over 7! | |
67 | 2.75573188446287531e-06f // 1 over 9! | |
68 | }; | |
69 | ||
70 | const unsigned int c_general::absMaskFloat[2] = { 0xffffffffu, 0x7fffffffu }; | |
71 | const unsigned int c_general::signMaskFloat[2] = { 0x0u, 0x80000000u }; | |
72 | const float c_general::oneFloat = 1.f; | |
73 | const unsigned short c_general::minShort[2] = { 0x8000u, 0x8000u }; | |
74 | const unsigned short c_general::one16[2] = { 1, 1 }; | |
75 | const float c_general::_2power31 = 1u << 31; | |
76 | ||
77 | // cacheline 4 | |
78 | const double c_general::oneDouble = 1.; | |
79 | const unsigned long long c_general::frexpMask = 0xbfefffffffffffffull; | |
80 | ||
81 | const unsigned long long c_log<double>::data[21] = { | |
82 | 0x000003ff000003ffull // bias TODO: remove | |
83 | , 0x7ff0000000000000ull // exponentMask (+inf) | |
84 | ||
85 | , 0x3f1ab4c293c31bb0ull // P[0] | |
86 | , 0x3fdfd6f53f5652f2ull // P[1] | |
87 | , 0x4012d2baed926911ull // P[2] | |
88 | , 0x402cff72c63eeb2eull // P[3] | |
89 | , 0x4031efd6924bc84dull // P[4] | |
90 | , 0x401ed5637d7edcf8ull // P[5] | |
91 | ||
92 | , 0x40269320ae97ef8eull // Q[0] | |
93 | , 0x40469d2c4e19c033ull // Q[1] | |
94 | , 0x4054bf33a326bdbdull // Q[2] | |
95 | , 0x4051c9e2eb5eae21ull // Q[3] | |
96 | , 0x4037200a9e1f25b2ull // Q[4] | |
97 | ||
98 | , 0xfff0000000000000ull // -inf | |
99 | , 0x0010000000000000ull // min() | |
100 | , 0x3fe6a09e667f3bcdull // 1/sqrt(2) | |
101 | , 0x3fe6300000000000ull // round(ln(2) * 512) / 512 | |
102 | , 0xbf2bd0105c610ca8ull // ln(2) - round(ln(2) * 512) / 512 | |
103 | , 0x3fe0000000000000ull // 0.5 | |
104 | , 0x3fdbcb7b1526e50eull // log10(e) | |
105 | , 0x3ff71547652b82feull // log2(e) | |
106 | }; | |
107 | ||
108 | template<> const unsigned int c_log<float>::data[21] = { | |
109 | 0x0000007fu // bias TODO: remove | |
110 | , 0x7f800000u // exponentMask (+inf) | |
111 | ||
112 | , 0x3d9021bbu // 7.0376836292e-2f // P[0] | |
113 | , 0xbdebd1b8u // -1.1514610310e-1f // P[1] | |
114 | , 0x3def251au // 1.1676998740e-1f // P[2] | |
115 | , 0xbdfe5d4fu // -1.2420140846e-1f // P[3] | |
116 | , 0x3e11e9bfu // 1.4249322787e-1f // P[4] | |
117 | , 0xbe2aae50u // -1.6668057665e-1f // P[5] | |
118 | , 0x3e4cceacu // 2.0000714765e-1f // P[6] | |
119 | , 0xbe7ffffcu // -2.4999993993e-1f // P[7] | |
120 | , 0x3eaaaaaau // 3.3333331174e-1f // P[8] | |
121 | , 0 // padding because of c_log<double> | |
122 | , 0 // padding because of c_log<double> | |
123 | ||
124 | , 0xff800000u // -inf | |
125 | , 0x00800000u // min() | |
126 | , 0x3f3504f3u // 1/sqrt(2) | |
127 | , 0x3f318000u // round(ln(2) * 512) / 512 | |
128 | , 0xb95e8083u // ln(2) - round(ln(2) * 512) / 512 | |
129 | , 0x3f000000u // 0.5 | |
130 | , 0x3ede5bd9u // log10(e) | |
131 | , 0x3fb8aa3bu // log2(e) | |
132 | }; | |
133 | } // namespace AVX | |
134 | ||
135 | namespace SSE | |
136 | { | |
137 | // cacheline 1 | |
138 | V_ALIGN(64) const int c_general::absMaskFloat[4] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; | |
139 | V_ALIGN(16) const unsigned int c_general::signMaskFloat[4] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; | |
140 | V_ALIGN(16) const short c_general::minShort[8] = { -0x8000, -0x8000, -0x8000, -0x8000, -0x8000, -0x8000, -0x8000, -0x8000 }; | |
141 | V_ALIGN(16) extern const unsigned short _IndexesFromZero8[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; | |
142 | ||
143 | // cacheline 2 | |
144 | V_ALIGN(16) extern const unsigned int _IndexesFromZero4[4] = { 0, 1, 2, 3 }; | |
145 | V_ALIGN(16) const unsigned short c_general::one16[8] = { 1, 1, 1, 1, 1, 1, 1, 1 }; | |
146 | V_ALIGN(16) const unsigned int c_general::one32[4] = { 1, 1, 1, 1 }; | |
147 | V_ALIGN(16) const float c_general::oneFloat[4] = { 1.f, 1.f, 1.f, 1.f }; | |
148 | ||
149 | // cacheline 3 | |
150 | V_ALIGN(16) const double c_general::oneDouble[2] = { 1., 1. }; | |
151 | V_ALIGN(16) const long long c_general::absMaskDouble[2] = { 0x7fffffffffffffffll, 0x7fffffffffffffffll }; | |
152 | V_ALIGN(16) const unsigned long long c_general::signMaskDouble[2] = { 0x8000000000000000ull, 0x8000000000000000ull }; | |
153 | V_ALIGN(16) const unsigned long long c_general::frexpMask[2] = { 0xbfefffffffffffffull, 0xbfefffffffffffffull }; | |
154 | ||
155 | template<> const float c_sin<float>::data[4 * 8] = { | |
156 | // cacheline 4 | |
157 | // 1 over 2pi | |
158 | 1.59154936671257019e-01f, 1.59154936671257019e-01f, 1.59154936671257019e-01f, 1.59154936671257019e-01f, | |
159 | // 2pi | |
160 | 6.28318548202514648f, 6.28318548202514648f, 6.28318548202514648f, 6.28318548202514648f, | |
161 | // pi over 2 | |
162 | 1.57079637050628662f, 1.57079637050628662f, 1.57079637050628662f, 1.57079637050628662f, | |
163 | // pi | |
164 | 3.14159274101257324f, 3.14159274101257324f, 3.14159274101257324f, 3.14159274101257324f, | |
165 | ||
166 | // cacheline 5 | |
167 | // 1 over 3! | |
168 | 1.66666671633720398e-01f, 1.66666671633720398e-01f, 1.66666671633720398e-01f, 1.66666671633720398e-01f, | |
169 | // 1 over 5! | |
170 | 8.33333376795053482e-03f, 8.33333376795053482e-03f, 8.33333376795053482e-03f, 8.33333376795053482e-03f, | |
171 | // 1 over 7! | |
172 | 1.98412701138295233e-04f, 1.98412701138295233e-04f, 1.98412701138295233e-04f, 1.98412701138295233e-04f, | |
173 | // 1 over 9! | |
174 | 2.75573188446287531e-06f, 2.75573188446287531e-06f, 2.75573188446287531e-06f, 2.75573188446287531e-06f | |
175 | }; | |
176 | ||
177 | template<> const double c_sin<double>::data[2 * 8] = { | |
178 | // cacheline 6 | |
179 | // 1 over 2pi | |
180 | 0.15915494309189533576888376337251436, 0.15915494309189533576888376337251436, | |
181 | // 2pi | |
182 | 6.2831853071795864769252867665590058 , 6.2831853071795864769252867665590058 , | |
183 | // pi over 2 | |
184 | 1.5707963267948966192313216916397514 , 1.5707963267948966192313216916397514 , | |
185 | // pi | |
186 | 3.1415926535897932384626433832795029 , 3.1415926535897932384626433832795029 , | |
187 | ||
188 | // cacheline 7 | |
189 | // 1 over 3! | |
190 | 1.666666666666666574148081281236954964697360992431640625e-01, 1.666666666666666574148081281236954964697360992431640625e-01, | |
191 | // 1 over 5! | |
192 | 8.33333333333333321768510160154619370587170124053955078125e-03, 8.33333333333333321768510160154619370587170124053955078125e-03, | |
193 | // 1 over 7! | |
194 | 1.984126984126984125263171154784913596813566982746124267578125e-04, 1.984126984126984125263171154784913596813566982746124267578125e-04, | |
195 | // 1 over 9! | |
196 | 2.755731922398589251095059327045788677423843182623386383056640625e-06, 2.755731922398589251095059327045788677423843182623386383056640625e-06 | |
197 | }; | |
198 | ||
199 | // cacheline 8 | |
200 | V_ALIGN(16) extern const unsigned char _IndexesFromZero16[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; | |
201 | ||
202 | V_ALIGN(64) const unsigned long long c_log<double>::data[21 * 2] = { | |
203 | /* 0*/ 0x000003ff000003ffull, 0x000003ff000003ffull // bias TODO: remove | |
204 | /* 1*/ , 0x7ff0000000000000ull, 0x7ff0000000000000ull // exponentMask (+inf) | |
205 | ||
206 | /* 2*/ , 0x3f1ab4c293c31bb0ull, 0x3f1ab4c293c31bb0ull // P[0] | |
207 | /* 3*/ , 0x3fdfd6f53f5652f2ull, 0x3fdfd6f53f5652f2ull // P[1] | |
208 | /* 4*/ , 0x4012d2baed926911ull, 0x4012d2baed926911ull // P[2] | |
209 | /* 5*/ , 0x402cff72c63eeb2eull, 0x402cff72c63eeb2eull // P[3] | |
210 | /* 6*/ , 0x4031efd6924bc84dull, 0x4031efd6924bc84dull // P[4] | |
211 | /* 7*/ , 0x401ed5637d7edcf8ull, 0x401ed5637d7edcf8ull // P[5] | |
212 | ||
213 | /* 8*/ , 0x40269320ae97ef8eull, 0x40269320ae97ef8eull // Q[0] | |
214 | /* 9*/ , 0x40469d2c4e19c033ull, 0x40469d2c4e19c033ull // Q[1] | |
215 | /*10*/ , 0x4054bf33a326bdbdull, 0x4054bf33a326bdbdull // Q[2] | |
216 | /*11*/ , 0x4051c9e2eb5eae21ull, 0x4051c9e2eb5eae21ull // Q[3] | |
217 | /*12*/ , 0x4037200a9e1f25b2ull, 0x4037200a9e1f25b2ull // Q[4] | |
218 | ||
219 | /*13*/ , 0xfff0000000000000ull, 0xfff0000000000000ull // -inf | |
220 | /*14*/ , 0x0010000000000000ull, 0x0010000000000000ull // min() | |
221 | /*15*/ , 0x3fe6a09e667f3bcdull, 0x3fe6a09e667f3bcdull // 1/sqrt(2) | |
222 | /*16*/ , 0x3fe6300000000000ull, 0x3fe6300000000000ull // round(ln(2) * 512) / 512 | |
223 | /*17*/ , 0xbf2bd0105c610ca8ull, 0xbf2bd0105c610ca8ull // ln(2) - round(ln(2) * 512) / 512 | |
224 | /*18*/ , 0x3fe0000000000000ull, 0x3fe0000000000000ull // 0.5 | |
225 | /*19*/ , 0x3fdbcb7b1526e50eull, 0x3fdbcb7b1526e50eull // log10(e) | |
226 | /*20*/ , 0x3ff71547652b82feull, 0x3ff71547652b82feull // log2(e) | |
227 | }; | |
228 | ||
229 | template<> V_ALIGN(64) const unsigned int c_log<float>::data[21 * 4] = { | |
230 | 0x0000007fu, 0x0000007fu, 0x0000007fu, 0x0000007fu, // bias TODO: remove | |
231 | 0x7f800000u, 0x7f800000u, 0x7f800000u, 0x7f800000u, // exponentMask (+inf) | |
232 | ||
233 | 0x3d9021bbu, 0x3d9021bbu, 0x3d9021bbu, 0x3d9021bbu, // 7.0376836292e-2f // P[0] | |
234 | 0xbdebd1b8u, 0xbdebd1b8u, 0xbdebd1b8u, 0xbdebd1b8u, // -1.1514610310e-1f // P[1] | |
235 | 0x3def251au, 0x3def251au, 0x3def251au, 0x3def251au, // 1.1676998740e-1f // P[2] | |
236 | 0xbdfe5d4fu, 0xbdfe5d4fu, 0xbdfe5d4fu, 0xbdfe5d4fu, // -1.2420140846e-1f // P[3] | |
237 | 0x3e11e9bfu, 0x3e11e9bfu, 0x3e11e9bfu, 0x3e11e9bfu, // 1.4249322787e-1f // P[4] | |
238 | 0xbe2aae50u, 0xbe2aae50u, 0xbe2aae50u, 0xbe2aae50u, // -1.6668057665e-1f // P[5] | |
239 | 0x3e4cceacu, 0x3e4cceacu, 0x3e4cceacu, 0x3e4cceacu, // 2.0000714765e-1f // P[6] | |
240 | 0xbe7ffffcu, 0xbe7ffffcu, 0xbe7ffffcu, 0xbe7ffffcu, // -2.4999993993e-1f // P[7] | |
241 | 0x3eaaaaaau, 0x3eaaaaaau, 0x3eaaaaaau, 0x3eaaaaaau, // 3.3333331174e-1f // P[8] | |
242 | 0, 0, 0, 0, // padding because of c_log<double> | |
243 | 0, 0, 0, 0, // padding because of c_log<double> | |
244 | ||
245 | 0xff800000u, 0xff800000u, 0xff800000u, 0xff800000u, // -inf | |
246 | 0x00800000u, 0x00800000u, 0x00800000u, 0x00800000u, // min() | |
247 | 0x3f3504f3u, 0x3f3504f3u, 0x3f3504f3u, 0x3f3504f3u, // 1/sqrt(2) | |
248 | // ln(2) = 0x3fe62e42fefa39ef | |
249 | // ln(2) = Vc_buildDouble( 1, 0x00062e42fefa39ef, -1) | |
250 | // = Vc_buildFloat( 1, 0x00317217(f7d), -1) + Vc_buildFloat( 1, 0x0077d1cd, -25) | |
251 | // = Vc_buildFloat( 1, 0x00318000(000), -1) + Vc_buildFloat(-1, 0x005e8083, -13) | |
252 | 0x3f318000u, 0x3f318000u, 0x3f318000u, 0x3f318000u, // round(ln(2) * 512) / 512 | |
253 | 0xb95e8083u, 0xb95e8083u, 0xb95e8083u, 0xb95e8083u, // ln(2) - round(ln(2) * 512) / 512 | |
254 | 0x3f000000u, 0x3f000000u, 0x3f000000u, 0x3f000000u, // 0.5 | |
255 | 0x3ede5bd9u, 0x3ede5bd9u, 0x3ede5bd9u, 0x3ede5bd9u, // log10(e) | |
256 | 0x3fb8aa3bu, 0x3fb8aa3bu, 0x3fb8aa3bu, 0x3fb8aa3bu, // log2(e) | |
257 | // log10(2) = 0x3fd34413509f79ff | |
258 | // = Vc_buildDouble( 1, 0x00034413509f79ff, -2) | |
259 | // = Vc_buildFloat( 1, 0x001a209a(84fbcff8), -2) + Vc_buildFloat( 1, 0x0004fbcff(8), -26) | |
260 | //Vc_buildFloat( 1, 0x001a209a, -2), // log10(2) | |
261 | //Vc_buildFloat( 1, 0x001a209a, -2), // log10(2) | |
262 | //Vc_buildFloat( 1, 0x001a209a, -2), // log10(2) | |
263 | //Vc_buildFloat( 1, 0x001a209a, -2), // log10(2) | |
264 | }; | |
265 | } // namespace SSE | |
266 | ||
267 | V_ALIGN(64) unsigned int RandomState[16] = { | |
268 | 0x5a383a4fu, 0xc68bd45eu, 0x691d6d86u, 0xb367e14fu, | |
269 | 0xd689dbaau, 0xfde442aau, 0x3d265423u, 0x1a77885cu, | |
270 | 0x36ed2684u, 0xfb1f049du, 0x19e52f31u, 0x821e4dd7u, | |
271 | 0x23996d25u, 0x5962725au, 0x6aced4ceu, 0xd4c610f3u | |
272 | }; | |
273 | ||
// Dummy symbol used to emit warnings with GCC 4.3. The function body is
// intentionally empty; only the existence of the symbol matters here.
namespace Warnings
{
    void _operator_bracket_warning()
    {
    }
} // namespace Warnings
278 | ||
279 | const char LIBRARY_VERSION[] = VC_VERSION_STRING; | |
280 | const unsigned int LIBRARY_VERSION_NUMBER = VC_VERSION_NUMBER; | |
281 | const unsigned int LIBRARY_ABI_VERSION = VC_LIBRARY_ABI_VERSION; | |
282 | ||
283 | void checkLibraryAbi(unsigned int compileTimeAbi, unsigned int versionNumber, const char *compileTimeVersion) { | |
284 | if (LIBRARY_ABI_VERSION != compileTimeAbi || LIBRARY_VERSION_NUMBER < versionNumber) { | |
285 | printf("The versions of libVc.a (%s) and Vc/version.h (%s) are incompatible. Aborting.\n", LIBRARY_VERSION, compileTimeVersion); | |
286 | abort(); | |
287 | } | |
288 | } | |
289 | ||
290 | } // namespace Vc | |
291 | ||
292 | #undef V_ALIGN |