1 /* This file is part of the Vc library.
3 Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
5 Vc is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation, either version 3 of
8 the License, or (at your option) any later version.
10 Vc is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with Vc. If not, see <http://www.gnu.org/licenses/>.
30 * Splits \p v into exponent and mantissa; the sign is kept with the mantissa.
32 * For finite, non-zero input the magnitude of the return value will be in the range [0.5, 1.0[.
33 * The \p e value will be an integer defining the power-of-two exponent.
// frexp for double_v: builds a mantissa vector `ret` from \p v and writes the
// matching power-of-two exponents through \p e.
//
// \param v  input vector
// \param e  output; receives the extracted exponents (must not be null, it is
//           dereferenced unconditionally)
//
// NOTE(review): the return statement and closing brace of this function are not
// visible in this excerpt; presumably `ret` is returned.
35 inline double_v frexp(const double_v &v, int_v *e) {
// Bit mask selecting the exponent field of a double.
36 const __m128i exponentBits = Const<double>::exponentMask().dataI();
// Raw (still biased, still in-place) exponent bits of every entry of v.
37 const __m128i exponentPart = _mm_and_si128(_mm_castpd_si128(v.data()), exponentBits);
// Move the exponent field down to bit 0 of each 64-bit lane, then subtract 0x3fe
// from every 32-bit element. The two exponents therefore live in 32-bit elements
// 0 and 2 of *e; elements 1 and 3 hold 0 - 0x3fe.
// NOTE(review): confirm callers expect the exponents at int_v indices 0 and 2.
38 *e = _mm_sub_epi32(_mm_srli_epi64(exponentPart, 52), _mm_set1_epi32(0x3fe));
// Force all exponent bits to 1 ...
39 const __m128d exponentMaximized = _mm_or_pd(v.data(), _mm_castsi128_pd(exponentBits));
// ... and mask with c_general::frexpMask. Presumably this clears exactly the
// exponent bits needed to leave a biased exponent of 0x3fe (value in [0.5, 1)),
// analogous to the 0xbf7fffff constant of the float overload — TODO confirm.
40 double_v ret = _mm_and_pd(exponentMaximized, _mm_load_pd(reinterpret_cast<const double *>(&c_general::frexpMask[0])));
41 double_m zeroMask = v == double_v::Zero();
// NaN, non-finite and zero entries are passed through unchanged.
42 ret(isnan(v) || !isfinite(v) || zeroMask) = v;
// Zero input gets exponent 0 instead of the value computed above.
43 e->setZero(zeroMask.data());
// frexp for float_v: builds a mantissa vector `ret` from \p v and writes the
// matching power-of-two exponents through \p e.
//
// \param v  input vector
// \param e  output; receives the extracted exponents (dereferenced
//           unconditionally, must not be null)
//
// NOTE(review): the return statement and closing brace of this function are not
// visible in this excerpt; presumably `ret` is returned.
46 inline float_v frexp(const float_v &v, int_v *e) {
// Bit mask selecting the exponent field of a float.
47 const __m128i exponentBits = Const<float>::exponentMask().dataI();
48 const __m128i exponentPart = _mm_and_si128(_mm_castps_si128(v.data()), exponentBits);
// Shift the exponent field down to bit 0 and subtract 0x7e (126), i.e. bias - 1,
// so that the returned mantissa can be scaled into [0.5, 1).
49 *e = _mm_sub_epi32(_mm_srli_epi32(exponentPart, 23), _mm_set1_epi32(0x7e));
// Set every exponent bit, then clear bits 30 and 23 via 0xbf7fffff: the biased
// exponent becomes 0x7e while sign and mantissa bits are kept.
50 const __m128 exponentMaximized = _mm_or_ps(v.data(), _mm_castsi128_ps(exponentBits));
51 float_v ret = _mm_and_ps(exponentMaximized, _mm_castsi128_ps(_mm_set1_epi32(0xbf7fffffu)));
// NaN, non-finite and zero entries are passed through unchanged.
52 ret(isnan(v) || !isfinite(v) || v == float_v::Zero()) = v;
// Zero input gets exponent 0.
53 e->setZero(v == float_v::Zero());
// frexp for sfloat_v: same scheme as the float_v overload, applied to the two
// __m128 halves of \p v, with the exponents narrowed to 16 bit and written
// through \p e.
//
// \param v  input vector (accessed as v.data()[0] and v.data()[1])
// \param e  output; receives the extracted exponents (dereferenced
//           unconditionally, must not be null)
//
// NOTE(review): the closing `);` of the M256::create call, the return statement
// and the closing brace are not visible in this excerpt.
56 inline sfloat_v frexp(const sfloat_v &v, short_v *e) {
57 const __m128i exponentBits = Const<float>::exponentMask().dataI();
// Raw exponent bits of the lower and upper half.
58 const __m128i exponentPart0 = _mm_and_si128(_mm_castps_si128(v.data()[0]), exponentBits);
59 const __m128i exponentPart1 = _mm_and_si128(_mm_castps_si128(v.data()[1]), exponentBits);
// Shift both halves down, pack the eight 32-bit exponents into eight 16-bit
// values (signed saturation), then subtract 0x7e (bias - 1) per element.
60 *e = _mm_sub_epi16(_mm_packs_epi32(_mm_srli_epi32(exponentPart0, 23), _mm_srli_epi32(exponentPart1, 23)),
61 _mm_set1_epi16(0x7e));
// Set every exponent bit in both halves ...
62 const __m128 exponentMaximized0 = _mm_or_ps(v.data()[0], _mm_castsi128_ps(exponentBits));
63 const __m128 exponentMaximized1 = _mm_or_ps(v.data()[1], _mm_castsi128_ps(exponentBits));
// ... then clear bits 30 and 23 via 0xbf7fffff so the biased exponent is 0x7e.
64 sfloat_v ret = M256::create(
65 _mm_and_ps(exponentMaximized0, _mm_castsi128_ps(_mm_set1_epi32(0xbf7fffffu))),
66 _mm_and_ps(exponentMaximized1, _mm_castsi128_ps(_mm_set1_epi32(0xbf7fffffu)))
68 sfloat_m zeroMask = v == sfloat_v::Zero();
// NaN, non-finite and zero entries are passed through unchanged.
69 ret(isnan(v) || !isfinite(v) || zeroMask) = v;
// Zero input gets exponent 0.
70 e->setZero(static_cast<short_m>(zeroMask));
76 * x == (-)inf -> (-)inf
// ldexp for double_v: scales \p v by a power of two by adding the exponent
// directly onto the exponent field of the IEEE bit pattern.
//
// \param v   values to scale
// \param _e  power-of-two exponents
// \return    v with the shifted exponent added to its bit pattern
//
// No overflow/underflow handling of the exponent field is visible here.
// NOTE(review): the declaration of `e` is not visible in this excerpt;
// presumably it is a mutable copy of `_e`. The closing brace is also elided.
78 inline double_v ldexp(double_v::AsArg v, int_v::AsArg _e) {
// Entries where v == 0 must stay zero, so their exponent is cleared.
80 e.setZero((v == double_v::Zero()).dataI());
// Move the exponent to the position of the double exponent field (bit 52 up)
// within each 64-bit lane.
81 const __m128i exponentBits = _mm_slli_epi64(e.data(), 52);
// Integer addition on the raw bits increases the biased exponent by e.
82 return _mm_castsi128_pd(_mm_add_epi64(_mm_castpd_si128(v.data()), exponentBits));
// ldexp for float_v: scales \p v by a power of two by adding the exponent onto
// the exponent field of the IEEE bit pattern.
//
// \param v   values to scale
// \param _e  power-of-two exponents
// \return    v reinterpreted as integers, plus (e << 23), reinterpreted back
//
// No overflow/underflow handling of the exponent field is visible here.
// NOTE(review): the declaration of `e` is not visible in this excerpt;
// presumably it is a mutable copy of `_e`. The closing brace is also elided.
84 inline float_v ldexp(float_v::AsArg v, int_v::AsArg _e) {
// Entries where v == 0 must stay zero, so their exponent is cleared.
86 e.setZero(static_cast<int_m>(v == float_v::Zero()));
// 23 is the bit offset of the float exponent field.
87 return (v.reinterpretCast<int_v>() + (e << 23)).reinterpretCast<float_v>();
// ldexp for sfloat_v: adds the 16-bit exponents onto the bit patterns of the
// two __m128 halves of \p v.
//
// \param v   values to scale (accessed as v.data()[0] and v.data()[1])
// \param _e  power-of-two exponents
// \return    both halves with the widened exponent bits added
//
// NOTE(review): the declaration of `e` and at least one statement between the
// setZero call and the unpack (listing line 92) are not visible in this
// excerpt; presumably `e` is shifted so that, once placed in the upper 16 bits
// of a 32-bit lane, it lines up with the float exponent field — TODO confirm.
89 inline sfloat_v ldexp(sfloat_v::AsArg v, short_v::AsArg _e) {
// Entries where v == 0 must stay zero, so their exponent is cleared.
91 e.setZero(static_cast<short_m>(v == sfloat_v::Zero()));
// Interleave with zeros: each 16-bit element of e ends up in the upper half of
// a 32-bit lane, with zeros in the lower half.
93 const __m128i exponentBits0 = _mm_unpacklo_epi16(_mm_setzero_si128(), e.data());
94 const __m128i exponentBits1 = _mm_unpackhi_epi16(_mm_setzero_si128(), e.data());
// Integer addition on the raw float bits of each half.
95 return M256::create(_mm_castsi128_ps(_mm_add_epi32(_mm_castps_si128(v.data()[0]), exponentBits0)),
96 _mm_castsi128_ps(_mm_add_epi32(_mm_castps_si128(v.data()[1]), exponentBits1)));
100 inline double_v floor(double_v::AsArg v) { return _mm_floor_pd(v.data()); }
101 inline float_v floor(float_v::AsArg v) { return _mm_floor_ps(v.data()); }
102 inline sfloat_v floor(sfloat_v::AsArg v) { return M256::create(_mm_floor_ps(v.data()[0]),
103 _mm_floor_ps(v.data()[1])); }
104 inline double_v ceil(double_v::AsArg v) { return _mm_ceil_pd(v.data()); }
105 inline float_v ceil(float_v::AsArg v) { return _mm_ceil_ps(v.data()); }
106 inline sfloat_v ceil(sfloat_v::AsArg v) { return M256::create(_mm_ceil_ps(v.data()[0]),
107 _mm_ceil_ps(v.data()[1])); }
// Helper for the generic floor/ceil below: ANDs \p v in place with a bit mask
// derived from the per-entry exponent \p e.
//
// \param v  vector to mask (modified in place)
// \param e  per-entry shift amounts, given as floats and converted to int_v
//
// NOTE(review): the opening/closing braces and one statement between the
// initialisation of `x` and the right shift (listing line 112) are not visible
// in this excerpt, so the exact mask cannot be derived from here.
109 static inline void floor_shift(float_v &v, float_v::AsArg e)
// Start from an all-ones integer mask.
111 int_v x = _mm_setallone_si128();
// Shift each mask entry right by the corresponding exponent.
113 x >>= static_cast<int_v>(e);
// Keep only the bits of v selected by the mask.
114 v &= x.reinterpretCast<float_v>();
// Helper for the generic floor/ceil below, sfloat_v variant: ANDs both __m128
// halves of \p v in place with bit masks derived from the exponents in \p e.
//
// \param v  vector to mask (modified in place)
// \param e  per-entry shift amounts, given as floats
//
// NOTE(review): the braces, the declaration of `y` and the statements on
// listing lines 120-121 are not visible in this excerpt; presumably `y` is a
// second mask prepared like `x` for the upper half — TODO confirm.
117 static inline void floor_shift(sfloat_v &v, sfloat_v::AsArg e)
// Start from an all-ones integer mask.
119 int_v x = _mm_setallone_si128();
// Shift amounts come from a truncating float-to-int conversion of each half.
122 x >>= _mm_cvttps_epi32(e.data()[0]);
123 y >>= _mm_cvttps_epi32(e.data()[1]);
// Apply the masks to the lower and upper half respectively.
124 v.data()[0] = _mm_and_ps(v.data()[0], _mm_castsi128_ps(x.data()));
125 v.data()[1] = _mm_and_ps(v.data()[1], _mm_castsi128_ps(y.data()));
// Helper for the generic floor/ceil below, double_v variant: ANDs \p v in
// place with a per-entry mask obtained by right-shifting 0xfff0000000000000
// by the entry's exponent.
//
// \param v  vector to mask (modified in place)
// \param e  per-entry shift amounts, given as doubles and converted to uint_v
//
// NOTE(review): the braces and the declaration of `d_ll` (listing lines
// 132-135) are not visible in this excerpt; presumably it is a union of a
// long long and a double `d` used to reinterpret the mask bits — TODO confirm.
128 static inline void floor_shift(double_v &v, double_v::AsArg e)
// Sign bit plus the 11 exponent bits set. The value is negative as a signed
// long long, so the shifts below rely on an arithmetic (sign-extending) right
// shift, which is implementation-defined before C++20.
130 const long long initialMask = 0xfff0000000000000ull;
131 const uint_v shifts = static_cast<uint_v>(e);
// The shift is done per entry in scalar code, one mask for each of the two doubles.
136 d_ll mask0 = { initialMask >> shifts[0] };
137 d_ll mask1 = { initialMask >> shifts[1] };
// Keep only the bits of v selected by the two masks.
138 v &= double_v(_mm_setr_pd(mask0.d, mask1.d));
// Generic floor for Vector<T>, built from exponent inspection and masking
// rather than a rounding intrinsic.
//
// \param _v  input vector (taken by value; compared against the working copy)
//
// NOTE(review): the template header, the declarations of `V` and `v`, the
// statements on listing lines 151-153 (presumably the floor_shift call that
// truncates `v`), the return statement and the closing brace are not visible
// in this excerpt.
142 inline Vector<T> floor(Vector<T> _v) {
144 typedef typename V::Mask M;
// Exponent of the magnitude of every entry.
147 V e = abs(v).exponent();
// Entries with a negative exponent (presumably |v| < 1 — depends on the
// semantics of exponent(), TODO confirm) are handled separately below.
148 const M negativeExponent = e < 0;
// Clamp those exponents to zero.
149 e.setZero(negativeExponent);
150 const M negativeInput = v < V::Zero();
// Entries with a negative exponent become zero.
154 v.setZero(negativeExponent);
// Negative entries whose value was changed are moved one further down, since
// floor rounds towards negative infinity.
155 v(negativeInput && _v != v) -= V::One();
// Generic ceil for Vector<T>, built from exponent inspection and masking
// rather than a rounding intrinsic.
//
// \param _v  input vector (taken by value; compared against the working copy)
//
// NOTE(review): the template header, the declarations of `V` and `v`, the
// statements on listing lines 169-171 (presumably the floor_shift call that
// truncates `v`), the return statement and the closing brace are not visible
// in this excerpt.
160 inline Vector<T> ceil(Vector<T> _v) {
162 typedef typename V::Mask M;
// Exponent of the magnitude of every entry.
165 V e = abs(v).exponent();
// Entries with a negative exponent (presumably |v| < 1 — depends on the
// semantics of exponent(), TODO confirm) are handled separately below.
166 const M negativeExponent = e < 0;
// Clamp those exponents to zero.
167 e.setZero(negativeExponent);
168 const M positiveInput = v > V::Zero();
// Entries with a negative exponent become zero.
172 v.setZero(negativeExponent);
// Positive entries whose value was changed are moved one up, since ceil rounds
// towards positive infinity.
173 v(positiveInput && _v != v) += V::One();
180 #define VC__USE_NAMESPACE SSE
181 #include "../common/trigonometric.h"
182 #define VC__USE_NAMESPACE SSE
183 #include "../common/logarithm.h"
184 #define VC__USE_NAMESPACE SSE
185 #include "../common/exponential.h"
186 #undef VC__USE_NAMESPACE
188 #endif // VC_SSE_MATH_H