Vc/include/Vc/sse/math.h

   1 /*  This file is part of the Vc library.
   2
   3     Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
   4
   5     Vc is free software: you can redistribute it and/or modify
   6     it under the terms of the GNU Lesser General Public License as
   7     published by the Free Software Foundation, either version 3 of
   8     the License, or (at your option) any later version.
   9
  10     Vc is distributed in the hope that it will be useful, but
  11     WITHOUT ANY WARRANTY; without even the implied warranty of
  12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13     GNU Lesser General Public License for more details.
  14
  15     You should have received a copy of the GNU Lesser General Public
  16     License along with Vc.  If not, see <http://www.gnu.org/licenses/>.
  17
  18 */
  19
  20 #ifndef VC_SSE_MATH_H
  21 #define VC_SSE_MATH_H
  22
  23 #include "const.h"
  24
  25 namespace Vc
  26 {
  27 namespace SSE
  28 {
  29     /**
  30      * splits \p v into exponent and mantissa, the sign is kept with the mantissa
  31      *
  32      * The return value will be in the range [0.5, 1.0[
  33      * The \p e value will be an integer defining the power-of-two exponent
  34      */
  35     inline double_v frexp(const double_v &v, int_v *e) {
  36         const __m128i exponentBits = Const<double>::exponentMask().dataI();
  37         const __m128i exponentPart = _mm_and_si128(_mm_castpd_si128(v.data()), exponentBits);
  38         *e = _mm_sub_epi32(_mm_srli_epi64(exponentPart, 52), _mm_set1_epi32(0x3fe));
  39         const __m128d exponentMaximized = _mm_or_pd(v.data(), _mm_castsi128_pd(exponentBits));
  40         double_v ret = _mm_and_pd(exponentMaximized, _mm_load_pd(reinterpret_cast<const double *>(&c_general::frexpMask[0])));
  41         double_m zeroMask = v == double_v::Zero();
  42         ret(isnan(v) || !isfinite(v) || zeroMask) = v;
  43         e->setZero(zeroMask.data());
  44         return ret;
  45     }
  46     inline float_v frexp(const float_v &v, int_v *e) {
  47         const __m128i exponentBits = Const<float>::exponentMask().dataI();
  48         const __m128i exponentPart = _mm_and_si128(_mm_castps_si128(v.data()), exponentBits);
  49         *e = _mm_sub_epi32(_mm_srli_epi32(exponentPart, 23), _mm_set1_epi32(0x7e));
  50         const __m128 exponentMaximized = _mm_or_ps(v.data(), _mm_castsi128_ps(exponentBits));
  51         float_v ret = _mm_and_ps(exponentMaximized, _mm_castsi128_ps(_mm_set1_epi32(0xbf7fffffu)));
  52         ret(isnan(v) || !isfinite(v) || v == float_v::Zero()) = v;
  53         e->setZero(v == float_v::Zero());
  54         return ret;
  55     }
  56     inline sfloat_v frexp(const sfloat_v &v, short_v *e) {
  57         const __m128i exponentBits = Const<float>::exponentMask().dataI();
  58         const __m128i exponentPart0 = _mm_and_si128(_mm_castps_si128(v.data()[0]), exponentBits);
  59         const __m128i exponentPart1 = _mm_and_si128(_mm_castps_si128(v.data()[1]), exponentBits);
  60         *e = _mm_sub_epi16(_mm_packs_epi32(_mm_srli_epi32(exponentPart0, 23), _mm_srli_epi32(exponentPart1, 23)),
  61                 _mm_set1_epi16(0x7e));
  62         const __m128 exponentMaximized0 = _mm_or_ps(v.data()[0], _mm_castsi128_ps(exponentBits));
  63         const __m128 exponentMaximized1 = _mm_or_ps(v.data()[1], _mm_castsi128_ps(exponentBits));
  64         sfloat_v ret = M256::create(
  65                 _mm_and_ps(exponentMaximized0, _mm_castsi128_ps(_mm_set1_epi32(0xbf7fffffu))),
  66                 _mm_and_ps(exponentMaximized1, _mm_castsi128_ps(_mm_set1_epi32(0xbf7fffffu)))
  67                 );
  68         sfloat_m zeroMask = v == sfloat_v::Zero();
  69         ret(isnan(v) || !isfinite(v) || zeroMask) = v;
  70         e->setZero(static_cast<short_m>(zeroMask));
  71         return ret;
  72     }
  73
  74     /*             -> x * 2^e
  75      * x == NaN    -> NaN
  76      * x == (-)inf -> (-)inf
  77      */
  78     inline double_v ldexp(double_v::AsArg v, int_v::AsArg _e) {
  79         int_v e = _e;
  80         e.setZero((v == double_v::Zero()).dataI());
  81         const __m128i exponentBits = _mm_slli_epi64(e.data(), 52);
  82         return _mm_castsi128_pd(_mm_add_epi64(_mm_castpd_si128(v.data()), exponentBits));
  83     }
  84     inline float_v ldexp(float_v::AsArg v, int_v::AsArg _e) {
  85         int_v e = _e;
  86         e.setZero(static_cast<int_m>(v == float_v::Zero()));
  87         return (v.reinterpretCast<int_v>() + (e << 23)).reinterpretCast<float_v>();
  88     }
  89     inline sfloat_v ldexp(sfloat_v::AsArg v, short_v::AsArg _e) {
  90         short_v e = _e;
  91         e.setZero(static_cast<short_m>(v == sfloat_v::Zero()));
  92         e <<= (23 - 16);
  93         const __m128i exponentBits0 = _mm_unpacklo_epi16(_mm_setzero_si128(), e.data());
  94         const __m128i exponentBits1 = _mm_unpackhi_epi16(_mm_setzero_si128(), e.data());
  95         return M256::create(_mm_castsi128_ps(_mm_add_epi32(_mm_castps_si128(v.data()[0]), exponentBits0)),
  96                 _mm_castsi128_ps(_mm_add_epi32(_mm_castps_si128(v.data()[1]), exponentBits1)));
  97     }
  98
  99 #ifdef VC_IMPL_SSE4_1
 100     inline double_v floor(double_v::AsArg v) { return _mm_floor_pd(v.data()); }
 101     inline float_v floor(float_v::AsArg v) { return _mm_floor_ps(v.data()); }
 102     inline sfloat_v floor(sfloat_v::AsArg v) { return M256::create(_mm_floor_ps(v.data()[0]),
 103             _mm_floor_ps(v.data()[1])); }
 104     inline double_v ceil(double_v::AsArg v) { return _mm_ceil_pd(v.data()); }
 105     inline float_v ceil(float_v::AsArg v) { return _mm_ceil_ps(v.data()); }
 106     inline sfloat_v ceil(sfloat_v::AsArg v) { return M256::create(_mm_ceil_ps(v.data()[0]),
 107             _mm_ceil_ps(v.data()[1])); }
 108 #else
 109     static inline void floor_shift(float_v &v, float_v::AsArg e)
 110     {
 111         int_v x = _mm_setallone_si128();
 112         x <<= 23;
 113         x >>= static_cast<int_v>(e);
 114         v &= x.reinterpretCast<float_v>();
 115     }
 116
 117     static inline void floor_shift(sfloat_v &v, sfloat_v::AsArg e)
 118     {
 119         int_v x = _mm_setallone_si128();
 120         x <<= 23;
 121         int_v y = x;
 122         x >>= _mm_cvttps_epi32(e.data()[0]);
 123         y >>= _mm_cvttps_epi32(e.data()[1]);
 124         v.data()[0] = _mm_and_ps(v.data()[0], _mm_castsi128_ps(x.data()));
 125         v.data()[1] = _mm_and_ps(v.data()[1], _mm_castsi128_ps(y.data()));
 126     }
 127
 128     static inline void floor_shift(double_v &v, double_v::AsArg e)
 129     {
 130         const long long initialMask = 0xfff0000000000000ull;
 131         const uint_v shifts = static_cast<uint_v>(e);
 132         union d_ll {
 133             long long ll;
 134             double d;
 135         };
 136         d_ll mask0 = { initialMask >> shifts[0] };
 137         d_ll mask1 = { initialMask >> shifts[1] };
 138         v &= double_v(_mm_setr_pd(mask0.d, mask1.d));
 139     }
 140
 141     template<typename T>
 142     inline Vector<T> floor(Vector<T> _v) {
 143         typedef Vector<T> V;
 144         typedef typename V::Mask M;
 145
 146         V v = _v;
 147         V e = abs(v).exponent();
 148         const M negativeExponent = e < 0;
 149         e.setZero(negativeExponent);
 150         const M negativeInput = v < V::Zero();
 151
 152         floor_shift(v, e);
 153
 154         v.setZero(negativeExponent);
 155         v(negativeInput && _v != v) -= V::One();
 156         return v;
 157     }
 158
 159     template<typename T>
 160     inline Vector<T> ceil(Vector<T> _v) {
 161         typedef Vector<T> V;
 162         typedef typename V::Mask M;
 163
 164         V v = _v;
 165         V e = abs(v).exponent();
 166         const M negativeExponent = e < 0;
 167         e.setZero(negativeExponent);
 168         const M positiveInput = v > V::Zero();
 169
 170         floor_shift(v, e);
 171
 172         v.setZero(negativeExponent);
 173         v(positiveInput && _v != v) += V::One();
 174         return v;
 175     }
 176 #endif
 177 } // namespace SSE
 178 } // namespace Vc
 179
 180 #define VC__USE_NAMESPACE SSE
 181 #include "../common/trigonometric.h"
 182 #define VC__USE_NAMESPACE SSE
 183 #include "../common/logarithm.h"
 184 #define VC__USE_NAMESPACE SSE
 185 #include "../common/exponential.h"
 186 #undef VC__USE_NAMESPACE
 187
 188 #endif // VC_SSE_MATH_H