Vc/include/Vc/common/exponential.h

   1 #ifndef COMMON_EXPONENTIAL_H
   2 #define COMMON_EXPONENTIAL_H
   3 /*  This file is part of the Vc library. {{{
   4
   5     Copyright (C) 2012 Matthias Kretz <kretz@kde.org>
   6
   7     Vc is free software: you can redistribute it and/or modify
   8     it under the terms of the GNU Lesser General Public License as
   9     published by the Free Software Foundation, either version 3 of
  10     the License, or (at your option) any later version.
  11
  12     Vc is distributed in the hope that it will be useful, but
  13     WITHOUT ANY WARRANTY; without even the implied warranty of
  14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15     GNU Lesser General Public License for more details.
  16
  17     You should have received a copy of the GNU Lesser General Public
  18     License along with Vc.  If not, see <http://www.gnu.org/licenses/>.
  19
  20     -------------------------------------------------------------------
  21
  22     The exp implementation is derived from Cephes, which carries the
  23     following Copyright notice:
  24
  25     Cephes Math Library Release 2.2:  June, 1992
  26     Copyright 1984, 1987, 1989 by Stephen L. Moshier
  27     Direct inquiries to 30 Frost Street, Cambridge, MA 02140
  28
  29 }}}*/
  30
  31 #ifndef VC_COMMON_EXPONENTIAL_H
  32 #define VC_COMMON_EXPONENTIAL_H
  33
  34 #include "macros.h"
  35 namespace Vc
  36 {
  37 namespace Common
  38 {
  39     using Vc::VC__USE_NAMESPACE::c_log;
  40     using Vc::VC__USE_NAMESPACE::Vector;
  41     using Vc::VC__USE_NAMESPACE::floor;
  42     using Vc::VC__USE_NAMESPACE::ldexp;
  43
  44     static const float log2_e = 1.44269504088896341f;
  45     static const float MAXLOGF = 88.72283905206835f;
  46     static const float MINLOGF = -103.278929903431851103f; /* log(2^-149) */
  47     static const float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
  48
  49     template<typename T> struct TypenameForLdexp { typedef Vector<int> Type; };
  50     template<> struct TypenameForLdexp<Vc::sfloat> { typedef Vector<short> Type; };
  51
  52     template<typename T> static inline Vector<T> exp(Vector<T> x) {
  53         typedef Vector<T> V;
  54         typedef typename V::Mask M;
  55         typedef typename TypenameForLdexp<T>::Type I;
  56         typedef Const<T> C;
  57
  58         const M overflow  = x > MAXLOGF;
  59         const M underflow = x < MINLOGF;
  60
  61         // log₂(eˣ) = x * log₂(e) * log₂(2)
  62         //          = log₂(2^(x * log₂(e)))
  63         // => eˣ = 2^(x * log₂(e))
  64         // => n  = ⌊x * log₂(e) + ½⌋
  65         // => y  = x - n * ln(2)       | recall that: ln(2) * log₂(e) == 1
  66         // <=> eˣ = 2ⁿ * eʸ
  67         V z = floor(C::log2_e() * x + 0.5f);
  68         I n = static_cast<I>(z);
  69         x -= z * C::ln2_large();
  70         x -= z * C::ln2_small();
  71
  72         /* Theoretical peak relative error in [-0.5, +0.5] is 4.2e-9. */
  73         z = ((((( 1.9875691500E-4f  * x
  74                 + 1.3981999507E-3f) * x
  75                 + 8.3334519073E-3f) * x
  76                 + 4.1665795894E-2f) * x
  77                 + 1.6666665459E-1f) * x
  78                 + 5.0000001201E-1f) * (x * x)
  79                 + x
  80                 + 1.0f;
  81
  82         x = ldexp(z, n); // == z * 2ⁿ
  83
  84         x(overflow) = std::numeric_limits<typename V::EntryType>::infinity();
  85         x.setZero(underflow);
  86
  87         return x;
  88     }
  89     static inline Vector<double> exp(Vector<double>::AsArg _x) {
  90         Vector<double> x = _x;
  91         typedef Vector<double> V;
  92         typedef V::Mask M;
  93         typedef Const<double> C;
  94
  95         const M overflow  = x > Vc_buildDouble( 1, 0x0006232bdd7abcd2ull, 9); // max log
  96         const M underflow = x < Vc_buildDouble(-1, 0x0006232bdd7abcd2ull, 9); // min log
  97
  98         V px = floor(C::log2_e() * x + 0.5);
  99 #ifdef VC_IMPL_SSE
 100         Vector<int> n(px);
 101         n.data() = Mem::permute<X0, X2, X1, X3>(n.data());
 102 #elif defined(VC_IMPL_AVX)
 103         __m128i tmp = _mm256_cvttpd_epi32(px.data());
 104         Vector<int> n = AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp));
 105 #endif
 106         x -= px * C::ln2_large(); //Vc_buildDouble(1, 0x00062e4000000000ull, -1);  // ln2
 107         x -= px * C::ln2_small(); //Vc_buildDouble(1, 0x0007f7d1cf79abcaull, -20); // ln2
 108
 109         const double P[] = {
 110             Vc_buildDouble(1, 0x000089cdd5e44be8ull, -13),
 111             Vc_buildDouble(1, 0x000f06d10cca2c7eull,  -6),
 112             Vc_buildDouble(1, 0x0000000000000000ull,   0)
 113         };
 114         const double Q[] = {
 115             Vc_buildDouble(1, 0x00092eb6bc365fa0ull, -19),
 116             Vc_buildDouble(1, 0x0004ae39b508b6c0ull,  -9),
 117             Vc_buildDouble(1, 0x000d17099887e074ull,  -3),
 118             Vc_buildDouble(1, 0x0000000000000000ull,   1)
 119         };
 120         const V x2 = x * x;
 121         px = x * ((P[0] * x2 + P[1]) * x2 + P[2]);
 122         x =  px / ((((Q[0] * x2 + Q[1]) * x2 + Q[2]) * x2 + Q[3]) - px);
 123         x = V::One() + 2.0 * x;
 124
 125         x = ldexp(x, n); // == x * 2ⁿ
 126
 127         x(overflow) = std::numeric_limits<double>::infinity();
 128         x.setZero(underflow);
 129
 130         return x;
 131     }
 132 } // namespace Common
 133 namespace VC__USE_NAMESPACE
 134 {
 135     using Vc::Common::exp;
 136 } // namespace VC__USE_NAMESPACE
 137 } // namespace Vc
 138 #include "undomacros.h"
 139
 140 #endif // VC_COMMON_EXPONENTIAL_H
 141 #endif // COMMON_EXPONENTIAL_H