1 #ifndef COMMON_EXPONENTIAL_H
2 #define COMMON_EXPONENTIAL_H
3 /* This file is part of the Vc library. {{{
5 Copyright (C) 2012 Matthias Kretz <kretz@kde.org>
7 Vc is free software: you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as
9 published by the Free Software Foundation, either version 3 of
10 the License, or (at your option) any later version.
12 Vc is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with Vc. If not, see <http://www.gnu.org/licenses/>.
20 -------------------------------------------------------------------
22 The exp implementation is derived from Cephes, which carries the
23 following Copyright notice:
25 Cephes Math Library Release 2.2: June, 1992
26 Copyright 1984, 1987, 1989 by Stephen L. Moshier
27 Direct inquiries to 30 Frost Street, Cambridge, MA 02140
31 #ifndef VC_COMMON_EXPONENTIAL_H
32 #define VC_COMMON_EXPONENTIAL_H
39 using Vc::VC__USE_NAMESPACE::c_log;
40 using Vc::VC__USE_NAMESPACE::Vector;
41 using Vc::VC__USE_NAMESPACE::floor;
42 using Vc::VC__USE_NAMESPACE::ldexp;
44 static const float log2_e = 1.44269504088896341f;
45 static const float MAXLOGF = 88.72283905206835f;
46 static const float MINLOGF = -103.278929903431851103f; /* log(2^-149) */
47 static const float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
49 template<typename T> struct TypenameForLdexp { typedef Vector<int> Type; };
50 template<> struct TypenameForLdexp<Vc::sfloat> { typedef Vector<short> Type; };
52 template<typename T> static inline Vector<T> exp(Vector<T> x) {
54 typedef typename V::Mask M;
55 typedef typename TypenameForLdexp<T>::Type I;
58 const M overflow = x > MAXLOGF;
59 const M underflow = x < MINLOGF;
61 // log₂(eˣ) = x * log₂(e) * log₂(2)
62 // = log₂(2^(x * log₂(e)))
63 // => eˣ = 2^(x * log₂(e))
64 // => n = ⌊x * log₂(e) + ½⌋
65 // => y = x - n * ln(2) | recall that: ln(2) * log₂(e) == 1
67 V z = floor(C::log2_e() * x + 0.5f);
68 I n = static_cast<I>(z);
69 x -= z * C::ln2_large();
70 x -= z * C::ln2_small();
72 /* Theoretical peak relative error in [-0.5, +0.5] is 4.2e-9. */
73 z = ((((( 1.9875691500E-4f * x
74 + 1.3981999507E-3f) * x
75 + 8.3334519073E-3f) * x
76 + 4.1665795894E-2f) * x
77 + 1.6666665459E-1f) * x
78 + 5.0000001201E-1f) * (x * x)
82 x = ldexp(z, n); // == z * 2ⁿ
84 x(overflow) = std::numeric_limits<typename V::EntryType>::infinity();
89 static inline Vector<double> exp(Vector<double>::AsArg _x) {
90 Vector<double> x = _x;
91 typedef Vector<double> V;
93 typedef Const<double> C;
95 const M overflow = x > Vc_buildDouble( 1, 0x0006232bdd7abcd2ull, 9); // max log
96 const M underflow = x < Vc_buildDouble(-1, 0x0006232bdd7abcd2ull, 9); // min log
98 V px = floor(C::log2_e() * x + 0.5);
101 n.data() = Mem::permute<X0, X2, X1, X3>(n.data());
102 #elif defined(VC_IMPL_AVX)
103 __m128i tmp = _mm256_cvttpd_epi32(px.data());
104 Vector<int> n = AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp));
106 x -= px * C::ln2_large(); //Vc_buildDouble(1, 0x00062e4000000000ull, -1); // ln2
107 x -= px * C::ln2_small(); //Vc_buildDouble(1, 0x0007f7d1cf79abcaull, -20); // ln2
110 Vc_buildDouble(1, 0x000089cdd5e44be8ull, -13),
111 Vc_buildDouble(1, 0x000f06d10cca2c7eull, -6),
112 Vc_buildDouble(1, 0x0000000000000000ull, 0)
115 Vc_buildDouble(1, 0x00092eb6bc365fa0ull, -19),
116 Vc_buildDouble(1, 0x0004ae39b508b6c0ull, -9),
117 Vc_buildDouble(1, 0x000d17099887e074ull, -3),
118 Vc_buildDouble(1, 0x0000000000000000ull, 1)
121 px = x * ((P[0] * x2 + P[1]) * x2 + P[2]);
122 x = px / ((((Q[0] * x2 + Q[1]) * x2 + Q[2]) * x2 + Q[3]) - px);
123 x = V::One() + 2.0 * x;
125 x = ldexp(x, n); // == x * 2ⁿ
127 x(overflow) = std::numeric_limits<double>::infinity();
128 x.setZero(underflow);
132 } // namespace Common
133 namespace VC__USE_NAMESPACE
135 using Vc::Common::exp;
136 } // namespace VC__USE_NAMESPACE
138 #include "undomacros.h"
140 #endif // VC_COMMON_EXPONENTIAL_H
141 #endif // COMMON_EXPONENTIAL_H