1 /* This file is part of the Vc library.
3 Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
5 Vc is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation, either version 3 of
8 the License, or (at your option) any later version.
10 Vc is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with Vc. If not, see <http://www.gnu.org/licenses/>.
20 /* The log implementations are based on code from Julien Pommier which carries the following
21 copyright information:
24 Inspired by Intel Approximate Math library, and based on the
25 corresponding algorithms of the cephes math library
27 /* Copyright (C) 2007 Julien Pommier
29 This software is provided 'as-is', without any express or implied
30 warranty. In no event will the authors be held liable for any damages
31 arising from the use of this software.
33 Permission is granted to anyone to use this software for any purpose,
34 including commercial applications, and to alter it and redistribute it
35 freely, subject to the following restrictions:
37 1. The origin of this software must not be misrepresented; you must not
38 claim that you wrote the original software. If you use this software
39 in a product, an acknowledgment in the product documentation would be
40 appreciated but is not required.
41 2. Altered source versions must be plainly marked as such, and must not be
42 misrepresented as being the original software.
43 3. This notice may not be removed or altered from any source distribution.
45 (this is the zlib license)
48 #ifndef VC_COMMON_LOGARITHM_H
49 #define VC_COMMON_LOGARITHM_H
56 #ifdef VC__USE_NAMESPACE
57 using Vc::VC__USE_NAMESPACE::Const;
58 using Vc::VC__USE_NAMESPACE::Vector;
64 template<LogarithmBase Base>
67 template<typename T> static inline ALWAYS_INLINE void log_series(Vector<T> &VC_RESTRICT x, typename Vector<T>::AsArg exponent) {
70 // Taylor series around x = 2^exponent
71 // f(x) = ln(x) → exponent * ln(2) → C::ln2_small + C::ln2_large
72 // f'(x) = x⁻¹ → x → 1
73 // f''(x) = - x⁻² → -x² / 2 → C::_1_2()
74 // = 2!x⁻³ → x³ / 3 → C::P(8)
75 // = -3!x⁻⁴ → -x⁴ / 4 → C::P(7)
76 // = 4!x⁻⁵ → x⁵ / 5 → C::P(6)
78 // The high order coefficients are adjusted to reduce the error that occurs from ommission
79 // of higher order terms.
80 // P(0) is the smallest term and |x| < 1 ⇒ |xⁿ| > |xⁿ⁺¹|
81 // The order of additions must go from smallest to largest terms
82 const V x2 = x * x; // 0 → 4
84 V y2 = (C::P(6) * /*4 → 8*/ x2 + /* 8 → 11*/ C::P(7) * /*1 → 5*/ x) + /*11 → 14*/ C::P(8);
85 V y0 = (C::P(0) * /*5 → 9*/ x2 + /* 9 → 12*/ C::P(1) * /*2 → 6*/ x) + /*12 → 15*/ C::P(2);
86 V y1 = (C::P(3) * /*6 → 10*/ x2 + /*10 → 13*/ C::P(4) * /*3 → 7*/ x) + /*13 → 16*/ C::P(5);
87 const V x3 = x2 * x; // 7 → 11
88 const V x6 = x3 * x3; // 11 → 15
89 const V x9 = x6 * x3; // 15 → 19
90 V y = (y0 * /*19 → 23*/ x9 + /*23 → 26*/ y1 * /*16 → 20*/ x6) + /*26 → 29*/ y2 * /*14 → 18*/ x3;
91 #elif defined VC_LOG_ILP2
94 * movaps %xmm0, %xmm1 ; x 0 1
95 * movaps %xmm0, %xmm2 ; x 0 1
96 * mulps %xmm1, %xmm1 ; x2 1 5 *xmm1
97 * movaps <P8>, %xmm15 ; y8 1 2
98 * mulps %xmm1, %xmm2 ; x3 5 9 *xmm2
99 * movaps %xmm1, %xmm3 ; x2 5 6
100 * movaps %xmm1, %xmm4 ; x2 5 6
101 * mulps %xmm3, %xmm3 ; x4 6 10 *xmm3
102 * movaps %xmm2, %xmm5 ; x3 9 10
103 * movaps %xmm2, %xmm6 ; x3 9 10
104 * mulps %xmm2, %xmm4 ; x5 9 13 *xmm4
105 * movaps %xmm3, %xmm7 ; x4 10 11
106 * movaps %xmm3, %xmm8 ; x4 10 11
107 * movaps %xmm3, %xmm9 ; x4 10 11
108 * mulps %xmm5, %xmm5 ; x6 10 14 *xmm5
109 * mulps %xmm3, %xmm6 ; x7 11 15 *xmm6
110 * mulps %xmm7, %xmm7 ; x8 12 16 *xmm7
111 * movaps %xmm4, %xmm10 ; x5 13 14
112 * mulps %xmm4, %xmm8 ; x9 13 17 *xmm8
113 * mulps %xmm5, %xmm10 ; x11 14 18 *xmm10
114 * mulps %xmm5, %xmm9 ; x10 15 19 *xmm9
115 * mulps <P0>, %xmm10 ; y0 18 22
116 * mulps <P1>, %xmm9 ; y1 19 23
117 * mulps <P2>, %xmm8 ; y2 20 24
118 * mulps <P3>, %xmm7 ; y3 21 25
119 * addps %xmm10, %xmm9 ; y 23 26
120 * addps %xmm9, %xmm8 ; y 26 29
121 * addps %xmm8, %xmm7 ; y 29 32
123 const V x3 = x2 * x; // 4 → 8
124 const V x4 = x2 * x2; // 5 → 9
125 const V x5 = x2 * x3; // 8 → 12
126 const V x6 = x3 * x3; // 9 → 13
127 const V x7 = x4 * x3; //
128 const V x8 = x4 * x4;
129 const V x9 = x5 * x4;
130 const V x10 = x5 * x5;
131 const V x11 = x5 * x6; // 13 → 17
132 V y = C::P(0) * x11 + C::P(1) * x10 + C::P(2) * x9 + C::P(3) * x8 + C::P(4) * x7
133 + C::P(5) * x6 + C::P(6) * x5 + C::P(7) * x4 + C::P(8) * x3;
136 unrolled_loop16(i, 1, 9,
143 // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
144 y += exponent * C::ln2_small();
145 y -= x2 * C::_1_2(); // [0, 0.25[
147 x += exponent * C::ln2_large();
150 y += exponent * C::ln2_small();
151 y -= x2 * C::_1_2(); // [0, 0.25[
153 x += exponent * C::ln2_large();
161 y -= x_ * x * C::_1_2(); // [0, 0.25[
169 static inline ALWAYS_INLINE void log_series(Vector<double> &VC_RESTRICT x, Vector<double>::AsArg exponent) {
170 typedef Vector<double> V;
171 typedef Const<double> C;
175 unrolled_loop16(i, 1, 5,
177 y2 = y2 * x + C::Q(i);
182 // TODO: refactor the following with the float implementation:
185 // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
186 y += exponent * C::ln2_small();
187 y -= x2 * C::_1_2(); // [0, 0.25[
189 x += exponent * C::ln2_large();
192 y += exponent * C::ln2_small();
193 y -= x2 * C::_1_2(); // [0, 0.25[
195 x += exponent * C::ln2_large();
203 y -= x_ * x * C::_1_2(); // [0, 0.25[
211 template<typename T> static inline Vector<T> calc(Vector<T> x) {
213 typedef typename V::Mask M;
216 const M invalidMask = x < V::Zero();
217 const M infinityMask = x == V::Zero();
218 const M denormal = x <= C::min();
220 x(denormal) *= V(Vc_buildDouble(1, 0, 54)); // 2²⁵
221 V exponent = x.exponent(); // = ⎣log₂(x)⎦
222 exponent(denormal) -= 54;
224 x.setZero(C::exponentMask()); // keep only the fractional part ⇒ x ∈ [1, 2[
225 x |= C::_1_2(); // and set the exponent to 2⁻¹ ⇒ x ∈ [½, 1[
227 // split calculation in two cases:
230 // √½ defines the point where Δe(x) := log₂(x) - ⎣log₂(x)⎦ = ½, i.e.
231 // log₂(√½) - ⎣log₂(√½)⎦ = ½ * -1 - ⎣½ * -1⎦ = -½ + 1 = ½
233 const M smallX = x < C::_1_sqrt2();
234 x(smallX) += x; // => x ∈ [√½, 1[ ∪ [1.5, 1 + √½[
235 x -= V::One(); // => x ∈ [√½ - 1, 0[ ∪ [0.5, √½[
236 exponent(!smallX) += V::One();
238 log_series(x, exponent); // A: (ˣ⁄₂ᵉ - 1, e) B: (ˣ⁄₂ᵉ⁺¹ - 1, e + 1)
240 x.setQnan(invalidMask); // x < 0 → NaN
241 x(infinityMask) = C::neginf(); // x = 0 → -∞
247 template<typename T> static inline Vector<T> log(Vector<T> x) {
248 typedef typename Vector<T>::Mask M;
250 return LogImpl<BaseE>::calc(x);
252 template<typename T> static inline Vector<T> log10(Vector<T> x) {
253 typedef typename Vector<T>::Mask M;
255 return LogImpl<Base10>::calc(x);
257 template<typename T> static inline Vector<T> log2(Vector<T> x) {
258 typedef typename Vector<T>::Mask M;
260 return LogImpl<Base2>::calc(x);
262 } // namespace Common
263 #ifdef VC__USE_NAMESPACE
264 namespace VC__USE_NAMESPACE
266 using Vc::Common::log;
267 using Vc::Common::log10;
268 using Vc::Common::log2;
269 } // namespace VC__USE_NAMESPACE
270 #undef VC__USE_NAMESPACE
273 #include "undomacros.h"
275 #endif // VC_COMMON_LOGARITHM_H