Vc/include/Vc/common/logarithm.h

   1 /*  This file is part of the Vc library.
   2
   3     Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
   4
   5     Vc is free software: you can redistribute it and/or modify
   6     it under the terms of the GNU Lesser General Public License as
   7     published by the Free Software Foundation, either version 3 of
   8     the License, or (at your option) any later version.
   9
  10     Vc is distributed in the hope that it will be useful, but
  11     WITHOUT ANY WARRANTY; without even the implied warranty of
  12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13     GNU Lesser General Public License for more details.
  14
  15     You should have received a copy of the GNU Lesser General Public
  16     License along with Vc.  If not, see <http://www.gnu.org/licenses/>.
  17
  18 */
  19
  20 /* The log implementations are based on code from Julien Pommier which carries the following
  21    copyright information:
  22  */
  23 /*
  24    Inspired by Intel Approximate Math library, and based on the
  25    corresponding algorithms of the cephes math library
  26 */
  27 /* Copyright (C) 2007  Julien Pommier
  28
  29   This software is provided 'as-is', without any express or implied
  30   warranty.  In no event will the authors be held liable for any damages
  31   arising from the use of this software.
  32
  33   Permission is granted to anyone to use this software for any purpose,
  34   including commercial applications, and to alter it and redistribute it
  35   freely, subject to the following restrictions:
  36
  37   1. The origin of this software must not be misrepresented; you must not
  38      claim that you wrote the original software. If you use this software
  39      in a product, an acknowledgment in the product documentation would be
  40      appreciated but is not required.
  41   2. Altered source versions must be plainly marked as such, and must not be
  42      misrepresented as being the original software.
  43   3. This notice may not be removed or altered from any source distribution.
  44
  45   (this is the zlib license)
  46 */
  47
  48 #ifndef VC_COMMON_LOGARITHM_H
  49 #define VC_COMMON_LOGARITHM_H
  50
  51 #include "macros.h"
  52 namespace Vc
  53 {
  54 namespace Common
  55 {
  56 #ifdef VC__USE_NAMESPACE
  57 using Vc::VC__USE_NAMESPACE::Const;
  58 using Vc::VC__USE_NAMESPACE::Vector;
  59 #endif
  60 enum LogarithmBase {
  61     BaseE, Base10, Base2
  62 };
  63
  64 template<LogarithmBase Base>
  65 struct LogImpl
  66 {
  67     template<typename T> static inline ALWAYS_INLINE void log_series(Vector<T> &VC_RESTRICT x, typename Vector<T>::AsArg exponent) {
  68         typedef Vector<T> V;
  69         typedef Const<T> C;
  70         // Taylor series around x = 2^exponent
  71         //   f(x) = ln(x)   → exponent * ln(2) → C::ln2_small + C::ln2_large
  72         //  f'(x) =    x⁻¹  →  x               → 1
  73         // f''(x) = -  x⁻²  → -x² / 2          → C::_1_2()
  74         //        =  2!x⁻³  →  x³ / 3          → C::P(8)
  75         //        = -3!x⁻⁴  → -x⁴ / 4          → C::P(7)
  76         //        =  4!x⁻⁵  →  x⁵ / 5          → C::P(6)
  77         // ...
  78         // The high order coefficients are adjusted to reduce the error that occurs from ommission
  79         // of higher order terms.
  80         // P(0) is the smallest term and |x| < 1 ⇒ |xⁿ| > |xⁿ⁺¹|
  81         // The order of additions must go from smallest to largest terms
  82         const V x2 = x * x; // 0 → 4
  83 #ifdef VC_LOG_ILP
  84         V y2 = (C::P(6) * /*4 →  8*/ x2 + /* 8 → 11*/ C::P(7) * /*1 → 5*/ x) + /*11 → 14*/ C::P(8);
  85         V y0 = (C::P(0) * /*5 →  9*/ x2 + /* 9 → 12*/ C::P(1) * /*2 → 6*/ x) + /*12 → 15*/ C::P(2);
  86         V y1 = (C::P(3) * /*6 → 10*/ x2 + /*10 → 13*/ C::P(4) * /*3 → 7*/ x) + /*13 → 16*/ C::P(5);
  87         const V x3 = x2 * x;  // 7 → 11
  88         const V x6 = x3 * x3; // 11 → 15
  89         const V x9 = x6 * x3; // 15 → 19
  90         V y = (y0 * /*19 → 23*/ x9 + /*23 → 26*/ y1 * /*16 → 20*/ x6) + /*26 → 29*/ y2 * /*14 → 18*/ x3;
  91 #elif defined VC_LOG_ILP2
  92         /*
  93          *                            name start done
  94          *  movaps %xmm0, %xmm1     ; x     0     1
  95          *  movaps %xmm0, %xmm2     ; x     0     1
  96          *  mulps %xmm1, %xmm1      ; x2    1     5 *xmm1
  97          *  movaps <P8>, %xmm15     ; y8    1     2
  98          *  mulps %xmm1, %xmm2      ; x3    5     9 *xmm2
  99          *  movaps %xmm1, %xmm3     ; x2    5     6
 100          *  movaps %xmm1, %xmm4     ; x2    5     6
 101          *  mulps %xmm3, %xmm3      ; x4    6    10 *xmm3
 102          *  movaps %xmm2, %xmm5     ; x3    9    10
 103          *  movaps %xmm2, %xmm6     ; x3    9    10
 104          *  mulps %xmm2, %xmm4      ; x5    9    13 *xmm4
 105          *  movaps %xmm3, %xmm7     ; x4   10    11
 106          *  movaps %xmm3, %xmm8     ; x4   10    11
 107          *  movaps %xmm3, %xmm9     ; x4   10    11
 108          *  mulps %xmm5, %xmm5      ; x6   10    14 *xmm5
 109          *  mulps %xmm3, %xmm6      ; x7   11    15 *xmm6
 110          *  mulps %xmm7, %xmm7      ; x8   12    16 *xmm7
 111          *  movaps %xmm4, %xmm10    ; x5   13    14
 112          *  mulps %xmm4, %xmm8      ; x9   13    17 *xmm8
 113          *  mulps %xmm5, %xmm10     ; x11  14    18 *xmm10
 114          *  mulps %xmm5, %xmm9      ; x10  15    19 *xmm9
 115          *  mulps <P0>, %xmm10      ; y0   18    22
 116          *  mulps <P1>, %xmm9       ; y1   19    23
 117          *  mulps <P2>, %xmm8       ; y2   20    24
 118          *  mulps <P3>, %xmm7       ; y3   21    25
 119          *  addps %xmm10, %xmm9     ; y    23    26
 120          *  addps %xmm9, %xmm8      ; y    26    29
 121          *  addps %xmm8, %xmm7      ; y    29    32
 122          */
 123         const V x3 = x2 * x;  // 4  → 8
 124         const V x4 = x2 * x2; // 5  → 9
 125         const V x5 = x2 * x3; // 8  → 12
 126         const V x6 = x3 * x3; // 9  → 13
 127         const V x7 = x4 * x3; //
 128         const V x8 = x4 * x4;
 129         const V x9 = x5 * x4;
 130         const V x10 = x5 * x5;
 131         const V x11 = x5 * x6; // 13 → 17
 132         V y = C::P(0) * x11 + C::P(1) * x10 + C::P(2) * x9 + C::P(3) * x8 + C::P(4) * x7
 133             + C::P(5) * x6  + C::P(6) * x5  + C::P(7) * x4 + C::P(8) * x3;
 134 #else
 135         V y = C::P(0);
 136         unrolled_loop16(i, 1, 9,
 137                 y = y * x + C::P(i);
 138                 );
 139         y *= x * x2;
 140 #endif
 141         switch (Base) {
 142         case BaseE:
 143             // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
 144             y += exponent * C::ln2_small();
 145             y -= x2 * C::_1_2(); // [0, 0.25[
 146             x += y;
 147             x += exponent * C::ln2_large();
 148             break;
 149         case Base10:
 150             y += exponent * C::ln2_small();
 151             y -= x2 * C::_1_2(); // [0, 0.25[
 152             x += y;
 153             x += exponent * C::ln2_large();
 154             x *= C::log10_e();
 155             break;
 156         case Base2:
 157             {
 158                 const V x_ = x;
 159                 x *= C::log2_e();
 160                 y *= C::log2_e();
 161                 y -= x_ * x * C::_1_2(); // [0, 0.25[
 162                 x += y;
 163                 x += exponent;
 164                 break;
 165             }
 166         }
 167     }
 168
 169     static inline ALWAYS_INLINE void log_series(Vector<double> &VC_RESTRICT x, Vector<double>::AsArg exponent) {
 170         typedef Vector<double> V;
 171         typedef Const<double> C;
 172         const V x2 = x * x;
 173         V y = C::P(0);
 174         V y2 = C::Q(0) + x;
 175         unrolled_loop16(i, 1, 5,
 176                 y = y * x + C::P(i);
 177                 y2 = y2 * x + C::Q(i);
 178                 );
 179         y2 = x / y2;
 180         y = y * x + C::P(5);
 181         y = x2 * y * y2;
 182         // TODO: refactor the following with the float implementation:
 183         switch (Base) {
 184         case BaseE:
 185             // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
 186             y += exponent * C::ln2_small();
 187             y -= x2 * C::_1_2(); // [0, 0.25[
 188             x += y;
 189             x += exponent * C::ln2_large();
 190             break;
 191         case Base10:
 192             y += exponent * C::ln2_small();
 193             y -= x2 * C::_1_2(); // [0, 0.25[
 194             x += y;
 195             x += exponent * C::ln2_large();
 196             x *= C::log10_e();
 197             break;
 198         case Base2:
 199             {
 200                 const V x_ = x;
 201                 x *= C::log2_e();
 202                 y *= C::log2_e();
 203                 y -= x_ * x * C::_1_2(); // [0, 0.25[
 204                 x += y;
 205                 x += exponent;
 206                 break;
 207             }
 208         }
 209     }
 210
 211     template<typename T> static inline Vector<T> calc(Vector<T> x) {
 212         typedef Vector<T> V;
 213         typedef typename V::Mask M;
 214         typedef Const<T> C;
 215
 216         const M invalidMask = x < V::Zero();
 217         const M infinityMask = x == V::Zero();
 218         const M denormal = x <= C::min();
 219
 220         x(denormal) *= V(Vc_buildDouble(1, 0, 54)); // 2²⁵
 221         V exponent = x.exponent(); // = ⎣log₂(x)⎦
 222         exponent(denormal) -= 54;
 223
 224         x.setZero(C::exponentMask()); // keep only the fractional part ⇒ x ∈ [1, 2[
 225         x |= C::_1_2();               // and set the exponent to 2⁻¹   ⇒ x ∈ [½, 1[
 226
 227         // split calculation in two cases:
 228         // A: x ∈ [½, √½[
 229         // B: x ∈ [√½, 1[
 230         // √½ defines the point where Δe(x) := log₂(x) - ⎣log₂(x)⎦ = ½, i.e.
 231         // log₂(√½) - ⎣log₂(√½)⎦ = ½ * -1 - ⎣½ * -1⎦ = -½ + 1 = ½
 232
 233         const M smallX = x < C::_1_sqrt2();
 234         x(smallX) += x; // => x ∈ [√½,     1[ ∪ [1.5, 1 + √½[
 235         x -= V::One();  // => x ∈ [√½ - 1, 0[ ∪ [0.5, √½[
 236         exponent(!smallX) += V::One();
 237
 238         log_series(x, exponent); // A: (ˣ⁄₂ᵉ - 1, e)  B: (ˣ⁄₂ᵉ⁺¹ - 1, e + 1)
 239
 240         x.setQnan(invalidMask);        // x < 0 → NaN
 241         x(infinityMask) = C::neginf(); // x = 0 → -∞
 242
 243         return x;
 244     }
 245 };
 246
 247 template<typename T> static inline Vector<T> log(Vector<T> x) {
 248     typedef typename Vector<T>::Mask M;
 249     typedef Const<T> C;
 250     return LogImpl<BaseE>::calc(x);
 251 }
 252 template<typename T> static inline Vector<T> log10(Vector<T> x) {
 253     typedef typename Vector<T>::Mask M;
 254     typedef Const<T> C;
 255     return LogImpl<Base10>::calc(x);
 256 }
 257 template<typename T> static inline Vector<T> log2(Vector<T> x) {
 258     typedef typename Vector<T>::Mask M;
 259     typedef Const<T> C;
 260     return LogImpl<Base2>::calc(x);
 261 }
 262 } // namespace Common
 263 #ifdef VC__USE_NAMESPACE
 264 namespace VC__USE_NAMESPACE
 265 {
 266     using Vc::Common::log;
 267     using Vc::Common::log10;
 268     using Vc::Common::log2;
 269 } // namespace VC__USE_NAMESPACE
 270 #undef VC__USE_NAMESPACE
 271 #endif
 272 } // namespace Vc
 273 #include "undomacros.h"
 274
 275 #endif // VC_COMMON_LOGARITHM_H