/* msal_math64.c Munafo Stand-Alone Library 64-bit math routines (and some 80-bit Intel "long double") The trigonometric functions and their subroutines, with names like "__kernel_rem_pio2", are derived from the "fdlibm" routines at Netlib (http://www.netlib.org/fdlibm/), described as follows: lib ../fdlibm for C math library for machines that support IEEE 754 floating-point by Kwok C Ng # Version: 5.3 # Maintained-hy: fdlibm-comments@sun.com # Platforms: Require ANSI C compiler with IEEE 754 style arithmetic # Copying-Policy: Freely Redistributable # Keywords: libm,exp,log,sin,cos,floating-point,IEEE754 master ornl.gov and the individual source code files had the following copyright notices: * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * Copyright 2004 Sun Microsystems, Inc. All Rights Reserved. * * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. Additional parts are derived from the GNU Scientific Library (GSL), distributed under the ,= ,-_-. =. terms of the GNU General ((_/)o o(\_)) Public License 3.0: `-'(. .)`-' \_/ * Copyright (C) 1996, 1997, 1998, 1999, 2000 Gerard Jungman * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or (at * your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 110-1301, USA. Most of RIES is licensed under GPL version 3 (June 2007). The GPL version 3 is applicable to the parts of MSAL (this stand-alone library of maths functions) that were added by Robert Munafo. The two GPLv3 license is available at: mrob.com/pub/ries/GPL-3.txt REVISION HISTORY 20121203 First version: only has msal_tan() and its supporting functions. 20121215 Add msal_sin, msal_cos 20130203 Add msal_lambertw; try to clean up various sign conversion warnings 20130820 Add long double trig functions (with precision suitable for 64-bit mantissa long doubles). 20141106 Add msal_test_lambertl and streamline msal_lambertwl a bit. 20141107 Add MSAL_NINF, eliminating bogus "floating constant exceeds range of 'float'" error caused by math.h in GCC 4.0.1 20150102 We are no longer using MAXFLOAT NOTES Other sources of math functions: see notes in ries.c, search for references to "f107_o.cpp" */ #if defined(_MSC_VER) # include #endif /* Sometimes it's necessary to define __LITTLE_ENDIAN explicitly but these catch some common cases. */ #ifndef __LITTLE_ENDIAN # if defined(__LITTLE_ENDIAN__) || \ defined(i386) || defined(__x86_64__) || defined(i486) || \ defined(intel) || defined(x86) || defined(i86pc) || \ defined(__alpha) || defined(__osf__) # define __LITTLE_ENDIAN # endif #endif typedef union { double value; unsigned int ints[2]; } msal_union_double_int2; #ifdef __LITTLE_ENDIAN # define __HI(x) *(1+(int*)&x) # define __LO(x) *(int*)&x # define MSAL_HI(x) (((msal_union_double_int2*)&x)->ints[1]) # define MSAL_LO(x) (((msal_union_double_int2*)&x)->ints[0]) #else # define __HI(x) *(int*)&x # define __LO(x) *(1+(int*)&x) # define MSAL_HI(x) (((msal_union_double_int2*)&x)->ints[0]) # define MSAL_LO(x) (((msal_union_double_int2*)&x)->ints[1]) #endif #ifdef __STDC__ # define MSAL__P(p) p #else # define MSAL__P(p) () #endif /* 2.7182818284590452353602874713526624... */ #define MSAL_E 2.71828182845904523e+00 #define MSAL_EL 2.7182818284590452353602874713526624L /* * ANSI/POSIX */ /* The "INFINITY" macro in math.h sometimes generates a bogus warning * "floating constant exceeds range of 'float'". We avoid it by using * code from a newer math.h */ #if defined(__GNUC__) && ! defined( __XLC__ ) # define MSAL_NINF (-(__builtin_huge_valf())) # define MSAL_NINFL (-(__builtin_huge_vall())) #elif (defined(__ppc__) || defined(__ppc64__)) # define MSAL_NINF -1e50f # define MSAL_NINFL -1e500L #elif (defined (__i386__) || defined( __x86_64__ )) # define MSAL_NINF -1e50f # define MSAL_NINFL -1e5000L #elif defined(_MSC_VER) # define MSAL_NINF (-(DBL_MAX+DBL_MAX)) # define MSAL_NINFL (-(LDBL_MAX+LDBL_MAX)) # define isnan(x) ((x)!=(x)) # define NAN (MSAL_NINF-MSAL_NINF) #else /* Use the "most common" backwards-compatible definition for the default */ # define MSAL_NINF -1e50f # define MSAL_NINFL -1e5000L #endif /* * set X_TLOSS = pi*2**52, which is possibly defined in * (one may replace the following line by "#include ") */ #define X_TLOSS 1.41484755040568800000e+16 #define DOMAIN 1 #define SING 2 #define OVERFLOW 3 #define UNDERFLOW 4 #define TLOSS 5 #define PLOSS 6 void msal_version_info MSAL__P((void)); extern double msal_sin MSAL__P((double)); extern double msal_fabs MSAL__P((double)); extern double msal_floor MSAL__P((double)); extern double msal_copysign MSAL__P((double, double)); extern double msal_scalbn MSAL__P((double, int)); extern int __ieee754_rem_pio2 MSAL__P((double,double*)); extern double __kernel_sin MSAL__P((double,double,int)); extern double __kernel_cos MSAL__P((double,double)); extern double __kernel_tan MSAL__P((double,double,int)); extern int __kernel_rem_pio2 MSAL__P((double*,double*,int,int,int,const int*)); extern double msal_cos MSAL__P((double)); extern double msal_tan MSAL__P((double)); extern long double ldbl_sincos MSAL__P((long double xq, long double zq, long double uq, double i)); extern long double msal_sinl MSAL__P((long double)); extern long double msal_cosl MSAL__P((long double)); extern long double msal_tanl MSAL__P((long double)); double msal_w_approx0 MSAL__P((double x, int branch)); double msal_w0_approx1 MSAL__P((double x)); double msal_w0_approx2 MSAL__P((double x)); double msal_lambertw MSAL__P((double x)); long double msal_lambertwl MSAL__P((long double x)); double msal_lanczos_ln_gamma MSAL__P((double z)); double msal_lanczos_gamma MSAL__P((double z)); double msal_digamma MSAL__P((double x)); double msal_dgamma MSAL__P((double z)); void msal_test_lambert MSAL__P((void)); void msal_test_lambertl MSAL__P((void)); void msal_test_gamma MSAL__P((void)); void msal_version_info(void) { printf("%s", "msal_math64 transcendental maths function library by Robert Munafo\n" " sine, cosine, and tangent functions adapted from code by SunSoft and\n" " Copyright (C) 1993, 2004 by Sun Microsystems, Inc.\n" " Lambert W function adapted from work by Darko Veberic, originally\n" " published at arxiv.org/abs/1003.1628\n" ); } /* * msal_fabs(x) returns the absolute value of x. */ #ifdef __STDC__ double msal_fabs(double x) #else double msal_fabs(x) double x; #endif { MSAL_HI(x) &= 0x7fffffff; return x; } /* * floor(x) * Return x rounded toward -inf to integral value * Method: * Bit twiddling. * Exception: * Inexact flag raised if x not equal to floor(x). */ #ifdef __STDC__ static const double huge = 1.0e300; #else static double huge = 1.0e300; #endif #if 0 /* Floor function, with explicit casts to avoid warnings. This is untested. */ #ifdef __STDC__ double msal_floor(double x) #else double msal_floor(x) double x; #endif { int i0,i1,j0; unsigned i,j; i0 = (int)MSAL_HI(x); i1 = (int)MSAL_LO(x); j0 = ((i0>>20)&0x7ff)-0x3ff; if(j0<20) { if(j0<0) { /* raise inexact if x != 0 */ if(huge+x>0.0) {/* return 0*sign(x) if |x|<1 */ if (i0>=0) { i0=i1=0; } else if (((i0&0x7fffffff)|i1)!=0) { i0=(int)0xbff00000;i1=0; } } } else { i = ((unsigned)0x000fffff)>>j0; if ((((unsigned)i0&i)|(unsigned)i1)==0) return x; /* x is integral */ if (huge+x>0.0) { /* raise inexact flag */ if (i0<0) i0 += (0x00100000)>>j0; i0 = (int)((unsigned)i0 & (~i)); i1=0; } } } else if (j0>51) { if(j0==0x400) return x+x; /* inf or NaN */ else return x; /* x is integral */ } else { i = ((unsigned)(0xffffffff))>>(j0-20); if ((((unsigned)i1)&i)==0) return x; /* x is integral */ if(huge+x>0.0) { /* raise inexact flag */ if(i0<0) { if (j0==20) i0+=1; else { j = (unsigned)i1+((unsigned)1<<(52-(unsigned)j0)); if(j<((unsigned)i1)) i0 +=1 ; /* got a carry */ i1=j; } } i1 &= (~i); } } MSAL_HI(x) = (unsigned)i0; MSAL_LO(x) = (unsigned)i1; return x; } #endif /* * copysign(double x, double y) * copysign(x,y) returns a value with the magnitude of x and * with the sign bit of y. */ #ifdef __STDC__ double msal_copysign(double x, double y) #else double msal_copysign(x,y) double x,y; #endif { MSAL_HI(x) = (MSAL_HI(x)&0x7fffffff)|(MSAL_HI(y)&0x80000000); return x; } /* * scalbn (double x, int n) * scalbn(x,n) returns x* 2**n computed by exponent * manipulation rather than by actually performing an * exponentiation or a multiplication. */ #ifdef __STDC__ static const double #else static double #endif two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */ tiny = 1.0e-300; #ifdef __STDC__ double msal_scalbn (double x, int n) #else double msal_scalbn (x,n) double x; int n; #endif { int k,hx,lx; hx = (int)MSAL_HI(x); lx = (int)MSAL_LO(x); k = (hx&0x7ff00000)>>20; /* extract exponent */ if (k==0) { /* 0 or subnormal x */ if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */ x *= two54; hx = (int)MSAL_HI(x); k = ((hx&0x7ff00000)>>20) - 54; if (n< -50000) return tiny*x; /*underflow*/ } if (k==0x7ff) return x+x; /* NaN or Inf */ k = k+n; if (k > 0x7fe) return huge*msal_copysign(huge,x); /* overflow */ if (k > 0) { /* normal result */ hx = (hx&(int)0x800fffff)|(k<<20); MSAL_HI(x) = (unsigned)hx; return x; } if (k <= -54) { if (n > 50000) /* in case integer overflow in n+k */ return huge*msal_copysign(huge,x); /*overflow*/ else return tiny*msal_copysign(tiny,x); /*underflow*/ } k += 54; /* subnormal result */ hx = (hx&(int)0x800fffff)|(k<<20); MSAL_HI(x) = (unsigned)hx; return x*twom54; } /* * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) * double x[],y[]; int e0,nx,prec; int ipio2[]; * * __kernel_rem_pio2 return the last three digits of N with * y = x - N*pi/2 * so that |y| < pi/2. * * The method is to compute the integer (mod 8) and fraction parts of * (2/pi)*x without doing the full multiplication. In general we * skip the part of the product that are known to be a huge integer ( * more accurately, = 0 mod 8 ). Thus the number of operations are * independent of the exponent of the input. * * (2/pi) is represented by an array of 24-bit integers in ipio2[]. * * Input parameters: * x[] The input value (must be positive) is broken into nx * pieces of 24-bit integers in double precision format. * x[i] will be the i-th 24 bit of x. The scaled exponent * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 * match x's up to 24 bits. * * Example of breaking a double positive z into x[0]+x[1]+x[2]: * e0 = ilogb(z)-23 * z = scalbn(z,-e0) * for i = 0,1,2 * x[i] = floor(z) * z = (z-x[i])*2**24 * * * y[] ouput result in an array of double precision numbers. * The dimension of y[] is: * 24-bit precision 1 * 53-bit precision 2 * 64-bit precision 2 * 113-bit precision 3 * The actual value is the sum of them. Thus for 113-bit * precison, one may have to do something like: * * long double t,w,r_head, r_tail; * t = (long double)y[2] + (long double)y[1]; * w = (long double)y[0]; * r_head = t+w; * r_tail = w - (r_head - t); * * e0 The exponent of x[0] * * nx dimension of x[] * * prec an integer indicating the precision: * 0 24 bits (single) * 1 53 bits (double) * 2 64 bits (extended) * 3 113 bits (quad) * * ipio2[] * integer array, contains the (24*i)-th to (24*i+23)-th * bit of 2/pi after binary point. The corresponding * floating value is * * ipio2[i] * 2^(-24(i+1)). * * External function: * double scalbn(), floor(); * * * Here is the description of some local variables: * * jk jk+1 is the initial number of terms of ipio2[] needed * in the computation. The recommended value is 2,3,4, * 6 for single, double, extended,and quad. * * jz local integer variable indicating the number of * terms of ipio2[] used. * * jx nx - 1 * * jv index for pointing to the suitable ipio2[] for the * computation. In general, we want * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 * is an integer. Thus * e0-3-24*jv >= 0 or (e0-3)/24 >= jv * Hence jv = max(0,(e0-3)/24). * * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. * * q[] double array with integral value, representing the * 24-bits chunk of the product of x and 2/pi. * * q0 the corresponding exponent of q[0]. Note that the * exponent for q[i] would be q0-24*i. * * PIo2[] double precision array, obtained by cutting pi/2 * into 24 bits chunks. * * f[] ipio2[] in floating point * * iq[] integer array by breaking up q[] in 24-bits chunk. * * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] * * ih integer. If >0 it indicates q[] is >= 0.5, hence * it also indicates the *sign* of the result. * */ /* * Constants: * The hexadecimal values are the intended ones for the following * constants. The decimal values may be used, provided that the * compiler will convert from decimal to binary accurately enough * to produce the hexadecimal values shown. */ #ifdef __STDC__ static const int init_jk[] = {2,3,4,6}; /* initial value for jk */ #else static int init_jk[] = {2,3,4,6}; #endif #ifdef __STDC__ static const double PIo2[] = { #else static double PIo2[] = { #endif 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ }; #ifdef __STDC__ static const double #else static double #endif zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */ #ifdef __STDC__ int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) #else int __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) double x[], y[]; int e0,nx,prec; int ipio2[]; #endif { int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; double z,fw,f[20],fq[20],q[20]; /* printf("__kernel_rem_pio2(%f,%f)\n", *x, *y); */ /* initialize jk*/ jk = init_jk[prec]; jp = jk; /* determine jx,jv,q0, note that 3>q0 */ jx = nx-1; jv = (e0-3)/24; if(jv<0) jv=0; q0 = e0-24*(jv+1); /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ j = jv-jx; m = jx+jk; for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (double) ipio2[j]; /* compute q[0],q[1],...q[jk] */ for (i=0;i<=jk;i++) { for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; } jz = jk; recompute: /* distill q[] into iq[] reversingly */ for(i=0,j=jz,z=q[jz];j>0;i++,j--) { fw = (double)((int)(twon24* z)); iq[i] = (int)(z-two24*fw); z = q[j-1]+fw; } /* compute n */ z = msal_scalbn(z,q0); /* actual value of z */ z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ n = (int) z; z -= (double)n; ih = 0; if(q0>0) { /* need iq[jz-1] to determine n */ i = (iq[jz-1]>>(24-q0)); n += i; iq[jz-1] -= i<<(24-q0); ih = iq[jz-1]>>(23-q0); } else if(q0==0) ih = iq[jz-1]>>23; else if(z>=0.5) ih=2; if(ih>0) { /* q > 0.5 */ n += 1; carry = 0; for(i=0;i0) { /* rare case: chance is 1 in 12 */ switch(q0) { case 1: iq[jz-1] &= 0x7fffff; break; case 2: iq[jz-1] &= 0x3fffff; break; } } if(ih==2) { z = one - z; if(carry!=0) z -= msal_scalbn(one,q0); } } /* check if recomputation is needed */ if(z==zero) { j = 0; for (i=jz-1;i>=jk;i--) j |= iq[i]; if(j==0) { /* need recomputation */ for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */ for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ f[jx+i] = (double) ipio2[jv+i]; for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; } jz += k; goto recompute; } } /* chop off zero terms */ if(z==0.0) { jz -= 1; q0 -= 24; while(iq[jz]==0) { jz--; q0-=24;} } else { /* break z into 24-bit if necessary */ z = msal_scalbn(z,-q0); if(z>=two24) { fw = (double)((int)(twon24*z)); iq[jz] = (int)(z-two24*fw); jz += 1; q0 += 24; iq[jz] = (int) fw; } else iq[jz] = (int) z ; } /* convert integer "bit" chunk to floating-point value */ fw = msal_scalbn(one,q0); for(i=jz;i>=0;i--) { q[i] = fw*(double)iq[i]; fw*=twon24; } /* compute PIo2[0,...,jp]*q[jz,...,0] */ for(i=jz;i>=0;i--) { for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; fq[jz-i] = fw; } /* compress fq[] into y[] */ switch(prec) { case 0: fw = 0.0; for (i=jz;i>=0;i--) fw += fq[i]; y[0] = (ih==0)? fw: -fw; break; case 1: case 2: fw = 0.0; for (i=jz;i>=0;i--) fw += fq[i]; y[0] = (ih==0)? fw: -fw; fw = fq[0]-fw; for (i=1;i<=jz;i++) fw += fq[i]; y[1] = (ih==0)? fw: -fw; break; case 3: /* painful */ for (i=jz;i>0;i--) { fw = fq[i-1]+fq[i]; fq[i] += fq[i-1]-fw; fq[i-1] = fw; } for (i=jz;i>1;i--) { fw = fq[i-1]+fq[i]; fq[i] += fq[i-1]-fw; fq[i-1] = fw; } for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; if(ih==0) { y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; } else { y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; } } return n&7; } /* __ieee754_rem_pio2(x,y) * * return the remainder of x rem pi/2 in y[0]+y[1] * use __kernel_rem_pio2() */ /* * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi */ #ifdef __STDC__ static const int two_over_pi[] = { #else static int two_over_pi[] = { #endif 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, }; #ifdef __STDC__ static const int npio2_hw[] = { #else static int npio2_hw[] = { #endif 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, 0x404858EB, 0x404921FB, }; /* * invpio2: 53 bits of 2/pi * pio2_1: first 33 bit of pi/2 * pio2_1t: pi/2 - pio2_1 * pio2_2: second 33 bit of pi/2 * pio2_2t: pi/2 - (pio2_1+pio2_2) * pio2_3: third 33 bit of pi/2 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) */ #ifdef __STDC__ static const double #else static double #endif half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */ pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */ pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ #ifdef __STDC__ int __ieee754_rem_pio2(double x, double *y) #else int __ieee754_rem_pio2(x,y) double x,y[]; #endif { double z,w,t,r,fn; double tx[3]; int e0,i,j,nx,n,ix,hx; hx = (int)MSAL_HI(x); /* high word of x */ ix = hx&0x7fffffff; if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */ {y[0] = x; y[1] = 0; return 0;} if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ if(hx>0) { z = x - pio2_1; if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ y[0] = z - pio2_1t; y[1] = (z-y[0])-pio2_1t; } else { /* near pi/2, use 33+33+53 bit pi */ z -= pio2_2; y[0] = z - pio2_2t; y[1] = (z-y[0])-pio2_2t; } return 1; } else { /* negative x */ z = x + pio2_1; if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ y[0] = z + pio2_1t; y[1] = (z-y[0])+pio2_1t; } else { /* near pi/2, use 33+33+53 bit pi */ z += pio2_2; y[0] = z + pio2_2t; y[1] = (z-y[0])+pio2_2t; } return -1; } } if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ t = msal_fabs(x); n = (int) (t*invpio2+half); fn = (double)n; r = t-fn*pio2_1; w = fn*pio2_1t; /* 1st round good to 85 bit */ if(n<32&&ix!=npio2_hw[n-1]) { y[0] = r-w; /* quick check no cancellation */ } else { j = ix>>20; y[0] = r-w; i = j-((((int)MSAL_HI(y[0]))>>20)&0x7ff); if(i>16) { /* 2nd iteration needed, good to 118 */ t = r; w = fn*pio2_2; r = t-w; w = fn*pio2_2t-((t-r)-w); y[0] = r-w; i = j-((((int)MSAL_HI(y[0]))>>20)&0x7ff); if(i>49) { /* 3rd iteration need, 151 bits acc */ t = r; /* will cover all possible cases */ w = fn*pio2_3; r = t-w; w = fn*pio2_3t-((t-r)-w); y[0] = r-w; } } } y[1] = (r-y[0])-w; if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} else return n; } /* * all other (large) arguments */ if(ix>=0x7ff00000) { /* x is inf or NaN */ y[0]=y[1]=x-x; return 0; } /* set z = scalbn(|x|,ilogb(x)-23) */ MSAL_LO(z) = MSAL_LO(x); e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ MSAL_HI(z) = (unsigned)(ix - (e0<<20)); for(i=0;i<2;i++) { tx[i] = (double)((int)(z)); z = (z-tx[i])*two24; } tx[2] = z; nx = 3; while(tx[nx-1]==zero) nx--; /* skip zero term */ n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} return n; } /* __kernel_sin( x, y, iy) * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 * Input x is assumed to be bounded by ~pi/4 in magnitude. * Input y is the tail of x. * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). * * Algorithm * 1. Since sin(-x) = -sin(x), we need only to consider positive x. * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. * 3. sin(x) is approximated by a polynomial of degree 13 on * [0,pi/4] * 3 13 * sin(x) ~ x + S1*x + ... + S6*x * where * * |sin(x) 2 4 6 8 10 12 | -58 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 * | x | * * 4. sin(x+y) = sin(x) + sin'(x')*y * ~ sin(x) + (1-x*x/2)*y * For better accuracy, let * 3 2 2 2 2 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) * then 3 2 * sin(x) = x + (S1*x + (x *(r-y/2)+y)) */ #ifdef __STDC__ static const double #else static double #endif S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ #ifdef __STDC__ double __kernel_sin(double x, double y, int iy) #else double __kernel_sin(x, y, iy) double x,y; int iy; /* iy=0 if y is zero */ #endif { double z,r,v; int ix; ix = MSAL_HI(x)&0x7fffffff; /* high word of x */ if(ix<0x3e400000) /* |x| < 2**-27 */ {if((int)x==0) return x;} /* generate inexact */ z = x*x; v = z*x; r = S2+z*(S3+z*(S4+z*(S5+z*S6))); if(iy==0) return x+v*(S1+z*r); else return x-((z*(half*y-v*r)-y)-v*S1); } /* * __kernel_cos( x, y ) * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 * Input x is assumed to be bounded by ~pi/4 in magnitude. * Input y is the tail of x. * * Algorithm * 1. Since cos(-x) = cos(x), we need only to consider positive x. * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. * 3. cos(x) is approximated by a polynomial of degree 14 on * [0,pi/4] * 4 14 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x * where the remez error is * * | 2 4 6 8 10 12 14 | -58 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 * | | * * 4 6 8 10 12 14 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then * cos(x) = 1 - x*x/2 + r * since cos(x+y) ~ cos(x) - sin(x)*y * ~ cos(x) - x*y, * a correction term is necessary in cos(x) and hence * cos(x+y) = 1 - (x*x/2 - (r - x*y)) * For better accuracy when x > 0.3, let qx = |x|/4 with * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. * Then * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). * Note that 1-qx and (x*x/2-qx) is EXACT here, and the * magnitude of the latter is at least a quarter of x*x/2, * thus, reducing the rounding error in the subtraction. */ #ifdef __STDC__ static const double #else static double #endif C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ #ifdef __STDC__ double __kernel_cos(double x, double y) #else double __kernel_cos(x, y) double x,y; #endif { double a,hz,z,r,qx; int ix; ix = MSAL_HI(x)&0x7fffffff; /* ix = |x|'s high word*/ if(ix<0x3e400000) { /* if x < 2**27 */ if(((int)x)==0) return one; /* generate inexact */ } z = x*x; r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); if(ix < 0x3FD33333) /* if |x| < 0.3 */ return one - (0.5*z - (z*r - x*y)); else { if(ix > 0x3fe90000) { /* x > 0.78125 */ qx = 0.28125; } else { MSAL_HI(qx) = (unsigned)(ix-0x00200000); /* x/4 */ MSAL_LO(qx) = 0; } hz = 0.5*z-qx; a = one-qx; return a - (hz - (z*r-x*y)); } } /* INDENT OFF */ /* __kernel_tan( x, y, k ) * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854 * Input x is assumed to be bounded by ~pi/4 in magnitude. * Input y is the tail of x. * Input k indicates whether tan (if k = 1) or -1/tan (if k = -1) is returned. * * Algorithm * 1. Since tan(-x) = -tan(x), we need only to consider positive x. * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0. * 3. tan(x) is approximated by a odd polynomial of degree 27 on * [0,0.67434] * 3 27 * tan(x) ~ x + T1*x + ... + T13*x * where * * |tan(x) 2 4 26 | -59.2 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 * | x | * * Note: tan(x+y) = tan(x) + tan'(x)*y * ~ tan(x) + (1+x*x)*y * Therefore, for better accuracy in computing tan(x+y), let * 3 2 2 2 2 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) * then * 3 2 * tan(x+y) = x + (T1*x + (x *(r+y)+y)) * * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) */ static const double T[] = { 3.33333333333334091986e-01, /* 3FD55555, 55555563 */ 1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */ 5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */ 2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */ 8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */ 3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */ 1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */ 5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */ 2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */ 7.81794442939557092300e-05, /* 3F147E88, A03792A6 */ 7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */ -1.85586374855275456654e-05, /* BEF375CB, DB605373 */ 2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */ /* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */ /* pio4 */ 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */ /* pio4lo */ 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */ }; #define pio4 T[14] #define pio4lo T[15] /* INDENT ON */ double __kernel_tan(double x, double y, int iy) { double z, r, v, w, s; int ix, hx; hx = (int)MSAL_HI(x); /* high word of x */ ix = hx & 0x7fffffff; /* high word of |x| */ if (ix < 0x3e300000) { /* x < 2**-28 */ if ((int) x == 0) { /* generate inexact */ if (((ix | (int)MSAL_LO(x)) | (iy + 1)) == 0) { return one / msal_fabs(x); } else { if (iy == 1) { return x; } else { /* compute -1 / (x+y) carefully */ double a, t; z = w = x + y; MSAL_LO(z) = 0; v = y - (z - x); t = a = -one / w; MSAL_LO(t) = 0; s = one + t * z; return t + a * (s + t * v); } } } } if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */ if (hx < 0) { x = -x; y = -y; } z = pio4 - x; w = pio4lo - y; x = z + w; y = 0.0; } z = x * x; w = z * z; /* * Break x^5*(T[1]+x^2*T[2]+...) into * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12])) */ r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11])))); v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12]))))); s = z * x; r = y + z * (s * (r + v) + y); r += T[0] * s; w = x + r; if (ix >= 0x3FE59428) { v = (double) iy; return (double) (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r))); } if (iy == 1) { return w; } else { /* * if allow error up to 2 ulp, simply return * -1.0 / (x+r) here */ /* compute -1.0 / (x+r) accurately */ double a, t; z = w; MSAL_LO(z) = 0; v = r - (z - x); /* z+v = r+x */ t = a = -1.0 / w; /* a = -1.0/w */ MSAL_LO(t) = 0; s = 1.0 + t * z; return t + a * (s + t * v); } } /* @(#)s_sin.c 1.3 95/01/18 */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* sin(x) * Return sine function of x. * * kernel function: * __kernel_sin ... sine function on [-pi/4,pi/4] * __kernel_cos ... cose function on [-pi/4,pi/4] * __ieee754_rem_pio2 ... argument reduction routine * * Method. * Let S,C and T denote the sin, cos and tan respectively on * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 * in [-pi/4 , +pi/4], and let n = k mod 4. * We have * * n sin(x) cos(x) tan(x) * ---------------------------------------------------------- * 0 S C T * 1 C -S -1/T * 2 -S -C T * 3 -C S -1/T * ---------------------------------------------------------- * * Special cases: * Let trig be any of sin, cos, or tan. * trig(+-INF) is NaN, with signals; * trig(NaN) is that NaN; * * Accuracy: * TRIG(x) returns trig(x) nearly rounded */ #ifdef __STDC__ double msal_sin(double x) #else double msal_sin(x) double x; #endif { double y[2],z=0.0; int n, ix; /* High word of x. */ ix = (int)MSAL_HI(x); /* |x| ~< pi/4 */ ix &= 0x7fffffff; if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0); /* sin(Inf or NaN) is NaN */ else if (ix>=0x7ff00000) return x-x; /* argument reduction needed */ else { n = __ieee754_rem_pio2(x,y); switch(n&3) { case 0: return __kernel_sin(y[0],y[1],1); case 1: return __kernel_cos(y[0],y[1]); case 2: return -__kernel_sin(y[0],y[1],1); default: return -__kernel_cos(y[0],y[1]); } } } /* cos(x) * Return cosine function of x. * * kernel function: * __kernel_sin ... sine function on [-pi/4,pi/4] * __kernel_cos ... cosine function on [-pi/4,pi/4] * __ieee754_rem_pio2 ... argument reduction routine * * Method. * Let S,C and T denote the sin, cos and tan respectively on * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 * in [-pi/4 , +pi/4], and let n = k mod 4. * We have * * n sin(x) cos(x) tan(x) * ---------------------------------------------------------- * 0 S C T * 1 C -S -1/T * 2 -S -C T * 3 -C S -1/T * ---------------------------------------------------------- * * Special cases: * Let trig be any of sin, cos, or tan. * trig(+-INF) is NaN, with signals; * trig(NaN) is that NaN; * * Accuracy: * TRIG(x) returns trig(x) nearly rounded */ #ifdef __STDC__ double msal_cos(double x) #else double msal_cos(x) double x; #endif { double y[2],z=0.0; int n, ix; /* High word of x. */ ix = (int)MSAL_HI(x); /* |x| ~< pi/4 */ ix &= 0x7fffffff; if(ix <= 0x3fe921fb) return __kernel_cos(x,z); /* cos(Inf or NaN) is NaN */ else if (ix>=0x7ff00000) return x-x; /* argument reduction needed */ else { n = __ieee754_rem_pio2(x,y); switch(n&3) { case 0: return __kernel_cos(y[0],y[1]); case 1: return -__kernel_sin(y[0],y[1],1); case 2: return -__kernel_cos(y[0],y[1]); default: return __kernel_sin(y[0],y[1],1); } } } /* tan(x) * Return tangent function of x. * * kernel function: * __kernel_tan ... tangent function on [-pi/4,pi/4] * __ieee754_rem_pio2 ... argument reduction routine * * Method. * Let S,C and T denote the sin, cos and tan respectively on * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 * in [-pi/4 , +pi/4], and let n = k mod 4. * We have * * n sin(x) cos(x) tan(x) * ---------------------------------------------------------- * 0 S C T * 1 C -S -1/T * 2 -S -C T * 3 -C S -1/T * ---------------------------------------------------------- * * Special cases: * Let trig be any of sin, cos, or tan. * trig(+-INF) is NaN, with signals; * trig(NaN) is that NaN; * * Accuracy: * TRIG(x) returns trig(x) nearly rounded */ #ifdef __STDC__ double msal_tan(double x) #else double msal_tan(x) double x; #endif { double y[2],z=0.0; int n, ix; /* High word of x. */ ix = (int)MSAL_HI(x); /* |x| ~< pi/4 */ ix &= 0x7fffffff; if (ix <= 0x3fe921fb) { return __kernel_tan(x,z,1); } /* tan(Inf or NaN) is NaN */ else if (ix>=0x7ff00000) return x-x; /* NaN */ /* argument reduction needed */ else { n = __ieee754_rem_pio2(x,y); return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even -1 -- n odd */ } } /* Your typical Taylor series sine-cosine routine */ #ifdef __STDC__ long double ldbl_sincos(long double xq, long double zq, long double uq, double i) #else long double ldbl_sincos(xq, zq, uq, i) long double xq; long double zq; long double uq; double i; #endif { long double k; int n; long double prev; xq = xq * xq; /* x^2 */ prev = zq; k = 2.0; for(n=30; n>0; n--) { /* char out[40]; f107_spfg(out, sizeof(out), 0, 32, zq); */ /* printf("%2d %s\n", n, out); */ uq = -uq * xq/(k*(k+i)); /* -x^2/2 or -x^3/6 */ zq = zq + uq; if (zq == prev) { n = 0; /* exit loop early */ } else { prev = zq; k += 2.0; } } return zq; } static long double ld_1o2pi = 0.63661977236758134307553; static long double ld_p05 = 1.570796326794896619231; static long double ld_pi = 3.141592653589793238462643; static long double ld_2pi = 6.28318530717958647692529; #ifdef __STDC__ long double msal_sinl(long double x) #else long double msal_sinl(x) long double x; #endif { long i; int neg = 0; /* for x<0, sin(x) = -sin(-x) */ if (x < 0.0) { x = -x; neg = 1; } /* remove factor of 2 pi */ if (x >= ld_2pi) { i = (long) (x * ld_1o2pi); x = x - ld_2pi * ((long double) i); } /* for x in [pi..2pi], sin(x)=-sin(x - pi) */ if (x > ld_pi) { x = x - ld_pi; neg = 1-neg; } /* for x in [pi/2..pi], sin(x)=sin(pi-x) */ if (x > ld_p05) { x = ld_pi - x; } if (neg) { return(- ldbl_sincos(x, x, x, 1.0)); } return(ldbl_sincos(x, x, x, 1.0)); } #ifdef __STDC__ long double msal_cosl(long double x) #else long double msal_cosl(x) long double x; #endif { long i; int neg = 0; /* for x<0, cos(x) = cos(-x) */ if (x < 0.0) { x = -x; } /* remove factor of 2 pi */ if (x >= ld_2pi) { i = (long) (x * ld_1o2pi); x = x - ld_2pi * ((long double) i); } /* for x in [pi..2pi], cos(x)=cos(2 pi - x) */ if (x > ld_pi) { x = ld_2pi - x; } /* for x in [pi/2..pi], cos(x)=-cos(pi-x) */ if (x > ld_p05) { x = ld_pi - x; neg = 1; } if (neg) { return(- ldbl_sincos(x, 1.0, 1.0, -1.0)); } return(ldbl_sincos(x, 1.0, 1.0, -1.0)); } #ifdef __STDC__ long double msal_tanl(long double x) #else long double msal_tanl(x) long double x; #endif { return(msal_sinl(x)/msal_cosl(x)); } /* -------------------------------------------------------------------------. | _ _ _ _ ___ | | | ._ ._ _ |_ _ ._ | | | |_ . . ._ _ | " _ ._ | | | , ,-| | | | | ) (-` | T |/\| | | | | | ( T | ( ) | | | | ~~~ `~` ~ ~ ~ '~ ~' ~ ` ~ ~ ~ ~ ~ ~ ~ ` ~ ~ ~ ~ | | | `------------------------------------------------------------------------- */ /* This code for the Lambert W function is derived from the work by Darko Veberic. This version is heavily adapted, but still has bits in common (most notably the coefficients) with the original. That source code came with this copyright notice: Copyright (C) 2009 Darko Veberic, darko.veberic@ung.si This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* msal_w_approx0 gives an initial estimate for the W_0 or W_-1 branch, suitable over the domain (-1/e, -0.32358) */ #ifdef __STDC__ double msal_w_approx0(double x, int branch) #else double msal_w_approx0(x, branch) double x; int branch; #endif { double esign, p; if (branch == 0) { esign = 1.0; } else { esign = -1.0; } p = esign * sqrt(2.0*(1.0 + MSAL_E*x)); return /* The coefficients are from Veberic's 2010 paper at */ -1 + /* arXiv:1003.1628v1 see the table right after eqn (37) */ p*( 1 + p*(-0.333333333333333333e0 + /* -1/3 */ p*( 0.152777777777777777e0 + /* 11/72 */ p*(-0.79629629629629630e-1 + /* -43/540 */ p*( 0.44502314814814814e-1 + /* 769/17280 */ p*(-0.25984714873603760e-1 + /* -221/8505 */ p*( 0.15635632532333920e-1 /* 680863/43545600 */ ))))))); } /* msal_w0_approx1 gives an initial estimate for the W_0 branch, suitable over the domain (-1/e, 8) */ #ifdef __STDC__ double msal_w0_approx1(double x) #else double msal_w0_approx1(x) double x; #endif { double est, a1, a2, a3, a4, b1, b2, b3, b4; if (x < 0.145) { a1 = 5.931375839364438; a2 = 11.39220550532913; a3 = 7.33888339911111; a4 = 0.653449016991959; b1 = 6.931373689597704; b2 = 16.82349461388016; b3 = 16.43072324143226; b4 = 5.115235195211697; } else { a1 = 2.445053070726557; a2 = 1.343664225958226; a3 = 0.148440055397592; a4 = 0.0008047501729130; b1 = 3.444708986486002; b2 = 3.292489857371952; b3 = 0.916460018803122; b4 = 0.0530686404483322; } est = x * (1.0 + a1*(x + a2 *(x + a3 *(x + a4*x)))) / (1.0 + b1*(x + b2 *(x + b3 *(x + b4*x)))); return est; } /* msal_w0_approx2 gives an initial estimate for the W_0 branch, suitable over the domain (8, inf) */ #ifdef __STDC__ double msal_w0_approx2(double x) #else double msal_w0_approx2(x) double x; #endif { double a, b, ia; a = log(x); b = log(a); ia = 1.0 / a; return a - b + b / a * (1.0 + ia * (0.5*(-2.0 + b) + ia * (1.0/6.0*(6.0 + b*(-9.0 + b*2.0)) + ia * (1.0/12.0*(-12.0 + b*(36.0 + b*(-22.0 + b*3.0))) + ia * 1.0/60.0*(60.0 + b*(-300.0 + b*(350.0 + b*(-125.0 + b*12.0)))) ) ) ) ); } #ifdef __STDC__ double msal_lambertw(double x) #else double msal_lambertw(x) double x; #endif { double a1, a2; double est, ee, s, t, u; int i, mh; if (x < (-1.0/MSAL_E)) { a1 = huge * huge; /* Inf */ a2 = (-1.0 * huge) * huge; /* Ninf */ return (a1+a2); /* Nan */ } mh = 3; if (x < -0.32358170806015724) { est = msal_w_approx0(x, 0); } else if (x < 8.707) { est = msal_w0_approx1(x); if (x < -0.25) { mh = 4; } } else { est = msal_w0_approx2(x); } /* Perform Halley's iteration */ for(i=0; i To: "math-fun mailing list"
Date: Sun, 8 Jan 2012 23:24:37 -0800 Subject: Re: [math-fun] Gamma function (formulas collection) I missed where Warren's paper probably says that, for "unrestricted" (arbitrary precision) numerics, the divergence of the log Gamma and polygamma series is rather irrelevant. A plausible algorithm is simply to add some positive integer n to z so that the Stirling series z+n swoops within the desired accuracy. Then divide by (z+1)(z+2)...(z+n). Windschitl's and Warren's improvements make this even more attractive. Looking to provide Gamma numerics for Robert's RIES, I accidentally overkilled the problem by Remezing up f[z]:=(0.011987912746518014561506488044010 + z^2 (-0.028678142684262583619372798888834 + z^2 (-0.10376325088888750555218325558841 + z^2 (-0.034612435282031905677869242849747 - 0.0024691358024691358024691358024691 z^2))))/(z^7 * (45.984169004371332413538940188341 + z^2 (54.768197084214553421696753890699 + z^2 (14.960893432079977923321408051895 + z^2)))) Then plotting z! /(Sqrt[2 Pi] (E^(-f[z] - 4 z) z^(2 + 6 z) Sinh[1/z]^(2 z))^(1/4) -1 9 < z < oo shows maximum error 6*10^-26 (at only one ripple because I neglected the weight function). So up to nine divides could be required for this method. But RIES only needs 10^-17, for which f[z] could be a lot simpler and n smaller than 9. To squeeze some actual fun out of this: Why is f an odd function? --rwg PS, I wasn't confused in that 2001 Remez posting--just confusing. In the next sentences I explained why truncating the Chebys doesn't minimax. I was just using "Chebychef optimal" to mean the best you could do by subtracting off Chebys. ] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ /* The following constants LG_g and LG_N are the "g" and "n" parameters / for the table of coefficients that follows them; several alternative / sets of coefficients are available at: / http://mrob.com/pub/ries/lanczos-gamma.html */ #if 1 /* A lower-precision version from "Numerical Recipes" */ # define LG_g 5.0 /* Lanczos parameter "g" */ # define LG_N 6 /* Range of coefficients i=[0..N] */ const double lct[LG_N+1] = { 1.000000000190015, 76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5 }; const double lcnum[LG_N+1] = { 8.7919924759596111106039069839910954707405381645486e7, 2.9416541482343776499806562571350221096212266811349e9, 4.100006789687650376061766307985658465119862514162e10, 3.0470491395554593783543149688032484055692798817589e11, 1.2735125543888013032240994786889999327005736243542e12, 2.8381487223293835117356885925163088875090863795676e12, 2.6349245000341171829201220803164421672401218788693e12 }; const double lcden[LG_N+1] = { 8.7919924742890022080764214474411305172689073093183e7, 1.8463184196006904636960485039626374086264705349568e9, 1.5385986830005753864133737533021978405220587791307e10, 6.4621144686024166229361697638692309301926468723489e10, 1.4278195778245339585916108430644395960044705470333e11, 1.5509074724645799895046807433286154232462352493637e11, 6.3302345814880815898150234421576139724336132627092e10 }; #elif 0 /* A version that was used in the GNU scientific library */ # define LG_g 7.0 /* Lanczos parameter "g" */ # define LG_N 8 /* Range of coefficients i=[0..N] */ const double lct[LG_N+1] = { 0.99999999999980993227684700473478, 676.520368121885098567009190444019, -1259.13921672240287047156078755283, 771.3234287776530788486528258894, -176.61502916214059906584551354, 12.507343278686904814458936853, -0.13857109526572011689554707, 9.984369578019570859563e-6, 1.50563273514931155834e-7 }; #elif 0 /* From Godfrey, 2001. He says this version gives 13 digits of accuracy. */ # define LG_g 9.0 /* Lanczos parameter "g" */ # define LG_N 10 /* Range of coefficients i=[0..N] */ const double lct[LG_N+1] = { 1.000000000000000174663, 5716.400188274341379136, -14815.30426768413909044, 14291.49277657478554025, -6348.160217641458813289, 1301.608286058321874105, -108.1767053514369634679, 2.605696505611755827729, -0.7423452510201416151527e-2, 0.5384136432509564062961e-7, -0.4023533141268236372067e-8 }; #else /* From Godfrey, 2001. He says this version gives 15 digits of accuracy. */ # define LG_g 4.7421875 /* Lanczos parameter "g" */ # define LG_N 14 /* Range of coefficients i=[0..N] */ const double lct[LG_N+1] = { 0.99999999999999709182, 57.156235665862923517, -59.597960355475491248, 14.136097974741747174, -0.49191381609762019978, .33994649984811888699e-4, .46523628927048575665e-4, -.98374475304879564677e-4, .15808870322491248884e-3, -.21026444172410488319e-3, .21743961811521264320e-3, -.16431810653676389022e-3, .84418223983852743293e-4, -.26190838401581408670e-4, .36899182659531622704e-5 }; #endif const double ln_sqrt_2_pi = 0.91893853320467274178; const double g_pi = 3.14159265358979323846; /* Compute the logarithm of the Gamma function using the Lanczos method. */ double msal_lanczos_ln_gamma(double z) { double sum, num, den; double base; int i; if (z < 0.5) { /* Use Euler's reflection formula: Gamma(z) = Pi / [Sin[Pi*z] * Gamma[1-z]]; */ return log(g_pi / msal_sin(g_pi * z)) - msal_lanczos_ln_gamma(1.0 - z); } z = z - 1.0; base = z + LG_g + 0.5; /* Base of the Lanczos exponential */ #ifdef GAMMA_LANCZOS_METHOD sum = 0; /* We start with the terms that have the smallest coefficients and largest / denominator. */ for(i=LG_N; i>=1; i--) { sum += lct[i] / (z + ((double) i)); } sum = sum + lct[0]; #else /* Use the Boost C++ library method */ num = lcnum[0]; den = lcden[0]; for(i=1; i<=LG_N; i++) { num = num*z + lcnum[i]; den = den*z + lcden[i]; } sum = num/den; #endif /* This printf is just for debugging printf("ls2p %7g l(b^e) %7g -b %7g l(s) %7g\n", ln_sqrt_2_pi, log(base)*(z+0.5), -base, log(sum)); */ /* Gamma[z] = Sqrt(2*Pi) * sum * base^[z + 0.5] / E^base */ return ((ln_sqrt_2_pi + log(sum)) - base) + log(base)*(z+0.5); } /* Compute the Gamma function, which is e to the power of ln_gamma. */ double msal_lanczos_gamma(double z) { return(exp(msal_lanczos_ln_gamma(z))); } /* The digamma function is the derivative of gammaln. / / Reference: / J Bernardo, / Psi ( Digamma ) Function, / Algorithm AS 103, / Applied Statistics, / Volume 25, Number 3, pages 315-317, 1976. / / From http://www.psc.edu/~burkardt/src/dirichlet/dirichlet.f / (with modifications for negative numbers and extra precision) */ double msal_digamma(double x) { double neginf = MSAL_NINF; static const double c = 12, digamma1 = -0.57721566490153286, trigamma1 = 1.6449340668482264365, /* pi^2/6 */ s = 1e-6, s3 = 1./12, s4 = 1./120, s5 = 1./252, s6 = 1./240, s7 = 1./132; /* static const double s8 = 691./32760, s9 = 1./12, s10 = 3617./8160; */ double result; /* Illegal arguments */ if((x == neginf) || isnan(x)) { return NAN; } /* Singularities */ if((x <= 0) && (floor(x) == x)) { return neginf; } /* Negative values / Use the reflection formula (Jeffrey 11.1.6): / digamma(-x) = digamma(x+1) + pi*cot(pi*x) / / This is related to the identity / digamma(-x) = digamma(x+1) - digamma(z) + digamma(1-z) / where z is the fractional part of x / For example: / digamma(-3.1) = 1/3.1 + 1/2.1 + 1/1.1 + 1/0.1 + digamma(1-0.1) / = digamma(4.1) - digamma(0.1) + digamma(1-0.1) / Then we use / digamma(1-z) - digamma(z) = pi*cot(pi*z) */ if(x < 0) { return msal_digamma(1-x) + g_pi/msal_tan(-g_pi*x); } /* Use Taylor series if argument <= S */ if(x <= s) return digamma1 - 1/x + trigamma1*x; /* Reduce to digamma(X + N) where (X + N) >= C */ result = 0; while(x < c) { result -= 1/x; x++; } /* Use de Moivre's expansion if argument >= C / This expansion can be computed in Maple via asympt(Psi(x),x) */ if(x >= c) { double r = 1/x, t; result += log(x) - 0.5*r; r *= r; /* this version for lame compilers */ t = (s5 - r * (s6 - r * s7)); result -= r * (s3 - r * (s4 - r * t)); } return result; } double msal_dgamma(double z) { return(msal_digamma(z)*msal_lanczos_gamma(z)); } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | Test functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ void msal_test_lambert(void) { double x, w, v, er; x = -0.375; /* Deliberately less than -1/e */ while (x < 1.0e20) { printf("W(%.9g) = ", x); w = msal_lambertw(x); printf("%.17g", w); v = w*exp(w); er = (x-v)/(1.0+x); printf(" err "); printf("%.17g", er); printf("\n"); /* With these increments and limits all the x values will be "round numbers" in binary */ if (x<-0.3125) { x+=0.0078125; } else if (x<2.0) { x+=0.25; } else if (x<=1000.0) { x*=2.0; } else if (x<=30000.0) { x*=5.0; } else { x *= 10.0; } } } void msal_test_lambertl(void) { long double x, w, v, er; long double k1 = 1.0; x = -0.375; /* Deliberately less than -1/e */ while (x < 1.0e20) { printf("W(%.9Lg) = ", x); w = msal_lambertwl(x); printf("%.20Lg", w); v = w*expl(w); er = (x-v)/(k1+x); printf(" err "); printf("%.20Lg", er); printf("\n"); /* With these increments and limits all the x values will be "round numbers" in binary */ if (x<-0.3125) { x+=0.0078125; } else if (x<2.0) { x+=0.25; } else if (x<=1000.0) { x*=2.0; } else if (x<=30000.0) { x*=5.0; } else { x *= 10.0; } } } void msal_test_gamma(void) { double z, g, g2, dz, dg, di; dz = 0.01; for(z=0.5; z<=6.0; z+=0.5) { g = msal_lanczos_gamma(z); g2 = msal_lanczos_gamma(z+dz); dg = (g2-g)/dz; di = (msal_dgamma(z)+msal_dgamma(z+dz))/2.0;; printf("Gamma[%.1f] ~ %15.12g; diff %g, digamma %15.12g\n", z, g, dg, di); } }