// f161_o.cpp // // Copyright (C) 1998-2013 Robert P. Munafo. /* To use in CodeWarrior: Add 'Macintosh HD:Users:myname:shared:proj:libs' to "access paths" Add a "Prefix file" in "C/C++ Language" for each target in the project, and make sure the prefix file defines "__MWCC__" (and "USE_CONS_CLASSIC" if you intend to call pr_raw_double_161, print_f161_a or f161_to_digits) To use in UNIX (makefile): Add '-I/home/myname/shared/proj/libs' to compile line 20070308 Create f161_o from f107 and begin implementing 20070310 First few simple tests pass (able to compute a=1/7, b=a+4/21, and c=b*b and convert each of these to digits) 20070311 Add F161_APPROX switch and implement versions of the 3 main operators (+, -, *) that calculate only 3 overlapping components instead of 4. As expected the times are moderately better. 20070316 Spend a while looking at what appears to be a bug in normalize_43: In 3 of the cases it calls (for example) "s1 = normalize_22(s, t3, e);" and then sets s2 to 0 regardless of whether e is nonzero. I add code to catch these cases and take statistics -- I discover that the discarded amount is always less than s0*2^-161. Also, "fixing" this by putting e into s2 does not cause any apparent change in the appearance of the Mandelbrot images. Add sloppy version of f161_native_square. 20080312 Fix a bug in f161_intpow 20100225 Add f161_sqrt(); add sign parameter to f161_to_digits(). */ #ifdef USE_CONS_CLASSIC # include #else # include #endif #include "f161_o.h" #include // "pf" stands for "primitive float"; it is the floating point type upon // which the library is built. #ifdef f161_pf # undef f161_pf #endif #define f161_pf double // These prototypes shouldn't be necessary but the compiler complains if you // don't have them. f161_o fabs ( const f161_o& z ); // Internal routines void print_f161_a(f161_o a, int cr) { /* print the fields of the f161 structure */ #ifdef USE_CONS_CLASSIC cons_put_str("["); cons_put_f8_p2(a.c0,20,3); cons_put_str("|"); cons_put_f8_p2(a.c1,20,3); cons_put_str("|"); cons_put_f8_p2(a.c2,20,3); cons_put_str("]"); if (cr) { cons_put_str("\n"); } #else printf("["); pr_raw_double_107(a.c0,1,1,1); printf("|"); pr_raw_double_107(a.c1,1,1,1); printf("|"); pr_raw_double_107(a.c2,1,1,1); printf("]"); if (cr) { printf("\n"); } #endif } void add_f161(f161_o a, f161_o b, f161_o & res) { res = a + b; } void sub_f161(f161_o a, f161_o b, f161_o & res) { res = a - b; } void div_f161(f161_o n, f161_o d, f161_o &res) { f161_o est, k1, e2; // Compute a pf approximation of 1/d k1 = 1.0; est.c0 = k1.c0 / d.c0; est.c1 = est.c2 = 0; // Use Newton's method to compute a better approximation // est2 = est1 + est1 * (1.0 - (est1 * denom)) // example: computing 1/5: // est1 .2001 // est1 * denom 1.0005 // 1.0 - % -0.0005 // est1 * % -0.00010005 // est1 + % .19999995 e2 = est + (est * (k1 - (est * d))); est = e2 + (e2 * (k1 - (e2 * d))); // Answer is numerator times reciprocal res = n * est; } // init routine void f161_o_init(void) { } // Conversion operators are all inline // Infix binary operators // most of these (add, sub, mul) are inline /* a x b ~= a0 b0 c + a0 b1 + a1 b0 d e + a0 b2 + a1 b1 + a2 b0 f g h + a1 b2 + a2 b1 j k c0 : : t0 c1,d1,e1 : add_1113 : t1, u2, u3 d2,e2,f2,g2,h2, u2 : add_P6_2 : t2, v3 f3,g3,h3,j3,k3, u3, v3 : + : t3 */ f161_o operator * ( const f161_o & a, const f161_o & b ) { f161_o res; f161_pf b1, d1, d2, e1, e2, f2, g2, h2, t0, t1, u2, t2; #if F161_APPROX t0 = f107_o::mul_112(a.c0, b.c0, b1); d1 = f107_o::mul_112(a.c0, b.c1, d2); e1 = f107_o::mul_112(a.c1, b.c0, e2); f2 = a.c0 * b.c2; g2 = a.c1 * b.c1; h2 = a.c2 * b.c0; t1 = f107_o::add_1112(b1, d1, e1, u2); t2 = d2 + e2 + f2 + g2 + h2 + u2; res.c0 = normalize_33(t0, t1, t2, res.c1, res.c2); #else f161_pf f3, g3, h3, j3, k3, u3, v3, t3; t0 = f107_o::mul_112(a.c0, b.c0, b1); d1 = f107_o::mul_112(a.c0, b.c1, d2); e1 = f107_o::mul_112(a.c1, b.c0, e2); f2 = f107_o::mul_112(a.c0, b.c2, f3); g2 = f107_o::mul_112(a.c1, b.c1, g3); h2 = f107_o::mul_112(a.c2, b.c0, h3); j3 = a.c1 * b.c2; k3 = a.c2 * b.c1; t1 = add_1113(b1, d1, e1, u2, u3); t2 = add_P6_2(d2, e2, f2, g2, h2, u2, v3); t3 = f3 + g3 + h3 + j3 + k3 + u3 + v3; res.c0 = normalize_43(t0, t1, t2, t3, res.c1, res.c2); #endif return res; } f161_o operator * ( f161_pf a, const f161_o & b ) { f161_o res; f161_pf b1, d1, d2, f2, t0, t1, u2, t2; #if F161_APPROX t0 = f107_o::mul_112(a, b.c0, b1); d1 = f107_o::mul_112(a, b.c1, d2); f2 = a * b.c2; t1 = f107_o::add_112(b1, d1, u2); t2 = d2 + f2 + u2; res.c0 = normalize_33(t0, t1, t2, res.c1, res.c2); #else f161_pf t3, f3, g3; t0 = f107_o::mul_112(a, b.c0, b1); d1 = f107_o::mul_112(a, b.c1, d2); f2 = f107_o::mul_112(a, b.c2, f3); t1 = f107_o::add_112(b1, d1, u2); t2 = add_1112(u2, d2, f2, g3); t3 = g3 + f3; res.c0 = normalize_43(t0, t1, t2, t3, res.c1, res.c2); #endif return res; } f161_o operator * ( const f161_o & a, f161_pf b ) { f161_o res; f161_pf t0, t1, t2, b1, d1, d2, f2, u2; #if F161_APPROX t0 = f107_o::mul_112(b, a.c0, b1); d1 = f107_o::mul_112(b, a.c1, d2); f2 = b * a.c2; t1 = f107_o::add_112(b1, d1, u2); t2 = d2 + f2 + u2; res.c0 = normalize_33(t0, t1, t2, res.c1, res.c2); #else f161_pf t3, f3, g3; t0 = f107_o::mul_112(b, a.c0, b1); d1 = f107_o::mul_112(b, a.c1, d2); f2 = f107_o::mul_112(b, a.c2, f3); t1 = f107_o::add_112(b1, d1, u2); t2 = add_1112(u2, d2, f2, g3); t3 = g3 + f3; res.c0 = normalize_43(t0, t1, t2, t3, res.c1, res.c2); #endif return res; } f161_o f161_native_square(f161_o a) { f161_o res; f161_pf b1, d1, d2, f2, g2, u2, t0, t1, t2; #if F161_APPROX t0 = f107_o::square_12(a.c0, b1); d1 = f107_o::mul_112(a.c0, a.c1, d2); d1 = 2.0 * d1; d2 = 2.0 * d2; f2 = 2.0 * a.c0 * a.c2; g2 = a.c1 * a.c1; t1 = f107_o::add_112(b1, d1, u2); t2 = d2 + f2 + g2 + u2; res.c0 = normalize_33(t0, t1, t2, res.c1, res.c2); #else f161_pf f3, g3, j3, v3, t3; t0 = square_12(a.c0, b1); d1 = f107_o::mul_112(a.c0, a.c1, d2); d1 = 2.0 * d1; d2 = 2.0 * d2; f2 = f107_o::mul_112(a.c0, a.c2, f3); f2 = 2.0 * f2; f3 = 2.0 * f3; g2 = square_12(a.c1, g3); j3 = 2.0 * a.c1 * a.c2; t1 = f107_o::add_112(b1, d1, u2); t2 = add_P4_2(d2, f2, g2, u2, v3); t3 = f3 + g3 + j3 + v3; res.c0 = normalize_43(t0, t1, t2, t3, res.c1, res.c2); #endif return res; } f161_o operator / ( const f161_o & numer, const f161_o & denom ) { f161_o res; div_f161(numer, denom, res); return res; } // Note we want to use full precision because this operator is often used // to calculate reciprocals of precisely representable values, e.g. // computing the exact value of 1/3 f161_o operator / ( const f161_o & lhs, f161_pf rhs ) { f161_o res; f161_o b; b.c0 = rhs; b.c1 = 0; b.c2 = 0; div_f161(lhs, b, res); return res; } f161_o operator / ( f161_pf lhs, const f161_o & rhs ) { f161_o res; f161_o a; a.c0 = lhs; a.c1 = 0; a.c2 = 0; div_f161(a, rhs, res); return res; } // unary operators are inline // Modify-assign operators f161_o & f161_o :: operator += ( f161_o & rhs ) { *this = *this + rhs; // add_f161(*this, rhs, *this); return *this; } f161_o & f161_o :: operator += ( f161_pf rhs ) { f161_o b; b.c0 = rhs; b.c1 = 0; b.c2 = 0; *this = *this + b; // add_f161(*this, b, *this); return *this; } f161_o & f161_o :: operator -= ( f161_o & rhs ) { *this = *this - rhs; // sub_f161(*this, rhs, *this); return *this; } f161_o & f161_o :: operator -= ( f161_pf rhs ) { f161_o b; b.c0 = rhs; b.c1 = 0; b.c2 = 0; *this = *this - b; // sub_f161(*this, b, *this); return *this; } // %%% it would be nice to add <<= and >>= operators to perform // the scale_2p function. // Comparison (relational) operators -- many are inlined int operator >= ( const f161_o & lhs, const f161_o & rhs ) { int res; res = cmp_f161(lhs, rhs); res = (res >= 0); return res; } int operator >= ( const f161_o & lhs, f161_pf rhs ) { f161_o r; int res; r = rhs; res = (lhs >= r); return res; } int operator >= ( f161_pf lhs, const f161_o & rhs ) { f161_o l; int res; l = lhs; res = (l >= rhs); return res; } int operator <= ( const f161_o & lhs, const f161_o & rhs ) { int res; res = cmp_f161(lhs, rhs); res = (res <= 0); return res; } int operator <= ( const f161_o & lhs, f161_pf rhs ) { f161_o r; int res; r = rhs; res = (lhs <= r); return res; } int operator <= ( f161_pf lhs, const f161_o & rhs ) { f161_o l; int res; l = lhs; res = (l <= rhs); return res; } int operator != ( const f161_o & lhs, const f161_o & rhs ) { int res; res = cmp_f161(lhs, rhs); res = (res != 0); return res; } int operator != ( const f161_o & lhs, f161_pf rhs ) { return ((lhs.c0 != rhs) || (lhs.c1 != 0) || (lhs.c2 != 0)); } int operator != ( f161_pf lhs, const f161_o & rhs ) { return ((lhs != rhs.c0) || (rhs.c1 != 0) || (rhs.c2 != 0)); } // Functions f161_o fabs ( const f161_o & z ) { f161_o rv; if (z.c0 < 0.0) { rv.c0 = -(z.c0); rv.c1 = -(z.c1); rv.c2 = -(z.c2); } else if (z.c0 > 0.0) { rv = z; } else if (z.c1 < 0.0) { rv.c0 = -(z.c0); rv.c1 = -(z.c1); rv.c2 = -(z.c2); } else if (z.c1 > 0.0) { rv = z; } else if (z.c2 < 0.0) { rv.c0 = -(z.c0); rv.c1 = -(z.c1); rv.c2 = -(z.c2); } else { rv = z; } return rv; } f161_o trunc ( const f161_o& z ) { f161_o rv; rv.c0 = trunc(z.c0); rv.c1 = trunc(z.c1); rv.c2 = trunc(z.c2); return rv; } // Continued-fraction approximation void cfa(f161_o x, f161_s16 order, f161_o *n, f161_o *d) { *n = trunc(x); *d = 1.0; if (order > 0) { f161_o n1, d1, x1, x2, x3; *n = trunc(x); x1 = x - (*n); x3 = 1.0; x2 = x3 / x1; cfa(x2, order-1, &n1, &d1); *n = (n1 * (*n)) + d1; *d = n1; } } f161_o f161_intpow(f161_o x, int p) { int i; f161_o rv; f161_o pow2; if (p > 0) { rv = 1.0; pow2 = x; i = p; while(i > 0) { if (i & 1) { rv = rv * pow2; } i >>= 1; pow2 = pow2 * pow2; } } else { rv = 1.0; } return(rv); } /* Square root, using the trusty old Babylonian method. */ f161_o f161_sqrt(f161_o x) { f161_o a, b; if (x <= 0.0) { a = 0.0; } else { a = (f161_o) sqrt(x.c0); b = x / a; a = (a + b) / 2.0; b = x / a; a = (a + b) / 2.0; b = x / a; a = (a + b) / 2.0; } return(a); } void f161_to_digits(f161_o x, char *s, int *expn, int *sign, int precision) { int D = precision + 1; /* number of digits to compute */ f161_o r; f161_o p; int e; /* exponent */ int i, d; int sgn; sgn = 0; if (x < 0.0) { x = - x; sgn = 1; } if (sign) { *sign = sgn; } r = fabs(x); if (x.c0 == 0.0) { /* x == 0.0 */ if (expn) { *expn = 0; } for (i = 0; i < precision; i++) s[i] = '0'; return; } /* First determine the (approximate) exponent. */ e = (int) (floor(log10(fabs(x.c0)))); if (e < -300) { r = r * f161_intpow(f161_o(10.0), 300); p = f161_intpow(f161_o(10.0), (e + 300)); r = r / p; } else if (e > 0) { p = f161_intpow(f161_o(10.0), e); r = r / p; } else if (e < 0) { p = f161_intpow(f161_o(10.0), -e); r = r * p; } /* Fix exponent if we are off by one */ if (r >= 10.0) { r = r / 10.0; e++; } else if (r < 1.0) { r = r * 10.0; e--; } if (r >= 10.0 || r < 1.0) { #ifdef USE_CONS_CLASSIC cons_put_str("f161_to_digits: can't compute exponent.\n"); #else printf("f161_to_digits: can't compute exponent.\n"); #endif return; } /* Extract the digits */ for (i = 0; i < D; i++) { d = ((int) (r.c0)); r = r - ((f161_o) d); r = r * ((f161_o) 10.0); s[i] = (char) (d + '0'); } /* Fix negative digits. */ for (i = D-1; i > 0; i--) { if (s[i] < '0') { s[i-1]--; s[i] += 10; } } if (s[0] <= '0') { #ifdef USE_CONS_CLASSIC cons_put_str("f161_to_digits: non-positive leading digit.\n"); #else printf("f161_o_to_digits: non-positive leading digit.\n"); #endif return; } /* Round, handle carry */ if (s[D-1] >= '5') { s[D-2]++; i = D-2; while (i > 0 && s[i] > '9') { s[i] -= 10; s[--i]++; } } /* If first digit is 10, shift everything. */ if (s[0] > '9') { e++; for (i = precision; i >= 2; i--) s[i] = s[i-1]; s[0] = '1'; s[1] = '0'; } s[precision] = 0; if (expn) *expn = e; } /* f161_spf prints an f161_o into a string, using the type of format I usually want for the center and size coordinates in a Mandelbrot view. For a given precision, all output will have the same number of digits, including 0 digits. For example, with precision 5: 1.3333, 0.3333, 0.0333. The parameters are: char * s1 - output string. Must have enough space for requested precision, plus 3 more characters for the sign, decimal point and terminating null. sign_flag - pass in '+' if you want explicit + signs precision - number of digits to generate, *including* leading 0's if the number is in the range (-1.0..1.0) f161_o x - the number to format. DIGMAX_F161_SPF is 64 because in some situations, notably midgets on the spike (R2t option of tmz-core) we can get twice as many digits as the next-lower precision, i.e. 2*32 in this case because the next lower precision (f107) has 107*log(2)/log(10) = 32 digits of precision. */ #define DIGMAX_F161_SPF 64 void f161_spf(char *s1, char sign_flag, int precision, f161_o x) { char *s; int sign; int exponent; int i; char s2[DIGMAX_F161_SPF+5]; char s3[DIGMAX_F161_SPF+5]; if (precision > DIGMAX_F161_SPF) { precision = DIGMAX_F161_SPF; } else if (precision < 1) { precision = 1; } s = s1; f161_to_digits(x, s2, &exponent, &sign, precision); f107_strncpy(s3, s2, DIGMAX_F161_SPF+4); s3[DIGMAX_F161_SPF+4] = 0; if (sign) { *s++ = '-'; } else if (sign_flag == '+') { *s++ = '+'; } if ((exponent > 0) && (exponent < precision)) { i = 0; *s++ = s2[i++]; while (exponent) { *s++ = s2[i++]; exponent--; } *s++ = '.'; for(; i