// f215_o.h // // Copyright (C) 1998-2013 Robert P. Munafo. // // float_215 type for C++ // // This is a high-precision float type built on four IEEE 64-bit doubles. // For more info see f215_o.cpp // // 20100909 Eliminate a warning. // 20100915 Add explicit downconvert routine f107() #ifndef _f215_o_ #define _f215_o_ #ifndef F215_APPROX # define F215_APPROX 1 #endif #ifdef __MWCC__ //# include # include # include #else # ifdef __GNUC__ //# include "my-includes/rpm_types.h" # include "f107_o.h" # include "f161_o.h" # else //# include "::my-includes:rpm_types.h" # include "f107_o.h" # include "f161_o.h" # endif #endif // "pf" stands for "primitive float"; it is the floating point type upon // which the library is built. #ifdef f215_pf # undef f215_pf #endif #define f215_pf double class f215_o { private: public: f215_pf c0; f215_pf c1; f215_pf c2; f215_pf c3; friend void print_f215_a(f215_o a, int cr); friend void add_f215(f215_o a, f215_o b, f215_o &res); friend void sub_f215(f215_o a, f215_o b, f215_o &res); friend void div_f215(f215_o numer, f215_o denom, f215_o &res); friend f107_s16 cmp_f215(f215_o a, f215_o b); f215_o ( ); f215_o ( f215_pf x ); f215_o ( const f215_o & ); operator short ( ); operator long ( ); operator float ( ); operator double ( ); operator long double ( ); operator f107_o ( ); operator f161_o ( ); friend f161_o f161( f215_o x); friend f107_o f107( f215_o x); f215_o & operator = ( const f215_o & ); f215_o & operator = ( f215_pf ); f215_o & operator = ( const f107_o & ); f215_o & operator = ( const f161_o & ); friend f215_o operator + ( const f215_o &, const f215_o & ); friend f215_o operator + ( f215_pf, const f215_o & ); friend f215_o operator + ( const f215_o &, f215_pf ); friend f215_o operator - ( const f215_o &, const f215_o & ); friend f215_o operator - ( f215_pf, const f215_o & ); friend f215_o operator - ( const f215_o &, f215_pf ); friend f215_o operator * ( const f215_o &, const f215_o & ); friend f215_o operator * ( f215_pf, const f215_o & ); friend f215_o operator * ( const f215_o &, f215_pf ); friend f215_o operator / ( const f215_o &, const f215_o & ); friend f215_o operator / ( f215_pf, const f215_o & ); friend f215_o operator / ( const f215_o &, f215_pf ); friend f215_o operator - ( const f215_o & ); f215_o & operator += ( f215_o & ); f215_o & operator += ( f215_pf ); f215_o & operator -= ( f215_o & ); f215_o & operator -= ( f215_pf ); friend int operator < ( const f215_o &, const f215_o & ); friend int operator < ( f215_pf, const f215_o & ); friend int operator < ( const f215_o &, f215_pf ); friend int operator << ( const f215_o &, const f215_o & ); friend int operator << ( f215_pf, const f215_o & ); friend int operator << ( const f215_o &, f215_pf ); friend int operator > ( const f215_o &, const f215_o & ); friend int operator > ( f215_pf, const f215_o & ); friend int operator > ( const f215_o &, f215_pf ); friend int operator >> ( const f215_o &, const f215_o & ); friend int operator >> ( f215_pf, const f215_o & ); friend int operator >> ( const f215_o &, f215_pf ); friend int operator >= ( const f215_o &, const f215_o & ); friend int operator >= ( const f215_o &, f215_pf ); friend int operator >= ( f215_pf, const f215_o & ); friend int operator <= ( const f215_o &, const f215_o & ); friend int operator <= ( const f215_o &, f215_pf ); friend int operator <= ( f215_pf, const f215_o & ); friend int operator != ( const f215_o &, const f215_o & ); friend int operator != ( f215_pf, const f215_o & ); friend int operator != ( const f215_o &, f215_pf ); friend f215_o square ( const f215_o& z ); friend f215_o trunc ( const f215_o& z ); friend f215_o fabs ( const f215_o& z ); friend void f215_to_digits(f215_o x, char *s, int *expn, int *sign, int precision); }; inline f215_o::f215_o ( ): c0(0), c1(0), c2(0), c3(0) { } void f215_o_init(void); // f107_o cv_f215_f107( const f215_o & ); f215_o f215_native_2x ( const f215_o & ); inline f215_o f215_native_2x( const f215_o & x ) { f215_o res; res.c0 = x.c0 * 2.0; res.c1 = x.c1 * 2.0; res.c2 = x.c2 * 2.0; res.c3 = x.c3 * 2.0; return(res); } f215_o f215_native_square(f215_o a); void cfa(f215_o x, f107_s16 order, f215_o *n, f215_o *d); void pr_raw_double_215(double d, int sexp, int hm, int mant); void pr_mant_comp(double mc); f215_o f215_sqrt(f215_o x); f215_o f215_intpow(f215_o x, int p); void f215_spf(char *s1, char sign_flag, int precision, f215_o x); // Assignment operators inline f215_o & f215_o :: operator = ( f215_pf x) { c0 = x; c1 = 0; c2 = 0; c3 = 0; return *this; } inline f215_o & f215_o :: operator = ( const f107_o & x) { c0 = x.c0; c1 = x.c1; c2 = 0; c3 = 0; return *this; } inline f215_o & f215_o :: operator = ( const f161_o & x) { c0 = x.c0; c1 = x.c1; c2 = x.c2; c3 = 0; return *this; } inline f215_o & f215_o :: operator = ( const f215_o & x) { c0 = x.c0; c1 = x.c1; c2 = x.c2; c3 = x.c3; return *this; } // Conversion operators: DEMOTION inline f215_o :: operator short ( ) { short result; result = (short) c0; return result; } inline f215_o :: operator long ( ) { long result; // The assumption here is that "long" is less than 107 bits. result = ((long) c0) + ((long) c1); return result; } inline f215_o :: operator float ( ) { float result; result = c0; return result; } inline f215_o :: operator double ( ) { double result; result = c0; return result; } inline f215_o :: operator long double ( ) { long double result; // The assumption here is that "long double" is less than 107 bits. result = ((long double) c0) + ((long double) c1); return result; } inline f215_o :: operator f107_o ( ) { f107_o result; result.c0 = c0; result.c1 = c1; return result; } inline f215_o :: operator f161_o ( ) { f161_o result; result.c0 = c0; result.c1 = c1; result.c2 = c2; return result; } inline f161_o f161(f215_o x) { f161_o result; result.c0 = x.c0; result.c1 = x.c1; result.c2 = x.c2; return result; } inline f107_o f107(f215_o x) { f107_o result; result.c0 = x.c0; result.c1 = x.c1; return result; } // Conversion operators: PROMOTION inline f215_o::f215_o ( f215_pf x ): c0(x), c1(0.0), c2(0.0), c3(0.0) { } inline f215_o::f215_o ( const f215_o & x ): c0(x.c0), c1(x.c1), c2(x.c2), c3(x.c3) { } // -------------- ADDITION and SUBTRACTION PRIMITIVES ---------------- /* This is a modified version of RENORMALIZE that reduces 4 components to 4. Here is pseudocode based on that in the comment for normalize_43: inline f215_pf normalize_44(f215_pf x0, f215_pf x1, f215_pf x2, f215_pf x3, f215_pf &s1, f215_pf &s2, f215_pf &s3) { f215_pf s, e, t3, t2, t1; f215_pf S[3]; int k; s = normalize_22(x2, x3, t3); s = normalize_22(x1, s, t2); s = normalize_22(x0, s, t1); k = 0; S[0] = S[1] = S[2] = S[3] = 0; e = t1; if (e != 0) { S[k] = s; s = e; k=k+1; } s = normalize_22(s, t2, e); if (e != 0) { S[k] = s; s = e; k=k+1; } s = normalize_22(s, t3, e); S[k] = s; S[k+1] = e; s1 = S[1]; s2 = S[2]; s3 = S[3]; return S[0]; } */ inline f215_pf normalize_44(f215_pf x0, f215_pf x1, f215_pf x2, f215_pf x3, f215_pf &s1, f215_pf &s2, f215_pf &s3) { f215_pf s, e, t3, t2, s0; s = f107_o::normalize_22(x2, x3, t3); s = f107_o::normalize_22(x1, s, t2); s = f107_o::normalize_22(x0, s, e); // k = 0 if (e != 0) { // cases T-*-* // k = 0 s0 = s; s = e; // k = 1 s = f107_o::normalize_22(s, t2, e); if (e != 0) { // cases T-T-* // k = 1 s1 = s; s = e; // k = 2 s = f107_o::normalize_22(s, t3, e); s2 = s; s3 = e; } else { // cases T-F-* // k = 1 s = f107_o::normalize_22(s, t3, e); s1 = s; s2 = e; s3 = 0; } } else { // cases F-*-* // k = 0 s = f107_o::normalize_22(s, t2, e); if (e != 0) { // cases F-T-* // k = 0 s0 = s; s = e; // k = 1 s = f107_o::normalize_22(s, t3, e); s1 = s; s2 = e; s3 = 0; } else { // cases F-F-* // k = 0 s = f107_o::normalize_22(s, t3, e); s0 = s; s1 = e; s2 = s3 = 0; } } return s0; } // -------------- ADDITION and SUBTRACTION OPERATORS ---------------- inline f215_o operator + ( const f215_o & a, const f215_o & b ) { f215_o res; f215_pf t0, t1, t2, u, v, w, x, y2, y3; f215_pf t3, z3, b3; #if F215_APPROX t0 = f107_o::add_112(a.c0, b.c0, u); v = f107_o::add_112(a.c1, b.c1, w); y2 = f107_o::add_112(a.c2, b.c2, y3); z3 = a.c3 + b.c3; t1 = f107_o::add_112(u, v, x); t2 = f107_o::add_1112(w, x, y2, b3); t3 = b3 + y3 + z3; res.c0 = normalize_44(t0, t1, t2, t3, res.c1, res.c2, res.c3); #else #endif return res; } inline f215_o operator + ( const f215_o & a, f215_pf b ) { f215_o res; f215_pf x0, x1, x2, t, x3; #if F215_APPROX x0 = f107_o::add_112(a.c0, b, t); x1 = f107_o::add_112(a.c1, t, t); x2 = f107_o::add_112(a.c2, t, t); x3 = a.c3 + t; res.c0 = normalize_44(x0, x1, x2, x3, res.c1, res.c2, res.c3); #else #endif return res; } inline f215_o operator + ( f215_pf b, const f215_o & a ) { f215_o res; f215_pf x0, x1, x2, t, x3; #if F215_APPROX x0 = f107_o::add_112(a.c0, b, t); x1 = f107_o::add_112(a.c1, t, t); x2 = f107_o::add_112(a.c2, t, t); x3 = a.c3 + t; res.c0 = normalize_44(x0, x1, x2, x3, res.c1, res.c2, res.c3); #else #endif return res; } inline f215_o operator - ( const f215_o & a, const f215_o & b ) { f215_o res; f215_pf t0, t1, t2, t3, u, v, w, x, y2; f215_pf y3, z3, b3; #if F215_APPROX t0 = f107_o::sub_112(a.c0, b.c0, u); v = f107_o::sub_112(a.c1, b.c1, w); y2 = f107_o::sub_112(a.c2, b.c2, y3); z3 = a.c3 - b.c3; t1 = f107_o::add_112(u, v, x); t2 = f107_o::add_1112(w, x, y2, b3); t3 = b3 + y3 + z3; res.c0 = normalize_44(t0, t1, t2, t3, res.c1, res.c2, res.c3); #else #endif return res; } inline f215_o operator - ( const f215_o & a, f215_pf b ) { f215_o res; f215_pf x0, x1, x2, x3, t; #if F215_APPROX x0 = f107_o::sub_112(a.c0, b, t); x1 = f107_o::add_112(a.c1, t, t); x2 = f107_o::add_112(a.c2, t, t); x3 = a.c3 + t; res.c0 = normalize_44(x0, x1, x2, x3, res.c1, res.c2, res.c3); #else #endif return res; } inline f215_o operator - ( f215_pf b, const f215_o & a ) { f215_o res; f215_pf x0, x1, x2, x3, t; #if F215_APPROX x0 = f107_o::add_112(-(a.c0), b, t); x1 = f107_o::add_112(-(a.c1), t, t); x2 = f107_o::add_112(-(a.c2), t, t); x3 = t - a.c3; res.c0 = normalize_44(x0, x1, x2, x3, res.c1, res.c2, res.c3); #else #endif return res; } // ----------- comparison inline f107_s16 cmp_f215(f215_o a, f215_o b) { f107_s16 result; if (a.c0 > b.c0) { result = 1; } else if (a.c0 < b.c0) { result = -1; } else { if (a.c1 > b.c1) { result = 1; } else if (a.c1 < b.c1) { result = -1; } else { if (a.c2 > b.c2) { result = 1; } else if (a.c2 < b.c2) { result = -1; } else { if (a.c3 > b.c3) { result = 1; } else if (a.c3 < b.c3) { result = -1; } else { result = 0; } } } } return(result); } // Comparison (relational) operators inline int operator < ( const f215_o & lhs, const f215_o & rhs ) { return(cmp_f215(lhs, rhs) < 0); } inline int operator << ( const f215_o & lhs, const f215_o & rhs ) { return(lhs.c0 < rhs.c0); } inline int operator < ( const f215_o & lhs, f215_pf rhs ) { return ((lhs.c0 < rhs) || ((lhs.c0 == rhs) && (lhs.c1 < 0)) || ((lhs.c0 == rhs) && (lhs.c1 == 0) && (lhs.c2 < 0)) || ((lhs.c0 == rhs) && (lhs.c1 == 0) && (lhs.c2 == 0) && (lhs.c3 < 0)) ); } inline int operator << ( const f215_o & lhs, f215_pf rhs ) { return(lhs.c0 < rhs); } inline int operator < ( f215_pf lhs, const f215_o & rhs ) { return ((lhs < rhs.c0) || ((lhs == rhs.c0) && (rhs.c1 > 0)) || ((lhs == rhs.c0) && (rhs.c1 == 0) && (rhs.c2 > 0)) || ((lhs == rhs.c0) && (rhs.c1 == 0) && (rhs.c2 == 0) && (rhs.c3 > 0)) ); } inline int operator << ( f215_pf lhs, const f215_o & rhs ) { return (lhs < rhs.c0); } inline int operator > ( const f215_o & lhs, const f215_o & rhs ) { return (cmp_f215(lhs, rhs) > 0); } inline int operator >> ( const f215_o & lhs, const f215_o & rhs ) { return (lhs.c0 > rhs.c0); } inline int operator > ( f215_pf lhs, const f215_o & rhs ) { return ((lhs > rhs.c0) || ((lhs == rhs.c0) && (rhs.c1 < 0)) || ((lhs == rhs.c0) && (rhs.c1 == 0) && (rhs.c2 < 0)) || ((lhs == rhs.c0) && (rhs.c1 == 0) && (rhs.c2 == 0) && (rhs.c3 < 0)) ); } inline int operator >> ( f215_pf lhs, const f215_o & rhs ) { return (lhs > rhs.c0); } inline int operator > ( const f215_o & lhs, f215_pf rhs ) { return ((lhs.c0 > rhs) || ((lhs.c0 == rhs) && (lhs.c1 > 0)) || ((lhs.c0 == rhs) && (lhs.c1 == 0) && (lhs.c2 > 0)) || ((lhs.c0 == rhs) && (lhs.c1 == 0) && (lhs.c2 == 0) && (lhs.c3 > 0)) ); } inline int operator >> ( const f215_o & lhs, f215_pf rhs ) { return (lhs.c0 > rhs); } // ------------ building blocks for multiply // unary operators inline f215_o operator - ( const f215_o & lhs ) { f215_o res; res.c0 = -(lhs.c0); res.c1 = -(lhs.c1); res.c2 = -(lhs.c2); res.c3 = -(lhs.c3); return res; } #undef f215_pf #endif /* end of f215_o.h */