diff --git a/3rdparty/softfloat/f2xm1.c b/3rdparty/softfloat/f2xm1.c
new file mode 100644
index 00000000000..bd5db4471a7
--- /dev/null
+++ b/3rdparty/softfloat/f2xm1.c
@@ -0,0 +1,270 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+/* Both macros are defined ahead of the includes below; presumably they switch
+ * on the float128 type and the estimateDiv128To64 helper in those headers --
+ * TODO confirm against mamesf.h / softfloat.h. */
+#define FLOAT128
+
+#define USE_estimateDiv128To64
+#include "mamesf.h"
+#include "softfloat.h"
+#include "fpu_constant.h"
+
+/* Brace initializer for a float128 built from two 64-bit words, written in
+ * the order (zHi, zLo). PACK_FLOAT_128 additionally wraps each word in
+ * LIT64(). */
+#define packFloat_128(zHi, zLo) {(zHi), (zLo)}
+#define PACK_FLOAT_128(hi,lo) packFloat_128(LIT64(hi),LIT64(lo))
+
+/* -1.0 and -0.5 as floatx80 (sign set, exponent 0x3fff / 0x3ffe, only the top
+ * significand bit set). f2xm1() returns them for a negative infinite operand
+ * and for an operand of exactly -1.0, respectively. */
+static const floatx80 floatx80_negone  = packFloatx80(1, 0x3fff, 0x8000000000000000U);
+static const floatx80 floatx80_neghalf = packFloatx80(1, 0x3ffe, 0x8000000000000000U);
+/* ln(2) as a float128; f2xm1() multiplies its operand by this before calling
+ * poly_exp(). */
+static const float128 float128_ln2     =
+	packFloat_128(0x3ffe62e42fefa39eU, 0xf35793c7673007e6U);
+
+/* High and low 64-bit words of the ln(2) significand that f2xm1() feeds to
+ * mul128By64To192() on its tiny-argument path. */
+#define LN2_SIG_HI 0xb17217f7d1cf79abU
+#define LN2_SIG_LO 0xc000000000000000U    /* 67-bit precision */
+
+#define EXP_ARR_SIZE 15
+
+/* Series coefficients 1/k! for k = 1..15 as float128; the trailing comment on
+ * each entry is k. poly_exp() passes this table to EvalPoly(). */
+static float128 exp_arr[EXP_ARR_SIZE] =
+{
+	PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
+	PACK_FLOAT_128(0x3ffe000000000000, 0x0000000000000000), /*  2 */
+	PACK_FLOAT_128(0x3ffc555555555555, 0x5555555555555555), /*  3 */
+	PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /*  4 */
+	PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /*  5 */
+	PACK_FLOAT_128(0x3ff56c16c16c16c1, 0x6c16c16c16c16c17), /*  6 */
+	PACK_FLOAT_128(0x3ff2a01a01a01a01, 0xa01a01a01a01a01a), /*  7 */
+	PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /*  8 */
+	PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /*  9 */
+	PACK_FLOAT_128(0x3fe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
+	PACK_FLOAT_128(0x3fe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
+	PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
+	PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
+	PACK_FLOAT_128(0x3fda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
+	PACK_FLOAT_128(0x3fd6ae7f3e733b81, 0xf11d8656b0ee8cb0)  /* 15 */
+};
+
+/* Exponent value f2xm1() compares against (as EXP_BIAS-68) to select its
+ * tiny-argument path. */
+#define EXP_BIAS 0x3FFF
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloatx80Frac( floatx80 a )
+{
+	return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int32 extractFloatx80Exp( floatx80 a )
+{
+	return a.high & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloatx80Sign( floatx80 a )
+{
+	return a.high>>15;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'. The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+INLINE void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
+{
+	/* Shift the significand left by its leading-zero count and compensate in
+	 * the exponent. aSig is expected to be nonzero: f2xm1() returns early on
+	 * a zero significand before calling this. */
+	int shiftCount = countLeadingZeros64(aSig);
+	*zSigPtr = aSig<<shiftCount;
+	*zExpPtr = 1 - shiftCount;
+}
+
+//
+// Evaluates the polynomial
+//
+//   f(x) ~ C[0] + C[1]*x + C[2]*x^2 + ... + C[n-1]*x^(n-1)
+//
+// as two interleaved Horner recurrences in x^2, one over the even-indexed
+// and one over the odd-indexed coefficients:
+//
+//   p(x) = SUM C[2k]   * x^(2k)
+//   q(x) = SUM C[2k+1] * x^(2k)
+//
+//   f(x) ~ [ p(x) + x * q(x) ]
+//
+// `arr' holds the n coefficients C[0..n-1]; n must be greater than 1.
+//
+
+static float128 EvalPoly(float128 x, float128 *arr, unsigned n)
+{
+	float128 x2 = float128_mul(x, x);
+	unsigned i;
+
+	assert(n > 1);
+
+	/* First chain: starts at the highest coefficient arr[n-1] and steps down
+	 * two indices at a time. */
+	float128 r1 = arr[--n];
+	i = n;
+	while(i >= 2) {
+		r1 = float128_mul(r1, x2);
+		i -= 2;
+		r1 = float128_add(r1, arr[i]);
+	}
+	/* A chain that ends on index 1 covers the odd powers and still needs one
+	 * more factor of x. */
+	if (i) r1 = float128_mul(r1, x);
+
+	/* Second chain: same recurrence starting one coefficient lower. */
+	float128 r2 = arr[--n];
+	i = n;
+	while(i >= 2) {
+		r2 = float128_mul(r2, x2);
+		i -= 2;
+		r2 = float128_add(r2, arr[i]);
+	}
+	if (i) r2 = float128_mul(r2, x);
+
+	return float128_add(r1, r2);
+}
+
+/* required -1 < x < 1 */
+static float128 poly_exp(float128 x)
+{
+/*
+	//  x
+	// e  - 1 ~ x + x^2/2! + x^3/3! + ... + x^15/15!
+	//
+	//        = x * [ 1 + x/2! + x^2/3! + ... + x^14/15! ]
+	//
+	// The bracketed sum is EvalPoly() over exp_arr, whose entry k (counting
+	// from 0) is the coefficient of x^k.
+*/
+	float128 t = EvalPoly(x, exp_arr, EXP_ARR_SIZE);
+	return float128_mul(t, x);
+}
+
+// =================================================
+//                                  x
+// F2XM1                   Compute 2  - 1
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//      x    x*ln(2)
+//     2  = e
+//
+// 2. ----------------------------------------------------------
+//                      2     3     4     5           n
+//      x        x     x     x     x     x           x
+//     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
+//               1!    2!    3!    4!    5!          n!
+//
+// Returns 2^a - 1 for the extended double-precision operand `a'.
+//
+//   NaN                 -> propagateFloatx80NaN(a)
+//   -infinity           -> -1.0;  +infinity -> `a' unchanged
+//   zero                -> `a' unchanged
+//   denormal, or biased exponent below EXP_BIAS-68
+//                       -> a * ln(2), formed directly on the significands
+//   biased exponent below 0x3FFF otherwise
+//                       -> poly_exp(a * ln(2)) evaluated in float128
+//   exactly -1.0        -> -0.5
+//   any other operand with biased exponent >= 0x3FFF
+//                       -> `a' unchanged
+//
+// Raises float_flag_inexact on every path past the NaN/infinity/zero checks,
+// plus float_flag_denormal for denormal operands.
+//
+floatx80 f2xm1(floatx80 a)
+{
+	bits64 zSig0, zSig1, zSig2;
+
+	bits64 aSig = extractFloatx80Frac(a);
+	sbits32 aExp = extractFloatx80Exp(a);
+	int aSign = extractFloatx80Sign(a);
+
+	/* All-ones exponent: NaN when any bit below the top significand bit is
+	 * set, otherwise infinity. */
+	if (aExp == 0x7FFF) {
+		/* NOTE(review): called with a single operand here -- confirm that a
+		 * one-argument propagateFloatx80NaN is declared by the included
+		 * headers. */
+		if ((bits64) (aSig<<1))
+			return propagateFloatx80NaN(a);
+
+		return (aSign) ? floatx80_negone : a;
+	}
+
+	if (aExp == 0) {
+		if (aSig == 0) return a;
+		float_raise(float_flag_denormal | float_flag_inexact);
+		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+
+	/* Also entered by the goto further down, for normal operands whose
+	 * exponent is below EXP_BIAS-68. */
+	tiny_argument:
+		mul128By64To192(LN2_SIG_HI, LN2_SIG_LO, aSig, &zSig0, &zSig1, &zSig2);
+		/* Top bit of the product is clear: shift left one place and lower
+		 * the exponent to match. */
+		if (0 < (sbits64) zSig0) {
+			shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+			--aExp;
+		}
+		return
+			roundAndPackFloatx80(80, aSign, aExp, zSig0, zSig1);
+	}
+
+	float_raise(float_flag_inexact);
+
+	if (aExp < 0x3FFF)
+	{
+		/* Jumps back into the denormal branch above, past the flag raise
+		 * and the normalization. */
+		if (aExp < EXP_BIAS-68)
+			goto tiny_argument;
+
+		/* ******************************** */
+		/* using float128 for approximation */
+		/* ******************************** */
+
+		float128 x = floatx80_to_float128(a);
+		x = float128_mul(x, float128_ln2);
+		x = poly_exp(x);
+		return float128_to_floatx80(x);
+	}
+	else
+	{
+		/* 0xBFFF is sign set with exponent 0x3FFF; with no bits below the
+		 * top significand bit the operand is -1.0. */
+		if (a.high == 0xBFFF && ! (aSig<<1))
+			return floatx80_neghalf;
+
+		return a;
+	}
+}
diff --git a/3rdparty/softfloat/fpatan.c b/3rdparty/softfloat/fpatan.c index a9a0afabf4f..db4e1f983d2 100644 --- a/3rdparty/softfloat/fpatan.c +++ b/3rdparty/softfloat/fpatan.c @@ -372,7 +372,7 @@ return_PI_or_ZERO: approximation_completed: if (swap) x = float128_sub(float128_pi2, x); floatx80 result = float128_to_floatx80(x); - if (zSign) floatx80_chs(result); + if (zSign) result = floatx80_chs(result); int rSign = extractFloatx80Sign(result); if (!bSign && rSign) return floatx80_add(result, floatx80_pi); diff --git a/3rdparty/softfloat/fyl2x.c b/3rdparty/softfloat/fyl2x.c index 44c3e7610bc..ed75e3e569c 100644 --- a/3rdparty/softfloat/fyl2x.c +++ b/3rdparty/softfloat/fyl2x.c @@ -251,7 +251,7 @@ static float128 poly_l2p1(float128 x) // 1-u 3 5 7 2n+1 // -static floatx80 fyl2x(floatx80 a, floatx80 b) +floatx80 fyl2x(floatx80 a, floatx80 b) { uint64_t aSig = extractFloatx80Frac(a); int32_t aExp = extractFloatx80Exp(a); diff --git a/scripts/src/3rdparty.lua b/scripts/src/3rdparty.lua index 491450f60a3..abad8719c38 100644 --- a/scripts/src/3rdparty.lua +++ b/scripts/src/3rdparty.lua @@ -211,6 +211,7 @@ end MAME_DIR .. "3rdparty/softfloat/fsincos.c", MAME_DIR .. "3rdparty/softfloat/fpatan.c", MAME_DIR .. "3rdparty/softfloat/fyl2x.c", + MAME_DIR .. 
"3rdparty/softfloat/f2xm1.c", } diff --git a/src/devices/cpu/i386/x87ops.hxx b/src/devices/cpu/i386/x87ops.hxx index 6579778de87..c968f65a2b0 100644 --- a/src/devices/cpu/i386/x87ops.hxx +++ b/src/devices/cpu/i386/x87ops.hxx @@ -2381,12 +2381,9 @@ void i386_device::x87_f2xm1(uint8_t modrm) } else { - // TODO: Inaccurate - double x = fx80_to_double(ST(0)); - double res = pow(2.0, x) - 1; - result = double_to_fx80(res); + extern floatx80 f2xm1(floatx80 a); + result = f2xm1(ST(0)); } - if (x87_check_exceptions()) { x87_write_stack(0, result, true); @@ -2412,7 +2409,6 @@ void i386_device::x87_fyl2x(uint8_t modrm) else { floatx80 x = ST(0); - floatx80 y = ST(1); if (x.high & 0x8000) { @@ -2421,10 +2417,8 @@ void i386_device::x87_fyl2x(uint8_t modrm) } else { - // TODO: Inaccurate - double d64 = fx80_to_double(x); - double l2x = log(d64)/log(2.0); - result = floatx80_mul(double_to_fx80(l2x), y); + extern floatx80 fyl2x(floatx80 a, floatx80 b); + result = fyl2x(ST(0), ST(1)); } } @@ -2453,13 +2447,8 @@ void i386_device::x87_fyl2xp1(uint8_t modrm) } else { - floatx80 x = ST(0); - floatx80 y = ST(1); - - // TODO: Inaccurate - double d64 = fx80_to_double(x); - double l2x1 = log(d64 + 1.0)/log(2.0); - result = floatx80_mul(double_to_fx80(l2x1), y); + extern floatx80 fyl2xp1(floatx80 a, floatx80 b); + result = fyl2xp1(ST(0), ST(1)); } if (x87_check_exceptions()) @@ -2530,16 +2519,14 @@ void i386_device::x87_fpatan(uint8_t modrm) if (x87_mf_fault()) return; - if (X87_IS_ST_EMPTY(0)) + if (X87_IS_ST_EMPTY(0) || X87_IS_ST_EMPTY(1)) { x87_set_stack_underflow(); result = fx80_inan; } else { - // TODO: Inaccurate - double val = atan2(fx80_to_double(ST(1)) , fx80_to_double(ST(0))); - result = double_to_fx80(val); + result = floatx80_fpatan(ST(0), ST(1)); } if (x87_check_exceptions()) @@ -2653,7 +2640,7 @@ void i386_device::x87_fsincos(uint8_t modrm) s_result = c_result = ST(0); -#if 0 // TODO: Function produces bad values +#if 1 // TODO: Function produces bad values if 
(sf_fsincos(s_result, &s_result, &c_result) != -1) m_x87_sw &= ~X87_SW_C2; else