Improved speed of hyperbolic loop

This commit is contained in:
claudio 2014-05-10 15:08:34 -04:00
parent 02063e35cf
commit c942fb6a86
4 changed files with 176 additions and 33 deletions

View file

@ -48,6 +48,7 @@
* n > 0 and m >= n
* The calling function has to handle a possible final carry.
*/
mpd_uint_t
_mpd_baseadd(mpd_uint_t *w, const mpd_uint_t *u, const mpd_uint_t *v,
mpd_size_t m, mpd_size_t n)
@ -78,6 +79,7 @@ _mpd_baseadd(mpd_uint_t *w, const mpd_uint_t *u, const mpd_uint_t *v,
return carry;
}
/*
* Add the contents of u to w. Carries are propagated further. The caller
* has to make sure that w is big enough.

View file

@ -297,23 +297,28 @@ void LIB_HANDLER()
case EXP:
{
mpd_t x;
mpd_t dec;
if(rplDepthData()<1) {
Exceptions|=EX_BADARGCOUNT;
ExceptionPointer=IPtr;
return;
}
rplReadNumberAsReal(rplPeekData(1),&x);
rplReadNumberAsReal(rplPeekData(1),&dec);
if(Exceptions) return;
mpd_exp(&RReg[0],&x,&Context);
if(Exceptions) return;
hyp_exp(&dec);
BINT exponent=RReg[0].exp;
RReg[0].exp=Context.prec-RReg[0].digits;
mpd_round_to_intx(&RReg[2],&RReg[0],&Context); // ROUND TO THE REQUESTED PRECISION
RReg[2].exp=exponent-RReg[0].exp;
mpd_reduce(&RReg[1],&RReg[2],&Context);
rplDropData(1);
rplRRegToRealPush(0);
rplRRegToRealPush(1); // EXP
return;
}
case SINH:
{
@ -328,11 +333,11 @@ void LIB_HANDLER()
hyp_sinhcosh(&dec);
BINT exponent=RReg[7].exp;
RReg[7].exp=Context.prec-RReg[7].digits;
mpd_round_to_intx(&RReg[2],&RReg[7],&Context); // ROUND TO THE REQUESTED PRECISION
RReg[2].exp=exponent+(RReg[7].digits-RReg[2].digits);
mpd_reduce(&RReg[0],&RReg[2],&Context);
BINT exponent=RReg[2].exp;
RReg[2].exp=Context.prec-RReg[2].digits;
mpd_round_to_intx(&RReg[7],&RReg[2],&Context); // ROUND TO THE REQUESTED PRECISION
RReg[7].exp=exponent-RReg[2].exp;
mpd_reduce(&RReg[0],&RReg[7],&Context);
rplDropData(1);
rplRRegToRealPush(0); // SINH
@ -352,10 +357,12 @@ void LIB_HANDLER()
if(Exceptions) return;
hyp_sinhcosh(&dec);
RReg[6].exp+=Context.prec;
mpd_round_to_intx(&RReg[1],&RReg[6],&Context); // ROUND TO THE REQUESTED PRECISION
RReg[1].exp-=Context.prec;
mpd_reduce(&RReg[0],&RReg[1],&Context);
BINT exponent=RReg[1].exp;
RReg[1].exp=Context.prec-RReg[1].digits;
mpd_round_to_intx(&RReg[2],&RReg[1],&Context); // ROUND TO THE REQUESTED PRECISION
RReg[2].exp=exponent-RReg[1].exp;
mpd_reduce(&RReg[0],&RReg[2],&Context);
rplDropData(1);
rplRRegToRealPush(0); // COSH

View file

@ -280,8 +280,7 @@ BYTEPTR testprogram=(BYTEPTR) "2025 SETPREC "
*/
BYTEPTR testprogram=(BYTEPTR) "2007 SETPREC "
" -9.2345678E3 CEXP "
" -9.2345678E3 SINH -9.2345678E3 COSH + "
" 1.2345678 11 FOR I I SINH NEXT "
;

View file

@ -773,6 +773,7 @@ znext=&RReg[5];
xnext=&RReg[6];
ynext=&RReg[7];
digits=(digits+1)>>1;
for(exponent=startexp;exponent<startexp+digits;++exponent)
{
@ -812,12 +813,147 @@ for(exponent=startexp;exponent<startexp+digits;++exponent)
}
// THE FINAL RESULTS ARE ALWAYS IN RREG[0], RREG[1] AND RREG[2]
// FINAL ROTATION SHOULD NOT AFFECT THE Kh CONSTANT
mpd_qfma(xnext,z,y,x,&Context,&status); // x(i+1)=x(i)+S(i)*y(i)
mpd_qfma(ynext,z,x,y,&Context,&status); // y(i+1)=y(i)+S(i)*x(i)
// THE FINAL RESULTS ARE ALWAYS IN RREG[6] AND RREG[7]
// RESULTS HAVE TYPICALLY 9 DIGITS MORE THAN REQUIRED, NONE OF THEM ARE ACCURATE
// SO ROUNDING/FINALIZING IS NEEDED
}
static void CORDIC_Hyp_Rotational_unrolled(int digits,int startexp)
{
int exponent;
uint32_t status;
mpd_t *x,*y,*z;
mpd_t *xnext,*ynext,*znext;
// USE RReg[0]=z; RReg[1]=x; RReg[2]=y;
// THE INITIAL VALUES MUST'VE BEEN SET
z=&RReg[0];
x=&RReg[1];
y=&RReg[2];
znext=&RReg[5];
xnext=&RReg[6];
ynext=&RReg[7];
digits=(digits+1)>>1;
for(exponent=startexp;exponent<startexp+digits;++exponent)
{
// ITERATION W/5
// RReg[3]= (5*10^-exponent)*y
mpd_qadd(&RReg[3],y,y,&Context,&status);
mpd_qadd(&RReg[4],&RReg[3],&RReg[3],&Context,&status);
mpd_qadd(&RReg[3],&RReg[4],y,&Context,&status);
RReg[3].exp-=exponent;
if(!(z->flags&MPD_NEG)) RReg[3].flags&=~MPD_NEG;
else RReg[3].flags|=MPD_NEG;
mpd_qadd(xnext,&RReg[3],x,&Context,&status); // x(i+1)=x(i)+S(i)*y(i)
mpd_qadd(&RReg[3],x,x,&Context,&status);
mpd_qadd(&RReg[4],&RReg[3],&RReg[3],&Context,&status);
mpd_qadd(&RReg[3],&RReg[4],x,&Context,&status);
RReg[3].exp-=exponent;
if(!(z->flags&MPD_NEG)) RReg[3].flags&=~MPD_NEG;
else RReg[3].flags|=MPD_NEG;
mpd_qadd(ynext,&RReg[3],y,&Context,&status); // y(i+1)=y(i)+S(i)*x(i)
atanh_5_table(exponent,&RReg[4]); // GET Alpha(i)
RReg[4].flags|=z->flags&MPD_NEG;
mpd_qsub(znext,z,&RReg[4],&Context,&status); // z(i+1)=z(i)-Alpha(i)
// FIRST ITERATION WITH 2
// RReg[3]= (2*10^-exponent)*y
mpd_qadd(&RReg[3],ynext,ynext,&Context,&status);
RReg[3].exp-=exponent;
if(!(znext->flags&MPD_NEG)) RReg[3].flags&=~MPD_NEG;
else RReg[3].flags|=MPD_NEG;
mpd_qadd(x,&RReg[3],xnext,&Context,&status); // x(i+1)=x(i)+S(i)*y(i)
mpd_qadd(&RReg[3],xnext,xnext,&Context,&status);
RReg[3].exp-=exponent;
if(!(znext->flags&MPD_NEG)) RReg[3].flags&=~MPD_NEG;
else RReg[3].flags|=MPD_NEG;
mpd_qadd(y,&RReg[3],ynext,&Context,&status); // y(i+1)=y(i)+S(i)*x(i)
atanh_2_table(exponent,&RReg[4]); // GET Alpha(i)
RReg[4].flags|=znext->flags&MPD_NEG;
mpd_qsub(z,znext,&RReg[4],&Context,&status); // z(i+1)=z(i)-Alpha(i)
// SECOND ITERATION WITH 2
mpd_qadd(&RReg[3],y,y,&Context,&status);
RReg[3].exp-=exponent;
if(!(z->flags&MPD_NEG)) RReg[3].flags&=~MPD_NEG;
else RReg[3].flags|=MPD_NEG;
mpd_qadd(xnext,&RReg[3],x,&Context,&status); // x(i+1)=x(i)+S(i)*y(i)
mpd_qadd(&RReg[3],x,x,&Context,&status);
RReg[3].exp-=exponent;
if(!(z->flags&MPD_NEG)) RReg[3].flags&=~MPD_NEG;
else RReg[3].flags|=MPD_NEG;
mpd_qadd(ynext,&RReg[3],y,&Context,&status); // y(i+1)=y(i)+S(i)*x(i)
RReg[4].flags&=~MPD_NEG;
RReg[4].flags|=z->flags&MPD_NEG;
mpd_qsub(znext,z,&RReg[4],&Context,&status); // z(i+1)=z(i)-Alpha(i)
// ITERATION WITH 1
ynext->exp-=exponent;
if(!(znext->flags&MPD_NEG)) {
mpd_qadd(x,ynext,xnext,&Context,&status); // x(i+1)=x(i)+S(i)*y(i)
}
else {
mpd_qsub(x,xnext,ynext,&Context,&status); // x(i+1)=x(i)+S(i)*y(i)
}
ynext->exp+=exponent;
xnext->exp-=exponent;
if(!(znext->flags&MPD_NEG)) {
mpd_qadd(y,ynext,xnext,&Context,&status); // x(i+1)=x(i)+S(i)*y(i)
}
else {
mpd_qsub(y,ynext,xnext,&Context,&status); // x(i+1)=x(i)+S(i)*y(i)
}
xnext->exp+=exponent;
atanh_1_table(exponent,&RReg[4]); // GET Alpha(i)
RReg[4].flags|=znext->flags&MPD_NEG;
mpd_qsub(z,znext,&RReg[4],&Context,&status); // z(i+1)=z(i)-Alpha(i)
}
// THE FINAL RESULTS ARE ALWAYS IN RREG[0], RREG[1] AND RREG[2]
// FINAL ROTATION SHOULD NOT AFFECT THE Kh CONSTANT
mpd_qfma(xnext,z,y,x,&Context,&status); // x(i+1)=x(i)+S(i)*y(i)
mpd_qfma(ynext,z,x,y,&Context,&status); // y(i+1)=y(i)+S(i)*x(i)
// THE FINAL RESULTS ARE ALWAYS IN RREG[6] AND RREG[7]
// RESULTS HAVE TYPICALLY 9 DIGITS MORE THAN REQUIRED, NONE OF THEM ARE ACCURATE
// SO ROUNDING/FINALIZING IS NEEDED
}
// CALCULATES EXP(x0), AND RETURNS IT IN RREG[0]
void hyp_exp(mpd_t *x0)
{
@ -909,7 +1045,7 @@ Context.prec-=MPD_RDIGITS;
}
// CALCULATES SINH(x0) AND COSH(x0), AND RETURNS THEM IN RREG[1] AND RREG[2]
void hyp_sinhcosh(mpd_t *x0)
{
@ -941,9 +1077,9 @@ if(mpd_cmp(x0,&RReg[7],&Context)==-1) {
// WE ARE DEALING WITH SMALL ANGLES
quotient=0;
startexp=-x0->exp-x0->digits;
startexp=-x0->exp-x0->digits+1;
assert(startexp>1);
if(startexp<1) startexp=1;
mpd_copy(&RReg[0],x0,&Context);
}
else {
@ -984,7 +1120,7 @@ if(mpd_cmp(&RReg[0],&ln10_2,&Context)==1) {
// z=x0
// THE REDUCED ANGLE IS ALREADY IN RReg[0]
RReg[0].flags^=isneg;
//RReg[0].flags^=isneg;
// RReg[6]=0.5;
@ -1011,31 +1147,30 @@ mpd_sub(&RReg[2],&RReg[6],&RReg[7],&Context);
// RReg[1] = x0
// RReg[2] = y0
CORDIC_Hyp_Rotational((Context.prec>REAL_PRECISION_MAX)? REAL_PRECISION_MAX+9:Context.prec,startexp);
CORDIC_Hyp_Rotational_unrolled((Context.prec>REAL_PRECISION_MAX)? REAL_PRECISION_MAX+9:Context.prec,startexp);
// HERE RReg[0] = angle residue
// RReg[1] = cosh(angle)-1
// RReg[2] = sinh(angle)
// HERE:
// RReg[6] = cosh(angle)
// RReg[7] = sinh(angle)
// ADJUST BY PROPER CONSTANT Kh
const_Kh_table(startexp,&RReg[3]);
//mpd_fma(&RReg[6],&RReg[1],&RReg[3],&RReg[3],&Context);
mpd_mul(&RReg[6],&RReg[1],&RReg[3],&Context);
mpd_mul(&RReg[7],&RReg[2],&RReg[3],&Context);
mpd_mul(&RReg[1],&RReg[6],&RReg[3],&Context);
mpd_mul(&RReg[2],&RReg[7],&RReg[3],&Context);
// ADJUST RESULT BY N
RReg[6].exp+=quotient;
RReg[7].exp+=quotient;
RReg[1].exp+=quotient;
RReg[2].exp+=quotient;
// AND ADJUST THE SIGN FOR SINH
RReg[7].flags|=isneg;
RReg[2].flags^=isneg;
Context.prec-=MPD_RDIGITS;
// RETURNED RESULTS IN RREG[6] AND RREG[7]
// RETURNED RESULTS IN RREG[1] AND RREG[2]
}