Tarantool development patches archive
 help / color / mirror / Atom feed
From: Sergey Bronnikov via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Sergey Kaplun <skaplun@tarantool.org>,
	Maxim Kokryashkin <m.kokryashkin@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: Re: [Tarantool-patches] [PATCH luajit 2/5] Remove pow() splitting and cleanup backends.
Date: Fri, 18 Aug 2023 14:08:41 +0300	[thread overview]
Message-ID: <2ea9a274-6d29-e440-44e1-45e9eb66ec34@tarantool.org> (raw)
In-Reply-To: <04224626635ddc4c5bb3341088dddd0d310f7e9f.1692089299.git.skaplun@tarantool.org>

Hi, Sergey


thanks for the patch! LGTM


On 8/15/23 12:36, Sergey Kaplun wrote:
> From: Mike Pall <mike>
>
> (cherry-picked from commit b2307c8ad817e350d65cc909a579ca2f77439682)
>
> The JIT engine tries to split b^c to exp2(c * log2(b)) with an attempt to
> rejoin them later for some backends. It adds a dependency on C99
> exp2() and log2(), which aren't part of some libm implementations.
> Also, for some cases for IEEE754 we can see that exp2(log2(x)) != x,
> due to mathematical functions accuracy and double precision
> restrictions. So, the values on the JIT slots and Lua stack are
> inconsistent.
>
> This patch removes splitting of the pow operator, so IR_POW is emitted for
> all cases (except power of 0.5 replaced with sqrt operation).
>
> Also this patch does some refactoring:
>
> * Functions `asm_pow()`, `asm_mod()`, `asm_ldexp()`, `asm_div()`
>    (replaced with `asm_fpdiv()` for CPU architectures) are moved to the
>    <src/lj_asm.c> as far as their implementation is generic for all
>    architectures.
> * Fusing of IR_HREF + IR_EQ/IR_NE moved to a `asm_fuseequal()`.
> * The `lj_vm_exp2()` subroutine and `IRFPM_EXP2` are removed as they are
>    no longer used.
>
> Sergey Kaplun:
> * added the description and the test for the problem
>
> Part of tarantool/tarantool#8825
> ---
>   src/lj_arch.h                                 |   3 -
>   src/lj_asm.c                                  | 106 +++++++++++-------
>   src/lj_asm_arm.h                              |  10 +-
>   src/lj_asm_arm64.h                            |  39 +------
>   src/lj_asm_mips.h                             |  38 +------
>   src/lj_asm_ppc.h                              |   9 +-
>   src/lj_asm_x86.h                              |  37 +-----
>   src/lj_ir.h                                   |   2 +-
>   src/lj_ircall.h                               |   1 -
>   src/lj_opt_fold.c                             |  18 ++-
>   src/lj_opt_narrow.c                           |  20 +---
>   src/lj_opt_split.c                            |  21 ----
>   src/lj_vm.h                                   |   5 -
>   src/lj_vmmath.c                               |   8 --
>   .../lj-9-pow-inconsistencies.test.lua         |  63 +++++++++++
>   15 files changed, 158 insertions(+), 222 deletions(-)
>   create mode 100644 test/tarantool-tests/lj-9-pow-inconsistencies.test.lua
>
> diff --git a/src/lj_arch.h b/src/lj_arch.h
> index cf31a291..3bdbe84e 100644
> --- a/src/lj_arch.h
> +++ b/src/lj_arch.h
> @@ -607,9 +607,6 @@
>   #if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
>   #define LUAJIT_NO_LOG2
>   #endif
> -#if defined(__symbian__) || LJ_TARGET_WINDOWS
> -#define LUAJIT_NO_EXP2
> -#endif
>   #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
>   #define LJ_NO_SYSTEM		1
>   #endif
> diff --git a/src/lj_asm.c b/src/lj_asm.c
> index b352fd35..a6906b19 100644
> --- a/src/lj_asm.c
> +++ b/src/lj_asm.c
> @@ -1356,32 +1356,6 @@ static void asm_call(ASMState *as, IRIns *ir)
>     asm_gencall(as, ci, args);
>   }
>   
> -#if !LJ_SOFTFP32
> -static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
> -{
> -  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
> -  IRRef args[2];
> -  args[0] = lref;
> -  args[1] = rref;
> -  asm_setupresult(as, ir, ci);
> -  asm_gencall(as, ci, args);
> -}
> -
> -static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
> -{
> -  IRIns *irp = IR(ir->op1);
> -  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
> -    IRIns *irpp = IR(irp->op1);
> -    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
> -	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
> -      asm_fppow(as, ir, irpp->op1, irp->op2);
> -      return 1;
> -    }
> -  }
> -  return 0;
> -}
> -#endif
> -
>   /* -- PHI and loop handling ----------------------------------------------- */
>   
>   /* Break a PHI cycle by renaming to a free register (evict if needed). */
> @@ -1652,6 +1626,62 @@ static void asm_loop(ASMState *as)
>   #error "Missing assembler for target CPU"
>   #endif
>   
> +/* -- Common instruction helpers ------------------------------------------ */
> +
> +#if !LJ_SOFTFP32
> +#if !LJ_TARGET_X86ORX64
> +#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
> +#define asm_fppowi(as, ir)	asm_callid(as, ir, IRCALL_lj_vm_powi)
> +#endif
> +
> +static void asm_pow(ASMState *as, IRIns *ir)
> +{
> +#if LJ_64 && LJ_HASFFI
> +  if (!irt_isnum(ir->t))
> +    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
> +					  IRCALL_lj_carith_powu64);
> +  else
> +#endif
> +  if (irt_isnum(IR(ir->op2)->t))
> +    asm_callid(as, ir, IRCALL_pow);
> +  else
> +    asm_fppowi(as, ir);
> +}
> +
> +static void asm_div(ASMState *as, IRIns *ir)
> +{
> +#if LJ_64 && LJ_HASFFI
> +  if (!irt_isnum(ir->t))
> +    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
> +					  IRCALL_lj_carith_divu64);
> +  else
> +#endif
> +    asm_fpdiv(as, ir);
> +}
> +#endif
> +
> +static void asm_mod(ASMState *as, IRIns *ir)
> +{
> +#if LJ_64 && LJ_HASFFI
> +  if (!irt_isint(ir->t))
> +    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
> +					  IRCALL_lj_carith_modu64);
> +  else
> +#endif
> +    asm_callid(as, ir, IRCALL_lj_vm_modi);
> +}
> +
> +static void asm_fuseequal(ASMState *as, IRIns *ir)
> +{
> +  /* Fuse HREF + EQ/NE. */
> +  if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
> +    as->curins--;
> +    asm_href(as, ir-1, (IROp)ir->o);
> +  } else {
> +    asm_equal(as, ir);
> +  }
> +}
> +
>   /* -- Instruction dispatch ------------------------------------------------ */
>   
>   /* Assemble a single instruction. */
> @@ -1674,14 +1704,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
>     case IR_ABC:
>       asm_comp(as, ir);
>       break;
> -  case IR_EQ: case IR_NE:
> -    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
> -      as->curins--;
> -      asm_href(as, ir-1, (IROp)ir->o);
> -    } else {
> -      asm_equal(as, ir);
> -    }
> -    break;
> +  case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
>   
>     case IR_RETF: asm_retf(as, ir); break;
>   
> @@ -1750,7 +1773,13 @@ static void asm_ir(ASMState *as, IRIns *ir)
>     case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
>     case IR_TNEW: asm_tnew(as, ir); break;
>     case IR_TDUP: asm_tdup(as, ir); break;
> -  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
> +  case IR_CNEW: case IR_CNEWI:
> +#if LJ_HASFFI
> +    asm_cnew(as, ir);
> +#else
> +    lua_assert(0);
> +#endif
> +    break;
>   
>     /* Buffer operations. */
>     case IR_BUFHDR: asm_bufhdr(as, ir); break;
> @@ -2215,6 +2244,10 @@ static void asm_setup_regsp(ASMState *as)
>   	if (inloop)
>   	  as->modset |= RSET_SCRATCH;
>   #if LJ_TARGET_X86
> +	if (irt_isnum(IR(ir->op2)->t)) {
> +	  if (as->evenspill < 4)  /* Leave room to call pow(). */
> +	    as->evenspill = 4;
> +	}
>   	break;
>   #else
>   	ir->prev = REGSP_HINT(RID_FPRET);
> @@ -2240,9 +2273,6 @@ static void asm_setup_regsp(ASMState *as)
>   	  continue;
>   	}
>   	break;
> -      } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
> -	if (as->evenspill < 4)  /* Leave room to call pow(). */
> -	  as->evenspill = 4;
>         }
>   #endif
>         if (inloop)
> diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
> index 2894e5c9..29a07c80 100644
> --- a/src/lj_asm_arm.h
> +++ b/src/lj_asm_arm.h
> @@ -1275,8 +1275,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
>     ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
>   	       ra_releasetmp(as, ASMREF_TMP1));
>   }
> -#else
> -#define asm_cnew(as, ir)	((void)0)
>   #endif
>   
>   /* -- Write barriers ------------------------------------------------------ */
> @@ -1371,8 +1369,6 @@ static void asm_callround(ASMState *as, IRIns *ir, int id)
>   
>   static void asm_fpmath(ASMState *as, IRIns *ir)
>   {
> -  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
> -    return;
>     if (ir->op2 <= IRFPM_TRUNC)
>       asm_callround(as, ir, ir->op2);
>     else if (ir->op2 == IRFPM_SQRT)
> @@ -1499,14 +1495,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
>   #define asm_mulov(as, ir)	asm_mul(as, ir)
>   
>   #if !LJ_SOFTFP
> -#define asm_div(as, ir)		asm_fparith(as, ir, ARMI_VDIV_D)
> -#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
> +#define asm_fpdiv(as, ir)	asm_fparith(as, ir, ARMI_VDIV_D)
>   #define asm_abs(as, ir)		asm_fpunary(as, ir, ARMI_VABS_D)
> -#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
>   #endif
>   
> -#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
> -
>   static void asm_neg(ASMState *as, IRIns *ir)
>   {
>   #if !LJ_SOFTFP
> diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
> index aea251a9..c3d6889e 100644
> --- a/src/lj_asm_arm64.h
> +++ b/src/lj_asm_arm64.h
> @@ -1249,8 +1249,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
>     ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
>   	       ra_releasetmp(as, ASMREF_TMP1));
>   }
> -#else
> -#define asm_cnew(as, ir)	((void)0)
>   #endif
>   
>   /* -- Write barriers ------------------------------------------------------ */
> @@ -1327,8 +1325,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
>     } else if (fpm <= IRFPM_TRUNC) {
>       asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
>   			fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
> -  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
> -    return;
>     } else {
>       asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
>     }
> @@ -1435,45 +1431,12 @@ static void asm_mul(ASMState *as, IRIns *ir)
>     asm_intmul(as, ir);
>   }
>   
> -static void asm_div(ASMState *as, IRIns *ir)
> -{
> -#if LJ_HASFFI
> -  if (!irt_isnum(ir->t))
> -    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
> -					  IRCALL_lj_carith_divu64);
> -  else
> -#endif
> -    asm_fparith(as, ir, A64I_FDIVd);
> -}
> -
> -static void asm_pow(ASMState *as, IRIns *ir)
> -{
> -#if LJ_HASFFI
> -  if (!irt_isnum(ir->t))
> -    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
> -					  IRCALL_lj_carith_powu64);
> -  else
> -#endif
> -    asm_callid(as, ir, IRCALL_lj_vm_powi);
> -}
> -
>   #define asm_addov(as, ir)	asm_add(as, ir)
>   #define asm_subov(as, ir)	asm_sub(as, ir)
>   #define asm_mulov(as, ir)	asm_mul(as, ir)
>   
> +#define asm_fpdiv(as, ir)	asm_fparith(as, ir, A64I_FDIVd)
>   #define asm_abs(as, ir)		asm_fpunary(as, ir, A64I_FABS)
> -#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
> -
> -static void asm_mod(ASMState *as, IRIns *ir)
> -{
> -#if LJ_HASFFI
> -  if (!irt_isint(ir->t))
> -    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
> -					  IRCALL_lj_carith_modu64);
> -  else
> -#endif
> -    asm_callid(as, ir, IRCALL_lj_vm_modi);
> -}
>   
>   static void asm_neg(ASMState *as, IRIns *ir)
>   {
> diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
> index 4626507b..0f92959b 100644
> --- a/src/lj_asm_mips.h
> +++ b/src/lj_asm_mips.h
> @@ -1613,8 +1613,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
>     ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
>   	       ra_releasetmp(as, ASMREF_TMP1));
>   }
> -#else
> -#define asm_cnew(as, ir)	((void)0)
>   #endif
>   
>   /* -- Write barriers ------------------------------------------------------ */
> @@ -1683,8 +1681,6 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
>   #if !LJ_SOFTFP32
>   static void asm_fpmath(ASMState *as, IRIns *ir)
>   {
> -  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
> -    return;
>   #if !LJ_SOFTFP
>     if (ir->op2 <= IRFPM_TRUNC)
>       asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
> @@ -1772,41 +1768,13 @@ static void asm_mul(ASMState *as, IRIns *ir)
>     }
>   }
>   
> -static void asm_mod(ASMState *as, IRIns *ir)
> -{
> -#if LJ_64 && LJ_HASFFI
> -  if (!irt_isint(ir->t))
> -    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
> -					  IRCALL_lj_carith_modu64);
> -  else
> -#endif
> -    asm_callid(as, ir, IRCALL_lj_vm_modi);
> -}
> -
>   #if !LJ_SOFTFP32
> -static void asm_pow(ASMState *as, IRIns *ir)
> -{
> -#if LJ_64 && LJ_HASFFI
> -  if (!irt_isnum(ir->t))
> -    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
> -					  IRCALL_lj_carith_powu64);
> -  else
> -#endif
> -    asm_callid(as, ir, IRCALL_lj_vm_powi);
> -}
> -
> -static void asm_div(ASMState *as, IRIns *ir)
> +static void asm_fpdiv(ASMState *as, IRIns *ir)
>   {
> -#if LJ_64 && LJ_HASFFI
> -  if (!irt_isnum(ir->t))
> -    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
> -					  IRCALL_lj_carith_divu64);
> -  else
> -#endif
>   #if !LJ_SOFTFP
>       asm_fparith(as, ir, MIPSI_DIV_D);
>   #else
> -  asm_callid(as, ir, IRCALL_softfp_div);
> +    asm_callid(as, ir, IRCALL_softfp_div);
>   #endif
>   }
>   #endif
> @@ -1844,8 +1812,6 @@ static void asm_abs(ASMState *as, IRIns *ir)
>   }
>   #endif
>   
> -#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
> -
>   static void asm_arithov(ASMState *as, IRIns *ir)
>   {
>     /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */
> diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
> index 6aaed058..62a5c3e2 100644
> --- a/src/lj_asm_ppc.h
> +++ b/src/lj_asm_ppc.h
> @@ -1177,8 +1177,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
>     ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
>   	       ra_releasetmp(as, ASMREF_TMP1));
>   }
> -#else
> -#define asm_cnew(as, ir)	((void)0)
>   #endif
>   
>   /* -- Write barriers ------------------------------------------------------ */
> @@ -1249,8 +1247,6 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
>   
>   static void asm_fpmath(ASMState *as, IRIns *ir)
>   {
> -  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
> -    return;
>     if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
>       asm_fpunary(as, ir, PPCI_FSQRT);
>     else
> @@ -1364,9 +1360,7 @@ static void asm_mul(ASMState *as, IRIns *ir)
>     }
>   }
>   
> -#define asm_div(as, ir)		asm_fparith(as, ir, PPCI_FDIV)
> -#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
> -#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
> +#define asm_fpdiv(as, ir)	asm_fparith(as, ir, PPCI_FDIV)
>   
>   static void asm_neg(ASMState *as, IRIns *ir)
>   {
> @@ -1390,7 +1384,6 @@ static void asm_neg(ASMState *as, IRIns *ir)
>   }
>   
>   #define asm_abs(as, ir)		asm_fpunary(as, ir, PPCI_FABS)
> -#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
>   
>   static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
>   {
> diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
> index 63d332ca..5f5fe3cf 100644
> --- a/src/lj_asm_x86.h
> +++ b/src/lj_asm_x86.h
> @@ -1857,8 +1857,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
>     asm_gencall(as, ci, args);
>     emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
>   }
> -#else
> -#define asm_cnew(as, ir)	((void)0)
>   #endif
>   
>   /* -- Write barriers ------------------------------------------------------ */
> @@ -1964,8 +1962,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
>   		    fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
>         ra_left(as, RID_XMM0, ir->op1);
>       }
> -  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
> -    /* Rejoined to pow(). */
>     } else {
>       asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
>     }
> @@ -2000,17 +1996,6 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
>     ra_left(as, RID_EAX, ir->op2);
>   }
>   
> -static void asm_pow(ASMState *as, IRIns *ir)
> -{
> -#if LJ_64 && LJ_HASFFI
> -  if (!irt_isnum(ir->t))
> -    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
> -					  IRCALL_lj_carith_powu64);
> -  else
> -#endif
> -    asm_fppowi(as, ir);
> -}
> -
>   static int asm_swapops(ASMState *as, IRIns *ir)
>   {
>     IRIns *irl = IR(ir->op1);
> @@ -2208,27 +2193,7 @@ static void asm_mul(ASMState *as, IRIns *ir)
>       asm_intarith(as, ir, XOg_X_IMUL);
>   }
>   
> -static void asm_div(ASMState *as, IRIns *ir)
> -{
> -#if LJ_64 && LJ_HASFFI
> -  if (!irt_isnum(ir->t))
> -    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
> -					  IRCALL_lj_carith_divu64);
> -  else
> -#endif
> -    asm_fparith(as, ir, XO_DIVSD);
> -}
> -
> -static void asm_mod(ASMState *as, IRIns *ir)
> -{
> -#if LJ_64 && LJ_HASFFI
> -  if (!irt_isint(ir->t))
> -    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
> -					  IRCALL_lj_carith_modu64);
> -  else
> -#endif
> -    asm_callid(as, ir, IRCALL_lj_vm_modi);
> -}
> +#define asm_fpdiv(as, ir)	asm_fparith(as, ir, XO_DIVSD)
>   
>   static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
>   {
> diff --git a/src/lj_ir.h b/src/lj_ir.h
> index e8bca275..43e55069 100644
> --- a/src/lj_ir.h
> +++ b/src/lj_ir.h
> @@ -177,7 +177,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE);
>   /* FPMATH sub-functions. ORDER FPM. */
>   #define IRFPMDEF(_) \
>     _(FLOOR) _(CEIL) _(TRUNC)  /* Must be first and in this order. */ \
> -  _(SQRT) _(EXP2) _(LOG) _(LOG2) \
> +  _(SQRT) _(LOG) _(LOG2) \
>     _(OTHER)
>   
>   typedef enum {
> diff --git a/src/lj_ircall.h b/src/lj_ircall.h
> index bbad35b1..af064a6f 100644
> --- a/src/lj_ircall.h
> +++ b/src/lj_ircall.h
> @@ -192,7 +192,6 @@ typedef struct CCallInfo {
>     _(FPMATH,	lj_vm_ceil,		1,   N, NUM, XA_FP) \
>     _(FPMATH,	lj_vm_trunc,		1,   N, NUM, XA_FP) \
>     _(FPMATH,	sqrt,			1,   N, NUM, XA_FP) \
> -  _(ANY,	lj_vm_exp2,		1,   N, NUM, XA_FP) \
>     _(ANY,	log,			1,   N, NUM, XA_FP) \
>     _(ANY,	lj_vm_log2,		1,   N, NUM, XA_FP) \
>     _(ANY,	lj_vm_powi,		2,   N, NUM, XA_FP) \
> diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
> index 27e489af..cd803d87 100644
> --- a/src/lj_opt_fold.c
> +++ b/src/lj_opt_fold.c
> @@ -237,10 +237,11 @@ LJFOLDF(kfold_fpcall2)
>   }
>   
>   LJFOLD(POW KNUM KINT)
> +LJFOLD(POW KNUM KNUM)
>   LJFOLDF(kfold_numpow)
>   {
>     lua_Number a = knumleft;
> -  lua_Number b = (lua_Number)fright->i;
> +  lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
>     lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
>     return lj_ir_knum(J, y);
>   }
> @@ -1077,7 +1078,7 @@ LJFOLDF(simplify_nummuldiv_negneg)
>   }
>   
>   LJFOLD(POW any KINT)
> -LJFOLDF(simplify_numpow_xk)
> +LJFOLDF(simplify_numpow_xkint)
>   {
>     int32_t k = fright->i;
>     TRef ref = fins->op1;
> @@ -1106,13 +1107,22 @@ LJFOLDF(simplify_numpow_xk)
>     return ref;
>   }
>   
> +LJFOLD(POW any KNUM)
> +LJFOLDF(simplify_numpow_xknum)
> +{
> +  if (knumright == 0.5)  /* x ^ 0.5 ==> sqrt(x) */
> +    return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT);
> +  return NEXTFOLD;
> +}
> +
>   LJFOLD(POW KNUM any)
>   LJFOLDF(simplify_numpow_kx)
>   {
>     lua_Number n = knumleft;
> -  if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
> -    fins->o = IR_CONV;
> +  if (n == 2.0 && irt_isint(fright->t)) {  /* 2.0 ^ i ==> ldexp(1.0, i) */
>   #if LJ_TARGET_X86ORX64
> +    /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */
> +    fins->o = IR_CONV;
>       fins->op1 = fins->op2;
>       fins->op2 = IRCONV_NUM_INT;
>       fins->op2 = (IRRef1)lj_opt_fold(J);
> diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
> index bb61f97b..4f285334 100644
> --- a/src/lj_opt_narrow.c
> +++ b/src/lj_opt_narrow.c
> @@ -593,10 +593,10 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
>     /* Narrowing must be unconditional to preserve (-x)^i semantics. */
>     if (tvisint(vc) || numisint(numV(vc))) {
>       int checkrange = 0;
> -    /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */
> +    /* pow() is faster for bigger exponents. But do this only for (+k)^i. */
>       if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
>         int32_t k = numberVint(vc);
> -      if (!(k >= -65536 && k <= 65536)) goto split_pow;
> +      if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
>         checkrange = 1;
>       }
>       if (!tref_isinteger(rc)) {
> @@ -607,19 +607,11 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
>         TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
>         emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
>       }
> -    return emitir(IRTN(IR_POW), rb, rc);
> +  } else {
> +force_pow_num:
> +    rc = lj_ir_tonum(J, rc);  /* Want POW(num, num), not POW(num, int). */
>     }
> -split_pow:
> -  /* FOLD covers most cases, but some are easier to do here. */
> -  if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb)))))
> -    return rb;  /* 1 ^ x ==> 1 */
> -  rc = lj_ir_tonum(J, rc);
> -  if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5)
> -    return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT);  /* x ^ 0.5 ==> sqrt(x) */
> -  /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */
> -  rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2);
> -  rc = emitir(IRTN(IR_MUL), rb, rc);
> -  return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2);
> +  return emitir(IRTN(IR_POW), rb, rc);
>   }
>   
>   /* -- Predictive narrowing of induction variables ------------------------- */
> diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
> index 2fc36b8d..c10a85cb 100644
> --- a/src/lj_opt_split.c
> +++ b/src/lj_opt_split.c
> @@ -403,27 +403,6 @@ static void split_ir(jit_State *J)
>   	hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
>   	break;
>         case IR_FPMATH:
> -	/* Try to rejoin pow from EXP2, MUL and LOG2. */
> -	if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
> -	  IRIns *irp = IR(nir->op1);
> -	  if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
> -	    IRIns *irm4 = IR(irp->op1);
> -	    IRIns *irm3 = IR(irm4->op1);
> -	    IRIns *irm12 = IR(irm3->op1);
> -	    IRIns *irl1 = IR(irm12->op1);
> -	    if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
> -		irl1->op2 == IRCALL_lj_vm_log2) {
> -	      IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
> -	      IRRef arg3 = irm3->op2, arg4 = irm4->op2;
> -	      J->cur.nins--;
> -	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
> -	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
> -	      ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
> -	      hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
> -	      break;
> -	    }
> -	  }
> -	}
>   	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
>   	break;
>         case IR_LDEXP:
> diff --git a/src/lj_vm.h b/src/lj_vm.h
> index 411caafa..abaa7c52 100644
> --- a/src/lj_vm.h
> +++ b/src/lj_vm.h
> @@ -95,11 +95,6 @@ LJ_ASMF double lj_vm_trunc(double);
>   LJ_ASMF double lj_vm_trunc_sf(double);
>   #endif
>   #endif
> -#ifdef LUAJIT_NO_EXP2
> -LJ_ASMF double lj_vm_exp2(double);
> -#else
> -#define lj_vm_exp2	exp2
> -#endif
>   #if LJ_HASFFI
>   LJ_ASMF int lj_vm_errno(void);
>   #endif
> diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
> index ae4e0f15..9c0d3fde 100644
> --- a/src/lj_vmmath.c
> +++ b/src/lj_vmmath.c
> @@ -79,13 +79,6 @@ double lj_vm_log2(double a)
>   }
>   #endif
>   
> -#ifdef LUAJIT_NO_EXP2
> -double lj_vm_exp2(double a)
> -{
> -  return exp(a * 0.6931471805599453);
> -}
> -#endif
> -
>   #if !LJ_TARGET_X86ORX64
>   /* Unsigned x^k. */
>   static double lj_vm_powui(double x, uint32_t k)
> @@ -128,7 +121,6 @@ double lj_vm_foldfpm(double x, int fpm)
>     case IRFPM_CEIL: return lj_vm_ceil(x);
>     case IRFPM_TRUNC: return lj_vm_trunc(x);
>     case IRFPM_SQRT: return sqrt(x);
> -  case IRFPM_EXP2: return lj_vm_exp2(x);
>     case IRFPM_LOG: return log(x);
>     case IRFPM_LOG2: return lj_vm_log2(x);
>     default: lua_assert(0);
> diff --git a/test/tarantool-tests/lj-9-pow-inconsistencies.test.lua b/test/tarantool-tests/lj-9-pow-inconsistencies.test.lua
> new file mode 100644
> index 00000000..21b3a0d9
> --- /dev/null
> +++ b/test/tarantool-tests/lj-9-pow-inconsistencies.test.lua
> @@ -0,0 +1,63 @@
> +local tap = require('tap')
> +-- Test to demonstrate the incorrect JIT behaviour when splitting
> +-- IR_POW.
> +-- See also https://github.com/LuaJIT/LuaJIT/issues/9.
> +local test = tap.test('lj-9-pow-inconsistencies'):skipcond({
> +  ['Test requires JIT enabled'] = not jit.status(),
> +})
> +
> +local nan = 0 / 0
> +local inf = math.huge
> +
> +-- Table with some corner cases to check:
> +local INTERESTING_VALUES = {
> +  -- 0, -0, 1, -1 special cases with nan, inf, etc..
> +  0, -0, 1, -1, nan, inf, -inf,
> +  -- x ^  inf = 0 (inf), if |x| < 1 (|x| > 1).
> +  -- x ^ -inf = inf (0), if |x| < 1 (|x| > 1).
> +  0.999999, 1.000001, -0.999999, -1.000001,
> +}
> +test:plan(1 + (#INTERESTING_VALUES) ^ 2)
> +
> +jit.opt.start('hotloop=1')
> +
> +-- The JIT engine tries to split b^c to exp2(c * log2(b)).
> +-- For some cases for IEEE754 we can see, that
> +-- (double)exp2((double)log2(x)) != x, due to mathematical
> +-- functions accuracy and double precision restrictions.
> +-- Just use some numbers to observe this misbehaviour.
> +local res = {}
> +local cnt = 1
> +while cnt < 4 do
> +  -- XXX: use local variable to prevent folding via parser.
> +  local b = -0.90000000001
> +  res[cnt] = 1000 ^ b
> +  cnt = cnt + 1
> +end
> +
> +test:samevalues(res, 'consistent pow operator behaviour for corner case')
> +
> +-- Prevent JIT side effects for parent loops.
> +jit.off()
> +for i = 1, #INTERESTING_VALUES do
> +  for j = 1, #INTERESTING_VALUES do
> +    local b = INTERESTING_VALUES[i]
> +    local c = INTERESTING_VALUES[j]
> +    local results = {}
> +    local counter = 1
> +    jit.on()
> +    while counter < 4 do
> +      results[counter] = b ^ c
> +      counter = counter + 1
> +    end
> +    -- Prevent JIT side effects.
> +    jit.off()
> +    jit.flush()
> +    test:samevalues(
> +      results,
> +      ('consistent pow operator behaviour for (%s)^(%s)'):format(b, c)
> +    )
> +  end
> +end
> +
> +test:done(true)

  parent reply	other threads:[~2023-08-18 11:08 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-15  9:36 [Tarantool-patches] [PATCH luajit 0/5] Fix pow inconsistencies and improve asserts Sergey Kaplun via Tarantool-patches
2023-08-15  9:36 ` [Tarantool-patches] [PATCH luajit 1/5] test: introduce `samevalues()` TAP checker Sergey Kaplun via Tarantool-patches
2023-08-17 14:03   ` Maxim Kokryashkin via Tarantool-patches
2023-08-17 15:03     ` Sergey Kaplun via Tarantool-patches
2023-08-18 10:43   ` Sergey Bronnikov via Tarantool-patches
2023-08-18 10:58     ` Sergey Kaplun via Tarantool-patches
2023-08-18 11:12       ` Sergey Bronnikov via Tarantool-patches
2023-08-21 10:47         ` Igor Munkin via Tarantool-patches
2023-08-24  7:44           ` Sergey Bronnikov via Tarantool-patches
2023-08-15  9:36 ` [Tarantool-patches] [PATCH luajit 2/5] Remove pow() splitting and cleanup backends Sergey Kaplun via Tarantool-patches
2023-08-17 14:52   ` Maxim Kokryashkin via Tarantool-patches
2023-08-17 15:33     ` Sergey Kaplun via Tarantool-patches
2023-08-20  9:48       ` Maxim Kokryashkin via Tarantool-patches
2023-08-18 11:08   ` Sergey Bronnikov via Tarantool-patches [this message]
2023-08-15  9:36 ` [Tarantool-patches] [PATCH luajit 3/5] Improve assertions Sergey Kaplun via Tarantool-patches
2023-08-17 14:58   ` Maxim Kokryashkin via Tarantool-patches
2023-08-18  7:56     ` Sergey Kaplun via Tarantool-patches
2023-08-18 11:20   ` Sergey Bronnikov via Tarantool-patches
2023-08-15  9:36 ` [Tarantool-patches] [PATCH luajit 4/5] Fix pow() optimization inconsistencies Sergey Kaplun via Tarantool-patches
2023-08-18 12:45   ` Sergey Bronnikov via Tarantool-patches
2023-08-21  8:07     ` Sergey Kaplun via Tarantool-patches
2023-08-20  9:26   ` Maxim Kokryashkin via Tarantool-patches
2023-08-21  8:06     ` Sergey Kaplun via Tarantool-patches
2023-08-21  9:00       ` Maxim Kokryashkin via Tarantool-patches
2023-08-21  9:31         ` Sergey Kaplun via Tarantool-patches
2023-08-15  9:36 ` [Tarantool-patches] [PATCH luajit 5/5] Revert to trival pow() optimizations to prevent inaccuracies Sergey Kaplun via Tarantool-patches
2023-08-18 12:49   ` Sergey Bronnikov via Tarantool-patches
2023-08-21  8:16     ` Sergey Kaplun via Tarantool-patches
2023-08-20  9:37   ` Maxim Kokryashkin via Tarantool-patches
2023-08-21  8:15     ` Sergey Kaplun via Tarantool-patches
2023-08-21  9:06       ` Maxim Kokryashkin via Tarantool-patches
2023-08-21  9:36         ` Sergey Kaplun via Tarantool-patches
2023-08-24  7:47 ` [Tarantool-patches] [PATCH luajit 0/5] Fix pow inconsistencies and improve asserts Sergey Bronnikov via Tarantool-patches
2023-08-31 15:18 ` Igor Munkin via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2ea9a274-6d29-e440-44e1-45e9eb66ec34@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=m.kokryashkin@tarantool.org \
    --cc=sergeyb@tarantool.org \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit 2/5] Remove pow() splitting and cleanup backends.' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox