[Tarantool-patches] [PATCH luajit 5/5] Revert to trival pow() optimizations to prevent inaccuracies.

Fri Aug 18 15:49:42 MSK 2023

Hi, Sergey

Thanks for the patch!

Typo in the subject: trival -> trivial. However, it seems it is not fixable,
because it is a part of the original commit.

LGTM

On 8/15/23 12:36, Sergey Kaplun wrote:
> From: Mike Pall <mike>
>
> (cherry-picked from commit 96d6d5032098ea9f0002165394a8774dcaa0c0ce)
>
> This patch fixes different misbehaviour between JIT-compiled code and
typo: misbehaviour -> misbehaviours
> the interpreter for power operator with the following ways:
> * Drop folding optimizations for base ^ n => base * base ..., since
>    pow(base, n) isn't interchangeable with just multiplication of numbers
>    and depends on the <math.h> implementation.
> * Since the internal power function is inaccurate for very big or small
>    powers, it is dropped, and `pow()` from the standard library is used
>    instead. To preserve consistency between JIT behaviour and the VM,
>    the narrowing optimization is dropped, and only trivial folding
>    optimizations are used. Also, the `math_extern2` version with two
>    parameters is dropped, since it's no longer used.
>
> Also, this fixes failures of the [220/502] lib/string/format/num.lua
> test [1] from LuaJIT-test suite.
>
> [1]: https://www.exploringbinary.com/incorrect-floating-point-to-decimal-conversions/
>
> Sergey Kaplun:
> * added the description and the test for the problem
>
> Part of tarantool/tarantool#8825
> ---
>   src/lj_asm.c                                  |  3 +-
>   src/lj_dispatch.h                             |  2 +-
>   src/lj_ffrecord.c                             |  4 +-
>   src/lj_ircall.h                               |  3 +-
>   src/lj_iropt.h                                |  1 -
>   src/lj_opt_fold.c                             | 37 ++++------------
>   src/lj_opt_narrow.c                           | 24 ----------
>   src/lj_opt_split.c                            |  2 +-
>   src/lj_record.c                               |  2 +-
>   src/lj_vm.h                                   |  3 --
>   src/lj_vmmath.c                               | 44 +------------------
>   src/vm_arm.dasc                               | 13 +++---
>   src/vm_arm64.dasc                             | 11 ++---
>   src/vm_mips.dasc                              | 11 ++---
>   src/vm_mips64.dasc                            | 11 ++---
>   src/vm_ppc.dasc                               | 11 ++---
>   src/vm_x64.dasc                               |  9 ++--
>   src/vm_x86.dasc                               | 11 ++---
>   .../lj-684-pow-inconsistencies.test.lua       | 21 ++++++++-
>   19 files changed, 64 insertions(+), 159 deletions(-)
>
> diff --git a/src/lj_asm.c b/src/lj_asm.c
> index 65261d50..3a1909d5 100644
> --- a/src/lj_asm.c
> +++ b/src/lj_asm.c
> @@ -1660,8 +1660,7 @@ static void asm_pow(ASMState *as, IRIns *ir)
>   					  IRCALL_lj_carith_powu64);
>     else
>   #endif
> -  asm_callid(as, ir, irt_isnum(IR(ir->op2)->t) ? IRCALL_lj_vm_pow :
> -						 IRCALL_lj_vm_powi);
> +  asm_callid(as, ir, IRCALL_pow);
>   }
>   
>   static void asm_div(ASMState *as, IRIns *ir)
> diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
> index af870a75..b8bc2594 100644
> --- a/src/lj_dispatch.h
> +++ b/src/lj_dispatch.h
> @@ -44,7 +44,7 @@ extern double __divdf3(double a, double b);
>   #define GOTDEF(_) \
>     _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
>     _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
> -  _(lj_vm_pow) _(fmod) _(ldexp) _(lj_vm_modi) \
> +  _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \
>     _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
>     _(lj_dispatch_profile) _(lj_err_throw) \
>     _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
> diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
> index 0746ec64..99a6b918 100644
> --- a/src/lj_ffrecord.c
> +++ b/src/lj_ffrecord.c
> @@ -590,8 +590,8 @@ static void LJ_FASTCALL recff_math_call(jit_State *J, RecordFFData *rd)
>   
>   static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
>   {
> -  J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1],
> -				 &rd->argv[0], &rd->argv[1]);
> +  J->base[0] = lj_opt_narrow_arith(J, J->base[0], J->base[1],
> +				   &rd->argv[0], &rd->argv[1], IR_POW);
>     UNUSED(rd);
>   }
>   
> diff --git a/src/lj_ircall.h b/src/lj_ircall.h
> index ac0888a0..9c195918 100644
> --- a/src/lj_ircall.h
> +++ b/src/lj_ircall.h
> @@ -194,8 +194,7 @@ typedef struct CCallInfo {
>     _(FPMATH,	sqrt,			1,   N, NUM, XA_FP) \
>     _(ANY,	log,			1,   N, NUM, XA_FP) \
>     _(ANY,	lj_vm_log2,		1,   N, NUM, XA_FP) \
> -  _(ANY,	lj_vm_powi,		2,   N, NUM, XA_FP) \
> -  _(ANY,	lj_vm_pow,		2,   N, NUM, XA2_FP) \
> +  _(ANY,	pow,			2,   N, NUM, XA2_FP) \
>     _(ANY,	atan2,			2,   N, NUM, XA2_FP) \
>     _(ANY,	ldexp,			2,   N, NUM, XA_FP) \
>     _(SOFTFP,	lj_vm_tobit,		1,   N, INT, XA_FP32) \
> diff --git a/src/lj_iropt.h b/src/lj_iropt.h
> index a59ba3f4..7ee1ea86 100644
> --- a/src/lj_iropt.h
> +++ b/src/lj_iropt.h
> @@ -144,7 +144,6 @@ LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
>   				 TValue *vb, TValue *vc, IROp op);
>   LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc);
>   LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
> -LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
>   LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
>   
>   /* Optimization passes. */
> diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
> index 7d7cc9d1..09e6c87b 100644
> --- a/src/lj_opt_fold.c
> +++ b/src/lj_opt_fold.c
> @@ -236,14 +236,10 @@ LJFOLDF(kfold_fpcall2)
>     return NEXTFOLD;
>   }
>   
> -LJFOLD(POW KNUM KINT)
>   LJFOLD(POW KNUM KNUM)
>   LJFOLDF(kfold_numpow)
>   {
> -  lua_Number a = knumleft;
> -  lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
> -  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
> -  return lj_ir_knum(J, y);
> +  return lj_ir_knum(J, lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD));
>   }
>   
>   /* Must not use kfold_kref for numbers (could be NaN). */
> @@ -1084,34 +1080,17 @@ LJFOLDF(simplify_nummuldiv_negneg)
>     return RETRYFOLD;
>   }
>   
> -LJFOLD(POW any KINT)
> -LJFOLDF(simplify_numpow_xkint)
> +LJFOLD(POW any KNUM)
> +LJFOLDF(simplify_numpow_k)
>   {
> -  int32_t k = fright->i;
> -  TRef ref = fins->op1;
> -  if (k == 0)  /* x ^ 0 ==> 1 */
> +  if (knumright == 0)  /* x ^ 0 ==> 1 */
>       return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
> -  if (k == 1)  /* x ^ 1 ==> x */
> +  else if (knumright == 1)  /* x ^ 1 ==> x */
>       return LEFTFOLD;
> -  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
> +  else if (knumright == 2)  /* x ^ 2 ==> x * x */
> +    return emitir(IRTN(IR_MUL), fins->op1, fins->op1);
> +  else
>       return NEXTFOLD;
> -  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
> -    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
> -    k = -k;
> -  }
> -  /* Unroll x^k for 1 <= k <= 65536. */
> -  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
> -    ref = emitir(IRTN(IR_MUL), ref, ref);
> -  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
> -    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
> -    for (; k != 1; k >>= 1) {
> -      if (k & 1)
> -	ref = emitir(IRTN(IR_MUL), ref, tmp);
> -      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
> -    }
> -    ref = emitir(IRTN(IR_MUL), ref, tmp);
> -  }
> -  return ref;
>   }
>   
>   /* -- Simplify conversions ------------------------------------------------ */
> diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
> index d6601f4c..db0da10f 100644
> --- a/src/lj_opt_narrow.c
> +++ b/src/lj_opt_narrow.c
> @@ -584,30 +584,6 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
>     return emitir(IRTN(IR_SUB), rb, tmp);
>   }
>   
> -/* Narrowing of power operator or math.pow. */
> -TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
> -{
> -  rb = conv_str_tonum(J, rb, vb);
> -  rb = lj_ir_tonum(J, rb);  /* Left arg is always treated as an FP number. */
> -  rc = conv_str_tonum(J, rc, vc);
> -  if (tvisint(vc) || numisint(numV(vc))) {
> -    int32_t k = numberVint(vc);
> -    if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
> -    if (!tref_isinteger(rc)) {
> -      /* Guarded conversion to integer! */
> -      rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
> -    }
> -    if (!tref_isk(rc)) {  /* Range guard: -65536 <= i <= 65536 */
> -      TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
> -      emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
> -    }
> -  } else {
> -force_pow_num:
> -    rc = lj_ir_tonum(J, rc);  /* Want POW(num, num), not POW(num, int). */
> -  }
> -  return emitir(IRTN(IR_POW), rb, rc);
> -}
> -
>   /* -- Predictive narrowing of induction variables ------------------------- */
>   
>   /* Narrow a single runtime value. */
> diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
> index a619d852..0dc6394f 100644
> --- a/src/lj_opt_split.c
> +++ b/src/lj_opt_split.c
> @@ -400,7 +400,7 @@ static void split_ir(jit_State *J)
>   	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
>   	break;
>         case IR_POW:
> -	hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
> +	hi = split_call_li(J, hisubst, oir, ir, IRCALL_pow);
>   	break;
>         case IR_FPMATH:
>   	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
> diff --git a/src/lj_record.c b/src/lj_record.c
> index d1332bfc..34d1210a 100644
> --- a/src/lj_record.c
> +++ b/src/lj_record.c
> @@ -2268,7 +2268,7 @@ void lj_record_ins(jit_State *J)
>   
>     case BC_POW:
>       if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
> -      rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv);
> +      rc = lj_opt_narrow_arith(J, rb, rc, rbv, rcv, IR_POW);
>       else
>         rc = rec_mm_arith(J, &ix, MM_pow);
>       break;
> diff --git a/src/lj_vm.h b/src/lj_vm.h
> index f6f28a08..79166e5e 100644
> --- a/src/lj_vm.h
> +++ b/src/lj_vm.h
> @@ -96,9 +96,6 @@ LJ_ASMF int lj_vm_errno(void);
>   #endif
>   #endif
>   
> -LJ_ASMF double lj_vm_powi(double, int32_t);
> -LJ_ASMF double lj_vm_pow(double, double);
> -
>   /* Continuations for metamethods. */
>   LJ_ASMF void lj_cont_cat(void);  /* Continue with concatenation. */
>   LJ_ASMF void lj_cont_ra(void);  /* Store result in RA from instruction. */
> diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
> index 539f955b..506867f8 100644
> --- a/src/lj_vmmath.c
> +++ b/src/lj_vmmath.c
> @@ -30,52 +30,12 @@ LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
>   LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
>   LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
>   LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
> +LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
>   LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
>   #endif
>   
>   /* -- Helper functions ---------------------------------------------------- */
>   
> -/* Unsigned x^k. */
> -static double lj_vm_powui(double x, uint32_t k)
> -{
> -  double y;
> -  lj_assertX(k != 0, "pow with zero exponent");
> -  for (; (k & 1) == 0; k >>= 1) x *= x;
> -  y = x;
> -  if ((k >>= 1) != 0) {
> -    for (;;) {
> -      x *= x;
> -      if (k == 1) break;
> -      if (k & 1) y *= x;
> -      k >>= 1;
> -    }
> -    y *= x;
> -  }
> -  return y;
> -}
> -
> -/* Signed x^k. */
> -double lj_vm_powi(double x, int32_t k)
> -{
> -  if (k > 1)
> -    return lj_vm_powui(x, (uint32_t)k);
> -  else if (k == 1)
> -    return x;
> -  else if (k == 0)
> -    return 1.0;
> -  else
> -    return 1.0 / lj_vm_powui(x, (uint32_t)-k);
> -}
> -
> -double lj_vm_pow(double x, double y)
> -{
> -  int32_t k = lj_num2int(y);
> -  if ((k >= -65536 && k <= 65536) && y == (double)k)
> -    return lj_vm_powi(x, k);
> -  else
> -    return pow(x, y);
> -}
> -
>   double lj_vm_foldarith(double x, double y, int op)
>   {
>     switch (op) {
> @@ -84,7 +44,7 @@ double lj_vm_foldarith(double x, double y, int op)
>     case IR_MUL - IR_ADD: return x*y; break;
>     case IR_DIV - IR_ADD: return x/y; break;
>     case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break;
> -  case IR_POW - IR_ADD: return lj_vm_pow(x, y); break;
> +  case IR_POW - IR_ADD: return pow(x, y); break;
>     case IR_NEG - IR_ADD: return -x; break;
>     case IR_ABS - IR_ADD: return fabs(x); break;
>   #if LJ_HASJIT
> diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
> index 792f0363..767d31f9 100644
> --- a/src/vm_arm.dasc
> +++ b/src/vm_arm.dasc
> @@ -1485,11 +1485,11 @@ static void build_subroutines(BuildCtx *ctx)
>     |.endif
>     |.endmacro
>     |
> -  |.macro math_extern2, name, func
> +  |.macro math_extern2, func
>     |.if HFABI
> -  |  .ffunc_dd math_ .. name
> +  |  .ffunc_dd math_ .. func
>     |.else
> -  |  .ffunc_nn math_ .. name
> +  |  .ffunc_nn math_ .. func
>     |.endif
>     |  .IOS mov RA, BASE
>     |  bl extern func
> @@ -1500,9 +1500,6 @@ static void build_subroutines(BuildCtx *ctx)
>     |  b ->fff_restv
>     |.endif
>     |.endmacro
> -  |.macro math_extern2, func
> -  |  math_extern2 func, func
> -  |.endmacro
>     |
>     |.if FPU
>     |  .ffunc_d math_sqrt
> @@ -1548,7 +1545,7 @@ static void build_subroutines(BuildCtx *ctx)
>     |  math_extern sinh
>     |  math_extern cosh
>     |  math_extern tanh
> -  |  math_extern2 pow, lj_vm_pow
> +  |  math_extern2 pow
>     |  math_extern2 atan2
>     |  math_extern2 fmod
>     |
> @@ -3156,7 +3153,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>       break;
>     case BC_POW:
>       |  // NYI: (partial) integer arithmetic.
> -    |  ins_arithfp extern, extern lj_vm_pow
> +    |  ins_arithfp extern, extern pow
>       break;
>   
>     case BC_CAT:
> diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
> index fb267a76..de33bde4 100644
> --- a/src/vm_arm64.dasc
> +++ b/src/vm_arm64.dasc
> @@ -1391,14 +1391,11 @@ static void build_subroutines(BuildCtx *ctx)
>     |  b ->fff_resn
>     |.endmacro
>     |
> -  |.macro math_extern2, name, func
> -  |  .ffunc_nn math_ .. name
> +  |.macro math_extern2, func
> +  |  .ffunc_nn math_ .. func
>     |  bl extern func
>     |  b ->fff_resn
>     |.endmacro
> -  |.macro math_extern2, func
> -  |  math_extern2 func, func
> -  |.endmacro
>     |
>     |.ffunc_n math_sqrt
>     |  fsqrt d0, d0
> @@ -1427,7 +1424,7 @@ static void build_subroutines(BuildCtx *ctx)
>     |  math_extern sinh
>     |  math_extern cosh
>     |  math_extern tanh
> -  |  math_extern2 pow, lj_vm_pow
> +  |  math_extern2 pow
>     |  math_extern2 atan2
>     |  math_extern2 fmod
>     |
> @@ -2624,7 +2621,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>       |  ins_arithload FARG1, FARG2
>       |  ins_arithfallback ins_arithcheck_num
>       |.if "fpins" == "fpow"
> -    |  bl extern lj_vm_pow
> +    |  bl extern pow
>       |.else
>       |  fpins FARG1, FARG1, FARG2
>       |.endif
> diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
> index 5664f503..32caabf7 100644
> --- a/src/vm_mips.dasc
> +++ b/src/vm_mips.dasc
> @@ -1631,17 +1631,14 @@ static void build_subroutines(BuildCtx *ctx)
>     |.  nop
>     |.endmacro
>     |
> -  |.macro math_extern2, name, func
> -  |  .ffunc_nn math_ .. name
> +  |.macro math_extern2, func
> +  |  .ffunc_nn math_ .. func
>     |.  load_got func
>     |  call_extern
>     |.  nop
>     |  b ->fff_resn
>     |.  nop
>     |.endmacro
> -  |.macro math_extern2, func
> -  |  math_extern2 func, func
> -  |.endmacro
>     |
>     |// TODO: Return integer type if result is integer (own sf implementation).
>     |.macro math_round, func
> @@ -1695,7 +1692,7 @@ static void build_subroutines(BuildCtx *ctx)
>     |  math_extern sinh
>     |  math_extern cosh
>     |  math_extern tanh
> -  |  math_extern2 pow, lj_vm_pow
> +  |  math_extern2 pow
>     |  math_extern2 atan2
>     |  math_extern2 fmod
>     |
> @@ -3588,7 +3585,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>       |  sltiu AT, SFARG1HI, LJ_TISNUM
>       |  sltiu TMP0, SFARG2HI, LJ_TISNUM
>       |  and AT, AT, TMP0
> -    |  load_got lj_vm_pow
> +    |  load_got pow
>       |  beqz AT, ->vmeta_arith
>       |.  addu RA, BASE, RA
>       |.if FPU
> diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
> index 249605d4..44fba36c 100644
> --- a/src/vm_mips64.dasc
> +++ b/src/vm_mips64.dasc
> @@ -1669,17 +1669,14 @@ static void build_subroutines(BuildCtx *ctx)
>     |.  nop
>     |.endmacro
>     |
> -  |.macro math_extern2, name, func
> -  |  .ffunc_nn math_ .. name
> +  |.macro math_extern2, func
> +  |  .ffunc_nn math_ .. func
>     |.  load_got func
>     |  call_extern
>     |.  nop
>     |  b ->fff_resn
>     |.  nop
>     |.endmacro
> -  |.macro math_extern2, func
> -  |  math_extern2 func, func
> -  |.endmacro
>     |
>     |// TODO: Return integer type if result is integer (own sf implementation).
>     |.macro math_round, func
> @@ -1733,7 +1730,7 @@ static void build_subroutines(BuildCtx *ctx)
>     |  math_extern sinh
>     |  math_extern cosh
>     |  math_extern tanh
> -  |  math_extern2 pow, lj_vm_pow
> +  |  math_extern2 pow
>     |  math_extern2 atan2
>     |  math_extern2 fmod
>     |
> @@ -3826,7 +3823,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>       |  sltiu TMP0, TMP0, LJ_TISNUM
>       |   sltiu TMP1, TMP1, LJ_TISNUM
>       |  and AT, TMP0, TMP1
> -    |  load_got lj_vm_pow
> +    |  load_got pow
>       |  beqz AT, ->vmeta_arith
>       |.  daddu RA, BASE, RA
>       |.if FPU
> diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
> index 94af63e6..980ad897 100644
> --- a/src/vm_ppc.dasc
> +++ b/src/vm_ppc.dasc
> @@ -2032,14 +2032,11 @@ static void build_subroutines(BuildCtx *ctx)
>     |  b ->fff_resn
>     |.endmacro
>     |
> -  |.macro math_extern2, name, func
> -  |  .ffunc_nn math_ .. name
> +  |.macro math_extern2, func
> +  |  .ffunc_nn math_ .. func
>     |  blex func
>     |  b ->fff_resn
>     |.endmacro
> -  |.macro math_extern2, func
> -  |  math_extern2 func, func
> -  |.endmacro
>     |
>     |.macro math_round, func
>     |  .ffunc_1 math_ .. func
> @@ -2164,7 +2161,7 @@ static void build_subroutines(BuildCtx *ctx)
>     |  math_extern sinh
>     |  math_extern cosh
>     |  math_extern tanh
> -  |  math_extern2 pow, lj_vm_pow
> +  |  math_extern2 pow
>     |  math_extern2 atan2
>     |  math_extern2 fmod
>     |
> @@ -4157,7 +4154,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>       |  checknum cr1, CARG3
>       |  crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
>       |  bge ->vmeta_arith_vv
> -    |  blex lj_vm_pow
> +    |  blex pow
>       |  ins_next1
>       |.if FPU
>       |  stfdx FARG1, BASE, RA
> diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
> index acbe8dc2..09bf67e5 100644
> --- a/src/vm_x64.dasc
> +++ b/src/vm_x64.dasc
> @@ -1825,16 +1825,13 @@ static void build_subroutines(BuildCtx *ctx)
>     |  jmp ->fff_resxmm0
>     |.endmacro
>     |
> -  |.macro math_extern2, name, func
> -  |  .ffunc_nn math_ .. name
> +  |.macro math_extern2, func
> +  |  .ffunc_nn math_ .. func
>     |  mov RB, BASE
>     |  call extern func
>     |  mov BASE, RB
>     |  jmp ->fff_resxmm0
>     |.endmacro
> -  |.macro math_extern2, func
> -  |  math_extern2 func, func
> -  |.endmacro
>     |
>     |  math_extern log10
>     |  math_extern exp
> @@ -1847,7 +1844,7 @@ static void build_subroutines(BuildCtx *ctx)
>     |  math_extern sinh
>     |  math_extern cosh
>     |  math_extern tanh
> -  |  math_extern2 pow, lj_vm_pow
> +  |  math_extern2 pow
>     |  math_extern2 atan2
>     |  math_extern2 fmod
>     |
> diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
> index bf30cce6..f16ade1a 100644
> --- a/src/vm_x86.dasc
> +++ b/src/vm_x86.dasc
> @@ -2240,8 +2240,8 @@ static void build_subroutines(BuildCtx *ctx)
>     |  jmp ->fff_resfp
>     |.endmacro
>     |
> -  |.macro math_extern2, name, func
> -  |  .ffunc_nnsse math_ .. name
> +  |.macro math_extern2, func
> +  |  .ffunc_nnsse math_ .. func
>     |.if not X64
>     |  movsd FPARG1, xmm0
>     |  movsd FPARG3, xmm1
> @@ -2251,9 +2251,6 @@ static void build_subroutines(BuildCtx *ctx)
>     |  mov BASE, RB
>     |  jmp ->fff_resfp
>     |.endmacro
> -  |.macro math_extern2, func
> -  |  math_extern2 func, func
> -  |.endmacro
>     |
>     |  math_extern log10
>     |  math_extern exp
> @@ -2266,7 +2263,7 @@ static void build_subroutines(BuildCtx *ctx)
>     |  math_extern sinh
>     |  math_extern cosh
>     |  math_extern tanh
> -  |  math_extern2 pow, lj_vm_pow
> +  |  math_extern2 pow
>     |  math_extern2 atan2
>     |  math_extern2 fmod
>     |
> @@ -3944,7 +3941,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>       |  movsd FPARG1, xmm0
>       |  movsd FPARG3, xmm1
>       |.endif
> -    |  call extern lj_vm_pow
> +    |  call extern pow
>       |  movzx RA, PC_RA
>       |  mov BASE, RB
>       |.if X64
> diff --git a/test/tarantool-tests/lj-684-pow-inconsistencies.test.lua b/test/tarantool-tests/lj-684-pow-inconsistencies.test.lua
> index 5129fc45..ab9db3df 100644
> --- a/test/tarantool-tests/lj-684-pow-inconsistencies.test.lua
> +++ b/test/tarantool-tests/lj-684-pow-inconsistencies.test.lua
> @@ -2,14 +2,15 @@ local tap = require('tap')
>   -- Test to demonstrate the incorrect JIT behaviour for different
>   -- power operation optimizations.
>   -- See also:
> --- https://github.com/LuaJIT/LuaJIT/issues/684.
> +-- https://github.com/LuaJIT/LuaJIT/issues/684,
> +-- https://github.com/LuaJIT/LuaJIT/issues/817.
>   local test = tap.test('lj-684-pow-inconsistencies'):skipcond({
>     ['Test requires JIT enabled'] = not jit.status(),
>   })
>   
>   local tostring = tostring
>   
> -test:plan(4)
> +test:plan(5)
>   
>   jit.opt.start('hotloop=1')
>   
> @@ -64,6 +65,22 @@ jit.flush()
>   
>   test:samevalues(res, ('consistent results for folding 2921 ^ 0.5'))
>   
> +-- -948388 ^ 3 = -0x1.7ad0e8ad7439dp+59.
> +res = {}
> +-- XXX: use local variable to prevent folding via parser.
> +-- XXX: use stack slot out of trace to prevent constant folding.
> +local corner_case_3 = -948388
> +jit.on()
> +for i = 1, 4 do
> +  res[i] = corner_case_3 ^ 3
> +end
> +
> +-- XXX: Prevent hotcount side effects.
> +jit.off()
> +jit.flush()
> +
> +test:samevalues(res, ('consistent results for int pow (-948388) ^ 3'))
> +
>   -- Narrowing for non-constant base of power operation.
>   local function pow(base, power)
>     return base ^ power