[Tarantool-patches] [PATCH luajit 5/5] Revert to trivial pow() optimizations to prevent inaccuracies.
Sergey Kaplun
skaplun at tarantool.org
Tue Aug 15 12:36:31 MSK 2023
From: Mike Pall <mike>
(cherry-picked from commit 96d6d5032098ea9f0002165394a8774dcaa0c0ce)
This patch fixes several behavioural inconsistencies between JIT-compiled
code and the interpreter for the power operator in the following ways:
* Drop folding optimizations for base ^ n => base * base ..., since
pow(base, n) isn't interchangeable with plain multiplication of numbers
and depends on the <math.h> implementation.
* Since the internal power function is inaccurate for very big or small
powers, it is dropped, and `pow()` from the standard library is used
instead. To preserve consistency between the JIT behaviour and the VM,
the narrowing optimization is dropped, and only trivial folding
optimizations are used. Also, the `math_extern2` version with two
parameters is dropped, since it is no longer used.
Also, this fixes failures of the [220/502] lib/string/format/num.lua
test [1] from the LuaJIT-test suite.
[1]: https://www.exploringbinary.com/incorrect-floating-point-to-decimal-conversions/
Sergey Kaplun:
* added the description and the test for the problem
Part of tarantool/tarantool#8825
---
src/lj_asm.c | 3 +-
src/lj_dispatch.h | 2 +-
src/lj_ffrecord.c | 4 +-
src/lj_ircall.h | 3 +-
src/lj_iropt.h | 1 -
src/lj_opt_fold.c | 37 ++++------------
src/lj_opt_narrow.c | 24 ----------
src/lj_opt_split.c | 2 +-
src/lj_record.c | 2 +-
src/lj_vm.h | 3 --
src/lj_vmmath.c | 44 +------------------
src/vm_arm.dasc | 13 +++---
src/vm_arm64.dasc | 11 ++---
src/vm_mips.dasc | 11 ++---
src/vm_mips64.dasc | 11 ++---
src/vm_ppc.dasc | 11 ++---
src/vm_x64.dasc | 9 ++--
src/vm_x86.dasc | 11 ++---
.../lj-684-pow-inconsistencies.test.lua | 21 ++++++++-
19 files changed, 64 insertions(+), 159 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 65261d50..3a1909d5 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1660,8 +1660,7 @@ static void asm_pow(ASMState *as, IRIns *ir)
IRCALL_lj_carith_powu64);
else
#endif
- asm_callid(as, ir, irt_isnum(IR(ir->op2)->t) ? IRCALL_lj_vm_pow :
- IRCALL_lj_vm_powi);
+ asm_callid(as, ir, IRCALL_pow);
}
static void asm_div(ASMState *as, IRIns *ir)
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index af870a75..b8bc2594 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -44,7 +44,7 @@ extern double __divdf3(double a, double b);
#define GOTDEF(_) \
_(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
_(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
- _(lj_vm_pow) _(fmod) _(ldexp) _(lj_vm_modi) \
+ _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \
_(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
_(lj_dispatch_profile) _(lj_err_throw) \
_(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 0746ec64..99a6b918 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -590,8 +590,8 @@ static void LJ_FASTCALL recff_math_call(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
{
- J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1],
- &rd->argv[0], &rd->argv[1]);
+ J->base[0] = lj_opt_narrow_arith(J, J->base[0], J->base[1],
+ &rd->argv[0], &rd->argv[1], IR_POW);
UNUSED(rd);
}
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index ac0888a0..9c195918 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -194,8 +194,7 @@ typedef struct CCallInfo {
_(FPMATH, sqrt, 1, N, NUM, XA_FP) \
_(ANY, log, 1, N, NUM, XA_FP) \
_(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
- _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \
- _(ANY, lj_vm_pow, 2, N, NUM, XA2_FP) \
+ _(ANY, pow, 2, N, NUM, XA2_FP) \
_(ANY, atan2, 2, N, NUM, XA2_FP) \
_(ANY, ldexp, 2, N, NUM, XA_FP) \
_(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index a59ba3f4..7ee1ea86 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -144,7 +144,6 @@ LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
TValue *vb, TValue *vc, IROp op);
LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc);
LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
-LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
/* Optimization passes. */
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 7d7cc9d1..09e6c87b 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -236,14 +236,10 @@ LJFOLDF(kfold_fpcall2)
return NEXTFOLD;
}
-LJFOLD(POW KNUM KINT)
LJFOLD(POW KNUM KNUM)
LJFOLDF(kfold_numpow)
{
- lua_Number a = knumleft;
- lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
- lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
- return lj_ir_knum(J, y);
+ return lj_ir_knum(J, lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD));
}
/* Must not use kfold_kref for numbers (could be NaN). */
@@ -1084,34 +1080,17 @@ LJFOLDF(simplify_nummuldiv_negneg)
return RETRYFOLD;
}
-LJFOLD(POW any KINT)
-LJFOLDF(simplify_numpow_xkint)
+LJFOLD(POW any KNUM)
+LJFOLDF(simplify_numpow_k)
{
- int32_t k = fright->i;
- TRef ref = fins->op1;
- if (k == 0) /* x ^ 0 ==> 1 */
+ if (knumright == 0) /* x ^ 0 ==> 1 */
return lj_ir_knum_one(J); /* Result must be a number, not an int. */
- if (k == 1) /* x ^ 1 ==> x */
+ else if (knumright == 1) /* x ^ 1 ==> x */
return LEFTFOLD;
- if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */
+ else if (knumright == 2) /* x ^ 2 ==> x * x */
+ return emitir(IRTN(IR_MUL), fins->op1, fins->op1);
+ else
return NEXTFOLD;
- if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */
- ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
- k = -k;
- }
- /* Unroll x^k for 1 <= k <= 65536. */
- for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */
- ref = emitir(IRTN(IR_MUL), ref, ref);
- if ((k >>= 1) != 0) { /* Handle trailing bits. */
- TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
- for (; k != 1; k >>= 1) {
- if (k & 1)
- ref = emitir(IRTN(IR_MUL), ref, tmp);
- tmp = emitir(IRTN(IR_MUL), tmp, tmp);
- }
- ref = emitir(IRTN(IR_MUL), ref, tmp);
- }
- return ref;
}
/* -- Simplify conversions ------------------------------------------------ */
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index d6601f4c..db0da10f 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -584,30 +584,6 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
return emitir(IRTN(IR_SUB), rb, tmp);
}
-/* Narrowing of power operator or math.pow. */
-TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
-{
- rb = conv_str_tonum(J, rb, vb);
- rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */
- rc = conv_str_tonum(J, rc, vc);
- if (tvisint(vc) || numisint(numV(vc))) {
- int32_t k = numberVint(vc);
- if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
- if (!tref_isinteger(rc)) {
- /* Guarded conversion to integer! */
- rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
- }
- if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
- TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
- emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
- }
- } else {
-force_pow_num:
- rc = lj_ir_tonum(J, rc); /* Want POW(num, num), not POW(num, int). */
- }
- return emitir(IRTN(IR_POW), rb, rc);
-}
-
/* -- Predictive narrowing of induction variables ------------------------- */
/* Narrow a single runtime value. */
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index a619d852..0dc6394f 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -400,7 +400,7 @@ static void split_ir(jit_State *J)
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
break;
case IR_POW:
- hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
+ hi = split_call_li(J, hisubst, oir, ir, IRCALL_pow);
break;
case IR_FPMATH:
hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
diff --git a/src/lj_record.c b/src/lj_record.c
index d1332bfc..34d1210a 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -2268,7 +2268,7 @@ void lj_record_ins(jit_State *J)
case BC_POW:
if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
- rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv);
+ rc = lj_opt_narrow_arith(J, rb, rc, rbv, rcv, IR_POW);
else
rc = rec_mm_arith(J, &ix, MM_pow);
break;
diff --git a/src/lj_vm.h b/src/lj_vm.h
index f6f28a08..79166e5e 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -96,9 +96,6 @@ LJ_ASMF int lj_vm_errno(void);
#endif
#endif
-LJ_ASMF double lj_vm_powi(double, int32_t);
-LJ_ASMF double lj_vm_pow(double, double);
-
/* Continuations for metamethods. */
LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */
LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 539f955b..506867f8 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -30,52 +30,12 @@ LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
+LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
#endif
/* -- Helper functions ---------------------------------------------------- */
-/* Unsigned x^k. */
-static double lj_vm_powui(double x, uint32_t k)
-{
- double y;
- lj_assertX(k != 0, "pow with zero exponent");
- for (; (k & 1) == 0; k >>= 1) x *= x;
- y = x;
- if ((k >>= 1) != 0) {
- for (;;) {
- x *= x;
- if (k == 1) break;
- if (k & 1) y *= x;
- k >>= 1;
- }
- y *= x;
- }
- return y;
-}
-
-/* Signed x^k. */
-double lj_vm_powi(double x, int32_t k)
-{
- if (k > 1)
- return lj_vm_powui(x, (uint32_t)k);
- else if (k == 1)
- return x;
- else if (k == 0)
- return 1.0;
- else
- return 1.0 / lj_vm_powui(x, (uint32_t)-k);
-}
-
-double lj_vm_pow(double x, double y)
-{
- int32_t k = lj_num2int(y);
- if ((k >= -65536 && k <= 65536) && y == (double)k)
- return lj_vm_powi(x, k);
- else
- return pow(x, y);
-}
-
double lj_vm_foldarith(double x, double y, int op)
{
switch (op) {
@@ -84,7 +44,7 @@ double lj_vm_foldarith(double x, double y, int op)
case IR_MUL - IR_ADD: return x*y; break;
case IR_DIV - IR_ADD: return x/y; break;
case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break;
- case IR_POW - IR_ADD: return lj_vm_pow(x, y); break;
+ case IR_POW - IR_ADD: return pow(x, y); break;
case IR_NEG - IR_ADD: return -x; break;
case IR_ABS - IR_ADD: return fabs(x); break;
#if LJ_HASJIT
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 792f0363..767d31f9 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -1485,11 +1485,11 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|.endmacro
|
- |.macro math_extern2, name, func
+ |.macro math_extern2, func
|.if HFABI
- | .ffunc_dd math_ .. name
+ | .ffunc_dd math_ .. func
|.else
- | .ffunc_nn math_ .. name
+ | .ffunc_nn math_ .. func
|.endif
| .IOS mov RA, BASE
| bl extern func
@@ -1500,9 +1500,6 @@ static void build_subroutines(BuildCtx *ctx)
| b ->fff_restv
|.endif
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
|.if FPU
| .ffunc_d math_sqrt
@@ -1548,7 +1545,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
@@ -3156,7 +3153,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_POW:
| // NYI: (partial) integer arithmetic.
- | ins_arithfp extern, extern lj_vm_pow
+ | ins_arithfp extern, extern pow
break;
case BC_CAT:
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index fb267a76..de33bde4 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -1391,14 +1391,11 @@ static void build_subroutines(BuildCtx *ctx)
| b ->fff_resn
|.endmacro
|
- |.macro math_extern2, name, func
- | .ffunc_nn math_ .. name
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
| bl extern func
| b ->fff_resn
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
|.ffunc_n math_sqrt
| fsqrt d0, d0
@@ -1427,7 +1424,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
@@ -2624,7 +2621,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_arithload FARG1, FARG2
| ins_arithfallback ins_arithcheck_num
|.if "fpins" == "fpow"
- | bl extern lj_vm_pow
+ | bl extern pow
|.else
| fpins FARG1, FARG1, FARG2
|.endif
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 5664f503..32caabf7 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -1631,17 +1631,14 @@ static void build_subroutines(BuildCtx *ctx)
|. nop
|.endmacro
|
- |.macro math_extern2, name, func
- | .ffunc_nn math_ .. name
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
|. load_got func
| call_extern
|. nop
| b ->fff_resn
|. nop
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
|// TODO: Return integer type if result is integer (own sf implementation).
|.macro math_round, func
@@ -1695,7 +1692,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
@@ -3588,7 +3585,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sltiu AT, SFARG1HI, LJ_TISNUM
| sltiu TMP0, SFARG2HI, LJ_TISNUM
| and AT, AT, TMP0
- | load_got lj_vm_pow
+ | load_got pow
| beqz AT, ->vmeta_arith
|. addu RA, BASE, RA
|.if FPU
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index 249605d4..44fba36c 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -1669,17 +1669,14 @@ static void build_subroutines(BuildCtx *ctx)
|. nop
|.endmacro
|
- |.macro math_extern2, name, func
- | .ffunc_nn math_ .. name
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
|. load_got func
| call_extern
|. nop
| b ->fff_resn
|. nop
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
|// TODO: Return integer type if result is integer (own sf implementation).
|.macro math_round, func
@@ -1733,7 +1730,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
@@ -3826,7 +3823,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sltiu TMP0, TMP0, LJ_TISNUM
| sltiu TMP1, TMP1, LJ_TISNUM
| and AT, TMP0, TMP1
- | load_got lj_vm_pow
+ | load_got pow
| beqz AT, ->vmeta_arith
|. daddu RA, BASE, RA
|.if FPU
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 94af63e6..980ad897 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -2032,14 +2032,11 @@ static void build_subroutines(BuildCtx *ctx)
| b ->fff_resn
|.endmacro
|
- |.macro math_extern2, name, func
- | .ffunc_nn math_ .. name
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
| blex func
| b ->fff_resn
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
|.macro math_round, func
| .ffunc_1 math_ .. func
@@ -2164,7 +2161,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
@@ -4157,7 +4154,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_vv
- | blex lj_vm_pow
+ | blex pow
| ins_next1
|.if FPU
| stfdx FARG1, BASE, RA
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index acbe8dc2..09bf67e5 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -1825,16 +1825,13 @@ static void build_subroutines(BuildCtx *ctx)
| jmp ->fff_resxmm0
|.endmacro
|
- |.macro math_extern2, name, func
- | .ffunc_nn math_ .. name
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
| mov RB, BASE
| call extern func
| mov BASE, RB
| jmp ->fff_resxmm0
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
| math_extern log10
| math_extern exp
@@ -1847,7 +1844,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index bf30cce6..f16ade1a 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -2240,8 +2240,8 @@ static void build_subroutines(BuildCtx *ctx)
| jmp ->fff_resfp
|.endmacro
|
- |.macro math_extern2, name, func
- | .ffunc_nnsse math_ .. name
+ |.macro math_extern2, func
+ | .ffunc_nnsse math_ .. func
|.if not X64
| movsd FPARG1, xmm0
| movsd FPARG3, xmm1
@@ -2251,9 +2251,6 @@ static void build_subroutines(BuildCtx *ctx)
| mov BASE, RB
| jmp ->fff_resfp
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
| math_extern log10
| math_extern exp
@@ -2266,7 +2263,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
@@ -3944,7 +3941,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| movsd FPARG1, xmm0
| movsd FPARG3, xmm1
|.endif
- | call extern lj_vm_pow
+ | call extern pow
| movzx RA, PC_RA
| mov BASE, RB
|.if X64
diff --git a/test/tarantool-tests/lj-684-pow-inconsistencies.test.lua b/test/tarantool-tests/lj-684-pow-inconsistencies.test.lua
index 5129fc45..ab9db3df 100644
--- a/test/tarantool-tests/lj-684-pow-inconsistencies.test.lua
+++ b/test/tarantool-tests/lj-684-pow-inconsistencies.test.lua
@@ -2,14 +2,15 @@ local tap = require('tap')
-- Test to demonstrate the incorrect JIT behaviour for different
-- power operation optimizations.
-- See also:
--- https://github.com/LuaJIT/LuaJIT/issues/684.
+-- https://github.com/LuaJIT/LuaJIT/issues/684,
+-- https://github.com/LuaJIT/LuaJIT/issues/817.
local test = tap.test('lj-684-pow-inconsistencies'):skipcond({
['Test requires JIT enabled'] = not jit.status(),
})
local tostring = tostring
-test:plan(4)
+test:plan(5)
jit.opt.start('hotloop=1')
@@ -64,6 +65,22 @@ jit.flush()
test:samevalues(res, ('consistent results for folding 2921 ^ 0.5'))
+-- -948388 ^ 3 = -0x1.7ad0e8ad7439dp+59.
+res = {}
+-- XXX: use local variable to prevent folding via parser.
+-- XXX: use stack slot out of trace to prevent constant folding.
+local corner_case_3 = -948388
+jit.on()
+for i = 1, 4 do
+ res[i] = corner_case_3 ^ 3
+end
+
+-- XXX: Prevent hotcount side effects.
+jit.off()
+jit.flush()
+
+test:samevalues(res, ('consistent results for int pow (-948388) ^ 3'))
+
-- Narrowing for non-constant base of power operation.
local function pow(base, power)
return base ^ power
--
2.41.0
More information about the Tarantool-patches
mailing list