[Tarantool-patches] [PATCH luajit] IR_MIN/IR_MAX is non-commutative due to underlying FPU ops.

Tue Oct 24 13:14:20 MSK 2023

From: Mike Pall <mike>

Thanks to Peter Cawley.

(cherry-picked from commit 7a2b83a0c5d980bf3db0aeda33c79e7bb4b3da01)

Even after the commit c05d103305da0626e025dbf81370ca9f4f788c83 ("Fix
math.min()/math.max() inconsistencies."), some of the corner cases (see
tests in the commit for details) for `math.min()`/`math.max()` still
inconsistent in the JIT and the VM. This happens because `IR_MIN` and
`IR_MAX` are marked as commutative, so their operands were swapped by
`asm_swapops()`. As a result, because `minsd`[1]/`maxsd`[2] instructions
don't change the sign byte of the destination register, its sign is
preserved if we compare 0 with -0. When we compare something with NaN,
the second source operand (either a NaN or a valid floating-point value)
is written to the result. Hence, swapping the operands changed the
resulting value.

This patch removes the commutative flag from the aforementioned IRs to
prevent swapping of their operands.

[1]: https://c9x.me/x86/html/file_module_x86_id_173.html
[2]: https://c9x.me/x86/html/file_module_x86_id_168.html

Sergey Kaplun:
* added the description and the test for the problem

Part of tarantool/tarantool#9145
---

Branch: https://github.com/tarantool/luajit/tree/skaplun/lj-1082-min-max-0-commutative
Tarantool PR: https://github.com/tarantool/tarantool/pull/9294
Related Issues:
* https://github.com/tarantool/tarantool/issues/9145
* https://github.com/LuaJIT/LuaJIT/issues/1082
* https://github.com/LuaJIT/LuaJIT/issues/957

 src/lj_ir.h                                   |  4 +-
 test/tarantool-tests/gh-6163-min-max.test.lua | 23 ++++-------
 .../lj-1082-min-max-0-commutative.test.lua    | 41 +++++++++++++++++++
 3 files changed, 51 insertions(+), 17 deletions(-)
 create mode 100644 test/tarantool-tests/lj-1082-min-max-0-commutative.test.lua

diff --git a/src/lj_ir.h b/src/lj_ir.h
index 46af54e4..53e9dfd6 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -76,8 +76,8 @@
   \
   _(ABS,	N , ref, ref) \
   _(LDEXP,	N , ref, ref) \
-  _(MIN,	C , ref, ref) \
-  _(MAX,	C , ref, ref) \
+  _(MIN,	N , ref, ref) \
+  _(MAX,	N , ref, ref) \
   _(FPMATH,	N , ref, lit) \
   \
   /* Overflow-checking arithmetic ops. */ \
diff --git a/test/tarantool-tests/gh-6163-min-max.test.lua b/test/tarantool-tests/gh-6163-min-max.test.lua
index 4bc6155c..f52cb5ca 100644
--- a/test/tarantool-tests/gh-6163-min-max.test.lua
+++ b/test/tarantool-tests/gh-6163-min-max.test.lua
@@ -3,11 +3,6 @@ local test = tap.test('gh-6163-jit-min-max'):skipcond({
   ['Test requires JIT enabled'] = not jit.status(),
 })
 
-local x86_64 = jit.arch == 'x86' or jit.arch == 'x64'
--- XXX: table to use for dummy check for some inconsistent results
--- on the x86/64 architecture.
-local DUMMY_TAB = {}
-
 test:plan(18)
 --
 -- gh-6163: math.min/math.max inconsistencies.
@@ -110,17 +105,17 @@ result = {}
 for k = 1, 4 do
     result[k] = min(x, min(x, nan))
 end
--- FIXME: results are still inconsistent for the x86/64 architecture.
+-- See also https://github.com/LuaJIT/LuaJIT/issues/957.
 -- expected: nan nan nan nan
-test:samevalues(x86_64 and DUMMY_TAB or result, 'math.min: comm_dup_minmax')
+test:samevalues(result, 'math.min: comm_dup_minmax')
 
 result = {}
 for k = 1, 4 do
     result[k] = max(x, max(x, nan))
 end
--- FIXME: results are still inconsistent for the x86/64 architecture.
+-- See also https://github.com/LuaJIT/LuaJIT/issues/957.
 -- expected: nan nan nan nan
-test:samevalues(x86_64 and DUMMY_TAB or result, 'math.max: comm_dup_minmax')
+test:samevalues(result, 'math.max: comm_dup_minmax')
 
 -- The following optimization should be disabled:
 -- (x o k1) o k2 ==> x o (k1 o k2)
@@ -179,10 +174,9 @@ result = {}
 for k = 1, 4 do
   result[k] = min(1, max(1, nan))
 end
--- FIXME: results are still inconsistent for the x86/64 architecture.
+-- See also https://github.com/LuaJIT/LuaJIT/issues/957.
 -- expected: nan nan nan nan
-test:samevalues(x86_64 and DUMMY_TAB or result,
-                'min-max-case2: reassoc_minmax_right')
+test:samevalues(result, 'min-max-case2: reassoc_minmax_right')
 
 result = {}
 for k = 1, 4 do
@@ -195,10 +189,9 @@ result = {}
 for k = 1, 4 do
   result[k] = max(1, min(1, nan))
 end
--- FIXME: results are still inconsistent for the x86/64 architecture.
+-- See also https://github.com/LuaJIT/LuaJIT/issues/957.
 -- expected: nan nan nan nan
-test:samevalues(x86_64 and DUMMY_TAB or result,
-                'max-min-case2: reassoc_minmax_right')
+test:samevalues(result, 'max-min-case2: reassoc_minmax_right')
 
 -- XXX: If we look into the disassembled code of `lj_vm_foldarith()`
 -- we can see the following:
diff --git a/test/tarantool-tests/lj-1082-min-max-0-commutative.test.lua b/test/tarantool-tests/lj-1082-min-max-0-commutative.test.lua
new file mode 100644
index 00000000..99bfa298
--- /dev/null
+++ b/test/tarantool-tests/lj-1082-min-max-0-commutative.test.lua
@@ -0,0 +1,41 @@
+local tap = require('tap')
+
+-- Test file to demonstrate LuaJIT inconsistent behaviour for
+-- `math.min()` and `math.max()` operations when comparing 0 with
+-- -0 in the JIT and the VM.
+-- See also: https://github.com/LuaJIT/LuaJIT/issues/1082.
+
+local test = tap.test('lj-1082-min-max-0-commutative'):skipcond({
+  ['Test requires JIT enabled'] = not jit.status(),
+})
+
+test:plan(4)
+
+-- XXX: simplify `jit.dump()` output.
+local min, max = math.min, math.max
+
+-- Use local variables to prevent fold optimization.
+local minus_zero = -0
+local zero = 0
+
+local results_min_pm = {}
+local results_min_mp = {}
+local results_max_mp = {}
+local results_max_pm = {}
+
+jit.opt.start('hotloop=1')
+for i = 1, 4 do
+  -- The resulting value is the second parameter for comparison.
+  -- Use `tostring()` to distinguish 0 and -0.
+  results_min_pm[i] = tostring(min(zero, minus_zero))
+  results_min_mp[i] = tostring(min(minus_zero, zero))
+  results_max_pm[i] = tostring(max(zero, minus_zero))
+  results_max_mp[i] = tostring(max(minus_zero, zero))
+end
+
+test:samevalues(results_min_pm, 'min(0, -0)')
+test:samevalues(results_min_mp, 'min(-0, 0)')
+test:samevalues(results_max_pm, 'max(0, -0)')
+test:samevalues(results_max_mp, 'max(-0, 0)')
+
+test:done(true)
-- 
2.42.0