Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH luajit] ARM64: Fix {AHUV}LOAD specialized to nil/false/true.
@ 2023-01-25 18:57 Sergey Kaplun via Tarantool-patches
  2023-02-28  9:52 ` Maxim Kokryashkin via Tarantool-patches
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Sergey Kaplun via Tarantool-patches @ 2023-01-25 18:57 UTC (permalink / raw)
  To: Sergey Ostanevich, Maxim Kokryashkin; +Cc: tarantool-patches

From: Mike Pall <mike>

Reported by caohongqing.

(cherry picked from commit 5bf0da3d7c02f9959fa3a9fb721e0565137b70c8)

If there is high register pressure, and there are almost all registers
in use during the aforementioned assembling, the same register is chosen
as the one holding the given stack slot and the one holding the constant
value for the type comparison. As the result we get the following
assertion guard check in assembly:
| cmp   x0, x0, lsr #32
| bne   ->0
Which is always false.

This happens because the `tmp` register (used for loading constant type
to compare in assertion guard) is scratched from `gpr` register set, but
not from `allow` set, which is used during picking the register for slot
loading (at the begging `allow` and `gpr` sets are the same).

This patch changes `allow` set to `gpr` to fix the issue.

Sergey Kaplun:
* added the description and the test for the problem

Part of tarantool/tarantool#8069
---

Branch: https://github.com/tarantool/luajit/tree/skaplun/or-94-fix-bool-ahuvload
PR: https://github.com/tarantool/tarantool/pull/8214
Issues:
* https://github.com/tarantool/tarantool/issues/8069
* https://github.com/openresty/luajit2/pull/94

 src/lj_asm_arm64.h                            |   2 +-
 .../or-94-arm64-ir-ahuvload-bool.test.lua     | 145 ++++++++++++++++++
 2 files changed, 146 insertions(+), 1 deletion(-)
 create mode 100644 test/tarantool-tests/or-94-arm64-ir-ahuvload-bool.test.lua

diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 4aeb51f3..30d34ca1 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -1055,7 +1055,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
   } else {
     emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
-	    ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp);
+	    ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp);
   }
   if (ofs & FUSE_REG)
     emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
diff --git a/test/tarantool-tests/or-94-arm64-ir-ahuvload-bool.test.lua b/test/tarantool-tests/or-94-arm64-ir-ahuvload-bool.test.lua
new file mode 100644
index 00000000..a9b0071c
--- /dev/null
+++ b/test/tarantool-tests/or-94-arm64-ir-ahuvload-bool.test.lua
@@ -0,0 +1,145 @@
+local tap = require('tap')
+-- Test file to demonstrate the incorrect JIT assembling
+-- for IR_{AHUV}LOAD specialized to boolean.
+-- See also: https://github.com/openresty/luajit2/pull/94.
+
+-- If there is high register pressure, and there are almost
+-- all registers in use during the aforementioned assembling,
+-- the same register is chosen as the one holding the given
+-- stack slot and the one holding the constant value for the type
+-- comparison. As the result we get the following assertion
+-- guard check in assembly:
+-- | cmp   x0, x0, lsr #32
+-- | bne   ->0
+-- Which is always false.
+local ffi = require('ffi')
+-- Need for code generation.
+_G.ffi = ffi
+
+local traceinfo = require('jit.util').traceinfo
+
+-- Each payload will be recording with the corresponding IR.
+local TESTS = {
+  {irname = 'ALOAD', payload = 'aload()'},
+  {irname = 'HLOAD', payload = 'hload()'},
+  {irname = 'ULOAD', payload = 'uload()'},
+  {irname = 'VLOAD', payload = '...'},
+}
+local N_TESTS = #TESTS
+
+local test = tap.test('or-94-arm64-ir-ahuvload-bool')
+test:plan(N_TESTS)
+
+-- Functions to be inlined on trace to generate different
+-- types of IRs (ALOAD, HLOAD, ULOAD).
+-- Declare as global for code generation.
+local arr = {true}
+local function aload()
+  return arr[1]
+end
+_G.aload = aload
+
+local h = {data = true}
+local function hload()
+  local boolvalue = h.data
+  return boolvalue
+end
+_G.hload = hload
+
+do
+  local upvalue = true
+  local function uload()
+    return upvalue
+  end
+  -- Make upvalue muttable. Not really need to return this
+  -- function.
+  local function _()
+    upvalue = not upvalue
+  end
+  _G.uload = uload
+end
+
+-- This function generate code like the following:
+-- | local test_f(...)
+-- |   local r
+-- |   local rup1
+-- |   --[[...]]
+-- |   for _ = 1, 4 do
+-- |     r1 = ffi.cast("int", 1)
+-- |     --[[...]]
+-- |     r = main_payload()
+-- |     rup1 = r1
+-- |     --[[...]]
+-- |   end
+-- | end
+-- | return test_f
+-- Those `rn` variables before and after `main_payload` are
+-- required to generate enough register pressure (for GPR). Amount
+-- of repeats is empirical.
+-- Additional `test_f(...)` wrapper is needed for IR_VLOAD usage,
+-- when `main_payload` is just `...`.
+local function generate_payload(n_fillers, main_payload)
+  local code_chunk = 'local function test_f(...)\n'
+  code_chunk = code_chunk .. 'local r\n'
+  for i = 1, n_fillers do
+    code_chunk = code_chunk .. ('local rup%d\n'):format(i)
+  end
+  code_chunk = code_chunk .. 'for _ = 1, 4 do\n'
+  for i = 1, n_fillers do
+    code_chunk = code_chunk ..
+      ('local r%d = ffi.cast("int", %d)\n'):format(i, i)
+  end
+  code_chunk = code_chunk .. 'r = ' .. main_payload .. '\n'
+  for i = 1, n_fillers do
+    code_chunk = code_chunk .. ('rup%d = r%d\n'):format(i, i)
+  end
+  code_chunk = code_chunk .. 'end\nend\n'
+  code_chunk = code_chunk .. 'return test_f'
+  local f, err = loadstring(code_chunk, 'test_function')
+  assert(type(f) == 'function', err)
+  f = f()
+  assert(type(f) == 'function', 'returned generated value is not a function')
+  return f
+end
+
+-- Disable sink optimization to allocate more registers in a
+-- "convenient" way. 'hotexit' option is required to be sure that
+-- we will start a new trace on false-positive guard assertion.
+-- The new trace contains the same IR and so the same assertion
+-- guard. This trace will be executed, assertion guard failed
+-- again and the new third trace will be recorded. This trace will
+-- be the last one to record as far as iterations over cycle are
+-- finished and we returning from the function. The report of
+-- `jit.dump` before the patch is the following:
+-- | TRACE 1 start "test_function":29
+-- | TRACE 1 stop -> loop
+-- | TRACE 1 exit 0
+-- | TRACE 2 start 1/0 "test_function":30
+-- | TRACE 2 stop -> loop
+-- | TRACE 2 exit 0
+-- | TRACE 3 start 2/0 "test_function":30
+-- | TRACE 3 stop -> return
+-- On the other hand, after the patch we will have only 2 traces:
+-- the main one, and the second is recorded on exit from loop.
+-- Hence, the report of `jit.dump` will be the following:
+-- | TRACE 1 start "test_function":29
+-- | TRACE 1 stop -> loop
+-- | TRACE 1 exit 3
+-- | TRACE 2 start 1/3 "test_function":84
+-- | TRACE 2 stop -> return
+jit.opt.start('hotloop=1', 'hotexit=1', '-sink')
+-- Prevent random hotcount to be sure that the cycle is compiled
+-- first.
+jit.off()
+jit.flush()
+for i = 1, N_TESTS do
+  local f = generate_payload(26, TESTS[i].payload)
+  jit.on()
+  -- Argument is needed only for IR_VLOAD.
+  f(true)
+  jit.off()
+  test:ok(not traceinfo(3), 'not recorded sidetrace for IR_' .. TESTS[i].irname)
+  jit.flush()
+end
+
+os.exit(test:check() and 0 or 1)
-- 
2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-03-30 17:44 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-25 18:57 [Tarantool-patches] [PATCH luajit] ARM64: Fix {AHUV}LOAD specialized to nil/false/true Sergey Kaplun via Tarantool-patches
2023-02-28  9:52 ` Maxim Kokryashkin via Tarantool-patches
2023-03-02  5:52   ` Sergey Kaplun via Tarantool-patches
2023-03-06 15:29     ` Maxim Kokryashkin via Tarantool-patches
2023-03-10 14:49 ` Igor Munkin via Tarantool-patches
2023-03-30 17:37 ` Igor Munkin via Tarantool-patches

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox