[Tarantool-patches] [PATCH luajit v2] x64: Fix 64 bit shift code generation.

Maxim Kokryashkin max.kokryashkin at gmail.com
Tue Jun 13 15:42:36 MSK 2023


From: Mike Pall <mike>

Reported by Philipp Kutin.
Fix contributed by Peter Cawley.

(cherry-picked from commit 03a7ebca4f6819658cdaa12ba3af54a17b8035e9)

In a situation where a left bitwise rotation by a variable count
that has a 64-bit result is recorded on an x86-64 processor and
the result is supposed to end up in the `rcx` register, the value
could be written into `ecx` instead, thus being truncated to
32 bits. This patch fixes the described behavior, so the value
is now written into `rcx`.

Resulting assembly changes from the following before the patch:
| rol rsi, cl
| mov ecx, esi

to the following after the patch:
| rol rsi, cl
| mov rcx, rsi

Importantly, the same behavior is impossible with the right
rotation on machines with BMI2 support because there is a
BMI2 instruction for it, so it is handled differently.

Maxim Kokryashkin:
* added the description and the test for the problem

Part of tarantool/tarantool#8516
---
Changes in v2:
- Fixed comments as per review by Sergey

Branch: https://github.com/tarantool/luajit/tree/fckxorg/fix-bit-shift-generation
PR: https://github.com/tarantool/tarantool/pull/8727

 src/lj_asm_x86.h                              |  2 +-
 test/tarantool-tests/CMakeLists.txt           |  1 +
 .../fix-bit-shift-generation.test.lua         | 48 +++++++++++++++++++
 .../fix-bit-shift-generation/CMakeLists.txt   |  1 +
 .../libtestbitshift.c                         |  8 ++++
 5 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 test/tarantool-tests/fix-bit-shift-generation.test.lua
 create mode 100644 test/tarantool-tests/fix-bit-shift-generation/CMakeLists.txt
 create mode 100644 test/tarantool-tests/fix-bit-shift-generation/libtestbitshift.c

diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index e6c42c6d..63d332ca 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -2328,7 +2328,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
     dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
     if (dest == RID_ECX) {
       dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX));
-      emit_rr(as, XO_MOV, RID_ECX, dest);
+      emit_rr(as, XO_MOV, REX_64IR(ir, RID_ECX), dest);
     }
     right = irr->r;
     if (ra_noreg(right))
diff --git a/test/tarantool-tests/CMakeLists.txt b/test/tarantool-tests/CMakeLists.txt
index a428d009..d36271f1 100644
--- a/test/tarantool-tests/CMakeLists.txt
+++ b/test/tarantool-tests/CMakeLists.txt
@@ -54,6 +54,7 @@ macro(BuildTestCLib lib sources)
 endmacro()
 
 add_subdirectory(ffi-ccall)
+add_subdirectory(fix-bit-shift-generation)
 add_subdirectory(gh-4427-ffi-sandwich)
 add_subdirectory(gh-5813-resolving-of-c-symbols/both)
 add_subdirectory(gh-5813-resolving-of-c-symbols/hash)
diff --git a/test/tarantool-tests/fix-bit-shift-generation.test.lua b/test/tarantool-tests/fix-bit-shift-generation.test.lua
new file mode 100644
index 00000000..e3f30eae
--- /dev/null
+++ b/test/tarantool-tests/fix-bit-shift-generation.test.lua
@@ -0,0 +1,48 @@
+local tap = require('tap')
+local test = tap.test('fix-bit-shift-generation'):skipcond({
+  ['Test requires JIT enabled'] = not jit.status(),
+})
+
+local NTESTS = 4
+
+test:plan(NTESTS)
+
+local ffi = require('ffi')
+local bit = require('bit')
+local rol = bit.rol
+local shl = bit.lshift
+
+local testbitshift = ffi.load('testbitshift')
+ffi.cdef[[
+uint64_t
+testbitshift
+(const int arg1, const int arg2, const int arg3, const uint64_t arg4)
+]]
+
+local result = {}
+jit.opt.start('hotloop=1')
+
+for i = 1, NTESTS do
+  -- The rotation is performed beyond the 32-bit size, for
+  -- truncation to become noticeable. `testbitshift` is used to
+  -- ensure that the result of rotation goes into `rcx`, which
+  -- holds the fourth integer argument in the x86_64 System V ABI.
+  -- Although it is possible to use a function from the C standard
+  -- library for that, all of the suitable ones are variadic, and
+  -- variadics are recorded incorrectly on Apple Silicon.
+  result[i] = testbitshift.testbitshift(1, 1, 1, rol(1ULL, i + 32))
+  -- Resulting assembly for the `rol` instruction above changes
+  -- from the following before the patch:
+  -- | rol rsi, cl
+  -- | mov ecx, esi
+  --
+  -- to the following after the patch:
+  -- | rol rsi, cl
+  -- | mov rcx, rsi
+end
+
+for i = 1, NTESTS do
+  test:ok(result[i] == shl(1ULL, i + 32), 'valid rol')
+end
+
+os.exit(test:check() and 0 or 1)
diff --git a/test/tarantool-tests/fix-bit-shift-generation/CMakeLists.txt b/test/tarantool-tests/fix-bit-shift-generation/CMakeLists.txt
new file mode 100644
index 00000000..f85f875b
--- /dev/null
+++ b/test/tarantool-tests/fix-bit-shift-generation/CMakeLists.txt
@@ -0,0 +1 @@
+BuildTestCLib(libtestbitshift libtestbitshift.c)
diff --git a/test/tarantool-tests/fix-bit-shift-generation/libtestbitshift.c b/test/tarantool-tests/fix-bit-shift-generation/libtestbitshift.c
new file mode 100644
index 00000000..0785ebba
--- /dev/null
+++ b/test/tarantool-tests/fix-bit-shift-generation/libtestbitshift.c
@@ -0,0 +1,8 @@
+#include <stdint.h>
+
+uint64_t
+testbitshift
+(const int arg1, const int arg2, const int arg3, const uint64_t arg4)
+{
+	return arg4;
+}
-- 
2.40.1



More information about the Tarantool-patches mailing list