Sergey,

thanks for the patch! See my comments below.

Sergey

On 22.04.2024 11:49, Sergey Kaplun wrote:

From: Mike Pall <mike>

Thanks to Sergey Kaplun.

(cherry picked from commit b8b49bf3954b23e32e34187a6ada00021c26e172)

The previous commit doesn't handle the case when the error code is
`LUA_ERRMEM`. This patch adds a workaround by using the generic error
message.

Sergey Kaplun:
* added the description and the test for the problem

Part of tarantool/tarantool#9924
---
 src/lj_ffrecord.c                             |  2 +
 .../lj-1166-error-stitch-oom-ir-buff.test.lua | 41 ++++++++++++++++++-
 ...j-1166-error-stitch-oom-snap-buff.test.lua | 37 +++++++++++++++--
 3 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index ff14e9e4..d5fc081e 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -150,6 +150,8 @@ static void recff_stitch(jit_State *J)
   if (errcode) {
     if (errcode == LUA_ERRRUN)
       copyTV(L, L->top-1, L->top + (1 + LJ_FR2));
+    else
+      setintV(L->top-1, (int32_t)LJ_TRERR_RECERR);
     lj_err_throw(L, errcode);  /* Propagate errors. */
   }
 }
diff --git a/test/tarantool-tests/lj-1166-error-stitch-oom-ir-buff.test.lua b/test/tarantool-tests/lj-1166-error-stitch-oom-ir-buff.test.lua
index e3a5397d..cf3ab0f5 100644
--- a/test/tarantool-tests/lj-1166-error-stitch-oom-ir-buff.test.lua
+++ b/test/tarantool-tests/lj-1166-error-stitch-oom-ir-buff.test.lua
@@ -10,10 +10,18 @@ local test = tap.test('lj-1166-error-stitch-oom-snap-buff'):skipcond({
   ['Disabled on *BSD due to #4819'] = jit.os == 'BSD',
 })
 
-test:plan(1)
-
+local jparse = require('utils').jit.parse
 local mockalloc = require('mockalloc')
 
+local IS_DUALNUM = tostring(tonumber('-0')) ~= tostring(-0)
+
+-- XXX: Avoid other traces compilation due to hotcount collisions
+-- for predictable results.
+jit.off()
+jit.flush()
+
+test:plan(2)
+
 local function create_chunk(n_slots)
   local chunk = ''
   for i = 1, n_slots do
@@ -33,6 +41,10 @@ end
 -- XXX: amount of slots is empirical.
 local tracef = assert(loadstring(create_chunk(175)))
 
+-- We only need the abort reason in the test.
+jparse.start('t')
+
+jit.on()
 jit.opt.start('hotloop=1', '-loop', '-fold')
 
 mockalloc.mock()
@@ -41,6 +53,31 @@ tracef()
 
 mockalloc.unmock()
 
+local _, aborted_traces = jparse.finish()
+
+jit.off()
+
 test:ok(true, 'stack is balanced')
 
+-- Tarantool may compile traces on the startup. These traces
+-- already exceed the maximum IR amount before the trace in this
+-- test is compiled. Hence, there is no need to reallocate the IR
+-- buffer, so the check for the IR size is not triggered.
+test:skipcond({
+  -- luacheck: no global

I made a patch that removes inline suppressions [1].

I propose to merge it and remove inline suppressions in your patch series too.

[1]: https://lists.tarantool.org/tarantool-patches/88eab16fca9056a057df5506a0af637c8d4a0ffd.1717682341.git.sergeyb@tarantool.org/T/#u

+  ['Impossible to predict the number of IRs for Tarantool'] = _TARANTOOL,
+  -- The amount of IR for traces is different for non x86/x64
+  -- arches and DUALNUM mode.
+  ['Disabled for non-x86_64 arches'] = jit.arch ~= 'x64' and jit.arch ~= 'x86',
+  ['Disabled for DUALNUM mode'] = IS_DUALNUM,
+})
+
+assert(aborted_traces and aborted_traces[1], 'aborted trace is persisted')
+
+-- We tried to compile only one trace.
+local reason = aborted_traces[1][1].abort_reason
+
+test:like(reason, 'error thrown or hook called during recording',
+          'abort reason is correct')
+
 test:done(true)
diff --git a/test/tarantool-tests/lj-1166-error-stitch-oom-snap-buff.test.lua b/test/tarantool-tests/lj-1166-error-stitch-oom-snap-buff.test.lua
index 8d671f8d..8bbdd96b 100644
--- a/test/tarantool-tests/lj-1166-error-stitch-oom-snap-buff.test.lua
+++ b/test/tarantool-tests/lj-1166-error-stitch-oom-snap-buff.test.lua
@@ -10,10 +10,16 @@ local test = tap.test('lj-1166-error-stitch-oom-snap-buff'):skipcond({
   ['Disabled on *BSD due to #4819'] = jit.os == 'BSD',
 })
 
-test:plan(1)
-
+local jparse = require('utils').jit.parse
 local mockalloc = require('mockalloc')
 
+-- XXX: Avoid other traces compilation due to hotcount collisions
+-- for predictable results.
+jit.off()
+jit.flush()
+
+test:plan(2)
+
 local function create_chunk(n_conds)
   local chunk = ''
   chunk = chunk .. 'for i = 1, 2 do\n'
@@ -27,6 +33,7 @@ local function create_chunk(n_conds)
   return chunk
 end
 
+jit.on()
 -- XXX: Need to compile the cycle in the `create_chunk()` to
 -- preallocate the snapshot buffer.
 jit.opt.start('hotloop=1', '-loop', '-fold')
@@ -38,9 +45,11 @@ local tracef = assert(loadstring(create_chunk(6)))
 jit.off()
 jit.flush()
 
+-- We only need the abort reason in the test.
+jparse.start('t')

Same comment as in the previous mail - let's add a comment regarding 't'.

+
 -- XXX: Update hotcounts to avoid hash collisions.
 jit.opt.start('hotloop=1')
-
 jit.on()
 
 mockalloc.mock()
@@ -49,6 +58,28 @@ tracef()
 
 mockalloc.unmock()

Same comment as in the previous mail - let's avoid the name 'mock' here.

 
+local _, aborted_traces = jparse.finish()
+
+jit.off()
+
 test:ok(true, 'stack is balanced')
 
+-- Tarantool may compile traces on the startup. These traces
+-- already exceed the maximum snapshot amount before the trace in
+-- this test is compiled. Hence, there is no need to reallocate
+-- the snapshot buffer, so the check for the snap size is not
+-- triggered.
+test:skipcond({
+  -- luacheck: no global
+  ['Impossible to predict the number of snapshots for Tarantool'] = _TARANTOOL,
+})
+
+assert(aborted_traces and aborted_traces[1], 'aborted trace is persisted')
+
+-- We tried to compile only one trace.
+local reason = aborted_traces[1][1].abort_reason
+
+test:like(reason, 'error thrown or hook called during recording',
+          'abort reason is correct')
+
 test:done(true)