[Tarantool-patches] [PATCH luajit v1 4/5] Handle all types of errors during trace stitching.
Sergey Bronnikov
sergeyb at tarantool.org
Thu Jun 6 16:03:11 MSK 2024
Hi, Sergey,
thanks for the patch! Please see my comments.
On 22.04.2024 11:49, Sergey Kaplun wrote:
> From: Mike Pall <mike>
>
> Thanks to Sergey Kaplun and Peter Cawley.
>
> (cherry picked from commit d06beb0480c5d1eb53b3343e78063950275aa281)
>
> This commit is a follow-up for the commit
> 1b8216023d5a79814389f1c1affef27c15d9de27 ("Throw any errors before stack
> changes in trace stitching."). The patch prepends failures for the
> specific error to be thrown. Nevertheless, the error may be thrown due
> to retrying trace recording in the case when table bump optimization
> is enabled or when OOM is observed during reallocation of the snapshot
> or IR buffers.
>
> This patch adds the corresponding protected frame and rethrows the error
> after a fixup of the stack.
>
> This patch also tests the correctness of copying the error message to
> the top of the stack to get a valid "abort" reason in the `jit.dump`
> utility.
>
> Also, this patch fixes a non-ASCII space character in the comment for
> <lj-720-errors-before-stitch.test.lua>.
>
> Sergey Kaplun:
> * added the description and the test for the problem
>
> Part of tarantool/tarantool#9924
> ---
> src/lj_ffrecord.c | 21 ++++++--
> test/tarantool-tests/CMakeLists.txt | 1 +
> .../lj-1166-error-stitch-oom-ir-buff.test.lua | 46 ++++++++++++++++
> ...j-1166-error-stitch-oom-snap-buff.test.lua | 54 +++++++++++++++++++
> .../lj-1166-error-stitch-table-bump.test.lua | 38 +++++++++++++
> .../lj-1166-error-stitch/CMakeLists.txt | 1 +
> .../lj-1166-error-stitch/mockalloc.c | 51 ++++++++++++++++++
> .../lj-720-errors-before-stitch.test.lua | 40 +++++++++++++-
> 8 files changed, 245 insertions(+), 7 deletions(-)
> create mode 100644 test/tarantool-tests/lj-1166-error-stitch-oom-ir-buff.test.lua
> create mode 100644 test/tarantool-tests/lj-1166-error-stitch-oom-snap-buff.test.lua
> create mode 100644 test/tarantool-tests/lj-1166-error-stitch-table-bump.test.lua
> create mode 100644 test/tarantool-tests/lj-1166-error-stitch/CMakeLists.txt
> create mode 100644 test/tarantool-tests/lj-1166-error-stitch/mockalloc.c
>
> diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
> index e3ed80fb..ff14e9e4 100644
> --- a/src/lj_ffrecord.c
> +++ b/src/lj_ffrecord.c
> @@ -96,6 +96,14 @@ static ptrdiff_t results_wanted(jit_State *J)
> return -1;
> }
>
> +static TValue *rec_stop_stitch_cp(lua_State *L, lua_CFunction dummy, void *ud)
> +{
> + jit_State *J = (jit_State *)ud;
> + lj_record_stop(J, LJ_TRLINK_STITCH, 0);
> + UNUSED(L); UNUSED(dummy);
> + return NULL;
> +}
> +
> /* Trace stitching: add continuation below frame to start a new trace. */
> static void recff_stitch(jit_State *J)
> {
> @@ -106,10 +114,7 @@ static void recff_stitch(jit_State *J)
> TValue *nframe = base + 1 + LJ_FR2;
> const BCIns *pc = frame_pc(base-1);
> TValue *pframe = frame_prevl(base-1);
> -
> - /* Check for this now. Throwing in lj_record_stop messes up the stack. */
> - if (J->cur.nsnap >= (MSize)J->param[JIT_P_maxsnap])
> - lj_trace_err(J, LJ_TRERR_SNAPOV);
> + int errcode;
>
> /* Move func + args up in Lua stack and insert continuation. */
> memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
> @@ -134,13 +139,19 @@ static void recff_stitch(jit_State *J)
> J->baseslot += 2 + LJ_FR2;
> J->framedepth++;
>
> - lj_record_stop(J, LJ_TRLINK_STITCH, 0);
> + errcode = lj_vm_cpcall(L, NULL, J, rec_stop_stitch_cp);
>
> /* Undo Lua stack changes. */
> memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot);
> setframe_pc(base-1, pc);
> L->base -= 2 + LJ_FR2;
> L->top -= 2 + LJ_FR2;
> +
> + if (errcode) {
> + if (errcode == LUA_ERRRUN)
> + copyTV(L, L->top-1, L->top + (1 + LJ_FR2));
> + lj_err_throw(L, errcode); /* Propagate errors. */
> + }
> }
>
> /* Fallback handler for fast functions that are not recorded (yet). */
> diff --git a/test/tarantool-tests/CMakeLists.txt b/test/tarantool-tests/CMakeLists.txt
> index 56660932..d7c96078 100644
> --- a/test/tarantool-tests/CMakeLists.txt
> +++ b/test/tarantool-tests/CMakeLists.txt
> @@ -39,6 +39,7 @@ add_subdirectory(lj-802-panic-at-mcode-protfail)
> add_subdirectory(lj-flush-on-trace)
> add_subdirectory(lj-1004-oom-error-frame)
> add_subdirectory(lj-1066-fix-cur_L-after-coroutine-resume)
> +add_subdirectory(lj-1166-error-stitch)
>
> # The part of the memory profiler toolchain is located in tools
> # directory, jit, profiler, and bytecode toolchains are located
> diff --git a/test/tarantool-tests/lj-1166-error-stitch-oom-ir-buff.test.lua b/test/tarantool-tests/lj-1166-error-stitch-oom-ir-buff.test.lua
> new file mode 100644
> index 00000000..e3a5397d
> --- /dev/null
> +++ b/test/tarantool-tests/lj-1166-error-stitch-oom-ir-buff.test.lua
> @@ -0,0 +1,46 @@
> +local tap = require('tap')
> +
> +-- Test file to demonstrate unbalanced Lua stack after instruction
> +-- recording due to throwing an error at recording of a stitched
> +-- function.
> +-- See also:https://github.com/LuaJIT/LuaJIT/issues/1166.
> +
> +local test = tap.test('lj-1166-error-stitch-oom-snap-buff'):skipcond({
Should the name passed to tap.test match the test file name?
Currently it does not: it says "snap-buff", but this file is "ir-buff".
> + ['Test requires JIT enabled'] = not jit.status(),
> + ['Disabled on *BSD due to #4819'] = jit.os == 'BSD',
> +})
> +
> +test:plan(1)
> +
> +local mockalloc = require('mockalloc')
> +
> +local function create_chunk(n_slots)
I would add a comment like this:
--- a/test/tarantool-tests/lj-1166-error-stitch-oom-ir-buff.test.lua
+++ b/test/tarantool-tests/lj-1166-error-stitch-oom-ir-buff.test.lua
@@ -14,6 +14,18 @@ test:plan(1)
local mockalloc = require('mockalloc')
+-- Generate a Lua chunk like below:
+-- local s1
+-- local s2
+-- ...
+-- local sN
+-- for i = 1, 2 do
+-- s1 = i + 1
+-- s2 = i + 2
+-- ...
+-- sN = i + N
+-- math.modf(1)
+-- end
local function create_chunk(n_slots)
local chunk = ''
for i = 1, n_slots do
> + local chunk = ''
> + for i = 1, n_slots do
> + chunk = chunk .. ('local s%d\n'):format(i)
> + end
> + chunk = chunk .. 'for i = 1, 2 do\n'
> + -- Generate additional IR instructions.
> + for i = 1, n_slots do
> + chunk = chunk .. (' s%d = i + %d\n'):format(i, i)
> + end
> + -- `math.modf()` recording is NYI.
> + chunk = chunk .. ' math.modf(1)\n'
> + chunk = chunk .. 'end\n'
> + return chunk
> +end
> +
> +-- XXX: amount of slots is empirical.
> +local tracef = assert(loadstring(create_chunk(175)))
> +
> +jit.opt.start('hotloop=1', '-loop', '-fold')
> +
> +mockalloc.mock()
> +
> +tracef()
> +
> +mockalloc.unmock()
> +
> +test:ok(true, 'stack is balanced')
> +
> +test:done(true)
> diff --git a/test/tarantool-tests/lj-1166-error-stitch-oom-snap-buff.test.lua b/test/tarantool-tests/lj-1166-error-stitch-oom-snap-buff.test.lua
> new file mode 100644
> index 00000000..8d671f8d
> --- /dev/null
> +++ b/test/tarantool-tests/lj-1166-error-stitch-oom-snap-buff.test.lua
> @@ -0,0 +1,54 @@
> +local tap = require('tap')
> +
> +-- Test file to demonstrate unbalanced Lua stack after instruction
> +-- recording due to throwing an error at recording of a stitched
> +-- function.
> +-- See also:https://github.com/LuaJIT/LuaJIT/issues/1166.
> +
> +local test = tap.test('lj-1166-error-stitch-oom-snap-buff'):skipcond({
> + ['Test requires JIT enabled'] = not jit.status(),
> + ['Disabled on *BSD due to #4819'] = jit.os == 'BSD',
> +})
> +
> +test:plan(1)
> +
> +local mockalloc = require('mockalloc')
> +
> +local function create_chunk(n_conds)
The same as above: please add a comment with an example of the generated
Lua chunk.
> + local chunk = ''
> + chunk = chunk .. 'for i = 1, 2 do\n'
> + -- Each condition adds additional snapshot.
> + for i = 1, n_conds do
> + chunk = chunk .. (' if i < %d then end\n'):format(i + n_conds)
> + end
> + -- `math.modf()` recording is NYI.
> + chunk = chunk .. ' math.modf(1)\n'
> + chunk = chunk .. 'end\n'
> + return chunk
> +end
> +
> +-- XXX: Need to compile the cycle in the `create_chunk()` to
> +-- preallocate the snapshot buffer.
> +jit.opt.start('hotloop=1', '-loop', '-fold')
> +
> +-- XXX: Amount of slots is empirical.
> +local tracef = assert(loadstring(create_chunk(6)))
> +
> +-- XXX: Remove previous trace.
> +jit.off()
> +jit.flush()
> +
> +-- XXX: Update hotcounts to avoid hash collisions.
> +jit.opt.start('hotloop=1')
> +
> +jit.on()
> +
> +mockalloc.mock()
> +
> +tracef()
> +
> +mockalloc.unmock()
> +
> +test:ok(true, 'stack is balanced')
> +
> +test:done(true)
> diff --git a/test/tarantool-tests/lj-1166-error-stitch-table-bump.test.lua b/test/tarantool-tests/lj-1166-error-stitch-table-bump.test.lua
> new file mode 100644
> index 00000000..f2453bbe
> --- /dev/null
> +++ b/test/tarantool-tests/lj-1166-error-stitch-table-bump.test.lua
This test does not fail after reverting the patch.
> @@ -0,0 +1,38 @@
> +local tap = require('tap')
> +
> +-- Test file to demonstrate unbalanced Lua stack after instruction
> +-- recording due to throwing an error at recording of a stitched
> +-- function. The test fails with LUAJIT_ENABLE_TABLE_BUMP enabled.
> +-- See also:
> +-- *https://github.com/LuaJIT/LuaJIT/issues/606,
> +-- *https://github.com/LuaJIT/LuaJIT/issues/1166.
> +
> +local test = tap.test('lj-1166-error-stitch-table-bump'):skipcond({
> + ['Test requires JIT enabled'] = not jit.status(),
> +})
> +
> +test:plan(1)
> +
> +-- `math.modf` recording is NYI.
> +-- Local `modf` simplifies `jit.dump()` output.
> +local modf = math.modf
> +
> +jit.opt.start('hotloop=1')
> +
> +-- luacheck: no unused
> +local t
> +-- There is no need to run the trace itself. Just check the
> +-- correctness of a recording.
> +for i = 1, 2 do
> + t = {}
> + -- Cause table rehashing to trigger table bump optimization.
> + t[i] = i
> + -- Forcify stitch. This will throw an error at the end of
> + -- recording, since trace recording should be retried after
> + -- bytecode updating.
> + modf(1)
> +end
> +
> +test:ok(true, 'stack is balanced')
> +
> +test:done(true)
> diff --git a/test/tarantool-tests/lj-1166-error-stitch/CMakeLists.txt b/test/tarantool-tests/lj-1166-error-stitch/CMakeLists.txt
> new file mode 100644
> index 00000000..1ebf253b
> --- /dev/null
> +++ b/test/tarantool-tests/lj-1166-error-stitch/CMakeLists.txt
> @@ -0,0 +1 @@
> +BuildTestCLib(mockalloc mockalloc.c)
> diff --git a/test/tarantool-tests/lj-1166-error-stitch/mockalloc.c b/test/tarantool-tests/lj-1166-error-stitch/mockalloc.c
> new file mode 100644
> index 00000000..d6d3492e
> --- /dev/null
> +++ b/test/tarantool-tests/lj-1166-error-stitch/mockalloc.c
> @@ -0,0 +1,51 @@
> +#include "lua.h"
> +#include "lauxlib.h"
> +
> +#undef NDEBUG
> +#include <assert.h>
> +
> +static lua_Alloc old_allocf = NULL;
> +static void *old_alloc_state = NULL;
> +
> +/* Function to be used instead of the default allocator. */
> +static void *mock_allocf(void *ud, void *ptr, size_t osize, size_t nsize)
> +{
> + assert(old_allocf != NULL);
> + /*
> + * Check the specific reallocation related to the IR
> + * buffer or the snapshot buffer.
> + */
> + if (osize * 2 == nsize)
> + return NULL;
> + return old_allocf(ud, ptr, osize, nsize);
> +}
> +
> +static int mock(lua_State *L)
This is actually not a test mock.
According to the definition [1], a test mock imitates the behavior of a real
object. Your memory allocator behaves like the real allocator, but in some
cases it returns NULL instead of a memory address. What if we rename "mock"
to "allocator with fault injection"?
1. https://www.martinfowler.com/articles/mocksArentStubs.html
> +{
> + assert(old_allocf == NULL);
> + old_allocf = lua_getallocf(L, &old_alloc_state);
> + lua_setallocf(L, mock_allocf, old_alloc_state);
> + return 0;
> +}
> +
> +static int unmock(lua_State *L)
> +{
> + assert(old_allocf != NULL);
> + assert(old_allocf != mock_allocf);
> + lua_setallocf(L, old_allocf, old_alloc_state);
> + old_allocf = NULL;
> + old_alloc_state = NULL;
> + return 0;
> +}
> +
> +static const struct luaL_Reg mockalloc[] = {
> + {"mock", mock},
> + {"unmock", unmock},
> + {NULL, NULL}
> +};
> +
> +LUA_API int luaopen_mockalloc(lua_State *L)
> +{
> + luaL_register(L, "mockalloc", mockalloc);
> + return 1;
> +}
> diff --git a/test/tarantool-tests/lj-720-errors-before-stitch.test.lua b/test/tarantool-tests/lj-720-errors-before-stitch.test.lua
> index d750b721..6e8f70c2 100644
> --- a/test/tarantool-tests/lj-720-errors-before-stitch.test.lua
> +++ b/test/tarantool-tests/lj-720-errors-before-stitch.test.lua
> @@ -1,13 +1,27 @@
> local tap = require('tap')
> local test = tap.test('lj-720-errors-before-stitch'):skipcond({
> ['Test requires JIT enabled'] = not jit.status(),
> + ['Disabled on *BSD due to #4819'] = jit.os == 'BSD',
> })
> -test:plan(1)
>
> --- `math.modf` recording is NYI.
> +local jparse = require('utils').jit.parse
> +
> +-- `math.modf` recording is NYI.
> -- Local `modf` simplifies `jit.dump()` output.
> local modf = math.modf
> +
> +-- XXX: Avoid other traces compilation due to hotcount collisions
> +-- for predictable results.
> +jit.off()
> +jit.flush()
> +
> +test:plan(2)
> +
> +-- We only need the abort reason in the test.
> +jparse.start('t')
> +
> jit.opt.start('hotloop=1', 'maxsnap=1')
> +jit.on()
>
> -- The loop has only two iterations: the first to detect its
> -- hotness and the second to record it. The snapshot limit is
> @@ -17,5 +31,27 @@ for _ = 1, 2 do
> modf(1.2)
> end
>
> +local _, aborted_traces = jparse.finish()
> +
> +jit.off()
> +
> test:ok(true, 'stack is balanced')
> +
> +-- Tarantool may compile traces on the startup. These traces
> +-- already exceed the maximum snapshot amount we set after they
> +-- are compiled. Hence, there is no need to reallocate the
> +-- snapshot buffer, so the check for the snap size is not
> +-- triggered.
> +test:skipcond({
> + -- luacheck: no global
> + ['Impossible to predict the number of snapshots for Tarantool'] = _TARANTOOL,
> +})
> +
> +assert(aborted_traces and aborted_traces[1], 'aborted trace is persisted')
> +
> +-- We tried to compile only one trace.
> +local reason = aborted_traces[1][1].abort_reason
> +
> +test:like(reason, 'too many snapshots', 'abort reason is correct')
> +
> test:done(true)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.tarantool.org/pipermail/tarantool-patches/attachments/20240606/af7ced20/attachment.htm>
More information about the Tarantool-patches
mailing list