Hello, Sergey,


thanks for the comments. See my comments below.

Everything is fixed and force-pushed to the branch.

Sergey

On 05.02.2025 12:48, Sergey Kaplun via Tarantool-patches wrote:
Hi, Segey!
Thanks for the patch!
It is really nice that we covered all branches, nice work!

LGTM, after fixing a bunch of nits below.

Side note: it is better to send v2 in the separate letter to glance it
if necessary.

On 29.01.25, Sergey Bronnikov wrote:
From: Mike Pall <mike>

Thanks to Sergey Bronnikov and Peter Cawley.

(cherry picked from commit b138ccfa918518a152bc830fef3d53cd0a922e36)

When recording a trace using a Lua code like
`repeat until `a >= 'b' > 'b'` a Lua error is encountered
(`attempt to compare string with nil`), which (along with
raising the error) causes an asynchronous trace abort.
The trace abort remains pending until the call of
`lua_pcall(L, 2, 0, 0)` that actually calls `jit.attach(trace_cb, nil)`.
I would rather say:
| ... until the next reentrance to the LuaJIT VM via `lj_vm_call()` or
| `lj_vm_pcall().
Updated.

Also, please add this comment to the C test header to make it clear why
do we use additional `lua_pcall()`-s with dummy functions.
Done.

      
On handling abort LuaJIT is searching for the topmost Lua frame
on the stack, that was active when the trace abort happened,
it is needed to generate a better error message.
Unfortunately, because the abort was due to an error, and
the error was caught by a `lua_pcall` with unspecified
Typo: s/unspecified/an unspecified/
Fixed.

      
error function (4th argument), the Lua frame that caused the abort
was already removed as part of error processing, so the search
cannot find it. Furthermore, in this particular case, there are
no Lua frames on the stack, which isn't something the search code
had considered possible.

Sergey Bronnikov:
* added the description and the test for the problem

Part of tarantool/tarantool#10709
This is 11055 ticket now.
Fixed, thanks!

---
Branch: https://github.com/tarantool/luajit/tree/ligurio/lj-1087-vm-handler-call

Related issues:
- https://github.com/LuaJIT/LuaJIT/issues/1087
- https://github.com/tarantool/tarantool/issues/10709

 src/lj_trace.c                                |  26 +--
 test/tarantool-c-tests/CMakeLists.txt         |   4 +
 .../lj-1087-vm-handler-call.test.c            | 170 ++++++++++++++++++
 3 files changed, 190 insertions(+), 10 deletions(-)
 create mode 100644 test/tarantool-c-tests/lj-1087-vm-handler-call.test.c

diff --git a/src/lj_trace.c b/src/lj_trace.c
index 94cb27e5..6b97cc13 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
<snipped>

diff --git a/test/tarantool-c-tests/CMakeLists.txt b/test/tarantool-c-tests/CMakeLists.txt
index c4a402d0..0e7c4698 100644
--- a/test/tarantool-c-tests/CMakeLists.txt
+++ b/test/tarantool-c-tests/CMakeLists.txt
@@ -36,6 +36,10 @@ add_test_suite_target(tarantool-c-tests
   DEPENDS libluajit libtest tarantool-c-tests-build
 )
 
+if(NOT LUAJIT_DISABLE_JIT)
+  AppendFlags(TESTS_C_FLAGS "-DLJ_HASJIT")
+endif(NOT LUAJIT_DISABLE_JIT)
+
It is better just to include <lj_arch.h> in the test instead. See the example
of usage in the <misclib-getmetrics-capi.test.c>. Therefore this part is
excess.
Fixed.

      
 set(CTEST_SRC_SUFFIX ".test.c")
 file(GLOB tests "${CMAKE_CURRENT_SOURCE_DIR}/*${CTEST_SRC_SUFFIX}")
 foreach(test_source ${tests})
diff --git a/test/tarantool-c-tests/lj-1087-vm-handler-call.test.c b/test/tarantool-c-tests/lj-1087-vm-handler-call.test.c
new file mode 100644
index 00000000..b7db9857
--- /dev/null
+++ b/test/tarantool-c-tests/lj-1087-vm-handler-call.test.c
@@ -0,0 +1,170 @@
+#include <assert.h>
It is better to undef the `NDEBUG` macro for the non-Debug build to
avoid hidden errors in the tests. See other C tests for examples.

Fixed.
+#include <stdbool.h>
This include looks excess.

Removed.



+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "test.h"
+#include "utils.h"
+
+/*
+ * Test file to demonstrate a segmentation fault under
+ * AddressSanitizer, when C function is used as a VM handler
I suppose ASan isn't required to reproduce the issue.
Removed.

      
+ * in LuaJIT:
+ *
+ * Program received signal SIGSEGV, Segmentation fault.
+ * 0x000055555557e77d in trace_abort (J=0x7ffff7f9b6b8) at lj_trace.c:615
+ * 615         lj_vmevent_send(L, TRACE,
+ * (gdb) bt
+ *
+ * See details in https://github.com/LuaJIT/LuaJIT/issues/1087.
+ */
+
+#define UNUSED(x) ((void)(x))
Friendly reminder: You may rebase on the current master and not define
this macro here.
Done.

+
Minor: Please add the comment that `event == NULL` disables the
corresponding handler.

Added.



+static void jit_attach(lua_State *L, void *cb, const char *event)
Nit: Why don't you use `lua_CFunction` here to avoid the cast to it
below?
Fixed.

+{
+	lua_getglobal(L, "jit");
+	lua_getfield(L, -1, "attach");
+	lua_pushcfunction(L, (lua_CFunction)cb);
+	if (event != NULL) {
Nit: Brackets are excess.
Feel free to ignore.

Removed, anyway it is a test code and without braces it looks more clean.

However, I'm always enclosing statements in loop/if/else in braces,

because I remember about "goto fail" issue [1].


1. https://dwheeler.com/essays/apple-goto-fail.html


      
+		lua_pushstring(L, event);
+	} else {
+		lua_pushnil(L);
+	}
+	lua_pcall(L, 2, 0, 0);
+}
+
+static int trace_cb(lua_State *L) {
+	UNUSED(L);
+	return 0;
+}
+
+static int handle_luafunc_frame(void *test_state)
+{
<snipped>

+}
+
+static int nop(lua_State *L)
+{
Looks like here should be `UNUSED(L)` too.
Added.

+	return 0;
+}
This function is similar to the `trace_cb()` maybe we just use `nop()`
as a trace callback always?
Good idea! Removed `trace_cb()` and use `nop()` instead.

      
+
+static int cframe(lua_State *L)
+{
+	int rc = luaL_dostring(L, "repeat until nil > 1");
+	assert(rc == 1);
+	lua_pop(L, 1);		/* Remove errmsg. */
Nit: Please move the comment to the previous line.

Fixed.



      
+
+	lua_pushcfunction(L, nop);
+	lua_call(L, 0, 0);
+
+	return 0;
+}
+
+static int handle_c_frame(void *test_state)
+{
+	/* Setup. */
+	lua_State *L = test_state;
+	jit_attach(L, (void *)trace_cb, "trace");
+
+	lua_pushcfunction(L, cframe);
+	lua_call(L, 0, 0);
+
+	/* Teardown. */
+	lua_settop(L, 0);
+
+	return TEST_EXIT_SUCCESS;
+}
+
+static int global_f(lua_State* L)
+{
+	lua_pushstring(L, "М");
+	lua_pushstring(L, "И");
+	lua_pushstring(L, "Р");
Please use only ASCII characters in the strings.
Ok, replaced with "L", "U", "A". (later removed func at all, see next comment)

+	lua_concat(L, 3);
All this content looks excess, may we just use `nop()` as a `global_f()`
instead?
Right, replaced with "nop()".

      
+
+	return 1;
+}
+
+static int handle_cont_frame(void *test_state)
+{
+	const char lua_chunk[] =
+		"local t = setmetatable({}, { __index = global_f })"
+		"for i = 1, 4 do"
+		"    _ = t[1]"
+		"end";
+
+	/* Setup. */
+	lua_State *L = test_state;
+	jit_attach(L, (void *)trace_cb, "trace");
+	int res = luaL_dostring(L, "jit.opt.start('minstitch=16')");
It is better to use the 32767 (REF_DROP - REF_BIAS) instead of 16 (with
the corresponding comment). See the commit
0fdf06b456e6a8492308e53c96316a25eb058f30 ("test: relax JIT setup in
misc.getmetrics test") for the details.
Fixed.

+	assert(res == 0);
+	lua_pushcfunction(L, global_f);
+	lua_setglobal(L, "global_f");
+
+	res = luaL_loadstring(L, lua_chunk);
+	assert(res == 0);
+	lua_pcall(L, 0, LUA_MULTRET, 0);
+
+	/* Teardown. */
+	lua_settop(L, 0);
It is better to restore the default minstitch value here too.
Fixed, restored to 0.

+
+	return TEST_EXIT_SUCCESS;
+}
+
+static int handle_bottom_frame(void *test_state)
+{
+	lua_State *L = test_state;
+
+	/* Attach VM call handler. */
+	jit_attach(L, (void *)trace_cb, "trace");
+
+	/* Load a Lua code that generate a trace abort. */
+	int rc = luaL_dostring(L, "repeat until nil > 1");
+	assert(rc == 1);
+
+	/* Triggers segmentation fault. */
+	jit_attach(L, (void *)trace_cb, NULL);
+
+	/* Teardown. */
+	lua_settop(L, 0);
+
+	return TEST_EXIT_SUCCESS;
+}
+
+int main(void)
+{
+#if !defined(LJ_HASJIT)
It is better to use early return here `if (!LJ_HASJIT)`.

+	return skip_all("JIT is disabled");
+#else /* LJ_HASJIT */
+	lua_State *L = utils_lua_init();
+	const struct test_unit tgroup[] = {
+		test_unit_def(handle_luafunc_frame),
+		test_unit_def(handle_bottom_frame),
+		test_unit_def(handle_cont_frame),
+		test_unit_def(handle_c_frame),
+	};
+	luaL_openlibs(L);
`luaL_openlibs()` is already called inside `utils_lua_init()`.


Removed.


+	int res = luaL_dostring(L, "jit.opt.start('hotloop=1')");
+	assert(res == 0);
+	const int test_result = test_run_group(tgroup, L);
+	utils_lua_close(L);
+	return test_result;
+#endif /* LJ_HASJIT */
+}
-- 
2.34.1