* [Tarantool-patches] [PATCH luajit 1/2][v3] Fix embedded bytecode loader.
2023-10-20 13:17 [Tarantool-patches] [PATCH luajit 0/2][v3] Fix embedded bytecode loader Sergey Bronnikov via Tarantool-patches
@ 2023-10-20 13:17 ` Sergey Bronnikov via Tarantool-patches
2023-10-20 13:17 ` [Tarantool-patches] [PATCH luajit 2/2][v3] Followup fix for " Sergey Bronnikov via Tarantool-patches
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Sergey Bronnikov via Tarantool-patches @ 2023-10-20 13:17 UTC (permalink / raw)
To: tarantool-patches, Sergey Kaplun, max.kokryashkin, Igor Munkin
From: Mike Pall <mike>
(cherry-picked from commit 820339960123dc78a7ce03edf53fcf4fdae0e55d)
The original problem is specific to x32 and is as follows: the embedded
bytecode loader passed `LJ_MAX_BUF` as the chunk size, so when a chunk
with a bytecode library is loaded into memory at an address higher than
0x80000100, `LexState->pe`, which holds the address of the end of the
bytecode chunk in memory, wraps around and becomes smaller than
`LexState->p`, which holds the address of the beginning of the bytecode
chunk in memory. As a result, `bcread_fill()`, called by
`bcread_want()`, calls `memcpy()` with a very large size, which causes
a bus error on x86 and a segmentation fault on ARM Android.
The problem cannot be reproduced on the platforms supported by Tarantool
(ARM64, x86_64), so the test does not fail without the patch and covers
the patch only partially.
Sergey Bronnikov:
* added the description and the test
Part of tarantool/tarantool#9145
---
src/lib_package.c | 4 +--
src/lj_bcread.c | 10 +++----
src/lj_lex.c | 6 +++++
src/lj_lex.h | 1 +
test/tarantool-tests/CMakeLists.txt | 1 +
.../lj-549-bytecode-loader.test.lua | 27 +++++++++++++++++++
.../lj-549-bytecode-loader/CMakeLists.txt | 20 ++++++++++++++
.../lj-549-bytecode-loader/script.lua | 3 +++
8 files changed, 65 insertions(+), 7 deletions(-)
create mode 100644 test/tarantool-tests/lj-549-bytecode-loader.test.lua
create mode 100644 test/tarantool-tests/lj-549-bytecode-loader/CMakeLists.txt
create mode 100644 test/tarantool-tests/lj-549-bytecode-loader/script.lua
diff --git a/src/lib_package.c b/src/lib_package.c
index b49f0209..12603038 100644
--- a/src/lib_package.c
+++ b/src/lib_package.c
@@ -260,7 +260,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
lua_pop(L, 1);
if (bcdata) {
- if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
+ if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
return PACKAGE_ERR_LOAD;
return 0;
}
@@ -431,7 +431,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
if (lua_isnil(L, -1)) { /* Not found? */
const char *bcname = mksymname(L, name, SYMPREFIX_BC);
const char *bcdata = ll_bcsym(NULL, bcname);
- if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
+ if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
}
return 1;
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index cddf6ff1..48ec15e4 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -79,6 +79,7 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
ls->c = -1; /* Only bad if we get called again. */
break;
}
+ if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L);
if (n) { /* Append to buffer. */
n += (MSize)sz;
p = lj_buf_need(&ls->sb, n < len ? len : n);
@@ -90,20 +91,20 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
ls->p = buf;
ls->pe = buf + sz;
}
- } while (ls->p + len > ls->pe);
+ } while ((MSize)(ls->pe - ls->p) < len);
}
/* Need a certain number of bytes. */
static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
{
- if (LJ_UNLIKELY(ls->p + len > ls->pe))
+ if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
bcread_fill(ls, len, 1);
}
/* Want to read up to a certain number of bytes, but may need less. */
static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
{
- if (LJ_UNLIKELY(ls->p + len > ls->pe))
+ if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
bcread_fill(ls, len, 0);
}
@@ -463,8 +464,7 @@ GCproto *lj_bcread(LexState *ls)
setprotoV(L, L->top, pt);
incr_top(L);
}
- if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 ||
- L->top-1 != bcread_oldtop(L, ls))
+ if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
bcread_error(ls, LJ_ERR_BCBAD);
/* Pop off last prototype. */
L->top--;
diff --git a/src/lj_lex.c b/src/lj_lex.c
index cef3c683..6291705f 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -48,6 +48,11 @@ static LJ_NOINLINE LexChar lex_more(LexState *ls)
size_t sz;
const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
if (p == NULL || sz == 0) return LEX_EOF;
+ if (sz >= LJ_MAX_BUF) {
+ if (sz != ~(size_t)0) lj_err_mem(ls->L);
+ sz = ~(uintptr_t)0 - (uintptr_t)p;
+ ls->endmark = 1;
+ }
ls->pe = p + sz;
ls->p = p + 1;
return (LexChar)(uint8_t)p[0];
@@ -408,6 +413,7 @@ int lj_lex_setup(lua_State *L, LexState *ls)
ls->lookahead = TK_eof; /* No look-ahead token. */
ls->linenumber = 1;
ls->lastline = 1;
+ ls->endmark = 0;
lex_next(ls); /* Read-ahead first char. */
if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
(uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
diff --git a/src/lj_lex.h b/src/lj_lex.h
index ae05a954..a26e504a 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -73,6 +73,7 @@ typedef struct LexState {
BCInsLine *bcstack; /* Stack for bytecode instructions/line numbers. */
MSize sizebcstack; /* Size of bytecode stack. */
uint32_t level; /* Syntactical nesting level. */
+ int endmark; /* Trust bytecode end marker, even if not at EOF. */
} LexState;
LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
diff --git a/test/tarantool-tests/CMakeLists.txt b/test/tarantool-tests/CMakeLists.txt
index c15d6037..55226143 100644
--- a/test/tarantool-tests/CMakeLists.txt
+++ b/test/tarantool-tests/CMakeLists.txt
@@ -63,6 +63,7 @@ add_subdirectory(gh-5813-resolving-of-c-symbols/stripped)
add_subdirectory(gh-6098-fix-side-exit-patching-on-arm64)
add_subdirectory(gh-6189-cur_L)
add_subdirectory(lj-416-xor-before-jcc)
+add_subdirectory(lj-549-bytecode-loader)
add_subdirectory(lj-601-fix-gc-finderrfunc)
add_subdirectory(lj-727-lightuserdata-itern)
add_subdirectory(lj-802-panic-at-mcode-protfail)
diff --git a/test/tarantool-tests/lj-549-bytecode-loader.test.lua b/test/tarantool-tests/lj-549-bytecode-loader.test.lua
new file mode 100644
index 00000000..490d94fc
--- /dev/null
+++ b/test/tarantool-tests/lj-549-bytecode-loader.test.lua
@@ -0,0 +1,27 @@
+local tap = require('tap')
+local test = tap.test('lj-549-bytecode-loader')
+
+test:plan(2)
+
+-- Test creates a shared library with LuaJIT bytecode,
+-- loads the shared library as a Lua module and checks
+-- that no crash occurs.
+--
+-- Manual steps for reproducing are the following:
+--
+-- $ make HOST_CC='gcc -m32' TARGET_CFLAGS='-m32' \
+-- TARGET_LDFLAGS='-m32' \
+-- TARGET_SHLDFLAGS='-m32' \
+-- -f Makefile.original
+-- $ echo 'print("test")' > a.lua
+-- $ LUA_PATH="src/?.lua;;" luajit -b a.lua a.c
+-- $ gcc -m32 -fPIC -shared a.c -o a.so
+-- $ luajit -e "require('a')"
+-- Program received signal SIGBUS, Bus error
+
+local module_name = 'script'
+local ok, module = pcall(require, module_name)
+test:is(ok, true, 'bytecode loader works')
+test:is(module.msg, 'Lango team', 'message is ok')
+
+test:done(true)
diff --git a/test/tarantool-tests/lj-549-bytecode-loader/CMakeLists.txt b/test/tarantool-tests/lj-549-bytecode-loader/CMakeLists.txt
new file mode 100644
index 00000000..6431c682
--- /dev/null
+++ b/test/tarantool-tests/lj-549-bytecode-loader/CMakeLists.txt
@@ -0,0 +1,20 @@
+set(LIB_NAME "script")
+set(LUA_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${LIB_NAME}.lua)
+set(C_FILE ${LIB_NAME}.c)
+
+make_lua_path(LUA_PATH
+ PATHS
+ ${PROJECT_SOURCE_DIR}/src/?.lua
+ ${PROJECT_SOURCE_DIR}/src/jit/?.lua
+)
+
+add_custom_target(export_bc
+ COMMAND ${CMAKE_COMMAND} -E env LUA_PATH=${LUA_PATH} ${LUAJIT_BINARY} -b ${LUA_FILE} ${C_FILE}
+ DEPENDS luajit-main ${LUA_FILE}
+ BYPRODUCTS ${C_FILE}
+ COMMENT "Exporting bytecode to a C file"
+ VERBATIM
+)
+
+BuildTestCLib(${LIB_NAME} ${C_FILE})
+add_dependencies(${LIB_NAME} export_bc)
diff --git a/test/tarantool-tests/lj-549-bytecode-loader/script.lua b/test/tarantool-tests/lj-549-bytecode-loader/script.lua
new file mode 100644
index 00000000..e72162af
--- /dev/null
+++ b/test/tarantool-tests/lj-549-bytecode-loader/script.lua
@@ -0,0 +1,3 @@
+return {
+ msg = 'Lango team',
+}
--
2.34.1
^ permalink raw reply [flat|nested] 6+ messages in thread
* [Tarantool-patches] [PATCH luajit 2/2][v3] Followup fix for embedded bytecode loader.
2023-10-20 13:17 [Tarantool-patches] [PATCH luajit 0/2][v3] Fix embedded bytecode loader Sergey Bronnikov via Tarantool-patches
2023-10-20 13:17 ` [Tarantool-patches] [PATCH luajit 1/2][v3] " Sergey Bronnikov via Tarantool-patches
@ 2023-10-20 13:17 ` Sergey Bronnikov via Tarantool-patches
2024-02-08 15:52 ` [Tarantool-patches] [PATCH luajit 0/2][v3] Fix " Igor Munkin via Tarantool-patches
2024-02-15 13:49 ` Igor Munkin via Tarantool-patches
3 siblings, 0 replies; 6+ messages in thread
From: Sergey Bronnikov via Tarantool-patches @ 2023-10-20 13:17 UTC (permalink / raw)
To: tarantool-patches, Sergey Kaplun, max.kokryashkin, Igor Munkin
From: Mike Pall <mike>
(cherry-picked from commit e49863eda13d095b1a78fd4ca0fd3a6a9a17d782)
This patch follows up the previous one and limits the total size of a
chunk loaded by `lua_load` to `LJ_MAX_BUF - 1`.
The proposed test `lj-549-lua-load.test` checks corner cases of the
`lua_load` function and partially covers this and the previous patch.
Sergey Bronnikov:
* added the description and the test
Part of tarantool/tarantool#9145
---
src/lj_lex.c | 1 +
test/tarantool-c-tests/lj-549-lua-load.test.c | 111 ++++++++++++++++++
2 files changed, 112 insertions(+)
create mode 100644 test/tarantool-c-tests/lj-549-lua-load.test.c
diff --git a/src/lj_lex.c b/src/lj_lex.c
index 6291705f..13495c41 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -51,6 +51,7 @@ static LJ_NOINLINE LexChar lex_more(LexState *ls)
if (sz >= LJ_MAX_BUF) {
if (sz != ~(size_t)0) lj_err_mem(ls->L);
sz = ~(uintptr_t)0 - (uintptr_t)p;
+ if (sz >= LJ_MAX_BUF) sz = LJ_MAX_BUF-1;
ls->endmark = 1;
}
ls->pe = p + sz;
diff --git a/test/tarantool-c-tests/lj-549-lua-load.test.c b/test/tarantool-c-tests/lj-549-lua-load.test.c
new file mode 100644
index 00000000..74eaf716
--- /dev/null
+++ b/test/tarantool-c-tests/lj-549-lua-load.test.c
@@ -0,0 +1,111 @@
+#include "test.h"
+#include "utils.h"
+
+#include "lj_def.h"
+
+#define UNUSED(x) ((void)(x))
+
+/*
+ * Reader reports the chunk size ~(size_t)0 -- the only size above
+ * LJ_MAX_BUF that `lex_more` accepts and marks with endmark -- but
+ * NULL data. NOTE(review): `lex_more` tests for NULL first; confirm.
+ */
+static const char *
+bc_reader_with_endmark(lua_State *L, void *data, size_t *size)
+{
+ UNUSED(data);
+ *size = ~(size_t)0;
+
+ return NULL;
+}
+
+static int bc_loader_with_endmark(void *test_state)
+{
+ lua_State *L = test_state;
+ void *ud = NULL;
+ int res = lua_load(L, bc_reader_with_endmark, ud, "endmark");
+
+ /*
+ * The load must not end with a memory error, i.e. `lex_more`
+ * must not raise lj_err_mem for the ~(size_t)0 chunk size.
+ */
+ assert_true(res != LUA_ERRMEM);
+ assert_true(lua_gettop(L) == 1); /* Exactly one value is left by lua_load. */
+ lua_settop(L, 0); /* Drop everything from the Lua stack. */
+
+ return TEST_EXIT_SUCCESS;
+}
+
+enum bc_emission_state {
+ EMIT_BC, /* bc_reader_with_eof returns the chunk on its next call. */
+ EMIT_EOF, /* bc_reader_with_eof returns NULL with zero size. */
+};
+
+typedef struct {
+ enum bc_emission_state state; /* What bc_reader_with_eof emits next. */
+} dt;
+
+/*
+ * Reader returns a two-byte "bytecode" chunk on the first call and
+ * NULL with zero size on the calls after it -- the same condition
+ * that `lex_more` maps to LEX_EOF.
+ */
+static const char *
+bc_reader_with_eof(lua_State *L, void *data, size_t *size)
+{
+ UNUSED(L);
+ dt *test_data = (dt *)data;
+ if (test_data->state == EMIT_EOF) {
+ *size = 0;
+ return NULL;
+ }
+
+ static char *bc_chunk = NULL; /* Static, so the chunk outlives this call. */
+
+ /*
+ * Minimal size of a buffer with bytecode:
+ * signature (1 byte) and a bytecode itself (1 byte).
+ */
+ size_t sz = 2;
+ free(bc_chunk); /* Release the chunk of a previous run, if any. */
+ bc_chunk = malloc(sz); /* NOTE(review): unchecked; only bc_chunk[0] is set below. */
+ /*
+ * `lua_load` automatically detects whether the chunk is text
+ * or binary and loads it accordingly. The test needs the
+ * *bytecode* code path, so it has to satisfy the check in
+ * `lj_lex_setup` that decides by the first character
+ * whether the input is bytecode or text.
+ * Hence `LUA_SIGNATURE[0]` is put at the beginning of the
+ * allocated region.
+ */
+ bc_chunk[0] = LUA_SIGNATURE[0];
+ *size = sz;
+ test_data->state = EMIT_EOF; /* Every later call reports the end of input. */
+
+ return bc_chunk;
+}
+
+static int bc_loader_with_eof(void *test_state)
+{
+ lua_State *L = test_state;
+ dt test_data = {0};
+ test_data.state = EMIT_BC; /* The first reader call returns the chunk. */
+ int res = lua_load(L, bc_reader_with_eof, &test_data, "eof");
+ assert_true(res == LUA_ERRSYNTAX); /* The two-byte chunk is expected to be rejected. */
+ lua_settop(L, 0); /* Drop everything from the Lua stack. */
+
+ return TEST_EXIT_SUCCESS;
+}
+
+int main(void)
+{
+ lua_State *L = utils_lua_init();
+ const struct test_unit tgroup[] = {
+ test_unit_def(bc_loader_with_endmark),
+ test_unit_def(bc_loader_with_eof)
+ };
+
+ const int test_result = test_run_group(tgroup, L); /* L is presumably passed to each test as test_state. */
+ utils_lua_close(L); /* Close the state before reporting the group result. */
+ return test_result;
+}
--
2.34.1
^ permalink raw reply [flat|nested] 6+ messages in thread