From: Sergey Bronnikov via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: tarantool-patches@dev.tarantool.org, Sergey Kaplun <skaplun@tarantool.org>, max.kokryashkin@gmail.com Subject: [Tarantool-patches] [PATCH luajit 1/2][v2] Fix embedded bytecode loader. Date: Thu, 31 Aug 2023 14:49:01 +0300 [thread overview] Message-ID: <d5a507b956b6fb0f05877e5da487d47bec610776.1693480177.git.sergeyb@tarantool.org> (raw) Message-ID: <20230831114901.G57Mkm-daVo_vuFqfDUVJA8a6Hge71n-qyN494a4LTs@z> (raw) In-Reply-To: <cover.1693480177.git.sergeyb@tarantool.org> From: Sergey Bronnikov <sergeyb@tarantool.org> (cherry-picked from commit 820339960123dc78a7ce03edf53fcf4fdae0e55d) The original problem is specific to x32 and is as follows: when a chunk with a bytecode library is loaded into memory at an address higher than 0x80000100, `LexState->pe`, which holds the address of the end of the bytecode chunk in memory, wraps around and becomes smaller than `LexState->p`, which holds the address of the beginning of the bytecode chunk in memory. As a result, in `bcread_fill()` called by `bcread_want()`, `memcpy()` is called with a very large size, which causes a bus error on x86 and a segmentation fault on ARM Android. The problem cannot be reproduced on the platforms supported by Tarantool (ARM64, x86_64), so the test does not reproduce the problem without the patch and exercises the patch only partially. 
Sergey Bronnikov: * added the description and the test --- src/lib_package.c | 4 +- src/lj_bcread.c | 10 +- src/lj_lex.c | 6 ++ src/lj_lex.h | 1 + .../lj-549-bytecode-loader.test.lua | 96 +++++++++++++++++++ 5 files changed, 110 insertions(+), 7 deletions(-) create mode 100644 test/tarantool-tests/lj-549-bytecode-loader.test.lua diff --git a/src/lib_package.c b/src/lib_package.c index b49f0209..12603038 100644 --- a/src/lib_package.c +++ b/src/lib_package.c @@ -260,7 +260,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r) const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC)); lua_pop(L, 1); if (bcdata) { - if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0) + if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0) return PACKAGE_ERR_LOAD; return 0; } @@ -431,7 +431,7 @@ static int lj_cf_package_loader_preload(lua_State *L) if (lua_isnil(L, -1)) { /* Not found? */ const char *bcname = mksymname(L, name, SYMPREFIX_BC); const char *bcdata = ll_bcsym(NULL, bcname); - if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0) + if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0) lua_pushfstring(L, "\n\tno field package.preload['%s']", name); } return 1; diff --git a/src/lj_bcread.c b/src/lj_bcread.c index cddf6ff1..48ec15e4 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -79,6 +79,7 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) ls->c = -1; /* Only bad if we get called again. */ break; } + if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L); if (n) { /* Append to buffer. */ n += (MSize)sz; p = lj_buf_need(&ls->sb, n < len ? len : n); @@ -90,20 +91,20 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) ls->p = buf; ls->pe = buf + sz; } - } while (ls->p + len > ls->pe); + } while ((MSize)(ls->pe - ls->p) < len); } /* Need a certain number of bytes. 
*/ static LJ_AINLINE void bcread_need(LexState *ls, MSize len) { - if (LJ_UNLIKELY(ls->p + len > ls->pe)) + if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) bcread_fill(ls, len, 1); } /* Want to read up to a certain number of bytes, but may need less. */ static LJ_AINLINE void bcread_want(LexState *ls, MSize len) { - if (LJ_UNLIKELY(ls->p + len > ls->pe)) + if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) bcread_fill(ls, len, 0); } @@ -463,8 +464,7 @@ GCproto *lj_bcread(LexState *ls) setprotoV(L, L->top, pt); incr_top(L); } - if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 || - L->top-1 != bcread_oldtop(L, ls)) + if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) bcread_error(ls, LJ_ERR_BCBAD); /* Pop off last prototype. */ L->top--; diff --git a/src/lj_lex.c b/src/lj_lex.c index cef3c683..6291705f 100644 --- a/src/lj_lex.c +++ b/src/lj_lex.c @@ -48,6 +48,11 @@ static LJ_NOINLINE LexChar lex_more(LexState *ls) size_t sz; const char *p = ls->rfunc(ls->L, ls->rdata, &sz); if (p == NULL || sz == 0) return LEX_EOF; + if (sz >= LJ_MAX_BUF) { + if (sz != ~(size_t)0) lj_err_mem(ls->L); + sz = ~(uintptr_t)0 - (uintptr_t)p; + ls->endmark = 1; + } ls->pe = p + sz; ls->p = p + 1; return (LexChar)(uint8_t)p[0]; @@ -408,6 +413,7 @@ int lj_lex_setup(lua_State *L, LexState *ls) ls->lookahead = TK_eof; /* No look-ahead token. */ ls->linenumber = 1; ls->lastline = 1; + ls->endmark = 0; lex_next(ls); /* Read-ahead first char. */ if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb && (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ diff --git a/src/lj_lex.h b/src/lj_lex.h index ae05a954..a26e504a 100644 --- a/src/lj_lex.h +++ b/src/lj_lex.h @@ -73,6 +73,7 @@ typedef struct LexState { BCInsLine *bcstack; /* Stack for bytecode instructions/line numbers. */ MSize sizebcstack; /* Size of bytecode stack. */ uint32_t level; /* Syntactical nesting level. */ + int endmark; /* Trust bytecode end marker, even if not at EOF. 
*/ } LexState; LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls); diff --git a/test/tarantool-tests/lj-549-bytecode-loader.test.lua b/test/tarantool-tests/lj-549-bytecode-loader.test.lua new file mode 100644 index 00000000..889be80a --- /dev/null +++ b/test/tarantool-tests/lj-549-bytecode-loader.test.lua @@ -0,0 +1,96 @@ +local tap = require('tap') +local ffi = require('ffi') +local utils = require('utils') +local test = tap.test('lj-549-bytecode-loader'):skipcond({ + -- ['Test requires GC64 mode enabled'] = not require('ffi').abi('gc64'), +}) + +test:plan(1) + +-- Test creates a shared library with LuaJIT bytecode, +-- loads shared library as a Lua module and checks, +-- that no crashes eliminated. +-- +-- $ make HOST_CC='gcc -m32' TARGET_CFLAGS='-m32' \ +-- TARGET_LDFLAGS='-m32' \ +-- TARGET_SHLDFLAGS='-m32' \ +-- -f Makefile.original +-- $ echo 'print("test")' > a.lua +-- $ LUA_PATH="src/?.lua;;" luajit -b a.lua a.c +-- $ gcc -m32 -fPIC -shared a.c -o a.so +-- $ luajit -e "require('a')" +-- Program received signal SIGBUS, Bus error + +local function file_exists(fname) + return io.open(fname, 'r') or true and false +end + +local function get_file_name(file) + return file:match("[^/]*$") +end + +local stdout_msg = 'Lango team' +local lua_code = ('print(%q)'):format(stdout_msg) +local fpath = os.tmpname() +local path_lua = ('%s.lua'):format(fpath) +local path_c = ('%s.c'):format(fpath) +local path_so = ('%s.so'):format(fpath) + +-- Create a file with a minimal Lua code. +local fh = assert(io.open(path_lua, 'w')) +fh:write(lua_code) +fh:close() + +local module_name = assert(get_file_name(fpath)) + +local basedir = function(path) + local sep = '/' + return path:match('(.*' .. sep .. ')') or './' +end + +-- Create a C file with LuaJIT bytecode. +-- We cannot use utils.makecmd, because command-line generated +-- by `makecmd` contains `-e` that is incompatible with option `-b`. 
+local function create_c_file(pathlua, pathc) + local lua_path = os.getenv('LUA_PATH') + local lua_bin = require('utils').exec.luacmd(arg):match('%S+') + local cmd_fmt = 'LUA_PATH="%s" %s -b %s %s' + local cmd = (cmd_fmt):format(lua_path, lua_bin, pathlua, pathc) + local ret = os.execute(cmd) + assert(ret == 0, 'create a C file with bytecode') +end + +create_c_file(path_lua, path_c) +assert(file_exists(path_c)) + +-- Compile C source code with LuaJIT bytecode to a shared library. +-- `-m64` is not available on ARM64, see +-- "3.18.1 AArch64 Options in the manual", +-- https://gcc.gnu.org/onlinedocs/gcc/AArch64-Options.html +local cflags_64 = jit.arch == 'arm64' and '-march=armv8-a' or '-m64' +local cflags = ffi.abi('32bit') and '-m32' or cflags_64 +local cc_cmd = ('cc %s -fPIC -shared %s -o %s'):format(cflags, path_c, path_so) +local ph = io.popen(cc_cmd) +ph:close() +assert(file_exists(path_so)) + +-- Load shared library as a Lua module. +local lua_cpath = ('"/tmp/?.so;"'):format(basedir(fpath)) +assert(file_exists(path_so)) +local cmd = utils.exec.makecmd(arg, { + script = ('-e "require([[%s]])"'):format(module_name), + env = { + LUA_CPATH = lua_cpath, + -- It is required to cleanup LUA_PATH, otherwise + -- LuaJIT loads Lua module, see tarantool-tests/utils/init.lua. + LUA_PATH = '', + }, +}) +local res = cmd() +test:ok(res == stdout_msg, 'bytecode loader works') + +os.remove(path_lua) +os.remove(path_c) +os.remove(path_so) + +os.exit(test:check() and 0 or 1) -- 2.34.1
next prev parent reply other threads:[~2023-08-31 11:49 UTC|newest] Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-08-31 11:29 [Tarantool-patches] [PATCH luajit 0/2][v2] " Sergey Bronnikov via Tarantool-patches 2023-08-31 11:30 ` Sergey Bronnikov via Tarantool-patches [this message] 2023-08-31 11:49 ` [Tarantool-patches] [PATCH luajit 1/2][v2] " Sergey Bronnikov via Tarantool-patches 2023-09-01 9:42 ` Maxim Kokryashkin via Tarantool-patches 2023-09-04 9:31 ` Sergey Bronnikov via Tarantool-patches 2023-09-05 6:34 ` Maxim Kokryashkin via Tarantool-patches 2023-09-05 14:10 ` Sergey Kaplun via Tarantool-patches 2023-09-07 15:21 ` Sergey Bronnikov via Tarantool-patches 2023-09-11 8:45 ` Sergey Kaplun via Tarantool-patches 2023-09-12 10:20 ` Sergey Bronnikov via Tarantool-patches 2023-10-31 11:30 ` Sergey Kaplun via Tarantool-patches 2023-09-05 14:12 ` Sergey Kaplun via Tarantool-patches 2023-09-07 7:06 ` Sergey Bronnikov via Tarantool-patches 2023-08-31 11:32 ` [Tarantool-patches] [PATCH luajit 2/2][v2] Followup fix for " Sergey Bronnikov via Tarantool-patches 2023-09-01 10:05 ` Maxim Kokryashkin via Tarantool-patches 2023-09-04 16:34 ` Sergey Bronnikov via Tarantool-patches 2023-09-05 6:45 ` Maxim Kokryashkin via Tarantool-patches 2023-09-05 12:55 ` Sergey Kaplun via Tarantool-patches 2023-09-07 7:04 ` Sergey Bronnikov via Tarantool-patches 2023-09-11 9:26 ` Sergey Kaplun via Tarantool-patches 2023-09-12 10:30 ` Sergey Bronnikov via Tarantool-patches
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=d5a507b956b6fb0f05877e5da487d47bec610776.1693480177.git.sergeyb@tarantool.org \ --to=tarantool-patches@dev.tarantool.org \ --cc=estetus@gmail.com \ --cc=max.kokryashkin@gmail.com \ --cc=skaplun@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH luajit 1/2][v2] Fix embedded bytecode loader.' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox