Tarantool development patches archive
 help / color / mirror / Atom feed
From: Sergey Bronnikov via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: tarantool-patches@dev.tarantool.org,
	Sergey Kaplun <skaplun@tarantool.org>,
	max.kokryashkin@gmail.com
Subject: [Tarantool-patches] [PATCH luajit 1/2][v2] Fix embedded bytecode loader.
Date: Thu, 31 Aug 2023 14:49:01 +0300	[thread overview]
Message-ID: <d5a507b956b6fb0f05877e5da487d47bec610776.1693480177.git.sergeyb@tarantool.org> (raw)
Message-ID: <20230831114901.G57Mkm-daVo_vuFqfDUVJA8a6Hge71n-qyN494a4LTs@z> (raw)
In-Reply-To: <cover.1693480177.git.sergeyb@tarantool.org>

From: Sergey Bronnikov <sergeyb@tarantool.org>

(cherry-picked from commit 820339960123dc78a7ce03edf53fcf4fdae0e55d)

The original problem is specific to x32 and is as follows: when a chunk
with a bytecode library is loaded into memory, and the address is higher
than 0x80000100, the `LexState->pe`, that contains an address of the end
of the bytecode chunk in the memory, will wrap around and become smaller
than the address in `LexState->p`, that contains an address of the
beginning of bytecode chunk in the memory. In `bcread_fill()` called by
`bcread_want()`, `memcpy()` is called with a very large size and causes
bus error on x86 and segmentation fault on ARM Android.

The problem cannot be reproduced on the platforms supported by Tarantool
(ARM64, x86_64), so the test does not reproduce the problem without the
patch and covers the patch only partially.

Sergey Bronnikov:
* added the description and the test
---
 src/lib_package.c                             |  4 +-
 src/lj_bcread.c                               | 10 +-
 src/lj_lex.c                                  |  6 ++
 src/lj_lex.h                                  |  1 +
 .../lj-549-bytecode-loader.test.lua           | 96 +++++++++++++++++++
 5 files changed, 110 insertions(+), 7 deletions(-)
 create mode 100644 test/tarantool-tests/lj-549-bytecode-loader.test.lua

diff --git a/src/lib_package.c b/src/lib_package.c
index b49f0209..12603038 100644
--- a/src/lib_package.c
+++ b/src/lib_package.c
@@ -260,7 +260,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
       const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
       lua_pop(L, 1);
       if (bcdata) {
-	if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
+	if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
 	  return PACKAGE_ERR_LOAD;
 	return 0;
       }
@@ -431,7 +431,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
   if (lua_isnil(L, -1)) {  /* Not found? */
     const char *bcname = mksymname(L, name, SYMPREFIX_BC);
     const char *bcdata = ll_bcsym(NULL, bcname);
-    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
+    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
       lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
   }
   return 1;
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index cddf6ff1..48ec15e4 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -79,6 +79,7 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
       ls->c = -1;  /* Only bad if we get called again. */
       break;
     }
+    if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L);
     if (n) {  /* Append to buffer. */
       n += (MSize)sz;
       p = lj_buf_need(&ls->sb, n < len ? len : n);
@@ -90,20 +91,20 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
       ls->p = buf;
       ls->pe = buf + sz;
     }
-  } while (ls->p + len > ls->pe);
+  } while ((MSize)(ls->pe - ls->p) < len);
 }
 
 /* Need a certain number of bytes. */
 static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
 {
-  if (LJ_UNLIKELY(ls->p + len > ls->pe))
+  if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
     bcread_fill(ls, len, 1);
 }
 
 /* Want to read up to a certain number of bytes, but may need less. */
 static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
 {
-  if (LJ_UNLIKELY(ls->p + len > ls->pe))
+  if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
     bcread_fill(ls, len, 0);
 }
 
@@ -463,8 +464,7 @@ GCproto *lj_bcread(LexState *ls)
     setprotoV(L, L->top, pt);
     incr_top(L);
   }
-  if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 ||
-      L->top-1 != bcread_oldtop(L, ls))
+  if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
     bcread_error(ls, LJ_ERR_BCBAD);
   /* Pop off last prototype. */
   L->top--;
diff --git a/src/lj_lex.c b/src/lj_lex.c
index cef3c683..6291705f 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -48,6 +48,11 @@ static LJ_NOINLINE LexChar lex_more(LexState *ls)
   size_t sz;
   const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
   if (p == NULL || sz == 0) return LEX_EOF;
+  if (sz >= LJ_MAX_BUF) {
+    if (sz != ~(size_t)0) lj_err_mem(ls->L);
+    sz = ~(uintptr_t)0 - (uintptr_t)p;
+    ls->endmark = 1;
+  }
   ls->pe = p + sz;
   ls->p = p + 1;
   return (LexChar)(uint8_t)p[0];
@@ -408,6 +413,7 @@ int lj_lex_setup(lua_State *L, LexState *ls)
   ls->lookahead = TK_eof;  /* No look-ahead token. */
   ls->linenumber = 1;
   ls->lastline = 1;
+  ls->endmark = 0;
   lex_next(ls);  /* Read-ahead first char. */
   if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
       (uint8_t)ls->p[1] == 0xbf) {  /* Skip UTF-8 BOM (if buffered). */
diff --git a/src/lj_lex.h b/src/lj_lex.h
index ae05a954..a26e504a 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -73,6 +73,7 @@ typedef struct LexState {
   BCInsLine *bcstack;	/* Stack for bytecode instructions/line numbers. */
   MSize sizebcstack;	/* Size of bytecode stack. */
   uint32_t level;	/* Syntactical nesting level. */
+  int endmark;		/* Trust bytecode end marker, even if not at EOF. */
 } LexState;
 
 LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
diff --git a/test/tarantool-tests/lj-549-bytecode-loader.test.lua b/test/tarantool-tests/lj-549-bytecode-loader.test.lua
new file mode 100644
index 00000000..889be80a
--- /dev/null
+++ b/test/tarantool-tests/lj-549-bytecode-loader.test.lua
@@ -0,0 +1,96 @@
+local tap = require('tap')
+local ffi = require('ffi')
+local utils = require('utils')
+local test = tap.test('lj-549-bytecode-loader'):skipcond({
+    -- ['Test requires GC64 mode enabled'] = not require('ffi').abi('gc64'),
+})
+
+test:plan(1)
+
+-- The test creates a shared library with LuaJIT bytecode,
+-- loads the shared library as a Lua module and checks
+-- that no crash occurs.
+--
+-- $ make HOST_CC='gcc -m32' TARGET_CFLAGS='-m32' \
+--                           TARGET_LDFLAGS='-m32' \
+--                           TARGET_SHLDFLAGS='-m32' \
+--                           -f Makefile.original
+-- $ echo 'print("test")' > a.lua
+-- $ LUA_PATH="src/?.lua;;" luajit -b a.lua a.c
+-- $ gcc -m32 -fPIC -shared a.c -o a.so
+-- $ luajit -e "require('a')"
+-- Program received signal SIGBUS, Bus error
+
+local function file_exists(fname)
+   return io.open(fname, 'r') or true and false
+end
+
+local function get_file_name(file)
+    return file:match("[^/]*$")
+end
+
+local stdout_msg = 'Lango team'
+local lua_code = ('print(%q)'):format(stdout_msg)
+local fpath = os.tmpname()
+local path_lua = ('%s.lua'):format(fpath)
+local path_c = ('%s.c'):format(fpath)
+local path_so = ('%s.so'):format(fpath)
+
+-- Create a file with a minimal Lua code.
+local fh = assert(io.open(path_lua, 'w'))
+fh:write(lua_code)
+fh:close()
+
+local module_name = assert(get_file_name(fpath))
+
+local basedir = function(path)
+    local sep = '/'
+    return path:match('(.*' .. sep .. ')') or './'
+end
+
+-- Create a C file with LuaJIT bytecode.
+-- We cannot use utils.makecmd, because the command line generated
+-- by `makecmd` contains `-e`, which is incompatible with option `-b`.
+local function create_c_file(pathlua, pathc)
+  local lua_path = os.getenv('LUA_PATH')
+  local lua_bin = require('utils').exec.luacmd(arg):match('%S+')
+  local cmd_fmt = 'LUA_PATH="%s" %s -b %s %s'
+  local cmd = (cmd_fmt):format(lua_path, lua_bin, pathlua, pathc)
+  local ret = os.execute(cmd)
+  assert(ret == 0, 'create a C file with bytecode')
+end
+
+create_c_file(path_lua, path_c)
+assert(file_exists(path_c))
+
+-- Compile C source code with LuaJIT bytecode to a shared library.
+-- `-m64` is not available on ARM64, see
+-- "3.18.1 AArch64 Options" in the GCC manual,
+-- https://gcc.gnu.org/onlinedocs/gcc/AArch64-Options.html
+local cflags_64 = jit.arch == 'arm64' and '-march=armv8-a' or '-m64'
+local cflags = ffi.abi('32bit') and '-m32' or cflags_64
+local cc_cmd = ('cc %s -fPIC -shared %s -o %s'):format(cflags, path_c, path_so)
+local ph = io.popen(cc_cmd)
+ph:close()
+assert(file_exists(path_so))
+
+-- Load shared library as a Lua module.
+local lua_cpath = ('"/tmp/?.so;"'):format(basedir(fpath))
+assert(file_exists(path_so))
+local cmd = utils.exec.makecmd(arg, {
+    script = ('-e "require([[%s]])"'):format(module_name),
+    env = {
+        LUA_CPATH = lua_cpath,
+        -- It is required to clean up LUA_PATH; otherwise
+        -- LuaJIT loads the Lua module, see tarantool-tests/utils/init.lua.
+        LUA_PATH = '',
+    },
+})
+local res = cmd()
+test:ok(res == stdout_msg, 'bytecode loader works')
+
+os.remove(path_lua)
+os.remove(path_c)
+os.remove(path_so)
+
+os.exit(test:check() and 0 or 1)
-- 
2.34.1


  reply	other threads:[~2023-08-31 11:49 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-31 11:29 [Tarantool-patches] [PATCH luajit 0/2][v2] " Sergey Bronnikov via Tarantool-patches
2023-08-31 11:30 ` Sergey Bronnikov via Tarantool-patches [this message]
2023-08-31 11:49   ` [Tarantool-patches] [PATCH luajit 1/2][v2] " Sergey Bronnikov via Tarantool-patches
2023-09-01  9:42   ` Maxim Kokryashkin via Tarantool-patches
2023-09-04  9:31     ` Sergey Bronnikov via Tarantool-patches
2023-09-05  6:34       ` Maxim Kokryashkin via Tarantool-patches
2023-09-05 14:10   ` Sergey Kaplun via Tarantool-patches
2023-09-07 15:21     ` Sergey Bronnikov via Tarantool-patches
2023-09-11  8:45       ` Sergey Kaplun via Tarantool-patches
2023-09-12 10:20         ` Sergey Bronnikov via Tarantool-patches
2023-10-31 11:30           ` Sergey Kaplun via Tarantool-patches
2023-09-05 14:12   ` Sergey Kaplun via Tarantool-patches
2023-09-07  7:06     ` Sergey Bronnikov via Tarantool-patches
2023-08-31 11:32 ` [Tarantool-patches] [PATCH luajit 2/2][v2] Followup fix for " Sergey Bronnikov via Tarantool-patches
2023-09-01 10:05   ` Maxim Kokryashkin via Tarantool-patches
2023-09-04 16:34     ` Sergey Bronnikov via Tarantool-patches
2023-09-05  6:45       ` Maxim Kokryashkin via Tarantool-patches
2023-09-05 12:55   ` Sergey Kaplun via Tarantool-patches
2023-09-07  7:04     ` Sergey Bronnikov via Tarantool-patches
2023-09-11  9:26       ` Sergey Kaplun via Tarantool-patches
2023-09-12 10:30         ` Sergey Bronnikov via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d5a507b956b6fb0f05877e5da487d47bec610776.1693480177.git.sergeyb@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=estetus@gmail.com \
    --cc=max.kokryashkin@gmail.com \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit 1/2][v2] Fix embedded bytecode loader.' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox