[Tarantool-patches] [PATCH 1/2] Fix embedded bytecode loader.
Sergey Bronnikov
estetus at gmail.com
Tue Jul 25 19:36:24 MSK 2023
From: sergeyb at tarantool.org
(cherry-picked from commit 820339960123dc78a7ce03edf53fcf4fdae0e55d)
Original problem is specific for x32 and is as follows: when a chunk
with bytecode library is loaded into memory, and the address is higher
than 0x80000100, the LexState->pe, that contains an address of the end
of bytecode chunk in the memory, will wrap around and become smaller
than address in LexState->p, that contains an address of the beginning
of bytecode chunk in the memory. In bcread_fill() called by
bcread_want(), memcpy() is called with a very large size and causes bus
error on x86 and segmentation fault on ARM android.
The problem cannot be reproduced on platforms supported by Tarantool
(ARM64, x86_64), so test doesn't reproduce a problem without a patch and
tests patch partially.
Sergey Bronnikov:
* added the description
---
src/lj_bcread.c | 10 +++++-----
src/lj_lex.c | 6 ++++++
src/lj_lex.h | 1 +
3 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index f6c7ad25..315ad4d8 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -79,6 +79,7 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
ls->c = -1; /* Only bad if we get called again. */
break;
}
+ if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L);
if (n) { /* Append to buffer. */
n += (MSize)sz;
p = lj_buf_need(&ls->sb, n < len ? len : n);
@@ -90,20 +91,20 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
ls->p = buf;
ls->pe = buf + sz;
}
- } while (ls->p + len > ls->pe);
+ } while ((MSize)(ls->pe - ls->p) < len);
}
/* Need a certain number of bytes. */
static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
{
- if (LJ_UNLIKELY(ls->p + len > ls->pe))
+ if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
bcread_fill(ls, len, 1);
}
/* Want to read up to a certain number of bytes, but may need less. */
static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
{
- if (LJ_UNLIKELY(ls->p + len > ls->pe))
+ if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
bcread_fill(ls, len, 0);
}
@@ -463,8 +464,7 @@ GCproto *lj_bcread(LexState *ls)
setprotoV(L, L->top, pt);
incr_top(L);
}
- if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 ||
- L->top-1 != bcread_oldtop(L, ls))
+ if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
bcread_error(ls, LJ_ERR_BCBAD);
/* Pop off last prototype. */
L->top--;
diff --git a/src/lj_lex.c b/src/lj_lex.c
index 52856912..82e4ba6f 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -48,6 +48,11 @@ static LJ_NOINLINE LexChar lex_more(LexState *ls)
size_t sz;
const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
if (p == NULL || sz == 0) return LEX_EOF;
+ if (sz >= LJ_MAX_BUF) {
+ if (sz != ~(size_t)0) lj_err_mem(ls->L);
+ sz = ~(uintptr_t)0 - (uintptr_t)p;
+ ls->endmark = 1;
+ }
ls->pe = p + sz;
ls->p = p + 1;
return (LexChar)(uint8_t)p[0];
@@ -406,6 +411,7 @@ int lj_lex_setup(lua_State *L, LexState *ls)
ls->lookahead = TK_eof; /* No look-ahead token. */
ls->linenumber = 1;
ls->lastline = 1;
+ ls->endmark = 0;
lex_next(ls); /* Read-ahead first char. */
if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
(uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
diff --git a/src/lj_lex.h b/src/lj_lex.h
index 33fa8657..38d28533 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -73,6 +73,7 @@ typedef struct LexState {
BCInsLine *bcstack; /* Stack for bytecode instructions/line numbers. */
MSize sizebcstack; /* Size of bytecode stack. */
uint32_t level; /* Syntactical nesting level. */
+ int endmark; /* Trust bytecode end marker, even if not at EOF. */
} LexState;
LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
--
2.34.1
More information about the Tarantool-patches
mailing list