[Tarantool-patches] [PATCH 1/2] Fix embedded bytecode loader. - Sergey Bronnikov via Tarantool-patches

Tarantool development patches archive
 help / color / mirror / Atom feed

From: Sergey Bronnikov via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: tarantool-patches@dev.tarantool.org,
	Sergey Kaplun <skaplun@tarantool.org>,
	max.kokryashkin@gmail.com
Subject: [Tarantool-patches] [PATCH 1/2] Fix embedded bytecode loader.
Date: Tue, 25 Jul 2023 19:36:24 +0300	[thread overview]
Message-ID: <6426e58a9a72691ccffc84001c21e363c8da6312.1690300762.git.sergeyb@tarantool.org> (raw)
In-Reply-To: <cover.1690300762.git.sergeyb@tarantool.org>

From: sergeyb@tarantool.org

(cherry-picked from commit 820339960123dc78a7ce03edf53fcf4fdae0e55d)

Original problem is specific for x32 and is as follows: when a chunk
with bytecode library is loaded into memory, and the address is higher
than 0x80000100, the LexState->pe, that contains an address of the end
of bytecode chunk in the memory, will wrap around and become smaller
than address in LexState->p, that contains an address of the beginning
of bytecode chunk in the memory. In bcread_fill() called by
bcread_want(), memcpy() is called with a very large size and causes bus
error on x86 and segmentation fault on ARM android.

The problem cannot be reproduced on platforms supported by Tarantool
(ARM64, x86_64), so test doesn't reproduce a problem without a patch and
tests patch partially.

Sergey Bronnikov:
* added the description
---
 src/lj_bcread.c | 10 +++++-----
 src/lj_lex.c    |  6 ++++++
 src/lj_lex.h    |  1 +
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index f6c7ad25..315ad4d8 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -79,6 +79,7 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
       ls->c = -1;  /* Only bad if we get called again. */
       break;
     }
+    if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L);
     if (n) {  /* Append to buffer. */
       n += (MSize)sz;
       p = lj_buf_need(&ls->sb, n < len ? len : n);
@@ -90,20 +91,20 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
       ls->p = buf;
       ls->pe = buf + sz;
     }
-  } while (ls->p + len > ls->pe);
+  } while ((MSize)(ls->pe - ls->p) < len);
 }
 
 /* Need a certain number of bytes. */
 static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
 {
-  if (LJ_UNLIKELY(ls->p + len > ls->pe))
+  if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
     bcread_fill(ls, len, 1);
 }
 
 /* Want to read up to a certain number of bytes, but may need less. */
 static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
 {
-  if (LJ_UNLIKELY(ls->p + len > ls->pe))
+  if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
     bcread_fill(ls, len, 0);
 }
 
@@ -463,8 +464,7 @@ GCproto *lj_bcread(LexState *ls)
     setprotoV(L, L->top, pt);
     incr_top(L);
   }
-  if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 ||
-      L->top-1 != bcread_oldtop(L, ls))
+  if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
     bcread_error(ls, LJ_ERR_BCBAD);
   /* Pop off last prototype. */
   L->top--;
diff --git a/src/lj_lex.c b/src/lj_lex.c
index 52856912..82e4ba6f 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -48,6 +48,11 @@ static LJ_NOINLINE LexChar lex_more(LexState *ls)
   size_t sz;
   const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
   if (p == NULL || sz == 0) return LEX_EOF;
+  if (sz >= LJ_MAX_BUF) {
+    if (sz != ~(size_t)0) lj_err_mem(ls->L);
+    sz = ~(uintptr_t)0 - (uintptr_t)p;
+    ls->endmark = 1;
+  }
   ls->pe = p + sz;
   ls->p = p + 1;
   return (LexChar)(uint8_t)p[0];
@@ -406,6 +411,7 @@ int lj_lex_setup(lua_State *L, LexState *ls)
   ls->lookahead = TK_eof;  /* No look-ahead token. */
   ls->linenumber = 1;
   ls->lastline = 1;
+  ls->endmark = 0;
   lex_next(ls);  /* Read-ahead first char. */
   if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
       (uint8_t)ls->p[1] == 0xbf) {  /* Skip UTF-8 BOM (if buffered). */
diff --git a/src/lj_lex.h b/src/lj_lex.h
index 33fa8657..38d28533 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -73,6 +73,7 @@ typedef struct LexState {
   BCInsLine *bcstack;	/* Stack for bytecode instructions/line numbers. */
   MSize sizebcstack;	/* Size of bytecode stack. */
   uint32_t level;	/* Syntactical nesting level. */
+  int endmark;		/* Trust bytecode end marker, even if not at EOF. */
 } LexState;
 
 LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
-- 
2.34.1

next prev parent reply	other threads:[~2023-07-25 16:36 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-25 16:34 [Tarantool-patches] [PATCH 0/2] " Sergey Bronnikov via Tarantool-patches
2023-07-25 16:36 ` Sergey Bronnikov via Tarantool-patches [this message]
2023-07-31 12:01   ` [Tarantool-patches] [PATCH 1/2] " Maxim Kokryashkin via Tarantool-patches
2023-08-01  9:56     ` Sergey Bronnikov via Tarantool-patches
2023-08-06 11:09     ` Sergey Kaplun via Tarantool-patches
2023-08-15  8:51       ` Maxim Kokryashkin via Tarantool-patches
2023-07-25 16:37 ` [Tarantool-patches] [PATCH 2/2] Followup fix for " Sergey Bronnikov via Tarantool-patches
2023-07-31 12:20   ` Maxim Kokryashkin via Tarantool-patches
2023-08-01  9:53     ` Sergey Bronnikov via Tarantool-patches
2023-08-14  8:14       ` Sergey Bronnikov via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6426e58a9a72691ccffc84001c21e363c8da6312.1690300762.git.sergeyb@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=estetus@gmail.com \
    --cc=max.kokryashkin@gmail.com \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH 1/2] Fix embedded bytecode loader.' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox