[Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char

Igor Munkin imun at tarantool.org
Fri Feb 14 02:57:36 MSK 2020


The routine used for conversion a string representation to number
(lj_strscan_scan) doesn't respect the size of the given string/buffer.
Such behaviour leads to the following results:

| local a = tonumber("inf\x00imun")   -- the result is 'inf'
| local b = tonumber("\x36\x00\x80") -- the result is 6

The behaviour described above is similar to the one vanila Lua 5.1 has:

| $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
| Lua 5.1	inf

However, the issue is fixed in Lua 5.2 and the results are the following:
| $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
| Lua 5.2	nil

The patch introduces additional parameter to lj_strscan_scan routine to
detect whether there is nothing left after the null character.

Relates to tarantool#4773

Reported-by: Alexander Turenko <alexander.turenko at tarantool.org>
Signed-off-by: Igor Munkin <imun at tarantool.org>
---

This is a backport of the origin commit from v2.1 branch in LuaJIT
repo extended with tests and commit message. Commit subject is left
unchanged with the respect to the origin commit.

Upstream commit: https://github.com/LuaJIT/LuaJIT/commit/0ad60ccbc3768fa8e3e726858adf261950edbc22
Issue: https://github.com/tarantool/tarantool/issues/4773
Branch: https://github.com/tarantool/luajit/tree/imun/tonumber-fail-on-NUL-char

 src/lj_cparse.c                               |  3 ++-
 src/lj_lex.c                                  |  2 +-
 src/lj_strscan.c                              | 11 +++++----
 src/lj_strscan.h                              |  3 ++-
 ...gh-4773-tonumber-fail-on-NUL-char.test.lua | 24 +++++++++++++++++++
 5 files changed, 36 insertions(+), 7 deletions(-)
 create mode 100755 test/gh-4773-tonumber-fail-on-NUL-char.test.lua

diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index 24ef034..fb44056 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -151,7 +151,8 @@ static CPToken cp_number(CPState *cp)
   TValue o;
   do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
   cp_save(cp, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
+  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), sbuflen(&cp->sb)-1,
+			&o, STRSCAN_OPT_C);
   if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
   else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
   else if (!(cp->mode & CPARSE_MODE_SKIP))
diff --git a/src/lj_lex.c b/src/lj_lex.c
index 2d2f819..5285691 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -99,7 +99,7 @@ static void lex_number(LexState *ls, TValue *tv)
     lex_savenext(ls);
   }
   lex_save(ls, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
+  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), sbuflen(&ls->sb)-1, tv,
 	  (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
 	  (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
   if (LJ_DUALNUM && fmt == STRSCAN_INT) {
diff --git a/src/lj_strscan.c b/src/lj_strscan.c
index f5f35c9..08f41f1 100644
--- a/src/lj_strscan.c
+++ b/src/lj_strscan.c
@@ -370,9 +370,11 @@ static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
 }
 
 /* Scan string containing a number. Returns format. Returns value in o. */
-StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
+StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
+			   uint32_t opt)
 {
   int32_t neg = 0;
+  const uint8_t *pe = p + len;
 
   /* Remove leading space, parse sign and non-numbers. */
   if (LJ_UNLIKELY(!lj_char_isdigit(*p))) {
@@ -390,7 +392,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
 	p += 3;
       }
       while (lj_char_isspace(*p)) p++;
-      if (*p) return STRSCAN_ERROR;
+      if (*p || p < pe) return STRSCAN_ERROR;
       o->u64 = tmp.u64;
       return STRSCAN_NUM;
     }
@@ -488,6 +490,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
       while (lj_char_isspace(*p)) p++;
       if (*p) return STRSCAN_ERROR;
     }
+    if (p < pe) return STRSCAN_ERROR;
 
     /* Fast path for decimal 32 bit integers. */
     if (fmt == STRSCAN_INT && base == 10 &&
@@ -524,7 +527,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
 
 int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
 {
-  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
+  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
 				   STRSCAN_OPT_TONUM);
   lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM);
   return (fmt != STRSCAN_ERROR);
@@ -533,7 +536,7 @@ int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
 #if LJ_DUALNUM
 int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o)
 {
-  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
+  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
 				   STRSCAN_OPT_TOINT);
   lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT);
   if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM);
diff --git a/src/lj_strscan.h b/src/lj_strscan.h
index 6fb0dda..fdfe894 100644
--- a/src/lj_strscan.h
+++ b/src/lj_strscan.h
@@ -22,7 +22,8 @@ typedef enum {
   STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64,
 } StrScanFmt;
 
-LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt);
+LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
+				   uint32_t opt);
 LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o);
 #if LJ_DUALNUM
 LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o);
diff --git a/test/gh-4773-tonumber-fail-on-NUL-char.test.lua b/test/gh-4773-tonumber-fail-on-NUL-char.test.lua
new file mode 100755
index 0000000..a660979
--- /dev/null
+++ b/test/gh-4773-tonumber-fail-on-NUL-char.test.lua
@@ -0,0 +1,24 @@
+#!/usr/bin/env tarantool
+
+local tap = require('tap')
+
+local test = tap.test("Tarantool 4773")
+test:plan(3)
+
+-- Test file to demonstrate LuaJIT tonumber routine fails on NUL char,
+-- details:
+--     https://github.com/tarantool/tarantool/issues/4773
+
+local t = {
+  zero = '0',
+  null = '\x00',
+  tail = 'imun',
+}
+
+-- Since VM, Lua/C API and compiler use a single routine for conversion numeric
+-- string to a number, test cases are reduced to the following:
+test:is(tonumber(t.zero), 0)
+test:is(tonumber(t.zero .. t.tail), nil)
+test:is(tonumber(t.zero .. t.null .. t.tail), nil)
+
+os.exit(test:check() and 0 or 1)
-- 
2.25.0



More information about the Tarantool-patches mailing list