Tarantool development patches archive
 help / color / mirror / Atom feed
From: Igor Munkin <imun@tarantool.org>
To: Sergey Ostanevich <sergos@tarantool.org>,
	Alexander Turenko <alexander.turenko@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char
Date: Fri, 14 Feb 2020 02:57:36 +0300	[thread overview]
Message-ID: <62003dc4b5a3672d02c3ec599b5ecb65a557d6b5.1581635592.git.imun@tarantool.org> (raw)

The routine used for conversion a string representation to number
(lj_strscan_scan) doesn't respect the size of the given string/buffer.
Such behaviour leads to the following results:

| local a = tonumber("inf\x00imun")   -- the result is 'inf'
| local b = tonumber("\x36\x00\x80") -- the result is 6

The behaviour described above is similar to the one vanila Lua 5.1 has:

| $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
| Lua 5.1	inf

However, the issue is fixed in Lua 5.2 and the results are the following:
| $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
| Lua 5.2	nil

The patch introduces additional parameter to lj_strscan_scan routine to
detect whether there is nothing left after the null character.

Relates to tarantool#4773

Reported-by: Alexander Turenko <alexander.turenko@tarantool.org>
Signed-off-by: Igor Munkin <imun@tarantool.org>
---

This is a backport of the origin commit from v2.1 branch in LuaJIT
repo extended with tests and commit message. Commit subject is left
unchanged with the respect to the origin commit.

Upstream commit: https://github.com/LuaJIT/LuaJIT/commit/0ad60ccbc3768fa8e3e726858adf261950edbc22
Issue: https://github.com/tarantool/tarantool/issues/4773
Branch: https://github.com/tarantool/luajit/tree/imun/tonumber-fail-on-NUL-char

 src/lj_cparse.c                               |  3 ++-
 src/lj_lex.c                                  |  2 +-
 src/lj_strscan.c                              | 11 +++++----
 src/lj_strscan.h                              |  3 ++-
 ...gh-4773-tonumber-fail-on-NUL-char.test.lua | 24 +++++++++++++++++++
 5 files changed, 36 insertions(+), 7 deletions(-)
 create mode 100755 test/gh-4773-tonumber-fail-on-NUL-char.test.lua

diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index 24ef034..fb44056 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -151,7 +151,8 @@ static CPToken cp_number(CPState *cp)
   TValue o;
   do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
   cp_save(cp, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
+  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), sbuflen(&cp->sb)-1,
+			&o, STRSCAN_OPT_C);
   if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
   else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
   else if (!(cp->mode & CPARSE_MODE_SKIP))
diff --git a/src/lj_lex.c b/src/lj_lex.c
index 2d2f819..5285691 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -99,7 +99,7 @@ static void lex_number(LexState *ls, TValue *tv)
     lex_savenext(ls);
   }
   lex_save(ls, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
+  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), sbuflen(&ls->sb)-1, tv,
 	  (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
 	  (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
   if (LJ_DUALNUM && fmt == STRSCAN_INT) {
diff --git a/src/lj_strscan.c b/src/lj_strscan.c
index f5f35c9..08f41f1 100644
--- a/src/lj_strscan.c
+++ b/src/lj_strscan.c
@@ -370,9 +370,11 @@ static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
 }
 
 /* Scan string containing a number. Returns format. Returns value in o. */
-StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
+StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
+			   uint32_t opt)
 {
   int32_t neg = 0;
+  const uint8_t *pe = p + len;
 
   /* Remove leading space, parse sign and non-numbers. */
   if (LJ_UNLIKELY(!lj_char_isdigit(*p))) {
@@ -390,7 +392,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
 	p += 3;
       }
       while (lj_char_isspace(*p)) p++;
-      if (*p) return STRSCAN_ERROR;
+      if (*p || p < pe) return STRSCAN_ERROR;
       o->u64 = tmp.u64;
       return STRSCAN_NUM;
     }
@@ -488,6 +490,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
       while (lj_char_isspace(*p)) p++;
       if (*p) return STRSCAN_ERROR;
     }
+    if (p < pe) return STRSCAN_ERROR;
 
     /* Fast path for decimal 32 bit integers. */
     if (fmt == STRSCAN_INT && base == 10 &&
@@ -524,7 +527,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
 
 int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
 {
-  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
+  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
 				   STRSCAN_OPT_TONUM);
   lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM);
   return (fmt != STRSCAN_ERROR);
@@ -533,7 +536,7 @@ int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
 #if LJ_DUALNUM
 int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o)
 {
-  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
+  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
 				   STRSCAN_OPT_TOINT);
   lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT);
   if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM);
diff --git a/src/lj_strscan.h b/src/lj_strscan.h
index 6fb0dda..fdfe894 100644
--- a/src/lj_strscan.h
+++ b/src/lj_strscan.h
@@ -22,7 +22,8 @@ typedef enum {
   STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64,
 } StrScanFmt;
 
-LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt);
+LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
+				   uint32_t opt);
 LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o);
 #if LJ_DUALNUM
 LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o);
diff --git a/test/gh-4773-tonumber-fail-on-NUL-char.test.lua b/test/gh-4773-tonumber-fail-on-NUL-char.test.lua
new file mode 100755
index 0000000..a660979
--- /dev/null
+++ b/test/gh-4773-tonumber-fail-on-NUL-char.test.lua
@@ -0,0 +1,24 @@
+#!/usr/bin/env tarantool
+
+local tap = require('tap')
+
+local test = tap.test("Tarantool 4773")
+test:plan(3)
+
+-- Test file to demonstrate LuaJIT tonumber routine fails on NUL char,
+-- details:
+--     https://github.com/tarantool/tarantool/issues/4773
+
+local t = {
+  zero = '0',
+  null = '\x00',
+  tail = 'imun',
+}
+
+-- Since VM, Lua/C API and compiler use a single routine for conversion numeric
+-- string to a number, test cases are reduced to the following:
+test:is(tonumber(t.zero), 0)
+test:is(tonumber(t.zero .. t.tail), nil)
+test:is(tonumber(t.zero .. t.null .. t.tail), nil)
+
+os.exit(test:check() and 0 or 1)
-- 
2.25.0

             reply	other threads:[~2020-02-14  0:02 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-13 23:57 Igor Munkin [this message]
2020-02-14  0:07 ` Igor Munkin
2020-03-03 14:53 ` Sergey Ostanevich
2020-03-03 17:36 ` Igor Munkin
2020-03-05  7:49 ` Kirill Yukhin
2020-03-05  9:36   ` Igor Munkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=62003dc4b5a3672d02c3ec599b5ecb65a557d6b5.1581635592.git.imun@tarantool.org \
    --to=imun@tarantool.org \
    --cc=alexander.turenko@tarantool.org \
    --cc=sergos@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox