Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char
@ 2020-02-13 23:57 Igor Munkin
  2020-02-14  0:07 ` Igor Munkin
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Igor Munkin @ 2020-02-13 23:57 UTC (permalink / raw)
  To: Sergey Ostanevich, Alexander Turenko; +Cc: tarantool-patches

The routine used for conversion a string representation to number
(lj_strscan_scan) doesn't respect the size of the given string/buffer.
Such behaviour leads to the following results:

| local a = tonumber("inf\x00imun")   -- the result is 'inf'
| local b = tonumber("\x36\x00\x80") -- the result is 6

The behaviour described above is similar to the one vanila Lua 5.1 has:

| $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
| Lua 5.1	inf

However, the issue is fixed in Lua 5.2 and the results are the following:
| $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
| Lua 5.2	nil

The patch introduces additional parameter to lj_strscan_scan routine to
detect whether there is nothing left after the null character.

Relates to tarantool#4773

Reported-by: Alexander Turenko <alexander.turenko@tarantool.org>
Signed-off-by: Igor Munkin <imun@tarantool.org>
---

This is a backport of the origin commit from v2.1 branch in LuaJIT
repo extended with tests and commit message. Commit subject is left
unchanged with the respect to the origin commit.

Upstream commit: https://github.com/LuaJIT/LuaJIT/commit/0ad60ccbc3768fa8e3e726858adf261950edbc22
Issue: https://github.com/tarantool/tarantool/issues/4773
Branch: https://github.com/tarantool/luajit/tree/imun/tonumber-fail-on-NUL-char

 src/lj_cparse.c                               |  3 ++-
 src/lj_lex.c                                  |  2 +-
 src/lj_strscan.c                              | 11 +++++----
 src/lj_strscan.h                              |  3 ++-
 ...gh-4773-tonumber-fail-on-NUL-char.test.lua | 24 +++++++++++++++++++
 5 files changed, 36 insertions(+), 7 deletions(-)
 create mode 100755 test/gh-4773-tonumber-fail-on-NUL-char.test.lua

diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index 24ef034..fb44056 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -151,7 +151,8 @@ static CPToken cp_number(CPState *cp)
   TValue o;
   do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
   cp_save(cp, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
+  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), sbuflen(&cp->sb)-1,
+			&o, STRSCAN_OPT_C);
   if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
   else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
   else if (!(cp->mode & CPARSE_MODE_SKIP))
diff --git a/src/lj_lex.c b/src/lj_lex.c
index 2d2f819..5285691 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -99,7 +99,7 @@ static void lex_number(LexState *ls, TValue *tv)
     lex_savenext(ls);
   }
   lex_save(ls, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
+  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), sbuflen(&ls->sb)-1, tv,
 	  (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
 	  (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
   if (LJ_DUALNUM && fmt == STRSCAN_INT) {
diff --git a/src/lj_strscan.c b/src/lj_strscan.c
index f5f35c9..08f41f1 100644
--- a/src/lj_strscan.c
+++ b/src/lj_strscan.c
@@ -370,9 +370,11 @@ static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
 }
 
 /* Scan string containing a number. Returns format. Returns value in o. */
-StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
+StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
+			   uint32_t opt)
 {
   int32_t neg = 0;
+  const uint8_t *pe = p + len;
 
   /* Remove leading space, parse sign and non-numbers. */
   if (LJ_UNLIKELY(!lj_char_isdigit(*p))) {
@@ -390,7 +392,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
 	p += 3;
       }
       while (lj_char_isspace(*p)) p++;
-      if (*p) return STRSCAN_ERROR;
+      if (*p || p < pe) return STRSCAN_ERROR;
       o->u64 = tmp.u64;
       return STRSCAN_NUM;
     }
@@ -488,6 +490,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
       while (lj_char_isspace(*p)) p++;
       if (*p) return STRSCAN_ERROR;
     }
+    if (p < pe) return STRSCAN_ERROR;
 
     /* Fast path for decimal 32 bit integers. */
     if (fmt == STRSCAN_INT && base == 10 &&
@@ -524,7 +527,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
 
 int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
 {
-  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
+  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
 				   STRSCAN_OPT_TONUM);
   lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM);
   return (fmt != STRSCAN_ERROR);
@@ -533,7 +536,7 @@ int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
 #if LJ_DUALNUM
 int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o)
 {
-  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
+  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
 				   STRSCAN_OPT_TOINT);
   lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT);
   if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM);
diff --git a/src/lj_strscan.h b/src/lj_strscan.h
index 6fb0dda..fdfe894 100644
--- a/src/lj_strscan.h
+++ b/src/lj_strscan.h
@@ -22,7 +22,8 @@ typedef enum {
   STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64,
 } StrScanFmt;
 
-LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt);
+LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
+				   uint32_t opt);
 LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o);
 #if LJ_DUALNUM
 LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o);
diff --git a/test/gh-4773-tonumber-fail-on-NUL-char.test.lua b/test/gh-4773-tonumber-fail-on-NUL-char.test.lua
new file mode 100755
index 0000000..a660979
--- /dev/null
+++ b/test/gh-4773-tonumber-fail-on-NUL-char.test.lua
@@ -0,0 +1,24 @@
+#!/usr/bin/env tarantool
+
+local tap = require('tap')
+
+local test = tap.test("Tarantool 4773")
+test:plan(3)
+
+-- Test file to demonstrate LuaJIT tonumber routine fails on NUL char,
+-- details:
+--     https://github.com/tarantool/tarantool/issues/4773
+
+local t = {
+  zero = '0',
+  null = '\x00',
+  tail = 'imun',
+}
+
+-- Since VM, Lua/C API and compiler use a single routine for conversion numeric
+-- string to a number, test cases are reduced to the following:
+test:is(tonumber(t.zero), 0)
+test:is(tonumber(t.zero .. t.tail), nil)
+test:is(tonumber(t.zero .. t.null .. t.tail), nil)
+
+os.exit(test:check() and 0 or 1)
-- 
2.25.0

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char
  2020-02-13 23:57 [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char Igor Munkin
@ 2020-02-14  0:07 ` Igor Munkin
  2020-03-03 14:53 ` Sergey Ostanevich
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Igor Munkin @ 2020-02-14  0:07 UTC (permalink / raw)
  To: Sergey Ostanevich, Alexander Turenko; +Cc: tarantool-patches

Mistyped the issue link. Fixed, squashed, force-pushed to the branch.

On 14.02.20, Igor Munkin wrote:
> The routine used for conversion a string representation to number
> (lj_strscan_scan) doesn't respect the size of the given string/buffer.
> Such behaviour leads to the following results:
> 
> | local a = tonumber("inf\x00imun")   -- the result is 'inf'
> | local b = tonumber("\x36\x00\x80") -- the result is 6
> 
> The behaviour described above is similar to the one vanila Lua 5.1 has:
> 
> | $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
> | Lua 5.1	inf
> 
> However, the issue is fixed in Lua 5.2 and the results are the following:
> | $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
> | Lua 5.2	nil
> 
> The patch introduces additional parameter to lj_strscan_scan routine to
> detect whether there is nothing left after the null character.
> 
> Relates to tarantool#4773

Relates to tarantool/tarantool#4773

> 
> Reported-by: Alexander Turenko <alexander.turenko@tarantool.org>
> Signed-off-by: Igor Munkin <imun@tarantool.org>
> ---
> 
> This is a backport of the origin commit from v2.1 branch in LuaJIT
> repo extended with tests and commit message. Commit subject is left
> unchanged with the respect to the origin commit.
> 
> Upstream commit: https://github.com/LuaJIT/LuaJIT/commit/0ad60ccbc3768fa8e3e726858adf261950edbc22
> Issue: https://github.com/tarantool/tarantool/issues/4773
> Branch: https://github.com/tarantool/luajit/tree/imun/tonumber-fail-on-NUL-char
> 
>  src/lj_cparse.c                               |  3 ++-
>  src/lj_lex.c                                  |  2 +-
>  src/lj_strscan.c                              | 11 +++++----
>  src/lj_strscan.h                              |  3 ++-
>  ...gh-4773-tonumber-fail-on-NUL-char.test.lua | 24 +++++++++++++++++++
>  5 files changed, 36 insertions(+), 7 deletions(-)
>  create mode 100755 test/gh-4773-tonumber-fail-on-NUL-char.test.lua
> 
> diff --git a/src/lj_cparse.c b/src/lj_cparse.c
> index 24ef034..fb44056 100644
> --- a/src/lj_cparse.c
> +++ b/src/lj_cparse.c
> @@ -151,7 +151,8 @@ static CPToken cp_number(CPState *cp)
>    TValue o;
>    do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
>    cp_save(cp, '\0');
> -  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
> +  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), sbuflen(&cp->sb)-1,
> +			&o, STRSCAN_OPT_C);
>    if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
>    else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
>    else if (!(cp->mode & CPARSE_MODE_SKIP))
> diff --git a/src/lj_lex.c b/src/lj_lex.c
> index 2d2f819..5285691 100644
> --- a/src/lj_lex.c
> +++ b/src/lj_lex.c
> @@ -99,7 +99,7 @@ static void lex_number(LexState *ls, TValue *tv)
>      lex_savenext(ls);
>    }
>    lex_save(ls, '\0');
> -  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
> +  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), sbuflen(&ls->sb)-1, tv,
>  	  (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
>  	  (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
>    if (LJ_DUALNUM && fmt == STRSCAN_INT) {
> diff --git a/src/lj_strscan.c b/src/lj_strscan.c
> index f5f35c9..08f41f1 100644
> --- a/src/lj_strscan.c
> +++ b/src/lj_strscan.c
> @@ -370,9 +370,11 @@ static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
>  }
>  
>  /* Scan string containing a number. Returns format. Returns value in o. */
> -StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
> +StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
> +			   uint32_t opt)
>  {
>    int32_t neg = 0;
> +  const uint8_t *pe = p + len;
>  
>    /* Remove leading space, parse sign and non-numbers. */
>    if (LJ_UNLIKELY(!lj_char_isdigit(*p))) {
> @@ -390,7 +392,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
>  	p += 3;
>        }
>        while (lj_char_isspace(*p)) p++;
> -      if (*p) return STRSCAN_ERROR;
> +      if (*p || p < pe) return STRSCAN_ERROR;
>        o->u64 = tmp.u64;
>        return STRSCAN_NUM;
>      }
> @@ -488,6 +490,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
>        while (lj_char_isspace(*p)) p++;
>        if (*p) return STRSCAN_ERROR;
>      }
> +    if (p < pe) return STRSCAN_ERROR;
>  
>      /* Fast path for decimal 32 bit integers. */
>      if (fmt == STRSCAN_INT && base == 10 &&
> @@ -524,7 +527,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
>  
>  int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
>  {
> -  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
> +  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
>  				   STRSCAN_OPT_TONUM);
>    lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM);
>    return (fmt != STRSCAN_ERROR);
> @@ -533,7 +536,7 @@ int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
>  #if LJ_DUALNUM
>  int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o)
>  {
> -  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
> +  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
>  				   STRSCAN_OPT_TOINT);
>    lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT);
>    if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM);
> diff --git a/src/lj_strscan.h b/src/lj_strscan.h
> index 6fb0dda..fdfe894 100644
> --- a/src/lj_strscan.h
> +++ b/src/lj_strscan.h
> @@ -22,7 +22,8 @@ typedef enum {
>    STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64,
>  } StrScanFmt;
>  
> -LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt);
> +LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
> +				   uint32_t opt);
>  LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o);
>  #if LJ_DUALNUM
>  LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o);
> diff --git a/test/gh-4773-tonumber-fail-on-NUL-char.test.lua b/test/gh-4773-tonumber-fail-on-NUL-char.test.lua
> new file mode 100755
> index 0000000..a660979
> --- /dev/null
> +++ b/test/gh-4773-tonumber-fail-on-NUL-char.test.lua
> @@ -0,0 +1,24 @@
> +#!/usr/bin/env tarantool
> +
> +local tap = require('tap')
> +
> +local test = tap.test("Tarantool 4773")
> +test:plan(3)
> +
> +-- Test file to demonstrate LuaJIT tonumber routine fails on NUL char,
> +-- details:
> +--     https://github.com/tarantool/tarantool/issues/4773
> +
> +local t = {
> +  zero = '0',
> +  null = '\x00',
> +  tail = 'imun',
> +}
> +
> +-- Since VM, Lua/C API and compiler use a single routine for conversion numeric
> +-- string to a number, test cases are reduced to the following:
> +test:is(tonumber(t.zero), 0)
> +test:is(tonumber(t.zero .. t.tail), nil)
> +test:is(tonumber(t.zero .. t.null .. t.tail), nil)
> +
> +os.exit(test:check() and 0 or 1)
> -- 
> 2.25.0
> 

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char
  2020-02-13 23:57 [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char Igor Munkin
  2020-02-14  0:07 ` Igor Munkin
@ 2020-03-03 14:53 ` Sergey Ostanevich
  2020-03-03 17:36 ` Igor Munkin
  2020-03-05  7:49 ` Kirill Yukhin
  3 siblings, 0 replies; 6+ messages in thread
From: Sergey Ostanevich @ 2020-03-03 14:53 UTC (permalink / raw)
  To: Igor Munkin; +Cc: tarantool-patches

Hi!

Thanks for the patch, LGTM.

Sergos

On 14 фев 02:57, Igor Munkin wrote:
> The routine used for conversion a string representation to number
> (lj_strscan_scan) doesn't respect the size of the given string/buffer.
> Such behaviour leads to the following results:
> 
> | local a = tonumber("inf\x00imun")   -- the result is 'inf'
> | local b = tonumber("\x36\x00\x80") -- the result is 6
> 
> The behaviour described above is similar to the one vanila Lua 5.1 has:
> 
> | $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
> | Lua 5.1	inf
> 
> However, the issue is fixed in Lua 5.2 and the results are the following:
> | $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
> | Lua 5.2	nil
> 
> The patch introduces additional parameter to lj_strscan_scan routine to
> detect whether there is nothing left after the null character.
> 
> Relates to tarantool#4773
> 
> Reported-by: Alexander Turenko <alexander.turenko@tarantool.org>
> Signed-off-by: Igor Munkin <imun@tarantool.org>
> ---
> 
> This is a backport of the origin commit from v2.1 branch in LuaJIT
> repo extended with tests and commit message. Commit subject is left
> unchanged with the respect to the origin commit.
> 
> Upstream commit: https://github.com/LuaJIT/LuaJIT/commit/0ad60ccbc3768fa8e3e726858adf261950edbc22
> Issue: https://github.com/tarantool/tarantool/issues/4773
> Branch: https://github.com/tarantool/luajit/tree/imun/tonumber-fail-on-NUL-char
> 
>  src/lj_cparse.c                               |  3 ++-
>  src/lj_lex.c                                  |  2 +-
>  src/lj_strscan.c                              | 11 +++++----
>  src/lj_strscan.h                              |  3 ++-
>  ...gh-4773-tonumber-fail-on-NUL-char.test.lua | 24 +++++++++++++++++++
>  5 files changed, 36 insertions(+), 7 deletions(-)
>  create mode 100755 test/gh-4773-tonumber-fail-on-NUL-char.test.lua
> 
> diff --git a/src/lj_cparse.c b/src/lj_cparse.c
> index 24ef034..fb44056 100644
> --- a/src/lj_cparse.c
> +++ b/src/lj_cparse.c
> @@ -151,7 +151,8 @@ static CPToken cp_number(CPState *cp)
>    TValue o;
>    do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
>    cp_save(cp, '\0');
> -  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
> +  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), sbuflen(&cp->sb)-1,
> +			&o, STRSCAN_OPT_C);
>    if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
>    else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
>    else if (!(cp->mode & CPARSE_MODE_SKIP))
> diff --git a/src/lj_lex.c b/src/lj_lex.c
> index 2d2f819..5285691 100644
> --- a/src/lj_lex.c
> +++ b/src/lj_lex.c
> @@ -99,7 +99,7 @@ static void lex_number(LexState *ls, TValue *tv)
>      lex_savenext(ls);
>    }
>    lex_save(ls, '\0');
> -  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
> +  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), sbuflen(&ls->sb)-1, tv,
>  	  (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
>  	  (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
>    if (LJ_DUALNUM && fmt == STRSCAN_INT) {
> diff --git a/src/lj_strscan.c b/src/lj_strscan.c
> index f5f35c9..08f41f1 100644
> --- a/src/lj_strscan.c
> +++ b/src/lj_strscan.c
> @@ -370,9 +370,11 @@ static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
>  }
>  
>  /* Scan string containing a number. Returns format. Returns value in o. */
> -StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
> +StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
> +			   uint32_t opt)
>  {
>    int32_t neg = 0;
> +  const uint8_t *pe = p + len;
>  
>    /* Remove leading space, parse sign and non-numbers. */
>    if (LJ_UNLIKELY(!lj_char_isdigit(*p))) {
> @@ -390,7 +392,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
>  	p += 3;
>        }
>        while (lj_char_isspace(*p)) p++;
> -      if (*p) return STRSCAN_ERROR;
> +      if (*p || p < pe) return STRSCAN_ERROR;
>        o->u64 = tmp.u64;
>        return STRSCAN_NUM;
>      }
> @@ -488,6 +490,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
>        while (lj_char_isspace(*p)) p++;
>        if (*p) return STRSCAN_ERROR;
>      }
> +    if (p < pe) return STRSCAN_ERROR;
>  
>      /* Fast path for decimal 32 bit integers. */
>      if (fmt == STRSCAN_INT && base == 10 &&
> @@ -524,7 +527,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
>  
>  int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
>  {
> -  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
> +  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
>  				   STRSCAN_OPT_TONUM);
>    lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM);
>    return (fmt != STRSCAN_ERROR);
> @@ -533,7 +536,7 @@ int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
>  #if LJ_DUALNUM
>  int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o)
>  {
> -  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
> +  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
>  				   STRSCAN_OPT_TOINT);
>    lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT);
>    if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM);
> diff --git a/src/lj_strscan.h b/src/lj_strscan.h
> index 6fb0dda..fdfe894 100644
> --- a/src/lj_strscan.h
> +++ b/src/lj_strscan.h
> @@ -22,7 +22,8 @@ typedef enum {
>    STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64,
>  } StrScanFmt;
>  
> -LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt);
> +LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
> +				   uint32_t opt);
>  LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o);
>  #if LJ_DUALNUM
>  LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o);
> diff --git a/test/gh-4773-tonumber-fail-on-NUL-char.test.lua b/test/gh-4773-tonumber-fail-on-NUL-char.test.lua
> new file mode 100755
> index 0000000..a660979
> --- /dev/null
> +++ b/test/gh-4773-tonumber-fail-on-NUL-char.test.lua
> @@ -0,0 +1,24 @@
> +#!/usr/bin/env tarantool
> +
> +local tap = require('tap')
> +
> +local test = tap.test("Tarantool 4773")
> +test:plan(3)
> +
> +-- Test file to demonstrate LuaJIT tonumber routine fails on NUL char,
> +-- details:
> +--     https://github.com/tarantool/tarantool/issues/4773
> +
> +local t = {
> +  zero = '0',
> +  null = '\x00',
> +  tail = 'imun',
> +}
> +
> +-- Since VM, Lua/C API and compiler use a single routine for conversion numeric
> +-- string to a number, test cases are reduced to the following:
> +test:is(tonumber(t.zero), 0)
> +test:is(tonumber(t.zero .. t.tail), nil)
> +test:is(tonumber(t.zero .. t.null .. t.tail), nil)
> +
> +os.exit(test:check() and 0 or 1)
> -- 
> 2.25.0
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char
  2020-02-13 23:57 [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char Igor Munkin
  2020-02-14  0:07 ` Igor Munkin
  2020-03-03 14:53 ` Sergey Ostanevich
@ 2020-03-03 17:36 ` Igor Munkin
  2020-03-05  7:49 ` Kirill Yukhin
  3 siblings, 0 replies; 6+ messages in thread
From: Igor Munkin @ 2020-03-03 17:36 UTC (permalink / raw)
  To: Sergey Ostanevich, Alexander Turenko; +Cc: tarantool-patches

Kirill,

We discussed offline with Sasha and his review looks to be excess
considering Sergos' ack and the patch presence in upstream.

Please proceed.

On 14.02.20, Igor Munkin wrote:
> The routine used for conversion a string representation to number
> (lj_strscan_scan) doesn't respect the size of the given string/buffer.
> Such behaviour leads to the following results:
> 
> | local a = tonumber("inf\x00imun")   -- the result is 'inf'
> | local b = tonumber("\x36\x00\x80") -- the result is 6
> 
> The behaviour described above is similar to the one vanila Lua 5.1 has:
> 
> | $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
> | Lua 5.1	inf
> 
> However, the issue is fixed in Lua 5.2 and the results are the following:
> | $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
> | Lua 5.2	nil
> 
> The patch introduces additional parameter to lj_strscan_scan routine to
> detect whether there is nothing left after the null character.
> 
> Relates to tarantool#4773
> 
> Reported-by: Alexander Turenko <alexander.turenko@tarantool.org>

Added Sergos' ack, force-pushed to the branch.

> Signed-off-by: Igor Munkin <imun@tarantool.org>
> ---
> 
> This is a backport of the origin commit from v2.1 branch in LuaJIT
> repo extended with tests and commit message. Commit subject is left
> unchanged with the respect to the origin commit.
> 
> Upstream commit: https://github.com/LuaJIT/LuaJIT/commit/0ad60ccbc3768fa8e3e726858adf261950edbc22
> Issue: https://github.com/tarantool/tarantool/issues/4773
> Branch: https://github.com/tarantool/luajit/tree/imun/tonumber-fail-on-NUL-char
> 
>  src/lj_cparse.c                               |  3 ++-
>  src/lj_lex.c                                  |  2 +-
>  src/lj_strscan.c                              | 11 +++++----
>  src/lj_strscan.h                              |  3 ++-
>  ...gh-4773-tonumber-fail-on-NUL-char.test.lua | 24 +++++++++++++++++++
>  5 files changed, 36 insertions(+), 7 deletions(-)
>  create mode 100755 test/gh-4773-tonumber-fail-on-NUL-char.test.lua
> 

<snipped>

> -- 
> 2.25.0
> 

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char
  2020-02-13 23:57 [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char Igor Munkin
                   ` (2 preceding siblings ...)
  2020-03-03 17:36 ` Igor Munkin
@ 2020-03-05  7:49 ` Kirill Yukhin
  2020-03-05  9:36   ` Igor Munkin
  3 siblings, 1 reply; 6+ messages in thread
From: Kirill Yukhin @ 2020-03-05  7:49 UTC (permalink / raw)
  To: Igor Munkin; +Cc: tarantool-patches

Hello,
On 14 фев 02:57, Igor Munkin wrote:
> The routine used for conversion a string representation to number
> (lj_strscan_scan) doesn't respect the size of the given string/buffer.
> Such behaviour leads to the following results:
> 
> | local a = tonumber("inf\x00imun")   -- the result is 'inf'
> | local b = tonumber("\x36\x00\x80") -- the result is 6
> 
> The behaviour described above is similar to the one vanila Lua 5.1 has:
> 
> | $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
> | Lua 5.1	inf
> 
> However, the issue is fixed in Lua 5.2 and the results are the following:
> | $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
> | Lua 5.2	nil
> 
> The patch introduces additional parameter to lj_strscan_scan routine to
> detect whether there is nothing left after the null character.
> 
> Relates to tarantool#4773
> 
> Reported-by: Alexander Turenko <alexander.turenko@tarantool.org>
> Signed-off-by: Igor Munkin <imun@tarantool.org>

I've checked your patch inti tarantool/luajit repo and
bumped new version in 1.10, 2.2, 2.3 and master.

--
Regards, Kirill Yukhin

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char
  2020-03-05  7:49 ` Kirill Yukhin
@ 2020-03-05  9:36   ` Igor Munkin
  0 siblings, 0 replies; 6+ messages in thread
From: Igor Munkin @ 2020-03-05  9:36 UTC (permalink / raw)
  To: Kirill Yukhin; +Cc: tarantool-patches

Kirill,

Thanks! As you reminded, here is a ChangeLog entry:

@ChangeLog:
* Fixed string to number conversion: current implementation respects the
  buffer length (gh-4773).

On 05.03.20, Kirill Yukhin wrote:
> Hello,
> On 14 фев 02:57, Igor Munkin wrote:
> > The routine used for conversion a string representation to number
> > (lj_strscan_scan) doesn't respect the size of the given string/buffer.
> > Such behaviour leads to the following results:
> > 
> > | local a = tonumber("inf\x00imun")   -- the result is 'inf'
> > | local b = tonumber("\x36\x00\x80") -- the result is 6
> > 
> > The behaviour described above is similar to the one vanila Lua 5.1 has:
> > 
> > | $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
> > | Lua 5.1	inf
> > 
> > However, the issue is fixed in Lua 5.2 and the results are the following:
> > | $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
> > | Lua 5.2	nil
> > 
> > The patch introduces additional parameter to lj_strscan_scan routine to
> > detect whether there is nothing left after the null character.
> > 
> > Relates to tarantool#4773
> > 
> > Reported-by: Alexander Turenko <alexander.turenko@tarantool.org>
> > Signed-off-by: Igor Munkin <imun@tarantool.org>
> 
> I've checked your patch inti tarantool/luajit repo and
> bumped new version in 1.10, 2.2, 2.3 and master.
> 
> --
> Regards, Kirill Yukhin

Added to the corresponding release notes.

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-03-05  9:41 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-13 23:57 [Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char Igor Munkin
2020-02-14  0:07 ` Igor Munkin
2020-03-03 14:53 ` Sergey Ostanevich
2020-03-03 17:36 ` Igor Munkin
2020-03-05  7:49 ` Kirill Yukhin
2020-03-05  9:36   ` Igor Munkin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox