[Tarantool-patches] [PATCH luajit] Make string to number conversions fail on NUL char

Sergey Ostanevich sergos at tarantool.org
Tue Mar 3 17:53:45 MSK 2020


Hi!

Thanks for the patch, LGTM.

Sergos

On 14 фев 02:57, Igor Munkin wrote:
> The routine used for conversion a string representation to number
> (lj_strscan_scan) doesn't respect the size of the given string/buffer.
> Such behaviour leads to the following results:
> 
> | local a = tonumber("inf\x00imun")   -- the result is 'inf'
> | local b = tonumber("\x36\x00\x80") -- the result is 6
> 
> The behaviour described above is similar to the one vanila Lua 5.1 has:
> 
> | $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
> | Lua 5.1	inf
> 
> However, the issue is fixed in Lua 5.2 and the results are the following:
> | $ ./lua -e 'print(_VERSION, tonumber("inf"..string.char(0).."imun"))'
> | Lua 5.2	nil
> 
> The patch introduces additional parameter to lj_strscan_scan routine to
> detect whether there is nothing left after the null character.
> 
> Relates to tarantool#4773
> 
> Reported-by: Alexander Turenko <alexander.turenko at tarantool.org>
> Signed-off-by: Igor Munkin <imun at tarantool.org>
> ---
> 
> This is a backport of the origin commit from v2.1 branch in LuaJIT
> repo extended with tests and commit message. Commit subject is left
> unchanged with the respect to the origin commit.
> 
> Upstream commit: https://github.com/LuaJIT/LuaJIT/commit/0ad60ccbc3768fa8e3e726858adf261950edbc22
> Issue: https://github.com/tarantool/tarantool/issues/4773
> Branch: https://github.com/tarantool/luajit/tree/imun/tonumber-fail-on-NUL-char
> 
>  src/lj_cparse.c                               |  3 ++-
>  src/lj_lex.c                                  |  2 +-
>  src/lj_strscan.c                              | 11 +++++----
>  src/lj_strscan.h                              |  3 ++-
>  ...gh-4773-tonumber-fail-on-NUL-char.test.lua | 24 +++++++++++++++++++
>  5 files changed, 36 insertions(+), 7 deletions(-)
>  create mode 100755 test/gh-4773-tonumber-fail-on-NUL-char.test.lua
> 
> diff --git a/src/lj_cparse.c b/src/lj_cparse.c
> index 24ef034..fb44056 100644
> --- a/src/lj_cparse.c
> +++ b/src/lj_cparse.c
> @@ -151,7 +151,8 @@ static CPToken cp_number(CPState *cp)
>    TValue o;
>    do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
>    cp_save(cp, '\0');
> -  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
> +  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), sbuflen(&cp->sb)-1,
> +			&o, STRSCAN_OPT_C);
>    if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
>    else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
>    else if (!(cp->mode & CPARSE_MODE_SKIP))
> diff --git a/src/lj_lex.c b/src/lj_lex.c
> index 2d2f819..5285691 100644
> --- a/src/lj_lex.c
> +++ b/src/lj_lex.c
> @@ -99,7 +99,7 @@ static void lex_number(LexState *ls, TValue *tv)
>      lex_savenext(ls);
>    }
>    lex_save(ls, '\0');
> -  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
> +  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), sbuflen(&ls->sb)-1, tv,
>  	  (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
>  	  (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
>    if (LJ_DUALNUM && fmt == STRSCAN_INT) {
> diff --git a/src/lj_strscan.c b/src/lj_strscan.c
> index f5f35c9..08f41f1 100644
> --- a/src/lj_strscan.c
> +++ b/src/lj_strscan.c
> @@ -370,9 +370,11 @@ static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
>  }
>  
>  /* Scan string containing a number. Returns format. Returns value in o. */
> -StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
> +StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
> +			   uint32_t opt)
>  {
>    int32_t neg = 0;
> +  const uint8_t *pe = p + len;
>  
>    /* Remove leading space, parse sign and non-numbers. */
>    if (LJ_UNLIKELY(!lj_char_isdigit(*p))) {
> @@ -390,7 +392,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
>  	p += 3;
>        }
>        while (lj_char_isspace(*p)) p++;
> -      if (*p) return STRSCAN_ERROR;
> +      if (*p || p < pe) return STRSCAN_ERROR;
>        o->u64 = tmp.u64;
>        return STRSCAN_NUM;
>      }
> @@ -488,6 +490,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
>        while (lj_char_isspace(*p)) p++;
>        if (*p) return STRSCAN_ERROR;
>      }
> +    if (p < pe) return STRSCAN_ERROR;
>  
>      /* Fast path for decimal 32 bit integers. */
>      if (fmt == STRSCAN_INT && base == 10 &&
> @@ -524,7 +527,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
>  
>  int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
>  {
> -  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
> +  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
>  				   STRSCAN_OPT_TONUM);
>    lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM);
>    return (fmt != STRSCAN_ERROR);
> @@ -533,7 +536,7 @@ int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
>  #if LJ_DUALNUM
>  int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o)
>  {
> -  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
> +  StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
>  				   STRSCAN_OPT_TOINT);
>    lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT);
>    if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM);
> diff --git a/src/lj_strscan.h b/src/lj_strscan.h
> index 6fb0dda..fdfe894 100644
> --- a/src/lj_strscan.h
> +++ b/src/lj_strscan.h
> @@ -22,7 +22,8 @@ typedef enum {
>    STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64,
>  } StrScanFmt;
>  
> -LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt);
> +LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
> +				   uint32_t opt);
>  LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o);
>  #if LJ_DUALNUM
>  LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o);
> diff --git a/test/gh-4773-tonumber-fail-on-NUL-char.test.lua b/test/gh-4773-tonumber-fail-on-NUL-char.test.lua
> new file mode 100755
> index 0000000..a660979
> --- /dev/null
> +++ b/test/gh-4773-tonumber-fail-on-NUL-char.test.lua
> @@ -0,0 +1,24 @@
> +#!/usr/bin/env tarantool
> +
> +local tap = require('tap')
> +
> +local test = tap.test("Tarantool 4773")
> +test:plan(3)
> +
> +-- Test file to demonstrate LuaJIT tonumber routine fails on NUL char,
> +-- details:
> +--     https://github.com/tarantool/tarantool/issues/4773
> +
> +local t = {
> +  zero = '0',
> +  null = '\x00',
> +  tail = 'imun',
> +}
> +
> +-- Since VM, Lua/C API and compiler use a single routine for conversion numeric
> +-- string to a number, test cases are reduced to the following:
> +test:is(tonumber(t.zero), 0)
> +test:is(tonumber(t.zero .. t.tail), nil)
> +test:is(tonumber(t.zero .. t.null .. t.tail), nil)
> +
> +os.exit(test:check() and 0 or 1)
> -- 
> 2.25.0
> 


More information about the Tarantool-patches mailing list