[tarantool-patches] Re: [PATCH] json: clarify bad syntax error messages

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Mon Sep 30 22:29:05 MSK 2019


Hi! Thanks for the fixes!

LGTM.

On 29/09/2019 18:04, Roman Khabibov wrote:
> 
> 
>> On Sep 27, 2019, at 22:54, Vladislav Shpilevoy <v.shpilevoy at tarantool.org> wrote:
>>
>> Hi! Thanks for the patch!
>>
>>> diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
>>> index 6b03e391a..0dee2deb9 100644
>>> --- a/third_party/lua-cjson/lua_cjson.c
>>> +++ b/third_party/lua-cjson/lua_cjson.c
>>> @@ -105,11 +107,12 @@ typedef struct {
>>>     strbuf_t *tmp;    /* Temporary storage for strings */
>>>     struct luaL_serializer *cfg;
>>>     int current_depth;
>>> +    int line_count;
>>> +    const char *cur_line_ptr;
>>> } json_parse_t;
>>>
>>> typedef struct {
>>>     json_token_type_t type;
>>> -    int index;
>>
>> Unfortunately, you can't remove token index. Otherwise
>> error messages will report wrong positions:
>>
>> tarantool> json.decode("{ 100: 200 }")
>> ---
>> - error: Expected object key string but found T_INT on line 1 at character 6
>> ...
>>
>> Error was on character 3 (begin of '100'), not 6.
> @@ -829,9 +837,9 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json,
>      else
>          found = json_token_type_name[token->type];
>  
> -    /* Note: token->index is 0 based, display starting from 1 */
> -    luaL_error(l, "Expected %s but found %s at character %d",
> -               exp, found, token->index + 1);
> +    /* Note: token->column_index is 0 based, display starting from 1 */
> +    luaL_error(l, "Expected %s but found %s on line %d at character %d", exp,
> +               found, json->line_count, token->column_index + 1);
>  }
> 
> commit 9f9bd3eb2d064129ff6b1a764140ebef242d7ff7
> Author: Roman Khabibov <roman.habibov at tarantool.org>
> Date:   Wed Sep 18 16:47:48 2019 +0300
> 
>     json: clarify bad syntax error messages
>     
>     Count lines in the json parsing structure. It is needed to print
>     the number of line and column where a mistake was made.
>     
>     Closes #3316
> 
> diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua
> index 10f6f4ab2..2efbe3648 100755
> --- a/test/app-tap/json.test.lua
> +++ b/test/app-tap/json.test.lua
> @@ -22,7 +22,7 @@ end
>  
>  tap.test("json", function(test)
>      local serializer = require('json')
> -    test:plan(32)
> +    test:plan(39)
>  
>      test:test("unsigned", common.test_unsigned, serializer)
>      test:test("signed", common.test_signed, serializer)
> @@ -121,4 +121,34 @@ tap.test("json", function(test)
>      test:is(serializer.encode(rec4),
>              '{"a":{"a":null,"b":null},"b":{"a":null,"b":null}}')
>      serializer.cfg({encode_max_depth = orig_encode_max_depth})
> +
> +    --
> +    -- gh-3316: Make sure that line number is printed in the error
> +    -- message.
> +    --
> +    local _, err_msg
> +    _, err_msg = pcall(serializer.decode, 'a{"hello": \n"world"}')
> +    test:ok(string.find(err_msg, 'line 1 at character 1') ~= nil,
> +            'mistake on first line')
> +    _, err_msg = pcall(serializer.decode, '{"hello": \n"world"a}')
> +    test:ok(string.find(err_msg, 'line 2 at character 8') ~= nil,
> +            'mistake on second line')
> +    _, err_msg = pcall(serializer.decode, '\n\n\n\n{"hello": "world"a}')
> +    test:ok(string.find(err_msg, 'line 5 at character 18') ~= nil,
> +            'mistake on fifth line')
> +    serializer.cfg{decode_max_depth = 1}
> +    _, err_msg = pcall(serializer.decode,
> +                       '{"hello": {"world": {"hello": "world"}}}')
> +    test:ok(string.find(err_msg, 'line 1 at character 11') ~= nil,
> +            'mistake on first line')
> +    _, err_msg = pcall(serializer.decode,
> +                       '{"hello": \n{"world": {"hello": "world"}}}')
> +    test:ok(string.find(err_msg, 'line 2 at character 1') ~= nil,
> +            'mistake on second line')
> +    _, err_msg = pcall(serializer.decode, "{ 100: 200 }")
> +    test:ok(string.find(err_msg, 'line 1 at character 3') ~= nil,
> +            'mistake on first line')
> +    _, err_msg = pcall(serializer.decode, '{"hello": "world",\n 100: 200}')
> +    test:ok(string.find(err_msg, 'line 2 at character 2') ~= nil,
> +            'mistake on second line')
>  end)
> diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
> index 6b03e391a..1a446c035 100644
> --- a/third_party/lua-cjson/lua_cjson.c
> +++ b/third_party/lua-cjson/lua_cjson.c
> @@ -66,6 +66,7 @@ typedef enum {
>      T_COMMA,
>      T_END,
>      T_WHITESPACE,
> +    T_LINEFEED,
>      T_ERROR,
>      T_UNKNOWN
>  } json_token_type_t;
> @@ -85,6 +86,7 @@ static const char *json_token_type_name[] = {
>      "T_COMMA",
>      "T_END",
>      "T_WHITESPACE",
> +    "T_LINEFEED",
>      "T_ERROR",
>      "T_UNKNOWN",
>      NULL
> @@ -105,11 +107,13 @@ typedef struct {
>      strbuf_t *tmp;    /* Temporary storage for strings */
>      struct luaL_serializer *cfg;
>      int current_depth;
> +    int line_count;
> +    const char *cur_line_ptr;
>  } json_parse_t;
>  
>  typedef struct {
>      json_token_type_t type;
> -    int index;
> +    int column_index;
>      union {
>          const char *string;
>          double number;
> @@ -195,7 +199,7 @@ static void json_create_tokens()
>      ch2token['\0'] = T_END;
>      ch2token[' '] = T_WHITESPACE;
>      ch2token['\t'] = T_WHITESPACE;
> -    ch2token['\n'] = T_WHITESPACE;
> +    ch2token['\n'] = T_LINEFEED;
>      ch2token['\r'] = T_WHITESPACE;
>  
>      /* Update characters that require further processing */
> @@ -592,7 +596,7 @@ static void json_set_token_error(json_token_t *token, json_parse_t *json,
>                                   const char *errtype)
>  {
>      token->type = T_ERROR;
> -    token->index = json->ptr - json->data;
> +    token->column_index = json->ptr - json->cur_line_ptr;
>      token->value.string = errtype;
>  }
>  
> @@ -732,14 +736,18 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
>      while (1) {
>          ch = (unsigned char)*(json->ptr);
>          token->type = ch2token[ch];
> -        if (token->type != T_WHITESPACE)
> +        if (token->type == T_LINEFEED) {
> +            json->line_count++;
> +            json->cur_line_ptr = json->ptr + 1;
> +        } else if (token->type != T_WHITESPACE) {
>              break;
> +        }
>          json->ptr++;
>      }
>  
>      /* Store location of new token. Required when throwing errors
>       * for unexpected tokens (syntax errors). */
> -    token->index = json->ptr - json->data;
> +    token->column_index = json->ptr - json->cur_line_ptr;
>  
>      /* Don't advance the pointer for an error or the end */
>      if (token->type == T_ERROR) {
> @@ -829,9 +837,9 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json,
>      else
>          found = json_token_type_name[token->type];
>  
> -    /* Note: token->index is 0 based, display starting from 1 */
> -    luaL_error(l, "Expected %s but found %s at character %d",
> -               exp, found, token->index + 1);
> +    /* Note: token->column_index is 0 based, display starting from 1 */
> +    luaL_error(l, "Expected %s but found %s on line %d at character %d", exp,
> +               found, json->line_count, token->column_index + 1);
>  }
>  
>  static inline void json_decode_ascend(json_parse_t *json)
> @@ -849,8 +857,9 @@ static void json_decode_descend(lua_State *l, json_parse_t *json, int slots)
>      }
>  
>      strbuf_free(json->tmp);
> -    luaL_error(l, "Found too many nested data structures (%d) at character %d",
> -        json->current_depth, json->ptr - json->data);
> +    luaL_error(l, "Found too many nested data structures (%d) on line %d at "
> +               "character %d", json->current_depth, json->line_count,
> +               json->ptr - json->cur_line_ptr);
>  }
>  
>  static void json_parse_object_context(lua_State *l, json_parse_t *json)
> @@ -1001,6 +1010,8 @@ static int json_decode(lua_State *l)
>      json.data = luaL_checklstring(l, 1, &json_len);
>      json.current_depth = 0;
>      json.ptr = json.data;
> +    json.line_count = 1;
> +    json.cur_line_ptr = json.data;
>  
>      /* Detect Unicode other than UTF-8 (see RFC 4627, Sec 3)
>       *
> 




More information about the Tarantool-patches mailing list