[Tarantool-patches] [PATCH 2/2] json: print context in error mesages

Leonid Vasiliev lvasiliev at tarantool.org
Mon Oct 12 12:20:57 MSK 2020


Hi! Thank you for the patch.
I have two small questions:

On 09.10.2020 00:59, Roman Khabibov wrote:
> Context is just a string with a few characters before and after
> wrong token, wrong token itself and a symbolic arrow pointing to
> this token.
> 
> Closes #4339Hi
> ---
>   test/app-tap/json.test.lua        | 20 ++++++++++-
>   third_party/lua-cjson/lua_cjson.c | 59 ++++++++++++++++++++++++++++---
>   2 files changed, 73 insertions(+), 6 deletions(-)
> 
> diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua
> index 6d511e686..70e9f6cf7 100755
> --- a/test/app-tap/json.test.lua
> +++ b/test/app-tap/json.test.lua
> @@ -22,7 +22,7 @@ end
>   
>   tap.test("json", function(test)
>       local serializer = require('json')
> -    test:plan(51)
> +    test:plan(57)
>   
>       test:test("unsigned", common.test_unsigned, serializer)
>       test:test("signed", common.test_signed, serializer)
> @@ -184,4 +184,22 @@ tap.test("json", function(test)
>       test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of T_COMMA')
>       _, err_msg = pcall(serializer.decode, '{')
>       test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of T_END')
> +
> +    --
> +    -- gh-4339: Make sure that context is printed.
> +    --
> +    _, err_msg = pcall(serializer.decode, '{{: "world"}')
> +    test:ok(string.find(err_msg, '{ >> {: "worl') ~= nil, 'context #1')
> +    _, err_msg = pcall(serializer.decode, '{"a": "world"}}')
> +    test:ok(string.find(err_msg, '"world"} >> }') ~= nil, 'context #2')
> +    _, err_msg = pcall(serializer.decode, '{1: "world"}')
> +    test:ok(string.find(err_msg, '{ >> 1: "worl') ~= nil, 'context #3')
> +    _, err_msg = pcall(serializer.decode, '{')
> +    test:ok(string.find(err_msg, '{ >> ') ~= nil, 'context #4')
> +    _, err_msg = pcall(serializer.decode, '}')
> +    test:ok(string.find(err_msg, ' >> }') ~= nil, 'context #5')
> +    serializer.cfg{decode_max_depth = 1}
> +    _, err_msg = pcall(serializer.decode, '{"a": {a = {}}}')
> +    test:ok(string.find(err_msg, '{"a":  >> {a = {}}') ~= nil, 'context #6')
> +
>   end)
> diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
> index 33cf30577..f9e06172d 100644
> --- a/third_party/lua-cjson/lua_cjson.c
> +++ b/third_party/lua-cjson/lua_cjson.c
> @@ -831,6 +831,48 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
>       json_set_token_error(token, json, "invalid token");
>   }
>   
> +enum context_length {
> +    CONTEXT_ARROW_LENGTH = 4,
> +    CONTEXT_MAX_LENGTH_BEFORE = 8,
> +    CONTEXT_MAX_LENGTH_AFTER = 8,
> +    CONTEXT_MAX_LENGTH = CONTEXT_MAX_LENGTH_BEFORE + CONTEXT_MAX_LENGTH_AFTER +
> +    CONTEXT_ARROW_LENGTH,
> +};
> +
> +/**
> + * Copy characters near wrong token with the position @a
> + * column_index to a static string buffer @a context and lay out
> + * arrow " >> " before this token.
> + *
> + * @param context      String static buffer to fill.
> + * @param json         Structure with pointers to parsing string.
> + * @param column_index Position of wrong token in the current
> + *                     line.
> + */
> +static void fill_context(char *context, json_parse_t *json, int column_index)
> +{
> +    assert(column_index >= 0);
> +    int length_before = column_index < CONTEXT_MAX_LENGTH_BEFORE ?
> +                        column_index : CONTEXT_MAX_LENGTH_BEFORE;
> +    const char *src = json->cur_line_ptr + column_index - length_before;
> +    /* Fill context before the arrow. */
> +    memcpy(context, src, length_before);
> +    context += length_before;
> +    src += length_before;
> +
> +    /* Make the arrow. */
> +    *(context++) = ' ';
> +    memset(context, '>', CONTEXT_ARROW_LENGTH - 2);
> +    context += CONTEXT_ARROW_LENGTH - 2;
> +    *(context++) = ' ';
> +
> +    /* Fill context after the arrow. */
> +    const char *end = context + CONTEXT_MAX_LENGTH_AFTER;
> +    for (; context < end && *src != '\0' && *src != '\n'; ++src, ++context)
> +        *context = *src;
> +    *context = '\0';
> +}
> +
>   /* This function does not return.
>    * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
>    * The only supported exception is the temporary parser string
> @@ -849,9 +891,13 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json,
>       else
>           found = json_token_type_name[token->type];
>   
> +    int column_index = token->column_index;
> +    char context[CONTEXT_MAX_LENGTH + 1];
> +    fill_context(context, json, column_index);

What for you using the additional variable "column_index"? Maybe just
"fill_context(context, json, token->column_index)".

> +
>       /* Note: token->column_index is 0 based, display starting from 1 */
> -    luaL_error(l, "Expected %s but found %s on line %d at character %d", exp,
> -               found, json->line_count, token->column_index + 1);
> +    luaL_error(l, "Expected %s but found %s on line %d at character %d here "
> +               "'%s'", exp, found, json->line_count, column_index + 1, context);
>   }
>   
>   static inline void json_decode_ascend(json_parse_t *json)
> @@ -868,10 +914,13 @@ static void json_decode_descend(lua_State *l, json_parse_t *json, int slots)
>           return;
>       }
>   
> +    char context[CONTEXT_MAX_LENGTH + 1];
> +    fill_context(context, json, json->ptr - json->cur_line_ptr - 1);

Typically, the code uses "token-> column_index = json-> ptr - json-> 
cur_line_ptr;".
Why do you use "json->ptr - json->cur_line_ptr - 1"?

> +
>       strbuf_free(json->tmp);
> -    luaL_error(l, "Found too many nested data structures (%d) on line %d at "
> -               "character %d", json->current_depth, json->line_count,
> -               json->ptr - json->cur_line_ptr);
> +    luaL_error(l, "Found too many nested data structures (%d) on line %d at cha"
> +               "racter %d here '%s'", json->current_depth, json->line_count,
> +               json->ptr - json->cur_line_ptr, context);
>   }
>   
>   static void json_parse_object_context(lua_State *l, json_parse_t *json)
> 


More information about the Tarantool-patches mailing list