From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp33.i.mail.ru (smtp33.i.mail.ru [94.100.177.93]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 9E77C469719 for ; Wed, 4 Nov 2020 18:06:49 +0300 (MSK) References: <20201008215956.93983-1-roman.habibov@tarantool.org> <20201008215956.93983-3-roman.habibov@tarantool.org> From: roman Message-ID: <413b5429-cb62-d0c1-8b4c-9b6cfdce8b51@tarantool.org> Date: Tue, 3 Nov 2020 12:53:35 +0300 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset="utf-8"; format="flowed" Content-Transfer-Encoding: 8bit Content-Language: en-US Subject: Re: [Tarantool-patches] [PATCH 2/2] json: print context in error mesages List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Leonid Vasiliev , tarantool-patches@dev.tarantool.org Hi! Thanks for the review. On 12.10.2020 12:20, Leonid Vasiliev wrote: > Hi! Thank you for the patch. > I have two small questions: > > On 09.10.2020 00:59, Roman Khabibov wrote: >> Context is just a string with a few characters before and after >> wrong token, wrong token itself and a symbolic arrow pointing to >> this token. >> >> Closes #4339Hi >> --- >>   test/app-tap/json.test.lua        | 20 ++++++++++- >>   third_party/lua-cjson/lua_cjson.c | 59 ++++++++++++++++++++++++++++--- >>   2 files changed, 73 insertions(+), 6 deletions(-) >> >> diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua >> index 6d511e686..70e9f6cf7 100755 >> --- a/test/app-tap/json.test.lua >> +++ b/test/app-tap/json.test.lua >> @@ -22,7 +22,7 @@ end >>     tap.test("json", function(test) >>       local serializer = require('json') >> -    test:plan(51) >> +    test:plan(57) >>         test:test("unsigned", common.test_unsigned, serializer) >>       test:test("signed", common.test_signed, serializer) >> @@ -184,4 +184,22 @@ tap.test("json", function(test) >>       test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of >> T_COMMA') >>       _, err_msg = pcall(serializer.decode, '{') >>       test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of >> T_END') >> + >> +    -- >> +    -- gh-4339: Make sure that context is printed. >> +    -- >> +    _, err_msg = pcall(serializer.decode, '{{: "world"}') >> +    test:ok(string.find(err_msg, '{ >> {: "worl') ~= nil, 'context #1') >> +    _, err_msg = pcall(serializer.decode, '{"a": "world"}}') >> +    test:ok(string.find(err_msg, '"world"} >> }') ~= nil, 'context #2') >> +    _, err_msg = pcall(serializer.decode, '{1: "world"}') >> +    test:ok(string.find(err_msg, '{ >> 1: "worl') ~= nil, 'context #3') >> +    _, err_msg = pcall(serializer.decode, '{') >> +    test:ok(string.find(err_msg, '{ >> ') ~= nil, 'context #4') >> +    _, err_msg = pcall(serializer.decode, '}') >> +    test:ok(string.find(err_msg, ' >> }') ~= nil, 'context #5') >> +    serializer.cfg{decode_max_depth = 1} >> +    _, err_msg = pcall(serializer.decode, '{"a": {a = {}}}') >> +    test:ok(string.find(err_msg, '{"a":  >> {a = {}}') ~= nil, >> 'context #6') >> + >>   end) >> diff --git a/third_party/lua-cjson/lua_cjson.c >> b/third_party/lua-cjson/lua_cjson.c >> index 33cf30577..f9e06172d 100644 >> --- a/third_party/lua-cjson/lua_cjson.c >> +++ b/third_party/lua-cjson/lua_cjson.c >> @@ -831,6 +831,48 @@ static void json_next_token(json_parse_t *json, >> json_token_t *token) >>       json_set_token_error(token, json, "invalid token"); >>   } >>   +enum context_length { >> +    CONTEXT_ARROW_LENGTH = 4, >> +    CONTEXT_MAX_LENGTH_BEFORE = 8, >> +    CONTEXT_MAX_LENGTH_AFTER = 8, >> +    CONTEXT_MAX_LENGTH = CONTEXT_MAX_LENGTH_BEFORE + >> CONTEXT_MAX_LENGTH_AFTER + >> +    CONTEXT_ARROW_LENGTH, >> +}; >> + >> +/** >> + * Copy characters near wrong token with the position @a >> + * column_index to a static string buffer @a context and lay out >> + * arrow " >> " before this token. >> + * >> + * @param context      String static buffer to fill. >> + * @param json         Structure with pointers to parsing string. >> + * @param column_index Position of wrong token in the current >> + *                     line. >> + */ >> +static void fill_context(char *context, json_parse_t *json, int >> column_index) >> +{ >> +    assert(column_index >= 0); >> +    int length_before = column_index < CONTEXT_MAX_LENGTH_BEFORE ? >> +                        column_index : CONTEXT_MAX_LENGTH_BEFORE; >> +    const char *src = json->cur_line_ptr + column_index - >> length_before; >> +    /* Fill context before the arrow. */ >> +    memcpy(context, src, length_before); >> +    context += length_before; >> +    src += length_before; >> + >> +    /* Make the arrow. */ >> +    *(context++) = ' '; >> +    memset(context, '>', CONTEXT_ARROW_LENGTH - 2); >> +    context += CONTEXT_ARROW_LENGTH - 2; >> +    *(context++) = ' '; >> + >> +    /* Fill context after the arrow. */ >> +    const char *end = context + CONTEXT_MAX_LENGTH_AFTER; >> +    for (; context < end && *src != '\0' && *src != '\n'; ++src, >> ++context) >> +        *context = *src; >> +    *context = '\0'; >> +} >> + >>   /* This function does not return. >>    * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED. >>    * The only supported exception is the temporary parser string >> @@ -849,9 +891,13 @@ static void json_throw_parse_error(lua_State *l, >> json_parse_t *json, >>       else >>           found = json_token_type_name[token->type]; >>   +    int column_index = token->column_index; >> +    char context[CONTEXT_MAX_LENGTH + 1]; >> +    fill_context(context, json, column_index); > > What for you using the additional variable "column_index"? Maybe just > "fill_context(context, json, token->column_index)". Yes. Fixed. >> + >>       /* Note: token->column_index is 0 based, display starting from >> 1 */ >> -    luaL_error(l, "Expected %s but found %s on line %d at character >> %d", exp, >> -               found, json->line_count, token->column_index + 1); >> +    luaL_error(l, "Expected %s but found %s on line %d at character >> %d here " >> +               "'%s'", exp, found, json->line_count, column_index + >> 1, context); >>   } >>     static inline void json_decode_ascend(json_parse_t *json) >> @@ -868,10 +914,13 @@ static void json_decode_descend(lua_State *l, >> json_parse_t *json, int slots) >>           return; >>       } >>   +    char context[CONTEXT_MAX_LENGTH + 1]; >> +    fill_context(context, json, json->ptr - json->cur_line_ptr - 1); > > Typically, the code uses "token-> column_index = json-> ptr - json-> > cur_line_ptr;". > Why do you use "json->ptr - json->cur_line_ptr - 1"? Because, in this case json->ptr point to the character after the character we need to point arrow. I mean (json->ptr - 1) - json->cur_line_ptr >> + >>       strbuf_free(json->tmp); >> -    luaL_error(l, "Found too many nested data structures (%d) on >> line %d at " >> -               "character %d", json->current_depth, json->line_count, >> -               json->ptr - json->cur_line_ptr); >> +    luaL_error(l, "Found too many nested data structures (%d) on >> line %d at cha" >> +               "racter %d here '%s'", json->current_depth, >> json->line_count, >> +               json->ptr - json->cur_line_ptr, context); >>   } >>     static void json_parse_object_context(lua_State *l, json_parse_t >> *json) >> I decided to rename context to err_context to avoid confusion with json context (see functions in lua_cjson.c below). commit 6c5cc1e6f067f1c9358a8bee03501f5df23f0191 Author: Roman Khabibov Date:   Thu Dec 12 16:55:33 2019 +0300     json: print context in error mesages     Context is just a string with a few characters before and after     wrong token, wrong token itself and a symbolic arrow pointing to     this token.     Closes #4339 diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua index 6d511e6..70e9f6c 100755 --- a/test/app-tap/json.test.lua +++ b/test/app-tap/json.test.lua @@ -22,7 +22,7 @@ end  tap.test("json", function(test)      local serializer = require('json') -    test:plan(51) +    test:plan(57)      test:test("unsigned", common.test_unsigned, serializer)      test:test("signed", common.test_signed, serializer) @@ -184,4 +184,22 @@ tap.test("json", function(test)      test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of T_COMMA')      _, err_msg = pcall(serializer.decode, '{')      test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of T_END') + +    -- +    -- gh-4339: Make sure that context is printed. +    -- +    _, err_msg = pcall(serializer.decode, '{{: "world"}') +    test:ok(string.find(err_msg, '{ >> {: "worl') ~= nil, 'context #1') +    _, err_msg = pcall(serializer.decode, '{"a": "world"}}') +    test:ok(string.find(err_msg, '"world"} >> }') ~= nil, 'context #2') +    _, err_msg = pcall(serializer.decode, '{1: "world"}') +    test:ok(string.find(err_msg, '{ >> 1: "worl') ~= nil, 'context #3') +    _, err_msg = pcall(serializer.decode, '{') +    test:ok(string.find(err_msg, '{ >> ') ~= nil, 'context #4') +    _, err_msg = pcall(serializer.decode, '}') +    test:ok(string.find(err_msg, ' >> }') ~= nil, 'context #5') +    serializer.cfg{decode_max_depth = 1} +    _, err_msg = pcall(serializer.decode, '{"a": {a = {}}}') +    test:ok(string.find(err_msg, '{"a":  >> {a = {}}') ~= nil, 'context #6') +  end) diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c index 33cf305..38e9998 100644 --- a/third_party/lua-cjson/lua_cjson.c +++ b/third_party/lua-cjson/lua_cjson.c @@ -831,6 +831,50 @@ static void json_next_token(json_parse_t *json, json_token_t *token)      json_set_token_error(token, json, "invalid token");  } +enum err_context_length { +    ERR_CONTEXT_ARROW_LENGTH = 4, +    ERR_CONTEXT_MAX_LENGTH_BEFORE = 8, +    ERR_CONTEXT_MAX_LENGTH_AFTER = 8, +    ERR_CONTEXT_MAX_LENGTH = ERR_CONTEXT_MAX_LENGTH_BEFORE + +    ERR_CONTEXT_MAX_LENGTH_AFTER + ERR_CONTEXT_ARROW_LENGTH, +}; + +/** + * Copy characters near wrong token with the position @a + * column_index to a static string buffer @a err_context and lay + * out arrow " >> " before this token. + * + * @param context      String static buffer to fill. + * @param json         Structure with pointers to parsing string. + * @param column_index Position of wrong token in the current + *                     line. + */ +static void fill_err_context(char *err_context, json_parse_t *json, +                             int column_index) +{ +    assert(column_index >= 0); +    int length_before = column_index < ERR_CONTEXT_MAX_LENGTH_BEFORE ? +                        column_index : ERR_CONTEXT_MAX_LENGTH_BEFORE; +    const char *src = json->cur_line_ptr + column_index - length_before; +    /* Fill error context before the arrow. */ +    memcpy(err_context, src, length_before); +    err_context += length_before; +    src += length_before; + +    /* Make the arrow. */ +    *(err_context++) = ' '; +    memset(err_context, '>', ERR_CONTEXT_ARROW_LENGTH - 2); +    err_context += ERR_CONTEXT_ARROW_LENGTH - 2; +    *(err_context++) = ' '; + +    /* Fill error context after the arrow. */ +    const char *end = err_context + ERR_CONTEXT_MAX_LENGTH_AFTER; +    for (; err_context < end && *src != '\0' && *src != '\n'; ++src, +         ++err_context) +        *err_context = *src; +    *err_context = '\0'; +} +  /* This function does not return.   * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.   * The only supported exception is the temporary parser string @@ -849,9 +893,13 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json,      else          found = json_token_type_name[token->type]; +    char err_context[ERR_CONTEXT_MAX_LENGTH + 1]; +    fill_err_context(err_context, json, token->column_index); +      /* Note: token->column_index is 0 based, display starting from 1 */ -    luaL_error(l, "Expected %s but found %s on line %d at character %d", exp, -               found, json->line_count, token->column_index + 1); +    luaL_error(l, "Expected %s but found %s on line %d at character %d here " +               "'%s'", exp, found, json->line_count, token->column_index + 1, +               err_context);  }  static inline void json_decode_ascend(json_parse_t *json) @@ -868,10 +916,13 @@ static void json_decode_descend(lua_State *l, json_parse_t *json, int slots)          return;      } +    char err_context[ERR_CONTEXT_MAX_LENGTH + 1]; +    fill_err_context(err_context, json, json->ptr - json->cur_line_ptr - 1); +      strbuf_free(json->tmp); -    luaL_error(l, "Found too many nested data structures (%d) on line %d at " -               "character %d", json->current_depth, json->line_count, -               json->ptr - json->cur_line_ptr); +    luaL_error(l, "Found too many nested data structures (%d) on line %d at cha" +               "racter %d here '%s'", json->current_depth, json->line_count, +               json->ptr - json->cur_line_ptr, err_context);  }  static void json_parse_object_context(lua_State *l, json_parse_t *json)