From: Roman Khabibov <roman.habibov@tarantool.org> To: tarantool-patches@dev.tarantool.org Cc: v.shpilevoy@tarantool.org Subject: [Tarantool-patches] [PATCH v2 2/2] json: print context in error messages Date: Sun, 15 Dec 2019 17:42:47 +0300 [thread overview] Message-ID: <d4a4c85c4c6b2041d7be1ce1c6f33351f6dcab7a.1576420905.git.roman.habibov@tarantool.org> (raw) In-Reply-To: <cover.1576420905.git.roman.habibov@tarantool.org> Context is just a string with a few characters before and after the wrong token, the wrong token itself, and a symbolic arrow pointing to this token. Closes #4339 --- test/app-tap/json.test.lua | 20 +++++++++- third_party/lua-cjson/lua_cjson.c | 62 ++++++++++++++++++++++++++++--- 2 files changed, 76 insertions(+), 6 deletions(-) diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua index 6d511e686..70e9f6cf7 100755 --- a/test/app-tap/json.test.lua +++ b/test/app-tap/json.test.lua @@ -22,7 +22,7 @@ end tap.test("json", function(test) local serializer = require('json') - test:plan(51) + test:plan(57) test:test("unsigned", common.test_unsigned, serializer) test:test("signed", common.test_signed, serializer) @@ -184,4 +184,22 @@ tap.test("json", function(test) test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of T_COMMA') _, err_msg = pcall(serializer.decode, '{') test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of T_END') + + -- + -- gh-4339: Make sure that context is printed. 
+ -- + _, err_msg = pcall(serializer.decode, '{{: "world"}') + test:ok(string.find(err_msg, '{ >> {: "worl') ~= nil, 'context #1') + _, err_msg = pcall(serializer.decode, '{"a": "world"}}') + test:ok(string.find(err_msg, '"world"} >> }') ~= nil, 'context #2') + _, err_msg = pcall(serializer.decode, '{1: "world"}') + test:ok(string.find(err_msg, '{ >> 1: "worl') ~= nil, 'context #3') + _, err_msg = pcall(serializer.decode, '{') + test:ok(string.find(err_msg, '{ >> ') ~= nil, 'context #4') + _, err_msg = pcall(serializer.decode, '}') + test:ok(string.find(err_msg, ' >> }') ~= nil, 'context #5') + serializer.cfg{decode_max_depth = 1} + _, err_msg = pcall(serializer.decode, '{"a": {a = {}}}') + test:ok(string.find(err_msg, '{"a": >> {a = {}}') ~= nil, 'context #6') + end) diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c index e68b52847..655d6550e 100644 --- a/third_party/lua-cjson/lua_cjson.c +++ b/third_party/lua-cjson/lua_cjson.c @@ -825,6 +825,50 @@ static void json_next_token(json_parse_t *json, json_token_t *token) json_set_token_error(token, json, "invalid token"); } +enum context_length { + CONTEXT_ARROW_LENGTH = 4, + CONTEXT_MAX_LENGTH_BEFORE = 8, + CONTEXT_MAX_LENGTH_AFTER = 8, + CONTEXT_MAX_LENGTH = CONTEXT_MAX_LENGTH_BEFORE + CONTEXT_MAX_LENGTH_AFTER + + CONTEXT_ARROW_LENGTH, +}; + +/** + * Copy characters near wrong token with the position @a + * column_index to a static string buffer @a context and lay out + * arrow " >> " before this token. + * + * @param context String static buffer to fill. + * @param json Structure with pointers to parsing string. + * @param column_index Position of wrong token in the current + * line. + */ +static void fill_context(char *context, json_parse_t *json, int column_index) +{ + assert(column_index >= 0); + int length_before = column_index < CONTEXT_MAX_LENGTH_BEFORE ? 
+ column_index : CONTEXT_MAX_LENGTH_BEFORE; + const char *src = json->cur_line_ptr + column_index - length_before; + int i = 0; + /* Fill context before the arrow. */ + for (; i < length_before; i++) + context[i] = src[i]; + + /* Make the arrow. */ + context[i] = ' '; + memset(context + i + 1, '>', CONTEXT_ARROW_LENGTH - 2); + context[i + CONTEXT_ARROW_LENGTH - 1] = ' '; + + /* Fill context after the arrow. */ + for (int n = 0; n < CONTEXT_MAX_LENGTH_AFTER && src[i] != '\0' && + src[i] != '\n'; i++) { + context[i + CONTEXT_ARROW_LENGTH] = src[i]; + n++; + } + assert(i + CONTEXT_ARROW_LENGTH <= CONTEXT_MAX_LENGTH); + context[i + CONTEXT_ARROW_LENGTH] = '\0'; +} + /* This function does not return. * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED. * The only supported exception is the temporary parser string @@ -843,9 +887,14 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json, else found = json_token_type_name[token->type]; + int column_index = token->column_index; + char context[CONTEXT_MAX_LENGTH + 1]; + fill_context(context, json, column_index); + /* Note: token->column_index is 0 based, display starting from 1 */ - luaL_error(l, "Expected %s but found %s on line %d at character %d", exp, - found, json->line_count, token->column_index + 1); + luaL_error(l, "Expected %s but found %s on line %d at character %d here " + "'%s'", exp, found, json->line_count, token->column_index + 1, + context); } static inline void json_decode_ascend(json_parse_t *json) @@ -862,10 +911,13 @@ static void json_decode_descend(lua_State *l, json_parse_t *json, int slots) return; } + char context[CONTEXT_MAX_LENGTH + 1]; + fill_context(context, json, json->ptr - json->cur_line_ptr - 1); + strbuf_free(json->tmp); - luaL_error(l, "Found too many nested data structures (%d) on line %d at " - "character %d", json->current_depth, json->line_count, - json->ptr - json->cur_line_ptr); + luaL_error(l, "Found too many nested data structures (%d) on line %d at cha" + "racter %d here 
'%s'", json->current_depth, json->line_count, + json->ptr - json->cur_line_ptr, context); } static void json_parse_object_context(lua_State *l, json_parse_t *json) -- 2.21.0 (Apple Git-122)
next prev parent reply other threads:[~2019-12-15 14:42 UTC|newest] Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-12-15 14:42 [Tarantool-patches] [PATCH v2 0/2] Improve json " Roman Khabibov 2019-12-15 14:42 ` [Tarantool-patches] [PATCH v2 1/2] json: make error messages more readable Roman Khabibov 2019-12-21 14:56 ` Roman Khabibov 2019-12-15 14:42 ` Roman Khabibov [this message] 2019-12-19 23:19 ` [Tarantool-patches] [PATCH v2 2/2] json: print context in error messages Vladislav Shpilevoy 2019-12-21 14:56 ` Roman Khabibov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=d4a4c85c4c6b2041d7be1ce1c6f33351f6dcab7a.1576420905.git.roman.habibov@tarantool.org \ --to=roman.habibov@tarantool.org \ --cc=tarantool-patches@dev.tarantool.org \ --cc=v.shpilevoy@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH v2 2/2] json: print context in error mesages' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.