[Tarantool-patches] [PATCH 2/2] json: print context in error mesages
roman
roman.habibov at tarantool.org
Tue Nov 3 12:53:35 MSK 2020
Hi! Thanks for the review.
On 12.10.2020 12:20, Leonid Vasiliev wrote:
> Hi! Thank you for the patch.
> I have two small questions:
>
> On 09.10.2020 00:59, Roman Khabibov wrote:
>> Context is just a string with a few characters before and after
>> wrong token, wrong token itself and a symbolic arrow pointing to
>> this token.
>>
>> Closes #4339Hi
>> ---
>> test/app-tap/json.test.lua | 20 ++++++++++-
>> third_party/lua-cjson/lua_cjson.c | 59 ++++++++++++++++++++++++++++---
>> 2 files changed, 73 insertions(+), 6 deletions(-)
>>
>> diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua
>> index 6d511e686..70e9f6cf7 100755
>> --- a/test/app-tap/json.test.lua
>> +++ b/test/app-tap/json.test.lua
>> @@ -22,7 +22,7 @@ end
>> tap.test("json", function(test)
>> local serializer = require('json')
>> - test:plan(51)
>> + test:plan(57)
>> test:test("unsigned", common.test_unsigned, serializer)
>> test:test("signed", common.test_signed, serializer)
>> @@ -184,4 +184,22 @@ tap.test("json", function(test)
>> test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of
>> T_COMMA')
>> _, err_msg = pcall(serializer.decode, '{')
>> test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of
>> T_END')
>> +
>> + --
>> + -- gh-4339: Make sure that context is printed.
>> + --
>> + _, err_msg = pcall(serializer.decode, '{{: "world"}')
>> + test:ok(string.find(err_msg, '{ >> {: "worl') ~= nil, 'context #1')
>> + _, err_msg = pcall(serializer.decode, '{"a": "world"}}')
>> + test:ok(string.find(err_msg, '"world"} >> }') ~= nil, 'context #2')
>> + _, err_msg = pcall(serializer.decode, '{1: "world"}')
>> + test:ok(string.find(err_msg, '{ >> 1: "worl') ~= nil, 'context #3')
>> + _, err_msg = pcall(serializer.decode, '{')
>> + test:ok(string.find(err_msg, '{ >> ') ~= nil, 'context #4')
>> + _, err_msg = pcall(serializer.decode, '}')
>> + test:ok(string.find(err_msg, ' >> }') ~= nil, 'context #5')
>> + serializer.cfg{decode_max_depth = 1}
>> + _, err_msg = pcall(serializer.decode, '{"a": {a = {}}}')
>> + test:ok(string.find(err_msg, '{"a": >> {a = {}}') ~= nil,
>> 'context #6')
>> +
>> end)
>> diff --git a/third_party/lua-cjson/lua_cjson.c
>> b/third_party/lua-cjson/lua_cjson.c
>> index 33cf30577..f9e06172d 100644
>> --- a/third_party/lua-cjson/lua_cjson.c
>> +++ b/third_party/lua-cjson/lua_cjson.c
>> @@ -831,6 +831,48 @@ static void json_next_token(json_parse_t *json,
>> json_token_t *token)
>> json_set_token_error(token, json, "invalid token");
>> }
>> +enum context_length {
>> + CONTEXT_ARROW_LENGTH = 4,
>> + CONTEXT_MAX_LENGTH_BEFORE = 8,
>> + CONTEXT_MAX_LENGTH_AFTER = 8,
>> + CONTEXT_MAX_LENGTH = CONTEXT_MAX_LENGTH_BEFORE +
>> CONTEXT_MAX_LENGTH_AFTER +
>> + CONTEXT_ARROW_LENGTH,
>> +};
>> +
>> +/**
>> + * Copy characters near wrong token with the position @a
>> + * column_index to a static string buffer @a context and lay out
>> + * arrow " >> " before this token.
>> + *
>> + * @param context String static buffer to fill.
>> + * @param json Structure with pointers to parsing string.
>> + * @param column_index Position of wrong token in the current
>> + * line.
>> + */
>> +static void fill_context(char *context, json_parse_t *json, int
>> column_index)
>> +{
>> + assert(column_index >= 0);
>> + int length_before = column_index < CONTEXT_MAX_LENGTH_BEFORE ?
>> + column_index : CONTEXT_MAX_LENGTH_BEFORE;
>> + const char *src = json->cur_line_ptr + column_index -
>> length_before;
>> + /* Fill context before the arrow. */
>> + memcpy(context, src, length_before);
>> + context += length_before;
>> + src += length_before;
>> +
>> + /* Make the arrow. */
>> + *(context++) = ' ';
>> + memset(context, '>', CONTEXT_ARROW_LENGTH - 2);
>> + context += CONTEXT_ARROW_LENGTH - 2;
>> + *(context++) = ' ';
>> +
>> + /* Fill context after the arrow. */
>> + const char *end = context + CONTEXT_MAX_LENGTH_AFTER;
>> + for (; context < end && *src != '\0' && *src != '\n'; ++src,
>> ++context)
>> + *context = *src;
>> + *context = '\0';
>> +}
>> +
>> /* This function does not return.
>> * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
>> * The only supported exception is the temporary parser string
>> @@ -849,9 +891,13 @@ static void json_throw_parse_error(lua_State *l,
>> json_parse_t *json,
>> else
>> found = json_token_type_name[token->type];
>> + int column_index = token->column_index;
>> + char context[CONTEXT_MAX_LENGTH + 1];
>> + fill_context(context, json, column_index);
>
> What for you using the additional variable "column_index"? Maybe just
> "fill_context(context, json, token->column_index)".
Yes. Fixed.
>> +
>> /* Note: token->column_index is 0 based, display starting from
>> 1 */
>> - luaL_error(l, "Expected %s but found %s on line %d at character
>> %d", exp,
>> - found, json->line_count, token->column_index + 1);
>> + luaL_error(l, "Expected %s but found %s on line %d at character
>> %d here "
>> + "'%s'", exp, found, json->line_count, column_index +
>> 1, context);
>> }
>> static inline void json_decode_ascend(json_parse_t *json)
>> @@ -868,10 +914,13 @@ static void json_decode_descend(lua_State *l,
>> json_parse_t *json, int slots)
>> return;
>> }
>> + char context[CONTEXT_MAX_LENGTH + 1];
>> + fill_context(context, json, json->ptr - json->cur_line_ptr - 1);
>
> Typically, the code uses "token-> column_index = json-> ptr - json->
> cur_line_ptr;".
> Why do you use "json->ptr - json->cur_line_ptr - 1"?
Because, in this case json->ptr point to the character after the
character we need to
point arrow. I mean (json->ptr - 1) - json->cur_line_ptr
>> +
>> strbuf_free(json->tmp);
>> - luaL_error(l, "Found too many nested data structures (%d) on
>> line %d at "
>> - "character %d", json->current_depth, json->line_count,
>> - json->ptr - json->cur_line_ptr);
>> + luaL_error(l, "Found too many nested data structures (%d) on
>> line %d at cha"
>> + "racter %d here '%s'", json->current_depth,
>> json->line_count,
>> + json->ptr - json->cur_line_ptr, context);
>> }
>> static void json_parse_object_context(lua_State *l, json_parse_t
>> *json)
>>
I decided to rename context to err_context to avoid confusion
with json context (see functions in lua_cjson.c below).
commit 6c5cc1e6f067f1c9358a8bee03501f5df23f0191
Author: Roman Khabibov <roman.habibov at tarantool.org>
Date: Thu Dec 12 16:55:33 2019 +0300
json: print context in error mesages
Context is just a string with a few characters before and after
wrong token, wrong token itself and a symbolic arrow pointing to
this token.
Closes #4339
diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua
index 6d511e6..70e9f6c 100755
--- a/test/app-tap/json.test.lua
+++ b/test/app-tap/json.test.lua
@@ -22,7 +22,7 @@ end
tap.test("json", function(test)
local serializer = require('json')
- test:plan(51)
+ test:plan(57)
test:test("unsigned", common.test_unsigned, serializer)
test:test("signed", common.test_signed, serializer)
@@ -184,4 +184,22 @@ tap.test("json", function(test)
test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of
T_COMMA')
_, err_msg = pcall(serializer.decode, '{')
test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of T_END')
+
+ --
+ -- gh-4339: Make sure that context is printed.
+ --
+ _, err_msg = pcall(serializer.decode, '{{: "world"}')
+ test:ok(string.find(err_msg, '{ >> {: "worl') ~= nil, 'context #1')
+ _, err_msg = pcall(serializer.decode, '{"a": "world"}}')
+ test:ok(string.find(err_msg, '"world"} >> }') ~= nil, 'context #2')
+ _, err_msg = pcall(serializer.decode, '{1: "world"}')
+ test:ok(string.find(err_msg, '{ >> 1: "worl') ~= nil, 'context #3')
+ _, err_msg = pcall(serializer.decode, '{')
+ test:ok(string.find(err_msg, '{ >> ') ~= nil, 'context #4')
+ _, err_msg = pcall(serializer.decode, '}')
+ test:ok(string.find(err_msg, ' >> }') ~= nil, 'context #5')
+ serializer.cfg{decode_max_depth = 1}
+ _, err_msg = pcall(serializer.decode, '{"a": {a = {}}}')
+ test:ok(string.find(err_msg, '{"a": >> {a = {}}') ~= nil, 'context
#6')
+
end)
diff --git a/third_party/lua-cjson/lua_cjson.c
b/third_party/lua-cjson/lua_cjson.c
index 33cf305..38e9998 100644
--- a/third_party/lua-cjson/lua_cjson.c
+++ b/third_party/lua-cjson/lua_cjson.c
@@ -831,6 +831,50 @@ static void json_next_token(json_parse_t *json,
json_token_t *token)
json_set_token_error(token, json, "invalid token");
}
+enum err_context_length {
+ ERR_CONTEXT_ARROW_LENGTH = 4,
+ ERR_CONTEXT_MAX_LENGTH_BEFORE = 8,
+ ERR_CONTEXT_MAX_LENGTH_AFTER = 8,
+ ERR_CONTEXT_MAX_LENGTH = ERR_CONTEXT_MAX_LENGTH_BEFORE +
+ ERR_CONTEXT_MAX_LENGTH_AFTER + ERR_CONTEXT_ARROW_LENGTH,
+};
+
+/**
+ * Copy characters near wrong token with the position @a
+ * column_index to a static string buffer @a err_context and lay
+ * out arrow " >> " before this token.
+ *
+ * @param context String static buffer to fill.
+ * @param json Structure with pointers to parsing string.
+ * @param column_index Position of wrong token in the current
+ * line.
+ */
+static void fill_err_context(char *err_context, json_parse_t *json,
+ int column_index)
+{
+ assert(column_index >= 0);
+ int length_before = column_index < ERR_CONTEXT_MAX_LENGTH_BEFORE ?
+ column_index : ERR_CONTEXT_MAX_LENGTH_BEFORE;
+ const char *src = json->cur_line_ptr + column_index - length_before;
+ /* Fill error context before the arrow. */
+ memcpy(err_context, src, length_before);
+ err_context += length_before;
+ src += length_before;
+
+ /* Make the arrow. */
+ *(err_context++) = ' ';
+ memset(err_context, '>', ERR_CONTEXT_ARROW_LENGTH - 2);
+ err_context += ERR_CONTEXT_ARROW_LENGTH - 2;
+ *(err_context++) = ' ';
+
+ /* Fill error context after the arrow. */
+ const char *end = err_context + ERR_CONTEXT_MAX_LENGTH_AFTER;
+ for (; err_context < end && *src != '\0' && *src != '\n'; ++src,
+ ++err_context)
+ *err_context = *src;
+ *err_context = '\0';
+}
+
/* This function does not return.
* DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
* The only supported exception is the temporary parser string
@@ -849,9 +893,13 @@ static void json_throw_parse_error(lua_State *l,
json_parse_t *json,
else
found = json_token_type_name[token->type];
+ char err_context[ERR_CONTEXT_MAX_LENGTH + 1];
+ fill_err_context(err_context, json, token->column_index);
+
/* Note: token->column_index is 0 based, display starting from 1 */
- luaL_error(l, "Expected %s but found %s on line %d at character
%d", exp,
- found, json->line_count, token->column_index + 1);
+ luaL_error(l, "Expected %s but found %s on line %d at character %d
here "
+ "'%s'", exp, found, json->line_count,
token->column_index + 1,
+ err_context);
}
static inline void json_decode_ascend(json_parse_t *json)
@@ -868,10 +916,13 @@ static void json_decode_descend(lua_State *l,
json_parse_t *json, int slots)
return;
}
+ char err_context[ERR_CONTEXT_MAX_LENGTH + 1];
+ fill_err_context(err_context, json, json->ptr - json->cur_line_ptr
- 1);
+
strbuf_free(json->tmp);
- luaL_error(l, "Found too many nested data structures (%d) on line
%d at "
- "character %d", json->current_depth, json->line_count,
- json->ptr - json->cur_line_ptr);
+ luaL_error(l, "Found too many nested data structures (%d) on line
%d at cha"
+ "racter %d here '%s'", json->current_depth,
json->line_count,
+ json->ptr - json->cur_line_ptr, err_context);
}
static void json_parse_object_context(lua_State *l, json_parse_t *json)
More information about the Tarantool-patches
mailing list