Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH v2 0/2] Improve json error mesages.
@ 2019-12-15 14:42 Roman Khabibov
  2019-12-15 14:42 ` [Tarantool-patches] [PATCH v2 1/2] json: make error messages more readable Roman Khabibov
  2019-12-15 14:42 ` [Tarantool-patches] [PATCH v2 2/2] json: print context in error mesages Roman Khabibov
  0 siblings, 2 replies; 6+ messages in thread
From: Roman Khabibov @ 2019-12-15 14:42 UTC (permalink / raw)
  To: tarantool-patches; +Cc: v.shpilevoy

The first patch fixes the token naming.
The second one adds the context.

Roman Khabibov (2):
  json: make error messages more readable
  json: print context in error mesages

 test/app-tap/json.test.lua        |  50 ++++++++++++++-
 third_party/lua-cjson/lua_cjson.c | 100 +++++++++++++++++++++++-------
 2 files changed, 125 insertions(+), 25 deletions(-)

Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4339-json-err
Issue: https://github.com/tarantool/tarantool/issues/4339

-- 
2.21.0 (Apple Git-122)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Tarantool-patches] [PATCH v2 1/2] json: make error messages more readable
  2019-12-15 14:42 [Tarantool-patches] [PATCH v2 0/2] Improve json error mesages Roman Khabibov
@ 2019-12-15 14:42 ` Roman Khabibov
  2019-12-21 14:56   ` Roman Khabibov
  2019-12-15 14:42 ` [Tarantool-patches] [PATCH v2 2/2] json: print context in error mesages Roman Khabibov
  1 sibling, 1 reply; 6+ messages in thread
From: Roman Khabibov @ 2019-12-15 14:42 UTC (permalink / raw)
  To: tarantool-patches; +Cc: v.shpilevoy

Print tokens themselves instead of token names "T_*" in the error
messages.

Part of #4339
---
 test/app-tap/json.test.lua        | 32 +++++++++++++++++++++++++-
 third_party/lua-cjson/lua_cjson.c | 38 +++++++++++++++----------------
 2 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua
index fadfc74ec..6d511e686 100755
--- a/test/app-tap/json.test.lua
+++ b/test/app-tap/json.test.lua
@@ -22,7 +22,7 @@ end
 
 tap.test("json", function(test)
     local serializer = require('json')
-    test:plan(40)
+    test:plan(51)
 
     test:test("unsigned", common.test_unsigned, serializer)
     test:test("signed", common.test_signed, serializer)
@@ -154,4 +154,34 @@ tap.test("json", function(test)
     _, err_msg = pcall(serializer.decode, '{"hello": "world",\n 100: 200}')
     test:ok(string.find(err_msg, 'line 2 at character 2') ~= nil,
             'mistake on second line')
+
+    --
+    -- gh-4339: Make sure that tokens 'T_*' are absent in error
+    -- messages and a context is printed.
+    --
+    _, err_msg = pcall(serializer.decode, '{{: "world"}')
+    test:ok(string.find(err_msg, '\'{\'') ~= nil, '"{" instead of T_OBJ_BEGIN')
+    _, err_msg = pcall(serializer.decode, '{"a": "world"}}')
+    test:ok(string.find(err_msg, '\'}\'') ~= nil, '"}" instead of T_OBJ_END')
+    _, err_msg = pcall(serializer.decode, '{[: "world"}')
+    test:ok(string.find(err_msg, '\'[\'', 1, true) ~= nil,
+            '"[" instead of T_ARR_BEGIN')
+    _, err_msg = pcall(serializer.decode, '{]: "world"}')
+    test:ok(string.find(err_msg, '\']\'', 1, true) ~= nil,
+            '"]" instead of T_ARR_END')
+    _, err_msg = pcall(serializer.decode, '{1: "world"}')
+    test:ok(string.find(err_msg, 'int') ~= nil, 'int instead of T_INT')
+    _, err_msg = pcall(serializer.decode, '{1.0: "world"}')
+    test:ok(string.find(err_msg, 'number') ~= nil, 'number instead of T_NUMBER')
+    _, err_msg = pcall(serializer.decode, '{true: "world"}')
+    test:ok(string.find(err_msg, 'boolean') ~= nil,
+            'boolean instead of T_BOOLEAN')
+    _, err_msg = pcall(serializer.decode, '{null: "world"}')
+    test:ok(string.find(err_msg, 'null') ~= nil, 'null instead of T_NULL')
+    _, err_msg = pcall(serializer.decode, '{:: "world"}')
+    test:ok(string.find(err_msg, 'colon') ~= nil, 'colon instead of T_COLON')
+    _, err_msg = pcall(serializer.decode, '{,: "world"}')
+    test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of T_COMMA')
+    _, err_msg = pcall(serializer.decode, '{')
+    test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of T_END')
 end)
diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
index 3d25814f3..e68b52847 100644
--- a/third_party/lua-cjson/lua_cjson.c
+++ b/third_party/lua-cjson/lua_cjson.c
@@ -72,23 +72,23 @@ typedef enum {
 } json_token_type_t;
 
 static const char *json_token_type_name[] = {
-    "T_OBJ_BEGIN",
-    "T_OBJ_END",
-    "T_ARR_BEGIN",
-    "T_ARR_END",
-    "T_STRING",
-    "T_UINT",
-    "T_INT",
-    "T_NUMBER",
-    "T_BOOLEAN",
-    "T_NULL",
-    "T_COLON",
-    "T_COMMA",
-    "T_END",
-    "T_WHITESPACE",
-    "T_LINEFEED",
-    "T_ERROR",
-    "T_UNKNOWN",
+    "'{'",
+    "'}'",
+    "'['",
+    "']'",
+    "string",
+    "unsigned int",
+    "int",
+    "number",
+    "boolean",
+    "null",
+    "colon",
+    "comma",
+    "end",
+    "whitespace",
+    "line feed",
+    "error",
+    "unknown symbol",
     NULL
 };
 
@@ -914,7 +914,7 @@ static void json_parse_object_context(lua_State *l, json_parse_t *json)
         }
 
         if (token.type != T_COMMA)
-            json_throw_parse_error(l, json, "comma or object end", &token);
+            json_throw_parse_error(l, json, "comma or '}'", &token);
 
         json_next_token(json, &token);
     }
@@ -954,7 +954,7 @@ static void json_parse_array_context(lua_State *l, json_parse_t *json)
         }
 
         if (token.type != T_COMMA)
-            json_throw_parse_error(l, json, "comma or array end", &token);
+            json_throw_parse_error(l, json, "comma or ']'", &token);
 
         json_next_token(json, &token);
     }
-- 
2.21.0 (Apple Git-122)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Tarantool-patches] [PATCH v2 2/2] json: print context in error mesages
  2019-12-15 14:42 [Tarantool-patches] [PATCH v2 0/2] Improve json error mesages Roman Khabibov
  2019-12-15 14:42 ` [Tarantool-patches] [PATCH v2 1/2] json: make error messages more readable Roman Khabibov
@ 2019-12-15 14:42 ` Roman Khabibov
  2019-12-19 23:19   ` Vladislav Shpilevoy
  1 sibling, 1 reply; 6+ messages in thread
From: Roman Khabibov @ 2019-12-15 14:42 UTC (permalink / raw)
  To: tarantool-patches; +Cc: v.shpilevoy

Context is just a string with a few characters before and after
wrong token, wrong token itself and a symbolic arrow pointing to
this token.

Closes #4339
---
 test/app-tap/json.test.lua        | 20 +++++++++-
 third_party/lua-cjson/lua_cjson.c | 62 ++++++++++++++++++++++++++++---
 2 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua
index 6d511e686..70e9f6cf7 100755
--- a/test/app-tap/json.test.lua
+++ b/test/app-tap/json.test.lua
@@ -22,7 +22,7 @@ end
 
 tap.test("json", function(test)
     local serializer = require('json')
-    test:plan(51)
+    test:plan(57)
 
     test:test("unsigned", common.test_unsigned, serializer)
     test:test("signed", common.test_signed, serializer)
@@ -184,4 +184,22 @@ tap.test("json", function(test)
     test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of T_COMMA')
     _, err_msg = pcall(serializer.decode, '{')
     test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of T_END')
+
+    --
+    -- gh-4339: Make sure that context is printed.
+    --
+    _, err_msg = pcall(serializer.decode, '{{: "world"}')
+    test:ok(string.find(err_msg, '{ >> {: "worl') ~= nil, 'context #1')
+    _, err_msg = pcall(serializer.decode, '{"a": "world"}}')
+    test:ok(string.find(err_msg, '"world"} >> }') ~= nil, 'context #2')
+    _, err_msg = pcall(serializer.decode, '{1: "world"}')
+    test:ok(string.find(err_msg, '{ >> 1: "worl') ~= nil, 'context #3')
+    _, err_msg = pcall(serializer.decode, '{')
+    test:ok(string.find(err_msg, '{ >> ') ~= nil, 'context #4')
+    _, err_msg = pcall(serializer.decode, '}')
+    test:ok(string.find(err_msg, ' >> }') ~= nil, 'context #5')
+    serializer.cfg{decode_max_depth = 1}
+    _, err_msg = pcall(serializer.decode, '{"a": {a = {}}}')
+    test:ok(string.find(err_msg, '{"a":  >> {a = {}}') ~= nil, 'context #6')
+
 end)
diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
index e68b52847..655d6550e 100644
--- a/third_party/lua-cjson/lua_cjson.c
+++ b/third_party/lua-cjson/lua_cjson.c
@@ -825,6 +825,50 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
     json_set_token_error(token, json, "invalid token");
 }
 
+enum context_length {
+    CONTEXT_ARROW_LENGTH = 4,
+    CONTEXT_MAX_LENGTH_BEFORE = 8,
+    CONTEXT_MAX_LENGTH_AFTER = 8,
+    CONTEXT_MAX_LENGTH = CONTEXT_MAX_LENGTH_BEFORE + CONTEXT_MAX_LENGTH_AFTER +
+    CONTEXT_ARROW_LENGTH,
+};
+
+/**
+ * Copy characters near wrong token with the position @a
+ * column_index to a static string buffer @a context and lay out
+ * arrow " >> " before this token.
+ *
+ * @param context      String static buffer to fill.
+ * @param json         Structure with pointers to parsing string.
+ * @param column_index Position of wrong token in the current
+ *                     line.
+ */
+static void fill_context(char *context, json_parse_t *json, int column_index)
+{
+    assert(column_index >= 0);
+    int length_before = column_index < CONTEXT_MAX_LENGTH_BEFORE ?
+        column_index : CONTEXT_MAX_LENGTH_BEFORE;
+    const char *src = json->cur_line_ptr + column_index - length_before;
+    int i = 0;
+    /* Fill context before the arrow. */
+    for (; i < length_before; i++)
+        context[i] = src[i];
+
+    /* Make the arrow. */
+    context[i] = ' ';
+    memset(context + i + 1, '>', CONTEXT_ARROW_LENGTH - 2);
+    context[i + CONTEXT_ARROW_LENGTH - 1] = ' ';
+
+    /* Fill context after the arrow. */
+    for (int n = 0; n < CONTEXT_MAX_LENGTH_AFTER && src[i] != '\0' &&
+         src[i] != '\n'; i++) {
+        context[i + CONTEXT_ARROW_LENGTH] = src[i];
+        n++;
+    }
+    assert(i + CONTEXT_ARROW_LENGTH <= CONTEXT_MAX_LENGTH);
+    context[i + CONTEXT_ARROW_LENGTH] = '\0';
+}
+
 /* This function does not return.
  * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
  * The only supported exception is the temporary parser string
@@ -843,9 +887,14 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json,
     else
         found = json_token_type_name[token->type];
 
+    int column_index = token->column_index;
+    char context[CONTEXT_MAX_LENGTH + 1];
+    fill_context(context, json, column_index);
+
     /* Note: token->column_index is 0 based, display starting from 1 */
-    luaL_error(l, "Expected %s but found %s on line %d at character %d", exp,
-               found, json->line_count, token->column_index + 1);
+    luaL_error(l, "Expected %s but found %s on line %d at character %d here "
+               "'%s'", exp, found, json->line_count, token->column_index + 1,
+               context);
 }
 
 static inline void json_decode_ascend(json_parse_t *json)
@@ -862,10 +911,13 @@ static void json_decode_descend(lua_State *l, json_parse_t *json, int slots)
         return;
     }
 
+    char context[CONTEXT_MAX_LENGTH + 1];
+    fill_context(context, json, json->ptr - json->cur_line_ptr - 1);
+
     strbuf_free(json->tmp);
-    luaL_error(l, "Found too many nested data structures (%d) on line %d at "
-               "character %d", json->current_depth, json->line_count,
-               json->ptr - json->cur_line_ptr);
+    luaL_error(l, "Found too many nested data structures (%d) on line %d at cha"
+               "racter %d here '%s'", json->current_depth, json->line_count,
+               json->ptr - json->cur_line_ptr, context);
 }
 
 static void json_parse_object_context(lua_State *l, json_parse_t *json)
-- 
2.21.0 (Apple Git-122)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Tarantool-patches] [PATCH v2 2/2] json: print context in error mesages
  2019-12-15 14:42 ` [Tarantool-patches] [PATCH v2 2/2] json: print context in error mesages Roman Khabibov
@ 2019-12-19 23:19   ` Vladislav Shpilevoy
  2019-12-21 14:56     ` Roman Khabibov
  0 siblings, 1 reply; 6+ messages in thread
From: Vladislav Shpilevoy @ 2019-12-19 23:19 UTC (permalink / raw)
  To: Roman Khabibov, tarantool-patches

Hi! Thanks for the patch!

See my review fixes here and on top of the branch.

Please, review them, and if you are ok, then squash.
In that case it will be LGTM.

==============================================================================

diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
index 655d6550e..79b0253f0 100644
--- a/third_party/lua-cjson/lua_cjson.c
+++ b/third_party/lua-cjson/lua_cjson.c
@@ -847,26 +847,24 @@ static void fill_context(char *context, json_parse_t *json, int column_index)
 {
     assert(column_index >= 0);
     int length_before = column_index < CONTEXT_MAX_LENGTH_BEFORE ?
-        column_index : CONTEXT_MAX_LENGTH_BEFORE;
+                        column_index : CONTEXT_MAX_LENGTH_BEFORE;
     const char *src = json->cur_line_ptr + column_index - length_before;
-    int i = 0;
     /* Fill context before the arrow. */
-    for (; i < length_before; i++)
-        context[i] = src[i];
+    memcpy(context, src, length_before);
+    context += length_before;
+    src += length_before;
 
     /* Make the arrow. */
-    context[i] = ' ';
-    memset(context + i + 1, '>', CONTEXT_ARROW_LENGTH - 2);
-    context[i + CONTEXT_ARROW_LENGTH - 1] = ' ';
+    *(context++) = ' ';
+    memset(context, '>', CONTEXT_ARROW_LENGTH - 2);
+    context += CONTEXT_ARROW_LENGTH - 2;
+    *(context++) = ' ';
 
     /* Fill context after the arrow. */
-    for (int n = 0; n < CONTEXT_MAX_LENGTH_AFTER && src[i] != '\0' &&
-         src[i] != '\n'; i++) {
-        context[i + CONTEXT_ARROW_LENGTH] = src[i];
-        n++;
-    }
-    assert(i + CONTEXT_ARROW_LENGTH <= CONTEXT_MAX_LENGTH);
-    context[i + CONTEXT_ARROW_LENGTH] = '\0';
+    const char *end = context + CONTEXT_MAX_LENGTH_AFTER;
+    for (; context < end && *src != '\0' && *src != '\n'; ++src, ++context)
+        *context = *src;
+    *context = '\0';
 }
 
 /* This function does not return.
@@ -893,8 +891,7 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json,
 
     /* Note: token->column_index is 0 based, display starting from 1 */
     luaL_error(l, "Expected %s but found %s on line %d at character %d here "
-               "'%s'", exp, found, json->line_count, token->column_index + 1,
-               context);
+               "'%s'", exp, found, json->line_count, column_index + 1, context);
 }
 
 static inline void json_decode_ascend(json_parse_t *json)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Tarantool-patches] [PATCH v2 2/2] json: print context in error mesages
  2019-12-19 23:19   ` Vladislav Shpilevoy
@ 2019-12-21 14:56     ` Roman Khabibov
  0 siblings, 0 replies; 6+ messages in thread
From: Roman Khabibov @ 2019-12-21 14:56 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: tarantool-patches, Vladislav Shpilevoy

Hi Thanks for the review. Vlad, I’m ok with it. Alexander, can you, please, do a second review?

https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4339-json-err

> On Dec 20, 2019, at 02:19, Vladislav Shpilevoy <v.shpilevoy@tarantool.org> wrote:
> 
> Hi! Thanks for the patch!
> 
> See my review fixes here and on top of the branch.
> 
> Please, review them, and if you are ok, then squash.
> In that case it will be LGTM.
> 
> ==============================================================================
> 
> diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
> index 655d6550e..79b0253f0 100644
> --- a/third_party/lua-cjson/lua_cjson.c
> +++ b/third_party/lua-cjson/lua_cjson.c
> @@ -847,26 +847,24 @@ static void fill_context(char *context, json_parse_t *json, int column_index)
> {
>     assert(column_index >= 0);
>     int length_before = column_index < CONTEXT_MAX_LENGTH_BEFORE ?
> -        column_index : CONTEXT_MAX_LENGTH_BEFORE;
> +                        column_index : CONTEXT_MAX_LENGTH_BEFORE;
>     const char *src = json->cur_line_ptr + column_index - length_before;
> -    int i = 0;
>     /* Fill context before the arrow. */
> -    for (; i < length_before; i++)
> -        context[i] = src[i];
> +    memcpy(context, src, length_before);
> +    context += length_before;
> +    src += length_before;
> 
>     /* Make the arrow. */
> -    context[i] = ' ';
> -    memset(context + i + 1, '>', CONTEXT_ARROW_LENGTH - 2);
> -    context[i + CONTEXT_ARROW_LENGTH - 1] = ' ';
> +    *(context++) = ' ';
> +    memset(context, '>', CONTEXT_ARROW_LENGTH - 2);
> +    context += CONTEXT_ARROW_LENGTH - 2;
> +    *(context++) = ' ';
> 
>     /* Fill context after the arrow. */
> -    for (int n = 0; n < CONTEXT_MAX_LENGTH_AFTER && src[i] != '\0' &&
> -         src[i] != '\n'; i++) {
> -        context[i + CONTEXT_ARROW_LENGTH] = src[i];
> -        n++;
> -    }
> -    assert(i + CONTEXT_ARROW_LENGTH <= CONTEXT_MAX_LENGTH);
> -    context[i + CONTEXT_ARROW_LENGTH] = '\0';
> +    const char *end = context + CONTEXT_MAX_LENGTH_AFTER;
> +    for (; context < end && *src != '\0' && *src != '\n'; ++src, ++context)
> +        *context = *src;
> +    *context = '\0';
> }
> 
> /* This function does not return.
> @@ -893,8 +891,7 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json,
> 
>     /* Note: token->column_index is 0 based, display starting from 1 */
>     luaL_error(l, "Expected %s but found %s on line %d at character %d here "
> -               "'%s'", exp, found, json->line_count, token->column_index + 1,
> -               context);
> +               "'%s'", exp, found, json->line_count, column_index + 1, context);
> }
> 
> static inline void json_decode_ascend(json_parse_t *json)

commit f990fa9fa9e25efc50229ca40418fd427f6e8261
Author: Roman Khabibov <roman.habibov@tarantool.org>
Date:   Thu Dec 12 16:55:33 2019 +0300

    json: print context in error mesages
    
    Context is just a string with a few characters before and after
    wrong token, wrong token itself and a symbolic arrow pointing to
    this token.
    
    Closes #4339

diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua
index 6d511e686..70e9f6cf7 100755
--- a/test/app-tap/json.test.lua
+++ b/test/app-tap/json.test.lua
@@ -22,7 +22,7 @@ end
 
 tap.test("json", function(test)
     local serializer = require('json')
-    test:plan(51)
+    test:plan(57)
 
     test:test("unsigned", common.test_unsigned, serializer)
     test:test("signed", common.test_signed, serializer)
@@ -184,4 +184,22 @@ tap.test("json", function(test)
     test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of T_COMMA')
     _, err_msg = pcall(serializer.decode, '{')
     test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of T_END')
+
+    --
+    -- gh-4339: Make sure that context is printed.
+    --
+    _, err_msg = pcall(serializer.decode, '{{: "world"}')
+    test:ok(string.find(err_msg, '{ >> {: "worl') ~= nil, 'context #1')
+    _, err_msg = pcall(serializer.decode, '{"a": "world"}}')
+    test:ok(string.find(err_msg, '"world"} >> }') ~= nil, 'context #2')
+    _, err_msg = pcall(serializer.decode, '{1: "world"}')
+    test:ok(string.find(err_msg, '{ >> 1: "worl') ~= nil, 'context #3')
+    _, err_msg = pcall(serializer.decode, '{')
+    test:ok(string.find(err_msg, '{ >> ') ~= nil, 'context #4')
+    _, err_msg = pcall(serializer.decode, '}')
+    test:ok(string.find(err_msg, ' >> }') ~= nil, 'context #5')
+    serializer.cfg{decode_max_depth = 1}
+    _, err_msg = pcall(serializer.decode, '{"a": {a = {}}}')
+    test:ok(string.find(err_msg, '{"a":  >> {a = {}}') ~= nil, 'context #6')
+
 end)
diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
index e68b52847..79b0253f0 100644
--- a/third_party/lua-cjson/lua_cjson.c
+++ b/third_party/lua-cjson/lua_cjson.c
@@ -825,6 +825,48 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
     json_set_token_error(token, json, "invalid token");
 }
 
+enum context_length {
+    CONTEXT_ARROW_LENGTH = 4,
+    CONTEXT_MAX_LENGTH_BEFORE = 8,
+    CONTEXT_MAX_LENGTH_AFTER = 8,
+    CONTEXT_MAX_LENGTH = CONTEXT_MAX_LENGTH_BEFORE + CONTEXT_MAX_LENGTH_AFTER +
+    CONTEXT_ARROW_LENGTH,
+};
+
+/**
+ * Copy characters near wrong token with the position @a
+ * column_index to a static string buffer @a context and lay out
+ * arrow " >> " before this token.
+ *
+ * @param context      String static buffer to fill.
+ * @param json         Structure with pointers to parsing string.
+ * @param column_index Position of wrong token in the current
+ *                     line.
+ */
+static void fill_context(char *context, json_parse_t *json, int column_index)
+{
+    assert(column_index >= 0);
+    int length_before = column_index < CONTEXT_MAX_LENGTH_BEFORE ?
+                        column_index : CONTEXT_MAX_LENGTH_BEFORE;
+    const char *src = json->cur_line_ptr + column_index - length_before;
+    /* Fill context before the arrow. */
+    memcpy(context, src, length_before);
+    context += length_before;
+    src += length_before;
+
+    /* Make the arrow. */
+    *(context++) = ' ';
+    memset(context, '>', CONTEXT_ARROW_LENGTH - 2);
+    context += CONTEXT_ARROW_LENGTH - 2;
+    *(context++) = ' ';
+
+    /* Fill context after the arrow. */
+    const char *end = context + CONTEXT_MAX_LENGTH_AFTER;
+    for (; context < end && *src != '\0' && *src != '\n'; ++src, ++context)
+        *context = *src;
+    *context = '\0';
+}
+
 /* This function does not return.
  * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
  * The only supported exception is the temporary parser string
@@ -843,9 +885,13 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json,
     else
         found = json_token_type_name[token->type];
 
+    int column_index = token->column_index;
+    char context[CONTEXT_MAX_LENGTH + 1];
+    fill_context(context, json, column_index);
+
     /* Note: token->column_index is 0 based, display starting from 1 */
-    luaL_error(l, "Expected %s but found %s on line %d at character %d", exp,
-               found, json->line_count, token->column_index + 1);
+    luaL_error(l, "Expected %s but found %s on line %d at character %d here "
+               "'%s'", exp, found, json->line_count, column_index + 1, context);
 }
 
 static inline void json_decode_ascend(json_parse_t *json)
@@ -862,10 +908,13 @@ static void json_decode_descend(lua_State *l, json_parse_t *json, int slots)
         return;
     }
 
+    char context[CONTEXT_MAX_LENGTH + 1];
+    fill_context(context, json, json->ptr - json->cur_line_ptr - 1);
+
     strbuf_free(json->tmp);
-    luaL_error(l, "Found too many nested data structures (%d) on line %d at "
-               "character %d", json->current_depth, json->line_count,
-               json->ptr - json->cur_line_ptr);
+    luaL_error(l, "Found too many nested data structures (%d) on line %d at cha"
+               "racter %d here '%s'", json->current_depth, json->line_count,
+               json->ptr - json->cur_line_ptr, context);
 }
 
 static void json_parse_object_context(lua_State *l, json_parse_t *json)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Tarantool-patches] [PATCH v2 1/2] json: make error messages more readable
  2019-12-15 14:42 ` [Tarantool-patches] [PATCH v2 1/2] json: make error messages more readable Roman Khabibov
@ 2019-12-21 14:56   ` Roman Khabibov
  0 siblings, 0 replies; 6+ messages in thread
From: Roman Khabibov @ 2019-12-21 14:56 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: tarantool-patches, Vladislav Shpilevoy

commit a427872128d30bd769e63a1254af774c1a24aa28
Author: Roman Khabibov <roman.habibov@tarantool.org>
Date:   Wed Oct 9 17:07:41 2019 +0300

    json: make error messages more readable
    
    Print tokens themselves instead of token names "T_*" in the error
    messages.
    
    Part of #4339

diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua
index fadfc74ec..6d511e686 100755
--- a/test/app-tap/json.test.lua
+++ b/test/app-tap/json.test.lua
@@ -22,7 +22,7 @@ end
 
 tap.test("json", function(test)
     local serializer = require('json')
-    test:plan(40)
+    test:plan(51)
 
     test:test("unsigned", common.test_unsigned, serializer)
     test:test("signed", common.test_signed, serializer)
@@ -154,4 +154,34 @@ tap.test("json", function(test)
     _, err_msg = pcall(serializer.decode, '{"hello": "world",\n 100: 200}')
     test:ok(string.find(err_msg, 'line 2 at character 2') ~= nil,
             'mistake on second line')
+
+    --
+    -- gh-4339: Make sure that tokens 'T_*' are absent in error
+    -- messages and a context is printed.
+    --
+    _, err_msg = pcall(serializer.decode, '{{: "world"}')
+    test:ok(string.find(err_msg, '\'{\'') ~= nil, '"{" instead of T_OBJ_BEGIN')
+    _, err_msg = pcall(serializer.decode, '{"a": "world"}}')
+    test:ok(string.find(err_msg, '\'}\'') ~= nil, '"}" instead of T_OBJ_END')
+    _, err_msg = pcall(serializer.decode, '{[: "world"}')
+    test:ok(string.find(err_msg, '\'[\'', 1, true) ~= nil,
+            '"[" instead of T_ARR_BEGIN')
+    _, err_msg = pcall(serializer.decode, '{]: "world"}')
+    test:ok(string.find(err_msg, '\']\'', 1, true) ~= nil,
+            '"]" instead of T_ARR_END')
+    _, err_msg = pcall(serializer.decode, '{1: "world"}')
+    test:ok(string.find(err_msg, 'int') ~= nil, 'int instead of T_INT')
+    _, err_msg = pcall(serializer.decode, '{1.0: "world"}')
+    test:ok(string.find(err_msg, 'number') ~= nil, 'number instead of T_NUMBER')
+    _, err_msg = pcall(serializer.decode, '{true: "world"}')
+    test:ok(string.find(err_msg, 'boolean') ~= nil,
+            'boolean instead of T_BOOLEAN')
+    _, err_msg = pcall(serializer.decode, '{null: "world"}')
+    test:ok(string.find(err_msg, 'null') ~= nil, 'null instead of T_NULL')
+    _, err_msg = pcall(serializer.decode, '{:: "world"}')
+    test:ok(string.find(err_msg, 'colon') ~= nil, 'colon instead of T_COLON')
+    _, err_msg = pcall(serializer.decode, '{,: "world"}')
+    test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of T_COMMA')
+    _, err_msg = pcall(serializer.decode, '{')
+    test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of T_END')
 end)
diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
index 3d25814f3..e68b52847 100644
--- a/third_party/lua-cjson/lua_cjson.c
+++ b/third_party/lua-cjson/lua_cjson.c
@@ -72,23 +72,23 @@ typedef enum {
 } json_token_type_t;
 
 static const char *json_token_type_name[] = {
-    "T_OBJ_BEGIN",
-    "T_OBJ_END",
-    "T_ARR_BEGIN",
-    "T_ARR_END",
-    "T_STRING",
-    "T_UINT",
-    "T_INT",
-    "T_NUMBER",
-    "T_BOOLEAN",
-    "T_NULL",
-    "T_COLON",
-    "T_COMMA",
-    "T_END",
-    "T_WHITESPACE",
-    "T_LINEFEED",
-    "T_ERROR",
-    "T_UNKNOWN",
+    "'{'",
+    "'}'",
+    "'['",
+    "']'",
+    "string",
+    "unsigned int",
+    "int",
+    "number",
+    "boolean",
+    "null",
+    "colon",
+    "comma",
+    "end",
+    "whitespace",
+    "line feed",
+    "error",
+    "unknown symbol",
     NULL
 };
 
@@ -914,7 +914,7 @@ static void json_parse_object_context(lua_State *l, json_parse_t *json)
         }
 
         if (token.type != T_COMMA)
-            json_throw_parse_error(l, json, "comma or object end", &token);
+            json_throw_parse_error(l, json, "comma or '}'", &token);
 
         json_next_token(json, &token);
     }
@@ -954,7 +954,7 @@ static void json_parse_array_context(lua_State *l, json_parse_t *json)
         }
 
         if (token.type != T_COMMA)
-            json_throw_parse_error(l, json, "comma or array end", &token);
+            json_throw_parse_error(l, json, "comma or ']'", &token);
 
         json_next_token(json, &token);
     }

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-12-21 14:56 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-15 14:42 [Tarantool-patches] [PATCH v2 0/2] Improve json error mesages Roman Khabibov
2019-12-15 14:42 ` [Tarantool-patches] [PATCH v2 1/2] json: make error messages more readable Roman Khabibov
2019-12-21 14:56   ` Roman Khabibov
2019-12-15 14:42 ` [Tarantool-patches] [PATCH v2 2/2] json: print context in error mesages Roman Khabibov
2019-12-19 23:19   ` Vladislav Shpilevoy
2019-12-21 14:56     ` Roman Khabibov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox