From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 5097929987 for ; Thu, 5 Apr 2018 10:51:36 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id xivcG1birDBv for ; Thu, 5 Apr 2018 10:51:36 -0400 (EDT) Received: from smtp37.i.mail.ru (smtp37.i.mail.ru [94.100.177.97]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id CC8AC29935 for ; Thu, 5 Apr 2018 10:51:35 -0400 (EDT) Subject: [tarantool-patches] Fwd: Re: [commits] [tarantool] 01/01: ICU Unicode support for JSON parser. References: <5f61ca8f-052d-5a58-a15e-b23d3a6802b2@tarantool.org> From: Vladislav Shpilevoy Message-ID: <5a7f243d-21ac-fe11-a581-035ee0c6e307@tarantool.org> Date: Thu, 5 Apr 2018 17:51:33 +0300 MIME-Version: 1.0 In-Reply-To: <5f61ca8f-052d-5a58-a15e-b23d3a6802b2@tarantool.org> Content-Type: text/plain; charset="utf-8"; format="flowed" Content-Transfer-Encoding: 8bit Content-Language: en-US Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org, Kirill Shcherbatov Sorry, sent to commits accidentally. -------- Перенаправленное сообщение -------- Тема: Re: [commits] [tarantool] 01/01: ICU Unicode support for JSON parser. Дата: Thu, 5 Apr 2018 17:48:38 +0300 От: Vladislav Shpilevoy Отвечать: Vladislav Shpilevoy , commits@tarantool.org Кому: Kirill Shcherbatov , commits@tarantool.org Hello. See 18 comments below. 05.04.2018 17:06, Kirill Shcherbatov пишет: > This is an automated email from the git hooks/post-receive script. > > kshcherbatov pushed a commit to branch gh-1285-tuple-field-by-json-icu > in repository tarantool. > > commit ab52c7a18441037c19d0b2759558a7eb93aabb0d > Author: Kirill Shcherbatov > AuthorDate: Thu Apr 5 17:04:15 2018 +0300 > > ICU Unicode support for JSON parser. > --- > src/box/CMakeLists.txt | 2 +- > src/box/lua/tuple.c | 172 ++++++---------------------------------- > src/box/lua/tuple.lua | 7 +- > src/box/tuple_format.c | 155 ++++++++++++++++++++++++++++++++++++ > src/box/tuple_format.h | 15 ++++ > src/lib/json/path.c | 191 ++++++++++++++++++++++++++++++--------------- > src/lib/json/path.h | 25 ++++-- > test/engine/tuple.result | 57 ++++++++++---- > test/engine/tuple.test.lua | 13 ++- > test/unit/CMakeLists.txt | 2 +- > test/unit/json_path.c | 2 +- > 11 files changed, 398 insertions(+), 243 deletions(-) > > diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt > index add0ff9..0f5b197 100644 > --- a/src/box/CMakeLists.txt > +++ b/src/box/CMakeLists.txt > @@ -44,7 +44,7 @@ add_library(tuple STATIC > field_def.c > opt_def.c > ) > -target_link_libraries(tuple box_error core ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit) > +target_link_libraries(tuple json_path box_error core ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries}) 1. Remove ${common_libraries} please, the tuple lib must not depend on Lua. See how to do it in the next comments. > @@ -497,81 +416,34 @@ tuple_field_go_to_key(const char **field, const char *key, int len) > static int > lbox_tuple_field_by_path(struct lua_State *L) > { > - const char *field; > struct tuple *tuple = luaT_istuple(L, 1); > /* Is checked in Lua wrapper. */ > assert(tuple != NULL); > - if (lua_isnumber(L, 2)) { > + const char *field = NULL; > + if (!lua_isnumber(L, 2)) { > + /* string */ > + field = tuple_field_raw_by_path(tuple_format(tuple), tuple_data(tuple), > + tuple_field_map(tuple), > + lua_tostring(L, 2), (int)lua_strlen(L, 2), > + lua_hashstring(L, 2)); 2. Add a tuple_field_by_path wrapper for it, as I told you verbally. Moreover this code is out of 80 symbols width. > + /* error code or message */ > + struct error *e = diag_last_error(diag_get()); 3. Please, move all of this error checking into !lua_isnumber branch. Tuple_field never returns error, because sometimes absent field is ok. > + if (field == NULL && e != NULL) > + lua_pushstring(L, e->errmsg); > + else > + lua_pushinteger(L, -1*(field == NULL)); > + if (field) 4. Use explicit != NULL, please. > diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua > index b51b4df..3785568 100644 > --- a/src/box/lua/tuple.lua > +++ b/src/box/lua/tuple.lua > @@ -6,6 +6,7 @@ local msgpackffi = require('msgpackffi') > local fun = require('fun') > local buffer = require('buffer') > local internal = require('box.internal') > +local log = require('log') 5. Unsed module. > > ffi.cdef[[ > /** \cond public */ > @@ -302,6 +303,7 @@ ffi.metatype(tuple_t, { > end; > __tostring = internal.tuple.tostring; > __index = function(tuple, key) > + local rc, field > if type(key) == "string" or type(key) == "number" then > -- Try to get a field by json path or by [index]. If > -- it was not found (rc ~= 0) then return a method > @@ -311,12 +313,13 @@ ffi.metatype(tuple_t, { > -- returns field value, not tuple_bsize function. To > -- access hidden methods use > -- 'box.tuple.(T, [args...])'. > - local rc, field = tuple_field_by_path(tuple, key) > + rc, field = tuple_field_by_path(tuple, key) > if rc == 0 then > return field > end > end > - return methods[key] > + local method = methods[key] > + return method == box.NULL and rc ~= -1 and rc or method 6. You must not return -1. Error must be nil + error message/object. > diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c > index 3e2c8bf..f2dffaf 100644 > --- a/src/box/tuple_format.c > +++ b/src/box/tuple_format.c > @@ -28,6 +28,8 @@ > * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF > * SUCH DAMAGE. > */ > +#include > +#include 7. Do not include lua headers in files, that are not in lua/ dir. Tarantool core must not depend on lua. As I told you earlier, you must use field_name_hash function, declared in tuple_dictionary.h. It allows to remove lua dependency and remove ${common_libraries} from cmake file. > +const char * > +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, > + const uint32_t *field_map, > + const char *path,uint32_t path_len, uint32_t path_hash) 8. Path argument can be placed on the previous line. 9. After ',' put a space please. > +{ > + int rc = 0; > + const char *field; > + struct json_path_parser parser; > + struct json_path_node node; > + json_path_parser_create(&parser, path, path_len); > + rc = json_path_next(&parser, &node); > + if (rc != 0 || node.type == JSON_PATH_END) > + goto not_found; > + if (node.type == JSON_PATH_NUM) { > + int index = node.num; > + if (index == 0) > + goto not_found; > + index -= TUPLE_INDEX_BASE; > + field = tuple_field_raw(format, tuple, > + field_map, index); > + if (field == NULL) > + goto not_found; > + } else { > + assert(node.type == JSON_PATH_STR); > + /* First part of a path is a field name. */ > + const char *name = node.str; > + uint32_t name_len = node.len; > + uint32_t name_hash; > + if (path_len == name_len) { > + name_hash = path_hash; > + } else { > + /* > + * If a string is "field....", then its > + * precalculated juajit hash can not be > + * used. A tuple dictionary hashes only > + * name, not path. > + */ > + name_hash = lua_hash(name, name_len); 10. Good comment! One remark - use field_name_hash instead of explicit lua_hash. > + } > + field = tuple_field_raw_by_name(format, tuple, > + field_map, > + name, name_len, name_hash); > + if (field == NULL) > + goto not_found; > + } > + while ((rc = json_path_next(&parser, &node)) == 0 && > + node.type != JSON_PATH_END) { > + if (node.type == JSON_PATH_NUM) { > + rc = tuple_field_go_to_index(&field, node.num); > + } else { > + assert(node.type == JSON_PATH_STR); > + rc = tuple_field_go_to_key(&field, node.str, node.len); > + } > + if (rc != 0) { > + rc = 0; /* prevent error raise */ > + goto not_found; > + } > + } > + if (rc == 0) > + return field; > + not_found: 11. Please, put labels to the line beginning. > diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h > index d33c77a..3e7e595 100644 > --- a/src/box/tuple_format.h > +++ b/src/box/tuple_format.h > @@ -343,6 +343,21 @@ tuple_field_raw_by_name(struct tuple_format *format, const char *tuple, > return tuple_field_raw(format, tuple, field_map, fieldno); > } > > +/** > + * Get tuple field by its path. > + * @param Tuple. > + * @param path Field path. > + * @param path_len Length of @a path. > + * @param path_hash Hash of @a path. > + * > + * @retval not NULL on field found. > + * @retval NULL No field by @a path. > + */ > +const char * > +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, > + const uint32_t *field_map, > + const char *path,uint32_t path_len, uint32_t path_hash); 12. Please, add tuple_field_by_path as well to tuple.h, that calls this function. It allows you to call tuple_field_by_path in lua/tuple.c. > + > #if defined(__cplusplus) > } /* extern "C" */ > #endif /* defined(__cplusplus) */ > diff --git a/src/lib/json/path.c b/src/lib/json/path.c > index 4a6174e..f3fcc46 100644 > --- a/src/lib/json/path.c > +++ b/src/lib/json/path.c > @@ -31,8 +31,10 @@ > > #include "path.h" > #include > +#include > #include "trivia/util.h" > > + 13. Garbage diff. > @@ -90,27 +130,40 @@ json_parse_string(struct json_path_parser *parser, struct json_path_node *node) > static inline int > json_parse_integer(struct json_path_parser *parser, struct json_path_node *node) > { > - const char *end = parser->src + parser->src_len; > - const char *pos = parser->pos; > - assert(pos < end); > - const char *str = pos; > - int len = 0; > - for (char c = *pos; pos < end && isdigit(c); c = *++pos) > - ++len; > - if (len == 0) > - return pos - parser->src + 1; > - parser->pos = pos; > + assert(parser->offset < parser->src_len); > + int str_offset = parser->offset; > + int last_offset = str_offset; > + int len = 0, rc = 0; > + UChar32 c = 0; > + > + while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) { 14. I remember, that you told me, that a digit can be read using simple char. Can you please implement this cycle with no UChar? > > +static inline char > +json_curr_char(struct json_path_parser *parser) > +{ > + return *json_path_curr_substring(parser); > +} 15. I do not like that on each char you must do string + offset. Please, find a way to store this position. Why did you delete parser->pos? > diff --git a/src/lib/json/path.h b/src/lib/json/path.h > index 6e8db4c..15292fb 100644 > --- a/src/lib/json/path.h > +++ b/src/lib/json/path.h > @@ -45,10 +45,11 @@ extern "C" { > struct json_path_parser { > /** Source string. */ > const char *src; > - /** Length of src. */ > + /** Length of string. */ > int src_len; > - /** Current parser's position. */ > - const char *pos; > + /** Current parser's offset. */ > + int offset; > + int invalid_sign_off; 16. Please, add a comment. > }; > > enum json_path_type { > @@ -78,18 +79,30 @@ struct json_path_node { > }; > > /** > - * Create @a parser. > + * Init @a parser. 17. Garbage diff. > diff --git a/test/engine/tuple.result b/test/engine/tuple.result > index 2d7367a..3a8e828 100644 > --- a/test/engine/tuple.result > +++ b/test/engine/tuple.result > @@ -602,7 +602,10 @@ format[2] = {name = 'field2', type = 'array'} > format[3] = {name = 'field3', type = 'map'} > --- > ... > -format[4] = {name = 'field4', type = 'string'} > +format[4] = {name = 'field4', type = 'string' } 18. Garbage diff.