From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id C336E2D9C9 for ; Fri, 6 Apr 2018 07:09:09 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id egWaEitU_sSA for ; Fri, 6 Apr 2018 07:09:09 -0400 (EDT) Received: from smtpng1.m.smailru.net (smtpng1.m.smailru.net [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 47E752D9BD for ; Fri, 6 Apr 2018 07:09:09 -0400 (EDT) From: Kirill Shcherbatov Subject: [tarantool-patches] [PATCH v2 3/3] Lua: implement json path access to tuple fields Date: Fri, 6 Apr 2018 14:08:57 +0300 Message-Id: In-Reply-To: References: MIME-Version: 1.0 In-Reply-To: References: Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: v.shpilevoy@tarantool.org, Kirill Shcherbatov New tuple_field_raw_by_path and tuple_field_by_path APIs. 
Resolves #1285 --- src/box/CMakeLists.txt | 4 +- src/box/lua/tuple.c | 63 +++++++++---- src/box/lua/tuple.lua | 52 ++++------- src/box/tuple.h | 21 +++++ src/box/tuple_format.c | 164 +++++++++++++++++++++++++++++++++ src/box/tuple_format.h | 19 ++++ test/engine/tuple.result | 225 +++++++++++++++++++++++++++++++++++++++++++++ test/engine/tuple.test.lua | 66 +++++++++++++ 8 files changed, 556 insertions(+), 58 deletions(-) diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt index ad7f910..88c2c60 100644 --- a/src/box/CMakeLists.txt +++ b/src/box/CMakeLists.txt @@ -45,7 +45,7 @@ add_library(tuple STATIC field_def.c opt_def.c ) -target_link_libraries(tuple box_error core ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit) +target_link_libraries(tuple json_path box_error core ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit) add_library(xlog STATIC xlog.c) target_link_libraries(xlog core box_error crc32 ${ZSTD_LIBRARIES}) @@ -131,5 +131,5 @@ add_library(box STATIC ${bin_sources}) target_link_libraries(box box_error tuple stat xrow xlog vclock crc32 scramble - ${common_libraries}) + json_path ${common_libraries}) add_dependencies(box build_bundled_libs) diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c index 7ca4299..6967da6 100644 --- a/src/box/lua/tuple.c +++ b/src/box/lua/tuple.c @@ -41,6 +41,7 @@ #include "box/tuple_convert.h" #include "box/errcode.h" #include "box/memtx_tuple.h" +#include "json/path.h" /** {{{ box.tuple Lua library * @@ -402,36 +403,58 @@ lbox_tuple_transform(struct lua_State *L) } /** - * Find a tuple field using its name. + * Find a tuple field by JSON path. If a field was not found and a + * path contains JSON syntax errors, then an exception is raised. * @param L Lua state. - * @param tuple 1-th argument on lua stack, tuple to get field + * @param tuple 1-th argument on a lua stack, tuple to get field * from. - * @param field_name 2-th argument on lua stack, field name to - * get. + * @param path 2-th argument on lua stack. 
Can be field name, + * JSON path to a field or a field number. * - * @retval If a field was not found, return -1 and nil to lua else - * return 0 and decoded field. + * @retval not nil Found field value. + * @retval nil A field is NULL or does not exist. */ static int -lbox_tuple_field_by_name(struct lua_State *L) +lbox_tuple_field_by_path(struct lua_State *L) { struct tuple *tuple = luaT_istuple(L, 1); /* Is checked in Lua wrapper. */ assert(tuple != NULL); - assert(lua_isstring(L, 2)); - size_t name_len; - const char *name = lua_tolstring(L, 2, &name_len); - uint32_t name_hash = lua_hashstring(L, 2); - const char *field = - tuple_field_by_name(tuple, name, name_len, name_hash); - if (field == NULL) { - lua_pushinteger(L, -1); - lua_pushnil(L); - return 2; + const char *field = NULL; + if (lua_isnumber(L, 2)) { + double dbl_index = lua_tonumber(L, 2); + if (dbl_index != floor(dbl_index)) + goto usage_error; + int index = (int) floor(dbl_index) - TUPLE_INDEX_BASE; + if (index >= 0) { + field = tuple_field(tuple, index); + if (field == NULL) { + lua_pushnil(L); + return 1; + } + } else { + lua_pushnil(L); + return 1; + } + } else if (lua_isstring(L, 2)) { + size_t len; + const char *path = lua_tolstring(L, 2, &len); + if (len == 0) + goto usage_error; + if (tuple_field_by_path(tuple, path, (uint32_t) len, + lua_hashstring(L, 2), &field) != 0) { + return luaT_error(L); + } else if (field == NULL) { + lua_pushnil(L); + return 1; + } + } else { +usage_error: + return luaL_error(L, "Usage: tuple[ or number >= 1]"); } - lua_pushinteger(L, 0); + assert(field != NULL); luamp_decode(L, luaL_msgpack_default, &field); - return 2; + return 1; } static int @@ -470,8 +493,8 @@ static const struct luaL_Reg lbox_tuple_meta[] = { {"tostring", lbox_tuple_to_string}, {"slice", lbox_tuple_slice}, {"transform", lbox_tuple_transform}, - {"tuple_field_by_name", lbox_tuple_field_by_name}, {"tuple_to_map", lbox_tuple_to_map}, + {"tuple_field_by_path", lbox_tuple_field_by_path}, {NULL, 
NULL} }; diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua index 001971a..0eee325 100644 --- a/src/box/lua/tuple.lua +++ b/src/box/lua/tuple.lua @@ -9,16 +9,9 @@ local internal = require('box.internal') ffi.cdef[[ /** \cond public */ -typedef struct tuple_format box_tuple_format_t; - -box_tuple_format_t * -box_tuple_format_default(void); typedef struct tuple box_tuple_t; -box_tuple_t * -box_tuple_new(box_tuple_format_t *format, const char *data, const char *end); - int box_tuple_ref(box_tuple_t *tuple); @@ -34,9 +27,6 @@ box_tuple_bsize(const box_tuple_t *tuple); ssize_t box_tuple_to_buf(const box_tuple_t *tuple, char *buf, size_t size); -box_tuple_format_t * -box_tuple_format(const box_tuple_t *tuple); - const char * box_tuple_field(const box_tuple_t *tuple, uint32_t i); @@ -278,9 +268,9 @@ end msgpackffi.on_encode(const_tuple_ref_t, tuple_to_msgpack) -local function tuple_field_by_name(tuple, name) +local function tuple_field_by_path(tuple, path) tuple_check(tuple, "tuple['field_name']"); - return internal.tuple.tuple_field_by_name(tuple, name) + return internal.tuple.tuple_field_by_path(tuple, path) end local methods = { @@ -306,38 +296,28 @@ end methods["__serialize"] = tuple_totable -- encode hook for msgpack/yaml/json -local tuple_field = function(tuple, field_n) - local field = builtin.box_tuple_field(tuple, field_n - 1) - if field == nil then - return nil - end - -- Use () to shrink stack to the first return value - return (msgpackffi.decode_unchecked(field)) -end - - ffi.metatype(tuple_t, { __len = function(tuple) return builtin.box_tuple_field_count(tuple) end; __tostring = internal.tuple.tostring; __index = function(tuple, key) - if type(key) == "number" then - return tuple_field(tuple, key) - elseif type(key) == "string" then - -- Try to get a field with a name = key. If it was not - -- found (rc ~= 0) then return a method from the - -- vtable. If a collision occurred, then fields have - -- higher priority. 
For example, if a tuple T has a - -- field with name 'bsize', then T.bsize returns field - -- value, not tuple_bsize function. To access hidden - -- methods use 'box.tuple.(T, [args...])'. - local rc, field = tuple_field_by_name(tuple, key) - if rc == 0 then - return field + local res + if type(key) == "string" or type(key) == "number" then + -- Try to get a field by json path or by [index]. If + -- it was not found (res == nil) then return a method + -- from the vtable. If a collision occurred, then + -- fields have higher priority. For example, if a + -- tuple T has a field with name 'bsize', then T.bsize + -- returns field value, not tuple_bsize function. To + -- access hidden methods use + -- 'box.tuple.(T, [args...])'. + res = tuple_field_by_path(tuple, key) + if res ~= nil then + return res end end - return methods[key] + return methods[key] or res end; __eq = function(tuple_a, tuple_b) -- Two tuple are considered equal if they have same memory address diff --git a/src/box/tuple.h b/src/box/tuple.h index 6ebedf5..68ae9cd 100644 --- a/src/box/tuple.h +++ b/src/box/tuple.h @@ -514,6 +514,27 @@ tuple_field(const struct tuple *tuple, uint32_t fieldno) } /** + * Get tuple field by its JSON path. + * @param tuple Tuple to get a field from. + * @param path Field path. + * @param path_len Length of @a path. + * @param path_hash Hash of @a path. + * @param[out] field Found field, or NULL, if not found. + * + * @retval 0 Success. + * @retval -1 Error in JSON path. + */ +static inline int +tuple_field_by_path(struct tuple *tuple, const char *path, + uint32_t path_len, uint32_t path_hash, + const char **field) +{ + return tuple_field_raw_by_path(tuple_format(tuple), tuple_data(tuple), + tuple_field_map(tuple), path, path_len, + path_hash, field); +} + +/** * Get tuple field by its name. * @param tuple Tuple to get field from. * @param name Field name. 
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index e458f49..94342ea 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -28,6 +28,7 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +#include "json/path.h" #include "tuple_format.h" /** Global table of tuple formats */ @@ -478,3 +479,166 @@ box_tuple_format_unref(box_tuple_format_t *format) { tuple_format_unref(format); } + +/** + * Propagate @a field to MessagePack(field)[index]. + * @param[in][out] field Field to propagate. + * @param index 1-based index to propagate to. + * + * @retval 0 Success, the index was found. + * @retval -1 Not found. + */ +static inline int +tuple_field_go_to_index(const char **field, uint64_t index) +{ + enum mp_type type = mp_typeof(**field); + if (type == MP_ARRAY) { + if (index == 0) + return -1; + /* Make index 0-based. */ + index -= TUPLE_INDEX_BASE; + uint32_t count = mp_decode_array(field); + if (index >= count) + return -1; + for (; index > 0; --index) + mp_next(field); + return 0; + } else if (type == MP_MAP) { + uint64_t count = mp_decode_map(field); + for (; count > 0; --count) { + type = mp_typeof(**field); + if (type == MP_UINT) { + uint64_t value = mp_decode_uint(field); + if (value == index) + return 0; + } else if (type == MP_INT) { + int64_t value = mp_decode_int(field); + if (value >= 0 && (uint64_t)value == index) + return 0; + } else { + /* Skip key. */ + mp_next(field); + } + /* Skip value. */ + mp_next(field); + } + } + return -1; +} + +/** + * Propagate @a field to MessagePack(field)[key]. + * @param[in][out] field Field to propagate. + * @param key Key to propagate to. + * @param len Length of @a key. + * + * @retval 0 Success, the index was found. + * @retval -1 Not found. 
+ */ +static inline int +tuple_field_go_to_key(const char **field, const char *key, int len) +{ + enum mp_type type = mp_typeof(**field); + if (type != MP_MAP) + return -1; + uint64_t count = mp_decode_map(field); + for (; count > 0; --count) { + type = mp_typeof(**field); + if (type == MP_STR) { + uint32_t value_len; + const char *value = mp_decode_str(field, &value_len); + if (value_len == (uint)len && + memcmp(value, key, len) == 0) + return 0; + } else { + /* Skip key. */ + mp_next(field); + } + /* Skip value. */ + mp_next(field); + } + return -1; +} + +int +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, + const uint32_t *field_map, const char *path, + uint32_t path_len, uint32_t path_hash, + const char **field) +{ + assert(path_len > 0); + struct json_path_parser parser; + struct json_path_node node; + json_path_parser_create(&parser, path, path_len); + int rc = json_path_next(&parser, &node); + if (rc != 0) + goto best_effort; + switch(node.type) { + case JSON_PATH_NUM: { + int index = node.num; + if (index == 0) + goto best_effort; + index -= TUPLE_INDEX_BASE; + *field = tuple_field_raw(format, tuple, field_map, index); + if (*field == NULL) + goto best_effort; + break; + } + case JSON_PATH_STR: { + /* First part of a path is a field name. */ + uint32_t name_hash; + if (path_len == (uint32_t) node.len) { + name_hash = path_hash; + } else { + /* + * If a string is "field....", then its + * precalculated LuaJIT hash can not be + * used. A tuple dictionary hashes only + * name, not path. 
+ */ + name_hash = field_name_hash(node.str, node.len); + } + *field = tuple_field_raw_by_name(format, tuple, field_map, + node.str, node.len, name_hash); + if (*field == NULL) + goto best_effort; + break; + } + default: + assert(node.type == JSON_PATH_END); + *field = NULL; + return 0; + } + while (rc == 0 && (rc = json_path_next(&parser, &node)) == 0) { + switch(node.type) { + case JSON_PATH_NUM: + rc = tuple_field_go_to_index(field, node.num); + break; + case JSON_PATH_STR: + rc = tuple_field_go_to_key(field, node.str, node.len); + break; + default: + assert(node.type == JSON_PATH_END); + return 0; + } + } + assert(rc != 0); + /* + * It is possible, that a field has a name as + * well-formatted JSON. For example 'a.b.c.d' can be field + * name. If a data was not found by such path, then try + * to interpret the whole path as a field name. + * The same is true for field names, that are not valid + * JSON. + */ +best_effort: + *field = tuple_field_raw_by_name(format, tuple, field_map, path, + path_len, path_hash); + if (rc > 0 && *field == NULL) { + diag_set(ClientError, ER_ILLEGAL_PARAMS, + tt_sprintf("error in path on position %d", rc)); + return -1; + } else { + return 0; + } +} diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h index d35182d..a7dc9c7 100644 --- a/src/box/tuple_format.h +++ b/src/box/tuple_format.h @@ -377,6 +377,25 @@ tuple_field_raw_by_name(struct tuple_format *format, const char *tuple, return tuple_field_raw(format, tuple, field_map, fieldno); } +/** + * Get tuple field by its path. + * @param format Tuple format. + * @param tuple MessagePack tuple's body. + * @param field_map Tuple field map. + * @param path Field path. + * @param path_len Length of @a path. + * @param path_hash Hash of @a path. + * @param[out] field Found field, or NULL, if not found. + * + * @retval 0 Success. + * @retval -1 Error in JSON path. 
+ */ +int +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, + const uint32_t *field_map, const char *path, + uint32_t path_len, uint32_t path_hash, + const char **field); + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/test/engine/tuple.result b/test/engine/tuple.result index b3b23b2..7aeb457 100644 --- a/test/engine/tuple.result +++ b/test/engine/tuple.result @@ -590,6 +590,231 @@ maplen(t1map), t1map[1], t1map[2], t1map[3] s:drop() --- ... +format = {} +--- +... +format[1] = {name = 'field1', type = 'unsigned'} +--- +... +format[2] = {name = 'field2', type = 'array'} +--- +... +format[3] = {name = 'field3', type = 'map'} +--- +... +format[4] = {name = 'field4', type = 'string' } +--- +... +format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 'string'} +--- +... +s = box.schema.space.create('test', {format = format}) +--- +... +pk = s:create_index('pk') +--- +... +field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, key="value1", value="key1"}} +--- +... +field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200} +--- +... +t = s:replace{1, field2, field3, "123456", "yes, this"} +--- +... +t[1] +--- +- 1 +... +t[2] +--- +- [1, 2, 3, '4', [5, 6, 7], {'привет中国world': {'中国': 'привет'}, 'key': 'value1', 'value': 'key1'}] +... +t[3] +--- +- {'k1': 100, 'k3': [{'a': 1, 'b': 2}, {'c': 3, 'd': 4}], -1: 200, 10: 100, 'k2': [ + 1, 2, 3]} +... +t[4] +--- +- '123456' +... +t[2][1] +--- +- 1 +... +t["[2][1]"] +--- +- 1 +... +t[2][5] +--- +- [5, 6, 7] +... +t["[2][5]"] +--- +- [5, 6, 7] +... +t["[2][5][1]"] +--- +- 5 +... +t["[2][5][2]"] +--- +- 6 +... +t["[2][5][3]"] +--- +- 7 +... +t["[2][6].key"] +--- +- value1 +... +t["[2][6].value"] +--- +- key1 +... +t["[2][6]['key']"] +--- +- value1 +... +t["[2][6]['value']"] +--- +- key1 +... +t[2][6].привет中国world.中国 +--- +- привет +... +t["[2][6].привет中国world"].中国 +--- +- привет +... 
+t["[2][6].привет中国world.中国"] +--- +- привет +... +t["[2][6]['привет中国world']"]["中国"] +--- +- привет +... +t["[2][6]['привет中国world']['中国']"] +--- +- привет +... +t["[2][6]['привет中国world']['中国a']"] +--- +- yes, this +... +t["[3].k3[2].c"] +--- +- 3 +... +t["[4]"] +--- +- '123456' +... +t.field1 +--- +- 1 +... +t.field2[5] +--- +- [5, 6, 7] +... +t[".field1"] +--- +- 1 +... +t["field1"] +--- +- 1 +... +t["[3][10]"] +--- +- 100 +... +-- Not found. +t[0] +--- +- null +... +t["[0]"] +--- +- null +... +t["[1000]"] +--- +- null +... +t.field1000 +--- +- null +... +t["not_found"] +--- +- null +... +t["[2][5][10]"] +--- +- null +... +t["[2][6].key100"] +--- +- null +... +t["[2][0]"] -- 0-based index in array. +--- +- null +... +t["[4][3]"] -- Can not index string. +--- +- null +... +t["[4]['key']"] +--- +- null +... +-- Not found 'a'. Return 'null' despite of syntax error on a +-- next position. +t["a.b.c d.e.f"] +--- +- null +... +-- Sytax errors. +t[""] +--- +- error: 'builtin/box/tuple.lua:315: Usage: tuple[ or number >= 1]' +... +t["[2].[5]"] +--- +- error: Illegal parameters, error in path on position 5 +... +t["[-1]"] +--- +- error: Illegal parameters, error in path on position 2 +... +t[".."] +--- +- error: Illegal parameters, error in path on position 2 +... +t["[["] +--- +- error: Illegal parameters, error in path on position 2 +... +t["]]"] +--- +- error: Illegal parameters, error in path on position 1 +... +t["{"] +--- +- error: Illegal parameters, error in path on position 1 +... +s:drop() +--- +... engine = nil --- ... 
diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua index 6d7d254..90da8b2 100644 --- a/test/engine/tuple.test.lua +++ b/test/engine/tuple.test.lua @@ -200,5 +200,71 @@ t1map = t1:tomap() maplen(t1map), t1map[1], t1map[2], t1map[3] s:drop() +format = {} +format[1] = {name = 'field1', type = 'unsigned'} +format[2] = {name = 'field2', type = 'array'} +format[3] = {name = 'field3', type = 'map'} +format[4] = {name = 'field4', type = 'string' } +format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 'string'} +s = box.schema.space.create('test', {format = format}) +pk = s:create_index('pk') +field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, key="value1", value="key1"}} +field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200} +t = s:replace{1, field2, field3, "123456", "yes, this"} +t[1] +t[2] +t[3] +t[4] +t[2][1] +t["[2][1]"] +t[2][5] +t["[2][5]"] +t["[2][5][1]"] +t["[2][5][2]"] +t["[2][5][3]"] +t["[2][6].key"] +t["[2][6].value"] +t["[2][6]['key']"] +t["[2][6]['value']"] +t[2][6].привет中国world.中国 +t["[2][6].привет中国world"].中国 +t["[2][6].привет中国world.中国"] +t["[2][6]['привет中国world']"]["中国"] +t["[2][6]['привет中国world']['中国']"] +t["[2][6]['привет中国world']['中国a']"] +t["[3].k3[2].c"] +t["[4]"] +t.field1 +t.field2[5] +t[".field1"] +t["field1"] +t["[3][10]"] + +-- Not found. +t[0] +t["[0]"] +t["[1000]"] +t.field1000 +t["not_found"] +t["[2][5][10]"] +t["[2][6].key100"] +t["[2][0]"] -- 0-based index in array. +t["[4][3]"] -- Can not index string. +t["[4]['key']"] +-- Not found 'a'. Return 'null' despite of syntax error on a +-- next position. +t["a.b.c d.e.f"] + +-- Sytax errors. +t[""] +t["[2].[5]"] +t["[-1]"] +t[".."] +t["[["] +t["]]"] +t["{"] + +s:drop() + engine = nil test_run = nil -- 2.7.4