From: Kirill Shcherbatov <kshcherbatov@tarantool.org> To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: kostja@tarantool.org, Kirill Shcherbatov <kshcherbatov@tarantool.org> Subject: [PATCH v5 9/9] box: specify indexes in user-friendly form Date: Mon, 26 Nov 2018 13:49:43 +0300 [thread overview] Message-ID: <d8c6fe8028944a7d5b198eb18ca21e7f6392fd07.1543229303.git.kshcherbatov@tarantool.org> (raw) In-Reply-To: <cover.1543229303.git.kshcherbatov@tarantool.org> In-Reply-To: <cover.1543229303.git.kshcherbatov@tarantool.org> It is now possible to create indexes by JSON path and by field names specified in the space format. Closes #1012 @TarantoolBot document Title: Indexes by JSON path Sometimes field data can have a complex document structure. When this structure is consistent across the whole document, you can create an index by JSON path. Example: s:create_index('json_index', {parts = {{'FIO["fname"]', 'str'}}}) --- src/box/lua/index.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++ src/box/lua/schema.lua | 22 ++++++++-------- test/engine/tuple.result | 41 ++++++++++++++++++++++++++++ test/engine/tuple.test.lua | 12 +++++++++ 4 files changed, 130 insertions(+), 11 deletions(-) diff --git a/src/box/lua/index.c b/src/box/lua/index.c index ef89c39..ef81ab2 100644 --- a/src/box/lua/index.c +++ b/src/box/lua/index.c @@ -35,6 +35,9 @@ #include "box/info.h" #include "box/lua/info.h" #include "box/lua/tuple.h" +#include "box/schema.h" +#include "box/tuple_format.h" +#include "json/json.h" #include "box/lua/misc.h" /* lbox_encode_tuple_on_gc() */ /** {{{ box.index Lua library: access to spaces and indexes @@ -328,6 +331,68 @@ lbox_index_compact(lua_State *L) return 0; } +/** + * Resolve field index by absolute JSON path first component and + * return relative JSON path. 
+ */ +static int +lbox_index_path_resolve(struct lua_State *L) +{ + if (lua_gettop(L) != 3 || + !lua_isnumber(L, 1) || !lua_isnumber(L, 2) || !lua_isstring(L, 3)) { + return luaL_error(L, "Usage box.internal." + "path_resolve(part_id, space_id, path)"); + } + uint32_t part_id = lua_tonumber(L, 1); + uint32_t space_id = lua_tonumber(L, 2); + size_t path_len; + const char *path = lua_tolstring(L, 3, &path_len); + struct space *space = space_cache_find(space_id); + if (space == NULL) + return luaT_error(L); + struct json_lexer lexer; + struct json_token token; + json_lexer_create(&lexer, path, path_len); + int rc = json_lexer_next_token(&lexer, &token); + if (rc != 0) { + const char *err_msg = + tt_sprintf("options.parts[%d]: error in path on " + "position %d", part_id, rc); + diag_set(ClientError, ER_ILLEGAL_PARAMS, err_msg); + return luaT_error(L); + } + assert(space->format != NULL && space->format->dict != NULL); + uint32_t fieldno; + uint32_t field_count = tuple_format_field_count(space->format); + if (token.key.type == JSON_TOKEN_NUM && + (fieldno = token.key.num - TUPLE_INDEX_BASE) >= field_count) { + const char *err_msg = + tt_sprintf("options.parts[%d]: field '%d' referenced " + "in path is greater than format field " + "count %d", part_id, + fieldno + TUPLE_INDEX_BASE, field_count); + diag_set(ClientError, ER_ILLEGAL_PARAMS, err_msg); + return luaT_error(L); + } else if (token.key.type == JSON_TOKEN_STR && + tuple_fieldno_by_name(space->format->dict, token.key.str, + token.key.len, + field_name_hash(token.key.str, + token.key.len), + &fieldno) != 0) { + const char *err_msg = + tt_sprintf("options.parts[%d]: field was not found by " + "name '%.*s'", part_id, token.key.len, + token.key.str); + diag_set(ClientError, ER_ILLEGAL_PARAMS, err_msg); + return luaT_error(L); + } + fieldno += TUPLE_INDEX_BASE; + path += lexer.offset; + lua_pushnumber(L, fieldno); + lua_pushstring(L, path); + return 2; +} + /* }}} */ void @@ -365,6 +430,7 @@ box_lua_index_init(struct 
lua_State *L) {"truncate", lbox_truncate}, {"stat", lbox_index_stat}, {"compact", lbox_index_compact}, + {"path_resolve", lbox_index_path_resolve}, {NULL, NULL} }; diff --git a/src/box/lua/schema.lua b/src/box/lua/schema.lua index 8a804f0..497cf19 100644 --- a/src/box/lua/schema.lua +++ b/src/box/lua/schema.lua @@ -575,7 +575,7 @@ local function update_index_parts_1_6_0(parts) return result end -local function update_index_parts(format, parts) +local function update_index_parts(format, parts, space_id) if type(parts) ~= "table" then box.error(box.error.ILLEGAL_PARAMS, "options.parts parameter should be a table") @@ -626,16 +626,16 @@ local function update_index_parts(format, parts) box.error(box.error.ILLEGAL_PARAMS, "options.parts[" .. i .. "]: field (name or number) is expected") elseif type(part.field) == 'string' then - for k,v in pairs(format) do - if v.name == part.field then - part.field = k - break - end - end - if type(part.field) == 'string' then + local idx, path = box.internal.path_resolve(i, space_id, part.field) + if part.path ~= nil and part.path ~= path then box.error(box.error.ILLEGAL_PARAMS, - "options.parts[" .. i .. "]: field was not found by name '" .. part.field .. "'") + "options.parts[" .. i .. "]: field path '".. + part.path.." doesn't math the path '" .. + part.field .. "'") end + parts_can_be_simplified = parts_can_be_simplified and path == nil + part.field = idx + part.path = path or part.path elseif part.field == 0 then box.error(box.error.ILLEGAL_PARAMS, "options.parts[" .. i .. "]: field (number) must be one-based") @@ -792,7 +792,7 @@ box.schema.index.create = function(space_id, name, options) end end local parts, parts_can_be_simplified = - update_index_parts(format, options.parts) + update_index_parts(format, options.parts, space_id) -- create_index() options contains type, parts, etc, -- stored separately. 
Remove these members from index_opts local index_opts = { @@ -959,7 +959,7 @@ box.schema.index.alter = function(space_id, index_id, options) if options.parts then local parts_can_be_simplified parts, parts_can_be_simplified = - update_index_parts(format, options.parts) + update_index_parts(format, options.parts, space_id) -- save parts in old format if possible if parts_can_be_simplified then parts = simplify_index_parts(parts) diff --git a/test/engine/tuple.result b/test/engine/tuple.result index a07e23c..44455bf 100644 --- a/test/engine/tuple.result +++ b/test/engine/tuple.result @@ -1007,6 +1007,47 @@ assert(idx.parts[2].path == "FIO.fname") --- - true ... +format = {{'int1', 'unsigned'}, {'int2', 'unsigned'}, {'data', 'array'}, {'int3', 'unsigned'}, {'int4', 'unsigned'}} +--- +... +s:format(format) +--- +- error: Field [2]FIO.fname has type 'array' in one index, but type 'map' in another +... +format = {{'int1', 'unsigned'}, {'int2', 'unsigned'}, {'data', 'map'}, {'int3', 'unsigned'}, {'int4', 'unsigned'}} +--- +... +s:format(format) +--- +... +s:create_index('test3', {parts = {{2, 'number'}, {']sad.FIO["fname"]', 'str'}}}) +--- +- error: 'Illegal parameters, options.parts[2]: error in path on position 1' +... +s:create_index('test3', {parts = {{2, 'number'}, {'[666].FIO["fname"]', 'str'}}}) +--- +- error: 'Illegal parameters, options.parts[2]: field ''666'' referenced in path is + greater than format field count 5' +... +s:create_index('test3', {parts = {{2, 'number'}, {'invalid.FIO["fname"]', 'str'}}}) +--- +- error: 'Illegal parameters, options.parts[2]: field was not found by name ''invalid''' +... +idx3 = s:create_index('test3', {parts = {{2, 'number'}, {'data.FIO["fname"]', 'str'}}}) +--- +... +assert(idx3 ~= nil) +--- +- true +... +assert(idx3.parts[2].path == ".FIO[\"fname\"]") +--- +- true +... +-- Vinyl has optimizations that omit index checks, so errors could differ. +idx3:drop() +--- +... 
s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5} --- - error: 'Tuple field 3 type does not match one required by operation: expected map' diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua index 8630850..fb366c8 100644 --- a/test/engine/tuple.test.lua +++ b/test/engine/tuple.test.lua @@ -327,6 +327,18 @@ s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO....fname idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO.fname'}, {3, 'str', path = '["FIO"]["sname"]'}}}) assert(idx ~= nil) assert(idx.parts[2].path == "FIO.fname") +format = {{'int1', 'unsigned'}, {'int2', 'unsigned'}, {'data', 'array'}, {'int3', 'unsigned'}, {'int4', 'unsigned'}} +s:format(format) +format = {{'int1', 'unsigned'}, {'int2', 'unsigned'}, {'data', 'map'}, {'int3', 'unsigned'}, {'int4', 'unsigned'}} +s:format(format) +s:create_index('test3', {parts = {{2, 'number'}, {']sad.FIO["fname"]', 'str'}}}) +s:create_index('test3', {parts = {{2, 'number'}, {'[666].FIO["fname"]', 'str'}}}) +s:create_index('test3', {parts = {{2, 'number'}, {'invalid.FIO["fname"]', 'str'}}}) +idx3 = s:create_index('test3', {parts = {{2, 'number'}, {'data.FIO["fname"]', 'str'}}}) +assert(idx3 ~= nil) +assert(idx3.parts[2].path == ".FIO[\"fname\"]") +-- Vinyl has optimizations that omit index checks, so errors could differ. +idx3:drop() s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5} s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5} s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5} -- 2.7.4
next prev parent reply other threads:[~2018-11-26 10:49 UTC|newest] Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-11-26 10:49 [PATCH v5 0/9] box: indexes by JSON path Kirill Shcherbatov 2018-11-26 10:49 ` [PATCH v5 1/9] box: refactor json_path_parser class Kirill Shcherbatov 2018-11-26 12:53 ` [tarantool-patches] " Kirill Shcherbatov 2018-11-29 15:39 ` Vladimir Davydov 2018-11-26 10:49 ` [PATCH v5 2/9] lib: implement JSON tree class for json library Kirill Shcherbatov 2018-11-26 12:53 ` [tarantool-patches] " Kirill Shcherbatov 2018-11-29 17:38 ` Vladimir Davydov 2018-11-29 17:50 ` Vladimir Davydov 2018-12-04 15:22 ` Vladimir Davydov 2018-12-04 15:47 ` [tarantool-patches] " Kirill Shcherbatov 2018-12-04 17:54 ` Vladimir Davydov 2018-12-05 8:37 ` Kirill Shcherbatov 2018-12-05 9:07 ` Vladimir Davydov 2018-12-05 9:52 ` Vladimir Davydov 2018-12-06 7:56 ` Kirill Shcherbatov 2018-12-06 7:56 ` [tarantool-patches] Re: [PATCH v5 2/9] lib: make index_base support for json_lexer Kirill Shcherbatov 2018-11-26 10:49 ` [PATCH v5 3/9] box: manage format fields with JSON tree class Kirill Shcherbatov 2018-11-29 19:07 ` Vladimir Davydov 2018-12-04 15:47 ` [tarantool-patches] " Kirill Shcherbatov 2018-12-04 16:09 ` Vladimir Davydov 2018-12-04 16:32 ` Kirill Shcherbatov 2018-12-05 8:37 ` Kirill Shcherbatov 2018-12-06 7:56 ` Kirill Shcherbatov 2018-12-06 8:06 ` Vladimir Davydov 2018-11-26 10:49 ` [PATCH v5 4/9] lib: introduce json_path_cmp routine Kirill Shcherbatov 2018-11-30 10:46 ` Vladimir Davydov 2018-12-03 17:37 ` [tarantool-patches] " Konstantin Osipov 2018-12-03 18:48 ` Vladimir Davydov 2018-12-03 20:14 ` Konstantin Osipov 2018-12-06 7:56 ` [tarantool-patches] Re: [PATCH v5 4/9] lib: introduce json_path_cmp, json_path_validate Kirill Shcherbatov 2018-11-26 10:49 ` [tarantool-patches] [PATCH v5 5/9] box: introduce JSON indexes Kirill Shcherbatov 2018-11-30 21:28 ` Vladimir Davydov 2018-12-01 16:49 ` Vladimir Davydov 2018-11-26 10:49 ` [PATCH v5 
6/9] box: introduce has_json_paths flag in templates Kirill Shcherbatov 2018-11-26 10:49 ` [PATCH v5 7/9] box: tune tuple_field_raw_by_path for indexed data Kirill Shcherbatov 2018-12-01 17:20 ` Vladimir Davydov 2018-11-26 10:49 ` [PATCH v5 8/9] box: introduce offset slot cache in key_part Kirill Shcherbatov 2018-12-03 21:04 ` Vladimir Davydov 2018-12-04 15:51 ` Vladimir Davydov 2018-11-26 10:49 ` Kirill Shcherbatov [this message] 2018-12-04 12:22 ` [PATCH v5 9/9] box: specify indexes in user-friendly form Vladimir Davydov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=d8c6fe8028944a7d5b198eb18ca21e7f6392fd07.1543229303.git.kshcherbatov@tarantool.org \ --to=kshcherbatov@tarantool.org \ --cc=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --cc=vdavydov.dev@gmail.com \ --subject='Re: [PATCH v5 9/9] box: specify indexes in user-friendly form' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.