Tarantool development patches archive
 help / color / mirror / Atom feed
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org,
	Vladislav Shpilevoy <v.shpilevoy@tarantool.org>,
	Vladimir Davydov <vdavydov.dev@gmail.com>
Subject: Re: [tarantool-patches] Re: [PATCH v3 4/4] box: specify indexes in user-friendly form
Date: Thu, 6 Sep 2018 15:46:56 +0300	[thread overview]
Message-ID: <76e3d264-351d-7d0b-79a7-2bd084d774c3@tarantool.org> (raw)
In-Reply-To: <1d55c8a2-d52c-36e3-d0f2-856992977b1a@tarantool.org>

> 1. It is box sub-module so use ClientError please.
Fixed.

> 2. How do you use node.num before checking that it is num? And why
> referencing a field out of format is forbidden? I am free to define
> indexes on non-formatted fields.
Ok, fixed.

> 3. Fits in one line.
Fixed.
================================================

From bf784403070114ea7ec065ca93903c8353bd7e73 Mon Sep 17 00:00:00 2001
Message-Id: <bf784403070114ea7ec065ca93903c8353bd7e73.1536237903.git.kshcherbatov@tarantool.org>
In-Reply-To: <cover.1536237903.git.kshcherbatov@tarantool.org>
References: <cover.1536237903.git.kshcherbatov@tarantool.org>
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
Date: Thu, 23 Aug 2018 18:11:04 +0300
Subject: [PATCH 4/4] box: specify indexes in user-friendly form

Since now it is possible to create indexes by JSON-path
using field names specified in format.

Closes #1012.

@TarantoolBot document
Title: Indexes by JSON path
Sometimes field data could have complex document structure.
When this structure is consistent across whole document,
you are able to create an index by JSON path.

Example:
s:create_index('json_index',
           {parts = {{'data.FIO["fname"]', 'str'}}})
---
 src/box/lua/index.c         | 73 +++++++++++++++++++++++++++++++++++++++++++++
 src/box/lua/schema.lua      | 20 ++++++-------
 src/box/tuple_format.c      |  3 +-
 test/engine/iterator.result |  2 +-
 test/engine/tuple.result    | 41 +++++++++++++++++++++++++
 test/engine/tuple.test.lua  | 12 ++++++++
 6 files changed, 137 insertions(+), 14 deletions(-)

diff --git a/src/box/lua/index.c b/src/box/lua/index.c
index ef89c39..8f1b038 100644
--- a/src/box/lua/index.c
+++ b/src/box/lua/index.c
@@ -28,6 +28,9 @@
  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
+#include "box/schema.h"
+#include "box/tuple_format.h"
+#include "json/path.h"
 #include "box/lua/index.h"
 #include "lua/utils.h"
 #include "box/box.h"
@@ -328,6 +331,75 @@ lbox_index_compact(lua_State *L)
 	return 0;
 }
 
+static int
+lbox_index_resolve_path(struct lua_State *L)
+{
+	if (lua_gettop(L) != 3 ||
+	    !lua_isnumber(L, 1) || !lua_isnumber(L, 2) || !lua_isstring(L, 3)) {
+		return luaL_error(L, "Usage box.internal."
+				     "path_resolve(part_id, space_id, path)");
+	}
+	uint32_t part_id = lua_tonumber(L, 1);
+	uint32_t space_id = lua_tonumber(L, 2);
+	size_t path_len;
+	const char *path = lua_tolstring(L, 3, &path_len);
+	struct space *space = space_cache_find(space_id);
+	if (space == NULL)
+		return luaT_error(L);
+	struct json_path_parser parser;
+	struct json_path_node node;
+	json_path_parser_create(&parser, path, path_len);
+	int rc = json_path_next(&parser, &node);
+	if (rc != 0) {
+		const char *err_msg =
+			tt_sprintf("options.parts[%d]: error in path on "
+				   "position %d", part_id, rc);
+		diag_set(ClientError, ER_ILLEGAL_PARAMS, err_msg);
+		return luaT_error(L);
+	}
+	assert(space->format != NULL && space->format->dict != NULL);
+	uint32_t fieldno;
+	if (node.type == JSON_PATH_NUM &&
+	    (fieldno = node.num - TUPLE_INDEX_BASE) >=
+	    space->format->field_count) {
+		const char *err_msg =
+			tt_sprintf("options.parts[%d]: field '%d' referenced "
+				   "in path is greater than format field "
+				   "count %d", part_id,
+				   fieldno + TUPLE_INDEX_BASE,
+				   space->format->field_count);
+		diag_set(ClientError, ER_ILLEGAL_PARAMS, err_msg);
+		return luaT_error(L);
+	} else if (node.type == JSON_PATH_STR &&
+		   tuple_fieldno_by_name(space->format->dict, node.str, node.len,
+					 field_name_hash(node.str, node.len),
+					 &fieldno) != 0) {
+		const char *err_msg =
+			tt_sprintf("options.parts[%d]: field was not found by "
+				   "name '%.*s'", part_id, node.len, node.str);
+		diag_set(ClientError, ER_ILLEGAL_PARAMS, err_msg);
+		return luaT_error(L);
+	}
+	fieldno += TUPLE_INDEX_BASE;
+
+	char *path_resolved = region_alloc(&fiber()->gc, path_len + 1);
+	if (path_resolved == NULL) {
+		diag_set(OutOfMemory, path_len + 1, "region_alloc",
+			"path_resolved");
+		return luaT_error(L);
+	}
+
+	path = path_resolved;
+	path_resolved += sprintf(path_resolved, "[%d]", fieldno);
+	memcpy(path_resolved, parser.src + parser.offset,
+		parser.src_len - parser.offset);
+	path_resolved[parser.src_len - parser.offset] = '\0';
+
+	lua_pushnumber(L, fieldno);
+	lua_pushstring(L, path);
+	return 2;
+}
+
 /* }}} */
 
 void
@@ -365,6 +437,7 @@ box_lua_index_init(struct lua_State *L)
 		{"truncate", lbox_truncate},
 		{"stat", lbox_index_stat},
 		{"compact", lbox_index_compact},
+		{"path_resolve", lbox_index_resolve_path},
 		{NULL, NULL}
 	};
 
diff --git a/src/box/lua/schema.lua b/src/box/lua/schema.lua
index 540a2a5..bc11375 100644
--- a/src/box/lua/schema.lua
+++ b/src/box/lua/schema.lua
@@ -556,7 +556,7 @@ local function update_index_parts_1_6_0(parts)
     return result
 end
 
-local function update_index_parts(format, parts)
+local function update_index_parts(format, parts, space_id)
     if type(parts) ~= "table" then
         box.error(box.error.ILLEGAL_PARAMS,
         "options.parts parameter should be a table")
@@ -607,16 +607,14 @@ local function update_index_parts(format, parts)
             box.error(box.error.ILLEGAL_PARAMS,
                       "options.parts[" .. i .. "]: field (name or number) is expected")
         elseif type(part.field) == 'string' then
-            for k,v in pairs(format) do
-                if v.name == part.field then
-                    part.field = k
-                    break
-                end
-            end
-            if type(part.field) == 'string' then
+            local idx, path = box.internal.path_resolve(i, space_id, part.field)
+            if part.path ~= nil and part.path ~= path then
                 box.error(box.error.ILLEGAL_PARAMS,
-                          "options.parts[" .. i .. "]: field was not found by name '" .. part.field .. "'")
+                          "options.parts[" .. i .. "]: field path '"..part.path.." doesn't math path resolved by name '" .. part.field .. "'")
             end
+            parts_can_be_simplified = parts_can_be_simplified and path == nil
+            part.field = idx
+            part.path = path or part.path
         elseif part.field == 0 then
             box.error(box.error.ILLEGAL_PARAMS,
                       "options.parts[" .. i .. "]: field (number) must be one-based")
@@ -767,7 +765,7 @@ box.schema.index.create = function(space_id, name, options)
         end
     end
     local parts, parts_can_be_simplified =
-        update_index_parts(format, options.parts)
+        update_index_parts(format, options.parts, space_id)
     -- create_index() options contains type, parts, etc,
     -- stored separately. Remove these members from index_opts
     local index_opts = {
@@ -934,7 +932,7 @@ box.schema.index.alter = function(space_id, index_id, options)
     if options.parts then
         local parts_can_be_simplified
         parts, parts_can_be_simplified =
-            update_index_parts(format, options.parts)
+            update_index_parts(format, options.parts, space_id)
         -- save parts in old format if possible
         if parts_can_be_simplified then
             parts = simplify_index_parts(parts)
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 00170c9..9587e6b 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -1119,8 +1119,7 @@ tuple_field_by_part_raw(const struct tuple_format *format, const char *data,
 		 * Legacy tuple having no field map for
 		 * JSON index.
 		 */
-		uint32_t path_hash =
-			field_name_hash(part->path, part->path_len);
+		uint32_t path_hash = field_name_hash(part->path, part->path_len);
 		const char *raw = NULL;
 		if (tuple_field_raw_by_path(format, data, field_map,
 					    part->path, part->path_len,
diff --git a/test/engine/iterator.result b/test/engine/iterator.result
index 10097ed..05d892d 100644
--- a/test/engine/iterator.result
+++ b/test/engine/iterator.result
@@ -4213,7 +4213,7 @@ s:replace{35}
 ...
 state, value = gen(param,state)
 ---
-- error: 'builtin/box/schema.lua:1034: usage: next(param, state)'
+- error: 'builtin/box/schema.lua:1032: usage: next(param, state)'
 ...
 value
 ---
diff --git a/test/engine/tuple.result b/test/engine/tuple.result
index b74bb23..d28d6b4 100644
--- a/test/engine/tuple.result
+++ b/test/engine/tuple.result
@@ -1009,6 +1009,47 @@ assert(idx ~= nil)
 ---
 - true
 ...
+format = {{'int1', 'unsigned'}, {'int2', 'unsigned'}, {'data', 'array'}, {'int3', 'unsigned'}, {'int4', 'unsigned'}}
+---
+...
+s:format(format)
+---
+- error: Field 3 has type 'map' in one index, but type 'array' in another
+...
+format = {{'int1', 'unsigned'}, {'int2', 'unsigned'}, {'data', 'map'}, {'int3', 'unsigned'}, {'int4', 'unsigned'}}
+---
+...
+s:format(format)
+---
+...
+s:create_index('test3', {parts = {{2, 'number'}, {']sad.FIO["fname"]', 'str'}}})
+---
+- error: 'Illegal parameters, options.parts[2]: error in path on position 1'
+...
+s:create_index('test3', {parts = {{2, 'number'}, {'[666].FIO["fname"]', 'str'}}})
+---
+- error: 'Illegal parameters, options.parts[2]: field ''666'' referenced in path is
+    greater than format field count 5'
+...
+s:create_index('test3', {parts = {{2, 'number'}, {'invalid.FIO["fname"]', 'str'}}})
+---
+- error: 'Illegal parameters, options.parts[2]: field was not found by name ''invalid'''
+...
+idx3 = s:create_index('test3', {parts = {{2, 'number'}, {'data.FIO["fname"]', 'str'}}})
+---
+...
+assert(idx3 ~= nil)
+---
+- true
+...
+assert(idx3.parts[2].path == "[3][\"FIO\"][\"fname\"]")
+---
+- true
+...
+-- Vinyl has optimizations that omit index checks, so errors could differ.
+idx3:drop()
+---
+...
 s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5}
 ---
 - error: 'Tuple doesn''t math document structure: invalid field 3 document content
diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua
index d563c66..1508f99 100644
--- a/test/engine/tuple.test.lua
+++ b/test/engine/tuple.test.lua
@@ -327,6 +327,18 @@ s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[2].FIO.fnam
 s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[3].FIO....fname'}}})
 idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[3]["FIO"]["fname"]'}, {3, 'str', path = '[3]["FIO"]["sname"]'}}})
 assert(idx ~= nil)
+format = {{'int1', 'unsigned'}, {'int2', 'unsigned'}, {'data', 'array'}, {'int3', 'unsigned'}, {'int4', 'unsigned'}}
+s:format(format)
+format = {{'int1', 'unsigned'}, {'int2', 'unsigned'}, {'data', 'map'}, {'int3', 'unsigned'}, {'int4', 'unsigned'}}
+s:format(format)
+s:create_index('test3', {parts = {{2, 'number'}, {']sad.FIO["fname"]', 'str'}}})
+s:create_index('test3', {parts = {{2, 'number'}, {'[666].FIO["fname"]', 'str'}}})
+s:create_index('test3', {parts = {{2, 'number'}, {'invalid.FIO["fname"]', 'str'}}})
+idx3 = s:create_index('test3', {parts = {{2, 'number'}, {'data.FIO["fname"]', 'str'}}})
+assert(idx3 ~= nil)
+assert(idx3.parts[2].path == "[3][\"FIO\"][\"fname\"]")
+-- Vinyl has optimizations that omit index checks, so errors could differ.
+idx3:drop()
 s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5}
 s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5}
 s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5}
-- 
2.7.4

  reply	other threads:[~2018-09-06 12:46 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-27  7:37 [tarantool-patches] [PATCH v3 0/4] box: indexes by JSON path Kirill Shcherbatov
2018-08-27  7:37 ` [tarantool-patches] [PATCH v3 1/4] rfc: describe a Tarantool JSON indexes Kirill Shcherbatov
2018-08-27  7:37 ` [tarantool-patches] [PATCH v3 2/4] box: introduce slot_cache in key_part Kirill Shcherbatov
2018-09-03 10:35   ` [tarantool-patches] " Vladislav Shpilevoy
2018-09-06 12:47     ` Kirill Shcherbatov
2018-09-17 17:08       ` Vladimir Davydov
2018-08-27  7:37 ` [tarantool-patches] [PATCH v3 3/4] box: introduce JSON indexes Kirill Shcherbatov
2018-09-03 10:32   ` [tarantool-patches] " Vladislav Shpilevoy
2018-09-03 10:35     ` Vladislav Shpilevoy
2018-09-06 12:46     ` Kirill Shcherbatov
2018-08-27  7:37 ` [tarantool-patches] [PATCH v3 4/4] box: specify indexes in user-friendly form Kirill Shcherbatov
2018-09-03 10:32   ` [tarantool-patches] " Vladislav Shpilevoy
2018-09-06 12:46     ` Kirill Shcherbatov [this message]
2018-09-17 15:50 ` [tarantool-patches] [PATCH v3 0/4] box: indexes by JSON path Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=76e3d264-351d-7d0b-79a7-2bd084d774c3@tarantool.org \
    --to=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=v.shpilevoy@tarantool.org \
    --cc=vdavydov.dev@gmail.com \
    --subject='Re: [tarantool-patches] Re: [PATCH v3 4/4] box: specify indexes in user-friendly form' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox