Tarantool development patches archive
 help / color / mirror / Atom feed
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org
Cc: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Subject: [tarantool-patches] [PATCH v2 2/3] lua: implement json path access to tuple fields
Date: Thu, 29 Mar 2018 17:22:03 +0300	[thread overview]
Message-ID: <92b7caf27491cef803bb6004f1616176a15d96c7.1522333265.git.kshcherbatov@tarantool.org> (raw)
In-Reply-To: <cover.1522333265.git.kshcherbatov@tarantool.org>
In-Reply-To: <cover.1522333265.git.kshcherbatov@tarantool.org>

From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>

In progress ...

Closes #1285
---
 src/box/CMakeLists.txt     |   2 +-
 src/box/lua/tuple.c        | 176 +++++++++++++++++++++++++++++++++++-----
 src/box/lua/tuple.lua      |  45 +++--------
 test/engine/tuple.result   | 198 +++++++++++++++++++++++++++++++++++++++++++++
 test/engine/tuple.test.lua |  59 ++++++++++++++
 5 files changed, 428 insertions(+), 52 deletions(-)

diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index e420fe3..add0ff9 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -130,5 +130,5 @@ add_library(box STATIC
     ${bin_sources})
 
 target_link_libraries(box box_error tuple stat xrow xlog vclock crc32 scramble
-                      ${common_libraries})
+                      json_path ${common_libraries})
 add_dependencies(box build_bundled_libs)
diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index 7ca4299..99b9ff2 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -41,6 +41,7 @@
 #include "box/tuple_convert.h"
 #include "box/errcode.h"
 #include "box/memtx_tuple.h"
+#include "json/path.h"
 
 /** {{{ box.tuple Lua library
  *
@@ -402,36 +403,175 @@ lbox_tuple_transform(struct lua_State *L)
 }
 
 /**
- * Find a tuple field using its name.
+ * Propagate @a field to MessagePack(field)[index].
+ * @param[in,out] field Field to propagate.
+ * @param index 1-based index to propagate to.
+ *
+ * @retval  0 Success, the index was found.
+ * @retval -1 Not found.
+ */
+static inline int
+tuple_field_go_to_index(const char **field, uint64_t index)
+{
+	assert(index >= 0);
+	enum mp_type type = mp_typeof(**field);
+	if (type == MP_ARRAY) {
+		if (index == 0)
+			return -1;
+		/* Make index 0-based. */
+		index -= TUPLE_INDEX_BASE;
+		uint32_t count = mp_decode_array(field);
+		if (index >= count)
+			return -1;
+		for (; index > 0; --index)
+			mp_next(field);
+		return 0;
+	} else if (type == MP_MAP) {
+		uint64_t count = mp_decode_map(field);
+		for (; count > 0; --count) {
+			type = mp_typeof(**field);
+			if (type == MP_UINT) {
+				uint64_t value = mp_decode_uint(field);
+				if (value == index)
+					return 0;
+			} else if (type == MP_INT) {
+				int64_t value = mp_decode_int(field);
+				if (value >= 0 && (uint64_t)value == index)
+					return 0;
+			} else {
+				/* Skip key. */
+				mp_next(field);
+			}
+			/* Skip value. */
+			mp_next(field);
+		}
+	}
+	return -1;
+}
+
+/**
+ * Propagate @a field to MessagePack(field)[key].
+ * @param[in,out] field Field to propagate.
+ * @param key Key to propagate to.
+ * @param len Length of @a key.
+ *
+ * @retval  0 Success, the key was found.
+ * @retval -1 Not found.
+ */
+static inline int
+tuple_field_go_to_key(const char **field, const char *key, int len)
+{
+	enum mp_type type = mp_typeof(**field);
+	if (type != MP_MAP)
+		return -1;
+	uint64_t count = mp_decode_map(field);
+	for (; count > 0; --count) {
+		type = mp_typeof(**field);
+		if (type == MP_STR) {
+			uint32_t value_len;
+			const char *value = mp_decode_str(field, &value_len);
+			if (value_len == (uint)len &&
+			    memcmp(value, key, len) == 0)
+				return 0;
+		} else {
+			/* Skip key. */
+			mp_next(field);
+		}
+		/* Skip value. */
+		mp_next(field);
+	}
+	return -1;
+}
+
+/**
+ * Find a tuple field by JSON path.
  * @param L Lua state.
- * @param tuple 1-th argument on lua stack, tuple to get field
+ * @param tuple 1-st argument on the lua stack, tuple to get field
  *        from.
- * @param field_name 2-th argument on lua stack, field name to
- *        get.
+ * @param path 2-nd argument on lua stack. Can be a field name,
+ *        JSON path to a field or a field number.
  *
  * @retval If a field was not found, return -1 and nil to lua else
  *         return 0 and decoded field.
  */
 static int
-lbox_tuple_field_by_name(struct lua_State *L)
+lbox_tuple_field_by_path(struct lua_State *L)
 {
+	const char *field;
 	struct tuple *tuple = luaT_istuple(L, 1);
 	/* Is checked in Lua wrapper. */
 	assert(tuple != NULL);
-	assert(lua_isstring(L, 2));
-	size_t name_len;
-	const char *name = lua_tolstring(L, 2, &name_len);
-	uint32_t name_hash = lua_hashstring(L, 2);
-	const char *field =
-		tuple_field_by_name(tuple, name, name_len, name_hash);
-	if (field == NULL) {
-		lua_pushinteger(L, -1);
-		lua_pushnil(L);
+	if (lua_isnumber(L, 2)) {
+		int index = lua_tointeger(L, 2);
+		index -= TUPLE_INDEX_BASE;
+		if (index < 0) {
+not_found:
+			lua_pushinteger(L, -1);
+			lua_pushnil(L);
+			return 2;
+		}
+		field = tuple_field(tuple, index);
+		if (field == NULL)
+			goto not_found;
+push_value:
+		lua_pushinteger(L, 0);
+		luamp_decode(L, luaL_msgpack_default, &field);
 		return 2;
 	}
-	lua_pushinteger(L, 0);
-	luamp_decode(L, luaL_msgpack_default, &field);
-	return 2;
+	assert(lua_isstring(L, 2));
+	size_t path_len;
+	const char *path = lua_tolstring(L, 2, &path_len);
+	struct json_path_parser parser;
+	struct json_path_node node;
+	json_path_parser_create(&parser, path, path_len);
+	int rc = json_path_next(&parser, &node);
+	if (rc != 0 || node.type == JSON_PATH_END)
+		luaL_error(L, "Error in path on position %d", rc);
+	if (node.type == JSON_PATH_NUM) {
+		int index = node.num;
+		if (index == 0)
+			goto not_found;
+		index -= TUPLE_INDEX_BASE;
+		field = tuple_field(tuple, index);
+		if (field == NULL)
+			goto not_found;
+	} else {
+		assert(node.type == JSON_PATH_STR);
+		/* First part of a path is a field name. */
+		const char *name = node.str;
+		uint32_t name_len = node.len;
+		uint32_t name_hash;
+		if (path_len == name_len) {
+			name_hash = lua_hashstring(L, 2);
+		} else {
+			/*
+			 * If a string is "field....", then its
+			 * precalculated luajit hash can not be
+			 * used. A tuple dictionary hashes only
+			 * name, not path.
+			 */
+			name_hash = lua_hash(name, name_len);
+		}
+		field = tuple_field_by_name(tuple, name, name_len, name_hash);
+		if (field == NULL)
+			goto not_found;
+	}
+	while ((rc = json_path_next(&parser, &node)) == 0 &&
+	       node.type != JSON_PATH_END) {
+		if (node.type == JSON_PATH_NUM) {
+			rc = tuple_field_go_to_index(&field, node.num);
+		} else {
+			assert(node.type == JSON_PATH_STR);
+			rc = tuple_field_go_to_key(&field, node.str, node.len);
+		}
+		if (rc != 0)
+			goto not_found;
+	}
+	if (rc == 0)
+		goto push_value;
+	luaL_error(L, "Error in path on position %d", rc);
+	unreachable();
+	goto not_found;
 }
 
 static int
@@ -470,8 +610,8 @@ static const struct luaL_Reg lbox_tuple_meta[] = {
 	{"tostring", lbox_tuple_to_string},
 	{"slice", lbox_tuple_slice},
 	{"transform", lbox_tuple_transform},
-	{"tuple_field_by_name", lbox_tuple_field_by_name},
 	{"tuple_to_map", lbox_tuple_to_map},
+	{"tuple_field_by_path", lbox_tuple_field_by_path},
 	{NULL, NULL}
 };
 
diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
index 001971a..b51b4df 100644
--- a/src/box/lua/tuple.lua
+++ b/src/box/lua/tuple.lua
@@ -9,16 +9,9 @@ local internal = require('box.internal')
 
 ffi.cdef[[
 /** \cond public */
-typedef struct tuple_format box_tuple_format_t;
-
-box_tuple_format_t *
-box_tuple_format_default(void);
 
 typedef struct tuple box_tuple_t;
 
-box_tuple_t *
-box_tuple_new(box_tuple_format_t *format, const char *data, const char *end);
-
 int
 box_tuple_ref(box_tuple_t *tuple);
 
@@ -34,9 +27,6 @@ box_tuple_bsize(const box_tuple_t *tuple);
 ssize_t
 box_tuple_to_buf(const box_tuple_t *tuple, char *buf, size_t size);
 
-box_tuple_format_t *
-box_tuple_format(const box_tuple_t *tuple);
-
 const char *
 box_tuple_field(const box_tuple_t *tuple, uint32_t i);
 
@@ -278,9 +268,9 @@ end
 
 msgpackffi.on_encode(const_tuple_ref_t, tuple_to_msgpack)
 
-local function tuple_field_by_name(tuple, name)
+local function tuple_field_by_path(tuple, path)
     tuple_check(tuple, "tuple['field_name']");
-    return internal.tuple.tuple_field_by_name(tuple, name)
+    return internal.tuple.tuple_field_by_path(tuple, path)
 end
 
 local methods = {
@@ -306,33 +296,22 @@ end
 
 methods["__serialize"] = tuple_totable -- encode hook for msgpack/yaml/json
 
-local tuple_field = function(tuple, field_n)
-    local field = builtin.box_tuple_field(tuple, field_n - 1)
-    if field == nil then
-        return nil
-    end
-    -- Use () to shrink stack to the first return value
-    return (msgpackffi.decode_unchecked(field))
-end
-
-
 ffi.metatype(tuple_t, {
     __len = function(tuple)
         return builtin.box_tuple_field_count(tuple)
     end;
     __tostring = internal.tuple.tostring;
     __index = function(tuple, key)
-        if type(key) == "number" then
-            return tuple_field(tuple, key)
-        elseif type(key) == "string" then
-            -- Try to get a field with a name = key. If it was not
-            -- found (rc ~= 0) then return a method from the
-            -- vtable. If a collision occurred, then fields have
-            -- higher priority. For example, if a tuple T has a
-            -- field with name 'bsize', then T.bsize returns field
-            -- value, not tuple_bsize function. To access hidden
-            -- methods use 'box.tuple.<method_name>(T, [args...])'.
-            local rc, field = tuple_field_by_name(tuple, key)
+        if type(key) == "string" or type(key) == "number" then
+            -- Try to get a field by json path or by [index]. If
+            -- it was not found (rc ~= 0) then return a method
+            -- from the vtable. If a collision occurred, then
+            -- fields have higher priority. For example, if a
+            -- tuple T has a field with name 'bsize', then T.bsize
+            -- returns field value, not tuple_bsize function. To
+            -- access hidden methods use
+            -- 'box.tuple.<method_name>(T, [args...])'.
+            local rc, field = tuple_field_by_path(tuple, key)
             if rc == 0 then
                 return field
             end
diff --git a/test/engine/tuple.result b/test/engine/tuple.result
index b3b23b2..2d7367a 100644
--- a/test/engine/tuple.result
+++ b/test/engine/tuple.result
@@ -590,6 +590,204 @@ maplen(t1map), t1map[1], t1map[2], t1map[3]
 s:drop()
 ---
 ...
+format = {}
+---
+...
+format[1] = {name = 'field1', type = 'unsigned'}
+---
+...
+format[2] = {name = 'field2', type = 'array'}
+---
+...
+format[3] = {name = 'field3', type = 'map'}
+---
+...
+format[4] = {name = 'field4', type = 'string'}
+---
+...
+s = box.schema.space.create('test', {format = format})
+---
+...
+pk = s:create_index('pk')
+---
+...
+field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
+---
+...
+field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200}
+---
+...
+t = s:replace{1, field2, field3, "123456"}
+---
+...
+t[1]
+---
+- 1
+...
+t[2]
+---
+- [1, 2, 3, '4', [5, 6, 7], {'key': 'key1', 'value': 'value1'}]
+...
+t[3]
+---
+- {'k1': 100, 'k3': [{'a': 1, 'b': 2}, {'c': 3, 'd': 4}], -1: 200, 10: 100, 'k2': [
+    1, 2, 3]}
+...
+t[4]
+---
+- '123456'
+...
+t[2][1]
+---
+- 1
+...
+t["[2][1]"]
+---
+- 1
+...
+t[2][5]
+---
+- [5, 6, 7]
+...
+t["[2][5]"]
+---
+- [5, 6, 7]
+...
+t["[2][5][1]"]
+---
+- 5
+...
+t["[2][5][2]"]
+---
+- 6
+...
+t["[2][5][3]"]
+---
+- 7
+...
+t["[2][6].key"]
+---
+- key1
+...
+t["[2][6].value"]
+---
+- value1
+...
+t["[2][6]['key']"]
+---
+- key1
+...
+t["[2][6]['value']"]
+---
+- value1
+...
+t["[3].k3[2].c"]
+---
+- 3
+...
+t["[4]"]
+---
+- '123456'
+...
+t.field1
+---
+- 1
+...
+t.field2[5]
+---
+- [5, 6, 7]
+...
+t[".field1"]
+---
+- 1
+...
+t["field1"]
+---
+- 1
+...
+t["[3][10]"]
+---
+- 100
+...
+-- Not found.
+t[0]
+---
+- null
+...
+t["[0]"]
+---
+- null
+...
+t["[1000]"]
+---
+- null
+...
+t.field1000
+---
+- null
+...
+t["not_found"]
+---
+- null
+...
+t["[2][5][10]"]
+---
+- null
+...
+t["[2][6].key100"]
+---
+- null
+...
+t["[2][0]"] -- 0-based index in array.
+---
+- null
+...
+t["[4][3]"] -- Can not index string.
+---
+- null
+...
+t["[4]['key']"]
+---
+- null
+...
+-- Not found 'a'. Return 'null' despite of syntax error on a
+-- next position.
+t["a.b.c d.e.f"]
+---
+- null
+...
+-- Sytax errors.
+t[""]
+---
+- error: 'builtin/box/tuple.lua:314: Error in path on position 0'
+...
+t["[2].[5]"]
+---
+- error: 'builtin/box/tuple.lua:314: Error in path on position 5'
+...
+t["[-1]"]
+---
+- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
+...
+t[".."]
+---
+- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
+...
+t["[["]
+---
+- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
+...
+t["]]"]
+---
+- error: 'builtin/box/tuple.lua:314: Error in path on position 1'
+...
+t["{"]
+---
+- error: 'builtin/box/tuple.lua:314: Error in path on position 1'
+...
+s:drop()
+---
+...
 engine = nil
 ---
 ...
diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua
index 6d7d254..ba3482d 100644
--- a/test/engine/tuple.test.lua
+++ b/test/engine/tuple.test.lua
@@ -200,5 +200,64 @@ t1map = t1:tomap()
 maplen(t1map), t1map[1], t1map[2], t1map[3]
 s:drop()
 
+format = {}
+format[1] = {name = 'field1', type = 'unsigned'}
+format[2] = {name = 'field2', type = 'array'}
+format[3] = {name = 'field3', type = 'map'}
+format[4] = {name = 'field4', type = 'string'}
+s = box.schema.space.create('test', {format = format})
+pk = s:create_index('pk')
+field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
+field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200}
+t = s:replace{1, field2, field3, "123456"}
+t[1]
+t[2]
+t[3]
+t[4]
+t[2][1]
+t["[2][1]"]
+t[2][5]
+t["[2][5]"]
+t["[2][5][1]"]
+t["[2][5][2]"]
+t["[2][5][3]"]
+t["[2][6].key"]
+t["[2][6].value"]
+t["[2][6]['key']"]
+t["[2][6]['value']"]
+t["[3].k3[2].c"]
+t["[4]"]
+t.field1
+t.field2[5]
+t[".field1"]
+t["field1"]
+t["[3][10]"]
+
+-- Not found.
+t[0]
+t["[0]"]
+t["[1000]"]
+t.field1000
+t["not_found"]
+t["[2][5][10]"]
+t["[2][6].key100"]
+t["[2][0]"] -- 0-based index in array.
+t["[4][3]"] -- Can not index string.
+t["[4]['key']"]
+-- Not found 'a'. Return 'null' despite of syntax error on a
+-- next position.
+t["a.b.c d.e.f"]
+
+-- Sytax errors.
+t[""]
+t["[2].[5]"]
+t["[-1]"]
+t[".."]
+t["[["]
+t["]]"]
+t["{"]
+
+s:drop()
+
 engine = nil
 test_run = nil
-- 
2.7.4

  parent reply	other threads:[~2018-03-29 14:22 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-29 14:22 [tarantool-patches] [PATCH v2 0/3] tuple field access via a json path Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 1/3] Introduce json_path_parser Kirill Shcherbatov
2018-03-29 14:22 ` Kirill Shcherbatov [this message]
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 3/3] Multibyte characters support Kirill Shcherbatov
2018-03-29 18:04   ` [tarantool-patches] " Kirill Shcherbatov
2018-03-30 10:24     ` v.shpilevoy
2018-03-30 10:25       ` v.shpilevoy
2018-04-02 19:19       ` Kirill Shcherbatov
2018-04-03 10:20         ` Vladislav Shpilevoy
2018-04-05 14:09           ` [tarantool-patches] [PATCH v2 1/1] ICU Unicode support for JSON parser Kirill Shcherbatov
2018-04-05 18:00             ` [tarantool-patches] " Kirill Shcherbatov
2018-04-05 23:32               ` Vladislav Shpilevoy
2018-04-04 10:37 ` [tarantool-patches] [PATCH v2 3/3] Multibyte characters support ICU Kirill Shcherbatov
2018-04-04 11:30   ` [tarantool-patches] " Vladislav Shpilevoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=92b7caf27491cef803bb6004f1616176a15d96c7.1522333265.git.kshcherbatov@tarantool.org \
    --to=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [tarantool-patches] [PATCH v2 2/3] lua: implement json path access to tuple fields' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox