[tarantool-patches] [PATCH v2 3/3] Lua: implement json path access to tuple fields
Kirill Shcherbatov
kshcherbatov at tarantool.org
Fri Apr 6 14:08:57 MSK 2018
New tuple_field_raw_by_path and tuple_field_by_path APIs.
Resolves #1285
---
src/box/CMakeLists.txt | 4 +-
src/box/lua/tuple.c | 63 +++++++++----
src/box/lua/tuple.lua | 52 ++++-------
src/box/tuple.h | 21 +++++
src/box/tuple_format.c | 164 +++++++++++++++++++++++++++++++++
src/box/tuple_format.h | 19 ++++
test/engine/tuple.result | 225 +++++++++++++++++++++++++++++++++++++++++++++
test/engine/tuple.test.lua | 66 +++++++++++++
8 files changed, 556 insertions(+), 58 deletions(-)
diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index ad7f910..88c2c60 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -45,7 +45,7 @@ add_library(tuple STATIC
field_def.c
opt_def.c
)
-target_link_libraries(tuple box_error core ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit)
+target_link_libraries(tuple json_path box_error core ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit)
add_library(xlog STATIC xlog.c)
target_link_libraries(xlog core box_error crc32 ${ZSTD_LIBRARIES})
@@ -131,5 +131,5 @@ add_library(box STATIC
${bin_sources})
target_link_libraries(box box_error tuple stat xrow xlog vclock crc32 scramble
- ${common_libraries})
+ json_path ${common_libraries})
add_dependencies(box build_bundled_libs)
diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index 7ca4299..6967da6 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -41,6 +41,7 @@
#include "box/tuple_convert.h"
#include "box/errcode.h"
#include "box/memtx_tuple.h"
+#include "json/path.h"
/** {{{ box.tuple Lua library
*
@@ -402,36 +403,58 @@ lbox_tuple_transform(struct lua_State *L)
}
/**
- * Find a tuple field using its name.
+ * Find a tuple field by JSON path. If the field is not found and
+ * the path contains JSON syntax errors, an exception is raised.
* @param L Lua state.
- * @param tuple 1-th argument on lua stack, tuple to get field
+ * @param tuple 1st argument on the Lua stack, tuple to get field
* from.
- * @param field_name 2-th argument on lua stack, field name to
- * get.
+ * @param path 2nd argument on the Lua stack. Can be a field name,
+ * a JSON path to a field or a field number.
*
- * @retval If a field was not found, return -1 and nil to lua else
- * return 0 and decoded field.
+ * @retval not nil Found field value.
+ * @retval nil A field is NULL or does not exist.
*/
static int
-lbox_tuple_field_by_name(struct lua_State *L)
+lbox_tuple_field_by_path(struct lua_State *L)
{
struct tuple *tuple = luaT_istuple(L, 1);
/* Is checked in Lua wrapper. */
assert(tuple != NULL);
- assert(lua_isstring(L, 2));
- size_t name_len;
- const char *name = lua_tolstring(L, 2, &name_len);
- uint32_t name_hash = lua_hashstring(L, 2);
- const char *field =
- tuple_field_by_name(tuple, name, name_len, name_hash);
- if (field == NULL) {
- lua_pushinteger(L, -1);
- lua_pushnil(L);
- return 2;
+ const char *field = NULL;
+ if (lua_isnumber(L, 2)) {
+ double dbl_index = lua_tonumber(L, 2);
+ if (dbl_index != floor(dbl_index))
+ goto usage_error;
+ int index = (int) floor(dbl_index) - TUPLE_INDEX_BASE;
+ if (index >= 0) {
+ field = tuple_field(tuple, index);
+ if (field == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+ } else {
+ lua_pushnil(L);
+ return 1;
+ }
+ } else if (lua_isstring(L, 2)) {
+ size_t len;
+ const char *path = lua_tolstring(L, 2, &len);
+ if (len == 0)
+ goto usage_error;
+ if (tuple_field_by_path(tuple, path, (uint32_t) len,
+ lua_hashstring(L, 2), &field) != 0) {
+ return luaT_error(L);
+ } else if (field == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+ } else {
+usage_error:
+ return luaL_error(L, "Usage: tuple[<path> or number >= 1]");
}
- lua_pushinteger(L, 0);
+ assert(field != NULL);
luamp_decode(L, luaL_msgpack_default, &field);
- return 2;
+ return 1;
}
static int
@@ -470,8 +493,8 @@ static const struct luaL_Reg lbox_tuple_meta[] = {
{"tostring", lbox_tuple_to_string},
{"slice", lbox_tuple_slice},
{"transform", lbox_tuple_transform},
- {"tuple_field_by_name", lbox_tuple_field_by_name},
{"tuple_to_map", lbox_tuple_to_map},
+ {"tuple_field_by_path", lbox_tuple_field_by_path},
{NULL, NULL}
};
diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
index 001971a..0eee325 100644
--- a/src/box/lua/tuple.lua
+++ b/src/box/lua/tuple.lua
@@ -9,16 +9,9 @@ local internal = require('box.internal')
ffi.cdef[[
/** \cond public */
-typedef struct tuple_format box_tuple_format_t;
-
-box_tuple_format_t *
-box_tuple_format_default(void);
typedef struct tuple box_tuple_t;
-box_tuple_t *
-box_tuple_new(box_tuple_format_t *format, const char *data, const char *end);
-
int
box_tuple_ref(box_tuple_t *tuple);
@@ -34,9 +27,6 @@ box_tuple_bsize(const box_tuple_t *tuple);
ssize_t
box_tuple_to_buf(const box_tuple_t *tuple, char *buf, size_t size);
-box_tuple_format_t *
-box_tuple_format(const box_tuple_t *tuple);
-
const char *
box_tuple_field(const box_tuple_t *tuple, uint32_t i);
@@ -278,9 +268,9 @@ end
msgpackffi.on_encode(const_tuple_ref_t, tuple_to_msgpack)
-local function tuple_field_by_name(tuple, name)
+local function tuple_field_by_path(tuple, path)
tuple_check(tuple, "tuple['field_name']");
- return internal.tuple.tuple_field_by_name(tuple, name)
+ return internal.tuple.tuple_field_by_path(tuple, path)
end
local methods = {
@@ -306,38 +296,28 @@ end
methods["__serialize"] = tuple_totable -- encode hook for msgpack/yaml/json
-local tuple_field = function(tuple, field_n)
- local field = builtin.box_tuple_field(tuple, field_n - 1)
- if field == nil then
- return nil
- end
- -- Use () to shrink stack to the first return value
- return (msgpackffi.decode_unchecked(field))
-end
-
-
ffi.metatype(tuple_t, {
__len = function(tuple)
return builtin.box_tuple_field_count(tuple)
end;
__tostring = internal.tuple.tostring;
__index = function(tuple, key)
- if type(key) == "number" then
- return tuple_field(tuple, key)
- elseif type(key) == "string" then
- -- Try to get a field with a name = key. If it was not
- -- found (rc ~= 0) then return a method from the
- -- vtable. If a collision occurred, then fields have
- -- higher priority. For example, if a tuple T has a
- -- field with name 'bsize', then T.bsize returns field
- -- value, not tuple_bsize function. To access hidden
- -- methods use 'box.tuple.<method_name>(T, [args...])'.
- local rc, field = tuple_field_by_name(tuple, key)
- if rc == 0 then
- return field
+ local res
+ if type(key) == "string" or type(key) == "number" then
+ -- Try to get a field by JSON path or by [index]. If
+ -- it was not found (res == nil) then return a method
+ -- from the vtable. If a collision occurred, then
+ -- fields have higher priority. For example, if a
+ -- tuple T has a field with name 'bsize', then T.bsize
+ -- returns field value, not tuple_bsize function. To
+ -- access hidden methods use
+ -- 'box.tuple.<method_name>(T, [args...])'.
+ res = tuple_field_by_path(tuple, key)
+ if res ~= nil then
+ return res
end
end
- return methods[key]
+ return methods[key] or res
end;
__eq = function(tuple_a, tuple_b)
-- Two tuple are considered equal if they have same memory address
diff --git a/src/box/tuple.h b/src/box/tuple.h
index 6ebedf5..68ae9cd 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -514,6 +514,27 @@ tuple_field(const struct tuple *tuple, uint32_t fieldno)
}
/**
+ * Get tuple field by its JSON path.
+ * @param tuple Tuple to get field from.
+ * @param path Field path.
+ * @param path_len Length of @a path.
+ * @param path_hash Hash of @a path.
+ * @param[out] field Found field, or NULL, if not found.
+ *
+ * @retval 0 Success.
+ * @retval -1 Error in JSON path.
+ */
+static inline int
+tuple_field_by_path(struct tuple *tuple, const char *path,
+ uint32_t path_len, uint32_t path_hash,
+ const char **field)
+{
+ return tuple_field_raw_by_path(tuple_format(tuple), tuple_data(tuple),
+ tuple_field_map(tuple), path, path_len,
+ path_hash, field);
+}
+
+/**
* Get tuple field by its name.
* @param tuple Tuple to get field from.
* @param name Field name.
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index e458f49..94342ea 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -28,6 +28,7 @@
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+#include "json/path.h"
#include "tuple_format.h"
/** Global table of tuple formats */
@@ -478,3 +479,166 @@ box_tuple_format_unref(box_tuple_format_t *format)
{
tuple_format_unref(format);
}
+
+/**
+ * Propagate @a field to MessagePack(field)[index].
+ * @param[in][out] field Field to propagate.
+ * @param index 1-based index to propagate to.
+ *
+ * @retval 0 Success, the index was found.
+ * @retval -1 Not found.
+ */
+static inline int
+tuple_field_go_to_index(const char **field, uint64_t index)
+{
+ enum mp_type type = mp_typeof(**field);
+ if (type == MP_ARRAY) {
+ if (index == 0)
+ return -1;
+ /* Make index 0-based. */
+ index -= TUPLE_INDEX_BASE;
+ uint32_t count = mp_decode_array(field);
+ if (index >= count)
+ return -1;
+ for (; index > 0; --index)
+ mp_next(field);
+ return 0;
+ } else if (type == MP_MAP) {
+ uint64_t count = mp_decode_map(field);
+ for (; count > 0; --count) {
+ type = mp_typeof(**field);
+ if (type == MP_UINT) {
+ uint64_t value = mp_decode_uint(field);
+ if (value == index)
+ return 0;
+ } else if (type == MP_INT) {
+ int64_t value = mp_decode_int(field);
+ if (value >= 0 && (uint64_t)value == index)
+ return 0;
+ } else {
+ /* Skip key. */
+ mp_next(field);
+ }
+ /* Skip value. */
+ mp_next(field);
+ }
+ }
+ return -1;
+}
+
+/**
+ * Propagate @a field to MessagePack(field)[key].
+ * @param[in][out] field Field to propagate.
+ * @param key Key to propagate to.
+ * @param len Length of @a key.
+ *
+ * @retval 0 Success, the key was found.
+ * @retval -1 Not found.
+ */
+static inline int
+tuple_field_go_to_key(const char **field, const char *key, int len)
+{
+ enum mp_type type = mp_typeof(**field);
+ if (type != MP_MAP)
+ return -1;
+ uint64_t count = mp_decode_map(field);
+ for (; count > 0; --count) {
+ type = mp_typeof(**field);
+ if (type == MP_STR) {
+ uint32_t value_len;
+ const char *value = mp_decode_str(field, &value_len);
+ if (value_len == (uint)len &&
+ memcmp(value, key, len) == 0)
+ return 0;
+ } else {
+ /* Skip key. */
+ mp_next(field);
+ }
+ /* Skip value. */
+ mp_next(field);
+ }
+ return -1;
+}
+
+int
+tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
+ const uint32_t *field_map, const char *path,
+ uint32_t path_len, uint32_t path_hash,
+ const char **field)
+{
+ assert(path_len > 0);
+ struct json_path_parser parser;
+ struct json_path_node node;
+ json_path_parser_create(&parser, path, path_len);
+ int rc = json_path_next(&parser, &node);
+ if (rc != 0)
+ goto best_effort;
+ switch(node.type) {
+ case JSON_PATH_NUM: {
+ int index = node.num;
+ if (index == 0)
+ goto best_effort;
+ index -= TUPLE_INDEX_BASE;
+ *field = tuple_field_raw(format, tuple, field_map, index);
+ if (*field == NULL)
+ goto best_effort;
+ break;
+ }
+ case JSON_PATH_STR: {
+ /* First part of a path is a field name. */
+ uint32_t name_hash;
+ if (path_len == (uint32_t) node.len) {
+ name_hash = path_hash;
+ } else {
+ /*
+ * If the path is "field....", then its
+ * precalculated LuaJIT hash cannot be
+ * used. The tuple dictionary hashes only
+ * the name, not the whole path.
+ */
+ name_hash = field_name_hash(node.str, node.len);
+ }
+ *field = tuple_field_raw_by_name(format, tuple, field_map,
+ node.str, node.len, name_hash);
+ if (*field == NULL)
+ goto best_effort;
+ break;
+ }
+ default:
+ assert(node.type == JSON_PATH_END);
+ *field = NULL;
+ return 0;
+ }
+ while (rc == 0 && (rc = json_path_next(&parser, &node)) == 0) {
+ switch(node.type) {
+ case JSON_PATH_NUM:
+ rc = tuple_field_go_to_index(field, node.num);
+ break;
+ case JSON_PATH_STR:
+ rc = tuple_field_go_to_key(field, node.str, node.len);
+ break;
+ default:
+ assert(node.type == JSON_PATH_END);
+ return 0;
+ }
+ }
+ assert(rc != 0);
+ /*
+ * It is possible that a field name is itself a
+ * well-formed JSON path. For example, 'a.b.c.d' can be a
+ * field name. If no data was found by such a path, then
+ * try to interpret the whole path as a field name.
+ * The same is true for field names that are not valid
+ * JSON paths.
+ */
+best_effort:
+ *field = tuple_field_raw_by_name(format, tuple, field_map, path,
+ path_len, path_hash);
+ if (rc > 0 && *field == NULL) {
+ diag_set(ClientError, ER_ILLEGAL_PARAMS,
+ tt_sprintf("error in path on position %d", rc));
+ return -1;
+ } else {
+ return 0;
+ }
+}
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index d35182d..a7dc9c7 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -377,6 +377,25 @@ tuple_field_raw_by_name(struct tuple_format *format, const char *tuple,
return tuple_field_raw(format, tuple, field_map, fieldno);
}
+/**
+ * Get tuple field by its path.
+ * @param format Tuple format.
+ * @param tuple MessagePack tuple's body.
+ * @param field_map Tuple field map.
+ * @param path Field path.
+ * @param path_len Length of @a path.
+ * @param path_hash Hash of @a path.
+ * @param[out] field Found field, or NULL, if not found.
+ *
+ * @retval 0 Success.
+ * @retval -1 Error in JSON path.
+ */
+int
+tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
+ const uint32_t *field_map, const char *path,
+ uint32_t path_len, uint32_t path_hash,
+ const char **field);
+
#if defined(__cplusplus)
} /* extern "C" */
#endif /* defined(__cplusplus) */
diff --git a/test/engine/tuple.result b/test/engine/tuple.result
index b3b23b2..7aeb457 100644
--- a/test/engine/tuple.result
+++ b/test/engine/tuple.result
@@ -590,6 +590,231 @@ maplen(t1map), t1map[1], t1map[2], t1map[3]
s:drop()
---
...
+format = {}
+---
+...
+format[1] = {name = 'field1', type = 'unsigned'}
+---
+...
+format[2] = {name = 'field2', type = 'array'}
+---
+...
+format[3] = {name = 'field3', type = 'map'}
+---
+...
+format[4] = {name = 'field4', type = 'string' }
+---
+...
+format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 'string'}
+---
+...
+s = box.schema.space.create('test', {format = format})
+---
+...
+pk = s:create_index('pk')
+---
+...
+field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, key="value1", value="key1"}}
+---
+...
+field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200}
+---
+...
+t = s:replace{1, field2, field3, "123456", "yes, this"}
+---
+...
+t[1]
+---
+- 1
+...
+t[2]
+---
+- [1, 2, 3, '4', [5, 6, 7], {'привет中国world': {'中国': 'привет'}, 'key': 'value1', 'value': 'key1'}]
+...
+t[3]
+---
+- {'k1': 100, 'k3': [{'a': 1, 'b': 2}, {'c': 3, 'd': 4}], -1: 200, 10: 100, 'k2': [
+ 1, 2, 3]}
+...
+t[4]
+---
+- '123456'
+...
+t[2][1]
+---
+- 1
+...
+t["[2][1]"]
+---
+- 1
+...
+t[2][5]
+---
+- [5, 6, 7]
+...
+t["[2][5]"]
+---
+- [5, 6, 7]
+...
+t["[2][5][1]"]
+---
+- 5
+...
+t["[2][5][2]"]
+---
+- 6
+...
+t["[2][5][3]"]
+---
+- 7
+...
+t["[2][6].key"]
+---
+- value1
+...
+t["[2][6].value"]
+---
+- key1
+...
+t["[2][6]['key']"]
+---
+- value1
+...
+t["[2][6]['value']"]
+---
+- key1
+...
+t[2][6].привет中国world.中国
+---
+- привет
+...
+t["[2][6].привет中国world"].中国
+---
+- привет
+...
+t["[2][6].привет中国world.中国"]
+---
+- привет
+...
+t["[2][6]['привет中国world']"]["中国"]
+---
+- привет
+...
+t["[2][6]['привет中国world']['中国']"]
+---
+- привет
+...
+t["[2][6]['привет中国world']['中国a']"]
+---
+- yes, this
+...
+t["[3].k3[2].c"]
+---
+- 3
+...
+t["[4]"]
+---
+- '123456'
+...
+t.field1
+---
+- 1
+...
+t.field2[5]
+---
+- [5, 6, 7]
+...
+t[".field1"]
+---
+- 1
+...
+t["field1"]
+---
+- 1
+...
+t["[3][10]"]
+---
+- 100
+...
+-- Not found.
+t[0]
+---
+- null
+...
+t["[0]"]
+---
+- null
+...
+t["[1000]"]
+---
+- null
+...
+t.field1000
+---
+- null
+...
+t["not_found"]
+---
+- null
+...
+t["[2][5][10]"]
+---
+- null
+...
+t["[2][6].key100"]
+---
+- null
+...
+t["[2][0]"] -- 0-based index in array.
+---
+- null
+...
+t["[4][3]"] -- Can not index string.
+---
+- null
+...
+t["[4]['key']"]
+---
+- null
+...
+-- Not found 'a'. Return 'null' despite of syntax error on a
+-- next position.
+t["a.b.c d.e.f"]
+---
+- null
+...
+-- Sytax errors.
+t[""]
+---
+- error: 'builtin/box/tuple.lua:315: Usage: tuple[<path> or number >= 1]'
+...
+t["[2].[5]"]
+---
+- error: Illegal parameters, error in path on position 5
+...
+t["[-1]"]
+---
+- error: Illegal parameters, error in path on position 2
+...
+t[".."]
+---
+- error: Illegal parameters, error in path on position 2
+...
+t["[["]
+---
+- error: Illegal parameters, error in path on position 2
+...
+t["]]"]
+---
+- error: Illegal parameters, error in path on position 1
+...
+t["{"]
+---
+- error: Illegal parameters, error in path on position 1
+...
+s:drop()
+---
+...
engine = nil
---
...
diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua
index 6d7d254..90da8b2 100644
--- a/test/engine/tuple.test.lua
+++ b/test/engine/tuple.test.lua
@@ -200,5 +200,71 @@ t1map = t1:tomap()
maplen(t1map), t1map[1], t1map[2], t1map[3]
s:drop()
+format = {}
+format[1] = {name = 'field1', type = 'unsigned'}
+format[2] = {name = 'field2', type = 'array'}
+format[3] = {name = 'field3', type = 'map'}
+format[4] = {name = 'field4', type = 'string' }
+format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 'string'}
+s = box.schema.space.create('test', {format = format})
+pk = s:create_index('pk')
+field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, key="value1", value="key1"}}
+field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200}
+t = s:replace{1, field2, field3, "123456", "yes, this"}
+t[1]
+t[2]
+t[3]
+t[4]
+t[2][1]
+t["[2][1]"]
+t[2][5]
+t["[2][5]"]
+t["[2][5][1]"]
+t["[2][5][2]"]
+t["[2][5][3]"]
+t["[2][6].key"]
+t["[2][6].value"]
+t["[2][6]['key']"]
+t["[2][6]['value']"]
+t[2][6].привет中国world.中国
+t["[2][6].привет中国world"].中国
+t["[2][6].привет中国world.中国"]
+t["[2][6]['привет中国world']"]["中国"]
+t["[2][6]['привет中国world']['中国']"]
+t["[2][6]['привет中国world']['中国a']"]
+t["[3].k3[2].c"]
+t["[4]"]
+t.field1
+t.field2[5]
+t[".field1"]
+t["field1"]
+t["[3][10]"]
+
+-- Not found.
+t[0]
+t["[0]"]
+t["[1000]"]
+t.field1000
+t["not_found"]
+t["[2][5][10]"]
+t["[2][6].key100"]
+t["[2][0]"] -- 0-based index in array.
+t["[4][3]"] -- Can not index string.
+t["[4]['key']"]
+-- Not found 'a'. Return 'null' despite of syntax error on a
+-- next position.
+t["a.b.c d.e.f"]
+
+-- Sytax errors.
+t[""]
+t["[2].[5]"]
+t["[-1]"]
+t[".."]
+t["[["]
+t["]]"]
+t["{"]
+
+s:drop()
+
engine = nil
test_run = nil
--
2.7.4
More information about the Tarantool-patches
mailing list