From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com
Cc: kostja@tarantool.org, Kirill Shcherbatov <kshcherbatov@tarantool.org>
Subject: [tarantool-patches] [PATCH v5 5/9] box: introduce JSON indexes
Date: Mon, 26 Nov 2018 13:49:39 +0300
Message-ID: <f9059a9633c48f7c18ee787272ac3b43af8e7742.1543229303.git.kshcherbatov@tarantool.org>
In-Reply-To: <cover.1543229303.git.kshcherbatov@tarantool.org>
New JSON-path-based indexes allow indexing document content.
Since we need to store a user-defined JSON path in key_part
and key_part_def, new path and path_len fields have been
introduced. The JSON path is verified and transformed to
canonical form when the index msgpack is unpacked.
The path string is stored as part of the key_def allocation:
+-------+---------+-------+---------+-------+-------+-------+
|key_def|key_part1| ... |key_partN| path1 | pathK | pathN |
+-------+---------+-------+---------+-------+-------+-------+
          |                         ^
          |-> path _________________|
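For illustration, here is a condensed sketch of the sizing and
pointer setup this layout implies (mirroring key_def_new() further
down in this patch; error handling and the rest of the part
initialization are omitted):

    size_t paths_size = 0;
    for (uint32_t i = 0; i < part_count; i++) {
            if (parts[i].path != NULL)
                    paths_size += strlen(parts[i].path) + 1;
    }
    struct key_def *def = calloc(1, key_def_sizeof(part_count, paths_size));
    /* Path strings are laid out right after the parts array. */
    char *pos = (char *)def + key_def_sizeof(part_count, 0);
    for (uint32_t i = 0; i < part_count; i++) {
            if (parts[i].path == NULL)
                    continue;
            def->parts[i].path = pos;
            pos += strlen(parts[i].path) + 1;
    }

key_def_dup() and key_def_swap() then only need to rebase these
pointers relative to the new memory chunk.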
On format creation, JSON paths are stored at the end of the
format allocation:
+------------+------------+-------+------------+-------+
|tuple_format|tuple_field1| ... |tuple_fieldN| pathK |
+------------+------------+-------+------------+-------+
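The accounting on the format side is analogous. Below is a minimal
helper-style sketch of how much tail room one key definition
contributes; the actual code in tuple_format_alloc() sums this over
all keys of the space, and tuple_format_create() then copies every
path into that area through a paths_data write cursor placed right
after struct tuple_format. The helper name is purely illustrative
and does not exist in the patch:

    static uint32_t
    key_def_paths_size(const struct key_def *key_def)
    {
            uint32_t size = 0;
            for (uint32_t i = 0; i < key_def->part_count; i++) {
                    /* One byte extra per path for the trailing '\0'. */
                    if (key_def->parts[i].path != NULL)
                            size += key_def->parts[i].path_len + 1;
            }
            return size;
    }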
Part of #1012
---
src/box/errcode.h | 2 +-
src/box/index_def.c | 8 +-
src/box/key_def.c | 164 +++++++++++++---
src/box/key_def.h | 23 ++-
src/box/lua/space.cc | 5 +
src/box/memtx_engine.c | 3 +
src/box/sql.c | 1 +
src/box/sql/build.c | 1 +
src/box/sql/select.c | 6 +-
src/box/sql/where.c | 1 +
src/box/tuple.c | 38 +---
src/box/tuple_compare.cc | 13 +-
src/box/tuple_extract_key.cc | 21 ++-
src/box/tuple_format.c | 439 ++++++++++++++++++++++++++++++++++++-------
src/box/tuple_format.h | 38 +++-
src/box/tuple_hash.cc | 2 +-
src/box/vinyl.c | 3 +
src/box/vy_log.c | 3 +-
src/box/vy_point_lookup.c | 2 -
src/box/vy_stmt.c | 166 +++++++++++++---
test/box/misc.result | 1 +
test/engine/tuple.result | 416 ++++++++++++++++++++++++++++++++++++++++
test/engine/tuple.test.lua | 121 ++++++++++++
23 files changed, 1306 insertions(+), 171 deletions(-)
diff --git a/src/box/errcode.h b/src/box/errcode.h
index 73359eb..2f979ab 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -138,7 +138,7 @@ struct errcode_record {
/* 83 */_(ER_ROLE_EXISTS, "Role '%s' already exists") \
/* 84 */_(ER_CREATE_ROLE, "Failed to create role '%s': %s") \
/* 85 */_(ER_INDEX_EXISTS, "Index '%s' already exists") \
- /* 86 */_(ER_UNUSED6, "") \
+ /* 86 */_(ER_DATA_STRUCTURE_MISMATCH, "Tuple doesn't match document structure: %s") \
/* 87 */_(ER_ROLE_LOOP, "Granting role '%s' to role '%s' would create a loop") \
/* 88 */_(ER_GRANT, "Incorrect grant arguments: %s") \
/* 89 */_(ER_PRIV_GRANTED, "User '%s' already has %s access on %s '%s'") \
diff --git a/src/box/index_def.c b/src/box/index_def.c
index 45c74d9..de4ea85 100644
--- a/src/box/index_def.c
+++ b/src/box/index_def.c
@@ -31,6 +31,7 @@
#include "index_def.h"
#include "schema_def.h"
#include "identifier.h"
+#include "json/json.h"
const char *index_type_strs[] = { "HASH", "TREE", "BITSET", "RTREE" };
@@ -298,8 +299,11 @@ index_def_is_valid(struct index_def *index_def, const char *space_name)
* Courtesy to a user who could have made
* a typo.
*/
- if (index_def->key_def->parts[i].fieldno ==
- index_def->key_def->parts[j].fieldno) {
+ struct key_part *part_a = &index_def->key_def->parts[i];
+ struct key_part *part_b = &index_def->key_def->parts[j];
+ if (part_a->fieldno == part_b->fieldno &&
+ json_path_cmp(part_a->path, part_a->path_len,
part_b->path, part_b->path_len) == 0) {
diag_set(ClientError, ER_MODIFY_INDEX,
index_def->name, space_name,
"same key part is indexed twice");
diff --git a/src/box/key_def.c b/src/box/key_def.c
index 2119ca3..bc6cecd 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -28,6 +28,8 @@
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+#include "fiber.h"
+#include "json/json.h"
#include "key_def.h"
#include "tuple_compare.h"
#include "tuple_extract_key.h"
@@ -44,7 +46,8 @@ const struct key_part_def key_part_def_default = {
COLL_NONE,
false,
ON_CONFLICT_ACTION_DEFAULT,
- SORT_ORDER_ASC
+ SORT_ORDER_ASC,
+ NULL
};
static int64_t
@@ -59,6 +62,7 @@ part_type_by_name_wrapper(const char *str, uint32_t len)
#define PART_OPT_NULLABILITY "is_nullable"
#define PART_OPT_NULLABLE_ACTION "nullable_action"
#define PART_OPT_SORT_ORDER "sort_order"
+#define PART_OPT_PATH "path"
const struct opt_def part_def_reg[] = {
OPT_DEF_ENUM(PART_OPT_TYPE, field_type, struct key_part_def, type,
@@ -71,6 +75,7 @@ const struct opt_def part_def_reg[] = {
struct key_part_def, nullable_action, NULL),
OPT_DEF_ENUM(PART_OPT_SORT_ORDER, sort_order, struct key_part_def,
sort_order, NULL),
+ OPT_DEF(PART_OPT_PATH, OPT_STRPTR, struct key_part_def, path),
OPT_END,
};
@@ -106,13 +111,25 @@ const uint32_t key_mp_type[] = {
struct key_def *
key_def_dup(const struct key_def *src)
{
- size_t sz = key_def_sizeof(src->part_count);
- struct key_def *res = (struct key_def *)malloc(sz);
+ const struct key_part *parts = src->parts;
+ const struct key_part *parts_end = parts + src->part_count;
+ size_t sz = 0;
+ for (; parts < parts_end; parts++)
+ sz += parts->path != NULL ? parts->path_len + 1 : 0;
+ sz = key_def_sizeof(src->part_count, sz);
+ struct key_def *res = (struct key_def *)calloc(1, sz);
if (res == NULL) {
diag_set(OutOfMemory, sz, "malloc", "res");
return NULL;
}
memcpy(res, src, sz);
+ /* Update paths to point to the new memory chunk. */
+ for (uint32_t i = 0; i < src->part_count; i++) {
+ if (src->parts[i].path == NULL)
+ continue;
+ size_t path_offset = src->parts[i].path - (char *)src;
+ res->parts[i].path = (char *)res + path_offset;
+ }
return res;
}
@@ -120,8 +137,23 @@ void
key_def_swap(struct key_def *old_def, struct key_def *new_def)
{
assert(old_def->part_count == new_def->part_count);
- for (uint32_t i = 0; i < new_def->part_count; i++)
- SWAP(old_def->parts[i], new_def->parts[i]);
+ for (uint32_t i = 0; i < new_def->part_count; i++) {
+ if (old_def->parts[i].path == NULL) {
+ SWAP(old_def->parts[i], new_def->parts[i]);
+ } else {
+ /*
+ * Since the data is located in memory
+ * in the same order (otherwise rebuild
+ * would be called), just update the
+ * pointers.
+ */
+ size_t path_offset =
+ old_def->parts[i].path - (char *)old_def;
+ SWAP(old_def->parts[i], new_def->parts[i]);
+ old_def->parts[i].path = (char *)old_def + path_offset;
+ new_def->parts[i].path = (char *)new_def + path_offset;
+ }
+ }
SWAP(*old_def, *new_def);
}
@@ -144,24 +176,38 @@ static void
key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
enum field_type type, enum on_conflict_action nullable_action,
struct coll *coll, uint32_t coll_id,
- enum sort_order sort_order)
+ enum sort_order sort_order, const char *path,
+ uint32_t path_len)
{
assert(part_no < def->part_count);
assert(type < field_type_MAX);
def->is_nullable |= (nullable_action == ON_CONFLICT_ACTION_NONE);
+ def->has_json_paths |= path != NULL;
def->parts[part_no].nullable_action = nullable_action;
def->parts[part_no].fieldno = fieldno;
def->parts[part_no].type = type;
def->parts[part_no].coll = coll;
def->parts[part_no].coll_id = coll_id;
def->parts[part_no].sort_order = sort_order;
+ if (path != NULL) {
+ def->parts[part_no].path_len = path_len;
+ assert(def->parts[part_no].path != NULL);
+ memcpy(def->parts[part_no].path, path, path_len);
+ def->parts[part_no].path[path_len] = '\0';
+ } else {
+ def->parts[part_no].path_len = 0;
+ def->parts[part_no].path = NULL;
+ }
column_mask_set_fieldno(&def->column_mask, fieldno);
}
struct key_def *
key_def_new(const struct key_part_def *parts, uint32_t part_count)
{
- size_t sz = key_def_sizeof(part_count);
+ ssize_t sz = 0;
+ for (uint32_t i = 0; i < part_count; i++)
+ sz += parts[i].path != NULL ? strlen(parts[i].path) + 1 : 0;
+ sz = key_def_sizeof(part_count, sz);
struct key_def *def = calloc(1, sz);
if (def == NULL) {
diag_set(OutOfMemory, sz, "malloc", "struct key_def");
@@ -171,6 +217,7 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count)
def->part_count = part_count;
def->unique_part_count = part_count;
+ char *data = (char *)def + key_def_sizeof(part_count, 0);
for (uint32_t i = 0; i < part_count; i++) {
const struct key_part_def *part = &parts[i];
struct coll *coll = NULL;
@@ -184,16 +231,23 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count)
}
coll = coll_id->coll;
}
+ uint32_t path_len = 0;
+ if (part->path != NULL) {
+ path_len = strlen(part->path);
+ def->parts[i].path = data;
+ data += path_len + 1;
+ }
key_def_set_part(def, i, part->fieldno, part->type,
part->nullable_action, coll, part->coll_id,
- part->sort_order);
+ part->sort_order, part->path, path_len);
}
key_def_set_cmp(def);
return def;
}
-void
-key_def_dump_parts(const struct key_def *def, struct key_part_def *parts)
+int
+key_def_dump_parts(struct region *pool, const struct key_def *def,
+ struct key_part_def *parts)
{
for (uint32_t i = 0; i < def->part_count; i++) {
const struct key_part *part = &def->parts[i];
@@ -203,13 +257,27 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts)
part_def->is_nullable = key_part_is_nullable(part);
part_def->nullable_action = part->nullable_action;
part_def->coll_id = part->coll_id;
+ if (part->path != NULL) {
+ char *path = region_alloc(pool, part->path_len + 1);
+ if (path == NULL) {
+ diag_set(OutOfMemory, part->path_len + 1,
+ "region_alloc", "part_def->path");
+ return -1;
+ }
+ memcpy(path, part->path, part->path_len);
+ path[part->path_len] = '\0';
+ part_def->path = path;
+ } else {
+ part_def->path = NULL;
+}
}
+ return 0;
}
box_key_def_t *
box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
{
- size_t sz = key_def_sizeof(part_count);
+ size_t sz = key_def_sizeof(part_count, 0);
struct key_def *key_def = calloc(1, sz);
if (key_def == NULL) {
diag_set(OutOfMemory, sz, "malloc", "struct key_def");
@@ -223,7 +291,7 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
key_def_set_part(key_def, item, fields[item],
(enum field_type)types[item],
ON_CONFLICT_ACTION_DEFAULT,
- NULL, COLL_NONE, SORT_ORDER_ASC);
+ NULL, COLL_NONE, SORT_ORDER_ASC, NULL, 0);
}
key_def_set_cmp(key_def);
return key_def;
@@ -272,6 +340,10 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1,
if (key_part_is_nullable(part1) != key_part_is_nullable(part2))
return key_part_is_nullable(part1) <
key_part_is_nullable(part2) ? -1 : 1;
+ int rc;
+ if ((rc = json_path_cmp(part1->path, part1->path_len,
+ part2->path, part2->path_len)) != 0)
+ return rc;
}
return part_count1 < part_count2 ? -1 : part_count1 > part_count2;
}
@@ -303,8 +375,15 @@ key_def_snprint_parts(char *buf, int size, const struct key_part_def *parts,
for (uint32_t i = 0; i < part_count; i++) {
const struct key_part_def *part = &parts[i];
assert(part->type < field_type_MAX);
- SNPRINT(total, snprintf, buf, size, "%d, '%s'",
- (int)part->fieldno, field_type_strs[part->type]);
+ if (part->path != NULL) {
+ SNPRINT(total, snprintf, buf, size, "%d, '%s', '%s'",
+ (int)part->fieldno, part->path,
+ field_type_strs[part->type]);
+ } else {
+ SNPRINT(total, snprintf, buf, size, "%d, '%s'",
+ (int)part->fieldno,
+ field_type_strs[part->type]);
+ }
if (i < part_count - 1)
SNPRINT(total, snprintf, buf, size, ", ");
}
@@ -323,6 +402,8 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count)
count++;
if (part->is_nullable)
count++;
+ if (part->path != NULL)
+ count++;
size += mp_sizeof_map(count);
size += mp_sizeof_str(strlen(PART_OPT_FIELD));
size += mp_sizeof_uint(part->fieldno);
@@ -337,6 +418,10 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count)
size += mp_sizeof_str(strlen(PART_OPT_NULLABILITY));
size += mp_sizeof_bool(part->is_nullable);
}
+ if (part->path != NULL) {
+ size += mp_sizeof_str(strlen(PART_OPT_PATH));
+ size += mp_sizeof_str(strlen(part->path));
+ }
}
return size;
}
@@ -352,6 +437,8 @@ key_def_encode_parts(char *data, const struct key_part_def *parts,
count++;
if (part->is_nullable)
count++;
+ if (part->path != NULL)
+ count++;
data = mp_encode_map(data, count);
data = mp_encode_str(data, PART_OPT_FIELD,
strlen(PART_OPT_FIELD));
@@ -371,6 +458,12 @@ key_def_encode_parts(char *data, const struct key_part_def *parts,
strlen(PART_OPT_NULLABILITY));
data = mp_encode_bool(data, part->is_nullable);
}
+ if (part->path != NULL) {
+ data = mp_encode_str(data, PART_OPT_PATH,
+ strlen(PART_OPT_PATH));
+ data = mp_encode_str(data, part->path,
+ strlen(part->path));
+ }
}
return data;
}
@@ -432,6 +525,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count,
fields[part->fieldno].is_nullable :
key_part_def_default.is_nullable);
part->coll_id = COLL_NONE;
+ part->path = NULL;
}
return 0;
}
@@ -445,6 +539,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
return key_def_decode_parts_166(parts, part_count, data,
fields, field_count);
}
+ struct region *region = &fiber()->gc;
for (uint32_t i = 0; i < part_count; i++) {
struct key_part_def *part = &parts[i];
if (mp_typeof(**data) != MP_MAP) {
@@ -468,7 +563,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
const char *key = mp_decode_str(data, &key_len);
if (opts_parse_key(part, part_def_reg, key, key_len, data,
ER_WRONG_INDEX_OPTIONS,
- i + TUPLE_INDEX_BASE, NULL,
+ i + TUPLE_INDEX_BASE, region,
false) != 0)
return -1;
if (is_action_missing &&
@@ -533,7 +628,9 @@ key_def_find(const struct key_def *key_def, const struct key_part *to_find)
const struct key_part *part = key_def->parts;
const struct key_part *end = part + key_def->part_count;
for (; part != end; part++) {
- if (part->fieldno == to_find->fieldno)
+ if (part->fieldno == to_find->fieldno &&
+ json_path_cmp(part->path, part->path_len,
+ to_find->path, to_find->path_len) == 0)
return part;
}
return NULL;
@@ -559,18 +656,27 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
* Find and remove part duplicates, i.e. parts counted
* twice since they are present in both key defs.
*/
- const struct key_part *part = second->parts;
- const struct key_part *end = part + second->part_count;
+ size_t sz = 0;
+ const struct key_part *part = first->parts;
+ const struct key_part *end = part + first->part_count;
+ for (; part != end; part++) {
+ if (part->path != NULL)
+ sz += part->path_len + 1;
+ }
+ part = second->parts;
+ end = part + second->part_count;
for (; part != end; part++) {
if (key_def_find(first, part) != NULL)
--new_part_count;
+ else if (part->path != NULL)
+ sz += part->path_len + 1;
}
+ sz = key_def_sizeof(new_part_count, sz);
struct key_def *new_def;
- new_def = (struct key_def *)calloc(1, key_def_sizeof(new_part_count));
+ new_def = (struct key_def *)calloc(1, sz);
if (new_def == NULL) {
- diag_set(OutOfMemory, key_def_sizeof(new_part_count), "malloc",
- "new_def");
+ diag_set(OutOfMemory, sz, "malloc", "new_def");
return NULL;
}
new_def->part_count = new_part_count;
@@ -578,15 +684,22 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
new_def->is_nullable = first->is_nullable || second->is_nullable;
new_def->has_optional_parts = first->has_optional_parts ||
second->has_optional_parts;
+ /* Path data write position in the new key_def. */
+ char *data = (char *)new_def + key_def_sizeof(new_part_count, 0);
/* Write position in the new key def. */
uint32_t pos = 0;
/* Append first key def's parts to the new index_def. */
part = first->parts;
end = part + first->part_count;
for (; part != end; part++) {
+ if (part->path != NULL) {
+ new_def->parts[pos].path = data;
+ data += part->path_len + 1;
+ }
key_def_set_part(new_def, pos++, part->fieldno, part->type,
part->nullable_action, part->coll,
- part->coll_id, part->sort_order);
+ part->coll_id, part->sort_order, part->path,
+ part->path_len);
}
/* Set-append second key def's part to the new key def. */
@@ -595,9 +708,14 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
for (; part != end; part++) {
if (key_def_find(first, part) != NULL)
continue;
+ if (part->path != NULL) {
+ new_def->parts[pos].path = data;
+ data += part->path_len + 1;
+ }
key_def_set_part(new_def, pos++, part->fieldno, part->type,
part->nullable_action, part->coll,
- part->coll_id, part->sort_order);
+ part->coll_id, part->sort_order, part->path,
+ part->path_len);
}
key_def_set_cmp(new_def);
return new_def;
diff --git a/src/box/key_def.h b/src/box/key_def.h
index d4da6c5..7731e48 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -68,6 +68,8 @@ struct key_part_def {
enum on_conflict_action nullable_action;
/** Part sort order. */
enum sort_order sort_order;
+ /** JSON path to data. */
+ const char *path;
};
extern const struct key_part_def key_part_def_default;
@@ -86,6 +88,13 @@ struct key_part {
enum on_conflict_action nullable_action;
/** Part sort order. */
enum sort_order sort_order;
+ /**
+ * JSON path to data in 'canonical' form.
+ * Read json_path_normalize to get more details.
+ */
+ char *path;
+ /** The length of JSON path. */
+ uint32_t path_len;
};
struct key_def;
@@ -152,6 +161,8 @@ struct key_def {
uint32_t unique_part_count;
/** True, if at least one part can store NULL. */
bool is_nullable;
+ /** True, if some key part has JSON path. */
+ bool has_json_paths;
/**
* True, if some key parts can be absent in a tuple. These
* fields assumed to be MP_NIL.
@@ -245,9 +256,10 @@ box_tuple_compare_with_key(const box_tuple_t *tuple_a, const char *key_b,
/** \endcond public */
static inline size_t
-key_def_sizeof(uint32_t part_count)
+key_def_sizeof(uint32_t part_count, uint32_t paths_size)
{
- return sizeof(struct key_def) + sizeof(struct key_part) * part_count;
+ return sizeof(struct key_def) + sizeof(struct key_part) * part_count +
+ paths_size;
}
/**
@@ -260,8 +272,9 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count);
/**
* Dump part definitions of the given key def.
*/
-void
-key_def_dump_parts(const struct key_def *def, struct key_part_def *parts);
+int
+key_def_dump_parts(struct region *pool, const struct key_def *def,
+ struct key_part_def *parts);
/**
* Update 'has_optional_parts' of @a key_def with correspondence
@@ -368,6 +381,8 @@ key_validate_parts(const struct key_def *key_def, const char *key,
static inline bool
key_def_is_sequential(const struct key_def *key_def)
{
+ if (key_def->has_json_paths)
+ return false;
for (uint32_t part_id = 0; part_id < key_def->part_count; part_id++) {
if (key_def->parts[part_id].fieldno != part_id)
return false;
diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc
index 7cae436..a882a9d 100644
--- a/src/box/lua/space.cc
+++ b/src/box/lua/space.cc
@@ -296,6 +296,11 @@ lbox_fillspace(struct lua_State *L, struct space *space, int i)
lua_pushnumber(L, part->fieldno + TUPLE_INDEX_BASE);
lua_setfield(L, -2, "fieldno");
+ if (part->path != NULL) {
+ lua_pushstring(L, part->path);
+ lua_setfield(L, -2, "path");
+ }
+
lua_pushboolean(L, key_part_is_nullable(part));
lua_setfield(L, -2, "is_nullable");
diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 28afb32..1bc46c6 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -1316,6 +1316,9 @@ memtx_index_def_change_requires_rebuild(struct index *index,
return true;
if (old_part->coll != new_part->coll)
return true;
+ if (json_path_cmp(old_part->path, old_part->path_len,
+ new_part->path, new_part->path_len) != 0)
+ return true;
}
return false;
}
diff --git a/src/box/sql.c b/src/box/sql.c
index 0e4e0f4..d199171 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -378,6 +378,7 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
part->nullable_action = ON_CONFLICT_ACTION_NONE;
part->is_nullable = true;
part->sort_order = SORT_ORDER_ASC;
+ part->path = NULL;
if (def != NULL && i < def->part_count)
part->coll_id = def->parts[i].coll_id;
else
diff --git a/src/box/sql/build.c b/src/box/sql/build.c
index b5abaee..9f5d5aa 100644
--- a/src/box/sql/build.c
+++ b/src/box/sql/build.c
@@ -2423,6 +2423,7 @@ index_fill_def(struct Parse *parse, struct index *index,
part->is_nullable = part->nullable_action == ON_CONFLICT_ACTION_NONE;
part->sort_order = SORT_ORDER_ASC;
part->coll_id = coll_id;
+ part->path = NULL;
}
key_def = key_def_new(key_parts, expr_list->nExpr);
if (key_def == NULL)
diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index ca709b4..0734712 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -1349,6 +1349,7 @@ sql_key_info_new(sqlite3 *db, uint32_t part_count)
part->is_nullable = false;
part->nullable_action = ON_CONFLICT_ACTION_ABORT;
part->sort_order = SORT_ORDER_ASC;
+ part->path = NULL;
}
return key_info;
}
@@ -1356,6 +1357,9 @@ sql_key_info_new(sqlite3 *db, uint32_t part_count)
struct sql_key_info *
sql_key_info_new_from_key_def(sqlite3 *db, const struct key_def *key_def)
{
+ /** SQL key_parts could not have JSON paths. */
+ for (uint32_t i = 0; i < key_def->part_count; i++)
+ assert(key_def->parts[i].path == NULL);
struct sql_key_info *key_info = sqlite3DbMallocRawNN(db,
sql_key_info_sizeof(key_def->part_count));
if (key_info == NULL) {
@@ -1366,7 +1370,7 @@ sql_key_info_new_from_key_def(sqlite3 *db, const struct key_def *key_def)
key_info->key_def = NULL;
key_info->refs = 1;
key_info->part_count = key_def->part_count;
- key_def_dump_parts(key_def, key_info->parts);
+ key_def_dump_parts(&fiber()->gc, key_def, key_info->parts);
return key_info;
}
diff --git a/src/box/sql/where.c b/src/box/sql/where.c
index 9c3462b..78f70f4 100644
--- a/src/box/sql/where.c
+++ b/src/box/sql/where.c
@@ -2807,6 +2807,7 @@ whereLoopAddBtree(WhereLoopBuilder * pBuilder, /* WHERE clause information */
part.is_nullable = false;
part.sort_order = SORT_ORDER_ASC;
part.coll_id = COLL_NONE;
+ part.path = NULL;
struct key_def *key_def = key_def_new(&part, 1);
if (key_def == NULL) {
diff --git a/src/box/tuple.c b/src/box/tuple.c
index aae1c3c..62e06e7 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -138,38 +138,18 @@ runtime_tuple_delete(struct tuple_format *format, struct tuple *tuple)
int
tuple_validate_raw(struct tuple_format *format, const char *tuple)
{
- if (tuple_format_field_count(format) == 0)
- return 0; /* Nothing to check */
-
- /* Check to see if the tuple has a sufficient number of fields. */
- uint32_t field_count = mp_decode_array(&tuple);
- if (format->exact_field_count > 0 &&
- format->exact_field_count != field_count) {
- diag_set(ClientError, ER_EXACT_FIELD_COUNT,
- (unsigned) field_count,
- (unsigned) format->exact_field_count);
+ struct region *region = &fiber()->gc;
+ uint32_t used = region_used(region);
+ uint32_t *field_map = region_alloc(region, format->field_map_size);
+ if (field_map == NULL) {
+ diag_set(OutOfMemory, format->field_map_size, "region_alloc",
+ "field_map");
return -1;
}
- if (unlikely(field_count < format->min_field_count)) {
- diag_set(ClientError, ER_MIN_FIELD_COUNT,
- (unsigned) field_count,
- (unsigned) format->min_field_count);
+ field_map = (uint32_t *)((char *)field_map + format->field_map_size);
+ if (tuple_init_field_map(format, field_map, tuple, true) != 0)
return -1;
- }
-
- /* Check field types */
- struct tuple_field *field = tuple_format_field(format, 0);
- uint32_t i = 0;
- uint32_t defined_field_count =
- MIN(field_count, tuple_format_field_count(format));
- for (; i < defined_field_count; ++i) {
- field = tuple_format_field(format, i);
- if (key_mp_type_validate(field->type, mp_typeof(*tuple),
- ER_FIELD_TYPE, i + TUPLE_INDEX_BASE,
- tuple_field_is_nullable(field)))
- return -1;
- mp_next(&tuple);
- }
+ region_truncate(region, used);
return 0;
}
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index e21b009..554c29f 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -469,7 +469,8 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
struct key_part *part = key_def->parts;
const char *tuple_a_raw = tuple_data(tuple_a);
const char *tuple_b_raw = tuple_data(tuple_b);
- if (key_def->part_count == 1 && part->fieldno == 0) {
+ if (key_def->part_count == 1 && part->fieldno == 0 &&
+ part->path == NULL) {
/*
* First field can not be optional - empty tuples
* can not exist.
@@ -493,8 +494,8 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
}
bool was_null_met = false;
- const struct tuple_format *format_a = tuple_format(tuple_a);
- const struct tuple_format *format_b = tuple_format(tuple_b);
+ struct tuple_format *format_a = tuple_format(tuple_a);
+ struct tuple_format *format_b = tuple_format(tuple_b);
const uint32_t *field_map_a = tuple_field_map(tuple_a);
const uint32_t *field_map_b = tuple_field_map(tuple_b);
struct key_part *end;
@@ -585,7 +586,7 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
assert(key != NULL || part_count == 0);
assert(part_count <= key_def->part_count);
struct key_part *part = key_def->parts;
- const struct tuple_format *format = tuple_format(tuple);
+ struct tuple_format *format = tuple_format(tuple);
const char *tuple_raw = tuple_data(tuple);
const uint32_t *field_map = tuple_field_map(tuple);
enum mp_type a_type, b_type;
@@ -1027,7 +1028,7 @@ tuple_compare_create(const struct key_def *def)
}
}
assert(! def->has_optional_parts);
- if (!key_def_has_collation(def)) {
+ if (!key_def_has_collation(def) && !def->has_json_paths) {
/* Precalculated comparators don't use collation */
for (uint32_t k = 0;
k < sizeof(cmp_arr) / sizeof(cmp_arr[0]); k++) {
@@ -1247,7 +1248,7 @@ tuple_compare_with_key_create(const struct key_def *def)
}
}
assert(! def->has_optional_parts);
- if (!key_def_has_collation(def)) {
+ if (!key_def_has_collation(def) && !def->has_json_paths) {
/* Precalculated comparators don't use collation */
for (uint32_t k = 0;
k < sizeof(cmp_wk_arr) / sizeof(cmp_wk_arr[0]);
diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc
index e9d7cac..04c5463 100644
--- a/src/box/tuple_extract_key.cc
+++ b/src/box/tuple_extract_key.cc
@@ -10,7 +10,8 @@ key_def_parts_are_sequential(const struct key_def *def, int i)
{
uint32_t fieldno1 = def->parts[i].fieldno + 1;
uint32_t fieldno2 = def->parts[i + 1].fieldno;
- return fieldno1 == fieldno2;
+ return fieldno1 == fieldno2 && def->parts[i].path == NULL &&
+ def->parts[i + 1].path == NULL;
}
/** True, if a key con contain two or more parts in sequence. */
@@ -111,7 +112,7 @@ tuple_extract_key_slowpath(const struct tuple *tuple,
const char *data = tuple_data(tuple);
uint32_t part_count = key_def->part_count;
uint32_t bsize = mp_sizeof_array(part_count);
- const struct tuple_format *format = tuple_format(tuple);
+ struct tuple_format *format = tuple_format(tuple);
const uint32_t *field_map = tuple_field_map(tuple);
const char *tuple_end = data + tuple->bsize;
@@ -241,7 +242,8 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
if (!key_def_parts_are_sequential(key_def, i))
break;
}
- uint32_t end_fieldno = key_def->parts[i].fieldno;
+ const struct key_part *part = &key_def->parts[i];
+ uint32_t end_fieldno = part->fieldno;
if (fieldno < current_fieldno) {
/* Rewind. */
@@ -283,6 +285,15 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
current_fieldno++;
}
}
+ const char *field_last, *field_end_last;
+ if (part->path != NULL) {
+ field_last = field;
+ field_end_last = field_end;
+ (void)tuple_field_go_to_path(&field, part->path,
+ part->path_len);
+ field_end = field;
+ mp_next(&field_end);
+ }
memcpy(key_buf, field, field_end - field);
key_buf += field_end - field;
if (has_optional_parts && null_count != 0) {
@@ -291,6 +302,10 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
} else {
assert(key_buf - key <= data_end - data);
}
+ if (part->path != NULL) {
+ field = field_last;
+ field_end = field_end_last;
+ }
}
if (key_size != NULL)
*key_size = (uint32_t)(key_buf - key);
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 92028c5..193d0d8 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -28,6 +28,7 @@
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+#include "fiber.h"
#include "json/json.h"
#include "tuple_format.h"
#include "coll_id_cache.h"
@@ -51,7 +52,8 @@ tuple_field_create(struct json_token *token)
ret->offset_slot = TUPLE_OFFSET_SLOT_NIL;
ret->coll_id = COLL_NONE;
ret->nullable_action = ON_CONFLICT_ACTION_NONE;
- ret->token = *token;
+ if (token != NULL)
+ ret->token = *token;
return ret;
}
@@ -61,14 +63,114 @@ tuple_field_destroy(struct tuple_field *field)
free(field);
}
+/** Build a JSON tree branch for the specified path. */
+static struct tuple_field *
+tuple_field_tree_add_path(struct tuple_format *format, const char *path,
+ uint32_t path_len, uint32_t fieldno)
+{
+ int rc = 0;
+ struct json_tree *tree = &format->tree;
+ struct tuple_field *parent = tuple_format_field(format, fieldno);
+ struct tuple_field *field = tuple_field_create(NULL);
+ if (unlikely(field == NULL))
+ goto end;
+
+ struct json_lexer lexer;
+ bool is_last_new = false;
+ json_lexer_create(&lexer, path, path_len);
+ while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 &&
+ field->token.key.type != JSON_TOKEN_END) {
+ enum field_type iterm_node_type =
+ field->token.key.type == JSON_TOKEN_STR ?
+ FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
+ if (parent->type != FIELD_TYPE_ANY &&
+ parent->type != iterm_node_type) {
+ const char *name =
+ tt_sprintf("[%d]%.*s", fieldno, path_len, path);
+ diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH, name,
+ field_type_strs[parent->type],
+ field_type_strs[iterm_node_type]);
+ parent = NULL;
+ goto end;
+ }
+ struct tuple_field *next =
+ json_tree_lookup_entry(tree, &parent->token,
+ &field->token,
+ struct tuple_field, token);
+ if (next == NULL) {
+ rc = json_tree_add(tree, &parent->token, &field->token);
+ if (unlikely(rc != 0)) {
+ diag_set(OutOfMemory, sizeof(struct json_token),
+ "json_tree_add", "tree");
+ parent = NULL;
+ goto end;
+ }
+ next = field;
+ is_last_new = true;
+ field = tuple_field_create(NULL);
+ if (unlikely(next == NULL))
+ goto end;
+ } else {
+ is_last_new = false;
+ }
+ parent->type = iterm_node_type;
+ parent = next;
+ }
+ if (rc != 0 || field->token.key.type != JSON_TOKEN_END) {
+ const char *err_msg =
+ tt_sprintf("invalid JSON path '%s': path has invalid "
+ "structure (error at position %d)", path,
+ rc);
+ diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
+ fieldno + TUPLE_INDEX_BASE, err_msg);
+ parent = NULL;
+ goto end;
+ }
+ assert(parent != NULL);
+ /* Update tree depth information. */
+ if (is_last_new) {
+ uint32_t depth = 1;
+ for (struct json_token *iter = parent->token.parent;
+ iter != &format->tree.root; iter = iter->parent, ++depth) {
+ struct tuple_field *record =
+ json_tree_entry(iter, struct tuple_field,
+ token);
+ record->subtree_depth =
+ MAX(record->subtree_depth, depth);
+ }
+ }
+end:
+ tuple_field_destroy(field);
+ return parent;
+}
+
static int
tuple_format_use_key_part(struct tuple_format *format,
const struct field_def *fields, uint32_t field_count,
const struct key_part *part, bool is_sequential,
- int *current_slot)
+ int *current_slot, char **path_data)
{
assert(part->fieldno < tuple_format_field_count(format));
struct tuple_field *field = tuple_format_field(format, part->fieldno);
+ if (unlikely(part->path != NULL)) {
+ assert(!is_sequential);
+ /**
+ * Copy JSON path data to reserved area at the
+ * end of format allocation.
+ */
+ memcpy(*path_data, part->path, part->path_len);
+ (*path_data)[part->path_len] = '\0';
+ struct tuple_field *root = field;
+ field = tuple_field_tree_add_path(format, *path_data,
+ part->path_len,
+ part->fieldno);
+ if (field == NULL)
+ return -1;
+ format->subtree_depth =
+ MAX(format->subtree_depth, root->subtree_depth + 1);
+ field->is_key_part = true;
+ *path_data += part->path_len + 1;
+ }
/*
* If a field is not present in the space format,
* inherit nullable action of the first key part
@@ -113,7 +215,10 @@ tuple_format_use_key_part(struct tuple_format *format,
field->type)) {
const char *name;
int fieldno = part->fieldno + TUPLE_INDEX_BASE;
- if (part->fieldno >= field_count) {
+ if (unlikely(part->path != NULL)) {
+ name = tt_sprintf("[%d]%.*s", fieldno, part->path_len,
+ part->path);
+ } else if (part->fieldno >= field_count) {
name = tt_sprintf("%d", fieldno);
} else {
const struct field_def *def =
@@ -137,10 +242,9 @@ tuple_format_use_key_part(struct tuple_format *format,
* simply accessible, so we don't store an offset for it.
*/
if (field->offset_slot == TUPLE_OFFSET_SLOT_NIL &&
- is_sequential == false && part->fieldno > 0) {
- *current_slot = *current_slot - 1;
- field->offset_slot = *current_slot;
- }
+ is_sequential == false &&
+ (part->fieldno > 0 || part->path != NULL))
+ field->offset_slot = (*current_slot = *current_slot - 1);
return 0;
}
@@ -181,7 +285,7 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
}
int current_slot = 0;
-
+ char *paths_data = (char *)format + sizeof(struct tuple_format);
/* extract field type info */
for (uint16_t key_no = 0; key_no < key_count; ++key_no) {
const struct key_def *key_def = keys[key_no];
@@ -193,7 +297,8 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
if (tuple_format_use_key_part(format, fields,
field_count, part,
is_sequential,
- &current_slot) != 0)
+ &current_slot,
+ &paths_data) != 0)
return -1;
}
}
@@ -261,6 +366,8 @@ static struct tuple_format *
tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
uint32_t space_field_count, struct tuple_dictionary *dict)
{
+ /* Size of area to store paths. */
+ uint32_t paths_size = 0;
uint32_t index_field_count = 0;
/* find max max field no */
for (uint16_t key_no = 0; key_no < key_count; ++key_no) {
@@ -270,13 +377,16 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
for (; part < pend; part++) {
index_field_count = MAX(index_field_count,
part->fieldno + 1);
+ if (part->path != NULL)
+ paths_size += part->path_len + 1;
}
}
uint32_t field_count = MAX(space_field_count, index_field_count);
- struct tuple_format *format = malloc(sizeof(struct tuple_format));
+ uint32_t allocation_size = sizeof(struct tuple_format) + paths_size;
+ struct tuple_format *format = malloc(allocation_size);
if (format == NULL) {
- diag_set(OutOfMemory, sizeof(struct tuple_format), "malloc",
+ diag_set(OutOfMemory, allocation_size, "malloc",
"tuple format");
return NULL;
}
@@ -284,6 +394,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
free(format);
return NULL;
}
+ format->subtree_depth = 1;
struct json_token token;
memset(&token, 0, sizeof(token));
token.key.type = JSON_TOKEN_NUM;
@@ -306,6 +417,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
format->dict = dict;
tuple_dictionary_ref(dict);
}
+ format->allocation_size = allocation_size;
format->refs = 0;
format->id = FORMAT_ID_NIL;
format->index_field_count = index_field_count;
@@ -377,16 +489,37 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
{
if (format1->exact_field_count != format2->exact_field_count)
return false;
- uint32_t format1_field_count = tuple_format_field_count(format1);
- uint32_t format2_field_count = tuple_format_field_count(format2);
- for (uint32_t i = 0; i < format1_field_count; ++i) {
- const struct tuple_field *field1 =
- tuple_format_field(format1, i);
+ struct tuple_field *field1;
+ struct json_token *field2_prev_token = NULL;
+ struct json_token *skip_root_token = NULL;
+ struct json_token *field1_prev_token = &format1->tree.root;
+ json_tree_foreach_entry_preorder(field1, &format1->tree.root,
+ struct tuple_field, token) {
+ /* Test if subtree skip is required. */
+ if (skip_root_token != NULL) {
+ struct json_token *tmp = &field1->token;
+ while (tmp->parent != NULL &&
+ tmp->parent != skip_root_token)
+ tmp = tmp->parent;
+ if (tmp->parent == skip_root_token)
+ continue;
+ }
+ skip_root_token = NULL;
+ /* Lookup for a valid parent node in new tree. */
+ while (field1_prev_token != field1->token.parent) {
+ field1_prev_token = field1_prev_token->parent;
+ field2_prev_token = field2_prev_token->parent;
+ assert(field1_prev_token != NULL);
+ }
+ struct tuple_field *field2 =
+ json_tree_lookup_entry(&format2->tree, field2_prev_token,
+ &field1->token,
+ struct tuple_field, token);
/*
* The field has a data type in format1, but has
* no data type in format2.
*/
- if (i >= format2_field_count) {
+ if (field2 == NULL) {
/*
* The field can get a name added
* for it, and this doesn't require a data
@@ -397,13 +530,13 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
* NULLs or miss the subject field.
*/
if (field1->type == FIELD_TYPE_ANY &&
- tuple_field_is_nullable(field1))
+ tuple_field_is_nullable(field1)) {
+ skip_root_token = &field1->token;
continue;
- else
+ } else {
return false;
+ }
}
- const struct tuple_field *field2 =
- tuple_format_field(format2, i);
if (! field_type1_contains_type2(field1->type, field2->type))
return false;
/*
@@ -413,10 +546,82 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
if (tuple_field_is_nullable(field2) &&
!tuple_field_is_nullable(field1))
return false;
+
+ field2_prev_token = &field2->token;
+ field1_prev_token = &field1->token;
}
return true;
}
+/** Find a field in format by offset slot. */
+static struct tuple_field *
+tuple_field_by_offset_slot(const struct tuple_format *format,
+ int32_t offset_slot)
+{
+ struct tuple_field *field;
+ struct json_token *root = (struct json_token *)&format->tree.root;
+ json_tree_foreach_entry_preorder(field, root, struct tuple_field,
+ token) {
+ if (field->offset_slot == offset_slot)
+ return field;
+ }
+ return NULL;
+}
+
+/**
+ * Verify the field_map and raise an error if some indexed
+ * field has not been initialized. The routine relies on the
+ * field_map having been filled with the UINT32_MAX marker
+ * before initialization.
+ */
+static int
+tuple_field_map_validate(const struct tuple_format *format, uint32_t *field_map)
+{
+ struct json_token *tree_node = (struct json_token *)&format->tree.root;
+ /* Lookup for absent not-nullable fields. */
+ int32_t field_map_items =
+ (int32_t)(format->field_map_size/sizeof(field_map[0]));
+ for (int32_t i = -1; i >= -field_map_items; i--) {
+ if (field_map[i] != UINT32_MAX)
+ continue;
+
+ struct tuple_field *field =
+ tuple_field_by_offset_slot(format, i);
+ assert(field != NULL);
+ /* Lookup for field number in tree. */
+ struct json_token *parent = &field->token;
+ while (parent->parent != &format->tree.root)
+ parent = parent->parent;
+ assert(parent->key.type == JSON_TOKEN_NUM);
+ uint32_t fieldno = parent->key.num;
+
+ tree_node = &field->token;
+ const char *err_msg;
+ if (field->token.key.type == JSON_TOKEN_STR) {
+ err_msg = tt_sprintf("invalid field %d document "
+ "content: map doesn't contain a "
+ "key '%.*s' defined in index",
+ fieldno, tree_node->key.len,
+ tree_node->key.str);
+ } else if (field->token.key.type == JSON_TOKEN_NUM) {
+ err_msg = tt_sprintf("invalid field %d document "
+ "content: array size %d is less "
+ "than size %d defined in index",
+ fieldno, tree_node->key.num,
+ tree_node->parent->child_count);
+ }
+ diag_set(ClientError, ER_DATA_STRUCTURE_MISMATCH, err_msg);
+ return -1;
+ }
+ return 0;
+}
+
+struct parse_ctx {
+ enum json_token_type child_type;
+ uint32_t items;
+ uint32_t curr;
+};
+
/** @sa declaration for details. */
int
tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
@@ -442,44 +647,123 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
(unsigned) format->min_field_count);
return -1;
}
-
- /* first field is simply accessible, so we do not store offset to it */
- enum mp_type mp_type = mp_typeof(*pos);
- const struct tuple_field *field =
- tuple_format_field((struct tuple_format *)format, 0);
- if (validate &&
- key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE,
- TUPLE_INDEX_BASE, tuple_field_is_nullable(field)))
- return -1;
- mp_next(&pos);
- /* other fields...*/
- uint32_t i = 1;
uint32_t defined_field_count = MIN(field_count, validate ?
tuple_format_field_count(format) :
format->index_field_count);
- if (field_count < format->index_field_count) {
- /*
- * Nullify field map to be able to detect by 0,
- * which key fields are absent in tuple_field().
- */
- memset((char *)field_map - format->field_map_size, 0,
- format->field_map_size);
- }
- for (; i < defined_field_count; ++i) {
- field = tuple_format_field((struct tuple_format *)format, i);
- mp_type = mp_typeof(*pos);
- if (validate &&
- key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE,
- i + TUPLE_INDEX_BASE,
- tuple_field_is_nullable(field)))
- return -1;
- if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
- field_map[field->offset_slot] =
- (uint32_t) (pos - tuple);
+ /*
+ * Fill field_map with a marker so that the routine
+ * tuple_field_map_validate can detect absent fields.
+ */
+ memset((char *)field_map - format->field_map_size,
+ validate ? UINT32_MAX : 0, format->field_map_size);
+
+ struct region *region = &fiber()->gc;
+ uint32_t mp_stack_items = format->subtree_depth + 1;
+ uint32_t mp_stack_size = mp_stack_items * sizeof(struct parse_ctx);
+ struct parse_ctx *mp_stack = region_alloc(region, mp_stack_size);
+ if (unlikely(mp_stack == NULL)) {
+ diag_set(OutOfMemory, mp_stack_size, "region_alloc",
+ "mp_stack");
+ return -1;
+ }
+ mp_stack[0] = (struct parse_ctx){
+ .child_type = JSON_TOKEN_NUM,
+ .items = defined_field_count,
+ .curr = 0,
+ };
+ uint32_t mp_stack_idx = 0;
+ struct json_tree *tree = (struct json_tree *)&format->tree;
+ struct json_token *parent = &tree->root;
+ while (mp_stack[0].curr <= mp_stack[0].items) {
+ /* Prepare key for tree lookup. */
+ struct json_token token;
+ token.key.type = mp_stack[mp_stack_idx].child_type;
+ ++mp_stack[mp_stack_idx].curr;
+ if (token.key.type == JSON_TOKEN_NUM) {
+ token.key.num = mp_stack[mp_stack_idx].curr;
+ } else if (token.key.type == JSON_TOKEN_STR) {
+ if (mp_typeof(*pos) != MP_STR) {
+ /*
+ * We do not support non-string
+ * keys in maps.
+ */
+ mp_next(&pos);
+ mp_next(&pos);
+ continue;
+ }
+ token.key.str =
+ mp_decode_str(&pos, (uint32_t *)&token.key.len);
+ } else {
+ unreachable();
+ }
+ struct tuple_field *field =
+ json_tree_lookup_entry(tree, parent, &token,
+ struct tuple_field, token);
+ enum mp_type type = mp_typeof(*pos);
+ if (field != NULL) {
+ bool is_nullable = tuple_field_is_nullable(field);
+ if (validate &&
+ key_mp_type_validate(field->type, type,
+ ER_FIELD_TYPE,
+ mp_stack[0].curr,
+ is_nullable) != 0)
+ return -1;
+ if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
+ field_map[field->offset_slot] =
+ (uint32_t)(pos - tuple);
+ }
+ }
+ /* Prepare stack info for next iteration. */
+ if (field != NULL && type == MP_ARRAY &&
+ mp_stack_idx + 1 < format->subtree_depth) {
+ uint32_t size = mp_decode_array(&pos);
+ if (unlikely(size == 0))
+ continue;
+ parent = &field->token;
+ mp_stack[++mp_stack_idx] = (struct parse_ctx){
+ .child_type = JSON_TOKEN_NUM,
+ .items = size,
+ .curr = 0,
+ };
+ } else if (field != NULL && type == MP_MAP &&
+ mp_stack_idx + 1 < format->subtree_depth) {
+ uint32_t size = mp_decode_map(&pos);
+ if (unlikely(size == 0))
+ continue;
+ parent = &field->token;
+ mp_stack[++mp_stack_idx] = (struct parse_ctx){
+ .child_type = JSON_TOKEN_STR,
+ .items = size,
+ .curr = 0,
+ };
+ } else {
+ mp_next(&pos);
+ while (mp_stack[mp_stack_idx].curr >=
+ mp_stack[mp_stack_idx].items) {
+ assert(parent != NULL);
+ parent = parent->parent;
+ if (mp_stack_idx-- == 0)
+ goto end;
+ }
}
- mp_next(&pos);
+ };
+end:;
+ /*
+ * Field map has already been initialized with zeros when
+ * no validation is required.
+ */
+ if (!validate)
+ return 0;
+ struct tuple_field *field;
+ struct json_token *root = (struct json_token *)&format->tree.root;
+ json_tree_foreach_entry_preorder(field, root, struct tuple_field,
+ token) {
+ if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL &&
+ tuple_field_is_nullable(field) &&
+ field_map[field->offset_slot] == UINT32_MAX)
+ field_map[field->offset_slot] = 0;
}
- return 0;
+ return tuple_field_map_validate(format, field_map);
}
uint32_t
@@ -617,15 +901,7 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
return -1;
}
-/**
- * Retrieve msgpack data by JSON path.
- * @param data Pointer to msgpack with data.
- * @param path The path to process.
- * @param path_len The length of the @path.
- * @retval 0 On success.
- * @retval >0 On path parsing error, invalid character position.
- */
-static int
+int
tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len)
{
int rc;
@@ -731,3 +1007,40 @@ error:
tt_sprintf("error in path on position %d", rc));
return -1;
}
+
+const char *
+tuple_field_by_part_raw(struct tuple_format *format, const char *data,
+ const uint32_t *field_map, struct key_part *part)
+{
+ if (likely(part->path == NULL))
+ return tuple_field_raw(format, data, field_map, part->fieldno);
+
+ uint32_t field_count = tuple_format_field_count(format);
+ struct tuple_field *root_field =
+ likely(part->fieldno < field_count) ?
+ tuple_format_field(format, part->fieldno) : NULL;
+ struct tuple_field *field =
+ unlikely(root_field == NULL) ? NULL:
+ tuple_format_field_by_path(format, root_field, part->path,
+ part->path_len);
+ if (unlikely(field == NULL)) {
+ /*
+ * Legacy tuple having no field map for JSON
+ * index require full path parse.
+ */
+ const char *field_raw =
+ tuple_field_raw(format, data, field_map, part->fieldno);
+ if (unlikely(field_raw == NULL))
+ return NULL;
+ if (tuple_field_go_to_path(&field_raw, part->path,
+ part->path_len) != 0)
+ return NULL;
+ return field_raw;
+ }
+ int32_t offset_slot = field->offset_slot;
+ assert(offset_slot < 0);
+ assert(-offset_slot * sizeof(uint32_t) <= format->field_map_size);
+ if (unlikely(field_map[offset_slot] == 0))
+ return NULL;
+ return data + field_map[offset_slot];
+}
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index 2da773b..860f052 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -116,6 +116,8 @@ struct tuple_field {
uint32_t coll_id;
/** An JSON entry to organize tree. */
struct json_token token;
+ /** A maximum depth of field subtree. */
+ uint32_t subtree_depth;
};
/**
@@ -169,12 +171,16 @@ struct tuple_format {
* index_field_count <= min_field_count <= field_count.
*/
uint32_t min_field_count;
+ /** Size of format allocation. */
+ uint32_t allocation_size;
/**
* Shared names storage used by all formats of a space.
*/
struct tuple_dictionary *dict;
/** JSON tree of fields. */
struct json_tree tree;
+ /** A maximum depth of fields subtree. */
+ uint32_t subtree_depth;
};
@@ -196,6 +202,17 @@ tuple_format_field(struct tuple_format *format, uint32_t fieldno)
struct tuple_field, token);
}
+static inline struct tuple_field *
+tuple_format_field_by_path(struct tuple_format *format,
+ struct tuple_field *root, const char *path,
+ uint32_t path_len)
+{
+ return json_tree_lookup_path_entry(&format->tree, &root->token,
+ path, path_len, struct tuple_field,
+ token);
+}
+
+
extern struct tuple_format **tuple_formats;
static inline uint32_t
@@ -397,6 +414,18 @@ tuple_field_raw_by_name(struct tuple_format *format, const char *tuple,
}
/**
+ * Retrieve msgpack data by JSON path.
+ * @param data Pointer to msgpack with data.
+ * @param path The path to process.
+ * @param path_len The length of the @path.
+ * @retval 0 On success.
+ * @retval >0 On path parsing error, invalid character position.
+ */
+int
+tuple_field_go_to_path(const char **data, const char *path,
+ uint32_t path_len);
+
+/**
* Get tuple field by its path.
* @param format Tuple format.
* @param tuple MessagePack tuple's body.
@@ -423,12 +452,9 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
* @param part Index part to use.
* @retval Field data if the field exists or NULL.
*/
-static inline const char *
-tuple_field_by_part_raw(const struct tuple_format *format, const char *data,
- const uint32_t *field_map, struct key_part *part)
-{
- return tuple_field_raw(format, data, field_map, part->fieldno);
-}
+const char *
+tuple_field_by_part_raw(struct tuple_format *format, const char *data,
+ const uint32_t *field_map, struct key_part *part);
#if defined(__cplusplus)
} /* extern "C" */
diff --git a/src/box/tuple_hash.cc b/src/box/tuple_hash.cc
index b394804..3486ce1 100644
--- a/src/box/tuple_hash.cc
+++ b/src/box/tuple_hash.cc
@@ -222,7 +222,7 @@ key_hash_slowpath(const char *key, struct key_def *key_def);
void
tuple_hash_func_set(struct key_def *key_def) {
- if (key_def->is_nullable)
+ if (key_def->is_nullable || key_def->has_json_paths)
goto slowpath;
/*
* Check that key_def defines sequential a key without holes
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index ce81c6a..3c9fbf8 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -982,6 +982,9 @@ vinyl_index_def_change_requires_rebuild(struct index *index,
return true;
if (!field_type1_contains_type2(new_part->type, old_part->type))
return true;
+ if (json_path_cmp(old_part->path, old_part->path_len,
+ new_part->path, new_part->path_len) != 0)
+ return true;
}
return false;
}
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index 8a8f9d7..0550144 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -711,7 +711,8 @@ vy_log_record_dup(struct region *pool, const struct vy_log_record *src)
"struct key_part_def");
goto err;
}
- key_def_dump_parts(src->key_def, dst->key_parts);
+ if (key_def_dump_parts(pool, src->key_def, dst->key_parts) != 0)
+ goto err;
dst->key_part_count = src->key_def->part_count;
dst->key_def = NULL;
}
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index 7b704b8..9d5e220 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -196,8 +196,6 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx,
const struct vy_read_view **rv,
struct tuple *key, struct tuple **ret)
{
- assert(tuple_field_count(key) >= lsm->cmp_def->part_count);
-
*ret = NULL;
double start_time = ev_monotonic_now(loop());
int rc = 0;
diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
index 3e60fec..2f35284 100644
--- a/src/box/vy_stmt.c
+++ b/src/box/vy_stmt.c
@@ -29,6 +29,7 @@
* SUCH DAMAGE.
*/
+#include "assoc.h"
#include "vy_stmt.h"
#include <stdlib.h>
@@ -370,6 +371,85 @@ vy_stmt_replace_from_upsert(const struct tuple *upsert)
return replace;
}
+/**
+ * Construct a tuple or calculate its size. The fields_iov_ht
+ * is a hashtable that links leaf field records of the field
+ * path tree with iovs containing the raw data. The function
+ * also fills the tuple field_map when write_data is true.
+ */
+static void
+vy_stmt_tuple_restore_raw(struct tuple_format *format, char *tuple_raw,
+ uint32_t *field_map, char **offset,
+ struct mh_i64ptr_t *fields_iov_ht, bool write_data)
+{
+ struct tuple_field *prev = NULL;
+ struct tuple_field *curr;
+ json_tree_foreach_entry_preorder(curr, &format->tree.root,
+ struct tuple_field, token) {
+ struct json_token *curr_node = &curr->token;
+ struct tuple_field *parent =
+ curr_node->parent == NULL ? NULL :
+ json_tree_entry(curr_node->parent, struct tuple_field,
+ token);
+ if (parent != NULL && parent->type == FIELD_TYPE_ARRAY &&
+ curr_node->sibling_idx > 0) {
+ /*
+ * Fill unindexed array items with nulls.
+ * The gap size is calculated as the difference
+ * between sibling nodes.
+ */
+ for (uint32_t i = curr_node->sibling_idx - 1;
+ curr_node->parent->children[i] == NULL &&
+ i > 0; i--) {
+ *offset = !write_data ?
+ (*offset += mp_sizeof_nil()) :
+ mp_encode_nil(*offset);
+ }
+ } else if (parent != NULL && parent->type == FIELD_TYPE_MAP) {
+ /* Set map key. */
+ const char *str = curr_node->key.str;
+ uint32_t len = curr_node->key.len;
+ *offset = !write_data ?
+ (*offset += mp_sizeof_str(len)) :
+ mp_encode_str(*offset, str, len);
+ }
+ /* Fill data. */
+ uint32_t children_count = curr_node->child_count;
+ if (curr->type == FIELD_TYPE_ARRAY) {
+ *offset = !write_data ?
+ (*offset += mp_sizeof_array(children_count)) :
+ mp_encode_array(*offset, children_count);
+ } else if (curr->type == FIELD_TYPE_MAP) {
+ *offset = !write_data ?
+ (*offset += mp_sizeof_map(children_count)) :
+ mp_encode_map(*offset, children_count);
+ } else {
+ /* Leaf record. */
+ mh_int_t k = mh_i64ptr_find(fields_iov_ht,
+ (uint64_t)curr, NULL);
+ struct iovec *iov =
+ k != mh_end(fields_iov_ht) ?
+ mh_i64ptr_node(fields_iov_ht, k)->val : NULL;
+ if (iov == NULL) {
+ *offset = !write_data ?
+ (*offset += mp_sizeof_nil()) :
+ mp_encode_nil(*offset);
+ } else {
+ uint32_t data_offset = *offset - tuple_raw;
+ int32_t slot = curr->offset_slot;
+ if (write_data) {
+ memcpy(*offset, iov->iov_base,
+ iov->iov_len);
+ if (slot != TUPLE_OFFSET_SLOT_NIL)
+ field_map[slot] = data_offset;
+ }
+ *offset += iov->iov_len;
+ }
+ }
+ prev = curr;
+ }
+}
+
static struct tuple *
vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type,
const struct key_def *cmp_def,
@@ -378,51 +458,79 @@ vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type,
/* UPSERT can't be surrogate. */
assert(type != IPROTO_UPSERT);
struct region *region = &fiber()->gc;
+ struct tuple *stmt = NULL;
uint32_t field_count = format->index_field_count;
- struct iovec *iov = region_alloc(region, sizeof(*iov) * field_count);
+ uint32_t part_count = mp_decode_array(&key);
+ assert(part_count == cmp_def->part_count);
+ struct iovec *iov = region_alloc(region, sizeof(*iov) * part_count);
if (iov == NULL) {
- diag_set(OutOfMemory, sizeof(*iov) * field_count,
- "region", "iov for surrogate key");
+ diag_set(OutOfMemory, sizeof(*iov) * part_count, "region",
+ "iov for surrogate key");
return NULL;
}
- memset(iov, 0, sizeof(*iov) * field_count);
- uint32_t part_count = mp_decode_array(&key);
- assert(part_count == cmp_def->part_count);
- assert(part_count <= field_count);
- uint32_t nulls_count = field_count - cmp_def->part_count;
- uint32_t bsize = mp_sizeof_array(field_count) +
- mp_sizeof_nil() * nulls_count;
- for (uint32_t i = 0; i < part_count; ++i) {
- const struct key_part *part = &cmp_def->parts[i];
+ /* Hashtable linking a leaf field and the corresponding iov. */
+ struct mh_i64ptr_t *fields_iov_ht = mh_i64ptr_new();
+ if (fields_iov_ht == NULL) {
+ diag_set(OutOfMemory, sizeof(struct mh_i64ptr_t),
+ "mh_i64ptr_new", "fields_iov_ht");
+ return NULL;
+ }
+ if (mh_i64ptr_reserve(fields_iov_ht, part_count, NULL) != 0) {
+ diag_set(OutOfMemory, part_count, "mh_i64ptr_reserve",
+ "fields_iov_ht");
+ goto end;
+ }
+ memset(iov, 0, sizeof(*iov) * part_count);
+ const struct key_part *part = cmp_def->parts;
+ for (uint32_t i = 0; i < part_count; ++i, ++part) {
assert(part->fieldno < field_count);
const char *svp = key;
- iov[part->fieldno].iov_base = (char *) key;
+ iov[i].iov_base = (char *) key;
mp_next(&key);
- iov[part->fieldno].iov_len = key - svp;
- bsize += key - svp;
+ iov[i].iov_len = key - svp;
+ struct tuple_field *field;
+ field = tuple_format_field(format, part->fieldno);
+ assert(field != NULL);
+ if (unlikely(part->path != NULL)) {
+ field = tuple_format_field_by_path(format, field,
+ part->path,
+ part->path_len);
+ }
+ assert(field != NULL);
+ struct mh_i64ptr_node_t node = {(uint64_t)field, &iov[i]};
+ mh_int_t k = mh_i64ptr_put(fields_iov_ht, &node, NULL, NULL);
+ if (unlikely(k == mh_end(fields_iov_ht))) {
+ diag_set(OutOfMemory, part_count, "mh_i64ptr_put",
+ "fields_iov_ht");
+ goto end;
+ }
+ k = mh_i64ptr_find(fields_iov_ht, (uint64_t)field, NULL);
+ assert(k != mh_end(fields_iov_ht));
}
+ /* Calculate tuple size to make allocation. */
+ char *data = NULL;
+ vy_stmt_tuple_restore_raw(format, NULL, NULL, &data, fields_iov_ht,
+ false);
+ uint32_t bsize = mp_sizeof_array(field_count) + data - (char *)NULL;
- struct tuple *stmt = vy_stmt_alloc(format, bsize);
+ stmt = vy_stmt_alloc(format, bsize);
if (stmt == NULL)
- return NULL;
+ goto end;
+ /* Construct tuple. */
char *raw = (char *) tuple_data(stmt);
uint32_t *field_map = (uint32_t *) raw;
+ memset((char *)field_map - format->field_map_size, 0,
+ format->field_map_size);
char *wpos = mp_encode_array(raw, field_count);
- for (uint32_t i = 0; i < field_count; ++i) {
- const struct tuple_field *field = tuple_format_field(format, i);
- if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
- field_map[field->offset_slot] = wpos - raw;
- if (iov[i].iov_base == NULL) {
- wpos = mp_encode_nil(wpos);
- } else {
- memcpy(wpos, iov[i].iov_base, iov[i].iov_len);
- wpos += iov[i].iov_len;
- }
- }
- assert(wpos == raw + bsize);
+ vy_stmt_tuple_restore_raw(format, raw, field_map, &wpos, fields_iov_ht,
+ true);
+
+ assert(wpos <= raw + bsize);
vy_stmt_set_type(stmt, type);
+end:
+ mh_i64ptr_delete(fields_iov_ht);
return stmt;
}
diff --git a/test/box/misc.result b/test/box/misc.result
index 9f863d9..97070f3 100644
--- a/test/box/misc.result
+++ b/test/box/misc.result
@@ -415,6 +415,7 @@ t;
83: box.error.ROLE_EXISTS
84: box.error.CREATE_ROLE
85: box.error.INDEX_EXISTS
+ 86: box.error.DATA_STRUCTURE_MISMATCH
87: box.error.ROLE_LOOP
88: box.error.GRANT
89: box.error.PRIV_GRANTED
diff --git a/test/engine/tuple.result b/test/engine/tuple.result
index 35c700e..322821e 100644
--- a/test/engine/tuple.result
+++ b/test/engine/tuple.result
@@ -954,6 +954,422 @@ type(tuple:tomap().fourth)
s:drop()
---
...
+--
+-- gh-1012: Indexes for JSON-defined paths.
+--
+box.cfg()
+---
+...
+s = box.schema.space.create('withdata', {engine = engine})
+---
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}})
+---
+- error: 'Can''t create or modify index ''test1'' in space ''withdata'': same key
+ part is indexed twice'
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}})
+---
+- error: 'Wrong index options (field 2): ''path'' must be string'
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = 'FIO'}}})
+---
+- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type
+ ''map'' is not supported'
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}})
+---
+- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type
+ ''array'' is not supported'
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO'}, {3, 'str', path = '["FIO"].fname'}}})
+---
+- error: Field [2]["FIO"].fname has type 'string' in one index, but type 'map' in
+ another
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}})
+---
+- error: Field [2]["FIO"].fname has type 'array' in one index, but type 'map' in another
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO....fname'}}})
+---
+- error: 'Wrong index options (field 3): invalid JSON path ''FIO....fname'': path
+ has invalid structure (error at position 5)'
+...
+idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO.fname'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+---
+...
+assert(idx ~= nil)
+---
+- true
+...
+assert(idx.parts[2].path == "FIO.fname")
+---
+- true
+...
+s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5}
+---
+- error: 'Tuple field 3 type does not match one required by operation: expected map'
+...
+s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5}
+---
+- error: 'Tuple field 3 type does not match one required by operation: expected string'
+...
+s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5}
+---
+- error: 'Tuple doesn''t math document structure: invalid field 3 document content:
+ map doesn''t contain a key ''sname'' defined in index'
+...
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+---
+- [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+---
+- error: Duplicate key exists in unique index 'test1' in space 'withdata'
+...
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5}
+---
+- error: Duplicate key exists in unique index 'test1' in space 'withdata'
+...
+s:insert{7, 7, {town = 'Moscow', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5}
+---
+- [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}},
+ 4, 5]
+...
+idx:select()
+---
+- - [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+ - [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}},
+ 4, 5]
+...
+idx:min()
+---
+- [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+idx:max()
+---
+- [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}},
+ 4, 5]
+...
+s:drop()
+---
+...
+s = box.schema.create_space('withdata', {engine = engine})
+---
+...
+parts = {}
+---
+...
+parts[1] = {1, 'unsigned', path='[2]'}
+---
+...
+pk = s:create_index('pk', {parts = parts})
+---
+...
+s:insert{{1, 2}, 3}
+---
+- [[1, 2], 3]
+...
+s:upsert({{box.null, 2}}, {{'+', 2, 5}})
+---
+...
+s:get(2)
+---
+- [[1, 2], 8]
+...
+s:drop()
+---
+...
+-- Create index on space with data
+s = box.schema.space.create('withdata', {engine = engine})
+---
+...
+pk = s:create_index('primary', { type = 'tree' })
+---
+...
+s:insert{1, 7, {town = 'London', FIO = 1234}, 4, 5}
+---
+- [1, 7, {'town': 'London', 'FIO': 1234}, 4, 5]
+...
+s:insert{2, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+---
+- [2, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+s:insert{3, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+---
+- [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+s:insert{4, 7, {town = 'London', FIO = {1,2,3}}, 4, 5}
+---
+- [4, 7, {'town': 'London', 'FIO': [1, 2, 3]}, 4, 5]
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+---
+- error: 'Tuple field 3 type does not match one required by operation: expected map'
+...
+_ = s:delete(1)
+---
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+---
+- error: Duplicate key exists in unique index 'test1' in space 'withdata'
+...
+_ = s:delete(2)
+---
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+---
+- error: 'Tuple field 3 type does not match one required by operation: expected map'
+...
+_ = s:delete(4)
+---
+...
+idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]', is_nullable = true}, {3, 'str', path = '["FIO"]["sname"]'}, {3, 'str', path = '["FIO"]["extra"]', is_nullable = true}}})
+---
+...
+assert(idx ~= nil)
+---
+- true
+...
+s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '["FIO"]["fname"]'}}})
+---
+- error: Field [3]["FIO"]["fname"] has type 'string' in one index, but type 'number'
+ in another
+...
+idx2 = s:create_index('test2', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}}})
+---
+...
+assert(idx2 ~= nil)
+---
+- true
+...
+t = s:insert{5, 7, {town = 'Matrix', FIO = {fname = 'Agent', sname = 'Smith'}}, 4, 5}
+---
+...
+idx:select()
+---
+- - [5, 7, {'town': 'Matrix', 'FIO': {'fname': 'Agent', 'sname': 'Smith'}}, 4, 5]
+ - [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+idx:min()
+---
+- [5, 7, {'town': 'Matrix', 'FIO': {'fname': 'Agent', 'sname': 'Smith'}}, 4, 5]
+...
+idx:max()
+---
+- [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+idx:drop()
+---
+...
+s:drop()
+---
+...
+-- Test complex JSON indexes
+s = box.schema.space.create('withdata', {engine = engine})
+---
+...
+parts = {}
+---
+...
+parts[1] = {1, 'str', path='[3][2].a'}
+---
+...
+parts[2] = {1, 'unsigned', path = '[3][1]'}
+---
+...
+parts[3] = {2, 'str', path = '[2].d[1]'}
+---
+...
+pk = s:create_index('primary', { type = 'tree', parts = parts})
+---
+...
+s:insert{{1, 2, {3, {3, a = 'str', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {1, 2, 3}}
+---
+- [[1, 2, [3, {1: 3, 'a': 'str', 'b': 5}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6,
+ [1, 2, 3]]
+...
+s:insert{{1, 2, {3, {a = 'str', b = 1}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6}
+---
+- error: Duplicate key exists in unique index 'primary' in space 'withdata'
+...
+parts = {}
+---
+...
+parts[1] = {4, 'unsigned', path='[1]', is_nullable = false}
+---
+...
+parts[2] = {4, 'unsigned', path='[2]', is_nullable = true}
+---
+...
+parts[3] = {4, 'unsigned', path='[4]', is_nullable = true}
+---
+...
+trap_idx = s:create_index('trap', { type = 'tree', parts = parts})
+---
+...
+s:insert{{1, 2, {3, {3, a = 'str2', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {}}
+---
+- error: 'Tuple doesn''t math document structure: invalid field 4 document content:
+ array size 1 is less than size 4 defined in index'
+...
+parts = {}
+---
+...
+parts[1] = {1, 'unsigned', path='[3][2].b' }
+---
+...
+parts[2] = {3, 'unsigned'}
+---
+...
+crosspart_idx = s:create_index('crosspart', { parts = parts})
+---
+...
+s:insert{{1, 2, {3, {a = 'str2', b = 2}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {9, 2, 3}}
+---
+- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9,
+ 2, 3]]
+...
+parts = {}
+---
+...
+parts[1] = {1, 'unsigned', path='[3][2].b'}
+---
+...
+num_idx = s:create_index('numeric', {parts = parts})
+---
+...
+s:insert{{1, 2, {3, {a = 'str3', b = 9}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {0}}
+---
+- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]]
+...
+num_idx:get(2)
+---
+- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9,
+ 2, 3]]
+...
+num_idx:select()
+---
+- - [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [
+ 9, 2, 3]]
+ - [[1, 2, [3, {1: 3, 'a': 'str', 'b': 5}]], ['c', {'d': ['e', 'f'], 'e': 'g'}],
+ 6, [1, 2, 3]]
+ - [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [
+ 0]]
+...
+num_idx:max()
+---
+- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]]
+...
+num_idx:min()
+---
+- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9,
+ 2, 3]]
+...
+assert(crosspart_idx:max() == num_idx:max())
+---
+- true
+...
+assert(crosspart_idx:min() == num_idx:min())
+---
+- true
+...
+trap_idx:max()
+---
+- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9,
+ 2, 3]]
+...
+trap_idx:min()
+---
+- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]]
+...
+s:drop()
+---
+...
+s = box.schema.space.create('withdata', {engine = engine})
+---
+...
+pk_simplified = s:create_index('primary', { type = 'tree', parts = {{1, 'unsigned'}}})
+---
+...
+assert(pk_simplified.path == box.NULL)
+---
+- true
+...
+idx = s:create_index('idx', {parts = {{2, 'integer', path = 'a'}}})
+---
+...
+s:insert{31, {a = 1, aa = -1}}
+---
+- [31, {'a': 1, 'aa': -1}]
+...
+s:insert{22, {a = 2, aa = -2}}
+---
+- [22, {'a': 2, 'aa': -2}]
+...
+s:insert{13, {a = 3, aa = -3}}
+---
+- [13, {'a': 3, 'aa': -3}]
+...
+idx:select()
+---
+- - [31, {'a': 1, 'aa': -1}]
+ - [22, {'a': 2, 'aa': -2}]
+ - [13, {'a': 3, 'aa': -3}]
+...
+idx:alter({parts = {{2, 'integer', path = 'aa'}}})
+---
+...
+idx:select()
+---
+- - [13, {'a': 3, 'aa': -3}]
+ - [22, {'a': 2, 'aa': -2}]
+ - [31, {'a': 1, 'aa': -1}]
+...
+s:drop()
+---
+...
+-- incompatible format change
+s = box.schema.space.create('test')
+---
+...
+i = s:create_index('pk', {parts = {{1, 'integer', path = '[1]'}}})
+---
+...
+s:insert{{-1}}
+---
+- [[-1]]
+...
+i:alter{parts = {{1, 'string', path = '[1]'}}}
+---
+- error: 'Tuple field 1 type does not match one required by operation: expected string'
+...
+s:insert{{'a'}}
+---
+- error: 'Tuple field 1 type does not match one required by operation: expected integer'
+...
+i:drop()
+---
+...
+i = s:create_index('pk', {parts = {{1, 'integer', path = '[1].FIO'}}})
+---
+...
+s:insert{{{FIO=-1}}}
+---
+- [[{'FIO': -1}]]
+...
+i:alter{parts = {{1, 'integer', path = '[1][1]'}}}
+---
+- error: 'Tuple field 1 type does not match one required by operation: expected array'
+...
+i:alter{parts = {{1, 'integer', path = '[1].FIO[1]'}}}
+---
+- error: 'Tuple field 1 type does not match one required by operation: expected array'
+...
+s:drop()
+---
+...
engine = nil
---
...
diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua
index edc3dab..d53ab42 100644
--- a/test/engine/tuple.test.lua
+++ b/test/engine/tuple.test.lua
@@ -312,5 +312,126 @@ tuple:tomap().fourth
type(tuple:tomap().fourth)
s:drop()
+--
+-- gh-1012: Indexes for JSON-defined paths.
+--
+box.cfg()
+s = box.schema.space.create('withdata', {engine = engine})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = 'FIO'}}})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO'}, {3, 'str', path = '["FIO"].fname'}}})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO....fname'}}})
+idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO.fname'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+assert(idx ~= nil)
+assert(idx.parts[2].path == "FIO.fname")
+s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5}
+s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5}
+s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5}
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5}
+s:insert{7, 7, {town = 'Moscow', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5}
+idx:select()
+idx:min()
+idx:max()
+s:drop()
+
+s = box.schema.create_space('withdata', {engine = engine})
+parts = {}
+parts[1] = {1, 'unsigned', path='[2]'}
+pk = s:create_index('pk', {parts = parts})
+s:insert{{1, 2}, 3}
+s:upsert({{box.null, 2}}, {{'+', 2, 5}})
+s:get(2)
+s:drop()
+
+-- Create index on space with data
+s = box.schema.space.create('withdata', {engine = engine})
+pk = s:create_index('primary', { type = 'tree' })
+s:insert{1, 7, {town = 'London', FIO = 1234}, 4, 5}
+s:insert{2, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+s:insert{3, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+s:insert{4, 7, {town = 'London', FIO = {1,2,3}}, 4, 5}
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+_ = s:delete(1)
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+_ = s:delete(2)
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+_ = s:delete(4)
+idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]', is_nullable = true}, {3, 'str', path = '["FIO"]["sname"]'}, {3, 'str', path = '["FIO"]["extra"]', is_nullable = true}}})
+assert(idx ~= nil)
+s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '["FIO"]["fname"]'}}})
+idx2 = s:create_index('test2', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}}})
+assert(idx2 ~= nil)
+t = s:insert{5, 7, {town = 'Matrix', FIO = {fname = 'Agent', sname = 'Smith'}}, 4, 5}
+idx:select()
+idx:min()
+idx:max()
+idx:drop()
+s:drop()
+
+-- Test complex JSON indexes
+s = box.schema.space.create('withdata', {engine = engine})
+parts = {}
+parts[1] = {1, 'str', path='[3][2].a'}
+parts[2] = {1, 'unsigned', path = '[3][1]'}
+parts[3] = {2, 'str', path = '[2].d[1]'}
+pk = s:create_index('primary', { type = 'tree', parts = parts})
+s:insert{{1, 2, {3, {3, a = 'str', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {1, 2, 3}}
+s:insert{{1, 2, {3, {a = 'str', b = 1}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6}
+parts = {}
+parts[1] = {4, 'unsigned', path='[1]', is_nullable = false}
+parts[2] = {4, 'unsigned', path='[2]', is_nullable = true}
+parts[3] = {4, 'unsigned', path='[4]', is_nullable = true}
+trap_idx = s:create_index('trap', { type = 'tree', parts = parts})
+s:insert{{1, 2, {3, {3, a = 'str2', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {}}
+parts = {}
+parts[1] = {1, 'unsigned', path='[3][2].b' }
+parts[2] = {3, 'unsigned'}
+crosspart_idx = s:create_index('crosspart', { parts = parts})
+s:insert{{1, 2, {3, {a = 'str2', b = 2}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {9, 2, 3}}
+parts = {}
+parts[1] = {1, 'unsigned', path='[3][2].b'}
+num_idx = s:create_index('numeric', {parts = parts})
+s:insert{{1, 2, {3, {a = 'str3', b = 9}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {0}}
+num_idx:get(2)
+num_idx:select()
+num_idx:max()
+num_idx:min()
+assert(crosspart_idx:max() == num_idx:max())
+assert(crosspart_idx:min() == num_idx:min())
+trap_idx:max()
+trap_idx:min()
+s:drop()
+
+s = box.schema.space.create('withdata', {engine = engine})
+pk_simplified = s:create_index('primary', { type = 'tree', parts = {{1, 'unsigned'}}})
+assert(pk_simplified.path == box.NULL)
+idx = s:create_index('idx', {parts = {{2, 'integer', path = 'a'}}})
+s:insert{31, {a = 1, aa = -1}}
+s:insert{22, {a = 2, aa = -2}}
+s:insert{13, {a = 3, aa = -3}}
+idx:select()
+idx:alter({parts = {{2, 'integer', path = 'aa'}}})
+idx:select()
+s:drop()
+
+-- incompatible format change
+s = box.schema.space.create('test')
+i = s:create_index('pk', {parts = {{1, 'integer', path = '[1]'}}})
+s:insert{{-1}}
+i:alter{parts = {{1, 'string', path = '[1]'}}}
+s:insert{{'a'}}
+i:drop()
+i = s:create_index('pk', {parts = {{1, 'integer', path = '[1].FIO'}}})
+s:insert{{{FIO=-1}}}
+i:alter{parts = {{1, 'integer', path = '[1][1]'}}}
+i:alter{parts = {{1, 'integer', path = '[1].FIO[1]'}}}
+s:drop()
+
+
engine = nil
test_run = nil
--
2.7.4
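
Condensing the .result/.test.lua diffs above into a single usage sketch
(illustrative names only; the default memtx engine and TREE index type are
assumed):

    s = box.schema.space.create('withdata')
    -- one scalar part plus two string leaves inside the map in field 3;
    -- both dot and bracket path notations are accepted
    idx = s:create_index('test1', {parts = {
        {2, 'number'},
        {3, 'str', path = 'FIO.fname'},
        {3, 'str', path = '["FIO"]["sname"]'}}})
    s:insert{7, 7, {town = 'London',
                    FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
    -- JSON-path parts participate in lookups like ordinary parts
    idx:select{7, 'James', 'Bond'}
    s:drop()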