From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com
Cc: kostja@tarantool.org, Kirill Shcherbatov <kshcherbatov@tarantool.org>
Subject: [PATCH v7 1/5] box: introduce JSON Indexes
Date: Wed, 9 Jan 2019 11:29:36 +0300 [thread overview]
Message-ID: <90b211d01c5a7af0e5b3015c1a33b0b27d432ab0.1547022001.git.kshcherbatov@tarantool.org> (raw)
In-Reply-To: <cover.1547022001.git.kshcherbatov@tarantool.org>
New JSON indexes allow indexing the content of documents.
First, new key_part fields path and path_len are introduced; they
represent the JSON path string specified by the user. The modified
tuple_format_use_key_part routine constructs the corresponding
chain of tuple_fields in the tuple_format:fields tree leading to
the indexed data. The resulting tree is used for type checking and
for allocating offset slots for indexed fields.
The refined tuple_init_field_map routine parses the tuple msgpack
in depth using a stack allocated on the region and initializes the
field map with the corresponding tuple_format:field, if any. This
stack is necessary because the length of an mp-container (map or
array) is specified at the beginning of its frame, but this
information is also required to determine where the container ends.
The other essential feature is restoring vinyl's secondary key
statements (stmt) from key_part-extracted keys loaded from disk.
The new tuple_format_stmt_encode routine traverses the
tuple_format:fields tree and constructs vy_stmt data, using an
array of iovecs to place data blobs for indexed leaves.
Also introduced vy_stmt_meta_size - the precalculated stmt size as
if all leaf fields are zero. It allows allocating the stmt chunk
without an extra traversal of the tree.
Example:
To create a new JSON index specify path to document data as a
part of key_part:
parts = {{3, 'str', path = '.FIO.fname', is_nullable = false}}
idx = s:create_index('json_idx', {parts = parts})
idx:select("Ivanov")
Part of #1012
---
src/box/alter.cc | 2 +-
src/box/index_def.c | 10 +-
src/box/key_def.c | 166 +++++++++++--
src/box/key_def.h | 33 ++-
src/box/lua/space.cc | 5 +
src/box/memtx_engine.c | 4 +
src/box/schema_def.h | 1 +
src/box/sql.c | 1 +
src/box/sql/build.c | 1 +
src/box/sql/select.c | 3 +-
src/box/sql/where.c | 1 +
src/box/tuple_compare.cc | 7 +-
| 26 +-
src/box/tuple_format.c | 463 +++++++++++++++++++++++++++++------
src/box/tuple_format.h | 70 +++++-
src/box/tuple_hash.cc | 2 +-
src/box/vinyl.c | 4 +
src/box/vy_log.c | 11 +-
src/box/vy_point_lookup.c | 2 -
src/box/vy_stmt.c | 49 ++--
src/lib/json/json.c | 7 +-
src/lib/json/json.h | 16 ++
test/engine/json.result | 448 +++++++++++++++++++++++++++++++++
test/engine/json.test.lua | 129 ++++++++++
24 files changed, 1321 insertions(+), 140 deletions(-)
create mode 100644 test/engine/json.result
create mode 100644 test/engine/json.test.lua
diff --git a/src/box/alter.cc b/src/box/alter.cc
index 0589c9678..9656a4189 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -268,7 +268,7 @@ index_def_new_from_tuple(struct tuple *tuple, struct space *space)
});
if (key_def_decode_parts(part_def, part_count, &parts,
space->def->fields,
- space->def->field_count) != 0)
+ space->def->field_count, &fiber()->gc) != 0)
diag_raise();
key_def = key_def_new(part_def, part_count);
if (key_def == NULL)
diff --git a/src/box/index_def.c b/src/box/index_def.c
index 2ba57ee9d..58137ed07 100644
--- a/src/box/index_def.c
+++ b/src/box/index_def.c
@@ -31,6 +31,8 @@
#include "index_def.h"
#include "schema_def.h"
#include "identifier.h"
+#include "tuple_format.h"
+#include "json/json.h"
const char *index_type_strs[] = { "HASH", "TREE", "BITSET", "RTREE" };
@@ -278,8 +280,12 @@ index_def_is_valid(struct index_def *index_def, const char *space_name)
* Courtesy to a user who could have made
* a typo.
*/
- if (index_def->key_def->parts[i].fieldno ==
- index_def->key_def->parts[j].fieldno) {
+ struct key_part *part_a = &index_def->key_def->parts[i];
+ struct key_part *part_b = &index_def->key_def->parts[j];
+ if (part_a->fieldno == part_b->fieldno &&
+ json_path_cmp(part_a->path, part_a->path_len,
+ part_b->path, part_b->path_len,
+ TUPLE_INDEX_BASE) == 0){
diag_set(ClientError, ER_MODIFY_INDEX,
index_def->name, space_name,
"same key part is indexed twice");
diff --git a/src/box/key_def.c b/src/box/key_def.c
index dae3580e2..3012b05df 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -28,6 +28,7 @@
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+#include "json/json.h"
#include "key_def.h"
#include "tuple_compare.h"
#include "tuple_extract_key.h"
@@ -35,6 +36,7 @@
#include "column_mask.h"
#include "schema_def.h"
#include "coll_id_cache.h"
+#include "small/region.h"
const char *sort_order_strs[] = { "asc", "desc", "undef" };
@@ -44,7 +46,8 @@ const struct key_part_def key_part_def_default = {
COLL_NONE,
false,
ON_CONFLICT_ACTION_DEFAULT,
- SORT_ORDER_ASC
+ SORT_ORDER_ASC,
+ NULL
};
static int64_t
@@ -59,6 +62,7 @@ part_type_by_name_wrapper(const char *str, uint32_t len)
#define PART_OPT_NULLABILITY "is_nullable"
#define PART_OPT_NULLABLE_ACTION "nullable_action"
#define PART_OPT_SORT_ORDER "sort_order"
+#define PART_OPT_PATH "path"
const struct opt_def part_def_reg[] = {
OPT_DEF_ENUM(PART_OPT_TYPE, field_type, struct key_part_def, type,
@@ -71,19 +75,30 @@ const struct opt_def part_def_reg[] = {
struct key_part_def, nullable_action, NULL),
OPT_DEF_ENUM(PART_OPT_SORT_ORDER, sort_order, struct key_part_def,
sort_order, NULL),
+ OPT_DEF(PART_OPT_PATH, OPT_STRPTR, struct key_part_def, path),
OPT_END,
};
struct key_def *
key_def_dup(const struct key_def *src)
{
- size_t sz = key_def_sizeof(src->part_count);
- struct key_def *res = (struct key_def *)malloc(sz);
+ size_t sz = 0;
+ for (uint32_t i = 0; i < src->part_count; i++)
+ sz += src->parts[i].path_len;
+ sz = key_def_sizeof(src->part_count, sz);
+ struct key_def *res = (struct key_def *)calloc(1, sz);
if (res == NULL) {
diag_set(OutOfMemory, sz, "malloc", "res");
return NULL;
}
memcpy(res, src, sz);
+ /* Update paths to point to the new memory chunk.*/
+ for (uint32_t i = 0; i < src->part_count; i++) {
+ if (src->parts[i].path == NULL)
+ continue;
+ size_t path_offset = src->parts[i].path - (char *)src;
+ res->parts[i].path = (char *)res + path_offset;
+ }
return res;
}
@@ -91,8 +106,16 @@ void
key_def_swap(struct key_def *old_def, struct key_def *new_def)
{
assert(old_def->part_count == new_def->part_count);
- for (uint32_t i = 0; i < new_def->part_count; i++)
+ for (uint32_t i = 0; i < new_def->part_count; i++) {
SWAP(old_def->parts[i], new_def->parts[i]);
+ /*
+ * Paths are allocated as a part of key_def so
+ * we need to swap path pointers back - it's OK
+ * as paths aren't supposed to change.
+ */
+ assert(old_def->parts[i].path_len == new_def->parts[i].path_len);
+ SWAP(old_def->parts[i].path, new_def->parts[i].path);
+ }
SWAP(*old_def, *new_def);
}
@@ -115,24 +138,39 @@ static void
key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
enum field_type type, enum on_conflict_action nullable_action,
struct coll *coll, uint32_t coll_id,
- enum sort_order sort_order)
+ enum sort_order sort_order, const char *path,
+ uint32_t path_len, char **paths)
{
assert(part_no < def->part_count);
assert(type < field_type_MAX);
def->is_nullable |= (nullable_action == ON_CONFLICT_ACTION_NONE);
+ def->has_json_paths |= path != NULL;
def->parts[part_no].nullable_action = nullable_action;
def->parts[part_no].fieldno = fieldno;
def->parts[part_no].type = type;
def->parts[part_no].coll = coll;
def->parts[part_no].coll_id = coll_id;
def->parts[part_no].sort_order = sort_order;
+ if (path != NULL) {
+ assert(paths != NULL);
+ def->parts[part_no].path = *paths;
+ *paths += path_len;
+ memcpy(def->parts[part_no].path, path, path_len);
+ def->parts[part_no].path_len = path_len;
+ } else {
+ def->parts[part_no].path = NULL;
+ def->parts[part_no].path_len = 0;
+ }
column_mask_set_fieldno(&def->column_mask, fieldno);
}
struct key_def *
key_def_new(const struct key_part_def *parts, uint32_t part_count)
{
- size_t sz = key_def_sizeof(part_count);
+ ssize_t sz = 0;
+ for (uint32_t i = 0; i < part_count; i++)
+ sz += parts[i].path != NULL ? strlen(parts[i].path) : 0;
+ sz = key_def_sizeof(part_count, sz);
struct key_def *def = calloc(1, sz);
if (def == NULL) {
diag_set(OutOfMemory, sz, "malloc", "struct key_def");
@@ -142,6 +180,8 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count)
def->part_count = part_count;
def->unique_part_count = part_count;
+ /* Paths data in key_def chunk. */
+ char *paths = (char *)def + key_def_sizeof(part_count, 0);
for (uint32_t i = 0; i < part_count; i++) {
const struct key_part_def *part = &parts[i];
struct coll *coll = NULL;
@@ -155,16 +195,18 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count)
}
coll = coll_id->coll;
}
+ uint32_t path_len = part->path != NULL ? strlen(part->path) : 0;
key_def_set_part(def, i, part->fieldno, part->type,
part->nullable_action, coll, part->coll_id,
- part->sort_order);
+ part->sort_order, part->path, path_len, &paths);
}
key_def_set_cmp(def);
return def;
}
-void
-key_def_dump_parts(const struct key_def *def, struct key_part_def *parts)
+int
+key_def_dump_parts(const struct key_def *def, struct key_part_def *parts,
+ struct region *region)
{
for (uint32_t i = 0; i < def->part_count; i++) {
const struct key_part *part = &def->parts[i];
@@ -174,13 +216,27 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts)
part_def->is_nullable = key_part_is_nullable(part);
part_def->nullable_action = part->nullable_action;
part_def->coll_id = part->coll_id;
+ if (part->path != NULL) {
+ char *path = region_alloc(region, part->path_len + 1);
+ if (path == NULL) {
+ diag_set(OutOfMemory, part->path_len + 1,
+ "region_alloc", "part_def->path");
+ return -1;
+ }
+ memcpy(path, part->path, part->path_len);
+ path[part->path_len] = '\0';
+ part_def->path = path;
+ } else {
+ part_def->path = NULL;
+ }
}
+ return 0;
}
box_key_def_t *
box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
{
- size_t sz = key_def_sizeof(part_count);
+ size_t sz = key_def_sizeof(part_count, 0);
struct key_def *key_def = calloc(1, sz);
if (key_def == NULL) {
diag_set(OutOfMemory, sz, "malloc", "struct key_def");
@@ -194,7 +250,8 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
key_def_set_part(key_def, item, fields[item],
(enum field_type)types[item],
ON_CONFLICT_ACTION_DEFAULT,
- NULL, COLL_NONE, SORT_ORDER_ASC);
+ NULL, COLL_NONE, SORT_ORDER_ASC, NULL, 0,
+ NULL);
}
key_def_set_cmp(key_def);
return key_def;
@@ -243,6 +300,11 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1,
if (key_part_is_nullable(part1) != key_part_is_nullable(part2))
return key_part_is_nullable(part1) <
key_part_is_nullable(part2) ? -1 : 1;
+ int rc = json_path_cmp(part1->path, part1->path_len,
+ part2->path, part2->path_len,
+ TUPLE_INDEX_BASE);
+ if (rc != 0)
+ return rc;
}
return part_count1 < part_count2 ? -1 : part_count1 > part_count2;
}
@@ -274,8 +336,15 @@ key_def_snprint_parts(char *buf, int size, const struct key_part_def *parts,
for (uint32_t i = 0; i < part_count; i++) {
const struct key_part_def *part = &parts[i];
assert(part->type < field_type_MAX);
- SNPRINT(total, snprintf, buf, size, "%d, '%s'",
- (int)part->fieldno, field_type_strs[part->type]);
+ if (part->path != NULL) {
+ SNPRINT(total, snprintf, buf, size, "%d, '%s', '%s'",
+ (int)part->fieldno, field_type_strs[part->type],
+ part->path);
+ } else {
+ SNPRINT(total, snprintf, buf, size, "%d, '%s'",
+ (int)part->fieldno,
+ field_type_strs[part->type]);
+ }
if (i < part_count - 1)
SNPRINT(total, snprintf, buf, size, ", ");
}
@@ -294,6 +363,8 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count)
count++;
if (part->is_nullable)
count++;
+ if (part->path != NULL)
+ count++;
size += mp_sizeof_map(count);
size += mp_sizeof_str(strlen(PART_OPT_FIELD));
size += mp_sizeof_uint(part->fieldno);
@@ -308,6 +379,10 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count)
size += mp_sizeof_str(strlen(PART_OPT_NULLABILITY));
size += mp_sizeof_bool(part->is_nullable);
}
+ if (part->path != NULL) {
+ size += mp_sizeof_str(strlen(PART_OPT_PATH));
+ size += mp_sizeof_str(strlen(part->path));
+ }
}
return size;
}
@@ -323,6 +398,8 @@ key_def_encode_parts(char *data, const struct key_part_def *parts,
count++;
if (part->is_nullable)
count++;
+ if (part->path != NULL)
+ count++;
data = mp_encode_map(data, count);
data = mp_encode_str(data, PART_OPT_FIELD,
strlen(PART_OPT_FIELD));
@@ -342,6 +419,12 @@ key_def_encode_parts(char *data, const struct key_part_def *parts,
strlen(PART_OPT_NULLABILITY));
data = mp_encode_bool(data, part->is_nullable);
}
+ if (part->path != NULL) {
+ data = mp_encode_str(data, PART_OPT_PATH,
+ strlen(PART_OPT_PATH));
+ data = mp_encode_str(data, part->path,
+ strlen(part->path));
+ }
}
return data;
}
@@ -403,6 +486,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count,
fields[part->fieldno].is_nullable :
key_part_def_default.is_nullable);
part->coll_id = COLL_NONE;
+ part->path = NULL;
}
return 0;
}
@@ -410,7 +494,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count,
int
key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
const char **data, const struct field_def *fields,
- uint32_t field_count)
+ uint32_t field_count, struct region *region)
{
if (mp_typeof(**data) == MP_ARRAY) {
return key_def_decode_parts_166(parts, part_count, data,
@@ -439,7 +523,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
const char *key = mp_decode_str(data, &key_len);
if (opts_parse_key(part, part_def_reg, key, key_len, data,
ER_WRONG_INDEX_OPTIONS,
- i + TUPLE_INDEX_BASE, NULL,
+ i + TUPLE_INDEX_BASE, region,
false) != 0)
return -1;
if (is_action_missing &&
@@ -485,6 +569,27 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
"index part: unknown sort order");
return -1;
}
+ if (part->path != NULL) {
+ uint32_t path_len = strlen(part->path);
+ if (path_len > BOX_JSON_PATH_MAX) {
+ diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
+ "JSON path is too long");
+ return -1;
+ }
+ int rc = json_path_validate(part->path, path_len,
+ TUPLE_INDEX_BASE);
+ if (rc != 0) {
+ const char *err_msg =
+ tt_sprintf("invalid JSON path '%s': "
+ "error in path on "
+ "position %d", part->path,
+ rc);
+ diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
+ part->fieldno + TUPLE_INDEX_BASE,
+ err_msg);
+ return -1;
+ }
+ }
}
return 0;
}
@@ -504,7 +609,10 @@ key_def_find(const struct key_def *key_def, const struct key_part *to_find)
const struct key_part *part = key_def->parts;
const struct key_part *end = part + key_def->part_count;
for (; part != end; part++) {
- if (part->fieldno == to_find->fieldno)
+ if (part->fieldno == to_find->fieldno &&
+ json_path_cmp(part->path, part->path_len,
+ to_find->path, to_find->path_len,
+ TUPLE_INDEX_BASE) == 0)
return part;
}
return NULL;
@@ -530,18 +638,25 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
* Find and remove part duplicates, i.e. parts counted
* twice since they are present in both key defs.
*/
- const struct key_part *part = second->parts;
- const struct key_part *end = part + second->part_count;
+ size_t sz = 0;
+ const struct key_part *part = first->parts;
+ const struct key_part *end = part + first->part_count;
+ for (; part != end; part++)
+ sz += part->path_len;
+ part = second->parts;
+ end = part + second->part_count;
for (; part != end; part++) {
if (key_def_find(first, part) != NULL)
--new_part_count;
+ else
+ sz += part->path_len;
}
+ sz = key_def_sizeof(new_part_count, sz);
struct key_def *new_def;
- new_def = (struct key_def *)calloc(1, key_def_sizeof(new_part_count));
+ new_def = (struct key_def *)calloc(1, sz);
if (new_def == NULL) {
- diag_set(OutOfMemory, key_def_sizeof(new_part_count), "malloc",
- "new_def");
+ diag_set(OutOfMemory, sz, "malloc", "new_def");
return NULL;
}
new_def->part_count = new_part_count;
@@ -549,6 +664,9 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
new_def->is_nullable = first->is_nullable || second->is_nullable;
new_def->has_optional_parts = first->has_optional_parts ||
second->has_optional_parts;
+
+ /* Paths data in the new key_def chunk. */
+ char *paths = (char *)new_def + key_def_sizeof(new_part_count, 0);
/* Write position in the new key def. */
uint32_t pos = 0;
/* Append first key def's parts to the new index_def. */
@@ -557,7 +675,8 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
for (; part != end; part++) {
key_def_set_part(new_def, pos++, part->fieldno, part->type,
part->nullable_action, part->coll,
- part->coll_id, part->sort_order);
+ part->coll_id, part->sort_order, part->path,
+ part->path_len, &paths);
}
/* Set-append second key def's part to the new key def. */
@@ -568,7 +687,8 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
continue;
key_def_set_part(new_def, pos++, part->fieldno, part->type,
part->nullable_action, part->coll,
- part->coll_id, part->sort_order);
+ part->coll_id, part->sort_order, part->path,
+ part->path_len, &paths);
}
key_def_set_cmp(new_def);
return new_def;
diff --git a/src/box/key_def.h b/src/box/key_def.h
index d1866303b..c6b7a8c74 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -64,6 +64,11 @@ struct key_part_def {
enum on_conflict_action nullable_action;
/** Part sort order. */
enum sort_order sort_order;
+ /**
+ * JSON path to indexed data, relative to the field number,
+ * or NULL if this key part indexes a top-level field.
+ */
+ const char *path;
};
extern const struct key_part_def key_part_def_default;
@@ -82,6 +87,15 @@ struct key_part {
enum on_conflict_action nullable_action;
/** Part sort order. */
enum sort_order sort_order;
+ /**
+ * JSON path to indexed data, relative to the field number,
+ * or NULL if this key part indexes a top-level field.
+ * This string is not 0-terminated. Memory is allocated
+ * at the end of key_def chunk.
+ */
+ char *path;
+ /** The length of JSON path. */
+ uint32_t path_len;
};
struct key_def;
@@ -148,6 +162,8 @@ struct key_def {
uint32_t unique_part_count;
/** True, if at least one part can store NULL. */
bool is_nullable;
+ /** True, if some key part has JSON path. */
+ bool has_json_paths;
/**
* True, if some key parts can be absent in a tuple. These
* fields assumed to be MP_NIL.
@@ -241,9 +257,10 @@ box_tuple_compare_with_key(const box_tuple_t *tuple_a, const char *key_b,
/** \endcond public */
static inline size_t
-key_def_sizeof(uint32_t part_count)
+key_def_sizeof(uint32_t part_count, uint32_t paths_size)
{
- return sizeof(struct key_def) + sizeof(struct key_part) * part_count;
+ return sizeof(struct key_def) + sizeof(struct key_part) * part_count +
+ paths_size;
}
/**
@@ -255,9 +272,13 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count);
/**
* Dump part definitions of the given key def.
+ * Region is required to make allocations for JSON paths when some
+ * path present. JSON path strings are 0-terminated.
+ * Return -1 on memory allocation error, 0 on success.
*/
-void
-key_def_dump_parts(const struct key_def *def, struct key_part_def *parts);
+int
+key_def_dump_parts(const struct key_def *def, struct key_part_def *parts,
+ struct region *region);
/**
* Update 'has_optional_parts' of @a key_def with correspondence
@@ -303,7 +324,7 @@ key_def_encode_parts(char *data, const struct key_part_def *parts,
int
key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
const char **data, const struct field_def *fields,
- uint32_t field_count);
+ uint32_t field_count, struct region *region);
/**
* Returns the part in index_def->parts for the specified fieldno.
@@ -364,6 +385,8 @@ key_validate_parts(const struct key_def *key_def, const char *key,
static inline bool
key_def_is_sequential(const struct key_def *key_def)
{
+ if (key_def->has_json_paths)
+ return false;
for (uint32_t part_id = 0; part_id < key_def->part_count; part_id++) {
if (key_def->parts[part_id].fieldno != part_id)
return false;
diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc
index 7cae436f1..1f152917e 100644
--- a/src/box/lua/space.cc
+++ b/src/box/lua/space.cc
@@ -296,6 +296,11 @@ lbox_fillspace(struct lua_State *L, struct space *space, int i)
lua_pushnumber(L, part->fieldno + TUPLE_INDEX_BASE);
lua_setfield(L, -2, "fieldno");
+ if (part->path != NULL) {
+ lua_pushlstring(L, part->path, part->path_len);
+ lua_setfield(L, -2, "path");
+ }
+
lua_pushboolean(L, key_part_is_nullable(part));
lua_setfield(L, -2, "is_nullable");
diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 5cf70ab94..2cae791e1 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -1317,6 +1317,10 @@ memtx_index_def_change_requires_rebuild(struct index *index,
return true;
if (old_part->coll != new_part->coll)
return true;
+ if (json_path_cmp(old_part->path, old_part->path_len,
+ new_part->path, new_part->path_len,
+ TUPLE_INDEX_BASE) != 0)
+ return true;
}
return false;
}
diff --git a/src/box/schema_def.h b/src/box/schema_def.h
index a760ecc3f..b7a9d3284 100644
--- a/src/box/schema_def.h
+++ b/src/box/schema_def.h
@@ -44,6 +44,7 @@ enum {
BOX_INDEX_MAX = 128,
BOX_NAME_MAX = 65000,
BOX_INVALID_NAME_MAX = 64,
+ BOX_JSON_PATH_MAX = 512,
ENGINE_NAME_MAX = 16,
FIELD_TYPE_NAME_MAX = 16,
GRANT_NAME_MAX = 16,
diff --git a/src/box/sql.c b/src/box/sql.c
index 8c7607d84..c54a0c0ce 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -380,6 +380,7 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
part->nullable_action = ON_CONFLICT_ACTION_NONE;
part->is_nullable = true;
part->sort_order = SORT_ORDER_ASC;
+ part->path = NULL;
if (def != NULL && i < def->part_count)
part->coll_id = def->parts[i].coll_id;
else
diff --git a/src/box/sql/build.c b/src/box/sql/build.c
index 49b90b5d0..947daf8f6 100644
--- a/src/box/sql/build.c
+++ b/src/box/sql/build.c
@@ -2185,6 +2185,7 @@ index_fill_def(struct Parse *parse, struct index *index,
part->is_nullable = part->nullable_action == ON_CONFLICT_ACTION_NONE;
part->sort_order = SORT_ORDER_ASC;
part->coll_id = coll_id;
+ part->path = NULL;
}
key_def = key_def_new(key_parts, expr_list->nExpr);
if (key_def == NULL)
diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index 02ee225f1..3f136a342 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -1360,6 +1360,7 @@ sql_key_info_new(sqlite3 *db, uint32_t part_count)
part->is_nullable = false;
part->nullable_action = ON_CONFLICT_ACTION_ABORT;
part->sort_order = SORT_ORDER_ASC;
+ part->path = NULL;
}
return key_info;
}
@@ -1377,7 +1378,7 @@ sql_key_info_new_from_key_def(sqlite3 *db, const struct key_def *key_def)
key_info->key_def = NULL;
key_info->refs = 1;
key_info->part_count = key_def->part_count;
- key_def_dump_parts(key_def, key_info->parts);
+ key_def_dump_parts(key_def, key_info->parts, NULL);
return key_info;
}
diff --git a/src/box/sql/where.c b/src/box/sql/where.c
index 571b5af78..814bd3926 100644
--- a/src/box/sql/where.c
+++ b/src/box/sql/where.c
@@ -2807,6 +2807,7 @@ whereLoopAddBtree(WhereLoopBuilder * pBuilder, /* WHERE clause information */
part.is_nullable = false;
part.sort_order = SORT_ORDER_ASC;
part.coll_id = COLL_NONE;
+ part.path = NULL;
struct key_def *key_def = key_def_new(&part, 1);
if (key_def == NULL) {
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index 3fe4cae32..7ab6e3bf6 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -469,7 +469,8 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
struct key_part *part = key_def->parts;
const char *tuple_a_raw = tuple_data(tuple_a);
const char *tuple_b_raw = tuple_data(tuple_b);
- if (key_def->part_count == 1 && part->fieldno == 0) {
+ if (key_def->part_count == 1 && part->fieldno == 0 &&
+ part->path == NULL) {
/*
* First field can not be optional - empty tuples
* can not exist.
@@ -1027,7 +1028,7 @@ tuple_compare_create(const struct key_def *def)
}
}
assert(! def->has_optional_parts);
- if (!key_def_has_collation(def)) {
+ if (!key_def_has_collation(def) && !def->has_json_paths) {
/* Precalculated comparators don't use collation */
for (uint32_t k = 0;
k < sizeof(cmp_arr) / sizeof(cmp_arr[0]); k++) {
@@ -1247,7 +1248,7 @@ tuple_compare_with_key_create(const struct key_def *def)
}
}
assert(! def->has_optional_parts);
- if (!key_def_has_collation(def)) {
+ if (!key_def_has_collation(def) && !def->has_json_paths) {
/* Precalculated comparators don't use collation */
for (uint32_t k = 0;
k < sizeof(cmp_wk_arr) / sizeof(cmp_wk_arr[0]);
--git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc
index ac8b5a44e..c40d7887d 100644
--- a/src/box/tuple_extract_key.cc
+++ b/src/box/tuple_extract_key.cc
@@ -10,7 +10,8 @@ key_def_parts_are_sequential(const struct key_def *def, int i)
{
uint32_t fieldno1 = def->parts[i].fieldno + 1;
uint32_t fieldno2 = def->parts[i + 1].fieldno;
- return fieldno1 == fieldno2;
+ return fieldno1 == fieldno2 && def->parts[i].path == NULL &&
+ def->parts[i + 1].path == NULL;
}
/** True, if a key con contain two or more parts in sequence. */
@@ -241,7 +242,8 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
if (!key_def_parts_are_sequential(key_def, i))
break;
}
- uint32_t end_fieldno = key_def->parts[i].fieldno;
+ const struct key_part *part = &key_def->parts[i];
+ uint32_t end_fieldno = part->fieldno;
if (fieldno < current_fieldno) {
/* Rewind. */
@@ -283,6 +285,22 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
current_fieldno++;
}
}
+ const char *field_last, *field_end_last;
+ if (part->path != NULL) {
+ field_last = field;
+ field_end_last = field_end;
+ MAYBE_UNUSED int rc =
+ tuple_field_go_to_path(&field, part->path,
+ part->path_len);
+ /*
+ * All tuples must be valid as all
+ * integrity checks has already been
+ * passed.
+ */
+ assert(rc == 0);
+ field_end = field;
+ mp_next(&field_end);
+ }
memcpy(key_buf, field, field_end - field);
key_buf += field_end - field;
if (has_optional_parts && null_count != 0) {
@@ -291,6 +309,10 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
} else {
assert(key_buf - key <= data_end - data);
}
+ if (part->path != NULL) {
+ field = field_last;
+ field_end = field_end_last;
+ }
}
if (key_size != NULL)
*key_size = (uint32_t)(key_buf - key);
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index e11b4e6f3..c81c23fd1 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -28,6 +28,7 @@
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+#include <sys/uio.h>
#include "bit/bit.h"
#include "fiber.h"
#include "json/json.h"
@@ -66,12 +67,88 @@ tuple_field_delete(struct tuple_field *field)
/** Return path to a tuple field. Used for error reporting. */
static const char *
-tuple_field_path(const struct tuple_field *field)
+tuple_field_path(const struct tuple_field *field, bool json_only)
{
assert(field->token.parent != NULL);
- assert(field->token.parent->parent == NULL);
- assert(field->token.type == JSON_TOKEN_NUM);
- return int2str(field->token.num + TUPLE_INDEX_BASE);
+ char *path;
+ if (!json_only && field->token.parent->type == JSON_TOKEN_END) {
+ assert(field->token.type == JSON_TOKEN_NUM);
+ path = int2str(field->token.num + TUPLE_INDEX_BASE);
+ } else {
+ path = tt_static_buf();
+ MAYBE_UNUSED int rc =
+ json_tree_snprint_path(path, TT_STATIC_BUF_LEN,
+ &field->token, TUPLE_INDEX_BASE);
+ assert(rc > 0 && rc < TT_STATIC_BUF_LEN);
+ }
+ return path;
+}
+
+/**
+ * Add corresponding format:fields for specified JSON path.
+ * Return a pointer to the leaf field on success, NULL on memory
+ * allocation error or type/nullability mismatch error, diag
+ * message is set.
+ */
+static struct tuple_field *
+tuple_field_tree_add_path(struct tuple_format *format, const char *path,
+ uint32_t path_len, uint32_t fieldno)
+{
+ int rc = 0;
+ struct json_tree *tree = &format->fields;
+ struct tuple_field *parent = tuple_format_field(format, fieldno);
+ struct tuple_field *field = tuple_field_new();
+ if (field == NULL)
+ goto fail;
+
+ struct json_lexer lexer;
+ uint32_t token_count = 0;
+ json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
+ while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 &&
+ field->token.type != JSON_TOKEN_END) {
+ enum field_type expected_type =
+ field->token.type == JSON_TOKEN_STR ?
+ FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
+ if (field_type1_contains_type2(parent->type, expected_type)) {
+ parent->type = expected_type;
+ } else if (!field_type1_contains_type2(expected_type,
+ parent->type)) {
+ diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
+ tuple_field_path(parent, false),
+ field_type_strs[parent->type],
+ field_type_strs[expected_type]);
+ goto fail;
+ }
+ struct tuple_field *next =
+ json_tree_lookup_entry(tree, &parent->token,
+ &field->token,
+ struct tuple_field, token);
+ if (next == NULL) {
+ rc = json_tree_add(tree, &parent->token, &field->token);
+ if (rc != 0) {
+ diag_set(OutOfMemory, sizeof(struct json_token),
+ "json_tree_add", "tree");
+ goto fail;
+ }
+ next = field;
+ field = tuple_field_new();
+ if (field == NULL)
+ goto fail;
+ }
+ parent = next;
+ token_count++;
+ }
+ /* Path has been verified by key_def_decode_parts. */
+ assert(rc == 0 && field->token.type == JSON_TOKEN_END);
+ assert(parent != NULL);
+ /* Update tree depth information. */
+ format->max_path_tokens = MAX(format->max_path_tokens, token_count + 1);
+end:
+ tuple_field_delete(field);
+ return parent;
+fail:
+ parent = NULL;
+ goto end;
}
/**
@@ -95,10 +172,25 @@ tuple_format_field_by_id(struct tuple_format *format, uint32_t id)
static int
tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count,
const struct key_part *part, bool is_sequential,
- int *current_slot)
+ int *current_slot, char **paths)
{
assert(part->fieldno < tuple_format_field_count(format));
- struct tuple_field *field = tuple_format_field(format, part->fieldno);
+ struct tuple_field *field;
+ if (part->path == NULL) {
+ field = tuple_format_field(format, part->fieldno);
+ } else {
+ assert(!is_sequential);
+ /**
+ * Copy JSON path data to reserved area at the
+ * end of format allocation.
+ */
+ memcpy(*paths, part->path, part->path_len);
+ field = tuple_field_tree_add_path(format, *paths, part->path_len,
+ part->fieldno);
+ if (field == NULL)
+ return -1;
+ *paths += part->path_len;
+ }
/*
* If a field is not present in the space format,
* inherit nullable action of the first key part
@@ -124,7 +216,7 @@ tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count,
field->nullable_action = part->nullable_action;
} else if (field->nullable_action != part->nullable_action) {
diag_set(ClientError, ER_ACTION_MISMATCH,
- tuple_field_path(field),
+ tuple_field_path(field, false),
on_conflict_action_strs[field->nullable_action],
on_conflict_action_strs[part->nullable_action]);
return -1;
@@ -146,7 +238,7 @@ tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count,
errcode = ER_FORMAT_MISMATCH_INDEX_PART;
else
errcode = ER_INDEX_PART_TYPE_MISMATCH;
- diag_set(ClientError, errcode, tuple_field_path(field),
+ diag_set(ClientError, errcode, tuple_field_path(field, false),
field_type_strs[field->type],
field_type_strs[part->type]);
return -1;
@@ -158,13 +250,93 @@ tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count,
* simply accessible, so we don't store an offset for it.
*/
if (field->offset_slot == TUPLE_OFFSET_SLOT_NIL &&
- is_sequential == false && part->fieldno > 0) {
+ is_sequential == false &&
+ (part->fieldno > 0 || part->path != NULL)) {
*current_slot = *current_slot - 1;
field->offset_slot = *current_slot;
}
return 0;
}
+/**
+ * Report the field_type of the container a format field lives in.
+ * A first-level field is attached directly to the format:fields
+ * tree root, which is not a tuple_field and therefore can not be
+ * converted with json_tree_entry; FIELD_TYPE_ARRAY is reported
+ * for such fields.
+ */
+static enum field_type
+tuple_format_field_parent_type(struct tuple_format *format,
+			       struct tuple_field *field)
+{
+	struct json_token *owner = field->token.parent;
+	bool is_first_level = owner == &format->fields.root;
+	return is_first_level ? FIELD_TYPE_ARRAY :
+	       json_tree_entry(owner, struct tuple_field, token)->type;
+}
+
+/**
+ * Encode (or just measure) statement data for format:fields.
+ *
+ * Fields are visited in pre-order. For every field the routine
+ * emits, in this order:
+ *  - a nil for each omitted array member directly preceding the
+ *    field, when the parent is an array;
+ *  - the field name as a msgpack string, when the parent is a
+ *    map;
+ *  - the field itself: an array/map header announcing
+ *    max_child_idx + 1 members for an intermediate field, or the
+ *    key blob iov[field->id] (a nil when the blob is empty) for
+ *    a leaf.
+ *
+ * @param format    Format whose fields tree is traversed.
+ * @param offset    In/out write position. NULL requests a dry
+ *                  run: nothing is written and every leaf is
+ *                  accounted as nil.
+ * @param tuple_raw Beginning of the tuple data; offsets stored
+ *                  in the field map are relative to it.
+ * @param field_map Field map filled for written leaves that own
+ *                  an offset slot. Not accessed in a dry run.
+ * @param iov       Key blobs indexed by field->id. Not accessed
+ *                  in a dry run.
+ * @retval Count of bytes written (required, in a dry run).
+ */
+uint32_t
+tuple_format_stmt_encode(struct tuple_format *format, char **offset,
+			 char *tuple_raw, uint32_t *field_map,
+			 struct iovec *iov)
+{
+	bool write = offset != NULL;
+	uint32_t size = 0;
+	struct tuple_field *field;
+	json_tree_foreach_entry_preorder(field, &format->fields.root,
+					 struct tuple_field, token) {
+		enum field_type parent_type =
+			tuple_format_field_parent_type(format, field);
+		if (parent_type == FIELD_TYPE_ARRAY &&
+		    field->token.sibling_idx > 0) {
+			/*
+			 * Write nil instead of omitted array
+			 * members. The gap may reach down to the
+			 * very first slot, so slot 0 has to be
+			 * examined too: the parent header
+			 * announces max_child_idx + 1 members
+			 * and each of them must be encoded.
+			 */
+			struct json_token **neighbors =
+				field->token.parent->children;
+			for (uint32_t i = field->token.sibling_idx;
+			     i > 0 && neighbors[i - 1] == NULL; i--) {
+				if (write)
+					*offset = mp_encode_nil(*offset);
+				size += mp_sizeof_nil();
+			}
+		} else if (parent_type == FIELD_TYPE_MAP) {
+			/* Write map key string. */
+			const char *str = field->token.str;
+			uint32_t len = field->token.len;
+			if (write)
+				*offset = mp_encode_str(*offset, str, len);
+			size += mp_sizeof_str(len);
+		}
+		/* Fill data. */
+		uint32_t children_cnt = field->token.max_child_idx + 1;
+		if (json_token_is_leaf(&field->token)) {
+			if (!write || iov[field->id].iov_len == 0) {
+				/*
+				 * No key blob for the leaf (always
+				 * the case in a dry run): nil.
+				 */
+				if (write)
+					*offset = mp_encode_nil(*offset);
+				size += mp_sizeof_nil();
+			} else {
+				memcpy(*offset, iov[field->id].iov_base,
+				       iov[field->id].iov_len);
+				/*
+				 * Remember where the blob starts
+				 * before advancing the position.
+				 */
+				uint32_t data_offset = *offset - tuple_raw;
+				int32_t slot = field->offset_slot;
+				if (slot != TUPLE_OFFSET_SLOT_NIL)
+					field_map[slot] = data_offset;
+				*offset += iov[field->id].iov_len;
+				size += iov[field->id].iov_len;
+			}
+		} else if (field->type == FIELD_TYPE_ARRAY) {
+			if (write)
+				*offset = mp_encode_array(*offset, children_cnt);
+			size += mp_sizeof_array(children_cnt);
+		} else if (field->type == FIELD_TYPE_MAP) {
+			if (write)
+				*offset = mp_encode_map(*offset, children_cnt);
+			size += mp_sizeof_map(children_cnt);
+		}
+	}
+	return size;
+}
+
/**
* Extract all available type info from keys and field
* definitions.
@@ -203,6 +375,11 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
int current_slot = 0;
+ /*
+ * Set pointer to reserved area in the format chunk
+ * allocated with tuple_format_alloc call.
+ */
+ char *paths = (char *)format + sizeof(struct tuple_format);
/* extract field type info */
for (uint16_t key_no = 0; key_no < key_count; ++key_no) {
const struct key_def *key_def = keys[key_no];
@@ -213,7 +390,8 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
for (; part < parts_end; part++) {
if (tuple_format_use_key_part(format, field_count, part,
is_sequential,
- &current_slot) != 0)
+ &current_slot,
+ &paths) != 0)
return -1;
}
}
@@ -236,9 +414,12 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
"malloc", "required field bitmap");
return -1;
}
+ uint32_t id = 0;
struct tuple_field *field;
json_tree_foreach_entry_preorder(field, &format->fields.root,
struct tuple_field, token) {
+ /* Set the unique field identifier. */
+ field->id = id++;
/*
* Mark all leaf non-nullable fields as required
* by setting the corresponding bit in the bitmap
@@ -248,6 +429,10 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
!tuple_field_is_nullable(field))
bit_set(format->required_fields, field->id);
}
+ /* Update format metadata for a new format:fields tree. */
+ format->total_field_count = id;
+ format->vy_stmt_size = tuple_format_stmt_encode(format, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -317,6 +502,8 @@ static struct tuple_format *
tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
uint32_t space_field_count, struct tuple_dictionary *dict)
{
+ /* Size of area to store paths. */
+ uint32_t paths_size = 0;
uint32_t index_field_count = 0;
/* find max max field no */
for (uint16_t key_no = 0; key_no < key_count; ++key_no) {
@@ -326,13 +513,15 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
for (; part < pend; part++) {
index_field_count = MAX(index_field_count,
part->fieldno + 1);
+ paths_size += part->path_len;
}
}
uint32_t field_count = MAX(space_field_count, index_field_count);
- struct tuple_format *format = malloc(sizeof(struct tuple_format));
+ uint32_t allocation_size = sizeof(struct tuple_format) + paths_size;
+ struct tuple_format *format = malloc(allocation_size);
if (format == NULL) {
- diag_set(OutOfMemory, sizeof(struct tuple_format), "malloc",
+ diag_set(OutOfMemory, allocation_size, "malloc",
"tuple format");
return NULL;
}
@@ -346,7 +535,6 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
struct tuple_field *field = tuple_field_new();
if (field == NULL)
goto error;
- field->id = fieldno;
field->token.num = fieldno;
field->token.type = JSON_TOKEN_NUM;
if (json_tree_add(&format->fields, &format->fields.root,
@@ -368,6 +556,8 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
}
format->total_field_count = field_count;
format->required_fields = NULL;
+ format->max_path_tokens = 1;
+ format->vy_stmt_size = UINT32_MAX;
format->refs = 0;
format->id = FORMAT_ID_NIL;
format->index_field_count = index_field_count;
@@ -428,15 +618,22 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
{
if (format1->exact_field_count != format2->exact_field_count)
return false;
- uint32_t format1_field_count = tuple_format_field_count(format1);
- uint32_t format2_field_count = tuple_format_field_count(format2);
- for (uint32_t i = 0; i < format1_field_count; ++i) {
- struct tuple_field *field1 = tuple_format_field(format1, i);
+ struct tuple_field *field1;
+ json_tree_foreach_entry_preorder(field1, &format1->fields.root,
+ struct tuple_field, token) {
+next:;
+ const char *path = tuple_field_path(field1, true);
+ struct tuple_field *field2 =
+ json_tree_lookup_path_entry(&format2->fields,
+ &format2->fields.root,
+ path, strlen(path),
+ TUPLE_INDEX_BASE,
+ struct tuple_field, token);
/*
* The field has a data type in format1, but has
* no data type in format2.
*/
- if (i >= format2_field_count) {
+ if (field2 == NULL) {
/*
* The field can get a name added
* for it, and this doesn't require a data
@@ -447,12 +644,22 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
* NULLs or miss the subject field.
*/
if (field1->type == FIELD_TYPE_ANY &&
- tuple_field_is_nullable(field1))
- continue;
- else
+ tuple_field_is_nullable(field1)) {
+ /* Skip subtree. */
+ struct json_token *token = &field1->token;
+ struct json_token *parent = token->parent;
+ field1 = json_tree_child_next_entry(parent,
+ token,
+ struct
+ tuple_field,
+ token);
+ if (field1 == NULL)
+ break;
+ goto next;
+ } else {
return false;
+ }
}
- struct tuple_field *field2 = tuple_format_field(format2, i);
if (! field_type1_contains_type2(field1->type, field2->type))
return false;
/*
@@ -466,6 +673,90 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
return true;
}
+/**
+ * Descriptor of a msgpack container (frame) being parsed.
+ * Msgpack stores the length of a nested container only in the
+ * header at its beginning, so while walking the data we need a
+ * way to tell that the current container is finished and the
+ * parser should return to the enclosing one.
+ * For this purpose a stack of frames is maintained: each frame
+ * keeps the kind of children the container has, the total
+ * number of members, and the number of members that have
+ * already been parsed.
+ */
+struct mp_frame {
+ /**
+ * JSON token type used for members of this frame:
+ * JSON_TOKEN_NUM for an array, JSON_TOKEN_STR for a map.
+ */
+ enum json_token_type child_type;
+ /** Total count of MP members to process. */
+ uint32_t total;
+ /** Count of MP members that have already been parsed. */
+ uint32_t curr;
+};
+
+/**
+ * Produce the token for the next member of the top mp_stack
+ * frame and account the member as parsed.
+ * For an array frame the token carries the member position
+ * (curr - TUPLE_INDEX_BASE) and the msgpack pointer is not
+ * moved. For a map frame the key is decoded into the token and
+ * the pointer is left at the value; a key that is not a string
+ * is skipped instead.
+ * Return 0 on success, -1 when the analysis step must be
+ * skipped (an unsupported, i.e. non-string, map key was met).
+ */
+static int
+mp_frame_parse(struct mp_frame *mp_stack, uint32_t mp_stack_idx,
+	       const char **pos, struct json_token *token)
+{
+	struct mp_frame *frame = &mp_stack[mp_stack_idx];
+	token->type = frame->child_type;
+	frame->curr++;
+	switch (token->type) {
+	case JSON_TOKEN_NUM:
+		token->num = frame->curr - TUPLE_INDEX_BASE;
+		return 0;
+	case JSON_TOKEN_STR:
+		if (mp_typeof(**pos) != MP_STR) {
+			/* Skip key. */
+			mp_next(pos);
+			return -1;
+		}
+		token->str = mp_decode_str(pos, (uint32_t *)&token->len);
+		return 0;
+	default:
+		unreachable();
+	}
+	return 0;
+}
+
+/**
+ * Prepare mp_stack for further iterations once the member at
+ * @pos has been analyzed.
+ *
+ * If the member is a non-empty map or array, @token is not NULL
+ * and the stack has a spare frame, the container header is
+ * decoded, a new frame describing the container is pushed and
+ * @parent is set to @token, so that the following iterations
+ * descend into the container.
+ * Otherwise the member is consumed as a whole (an empty container
+ * is consumed by decoding its header) and every frame that has
+ * no members left is popped, moving @parent one level up per
+ * popped frame.
+ *
+ * @param mp_stack       Stack of frames.
+ * @param mp_stack_idx   In/out index of the top frame.
+ * @param mp_stack_total Count of frames available in @mp_stack.
+ * @param token          Token to become the new parent on
+ *                       descent, or NULL to forbid descent.
+ * @param pos            In/out msgpack position.
+ * @param parent         In/out parent token for the next lookup.
+ * @retval 0  Continue iteration.
+ * @retval -1 The bottom frame is exhausted, nothing to parse.
+ */
+static int
+mp_frame_prepare(struct mp_frame *mp_stack, uint32_t *mp_stack_idx,
+		 uint32_t mp_stack_total, struct json_token *token,
+		 const char **pos, struct json_token **parent)
+{
+	enum mp_type type = mp_typeof(**pos);
+	if (token != NULL && *mp_stack_idx + 1 < mp_stack_total &&
+	    (type == MP_MAP || type == MP_ARRAY)) {
+		uint32_t size = type == MP_ARRAY ? mp_decode_array(pos) :
+				mp_decode_map(pos);
+		if (size > 0) {
+			*parent = token;
+			*mp_stack_idx = *mp_stack_idx + 1;
+			struct mp_frame *frame = &mp_stack[*mp_stack_idx];
+			frame->child_type = type == MP_ARRAY ?
+					    JSON_TOKEN_NUM : JSON_TOKEN_STR;
+			frame->total = size;
+			frame->curr = 0;
+			return 0;
+		}
+		/*
+		 * An empty container is fully consumed by decoding
+		 * its header. It still may be the last member of
+		 * the current frame, so completed frames must be
+		 * unwound below exactly as for a scalar; otherwise
+		 * the next iteration would parse beyond the frame.
+		 */
+	} else {
+		mp_next(pos);
+	}
+	while (mp_stack[*mp_stack_idx].curr >=
+	       mp_stack[*mp_stack_idx].total) {
+		assert(*parent != NULL);
+		*parent = (*parent)->parent;
+		if (*mp_stack_idx == 0)
+			return -1;
+		*mp_stack_idx = *mp_stack_idx - 1;
+	}
+	return 0;
+}
+
/** @sa declaration for details. */
int
tuple_init_field_map(struct tuple_format *format, uint32_t *field_map,
@@ -512,49 +803,64 @@ tuple_init_field_map(struct tuple_format *format, uint32_t *field_map,
/* Empty tuple, nothing to do. */
goto skip;
}
- /* first field is simply accessible, so we do not store offset to it */
- struct tuple_field *field = tuple_format_field(format, 0);
- if (validate &&
- !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
- tuple_field_is_nullable(field))) {
- diag_set(ClientError, ER_FIELD_TYPE, tuple_field_path(field),
- field_type_strs[field->type]);
- goto error;
- }
- if (required_fields != NULL)
- bit_clear(required_fields, field->id);
- mp_next(&pos);
- /* other fields...*/
- uint32_t i = 1;
uint32_t defined_field_count = MIN(field_count, validate ?
tuple_format_field_count(format) :
format->index_field_count);
- if (field_count < format->index_field_count) {
- /*
- * Nullify field map to be able to detect by 0,
- * which key fields are absent in tuple_field().
- */
- memset((char *)field_map - format->field_map_size, 0,
- format->field_map_size);
+ /*
+ * Nullify field map to be able to detect by 0,
+ * which key fields are absent in tuple_field().
+ */
+ memset((char *)field_map - format->field_map_size, 0,
+ format->field_map_size);
+ uint32_t mp_stack_size =
+ format->max_path_tokens * sizeof(struct mp_frame);
+ struct mp_frame *mp_stack = region_alloc(region, mp_stack_size);
+ if (mp_stack == NULL) {
+ diag_set(OutOfMemory, mp_stack_size, "region_alloc",
+ "mp_stack");
+ goto error;
}
- for (; i < defined_field_count; ++i) {
- field = tuple_format_field(format, i);
- if (validate &&
- !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
- tuple_field_is_nullable(field))) {
- diag_set(ClientError, ER_FIELD_TYPE,
- tuple_field_path(field),
- field_type_strs[field->type]);
- goto error;
+ struct tuple_field *field;
+ mp_stack[0].child_type = JSON_TOKEN_NUM;
+ mp_stack[0].total = defined_field_count;
+ mp_stack[0].curr = 0;
+ uint32_t mp_stack_idx = 0;
+ struct json_tree *tree = (struct json_tree *)&format->fields;
+ struct json_token *parent = &tree->root;
+ while (mp_stack[0].curr <= mp_stack[0].total) {
+ struct json_token token;
+ if (mp_frame_parse(mp_stack, mp_stack_idx, &pos, &token) != 0) {
+ /* Unsupported token. */
+ goto finish_frame;
}
- if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
- field_map[field->offset_slot] =
- (uint32_t) (pos - tuple);
+ field = json_tree_lookup_entry(tree, parent, &token,
+ struct tuple_field, token);
+ if (field != NULL) {
+ bool is_nullable = tuple_field_is_nullable(field);
+ if (validate &&
+ !field_mp_type_is_compatible(field->type,
+ mp_typeof(*pos),
+ is_nullable) != 0) {
+ diag_set(ClientError, ER_FIELD_TYPE,
+ tuple_field_path(field, false),
+ field_type_strs[field->type]);
+ goto error;
+ }
+ if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
+ field_map[field->offset_slot] =
+ (uint32_t)(pos - tuple);
+ }
+ if (required_fields != NULL)
+ bit_clear(required_fields, field->id);
}
- if (required_fields != NULL)
- bit_clear(required_fields, field->id);
- mp_next(&pos);
- }
+finish_frame:
+ /* Prepare stack info for next iteration. */
+ if (mp_frame_prepare(mp_stack, &mp_stack_idx,
+ format->max_path_tokens,
+ field != NULL ? &field->token : NULL,
+ &pos, &parent) != 0)
+ break;
+ };
skip:
/*
* Check the required field bitmap for missing fields.
@@ -569,7 +875,7 @@ skip:
field = tuple_format_field_by_id(format, id);
assert(field != NULL);
diag_set(ClientError, ER_FIELD_MISSING,
- tuple_field_path(field));
+ tuple_field_path(field, false));
goto error;
}
}
@@ -713,15 +1019,7 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
return -1;
}
-/**
- * Retrieve msgpack data by JSON path.
- * @param data Pointer to msgpack with data.
- * @param path The path to process.
- * @param path_len The length of the @path.
- * @retval 0 On success.
- * @retval >0 On path parsing error, invalid character position.
- */
-static int
+int
tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len)
{
int rc;
@@ -820,3 +1118,30 @@ error:
tt_sprintf("error in path on position %d", rc));
return -1;
}
+
+/**
+ * Locate the data addressed by a key part that has a JSON path.
+ * When the format has a field for the part, the position is
+ * taken from the field map (a zero entry yields NULL in @raw).
+ * Otherwise the top-level field part->fieldno is fetched and the
+ * path is followed through its msgpack with
+ * tuple_field_go_to_path, whose return code is passed through.
+ * Return 0 when the lookup was served by the field map or the
+ * top-level field is absent (NULL in @raw).
+ */
+int
+tuple_field_by_part_raw_slowpath(struct tuple_format *format, const char *data,
+				 const uint32_t *field_map,
+				 struct key_part *part, const char **raw)
+{
+	assert(part->path != NULL);
+	struct tuple_field *field =
+		tuple_format_field_by_path(format, part->fieldno, part->path,
+					   part->path_len);
+	if (field == NULL) {
+		/*
+		 * Format doesn't have field representing specified
+		 * part. Make slow tuple parsing.
+		 */
+		*raw = tuple_field_raw(format, data, field_map,
+				       part->fieldno);
+		if (*raw != NULL) {
+			return tuple_field_go_to_path(raw, part->path,
+						      part->path_len);
+		}
+		return 0;
+	}
+	int32_t offset_slot = field->offset_slot;
+	assert(-offset_slot * sizeof(uint32_t) <=
+	       format->field_map_size);
+	uint32_t data_offset = field_map[offset_slot];
+	if (data_offset == 0)
+		*raw = NULL;
+	else
+		*raw = data + data_offset;
+	return 0;
+}
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index 30b93b610..3b630c3bb 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -65,6 +65,7 @@ enum { TUPLE_OFFSET_SLOT_NIL = INT32_MAX };
struct tuple;
struct tuple_format;
struct coll;
+struct iovec;
/** Engine-specific tuple format methods. */
struct tuple_format_vtab {
@@ -185,6 +186,15 @@ struct tuple_format {
* Shared names storage used by all formats of a space.
*/
struct tuple_dictionary *dict;
+ /**
+ * A maximum depth of format:fields subtree.
+ */
+ uint32_t max_path_tokens;
+ /**
+ * The size of the secondary key built for format:fields
+ * with all leaf records set to nil.
+ */
+ uint32_t vy_stmt_size;
/**
* Fields comprising the format, organized in a tree.
* First level nodes correspond to tuple fields.
@@ -221,6 +231,37 @@ tuple_format_field(struct tuple_format *format, uint32_t fieldno)
&token, struct tuple_field, token);
}
+/**
+ * Find a field in format:fields tree by the number of its
+ * first-level field and a JSON path relative to that field.
+ * Return NULL when fieldno is not less than the format field
+ * count; otherwise return the result of the path lookup started
+ * from the first-level field token.
+ */
+static inline struct tuple_field *
+tuple_format_field_by_path(struct tuple_format *format, uint32_t fieldno,
+			   const char *path, uint32_t path_len)
+{
+	if (fieldno >= tuple_format_field_count(format))
+		return NULL;
+	struct tuple_field *top = tuple_format_field(format, fieldno);
+	assert(top != NULL);
+	struct json_token *start = &top->token;
+	return json_tree_lookup_path_entry(&format->fields, start,
+					   path, path_len, TUPLE_INDEX_BASE,
+					   struct tuple_field, token);
+}
+
+/**
+ * Construct a secondary-index tuple and initialize field_map.
+ * The iov[field->id] array item contains the extracted key for
+ * the indexed field identified by its unique field->id.
+ * Return the size of the constructed tuple.
+ * When offset == NULL nothing is written and the routine only
+ * computes the tuple size with all leaf records assumed to be
+ * nil(s).
+ */
+uint32_t
+tuple_format_stmt_encode(struct tuple_format *format, char **offset,
+ char *tuple_raw, uint32_t *field_map,
+ struct iovec *iov);
+
extern struct tuple_format **tuple_formats;
static inline uint32_t
@@ -420,6 +461,18 @@ tuple_field_raw_by_name(struct tuple_format *format, const char *tuple,
return tuple_field_raw(format, tuple, field_map, fieldno);
}
+/**
+ * Retrieve msgpack data by JSON path.
+ * @param data Pointer to msgpack with data.
+ * @param path The path to process.
+ * @param path_len The length of the @path.
+ * @retval 0 On success.
+ * @retval >0 On path parsing error, invalid character position.
+ */
+int
+tuple_field_go_to_path(const char **data, const char *path,
+ uint32_t path_len);
+
/**
* Get tuple field by its path.
* @param format Tuple format.
@@ -439,6 +492,12 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
uint32_t path_len, uint32_t path_hash,
const char **field);
+/** Internal function, use tuple_field_by_part_raw instead. */
+int
+tuple_field_by_part_raw_slowpath(struct tuple_format *format, const char *data,
+ const uint32_t *field_map,
+ struct key_part *part, const char **raw);
+
/**
* Get a tuple field pointed to by an index part.
* @param format Tuple format.
@@ -451,7 +510,16 @@ static inline const char *
tuple_field_by_part_raw(struct tuple_format *format, const char *data,
const uint32_t *field_map, struct key_part *part)
{
- return tuple_field_raw(format, data, field_map, part->fieldno);
+ if (likely(part->path == NULL)) {
+ return tuple_field_raw(format, data, field_map, part->fieldno);
+ } else {
+ const char *raw;
+ MAYBE_UNUSED int rc =
+ tuple_field_by_part_raw_slowpath(format, data,
+ field_map, part, &raw);
+ assert(rc == 0);
+ return raw;
+ }
}
#if defined(__cplusplus)
diff --git a/src/box/tuple_hash.cc b/src/box/tuple_hash.cc
index b394804fe..3486ce11c 100644
--- a/src/box/tuple_hash.cc
+++ b/src/box/tuple_hash.cc
@@ -222,7 +222,7 @@ key_hash_slowpath(const char *key, struct key_def *key_def);
void
tuple_hash_func_set(struct key_def *key_def) {
- if (key_def->is_nullable)
+ if (key_def->is_nullable || key_def->has_json_paths)
goto slowpath;
/*
* Check that key_def defines sequential a key without holes
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index ca987134c..acd2d7fd6 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -982,6 +982,10 @@ vinyl_index_def_change_requires_rebuild(struct index *index,
return true;
if (!field_type1_contains_type2(new_part->type, old_part->type))
return true;
+ if (json_path_cmp(old_part->path, old_part->path_len,
+ new_part->path, new_part->path_len,
+ TUPLE_INDEX_BASE) != 0)
+ return true;
}
return false;
}
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index c9e0713c8..6fc051648 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -581,9 +581,11 @@ vy_log_record_decode(struct vy_log_record *record,
record->group_id = mp_decode_uint(&pos);
break;
case VY_LOG_KEY_DEF: {
+ struct region *region = &fiber()->gc;
uint32_t part_count = mp_decode_array(&pos);
- struct key_part_def *parts = region_alloc(&fiber()->gc,
- sizeof(*parts) * part_count);
+ struct key_part_def *parts =
+ region_alloc(region,
+ sizeof(*parts) * part_count);
if (parts == NULL) {
diag_set(OutOfMemory,
sizeof(*parts) * part_count,
@@ -591,7 +593,7 @@ vy_log_record_decode(struct vy_log_record *record,
return -1;
}
if (key_def_decode_parts(parts, part_count, &pos,
- NULL, 0) != 0) {
+ NULL, 0, region) != 0) {
diag_log();
diag_set(ClientError, ER_INVALID_VYLOG_FILE,
"Bad record: failed to decode "
@@ -705,7 +707,8 @@ vy_log_record_dup(struct region *pool, const struct vy_log_record *src)
"struct key_part_def");
goto err;
}
- key_def_dump_parts(src->key_def, dst->key_parts);
+ if (key_def_dump_parts(src->key_def, dst->key_parts, pool) != 0)
+ goto err;
dst->key_part_count = src->key_def->part_count;
dst->key_def = NULL;
}
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index ddbc2d46f..14e0c0c93 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -196,8 +196,6 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx,
const struct vy_read_view **rv,
struct tuple *key, struct tuple **ret)
{
- assert(tuple_field_count(key) >= lsm->cmp_def->part_count);
-
*ret = NULL;
double start_time = ev_monotonic_now(loop());
int rc = 0;
diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
index 47f135c65..7a302e6f3 100644
--- a/src/box/vy_stmt.c
+++ b/src/box/vy_stmt.c
@@ -385,26 +385,43 @@ vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type,
struct region *region = &fiber()->gc;
uint32_t field_count = format->index_field_count;
- struct iovec *iov = region_alloc(region, sizeof(*iov) * field_count);
+ uint32_t iov_sz =
+ sizeof(struct iovec) * format->total_field_count;
+ struct iovec *iov = region_alloc(region, iov_sz);
if (iov == NULL) {
- diag_set(OutOfMemory, sizeof(*iov) * field_count,
- "region", "iov for surrogate key");
+ diag_set(OutOfMemory, iov_sz, "region_alloc",
+ "iov for surrogate key");
return NULL;
}
- memset(iov, 0, sizeof(*iov) * field_count);
+ memset(iov, 0, iov_sz);
uint32_t part_count = mp_decode_array(&key);
assert(part_count == cmp_def->part_count);
- assert(part_count <= field_count);
- uint32_t nulls_count = field_count - cmp_def->part_count;
+ assert(part_count <= format->total_field_count);
+ /**
+ * The format:vy_stmt_size contains a size of
+ * stmt tuple having all leaf fields set to null.
+ * Calculate bsize as vy_stmt_size where parts_count
+ * nulls replaced with extracted keys.
+ */
uint32_t bsize = mp_sizeof_array(field_count) +
- mp_sizeof_nil() * nulls_count;
+ format->vy_stmt_size - mp_sizeof_nil() * part_count;
for (uint32_t i = 0; i < part_count; ++i) {
const struct key_part *part = &cmp_def->parts[i];
assert(part->fieldno < field_count);
+ struct tuple_field *field;
+ if (part->path != NULL) {
+ field = tuple_format_field_by_path(format,
+ part->fieldno,
+ part->path,
+ part->path_len);
+ } else {
+ field = tuple_format_field(format, part->fieldno);
+ }
+ assert(field != NULL);
const char *svp = key;
- iov[part->fieldno].iov_base = (char *) key;
+ iov[field->id].iov_base = (char *) key;
mp_next(&key);
- iov[part->fieldno].iov_len = key - svp;
+ iov[field->id].iov_len = key - svp;
bsize += key - svp;
}
@@ -414,18 +431,10 @@ vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type,
char *raw = (char *) tuple_data(stmt);
uint32_t *field_map = (uint32_t *) raw;
+ memset((char *)field_map - format->field_map_size, 0,
+ format->field_map_size);
char *wpos = mp_encode_array(raw, field_count);
- for (uint32_t i = 0; i < field_count; ++i) {
- struct tuple_field *field = tuple_format_field(format, i);
- if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
- field_map[field->offset_slot] = wpos - raw;
- if (iov[i].iov_base == NULL) {
- wpos = mp_encode_nil(wpos);
- } else {
- memcpy(wpos, iov[i].iov_base, iov[i].iov_len);
- wpos += iov[i].iov_len;
- }
- }
+ (void)tuple_format_stmt_encode(format, &wpos, raw, field_map, iov);
assert(wpos == raw + bsize);
vy_stmt_set_type(stmt, type);
return stmt;
diff --git a/src/lib/json/json.c b/src/lib/json/json.c
index 010a61d62..1d79bceb0 100644
--- a/src/lib/json/json.c
+++ b/src/lib/json/json.c
@@ -572,12 +572,7 @@ json_tree_lookup_path(struct json_tree *tree, struct json_token *root,
return ret;
}
-/**
- * Return the child of @parent following @pos or NULL if @pos
- * points to the last child in the children array. If @pos is
- * NULL, this function returns the first child.
- */
-static struct json_token *
+struct json_token *
json_tree_child_next(struct json_token *parent, struct json_token *pos)
{
assert(pos == NULL || pos->parent == parent);
diff --git a/src/lib/json/json.h b/src/lib/json/json.h
index 66cddd026..fc441a887 100644
--- a/src/lib/json/json.h
+++ b/src/lib/json/json.h
@@ -353,6 +353,14 @@ struct json_token *
json_tree_lookup_path(struct json_tree *tree, struct json_token *root,
const char *path, int path_len, int index_base);
+/**
+ * Return the child of @parent following @pos or NULL if @pos
+ * points to the last child in the children array. If @pos is
+ * NULL, this function returns the first child.
+ */
+struct json_token *
+json_tree_child_next(struct json_token *parent, struct json_token *pos);
+
/**
* Perform pre-order traversal in a JSON subtree rooted
* at a given node.
@@ -436,6 +444,14 @@ json_tree_postorder_next(struct json_token *root, struct json_token *pos);
json_tree_entry_safe(ret, type, member); \
})
+/**
+ * Container-aware wrapper around json_tree_child_next().
+ * Calls json_tree_child_next(@parent, @pos) and passes the
+ * resulting token through json_tree_entry_safe() to obtain the
+ * structure of the given @type that embeds it as @member.
+ * NOTE(review): the temporary is named `next`, so an argument
+ * expression mentioning an identifier `next` would bind to the
+ * macro's local instead - confirm no caller does that.
+ */
+#define json_tree_child_next_entry(parent, pos, type, member) ({ \
+ struct json_token *next = json_tree_child_next((parent), (pos)); \
+ json_tree_entry_safe(next, type, member); \
+})
+
/**
* Container-aware wrapper around json_tree_preorder_next().
*/
diff --git a/test/engine/json.result b/test/engine/json.result
new file mode 100644
index 000000000..711f7f256
--- /dev/null
+++ b/test/engine/json.result
@@ -0,0 +1,448 @@
+test_run = require('test_run').new()
+---
+...
+engine = test_run:get_cfg('engine')
+---
+...
+--
+-- gh-1012: Indexes for JSON-defined paths.
+--
+s = box.schema.space.create('withdata', {engine = engine})
+---
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}})
+---
+- error: 'Can''t create or modify index ''test1'' in space ''withdata'': same key
+ part is indexed twice'
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}})
+---
+- error: 'Wrong index options (field 2): ''path'' must be string'
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = 'FIO'}}})
+---
+- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type
+ ''map'' is not supported'
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}})
+---
+- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type
+ ''array'' is not supported'
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO'}, {3, 'str', path = 'FIO.fname'}}})
+---
+- error: Field [3]["FIO"] has type 'string' in one index, but type 'map' in another
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}})
+---
+- error: Field 3 has type 'array' in one index, but type 'map' in another
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO....fname'}}})
+---
+- error: 'Wrong index options (field 3): invalid JSON path ''FIO....fname'': error
+ in path on position 5'
+...
+idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO.fname', is_nullable = false}, {3, 'str', path = '["FIO"]["sname"]'}}})
+---
+...
+assert(idx ~= nil)
+---
+- true
+...
+assert(idx.parts[2].path == 'FIO.fname')
+---
+- true
+...
+format = {{'id', 'unsigned'}, {'meta', 'unsigned'}, {'data', 'array'}, {'age', 'unsigned'}, {'level', 'unsigned'}}
+---
+...
+s:format(format)
+---
+- error: Field 3 has type 'array' in one index, but type 'map' in another
+...
+format = {{'id', 'unsigned'}, {'meta', 'unsigned'}, {'data', 'map'}, {'age', 'unsigned'}, {'level', 'unsigned'}}
+---
+...
+s:format(format)
+---
+...
+s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = 'FIO.fname'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+---
+- error: Field [3]["FIO"]["fname"] has type 'string' in one index, but type 'number'
+ in another
+...
+s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5}
+---
+- error: 'Tuple field [3]["FIO"] type does not match one required by operation: expected
+ map'
+...
+s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5}
+---
+- error: 'Tuple field [3]["FIO"]["fname"] type does not match one required by operation:
+ expected string'
+...
+s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5}
+---
+- error: Tuple field [3]["FIO"]["sname"] required by space format is missing
+...
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+---
+- [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+---
+- error: Duplicate key exists in unique index 'test1' in space 'withdata'
+...
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5}
+---
+- error: Duplicate key exists in unique index 'test1' in space 'withdata'
+...
+s:insert{7, 7, {town = 'Moscow', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5}
+---
+- [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}},
+ 4, 5]
+...
+idx:select()
+---
+- - [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+ - [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}},
+ 4, 5]
+...
+idx:min()
+---
+- [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+idx:max()
+---
+- [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}},
+ 4, 5]
+...
+s:drop()
+---
+...
+s = box.schema.create_space('withdata', {engine = engine})
+---
+...
+parts = {}
+---
+...
+parts[1] = {1, 'unsigned', path='[2]'}
+---
+...
+pk = s:create_index('pk', {parts = parts})
+---
+...
+s:insert{{1, 2}, 3}
+---
+- [[1, 2], 3]
+...
+s:upsert({{box.null, 2}}, {{'+', 2, 5}})
+---
+...
+s:get(2)
+---
+- [[1, 2], 8]
+...
+s:drop()
+---
+...
+-- Create index on space with data
+s = box.schema.space.create('withdata', {engine = engine})
+---
+...
+pk = s:create_index('primary', { type = 'tree' })
+---
+...
+s:insert{1, 7, {town = 'London', FIO = 1234}, 4, 5}
+---
+- [1, 7, {'town': 'London', 'FIO': 1234}, 4, 5]
+...
+s:insert{2, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+---
+- [2, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+s:insert{3, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+---
+- [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+s:insert{4, 7, {town = 'London', FIO = {1,2,3}}, 4, 5}
+---
+- [4, 7, {'town': 'London', 'FIO': [1, 2, 3]}, 4, 5]
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+---
+- error: 'Tuple field [3]["FIO"] type does not match one required by operation: expected
+ map'
+...
+_ = s:delete(1)
+---
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+---
+- error: Duplicate key exists in unique index 'test1' in space 'withdata'
+...
+_ = s:delete(2)
+---
+...
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+---
+- error: 'Tuple field [3]["FIO"] type does not match one required by operation: expected
+ map'
+...
+_ = s:delete(4)
+---
+...
+idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]', is_nullable = true}, {3, 'str', path = '["FIO"]["sname"]'}, {3, 'str', path = '["FIO"]["extra"]', is_nullable = true}}})
+---
+...
+assert(idx ~= nil)
+---
+- true
+...
+s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '["FIO"]["fname"]'}}})
+---
+- error: Field [3]["FIO"]["fname"] has type 'string' in one index, but type 'number'
+ in another
+...
+idx2 = s:create_index('test2', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}}})
+---
+...
+assert(idx2 ~= nil)
+---
+- true
+...
+t = s:insert{5, 7, {town = 'Matrix', FIO = {fname = 'Agent', sname = 'Smith'}}, 4, 5}
+---
+...
+idx:select()
+---
+- - [5, 7, {'town': 'Matrix', 'FIO': {'fname': 'Agent', 'sname': 'Smith'}}, 4, 5]
+ - [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+idx:min()
+---
+- [5, 7, {'town': 'Matrix', 'FIO': {'fname': 'Agent', 'sname': 'Smith'}}, 4, 5]
+...
+idx:max()
+---
+- [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+idx:drop()
+---
+...
+s:drop()
+---
+...
+-- Test complex JSON indexes
+s = box.schema.space.create('withdata', {engine = engine})
+---
+...
+parts = {}
+---
+...
+parts[1] = {1, 'str', path='[3][2].a'}
+---
+...
+parts[2] = {1, 'unsigned', path = '[3][1]'}
+---
+...
+parts[3] = {2, 'str', path = '[2].d[1]'}
+---
+...
+pk = s:create_index('primary', { type = 'tree', parts = parts})
+---
+...
+s:insert{{1, 2, {3, {3, a = 'str', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {1, 2, 3}}
+---
+- [[1, 2, [3, {1: 3, 'a': 'str', 'b': 5}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6,
+ [1, 2, 3]]
+...
+s:insert{{1, 2, {3, {a = 'str', b = 1}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6}
+---
+- error: Duplicate key exists in unique index 'primary' in space 'withdata'
+...
+parts = {}
+---
+...
+parts[1] = {4, 'unsigned', path='[1]', is_nullable = false}
+---
+...
+parts[2] = {4, 'unsigned', path='[2]', is_nullable = true}
+---
+...
+parts[3] = {4, 'unsigned', path='[4]', is_nullable = true}
+---
+...
+trap_idx = s:create_index('trap', { type = 'tree', parts = parts})
+---
+...
+s:insert{{1, 2, {3, {3, a = 'str2', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {}}
+---
+- error: Tuple field [4][1] required by space format is missing
+...
+parts = {}
+---
+...
+parts[1] = {1, 'unsigned', path='[3][2].b' }
+---
+...
+parts[2] = {3, 'unsigned'}
+---
+...
+crosspart_idx = s:create_index('crosspart', { parts = parts})
+---
+...
+s:insert{{1, 2, {3, {a = 'str2', b = 2}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {9, 2, 3}}
+---
+- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9,
+ 2, 3]]
+...
+parts = {}
+---
+...
+parts[1] = {1, 'unsigned', path='[3][2].b'}
+---
+...
+num_idx = s:create_index('numeric', {parts = parts})
+---
+...
+s:insert{{1, 2, {3, {a = 'str3', b = 9}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {0}}
+---
+- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]]
+...
+num_idx:get(2)
+---
+- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9,
+ 2, 3]]
+...
+num_idx:select()
+---
+- - [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [
+ 9, 2, 3]]
+ - [[1, 2, [3, {1: 3, 'a': 'str', 'b': 5}]], ['c', {'d': ['e', 'f'], 'e': 'g'}],
+ 6, [1, 2, 3]]
+ - [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [
+ 0]]
+...
+num_idx:max()
+---
+- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]]
+...
+num_idx:min()
+---
+- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9,
+ 2, 3]]
+...
+assert(crosspart_idx:max() == num_idx:max())
+---
+- true
+...
+assert(crosspart_idx:min() == num_idx:min())
+---
+- true
+...
+trap_idx:max()
+---
+- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9,
+ 2, 3]]
+...
+trap_idx:min()
+---
+- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]]
+...
+s:drop()
+---
+...
+s = box.schema.space.create('withdata', {engine = engine})
+---
+...
+pk_simplified = s:create_index('primary', { type = 'tree', parts = {{1, 'unsigned'}}})
+---
+...
+assert(pk_simplified.path == box.NULL)
+---
+- true
+...
+idx = s:create_index('idx', {parts = {{2, 'integer', path = 'a'}}})
+---
+...
+s:insert{31, {a = 1, aa = -1}}
+---
+- [31, {'a': 1, 'aa': -1}]
+...
+s:insert{22, {a = 2, aa = -2}}
+---
+- [22, {'a': 2, 'aa': -2}]
+...
+s:insert{13, {a = 3, aa = -3}}
+---
+- [13, {'a': 3, 'aa': -3}]
+...
+idx:select()
+---
+- - [31, {'a': 1, 'aa': -1}]
+ - [22, {'a': 2, 'aa': -2}]
+ - [13, {'a': 3, 'aa': -3}]
+...
+idx:alter({parts = {{2, 'integer', path = 'aa'}}})
+---
+...
+idx:select()
+---
+- - [13, {'a': 3, 'aa': -3}]
+ - [22, {'a': 2, 'aa': -2}]
+ - [31, {'a': 1, 'aa': -1}]
+...
+s:drop()
+---
+...
+-- incompatible format change
+s = box.schema.space.create('test')
+---
+...
+i = s:create_index('pk', {parts = {{1, 'integer', path = '[1]'}}})
+---
+...
+s:insert{{-1}}
+---
+- [[-1]]
+...
+i:alter{parts = {{1, 'string', path = '[1]'}}}
+---
+- error: 'Tuple field [1][1] type does not match one required by operation: expected
+ string'
+...
+s:insert{{'a'}}
+---
+- error: 'Tuple field [1][1] type does not match one required by operation: expected
+ integer'
+...
+i:drop()
+---
+...
+i = s:create_index('pk', {parts = {{1, 'integer', path = '[1].FIO'}}})
+---
+...
+s:insert{{{FIO=-1}}}
+---
+- [[{'FIO': -1}]]
+...
+i:alter{parts = {{1, 'integer', path = '[1][1]'}}}
+---
+- error: 'Tuple field [1][1] type does not match one required by operation: expected
+ array'
+...
+i:alter{parts = {{1, 'integer', path = '[1].FIO[1]'}}}
+---
+- error: 'Tuple field [1][1]["FIO"] type does not match one required by operation:
+ expected array'
+...
+s:drop()
+---
+...
+engine = nil
+---
+...
+test_run = nil
+---
+...
diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua
new file mode 100644
index 000000000..2a20fc3e5
--- /dev/null
+++ b/test/engine/json.test.lua
@@ -0,0 +1,129 @@
+test_run = require('test_run').new()
+engine = test_run:get_cfg('engine')
+--
+-- gh-1012: Indexes for JSON-defined paths.
+--
+s = box.schema.space.create('withdata', {engine = engine})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = 'FIO'}}})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO'}, {3, 'str', path = 'FIO.fname'}}})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}})
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO....fname'}}})
+idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO.fname', is_nullable = false}, {3, 'str', path = '["FIO"]["sname"]'}}})
+assert(idx ~= nil)
+assert(idx.parts[2].path == 'FIO.fname')
+format = {{'id', 'unsigned'}, {'meta', 'unsigned'}, {'data', 'array'}, {'age', 'unsigned'}, {'level', 'unsigned'}}
+s:format(format)
+format = {{'id', 'unsigned'}, {'meta', 'unsigned'}, {'data', 'map'}, {'age', 'unsigned'}, {'level', 'unsigned'}}
+s:format(format)
+s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = 'FIO.fname'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5}
+s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5}
+s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5}
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5}
+s:insert{7, 7, {town = 'Moscow', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5}
+idx:select()
+idx:min()
+idx:max()
+s:drop()
+
+s = box.schema.create_space('withdata', {engine = engine})
+parts = {}
+parts[1] = {1, 'unsigned', path='[2]'}
+pk = s:create_index('pk', {parts = parts})
+s:insert{{1, 2}, 3}
+s:upsert({{box.null, 2}}, {{'+', 2, 5}})
+s:get(2)
+s:drop()
+
+-- Create index on space with data
+s = box.schema.space.create('withdata', {engine = engine})
+pk = s:create_index('primary', { type = 'tree' })
+s:insert{1, 7, {town = 'London', FIO = 1234}, 4, 5}
+s:insert{2, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+s:insert{3, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+s:insert{4, 7, {town = 'London', FIO = {1,2,3}}, 4, 5}
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+_ = s:delete(1)
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+_ = s:delete(2)
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}})
+_ = s:delete(4)
+idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]', is_nullable = true}, {3, 'str', path = '["FIO"]["sname"]'}, {3, 'str', path = '["FIO"]["extra"]', is_nullable = true}}})
+assert(idx ~= nil)
+s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '["FIO"]["fname"]'}}})
+idx2 = s:create_index('test2', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}}})
+assert(idx2 ~= nil)
+t = s:insert{5, 7, {town = 'Matrix', FIO = {fname = 'Agent', sname = 'Smith'}}, 4, 5}
+idx:select()
+idx:min()
+idx:max()
+idx:drop()
+s:drop()
+
+-- Test complex JSON indexes
+s = box.schema.space.create('withdata', {engine = engine})
+parts = {}
+parts[1] = {1, 'str', path='[3][2].a'}
+parts[2] = {1, 'unsigned', path = '[3][1]'}
+parts[3] = {2, 'str', path = '[2].d[1]'}
+pk = s:create_index('primary', { type = 'tree', parts = parts})
+s:insert{{1, 2, {3, {3, a = 'str', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {1, 2, 3}}
+s:insert{{1, 2, {3, {a = 'str', b = 1}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6}
+parts = {}
+parts[1] = {4, 'unsigned', path='[1]', is_nullable = false}
+parts[2] = {4, 'unsigned', path='[2]', is_nullable = true}
+parts[3] = {4, 'unsigned', path='[4]', is_nullable = true}
+trap_idx = s:create_index('trap', { type = 'tree', parts = parts})
+s:insert{{1, 2, {3, {3, a = 'str2', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {}}
+parts = {}
+parts[1] = {1, 'unsigned', path='[3][2].b' }
+parts[2] = {3, 'unsigned'}
+crosspart_idx = s:create_index('crosspart', { parts = parts})
+s:insert{{1, 2, {3, {a = 'str2', b = 2}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {9, 2, 3}}
+parts = {}
+parts[1] = {1, 'unsigned', path='[3][2].b'}
+num_idx = s:create_index('numeric', {parts = parts})
+s:insert{{1, 2, {3, {a = 'str3', b = 9}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {0}}
+num_idx:get(2)
+num_idx:select()
+num_idx:max()
+num_idx:min()
+assert(crosspart_idx:max() == num_idx:max())
+assert(crosspart_idx:min() == num_idx:min())
+trap_idx:max()
+trap_idx:min()
+s:drop()
+
+s = box.schema.space.create('withdata', {engine = engine})
+pk_simplified = s:create_index('primary', { type = 'tree', parts = {{1, 'unsigned'}}})
+assert(pk_simplified.path == box.NULL)
+idx = s:create_index('idx', {parts = {{2, 'integer', path = 'a'}}})
+s:insert{31, {a = 1, aa = -1}}
+s:insert{22, {a = 2, aa = -2}}
+s:insert{13, {a = 3, aa = -3}}
+idx:select()
+idx:alter({parts = {{2, 'integer', path = 'aa'}}})
+idx:select()
+s:drop()
+
+-- incompatible format change
+s = box.schema.space.create('test')
+i = s:create_index('pk', {parts = {{1, 'integer', path = '[1]'}}})
+s:insert{{-1}}
+i:alter{parts = {{1, 'string', path = '[1]'}}}
+s:insert{{'a'}}
+i:drop()
+i = s:create_index('pk', {parts = {{1, 'integer', path = '[1].FIO'}}})
+s:insert{{{FIO=-1}}}
+i:alter{parts = {{1, 'integer', path = '[1][1]'}}}
+i:alter{parts = {{1, 'integer', path = '[1].FIO[1]'}}}
+s:drop()
+
+engine = nil
+test_run = nil
+
--
2.19.2
next prev parent reply other threads:[~2019-01-09 8:29 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-01-09 8:29 [PATCH v7 0/5] box: Indexes by JSON path Kirill Shcherbatov
2019-01-09 8:29 ` Kirill Shcherbatov [this message]
2019-01-10 10:16 ` [PATCH v7 1/5] box: introduce JSON Indexes Vladimir Davydov
2019-01-09 8:29 ` [PATCH v7 2/5] box: introduce has_json_paths flag in templates Kirill Shcherbatov
2019-01-09 8:29 ` [PATCH v7 3/5] box: tune tuple_field_raw_by_path for indexed data Kirill Shcherbatov
2019-01-09 8:29 ` [PATCH v7 4/5] box: introduce offset_slot cache in key_part Kirill Shcherbatov
2019-01-10 11:28 ` Vladimir Davydov
2019-01-09 8:29 ` [PATCH v7 5/5] box: specify indexes in user-friendly form Kirill Shcherbatov
2019-01-10 10:21 ` Vladimir Davydov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=90b211d01c5a7af0e5b3015c1a33b0b27d432ab0.1547022001.git.kshcherbatov@tarantool.org \
--to=kshcherbatov@tarantool.org \
--cc=kostja@tarantool.org \
--cc=tarantool-patches@freelists.org \
--cc=vdavydov.dev@gmail.com \
--subject='Re: [PATCH v7 1/5] box: introduce JSON Indexes' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox