[tarantool-patches] [PATCH v1 3/5] box: introduce path field in key_part
Kirill Shcherbatov
kshcherbatov at tarantool.org
Mon Aug 6 15:27:00 MSK 2018
As we need to store a user-defined JSON path in key_part
and key_part_def, we have introduced the path and path_len
fields. The JSON path is verified and transformed to canonical
form when the index msgpack is unpacked.
Because field names specified in the format can be changed,
a key_part path persisted in Tarantool must always start
with a first-level field access by array index (not by name).
Part of #1012.
---
src/box/key_def.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++----
src/box/key_def.h | 13 +++-
src/box/lua/space.cc | 5 ++
src/box/schema.cc | 8 +--
src/box/vy_log.c | 3 +-
5 files changed, 205 insertions(+), 21 deletions(-)
diff --git a/src/box/key_def.c b/src/box/key_def.c
index 8a4262b..79e07f8 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -35,12 +35,15 @@
#include "column_mask.h"
#include "schema_def.h"
#include "coll_id_cache.h"
+#include "fiber.h"
+#include "json/path.h"
static const struct key_part_def key_part_def_default = {
0,
field_type_MAX,
COLL_NONE,
false,
+ NULL
};
static int64_t
@@ -53,6 +56,7 @@ part_type_by_name_wrapper(const char *str, uint32_t len)
#define PART_OPT_FIELD "field"
#define PART_OPT_COLLATION "collation"
#define PART_OPT_NULLABILITY "is_nullable"
+#define PART_OPT_PATH "path"
const struct opt_def part_def_reg[] = {
OPT_DEF_ENUM(PART_OPT_TYPE, field_type, struct key_part_def, type,
@@ -61,6 +65,7 @@ const struct opt_def part_def_reg[] = {
OPT_DEF(PART_OPT_COLLATION, OPT_UINT32, struct key_part_def, coll_id),
OPT_DEF(PART_OPT_NULLABILITY, OPT_BOOL, struct key_part_def,
is_nullable),
+ OPT_DEF(PART_OPT_PATH, OPT_STRPTR, struct key_part_def, path),
OPT_END,
};
@@ -103,7 +108,27 @@ key_def_dup(const struct key_def *src)
return NULL;
}
memcpy(res, src, sz);
+ uint32_t i = 0;
+ for (; i < src->part_count; i++) {
+ if (src->parts[i].path == NULL) {
+ res->parts[i].path = NULL;
+ continue;
+ }
+ char *path = strdup(src->parts[i].path);
+ if (path == NULL) {
+ diag_set(OutOfMemory, src->parts[i].path_len + 1,
+ "strdup", "path");
+ goto error;
+ }
+ res->parts[i].path = path;
+ }
return res;
+
+error:
+ for (uint32_t j = 0; j < i; j++)
+ free((void *)res->parts[j].path);
+ free(res);
+ return NULL;
}
void
@@ -118,6 +143,8 @@ key_def_swap(struct key_def *old_def, struct key_def *new_def)
void
key_def_delete(struct key_def *def)
{
+ for (uint32_t i = 0; i < def->part_count; i++)
+ free((void *)def->parts[i].path);
free(def);
}
@@ -160,19 +187,34 @@ key_def_new_with_parts(struct key_part_def *parts, uint32_t part_count)
if (coll_id == NULL) {
diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
i + 1, "collation was not found by ID");
- key_def_delete(def);
- return NULL;
+ goto error;
}
coll = coll_id->coll;
}
+ char *path = NULL;
+ if (part->path != NULL &&
+ (path = strdup(part->path)) == NULL) {
+ diag_set(OutOfMemory, strlen(part->path) + 1, "strdup",
+ "path");
+ goto error;
+ }
key_def_set_part(def, i, part->fieldno, part->type,
- part->is_nullable, coll, part->coll_id);
+ part->is_nullable, coll, part->coll_id,
+ path);
}
return def;
+error:
+ /*
+ * Uninitialized fields need no special care: they are
+ * filled with 0 by calloc.
+ */
+ key_def_delete(def);
+ return NULL;
}
-void
-key_def_dump_parts(const struct key_def *def, struct key_part_def *parts)
+int
+key_def_dump_parts(struct region *pool, const struct key_def *def,
+ struct key_part_def *parts)
{
for (uint32_t i = 0; i < def->part_count; i++) {
const struct key_part *part = &def->parts[i];
@@ -181,7 +223,20 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts)
part_def->type = part->type;
part_def->is_nullable = part->is_nullable;
part_def->coll_id = part->coll_id;
+ if (part->path != NULL) {
+ part_def->path = region_alloc(pool, part->path_len + 1);
+ if (part_def->path == NULL) {
+ diag_set(OutOfMemory, part->path_len + 1,
+ "region_alloc", "part_def->path");
+ return -1;
+ }
+ memcpy(part_def->path, part->path, part->path_len);
+ part_def->path[part->path_len] = '\0';
+ } else {
+ part_def->path = NULL;
+ }
}
+ return 0;
}
box_key_def_t *
@@ -195,7 +250,7 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
key_def_set_part(key_def, item, fields[item],
(enum field_type)types[item],
key_part_def_default.is_nullable, NULL,
- COLL_NONE);
+ COLL_NONE, NULL);
}
return key_def;
}
@@ -241,6 +296,11 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1,
if (part1->is_nullable != part2->is_nullable)
return part1->is_nullable <
part2->is_nullable ? -1 : 1;
+ /* Lexicographic strings order. */
+ uint32_t len = MIN(part1->path_len, part2->path_len);
+ int rc = 0;
+ if ((rc = strncmp(part1->path, part2->path, len)) != 0)
+ return rc;
}
return part_count1 < part_count2 ? -1 : part_count1 > part_count2;
}
@@ -248,7 +308,7 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1,
void
key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
enum field_type type, bool is_nullable, struct coll *coll,
- uint32_t coll_id)
+ uint32_t coll_id, char *path)
{
assert(part_no < def->part_count);
assert(type < field_type_MAX);
@@ -260,6 +320,8 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
def->parts[part_no].coll_id = coll_id;
def->parts[part_no].slot_cache = TUPLE_OFFSET_SLOT_NIL;
def->parts[part_no].format_epoch = 0;
+ def->parts[part_no].path = path;
+ def->parts[part_no].path_len = path != NULL ? strlen(path) : 0;
column_mask_set_fieldno(&def->column_mask, fieldno);
/**
* When all parts are set, initialize the tuple
@@ -304,8 +366,15 @@ key_def_snprint_parts(char *buf, int size, const struct key_part_def *parts,
for (uint32_t i = 0; i < part_count; i++) {
const struct key_part_def *part = &parts[i];
assert(part->type < field_type_MAX);
- SNPRINT(total, snprintf, buf, size, "%d, '%s'",
- (int)part->fieldno, field_type_strs[part->type]);
+ if (part->path != NULL) {
+ SNPRINT(total, snprintf, buf, size, "%d, '%s', '%s'",
+ (int) part->fieldno, part->path,
+ field_type_strs[part->type]);
+ } else {
+ SNPRINT(total, snprintf, buf, size, "%d, '%s'",
+ (int) part->fieldno,
+ field_type_strs[part->type]);
+ }
if (i < part_count - 1)
SNPRINT(total, snprintf, buf, size, ", ");
}
@@ -324,6 +393,8 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count)
count++;
if (part->is_nullable)
count++;
+ if (part->path != NULL)
+ count++;
size += mp_sizeof_map(count);
size += mp_sizeof_str(strlen(PART_OPT_FIELD));
size += mp_sizeof_uint(part->fieldno);
@@ -338,6 +409,10 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count)
size += mp_sizeof_str(strlen(PART_OPT_NULLABILITY));
size += mp_sizeof_bool(part->is_nullable);
}
+ if (part->path != NULL) {
+ size += mp_sizeof_str(strlen(PART_OPT_PATH));
+ size += mp_sizeof_str(strlen(part->path));
+ }
}
return size;
}
@@ -351,6 +426,8 @@ key_def_encode_parts(char *data, const struct key_part_def *parts,
int count = 2;
if (part->coll_id != COLL_NONE)
count++;
+ if (part->path != NULL)
+ count++;
if (part->is_nullable)
count++;
data = mp_encode_map(data, count);
@@ -372,6 +449,12 @@ key_def_encode_parts(char *data, const struct key_part_def *parts,
strlen(PART_OPT_NULLABILITY));
data = mp_encode_bool(data, part->is_nullable);
}
+ if (part->path != NULL) {
+ data = mp_encode_str(data, PART_OPT_PATH,
+ strlen(PART_OPT_PATH));
+ data = mp_encode_str(data, part->path,
+ strlen(part->path));
+ }
}
return data;
}
@@ -432,6 +515,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count,
fields[part->fieldno].is_nullable :
key_part_def_default.is_nullable);
part->coll_id = COLL_NONE;
+ part->path = NULL;
}
return 0;
}
@@ -445,8 +529,9 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
return key_def_decode_parts_166(parts, part_count, data,
fields, field_count);
}
+ struct key_part_def *part;
for (uint32_t i = 0; i < part_count; i++) {
- struct key_part_def *part = &parts[i];
+ part = &parts[i];
if (mp_typeof(**data) != MP_MAP) {
diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
i + TUPLE_INDEX_BASE,
@@ -456,7 +541,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
*part = key_part_def_default;
if (opts_decode(part, part_def_reg, data,
ER_WRONG_INDEX_OPTIONS, i + TUPLE_INDEX_BASE,
- NULL) != 0)
+ &fiber()->gc) != 0)
return -1;
if (part->type == field_type_MAX) {
diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
@@ -473,8 +558,75 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
"string and scalar parts");
return -1;
}
+ if (part->path != NULL) {
+ struct region *region = &fiber()->gc;
+ size_t path_len = strlen(part->path);
+ struct json_path_parser parser;
+ struct json_path_node node;
+ json_path_parser_create(&parser, part->path, path_len);
+ /*
+ * A worst-case scenario is .a -> ["a"]
+ * i.e. 3*path_len + 1 is enough.
+ */
+ uint32_t size = region_used(region);
+ char *path =
+ region_alloc(region, 3 * path_len + 1);
+ if (path == NULL) {
+ diag_set(OutOfMemory, 3 * path_len + 1,
+ "region_alloc", "path");
+ return -1;
+ }
+ part->path = path;
+ int rc = json_path_next(&parser, &node);
+ if (rc != 0)
+ goto error_invalid_json;
+ if (node.type != JSON_PATH_NUM) {
+ diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
+ part->fieldno,
+ "invalid JSON path: first part should "
+ "be defined as array");
+ return -1;
+ }
+ if (node.num - TUPLE_INDEX_BASE != part->fieldno) {
+ diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
+ part->fieldno,
+ "invalid JSON path: first part refers "
+ "to invalid field");
+ return -1;
+ }
+ uint32_t lexemes = 0;
+ do {
+ if (node.type == JSON_PATH_NUM) {
+ path += sprintf(path, "[%u]",
+ (unsigned)node.num);
+ } else if (node.type == JSON_PATH_STR) {
+ path += sprintf(path, "[\"%.*s\"]",
+ node.len, node.str);
+ } else {
+ unreachable();
+ }
+ lexemes++;
+ } while ((rc = json_path_next(&parser, &node)) == 0 &&
+ node.type != JSON_PATH_END);
+ if (rc != 0 || node.type != JSON_PATH_END)
+ goto error_invalid_json;
+ /* JSON index is useless. */
+ if (lexemes == 1) {
+ region_truncate(region, size);
+ part->path = NULL;
+ } else {
+ region_truncate(region,
+ size + (path - part->path + 1));
+ }
+ }
}
return 0;
+
+error_invalid_json:
+ diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
+ part->fieldno + TUPLE_INDEX_BASE,
+ "invalid JSON path: path has invalid structure");
+ return -1;
}
int
@@ -497,6 +649,7 @@ key_def_decode_parts_160(struct key_part_def *parts, uint32_t part_count,
fields[part->fieldno].is_nullable :
key_part_def_default.is_nullable);
part->coll_id = COLL_NONE;
+ part->path = NULL;
}
return 0;
}
@@ -558,8 +711,15 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
part = first->parts;
end = part + first->part_count;
for (; part != end; part++) {
+ char *path = NULL;
+ if (part->path != NULL && (path = strdup(part->path)) == NULL) {
+ diag_set(OutOfMemory, part->path_len + 1, "strdup",
+ "path");
+ goto error;
+ }
key_def_set_part(new_def, pos++, part->fieldno, part->type,
- part->is_nullable, part->coll, part->coll_id);
+ part->is_nullable, part->coll, part->coll_id,
+ path);
}
/* Set-append second key def's part to the new key def. */
@@ -568,10 +728,21 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
for (; part != end; part++) {
if (key_def_find(first, part->fieldno))
continue;
+ char *path = NULL;
+ if (part->path != NULL && (path = strdup(part->path)) == NULL) {
+ diag_set(OutOfMemory, part->path_len + 1, "strdup",
+ "path");
+ goto error;
+ }
key_def_set_part(new_def, pos++, part->fieldno, part->type,
- part->is_nullable, part->coll, part->coll_id);
+ part->is_nullable, part->coll, part->coll_id,
+ path);
}
return new_def;
+
+error:
+ key_def_delete(new_def);
+ return NULL;
}
int
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 42c054c..f14a928 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -54,6 +54,8 @@ struct key_part_def {
uint32_t coll_id;
/** True if a key part can store NULLs. */
bool is_nullable;
+ /** JSON path to data. */
+ char *path;
};
/**
@@ -78,6 +80,10 @@ struct key_part {
uint64_t format_epoch;
/** Cache for corresponding tuple_format slot_offset. */
int32_t slot_cache;
+ /** JSON path to data. */
+ const char *path;
+ /** JSON path length. */
+ uint32_t path_len;
};
struct key_def;
@@ -246,8 +252,9 @@ key_def_new_with_parts(struct key_part_def *parts, uint32_t part_count);
/**
* Dump part definitions of the given key def.
*/
-void
-key_def_dump_parts(const struct key_def *def, struct key_part_def *parts);
+int
+key_def_dump_parts(struct region *pool, const struct key_def *def,
+ struct key_part_def *parts);
/**
* Set a single key part in a key def.
@@ -256,7 +263,7 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts);
void
key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
enum field_type type, bool is_nullable, struct coll *coll,
- uint32_t coll_id);
+ uint32_t coll_id, char *path);
/**
* Update 'has_optional_parts' of @a key_def with correspondence
diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc
index 580e0ea..98bb969 100644
--- a/src/box/lua/space.cc
+++ b/src/box/lua/space.cc
@@ -295,6 +295,11 @@ lbox_fillspace(struct lua_State *L, struct space *space, int i)
lua_pushnumber(L, part->fieldno + TUPLE_INDEX_BASE);
lua_setfield(L, -2, "fieldno");
+ if (part->path != NULL) {
+ lua_pushstring(L, part->path);
+ lua_setfield(L, -2, "path");
+ }
+
lua_pushboolean(L, part->is_nullable);
lua_setfield(L, -2, "is_nullable");
diff --git a/src/box/schema.cc b/src/box/schema.cc
index 433f52c..a01126a 100644
--- a/src/box/schema.cc
+++ b/src/box/schema.cc
@@ -291,13 +291,13 @@ schema_init()
auto key_def_guard = make_scoped_guard([&] { key_def_delete(key_def); });
key_def_set_part(key_def, 0 /* part no */, 0 /* field no */,
- FIELD_TYPE_STRING, false, NULL, COLL_NONE);
+ FIELD_TYPE_STRING, false, NULL, COLL_NONE, NULL);
sc_space_new(BOX_SCHEMA_ID, "_schema", key_def, &on_replace_schema,
NULL);
/* _space - home for all spaces. */
key_def_set_part(key_def, 0 /* part no */, 0 /* field no */,
- FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE);
+ FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE, NULL);
/* _collation - collation description. */
sc_space_new(BOX_COLLATION_ID, "_collation", key_def,
@@ -345,10 +345,10 @@ schema_init()
diag_raise();
/* space no */
key_def_set_part(key_def, 0 /* part no */, 0 /* field no */,
- FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE);
+ FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE, NULL);
/* index no */
key_def_set_part(key_def, 1 /* part no */, 1 /* field no */,
- FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE);
+ FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE, NULL);
sc_space_new(BOX_INDEX_ID, "_index", key_def,
&alter_space_on_replace_index, &on_stmt_begin_index);
}
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index 3843cad..b1c6659 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -711,7 +711,8 @@ vy_log_record_dup(struct region *pool, const struct vy_log_record *src)
"struct key_part_def");
goto err;
}
- key_def_dump_parts(src->key_def, dst->key_parts);
+ if (key_def_dump_parts(pool, src->key_def, dst->key_parts) != 0)
+ goto err;
dst->key_part_count = src->key_def->part_count;
dst->key_def = NULL;
}
--
2.7.4
More information about the Tarantool-patches
mailing list