Tarantool development patches archive
 help / color / mirror / Atom feed
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org
Cc: v.shpilevoy@tarantool.org,
	Kirill Shcherbatov <kshcherbatov@tarantool.org>
Subject: [tarantool-patches] [PATCH v1 3/5] box: introduce path field in key_part
Date: Mon,  6 Aug 2018 15:27:00 +0300	[thread overview]
Message-ID: <63cb256a0206e222ee10199e7d671700e44ce2aa.1533558332.git.kshcherbatov@tarantool.org> (raw)
In-Reply-To: <cover.1533558332.git.kshcherbatov@tarantool.org>
In-Reply-To: <cover.1533558332.git.kshcherbatov@tarantool.org>

As we need to store user-defined JSON path in key_part
and key_part_def, we have introduced path and path_len
fields. JSON path is verified and transformed to canonical
form on index msgpack unpack.
Because of field names specified as format could be changed
key_part path persisted in Tarantool should be always started
with first-level field access via array index(not by name).

Part of #1012.
---
 src/box/key_def.c    | 197 +++++++++++++++++++++++++++++++++++++++++++++++----
 src/box/key_def.h    |  13 +++-
 src/box/lua/space.cc |   5 ++
 src/box/schema.cc    |   8 +--
 src/box/vy_log.c     |   3 +-
 5 files changed, 205 insertions(+), 21 deletions(-)

diff --git a/src/box/key_def.c b/src/box/key_def.c
index 8a4262b..79e07f8 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -35,12 +35,15 @@
 #include "column_mask.h"
 #include "schema_def.h"
 #include "coll_id_cache.h"
+#include "fiber.h"
+#include "json/path.h"
 
 static const struct key_part_def key_part_def_default = {
 	0,
 	field_type_MAX,
 	COLL_NONE,
 	false,
+	NULL
 };
 
 static int64_t
@@ -53,6 +56,7 @@ part_type_by_name_wrapper(const char *str, uint32_t len)
 #define PART_OPT_FIELD		"field"
 #define PART_OPT_COLLATION	"collation"
 #define PART_OPT_NULLABILITY	"is_nullable"
+#define PART_OPT_PATH		"path"
 
 const struct opt_def part_def_reg[] = {
 	OPT_DEF_ENUM(PART_OPT_TYPE, field_type, struct key_part_def, type,
@@ -61,6 +65,7 @@ const struct opt_def part_def_reg[] = {
 	OPT_DEF(PART_OPT_COLLATION, OPT_UINT32, struct key_part_def, coll_id),
 	OPT_DEF(PART_OPT_NULLABILITY, OPT_BOOL, struct key_part_def,
 		is_nullable),
+	OPT_DEF(PART_OPT_PATH, OPT_STRPTR, struct key_part_def, path),
 	OPT_END,
 };
 
@@ -103,7 +108,27 @@ key_def_dup(const struct key_def *src)
 		return NULL;
 	}
 	memcpy(res, src, sz);
+	uint32_t i = 0;
+	for (; i < src->part_count; i++) {
+		if (src->parts[i].path == NULL) {
+			res->parts[i].path = NULL;
+			continue;
+		}
+		char *path = strdup(src->parts[i].path);
+		if (path == NULL) {
+			diag_set(OutOfMemory, src->parts[i].path_len + 1,
+				"strdup", "path");
+			goto error;
+		}
+		res->parts[i].path = path;
+	}
 	return res;
+
+error:
+	for (uint32_t j = 0; j < i; j++)
+		free((void *)res->parts[j].path);
+	free(res);
+	return NULL;
 }
 
 void
@@ -118,6 +143,8 @@ key_def_swap(struct key_def *old_def, struct key_def *new_def)
 void
 key_def_delete(struct key_def *def)
 {
+	for (uint32_t i = 0; i < def->part_count; i++)
+		free((void *)def->parts[i].path);
 	free(def);
 }
 
@@ -160,19 +187,34 @@ key_def_new_with_parts(struct key_part_def *parts, uint32_t part_count)
 			if (coll_id == NULL) {
 				diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
 					 i + 1, "collation was not found by ID");
-				key_def_delete(def);
-				return NULL;
+				goto error;
 			}
 			coll = coll_id->coll;
 		}
+		char *path = NULL;
+		if (part->path != NULL &&
+		    (path = strdup(part->path)) == NULL) {
+			diag_set(OutOfMemory, strlen(part->path) + 1, "strdup",
+				"path");
+			goto error;
+		}
 		key_def_set_part(def, i, part->fieldno, part->type,
-				 part->is_nullable, coll, part->coll_id);
+				 part->is_nullable, coll, part->coll_id,
+				 path);
 	}
 	return def;
+error:
+	/*
+	 * Don't care about non-initialized fields as them filled
+	 * with 0 via calloc.
+	 */
+	key_def_delete(def);
+	return NULL;
 }
 
-void
-key_def_dump_parts(const struct key_def *def, struct key_part_def *parts)
+int
+key_def_dump_parts(struct region *pool, const struct key_def *def,
+		   struct key_part_def *parts)
 {
 	for (uint32_t i = 0; i < def->part_count; i++) {
 		const struct key_part *part = &def->parts[i];
@@ -181,7 +223,20 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts)
 		part_def->type = part->type;
 		part_def->is_nullable = part->is_nullable;
 		part_def->coll_id = part->coll_id;
+		if (part->path != NULL) {
+			part_def->path = region_alloc(pool, part->path_len + 1);
+			if (part_def->path == NULL) {
+				diag_set(OutOfMemory, part->path_len + 1,
+					 "region_alloc", "part_def->path");
+				return -1;
+			}
+			memcpy(part_def->path, part->path, part->path_len);
+			part_def->path[part->path_len] = '\0';
+		} else {
+			part_def->path = NULL;
+		}
 	}
+	return 0;
 }
 
 box_key_def_t *
@@ -195,7 +250,7 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
 		key_def_set_part(key_def, item, fields[item],
 				 (enum field_type)types[item],
 				 key_part_def_default.is_nullable, NULL,
-				 COLL_NONE);
+				 COLL_NONE, NULL);
 	}
 	return key_def;
 }
@@ -241,6 +296,11 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1,
 		if (part1->is_nullable != part2->is_nullable)
 			return part1->is_nullable <
 			       part2->is_nullable ? -1 : 1;
+		/* Lexicographic strings order. */
+		uint32_t len = MIN(part1->path_len, part2->path_len);
+		int rc = 0;
+		if ((rc = strncmp(part1->path, part2->path, len)) != 0)
+			return rc;
 	}
 	return part_count1 < part_count2 ? -1 : part_count1 > part_count2;
 }
@@ -248,7 +308,7 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1,
 void
 key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
 		 enum field_type type, bool is_nullable, struct coll *coll,
-		 uint32_t coll_id)
+		 uint32_t coll_id, char *path)
 {
 	assert(part_no < def->part_count);
 	assert(type < field_type_MAX);
@@ -260,6 +320,8 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
 	def->parts[part_no].coll_id = coll_id;
 	def->parts[part_no].slot_cache = TUPLE_OFFSET_SLOT_NIL;
 	def->parts[part_no].format_epoch = 0;
+	def->parts[part_no].path = path;
+	def->parts[part_no].path_len = path != NULL ? strlen(path) : 0;
 	column_mask_set_fieldno(&def->column_mask, fieldno);
 	/**
 	 * When all parts are set, initialize the tuple
@@ -304,8 +366,15 @@ key_def_snprint_parts(char *buf, int size, const struct key_part_def *parts,
 	for (uint32_t i = 0; i < part_count; i++) {
 		const struct key_part_def *part = &parts[i];
 		assert(part->type < field_type_MAX);
-		SNPRINT(total, snprintf, buf, size, "%d, '%s'",
-			(int)part->fieldno, field_type_strs[part->type]);
+		if (part->path != NULL) {
+			SNPRINT(total, snprintf, buf, size, "%d, '%s', '%s'",
+				(int) part->fieldno, part->path,
+				field_type_strs[part->type]);
+		} else {
+			SNPRINT(total, snprintf, buf, size, "%d, '%s'",
+				(int) part->fieldno,
+				field_type_strs[part->type]);
+		}
 		if (i < part_count - 1)
 			SNPRINT(total, snprintf, buf, size, ", ");
 	}
@@ -324,6 +393,8 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count)
 			count++;
 		if (part->is_nullable)
 			count++;
+		if (part->path != NULL)
+			count++;
 		size += mp_sizeof_map(count);
 		size += mp_sizeof_str(strlen(PART_OPT_FIELD));
 		size += mp_sizeof_uint(part->fieldno);
@@ -338,6 +409,10 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count)
 			size += mp_sizeof_str(strlen(PART_OPT_NULLABILITY));
 			size += mp_sizeof_bool(part->is_nullable);
 		}
+		if (part->path != NULL) {
+			size += mp_sizeof_str(strlen(PART_OPT_PATH));
+			size += mp_sizeof_str(strlen(part->path));
+		}
 	}
 	return size;
 }
@@ -351,6 +426,8 @@ key_def_encode_parts(char *data, const struct key_part_def *parts,
 		int count = 2;
 		if (part->coll_id != COLL_NONE)
 			count++;
+		if (part->path != NULL)
+			count++;
 		if (part->is_nullable)
 			count++;
 		data = mp_encode_map(data, count);
@@ -372,6 +449,12 @@ key_def_encode_parts(char *data, const struct key_part_def *parts,
 					     strlen(PART_OPT_NULLABILITY));
 			data = mp_encode_bool(data, part->is_nullable);
 		}
+		if (part->path != NULL) {
+			data = mp_encode_str(data, PART_OPT_PATH,
+					     strlen(PART_OPT_PATH));
+			data = mp_encode_str(data, part->path,
+					     strlen(part->path));
+		}
 	}
 	return data;
 }
@@ -432,6 +515,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count,
 				     fields[part->fieldno].is_nullable :
 				     key_part_def_default.is_nullable);
 		part->coll_id = COLL_NONE;
+		part->path = NULL;
 	}
 	return 0;
 }
@@ -445,8 +529,9 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
 		return key_def_decode_parts_166(parts, part_count, data,
 						fields, field_count);
 	}
+	struct key_part_def *part;
 	for (uint32_t i = 0; i < part_count; i++) {
-		struct key_part_def *part = &parts[i];
+		part = &parts[i];
 		if (mp_typeof(**data) != MP_MAP) {
 			diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
 				 i + TUPLE_INDEX_BASE,
@@ -456,7 +541,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
 		*part = key_part_def_default;
 		if (opts_decode(part, part_def_reg, data,
 				ER_WRONG_INDEX_OPTIONS, i + TUPLE_INDEX_BASE,
-				NULL) != 0)
+				&fiber()->gc) != 0)
 			return -1;
 		if (part->type == field_type_MAX) {
 			diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
@@ -473,8 +558,75 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
 				 "string and scalar parts");
 			return -1;
 		}
+		if (part->path != NULL) {
+			struct region *region = &fiber()->gc;
+			size_t path_len = strlen(part->path);
+			struct json_path_parser parser;
+			struct json_path_node node;
+			json_path_parser_create(&parser, part->path, path_len);
+			/*
+			 * A worst-case scenario is .a -> ["a"]
+			 * i.e. 3*path_len + 1 is enough.
+			 */
+			uint32_t size = region_used(region);
+			char *path =
+				region_alloc(region, 3 * path_len + 1);
+			if (path == NULL) {
+				diag_set(OutOfMemory, 3 * path_len + 1,
+					"region_alloc", "path");
+				return -1;
+			}
+			part->path = path;
+			int rc = json_path_next(&parser, &node);
+			if (rc != 0)
+				goto error_invalid_json;
+			if (node.type != JSON_PATH_NUM) {
+				diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
+					part->fieldno,
+					"invalid JSON path: first part should "
+					"be defined as array");
+				return -1;
+			}
+			if (node.num - TUPLE_INDEX_BASE != part->fieldno) {
+				diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
+					part->fieldno,
+					"invalid JSON path: first part refers "
+					"to invalid field");
+				return -1;
+			}
+			uint32_t lexemes = 0;
+			do {
+				if (node.type == JSON_PATH_NUM) {
+					path += sprintf(path, "[%u]",
+						(unsigned)node.num);
+				} else if (node.type == JSON_PATH_STR) {
+					path += sprintf(path, "[\"%.*s\"]",
+							node.len, node.str);
+				} else {
+					unreachable();
+				}
+				lexemes++;
+			} while ((rc = json_path_next(&parser, &node)) == 0 &&
+				 node.type != JSON_PATH_END);
+			if (rc != 0 || node.type != JSON_PATH_END)
+				goto error_invalid_json;
+			/* JSON index is useless. */
+			if (lexemes == 1) {
+				region_truncate(region, size);
+				part->path = NULL;
+			} else {
+				region_truncate(region,
+						size + (path - part->path + 1));
+			}
+		}
 	}
 	return 0;
+
+error_invalid_json:
+	diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
+		 part->fieldno + TUPLE_INDEX_BASE,
+		 "invalid JSON path: path has invalid structure");
+	return -1;
 }
 
 int
@@ -497,6 +649,7 @@ key_def_decode_parts_160(struct key_part_def *parts, uint32_t part_count,
 				     fields[part->fieldno].is_nullable :
 				     key_part_def_default.is_nullable);
 		part->coll_id = COLL_NONE;
+		part->path = NULL;
 	}
 	return 0;
 }
@@ -558,8 +711,15 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
 	part = first->parts;
 	end = part + first->part_count;
 	for (; part != end; part++) {
+		char *path = NULL;
+		if (part->path != NULL && (path = strdup(part->path)) == NULL) {
+			diag_set(OutOfMemory, part->path_len + 1, "strdup",
+				"path");
+			goto error;
+		}
 		key_def_set_part(new_def, pos++, part->fieldno, part->type,
-				 part->is_nullable, part->coll, part->coll_id);
+				 part->is_nullable, part->coll, part->coll_id,
+				 path);
 	}
 
 	/* Set-append second key def's part to the new key def. */
@@ -568,10 +728,21 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
 	for (; part != end; part++) {
 		if (key_def_find(first, part->fieldno))
 			continue;
+		char *path = NULL;
+		if (part->path != NULL && (path = strdup(part->path)) == NULL) {
+			diag_set(OutOfMemory, part->path_len + 1, "strdup",
+				"path");
+			goto error;
+		}
 		key_def_set_part(new_def, pos++, part->fieldno, part->type,
-				 part->is_nullable, part->coll, part->coll_id);
+				 part->is_nullable, part->coll, part->coll_id,
+				 path);
 	}
 	return new_def;
+
+error:
+	key_def_delete(new_def);
+	return NULL;
 }
 
 int
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 42c054c..f14a928 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -54,6 +54,8 @@ struct key_part_def {
 	uint32_t coll_id;
 	/** True if a key part can store NULLs. */
 	bool is_nullable;
+	/** JSON path to data. */
+	char *path;
 };
 
 /**
@@ -78,6 +80,10 @@ struct key_part {
 	uint64_t format_epoch;
 	/** Cache for corresponding tuple_format slot_offset. */
 	int32_t slot_cache;
+	/** JSON path to data. */
+	const char *path;
+	/** JSON path length. */
+	uint32_t path_len;
 };
 
 struct key_def;
@@ -246,8 +252,9 @@ key_def_new_with_parts(struct key_part_def *parts, uint32_t part_count);
 /**
  * Dump part definitions of the given key def.
  */
-void
-key_def_dump_parts(const struct key_def *def, struct key_part_def *parts);
+int
+key_def_dump_parts(struct region *pool, const struct key_def *def,
+		   struct key_part_def *parts);
 
 /**
  * Set a single key part in a key def.
@@ -256,7 +263,7 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts);
 void
 key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
 		 enum field_type type, bool is_nullable, struct coll *coll,
-		 uint32_t coll_id);
+		 uint32_t coll_id, char *path);
 
 /**
  * Update 'has_optional_parts' of @a key_def with correspondence
diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc
index 580e0ea..98bb969 100644
--- a/src/box/lua/space.cc
+++ b/src/box/lua/space.cc
@@ -295,6 +295,11 @@ lbox_fillspace(struct lua_State *L, struct space *space, int i)
 			lua_pushnumber(L, part->fieldno + TUPLE_INDEX_BASE);
 			lua_setfield(L, -2, "fieldno");
 
+			if (part->path != NULL) {
+				lua_pushstring(L, part->path);
+				lua_setfield(L, -2, "path");
+			}
+
 			lua_pushboolean(L, part->is_nullable);
 			lua_setfield(L, -2, "is_nullable");
 
diff --git a/src/box/schema.cc b/src/box/schema.cc
index 433f52c..a01126a 100644
--- a/src/box/schema.cc
+++ b/src/box/schema.cc
@@ -291,13 +291,13 @@ schema_init()
 	auto key_def_guard = make_scoped_guard([&] { key_def_delete(key_def); });
 
 	key_def_set_part(key_def, 0 /* part no */, 0 /* field no */,
-			 FIELD_TYPE_STRING, false, NULL, COLL_NONE);
+			 FIELD_TYPE_STRING, false, NULL, COLL_NONE, NULL);
 	sc_space_new(BOX_SCHEMA_ID, "_schema", key_def, &on_replace_schema,
 		     NULL);
 
 	/* _space - home for all spaces. */
 	key_def_set_part(key_def, 0 /* part no */, 0 /* field no */,
-			 FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE);
+			 FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE, NULL);
 
 	/* _collation - collation description. */
 	sc_space_new(BOX_COLLATION_ID, "_collation", key_def,
@@ -345,10 +345,10 @@ schema_init()
 		diag_raise();
 	/* space no */
 	key_def_set_part(key_def, 0 /* part no */, 0 /* field no */,
-			 FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE);
+			 FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE, NULL);
 	/* index no */
 	key_def_set_part(key_def, 1 /* part no */, 1 /* field no */,
-			 FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE);
+			 FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE, NULL);
 	sc_space_new(BOX_INDEX_ID, "_index", key_def,
 		     &alter_space_on_replace_index, &on_stmt_begin_index);
 }
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index 3843cad..b1c6659 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -711,7 +711,8 @@ vy_log_record_dup(struct region *pool, const struct vy_log_record *src)
 				 "struct key_part_def");
 			goto err;
 		}
-		key_def_dump_parts(src->key_def, dst->key_parts);
+		if (key_def_dump_parts(pool, src->key_def, dst->key_parts) != 0)
+			goto err;
 		dst->key_part_count = src->key_def->part_count;
 		dst->key_def = NULL;
 	}
-- 
2.7.4

  parent reply	other threads:[~2018-08-06 12:27 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-06 12:26 [tarantool-patches] [PATCH v1 0/5] box: indexes by JSON path Kirill Shcherbatov
2018-08-06 12:26 ` [tarantool-patches] [PATCH v1 1/5] rfc: describe a Tarantool JSON indexes Kirill Shcherbatov
2018-08-06 12:26 ` [tarantool-patches] [PATCH v1 2/5] box: introduce slot_cache in key_part Kirill Shcherbatov
2018-08-08 22:03   ` [tarantool-patches] " Vladislav Shpilevoy
2018-08-15 12:14     ` Kirill Shcherbatov
2018-08-06 12:27 ` Kirill Shcherbatov [this message]
2018-08-08 22:03   ` [tarantool-patches] Re: [PATCH v1 3/5] box: introduce path field " Vladislav Shpilevoy
2018-08-15 12:14     ` Kirill Shcherbatov
2018-08-06 12:27 ` [tarantool-patches] [PATCH v1 4/5] box: introduce path_hash and tuple_field tree Kirill Shcherbatov
2018-08-08 22:03   ` [tarantool-patches] " Vladislav Shpilevoy
2018-08-15 12:14     ` Kirill Shcherbatov
2018-08-06 12:27 ` [tarantool-patches] [PATCH v1 5/5] box: specify indexes in user-friendly form Kirill Shcherbatov
2018-08-08 22:03 ` [tarantool-patches] Re: [PATCH v1 0/5] box: indexes by JSON path Vladislav Shpilevoy
2018-08-15 12:14   ` Kirill Shcherbatov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=63cb256a0206e222ee10199e7d671700e44ce2aa.1533558332.git.kshcherbatov@tarantool.org \
    --to=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [tarantool-patches] [PATCH v1 3/5] box: introduce path field in key_part' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox