Tarantool development patches archive
 help / color / mirror / Atom feed
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com
Cc: kostja@tarantool.org, Kirill Shcherbatov <kshcherbatov@tarantool.org>
Subject: [PATCH v6 1/8] box: refactor tuple_validate_raw
Date: Mon, 17 Dec 2018 09:52:45 +0300	[thread overview]
Message-ID: <e2d1a57e538724dce09b18624900ab6415e8311f.1544995259.git.kshcherbatov@tarantool.org> (raw)
In-Reply-To: <cover.1544995259.git.kshcherbatov@tarantool.org>

Since the tuple_validate_raw and tuple_init_field_map functions
make the same data checks, we implemented tuple_validate_raw via
tuple_init_field_map called with region memory chunk is passed
as field map. This is required because in subsequent patches
the tuple_init_field_map routine logic will be complicated,
and we want to avoid writing the same checks twice.

Introduced the format->field_map_template allocation - a memory
chunk having field_map_size and initialized with 0 for nullable
fields and UINT32_MAX marker for other.
The tuple_init_field_map copies template data to real field_map
and do map initialization for indexed fields. Then the new
routine tuple_field_map_validate scans initialized field_map and
looks for UINT32_MAX marker.
This is only integrity assert check now. But later it will be
used to raise errors when JSON document has invalid structure.

Needed for #1012
---
 src/box/tuple.c        |  35 +++--------
 src/box/tuple_format.c | 129 +++++++++++++++++++++++++++++++----------
 src/box/tuple_format.h |   8 +++
 3 files changed, 116 insertions(+), 56 deletions(-)

diff --git a/src/box/tuple.c b/src/box/tuple.c
index aae1c3cdd..4ad932f07 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -141,35 +141,18 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple)
 	if (tuple_format_field_count(format) == 0)
 		return 0; /* Nothing to check */
 
-	/* Check to see if the tuple has a sufficient number of fields. */
-	uint32_t field_count = mp_decode_array(&tuple);
-	if (format->exact_field_count > 0 &&
-	    format->exact_field_count != field_count) {
-		diag_set(ClientError, ER_EXACT_FIELD_COUNT,
-			 (unsigned) field_count,
-			 (unsigned) format->exact_field_count);
+	struct region *region = &fiber()->gc;
+	uint32_t used = region_used(region);
+	uint32_t *field_map = region_alloc(region, format->field_map_size);
+	if (field_map == NULL) {
+		diag_set(OutOfMemory, format->field_map_size, "region_alloc",
+			 "field_map");
 		return -1;
 	}
-	if (unlikely(field_count < format->min_field_count)) {
-		diag_set(ClientError, ER_MIN_FIELD_COUNT,
-			 (unsigned) field_count,
-			 (unsigned) format->min_field_count);
+	field_map = (uint32_t *)((char *)field_map + format->field_map_size);
+	if (tuple_init_field_map(format, field_map, tuple, true) != 0)
 		return -1;
-	}
-
-	/* Check field types */
-	struct tuple_field *field = tuple_format_field(format, 0);
-	uint32_t i = 0;
-	uint32_t defined_field_count =
-		MIN(field_count, tuple_format_field_count(format));
-	for (; i < defined_field_count; ++i) {
-		field = tuple_format_field(format, i);
-		if (key_mp_type_validate(field->type, mp_typeof(*tuple),
-					 ER_FIELD_TYPE, i + TUPLE_INDEX_BASE,
-					 tuple_field_is_nullable(field)))
-			return -1;
-		mp_next(&tuple);
-	}
+	region_truncate(region, used);
 	return 0;
 }
 
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 3af39a37e..1ed3656e4 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -208,6 +208,42 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
 		return -1;
 	}
 	format->field_map_size = field_map_size;
+	/**
+	 * Allocate field_map_template used for field map
+	 * initialization and validation.
+	 * Read tuple_format:field_map_template description for
+	 * more details.
+	 */
+	uint32_t *field_map_template = malloc(field_map_size);
+	if (field_map_template == NULL) {
+		diag_set(OutOfMemory, field_map_size, "malloc",
+			 "field_map_template");
+		return -1;
+	}
+	format->field_map_template = field_map_template;
+	/*
+	 * Mark all template_field_map items as uninitialized
+	 * with UINT32_MAX magic value.
+	 */
+	field_map_template = (uint32_t *)((char *)field_map_template +
+					  format->field_map_size);
+	for (int i = -1; i >= current_slot; i--)
+		field_map_template[i] = UINT32_MAX;
+	struct tuple_field *field;
+	struct json_token *root = (struct json_token *)&format->fields.root;
+	json_tree_foreach_entry_preorder(field, root, struct tuple_field,
+					 token) {
+		/*
+		 * Initialize nullable fields in
+		 * field_map_template with 0 as we shouldn't
+		 * raise error when field_map item for nullable
+		 * field was not calculated during tuple parse
+		 * (when tuple lacks such field).
+		 */
+		if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL &&
+		    tuple_field_is_nullable(field))
+			field_map_template[field->offset_slot] = 0;
+	}
 	return 0;
 }
 
@@ -330,6 +366,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
 	format->index_field_count = index_field_count;
 	format->exact_field_count = 0;
 	format->min_field_count = 0;
+	format->field_map_template = NULL;
 	return format;
 error:
 	tuple_format_destroy_fields(format);
@@ -341,6 +378,7 @@ error:
 static inline void
 tuple_format_destroy(struct tuple_format *format)
 {
+	free(format->field_map_template);
 	tuple_format_destroy_fields(format);
 	tuple_dictionary_unref(format->dict);
 }
@@ -424,6 +462,25 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
 	return true;
 }
 
+/**
+ * Verify that all offset_slots has been initialized in field_map.
+ * Routine relies on the field_map memory has been filled from the
+ * field_map_template containing UINT32_MAX marker for required
+ * fields.
+ */
+static int
+tuple_field_map_validate(const struct tuple_format *format, uint32_t *field_map)
+{
+	int32_t field_map_items =
+		(int32_t)(format->field_map_size/sizeof(field_map[0]));
+	for (int32_t i = -1; i >= -field_map_items; i--) {
+		if (field_map[i] != UINT32_MAX)
+			continue;
+		return -1;
+	}
+	return 0;
+}
+
 /** @sa declaration for details. */
 int
 tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
@@ -449,44 +506,56 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
 			 (unsigned) format->min_field_count);
 		return -1;
 	}
-
-	/* first field is simply accessible, so we do not store offset to it */
-	enum mp_type mp_type = mp_typeof(*pos);
-	const struct tuple_field *field =
-		tuple_format_field((struct tuple_format *)format, 0);
-	if (validate &&
-	    key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE,
-				 TUPLE_INDEX_BASE, tuple_field_is_nullable(field)))
-		return -1;
-	mp_next(&pos);
-	/* other fields...*/
-	uint32_t i = 1;
 	uint32_t defined_field_count = MIN(field_count, validate ?
 					   tuple_format_field_count(format) :
 					   format->index_field_count);
-	if (field_count < format->index_field_count) {
-		/*
-		 * Nullify field map to be able to detect by 0,
-		 * which key fields are absent in tuple_field().
-		 */
+	/*
+	 * Initialize memory with zeros when no validation is
+	 * required as it is reserved field_map value for nullable
+	 * fields.
+	 */
+	if (!validate) {
 		memset((char *)field_map - format->field_map_size, 0,
 		       format->field_map_size);
+	} else {
+		memcpy((char *)field_map - format->field_map_size,
+		       format->field_map_template, format->field_map_size);
 	}
-	for (; i < defined_field_count; ++i) {
-		field = tuple_format_field((struct tuple_format *)format, i);
-		mp_type = mp_typeof(*pos);
-		if (validate &&
-		    key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE,
-					 i + TUPLE_INDEX_BASE,
-					 tuple_field_is_nullable(field)))
-			return -1;
-		if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
-			field_map[field->offset_slot] =
-				(uint32_t) (pos - tuple);
+	struct json_tree *tree = (struct json_tree *)&format->fields;
+	struct json_token *parent = &tree->root;
+	struct json_token token;
+	token.type = JSON_TOKEN_NUM;
+	token.num = 0;
+	while ((uint32_t)token.num < defined_field_count) {
+		struct tuple_field *field =
+			json_tree_lookup_entry(tree, parent, &token,
+					       struct tuple_field, token);
+		enum mp_type type = mp_typeof(*pos);
+		if (field != NULL) {
+			bool is_nullable = tuple_field_is_nullable(field);
+			if (validate &&
+			    key_mp_type_validate(field->type, type,
+						 ER_FIELD_TYPE, token.num + 1,
+						 is_nullable) != 0)
+				return -1;
+			if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
+				field_map[field->offset_slot] =
+					(uint32_t)(pos - tuple);
+			}
 		}
+		token.num++;
 		mp_next(&pos);
-	}
-	return 0;
+	};
+	if (!validate)
+		return 0;
+	int rc = tuple_field_map_validate(format, field_map);
+	/*
+	 * As assert field_count >= min_field_count has already
+	 * tested and all first-level fields has parsed, all
+	 * offset_slots must be initialized.
+	 */
+	assert(rc == 0);
+	return rc;
 }
 
 uint32_t
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index 949337807..e61e271d5 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -154,6 +154,14 @@ struct tuple_format {
 	 * \sa struct tuple
 	 */
 	uint16_t field_map_size;
+	/**
+	 * Template field map initialized with zeros for nullable
+	 * and UINT32_MAX for other fields. It is used for the
+	 * tuple_init_field_map initial fill of a new field map
+	 * in order to check that all fields were filled out after
+	 * parsing a data tuple.
+	 */
+	uint32_t *field_map_template;
 	/**
 	 * If not set (== 0), any tuple in the space can have any number of
 	 * fields. If set, each tuple must have exactly this number of fields.
-- 
2.19.2

  reply	other threads:[~2018-12-17  6:52 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-17  6:52 [PATCH v6 0/8] box: Indexes by JSON path Kirill Shcherbatov
2018-12-17  6:52 ` Kirill Shcherbatov [this message]
2018-12-17  6:52 ` [PATCH v6 2/8] box: refactor ER_{FIELD_TYPE, ACTION_MISMATCH} Kirill Shcherbatov
2018-12-17  6:52 ` [PATCH v6 3/8] box: build path to field string uniformly on error Kirill Shcherbatov
2018-12-17  6:52 ` [PATCH v6 4/8] box: introduce JSON Indexes Kirill Shcherbatov
2018-12-17  6:52 ` [PATCH v6 5/8] box: introduce has_json_paths flag in templates Kirill Shcherbatov
2018-12-27 11:51   ` [tarantool-patches] " Konstantin Osipov
2018-12-27 11:52     ` Konstantin Osipov
2018-12-27 11:57       ` [tarantool-patches] " Konstantin Osipov
2018-12-17  6:52 ` [PATCH v6 6/8] box: tune tuple_field_raw_by_path for indexed data Kirill Shcherbatov
2018-12-17  6:52 ` [PATCH v6 7/8] box: introduce offset_slot cache in key_part Kirill Shcherbatov
2018-12-17  6:52 ` [PATCH v6 8/8] box: specify indexes in user-friendly form Kirill Shcherbatov
2018-12-18 20:58   ` Vladimir Davydov
2018-12-18 20:46 ` [PATCH v6 0/8] box: Indexes by JSON path Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e2d1a57e538724dce09b18624900ab6415e8311f.1544995259.git.kshcherbatov@tarantool.org \
    --to=kshcherbatov@tarantool.org \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=vdavydov.dev@gmail.com \
    --subject='Re: [PATCH v6 1/8] box: refactor tuple_validate_raw' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox