[PATCH v6 1/8] box: refactor tuple_validate_raw

Kirill Shcherbatov kshcherbatov at tarantool.org
Mon Dec 17 09:52:45 MSK 2018


Since the tuple_validate_raw and tuple_init_field_map functions
perform the same data checks, tuple_validate_raw is now implemented
via tuple_init_field_map, called with a region memory chunk passed
as the field map. This is required because in subsequent patches
the tuple_init_field_map routine logic will become more complicated,
and we want to avoid writing the same checks twice.

Introduce the format->field_map_template allocation - a memory
chunk of field_map_size bytes, initialized with 0 for nullable
fields and with the UINT32_MAX marker for the others.
tuple_init_field_map copies the template data to the real field_map
and initializes the map for indexed fields. Then the new
routine tuple_field_map_validate scans the initialized field_map
and looks for the UINT32_MAX marker.
For now this is only an integrity assert check, but later it will
be used to raise errors when a JSON document has an invalid structure.

Needed for #1012
---
 src/box/tuple.c        |  35 +++--------
 src/box/tuple_format.c | 129 +++++++++++++++++++++++++++++++----------
 src/box/tuple_format.h |   8 +++
 3 files changed, 116 insertions(+), 56 deletions(-)

diff --git a/src/box/tuple.c b/src/box/tuple.c
index aae1c3cdd..4ad932f07 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -141,35 +141,18 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple)
 	if (tuple_format_field_count(format) == 0)
 		return 0; /* Nothing to check */
 
-	/* Check to see if the tuple has a sufficient number of fields. */
-	uint32_t field_count = mp_decode_array(&tuple);
-	if (format->exact_field_count > 0 &&
-	    format->exact_field_count != field_count) {
-		diag_set(ClientError, ER_EXACT_FIELD_COUNT,
-			 (unsigned) field_count,
-			 (unsigned) format->exact_field_count);
+	struct region *region = &fiber()->gc;
+	uint32_t used = region_used(region);
+	uint32_t *field_map = region_alloc(region, format->field_map_size);
+	if (field_map == NULL) {
+		diag_set(OutOfMemory, format->field_map_size, "region_alloc",
+			 "field_map");
 		return -1;
 	}
-	if (unlikely(field_count < format->min_field_count)) {
-		diag_set(ClientError, ER_MIN_FIELD_COUNT,
-			 (unsigned) field_count,
-			 (unsigned) format->min_field_count);
+	field_map = (uint32_t *)((char *)field_map + format->field_map_size);
+	if (tuple_init_field_map(format, field_map, tuple, true) != 0)
 		return -1;
-	}
-
-	/* Check field types */
-	struct tuple_field *field = tuple_format_field(format, 0);
-	uint32_t i = 0;
-	uint32_t defined_field_count =
-		MIN(field_count, tuple_format_field_count(format));
-	for (; i < defined_field_count; ++i) {
-		field = tuple_format_field(format, i);
-		if (key_mp_type_validate(field->type, mp_typeof(*tuple),
-					 ER_FIELD_TYPE, i + TUPLE_INDEX_BASE,
-					 tuple_field_is_nullable(field)))
-			return -1;
-		mp_next(&tuple);
-	}
+	region_truncate(region, used);
 	return 0;
 }
 
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 3af39a37e..1ed3656e4 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -208,6 +208,42 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
 		return -1;
 	}
 	format->field_map_size = field_map_size;
+	/**
+	 * Allocate field_map_template used for field map
+	 * initialization and validation.
+	 * Read tuple_format:field_map_template description for
+	 * more details.
+	 */
+	uint32_t *field_map_template = malloc(field_map_size);
+	if (field_map_template == NULL) {
+		diag_set(OutOfMemory, field_map_size, "malloc",
+			 "field_map_template");
+		return -1;
+	}
+	format->field_map_template = field_map_template;
+	/*
+	 * Mark all field_map_template items as uninitialized
+	 * with the UINT32_MAX magic value.
+	 */
+	field_map_template = (uint32_t *)((char *)field_map_template +
+					  format->field_map_size);
+	for (int i = -1; i >= current_slot; i--)
+		field_map_template[i] = UINT32_MAX;
+	struct tuple_field *field;
+	struct json_token *root = (struct json_token *)&format->fields.root;
+	json_tree_foreach_entry_preorder(field, root, struct tuple_field,
+					 token) {
+		/*
+		 * Initialize nullable fields in
+		 * field_map_template with 0, as we shouldn't
+		 * raise an error when the field_map item for a
+		 * nullable field was not calculated during tuple
+		 * parsing (i.e. when the tuple lacks the field).
+		 */
+		if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL &&
+		    tuple_field_is_nullable(field))
+			field_map_template[field->offset_slot] = 0;
+	}
 	return 0;
 }
 
@@ -330,6 +366,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
 	format->index_field_count = index_field_count;
 	format->exact_field_count = 0;
 	format->min_field_count = 0;
+	format->field_map_template = NULL;
 	return format;
 error:
 	tuple_format_destroy_fields(format);
@@ -341,6 +378,7 @@ error:
 static inline void
 tuple_format_destroy(struct tuple_format *format)
 {
+	free(format->field_map_template);
 	tuple_format_destroy_fields(format);
 	tuple_dictionary_unref(format->dict);
 }
@@ -424,6 +462,25 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
 	return true;
 }
 
+/**
+ * Verify that all offset_slots have been initialized in field_map.
+ * The routine relies on the field_map memory having been filled
+ * from the field_map_template, which contains the UINT32_MAX
+ * marker for required fields.
+ */
+static int
+tuple_field_map_validate(const struct tuple_format *format, uint32_t *field_map)
+{
+	int32_t field_map_items =
+		(int32_t)(format->field_map_size/sizeof(field_map[0]));
+	for (int32_t i = -1; i >= -field_map_items; i--) {
+		if (field_map[i] != UINT32_MAX)
+			continue;
+		return -1;
+	}
+	return 0;
+}
+
 /** @sa declaration for details. */
 int
 tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
@@ -449,44 +506,56 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
 			 (unsigned) format->min_field_count);
 		return -1;
 	}
-
-	/* first field is simply accessible, so we do not store offset to it */
-	enum mp_type mp_type = mp_typeof(*pos);
-	const struct tuple_field *field =
-		tuple_format_field((struct tuple_format *)format, 0);
-	if (validate &&
-	    key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE,
-				 TUPLE_INDEX_BASE, tuple_field_is_nullable(field)))
-		return -1;
-	mp_next(&pos);
-	/* other fields...*/
-	uint32_t i = 1;
 	uint32_t defined_field_count = MIN(field_count, validate ?
 					   tuple_format_field_count(format) :
 					   format->index_field_count);
-	if (field_count < format->index_field_count) {
-		/*
-		 * Nullify field map to be able to detect by 0,
-		 * which key fields are absent in tuple_field().
-		 */
+	/*
+	 * Initialize memory with zeros when no validation is
+	 * required, as 0 is the reserved field_map value for
+	 * nullable fields.
+	 */
+	if (!validate) {
 		memset((char *)field_map - format->field_map_size, 0,
 		       format->field_map_size);
+	} else {
+		memcpy((char *)field_map - format->field_map_size,
+		       format->field_map_template, format->field_map_size);
 	}
-	for (; i < defined_field_count; ++i) {
-		field = tuple_format_field((struct tuple_format *)format, i);
-		mp_type = mp_typeof(*pos);
-		if (validate &&
-		    key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE,
-					 i + TUPLE_INDEX_BASE,
-					 tuple_field_is_nullable(field)))
-			return -1;
-		if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
-			field_map[field->offset_slot] =
-				(uint32_t) (pos - tuple);
+	struct json_tree *tree = (struct json_tree *)&format->fields;
+	struct json_token *parent = &tree->root;
+	struct json_token token;
+	token.type = JSON_TOKEN_NUM;
+	token.num = 0;
+	while ((uint32_t)token.num < defined_field_count) {
+		struct tuple_field *field =
+			json_tree_lookup_entry(tree, parent, &token,
+					       struct tuple_field, token);
+		enum mp_type type = mp_typeof(*pos);
+		if (field != NULL) {
+			bool is_nullable = tuple_field_is_nullable(field);
+			if (validate &&
+			    key_mp_type_validate(field->type, type,
+						 ER_FIELD_TYPE, token.num + 1,
+						 is_nullable) != 0)
+				return -1;
+			if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
+				field_map[field->offset_slot] =
+					(uint32_t)(pos - tuple);
+			}
 		}
+		token.num++;
 		mp_next(&pos);
-	}
-	return 0;
+	};
+	if (!validate)
+		return 0;
+	int rc = tuple_field_map_validate(format, field_map);
+	/*
+	 * As field_count >= min_field_count has already been
+	 * checked and all first-level fields have been parsed,
+	 * all offset_slots must be initialized.
+	 */
+	assert(rc == 0);
+	return rc;
 }
 
 uint32_t
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index 949337807..e61e271d5 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -154,6 +154,14 @@ struct tuple_format {
 	 * \sa struct tuple
 	 */
 	uint16_t field_map_size;
+	/**
+	 * Template field map initialized with zeros for nullable
+	 * and UINT32_MAX for other fields. It is used for the
+	 * tuple_init_field_map initial fill of a new field map
+	 * in order to check that all fields were filled out after
+	 * parsing a data tuple.
+	 */
+	uint32_t *field_map_template;
 	/**
 	 * If not set (== 0), any tuple in the space can have any number of
 	 * fields. If set, each tuple must have exactly this number of fields.
-- 
2.19.2




More information about the Tarantool-patches mailing list