[PATCH v6 7/8] box: introduce offset_slot cache in key_part

Kirill Shcherbatov kshcherbatov at tarantool.org
Mon Dec 17 09:52:51 MSK 2018


tuple_field_by_part looks up the tuple_field corresponding to the
given key part in tuple_format in order to quickly retrieve the offset
of indexed data from the tuple field map. For regular indexes this
operation is blazing fast, however for JSON indexes it is not, as we
have to parse the path to data and then do multiple lookups in a JSON
tree. Since tuple_field_by_part is used by comparators, we should
strive to make this routine as fast as possible for all kinds of
indexes.
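
For reference, here is a minimal sketch of what the regular-index
fast path amounts to: the key part resolves to an offset slot, which
indexes the tuple's field map (the function name is illustrative,
not the exact source):

    /*
     * Minimal sketch of a regular-index field lookup. Offset
     * slots are negative indices counted back from the end of
     * the field map.
     */
    static inline const char *
    field_by_offset_slot(const char *data, const uint32_t *field_map,
                         int32_t offset_slot)
    {
            /* A zero entry means the field is absent in this tuple. */
            return field_map[offset_slot] != 0 ?
                   data + field_map[offset_slot] : NULL;
    }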

This patch introduces an optimization that is supposed to make
tuple_field_by_part for JSON indexes as fast as it is for regular
indexes in most cases. We do that by caching the offset slot right in
key_part. There's a catch here however - we create a new format
whenever an index is dropped or created and we don't reindex old
tuples. As a result, there may be several generations of tuples in the
same space, all using different formats, while there is only one
key_def used for comparison.

To overcome this problem, we introduce the notion of tuple_format
epoch. This is a counter incremented each time a new format is
created. We store it in tuple_format and key_def, and we only use
the offset slot cached in a key_def if its epoch coincides with the
epoch of the tuple format. If they don't match, we look up the
tuple_field as before, and then update the cached value along with
the epoch of the tuple format.
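
In other words, the hot path reduces to a single epoch comparison
plus a field map load. A sketch of the idea, mirroring the
tuple_field_by_part_raw hunk below (field_by_part_cached and
field_by_part_slow are illustrative names; the latter stands in
for the existing slow JSON path lookup):

    static const char *
    field_by_part_cached(struct tuple_format *format, const char *data,
                         const uint32_t *field_map, struct key_part *part)
    {
            if (part->format_epoch == format->epoch) {
                    /* Fresh cache: reuse the memorized offset slot. */
                    int32_t offset_slot = part->offset_slot_cache;
                    return field_map[offset_slot] != 0 ?
                           data + field_map[offset_slot] : NULL;
            }
            /*
             * Stale or unset cache: fall back to the slow JSON path
             * lookup, which refreshes offset_slot_cache and
             * format_epoch as a side effect.
             */
            return field_by_part_slow(format, data, field_map, part);
    }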

Part of #1012
---
 src/box/key_def.c      | 16 +++++++++++-----
 src/box/key_def.h      | 11 +++++++++++
 src/box/tuple_format.c | 10 ++++++++++
 src/box/tuple_format.h | 12 ++++++++++++
 4 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/src/box/key_def.c b/src/box/key_def.c
index a6bb89f5b..ea2824992 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -168,7 +168,8 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
 		 enum field_type type, enum on_conflict_action nullable_action,
 		 struct coll *coll, uint32_t coll_id,
 		 enum sort_order sort_order, const char *path,
-		 uint32_t path_len, char **static_pool)
+		 uint32_t path_len, char **static_pool,
+		 int32_t offset_slot_cache, uint64_t format_epoch)
 {
 	assert(part_no < def->part_count);
 	assert(type < field_type_MAX);
@@ -180,6 +181,8 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
 	def->parts[part_no].coll = coll;
 	def->parts[part_no].coll_id = coll_id;
 	def->parts[part_no].sort_order = sort_order;
+	def->parts[part_no].offset_slot_cache = offset_slot_cache;
+	def->parts[part_no].format_epoch = format_epoch;
 	if (path != NULL) {
 		assert(static_pool != NULL);
 		def->parts[part_no].path = *static_pool;
@@ -226,7 +229,8 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count)
 		uint32_t path_len = part->path != NULL ? strlen(part->path) : 0;
 		key_def_set_part(def, i, part->fieldno, part->type,
 				 part->nullable_action, coll, part->coll_id,
-				 part->sort_order, part->path, path_len, &data);
+				 part->sort_order, part->path, path_len, &data,
+				 TUPLE_OFFSET_SLOT_NIL, 0);
 	}
 	key_def_set_cmp(def);
 	return def;
@@ -280,7 +284,7 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
 				 (enum field_type)types[item],
 				 ON_CONFLICT_ACTION_DEFAULT,
 				 NULL, COLL_NONE, SORT_ORDER_ASC, NULL, 0,
-				 NULL);
+				 NULL, TUPLE_OFFSET_SLOT_NIL, 0);
 	}
 	key_def_set_cmp(key_def);
 	return key_def;
@@ -705,7 +709,8 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
 		key_def_set_part(new_def, pos++, part->fieldno, part->type,
 				 part->nullable_action, part->coll,
 				 part->coll_id, part->sort_order, part->path,
-				 part->path_len, &data);
+				 part->path_len, &data, part->offset_slot_cache,
+				 part->format_epoch);
 	}
 
 	/* Set-append second key def's part to the new key def. */
@@ -717,7 +722,8 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
 		key_def_set_part(new_def, pos++, part->fieldno, part->type,
 				 part->nullable_action, part->coll,
 				 part->coll_id, part->sort_order, part->path,
-				 part->path_len, &data);
+				 part->path_len, &data, part->offset_slot_cache,
+				 part->format_epoch);
 	}
 	key_def_set_cmp(new_def);
 	return new_def;
diff --git a/src/box/key_def.h b/src/box/key_def.h
index df498964c..8c1f7ce9e 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -100,6 +100,17 @@ struct key_part {
 	char *path;
 	/** The length of JSON path. */
 	uint32_t path_len;
+	/** Cached format epoch. */
+	uint64_t format_epoch;
+	/**
+	 * Cached value of the offset slot corresponding to
+	 * the indexed field (tuple_field::offset_slot).
+	 * Valid only if key_part::format_epoch equals the
+	 * epoch of the tuple format. Updated in
+	 * tuple_field_by_part_raw_slowpath to always store
+	 * the slot of the most recently used tuple format.
+	 */
+	int32_t offset_slot_cache;
 };
 
 struct key_def;
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 7eac3cf50..d0b5dfa16 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -38,6 +38,7 @@ struct tuple_format **tuple_formats;
 static intptr_t recycled_format_ids = FORMAT_ID_NIL;
 
 static uint32_t formats_size = 0, formats_capacity = 0;
+static uint64_t formats_epoch = 0;
 
 static struct tuple_field *
 tuple_field_new(void)
@@ -608,6 +609,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys,
 	format->vtab = *vtab;
 	format->engine = NULL;
 	format->is_temporary = false;
+	format->epoch = ++formats_epoch;
 	if (tuple_format_register(format) < 0) {
 		tuple_format_destroy(format);
 		free(format);
@@ -1166,6 +1168,7 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
 	part.fieldno = fieldno;
 	part.path = (char *)path + lexer.offset;
 	part.path_len = path_len - lexer.offset;
+	part.format_epoch = 0;
 	rc = tuple_field_by_part_raw_slowpath(format, tuple, field_map, &part,
 					      field);
 	if (rc == 0)
@@ -1192,6 +1195,13 @@ tuple_field_by_part_raw_slowpath(struct tuple_format *format, const char *data,
 		int32_t offset_slot = field->offset_slot;
 		assert(-offset_slot * sizeof(uint32_t) <=
 		       format->field_map_size);
+
+		/* Refresh the cached offset slot and its format epoch. */
+		assert(part->format_epoch != format->epoch);
+		assert(format->epoch != 0);
+		part->offset_slot_cache = offset_slot;
+		part->format_epoch = format->epoch;
+
 		*raw = field_map[offset_slot] == 0 ?
 		       NULL : data + field_map[offset_slot];
 		return 0;
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index a9b4bb675..d6bc08bc0 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -137,6 +137,12 @@ tuple_field_is_nullable(const struct tuple_field *tuple_field)
  * Tuple format describes how tuple is stored and information about its fields
  */
 struct tuple_format {
+	/**
+	 * Counter incremented each time a new format is
+	 * created; used to validate the offset slot cached
+	 * in key_part, see key_part::offset_slot_cache.
+	 */
+	uint64_t epoch;
 	/** Virtual function table */
 	struct tuple_format_vtab vtab;
 	/** Pointer to engine-specific data. */
@@ -486,6 +492,12 @@ tuple_field_by_part_raw(struct tuple_format *format, const char *data,
 {
 	if (likely(part->path == NULL)) {
 		return tuple_field_raw(format, data, field_map, part->fieldno);
+	} else if (part->format_epoch == format->epoch) {
+		int32_t offset_slot = part->offset_slot_cache;
+		assert(-offset_slot * sizeof(uint32_t) <=
+		       format->field_map_size);
+		return field_map[offset_slot] != 0 ?
+		       data + field_map[offset_slot] : NULL;
 	} else {
 		const char *raw;
 		MAYBE_UNUSED int rc =
-- 
2.19.2