[patches] [PATCH 4/4] vinyl: on space alter update key definitions in indexes

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Mon Mar 12 00:53:36 MSK 2018


Vinyl alter can change comparators, for example, by removal of a
space format and making some fields optional. This alter is
allowed on a non-empty space even, because optionality of a
field is not persisted. But index key definitions can not be
updated in place, since they are used in multiple threads (vinyl
workers). So it is necessary to get new key definitions from a
new index definition, and leave old ones to be deleted by
existing worker tasks, when they will be finished.

Index is not the only place storing key definition pointers:
there are also mems, ranges, cache - all these structures must be
updated too.

Closes #3229

Signed-off-by: Vladislav Shpilevoy <v.shpilevoy at tarantool.org>
---
 src/box/vinyl.c          | 71 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/box/vy_index.c       | 14 +++-------
 src/box/vy_scheduler.c   | 19 +++++++++++--
 test/engine/ddl.result   |  2 +-
 test/engine/ddl.test.lua |  2 +-
 5 files changed, 93 insertions(+), 15 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index ba0688259..6dabfeb62 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -1089,6 +1089,52 @@ vinyl_space_check_format(struct space *new_space, struct space *old_space)
 	return -1;
 }
 
+/**
+ * Set new key_def and cmp_def in all index ranges, mems and in a
+ * cache.
+ * @param index Index to update.
+ * @param key_def New key definition.
+ * @param cmp_def New comparator definition.
+ * @param ranges_array Temporary array of double pointers to
+ *        vy_range used to avoid accesing them by the tree RB-tree
+ *        API when cmp_def is being changed. RB-tree API can not
+ *        be used because it requires that cmp_def must be the
+ *        same in all ranges. But it can not be updated in all
+ *        ranges at once and it is done in a cycle.
+ */
+static void
+vy_index_alter_key_def(struct vy_index *index, struct key_def *key_def,
+			 struct key_def *cmp_def,
+			 struct vy_range **ranges_array)
+{
+	key_def_unref(index->key_def);
+	index->key_def = key_def;
+	key_def_ref(key_def);
+	key_def_unref(index->cmp_def);
+	index->cmp_def = cmp_def;
+	key_def_ref(cmp_def);
+	index->cache.cmp_def = cmp_def;
+	index->cache.cache_tree.arg = cmp_def;
+	index->mem->cmp_def = cmp_def;
+	index->mem->tree.arg = cmp_def;
+	struct vy_mem *mem;
+	rlist_foreach_entry(mem, &index->sealed, in_sealed) {
+		mem->cmp_def = cmp_def;
+		mem->tree.arg = cmp_def;
+	}
+	int i = 0;
+	/*
+	 * Save ranges into the array before changing cmp_def.
+	 * RB-tree API does not work until cmp_def is updated
+	 * everywhere.
+	 */
+	for (struct vy_range *range = vy_range_tree_first(index->tree);
+	     range != NULL; range = vy_range_tree_next(index->tree, range))
+		ranges_array[i++] = range;
+	for (--i; i >= 0; --i)
+		ranges_array[i]->cmp_def = cmp_def;
+}
+
 static void
 vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
 {
@@ -1115,6 +1161,26 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
 		tuple_format_delete(format);
 		goto fail;
 	}
+	/*
+	 * Preallocate a buffer for ranges array, that can fit
+	 * ranges of any index.
+	 */
+	int max_range_count = 0;
+	for (uint32_t i = 0; i < new_space->index_count; ++i) {
+		struct vy_index *index = vy_index(new_space->index[i]);
+		if (max_range_count < index->range_count)
+			max_range_count = index->range_count;
+	}
+	assert(max_range_count > 0);
+	size_t size = sizeof(struct vy_range *) * max_range_count;
+	struct vy_range **ranges_array =
+		(struct vy_range **) region_alloc(&fiber()->gc, size);
+	if (ranges_array == NULL) {
+		diag_set(OutOfMemory, size, "region_alloc", "ranges_array");
+		tuple_format_delete(format);
+		tuple_format_delete(upsert_format);
+		goto fail;
+	}
 
 	/* Set possibly changed opts. */
 	pk->opts = new_index_def->opts;
@@ -1133,6 +1199,8 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
 	tuple_format_ref(format);
 	pk->mem_format = new_format;
 	tuple_format_ref(new_format);
+	vy_index_alter_key_def(pk, new_space->index[0]->def->key_def,
+			       new_space->index[0]->def->cmp_def, ranges_array);
 	vy_index_validate_formats(pk);
 
 	for (uint32_t i = 1; i < new_space->index_count; ++i) {
@@ -1153,6 +1221,9 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
 		tuple_format_ref(index->mem_format_with_colmask);
 		tuple_format_ref(index->mem_format);
 		tuple_format_ref(index->upsert_format);
+		vy_index_alter_key_def(index, new_space->index[i]->def->key_def,
+				       new_space->index[i]->def->cmp_def,
+				       ranges_array);
 		vy_index_validate_formats(index);
 	}
 
diff --git a/src/box/vy_index.c b/src/box/vy_index.c
index 9cf80a74a..ecc083a51 100644
--- a/src/box/vy_index.c
+++ b/src/box/vy_index.c
@@ -157,16 +157,12 @@ vy_index_new(struct vy_index_env *index_env, struct vy_cache_env *cache_env,
 		goto fail_tree;
 	}
 
-	struct key_def *key_def = key_def_dup(index_def->key_def);
-	if (key_def == NULL)
-		goto fail_key_def;
-
-	struct key_def *cmp_def = key_def_dup(index_def->cmp_def);
-	if (cmp_def == NULL)
-		goto fail_cmp_def;
-
+	struct key_def *key_def = index_def->key_def;
+	struct key_def *cmp_def = index_def->cmp_def;
 	index->cmp_def = cmp_def;
+	key_def_ref(cmp_def);
 	index->key_def = key_def;
+	key_def_ref(key_def);
 	if (index_def->iid == 0) {
 		/*
 		 * Disk tuples can be returned to an user from a
@@ -258,9 +254,7 @@ fail_upsert_format:
 	tuple_format_unref(index->disk_format);
 fail_format:
 	key_def_unref(cmp_def);
-fail_cmp_def:
 	key_def_unref(key_def);
-fail_key_def:
 	free(index->tree);
 fail_tree:
 	free(index);
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 382cf0718..6a4944898 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -108,6 +108,13 @@ struct vy_task {
 	int status;
 	/** If ->execute fails, the error is stored here. */
 	struct diag diag;
+	/**
+	 * Saved key definitions. Orignal index->key/cmp_def can
+	 * be changed when the task is started but not finished,
+	 * for example, on alter.
+	 */
+	struct key_def *key_def;
+	struct key_def *cmp_def;
 	/** Index this task is for. */
 	struct vy_index *index;
 	/** Range to compact. */
@@ -164,6 +171,10 @@ vy_task_new(struct mempool *pool, struct vy_index *index,
 	memset(task, 0, sizeof(*task));
 	task->ops = ops;
 	task->index = index;
+	task->key_def = index->key_def;
+	key_def_ref(task->key_def);
+	task->cmp_def = index->cmp_def;
+	key_def_ref(task->cmp_def);
 	vy_index_ref(index);
 	diag_create(&task->diag);
 	return task;
@@ -174,6 +185,8 @@ static void
 vy_task_delete(struct mempool *pool, struct vy_task *task)
 {
 	vy_index_unref(task->index);
+	key_def_unref(task->key_def);
+	key_def_unref(task->cmp_def);
 	diag_destroy(&task->diag);
 	TRASH(task);
 	mempool_free(pool, task);
@@ -638,7 +651,7 @@ vy_task_write_run(struct vy_scheduler *scheduler, struct vy_task *task)
 	struct vy_run_writer writer;
 	if (vy_run_writer_create(&writer, task->new_run, index->env->path,
 				 index->space_id, index->id,
-				 index->cmp_def, index->key_def,
+				 task->cmp_def, task->key_def,
 				 task->page_size, task->bloom_fpr,
 				 task->max_output_count) != 0)
 		goto fail;
@@ -989,7 +1002,7 @@ vy_task_dump_new(struct vy_scheduler *scheduler, struct vy_index *index,
 
 	struct vy_stmt_stream *wi;
 	bool is_last_level = (index->run_count == 0);
-	wi = vy_write_iterator_new(index->cmp_def, index->disk_format,
+	wi = vy_write_iterator_new(task->cmp_def, index->disk_format,
 				   index->upsert_format, index->id == 0,
 				   is_last_level, scheduler->read_views);
 	if (wi == NULL)
@@ -1264,7 +1277,7 @@ vy_task_compact_new(struct vy_scheduler *scheduler, struct vy_index *index,
 
 	struct vy_stmt_stream *wi;
 	bool is_last_level = (range->compact_priority == range->slice_count);
-	wi = vy_write_iterator_new(index->cmp_def, index->disk_format,
+	wi = vy_write_iterator_new(task->cmp_def, index->disk_format,
 				   index->upsert_format, index->id == 0,
 				   is_last_level, scheduler->read_views);
 	if (wi == NULL)
diff --git a/test/engine/ddl.result b/test/engine/ddl.result
index 906c9d11c..c3cb1efc2 100644
--- a/test/engine/ddl.result
+++ b/test/engine/ddl.result
@@ -394,7 +394,7 @@ s:drop()
 box.cfg{}
 ---
 ...
-s = box.schema.create_space('test', {engine = 'memtx'})
+s = box.schema.create_space('test', {engine = engine})
 ---
 ...
 format = {}
diff --git a/test/engine/ddl.test.lua b/test/engine/ddl.test.lua
index 3b4cd1e5e..25381a3c8 100644
--- a/test/engine/ddl.test.lua
+++ b/test/engine/ddl.test.lua
@@ -142,7 +142,7 @@ s:drop()
 -- not only when indexes are updated.
 --
 box.cfg{}
-s = box.schema.create_space('test', {engine = 'memtx'})
+s = box.schema.create_space('test', {engine = engine})
 format = {}
 format[1] = {'field1', 'unsigned'}
 format[2] = {'field2', 'unsigned', is_nullable = true}
-- 
2.14.3 (Apple Git-98)




More information about the Tarantool-patches mailing list