[PATCH 5/5] alter: call space_vtab::commit_alter after WAL write

Vladimir Davydov vdavydov.dev at gmail.com
Tue Apr 3 20:37:43 MSK 2018


space_vtab::commit_alter is implemented only by memtx, which uses it to
delete tuples stored in the space and reset bsize when the primary index
is dropped. Currently, it's called before WAL write and so this can
result in use-after-free in memtx if WAL write fails. To avoid that,
this patch moves invocation of this method after WAL write.

A few things to note about this patch:

 - Space bsize should still be updated before WAL write, when the space
   becomes visible. So now we set it in space_vtab::prepare_alter and
   reset it in drop_primary_key, along with memtx_space::replace.

 - Before this patch, space_vtab::drop_primary_key was not called on
   space truncation, because DropIndex::alter only called it if the
   primary key was absent in the new space, which isn't true in case of
   space truncation. So this patch fixed this check: now we check the id
   of the dropped index directly, and call drop_primary_key if it's 0.

 - To discriminate between primary index rebuild and space truncation in
   space_vtab::commit_alter and make the right decision about whether we
   need to delete tuples stored in the space or not, we use a version
   counter attached to memtx_space. The counter is bumped every time the
   primary index is dropped, so if it differs between the old and new
   spaces in commit_alter, it was either primary index drop or space
   truncation and hence we need to purge the space.

Closes #3289
---
 src/box/alter.cc         | 23 +++--------------
 src/box/memtx_space.c    | 16 +++++-------
 src/box/memtx_space.h    |  6 +++++
 test/box/errinj.result   | 66 ++++++++++++++++++++++++++++++++++++++++++++++++
 test/box/errinj.test.lua | 18 +++++++++++++
 5 files changed, 101 insertions(+), 28 deletions(-)

diff --git a/src/box/alter.cc b/src/box/alter.cc
index 174d53fa..06ef2923 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -729,6 +729,8 @@ alter_space_commit(struct trigger *trigger, void *event)
 		op->commit(alter, txn->signature);
 	}
 
+	space_commit_alter(alter->old_space, alter->new_space);
+
 	trigger_run_xc(&on_alter_space, alter->new_space);
 
 	alter->new_space = NULL; /* for alter_space_delete(). */
@@ -864,8 +866,6 @@ alter_space_do(struct txn *txn, struct alter_space *alter)
 	 * The new space is ready. Time to update the space
 	 * cache with it.
 	 */
-	space_commit_alter(alter->old_space, alter->new_space);
-
 	struct space *old_space = space_cache_replace(alter->new_space);
 	(void) old_space;
 	assert(old_space == alter->old_space);
@@ -1026,23 +1026,8 @@ DropIndex::alter_def(struct alter_space * /* alter */)
 void
 DropIndex::alter(struct alter_space *alter)
 {
-	/*
-	 * If it's not the primary key, nothing to do --
-	 * the dropped index didn't exist in the new space
-	 * definition, so does not exist in the created space.
-	 */
-	if (space_index(alter->new_space, 0) != NULL)
-		return;
-	/*
-	 * OK to drop the primary key. Inform the engine about it,
-	 * since it may have to reset handler->replace function,
-	 * so that:
-	 * - DML returns proper errors rather than crashes the
-	 *   program
-	 * - when a new primary key is finally added, the space
-	 *   can be put back online properly.
-	 */
-	space_drop_primary_key(alter->new_space);
+	if (old_index_def->iid == 0)
+		space_drop_primary_key(alter->new_space);
 }
 
 void
diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
index ec6f6db6..adc81430 100644
--- a/src/box/memtx_space.c
+++ b/src/box/memtx_space.c
@@ -736,6 +736,8 @@ memtx_space_drop_primary_key(struct space *space)
 {
 	struct memtx_space *memtx_space = (struct memtx_space *)space;
 	memtx_space->replace = memtx_space_replace_no_keys;
+	memtx_space->bsize = 0;
+	memtx_space->version++;
 }
 
 static void
@@ -844,6 +846,8 @@ memtx_space_prepare_alter(struct space *old_space, struct space *new_space)
 	struct memtx_space *old_memtx_space = (struct memtx_space *)old_space;
 	struct memtx_space *new_memtx_space = (struct memtx_space *)new_space;
 	new_memtx_space->replace = old_memtx_space->replace;
+	new_memtx_space->bsize = old_memtx_space->bsize;
+	new_memtx_space->version = old_memtx_space->version;
 	bool is_empty = old_space->index_count == 0 ||
 			index_size(old_space->index[0]) == 0;
 	return space_def_check_compatibility(old_space->def,
@@ -855,17 +859,10 @@ memtx_space_commit_alter(struct space *old_space, struct space *new_space)
 {
 	struct memtx_space *old_memtx_space = (struct memtx_space *)old_space;
 	struct memtx_space *new_memtx_space = (struct memtx_space *)new_space;
-	bool is_empty = new_space->index_count == 0 ||
-			index_size(new_space->index[0]) == 0;
 
-	/*
-	 * Delete all tuples when the last index is dropped
-	 * or the space is truncated.
-	 */
-	if (is_empty)
+	/* Delete all tuples if the primary index was dropped. */
+	if (old_memtx_space->version != new_memtx_space->version)
 		memtx_space_prune(old_space);
-	else
-		new_memtx_space->bsize = old_memtx_space->bsize;
 }
 
 /* }}} DDL */
@@ -937,6 +934,7 @@ memtx_space_new(struct memtx_engine *memtx,
 	tuple_format_unref(format);
 
 	memtx_space->bsize = 0;
+	memtx_space->version = 0;
 	memtx_space->replace = memtx_space_replace_no_keys;
 	return (struct space *)memtx_space;
 }
diff --git a/src/box/memtx_space.h b/src/box/memtx_space.h
index 26d33349..0c4176bc 100644
--- a/src/box/memtx_space.h
+++ b/src/box/memtx_space.h
@@ -43,6 +43,12 @@ struct memtx_space {
 	/* Number of bytes used in memory by tuples in the space. */
 	size_t bsize;
 	/**
+	 * Version of data stored in the space. It is bumped every
+	 * time the primary index is dropped. We use it to detect
+	 * if we need to delete tuples when the space is altered.
+	 */
+	int version;
+	/**
 	 * A pointer to replace function, set to different values
 	 * at different stages of recovery.
 	 */
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 8e4d5742..269de663 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -1093,3 +1093,69 @@ s:drop()
 box.schema.user.revoke('guest', 'read,write,execute','universe')
 ---
 ...
+--
+-- gh-3289: drop/truncate leaves the space in inconsistent
+-- state if WAL write fails.
+--
+s = box.schema.space.create('test')
+---
+...
+_ = s:create_index('pk')
+---
+...
+for i = 1, 10 do s:replace{i} end
+---
+...
+errinj.set('ERRINJ_WAL_IO', true)
+---
+- ok
+...
+s:drop()
+---
+- error: Failed to write to disk
+...
+s:truncate()
+---
+- error: Failed to write to disk
+...
+s:drop()
+---
+- error: Failed to write to disk
+...
+s:truncate()
+---
+- error: Failed to write to disk
+...
+errinj.set('ERRINJ_WAL_IO', false)
+---
+- ok
+...
+for i = 1, 10 do s:replace{i + 10} end
+---
+...
+s:select()
+---
+- - [1]
+  - [2]
+  - [3]
+  - [4]
+  - [5]
+  - [6]
+  - [7]
+  - [8]
+  - [9]
+  - [10]
+  - [11]
+  - [12]
+  - [13]
+  - [14]
+  - [15]
+  - [16]
+  - [17]
+  - [18]
+  - [19]
+  - [20]
+...
+s:drop()
+---
+...
diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
index d97cd81f..c12f61a4 100644
--- a/test/box/errinj.test.lua
+++ b/test/box/errinj.test.lua
@@ -366,3 +366,21 @@ errinj.set("ERRINJ_IPROTO_TX_DELAY", false)
 
 s:drop()
 box.schema.user.revoke('guest', 'read,write,execute','universe')
+
+--
+-- gh-3289: drop/truncate leaves the space in inconsistent
+-- state if WAL write fails.
+--
+s = box.schema.space.create('test')
+_ = s:create_index('pk')
+for i = 1, 10 do s:replace{i} end
+errinj.set('ERRINJ_WAL_IO', true)
+s:drop()
+s:truncate()
+s:drop()
+s:truncate()
+errinj.set('ERRINJ_WAL_IO', false)
+for i = 1, 10 do s:replace{i + 10} end
+s:select()
+
+s:drop()
-- 
2.11.0




More information about the Tarantool-patches mailing list