[PATCH] ddl: fix vylog vs xlog sync for multi-statement transactions

Vladimir Davydov vdavydov.dev at gmail.com
Wed Jul 17 14:43:41 MSK 2019


Vinyl writes a signature of each statement that created or dropped or
modified an LSM tree to vylog. This is needed to match xlog rows to
vylog records on recovery. So alter_space_commit() passes txn->signature
to index_commit_create(), index_commit_drop(), and index_commit_modify()
methods. If a transaction has multiple DDL statements, all of them will
have the same signature. Per se this is fine. The problem is that on
recovery we apply statements one-by-one, not transactionally, and so we
pass signatures assigned to individual statements, not to transactions.
As a result, while recovering a multi-statement DDL transaction vinyl
gets different signatures for them instead of those that were used when
those statements were committed, which apparently breaks its assumptions
and leads to a crash or even a data loss.

There are two ways to fix this problem. First, we could recover
statements transactionally as well. This would guarantee that we used
the same signatures on recovery that were used when the statements were
committed. However, I don't see any other benefit in doing this, beside
fixing this issue. So let's fix the issue in a more straightforward
fashion: rather than using txn->signature, make alter_space_commit()
pass the signature of the actual statement that committed a DDL
operation, which is pretty easy to compute given the number of
statements in a transaction at the time when the statement was
committed.

Thanks to @Gerold103 for reporting this issue and carrying out
preliminary investigation.

Fixes commit f266559b167b ("ddl: allow to execute non-yielding DDL
statements in transactions").

Closes #4350
Follow-up #4083
---
https://github.com/tarantool/tarantool/issues/4350
https://github.com/tarantool/tarantool/commits/dv/gh-4350-vy-fix-tx-ddl-crash

 src/box/alter.cc         | 27 ++++++++++++++++++++++-----
 src/box/txn.h            |  9 +++++++++
 test/engine/ddl.result   | 23 +++++++++++++++++++++++
 test/engine/ddl.test.lua | 15 +++++++++++++++
 4 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/src/box/alter.cc b/src/box/alter.cc
index 1dbfe6b2..5d48c0e6 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -706,13 +706,21 @@ struct alter_space {
 	 * of key_defs and key_parts.
 	 */
 	uint32_t new_min_field_count;
+	/**
+	 * Number of rows in the transaction at the time when this
+	 * DDL operation was performed. It is used to compute this
+	 * operation signature on commit, which is needed to keep
+	 * xlog in sync with vylog, see alter_space_commit().
+	 */
+	int n_rows;
 };
 
 static struct alter_space *
 alter_space_new(struct space *old_space)
 {
-	struct alter_space *alter =
-		region_calloc_object_xc(&in_txn()->region, struct alter_space);
+	struct txn *txn = in_txn();
+	struct alter_space *alter = region_calloc_object_xc(&txn->region,
+							    struct alter_space);
 	rlist_create(&alter->ops);
 	alter->old_space = old_space;
 	alter->space_def = space_def_dup_xc(alter->old_space->def);
@@ -720,6 +728,7 @@ alter_space_new(struct space *old_space)
 		alter->new_min_field_count = old_space->format->min_field_count;
 	else
 		alter->new_min_field_count = 0;
+	alter->n_rows = txn_n_rows(txn);
 	return alter;
 }
 
@@ -823,13 +832,21 @@ alter_space_commit(struct trigger *trigger, void *event)
 	struct txn *txn = (struct txn *) event;
 	struct alter_space *alter = (struct alter_space *) trigger->data;
 	/*
+	 * The engine (vinyl) expects us to pass the signature of
+	 * the row that performed this operation, not the signature
+	 * of the transaction itself (this is needed to sync vylog
+	 * with xlog on recovery). It's trivial to get this given
+	 * the number of rows in the transaction at the time when
+	 * the operation was performed.
+	 */
+	int64_t signature = txn->signature - txn_n_rows(txn) + alter->n_rows;
+	/*
 	 * Commit alter ops, this will move the changed
 	 * indexes into their new places.
 	 */
 	class AlterSpaceOp *op;
-	rlist_foreach_entry(op, &alter->ops, link) {
-		op->commit(alter, txn->signature);
-	}
+	rlist_foreach_entry(op, &alter->ops, link)
+		op->commit(alter, signature);
 
 	alter->new_space = NULL; /* for alter_space_delete(). */
 	/*
diff --git a/src/box/txn.h b/src/box/txn.h
index baeaa0ed..f482b64e 100644
--- a/src/box/txn.h
+++ b/src/box/txn.h
@@ -293,6 +293,15 @@ txn_on_rollback(struct txn *txn, struct trigger *trigger)
 }
 
 /**
+ * Return the total number of rows committed in the txn.
+ */
+static inline int
+txn_n_rows(struct txn *txn)
+{
+	return txn->n_new_rows + txn->n_applier_rows;
+}
+
+/**
  * Start a new statement.
  */
 int
diff --git a/test/engine/ddl.result b/test/engine/ddl.result
index c59c85f1..303f8f97 100644
--- a/test/engine/ddl.result
+++ b/test/engine/ddl.result
@@ -2073,6 +2073,25 @@ i3:select()
   - [4, 'zzz', 'ddd', -666]
   - [1, 'zzz', 'aaa', 999, 'fff']
 ...
+--
+-- gh-4350: crash while trying to drop a multi-index space created
+-- transactionally after recovery.
+--
+inspector:cmd("setopt delimiter ';'")
+---
+- true
+...
+box.begin()
+s = box.schema.space.create('test_crash', {engine = engine})
+_ = s:create_index('pk')
+_ = s:create_index('sk', {parts = {2, 'unsigned'}})
+box.commit();
+---
+...
+inspector:cmd("setopt delimiter ''");
+---
+- true
+...
 -- Check that recovery works.
 inspector:cmd("restart server default")
 test_run = require('test_run')
@@ -2114,6 +2133,10 @@ s.index.i3:select()
   - [4, 'zzz', 'ddd', -666]
   - [1, 'zzz', 'aaa', 999, 'fff']
 ...
+-- gh-4350: see above.
+box.space.test_crash:drop()
+---
+...
 --
 -- gh-3903: index build doesn't work after recovery.
 --
diff --git a/test/engine/ddl.test.lua b/test/engine/ddl.test.lua
index 1670b548..d15bf1f5 100644
--- a/test/engine/ddl.test.lua
+++ b/test/engine/ddl.test.lua
@@ -757,6 +757,18 @@ i3:alter{parts = {4, 'integer', 5, 'string'}} -- error: field missing
 i3:alter{parts = {2, 'string', 4, 'integer'}} -- ok
 i3:select()
 
+--
+-- gh-4350: crash while trying to drop a multi-index space created
+-- transactionally after recovery.
+--
+inspector:cmd("setopt delimiter ';'")
+box.begin()
+s = box.schema.space.create('test_crash', {engine = engine})
+_ = s:create_index('pk')
+_ = s:create_index('sk', {parts = {2, 'unsigned'}})
+box.commit();
+inspector:cmd("setopt delimiter ''");
+
 -- Check that recovery works.
 inspector:cmd("restart server default")
 test_run = require('test_run')
@@ -768,6 +780,9 @@ s.index.i1:select()
 s.index.i2:select()
 s.index.i3:select()
 
+-- gh-4350: see above.
+box.space.test_crash:drop()
+
 --
 -- gh-3903: index build doesn't work after recovery.
 --
-- 
2.11.0




More information about the Tarantool-patches mailing list