[PATCH 4/4] vinyl: abort rw transactions when instance switches to ro

Vladimir Davydov vdavydov.dev at gmail.com
Mon Mar 4 18:39:27 MSK 2019


A Vinyl transaction may yield while having a non-empty write set. This
opens a time window for the instance to switch to read-only mode. Since
we check ro flag only before executing a DML request, the transaction
would successfully commit in such a case, breaking the assumption that
no writes are possible on an instance after box.cfg{read_only=true}
returns. In particular, this breaks master-replica switching logic.

Fix this by aborting all local rw transactions before switching to
read-only mode. Note, remote rw transactions must not be aborted,
because they ignore ro flag.

Closes #4016
---
 src/box/vinyl.c            | 15 ++++++---
 src/box/vy_tx.c            | 20 +++++++++++-
 src/box/vy_tx.h            | 14 ++++++--
 test/vinyl/errinj.result   | 76 +++++++++++++++++++++++++++++++++++++++++++
 test/vinyl/errinj.test.lua | 30 +++++++++++++++++
 test/vinyl/misc.result     | 80 ++++++++++++++++++++++++++++++++++++++++++++++
 test/vinyl/misc.test.lua   | 39 ++++++++++++++++++++++
 7 files changed, 266 insertions(+), 8 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index c8b02eb8..aac890b6 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -70,7 +70,6 @@
 #include "info/info.h"
 #include "column_mask.h"
 #include "trigger.h"
-#include "session.h"
 #include "wal.h" /* wal_mode() */
 
 /**
@@ -1041,7 +1040,7 @@ vinyl_space_prepare_alter(struct space *old_space, struct space *new_space)
 static void
 vy_abort_writers_for_ddl(struct vy_env *env, struct vy_lsm *lsm)
 {
-	tx_manager_abort_writers(env->xm, lsm);
+	tx_manager_abort_writers_for_ddl(env->xm, lsm);
 	/*
 	 * Wait for prepared transactions to complete
 	 * (we can't abort them as they reached WAL).
@@ -2334,8 +2333,7 @@ vinyl_engine_prepare(struct engine *engine, struct txn *txn)
 	 * available for the admin to track the lag so let the applier
 	 * wait as long as necessary for memory dump to complete.
 	 */
-	double timeout = (current_session()->type != SESSION_TYPE_APPLIER ?
-			  env->timeout : TIMEOUT_INFINITY);
+	double timeout = (tx->is_remote ? TIMEOUT_INFINITY : env->timeout);
 	/*
 	 * Reserve quota needed by the transaction before allocating
 	 * memory. Since this may yield, which opens a time window for
@@ -2423,6 +2421,13 @@ vinyl_engine_rollback_statement(struct engine *engine, struct txn *txn,
 	vy_tx_rollback_statement(tx, stmt->engine_savepoint);
 }
 
+static void
+vinyl_engine_switch_to_ro(struct engine *engine)
+{
+	struct vy_env *env = vy_env(engine);
+	tx_manager_abort_writers_for_ro(env->xm);
+}
+
 /* }}} Public API of transaction control */
 
 /** {{{ Environment */
@@ -4463,7 +4468,7 @@ static const struct engine_vtab vinyl_engine_vtab = {
 	/* .commit = */ vinyl_engine_commit,
 	/* .rollback_statement = */ vinyl_engine_rollback_statement,
 	/* .rollback = */ vinyl_engine_rollback,
-	/* .switch_to_ro = */ generic_engine_switch_to_ro,
+	/* .switch_to_ro = */ vinyl_engine_switch_to_ro,
 	/* .bootstrap = */ vinyl_engine_bootstrap,
 	/* .begin_initial_recovery = */ vinyl_engine_begin_initial_recovery,
 	/* .begin_final_recovery = */ vinyl_engine_begin_final_recovery,
diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c
index d216c73d..275fcb62 100644
--- a/src/box/vy_tx.c
+++ b/src/box/vy_tx.c
@@ -46,6 +46,7 @@
 #include "iterator_type.h"
 #include "salad/stailq.h"
 #include "schema.h" /* space_cache_version */
+#include "session.h"
 #include "space.h"
 #include "trigger.h"
 #include "trivia/util.h"
@@ -309,6 +310,7 @@ vy_tx_create(struct tx_manager *xm, struct vy_tx *tx)
 	tx->write_size = 0;
 	tx->xm = xm;
 	tx->state = VINYL_TX_READY;
+	tx->is_remote = false;
 	tx->read_view = (struct vy_read_view *)xm->p_global_read_view;
 	vy_tx_read_set_new(&tx->read_set);
 	tx->psn = 0;
@@ -407,6 +409,11 @@ vy_tx_begin(struct tx_manager *xm)
 		return NULL;
 	}
 	vy_tx_create(xm, tx);
+
+	struct session *session = fiber_get_session(fiber());
+	if (session != NULL && session->type == SESSION_TYPE_APPLIER)
+		tx->is_remote = true;
+
 	return tx;
 }
 
@@ -1064,7 +1071,7 @@ vy_tx_set_with_colmask(struct vy_tx *tx, struct vy_lsm *lsm,
 }
 
 void
-tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm)
+tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct vy_lsm *lsm)
 {
 	struct vy_tx *tx;
 	rlist_foreach_entry(tx, &xm->writers, in_writers) {
@@ -1076,6 +1083,17 @@ tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm)
 }
 
 void
+tx_manager_abort_writers_for_ro(struct tx_manager *xm)
+{
+	struct vy_tx *tx;
+	rlist_foreach_entry(tx, &xm->writers, in_writers) {
+		/* Remote transactions ignore ro flag. */
+		if (tx->state == VINYL_TX_READY && !tx->is_remote)
+			tx->state = VINYL_TX_ABORT;
+	}
+}
+
+void
 vy_txw_iterator_open(struct vy_txw_iterator *itr,
 		     struct vy_txw_iterator_stat *stat,
 		     struct vy_tx *tx, struct vy_lsm *lsm,
diff --git a/src/box/vy_tx.h b/src/box/vy_tx.h
index 590538d8..e3df600a 100644
--- a/src/box/vy_tx.h
+++ b/src/box/vy_tx.h
@@ -161,6 +161,8 @@ struct vy_tx {
 	size_t write_size;
 	/** Current state of the transaction.*/
 	enum tx_state state;
+	/** Set if the transaction was started by an applier. */
+	bool is_remote;
 	/**
 	 * The read view of this transaction. When a transaction
 	 * is started, it is set to the "read committed" state,
@@ -276,10 +278,18 @@ tx_manager_mem_used(struct tx_manager *xm);
 
 /**
  * Abort all rw transactions that affect the given LSM tree
- * and haven't reached WAL yet.
+ * and haven't reached WAL yet. Called before executing a DDL
+ * operation.
  */
 void
-tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm);
+tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct vy_lsm *lsm);
+
+/**
+ * Abort all local rw transactions that haven't reached WAL yet.
+ * Called before switching to read-only mode.
+ */
+void
+tx_manager_abort_writers_for_ro(struct tx_manager *xm);
 
 /** Initialize a tx object. */
 void
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 248b32c8..9b5f7314 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -1166,3 +1166,79 @@ test_run:cmd("restart server default")
 box.space.test:drop()
 ---
 ...
+--
+-- Check that remote transactions are not aborted when an instance
+-- switches to read-only mode (gh-4016).
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('pk')
+---
+...
+s:replace{1, 1}
+---
+- [1, 1]
+...
+box.schema.user.grant('guest', 'replication')
+---
+...
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+---
+- true
+...
+test_run:cmd("start server replica")
+---
+- true
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.error.injection.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0.1)
+---
+- ok
+...
+test_run:cmd("switch default")
+---
+- true
+...
+s:update({1}, {{'+', 2, 1}})
+---
+- [1, 2]
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.cfg{read_only = true}
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+vclock = test_run:get_vclock("default")
+---
+...
+_ = test_run:wait_vclock("replica", vclock)
+---
+...
+test_run:cmd("stop server replica")
+---
+- true
+...
+test_run:cmd("cleanup server replica")
+---
+- true
+...
+test_run:cmd("delete server replica")
+---
+- true
+...
+box.schema.user.revoke('guest', 'replication')
+---
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index eaec52a5..5c7a525f 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -428,3 +428,33 @@ _ = fiber.create(function() box.snapshot() end)
 
 test_run:cmd("restart server default")
 box.space.test:drop()
+
+--
+-- Check that remote transactions are not aborted when an instance
+-- switches to read-only mode (gh-4016).
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk')
+s:replace{1, 1}
+box.schema.user.grant('guest', 'replication')
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+test_run:cmd("start server replica")
+
+test_run:cmd("switch replica")
+box.error.injection.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0.1)
+
+test_run:cmd("switch default")
+s:update({1}, {{'+', 2, 1}})
+
+test_run:cmd("switch replica")
+box.cfg{read_only = true}
+
+test_run:cmd("switch default")
+vclock = test_run:get_vclock("default")
+_ = test_run:wait_vclock("replica", vclock)
+
+test_run:cmd("stop server replica")
+test_run:cmd("cleanup server replica")
+test_run:cmd("delete server replica")
+box.schema.user.revoke('guest', 'replication')
+s:drop()
diff --git a/test/vinyl/misc.result b/test/vinyl/misc.result
index 5f67271e..685fd776 100644
--- a/test/vinyl/misc.result
+++ b/test/vinyl/misc.result
@@ -282,3 +282,83 @@ test_run:cmd('cleanup server test')
 ---
 - true
 ...
+--
+-- gh-4016: local rw transactions are aborted when the instance
+-- switches to read-only mode.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('pk')
+---
+...
+s:replace({1, 1})
+---
+- [1, 1]
+...
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+-- Start rw transaction.
+ch1 = fiber.channel(1);
+---
+...
+_ = fiber.create(function()
+    box.begin()
+    s:replace{1, 2}
+    ch1:get()
+    local status, err = pcall(box.commit)
+    ch1:put(status or err)
+end);
+---
+...
+-- Start ro transaction.
+ch2 = fiber.channel(1);
+---
+...
+_ = fiber.create(function()
+    box.begin()
+    s:select()
+    ch2:get()
+    local status, err = pcall(box.commit)
+    ch2:put(status or err)
+end);
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+-- Switch to ro mode.
+box.cfg{read_only = true}
+---
+...
+-- Resume the transactions.
+ch1:put(true)
+---
+- true
+...
+ch2:put(true)
+---
+- true
+...
+ch1:get()
+---
+- Transaction has been aborted by conflict
+...
+ch2:get()
+---
+- true
+...
+-- Cleanup.
+box.cfg{read_only = false}
+---
+...
+s:select()
+---
+- - [1, 1]
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/misc.test.lua b/test/vinyl/misc.test.lua
index 1c3a9517..cdc22774 100644
--- a/test/vinyl/misc.test.lua
+++ b/test/vinyl/misc.test.lua
@@ -114,3 +114,42 @@ s:drop()
 test_run:cmd('switch default')
 test_run:cmd('stop server test')
 test_run:cmd('cleanup server test')
+
+--
+-- gh-4016: local rw transactions are aborted when the instance
+-- switches to read-only mode.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk')
+s:replace({1, 1})
+test_run:cmd("setopt delimiter ';'")
+-- Start rw transaction.
+ch1 = fiber.channel(1);
+_ = fiber.create(function()
+    box.begin()
+    s:replace{1, 2}
+    ch1:get()
+    local status, err = pcall(box.commit)
+    ch1:put(status or err)
+end);
+-- Start ro transaction.
+ch2 = fiber.channel(1);
+_ = fiber.create(function()
+    box.begin()
+    s:select()
+    ch2:get()
+    local status, err = pcall(box.commit)
+    ch2:put(status or err)
+end);
+test_run:cmd("setopt delimiter ''");
+-- Switch to ro mode.
+box.cfg{read_only = true}
+-- Resume the transactions.
+ch1:put(true)
+ch2:put(true)
+ch1:get()
+ch2:get()
+-- Cleanup.
+box.cfg{read_only = false}
+s:select()
+s:drop()
-- 
2.11.0




More information about the Tarantool-patches mailing list