Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: georgy@tarantool.org, tarantool-patches@freelists.org
Subject: [PATCH 4/4] vinyl: abort rw transactions when instance switches to ro
Date: Mon,  4 Mar 2019 18:39:27 +0300	[thread overview]
Message-ID: <60a021480834a54e71b5c5308581e8bdbdc3f7c0.1551713282.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1551713282.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1551713282.git.vdavydov.dev@gmail.com>

A Vinyl transaction may yield while having a non-empty write set. This
opens a time window for the instance to switch to read-only mode. Since
we check ro flag only before executing a DML request, the transaction
would successfully commit in such a case, breaking the assumption that
no writes are possible on an instance after box.cfg{read_only=true}
returns. In particular, this breaks master-replica switching logic.

Fix this by aborting all local rw transactions before switching to
read-only mode. Note, remote rw transactions must not be aborted,
because they ignore ro flag.

Closes #4016
---
 src/box/vinyl.c            | 15 ++++++---
 src/box/vy_tx.c            | 20 +++++++++++-
 src/box/vy_tx.h            | 14 ++++++--
 test/vinyl/errinj.result   | 76 +++++++++++++++++++++++++++++++++++++++++++
 test/vinyl/errinj.test.lua | 30 +++++++++++++++++
 test/vinyl/misc.result     | 80 ++++++++++++++++++++++++++++++++++++++++++++++
 test/vinyl/misc.test.lua   | 39 ++++++++++++++++++++++
 7 files changed, 266 insertions(+), 8 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index c8b02eb8..aac890b6 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -70,7 +70,6 @@
 #include "info/info.h"
 #include "column_mask.h"
 #include "trigger.h"
-#include "session.h"
 #include "wal.h" /* wal_mode() */
 
 /**
@@ -1041,7 +1040,7 @@ vinyl_space_prepare_alter(struct space *old_space, struct space *new_space)
 static void
 vy_abort_writers_for_ddl(struct vy_env *env, struct vy_lsm *lsm)
 {
-	tx_manager_abort_writers(env->xm, lsm);
+	tx_manager_abort_writers_for_ddl(env->xm, lsm);
 	/*
 	 * Wait for prepared transactions to complete
 	 * (we can't abort them as they reached WAL).
@@ -2334,8 +2333,7 @@ vinyl_engine_prepare(struct engine *engine, struct txn *txn)
 	 * available for the admin to track the lag so let the applier
 	 * wait as long as necessary for memory dump to complete.
 	 */
-	double timeout = (current_session()->type != SESSION_TYPE_APPLIER ?
-			  env->timeout : TIMEOUT_INFINITY);
+	double timeout = (tx->is_remote ? TIMEOUT_INFINITY : env->timeout);
 	/*
 	 * Reserve quota needed by the transaction before allocating
 	 * memory. Since this may yield, which opens a time window for
@@ -2423,6 +2421,13 @@ vinyl_engine_rollback_statement(struct engine *engine, struct txn *txn,
 	vy_tx_rollback_statement(tx, stmt->engine_savepoint);
 }
 
+static void
+vinyl_engine_switch_to_ro(struct engine *engine)
+{
+	struct vy_env *env = vy_env(engine);
+	tx_manager_abort_writers_for_ro(env->xm);
+}
+
 /* }}} Public API of transaction control */
 
 /** {{{ Environment */
@@ -4463,7 +4468,7 @@ static const struct engine_vtab vinyl_engine_vtab = {
 	/* .commit = */ vinyl_engine_commit,
 	/* .rollback_statement = */ vinyl_engine_rollback_statement,
 	/* .rollback = */ vinyl_engine_rollback,
-	/* .switch_to_ro = */ generic_engine_switch_to_ro,
+	/* .switch_to_ro = */ vinyl_engine_switch_to_ro,
 	/* .bootstrap = */ vinyl_engine_bootstrap,
 	/* .begin_initial_recovery = */ vinyl_engine_begin_initial_recovery,
 	/* .begin_final_recovery = */ vinyl_engine_begin_final_recovery,
diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c
index d216c73d..275fcb62 100644
--- a/src/box/vy_tx.c
+++ b/src/box/vy_tx.c
@@ -46,6 +46,7 @@
 #include "iterator_type.h"
 #include "salad/stailq.h"
 #include "schema.h" /* space_cache_version */
+#include "session.h"
 #include "space.h"
 #include "trigger.h"
 #include "trivia/util.h"
@@ -309,6 +310,7 @@ vy_tx_create(struct tx_manager *xm, struct vy_tx *tx)
 	tx->write_size = 0;
 	tx->xm = xm;
 	tx->state = VINYL_TX_READY;
+	tx->is_remote = false;
 	tx->read_view = (struct vy_read_view *)xm->p_global_read_view;
 	vy_tx_read_set_new(&tx->read_set);
 	tx->psn = 0;
@@ -407,6 +409,11 @@ vy_tx_begin(struct tx_manager *xm)
 		return NULL;
 	}
 	vy_tx_create(xm, tx);
+
+	struct session *session = fiber_get_session(fiber());
+	if (session != NULL && session->type == SESSION_TYPE_APPLIER)
+		tx->is_remote = true;
+
 	return tx;
 }
 
@@ -1064,7 +1071,7 @@ vy_tx_set_with_colmask(struct vy_tx *tx, struct vy_lsm *lsm,
 }
 
 void
-tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm)
+tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct vy_lsm *lsm)
 {
 	struct vy_tx *tx;
 	rlist_foreach_entry(tx, &xm->writers, in_writers) {
@@ -1076,6 +1083,17 @@ tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm)
 }
 
 void
+tx_manager_abort_writers_for_ro(struct tx_manager *xm)
+{
+	struct vy_tx *tx;
+	rlist_foreach_entry(tx, &xm->writers, in_writers) {
+		/* Remote transactions ignore ro flag. */
+		if (tx->state == VINYL_TX_READY && !tx->is_remote)
+			tx->state = VINYL_TX_ABORT;
+	}
+}
+
+void
 vy_txw_iterator_open(struct vy_txw_iterator *itr,
 		     struct vy_txw_iterator_stat *stat,
 		     struct vy_tx *tx, struct vy_lsm *lsm,
diff --git a/src/box/vy_tx.h b/src/box/vy_tx.h
index 590538d8..e3df600a 100644
--- a/src/box/vy_tx.h
+++ b/src/box/vy_tx.h
@@ -161,6 +161,8 @@ struct vy_tx {
 	size_t write_size;
 	/** Current state of the transaction.*/
 	enum tx_state state;
+	/** Set if the transaction was started by an applier. */
+	bool is_remote;
 	/**
 	 * The read view of this transaction. When a transaction
 	 * is started, it is set to the "read committed" state,
@@ -276,10 +278,18 @@ tx_manager_mem_used(struct tx_manager *xm);
 
 /**
  * Abort all rw transactions that affect the given LSM tree
- * and haven't reached WAL yet.
+ * and haven't reached WAL yet. Called before executing a DDL
+ * operation.
  */
 void
-tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm);
+tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct vy_lsm *lsm);
+
+/**
+ * Abort all local rw transactions that haven't reached WAL yet.
+ * Called before switching to read-only mode.
+ */
+void
+tx_manager_abort_writers_for_ro(struct tx_manager *xm);
 
 /** Initialize a tx object. */
 void
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 248b32c8..9b5f7314 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -1166,3 +1166,79 @@ test_run:cmd("restart server default")
 box.space.test:drop()
 ---
 ...
+--
+-- Check that remote transactions are not aborted when an instance
+-- switches to read-only mode (gh-4016).
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('pk')
+---
+...
+s:replace{1, 1}
+---
+- [1, 1]
+...
+box.schema.user.grant('guest', 'replication')
+---
+...
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+---
+- true
+...
+test_run:cmd("start server replica")
+---
+- true
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.error.injection.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0.1)
+---
+- ok
+...
+test_run:cmd("switch default")
+---
+- true
+...
+s:update({1}, {{'+', 2, 1}})
+---
+- [1, 2]
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.cfg{read_only = true}
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+vclock = test_run:get_vclock("default")
+---
+...
+_ = test_run:wait_vclock("replica", vclock)
+---
+...
+test_run:cmd("stop server replica")
+---
+- true
+...
+test_run:cmd("cleanup server replica")
+---
+- true
+...
+test_run:cmd("delete server replica")
+---
+- true
+...
+box.schema.user.revoke('guest', 'replication')
+---
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index eaec52a5..5c7a525f 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -428,3 +428,33 @@ _ = fiber.create(function() box.snapshot() end)
 
 test_run:cmd("restart server default")
 box.space.test:drop()
+
+--
+-- Check that remote transactions are not aborted when an instance
+-- switches to read-only mode (gh-4016).
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk')
+s:replace{1, 1}
+box.schema.user.grant('guest', 'replication')
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+test_run:cmd("start server replica")
+
+test_run:cmd("switch replica")
+box.error.injection.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0.1)
+
+test_run:cmd("switch default")
+s:update({1}, {{'+', 2, 1}})
+
+test_run:cmd("switch replica")
+box.cfg{read_only = true}
+
+test_run:cmd("switch default")
+vclock = test_run:get_vclock("default")
+_ = test_run:wait_vclock("replica", vclock)
+
+test_run:cmd("stop server replica")
+test_run:cmd("cleanup server replica")
+test_run:cmd("delete server replica")
+box.schema.user.revoke('guest', 'replication')
+s:drop()
diff --git a/test/vinyl/misc.result b/test/vinyl/misc.result
index 5f67271e..685fd776 100644
--- a/test/vinyl/misc.result
+++ b/test/vinyl/misc.result
@@ -282,3 +282,83 @@ test_run:cmd('cleanup server test')
 ---
 - true
 ...
+--
+-- gh-4016: local rw transactions are aborted when the instance
+-- switches to read-only mode.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('pk')
+---
+...
+s:replace({1, 1})
+---
+- [1, 1]
+...
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+-- Start rw transaction.
+ch1 = fiber.channel(1);
+---
+...
+_ = fiber.create(function()
+    box.begin()
+    s:replace{1, 2}
+    ch1:get()
+    local status, err = pcall(box.commit)
+    ch1:put(status or err)
+end);
+---
+...
+-- Start ro transaction.
+ch2 = fiber.channel(1);
+---
+...
+_ = fiber.create(function()
+    box.begin()
+    s:select()
+    ch2:get()
+    local status, err = pcall(box.commit)
+    ch2:put(status or err)
+end);
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+-- Switch to ro mode.
+box.cfg{read_only = true}
+---
+...
+-- Resume the transactions.
+ch1:put(true)
+---
+- true
+...
+ch2:put(true)
+---
+- true
+...
+ch1:get()
+---
+- Transaction has been aborted by conflict
+...
+ch2:get()
+---
+- true
+...
+-- Cleanup.
+box.cfg{read_only = false}
+---
+...
+s:select()
+---
+- - [1, 1]
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/misc.test.lua b/test/vinyl/misc.test.lua
index 1c3a9517..cdc22774 100644
--- a/test/vinyl/misc.test.lua
+++ b/test/vinyl/misc.test.lua
@@ -114,3 +114,42 @@ s:drop()
 test_run:cmd('switch default')
 test_run:cmd('stop server test')
 test_run:cmd('cleanup server test')
+
+--
+-- gh-4016: local rw transactions are aborted when the instance
+-- switches to read-only mode.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk')
+s:replace({1, 1})
+test_run:cmd("setopt delimiter ';'")
+-- Start rw transaction.
+ch1 = fiber.channel(1);
+_ = fiber.create(function()
+    box.begin()
+    s:replace{1, 2}
+    ch1:get()
+    local status, err = pcall(box.commit)
+    ch1:put(status or err)
+end);
+-- Start ro transaction.
+ch2 = fiber.channel(1);
+_ = fiber.create(function()
+    box.begin()
+    s:select()
+    ch2:get()
+    local status, err = pcall(box.commit)
+    ch2:put(status or err)
+end);
+test_run:cmd("setopt delimiter ''");
+-- Switch to ro mode.
+box.cfg{read_only = true}
+-- Resume the transactions.
+ch1:put(true)
+ch2:put(true)
+ch1:get()
+ch2:get()
+-- Cleanup.
+box.cfg{read_only = false}
+s:select()
+s:drop()
-- 
2.11.0

  parent reply	other threads:[~2019-03-04 15:39 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-04 15:39 [PATCH 0/4] Abort vinyl transactions before switching " Vladimir Davydov
2019-03-04 15:39 ` [PATCH 1/4] vinyl: rename tx statement begin/rollback routines Vladimir Davydov
2019-03-05  7:29   ` Konstantin Osipov
2019-03-04 15:39 ` [PATCH 2/4] vinyl: add tx to writers list in begin_statement engine callback Vladimir Davydov
2019-03-05  7:30   ` Konstantin Osipov
2019-03-04 15:39 ` [PATCH 3/4] engine: add switch_to_ro callback Vladimir Davydov
2019-03-05  7:31   ` Konstantin Osipov
2019-03-04 15:39 ` Vladimir Davydov [this message]
2019-03-05  7:43   ` [PATCH 4/4] vinyl: abort rw transactions when instance switches to ro Konstantin Osipov
2019-03-05  8:35     ` Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=60a021480834a54e71b5c5308581e8bdbdc3f7c0.1551713282.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=georgy@tarantool.org \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 4/4] vinyl: abort rw transactions when instance switches to ro' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox