[PATCH 4/4] vinyl: abort rw transactions when instance switches to ro
Vladimir Davydov
vdavydov.dev at gmail.com
Mon Mar 4 18:39:27 MSK 2019
A Vinyl transaction may yield while having a non-empty write set. This
opens a time window for the instance to switch to read-only mode. Since
we check ro flag only before executing a DML request, the transaction
would successfully commit in such a case, breaking the assumption that
no writes are possible on an instance after box.cfg{read_only=true}
returns. In particular, this breaks master-replica switching logic.
Fix this by aborting all local rw transactions before switching to
read-only mode. Note, remote rw transactions must not be aborted,
because they ignore ro flag.
Closes #4016
---
src/box/vinyl.c | 15 ++++++---
src/box/vy_tx.c | 20 +++++++++++-
src/box/vy_tx.h | 14 ++++++--
test/vinyl/errinj.result | 76 +++++++++++++++++++++++++++++++++++++++++++
test/vinyl/errinj.test.lua | 30 +++++++++++++++++
test/vinyl/misc.result | 80 ++++++++++++++++++++++++++++++++++++++++++++++
test/vinyl/misc.test.lua | 39 ++++++++++++++++++++++
7 files changed, 266 insertions(+), 8 deletions(-)
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index c8b02eb8..aac890b6 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -70,7 +70,6 @@
#include "info/info.h"
#include "column_mask.h"
#include "trigger.h"
-#include "session.h"
#include "wal.h" /* wal_mode() */
/**
@@ -1041,7 +1040,7 @@ vinyl_space_prepare_alter(struct space *old_space, struct space *new_space)
static void
vy_abort_writers_for_ddl(struct vy_env *env, struct vy_lsm *lsm)
{
- tx_manager_abort_writers(env->xm, lsm);
+ tx_manager_abort_writers_for_ddl(env->xm, lsm);
/*
* Wait for prepared transactions to complete
* (we can't abort them as they reached WAL).
@@ -2334,8 +2333,7 @@ vinyl_engine_prepare(struct engine *engine, struct txn *txn)
* available for the admin to track the lag so let the applier
* wait as long as necessary for memory dump to complete.
*/
- double timeout = (current_session()->type != SESSION_TYPE_APPLIER ?
- env->timeout : TIMEOUT_INFINITY);
+ double timeout = (tx->is_remote ? TIMEOUT_INFINITY : env->timeout);
/*
* Reserve quota needed by the transaction before allocating
* memory. Since this may yield, which opens a time window for
@@ -2423,6 +2421,13 @@ vinyl_engine_rollback_statement(struct engine *engine, struct txn *txn,
vy_tx_rollback_statement(tx, stmt->engine_savepoint);
}
+static void
+vinyl_engine_switch_to_ro(struct engine *engine)
+{
+ struct vy_env *env = vy_env(engine);
+ tx_manager_abort_writers_for_ro(env->xm);
+}
+
/* }}} Public API of transaction control */
/** {{{ Environment */
@@ -4463,7 +4468,7 @@ static const struct engine_vtab vinyl_engine_vtab = {
/* .commit = */ vinyl_engine_commit,
/* .rollback_statement = */ vinyl_engine_rollback_statement,
/* .rollback = */ vinyl_engine_rollback,
- /* .switch_to_ro = */ generic_engine_switch_to_ro,
+ /* .switch_to_ro = */ vinyl_engine_switch_to_ro,
/* .bootstrap = */ vinyl_engine_bootstrap,
/* .begin_initial_recovery = */ vinyl_engine_begin_initial_recovery,
/* .begin_final_recovery = */ vinyl_engine_begin_final_recovery,
diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c
index d216c73d..275fcb62 100644
--- a/src/box/vy_tx.c
+++ b/src/box/vy_tx.c
@@ -46,6 +46,7 @@
#include "iterator_type.h"
#include "salad/stailq.h"
#include "schema.h" /* space_cache_version */
+#include "session.h"
#include "space.h"
#include "trigger.h"
#include "trivia/util.h"
@@ -309,6 +310,7 @@ vy_tx_create(struct tx_manager *xm, struct vy_tx *tx)
tx->write_size = 0;
tx->xm = xm;
tx->state = VINYL_TX_READY;
+ tx->is_remote = false;
tx->read_view = (struct vy_read_view *)xm->p_global_read_view;
vy_tx_read_set_new(&tx->read_set);
tx->psn = 0;
@@ -407,6 +409,11 @@ vy_tx_begin(struct tx_manager *xm)
return NULL;
}
vy_tx_create(xm, tx);
+
+ struct session *session = fiber_get_session(fiber());
+ if (session != NULL && session->type == SESSION_TYPE_APPLIER)
+ tx->is_remote = true;
+
return tx;
}
@@ -1064,7 +1071,7 @@ vy_tx_set_with_colmask(struct vy_tx *tx, struct vy_lsm *lsm,
}
void
-tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm)
+tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct vy_lsm *lsm)
{
struct vy_tx *tx;
rlist_foreach_entry(tx, &xm->writers, in_writers) {
@@ -1076,6 +1083,17 @@ tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm)
}
void
+tx_manager_abort_writers_for_ro(struct tx_manager *xm)
+{
+ struct vy_tx *tx;
+ rlist_foreach_entry(tx, &xm->writers, in_writers) {
+ /* Remote transactions ignore ro flag. */
+ if (tx->state == VINYL_TX_READY && !tx->is_remote)
+ tx->state = VINYL_TX_ABORT;
+ }
+}
+
+void
vy_txw_iterator_open(struct vy_txw_iterator *itr,
struct vy_txw_iterator_stat *stat,
struct vy_tx *tx, struct vy_lsm *lsm,
diff --git a/src/box/vy_tx.h b/src/box/vy_tx.h
index 590538d8..e3df600a 100644
--- a/src/box/vy_tx.h
+++ b/src/box/vy_tx.h
@@ -161,6 +161,8 @@ struct vy_tx {
size_t write_size;
/** Current state of the transaction.*/
enum tx_state state;
+ /** Set if the transaction was started by an applier. */
+ bool is_remote;
/**
* The read view of this transaction. When a transaction
* is started, it is set to the "read committed" state,
@@ -276,10 +278,18 @@ tx_manager_mem_used(struct tx_manager *xm);
/**
* Abort all rw transactions that affect the given LSM tree
- * and haven't reached WAL yet.
+ * and haven't reached WAL yet. Called before executing a DDL
+ * operation.
*/
void
-tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm);
+tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct vy_lsm *lsm);
+
+/**
+ * Abort all local rw transactions that haven't reached WAL yet.
+ * Called before switching to read-only mode.
+ */
+void
+tx_manager_abort_writers_for_ro(struct tx_manager *xm);
/** Initialize a tx object. */
void
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 248b32c8..9b5f7314 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -1166,3 +1166,79 @@ test_run:cmd("restart server default")
box.space.test:drop()
---
...
+--
+-- Check that remote transactions are not aborted when an instance
+-- switches to read-only mode (gh-4016).
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('pk')
+---
+...
+s:replace{1, 1}
+---
+- [1, 1]
+...
+box.schema.user.grant('guest', 'replication')
+---
+...
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+---
+- true
+...
+test_run:cmd("start server replica")
+---
+- true
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.error.injection.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0.1)
+---
+- ok
+...
+test_run:cmd("switch default")
+---
+- true
+...
+s:update({1}, {{'+', 2, 1}})
+---
+- [1, 2]
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.cfg{read_only = true}
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+vclock = test_run:get_vclock("default")
+---
+...
+_ = test_run:wait_vclock("replica", vclock)
+---
+...
+test_run:cmd("stop server replica")
+---
+- true
+...
+test_run:cmd("cleanup server replica")
+---
+- true
+...
+test_run:cmd("delete server replica")
+---
+- true
+...
+box.schema.user.revoke('guest', 'replication')
+---
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index eaec52a5..5c7a525f 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -428,3 +428,33 @@ _ = fiber.create(function() box.snapshot() end)
test_run:cmd("restart server default")
box.space.test:drop()
+
+--
+-- Check that remote transactions are not aborted when an instance
+-- switches to read-only mode (gh-4016).
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk')
+s:replace{1, 1}
+box.schema.user.grant('guest', 'replication')
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+test_run:cmd("start server replica")
+
+test_run:cmd("switch replica")
+box.error.injection.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0.1)
+
+test_run:cmd("switch default")
+s:update({1}, {{'+', 2, 1}})
+
+test_run:cmd("switch replica")
+box.cfg{read_only = true}
+
+test_run:cmd("switch default")
+vclock = test_run:get_vclock("default")
+_ = test_run:wait_vclock("replica", vclock)
+
+test_run:cmd("stop server replica")
+test_run:cmd("cleanup server replica")
+test_run:cmd("delete server replica")
+box.schema.user.revoke('guest', 'replication')
+s:drop()
diff --git a/test/vinyl/misc.result b/test/vinyl/misc.result
index 5f67271e..685fd776 100644
--- a/test/vinyl/misc.result
+++ b/test/vinyl/misc.result
@@ -282,3 +282,83 @@ test_run:cmd('cleanup server test')
---
- true
...
+--
+-- gh-4016: local rw transactions are aborted when the instance
+-- switches to read-only mode.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('pk')
+---
+...
+s:replace({1, 1})
+---
+- [1, 1]
+...
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+-- Start rw transaction.
+ch1 = fiber.channel(1);
+---
+...
+_ = fiber.create(function()
+ box.begin()
+ s:replace{1, 2}
+ ch1:get()
+ local status, err = pcall(box.commit)
+ ch1:put(status or err)
+end);
+---
+...
+-- Start ro transaction.
+ch2 = fiber.channel(1);
+---
+...
+_ = fiber.create(function()
+ box.begin()
+ s:select()
+ ch2:get()
+ local status, err = pcall(box.commit)
+ ch2:put(status or err)
+end);
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+-- Switch to ro mode.
+box.cfg{read_only = true}
+---
+...
+-- Resume the transactions.
+ch1:put(true)
+---
+- true
+...
+ch2:put(true)
+---
+- true
+...
+ch1:get()
+---
+- Transaction has been aborted by conflict
+...
+ch2:get()
+---
+- true
+...
+-- Cleanup.
+box.cfg{read_only = false}
+---
+...
+s:select()
+---
+- - [1, 1]
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/misc.test.lua b/test/vinyl/misc.test.lua
index 1c3a9517..cdc22774 100644
--- a/test/vinyl/misc.test.lua
+++ b/test/vinyl/misc.test.lua
@@ -114,3 +114,42 @@ s:drop()
test_run:cmd('switch default')
test_run:cmd('stop server test')
test_run:cmd('cleanup server test')
+
+--
+-- gh-4016: local rw transactions are aborted when the instance
+-- switches to read-only mode.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk')
+s:replace({1, 1})
+test_run:cmd("setopt delimiter ';'")
+-- Start rw transaction.
+ch1 = fiber.channel(1);
+_ = fiber.create(function()
+ box.begin()
+ s:replace{1, 2}
+ ch1:get()
+ local status, err = pcall(box.commit)
+ ch1:put(status or err)
+end);
+-- Start ro transaction.
+ch2 = fiber.channel(1);
+_ = fiber.create(function()
+ box.begin()
+ s:select()
+ ch2:get()
+ local status, err = pcall(box.commit)
+ ch2:put(status or err)
+end);
+test_run:cmd("setopt delimiter ''");
+-- Switch to ro mode.
+box.cfg{read_only = true}
+-- Resume the transactions.
+ch1:put(true)
+ch2:put(true)
+ch1:get()
+ch2:get()
+-- Cleanup.
+box.cfg{read_only = false}
+s:select()
+s:drop()
--
2.11.0
More information about the Tarantool-patches
mailing list