From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: georgy@tarantool.org, tarantool-patches@freelists.org
Subject: [PATCH 4/4] vinyl: abort rw transactions when instance switches to ro
Date: Mon, 4 Mar 2019 18:39:27 +0300 [thread overview]
Message-ID: <60a021480834a54e71b5c5308581e8bdbdc3f7c0.1551713282.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1551713282.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1551713282.git.vdavydov.dev@gmail.com>
A Vinyl transaction may yield while having a non-empty write set. This
opens a time window for the instance to switch to read-only mode. Since
we check ro flag only before executing a DML request, the transaction
would successfully commit in such a case, breaking the assumption that
no writes are possible on an instance after box.cfg{read_only=true}
returns. In particular, this breaks master-replica switching logic.
Fix this by aborting all local rw transactions before switching to
read-only mode. Note, remote rw transactions must not be aborted,
because they ignore ro flag.
Closes #4016
---
src/box/vinyl.c | 15 ++++++---
src/box/vy_tx.c | 20 +++++++++++-
src/box/vy_tx.h | 14 ++++++--
test/vinyl/errinj.result | 76 +++++++++++++++++++++++++++++++++++++++++++
test/vinyl/errinj.test.lua | 30 +++++++++++++++++
test/vinyl/misc.result | 80 ++++++++++++++++++++++++++++++++++++++++++++++
test/vinyl/misc.test.lua | 39 ++++++++++++++++++++++
7 files changed, 266 insertions(+), 8 deletions(-)
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index c8b02eb8..aac890b6 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -70,7 +70,6 @@
#include "info/info.h"
#include "column_mask.h"
#include "trigger.h"
-#include "session.h"
#include "wal.h" /* wal_mode() */
/**
@@ -1041,7 +1040,7 @@ vinyl_space_prepare_alter(struct space *old_space, struct space *new_space)
static void
vy_abort_writers_for_ddl(struct vy_env *env, struct vy_lsm *lsm)
{
- tx_manager_abort_writers(env->xm, lsm);
+ tx_manager_abort_writers_for_ddl(env->xm, lsm);
/*
* Wait for prepared transactions to complete
* (we can't abort them as they reached WAL).
@@ -2334,8 +2333,7 @@ vinyl_engine_prepare(struct engine *engine, struct txn *txn)
* available for the admin to track the lag so let the applier
* wait as long as necessary for memory dump to complete.
*/
- double timeout = (current_session()->type != SESSION_TYPE_APPLIER ?
- env->timeout : TIMEOUT_INFINITY);
+ double timeout = (tx->is_remote ? TIMEOUT_INFINITY : env->timeout);
/*
* Reserve quota needed by the transaction before allocating
* memory. Since this may yield, which opens a time window for
@@ -2423,6 +2421,13 @@ vinyl_engine_rollback_statement(struct engine *engine, struct txn *txn,
vy_tx_rollback_statement(tx, stmt->engine_savepoint);
}
+static void
+vinyl_engine_switch_to_ro(struct engine *engine)
+{
+ struct vy_env *env = vy_env(engine);
+ tx_manager_abort_writers_for_ro(env->xm);
+}
+
/* }}} Public API of transaction control */
/** {{{ Environment */
@@ -4463,7 +4468,7 @@ static const struct engine_vtab vinyl_engine_vtab = {
/* .commit = */ vinyl_engine_commit,
/* .rollback_statement = */ vinyl_engine_rollback_statement,
/* .rollback = */ vinyl_engine_rollback,
- /* .switch_to_ro = */ generic_engine_switch_to_ro,
+ /* .switch_to_ro = */ vinyl_engine_switch_to_ro,
/* .bootstrap = */ vinyl_engine_bootstrap,
/* .begin_initial_recovery = */ vinyl_engine_begin_initial_recovery,
/* .begin_final_recovery = */ vinyl_engine_begin_final_recovery,
diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c
index d216c73d..275fcb62 100644
--- a/src/box/vy_tx.c
+++ b/src/box/vy_tx.c
@@ -46,6 +46,7 @@
#include "iterator_type.h"
#include "salad/stailq.h"
#include "schema.h" /* space_cache_version */
+#include "session.h"
#include "space.h"
#include "trigger.h"
#include "trivia/util.h"
@@ -309,6 +310,7 @@ vy_tx_create(struct tx_manager *xm, struct vy_tx *tx)
tx->write_size = 0;
tx->xm = xm;
tx->state = VINYL_TX_READY;
+ tx->is_remote = false;
tx->read_view = (struct vy_read_view *)xm->p_global_read_view;
vy_tx_read_set_new(&tx->read_set);
tx->psn = 0;
@@ -407,6 +409,11 @@ vy_tx_begin(struct tx_manager *xm)
return NULL;
}
vy_tx_create(xm, tx);
+
+ struct session *session = fiber_get_session(fiber());
+ if (session != NULL && session->type == SESSION_TYPE_APPLIER)
+ tx->is_remote = true;
+
return tx;
}
@@ -1064,7 +1071,7 @@ vy_tx_set_with_colmask(struct vy_tx *tx, struct vy_lsm *lsm,
}
void
-tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm)
+tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct vy_lsm *lsm)
{
struct vy_tx *tx;
rlist_foreach_entry(tx, &xm->writers, in_writers) {
@@ -1076,6 +1083,17 @@ tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm)
}
void
+tx_manager_abort_writers_for_ro(struct tx_manager *xm)
+{
+ struct vy_tx *tx;
+ rlist_foreach_entry(tx, &xm->writers, in_writers) {
+ /* Remote transactions ignore ro flag. */
+ if (tx->state == VINYL_TX_READY && !tx->is_remote)
+ tx->state = VINYL_TX_ABORT;
+ }
+}
+
+void
vy_txw_iterator_open(struct vy_txw_iterator *itr,
struct vy_txw_iterator_stat *stat,
struct vy_tx *tx, struct vy_lsm *lsm,
diff --git a/src/box/vy_tx.h b/src/box/vy_tx.h
index 590538d8..e3df600a 100644
--- a/src/box/vy_tx.h
+++ b/src/box/vy_tx.h
@@ -161,6 +161,8 @@ struct vy_tx {
size_t write_size;
/** Current state of the transaction.*/
enum tx_state state;
+ /** Set if the transaction was started by an applier. */
+ bool is_remote;
/**
* The read view of this transaction. When a transaction
* is started, it is set to the "read committed" state,
@@ -276,10 +278,18 @@ tx_manager_mem_used(struct tx_manager *xm);
/**
* Abort all rw transactions that affect the given LSM tree
- * and haven't reached WAL yet.
+ * and haven't reached WAL yet. Called before executing a DDL
+ * operation.
*/
void
-tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm);
+tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct vy_lsm *lsm);
+
+/**
+ * Abort all local rw transactions that haven't reached WAL yet.
+ * Called before switching to read-only mode.
+ */
+void
+tx_manager_abort_writers_for_ro(struct tx_manager *xm);
/** Initialize a tx object. */
void
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 248b32c8..9b5f7314 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -1166,3 +1166,79 @@ test_run:cmd("restart server default")
box.space.test:drop()
---
...
+--
+-- Check that remote transactions are not aborted when an instance
+-- switches to read-only mode (gh-4016).
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('pk')
+---
+...
+s:replace{1, 1}
+---
+- [1, 1]
+...
+box.schema.user.grant('guest', 'replication')
+---
+...
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+---
+- true
+...
+test_run:cmd("start server replica")
+---
+- true
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.error.injection.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0.1)
+---
+- ok
+...
+test_run:cmd("switch default")
+---
+- true
+...
+s:update({1}, {{'+', 2, 1}})
+---
+- [1, 2]
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.cfg{read_only = true}
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+vclock = test_run:get_vclock("default")
+---
+...
+_ = test_run:wait_vclock("replica", vclock)
+---
+...
+test_run:cmd("stop server replica")
+---
+- true
+...
+test_run:cmd("cleanup server replica")
+---
+- true
+...
+test_run:cmd("delete server replica")
+---
+- true
+...
+box.schema.user.revoke('guest', 'replication')
+---
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index eaec52a5..5c7a525f 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -428,3 +428,33 @@ _ = fiber.create(function() box.snapshot() end)
test_run:cmd("restart server default")
box.space.test:drop()
+
+--
+-- Check that remote transactions are not aborted when an instance
+-- switches to read-only mode (gh-4016).
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk')
+s:replace{1, 1}
+box.schema.user.grant('guest', 'replication')
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+test_run:cmd("start server replica")
+
+test_run:cmd("switch replica")
+box.error.injection.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0.1)
+
+test_run:cmd("switch default")
+s:update({1}, {{'+', 2, 1}})
+
+test_run:cmd("switch replica")
+box.cfg{read_only = true}
+
+test_run:cmd("switch default")
+vclock = test_run:get_vclock("default")
+_ = test_run:wait_vclock("replica", vclock)
+
+test_run:cmd("stop server replica")
+test_run:cmd("cleanup server replica")
+test_run:cmd("delete server replica")
+box.schema.user.revoke('guest', 'replication')
+s:drop()
diff --git a/test/vinyl/misc.result b/test/vinyl/misc.result
index 5f67271e..685fd776 100644
--- a/test/vinyl/misc.result
+++ b/test/vinyl/misc.result
@@ -282,3 +282,83 @@ test_run:cmd('cleanup server test')
---
- true
...
+--
+-- gh-4016: local rw transactions are aborted when the instance
+-- switches to read-only mode.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('pk')
+---
+...
+s:replace({1, 1})
+---
+- [1, 1]
+...
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+-- Start rw transaction.
+ch1 = fiber.channel(1);
+---
+...
+_ = fiber.create(function()
+ box.begin()
+ s:replace{1, 2}
+ ch1:get()
+ local status, err = pcall(box.commit)
+ ch1:put(status or err)
+end);
+---
+...
+-- Start ro transaction.
+ch2 = fiber.channel(1);
+---
+...
+_ = fiber.create(function()
+ box.begin()
+ s:select()
+ ch2:get()
+ local status, err = pcall(box.commit)
+ ch2:put(status or err)
+end);
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+-- Switch to ro mode.
+box.cfg{read_only = true}
+---
+...
+-- Resume the transactions.
+ch1:put(true)
+---
+- true
+...
+ch2:put(true)
+---
+- true
+...
+ch1:get()
+---
+- Transaction has been aborted by conflict
+...
+ch2:get()
+---
+- true
+...
+-- Cleanup.
+box.cfg{read_only = false}
+---
+...
+s:select()
+---
+- - [1, 1]
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/misc.test.lua b/test/vinyl/misc.test.lua
index 1c3a9517..cdc22774 100644
--- a/test/vinyl/misc.test.lua
+++ b/test/vinyl/misc.test.lua
@@ -114,3 +114,42 @@ s:drop()
test_run:cmd('switch default')
test_run:cmd('stop server test')
test_run:cmd('cleanup server test')
+
+--
+-- gh-4016: local rw transactions are aborted when the instance
+-- switches to read-only mode.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk')
+s:replace({1, 1})
+test_run:cmd("setopt delimiter ';'")
+-- Start rw transaction.
+ch1 = fiber.channel(1);
+_ = fiber.create(function()
+ box.begin()
+ s:replace{1, 2}
+ ch1:get()
+ local status, err = pcall(box.commit)
+ ch1:put(status or err)
+end);
+-- Start ro transaction.
+ch2 = fiber.channel(1);
+_ = fiber.create(function()
+ box.begin()
+ s:select()
+ ch2:get()
+ local status, err = pcall(box.commit)
+ ch2:put(status or err)
+end);
+test_run:cmd("setopt delimiter ''");
+-- Switch to ro mode.
+box.cfg{read_only = true}
+-- Resume the transactions.
+ch1:put(true)
+ch2:put(true)
+ch1:get()
+ch2:get()
+-- Cleanup.
+box.cfg{read_only = false}
+s:select()
+s:drop()
--
2.11.0
next prev parent reply other threads:[~2019-03-04 15:39 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-03-04 15:39 [PATCH 0/4] Abort vinyl transactions before switching " Vladimir Davydov
2019-03-04 15:39 ` [PATCH 1/4] vinyl: rename tx statement begin/rollback routines Vladimir Davydov
2019-03-05 7:29 ` Konstantin Osipov
2019-03-04 15:39 ` [PATCH 2/4] vinyl: add tx to writers list in begin_statement engine callback Vladimir Davydov
2019-03-05 7:30 ` Konstantin Osipov
2019-03-04 15:39 ` [PATCH 3/4] engine: add switch_to_ro callback Vladimir Davydov
2019-03-05 7:31 ` Konstantin Osipov
2019-03-04 15:39 ` Vladimir Davydov [this message]
2019-03-05 7:43 ` [PATCH 4/4] vinyl: abort rw transactions when instance switches to ro Konstantin Osipov
2019-03-05 8:35 ` Vladimir Davydov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=60a021480834a54e71b5c5308581e8bdbdc3f7c0.1551713282.git.vdavydov.dev@gmail.com \
--to=vdavydov.dev@gmail.com \
--cc=georgy@tarantool.org \
--cc=kostja@tarantool.org \
--cc=tarantool-patches@freelists.org \
--subject='Re: [PATCH 4/4] vinyl: abort rw transactions when instance switches to ro' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox