From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH 4/4] vinyl: abort rw transactions when instance switches to ro Date: Mon, 4 Mar 2019 18:39:27 +0300 Message-Id: <60a021480834a54e71b5c5308581e8bdbdc3f7c0.1551713282.git.vdavydov.dev@gmail.com> In-Reply-To: References: In-Reply-To: References: To: kostja@tarantool.org Cc: georgy@tarantool.org, tarantool-patches@freelists.org List-ID: A Vinyl transaction may yield while having a non-empty write set. This opens a time window for the instance to switch to read-only mode. Since we check ro flag only before executing a DML request, the transaction would successfully commit in such a case, breaking the assumption that no writes are possible on an instance after box.cfg{read_only=true} returns. In particular, this breaks master-replica switching logic. Fix this by aborting all local rw transactions before switching to read-only mode. Note, remote rw transactions must not be aborted, because they ignore ro flag. Closes #4016 --- src/box/vinyl.c | 15 ++++++--- src/box/vy_tx.c | 20 +++++++++++- src/box/vy_tx.h | 14 ++++++-- test/vinyl/errinj.result | 76 +++++++++++++++++++++++++++++++++++++++++++ test/vinyl/errinj.test.lua | 30 +++++++++++++++++ test/vinyl/misc.result | 80 ++++++++++++++++++++++++++++++++++++++++++++++ test/vinyl/misc.test.lua | 39 ++++++++++++++++++++++ 7 files changed, 266 insertions(+), 8 deletions(-) diff --git a/src/box/vinyl.c b/src/box/vinyl.c index c8b02eb8..aac890b6 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -70,7 +70,6 @@ #include "info/info.h" #include "column_mask.h" #include "trigger.h" -#include "session.h" #include "wal.h" /* wal_mode() */ /** @@ -1041,7 +1040,7 @@ vinyl_space_prepare_alter(struct space *old_space, struct space *new_space) static void vy_abort_writers_for_ddl(struct vy_env *env, struct vy_lsm *lsm) { - tx_manager_abort_writers(env->xm, lsm); + tx_manager_abort_writers_for_ddl(env->xm, lsm); /* * Wait for prepared transactions to complete * (we can't abort them as they reached WAL). @@ -2334,8 +2333,7 @@ vinyl_engine_prepare(struct engine *engine, struct txn *txn) * available for the admin to track the lag so let the applier * wait as long as necessary for memory dump to complete. */ - double timeout = (current_session()->type != SESSION_TYPE_APPLIER ? - env->timeout : TIMEOUT_INFINITY); + double timeout = (tx->is_remote ? TIMEOUT_INFINITY : env->timeout); /* * Reserve quota needed by the transaction before allocating * memory. Since this may yield, which opens a time window for @@ -2423,6 +2421,13 @@ vinyl_engine_rollback_statement(struct engine *engine, struct txn *txn, vy_tx_rollback_statement(tx, stmt->engine_savepoint); } +static void +vinyl_engine_switch_to_ro(struct engine *engine) +{ + struct vy_env *env = vy_env(engine); + tx_manager_abort_writers_for_ro(env->xm); +} + /* }}} Public API of transaction control */ /** {{{ Environment */ @@ -4463,7 +4468,7 @@ static const struct engine_vtab vinyl_engine_vtab = { /* .commit = */ vinyl_engine_commit, /* .rollback_statement = */ vinyl_engine_rollback_statement, /* .rollback = */ vinyl_engine_rollback, - /* .switch_to_ro = */ generic_engine_switch_to_ro, + /* .switch_to_ro = */ vinyl_engine_switch_to_ro, /* .bootstrap = */ vinyl_engine_bootstrap, /* .begin_initial_recovery = */ vinyl_engine_begin_initial_recovery, /* .begin_final_recovery = */ vinyl_engine_begin_final_recovery, diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c index d216c73d..275fcb62 100644 --- a/src/box/vy_tx.c +++ b/src/box/vy_tx.c @@ -46,6 +46,7 @@ #include "iterator_type.h" #include "salad/stailq.h" #include "schema.h" /* space_cache_version */ +#include "session.h" #include "space.h" #include "trigger.h" #include "trivia/util.h" @@ -309,6 +310,7 @@ vy_tx_create(struct tx_manager *xm, struct vy_tx *tx) tx->write_size = 0; tx->xm = xm; tx->state = VINYL_TX_READY; + tx->is_remote = false; tx->read_view = (struct vy_read_view *)xm->p_global_read_view; vy_tx_read_set_new(&tx->read_set); tx->psn = 0; @@ -407,6 +409,11 @@ vy_tx_begin(struct tx_manager *xm) return NULL; } vy_tx_create(xm, tx); + + struct session *session = fiber_get_session(fiber()); + if (session != NULL && session->type == SESSION_TYPE_APPLIER) + tx->is_remote = true; + return tx; } @@ -1064,7 +1071,7 @@ vy_tx_set_with_colmask(struct vy_tx *tx, struct vy_lsm *lsm, } void -tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm) +tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct vy_lsm *lsm) { struct vy_tx *tx; rlist_foreach_entry(tx, &xm->writers, in_writers) { @@ -1076,6 +1083,17 @@ tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm) } void +tx_manager_abort_writers_for_ro(struct tx_manager *xm) +{ + struct vy_tx *tx; + rlist_foreach_entry(tx, &xm->writers, in_writers) { + /* Remote transactions ignore ro flag. */ + if (tx->state == VINYL_TX_READY && !tx->is_remote) + tx->state = VINYL_TX_ABORT; + } +} + +void vy_txw_iterator_open(struct vy_txw_iterator *itr, struct vy_txw_iterator_stat *stat, struct vy_tx *tx, struct vy_lsm *lsm, diff --git a/src/box/vy_tx.h b/src/box/vy_tx.h index 590538d8..e3df600a 100644 --- a/src/box/vy_tx.h +++ b/src/box/vy_tx.h @@ -161,6 +161,8 @@ struct vy_tx { size_t write_size; /** Current state of the transaction.*/ enum tx_state state; + /** Set if the transaction was started by an applier. */ + bool is_remote; /** * The read view of this transaction. When a transaction * is started, it is set to the "read committed" state, @@ -276,10 +278,18 @@ tx_manager_mem_used(struct tx_manager *xm); /** * Abort all rw transactions that affect the given LSM tree - * and haven't reached WAL yet. + * and haven't reached WAL yet. Called before executing a DDL + * operation. */ void -tx_manager_abort_writers(struct tx_manager *xm, struct vy_lsm *lsm); +tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct vy_lsm *lsm); + +/** + * Abort all local rw transactions that haven't reached WAL yet. + * Called before switching to read-only mode. + */ +void +tx_manager_abort_writers_for_ro(struct tx_manager *xm); /** Initialize a tx object. */ void diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result index 248b32c8..9b5f7314 100644 --- a/test/vinyl/errinj.result +++ b/test/vinyl/errinj.result @@ -1166,3 +1166,79 @@ test_run:cmd("restart server default") box.space.test:drop() --- ... +-- +-- Check that remote transactions are not aborted when an instance +-- switches to read-only mode (gh-4016). +-- +s = box.schema.space.create('test', {engine = 'vinyl'}) +--- +... +_ = s:create_index('pk') +--- +... +s:replace{1, 1} +--- +- [1, 1] +... +box.schema.user.grant('guest', 'replication') +--- +... +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'") +--- +- true +... +test_run:cmd("start server replica") +--- +- true +... +test_run:cmd("switch replica") +--- +- true +... +box.error.injection.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0.1) +--- +- ok +... +test_run:cmd("switch default") +--- +- true +... +s:update({1}, {{'+', 2, 1}}) +--- +- [1, 2] +... +test_run:cmd("switch replica") +--- +- true +... +box.cfg{read_only = true} +--- +... +test_run:cmd("switch default") +--- +- true +... +vclock = test_run:get_vclock("default") +--- +... +_ = test_run:wait_vclock("replica", vclock) +--- +... +test_run:cmd("stop server replica") +--- +- true +... +test_run:cmd("cleanup server replica") +--- +- true +... +test_run:cmd("delete server replica") +--- +- true +... +box.schema.user.revoke('guest', 'replication') +--- +... +s:drop() +--- +... diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua index eaec52a5..5c7a525f 100644 --- a/test/vinyl/errinj.test.lua +++ b/test/vinyl/errinj.test.lua @@ -428,3 +428,33 @@ _ = fiber.create(function() box.snapshot() end) test_run:cmd("restart server default") box.space.test:drop() + +-- +-- Check that remote transactions are not aborted when an instance +-- switches to read-only mode (gh-4016). +-- +s = box.schema.space.create('test', {engine = 'vinyl'}) +_ = s:create_index('pk') +s:replace{1, 1} +box.schema.user.grant('guest', 'replication') +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'") +test_run:cmd("start server replica") + +test_run:cmd("switch replica") +box.error.injection.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0.1) + +test_run:cmd("switch default") +s:update({1}, {{'+', 2, 1}}) + +test_run:cmd("switch replica") +box.cfg{read_only = true} + +test_run:cmd("switch default") +vclock = test_run:get_vclock("default") +_ = test_run:wait_vclock("replica", vclock) + +test_run:cmd("stop server replica") +test_run:cmd("cleanup server replica") +test_run:cmd("delete server replica") +box.schema.user.revoke('guest', 'replication') +s:drop() diff --git a/test/vinyl/misc.result b/test/vinyl/misc.result index 5f67271e..685fd776 100644 --- a/test/vinyl/misc.result +++ b/test/vinyl/misc.result @@ -282,3 +282,83 @@ test_run:cmd('cleanup server test') --- - true ... +-- +-- gh-4016: local rw transactions are aborted when the instance +-- switches to read-only mode. +-- +s = box.schema.space.create('test', {engine = 'vinyl'}) +--- +... +_ = s:create_index('pk') +--- +... +s:replace({1, 1}) +--- +- [1, 1] +... +test_run:cmd("setopt delimiter ';'") +--- +- true +... +-- Start rw transaction. +ch1 = fiber.channel(1); +--- +... +_ = fiber.create(function() + box.begin() + s:replace{1, 2} + ch1:get() + local status, err = pcall(box.commit) + ch1:put(status or err) +end); +--- +... +-- Start ro transaction. +ch2 = fiber.channel(1); +--- +... +_ = fiber.create(function() + box.begin() + s:select() + ch2:get() + local status, err = pcall(box.commit) + ch2:put(status or err) +end); +--- +... +test_run:cmd("setopt delimiter ''"); +--- +- true +... +-- Switch to ro mode. +box.cfg{read_only = true} +--- +... +-- Resume the transactions. +ch1:put(true) +--- +- true +... +ch2:put(true) +--- +- true +... +ch1:get() +--- +- Transaction has been aborted by conflict +... +ch2:get() +--- +- true +... +-- Cleanup. +box.cfg{read_only = false} +--- +... +s:select() +--- +- - [1, 1] +... +s:drop() +--- +... diff --git a/test/vinyl/misc.test.lua b/test/vinyl/misc.test.lua index 1c3a9517..cdc22774 100644 --- a/test/vinyl/misc.test.lua +++ b/test/vinyl/misc.test.lua @@ -114,3 +114,42 @@ s:drop() test_run:cmd('switch default') test_run:cmd('stop server test') test_run:cmd('cleanup server test') + +-- +-- gh-4016: local rw transactions are aborted when the instance +-- switches to read-only mode. +-- +s = box.schema.space.create('test', {engine = 'vinyl'}) +_ = s:create_index('pk') +s:replace({1, 1}) +test_run:cmd("setopt delimiter ';'") +-- Start rw transaction. +ch1 = fiber.channel(1); +_ = fiber.create(function() + box.begin() + s:replace{1, 2} + ch1:get() + local status, err = pcall(box.commit) + ch1:put(status or err) +end); +-- Start ro transaction. +ch2 = fiber.channel(1); +_ = fiber.create(function() + box.begin() + s:select() + ch2:get() + local status, err = pcall(box.commit) + ch2:put(status or err) +end); +test_run:cmd("setopt delimiter ''"); +-- Switch to ro mode. +box.cfg{read_only = true} +-- Resume the transactions. +ch1:put(true) +ch2:put(true) +ch1:get() +ch2:get() +-- Cleanup. +box.cfg{read_only = false} +s:select() +s:drop() -- 2.11.0