From: Serge Petrenko via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: v.shpilevoy@tarantool.org, gorcunov@gmail.com
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH v4 15/16] replication: forbid implicit limbo owner transition
Date: Wed, 14 Jul 2021 21:25:43 +0300 [thread overview]
Message-ID: <d5ff359c6eea3dfa60da19be839ea8bf3a08e58e.1626287002.git.sergepetrenko@tarantool.org> (raw)
In-Reply-To: <cover.1626287002.git.sergepetrenko@tarantool.org>
Forbid limbo ownership transition without an explicit promote.
Make it so that synchronous transactions may be committed only after it
is claimed by some instance via a PROMOTE request.
Make everyone but the limbo owner read-only even when the limbo is
empty.
Part-of #6034
@TarantoolBot document
Title: synchronous replication changes
`box.info.synchro.queue` receives a new field: `owner`. It's a replica
id of the instance owning the synchronous transaction queue.
Once some instance owns the queue, every other instance becomes
read-only. When the queue is unclaimed, e.g.
`box.info.synchro.queue.owner` is `0`, everyone may be writeable, but
cannot create synchronous transactions.
In order to claim or re-claim the queue, you have to issue
`box.ctl.promote()` on the instance you wish to promote.
When elections are enabled, the instance issues `box.ctl.promote()`
automatically once it wins the elections, no additional actions are
required.
---
src/box/errcode.h | 2 +
src/box/lua/info.c | 4 +-
src/box/txn_limbo.c | 32 ++---
test/box/alter.result | 2 +-
test/box/error.result | 2 +
test/replication/gh-5440-qsync-ro.result | 133 ---------------------
test/replication/gh-5440-qsync-ro.test.lua | 53 --------
test/replication/suite.cfg | 1 -
8 files changed, 19 insertions(+), 210 deletions(-)
delete mode 100644 test/replication/gh-5440-qsync-ro.result
delete mode 100644 test/replication/gh-5440-qsync-ro.test.lua
diff --git a/src/box/errcode.h b/src/box/errcode.h
index 49aec4bf6..d42b64ef4 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -278,6 +278,8 @@ struct errcode_record {
/*223 */_(ER_INTERFERING_PROMOTE, "Instance with replica id %u was promoted first") \
/*224 */_(ER_RAFT_DISABLED, "Elections were turned off while running box.ctl.promote()")\
/*225 */_(ER_TXN_ROLLBACK, "Transaction was rolled back") \
+ /*226 */_(ER_SYNC_QUEUE_UNCLAIMED, "The synchronous transaction queue doesn't belong to any instance")\
+ /*227 */_(ER_SYNC_QUEUE_FOREIGN, "The synchronous transaction queue belongs to other instance with id %u")\
/*
* !IMPORTANT! Please follow instructions at start of the file
diff --git a/src/box/lua/info.c b/src/box/lua/info.c
index 1d8fe7938..ef3d6f5b2 100644
--- a/src/box/lua/info.c
+++ b/src/box/lua/info.c
@@ -615,9 +615,11 @@ lbox_info_synchro(struct lua_State *L)
/* Queue information. */
struct txn_limbo *queue = &txn_limbo;
- lua_createtable(L, 0, 1);
+ lua_createtable(L, 0, 2);
lua_pushnumber(L, queue->len);
lua_setfield(L, -2, "len");
+ lua_pushnumber(L, queue->owner_id);
+ lua_setfield(L, -2, "owner");
lua_setfield(L, -2, "queue");
return 1;
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index 991c47698..c8c4f587c 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -55,7 +55,8 @@ txn_limbo_create(struct txn_limbo *limbo)
bool
txn_limbo_is_ro(struct txn_limbo *limbo)
{
- return limbo->owner_id != instance_id && !txn_limbo_is_empty(limbo);
+ return limbo->owner_id != REPLICA_ID_NIL &&
+ limbo->owner_id != instance_id;
}
struct txn_limbo_entry *
@@ -95,18 +96,18 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
}
if (id == 0)
id = instance_id;
- bool make_ro = false;
- if (limbo->owner_id != id) {
- if (rlist_empty(&limbo->queue)) {
- limbo->owner_id = id;
- limbo->confirmed_lsn = 0;
- if (id != instance_id)
- make_ro = true;
+ if (limbo->owner_id == REPLICA_ID_NIL) {
+ diag_set(ClientError, ER_SYNC_QUEUE_UNCLAIMED);
+ return NULL;
+ } else if (limbo->owner_id != id) {
+ if (txn_limbo_is_empty(limbo)) {
+ diag_set(ClientError, ER_SYNC_QUEUE_FOREIGN,
+ limbo->owner_id);
} else {
diag_set(ClientError, ER_UNCOMMITTED_FOREIGN_SYNC_TXNS,
limbo->owner_id);
- return NULL;
}
+ return NULL;
}
size_t size;
struct txn_limbo_entry *e = region_alloc_object(&txn->region,
@@ -122,12 +123,6 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
e->is_rollback = false;
rlist_add_tail_entry(&limbo->queue, e, in_queue);
limbo->len++;
- /*
- * We added new entries from a remote instance to an empty limbo.
- * Time to make this instance read-only.
- */
- if (make_ro)
- box_update_ro_summary();
return e;
}
@@ -442,9 +437,6 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
assert(e->txn->signature >= 0);
txn_complete_success(e->txn);
}
- /* Update is_ro once the limbo is clear. */
- if (txn_limbo_is_empty(limbo))
- box_update_ro_summary();
}
/**
@@ -492,9 +484,6 @@ txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn)
if (e == last_rollback)
break;
}
- /* Update is_ro once the limbo is clear. */
- if (txn_limbo_is_empty(limbo))
- box_update_ro_summary();
}
void
@@ -525,6 +514,7 @@ txn_limbo_read_promote(struct txn_limbo *limbo, uint32_t replica_id,
txn_limbo_read_rollback(limbo, lsn + 1);
assert(txn_limbo_is_empty(&txn_limbo));
limbo->owner_id = replica_id;
+ box_update_ro_summary();
limbo->confirmed_lsn = 0;
}
diff --git a/test/box/alter.result b/test/box/alter.result
index a7bffce10..6a64f6b84 100644
--- a/test/box/alter.result
+++ b/test/box/alter.result
@@ -1464,7 +1464,7 @@ assert(s.is_sync)
...
s:replace{1}
---
-- error: Quorum collection for a synchronous transaction is timed out
+- error: The synchronous transaction queue doesn't belong to any instance
...
-- When not specified or nil - ignored.
s:alter({is_sync = nil})
diff --git a/test/box/error.result b/test/box/error.result
index 062a90399..8f783f927 100644
--- a/test/box/error.result
+++ b/test/box/error.result
@@ -444,6 +444,8 @@ t;
| 223: box.error.INTERFERING_PROMOTE
| 224: box.error.RAFT_DISABLED
| 225: box.error.TXN_ROLLBACK
+ | 226: box.error.SYNC_QUEUE_UNCLAIMED
+ | 227: box.error.SYNC_QUEUE_FOREIGN
| ...
test_run:cmd("setopt delimiter ''");
diff --git a/test/replication/gh-5440-qsync-ro.result b/test/replication/gh-5440-qsync-ro.result
deleted file mode 100644
index 1ece26a42..000000000
--- a/test/replication/gh-5440-qsync-ro.result
+++ /dev/null
@@ -1,133 +0,0 @@
--- test-run result file version 2
---
--- gh-5440 everyone but the limbo owner is read-only on non-empty limbo.
---
-env = require('test_run')
- | ---
- | ...
-test_run = env.new()
- | ---
- | ...
-fiber = require('fiber')
- | ---
- | ...
-
-box.schema.user.grant('guest', 'replication')
- | ---
- | ...
-test_run:cmd('create server replica with rpl_master=default, script="replication/replica.lua"')
- | ---
- | - true
- | ...
-test_run:cmd('start server replica with wait=True, wait_load=True')
- | ---
- | - true
- | ...
-
-_ = box.schema.space.create('test', {is_sync=true})
- | ---
- | ...
-_ = box.space.test:create_index('pk')
- | ---
- | ...
-
-old_synchro_quorum = box.cfg.replication_synchro_quorum
- | ---
- | ...
-old_synchro_timeout = box.cfg.replication_synchro_timeout
- | ---
- | ...
-
--- Make sure that the master stalls on commit leaving the limbo non-empty.
-box.cfg{replication_synchro_quorum=3, replication_synchro_timeout=1000}
- | ---
- | ...
-
-f = fiber.new(function() box.space.test:insert{1} end)
- | ---
- | ...
-f:status()
- | ---
- | - suspended
- | ...
-
--- Wait till replica's limbo is non-empty.
-test_run:wait_lsn('replica', 'default')
- | ---
- | ...
-test_run:cmd('switch replica')
- | ---
- | - true
- | ...
-
-box.info.ro
- | ---
- | - true
- | ...
-box.space.test:insert{2}
- | ---
- | - error: Can't modify data because this instance is in read-only mode.
- | ...
-success = false
- | ---
- | ...
-f = require('fiber').new(function() box.ctl.wait_rw() success = true end)
- | ---
- | ...
-f:status()
- | ---
- | - suspended
- | ...
-
-test_run:cmd('switch default')
- | ---
- | - true
- | ...
-
--- Empty the limbo.
-box.cfg{replication_synchro_quorum=2}
- | ---
- | ...
-
-test_run:cmd('switch replica')
- | ---
- | - true
- | ...
-
-test_run:wait_cond(function() return success end)
- | ---
- | - true
- | ...
-box.info.ro
- | ---
- | - false
- | ...
--- Should succeed now.
-box.space.test:insert{2}
- | ---
- | - [2]
- | ...
-
--- Cleanup.
-test_run:cmd('switch default')
- | ---
- | - true
- | ...
-box.cfg{replication_synchro_quorum=old_synchro_quorum,\
- replication_synchro_timeout=old_synchro_timeout}
- | ---
- | ...
-box.space.test:drop()
- | ---
- | ...
-test_run:cmd('stop server replica')
- | ---
- | - true
- | ...
-test_run:cmd('delete server replica')
- | ---
- | - true
- | ...
-box.schema.user.revoke('guest', 'replication')
- | ---
- | ...
diff --git a/test/replication/gh-5440-qsync-ro.test.lua b/test/replication/gh-5440-qsync-ro.test.lua
deleted file mode 100644
index d63ec9c1e..000000000
--- a/test/replication/gh-5440-qsync-ro.test.lua
+++ /dev/null
@@ -1,53 +0,0 @@
---
--- gh-5440 everyone but the limbo owner is read-only on non-empty limbo.
---
-env = require('test_run')
-test_run = env.new()
-fiber = require('fiber')
-
-box.schema.user.grant('guest', 'replication')
-test_run:cmd('create server replica with rpl_master=default, script="replication/replica.lua"')
-test_run:cmd('start server replica with wait=True, wait_load=True')
-
-_ = box.schema.space.create('test', {is_sync=true})
-_ = box.space.test:create_index('pk')
-
-old_synchro_quorum = box.cfg.replication_synchro_quorum
-old_synchro_timeout = box.cfg.replication_synchro_timeout
-
--- Make sure that the master stalls on commit leaving the limbo non-empty.
-box.cfg{replication_synchro_quorum=3, replication_synchro_timeout=1000}
-
-f = fiber.new(function() box.space.test:insert{1} end)
-f:status()
-
--- Wait till replica's limbo is non-empty.
-test_run:wait_lsn('replica', 'default')
-test_run:cmd('switch replica')
-
-box.info.ro
-box.space.test:insert{2}
-success = false
-f = require('fiber').new(function() box.ctl.wait_rw() success = true end)
-f:status()
-
-test_run:cmd('switch default')
-
--- Empty the limbo.
-box.cfg{replication_synchro_quorum=2}
-
-test_run:cmd('switch replica')
-
-test_run:wait_cond(function() return success end)
-box.info.ro
--- Should succeed now.
-box.space.test:insert{2}
-
--- Cleanup.
-test_run:cmd('switch default')
-box.cfg{replication_synchro_quorum=old_synchro_quorum,\
- replication_synchro_timeout=old_synchro_timeout}
-box.space.test:drop()
-test_run:cmd('stop server replica')
-test_run:cmd('delete server replica')
-box.schema.user.revoke('guest', 'replication')
diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg
index 6f42db081..7a6fd7052 100644
--- a/test/replication/suite.cfg
+++ b/test/replication/suite.cfg
@@ -42,7 +42,6 @@
"gh-4739-vclock-assert.test.lua": {},
"gh-4730-applier-rollback.test.lua": {},
"gh-4928-tx-boundaries.test.lua": {},
- "gh-5440-qsync-ro.test.lua": {},
"gh-5435-qsync-clear-synchro-queue-commit-all.test.lua": {},
"gh-5536-wal-limit.test.lua": {},
"gh-5566-final-join-synchro.test.lua": {},
--
2.30.1 (Apple Git-130)
next prev parent reply other threads:[~2021-07-14 18:33 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-14 18:25 [Tarantool-patches] [PATCH v4 00/16] forbid implicit limbo ownership transition Serge Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 01/16] replication: always send raft state to subscribers Serge Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 02/16] txn_limbo: fix promote term filtering Serge Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 03/16] txn_limbo: persist the latest effective promote in snapshot Serge Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 04/16] replication: encode version in JOIN request Serge Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 05/16] replication: add META stage to JOIN Serge Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 06/16] replication: send latest effective promote in initial join Serge Petrenko via Tarantool-patches
2021-07-21 23:24 ` Vladislav Shpilevoy via Tarantool-patches
2021-07-23 7:44 ` Sergey Petrenko via Tarantool-patches
2021-07-26 23:43 ` Vladislav Shpilevoy via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 07/16] replication: send current Raft term in join response Serge Petrenko via Tarantool-patches
2021-07-21 23:24 ` Vladislav Shpilevoy via Tarantool-patches
2021-07-23 7:44 ` Sergey Petrenko via Tarantool-patches
2021-07-26 23:43 ` Vladislav Shpilevoy via Tarantool-patches
2021-07-29 20:46 ` Sergey Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 08/16] raft: refactor raft_new_term() Serge Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 09/16] box: split promote() into reasonable parts Serge Petrenko via Tarantool-patches
2021-07-21 23:26 ` Vladislav Shpilevoy via Tarantool-patches
2021-07-23 7:45 ` Sergey Petrenko via Tarantool-patches
2021-07-26 23:44 ` Vladislav Shpilevoy via Tarantool-patches
2021-07-29 20:46 ` Sergey Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 10/16] box: make promote always bump the term Serge Petrenko via Tarantool-patches
2021-07-26 23:45 ` Vladislav Shpilevoy via Tarantool-patches
2021-07-29 20:46 ` Sergey Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 11/16] box: make promote on the current leader a no-op Serge Petrenko via Tarantool-patches
2021-07-21 23:26 ` Vladislav Shpilevoy via Tarantool-patches
2021-07-23 7:45 ` Sergey Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 12/16] box: fix an assertion failure after a spurious wakeup in promote Serge Petrenko via Tarantool-patches
2021-07-21 23:29 ` Vladislav Shpilevoy via Tarantool-patches
2021-07-23 7:45 ` Sergey Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 13/16] box: allow calling promote on a candidate Serge Petrenko via Tarantool-patches
2021-07-15 14:06 ` Serge Petrenko via Tarantool-patches
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 14/16] box: extract promote() settings to a separate method Serge Petrenko via Tarantool-patches
2021-07-14 18:25 ` Serge Petrenko via Tarantool-patches [this message]
2021-07-14 18:25 ` [Tarantool-patches] [PATCH v4 16/16] box: introduce `box.ctl.demote` Serge Petrenko via Tarantool-patches
2021-07-15 17:13 ` Serge Petrenko via Tarantool-patches
2021-07-15 20:11 ` [Tarantool-patches] [PATCH v4 17/16] replication: fix flaky election_qsync.test Serge Petrenko via Tarantool-patches
2021-07-26 23:43 ` [Tarantool-patches] [PATCH v4 00/16] forbid implicit limbo ownership transition Vladislav Shpilevoy via Tarantool-patches
2021-07-29 20:47 ` Sergey Petrenko via Tarantool-patches
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d5ff359c6eea3dfa60da19be839ea8bf3a08e58e.1626287002.git.sergepetrenko@tarantool.org \
--to=tarantool-patches@dev.tarantool.org \
--cc=gorcunov@gmail.com \
--cc=sergepetrenko@tarantool.org \
--cc=v.shpilevoy@tarantool.org \
--subject='Re: [Tarantool-patches] [PATCH v4 15/16] replication: forbid implicit limbo owner transition' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox