[Tarantool-patches] [PATCH v2] box: make instace ro while limbo is not empty
Serge Petrenko
sergepetrenko at tarantool.org
Mon Nov 30 12:35:51 MSK 2020
Users usually use box.ctl.wait_rw() to determine the moment when the
instance becomes writeable.
Since the synchronous replication introduction, this function became
pointless, because even when an instance is writeable, it may fail at
writing something because its limbo is not empty.
To fix the problem introduce a new helper, txn_limbo_is_ro() and start
using it in box_update_ro_summary().
Call bax_update_ro_summary() every time the limbo gets emptied out or
changes an owner.
Closes #5440
---
https://github.com/tarantool/tarantool/issues/5440
sp/gh-5440-on-state-update
Changes in v2:
- instead of updating is_ro on raft side, make
sure is_ro is updated every time limbo is
filled and emptied out.
src/box/box.cc | 3 ++-
src/box/raft.c | 9 +++----
src/box/txn_limbo.c | 25 +++++++++++++++++++
src/box/txn_limbo.h | 3 +++
test/replication/election_qsync.result | 10 ++++++--
test/replication/election_qsync.test.lua | 6 +++--
test/replication/election_qsync_stress.result | 4 +++
.../election_qsync_stress.test.lua | 2 ++
8 files changed, 52 insertions(+), 10 deletions(-)
diff --git a/src/box/box.cc b/src/box/box.cc
index b315635dc..4070cbeab 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -157,7 +157,8 @@ void
box_update_ro_summary(void)
{
bool old_is_ro_summary = is_ro_summary;
- is_ro_summary = is_ro || is_orphan || raft_is_ro(box_raft());
+ is_ro_summary = is_ro || is_orphan || raft_is_ro(box_raft()) ||
+ txn_limbo_is_ro(&txn_limbo);
/* In 99% nothing changes. Filter this out first. */
if (is_ro_summary == old_is_ro_summary)
return;
diff --git a/src/box/raft.c b/src/box/raft.c
index 6d98f5645..8f32a9662 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -155,11 +155,10 @@ box_raft_on_update_f(struct trigger *trigger, void *event)
struct raft *raft = (struct raft *)event;
assert(raft == box_raft());
/*
- * XXX: in case the instance became a leader, RO must be updated only
- * after clearing the synchro queue.
- *
- * When the instance became a follower, then on the contrary - make it
- * read-only ASAP, this is good.
+ * When the instance becomes a follower, it's good to make it read-only
+ * ASAP. This way we make sure followers don't write anything.
+ * However, if the instance is transitioning to leader it'll become
+ * writable only after it clears its synchro queue.
*/
box_update_ro_summary();
if (raft->state != RAFT_STATE_LEADER)
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index 2c35cd785..c406ed4c8 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -33,6 +33,7 @@
#include "replication.h"
#include "iproto_constants.h"
#include "journal.h"
+#include "box.h"
struct txn_limbo txn_limbo;
@@ -48,10 +49,17 @@ txn_limbo_create(struct txn_limbo *limbo)
limbo->is_in_rollback = false;
}
+bool
+txn_limbo_is_ro(struct txn_limbo *limbo)
+{
+ return limbo->owner_id != instance_id && !txn_limbo_is_empty(limbo);
+}
+
struct txn_limbo_entry *
txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
{
assert(txn_has_flag(txn, TXN_WAIT_SYNC));
+ assert(limbo == &txn_limbo);
/*
* Transactions should be added to the limbo before WAL write. Limbo
* needs that to be able rollback transactions, whose WAL write is in
@@ -72,11 +80,14 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
}
if (id == 0)
id = instance_id;
+ bool make_ro = false;
if (limbo->owner_id != id) {
if (limbo->owner_id == REPLICA_ID_NIL ||
rlist_empty(&limbo->queue)) {
limbo->owner_id = id;
limbo->confirmed_lsn = 0;
+ if (id != instance_id)
+ make_ro = true;
} else {
diag_set(ClientError, ER_UNCOMMITTED_FOREIGN_SYNC_TXNS,
limbo->owner_id);
@@ -96,6 +107,12 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
e->is_commit = false;
e->is_rollback = false;
rlist_add_tail_entry(&limbo->queue, e, in_queue);
+ /*
+ * We added new entries from a remote instance to an empty limbo.
+ * Time to make this instance read-only.
+ */
+ if (make_ro)
+ box_update_ro_summary();
return e;
}
@@ -349,6 +366,7 @@ static void
txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
{
assert(limbo->owner_id != REPLICA_ID_NIL);
+ assert(limbo == &txn_limbo);
struct txn_limbo_entry *e, *tmp;
rlist_foreach_entry_safe(e, &limbo->queue, in_queue, tmp) {
/*
@@ -388,6 +406,9 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
if (e->txn->signature >= 0)
txn_complete_success(e->txn);
}
+ /* Update is_ro once the limbo is clear. */
+ if (txn_limbo_is_empty(limbo))
+ box_update_ro_summary();
}
/**
@@ -410,6 +431,7 @@ static void
txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn)
{
assert(limbo->owner_id != REPLICA_ID_NIL);
+ assert(limbo == &txn_limbo);
struct txn_limbo_entry *e, *tmp;
struct txn_limbo_entry *last_rollback = NULL;
rlist_foreach_entry_reverse(e, &limbo->queue, in_queue) {
@@ -452,6 +474,9 @@ txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn)
if (e == last_rollback)
break;
}
+ /* Update is_ro once the limbo is clear. */
+ if (txn_limbo_is_empty(limbo))
+ box_update_ro_summary();
}
void
diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
index c7e70ba64..f7d67a826 100644
--- a/src/box/txn_limbo.h
+++ b/src/box/txn_limbo.h
@@ -172,6 +172,9 @@ txn_limbo_is_empty(struct txn_limbo *limbo)
return rlist_empty(&limbo->queue);
}
+bool
+txn_limbo_is_ro(struct txn_limbo *limbo);
+
static inline struct txn_limbo_entry *
txn_limbo_first_entry(struct txn_limbo *limbo)
{
diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result
index cb349efcc..c06400b38 100644
--- a/test/replication/election_qsync.result
+++ b/test/replication/election_qsync.result
@@ -75,7 +75,10 @@ box.cfg{
| ---
| ...
-test_run:wait_cond(function() return box.info.election.state == 'leader' end)
+box.ctl.wait_rw()
+ | ---
+ | ...
+assert(box.info.election.state == 'leader')
| ---
| - true
| ...
@@ -130,7 +133,10 @@ box.cfg{
| ---
| ...
-test_run:wait_cond(function() return box.info.election.state == 'leader' end)
+box.ctl.wait_rw()
+ | ---
+ | ...
+assert(box.info.election.state == 'leader')
| ---
| - true
| ...
diff --git a/test/replication/election_qsync.test.lua b/test/replication/election_qsync.test.lua
index eb89e5b79..ea6fc4a61 100644
--- a/test/replication/election_qsync.test.lua
+++ b/test/replication/election_qsync.test.lua
@@ -39,7 +39,8 @@ box.cfg{
replication_timeout = 0.1, \
}
-test_run:wait_cond(function() return box.info.election.state == 'leader' end)
+box.ctl.wait_rw()
+assert(box.info.election.state == 'leader')
lsn = box.info.lsn
_ = fiber.create(function() \
ok, err = pcall(box.space.test.replace, box.space.test, {1}) \
@@ -69,7 +70,8 @@ box.cfg{
replication_timeout = 0.01, \
}
-test_run:wait_cond(function() return box.info.election.state == 'leader' end)
+box.ctl.wait_rw()
+assert(box.info.election.state == 'leader')
_ = box.space.test:replace{2}
box.space.test:select{}
box.space.test:drop()
diff --git a/test/replication/election_qsync_stress.result b/test/replication/election_qsync_stress.result
index 9fab2f1d7..1380c910b 100644
--- a/test/replication/election_qsync_stress.result
+++ b/test/replication/election_qsync_stress.result
@@ -69,6 +69,9 @@ leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
c = netbox.connect(leader_port)
| ---
| ...
+c:eval('box.ctl.wait_rw()')
+ | ---
+ | ...
_ = c:eval('box.schema.space.create("test", {is_sync=true})')
| ---
@@ -94,6 +97,7 @@ for i = 1,10 do
leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
c = netbox.connect(leader_port)
c:eval('box.cfg{replication_synchro_timeout=1000}')
+ c:eval('box.ctl.wait_rw()')
c.space._schema:replace{'smth'}
c.space.test:get{i}
test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2 0.4"')
diff --git a/test/replication/election_qsync_stress.test.lua b/test/replication/election_qsync_stress.test.lua
index 0ba15eef7..d70601cc5 100644
--- a/test/replication/election_qsync_stress.test.lua
+++ b/test/replication/election_qsync_stress.test.lua
@@ -40,6 +40,7 @@ old_leader_nr = get_leader(nrs)
old_leader = 'election_replica'..old_leader_nr
leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
c = netbox.connect(leader_port)
+c:eval('box.ctl.wait_rw()')
_ = c:eval('box.schema.space.create("test", {is_sync=true})')
_ = c:eval('box.space.test:create_index("pk")')
@@ -58,6 +59,7 @@ for i = 1,10 do
leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
c = netbox.connect(leader_port)
c:eval('box.cfg{replication_synchro_timeout=1000}')
+ c:eval('box.ctl.wait_rw()')
c.space._schema:replace{'smth'}
c.space.test:get{i}
test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2 0.4"')
--
2.24.3 (Apple Git-128)
More information about the Tarantool-patches
mailing list