[Tarantool-patches] [PATCH 1/5] [tosquash] replication: fix multiple rollbacks
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Fri Jul 3 02:40:26 MSK 2020
The problem was that if several transactions timed out in one
event loop iteration, they all would write a rollback. Moreover, they
would do that in a weird order, starting from the oldest, instead of
in reverse order.
This patch makes the limbo write only one rollback at a time.
---
src/box/txn_limbo.c | 25 +++++++++++++++++++++++++
test/replication/qsync_basic.result | 2 +-
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index 0402664cb..2cb687f4d 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -44,6 +44,13 @@ txn_limbo_create(struct txn_limbo *limbo)
limbo->got_rollback = false;
}
+static inline struct txn_limbo_entry *
+txn_limbo_first_entry(struct txn_limbo *limbo)
+{
+ return rlist_first_entry(&limbo->queue, struct txn_limbo_entry,
+ in_queue);
+}
+
struct txn_limbo_entry *
txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
{
@@ -150,6 +157,24 @@ txn_limbo_wait_complete(struct txn_limbo *limbo, struct txn_limbo_entry *entry)
bool timed_out = fiber_yield_timeout(txn_limbo_confirm_timeout(limbo));
fiber_set_cancellable(cancellable);
if (timed_out) {
+ assert(!txn_limbo_is_empty(limbo));
+ if (txn_limbo_first_entry(limbo) != entry) {
+ /*
+ * If this is not a first entry in the
+ * limbo, it is definitely not a first
+ * timed out entry. And since it managed
+ * to time out too, it means there is
+ * currently another fiber writing
+ * rollback. Wait when it will finish and
+ * wake us up.
+ */
+ bool cancellable = fiber_set_cancellable(false);
+ fiber_yield();
+ fiber_set_cancellable(cancellable);
+ assert(txn_limbo_entry_is_complete(entry));
+ goto complete;
+ }
+
txn_limbo_write_rollback(limbo, entry);
struct txn_limbo_entry *e, *tmp;
rlist_foreach_entry_safe_reverse(e, &limbo->queue,
diff --git a/test/replication/qsync_basic.result b/test/replication/qsync_basic.result
index cdecf00e8..32deb2ac3 100644
--- a/test/replication/qsync_basic.result
+++ b/test/replication/qsync_basic.result
@@ -272,7 +272,7 @@ box.cfg{replication_synchro_timeout = 0.001, replication_synchro_quorum = 3}
| ...
f = fiber.create(box.space.sync.replace, box.space.sync, {6}) s:replace{6}
| ---
- | - error: Quorum collection for a synchronous transaction is timed out
+ | - error: A rollback for a synchronous transaction is received
| ...
f:status()
| ---
--
2.21.1 (Apple Git-122.3)
More information about the Tarantool-patches
mailing list