[Tarantool-patches] [PATCH 3/3] txn_limbo: handle duplicate ACKs

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Fri Jul 31 01:37:45 MSK 2020


Replica can send the same ACK multiple times. This is relatively
easy to achieve.

ACK is a message form the replica containing its vclock. It is
sent on each replica's vclock update. The update not necessarily
means that master's LSN was changed. Replica could write
something locally, with its own instance_id. Vclock is changed,
sent to the master, but from the limbo's point of view it looks
like duplicated ACK, because the received master's LSN didn't
change.

The patch makes limbo ignore duplicated ACKs.

Closes #5195
Part of #5219
---
 src/box/txn_limbo.c                           |   9 +
 .../gh-5195-qsync-replica-write.result        | 156 ++++++++++++++++++
 .../gh-5195-qsync-replica-write.test.lua      |  68 ++++++++
 3 files changed, 233 insertions(+)
 create mode 100644 test/replication/gh-5195-qsync-replica-write.result
 create mode 100644 test/replication/gh-5195-qsync-replica-write.test.lua

diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index e052b9003..0ab387392 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -453,6 +453,15 @@ txn_limbo_ack(struct txn_limbo *limbo, uint32_t replica_id, int64_t lsn)
 		return;
 	assert(limbo->instance_id != REPLICA_ID_NIL);
 	int64_t prev_lsn = vclock_get(&limbo->vclock, replica_id);
+	/*
+	 * One of the reasons why can happen - the remote instance is not
+	 * read-only and wrote something under its own insance_id. For qsync
+	 * that most likely means that the remote instance decided to take over
+	 * the limbo ownership, and the current node is going to become a
+	 * replica very soon.
+	 */
+	if (lsn == prev_lsn)
+		return;
 	vclock_follow(&limbo->vclock, replica_id, lsn);
 	struct txn_limbo_entry *e;
 	int64_t confirm_lsn = -1;
diff --git a/test/replication/gh-5195-qsync-replica-write.result b/test/replication/gh-5195-qsync-replica-write.result
new file mode 100644
index 000000000..3999e8f5e
--- /dev/null
+++ b/test/replication/gh-5195-qsync-replica-write.result
@@ -0,0 +1,156 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+fiber = require('fiber')
+ | ---
+ | ...
+engine = test_run:get_cfg('engine')
+ | ---
+ | ...
+old_synchro_quorum = box.cfg.replication_synchro_quorum
+ | ---
+ | ...
+old_synchro_timeout = box.cfg.replication_synchro_timeout
+ | ---
+ | ...
+
+box.schema.user.grant('guest', 'super')
+ | ---
+ | ...
+
+test_run:cmd('create server replica with rpl_master=default,\
+              script="replication/replica.lua"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server replica with wait=True, wait_load=True')
+ | ---
+ | - true
+ | ...
+
+--
+-- gh-5195: WAL write on a read-write replica should not crash master. Limbo
+-- should be ready that ACKs can contain the same LSN from the same replica
+-- multiple times.
+--
+_ = box.schema.space.create('sync', {engine = engine, is_sync = true})
+ | ---
+ | ...
+_ = box.space.sync:create_index('pk')
+ | ---
+ | ...
+
+box.cfg{replication_synchro_timeout = 1000, replication_synchro_quorum = 3}
+ | ---
+ | ...
+lsn = box.info.lsn
+ | ---
+ | ...
+ok, err = nil
+ | ---
+ | ...
+f = fiber.create(function()                                                     \
+    ok, err = pcall(box.space.sync.insert, box.space.sync, {2})                 \
+end)
+ | ---
+ | ...
+lsn = lsn + 1
+ | ---
+ | ...
+test_run:wait_cond(function() return box.info.lsn == lsn end)
+ | ---
+ | - true
+ | ...
+
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+test_run:wait_lsn('replica', 'default')
+ | ---
+ | ...
+-- Normal DML is blocked - the limbo is not empty and does not belong to the
+-- replica. But synchro queue cleanup also does a WAL write, and propagates LSN
+-- of the instance.
+box.cfg{replication_synchro_timeout = 0.001}
+ | ---
+ | ...
+box.ctl.clear_synchro_queue()
+ | ---
+ | ...
+
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+-- Wait second ACK receipt.
+replica_id = test_run:get_server_id('replica')
+ | ---
+ | ...
+replica_lsn = test_run:get_lsn('replica', replica_id)
+ | ---
+ | ...
+test_run:wait_downstream(replica_id, {status='follow'})
+ | ---
+ | - true
+ | ...
+test_run:wait_cond(function()                                                   \
+        local info = box.info.replication[replica_id]                           \
+        local lsn = info.downstream.vclock[replica_id]                          \
+        return lsn and lsn >= replica_lsn                                       \
+end)                                                                            \
+
+box.cfg{replication_synchro_quorum = 2}
+ | ---
+ | ...
+test_run:wait_cond(function() return f:status() == 'dead' end)
+ | ---
+ | - true
+ | ...
+ok, err
+ | ---
+ | - true
+ | - [2]
+ | ...
+box.space.sync:select{}
+ | ---
+ | - - [2]
+ | ...
+
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+box.space.sync:select{}
+ | ---
+ | - - [2]
+ | ...
+
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+
+test_run:cmd('stop server replica')
+ | ---
+ | - true
+ | ...
+test_run:cmd('delete server replica')
+ | ---
+ | - true
+ | ...
+
+box.space.sync:drop()
+ | ---
+ | ...
+box.schema.user.revoke('guest', 'super')
+ | ---
+ | ...
+
+box.cfg{                                                                        \
+    replication_synchro_quorum = old_synchro_quorum,                            \
+    replication_synchro_timeout = old_synchro_timeout,                          \
+}
+ | ---
+ | ...
diff --git a/test/replication/gh-5195-qsync-replica-write.test.lua b/test/replication/gh-5195-qsync-replica-write.test.lua
new file mode 100644
index 000000000..2b9909e21
--- /dev/null
+++ b/test/replication/gh-5195-qsync-replica-write.test.lua
@@ -0,0 +1,68 @@
+test_run = require('test_run').new()
+fiber = require('fiber')
+engine = test_run:get_cfg('engine')
+old_synchro_quorum = box.cfg.replication_synchro_quorum
+old_synchro_timeout = box.cfg.replication_synchro_timeout
+
+box.schema.user.grant('guest', 'super')
+
+test_run:cmd('create server replica with rpl_master=default,\
+              script="replication/replica.lua"')
+test_run:cmd('start server replica with wait=True, wait_load=True')
+
+--
+-- gh-5195: WAL write on a read-write replica should not crash master. Limbo
+-- should be ready that ACKs can contain the same LSN from the same replica
+-- multiple times.
+--
+_ = box.schema.space.create('sync', {engine = engine, is_sync = true})
+_ = box.space.sync:create_index('pk')
+
+box.cfg{replication_synchro_timeout = 1000, replication_synchro_quorum = 3}
+lsn = box.info.lsn
+ok, err = nil
+f = fiber.create(function()                                                     \
+    ok, err = pcall(box.space.sync.insert, box.space.sync, {2})                 \
+end)
+lsn = lsn + 1
+test_run:wait_cond(function() return box.info.lsn == lsn end)
+
+test_run:switch('replica')
+test_run:wait_lsn('replica', 'default')
+-- Normal DML is blocked - the limbo is not empty and does not belong to the
+-- replica. But synchro queue cleanup also does a WAL write, and propagates LSN
+-- of the instance.
+box.cfg{replication_synchro_timeout = 0.001}
+box.ctl.clear_synchro_queue()
+
+test_run:switch('default')
+-- Wait second ACK receipt.
+replica_id = test_run:get_server_id('replica')
+replica_lsn = test_run:get_lsn('replica', replica_id)
+test_run:wait_downstream(replica_id, {status='follow'})
+test_run:wait_cond(function()                                                   \
+        local info = box.info.replication[replica_id]                           \
+        local lsn = info.downstream.vclock[replica_id]                          \
+        return lsn and lsn >= replica_lsn                                       \
+end)                                                                            \
+
+box.cfg{replication_synchro_quorum = 2}
+test_run:wait_cond(function() return f:status() == 'dead' end)
+ok, err
+box.space.sync:select{}
+
+test_run:switch('replica')
+box.space.sync:select{}
+
+test_run:switch('default')
+
+test_run:cmd('stop server replica')
+test_run:cmd('delete server replica')
+
+box.space.sync:drop()
+box.schema.user.revoke('guest', 'super')
+
+box.cfg{                                                                        \
+    replication_synchro_quorum = old_synchro_quorum,                            \
+    replication_synchro_timeout = old_synchro_timeout,                          \
+}
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list