From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpng3.m.smailru.net (smtpng3.m.smailru.net [94.100.177.149]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 7DF80445325 for ; Fri, 31 Jul 2020 01:37:50 +0300 (MSK) From: Vladislav Shpilevoy Date: Fri, 31 Jul 2020 00:37:45 +0200 Message-Id: <454ebe58fb6089c47056548c258f30fd0f8c7f60.1596148501.git.v.shpilevoy@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH 3/3] txn_limbo: handle duplicate ACKs List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org, gorcunov@gmail.com Replica can send the same ACK multiple times. This is relatively easy to achieve. ACK is a message form the replica containing its vclock. It is sent on each replica's vclock update. The update not necessarily means that master's LSN was changed. Replica could write something locally, with its own instance_id. Vclock is changed, sent to the master, but from the limbo's point of view it looks like duplicated ACK, because the received master's LSN didn't change. The patch makes limbo ignore duplicated ACKs. Closes #5195 Part of #5219 --- src/box/txn_limbo.c | 9 + .../gh-5195-qsync-replica-write.result | 156 ++++++++++++++++++ .../gh-5195-qsync-replica-write.test.lua | 68 ++++++++ 3 files changed, 233 insertions(+) create mode 100644 test/replication/gh-5195-qsync-replica-write.result create mode 100644 test/replication/gh-5195-qsync-replica-write.test.lua diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c index e052b9003..0ab387392 100644 --- a/src/box/txn_limbo.c +++ b/src/box/txn_limbo.c @@ -453,6 +453,15 @@ txn_limbo_ack(struct txn_limbo *limbo, uint32_t replica_id, int64_t lsn) return; assert(limbo->instance_id != REPLICA_ID_NIL); int64_t prev_lsn = vclock_get(&limbo->vclock, replica_id); + /* + * One of the reasons why can happen - the remote instance is not + * read-only and wrote something under its own insance_id. For qsync + * that most likely means that the remote instance decided to take over + * the limbo ownership, and the current node is going to become a + * replica very soon. + */ + if (lsn == prev_lsn) + return; vclock_follow(&limbo->vclock, replica_id, lsn); struct txn_limbo_entry *e; int64_t confirm_lsn = -1; diff --git a/test/replication/gh-5195-qsync-replica-write.result b/test/replication/gh-5195-qsync-replica-write.result new file mode 100644 index 000000000..3999e8f5e --- /dev/null +++ b/test/replication/gh-5195-qsync-replica-write.result @@ -0,0 +1,156 @@ +-- test-run result file version 2 +test_run = require('test_run').new() + | --- + | ... +fiber = require('fiber') + | --- + | ... +engine = test_run:get_cfg('engine') + | --- + | ... +old_synchro_quorum = box.cfg.replication_synchro_quorum + | --- + | ... +old_synchro_timeout = box.cfg.replication_synchro_timeout + | --- + | ... + +box.schema.user.grant('guest', 'super') + | --- + | ... + +test_run:cmd('create server replica with rpl_master=default,\ + script="replication/replica.lua"') + | --- + | - true + | ... +test_run:cmd('start server replica with wait=True, wait_load=True') + | --- + | - true + | ... + +-- +-- gh-5195: WAL write on a read-write replica should not crash master. Limbo +-- should be ready that ACKs can contain the same LSN from the same replica +-- multiple times. +-- +_ = box.schema.space.create('sync', {engine = engine, is_sync = true}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... + +box.cfg{replication_synchro_timeout = 1000, replication_synchro_quorum = 3} + | --- + | ... +lsn = box.info.lsn + | --- + | ... +ok, err = nil + | --- + | ... +f = fiber.create(function() \ + ok, err = pcall(box.space.sync.insert, box.space.sync, {2}) \ +end) + | --- + | ... +lsn = lsn + 1 + | --- + | ... +test_run:wait_cond(function() return box.info.lsn == lsn end) + | --- + | - true + | ... + +test_run:switch('replica') + | --- + | - true + | ... +test_run:wait_lsn('replica', 'default') + | --- + | ... +-- Normal DML is blocked - the limbo is not empty and does not belong to the +-- replica. But synchro queue cleanup also does a WAL write, and propagates LSN +-- of the instance. +box.cfg{replication_synchro_timeout = 0.001} + | --- + | ... +box.ctl.clear_synchro_queue() + | --- + | ... + +test_run:switch('default') + | --- + | - true + | ... +-- Wait second ACK receipt. +replica_id = test_run:get_server_id('replica') + | --- + | ... +replica_lsn = test_run:get_lsn('replica', replica_id) + | --- + | ... +test_run:wait_downstream(replica_id, {status='follow'}) + | --- + | - true + | ... +test_run:wait_cond(function() \ + local info = box.info.replication[replica_id] \ + local lsn = info.downstream.vclock[replica_id] \ + return lsn and lsn >= replica_lsn \ +end) \ + +box.cfg{replication_synchro_quorum = 2} + | --- + | ... +test_run:wait_cond(function() return f:status() == 'dead' end) + | --- + | - true + | ... +ok, err + | --- + | - true + | - [2] + | ... +box.space.sync:select{} + | --- + | - - [2] + | ... + +test_run:switch('replica') + | --- + | - true + | ... +box.space.sync:select{} + | --- + | - - [2] + | ... + +test_run:switch('default') + | --- + | - true + | ... + +test_run:cmd('stop server replica') + | --- + | - true + | ... +test_run:cmd('delete server replica') + | --- + | - true + | ... + +box.space.sync:drop() + | --- + | ... +box.schema.user.revoke('guest', 'super') + | --- + | ... + +box.cfg{ \ + replication_synchro_quorum = old_synchro_quorum, \ + replication_synchro_timeout = old_synchro_timeout, \ +} + | --- + | ... diff --git a/test/replication/gh-5195-qsync-replica-write.test.lua b/test/replication/gh-5195-qsync-replica-write.test.lua new file mode 100644 index 000000000..2b9909e21 --- /dev/null +++ b/test/replication/gh-5195-qsync-replica-write.test.lua @@ -0,0 +1,68 @@ +test_run = require('test_run').new() +fiber = require('fiber') +engine = test_run:get_cfg('engine') +old_synchro_quorum = box.cfg.replication_synchro_quorum +old_synchro_timeout = box.cfg.replication_synchro_timeout + +box.schema.user.grant('guest', 'super') + +test_run:cmd('create server replica with rpl_master=default,\ + script="replication/replica.lua"') +test_run:cmd('start server replica with wait=True, wait_load=True') + +-- +-- gh-5195: WAL write on a read-write replica should not crash master. Limbo +-- should be ready that ACKs can contain the same LSN from the same replica +-- multiple times. +-- +_ = box.schema.space.create('sync', {engine = engine, is_sync = true}) +_ = box.space.sync:create_index('pk') + +box.cfg{replication_synchro_timeout = 1000, replication_synchro_quorum = 3} +lsn = box.info.lsn +ok, err = nil +f = fiber.create(function() \ + ok, err = pcall(box.space.sync.insert, box.space.sync, {2}) \ +end) +lsn = lsn + 1 +test_run:wait_cond(function() return box.info.lsn == lsn end) + +test_run:switch('replica') +test_run:wait_lsn('replica', 'default') +-- Normal DML is blocked - the limbo is not empty and does not belong to the +-- replica. But synchro queue cleanup also does a WAL write, and propagates LSN +-- of the instance. +box.cfg{replication_synchro_timeout = 0.001} +box.ctl.clear_synchro_queue() + +test_run:switch('default') +-- Wait second ACK receipt. +replica_id = test_run:get_server_id('replica') +replica_lsn = test_run:get_lsn('replica', replica_id) +test_run:wait_downstream(replica_id, {status='follow'}) +test_run:wait_cond(function() \ + local info = box.info.replication[replica_id] \ + local lsn = info.downstream.vclock[replica_id] \ + return lsn and lsn >= replica_lsn \ +end) \ + +box.cfg{replication_synchro_quorum = 2} +test_run:wait_cond(function() return f:status() == 'dead' end) +ok, err +box.space.sync:select{} + +test_run:switch('replica') +box.space.sync:select{} + +test_run:switch('default') + +test_run:cmd('stop server replica') +test_run:cmd('delete server replica') + +box.space.sync:drop() +box.schema.user.revoke('guest', 'super') + +box.cfg{ \ + replication_synchro_quorum = old_synchro_quorum, \ + replication_synchro_timeout = old_synchro_timeout, \ +} -- 2.21.1 (Apple Git-122.3)