From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> To: tarantool-patches@dev.tarantool.org, gorcunov@gmail.com Subject: [Tarantool-patches] [PATCH 1/1] txn_limbo: don't duplicate confirmations in WAL Date: Wed, 29 Jul 2020 01:17:27 +0200 [thread overview] Message-ID: <a7e401dbb261883515be221c55c55e7b6f34e4aa.1595978179.git.v.shpilevoy@tarantool.org> (raw) When an ACK was received for an already confirmed transaction whose CONFIRM WAL write is in progress, it produced a second CONFIRM in WAL with the same LSN. That was unnecessary work taking time and disk space for WAL records. Although it didn't lead to any bugs. Just was very inefficient. This patch makes confirmation LSN monotonically grow. In case more ACKs are received for an already confirmed LSN, its confirmation is not written second time. Closes #5144 --- Branch: http://github.com/tarantool/tarantool/tree/gerold103/gh-5144-qsync-dup-confirm Issue: https://github.com/tarantool/tarantool/issues/5144 No changelog because no visible changes. The issue can be considered more as an optimization than as a bug. src/box/txn_limbo.c | 9 +- src/box/txn_limbo.h | 8 + .../gh-5144-qsync-dup-confirm.result | 153 ++++++++++++++++++ .../gh-5144-qsync-dup-confirm.test.lua | 72 +++++++++ test/replication/replica1.lua | 1 + test/replication/replica2.lua | 1 + test/replication/suite.ini | 2 +- 7 files changed, 243 insertions(+), 3 deletions(-) create mode 100644 test/replication/gh-5144-qsync-dup-confirm.result create mode 100644 test/replication/gh-5144-qsync-dup-confirm.test.lua create mode 120000 test/replication/replica1.lua create mode 120000 test/replication/replica2.lua diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c index 72c8d572e..b6725ae21 100644 --- a/src/box/txn_limbo.c +++ b/src/box/txn_limbo.c @@ -41,6 +41,7 @@ txn_limbo_create(struct txn_limbo *limbo) limbo->instance_id = REPLICA_ID_NIL; fiber_cond_create(&limbo->wait_cond); vclock_create(&limbo->vclock); + limbo->confirmed_lsn = 0; limbo->rollback_count = 0; limbo->is_in_rollback = false; } @@ -312,6 +313,8 @@ rollback: static int txn_limbo_write_confirm(struct txn_limbo *limbo, int64_t lsn) { + assert(lsn > limbo->confirmed_lsn); + limbo->confirmed_lsn = lsn; return txn_limbo_write_confirm_rollback(limbo, lsn, true); } @@ -361,6 +364,8 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn) static int txn_limbo_write_rollback(struct txn_limbo *limbo, int64_t lsn) { + assert(lsn > limbo->confirmed_lsn); + assert(!limbo->is_in_rollback); limbo->is_in_rollback = true; int rc = txn_limbo_write_confirm_rollback(limbo, lsn, false); limbo->is_in_rollback = false; @@ -438,7 +443,7 @@ txn_limbo_ack(struct txn_limbo *limbo, uint32_t replica_id, int64_t lsn) confirm_lsn = e->lsn; } } - if (confirm_lsn == -1) + if (confirm_lsn == -1 || confirm_lsn <= limbo->confirmed_lsn) return; if (txn_limbo_write_confirm(limbo, confirm_lsn) != 0) { // TODO: what to do here?. @@ -581,7 +586,7 @@ txn_limbo_on_parameters_change(struct txn_limbo *limbo) assert(confirm_lsn > 0); } } - if (confirm_lsn > 0) { + if (confirm_lsn > limbo->confirmed_lsn) { if (txn_limbo_write_confirm(limbo, confirm_lsn) != 0) { panic("Couldn't write CONFIRM to WAL"); return; diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h index d2a689e5c..04ee7ea5c 100644 --- a/src/box/txn_limbo.h +++ b/src/box/txn_limbo.h @@ -124,6 +124,14 @@ struct txn_limbo { * transactions, created on the limbo's owner node. */ struct vclock vclock; + /** + * Maximal LSN gathered quorum and either already confirmed in WAL, or + * whose confirmation is in progress right now. Any attempt to confirm + * something smaller than this value can be safely ignored. Moreover, + * any attempt to rollback something starting from <= this LSN is + * illegal. + */ + int64_t confirmed_lsn; /** * Total number of performed rollbacks. It used as a guard * to do some actions assuming all limbo transactions will diff --git a/test/replication/gh-5144-qsync-dup-confirm.result b/test/replication/gh-5144-qsync-dup-confirm.result new file mode 100644 index 000000000..9d265d9ff --- /dev/null +++ b/test/replication/gh-5144-qsync-dup-confirm.result @@ -0,0 +1,153 @@ +-- test-run result file version 2 +test_run = require('test_run').new() + | --- + | ... +engine = test_run:get_cfg('engine') + | --- + | ... +fiber = require('fiber') + | --- + | ... +-- +-- gh-5144: qsync should not write duplicate confirms for the same LSN. That +-- could happen when first CONFIRM write is in progress, and more ACKs arrive. +-- +box.schema.user.grant('guest', 'super') + | --- + | ... + +test_run:cmd('create server replica1 with rpl_master=default, \ + script="replication/replica1.lua"') + | --- + | - true + | ... +test_run:cmd('start server replica1 with wait=True, wait_load=True') + | --- + | - true + | ... + +test_run:cmd('create server replica2 with rpl_master=default, \ + script="replication/replica2.lua"') + | --- + | - true + | ... +test_run:cmd('start server replica2 with wait=True, wait_load=True') + | --- + | - true + | ... + +box.cfg{replication_synchro_quorum = 2, replication_synchro_timeout = 1000} + | --- + | ... + +_ = box.schema.space.create('sync', {is_sync = true, engine = engine}) + | --- + | ... +_ = _:create_index('pk') + | --- + | ... + +-- Remember the current LSN. In the end, when the following synchronous +-- transaction is committed, result LSN should be this value +2: for the +-- transaction itself and for CONFIRM. +lsn_before_txn = box.info.lsn + | --- + | ... + +box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 1) + | --- + | - ok + | ... +ok, err = nil + | --- + | ... +f = fiber.create(function() \ + ok, err = pcall(box.space.sync.insert, box.space.sync, {1}) \ +end) + | --- + | ... +-- First replica's ACK is received. Quorum 2 is achieved. CONFIRM write is in +-- progress. +while not box.error.injection.get("ERRINJ_WAL_DELAY") do fiber.sleep(0.001) end + | --- + | ... + +-- Wait when both replicas confirm receipt of the sync transaction. It would +-- mean the master knows both LSNs and either already tried to write CONFIRM +-- twice, or didn't and will not. +function wait_lsn_ack(id, lsn) \ + local this_id = box.info.id \ + test_run:wait_downstream(id, {status='follow'}) \ + test_run:wait_cond(function() \ + return box.info.replication[id].downstream.vclock[this_id] >= lsn \ + end) \ +end + | --- + | ... +replica1_id = test_run:get_server_id('replica1') + | --- + | ... +replica2_id = test_run:get_server_id('replica2') + | --- + | ... +lsn = box.info.lsn + | --- + | ... +wait_lsn_ack(replica1_id, lsn) + | --- + | ... +wait_lsn_ack(replica2_id, lsn) + | --- + | ... + +-- Decrease already achieved quorum to check that it won't generate the same +-- CONFIRM on the parameter update. +box.cfg{replication_synchro_quorum = 1} + | --- + | ... + +-- Let the transaction finish. CONFIRM should be in WAL now. +box.error.injection.set("ERRINJ_WAL_DELAY", false) + | --- + | - ok + | ... +test_run:wait_cond(function() return f:status() == 'dead' end) + | --- + | - true + | ... +ok, err + | --- + | - true + | - [1] + | ... + +-- First +1 is the transaction itself. Second +1 is CONFIRM. +assert(box.info.lsn - lsn_before_txn == 2) + | --- + | - true + | ... + +box.space.sync:drop() + | --- + | ... + +test_run:cmd('stop server replica1') + | --- + | - true + | ... +test_run:cmd('delete server replica1') + | --- + | - true + | ... +test_run:cmd('stop server replica2') + | --- + | - true + | ... +test_run:cmd('delete server replica2') + | --- + | - true + | ... + +box.schema.user.revoke('guest', 'super') + | --- + | ... diff --git a/test/replication/gh-5144-qsync-dup-confirm.test.lua b/test/replication/gh-5144-qsync-dup-confirm.test.lua new file mode 100644 index 000000000..01a8351e0 --- /dev/null +++ b/test/replication/gh-5144-qsync-dup-confirm.test.lua @@ -0,0 +1,72 @@ +test_run = require('test_run').new() +engine = test_run:get_cfg('engine') +fiber = require('fiber') +-- +-- gh-5144: qsync should not write duplicate confirms for the same LSN. That +-- could happen when first CONFIRM write is in progress, and more ACKs arrive. +-- +box.schema.user.grant('guest', 'super') + +test_run:cmd('create server replica1 with rpl_master=default, \ + script="replication/replica1.lua"') +test_run:cmd('start server replica1 with wait=True, wait_load=True') + +test_run:cmd('create server replica2 with rpl_master=default, \ + script="replication/replica2.lua"') +test_run:cmd('start server replica2 with wait=True, wait_load=True') + +box.cfg{replication_synchro_quorum = 2, replication_synchro_timeout = 1000} + +_ = box.schema.space.create('sync', {is_sync = true, engine = engine}) +_ = _:create_index('pk') + +-- Remember the current LSN. In the end, when the following synchronous +-- transaction is committed, result LSN should be this value +2: for the +-- transaction itself and for CONFIRM. +lsn_before_txn = box.info.lsn + +box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 1) +ok, err = nil +f = fiber.create(function() \ + ok, err = pcall(box.space.sync.insert, box.space.sync, {1}) \ +end) +-- First replica's ACK is received. Quorum 2 is achieved. CONFIRM write is in +-- progress. +while not box.error.injection.get("ERRINJ_WAL_DELAY") do fiber.sleep(0.001) end + +-- Wait when both replicas confirm receipt of the sync transaction. It would +-- mean the master knows both LSNs and either already tried to write CONFIRM +-- twice, or didn't and will not. +function wait_lsn_ack(id, lsn) \ + local this_id = box.info.id \ + test_run:wait_downstream(id, {status='follow'}) \ + test_run:wait_cond(function() \ + return box.info.replication[id].downstream.vclock[this_id] >= lsn \ + end) \ +end +replica1_id = test_run:get_server_id('replica1') +replica2_id = test_run:get_server_id('replica2') +lsn = box.info.lsn +wait_lsn_ack(replica1_id, lsn) +wait_lsn_ack(replica2_id, lsn) + +-- Decrease already achieved quorum to check that it won't generate the same +-- CONFIRM on the parameter update. +box.cfg{replication_synchro_quorum = 1} + +-- Let the transaction finish. CONFIRM should be in WAL now. +box.error.injection.set("ERRINJ_WAL_DELAY", false) +test_run:wait_cond(function() return f:status() == 'dead' end) +ok, err + +-- First +1 is the transaction itself. Second +1 is CONFIRM. +assert(box.info.lsn - lsn_before_txn == 2) + +box.space.sync:drop() + +test_run:cmd('stop server replica1') +test_run:cmd('delete server replica1') +test_run:cmd('stop server replica2') +test_run:cmd('delete server replica2') + +box.schema.user.revoke('guest', 'super') diff --git a/test/replication/replica1.lua b/test/replication/replica1.lua new file mode 120000 index 000000000..da69ac81c --- /dev/null +++ b/test/replication/replica1.lua @@ -0,0 +1 @@ +replica.lua \ No newline at end of file diff --git a/test/replication/replica2.lua b/test/replication/replica2.lua new file mode 120000 index 000000000..da69ac81c --- /dev/null +++ b/test/replication/replica2.lua @@ -0,0 +1 @@ +replica.lua \ No newline at end of file diff --git a/test/replication/suite.ini b/test/replication/suite.ini index fa0535ed6..73f73eb89 100644 --- a/test/replication/suite.ini +++ b/test/replication/suite.ini @@ -3,7 +3,7 @@ core = tarantool script = master.lua description = tarantool/box, replication disabled = consistent.test.lua -release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua qsync_advanced.test.lua qsync_errinj.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua gh-4739-vclock-assert.test.lua gh-4730-applier-rollback.test.lua gh-5140-qsync-casc-rollback.test.lua +release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua qsync_advanced.test.lua qsync_errinj.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua gh-4739-vclock-assert.test.lua gh-4730-applier-rollback.test.lua gh-5140-qsync-casc-rollback.test.lua gh-5144-qsync-dup-confirm.test.lua config = suite.cfg lua_libs = lua/fast_replica.lua lua/rlimit.lua use_unix_sockets = True -- 2.21.1 (Apple Git-122.3)
next reply other threads:[~2020-07-28 23:17 UTC|newest] Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-07-28 23:17 Vladislav Shpilevoy [this message] 2020-07-29 20:06 ` Cyrill Gorcunov 2020-07-29 20:54 ` Vladislav Shpilevoy
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=a7e401dbb261883515be221c55c55e7b6f34e4aa.1595978179.git.v.shpilevoy@tarantool.org \ --to=v.shpilevoy@tarantool.org \ --cc=gorcunov@gmail.com \ --cc=tarantool-patches@dev.tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH 1/1] txn_limbo: don'\''t duplicate confirmations in WAL' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox