[Tarantool-patches] [PATCH 1/1] txn_limbo: don't duplicate confirmations in WAL

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Jul 29 02:17:27 MSK 2020


When an ACK was received for an already confirmed transaction
whose CONFIRM WAL write is in progress, it produced a second
CONFIRM in WAL with the same LSN.

That was unnecessary work taking time and disk space for WAL
records. Although it didn't lead to any bugs. Just was very
inefficient.

This patch makes confirmation LSN monotonically grow. In case more
ACKs are received for an already confirmed LSN, its confirmation
is not written second time.

Closes #5144
---
Branch: http://github.com/tarantool/tarantool/tree/gerold103/gh-5144-qsync-dup-confirm
Issue: https://github.com/tarantool/tarantool/issues/5144

No changelog because no visible changes. The issue can be
considered more as an optimization than as a bug.

 src/box/txn_limbo.c                           |   9 +-
 src/box/txn_limbo.h                           |   8 +
 .../gh-5144-qsync-dup-confirm.result          | 153 ++++++++++++++++++
 .../gh-5144-qsync-dup-confirm.test.lua        |  72 +++++++++
 test/replication/replica1.lua                 |   1 +
 test/replication/replica2.lua                 |   1 +
 test/replication/suite.ini                    |   2 +-
 7 files changed, 243 insertions(+), 3 deletions(-)
 create mode 100644 test/replication/gh-5144-qsync-dup-confirm.result
 create mode 100644 test/replication/gh-5144-qsync-dup-confirm.test.lua
 create mode 120000 test/replication/replica1.lua
 create mode 120000 test/replication/replica2.lua

diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index 72c8d572e..b6725ae21 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -41,6 +41,7 @@ txn_limbo_create(struct txn_limbo *limbo)
 	limbo->instance_id = REPLICA_ID_NIL;
 	fiber_cond_create(&limbo->wait_cond);
 	vclock_create(&limbo->vclock);
+	limbo->confirmed_lsn = 0;
 	limbo->rollback_count = 0;
 	limbo->is_in_rollback = false;
 }
@@ -312,6 +313,8 @@ rollback:
 static int
 txn_limbo_write_confirm(struct txn_limbo *limbo, int64_t lsn)
 {
+	assert(lsn > limbo->confirmed_lsn);
+	limbo->confirmed_lsn = lsn;
 	return txn_limbo_write_confirm_rollback(limbo, lsn, true);
 }
 
@@ -361,6 +364,8 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
 static int
 txn_limbo_write_rollback(struct txn_limbo *limbo, int64_t lsn)
 {
+	assert(lsn > limbo->confirmed_lsn);
+	assert(!limbo->is_in_rollback);
 	limbo->is_in_rollback = true;
 	int rc = txn_limbo_write_confirm_rollback(limbo, lsn, false);
 	limbo->is_in_rollback = false;
@@ -438,7 +443,7 @@ txn_limbo_ack(struct txn_limbo *limbo, uint32_t replica_id, int64_t lsn)
 			confirm_lsn = e->lsn;
 		}
 	}
-	if (confirm_lsn == -1)
+	if (confirm_lsn == -1 || confirm_lsn <= limbo->confirmed_lsn)
 		return;
 	if (txn_limbo_write_confirm(limbo, confirm_lsn) != 0) {
 		// TODO: what to do here?.
@@ -581,7 +586,7 @@ txn_limbo_on_parameters_change(struct txn_limbo *limbo)
 			assert(confirm_lsn > 0);
 		}
 	}
-	if (confirm_lsn > 0) {
+	if (confirm_lsn > limbo->confirmed_lsn) {
 		if (txn_limbo_write_confirm(limbo, confirm_lsn) != 0) {
 			panic("Couldn't write CONFIRM to WAL");
 			return;
diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
index d2a689e5c..04ee7ea5c 100644
--- a/src/box/txn_limbo.h
+++ b/src/box/txn_limbo.h
@@ -124,6 +124,14 @@ struct txn_limbo {
 	 * transactions, created on the limbo's owner node.
 	 */
 	struct vclock vclock;
+	/**
+	 * Maximal LSN gathered quorum and either already confirmed in WAL, or
+	 * whose confirmation is in progress right now. Any attempt to confirm
+	 * something smaller than this value can be safely ignored. Moreover,
+	 * any attempt to rollback something starting from <= this LSN is
+	 * illegal.
+	 */
+	int64_t confirmed_lsn;
 	/**
 	 * Total number of performed rollbacks. It used as a guard
 	 * to do some actions assuming all limbo transactions will
diff --git a/test/replication/gh-5144-qsync-dup-confirm.result b/test/replication/gh-5144-qsync-dup-confirm.result
new file mode 100644
index 000000000..9d265d9ff
--- /dev/null
+++ b/test/replication/gh-5144-qsync-dup-confirm.result
@@ -0,0 +1,153 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+engine = test_run:get_cfg('engine')
+ | ---
+ | ...
+fiber = require('fiber')
+ | ---
+ | ...
+--
+-- gh-5144: qsync should not write duplicate confirms for the same LSN. That
+-- could happen when first CONFIRM write is in progress, and more ACKs arrive.
+--
+box.schema.user.grant('guest', 'super')
+ | ---
+ | ...
+
+test_run:cmd('create server replica1 with rpl_master=default,                   \
+             script="replication/replica1.lua"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server replica1 with wait=True, wait_load=True')
+ | ---
+ | - true
+ | ...
+
+test_run:cmd('create server replica2 with rpl_master=default,                   \
+             script="replication/replica2.lua"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server replica2 with wait=True, wait_load=True')
+ | ---
+ | - true
+ | ...
+
+box.cfg{replication_synchro_quorum = 2, replication_synchro_timeout = 1000}
+ | ---
+ | ...
+
+_ = box.schema.space.create('sync', {is_sync = true, engine = engine})
+ | ---
+ | ...
+_ = _:create_index('pk')
+ | ---
+ | ...
+
+-- Remember the current LSN. In the end, when the following synchronous
+-- transaction is committed, result LSN should be this value +2: for the
+-- transaction itself and for CONFIRM.
+lsn_before_txn = box.info.lsn
+ | ---
+ | ...
+
+box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 1)
+ | ---
+ | - ok
+ | ...
+ok, err = nil
+ | ---
+ | ...
+f = fiber.create(function()                                                     \
+    ok, err = pcall(box.space.sync.insert, box.space.sync, {1})                 \
+end)
+ | ---
+ | ...
+-- First replica's ACK is received. Quorum 2 is achieved. CONFIRM write is in
+-- progress.
+while not box.error.injection.get("ERRINJ_WAL_DELAY") do fiber.sleep(0.001) end
+ | ---
+ | ...
+
+-- Wait when both replicas confirm receipt of the sync transaction. It would
+-- mean the master knows both LSNs and either already tried to write CONFIRM
+-- twice, or didn't and will not.
+function wait_lsn_ack(id, lsn)                                                  \
+    local this_id = box.info.id                                                 \
+    test_run:wait_downstream(id, {status='follow'})                             \
+    test_run:wait_cond(function()                                               \
+        return box.info.replication[id].downstream.vclock[this_id] >= lsn       \
+    end)                                                                        \
+end
+ | ---
+ | ...
+replica1_id = test_run:get_server_id('replica1')
+ | ---
+ | ...
+replica2_id = test_run:get_server_id('replica2')
+ | ---
+ | ...
+lsn = box.info.lsn
+ | ---
+ | ...
+wait_lsn_ack(replica1_id, lsn)
+ | ---
+ | ...
+wait_lsn_ack(replica2_id, lsn)
+ | ---
+ | ...
+
+-- Decrease already achieved quorum to check that it won't generate the same
+-- CONFIRM on the parameter update.
+box.cfg{replication_synchro_quorum = 1}
+ | ---
+ | ...
+
+-- Let the transaction finish. CONFIRM should be in WAL now.
+box.error.injection.set("ERRINJ_WAL_DELAY", false)
+ | ---
+ | - ok
+ | ...
+test_run:wait_cond(function() return f:status() == 'dead' end)
+ | ---
+ | - true
+ | ...
+ok, err
+ | ---
+ | - true
+ | - [1]
+ | ...
+
+-- First +1 is the transaction itself. Second +1 is CONFIRM.
+assert(box.info.lsn - lsn_before_txn == 2)
+ | ---
+ | - true
+ | ...
+
+box.space.sync:drop()
+ | ---
+ | ...
+
+test_run:cmd('stop server replica1')
+ | ---
+ | - true
+ | ...
+test_run:cmd('delete server replica1')
+ | ---
+ | - true
+ | ...
+test_run:cmd('stop server replica2')
+ | ---
+ | - true
+ | ...
+test_run:cmd('delete server replica2')
+ | ---
+ | - true
+ | ...
+
+box.schema.user.revoke('guest', 'super')
+ | ---
+ | ...
diff --git a/test/replication/gh-5144-qsync-dup-confirm.test.lua b/test/replication/gh-5144-qsync-dup-confirm.test.lua
new file mode 100644
index 000000000..01a8351e0
--- /dev/null
+++ b/test/replication/gh-5144-qsync-dup-confirm.test.lua
@@ -0,0 +1,72 @@
+test_run = require('test_run').new()
+engine = test_run:get_cfg('engine')
+fiber = require('fiber')
+--
+-- gh-5144: qsync should not write duplicate confirms for the same LSN. That
+-- could happen when first CONFIRM write is in progress, and more ACKs arrive.
+--
+box.schema.user.grant('guest', 'super')
+
+test_run:cmd('create server replica1 with rpl_master=default,                   \
+             script="replication/replica1.lua"')
+test_run:cmd('start server replica1 with wait=True, wait_load=True')
+
+test_run:cmd('create server replica2 with rpl_master=default,                   \
+             script="replication/replica2.lua"')
+test_run:cmd('start server replica2 with wait=True, wait_load=True')
+
+box.cfg{replication_synchro_quorum = 2, replication_synchro_timeout = 1000}
+
+_ = box.schema.space.create('sync', {is_sync = true, engine = engine})
+_ = _:create_index('pk')
+
+-- Remember the current LSN. In the end, when the following synchronous
+-- transaction is committed, result LSN should be this value +2: for the
+-- transaction itself and for CONFIRM.
+lsn_before_txn = box.info.lsn
+
+box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 1)
+ok, err = nil
+f = fiber.create(function()                                                     \
+    ok, err = pcall(box.space.sync.insert, box.space.sync, {1})                 \
+end)
+-- First replica's ACK is received. Quorum 2 is achieved. CONFIRM write is in
+-- progress.
+while not box.error.injection.get("ERRINJ_WAL_DELAY") do fiber.sleep(0.001) end
+
+-- Wait when both replicas confirm receipt of the sync transaction. It would
+-- mean the master knows both LSNs and either already tried to write CONFIRM
+-- twice, or didn't and will not.
+function wait_lsn_ack(id, lsn)                                                  \
+    local this_id = box.info.id                                                 \
+    test_run:wait_downstream(id, {status='follow'})                             \
+    test_run:wait_cond(function()                                               \
+        return box.info.replication[id].downstream.vclock[this_id] >= lsn       \
+    end)                                                                        \
+end
+replica1_id = test_run:get_server_id('replica1')
+replica2_id = test_run:get_server_id('replica2')
+lsn = box.info.lsn
+wait_lsn_ack(replica1_id, lsn)
+wait_lsn_ack(replica2_id, lsn)
+
+-- Decrease already achieved quorum to check that it won't generate the same
+-- CONFIRM on the parameter update.
+box.cfg{replication_synchro_quorum = 1}
+
+-- Let the transaction finish. CONFIRM should be in WAL now.
+box.error.injection.set("ERRINJ_WAL_DELAY", false)
+test_run:wait_cond(function() return f:status() == 'dead' end)
+ok, err
+
+-- First +1 is the transaction itself. Second +1 is CONFIRM.
+assert(box.info.lsn - lsn_before_txn == 2)
+
+box.space.sync:drop()
+
+test_run:cmd('stop server replica1')
+test_run:cmd('delete server replica1')
+test_run:cmd('stop server replica2')
+test_run:cmd('delete server replica2')
+
+box.schema.user.revoke('guest', 'super')
diff --git a/test/replication/replica1.lua b/test/replication/replica1.lua
new file mode 120000
index 000000000..da69ac81c
--- /dev/null
+++ b/test/replication/replica1.lua
@@ -0,0 +1 @@
+replica.lua
\ No newline at end of file
diff --git a/test/replication/replica2.lua b/test/replication/replica2.lua
new file mode 120000
index 000000000..da69ac81c
--- /dev/null
+++ b/test/replication/replica2.lua
@@ -0,0 +1 @@
+replica.lua
\ No newline at end of file
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index fa0535ed6..73f73eb89 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -3,7 +3,7 @@ core = tarantool
 script =  master.lua
 description = tarantool/box, replication
 disabled = consistent.test.lua
-release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua qsync_advanced.test.lua qsync_errinj.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua gh-4739-vclock-assert.test.lua gh-4730-applier-rollback.test.lua gh-5140-qsync-casc-rollback.test.lua
+release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua qsync_advanced.test.lua qsync_errinj.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua gh-4739-vclock-assert.test.lua gh-4730-applier-rollback.test.lua gh-5140-qsync-casc-rollback.test.lua gh-5144-qsync-dup-confirm.test.lua
 config = suite.cfg
 lua_libs = lua/fast_replica.lua lua/rlimit.lua
 use_unix_sockets = True
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list