[Tarantool-patches] [PATCH v3] replication: removing anonymous replicas from synchro quorum

Yan Shtunder ya.shtunder at gmail.com
Mon Oct 25 12:52:23 MSK 2021


Transactions have to be committed after they reach a quorum of "real"
cluster members. Therefore, anonymous replicas must not
participate in the quorum.

Closes #5418
---
Issue: https://github.com/tarantool/tarantool/issues/5418
Patch: https://github.com/tarantool/tarantool/tree/yshtunder/gh-5418-qsync-with-anon-replicas

 src/box/relay.cc                          |  3 +-
 test/replication-luatest/gh_5418_test.lua | 82 +++++++++++++++++++++++
 2 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 test/replication-luatest/gh_5418_test.lua

diff --git a/src/box/relay.cc b/src/box/relay.cc
index f5852df7b..cf569e8e2 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -543,6 +543,7 @@ tx_status_update(struct cmsg *msg)
 	struct replication_ack ack;
 	ack.source = status->relay->replica->id;
 	ack.vclock = &status->vclock;
+	bool anon = status->relay->replica->anon;
 	/*
 	 * Let pending synchronous transactions know, which of
 	 * them were successfully sent to the replica. Acks are
@@ -550,7 +551,7 @@ tx_status_update(struct cmsg *msg)
 	 * the single master in 100% so far). Other instances wait
 	 * for master's CONFIRM message instead.
 	 */
-	if (txn_limbo.owner_id == instance_id) {
+	if (txn_limbo.owner_id == instance_id && !anon) {
 		txn_limbo_ack(&txn_limbo, ack.source,
 			      vclock_get(ack.vclock, instance_id));
 	}
diff --git a/test/replication-luatest/gh_5418_test.lua b/test/replication-luatest/gh_5418_test.lua
new file mode 100644
index 000000000..265d28ccb
--- /dev/null
+++ b/test/replication-luatest/gh_5418_test.lua
@@ -0,0 +1,82 @@
+local fio = require('fio')
+local log = require('log')
+local fiber = require('fiber')
+local t = require('luatest')
+local cluster = require('test.luatest_helpers.cluster')
+local helpers = require('test.luatest_helpers.helpers')
+
+local g = t.group('gh-5418')
+
+-- Build and start a two-node cluster: a master requiring a synchro quorum
+-- of 2 and a read-only anonymous replica that replicates from it.
+g.before_test('test_qsync_with_anon', function()
+    g.cluster = cluster:new({})
+    local master_cfg = {
+        replication         = {helpers.instance_uri('master')},
+        replication_synchro_quorum = 2,
+        replication_timeout = 0.1
+    }
+    -- NOTE(review): `engine` was an undeclared global (nil, or an error in
+    -- strict mode); nil is now explicit - confirm build_server's 2nd param.
+    g.master = g.cluster:build_server({alias = 'master'}, nil, master_cfg)
+
+    local replica_cfg = {
+        replication         = {
+            helpers.instance_uri('master'),
+            helpers.instance_uri('replica')
+        },
+        replication_timeout = 0.1,
+        replication_connect_timeout = 0.5,
+        read_only           = true,
+        replication_anon    = true
+    }
+    g.replica = g.cluster:build_server({alias = 'replica'}, nil, replica_cfg)
+    g.cluster:join_server(g.master)
+    g.cluster:join_server(g.replica)
+    g.cluster:start()
+    log.info('Everything is started')
+end)
+
+-- Stop the cluster, then delete the working directory of each instance.
+g.after_test('test_qsync_with_anon', function()
+    g.cluster:stop()
+    for _, k in ipairs({'master', 'replica'}) do fio.rmtree(g[k].workdir) end
+end)
+
+-- Wait until the downstream vclock[1] that the master reports for its
+-- anonymous replica reaches the master's vclock[1].
+-- Returns true on success, false once `timeout` seconds have elapsed.
+local function wait_vclock(timeout)
+    local started_at = fiber.clock()
+    local lsn = g.master:eval("return box.info.vclock[1]")
+    -- nil while the anon replica is not listed or has no downstream vclock.
+    local function anon_lsn()
+        local _, tbl = g.master:eval("return next(box.info.replication_anon())")
+        local vclock = tbl and tbl.downstream and tbl.downstream.vclock
+        return vclock and vclock[1]
+    end
+    local to_lsn = anon_lsn()
+    while to_lsn == nil or to_lsn < lsn do
+        fiber.sleep(0.001)
+        if fiber.clock() - started_at > timeout then return false end
+        to_lsn = anon_lsn()
+        -- to_lsn may still be nil here; "%d" would raise on it.
+        log.info(string.format("master lsn: %s; replica_anon lsn: %s",
+            tostring(lsn), tostring(to_lsn)))
+    end
+    return true
+end
+
+-- A sync insert must hit the quorum timeout (the anon replica's ack is not
+-- counted); afterwards the space must be empty on both instances.
+g.test_qsync_with_anon = function()
+    g.master:eval("box.schema.space.create('sync', {is_sync = true})")
+    g.master:eval("box.space.sync:create_index('pk')")
+    t.assert_error_msg_content_equals(
+        "Quorum collection for a synchronous transaction is timed out",
+        function() g.master:eval("return box.space.sync:insert{1}") end)
+    t.assert(wait_vclock(1)) -- let the replica catch up with the master first
+    for _, k in ipairs({'master', 'replica'}) do
+        t.assert_equals(g[k]:eval("return box.space.sync:select()"), {})
+    end
+end
--
2.25.1



More information about the Tarantool-patches mailing list