From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 984D06E454; Thu, 24 Feb 2022 23:20:17 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 984D06E454 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1645734017; bh=+tFeHJ3iyXrJ50U+wj4USfIQjvl4ZH/p+4e3PGYdcvM=; h=To:Date:In-Reply-To:References:Subject:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To:Cc: From; b=GBpdKTAZUbetwC9pt3Xe9tPiUCtMjZyqNxnhvXRgfqqxXQvK+9fk2r0Z7C12uZZ4y beCW3QBwPyVOt4J/BP3y0JWhU1OOwZont5W5/ox34NmxkNjusFpL5kAup5HayUaGza EoH6nDa1XzUdfJ2CmuVZHnleWID1DjEuZwxJM9Ak= Received: from mail-lf1-f48.google.com (mail-lf1-f48.google.com [209.85.167.48]) (using TLSv1.3 with cipher TLS_AES_128_GCM_SHA256 (128/128 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 68A036E225 for ; Thu, 24 Feb 2022 23:19:19 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 68A036E225 Received: by mail-lf1-f48.google.com with SMTP id i11so5853308lfu.3 for ; Thu, 24 Feb 2022 12:19:19 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=/4jElPNjR3XBx8znnusTLiv5zd5hENYmj4q6MxXG+Lc=; b=lR+NaxMdT+SgyvLPXN6rOYcx6aN5HmOg0VXFOEE4cBXvJQVR+d2bDFBvzV2ACX4o3C T2XmzPeL5bzClAfLGSv362JGdtbIJyUT7F7BKUQuFlUj1fXQqX3SX/SVCRlB70SGaSxY RrI1Ap1zcktxyEiTi8zkrxPEKnucQuYuPe9QugtV0/GGXTiexWU7dr9Hq8HVgqsbu0nU 1Z1y8pg5Nj2KxEkaudTB0SFW9PxcuCbTQUVxjXE8G0EXuQOBkTniaHFjJcNQ59TDUwr7 0BC5oL30mysZAh/eJNg9AnHBWuahoRoEs6LVIJCYWH0uv1is37fo5h/DDghjXtjizpgk /xAA== X-Gm-Message-State: AOAM533+8bb8mGOqwJs2FKpS5lPZ3Q3IOX/fXfyxQXfquL7lZ0vQRXFH o44RWZLTlVPebgtPDy3AcugNV+3VU3A= 
X-Google-Smtp-Source: ABdhPJw9fuh0bfiT4Js6mLihH1xbdKp0/34X06MIJi2IRNqpH5DpPHCifWAtLtzDQG3iAdUMFHfvAA== X-Received: by 2002:a05:6512:22d1:b0:441:3024:ddbf with SMTP id g17-20020a05651222d100b004413024ddbfmr2671545lfu.474.1645733958390; Thu, 24 Feb 2022 12:19:18 -0800 (PST) Received: from grain.localdomain ([5.18.251.97]) by smtp.gmail.com with ESMTPSA id p7-20020ac24ec7000000b00443d65ea161sm15248lfr.291.2022.02.24.12.19.17 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Thu, 24 Feb 2022 12:19:17 -0800 (PST) Received: by grain.localdomain (Postfix, from userid 1000) id 423615A0023; Thu, 24 Feb 2022 23:18:42 +0300 (MSK) To: tml Date: Thu, 24 Feb 2022 23:18:41 +0300 Message-Id: <20220224201841.412565-4-gorcunov@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220224201841.412565-1-gorcunov@gmail.com> References: <20220224201841.412565-1-gorcunov@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH v30 3/3] test: add gh-6036-qsync-order test X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Cyrill Gorcunov via Tarantool-patches Reply-To: Cyrill Gorcunov Cc: Vladislav Shpilevoy Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" To test that promotion requests are handled only when appropriate write to WAL completes, because we update memory data before the write finishes. 
Part-of #6036 Signed-off-by: Cyrill Gorcunov --- .../gh_6036_qsync_order_test.lua | 157 ++++++++++++++++++ test/replication-luatest/suite.ini | 1 + 2 files changed, 158 insertions(+) create mode 100644 test/replication-luatest/gh_6036_qsync_order_test.lua diff --git a/test/replication-luatest/gh_6036_qsync_order_test.lua b/test/replication-luatest/gh_6036_qsync_order_test.lua new file mode 100644 index 000000000..95ed3a517 --- /dev/null +++ b/test/replication-luatest/gh_6036_qsync_order_test.lua @@ -0,0 +1,157 @@ +local t = require('luatest') +local cluster = require('test.luatest_helpers.cluster') +local server = require('test.luatest_helpers.server') +local fiber = require('fiber') + +local g = t.group('gh-6036') + +g.before_each(function(cg) + cg.cluster = cluster:new({}) + + local box_cfg = { + replication = { + server.build_instance_uri('r1'), + server.build_instance_uri('r2'), + server.build_instance_uri('r3'), + }, + replication_timeout = 0.1, + replication_connect_quorum = 1, + election_mode = 'manual', + election_timeout = 0.1, + replication_synchro_quorum = 1, + replication_synchro_timeout = 0.1, + log_level = 6, + } + + cg.r1 = cg.cluster:build_server({ alias = 'r1', box_cfg = box_cfg }) + cg.r2 = cg.cluster:build_server({ alias = 'r2', box_cfg = box_cfg }) + cg.r3 = cg.cluster:build_server({ alias = 'r3', box_cfg = box_cfg }) + + cg.cluster:add_server(cg.r1) + cg.cluster:add_server(cg.r2) + cg.cluster:add_server(cg.r3) + cg.cluster:start() +end) + +g.after_each(function(cg) + cg.cluster:drop() + cg.cluster.servers = nil +end) + +g.test_qsync_order = function(cg) + cg.cluster:wait_fullmesh() + + -- + -- Create a synchro space on the r1 node and make + -- sure the write processed just fine. 
+ cg.r1:exec(function() + box.ctl.promote() + box.ctl.wait_rw() + local s = box.schema.create_space('test', {is_sync = true}) + s:create_index('pk') + s:insert{1} + end) + + local vclock = cg.r1:get_vclock() + vclock[0] = nil + cg.r2:wait_vclock(vclock) + cg.r3:wait_vclock(vclock) + + t.assert_equals(cg.r1:eval("return box.space.test:select()"), {{1}}) + t.assert_equals(cg.r2:eval("return box.space.test:select()"), {{1}}) + t.assert_equals(cg.r3:eval("return box.space.test:select()"), {{1}}) + + local function update_replication(...) + return (box.cfg{ replication = { ... } }) + end + + -- + -- Drop connection between r1 and r2. + cg.r1:exec(update_replication, { + server.build_instance_uri("r1"), + server.build_instance_uri("r3"), + }) + + -- + -- Drop connection between r2 and r1. + cg.r2:exec(update_replication, { + server.build_instance_uri("r2"), + server.build_instance_uri("r3"), + }) + + -- + -- Here we have the following scheme + -- + -- r3 (WAL delay) + -- / \ + -- r1 r2 + -- + + -- + -- Initiate disk delay in a bit tricky way: the next write will + -- fall into forever sleep. + cg.r3:eval("box.error.injection.set('ERRINJ_WAL_DELAY', true)") + + -- + -- Make r2 become the leader and start writing data; the PROMOTE + -- request gets queued on r3 and is not yet processed, and at the + -- same time the INSERT won't complete either, waiting for the PROMOTE + -- completion first. Note that we enter r3 as well just to be + -- sure the PROMOTE has reached it via the queue state test. + cg.r2:exec(function() + box.ctl.promote() + box.ctl.wait_rw() + end) + t.helpers.retrying({}, function() + assert(cg.r3:exec(function() + return box.info.synchro.queue.latched == true + end)) + end) + cg.r2:eval("box.space.test:insert{2}") + + -- + -- The r1 node has no clue that there is a new leader and continues + -- writing data with an obsolete term. Since r3 is delayed now + -- the INSERT won't proceed yet but gets queued. + cg.r1:eval("box.space.test:insert{3}") + + -- + -- Finally enable r3 back. 
Make sure the data from the new r2 leader gets + -- written while the old leader's data is ignored. + cg.r3:eval("box.error.injection.set('ERRINJ_WAL_DELAY', false)") + t.helpers.retrying({}, function() + assert(cg.r3:exec(function() + return box.space.test:get{2} ~= nil + end)) + end) + + t.assert_equals(cg.r3:eval("return box.space.test:select()"), {{1},{2}}) + + -- + -- Make sure that while we're processing PROMOTE no other records + -- can sneak in via applier code from other replicas. For this + -- sake initiate voting and stop inside the wal thread just before + -- PROMOTE gets written. Another replica sends us a new record and + -- it should be dropped. + cg.r1:exec(function() + box.ctl.promote() + box.ctl.wait_rw() + end) + vclock = cg.r1:get_vclock() + vclock[0] = nil + cg.r2:wait_vclock(vclock) + cg.r3:wait_vclock(vclock) + + cg.r3:exec(function() + box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 2) + require('fiber').create(function() box.ctl.promote() end) + end) + cg.r1:eval("box.space.test:insert{4}") + cg.r3:exec(function() + assert(box.info.synchro.queue.latched == true) + box.error.injection.set('ERRINJ_WAL_DELAY', false) + box.ctl.wait_rw() + end) + + t.assert_equals(cg.r3:eval("return box.space.test:select()"), {{1},{2}}) +end diff --git a/test/replication-luatest/suite.ini b/test/replication-luatest/suite.ini index 374f1b87a..07ec93a52 100644 --- a/test/replication-luatest/suite.ini +++ b/test/replication-luatest/suite.ini @@ -2,3 +2,4 @@ core = luatest description = replication luatests is_parallel = True +release_disabled = gh_6036_qsync_order_test.lua -- 2.35.1