Tarantool development patches archive
 help / color / mirror / Atom feed
From: "Alexander V. Tikhonov" <avtikhon@tarantool.org>
To: Kirill Yukhin <kyukhin@tarantool.org>
Cc: Sergei Voronezhskii <sergw@tarantool.org>,
	tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH v1 12/12] test: errinj for pause relay_send
Date: Tue, 26 Nov 2019 09:21:48 +0300	[thread overview]
Message-ID: <af0504342f49baade185b360531bb1de66a96940.1574749278.git.avtikhon@tarantool.org> (raw)
In-Reply-To: <1c42ad20160f47d942cab405ce9896d6d31cc05f.1574749278.git.avtikhon@tarantool.org>
In-Reply-To: <1c42ad20160f47d942cab405ce9896d6d31cc05f.1574749278.git.avtikhon@tarantool.org>

From: Sergei Voronezhskii <sergw@tarantool.org>

This commit is the rest part of changes cherry picked from commit
1c34c91fa725ab254619d23c2f1d99f1e8269324. The initial part of changes
were cherry-picked at commit 8f2bd50105e62b0133032a717cfaa6f8fab26c29.

And lookup the xlog files in loop with a little sleep, until the file
count is not as expected.

Part of #3232

(cherry picked from commit 1c34c91fa725ab254619d23c2f1d99f1e8269324)
---
 test/replication/gc.result   | 86 ++++++++++++++++++++----------------
 test/replication/gc.test.lua | 62 ++++++++++++++------------
 2 files changed, 82 insertions(+), 66 deletions(-)

diff --git a/test/replication/gc.result b/test/replication/gc.result
index 5d55403b0..050a6100c 100644
--- a/test/replication/gc.result
+++ b/test/replication/gc.result
@@ -27,6 +27,28 @@ default_checkpoint_count = box.cfg.checkpoint_count
 box.cfg{checkpoint_count = 1}
 ---
 ...
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function wait_gc(n)
+    return test_run:wait_cond(function()
+        return #box.info.gc().checkpoints == n
+    end, 10)
+end;
+---
+...
+function wait_xlog(n, timeout)
+    return test_run:wait_cond(function()
+        return #fio.glob('./master/*.xlog') == n
+    end, 10)
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
 -- Grant permissions needed for replication.
 box.schema.user.grant('guest', 'replication')
 ---
@@ -63,14 +85,13 @@ for i = 1, 100 do s:auto_increment{} end
 ...
 -- Make sure replica join will take long enough for us to
 -- invoke garbage collection.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05)
+box.error.injection.set("ERRINJ_RELAY_SEND_DELAY", true)
 ---
 - ok
 ...
 -- While the replica is receiving the initial data set,
 -- make a snapshot and invoke garbage collection, then
--- remove the timeout injection so that we don't have to
--- wait too long for the replica to start.
+-- remove delay to allow replica to start.
 test_run:cmd("setopt delimiter ';'")
 ---
 - true
@@ -78,7 +99,7 @@ test_run:cmd("setopt delimiter ';'")
 fiber.create(function()
     fiber.sleep(0.1)
     box.snapshot()
-    box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0)
+    box.error.injection.set("ERRINJ_RELAY_SEND_DELAY", false)
 end)
 test_run:cmd("setopt delimiter ''");
 ---
@@ -110,21 +131,16 @@ test_run:cmd("switch default")
 ...
 -- Check that garbage collection removed the snapshot once
 -- the replica released the corresponding checkpoint.
-test_run:wait_cond(function() return #box.info.gc().checkpoints == 1 end, 10)
----
-- true
-...
-#box.info.gc().checkpoints == 1 or box.info.gc()
+wait_gc(1) or box.info.gc()
 ---
 - true
 ...
-#fio.glob('./master/*.xlog') == 1 or fio.listdir('./master')
+wait_xlog(1) or fio.listdir('./master') -- Make sure the replica will not receive data until
 ---
 - true
 ...
--- Make sure the replica will receive data it is subscribed
--- to long enough for us to invoke garbage collection.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05)
+-- we test garbage collection.
+box.error.injection.set("ERRINJ_RELAY_SEND_DELAY", true)
 ---
 - ok
 ...
@@ -152,17 +168,17 @@ box.snapshot()
 ---
 - ok
 ...
-#box.info.gc().checkpoints == 1 or box.info.gc()
+wait_gc(1) or box.info.gc()
 ---
 - true
 ...
-#fio.glob('./master/*.xlog') == 2 or fio.listdir('./master')
+wait_xlog(2) or fio.listdir('./master')
 ---
 - true
 ...
--- Remove the timeout injection so that the replica catches
+-- Resume replication so that the replica catches
 -- up quickly.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0)
+box.error.injection.set("ERRINJ_RELAY_SEND_DELAY", false)
 ---
 - ok
 ...
@@ -185,11 +201,11 @@ test_run:cmd("switch default")
 ...
 -- Now garbage collection should resume and delete files left
 -- from the old checkpoint.
-test_run:wait_cond(function() return #fio.glob('./master/*.xlog') == 0 end, 10)
+wait_gc(1) or box.info.gc()
 ---
 - true
 ...
-#fio.glob('./master/*.xlog') == 0 or fio.listdir('./master')
+wait_xlog(0) or fio.listdir('./master')
 ---
 - true
 ...
@@ -228,11 +244,11 @@ fiber.sleep(0.1) -- wait for master to relay data
 -- Garbage collection must not delete the old xlog file
 -- because it is still needed by the replica, but remove
 -- the old snapshot.
-#box.info.gc().checkpoints == 1 or box.info.gc()
+wait_gc(1) or box.info.gc()
 ---
 - true
 ...
-#fio.glob('./master/*.xlog') == 2 or fio.listdir('./master')
+wait_xlog(2) or fio.listdir('./master')
 ---
 - true
 ...
@@ -266,11 +282,11 @@ test_run:cmd("switch default")
 - true
 ...
 -- Now it's safe to drop the old xlog.
-test_run:wait_cond(function() return #fio.glob('./master/*.xlog') == 1 end, 10)
+wait_gc(1) or box.info.gc()
 ---
 - true
 ...
-#fio.glob('./master/*.xlog') == 1 or fio.listdir('./master')
+wait_xlog(1) or fio.listdir('./master')
 ---
 - true
 ...
@@ -302,17 +318,11 @@ box.snapshot()
 ---
 - ok
 ...
-#box.info.gc().checkpoints == 1 or box.info.gc()
+wait_gc(1) or box.info.gc()
 ---
 - true
 ...
-xlog_count = #fio.glob('./master/*.xlog')
----
-...
--- the replica may have managed to download all data
--- from xlog #1 before it was stopped, in which case
--- it's OK to collect xlog #1
-xlog_count == 3 or xlog_count == 2 or fio.listdir('./master')
+wait_xlog(2) or fio.listdir('./master')
 ---
 - true
 ...
@@ -321,7 +331,11 @@ xlog_count == 3 or xlog_count == 2 or fio.listdir('./master')
 test_run:cleanup_cluster()
 ---
 ...
-#fio.glob('./master/*.xlog') == 1 or fio.listdir('./master')
+wait_gc(1) or box.info.gc()
+---
+- true
+...
+wait_xlog(1) or fio.listdir('./master')
 ---
 - true
 ...
@@ -409,7 +423,7 @@ box.snapshot()
 ---
 - ok
 ...
-#fio.glob('./master/*.xlog') == 3 or fio.listdir('./master')
+wait_xlog(3) or fio.listdir('./master')
 ---
 - true
 ...
@@ -422,11 +436,7 @@ box.snapshot()
 ---
 - ok
 ...
-test_run:wait_cond(function() return #fio.glob('./master/*.xlog') == 0 end, 10)
----
-- true
-...
-#fio.glob('./master/*.xlog') == 0 or fio.listdir('./master')
+wait_xlog(0, 10) or fio.listdir('./master')
 ---
 - true
 ...
diff --git a/test/replication/gc.test.lua b/test/replication/gc.test.lua
index 40a349167..7cd18402c 100644
--- a/test/replication/gc.test.lua
+++ b/test/replication/gc.test.lua
@@ -11,6 +11,19 @@ test_run:cmd("create server replica with rpl_master=default, script='replication
 default_checkpoint_count = box.cfg.checkpoint_count
 box.cfg{checkpoint_count = 1}
 
+test_run:cmd("setopt delimiter ';'")
+function wait_gc(n)
+    return test_run:wait_cond(function()
+        return #box.info.gc().checkpoints == n
+    end, 10)
+end;
+function wait_xlog(n, timeout)
+    return test_run:wait_cond(function()
+        return #fio.glob('./master/*.xlog') == n
+    end, 10)
+end;
+test_run:cmd("setopt delimiter ''");
+
 -- Grant permissions needed for replication.
 box.schema.user.grant('guest', 'replication')
 
@@ -29,17 +42,16 @@ for i = 1, 100 do s:auto_increment{} end
 
 -- Make sure replica join will take long enough for us to
 -- invoke garbage collection.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05)
+box.error.injection.set("ERRINJ_RELAY_SEND_DELAY", true)
 
 -- While the replica is receiving the initial data set,
 -- make a snapshot and invoke garbage collection, then
--- remove the timeout injection so that we don't have to
--- wait too long for the replica to start.
+-- remove delay to allow replica to start.
 test_run:cmd("setopt delimiter ';'")
 fiber.create(function()
     fiber.sleep(0.1)
     box.snapshot()
-    box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0)
+    box.error.injection.set("ERRINJ_RELAY_SEND_DELAY", false)
 end)
 test_run:cmd("setopt delimiter ''");
 
@@ -57,12 +69,10 @@ test_run:cmd("switch default")
 
 -- Check that garbage collection removed the snapshot once
 -- the replica released the corresponding checkpoint.
-test_run:wait_cond(function() return #box.info.gc().checkpoints == 1 end, 10)
-#box.info.gc().checkpoints == 1 or box.info.gc()
-#fio.glob('./master/*.xlog') == 1 or fio.listdir('./master')
--- Make sure the replica will receive data it is subscribed
--- to long enough for us to invoke garbage collection.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05)
+wait_gc(1) or box.info.gc()
+wait_xlog(1) or fio.listdir('./master') -- Make sure the replica will not receive data until
+-- we test garbage collection.
+box.error.injection.set("ERRINJ_RELAY_SEND_DELAY", true)
 
 -- Send more data to the replica.
 -- Need to do 2 snapshots here, otherwise the replica would
@@ -76,12 +86,12 @@ box.snapshot()
 -- Invoke garbage collection. Check that it doesn't remove
 -- xlogs needed by the replica.
 box.snapshot()
-#box.info.gc().checkpoints == 1 or box.info.gc()
-#fio.glob('./master/*.xlog') == 2 or fio.listdir('./master')
+wait_gc(1) or box.info.gc()
+wait_xlog(2) or fio.listdir('./master')
 
--- Remove the timeout injection so that the replica catches
+-- Resume replication so that the replica catches
 -- up quickly.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0)
+box.error.injection.set("ERRINJ_RELAY_SEND_DELAY", false)
 
 -- Check that the replica received all data from the master.
 test_run:cmd("switch replica")
@@ -91,8 +101,8 @@ test_run:cmd("switch default")
 
 -- Now garbage collection should resume and delete files left
 -- from the old checkpoint.
-test_run:wait_cond(function() return #fio.glob('./master/*.xlog') == 0 end, 10)
-#fio.glob('./master/*.xlog') == 0 or fio.listdir('./master')
+wait_gc(1) or box.info.gc()
+wait_xlog(0) or fio.listdir('./master')
 --
 -- Check that the master doesn't delete xlog files sent to the
 -- replica until it receives a confirmation that the data has
@@ -124,8 +134,8 @@ test_run:wait_cond(function() return box.space.test:count() == 310 end, 10)
 box.space.test:count()
 test_run:cmd("switch default")
 -- Now it's safe to drop the old xlog.
-test_run:wait_cond(function() return #fio.glob('./master/*.xlog') == 1 end, 10)
-#fio.glob('./master/*.xlog') == 1 or fio.listdir('./master')
+wait_gc(1) or box.info.gc()
+wait_xlog(1) or fio.listdir('./master')
 -- Stop the replica.
 test_run:cmd("stop server replica")
 test_run:cmd("cleanup server replica")
@@ -139,17 +149,14 @@ _ = s:auto_increment{}
 box.snapshot()
 _ = s:auto_increment{}
 box.snapshot()
-#box.info.gc().checkpoints == 1 or box.info.gc()
-xlog_count = #fio.glob('./master/*.xlog')
--- the replica may have managed to download all data
--- from xlog #1 before it was stopped, in which case
--- it's OK to collect xlog #1
-xlog_count == 3 or xlog_count == 2 or fio.listdir('./master')
+wait_gc(1) or box.info.gc()
+wait_xlog(2) or fio.listdir('./master')
 
 -- The xlog should only be deleted after the replica
 -- is unregistered.
 test_run:cleanup_cluster()
-#fio.glob('./master/*.xlog') == 1 or fio.listdir('./master')
+wait_gc(1) or box.info.gc()
+wait_xlog(1) or fio.listdir('./master')
 --
 -- Test that concurrent invocation of the garbage collector works fine.
 --
@@ -188,14 +195,13 @@ _ = s:auto_increment{}
 box.snapshot()
 _ = s:auto_increment{}
 box.snapshot()
-#fio.glob('./master/*.xlog') == 3 or fio.listdir('./master')
+wait_xlog(3) or fio.listdir('./master')
 
 -- Delete the replica from the cluster table and check that
 -- all xlog files are removed.
 test_run:cleanup_cluster()
 box.snapshot()
-test_run:wait_cond(function() return #fio.glob('./master/*.xlog') == 0 end, 10)
-#fio.glob('./master/*.xlog') == 0 or fio.listdir('./master')
+wait_xlog(0, 10) or fio.listdir('./master')
 
 -- Restore the config.
 box.cfg{replication = {}}
-- 
2.17.1

  parent reply	other threads:[~2019-11-26  6:22 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-26  6:21 [Tarantool-patches] [PATCH v1 01/12] test: enable parallel mode for xlog tests Alexander V. Tikhonov
2019-11-26  6:21 ` [Tarantool-patches] [PATCH v1 02/12] test: enable parallel run for long test suites Alexander V. Tikhonov
2019-11-26  6:21 ` [Tarantool-patches] [PATCH v1 03/12] test: replication parallel mode on Alexander V. Tikhonov
2019-11-26  6:21 ` [Tarantool-patches] [PATCH v1 04/12] test: enable cleaning of a test environment Alexander V. Tikhonov
2019-11-26  6:21 ` [Tarantool-patches] [PATCH v1 05/12] test: allow to run replication/misc multiple times Alexander V. Tikhonov
2019-11-26  6:21 ` [Tarantool-patches] [PATCH v1 06/12] test: increase timeouts in replication/errinj Alexander V. Tikhonov
2019-11-26  6:21 ` [Tarantool-patches] [PATCH v1 07/12] test: wait for xlog/snap/log file changes Alexander V. Tikhonov
2019-11-26  6:21 ` [Tarantool-patches] [PATCH v1 08/12] test: use wait_cond to check follow status Alexander V. Tikhonov
2019-11-26  6:21 ` [Tarantool-patches] [PATCH v1 09/12] test: increase timeouts in replication/misc Alexander V. Tikhonov
2019-11-26  6:21 ` [Tarantool-patches] [PATCH v1 10/12] test: put require in proper places Alexander V. Tikhonov
2019-11-26  6:21 ` [Tarantool-patches] [PATCH v1 11/12] test: fix replication/gc flaky failures Alexander V. Tikhonov
2019-11-26  6:21 ` Alexander V. Tikhonov [this message]
2019-11-26  6:54 ` [Tarantool-patches] [PATCH v1 01/12] test: enable parallel mode for xlog tests Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=af0504342f49baade185b360531bb1de66a96940.1574749278.git.avtikhon@tarantool.org \
    --to=avtikhon@tarantool.org \
    --cc=kyukhin@tarantool.org \
    --cc=sergw@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v1 12/12] test: errinj for pause relay_send' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox