[Tarantool-patches] [PATCH v1 1/2] test: cleanup replication/gc

Alexander V. Tikhonov avtikhon at tarantool.org
Tue Oct 29 13:21:39 MSK 2019


From: Vladimir Davydov <vdavydov.dev at gmail.com>

 - Before checking that old WAL files have been removed, wait for the
   garbage collector to remove them to avoid a spurious test failure.
   Currently, the test waits until old checkpoints are removed, but the
   garbage collector can now remove checkpoints while keeping WAL files.
   This is a follow-up for commit 9c5d851d7830 ("replication: remove old
   snapshot files not needed by replicas").
 - Remove a few pointless box.info.gc().checkpoints checks.
 - Use test_run.wait_cond for waiting instead of while-do-sleep loops.

(cherry picked from commit 84c7d0f723655b445ac04ed52682270f06a59f50)
---
 test/replication/gc.result   | 44 ++++++++++--------------------------
 test/replication/gc.test.lua | 23 ++++++-------------
 2 files changed, 19 insertions(+), 48 deletions(-)

diff --git a/test/replication/gc.result b/test/replication/gc.result
index 5922c7d0e..5b44284bf 100644
--- a/test/replication/gc.result
+++ b/test/replication/gc.result
@@ -27,9 +27,6 @@ default_checkpoint_count = box.cfg.checkpoint_count
 box.cfg{checkpoint_count = 1}
 ---
 ...
-function wait_gc(n) while #box.info.gc().checkpoints > n do fiber.sleep(0.01) end end
----
-...
 -- Grant permissions needed for replication.
 box.schema.user.grant('guest', 'replication')
 ---
@@ -99,11 +96,9 @@ test_run:cmd("switch replica")
 ---
 - true
 ...
-fiber = require('fiber')
----
-...
-while box.space.test:count() < 200 do fiber.sleep(0.01) end
+test_run:wait_cond(function() return box.space.test:count() == 200 end, 10)
 ---
+- true
 ...
 box.space.test:count()
 ---
@@ -115,8 +110,9 @@ test_run:cmd("switch default")
 ...
 -- Check that garbage collection removed the snapshot once
 -- the replica released the corresponding checkpoint.
-wait_gc(1)
+test_run:wait_cond(function() return #box.info.gc().checkpoints == 1 end, 10)
 ---
+- true
 ...
 #box.info.gc().checkpoints == 1 or box.info.gc()
 ---
@@ -175,8 +171,9 @@ test_run:cmd("switch replica")
 ---
 - true
 ...
-while box.space.test:count() < 300 do fiber.sleep(0.01) end
+test_run:wait_cond(function() return box.space.test:count() == 300 end, 10)
 ---
+- true
 ...
 box.space.test:count()
 ---
@@ -188,10 +185,7 @@ test_run:cmd("switch default")
 ...
 -- Now garbage collection should resume and delete files left
 -- from the old checkpoint.
-wait_gc(1)
----
-...
-#box.info.gc().checkpoints == 1 or box.info.gc()
+test_run:wait_cond(function() return #fio.glob('./master/*.xlog') == 0 end, 10)
 ---
 - true
 ...
@@ -261,11 +255,9 @@ test_run:cmd("switch replica")
 ---
 - true
 ...
-fiber = require('fiber')
----
-...
-while box.space.test:count() < 310 do fiber.sleep(0.01) end
+test_run:wait_cond(function() return box.space.test:count() == 310 end, 10)
 ---
+- true
 ...
 box.space.test:count()
 ---
@@ -276,10 +268,7 @@ test_run:cmd("switch default")
 - true
 ...
 -- Now it's safe to drop the old xlog.
-wait_gc(1)
----
-...
-#box.info.gc().checkpoints == 1 or box.info.gc()
+test_run:wait_cond(function() return #fio.glob('./master/*.xlog') == 1 end, 10)
 ---
 - true
 ...
@@ -334,10 +323,6 @@ xlog_count == 3 or xlog_count == 2 or fio.listdir('./master')
 test_run:cleanup_cluster()
 ---
 ...
-#box.info.gc().checkpoints == 1 or box.info.gc()
----
-- true
-...
 #fio.glob('./master/*.xlog') == 1 or fio.listdir('./master')
 ---
 - true
@@ -381,9 +366,6 @@ replica_set.drop_all(test_run)
 fio = require('fio')
 ---
 ...
-fiber = require('fiber')
----
-...
 -- Start a replica and set it up as a master for this instance.
 test_run:cmd("start server replica")
 ---
@@ -442,11 +424,9 @@ box.snapshot()
 ---
 - ok
 ...
-t = fiber.time()
----
-...
-while #fio.glob('./master/*xlog') > 0 and fiber.time() - t < 10 do fiber.sleep(0.01) end
+test_run:wait_cond(function() return #fio.glob('./master/*.xlog') == 0 end, 10)
 ---
+- true
 ...
 #fio.glob('./master/*.xlog') == 0 or fio.listdir('./master')
 ---
diff --git a/test/replication/gc.test.lua b/test/replication/gc.test.lua
index f19fd2e7c..fee1fe968 100644
--- a/test/replication/gc.test.lua
+++ b/test/replication/gc.test.lua
@@ -11,8 +11,6 @@ test_run:cmd("create server replica with rpl_master=default, script='replication
 default_checkpoint_count = box.cfg.checkpoint_count
 box.cfg{checkpoint_count = 1}
 
-function wait_gc(n) while #box.info.gc().checkpoints > n do fiber.sleep(0.01) end end
-
 -- Grant permissions needed for replication.
 box.schema.user.grant('guest', 'replication')
 
@@ -53,14 +51,13 @@ test_run:cmd("start server replica")
 -- bootstrapped from, the replica should still receive all
 -- data from the master. Check it.
 test_run:cmd("switch replica")
-fiber = require('fiber')
-while box.space.test:count() < 200 do fiber.sleep(0.01) end
+test_run:wait_cond(function() return box.space.test:count() == 200 end, 10)
 box.space.test:count()
 test_run:cmd("switch default")
 
 -- Check that garbage collection removed the snapshot once
 -- the replica released the corresponding checkpoint.
-wait_gc(1)
+test_run:wait_cond(function() return #box.info.gc().checkpoints == 1 end, 10)
 #box.info.gc().checkpoints == 1 or box.info.gc()
 #fio.glob('./master/*.xlog') == 1 or fio.listdir('./master')
 -- Make sure the replica will receive data it is subscribed
@@ -88,14 +85,13 @@ box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0)
 
 -- Check that the replica received all data from the master.
 test_run:cmd("switch replica")
-while box.space.test:count() < 300 do fiber.sleep(0.01) end
+test_run:wait_cond(function() return box.space.test:count() == 300 end, 10)
 box.space.test:count()
 test_run:cmd("switch default")
 
 -- Now garbage collection should resume and delete files left
 -- from the old checkpoint.
-wait_gc(1)
-#box.info.gc().checkpoints == 1 or box.info.gc()
+test_run:wait_cond(function() return #fio.glob('./master/*.xlog') == 0 end, 10)
 #fio.glob('./master/*.xlog') == 0 or fio.listdir('./master')
 --
 -- Check that the master doesn't delete xlog files sent to the
@@ -124,13 +120,11 @@ box.cfg{replication = {}}
 test_run:cmd("restart server replica")
 -- Wait for the replica to catch up.
 test_run:cmd("switch replica")
-fiber = require('fiber')
-while box.space.test:count() < 310 do fiber.sleep(0.01) end
+test_run:wait_cond(function() return box.space.test:count() == 310 end, 10)
 box.space.test:count()
 test_run:cmd("switch default")
 -- Now it's safe to drop the old xlog.
-wait_gc(1)
-#box.info.gc().checkpoints == 1 or box.info.gc()
+test_run:wait_cond(function() return #fio.glob('./master/*.xlog') == 1 end, 10)
 #fio.glob('./master/*.xlog') == 1 or fio.listdir('./master')
 -- Stop the replica.
 test_run:cmd("stop server replica")
@@ -155,7 +149,6 @@ xlog_count == 3 or xlog_count == 2 or fio.listdir('./master')
 -- The xlog should only be deleted after the replica
 -- is unregistered.
 test_run:cleanup_cluster()
-#box.info.gc().checkpoints == 1 or box.info.gc()
 #fio.glob('./master/*.xlog') == 1 or fio.listdir('./master')
 --
 -- Test that concurrent invocation of the garbage collector works fine.
@@ -179,7 +172,6 @@ replica_set.drop_all(test_run)
 -- a replication master (gh-3546).
 --
 fio = require('fio')
-fiber = require('fiber')
 
 -- Start a replica and set it up as a master for this instance.
 test_run:cmd("start server replica")
@@ -202,8 +194,7 @@ box.snapshot()
 -- all xlog files are removed.
 test_run:cleanup_cluster()
 box.snapshot()
-t = fiber.time()
-while #fio.glob('./master/*xlog') > 0 and fiber.time() - t < 10 do fiber.sleep(0.01) end
+test_run:wait_cond(function() return #fio.glob('./master/*.xlog') == 0 end, 10)
 #fio.glob('./master/*.xlog') == 0 or fio.listdir('./master')
 
 -- Restore the config.
-- 
2.17.1



More information about the Tarantool-patches mailing list