[tarantool-patches] [PATCH 4/4] test: wait for xlog/snap/log file changes

Alexander Turenko alexander.turenko at tarantool.org
Wed Apr 10 16:28:45 MSK 2019


From: Alexander Tikhonov <avtikhon at gmail.com>

When a system in under heavy load (say, when tests are run in parallel)
it is possible that disc writes stalls for some time. This can cause a
fail of a check that a test performs, so now we retry such checks during
60 seconds until a condition will be met.

This change targets replication test suite.
---
 test/replication/gc_no_space.result      | 18 ++++++++++--------
 test/replication/gc_no_space.test.lua    | 18 ++++++++++--------
 test/replication/replica_rejoin.result   | 10 +++++-----
 test/replication/replica_rejoin.test.lua |  6 +++---
 test/replication/sync.result             |  2 +-
 test/replication/sync.test.lua           |  2 +-
 6 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/test/replication/gc_no_space.result b/test/replication/gc_no_space.result
index b2d3e2075..e860ab00f 100644
--- a/test/replication/gc_no_space.result
+++ b/test/replication/gc_no_space.result
@@ -20,22 +20,24 @@ test_run:cmd("setopt delimiter ';'")
 ---
 - true
 ...
-function check_file_count(dir, glob, count)
-    local files = fio.glob(fio.pathjoin(dir, glob))
-    if #files == count then
-        return true
-    end
-    return false, files
+function wait_file_count(dir, glob, count)
+    return test_run:wait_cond(function()
+        local files = fio.glob(fio.pathjoin(dir, glob))
+        if #files == count then
+            return true
+        end
+        return false, files
+    end)
 end;
 ---
 ...
 function check_wal_count(count)
-    return check_file_count(box.cfg.wal_dir, '*.xlog', count)
+    return wait_file_count(box.cfg.wal_dir, '*.xlog', count)
 end;
 ---
 ...
 function check_snap_count(count)
-    return check_file_count(box.cfg.memtx_dir, '*.snap', count)
+    return wait_file_count(box.cfg.memtx_dir, '*.snap', count)
 end;
 ---
 ...
diff --git a/test/replication/gc_no_space.test.lua b/test/replication/gc_no_space.test.lua
index 6940996fe..98ccd401b 100644
--- a/test/replication/gc_no_space.test.lua
+++ b/test/replication/gc_no_space.test.lua
@@ -11,18 +11,20 @@ fio = require('fio')
 errinj = box.error.injection
 
 test_run:cmd("setopt delimiter ';'")
-function check_file_count(dir, glob, count)
-    local files = fio.glob(fio.pathjoin(dir, glob))
-    if #files == count then
-        return true
-    end
-    return false, files
+function wait_file_count(dir, glob, count)
+    return test_run:wait_cond(function()
+        local files = fio.glob(fio.pathjoin(dir, glob))
+        if #files == count then
+            return true
+        end
+        return false, files
+    end)
 end;
 function check_wal_count(count)
-    return check_file_count(box.cfg.wal_dir, '*.xlog', count)
+    return wait_file_count(box.cfg.wal_dir, '*.xlog', count)
 end;
 function check_snap_count(count)
-    return check_file_count(box.cfg.memtx_dir, '*.snap', count)
+    return wait_file_count(box.cfg.memtx_dir, '*.snap', count)
 end;
 test_run:cmd("setopt delimiter ''");
 
diff --git a/test/replication/replica_rejoin.result b/test/replication/replica_rejoin.result
index 87d626e20..0a617c314 100644
--- a/test/replication/replica_rejoin.result
+++ b/test/replication/replica_rejoin.result
@@ -102,9 +102,9 @@ _ = box.space.test:insert{30}
 fio = require('fio')
 ---
 ...
-#fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) -- 1
+test_run:wait_cond(function() return #fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) == 1 end) or fio.pathjoin(box.cfg.wal_dir, '*.xlog')
 ---
-- 1
+- true
 ...
 box.cfg{checkpoint_count = checkpoint_count}
 ---
@@ -203,9 +203,9 @@ for i = 1, 3 do box.space.test:insert{i * 100} end
 fio = require('fio')
 ---
 ...
-#fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) -- 1
+test_run:wait_cond(function() return #fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) == 1 end) or fio.pathjoin(box.cfg.wal_dir, '*.xlog')
 ---
-- 1
+- true
 ...
 box.cfg{checkpoint_count = checkpoint_count}
 ---
@@ -330,7 +330,7 @@ box.cfg{checkpoint_count = default_checkpoint_count}
 fio = require('fio')
 ---
 ...
-#fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) == 1
+test_run:wait_cond(function() return #fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) == 1 end) or fio.pathjoin(box.cfg.wal_dir, '*.xlog')
 ---
 - true
 ...
diff --git a/test/replication/replica_rejoin.test.lua b/test/replication/replica_rejoin.test.lua
index 9bf43eff8..603ef4d15 100644
--- a/test/replication/replica_rejoin.test.lua
+++ b/test/replication/replica_rejoin.test.lua
@@ -40,7 +40,7 @@ box.snapshot()
 _ = box.space.test:delete{3}
 _ = box.space.test:insert{30}
 fio = require('fio')
-#fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) -- 1
+test_run:wait_cond(function() return #fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) == 1 end) or fio.pathjoin(box.cfg.wal_dir, '*.xlog')
 box.cfg{checkpoint_count = checkpoint_count}
 
 -- Restart the replica. Since xlogs have been removed,
@@ -76,7 +76,7 @@ for i = 1, 3 do box.space.test:delete{i * 10} end
 box.snapshot()
 for i = 1, 3 do box.space.test:insert{i * 100} end
 fio = require('fio')
-#fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) -- 1
+test_run:wait_cond(function() return #fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) == 1 end) or fio.pathjoin(box.cfg.wal_dir, '*.xlog')
 box.cfg{checkpoint_count = checkpoint_count}
 test_run:cmd("start server replica")
 test_run:cmd("switch replica")
@@ -121,7 +121,7 @@ box.cfg{checkpoint_count = 1}
 box.snapshot()
 box.cfg{checkpoint_count = default_checkpoint_count}
 fio = require('fio')
-#fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) == 1
+test_run:wait_cond(function() return #fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) == 1 end) or fio.pathjoin(box.cfg.wal_dir, '*.xlog')
 -- Bump vclock on the replica again.
 test_run:cmd("switch replica")
 for i = 1, 10 do box.space.test:replace{2} end
diff --git a/test/replication/sync.result b/test/replication/sync.result
index b34501dae..eddc7cbc8 100644
--- a/test/replication/sync.result
+++ b/test/replication/sync.result
@@ -298,7 +298,7 @@ box.info.replication[1].upstream.status -- follow
 ---
 - follow
 ...
-test_run:grep_log('replica', 'ER_CFG.*')
+test_run:wait_log("replica", "ER_CFG.*", nil, 200)
 ---
 - 'ER_CFG: Incorrect value for option ''replication'': duplicate connection with the
   same replica UUID'
diff --git a/test/replication/sync.test.lua b/test/replication/sync.test.lua
index cae97a26f..52ce88fe2 100644
--- a/test/replication/sync.test.lua
+++ b/test/replication/sync.test.lua
@@ -154,7 +154,7 @@ box.cfg{replication = replication}
 box.info.status -- running
 box.info.ro -- false
 box.info.replication[1].upstream.status -- follow
-test_run:grep_log('replica', 'ER_CFG.*')
+test_run:wait_log("replica", "ER_CFG.*", nil, 200)
 
 test_run:cmd("switch default")
 test_run:cmd("stop server replica")
-- 
2.20.1





More information about the Tarantool-patches mailing list