[PATCH] test: enable parallel mode for replication tests

Sergei Voronezhskii sergw at tarantool.org
Wed Oct 3 17:50:57 MSK 2018


- at the end of every test that creates a replication configuration,
call `test_run:cleanup_cluster()`, which clears `box.space._cluster`
(the common teardown sequence is sketched below the diffstat)
- enable `use_unix_sockets` to avoid 'Address already in use' errors
when tests run in parallel
- instead of checking `box.space.test:count()` or
`#fio.glob('./master/*.xlog')` once, wait for the expected values,
because the load on the replication process increases in parallel
mode (the polling pattern is sketched right after this list)
- in the `catch` test, use a new error injection that simply pauses
`relay_send`, to check the read-only state of the replica (usage
sketched below the branch link)
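
The polling pattern referenced in the third point, condensed from
gc.test.lua below (the space name, row count, and xlog count are
specific to that test; the first loop runs on the replica, the
second on the master):

    fiber = require('fiber')
    fio = require('fio')
    -- On the replica: poll until all expected rows have been
    -- applied instead of asserting the count once.
    while box.space.test == nil or box.space.test:count() < 200 do
        fiber.sleep(0.01)
    end
    -- On the master: wait until garbage collection has removed the
    -- old xlogs rather than checking the directory listing once.
    while #fio.glob('./master/*.xlog') ~= 0 do fiber.sleep(0.01) end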

Part of #2436, #3232
---
BRANCH: https://github.com/tarantool/tarantool/tree/sergw/enable-parallel-test-replication
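
How the new injection is driven, condensed from catch.test.lua below
(the insert loop and the replica start arguments are taken from that
test; `s` is the test space created on the master):

    errinj = box.error.injection
    -- Pause relay_send on the master: the restarted replica cannot
    -- fetch the new rows, so it has to stay in read-only mode.
    errinj.set('ERRINJ_RELAY_STOP_SEND', true)
    for i = 1, 100 do s:insert{i, 'this is test message12345'} end
    test_run:cmd("start server replica with args='0.01'")
    -- Data-modifying statements on the replica now fail with
    -- "Can't modify data because this instance is in read-only mode."
    -- Resume replication once the read-only checks are done.
    errinj.set('ERRINJ_RELAY_STOP_SEND', false)
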
 src/box/relay.cc                            |  7 ++-
 src/errinj.h                                |  1 +
 test/replication/before_replace.result      |  3 +
 test/replication/before_replace.test.lua    |  1 +
 test/replication/catch.result               | 50 ++++++++--------
 test/replication/catch.test.lua             | 39 ++++++------
 test/replication/gc.result                  | 24 ++++----
 test/replication/gc.test.lua                | 18 +++---
 test/replication/local_spaces.result        |  3 +
 test/replication/local_spaces.test.lua      |  1 +
 test/replication/misc.result                | 66 +++++++++++++++------
 test/replication/misc.test.lua              | 45 ++++++++------
 test/replication/on_replace.result          | 13 ++++
 test/replication/on_replace.test.lua        |  4 ++
 test/replication/once.result                | 12 ++++
 test/replication/once.test.lua              |  3 +
 test/replication/quorum.result              |  9 ++-
 test/replication/quorum.test.lua            |  7 ++-
 test/replication/replica_rejoin.result      |  7 +++
 test/replication/replica_rejoin.test.lua    |  2 +
 test/replication/skip_conflict_row.result   |  7 +++
 test/replication/skip_conflict_row.test.lua |  2 +
 test/replication/status.result              |  7 +++
 test/replication/status.test.lua            |  2 +
 test/replication/suite.ini                  |  3 +-
 test/replication/sync.result                |  7 +++
 test/replication/sync.test.lua              |  2 +
 test/replication/wal_off.result             |  7 +++
 test/replication/wal_off.test.lua           |  2 +
 29 files changed, 242 insertions(+), 112 deletions(-)
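
The teardown sequence from the first point, as it now ends most of
the tests below (`replica` stands for whatever server a given test
created):

    test_run:cmd("stop server replica")
    test_run:cmd("cleanup server replica")
    test_run:cmd("delete server replica")
    -- Clears box.space._cluster, so the next test on the same
    -- test-run worker starts with an empty replica set.
    test_run:cleanup_cluster()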

diff --git a/src/box/relay.cc b/src/box/relay.cc
index c90383d4a..c7fd53cb4 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -622,12 +622,17 @@ relay_subscribe(struct replica *replica, int fd, uint64_t sync,
 static void
 relay_send(struct relay *relay, struct xrow_header *packet)
 {
+	struct errinj *inj = errinj(ERRINJ_RELAY_STOP_SEND, ERRINJ_BOOL);
+	while (inj != NULL && inj->bparam) {
+		fiber_sleep(0.01);
+		inj = errinj(ERRINJ_RELAY_STOP_SEND, ERRINJ_BOOL);
+	}
 	packet->sync = relay->sync;
 	relay->last_row_tm = ev_monotonic_now(loop());
 	coio_write_xrow(&relay->io, packet);
 	fiber_gc();
 
-	struct errinj *inj = errinj(ERRINJ_RELAY_TIMEOUT, ERRINJ_DOUBLE);
+	inj = errinj(ERRINJ_RELAY_TIMEOUT, ERRINJ_DOUBLE);
 	if (inj != NULL && inj->dparam > 0)
 		fiber_sleep(inj->dparam);
 }
diff --git a/src/errinj.h b/src/errinj.h
index 84a1fbb5e..eaac24f5d 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -94,6 +94,7 @@ struct errinj {
 	_(ERRINJ_VY_GC, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_VY_LOG_FLUSH, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_VY_LOG_FLUSH_DELAY, ERRINJ_BOOL, {.bparam = false}) \
+	_(ERRINJ_RELAY_STOP_SEND, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_RELAY_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
 	_(ERRINJ_RELAY_REPORT_INTERVAL, ERRINJ_DOUBLE, {.dparam = 0}) \
 	_(ERRINJ_RELAY_FINAL_SLEEP, ERRINJ_BOOL, {.bparam = false}) \
diff --git a/test/replication/before_replace.result b/test/replication/before_replace.result
index 858a52de6..87973b6d1 100644
--- a/test/replication/before_replace.result
+++ b/test/replication/before_replace.result
@@ -223,3 +223,6 @@ test_run:cmd("switch default")
 test_run:drop_cluster(SERVERS)
 ---
 ...
+test_run:cleanup_cluster()
+---
+...
diff --git a/test/replication/before_replace.test.lua b/test/replication/before_replace.test.lua
index f1e590703..bcd264521 100644
--- a/test/replication/before_replace.test.lua
+++ b/test/replication/before_replace.test.lua
@@ -80,3 +80,4 @@ box.space.test:select()
 -- Cleanup.
 test_run:cmd("switch default")
 test_run:drop_cluster(SERVERS)
+test_run:cleanup_cluster()
diff --git a/test/replication/catch.result b/test/replication/catch.result
index aebba819f..6e8e17ac8 100644
--- a/test/replication/catch.result
+++ b/test/replication/catch.result
@@ -1,6 +1,9 @@
 env = require('test_run')
 ---
 ...
+fiber = require('fiber')
+---
+...
 test_run = env.new()
 ---
 ...
@@ -35,7 +38,7 @@ test_run:cmd("switch default")
 s = box.schema.space.create('test', {engine = engine});
 ---
 ...
--- vinyl does not support hash index
+-- Vinyl does not support hash index
 index = s:create_index('primary', {type = (engine == 'vinyl' and 'tree' or 'hash') })
 ---
 ...
@@ -57,14 +60,13 @@ test_run:cmd("stop server replica")
 ---
 - true
 ...
--- insert values on the master while replica is stopped and can't fetch them
-for i=1,100 do s:insert{i, 'this is test message12345'} end
+-- Insert values on the master while replica is stopped and can't fetch them.
+errinj.set('ERRINJ_RELAY_STOP_SEND', true)
 ---
+- ok
 ...
--- sleep after every tuple
-errinj.set("ERRINJ_RELAY_TIMEOUT", 1000.0)
+for i=1,100 do s:insert{i, 'this is test message12345'} end
 ---
-- ok
 ...
 test_run:cmd("start server replica with args='0.01'")
 ---
@@ -75,28 +77,25 @@ test_run:cmd("switch replica")
 - true
 ...
 -- Check that replica doesn't enter read-write mode before
--- catching up with the master: to check that we inject sleep into
--- the master relay_send function and attempt a data modifying
--- statement in replica while it's still fetching data from the
--- master.
--- In the next two cases we try to delete a tuple while replica is
+-- catching up with the master: to check this, we stop sending
+-- rows from the master in the relay_send function and attempt a
+-- data-modifying statement on the replica while it is still
+-- fetching data from the master.
+--
+-- In the next two cases we try to replace a tuple while replica is
 -- catching up with the master (local delete, remote delete) case
 --
--- #1: delete tuple on replica
+-- Case #1: replace tuple on replica locally.
 --
 box.space.test ~= nil
 ---
 - true
 ...
-d = box.space.test:delete{1}
+box.space.test:replace{1}
 ---
 - error: Can't modify data because this instance is in read-only mode.
 ...
-box.space.test:get(1) ~= nil
----
-- true
-...
--- case #2: delete tuple by net.box
+-- Case #2: replace tuple on replica via net.box.
 test_run:cmd("switch default")
 ---
 - true
@@ -108,20 +107,16 @@ test_run:cmd("set variable r_uri to 'replica.listen'")
 c = net_box.connect(r_uri)
 ---
 ...
-d = c.space.test:delete{1}
+d = c.space.test:replace{1}
 ---
 - error: Can't modify data because this instance is in read-only mode.
 ...
-c.space.test:get(1) ~= nil
----
-- true
-...
--- check sync
-errinj.set("ERRINJ_RELAY_TIMEOUT", 0)
+-- Resume replication
+errinj.set('ERRINJ_RELAY_STOP_SEND', false)
 ---
 - ok
 ...
--- cleanup
+-- Cleanup
 test_run:cmd("stop server replica")
 ---
 - true
@@ -130,6 +125,9 @@ test_run:cmd("cleanup server replica")
 ---
 - true
 ...
+test_run:cleanup_cluster()
+---
+...
 box.space.test:drop()
 ---
 ...
diff --git a/test/replication/catch.test.lua b/test/replication/catch.test.lua
index 8cc3242f7..90a5ce8f1 100644
--- a/test/replication/catch.test.lua
+++ b/test/replication/catch.test.lua
@@ -1,8 +1,8 @@
 env = require('test_run')
+fiber = require('fiber')
 test_run = env.new()
 engine = test_run:get_cfg('engine')
 
-
 net_box = require('net.box')
 errinj = box.error.injection
 
@@ -13,7 +13,7 @@ test_run:cmd("switch replica")
 
 test_run:cmd("switch default")
 s = box.schema.space.create('test', {engine = engine});
--- vinyl does not support hash index
+-- Vinyl does not support hash index
 index = s:create_index('primary', {type = (engine == 'vinyl' and 'tree' or 'hash') })
 
 test_run:cmd("switch replica")
@@ -22,43 +22,42 @@ while box.space.test == nil do fiber.sleep(0.01) end
 test_run:cmd("switch default")
 test_run:cmd("stop server replica")
 
--- insert values on the master while replica is stopped and can't fetch them
+-- Insert values on the master while replica is stopped and can't fetch them.
+errinj.set('ERRINJ_RELAY_STOP_SEND', true)
 for i=1,100 do s:insert{i, 'this is test message12345'} end
 
--- sleep after every tuple
-errinj.set("ERRINJ_RELAY_TIMEOUT", 1000.0)
-
 test_run:cmd("start server replica with args='0.01'")
 test_run:cmd("switch replica")
 
 -- Check that replica doesn't enter read-write mode before
--- catching up with the master: to check that we inject sleep into
--- the master relay_send function and attempt a data modifying
--- statement in replica while it's still fetching data from the
--- master.
--- In the next two cases we try to delete a tuple while replica is
+-- catching up with the master: to check this, we stop sending
+-- rows from the master in the relay_send function and attempt a
+-- data-modifying statement on the replica while it is still
+-- fetching data from the master.
+--
+-- In the next two cases we try to replace a tuple while replica is
 -- catching up with the master (local delete, remote delete) case
 --
--- #1: delete tuple on replica
+-- Case #1: replace tuple on replica locally.
 --
 box.space.test ~= nil
-d = box.space.test:delete{1}
-box.space.test:get(1) ~= nil
+box.space.test:replace{1}
 
--- case #2: delete tuple by net.box
+-- Case #2: replace tuple on replica via net.box.
 
 test_run:cmd("switch default")
 test_run:cmd("set variable r_uri to 'replica.listen'")
 c = net_box.connect(r_uri)
-d = c.space.test:delete{1}
-c.space.test:get(1) ~= nil
+d = c.space.test:replace{1}
+
+-- Resume replication
+errinj.set('ERRINJ_RELAY_STOP_SEND', false)
 
--- check sync
-errinj.set("ERRINJ_RELAY_TIMEOUT", 0)
 
--- cleanup
+-- Cleanup
 test_run:cmd("stop server replica")
 test_run:cmd("cleanup server replica")
+test_run:cleanup_cluster()
 box.space.test:drop()
 box.schema.user.revoke('guest', 'replication')
 
diff --git a/test/replication/gc.result b/test/replication/gc.result
index 83d0de293..46a02d0ab 100644
--- a/test/replication/gc.result
+++ b/test/replication/gc.result
@@ -1,6 +1,3 @@
-fio = require 'fio'
----
-...
 test_run = require('test_run').new()
 ---
 ...
@@ -13,6 +10,9 @@ replica_set = require('fast_replica')
 fiber = require('fiber')
 ---
 ...
+fio = require('fio')
+---
+...
 test_run:cleanup_cluster()
 ---
 ...
@@ -95,7 +95,7 @@ test_run:cmd("switch replica")
 fiber = require('fiber')
 ---
 ...
-while box.space.test:count() < 200 do fiber.sleep(0.01) end
+while box.space.test == nil or box.space.test:count() < 200 do fiber.sleep(0.01) end
 ---
 ...
 box.space.test:count()
@@ -119,9 +119,9 @@ wait_gc(1)
 ---
 - true
 ...
--- Make sure the replica will receive data it is subscribed
--- to long enough for us to invoke garbage collection.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05)
+-- Make sure the replica does not receive new data while
+-- we are testing garbage collection.
+box.error.injection.set("ERRINJ_RELAY_STOP_SEND", true)
 ---
 - ok
 ...
@@ -153,13 +153,12 @@ box.snapshot()
 ---
 - true
 ...
-#fio.glob('./master/*.xlog') == 2 or fio.listdir('./master')
+while #fio.glob('./master/*.xlog') ~= 2 do fiber.sleep(0.01) end
 ---
-- true
 ...
--- Remove the timeout injection so that the replica catches
+-- Resume replication so that the replica catches
 -- up quickly.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0)
+box.error.injection.set("ERRINJ_RELAY_STOP_SEND", false)
 ---
 - ok
 ...
@@ -188,9 +187,8 @@ wait_gc(1)
 ---
 - true
 ...
-#fio.glob('./master/*.xlog') == 0 or fio.listdir('./master')
+while #fio.glob('./master/*.xlog') ~= 0 do fiber.sleep(0.01) end
 ---
-- true
 ...
 --
 -- Check that the master doesn't delete xlog files sent to the
diff --git a/test/replication/gc.test.lua b/test/replication/gc.test.lua
index eed76850c..eb7fee93c 100644
--- a/test/replication/gc.test.lua
+++ b/test/replication/gc.test.lua
@@ -1,8 +1,8 @@
-fio = require 'fio'
 test_run = require('test_run').new()
 engine = test_run:get_cfg('engine')
 replica_set = require('fast_replica')
 fiber = require('fiber')
+fio = require('fio')
 
 test_run:cleanup_cluster()
 
@@ -52,7 +52,7 @@ test_run:cmd("start server replica")
 -- data from the master. Check it.
 test_run:cmd("switch replica")
 fiber = require('fiber')
-while box.space.test:count() < 200 do fiber.sleep(0.01) end
+while box.space.test == nil or box.space.test:count() < 200 do fiber.sleep(0.01) end
 box.space.test:count()
 test_run:cmd("switch default")
 
@@ -61,9 +61,9 @@ test_run:cmd("switch default")
 wait_gc(1)
 #box.info.gc().checkpoints == 1 or box.info.gc()
 #fio.glob('./master/*.xlog') == 1 or fio.listdir('./master')
--- Make sure the replica will receive data it is subscribed
--- to long enough for us to invoke garbage collection.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05)
+-- Make sure the replica does not receive new data while
+-- we are testing garbage collection.
+box.error.injection.set("ERRINJ_RELAY_STOP_SEND", true)
 
 -- Send more data to the replica.
 -- Need to do 2 snapshots here, otherwise the replica would
@@ -78,11 +78,11 @@ box.snapshot()
 -- xlogs needed by the replica.
 box.snapshot()
 #box.info.gc().checkpoints == 1 or box.info.gc()
-#fio.glob('./master/*.xlog') == 2 or fio.listdir('./master')
+while #fio.glob('./master/*.xlog') ~= 2 do fiber.sleep(0.01) end
 
--- Remove the timeout injection so that the replica catches
+-- Resume replication so that the replica catches
 -- up quickly.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0)
+box.error.injection.set("ERRINJ_RELAY_STOP_SEND", false)
 
 -- Check that the replica received all data from the master.
 test_run:cmd("switch replica")
@@ -94,7 +94,7 @@ test_run:cmd("switch default")
 -- from the old checkpoint.
 wait_gc(1)
 #box.info.gc().checkpoints == 1 or box.info.gc()
-#fio.glob('./master/*.xlog') == 0 or fio.listdir('./master')
+while #fio.glob('./master/*.xlog') ~= 0 do fiber.sleep(0.01) end
 --
 -- Check that the master doesn't delete xlog files sent to the
 -- replica until it receives a confirmation that the data has
diff --git a/test/replication/local_spaces.result b/test/replication/local_spaces.result
index 151735530..4de223261 100644
--- a/test/replication/local_spaces.result
+++ b/test/replication/local_spaces.result
@@ -216,6 +216,9 @@ test_run:cmd("cleanup server replica")
 ---
 - true
 ...
+test_run:cleanup_cluster()
+---
+...
 box.schema.user.revoke('guest', 'replication')
 ---
 ...
diff --git a/test/replication/local_spaces.test.lua b/test/replication/local_spaces.test.lua
index 06e2b0bd2..633cc9f1a 100644
--- a/test/replication/local_spaces.test.lua
+++ b/test/replication/local_spaces.test.lua
@@ -76,6 +76,7 @@ box.space.test3:select()
 test_run:cmd("switch default")
 test_run:cmd("stop server replica")
 test_run:cmd("cleanup server replica")
+test_run:cleanup_cluster()
 box.schema.user.revoke('guest', 'replication')
 
 s1:select()
diff --git a/test/replication/misc.result b/test/replication/misc.result
index f8aa8dab6..937ef1b24 100644
--- a/test/replication/misc.result
+++ b/test/replication/misc.result
@@ -88,6 +88,13 @@ test_run:cmd('cleanup server test')
 box.cfg{read_only = false}
 ---
 ...
+test_run:cmd('delete server test')
+---
+- true
+...
+test_run:cleanup_cluster()
+---
+...
 -- gh-3160 - Send heartbeats if there are changes from a remote master only
 SERVERS = { 'autobootstrap1', 'autobootstrap2', 'autobootstrap3' }
 ---
@@ -106,7 +113,7 @@ test_run:cmd("switch autobootstrap1")
 test_run = require('test_run').new()
 ---
 ...
-box.cfg{replication_timeout = 0.01, replication_connect_timeout=0.01}
+box.cfg{replication_timeout = 0.2, replication_connect_timeout=0.2}
 ---
 ...
 test_run:cmd("switch autobootstrap2")
@@ -116,7 +123,7 @@ test_run:cmd("switch autobootstrap2")
 test_run = require('test_run').new()
 ---
 ...
-box.cfg{replication_timeout = 0.01, replication_connect_timeout=0.01}
+box.cfg{replication_timeout = 0.2, replication_connect_timeout=0.2}
 ---
 ...
 test_run:cmd("switch autobootstrap3")
@@ -129,7 +136,7 @@ test_run = require('test_run').new()
 fiber=require('fiber')
 ---
 ...
-box.cfg{replication_timeout = 0.01, replication_connect_timeout=0.01}
+box.cfg{replication_timeout = 0.2, replication_connect_timeout=0.2}
 ---
 ...
 _ = box.schema.space.create('test_timeout'):create_index('pk')
@@ -140,15 +147,16 @@ test_run:cmd("setopt delimiter ';'")
 - true
 ...
 function test_timeout()
+    local replicaA = box.info.replication[1].upstream or box.info.replication[2].upstream
+    local replicaB = box.info.replication[3].upstream or box.info.replication[2].upstream
     for i = 0, 99 do 
         box.space.test_timeout:replace({1})
-        fiber.sleep(0.005)
-        local rinfo = box.info.replication
-        if rinfo[1].upstream and rinfo[1].upstream.status ~= 'follow' or
-           rinfo[2].upstream and rinfo[2].upstream.status ~= 'follow' or
-           rinfo[3].upstream and rinfo[3].upstream.status ~= 'follow' then
-            return error('Replication broken')
-        end
+        local n = 200
+        repeat
+            fiber.sleep(0.001)
+            n = n - 1
+            if n == 0 then return error(box.info.replication) end
+        until replicaA.status == 'follow' and replicaB.status == 'follow'
     end
     return true
 end ;
@@ -158,6 +166,7 @@ test_run:cmd("setopt delimiter ''");
 ---
 - true
 ...
+-- The replica status is checked 100 times, each check within replication_timeout.
 test_timeout()
 ---
 - true
@@ -229,6 +238,9 @@ test_run:cmd("switch default")
 test_run:drop_cluster(SERVERS)
 ---
 ...
+test_run:cleanup_cluster()
+---
+...
 -- gh-3642 - Check that socket file descriptor doesn't leak
 -- when a replica is disconnected.
 rlimit = require('rlimit')
@@ -249,15 +261,15 @@ lim.rlim_cur = 64
 rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
 ---
 ...
-test_run:cmd('create server sock with rpl_master=default, script="replication/replica.lua"')
+test_run:cmd('create server bork with rpl_master=default, script="replication/replica.lua"')
 ---
 - true
 ...
-test_run:cmd(string.format('start server sock'))
+test_run:cmd('start server bork')
 ---
 - true
 ...
-test_run:cmd('switch sock')
+test_run:cmd('switch bork')
 ---
 - true
 ...
@@ -299,14 +311,21 @@ lim.rlim_cur = old_fno
 rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
 ---
 ...
-test_run:cmd('stop server sock')
+test_run:cmd("stop server bork")
+---
+- true
+...
+test_run:cmd("cleanup server bork")
 ---
 - true
 ...
-test_run:cmd('cleanup server sock')
+test_run:cmd("delete server bork")
 ---
 - true
 ...
+test_run:cleanup_cluster()
+---
+...
 box.schema.user.revoke('guest', 'replication')
 ---
 ...
@@ -342,6 +361,17 @@ test_run:cmd('cleanup server er_load2')
 ---
 - true
 ...
+test_run:cmd('delete server er_load1')
+---
+- true
+...
+test_run:cmd('delete server er_load2')
+---
+- true
+...
+test_run:cleanup_cluster()
+---
+...
 --
 -- Test case for gh-3637. Before the fix replica would exit with
 -- an error. Now check that we don't hang and successfully connect.
@@ -349,9 +379,6 @@ test_run:cmd('cleanup server er_load2')
 fiber = require('fiber')
 ---
 ...
-test_run:cleanup_cluster()
----
-...
 test_run:cmd("create server replica_auth with rpl_master=default, script='replication/replica_auth.lua'")
 ---
 - true
@@ -391,6 +418,9 @@ test_run:cmd("delete server replica_auth")
 ---
 - true
 ...
+test_run:cleanup_cluster()
+---
+...
 box.schema.user.drop('cluster')
 ---
 ...
diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua
index 46726b7f4..cb658f6d0 100644
--- a/test/replication/misc.test.lua
+++ b/test/replication/misc.test.lua
@@ -32,6 +32,8 @@ test_run:cmd(string.format('start server test with args="%s"', replica_uuid))
 test_run:cmd('stop server test')
 test_run:cmd('cleanup server test')
 box.cfg{read_only = false}
+test_run:cmd('delete server test')
+test_run:cleanup_cluster()
 
 -- gh-3160 - Send heartbeats if there are changes from a remote master only
 SERVERS = { 'autobootstrap1', 'autobootstrap2', 'autobootstrap3' }
@@ -41,30 +43,32 @@ test_run:create_cluster(SERVERS, "replication", {args="0.1"})
 test_run:wait_fullmesh(SERVERS)
 test_run:cmd("switch autobootstrap1")
 test_run = require('test_run').new()
-box.cfg{replication_timeout = 0.01, replication_connect_timeout=0.01}
+box.cfg{replication_timeout = 0.2, replication_connect_timeout=0.2}
 test_run:cmd("switch autobootstrap2")
 test_run = require('test_run').new()
-box.cfg{replication_timeout = 0.01, replication_connect_timeout=0.01}
+box.cfg{replication_timeout = 0.2, replication_connect_timeout=0.2}
 test_run:cmd("switch autobootstrap3")
 test_run = require('test_run').new()
 fiber=require('fiber')
-box.cfg{replication_timeout = 0.01, replication_connect_timeout=0.01}
+box.cfg{replication_timeout = 0.2, replication_connect_timeout=0.2}
 _ = box.schema.space.create('test_timeout'):create_index('pk')
 test_run:cmd("setopt delimiter ';'")
 function test_timeout()
+    local replicaA = box.info.replication[1].upstream or box.info.replication[2].upstream
+    local replicaB = box.info.replication[3].upstream or box.info.replication[2].upstream
     for i = 0, 99 do 
         box.space.test_timeout:replace({1})
-        fiber.sleep(0.005)
-        local rinfo = box.info.replication
-        if rinfo[1].upstream and rinfo[1].upstream.status ~= 'follow' or
-           rinfo[2].upstream and rinfo[2].upstream.status ~= 'follow' or
-           rinfo[3].upstream and rinfo[3].upstream.status ~= 'follow' then
-            return error('Replication broken')
-        end
+        local n = 200
+        repeat
+            fiber.sleep(0.001)
+            n = n - 1
+            if n == 0 then return error(box.info.replication) end
+        until replicaA.status == 'follow' and replicaB.status == 'follow'
     end
     return true
 end ;
 test_run:cmd("setopt delimiter ''");
+-- The replica status is checked 100 times, each check within replication_timeout.
 test_timeout()
 
 -- gh-3247 - Sequence-generated value is not replicated in case
@@ -89,6 +93,7 @@ box.space.space1:drop()
 
 test_run:cmd("switch default")
 test_run:drop_cluster(SERVERS)
+test_run:cleanup_cluster()
 
 -- gh-3642 - Check that socket file descriptor doesn't leak
 -- when a replica is disconnected.
@@ -99,9 +104,9 @@ old_fno = lim.rlim_cur
 lim.rlim_cur = 64
 rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
 
-test_run:cmd('create server sock with rpl_master=default, script="replication/replica.lua"')
-test_run:cmd(string.format('start server sock'))
-test_run:cmd('switch sock')
+test_run:cmd('create server bork with rpl_master=default, script="replication/replica.lua"')
+test_run:cmd('start server bork')
+test_run:cmd('switch bork')
 test_run = require('test_run').new()
 fiber = require('fiber')
 test_run:cmd("setopt delimiter ';'")
@@ -122,8 +127,10 @@ test_run:cmd('switch default')
 lim.rlim_cur = old_fno
 rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
 
-test_run:cmd('stop server sock')
-test_run:cmd('cleanup server sock')
+test_run:cmd("stop server bork")
+test_run:cmd("cleanup server bork")
+test_run:cmd("delete server bork")
+test_run:cleanup_cluster()
 
 box.schema.user.revoke('guest', 'replication')
 
@@ -138,15 +145,15 @@ test_run:cmd('stop server er_load1')
 -- er_load2 exits automatically.
 test_run:cmd('cleanup server er_load1')
 test_run:cmd('cleanup server er_load2')
+test_run:cmd('delete server er_load1')
+test_run:cmd('delete server er_load2')
+test_run:cleanup_cluster()
 
 --
 -- Test case for gh-3637. Before the fix replica would exit with
 -- an error. Now check that we don't hang and successfully connect.
 --
 fiber = require('fiber')
-
-test_run:cleanup_cluster()
-
 test_run:cmd("create server replica_auth with rpl_master=default, script='replication/replica_auth.lua'")
 test_run:cmd("start server replica_auth with wait=False, wait_load=False, args='cluster:pass 0.05'")
 -- Wait a bit to make sure replica waits till user is created.
@@ -161,6 +168,8 @@ _ = test_run:wait_vclock('replica_auth', vclock)
 test_run:cmd("stop server replica_auth")
 test_run:cmd("cleanup server replica_auth")
 test_run:cmd("delete server replica_auth")
+test_run:cleanup_cluster()
+
 box.schema.user.drop('cluster')
 
 --
diff --git a/test/replication/on_replace.result b/test/replication/on_replace.result
index 4ffa3b25a..2e95b90ea 100644
--- a/test/replication/on_replace.result
+++ b/test/replication/on_replace.result
@@ -63,6 +63,9 @@ test_run:cmd("switch replica")
 ---
 - true
 ...
+fiber = require('fiber')
+---
+...
 while box.space.test:count() < 2 do fiber.sleep(0.01) end
 ---
 ...
@@ -88,6 +91,13 @@ test_run:cmd("cleanup server replica")
 ---
 - true
 ...
+test_run:cmd("delete server replica")
+---
+- true
+...
+test_run:cleanup_cluster()
+---
+...
 box.space.test:drop()
 ---
 ...
@@ -177,3 +187,6 @@ _ = test_run:cmd('switch default')
 test_run:drop_cluster(SERVERS)
 ---
 ...
+test_run:cleanup_cluster()
+---
+...
diff --git a/test/replication/on_replace.test.lua b/test/replication/on_replace.test.lua
index 371b71cbd..e34832103 100644
--- a/test/replication/on_replace.test.lua
+++ b/test/replication/on_replace.test.lua
@@ -26,6 +26,7 @@ session_type
 test_run:cmd("switch default")
 box.space.test:insert{2}
 test_run:cmd("switch replica")
+fiber = require('fiber')
 while box.space.test:count() < 2 do fiber.sleep(0.01) end
 --
 -- applier
@@ -37,6 +38,8 @@ test_run:cmd("switch default")
 --
 test_run:cmd("stop server replica")
 test_run:cmd("cleanup server replica")
+test_run:cmd("delete server replica")
+test_run:cleanup_cluster()
 box.space.test:drop()
 box.schema.user.revoke('guest', 'replication')
 
@@ -73,3 +76,4 @@ box.space.s2:select()
 
 _ = test_run:cmd('switch default')
 test_run:drop_cluster(SERVERS)
+test_run:cleanup_cluster()
diff --git a/test/replication/once.result b/test/replication/once.result
index 99ac05b72..fd787915e 100644
--- a/test/replication/once.result
+++ b/test/replication/once.result
@@ -85,3 +85,15 @@ once -- 1
 box.cfg{read_only = false}
 ---
 ...
+box.space._schema:delete{"oncero"}
+---
+- ['oncero']
+...
+box.space._schema:delete{"oncekey"}
+---
+- ['oncekey']
+...
+box.space._schema:delete{"oncetest"}
+---
+- ['oncetest']
+...
diff --git a/test/replication/once.test.lua b/test/replication/once.test.lua
index 264c63670..813fbfdab 100644
--- a/test/replication/once.test.lua
+++ b/test/replication/once.test.lua
@@ -28,3 +28,6 @@ box.cfg{read_only = true}
 box.once("ro", f, 1) -- ok, already done
 once -- 1
 box.cfg{read_only = false}
+box.space._schema:delete{"oncero"}
+box.space._schema:delete{"oncekey"}
+box.space._schema:delete{"oncetest"}
diff --git a/test/replication/quorum.result b/test/replication/quorum.result
index 265b099b7..2642fe8f4 100644
--- a/test/replication/quorum.result
+++ b/test/replication/quorum.result
@@ -435,18 +435,21 @@ test_run:cmd('switch default')
 ---
 - true
 ...
-test_run:cmd('stop server replica_quorum')
+test_run:cmd("stop server replica_quorum")
 ---
 - true
 ...
-test_run:cmd('cleanup server replica_quorum')
+test_run:cmd("cleanup server replica_quorum")
 ---
 - true
 ...
-test_run:cmd('delete server replica_quorum')
+test_run:cmd("delete server replica_quorum")
 ---
 - true
 ...
+test_run:cleanup_cluster()
+---
+...
 box.schema.user.revoke('guest', 'replication')
 ---
 ...
diff --git a/test/replication/quorum.test.lua b/test/replication/quorum.test.lua
index 5a43275c2..24d1b27c4 100644
--- a/test/replication/quorum.test.lua
+++ b/test/replication/quorum.test.lua
@@ -166,7 +166,8 @@ test_run:cmd('switch replica_quorum')
 box.cfg{replication={INSTANCE_URI, nonexistent_uri(1)}}
 box.info.id
 test_run:cmd('switch default')
-test_run:cmd('stop server replica_quorum')
-test_run:cmd('cleanup server replica_quorum')
-test_run:cmd('delete server replica_quorum')
+test_run:cmd("stop server replica_quorum")
+test_run:cmd("cleanup server replica_quorum")
+test_run:cmd("delete server replica_quorum")
+test_run:cleanup_cluster()
 box.schema.user.revoke('guest', 'replication')
diff --git a/test/replication/replica_rejoin.result b/test/replication/replica_rejoin.result
index 4370fae4b..37849850f 100644
--- a/test/replication/replica_rejoin.result
+++ b/test/replication/replica_rejoin.result
@@ -242,6 +242,13 @@ test_run:cmd("cleanup server replica")
 ---
 - true
 ...
+test_run:cmd("delete server replica")
+---
+- true
+...
+test_run:cleanup_cluster()
+---
+...
 box.space.test:drop()
 ---
 ...
diff --git a/test/replication/replica_rejoin.test.lua b/test/replication/replica_rejoin.test.lua
index f998f60d0..950ec7532 100644
--- a/test/replication/replica_rejoin.test.lua
+++ b/test/replication/replica_rejoin.test.lua
@@ -87,5 +87,7 @@ box.space.test:select()
 test_run:cmd("switch default")
 test_run:cmd("stop server replica")
 test_run:cmd("cleanup server replica")
+test_run:cmd("delete server replica")
+test_run:cleanup_cluster()
 box.space.test:drop()
 box.schema.user.revoke('guest', 'replication')
diff --git a/test/replication/skip_conflict_row.result b/test/replication/skip_conflict_row.result
index 29963f56a..6ca13b472 100644
--- a/test/replication/skip_conflict_row.result
+++ b/test/replication/skip_conflict_row.result
@@ -91,6 +91,13 @@ test_run:cmd("cleanup server replica")
 ---
 - true
 ...
+test_run:cmd("delete server replica")
+---
+- true
+...
+test_run:cleanup_cluster()
+---
+...
 box.space.test:drop()
 ---
 ...
diff --git a/test/replication/skip_conflict_row.test.lua b/test/replication/skip_conflict_row.test.lua
index 5f7d6ead3..4406ced95 100644
--- a/test/replication/skip_conflict_row.test.lua
+++ b/test/replication/skip_conflict_row.test.lua
@@ -31,5 +31,7 @@ box.info.status
 -- cleanup
 test_run:cmd("stop server replica")
 test_run:cmd("cleanup server replica")
+test_run:cmd("delete server replica")
+test_run:cleanup_cluster()
 box.space.test:drop()
 box.schema.user.revoke('guest', 'replication')
diff --git a/test/replication/status.result b/test/replication/status.result
index 8394b98c1..9e69f2478 100644
--- a/test/replication/status.result
+++ b/test/replication/status.result
@@ -391,3 +391,10 @@ test_run:cmd("cleanup server replica")
 ---
 - true
 ...
+test_run:cmd("delete server replica")
+---
+- true
+...
+test_run:cleanup_cluster()
+---
+...
diff --git a/test/replication/status.test.lua b/test/replication/status.test.lua
index 8bb25e0c6..cfdf6acdb 100644
--- a/test/replication/status.test.lua
+++ b/test/replication/status.test.lua
@@ -142,3 +142,5 @@ test_run:cmd('switch default')
 box.schema.user.revoke('guest', 'replication')
 test_run:cmd("stop server replica")
 test_run:cmd("cleanup server replica")
+test_run:cmd("delete server replica")
+test_run:cleanup_cluster()
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index f4abc7af1..5cbc371c2 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -6,5 +6,6 @@ disabled = consistent.test.lua
 release_disabled = catch.test.lua errinj.test.lua gc.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua
 config = suite.cfg
 lua_libs = lua/fast_replica.lua lua/rlimit.lua
+use_unix_sockets = True
 long_run = prune.test.lua
-is_parallel = False
+is_parallel = True
diff --git a/test/replication/sync.result b/test/replication/sync.result
index 81de60758..b2381ac59 100644
--- a/test/replication/sync.result
+++ b/test/replication/sync.result
@@ -303,6 +303,13 @@ test_run:cmd("cleanup server replica")
 ---
 - true
 ...
+test_run:cmd("delete server replica")
+---
+- true
+...
+test_run:cleanup_cluster()
+---
+...
 box.space.test:drop()
 ---
 ...
diff --git a/test/replication/sync.test.lua b/test/replication/sync.test.lua
index a5cfab8de..51131667d 100644
--- a/test/replication/sync.test.lua
+++ b/test/replication/sync.test.lua
@@ -145,6 +145,8 @@ test_run:grep_log('replica', 'ER_CFG.*')
 test_run:cmd("switch default")
 test_run:cmd("stop server replica")
 test_run:cmd("cleanup server replica")
+test_run:cmd("delete server replica")
+test_run:cleanup_cluster()
 
 box.space.test:drop()
 box.schema.user.revoke('guest', 'replication')
diff --git a/test/replication/wal_off.result b/test/replication/wal_off.result
index e3b5709e9..e0ae84bd7 100644
--- a/test/replication/wal_off.result
+++ b/test/replication/wal_off.result
@@ -107,6 +107,13 @@ test_run:cmd("cleanup server wal_off")
 ---
 - true
 ...
+test_run:cmd("delete server wal_off")
+---
+- true
+...
+test_run:cleanup_cluster()
+---
+...
 box.schema.user.revoke('guest', 'replication')
 ---
 ...
diff --git a/test/replication/wal_off.test.lua b/test/replication/wal_off.test.lua
index 81fcf0b33..110f2f1f7 100644
--- a/test/replication/wal_off.test.lua
+++ b/test/replication/wal_off.test.lua
@@ -37,5 +37,7 @@ box.cfg { replication = "" }
 
 test_run:cmd("stop server wal_off")
 test_run:cmd("cleanup server wal_off")
+test_run:cmd("delete server wal_off")
+test_run:cleanup_cluster()
 
 box.schema.user.revoke('guest', 'replication')
-- 
2.18.0



