Tarantool development patches archive
 help / color / mirror / Atom feed
From: Sergei Voronezhskii <sergw@tarantool.org>
To: tarantool-patches@freelists.org
Cc: Alexander Turenko <alexander.turenko@tarantool.org>,
	Vladimir Davydov <vdavydov.dev@gmail.com>
Subject: [PATCH 2/4] test: errinj for pause relay_send
Date: Fri,  5 Oct 2018 12:02:13 +0300	[thread overview]
Message-ID: <20181005090215.6160-3-sergw@tarantool.org> (raw)
In-Reply-To: <20181005090215.6160-1-sergw@tarantool.org>

Instead of using a timeout we need to just pause `relay_send`. We can't
rely on a timeout because of varying system load in parallel mode. Add a
new errinj which checks a boolean in a loop and, while it is `true`, does
not let `relay_send` proceed to the next statement.

Also change `delete` to `replace` here, and look up the xlog files in a
loop with a short sleep until the file count is as expected.

Part of #2436, #3232
---
 src/box/relay.cc                |  7 +++++-
 src/errinj.h                    |  1 +
 test/replication/catch.result   | 44 ++++++++++++++-------------------
 test/replication/catch.test.lua | 36 +++++++++++++--------------
 test/replication/gc.result      | 18 ++++++--------
 test/replication/gc.test.lua    | 16 ++++++------
 6 files changed, 58 insertions(+), 64 deletions(-)

diff --git a/src/box/relay.cc b/src/box/relay.cc
index c90383d4a..8618fa81a 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -622,12 +622,17 @@ relay_subscribe(struct replica *replica, int fd, uint64_t sync,
 static void
 relay_send(struct relay *relay, struct xrow_header *packet)
 {
+    struct errinj *inj = errinj(ERRINJ_RELAY_SEND_DELAY, ERRINJ_BOOL);
+    while (inj->bparam) {
+        fiber_sleep(0.01);
+        inj = errinj(ERRINJ_RELAY_SEND_DELAY, ERRINJ_BOOL);
+    }
 	packet->sync = relay->sync;
 	relay->last_row_tm = ev_monotonic_now(loop());
 	coio_write_xrow(&relay->io, packet);
 	fiber_gc();
 
-	struct errinj *inj = errinj(ERRINJ_RELAY_TIMEOUT, ERRINJ_DOUBLE);
+	inj = errinj(ERRINJ_RELAY_TIMEOUT, ERRINJ_DOUBLE);
 	if (inj != NULL && inj->dparam > 0)
 		fiber_sleep(inj->dparam);
 }
diff --git a/src/errinj.h b/src/errinj.h
index 84a1fbb5e..bf6c15ba5 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -94,6 +94,7 @@ struct errinj {
 	_(ERRINJ_VY_GC, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_VY_LOG_FLUSH, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_VY_LOG_FLUSH_DELAY, ERRINJ_BOOL, {.bparam = false}) \
+	_(ERRINJ_RELAY_SEND_DELAY, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_RELAY_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
 	_(ERRINJ_RELAY_REPORT_INTERVAL, ERRINJ_DOUBLE, {.dparam = 0}) \
 	_(ERRINJ_RELAY_FINAL_SLEEP, ERRINJ_BOOL, {.bparam = false}) \
diff --git a/test/replication/catch.result b/test/replication/catch.result
index e23f33cef..b4ddc5d51 100644
--- a/test/replication/catch.result
+++ b/test/replication/catch.result
@@ -35,7 +35,7 @@ test_run:cmd("switch default")
 s = box.schema.space.create('test', {engine = engine});
 ---
 ...
--- vinyl does not support hash index
+-- Vinyl does not support hash index
 index = s:create_index('primary', {type = (engine == 'vinyl' and 'tree' or 'hash') })
 ---
 ...
@@ -57,14 +57,13 @@ test_run:cmd("stop server replica")
 ---
 - true
 ...
--- insert values on the master while replica is stopped and can't fetch them
-for i=1,100 do s:insert{i, 'this is test message12345'} end
+-- Insert values on the master while replica is stopped and can't fetch them.
+errinj.set('ERRINJ_RELAY_SEND_DELAY', true)
 ---
+- ok
 ...
--- sleep after every tuple
-errinj.set("ERRINJ_RELAY_TIMEOUT", 1000.0)
+for i=1,100 do s:insert{i, 'this is test message12345'} end
 ---
-- ok
 ...
 test_run:cmd("start server replica with args='0.01'")
 ---
@@ -75,28 +74,25 @@ test_run:cmd("switch replica")
 - true
 ...
 -- Check that replica doesn't enter read-write mode before
--- catching up with the master: to check that we inject sleep into
--- the master relay_send function and attempt a data modifying
--- statement in replica while it's still fetching data from the
--- master.
--- In the next two cases we try to delete a tuple while replica is
+-- catching up with the master: to check that we stop sending
+-- rows on the master in relay_send function and attempt a data
+-- modifying statement in replica while it's still fetching data
+-- from the master.
+--
+-- In the next two cases we try to replace a tuple while replica is
 -- catching up with the master (local delete, remote delete) case
 --
--- #1: delete tuple on replica
+-- Case #1: replace tuple on replica locally.
 --
 box.space.test ~= nil
 ---
 - true
 ...
-d = box.space.test:delete{1}
+box.space.test:replace{1}
 ---
 - error: Can't modify data because this instance is in read-only mode.
 ...
-box.space.test:get(1) ~= nil
----
-- true
-...
--- case #2: delete tuple by net.box
+-- Case #2: replace tuple on replica by net.box.
 test_run:cmd("switch default")
 ---
 - true
@@ -108,20 +104,16 @@ test_run:cmd("set variable r_uri to 'replica.listen'")
 c = net_box.connect(r_uri)
 ---
 ...
-d = c.space.test:delete{1}
+d = c.space.test:replace{1}
 ---
 - error: Can't modify data because this instance is in read-only mode.
 ...
-c.space.test:get(1) ~= nil
----
-- true
-...
--- check sync
-errinj.set("ERRINJ_RELAY_TIMEOUT", 0)
+-- Resume replicaton
+errinj.set('ERRINJ_RELAY_SEND_DELAY', false)
 ---
 - ok
 ...
--- cleanup
+-- Cleanup
 test_run:cmd("stop server replica")
 ---
 - true
diff --git a/test/replication/catch.test.lua b/test/replication/catch.test.lua
index 217328772..5223e3a24 100644
--- a/test/replication/catch.test.lua
+++ b/test/replication/catch.test.lua
@@ -13,7 +13,7 @@ test_run:cmd("switch replica")
 
 test_run:cmd("switch default")
 s = box.schema.space.create('test', {engine = engine});
--- vinyl does not support hash index
+-- Vinyl does not support hash index
 index = s:create_index('primary', {type = (engine == 'vinyl' and 'tree' or 'hash') })
 
 test_run:cmd("switch replica")
@@ -22,41 +22,39 @@ while box.space.test == nil do fiber.sleep(0.01) end
 test_run:cmd("switch default")
 test_run:cmd("stop server replica")
 
--- insert values on the master while replica is stopped and can't fetch them
+-- Insert values on the master while replica is stopped and can't fetch them.
+errinj.set('ERRINJ_RELAY_SEND_DELAY', true)
 for i=1,100 do s:insert{i, 'this is test message12345'} end
 
--- sleep after every tuple
-errinj.set("ERRINJ_RELAY_TIMEOUT", 1000.0)
-
 test_run:cmd("start server replica with args='0.01'")
 test_run:cmd("switch replica")
 
 -- Check that replica doesn't enter read-write mode before
--- catching up with the master: to check that we inject sleep into
--- the master relay_send function and attempt a data modifying
--- statement in replica while it's still fetching data from the
--- master.
--- In the next two cases we try to delete a tuple while replica is
+-- catching up with the master: to check that we stop sending
+-- rows on the master in relay_send function and attempt a data
+-- modifying statement in replica while it's still fetching data
+-- from the master.
+--
+-- In the next two cases we try to replace a tuple while replica is
 -- catching up with the master (local delete, remote delete) case
 --
--- #1: delete tuple on replica
+-- Case #1: replace tuple on replica locally.
 --
 box.space.test ~= nil
-d = box.space.test:delete{1}
-box.space.test:get(1) ~= nil
+box.space.test:replace{1}
 
--- case #2: delete tuple by net.box
+-- Case #2: replace tuple on replica by net.box.
 
 test_run:cmd("switch default")
 test_run:cmd("set variable r_uri to 'replica.listen'")
 c = net_box.connect(r_uri)
-d = c.space.test:delete{1}
-c.space.test:get(1) ~= nil
+d = c.space.test:replace{1}
+
+-- Resume replicaton
+errinj.set('ERRINJ_RELAY_SEND_DELAY', false)
 
--- check sync
-errinj.set("ERRINJ_RELAY_TIMEOUT", 0)
 
--- cleanup
+-- Cleanup
 test_run:cmd("stop server replica")
 test_run:cmd("cleanup server replica")
 test_run:cleanup_cluster()
diff --git a/test/replication/gc.result b/test/replication/gc.result
index 83d0de293..ef6463d87 100644
--- a/test/replication/gc.result
+++ b/test/replication/gc.result
@@ -95,7 +95,7 @@ test_run:cmd("switch replica")
 fiber = require('fiber')
 ---
 ...
-while box.space.test:count() < 200 do fiber.sleep(0.01) end
+while box.space.test == nil or box.space.test:count() < 200 do fiber.sleep(0.01) end
 ---
 ...
 box.space.test:count()
@@ -119,9 +119,9 @@ wait_gc(1)
 ---
 - true
 ...
--- Make sure the replica will receive data it is subscribed
--- to long enough for us to invoke garbage collection.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05)
+-- Make sure the replica will not receive data until
+-- we test garbage collection.
+box.error.injection.set("ERRINJ_RELAY_SEND_DELAY", true)
 ---
 - ok
 ...
@@ -153,13 +153,12 @@ box.snapshot()
 ---
 - true
 ...
-#fio.glob('./master/*.xlog') == 2 or fio.listdir('./master')
+while #fio.glob('./master/*.xlog') ~= 2 do fiber.sleep(0.01) end
 ---
-- true
 ...
--- Remove the timeout injection so that the replica catches
+-- Resume replicaton so that the replica catches
 -- up quickly.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0)
+box.error.injection.set("ERRINJ_RELAY_SEND_DELAY", false)
 ---
 - ok
 ...
@@ -188,9 +187,8 @@ wait_gc(1)
 ---
 - true
 ...
-#fio.glob('./master/*.xlog') == 0 or fio.listdir('./master')
+while #fio.glob('./master/*.xlog') ~= 0 do fiber.sleep(0.01) end
 ---
-- true
 ...
 --
 -- Check that the master doesn't delete xlog files sent to the
diff --git a/test/replication/gc.test.lua b/test/replication/gc.test.lua
index eed76850c..ec3bf6baa 100644
--- a/test/replication/gc.test.lua
+++ b/test/replication/gc.test.lua
@@ -52,7 +52,7 @@ test_run:cmd("start server replica")
 -- data from the master. Check it.
 test_run:cmd("switch replica")
 fiber = require('fiber')
-while box.space.test:count() < 200 do fiber.sleep(0.01) end
+while box.space.test == nil or box.space.test:count() < 200 do fiber.sleep(0.01) end
 box.space.test:count()
 test_run:cmd("switch default")
 
@@ -61,9 +61,9 @@ test_run:cmd("switch default")
 wait_gc(1)
 #box.info.gc().checkpoints == 1 or box.info.gc()
 #fio.glob('./master/*.xlog') == 1 or fio.listdir('./master')
--- Make sure the replica will receive data it is subscribed
--- to long enough for us to invoke garbage collection.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05)
+-- Make sure the replica will not receive data until
+-- we test garbage collection.
+box.error.injection.set("ERRINJ_RELAY_SEND_DELAY", true)
 
 -- Send more data to the replica.
 -- Need to do 2 snapshots here, otherwise the replica would
@@ -78,11 +78,11 @@ box.snapshot()
 -- xlogs needed by the replica.
 box.snapshot()
 #box.info.gc().checkpoints == 1 or box.info.gc()
-#fio.glob('./master/*.xlog') == 2 or fio.listdir('./master')
+while #fio.glob('./master/*.xlog') ~= 2 do fiber.sleep(0.01) end
 
--- Remove the timeout injection so that the replica catches
+-- Resume replicaton so that the replica catches
 -- up quickly.
-box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0)
+box.error.injection.set("ERRINJ_RELAY_SEND_DELAY", false)
 
 -- Check that the replica received all data from the master.
 test_run:cmd("switch replica")
@@ -94,7 +94,7 @@ test_run:cmd("switch default")
 -- from the old checkpoint.
 wait_gc(1)
 #box.info.gc().checkpoints == 1 or box.info.gc()
-#fio.glob('./master/*.xlog') == 0 or fio.listdir('./master')
+while #fio.glob('./master/*.xlog') ~= 0 do fiber.sleep(0.01) end
 --
 -- Check that the master doesn't delete xlog files sent to the
 -- replica until it receives a confirmation that the data has
-- 
2.18.0

  parent reply	other threads:[~2018-10-05  9:02 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-27 15:38 [PATCH] test: enable parallel mode for replication tests Sergei Voronezhskii
2018-10-01  1:36 ` Alexander Turenko
2018-10-01 10:41   ` [tarantool-patches] " Alexander Turenko
2018-10-03 14:50     ` Sergei Voronezhskii
2018-10-05  9:02       ` Sergei Voronezhskii
2018-10-05  9:02         ` [PATCH 1/4] test: cleanup replication tests, parallel mode on Sergei Voronezhskii
2018-10-08 19:02           ` Alexander Turenko
2018-10-05  9:02         ` Sergei Voronezhskii [this message]
2018-10-08 19:07           ` [PATCH 2/4] test: errinj for pause relay_send Alexander Turenko
2018-10-05  9:02         ` [PATCH 3/4] test: increase timeout to check replica status Sergei Voronezhskii
2018-10-08 19:07           ` Alexander Turenko
2018-10-05  9:02         ` [PATCH 4/4] test: refactor some requirements to pass the runs Sergei Voronezhskii
2018-10-08 19:08           ` Alexander Turenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181005090215.6160-3-sergw@tarantool.org \
    --to=sergw@tarantool.org \
    --cc=alexander.turenko@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=vdavydov.dev@gmail.com \
    --subject='Re: [PATCH 2/4] test: errinj for pause relay_send' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox