* [tarantool-patches] [PATCH 3/5] test: redo some swim tests using error injections
2019-07-03 23:28 [tarantool-patches] [PATCH 0/5] SWIM loglog preparation Vladislav Shpilevoy
2019-07-03 23:28 ` [tarantool-patches] [PATCH 1/5] swim: fix flaky 'indirect pings' test Vladislav Shpilevoy
2019-07-03 23:28 ` [tarantool-patches] [PATCH 2/5] swim: sadly remove cache Vladislav Shpilevoy
@ 2019-07-03 23:28 ` Vladislav Shpilevoy
2019-07-04 8:23 ` [tarantool-patches] " Konstantin Osipov
2019-07-03 23:28 ` [tarantool-patches] [PATCH 4/5] swim: speed-up tests Vladislav Shpilevoy
` (2 subsequent siblings)
5 siblings, 1 reply; 12+ messages in thread
From: Vladislav Shpilevoy @ 2019-07-03 23:28 UTC (permalink / raw)
To: tarantool-patches; +Cc: kostja
Some tests relied on specific content of SWIM messages.
After the next patches these conditions can't be reproduced
without an explicit intervention via error injections.
The patch moves these tests to separate release-disabled
files.
Part of #4253
---
src/lib/core/errinj.h | 1 +
src/lib/swim/swim.c | 5 +
test/swim/errinj.result | 102 ++++++++++++++++++
test/swim/errinj.test.lua | 41 ++++++++
test/swim/suite.ini | 1 +
test/swim/swim.result | 55 ----------
test/swim/swim.test.lua | 22 ----
test/unit/CMakeLists.txt | 5 +
test/unit/suite.ini | 1 +
test/unit/swim.c | 174 +-----------------------------
test/unit/swim.result | 31 ++----
test/unit/swim_errinj.c | 199 +++++++++++++++++++++++++++++++++++
test/unit/swim_errinj.result | 18 ++++
test/unit/swim_test_utils.c | 102 ++++++------------
test/unit/swim_test_utils.h | 24 +++--
15 files changed, 432 insertions(+), 349 deletions(-)
create mode 100644 test/swim/errinj.result
create mode 100644 test/swim/errinj.test.lua
create mode 100644 test/unit/swim_errinj.c
create mode 100644 test/unit/swim_errinj.result
diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
index 787836514..b4de98873 100644
--- a/src/lib/core/errinj.h
+++ b/src/lib/core/errinj.h
@@ -132,6 +132,7 @@ struct errinj {
_(ERRINJ_SIO_READ_MAX, ERRINJ_INT, {.iparam = -1}) \
_(ERRINJ_SQL_NAME_NORMALIZATION, ERRINJ_BOOL, {.bparam = false}) \
_(ERRINJ_COIO_SENDFILE_CHUNK, ERRINJ_INT, {.iparam = -1}) \
+ _(ERRINJ_SWIM_FD_ONLY, ERRINJ_BOOL, {.bparam = false}) \
ENUM0(errinj_id, ERRINJ_LIST);
extern struct errinj errinjs[];
diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index 465e850a5..4f01d623a 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -38,6 +38,7 @@
#include "assoc.h"
#include "sio.h"
#include "trigger.h"
+#include "errinj.h"
#define HEAP_FORWARD_DECLARATION
#include "salad/heap.h"
@@ -1133,6 +1134,10 @@ swim_encode_round_msg(struct swim *swim)
map_size += swim_encode_src_uuid(swim, packet);
map_size += swim_encode_failure_detection(swim, packet,
SWIM_FD_MSG_PING);
+ ERROR_INJECT(ERRINJ_SWIM_FD_ONLY, {
+ mp_encode_map(header, map_size);
+ return;
+ });
map_size += swim_encode_dissemination(swim, packet);
map_size += swim_encode_anti_entropy(swim, packet);
diff --git a/test/swim/errinj.result b/test/swim/errinj.result
new file mode 100644
index 000000000..efeab87a8
--- /dev/null
+++ b/test/swim/errinj.result
@@ -0,0 +1,102 @@
+test_run = require('test_run').new()
+---
+...
+errinj = box.error.injection
+---
+...
+--
+-- A complex case the payload cache needs to deal with. It is
+-- possible, that a new member's incarnation is learned, but new
+-- payload is not. That happens when a cluster is huge, and
+-- anti-entropy with dissemination both may do not contain a
+-- message sender. Payload cache should correctly process that,
+-- when the new payload finally arrives.
+--
+s1 = swim.new({uuid = uuid(1), uri = uri(), heartbeat_rate = 1000, generation = 0})
+---
+...
+s2 = swim.new({uuid = uuid(2), uri = uri(), heartbeat_rate = 1000, generation = 0})
+---
+...
+s1_self = s1:self()
+---
+...
+_ = s1:add_member({uuid = s2:self():uuid(), uri = s2:self():uri()})
+---
+...
+_ = s2:add_member({uuid = s1_self:uuid(), uri = s1_self:uri()})
+---
+...
+s1:size()
+---
+- 2
+...
+s2:size()
+---
+- 2
+...
+s1_view = s2:member_by_uuid(s1_self:uuid())
+---
+...
+s1:set_payload('payload')
+---
+- true
+...
+s1:self():incarnation()
+---
+- cdata {generation = 0ULL, version = 2ULL}
+...
+-- Via probe() S2 learns new incarnation of S1, but without new
+-- payload.
+errinj.set("ERRINJ_SWIM_FD_ONLY", true)
+---
+- ok
+...
+s1:probe_member(s2:self():uri())
+---
+- true
+...
+errinj.set("ERRINJ_SWIM_FD_ONLY", false)
+---
+- ok
+...
+s1_view:payload()
+---
+- null
+...
+s1_view:incarnation()
+---
+- cdata {generation = 0ULL, version = 2ULL}
+...
+s1:cfg({heartbeat_rate = 0.01})
+---
+- true
+...
+s2:cfg({heartbeat_rate = 0.01})
+---
+- true
+...
+while s1_view:payload() ~= 'payload' do fiber.sleep(0.01) end
+---
+...
+p = s1_view:payload()
+---
+...
+s1_view:payload() == p
+---
+- true
+...
+p
+---
+- payload
+...
+s1_view:incarnation()
+---
+- cdata {generation = 0ULL, version = 2ULL}
+...
+s1:delete()
+---
+...
+s2:delete()
+---
+...
diff --git a/test/swim/errinj.test.lua b/test/swim/errinj.test.lua
new file mode 100644
index 000000000..cf0cb2dd4
--- /dev/null
+++ b/test/swim/errinj.test.lua
@@ -0,0 +1,41 @@
+test_run = require('test_run').new()
+errinj = box.error.injection
+
+--
+-- A complex case the payload cache needs to deal with. It is
+-- possible, that a new member's incarnation is learned, but new
+-- payload is not. That happens when a cluster is huge, and
+-- anti-entropy with dissemination both may do not contain a
+-- message sender. Payload cache should correctly process that,
+-- when the new payload finally arrives.
+--
+s1 = swim.new({uuid = uuid(1), uri = uri(), heartbeat_rate = 1000, generation = 0})
+s2 = swim.new({uuid = uuid(2), uri = uri(), heartbeat_rate = 1000, generation = 0})
+s1_self = s1:self()
+_ = s1:add_member({uuid = s2:self():uuid(), uri = s2:self():uri()})
+_ = s2:add_member({uuid = s1_self:uuid(), uri = s1_self:uri()})
+s1:size()
+s2:size()
+s1_view = s2:member_by_uuid(s1_self:uuid())
+
+
+s1:set_payload('payload')
+s1:self():incarnation()
+-- Via probe() S2 learns new incarnation of S1, but without new
+-- payload.
+errinj.set("ERRINJ_SWIM_FD_ONLY", true)
+s1:probe_member(s2:self():uri())
+errinj.set("ERRINJ_SWIM_FD_ONLY", false)
+s1_view:payload()
+s1_view:incarnation()
+
+s1:cfg({heartbeat_rate = 0.01})
+s2:cfg({heartbeat_rate = 0.01})
+while s1_view:payload() ~= 'payload' do fiber.sleep(0.01) end
+p = s1_view:payload()
+s1_view:payload() == p
+p
+s1_view:incarnation()
+
+s1:delete()
+s2:delete()
diff --git a/test/swim/suite.ini b/test/swim/suite.ini
index 13189c1cf..16343dd4c 100644
--- a/test/swim/suite.ini
+++ b/test/swim/suite.ini
@@ -2,4 +2,5 @@
core = tarantool
description = SWIM tests
script = box.lua
+release_disabled = errinj.test.lua
is_parallel = True
diff --git a/test/swim/swim.result b/test/swim/swim.result
index c7d539f5d..9b2e46621 100644
--- a/test/swim/swim.result
+++ b/test/swim/swim.result
@@ -879,61 +879,6 @@ s1_view:payload() == p
---
- true
...
--- Now a complex case. It is possible, that a new member's
--- incarnation is learned, but new payload is not. Payload cache
--- should correctly process that.
-s1:cfg({heartbeat_rate = 1000})
----
-- true
-...
-s2:cfg({heartbeat_rate = 1000})
----
-- true
-...
-s1:set_payload({a = 200})
----
-- true
-...
--- Via probe() S2 learns new incarnation of S1, but without new
--- payload.
-s2:probe_member(s1_self:uri())
----
-- true
-...
-s1_view:payload()
----
-- {'a': 100}
-...
-s1_view:incarnation()
----
-- cdata {generation = 0ULL, version = 3ULL}
-...
-s1:cfg({heartbeat_rate = 0.01})
----
-- true
-...
-s2:cfg({heartbeat_rate = 0.01})
----
-- true
-...
-while s1_view:payload().a ~= 200 do fiber.sleep(0.01) end
----
-...
-p = s1_view:payload()
----
-...
-s1_view:payload() == p
----
-- true
-...
-p
----
-- {'a': 200}
-...
-s1_view:incarnation()
----
-- cdata {generation = 0ULL, version = 3ULL}
-...
s1:delete()
---
...
diff --git a/test/swim/swim.test.lua b/test/swim/swim.test.lua
index c949b3be2..7e2f5a857 100644
--- a/test/swim/swim.test.lua
+++ b/test/swim/swim.test.lua
@@ -287,28 +287,6 @@ while not s1_view:payload() do fiber.sleep(0.01) end
p = s1_view:payload()
s1_view:payload() == p
--- Now a complex case. It is possible, that a new member's
--- incarnation is learned, but new payload is not. Payload cache
--- should correctly process that.
-
-s1:cfg({heartbeat_rate = 1000})
-s2:cfg({heartbeat_rate = 1000})
-
-s1:set_payload({a = 200})
--- Via probe() S2 learns new incarnation of S1, but without new
--- payload.
-s2:probe_member(s1_self:uri())
-s1_view:payload()
-s1_view:incarnation()
-
-s1:cfg({heartbeat_rate = 0.01})
-s2:cfg({heartbeat_rate = 0.01})
-while s1_view:payload().a ~= 200 do fiber.sleep(0.01) end
-p = s1_view:payload()
-s1_view:payload() == p
-p
-s1_view:incarnation()
-
s1:delete()
s2:delete()
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index e3de34b16..3b31ddfae 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -230,5 +230,10 @@ add_executable(swim_proto.test swim_proto.c swim_test_transport.c swim_test_ev.c
swim_test_utils.c ${PROJECT_SOURCE_DIR}/src/version.c)
target_link_libraries(swim_proto.test unit swim)
+add_executable(swim_errinj.test swim_errinj.c swim_test_transport.c
+ swim_test_ev.c swim_test_utils.c
+ ${PROJECT_SOURCE_DIR}/src/version.c)
+target_link_libraries(swim_errinj.test unit swim)
+
add_executable(merger.test merger.test.c)
target_link_libraries(merger.test unit core box)
diff --git a/test/unit/suite.ini b/test/unit/suite.ini
index 75c80ece1..89c8499fc 100644
--- a/test/unit/suite.ini
+++ b/test/unit/suite.ini
@@ -1,4 +1,5 @@
[default]
core = unittest
description = unit tests
+release_disabled = swim_errinj.test
is_parallel = True
diff --git a/test/unit/swim.c b/test/unit/swim.c
index d93f80554..263816c0c 100644
--- a/test/unit/swim.c
+++ b/test/unit/swim.c
@@ -28,21 +28,8 @@
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-
-#include "memory.h"
-#include "fiber.h"
-#include "random.h"
-#include "uuid/tt_uuid.h"
-#include "unit.h"
-#include "uri/uri.h"
-#include "swim/swim.h"
-#include "swim/swim_ev.h"
-#include "swim/swim_proto.h"
-#include "swim_test_transport.h"
-#include "swim_test_ev.h"
#include "swim_test_utils.h"
#include "trigger.h"
-#include <fcntl.h>
#include <math.h>
/**
@@ -748,136 +735,6 @@ swim_test_payload_basic(void)
swim_finish_test();
}
-static void
-swim_test_payload_refutation(void)
-{
- swim_start_test(11);
- int size, cluster_size = 3;
- struct swim_cluster *cluster = swim_cluster_new(cluster_size);
- swim_cluster_set_ack_timeout(cluster, 1);
- for (int i = 0; i < cluster_size; ++i) {
- for (int j = i + 1; j < cluster_size; ++j)
- swim_cluster_interconnect(cluster, i, j);
- }
- const char *s0_old_payload = "s0 payload";
- int s0_old_payload_size = strlen(s0_old_payload) + 1;
- fail_if(swim_cluster_member_set_payload(cluster, 0, s0_old_payload,
- s0_old_payload_size) != 0);
- fail_if(swim_cluster_wait_payload_everywhere(cluster, 0, s0_old_payload,
- s0_old_payload_size,
- 3) != 0);
- /*
- * The test checks the following case. Assume there are 3
- * nodes: S1, S2, S3. They all know each other. S1 sets
- * new payload, S2 and S3 knows that. They all see that S1
- * has version 1 and payload P1.
- *
- * Now S1 changes payload to P2. Its version becomes
- * 2. During next entire round its round messages are
- * lost, however ACKs work ok.
- */
- const char *s0_new_payload = "s0 second payload";
- int s0_new_payload_size = strlen(s0_new_payload);
- fail_if(swim_cluster_member_set_payload(cluster, 0, s0_new_payload,
- s0_new_payload_size) != 0);
- int components[2] = {SWIM_DISSEMINATION, SWIM_ANTI_ENTROPY};
- swim_cluster_drop_components(cluster, 0, components, 2);
- swim_run_for(3);
- swim_cluster_drop_components(cluster, 0, NULL, 0);
-
- is(swim_cluster_member_incarnation(cluster, 1, 0).version, 2,
- "S2 sees new version of S1");
- is(swim_cluster_member_incarnation(cluster, 2, 0).version, 2,
- "S3 does the same");
-
- const char *tmp = swim_cluster_member_payload(cluster, 1, 0, &size);
- ok(size == s0_old_payload_size &&
- memcmp(tmp, s0_old_payload, size) == 0,
- "but S2 does not known the new payload");
-
- tmp = swim_cluster_member_payload(cluster, 2, 0, &size);
- ok(size == s0_old_payload_size &&
- memcmp(tmp, s0_old_payload, size) == 0,
- "as well as S3");
-
- /* Restore normal ACK timeout. */
- swim_cluster_set_ack_timeout(cluster, 30);
-
- /*
- * Now S1's payload TTD is 0, but via ACKs S1 sent its new
- * version to S2 and S3. Despite that they should
- * apply new S1's payload via anti-entropy. Next lines
- * test that:
- *
- * 1) S2 can apply new S1's payload from S1's
- * anti-entropy;
- *
- * 2) S2 will not receive the old S1's payload from S3.
- * S3 knows, that its payload is outdated, and should
- * not send it;
- *
- * 2) S3 can apply new S1's payload from S2's
- * anti-entropy. Note, that here S3 applies the payload
- * not directly from the originator. It is the most
- * complex case.
- *
- * Next lines test the case (1).
- */
-
- /* S3 does not participate in the test (1). */
- swim_cluster_set_drop(cluster, 2, 100);
- swim_run_for(3);
-
- tmp = swim_cluster_member_payload(cluster, 1, 0, &size);
- ok(size == s0_new_payload_size &&
- memcmp(tmp, s0_new_payload, size) == 0,
- "S2 learned S1's payload via anti-entropy");
- is(swim_cluster_member_incarnation(cluster, 1, 0).version, 2,
- "version still is the same");
-
- tmp = swim_cluster_member_payload(cluster, 2, 0, &size);
- ok(size == s0_old_payload_size &&
- memcmp(tmp, s0_old_payload, size) == 0,
- "S3 was blocked and does not know anything");
- is(swim_cluster_member_incarnation(cluster, 2, 0).version, 2,
- "version still is the same");
-
- /* S1 will not participate in the tests further. */
- swim_cluster_set_drop(cluster, 0, 100);
-
- /*
- * Now check the case (2) - S3 will not send outdated
- * version of S1's payload. To maintain the experimental
- * integrity S1 and S2 are silent. Only S3 sends packets.
- */
- swim_cluster_set_drop(cluster, 2, 0);
- swim_cluster_set_drop_out(cluster, 1, 100);
- swim_run_for(3);
-
- tmp = swim_cluster_member_payload(cluster, 1, 0, &size);
- ok(size == s0_new_payload_size &&
- memcmp(tmp, s0_new_payload, size) == 0,
- "S2 keeps the same new S1's payload, S3 did not rewrite it");
-
- tmp = swim_cluster_member_payload(cluster, 2, 0, &size);
- ok(size == s0_old_payload_size &&
- memcmp(tmp, s0_old_payload, size) == 0,
- "S3 still does not know anything");
-
- /*
- * Now check the case (3) - S3 accepts new S1's payload
- * from S2. Even knowing the same S1's version.
- */
- swim_cluster_set_drop(cluster, 1, 0);
- swim_cluster_set_drop_out(cluster, 2, 100);
- is(swim_cluster_wait_payload_everywhere(cluster, 0, s0_new_payload,
- s0_new_payload_size, 3), 0,
- "S3 learns S1's payload from S2")
-
- swim_cluster_delete(cluster);
- swim_finish_test();
-}
-
static void
swim_test_indirect_ping(void)
{
@@ -1245,7 +1102,7 @@ swim_test_suspect_new_members(void)
static int
main_f(va_list ap)
{
- swim_start_test(24);
+ swim_start_test(23);
(void) ap;
swim_test_ev_init();
@@ -1267,7 +1124,6 @@ main_f(va_list ap)
swim_test_uri_update();
swim_test_broadcast();
swim_test_payload_basic();
- swim_test_payload_refutation();
swim_test_indirect_ping();
swim_test_encryption();
swim_test_slow_net();
@@ -1287,32 +1143,6 @@ main_f(va_list ap)
int
main()
{
- random_init();
- time_t seed = time(NULL);
- srand(seed);
- memory_init();
- fiber_init(fiber_c_invoke);
- int fd = open("log.txt", O_TRUNC);
- if (fd != -1)
- close(fd);
- say_logger_init("log.txt", 6, 1, "plain", 0);
- /*
- * Print the seed to be able to reproduce a bug with the
- * same seed.
- */
- say_info("Random seed = %llu", (unsigned long long) seed);
-
- struct fiber *main_fiber = fiber_new("main", main_f);
- fiber_set_joinable(main_fiber, true);
- assert(main_fiber != NULL);
- fiber_wakeup(main_fiber);
- ev_run(loop(), 0);
- fiber_join(main_fiber);
-
- say_logger_free();
- fiber_free();
- memory_free();
- random_free();
-
+ swim_run_test("swim.txt", main_f);
return test_result;
}
\ No newline at end of file
diff --git a/test/unit/swim.result b/test/unit/swim.result
index 4d5ec0f3f..741ab80ef 100644
--- a/test/unit/swim.result
+++ b/test/unit/swim.result
@@ -1,5 +1,5 @@
*** main_f ***
-1..24
+1..23
*** swim_test_one_link ***
1..6
ok 1 - no rounds - no fullmesh
@@ -169,38 +169,23 @@ ok 15 - subtests
ok 11 - third payload is disseminated via anti-entropy
ok 16 - subtests
*** swim_test_payload_basic: done ***
- *** swim_test_payload_refutation ***
- 1..11
- ok 1 - S2 sees new version of S1
- ok 2 - S3 does the same
- ok 3 - but S2 does not known the new payload
- ok 4 - as well as S3
- ok 5 - S2 learned S1's payload via anti-entropy
- ok 6 - version still is the same
- ok 7 - S3 was blocked and does not know anything
- ok 8 - version still is the same
- ok 9 - S2 keeps the same new S1's payload, S3 did not rewrite it
- ok 10 - S3 still does not know anything
- ok 11 - S3 learns S1's payload from S2
-ok 17 - subtests
- *** swim_test_payload_refutation: done ***
*** swim_test_indirect_ping ***
1..2
ok 1 - S1 is still alive everywhere
ok 2 - as well as S2 - they communicated via S3
-ok 18 - subtests
+ok 17 - subtests
*** swim_test_indirect_ping: done ***
*** swim_test_encryption ***
1..3
ok 1 - cluster works with encryption
ok 2 - different encryption keys - can't interact
ok 3 - cluster works after encryption has been disabled
-ok 19 - subtests
+ok 18 - subtests
*** swim_test_encryption: done ***
*** swim_test_slow_net ***
1..0
# slow network leads to idle round steps, they should not produce a new message
-ok 20 - subtests
+ok 19 - subtests
*** swim_test_slow_net: done ***
*** swim_test_triggers ***
1..23
@@ -228,25 +213,25 @@ ok 20 - subtests
# now all the triggers are done and deleted
ok 22 - local URI update warns about version update
ok 23 - version is a part of incarnation, so the latter is updated too
-ok 21 - subtests
+ok 20 - subtests
*** swim_test_triggers: done ***
*** swim_test_generation ***
1..3
ok 1 - S1 disseminated its payload to S2
ok 2 - S1 restarted and set another payload. Without generation it could lead to never disseminated new payload.
ok 3 - S2 sees new generation of S1
-ok 22 - subtests
+ok 21 - subtests
*** swim_test_generation: done ***
*** swim_test_dissemination_speed ***
1..2
ok 1 - dissemination work in log time even at the very start of a cluster
ok 2 - dissemination can withstand an event storm
-ok 23 - subtests
+ok 22 - subtests
*** swim_test_dissemination_speed: done ***
*** swim_test_suspect_new_members ***
1..2
ok 1 - S2 dropped S1 as dead
ok 2 - S3 didn't add S1 from S2's messages, because S1 didn't answer on a ping
-ok 24 - subtests
+ok 23 - subtests
*** swim_test_suspect_new_members: done ***
*** main_f: done ***
diff --git a/test/unit/swim_errinj.c b/test/unit/swim_errinj.c
new file mode 100644
index 000000000..2e89f1821
--- /dev/null
+++ b/test/unit/swim_errinj.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "swim_test_utils.h"
+#include "errinj.h"
+
+/**
+ * Test result is the real return value of main_f. fiber_join()
+ * can not be used, because it expects that if a returned value
+ * is < 0, then diag is not empty. But in unit tests that can be
+ * violated - check_plan() does not set diag.
+ */
+static int test_result;
+
+static void
+swim_test_payload_refutation(void)
+{
+ swim_start_test(11);
+ int size, cluster_size = 3;
+ struct swim_cluster *cluster = swim_cluster_new(cluster_size);
+ swim_cluster_set_ack_timeout(cluster, 1);
+ for (int i = 0; i < cluster_size; ++i) {
+ for (int j = i + 1; j < cluster_size; ++j)
+ swim_cluster_interconnect(cluster, i, j);
+ }
+ const char *s0_old_payload = "s0 payload";
+ int s0_old_payload_size = strlen(s0_old_payload) + 1;
+ fail_if(swim_cluster_member_set_payload(cluster, 0, s0_old_payload,
+ s0_old_payload_size) != 0);
+ fail_if(swim_cluster_wait_payload_everywhere(cluster, 0, s0_old_payload,
+ s0_old_payload_size,
+ 3) != 0);
+ /*
+ * The test checks the following case. Assume there are 3
+ * nodes: S1, S2, S3. They all know each other. S1 sets
+ * new payload, S2 and S3 know that. They all see that S1
+ * has version 1 and payload P1.
+ *
+ * Now S1 changes payload to P2. Its version becomes
+ * 2. During next entire round its round messages are
+ * lost, however ACKs work ok. Assume also, that
+ * anti-entropy does not work. For example, if the cluster
+ * is huge, and S1 does not fit into that section.
+ */
+ const char *s0_new_payload = "s0 second payload";
+ int s0_new_payload_size = strlen(s0_new_payload);
+ fail_if(swim_cluster_member_set_payload(cluster, 0, s0_new_payload,
+ s0_new_payload_size) != 0);
+ struct errinj *errinj = &errinjs[ERRINJ_SWIM_FD_ONLY];
+ errinj->bparam = true;
+ swim_run_for(3);
+ errinj->bparam = false;
+
+ is(swim_cluster_member_incarnation(cluster, 1, 0).version, 2,
+ "S2 sees new version of S1");
+ is(swim_cluster_member_incarnation(cluster, 2, 0).version, 2,
+ "S3 does the same");
+
+ const char *tmp = swim_cluster_member_payload(cluster, 1, 0, &size);
+ ok(size == s0_old_payload_size &&
+ memcmp(tmp, s0_old_payload, size) == 0,
+ "but S2 does not known the new payload");
+
+ tmp = swim_cluster_member_payload(cluster, 2, 0, &size);
+ ok(size == s0_old_payload_size &&
+ memcmp(tmp, s0_old_payload, size) == 0,
+ "as well as S3");
+
+ /* Restore normal ACK timeout. */
+ swim_cluster_set_ack_timeout(cluster, 30);
+
+ /*
+ * Now S1's payload TTD is 0, but via ACKs S1 sent its new
+ * version to S2 and S3. Despite that they should
+ * apply new S1's payload via anti-entropy. Next lines
+ * test that:
+ *
+ * 1) S2 can apply new S1's payload from S1's
+ * anti-entropy;
+ *
+ * 2) S2 will not receive the old S1's payload from S3.
+ * S3 knows, that its payload is outdated, and should
+ * not send it;
+ *
+ * 3) S3 can apply new S1's payload from S2's
+ * anti-entropy. Note, that here S3 applies the payload
+ * not directly from the originator. It is the most
+ * complex case.
+ *
+ * Next lines test the case (1).
+ */
+
+ /* S3 does not participate in the test (1). */
+ swim_cluster_set_drop(cluster, 2, 100);
+ swim_run_for(3);
+
+ tmp = swim_cluster_member_payload(cluster, 1, 0, &size);
+ ok(size == s0_new_payload_size &&
+ memcmp(tmp, s0_new_payload, size) == 0,
+ "S2 learned S1's payload via anti-entropy");
+ is(swim_cluster_member_incarnation(cluster, 1, 0).version, 2,
+ "version still is the same");
+
+ tmp = swim_cluster_member_payload(cluster, 2, 0, &size);
+ ok(size == s0_old_payload_size &&
+ memcmp(tmp, s0_old_payload, size) == 0,
+ "S3 was blocked and does not know anything");
+ is(swim_cluster_member_incarnation(cluster, 2, 0).version, 2,
+ "version still is the same");
+
+ /* S1 will not participate in the tests further. */
+ swim_cluster_set_drop(cluster, 0, 100);
+
+ /*
+ * Now check the case (2) - S3 will not send outdated
+ * version of S1's payload. To maintain the experimental
+ * integrity S1 and S2 are silent. Only S3 sends packets.
+ */
+ swim_cluster_set_drop(cluster, 2, 0);
+ swim_cluster_set_drop_out(cluster, 1, 100);
+ swim_run_for(3);
+
+ tmp = swim_cluster_member_payload(cluster, 1, 0, &size);
+ ok(size == s0_new_payload_size &&
+ memcmp(tmp, s0_new_payload, size) == 0,
+ "S2 keeps the same new S1's payload, S3 did not rewrite it");
+
+ tmp = swim_cluster_member_payload(cluster, 2, 0, &size);
+ ok(size == s0_old_payload_size &&
+ memcmp(tmp, s0_old_payload, size) == 0,
+ "S3 still does not know anything");
+
+ /*
+ * Now check the case (3) - S3 accepts new S1's payload
+ * from S2. Even knowing the same S1's version.
+ */
+ swim_cluster_set_drop(cluster, 1, 0);
+ swim_cluster_set_drop_out(cluster, 2, 100);
+ is(swim_cluster_wait_payload_everywhere(cluster, 0, s0_new_payload,
+ s0_new_payload_size, 3), 0,
+ "S3 learns S1's payload from S2")
+
+ swim_cluster_delete(cluster);
+ swim_finish_test();
+}
+
+
+static int
+main_f(va_list ap)
+{
+ swim_start_test(1);
+
+ (void) ap;
+ swim_test_ev_init();
+ swim_test_transport_init();
+
+ swim_test_payload_refutation();
+
+ swim_test_transport_free();
+ swim_test_ev_free();
+
+ test_result = check_plan();
+ footer();
+ return 0;
+}
+
+int
+main()
+{
+ swim_run_test("swim_errinj.txt", main_f);
+ return test_result;
+}
diff --git a/test/unit/swim_errinj.result b/test/unit/swim_errinj.result
new file mode 100644
index 000000000..c60389d7f
--- /dev/null
+++ b/test/unit/swim_errinj.result
@@ -0,0 +1,18 @@
+ *** main_f ***
+1..1
+ *** swim_test_payload_refutation ***
+ 1..11
+ ok 1 - S2 sees new version of S1
+ ok 2 - S3 does the same
+ ok 3 - but S2 does not known the new payload
+ ok 4 - as well as S3
+ ok 5 - S2 learned S1's payload via anti-entropy
+ ok 6 - version still is the same
+ ok 7 - S3 was blocked and does not know anything
+ ok 8 - version still is the same
+ ok 9 - S2 keeps the same new S1's payload, S3 did not rewrite it
+ ok 10 - S3 still does not know anything
+ ok 11 - S3 learns S1's payload from S2
+ok 1 - subtests
+ *** swim_test_payload_refutation: done ***
+ *** main_f: done ***
diff --git a/test/unit/swim_test_utils.c b/test/unit/swim_test_utils.c
index c56af2233..bfefcbaf9 100644
--- a/test/unit/swim_test_utils.c
+++ b/test/unit/swim_test_utils.c
@@ -34,9 +34,11 @@
#include "swim/swim_ev.h"
#include "uuid/tt_uuid.h"
#include "trivia/util.h"
-#include "fiber.h"
#include "msgpuck.h"
#include "trigger.h"
+#include "memory.h"
+#include "random.h"
+#include <fcntl.h>
/**
* Drop rate packet filter to drop packets with a certain
@@ -61,26 +63,6 @@ swim_drop_rate_create(struct swim_drop_rate *dr, double rate, bool is_for_in,
dr->rate = rate;
}
-/**
- * Drop components packet filter to drop packets containing
- * specified SWIM components.
- */
-struct swim_drop_components {
- /** List of component body keys. */
- const int *keys;
- /** Length of @a keys. */
- int key_count;
-};
-
-/** Initialize drop components packet filter. */
-static inline void
-swim_drop_components_create(struct swim_drop_components *dc, const int *keys,
- int key_count)
-{
- dc->keys = keys;
- dc->key_count = key_count;
-}
-
/** Packet filter to drop packets with specified destinations. */
struct swim_drop_channel {
/**
@@ -164,10 +146,6 @@ struct swim_node {
* from/to a specified direction.
*/
struct swim_drop_rate drop_rate;
- /**
- * Filter to drop packets with specified SWIM components.
- */
- struct swim_drop_components drop_components;
/** Filter to drop packets with specified destinations. */
struct swim_drop_channel drop_channel;
};
@@ -230,7 +208,6 @@ swim_node_create(struct swim_node *n, int id)
(void) rc;
swim_drop_rate_create(&n->drop_rate, 0, false, false);
- swim_drop_components_create(&n->drop_components, NULL, 0);
swim_drop_channel_create(&n->drop_channel);
}
@@ -495,48 +472,6 @@ swim_cluster_set_drop_in(struct swim_cluster *cluster, int i, double value)
swim_cluster_set_drop_generic(cluster, i, value, true, false);
}
-/**
- * Check if a packet contains any of the components to filter out.
- */
-static bool
-swim_filter_drop_component(const char *data, int size, void *udata, int dir,
- int peer_fd)
-{
- (void) size;
- (void) dir;
- (void) peer_fd;
- struct swim_drop_components *dc = (struct swim_drop_components *) udata;
- /* Skip meta. */
- mp_next(&data);
- int map_size = mp_decode_map(&data);
- for (int i = 0; i < map_size; ++i) {
- int key = mp_decode_uint(&data);
- for (int j = 0; j < dc->key_count; ++j) {
- if (dc->keys[j] == key)
- return true;
- }
- /* Skip value. */
- mp_next(&data);
- }
- return false;
-}
-
-void
-swim_cluster_drop_components(struct swim_cluster *cluster, int i,
- const int *keys, int key_count)
-{
- struct swim_node *n = swim_cluster_node(cluster, i);
- int fd = swim_fd(n->swim);
- if (key_count == 0) {
- swim_test_transport_remove_filter(fd,
- swim_filter_drop_component);
- return;
- }
- swim_drop_components_create(&n->drop_components, keys, key_count);
- swim_test_transport_add_filter(fd, swim_filter_drop_component,
- &n->drop_components);
-}
-
/**
* Check if the packet sender should drop a packet outgoing to
* @a peer_fd file descriptor.
@@ -922,3 +857,34 @@ swim_error_check_match(const char *msg)
{
return strstr(diag_last_error(diag_get())->errmsg, msg) != NULL;
}
+
+void
+swim_run_test(const char *log_file, fiber_func test)
+{
+ random_init();
+ time_t seed = time(NULL);
+ srand(seed);
+ memory_init();
+ fiber_init(fiber_c_invoke);
+ int fd = open(log_file, O_TRUNC);
+ if (fd != -1)
+ close(fd);
+ say_logger_init(log_file, 6, 1, "plain", 0);
+ /*
+ * Print the seed to be able to reproduce a bug with the
+ * same seed.
+ */
+ say_info("Random seed = %llu", (unsigned long long) seed);
+
+ struct fiber *main_fiber = fiber_new("main", test);
+ fiber_set_joinable(main_fiber, true);
+ assert(main_fiber != NULL);
+ fiber_wakeup(main_fiber);
+ ev_run(loop(), 0);
+ fiber_join(main_fiber);
+
+ say_logger_free();
+ fiber_free();
+ memory_free();
+ random_free();
+}
diff --git a/test/unit/swim_test_utils.h b/test/unit/swim_test_utils.h
index 7064aa67d..ac86b6f72 100644
--- a/test/unit/swim_test_utils.h
+++ b/test/unit/swim_test_utils.h
@@ -31,7 +31,15 @@
* SUCH DAMAGE.
*/
#include <stdbool.h>
+#include "uuid/tt_uuid.h"
+#include "unit.h"
+#include "fiber.h"
+#include "uri/uri.h"
#include "swim/swim.h"
+#include "swim/swim_ev.h"
+#include "swim/swim_proto.h"
+#include "swim_test_transport.h"
+#include "swim_test_ev.h"
struct swim_cluster;
@@ -117,15 +125,6 @@ swim_cluster_set_drop_out(struct swim_cluster *cluster, int i, double value);
void
swim_cluster_set_drop_in(struct swim_cluster *cluster, int i, double value);
-/**
- * Drop all packets from/to a SWIM instance with id @a i
- * containing components specified in @a keys. Components are
- * defined by the constants in the packet body.
- */
-void
-swim_cluster_drop_components(struct swim_cluster *cluster, int i,
- const int *keys, int key_count);
-
/**
* When @a value is true, break a one direction network link
* between @a to_id and @a from_id SWIM instances. It is a pure
@@ -245,6 +244,13 @@ swim_cluster_run_triggers(struct swim_cluster *cluster);
void
swim_run_for(double duration);
+/**
+ * A helper to initialize all the necessary subsystems before a
+ * test, and free them afterwards.
+ */
+void
+swim_run_test(const char *log_file, fiber_func test);
+
#define swim_start_test(n) { \
header(); \
say_verbose("-------- SWIM start test %s --------", __func__); \
--
2.20.1 (Apple Git-117)
^ permalink raw reply [flat|nested] 12+ messages in thread
* [tarantool-patches] [PATCH 5/5] swim: speed-up empty payloads cluster bootstrap
2019-07-03 23:28 [tarantool-patches] [PATCH 0/5] SWIM loglog preparation Vladislav Shpilevoy
` (3 preceding siblings ...)
2019-07-03 23:28 ` [tarantool-patches] [PATCH 4/5] swim: speed-up tests Vladislav Shpilevoy
@ 2019-07-03 23:28 ` Vladislav Shpilevoy
2019-07-04 8:26 ` [tarantool-patches] " Konstantin Osipov
2019-07-05 22:44 ` [tarantool-patches] Re: [PATCH 0/5] SWIM loglog preparation Vladislav Shpilevoy
5 siblings, 1 reply; 12+ messages in thread
From: Vladislav Shpilevoy @ 2019-07-03 23:28 UTC (permalink / raw)
To: tarantool-patches; +Cc: kostja
Another place consuming most of the tests' startup time is the
useless dissemination of an empty payload, which can in fact be
skipped.
Consider a cluster of 300 nodes. All of them are interconnected
manually, and now a test wants to wait for stabilization, when
there are no events. On such a cluster it takes ~200 round steps
until there is not a single event left.
This is not about big packets, or log() TTD. There may be a few
events, or maybe more, but when a test wants the cluster to be
clean, it needs to wait until all the events are done.
This patch exploits the fact that empty payloads can be compared
for free, without a single memcmp. If both the new and the old
payloads are empty, then there is nothing to disseminate.
It could help in a real cluster too, if initially there are no
payloads.
Needed for #4253
---
src/lib/swim/swim.c | 13 ++++++++++++-
test/unit/swim.c | 19 ++++++-------------
2 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index e9f99015c..eda255e6f 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -728,9 +728,20 @@ swim_update_member_payload(struct swim *swim, struct swim_member *member,
return -1;
}
memcpy(new_payload, payload, payload_size);
- } else {
+ } else if (member->payload_size > 0) {
free(member->payload);
new_payload = NULL;
+ } else {
+ /*
+ * A special free-to-check case. Both new and old
+ * payloads are empty usually at a cluster
+ * startup. Skip of that useless event frees one
+ * slot in UDP packets for something more
+ * meaningful, and speeds up the tests.
+ */
+ assert(member->payload_size == 0 && payload_size == 0);
+ member->is_payload_up_to_date = true;
+ return 0;
}
member->payload = new_payload;
member->payload_size = payload_size;
diff --git a/test/unit/swim.c b/test/unit/swim.c
index 263816c0c..1371e7d26 100644
--- a/test/unit/swim.c
+++ b/test/unit/swim.c
@@ -866,7 +866,7 @@ swim_cluster_delete_f(va_list ap)
static void
swim_test_triggers(void)
{
- swim_start_test(23);
+ swim_start_test(20);
struct swim_cluster *cluster = swim_cluster_new(2);
swim_cluster_set_ack_timeout(cluster, 1);
struct trigger_ctx tctx, tctx2;
@@ -891,16 +891,9 @@ swim_test_triggers(void)
"ctx.member is set");
is(tctx.ctx.events, SWIM_EV_NEW, "ctx.events is set");
- swim_run_for(1);
- swim_cluster_run_triggers(cluster);
- is(tctx.counter, 2, "payload is delivered, trigger caught that");
- is(tctx.ctx.member, swim_cluster_member_view(cluster, 0, 1),
- "S1 got S2' payload");
- is(tctx.ctx.events, SWIM_EV_NEW_PAYLOAD, "mask says that");
-
swim_cluster_member_set_payload(cluster, 0, "123", 3);
swim_cluster_run_triggers(cluster);
- is(tctx.counter, 3, "self payload is updated");
+ is(tctx.counter, 2, "self payload is updated");
is(tctx.ctx.member, swim_self(s1), "self is set as a member");
is(tctx.ctx.events, SWIM_EV_NEW_PAYLOAD | SWIM_EV_NEW_VERSION,
"both version and payload events are presented");
@@ -909,18 +902,18 @@ swim_test_triggers(void)
fail_if(swim_cluster_wait_status(cluster, 0, 1,
MEMBER_SUSPECTED, 3) != 0);
swim_cluster_run_triggers(cluster);
- is(tctx.counter, 4, "suspicion fired a trigger");
+ is(tctx.counter, 3, "suspicion fired a trigger");
is(tctx.ctx.events, SWIM_EV_NEW_STATUS, "status suspected");
fail_if(swim_cluster_wait_status(cluster, 0, 1, MEMBER_DEAD, 3) != 0);
swim_cluster_run_triggers(cluster);
- is(tctx.counter, 5, "death fired a trigger");
+ is(tctx.counter, 4, "death fired a trigger");
is(tctx.ctx.events, SWIM_EV_NEW_STATUS, "status dead");
fail_if(swim_cluster_wait_status(cluster, 0, 1,
swim_member_status_MAX, 2) != 0);
swim_cluster_run_triggers(cluster);
- is(tctx.counter, 6, "drop fired a trigger");
+ is(tctx.counter, 5, "drop fired a trigger");
is(tctx.ctx.events, SWIM_EV_DROP, "status dropped");
is(swim_cluster_member_view(cluster, 0, 1), NULL,
"dropped member is not presented in the member table");
@@ -940,7 +933,7 @@ swim_test_triggers(void)
swim_cluster_add_link(cluster, 0, 1);
swim_cluster_run_triggers(cluster);
is(tctx2.counter, 1, "yielding trigger is fired");
- is(tctx.counter, 6, "non-yielding still is not");
+ is(tctx.counter, 5, "non-yielding still is not");
struct fiber *async_delete_fiber =
fiber_new("async delete", swim_cluster_delete_f);
--
2.20.1 (Apple Git-117)
^ permalink raw reply [flat|nested] 12+ messages in thread