[Tarantool-patches] [PATCH v1] Divide replication/misc.test.lua

Serge Petrenko sergepetrenko at tarantool.org
Wed Sep 2 11:09:30 MSK 2020


Hi! Thanks for the patch!

Please see my comments below.

On 17.07.2020 12:25, Alexander V. Tikhonov wrote:
> To fix the flaky failures of replication/misc.test.lua, the test had to be
> divided into smaller tests so that the flaky results could be localized:
>
>    misc_assert_connecting_master_twice_gh-3610.test.lua
>    misc_assert_on_server_die_gh-2991.test.lua
>    misc_assert_replica_on_applier_disconnect_gh-3510.test.lua
>    misc_crash_on_box_concurrent_update_gh-3606.test.lua
>    misc_heartbeats_on_master_changes_gh-3160.test.lua
>    misc_no_failure_on_error_reading_wal_gh-4399.test.lua
>    misc_no_panic_on_connected_gh-3637.test.lua
>    misc_no_restart_on_same_configuration_gh-3711.test.lua
>    misc_no_socket_leak_on_replica_disconnect_gh-3642.test.lua
>    misc_orphan_on_reconfiguration_error_gh-4424.test.lua
>    misc_rebootstrap_from_ro_master_gh-3111.test.lua
>    misc_replica_checks_cluster_id_gh-3704.test.lua
>    misc_return_on_quorum_0_gh-3760.test.lua
>    misc_value_not_replicated_on_iproto_request_gh-3247.test.lua

We use a different naming scheme for regression tests.
It should be `gh-xxxx-bug-description`; also note the dashes
between words instead of underscores.
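
For example, the first few could look like this (hypothetical names,
just to illustrate the scheme):

   gh-3610-assert-connecting-master-twice.test.lua
   gh-2991-assert-on-server-die.test.lua
   gh-3510-assert-replica-on-applier-disconnect.test.lua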

>
> Needed for #4940
> ---
>
> Github: https://github.com/tarantool/tarantool/tree/avtikhon/gh-4940-replication-misc
> Issue: https://github.com/tarantool/tarantool/issues/4940
>
>   test/replication/misc.result                  | 866 ------------------
>   test/replication/misc.test.lua                | 356 -------
>   ...ert_connecting_master_twice_gh-3610.result |  86 ++
>   ...t_connecting_master_twice_gh-3610.test.lua |  34 +
>   .../misc_assert_on_server_die_gh-2991.result  |  31 +
>   ...misc_assert_on_server_die_gh-2991.test.lua |  12 +
>   ...plica_on_applier_disconnect_gh-3510.result |  49 +
>   ...ica_on_applier_disconnect_gh-3510.test.lua |  17 +
>   ...sh_on_box_concurrent_update_gh-3606.result |  50 +
>   ..._on_box_concurrent_update_gh-3606.test.lua |  19 +
>   ...eartbeats_on_master_changes_gh-3160.result |  76 ++
>   ...rtbeats_on_master_changes_gh-3160.test.lua |  40 +
>   ...ailure_on_error_reading_wal_gh-4399.result |  97 ++
>   ...lure_on_error_reading_wal_gh-4399.test.lua |  39 +
>   .../misc_no_panic_on_connected_gh-3637.result |  72 ++
>   ...isc_no_panic_on_connected_gh-3637.test.lua |  33 +
>   ...start_on_same_configuration_gh-3711.result | 107 +++
>   ...art_on_same_configuration_gh-3711.test.lua |  41 +
>   ..._leak_on_replica_disconnect_gh-3642.result |  98 ++
>   ...eak_on_replica_disconnect_gh-3642.test.lua |  44 +
>   ...an_on_reconfiguration_error_gh-4424.result |  88 ++
>   ..._on_reconfiguration_error_gh-4424.test.lua |  37 +
>   ..._rebootstrap_from_ro_master_gh-3111.result |  58 ++
>   ...ebootstrap_from_ro_master_gh-3111.test.lua |  20 +
>   ...c_replica_checks_cluster_id_gh-3704.result |  71 ++
>   ...replica_checks_cluster_id_gh-3704.test.lua |  26 +
>   .../misc_return_on_quorum_0_gh-3760.result    |  48 +
>   .../misc_return_on_quorum_0_gh-3760.test.lua  |  27 +
>   ...eplicated_on_iproto_request_gh-3247.result |  90 ++
>   ...licated_on_iproto_request_gh-3247.test.lua |  33 +
>   test/replication/suite.cfg                    |  15 +-
>   31 files changed, 1457 insertions(+), 1223 deletions(-)
>   delete mode 100644 test/replication/misc.result
>   delete mode 100644 test/replication/misc.test.lua
>   create mode 100644 test/replication/misc_assert_connecting_master_twice_gh-3610.result
>   create mode 100644 test/replication/misc_assert_connecting_master_twice_gh-3610.test.lua
>   create mode 100644 test/replication/misc_assert_on_server_die_gh-2991.result
>   create mode 100644 test/replication/misc_assert_on_server_die_gh-2991.test.lua
>   create mode 100644 test/replication/misc_assert_replica_on_applier_disconnect_gh-3510.result
>   create mode 100644 test/replication/misc_assert_replica_on_applier_disconnect_gh-3510.test.lua
>   create mode 100644 test/replication/misc_crash_on_box_concurrent_update_gh-3606.result
>   create mode 100644 test/replication/misc_crash_on_box_concurrent_update_gh-3606.test.lua
>   create mode 100644 test/replication/misc_heartbeats_on_master_changes_gh-3160.result
>   create mode 100644 test/replication/misc_heartbeats_on_master_changes_gh-3160.test.lua
>   create mode 100644 test/replication/misc_no_failure_on_error_reading_wal_gh-4399.result
>   create mode 100644 test/replication/misc_no_failure_on_error_reading_wal_gh-4399.test.lua
>   create mode 100644 test/replication/misc_no_panic_on_connected_gh-3637.result
>   create mode 100644 test/replication/misc_no_panic_on_connected_gh-3637.test.lua
>   create mode 100644 test/replication/misc_no_restart_on_same_configuration_gh-3711.result
>   create mode 100644 test/replication/misc_no_restart_on_same_configuration_gh-3711.test.lua
>   create mode 100644 test/replication/misc_no_socket_leak_on_replica_disconnect_gh-3642.result
>   create mode 100644 test/replication/misc_no_socket_leak_on_replica_disconnect_gh-3642.test.lua
>   create mode 100644 test/replication/misc_orphan_on_reconfiguration_error_gh-4424.result
>   create mode 100644 test/replication/misc_orphan_on_reconfiguration_error_gh-4424.test.lua
>   create mode 100644 test/replication/misc_rebootstrap_from_ro_master_gh-3111.result
>   create mode 100644 test/replication/misc_rebootstrap_from_ro_master_gh-3111.test.lua
>   create mode 100644 test/replication/misc_replica_checks_cluster_id_gh-3704.result
>   create mode 100644 test/replication/misc_replica_checks_cluster_id_gh-3704.test.lua
>   create mode 100644 test/replication/misc_return_on_quorum_0_gh-3760.result
>   create mode 100644 test/replication/misc_return_on_quorum_0_gh-3760.test.lua
>   create mode 100644 test/replication/misc_value_not_replicated_on_iproto_request_gh-3247.result
>   create mode 100644 test/replication/misc_value_not_replicated_on_iproto_request_gh-3247.test.lua
>
> diff --git a/test/replication/misc.result b/test/replication/misc.result
> deleted file mode 100644
> index e5d1f560e..000000000
> --- a/test/replication/misc.result
> +++ /dev/null
> @@ -1,866 +0,0 @@
> -uuid = require('uuid')
> ----
> -...
> -test_run = require('test_run').new()
> ----
> -...
> -box.schema.user.grant('guest', 'replication')
> ----
> -...
> --- gh-2991 - Tarantool asserts on box.cfg.replication update if one of
> --- servers is dead
> -replication_timeout = box.cfg.replication_timeout
> ----
> -...
> -replication_connect_timeout = box.cfg.replication_connect_timeout
> ----
> -...
> -box.cfg{replication_timeout=0.05, replication_connect_timeout=0.05, replication={}}
> ----
> -...
> -box.cfg{replication_connect_quorum=2}
> ----
> -...
> -box.cfg{replication = {'127.0.0.1:12345', box.cfg.listen}}
> ----
> -...
> -box.info.status
> ----
> -- orphan
> -...
> -box.info.ro
> ----
> -- true
> -...
> --- gh-3606 - Tarantool crashes if box.cfg.replication is updated concurrently
> -fiber = require('fiber')
> ----
> -...
> -c = fiber.channel(2)
> ----
> -...
> -f = function() fiber.create(function() pcall(box.cfg, {replication = {12345}}) c:put(true) end) end
> ----
> -...
> -f()
> ----
> -...
> -f()
> ----
> -...
> -c:get()
> ----
> -- true
> -...
> -c:get()
> ----
> -- true
> -...
> -box.cfg{replication = "", replication_timeout = replication_timeout, replication_connect_timeout = replication_connect_timeout}
> ----
> -...
> -box.info.status
> ----
> -- running
> -...
> -box.info.ro
> ----
> -- false
> -...
> --- gh-3111 - Allow to rebootstrap a replica from a read-only master
> -replica_uuid = uuid.new()
> ----
> -...
> -test_run:cmd('create server test with rpl_master=default, script="replication/replica_uuid.lua"')
> ----
> -- true
> -...
> -test_run:cmd(string.format('start server test with args="%s"', replica_uuid))
> ----
> -- true
> -...
> -test_run:cmd('stop server test')
> ----
> -- true
> -...
> -test_run:cmd('cleanup server test')
> ----
> -- true
> -...
> -box.cfg{read_only = true}
> ----
> -...
> -test_run:cmd(string.format('start server test with args="%s"', replica_uuid))
> ----
> -- true
> -...
> -test_run:cmd('stop server test')
> ----
> -- true
> -...
> -test_run:cmd('cleanup server test')
> ----
> -- true
> -...
> -box.cfg{read_only = false}
> ----
> -...
> -test_run:cmd('delete server test')
> ----
> -- true
> -...
> -test_run:cleanup_cluster()
> ----
> -...
> --- gh-3160 - Send heartbeats if there are changes from a remote master only
> -SERVERS = { 'autobootstrap1', 'autobootstrap2', 'autobootstrap3' }
> ----
> -...
> --- Deploy a cluster.
> -test_run:create_cluster(SERVERS, "replication", {args="0.03"})
> ----
> -...
> -test_run:wait_fullmesh(SERVERS)
> ----
> -...
> -test_run:cmd("switch autobootstrap3")
> ----
> -- true
> -...
> -test_run = require('test_run').new()
> ----
> -...
> -fiber = require('fiber')
> ----
> -...
> -_ = box.schema.space.create('test_timeout'):create_index('pk')
> ----
> -...
> -test_run:cmd("setopt delimiter ';'")
> ----
> -- true
> -...
> -function wait_not_follow(replicaA, replicaB)
> -    return test_run:wait_cond(function()
> -        return replicaA.status ~= 'follow' or replicaB.status ~= 'follow'
> -    end, box.cfg.replication_timeout)
> -end;
> ----
> -...
> -function test_timeout()
> -    local replicaA = box.info.replication[1].upstream or box.info.replication[2].upstream
> -    local replicaB = box.info.replication[3].upstream or box.info.replication[2].upstream
> -    local follows = test_run:wait_cond(function()
> -        return replicaA.status == 'follow' or replicaB.status == 'follow'
> -    end)
> -    if not follows then error('replicas are not in the follow status') end
> -    for i = 0, 99 do
> -        box.space.test_timeout:replace({1})
> -        if wait_not_follow(replicaA, replicaB) then
> -            return error(box.info.replication)
> -        end
> -    end
> -    return true
> -end;
> ----
> -...
> -test_run:cmd("setopt delimiter ''");
> ----
> -- true
> -...
> -test_timeout()
> ----
> -- true
> -...
> --- gh-3247 - Sequence-generated value is not replicated in case
> --- the request was sent via iproto.
> -test_run:cmd("switch autobootstrap1")
> ----
> -- true
> -...
> -net_box = require('net.box')
> ----
> -...
> -_ = box.schema.space.create('space1')
> ----
> -...
> -_ = box.schema.sequence.create('seq')
> ----
> -...
> -_ = box.space.space1:create_index('primary', {sequence = true} )
> ----
> -...
> -_ = box.space.space1:create_index('secondary', {parts = {2, 'unsigned'}})
> ----
> -...
> -box.schema.user.grant('guest', 'read,write', 'space', 'space1')
> ----
> -...
> -c = net_box.connect(box.cfg.listen)
> ----
> -...
> -c.space.space1:insert{box.NULL, "data"} -- fails, but bumps sequence value
> ----
> -- error: 'Tuple field 2 type does not match one required by operation: expected unsigned'
> -...
> -c.space.space1:insert{box.NULL, 1, "data"}
> ----
> -- [2, 1, 'data']
> -...
> -box.space.space1:select{}
> ----
> -- - [2, 1, 'data']
> -...
> -vclock = test_run:get_vclock("autobootstrap1")
> ----
> -...
> -vclock[0] = nil
> ----
> -...
> -_ = test_run:wait_vclock("autobootstrap2", vclock)
> ----
> -...
> -test_run:cmd("switch autobootstrap2")
> ----
> -- true
> -...
> -box.space.space1:select{}
> ----
> -- - [2, 1, 'data']
> -...
> -test_run:cmd("switch autobootstrap1")
> ----
> -- true
> -...
> -box.space.space1:drop()
> ----
> -...
> -test_run:cmd("switch default")
> ----
> -- true
> -...
> -test_run:drop_cluster(SERVERS)
> ----
> -...
> -test_run:cleanup_cluster()
> ----
> -...
> --- gh-3642 - Check that socket file descriptor doesn't leak
> --- when a replica is disconnected.
> -rlimit = require('rlimit')
> ----
> -...
> -lim = rlimit.limit()
> ----
> -...
> -rlimit.getrlimit(rlimit.RLIMIT_NOFILE, lim)
> ----
> -...
> -old_fno = lim.rlim_cur
> ----
> -...
> -lim.rlim_cur = 64
> ----
> -...
> -rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
> ----
> -...
> -test_run:cmd('create server sock with rpl_master=default, script="replication/replica.lua"')
> ----
> -- true
> -...
> -test_run:cmd('start server sock')
> ----
> -- true
> -...
> -test_run:cmd('switch sock')
> ----
> -- true
> -...
> -test_run = require('test_run').new()
> ----
> -...
> -fiber = require('fiber')
> ----
> -...
> -test_run:cmd("setopt delimiter ';'")
> ----
> -- true
> -...
> -for i = 1, 64 do
> -    local replication = box.cfg.replication
> -    box.cfg{replication = {}}
> -    box.cfg{replication = replication}
> -    while box.info.replication[1].upstream.status ~= 'follow' do
> -        fiber.sleep(0.001)
> -    end
> -end;
> ----
> -...
> -test_run:cmd("setopt delimiter ''");
> ----
> -- true
> -...
> -box.info.replication[1].upstream.status
> ----
> -- follow
> -...
> -test_run:cmd('switch default')
> ----
> -- true
> -...
> -lim.rlim_cur = old_fno
> ----
> -...
> -rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
> ----
> -...
> -test_run:cmd("stop server sock")
> ----
> -- true
> -...
> -test_run:cmd("cleanup server sock")
> ----
> -- true
> -...
> -test_run:cmd("delete server sock")
> ----
> -- true
> -...
> -test_run:cleanup_cluster()
> ----
> -...
> -box.schema.user.revoke('guest', 'replication')
> ----
> -...
> --- gh-3510 assertion failure in replica_on_applier_disconnect()
> -test_run:cmd('create server er_load1 with script="replication/er_load1.lua"')
> ----
> -- true
> -...
> -test_run:cmd('create server er_load2 with script="replication/er_load2.lua"')
> ----
> -- true
> -...
> -test_run:cmd('start server er_load1 with wait=False, wait_load=False')
> ----
> -- true
> -...
> --- Instance er_load2 will fail with error ER_REPLICASET_UUID_MISMATCH.
> --- This is OK since we only test here that er_load1 doesn't assert.
> -test_run:cmd('start server er_load2 with wait=True, wait_load=True, crash_expected = True')
> ----
> -- false
> -...
> -test_run:cmd('stop server er_load1')
> ----
> -- true
> -...
> --- er_load2 exits automatically.
> -test_run:cmd('cleanup server er_load1')
> ----
> -- true
> -...
> -test_run:cmd('cleanup server er_load2')
> ----
> -- true
> -...
> -test_run:cmd('delete server er_load1')
> ----
> -- true
> -...
> -test_run:cmd('delete server er_load2')
> ----
> -- true
> -...
> -test_run:cleanup_cluster()
> ----
> -...
> ---
> --- Test case for gh-3637, gh-4550. Before the fix replica would
> --- exit with an error if a user does not exist or a password is
> --- incorrect. Now check that we don't hang/panic and successfully
> --- connect.
> ---
> -fiber = require('fiber')
> ----
> -...
> -test_run:cmd("create server replica_auth with rpl_master=default, script='replication/replica_auth.lua'")
> ----
> -- true
> -...
> -test_run:cmd("start server replica_auth with wait=False, wait_load=False, args='cluster:pass 0.05'")
> ----
> -- true
> -...
> --- Wait a bit to make sure replica waits till user is created.
> -fiber.sleep(0.1)
> ----
> -...
> -box.schema.user.create('cluster')
> ----
> -...
> --- The user is created. Let the replica fail auth request due to
> --- a wrong password.
> -fiber.sleep(0.1)
> ----
> -...
> -box.schema.user.passwd('cluster', 'pass')
> ----
> -...
> -box.schema.user.grant('cluster', 'replication')
> ----
> -...
> -while box.info.replication[2] == nil do fiber.sleep(0.01) end
> ----
> -...
> -vclock = test_run:get_vclock('default')
> ----
> -...
> -vclock[0] = nil
> ----
> -...
> -_ = test_run:wait_vclock('replica_auth', vclock)
> ----
> -...
> -test_run:cmd("stop server replica_auth")
> ----
> -- true
> -...
> -test_run:cmd("cleanup server replica_auth")
> ----
> -- true
> -...
> -test_run:cmd("delete server replica_auth")
> ----
> -- true
> -...
> -test_run:cleanup_cluster()
> ----
> -...
> -box.schema.user.drop('cluster')
> ----
> -...
> ---
> --- Test case for gh-3610. Before the fix replica would fail with the assertion
> --- when trying to connect to the same master twice.
> ---
> -box.schema.user.grant('guest', 'replication')
> ----
> -...
> -test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> ----
> -- true
> -...
> -test_run:cmd("start server replica")
> ----
> -- true
> -...
> -test_run:cmd("switch replica")
> ----
> -- true
> -...
> -replication = box.cfg.replication[1]
> ----
> -...
> -box.cfg{replication = {replication, replication}}
> ----
> -- error: 'Incorrect value for option ''replication'': duplicate connection to the
> -    same replica'
> -...
> --- Check the case when duplicate connection is detected in the background.
> -test_run:cmd("switch default")
> ----
> -- true
> -...
> -listen = box.cfg.listen
> ----
> -...
> -box.cfg{listen = ''}
> ----
> -...
> -test_run:cmd("switch replica")
> ----
> -- true
> -...
> -box.cfg{replication_connect_quorum = 0, replication_connect_timeout = 0.01}
> ----
> -...
> -box.cfg{replication = {replication, replication}}
> ----
> -...
> -test_run:cmd("switch default")
> ----
> -- true
> -...
> -box.cfg{listen = listen}
> ----
> -...
> -while test_run:grep_log('replica', 'duplicate connection') == nil do fiber.sleep(0.01) end
> ----
> -...
> -test_run:cmd("stop server replica")
> ----
> -- true
> -...
> -test_run:cmd("cleanup server replica")
> ----
> -- true
> -...
> -test_run:cmd("delete server replica")
> ----
> -- true
> -...
> -test_run:cleanup_cluster()
> ----
> -...
> -box.schema.user.revoke('guest', 'replication')
> ----
> -...
> ---
> --- gh-3711 Do not restart replication on box.cfg in case the
> --- configuration didn't change.
> ---
> -box.schema.user.grant('guest', 'replication')
> ----
> -...
> -test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> ----
> -- true
> -...
> -test_run:cmd("start server replica")
> ----
> -- true
> -...
> --- Access rights are checked only during reconnect. If the new
> --- and old configurations are equivalent, no reconnect will be
> --- issued and replication should continue working.
> -box.schema.user.revoke('guest', 'replication')
> ----
> -...
> -test_run:cmd("switch replica")
> ----
> -- true
> -...
> -replication = box.cfg.replication[1]
> ----
> -...
> -box.cfg{replication = {replication}}
> ----
> -...
> -box.info.status == 'running'
> ----
> -- true
> -...
> -box.cfg{replication = replication}
> ----
> -...
> -box.info.status == 'running'
> ----
> -- true
> -...
> --- Check that comparison of tables works as expected as well.
> -test_run:cmd("switch default")
> ----
> -- true
> -...
> -box.schema.user.grant('guest', 'replication')
> ----
> -...
> -test_run:cmd("switch replica")
> ----
> -- true
> -...
> -replication = box.cfg.replication
> ----
> -...
> -table.insert(replication, box.cfg.listen)
> ----
> -...
> -test_run:cmd("switch default")
> ----
> -- true
> -...
> -box.schema.user.revoke('guest', 'replication')
> ----
> -...
> -test_run:cmd("switch replica")
> ----
> -- true
> -...
> -box.cfg{replication = replication}
> ----
> -...
> -box.info.status == 'running'
> ----
> -- true
> -...
> -test_run:cmd("switch default")
> ----
> -- true
> -...
> -test_run:cmd("stop server replica")
> ----
> -- true
> -...
> -test_run:cmd("cleanup server replica")
> ----
> -- true
> -...
> -test_run:cmd("delete server replica")
> ----
> -- true
> -...
> -test_run:cleanup_cluster()
> ----
> -...
> ---
> --- gh-3704 move cluster id check to replica
> ---
> -test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> ----
> -- true
> -...
> -box.schema.user.grant("guest", "replication")
> ----
> -...
> -test_run:cmd("start server replica")
> ----
> -- true
> -...
> -test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
> ----
> -- null
> -...
> -box.info.replication[2].downstream.status
> ----
> -- follow
> -...
> --- change master's cluster uuid and check that replica doesn't connect.
> -test_run:cmd("stop server replica")
> ----
> -- true
> -...
> -_ = box.space._schema:replace{'cluster', tostring(uuid.new())}
> ----
> -...
> --- master believes replica is in cluster, but their cluster UUIDs differ.
> -test_run:cmd("start server replica")
> ----
> -- true
> -...
> -test_run:wait_log("replica", "REPLICASET_UUID_MISMATCH", nil, 1.0)
> ----
> -- REPLICASET_UUID_MISMATCH
> -...
> -test_run:wait_downstream(2, {status = 'stopped'})
> ----
> -- true
> -...
> -test_run:cmd("stop server replica")
> ----
> -- true
> -...
> -test_run:cmd("cleanup server replica")
> ----
> -- true
> -...
> -test_run:cmd("delete server replica")
> ----
> -- true
> -...
> -test_run:cleanup_cluster()
> ----
> -...
> -box.schema.user.revoke('guest', 'replication')
> ----
> -...
> ---
> --- gh-4399 Check that an error reading WAL directory on subscribe
> --- doesn't lead to a permanent replication failure.
> ---
> -box.schema.user.grant("guest", "replication")
> ----
> -...
> -test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> ----
> -- true
> -...
> -test_run:cmd("start server replica")
> ----
> -- true
> -...
> --- Make the WAL directory inaccessible.
> -fio = require('fio')
> ----
> -...
> -path = fio.abspath(box.cfg.wal_dir)
> ----
> -...
> -fio.chmod(path, 0)
> ----
> -- true
> -...
> --- Break replication on timeout.
> -replication_timeout = box.cfg.replication_timeout
> ----
> -...
> -box.cfg{replication_timeout = 9000}
> ----
> -...
> -test_run:cmd("switch replica")
> ----
> -- true
> -...
> -test_run:wait_cond(function() return box.info.replication[1].upstream.status ~= 'follow' end)
> ----
> -- true
> -...
> -require('fiber').sleep(box.cfg.replication_timeout)
> ----
> -...
> -test_run:cmd("switch default")
> ----
> -- true
> -...
> -box.cfg{replication_timeout = replication_timeout}
> ----
> -...
> --- Restore access to the WAL directory.
> --- Wait for replication to be reestablished.
> -fio.chmod(path, tonumber('777', 8))
> ----
> -- true
> -...
> -test_run:cmd("switch replica")
> ----
> -- true
> -...
> -test_run:wait_cond(function() return box.info.replication[1].upstream.status == 'follow' end)
> ----
> -- true
> -...
> -test_run:cmd("switch default")
> ----
> -- true
> -...
> -test_run:cmd("stop server replica")
> ----
> -- true
> -...
> -test_run:cmd("cleanup server replica")
> ----
> -- true
> -...
> -test_run:cmd("delete server replica")
> ----
> -- true
> -...
> -test_run:cleanup_cluster()
> ----
> -...
> -box.schema.user.revoke('guest', 'replication')
> ----
> -...
> ---
> --- gh-4424 Always enter orphan mode on error in replication
> --- configuration change.
> ---
> -replication_connect_timeout = box.cfg.replication_connect_timeout
> ----
> -...
> -replication_connect_quorum = box.cfg.replication_connect_quorum
> ----
> -...
> -box.cfg{replication="12345", replication_connect_timeout=0.1, replication_connect_quorum=1}
> ----
> -...
> -box.info.status
> ----
> -- orphan
> -...
> -box.info.ro
> ----
> -- true
> -...
> --- reset replication => leave orphan mode
> -box.cfg{replication=""}
> ----
> -...
> -box.info.status
> ----
> -- running
> -...
> -box.info.ro
> ----
> -- false
> -...
> --- no switch to orphan when quorum == 0
> -box.cfg{replication="12345", replication_connect_quorum=0}
> ----
> -...
> -box.info.status
> ----
> -- running
> -...
> -box.info.ro
> ----
> -- false
> -...
> --- we could connect to one out of two replicas. Set orphan.
> -box.cfg{replication_connect_quorum=2}
> ----
> -...
> -box.cfg{replication={box.cfg.listen, "12345"}}
> ----
> -...
> -box.info.status
> ----
> -- orphan
> -...
> -box.info.ro
> ----
> -- true
> -...
> --- lower quorum => leave orphan mode
> -box.cfg{replication_connect_quorum=1}
> ----
> -...
> -box.info.status
> ----
> -- running
> -...
> -box.info.ro
> ----
> -- false
> -...
> ---
> --- gh-3760: replication quorum 0 on reconfiguration should return
> --- from box.cfg immediately.
> ---
> -replication = box.cfg.replication
> ----
> -...
> -box.cfg{                                                        \
> -    replication = {},                                           \
> -    replication_connect_quorum = 0,                             \
> -    replication_connect_timeout = 1000000                       \
> -}
> ----
> -...
> --- The call below would hang, if quorum 0 is ignored, or checked
> --- too late.
> -box.cfg{replication = {'localhost:12345'}}
> ----
> -...
> -box.info.status
> ----
> -- running
> -...
> -box.cfg{                                                        \
> -    replication = {},                                           \
> -    replication_connect_quorum = replication_connect_quorum,    \
> -    replication_connect_timeout = replication_connect_timeout   \
> -}
> ----
> -...
> diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua
> deleted file mode 100644
> index d285b014a..000000000
> --- a/test/replication/misc.test.lua
> +++ /dev/null
> @@ -1,356 +0,0 @@
> -uuid = require('uuid')
> -test_run = require('test_run').new()
> -
> -box.schema.user.grant('guest', 'replication')
> -
> --- gh-2991 - Tarantool asserts on box.cfg.replication update if one of
> --- servers is dead
> -replication_timeout = box.cfg.replication_timeout
> -replication_connect_timeout = box.cfg.replication_connect_timeout
> -box.cfg{replication_timeout=0.05, replication_connect_timeout=0.05, replication={}}
> -box.cfg{replication_connect_quorum=2}
> -box.cfg{replication = {'127.0.0.1:12345', box.cfg.listen}}
> -box.info.status
> -box.info.ro
> -
> --- gh-3606 - Tarantool crashes if box.cfg.replication is updated concurrently
> -fiber = require('fiber')
> -c = fiber.channel(2)
> -f = function() fiber.create(function() pcall(box.cfg, {replication = {12345}}) c:put(true) end) end
> -f()
> -f()
> -c:get()
> -c:get()
> -
> -box.cfg{replication = "", replication_timeout = replication_timeout, replication_connect_timeout = replication_connect_timeout}
> -box.info.status
> -box.info.ro
> -
> --- gh-3111 - Allow to rebootstrap a replica from a read-only master
> -replica_uuid = uuid.new()
> -test_run:cmd('create server test with rpl_master=default, script="replication/replica_uuid.lua"')
> -test_run:cmd(string.format('start server test with args="%s"', replica_uuid))
> -test_run:cmd('stop server test')
> -test_run:cmd('cleanup server test')
> -box.cfg{read_only = true}
> -test_run:cmd(string.format('start server test with args="%s"', replica_uuid))
> -test_run:cmd('stop server test')
> -test_run:cmd('cleanup server test')
> -box.cfg{read_only = false}
> -test_run:cmd('delete server test')
> -test_run:cleanup_cluster()
> -
> --- gh-3160 - Send heartbeats if there are changes from a remote master only
> -SERVERS = { 'autobootstrap1', 'autobootstrap2', 'autobootstrap3' }
> -
> --- Deploy a cluster.
> -test_run:create_cluster(SERVERS, "replication", {args="0.03"})
> -test_run:wait_fullmesh(SERVERS)
> -test_run:cmd("switch autobootstrap3")
> -test_run = require('test_run').new()
> -fiber = require('fiber')
> -_ = box.schema.space.create('test_timeout'):create_index('pk')
> -test_run:cmd("setopt delimiter ';'")
> -function wait_not_follow(replicaA, replicaB)
> -    return test_run:wait_cond(function()
> -        return replicaA.status ~= 'follow' or replicaB.status ~= 'follow'
> -    end, box.cfg.replication_timeout)
> -end;
> -function test_timeout()
> -    local replicaA = box.info.replication[1].upstream or box.info.replication[2].upstream
> -    local replicaB = box.info.replication[3].upstream or box.info.replication[2].upstream
> -    local follows = test_run:wait_cond(function()
> -        return replicaA.status == 'follow' or replicaB.status == 'follow'
> -    end)
> -    if not follows then error('replicas are not in the follow status') end
> -    for i = 0, 99 do
> -        box.space.test_timeout:replace({1})
> -        if wait_not_follow(replicaA, replicaB) then
> -            return error(box.info.replication)
> -        end
> -    end
> -    return true
> -end;
> -test_run:cmd("setopt delimiter ''");
> -test_timeout()
> -
> --- gh-3247 - Sequence-generated value is not replicated in case
> --- the request was sent via iproto.
> -test_run:cmd("switch autobootstrap1")
> -net_box = require('net.box')
> -_ = box.schema.space.create('space1')
> -_ = box.schema.sequence.create('seq')
> -_ = box.space.space1:create_index('primary', {sequence = true} )
> -_ = box.space.space1:create_index('secondary', {parts = {2, 'unsigned'}})
> -box.schema.user.grant('guest', 'read,write', 'space', 'space1')
> -c = net_box.connect(box.cfg.listen)
> -c.space.space1:insert{box.NULL, "data"} -- fails, but bumps sequence value
> -c.space.space1:insert{box.NULL, 1, "data"}
> -box.space.space1:select{}
> -vclock = test_run:get_vclock("autobootstrap1")
> -vclock[0] = nil
> -_ = test_run:wait_vclock("autobootstrap2", vclock)
> -test_run:cmd("switch autobootstrap2")
> -box.space.space1:select{}
> -test_run:cmd("switch autobootstrap1")
> -box.space.space1:drop()
> -
> -test_run:cmd("switch default")
> -test_run:drop_cluster(SERVERS)
> -test_run:cleanup_cluster()
> -
> --- gh-3642 - Check that socket file descriptor doesn't leak
> --- when a replica is disconnected.
> -rlimit = require('rlimit')
> -lim = rlimit.limit()
> -rlimit.getrlimit(rlimit.RLIMIT_NOFILE, lim)
> -old_fno = lim.rlim_cur
> -lim.rlim_cur = 64
> -rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
> -
> -test_run:cmd('create server sock with rpl_master=default, script="replication/replica.lua"')
> -test_run:cmd('start server sock')
> -test_run:cmd('switch sock')
> -test_run = require('test_run').new()
> -fiber = require('fiber')
> -test_run:cmd("setopt delimiter ';'")
> -for i = 1, 64 do
> -    local replication = box.cfg.replication
> -    box.cfg{replication = {}}
> -    box.cfg{replication = replication}
> -    while box.info.replication[1].upstream.status ~= 'follow' do
> -        fiber.sleep(0.001)
> -    end
> -end;
> -test_run:cmd("setopt delimiter ''");
> -
> -box.info.replication[1].upstream.status
> -
> -test_run:cmd('switch default')
> -
> -lim.rlim_cur = old_fno
> -rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
> -
> -test_run:cmd("stop server sock")
> -test_run:cmd("cleanup server sock")
> -test_run:cmd("delete server sock")
> -test_run:cleanup_cluster()
> -
> -box.schema.user.revoke('guest', 'replication')
> -
> --- gh-3510 assertion failure in replica_on_applier_disconnect()
> -test_run:cmd('create server er_load1 with script="replication/er_load1.lua"')
> -test_run:cmd('create server er_load2 with script="replication/er_load2.lua"')
> -test_run:cmd('start server er_load1 with wait=False, wait_load=False')
> --- Instance er_load2 will fail with error ER_REPLICASET_UUID_MISMATCH.
> --- This is OK since we only test here that er_load1 doesn't assert.
> -test_run:cmd('start server er_load2 with wait=True, wait_load=True, crash_expected = True')
> -test_run:cmd('stop server er_load1')
> --- er_load2 exits automatically.
> -test_run:cmd('cleanup server er_load1')
> -test_run:cmd('cleanup server er_load2')
> -test_run:cmd('delete server er_load1')
> -test_run:cmd('delete server er_load2')
> -test_run:cleanup_cluster()
> -
> ---
> --- Test case for gh-3637, gh-4550. Before the fix replica would
> --- exit with an error if a user does not exist or a password is
> --- incorrect. Now check that we don't hang/panic and successfully
> --- connect.
> ---
> -fiber = require('fiber')
> -test_run:cmd("create server replica_auth with rpl_master=default, script='replication/replica_auth.lua'")
> -test_run:cmd("start server replica_auth with wait=False, wait_load=False, args='cluster:pass 0.05'")
> --- Wait a bit to make sure replica waits till user is created.
> -fiber.sleep(0.1)
> -box.schema.user.create('cluster')
> --- The user is created. Let the replica fail auth request due to
> --- a wrong password.
> -fiber.sleep(0.1)
> -box.schema.user.passwd('cluster', 'pass')
> -box.schema.user.grant('cluster', 'replication')
> -
> -while box.info.replication[2] == nil do fiber.sleep(0.01) end
> -vclock = test_run:get_vclock('default')
> -vclock[0] = nil
> -_ = test_run:wait_vclock('replica_auth', vclock)
> -
> -test_run:cmd("stop server replica_auth")
> -test_run:cmd("cleanup server replica_auth")
> -test_run:cmd("delete server replica_auth")
> -test_run:cleanup_cluster()
> -
> -box.schema.user.drop('cluster')
> -
> ---
> --- Test case for gh-3610. Before the fix replica would fail with the assertion
> --- when trying to connect to the same master twice.
> ---
> -box.schema.user.grant('guest', 'replication')
> -test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> -test_run:cmd("start server replica")
> -test_run:cmd("switch replica")
> -replication = box.cfg.replication[1]
> -box.cfg{replication = {replication, replication}}
> -
> --- Check the case when duplicate connection is detected in the background.
> -test_run:cmd("switch default")
> -listen = box.cfg.listen
> -box.cfg{listen = ''}
> -
> -test_run:cmd("switch replica")
> -box.cfg{replication_connect_quorum = 0, replication_connect_timeout = 0.01}
> -box.cfg{replication = {replication, replication}}
> -
> -test_run:cmd("switch default")
> -box.cfg{listen = listen}
> -while test_run:grep_log('replica', 'duplicate connection') == nil do fiber.sleep(0.01) end
> -
> -test_run:cmd("stop server replica")
> -test_run:cmd("cleanup server replica")
> -test_run:cmd("delete server replica")
> -test_run:cleanup_cluster()
> -box.schema.user.revoke('guest', 'replication')
> -
> ---
> --- gh-3711 Do not restart replication on box.cfg in case the
> --- configuration didn't change.
> ---
> -box.schema.user.grant('guest', 'replication')
> -test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> -test_run:cmd("start server replica")
> -
> --- Access rights are checked only during reconnect. If the new
> --- and old configurations are equivalent, no reconnect will be
> --- issued and replication should continue working.
> -box.schema.user.revoke('guest', 'replication')
> -test_run:cmd("switch replica")
> -replication = box.cfg.replication[1]
> -box.cfg{replication = {replication}}
> -box.info.status == 'running'
> -box.cfg{replication = replication}
> -box.info.status == 'running'
> -
> --- Check that comparison of tables works as expected as well.
> -test_run:cmd("switch default")
> -box.schema.user.grant('guest', 'replication')
> -test_run:cmd("switch replica")
> -replication = box.cfg.replication
> -table.insert(replication, box.cfg.listen)
> -test_run:cmd("switch default")
> -box.schema.user.revoke('guest', 'replication')
> -test_run:cmd("switch replica")
> -box.cfg{replication = replication}
> -box.info.status == 'running'
> -
> -test_run:cmd("switch default")
> -test_run:cmd("stop server replica")
> -test_run:cmd("cleanup server replica")
> -test_run:cmd("delete server replica")
> -test_run:cleanup_cluster()
> -
> ---
> --- gh-3704 move cluster id check to replica
> ---
> -test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> -box.schema.user.grant("guest", "replication")
> -test_run:cmd("start server replica")
> -test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
> -box.info.replication[2].downstream.status
> --- change master's cluster uuid and check that replica doesn't connect.
> -test_run:cmd("stop server replica")
> -_ = box.space._schema:replace{'cluster', tostring(uuid.new())}
> --- master believes replica is in cluster, but their cluster UUIDs differ.
> -test_run:cmd("start server replica")
> -test_run:wait_log("replica", "REPLICASET_UUID_MISMATCH", nil, 1.0)
> -test_run:wait_downstream(2, {status = 'stopped'})
> -
> -test_run:cmd("stop server replica")
> -test_run:cmd("cleanup server replica")
> -test_run:cmd("delete server replica")
> -test_run:cleanup_cluster()
> -box.schema.user.revoke('guest', 'replication')
> -
> ---
> --- gh-4399 Check that an error reading WAL directory on subscribe
> --- doesn't lead to a permanent replication failure.
> ---
> -box.schema.user.grant("guest", "replication")
> -test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> -test_run:cmd("start server replica")
> -
> --- Make the WAL directory inaccessible.
> -fio = require('fio')
> -path = fio.abspath(box.cfg.wal_dir)
> -fio.chmod(path, 0)
> -
> --- Break replication on timeout.
> -replication_timeout = box.cfg.replication_timeout
> -box.cfg{replication_timeout = 9000}
> -test_run:cmd("switch replica")
> -test_run:wait_cond(function() return box.info.replication[1].upstream.status ~= 'follow' end)
> -require('fiber').sleep(box.cfg.replication_timeout)
> -test_run:cmd("switch default")
> -box.cfg{replication_timeout = replication_timeout}
> -
> --- Restore access to the WAL directory.
> --- Wait for replication to be reestablished.
> -fio.chmod(path, tonumber('777', 8))
> -test_run:cmd("switch replica")
> -test_run:wait_cond(function() return box.info.replication[1].upstream.status == 'follow' end)
> -test_run:cmd("switch default")
> -
> -test_run:cmd("stop server replica")
> -test_run:cmd("cleanup server replica")
> -test_run:cmd("delete server replica")
> -test_run:cleanup_cluster()
> -box.schema.user.revoke('guest', 'replication')
> -
> ---
> --- gh-4424 Always enter orphan mode on error in replication
> --- configuration change.
> ---
> -replication_connect_timeout = box.cfg.replication_connect_timeout
> -replication_connect_quorum = box.cfg.replication_connect_quorum
> -box.cfg{replication="12345", replication_connect_timeout=0.1, replication_connect_quorum=1}
> -box.info.status
> -box.info.ro
> --- reset replication => leave orphan mode
> -box.cfg{replication=""}
> -box.info.status
> -box.info.ro
> --- no switch to orphan when quorum == 0
> -box.cfg{replication="12345", replication_connect_quorum=0}
> -box.info.status
> -box.info.ro
> -
> --- we could connect to one out of two replicas. Set orphan.
> -box.cfg{replication_connect_quorum=2}
> -box.cfg{replication={box.cfg.listen, "12345"}}
> -box.info.status
> -box.info.ro
> --- lower quorum => leave orphan mode
> -box.cfg{replication_connect_quorum=1}
> -box.info.status
> -box.info.ro
> -
> ---
> --- gh-3760: replication quorum 0 on reconfiguration should return
> --- from box.cfg immediately.
> ---
> -replication = box.cfg.replication
> -box.cfg{                                                        \
> -    replication = {},                                           \
> -    replication_connect_quorum = 0,                             \
> -    replication_connect_timeout = 1000000                       \
> -}
> --- The call below would hang, if quorum 0 is ignored, or checked
> --- too late.
> -box.cfg{replication = {'localhost:12345'}}
> -box.info.status
> -box.cfg{                                                        \
> -    replication = {},                                           \
> -    replication_connect_quorum = replication_connect_quorum,    \
> -    replication_connect_timeout = replication_connect_timeout   \
> -}
> diff --git a/test/replication/misc_assert_connecting_master_twice_gh-3610.result b/test/replication/misc_assert_connecting_master_twice_gh-3610.result
> new file mode 100644
> index 000000000..d7b7cc25b
> --- /dev/null
> +++ b/test/replication/misc_assert_connecting_master_twice_gh-3610.result
> @@ -0,0 +1,86 @@
> +test_run = require('test_run').new()
> +---
> +...
> +test_run:cmd("restart server default")
I don't think you should restart the server here.
The test seems fine without it. The same applies to almost all the
test cases below.
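That is, something like this on top of the patch:

   -test_run:cmd("restart server default")
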
> +uuid = require('uuid')
> +---
> +...
> +fiber = require('fiber')
> +---
> +...

The uuid and fiber modules aren't used in this test, so please remove
these two extra lines. The same applies to almost all the test cases below.
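
E.g., on top of the patch:

   -uuid = require('uuid')
   -fiber = require('fiber')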


The branch contains this extraneous change:

+replication_connect_quorum = box.cfg.replication_connect_quorum
+---
+...
+replication_connect_timeout = box.cfg.replication_connect_timeout
+---
+...

You don't need to save these options, since you only change them on a 
replica.
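
So these two assignments can simply be dropped, e.g.:

   -replication_connect_quorum = box.cfg.replication_connect_quorum
   -replication_connect_timeout = box.cfg.replication_connect_timeout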

> +--
> +-- Test case for gh-3610. Before the fix replica would fail with the assertion
> +-- when trying to connect to the same master twice.
> +--
> +box.schema.user.grant('guest', 'replication')
> +---
> +...
> +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> +---
> +- true
> +...
> +test_run:cmd("start server replica")
> +---
> +- true
> +...
> +test_run:cmd("switch replica")
> +---
> +- true
> +...
> +replication = box.cfg.replication[1]
> +---
> +...
> +box.cfg{replication = {replication, replication}}
> +---
> +- error: 'Incorrect value for option ''replication'': duplicate connection to the
> +    same replica'
> +...
> +-- Check the case when duplicate connection is detected in the background.
> +test_run:cmd("switch default")
> +---
> +- true
> +...
> +listen = box.cfg.listen
> +---
> +...
> +box.cfg{listen = ''}
> +---
> +...
> +test_run:cmd("switch replica")
> +---
> +- true
> +...
> +box.cfg{replication_connect_quorum = 0, replication_connect_timeout = 0.01}
> +---
> +...
> +box.cfg{replication = {replication, replication}}
> +---
> +...
> +test_run:cmd("switch default")
> +---
> +- true
> +...
> +box.cfg{listen = listen}
> +---
> +...
> +while test_run:grep_log('replica', 'duplicate connection') == nil do fiber.sleep(0.01) end
> +---
> +...
> +test_run:cmd("stop server replica")
> +---
> +- true
> +...
> +test_run:cmd("cleanup server replica")
> +---
> +- true
> +...
> +test_run:cmd("delete server replica")
> +---
> +- true
> +...
> +test_run:cleanup_cluster()
> +---
> +...
> +box.schema.user.revoke('guest', 'replication')
> +---
> +...
+box.cfg{replication = "", \
+        replication_connect_quorum = replication_connect_quorum, \
+        replication_connect_timeout = replication_connect_timeout}
+---
+...

This is the change I mentioned above; this piece is also unneeded.
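
I.e., this restore call can be dropped together with the saved
options above:

   -box.cfg{replication = "", \
   -        replication_connect_quorum = replication_connect_quorum, \
   -        replication_connect_timeout = replication_connect_timeout}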

> diff --git a/test/replication/misc_assert_connecting_master_twice_gh-3610.test.lua b/test/replication/misc_assert_connecting_master_twice_gh-3610.test.lua
> new file mode 100644
> index 000000000..9b12e623b
> --- /dev/null
> +++ b/test/replication/misc_assert_connecting_master_twice_gh-3610.test.lua
> @@ -0,0 +1,34 @@
> +test_run = require('test_run').new()
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +fiber = require('fiber')
> +
> +--
> +-- Test case for gh-3610. Before the fix replica would fail with the assertion
> +-- when trying to connect to the same master twice.
> +--
> +box.schema.user.grant('guest', 'replication')
> +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> +test_run:cmd("start server replica")
> +test_run:cmd("switch replica")
> +replication = box.cfg.replication[1]
> +box.cfg{replication = {replication, replication}}
> +
> +-- Check the case when duplicate connection is detected in the background.
> +test_run:cmd("switch default")
> +listen = box.cfg.listen
> +box.cfg{listen = ''}
> +
> +test_run:cmd("switch replica")
> +box.cfg{replication_connect_quorum = 0, replication_connect_timeout = 0.01}
> +box.cfg{replication = {replication, replication}}
> +
> +test_run:cmd("switch default")
> +box.cfg{listen = listen}
> +while test_run:grep_log('replica', 'duplicate connection') == nil do fiber.sleep(0.01) end
> +
> +test_run:cmd("stop server replica")
> +test_run:cmd("cleanup server replica")
> +test_run:cmd("delete server replica")
> +test_run:cleanup_cluster()
> +box.schema.user.revoke('guest', 'replication')
> diff --git a/test/replication/misc_assert_on_server_die_gh-2991.result b/test/replication/misc_assert_on_server_die_gh-2991.result
> new file mode 100644
> index 000000000..ea9e80f6b
> --- /dev/null
> +++ b/test/replication/misc_assert_on_server_die_gh-2991.result
> @@ -0,0 +1,31 @@
> +uuid = require('uuid')
> +---
> +...
The uuid module isn't used in this test case, so please remove the extra line.
> +test_run = require('test_run').new()
> +---
> +...
> +-- gh-2991 - Tarantool asserts on box.cfg.replication update if one of
> +-- servers is dead
> +replication_timeout = box.cfg.replication_timeout
> +---
> +...
> +replication_connect_timeout = box.cfg.replication_connect_timeout
> +---
> +...
> +box.cfg{replication_timeout=0.05, replication_connect_timeout=0.05, replication={}}
> +---
> +...
> +box.cfg{replication_connect_quorum=2}
> +---
> +...
> +box.cfg{replication = {'127.0.0.1:12345', box.cfg.listen}}
> +---
> +...
> +box.info.status
> +---
> +- orphan
> +...
> +box.info.ro
> +---
> +- true
> +...
> diff --git a/test/replication/misc_assert_on_server_die_gh-2991.test.lua b/test/replication/misc_assert_on_server_die_gh-2991.test.lua
> new file mode 100644
> index 000000000..adda839f7
> --- /dev/null
> +++ b/test/replication/misc_assert_on_server_die_gh-2991.test.lua
> @@ -0,0 +1,12 @@
> +uuid = require('uuid')
> +test_run = require('test_run').new()
> +
> +-- gh-2991 - Tarantool asserts on box.cfg.replication update if one of
> +-- servers is dead
> +replication_timeout = box.cfg.replication_timeout
> +replication_connect_timeout = box.cfg.replication_connect_timeout
> +box.cfg{replication_timeout=0.05, replication_connect_timeout=0.05, replication={}}
> +box.cfg{replication_connect_quorum=2}
> +box.cfg{replication = {'127.0.0.1:12345', box.cfg.listen}}
> +box.info.status
> +box.info.ro
> diff --git a/test/replication/misc_assert_replica_on_applier_disconnect_gh-3510.result b/test/replication/misc_assert_replica_on_applier_disconnect_gh-3510.result
> new file mode 100644
> index 000000000..82f5d9a23
> --- /dev/null
> +++ b/test/replication/misc_assert_replica_on_applier_disconnect_gh-3510.result
> @@ -0,0 +1,49 @@
> +uuid = require('uuid')
> +---
> +...
Same about the uuid.
> +test_run = require('test_run').new()
> +---
> +...
> +-- gh-3510 assertion failure in replica_on_applier_disconnect()
> +test_run:cmd('create server er_load1 with script="replication/er_load1.lua"')
> +---
> +- true
> +...
> +test_run:cmd('create server er_load2 with script="replication/er_load2.lua"')
> +---
> +- true
> +...
> +test_run:cmd('start server er_load1 with wait=False, wait_load=False')
> +---
> +- true
> +...
> +-- Instance er_load2 will fail with error ER_REPLICASET_UUID_MISMATCH.
> +-- This is OK since we only test here that er_load1 doesn't assert.
> +test_run:cmd('start server er_load2 with wait=True, wait_load=True, crash_expected = True')
> +---
> +- false
> +...
> +test_run:cmd('stop server er_load1')
> +---
> +- true
> +...
> +-- er_load2 exits automatically.
> +test_run:cmd('cleanup server er_load1')
> +---
> +- true
> +...
> +test_run:cmd('cleanup server er_load2')
> +---
> +- true
> +...
> +test_run:cmd('delete server er_load1')
> +---
> +- true
> +...
> +test_run:cmd('delete server er_load2')
> +---
> +- true
> +...
> +test_run:cleanup_cluster()
> +---
> +...
> diff --git a/test/replication/misc_assert_replica_on_applier_disconnect_gh-3510.test.lua b/test/replication/misc_assert_replica_on_applier_disconnect_gh-3510.test.lua
> new file mode 100644
> index 000000000..4e1d2a41e
> --- /dev/null
> +++ b/test/replication/misc_assert_replica_on_applier_disconnect_gh-3510.test.lua
> @@ -0,0 +1,17 @@
> +uuid = require('uuid')
> +test_run = require('test_run').new()
> +
> +-- gh-3510 assertion failure in replica_on_applier_disconnect()
> +test_run:cmd('create server er_load1 with script="replication/er_load1.lua"')
> +test_run:cmd('create server er_load2 with script="replication/er_load2.lua"')
> +test_run:cmd('start server er_load1 with wait=False, wait_load=False')
> +-- Instance er_load2 will fail with error ER_REPLICASET_UUID_MISMATCH.
> +-- This is OK since we only test here that er_load1 doesn't assert.
> +test_run:cmd('start server er_load2 with wait=True, wait_load=True, crash_expected = True')
> +test_run:cmd('stop server er_load1')
> +-- er_load2 exits automatically.
> +test_run:cmd('cleanup server er_load1')
> +test_run:cmd('cleanup server er_load2')
> +test_run:cmd('delete server er_load1')
> +test_run:cmd('delete server er_load2')
> +test_run:cleanup_cluster()
> diff --git a/test/replication/misc_crash_on_box_concurrent_update_gh-3606.result b/test/replication/misc_crash_on_box_concurrent_update_gh-3606.result
> new file mode 100644
> index 000000000..b43b00849
> --- /dev/null
> +++ b/test/replication/misc_crash_on_box_concurrent_update_gh-3606.result
> @@ -0,0 +1,50 @@
> +uuid = require('uuid')
> +---
> +...
Same about UUID.
> +test_run = require('test_run').new()
> +---
> +...
> +replication_timeout = box.cfg.replication_timeout
> +---
> +...
> +replication_connect_timeout = box.cfg.replication_connect_timeout
> +---
> +...
> +box.cfg{replication_timeout=0.05, replication_connect_timeout=0.05, replication={}}
> +---
> +...
> +-- gh-3606 - Tarantool crashes if box.cfg.replication is updated concurrently
> +fiber = require('fiber')
> +---
> +...
> +c = fiber.channel(2)
> +---
> +...
> +f = function() fiber.create(function() pcall(box.cfg, {replication = {12345}}) c:put(true) end) end
> +---
> +...
> +f()
> +---
> +...
> +f()
> +---
> +...
> +c:get()
> +---
> +- true
> +...
> +c:get()
> +---
> +- true
> +...
> +box.cfg{replication = "", replication_timeout = replication_timeout, replication_connect_timeout = replication_connect_timeout}
> +---
> +...
> +box.info.status
> +---
> +- running
> +...
> +box.info.ro
> +---
> +- false
> +...
> diff --git a/test/replication/misc_crash_on_box_concurrent_update_gh-3606.test.lua b/test/replication/misc_crash_on_box_concurrent_update_gh-3606.test.lua
> new file mode 100644
> index 000000000..17f9c6bc6
> --- /dev/null
> +++ b/test/replication/misc_crash_on_box_concurrent_update_gh-3606.test.lua
> @@ -0,0 +1,19 @@
> +uuid = require('uuid')
> +test_run = require('test_run').new()
> +
> +replication_timeout = box.cfg.replication_timeout
> +replication_connect_timeout = box.cfg.replication_connect_timeout
> +box.cfg{replication_timeout=0.05, replication_connect_timeout=0.05, replication={}}
> +
> +-- gh-3606 - Tarantool crashes if box.cfg.replication is updated concurrently
> +fiber = require('fiber')
> +c = fiber.channel(2)
> +f = function() fiber.create(function() pcall(box.cfg, {replication = {12345}}) c:put(true) end) end
> +f()
> +f()
> +c:get()
> +c:get()
> +
> +box.cfg{replication = "", replication_timeout = replication_timeout, replication_connect_timeout = replication_connect_timeout}
> +box.info.status
> +box.info.ro
> diff --git a/test/replication/misc_heartbeats_on_master_changes_gh-3160.result b/test/replication/misc_heartbeats_on_master_changes_gh-3160.result
> new file mode 100644
> index 000000000..cdb463614
> --- /dev/null
> +++ b/test/replication/misc_heartbeats_on_master_changes_gh-3160.result
> @@ -0,0 +1,76 @@
> +uuid = require('uuid')
> +---
> +...

Same about UUID.

> +test_run = require('test_run').new()
> +---
> +...

The branch contains this change:

+...
+replication_timeout = box.cfg.replication_timeout
+---
+...
+replication_connect_timeout = box.cfg.replication_connect_timeout
+---
+...
+box.cfg{replication_timeout=0.05, replication_connect_timeout=0.05, replication={}}
+---
+...

This change is extraneous in this test. The original test case uses the default
timeout values, and I don't think we should change them.
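
So the whole fragment can be dropped and the test left running with
the defaults, e.g.:

   -replication_timeout = box.cfg.replication_timeout
   -replication_connect_timeout = box.cfg.replication_connect_timeout
   -box.cfg{replication_timeout=0.05, replication_connect_timeout=0.05, replication={}}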

> +-- gh-3160 - Send heartbeats if there are changes from a remote master only
> +SERVERS = { 'autobootstrap1', 'autobootstrap2', 'autobootstrap3' }
> +---
> +...
> +-- Deploy a cluster.
> +test_run:create_cluster(SERVERS, "replication", {args="0.03"})
> +---
> +...
> +test_run:wait_fullmesh(SERVERS)
> +---
> +...
> +test_run:cmd("switch autobootstrap3")
> +---
> +- true
> +...
> +test_run = require('test_run').new()
> +---
> +...
> +fiber = require('fiber')
> +---
> +...
> +_ = box.schema.space.create('test_timeout'):create_index('pk')
> +---
> +...
> +test_run:cmd("setopt delimiter ';'")
> +---
> +- true
> +...
> +function wait_not_follow(replicaA, replicaB)
> +    return test_run:wait_cond(function()
> +        return replicaA.status ~= 'follow' or replicaB.status ~= 'follow'
> +    end, box.cfg.replication_timeout)
> +end;
> +---
> +...
> +function test_timeout()
> +    local replicaA = box.info.replication[1].upstream or box.info.replication[2].upstream
> +    local replicaB = box.info.replication[3].upstream or box.info.replication[2].upstream
> +    local follows = test_run:wait_cond(function()
> +        return replicaA.status == 'follow' or replicaB.status == 'follow'
> +    end)
> +    if not follows then error('replicas are not in the follow status') end
> +    for i = 0, 99 do
> +        box.space.test_timeout:replace({1})
> +        if wait_not_follow(replicaA, replicaB) then
> +            return error(box.info.replication)
> +        end
> +    end
> +    return true
> +end;
> +---
> +...
> +test_run:cmd("setopt delimiter ''");
> +---
> +- true
> +...
> +test_timeout()
> +---
> +- true
> +...
> +test_run:cmd("switch default")
> +---
> +- true
> +...
> +test_run:drop_cluster(SERVERS)
> +---
> +...
> +test_run:cleanup_cluster()

No need to call cleanup_cluster(). The 'default' instance wasn't part of
the cluster you ran in this test.
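
I.e., the drop_cluster(SERVERS) call above is enough; this line can go:

   -test_run:cleanup_cluster()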


+...
+box.cfg{replication = "", replication_timeout = replication_timeout, \
+        replication_connect_timeout = replication_connect_timeout}
+---
+...

One more extraneous change, related to the one above.


> +---
> +...
> diff --git a/test/replication/misc_heartbeats_on_master_changes_gh-3160.test.lua b/test/replication/misc_heartbeats_on_master_changes_gh-3160.test.lua
> new file mode 100644
> index 000000000..eba8a7725
> --- /dev/null
> +++ b/test/replication/misc_heartbeats_on_master_changes_gh-3160.test.lua
> @@ -0,0 +1,40 @@
> +uuid = require('uuid')
> +test_run = require('test_run').new()
> +
> +-- gh-3160 - Send heartbeats if there are changes from a remote master only
> +SERVERS = { 'autobootstrap1', 'autobootstrap2', 'autobootstrap3' }
> +
> +-- Deploy a cluster.
> +test_run:create_cluster(SERVERS, "replication", {args="0.03"})
> +test_run:wait_fullmesh(SERVERS)
> +test_run:cmd("switch autobootstrap3")
> +test_run = require('test_run').new()
> +fiber = require('fiber')
> +_ = box.schema.space.create('test_timeout'):create_index('pk')
> +test_run:cmd("setopt delimiter ';'")
> +function wait_not_follow(replicaA, replicaB)
> +    return test_run:wait_cond(function()
> +        return replicaA.status ~= 'follow' or replicaB.status ~= 'follow'
> +    end, box.cfg.replication_timeout)
> +end;
> +function test_timeout()
> +    local replicaA = box.info.replication[1].upstream or box.info.replication[2].upstream
> +    local replicaB = box.info.replication[3].upstream or box.info.replication[2].upstream
> +    local follows = test_run:wait_cond(function()
> +        return replicaA.status == 'follow' or replicaB.status == 'follow'
> +    end)
> +    if not follows then error('replicas are not in the follow status') end
> +    for i = 0, 99 do
> +        box.space.test_timeout:replace({1})
> +        if wait_not_follow(replicaA, replicaB) then
> +            return error(box.info.replication)
> +        end
> +    end
> +    return true
> +end;
> +test_run:cmd("setopt delimiter ''");
> +test_timeout()
> +
> +test_run:cmd("switch default")
> +test_run:drop_cluster(SERVERS)
> +test_run:cleanup_cluster()
> diff --git a/test/replication/misc_no_failure_on_error_reading_wal_gh-4399.result b/test/replication/misc_no_failure_on_error_reading_wal_gh-4399.result
> new file mode 100644
> index 000000000..64a4de26c
> --- /dev/null
> +++ b/test/replication/misc_no_failure_on_error_reading_wal_gh-4399.result
> @@ -0,0 +1,97 @@
> +test_run = require('test_run').new()
> +---
> +...
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +---
> +...
> +fiber = require('fiber')
The same applies to restarting the server and requiring uuid and fiber: not needed.
> +---
> +...
> +--
> +-- gh-4399 Check that an error reading WAL directory on subscribe
> +-- doesn't lead to a permanent replication failure.
> +--
> +box.schema.user.grant("guest", "replication")
> +---
> +...
> +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> +---
> +- true
> +...
> +test_run:cmd("start server replica")
> +---
> +- true
> +...
> +-- Make the WAL directory inaccessible.
> +fio = require('fio')
> +---
> +...
> +path = fio.abspath(box.cfg.wal_dir)
> +---
> +...
> +fio.chmod(path, 0)
> +---
> +- true
> +...
> +-- Break replication on timeout.
> +replication_timeout = box.cfg.replication_timeout
> +---
> +...
> +box.cfg{replication_timeout = 9000}
> +---
> +...
> +test_run:cmd("switch replica")
> +---
> +- true
> +...
> +test_run:wait_cond(function() return box.info.replication[1].upstream.status ~= 'follow' end)
> +---
> +- true
> +...
> +require('fiber').sleep(box.cfg.replication_timeout)
> +---
> +...
> +test_run:cmd("switch default")
> +---
> +- true
> +...
> +box.cfg{replication_timeout = replication_timeout}
> +---
> +...
> +-- Restore access to the WAL directory.
> +-- Wait for replication to be reestablished.
> +fio.chmod(path, tonumber('777', 8))
> +---
> +- true
> +...
> +test_run:cmd("switch replica")
> +---
> +- true
> +...
> +test_run:wait_cond(function() return box.info.replication[1].upstream.status == 'follow' end)
> +---
> +- true
> +...
> +test_run:cmd("switch default")
> +---
> +- true
> +...
> +test_run:cmd("stop server replica")
> +---
> +- true
> +...
> +test_run:cmd("cleanup server replica")
> +---
> +- true
> +...
> +test_run:cmd("delete server replica")
> +---
> +- true
> +...
> +test_run:cleanup_cluster()
> +---
> +...
> +box.schema.user.revoke('guest', 'replication')
> +---
> +...
> diff --git a/test/replication/misc_no_failure_on_error_reading_wal_gh-4399.test.lua b/test/replication/misc_no_failure_on_error_reading_wal_gh-4399.test.lua
> new file mode 100644
> index 000000000..15e19a211
> --- /dev/null
> +++ b/test/replication/misc_no_failure_on_error_reading_wal_gh-4399.test.lua
> @@ -0,0 +1,39 @@
> +test_run = require('test_run').new()
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +fiber = require('fiber')
> +
> +--
> +-- gh-4399 Check that an error reading WAL directory on subscribe
> +-- doesn't lead to a permanent replication failure.
> +--
> +box.schema.user.grant("guest", "replication")
> +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> +test_run:cmd("start server replica")
> +
> +-- Make the WAL directory inaccessible.
> +fio = require('fio')
> +path = fio.abspath(box.cfg.wal_dir)
> +fio.chmod(path, 0)
> +
> +-- Break replication on timeout.
> +replication_timeout = box.cfg.replication_timeout
> +box.cfg{replication_timeout = 9000}
> +test_run:cmd("switch replica")
> +test_run:wait_cond(function() return box.info.replication[1].upstream.status ~= 'follow' end)
> +require('fiber').sleep(box.cfg.replication_timeout)
> +test_run:cmd("switch default")
> +box.cfg{replication_timeout = replication_timeout}
> +
> +-- Restore access to the WAL directory.
> +-- Wait for replication to be reestablished.
> +fio.chmod(path, tonumber('777', 8))
> +test_run:cmd("switch replica")
> +test_run:wait_cond(function() return box.info.replication[1].upstream.status == 'follow' end)
> +test_run:cmd("switch default")
> +
> +test_run:cmd("stop server replica")
> +test_run:cmd("cleanup server replica")
> +test_run:cmd("delete server replica")
> +test_run:cleanup_cluster()
> +box.schema.user.revoke('guest', 'replication')
> diff --git a/test/replication/misc_no_panic_on_connected_gh-3637.result b/test/replication/misc_no_panic_on_connected_gh-3637.result
> new file mode 100644
> index 000000000..693b18c42
> --- /dev/null
> +++ b/test/replication/misc_no_panic_on_connected_gh-3637.result
> @@ -0,0 +1,72 @@
> +test_run = require('test_run').new()
> +---
> +...
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +---
> +...
Again, no need for uuid here, and no need for the restart.
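
So the opening can be just (same sketch as above):

    test_run = require('test_run').new()

The fiber require a few lines below has to stay, though: the test
uses fiber.sleep() to give the replica time to retry the auth.
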
> +--
> +-- Test case for gh-3637, gh-4550. Before the fix replica would
> +-- exit with an error if a user does not exist or a password is
> +-- incorrect. Now check that we don't hang/panic and successfully
> +-- connect.
> +--
> +fiber = require('fiber')
> +---
> +...
> +test_run:cmd("create server replica_auth with rpl_master=default, script='replication/replica_auth.lua'")
> +---
> +- true
> +...
> +test_run:cmd("start server replica_auth with wait=False, wait_load=False, args='cluster:pass 0.05'")
> +---
> +- true
> +...
> +-- Wait a bit to make sure replica waits till user is created.
> +fiber.sleep(0.1)
> +---
> +...
> +box.schema.user.create('cluster')
> +---
> +...
> +-- The user is created. Let the replica fail auth request due to
> +-- a wrong password.
> +fiber.sleep(0.1)
> +---
> +...
> +box.schema.user.passwd('cluster', 'pass')
> +---
> +...
> +box.schema.user.grant('cluster', 'replication')
> +---
> +...
> +while box.info.replication[2] == nil do fiber.sleep(0.01) end
> +---
> +...
> +vclock = test_run:get_vclock('default')
> +---
> +...
> +vclock[0] = nil
> +---
> +...
> +_ = test_run:wait_vclock('replica_auth', vclock)
> +---
> +...
> +test_run:cmd("stop server replica_auth")
> +---
> +- true
> +...
> +test_run:cmd("cleanup server replica_auth")
> +---
> +- true
> +...
> +test_run:cmd("delete server replica_auth")
> +---
> +- true
> +...
> +test_run:cleanup_cluster()
> +---
> +...
> +box.schema.user.drop('cluster')
> +---
> +...
> diff --git a/test/replication/misc_no_panic_on_connected_gh-3637.test.lua b/test/replication/misc_no_panic_on_connected_gh-3637.test.lua
> new file mode 100644
> index 000000000..a1e51198b
> --- /dev/null
> +++ b/test/replication/misc_no_panic_on_connected_gh-3637.test.lua
> @@ -0,0 +1,33 @@
> +test_run = require('test_run').new()
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +
> +--
> +-- Test case for gh-3637, gh-4550. Before the fix replica would
> +-- exit with an error if a user does not exist or a password is
> +-- incorrect. Now check that we don't hang/panic and successfully
> +-- connect.
> +--
> +fiber = require('fiber')
> +test_run:cmd("create server replica_auth with rpl_master=default, script='replication/replica_auth.lua'")
> +test_run:cmd("start server replica_auth with wait=False, wait_load=False, args='cluster:pass 0.05'")
> +-- Wait a bit to make sure replica waits till user is created.
> +fiber.sleep(0.1)
> +box.schema.user.create('cluster')
> +-- The user is created. Let the replica fail auth request due to
> +-- a wrong password.
> +fiber.sleep(0.1)
> +box.schema.user.passwd('cluster', 'pass')
> +box.schema.user.grant('cluster', 'replication')
> +
> +while box.info.replication[2] == nil do fiber.sleep(0.01) end
> +vclock = test_run:get_vclock('default')
> +vclock[0] = nil
> +_ = test_run:wait_vclock('replica_auth', vclock)
> +
> +test_run:cmd("stop server replica_auth")
> +test_run:cmd("cleanup server replica_auth")
> +test_run:cmd("delete server replica_auth")
> +test_run:cleanup_cluster()
> +
> +box.schema.user.drop('cluster')
> diff --git a/test/replication/misc_no_restart_on_same_configuration_gh-3711.result b/test/replication/misc_no_restart_on_same_configuration_gh-3711.result
> new file mode 100644
> index 000000000..dd9ef848c
> --- /dev/null
> +++ b/test/replication/misc_no_restart_on_same_configuration_gh-3711.result
> @@ -0,0 +1,107 @@
> +test_run = require('test_run').new()
> +---
> +...
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +---
> +...
> +fiber = require('fiber')
Same about restart, uuid and fiber.
> +---
> +...
> +--
> +-- gh-3711 Do not restart replication on box.cfg in case the
> +-- configuration didn't change.
> +--
> +box.schema.user.grant('guest', 'replication')
> +---
> +...
> +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> +---
> +- true
> +...
> +test_run:cmd("start server replica")
> +---
> +- true
> +...
> +-- Access rights are checked only during reconnect. If the new
> +-- and old configurations are equivalent, no reconnect will be
> +-- issued and replication should continue working.
> +box.schema.user.revoke('guest', 'replication')
> +---
> +...
> +test_run:cmd("switch replica")
> +---
> +- true
> +...
> +replication = box.cfg.replication[1]
> +---
> +...
> +box.cfg{replication = {replication}}
> +---
> +...
> +box.info.status == 'running'
> +---
> +- true
> +...
> +box.cfg{replication = replication}
> +---
> +...
> +box.info.status == 'running'
> +---
> +- true
> +...
> +-- Check that comparison of tables works as expected as well.
> +test_run:cmd("switch default")
> +---
> +- true
> +...
> +box.schema.user.grant('guest', 'replication')
> +---
> +...
> +test_run:cmd("switch replica")
> +---
> +- true
> +...
> +replication = box.cfg.replication
> +---
> +...
> +table.insert(replication, box.cfg.listen)
> +---
> +...
> +test_run:cmd("switch default")
> +---
> +- true
> +...
> +box.schema.user.revoke('guest', 'replication')
> +---
> +...
> +test_run:cmd("switch replica")
> +---
> +- true
> +...
> +box.cfg{replication = replication}
> +---
> +...
> +box.info.status == 'running'
> +---
> +- true
> +...
> +test_run:cmd("switch default")
> +---
> +- true
> +...
> +test_run:cmd("stop server replica")
> +---
> +- true
> +...
> +test_run:cmd("cleanup server replica")
> +---
> +- true
> +...
> +test_run:cmd("delete server replica")
> +---
> +- true
> +...
> +test_run:cleanup_cluster()
> +---
> +...
> diff --git a/test/replication/misc_no_restart_on_same_configuration_gh-3711.test.lua b/test/replication/misc_no_restart_on_same_configuration_gh-3711.test.lua
> new file mode 100644
> index 000000000..14b522ead
> --- /dev/null
> +++ b/test/replication/misc_no_restart_on_same_configuration_gh-3711.test.lua
> @@ -0,0 +1,41 @@
> +test_run = require('test_run').new()
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +fiber = require('fiber')
> +
> +--
> +-- gh-3711 Do not restart replication on box.cfg in case the
> +-- configuration didn't change.
> +--
> +box.schema.user.grant('guest', 'replication')
> +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> +test_run:cmd("start server replica")
> +
> +-- Access rights are checked only during reconnect. If the new
> +-- and old configurations are equivalent, no reconnect will be
> +-- issued and replication should continue working.
> +box.schema.user.revoke('guest', 'replication')
> +test_run:cmd("switch replica")
> +replication = box.cfg.replication[1]
> +box.cfg{replication = {replication}}
> +box.info.status == 'running'
> +box.cfg{replication = replication}
> +box.info.status == 'running'
> +
> +-- Check that comparison of tables works as expected as well.
> +test_run:cmd("switch default")
> +box.schema.user.grant('guest', 'replication')
> +test_run:cmd("switch replica")
> +replication = box.cfg.replication
> +table.insert(replication, box.cfg.listen)
> +test_run:cmd("switch default")
> +box.schema.user.revoke('guest', 'replication')
> +test_run:cmd("switch replica")
> +box.cfg{replication = replication}
> +box.info.status == 'running'
> +
> +test_run:cmd("switch default")
> +test_run:cmd("stop server replica")
> +test_run:cmd("cleanup server replica")
> +test_run:cmd("delete server replica")
> +test_run:cleanup_cluster()
> diff --git a/test/replication/misc_no_socket_leak_on_replica_disconnect_gh-3642.result b/test/replication/misc_no_socket_leak_on_replica_disconnect_gh-3642.result
> new file mode 100644
> index 000000000..3680bcebb
> --- /dev/null
> +++ b/test/replication/misc_no_socket_leak_on_replica_disconnect_gh-3642.result
> @@ -0,0 +1,98 @@
> +test_run = require('test_run').new()
> +---
> +...
> +test_run:cmd("restart server default")

Unneeded restart. Correct me if I'm wrong.

> +uuid = require('uuid')

You don't need the uuid module in this testcase.
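
With both gone, the test would start like this (a sketch, assuming
the rlimit manipulations don't need a fresh server either):

    test_run = require('test_run').new()
    box.schema.user.grant('guest', 'replication')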

> +---
> +...
> +box.schema.user.grant('guest', 'replication')
> +---
> +...
> +-- gh-3642 - Check that socket file descriptor doesn't leak
> +-- when a replica is disconnected.
> +rlimit = require('rlimit')
> +---
> +...
> +lim = rlimit.limit()
> +---
> +...
> +rlimit.getrlimit(rlimit.RLIMIT_NOFILE, lim)
> +---
> +...
> +old_fno = lim.rlim_cur
> +---
> +...
> +lim.rlim_cur = 64
> +---
> +...
> +rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
> +---
> +...
> +test_run:cmd('create server sock with rpl_master=default, script="replication/replica.lua"')
> +---
> +- true
> +...
> +test_run:cmd('start server sock')
> +---
> +- true
> +...
> +test_run:cmd('switch sock')
> +---
> +- true
> +...
> +test_run = require('test_run').new()
> +---
> +...
> +fiber = require('fiber')
> +---
> +...
> +test_run:cmd("setopt delimiter ';'")
> +---
> +- true
> +...
> +for i = 1, 64 do
> +    local replication = box.cfg.replication
> +    box.cfg{replication = {}}
> +    box.cfg{replication = replication}
> +    while box.info.replication[1].upstream.status ~= 'follow' do
> +        fiber.sleep(0.001)
> +    end
> +end;
> +---
> +...
> +test_run:cmd("setopt delimiter ''");
> +---
> +- true
> +...
> +box.info.replication[1].upstream.status
> +---
> +- follow
> +...
> +test_run:cmd('switch default')
> +---
> +- true
> +...
> +lim.rlim_cur = old_fno
> +---
> +...
> +rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
> +---
> +...
> +test_run:cmd("stop server sock")
> +---
> +- true
> +...
> +test_run:cmd("cleanup server sock")
> +---
> +- true
> +...
> +test_run:cmd("delete server sock")
> +---
> +- true
> +...
> +test_run:cleanup_cluster()
> +---
> +...
> +box.schema.user.revoke('guest', 'replication')
> +---
> +...
> diff --git a/test/replication/misc_no_socket_leak_on_replica_disconnect_gh-3642.test.lua b/test/replication/misc_no_socket_leak_on_replica_disconnect_gh-3642.test.lua
> new file mode 100644
> index 000000000..08ef9ec0d
> --- /dev/null
> +++ b/test/replication/misc_no_socket_leak_on_replica_disconnect_gh-3642.test.lua
> @@ -0,0 +1,44 @@
> +test_run = require('test_run').new()
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +
> +box.schema.user.grant('guest', 'replication')
> +
> +-- gh-3642 - Check that socket file descriptor doesn't leak
> +-- when a replica is disconnected.
> +rlimit = require('rlimit')
> +lim = rlimit.limit()
> +rlimit.getrlimit(rlimit.RLIMIT_NOFILE, lim)
> +old_fno = lim.rlim_cur
> +lim.rlim_cur = 64
> +rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
> +
> +test_run:cmd('create server sock with rpl_master=default, script="replication/replica.lua"')
> +test_run:cmd('start server sock')
> +test_run:cmd('switch sock')
> +test_run = require('test_run').new()
> +fiber = require('fiber')
> +test_run:cmd("setopt delimiter ';'")
> +for i = 1, 64 do
> +    local replication = box.cfg.replication
> +    box.cfg{replication = {}}
> +    box.cfg{replication = replication}
> +    while box.info.replication[1].upstream.status ~= 'follow' do
> +        fiber.sleep(0.001)
> +    end
> +end;
> +test_run:cmd("setopt delimiter ''");
> +
> +box.info.replication[1].upstream.status
> +
> +test_run:cmd('switch default')
> +
> +lim.rlim_cur = old_fno
> +rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
> +
> +test_run:cmd("stop server sock")
> +test_run:cmd("cleanup server sock")
> +test_run:cmd("delete server sock")
> +test_run:cleanup_cluster()
> +
> +box.schema.user.revoke('guest', 'replication')
> diff --git a/test/replication/misc_orphan_on_reconfiguration_error_gh-4424.result b/test/replication/misc_orphan_on_reconfiguration_error_gh-4424.result
> new file mode 100644
> index 000000000..6a9d1148d
> --- /dev/null
> +++ b/test/replication/misc_orphan_on_reconfiguration_error_gh-4424.result
> @@ -0,0 +1,88 @@
> +uuid = require('uuid')
> +---
> +...
> +test_run = require('test_run').new()
> +---
> +...
> +fiber = require('fiber')

UUID and fiber aren't used in this testcase.

> +---
> +...
> +--
> +-- gh-4424 Always enter orphan mode on error in replication
> +-- configuration change.
> +--
> +replication_connect_timeout = box.cfg.replication_connect_timeout
> +---
> +...
> +replication_connect_quorum = box.cfg.replication_connect_quorum
> +---
> +...
> +box.cfg{replication="12345", replication_connect_timeout=0.1, replication_connect_quorum=1}
> +---
> +...
> +box.info.status
> +---
> +- orphan
> +...
> +box.info.ro
> +---
> +- true
> +...
> +-- reset replication => leave orphan mode
> +box.cfg{replication=""}
> +---
> +...
> +box.info.status
> +---
> +- running
> +...
> +box.info.ro
> +---
> +- false
> +...
> +-- no switch to orphan when quorum == 0
> +box.cfg{replication="12345", replication_connect_quorum=0}
> +---
> +...
> +box.info.status
> +---
> +- running
> +...
> +box.info.ro
> +---
> +- false
> +...
> +-- we could connect to one out of two replicas. Set orphan.
> +box.cfg{replication_connect_quorum=2}
> +---
> +...
> +box.cfg{replication={box.cfg.listen, "12345"}}
> +---
> +...
> +box.info.status
> +---
> +- orphan
> +...
> +box.info.ro
> +---
> +- true
> +...
> +-- lower quorum => leave orphan mode
> +box.cfg{replication_connect_quorum=1}
> +---
> +...
> +box.info.status
> +---
> +- running
> +...
> +box.info.ro
> +---
> +- false
> +...
> +box.cfg{                                                        \
> +    replication = {},                                           \
> +    replication_connect_quorum = replication_connect_quorum,    \
> +    replication_connect_timeout = replication_connect_timeout   \
> +}
> +---
> +...
> diff --git a/test/replication/misc_orphan_on_reconfiguration_error_gh-4424.test.lua b/test/replication/misc_orphan_on_reconfiguration_error_gh-4424.test.lua
> new file mode 100644
> index 000000000..7d1a70a36
> --- /dev/null
> +++ b/test/replication/misc_orphan_on_reconfiguration_error_gh-4424.test.lua
> @@ -0,0 +1,37 @@
> +uuid = require('uuid')
> +test_run = require('test_run').new()
> +fiber = require('fiber')
> +
> +--
> +-- gh-4424 Always enter orphan mode on error in replication
> +-- configuration change.
> +--
> +replication_connect_timeout = box.cfg.replication_connect_timeout
> +replication_connect_quorum = box.cfg.replication_connect_quorum
> +box.cfg{replication="12345", replication_connect_timeout=0.1, replication_connect_quorum=1}
> +box.info.status
> +box.info.ro
> +-- reset replication => leave orphan mode
> +box.cfg{replication=""}
> +box.info.status
> +box.info.ro
> +-- no switch to orphan when quorum == 0
> +box.cfg{replication="12345", replication_connect_quorum=0}
> +box.info.status
> +box.info.ro
> +
> +-- we could connect to one out of two replicas. Set orphan.
> +box.cfg{replication_connect_quorum=2}
> +box.cfg{replication={box.cfg.listen, "12345"}}
> +box.info.status
> +box.info.ro
> +-- lower quorum => leave orphan mode
> +box.cfg{replication_connect_quorum=1}
> +box.info.status
> +box.info.ro
> +
> +box.cfg{                                                        \
> +    replication = {},                                           \
> +    replication_connect_quorum = replication_connect_quorum,    \
> +    replication_connect_timeout = replication_connect_timeout   \
> +}
> diff --git a/test/replication/misc_rebootstrap_from_ro_master_gh-3111.result b/test/replication/misc_rebootstrap_from_ro_master_gh-3111.result
> new file mode 100644
> index 000000000..7ffca1585
> --- /dev/null
> +++ b/test/replication/misc_rebootstrap_from_ro_master_gh-3111.result
> @@ -0,0 +1,58 @@
> +test_run = require('test_run').new()
> +---
> +...
> +test_run:cmd("restart server default")

Why do you need it? The test seems to pass just fine without the
restart. Restarting the server will take some time if this test
is executed after one that performs lots of DML operations, so
you'd better remove the change.
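
Note that uuid must stay in this test: replica_uuid = uuid.new()
is used below to start the replica. So only the restart goes away
(a sketch):

    test_run = require('test_run').new()
    uuid = require('uuid')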

> +uuid = require('uuid')
> +---
> +...
> +box.schema.user.grant('guest', 'replication')
> +---
> +...
> +-- gh-3111 - Allow to rebootstrap a replica from a read-only master
> +replica_uuid = uuid.new()
> +---
> +...
> +test_run:cmd('create server test with rpl_master=default, script="replication/replica_uuid.lua"')
> +---
> +- true
> +...
> +test_run:cmd(string.format('start server test with args="%s"', replica_uuid))
> +---
> +- true
> +...
> +test_run:cmd('stop server test')
> +---
> +- true
> +...
> +test_run:cmd('cleanup server test')
> +---
> +- true
> +...
> +box.cfg{read_only = true}
> +---
> +...
> +test_run:cmd(string.format('start server test with args="%s"', replica_uuid))
> +---
> +- true
> +...
> +test_run:cmd('stop server test')
> +---
> +- true
> +...
> +test_run:cmd('cleanup server test')
> +---
> +- true
> +...
> +box.cfg{read_only = false}
> +---
> +...
> +test_run:cmd('delete server test')
> +---
> +- true
> +...
> +test_run:cleanup_cluster()
> +---
> +...
> +box.schema.user.revoke('guest', 'replication')
> +---
> +...
> diff --git a/test/replication/misc_rebootstrap_from_ro_master_gh-3111.test.lua b/test/replication/misc_rebootstrap_from_ro_master_gh-3111.test.lua
> new file mode 100644
> index 000000000..bb9b4a80f
> --- /dev/null
> +++ b/test/replication/misc_rebootstrap_from_ro_master_gh-3111.test.lua
> @@ -0,0 +1,20 @@
> +test_run = require('test_run').new()
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +
> +box.schema.user.grant('guest', 'replication')
> +
> +-- gh-3111 - Allow to rebootstrap a replica from a read-only master
> +replica_uuid = uuid.new()
> +test_run:cmd('create server test with rpl_master=default, script="replication/replica_uuid.lua"')
> +test_run:cmd(string.format('start server test with args="%s"', replica_uuid))
> +test_run:cmd('stop server test')
> +test_run:cmd('cleanup server test')
> +box.cfg{read_only = true}
> +test_run:cmd(string.format('start server test with args="%s"', replica_uuid))
> +test_run:cmd('stop server test')
> +test_run:cmd('cleanup server test')
> +box.cfg{read_only = false}
> +test_run:cmd('delete server test')
> +test_run:cleanup_cluster()
> +box.schema.user.revoke('guest', 'replication')
> diff --git a/test/replication/misc_replica_checks_cluster_id_gh-3704.result b/test/replication/misc_replica_checks_cluster_id_gh-3704.result
> new file mode 100644
> index 000000000..e6bc8b4d8
> --- /dev/null
> +++ b/test/replication/misc_replica_checks_cluster_id_gh-3704.result
> @@ -0,0 +1,71 @@
> +test_run = require('test_run').new()
> +---
> +...
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +---
> +...
> +fiber = require('fiber')

No need to restart the instance or to require the fiber module.

uuid is needed this time, though: uuid.new() is used below to
forge the master's replicaset uuid.

> +---
> +...
> +--
> +-- gh-3704 move cluster id check to replica
> +--
> +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> +---
> +- true
> +...
> +box.schema.user.grant("guest", "replication")
> +---
> +...
> +test_run:cmd("start server replica")
> +---
> +- true
> +...
> +test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
> +---
> +- null
> +...
> +box.info.replication[2].downstream.status
> +---
> +- follow
> +...
> +-- change master's cluster uuid and check that replica doesn't connect.
> +test_run:cmd("stop server replica")
> +---
> +- true
> +...
> +_ = box.space._schema:replace{'cluster', tostring(uuid.new())}
> +---
> +...
> +-- master believes replica is in cluster, but their cluster UUIDs differ.
> +test_run:cmd("start server replica")
> +---
> +- true
> +...
> +test_run:wait_log("replica", "REPLICASET_UUID_MISMATCH", nil, 1.0)
> +---
> +- REPLICASET_UUID_MISMATCH
> +...
> +test_run:wait_downstream(2, {status = 'stopped'})
> +---
> +- true
> +...
> +test_run:cmd("stop server replica")
> +---
> +- true
> +...
> +test_run:cmd("cleanup server replica")
> +---
> +- true
> +...
> +test_run:cmd("delete server replica")
> +---
> +- true
> +...
> +test_run:cleanup_cluster()
> +---
> +...
> +box.schema.user.revoke('guest', 'replication')
> +---
> +...
> diff --git a/test/replication/misc_replica_checks_cluster_id_gh-3704.test.lua b/test/replication/misc_replica_checks_cluster_id_gh-3704.test.lua
> new file mode 100644
> index 000000000..8a23cc1fe
> --- /dev/null
> +++ b/test/replication/misc_replica_checks_cluster_id_gh-3704.test.lua
> @@ -0,0 +1,26 @@
> +test_run = require('test_run').new()
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +fiber = require('fiber')
> +
> +--
> +-- gh-3704 move cluster id check to replica
> +--
> +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
> +box.schema.user.grant("guest", "replication")
> +test_run:cmd("start server replica")
> +test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
> +box.info.replication[2].downstream.status
> +-- change master's cluster uuid and check that replica doesn't connect.
> +test_run:cmd("stop server replica")
> +_ = box.space._schema:replace{'cluster', tostring(uuid.new())}
> +-- master believes replica is in cluster, but their cluster UUIDs differ.
> +test_run:cmd("start server replica")
> +test_run:wait_log("replica", "REPLICASET_UUID_MISMATCH", nil, 1.0)
> +test_run:wait_downstream(2, {status = 'stopped'})
> +
> +test_run:cmd("stop server replica")
> +test_run:cmd("cleanup server replica")
> +test_run:cmd("delete server replica")
> +test_run:cleanup_cluster()
> +box.schema.user.revoke('guest', 'replication')
> diff --git a/test/replication/misc_return_on_quorum_0_gh-3760.result b/test/replication/misc_return_on_quorum_0_gh-3760.result
> new file mode 100644
> index 000000000..2eb622896
> --- /dev/null
> +++ b/test/replication/misc_return_on_quorum_0_gh-3760.result
> @@ -0,0 +1,48 @@
> +uuid = require('uuid')
> +---
> +...
> +test_run = require('test_run').new()
> +---
> +...
> +fiber = require('fiber')

Unneeded uuid and fiber requires.

> +---
> +...
> +replication_connect_timeout = box.cfg.replication_connect_timeout
> +---
> +...
> +replication_connect_quorum = box.cfg.replication_connect_quorum
> +---
> +...
> +box.cfg{replication="12345", replication_connect_timeout=0.1, replication_connect_quorum=1}

This box.cfg call is extraneous. The test will be just fine without it.
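
With that call dropped, and the unused requires above removed, the
test can open like this (a sketch; the saved defaults are still
restored at the end):

    test_run = require('test_run').new()

    replication_connect_timeout = box.cfg.replication_connect_timeout
    replication_connect_quorum = box.cfg.replication_connect_quorum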

> +---
> +...
> +--
> +-- gh-3760: replication quorum 0 on reconfiguration should return
> +-- from box.cfg immediately.
> +--
> +replication = box.cfg.replication
> +---
> +...
> +box.cfg{                                                        \
> +    replication = {},                                           \
> +    replication_connect_quorum = 0,                             \
> +    replication_connect_timeout = 1000000                       \
> +}
> +---
> +...
> +-- The call below would hang, if quorum 0 is ignored, or checked
> +-- too late.
> +box.cfg{replication = {'localhost:12345'}}
> +---
> +...
> +box.info.status
> +---
> +- running
> +...
> +box.cfg{                                                        \
> +    replication = {},                                           \
> +    replication_connect_quorum = replication_connect_quorum,    \
> +    replication_connect_timeout = replication_connect_timeout   \
> +}
> +---
> +...
> diff --git a/test/replication/misc_return_on_quorum_0_gh-3760.test.lua b/test/replication/misc_return_on_quorum_0_gh-3760.test.lua
> new file mode 100644
> index 000000000..9e0651032
> --- /dev/null
> +++ b/test/replication/misc_return_on_quorum_0_gh-3760.test.lua
> @@ -0,0 +1,27 @@
> +uuid = require('uuid')
> +test_run = require('test_run').new()
> +fiber = require('fiber')
> +
> +replication_connect_timeout = box.cfg.replication_connect_timeout
> +replication_connect_quorum = box.cfg.replication_connect_quorum
> +box.cfg{replication="12345", replication_connect_timeout=0.1, replication_connect_quorum=1}
> +
> +--
> +-- gh-3760: replication quorum 0 on reconfiguration should return
> +-- from box.cfg immediately.
> +--
> +replication = box.cfg.replication
> +box.cfg{                                                        \
> +    replication = {},                                           \
> +    replication_connect_quorum = 0,                             \
> +    replication_connect_timeout = 1000000                       \
> +}
> +-- The call below would hang, if quorum 0 is ignored, or checked
> +-- too late.
> +box.cfg{replication = {'localhost:12345'}}
> +box.info.status
> +box.cfg{                                                        \
> +    replication = {},                                           \
> +    replication_connect_quorum = replication_connect_quorum,    \
> +    replication_connect_timeout = replication_connect_timeout   \
> +}
> diff --git a/test/replication/misc_value_not_replicated_on_iproto_request_gh-3247.result b/test/replication/misc_value_not_replicated_on_iproto_request_gh-3247.result
> new file mode 100644
> index 000000000..d416bd9a6
> --- /dev/null
> +++ b/test/replication/misc_value_not_replicated_on_iproto_request_gh-3247.result
> @@ -0,0 +1,90 @@
> +test_run = require('test_run').new()
> +---
> +...
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +---
> +...

You don't need the uuid module in this testcase.

> +-- Deploy a cluster.
> +SERVERS = { 'autobootstrap1', 'autobootstrap2', 'autobootstrap3' }
> +---
> +...
> +test_run:create_cluster(SERVERS, "replication", {args="0.03"})
> +---
> +...
> +test_run:wait_fullmesh(SERVERS)
> +---
> +...
> +-- gh-3247 - Sequence-generated value is not replicated in case
> +-- the request was sent via iproto.
> +test_run:cmd("switch autobootstrap1")
> +---
> +- true
> +...
> +net_box = require('net.box')
> +---
> +...
> +_ = box.schema.space.create('space1')
> +---
> +...
> +_ = box.schema.sequence.create('seq')
> +---
> +...
> +_ = box.space.space1:create_index('primary', {sequence = true} )
> +---
> +...
> +_ = box.space.space1:create_index('secondary', {parts = {2, 'unsigned'}})
> +---
> +...
> +box.schema.user.grant('guest', 'read,write', 'space', 'space1')
> +---
> +...
> +c = net_box.connect(box.cfg.listen)
> +---
> +...
> +c.space.space1:insert{box.NULL, "data"} -- fails, but bumps sequence value
> +---
> +- error: 'Tuple field 2 type does not match one required by operation: expected unsigned'
> +...
> +c.space.space1:insert{box.NULL, 1, "data"}
> +---
> +- [2, 1, 'data']
> +...
> +box.space.space1:select{}
> +---
> +- - [2, 1, 'data']
> +...
> +vclock = test_run:get_vclock("autobootstrap1")
> +---
> +...
> +vclock[0] = nil
> +---
> +...
> +_ = test_run:wait_vclock("autobootstrap2", vclock)
> +---
> +...
> +test_run:cmd("switch autobootstrap2")
> +---
> +- true
> +...
> +box.space.space1:select{}
> +---
> +- - [2, 1, 'data']
> +...
> +test_run:cmd("switch autobootstrap1")
> +---
> +- true
> +...
> +box.space.space1:drop()
> +---
> +...
> +test_run:cmd("switch default")
> +---
> +- true
> +...
> +test_run:drop_cluster(SERVERS)
> +---
> +...
> +test_run:cleanup_cluster()
> +---
> +...
> diff --git a/test/replication/misc_value_not_replicated_on_iproto_request_gh-3247.test.lua b/test/replication/misc_value_not_replicated_on_iproto_request_gh-3247.test.lua
> new file mode 100644
> index 000000000..586e8b997
> --- /dev/null
> +++ b/test/replication/misc_value_not_replicated_on_iproto_request_gh-3247.test.lua
> @@ -0,0 +1,33 @@
> +test_run = require('test_run').new()
> +test_run:cmd("restart server default")
> +uuid = require('uuid')
> +
> +-- Deploy a cluster.
> +SERVERS = { 'autobootstrap1', 'autobootstrap2', 'autobootstrap3' }
> +test_run:create_cluster(SERVERS, "replication", {args="0.03"})
> +test_run:wait_fullmesh(SERVERS)
> +
> +-- gh-3247 - Sequence-generated value is not replicated in case
> +-- the request was sent via iproto.
> +test_run:cmd("switch autobootstrap1")
> +net_box = require('net.box')
> +_ = box.schema.space.create('space1')
> +_ = box.schema.sequence.create('seq')
> +_ = box.space.space1:create_index('primary', {sequence = true} )
> +_ = box.space.space1:create_index('secondary', {parts = {2, 'unsigned'}})
> +box.schema.user.grant('guest', 'read,write', 'space', 'space1')
> +c = net_box.connect(box.cfg.listen)
> +c.space.space1:insert{box.NULL, "data"} -- fails, but bumps sequence value
> +c.space.space1:insert{box.NULL, 1, "data"}
> +box.space.space1:select{}
> +vclock = test_run:get_vclock("autobootstrap1")
> +vclock[0] = nil
> +_ = test_run:wait_vclock("autobootstrap2", vclock)
> +test_run:cmd("switch autobootstrap2")
> +box.space.space1:select{}
> +test_run:cmd("switch autobootstrap1")
> +box.space.space1:drop()
> +
> +test_run:cmd("switch default")
> +test_run:drop_cluster(SERVERS)
> +test_run:cleanup_cluster()
> diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg
> index f357b07da..e21daa5ad 100644
> --- a/test/replication/suite.cfg
> +++ b/test/replication/suite.cfg
> @@ -1,6 +1,19 @@
>   {
>       "anon.test.lua": {},
> -    "misc.test.lua": {},
> +    "misc_assert_connecting_master_twice_gh-3610.test.lua": {},
> +    "misc_assert_on_server_die_gh-2991.test.lua": {},
> +    "misc_assert_replica_on_applier_disconnect_gh-3510.test.lua": {},
> +    "misc_crash_on_box_concurrent_update_gh-3606.test.lua": {},
> +    "misc_heartbeats_on_master_changes_gh-3160.test.lua": {},
> +    "misc_no_failure_on_error_reading_wal_gh-4399.test.lua": {},
> +    "misc_no_panic_on_connected_gh-3637.test.lua": {},
> +    "misc_no_restart_on_same_configuration_gh-3711.test.lua": {},
> +    "misc_no_socket_leak_on_replica_disconnect_gh-3642.test.lua": {},
> +    "misc_orphan_on_reconfiguration_error_gh-4424.test.lua": {},
> +    "misc_rebootstrap_from_ro_master_gh-3111.test.lua": {},
> +    "misc_replica_checks_cluster_id_gh-3704.test.lua": {},
> +    "misc_return_on_quorum_0_gh-3760.test.lua": {},
> +    "misc_value_not_replicated_on_iproto_request_gh-3247.test.lua": {},
>       "once.test.lua": {},
>       "on_replace.test.lua": {},
>       "status.test.lua": {},

-- 
Serge Petrenko