From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp51.i.mail.ru (smtp51.i.mail.ru [94.100.177.111]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id C93AD42EF5C for ; Fri, 3 Jul 2020 00:15:06 +0300 (MSK) From: sergeyb@tarantool.org Date: Fri, 3 Jul 2020 00:13:35 +0300 Message-Id: <012c8c196396cf963a0aa1f2d23814ff84b81cfb.1593723973.git.sergeyb@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH 2/4] replication: add advanced tests for sync replication List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org, v.shpilevoy@tarantool.org, sergepetrenko@tarantool.org, gorcunov@gmail.com, lvasiliev@tarantool.org From: Sergey Bronnikov Part of #5055 --- test/replication/qsync_advanced.result | 939 +++++++++++++++++++++++ test/replication/qsync_advanced.test.lua | 337 ++++++++ 2 files changed, 1276 insertions(+) create mode 100644 test/replication/qsync_advanced.result create mode 100644 test/replication/qsync_advanced.test.lua diff --git a/test/replication/qsync_advanced.result b/test/replication/qsync_advanced.result new file mode 100644 index 000000000..fa94c8339 --- /dev/null +++ b/test/replication/qsync_advanced.result @@ -0,0 +1,939 @@ +-- test-run result file version 2 +env = require('test_run') + | --- + | ... +test_run = env.new() + | --- + | ... +engine = test_run:get_cfg('engine') + | --- + | ... +fiber = require('fiber') + | --- + | ... + +orig_synchro_quorum = box.cfg.replication_synchro_quorum + | --- + | ... +orig_synchro_timeout = box.cfg.replication_synchro_timeout + | --- + | ... + +NUM_INSTANCES = 2 + | --- + | ... +BROKEN_QUORUM = NUM_INSTANCES + 1 + | --- + | ... + +test_run:cmd("setopt delimiter ';'") + | --- + | - true + | ... 
+disable_sync_mode = function() + local s = box.space._space:get(box.space.sync.id) + local new_s = s:update({{'=', 6, {is_sync=false}}}) + box.space._space:replace(new_s) +end; + | --- + | ... +test_run:cmd("setopt delimiter ''"); + | --- + | - true + | ... + +box.schema.user.grant('guest', 'replication') + | --- + | ... + +-- Setup an async cluster with two instances. +test_run:cmd('create server replica with rpl_master=default,\ + script="replication/replica.lua"') + | --- + | - true + | ... +test_run:cmd('start server replica with wait=True, wait_load=True') + | --- + | - true + | ... + +-- Successful write. +-- Testcase setup. +test_run:switch('default') + | --- + | - true + | ... +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} + | --- + | ... +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... +-- Testcase body. +box.space.sync:insert{1} -- success + | --- + | - [1] + | ... +test_run:cmd('switch replica') + | --- + | - true + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +-- Testcase cleanup. +test_run:switch('default') + | --- + | - true + | ... +box.space.sync:drop() + | --- + | ... + +-- Unsuccessfull write. +-- Testcase setup. +test_run:switch('default') + | --- + | - true + | ... +box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=0.1} + | --- + | ... +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... +-- Testcase body. +box.space.sync:insert{1} + | --- + | - error: Quorum collection for a synchronous transaction is timed out + | ... +test_run:switch('replica') + | --- + | - true + | ... +box.space.sync:select{} -- none + | --- + | - [] + | ... +-- Testcase cleanup. +test_run:switch('default') + | --- + | - true + | ... +box.space.sync:drop() + | --- + | ... 
+ +-- Updated replication_synchro_quorum doesn't affect existed tx. +-- Testcase setup. +test_run:switch('default') + | --- + | - true + | ... +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... +-- Testcase body. +OP_TIMEOUT = 5 + | --- + | ... +box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=OP_TIMEOUT} + | --- + | ... +test_run:cmd("setopt delimiter ';'") + | --- + | - true + | ... +_ = fiber.create(function() + box.space.sync:insert{1} +end); + | --- + | ... +test_run:cmd("setopt delimiter ''"); + | --- + | - true + | ... +box.cfg{replication_synchro_quorum=NUM_INSTANCES} + | --- + | ... +fiber.sleep(OP_TIMEOUT) -- to make sure replication_synchro_timeout is exceeded + | --- + | ... +box.space.sync:select{} -- none + | --- + | - [] + | ... +test_run:switch('replica') + | --- + | - true + | ... +box.space.sync:select{} -- none + | --- + | - [] + | ... +-- Testcase cleanup. +test_run:switch('default') + | --- + | - true + | ... +box.space.sync:drop() + | --- + | ... + +-- [RFC, quorum commit] attempt to write multiple transactions, expected the +-- same order as on client in case of achieved quorum. +-- Testcase setup. +test_run:switch('default') + | --- + | - true + | ... +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} + | --- + | ... +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... +-- Testcase body. +box.space.sync:insert{1} + | --- + | - [1] + | ... +box.space.sync:insert{2} + | --- + | - [2] + | ... +box.space.sync:insert{3} + | --- + | - [3] + | ... +box.space.sync:select{} -- 1, 2, 3 + | --- + | - - [1] + | - [2] + | - [3] + | ... +test_run:switch('replica') + | --- + | - true + | ... +box.space.sync:select{} -- 1, 2, 3 + | --- + | - - [1] + | - [2] + | - [3] + | ... +-- Testcase cleanup. 
+test_run:switch('default') + | --- + | - true + | ... +box.space.sync:drop() + | --- + | ... + +-- Synchro timeout is not bigger than replication_synchro_timeout value. +-- Testcase setup. +test_run:switch('default') + | --- + | - true + | ... +box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=orig_synchro_timeout} + | --- + | ... +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... +-- Testcase body. +start = os.time() + | --- + | ... +box.space.sync:insert{1} + | --- + | - error: Quorum collection for a synchronous transaction is timed out + | ... +(os.time() - start) == box.cfg.replication_synchro_timeout -- true + | --- + | - true + | ... +-- Testcase cleanup. +test_run:switch('default') + | --- + | - true + | ... +box.space.sync:drop() + | --- + | ... + +-- replication_synchro_quorum +test_run:switch('default') + | --- + | - true + | ... +INT_MIN = -2147483648 + | --- + | ... +INT_MAX = 2147483648 + | --- + | ... +box.cfg{replication_synchro_quorum=INT_MAX} -- error + | --- + | - error: 'Incorrect value for option ''replication_synchro_quorum'': the value must + | be greater than zero and less than maximal number of replicas' + | ... +box.cfg.replication_synchro_quorum -- old value + | --- + | - 3 + | ... +box.cfg{replication_synchro_quorum=INT_MIN} -- error + | --- + | - error: 'Incorrect value for option ''replication_synchro_quorum'': the value must + | be greater than zero and less than maximal number of replicas' + | ... +box.cfg.replication_synchro_quorum -- old value + | --- + | - 3 + | ... + +-- replication_synchro_timeout +test_run:switch('default') + | --- + | - true + | ... +DOUBLE_MAX = 9007199254740992 + | --- + | ... +box.cfg{replication_synchro_timeout=DOUBLE_MAX} + | --- + | ... +box.cfg.replication_synchro_timeout -- DOUBLE_MAX + | --- + | - 9007199254740992 + | ... 
+box.cfg{replication_synchro_timeout=DOUBLE_MAX+1} + | --- + | ... +box.cfg.replication_synchro_timeout -- DOUBLE_MAX + | --- + | - 9007199254740992 + | ... +box.cfg{replication_synchro_timeout=-1} -- error + | --- + | - error: 'Incorrect value for option ''replication_synchro_timeout'': the value must + | be greater than zero' + | ... +box.cfg.replication_synchro_timeout -- old value + | --- + | - 9007199254740992 + | ... +box.cfg{replication_synchro_timeout=0} -- error + | --- + | - error: 'Incorrect value for option ''replication_synchro_timeout'': the value must + | be greater than zero' + | ... +box.cfg.replication_synchro_timeout -- old value + | --- + | - 9007199254740992 + | ... + +-- TX is in synchronous replication. +-- Testcase setup. +test_run:switch('default') + | --- + | - true + | ... +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} + | --- + | ... +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... +-- Testcase body. +box.begin() box.space.sync:insert({1}) box.commit() + | --- + | ... +box.begin() box.space.sync:insert({2}) box.commit() + | --- + | ... +-- Testcase cleanup. +box.space.sync:drop() + | --- + | ... + +-- [RFC, summary] switch sync replicas into async ones, expected success and +-- data consistency on a leader and replicas. +-- Testcase setup. +test_run:switch('default') + | --- + | - true + | ... +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} + | --- + | ... +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... +-- Testcase body. +box.space.sync:insert{1} + | --- + | - [1] + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +test_run:switch('replica') + | --- + | - true + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... 
+test_run:switch('default') + | --- + | - true + | ... +-- Disable synchronous mode. +disable_sync_mode() + | --- + | ... +-- Space is in async mode now. +box.cfg{replication_synchro_quorum=NUM_INSTANCES} + | --- + | ... +box.space.sync:insert{2} -- success + | --- + | - [2] + | ... +box.space.sync:insert{3} -- success + | --- + | - [3] + | ... +box.cfg{replication_synchro_quorum=BROKEN_QUORUM} + | --- + | ... +box.space.sync:insert{4} -- success + | --- + | - [4] + | ... +box.cfg{replication_synchro_quorum=NUM_INSTANCES} + | --- + | ... +box.space.sync:insert{5} -- success + | --- + | - [5] + | ... +box.space.sync:select{} -- 1, 2, 3, 4, 5 + | --- + | - - [1] + | - [2] + | - [3] + | - [4] + | - [5] + | ... +test_run:cmd('switch replica') + | --- + | - true + | ... +box.space.sync:select{} -- 1, 2, 3, 4, 5 + | --- + | - - [1] + | - [2] + | - [3] + | - [4] + | - [5] + | ... +-- Testcase cleanup. +test_run:switch('default') + | --- + | - true + | ... +box.space.sync:drop() + | --- + | ... + +-- (FLAKY) [RFC, Synchronous replication enabling] "As soon as last operation of +-- synchronous transaction appeared in leader's WAL, it will cause all +-- following transactions - no matter if they are synchronous or not - wait for +-- the quorum. In case quorum is not achieved the 'rollback' operation will +-- cause rollback of all transactions after the synchronous one." +-- Testcase setup. +test_run:switch('default') + | --- + | - true + | ... +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} + | --- + | ... +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... +-- Testcase body. +box.space.sync:insert{1} + | --- + | - [1] + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +test_run:switch('replica') + | --- + | - true + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +test_run:switch('default') + | --- + | - true + | ... 
+-- OP_TIMEOUT should be enough to make sync operation, disable +-- sync mode and make an async operation +OP_TIMEOUT = 10 + | --- + | ... +box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=OP_TIMEOUT} + | --- + | ... +test_run:cmd("setopt delimiter ';'") + | --- + | - true + | ... +_ = fiber.create(function() + box.space.sync:insert{2} +end); + | --- + | ... +test_run:cmd("setopt delimiter ''"); + | --- + | - true + | ... +-- Disable synchronous mode. +disable_sync_mode() + | --- + | - error: A rollback for a synchronous transaction is received + | ... +-- Space is in async mode now. +box.space.sync:insert{3} -- async operation must wait sync one + | --- + | - error: Quorum collection for a synchronous transaction is timed out + | ... +fiber.sleep(OP_TIMEOUT + 1) + | --- + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +test_run:cmd('switch replica') + | --- + | - true + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +-- Testcase cleanup. +test_run:switch('default') + | --- + | - true + | ... +box.space.sync:drop() + | --- + | ... + +-- Warn user when setting `replication_synchro_quorum` to a value +-- greater than number of instances in a cluster, see gh-5122. +box.cfg{replication_synchro_quorum=BROKEN_QUORUM} -- warning + | --- + | ... + +-- [RFC, summary] switch from leader to replica and vice versa, expected +-- success and data consistency on a leader and replicas (gh-5124). +-- Testcase setup. +test_run:switch('default') + | --- + | - true + | ... +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} + | --- + | ... +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... +-- Testcase body. +box.space.sync:insert{1} + | --- + | - [1] + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +test_run:switch('replica') + | --- + | - true + | ... 
+box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +box.cfg{read_only=false} -- promote replica to master + | --- + | ... +test_run:switch('default') + | --- + | - true + | ... +box.cfg{read_only=true} -- demote master to replica + | --- + | ... +test_run:switch('replica') + | --- + | - true + | ... +box.space.sync:insert{2} + | --- + | - [2] + | ... +box.space.sync:select{} -- 1, 2 + | --- + | - - [1] + | - [2] + | ... +test_run:switch('default') + | --- + | - true + | ... +box.space.sync:select{} -- 1, 2 + | --- + | - - [1] + | ... +-- Revert cluster configuration. +test_run:switch('default') + | --- + | - true + | ... +box.cfg{read_only=false} + | --- + | ... +test_run:switch('replica') + | --- + | - true + | ... +box.cfg{read_only=true} + | --- + | ... +-- Testcase cleanup. +test_run:switch('default') + | --- + | - true + | ... +box.space.sync:drop() + | --- + | ... + +-- Check behaviour with failed write to WAL on master (ERRINJ_WAL_IO). +-- Testcase setup. +test_run:switch('default') + | --- + | - true + | ... +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} + | --- + | ... +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... +-- Testcase body. +box.space.sync:insert{1} + | --- + | - [1] + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +box.error.injection.set('ERRINJ_WAL_IO', true) + | --- + | - ok + | ... +box.space.sync:insert{2} + | --- + | - error: Failed to write to disk + | ... +box.error.injection.set('ERRINJ_WAL_IO', false) + | --- + | - ok + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +test_run:switch('replica') + | --- + | - true + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +-- Testcase cleanup. +test_run:switch('default') + | --- + | - true + | ... +box.space.sync:drop() + | --- + | ... 
+ +-- [RFC, quorum commit] check behaviour with failure answer from a replica +-- (ERRINJ_WAL_SYNC) during write, expected disconnect from the replication +-- (gh-5123, set replication_synchro_quorum to 1). +-- Testcase setup. +test_run:switch('default') + | --- + | - true + | ... +box.cfg{replication_synchro_quorum=2, replication_synchro_timeout=0.1} + | --- + | ... +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... +-- Testcase body. +box.space.sync:insert{1} + | --- + | - [1] + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +test_run:switch('replica') + | --- + | - true + | ... +box.error.injection.set('ERRINJ_WAL_IO', true) + | --- + | - ok + | ... +test_run:switch('default') + | --- + | - true + | ... +box.space.sync:insert{2} + | --- + | - error: Quorum collection for a synchronous transaction is timed out + | ... +test_run:switch('replica') + | --- + | - true + | ... +box.error.injection.set('ERRINJ_WAL_IO', false) + | --- + | - ok + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +-- Testcase cleanup. +test_run:switch('default') + | --- + | - true + | ... +box.space.sync:drop() + | --- + | ... + +-- Teardown. +test_run:cmd('switch default') + | --- + | - true + | ... +test_run:cmd('stop server replica') + | --- + | - true + | ... +test_run:cmd('delete server replica') + | --- + | - true + | ... +test_run:cleanup_cluster() + | --- + | ... +box.schema.user.revoke('guest', 'replication') + | --- + | ... +box.cfg{ \ + replication_synchro_quorum = orig_synchro_quorum, \ + replication_synchro_timeout = orig_synchro_timeout, \ +} + | --- + | ... + +-- Setup an async cluster. +box.schema.user.grant('guest', 'replication') + | --- + | ... +test_run:cmd('create server replica with rpl_master=default,\ + script="replication/replica.lua"') + | --- + | - true + | ... 
+test_run:cmd('start server replica with wait=True, wait_load=True') + | --- + | - true + | ... + +-- [RFC, summary] switch async replica into sync one, expected +-- success and data consistency on a leader and replica. +-- Testcase setup. +_ = box.schema.space.create('sync', {engine=engine}) + | --- + | ... +_ = box.space.sync:create_index('pk') + | --- + | ... +box.space.sync:insert{1} -- success + | --- + | - [1] + | ... +test_run:cmd('switch replica') + | --- + | - true + | ... +box.space.sync:select{} -- 1 + | --- + | - - [1] + | ... +test_run:switch('default') + | --- + | - true + | ... +-- Enable synchronous mode. +s = box.space._space:get(box.space.sync.id) + | --- + | ... +new_s = s:update({{'=', 6, {is_sync=true}}}) + | --- + | ... +box.space._space:replace(new_s) + | --- + | - [523, 1, 'sync', 'vinyl', 0, {'is_sync': true}, []] + | ... +-- Space is in sync mode now. +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} + | --- + | ... +box.space.sync:insert{2} -- success + | --- + | - [2] + | ... +box.space.sync:insert{3} -- success + | --- + | - [3] + | ... +box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=0.1} + | --- + | ... +box.space.sync:insert{4} -- failure + | --- + | - error: Quorum collection for a synchronous transaction is timed out + | ... +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} + | --- + | ... +box.space.sync:insert{5} -- success + | --- + | - [5] + | ... +box.space.sync:select{} -- 1, 2, 3, 5 + | --- + | - - [1] + | - [2] + | - [3] + | - [5] + | ... +test_run:cmd('switch replica') + | --- + | - true + | ... +box.space.sync:select{} -- 1, 2, 3, 5 + | --- + | - - [1] + | - [2] + | - [3] + | - [5] + | ... +-- Testcase cleanup. +test_run:switch('default') + | --- + | - true + | ... +box.space.sync:drop() + | --- + | ... + +-- Teardown. +test_run:cmd('switch default') + | --- + | - true + | ... 
+test_run:cmd('stop server replica') + | --- + | - true + | ... +test_run:cmd('delete server replica') + | --- + | - true + | ... +test_run:cleanup_cluster() + | --- + | ... +box.schema.user.revoke('guest', 'replication') + | --- + | ... +box.cfg{ \ + replication_synchro_quorum = orig_synchro_quorum, \ + replication_synchro_timeout = orig_synchro_timeout, \ +} + | --- + | ... diff --git a/test/replication/qsync_advanced.test.lua b/test/replication/qsync_advanced.test.lua new file mode 100644 index 000000000..270fd494d --- /dev/null +++ b/test/replication/qsync_advanced.test.lua @@ -0,0 +1,337 @@ +env = require('test_run') +test_run = env.new() +engine = test_run:get_cfg('engine') +fiber = require('fiber') + +orig_synchro_quorum = box.cfg.replication_synchro_quorum +orig_synchro_timeout = box.cfg.replication_synchro_timeout + +NUM_INSTANCES = 2 +BROKEN_QUORUM = NUM_INSTANCES + 1 + +test_run:cmd("setopt delimiter ';'") +disable_sync_mode = function() + local s = box.space._space:get(box.space.sync.id) + local new_s = s:update({{'=', 6, {is_sync=false}}}) + box.space._space:replace(new_s) +end; +test_run:cmd("setopt delimiter ''"); + +box.schema.user.grant('guest', 'replication') + +-- Setup an async cluster with two instances. +test_run:cmd('create server replica with rpl_master=default,\ + script="replication/replica.lua"') +test_run:cmd('start server replica with wait=True, wait_load=True') + +-- Successful write. +-- Testcase setup. +test_run:switch('default') +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) +_ = box.space.sync:create_index('pk') +-- Testcase body. +box.space.sync:insert{1} -- success +test_run:cmd('switch replica') +box.space.sync:select{} -- 1 +-- Testcase cleanup. +test_run:switch('default') +box.space.sync:drop() + +-- Unsuccessfull write. +-- Testcase setup. 
+test_run:switch('default') +box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=0.1} +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) +_ = box.space.sync:create_index('pk') +-- Testcase body. +box.space.sync:insert{1} +test_run:switch('replica') +box.space.sync:select{} -- none +-- Testcase cleanup. +test_run:switch('default') +box.space.sync:drop() + +-- Updated replication_synchro_quorum doesn't affect existed tx. +-- Testcase setup. +test_run:switch('default') +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) +_ = box.space.sync:create_index('pk') +-- Testcase body. +OP_TIMEOUT = 5 +box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=OP_TIMEOUT} +test_run:cmd("setopt delimiter ';'") +_ = fiber.create(function() + box.space.sync:insert{1} +end); +test_run:cmd("setopt delimiter ''"); +box.cfg{replication_synchro_quorum=NUM_INSTANCES} +fiber.sleep(OP_TIMEOUT) -- to make sure replication_synchro_timeout is exceeded +box.space.sync:select{} -- none +test_run:switch('replica') +box.space.sync:select{} -- none +-- Testcase cleanup. +test_run:switch('default') +box.space.sync:drop() + +-- [RFC, quorum commit] attempt to write multiple transactions, expected the +-- same order as on client in case of achieved quorum. +-- Testcase setup. +test_run:switch('default') +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) +_ = box.space.sync:create_index('pk') +-- Testcase body. +box.space.sync:insert{1} +box.space.sync:insert{2} +box.space.sync:insert{3} +box.space.sync:select{} -- 1, 2, 3 +test_run:switch('replica') +box.space.sync:select{} -- 1, 2, 3 +-- Testcase cleanup. +test_run:switch('default') +box.space.sync:drop() + +-- Synchro timeout is not bigger than replication_synchro_timeout value. +-- Testcase setup. 
+test_run:switch('default') +box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=orig_synchro_timeout} +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) +_ = box.space.sync:create_index('pk') +-- Testcase body. +start = os.time() +box.space.sync:insert{1} +(os.time() - start) == box.cfg.replication_synchro_timeout -- true +-- Testcase cleanup. +test_run:switch('default') +box.space.sync:drop() + +-- replication_synchro_quorum +test_run:switch('default') +INT_MIN = -2147483648 +INT_MAX = 2147483648 +box.cfg{replication_synchro_quorum=INT_MAX} -- error +box.cfg.replication_synchro_quorum -- old value +box.cfg{replication_synchro_quorum=INT_MIN} -- error +box.cfg.replication_synchro_quorum -- old value + +-- replication_synchro_timeout +test_run:switch('default') +DOUBLE_MAX = 9007199254740992 +box.cfg{replication_synchro_timeout=DOUBLE_MAX} +box.cfg.replication_synchro_timeout -- DOUBLE_MAX +box.cfg{replication_synchro_timeout=DOUBLE_MAX+1} +box.cfg.replication_synchro_timeout -- DOUBLE_MAX +box.cfg{replication_synchro_timeout=-1} -- error +box.cfg.replication_synchro_timeout -- old value +box.cfg{replication_synchro_timeout=0} -- error +box.cfg.replication_synchro_timeout -- old value + +-- TX is in synchronous replication. +-- Testcase setup. +test_run:switch('default') +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) +_ = box.space.sync:create_index('pk') +-- Testcase body. +box.begin() box.space.sync:insert({1}) box.commit() +box.begin() box.space.sync:insert({2}) box.commit() +-- Testcase cleanup. +box.space.sync:drop() + +-- [RFC, summary] switch sync replicas into async ones, expected success and +-- data consistency on a leader and replicas. +-- Testcase setup. 
+test_run:switch('default') +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) +_ = box.space.sync:create_index('pk') +-- Testcase body. +box.space.sync:insert{1} +box.space.sync:select{} -- 1 +test_run:switch('replica') +box.space.sync:select{} -- 1 +test_run:switch('default') +-- Disable synchronous mode. +disable_sync_mode() +-- Space is in async mode now. +box.cfg{replication_synchro_quorum=NUM_INSTANCES} +box.space.sync:insert{2} -- success +box.space.sync:insert{3} -- success +box.cfg{replication_synchro_quorum=BROKEN_QUORUM} +box.space.sync:insert{4} -- success +box.cfg{replication_synchro_quorum=NUM_INSTANCES} +box.space.sync:insert{5} -- success +box.space.sync:select{} -- 1, 2, 3, 4, 5 +test_run:cmd('switch replica') +box.space.sync:select{} -- 1, 2, 3, 4, 5 +-- Testcase cleanup. +test_run:switch('default') +box.space.sync:drop() + +-- (FLAKY) [RFC, Synchronous replication enabling] "As soon as last operation of +-- synchronous transaction appeared in leader's WAL, it will cause all +-- following transactions - no matter if they are synchronous or not - wait for +-- the quorum. In case quorum is not achieved the 'rollback' operation will +-- cause rollback of all transactions after the synchronous one." +-- Testcase setup. +test_run:switch('default') +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) +_ = box.space.sync:create_index('pk') +-- Testcase body. 
+box.space.sync:insert{1} +box.space.sync:select{} -- 1 +test_run:switch('replica') +box.space.sync:select{} -- 1 +test_run:switch('default') +-- OP_TIMEOUT should be enough to make sync operation, disable +-- sync mode and make an async operation +OP_TIMEOUT = 10 +box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=OP_TIMEOUT} +test_run:cmd("setopt delimiter ';'") +_ = fiber.create(function() + box.space.sync:insert{2} +end); +test_run:cmd("setopt delimiter ''"); +-- Disable synchronous mode. +disable_sync_mode() +-- Space is in async mode now. +box.space.sync:insert{3} -- async operation must wait sync one +fiber.sleep(OP_TIMEOUT + 1) +box.space.sync:select{} -- 1 +test_run:cmd('switch replica') +box.space.sync:select{} -- 1 +-- Testcase cleanup. +test_run:switch('default') +box.space.sync:drop() + +-- Warn user when setting `replication_synchro_quorum` to a value +-- greater than number of instances in a cluster, see gh-5122. +box.cfg{replication_synchro_quorum=BROKEN_QUORUM} -- warning + +-- [RFC, summary] switch from leader to replica and vice versa, expected +-- success and data consistency on a leader and replicas (gh-5124). +-- Testcase setup. +test_run:switch('default') +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) +_ = box.space.sync:create_index('pk') +-- Testcase body. +box.space.sync:insert{1} +box.space.sync:select{} -- 1 +test_run:switch('replica') +box.space.sync:select{} -- 1 +box.cfg{read_only=false} -- promote replica to master +test_run:switch('default') +box.cfg{read_only=true} -- demote master to replica +test_run:switch('replica') +box.space.sync:insert{2} +box.space.sync:select{} -- 1, 2 +test_run:switch('default') +box.space.sync:select{} -- 1, 2 +-- Revert cluster configuration. +test_run:switch('default') +box.cfg{read_only=false} +test_run:switch('replica') +box.cfg{read_only=true} +-- Testcase cleanup. 
+test_run:switch('default') +box.space.sync:drop() + +-- Check behaviour with failed write to WAL on master (ERRINJ_WAL_IO). +-- Testcase setup. +test_run:switch('default') +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) +_ = box.space.sync:create_index('pk') +-- Testcase body. +box.space.sync:insert{1} +box.space.sync:select{} -- 1 +box.error.injection.set('ERRINJ_WAL_IO', true) +box.space.sync:insert{2} +box.error.injection.set('ERRINJ_WAL_IO', false) +box.space.sync:select{} -- 1 +test_run:switch('replica') +box.space.sync:select{} -- 1 +-- Testcase cleanup. +test_run:switch('default') +box.space.sync:drop() + +-- [RFC, quorum commit] check behaviour with failure answer from a replica +-- (ERRINJ_WAL_SYNC) during write, expected disconnect from the replication +-- (gh-5123, set replication_synchro_quorum to 1). +-- Testcase setup. +test_run:switch('default') +box.cfg{replication_synchro_quorum=2, replication_synchro_timeout=0.1} +_ = box.schema.space.create('sync', {is_sync=true, engine=engine}) +_ = box.space.sync:create_index('pk') +-- Testcase body. +box.space.sync:insert{1} +box.space.sync:select{} -- 1 +test_run:switch('replica') +box.error.injection.set('ERRINJ_WAL_IO', true) +test_run:switch('default') +box.space.sync:insert{2} +test_run:switch('replica') +box.error.injection.set('ERRINJ_WAL_IO', false) +box.space.sync:select{} -- 1 +-- Testcase cleanup. +test_run:switch('default') +box.space.sync:drop() + +-- Teardown. +test_run:cmd('switch default') +test_run:cmd('stop server replica') +test_run:cmd('delete server replica') +test_run:cleanup_cluster() +box.schema.user.revoke('guest', 'replication') +box.cfg{ \ + replication_synchro_quorum = orig_synchro_quorum, \ + replication_synchro_timeout = orig_synchro_timeout, \ +} + +-- Setup an async cluster. 
+box.schema.user.grant('guest', 'replication') +test_run:cmd('create server replica with rpl_master=default,\ + script="replication/replica.lua"') +test_run:cmd('start server replica with wait=True, wait_load=True') + +-- [RFC, summary] switch async replica into sync one, expected +-- success and data consistency on a leader and replica. +-- Testcase setup. +_ = box.schema.space.create('sync', {engine=engine}) +_ = box.space.sync:create_index('pk') +box.space.sync:insert{1} -- success +test_run:cmd('switch replica') +box.space.sync:select{} -- 1 +test_run:switch('default') +-- Enable synchronous mode. +s = box.space._space:get(box.space.sync.id) +new_s = s:update({{'=', 6, {is_sync=true}}}) +box.space._space:replace(new_s) +-- Space is in sync mode now. +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} +box.space.sync:insert{2} -- success +box.space.sync:insert{3} -- success +box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=0.1} +box.space.sync:insert{4} -- failure +box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=0.1} +box.space.sync:insert{5} -- success +box.space.sync:select{} -- 1, 2, 3, 5 +test_run:cmd('switch replica') +box.space.sync:select{} -- 1, 2, 3, 5 +-- Testcase cleanup. +test_run:switch('default') +box.space.sync:drop() + +-- Teardown. +test_run:cmd('switch default') +test_run:cmd('stop server replica') +test_run:cmd('delete server replica') +test_run:cleanup_cluster() +box.schema.user.revoke('guest', 'replication') +box.cfg{ \ + replication_synchro_quorum = orig_synchro_quorum, \ + replication_synchro_timeout = orig_synchro_timeout, \ +} -- 2.26.2