From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 0811D24B86 for ; Mon, 23 Jul 2018 07:14:37 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id mH0_Uk_x0IJ4 for ; Mon, 23 Jul 2018 07:14:36 -0400 (EDT) Received: from smtp36.i.mail.ru (smtp36.i.mail.ru [94.100.177.96]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id B4032242AF for ; Mon, 23 Jul 2018 07:14:36 -0400 (EDT) From: AKhatskevich Subject: [tarantool-patches] [PATCH 1/4] Add test on error during reconfigure Date: Mon, 23 Jul 2018 14:14:19 +0300 Message-Id: In-Reply-To: References: In-Reply-To: References: Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: v.shpilevoy@tarantool.org, tarantool-patches@freelists.org In case the reconfigure process fails, the node should continue to work properly. 
--- test/lua_libs/util.lua | 24 ++++++++++++++++++++++++ test/router/router.result | 30 ++++++++++++++++++++++++++++++ test/router/router.test.lua | 9 +++++++++ test/storage/storage.result | 39 +++++++++++++++++++++++++++++++++++++++ test/storage/storage.test.lua | 12 ++++++++++++ vshard/router/init.lua | 7 +++++++ vshard/storage/init.lua | 9 +++++++++ 7 files changed, 130 insertions(+) diff --git a/test/lua_libs/util.lua b/test/lua_libs/util.lua index f2d3b48..f40d3a6 100644 --- a/test/lua_libs/util.lua +++ b/test/lua_libs/util.lua @@ -69,9 +69,33 @@ local function wait_master(test_run, replicaset, master) log.info('Slaves are connected to a master "%s"', master) end +-- +-- Check that data has at least all etalon's fields and they are +-- equal. +-- @param etalon Table which fields should be found in `data`. +-- @param data Table which is checked against `etalon`. +-- +-- @retval Boolean indicator of equality and if is not equal, then +-- table of names of fields which are different in `data`. +-- +local function has_same_fields(etalon, data) + assert(type(etalon) == 'table' and type(data) == 'table') + local diff = {} + for k, v in pairs(etalon) do + if v ~= data[k] then + table.insert(diff, k) + end + end + if #diff > 0 then + return false, diff + end + return true +end + return { check_error = check_error, shuffle_masters = shuffle_masters, collect_timeouts = collect_timeouts, wait_master = wait_master, + has_same_fields = has_same_fields, } diff --git a/test/router/router.result b/test/router/router.result index 15f4fd0..4919962 100644 --- a/test/router/router.result +++ b/test/router/router.result @@ -1156,6 +1156,36 @@ util.check_error(vshard.router.cfg, non_dynamic_cfg) --- - Non-dynamic option shard_index cannot be reconfigured ... +-- Error during reconfigure process. +vshard.router.route(1):callro('echo', {'some_data'}) +--- +- some_data +- null +- null +... +vshard.router.internal.errinj.ERRINJ_CFG = true +--- +... 
+old_internal = table.copy(vshard.router.internal) +--- +... +util.check_error(vshard.router.cfg, cfg) +--- +- 'Error injection: cfg' +... +vshard.router.internal.errinj.ERRINJ_CFG = false +--- +... +util.has_same_fields(old_internal, vshard.router.internal) +--- +- true +... +vshard.router.route(1):callro('echo', {'some_data'}) +--- +- some_data +- null +- null +... _ = test_run:cmd("switch default") --- ... diff --git a/test/router/router.test.lua b/test/router/router.test.lua index 8006e5d..df2f381 100644 --- a/test/router/router.test.lua +++ b/test/router/router.test.lua @@ -444,6 +444,15 @@ non_dynamic_cfg = table.copy(cfg) non_dynamic_cfg.shard_index = 'non_default_name' util.check_error(vshard.router.cfg, non_dynamic_cfg) +-- Error during reconfigure process. +vshard.router.route(1):callro('echo', {'some_data'}) +vshard.router.internal.errinj.ERRINJ_CFG = true +old_internal = table.copy(vshard.router.internal) +util.check_error(vshard.router.cfg, cfg) +vshard.router.internal.errinj.ERRINJ_CFG = false +util.has_same_fields(old_internal, vshard.router.internal) +vshard.router.route(1):callro('echo', {'some_data'}) + _ = test_run:cmd("switch default") test_run:drop_cluster(REPLICASET_2) diff --git a/test/storage/storage.result b/test/storage/storage.result index 4399ff0..ff07fe9 100644 --- a/test/storage/storage.result +++ b/test/storage/storage.result @@ -732,6 +732,45 @@ util.check_error(vshard.storage.cfg, non_dynamic_cfg, names.storage_1_a) --- - Non-dynamic option bucket_count cannot be reconfigured ... +-- Error during reconfigure process. +_, rs = next(vshard.storage.internal.replicasets) +--- +... +rs:callro('echo', {'some_data'}) +--- +- some_data +- null +- null +... +vshard.storage.internal.errinj.ERRINJ_CFG = true +--- +... +old_internal = table.copy(vshard.storage.internal) +--- +... +_, err = pcall(vshard.storage.cfg, cfg, names.storage_1_a) +--- +... +err:match('Error injection:.*') +--- +- 'Error injection: cfg' +... 
+vshard.storage.internal.errinj.ERRINJ_CFG = false +--- +... +util.has_same_fields(old_internal, vshard.storage.internal) +--- +- true +... +_, rs = next(vshard.storage.internal.replicasets) +--- +... +rs:callro('echo', {'some_data'}) +--- +- some_data +- null +- null +... _ = test_run:cmd("switch default") --- ... diff --git a/test/storage/storage.test.lua b/test/storage/storage.test.lua index 72564e1..04bb608 100644 --- a/test/storage/storage.test.lua +++ b/test/storage/storage.test.lua @@ -182,6 +182,18 @@ non_dynamic_cfg = table.copy(cfg) non_dynamic_cfg.bucket_count = require('vshard.consts').DEFAULT_BUCKET_COUNT + 1 util.check_error(vshard.storage.cfg, non_dynamic_cfg, names.storage_1_a) +-- Error during reconfigure process. +_, rs = next(vshard.storage.internal.replicasets) +rs:callro('echo', {'some_data'}) +vshard.storage.internal.errinj.ERRINJ_CFG = true +old_internal = table.copy(vshard.storage.internal) +_, err = pcall(vshard.storage.cfg, cfg, names.storage_1_a) +err:match('Error injection:.*') +vshard.storage.internal.errinj.ERRINJ_CFG = false +util.has_same_fields(old_internal, vshard.storage.internal) +_, rs = next(vshard.storage.internal.replicasets) +rs:callro('echo', {'some_data'}) + _ = test_run:cmd("switch default") test_run:drop_cluster(REPLICASET_2) diff --git a/vshard/router/init.lua b/vshard/router/init.lua index 4531f3a..a143070 100644 --- a/vshard/router/init.lua +++ b/vshard/router/init.lua @@ -11,6 +11,7 @@ local M = rawget(_G, '__module_vshard_router') if not M then M = { errinj = { + ERRINJ_CFG = false, ERRINJ_FAILOVER_CHANGE_CFG = false, ERRINJ_RELOAD = false, ERRINJ_LONG_DISCOVERY = false, @@ -486,6 +487,12 @@ local function router_cfg(cfg) for k, v in pairs(cfg) do log.info({[k] = v}) end + -- It is considered that all possible errors during cfg + -- process occur only before this place. + -- This check should be placed as late as possible. 
+ if M.errinj.ERRINJ_CFG then + error('Error injection: cfg') + end box.cfg(cfg) log.info("Box has been configured") M.total_bucket_count = total_bucket_count diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua index ff204a4..052e94f 100644 --- a/vshard/storage/init.lua +++ b/vshard/storage/init.lua @@ -33,6 +33,7 @@ if not M then -- Bucket count stored on all replicasets. total_bucket_count = 0, errinj = { + ERRINJ_CFG = false, ERRINJ_BUCKET_FIND_GARBAGE_DELAY = false, ERRINJ_RELOAD = false, ERRINJ_CFG_DELAY = false, @@ -1560,6 +1561,14 @@ local function storage_cfg(cfg, this_replica_uuid) local shard_index = cfg.shard_index local collect_bucket_garbage_interval = cfg.collect_bucket_garbage_interval local collect_lua_garbage = cfg.collect_lua_garbage + + -- It is considered that all possible errors during cfg + -- process occur only before this place. + -- This check should be placed as late as possible. + if M.errinj.ERRINJ_CFG then + error('Error injection: cfg') + end + -- -- Sync timeout is a special case - it must be updated before -- all other options to allow a user to demote a master with -- 2.14.1