From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpng1.m.smailru.net (smtpng1.m.smailru.net [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 0929B445325 for ; Thu, 6 Aug 2020 01:15:52 +0300 (MSK) From: Vladislav Shpilevoy Date: Thu, 6 Aug 2020 00:15:49 +0200 Message-Id: <0cd777f3990939b32d96bf964dea55e8073e65bb.1596665591.git.v.shpilevoy@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH v2 vshard 2/2] storage: allow replica to boot before master List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org, yaroslav.dynnikov@tarantool.org, olegrok@tarantool.org Before the patch a replica couldn't call vshard.storage.cfg() before the master is bootstrapped. That could lead to an exception in the middle of vshard.storage.cfg(). It sounds not so bad and even looks unreachable, because replica bootstrap anyway is blocked in box.cfg() until a master node is started, otherwise the replica instance is terminated. But the box.cfg bootstrap != vshard bootstrap. It could be that both replica and master nodes are already bootstrapped and working, but vshard.storage.cfg() wasn't called yet. In that case vshard.storage.cfg() on the replica wouldn't block in box.cfg(), and would happily fail a few lines later in vshard.storage.cfg() at attempt to touch not existing box.space._bucket. That was the case for cartridge. The framework configures instances in its own way before vshard.storage.cfg(). Then it calls vshard.storage.cfg() on them in arbitrary order, and sometimes replicas were configured earlier than the master. The fail is fixed by simply skipping the _bucket's trigger installation on the replica node. Because so far it is not needed here for anything. The trigger's sole purpose is to increment bucket generation used only for DML on _bucket on the master node. Now vshard.storage.cfg() on replica is able to finish before master does the same. However the replica still won't be able to handle vshard.storage.* requests until receives vshard schema from master. Closes #237 --- test/lua_libs/storage_template.lua | 55 +++++++++++- test/misc/reconfigure.result | 33 +++++++ test/misc/reconfigure.test.lua | 9 ++ test/reload_evolution/storage.result | 16 ++-- test/reload_evolution/storage.test.lua | 14 +-- test/router/boot_replica_first.result | 112 ++++++++++++++++++++++++ test/router/boot_replica_first.test.lua | 42 +++++++++ vshard/storage/init.lua | 21 ++++- vshard/storage/reload_evolution.lua | 8 ++ 9 files changed, 297 insertions(+), 13 deletions(-) create mode 100644 test/router/boot_replica_first.result create mode 100644 test/router/boot_replica_first.test.lua diff --git a/test/lua_libs/storage_template.lua b/test/lua_libs/storage_template.lua index 29a753d..84e4180 100644 --- a/test/lua_libs/storage_template.lua +++ b/test/lua_libs/storage_template.lua @@ -1,5 +1,7 @@ #!/usr/bin/env tarantool +local luri = require('uri') + NAME = require('fio').basename(arg[0], '.lua') fiber = require('fiber') test_run = require('test_run').new() @@ -10,12 +12,63 @@ log = require('log') if not cfg.shard_index then cfg.shard_index = 'bucket_id' end +instance_uuid = util.name_to_uuid[NAME] + +-- +-- Bootstrap the instance exactly like vshard does. But don't +-- initialize any vshard-specific code. +-- +local function boot_like_vshard() + assert(type(box.cfg) == 'function') + for rs_uuid, rs in pairs(cfg.sharding) do + for replica_uuid, replica in pairs(rs.replicas) do + if replica_uuid == instance_uuid then + local box_cfg = {replication = {}} + box_cfg.instance_uuid = replica_uuid + box_cfg.replicaset_uuid = rs_uuid + box_cfg.listen = replica.uri + box_cfg.read_only = not replica.master + box_cfg.replication_connect_quorum = 0 + box_cfg.replication_timeout = 0.1 + for _, replica in pairs(rs.replicas) do + table.insert(box_cfg.replication, replica.uri) + end + box.cfg(box_cfg) + if not replica.master then + return + end + local uri = luri.parse(replica.uri) + box.schema.user.create(uri.login, { + password = uri.password, if_not_exists = true, + }) + box.schema.user.grant(uri.login, 'super') + return + end + end + end + assert(false) +end + +local omit_cfg = false +local i = 1 +while arg[i] ~= nil do + local key = arg[i] + i = i + 1 + if key == 'boot_before_cfg' then + boot_like_vshard() + omit_cfg = true + end +end vshard = require('vshard') echo_count = 0 cfg.replication_connect_timeout = 3 cfg.replication_timeout = 0.1 -vshard.storage.cfg(cfg, util.name_to_uuid[NAME]) + +if not omit_cfg then + vshard.storage.cfg(cfg, instance_uuid) +end + function bootstrap_storage(engine) box.once("testapp:schema:1", function() if rawget(_G, 'CHANGE_SPACE_IDS') then diff --git a/test/misc/reconfigure.result b/test/misc/reconfigure.result index be58eca..168be5d 100644 --- a/test/misc/reconfigure.result +++ b/test/misc/reconfigure.result @@ -277,6 +277,14 @@ box.cfg.replication --- - - storage:storage@127.0.0.1:3301 ... +box.cfg.read_only +--- +- false +... +#box.space._bucket:on_replace() +--- +- 1 +... _ = test_run:switch('storage_2_a') --- ... @@ -303,6 +311,15 @@ box.cfg.replication - - storage:storage@127.0.0.1:3303 - storage:storage@127.0.0.1:3304 ... +box.cfg.read_only +--- +- true +... +-- Should be zero on the slave node. Even though earlier the node was a master. +#box.space._bucket:on_replace() +--- +- 0 +... _ = test_run:switch('storage_2_b') --- ... @@ -329,6 +346,14 @@ box.cfg.replication - - storage:storage@127.0.0.1:3303 - storage:storage@127.0.0.1:3304 ... +box.cfg.read_only +--- +- false +... +#box.space._bucket:on_replace() +--- +- 1 +... _ = test_run:switch('storage_3_a') --- ... @@ -354,6 +379,14 @@ box.cfg.replication --- - - storage:storage@127.0.0.1:3306 ... +box.cfg.read_only +--- +- false +... +#box.space._bucket:on_replace() +--- +- 1 +... _ = test_run:switch('router_1') --- ... diff --git a/test/misc/reconfigure.test.lua b/test/misc/reconfigure.test.lua index 1b894c8..e891010 100644 --- a/test/misc/reconfigure.test.lua +++ b/test/misc/reconfigure.test.lua @@ -111,6 +111,8 @@ for k,v in pairs(info.replicasets) do table.insert(uris, v.master.uri) end table.sort(uris) uris box.cfg.replication +box.cfg.read_only +#box.space._bucket:on_replace() _ = test_run:switch('storage_2_a') info = vshard.storage.info() @@ -119,6 +121,9 @@ for k,v in pairs(info.replicasets) do table.insert(uris, v.master.uri) end table.sort(uris) uris box.cfg.replication +box.cfg.read_only +-- Should be zero on the slave node. Even though earlier the node was a master. +#box.space._bucket:on_replace() _ = test_run:switch('storage_2_b') info = vshard.storage.info() @@ -127,6 +132,8 @@ for k,v in pairs(info.replicasets) do table.insert(uris, v.master.uri) end table.sort(uris) uris box.cfg.replication +box.cfg.read_only +#box.space._bucket:on_replace() _ = test_run:switch('storage_3_a') info = vshard.storage.info() @@ -135,6 +142,8 @@ for k,v in pairs(info.replicasets) do table.insert(uris, v.master.uri) end table.sort(uris) uris box.cfg.replication +box.cfg.read_only +#box.space._bucket:on_replace() _ = test_run:switch('router_1') info = vshard.router.info() diff --git a/test/reload_evolution/storage.result b/test/reload_evolution/storage.result index dde0a1f..4652c4f 100644 --- a/test/reload_evolution/storage.result +++ b/test/reload_evolution/storage.result @@ -86,16 +86,22 @@ package.loaded['vshard.storage'] = nil vshard.storage = require("vshard.storage") --- ... --- Should be nil. Because there was a bug that reload always reapplied the last --- migration callback. When it was fixed, the last callback wasn't called twice. --- But since the callback was only one, now nothing is called, and nothing is --- logged. -test_run:grep_log('storage_2_a', 'vshard.storage.reload_evolution: upgraded to') == nil +test_run:grep_log('storage_2_a', 'vshard.storage.reload_evolution: upgraded to') ~= nil --- - true ... vshard.storage.internal.reload_version --- +- 2 +... +-- +-- gh-237: should be only one trigger. During gh-237 the trigger installation +-- became conditional and therefore required to remember the current trigger +-- somewhere. When reloaded from the old version, the trigger needed to be +-- fetched from _bucket:on_replace(). +-- +#box.space._bucket:on_replace() +--- - 1 ... -- Make sure storage operates well. diff --git a/test/reload_evolution/storage.test.lua b/test/reload_evolution/storage.test.lua index 7858112..06f7117 100644 --- a/test/reload_evolution/storage.test.lua +++ b/test/reload_evolution/storage.test.lua @@ -35,12 +35,16 @@ box.space.test:insert({42, bucket_id_to_move}) package.path = original_package_path package.loaded['vshard.storage'] = nil vshard.storage = require("vshard.storage") --- Should be nil. Because there was a bug that reload always reapplied the last --- migration callback. When it was fixed, the last callback wasn't called twice. --- But since the callback was only one, now nothing is called, and nothing is --- logged. -test_run:grep_log('storage_2_a', 'vshard.storage.reload_evolution: upgraded to') == nil +test_run:grep_log('storage_2_a', 'vshard.storage.reload_evolution: upgraded to') ~= nil vshard.storage.internal.reload_version +-- +-- gh-237: should be only one trigger. During gh-237 the trigger installation +-- became conditional and therefore required to remember the current trigger +-- somewhere. When reloaded from the old version, the trigger needed to be +-- fetched from _bucket:on_replace(). +-- +#box.space._bucket:on_replace() + -- Make sure storage operates well. vshard.storage.bucket_force_drop(2000) vshard.storage.bucket_force_create(2000) diff --git a/test/router/boot_replica_first.result b/test/router/boot_replica_first.result new file mode 100644 index 0000000..1705230 --- /dev/null +++ b/test/router/boot_replica_first.result @@ -0,0 +1,112 @@ +-- test-run result file version 2 +test_run = require('test_run').new() + | --- + | ... +REPLICASET_1 = { 'box_1_a', 'box_1_b', 'box_1_c' } + | --- + | ... +test_run:create_cluster(REPLICASET_1, 'router', {args = 'boot_before_cfg'}) + | --- + | ... +util = require('util') + | --- + | ... +util.wait_master(test_run, REPLICASET_1, 'box_1_a') + | --- + | ... +_ = test_run:cmd("create server router with script='router/router_2.lua'") + | --- + | ... +_ = test_run:cmd("start server router") + | --- + | ... + +-- +-- gh-237: replica should be able to boot before master. Before the issue was +-- fixed, replica always tried to install a trigger on _bucket space even when +-- it was not created on a master yet - that lead to an exception in +-- storage.cfg. Now it should not install the trigger at all, because anyway it +-- is not needed on replica for anything. +-- + +test_run:switch('box_1_b') + | --- + | - true + | ... +vshard.storage.cfg(cfg, instance_uuid) + | --- + | ... +-- _bucket is not created yet. Will fail. +util.check_error(vshard.storage.call, 1, 'read', 'echo', {100}) + | --- + | - attempt to index field '_bucket' (a nil value) + | ... + +test_run:switch('default') + | --- + | - true + | ... +util.map_evals(test_run, {REPLICASET_1}, 'bootstrap_storage(\'memtx\')') + | --- + | ... + +test_run:switch('box_1_a') + | --- + | - true + | ... +vshard.storage.cfg(cfg, instance_uuid) + | --- + | ... + +test_run:switch('box_1_b') + | --- + | - true + | ... +test_run:wait_lsn('box_1_b', 'box_1_a') + | --- + | ... +-- Fails, but gracefully. Because the bucket is not found here. +vshard.storage.call(1, 'read', 'echo', {100}) + | --- + | - null + | - bucket_id: 1 + | reason: Not found + | code: 1 + | type: ShardingError + | message: 'Cannot perform action with bucket 1, reason: Not found' + | name: WRONG_BUCKET + | ... +-- Should not have triggers. +#box.space._bucket:on_replace() + | --- + | - 0 + | ... + +test_run:switch('router') + | --- + | - true + | ... +vshard.router.bootstrap() + | --- + | - true + | ... +vshard.router.callro(1, 'echo', {100}) + | --- + | - 100 + | ... + +test_run:switch("default") + | --- + | - true + | ... +test_run:cmd('stop server router') + | --- + | - true + | ... +test_run:cmd('delete server router') + | --- + | - true + | ... +test_run:drop_cluster(REPLICASET_1) + | --- + | ... diff --git a/test/router/boot_replica_first.test.lua b/test/router/boot_replica_first.test.lua new file mode 100644 index 0000000..7b1b3fd --- /dev/null +++ b/test/router/boot_replica_first.test.lua @@ -0,0 +1,42 @@ +test_run = require('test_run').new() +REPLICASET_1 = { 'box_1_a', 'box_1_b', 'box_1_c' } +test_run:create_cluster(REPLICASET_1, 'router', {args = 'boot_before_cfg'}) +util = require('util') +util.wait_master(test_run, REPLICASET_1, 'box_1_a') +_ = test_run:cmd("create server router with script='router/router_2.lua'") +_ = test_run:cmd("start server router") + +-- +-- gh-237: replica should be able to boot before master. Before the issue was +-- fixed, replica always tried to install a trigger on _bucket space even when +-- it was not created on a master yet - that lead to an exception in +-- storage.cfg. Now it should not install the trigger at all, because anyway it +-- is not needed on replica for anything. +-- + +test_run:switch('box_1_b') +vshard.storage.cfg(cfg, instance_uuid) +-- _bucket is not created yet. Will fail. +util.check_error(vshard.storage.call, 1, 'read', 'echo', {100}) + +test_run:switch('default') +util.map_evals(test_run, {REPLICASET_1}, 'bootstrap_storage(\'memtx\')') + +test_run:switch('box_1_a') +vshard.storage.cfg(cfg, instance_uuid) + +test_run:switch('box_1_b') +test_run:wait_lsn('box_1_b', 'box_1_a') +-- Fails, but gracefully. Because the bucket is not found here. +vshard.storage.call(1, 'read', 'echo', {100}) +-- Should not have triggers. +#box.space._bucket:on_replace() + +test_run:switch('router') +vshard.router.bootstrap() +vshard.router.callro(1, 'echo', {100}) + +test_run:switch("default") +test_run:cmd('stop server router') +test_run:cmd('delete server router') +test_run:drop_cluster(REPLICASET_1) diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua index c6a78fe..5464824 100644 --- a/vshard/storage/init.lua +++ b/vshard/storage/init.lua @@ -94,6 +94,17 @@ if not M then -- detect that _bucket was not changed between yields. -- bucket_generation = 0, + -- + -- Reference to the function used as on_replace trigger on + -- _bucket space. It is used to replace the trigger with + -- a new function when reload happens. It is kept + -- explicitly because the old function is deleted on + -- reload from the global namespace. On the other hand, it + -- is still stored in _bucket:on_replace() somewhere, but + -- it is not known where. The only 100% way to be able to + -- replace the old function is to keep its reference. + -- + bucket_on_replace = nil, ------------------- Garbage collection ------------------- -- Fiber to remove garbage buckets data. @@ -2435,8 +2446,14 @@ local function storage_cfg(cfg, this_replica_uuid, is_reload) local uri = luri.parse(this_replica.uri) schema_upgrade(is_master, uri.login, uri.password) - local old_trigger = box.space._bucket:on_replace()[1] - box.space._bucket:on_replace(bucket_generation_increment, old_trigger) + if M.bucket_on_replace then + box.space._bucket:on_replace(nil, M.bucket_on_replace) + M.bucket_on_replace = nil + end + if is_master then + box.space._bucket:on_replace(bucket_generation_increment) + M.bucket_on_replace = bucket_generation_increment + end lreplicaset.rebind_replicasets(new_replicasets, M.replicasets) lreplicaset.outdate_replicasets(M.replicasets) diff --git a/vshard/storage/reload_evolution.lua b/vshard/storage/reload_evolution.lua index 1abc9e2..f38af74 100644 --- a/vshard/storage/reload_evolution.lua +++ b/vshard/storage/reload_evolution.lua @@ -17,6 +17,14 @@ migrations[#migrations + 1] = function(M) -- Code to update Lua objects. end +migrations[#migrations + 1] = function(M) + local bucket = box.space._bucket + if bucket then + assert(M.bucket_on_replace == nil) + M.bucket_on_replace = bucket:on_replace()[1] + end +end + -- -- Perform an update based on a version stored in `M` (internals). -- @param M Old module internals which should be updated. -- 2.21.1 (Apple Git-122.3)