From: Oleg Babin
Date: Thu, 6 Aug 2020 09:40:12 +0300
Subject: Re: [Tarantool-patches] [PATCH v2 vshard 2/2] storage: allow replica to boot before master
To: Vladislav Shpilevoy, tarantool-patches@dev.tarantool.org, yaroslav.dynnikov@tarantool.org
Message-ID: <894ab65e-64f2-e593-4f58-1c239ce3994f@tarantool.org>
In-Reply-To: <0cd777f3990939b32d96bf964dea55e8073e65bb.1596665591.git.v.shpilevoy@tarantool.org>
List-Id: Tarantool development patches

Thanks for your patch. LGTM.

On 06/08/2020 01:15, Vladislav Shpilevoy wrote:
> Before the patch a replica couldn't call vshard.storage.cfg() before
> the master was bootstrapped. That could lead to an exception in the
> middle of vshard.storage.cfg().
>
> It does not sound too bad and even looks unreachable, because a
> replica's bootstrap is anyway blocked in box.cfg() until a master node
> is started; otherwise the replica instance is terminated. But box.cfg
> bootstrap != vshard bootstrap.
>
> It can happen that both the replica and the master nodes are already
> bootstrapped and working, but vshard.storage.cfg() hasn't been called
> yet. In that case vshard.storage.cfg() on the replica doesn't block in
> box.cfg(), and happily fails a few lines later at the attempt to touch
> the not yet existing box.space._bucket.
>
> That was the case for cartridge. The framework configures instances in
> its own way before vshard.storage.cfg(), then calls
> vshard.storage.cfg() on them in arbitrary order, and sometimes replicas
> were configured earlier than the master.
>
> The failure is fixed by simply skipping the _bucket trigger
> installation on the replica node, because so far it is not needed there
> for anything: the trigger's sole purpose is to increment the bucket
> generation, which is used only for DML on _bucket on the master node.
>
> Now vshard.storage.cfg() on a replica is able to finish before the
> master does the same. However, the replica still won't be able to
> handle vshard.storage.* requests until it receives the vshard schema
> from the master.
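
A small side note, not blocking: since a replica can now leave vshard.storage.cfg()
before the schema has arrived from the master, a caller that wants to use the
replica right away has to wait for the schema itself. A minimal sketch of such a
wait, assuming a plain polling helper (wait_vshard_schema() and the timeout value
are mine, not part of the patch):

local fiber = require('fiber')

-- Hypothetical helper: block until the vshard schema (in particular the
-- _bucket space) has been replicated from the master, or the timeout expires.
local function wait_vshard_schema(timeout)
    local deadline = fiber.clock() + (timeout or 10)
    while box.space._bucket == nil do
        if fiber.clock() > deadline then
            return false
        end
        fiber.sleep(0.1)
    end
    return true
end

-- usage: assert(wait_vshard_schema(30))

Once _bucket exists on the replica, vshard.storage.call() fails gracefully
(WRONG_BUCKET and similar) instead of raising on a nil _bucket, which is exactly
what the new test checks.
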
>
> Closes #237
> ---
>  test/lua_libs/storage_template.lua      |  55 +++++++++++-
>  test/misc/reconfigure.result            |  33 +++++++
>  test/misc/reconfigure.test.lua          |   9 ++
>  test/reload_evolution/storage.result    |  16 ++--
>  test/reload_evolution/storage.test.lua  |  14 +--
>  test/router/boot_replica_first.result   | 112 ++++++++++++++++++++++++
>  test/router/boot_replica_first.test.lua |  42 +++++++++
>  vshard/storage/init.lua                 |  21 ++++-
>  vshard/storage/reload_evolution.lua     |   8 ++
>  9 files changed, 297 insertions(+), 13 deletions(-)
>  create mode 100644 test/router/boot_replica_first.result
>  create mode 100644 test/router/boot_replica_first.test.lua
>
> diff --git a/test/lua_libs/storage_template.lua b/test/lua_libs/storage_template.lua
> index 29a753d..84e4180 100644
> --- a/test/lua_libs/storage_template.lua
> +++ b/test/lua_libs/storage_template.lua
> @@ -1,5 +1,7 @@
>  #!/usr/bin/env tarantool
>
> +local luri = require('uri')
> +
>  NAME = require('fio').basename(arg[0], '.lua')
>  fiber = require('fiber')
>  test_run = require('test_run').new()
> @@ -10,12 +12,63 @@ log = require('log')
>  if not cfg.shard_index then
>      cfg.shard_index = 'bucket_id'
>  end
> +instance_uuid = util.name_to_uuid[NAME]
> +
> +--
> +-- Bootstrap the instance exactly like vshard does. But don't
> +-- initialize any vshard-specific code.
> +--
> +local function boot_like_vshard()
> +    assert(type(box.cfg) == 'function')
> +    for rs_uuid, rs in pairs(cfg.sharding) do
> +        for replica_uuid, replica in pairs(rs.replicas) do
> +            if replica_uuid == instance_uuid then
> +                local box_cfg = {replication = {}}
> +                box_cfg.instance_uuid = replica_uuid
> +                box_cfg.replicaset_uuid = rs_uuid
> +                box_cfg.listen = replica.uri
> +                box_cfg.read_only = not replica.master
> +                box_cfg.replication_connect_quorum = 0
> +                box_cfg.replication_timeout = 0.1
> +                for _, replica in pairs(rs.replicas) do
> +                    table.insert(box_cfg.replication, replica.uri)
> +                end
> +                box.cfg(box_cfg)
> +                if not replica.master then
> +                    return
> +                end
> +                local uri = luri.parse(replica.uri)
> +                box.schema.user.create(uri.login, {
> +                    password = uri.password, if_not_exists = true,
> +                })
> +                box.schema.user.grant(uri.login, 'super')
> +                return
> +            end
> +        end
> +    end
> +    assert(false)
> +end
> +
> +local omit_cfg = false
> +local i = 1
> +while arg[i] ~= nil do
> +    local key = arg[i]
> +    i = i + 1
> +    if key == 'boot_before_cfg' then
> +        boot_like_vshard()
> +        omit_cfg = true
> +    end
> +end
>
>  vshard = require('vshard')
>  echo_count = 0
>  cfg.replication_connect_timeout = 3
>  cfg.replication_timeout = 0.1
> -vshard.storage.cfg(cfg, util.name_to_uuid[NAME])
> +
> +if not omit_cfg then
> +    vshard.storage.cfg(cfg, instance_uuid)
> +end
> +
>  function bootstrap_storage(engine)
>      box.once("testapp:schema:1", function()
>          if rawget(_G, 'CHANGE_SPACE_IDS') then
> diff --git a/test/misc/reconfigure.result b/test/misc/reconfigure.result
> index be58eca..168be5d 100644
> --- a/test/misc/reconfigure.result
> +++ b/test/misc/reconfigure.result
> @@ -277,6 +277,14 @@ box.cfg.replication
>  ---
>  - - storage:storage@127.0.0.1:3301
>  ...
> +box.cfg.read_only
> +---
> +- false
> +...
> +#box.space._bucket:on_replace()
> +---
> +- 1
> +...
>  _ = test_run:switch('storage_2_a')
>  ---
>  ...
> @@ -303,6 +311,15 @@ box.cfg.replication
>  - - storage:storage@127.0.0.1:3303
>    - storage:storage@127.0.0.1:3304
>  ...
> +box.cfg.read_only
> +---
> +- true
> +...
> +-- Should be zero on the slave node. Even though earlier the node was a master.
> +#box.space._bucket:on_replace()
> +---
> +- 0
> +...
>  _ = test_run:switch('storage_2_b')
>  ---
>  ...
> @@ -329,6 +346,14 @@ box.cfg.replication
>  - - storage:storage@127.0.0.1:3303
>    - storage:storage@127.0.0.1:3304
>  ...
> +box.cfg.read_only
> +---
> +- false
> +...
> +#box.space._bucket:on_replace()
> +---
> +- 1
> +...
>  _ = test_run:switch('storage_3_a')
>  ---
>  ...
> @@ -354,6 +379,14 @@ box.cfg.replication
>  ---
>  - - storage:storage@127.0.0.1:3306
>  ...
> +box.cfg.read_only
> +---
> +- false
> +...
> +#box.space._bucket:on_replace()
> +---
> +- 1
> +...
>  _ = test_run:switch('router_1')
>  ---
>  ...
> diff --git a/test/misc/reconfigure.test.lua b/test/misc/reconfigure.test.lua
> index 1b894c8..e891010 100644
> --- a/test/misc/reconfigure.test.lua
> +++ b/test/misc/reconfigure.test.lua
> @@ -111,6 +111,8 @@ for k,v in pairs(info.replicasets) do table.insert(uris, v.master.uri) end
>  table.sort(uris)
>  uris
>  box.cfg.replication
> +box.cfg.read_only
> +#box.space._bucket:on_replace()
>
>  _ = test_run:switch('storage_2_a')
>  info = vshard.storage.info()
> @@ -119,6 +121,9 @@ for k,v in pairs(info.replicasets) do table.insert(uris, v.master.uri) end
>  table.sort(uris)
>  uris
>  box.cfg.replication
> +box.cfg.read_only
> +-- Should be zero on the slave node. Even though earlier the node was a master.
> +#box.space._bucket:on_replace()
>
>  _ = test_run:switch('storage_2_b')
>  info = vshard.storage.info()
> @@ -127,6 +132,8 @@ for k,v in pairs(info.replicasets) do table.insert(uris, v.master.uri) end
>  table.sort(uris)
>  uris
>  box.cfg.replication
> +box.cfg.read_only
> +#box.space._bucket:on_replace()
>
>  _ = test_run:switch('storage_3_a')
>  info = vshard.storage.info()
> @@ -135,6 +142,8 @@ for k,v in pairs(info.replicasets) do table.insert(uris, v.master.uri) end
>  table.sort(uris)
>  uris
>  box.cfg.replication
> +box.cfg.read_only
> +#box.space._bucket:on_replace()
>
>  _ = test_run:switch('router_1')
>  info = vshard.router.info()
> diff --git a/test/reload_evolution/storage.result b/test/reload_evolution/storage.result
> index dde0a1f..4652c4f 100644
> --- a/test/reload_evolution/storage.result
> +++ b/test/reload_evolution/storage.result
> @@ -86,16 +86,22 @@ package.loaded['vshard.storage'] = nil
>  vshard.storage = require("vshard.storage")
>  ---
>  ...
> --- Should be nil. Because there was a bug that reload always reapplied the last
> --- migration callback. When it was fixed, the last callback wasn't called twice.
> --- But since the callback was only one, now nothing is called, and nothing is
> --- logged.
> -test_run:grep_log('storage_2_a', 'vshard.storage.reload_evolution: upgraded to') == nil
> +test_run:grep_log('storage_2_a', 'vshard.storage.reload_evolution: upgraded to') ~= nil
>  ---
>  - true
>  ...
>  vshard.storage.internal.reload_version
>  ---
> +- 2
> +...
> +--
> +-- gh-237: should be only one trigger. During gh-237 the trigger installation
> +-- became conditional and therefore required to remember the current trigger
> +-- somewhere. When reloaded from the old version, the trigger needed to be
> +-- fetched from _bucket:on_replace().
> +--
> +#box.space._bucket:on_replace()
> +---
>  - 1
>  ...
>  -- Make sure storage operates well.
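
The #box.space._bucket:on_replace() checks above read nicely. For the record, they
rely on the usual space trigger API: on_replace() with no arguments returns the
list of installed triggers, on_replace(func) installs one, and
on_replace(nil, old_func) removes one. A tiny sketch of that contract on a
throwaway space (the 'demo' space is hypothetical, only to illustrate the calls;
run inside an already configured instance, i.e. after box.cfg{}):

-- Create a scratch space to poke the trigger API.
local s = box.schema.space.create('demo', {if_not_exists = true})
s:create_index('pk', {if_not_exists = true})

local function trig(old, new) end

s:on_replace(trig)               -- install the trigger
assert(#s:on_replace() == 1)     -- listing returns the installed functions
s:on_replace(nil, trig)          -- remove it by passing nil plus the old function
assert(#s:on_replace() == 0)

That is exactly why storage_cfg() has to keep a reference to the old trigger
across reloads: removal requires the original function value.
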
> diff --git a/test/reload_evolution/storage.test.lua b/test/reload_evolution/storage.test.lua
> index 7858112..06f7117 100644
> --- a/test/reload_evolution/storage.test.lua
> +++ b/test/reload_evolution/storage.test.lua
> @@ -35,12 +35,16 @@ box.space.test:insert({42, bucket_id_to_move})
>  package.path = original_package_path
>  package.loaded['vshard.storage'] = nil
>  vshard.storage = require("vshard.storage")
> --- Should be nil. Because there was a bug that reload always reapplied the last
> --- migration callback. When it was fixed, the last callback wasn't called twice.
> --- But since the callback was only one, now nothing is called, and nothing is
> --- logged.
> -test_run:grep_log('storage_2_a', 'vshard.storage.reload_evolution: upgraded to') == nil
> +test_run:grep_log('storage_2_a', 'vshard.storage.reload_evolution: upgraded to') ~= nil
>  vshard.storage.internal.reload_version
> +--
> +-- gh-237: should be only one trigger. During gh-237 the trigger installation
> +-- became conditional and therefore required to remember the current trigger
> +-- somewhere. When reloaded from the old version, the trigger needed to be
> +-- fetched from _bucket:on_replace().
> +--
> +#box.space._bucket:on_replace()
> +
>  -- Make sure storage operates well.
>  vshard.storage.bucket_force_drop(2000)
>  vshard.storage.bucket_force_create(2000)
> diff --git a/test/router/boot_replica_first.result b/test/router/boot_replica_first.result
> new file mode 100644
> index 0000000..1705230
> --- /dev/null
> +++ b/test/router/boot_replica_first.result
> @@ -0,0 +1,112 @@
> +-- test-run result file version 2
> +test_run = require('test_run').new()
> + | ---
> + | ...
> +REPLICASET_1 = { 'box_1_a', 'box_1_b', 'box_1_c' }
> + | ---
> + | ...
> +test_run:create_cluster(REPLICASET_1, 'router', {args = 'boot_before_cfg'})
> + | ---
> + | ...
> +util = require('util')
> + | ---
> + | ...
> +util.wait_master(test_run, REPLICASET_1, 'box_1_a')
> + | ---
> + | ...
> +_ = test_run:cmd("create server router with script='router/router_2.lua'")
> + | ---
> + | ...
> +_ = test_run:cmd("start server router")
> + | ---
> + | ...
> +
> +--
> +-- gh-237: replica should be able to boot before master. Before the issue was
> +-- fixed, replica always tried to install a trigger on _bucket space even when
> +-- it was not created on a master yet - that lead to an exception in
> +-- storage.cfg. Now it should not install the trigger at all, because anyway it
> +-- is not needed on replica for anything.
> +--
> +
> +test_run:switch('box_1_b')
> + | ---
> + | - true
> + | ...
> +vshard.storage.cfg(cfg, instance_uuid)
> + | ---
> + | ...
> +-- _bucket is not created yet. Will fail.
> +util.check_error(vshard.storage.call, 1, 'read', 'echo', {100})
> + | ---
> + | - attempt to index field '_bucket' (a nil value)
> + | ...
> +
> +test_run:switch('default')
> + | ---
> + | - true
> + | ...
> +util.map_evals(test_run, {REPLICASET_1}, 'bootstrap_storage(\'memtx\')')
> + | ---
> + | ...
> +
> +test_run:switch('box_1_a')
> + | ---
> + | - true
> + | ...
> +vshard.storage.cfg(cfg, instance_uuid)
> + | ---
> + | ...
> +
> +test_run:switch('box_1_b')
> + | ---
> + | - true
> + | ...
> +test_run:wait_lsn('box_1_b', 'box_1_a')
> + | ---
> + | ...
> +-- Fails, but gracefully. Because the bucket is not found here.
> +vshard.storage.call(1, 'read', 'echo', {100})
> + | ---
> + | - null
> + | - bucket_id: 1
> + |   reason: Not found
> + |   code: 1
> + |   type: ShardingError
> + |   message: 'Cannot perform action with bucket 1, reason: Not found'
> + |   name: WRONG_BUCKET
> + | ...
> +-- Should not have triggers.
> +#box.space._bucket:on_replace()
> + | ---
> + | - 0
> + | ...
> +
> +test_run:switch('router')
> + | ---
> + | - true
> + | ...
> +vshard.router.bootstrap()
> + | ---
> + | - true
> + | ...
> +vshard.router.callro(1, 'echo', {100})
> + | ---
> + | - 100
> + | ...
> +
> +test_run:switch("default")
> + | ---
> + | - true
> + | ...
> +test_run:cmd('stop server router')
> + | ---
> + | - true
> + | ...
> +test_run:cmd('delete server router')
> + | ---
> + | - true
> + | ...
> +test_run:drop_cluster(REPLICASET_1)
> + | ---
> + | ...
> diff --git a/test/router/boot_replica_first.test.lua b/test/router/boot_replica_first.test.lua
> new file mode 100644
> index 0000000..7b1b3fd
> --- /dev/null
> +++ b/test/router/boot_replica_first.test.lua
> @@ -0,0 +1,42 @@
> +test_run = require('test_run').new()
> +REPLICASET_1 = { 'box_1_a', 'box_1_b', 'box_1_c' }
> +test_run:create_cluster(REPLICASET_1, 'router', {args = 'boot_before_cfg'})
> +util = require('util')
> +util.wait_master(test_run, REPLICASET_1, 'box_1_a')
> +_ = test_run:cmd("create server router with script='router/router_2.lua'")
> +_ = test_run:cmd("start server router")
> +
> +--
> +-- gh-237: replica should be able to boot before master. Before the issue was
> +-- fixed, replica always tried to install a trigger on _bucket space even when
> +-- it was not created on a master yet - that lead to an exception in
> +-- storage.cfg. Now it should not install the trigger at all, because anyway it
> +-- is not needed on replica for anything.
> +--
> +
> +test_run:switch('box_1_b')
> +vshard.storage.cfg(cfg, instance_uuid)
> +-- _bucket is not created yet. Will fail.
> +util.check_error(vshard.storage.call, 1, 'read', 'echo', {100})
> +
> +test_run:switch('default')
> +util.map_evals(test_run, {REPLICASET_1}, 'bootstrap_storage(\'memtx\')')
> +
> +test_run:switch('box_1_a')
> +vshard.storage.cfg(cfg, instance_uuid)
> +
> +test_run:switch('box_1_b')
> +test_run:wait_lsn('box_1_b', 'box_1_a')
> +-- Fails, but gracefully. Because the bucket is not found here.
> +vshard.storage.call(1, 'read', 'echo', {100})
> +-- Should not have triggers.
> +#box.space._bucket:on_replace()
> +
> +test_run:switch('router')
> +vshard.router.bootstrap()
> +vshard.router.callro(1, 'echo', {100})
> +
> +test_run:switch("default")
> +test_run:cmd('stop server router')
> +test_run:cmd('delete server router')
> +test_run:drop_cluster(REPLICASET_1)
> diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
> index c6a78fe..5464824 100644
> --- a/vshard/storage/init.lua
> +++ b/vshard/storage/init.lua
> @@ -94,6 +94,17 @@ if not M then
>          -- detect that _bucket was not changed between yields.
>          --
>          bucket_generation = 0,
> +        --
> +        -- Reference to the function used as on_replace trigger on
> +        -- _bucket space. It is used to replace the trigger with
> +        -- a new function when reload happens. It is kept
> +        -- explicitly because the old function is deleted on
> +        -- reload from the global namespace. On the other hand, it
> +        -- is still stored in _bucket:on_replace() somewhere, but
> +        -- it is not known where. The only 100% way to be able to
> +        -- replace the old function is to keep its reference.
> +        --
> +        bucket_on_replace = nil,
>
>          ------------------- Garbage collection -------------------
>          -- Fiber to remove garbage buckets data.
> @@ -2435,8 +2446,14 @@ local function storage_cfg(cfg, this_replica_uuid, is_reload)
>      local uri = luri.parse(this_replica.uri)
>      schema_upgrade(is_master, uri.login, uri.password)
>
> -    local old_trigger = box.space._bucket:on_replace()[1]
> -    box.space._bucket:on_replace(bucket_generation_increment, old_trigger)
> +    if M.bucket_on_replace then
> +        box.space._bucket:on_replace(nil, M.bucket_on_replace)
> +        M.bucket_on_replace = nil
> +    end
> +    if is_master then
> +        box.space._bucket:on_replace(bucket_generation_increment)
> +        M.bucket_on_replace = bucket_generation_increment
> +    end
>
>      lreplicaset.rebind_replicasets(new_replicasets, M.replicasets)
>      lreplicaset.outdate_replicasets(M.replicasets)
> diff --git a/vshard/storage/reload_evolution.lua b/vshard/storage/reload_evolution.lua
> index 1abc9e2..f38af74 100644
> --- a/vshard/storage/reload_evolution.lua
> +++ b/vshard/storage/reload_evolution.lua
> @@ -17,6 +17,14 @@ migrations[#migrations + 1] = function(M)
>      -- Code to update Lua objects.
>  end
>
> +migrations[#migrations + 1] = function(M)
> +    local bucket = box.space._bucket
> +    if bucket then
> +        assert(M.bucket_on_replace == nil)
> +        M.bucket_on_replace = bucket:on_replace()[1]
> +    end
> +end
> +
>  --
>  -- Perform an update based on a version stored in `M` (internals).
>  -- @param M Old module internals which should be updated.
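
The new migration at the end looks right to me. As I read the reload_evolution
contract, each entry in migrations moves the in-memory state M one version
forward, and the upgrade step replays every entry past M.reload_version, which is
why the test now expects reload_version == 2 and the 'upgraded to' log line. A
condensed sketch of that mechanism as I understand it (the upgrade() body here is
my paraphrase, not the file's exact code):

local migrations = {}

migrations[#migrations + 1] = function(M)
    -- Version 1: nothing to do for in-memory state.
end

migrations[#migrations + 1] = function(M)
    -- Version 2 (this patch): remember the trigger that an old version had
    -- already installed on _bucket, so storage_cfg() can detach it later.
    local bucket = box.space._bucket
    if bucket then
        M.bucket_on_replace = bucket:on_replace()[1]
    end
end

local function upgrade(M)
    -- Replay every migration the running module has not seen yet.
    for i = M.reload_version + 1, #migrations do
        migrations[i](M)
        M.reload_version = i
    end
end

Again, LGTM.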