From: Alex Khatskevich <avkhatskevich@tarantool.org> To: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>, tarantool-patches@freelists.org Subject: [tarantool-patches] Re: [PATCH 1/3] Update only vshard part of a cfg on reload Date: Tue, 7 Aug 2018 16:19:57 +0300 [thread overview] Message-ID: <1d95c864-6ec2-d9ee-a851-1ea1020a47ae@tarantool.org> (raw) In-Reply-To: <1457a321-3ed1-ed05-74c2-e4f8508a0156@tarantool.org> On 06.08.2018 20:03, Vladislav Shpilevoy wrote: > Thanks for the patch! See 3 comments below. > >> diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua >> index 102b942..40216ea 100644 >> --- a/vshard/storage/init.lua >> +++ b/vshard/storage/init.lua >> @@ -1553,18 +1553,19 @@ local function storage_cfg(cfg, >> this_replica_uuid) >> -- >> -- If a master role of the replica is not changed, then >> -- 'read_only' can be set right here. >> - cfg.listen = cfg.listen or this_replica.uri >> - if cfg.replication == nil and this_replicaset.master and not >> is_master then >> - cfg.replication = {this_replicaset.master.uri} >> + box_cfg.listen = box_cfg.listen or this_replica.uri >> + if box_cfg.replication == nil and this_replicaset.master >> + and not is_master then >> + box_cfg.replication = {this_replicaset.master.uri} >> else >> - cfg.replication = {} >> + box_cfg.replication = {} >> end >> if was_master == is_master then >> - cfg.read_only = not is_master >> + box_cfg.read_only = not is_master >> end >> if type(box.cfg) == 'function' then >> - cfg.instance_uuid = this_replica.uuid >> - cfg.replicaset_uuid = this_replicaset.uuid >> + box_cfg.instance_uuid = this_replica.uuid >> + box_cfg.replicaset_uuid = this_replicaset.uuid > > 1. All these box_cfg manipulations should be done under 'if not > is_reload' > I think. Fixed. > >> else >> local info = box.info >> if this_replica_uuid ~= info.uuid then >> @@ -1607,27 +1610,27 @@ local function storage_cfg(cfg, >> this_replica_uuid) >> local_on_master_enable_prepare() >> end >> >> - local box_cfg = table.copy(cfg) >> - lcfg.remove_non_box_options(box_cfg) >> - local ok, err = pcall(box.cfg, box_cfg) >> - while M.errinj.ERRINJ_CFG_DELAY do >> - lfiber.sleep(0.01) >> - end >> - if not ok then >> - M.sync_timeout = old_sync_timeout >> - if was_master and not is_master then >> - local_on_master_disable_abort() >> + if not is_reload then >> + local ok, err = true, nil >> + ok, err = pcall(box.cfg, box_cfg) > > 2. Why do you need to announce 'local ok, err' before > their usage on the next line? fixed. > > >> + while M.errinj.ERRINJ_CFG_DELAY do >> + lfiber.sleep(0.01) >> end >> - if not was_master and is_master then >> - local_on_master_enable_abort() >> + if not ok then >> + M.sync_timeout = old_sync_timeout >> + if was_master and not is_master then >> + local_on_master_disable_abort() >> + end >> + if not was_master and is_master then >> + local_on_master_enable_abort() >> + end >> + error(err) >> end >> - error(err) >> + log.info("Box has been configured") >> + local uri = luri.parse(this_replica.uri) >> + box.once("vshard:storage:1", storage_schema_v1, uri.login, >> uri.password) >> end >> >> - log.info("Box has been configured") >> - local uri = luri.parse(this_replica.uri) >> - box.once("vshard:storage:1", storage_schema_v1, uri.login, >> uri.password) >> - >> lreplicaset.rebind_replicasets(new_replicasets, M.replicasets) >> lreplicaset.outdate_replicasets(M.replicasets) >> M.replicasets = new_replicasets >> @@ -1874,7 +1877,7 @@ if not rawget(_G, MODULE_INTERNALS) then >> rawset(_G, MODULE_INTERNALS, M) >> else >> reload_evolution.upgrade(M) >> - storage_cfg(M.current_cfg, M.this_replica.uuid) >> + storage_cfg(M.current_cfg, M.this_replica.uuid, true) > > 3. I see that you have stored vshard_cfg in M.current_cfg. Not a full > config. So it does not have any box options. And it causes a question > - why do you need to separate reload from non-reload, if reload anyway > in such implementation is like 'box.cfg{}' call with no parameters? > And if you do not store box_cfg options how are you going to compare > configs when we will implement atomic cfg over cluster? Fixed. And in a router too. Full diff: commit d3c35612130ff95b20245993ab5053981d3b985f Author: AKhatskevich <avkhatskevich@tarantool.org> Date: Mon Jul 23 16:42:22 2018 +0300 Update only vshard part of a cfg on reload Box cfg could have been changed by a user and then overridden by an old vshard config on reload. Since that commit, box part of a config is applied only when it is explicitly passed to a `cfg` method. This change is important for the multiple routers feature. diff --git a/vshard/cfg.lua b/vshard/cfg.lua index 7c9ab77..af1c3ee 100644 --- a/vshard/cfg.lua +++ b/vshard/cfg.lua @@ -221,6 +221,22 @@ local cfg_template = { }, } +-- +-- Split it into vshard_cfg and box_cfg parts. +-- +local function cfg_split(cfg) + local vshard_cfg = {} + local box_cfg = {} + for k, v in pairs(cfg) do + if cfg_template[k] then + vshard_cfg[k] = v + else + box_cfg[k] = v + end + end + return vshard_cfg, box_cfg +end + -- -- Names of options which cannot be changed during reconfigure. -- @@ -252,24 +268,7 @@ local function cfg_check(shard_cfg, old_cfg) return shard_cfg end --- --- Nullify non-box options. --- -local function remove_non_box_options(cfg) - cfg.sharding = nil - cfg.weights = nil - cfg.zone = nil - cfg.bucket_count = nil - cfg.rebalancer_disbalance_threshold = nil - cfg.rebalancer_max_receiving = nil - cfg.shard_index = nil - cfg.collect_bucket_garbage_interval = nil - cfg.collect_lua_garbage = nil - cfg.sync_timeout = nil - cfg.connection_outdate_delay = nil -end - return { check = cfg_check, - remove_non_box_options = remove_non_box_options, + split = cfg_split, } diff --git a/vshard/router/init.lua b/vshard/router/init.lua index d8c026b..1e8d898 100644 --- a/vshard/router/init.lua +++ b/vshard/router/init.lua @@ -491,19 +491,17 @@ end -- Configuration -------------------------------------------------------------------------------- -local function router_cfg(cfg) +local function router_cfg(cfg, is_reload) cfg = lcfg.check(cfg, M.current_cfg) - local new_cfg = table.copy(cfg) + local vshard_cfg, box_cfg = lcfg.split(cfg) if not M.replicasets then log.info('Starting router configuration') else log.info('Starting router reconfiguration') end - local new_replicasets = lreplicaset.buildall(cfg) - local total_bucket_count = cfg.bucket_count - local collect_lua_garbage = cfg.collect_lua_garbage - local box_cfg = table.copy(cfg) - lcfg.remove_non_box_options(box_cfg) + local new_replicasets = lreplicaset.buildall(vshard_cfg) + local total_bucket_count = vshard_cfg.bucket_count + local collect_lua_garbage = vshard_cfg.collect_lua_garbage log.info("Calling box.cfg()...") for k, v in pairs(box_cfg) do log.info({[k] = v}) @@ -514,8 +512,10 @@ local function router_cfg(cfg) if M.errinj.ERRINJ_CFG then error('Error injection: cfg') end - box.cfg(box_cfg) - log.info("Box has been configured") + if not is_reload then + box.cfg(box_cfg) + log.info("Box has been configured") + end -- Move connections from an old configuration to a new one. -- It must be done with no yields to prevent usage both of not -- fully moved old replicasets, and not fully built new ones. @@ -526,8 +526,9 @@ local function router_cfg(cfg) replicaset:connect_all() end lreplicaset.wait_masters_connect(new_replicasets) - lreplicaset.outdate_replicasets(M.replicasets, cfg.connection_outdate_delay) - M.connection_outdate_delay = cfg.connection_outdate_delay + lreplicaset.outdate_replicasets(M.replicasets, + vshard_cfg.connection_outdate_delay) + M.connection_outdate_delay = vshard_cfg.connection_outdate_delay M.total_bucket_count = total_bucket_count M.collect_lua_garbage = collect_lua_garbage M.current_cfg = cfg @@ -817,7 +818,7 @@ end if not rawget(_G, MODULE_INTERNALS) then rawset(_G, MODULE_INTERNALS, M) else - router_cfg(M.current_cfg) + router_cfg(M.current_cfg, true) M.module_version = M.module_version + 1 end @@ -825,7 +826,7 @@ M.discovery_f = discovery_f M.failover_f = failover_f return { - cfg = router_cfg; + cfg = function(cfg) return router_cfg(cfg, false) end; info = router_info; buckets_info = router_buckets_info; call = router_call; diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua index 1f29323..2080769 100644 --- a/vshard/storage/init.lua +++ b/vshard/storage/init.lua @@ -1500,13 +1500,14 @@ end -------------------------------------------------------------------------------- -- Configuration -------------------------------------------------------------------------------- -local function storage_cfg(cfg, this_replica_uuid) + +local function storage_cfg(cfg, this_replica_uuid, is_reload) if this_replica_uuid == nil then error('Usage: cfg(configuration, this_replica_uuid)') end cfg = lcfg.check(cfg, M.current_cfg) - local new_cfg = table.copy(cfg) - if cfg.weights or cfg.zone then + local vshard_cfg, box_cfg = lcfg.split(cfg) + if vshard_cfg.weights or vshard_cfg.zone then error('Weights and zone are not allowed for storage configuration') end if M.replicasets then @@ -1520,7 +1521,7 @@ local function storage_cfg(cfg, this_replica_uuid) local this_replicaset local this_replica - local new_replicasets = lreplicaset.buildall(cfg) + local new_replicasets = lreplicaset.buildall(vshard_cfg) local min_master for rs_uuid, rs in pairs(new_replicasets) do for replica_uuid, replica in pairs(rs.replicas) do @@ -1544,46 +1545,14 @@ local function storage_cfg(cfg, this_replica_uuid) log.info('I am master') end - -- Do not change 'read_only' option here - if a master is - -- disabled and there are triggers on master disable, then - -- they would not be able to modify anything, if 'read_only' - -- had been set here. 'Read_only' is set in - -- local_on_master_disable after triggers and is unset in - -- local_on_master_enable before triggers. - -- - -- If a master role of the replica is not changed, then - -- 'read_only' can be set right here. - cfg.listen = cfg.listen or this_replica.uri - if cfg.replication == nil and this_replicaset.master and not is_master then - cfg.replication = {this_replicaset.master.uri} - else - cfg.replication = {} - end - if was_master == is_master then - cfg.read_only = not is_master - end - if type(box.cfg) == 'function' then - cfg.instance_uuid = this_replica.uuid - cfg.replicaset_uuid = this_replicaset.uuid - else - local info = box.info - if this_replica_uuid ~= info.uuid then - error(string.format('Instance UUID mismatch: already set "%s" '.. - 'but "%s" in arguments', info.uuid, - this_replica_uuid)) - end - if this_replicaset.uuid ~= info.cluster.uuid then - error(string.format('Replicaset UUID mismatch: already set "%s" '.. - 'but "%s" in vshard config', info.cluster.uuid, - this_replicaset.uuid)) - end - end - local total_bucket_count = cfg.bucket_count - local rebalancer_disbalance_threshold = cfg.rebalancer_disbalance_threshold - local rebalancer_max_receiving = cfg.rebalancer_max_receiving - local shard_index = cfg.shard_index - local collect_bucket_garbage_interval = cfg.collect_bucket_garbage_interval - local collect_lua_garbage = cfg.collect_lua_garbage + local total_bucket_count = vshard_cfg.bucket_count + local rebalancer_disbalance_threshold = + vshard_cfg.rebalancer_disbalance_threshold + local rebalancer_max_receiving = vshard_cfg.rebalancer_max_receiving + local shard_index = vshard_cfg.shard_index + local collect_bucket_garbage_interval = + vshard_cfg.collect_bucket_garbage_interval + local collect_lua_garbage = vshard_cfg.collect_lua_garbage -- It is considered that all possible errors during cfg -- process occur only before this place. @@ -1598,7 +1567,7 @@ local function storage_cfg(cfg, this_replica_uuid) -- a new sync timeout. -- local old_sync_timeout = M.sync_timeout - M.sync_timeout = cfg.sync_timeout + M.sync_timeout = vshard_cfg.sync_timeout if was_master and not is_master then local_on_master_disable_prepare() @@ -1607,27 +1576,61 @@ local function storage_cfg(cfg, this_replica_uuid) local_on_master_enable_prepare() end - local box_cfg = table.copy(cfg) - lcfg.remove_non_box_options(box_cfg) - local ok, err = pcall(box.cfg, box_cfg) - while M.errinj.ERRINJ_CFG_DELAY do - lfiber.sleep(0.01) - end - if not ok then - M.sync_timeout = old_sync_timeout - if was_master and not is_master then - local_on_master_disable_abort() + if not is_reload then + -- Do not change 'read_only' option here - if a master is + -- disabled and there are triggers on master disable, then + -- they would not be able to modify anything, if 'read_only' + -- had been set here. 'Read_only' is set in + -- local_on_master_disable after triggers and is unset in + -- local_on_master_enable before triggers. + -- + -- If a master role of the replica is not changed, then + -- 'read_only' can be set right here. + box_cfg.listen = box_cfg.listen or this_replica.uri + if box_cfg.replication == nil and this_replicaset.master + and not is_master then + box_cfg.replication = {this_replicaset.master.uri} + else + box_cfg.replication = {} end - if not was_master and is_master then - local_on_master_enable_abort() + if was_master == is_master then + box_cfg.read_only = not is_master end - error(err) + if type(box.cfg) == 'function' then + box_cfg.instance_uuid = this_replica.uuid + box_cfg.replicaset_uuid = this_replicaset.uuid + else + local info = box.info + if this_replica_uuid ~= info.uuid then + error(string.format('Instance UUID mismatch: already set ' .. + '"%s" but "%s" in arguments', info.uuid, + this_replica_uuid)) + end + if this_replicaset.uuid ~= info.cluster.uuid then + error(string.format('Replicaset UUID mismatch: already set ' .. + '"%s" but "%s" in vshard config', + info.cluster.uuid, this_replicaset.uuid)) + end + end + local ok, err = pcall(box.cfg, box_cfg) + while M.errinj.ERRINJ_CFG_DELAY do + lfiber.sleep(0.01) + end + if not ok then + M.sync_timeout = old_sync_timeout + if was_master and not is_master then + local_on_master_disable_abort() + end + if not was_master and is_master then + local_on_master_enable_abort() + end + error(err) + end + log.info("Box has been configured") + local uri = luri.parse(this_replica.uri) + box.once("vshard:storage:1", storage_schema_v1, uri.login, uri.password) end - log.info("Box has been configured") - local uri = luri.parse(this_replica.uri) - box.once("vshard:storage:1", storage_schema_v1, uri.login, uri.password) - lreplicaset.rebind_replicasets(new_replicasets, M.replicasets) lreplicaset.outdate_replicasets(M.replicasets) M.replicasets = new_replicasets @@ -1639,7 +1642,7 @@ local function storage_cfg(cfg, this_replica_uuid) M.shard_index = shard_index M.collect_bucket_garbage_interval = collect_bucket_garbage_interval M.collect_lua_garbage = collect_lua_garbage - M.current_cfg = new_cfg + M.current_cfg = cfg if was_master and not is_master then local_on_master_disable() @@ -1875,7 +1878,7 @@ if not rawget(_G, MODULE_INTERNALS) then rawset(_G, MODULE_INTERNALS, M) else reload_evolution.upgrade(M) - storage_cfg(M.current_cfg, M.this_replica.uuid) + storage_cfg(M.current_cfg, M.this_replica.uuid, true) M.module_version = M.module_version + 1 end @@ -1914,7 +1917,7 @@ return { rebalancing_is_in_progress = rebalancing_is_in_progress, recovery_wakeup = recovery_wakeup, call = storage_call, - cfg = storage_cfg, + cfg = function(cfg, uuid) return storage_cfg(cfg, uuid, false) end, info = storage_info, buckets_info = storage_buckets_info, buckets_count = storage_buckets_count,
next prev parent reply other threads:[~2018-08-07 13:20 UTC|newest] Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-07-31 16:25 [tarantool-patches] [PATCH 0/3] multiple routers AKhatskevich 2018-07-31 16:25 ` [tarantool-patches] [PATCH 1/3] Update only vshard part of a cfg on reload AKhatskevich 2018-08-01 18:43 ` [tarantool-patches] " Vladislav Shpilevoy 2018-08-03 20:03 ` Alex Khatskevich 2018-08-06 17:03 ` Vladislav Shpilevoy 2018-08-07 13:19 ` Alex Khatskevich [this message] 2018-08-08 11:17 ` Vladislav Shpilevoy 2018-07-31 16:25 ` [tarantool-patches] [PATCH 2/3] Move lua gc to a dedicated module AKhatskevich 2018-08-01 18:43 ` [tarantool-patches] " Vladislav Shpilevoy 2018-08-03 20:04 ` Alex Khatskevich 2018-08-06 17:03 ` Vladislav Shpilevoy 2018-08-08 11:17 ` Vladislav Shpilevoy 2018-07-31 16:25 ` [tarantool-patches] [PATCH 3/3] Introduce multiple routers feature AKhatskevich 2018-08-01 18:43 ` [tarantool-patches] " Vladislav Shpilevoy 2018-08-03 20:05 ` Alex Khatskevich 2018-08-06 17:03 ` Vladislav Shpilevoy 2018-08-07 13:18 ` Alex Khatskevich 2018-08-08 12:28 ` Vladislav Shpilevoy 2018-08-08 14:04 ` Alex Khatskevich 2018-08-08 15:37 ` Vladislav Shpilevoy 2018-08-01 14:30 ` [tarantool-patches] [PATCH] Check self arg passed for router objects AKhatskevich 2018-08-03 20:07 ` [tarantool-patches] [PATCH] Refactor config templates AKhatskevich 2018-08-06 15:49 ` [tarantool-patches] " Vladislav Shpilevoy
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=1d95c864-6ec2-d9ee-a851-1ea1020a47ae@tarantool.org \ --to=avkhatskevich@tarantool.org \ --cc=tarantool-patches@freelists.org \ --cc=v.shpilevoy@tarantool.org \ --subject='[tarantool-patches] Re: [PATCH 1/3] Update only vshard part of a cfg on reload' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox