[tarantool-patches] Re: [PATCH 1/3] Update only vshard part of a cfg on reload
Alex Khatskevich
avkhatskevich at tarantool.org
Tue Aug 7 16:19:57 MSK 2018
On 06.08.2018 20:03, Vladislav Shpilevoy wrote:
> Thanks for the patch! See 3 comments below.
>
>> diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
>> index 102b942..40216ea 100644
>> --- a/vshard/storage/init.lua
>> +++ b/vshard/storage/init.lua
>> @@ -1553,18 +1553,19 @@ local function storage_cfg(cfg,
>> this_replica_uuid)
>> --
>> -- If a master role of the replica is not changed, then
>> -- 'read_only' can be set right here.
>> - cfg.listen = cfg.listen or this_replica.uri
>> - if cfg.replication == nil and this_replicaset.master and not
>> is_master then
>> - cfg.replication = {this_replicaset.master.uri}
>> + box_cfg.listen = box_cfg.listen or this_replica.uri
>> + if box_cfg.replication == nil and this_replicaset.master
>> + and not is_master then
>> + box_cfg.replication = {this_replicaset.master.uri}
>> else
>> - cfg.replication = {}
>> + box_cfg.replication = {}
>> end
>> if was_master == is_master then
>> - cfg.read_only = not is_master
>> + box_cfg.read_only = not is_master
>> end
>> if type(box.cfg) == 'function' then
>> - cfg.instance_uuid = this_replica.uuid
>> - cfg.replicaset_uuid = this_replicaset.uuid
>> + box_cfg.instance_uuid = this_replica.uuid
>> + box_cfg.replicaset_uuid = this_replicaset.uuid
>
> 1. All these box_cfg manipulations should be done under 'if not
> is_reload'
> I think.
Fixed.
>
>> else
>> local info = box.info
>> if this_replica_uuid ~= info.uuid then
>> @@ -1607,27 +1610,27 @@ local function storage_cfg(cfg,
>> this_replica_uuid)
>> local_on_master_enable_prepare()
>> end
>>
>> - local box_cfg = table.copy(cfg)
>> - lcfg.remove_non_box_options(box_cfg)
>> - local ok, err = pcall(box.cfg, box_cfg)
>> - while M.errinj.ERRINJ_CFG_DELAY do
>> - lfiber.sleep(0.01)
>> - end
>> - if not ok then
>> - M.sync_timeout = old_sync_timeout
>> - if was_master and not is_master then
>> - local_on_master_disable_abort()
>> + if not is_reload then
>> + local ok, err = true, nil
>> + ok, err = pcall(box.cfg, box_cfg)
>
> 2. Why do you need to announce 'local ok, err' before
> their usage on the next line?
fixed.
>
>
>> + while M.errinj.ERRINJ_CFG_DELAY do
>> + lfiber.sleep(0.01)
>> end
>> - if not was_master and is_master then
>> - local_on_master_enable_abort()
>> + if not ok then
>> + M.sync_timeout = old_sync_timeout
>> + if was_master and not is_master then
>> + local_on_master_disable_abort()
>> + end
>> + if not was_master and is_master then
>> + local_on_master_enable_abort()
>> + end
>> + error(err)
>> end
>> - error(err)
>> + log.info("Box has been configured")
>> + local uri = luri.parse(this_replica.uri)
>> + box.once("vshard:storage:1", storage_schema_v1, uri.login,
>> uri.password)
>> end
>>
>> - log.info("Box has been configured")
>> - local uri = luri.parse(this_replica.uri)
>> - box.once("vshard:storage:1", storage_schema_v1, uri.login,
>> uri.password)
>> -
>> lreplicaset.rebind_replicasets(new_replicasets, M.replicasets)
>> lreplicaset.outdate_replicasets(M.replicasets)
>> M.replicasets = new_replicasets
>> @@ -1874,7 +1877,7 @@ if not rawget(_G, MODULE_INTERNALS) then
>> rawset(_G, MODULE_INTERNALS, M)
>> else
>> reload_evolution.upgrade(M)
>> - storage_cfg(M.current_cfg, M.this_replica.uuid)
>> + storage_cfg(M.current_cfg, M.this_replica.uuid, true)
>
> 3. I see that you have stored vshard_cfg in M.current_cfg. Not a full
> config. So it does not have any box options. And it causes a question
> - why do you need to separate reload from non-reload, if reload anyway
> in such implementation is like 'box.cfg{}' call with no parameters?
> And if you do not store box_cfg options how are you going to compare
> configs when we will implement atomic cfg over cluster?
Fixed. And in a router too.
Full diff:
commit d3c35612130ff95b20245993ab5053981d3b985f
Author: AKhatskevich <avkhatskevich at tarantool.org>
Date: Mon Jul 23 16:42:22 2018 +0300
Update only vshard part of a cfg on reload
Box cfg could have been changed by a user and then overridden by
an old vshard config on reload.
Since that commit, box part of a config is applied only when
it is explicitly passed to a `cfg` method.
This change is important for the multiple routers feature.
diff --git a/vshard/cfg.lua b/vshard/cfg.lua
index 7c9ab77..af1c3ee 100644
--- a/vshard/cfg.lua
+++ b/vshard/cfg.lua
@@ -221,6 +221,22 @@ local cfg_template = {
},
}
+--
+-- Split it into vshard_cfg and box_cfg parts.
+--
+local function cfg_split(cfg)
+ local vshard_cfg = {}
+ local box_cfg = {}
+ for k, v in pairs(cfg) do
+ if cfg_template[k] then
+ vshard_cfg[k] = v
+ else
+ box_cfg[k] = v
+ end
+ end
+ return vshard_cfg, box_cfg
+end
+
--
-- Names of options which cannot be changed during reconfigure.
--
@@ -252,24 +268,7 @@ local function cfg_check(shard_cfg, old_cfg)
return shard_cfg
end
---
--- Nullify non-box options.
---
-local function remove_non_box_options(cfg)
- cfg.sharding = nil
- cfg.weights = nil
- cfg.zone = nil
- cfg.bucket_count = nil
- cfg.rebalancer_disbalance_threshold = nil
- cfg.rebalancer_max_receiving = nil
- cfg.shard_index = nil
- cfg.collect_bucket_garbage_interval = nil
- cfg.collect_lua_garbage = nil
- cfg.sync_timeout = nil
- cfg.connection_outdate_delay = nil
-end
-
return {
check = cfg_check,
- remove_non_box_options = remove_non_box_options,
+ split = cfg_split,
}
diff --git a/vshard/router/init.lua b/vshard/router/init.lua
index d8c026b..1e8d898 100644
--- a/vshard/router/init.lua
+++ b/vshard/router/init.lua
@@ -491,19 +491,17 @@ end
-- Configuration
--------------------------------------------------------------------------------
-local function router_cfg(cfg)
+local function router_cfg(cfg, is_reload)
cfg = lcfg.check(cfg, M.current_cfg)
- local new_cfg = table.copy(cfg)
+ local vshard_cfg, box_cfg = lcfg.split(cfg)
if not M.replicasets then
log.info('Starting router configuration')
else
log.info('Starting router reconfiguration')
end
- local new_replicasets = lreplicaset.buildall(cfg)
- local total_bucket_count = cfg.bucket_count
- local collect_lua_garbage = cfg.collect_lua_garbage
- local box_cfg = table.copy(cfg)
- lcfg.remove_non_box_options(box_cfg)
+ local new_replicasets = lreplicaset.buildall(vshard_cfg)
+ local total_bucket_count = vshard_cfg.bucket_count
+ local collect_lua_garbage = vshard_cfg.collect_lua_garbage
log.info("Calling box.cfg()...")
for k, v in pairs(box_cfg) do
log.info({[k] = v})
@@ -514,8 +512,10 @@ local function router_cfg(cfg)
if M.errinj.ERRINJ_CFG then
error('Error injection: cfg')
end
- box.cfg(box_cfg)
- log.info("Box has been configured")
+ if not is_reload then
+ box.cfg(box_cfg)
+ log.info("Box has been configured")
+ end
-- Move connections from an old configuration to a new one.
-- It must be done with no yields to prevent usage both of not
-- fully moved old replicasets, and not fully built new ones.
@@ -526,8 +526,9 @@ local function router_cfg(cfg)
replicaset:connect_all()
end
lreplicaset.wait_masters_connect(new_replicasets)
- lreplicaset.outdate_replicasets(M.replicasets,
cfg.connection_outdate_delay)
- M.connection_outdate_delay = cfg.connection_outdate_delay
+ lreplicaset.outdate_replicasets(M.replicasets,
+ vshard_cfg.connection_outdate_delay)
+ M.connection_outdate_delay = vshard_cfg.connection_outdate_delay
M.total_bucket_count = total_bucket_count
M.collect_lua_garbage = collect_lua_garbage
M.current_cfg = cfg
@@ -817,7 +818,7 @@ end
if not rawget(_G, MODULE_INTERNALS) then
rawset(_G, MODULE_INTERNALS, M)
else
- router_cfg(M.current_cfg)
+ router_cfg(M.current_cfg, true)
M.module_version = M.module_version + 1
end
@@ -825,7 +826,7 @@ M.discovery_f = discovery_f
M.failover_f = failover_f
return {
- cfg = router_cfg;
+ cfg = function(cfg) return router_cfg(cfg, false) end;
info = router_info;
buckets_info = router_buckets_info;
call = router_call;
diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
index 1f29323..2080769 100644
--- a/vshard/storage/init.lua
+++ b/vshard/storage/init.lua
@@ -1500,13 +1500,14 @@ end
--------------------------------------------------------------------------------
-- Configuration
--------------------------------------------------------------------------------
-local function storage_cfg(cfg, this_replica_uuid)
+
+local function storage_cfg(cfg, this_replica_uuid, is_reload)
if this_replica_uuid == nil then
error('Usage: cfg(configuration, this_replica_uuid)')
end
cfg = lcfg.check(cfg, M.current_cfg)
- local new_cfg = table.copy(cfg)
- if cfg.weights or cfg.zone then
+ local vshard_cfg, box_cfg = lcfg.split(cfg)
+ if vshard_cfg.weights or vshard_cfg.zone then
error('Weights and zone are not allowed for storage
configuration')
end
if M.replicasets then
@@ -1520,7 +1521,7 @@ local function storage_cfg(cfg, this_replica_uuid)
local this_replicaset
local this_replica
- local new_replicasets = lreplicaset.buildall(cfg)
+ local new_replicasets = lreplicaset.buildall(vshard_cfg)
local min_master
for rs_uuid, rs in pairs(new_replicasets) do
for replica_uuid, replica in pairs(rs.replicas) do
@@ -1544,46 +1545,14 @@ local function storage_cfg(cfg, this_replica_uuid)
log.info('I am master')
end
- -- Do not change 'read_only' option here - if a master is
- -- disabled and there are triggers on master disable, then
- -- they would not be able to modify anything, if 'read_only'
- -- had been set here. 'Read_only' is set in
- -- local_on_master_disable after triggers and is unset in
- -- local_on_master_enable before triggers.
- --
- -- If a master role of the replica is not changed, then
- -- 'read_only' can be set right here.
- cfg.listen = cfg.listen or this_replica.uri
- if cfg.replication == nil and this_replicaset.master and not
is_master then
- cfg.replication = {this_replicaset.master.uri}
- else
- cfg.replication = {}
- end
- if was_master == is_master then
- cfg.read_only = not is_master
- end
- if type(box.cfg) == 'function' then
- cfg.instance_uuid = this_replica.uuid
- cfg.replicaset_uuid = this_replicaset.uuid
- else
- local info = box.info
- if this_replica_uuid ~= info.uuid then
- error(string.format('Instance UUID mismatch: already set
"%s" '..
- 'but "%s" in arguments', info.uuid,
- this_replica_uuid))
- end
- if this_replicaset.uuid ~= info.cluster.uuid then
- error(string.format('Replicaset UUID mismatch: already set
"%s" '..
- 'but "%s" in vshard config',
info.cluster.uuid,
- this_replicaset.uuid))
- end
- end
- local total_bucket_count = cfg.bucket_count
- local rebalancer_disbalance_threshold =
cfg.rebalancer_disbalance_threshold
- local rebalancer_max_receiving = cfg.rebalancer_max_receiving
- local shard_index = cfg.shard_index
- local collect_bucket_garbage_interval =
cfg.collect_bucket_garbage_interval
- local collect_lua_garbage = cfg.collect_lua_garbage
+ local total_bucket_count = vshard_cfg.bucket_count
+ local rebalancer_disbalance_threshold =
+ vshard_cfg.rebalancer_disbalance_threshold
+ local rebalancer_max_receiving = vshard_cfg.rebalancer_max_receiving
+ local shard_index = vshard_cfg.shard_index
+ local collect_bucket_garbage_interval =
+ vshard_cfg.collect_bucket_garbage_interval
+ local collect_lua_garbage = vshard_cfg.collect_lua_garbage
-- It is considered that all possible errors during cfg
-- process occur only before this place.
@@ -1598,7 +1567,7 @@ local function storage_cfg(cfg, this_replica_uuid)
-- a new sync timeout.
--
local old_sync_timeout = M.sync_timeout
- M.sync_timeout = cfg.sync_timeout
+ M.sync_timeout = vshard_cfg.sync_timeout
if was_master and not is_master then
local_on_master_disable_prepare()
@@ -1607,27 +1576,61 @@ local function storage_cfg(cfg, this_replica_uuid)
local_on_master_enable_prepare()
end
- local box_cfg = table.copy(cfg)
- lcfg.remove_non_box_options(box_cfg)
- local ok, err = pcall(box.cfg, box_cfg)
- while M.errinj.ERRINJ_CFG_DELAY do
- lfiber.sleep(0.01)
- end
- if not ok then
- M.sync_timeout = old_sync_timeout
- if was_master and not is_master then
- local_on_master_disable_abort()
+ if not is_reload then
+ -- Do not change 'read_only' option here - if a master is
+ -- disabled and there are triggers on master disable, then
+ -- they would not be able to modify anything, if 'read_only'
+ -- had been set here. 'Read_only' is set in
+ -- local_on_master_disable after triggers and is unset in
+ -- local_on_master_enable before triggers.
+ --
+ -- If a master role of the replica is not changed, then
+ -- 'read_only' can be set right here.
+ box_cfg.listen = box_cfg.listen or this_replica.uri
+ if box_cfg.replication == nil and this_replicaset.master
+ and not is_master then
+ box_cfg.replication = {this_replicaset.master.uri}
+ else
+ box_cfg.replication = {}
end
- if not was_master and is_master then
- local_on_master_enable_abort()
+ if was_master == is_master then
+ box_cfg.read_only = not is_master
end
- error(err)
+ if type(box.cfg) == 'function' then
+ box_cfg.instance_uuid = this_replica.uuid
+ box_cfg.replicaset_uuid = this_replicaset.uuid
+ else
+ local info = box.info
+ if this_replica_uuid ~= info.uuid then
+ error(string.format('Instance UUID mismatch: already
set ' ..
+ '"%s" but "%s" in arguments',
info.uuid,
+ this_replica_uuid))
+ end
+ if this_replicaset.uuid ~= info.cluster.uuid then
+ error(string.format('Replicaset UUID mismatch: already
set ' ..
+ '"%s" but "%s" in vshard config',
+ info.cluster.uuid,
this_replicaset.uuid))
+ end
+ end
+ local ok, err = pcall(box.cfg, box_cfg)
+ while M.errinj.ERRINJ_CFG_DELAY do
+ lfiber.sleep(0.01)
+ end
+ if not ok then
+ M.sync_timeout = old_sync_timeout
+ if was_master and not is_master then
+ local_on_master_disable_abort()
+ end
+ if not was_master and is_master then
+ local_on_master_enable_abort()
+ end
+ error(err)
+ end
+ log.info("Box has been configured")
+ local uri = luri.parse(this_replica.uri)
+ box.once("vshard:storage:1", storage_schema_v1, uri.login,
uri.password)
end
- log.info("Box has been configured")
- local uri = luri.parse(this_replica.uri)
- box.once("vshard:storage:1", storage_schema_v1, uri.login,
uri.password)
-
lreplicaset.rebind_replicasets(new_replicasets, M.replicasets)
lreplicaset.outdate_replicasets(M.replicasets)
M.replicasets = new_replicasets
@@ -1639,7 +1642,7 @@ local function storage_cfg(cfg, this_replica_uuid)
M.shard_index = shard_index
M.collect_bucket_garbage_interval = collect_bucket_garbage_interval
M.collect_lua_garbage = collect_lua_garbage
- M.current_cfg = new_cfg
+ M.current_cfg = cfg
if was_master and not is_master then
local_on_master_disable()
@@ -1875,7 +1878,7 @@ if not rawget(_G, MODULE_INTERNALS) then
rawset(_G, MODULE_INTERNALS, M)
else
reload_evolution.upgrade(M)
- storage_cfg(M.current_cfg, M.this_replica.uuid)
+ storage_cfg(M.current_cfg, M.this_replica.uuid, true)
M.module_version = M.module_version + 1
end
@@ -1914,7 +1917,7 @@ return {
rebalancing_is_in_progress = rebalancing_is_in_progress,
recovery_wakeup = recovery_wakeup,
call = storage_call,
- cfg = storage_cfg,
+ cfg = function(cfg, uuid) return storage_cfg(cfg, uuid, false) end,
info = storage_info,
buckets_info = storage_buckets_info,
buckets_count = storage_buckets_count,
More information about the Tarantool-patches
mailing list