[tarantool-patches] Re: [PATCH 1/3] Update only vshard part of a cfg on reload
Alex Khatskevich
avkhatskevich at tarantool.org
Fri Aug 3 23:03:38 MSK 2018
On 01.08.2018 21:43, Vladislav Shpilevoy wrote:
> Thanks for the patch! See 4 comments below.
>
> On 31/07/2018 19:25, AKhatskevich wrote:
>> Box cfg could have been changed by a user and then overridden by
>> an old vshard config on reload.
>>
>> Since this commit, the box part of a config is applied only when
>> it is explicitly passed to the `cfg` method.
>>
>> This change is important for the multiple routers feature.
>> ---
>> vshard/cfg.lua | 54
>> +++++++++++++++++++++++++------------------------
>> vshard/router/init.lua | 18 ++++++++---------
>> vshard/storage/init.lua | 53
>> ++++++++++++++++++++++++++++--------------------
>> 3 files changed, 67 insertions(+), 58 deletions(-)
>>
>> diff --git a/vshard/cfg.lua b/vshard/cfg.lua
>> index bba12cc..8282086 100644
>> --- a/vshard/cfg.lua
>> +++ b/vshard/cfg.lua
>> @@ -230,48 +230,50 @@ local non_dynamic_options = {
>> 'bucket_count', 'shard_index'
>> }
>> +--
>> +-- Deepcopy a config and split it into vshard_cfg and box_cfg.
>> +--
>> +local function split_cfg(cfg)
>> + local vshard_field_map = {}
>> + for _, field in ipairs(cfg_template) do
>> + vshard_field_map[field[1]] = true
>> + end
>
> 1. vshard_field_map does not change ever. Why do you build it
> on each cfg? Please, store it in a module local variable like
> cfg_template. Or refactor cfg_template and other templates so
> they would be maps with parameter name as a key - looks like
> the most suitable solution.
Refactored cfg_template (added an extra commit before this one).
>
>> + local vshard_cfg = {}
>> + local box_cfg = {}
>> + for k, v in pairs(cfg) do
>> + if vshard_field_map[k] then
>> + vshard_cfg[k] = table.deepcopy(v)
>> + else
>> + box_cfg[k] = table.deepcopy(v)
>> + end
>> + end
>> + return vshard_cfg, box_cfg
>> +end
>> +
>> diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
>> index 102b942..75f5df9 100644
>> --- a/vshard/storage/init.lua
>> +++ b/vshard/storage/init.lua
>> @@ -1500,13 +1500,17 @@ end
>> --------------------------------------------------------------------------------
>> -- Configuration
>> --------------------------------------------------------------------------------
>> +-- Private (not accessible by a user) reload indicator.
>> +local is_reload = false
>
> 2. Please, make this variable be parameter of storage_cfg and wrap public
> storage.cfg with a one-liner:
>
> storage.cfg = function(cfg, uuid) return storage_cfg(cfg, uuid,
> false) end
>
> I believe/hope you understand that such way to pass parameters, via
> global
> variables, is flawed by design.
Nice idea.
Fixed.
>
>> @@ -1553,18 +1557,19 @@ local function storage_cfg(cfg,
>> this_replica_uuid)
>> --
>> -- If a master role of the replica is not changed, then
>> -- 'read_only' can be set right here.
>> - cfg.listen = cfg.listen or this_replica.uri
>> - if cfg.replication == nil and this_replicaset.master and not
>> is_master then
>> - cfg.replication = {this_replicaset.master.uri}
>> + box_cfg.listen = box_cfg.listen or this_replica.uri
>> + if box_cfg.replication == nil and this_replicaset.master
>> + and not is_master then
>
> 3. Broken indentation.
fixed
>
>> + box_cfg.replication = {this_replicaset.master.uri}
>> else
>> - cfg.replication = {}
>> + box_cfg.replication = {}
>> end
>> if was_master == is_master then
>> - cfg.read_only = not is_master
>> + box_cfg.read_only = not is_master
>> end
>> if type(box.cfg) == 'function' then
>> - cfg.instance_uuid = this_replica.uuid
>> - cfg.replicaset_uuid = this_replicaset.uuid
>> + box_cfg.instance_uuid = this_replica.uuid
>> + box_cfg.replicaset_uuid = this_replicaset.uuid
>> else
>> local info = box.info
>> if this_replica_uuid ~= info.uuid then
>> @@ -1607,9 +1614,10 @@ local function storage_cfg(cfg,
>> this_replica_uuid)
>> local_on_master_enable_prepare()
>> end
>> - local box_cfg = table.copy(cfg)
>> - lcfg.remove_non_box_options(box_cfg)
>> - local ok, err = pcall(box.cfg, box_cfg)
>> + local ok, err = true, nil
>> + if not is_reload then
>> + ok, err = pcall(box.cfg, box_cfg)
>> + end
>
> 4. The code below (if not ok then ...) can be moved inside
> 'if not is_reload' together with 'local ok, err' declaration.
> Please, do.
>
>> while M.errinj.ERRINJ_CFG_DELAY do
>> lfiber.sleep(0.01)
>> end
Done.
Full diff
commit 81cb60df74fbacae3aee1817f1ff16e7fe0af72f
Author: AKhatskevich <avkhatskevich at tarantool.org>
Date: Mon Jul 23 16:42:22 2018 +0300
Update only vshard part of a cfg on reload
Box cfg could have been changed by a user and then overridden by
an old vshard config on reload.
Since that commit, box part of a config is applied only when
it is explicitly passed to a `cfg` method.
This change is important for the multiple routers feature.
diff --git a/vshard/cfg.lua b/vshard/cfg.lua
index 7c9ab77..80ea432 100644
--- a/vshard/cfg.lua
+++ b/vshard/cfg.lua
@@ -221,6 +221,22 @@ local cfg_template = {
},
}
+--
+-- Deepcopy a config and split it into vshard_cfg and box_cfg.
+--
+local function split_cfg(cfg)
+ local vshard_cfg = {}
+ local box_cfg = {}
+ for k, v in pairs(cfg) do
+ if cfg_template[k] then
+ vshard_cfg[k] = table.deepcopy(v)
+ else
+ box_cfg[k] = table.deepcopy(v)
+ end
+ end
+ return vshard_cfg, box_cfg
+end
+
--
-- Names of options which cannot be changed during reconfigure.
--
@@ -232,44 +248,26 @@ local non_dynamic_options = {
-- Check sharding config on correctness. Check types, name and uri
-- uniqueness, master count (in each replicaset must be <= 1).
--
-local function cfg_check(shard_cfg, old_cfg)
- if type(shard_cfg) ~= 'table' then
+local function cfg_check(cfg, old_vshard_cfg)
+ if type(cfg) ~= 'table' then
error('Config must be map of options')
end
- shard_cfg = table.deepcopy(shard_cfg)
- validate_config(shard_cfg, cfg_template)
- if not old_cfg then
- return shard_cfg
+ local vshard_cfg, box_cfg = split_cfg(cfg)
+ validate_config(vshard_cfg, cfg_template)
+ if not old_vshard_cfg then
+ return vshard_cfg, box_cfg
end
-- Check non-dynamic after default values are added.
for _, f_name in pairs(non_dynamic_options) do
-- New option may be added in new vshard version.
- if shard_cfg[f_name] ~= old_cfg[f_name] then
+ if vshard_cfg[f_name] ~= old_vshard_cfg[f_name] then
error(string.format('Non-dynamic option %s ' ..
'cannot be reconfigured', f_name))
end
end
- return shard_cfg
-end
-
---
--- Nullify non-box options.
---
-local function remove_non_box_options(cfg)
- cfg.sharding = nil
- cfg.weights = nil
- cfg.zone = nil
- cfg.bucket_count = nil
- cfg.rebalancer_disbalance_threshold = nil
- cfg.rebalancer_max_receiving = nil
- cfg.shard_index = nil
- cfg.collect_bucket_garbage_interval = nil
- cfg.collect_lua_garbage = nil
- cfg.sync_timeout = nil
- cfg.connection_outdate_delay = nil
+ return vshard_cfg, box_cfg
end
return {
check = cfg_check,
- remove_non_box_options = remove_non_box_options,
}
diff --git a/vshard/router/init.lua b/vshard/router/init.lua
index 4cb19fd..e2b2b22 100644
--- a/vshard/router/init.lua
+++ b/vshard/router/init.lua
@@ -496,18 +496,15 @@ end
--------------------------------------------------------------------------------
local function router_cfg(cfg)
- cfg = lcfg.check(cfg, M.current_cfg)
- local new_cfg = table.copy(cfg)
+ local vshard_cfg, box_cfg = lcfg.check(cfg, M.current_cfg)
if not M.replicasets then
log.info('Starting router configuration')
else
log.info('Starting router reconfiguration')
end
- local new_replicasets = lreplicaset.buildall(cfg)
- local total_bucket_count = cfg.bucket_count
- local collect_lua_garbage = cfg.collect_lua_garbage
- local box_cfg = table.copy(cfg)
- lcfg.remove_non_box_options(box_cfg)
+ local new_replicasets = lreplicaset.buildall(vshard_cfg)
+ local total_bucket_count = vshard_cfg.bucket_count
+ local collect_lua_garbage = vshard_cfg.collect_lua_garbage
log.info("Calling box.cfg()...")
for k, v in pairs(box_cfg) do
log.info({[k] = v})
@@ -530,11 +527,12 @@ local function router_cfg(cfg)
replicaset:connect_all()
end
lreplicaset.wait_masters_connect(new_replicasets)
- lreplicaset.outdate_replicasets(M.replicasets,
cfg.connection_outdate_delay)
- M.connection_outdate_delay = cfg.connection_outdate_delay
+ lreplicaset.outdate_replicasets(M.replicasets,
+ vshard_cfg.connection_outdate_delay)
+ M.connection_outdate_delay = vshard_cfg.connection_outdate_delay
M.total_bucket_count = total_bucket_count
M.collect_lua_garbage = collect_lua_garbage
- M.current_cfg = cfg
+ M.current_cfg = vshard_cfg
M.replicasets = new_replicasets
-- Update existing route map in-place.
local old_route_map = M.route_map
diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
index 102b942..40216ea 100644
--- a/vshard/storage/init.lua
+++ b/vshard/storage/init.lua
@@ -1500,13 +1500,13 @@ end
--------------------------------------------------------------------------------
-- Configuration
--------------------------------------------------------------------------------
-local function storage_cfg(cfg, this_replica_uuid)
+
+local function storage_cfg(cfg, this_replica_uuid, is_reload)
if this_replica_uuid == nil then
error('Usage: cfg(configuration, this_replica_uuid)')
end
- cfg = lcfg.check(cfg, M.current_cfg)
- local new_cfg = table.copy(cfg)
- if cfg.weights or cfg.zone then
+ local vshard_cfg, box_cfg = lcfg.check(cfg, M.current_cfg)
+ if vshard_cfg.weights or vshard_cfg.zone then
error('Weights and zone are not allowed for storage
configuration')
end
if M.replicasets then
@@ -1520,7 +1520,7 @@ local function storage_cfg(cfg, this_replica_uuid)
local this_replicaset
local this_replica
- local new_replicasets = lreplicaset.buildall(cfg)
+ local new_replicasets = lreplicaset.buildall(vshard_cfg)
local min_master
for rs_uuid, rs in pairs(new_replicasets) do
for replica_uuid, replica in pairs(rs.replicas) do
@@ -1553,18 +1553,19 @@ local function storage_cfg(cfg, this_replica_uuid)
--
-- If a master role of the replica is not changed, then
-- 'read_only' can be set right here.
- cfg.listen = cfg.listen or this_replica.uri
- if cfg.replication == nil and this_replicaset.master and not
is_master then
- cfg.replication = {this_replicaset.master.uri}
+ box_cfg.listen = box_cfg.listen or this_replica.uri
+ if box_cfg.replication == nil and this_replicaset.master
+ and not is_master then
+ box_cfg.replication = {this_replicaset.master.uri}
else
- cfg.replication = {}
+ box_cfg.replication = {}
end
if was_master == is_master then
- cfg.read_only = not is_master
+ box_cfg.read_only = not is_master
end
if type(box.cfg) == 'function' then
- cfg.instance_uuid = this_replica.uuid
- cfg.replicaset_uuid = this_replicaset.uuid
+ box_cfg.instance_uuid = this_replica.uuid
+ box_cfg.replicaset_uuid = this_replicaset.uuid
else
local info = box.info
if this_replica_uuid ~= info.uuid then
@@ -1578,12 +1579,14 @@ local function storage_cfg(cfg, this_replica_uuid)
this_replicaset.uuid))
end
end
- local total_bucket_count = cfg.bucket_count
- local rebalancer_disbalance_threshold =
cfg.rebalancer_disbalance_threshold
- local rebalancer_max_receiving = cfg.rebalancer_max_receiving
- local shard_index = cfg.shard_index
- local collect_bucket_garbage_interval =
cfg.collect_bucket_garbage_interval
- local collect_lua_garbage = cfg.collect_lua_garbage
+ local total_bucket_count = vshard_cfg.bucket_count
+ local rebalancer_disbalance_threshold =
+ vshard_cfg.rebalancer_disbalance_threshold
+ local rebalancer_max_receiving = vshard_cfg.rebalancer_max_receiving
+ local shard_index = vshard_cfg.shard_index
+ local collect_bucket_garbage_interval =
+ vshard_cfg.collect_bucket_garbage_interval
+ local collect_lua_garbage = vshard_cfg.collect_lua_garbage
-- It is considered that all possible errors during cfg
-- process occur only before this place.
@@ -1598,7 +1601,7 @@ local function storage_cfg(cfg, this_replica_uuid)
-- a new sync timeout.
--
local old_sync_timeout = M.sync_timeout
- M.sync_timeout = cfg.sync_timeout
+ M.sync_timeout = vshard_cfg.sync_timeout
if was_master and not is_master then
local_on_master_disable_prepare()
@@ -1607,27 +1610,27 @@ local function storage_cfg(cfg, this_replica_uuid)
local_on_master_enable_prepare()
end
- local box_cfg = table.copy(cfg)
- lcfg.remove_non_box_options(box_cfg)
- local ok, err = pcall(box.cfg, box_cfg)
- while M.errinj.ERRINJ_CFG_DELAY do
- lfiber.sleep(0.01)
- end
- if not ok then
- M.sync_timeout = old_sync_timeout
- if was_master and not is_master then
- local_on_master_disable_abort()
+ if not is_reload then
+ local ok, err = true, nil
+ ok, err = pcall(box.cfg, box_cfg)
+ while M.errinj.ERRINJ_CFG_DELAY do
+ lfiber.sleep(0.01)
end
- if not was_master and is_master then
- local_on_master_enable_abort()
+ if not ok then
+ M.sync_timeout = old_sync_timeout
+ if was_master and not is_master then
+ local_on_master_disable_abort()
+ end
+ if not was_master and is_master then
+ local_on_master_enable_abort()
+ end
+ error(err)
end
- error(err)
+ log.info("Box has been configured")
+ local uri = luri.parse(this_replica.uri)
+ box.once("vshard:storage:1", storage_schema_v1, uri.login,
uri.password)
end
- log.info("Box has been configured")
- local uri = luri.parse(this_replica.uri)
- box.once("vshard:storage:1", storage_schema_v1, uri.login,
uri.password)
-
lreplicaset.rebind_replicasets(new_replicasets, M.replicasets)
lreplicaset.outdate_replicasets(M.replicasets)
M.replicasets = new_replicasets
@@ -1639,7 +1642,7 @@ local function storage_cfg(cfg, this_replica_uuid)
M.shard_index = shard_index
M.collect_bucket_garbage_interval = collect_bucket_garbage_interval
M.collect_lua_garbage = collect_lua_garbage
- M.current_cfg = new_cfg
+ M.current_cfg = vshard_cfg
if was_master and not is_master then
local_on_master_disable()
@@ -1874,7 +1877,7 @@ if not rawget(_G, MODULE_INTERNALS) then
rawset(_G, MODULE_INTERNALS, M)
else
reload_evolution.upgrade(M)
- storage_cfg(M.current_cfg, M.this_replica.uuid)
+ storage_cfg(M.current_cfg, M.this_replica.uuid, true)
M.module_version = M.module_version + 1
end
@@ -1913,7 +1916,7 @@ return {
rebalancing_is_in_progress = rebalancing_is_in_progress,
recovery_wakeup = recovery_wakeup,
call = storage_call,
- cfg = storage_cfg,
+ cfg = function(cfg, uuid) return storage_cfg(cfg, uuid, false) end,
info = storage_info,
buckets_info = storage_buckets_info,
buckets_count = storage_buckets_count,
More information about the Tarantool-patches
mailing list