[tarantool-patches] Re: [PATCH 2/2] Complete module reload
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Tue Jun 19 12:00:42 MSK 2018
Hello. Thanks for the fixes! See 10 comments below and
the commit with some new fixes on the branch.
> diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua
> index 99f59aa..4e7bc10 100644
> --- a/vshard/replicaset.lua
> +++ b/vshard/replicaset.lua
> @@ -338,26 +339,80 @@ local function replicaset_tostring(replicaset)
> end
>
> --
> --- Rebind connections of old replicas to new ones.
> +-- Outdate old objects in case of reload/reconfigure.
> --
> -local function replicaset_rebind_connections(replicaset)
> - for _, replica in pairs(replicaset.replicas) do
> - local old_replica = replica.old_replica
> - if old_replica then
> - local conn = old_replica.conn
> - replica.conn = conn
> - replica.down_ts = old_replica.down_ts
> - replica.net_timeout = old_replica.net_timeout
> - replica.net_sequential_ok = old_replica.net_sequential_ok
> - replica.net_sequential_fail = old_replica.net_sequential_fail
> - if conn then
> - conn.replica = replica
> - conn.replicaset = replicaset
> - old_replica.conn = nil
> +local function cleanup_old_objects(old_replicaset_mt, old_replica_mt,
> + old_replicas)
> + local function get_outdated_warning()
> + return function(...)
> + return nil, lerror.vshard(lerror.code.OBJECT_IS_OUTDATED)
> + end
> + end
1. Please, declare this outdated_mt out of this function in a module
local variable like it is done with ordinal replica/replicaset mt. Not just
__index, but entire mt. See comment 3 why.
2. You did not outdate replica metatables. How did your tests pass then?
> + old_replicaset_mt.__index = get_outdated_warning
> + -- Leave replica_mt unchanged. All its methods are safe.
> + for _, replica in pairs(old_replicas) do
> + replica.conn = nil
> + end
> + log.info('Old replicaset and replica objects are outdated.')
> +end
> +
> +--
> +-- Initiate outdating process, which cancels connections and
> +-- spoils object methods.
> +--
> +local function outdate_replicasets(replicasets, outdate_delay)
> + -- Collect data to outdate old objects.
> + local old_replicaset_mt = nil
> + local old_replica_mt = nil
> + local old_replicas = {}
> + for _, replicaset in pairs(replicasets) do
> + local old_mt = getmetatable(replicaset)
> + assert(old_replicaset_mt == nil or old_replicaset_mt == old_mt)
> + old_replicaset_mt = old_mt
> + for _, replica in pairs(replicaset.replicas) do
> + table.insert(old_replicas, replica)
> + local old_mt = getmetatable(replica)
> + assert(old_replica_mt == nil or old_replica_mt == old_mt)
> + old_replica_mt = old_mt
> + end
> + end
> + util.async_task(outdate_delay, cleanup_old_objects, old_replicaset_mt,
> + old_replica_mt, old_replicas)
3. You do not need to search old_replica_mt and old_replicaset_mt here. You do not
need them at all. Just declare special object_outdate_mt and set this metatable to
the old objects. Not just __index, but entire metatable. So you can get rid of
this old mts search, old mt parameters.
> +end
> +
> +
> +--
> +-- Copy netbox conections from old replica objects to new ones
> +-- and outdate old objects.
> +-- @param replicasets New replicasets
> +-- @param old_replicasets Replicasets and replicas to be outdated.
> +-- @param outdate_delay Number of seconds; delay to outdate
> +-- old objects.
> +--
> +local function rebind_replicasets(replicasets, old_replicasets, outdate_delay)
> + for replicaset_uuid, replicaset in pairs(replicasets) do
> + local old_replicaset = old_replicasets and
> + old_replicasets[replicaset_uuid]> + for replica_uuid, replica in pairs(replicaset.replicas) do
> + local old_replica = old_replicaset and
> + old_replicaset.replicas[replica_uuid]> + if old_replica then
> + local conn = old_replica.conn
> + replica.conn = conn
> + replica.down_ts = old_replica.down_ts
> + replica.net_timeout = old_replica.net_timeout
> + replica.net_sequential_ok = old_replica.net_sequential_ok
> + replica.net_sequential_fail = old_replica.net_sequential_fail
> + if conn then
> + conn.replica = replica
> + conn.replicaset = replicaset
> + end
> end
> - replica.old_replica = nil
> end
> end
> + if old_replicasets then
> + outdate_replicasets(old_replicasets, outdate_delay)
4. Please, name it schedule_connections_outdate. Now the name
confuses. It could be thought to deprecate the objects now, not
after timeout.
> + end
> end
> @@ -406,11 +452,30 @@ local replica_mt = {
> end
> end,
> }
> -index = {}
> -for name, func in pairs(replica_mt.__index) do
> - index[name] = gsc("replica", name, replica_mt, func)
> +
> +--
> +-- Get new copy of replicaset and replica meta tables.
> +-- It is important to have distinct copies of meta tables to
> +-- be able to outdate only old objects.
> +--
> +local function copy_metatables()
5. Please, remove. This function is harmful and slow. You do not
need to copy metatables. Use my recommendations above and think.
> + --
> + -- Wrap self methods with a sanity checker.
> + --
> + local replicaset_mt_copy = table.copy(replicaset_mt)
> + local replica_mt_copy = table.copy(replica_mt)
> + local index = {}
> + for name, func in pairs(replicaset_mt_copy.__index) do
> + index[name] = gsc("replicaset", name, replicaset_mt_copy, func)
> + end
> + replicaset_mt_copy.__index = index
> + index = {}
> + for name, func in pairs(replica_mt_copy.__index) do
> + index[name] = gsc("replica", name, replica_mt_copy, func)
> + end
> + replica_mt_copy.__index = index
> + return replicaset_mt_copy, replica_mt_copy
> end
> -replica_mt.__index = index
>
> @@ -514,20 +579,17 @@ local function buildall(sharding_cfg, old_replicasets)
6. Unused parameter.
> zone_weights = {}
> end
> @@ -535,8 +597,8 @@ local function buildall(sharding_cfg, old_replicasets)
> uri = replica.uri, name = replica.name, uuid = replica_uuid,
> zone = replica.zone, net_timeout = consts.CALL_TIMEOUT_MIN,
> net_sequential_ok = 0, net_sequential_fail = 0,
> - down_ts = curr_ts, old_replica = old_replica,
> - }, replica_mt)
> + down_ts = curr_ts,
> + }, replica_mt_copy)
> new_replicaset.replicas[replica_uuid] = new_replica
> if replica.master then
> new_replicaset.master = new_replica
> @@ -596,4 +658,6 @@ return {
> buildall = buildall,
> calculate_etalon_balance = cluster_calculate_etalon_balance,
> wait_masters_connect = wait_masters_connect,
> + rebind_replicasets = rebind_replicasets,
> + outdate_replicasets = outdate_replicasets,
7. Why do you need to export this function?
> }
> diff --git a/vshard/router/init.lua b/vshard/router/init.lua
> index 1dee80c..d70d060 100644
> --- a/vshard/router/init.lua
> +++ b/vshard/router/init.lua
> @@ -459,6 +473,7 @@ end
>
> local function router_cfg(cfg)
> cfg = lcfg.check(cfg)
> + local current_cfg = table.deepcopy(cfg)
8. Ok, I understand why do you copy it, but please,
write a comment about it.
> if not M.replicasets then
> log.info('Starting router configuration')
> else
> @@ -776,15 +792,16 @@ end
> -- About functions, saved in M, and reloading see comment in
> -- storage/init.lua.
> --
> -M.discovery_f = discovery_f
> -M.failover_f = failover_f
> -
> -if not rawget(_G, '__module_vshard_router') then
> - rawset(_G, '__module_vshard_router', M)
> +if not rawget(_G, MODULE_INTERNALS) then
> + rawset(_G, MODULE_INTERNALS, M)
> else
> + router_cfg(M.current_cfg)
> M.module_version = M.module_version + 1
> end
>
> +M.discovery_f = discovery_f
> +M.failover_f = failover_f
9. Why did you move these lines?
> +
> return {
> cfg = router_cfg;
> info = router_info;
> diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
> index 879c7c4..50347b0 100644
> --- a/vshard/storage/init.lua
> +++ b/vshard/storage/init.lua
> @@ -21,6 +33,8 @@ if not M then
> -- See format in replicaset.lua.
> --
> replicasets = nil,
> + -- Time to deprecate old objects on reload.
> + reload_deprecate_timeout = nil,
10. Why here is 'reload_deprecate_timeout', but 'connection_outdate_delay'
in router?
> -- Triggers on master switch event. They are called right
> -- before the event occurs.
> _on_master_enable = trigger.new('_on_master_enable'),
More information about the Tarantool-patches
mailing list