From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 316EF28B76 for ; Fri, 3 Aug 2018 16:03:45 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id U3jNXQ9Z64wo for ; Fri, 3 Aug 2018 16:03:45 -0400 (EDT) Received: from smtp14.mail.ru (smtp14.mail.ru [94.100.181.95]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 4CDE228B68 for ; Fri, 3 Aug 2018 16:03:44 -0400 (EDT) From: Alex Khatskevich Subject: [tarantool-patches] Re: [PATCH 1/3] Update only vshard part of a cfg on reload References: <37ba10547bf64fd880734b0550e4865f58dda14a.1533054045.git.avkhatskevich@tarantool.org> Message-ID: <55486f60-a630-1d19-69e0-5cf99c01f319@tarantool.org> Date: Fri, 3 Aug 2018 23:03:38 +0300 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset="utf-8"; format="flowed" Content-Transfer-Encoding: 8bit Content-Language: en-US Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: Vladislav Shpilevoy , tarantool-patches@freelists.org On 01.08.2018 21:43, Vladislav Shpilevoy wrote: > Thanks for the patch! See 4 comments below. > > On 31/07/2018 19:25, AKhatskevich wrote: >> Box cfg could have been changed by a user and then overridden by >> an old vshard config on reload. >> >> Since that commit, box part of a config is applied only when >> it is explicitly passed to a `cfg` method. >> >> This change is important for the multiple routers feature. >> --- >>   vshard/cfg.lua          | 54 >> +++++++++++++++++++++++++------------------------ >>   vshard/router/init.lua  | 18 ++++++++--------- >>   vshard/storage/init.lua | 53 >> ++++++++++++++++++++++++++++-------------------- >>   3 files changed, 67 insertions(+), 58 deletions(-) >> >> diff --git a/vshard/cfg.lua b/vshard/cfg.lua >> index bba12cc..8282086 100644 >> --- a/vshard/cfg.lua >> +++ b/vshard/cfg.lua >> @@ -230,48 +230,50 @@ local non_dynamic_options = { >>       'bucket_count', 'shard_index' >>   } >>   +-- >> +-- Deepcopy a config and split it into vshard_cfg and box_cfg. >> +-- >> +local function split_cfg(cfg) >> +    local vshard_field_map = {} >> +    for _, field in ipairs(cfg_template) do >> +        vshard_field_map[field[1]] = true >> +    end > > 1. vshard_field_map does not change ever. Why do you build it > on each cfg? Please, store it in a module local variable like > cfg_template. Or refactor cfg_template and other templates so > they would be maps with parameter name as a key - looks like > the most suitable solution. Refactored cfg_template. (add extra commit before this one) > >> +    local vshard_cfg = {} >> +    local box_cfg = {} >> +    for k, v in pairs(cfg) do >> +        if vshard_field_map[k] then >> +            vshard_cfg[k] = table.deepcopy(v) >> +        else >> +            box_cfg[k] = table.deepcopy(v) >> +        end >> +    end >> +    return vshard_cfg, box_cfg >> +end >> + >> diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua >> index 102b942..75f5df9 100644 >> --- a/vshard/storage/init.lua >> +++ b/vshard/storage/init.lua >> @@ -1500,13 +1500,17 @@ end >> -------------------------------------------------------------------------------- >>   -- Configuration >> -------------------------------------------------------------------------------- >> +-- Private (not accessible by a user) reload indicator. >> +local is_reload = false > > 2. Please, make this variable be parameter of storage_cfg and wrap public > storage.cfg with a one-liner: > >     storage.cfg = function(cfg, uuid) return storage_cfg(cfg, uuid, > false) end > > I believe/hope you understand that such way to pass parameters, via > global > variables, is flawed by design. Nice idea. Fixed. > >> @@ -1553,18 +1557,19 @@ local function storage_cfg(cfg, >> this_replica_uuid) >>       -- >>       -- If a master role of the replica is not changed, then >>       -- 'read_only' can be set right here. >> -    cfg.listen = cfg.listen or this_replica.uri >> -    if cfg.replication == nil and this_replicaset.master and not >> is_master then >> -        cfg.replication = {this_replicaset.master.uri} >> +    box_cfg.listen = box_cfg.listen or this_replica.uri >> +    if box_cfg.replication == nil and this_replicaset.master >> +            and not is_master then > > 3. Broken indentation. fixed > >> +        box_cfg.replication = {this_replicaset.master.uri} >>       else >> -        cfg.replication = {} >> +        box_cfg.replication = {} >>       end >>       if was_master == is_master then >> -        cfg.read_only = not is_master >> +        box_cfg.read_only = not is_master >>       end >>       if type(box.cfg) == 'function' then >> -        cfg.instance_uuid = this_replica.uuid >> -        cfg.replicaset_uuid = this_replicaset.uuid >> +        box_cfg.instance_uuid = this_replica.uuid >> +        box_cfg.replicaset_uuid = this_replicaset.uuid >>       else >>           local info = box.info >>           if this_replica_uuid ~= info.uuid then >> @@ -1607,9 +1614,10 @@ local function storage_cfg(cfg, >> this_replica_uuid) >>           local_on_master_enable_prepare() >>       end >>   -    local box_cfg = table.copy(cfg) >> -    lcfg.remove_non_box_options(box_cfg) >> -    local ok, err = pcall(box.cfg, box_cfg) >> +    local ok, err = true, nil >> +    if not xis_reload then >> +        ok, err = pcall(box.cfg, box_cfg) >> +    end > > 4. The code below (if not ok then ...) can be moved inside > 'if not is_reload' together with 'local ok, err' declaration. > Please, do. > >>       while M.errinj.ERRINJ_CFG_DELAY do >>           lfiber.sleep(0.01) >>       end Done. Full diff commit 81cb60df74fbacae3aee1817f1ff16e7fe0af72f Author: AKhatskevich Date:   Mon Jul 23 16:42:22 2018 +0300     Update only vshard part of a cfg on reload     Box cfg could have been changed by a user and then overridden by     an old vshard config on reload.     Since that commit, box part of a config is applied only when     it is explicitly passed to a `cfg` method.     This change is important for the multiple routers feature. diff --git a/vshard/cfg.lua b/vshard/cfg.lua index 7c9ab77..80ea432 100644 --- a/vshard/cfg.lua +++ b/vshard/cfg.lua @@ -221,6 +221,22 @@ local cfg_template = {      },  } +-- +-- Deepcopy a config and split it into vshard_cfg and box_cfg. +-- +local function split_cfg(cfg) +    local vshard_cfg = {} +    local box_cfg = {} +    for k, v in pairs(cfg) do +        if cfg_template[k] then +            vshard_cfg[k] = table.deepcopy(v) +        else +            box_cfg[k] = table.deepcopy(v) +        end +    end +    return vshard_cfg, box_cfg +end +  --  -- Names of options which cannot be changed during reconfigure.  -- @@ -232,44 +248,26 @@ local non_dynamic_options = {  -- Check sharding config on correctness. Check types, name and uri  -- uniqueness, master count (in each replicaset must be <= 1).  -- -local function cfg_check(shard_cfg, old_cfg) -    if type(shard_cfg) ~= 'table' then +local function cfg_check(cfg, old_vshard_cfg) +    if type(cfg) ~= 'table' then          error('Сonfig must be map of options')      end -    shard_cfg = table.deepcopy(shard_cfg) -    validate_config(shard_cfg, cfg_template) -    if not old_cfg then -        return shard_cfg +    local vshard_cfg, box_cfg = split_cfg(cfg) +    validate_config(vshard_cfg, cfg_template) +    if not old_vshard_cfg then +        return vshard_cfg, box_cfg      end      -- Check non-dynamic after default values are added.      for _, f_name in pairs(non_dynamic_options) do          -- New option may be added in new vshard version. -        if shard_cfg[f_name] ~= old_cfg[f_name] then +        if vshard_cfg[f_name] ~= old_vshard_cfg[f_name] then             error(string.format('Non-dynamic option %s ' ..                                 'cannot be reconfigured', f_name))          end      end -    return shard_cfg -end - --- --- Nullify non-box options. --- -local function remove_non_box_options(cfg) -    cfg.sharding = nil -    cfg.weights = nil -    cfg.zone = nil -    cfg.bucket_count = nil -    cfg.rebalancer_disbalance_threshold = nil -    cfg.rebalancer_max_receiving = nil -    cfg.shard_index = nil -    cfg.collect_bucket_garbage_interval = nil -    cfg.collect_lua_garbage = nil -    cfg.sync_timeout = nil -    cfg.connection_outdate_delay = nil +    return vshard_cfg, box_cfg  end  return {      check = cfg_check, -    remove_non_box_options = remove_non_box_options,  } diff --git a/vshard/router/init.lua b/vshard/router/init.lua index 4cb19fd..e2b2b22 100644 --- a/vshard/router/init.lua +++ b/vshard/router/init.lua @@ -496,18 +496,15 @@ end  --------------------------------------------------------------------------------  local function router_cfg(cfg) -    cfg = lcfg.check(cfg, M.current_cfg) -    local new_cfg = table.copy(cfg) +    local vshard_cfg, box_cfg = lcfg.check(cfg, M.current_cfg)      if not M.replicasets then          log.info('Starting router configuration')      else          log.info('Starting router reconfiguration')      end -    local new_replicasets = lreplicaset.buildall(cfg) -    local total_bucket_count = cfg.bucket_count -    local collect_lua_garbage = cfg.collect_lua_garbage -    local box_cfg = table.copy(cfg) -    lcfg.remove_non_box_options(box_cfg) +    local new_replicasets = lreplicaset.buildall(vshard_cfg) +    local total_bucket_count = vshard_cfg.bucket_count +    local collect_lua_garbage = vshard_cfg.collect_lua_garbage      log.info("Calling box.cfg()...")      for k, v in pairs(box_cfg) do          log.info({[k] = v}) @@ -530,11 +527,12 @@ local function router_cfg(cfg)          replicaset:connect_all()      end      lreplicaset.wait_masters_connect(new_replicasets) -    lreplicaset.outdate_replicasets(M.replicasets, cfg.connection_outdate_delay) -    M.connection_outdate_delay = cfg.connection_outdate_delay +    lreplicaset.outdate_replicasets(M.replicasets, + vshard_cfg.connection_outdate_delay) +    M.connection_outdate_delay = vshard_cfg.connection_outdate_delay      M.total_bucket_count = total_bucket_count      M.collect_lua_garbage = collect_lua_garbage -    M.current_cfg = cfg +    M.current_cfg = vshard_cfg      M.replicasets = new_replicasets      -- Update existing route map in-place.      local old_route_map = M.route_map diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua index 102b942..40216ea 100644 --- a/vshard/storage/init.lua +++ b/vshard/storage/init.lua @@ -1500,13 +1500,13 @@ end  --------------------------------------------------------------------------------  -- Configuration  -------------------------------------------------------------------------------- -local function storage_cfg(cfg, this_replica_uuid) + +local function storage_cfg(cfg, this_replica_uuid, is_reload)      if this_replica_uuid == nil then          error('Usage: cfg(configuration, this_replica_uuid)')      end -    cfg = lcfg.check(cfg, M.current_cfg) -    local new_cfg = table.copy(cfg) -    if cfg.weights or cfg.zone then +    local vshard_cfg, box_cfg = lcfg.check(cfg, M.current_cfg) +    if vshard_cfg.weights or vshard_cfg.zone then          error('Weights and zone are not allowed for storage configuration')      end      if M.replicasets then @@ -1520,7 +1520,7 @@ local function storage_cfg(cfg, this_replica_uuid)      local this_replicaset      local this_replica -    local new_replicasets = lreplicaset.buildall(cfg) +    local new_replicasets = lreplicaset.buildall(vshard_cfg)      local min_master      for rs_uuid, rs in pairs(new_replicasets) do          for replica_uuid, replica in pairs(rs.replicas) do @@ -1553,18 +1553,19 @@ local function storage_cfg(cfg, this_replica_uuid)      --      -- If a master role of the replica is not changed, then      -- 'read_only' can be set right here. -    cfg.listen = cfg.listen or this_replica.uri -    if cfg.replication == nil and this_replicaset.master and not is_master then -        cfg.replication = {this_replicaset.master.uri} +    box_cfg.listen = box_cfg.listen or this_replica.uri +    if box_cfg.replication == nil and this_replicaset.master +       and not is_master then +        box_cfg.replication = {this_replicaset.master.uri}      else -        cfg.replication = {} +        box_cfg.replication = {}      end      if was_master == is_master then -        cfg.read_only = not is_master +        box_cfg.read_only = not is_master      end      if type(box.cfg) == 'function' then -        cfg.instance_uuid = this_replica.uuid -        cfg.replicaset_uuid = this_replicaset.uuid +        box_cfg.instance_uuid = this_replica.uuid +        box_cfg.replicaset_uuid = this_replicaset.uuid      else          local info = box.info          if this_replica_uuid ~= info.uuid then @@ -1578,12 +1579,14 @@ local function storage_cfg(cfg, this_replica_uuid)                                  this_replicaset.uuid))          end      end -    local total_bucket_count = cfg.bucket_count -    local rebalancer_disbalance_threshold = cfg.rebalancer_disbalance_threshold -    local rebalancer_max_receiving = cfg.rebalancer_max_receiving -    local shard_index = cfg.shard_index -    local collect_bucket_garbage_interval = cfg.collect_bucket_garbage_interval -    local collect_lua_garbage = cfg.collect_lua_garbage +    local total_bucket_count = vshard_cfg.bucket_count +    local rebalancer_disbalance_threshold = +        vshard_cfg.rebalancer_disbalance_threshold +    local rebalancer_max_receiving = vshard_cfg.rebalancer_max_receiving +    local shard_index = vshard_cfg.shard_index +    local collect_bucket_garbage_interval = +        vshard_cfg.collect_bucket_garbage_interval +    local collect_lua_garbage = vshard_cfg.collect_lua_garbage      -- It is considered that all possible errors during cfg      -- process occur only before this place. @@ -1598,7 +1601,7 @@ local function storage_cfg(cfg, this_replica_uuid)      -- a new sync timeout.      --      local old_sync_timeout = M.sync_timeout -    M.sync_timeout = cfg.sync_timeout +    M.sync_timeout = vshard_cfg.sync_timeout      if was_master and not is_master then          local_on_master_disable_prepare() @@ -1607,27 +1610,27 @@ local function storage_cfg(cfg, this_replica_uuid)          local_on_master_enable_prepare()      end -    local box_cfg = table.copy(cfg) -    lcfg.remove_non_box_options(box_cfg) -    local ok, err = pcall(box.cfg, box_cfg) -    while M.errinj.ERRINJ_CFG_DELAY do -        lfiber.sleep(0.01) -    end -    if not ok then -        M.sync_timeout = old_sync_timeout -        if was_master and not is_master then -            local_on_master_disable_abort() +    if not is_reload then +        local ok, err = true, nil +        ok, err = pcall(box.cfg, box_cfg) +        while M.errinj.ERRINJ_CFG_DELAY do +            lfiber.sleep(0.01)          end -        if not was_master and is_master then -            local_on_master_enable_abort() +        if not ok then +            M.sync_timeout = old_sync_timeout +            if was_master and not is_master then +                local_on_master_disable_abort() +            end +            if not was_master and is_master then +                local_on_master_enable_abort() +            end +            error(err)          end -        error(err) +        log.info("Box has been configured") +        local uri = luri.parse(this_replica.uri) +        box.once("vshard:storage:1", storage_schema_v1, uri.login, uri.password)      end -    log.info("Box has been configured") -    local uri = luri.parse(this_replica.uri) -    box.once("vshard:storage:1", storage_schema_v1, uri.login, uri.password) -      lreplicaset.rebind_replicasets(new_replicasets, M.replicasets)      lreplicaset.outdate_replicasets(M.replicasets)      M.replicasets = new_replicasets @@ -1639,7 +1642,7 @@ local function storage_cfg(cfg, this_replica_uuid)      M.shard_index = shard_index      M.collect_bucket_garbage_interval = collect_bucket_garbage_interval      M.collect_lua_garbage = collect_lua_garbage -    M.current_cfg = new_cfg +    M.current_cfg = vshard_cfg      if was_master and not is_master then          local_on_master_disable() @@ -1874,7 +1877,7 @@ if not rawget(_G, MODULE_INTERNALS) then      rawset(_G, MODULE_INTERNALS, M)  else      reload_evolution.upgrade(M) -    storage_cfg(M.current_cfg, M.this_replica.uuid) +    storage_cfg(M.current_cfg, M.this_replica.uuid, true)      M.module_version = M.module_version + 1  end @@ -1913,7 +1916,7 @@ return {      rebalancing_is_in_progress = rebalancing_is_in_progress,      recovery_wakeup = recovery_wakeup,      call = storage_call, -    cfg = storage_cfg, +    cfg = function(cfg, uuid) return storage_cfg(cfg, uuid, false) end,      info = storage_info,      buckets_info = storage_buckets_info,      buckets_count = storage_buckets_count,