[tarantool-patches] Re: [PATCH 1/3] Update only vshard part of a cfg on reload

Alex Khatskevich avkhatskevich at tarantool.org
Fri Aug 3 23:03:38 MSK 2018



On 01.08.2018 21:43, Vladislav Shpilevoy wrote:
> Thanks for the patch! See 4 comments below.
>
> On 31/07/2018 19:25, AKhatskevich wrote:
>> Box cfg could have been changed by a user and then overridden by
>> an old vshard config on reload.
>>
>> Since this commit, the box part of a config is applied only when
>> it is explicitly passed to the `cfg` method.
>>
>> This change is important for the multiple routers feature.
>> ---
>>   vshard/cfg.lua          | 54 +++++++++++++++++++++++++------------------------
>>   vshard/router/init.lua  | 18 ++++++++---------
>>   vshard/storage/init.lua | 53 ++++++++++++++++++++++++++++--------------------
>>   3 files changed, 67 insertions(+), 58 deletions(-)
>>
>> diff --git a/vshard/cfg.lua b/vshard/cfg.lua
>> index bba12cc..8282086 100644
>> --- a/vshard/cfg.lua
>> +++ b/vshard/cfg.lua
>> @@ -230,48 +230,50 @@ local non_dynamic_options = {
>>       'bucket_count', 'shard_index'
>>   }
>>   +--
>> +-- Deepcopy a config and split it into vshard_cfg and box_cfg.
>> +--
>> +local function split_cfg(cfg)
>> +    local vshard_field_map = {}
>> +    for _, field in ipairs(cfg_template) do
>> +        vshard_field_map[field[1]] = true
>> +    end
>
> 1. vshard_field_map never changes. Why do you build it on each cfg
> call? Please store it in a module-local variable like cfg_template.
> Or refactor cfg_template and the other templates so they become maps
> with the parameter name as a key - that looks like the most suitable
> solution.
Refactored cfg_template (added an extra commit before this one).
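
For reference, the refactored template is now a map keyed by option
name; a minimal sketch (the field layout here is illustrative, the
exact set lives in the extra commit):

    local cfg_template = {
        sharding = {type = 'table', name = 'Sharding'},
        bucket_count = {type = 'positive integer', name = 'Bucket count',
                        is_optional = true, default = 3000},
        shard_index = {type = 'non-empty string or non-negative integer',
                       name = 'Shard index', is_optional = true,
                       default = 'bucket_id'},
    }

With this shape `cfg_template[k] ~= nil` directly answers "is k a
vshard option?", so split_cfg() needs no auxiliary set.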
>
>> +    local vshard_cfg = {}
>> +    local box_cfg = {}
>> +    for k, v in pairs(cfg) do
>> +        if vshard_field_map[k] then
>> +            vshard_cfg[k] = table.deepcopy(v)
>> +        else
>> +            box_cfg[k] = table.deepcopy(v)
>> +        end
>> +    end
>> +    return vshard_cfg, box_cfg
>> +end
>> +
>> diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
>> index 102b942..75f5df9 100644
>> --- a/vshard/storage/init.lua
>> +++ b/vshard/storage/init.lua
>> @@ -1500,13 +1500,17 @@ end
>> --------------------------------------------------------------------------------
>>   -- Configuration
>> --------------------------------------------------------------------------------
>> +-- Private (not accessible by a user) reload indicator.
>> +local is_reload = false
>
> 2. Please make this variable a parameter of storage_cfg and wrap the
> public storage.cfg with a one-liner:
>
>     storage.cfg = function(cfg, uuid) return storage_cfg(cfg, uuid, false) end
>
> I believe/hope you understand that passing parameters this way, via
> global variables, is flawed by design.
Nice idea.
Fixed.
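
For clarity, the two call sites are now (both verbatim from the full
diff below):

    -- Public API: a user-initiated configuration is never a reload.
    cfg = function(cfg, uuid) return storage_cfg(cfg, uuid, false) end,

    -- Reload path: box is already configured, so box.cfg() is skipped.
    storage_cfg(M.current_cfg, M.this_replica.uuid, true)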
>
>> @@ -1553,18 +1557,19 @@ local function storage_cfg(cfg, this_replica_uuid)
>>       --
>>       -- If a master role of the replica is not changed, then
>>       -- 'read_only' can be set right here.
>> -    cfg.listen = cfg.listen or this_replica.uri
>> -    if cfg.replication == nil and this_replicaset.master and not is_master then
>> -        cfg.replication = {this_replicaset.master.uri}
>> +    box_cfg.listen = box_cfg.listen or this_replica.uri
>> +    if box_cfg.replication == nil and this_replicaset.master
>> +            and not is_master then
>
> 3. Broken indentation.
Fixed.
>
>> +        box_cfg.replication = {this_replicaset.master.uri}
>>       else
>> -        cfg.replication = {}
>> +        box_cfg.replication = {}
>>       end
>>       if was_master == is_master then
>> -        cfg.read_only = not is_master
>> +        box_cfg.read_only = not is_master
>>       end
>>       if type(box.cfg) == 'function' then
>> -        cfg.instance_uuid = this_replica.uuid
>> -        cfg.replicaset_uuid = this_replicaset.uuid
>> +        box_cfg.instance_uuid = this_replica.uuid
>> +        box_cfg.replicaset_uuid = this_replicaset.uuid
>>       else
>>           local info = box.info
>>           if this_replica_uuid ~= info.uuid then
>> @@ -1607,9 +1614,10 @@ local function storage_cfg(cfg, this_replica_uuid)
>>           local_on_master_enable_prepare()
>>       end
>>   -    local box_cfg = table.copy(cfg)
>> -    lcfg.remove_non_box_options(box_cfg)
>> -    local ok, err = pcall(box.cfg, box_cfg)
>> +    local ok, err = true, nil
>> +    if not is_reload then
>> +        ok, err = pcall(box.cfg, box_cfg)
>> +    end
>
> 4. The code below (if not ok then ...) can be moved inside
> 'if not is_reload' together with the 'local ok, err' declaration.
> Please do.
>
>>       while M.errinj.ERRINJ_CFG_DELAY do
>>           lfiber.sleep(0.01)
>>       end
Done.
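
The whole block now runs only on a regular (non-reload) configuration;
condensed here for readability (the full hunk is in the diff below):

    if not is_reload then
        local ok, err = pcall(box.cfg, box_cfg)
        while M.errinj.ERRINJ_CFG_DELAY do
            lfiber.sleep(0.01)
        end
        if not ok then
            M.sync_timeout = old_sync_timeout
            if was_master and not is_master then
                local_on_master_disable_abort()
            end
            if not was_master and is_master then
                local_on_master_enable_abort()
            end
            error(err)
        end
        log.info("Box has been configured")
    end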



Full diff

commit 81cb60df74fbacae3aee1817f1ff16e7fe0af72f
Author: AKhatskevich <avkhatskevich at tarantool.org>
Date:   Mon Jul 23 16:42:22 2018 +0300

     Update only vshard part of a cfg on reload

     Box cfg could have been changed by a user and then overridden by
     an old vshard config on reload.

     Since this commit, the box part of a config is applied only when
     it is explicitly passed to the `cfg` method.

     This change is important for the multiple routers feature.

diff --git a/vshard/cfg.lua b/vshard/cfg.lua
index 7c9ab77..80ea432 100644
--- a/vshard/cfg.lua
+++ b/vshard/cfg.lua
@@ -221,6 +221,22 @@ local cfg_template = {
      },
  }

+--
+-- Deepcopy a config and split it into vshard_cfg and box_cfg.
+--
+local function split_cfg(cfg)
+    local vshard_cfg = {}
+    local box_cfg = {}
+    for k, v in pairs(cfg) do
+        if cfg_template[k] then
+            vshard_cfg[k] = table.deepcopy(v)
+        else
+            box_cfg[k] = table.deepcopy(v)
+        end
+    end
+    return vshard_cfg, box_cfg
+end
+
  --
  -- Names of options which cannot be changed during reconfigure.
  --
@@ -232,44 +248,26 @@ local non_dynamic_options = {
  -- Check sharding config on correctness. Check types, name and uri
  -- uniqueness, master count (in each replicaset must be <= 1).
  --
-local function cfg_check(shard_cfg, old_cfg)
-    if type(shard_cfg) ~= 'table' then
+local function cfg_check(cfg, old_vshard_cfg)
+    if type(cfg) ~= 'table' then
          error('Сonfig must be map of options')
      end
-    shard_cfg = table.deepcopy(shard_cfg)
-    validate_config(shard_cfg, cfg_template)
-    if not old_cfg then
-        return shard_cfg
+    local vshard_cfg, box_cfg = split_cfg(cfg)
+    validate_config(vshard_cfg, cfg_template)
+    if not old_vshard_cfg then
+        return vshard_cfg, box_cfg
      end
      -- Check non-dynamic after default values are added.
      for _, f_name in pairs(non_dynamic_options) do
          -- New option may be added in new vshard version.
-        if shard_cfg[f_name] ~= old_cfg[f_name] then
+        if vshard_cfg[f_name] ~= old_vshard_cfg[f_name] then
             error(string.format('Non-dynamic option %s ' ..
                                 'cannot be reconfigured', f_name))
          end
      end
-    return shard_cfg
-end
-
---
--- Nullify non-box options.
---
-local function remove_non_box_options(cfg)
-    cfg.sharding = nil
-    cfg.weights = nil
-    cfg.zone = nil
-    cfg.bucket_count = nil
-    cfg.rebalancer_disbalance_threshold = nil
-    cfg.rebalancer_max_receiving = nil
-    cfg.shard_index = nil
-    cfg.collect_bucket_garbage_interval = nil
-    cfg.collect_lua_garbage = nil
-    cfg.sync_timeout = nil
-    cfg.connection_outdate_delay = nil
+    return vshard_cfg, box_cfg
  end

  return {
      check = cfg_check,
-    remove_non_box_options = remove_non_box_options,
  }
diff --git a/vshard/router/init.lua b/vshard/router/init.lua
index 4cb19fd..e2b2b22 100644
--- a/vshard/router/init.lua
+++ b/vshard/router/init.lua
@@ -496,18 +496,15 @@ end
  --------------------------------------------------------------------------------

  local function router_cfg(cfg)
-    cfg = lcfg.check(cfg, M.current_cfg)
-    local new_cfg = table.copy(cfg)
+    local vshard_cfg, box_cfg = lcfg.check(cfg, M.current_cfg)
      if not M.replicasets then
          log.info('Starting router configuration')
      else
          log.info('Starting router reconfiguration')
      end
-    local new_replicasets = lreplicaset.buildall(cfg)
-    local total_bucket_count = cfg.bucket_count
-    local collect_lua_garbage = cfg.collect_lua_garbage
-    local box_cfg = table.copy(cfg)
-    lcfg.remove_non_box_options(box_cfg)
+    local new_replicasets = lreplicaset.buildall(vshard_cfg)
+    local total_bucket_count = vshard_cfg.bucket_count
+    local collect_lua_garbage = vshard_cfg.collect_lua_garbage
      log.info("Calling box.cfg()...")
      for k, v in pairs(box_cfg) do
          log.info({[k] = v})
@@ -530,11 +527,12 @@ local function router_cfg(cfg)
          replicaset:connect_all()
      end
      lreplicaset.wait_masters_connect(new_replicasets)
-    lreplicaset.outdate_replicasets(M.replicasets, cfg.connection_outdate_delay)
-    M.connection_outdate_delay = cfg.connection_outdate_delay
+    lreplicaset.outdate_replicasets(M.replicasets,
+                                    vshard_cfg.connection_outdate_delay)
+    M.connection_outdate_delay = vshard_cfg.connection_outdate_delay
      M.total_bucket_count = total_bucket_count
      M.collect_lua_garbage = collect_lua_garbage
-    M.current_cfg = cfg
+    M.current_cfg = vshard_cfg
      M.replicasets = new_replicasets
      -- Update existing route map in-place.
      local old_route_map = M.route_map
diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
index 102b942..40216ea 100644
--- a/vshard/storage/init.lua
+++ b/vshard/storage/init.lua
@@ -1500,13 +1500,13 @@ end
  --------------------------------------------------------------------------------
  -- Configuration
  --------------------------------------------------------------------------------
-local function storage_cfg(cfg, this_replica_uuid)
+
+local function storage_cfg(cfg, this_replica_uuid, is_reload)
      if this_replica_uuid == nil then
          error('Usage: cfg(configuration, this_replica_uuid)')
      end
-    cfg = lcfg.check(cfg, M.current_cfg)
-    local new_cfg = table.copy(cfg)
-    if cfg.weights or cfg.zone then
+    local vshard_cfg, box_cfg = lcfg.check(cfg, M.current_cfg)
+    if vshard_cfg.weights or vshard_cfg.zone then
        error('Weights and zone are not allowed for storage configuration')
      end
      if M.replicasets then
@@ -1520,7 +1520,7 @@ local function storage_cfg(cfg, this_replica_uuid)

      local this_replicaset
      local this_replica
-    local new_replicasets = lreplicaset.buildall(cfg)
+    local new_replicasets = lreplicaset.buildall(vshard_cfg)
      local min_master
      for rs_uuid, rs in pairs(new_replicasets) do
          for replica_uuid, replica in pairs(rs.replicas) do
@@ -1553,18 +1553,19 @@ local function storage_cfg(cfg, this_replica_uuid)
      --
      -- If a master role of the replica is not changed, then
      -- 'read_only' can be set right here.
-    cfg.listen = cfg.listen or this_replica.uri
-    if cfg.replication == nil and this_replicaset.master and not is_master then
-        cfg.replication = {this_replicaset.master.uri}
+    box_cfg.listen = box_cfg.listen or this_replica.uri
+    if box_cfg.replication == nil and this_replicaset.master
+       and not is_master then
+        box_cfg.replication = {this_replicaset.master.uri}
      else
-        cfg.replication = {}
+        box_cfg.replication = {}
      end
      if was_master == is_master then
-        cfg.read_only = not is_master
+        box_cfg.read_only = not is_master
      end
      if type(box.cfg) == 'function' then
-        cfg.instance_uuid = this_replica.uuid
-        cfg.replicaset_uuid = this_replicaset.uuid
+        box_cfg.instance_uuid = this_replica.uuid
+        box_cfg.replicaset_uuid = this_replicaset.uuid
      else
          local info = box.info
          if this_replica_uuid ~= info.uuid then
@@ -1578,12 +1579,14 @@ local function storage_cfg(cfg, this_replica_uuid)
                                  this_replicaset.uuid))
          end
      end
-    local total_bucket_count = cfg.bucket_count
-    local rebalancer_disbalance_threshold = cfg.rebalancer_disbalance_threshold
-    local rebalancer_max_receiving = cfg.rebalancer_max_receiving
-    local shard_index = cfg.shard_index
-    local collect_bucket_garbage_interval = cfg.collect_bucket_garbage_interval
-    local collect_lua_garbage = cfg.collect_lua_garbage
+    local total_bucket_count = vshard_cfg.bucket_count
+    local rebalancer_disbalance_threshold =
+        vshard_cfg.rebalancer_disbalance_threshold
+    local rebalancer_max_receiving = vshard_cfg.rebalancer_max_receiving
+    local shard_index = vshard_cfg.shard_index
+    local collect_bucket_garbage_interval =
+        vshard_cfg.collect_bucket_garbage_interval
+    local collect_lua_garbage = vshard_cfg.collect_lua_garbage

      -- It is considered that all possible errors during cfg
      -- process occur only before this place.
@@ -1598,7 +1601,7 @@ local function storage_cfg(cfg, this_replica_uuid)
      -- a new sync timeout.
      --
      local old_sync_timeout = M.sync_timeout
-    M.sync_timeout = cfg.sync_timeout
+    M.sync_timeout = vshard_cfg.sync_timeout

      if was_master and not is_master then
          local_on_master_disable_prepare()
@@ -1607,27 +1610,27 @@ local function storage_cfg(cfg, this_replica_uuid)
          local_on_master_enable_prepare()
      end

-    local box_cfg = table.copy(cfg)
-    lcfg.remove_non_box_options(box_cfg)
-    local ok, err = pcall(box.cfg, box_cfg)
-    while M.errinj.ERRINJ_CFG_DELAY do
-        lfiber.sleep(0.01)
-    end
-    if not ok then
-        M.sync_timeout = old_sync_timeout
-        if was_master and not is_master then
-            local_on_master_disable_abort()
+    if not is_reload then
+        local ok, err = true, nil
+        ok, err = pcall(box.cfg, box_cfg)
+        while M.errinj.ERRINJ_CFG_DELAY do
+            lfiber.sleep(0.01)
          end
-        if not was_master and is_master then
-            local_on_master_enable_abort()
+        if not ok then
+            M.sync_timeout = old_sync_timeout
+            if was_master and not is_master then
+                local_on_master_disable_abort()
+            end
+            if not was_master and is_master then
+                local_on_master_enable_abort()
+            end
+            error(err)
          end
-        error(err)
+        log.info("Box has been configured")
+        local uri = luri.parse(this_replica.uri)
+        box.once("vshard:storage:1", storage_schema_v1, uri.login, 
uri.password)
      end

-    log.info("Box has been configured")
-    local uri = luri.parse(this_replica.uri)
-    box.once("vshard:storage:1", storage_schema_v1, uri.login, 
uri.password)
-
      lreplicaset.rebind_replicasets(new_replicasets, M.replicasets)
      lreplicaset.outdate_replicasets(M.replicasets)
      M.replicasets = new_replicasets
@@ -1639,7 +1642,7 @@ local function storage_cfg(cfg, this_replica_uuid)
      M.shard_index = shard_index
      M.collect_bucket_garbage_interval = collect_bucket_garbage_interval
      M.collect_lua_garbage = collect_lua_garbage
-    M.current_cfg = new_cfg
+    M.current_cfg = vshard_cfg

      if was_master and not is_master then
          local_on_master_disable()
@@ -1874,7 +1877,7 @@ if not rawget(_G, MODULE_INTERNALS) then
      rawset(_G, MODULE_INTERNALS, M)
  else
      reload_evolution.upgrade(M)
-    storage_cfg(M.current_cfg, M.this_replica.uuid)
+    storage_cfg(M.current_cfg, M.this_replica.uuid, true)
      M.module_version = M.module_version + 1
  end

@@ -1913,7 +1916,7 @@ return {
      rebalancing_is_in_progress = rebalancing_is_in_progress,
      recovery_wakeup = recovery_wakeup,
      call = storage_call,
-    cfg = storage_cfg,
+    cfg = function(cfg, uuid) return storage_cfg(cfg, uuid, false) end,
      info = storage_info,
      buckets_info = storage_buckets_info,
      buckets_count = storage_buckets_count,
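
To illustrate the user-visible behavior (a usage sketch;
`sharding_topology` and `instance_uuid` are placeholders):

    -- Regular call: box options present in the table are forwarded
    -- to box.cfg() as before.
    vshard.storage.cfg({
        sharding = sharding_topology, -- vshard option
        bucket_count = 3000,          -- vshard option
        listen = 3301,                -- box option, goes to box.cfg()
    }, instance_uuid)

    -- On reload only the vshard part of the stored config is
    -- re-applied; box.cfg() is not called, so box options changed by
    -- the user since the last cfg() call stay intact.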




