[tarantool-patches] Re: [PATCH 2/3] Move lua gc to a dedicated module

Alex Khatskevich avkhatskevich at tarantool.org
Fri Aug 3 23:04:34 MSK 2018


On 01.08.2018 21:43, Vladislav Shpilevoy wrote:
> Thanks for the patch! See 4 comments below.
>
> On 31/07/2018 19:25, AKhatskevich wrote:
>> `vshard.lua_gc.lua` is a new module which helps make gc work more
>> intense.
>> Before the commit that was a duty of router and storage.
>>
>> Reasons to move lua gc to a separate module:
>> 1. It is not a duty of vshard to collect garbage, so let gc fiber
>>     be as far from vshard as possible.
>> 2. Next commits will introduce multiple routers feature, which require
>>     gc fiber to be a singleton.
>>
>> Closes #138
>> ---
>>   test/router/garbage_collector.result    | 27 +++++++++++------
>>   test/router/garbage_collector.test.lua  | 18 ++++++-----
>>   test/storage/garbage_collector.result   | 27 +++++++++--------
>>   test/storage/garbage_collector.test.lua | 22 ++++++--------
>>   vshard/lua_gc.lua                       | 54 
>> +++++++++++++++++++++++++++++++++
>>   vshard/router/init.lua                  | 19 +++---------
>>   vshard/storage/init.lua                 | 20 ++++--------
>>   7 files changed, 116 insertions(+), 71 deletions(-)
>>   create mode 100644 vshard/lua_gc.lua
>>
>> diff --git a/test/router/garbage_collector.result 
>> b/test/router/garbage_collector.result
>> index 3c2a4f1..a7474fc 100644
>> --- a/test/router/garbage_collector.result
>> +++ b/test/router/garbage_collector.result
>> @@ -40,27 +40,30 @@ test_run:switch('router_1')
>>   fiber = require('fiber')
>>   ---
>>   ...
>> -cfg.collect_lua_garbage = true
>> +lua_gc = require('vshard.lua_gc')
>>   ---
>>   ...
>> -iters = vshard.consts.COLLECT_LUA_GARBAGE_INTERVAL / 
>> vshard.consts.DISCOVERY_INTERVAL
>> +cfg.collect_lua_garbage = true
>
> 1. Now this code tests nothing but just fibers. Below you do wakeup
> and check that iteration counter is increased, but it is obvious
> thing. Before your patch the test really tested that GC is called
> by checking for nullified weak references. Now I can remove 
> collectgarbage()
> from the main_loop and nothing would changed. Please, make this test
> be a test.
GC test returned back.
>
> Moreover, the test hangs forever both locally and on Travis.
Fixed
>
>> diff --git a/test/storage/garbage_collector.result 
>> b/test/storage/garbage_collector.result
>> index 3588fb4..d94ba24 100644
>> --- a/test/storage/garbage_collector.result
>> +++ b/test/storage/garbage_collector.result
>
> 2. Same. Now the test passes even if I removed collectgarbage() from
> the main loop.
returned.
>
>> diff --git a/vshard/lua_gc.lua b/vshard/lua_gc.lua
>> new file mode 100644
>> index 0000000..8d6af3e
>> --- /dev/null
>> +++ b/vshard/lua_gc.lua
>> @@ -0,0 +1,54 @@
>> +--
>> +-- This module implements background lua GC fiber.
>> +-- It's purpose is to make GC more aggressive.
>> +--
>> +
>> +local lfiber = require('fiber')
>> +local MODULE_INTERNALS = '__module_vshard_lua_gc'
>> +
>> +local M = rawget(_G, MODULE_INTERNALS)
>> +if not M then
>> +    M = {
>> +        -- Background fiber.
>> +        bg_fiber = nil,
>> +        -- GC interval in seconds.
>> +        interval = nil,
>> +        -- Main loop.
>> +        -- Stored here to make the fiber reloadable.
>> +        main_loop = nil,
>> +        -- Number of `collectgarbage()` calls.
>> +        iterations = 0,
>> +    }
>> +end
>> +local DEFALUT_INTERVAL = 100
>
> 3. For constants please use vshard.consts.
>
> 4. You should not choose interval inside the main_loop.
> Please, use 'default' option in cfg.lua.
DEFAULT_INTERVAL is removed at all.
Interval value is became required.



full diff



commit ec221bd060f46e4dc009eaab1c6c1bd1cf5a4150
Author: AKhatskevich <avkhatskevich at tarantool.org>
Date:   Thu Jul 26 01:17:00 2018 +0300

     Move lua gc to a dedicated module

     `vshard.lua_gc.lua` is a new module which helps make gc work more
     intense.
     Before the commit that was a duty of router and storage.

     Reasons to move lua gc to a separate module:
     1. It is not a duty of vshard to collect garbage, so let gc fiber
        be as far from vshard as possible.
     2. Next commits will introduce multiple routers feature, which require
        gc fiber to be a singleton.

     Closes #138

diff --git a/test/router/garbage_collector.result 
b/test/router/garbage_collector.result
index 3c2a4f1..7780046 100644
--- a/test/router/garbage_collector.result
+++ b/test/router/garbage_collector.result
@@ -40,41 +40,59 @@ test_run:switch('router_1')
  fiber = require('fiber')
  ---
  ...
-cfg.collect_lua_garbage = true
+lua_gc = require('vshard.lua_gc')
  ---
  ...
-iters = vshard.consts.COLLECT_LUA_GARBAGE_INTERVAL / 
vshard.consts.DISCOVERY_INTERVAL
+cfg.collect_lua_garbage = true
  ---
  ...
  vshard.router.cfg(cfg)
  ---
  ...
+lua_gc.internal.bg_fiber ~= nil
+---
+- true
+...
+-- Check that `collectgarbage()` was really called.
  a = setmetatable({}, {__mode = 'v'})
  ---
  ...
  a.k = {b = 100}
  ---
  ...
-for i = 1, iters + 1 do vshard.router.discovery_wakeup() 
fiber.sleep(0.01) end
+iterations = lua_gc.internal.iterations
+---
+...
+lua_gc.internal.bg_fiber:wakeup()
+---
+...
+while lua_gc.internal.iterations < iterations + 1 do fiber.sleep(0.01) end
  ---
  ...
  a.k
  ---
  - null
  ...
+lua_gc.internal.interval = 0.001
+---
+...
  cfg.collect_lua_garbage = false
  ---
  ...
  vshard.router.cfg(cfg)
  ---
  ...
-a.k = {b = 100}
+lua_gc.internal.bg_fiber == nil
+---
+- true
+...
+iterations = lua_gc.internal.iterations
  ---
  ...
-for i = 1, iters + 1 do vshard.router.discovery_wakeup() 
fiber.sleep(0.01) end
+fiber.sleep(0.01)
  ---
  ...
-a.k ~= nil
+iterations == lua_gc.internal.iterations
  ---
  - true
  ...
diff --git a/test/router/garbage_collector.test.lua 
b/test/router/garbage_collector.test.lua
index b3411cd..e8d0876 100644
--- a/test/router/garbage_collector.test.lua
+++ b/test/router/garbage_collector.test.lua
@@ -13,18 +13,24 @@ test_run:cmd("start server router_1")
  --
  test_run:switch('router_1')
  fiber = require('fiber')
+lua_gc = require('vshard.lua_gc')
  cfg.collect_lua_garbage = true
-iters = vshard.consts.COLLECT_LUA_GARBAGE_INTERVAL / 
vshard.consts.DISCOVERY_INTERVAL
  vshard.router.cfg(cfg)
+lua_gc.internal.bg_fiber ~= nil
+-- Check that `collectgarbage()` was really called.
  a = setmetatable({}, {__mode = 'v'})
  a.k = {b = 100}
-for i = 1, iters + 1 do vshard.router.discovery_wakeup() 
fiber.sleep(0.01) end
+iterations = lua_gc.internal.iterations
+lua_gc.internal.bg_fiber:wakeup()
+while lua_gc.internal.iterations < iterations + 1 do fiber.sleep(0.01) end
  a.k
+lua_gc.internal.interval = 0.001
  cfg.collect_lua_garbage = false
  vshard.router.cfg(cfg)
-a.k = {b = 100}
-for i = 1, iters + 1 do vshard.router.discovery_wakeup() 
fiber.sleep(0.01) end
-a.k ~= nil
+lua_gc.internal.bg_fiber == nil
+iterations = lua_gc.internal.iterations
+fiber.sleep(0.01)
+iterations == lua_gc.internal.iterations

  test_run:switch("default")
  test_run:cmd("stop server router_1")
diff --git a/test/storage/garbage_collector.result 
b/test/storage/garbage_collector.result
index 3588fb4..6bec2db 100644
--- a/test/storage/garbage_collector.result
+++ b/test/storage/garbage_collector.result
@@ -120,7 +120,7 @@ test_run:switch('storage_1_a')
  fiber = require('fiber')
  ---
  ...
-log = require('log')
+lua_gc = require('vshard.lua_gc')
  ---
  ...
  cfg.collect_lua_garbage = true
@@ -129,38 +129,50 @@ cfg.collect_lua_garbage = true
  vshard.storage.cfg(cfg, names.storage_1_a)
  ---
  ...
--- Create a weak reference to a able {b = 100} - it must be
--- deleted on the next GC.
+lua_gc.internal.bg_fiber ~= nil
+---
+- true
+...
+-- Check that `collectgarbage()` was really called.
  a = setmetatable({}, {__mode = 'v'})
  ---
  ...
  a.k = {b = 100}
  ---
  ...
-iters = vshard.consts.COLLECT_LUA_GARBAGE_INTERVAL / 
vshard.consts.DEFAULT_COLLECT_BUCKET_GARBAGE_INTERVAL
+iterations = lua_gc.internal.iterations
  ---
  ...
--- Wait until Lua GC deletes a.k.
-for i = 1, iters + 1 do vshard.storage.garbage_collector_wakeup() 
fiber.sleep(0.01) end
+lua_gc.internal.bg_fiber:wakeup()
+---
+...
+while lua_gc.internal.iterations < iterations + 1 do fiber.sleep(0.01) end
  ---
  ...
  a.k
  ---
  - null
  ...
+lua_gc.internal.interval = 0.001
+---
+...
  cfg.collect_lua_garbage = false
  ---
  ...
  vshard.storage.cfg(cfg, names.storage_1_a)
  ---
  ...
-a.k = {b = 100}
+lua_gc.internal.bg_fiber == nil
+---
+- true
+...
+iterations = lua_gc.internal.iterations
  ---
  ...
-for i = 1, iters + 1 do vshard.storage.garbage_collector_wakeup() 
fiber.sleep(0.01) end
+fiber.sleep(0.01)
  ---
  ...
-a.k ~= nil
+iterations == lua_gc.internal.iterations
  ---
  - true
  ...
diff --git a/test/storage/garbage_collector.test.lua 
b/test/storage/garbage_collector.test.lua
index 79e76d8..407b8a1 100644
--- a/test/storage/garbage_collector.test.lua
+++ b/test/storage/garbage_collector.test.lua
@@ -46,22 +46,24 @@ customer:select{}
  --
  test_run:switch('storage_1_a')
  fiber = require('fiber')
-log = require('log')
+lua_gc = require('vshard.lua_gc')
  cfg.collect_lua_garbage = true
  vshard.storage.cfg(cfg, names.storage_1_a)
--- Create a weak reference to a able {b = 100} - it must be
--- deleted on the next GC.
+lua_gc.internal.bg_fiber ~= nil
+-- Check that `collectgarbage()` was really called.
  a = setmetatable({}, {__mode = 'v'})
  a.k = {b = 100}
-iters = vshard.consts.COLLECT_LUA_GARBAGE_INTERVAL / 
vshard.consts.DEFAULT_COLLECT_BUCKET_GARBAGE_INTERVAL
--- Wait until Lua GC deletes a.k.
-for i = 1, iters + 1 do vshard.storage.garbage_collector_wakeup() 
fiber.sleep(0.01) end
+iterations = lua_gc.internal.iterations
+lua_gc.internal.bg_fiber:wakeup()
+while lua_gc.internal.iterations < iterations + 1 do fiber.sleep(0.01) end
  a.k
+lua_gc.internal.interval = 0.001
  cfg.collect_lua_garbage = false
  vshard.storage.cfg(cfg, names.storage_1_a)
-a.k = {b = 100}
-for i = 1, iters + 1 do vshard.storage.garbage_collector_wakeup() 
fiber.sleep(0.01) end
-a.k ~= nil
+lua_gc.internal.bg_fiber == nil
+iterations = lua_gc.internal.iterations
+fiber.sleep(0.01)
+iterations == lua_gc.internal.iterations

  test_run:switch('default')
  test_run:drop_cluster(REPLICASET_2)
diff --git a/vshard/lua_gc.lua b/vshard/lua_gc.lua
new file mode 100644
index 0000000..c6c5cd3
--- /dev/null
+++ b/vshard/lua_gc.lua
@@ -0,0 +1,54 @@
+--
+-- This module implements background lua GC fiber.
+-- It's purpose is to make GC more aggressive.
+--
+
+local lfiber = require('fiber')
+local MODULE_INTERNALS = '__module_vshard_lua_gc'
+
+local M = rawget(_G, MODULE_INTERNALS)
+if not M then
+    M = {
+        -- Background fiber.
+        bg_fiber = nil,
+        -- GC interval in seconds.
+        interval = nil,
+        -- Main loop.
+        -- Stored here to make the fiber reloadable.
+        main_loop = nil,
+        -- Number of `collectgarbage()` calls.
+        iterations = 0,
+    }
+end
+
+M.main_loop = function()
+    lfiber.sleep(M.interval)
+    collectgarbage()
+    M.iterations = M.iterations + 1
+    return M.main_loop()
+end
+
+local function set_state(active, interval)
+    assert(type(interval) == 'number')
+    M.interval = interval
+    if active and not M.bg_fiber then
+        M.bg_fiber = lfiber.create(M.main_loop)
+        M.bg_fiber:name('vshard.lua_gc')
+    end
+    if not active and M.bg_fiber then
+        M.bg_fiber:cancel()
+        M.bg_fiber = nil
+    end
+    if active then
+        M.bg_fiber:wakeup()
+    end
+end
+
+if not rawget(_G, MODULE_INTERNALS) then
+    rawset(_G, MODULE_INTERNALS, M)
+end
+
+return {
+    set_state = set_state,
+    internal = M,
+}
diff --git a/vshard/router/init.lua b/vshard/router/init.lua
index e2b2b22..3e127cb 100644
--- a/vshard/router/init.lua
+++ b/vshard/router/init.lua
@@ -7,6 +7,7 @@ if rawget(_G, MODULE_INTERNALS) then
      local vshard_modules = {
          'vshard.consts', 'vshard.error', 'vshard.cfg',
          'vshard.hash', 'vshard.replicaset', 'vshard.util',
+        'vshard.lua_gc',
      }
      for _, module in pairs(vshard_modules) do
          package.loaded[module] = nil
@@ -18,6 +19,7 @@ local lcfg = require('vshard.cfg')
  local lhash = require('vshard.hash')
  local lreplicaset = require('vshard.replicaset')
  local util = require('vshard.util')
+local lua_gc = require('vshard.lua_gc')

  local M = rawget(_G, MODULE_INTERNALS)
  if not M then
@@ -43,8 +45,7 @@ if not M then
          discovery_fiber = nil,
          -- Bucket count stored on all replicasets.
          total_bucket_count = 0,
-        -- If true, then discovery fiber starts to call
-        -- collectgarbage() periodically.
+        -- Boolean lua_gc state (create periodic gc task).
          collect_lua_garbage = nil,
          -- This counter is used to restart background fibers with
          -- new reloaded code.
@@ -151,8 +152,6 @@ end
  --
  local function discovery_f()
      local module_version = M.module_version
-    local iterations_until_lua_gc =
-        consts.COLLECT_LUA_GARBAGE_INTERVAL / consts.DISCOVERY_INTERVAL
      while module_version == M.module_version do
          while not next(M.replicasets) do
              lfiber.sleep(consts.DISCOVERY_INTERVAL)
@@ -188,12 +187,6 @@ local function discovery_f()
                      M.route_map[bucket_id] = replicaset
                  end
              end
-            iterations_until_lua_gc = iterations_until_lua_gc - 1
-            if M.collect_lua_garbage and iterations_until_lua_gc == 0 then
-                iterations_until_lua_gc =
-                    consts.COLLECT_LUA_GARBAGE_INTERVAL / 
consts.DISCOVERY_INTERVAL
-                collectgarbage()
-            end
              lfiber.sleep(consts.DISCOVERY_INTERVAL)
          end
      end
@@ -504,7 +497,6 @@ local function router_cfg(cfg)
      end
      local new_replicasets = lreplicaset.buildall(vshard_cfg)
      local total_bucket_count = vshard_cfg.bucket_count
-    local collect_lua_garbage = vshard_cfg.collect_lua_garbage
      log.info("Calling box.cfg()...")
      for k, v in pairs(box_cfg) do
          log.info({[k] = v})
@@ -531,7 +523,7 @@ local function router_cfg(cfg)
vshard_cfg.connection_outdate_delay)
      M.connection_outdate_delay = vshard_cfg.connection_outdate_delay
      M.total_bucket_count = total_bucket_count
-    M.collect_lua_garbage = collect_lua_garbage
+    M.collect_lua_garbage = vshard_cfg.collect_lua_garbage
      M.current_cfg = vshard_cfg
      M.replicasets = new_replicasets
      -- Update existing route map in-place.
@@ -548,8 +540,7 @@ local function router_cfg(cfg)
          M.discovery_fiber = util.reloadable_fiber_create(
              'vshard.discovery', M, 'discovery_f')
      end
-    -- Destroy connections, not used in a new configuration.
-    collectgarbage()
+    lua_gc.set_state(M.collect_lua_garbage, 
consts.COLLECT_LUA_GARBAGE_INTERVAL)
  end

  --------------------------------------------------------------------------------
diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
index 40216ea..3e29e9d 100644
--- a/vshard/storage/init.lua
+++ b/vshard/storage/init.lua
@@ -10,7 +10,8 @@ if rawget(_G, MODULE_INTERNALS) then
      local vshard_modules = {
          'vshard.consts', 'vshard.error', 'vshard.cfg',
          'vshard.replicaset', 'vshard.util',
-        'vshard.storage.reload_evolution'
+        'vshard.storage.reload_evolution',
+        'vshard.lua_gc',
      }
      for _, module in pairs(vshard_modules) do
          package.loaded[module] = nil
@@ -21,6 +22,7 @@ local lerror = require('vshard.error')
  local lcfg = require('vshard.cfg')
  local lreplicaset = require('vshard.replicaset')
  local util = require('vshard.util')
+local lua_gc = require('vshard.lua_gc')
  local reload_evolution = require('vshard.storage.reload_evolution')

  local M = rawget(_G, MODULE_INTERNALS)
@@ -75,8 +77,7 @@ if not M then
          collect_bucket_garbage_fiber = nil,
          -- Do buckets garbage collection once per this time.
          collect_bucket_garbage_interval = nil,
-        -- If true, then bucket garbage collection fiber starts to
-        -- call collectgarbage() periodically.
+        -- Boolean lua_gc state (create periodic gc task).
          collect_lua_garbage = nil,

          -------------------- Bucket recovery ---------------------
@@ -1063,9 +1064,6 @@ function collect_garbage_f()
      -- buckets_for_redirect is deleted, it gets empty_sent_buckets
      -- for next deletion.
      local empty_sent_buckets = {}
-    local iterations_until_lua_gc =
-        consts.COLLECT_LUA_GARBAGE_INTERVAL / 
M.collect_bucket_garbage_interval
-
      while M.module_version == module_version do
          -- Check if no changes in buckets configuration.
          if control.bucket_generation_collected ~= 
control.bucket_generation then
@@ -1106,12 +1104,6 @@ function collect_garbage_f()
              end
          end
  ::continue::
-        iterations_until_lua_gc = iterations_until_lua_gc - 1
-        if iterations_until_lua_gc == 0 and M.collect_lua_garbage then
-            iterations_until_lua_gc = consts.COLLECT_LUA_GARBAGE_INTERVAL /
- M.collect_bucket_garbage_interval
-            collectgarbage()
-        end
          lfiber.sleep(M.collect_bucket_garbage_interval)
      end
  end
@@ -1586,7 +1578,6 @@ local function storage_cfg(cfg, this_replica_uuid, 
is_reload)
      local shard_index = vshard_cfg.shard_index
      local collect_bucket_garbage_interval =
          vshard_cfg.collect_bucket_garbage_interval
-    local collect_lua_garbage = vshard_cfg.collect_lua_garbage

      -- It is considered that all possible errors during cfg
      -- process occur only before this place.
@@ -1641,7 +1632,7 @@ local function storage_cfg(cfg, this_replica_uuid, 
is_reload)
      M.rebalancer_max_receiving = rebalancer_max_receiving
      M.shard_index = shard_index
      M.collect_bucket_garbage_interval = collect_bucket_garbage_interval
-    M.collect_lua_garbage = collect_lua_garbage
+    M.collect_lua_garbage = vshard_cfg.collect_lua_garbage
      M.current_cfg = vshard_cfg

      if was_master and not is_master then
@@ -1666,6 +1657,7 @@ local function storage_cfg(cfg, this_replica_uuid, 
is_reload)
          M.rebalancer_fiber:cancel()
          M.rebalancer_fiber = nil
      end
+    lua_gc.set_state(M.collect_lua_garbage, 
consts.COLLECT_LUA_GARBAGE_INTERVAL)
      -- Destroy connections, not used in a new configuration.
      collectgarbage()
  end





More information about the Tarantool-patches mailing list