[tarantool-patches] [PATCH][vshard] Reload reloadable fiber

AKhatskevich avkhatskevich at tarantool.org
Thu Jun 14 14:58:09 MSK 2018


Fixed a problem:
The `reloadable_fiber_f` was running an infinite where loop and
preventing the whole module from being reloaded.

This behavior is fixed by calling new version of `reloadable_fiber_f` in
a return statement instead of the where loop.  Note: calling a function
in a return statement doesn't increase a stack size.

Closes #116
---
Branch: https://github.com/tarantool/vshard/tree/kh/gh-116-reloadable
Issue: https://github.com/tarantool/vshard/issues/116

Change:
 * Add tests.

 test/router/reload.result    |   4 +-
 test/router/reload.test.lua  |   4 +-
 test/storage/reload.result   |   6 +--
 test/storage/reload.test.lua |   6 +--
 test/unit/util.result        | 107 +++++++++++++++++++++++++++++++++++++++++++
 test/unit/util.test.lua      |  50 ++++++++++++++++++++
 vshard/util.lua              |  78 +++++++++++++++++++++----------
 7 files changed, 222 insertions(+), 33 deletions(-)
 create mode 100644 test/unit/util.result
 create mode 100644 test/unit/util.test.lua

diff --git a/test/router/reload.result b/test/router/reload.result
index 19a9ead..47f3c2e 100644
--- a/test/router/reload.result
+++ b/test/router/reload.result
@@ -116,10 +116,10 @@ vshard.router.module_version()
 check_reloaded()
 ---
 ...
-while test_run:grep_log('router_1', 'Failover has been reloaded') == nil do fiber.sleep(0.1) end
+while test_run:grep_log('router_1', 'Failover has been started') == nil do fiber.sleep(0.1) end
 ---
 ...
-while test_run:grep_log('router_1', 'Discovery has been reloaded') == nil do fiber.sleep(0.1) vshard.router.discovery_wakeup() end
+while test_run:grep_log('router_1', 'Discovery has been started') == nil do fiber.sleep(0.1) vshard.router.discovery_wakeup() end
 ---
 ...
 check_reloaded()
diff --git a/test/router/reload.test.lua b/test/router/reload.test.lua
index 6e21b74..af2939d 100644
--- a/test/router/reload.test.lua
+++ b/test/router/reload.test.lua
@@ -59,8 +59,8 @@ vshard.router.module_version()
 
 check_reloaded()
 
-while test_run:grep_log('router_1', 'Failover has been reloaded') == nil do fiber.sleep(0.1) end
-while test_run:grep_log('router_1', 'Discovery has been reloaded') == nil do fiber.sleep(0.1) vshard.router.discovery_wakeup() end
+while test_run:grep_log('router_1', 'Failover has been started') == nil do fiber.sleep(0.1) end
+while test_run:grep_log('router_1', 'Discovery has been started') == nil do fiber.sleep(0.1) vshard.router.discovery_wakeup() end
 
 check_reloaded()
 
diff --git a/test/storage/reload.result b/test/storage/reload.result
index f689cf4..531d984 100644
--- a/test/storage/reload.result
+++ b/test/storage/reload.result
@@ -113,13 +113,13 @@ vshard.storage.module_version()
 check_reloaded()
 ---
 ...
-while test_run:grep_log('storage_2_a', 'Garbage collector has been reloaded') == nil do fiber.sleep(0.1) end
+while test_run:grep_log('storage_2_a', 'Garbage collector has been started') == nil do fiber.sleep(0.1) end
 ---
 ...
-while test_run:grep_log('storage_2_a', 'Recovery has been reloaded') == nil do fiber.sleep(0.1) vshard.storage.recovery_wakeup() end
+while test_run:grep_log('storage_2_a', 'Recovery has been started') == nil do fiber.sleep(0.1) vshard.storage.recovery_wakeup() end
 ---
 ...
-while test_run:grep_log('storage_2_a', 'Rebalancer has been reloaded') == nil do fiber.sleep(0.1) vshard.storage.rebalancer_wakeup() end
+while test_run:grep_log('storage_2_a', 'Rebalancer has been started') == nil do fiber.sleep(0.1) vshard.storage.rebalancer_wakeup() end
 ---
 ...
 check_reloaded()
diff --git a/test/storage/reload.test.lua b/test/storage/reload.test.lua
index 6e19a92..64c3a60 100644
--- a/test/storage/reload.test.lua
+++ b/test/storage/reload.test.lua
@@ -59,9 +59,9 @@ vshard.storage.module_version()
 
 check_reloaded()
 
-while test_run:grep_log('storage_2_a', 'Garbage collector has been reloaded') == nil do fiber.sleep(0.1) end
-while test_run:grep_log('storage_2_a', 'Recovery has been reloaded') == nil do fiber.sleep(0.1) vshard.storage.recovery_wakeup() end
-while test_run:grep_log('storage_2_a', 'Rebalancer has been reloaded') == nil do fiber.sleep(0.1) vshard.storage.rebalancer_wakeup() end
+while test_run:grep_log('storage_2_a', 'Garbage collector has been started') == nil do fiber.sleep(0.1) end
+while test_run:grep_log('storage_2_a', 'Recovery has been started') == nil do fiber.sleep(0.1) vshard.storage.recovery_wakeup() end
+while test_run:grep_log('storage_2_a', 'Rebalancer has been started') == nil do fiber.sleep(0.1) vshard.storage.rebalancer_wakeup() end
 
 check_reloaded()
 
diff --git a/test/unit/util.result b/test/unit/util.result
new file mode 100644
index 0000000..ea9edfa
--- /dev/null
+++ b/test/unit/util.result
@@ -0,0 +1,107 @@
+test_run = require('test_run').new()
+---
+...
+util = require('vshard.util')
+---
+...
+test_util = require('util')
+---
+...
+log = require('log')
+---
+...
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+fake_M = {
+    reloadable_func = nil,
+    module_version = 1,
+};
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+-- Check autoreload on function change during failure.
+fake_M.reloadable_function = function () fake_M.reloadable_function = 42; error('Error happened.') end
+---
+...
+util.internal.errinj.RELOADABLE_FUNCTION_CHANGED = true
+---
+...
+util.internal.errinj.RELOADABLE_EXIT = true
+---
+...
+util.reloadable_fiber_f(fake_M, 'reloadable_function', 'Worker_name')
+---
+...
+test_run:grep_log('default', 'reloadable function reloadable_function has changed')
+---
+- reloadable function reloadable_function has changed
+...
+test_run:grep_log('default', 'is reloading')
+---
+- is reloading
+...
+util.internal.errinj.RELOADABLE_FUNCTION_CHANGED = nil
+---
+...
+log.info(string.rep('a', 1000))
+---
+...
+-- Check reload feature.
+fake_M.reloadable_function = function () return true end
+---
+...
+util.reloadable_fiber_f(fake_M, 'reloadable_function', 'Worker_name')
+---
+...
+test_run:grep_log('default', 'is reloading', 1000)
+---
+- is reloading
+...
+stack_size_log = {}
+---
+...
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+fake_M.reloadable_function = function ()
+    table.insert(stack_size_log, #debug.traceback())
+end
+test_run:cmd("setopt delimiter ''");
+---
+...
+-- Chack that stack size is not increased by recursive call of
+-- `reloadable_fiber_f`.
+util.internal.errinj.RELOADABLE_EXIT = nil
+---
+...
+util.internal.errinj.RELOADABLE_STACK_MAX = 10
+---
+...
+util.reloadable_fiber_f(fake_M, 'reloadable_function', 'Worker_name')
+---
+...
+#stack_size_log
+---
+- 10
+...
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+for _, frame_size in pairs(stack_size_log) do
+    if stack_size_log[1] ~= frame_size then
+        error("Stack grows.")
+    end
+end
+test_run:cmd("setopt delimiter ''");
+---
+...
+util.internal.errinj.RELOADABLE_STACK_MAX = nil
+---
+...
diff --git a/test/unit/util.test.lua b/test/unit/util.test.lua
new file mode 100644
index 0000000..fa0aff2
--- /dev/null
+++ b/test/unit/util.test.lua
@@ -0,0 +1,50 @@
+test_run = require('test_run').new()
+util = require('vshard.util')
+test_util = require('util')
+log = require('log')
+
+test_run:cmd("setopt delimiter ';'")
+fake_M = {
+    reloadable_func = nil,
+    module_version = 1,
+};
+test_run:cmd("setopt delimiter ''");
+
+-- Check autoreload on function change during failure.
+fake_M.reloadable_function = function () fake_M.reloadable_function = 42; error('Error happened.') end
+
+util.internal.errinj.RELOADABLE_FUNCTION_CHANGED = true
+util.internal.errinj.RELOADABLE_EXIT = true
+
+util.reloadable_fiber_f(fake_M, 'reloadable_function', 'Worker_name')
+test_run:grep_log('default', 'reloadable function reloadable_function has changed')
+test_run:grep_log('default', 'is reloading')
+util.internal.errinj.RELOADABLE_FUNCTION_CHANGED = nil
+log.info(string.rep('a', 1000))
+
+-- Check reload feature.
+fake_M.reloadable_function = function () return true end
+util.reloadable_fiber_f(fake_M, 'reloadable_function', 'Worker_name')
+test_run:grep_log('default', 'is reloading', 1000)
+
+stack_size_log = {}
+test_run:cmd("setopt delimiter ';'")
+fake_M.reloadable_function = function ()
+    table.insert(stack_size_log, #debug.traceback())
+end
+test_run:cmd("setopt delimiter ''");
+
+-- Chack that stack size is not increased by recursive call of
+-- `reloadable_fiber_f`.
+util.internal.errinj.RELOADABLE_EXIT = nil
+util.internal.errinj.RELOADABLE_STACK_MAX = 10
+util.reloadable_fiber_f(fake_M, 'reloadable_function', 'Worker_name')
+#stack_size_log
+test_run:cmd("setopt delimiter ';'")
+for _, frame_size in pairs(stack_size_log) do
+    if stack_size_log[1] ~= frame_size then
+        error("Stack grows.")
+    end
+end
+test_run:cmd("setopt delimiter ''");
+util.internal.errinj.RELOADABLE_STACK_MAX = nil
diff --git a/vshard/util.lua b/vshard/util.lua
index bb71318..fa51701 100644
--- a/vshard/util.lua
+++ b/vshard/util.lua
@@ -2,6 +2,24 @@
 local log = require('log')
 local fiber = require('fiber')
 
+local MODULE_INTERNALS = '__module_vshard_util'
+local M = rawget(_G, MODULE_INTERNALS)
+if not M then
+    --
+    -- The module is loaded for the first time.
+    --
+    M = {
+        -- Latest versions of functions.
+        reloadable_fiber_f = nil,
+        errinj = {
+            RELOADABLE_STACK_MAX = nil,
+            RELOADABLE_EXIT = nil,
+        }
+
+    }
+    rawset(_G, MODULE_INTERNALS, M)
+end
+
 --
 -- Extract parts of a tuple.
 -- @param tuple Tuple to extract a key from.
@@ -19,33 +37,43 @@ local function tuple_extract_key(tuple, parts)
 end
 
 --
--- Wrapper to run @a func in infinite loop and restart it on the
--- module reload. This function CAN NOT BE AUTORELOADED. To update
--- it you must manualy stop all fibers, run by this function, do
--- reload, and then restart all stopped fibers. This can be done,
--- for example, by calling vshard.storage/router.cfg() again with
--- the same config as earlier.
+-- Wrapper to run a func in infinite loop and restart it on
+-- errors and module reload.
+-- To handle module reload and run new version of a function
+-- in the module, the function should just return.
 --
--- @param func Reloadable function to run. It must accept current
---        module version as an argument, and interrupt itself,
---        when it is changed.
--- @param worker_name Name of the function. Usual infinite fiber
---        represents a background subsystem, which has a name. For
---        example: "Garbage Collector", "Recovery", "Discovery",
---        "Rebalancer".
--- @param M Module which can reload.
+-- @param module Module which can be reloaded.
+-- @param func_name Name of a function to be executed in the
+--        module.
+-- @param worker_name Name of the reloadable background subsystem.
+--        For example: "Garbage Collector", "Recovery", "Discovery",
+--        "Rebalancer". Used only for an activity logging.
 --
-local function reloadable_fiber_f(M, func_name, worker_name)
-    while true do
-        local ok, err = pcall(M[func_name], M.module_version)
-        if not ok then
-            log.error('%s has been failed: %s', worker_name, err)
-            fiber.yield()
-        else
-            log.info('%s has been reloaded', worker_name)
-            fiber.yield()
+local function reloadable_fiber_f(module, func_name, worker_name)
+    log.info('%s has been started', worker_name)
+    local func = module[func_name]
+    local ok, err = pcall(func, module.module_version)
+    if not ok then
+        log.error('%s has been failed: %s', worker_name, err)
+        if func ~= module[func_name] then
+            log.warn('%s reloadable function %s has changed',
+                        worker_name, func_name)
         end
     end
+    fiber.yield()
+    log.info('%s is reloading', worker_name)
+    if M.errinj.RELOADABLE_EXIT then
+        return
+    end
+    -- luajit drops this frame if next function is called in
+    -- return statement.
+    if M.errinj.RELOADABLE_STACK_MAX then
+        M.errinj.RELOADABLE_STACK_MAX = M.errinj.RELOADABLE_STACK_MAX - 1
+        if M.errinj.RELOADABLE_STACK_MAX == 0 then
+            return
+        end
+    end
+    return M.reloadable_fiber_f(module, func_name, worker_name)
 end
 
 --
@@ -75,8 +103,12 @@ local function generate_self_checker(obj_name, func_name, mt, func)
     end
 end
 
+-- Update latest versions of function
+M.reloadable_fiber_f = reloadable_fiber_f
+
 return {
     tuple_extract_key = tuple_extract_key,
     reloadable_fiber_f = reloadable_fiber_f,
     generate_self_checker = generate_self_checker,
+    internal = M,
 }
-- 
2.14.1





More information about the Tarantool-patches mailing list