From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH] box: serialize calls to box.cfg Date: Tue, 7 Aug 2018 12:43:26 +0300 Message-Id: <619fdb85de8edff29334abcf75ffec7aa8647723.1533634796.git.vdavydov.dev@gmail.com> To: tarantool-patches@freelists.org List-ID: It is dangerous to call box.cfg() concurrently from different fibers. For example, replication configuration uses static variables and yields so calling it concurrently can result in a crash. To make sure it never happens, let's protect box.cfg() with a lock. Closes #3606 --- https://github.com/tarantool/tarantool/issues/3606 https://github.com/tarantool/tarantool/tree/dv/gh-3606-concurrent-replication-cfg-fix src/box/lua/load_cfg.lua | 21 ++++++++++++++++++--- test/replication/misc.result | 24 ++++++++++++++++++++++++ test/replication/misc.test.lua | 10 ++++++++++ 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua index 0b668cdc..4de79220 100644 --- a/src/box/lua/load_cfg.lua +++ b/src/box/lua/load_cfg.lua @@ -5,6 +5,21 @@ local json = require('json') local private = require('box.internal') local urilib = require('uri') local math = require('math') +local fiber = require('fiber') + +-- Function decorator that is used to prevent box.cfg() from +-- being called concurrently by different fibers. +local lock = fiber.channel(1) +local function locked(f) + return function(...) + lock:put(true) + local status = pcall(f, ...) + lock:get() + if not status then + box.error() + end + end +end -- all available options local default_cfg = { @@ -409,7 +424,7 @@ local function load_cfg(cfg) -- Save new box.cfg box.cfg = cfg if not pcall(private.cfg_check) then - box.cfg = load_cfg -- restore original box.cfg + box.cfg = locked(load_cfg) -- restore original box.cfg return box.error() -- re-throw exception from check_cfg() end -- Restore box members after initial configuration @@ -423,7 +438,7 @@ local function load_cfg(cfg) __newindex = function(table, index) error('Attempt to modify a read-only table') end, - __call = reload_cfg, + __call = locked(reload_cfg), }) private.cfg_load() for key, fun in pairs(dynamic_cfg) do @@ -439,7 +454,7 @@ local function load_cfg(cfg) box.schema.upgrade{auto = true} end end -box.cfg = load_cfg +box.cfg = locked(load_cfg) -- gh-810: -- hack luajit default cpath diff --git a/test/replication/misc.result b/test/replication/misc.result index ff0dbf54..9d9d010c 100644 --- a/test/replication/misc.result +++ b/test/replication/misc.result @@ -23,6 +23,30 @@ box.cfg{replication = {'127.0.0.1:12345', box.cfg.listen}} - error: 'Incorrect value for option ''replication'': failed to connect to one or more replicas' ... +-- gh-3606 - Tarantool crashes if box.cfg.replication is updated concurrently +fiber = require('fiber') +--- +... +c = fiber.channel(2) +--- +... +f = function() fiber.create(function() pcall(box.cfg, {replication = {12345}}) c:put(true) end) end +--- +... +f() +--- +... +f() +--- +... +c:get() +--- +- true +... +c:get() +--- +- true +... box.cfg{replication_timeout = replication_timeout, replication_connect_timeout = replication_connect_timeout} --- ... diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua index c05e5216..da5a9023 100644 --- a/test/replication/misc.test.lua +++ b/test/replication/misc.test.lua @@ -9,6 +9,16 @@ replication_timeout = box.cfg.replication_timeout replication_connect_timeout = box.cfg.replication_connect_timeout box.cfg{replication_timeout=0.05, replication_connect_timeout=0.05, replication={}} box.cfg{replication = {'127.0.0.1:12345', box.cfg.listen}} + +-- gh-3606 - Tarantool crashes if box.cfg.replication is updated concurrently +fiber = require('fiber') +c = fiber.channel(2) +f = function() fiber.create(function() pcall(box.cfg, {replication = {12345}}) c:put(true) end) end +f() +f() +c:get() +c:get() + box.cfg{replication_timeout = replication_timeout, replication_connect_timeout = replication_connect_timeout} -- gh-3111 - Allow to rebootstrap a replica from a read-only master -- 2.11.0