From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 8135B26534 for ; Fri, 15 Jun 2018 08:48:14 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id PseZd1oiijy3 for ; Fri, 15 Jun 2018 08:48:14 -0400 (EDT) Received: from smtp34.i.mail.ru (smtp34.i.mail.ru [94.100.177.94]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 94CBA264E9 for ; Fri, 15 Jun 2018 08:48:13 -0400 (EDT) From: AKhatskevich Subject: [tarantool-patches] [PATCH 2/2] Fix discovery/reconfigure race Date: Fri, 15 Jun 2018 15:47:59 +0300 Message-Id: In-Reply-To: References: In-Reply-To: References: Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: v.shpilevoy@tarantool.org, tarantool-patches@freelists.org This commit prevents the discovery fiber from discovering old replicasets and spoiling `route_map`. --- test/router/router.result | 62 +++++++++++++++++++++++++++++++++++++++++++++ test/router/router.test.lua | 42 ++++++++++++++++++++++++++++++ vshard/router/init.lua | 15 ++++++++++- 3 files changed, 118 insertions(+), 1 deletion(-) diff --git a/test/router/router.result b/test/router/router.result index 5643f3e..e61505e 100644 --- a/test/router/router.result +++ b/test/router/router.result @@ -1095,6 +1095,68 @@ for bucket, old_rs in pairs(bucket_to_old_rs) do end; --- ... +-- +-- Check route_map is not filled with old replica objects after +-- reconfigure. 
+-- +-- Perform #replicasets phases of discovery, to update replicasets +-- object in for loop of discovery fiber since previous cfg. +for _, __ in pairs(vshard.router.internal.replicasets) do + vshard.router.discovery_wakeup() + fiber.sleep(0.02) +end; +--- +... +-- Simulate long `callro`. +-- Stuck on first rs in replicasets. +vshard.router.internal.errinj.LONG_DISCOVERY = true; +--- +... +for _, __ in pairs(vshard.router.internal.replicasets) do + vshard.router.discovery_wakeup() + fiber.sleep(0.02) +end; +--- +... +vshard.router.cfg(cfg); +--- +... +vshard.router.internal.errinj.LONG_DISCOVERY = nil; +--- +... +-- Do discovery iteration. +vshard.router.discovery_wakeup() +fiber.sleep(0.02) + +rs_cnt = 0; +--- +... +new_replicasets = {} +for _, rs in pairs(vshard.router.internal.replicasets) do + new_replicasets[rs] = true + rs_cnt = rs_cnt + 1 +end; +--- +... +rs_cnt; +--- +- 2 +... +bucket_cnt = 0; +--- +... +for bucket_id, rs in pairs(vshard.router.internal.route_map) do + if not new_replicasets[rs] then + error('Old object added to route_map.') + end + bucket_cnt = bucket_cnt + 1 +end; +--- +... +bucket_cnt; +--- +- 3000 +... test_run:cmd("setopt delimiter ''"); --- - true diff --git a/test/router/router.test.lua b/test/router/router.test.lua index 106f3d8..528a84b 100644 --- a/test/router/router.test.lua +++ b/test/router/router.test.lua @@ -411,6 +411,48 @@ for bucket, old_rs in pairs(bucket_to_old_rs) do error("route_map was not updataed.") end end; + +-- +-- Check route_map is not filled with old replica objects after +-- reconfigure. +-- + +-- Perform #replicasets phases of discovery, to update replicasets +-- object in for loop of discovery fiber since previous cfg. +for _, __ in pairs(vshard.router.internal.replicasets) do + vshard.router.discovery_wakeup() + fiber.sleep(0.02) +end; +-- Simulate long `callro`. +-- Stuck on first rs in replicasets. 
+vshard.router.internal.errinj.LONG_DISCOVERY = true; +for _, __ in pairs(vshard.router.internal.replicasets) do + vshard.router.discovery_wakeup() + fiber.sleep(0.02) +end; + +vshard.router.cfg(cfg); +vshard.router.internal.errinj.LONG_DISCOVERY = nil; +-- Do discovery iteration. +vshard.router.discovery_wakeup() +fiber.sleep(0.02) + +rs_cnt = 0; +new_replicasets = {} +for _, rs in pairs(vshard.router.internal.replicasets) do + new_replicasets[rs] = true + rs_cnt = rs_cnt + 1 +end; +rs_cnt; +bucket_cnt = 0; +for bucket_id, rs in pairs(vshard.router.internal.route_map) do + if not new_replicasets[rs] then + error('Old object added to route_map.') + end + bucket_cnt = bucket_cnt + 1 +end; +bucket_cnt; + test_run:cmd("setopt delimiter ''"); _ = test_run:cmd("switch default") diff --git a/vshard/router/init.lua b/vshard/router/init.lua index 7e765fa..df5b343 100644 --- a/vshard/router/init.lua +++ b/vshard/router/init.lua @@ -127,10 +127,23 @@ local function discovery_f(module_version) local iterations_until_lua_gc = consts.COLLECT_LUA_GARBAGE_INTERVAL / consts.DISCOVERY_INTERVAL while module_version == M.module_version do - for _, replicaset in pairs(M.replicasets) do + local old_replicasets = M.replicasets + for rs_uuid, replicaset in pairs(M.replicasets) do local active_buckets, err = replicaset:callro('vshard.storage.buckets_discovery', {}, {timeout = 2}) + while M.errinj.LONG_DISCOVERY do + -- Stuck on the first replicaset. + if rs_uuid ~= select(1, next(M.replicasets)) then + break + end + lfiber.sleep(0.01) + end + -- Renew replicasets object in case of reconfigure + -- and reload events. + if M.replicasets ~= old_replicasets then + break + end if not active_buckets then log.error('Error during discovery %s: %s', replicaset, err) else -- 2.14.1