From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 2F3C5267FA for ; Mon, 30 Jul 2018 07:55:34 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id B2vOvv0j6qkx for ; Mon, 30 Jul 2018 07:55:34 -0400 (EDT) Received: from smtp14.mail.ru (smtp14.mail.ru [94.100.181.95]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id DFF07267B0 for ; Mon, 30 Jul 2018 07:55:33 -0400 (EDT) Subject: [tarantool-patches] Re: [PATCH 1/4] Fix races related to object outdating References: <18f2ede05fa4a77bf0bd2abb64c25df0e3c574d6.1532940401.git.avkhatskevich@tarantool.org> From: Vladislav Shpilevoy Message-ID: <4a8e9e20-561d-6896-ea8c-8517add2bc50@tarantool.org> Date: Mon, 30 Jul 2018 14:55:31 +0300 MIME-Version: 1.0 In-Reply-To: <18f2ede05fa4a77bf0bd2abb64c25df0e3c574d6.1532940401.git.avkhatskevich@tarantool.org> Content-Type: text/plain; charset=utf-8; format=flowed Content-Language: en-US Content-Transfer-Encoding: 7bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org, AKhatskevich Hi! Thanks for the patch! See 1 comment below and fixes on the branch in a separate commit. Please, look and squash. On 30/07/2018 11:56, AKhatskevich wrote: > Reload/reconfigure may replace many of M fields during any yield. > Old objects should not be accessed after they are outdated. > > This commit handles such cases within `vshard.router`. 
> --- > vshard/replicaset.lua | 30 ++++++++++++++----------- > vshard/router/init.lua | 58 +++++++++++++++++++++++++++++-------------------- > vshard/storage/init.lua | 1 + > 3 files changed, 52 insertions(+), 37 deletions(-) > > diff --git a/vshard/router/init.lua b/vshard/router/init.lua > index 142ddb6..1a0ed2f 100644 > --- a/vshard/router/init.lua > +++ b/vshard/router/init.lua > @@ -88,15 +94,18 @@ local function bucket_discovery(bucket_id) > log.verbose("Discovering bucket %d", bucket_id) > local last_err = nil > local unreachable_uuid = nil > - for uuid, replicaset in pairs(M.replicasets) do > - local _, err = > - replicaset:callrw('vshard.storage.bucket_stat', {bucket_id}) > - if err == nil then > - bucket_set(bucket_id, replicaset) > - return replicaset > - elseif err.code ~= lerror.code.WRONG_BUCKET then > - last_err = err > - unreachable_uuid = uuid > + for uuid, _ in pairs(M.replicasets) do > + -- Handle reload/reconfigure. > + replicaset = M.replicasets[uuid] > + if replicaset then > + local _, err = > + replicaset:callrw('vshard.storage.bucket_stat', {bucket_id}) > + if err == nil then > + return bucket_set(bucket_id, replicaset.uuid) Do not return the error immediately. You can continue iterating in the hope of finding the bucket on one of the next replicasets. So here you do 'if bucket_set ~= nil then return result end', i.e. return only when the result of bucket_set is not nil. Otherwise, continue the loop. > + elseif err.code ~= lerror.code.WRONG_BUCKET then > + last_err = err > + unreachable_uuid = uuid > + end > end > end > local err = nil