From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 98DA16EC40; Thu, 3 Jun 2021 00:34:37 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 98DA16EC40 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1622669677; bh=XJ5ozGJaF19utNs3et06LpPLAtUpmGEZ91R2ju7a6QU=; h=To:Date:Subject:List-Id:List-Unsubscribe:List-Archive:List-Post: List-Help:List-Subscribe:From:Reply-To:From; b=Lg3wBdRwlMfQK/L6K6jJra7wypuYuAC/DJJXpT53T+aAtVAi27edPElMNF6zhNy5z OeKW6oxf+Y6Z55oH+Ejcs3iNHGBhK4UuC8hukIsLJVMo/Y5dnPbVTsABs2AmN2a6SZ 4+ypevzwjlxeVXu4HdZGR8JXvEhXA9Y+nNrUJibY= Received: from smtp61.i.mail.ru (smtp61.i.mail.ru [217.69.128.41]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 36F456EC40 for ; Thu, 3 Jun 2021 00:34:32 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 36F456EC40 Received: by smtp61.i.mail.ru with esmtpa (envelope-from ) id 1loYVT-0004Cu-2i; Thu, 03 Jun 2021 00:34:31 +0300 To: tarantool-patches@dev.tarantool.org, yaroslav.dynnikov@tarantool.org, olegrok@tarantool.org Date: Wed, 2 Jun 2021 23:34:29 +0200 Message-Id: X-Mailer: git-send-email 2.24.3 (Apple Git-128) MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-7564579A: 646B95376F6C166E X-77F55803: 4F1203BC0FB41BD9D5B0DA836B685C543EF5F9E25E4001B3518B676B8BE4A4C7182A05F5380850401B7CFD8379238396071A2DA4CF6FF44997D388EBB315D12AAE7FD6DAE7DB5206 X-7FA49CB5: 
FF5795518A3D127A4AD6D5ED66289B5278DA827A17800CE73C871DD2182510D5EA1F7E6F0F101C67BD4B6F7A4D31EC0BCC500DACC3FED6E28638F802B75D45FF8AA50765F790063725FA9CD6081C82518638F802B75D45FF36EB9D2243A4F8B5A6FCA7DBDB1FC311F39EFFDF887939037866D6147AF826D8F64071289A5E7FDA70A2721E3B37C0F4117882F4460429724CE54428C33FAD305F5C1EE8F4F765FC09CDF8F23AD6196DA471835C12D1D9774AD6D5ED66289B52BA9C0B312567BB23117882F44604297287769387670735200AC5B80A05675ACD28451B159A507268D2E47CDBA5A96583BA9C0B312567BB2376E601842F6C81A19E625A9149C048EEC24E1E72F37C03A028F6BDBBAB179F4ED8FC6C240DEA7642DBF02ECDB25306B2B78CF848AE20165D0A6AB1C7CE11FEE3E753FA5741D1AD0203F1AB874ED89028C4224003CC836476EA7A3FFF5B025636E2021AF6380DFAD1A18204E546F3947CB11811A4A51E3B096D1867E19FE1407959CC434672EE6371089D37D7C0E48F6C8AA50765F79006373BC478629CBEC79DEFF80C71ABB335746BA297DBC24807EABDAD6C7F3747799A X-B7AD71C0: AC4F5C86D027EB782CDD5689AFBDA7A24209795067102C07E8F7B195E1C97831725846AC59DC687A60D789705920FE00 X-C1DE0DAB: C20DE7B7AB408E4181F030C43753B8183A4AFAF3EA6BDC44671AA518CC42EA90DBB9428EB2128D674B053E82B3DFC5D6C9239B022A2193E19C2B6934AE262D3EE7EAB7254005DCED0466EEEF578295AF1E0A4E2319210D9B64D260DF9561598F01A9E91200F654B06CE7B4E551862B828E8E86DC7131B365E7726E8460B7C23C X-C8649E89: 4E36BF7865823D7055A7F0CF078B5EC49A30900B95165D34324A9840C798D500CB9F78347F2E14003738EBE3C01AA87653D6FF2A133ED83357A3AC7023F6E99B1D7E09C32AA3244C58962512EA1675845B3D8B7C0077182739C99C45E8D137E9FACE5A9C96DEB163 X-D57D3AED: 3ZO7eAau8CL7WIMRKs4sN3D3tLDjz0dLbV79QFUyzQ2Ujvy7cMT6pYYqY16iZVKkSc3dCLJ7zSJH7+u4VD18S7Vl4ZUrpaVfd2+vE6kuoey4m4VkSEu530nj6fImhcD4MUrOEAnl0W826KZ9Q+tr5ycPtXkTV4k65bRjmOUUP8cvGozZ33TWg5HZplvhhXbhDGzqmQDTd6OAevLeAnq3Ra9uf7zvY2zzsIhlcp/Y7m53TZgf2aB4JOg4gkr2bioj+mfSpkNmA2r9MmMKEAIhBA== X-Mailru-Sender: 504CC1E875BF3E7D9BC0E5172ADA311028CAE3B036F8C4D24342A8D01290B1198B323A312B9E97B107784C02288277CA03E0582D3806FB6A5317862B1921BA260ED6CFD6382C13A6112434F685709FCF0DA7A0AF5A3A8387 X-Mras: Ok Subject: [Tarantool-patches] [PATCH vshard 1/1] 
recovery: relax recovery messages verbosity X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Vladislav Shpilevoy via Tarantool-patches Reply-To: Vladislav Shpilevoy Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" The recovery fiber on the storages used to print messages about starting recovery even when no recovery was needed yet: Starting ... buckets recovery step Finish bucket recovery step It happened a lot during rebalancing even when rebalancing worked fine, because receiving/sending buckets appear during it, and recovery double-checks whether they are really being transferred and not stuck. The patch makes the recovery fiber not count buckets whose transfer is actually in progress as broken. Hence it won't print the recovery messages anymore unless the transfer was really interrupted. Along with that, the recovery now prints more details about the first bucket which triggered the real recovery. 
Closes #274 --- Branch: http://github.com/tarantool/vshard/tree/gerold103/gh-274-user-friendly-recovery Issue: https://github.com/tarantool/vshard/issues/274 vshard/storage/init.lua | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua index 7045d91..8a019fa 100644 --- a/vshard/storage/init.lua +++ b/vshard/storage/init.lua @@ -736,21 +736,25 @@ local function recovery_step_by_type(type) local is_empty = true local recovered = 0 local total = 0 + local start_format = 'Starting %s buckets recovery step' for _, bucket in _bucket.index.status:pairs(type) do total = total + 1 local bucket_id = bucket.id if M.rebalancer_transfering_buckets[bucket_id] then goto continue end - if is_empty then - log.info('Starting %s buckets recovery step', type) - end - is_empty = false assert(bucket_is_transfer_in_progress(bucket)) - local destination = M.replicasets[bucket.destination] + local peer_uuid = bucket.destination + local destination = M.replicasets[peer_uuid] if not destination or not destination.master then -- No replicaset master for a bucket. Wait until it -- appears. + if is_empty then + log.info(start_format, type) + log.error('Can not find for bucket %s its peer %s', bucket_id, + peer_uuid) + is_empty = false + end goto continue end local remote_bucket, err = @@ -759,6 +763,15 @@ local function recovery_step_by_type(type) -- not be used to recovery anything. Try later. if not remote_bucket and (not err or err.type ~= 'ShardingError' or err.code ~= lerror.code.WRONG_BUCKET) then + if is_empty then + if err == nil then + err = 'unknown' + end + log.info(start_format, type) + log.error('Error during recovery of bucket %s on replicaset '.. 
+ '%s: %s', bucket_id, peer_uuid, err) + is_empty = false + end goto continue end -- Do nothing until the bucket on both sides stopped @@ -772,13 +785,20 @@ local function recovery_step_by_type(type) if not bucket or not bucket_is_transfer_in_progress(bucket) then goto continue end + if is_empty then + log.info(start_format, type) + end if recovery_local_bucket_is_garbage(bucket, remote_bucket) then _bucket:update({bucket_id}, {{'=', 2, consts.BUCKET.GARBAGE}}) recovered = recovered + 1 elseif recovery_local_bucket_is_active(bucket, remote_bucket) then _bucket:replace({bucket_id, consts.BUCKET.ACTIVE}) recovered = recovered + 1 + elseif is_empty then + log.info('Bucket %s is %s local and %s on replicaset %s, waiting', + bucket_id, bucket.status, remote_bucket.status, peer_uuid) end + is_empty = false ::continue:: end if not is_empty then -- 2.24.3 (Apple Git-128)