From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp39.i.mail.ru (smtp39.i.mail.ru [94.100.177.99]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id C110D469710 for ; Tue, 12 May 2020 18:59:52 +0300 (MSK) Date: Tue, 12 May 2020 15:59:51 +0000 From: Nikita Pettik Message-ID: <20200512155951.GB12225@tarantool.org> References: <79b23da4-1ba5-18ac-6651-bab04a564fd9@tarantool.org> <20200512141456.GA12225@tarantool.org> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <20200512141456.GA12225@tarantool.org> Subject: Re: [Tarantool-patches] [PATCH v4 0/2] vinyl: fix uninitialized memory accesses List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Vladislav Shpilevoy Cc: tarantool-patches@dev.tarantool.org On 12 May 14:14, Nikita Pettik wrote: > On 10 May 21:49, Vladislav Shpilevoy wrote: > > Hi! Thanks for the patchset! > > > > LGTM. > > I found that the test occasionally fails. For instance: > https://gitlab.com/tarantool/tarantool/-/jobs/548447188 > > So I have to push the fix below to make it stable. In a nutshell, we > can't rely on range/run count correlation if we want to avoid any > possible races: ranges/runs are updated during compaction, > meanwhile in fact we should wait till compaction is completed > (since due to failures/errors some results may turn out to be rolled back). > > It seems pretty straightforward, so I hope you don't mind this fix. Pushed both patches to 1.10. Pushed the first patch and a test extracted from the second patch to 2.3, 2.4 and master (since the problem is already fixed on the master branch via 2f17c92). Updated changelogs accordingly. Branch is dropped. Before that I've verified that CI statuses are OK. 
> diff --git a/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua b/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua > index 547ab628e..4b3c55505 100644 > --- a/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua > +++ b/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua > @@ -14,12 +14,18 @@ function dump(big) > box.snapshot() > end; > > -function compact() > +-- Tuple clean-up takes place after compaction is completed. > +-- Meanwhile range count is updated during compaction process. > +-- So instead of relying on range/run match, let's check explicitly > +-- number of completed tasks. > +-- > +function compact(tasks_expected) > + local scheduler = box.stat.vinyl().scheduler > + local tasks_completed = scheduler.tasks_completed > s.index.pk:compact() > repeat > fiber.sleep(0.001) > - local info = s.index.pk:stat() > - until info.range_count == info.run_count > + until box.stat.vinyl().scheduler.tasks_completed >= tasks_completed + tasks_expected > end; > test_run:cmd("setopt delimiter ''"); > > @@ -32,7 +38,7 @@ dump() > assert(s.index.pk:stat().range_count == 1) > assert(s.index.pk:stat().run_count == 2) > > -compact() > +compact(1) > assert(s.index.pk:stat().range_count == 1) > assert(s.index.pk:stat().run_count == 1) > > @@ -46,7 +52,7 @@ errinj.set('ERRINJ_VY_STMT_ALLOC', 0) > -- Still split_range() fails, as a result we get one range > -- instead two. > -- > -compact() > +compact(1) > assert(s.index.pk:stat().range_count == 1) > assert(s.index.pk:stat().run_count == 1) > assert(errinj.get('ERRINJ_VY_STMT_ALLOC') == -1) > @@ -63,7 +69,7 @@ _ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size > dump(true) > dump() > > -compact() > +compact(1) > > dump() > > @@ -72,7 +78,7 @@ errinj.set('ERRINJ_VY_STMT_ALLOC', 5) > -- Compaction of first range fails, so it is re-scheduled and > -- then successfully finishes at the second attempt. 
> -- > -compact() > +compact(2) > assert(s.index.pk:stat().range_count == 2) > assert(s.index.pk:stat().run_count == 2) > assert(errinj.get('ERRINJ_VY_STMT_ALLOC') == -1) > @@ -92,13 +98,13 @@ _ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size > dump(true) > dump() > > -compact() > +compact(1) > > dump() > > errinj = box.error.injection > errinj.set('ERRINJ_VY_READ_VIEW_MERGE_FAIL', true) > -compact() > +compact(2) > assert(s.index.pk:stat().range_count == 2) > assert(s.index.pk:stat().run_count == 2) > assert(errinj.get('ERRINJ_VY_READ_VIEW_MERGE_FAIL') == false) > @@ -117,7 +123,7 @@ _ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size > dump(true) > dump() > > -compact() > +compact(1) > > dump() > assert(s.index.pk:stat().range_count == 1) > @@ -125,14 +131,7 @@ assert(s.index.pk:stat().run_count == 2) > > errinj.set('ERRINJ_VY_WRITE_ITERATOR_START_FAIL', true) > errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0.1) > -tasks_completed = box.stat.vinyl().scheduler.tasks_completed > -s.index.pk:compact() > --- Tuple clean-up takes place after compaction is completed. > --- Meanwhile range count is updated during compaction process. > --- So instead of relying on range/run match, let's check explicitly > --- number of completed tasks. > --- > -repeat fiber.sleep(0.001) until box.stat.vinyl().scheduler.tasks_completed >= tasks_completed + 1 > +compact(2) > >