From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp42.i.mail.ru (smtp42.i.mail.ru [94.100.177.102]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id BCCA145C304 for ; Mon, 30 Nov 2020 00:41:52 +0300 (MSK) Content-Type: text/plain; charset=utf-8 Mime-Version: 1.0 (Mac OS X Mail 13.4 \(3608.120.23.2.4\)) From: Sergey Ostanevich In-Reply-To: <12e5f150-8e08-1004-ad8a-c6bd1a04fd5f@tarantool.org> Date: Mon, 30 Nov 2020 00:41:50 +0300 Content-Transfer-Encoding: quoted-printable Message-Id: <8BC41E7D-1610-4266-BAFD-3775A9090B64@tarantool.org> References: <20201031162911.61876-1-sergos@tarantool.org> <12e5f150-8e08-1004-ad8a-c6bd1a04fd5f@tarantool.org> Subject: Re: [Tarantool-patches] [PATCH v2] core: handle fiber cancellation for fiber.cond List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Vladislav Shpilevoy Cc: tarantool-patches@dev.tarantool.org, alexander.turenko@tarantool.org Hi! Thank you for review! Force-pushed the sergos/gh-5013-fiber-cond branch, new patches are = inline. Sergos > On 26 Nov 2020, at 00:32, Vladislav Shpilevoy = wrote: >=20 > Hi! Thanks for the fixes! >=20 > Technically the patch is good! See 3 non-technical comments below. >=20 >> diff --git a/src/box/relay.cc b/src/box/relay.cc >> index b68b45e00..a7bc2c6f7 100644 >> --- a/src/box/relay.cc >> +++ b/src/box/relay.cc >> @@ -780,6 +771,16 @@ relay_subscribe_f(va_list ap) >> cbus_endpoint_destroy(&relay->endpoint, cbus_process); >>=20 >> relay_exit(relay); >> + >> + /* >> + * Log the error that caused the relay to break the loop. >> + * Don't clear the error for status reporting. >> + */ >> + assert(!diag_is_empty(&relay->diag)); >> + diag_set_error(diag_get(), diag_last_error(&relay->diag)); >> + diag_log(); >> + say_crit("exiting the relay loop"); >> + >=20 > 1. I suggest you to move this to a separate commit, before you > change fiber cond behaviour. Because it seems it is not related > to conds really. And even if it would be related, still the > issue looks like a separate bug. I failed to make a test that is differs from the fiber_cond setting a diag. So I put it is as a preparation step and mentioned it in=20 commit message.=20 =E2=80=94=E2=80=94- The fiber_cond_wait() will set an error in case fiber is cancelled. As a result, the current diag in the fiber can be reset during the wal_clear_watcher(). To prevent such overwrite the diag copy from the relay into current fiber is moved to the exit of the relay_subscribe_f(). Part of #5013 --- src/box/relay.cc | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/box/relay.cc b/src/box/relay.cc index 1e77e0d9b..f62e925af 100644 --- a/src/box/relay.cc +++ b/src/box/relay.cc @@ -753,15 +753,6 @@ relay_subscribe_f(va_list ap) if (!relay->replica->anon) relay_send_is_raft_enabled(relay, &raft_enabler, false); - /* - * Log the error that caused the relay to break the loop. - * Don't clear the error for status reporting. - */ - assert(!diag_is_empty(&relay->diag)); - diag_set_error(diag_get(), diag_last_error(&relay->diag)); - diag_log(); - say_crit("exiting the relay loop"); - /* * Clear garbage collector trigger and WAL watcher. * trigger_clear() does nothing in case the triggers @@ -780,6 +771,16 @@ relay_subscribe_f(va_list ap) cbus_endpoint_destroy(&relay->endpoint, cbus_process); relay_exit(relay); + + /* + * Log the error that caused the relay to break the loop. + * Don't clear the error for status reporting. + */ + assert(!diag_is_empty(&relay->diag)); + diag_set_error(diag_get(), diag_last_error(&relay->diag)); + diag_log(); + say_crit("exiting the relay loop"); + return -1; } -- >=20 >> return -1; >> } >> diff --git a/test/app-tap/gh-5013-fiber-cancel.test.lua = b/test/app-tap/gh-5013-fiber-cancel.test.lua >> new file mode 100755 >> index 000000000..ca4ca2c90 >> --- /dev/null >> +++ b/test/app-tap/gh-5013-fiber-cancel.test.lua >> @@ -0,0 +1,23 @@ >> +#!/usr/bin/env tarantool >> + >> +local tap =3D require('tap') >> +local fiber =3D require('fiber') >> +local test =3D tap.test("gh-5013-fiber-cancel") >> + >> +test:plan(2) >> + >> +local result =3D {} >> + >> +function test_f() >> + local cond =3D fiber.cond() >> + local res, err =3D pcall(cond.wait, cond) >> + result.res =3D res >> + result.err =3D err >> +end >> + >> +local f =3D fiber.create(test_f) >> +f:cancel() >> +fiber.yield() >> + >> +test:ok(result.res =3D=3D false, 'expected result is false') >> +test:ok(tostring(result.err) =3D=3D 'fiber is cancelled', 'fiber = cancellation should be reported') >=20 > 2. I think you are also supposed to call os.exit with test:check() > like other tap tests do. Otherwise it probably always ends with 0 > code, and won't work properly when we will make tap tests non-diff > based. Done.=20 >=20 >> diff --git a/test/box/gh-4834-netbox-fiber-cancel.result = b/test/box/gh-4834-netbox-fiber-cancel.result >> new file mode 100644 >> index 000000000..cb631995c >> --- /dev/null >> +++ b/test/box/gh-4834-netbox-fiber-cancel.result >> @@ -0,0 +1,62 @@ >> +-- test-run result file version 2 >> +remote =3D require('net.box') >> + | --- >> + | ... >> +fiber =3D require('fiber') >> + | --- >> + | ... >> +test_run =3D require('test_run').new() >> + | --- >> + | ... >> + >> +-- #4834: Cancelling fiber doesn't interrupt netbox operations >> +function infinite_call() fiber.channel(1):get() end >=20 > 3. I noticed you never finish the fiber. So it hangs there forever > or until a restart. Better not leave any test artifacts and finish > it completely. Just save the channel to a global variable and put > something into it in the end. The channel is created in the remote execution - I just kept it until the end of the routine and closed it.=20 =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D Before this patch fiber.cond():wait() just returns for cancelled fiber. In contrast fiber.channel():get() throws "fiber is canceled" error. This patch unifies behaviour of channels and condvars. It also fixes a related net.box module problem #4834 since fiber.cond now performs test for fiber cancellation. Closes #4834 Closes #5013 Co-authored-by: Oleg Babin @TarantoolBot document Title: fiber.cond():wait() throws if fiber is cancelled Currently fiber.cond():wait() throws an error if waiting fiber is cancelled. --- src/box/box.cc | 4 -- src/lib/core/fiber_cond.c | 4 ++ src/lib/core/fiber_cond.h | 2 +- test/app-tap/gh-5013-fiber-cancel.test.lua | 26 ++++++++ test/box/gh-4834-netbox-fiber-cancel.result | 62 +++++++++++++++++++ test/box/gh-4834-netbox-fiber-cancel.test.lua | 28 +++++++++ 6 files changed, 121 insertions(+), 5 deletions(-) create mode 100755 test/app-tap/gh-5013-fiber-cancel.test.lua create mode 100644 test/box/gh-4834-netbox-fiber-cancel.result create mode 100644 test/box/gh-4834-netbox-fiber-cancel.test.lua diff --git a/src/box/box.cc b/src/box/box.cc index 1f7dec362..e02be2c6f 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -307,10 +307,6 @@ box_wait_ro(bool ro, double timeout) while (is_box_configured =3D=3D false || box_is_ro() !=3D ro) { if (fiber_cond_wait_deadline(&ro_cond, deadline) !=3D 0) return -1; - if (fiber_is_cancelled()) { - diag_set(FiberIsCancelled); - return -1; - } } return 0; } diff --git a/src/lib/core/fiber_cond.c b/src/lib/core/fiber_cond.c index 904a350d9..71bb2d04d 100644 --- a/src/lib/core/fiber_cond.c +++ b/src/lib/core/fiber_cond.c @@ -108,6 +108,10 @@ fiber_cond_wait_timeout(struct fiber_cond *c, = double timeout) diag_set(TimedOut); return -1; } + if (fiber_is_cancelled()) { + diag_set(FiberIsCancelled); + return -1; + } return 0; } =20 diff --git a/src/lib/core/fiber_cond.h b/src/lib/core/fiber_cond.h index 87c6f2ca2..2662e0654 100644 --- a/src/lib/core/fiber_cond.h +++ b/src/lib/core/fiber_cond.h @@ -114,7 +114,7 @@ fiber_cond_broadcast(struct fiber_cond *cond); * @param cond condition * @param timeout timeout in seconds * @retval 0 on fiber_cond_signal() call or a spurious wake up - * @retval -1 on timeout, diag is set to TimedOut + * @retval -1 on timeout or fiber cancellation, diag is set */ int fiber_cond_wait_timeout(struct fiber_cond *cond, double timeout); diff --git a/test/app-tap/gh-5013-fiber-cancel.test.lua = b/test/app-tap/gh-5013-fiber-cancel.test.lua new file mode 100755 index 000000000..91c6775dc --- /dev/null +++ b/test/app-tap/gh-5013-fiber-cancel.test.lua @@ -0,0 +1,26 @@ +#!/usr/bin/env tarantool + +local tap =3D require('tap') +local fiber =3D require('fiber') +local test =3D tap.test("gh-5013-fiber-cancel") + +test:plan(2) + +local result =3D {} + +function test_f() + local cond =3D fiber.cond() + local res, err =3D pcall(cond.wait, cond) + result.res =3D res + result.err =3D err +end + +local f =3D fiber.create(test_f) +f:cancel() +fiber.yield() + +print(require('yaml').encode(result)) + +test:ok(result.res =3D=3D false, 'expected result is false') +test:ok(tostring(result.err) =3D=3D 'fiber is cancelled', 'fiber = cancellation should be reported') +os.exit(test:check() and 0 or 1) diff --git a/test/box/gh-4834-netbox-fiber-cancel.result = b/test/box/gh-4834-netbox-fiber-cancel.result new file mode 100644 index 000000000..5af404e82 --- /dev/null +++ b/test/box/gh-4834-netbox-fiber-cancel.result @@ -0,0 +1,62 @@ +-- test-run result file version 2 +remote =3D require('net.box') + | --- + | ... +fiber =3D require('fiber') + | --- + | ... +test_run =3D require('test_run').new() + | --- + | ... + +-- #4834: Cancelling fiber doesn't interrupt netbox operations +function infinite_call() local channel =3D fiber.channel(1) = pcall(channel:get()) channel.close() end + | --- + | ... +box.schema.func.create('infinite_call') + | --- + | ... +box.schema.user.grant('guest', 'execute', 'function', 'infinite_call') + | --- + | ... + +error_msg =3D nil + | --- + | ... +test_run:cmd("setopt delimiter ';'") + | --- + | - true + | ... +function netbox_runner() + local cn =3D remote.connect(box.cfg.listen) + local f =3D fiber.new(function() + _, error_msg =3D pcall(cn.call, cn, 'infinite_call') + end) + f:set_joinable(true) + fiber.yield() + f:cancel() + f:join() + cn:close() +end; + | --- + | ... +test_run:cmd("setopt delimiter ''"); + | --- + | - true + | ... +netbox_runner() + | --- + | ... +error_msg + | --- + | - fiber is cancelled + | ... +box.schema.func.drop('infinite_call') + | --- + | ... +infinite_call =3D nil + | --- + | ... +error_msg =3D nil + | --- + | ... diff --git a/test/box/gh-4834-netbox-fiber-cancel.test.lua = b/test/box/gh-4834-netbox-fiber-cancel.test.lua new file mode 100644 index 000000000..59963ba91 --- /dev/null +++ b/test/box/gh-4834-netbox-fiber-cancel.test.lua @@ -0,0 +1,28 @@ +remote =3D require('net.box') +fiber =3D require('fiber') +test_run =3D require('test_run').new() + +-- #4834: Cancelling fiber doesn't interrupt netbox operations +function infinite_call() local channel =3D fiber.channel(1) = pcall(channel:get()) channel.close() end +box.schema.func.create('infinite_call') +box.schema.user.grant('guest', 'execute', 'function', 'infinite_call') + +error_msg =3D nil +test_run:cmd("setopt delimiter ';'") +function netbox_runner() + local cn =3D remote.connect(box.cfg.listen) + local f =3D fiber.new(function() + _, error_msg =3D pcall(cn.call, cn, 'infinite_call') + end) + f:set_joinable(true) + fiber.yield() + f:cancel() + f:join() + cn:close() +end; +test_run:cmd("setopt delimiter ''"); +netbox_runner() +error_msg +box.schema.func.drop('infinite_call') +infinite_call =3D nil +error_msg =3D nil --=20 2.24.3 (Apple Git-128)