From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpng1.m.smailru.net (smtpng1.m.smailru.net [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id F06AF4765E0 for ; Fri, 25 Dec 2020 15:44:04 +0300 (MSK) References: <20f64588aedf55188d368a789959d66bb04a0bc7.1608842218.git.artemreyt@tarantool.org> From: Sergey Bronnikov Message-ID: <0c744961-1c5d-ba15-8260-49ce71325e77@tarantool.org> Date: Fri, 25 Dec 2020 15:44:03 +0300 MIME-Version: 1.0 In-Reply-To: <20f64588aedf55188d368a789959d66bb04a0bc7.1608842218.git.artemreyt@tarantool.org> Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Content-Language: en-US Subject: Re: [Tarantool-patches] [PATCH v4] lua: fix running init lua script List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Artem Starshov , Alexander Turenko Cc: tarantool-patches@dev.tarantool.org Artem, thanks for the patch! LGTM On 24.12.2020 23:41, Artem Starshov wrote: > When tarantool launched with -e flag and in > script after there is an error, program hangs. > This happens because shed fiber launches separate > fiber for init user script and starts auxiliary > event loop. It's supposed that fiber will stop > this loop, but in case of error in script, fiber > tries to stop a loop when the last one isn't > started yet. > > Added a flag, which will watch is loop started and > when fiber tries to call `ev_break()` we can be sure > that loop is running already. > > Fixes #4983 > --- > @ChangeLog: Fixed a hang when tarantool launched with the -e option and users > script throws an error (`tarantool -e 'assert(false)'`) (gh-4983). > > Changes in v2: > - Test commit squashed with code fix commit; > - Fixed comments in test; > - Fixed commit message (no stairs); > - Closes -> Fixes; > - Removed small bugs in test; > - Moved test to tap.test(name, func). > > Changes in v3: > - Added more information to change log; > - 'src/lua` -> 'lua' for patch name; > - Code-style fixes in test. > > Changes in v4: > - Set timeout for operations like for cond_var wait in test-run > - Added module `process_timeout` in test/app-tap/lua for further using > > Branch: https://github.com/tarantool/tarantool/tree/artemreyt/gh-4983-tnt-e-assert-false-hangs > Issue: https://github.com/tarantool/tarantool/issues/4983 > > src/lua/init.c | 9 +++ > .../gh-4983-tnt-e-assert-false-hangs.test.lua | 46 +++++++++++++++ > test/app-tap/lua/process_timeout.lua | 57 +++++++++++++++++++ > test/app-tap/suite.ini | 2 +- > 4 files changed, 113 insertions(+), 1 deletion(-) > create mode 100755 test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua > create mode 100644 test/app-tap/lua/process_timeout.lua > > diff --git a/src/lua/init.c b/src/lua/init.c > index a0b2fc775..92bdb82c0 100644 > --- a/src/lua/init.c > +++ b/src/lua/init.c > @@ -569,6 +569,7 @@ run_script_f(va_list ap) > * never really dead. It never returns from its function. > */ > struct diag *diag = va_arg(ap, struct diag *); > + bool aux_loop_is_run = false; > > /* > * Load libraries and execute chunks passed by -l and -e > @@ -611,6 +612,7 @@ run_script_f(va_list ap) > * loop and re-schedule this fiber. > */ > fiber_sleep(0.0); > + aux_loop_is_run = true; > > if (path && strcmp(path, "-") != 0 && access(path, F_OK) == 0) { > /* Execute script. */ > @@ -650,6 +652,13 @@ run_script_f(va_list ap) > * return control back to tarantool_lua_run_script. > */ > end: > + /* > + * Auxiliary loop in tarantool_lua_run_script > + * should start (ev_run()) before this fiber > + * invokes ev_break(). > + */ > + if (!aux_loop_is_run) > + fiber_sleep(0.0); > ev_break(loop(), EVBREAK_ALL); > return 0; > > diff --git a/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua b/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua > new file mode 100755 > index 000000000..891a56ade > --- /dev/null > +++ b/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua > @@ -0,0 +1,46 @@ > +#!/usr/bin/env tarantool > + > +local common = require('process_timeout') > +local tap = require('tap') > +local fio = require('fio') > + > +-- > +-- gh-4983: tarantool -e 'assert(false)' hangs > +-- > + > +local TARANTOOL_PATH = arg[-1] > +local output_file = fio.abspath('out.txt') > +local line = ('%s -e "assert(false)" > %s 2>&1 & echo $!'): > + format(TARANTOOL_PATH, output_file) > + > +-- Like a default timeout for `cond_wait` in test-run > +local process_waiting_timeout = 60.0 > +local file_read_timeout = 60.0 > +local file_open_timeout = 60.0 > +local file_read_interval = 0.2 > + > +local res = tap.test('gh-4983-tnt-e-assert-false-hangs', function(test) > + test:plan(2) > + > + local pid = tonumber(io.popen(line):read('*line')) > + assert(pid, 'pid of proccess can\'t be recieved') > + > + local process_completed = common.wait_process_completion(pid, > + process_waiting_timeout) > + > + test:ok(process_completed, > + ('tarantool process with pid = %d completed'):format(pid)) > + > + -- Kill process if hangs. > + if not process_completed then ffi.C.kill(pid, 9) end > + > + local fh = common.open_with_timeout(output_file, file_open_timeout) > + assert(fh, 'error while opening ' .. output_file) > + > + local data = common.read_with_timeout(fh, file_read_timeout, file_read_interval) > + test:like(data, 'assertion failed', 'assertion failure is displayed') > + > + fh:close() > +end) > + > +os.exit(res and 0 or 1) > diff --git a/test/app-tap/lua/process_timeout.lua b/test/app-tap/lua/process_timeout.lua > new file mode 100644 > index 000000000..141fb0e43 > --- /dev/null > +++ b/test/app-tap/lua/process_timeout.lua > @@ -0,0 +1,57 @@ > +local fiber = require('fiber') > +local clock = require('clock') > +local ffi = require('ffi') > +local fio = require('fio') > +local errno = require('errno') > + > +-- For process_is_alive. > +ffi.cdef([[ > + int kill(pid_t pid, int sig); > +]]) > + > +--- Verify whether a process is alive. > +local function process_is_alive(pid) > + local rc = ffi.C.kill(pid, 0) > + return rc == 0 or errno() ~= errno.ESRCH > +end > + > +--- Returns true if process completed before timeout, false otherwise. > +local function wait_process_completion(pid, timeout) > + local start_time = clock.monotonic() > + local process_completed = false > + while clock.monotonic() - start_time < timeout do > + if not process_is_alive(pid) then > + process_completed = true > + break > + end > + end > + return process_completed > +end > + > +--- Open file on reading with timeout. > +local function open_with_timeout(filename, timeout) > + local fh > + local start_time = clock.monotonic() > + while not fh and clock.monotonic() - start_time < timeout do > + fh = fio.open(filename, {'O_RDONLY'}) > + end > + return fh > +end > + > +--- Try to read from file with timeout with interval. > +local function read_with_timeout(fh, timeout, interval) > + local data = '' > + local start_time = clock.monotonic() > + while #data == 0 and clock.monotonic() - start_time < timeout do > + data = fh:read() > + if #data == 0 then fiber.sleep(interval) end > + end > + return data > +end > + > +return { > + process_is_alive = process_is_alive, > + wait_process_completion = wait_process_completion, > + open_with_timeout = open_with_timeout, > + read_with_timeout = read_with_timeout > +} > diff --git a/test/app-tap/suite.ini b/test/app-tap/suite.ini > index 2f16128f9..6e27f88d6 100644 > --- a/test/app-tap/suite.ini > +++ b/test/app-tap/suite.ini > @@ -1,7 +1,7 @@ > [default] > core = app > description = application server tests (TAP) > -lua_libs = lua/require_mod.lua lua/serializer_test.lua > +lua_libs = lua/require_mod.lua lua/serializer_test.lua lua/process_timeout.lua > is_parallel = True > pretest_clean = True > use_unix_sockets_iproto = True