From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpng2.m.smailru.net (smtpng2.m.smailru.net [94.100.179.3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id CF88A45C304 for ; Fri, 18 Dec 2020 15:59:28 +0300 (MSK) References: <27bae760920c0fe2647dff3b537f320f9d59c488.1608134096.git.artemreyt@tarantool.org> From: Sergey Bronnikov Message-ID: Date: Fri, 18 Dec 2020 15:59:27 +0300 MIME-Version: 1.0 In-Reply-To: <27bae760920c0fe2647dff3b537f320f9d59c488.1608134096.git.artemreyt@tarantool.org> Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Content-Language: en-US Subject: Re: [Tarantool-patches] [PATCH v3] lua: fix running init lua script List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Artem Starshov , Alexander Turenko Cc: tarantool-patches@dev.tarantool.org Hello, Artem thanks for the patch! See 4 minor comments below. On 16.12.2020 18:57, Artem Starshov wrote: > When tarantool launched with -e flag and in > script after there is an error, programm hangs. 1. "program" > This happens because shed fiber launches separate > fiber for init user script and starts auxiliary > event loop. It's supposed that fiber will stop > this loop, but in case of error in script, fiber > tries to stop a loop when the last one isn't > started yet. > > Added a flag, which will watch is loop started and > when fiber tries to call `ev_break()` we can be sure > that loop is running already. > > Fixes #4983 > --- > @ChangeLog: Fixed a hang when tarantool launched with the -e option and users > script throws an error (`tarantool -e 'assert(false)'`) (gh-4983). > > Changes in v2: > - Test commit squashed with code fix commit; > - Fixed comments in test; > - Fixed commit message (no stairs); > - Closes -> Fixes; > - Removed small bugs in test; > - Moved test to tap.test(name, func). > > Changes in v3: > - Added more information to change log; > - 'src/lua` -> 'lua' for patch name; > - Code-style fixes in test. > > Branch: https://github.com/tarantool/tarantool/tree/artemreyt/gh-4983-tnt-e-assert-false-hangs > Issue: https://github.com/tarantool/tarantool/issues/4983 > > src/lua/init.c | 9 ++ > .../gh-4983-tnt-e-assert-false-hangs.test.lua | 92 +++++++++++++++++++ > 2 files changed, 101 insertions(+) > create mode 100755 test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua > > diff --git a/src/lua/init.c b/src/lua/init.c > index a0b2fc775..92bdb82c0 100644 > --- a/src/lua/init.c > +++ b/src/lua/init.c > @@ -569,6 +569,7 @@ run_script_f(va_list ap) > * never really dead. It never returns from its function. > */ > struct diag *diag = va_arg(ap, struct diag *); > + bool aux_loop_is_run = false; > > /* > * Load libraries and execute chunks passed by -l and -e > @@ -611,6 +612,7 @@ run_script_f(va_list ap) > * loop and re-schedule this fiber. > */ > fiber_sleep(0.0); > + aux_loop_is_run = true; > > if (path && strcmp(path, "-") != 0 && access(path, F_OK) == 0) { > /* Execute script. */ > @@ -650,6 +652,13 @@ run_script_f(va_list ap) > * return control back to tarantool_lua_run_script. > */ > end: > + /* > + * Auxiliary loop in tarantool_lua_run_script > + * should start (ev_run()) before this fiber > + * invokes ev_break(). > + */ > + if (!aux_loop_is_run) > + fiber_sleep(0.0); > ev_break(loop(), EVBREAK_ALL); > return 0; > > diff --git a/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua b/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua > new file mode 100755 > index 000000000..378ac229c > --- /dev/null > +++ b/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua 2. test always failed when it is running with high concurrency: ../../test/test-run.py --builddir=/home/s.bronnikov/work/tarantool/build --vardir=/home/s.bronnikov/work/tantool/build/test/var \ -j100 $(yes app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua | head -n 1000) > @@ -0,0 +1,92 @@ > +#!/usr/bin/env tarantool > + > +local fiber = require('fiber') > +local clock = require('clock') > +local ffi = require('ffi') > +local fio = require('fio') > +local errno = require('errno') > +local tap = require('tap') > + > +-- > +-- gh-4983: tarantool -e 'assert(false)' hangs > +-- > + > +-- For process_is_alive. > +ffi.cdef([[ > + int kill(pid_t pid, int sig); > +]]) > + > +--- Verify whether a process is alive. > +local function process_is_alive(pid) > + local rc = ffi.C.kill(pid, 0) > + return rc == 0 or errno() ~= errno.ESRCH > +end > + > +--- Returns true if process completed before timeout, false otherwise. > +local function wait_process_completion(pid, timeout) > + local start_time = clock.monotonic() > + local process_completed = false > + while clock.monotonic() - start_time < timeout do > + if not process_is_alive(pid) then > + process_completed = true > + break > + end > + end > + return process_completed > +end > + > +--- Open file on reading with timeout. > +local function open_with_timeout(filename, timeout) > + local fh > + local start_time = clock.monotonic() > + while not fh and clock.monotonic() - start_time < timeout do > + fh = fio.open(filename, {'O_RDONLY'}) > + end > + return fh > +end > + > +--- Try to read from file with timeout with interval. > +local function read_with_timeout(fh, timeout, interval) > + local data = '' > + local start_time = clock.monotonic() > + while #data == 0 and clock.monotonic() - start_time < timeout do > + data = fh:read() > + if #data == 0 then fiber.sleep(interval) end > + end > + return data > +end > + > +local TARANTOOL_PATH = arg[-1] > +local output_file = fio.abspath('out.txt') > +local line = ('%s -e "assert(false)" > %s 2>&1 & echo $!'): > + format(TARANTOOL_PATH, output_file) > +local process_waiting_timeout = 2.0 > +local file_read_timeout = 2.0 > +local file_read_interval = 0.2 > +local file_open_timeout = 2.0 > + > +local res = tap.test('gh-4983-tnt-e-assert-false-hangs', function(test) > + test:plan(2) > + > + local pid = tonumber(io.popen(line):read("*line")) 3. Why are you using double quotes here if above you are using single quotes? Make using quotes consistent in a whole test. See below too. > + assert(pid, "pid of proccess can't be recieved") > + > + local process_completed = wait_process_completion(pid, > + process_waiting_timeout) > + > + test:ok(process_completed, > + ('tarantool process with pid = %d completed'):format(pid)) > + > + -- Kill process if hangs. > + if not process_completed then ffi.C.kill(pid, 9) end > + > + local fh = open_with_timeout(output_file, file_open_timeout) > + assert(fh, 'error while opening ' .. output_file) > + > + local data = read_with_timeout(fh, file_read_timeout, file_read_interval) > + test:like(data, "assertion failed", "assertion failure is displayed") > + > + fh:close() > +end) > + > +os.exit(res and 0 or 1) > 4. add a newline at the end of file