From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 24CFD214C0 for ; Tue, 10 Sep 2019 17:56:26 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 0PcdZ4eLAvEW for ; Tue, 10 Sep 2019 17:56:26 -0400 (EDT) Received: from smtpng1.m.smailru.net (smtpng1.m.smailru.net [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id B512920D12 for ; Tue, 10 Sep 2019 17:56:24 -0400 (EDT) From: Vladislav Shpilevoy Subject: [tarantool-patches] [PATCH 1/1] app: exit gracefully when a main script throws an error Date: Wed, 11 Sep 2019 00:00:06 +0200 Message-Id: <6d69f49ae7d4a58dfef92a654b0c6341df3f0344.1568152663.git.v.shpilevoy@tarantool.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-Help: List-Unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-Subscribe: List-Owner: List-post: List-Archive: To: tarantool-patches@freelists.org Cc: alexander.turenko@tarantool.org Code to run main script (passed via command line args, or interactive console) has a footer where it notifies systemd, logs a happened error, and panics. Before the patch that code was unreachable in case of any exception in a main script, because panic happened earlier. Now a happened exception is correctly carried to the footer with proper error processing. A first and obvious solution was replace all panics with diag_set and use fiber_join on the script runner fiber. But appeared, that the fiber running a main script can't be joined. This is because normally it exits via os.exit() which never returns and therefore its caller never dies = can't be joined. The patch solves this problem by passing main fiber diag to the script runner by pointer, eliminating fiber_join necessity. Closes #4382 --- Branch: https://github.com/tarantool/tarantool/tree/gerold103/gh-4382-init-script-fail-and-sysd Issue: https://github.com/tarantool/tarantool/issues/4382 src/lua/init.c | 64 ++++++++++++++++++++------------- src/lua/init.h | 6 +++- src/main.cc | 5 +-- test/app-tap/fail_main.result | 3 ++ test/app-tap/fail_main.test.lua | 39 ++++++++++++++++++++ 5 files changed, 89 insertions(+), 28 deletions(-) create mode 100644 test/app-tap/fail_main.result create mode 100755 test/app-tap/fail_main.test.lua diff --git a/src/lua/init.c b/src/lua/init.c index 1641f3d82..097dd8495 100644 --- a/src/lua/init.c +++ b/src/lua/init.c @@ -534,19 +534,17 @@ tarantool_lua_slab_cache() /** * Push argument and call a function on the top of Lua stack */ -static void +static int lua_main(lua_State *L, int argc, char **argv) { assert(lua_isfunction(L, -1)); lua_checkstack(L, argc - 1); for (int i = 1; i < argc; i++) lua_pushstring(L, argv[i]); - if (luaT_call(L, lua_gettop(L) - 1, 0) != 0) { - struct error *e = diag_last_error(&fiber()->diag); - panic("%s", e->errmsg); - } + int rc = luaT_call(L, lua_gettop(L) - 1, 0); /* clear the stack from return values. */ lua_settop(L, 0); + return rc; } /** @@ -562,7 +560,13 @@ run_script_f(va_list ap) char **optv = va_arg(ap, char **); int argc = va_arg(ap, int); char **argv = va_arg(ap, char **); - struct diag *diag = &fiber()->diag; + /* + * An error is returned via an external diag. A caller + * can't use fiber_join(), because the script can call + * os.exit(). That call makes the script runner fiber + * never really dead. It never returns from its function. + */ + struct diag *diag = va_arg(ap, struct diag *); /* * Load libraries and execute chunks passed by -l and -e @@ -577,10 +581,8 @@ run_script_f(va_list ap) */ lua_getglobal(L, "require"); lua_pushstring(L, optv[i + 1]); - if (luaT_call(L, 1, 1) != 0) { - struct error *e = diag_last_error(diag); - panic("%s", e->errmsg); - } + if (luaT_call(L, 1, 1) != 0) + goto error; /* Non-standard: set name = require('name') */ lua_setglobal(L, optv[i + 1]); lua_settop(L, 0); @@ -590,13 +592,10 @@ run_script_f(va_list ap) * Execute chunk */ if (luaL_loadbuffer(L, optv[i + 1], strlen(optv[i + 1]), - "=(command line)") != 0) { - panic("%s", lua_tostring(L, -1)); - } - if (luaT_call(L, 0, 0) != 0) { - struct error *e = diag_last_error(diag); - panic("%s", e->errmsg); - } + "=(command line)") != 0) + goto luajit_error; + if (luaT_call(L, 0, 0) != 0) + goto error; lua_settop(L, 0); break; default: @@ -614,13 +613,15 @@ run_script_f(va_list ap) if (path && strcmp(path, "-") != 0 && access(path, F_OK) == 0) { /* Execute script. */ if (luaL_loadfile(L, path) != 0) - panic("%s", lua_tostring(L, -1)); - lua_main(L, argc, argv); + goto luajit_error; + if (lua_main(L, argc, argv) != 0) + goto error; } else if (!isatty(STDIN_FILENO) || (path && strcmp(path, "-") == 0)) { /* Execute stdin */ if (luaL_loadfile(L, NULL) != 0) - panic("%s", lua_tostring(L, -1)); - lua_main(L, argc, argv); + goto luajit_error; + if (lua_main(L, argc, argv) != 0) + goto error; } else { interactive = true; } @@ -639,18 +640,25 @@ run_script_f(va_list ap) lua_remove(L, -2); /* remove package.loaded.console */ lua_remove(L, -2); /* remove package.loaded */ start_loop = false; - lua_main(L, argc, argv); + if (lua_main(L, argc, argv) != 0) + goto error; } - /* * Lua script finished. Stop the auxiliary event loop and * return control back to tarantool_lua_run_script. */ +end: ev_break(loop(), EVBREAK_ALL); return 0; + +luajit_error: + diag_set(LuajitError, lua_tostring(L, -1)); +error: + diag_move(diag_get(), diag); + goto end; } -void +int tarantool_lua_run_script(char *path, bool interactive, int optc, char **optv, int argc, char **argv) { @@ -668,7 +676,7 @@ tarantool_lua_run_script(char *path, bool interactive, panic("%s", diag_last_error(diag_get())->errmsg); script_fiber->storage.lua.stack = tarantool_L; fiber_start(script_fiber, tarantool_L, path, interactive, - optc, optv, argc, argv); + optc, optv, argc, argv, diag_get()); /* * Run an auxiliary event loop to re-schedule run_script fiber. @@ -678,6 +686,12 @@ tarantool_lua_run_script(char *path, bool interactive, ev_run(loop(), 0); /* The fiber running the startup script has ended. */ script_fiber = NULL; + /* + * Result can't be obtained via fiber_join - script fiber + * never dies if os.exit() was called. This is why diag + * is checked explicitly. + */ + return diag_is_empty(diag_get()) ? 0 : -1; } void diff --git a/src/lua/init.h b/src/lua/init.h index 1257ef510..507360738 100644 --- a/src/lua/init.h +++ b/src/lua/init.h @@ -65,8 +65,12 @@ tarantool_lua_free(); * @param interactive force interactive mode * @param argc argc the number of command line arguments * @param argv argv command line arguments + * + * @retval 0 The script is successfully finished. + * @retval -1 Error during the script execution. Diagnostics area + * error is set. */ -void +int tarantool_lua_run_script(char *path, bool force_interactive, int optc, char **optv, int argc, char **argv); diff --git a/src/main.cc b/src/main.cc index b16cfa5fe..6ccb2a47f 100644 --- a/src/main.cc +++ b/src/main.cc @@ -845,8 +845,9 @@ main(int argc, char **argv) * is why script must run only after the server was fully * initialized. */ - tarantool_lua_run_script(script, interactive, optc, optv, - main_argc, main_argv); + if (tarantool_lua_run_script(script, interactive, optc, optv, + main_argc, main_argv) != 0) + diag_raise(); /* * Start event loop after executing Lua script if signal_cb() * wasn't triggered and there is some new events. Initial value diff --git a/test/app-tap/fail_main.result b/test/app-tap/fail_main.result new file mode 100644 index 000000000..09d9f2b20 --- /dev/null +++ b/test/app-tap/fail_main.result @@ -0,0 +1,3 @@ +TAP version 13 +1..1 +ok - main script error is handled gracefully diff --git a/test/app-tap/fail_main.test.lua b/test/app-tap/fail_main.test.lua new file mode 100755 index 000000000..f8c45bf6f --- /dev/null +++ b/test/app-tap/fail_main.test.lua @@ -0,0 +1,39 @@ +#!/usr/bin/env tarantool + +local tap = require('tap') +local test = tap.test("fail_main") +local fio = require('fio') +local tarantool_bin = arg[-1] + +test:plan(1) + +function run_script(code) + local dir = fio.tempdir() + local script_path = fio.pathjoin(dir, 'script.lua') + local script = fio.open(script_path, {'O_CREAT', 'O_WRONLY', 'O_APPEND'}, + tonumber('0777', 8)) + script:write(code) + script:close() + local output_file = fio.pathjoin(fio.cwd(), 'out.txt') + local cmd = [[/bin/sh -c 'cd "%s" && "%s" ./script.lua 0> %s 2> %s']] + local code = os.execute( + string.format(cmd, dir, tarantool_bin, output_file, output_file) + ) + fio.rmtree(dir) + local out_fd = fio.open(output_file, {'O_RDONLY'}) + local output = out_fd:read(100000) + out_fd:close() + return code, output +end + +-- +-- gh-4382: an error in main script should be handled gracefully, +-- with proper logging. +-- +local code, output = run_script("error('Error in the main script')") + +test:ok(output:match("fatal error, exiting the event loop"), + "main script error is handled gracefully") + +test:check() +os.exit(0) -- 2.20.1 (Apple Git-117)