[tarantool-patches] [PATCH 1/1] app: exit gracefully when a main script throws an error

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Sep 11 01:00:06 MSK 2019


Code to run main script (passed via command line args, or
interactive console) has a footer where it notifies systemd,
logs a happened error, and panics.

Before the patch that code was unreachable in case of any
exception in a main script, because panic happened earlier. Now a
happened exception is correctly carried to the footer with proper
error processing.

A first and obvious solution was replace all panics with diag_set
and use fiber_join on the script runner fiber. But appeared, that
the fiber running a main script can't be joined. This is because
normally it exits via os.exit() which never returns and therefore
its caller never dies = can't be joined.

The patch solves this problem by passing main fiber diag to the
script runner by pointer, eliminating fiber_join necessity.

Closes #4382
---
Branch: https://github.com/tarantool/tarantool/tree/gerold103/gh-4382-init-script-fail-and-sysd
Issue: https://github.com/tarantool/tarantool/issues/4382

 src/lua/init.c                  | 64 ++++++++++++++++++++-------------
 src/lua/init.h                  |  6 +++-
 src/main.cc                     |  5 +--
 test/app-tap/fail_main.result   |  3 ++
 test/app-tap/fail_main.test.lua | 39 ++++++++++++++++++++
 5 files changed, 89 insertions(+), 28 deletions(-)
 create mode 100644 test/app-tap/fail_main.result
 create mode 100755 test/app-tap/fail_main.test.lua

diff --git a/src/lua/init.c b/src/lua/init.c
index 1641f3d82..097dd8495 100644
--- a/src/lua/init.c
+++ b/src/lua/init.c
@@ -534,19 +534,17 @@ tarantool_lua_slab_cache()
 /**
  * Push argument and call a function on the top of Lua stack
  */
-static void
+static int
 lua_main(lua_State *L, int argc, char **argv)
 {
 	assert(lua_isfunction(L, -1));
 	lua_checkstack(L, argc - 1);
 	for (int i = 1; i < argc; i++)
 		lua_pushstring(L, argv[i]);
-	if (luaT_call(L, lua_gettop(L) - 1, 0) != 0) {
-		struct error *e = diag_last_error(&fiber()->diag);
-		panic("%s", e->errmsg);
-	}
+	int rc = luaT_call(L, lua_gettop(L) - 1, 0);
 	/* clear the stack from return values. */
 	lua_settop(L, 0);
+	return rc;
 }
 
 /**
@@ -562,7 +560,13 @@ run_script_f(va_list ap)
 	char **optv = va_arg(ap, char **);
 	int argc = va_arg(ap, int);
 	char **argv = va_arg(ap, char **);
-	struct diag *diag = &fiber()->diag;
+	/*
+	 * An error is returned via an external diag. A caller
+	 * can't use fiber_join(), because the script can call
+	 * os.exit(). That call makes the script runner fiber
+	 * never really dead. It never returns from its function.
+	 */
+	struct diag *diag = va_arg(ap, struct diag *);
 
 	/*
 	 * Load libraries and execute chunks passed by -l and -e
@@ -577,10 +581,8 @@ run_script_f(va_list ap)
 			 */
 			lua_getglobal(L, "require");
 			lua_pushstring(L, optv[i + 1]);
-			if (luaT_call(L, 1, 1) != 0) {
-				struct error *e = diag_last_error(diag);
-				panic("%s", e->errmsg);
-			}
+			if (luaT_call(L, 1, 1) != 0)
+				goto error;
 			/* Non-standard: set name = require('name') */
 			lua_setglobal(L, optv[i + 1]);
 			lua_settop(L, 0);
@@ -590,13 +592,10 @@ run_script_f(va_list ap)
 			 * Execute chunk
 			 */
 			if (luaL_loadbuffer(L, optv[i + 1], strlen(optv[i + 1]),
-					    "=(command line)") != 0) {
-				panic("%s", lua_tostring(L, -1));
-			}
-			if (luaT_call(L, 0, 0) != 0) {
-				struct error *e = diag_last_error(diag);
-				panic("%s", e->errmsg);
-			}
+					    "=(command line)") != 0)
+				goto luajit_error;
+			if (luaT_call(L, 0, 0) != 0)
+				goto error;
 			lua_settop(L, 0);
 			break;
 		default:
@@ -614,13 +613,15 @@ run_script_f(va_list ap)
 	if (path && strcmp(path, "-") != 0 && access(path, F_OK) == 0) {
 		/* Execute script. */
 		if (luaL_loadfile(L, path) != 0)
-			panic("%s", lua_tostring(L, -1));
-		lua_main(L, argc, argv);
+			goto luajit_error;
+		if (lua_main(L, argc, argv) != 0)
+			goto error;
 	} else if (!isatty(STDIN_FILENO) || (path && strcmp(path, "-") == 0)) {
 		/* Execute stdin */
 		if (luaL_loadfile(L, NULL) != 0)
-			panic("%s", lua_tostring(L, -1));
-		lua_main(L, argc, argv);
+			goto luajit_error;
+		if (lua_main(L, argc, argv) != 0)
+			goto error;
 	} else {
 		interactive = true;
 	}
@@ -639,18 +640,25 @@ run_script_f(va_list ap)
 		lua_remove(L, -2); /* remove package.loaded.console */
 		lua_remove(L, -2); /* remove package.loaded */
 		start_loop = false;
-		lua_main(L, argc, argv);
+		if (lua_main(L, argc, argv) != 0)
+			goto error;
 	}
-
 	/*
 	 * Lua script finished. Stop the auxiliary event loop and
 	 * return control back to tarantool_lua_run_script.
 	 */
+end:
 	ev_break(loop(), EVBREAK_ALL);
 	return 0;
+
+luajit_error:
+	diag_set(LuajitError, lua_tostring(L, -1));
+error:
+	diag_move(diag_get(), diag);
+	goto end;
 }
 
-void
+int
 tarantool_lua_run_script(char *path, bool interactive,
 			 int optc, char **optv, int argc, char **argv)
 {
@@ -668,7 +676,7 @@ tarantool_lua_run_script(char *path, bool interactive,
 		panic("%s", diag_last_error(diag_get())->errmsg);
 	script_fiber->storage.lua.stack = tarantool_L;
 	fiber_start(script_fiber, tarantool_L, path, interactive,
-		    optc, optv, argc, argv);
+		    optc, optv, argc, argv, diag_get());
 
 	/*
 	 * Run an auxiliary event loop to re-schedule run_script fiber.
@@ -678,6 +686,12 @@ tarantool_lua_run_script(char *path, bool interactive,
 		ev_run(loop(), 0);
 	/* The fiber running the startup script has ended. */
 	script_fiber = NULL;
+	/*
+	 * Result can't be obtained via fiber_join - script fiber
+	 * never dies if os.exit() was called. This is why diag
+	 * is checked explicitly.
+	 */
+	return diag_is_empty(diag_get()) ? 0 : -1;
 }
 
 void
diff --git a/src/lua/init.h b/src/lua/init.h
index 1257ef510..507360738 100644
--- a/src/lua/init.h
+++ b/src/lua/init.h
@@ -65,8 +65,12 @@ tarantool_lua_free();
  * @param interactive force interactive mode
  * @param argc argc the number of command line arguments
  * @param argv argv command line arguments
+ *
+ * @retval 0 The script is successfully finished.
+ * @retval -1 Error during the script execution. Diagnostics area
+ *        error is set.
  */
-void
+int
 tarantool_lua_run_script(char *path, bool force_interactive,
 			 int optc, char **optv,
 			 int argc, char **argv);
diff --git a/src/main.cc b/src/main.cc
index b16cfa5fe..6ccb2a47f 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -845,8 +845,9 @@ main(int argc, char **argv)
 		 * is why script must run only after the server was fully
 		 * initialized.
 		 */
-		tarantool_lua_run_script(script, interactive, optc, optv,
-					 main_argc, main_argv);
+		if (tarantool_lua_run_script(script, interactive, optc, optv,
+					     main_argc, main_argv) != 0)
+			diag_raise();
 		/*
 		 * Start event loop after executing Lua script if signal_cb()
 		 * wasn't triggered and there is some new events. Initial value
diff --git a/test/app-tap/fail_main.result b/test/app-tap/fail_main.result
new file mode 100644
index 000000000..09d9f2b20
--- /dev/null
+++ b/test/app-tap/fail_main.result
@@ -0,0 +1,3 @@
+TAP version 13
+1..1
+ok - main script error is handled gracefully
diff --git a/test/app-tap/fail_main.test.lua b/test/app-tap/fail_main.test.lua
new file mode 100755
index 000000000..f8c45bf6f
--- /dev/null
+++ b/test/app-tap/fail_main.test.lua
@@ -0,0 +1,39 @@
+#!/usr/bin/env tarantool
+
+local tap = require('tap')
+local test = tap.test("fail_main")
+local fio = require('fio')
+local tarantool_bin = arg[-1]
+
+test:plan(1)
+
+function run_script(code)
+    local dir = fio.tempdir()
+    local script_path = fio.pathjoin(dir, 'script.lua')
+    local script = fio.open(script_path, {'O_CREAT', 'O_WRONLY', 'O_APPEND'},
+        tonumber('0777', 8))
+    script:write(code)
+    script:close()
+    local output_file = fio.pathjoin(fio.cwd(), 'out.txt')
+    local cmd = [[/bin/sh -c 'cd "%s" && "%s" ./script.lua 0> %s 2> %s']]
+    local code = os.execute(
+        string.format(cmd, dir, tarantool_bin, output_file, output_file)
+    )
+    fio.rmtree(dir)
+    local out_fd = fio.open(output_file, {'O_RDONLY'})
+    local output = out_fd:read(100000)
+    out_fd:close()
+    return code, output
+end
+
+--
+-- gh-4382: an error in main script should be handled gracefully,
+-- with proper logging.
+--
+local code, output = run_script("error('Error in the main script')")
+
+test:ok(output:match("fatal error, exiting the event loop"),
+        "main script error is handled gracefully")
+
+test:check()
+os.exit(0)
-- 
2.20.1 (Apple Git-117)





More information about the Tarantool-patches mailing list