[Tarantool-patches] [PATCH v4 3/4] crash: move fatal signal handling in

Cyrill Gorcunov gorcunov at gmail.com
Thu Dec 10 19:18:31 MSK 2020


When SIGSEGV or SIGFPE reaches tarantool we try to gather
all information related to the crash and print it out to the
console (well, stderr actually). Still, there is a request
to not just show this info locally but also send it out to
the feedback server.

Thus, to keep gathering of crash related information in one
module, we move fatal signal handling into a separate crash.c
file. This allows us to collect the data we need in one place
and reuse it when we need to send reports to stderr (and to the
feedback server, which will be implemented in the next patch).

Part-of #5261

Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
 src/lib/core/CMakeLists.txt |   1 +
 src/lib/core/crash.c        | 291 ++++++++++++++++++++++++++++++++++++
 src/lib/core/crash.h        |  32 ++++
 src/main.cc                 | 138 +----------------
 4 files changed, 329 insertions(+), 133 deletions(-)
 create mode 100644 src/lib/core/crash.c
 create mode 100644 src/lib/core/crash.h

diff --git a/src/lib/core/CMakeLists.txt b/src/lib/core/CMakeLists.txt
index 13ed1e7ab..06b2b91e1 100644
--- a/src/lib/core/CMakeLists.txt
+++ b/src/lib/core/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(core_sources
     diag.c
+    crash.c
     say.c
     memory.c
     clock.c
diff --git a/src/lib/core/crash.c b/src/lib/core/crash.c
new file mode 100644
index 000000000..9572a023c
--- /dev/null
+++ b/src/lib/core/crash.c
@@ -0,0 +1,291 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file.
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <time.h>
+
+#include "trivia/util.h"
+#include "backtrace.h"
+#include "crash.h"
+#include "say.h"
+
+#define pr_fmt(fmt)		"crash: " fmt
+#define pr_syserr(fmt, ...)	say_syserror(pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_panic(fmt, ...)	panic(pr_fmt(fmt), ##__VA_ARGS__)
+
+#ifdef TARGET_OS_LINUX
+#ifndef __x86_64__
+# error "Non x86-64 architectures are not supported"
+#endif
+struct crash_greg {
+	uint64_t	r8;
+	uint64_t	r9;
+	uint64_t	r10;
+	uint64_t	r11;
+	uint64_t	r12;
+	uint64_t	r13;
+	uint64_t	r14;
+	uint64_t	r15;
+	uint64_t	di;
+	uint64_t	si;
+	uint64_t	bp;
+	uint64_t	bx;
+	uint64_t	dx;
+	uint64_t	ax;
+	uint64_t	cx;
+	uint64_t	sp;
+	uint64_t	ip;
+	uint64_t	flags;
+	uint16_t	cs;
+	uint16_t	gs;
+	uint16_t	fs;
+	uint16_t	ss;
+	uint64_t	err;
+	uint64_t	trapno;
+	uint64_t	oldmask;
+	uint64_t	cr2;
+	uint64_t	fpstate;
+	uint64_t	reserved1[8];
+};
+#endif /* TARGET_OS_LINUX */
+
+static struct crash_info {
+	/**
+	 * These two are mostly useless since they are
+	 * just plain addresses, but they are kept for
+	 * backward compatibility.
+	 */
+	void *context_addr;
+	void *siginfo_addr;
+#ifdef TARGET_OS_LINUX
+	/**
+	 * Registers contents.
+	 */
+	struct crash_greg greg;
+#endif
+	/**
+	 * Faulting address.
+	 */
+	void *siaddr;
+	/**
+	 * Crash signal number.
+	 */
+	int signo;
+	/**
+	 * Crash signal code.
+	 */
+	int sicode;
+#ifdef ENABLE_BACKTRACE
+	/*
+	 * 4K of memory should be enough to keep the backtrace.
+	 * In the worst case it is simply going to be trimmed.
+	 */
+	char backtrace_buf[4096];
+#endif
+} crash_info;
+
+/**
+ * The routine is called inside crash signal handler so
+ * be careful to not cause additional signals inside.
+ */
+static struct crash_info *
+crash_collect(int signo, siginfo_t *siginfo, void *ucontext)
+{
+	struct crash_info *cinfo = &crash_info;
+
+	cinfo->signo = signo;
+	cinfo->sicode = siginfo->si_code;
+	cinfo->siaddr = siginfo->si_addr;
+	cinfo->context_addr = ucontext;
+	cinfo->siginfo_addr = siginfo;
+
+#ifdef ENABLE_BACKTRACE
+	char *start = cinfo->backtrace_buf;
+	backtrace(start, sizeof(cinfo->backtrace_buf));
+#endif
+
+#ifdef TARGET_OS_LINUX
+	/*
+	 * uc_mcontext on the libc level looks somewhat strange:
+	 * they define an array of uint64_t where each register
+	 * is defined by a REG_x macro.
+	 *
+	 * The kernel, in turn, is quite explicit about the context.
+	 * Moreover, it is part of the user ABI, thus won't be changed.
+	 *
+	 * Let's use memcpy here to make a copy in a fast way.
+	 */
+	ucontext_t *uc = ucontext;
+	memcpy(&cinfo->greg, &uc->uc_mcontext, sizeof(cinfo->greg));
+#endif
+
+	return cinfo;
+}
+
+/**
+ * Report crash information to the stderr
+ * (usually a current console).
+ */
+static void
+crash_report_stderr(struct crash_info *cinfo)
+{
+	if (cinfo->signo == SIGSEGV) {
+		fprintf(stderr, "Segmentation fault\n");
+		const char *signal_code_repr = 0;
+
+		switch (cinfo->sicode) {
+		case SEGV_MAPERR:
+			signal_code_repr = "SEGV_MAPERR";
+			break;
+		case SEGV_ACCERR:
+			signal_code_repr = "SEGV_ACCERR";
+			break;
+		}
+
+		if (signal_code_repr)
+			fprintf(stderr, "  code: %s\n", signal_code_repr);
+		else
+			fprintf(stderr, "  code: %d\n", cinfo->sicode);
+		/*
+		 * fprintf is used instead of fdprintf, because
+		 * fdprintf does not understand %p
+		 */
+		fprintf(stderr, "  addr: %p\n", cinfo->siaddr);
+	} else {
+		fprintf(stderr, "Got a fatal signal %d\n", cinfo->signo);
+	}
+
+	fprintf(stderr, "  context: %p\n", cinfo->context_addr);
+	fprintf(stderr, "  siginfo: %p\n", cinfo->siginfo_addr);
+
+#ifdef TARGET_OS_LINUX
+# define fprintf_reg(__n, __v)				\
+	fprintf(stderr, "  %-9s0x%-17llx%lld\n",	\
+		__n, (long long)__v, (long long)__v)
+	fprintf_reg("rax", cinfo->greg.ax);
+	fprintf_reg("rbx", cinfo->greg.bx);
+	fprintf_reg("rcx", cinfo->greg.cx);
+	fprintf_reg("rdx", cinfo->greg.dx);
+	fprintf_reg("rsi", cinfo->greg.si);
+	fprintf_reg("rdi", cinfo->greg.di);
+	fprintf_reg("rsp", cinfo->greg.sp);
+	fprintf_reg("rbp", cinfo->greg.bp);
+	fprintf_reg("r8", cinfo->greg.r8);
+	fprintf_reg("r9", cinfo->greg.r9);
+	fprintf_reg("r10", cinfo->greg.r10);
+	fprintf_reg("r11", cinfo->greg.r11);
+	fprintf_reg("r12", cinfo->greg.r12);
+	fprintf_reg("r13", cinfo->greg.r13);
+	fprintf_reg("r14", cinfo->greg.r14);
+	fprintf_reg("r15", cinfo->greg.r15);
+	fprintf_reg("rip", cinfo->greg.ip);
+	fprintf_reg("eflags", cinfo->greg.flags);
+	fprintf_reg("cs", cinfo->greg.cs);
+	fprintf_reg("gs", cinfo->greg.gs);
+	fprintf_reg("fs", cinfo->greg.fs);
+	fprintf_reg("cr2", cinfo->greg.cr2);
+	fprintf_reg("err", cinfo->greg.err);
+	fprintf_reg("oldmask", cinfo->greg.oldmask);
+	fprintf_reg("trapno", cinfo->greg.trapno);
+# undef fprintf_reg
+#endif /* TARGET_OS_LINUX */
+
+	fprintf(stderr, "Current time: %u\n", (unsigned)time(0));
+	fprintf(stderr, "Please file a bug at "
+		"http://github.com/tarantool/tarantool/issues\n");
+
+#ifdef ENABLE_BACKTRACE
+	fprintf(stderr, "Attempting backtrace... Note: since the server has "
+		"already crashed, \nthis may fail as well\n");
+	fprintf(stderr, "%s", cinfo->backtrace_buf);
+#endif
+}
+
+/**
+ * Handle fatal (crashing) signal.
+ *
+ * Try to log as much as possible before dumping a core.
+ *
+ * Core files are not always allowed and it takes an effort to
+ * extract useful information from them.
+ *
+ * *Recursive invocation*
+ *
+ * Unless SIGSEGV is sent by kill(), Linux resets the signal
+ * to a default value before invoking the handler.
+ *
+ * Despite that, as an extra precaution to avoid infinite
+ * recursion, we track re-entry into the handler: a second
+ * call only prints a short note and goes straight to abort().
+ */
+static void
+crash_signal_cb(int signo, siginfo_t *siginfo, void *context)
+{
+	static volatile sig_atomic_t in_cb = 0;
+	struct crash_info *cinfo;
+
+	if (in_cb == 0) {
+		in_cb = 1;
+		cinfo = crash_collect(signo, siginfo, context);
+		crash_report_stderr(cinfo);
+	} else {
+		/* Got a signal while running the handler. */
+		fprintf(stderr, "Fatal %d while backtracing", signo);
+	}
+
+	/* Try to dump a core */
+	struct sigaction sa = {
+		.sa_handler = SIG_DFL,
+	};
+	sigemptyset(&sa.sa_mask);
+	sigaction(SIGABRT, &sa, NULL);
+	abort();
+}
+
+/**
+ * Fatal signals we generate a crash report on.
+ */
+static const int crash_signals[] = { SIGSEGV, SIGFPE };
+
+void
+crash_signal_reset(void)
+{
+	struct sigaction sa = {
+		.sa_handler = SIG_DFL,
+	};
+	sigemptyset(&sa.sa_mask);
+
+	for (size_t i = 0; i < lengthof(crash_signals); i++) {
+		if (sigaction(crash_signals[i], &sa, NULL) == 0)
+			continue;
+		pr_syserr("reset sigaction %d", crash_signals[i]);
+	}
+}
+
+void
+crash_signal_init(void)
+{
+	/*
+	 * SA_RESETHAND resets the handler action to the default
+	 * one upon entering the handler.
+	 *
+	 * SA_NODEFER allows receiving the same signal
+	 * while the handler is running.
+	 */
+	struct sigaction sa = {
+		.sa_flags = SA_RESETHAND | SA_NODEFER | SA_SIGINFO,
+		.sa_sigaction = crash_signal_cb,
+	};
+	sigemptyset(&sa.sa_mask);
+
+	for (size_t i = 0; i < lengthof(crash_signals); i++) {
+		if (sigaction(crash_signals[i], &sa, NULL) == 0)
+			continue;
+		pr_panic("sigaction %d (%s)", crash_signals[i],
+			 strerror(errno));
+	}
+}
diff --git a/src/lib/core/crash.h b/src/lib/core/crash.h
new file mode 100644
index 000000000..d107cd953
--- /dev/null
+++ b/src/lib/core/crash.h
@@ -0,0 +1,32 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file.
+ */
+#pragma once
+
+#include <stdint.h>
+#include <signal.h>
+#include <limits.h>
+
+#include "trivia/config.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+/**
+ * Initialize crash signal handlers.
+ */
+void
+crash_signal_init(void);
+
+/**
+ * Reset crash signal handlers.
+ */
+void
+crash_signal_reset(void);
+
+#if defined(__cplusplus)
+}
+#endif /* defined(__cplusplus) */
diff --git a/src/main.cc b/src/main.cc
index 2f48f474c..391e0f878 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -79,6 +79,7 @@
 #include "systemd.h"
 #include "crypto/crypto.h"
 #include "core/popen.h"
+#include "core/crash.h"
 
 static pid_t master_pid = getpid();
 static struct pidfh *pid_file_handle;
@@ -184,124 +185,6 @@ signal_sigwinch_cb(ev_loop *loop, struct ev_signal *w, int revents)
 		rl_resize_terminal();
 }
 
-#if defined(__linux__) && defined(__amd64)
-
-inline void
-dump_x86_64_register(const char *reg_name, unsigned long long val)
-{
-	fprintf(stderr, "  %-9s0x%-17llx%lld\n", reg_name, val, val);
-}
-
-void
-dump_x86_64_registers(ucontext_t *uc)
-{
-	dump_x86_64_register("rax", uc->uc_mcontext.gregs[REG_RAX]);
-	dump_x86_64_register("rbx", uc->uc_mcontext.gregs[REG_RBX]);
-	dump_x86_64_register("rcx", uc->uc_mcontext.gregs[REG_RCX]);
-	dump_x86_64_register("rdx", uc->uc_mcontext.gregs[REG_RDX]);
-	dump_x86_64_register("rsi", uc->uc_mcontext.gregs[REG_RSI]);
-	dump_x86_64_register("rdi", uc->uc_mcontext.gregs[REG_RDI]);
-	dump_x86_64_register("rsp", uc->uc_mcontext.gregs[REG_RSP]);
-	dump_x86_64_register("rbp", uc->uc_mcontext.gregs[REG_RBP]);
-	dump_x86_64_register("r8", uc->uc_mcontext.gregs[REG_R8]);
-	dump_x86_64_register("r9", uc->uc_mcontext.gregs[REG_R9]);
-	dump_x86_64_register("r10", uc->uc_mcontext.gregs[REG_R10]);
-	dump_x86_64_register("r11", uc->uc_mcontext.gregs[REG_R11]);
-	dump_x86_64_register("r12", uc->uc_mcontext.gregs[REG_R12]);
-	dump_x86_64_register("r13", uc->uc_mcontext.gregs[REG_R13]);
-	dump_x86_64_register("r14", uc->uc_mcontext.gregs[REG_R14]);
-	dump_x86_64_register("r15", uc->uc_mcontext.gregs[REG_R15]);
-	dump_x86_64_register("rip", uc->uc_mcontext.gregs[REG_RIP]);
-	dump_x86_64_register("eflags", uc->uc_mcontext.gregs[REG_EFL]);
-	dump_x86_64_register("cs", (uc->uc_mcontext.gregs[REG_CSGSFS] >> 0) & 0xffff);
-	dump_x86_64_register("gs", (uc->uc_mcontext.gregs[REG_CSGSFS] >> 16) & 0xffff);
-	dump_x86_64_register("fs", (uc->uc_mcontext.gregs[REG_CSGSFS] >> 32) & 0xffff);
-	dump_x86_64_register("cr2", uc->uc_mcontext.gregs[REG_CR2]);
-	dump_x86_64_register("err", uc->uc_mcontext.gregs[REG_ERR]);
-	dump_x86_64_register("oldmask", uc->uc_mcontext.gregs[REG_OLDMASK]);
-	dump_x86_64_register("trapno", uc->uc_mcontext.gregs[REG_TRAPNO]);
-}
-
-#endif /* defined(__linux__) && defined(__amd64) */
-
-/** Try to log as much as possible before dumping a core.
- *
- * Core files are not aways allowed and it takes an effort to
- * extract useful information from them.
- *
- * *Recursive invocation*
- *
- * Unless SIGSEGV is sent by kill(), Linux
- * resets the signal a default value before invoking
- * the handler.
- *
- * Despite that, as an extra precaution to avoid infinite
- * recursion, we count invocations of the handler, and
- * quietly _exit() when called for a second time.
- */
-static void
-sig_fatal_cb(int signo, siginfo_t *siginfo, void *context)
-{
-	static volatile sig_atomic_t in_cb = 0;
-	int fd = STDERR_FILENO;
-	struct sigaction sa;
-
-	/* Got a signal while running the handler. */
-	if (in_cb) {
-		fdprintf(fd, "Fatal %d while backtracing", signo);
-		goto end;
-	}
-
-	in_cb = 1;
-
-	if (signo == SIGSEGV) {
-		fdprintf(fd, "Segmentation fault\n");
-		const char *signal_code_repr = 0;
-		switch (siginfo->si_code) {
-		case SEGV_MAPERR:
-			signal_code_repr = "SEGV_MAPERR";
-			break;
-		case SEGV_ACCERR:
-			signal_code_repr = "SEGV_ACCERR";
-			break;
-		}
-		if (signal_code_repr)
-			fdprintf(fd, "  code: %s\n", signal_code_repr);
-		else
-			fdprintf(fd, "  code: %d\n", siginfo->si_code);
-		/*
-		 * fprintf is used insted of fdprintf, because
-		 * fdprintf does not understand %p
-		 */
-		fprintf(stderr, "  addr: %p\n", siginfo->si_addr);
-	} else
-		fdprintf(fd, "Got a fatal signal %d\n", signo);
-	fprintf(stderr, "  context: %p\n", context);
-	fprintf(stderr, "  siginfo: %p\n", siginfo);
-
-#if defined(__linux__) && defined(__amd64)
-	dump_x86_64_registers((ucontext_t *)context);
-#endif
-
-	fdprintf(fd, "Current time: %u\n", (unsigned) time(0));
-	fdprintf(fd,
-		 "Please file a bug at http://github.com/tarantool/tarantool/issues\n");
-
-#ifdef ENABLE_BACKTRACE
-	fdprintf(fd, "Attempting backtrace... Note: since the server has "
-		 "already crashed, \nthis may fail as well\n");
-	print_backtrace();
-#endif
-end:
-	/* Try to dump core. */
-	memset(&sa, 0, sizeof(sa));
-	sigemptyset(&sa.sa_mask);
-	sa.sa_handler = SIG_DFL;
-	sigaction(SIGABRT, &sa, NULL);
-
-	abort();
-}
-
 static void
 signal_free(void)
 {
@@ -328,11 +211,11 @@ signal_reset(void)
 	    sigaction(SIGINT, &sa, NULL) == -1 ||
 	    sigaction(SIGTERM, &sa, NULL) == -1 ||
 	    sigaction(SIGHUP, &sa, NULL) == -1 ||
-	    sigaction(SIGWINCH, &sa, NULL) == -1 ||
-	    sigaction(SIGSEGV, &sa, NULL) == -1 ||
-	    sigaction(SIGFPE, &sa, NULL) == -1)
+	    sigaction(SIGWINCH, &sa, NULL) == -1)
 		say_syserror("sigaction");
 
+	crash_signal_reset();
+
 	/* Unblock any signals blocked by libev. */
 	sigset_t sigset;
 	sigfillset(&sigset);
@@ -362,18 +245,7 @@ signal_init(void)
 	if (sigaction(SIGPIPE, &sa, 0) == -1)
 		panic_syserror("sigaction");
 
-	/*
-	 * SA_RESETHAND resets handler action to the default
-	 * one when entering handler.
-	 * SA_NODEFER allows receiving the same signal during handler.
-	 */
-	sa.sa_flags = SA_RESETHAND | SA_NODEFER | SA_SIGINFO;
-	sa.sa_sigaction = sig_fatal_cb;
-
-	if (sigaction(SIGSEGV, &sa, 0) == -1 ||
-	    sigaction(SIGFPE, &sa, 0) == -1) {
-		panic_syserror("sigaction");
-	}
+	crash_signal_init();
 
 	ev_signal_init(&ev_sigs[0], sig_checkpoint, SIGUSR1);
 	ev_signal_init(&ev_sigs[1], signal_cb, SIGINT);
-- 
2.26.2



More information about the Tarantool-patches mailing list