Tarantool development patches archive
 help / color / mirror / Atom feed
From: Cyrill Gorcunov <gorcunov@gmail.com>
To: tml <tarantool-patches@dev.tarantool.org>
Cc: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Subject: [Tarantool-patches] [PATCH v5 3/4] crash: move fatal signal handling in
Date: Wed, 23 Dec 2020 18:41:54 +0300	[thread overview]
Message-ID: <20201223154155.234884-4-gorcunov@gmail.com> (raw)
In-Reply-To: <20201223154155.234884-1-gorcunov@gmail.com>

When SIGSEGV or SIGFPE reaches Tarantool, we try to gather all
the information related to the crash and print it to the console
(stderr, actually). There is also a request to not only show this
information locally but to send it to the feedback server as well.

To keep the gathering of crash-related information in one module,
we move fatal signal handling into a separate crash.c file.
This allows us to collect the data we need in one place and
reuse it when we need to send reports to stderr (and to the
feedback server, which will be implemented in the next patch).

Part-of #5261

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 src/lib/core/CMakeLists.txt |   1 +
 src/lib/core/crash.c        | 295 ++++++++++++++++++++++++++++++++++++
 src/lib/core/crash.h        |  26 ++++
 src/main.cc                 | 138 +----------------
 4 files changed, 327 insertions(+), 133 deletions(-)
 create mode 100644 src/lib/core/crash.c
 create mode 100644 src/lib/core/crash.h

diff --git a/src/lib/core/CMakeLists.txt b/src/lib/core/CMakeLists.txt
index 7c62fc5ce..30cf0dd15 100644
--- a/src/lib/core/CMakeLists.txt
+++ b/src/lib/core/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(core_sources
     diag.c
+    crash.c
     say.c
     memory.c
     clock.c
diff --git a/src/lib/core/crash.c b/src/lib/core/crash.c
new file mode 100644
index 000000000..3929463f3
--- /dev/null
+++ b/src/lib/core/crash.c
@@ -0,0 +1,295 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file.
+ */
+
+#include <string.h>
+#include <signal.h>
+#include <stdint.h>
+#include <time.h>
+
+#include "trivia/util.h"
+
+#include "backtrace.h"
+#include "crash.h"
+#include "say.h"
+
+#define pr_fmt(fmt)		"crash: " fmt
+#define pr_syserr(fmt, ...)	say_syserror(pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_panic(fmt, ...)	panic(pr_fmt(fmt), ##__VA_ARGS__)
+
+#ifdef TARGET_OS_LINUX
+#ifndef __x86_64__
+# error "Non x86-64 architectures are not supported"
+#endif
+struct crash_greg {
+	uint64_t	r8;
+	uint64_t	r9;
+	uint64_t	r10;
+	uint64_t	r11;
+	uint64_t	r12;
+	uint64_t	r13;
+	uint64_t	r14;
+	uint64_t	r15;
+	uint64_t	di;
+	uint64_t	si;
+	uint64_t	bp;
+	uint64_t	bx;
+	uint64_t	dx;
+	uint64_t	ax;
+	uint64_t	cx;
+	uint64_t	sp;
+	uint64_t	ip;
+	uint64_t	flags;
+	uint16_t	cs;
+	uint16_t	gs;
+	uint16_t	fs;
+	uint16_t	ss;
+	uint64_t	err;
+	uint64_t	trapno;
+	uint64_t	oldmask;
+	uint64_t	cr2;
+	uint64_t	fpstate;
+	uint64_t	reserved1[8];
+};
+#endif /* TARGET_OS_LINUX */
+
+static struct crash_info {
+	/**
+	 * These two are mostly useless: they are plain
+	 * addresses, and without a real binary crash
+	 * dump file we can't use them for any meaningful
+	 * analysis. They are kept only for backward
+	 * compatibility.
+	 */
+	void *context_addr;
+	void *siginfo_addr;
+#ifdef TARGET_OS_LINUX
+	/**
+	 * Registers contents.
+	 */
+	struct crash_greg greg;
+#endif
+	/**
+	 * Faulting address.
+	 */
+	void *siaddr;
+	/**
+	 * Crash signal number.
+	 */
+	int signo;
+	/**
+	 * Crash signal code.
+	 */
+	int sicode;
+#ifdef ENABLE_BACKTRACE
+	/**
+	 * 1K of memory should be enough to keep the backtrace.
+	 * In the worst case it will simply be truncated.
+	 */
+	char backtrace_buf[1024];
+#endif
+} crash_info;
+
+/**
+ * This routine is called from inside the crash signal handler,
+ * so be careful not to cause additional signals in it.
+ */
+static struct crash_info *
+crash_collect(int signo, siginfo_t *siginfo, void *ucontext)
+{
+	struct crash_info *cinfo = &crash_info;
+
+	cinfo->signo = signo;
+	cinfo->sicode = siginfo->si_code;
+	cinfo->siaddr = siginfo->si_addr;
+	cinfo->context_addr = ucontext;
+	cinfo->siginfo_addr = siginfo;
+
+#ifdef ENABLE_BACKTRACE
+	char *start = cinfo->backtrace_buf;
+	backtrace(start, sizeof(cinfo->backtrace_buf));
+#endif
+
+#ifdef TARGET_OS_LINUX
+	/*
+	 * uc_mcontext at the libc level looks somewhat strange:
+	 * it is defined as an array of uint64_t where each register
+	 * is identified by a REG_x macro.
+	 *
+	 * The kernel, in turn, is quite explicit about the context.
+	 * Moreover, it is a part of the user ABI and thus won't change.
+	 *
+	 * Let's use memcpy here to make the copy in a fast way.
+	 */
+	ucontext_t *uc = ucontext;
+	memcpy(&cinfo->greg, &uc->uc_mcontext, sizeof(cinfo->greg));
+#endif
+
+	return cinfo;
+}
+
+/**
+ * Report crash information to stderr
+ * (usually the current console).
+ */
+static void
+crash_report_stderr(struct crash_info *cinfo)
+{
+	if (cinfo->signo == SIGSEGV) {
+		fprintf(stderr, "Segmentation fault\n");
+		const char *signal_code_repr = NULL;
+
+		switch (cinfo->sicode) {
+		case SEGV_MAPERR:
+			signal_code_repr = "SEGV_MAPERR";
+			break;
+		case SEGV_ACCERR:
+			signal_code_repr = "SEGV_ACCERR";
+			break;
+		}
+
+		if (signal_code_repr != NULL)
+			fprintf(stderr, "  code: %s\n", signal_code_repr);
+		else
+			fprintf(stderr, "  code: %d\n", cinfo->sicode);
+		/*
+		 * fprintf is used instead of fdprintf, because
+		 * fdprintf does not understand %p
+		 */
+		fprintf(stderr, "  addr: %p\n", cinfo->siaddr);
+	} else {
+		fprintf(stderr, "Got a fatal signal %d\n", cinfo->signo);
+	}
+
+	fprintf(stderr, "  context: %p\n", cinfo->context_addr);
+	fprintf(stderr, "  siginfo: %p\n", cinfo->siginfo_addr);
+
+#ifdef TARGET_OS_LINUX
+# define fprintf_reg(__n, __v)				\
+	fprintf(stderr, "  %-9s0x%-17llx%lld\n",	\
+		__n, (long long)__v, (long long)__v)
+	fprintf_reg("rax", cinfo->greg.ax);
+	fprintf_reg("rbx", cinfo->greg.bx);
+	fprintf_reg("rcx", cinfo->greg.cx);
+	fprintf_reg("rdx", cinfo->greg.dx);
+	fprintf_reg("rsi", cinfo->greg.si);
+	fprintf_reg("rdi", cinfo->greg.di);
+	fprintf_reg("rsp", cinfo->greg.sp);
+	fprintf_reg("rbp", cinfo->greg.bp);
+	fprintf_reg("r8", cinfo->greg.r8);
+	fprintf_reg("r9", cinfo->greg.r9);
+	fprintf_reg("r10", cinfo->greg.r10);
+	fprintf_reg("r11", cinfo->greg.r11);
+	fprintf_reg("r12", cinfo->greg.r12);
+	fprintf_reg("r13", cinfo->greg.r13);
+	fprintf_reg("r14", cinfo->greg.r14);
+	fprintf_reg("r15", cinfo->greg.r15);
+	fprintf_reg("rip", cinfo->greg.ip);
+	fprintf_reg("eflags", cinfo->greg.flags);
+	fprintf_reg("cs", cinfo->greg.cs);
+	fprintf_reg("gs", cinfo->greg.gs);
+	fprintf_reg("fs", cinfo->greg.fs);
+	fprintf_reg("cr2", cinfo->greg.cr2);
+	fprintf_reg("err", cinfo->greg.err);
+	fprintf_reg("oldmask", cinfo->greg.oldmask);
+	fprintf_reg("trapno", cinfo->greg.trapno);
+# undef fprintf_reg
+#endif /* TARGET_OS_LINUX */
+
+	fprintf(stderr, "Current time: %u\n", (unsigned)time(0));
+	fprintf(stderr, "Please file a bug at "
+		"http://github.com/tarantool/tarantool/issues\n");
+
+#ifdef ENABLE_BACKTRACE
+	fprintf(stderr, "Attempting backtrace... Note: since the server has "
+		"already crashed, \nthis may fail as well\n");
+	fprintf(stderr, "%s", cinfo->backtrace_buf);
+#endif
+}
+
+/**
+ * Handle fatal (crashing) signal.
+ *
+ * Try to log as much as possible before dumping a core.
+ *
+ * Core files are not always allowed and it takes an effort to
+ * extract useful information from them.
+ *
+ * *Recursive invocation*
+ *
+ * Unless SIGSEGV is sent by kill(), Linux resets the signal
+ * to a default value before invoking the handler.
+ *
+ * Despite that, as an extra precaution to avoid infinite
+ * recursion, we track reentry into the handler: on a second
+ * invocation we only print a short note and abort().
+ */
+static void
+crash_signal_cb(int signo, siginfo_t *siginfo, void *context)
+{
+	static volatile sig_atomic_t in_cb = 0;
+	struct crash_info *cinfo;
+
+	if (in_cb == 0) {
+		in_cb = 1;
+		cinfo = crash_collect(signo, siginfo, context);
+		crash_report_stderr(cinfo);
+	} else {
+		/* Got a signal while running the handler. */
+		fprintf(stderr, "Fatal %d while backtracing", signo);
+	}
+
+	/* Try to dump a core */
+	struct sigaction sa = {
+		.sa_handler = SIG_DFL,
+	};
+	sigemptyset(&sa.sa_mask);
+	sigaction(SIGABRT, &sa, NULL);
+	abort();
+}
+
+/**
+ * Fatal signals for which we generate a crash report.
+ */
+static const int crash_signals[] = { SIGSEGV, SIGFPE };
+
+void
+crash_signal_reset(void)
+{
+	struct sigaction sa = {
+		.sa_handler = SIG_DFL,
+	};
+	sigemptyset(&sa.sa_mask);
+
+	for (size_t i = 0; i < lengthof(crash_signals); i++) {
+		if (sigaction(crash_signals[i], &sa, NULL) == 0)
+			continue;
+		pr_syserr("reset sigaction %d", crash_signals[i]);
+	}
+}
+
+void
+crash_signal_init(void)
+{
+	/*
+	 * SA_RESETHAND resets handler action to the default
+	 * one when entering handler.
+	 *
+	 * SA_NODEFER allows receiving the same signal
+	 * during handler.
+	 */
+	struct sigaction sa = {
+		.sa_flags = SA_RESETHAND | SA_NODEFER | SA_SIGINFO,
+		.sa_sigaction = crash_signal_cb,
+	};
+	sigemptyset(&sa.sa_mask);
+
+	for (size_t i = 0; i < lengthof(crash_signals); i++) {
+		if (sigaction(crash_signals[i], &sa, NULL) == 0)
+			continue;
+		pr_panic("sigaction %d (%s)", crash_signals[i],
+			 strerror(errno));
+	}
+}
diff --git a/src/lib/core/crash.h b/src/lib/core/crash.h
new file mode 100644
index 000000000..cd1db585e
--- /dev/null
+++ b/src/lib/core/crash.h
@@ -0,0 +1,26 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file.
+ */
+#pragma once
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+/**
+ * Initialize crash signal handlers.
+ */
+void
+crash_signal_init(void);
+
+/**
+ * Reset crash signal handlers.
+ */
+void
+crash_signal_reset(void);
+
+#if defined(__cplusplus)
+}
+#endif /* defined(__cplusplus) */
diff --git a/src/main.cc b/src/main.cc
index 2f48f474c..391e0f878 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -79,6 +79,7 @@
 #include "systemd.h"
 #include "crypto/crypto.h"
 #include "core/popen.h"
+#include "core/crash.h"
 
 static pid_t master_pid = getpid();
 static struct pidfh *pid_file_handle;
@@ -184,124 +185,6 @@ signal_sigwinch_cb(ev_loop *loop, struct ev_signal *w, int revents)
 		rl_resize_terminal();
 }
 
-#if defined(__linux__) && defined(__amd64)
-
-inline void
-dump_x86_64_register(const char *reg_name, unsigned long long val)
-{
-	fprintf(stderr, "  %-9s0x%-17llx%lld\n", reg_name, val, val);
-}
-
-void
-dump_x86_64_registers(ucontext_t *uc)
-{
-	dump_x86_64_register("rax", uc->uc_mcontext.gregs[REG_RAX]);
-	dump_x86_64_register("rbx", uc->uc_mcontext.gregs[REG_RBX]);
-	dump_x86_64_register("rcx", uc->uc_mcontext.gregs[REG_RCX]);
-	dump_x86_64_register("rdx", uc->uc_mcontext.gregs[REG_RDX]);
-	dump_x86_64_register("rsi", uc->uc_mcontext.gregs[REG_RSI]);
-	dump_x86_64_register("rdi", uc->uc_mcontext.gregs[REG_RDI]);
-	dump_x86_64_register("rsp", uc->uc_mcontext.gregs[REG_RSP]);
-	dump_x86_64_register("rbp", uc->uc_mcontext.gregs[REG_RBP]);
-	dump_x86_64_register("r8", uc->uc_mcontext.gregs[REG_R8]);
-	dump_x86_64_register("r9", uc->uc_mcontext.gregs[REG_R9]);
-	dump_x86_64_register("r10", uc->uc_mcontext.gregs[REG_R10]);
-	dump_x86_64_register("r11", uc->uc_mcontext.gregs[REG_R11]);
-	dump_x86_64_register("r12", uc->uc_mcontext.gregs[REG_R12]);
-	dump_x86_64_register("r13", uc->uc_mcontext.gregs[REG_R13]);
-	dump_x86_64_register("r14", uc->uc_mcontext.gregs[REG_R14]);
-	dump_x86_64_register("r15", uc->uc_mcontext.gregs[REG_R15]);
-	dump_x86_64_register("rip", uc->uc_mcontext.gregs[REG_RIP]);
-	dump_x86_64_register("eflags", uc->uc_mcontext.gregs[REG_EFL]);
-	dump_x86_64_register("cs", (uc->uc_mcontext.gregs[REG_CSGSFS] >> 0) & 0xffff);
-	dump_x86_64_register("gs", (uc->uc_mcontext.gregs[REG_CSGSFS] >> 16) & 0xffff);
-	dump_x86_64_register("fs", (uc->uc_mcontext.gregs[REG_CSGSFS] >> 32) & 0xffff);
-	dump_x86_64_register("cr2", uc->uc_mcontext.gregs[REG_CR2]);
-	dump_x86_64_register("err", uc->uc_mcontext.gregs[REG_ERR]);
-	dump_x86_64_register("oldmask", uc->uc_mcontext.gregs[REG_OLDMASK]);
-	dump_x86_64_register("trapno", uc->uc_mcontext.gregs[REG_TRAPNO]);
-}
-
-#endif /* defined(__linux__) && defined(__amd64) */
-
-/** Try to log as much as possible before dumping a core.
- *
- * Core files are not aways allowed and it takes an effort to
- * extract useful information from them.
- *
- * *Recursive invocation*
- *
- * Unless SIGSEGV is sent by kill(), Linux
- * resets the signal a default value before invoking
- * the handler.
- *
- * Despite that, as an extra precaution to avoid infinite
- * recursion, we count invocations of the handler, and
- * quietly _exit() when called for a second time.
- */
-static void
-sig_fatal_cb(int signo, siginfo_t *siginfo, void *context)
-{
-	static volatile sig_atomic_t in_cb = 0;
-	int fd = STDERR_FILENO;
-	struct sigaction sa;
-
-	/* Got a signal while running the handler. */
-	if (in_cb) {
-		fdprintf(fd, "Fatal %d while backtracing", signo);
-		goto end;
-	}
-
-	in_cb = 1;
-
-	if (signo == SIGSEGV) {
-		fdprintf(fd, "Segmentation fault\n");
-		const char *signal_code_repr = 0;
-		switch (siginfo->si_code) {
-		case SEGV_MAPERR:
-			signal_code_repr = "SEGV_MAPERR";
-			break;
-		case SEGV_ACCERR:
-			signal_code_repr = "SEGV_ACCERR";
-			break;
-		}
-		if (signal_code_repr)
-			fdprintf(fd, "  code: %s\n", signal_code_repr);
-		else
-			fdprintf(fd, "  code: %d\n", siginfo->si_code);
-		/*
-		 * fprintf is used insted of fdprintf, because
-		 * fdprintf does not understand %p
-		 */
-		fprintf(stderr, "  addr: %p\n", siginfo->si_addr);
-	} else
-		fdprintf(fd, "Got a fatal signal %d\n", signo);
-	fprintf(stderr, "  context: %p\n", context);
-	fprintf(stderr, "  siginfo: %p\n", siginfo);
-
-#if defined(__linux__) && defined(__amd64)
-	dump_x86_64_registers((ucontext_t *)context);
-#endif
-
-	fdprintf(fd, "Current time: %u\n", (unsigned) time(0));
-	fdprintf(fd,
-		 "Please file a bug at http://github.com/tarantool/tarantool/issues\n");
-
-#ifdef ENABLE_BACKTRACE
-	fdprintf(fd, "Attempting backtrace... Note: since the server has "
-		 "already crashed, \nthis may fail as well\n");
-	print_backtrace();
-#endif
-end:
-	/* Try to dump core. */
-	memset(&sa, 0, sizeof(sa));
-	sigemptyset(&sa.sa_mask);
-	sa.sa_handler = SIG_DFL;
-	sigaction(SIGABRT, &sa, NULL);
-
-	abort();
-}
-
 static void
 signal_free(void)
 {
@@ -328,11 +211,11 @@ signal_reset(void)
 	    sigaction(SIGINT, &sa, NULL) == -1 ||
 	    sigaction(SIGTERM, &sa, NULL) == -1 ||
 	    sigaction(SIGHUP, &sa, NULL) == -1 ||
-	    sigaction(SIGWINCH, &sa, NULL) == -1 ||
-	    sigaction(SIGSEGV, &sa, NULL) == -1 ||
-	    sigaction(SIGFPE, &sa, NULL) == -1)
+	    sigaction(SIGWINCH, &sa, NULL) == -1)
 		say_syserror("sigaction");
 
+	crash_signal_reset();
+
 	/* Unblock any signals blocked by libev. */
 	sigset_t sigset;
 	sigfillset(&sigset);
@@ -362,18 +245,7 @@ signal_init(void)
 	if (sigaction(SIGPIPE, &sa, 0) == -1)
 		panic_syserror("sigaction");
 
-	/*
-	 * SA_RESETHAND resets handler action to the default
-	 * one when entering handler.
-	 * SA_NODEFER allows receiving the same signal during handler.
-	 */
-	sa.sa_flags = SA_RESETHAND | SA_NODEFER | SA_SIGINFO;
-	sa.sa_sigaction = sig_fatal_cb;
-
-	if (sigaction(SIGSEGV, &sa, 0) == -1 ||
-	    sigaction(SIGFPE, &sa, 0) == -1) {
-		panic_syserror("sigaction");
-	}
+	crash_signal_init();
 
 	ev_signal_init(&ev_sigs[0], sig_checkpoint, SIGUSR1);
 	ev_signal_init(&ev_sigs[1], signal_cb, SIGINT);
-- 
2.26.2

  parent reply	other threads:[~2020-12-23 15:42 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-23 15:41 [Tarantool-patches] [PATCH v5 0/4] Our feedback daemon sends only a few portions of usage Cyrill Gorcunov
2020-12-23 15:41 ` [Tarantool-patches] [PATCH v5 1/4] util: introduce strlcpy helper Cyrill Gorcunov
2020-12-23 15:41 ` [Tarantool-patches] [PATCH v5 2/4] backtrace: allow to specify destination buffer Cyrill Gorcunov
2020-12-23 15:41 ` Cyrill Gorcunov [this message]
2020-12-23 15:41 ` [Tarantool-patches] [PATCH v5 4/4] crash: report crash data to the feedback server Cyrill Gorcunov
2020-12-23 18:47   ` Vladislav Shpilevoy
2020-12-23 18:57     ` Cyrill Gorcunov
2020-12-23 21:22     ` Cyrill Gorcunov
2020-12-24 13:16       ` Cyrill Gorcunov
2020-12-24 17:15         ` Vladislav Shpilevoy
2020-12-24 17:33           ` Cyrill Gorcunov
2020-12-24 18:22             ` Vladislav Shpilevoy
2020-12-24 18:33               ` Cyrill Gorcunov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201223154155.234884-4-gorcunov@gmail.com \
    --to=gorcunov@gmail.com \
    --cc=tarantool-patches@dev.tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v5 3/4] crash: move fatal signal handling in' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox