From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-lf1-f49.google.com (mail-lf1-f49.google.com [209.85.167.49]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 71C8B4765E5 for ; Wed, 23 Dec 2020 18:42:35 +0300 (MSK) Received: by mail-lf1-f49.google.com with SMTP id l11so41112402lfg.0 for ; Wed, 23 Dec 2020 07:42:35 -0800 (PST) From: Cyrill Gorcunov Date: Wed, 23 Dec 2020 18:41:54 +0300 Message-Id: <20201223154155.234884-4-gorcunov@gmail.com> In-Reply-To: <20201223154155.234884-1-gorcunov@gmail.com> References: <20201223154155.234884-1-gorcunov@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH v5 3/4] crash: move fatal signal handling in List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tml Cc: Vladislav Shpilevoy When SIGSEGV or SIGFPE reaches tarantool we try to gather all information related to the crash and print it out to the console (well, stderr actually). Still, there is a request to not just show this info locally but also send it out to the feedback server. Thus, to keep the gathering of crash-related information in one module, we move fatal signal handling into a separate crash.c file. This allows us to collect the data we need in one place and reuse it when we need to send reports to stderr (and to the feedback server, which will be implemented in the next patch). 
Part-of #5261 Signed-off-by: Cyrill Gorcunov --- src/lib/core/CMakeLists.txt | 1 + src/lib/core/crash.c | 295 ++++++++++++++++++++++++++++++++++++ src/lib/core/crash.h | 26 ++++ src/main.cc | 138 +---------------- 4 files changed, 327 insertions(+), 133 deletions(-) create mode 100644 src/lib/core/crash.c create mode 100644 src/lib/core/crash.h diff --git a/src/lib/core/CMakeLists.txt b/src/lib/core/CMakeLists.txt index 7c62fc5ce..30cf0dd15 100644 --- a/src/lib/core/CMakeLists.txt +++ b/src/lib/core/CMakeLists.txt @@ -1,5 +1,6 @@ set(core_sources diag.c + crash.c say.c memory.c clock.c diff --git a/src/lib/core/crash.c b/src/lib/core/crash.c new file mode 100644 index 000000000..3929463f3 --- /dev/null +++ b/src/lib/core/crash.c @@ -0,0 +1,295 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file. + */ + +#include +#include +#include +#include + +#include "trivia/util.h" + +#include "backtrace.h" +#include "crash.h" +#include "say.h" + +#define pr_fmt(fmt) "crash: " fmt +#define pr_syserr(fmt, ...) say_syserror(pr_fmt(fmt), ##__VA_ARGS__) +#define pr_panic(fmt, ...) 
panic(pr_fmt(fmt), ##__VA_ARGS__) + +#ifdef TARGET_OS_LINUX +#ifndef __x86_64__ +# error "Non x86-64 architectures are not supported" +#endif +struct crash_greg { + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t di; + uint64_t si; + uint64_t bp; + uint64_t bx; + uint64_t dx; + uint64_t ax; + uint64_t cx; + uint64_t sp; + uint64_t ip; + uint64_t flags; + uint16_t cs; + uint16_t gs; + uint16_t fs; + uint16_t ss; + uint64_t err; + uint64_t trapno; + uint64_t oldmask; + uint64_t cr2; + uint64_t fpstate; + uint64_t reserved1[8]; +}; +#endif /* TARGET_OS_LINUX */ + +static struct crash_info { + /** + * These two are mostly useless: they are + * plain addresses, and without a real binary + * crash dump file we can't use them for + * anything meaningful (for the sake of analysis), + * but we keep them for backward compatibility. + */ + void *context_addr; + void *siginfo_addr; +#ifdef TARGET_OS_LINUX + /** + * Register contents. + */ + struct crash_greg greg; +#endif + /** + * Faulting address. + */ + void *siaddr; + /** + * Crash signal number. + */ + int signo; + /** + * Crash signal code. + */ + int sicode; +#ifdef ENABLE_BACKTRACE + /** + * 1K of memory should be enough to keep the backtrace. + * In the worst case it is simply going to be trimmed. + */ + char backtrace_buf[1024]; +#endif +} crash_info; + +/** + * The routine is called inside the crash signal handler, so + * be careful not to cause additional signals inside. 
+ */ +static struct crash_info * +crash_collect(int signo, siginfo_t *siginfo, void *ucontext) +{ + struct crash_info *cinfo = &crash_info; + + cinfo->signo = signo; + cinfo->sicode = siginfo->si_code; + cinfo->siaddr = siginfo->si_addr; + cinfo->context_addr = ucontext; + cinfo->siginfo_addr = siginfo; + +#ifdef ENABLE_BACKTRACE + char *start = cinfo->backtrace_buf; + backtrace(start, sizeof(cinfo->backtrace_buf)); +#endif + +#ifdef TARGET_OS_LINUX + /* + * uc_mcontext on libc level looks somewhat strange, + * they define an array of uint64_t where each register + * is addressed by a REG_x macro. + * + * In turn the kernel is quite explicit about the context. + * Moreover it is a part of user ABI, thus won't be changed. + * + * Let's use memcpy here to make a copy in a fast way. + */ + ucontext_t *uc = ucontext; + memcpy(&cinfo->greg, &uc->uc_mcontext, sizeof(cinfo->greg)); +#endif + + return cinfo; +} + +/** + * Report crash information to stderr + * (usually the current console). + */ +static void +crash_report_stderr(struct crash_info *cinfo) +{ + if (cinfo->signo == SIGSEGV) { + fprintf(stderr, "Segmentation fault\n"); + const char *signal_code_repr = NULL; + + switch (cinfo->sicode) { + case SEGV_MAPERR: + signal_code_repr = "SEGV_MAPERR"; + break; + case SEGV_ACCERR: + signal_code_repr = "SEGV_ACCERR"; + break; + } + + if (signal_code_repr != NULL) + fprintf(stderr, " code: %s\n", signal_code_repr); + else + fprintf(stderr, " code: %d\n", cinfo->sicode); + /* + * fprintf is used instead of fdprintf, because + * fdprintf does not understand %p + */ + fprintf(stderr, " addr: %p\n", cinfo->siaddr); + } else { + fprintf(stderr, "Got a fatal signal %d\n", cinfo->signo); + } + + fprintf(stderr, " context: %p\n", cinfo->context_addr); + fprintf(stderr, " siginfo: %p\n", cinfo->siginfo_addr); + +#ifdef TARGET_OS_LINUX +# define fprintf_reg(__n, __v) \ + fprintf(stderr, " %-9s0x%-17llx%lld\n", \ + __n, (long long)__v, (long long)__v) + fprintf_reg("rax", cinfo->greg.ax); + 
fprintf_reg("rbx", cinfo->greg.bx); + fprintf_reg("rcx", cinfo->greg.cx); + fprintf_reg("rdx", cinfo->greg.dx); + fprintf_reg("rsi", cinfo->greg.si); + fprintf_reg("rdi", cinfo->greg.di); + fprintf_reg("rsp", cinfo->greg.sp); + fprintf_reg("rbp", cinfo->greg.bp); + fprintf_reg("r8", cinfo->greg.r8); + fprintf_reg("r9", cinfo->greg.r9); + fprintf_reg("r10", cinfo->greg.r10); + fprintf_reg("r11", cinfo->greg.r11); + fprintf_reg("r12", cinfo->greg.r12); + fprintf_reg("r13", cinfo->greg.r13); + fprintf_reg("r14", cinfo->greg.r14); + fprintf_reg("r15", cinfo->greg.r15); + fprintf_reg("rip", cinfo->greg.ip); + fprintf_reg("eflags", cinfo->greg.flags); + fprintf_reg("cs", cinfo->greg.cs); + fprintf_reg("gs", cinfo->greg.gs); + fprintf_reg("fs", cinfo->greg.fs); + fprintf_reg("cr2", cinfo->greg.cr2); + fprintf_reg("err", cinfo->greg.err); + fprintf_reg("oldmask", cinfo->greg.oldmask); + fprintf_reg("trapno", cinfo->greg.trapno); +# undef fprintf_reg +#endif /* TARGET_OS_LINUX */ + + fprintf(stderr, "Current time: %u\n", (unsigned)time(0)); + fprintf(stderr, "Please file a bug at " + "http://github.com/tarantool/tarantool/issues\n"); + +#ifdef ENABLE_BACKTRACE + fprintf(stderr, "Attempting backtrace... Note: since the server has " + "already crashed, \nthis may fail as well\n"); + fprintf(stderr, "%s", cinfo->backtrace_buf); +#endif +} + +/** + * Handle fatal (crashing) signal. + * + * Try to log as much as possible before dumping a core. + * + * Core files are not always allowed and it takes an effort to + * extract useful information from them. + * + * *Recursive invocation* + * + * Unless SIGSEGV is sent by kill(), Linux resets the signal + * to a default value before invoking the handler. + * + * Despite that, as an extra precaution to avoid infinite + * recursion, we flag entry into the handler and, when called + * for a second time, only print a short note before abort(). 
+ */ +static void +crash_signal_cb(int signo, siginfo_t *siginfo, void *context) +{ + static volatile sig_atomic_t in_cb = 0; + struct crash_info *cinfo; + + if (in_cb == 0) { + in_cb = 1; + cinfo = crash_collect(signo, siginfo, context); + crash_report_stderr(cinfo); + } else { + /* Got a signal while running the handler. */ + fprintf(stderr, "Fatal %d while backtracing", signo); + } + + /* Try to dump a core */ + struct sigaction sa = { + .sa_handler = SIG_DFL, + }; + sigemptyset(&sa.sa_mask); + sigaction(SIGABRT, &sa, NULL); + abort(); +} + +/** + * Fatal signals we generate crash on. + */ +static const int crash_signals[] = { SIGSEGV, SIGFPE }; + +void +crash_signal_reset(void) +{ + struct sigaction sa = { + .sa_handler = SIG_DFL, + }; + sigemptyset(&sa.sa_mask); + + for (size_t i = 0; i < lengthof(crash_signals); i++) { + if (sigaction(crash_signals[i], &sa, NULL) == 0) + continue; + pr_syserr("reset sigaction %d", crash_signals[i]); + } +} + +void +crash_signal_init(void) +{ + /* + * SA_RESETHAND resets handler action to the default + * one when entering handler. + * + * SA_NODEFER allows receiving the same signal + * during handler. + */ + struct sigaction sa = { + .sa_flags = SA_RESETHAND | SA_NODEFER | SA_SIGINFO, + .sa_sigaction = crash_signal_cb, + }; + sigemptyset(&sa.sa_mask); + + for (size_t i = 0; i < lengthof(crash_signals); i++) { + if (sigaction(crash_signals[i], &sa, NULL) == 0) + continue; + pr_panic("sigaction %d (%s)", crash_signals[i], + strerror(errno)); + } +} diff --git a/src/lib/core/crash.h b/src/lib/core/crash.h new file mode 100644 index 000000000..cd1db585e --- /dev/null +++ b/src/lib/core/crash.h @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file. + */ +#pragma once + +#if defined(__cplusplus) +extern "C" { +#endif /* defined(__cplusplus) */ + +/** + * Initialize crash signal handlers. 
+ */ +void +crash_signal_init(void); + +/** + * Reset crash signal handlers. + */ +void +crash_signal_reset(void); + +#if defined(__cplusplus) +} +#endif /* defined(__cplusplus) */ diff --git a/src/main.cc b/src/main.cc index 2f48f474c..391e0f878 100644 --- a/src/main.cc +++ b/src/main.cc @@ -79,6 +79,7 @@ #include "systemd.h" #include "crypto/crypto.h" #include "core/popen.h" +#include "core/crash.h" static pid_t master_pid = getpid(); static struct pidfh *pid_file_handle; @@ -184,124 +185,6 @@ signal_sigwinch_cb(ev_loop *loop, struct ev_signal *w, int revents) rl_resize_terminal(); } -#if defined(__linux__) && defined(__amd64) - -inline void -dump_x86_64_register(const char *reg_name, unsigned long long val) -{ - fprintf(stderr, " %-9s0x%-17llx%lld\n", reg_name, val, val); -} - -void -dump_x86_64_registers(ucontext_t *uc) -{ - dump_x86_64_register("rax", uc->uc_mcontext.gregs[REG_RAX]); - dump_x86_64_register("rbx", uc->uc_mcontext.gregs[REG_RBX]); - dump_x86_64_register("rcx", uc->uc_mcontext.gregs[REG_RCX]); - dump_x86_64_register("rdx", uc->uc_mcontext.gregs[REG_RDX]); - dump_x86_64_register("rsi", uc->uc_mcontext.gregs[REG_RSI]); - dump_x86_64_register("rdi", uc->uc_mcontext.gregs[REG_RDI]); - dump_x86_64_register("rsp", uc->uc_mcontext.gregs[REG_RSP]); - dump_x86_64_register("rbp", uc->uc_mcontext.gregs[REG_RBP]); - dump_x86_64_register("r8", uc->uc_mcontext.gregs[REG_R8]); - dump_x86_64_register("r9", uc->uc_mcontext.gregs[REG_R9]); - dump_x86_64_register("r10", uc->uc_mcontext.gregs[REG_R10]); - dump_x86_64_register("r11", uc->uc_mcontext.gregs[REG_R11]); - dump_x86_64_register("r12", uc->uc_mcontext.gregs[REG_R12]); - dump_x86_64_register("r13", uc->uc_mcontext.gregs[REG_R13]); - dump_x86_64_register("r14", uc->uc_mcontext.gregs[REG_R14]); - dump_x86_64_register("r15", uc->uc_mcontext.gregs[REG_R15]); - dump_x86_64_register("rip", uc->uc_mcontext.gregs[REG_RIP]); - dump_x86_64_register("eflags", uc->uc_mcontext.gregs[REG_EFL]); - 
dump_x86_64_register("cs", (uc->uc_mcontext.gregs[REG_CSGSFS] >> 0) & 0xffff); - dump_x86_64_register("gs", (uc->uc_mcontext.gregs[REG_CSGSFS] >> 16) & 0xffff); - dump_x86_64_register("fs", (uc->uc_mcontext.gregs[REG_CSGSFS] >> 32) & 0xffff); - dump_x86_64_register("cr2", uc->uc_mcontext.gregs[REG_CR2]); - dump_x86_64_register("err", uc->uc_mcontext.gregs[REG_ERR]); - dump_x86_64_register("oldmask", uc->uc_mcontext.gregs[REG_OLDMASK]); - dump_x86_64_register("trapno", uc->uc_mcontext.gregs[REG_TRAPNO]); -} - -#endif /* defined(__linux__) && defined(__amd64) */ - -/** Try to log as much as possible before dumping a core. - * - * Core files are not aways allowed and it takes an effort to - * extract useful information from them. - * - * *Recursive invocation* - * - * Unless SIGSEGV is sent by kill(), Linux - * resets the signal a default value before invoking - * the handler. - * - * Despite that, as an extra precaution to avoid infinite - * recursion, we count invocations of the handler, and - * quietly _exit() when called for a second time. - */ -static void -sig_fatal_cb(int signo, siginfo_t *siginfo, void *context) -{ - static volatile sig_atomic_t in_cb = 0; - int fd = STDERR_FILENO; - struct sigaction sa; - - /* Got a signal while running the handler. 
*/ - if (in_cb) { - fdprintf(fd, "Fatal %d while backtracing", signo); - goto end; - } - - in_cb = 1; - - if (signo == SIGSEGV) { - fdprintf(fd, "Segmentation fault\n"); - const char *signal_code_repr = 0; - switch (siginfo->si_code) { - case SEGV_MAPERR: - signal_code_repr = "SEGV_MAPERR"; - break; - case SEGV_ACCERR: - signal_code_repr = "SEGV_ACCERR"; - break; - } - if (signal_code_repr) - fdprintf(fd, " code: %s\n", signal_code_repr); - else - fdprintf(fd, " code: %d\n", siginfo->si_code); - /* - * fprintf is used insted of fdprintf, because - * fdprintf does not understand %p - */ - fprintf(stderr, " addr: %p\n", siginfo->si_addr); - } else - fdprintf(fd, "Got a fatal signal %d\n", signo); - fprintf(stderr, " context: %p\n", context); - fprintf(stderr, " siginfo: %p\n", siginfo); - -#if defined(__linux__) && defined(__amd64) - dump_x86_64_registers((ucontext_t *)context); -#endif - - fdprintf(fd, "Current time: %u\n", (unsigned) time(0)); - fdprintf(fd, - "Please file a bug at http://github.com/tarantool/tarantool/issues\n"); - -#ifdef ENABLE_BACKTRACE - fdprintf(fd, "Attempting backtrace... Note: since the server has " - "already crashed, \nthis may fail as well\n"); - print_backtrace(); -#endif -end: - /* Try to dump core. */ - memset(&sa, 0, sizeof(sa)); - sigemptyset(&sa.sa_mask); - sa.sa_handler = SIG_DFL; - sigaction(SIGABRT, &sa, NULL); - - abort(); -} - static void signal_free(void) { @@ -328,11 +211,11 @@ signal_reset(void) sigaction(SIGINT, &sa, NULL) == -1 || sigaction(SIGTERM, &sa, NULL) == -1 || sigaction(SIGHUP, &sa, NULL) == -1 || - sigaction(SIGWINCH, &sa, NULL) == -1 || - sigaction(SIGSEGV, &sa, NULL) == -1 || - sigaction(SIGFPE, &sa, NULL) == -1) + sigaction(SIGWINCH, &sa, NULL) == -1) say_syserror("sigaction"); + crash_signal_reset(); + /* Unblock any signals blocked by libev. 
*/ sigset_t sigset; sigfillset(&sigset); @@ -362,18 +245,7 @@ signal_init(void) if (sigaction(SIGPIPE, &sa, 0) == -1) panic_syserror("sigaction"); - /* - * SA_RESETHAND resets handler action to the default - * one when entering handler. - * SA_NODEFER allows receiving the same signal during handler. - */ - sa.sa_flags = SA_RESETHAND | SA_NODEFER | SA_SIGINFO; - sa.sa_sigaction = sig_fatal_cb; - - if (sigaction(SIGSEGV, &sa, 0) == -1 || - sigaction(SIGFPE, &sa, 0) == -1) { - panic_syserror("sigaction"); - } + crash_signal_init(); ev_signal_init(&ev_sigs[0], sig_checkpoint, SIGUSR1); ev_signal_init(&ev_sigs[1], signal_cb, SIGINT); -- 2.26.2