[Tarantool-patches] [PATCH v3 2/4] errstat: add crash report base code

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sat Dec 5 21:33:05 MSK 2020


Thanks for the patch!

See 22 comments below.

On 04.12.2020 16:30, Cyrill Gorcunov via Tarantool-patches wrote:
> The errstat stands for error statistics. At moment it supports
> gathering crash dumps into an internal preallocated buffer
> which includes:
> 
>  - `uname` output
>  - build type
>  - a reason for the crash
>  - call backtrace (linux x86-64 only)
> 
> The data is collected into json format and then encoded into
> base64 form. Moreover the backtrace itself is preencoded as
> base64 earlier because we don't need the json values to consist
> some weird characters.
> 
> Once data is collected one can run errstat_exec_send_crash function
> which executes another copy of tarantool and passes it a script
> in form of
> 
>  > tarantool -e "require('http.client').post('127.0.0.1:1500', \
>  > '{"crashdump":{"version":"1","data":"eyJ1bmFtZSI6eyJzeXNuYW1l.."}}', \
>  > {timeout=1}); os.exit(1);"
> 
> The address of the network dump collector is configurable via traditional
> box.cfg{feedback_host=addr}.
> 
> Note that we're trying to use preallocated memory for data collecting
> since this code is supposed to be called from inside of a fatal signal
> handler. For simplicity we use snprintf, strlen, memcpy inside encoder
> but should consider to implement own helpers instead to minimize the
> use of system libraries. For exactly the same reason we use inplace
> base64 encode helper.
> 
> When data is ready we send it with help of vfork+execve, this is done
> to make sure that local coredump is still generated even after the
> report is sent.
> 
> A typical encoded data looks like
> 
>  | {
>  |   "uname": {
>  |     "sysname": "Linux",
>  |     "release": "5.9.10-100.fc32.x86_64",
>  |     "version": "#1 SMP Mon Nov 23 18:12:36 UTC 2020",
>  |     "machine": "x86_64"
>  |   },
>  |   "build": {
>  |     "major": 2,
>  |     "minor": 7,
>  |     "patch": 0,

1. Why do you need the version numbers and the 'version'
string both? All can be extracted from 'version' string.

>  |     "version": "2.7.0-78-gf898822f9",
>  |     "cmake_type": "Linux-x86_64-Debug"
>  |   },
>  |   "signal": {
>  |     "signo": 11,
>  |     "si_code": 0,
>  |     "si_addr": "0x3e800009727",
>  |     "backtrace": "IzAgIDB4NjMyODhiIGlu..",
>  |     "timestamp": "0x164ceceac0610ae9",

2. Why is timestamp in hex? And why do you need both
hex and string?

>  |     "timestamp_str": "2020-12-02 17:34:20 MSK"
>  |   }
>  | }
> 
> Part-of #5261

3. Lets better put the issue link in the end, like we do in 100%
of our patches except when a docbot request is present. Because
we usually look for issue reference in the end, and by putting it
into a random place of a commit message you complicate that
search.

> Worth to mention that the patch uses box.cfg's
> 
>  - feedback_host
>  - feedback_enabled
>  - feedback_no_crashinfo
> 
> variables internally and feedback_no_crashinfo will be introduced
> in next paches, thus after this patch the crash report is disabled.

4. paches -> patches. You need to turn on a spell checker or something.

> diff --git a/src/lib/core/errstat.c b/src/lib/core/errstat.c
> new file mode 100644
> index 000000000..992bc426a
> --- /dev/null
> +++ b/src/lib/core/errstat.c
> @@ -0,0 +1,409 @@
> +/*
> + * SPDX-License-Identifier: BSD-2-Clause
> + *
> + * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file.
> + */
> +
> +#include <string.h>
> +#include <unistd.h>
> +#include <time.h>
> +#include <sys/types.h>
> +#include <sys/utsname.h>
> +
> +#include "trivia/util.h"
> +#include "backtrace.h"
> +#include "errstat.h"
> +#include "cfg.h"
> +#include "say.h"
> +
> +#define pr_fmt(fmt)		"errstat: " fmt
> +#define pr_info(fmt, ...)	say_info(pr_fmt(fmt), ##__VA_ARGS__)
> +#define pr_err(fmt, ...)	say_error(pr_fmt(fmt), ##__VA_ARGS__)
> +#define pr_crit(fmt, ...)	fprintf(stderr, pr_fmt(fmt) "\n", ##__VA_ARGS__)
> +#define pr_panic(fmt, ...)	panic(pr_fmt(fmt), ##__VA_ARGS__)
> +
> +static struct errstat glob_errstat;
> +static bool cfg_send_crash = false;

5. Why do you have a global object for this module, and yet
this part of it is separated into another global variable?

> +
> +/*
> + * We don't need it to be optimized but rather a compact form.
> + */
> +static unsigned char *
> +base64_encode(unsigned char *dst, unsigned char *src, size_t src_len)
> +{

6. Why can't you use the existing base64_encode() function from
base64.h? It is not different from your version. You both use global
variables (static also is global, just not visible everywhere). So
your version is not 'safer' in terms of being called from a signal
handler.

> +	static int m[] = {0, 2, 1};
> +	static unsigned char t[] = {
> +		'A','B','C','D','E','F','G','H',
> +		'I','J','K','L','M','N','O','P',
> +		'Q','R','S','T','U','V','W','X',
> +		'Y','Z','a','b','c','d','e','f',
> +		'g','h','i','j','k','l','m','n',
> +		'o','p','q','r','s','t','u','v',
> +		'w','x','y','z','0','1','2','3',
> +		'4','5','6','7','8','9','+','/'
> +	};
> +	size_t i, j;
> +
> +	for (i = 0, j = 0; i < src_len;) {
> +		uint32_t a = i < src_len ? src[i++] : 0;
> +		uint32_t b = i < src_len ? src[i++] : 0;
> +		uint32_t c = i < src_len ? src[i++] : 0;
> +
> +		uint32_t d = (a << 0x10) + (b << 0x08) + c;
> +
> +		dst[j++] = t[(d >> 3 * 6) & 0x3f];
> +		dst[j++] = t[(d >> 2 * 6) & 0x3f];
> +		dst[j++] = t[(d >> 1 * 6) & 0x3f];
> +		dst[j++] = t[(d >> 0 * 6) & 0x3f];
> +	}
> +
> +	size_t dst_len = ERRSTAT_BASE64_LEN(src_len);
> +	j = m[src_len % 3];
> +	for (i = 0; i < j; i++)
> +		dst[dst_len - 1 - i] = '=';
> +
> +	return dst;
> +}
> +
> +static size_t
> +strlcpy(char *dst, const char *src, size_t size)
> +{
> +	size_t ret = strlen(src);
> +	if (size) {
> +		size_t len = (ret >= size) ? size - 1 : ret;
> +		memcpy(dst, src, len);
> +		dst[len] = '\0';
> +	}
> +	return ret;
> +}
> +
> +#define strlcpy_a(dst, src) strlcpy(dst, src, sizeof(dst))

7. What is _a?

> +
> +struct errstat *
> +errstat_get(void)

8. Why do you ever need to get it? Why the stat is not entirely
in the .c file? It heavily depends on the platform, so it does
not make much sense to expose it to any external code. It won't
be able to use it properly except for some common things.

> +{
> +	return &glob_errstat;
> +}
> +
> +static inline
> +uint64_t timespec_to_ns(struct timespec *ts)

9. Please, put return type on a separate line. On the same line
as 'static inline'.

> +{
> +	return (uint64_t)ts->tv_sec * 1000000000 + (uint64_t)ts->tv_nsec;
> +}
> +
> +static char *
> +ns_to_localtime(uint64_t timestamp, char *buf, ssize_t len)
> +{
> +	time_t sec = timestamp / 1000000000;
> +	char *start = buf;
> +	struct tm tm;
> +
> +	/*
> +	 * Use similar format as say_x logger. Except plain
> +	 * seconds should be enough.
> +	 */
> +	localtime_r(&sec, &tm);
> +	ssize_t total = strftime(start, len, "%F %T %Z", &tm);
> +	start += total;
> +	if (total < len)
> +		return buf;
> +	buf[len-1] = '\0';

10. Lets conform with our code style and use spaces before and after
binary operators.

> +	return buf;
> +}
> +
> +void
> +errstat_init(const char *tarantool_bin)
> +{
> +	struct errstat_build *binfo = &errstat_get()->build_info;
> +	struct errstat_uname *uinfo = &errstat_get()->uname_info;
> +	struct errstat_crash *cinfo = &errstat_get()->crash_info;
> +
> +	binfo->major = PACKAGE_VERSION_MAJOR;
> +	binfo->minor = PACKAGE_VERSION_MINOR;
> +	binfo->patch = PACKAGE_VERSION_PATCH;
> +
> +	strlcpy_a(binfo->version, PACKAGE_VERSION);
> +	strlcpy_a(binfo->cmake_type, BUILD_INFO);
> +
> +	static_assert(ERRSTAT_UNAME_BUF_LEN > sizeof(struct errstat_uname),
> +		      "uname_buf is too small");
> +
> +	char uname_buf[ERRSTAT_UNAME_BUF_LEN];
> +	struct utsname *uname_ptr = (void *)uname_buf;
> +	if (uname(uname_ptr) == 0) {
> +		strlcpy_a(uinfo->sysname, uname_ptr->sysname);
> +		strlcpy_a(uinfo->nodename, uname_ptr->nodename);
> +		strlcpy_a(uinfo->release, uname_ptr->release);
> +		strlcpy_a(uinfo->version, uname_ptr->version);
> +		strlcpy_a(uinfo->machine, uname_ptr->machine);
> +	} else
> +		pr_err("can't fetch uname");

11. Lets conform with the code style, and use {} either in both 'if'
and 'else', or in none.

> +
> +	strlcpy_a(cinfo->tarantool_bin, tarantool_bin);
> +	if (strlen(cinfo->tarantool_bin) < strlen(tarantool_bin))
> +		pr_panic("can't save binary path");
> +
> +	static_assert(sizeof(cinfo->exec_argv_1) == 4,
> +		      "exec_argv_1 is too small");
> +	strlcpy_a(cinfo->exec_argv_1, "-e");
> +}
> +
> +void
> +cfg_errstat_set_params(void)

12. I suggest you to rename it to errstat_cfg().

'cfg' is a prefix used by 'cfg' module, which you also
use here. This is probably why we have box_cfg, raft_cfg_*,
iproto_cfg_* and so on.

> +{
> +	struct errstat_crash *cinfo = &errstat_get()->crash_info;
> +	const char *host = cfg_gets("feedback_host");
> +	int enabled = cfg_getb("feedback_enabled");

13. For flags we use name prefix 'is_'. Please, blend in.

> +	int no_crashinfo = cfg_getb("feedback_no_crashinfo");

14. That is another proof that the module name is misleading - you
even use crashinfo in the options. Does not it look consistent
to name the module 'crashinfo' too? (Or something else with 'crash'
and without 'stat' in name.)

> +
> +	if (host == NULL || enabled == 0 || no_crashinfo == 1) {
> +		if (cfg_send_crash) {
> +			pr_info("disable sedning crashinfo feedback");
> +			cfg_send_crash = false;
> +			cinfo->feedback_host[0] = '\0';
> +		}
> +		return;
> +	}
> +
> +	if (strcmp(cinfo->feedback_host, host) != 0) {
> +		strlcpy_a(cinfo->feedback_host, host);
> +		if (strlen(cinfo->feedback_host) < strlen(host))
> +			pr_panic("feedback_host is too long");
> +	}
> +
> +	if (!cfg_send_crash) {
> +		pr_info("enable sedning crashinfo feedback");
> +		cfg_send_crash = true;
> +	}
> +}
> +
> +void
> +errstat_reset(void)

15. This function is never used.

> +{
> +	struct errstat_crash *cinfo = &errstat_get()->crash_info;
> +
> +#ifdef ENABLE_BACKTRACE
> +	cinfo->backtrace_buf[0] = '\0';
> +#endif
> +	memset(&cinfo->siginfo, 0, sizeof(cinfo->siginfo));
> +	cinfo->timestamp_rt = 0;
> +}
> +
> +#ifdef TARGET_OS_LINUX
> +static void
> +collect_gregs(struct errstat_crash *cinfo, ucontext_t *uc)
> +{
> +	static_assert(sizeof(cinfo->greg) == sizeof(uc->uc_mcontext),
> +		      "GP regs are not matching signal frame");
> +
> +	/*
> +	 * uc_mcontext on libc level looks somehow strange,
> +	 * they define an array of uint64_t where each register
> +	 * defined by REG_x macro.
> +	 *
> +	 * In turn the kernel is quite explicit about the context.
> +	 * Moreover it is a part of user ABI, thus won't be changed.
> +	 *
> +	 * Lets use memcpy here to make a copy in a fast way.
> +	 */
> +	memcpy(&cinfo->greg, &uc->uc_mcontext, sizeof(cinfo->greg));
> +}
> +#endif
> +
> +/**
> + * The routine is called inside crash signal handler so
> + * be carefull to not cause additional signals inside.
> + */
> +void
> +errstat_collect_crash(int signo, siginfo_t *siginfo, void *ucontext)
> +{
> +	struct errstat_crash *cinfo = &errstat_get()->crash_info;
> +
> +	struct timespec ts;
> +	if (clock_gettime(CLOCK_REALTIME, &ts) == 0) {
> +		cinfo->timestamp_rt = timespec_to_ns(&ts);
> +		ns_to_localtime(cinfo->timestamp_rt,
> +				cinfo->timestamp_rt_str,
> +				sizeof(cinfo->timestamp_rt_str));
> +	} else {
> +		cinfo->timestamp_rt = 0;
> +		memset(cinfo->timestamp_rt_str, 0,
> +		       sizeof(cinfo->timestamp_rt_str));
> +	}
> +
> +	cinfo->signo = signo;
> +	cinfo->siginfo = *siginfo;
> +
> +	cinfo->context_addr = ucontext;
> +	cinfo->siginfo_addr = siginfo;
> +
> +#ifdef ENABLE_BACKTRACE
> +	char *start = cinfo->backtrace_buf;
> +	char *end = start + sizeof(cinfo->backtrace_buf) - 1;
> +	backtrace(start, end);
> +#endif
> +
> +#ifdef TARGET_OS_LINUX
> +	collect_gregs(cinfo, ucontext);
> +#endif
> +}
> +
> +/**
> + * Prepare report in json format and put it into a buffer.
> + */
> +static void
> +prepare_report_script(void)
> +{
> +	struct errstat_crash *cinfo = &errstat_get()->crash_info;
> +	struct errstat_build *binfo = &errstat_get()->build_info;
> +	struct errstat_uname *uinfo = &errstat_get()->uname_info;
> +
> +	char *p, *e;
> +
> +#ifdef ENABLE_BACKTRACE
> +	/*
> +	 * We can't use arbitrary data which can be misinterpreted
> +	 * by Lua script when we pass it to a script.
> +	 *
> +	 * WARNING: We use report_encoded as a temp buffer.

16. Static buffer is much bigger, and is already there. Why
can't you use it? Instead you somewhy add several kilobytes of
another static buffer padding out the process size.

> +	 */
> +	size_t bt_len = strlen(cinfo->backtrace_buf);
> +	size_t bt_elen = ERRSTAT_BASE64_LEN(bt_len);
> +	if (bt_elen >= sizeof(cinfo->report_encoded))
> +		pr_panic("backtrace space is too small");
> +
> +	base64_encode((unsigned char *)cinfo->report_encoded,
> +		      (unsigned char *)cinfo->backtrace_buf, bt_len);
> +	cinfo->report_encoded[bt_elen] = '\0';
> +	memcpy(cinfo->backtrace_buf, cinfo->report_encoded, bt_elen + 1);
> +#endif
> +
> +#define snprintf_safe(fmt, ...)					\
> +	do {							\
> +		p += snprintf(p, e - p, fmt, ##__VA_ARGS__);	\
> +		if (p >= e)					\
> +			goto out;				\
> +	} while (0)
> +
> +	e = cinfo->report + sizeof(cinfo->report) - 1;
> +	p = cinfo->report;
> +
> +	snprintf_safe("{");
> +	snprintf_safe("\"uname\":{");
> +	snprintf_safe("\"sysname\":\"%s\",", uinfo->sysname);
> +#if 0> +	/*
> +	 * nodename might a sensitive information so don't
> +	 * send it by default.
> +	 */
> +	snprintf_safe("\"nodename\":\"%s\",", uinfo->nodename);
> +#endif
> +	snprintf_safe("\"release\":\"%s\",", uinfo->release);
> +	snprintf_safe("\"version\":\"%s\",", uinfo->version);
> +	snprintf_safe("\"machine\":\"%s\"", uinfo->machine);
> +	snprintf_safe("},");
> +
> +	snprintf_safe("\"build\":{");
> +	snprintf_safe("\"major\":%d,", binfo->major);
> +	snprintf_safe("\"minor\":%d,", binfo->minor);
> +	snprintf_safe("\"patch\":%d,", binfo->patch);
> +	snprintf_safe("\"version\":\"%s\",", binfo->version);
> +	snprintf_safe("\"cmake_type\":\"%s\"", binfo->cmake_type);
> +	snprintf_safe("},");
> +
> +	snprintf_safe("\"signal\":{");
> +	snprintf_safe("\"signo\":%d,", cinfo->signo);
> +	snprintf_safe("\"si_code\":%d,", cinfo->siginfo.si_code);
> +	if (cinfo->signo == SIGSEGV) {
> +		if (cinfo->siginfo.si_code == SEGV_MAPERR) {
> +			snprintf_safe("\"si_code_str\":\"%s\",",
> +				      "SEGV_MAPERR");
> +		} else if (cinfo->siginfo.si_code == SEGV_ACCERR) {
> +			snprintf_safe("\"si_code_str\":\"%s\",",
> +				      "SEGV_ACCERR");
> +		}
> +		snprintf_safe("\"si_addr\":\"0x%llx\",",
> +			      (long long)cinfo->siginfo.si_addr);
> +	}
> +#ifdef ENABLE_BACKTRACE
> +	snprintf_safe("\"backtrace\":\"%s\",", cinfo->backtrace_buf);
> +#endif
> +	snprintf_safe("\"timestamp\":\"0x%llx\",",
> +		      (long long)cinfo->timestamp_rt);
> +	snprintf_safe("\"timestamp_str\":\"%s\"",
> +		      cinfo->timestamp_rt_str);
> +	snprintf_safe("}");
> +	snprintf_safe("}\'");
> +
> +	size_t report_len = strlen(cinfo->report);
> +	size_t report_elen = ERRSTAT_BASE64_LEN(report_len);
> +	if (report_elen >= sizeof(cinfo->report_encoded))
> +		pr_panic("report encoded space is too small");
> +
> +	base64_encode((unsigned char *)cinfo->report_encoded,
> +		      (unsigned char *)cinfo->report,
> +		      report_len);
> +	cinfo->report_encoded[report_elen] = '\0';
> +
> +	e = cinfo->report_script + sizeof(cinfo->report_script) - 1;
> +	p = cinfo->report_script;
> +
> +	snprintf_safe("require(\'http.client\').post(\'%s\',"
> +		      "'{\"crashdump\":{\"version\":\"%d\","
> +		      "\"data\":\"%s\"}}',{timeout=1}); os.exit(1);",

17. Why so small timeout?

> +		      cinfo->feedback_host,
> +		      ERRSTAT_REPORT_VERSION,
> +		      cinfo->report_encoded);
> +
> +#undef snprintf_safe
> +	return;
> +
> +out:
> +	pr_crit("unable to prepare a crash report");
> +	struct sigaction sa = {
> +		.sa_handler = SIG_DFL,
> +	};
> +	sigemptyset(&sa.sa_mask);
> +	sigaction(SIGABRT, &sa, NULL);

18. Why do you touch the signal handler here? You didn't
change it anywhere in this file.

> +
> +	abort();
> +}
> +
> +void
> +errstat_exec_send_crash(void)

19. Why is it called 'exec'?

> +{
> +	if (!cfg_send_crash)
> +		return;
> +
> +	prepare_report_script();
> +
> +	struct errstat_crash *cinfo = &errstat_get()->crash_info;
> +	cinfo->exec_argv[0] = cinfo->tarantool_bin;
> +	cinfo->exec_argv[1] = cinfo->exec_argv_1;
> +	cinfo->exec_argv[2] = cinfo->report_script;
> +	cinfo->exec_argv[3] = NULL;
> +
> +	/*
> +	 * Can't use fork here because libev has own
> +	 * at_fork helpers with mutex where we might
> +	 * stuck (see popen code).
> +	 */
> +	pid_t pid = vfork();
> +	if (pid < 0) {
> +		pr_crit("unable to fork (errno %d)", errno);
> +	} else if (pid == 0) {
> +		/*
> +		 * The script must exit at the end but there
> +		 * is no simple way to make sure from inside
> +		 * of a signal crash handler. So just hope it
> +		 * is running fine.
> +		 */
> +		execve(cinfo->tarantool_bin, cinfo->exec_argv, NULL);
> +		pr_crit("exec(%s,[%s,%s,%s,NULL]) failed",
> +			cinfo->tarantool_bin,
> +			cinfo->exec_argv[0],
> +			cinfo->exec_argv[1],
> +			cinfo->exec_argv[2]);
> +		_exit(1);
> +	}
> +}
> diff --git a/src/lib/core/errstat.h b/src/lib/core/errstat.h
> new file mode 100644
> index 000000000..77db53052
> --- /dev/null
> +++ b/src/lib/core/errstat.h
> @@ -0,0 +1,253 @@
> +/*
> + * SPDX-License-Identifier: BSD-2-Clause
> + *
> + * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file.
> + */
> +#pragma once
> +
> +#include <stdint.h>
> +#include <signal.h>
> +#include <limits.h>
> +
> +#include "trivia/config.h"
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif /* defined(__cplusplus) */
> +
> +#define ERRSTAT_REPORT_VERSION 1

20. It is not used out of .c file. I suggest you to
move it there.

> +
> +/**
> + * Build type information for statistics.
> + */
> +struct errstat_build {
> +	/**
> +	 * Package major version - 1 for 1.6.7.
> +	 */
> +	int major;
> +	/**
> +	 * Package minor version - 6 for 1.6.7
> +	 */
> +	int minor;
> +	/**
> +	 * Package patch version - 7 for 1.6.7
> +	 */
> +	int patch;
> +	/**
> +	 * A string with major-minor-patch-commit-id identifier of the
> +	 * release, e.g. 2.7.0-62-g0b7726571.
> +	 */
> +	char version[64];
> +	/**
> +	 * Build type (Debug and etc).
> +	 */
> +	char cmake_type[64];
> +};
> +
> +#ifdef TARGET_OS_LINUX
> +#ifndef __x86_64__
> +# error "Non x86-64 architectures are not supported"
> +#endif
> +struct errstat_greg {
> +	uint64_t	r8;
> +	uint64_t	r9;
> +	uint64_t	r10;
> +	uint64_t	r11;
> +	uint64_t	r12;
> +	uint64_t	r13;
> +	uint64_t	r14;
> +	uint64_t	r15;
> +	uint64_t	di;
> +	uint64_t	si;
> +	uint64_t	bp;
> +	uint64_t	bx;
> +	uint64_t	dx;
> +	uint64_t	ax;
> +	uint64_t	cx;
> +	uint64_t	sp;
> +	uint64_t	ip;
> +	uint64_t	flags;
> +	uint16_t	cs;
> +	uint16_t	gs;
> +	uint16_t	fs;
> +	uint16_t	ss;
> +	uint64_t	err;
> +	uint64_t	trapno;
> +	uint64_t	oldmask;
> +	uint64_t	cr2;
> +	uint64_t	fpstate;
> +	uint64_t	reserved1[8];
> +};
> +#endif /* TARGET_OS_LINUX */
> +
> +#define ERRSTAT_BASE64_LEN(len)			(4 * (((len) + 2) / 3))
> +
> +/*
> + * 4K of memory should be enough to keep the backtrace.
> + * In worst case it gonna be simply trimmed. Since we're
> + * reporting it encoded the pain text shrinks to 3070 bytes.
> + */
> +#define ERRSTAT_BACKTRACE_MAX			(4096)
> +
> +/*
> + * The report should include the bactrace
> + * and all additional information we're
> + * going to send.
> + */
> +#define ERRSTAT_REPORT_PAYLOAD_MAX		(2048)
> +#ifdef ENABLE_BACKTRACE
> +# define ERRSTAT_REPORT_MAX			\
> +	(ERRSTAT_BACKTRACE_MAX +		\
> +	 ERRSTAT_REPORT_PAYLOAD_MAX)
> +# else
> +# define ERRSTAT_REPORT_MAX			\
> +	(ERRSTAT_REPORT_PAYLOAD_MAX)
> +#endif
> +
> +/*
> + * We encode report into base64 because
> + * it is passed inside Lua script.
> + */
> +#define ERRSTAT_REPORT_ENCODED_MAX		\
> +	ERRSTAT_BASE64_LEN(ERRSTAT_REPORT_MAX)
> +
> +
> +/*
> + * The script to execute should contain encoded
> + * report.
> + */
> +#define ERRSTAT_REPORT_SCRIPT_PAYLOAD_MAX	(512)
> +#define ERRSTAT_REPORT_SCRIPT_MAX		\
> +	(ERRSTAT_REPORT_SCRIPT_PAYLOAD_MAX +	\
> +	 ERRSTAT_REPORT_ENCODED_MAX)
> +
> +struct errstat_crash {
> +	/**
> +	 * Exec arguments pointers.
> +	 */
> +	char *exec_argv[4];
> +	/**
> +	 * Predefined argument "-e".
> +	 */
> +	char exec_argv_1[4];
> +	/**
> +	 * Crash report in plain json format.
> +	 */
> +	char report[ERRSTAT_REPORT_MAX];
> +	/**
> +	 * Crash report in base64 form.
> +	 */
> +	char report_encoded[ERRSTAT_REPORT_ENCODED_MAX];
> +	/**
> +	 * Tarantool executable to send report stript.
> +	 */
> +	char tarantool_bin[PATH_MAX];
> +	/**
> +	 * The script to evaluate by tarantool
> +	 * to send the report.
> +	 */
> +	char report_script[ERRSTAT_REPORT_SCRIPT_MAX];
> +#ifdef ENABLE_BACKTRACE
> +	/**
> +	 * Backtrace buffer.
> +	 */
> +	char backtrace_buf[ERRSTAT_BACKTRACE_MAX];
> +#endif
> +	/**
> +	 * Crash signal.
> +	 */
> +	int signo;
> +	/**
> +	 * Signal information.
> +	 */
> +	siginfo_t siginfo;
> +	/**
> +	 * These two are mostly useless as being
> +	 * plain addresses but keep for backward
> +	 * compatibility.
> +	 */
> +	void *context_addr;
> +	void *siginfo_addr;
> +#ifdef TARGET_OS_LINUX
> +	/**
> +	 * Registers contents.
> +	 */
> +	struct errstat_greg greg;
> +#endif
> +	/**
> +	 * Timestamp in nanoseconds (realtime).
> +	 */
> +	uint64_t timestamp_rt;
> +	/**
> +	 * Timestamp string representation to
> +	 * use on demand.
> +	 */
> +	char timestamp_rt_str[32];
> +	/**
> +	 * Crash collector host.
> +	 */
> +	char feedback_host[1024];
> +};
> +
> +#define ERRSTAT_UNAME_BUF_LEN	1024
> +#define ERRSTAT_UNAME_FIELD_LEN	128
> +/**
> + * Information about node.
> + *
> + * On linux there is new_utsname structure which
> + * encodes each field to __NEW_UTS_LEN + 1 => 64 + 1 = 65.
> + * So lets just reserve more data in advance.
> + */
> +struct errstat_uname {
> +	char sysname[ERRSTAT_UNAME_FIELD_LEN];
> +	char nodename[ERRSTAT_UNAME_FIELD_LEN];
> +	char release[ERRSTAT_UNAME_FIELD_LEN];
> +	char version[ERRSTAT_UNAME_FIELD_LEN];
> +	char machine[ERRSTAT_UNAME_FIELD_LEN];
> +};
> +
> +struct errstat {
> +	struct errstat_build build_info;
> +	struct errstat_uname uname_info;
> +	struct errstat_crash crash_info;
> +};
> +
> +/**
> + * Return a pointer to the info keeper.
> + */
> +extern struct errstat *

21. Please, omit 'extern'. They don't make any difference
here, and stand out of all the other libs we have.

> +errstat_get(void);
> +
> +/**
> + * Initialize error statistics.
> + */
> +extern void
> +errstat_init(const char *tarantool_bin);
> +
> +/**
> + * Configure errstat from feedback daemon.
> + */
> +extern void
> +cfg_errstat_set_params(void);
> +
> +/**
> + * Reset everything except build information.
> + */
> +extern void
> +errstat_reset(void);
> +
> +/**
> + * Collect a crash.
> + */
> +extern void
> +errstat_collect_crash(int signo, siginfo_t *siginfo, void *context);
> +
> +/**
> + * Send a crash report.
> + */
> +extern void
> +errstat_exec_send_crash(void);

22. Why do you have collect and send separated? Why can't it just be
something like 'crashinfo_handle_crash(sig stuff...);'

> +
> +#if defined(__cplusplus)
> +}
> +#endif /* defined(__cplusplus) */
> 


More information about the Tarantool-patches mailing list