[Tarantool-patches] [PATCH v4 4/4] crash: report crash data to the feedback server
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Sun Dec 20 18:16:46 MSK 2020
I see you didn't push it to the branch. Please do. Otherwise I can't
validate whether it works.
See 6 comments below.
> diff --git a/src/box/box.cc b/src/box/box.cc
> index a8bc3471d..27079fd46 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -1213,6 +1214,21 @@ box_set_prepared_stmt_cache_size(void)
> return 0;
> }
>
> +void
> +box_set_crash_params(void)
> +{
> + const char *host = cfg_gets("feedback_host");
> + bool is_enabled_1 = cfg_getb("feedback_enabled");
> + bool is_enabled_2 = cfg_getb("feedback_crashinfo");
> +
> + if (host != NULL && strlen(host) >= CRASH_FEEDBACK_HOST_MAX) {
> + tnt_raise(ClientError, ER_CFG, "feedback_host",
> + "the address is too long");
1. Please don't use exceptions in new code. We are getting rid
of them, and such changes will complicate that work in the future.
> + }
> +
> + crash_cfg_set_params(host, is_enabled_1 && is_enabled_2);
2. Just 'crash_cfg', please. We have lots of 'cfg' functions for
various modules, and none of them has a 'set_params' suffix. Probably
because this is what 'cfg' means anyway.
> +}
> diff --git a/src/lib/core/crash.c b/src/lib/core/crash.c
> index a15a13e8e..f6dd91987 100644
> --- a/src/lib/core/crash.c
> +++ b/src/lib/core/crash.c
> @@ -81,13 +96,75 @@ static struct crash_info {
> int sicode;
> #ifdef ENABLE_BACKTRACE
> /*
> - * 4K of memory should be enough to keep the backtrace.
> + * 1K of memory should be enough to keep the backtrace.
> * In worst case it gonna be simply trimmed.
> */
> - char backtrace_buf[4096];
> + char backtrace_buf[1024];
3. I am begging you. Please. Stop mixing independent changes with
each other. Please. How can I ask otherwise? What can I do to
explain this? How can I help to understand?
> #endif
> @@ -126,6 +209,196 @@ crash_collect(int signo, siginfo_t *siginfo, void *ucontext)
> return cinfo;
> }
>
> +/**
> + * Mark an environment that we're in crashinfo handling, this
> + * allows us to escape recursive attempts to send report,
> + * if the action of sending report is failing itself.
> + */
> +static int
> +crash_mark_env_mode(void)
> +{
> + const char *env_name = "TT_CRASHINFO_MODE";
> + if (getenv(env_name) != NULL) {
> + pr_crit("recursive failure detected");
> + return -1;
> + }
> +
> + if (setenv(env_name, "y", 0) != 0) {
> + pr_crit("unable to setup %s", env_name);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +/**
> + * Report crash information to the feedback daemon
> + * (ie send it to feedback daemon).
> + */
> +static void
> +crash_report_feedback_daemon(struct crash_info *cinfo)
> +{
> + if (crash_mark_env_mode() != 0)
> + return;
> +
> + /*
> + * Update to a new number if format get changed.
> + */
> + const int crashinfo_version = 1;
> +
> + char *p = static_alloc(SMALL_STATIC_SIZE);
> + char *e = &p[SMALL_STATIC_SIZE - 1];
> + char *head = p;
> + char *tail = &p[SMALL_STATIC_SIZE - 8];
> +
> + /*
> + * Note that while we encode the report we
> + * intensively use a tail of the allocated
> + * buffer as a temporary store.
> + */
> +
> +#define snprintf_safe(__end, __fmt, ...) \
> + do { \
> + size_t size = (char *)(void *)__end - p; \
4. Please, remove the double cast. You can use (char *) as is. Here and
in other places.
> + p += snprintf(p, size, __fmt, ##__VA_ARGS__); \
> + if (p >= (char *)(void *)__end) \
> + goto out; \
> + } while (0)
> +
> + /*
> + * Lets reuse tail of the buffer as a temp space.
> + */
> + struct utsname *uname_ptr = (void *)&tail[-sizeof(struct utsname)];
> + if (p >= (char *)(void *)uname_ptr)
> + goto out;
> +
> + if (uname(uname_ptr) != 0) {
> + pr_syserr("uname call failed, ignore");
> + memset(uname_ptr, 0, sizeof(struct utsname));
> + }
> +
> + snprintf_safe(uname_ptr, "{");
> + snprintf_safe(uname_ptr, "\"uname\":{");
> + snprintf_safe(uname_ptr, "\"sysname\":\"%s\",", uname_ptr->sysname);
> + /*
> + * nodename might a sensitive information, skip.
5. 'might' what? You missed a verb.
> + */
> + snprintf_safe(uname_ptr, "\"release\":\"%s\",", uname_ptr->release);
> + snprintf_safe(uname_ptr, "\"version\":\"%s\",", uname_ptr->version);
> + snprintf_safe(uname_ptr, "\"machine\":\"%s\"", uname_ptr->machine);
> + snprintf_safe(uname_ptr, "},");
> +
> + snprintf_safe(e, "\"build\":{");
> + snprintf_safe(e, "\"version\":\"%s\",", PACKAGE_VERSION);
> + snprintf_safe(e, "\"cmake_type\":\"%s\"", BUILD_INFO);
> + snprintf_safe(e, "},");
> +
> + snprintf_safe(e, "\"signal\":{");
> + snprintf_safe(e, "\"signo\":%d,", cinfo->signo);
> + snprintf_safe(e, "\"si_code\":%d,", cinfo->sicode);
> + if (cinfo->signo == SIGSEGV) {
> + if (cinfo->sicode == SEGV_MAPERR) {
> + snprintf_safe(e, "\"si_code_str\":\"%s\",",
> + "SEGV_MAPERR");
> + } else if (cinfo->sicode == SEGV_ACCERR) {
> + snprintf_safe(e, "\"si_code_str\":\"%s\",",
> + "SEGV_ACCERR");
> + }
> + snprintf_safe(e, "\"si_addr\":\"0x%llx\",",
> + (long long)cinfo->siaddr);
> + }
> +
> +#ifdef ENABLE_BACKTRACE
> + /*
> + * The backtrace itself is encoded into base64 form
> + * since it might have arbitrary symbols not suitable
> + * for json encoding (newlines and etc).
> + */
> + size_t bt_len = strlen(cinfo->backtrace_buf);
> + size_t bt_elen = base64_bufsize(bt_len, BASE64_NOWRAP);
> + char *bt_base64 = &tail[-bt_elen];
> + if (p >= bt_base64)
> + goto out;
> + base64_encode(cinfo->backtrace_buf, bt_len,
> + bt_base64, bt_elen, BASE64_NOWRAP);
> + bt_base64[bt_elen] = '\0';
> + snprintf_safe(bt_base64, "\"backtrace\":\"%s\",", bt_base64);
> +#endif
> +
> + /* 64 bytes should be enough for longest localtime */
> + char *timestamp_rt_str = &tail[-64];
> + if (p >= timestamp_rt_str)
> + goto out;
> + ns_to_localtime(cinfo->timestamp_rt, timestamp_rt_str, 64);
> + snprintf_safe(timestamp_rt_str, "\"timestamp\":\"%s\"", timestamp_rt_str);
> + snprintf_safe(timestamp_rt_str, "}");
> + snprintf_safe(timestamp_rt_str, "}");
> +
> + size_t report_len = p - head;
> + size_t report_elen = base64_bufsize(report_len, BASE64_NOWRAP);
> +
> + char *report_base64 = &tail[-report_elen];
> + if (p >= report_base64)
> + goto out;
> + base64_encode(head, report_len, report_base64,
> + report_elen, BASE64_NOWRAP);
> + report_base64[report_elen] = '\0';
> +
> + /*
> + * Encoded report now sits at report_base64 position,
> + * at the tail of 'small' static buffer. Lets prepare
> + * the script to run.
> + */
> + p = head;
> + snprintf_safe(report_base64,
> + "require(\'http.client\').post(\'%s\',"
> + "'{\"crashdump\":{\"version\":\"%d\","
> + "\"data\": \"%s\"}}',{timeout=1});"
> + "os.exit(1);", feedback_host,
> + crashinfo_version, report_base64);
6. I think I now understand all these buffer allocations, except
that I still don't understand why tail is initialized as
char *tail = &p[SMALL_STATIC_SIZE - 8];
instead of just `tail = end;`.
More information about the Tarantool-patches
mailing list