[Tarantool-patches] [PATCH v4 4/4] crash: report crash data to the feedback server

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sun Dec 20 18:16:46 MSK 2020


I see you didn't push it to the branch. Please do; otherwise I can't
validate whether it works.

See 6 comments below.

> diff --git a/src/box/box.cc b/src/box/box.cc
> index a8bc3471d..27079fd46 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -1213,6 +1214,21 @@ box_set_prepared_stmt_cache_size(void)
>  	return 0;
>  }
>  
> +void
> +box_set_crash_params(void)
> +{
> +	const char *host = cfg_gets("feedback_host");
> +	bool is_enabled_1 = cfg_getb("feedback_enabled");
> +	bool is_enabled_2 = cfg_getb("feedback_crashinfo");
> +
> +	if (host != NULL && strlen(host) >= CRASH_FEEDBACK_HOST_MAX) {
> +		tnt_raise(ClientError, ER_CFG, "feedback_host",
> +			  "the address is too long");

1. Please, don't use exceptions in new code. We are getting rid
of them, and every new use makes that removal harder in the future.

> +	}
> +
> +	crash_cfg_set_params(host, is_enabled_1 && is_enabled_2);

2. Just 'crash_cfg', please. We have lots of 'cfg' functions for
various modules, and none of them has 'set_params' suffix. Probably
because this is what 'cfg' means anyway.

> +}
> diff --git a/src/lib/core/crash.c b/src/lib/core/crash.c
> index a15a13e8e..f6dd91987 100644
> --- a/src/lib/core/crash.c
> +++ b/src/lib/core/crash.c
> @@ -81,13 +96,75 @@ static struct crash_info {
>  	int sicode;
>  #ifdef ENABLE_BACKTRACE
>  	/*
> -	 * 4K of memory should be enough to keep the backtrace.
> +	 * 1K of memory should be enough to keep the backtrace.
>  	 * In worst case it gonna be simply trimmed.
>  	 */
> -	char backtrace_buf[4096];
> +	char backtrace_buf[1024];

3. I am begging you. Please. Stop mixing independent changes with
each other. Please. How can I ask otherwise? What can I do to
explain this? How can I help to understand?

>  #endif
> @@ -126,6 +209,196 @@ crash_collect(int signo, siginfo_t *siginfo, void *ucontext)
>  	return cinfo;
>  }
>  
> +/**
> + * Mark an environment that we're in crashinfo handling, this
> + * allows us to escape recursive attempts to send report,
> + * if the action of sending report is failing itself.
> + */
> +static int
> +crash_mark_env_mode(void)
> +{
> +	const char *env_name = "TT_CRASHINFO_MODE";
> +	if (getenv(env_name) != NULL) {
> +		pr_crit("recursive failure detected");
> +		return -1;
> +	}
> +
> +	if (setenv(env_name, "y", 0) != 0) {
> +		pr_crit("unable to setup %s", env_name);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * Report crash information to the feedback daemon
> + * (ie send it to feedback daemon).
> + */
> +static void
> +crash_report_feedback_daemon(struct crash_info *cinfo)
> +{
> +	if (crash_mark_env_mode() != 0)
> +		return;
> +
> +	/*
> +	 * Update to a new number if format get changed.
> +	 */
> +	const int crashinfo_version = 1;
> +
> +	char *p = static_alloc(SMALL_STATIC_SIZE);
> +	char *e = &p[SMALL_STATIC_SIZE - 1];
> +	char *head = p;
> +	char *tail = &p[SMALL_STATIC_SIZE - 8];
> +
> +	/*
> +	 * Note that while we encode the report we
> +	 * intensively use a tail of the allocated
> +	 * buffer as a temporary store.
> +	 */
> +
> +#define snprintf_safe(__end, __fmt, ...)				\
> +	do {								\
> +		size_t size = (char *)(void *)__end - p;		\

4. Please, remove the double cast. A single (char *) cast is enough. Here and
in other places.

> +		p += snprintf(p, size, __fmt, ##__VA_ARGS__);		\
> +		if (p >= (char *)(void *)__end)				\
> +			goto out;					\
> +	} while (0)
> +
> +	/*
> +	 * Lets reuse tail of the buffer as a temp space.
> +	 */
> +	struct utsname *uname_ptr = (void *)&tail[-sizeof(struct utsname)];
> +	if (p >= (char *)(void *)uname_ptr)
> +		goto out;
> +
> +	if (uname(uname_ptr) != 0) {
> +		pr_syserr("uname call failed, ignore");
> +		memset(uname_ptr, 0, sizeof(struct utsname));
> +	}
> +
> +	snprintf_safe(uname_ptr, "{");
> +	snprintf_safe(uname_ptr, "\"uname\":{");
> +	snprintf_safe(uname_ptr, "\"sysname\":\"%s\",", uname_ptr->sysname);
> +	/*
> +	 * nodename might a sensitive information, skip.

5. 'might' what? You missed a verb.

> +	 */
> +	snprintf_safe(uname_ptr, "\"release\":\"%s\",", uname_ptr->release);
> +	snprintf_safe(uname_ptr, "\"version\":\"%s\",", uname_ptr->version);
> +	snprintf_safe(uname_ptr, "\"machine\":\"%s\"", uname_ptr->machine);
> +	snprintf_safe(uname_ptr, "},");
> +
> +	snprintf_safe(e, "\"build\":{");
> +	snprintf_safe(e, "\"version\":\"%s\",", PACKAGE_VERSION);
> +	snprintf_safe(e, "\"cmake_type\":\"%s\"", BUILD_INFO);
> +	snprintf_safe(e, "},");
> +
> +	snprintf_safe(e, "\"signal\":{");
> +	snprintf_safe(e, "\"signo\":%d,", cinfo->signo);
> +	snprintf_safe(e, "\"si_code\":%d,", cinfo->sicode);
> +	if (cinfo->signo == SIGSEGV) {
> +		if (cinfo->sicode == SEGV_MAPERR) {
> +			snprintf_safe(e, "\"si_code_str\":\"%s\",",
> +				      "SEGV_MAPERR");
> +		} else if (cinfo->sicode == SEGV_ACCERR) {
> +			snprintf_safe(e, "\"si_code_str\":\"%s\",",
> +				      "SEGV_ACCERR");
> +		}
> +		snprintf_safe(e, "\"si_addr\":\"0x%llx\",",
> +			      (long long)cinfo->siaddr);
> +	}
> +
> +#ifdef ENABLE_BACKTRACE
> +	/*
> +	 * The backtrace itself is encoded into base64 form
> +	 * since it might have arbitrary symbols not suitable
> +	 * for json encoding (newlines and etc).
> +	 */
> +	size_t bt_len = strlen(cinfo->backtrace_buf);
> +	size_t bt_elen = base64_bufsize(bt_len, BASE64_NOWRAP);
> +	char *bt_base64 = &tail[-bt_elen];
> +	if (p >= bt_base64)
> +		goto out;
> +	base64_encode(cinfo->backtrace_buf, bt_len,
> +		      bt_base64, bt_elen, BASE64_NOWRAP);
> +	bt_base64[bt_elen] = '\0';
> +	snprintf_safe(bt_base64, "\"backtrace\":\"%s\",", bt_base64);
> +#endif
> +
> +	/* 64 bytes should be enough for longest localtime */
> +	char *timestamp_rt_str = &tail[-64];
> +	if (p >= timestamp_rt_str)
> +		goto out;
> +	ns_to_localtime(cinfo->timestamp_rt, timestamp_rt_str, 64);
> +	snprintf_safe(timestamp_rt_str, "\"timestamp\":\"%s\"", timestamp_rt_str);
> +	snprintf_safe(timestamp_rt_str, "}");
> +	snprintf_safe(timestamp_rt_str, "}");
> +
> +	size_t report_len = p - head;
> +	size_t report_elen = base64_bufsize(report_len, BASE64_NOWRAP);
> +
> +	char *report_base64 = &tail[-report_elen];
> +	if (p >= report_base64)
> +		goto out;
> +	base64_encode(head, report_len, report_base64,
> +		      report_elen, BASE64_NOWRAP);
> +	report_base64[report_elen] = '\0';
> +
> +	/*
> +	 * Encoded report now sits at report_base64 position,
> +	 * at the tail of 'small' static buffer. Lets prepare
> +	 * the script to run.
> +	 */
> +	p = head;
> +	snprintf_safe(report_base64,
> +		      "require(\'http.client\').post(\'%s\',"
> +		      "'{\"crashdump\":{\"version\":\"%d\","
> +		      "\"data\": \"%s\"}}',{timeout=1});"
> +		      "os.exit(1);", feedback_host,
> +		      crashinfo_version, report_base64);

6. I think I now understand all these buffer allocations. Except
that I still don't understand why tail is initialized as

	char *tail = &p[SMALL_STATIC_SIZE - 8];

instead of just `tail = end;`.


More information about the Tarantool-patches mailing list