[tarantool-patches] [PATCH] xrow: print corrupted rows on decoding error.

Serge Petrenko sergepetrenko at tarantool.org
Tue Mar 19 15:19:31 MSK 2019


Hi!
Thankyou for review!
The fixes are on the branch, and an incremental diff is below.

> 18 марта 2019 г., в 20:58, Vladimir Davydov <vdavydov.dev at gmail.com> написал(а):
> 
> On Fri, Mar 15, 2019 at 07:02:38PM +0300, Serge Petrenko wrote:
>> Add row hex printing to log on verbose level. This would be useful
>> during investigation of errors related to invalid msgpack packet
>> arrival.
>> 
>> Related to #4040
>> ---
>> https://github.com/tarantool/tarantool/tree/sp/xrow-decode-verbose-err
>> 
>> 
>> src/box/xrow.c | 27 +++++++++++++++++++++++++++
>> 1 file changed, 27 insertions(+)
>> 
>> diff --git a/src/box/xrow.c b/src/box/xrow.c
>> index bddae1d5b..8a5254902 100644
>> --- a/src/box/xrow.c
>> +++ b/src/box/xrow.c
>> @@ -88,14 +88,29 @@ mp_decode_vclock(const char **data, struct vclock *vclock)
>> 	return 0;
>> }
>> 
>> +static inline void
>> +say_dump_row_hex(const char *start, const char *end)
>> +{
>> +	char *buf = (char *)malloc(3 * (end - start) + 1);
> 
> Theoretically, malloc() may fail. You'd better check it and print
> a warning in this case. Anyway, do we really need to use malloc()
> here at all? Won't tt_static_buf be enough for typical cases.

Done, please see the incremental diff below. I still use malloc for sizes greater than TT_STATIC_BUF_LEN,
but check for an allocation error.

> 
>> +	char *pos = buf;
>> +	for (const char *cur = start; cur < end; ++cur) {
>> +		pos += sprintf(pos, "%02hhX ", *cur);
>> +	}
>> +	*pos = 0;
>> +	say_verbose("Corrupted row is: %s", buf);
> 
> say_verbose() will turn into no-op, but you allocate a buffer and format
> the message anyway. Please fix that by using say_log_level_is_enabled()
> before doing anything.

Fixed.
> 
>> +	free(buf);
>> +}
>> +
>> int
>> xrow_header_decode(struct xrow_header *header, const char **pos,
>> 		   const char *end, bool end_is_exact)
>> {
>> 	memset(header, 0, sizeof(struct xrow_header));
>> 	const char *tmp = *pos;
>> +	const char * const start = *pos;
>> 	if (mp_check(&tmp, end) != 0) {
>> error:
>> +		say_dump_row_hex(start, end);
>> 		diag_set(ClientError, ER_INVALID_MSGPACK, "packet header");
>> 		return -1;
>> 	}
>> @@ -163,6 +178,7 @@ error:
>> 	if (*pos < end && header->type != IPROTO_NOP) {
>> 		const char *body = *pos;
>> 		if (mp_check(pos, end)) {
>> +			say_dump_row_hex(start, end);
>> 			diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
>> 			return -1;
>> 		}
>> @@ -171,6 +187,7 @@ error:
>> 		header->body[0].iov_len = *pos - body;
>> 	}
>> 	if (end_is_exact && *pos < end) {
>> +		say_dump_row_hex(start,end);
>> 		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
> 
> It would be better to add a helper function that would set diag and
> print a corrupted row so that we don't miss it in case we add another
> point of failure. Dunno, may be it's even worth adding a new exception
> class with logging built in.

Done, please, see below.
I don’t like the idea with a new exception class.
If we introduce it, we’ll have to store the whole
corrupted row, which can be quite big, until logging happens.

diff --git a/src/box/xrow.c b/src/box/xrow.c
index 3854874cc..000647111 100644
--- a/src/box/xrow.c
+++ b/src/box/xrow.c
@@ -88,17 +88,44 @@ mp_decode_vclock(const char **data, struct vclock *vclock)
 	return 0;
 }
 
+/**
+ * Set diagnostics with an error and log the corrupted row
+ * which caused the error.
+ * @param what - exception to set.
+ * @param desc_str - error description string.
+ */
 static inline void
-say_dump_row_hex(const char *start, const char *end)
+xrow_on_decode_err(const char *start, const char *end, uint32_t what,
+		   const char *desc_str)
 {
-	char *buf = (char *)malloc(3 * (end - start) + 1);
+	diag_set(ClientError, what, desc_str);
+
+	if (!say_log_level_is_enabled(S_VERBOSE))
+		return;
+
+	size_t len = 3 * (end - start + 1);
+	char *buf;
+
+	if (len <= TT_STATIC_BUF_LEN) {
+		buf = tt_static_buf();
+	} else {
+		buf = (char *)malloc(len);
+		if (!buf) {
+			say_verbose("Got a corrupted row during decoding. "\
+				    "Not enough memory to dump row contents.");
+			return;
+		}
+	}
+
 	char *pos = buf;
 	for (const char *cur = start; cur < end; ++cur) {
 		pos += sprintf(pos, "%02hhX ", *cur);
 	}
 	*pos = 0;
 	say_verbose("Corrupted row is: %s", buf);
-	free(buf);
+
+	if (len > TT_STATIC_BUF_LEN)
+		free(buf);
 }
 
 int
@@ -110,8 +137,7 @@ xrow_header_decode(struct xrow_header *header, const char **pos,
 	const char * const start = *pos;
 	if (mp_check(&tmp, end) != 0) {
 error:
-		say_dump_row_hex(start, end);
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet header");
+		xrow_on_decode_err(start, end, ER_INVALID_MSGPACK, "packet header");
 		return -1;
 	}
 
@@ -178,8 +204,7 @@ error:
 	if (*pos < end && header->type != IPROTO_NOP) {
 		const char *body = *pos;
 		if (mp_check(pos, end)) {
-			say_dump_row_hex(start, end);
-			diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+			xrow_on_decode_err(start, end, ER_INVALID_MSGPACK, "packet body");
 			return -1;
 		}
 		header->bodycnt = 1;
@@ -187,8 +212,7 @@ error:
 		header->body[0].iov_len = *pos - body;
 	}
 	if (end_is_exact && *pos < end) {
-		say_dump_row_hex(start,end);
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(start,end, ER_INVALID_MSGPACK, "packet body");
 		return -1;
 	}
 	return 0;
@@ -549,8 +573,8 @@ xrow_decode_sql(const struct xrow_header *row, struct sql_request *request)
 
 	if (mp_typeof(*data) != MP_MAP || mp_check_map(data, end) > 0) {
 error:
-		say_dump_row_hex(row->body[0].iov_base, end);
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet body");
 		return -1;
 	}
 
@@ -621,8 +645,8 @@ xrow_decode_dml(struct xrow_header *row, struct request *request,
 
 	if (mp_typeof(*data) != MP_MAP || mp_check_map(data, end) > 0) {
 error:
-		say_dump_row_hex(row->body[0].iov_base, end);
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet body");
 		return -1;
 	}
 
@@ -681,16 +705,15 @@ error:
 		}
 	}
 	if (data != end) {
-		say_dump_row_hex(row->body[0].iov_base, end);
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet end");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet end");
 		return -1;
 	}
 done:
 	if (key_map) {
-		say_dump_row_hex(row->body[0].iov_base, end);
 		enum iproto_key key = (enum iproto_key) bit_ctz_u64(key_map);
-		diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
-			 iproto_key_name(key));
+		xrow_on_decode_err(row->body[0].iov_base, end,
+				   ER_MISSING_REQUEST_FIELD, iproto_key_name(key));
 		return -1;
 	}
 	return 0;
@@ -837,8 +860,8 @@ xrow_decode_call(const struct xrow_header *row, struct call_request *request)
 
 	if (mp_typeof(*data) != MP_MAP || mp_check_map(data, end) > 0) {
 error:
-		say_dump_row_hex(row->body[0].iov_base, end);
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet body");
 		return -1;
 	}
 
@@ -877,8 +900,8 @@ error:
 		}
 	}
 	if (data != end) {
-		say_dump_row_hex(row->body[0].iov_base, end);
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet end");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet end");
 		return -1;
 	}
 	if (row->type == IPROTO_EVAL) {
@@ -918,8 +941,8 @@ xrow_decode_auth(const struct xrow_header *row, struct auth_request *request)
 
 	if (mp_typeof(*data) != MP_MAP || mp_check_map(data, end) > 0) {
 error:
-		say_dump_row_hex(row->body[0].iov_base, end);
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet body");
 		return -1;
 	}
 
@@ -951,8 +974,8 @@ error:
 		}
 	}
 	if (data != end) {
-		say_dump_row_hex(row->body[0].iov_base, end);
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet end");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet end");
 		return -1;
 	}
 	if (request->user_name == NULL) {
@@ -1112,8 +1135,8 @@ xrow_decode_ballot(struct xrow_header *row, struct ballot *ballot)
 	}
 	return 0;
 err:
-	say_dump_row_hex(row->body[0].iov_base, end);
-	diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+	xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+			   "packet body");
 	return -1;
 }
 
@@ -1162,8 +1185,8 @@ xrow_decode_subscribe(struct xrow_header *row, struct tt_uuid *replicaset_uuid,
 	const char *end = data + row->body[0].iov_len;
 	const char *d = data;
 	if (mp_check(&d, end) != 0 || mp_typeof(*data) != MP_MAP) {
-		say_dump_row_hex(row->body[0].iov_base, end);
-		diag_set(ClientError, ER_INVALID_MSGPACK, "request body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "request body");
 		return -1;
 	}




More information about the Tarantool-patches mailing list