[PATCH v2] xrow: print corrupted rows on decoding error.

Serge Petrenko sergepetrenko at tarantool.org
Mon Apr 1 16:09:06 MSK 2019


Add row hex printing to log on verbose level. This would be useful
during investigation of errors related to invalid msgpack packet
arrival.

Here's some example output:
```
2019-04-01 16:04:51.629 [31304] iproto V> Corrupted row is: 3D A5 41 55 99 18 A8 08 C2 40 2B BA 50 12 F6 C6 0B 27 66 1C
2019-04-01 16:04:51.629 [31304] iproto xrow.c:133 E> ER_INVALID_MSGPACK: Invalid MsgPack - packet header
```

Related to #4040
---
https://github.com/tarantool/tarantool/tree/sp/xrow-decode-verbose-err
https://github.com/tarantool/tarantool/issues/4040

Changes in v2:
  - make xrow_on_decode_err a macro instead of a
    function to preserve line where error occurs.
  - add a test case.
  - always allocate buffer for hex dump with malloc
  - add logging on all errors related to invalid
    msgpack and missing request field.

 src/box/xrow.c            | 116 ++++++++++++++++++++++++++------------
 test/box/net.box.result   |  34 +++++++++++
 test/box/net.box.test.lua |  16 ++++++
 3 files changed, 131 insertions(+), 35 deletions(-)

diff --git a/src/box/xrow.c b/src/box/xrow.c
index 73d9e739b..fdd15bef3 100644
--- a/src/box/xrow.c
+++ b/src/box/xrow.c
@@ -88,15 +88,49 @@ mp_decode_vclock(const char **data, struct vclock *vclock)
 	return 0;
 }
 
+/**
+ * Set diagnostics with an error and log the corrupted row
+ * which caused the error.
+ * Optionally, if log_level is 'verbose' or greater,
+ * dump the corrupted row contents in hex to the log.
+ *
+ * @param what - exception to set.
+ * @param desc_str - error description string.
+ */
+#define xrow_on_decode_err(_start, _end, _what, _desc_str) do {				\
+	diag_set(ClientError, _what, _desc_str);					\
+											\
+	if (!say_log_level_is_enabled(S_VERBOSE))					\
+		break;									\
+											\
+	size_t _len = 3 * ((const char *)(_end) - (const char *)(_start) + 1);		\
+	char *_buf = (char *)malloc(_len);						\
+	if (!_buf) {									\
+		say_verbose("Got a corrupted row during decoding. "			\
+			    "Not enough memory to dump row contents.");			\
+		break;									\
+	}										\
+											\
+	char *_pos = _buf;								\
+	char *_buf_end = _buf + _len;							\
+	for (const char *_cur = _start; _cur < _end; ++_cur) {				\
+		_pos += snprintf(_pos, _buf_end - _pos, "%02X ", (unsigned char)*_cur);	\
+	}										\
+	say_verbose("Corrupted row is: %s", _buf);					\
+											\
+	free(_buf);									\
+} while (0)
+
 int
 xrow_header_decode(struct xrow_header *header, const char **pos,
 		   const char *end, bool end_is_exact)
 {
 	memset(header, 0, sizeof(struct xrow_header));
 	const char *tmp = *pos;
+	const char * const start = *pos;
 	if (mp_check(&tmp, end) != 0) {
 error:
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet header");
+		xrow_on_decode_err(start, end, ER_INVALID_MSGPACK, "packet header");
 		return -1;
 	}
 
@@ -163,7 +197,7 @@ error:
 	if (*pos < end && header->type != IPROTO_NOP) {
 		const char *body = *pos;
 		if (mp_check(pos, end)) {
-			diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+			xrow_on_decode_err(start, end, ER_INVALID_MSGPACK, "packet body");
 			return -1;
 		}
 		header->bodycnt = 1;
@@ -171,7 +205,7 @@ error:
 		header->body[0].iov_len = *pos - body;
 	}
 	if (end_is_exact && *pos < end) {
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(start,end, ER_INVALID_MSGPACK, "packet body");
 		return -1;
 	}
 	return 0;
@@ -183,14 +217,11 @@ error:
 static inline int
 xrow_decode_uuid(const char **pos, struct tt_uuid *out)
 {
-	if (mp_typeof(**pos) != MP_STR) {
-error:
-		diag_set(ClientError, ER_INVALID_MSGPACK, "UUID");
+	if (mp_typeof(**pos) != MP_STR)
 		return -1;
-	}
 	uint32_t len = mp_decode_strl(pos);
 	if (tt_uuid_from_strl(*pos, len, out) != 0)
-		goto error;
+		return -1;
 	*pos += len;
 	return 0;
 }
@@ -532,7 +563,8 @@ xrow_decode_sql(const struct xrow_header *row, struct sql_request *request)
 
 	if (mp_typeof(*data) != MP_MAP || mp_check_map(data, end) > 0) {
 error:
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet body");
 		return -1;
 	}
 
@@ -555,7 +587,7 @@ error:
 			request->sql_text = value;
 	}
 	if (request->sql_text == NULL) {
-		diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_MISSING_REQUEST_FIELD,
 			 iproto_key_name(IPROTO_SQL_TEXT));
 		return -1;
 	}
@@ -603,7 +635,8 @@ xrow_decode_dml(struct xrow_header *row, struct request *request,
 
 	if (mp_typeof(*data) != MP_MAP || mp_check_map(data, end) > 0) {
 error:
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet body");
 		return -1;
 	}
 
@@ -662,14 +695,15 @@ error:
 		}
 	}
 	if (data != end) {
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet end");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet end");
 		return -1;
 	}
 done:
 	if (key_map) {
 		enum iproto_key key = (enum iproto_key) bit_ctz_u64(key_map);
-		diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
-			 iproto_key_name(key));
+		xrow_on_decode_err(row->body[0].iov_base, end,
+				   ER_MISSING_REQUEST_FIELD, iproto_key_name(key));
 		return -1;
 	}
 	return 0;
@@ -816,7 +850,8 @@ xrow_decode_call(const struct xrow_header *row, struct call_request *request)
 
 	if (mp_typeof(*data) != MP_MAP || mp_check_map(data, end) > 0) {
 error:
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet body");
 		return -1;
 	}
 
@@ -855,20 +890,21 @@ error:
 		}
 	}
 	if (data != end) {
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet end");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet end");
 		return -1;
 	}
 	if (row->type == IPROTO_EVAL) {
 		if (request->expr == NULL) {
-			diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
-				 iproto_key_name(IPROTO_EXPR));
+			xrow_on_decode_err(row->body[0].iov_base, end, ER_MISSING_REQUEST_FIELD,
+					   iproto_key_name(IPROTO_EXPR));
 			return -1;
 		}
 	} else if (request->name == NULL) {
 		assert(row->type == IPROTO_CALL_16 ||
 		       row->type == IPROTO_CALL);
-		diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
-			 iproto_key_name(IPROTO_FUNCTION_NAME));
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_MISSING_REQUEST_FIELD,
+				   iproto_key_name(IPROTO_FUNCTION_NAME));
 		return -1;
 	}
 	if (request->args == NULL) {
@@ -895,7 +931,8 @@ xrow_decode_auth(const struct xrow_header *row, struct auth_request *request)
 
 	if (mp_typeof(*data) != MP_MAP || mp_check_map(data, end) > 0) {
 error:
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet body");
 		return -1;
 	}
 
@@ -927,17 +964,18 @@ error:
 		}
 	}
 	if (data != end) {
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet end");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet end");
 		return -1;
 	}
 	if (request->user_name == NULL) {
-		diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
-			  iproto_key_name(IPROTO_USER_NAME));
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_MISSING_REQUEST_FIELD,
+				   iproto_key_name(IPROTO_USER_NAME));
 		return -1;
 	}
 	if (request->scramble == NULL) {
-		diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
-			 iproto_key_name(IPROTO_TUPLE));
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_MISSING_REQUEST_FIELD,
+				   iproto_key_name(IPROTO_TUPLE));
 		return -1;
 	}
 	return 0;
@@ -1087,7 +1125,8 @@ xrow_decode_ballot(struct xrow_header *row, struct ballot *ballot)
 	}
 	return 0;
 err:
-	diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+	xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+			   "packet body");
 	return -1;
 }
 
@@ -1132,11 +1171,12 @@ xrow_decode_subscribe(struct xrow_header *row, struct tt_uuid *replicaset_uuid,
 		return -1;
 	}
 	assert(row->bodycnt == 1);
-	const char *data = (const char *) row->body[0].iov_base;
+	const char * const data = (const char *) row->body[0].iov_base;
 	const char *end = data + row->body[0].iov_len;
 	const char *d = data;
 	if (mp_check(&d, end) != 0 || mp_typeof(*data) != MP_MAP) {
-		diag_set(ClientError, ER_INVALID_MSGPACK, "request body");
+		xrow_on_decode_err(data, end, ER_INVALID_MSGPACK,
+				   "request body");
 		return -1;
 	}
 
@@ -1153,21 +1193,27 @@ xrow_decode_subscribe(struct xrow_header *row, struct tt_uuid *replicaset_uuid,
 		case IPROTO_CLUSTER_UUID:
 			if (replicaset_uuid == NULL)
 				goto skip;
-			if (xrow_decode_uuid(&d, replicaset_uuid) != 0)
+			if (xrow_decode_uuid(&d, replicaset_uuid) != 0) {
+				xrow_on_decode_err(data, end, ER_INVALID_MSGPACK,
+						   "UUID");
 				return -1;
+			}
 			break;
 		case IPROTO_INSTANCE_UUID:
 			if (instance_uuid == NULL)
 				goto skip;
-			if (xrow_decode_uuid(&d, instance_uuid) != 0)
+			if (xrow_decode_uuid(&d, instance_uuid) != 0) {
+				xrow_on_decode_err(data, end, ER_INVALID_MSGPACK,
+						   "UUID");
 				return -1;
+			}
 			break;
 		case IPROTO_VCLOCK:
 			if (vclock == NULL)
 				goto skip;
 			if (mp_decode_vclock(&d, vclock) != 0) {
-				diag_set(ClientError, ER_INVALID_MSGPACK,
-					 "invalid VCLOCK");
+				xrow_on_decode_err(data, end, ER_INVALID_MSGPACK,
+						   "invalid VCLOCK");
 				return -1;
 			}
 			break;
@@ -1175,8 +1221,8 @@ xrow_decode_subscribe(struct xrow_header *row, struct tt_uuid *replicaset_uuid,
 			if (version_id == NULL)
 				goto skip;
 			if (mp_typeof(*d) != MP_UINT) {
-				diag_set(ClientError, ER_INVALID_MSGPACK,
-					 "invalid VERSION");
+				xrow_on_decode_err(data, end, ER_INVALID_MSGPACK,
+						   "invalid VERSION");
 				return -1;
 			}
 			*version_id = mp_decode_uint(&d);
diff --git a/test/box/net.box.result b/test/box/net.box.result
index aecaf9436..e2c3f3a40 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -3526,3 +3526,37 @@ s:drop()
 box.cfg{readahead = readahead}
 ---
 ...
+--
+-- related to gh-4040: log corrupted rows
+--
+log_level = box.cfg.log_level
+---
+...
+box.cfg{log_level=6}
+---
+...
+sock = socket.tcp_connect(LISTEN.host, LISTEN.service)
+---
+...
+-- we need to have a packet with correctly encoded length,
+-- so that it bypasses iproto length check, but cannot be
+-- decoded in xrow_header_decode
+-- 0x14 = 20, sha1 digest is 20 bytes long
+data = string.fromhex('14'..require('digest').sha1_hex('asdf'))
+---
+...
+sock:write(data)
+---
+- 21
+...
+sock:close()
+---
+- true
+...
+test_run:grep_log('default', 'Corrupted row is: .*')
+---
+- 'Corrupted row is: 3D A5 41 55 99 18 A8 08 C2 40 2B BA 50 12 F6 C6 0B 27 66 1C '
+...
+box.cfg{log_level=log_level}
+---
+...
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index 04d6c1903..e2f8f90ba 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -1435,3 +1435,19 @@ test_run:wait_log('default', 'readahead limit is reached', 1024, 0.1)
 
 s:drop()
 box.cfg{readahead = readahead}
+
+--
+-- related to gh-4040: log corrupted rows
+--
+log_level = box.cfg.log_level
+box.cfg{log_level=6}
+sock = socket.tcp_connect(LISTEN.host, LISTEN.service)
+-- we need to have a packet with correctly encoded length,
+-- so that it bypasses iproto length check, but cannot be
+-- decoded in xrow_header_decode
+-- 0x14 = 20, sha1 digest is 20 bytes long
+data = string.fromhex('14'..require('digest').sha1_hex('asdf'))
+sock:write(data)
+sock:close()
+test_run:grep_log('default', 'Corrupted row is: .*')
+box.cfg{log_level=log_level}
-- 
2.20.1 (Apple Git-117)




More information about the Tarantool-patches mailing list