Tarantool development patches archive
 help / color / mirror / Atom feed
* [PATCH v2] xrow: print corrupted rows on decoding error.
@ 2019-04-01 13:09 Serge Petrenko
  2019-04-03  9:18 ` Vladimir Davydov
  0 siblings, 1 reply; 11+ messages in thread
From: Serge Petrenko @ 2019-04-01 13:09 UTC (permalink / raw)
  To: vdavydov.dev; +Cc: tarantool-patches, Serge Petrenko

Add row hex printing to log on verbose level. This would be useful
during investigation of errors related to invalid msgpack packet
arrival.

Here's some example output:
```
2019-04-01 16:04:51.629 [31304] iproto V> Corrupted row is: 3D A5 41 55 99 18 A8 08 C2 40 2B BA 50 12 F6 C6 0B 27 66 1C
2019-04-01 16:04:51.629 [31304] iproto xrow.c:133 E> ER_INVALID_MSGPACK: Invalid MsgPack - packet header
```

Related to #4040
---
https://github.com/tarantool/tarantool/tree/sp/xrow-decode-verbose-err
https://github.com/tarantool/tarantool/issues/4040

Changes in v2:
  - make xrow_on_decode_err a macro instead of a
    function to preserve line where error occurs.
  - add a test case.
  - always allocate buffer for hex dump with malloc
  - add logging on all errors related to invalid
    msgpack and missing request field.

 src/box/xrow.c            | 116 ++++++++++++++++++++++++++------------
 test/box/net.box.result   |  34 +++++++++++
 test/box/net.box.test.lua |  16 ++++++
 3 files changed, 131 insertions(+), 35 deletions(-)

diff --git a/src/box/xrow.c b/src/box/xrow.c
index 73d9e739b..fdd15bef3 100644
--- a/src/box/xrow.c
+++ b/src/box/xrow.c
@@ -88,15 +88,49 @@ mp_decode_vclock(const char **data, struct vclock *vclock)
 	return 0;
 }
 
+/**
+ * Set diagnostics with an error and log the corrupted row
+ * which caused the error.
+ * Optionally, if log_level is 'verbose' or greater,
+ * dump the corrupted row contents in hex to the log.
+ *
+ * @param what - exception to set.
+ * @param desc_str - error description string.
+ */
+#define xrow_on_decode_err(_start, _end, _what, _desc_str) do {				\
+	diag_set(ClientError, _what, _desc_str);					\
+											\
+	if (!say_log_level_is_enabled(S_VERBOSE))					\
+		break;									\
+											\
+	size_t _len = 3 * ((const char *)(_end) - (const char *)(_start) + 1);		\
+	char *_buf = (char *)malloc(_len);						\
+	if (!_buf) {									\
+		say_verbose("Got a corrupted row during decoding. "			\
+			    "Not enough memory to dump row contents.");			\
+		break;									\
+	}										\
+											\
+	char *_pos = _buf;								\
+	char *_buf_end = _buf + _len;							\
+	for (const char *_cur = _start; _cur < _end; ++_cur) {				\
+		_pos += snprintf(_pos, _buf_end - _pos, "%02X ", (unsigned char)*_cur);	\
+	}										\
+	say_verbose("Corrupted row is: %s", _buf);					\
+											\
+	free(_buf);									\
+} while (0)
+
 int
 xrow_header_decode(struct xrow_header *header, const char **pos,
 		   const char *end, bool end_is_exact)
 {
 	memset(header, 0, sizeof(struct xrow_header));
 	const char *tmp = *pos;
+	const char * const start = *pos;
 	if (mp_check(&tmp, end) != 0) {
 error:
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet header");
+		xrow_on_decode_err(start, end, ER_INVALID_MSGPACK, "packet header");
 		return -1;
 	}
 
@@ -163,7 +197,7 @@ error:
 	if (*pos < end && header->type != IPROTO_NOP) {
 		const char *body = *pos;
 		if (mp_check(pos, end)) {
-			diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+			xrow_on_decode_err(start, end, ER_INVALID_MSGPACK, "packet body");
 			return -1;
 		}
 		header->bodycnt = 1;
@@ -171,7 +205,7 @@ error:
 		header->body[0].iov_len = *pos - body;
 	}
 	if (end_is_exact && *pos < end) {
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(start,end, ER_INVALID_MSGPACK, "packet body");
 		return -1;
 	}
 	return 0;
@@ -183,14 +217,11 @@ error:
 static inline int
 xrow_decode_uuid(const char **pos, struct tt_uuid *out)
 {
-	if (mp_typeof(**pos) != MP_STR) {
-error:
-		diag_set(ClientError, ER_INVALID_MSGPACK, "UUID");
+	if (mp_typeof(**pos) != MP_STR)
 		return -1;
-	}
 	uint32_t len = mp_decode_strl(pos);
 	if (tt_uuid_from_strl(*pos, len, out) != 0)
-		goto error;
+		return -1;
 	*pos += len;
 	return 0;
 }
@@ -532,7 +563,8 @@ xrow_decode_sql(const struct xrow_header *row, struct sql_request *request)
 
 	if (mp_typeof(*data) != MP_MAP || mp_check_map(data, end) > 0) {
 error:
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet body");
 		return -1;
 	}
 
@@ -555,7 +587,7 @@ error:
 			request->sql_text = value;
 	}
 	if (request->sql_text == NULL) {
-		diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_MISSING_REQUEST_FIELD,
 			 iproto_key_name(IPROTO_SQL_TEXT));
 		return -1;
 	}
@@ -603,7 +635,8 @@ xrow_decode_dml(struct xrow_header *row, struct request *request,
 
 	if (mp_typeof(*data) != MP_MAP || mp_check_map(data, end) > 0) {
 error:
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet body");
 		return -1;
 	}
 
@@ -662,14 +695,15 @@ error:
 		}
 	}
 	if (data != end) {
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet end");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet end");
 		return -1;
 	}
 done:
 	if (key_map) {
 		enum iproto_key key = (enum iproto_key) bit_ctz_u64(key_map);
-		diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
-			 iproto_key_name(key));
+		xrow_on_decode_err(row->body[0].iov_base, end,
+				   ER_MISSING_REQUEST_FIELD, iproto_key_name(key));
 		return -1;
 	}
 	return 0;
@@ -816,7 +850,8 @@ xrow_decode_call(const struct xrow_header *row, struct call_request *request)
 
 	if (mp_typeof(*data) != MP_MAP || mp_check_map(data, end) > 0) {
 error:
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet body");
 		return -1;
 	}
 
@@ -855,20 +890,21 @@ error:
 		}
 	}
 	if (data != end) {
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet end");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet end");
 		return -1;
 	}
 	if (row->type == IPROTO_EVAL) {
 		if (request->expr == NULL) {
-			diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
-				 iproto_key_name(IPROTO_EXPR));
+			xrow_on_decode_err(row->body[0].iov_base, end, ER_MISSING_REQUEST_FIELD,
+					   iproto_key_name(IPROTO_EXPR));
 			return -1;
 		}
 	} else if (request->name == NULL) {
 		assert(row->type == IPROTO_CALL_16 ||
 		       row->type == IPROTO_CALL);
-		diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
-			 iproto_key_name(IPROTO_FUNCTION_NAME));
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_MISSING_REQUEST_FIELD,
+				   iproto_key_name(IPROTO_FUNCTION_NAME));
 		return -1;
 	}
 	if (request->args == NULL) {
@@ -895,7 +931,8 @@ xrow_decode_auth(const struct xrow_header *row, struct auth_request *request)
 
 	if (mp_typeof(*data) != MP_MAP || mp_check_map(data, end) > 0) {
 error:
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet body");
 		return -1;
 	}
 
@@ -927,17 +964,18 @@ error:
 		}
 	}
 	if (data != end) {
-		diag_set(ClientError, ER_INVALID_MSGPACK, "packet end");
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+				   "packet end");
 		return -1;
 	}
 	if (request->user_name == NULL) {
-		diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
-			  iproto_key_name(IPROTO_USER_NAME));
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_MISSING_REQUEST_FIELD,
+				   iproto_key_name(IPROTO_USER_NAME));
 		return -1;
 	}
 	if (request->scramble == NULL) {
-		diag_set(ClientError, ER_MISSING_REQUEST_FIELD,
-			 iproto_key_name(IPROTO_TUPLE));
+		xrow_on_decode_err(row->body[0].iov_base, end, ER_MISSING_REQUEST_FIELD,
+				   iproto_key_name(IPROTO_TUPLE));
 		return -1;
 	}
 	return 0;
@@ -1087,7 +1125,8 @@ xrow_decode_ballot(struct xrow_header *row, struct ballot *ballot)
 	}
 	return 0;
 err:
-	diag_set(ClientError, ER_INVALID_MSGPACK, "packet body");
+	xrow_on_decode_err(row->body[0].iov_base, end, ER_INVALID_MSGPACK,
+			   "packet body");
 	return -1;
 }
 
@@ -1132,11 +1171,12 @@ xrow_decode_subscribe(struct xrow_header *row, struct tt_uuid *replicaset_uuid,
 		return -1;
 	}
 	assert(row->bodycnt == 1);
-	const char *data = (const char *) row->body[0].iov_base;
+	const char * const data = (const char *) row->body[0].iov_base;
 	const char *end = data + row->body[0].iov_len;
 	const char *d = data;
 	if (mp_check(&d, end) != 0 || mp_typeof(*data) != MP_MAP) {
-		diag_set(ClientError, ER_INVALID_MSGPACK, "request body");
+		xrow_on_decode_err(data, end, ER_INVALID_MSGPACK,
+				   "request body");
 		return -1;
 	}
 
@@ -1153,21 +1193,27 @@ xrow_decode_subscribe(struct xrow_header *row, struct tt_uuid *replicaset_uuid,
 		case IPROTO_CLUSTER_UUID:
 			if (replicaset_uuid == NULL)
 				goto skip;
-			if (xrow_decode_uuid(&d, replicaset_uuid) != 0)
+			if (xrow_decode_uuid(&d, replicaset_uuid) != 0) {
+				xrow_on_decode_err(data, end, ER_INVALID_MSGPACK,
+						   "UUID");
 				return -1;
+			}
 			break;
 		case IPROTO_INSTANCE_UUID:
 			if (instance_uuid == NULL)
 				goto skip;
-			if (xrow_decode_uuid(&d, instance_uuid) != 0)
+			if (xrow_decode_uuid(&d, instance_uuid) != 0) {
+				xrow_on_decode_err(data, end, ER_INVALID_MSGPACK,
+						   "UUID");
 				return -1;
+			}
 			break;
 		case IPROTO_VCLOCK:
 			if (vclock == NULL)
 				goto skip;
 			if (mp_decode_vclock(&d, vclock) != 0) {
-				diag_set(ClientError, ER_INVALID_MSGPACK,
-					 "invalid VCLOCK");
+				xrow_on_decode_err(data, end, ER_INVALID_MSGPACK,
+						   "invalid VCLOCK");
 				return -1;
 			}
 			break;
@@ -1175,8 +1221,8 @@ xrow_decode_subscribe(struct xrow_header *row, struct tt_uuid *replicaset_uuid,
 			if (version_id == NULL)
 				goto skip;
 			if (mp_typeof(*d) != MP_UINT) {
-				diag_set(ClientError, ER_INVALID_MSGPACK,
-					 "invalid VERSION");
+				xrow_on_decode_err(data, end, ER_INVALID_MSGPACK,
+						   "invalid VERSION");
 				return -1;
 			}
 			*version_id = mp_decode_uint(&d);
diff --git a/test/box/net.box.result b/test/box/net.box.result
index aecaf9436..e2c3f3a40 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -3526,3 +3526,37 @@ s:drop()
 box.cfg{readahead = readahead}
 ---
 ...
+--
+-- related to gh-4040: log corrupted rows
+--
+log_level = box.cfg.log_level
+---
+...
+box.cfg{log_level=6}
+---
+...
+sock = socket.tcp_connect(LISTEN.host, LISTEN.service)
+---
+...
+-- we need to have a packet with correctly encoded length,
+-- so that it bypasses iproto length check, but cannot be
+-- decoded in xrow_header_decode
+-- 0x14 = 20, sha1 digest is 20 bytes long
+data = string.fromhex('14'..require('digest').sha1_hex('asdf'))
+---
+...
+sock:write(data)
+---
+- 21
+...
+sock:close()
+---
+- true
+...
+test_run:grep_log('default', 'Corrupted row is: .*')
+---
+- 'Corrupted row is: 3D A5 41 55 99 18 A8 08 C2 40 2B BA 50 12 F6 C6 0B 27 66 1C '
+...
+box.cfg{log_level=log_level}
+---
+...
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index 04d6c1903..e2f8f90ba 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -1435,3 +1435,19 @@ test_run:wait_log('default', 'readahead limit is reached', 1024, 0.1)
 
 s:drop()
 box.cfg{readahead = readahead}
+
+--
+-- related to gh-4040: log corrupted rows
+--
+log_level = box.cfg.log_level
+box.cfg{log_level=6}
+sock = socket.tcp_connect(LISTEN.host, LISTEN.service)
+-- we need to have a packet with correctly encoded length,
+-- so that it bypasses iproto length check, but cannot be
+-- decoded in xrow_header_decode
+-- 0x14 = 20, sha1 digest is 20 bytes long
+data = string.fromhex('14'..require('digest').sha1_hex('asdf'))
+sock:write(data)
+sock:close()
+test_run:grep_log('default', 'Corrupted row is: .*')
+box.cfg{log_level=log_level}
-- 
2.20.1 (Apple Git-117)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] xrow: print corrupted rows on decoding error.
  2019-04-01 13:09 [PATCH v2] xrow: print corrupted rows on decoding error Serge Petrenko
@ 2019-04-03  9:18 ` Vladimir Davydov
  2019-04-03  9:41   ` [tarantool-patches] " Kirill Yukhin
  2019-04-04 11:52   ` Serge Petrenko
  0 siblings, 2 replies; 11+ messages in thread
From: Vladimir Davydov @ 2019-04-03  9:18 UTC (permalink / raw)
  To: Serge Petrenko; +Cc: tarantool-patches

On Mon, Apr 01, 2019 at 04:09:06PM +0300, Serge Petrenko wrote:
> Add row hex printing to log on verbose level. This would be useful
> during investigation of errors related to invalid msgpack packet
> arrival.
> 
> Here's some example output:
> ```
> 2019-04-01 16:04:51.629 [31304] iproto V> Corrupted row is: 3D A5 41 55 99 18 A8 08 C2 40 2B BA 50 12 F6 C6 0B 27 66 1C
> 2019-04-01 16:04:51.629 [31304] iproto xrow.c:133 E> ER_INVALID_MSGPACK: Invalid MsgPack - packet header
> ```
> 
> Related to #4040
> ---
> https://github.com/tarantool/tarantool/tree/sp/xrow-decode-verbose-err
> https://github.com/tarantool/tarantool/issues/4040
> 
> Changes in v2:
>   - make xrow_on_decode_err a macro instead of a
>     function to preserve line where error occurs.
>   - add a test case.
>   - always allocate buffer for hex dump with malloc
>   - add logging on all errors related to invalid
>     msgpack and missing request field.
> 
>  src/box/xrow.c            | 116 ++++++++++++++++++++++++++------------
>  test/box/net.box.result   |  34 +++++++++++
>  test/box/net.box.test.lua |  16 ++++++
>  3 files changed, 131 insertions(+), 35 deletions(-)
> 
> diff --git a/src/box/xrow.c b/src/box/xrow.c
> index 73d9e739b..fdd15bef3 100644
> --- a/src/box/xrow.c
> +++ b/src/box/xrow.c
> @@ -88,15 +88,49 @@ mp_decode_vclock(const char **data, struct vclock *vclock)
>  	return 0;
>  }
>  
> +/**
> + * Set diagnostics with an error and log the corrupted row
> + * which caused the error.
> + * Optionally, if log_level is 'verbose' or greater,
> + * dump the corrupted row contents in hex to the log.
> + *
> + * @param what - exception to set.
> + * @param desc_str - error description string.
> + */
> +#define xrow_on_decode_err(_start, _end, _what, _desc_str) do {				\
> +	diag_set(ClientError, _what, _desc_str);					\
> +											\
> +	if (!say_log_level_is_enabled(S_VERBOSE))					\
> +		break;									\
> +											\
> +	size_t _len = 3 * ((const char *)(_end) - (const char *)(_start) + 1);		\

Nit: shouldn't +1 be outside the parentheses?

> +	char *_buf = (char *)malloc(_len);						\

Nit: we don't usually prefix names with underscores, because this may
conflict with libc internal macro definitions.

Other than that this patch looks good to me. Do we need it in 1.10?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [tarantool-patches] Re: [PATCH v2] xrow: print corrupted rows on decoding error.
  2019-04-03  9:18 ` Vladimir Davydov
@ 2019-04-03  9:41   ` Kirill Yukhin
  2019-04-04 11:52   ` Serge Petrenko
  1 sibling, 0 replies; 11+ messages in thread
From: Kirill Yukhin @ 2019-04-03  9:41 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Serge Petrenko

Hello,

On 03 Apr 12:18, Vladimir Davydov wrote:
> Other than that this patch looks good to me. Do we need it in 1.10?

I guess - yes.

--
Regards, Kirill Yukhin

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2] xrow: print corrupted rows on decoding error.
  2019-04-03  9:18 ` Vladimir Davydov
  2019-04-03  9:41   ` [tarantool-patches] " Kirill Yukhin
@ 2019-04-04 11:52   ` Serge Petrenko
  2019-04-04 12:06     ` Vladimir Davydov
  1 sibling, 1 reply; 11+ messages in thread
From: Serge Petrenko @ 2019-04-04 11:52 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: tarantool-patches, Kirill Yukhin


Hi!

> 3 апр. 2019 г., в 12:18, Vladimir Davydov <vdavydov.dev@gmail.com> написал(а):
> 
> On Mon, Apr 01, 2019 at 04:09:06PM +0300, Serge Petrenko wrote:
>> Add row hex printing to log on verbose level. This would be useful
>> during investigation of errors related to invalid msgpack packet
>> arrival.
>> 
>> Here's some example output:
>> ```
>> 2019-04-01 16:04:51.629 [31304] iproto V> Corrupted row is: 3D A5 41 55 99 18 A8 08 C2 40 2B BA 50 12 F6 C6 0B 27 66 1C
>> 2019-04-01 16:04:51.629 [31304] iproto xrow.c:133 E> ER_INVALID_MSGPACK: Invalid MsgPack - packet header
>> ```
>> 
>> Related to #4040
>> ---
>> https://github.com/tarantool/tarantool/tree/sp/xrow-decode-verbose-err
>> https://github.com/tarantool/tarantool/issues/4040
>> 
>> Changes in v2:
>>  - make xrow_on_decode_err a macro instead of a
>>    function to preserve line where error occurs.
>>  - add a test case.
>>  - always allocate buffer for hex dump with malloc
>>  - add logging on all errors related to invalid
>>    msgpack and missing request field.
>> 
>> src/box/xrow.c            | 116 ++++++++++++++++++++++++++------------
>> test/box/net.box.result   |  34 +++++++++++
>> test/box/net.box.test.lua |  16 ++++++
>> 3 files changed, 131 insertions(+), 35 deletions(-)
>> 
>> diff --git a/src/box/xrow.c b/src/box/xrow.c
>> index 73d9e739b..fdd15bef3 100644
>> --- a/src/box/xrow.c
>> +++ b/src/box/xrow.c
>> @@ -88,15 +88,49 @@ mp_decode_vclock(const char **data, struct vclock *vclock)
>> 	return 0;
>> }
>> 
>> +/**
>> + * Set diagnostics with an error and log the corrupted row
>> + * which caused the error.
>> + * Optionally, if log_level is 'verbose' or greater,
>> + * dump the corrupted row contents in hex to the log.
>> + *
>> + * @param what - exception to set.
>> + * @param desc_str - error description string.
>> + */
>> +#define xrow_on_decode_err(_start, _end, _what, _desc_str) do {				\
>> +	diag_set(ClientError, _what, _desc_str);					\
>> +											\
>> +	if (!say_log_level_is_enabled(S_VERBOSE))					\
>> +		break;									\
>> +											\
>> +	size_t _len = 3 * ((const char *)(_end) - (const char *)(_start) + 1);		\
> 
> Nit: shouldn't +1 be outside the parentheses?

Yes, I guess I meant the interval to be inclusive.
But since it is not, the ‘+1’ should be outside.

> 
>> +	char *_buf = (char *)malloc(_len);						\
> 
> Nit: we don't usually prefix names with underscores, because this may
> conflict with libc internal macro definitions.

Ok. Do you want me to fix it?

> 
> Other than that this patch looks good to me. Do we need it in 1.10?

Yes, we need it in 1.10.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2] xrow: print corrupted rows on decoding error.
  2019-04-04 11:52   ` Serge Petrenko
@ 2019-04-04 12:06     ` Vladimir Davydov
  2019-04-04 12:29       ` Serge Petrenko
  0 siblings, 1 reply; 11+ messages in thread
From: Vladimir Davydov @ 2019-04-04 12:06 UTC (permalink / raw)
  To: Serge Petrenko; +Cc: tarantool-patches, Kirill Yukhin

On Thu, Apr 04, 2019 at 02:52:25PM +0300, Serge Petrenko wrote:
> 
> Hi!
> 
> > 3 апр. 2019 г., в 12:18, Vladimir Davydov <vdavydov.dev@gmail.com> написал(а):
> > 
> > On Mon, Apr 01, 2019 at 04:09:06PM +0300, Serge Petrenko wrote:
> >> Add row hex printing to log on verbose level. This would be useful
> >> during investigation of errors related to invalid msgpack packet
> >> arrival.
> >> 
> >> Here's some example output:
> >> ```
> >> 2019-04-01 16:04:51.629 [31304] iproto V> Corrupted row is: 3D A5 41 55 99 18 A8 08 C2 40 2B BA 50 12 F6 C6 0B 27 66 1C
> >> 2019-04-01 16:04:51.629 [31304] iproto xrow.c:133 E> ER_INVALID_MSGPACK: Invalid MsgPack - packet header
> >> ```
> >> 
> >> Related to #4040
> >> ---
> >> https://github.com/tarantool/tarantool/tree/sp/xrow-decode-verbose-err
> >> https://github.com/tarantool/tarantool/issues/4040
> >> 
> >> Changes in v2:
> >>  - make xrow_on_decode_err a macro instead of a
> >>    function to preserve line where error occurs.
> >>  - add a test case.
> >>  - always allocate buffer for hex dump with malloc
> >>  - add logging on all errors related to invalid
> >>    msgpack and missing request field.
> >> 
> >> src/box/xrow.c            | 116 ++++++++++++++++++++++++++------------
> >> test/box/net.box.result   |  34 +++++++++++
> >> test/box/net.box.test.lua |  16 ++++++
> >> 3 files changed, 131 insertions(+), 35 deletions(-)
> >> 
> >> diff --git a/src/box/xrow.c b/src/box/xrow.c
> >> index 73d9e739b..fdd15bef3 100644
> >> --- a/src/box/xrow.c
> >> +++ b/src/box/xrow.c
> >> @@ -88,15 +88,49 @@ mp_decode_vclock(const char **data, struct vclock *vclock)
> >> 	return 0;
> >> }
> >> 
> >> +/**
> >> + * Set diagnostics with an error and log the corrupted row
> >> + * which caused the error.
> >> + * Optionally, if log_level is 'verbose' or greater,
> >> + * dump the corrupted row contents in hex to the log.
> >> + *
> >> + * @param what - exception to set.
> >> + * @param desc_str - error description string.
> >> + */
> >> +#define xrow_on_decode_err(_start, _end, _what, _desc_str) do {				\
> >> +	diag_set(ClientError, _what, _desc_str);					\
> >> +											\
> >> +	if (!say_log_level_is_enabled(S_VERBOSE))					\
> >> +		break;									\
> >> +											\
> >> +	size_t _len = 3 * ((const char *)(_end) - (const char *)(_start) + 1);		\
> > 
> > Nit: shouldn't +1 be outside the parentheses?
> 
> Yes, I guess I meant the interval to be inclusive.
> But since it is not, the ‘+1’ should be outside.
> 
> > 
> >> +	char *_buf = (char *)malloc(_len);						\
> > 
> > Nit: we don't usually prefix names with underscores, because this may
> > conflict with libc internal macro definitions.
> 
> Ok. Do you want me to fix it?

Yes, please do and update the branch.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2] xrow: print corrupted rows on decoding error.
  2019-04-04 12:06     ` Vladimir Davydov
@ 2019-04-04 12:29       ` Serge Petrenko
  2019-04-04 14:51         ` Vladimir Davydov
  0 siblings, 1 reply; 11+ messages in thread
From: Serge Petrenko @ 2019-04-04 12:29 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: Kirill Yukhin, tarantool-patches

[-- Attachment #1: Type: text/plain, Size: 6296 bytes --]



> 4 апр. 2019 г., в 15:06, Vladimir Davydov <vdavydov.dev@gmail.com> написал(а):
> 
> On Thu, Apr 04, 2019 at 02:52:25PM +0300, Serge Petrenko wrote:
>> 
>> Hi!
>> 
>>> 3 апр. 2019 г., в 12:18, Vladimir Davydov <vdavydov.dev@gmail.com> написал(а):
>>> 
>>> On Mon, Apr 01, 2019 at 04:09:06PM +0300, Serge Petrenko wrote:
>>>> Add row hex printing to log on verbose level. This would be useful
>>>> during investigation of errors related to invalid msgpack packet
>>>> arrival.
>>>> 
>>>> Here's some example output:
>>>> ```
>>>> 2019-04-01 16:04:51.629 [31304] iproto V> Corrupted row is: 3D A5 41 55 99 18 A8 08 C2 40 2B BA 50 12 F6 C6 0B 27 66 1C
>>>> 2019-04-01 16:04:51.629 [31304] iproto xrow.c:133 E> ER_INVALID_MSGPACK: Invalid MsgPack - packet header
>>>> ```
>>>> 
>>>> Related to #4040
>>>> ---
>>>> https://github.com/tarantool/tarantool/tree/sp/xrow-decode-verbose-err
>>>> https://github.com/tarantool/tarantool/issues/4040
>>>> 
>>>> Changes in v2:
>>>> - make xrow_on_decode_err a macro instead of a
>>>>   function to preserve line where error occurs.
>>>> - add a test case.
>>>> - always allocate buffer for hex dump with malloc
>>>> - add logging on all errors related to invalid
>>>>   msgpack and missing request field.
>>>> 
>>>> src/box/xrow.c            | 116 ++++++++++++++++++++++++++------------
>>>> test/box/net.box.result   |  34 +++++++++++
>>>> test/box/net.box.test.lua |  16 ++++++
>>>> 3 files changed, 131 insertions(+), 35 deletions(-)
>>>> 
>>>> diff --git a/src/box/xrow.c b/src/box/xrow.c
>>>> index 73d9e739b..fdd15bef3 100644
>>>> --- a/src/box/xrow.c
>>>> +++ b/src/box/xrow.c
>>>> @@ -88,15 +88,49 @@ mp_decode_vclock(const char **data, struct vclock *vclock)
>>>> 	return 0;
>>>> }
>>>> 
>>>> +/**
>>>> + * Set diagnostics with an error and log the corrupted row
>>>> + * which caused the error.
>>>> + * Optionally, if log_level is 'verbose' or greater,
>>>> + * dump the corrupted row contents in hex to the log.
>>>> + *
>>>> + * @param what - exception to set.
>>>> + * @param desc_str - error description string.
>>>> + */
>>>> +#define xrow_on_decode_err(_start, _end, _what, _desc_str) do {				\
>>>> +	diag_set(ClientError, _what, _desc_str);					\
>>>> +											\
>>>> +	if (!say_log_level_is_enabled(S_VERBOSE))					\
>>>> +		break;									\
>>>> +											\
>>>> +	size_t _len = 3 * ((const char *)(_end) - (const char *)(_start) + 1);		\
>>> 
>>> Nit: shouldn't +1 be outside the parentheses?
>> 
>> Yes, I guess I meant the interval to be inclusive.
>> But since it is not, the ‘+1’ should be outside.
>> 
>>> 
>>>> +	char *_buf = (char *)malloc(_len);						\
>>> 
>>> Nit: we don't usually prefix names with underscores, because this may
>>> conflict with libc internal macro definitions.
>> 
>> Ok. Do you want me to fix it?
> 
> Yes, please do and update the branch.

Done, here’s the diff.

diff --git a/src/box/xrow.c b/src/box/xrow.c
index fdd15bef3..6aebe5c50 100644
--- a/src/box/xrow.c
+++ b/src/box/xrow.c
@@ -97,28 +97,28 @@ mp_decode_vclock(const char **data, struct vclock *vclock)
  * @param what - exception to set.
  * @param desc_str - error description string.
  */
-#define xrow_on_decode_err(_start, _end, _what, _desc_str) do {                                \
-       diag_set(ClientError, _what, _desc_str);                                        \
+#define xrow_on_decode_err(start, end, what, desc_str) do {                            \
+       diag_set(ClientError, what, desc_str);                                  \
                                                                                        \
        if (!say_log_level_is_enabled(S_VERBOSE))                                       \
                break;                                                                  \
                                                                                        \
-       size_t _len = 3 * ((const char *)(_end) - (const char *)(_start) + 1);          \
-       char *_buf = (char *)malloc(_len);                                              \
-       if (!_buf) {                                                                    \
+       size_t len = 3 * ((const char *)(end) - (const char *)(start) + 1);             \
+       char *buf = (char *)malloc(len);                                                \
+       if (!buf) {                                                                     \
                say_verbose("Got a corrupted row during decoding. "                     \
                            "Not enough memory to dump row contents.");                 \
                break;                                                                  \
        }                                                                               \
                                                                                        \
-       char *_pos = _buf;                                                              \
-       char *_buf_end = _buf + _len;                                                   \
-       for (const char *_cur = _start; _cur < _end; ++_cur) {                          \
-               _pos += snprintf(_pos, _buf_end - _pos, "%02X ", (unsigned char)*_cur); \
+       char *pos = buf;                                                                \
+       char *buf_end = buf + len;                                                      \
+       for (const char *cur = start; cur < end; ++cur) {                               \
+               pos += snprintf(pos, buf_end - pos, "%02X ", (unsigned char)*cur);      \
        }                                                                               \
-       say_verbose("Corrupted row is: %s", _buf);                                      \
+       say_verbose("Corrupted row is: %s", buf);                                       \
                                                                                        \
-       free(_buf);                                                                     \
+       free(buf);                                                                      \
 } while (0)
 
 int

[-- Attachment #2: Type: text/html, Size: 17066 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2] xrow: print corrupted rows on decoding error.
  2019-04-04 12:29       ` Serge Petrenko
@ 2019-04-04 14:51         ` Vladimir Davydov
  2019-04-05  6:18           ` Konstantin Osipov
  0 siblings, 1 reply; 11+ messages in thread
From: Vladimir Davydov @ 2019-04-04 14:51 UTC (permalink / raw)
  To: Serge Petrenko; +Cc: Kirill Yukhin, tarantool-patches

Pushed to master, 2.1, 1.10.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [tarantool-patches] Re: [PATCH v2] xrow: print corrupted rows on decoding error.
  2019-04-04 14:51         ` Vladimir Davydov
@ 2019-04-05  6:18           ` Konstantin Osipov
  2019-04-05  8:06             ` Vladimir Davydov
  0 siblings, 1 reply; 11+ messages in thread
From: Konstantin Osipov @ 2019-04-05  6:18 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Serge Petrenko, Kirill Yukhin

* Vladimir Davydov <vdavydov.dev@gmail.com> [19/04/04 17:57]:
> Pushed to master, 2.1, 1.10.

This was fast.

First of all, we already have request_str() function, and there is
a good deal of code reuse that can take place.

This comment, basically, says it all.

But provided you had to write your own implementation:
- why use a macro?
- why use malloc?
- what's wrong with mp_snprint, and if the problem is that it
  doesn't properly escape non-printable characters, why not fix it
  to escape these or implement a version which does?

-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2] xrow: print corrupted rows on decoding error.
  2019-04-05  6:18           ` Konstantin Osipov
@ 2019-04-05  8:06             ` Vladimir Davydov
  2019-04-05 13:20               ` Konstantin Osipov
  0 siblings, 1 reply; 11+ messages in thread
From: Vladimir Davydov @ 2019-04-05  8:06 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches, Serge Petrenko, Kirill Yukhin

On Fri, Apr 05, 2019 at 09:18:46AM +0300, Konstantin Osipov wrote:
> * Vladimir Davydov <vdavydov.dev@gmail.com> [19/04/04 17:57]:
> > Pushed to master, 2.1, 1.10.
> 
> This was fast.

The first version of this patch was submitted for review more than two
weeks ago, on March 15. We haven't heard from you since then. Though you
didn't care.

> 
> First of all, we already have request_str() function, and there is
> a good deal of code reuse that can take place.

request_str() prints a well-formed request. Here we want to dump
corrupted msgpack.

> 
> This comment, basically, says it all.
> 
> But provided you had to write your own implementation:
> - why use a macro?

So that diag_set() and say() preserve file/line.

> - why use malloc?

Because a corrupted request can easily be longer than 256 bytes (that's
how many bytes fits in tt_static_buf). Since it's a slow path, which is
only executed on decode error provided S_VERBOSE log level is enabled,
I reckon using malloc() is fine.

> - what's wrong with mp_snprint, and if the problem is that it
>   doesn't properly escape non-printable characters, why not fix it
>   to escape these or implement a version which does?

Because data can be corrupted, e.g. an MP_ARRAY with N items in the
header having only M < N items in the body. We can't print such data
with mp_snprint.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2] xrow: print corrupted rows on decoding error.
  2019-04-05  8:06             ` Vladimir Davydov
@ 2019-04-05 13:20               ` Konstantin Osipov
  2019-04-05 13:43                 ` Vladimir Davydov
  0 siblings, 1 reply; 11+ messages in thread
From: Konstantin Osipov @ 2019-04-05 13:20 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: tarantool-patches, Serge Petrenko, Kirill Yukhin

* Vladimir Davydov <vdavydov.dev@gmail.com> [19/04/05 16:11]:
> > But provided you had to write your own implementation:
> > - why use a macro?
> 
> So that diag_set() and say() preserve file/line.

You inline the  macro multiple times. This makes the function body
longer -> CPU job more difficult. You could easily pass file and
line into the function, these are ansi c standard macros, __FILE__
and __LINE__
> 
> > - why use malloc?
> 
> Because a corrupted request can easily be longer than 256 bytes (that's
> how many bytes fits in tt_static_buf). Since it's a slow path, which is
> only executed on decode error provided S_VERBOSE log level is enabled,
> I reckon using malloc() is fine.

It's 1024 bytes.
What's the point of printing it as a single say() call, not as
multiple ones? 

Besides, there is a 4k buffer inside say which limits you anwyay.

> > - what's wrong with mp_snprint, and if the problem is that it
> >   doesn't properly escape non-printable characters, why not fix it
> >   to escape these or implement a version which does?
> 
> Because data can be corrupted, e.g. an MP_ARRAY with N items in the
> header having only M < N items in the body. We can't print such data
> with mp_snprint.


-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2] xrow: print corrupted rows on decoding error.
  2019-04-05 13:20               ` Konstantin Osipov
@ 2019-04-05 13:43                 ` Vladimir Davydov
  0 siblings, 0 replies; 11+ messages in thread
From: Vladimir Davydov @ 2019-04-05 13:43 UTC (permalink / raw)
  To: Konstantin Osipov, Serge Petrenko; +Cc: tarantool-patches, Kirill Yukhin

On Fri, Apr 05, 2019 at 04:20:15PM +0300, Konstantin Osipov wrote:
> * Vladimir Davydov <vdavydov.dev@gmail.com> [19/04/05 16:11]:
> > > But provided you had to write your own implementation:
> > > - why use a macro?
> > 
> > So that diag_set() and say() preserve file/line.
> 
> You inline the  macro multiple times. This makes the function body
> longer -> CPU job more difficult. You could easily pass file and
> line into the function, these are ansi c standard macros, __FILE__
> and __LINE__

You're right. Let's move print to a helper function then.

> > 
> > > - why use malloc?
> > 
> > Because a corrupted request can easily be longer than 256 bytes (that's
> > how many bytes fits in tt_static_buf). Since it's a slow path, which is
> > only executed on decode error provided S_VERBOSE log level is enabled,
> > I reckon using malloc() is fine.
> 
> It's 1024 bytes.
> What's the point of printing it as a single say() call, not as
> multiple ones? 
> 
> Besides, there is a 4k buffer inside say which limits you anwyay.

Makes sense.

Sergey, please prepare a follow-up patch to fix these issues.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2019-04-05 13:43 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-01 13:09 [PATCH v2] xrow: print corrupted rows on decoding error Serge Petrenko
2019-04-03  9:18 ` Vladimir Davydov
2019-04-03  9:41   ` [tarantool-patches] " Kirill Yukhin
2019-04-04 11:52   ` Serge Petrenko
2019-04-04 12:06     ` Vladimir Davydov
2019-04-04 12:29       ` Serge Petrenko
2019-04-04 14:51         ` Vladimir Davydov
2019-04-05  6:18           ` Konstantin Osipov
2019-04-05  8:06             ` Vladimir Davydov
2019-04-05 13:20               ` Konstantin Osipov
2019-04-05 13:43                 ` Vladimir Davydov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox