Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH v5 0/2] base64: Fix decoder, improve its performance
@ 2020-12-25 14:09 Sergey Nikiforov
  2020-12-25 14:09 ` [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads) Sergey Nikiforov
  2020-12-25 14:09 ` [Tarantool-patches] [PATCH v5 2/2] base64: improve decoder performance Sergey Nikiforov
  0 siblings, 2 replies; 5+ messages in thread
From: Sergey Nikiforov @ 2020-12-25 14:09 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Vladislav Shpilevoy, Alexander Turenko

First patch fixes #3069 and adds test for zero-sized output buffer,
second one improves base64 decoder performance

Issue: https://github.com/tarantool/tarantool/issues/3069
Branch: https://github.com/tarantool/tarantool/tree/void234/gh-3069-fix-base64-memory-overrun-v5

Sergey Nikiforov (2):
  base64: fix decoder output buffer overrun (reads)
  base64: improve decoder performance

 test/unit/base64.c      | 13 +++++++++-
 test/unit/base64.result |  3 ++-
 third_party/base64.c    | 57 +++++++++++++++++++++++++++++------------
 3 files changed, 54 insertions(+), 19 deletions(-)

-- 
2.25.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads)
  2020-12-25 14:09 [Tarantool-patches] [PATCH v5 0/2] base64: Fix decoder, improve its performance Sergey Nikiforov
@ 2020-12-25 14:09 ` Sergey Nikiforov
  2020-12-25 14:09 ` [Tarantool-patches] [PATCH v5 2/2] base64: improve decoder performance Sergey Nikiforov
  1 sibling, 0 replies; 5+ messages in thread
From: Sergey Nikiforov @ 2020-12-25 14:09 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Vladislav Shpilevoy, Alexander Turenko

Was caught by base64 test with enabled ASAN.

It also caused data corruption - garbage instead of "extra bits" was
saved into state->result if there was no space in output buffer.

Added test for "zero-sized output buffer" case.

Fixes: #3069
---

Issue: https://github.com/tarantool/tarantool/issues/3069
Branch: https://github.com/tarantool/tarantool/tree/void234/gh-3069-fix-base64-memory-overrun-v5

 test/unit/base64.c      | 13 ++++++++++++-
 test/unit/base64.result |  3 ++-
 third_party/base64.c    | 28 +++++++++++++++++-----------
 3 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/test/unit/base64.c b/test/unit/base64.c
index ada497adf..c0ccf1321 100644
--- a/test/unit/base64.c
+++ b/test/unit/base64.c
@@ -58,9 +58,18 @@ base64_nowrap_test(const char *str)
 	base64_test(str, BASE64_NOWRAP, symbols, lengthof(symbols));
 }
 
+static void
+base64_no_space_test(void)
+{
+	const char *const in = "sIIpHw==";
+	const int in_len = strlen(in);
+	const int rc = base64_decode(in, in_len, NULL, 0);
+	is(rc, 0, "no space in out buffer");
+}
+
 int main(int argc, char *argv[])
 {
-	plan(28);
+	plan(29);
 	header();
 
 	const char *option_tests[] = {
@@ -78,6 +87,8 @@ int main(int argc, char *argv[])
 		base64_nowrap_test(option_tests[i]);
 	}
 
+	base64_no_space_test();
+
 	footer();
 	return check_plan();
 }
diff --git a/test/unit/base64.result b/test/unit/base64.result
index cd1f2b3f6..71b3519bf 100644
--- a/test/unit/base64.result
+++ b/test/unit/base64.result
@@ -1,4 +1,4 @@
-1..28
+1..29
 	*** main ***
     1..3
     ok 1 - length
@@ -175,4 +175,5 @@ ok 27 - subtests
     ok 3 - decode length ok
     ok 4 - encode/decode
 ok 28 - subtests
+ok 29 - no space in out buffer
 	*** main: done ***
diff --git a/third_party/base64.c b/third_party/base64.c
index 8ecab23eb..3350a98ff 100644
--- a/third_party/base64.c
+++ b/third_party/base64.c
@@ -247,8 +247,9 @@ base64_decode_block(const char *in_base64, int in_len,
 	char *out_pos = out_bin;
 	char *out_end = out_bin + out_len;
 	int fragment;
+	char curr_byte;
 
-	*out_pos = state->result;
+	curr_byte = state->result;
 
 	switch (state->step)
 	{
@@ -259,49 +260,54 @@ base64_decode_block(const char *in_base64, int in_len,
 				if (in_pos == in_end || out_pos >= out_end)
 				{
 					state->step = step_a;
-					state->result = *out_pos;
+					state->result = curr_byte;
 					return out_pos - out_bin;
 				}
 				fragment = base64_decode_value(*in_pos++);
 			} while (fragment < 0);
-			*out_pos    = (fragment & 0x03f) << 2;
+			curr_byte = (fragment & 0x03f) << 2;
 	case step_b:
 			do {
 				if (in_pos == in_end || out_pos >= out_end)
 				{
 					state->step = step_b;
-					state->result = *out_pos;
+					state->result = curr_byte;
 					return out_pos - out_bin;
 				}
 				fragment = base64_decode_value(*in_pos++);
 			} while (fragment < 0);
-			*out_pos++ |= (fragment & 0x030) >> 4;
+			curr_byte |= (fragment & 0x030) >> 4;
+			*out_pos++ = curr_byte;
+			curr_byte = (fragment & 0x00f) << 4;
 			if (out_pos < out_end)
-				*out_pos = (fragment & 0x00f) << 4;
+				*out_pos = curr_byte;
 	case step_c:
 			do {
 				if (in_pos == in_end || out_pos >= out_end)
 				{
 					state->step = step_c;
-					state->result = *out_pos;
+					state->result = curr_byte;
 					return out_pos - out_bin;
 				}
 				fragment = base64_decode_value(*in_pos++);
 			} while (fragment < 0);
-			*out_pos++ |= (fragment & 0x03c) >> 2;
+			curr_byte |= (fragment & 0x03c) >> 2;
+			*out_pos++ = curr_byte;
+			curr_byte = (fragment & 0x003) << 6;
 			if (out_pos < out_end)
-				*out_pos = (fragment & 0x003) << 6;
+				*out_pos = curr_byte;
 	case step_d:
 			do {
 				if (in_pos == in_end || out_pos >= out_end)
 				{
 					state->step = step_d;
-					state->result = *out_pos;
+					state->result = curr_byte;
 					return out_pos - out_bin;
 				}
 				fragment = base64_decode_value(*in_pos++);
 			} while (fragment < 0);
-			*out_pos++   |= (fragment & 0x03f);
+			curr_byte |= (fragment & 0x03f);
+			*out_pos++ = curr_byte;
 		}
 	}
 	/* control should not reach here */
-- 
2.25.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Tarantool-patches] [PATCH v5 2/2] base64: improve decoder performance
  2020-12-25 14:09 [Tarantool-patches] [PATCH v5 0/2] base64: Fix decoder, improve its performance Sergey Nikiforov
  2020-12-25 14:09 ` [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads) Sergey Nikiforov
@ 2020-12-25 14:09 ` Sergey Nikiforov
  1 sibling, 0 replies; 5+ messages in thread
From: Sergey Nikiforov @ 2020-12-25 14:09 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Vladislav Shpilevoy, Alexander Turenko

Unnecessary checks were removed from internal loops.
Benchmark shows that performance is now ~1.19 times higher
(release build, Intel Core I7-9700K, only one thread).
---

Branch: https://github.com/tarantool/tarantool/tree/void234/gh-3069-fix-base64-memory-overrun-v5

 third_party/base64.c | 35 ++++++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/third_party/base64.c b/third_party/base64.c
index 3350a98ff..2b2b61ba4 100644
--- a/third_party/base64.c
+++ b/third_party/base64.c
@@ -257,10 +257,10 @@ base64_decode_block(const char *in_base64, int in_len,
 		{
 	case step_a:
 			do {
-				if (in_pos == in_end || out_pos >= out_end)
+				if (in_pos >= in_end)
 				{
 					state->step = step_a;
-					state->result = curr_byte;
+					/* curr_byte is useless now. */
 					return out_pos - out_bin;
 				}
 				fragment = base64_decode_value(*in_pos++);
@@ -268,7 +268,7 @@ base64_decode_block(const char *in_base64, int in_len,
 			curr_byte = (fragment & 0x03f) << 2;
 	case step_b:
 			do {
-				if (in_pos == in_end || out_pos >= out_end)
+				if (in_pos >= in_end)
 				{
 					state->step = step_b;
 					state->result = curr_byte;
@@ -276,14 +276,19 @@ base64_decode_block(const char *in_base64, int in_len,
 				}
 				fragment = base64_decode_value(*in_pos++);
 			} while (fragment < 0);
+			if (out_pos >= out_end)
+			{
+				/* We are losing some data. */
+				state->step = step_b;
+				state->result = curr_byte;
+				return out_pos - out_bin;
+			}
 			curr_byte |= (fragment & 0x030) >> 4;
 			*out_pos++ = curr_byte;
 			curr_byte = (fragment & 0x00f) << 4;
-			if (out_pos < out_end)
-				*out_pos = curr_byte;
 	case step_c:
 			do {
-				if (in_pos == in_end || out_pos >= out_end)
+				if (in_pos >= in_end)
 				{
 					state->step = step_c;
 					state->result = curr_byte;
@@ -291,14 +296,19 @@ base64_decode_block(const char *in_base64, int in_len,
 				}
 				fragment = base64_decode_value(*in_pos++);
 			} while (fragment < 0);
+			if (out_pos >= out_end)
+			{
+				/* We are losing some data. */
+				state->step = step_c;
+				state->result = curr_byte;
+				return out_pos - out_bin;
+			}
 			curr_byte |= (fragment & 0x03c) >> 2;
 			*out_pos++ = curr_byte;
 			curr_byte = (fragment & 0x003) << 6;
-			if (out_pos < out_end)
-				*out_pos = curr_byte;
 	case step_d:
 			do {
-				if (in_pos == in_end || out_pos >= out_end)
+				if (in_pos >= in_end)
 				{
 					state->step = step_d;
 					state->result = curr_byte;
@@ -306,6 +316,13 @@ base64_decode_block(const char *in_base64, int in_len,
 				}
 				fragment = base64_decode_value(*in_pos++);
 			} while (fragment < 0);
+			if (out_pos >= out_end)
+			{
+				/* We are losing some data. */
+				state->step = step_d;
+				state->result = curr_byte;
+				return out_pos - out_bin;
+			}
 			curr_byte |= (fragment & 0x03f);
 			*out_pos++ = curr_byte;
 		}
-- 
2.25.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads)
  2020-12-25 10:41 ` [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads) Sergey Nikiforov
@ 2020-12-25 13:00   ` Leonid Vasiliev
  0 siblings, 0 replies; 5+ messages in thread
From: Leonid Vasiliev @ 2020-12-25 13:00 UTC (permalink / raw)
  To: Sergey Nikiforov, tarantool-patches; +Cc: Vladislav Shpilevoy

Hi! Thank you for the patch.
LGTM.
See 1 comment below:

On 25.12.2020 13:41, Sergey Nikiforov wrote:
> Was caught by base64 test with enabled ASAN.
> 
> It also caused data corruption - garbage instead of "extra bits" was
> saved into state->result if there was no space in output buffer.
> 
> Added test for "zero-sized output buffer" case.
> 
> Fixes: #3069
> ---
>   test/unit/base64.c      | 13 ++++++++++++-
>   test/unit/base64.result |  3 ++-
>   third_party/base64.c    | 28 +++++++++++++++++-----------
>   3 files changed, 31 insertions(+), 13 deletions(-)
> 
> diff --git a/test/unit/base64.c b/test/unit/base64.c
> index ada497adf..c0ccf1321 100644
> --- a/test/unit/base64.c
> +++ b/test/unit/base64.c
> @@ -58,9 +58,18 @@ base64_nowrap_test(const char *str)
>   	base64_test(str, BASE64_NOWRAP, symbols, lengthof(symbols));
>   }
>   
> +static void
> +base64_no_space_test(void)
> +{

Add `plan (1)` to the beginning of the function and `check_plan ()` to
the end to treat it as a separate subtest.

> +	const char *const in = "sIIpHw==";
> +	const int in_len = strlen(in);
> +	const int rc = base64_decode(in, in_len, NULL, 0);
> +	is(rc, 0, "no space in out buffer");
> +}
> +
>   int main(int argc, char *argv[])
>   {
> -	plan(28);
> +	plan(29);
>   	header();
>   
>   	const char *option_tests[] = {
> @@ -78,6 +87,8 @@ int main(int argc, char *argv[])
>   		base64_nowrap_test(option_tests[i]);
>   	}
>   
> +	base64_no_space_test();
> +
>   	footer();
>   	return check_plan();
>   }
> diff --git a/test/unit/base64.result b/test/unit/base64.result
> index cd1f2b3f6..71b3519bf 100644
> --- a/test/unit/base64.result
> +++ b/test/unit/base64.result
> @@ -1,4 +1,4 @@
> -1..28
> +1..29
>   	*** main ***
>       1..3
>       ok 1 - length
> @@ -175,4 +175,5 @@ ok 27 - subtests
>       ok 3 - decode length ok
>       ok 4 - encode/decode
>   ok 28 - subtests
> +ok 29 - no space in out buffer
>   	*** main: done ***
> diff --git a/third_party/base64.c b/third_party/base64.c
> index 8ecab23eb..3350a98ff 100644
> --- a/third_party/base64.c
> +++ b/third_party/base64.c
> @@ -247,8 +247,9 @@ base64_decode_block(const char *in_base64, int in_len,
>   	char *out_pos = out_bin;
>   	char *out_end = out_bin + out_len;
>   	int fragment;
> +	char curr_byte;
>   
> -	*out_pos = state->result;
> +	curr_byte = state->result;
>   
>   	switch (state->step)
>   	{
> @@ -259,49 +260,54 @@ base64_decode_block(const char *in_base64, int in_len,
>   				if (in_pos == in_end || out_pos >= out_end)
>   				{
>   					state->step = step_a;
> -					state->result = *out_pos;
> +					state->result = curr_byte;
>   					return out_pos - out_bin;
>   				}
>   				fragment = base64_decode_value(*in_pos++);
>   			} while (fragment < 0);
> -			*out_pos    = (fragment & 0x03f) << 2;
> +			curr_byte = (fragment & 0x03f) << 2;
>   	case step_b:
>   			do {
>   				if (in_pos == in_end || out_pos >= out_end)
>   				{
>   					state->step = step_b;
> -					state->result = *out_pos;
> +					state->result = curr_byte;
>   					return out_pos - out_bin;
>   				}
>   				fragment = base64_decode_value(*in_pos++);
>   			} while (fragment < 0);
> -			*out_pos++ |= (fragment & 0x030) >> 4;
> +			curr_byte |= (fragment & 0x030) >> 4;
> +			*out_pos++ = curr_byte;
> +			curr_byte = (fragment & 0x00f) << 4;
>   			if (out_pos < out_end)
> -				*out_pos = (fragment & 0x00f) << 4;
> +				*out_pos = curr_byte;
>   	case step_c:
>   			do {
>   				if (in_pos == in_end || out_pos >= out_end)
>   				{
>   					state->step = step_c;
> -					state->result = *out_pos;
> +					state->result = curr_byte;
>   					return out_pos - out_bin;
>   				}
>   				fragment = base64_decode_value(*in_pos++);
>   			} while (fragment < 0);
> -			*out_pos++ |= (fragment & 0x03c) >> 2;
> +			curr_byte |= (fragment & 0x03c) >> 2;
> +			*out_pos++ = curr_byte;
> +			curr_byte = (fragment & 0x003) << 6;
>   			if (out_pos < out_end)
> -				*out_pos = (fragment & 0x003) << 6;
> +				*out_pos = curr_byte;
>   	case step_d:
>   			do {
>   				if (in_pos == in_end || out_pos >= out_end)
>   				{
>   					state->step = step_d;
> -					state->result = *out_pos;
> +					state->result = curr_byte;
>   					return out_pos - out_bin;
>   				}
>   				fragment = base64_decode_value(*in_pos++);
>   			} while (fragment < 0);
> -			*out_pos++   |= (fragment & 0x03f);
> +			curr_byte |= (fragment & 0x03f);
> +			*out_pos++ = curr_byte;
>   		}
>   	}
>   	/* control should not reach here */
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads)
  2020-12-25 10:41 [Tarantool-patches] [PATCH v5 0/2] base64: Fix decoder, improve its performance Sergey Nikiforov
@ 2020-12-25 10:41 ` Sergey Nikiforov
  2020-12-25 13:00   ` Leonid Vasiliev
  0 siblings, 1 reply; 5+ messages in thread
From: Sergey Nikiforov @ 2020-12-25 10:41 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Vladislav Shpilevoy

Was caught by base64 test with enabled ASAN.

It also caused data corruption - garbage instead of "extra bits" was
saved into state->result if there was no space in output buffer.

Added test for "zero-sized output buffer" case.

Fixes: #3069
---
 test/unit/base64.c      | 13 ++++++++++++-
 test/unit/base64.result |  3 ++-
 third_party/base64.c    | 28 +++++++++++++++++-----------
 3 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/test/unit/base64.c b/test/unit/base64.c
index ada497adf..c0ccf1321 100644
--- a/test/unit/base64.c
+++ b/test/unit/base64.c
@@ -58,9 +58,18 @@ base64_nowrap_test(const char *str)
 	base64_test(str, BASE64_NOWRAP, symbols, lengthof(symbols));
 }
 
+static void
+base64_no_space_test(void)
+{
+	const char *const in = "sIIpHw==";
+	const int in_len = strlen(in);
+	const int rc = base64_decode(in, in_len, NULL, 0);
+	is(rc, 0, "no space in out buffer");
+}
+
 int main(int argc, char *argv[])
 {
-	plan(28);
+	plan(29);
 	header();
 
 	const char *option_tests[] = {
@@ -78,6 +87,8 @@ int main(int argc, char *argv[])
 		base64_nowrap_test(option_tests[i]);
 	}
 
+	base64_no_space_test();
+
 	footer();
 	return check_plan();
 }
diff --git a/test/unit/base64.result b/test/unit/base64.result
index cd1f2b3f6..71b3519bf 100644
--- a/test/unit/base64.result
+++ b/test/unit/base64.result
@@ -1,4 +1,4 @@
-1..28
+1..29
 	*** main ***
     1..3
     ok 1 - length
@@ -175,4 +175,5 @@ ok 27 - subtests
     ok 3 - decode length ok
     ok 4 - encode/decode
 ok 28 - subtests
+ok 29 - no space in out buffer
 	*** main: done ***
diff --git a/third_party/base64.c b/third_party/base64.c
index 8ecab23eb..3350a98ff 100644
--- a/third_party/base64.c
+++ b/third_party/base64.c
@@ -247,8 +247,9 @@ base64_decode_block(const char *in_base64, int in_len,
 	char *out_pos = out_bin;
 	char *out_end = out_bin + out_len;
 	int fragment;
+	char curr_byte;
 
-	*out_pos = state->result;
+	curr_byte = state->result;
 
 	switch (state->step)
 	{
@@ -259,49 +260,54 @@ base64_decode_block(const char *in_base64, int in_len,
 				if (in_pos == in_end || out_pos >= out_end)
 				{
 					state->step = step_a;
-					state->result = *out_pos;
+					state->result = curr_byte;
 					return out_pos - out_bin;
 				}
 				fragment = base64_decode_value(*in_pos++);
 			} while (fragment < 0);
-			*out_pos    = (fragment & 0x03f) << 2;
+			curr_byte = (fragment & 0x03f) << 2;
 	case step_b:
 			do {
 				if (in_pos == in_end || out_pos >= out_end)
 				{
 					state->step = step_b;
-					state->result = *out_pos;
+					state->result = curr_byte;
 					return out_pos - out_bin;
 				}
 				fragment = base64_decode_value(*in_pos++);
 			} while (fragment < 0);
-			*out_pos++ |= (fragment & 0x030) >> 4;
+			curr_byte |= (fragment & 0x030) >> 4;
+			*out_pos++ = curr_byte;
+			curr_byte = (fragment & 0x00f) << 4;
 			if (out_pos < out_end)
-				*out_pos = (fragment & 0x00f) << 4;
+				*out_pos = curr_byte;
 	case step_c:
 			do {
 				if (in_pos == in_end || out_pos >= out_end)
 				{
 					state->step = step_c;
-					state->result = *out_pos;
+					state->result = curr_byte;
 					return out_pos - out_bin;
 				}
 				fragment = base64_decode_value(*in_pos++);
 			} while (fragment < 0);
-			*out_pos++ |= (fragment & 0x03c) >> 2;
+			curr_byte |= (fragment & 0x03c) >> 2;
+			*out_pos++ = curr_byte;
+			curr_byte = (fragment & 0x003) << 6;
 			if (out_pos < out_end)
-				*out_pos = (fragment & 0x003) << 6;
+				*out_pos = curr_byte;
 	case step_d:
 			do {
 				if (in_pos == in_end || out_pos >= out_end)
 				{
 					state->step = step_d;
-					state->result = *out_pos;
+					state->result = curr_byte;
 					return out_pos - out_bin;
 				}
 				fragment = base64_decode_value(*in_pos++);
 			} while (fragment < 0);
-			*out_pos++   |= (fragment & 0x03f);
+			curr_byte |= (fragment & 0x03f);
+			*out_pos++ = curr_byte;
 		}
 	}
 	/* control should not reach here */
-- 
2.25.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-12-25 14:09 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-25 14:09 [Tarantool-patches] [PATCH v5 0/2] base64: Fix decoder, improve its performance Sergey Nikiforov
2020-12-25 14:09 ` [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads) Sergey Nikiforov
2020-12-25 14:09 ` [Tarantool-patches] [PATCH v5 2/2] base64: improve decoder performance Sergey Nikiforov
  -- strict thread matches above, loose matches on Subject: below --
2020-12-25 10:41 [Tarantool-patches] [PATCH v5 0/2] base64: Fix decoder, improve its performance Sergey Nikiforov
2020-12-25 10:41 ` [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads) Sergey Nikiforov
2020-12-25 13:00   ` Leonid Vasiliev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox