Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH v7] base64: fix decoder output buffer overrun (reads)
@ 2021-03-01 12:31 Sergey Nikiforov via Tarantool-patches
  2021-03-02 23:08 ` Alexander Turenko via Tarantool-patches
  0 siblings, 1 reply; 3+ messages in thread
From: Sergey Nikiforov via Tarantool-patches @ 2021-03-01 12:31 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Vladislav Shpilevoy, Alexander Turenko

Was caught by base64 test with enabled ASAN.

It also caused data corruption - garbage instead of "extra bits" was
saved into state->result if there was no space in output buffer.

Decode state removed along with helper functions.

Added test for "zero-sized output buffer" case.

Fixes: #3069
---

Branch: https://github.com/tarantool/tarantool/tree/void234/gh-3069-fix-base64-memory-overrun-v7
Issue: https://github.com/tarantool/tarantool/issues/3069

 test/unit/base64.c      |  16 +++++-
 test/unit/base64.result |   5 +-
 third_party/base64.c    | 123 +++++++++++++---------------------------
 3 files changed, 57 insertions(+), 87 deletions(-)

diff --git a/test/unit/base64.c b/test/unit/base64.c
index cc74f64d1..508877217 100644
--- a/test/unit/base64.c
+++ b/test/unit/base64.c
@@ -75,9 +75,22 @@ base64_invalid_chars_test(void)
 	check_plan();
 }
 
+static void
+base64_no_space_test(void)
+{
+	plan(1);
+
+	const char *const in = "sIIpHw==";
+	const int in_len = strlen(in);
+	const int rc = base64_decode(in, in_len, NULL, 0);
+	is(rc, 0, "no space in out buffer");
+
+	check_plan();
+}
+
 int main(int argc, char *argv[])
 {
-	plan(29);
+	plan(30);
 	header();
 
 	const char *option_tests[] = {
@@ -96,6 +109,7 @@ int main(int argc, char *argv[])
 	}
 
 	base64_invalid_chars_test();
+	base64_no_space_test();
 
 	footer();
 	return check_plan();
diff --git a/test/unit/base64.result b/test/unit/base64.result
index 3bc2c2275..495e2d0a2 100644
--- a/test/unit/base64.result
+++ b/test/unit/base64.result
@@ -1,4 +1,4 @@
-1..29
+1..30
 	*** main ***
     1..3
     ok 1 - length
@@ -178,4 +178,7 @@ ok 28 - subtests
     1..1
     ok 1 - ignoring invalid chars
 ok 29 - subtests
+    1..1
+    ok 1 - no space in out buffer
+ok 30 - subtests
 	*** main: done ***
diff --git a/third_party/base64.c b/third_party/base64.c
index 7c69315ea..8304adaa3 100644
--- a/third_party/base64.c
+++ b/third_party/base64.c
@@ -202,14 +202,6 @@ base64_encode(const char *in_bin, int in_len,
 
 /* {{{ decode */
 
-enum base64_decodestep { step_a, step_b, step_c, step_d };
-
-struct base64_decodestate
-{
-	enum base64_decodestep step;
-	char result;
-};
-
 static int
 base64_decode_value(int value)
 {
@@ -231,94 +223,55 @@ base64_decode_value(int value)
 	return decoding[codepos];
 }
 
-static inline void
-base64_decodestate_init(struct base64_decodestate *state)
-{
-	state->step = step_a;
-	state->result = 0;
-}
-
-static int
-base64_decode_block(const char *in_base64, int in_len,
-		    char *out_bin, int out_len,
-		    struct base64_decodestate *state)
+int
+base64_decode(const char *in_base64, int in_len,
+	      char *out_bin, int out_len)
 {
 	const char *in_pos = in_base64;
 	const char *in_end = in_base64 + in_len;
 	char *out_pos = out_bin;
 	char *out_end = out_bin + out_len;
 	int fragment;
+	char curr_byte;
 
-	*out_pos = state->result;
-
-	switch (state->step)
+	while (1)
 	{
-		while (1)
-		{
-	case step_a:
-			do {
-				if (in_pos == in_end || out_pos >= out_end)
-				{
-					state->step = step_a;
-					state->result = *out_pos;
-					return out_pos - out_bin;
-				}
-				fragment = base64_decode_value(*in_pos++);
-			} while (fragment < 0);
-			*out_pos    = (fragment & 0x03f) << 2;
-	case step_b:
-			do {
-				if (in_pos == in_end || out_pos >= out_end)
-				{
-					state->step = step_b;
-					state->result = *out_pos;
-					return out_pos - out_bin;
-				}
-				fragment = base64_decode_value(*in_pos++);
-			} while (fragment < 0);
-			*out_pos++ |= (fragment & 0x030) >> 4;
-			if (out_pos < out_end)
-				*out_pos = (fragment & 0x00f) << 4;
-	case step_c:
-			do {
-				if (in_pos == in_end || out_pos >= out_end)
-				{
-					state->step = step_c;
-					state->result = *out_pos;
-					return out_pos - out_bin;
-				}
-				fragment = base64_decode_value(*in_pos++);
-			} while (fragment < 0);
-			*out_pos++ |= (fragment & 0x03c) >> 2;
-			if (out_pos < out_end)
-				*out_pos = (fragment & 0x003) << 6;
-	case step_d:
-			do {
-				if (in_pos == in_end || out_pos >= out_end)
-				{
-					state->step = step_d;
-					state->result = *out_pos;
-					return out_pos - out_bin;
-				}
-				fragment = base64_decode_value(*in_pos++);
-			} while (fragment < 0);
-			*out_pos++   |= (fragment & 0x03f);
-		}
+		do {
+			if (in_pos == in_end || out_pos >= out_end)
+				return out_pos - out_bin;
+			fragment = base64_decode_value(*in_pos++);
+		} while (fragment < 0);
+		curr_byte = (fragment & 0x03f) << 2;
+		do {
+			if (in_pos == in_end || out_pos >= out_end)
+				return out_pos - out_bin;
+			fragment = base64_decode_value(*in_pos++);
+		} while (fragment < 0);
+		curr_byte |= (fragment & 0x030) >> 4;
+		*out_pos++ = curr_byte;
+		curr_byte = (fragment & 0x00f) << 4;
+		if (out_pos < out_end)
+			*out_pos = curr_byte;
+		do {
+			if (in_pos == in_end || out_pos >= out_end)
+				return out_pos - out_bin;
+			fragment = base64_decode_value(*in_pos++);
+		} while (fragment < 0);
+		curr_byte |= (fragment & 0x03c) >> 2;
+		*out_pos++ = curr_byte;
+		curr_byte = (fragment & 0x003) << 6;
+		if (out_pos < out_end)
+			*out_pos = curr_byte;
+		do {
+			if (in_pos == in_end || out_pos >= out_end)
+				return out_pos - out_bin;
+			fragment = base64_decode_value(*in_pos++);
+		} while (fragment < 0);
+		curr_byte |= (fragment & 0x03f);
+		*out_pos++ = curr_byte;
 	}
 	/* control should not reach here */
 	return out_pos - out_bin;
 }
 
-
-
-int
-base64_decode(const char *in_base64, int in_len,
-	      char *out_bin, int out_len)
-{
-	struct base64_decodestate state;
-	base64_decodestate_init(&state);
-	return base64_decode_block(in_base64, in_len,
-				   out_bin, out_len, &state);
-}
-
 /* }}} */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Tarantool-patches] [PATCH v7] base64: fix decoder output buffer overrun (reads)
  2021-03-01 12:31 [Tarantool-patches] [PATCH v7] base64: fix decoder output buffer overrun (reads) Sergey Nikiforov via Tarantool-patches
@ 2021-03-02 23:08 ` Alexander Turenko via Tarantool-patches
  2021-03-09 10:09   ` Sergey Nikiforov via Tarantool-patches
  0 siblings, 1 reply; 3+ messages in thread
From: Alexander Turenko via Tarantool-patches @ 2021-03-02 23:08 UTC (permalink / raw)
  To: Sergey Nikiforov; +Cc: tarantool-patches, Vladislav Shpilevoy

> -static int
> -base64_decode_block(const char *in_base64, int in_len,
> -		    char *out_bin, int out_len,
> -		    struct base64_decodestate *state)
> +int
> +base64_decode(const char *in_base64, int in_len,
> +	      char *out_bin, int out_len)
>  {
>  	const char *in_pos = in_base64;
>  	const char *in_end = in_base64 + in_len;
>  	char *out_pos = out_bin;
>  	char *out_end = out_bin + out_len;
>  	int fragment;
> +	char curr_byte;

AFAIR, nothing prevent us from using `*out_pos`. I don't see a reason to
introduce `curr_byte`. It was necessary in v6 to store the initial
state for a block.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Tarantool-patches] [PATCH v7] base64: fix decoder output buffer overrun (reads)
  2021-03-02 23:08 ` Alexander Turenko via Tarantool-patches
@ 2021-03-09 10:09   ` Sergey Nikiforov via Tarantool-patches
  0 siblings, 0 replies; 3+ messages in thread
From: Sergey Nikiforov via Tarantool-patches @ 2021-03-09 10:09 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: tarantool-patches, Vladislav Shpilevoy

On 03.03.2021 2:08, Alexander Turenko wrote:
>> -static int
>> -base64_decode_block(const char *in_base64, int in_len,
>> -		    char *out_bin, int out_len,
>> -		    struct base64_decodestate *state)
>> +int
>> +base64_decode(const char *in_base64, int in_len,
>> +	      char *out_bin, int out_len)
>>   {
>>   	const char *in_pos = in_base64;
>>   	const char *in_end = in_base64 + in_len;
>>   	char *out_pos = out_bin;
>>   	char *out_end = out_bin + out_len;
>>   	int fragment;
>> +	char curr_byte;
> 
> AFAIR, nothing prevent us from using `*out_pos`. I don't see a reason to
> introduce `curr_byte`. It was necessary in v6 to store the initial
> state for a block.

I have removed this local var in v8.

Initially I planned to send patch series with this local var added in 
second patch and performance-optimized (w/o unnecessarily checks) 
version of decode function in third but my benchmarks have shown 
controversial effects. Adding this local var "by itself" actually 
decreases performance on one of my test machines but improves on another 
one. Optimized decode function, however, benefits from this local var 
but not when output buffer is large enough. And effect is different on 
another machine...

I haved saved optimized code (nothing really new here) and benchmark 
locally to work on them further later on.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-03-09 10:09 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-01 12:31 [Tarantool-patches] [PATCH v7] base64: fix decoder output buffer overrun (reads) Sergey Nikiforov via Tarantool-patches
2021-03-02 23:08 ` Alexander Turenko via Tarantool-patches
2021-03-09 10:09   ` Sergey Nikiforov via Tarantool-patches

Tarantool development patches archive

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://lists.tarantool.org/tarantool-patches/0 tarantool-patches/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 tarantool-patches tarantool-patches/ https://lists.tarantool.org/tarantool-patches \
		tarantool-patches@dev.tarantool.org.
	public-inbox-index tarantool-patches

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git