* [Tarantool-patches] [PATCH v7] base64: fix decoder output buffer overrun (reads)
@ 2021-03-01 12:31 Sergey Nikiforov via Tarantool-patches
2021-03-02 23:08 ` Alexander Turenko via Tarantool-patches
0 siblings, 1 reply; 3+ messages in thread
From: Sergey Nikiforov via Tarantool-patches @ 2021-03-01 12:31 UTC (permalink / raw)
To: tarantool-patches; +Cc: Vladislav Shpilevoy, Alexander Turenko
Was caught by base64 test with enabled ASAN.
It also caused data corruption - garbage instead of "extra bits" was
saved into state->result if there was no space in output buffer.
Decode state removed along with helper functions.
Added test for "zero-sized output buffer" case.
Fixes: #3069
---
Branch: https://github.com/tarantool/tarantool/tree/void234/gh-3069-fix-base64-memory-overrun-v7
Issue: https://github.com/tarantool/tarantool/issues/3069
test/unit/base64.c | 16 +++++-
test/unit/base64.result | 5 +-
third_party/base64.c | 123 +++++++++++++---------------------------
3 files changed, 57 insertions(+), 87 deletions(-)
diff --git a/test/unit/base64.c b/test/unit/base64.c
index cc74f64d1..508877217 100644
--- a/test/unit/base64.c
+++ b/test/unit/base64.c
@@ -75,9 +75,22 @@ base64_invalid_chars_test(void)
check_plan();
}
+static void
+base64_no_space_test(void)
+{
+ plan(1);
+
+ const char *const in = "sIIpHw==";
+ const int in_len = strlen(in);
+ const int rc = base64_decode(in, in_len, NULL, 0);
+ is(rc, 0, "no space in out buffer");
+
+ check_plan();
+}
+
int main(int argc, char *argv[])
{
- plan(29);
+ plan(30);
header();
const char *option_tests[] = {
@@ -96,6 +109,7 @@ int main(int argc, char *argv[])
}
base64_invalid_chars_test();
+ base64_no_space_test();
footer();
return check_plan();
diff --git a/test/unit/base64.result b/test/unit/base64.result
index 3bc2c2275..495e2d0a2 100644
--- a/test/unit/base64.result
+++ b/test/unit/base64.result
@@ -1,4 +1,4 @@
-1..29
+1..30
*** main ***
1..3
ok 1 - length
@@ -178,4 +178,7 @@ ok 28 - subtests
1..1
ok 1 - ignoring invalid chars
ok 29 - subtests
+ 1..1
+ ok 1 - no space in out buffer
+ok 30 - subtests
*** main: done ***
diff --git a/third_party/base64.c b/third_party/base64.c
index 7c69315ea..8304adaa3 100644
--- a/third_party/base64.c
+++ b/third_party/base64.c
@@ -202,14 +202,6 @@ base64_encode(const char *in_bin, int in_len,
/* {{{ decode */
-enum base64_decodestep { step_a, step_b, step_c, step_d };
-
-struct base64_decodestate
-{
- enum base64_decodestep step;
- char result;
-};
-
static int
base64_decode_value(int value)
{
@@ -231,94 +223,55 @@ base64_decode_value(int value)
return decoding[codepos];
}
-static inline void
-base64_decodestate_init(struct base64_decodestate *state)
-{
- state->step = step_a;
- state->result = 0;
-}
-
-static int
-base64_decode_block(const char *in_base64, int in_len,
- char *out_bin, int out_len,
- struct base64_decodestate *state)
+int
+base64_decode(const char *in_base64, int in_len,
+ char *out_bin, int out_len)
{
const char *in_pos = in_base64;
const char *in_end = in_base64 + in_len;
char *out_pos = out_bin;
char *out_end = out_bin + out_len;
int fragment;
+ char curr_byte;
- *out_pos = state->result;
-
- switch (state->step)
+ while (1)
{
- while (1)
- {
- case step_a:
- do {
- if (in_pos == in_end || out_pos >= out_end)
- {
- state->step = step_a;
- state->result = *out_pos;
- return out_pos - out_bin;
- }
- fragment = base64_decode_value(*in_pos++);
- } while (fragment < 0);
- *out_pos = (fragment & 0x03f) << 2;
- case step_b:
- do {
- if (in_pos == in_end || out_pos >= out_end)
- {
- state->step = step_b;
- state->result = *out_pos;
- return out_pos - out_bin;
- }
- fragment = base64_decode_value(*in_pos++);
- } while (fragment < 0);
- *out_pos++ |= (fragment & 0x030) >> 4;
- if (out_pos < out_end)
- *out_pos = (fragment & 0x00f) << 4;
- case step_c:
- do {
- if (in_pos == in_end || out_pos >= out_end)
- {
- state->step = step_c;
- state->result = *out_pos;
- return out_pos - out_bin;
- }
- fragment = base64_decode_value(*in_pos++);
- } while (fragment < 0);
- *out_pos++ |= (fragment & 0x03c) >> 2;
- if (out_pos < out_end)
- *out_pos = (fragment & 0x003) << 6;
- case step_d:
- do {
- if (in_pos == in_end || out_pos >= out_end)
- {
- state->step = step_d;
- state->result = *out_pos;
- return out_pos - out_bin;
- }
- fragment = base64_decode_value(*in_pos++);
- } while (fragment < 0);
- *out_pos++ |= (fragment & 0x03f);
- }
+ do {
+ if (in_pos == in_end || out_pos >= out_end)
+ return out_pos - out_bin;
+ fragment = base64_decode_value(*in_pos++);
+ } while (fragment < 0);
+ curr_byte = (fragment & 0x03f) << 2;
+ do {
+ if (in_pos == in_end || out_pos >= out_end)
+ return out_pos - out_bin;
+ fragment = base64_decode_value(*in_pos++);
+ } while (fragment < 0);
+ curr_byte |= (fragment & 0x030) >> 4;
+ *out_pos++ = curr_byte;
+ curr_byte = (fragment & 0x00f) << 4;
+ if (out_pos < out_end)
+ *out_pos = curr_byte;
+ do {
+ if (in_pos == in_end || out_pos >= out_end)
+ return out_pos - out_bin;
+ fragment = base64_decode_value(*in_pos++);
+ } while (fragment < 0);
+ curr_byte |= (fragment & 0x03c) >> 2;
+ *out_pos++ = curr_byte;
+ curr_byte = (fragment & 0x003) << 6;
+ if (out_pos < out_end)
+ *out_pos = curr_byte;
+ do {
+ if (in_pos == in_end || out_pos >= out_end)
+ return out_pos - out_bin;
+ fragment = base64_decode_value(*in_pos++);
+ } while (fragment < 0);
+ curr_byte |= (fragment & 0x03f);
+ *out_pos++ = curr_byte;
}
/* control should not reach here */
return out_pos - out_bin;
}
-
-
-int
-base64_decode(const char *in_base64, int in_len,
- char *out_bin, int out_len)
-{
- struct base64_decodestate state;
- base64_decodestate_init(&state);
- return base64_decode_block(in_base64, in_len,
- out_bin, out_len, &state);
-}
-
/* }}} */
--
2.25.1
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [Tarantool-patches] [PATCH v7] base64: fix decoder output buffer overrun (reads)
2021-03-01 12:31 [Tarantool-patches] [PATCH v7] base64: fix decoder output buffer overrun (reads) Sergey Nikiforov via Tarantool-patches
@ 2021-03-02 23:08 ` Alexander Turenko via Tarantool-patches
2021-03-09 10:09 ` Sergey Nikiforov via Tarantool-patches
0 siblings, 1 reply; 3+ messages in thread
From: Alexander Turenko via Tarantool-patches @ 2021-03-02 23:08 UTC (permalink / raw)
To: Sergey Nikiforov; +Cc: tarantool-patches, Vladislav Shpilevoy
> -static int
> -base64_decode_block(const char *in_base64, int in_len,
> - char *out_bin, int out_len,
> - struct base64_decodestate *state)
> +int
> +base64_decode(const char *in_base64, int in_len,
> + char *out_bin, int out_len)
> {
> const char *in_pos = in_base64;
> const char *in_end = in_base64 + in_len;
> char *out_pos = out_bin;
> char *out_end = out_bin + out_len;
> int fragment;
> + char curr_byte;
AFAIR, nothing prevent us from using `*out_pos`. I don't see a reason to
introduce `curr_byte`. It was necessary in v6 to store the initial
state for a block.
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [Tarantool-patches] [PATCH v7] base64: fix decoder output buffer overrun (reads)
2021-03-02 23:08 ` Alexander Turenko via Tarantool-patches
@ 2021-03-09 10:09 ` Sergey Nikiforov via Tarantool-patches
0 siblings, 0 replies; 3+ messages in thread
From: Sergey Nikiforov via Tarantool-patches @ 2021-03-09 10:09 UTC (permalink / raw)
To: Alexander Turenko; +Cc: tarantool-patches, Vladislav Shpilevoy
On 03.03.2021 2:08, Alexander Turenko wrote:
>> -static int
>> -base64_decode_block(const char *in_base64, int in_len,
>> - char *out_bin, int out_len,
>> - struct base64_decodestate *state)
>> +int
>> +base64_decode(const char *in_base64, int in_len,
>> + char *out_bin, int out_len)
>> {
>> const char *in_pos = in_base64;
>> const char *in_end = in_base64 + in_len;
>> char *out_pos = out_bin;
>> char *out_end = out_bin + out_len;
>> int fragment;
>> + char curr_byte;
>
> AFAIR, nothing prevent us from using `*out_pos`. I don't see a reason to
> introduce `curr_byte`. It was necessary in v6 to store the initial
> state for a block.
I have removed this local var in v8.
Initially I planned to send patch series with this local var added in
second patch and performance-optimized (w/o unnecessarily checks)
version of decode function in third but my benchmarks have shown
controversial effects. Adding this local var "by itself" actually
decreases performance on one of my test machines but improves on another
one. Optimized decode function, however, benefits from this local var
but not when output buffer is large enough. And effect is different on
another machine...
I haved saved optimized code (nothing really new here) and benchmark
locally to work on them further later on.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-03-09 10:09 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-01 12:31 [Tarantool-patches] [PATCH v7] base64: fix decoder output buffer overrun (reads) Sergey Nikiforov via Tarantool-patches
2021-03-02 23:08 ` Alexander Turenko via Tarantool-patches
2021-03-09 10:09 ` Sergey Nikiforov via Tarantool-patches
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox