* [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads)
2020-12-25 10:41 [Tarantool-patches] [PATCH v5 0/2] base64: Fix decoder, improve its performance Sergey Nikiforov
@ 2020-12-25 10:41 ` Sergey Nikiforov
2020-12-25 13:00 ` Leonid Vasiliev
2020-12-25 10:41 ` [Tarantool-patches] [PATCH v5 2/2] base64: improve decoder performance Sergey Nikiforov
2020-12-25 13:08 ` [Tarantool-patches] [PATCH v5 0/2] base64: Fix decoder, improve its performance Leonid Vasiliev
2 siblings, 1 reply; 7+ messages in thread
From: Sergey Nikiforov @ 2020-12-25 10:41 UTC (permalink / raw)
To: tarantool-patches; +Cc: Vladislav Shpilevoy
Was caught by base64 test with enabled ASAN.
It also caused data corruption - garbage instead of "extra bits" was
saved into state->result if there was no space in output buffer.
Added test for "zero-sized output buffer" case.
Fixes: #3069
---
test/unit/base64.c | 13 ++++++++++++-
test/unit/base64.result | 3 ++-
third_party/base64.c | 28 +++++++++++++++++-----------
3 files changed, 31 insertions(+), 13 deletions(-)
diff --git a/test/unit/base64.c b/test/unit/base64.c
index ada497adf..c0ccf1321 100644
--- a/test/unit/base64.c
+++ b/test/unit/base64.c
@@ -58,9 +58,18 @@ base64_nowrap_test(const char *str)
base64_test(str, BASE64_NOWRAP, symbols, lengthof(symbols));
}
+static void
+base64_no_space_test(void)
+{
+ const char *const in = "sIIpHw==";
+ const int in_len = strlen(in);
+ const int rc = base64_decode(in, in_len, NULL, 0);
+ is(rc, 0, "no space in out buffer");
+}
+
int main(int argc, char *argv[])
{
- plan(28);
+ plan(29);
header();
const char *option_tests[] = {
@@ -78,6 +87,8 @@ int main(int argc, char *argv[])
base64_nowrap_test(option_tests[i]);
}
+ base64_no_space_test();
+
footer();
return check_plan();
}
diff --git a/test/unit/base64.result b/test/unit/base64.result
index cd1f2b3f6..71b3519bf 100644
--- a/test/unit/base64.result
+++ b/test/unit/base64.result
@@ -1,4 +1,4 @@
-1..28
+1..29
*** main ***
1..3
ok 1 - length
@@ -175,4 +175,5 @@ ok 27 - subtests
ok 3 - decode length ok
ok 4 - encode/decode
ok 28 - subtests
+ok 29 - no space in out buffer
*** main: done ***
diff --git a/third_party/base64.c b/third_party/base64.c
index 8ecab23eb..3350a98ff 100644
--- a/third_party/base64.c
+++ b/third_party/base64.c
@@ -247,8 +247,9 @@ base64_decode_block(const char *in_base64, int in_len,
char *out_pos = out_bin;
char *out_end = out_bin + out_len;
int fragment;
+ char curr_byte;
- *out_pos = state->result;
+ curr_byte = state->result;
switch (state->step)
{
@@ -259,49 +260,54 @@ base64_decode_block(const char *in_base64, int in_len,
if (in_pos == in_end || out_pos >= out_end)
{
state->step = step_a;
- state->result = *out_pos;
+ state->result = curr_byte;
return out_pos - out_bin;
}
fragment = base64_decode_value(*in_pos++);
} while (fragment < 0);
- *out_pos = (fragment & 0x03f) << 2;
+ curr_byte = (fragment & 0x03f) << 2;
case step_b:
do {
if (in_pos == in_end || out_pos >= out_end)
{
state->step = step_b;
- state->result = *out_pos;
+ state->result = curr_byte;
return out_pos - out_bin;
}
fragment = base64_decode_value(*in_pos++);
} while (fragment < 0);
- *out_pos++ |= (fragment & 0x030) >> 4;
+ curr_byte |= (fragment & 0x030) >> 4;
+ *out_pos++ = curr_byte;
+ curr_byte = (fragment & 0x00f) << 4;
if (out_pos < out_end)
- *out_pos = (fragment & 0x00f) << 4;
+ *out_pos = curr_byte;
case step_c:
do {
if (in_pos == in_end || out_pos >= out_end)
{
state->step = step_c;
- state->result = *out_pos;
+ state->result = curr_byte;
return out_pos - out_bin;
}
fragment = base64_decode_value(*in_pos++);
} while (fragment < 0);
- *out_pos++ |= (fragment & 0x03c) >> 2;
+ curr_byte |= (fragment & 0x03c) >> 2;
+ *out_pos++ = curr_byte;
+ curr_byte = (fragment & 0x003) << 6;
if (out_pos < out_end)
- *out_pos = (fragment & 0x003) << 6;
+ *out_pos = curr_byte;
case step_d:
do {
if (in_pos == in_end || out_pos >= out_end)
{
state->step = step_d;
- state->result = *out_pos;
+ state->result = curr_byte;
return out_pos - out_bin;
}
fragment = base64_decode_value(*in_pos++);
} while (fragment < 0);
- *out_pos++ |= (fragment & 0x03f);
+ curr_byte |= (fragment & 0x03f);
+ *out_pos++ = curr_byte;
}
}
/* control should not reach here */
--
2.25.1
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads)
2020-12-25 10:41 ` [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads) Sergey Nikiforov
@ 2020-12-25 13:00 ` Leonid Vasiliev
0 siblings, 0 replies; 7+ messages in thread
From: Leonid Vasiliev @ 2020-12-25 13:00 UTC (permalink / raw)
To: Sergey Nikiforov, tarantool-patches; +Cc: Vladislav Shpilevoy
Hi! Thank you for the patch.
LGTM.
See 1 comment below:
On 25.12.2020 13:41, Sergey Nikiforov wrote:
> Was caught by base64 test with enabled ASAN.
>
> It also caused data corruption - garbage instead of "extra bits" was
> saved into state->result if there was no space in output buffer.
>
> Added test for "zero-sized output buffer" case.
>
> Fixes: #3069
> ---
> test/unit/base64.c | 13 ++++++++++++-
> test/unit/base64.result | 3 ++-
> third_party/base64.c | 28 +++++++++++++++++-----------
> 3 files changed, 31 insertions(+), 13 deletions(-)
>
> diff --git a/test/unit/base64.c b/test/unit/base64.c
> index ada497adf..c0ccf1321 100644
> --- a/test/unit/base64.c
> +++ b/test/unit/base64.c
> @@ -58,9 +58,18 @@ base64_nowrap_test(const char *str)
> base64_test(str, BASE64_NOWRAP, symbols, lengthof(symbols));
> }
>
> +static void
> +base64_no_space_test(void)
> +{
Add `plan (1)` to the beginning of the function and `check_plan ()` to
the end to treat it as a separate subtest.
> + const char *const in = "sIIpHw==";
> + const int in_len = strlen(in);
> + const int rc = base64_decode(in, in_len, NULL, 0);
> + is(rc, 0, "no space in out buffer");
> +}
> +
> int main(int argc, char *argv[])
> {
> - plan(28);
> + plan(29);
> header();
>
> const char *option_tests[] = {
> @@ -78,6 +87,8 @@ int main(int argc, char *argv[])
> base64_nowrap_test(option_tests[i]);
> }
>
> + base64_no_space_test();
> +
> footer();
> return check_plan();
> }
> diff --git a/test/unit/base64.result b/test/unit/base64.result
> index cd1f2b3f6..71b3519bf 100644
> --- a/test/unit/base64.result
> +++ b/test/unit/base64.result
> @@ -1,4 +1,4 @@
> -1..28
> +1..29
> *** main ***
> 1..3
> ok 1 - length
> @@ -175,4 +175,5 @@ ok 27 - subtests
> ok 3 - decode length ok
> ok 4 - encode/decode
> ok 28 - subtests
> +ok 29 - no space in out buffer
> *** main: done ***
> diff --git a/third_party/base64.c b/third_party/base64.c
> index 8ecab23eb..3350a98ff 100644
> --- a/third_party/base64.c
> +++ b/third_party/base64.c
> @@ -247,8 +247,9 @@ base64_decode_block(const char *in_base64, int in_len,
> char *out_pos = out_bin;
> char *out_end = out_bin + out_len;
> int fragment;
> + char curr_byte;
>
> - *out_pos = state->result;
> + curr_byte = state->result;
>
> switch (state->step)
> {
> @@ -259,49 +260,54 @@ base64_decode_block(const char *in_base64, int in_len,
> if (in_pos == in_end || out_pos >= out_end)
> {
> state->step = step_a;
> - state->result = *out_pos;
> + state->result = curr_byte;
> return out_pos - out_bin;
> }
> fragment = base64_decode_value(*in_pos++);
> } while (fragment < 0);
> - *out_pos = (fragment & 0x03f) << 2;
> + curr_byte = (fragment & 0x03f) << 2;
> case step_b:
> do {
> if (in_pos == in_end || out_pos >= out_end)
> {
> state->step = step_b;
> - state->result = *out_pos;
> + state->result = curr_byte;
> return out_pos - out_bin;
> }
> fragment = base64_decode_value(*in_pos++);
> } while (fragment < 0);
> - *out_pos++ |= (fragment & 0x030) >> 4;
> + curr_byte |= (fragment & 0x030) >> 4;
> + *out_pos++ = curr_byte;
> + curr_byte = (fragment & 0x00f) << 4;
> if (out_pos < out_end)
> - *out_pos = (fragment & 0x00f) << 4;
> + *out_pos = curr_byte;
> case step_c:
> do {
> if (in_pos == in_end || out_pos >= out_end)
> {
> state->step = step_c;
> - state->result = *out_pos;
> + state->result = curr_byte;
> return out_pos - out_bin;
> }
> fragment = base64_decode_value(*in_pos++);
> } while (fragment < 0);
> - *out_pos++ |= (fragment & 0x03c) >> 2;
> + curr_byte |= (fragment & 0x03c) >> 2;
> + *out_pos++ = curr_byte;
> + curr_byte = (fragment & 0x003) << 6;
> if (out_pos < out_end)
> - *out_pos = (fragment & 0x003) << 6;
> + *out_pos = curr_byte;
> case step_d:
> do {
> if (in_pos == in_end || out_pos >= out_end)
> {
> state->step = step_d;
> - state->result = *out_pos;
> + state->result = curr_byte;
> return out_pos - out_bin;
> }
> fragment = base64_decode_value(*in_pos++);
> } while (fragment < 0);
> - *out_pos++ |= (fragment & 0x03f);
> + curr_byte |= (fragment & 0x03f);
> + *out_pos++ = curr_byte;
> }
> }
> /* control should not reach here */
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Tarantool-patches] [PATCH v5 2/2] base64: improve decoder performance
2020-12-25 10:41 [Tarantool-patches] [PATCH v5 0/2] base64: Fix decoder, improve its performance Sergey Nikiforov
2020-12-25 10:41 ` [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads) Sergey Nikiforov
@ 2020-12-25 10:41 ` Sergey Nikiforov
2020-12-25 13:01 ` Leonid Vasiliev
2020-12-25 13:08 ` [Tarantool-patches] [PATCH v5 0/2] base64: Fix decoder, improve its performance Leonid Vasiliev
2 siblings, 1 reply; 7+ messages in thread
From: Sergey Nikiforov @ 2020-12-25 10:41 UTC (permalink / raw)
To: tarantool-patches; +Cc: Vladislav Shpilevoy
Unnecessary checks were removed from internal loops.
Benchmark shows that performance is now ~1.19 times higher
(release build, Intel Core I7-9700K, only one thread).
---
third_party/base64.c | 35 ++++++++++++++++++++++++++---------
1 file changed, 26 insertions(+), 9 deletions(-)
diff --git a/third_party/base64.c b/third_party/base64.c
index 3350a98ff..2b2b61ba4 100644
--- a/third_party/base64.c
+++ b/third_party/base64.c
@@ -257,10 +257,10 @@ base64_decode_block(const char *in_base64, int in_len,
{
case step_a:
do {
- if (in_pos == in_end || out_pos >= out_end)
+ if (in_pos >= in_end)
{
state->step = step_a;
- state->result = curr_byte;
+ /* curr_byte is useless now. */
return out_pos - out_bin;
}
fragment = base64_decode_value(*in_pos++);
@@ -268,7 +268,7 @@ base64_decode_block(const char *in_base64, int in_len,
curr_byte = (fragment & 0x03f) << 2;
case step_b:
do {
- if (in_pos == in_end || out_pos >= out_end)
+ if (in_pos >= in_end)
{
state->step = step_b;
state->result = curr_byte;
@@ -276,14 +276,19 @@ base64_decode_block(const char *in_base64, int in_len,
}
fragment = base64_decode_value(*in_pos++);
} while (fragment < 0);
+ if (out_pos >= out_end)
+ {
+ /* We are losing some data. */
+ state->step = step_b;
+ state->result = curr_byte;
+ return out_pos - out_bin;
+ }
curr_byte |= (fragment & 0x030) >> 4;
*out_pos++ = curr_byte;
curr_byte = (fragment & 0x00f) << 4;
- if (out_pos < out_end)
- *out_pos = curr_byte;
case step_c:
do {
- if (in_pos == in_end || out_pos >= out_end)
+ if (in_pos >= in_end)
{
state->step = step_c;
state->result = curr_byte;
@@ -291,14 +296,19 @@ base64_decode_block(const char *in_base64, int in_len,
}
fragment = base64_decode_value(*in_pos++);
} while (fragment < 0);
+ if (out_pos >= out_end)
+ {
+ /* We are losing some data. */
+ state->step = step_c;
+ state->result = curr_byte;
+ return out_pos - out_bin;
+ }
curr_byte |= (fragment & 0x03c) >> 2;
*out_pos++ = curr_byte;
curr_byte = (fragment & 0x003) << 6;
- if (out_pos < out_end)
- *out_pos = curr_byte;
case step_d:
do {
- if (in_pos == in_end || out_pos >= out_end)
+ if (in_pos >= in_end)
{
state->step = step_d;
state->result = curr_byte;
@@ -306,6 +316,13 @@ base64_decode_block(const char *in_base64, int in_len,
}
fragment = base64_decode_value(*in_pos++);
} while (fragment < 0);
+ if (out_pos >= out_end)
+ {
+ /* We are losing some data. */
+ state->step = step_d;
+ state->result = curr_byte;
+ return out_pos - out_bin;
+ }
curr_byte |= (fragment & 0x03f);
*out_pos++ = curr_byte;
}
--
2.25.1
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Tarantool-patches] [PATCH v5 2/2] base64: improve decoder performance
2020-12-25 10:41 ` [Tarantool-patches] [PATCH v5 2/2] base64: improve decoder performance Sergey Nikiforov
@ 2020-12-25 13:01 ` Leonid Vasiliev
0 siblings, 0 replies; 7+ messages in thread
From: Leonid Vasiliev @ 2020-12-25 13:01 UTC (permalink / raw)
To: Sergey Nikiforov, tarantool-patches; +Cc: Vladislav Shpilevoy
Hi! Thank you for the patch.
LGTM.
On 25.12.2020 13:41, Sergey Nikiforov wrote:
> Unnecessary checks were removed from internal loops.
> Benchmark shows that performance is now ~1.19 times higher
> (release build, Intel Core I7-9700K, only one thread).
> ---
> third_party/base64.c | 35 ++++++++++++++++++++++++++---------
> 1 file changed, 26 insertions(+), 9 deletions(-)
>
> diff --git a/third_party/base64.c b/third_party/base64.c
> index 3350a98ff..2b2b61ba4 100644
> --- a/third_party/base64.c
> +++ b/third_party/base64.c
> @@ -257,10 +257,10 @@ base64_decode_block(const char *in_base64, int in_len,
> {
> case step_a:
> do {
> - if (in_pos == in_end || out_pos >= out_end)
> + if (in_pos >= in_end)
> {
> state->step = step_a;
> - state->result = curr_byte;
> + /* curr_byte is useless now. */
> return out_pos - out_bin;
> }
> fragment = base64_decode_value(*in_pos++);
> @@ -268,7 +268,7 @@ base64_decode_block(const char *in_base64, int in_len,
> curr_byte = (fragment & 0x03f) << 2;
> case step_b:
> do {
> - if (in_pos == in_end || out_pos >= out_end)
> + if (in_pos >= in_end)
> {
> state->step = step_b;
> state->result = curr_byte;
> @@ -276,14 +276,19 @@ base64_decode_block(const char *in_base64, int in_len,
> }
> fragment = base64_decode_value(*in_pos++);
> } while (fragment < 0);
> + if (out_pos >= out_end)
> + {
> + /* We are losing some data. */
> + state->step = step_b;
> + state->result = curr_byte;
> + return out_pos - out_bin;
> + }
> curr_byte |= (fragment & 0x030) >> 4;
> *out_pos++ = curr_byte;
> curr_byte = (fragment & 0x00f) << 4;
> - if (out_pos < out_end)
> - *out_pos = curr_byte;
> case step_c:
> do {
> - if (in_pos == in_end || out_pos >= out_end)
> + if (in_pos >= in_end)
> {
> state->step = step_c;
> state->result = curr_byte;
> @@ -291,14 +296,19 @@ base64_decode_block(const char *in_base64, int in_len,
> }
> fragment = base64_decode_value(*in_pos++);
> } while (fragment < 0);
> + if (out_pos >= out_end)
> + {
> + /* We are losing some data. */
> + state->step = step_c;
> + state->result = curr_byte;
> + return out_pos - out_bin;
> + }
> curr_byte |= (fragment & 0x03c) >> 2;
> *out_pos++ = curr_byte;
> curr_byte = (fragment & 0x003) << 6;
> - if (out_pos < out_end)
> - *out_pos = curr_byte;
> case step_d:
> do {
> - if (in_pos == in_end || out_pos >= out_end)
> + if (in_pos >= in_end)
> {
> state->step = step_d;
> state->result = curr_byte;
> @@ -306,6 +316,13 @@ base64_decode_block(const char *in_base64, int in_len,
> }
> fragment = base64_decode_value(*in_pos++);
> } while (fragment < 0);
> + if (out_pos >= out_end)
> + {
> + /* We are losing some data. */
> + state->step = step_d;
> + state->result = curr_byte;
> + return out_pos - out_bin;
> + }
> curr_byte |= (fragment & 0x03f);
> *out_pos++ = curr_byte;
> }
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Tarantool-patches] [PATCH v5 0/2] base64: Fix decoder, improve its performance
2020-12-25 10:41 [Tarantool-patches] [PATCH v5 0/2] base64: Fix decoder, improve its performance Sergey Nikiforov
2020-12-25 10:41 ` [Tarantool-patches] [PATCH v5 1/2] base64: fix decoder output buffer overrun (reads) Sergey Nikiforov
2020-12-25 10:41 ` [Tarantool-patches] [PATCH v5 2/2] base64: improve decoder performance Sergey Nikiforov
@ 2020-12-25 13:08 ` Leonid Vasiliev
2 siblings, 0 replies; 7+ messages in thread
From: Leonid Vasiliev @ 2020-12-25 13:08 UTC (permalink / raw)
To: Sergey Nikiforov, tarantool-patches; +Cc: Vladislav Shpilevoy, Sergey Nikiforov
Hi!
The patchset LGTM.
I think it can be sent to A. Turenko to recive the second LGTM.
Add links to issue and branch.
On 25.12.2020 13:41, Sergey Nikiforov wrote:
> From: Sergey Nikiforov <void234@gmail.com>
>
> First patch fixes #3069 and adds test for zero-sized output buffer,
> second one improves base64 decoder performance
>
> Sergey Nikiforov (2):
> base64: fix decoder output buffer overrun (reads)
> base64: improve decoder performance
>
> test/unit/base64.c | 13 +++++++++-
> test/unit/base64.result | 3 ++-
> third_party/base64.c | 57 +++++++++++++++++++++++++++++------------
> 3 files changed, 54 insertions(+), 19 deletions(-)
>
^ permalink raw reply [flat|nested] 7+ messages in thread