* [Tarantool-patches] [PATCH v2 1/2] fiber: set diagnostics at madvise/mprotect failure
2020-07-29 19:44 [Tarantool-patches] [PATCH v2 0/2] fiber: backport for stack madvise/mprotect errors handling Ilya Kosarev
@ 2020-07-29 19:44 ` Ilya Kosarev
2020-07-29 19:44 ` [Tarantool-patches] [PATCH v2 2/2] fiber: leak slab if unable to bring prots back Ilya Kosarev
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Ilya Kosarev @ 2020-07-29 19:44 UTC (permalink / raw)
To: gorcunov; +Cc: tarantool-patches
Both madvise and mprotect calls can fail due to various
reasons, mostly because of lack of free memory in the
system.
We log such cases via say_x helpers but this is not enough.
In particular tarantool/memcached relies on diag error to be
set to detect an error condition:
| expire_fiber = fiber_new(name, memcached_expire_loop);
| const box_error_t *err = box_error_last();
| if (err) {
| say_error("Can't start the expire fiber");
| say_error("%s", box_error_message(err));
| return -1;
| }
Thus lets use diag_set() helper here and instead of macros
use inline functions for better readability.
Closes #5211
Co-authored-by: Cyrill Gorcunov <gorcunov@gmail.com>
(backported from commit c67522973b32fae955835109d4f86eada8f67ae5)
---
src/errinj.h | 2 +
src/fiber.c | 83 ++++++++++++++++++++++++++++--------
test/box/errinj.result | 2 +
test/unit/CMakeLists.txt | 4 ++
test/unit/fiber_stack.c | 79 ++++++++++++++++++++++++++++++++++
test/unit/fiber_stack.result | 8 ++++
test/unit/suite.ini | 1 +
7 files changed, 162 insertions(+), 17 deletions(-)
create mode 100644 test/unit/fiber_stack.c
create mode 100644 test/unit/fiber_stack.result
diff --git a/src/errinj.h b/src/errinj.h
index 14cea11e9..77c8d2e3f 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -128,6 +128,8 @@ struct errinj {
_(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT, {.iparam = 0}) \
_(ERRINJ_INDEX_RESERVE, ERRINJ_BOOL, {.bparam = false})\
_(ERRINJ_VY_STMT_ALLOC, ERRINJ_INT, {.iparam = -1})\
+ _(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL, {.bparam = false})\
+ _(ERRINJ_FIBER_MPROTECT, ERRINJ_INT, {.iparam = -1})\
_(ERRINJ_VY_READ_VIEW_MERGE_FAIL, ERRINJ_BOOL, {.bparam = false})\
_(ERRINJ_VY_WRITE_ITERATOR_START_FAIL, ERRINJ_BOOL, {.bparam = false})\
_(ERRINJ_VY_RUN_OPEN, ERRINJ_INT, {.iparam = -1})\
diff --git a/src/fiber.c b/src/fiber.c
index d7ea9924a..a4124dfd1 100644
--- a/src/fiber.c
+++ b/src/fiber.c
@@ -41,6 +41,7 @@
#include "assoc.h"
#include "memory.h"
#include "trigger.h"
+#include "errinj.h"
#include "third_party/valgrind/memcheck.h"
@@ -66,22 +67,40 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
#define ASAN_FINISH_SWITCH_FIBER(var_name)
#endif
-#define fiber_madvise(addr, len, advice) \
-({ \
- int err = madvise((addr), (len), (advice)); \
- if (err) \
- say_syserror("madvise"); \
- err; \
-})
-
-#define fiber_mprotect(addr, len, prot) \
-({ \
- int err = mprotect((addr), (len), (prot)); \
- if (err) \
- say_syserror("mprotect"); \
- err; \
-})
+static inline int
+fiber_madvise(void *addr, size_t len, int advice)
+{
+ int rc = 0;
+
+ ERROR_INJECT(ERRINJ_FIBER_MADVISE, {
+ errno = ENOMEM;
+ rc = -1;
+ });
+
+ if (rc != 0 || madvise(addr, len, advice) != 0) {
+ diag_set(SystemError, "fiber madvise failed");
+ return -1;
+ }
+ return 0;
+}
+static inline int
+fiber_mprotect(void *addr, size_t len, int prot)
+{
+ int rc = 0;
+
+ struct errinj *inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+ if (inj != NULL && inj->iparam == prot) {
+ errno = ENOMEM;
+ rc = -1;
+ }
+
+ if (rc != 0 || mprotect(addr, len, prot) != 0) {
+ diag_set(SystemError, "fiber mprotect failed");
+ return -1;
+ }
+ return 0;
+}
/*
* Defines a handler to be executed on exit from cord's thread func,
* accessible via cord()->on_exit (normally NULL). It is used to
@@ -833,6 +852,12 @@ fiber_stack_recycle(struct fiber *fiber)
start = page_align_up(fiber->stack_watermark);
end = fiber->stack + fiber->stack_size;
}
+
+ /*
+ * Ignore errors on MADV_DONTNEED because this is
+ * just a hint for OS and not critical for
+ * functionality.
+ */
fiber_madvise(start, end - start, MADV_DONTNEED);
stack_put_watermark(fiber->stack_watermark);
}
@@ -851,7 +876,9 @@ fiber_stack_watermark_create(struct fiber *fiber)
/*
* We don't expect the whole stack usage in regular
- * loads, let's try to minimize rss pressure.
+ * loads, let's try to minimize rss pressure. But do
+ * not exit if MADV_DONTNEED failed, it is just a hint
+ * for OS, not critical one.
*/
fiber_madvise(fiber->stack, fiber->stack_size, MADV_DONTNEED);
@@ -889,6 +916,8 @@ fiber_stack_watermark_create(struct fiber *fiber)
static void
fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
{
+ static const int mprotect_flags = PROT_READ | PROT_WRITE;
+
if (fiber->stack != NULL) {
VALGRIND_STACK_DEREGISTER(fiber->stack_id);
#if ENABLE_ASAN
@@ -899,7 +928,22 @@ fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
guard = page_align_down(fiber->stack - page_size);
else
guard = page_align_up(fiber->stack + fiber->stack_size);
- fiber_mprotect(guard, page_size, PROT_READ | PROT_WRITE);
+
+ if (fiber_mprotect(guard, page_size, mprotect_flags) != 0) {
+ /*
+ * FIXME: We need some intelligent handling:
+ * say put this slab into a queue and retry
+ * to setup the original protection back in
+ * background.
+ *
+ * Note that in case if we're called from
+ * fiber_stack_create() the @a mprotect_flags is
+ * the same as the slab been created with, so
+ * calling mprotect for VMA with same flags
+ * won't fail.
+ */
+ diag_log();
+ }
slab_put(slabc, fiber->stack_slab);
}
}
@@ -946,6 +990,11 @@ fiber_stack_create(struct fiber *fiber, struct slab_cache *slabc,
fiber->stack_size);
if (fiber_mprotect(guard, page_size, PROT_NONE)) {
+ /*
+ * Write an error into the log since a guard
+ * page is critical for functionality.
+ */
+ diag_log();
fiber_stack_destroy(fiber, slabc);
return -1;
}
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 0b2cb7409..a4906547d 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -35,6 +35,8 @@ evals
- - ERRINJ_AUTO_UPGRADE: false
- ERRINJ_BUILD_INDEX: -1
- ERRINJ_DYN_MODULE_COUNT: 0
+ - ERRINJ_FIBER_MADVISE: false
+ - ERRINJ_FIBER_MPROTECT: -1
- ERRINJ_HTTPC_EXECUTE: false
- ERRINJ_HTTP_RESPONSE_ADD_WAIT: false
- ERRINJ_INDEX_ALLOC: false
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 6dfb10f46..cdafd785f 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -72,6 +72,10 @@ add_executable(fiber.test fiber.cc)
set_source_files_properties(fiber.cc PROPERTIES COMPILE_FLAGS -O0)
target_link_libraries(fiber.test core unit)
+add_executable(fiber_stack.test fiber_stack.c)
+set_source_files_properties(fiber_stack.c PROPERTIES COMPILE_FLAGS -O0)
+target_link_libraries(fiber_stack.test core unit)
+
if (NOT ENABLE_GCOV)
# This test is known to be broken with GCOV
add_executable(guard.test guard.cc)
diff --git a/test/unit/fiber_stack.c b/test/unit/fiber_stack.c
new file mode 100644
index 000000000..103dfaf0d
--- /dev/null
+++ b/test/unit/fiber_stack.c
@@ -0,0 +1,79 @@
+#include "memory.h"
+#include "fiber.h"
+#include "unit.h"
+#include "trivia/util.h"
+#include "errinj.h"
+
+static struct fiber_attr default_attr;
+
+static int
+noop_f(va_list ap)
+{
+ return 0;
+}
+
+static int
+main_f(va_list ap)
+{
+ struct errinj *inj;
+ struct fiber *fiber;
+
+ header();
+ plan(4);
+
+ /*
+ * Set non-default stack size to prevent reusing of an
+ * existing fiber.
+ */
+ struct fiber_attr *fiber_attr = fiber_attr_new();
+ fiber_attr_setstacksize(fiber_attr, default_attr.stack_size * 2);
+
+ /*
+ * Clear the fiber's diagnostics area to check that failed
+ * fiber_new() sets an error.
+ */
+ diag_clear(diag_get());
+
+ /*
+ * Check guard page setup via mprotect. We can't test the fiber
+ * destroy path since it clears fiber's diag.
+ */
+ inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+ inj->iparam = PROT_NONE;
+ fiber = fiber_new_ex("test_mprotect", fiber_attr, noop_f);
+ inj->iparam = -1;
+
+ ok(fiber == NULL, "mprotect: failed to setup fiber guard page");
+ ok(diag_get() != NULL, "mprotect: diag is armed after error");
+
+ /*
+ * Check madvise. We can't test the fiber destroy
+ * path since it is cleaning error.
+ */
+ diag_clear(diag_get());
+ inj = errinj(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL);
+ inj->bparam = true;
+ fiber = fiber_new_ex("test_madvise", fiber_attr, noop_f);
+ inj->bparam = false;
+
+ ok(fiber != NULL, "madvise: non critical error on madvise hint");
+ ok(diag_get() != NULL, "madvise: diag is armed after error");
+
+ footer();
+
+ ev_break(loop(), EVBREAK_ALL);
+ return check_plan();
+}
+
+int main()
+{
+ memory_init();
+ fiber_init(fiber_c_invoke);
+ fiber_attr_create(&default_attr);
+ struct fiber *f = fiber_new("main", main_f);
+ fiber_wakeup(f);
+ ev_run(loop(), 0);
+ fiber_free();
+ memory_free();
+ return 0;
+}
diff --git a/test/unit/fiber_stack.result b/test/unit/fiber_stack.result
new file mode 100644
index 000000000..43ff74b2f
--- /dev/null
+++ b/test/unit/fiber_stack.result
@@ -0,0 +1,8 @@
+SystemError fiber mprotect failed: Cannot allocate memory
+ *** main_f ***
+1..4
+ok 1 - mprotect: failed to setup fiber guard page
+ok 2 - mprotect: diag is armed after error
+ok 3 - madvise: non critical error on madvise hint
+ok 4 - madvise: diag is armed after error
+ *** main_f: done ***
diff --git a/test/unit/suite.ini b/test/unit/suite.ini
index 75c80ece1..b48eb9c76 100644
--- a/test/unit/suite.ini
+++ b/test/unit/suite.ini
@@ -1,4 +1,5 @@
[default]
core = unittest
description = unit tests
+release_disabled = fiber_stack.test
is_parallel = True
--
2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [Tarantool-patches] [PATCH v2 2/2] fiber: leak slab if unable to bring prots back
2020-07-29 19:44 [Tarantool-patches] [PATCH v2 0/2] fiber: backport for stack madvise/mprotect errors handling Ilya Kosarev
2020-07-29 19:44 ` [Tarantool-patches] [PATCH v2 1/2] fiber: set diagnostics at madvise/mprotect failure Ilya Kosarev
@ 2020-07-29 19:44 ` Ilya Kosarev
2020-07-29 19:57 ` [Tarantool-patches] [PATCH v2 0/2] fiber: backport for stack madvise/mprotect errors handling Cyrill Gorcunov
2020-08-04 7:28 ` Kirill Yukhin
3 siblings, 0 replies; 5+ messages in thread
From: Ilya Kosarev @ 2020-07-29 19:44 UTC (permalink / raw)
To: gorcunov; +Cc: tarantool-patches
In case if we unable to revert guard page back to
read|write we should never use such slab again.
Initially I thought of just put panic here and
exit but it is too destructive. I think better
print an error and continue. If node admin ignore
this message then one moment at future there won't
be slab left for use and creating new fibers get
prohibited.
In future (hopefully near one) we plan to drop
guard pages to prevent VMA fracturing and use
stack marks instead.
Relates to #5211
Co-authored-by: Cyrill Gorcunov <gorcunov@gmail.com>
(backported from commit 8d53fadc0cb15a45ea0cd461d2d5243be51b37e0)
---
src/fiber.c | 11 +++++++++--
test/unit/fiber_stack.c | 36 +++++++++++++++++++++++++++++++++---
test/unit/fiber_stack.result | 5 ++++-
3 files changed, 46 insertions(+), 6 deletions(-)
diff --git a/src/fiber.c b/src/fiber.c
index a4124dfd1..ecc59c783 100644
--- a/src/fiber.c
+++ b/src/fiber.c
@@ -936,15 +936,22 @@ fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
* to setup the original protection back in
* background.
*
+ * For now lets keep such slab referenced and
+ * leaked: if mprotect failed we must not allow
+ * to reuse such slab with PROT_NONE'ed page
+ * inside.
+ *
* Note that in case if we're called from
* fiber_stack_create() the @a mprotect_flags is
* the same as the slab been created with, so
* calling mprotect for VMA with same flags
* won't fail.
*/
- diag_log();
+ say_syserror("fiber: Can't put guard page to slab. "
+ "Leak %zu bytes", (size_t) fiber->stack_size);
+ } else {
+ slab_put(slabc, fiber->stack_slab);
}
- slab_put(slabc, fiber->stack_slab);
}
}
diff --git a/test/unit/fiber_stack.c b/test/unit/fiber_stack.c
index 103dfaf0d..41abe3764 100644
--- a/test/unit/fiber_stack.c
+++ b/test/unit/fiber_stack.c
@@ -15,11 +15,13 @@ noop_f(va_list ap)
static int
main_f(va_list ap)
{
+ struct slab_cache *slabc = &cord()->slabc;
+ size_t used_before, used_after;
struct errinj *inj;
struct fiber *fiber;
header();
- plan(4);
+ plan(6);
/*
* Set non-default stack size to prevent reusing of an
@@ -47,8 +49,7 @@ main_f(va_list ap)
ok(diag_get() != NULL, "mprotect: diag is armed after error");
/*
- * Check madvise. We can't test the fiber destroy
- * path since it is cleaning error.
+ * Check madvise error on fiber creation.
*/
diag_clear(diag_get());
inj = errinj(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL);
@@ -59,6 +60,35 @@ main_f(va_list ap)
ok(fiber != NULL, "madvise: non critical error on madvise hint");
ok(diag_get() != NULL, "madvise: diag is armed after error");
+ /*
+ * Check if we leak on fiber destruction.
+ * We will print an error and result get
+ * compared by testing engine.
+ */
+ fiber_attr_delete(fiber_attr);
+ fiber_attr = fiber_attr_new();
+ fiber_attr->flags |= FIBER_CUSTOM_STACK;
+ fiber_attr->stack_size = 64 << 10;
+
+ diag_clear(diag_get());
+
+ used_before = slabc->allocated.stats.used;
+
+ fiber = fiber_new_ex("test_madvise", fiber_attr, noop_f);
+ ok(fiber != NULL, "fiber with custom stack");
+ fiber_set_joinable(fiber, true);
+
+ inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+ inj->iparam = PROT_READ | PROT_WRITE;
+
+ fiber_start(fiber);
+ fiber_join(fiber);
+ inj->iparam = -1;
+
+ used_after = slabc->allocated.stats.used;
+ ok(used_after > used_before, "expected leak detected");
+
+ fiber_attr_delete(fiber_attr);
footer();
ev_break(loop(), EVBREAK_ALL);
diff --git a/test/unit/fiber_stack.result b/test/unit/fiber_stack.result
index 43ff74b2f..7cae4e96c 100644
--- a/test/unit/fiber_stack.result
+++ b/test/unit/fiber_stack.result
@@ -1,8 +1,11 @@
SystemError fiber mprotect failed: Cannot allocate memory
+fiber: Can't put guard page to slab. Leak 57344 bytes: Cannot allocate memory
*** main_f ***
-1..4
+1..6
ok 1 - mprotect: failed to setup fiber guard page
ok 2 - mprotect: diag is armed after error
ok 3 - madvise: non critical error on madvise hint
ok 4 - madvise: diag is armed after error
+ok 5 - fiber with custom stack
+ok 6 - expected leak detected
*** main_f: done ***
--
2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread