[Tarantool-patches] [PATCH v8 1/2] fiber: set diagnostics at madvise/mprotect failure
Cyrill Gorcunov
gorcunov at gmail.com
Fri Feb 14 16:22:19 MSK 2020
Both madvise and mprotect calls can fail due to various
reasons, mostly because of lack of free memory in the
system.
We log such cases via say_x helpers but this is not enough.
In particular tarantool/memcached relies on diag error to be
set to detect an error condition:
| expire_fiber = fiber_new(name, memcached_expire_loop);
| const box_error_t *err = box_error_last();
| if (err) {
| say_error("Can't start the expire fiber");
| say_error("%s", box_error_message(err));
| return -1;
| }
Thus lets use diag_set() helper here and instead of macros
use inline functions for better readability.
Fixes #4722
Reported-by: Alexander Turenko <alexander.turenko at tarantool.org>
Reviewed-by: Alexander Turenko <alexander.turenko at tarantool.org>
Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
src/lib/core/errinj.h | 2 +
src/lib/core/fiber.c | 84 ++++++++++++++++++++++++++++--------
test/box/errinj.result | 2 +
test/unit/CMakeLists.txt | 4 ++
test/unit/fiber_stack.c | 79 +++++++++++++++++++++++++++++++++
test/unit/fiber_stack.result | 8 ++++
test/unit/suite.ini | 2 +-
7 files changed, 163 insertions(+), 18 deletions(-)
create mode 100644 test/unit/fiber_stack.c
create mode 100644 test/unit/fiber_stack.result
diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
index 672da2119..ed0cba903 100644
--- a/src/lib/core/errinj.h
+++ b/src/lib/core/errinj.h
@@ -135,6 +135,8 @@ struct errinj {
_(ERRINJ_COIO_SENDFILE_CHUNK, ERRINJ_INT, {.iparam = -1}) \
_(ERRINJ_SWIM_FD_ONLY, ERRINJ_BOOL, {.bparam = false}) \
_(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT, {.iparam = 0}) \
+ _(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL, {.bparam = false}) \
+ _(ERRINJ_FIBER_MPROTECT, ERRINJ_INT, {.iparam = -1})
ENUM0(errinj_id, ERRINJ_LIST);
extern struct errinj errinjs[];
diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
index 00ae8cded..f10687b29 100644
--- a/src/lib/core/fiber.c
+++ b/src/lib/core/fiber.c
@@ -41,6 +41,7 @@
#include "assoc.h"
#include "memory.h"
#include "trigger.h"
+#include "errinj.h"
#if ENABLE_FIBER_TOP
#include <x86intrin.h> /* __rdtscp() */
@@ -173,21 +174,40 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
#define ASAN_FINISH_SWITCH_FIBER(var_name)
#endif
-#define fiber_madvise(addr, len, advice) \
-({ \
- int err = madvise((addr), (len), (advice)); \
- if (err) \
- say_syserror("madvise"); \
- err; \
-})
-
-#define fiber_mprotect(addr, len, prot) \
-({ \
- int err = mprotect((addr), (len), (prot)); \
- if (err) \
- say_syserror("mprotect"); \
- err; \
-})
+static inline int
+fiber_madvise(void *addr, size_t len, int advice)
+{
+ int rc = 0;
+
+ ERROR_INJECT(ERRINJ_FIBER_MADVISE, {
+ errno = ENOMEM;
+ rc = -1;
+ });
+
+ if (rc != 0 || madvise(addr, len, advice) != 0) {
+ diag_set(SystemError, "fiber madvise failed");
+ return -1;
+ }
+ return 0;
+}
+
+static inline int
+fiber_mprotect(void *addr, size_t len, int prot)
+{
+ int rc = 0;
+
+ struct errinj *inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+ if (inj != NULL && inj->iparam == prot) {
+ errno = ENOMEM;
+ rc = -1;
+ }
+
+ if (rc != 0 || mprotect(addr, len, prot) != 0) {
+ diag_set(SystemError, "fiber mprotect failed");
+ return -1;
+ }
+ return 0;
+}
#if ENABLE_FIBER_TOP
static __thread bool fiber_top_enabled = false;
@@ -951,6 +971,12 @@ fiber_stack_recycle(struct fiber *fiber)
start = page_align_up(fiber->stack_watermark);
end = fiber->stack + fiber->stack_size;
}
+
+ /*
+ * Ignore errors on MADV_DONTNEED because this is
+ * just a hint for OS and not critical for
+ * functionality.
+ */
fiber_madvise(start, end - start, MADV_DONTNEED);
stack_put_watermark(fiber->stack_watermark);
}
@@ -969,7 +995,9 @@ fiber_stack_watermark_create(struct fiber *fiber)
/*
* We don't expect the whole stack usage in regular
- * loads, let's try to minimize rss pressure.
+ * loads, let's try to minimize rss pressure. But do
+ * not exit if MADV_DONTNEED failed, it is just a hint
+ * for OS, not critical one.
*/
fiber_madvise(fiber->stack, fiber->stack_size, MADV_DONTNEED);
@@ -1007,6 +1035,8 @@ fiber_stack_watermark_create(struct fiber *fiber)
static void
fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
{
+ static const int mprotect_flags = PROT_READ | PROT_WRITE;
+
if (fiber->stack != NULL) {
VALGRIND_STACK_DEREGISTER(fiber->stack_id);
#if ENABLE_ASAN
@@ -1017,7 +1047,22 @@ fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
guard = page_align_down(fiber->stack - page_size);
else
guard = page_align_up(fiber->stack + fiber->stack_size);
- fiber_mprotect(guard, page_size, PROT_READ | PROT_WRITE);
+
+ if (fiber_mprotect(guard, page_size, mprotect_flags) != 0) {
+ /*
+ * FIXME: We need some intelligent handling:
+ * say put this slab into a queue and retry
+ * to setup the original protection back in
+ * background.
+ *
+ * Note that in case if we're called from
+ * fiber_stack_create() the @a mprotect_flags is
+ * the same as the slab been created with, so
+ * calling mprotect for VMA with same flags
+ * won't fail.
+ */
+ diag_log();
+ }
slab_put(slabc, fiber->stack_slab);
}
}
@@ -1064,6 +1109,11 @@ fiber_stack_create(struct fiber *fiber, struct slab_cache *slabc,
fiber->stack_size);
if (fiber_mprotect(guard, page_size, PROT_NONE)) {
+ /*
+ * Write an error into the log since a guard
+ * page is critical for functionality.
+ */
+ diag_log();
fiber_stack_destroy(fiber, slabc);
return -1;
}
diff --git a/test/box/errinj.result b/test/box/errinj.result
index f043c6689..daa27ed24 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -48,6 +48,8 @@ evals
- ERRINJ_CHECK_FORMAT_DELAY: false
- ERRINJ_COIO_SENDFILE_CHUNK: -1
- ERRINJ_DYN_MODULE_COUNT: 0
+ - ERRINJ_FIBER_MADVISE: false
+ - ERRINJ_FIBER_MPROTECT: -1
- ERRINJ_HTTPC_EXECUTE: false
- ERRINJ_HTTP_RESPONSE_ADD_WAIT: false
- ERRINJ_INDEX_ALLOC: false
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 4a57597e9..c037ac539 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -74,6 +74,10 @@ add_executable(fiber.test fiber.cc)
set_source_files_properties(fiber.cc PROPERTIES COMPILE_FLAGS -O0)
target_link_libraries(fiber.test core unit)
+add_executable(fiber_stack.test fiber_stack.c)
+set_source_files_properties(fiber_stack.c PROPERTIES COMPILE_FLAGS -O0)
+target_link_libraries(fiber_stack.test core unit)
+
if (NOT ENABLE_GCOV)
# This test is known to be broken with GCOV
add_executable(guard.test guard.cc)
diff --git a/test/unit/fiber_stack.c b/test/unit/fiber_stack.c
new file mode 100644
index 000000000..103dfaf0d
--- /dev/null
+++ b/test/unit/fiber_stack.c
@@ -0,0 +1,79 @@
+#include "memory.h"
+#include "fiber.h"
+#include "unit.h"
+#include "trivia/util.h"
+#include "errinj.h"
+
+static struct fiber_attr default_attr;
+
+static int
+noop_f(va_list ap)
+{
+ return 0;
+}
+
+static int
+main_f(va_list ap)
+{
+ struct errinj *inj;
+ struct fiber *fiber;
+
+ header();
+ plan(4);
+
+ /*
+ * Set non-default stack size to prevent reusing of an
+ * existing fiber.
+ */
+ struct fiber_attr *fiber_attr = fiber_attr_new();
+ fiber_attr_setstacksize(fiber_attr, default_attr.stack_size * 2);
+
+ /*
+ * Clear the fiber's diagnostics area to check that failed
+ * fiber_new() sets an error.
+ */
+ diag_clear(diag_get());
+
+ /*
+ * Check guard page setup via mprotect. We can't test the fiber
+ * destroy path since it clears fiber's diag.
+ */
+ inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+ inj->iparam = PROT_NONE;
+ fiber = fiber_new_ex("test_mprotect", fiber_attr, noop_f);
+ inj->iparam = -1;
+
+ ok(fiber == NULL, "mprotect: failed to setup fiber guard page");
+ ok(diag_get() != NULL, "mprotect: diag is armed after error");
+
+ /*
+ * Check madvise. We can't test the fiber destroy
+ * path since it is cleaning error.
+ */
+ diag_clear(diag_get());
+ inj = errinj(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL);
+ inj->bparam = true;
+ fiber = fiber_new_ex("test_madvise", fiber_attr, noop_f);
+ inj->bparam = false;
+
+ ok(fiber != NULL, "madvise: non critical error on madvise hint");
+ ok(diag_get() != NULL, "madvise: diag is armed after error");
+
+ footer();
+
+ ev_break(loop(), EVBREAK_ALL);
+ return check_plan();
+}
+
+int main()
+{
+ memory_init();
+ fiber_init(fiber_c_invoke);
+ fiber_attr_create(&default_attr);
+ struct fiber *f = fiber_new("main", main_f);
+ fiber_wakeup(f);
+ ev_run(loop(), 0);
+ fiber_free();
+ memory_free();
+ return 0;
+}
diff --git a/test/unit/fiber_stack.result b/test/unit/fiber_stack.result
new file mode 100644
index 000000000..43ff74b2f
--- /dev/null
+++ b/test/unit/fiber_stack.result
@@ -0,0 +1,8 @@
+SystemError fiber mprotect failed: Cannot allocate memory
+ *** main_f ***
+1..4
+ok 1 - mprotect: failed to setup fiber guard page
+ok 2 - mprotect: diag is armed after error
+ok 3 - madvise: non critical error on madvise hint
+ok 4 - madvise: diag is armed after error
+ *** main_f: done ***
diff --git a/test/unit/suite.ini b/test/unit/suite.ini
index 89c8499fc..d429c95e9 100644
--- a/test/unit/suite.ini
+++ b/test/unit/suite.ini
@@ -1,5 +1,5 @@
[default]
core = unittest
description = unit tests
-release_disabled = swim_errinj.test
+release_disabled = fiber_stack.test swim_errinj.test
is_parallel = True
--
2.20.1
More information about the Tarantool-patches
mailing list