[Tarantool-patches] [PATCH v6 1/2] fiber: set diagnostics at madvise/mprotect failure

Cyrill Gorcunov gorcunov at gmail.com
Thu Feb 6 15:31:13 MSK 2020


Both madvise and mprotect calls can fail due to various
reasons, mostly because of lack of free memory in the
system.

We log such cases via say_x helpers but this is not enough.
In particular tarantool/memcached relies on diag error to be
set to detect an error condition:

 | expire_fiber = fiber_new(name, memcached_expire_loop);
 | const box_error_t *err = box_error_last();
 | if (err) {
 |	say_error("Can't start the expire fiber");
 |	say_error("%s", box_error_message(err));
 |	return -1;
 | }

Thus lets use diag_set() helper here and instead of macros
use inline functions for better readability.

Fixes #4722

Reported-by: Alexander Turenko <alexander.turenko at tarantool.org>
Reviewed-by: Alexander Turenko <alexander.turenko at tarantool.org>
Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
 src/lib/core/errinj.h        |   2 +
 src/lib/core/fiber.c         |  84 +++++++++++++++++-----
 test/box/errinj.result       | 136 ++++++++++++++++++-----------------
 test/unit/CMakeLists.txt     |   4 ++
 test/unit/fiber_stack.cc     |  83 +++++++++++++++++++++
 test/unit/fiber_stack.result |   7 ++
 test/unit/suite.ini          |   2 +-
 7 files changed, 234 insertions(+), 84 deletions(-)
 create mode 100644 test/unit/fiber_stack.cc
 create mode 100644 test/unit/fiber_stack.result

diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
index 672da2119..ed0cba903 100644
--- a/src/lib/core/errinj.h
+++ b/src/lib/core/errinj.h
@@ -135,6 +135,8 @@ struct errinj {
 	_(ERRINJ_COIO_SENDFILE_CHUNK, ERRINJ_INT, {.iparam = -1}) \
 	_(ERRINJ_SWIM_FD_ONLY, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT, {.iparam = 0}) \
+	_(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL, {.bparam = false}) \
+	_(ERRINJ_FIBER_MPROTECT, ERRINJ_INT, {.iparam = -1})
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
index 00ae8cded..57e4cb6ef 100644
--- a/src/lib/core/fiber.c
+++ b/src/lib/core/fiber.c
@@ -41,6 +41,7 @@
 #include "assoc.h"
 #include "memory.h"
 #include "trigger.h"
+#include "errinj.h"
 
 #if ENABLE_FIBER_TOP
 #include <x86intrin.h> /* __rdtscp() */
@@ -173,21 +174,40 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
 #define ASAN_FINISH_SWITCH_FIBER(var_name)
 #endif
 
-#define fiber_madvise(addr, len, advice)				\
-({									\
-	int err = madvise((addr), (len), (advice));			\
-	if (err)							\
-		say_syserror("madvise");				\
-	err;								\
-})
-
-#define fiber_mprotect(addr, len, prot)					\
-({									\
-	int err = mprotect((addr), (len), (prot));			\
-	if (err)							\
-		say_syserror("mprotect");				\
-	err;								\
-})
+static inline int
+fiber_madvise(void *addr, size_t len, int advice)
+{
+	int rc = 0;
+
+	ERROR_INJECT(ERRINJ_FIBER_MADVISE, {
+		errno = ENOMEM;
+		rc = -1;
+	});
+
+	if (rc != 0 || madvise(addr, len, advice) != 0) {
+		diag_set(SystemError, "fiber madvise failed");
+		return -1;
+	}
+	return 0;
+}
+
+static inline int
+fiber_mprotect(void *addr, size_t len, int prot)
+{
+	int rc = 0;
+
+	struct errinj *inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+	if (inj != NULL && inj->iparam == prot) {
+		errno = ENOMEM;
+		rc = -1;
+	}
+
+	if (rc != 0 || mprotect(addr, len, prot) != 0) {
+		diag_set(SystemError, "fiber mprotect failed");
+		return -1;
+	}
+	return 0;
+}
 
 #if ENABLE_FIBER_TOP
 static __thread bool fiber_top_enabled = false;
@@ -951,6 +971,12 @@ fiber_stack_recycle(struct fiber *fiber)
 		start = page_align_up(fiber->stack_watermark);
 		end = fiber->stack + fiber->stack_size;
 	}
+
+	/*
+	 * Ignore errors on MADV_DONTNEED because this is
+	 * just a hint for OS and not critical for
+	 * functionality.
+	 */
 	fiber_madvise(start, end - start, MADV_DONTNEED);
 	stack_put_watermark(fiber->stack_watermark);
 }
@@ -969,7 +995,9 @@ fiber_stack_watermark_create(struct fiber *fiber)
 
 	/*
 	 * We don't expect the whole stack usage in regular
-	 * loads, let's try to minimize rss pressure.
+	 * loads, let's try to minimize rss pressure. But do
+	 * not exit if MADV_DONTNEED failed, it is just a hint
+	 * for OS, not critical one.
 	 */
 	fiber_madvise(fiber->stack, fiber->stack_size, MADV_DONTNEED);
 
@@ -1007,6 +1035,8 @@ fiber_stack_watermark_create(struct fiber *fiber)
 static void
 fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
 {
+	static const int mprotect_flags = PROT_READ | PROT_WRITE;
+
 	if (fiber->stack != NULL) {
 		VALGRIND_STACK_DEREGISTER(fiber->stack_id);
 #if ENABLE_ASAN
@@ -1017,7 +1047,22 @@ fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
 			guard = page_align_down(fiber->stack - page_size);
 		else
 			guard = page_align_up(fiber->stack + fiber->stack_size);
-		fiber_mprotect(guard, page_size, PROT_READ | PROT_WRITE);
+
+		if (fiber_mprotect(guard, page_size, mprotect_flags) != 0) {
+			/*
+			 * FIXME: We need some intelligent handling:
+			 * say put this slab into a queue and retry
+			 * to setup the original protection back in
+			 * background.
+			 *
+			 * Note that in case if we're called from
+			 * fiber_stack_create() the @mprotect_flags is
+			 * the same as the slab been created with, so
+			 * calling mprotect for VMA with same flags
+			 * won't fail.
+			 */
+			diag_log();
+		}
 		slab_put(slabc, fiber->stack_slab);
 	}
 }
@@ -1064,6 +1109,11 @@ fiber_stack_create(struct fiber *fiber, struct slab_cache *slabc,
 						  fiber->stack_size);
 
 	if (fiber_mprotect(guard, page_size, PROT_NONE)) {
+		/*
+		 * Write an error into the log since a guard
+		 * page is critical for functionality.
+		 */
+		diag_log();
 		fiber_stack_destroy(fiber, slabc);
 		return -1;
 	}
diff --git a/test/box/errinj.result b/test/box/errinj.result
index babe36b1b..16f6b40c2 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -23,132 +23,136 @@ errinj.info()
 ---
 - ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT:
     state: 0
-  ERRINJ_WAL_WRITE:
-    state: false
-  ERRINJ_RELAY_BREAK_LSN:
+  ERRINJ_WAL_BREAK_LSN:
     state: -1
-  ERRINJ_HTTPC_EXECUTE:
-    state: false
   ERRINJ_VYRUN_DATA_READ:
     state: false
-  ERRINJ_SWIM_FD_ONLY:
-    state: false
-  ERRINJ_SQL_NAME_NORMALIZATION:
-    state: false
   ERRINJ_VY_SCHED_TIMEOUT:
     state: 0
-  ERRINJ_COIO_SENDFILE_CHUNK:
-    state: -1
   ERRINJ_HTTP_RESPONSE_ADD_WAIT:
     state: false
-  ERRINJ_WAL_WRITE_PARTIAL:
-    state: -1
-  ERRINJ_VY_GC:
-    state: false
-  ERRINJ_WAL_DELAY:
-    state: false
-  ERRINJ_INDEX_ALLOC:
-    state: false
   ERRINJ_WAL_WRITE_EOF:
     state: false
-  ERRINJ_WAL_SYNC:
-    state: false
-  ERRINJ_BUILD_INDEX:
-    state: -1
   ERRINJ_BUILD_INDEX_DELAY:
     state: false
-  ERRINJ_VY_RUN_FILE_RENAME:
-    state: false
-  ERRINJ_VY_COMPACTION_DELAY:
-    state: false
-  ERRINJ_VY_DUMP_DELAY:
-    state: false
   ERRINJ_VY_DELAY_PK_LOOKUP:
     state: false
-  ERRINJ_VY_TASK_COMPLETE:
-    state: false
-  ERRINJ_PORT_DUMP:
+  ERRINJ_VY_POINT_ITER_WAIT:
     state: false
-  ERRINJ_WAL_BREAK_LSN:
-    state: -1
   ERRINJ_WAL_IO:
     state: false
-  ERRINJ_WAL_FALLOCATE:
-    state: 0
-  ERRINJ_DYN_MODULE_COUNT:
-    state: 0
   ERRINJ_VY_INDEX_FILE_RENAME:
     state: false
   ERRINJ_TUPLE_FORMAT_COUNT:
     state: -1
   ERRINJ_TUPLE_ALLOC:
     state: false
-  ERRINJ_VY_RUN_WRITE_DELAY:
+  ERRINJ_VY_RUN_FILE_RENAME:
     state: false
   ERRINJ_VY_READ_PAGE:
     state: false
   ERRINJ_RELAY_REPORT_INTERVAL:
     state: 0
-  ERRINJ_VY_LOG_FILE_RENAME:
-    state: false
-  ERRINJ_VY_READ_PAGE_TIMEOUT:
-    state: 0
+  ERRINJ_RELAY_BREAK_LSN:
+    state: -1
   ERRINJ_XLOG_META:
     state: false
-  ERRINJ_SIO_READ_MAX:
-    state: -1
   ERRINJ_SNAP_COMMIT_DELAY:
     state: false
-  ERRINJ_WAL_WRITE_DISK:
+  ERRINJ_VY_RUN_WRITE:
     state: false
-  ERRINJ_SNAP_WRITE_DELAY:
+  ERRINJ_BUILD_INDEX:
+    state: -1
+  ERRINJ_RELAY_FINAL_JOIN:
+    state: false
+  ERRINJ_REPLICA_JOIN_DELAY:
     state: false
   ERRINJ_LOG_ROTATE:
     state: false
-  ERRINJ_VY_RUN_WRITE:
+  ERRINJ_MEMTX_DELAY_GC:
     state: false
-  ERRINJ_CHECK_FORMAT_DELAY:
+  ERRINJ_XLOG_GARBAGE:
+    state: false
+  ERRINJ_VY_READ_PAGE_DELAY:
+    state: false
+  ERRINJ_SWIM_FD_ONLY:
+    state: false
+  ERRINJ_WAL_WRITE:
+    state: false
+  ERRINJ_HTTPC_EXECUTE:
+    state: false
+  ERRINJ_SQL_NAME_NORMALIZATION:
+    state: false
+  ERRINJ_WAL_WRITE_PARTIAL:
+    state: -1
+  ERRINJ_VY_GC:
+    state: false
+  ERRINJ_WAL_DELAY:
+    state: false
+  ERRINJ_XLOG_READ:
+    state: -1
+  ERRINJ_WAL_SYNC:
+    state: false
+  ERRINJ_VY_TASK_COMPLETE:
     state: false
+  ERRINJ_PORT_DUMP:
+    state: false
+  ERRINJ_COIO_SENDFILE_CHUNK:
+    state: -1
+  ERRINJ_DYN_MODULE_COUNT:
+    state: 0
+  ERRINJ_SIO_READ_MAX:
+    state: -1
+  ERRINJ_FIBER_MPROTECT:
+    state: -1
+  ERRINJ_FIBER_MADVISE:
+    state: false
+  ERRINJ_RELAY_TIMEOUT:
+    state: 0
+  ERRINJ_VY_DUMP_DELAY:
+    state: false
+  ERRINJ_VY_SQUASH_TIMEOUT:
+    state: 0
   ERRINJ_VY_LOG_FLUSH_DELAY:
     state: false
-  ERRINJ_RELAY_FINAL_JOIN:
+  ERRINJ_RELAY_SEND_DELAY:
     state: false
-  ERRINJ_REPLICA_JOIN_DELAY:
+  ERRINJ_VY_COMPACTION_DELAY:
     state: false
-  ERRINJ_RELAY_FINAL_SLEEP:
+  ERRINJ_VY_LOG_FILE_RENAME:
     state: false
   ERRINJ_VY_RUN_DISCARD:
     state: false
   ERRINJ_WAL_ROTATE:
     state: false
-  ERRINJ_RELAY_EXIT_DELAY:
+  ERRINJ_VY_READ_PAGE_TIMEOUT:
     state: 0
-  ERRINJ_VY_POINT_ITER_WAIT:
+  ERRINJ_VY_INDEX_DUMP:
+    state: -1
+  ERRINJ_TUPLE_FIELD:
     state: false
-  ERRINJ_MEMTX_DELAY_GC:
+  ERRINJ_SNAP_WRITE_DELAY:
     state: false
   ERRINJ_IPROTO_TX_DELAY:
     state: false
-  ERRINJ_XLOG_READ:
-    state: -1
-  ERRINJ_TUPLE_FIELD:
+  ERRINJ_RELAY_EXIT_DELAY:
+    state: 0
+  ERRINJ_RELAY_FINAL_SLEEP:
     state: false
-  ERRINJ_XLOG_GARBAGE:
+  ERRINJ_WAL_WRITE_DISK:
     state: false
-  ERRINJ_VY_INDEX_DUMP:
-    state: -1
-  ERRINJ_VY_READ_PAGE_DELAY:
+  ERRINJ_CHECK_FORMAT_DELAY:
     state: false
   ERRINJ_TESTING:
     state: false
-  ERRINJ_RELAY_SEND_DELAY:
+  ERRINJ_VY_RUN_WRITE_DELAY:
     state: false
-  ERRINJ_VY_SQUASH_TIMEOUT:
+  ERRINJ_WAL_FALLOCATE:
     state: 0
   ERRINJ_VY_LOG_FLUSH:
     state: false
-  ERRINJ_RELAY_TIMEOUT:
-    state: 0
+  ERRINJ_INDEX_ALLOC:
+    state: false
 ...
 errinj.set("some-injection", true)
 ---
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 4a57597e9..212a02d36 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -74,6 +74,10 @@ add_executable(fiber.test fiber.cc)
 set_source_files_properties(fiber.cc PROPERTIES COMPILE_FLAGS -O0)
 target_link_libraries(fiber.test core unit)
 
+add_executable(fiber_stack.test fiber_stack.cc)
+set_source_files_properties(fiber_stack.cc PROPERTIES COMPILE_FLAGS -O0)
+target_link_libraries(fiber_stack.test core unit)
+
 if (NOT ENABLE_GCOV)
     # This test is known to be broken with GCOV
     add_executable(guard.test guard.cc)
diff --git a/test/unit/fiber_stack.cc b/test/unit/fiber_stack.cc
new file mode 100644
index 000000000..de7fe90e3
--- /dev/null
+++ b/test/unit/fiber_stack.cc
@@ -0,0 +1,83 @@
+#include "memory.h"
+#include "fiber.h"
+#include "unit.h"
+#include "trivia/util.h"
+#include "errinj.h"
+
+static struct fiber_attr default_attr;
+
+static int
+noop_f(va_list ap)
+{
+	return 0;
+}
+
+static void
+fiber_stack_fail_test(void)
+{
+	struct errinj *inj;
+	struct fiber *fiber;
+
+	header();
+
+	/*
+	 * Set non-default stack size to prevent reusing of an
+	 * existing fiber.
+	 */
+	struct fiber_attr *fiber_attr = fiber_attr_new();
+	fiber_attr_setstacksize(fiber_attr, default_attr.stack_size * 2);
+
+	/*
+	 * Clear the fiber's diagnostics area to check that failed
+	 * fiber_new() sets an error.
+	 */
+	diag_clear(diag_get());
+
+	/*
+	 * Check guard page setup via mprotect. We can't test the fiber
+	 * destroy path since it clears fiber's diag.
+	 */
+	inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+	inj->iparam = PROT_NONE;
+	fiber = fiber_new_ex("test_mprotect", fiber_attr, noop_f);
+	inj->iparam = -1;
+
+	ok(fiber == NULL, "mprotect: failed to setup fiber guard page");
+	ok(diag_get() != NULL, "mprotect: diag is armed after error");
+
+	/*
+	 * Check madvise. We can't test the fiber destroy
+	 * path since it is cleaning error.
+	 */
+	diag_clear(diag_get());
+	inj = errinj(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL);
+	inj->bparam = true;
+	fiber = fiber_new_ex("test_madvise", fiber_attr, noop_f);
+	inj->bparam = false;
+
+	ok(fiber != NULL, "madvise: non critical error on madvise hint");
+	ok(diag_get() != NULL, "madvise: diag is armed after error");
+
+	footer();
+}
+
+static int
+main_f(va_list ap)
+{
+	fiber_stack_fail_test();
+	ev_break(loop(), EVBREAK_ALL);
+	return 0;
+}
+
+int main()
+{
+	memory_init();
+	fiber_init(fiber_cxx_invoke);
+	fiber_attr_create(&default_attr);
+	struct fiber *main = fiber_new_xc("main", main_f);
+	fiber_wakeup(main);
+	ev_run(loop(), 0);
+	fiber_free();
+	memory_free();
+	return 0;
+}
diff --git a/test/unit/fiber_stack.result b/test/unit/fiber_stack.result
new file mode 100644
index 000000000..83f0d1490
--- /dev/null
+++ b/test/unit/fiber_stack.result
@@ -0,0 +1,7 @@
+SystemError fiber mprotect failed: Cannot allocate memory
+	*** fiber_stack_fail_test ***
+ok 1 - mprotect: failed to setup fiber guard page
+ok 2 - mprotect: diag is armed after error
+ok 3 - madvise: non critical error on madvise hint
+ok 4 - madvise: diag is armed after error
+	*** fiber_stack_fail_test: done ***
diff --git a/test/unit/suite.ini b/test/unit/suite.ini
index 89c8499fc..d429c95e9 100644
--- a/test/unit/suite.ini
+++ b/test/unit/suite.ini
@@ -1,5 +1,5 @@
 [default]
 core = unittest
 description = unit tests
-release_disabled = swim_errinj.test
+release_disabled = fiber_stack.test swim_errinj.test
 is_parallel = True
-- 
2.20.1



More information about the Tarantool-patches mailing list