Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH 0/2] fiber: backport for stack madvise/mprotect errors handling
@ 2020-07-29 16:50 Ilya Kosarev
  2020-07-29 16:50 ` [Tarantool-patches] [PATCH 1/2] fiber: set diagnostics at madvise/mprotect failure Ilya Kosarev
  2020-07-29 16:50 ` [Tarantool-patches] [PATCH 2/2] fiber: leak slab if unable to bring prots back Ilya Kosarev
  0 siblings, 2 replies; 4+ messages in thread
From: Ilya Kosarev @ 2020-07-29 16:50 UTC (permalink / raw)
  To: gorcunov; +Cc: tarantool-patches

This patchset is a backport for
c67522973b32fae955835109d4f86eada8f67ae5 and
8d53fadc0cb15a45ea0cd461d2d5243be51b37e0 patches on stack
madvise/mprotect errors handling.

Branch: https://github.com/tarantool/tarantool/tree/i.kosarev/gh-5211-diag-at-fiber-mprotect-and-madvise
Issue: https://github.com/tarantool/tarantool/issues/5211

Ilya Kosarev (2):
  fiber: set diagnostics at madvise/mprotect failure
  fiber: leak slab if unable to bring prots back

 src/errinj.h                 |  32 +++---
 src/fiber.c                  | 192 ++++++++++++++++++++++-------------
 test/box/errinj.result       |   2 +
 test/unit/CMakeLists.txt     |   4 +
 test/unit/fiber_stack.c      | 109 ++++++++++++++++++++
 test/unit/fiber_stack.result |  11 ++
 test/unit/suite.ini          |   1 +
 7 files changed, 268 insertions(+), 83 deletions(-)
 create mode 100644 test/unit/fiber_stack.c
 create mode 100644 test/unit/fiber_stack.result

-- 
2.17.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Tarantool-patches] [PATCH 1/2] fiber: set diagnostics at madvise/mprotect failure
  2020-07-29 16:50 [Tarantool-patches] [PATCH 0/2] fiber: backport for stack madvise/mprotect errors handling Ilya Kosarev
@ 2020-07-29 16:50 ` Ilya Kosarev
  2020-07-29 17:04   ` Cyrill Gorcunov
  2020-07-29 16:50 ` [Tarantool-patches] [PATCH 2/2] fiber: leak slab if unable to bring prots back Ilya Kosarev
  1 sibling, 1 reply; 4+ messages in thread
From: Ilya Kosarev @ 2020-07-29 16:50 UTC (permalink / raw)
  To: gorcunov; +Cc: tarantool-patches

Both madvise and mprotect calls can fail due to various
reasons, mostly because of lack of free memory in the
system.

We log such cases via say_x helpers but this is not enough.
In particular tarantool/memcached relies on diag error to be
set to detect an error condition:

 | expire_fiber = fiber_new(name, memcached_expire_loop);
 | const box_error_t *err = box_error_last();
 | if (err) {
 |	say_error("Can't start the expire fiber");
 |	say_error("%s", box_error_message(err));
 |	return -1;
 | }

Thus lets use diag_set() helper here and instead of macros
use inline functions for better readability.

Closes #5211

Co-authored-by: Cyrill Gorcunov <gorcunov@gmail.com>
(backported from commit c67522973b32fae955835109d4f86eada8f67ae5)
---
 src/errinj.h                 |  32 +++---
 src/fiber.c                  | 183 ++++++++++++++++++++++-------------
 test/box/errinj.result       |   2 +
 test/unit/CMakeLists.txt     |   4 +
 test/unit/fiber_stack.c      |  79 +++++++++++++++
 test/unit/fiber_stack.result |   8 ++
 test/unit/suite.ini          |   1 +
 7 files changed, 227 insertions(+), 82 deletions(-)
 create mode 100644 test/unit/fiber_stack.c
 create mode 100644 test/unit/fiber_stack.result

diff --git a/src/errinj.h b/src/errinj.h
index 14cea11e9..ff6c8cf3f 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -41,27 +41,27 @@ extern "C" {
  * Injection type
  */
 enum errinj_type {
-	/** boolean */
-	ERRINJ_BOOL	= 0,
-	/** uint64_t */
-	ERRINJ_INT	= 1,
-	/** double */
-	ERRINJ_DOUBLE   = 2
+    /** boolean */
+    ERRINJ_BOOL	= 0,
+    /** uint64_t */
+    ERRINJ_INT	= 1,
+    /** double */
+    ERRINJ_DOUBLE   = 2
 };
 
 /**
  * Injection state
  */
 struct errinj {
-	/** Name, e.g "ERRINJ_WAL_WRITE" */
-	const char *name;
-	/** Type, e.g. BOOL, U64, DOUBLE */
-	enum errinj_type type;
-	union {
-		/** bool parameter */
-		bool bparam;
-		/** integer parameter */
-		int64_t iparam;
+    /** Name, e.g "ERRINJ_WAL_WRITE" */
+    const char *name;
+    /** Type, e.g. BOOL, U64, DOUBLE */
+    enum errinj_type type;
+    union {
+	/** bool parameter */
+	bool bparam;
+	/** integer parameter */
+	int64_t iparam;
 		/** double parameter */
 		double dparam;
 	};
@@ -128,6 +128,8 @@ struct errinj {
 	_(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT, {.iparam = 0}) \
 	_(ERRINJ_INDEX_RESERVE, ERRINJ_BOOL, {.bparam = false})\
 	_(ERRINJ_VY_STMT_ALLOC, ERRINJ_INT, {.iparam = -1})\
+	_(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL, {.bparam = false})\
+	_(ERRINJ_FIBER_MPROTECT, ERRINJ_INT, {.iparam = -1})\
 	_(ERRINJ_VY_READ_VIEW_MERGE_FAIL, ERRINJ_BOOL, {.bparam = false})\
 	_(ERRINJ_VY_WRITE_ITERATOR_START_FAIL, ERRINJ_BOOL, {.bparam = false})\
 	_(ERRINJ_VY_RUN_OPEN, ERRINJ_INT, {.iparam = -1})\
diff --git a/src/fiber.c b/src/fiber.c
index d7ea9924a..2d83bbc1a 100644
--- a/src/fiber.c
+++ b/src/fiber.c
@@ -41,6 +41,7 @@
 #include "assoc.h"
 #include "memory.h"
 #include "trigger.h"
+#include "errinj.h"
 
 #include "third_party/valgrind/memcheck.h"
 
@@ -66,30 +67,48 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
 #define ASAN_FINISH_SWITCH_FIBER(var_name)
 #endif
 
-#define fiber_madvise(addr, len, advice)				\
-({									\
-	int err = madvise((addr), (len), (advice));			\
-	if (err)							\
-		say_syserror("madvise");				\
-	err;								\
-})
-
-#define fiber_mprotect(addr, len, prot)					\
-({									\
-	int err = mprotect((addr), (len), (prot));			\
-	if (err)							\
-		say_syserror("mprotect");				\
-	err;								\
-})
+static inline int
+fiber_madvise(void *addr, size_t len, int advice)
+{
+	int rc = 0;
+
+	ERROR_INJECT(ERRINJ_FIBER_MADVISE, {
+		errno = ENOMEM;
+		rc = -1;
+	});
+
+	if (rc != 0 || madvise(addr, len, advice) != 0) {
+		diag_set(SystemError, "fiber madvise failed");
+		return -1;
+	}
+	return 0;
+}
+
+static inline int
+fiber_mprotect(void *addr, size_t len, int prot)
+{
+	int rc = 0;
+
+	struct errinj *inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+	if (inj != NULL && inj->iparam == prot) {
+		errno = ENOMEM;
+		rc = -1;
+	}
 
+	if (rc != 0 || mprotect(addr, len, prot) != 0) {
+		diag_set(SystemError, "fiber mprotect failed");
+		return -1;
+	}
+	return 0;
+}
 /*
  * Defines a handler to be executed on exit from cord's thread func,
  * accessible via cord()->on_exit (normally NULL). It is used to
  * implement cord_cojoin.
  */
 struct cord_on_exit {
-	void (*callback)(void*);
-	void *argument;
+    void (*callback)(void*);
+    void *argument;
 };
 
 /*
@@ -108,18 +127,18 @@ static size_t page_size;
 static int stack_direction;
 
 enum {
-	/* The minimum allowable fiber stack size in bytes */
-	FIBER_STACK_SIZE_MINIMAL = 16384,
-	/* Default fiber stack size in bytes */
-	FIBER_STACK_SIZE_DEFAULT = 524288,
-	/* Stack size watermark in bytes. */
-	FIBER_STACK_SIZE_WATERMARK = 65536,
+    /* The minimum allowable fiber stack size in bytes */
+    FIBER_STACK_SIZE_MINIMAL = 16384,
+    /* Default fiber stack size in bytes */
+    FIBER_STACK_SIZE_DEFAULT = 524288,
+    /* Stack size watermark in bytes. */
+    FIBER_STACK_SIZE_WATERMARK = 65536,
 };
 
 /** Default fiber attributes */
 static const struct fiber_attr fiber_attr_default = {
-       .stack_size = FIBER_STACK_SIZE_DEFAULT,
-       .flags = FIBER_DEFAULT_FLAGS
+	.stack_size = FIBER_STACK_SIZE_DEFAULT,
+	.flags = FIBER_DEFAULT_FLAGS
 };
 
 #ifdef HAVE_MADV_DONTNEED
@@ -147,7 +166,7 @@ static const uint64_t poison_pool[] = {
 void
 fiber_attr_create(struct fiber_attr *fiber_attr)
 {
-       *fiber_attr = fiber_attr_default;
+	*fiber_attr = fiber_attr_default;
 }
 
 struct fiber_attr *
@@ -156,7 +175,7 @@ fiber_attr_new()
 	struct fiber_attr *fiber_attr = malloc(sizeof(*fiber_attr));
 	if (fiber_attr == NULL)  {
 		diag_set(OutOfMemory, sizeof(*fiber_attr),
-			 "runtime", "fiber attr");
+		         "runtime", "fiber attr");
 		return NULL;
 	}
 	fiber_attr_create(fiber_attr);
@@ -190,7 +209,7 @@ size_t
 fiber_attr_getstacksize(struct fiber_attr *fiber_attr)
 {
 	return fiber_attr != NULL ? fiber_attr->stack_size :
-				    fiber_attr_default.stack_size;
+	       fiber_attr_default.stack_size;
 }
 
 void
@@ -514,14 +533,14 @@ struct fiber_watcher_data {
 
 static void
 fiber_schedule_timeout(ev_loop *loop,
-		       ev_timer *watcher, int revents)
+                       ev_timer *watcher, int revents)
 {
 	(void) loop;
 	(void) revents;
 
 	assert(fiber() == &cord()->sched);
 	struct fiber_watcher_data *state =
-			(struct fiber_watcher_data *) watcher->data;
+		(struct fiber_watcher_data *) watcher->data;
 	state->timed_out = true;
 	fiber_wakeup(state->f);
 }
@@ -733,12 +752,12 @@ fiber_loop(MAYBE_UNUSED void *data)
 		}
 		fiber->flags |= FIBER_IS_DEAD;
 		while (! rlist_empty(&fiber->wake)) {
-		       struct fiber *f;
-		       f = rlist_shift_entry(&fiber->wake, struct fiber,
-					     state);
-		       assert(f != fiber);
-		       fiber_wakeup(f);
-	        }
+			struct fiber *f;
+			f = rlist_shift_entry(&fiber->wake, struct fiber,
+			                      state);
+			assert(f != fiber);
+			fiber_wakeup(f);
+		}
 		fiber_on_stop(fiber);
 		/* reset pending wakeups */
 		rlist_del(&fiber->state);
@@ -833,6 +852,12 @@ fiber_stack_recycle(struct fiber *fiber)
 		start = page_align_up(fiber->stack_watermark);
 		end = fiber->stack + fiber->stack_size;
 	}
+
+	/*
+	 * Ignore errors on MADV_DONTNEED because this is
+	 * just a hint for OS and not critical for
+	 * functionality.
+	 */
 	fiber_madvise(start, end - start, MADV_DONTNEED);
 	stack_put_watermark(fiber->stack_watermark);
 }
@@ -851,7 +876,9 @@ fiber_stack_watermark_create(struct fiber *fiber)
 
 	/*
 	 * We don't expect the whole stack usage in regular
-	 * loads, let's try to minimize rss pressure.
+	 * loads, let's try to minimize rss pressure. But do
+	 * not exit if MADV_DONTNEED failed, it is just a hint
+	 * for OS, not critical one.
 	 */
 	fiber_madvise(fiber->stack, fiber->stack_size, MADV_DONTNEED);
 
@@ -889,6 +916,8 @@ fiber_stack_watermark_create(struct fiber *fiber)
 static void
 fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
 {
+	static const int mprotect_flags = PROT_READ | PROT_WRITE;
+
 	if (fiber->stack != NULL) {
 		VALGRIND_STACK_DEREGISTER(fiber->stack_id);
 #if ENABLE_ASAN
@@ -899,21 +928,36 @@ fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
 			guard = page_align_down(fiber->stack - page_size);
 		else
 			guard = page_align_up(fiber->stack + fiber->stack_size);
-		fiber_mprotect(guard, page_size, PROT_READ | PROT_WRITE);
+
+		if (fiber_mprotect(guard, page_size, mprotect_flags) != 0) {
+			/*
+			 * FIXME: We need some intelligent handling:
+			 * say put this slab into a queue and retry
+			 * to setup the original protection back in
+			 * background.
+			 *
+			 * Note that in case if we're called from
+			 * fiber_stack_create() the @a mprotect_flags is
+			 * the same as the slab been created with, so
+			 * calling mprotect for VMA with same flags
+			 * won't fail.
+			 */
+			diag_log();
+		}
 		slab_put(slabc, fiber->stack_slab);
 	}
 }
 
 static int
 fiber_stack_create(struct fiber *fiber, struct slab_cache *slabc,
-		   size_t stack_size)
+                   size_t stack_size)
 {
 	stack_size -= slab_sizeof();
 	fiber->stack_slab = slab_get(slabc, stack_size);
 
 	if (fiber->stack_slab == NULL) {
 		diag_set(OutOfMemory, stack_size,
-			 "runtime arena", "fiber stack");
+		         "runtime arena", "fiber stack");
 		return -1;
 	}
 	void *guard;
@@ -928,7 +972,7 @@ fiber_stack_create(struct fiber *fiber, struct slab_cache *slabc,
 		guard = page_align_up(slab_data(fiber->stack_slab));
 		fiber->stack = guard + page_size;
 		fiber->stack_size = slab_data(fiber->stack_slab) + stack_size -
-				    fiber->stack;
+		                    fiber->stack;
 	} else {
 		/*
 		 * A stack grows up. Last page should be protected and
@@ -936,16 +980,21 @@ fiber_stack_create(struct fiber *fiber, struct slab_cache *slabc,
 		 * be used for coro stack usage
 		 */
 		guard = page_align_down(fiber->stack_slab + stack_size) -
-			page_size;
+		        page_size;
 		fiber->stack = fiber->stack_slab + slab_sizeof();
 		fiber->stack_size = guard - fiber->stack;
 	}
 
 	fiber->stack_id = VALGRIND_STACK_REGISTER(fiber->stack,
-						  (char *)fiber->stack +
-						  fiber->stack_size);
+	                                          (char *)fiber->stack +
+	                                          fiber->stack_size);
 
 	if (fiber_mprotect(guard, page_size, PROT_NONE)) {
+		/*
+		 * Write an error into the log since a guard
+		 * page is critical for functionality.
+		 */
+		diag_log();
 		fiber_stack_destroy(fiber, slabc);
 		return -1;
 	}
@@ -956,7 +1005,7 @@ fiber_stack_create(struct fiber *fiber, struct slab_cache *slabc,
 
 struct fiber *
 fiber_new_ex(const char *name, const struct fiber_attr *fiber_attr,
-	     fiber_func f)
+             fiber_func f)
 {
 	struct cord *cord = cord();
 	struct fiber *fiber = NULL;
@@ -966,14 +1015,14 @@ fiber_new_ex(const char *name, const struct fiber_attr *fiber_attr,
 	if (!(fiber_attr->flags & FIBER_CUSTOM_STACK) &&
 	    !rlist_empty(&cord->dead)) {
 		fiber = rlist_first_entry(&cord->dead,
-					  struct fiber, link);
+		                          struct fiber, link);
 		rlist_move_entry(&cord->alive, fiber, link);
 	} else {
 		fiber = (struct fiber *)
 			mempool_alloc(&cord->fiber_mempool);
 		if (fiber == NULL) {
 			diag_set(OutOfMemory, sizeof(struct fiber),
-				 "fiber pool", "fiber");
+			         "fiber pool", "fiber");
 			return NULL;
 		}
 		memset(fiber, 0, sizeof(struct fiber));
@@ -1057,7 +1106,7 @@ fiber_destroy_all(struct cord *cord)
 {
 	while (!rlist_empty(&cord->alive))
 		fiber_destroy(cord, rlist_first_entry(&cord->alive,
-						      struct fiber, link));
+		                                      struct fiber, link));
 	while (!rlist_empty(&cord->dead))
 		fiber_destroy(cord, rlist_first_entry(&cord->dead,
 						      struct fiber, link));
@@ -1130,13 +1179,13 @@ cord_destroy(struct cord *cord)
 
 struct cord_thread_arg
 {
-	struct cord *cord;
-	const char *name;
-	void *(*f)(void *);
-	void *arg;
-	bool is_started;
-	pthread_mutex_t start_mutex;
-	pthread_cond_t start_cond;
+    struct cord *cord;
+    const char *name;
+    void *(*f)(void *);
+    void *arg;
+    bool is_started;
+    pthread_mutex_t start_mutex;
+    pthread_cond_t start_cond;
 };
 
 /**
@@ -1181,7 +1230,7 @@ cord_start(struct cord *cord, const char *name, void *(*f)(void *), void *arg)
 {
 	int res = -1;
 	struct cord_thread_arg ct_arg = { cord, name, f, arg, false,
-		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER };
+	                                  PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER };
 	tt_pthread_mutex_lock(&ct_arg.start_mutex);
 	cord->loop = ev_loop_new(EVFLAG_AUTO | EVFLAG_ALLOCFD);
 	if (cord->loop == NULL) {
@@ -1189,14 +1238,14 @@ cord_start(struct cord *cord, const char *name, void *(*f)(void *), void *arg)
 		goto end;
 	}
 	if (tt_pthread_create(&cord->id, NULL,
-			      cord_thread_func, &ct_arg) != 0) {
+	                      cord_thread_func, &ct_arg) != 0) {
 		diag_set(SystemError, "failed to create thread");
 		goto end;
 	}
 	res = 0;
 	while (! ct_arg.is_started)
 		tt_pthread_cond_wait(&ct_arg.start_cond, &ct_arg.start_mutex);
-end:
+	end:
 	if (res != 0) {
 		if (cord->loop) {
 			ev_loop_destroy(cord->loop);
@@ -1232,15 +1281,15 @@ cord_join(struct cord *cord)
 /** The state of the waiter for a thread to complete. */
 struct cord_cojoin_ctx
 {
-	struct ev_loop *loop;
-	/** Waiting fiber. */
-	struct fiber *fiber;
-	/*
-	 * This event is signalled when the subject thread is
-	 * about to die.
-	 */
-	struct ev_async async;
-	bool task_complete;
+    struct ev_loop *loop;
+    /** Waiting fiber. */
+    struct fiber *fiber;
+    /*
+     * This event is signalled when the subject thread is
+     * about to die.
+     */
+    struct ev_async async;
+    bool task_complete;
 };
 
 static void
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 0b2cb7409..a4906547d 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -35,6 +35,8 @@ evals
 - - ERRINJ_AUTO_UPGRADE: false
   - ERRINJ_BUILD_INDEX: -1
   - ERRINJ_DYN_MODULE_COUNT: 0
+  - ERRINJ_FIBER_MADVISE: false
+  - ERRINJ_FIBER_MPROTECT: -1
   - ERRINJ_HTTPC_EXECUTE: false
   - ERRINJ_HTTP_RESPONSE_ADD_WAIT: false
   - ERRINJ_INDEX_ALLOC: false
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 6dfb10f46..cdafd785f 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -72,6 +72,10 @@ add_executable(fiber.test fiber.cc)
 set_source_files_properties(fiber.cc PROPERTIES COMPILE_FLAGS -O0)
 target_link_libraries(fiber.test core unit)
 
+add_executable(fiber_stack.test fiber_stack.c)
+set_source_files_properties(fiber_stack.c PROPERTIES COMPILE_FLAGS -O0)
+target_link_libraries(fiber_stack.test core unit)
+
 if (NOT ENABLE_GCOV)
     # This test is known to be broken with GCOV
     add_executable(guard.test guard.cc)
diff --git a/test/unit/fiber_stack.c b/test/unit/fiber_stack.c
new file mode 100644
index 000000000..103dfaf0d
--- /dev/null
+++ b/test/unit/fiber_stack.c
@@ -0,0 +1,79 @@
+#include "memory.h"
+#include "fiber.h"
+#include "unit.h"
+#include "trivia/util.h"
+#include "errinj.h"
+
+static struct fiber_attr default_attr;
+
+static int
+noop_f(va_list ap)
+{
+	return 0;
+}
+
+static int
+main_f(va_list ap)
+{
+	struct errinj *inj;
+	struct fiber *fiber;
+
+	header();
+	plan(4);
+
+	/*
+	 * Set non-default stack size to prevent reusing of an
+	 * existing fiber.
+	 */
+	struct fiber_attr *fiber_attr = fiber_attr_new();
+	fiber_attr_setstacksize(fiber_attr, default_attr.stack_size * 2);
+
+	/*
+	 * Clear the fiber's diagnostics area to check that failed
+	 * fiber_new() sets an error.
+	 */
+	diag_clear(diag_get());
+
+	/*
+	 * Check guard page setup via mprotect. We can't test the fiber
+	 * destroy path since it clears fiber's diag.
+	 */
+	inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+	inj->iparam = PROT_NONE;
+	fiber = fiber_new_ex("test_mprotect", fiber_attr, noop_f);
+	inj->iparam = -1;
+
+	ok(fiber == NULL, "mprotect: failed to setup fiber guard page");
+	ok(diag_get() != NULL, "mprotect: diag is armed after error");
+
+	/*
+	 * Check madvise. We can't test the fiber destroy
+	 * path since it is cleaning error.
+	 */
+	diag_clear(diag_get());
+	inj = errinj(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL);
+	inj->bparam = true;
+	fiber = fiber_new_ex("test_madvise", fiber_attr, noop_f);
+	inj->bparam = false;
+
+	ok(fiber != NULL, "madvise: non critical error on madvise hint");
+	ok(diag_get() != NULL, "madvise: diag is armed after error");
+
+	footer();
+
+	ev_break(loop(), EVBREAK_ALL);
+	return check_plan();
+}
+
+int main()
+{
+	memory_init();
+	fiber_init(fiber_c_invoke);
+	fiber_attr_create(&default_attr);
+	struct fiber *f = fiber_new("main", main_f);
+	fiber_wakeup(f);
+	ev_run(loop(), 0);
+	fiber_free();
+	memory_free();
+	return 0;
+}
diff --git a/test/unit/fiber_stack.result b/test/unit/fiber_stack.result
new file mode 100644
index 000000000..43ff74b2f
--- /dev/null
+++ b/test/unit/fiber_stack.result
@@ -0,0 +1,8 @@
+SystemError fiber mprotect failed: Cannot allocate memory
+	*** main_f ***
+1..4
+ok 1 - mprotect: failed to setup fiber guard page
+ok 2 - mprotect: diag is armed after error
+ok 3 - madvise: non critical error on madvise hint
+ok 4 - madvise: diag is armed after error
+	*** main_f: done ***
diff --git a/test/unit/suite.ini b/test/unit/suite.ini
index 75c80ece1..b48eb9c76 100644
--- a/test/unit/suite.ini
+++ b/test/unit/suite.ini
@@ -1,4 +1,5 @@
 [default]
 core = unittest
 description = unit tests
+release_disabled = fiber_stack.test
 is_parallel = True
-- 
2.17.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Tarantool-patches] [PATCH 2/2] fiber: leak slab if unable to bring prots back
  2020-07-29 16:50 [Tarantool-patches] [PATCH 0/2] fiber: backport for stack madvise/mprotect errors handling Ilya Kosarev
  2020-07-29 16:50 ` [Tarantool-patches] [PATCH 1/2] fiber: set diagnostics at madvise/mprotect failure Ilya Kosarev
@ 2020-07-29 16:50 ` Ilya Kosarev
  1 sibling, 0 replies; 4+ messages in thread
From: Ilya Kosarev @ 2020-07-29 16:50 UTC (permalink / raw)
  To: gorcunov; +Cc: tarantool-patches

In case if we unable to revert guard page back to
read|write we should never use such slab again.

Initially I thought of just put panic here and
exit but it is too destructive. I think better
print an error and continue. If node admin ignore
this message then one moment at future there won't
be slab left for use and creating new fibers get
prohibited.

In future (hopefully near one) we plan to drop
guard pages to prevent VMA fracturing and use
stack marks instead.

Relates to #5211

Co-authored-by: Cyrill Gorcunov <gorcunov@gmail.com>
(backported from commit 8d53fadc0cb15a45ea0cd461d2d5243be51b37e0)
---
 src/fiber.c                  | 11 +++++++++--
 test/unit/fiber_stack.c      | 36 +++++++++++++++++++++++++++++++++---
 test/unit/fiber_stack.result |  5 ++++-
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/src/fiber.c b/src/fiber.c
index 2d83bbc1a..cf6b78c43 100644
--- a/src/fiber.c
+++ b/src/fiber.c
@@ -936,15 +936,22 @@ fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
 			 * to setup the original protection back in
 			 * background.
 			 *
+			 * For now lets keep such slab referenced and
+			 * leaked: if mprotect failed we must not allow
+			 * to reuse such slab with PROT_NONE'ed page
+			 * inside.
+			 *
 			 * Note that in case if we're called from
 			 * fiber_stack_create() the @a mprotect_flags is
 			 * the same as the slab been created with, so
 			 * calling mprotect for VMA with same flags
 			 * won't fail.
 			 */
-			diag_log();
+			say_syserror("fiber: Can't put guard page to slab. "
+			             "Leak %zu bytes", (size_t) fiber->stack_size);
+		} else {
+			slab_put(slabc, fiber->stack_slab);
 		}
-		slab_put(slabc, fiber->stack_slab);
 	}
 }
 
diff --git a/test/unit/fiber_stack.c b/test/unit/fiber_stack.c
index 103dfaf0d..41abe3764 100644
--- a/test/unit/fiber_stack.c
+++ b/test/unit/fiber_stack.c
@@ -15,11 +15,13 @@ noop_f(va_list ap)
 static int
 main_f(va_list ap)
 {
+	struct slab_cache *slabc = &cord()->slabc;
+	size_t used_before, used_after;
 	struct errinj *inj;
 	struct fiber *fiber;
 
 	header();
-	plan(4);
+	plan(6);
 
 	/*
 	 * Set non-default stack size to prevent reusing of an
@@ -47,8 +49,7 @@ main_f(va_list ap)
 	ok(diag_get() != NULL, "mprotect: diag is armed after error");
 
 	/*
-	 * Check madvise. We can't test the fiber destroy
-	 * path since it is cleaning error.
+	 * Check madvise error on fiber creation.
 	 */
 	diag_clear(diag_get());
 	inj = errinj(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL);
@@ -59,6 +60,35 @@ main_f(va_list ap)
 	ok(fiber != NULL, "madvise: non critical error on madvise hint");
 	ok(diag_get() != NULL, "madvise: diag is armed after error");
 
+	/*
+	 * Check if we leak on fiber destruction.
+	 * We will print an error and result get
+	 * compared by testing engine.
+	 */
+	fiber_attr_delete(fiber_attr);
+	fiber_attr = fiber_attr_new();
+	fiber_attr->flags |= FIBER_CUSTOM_STACK;
+	fiber_attr->stack_size = 64 << 10;
+
+	diag_clear(diag_get());
+
+	used_before = slabc->allocated.stats.used;
+
+	fiber = fiber_new_ex("test_madvise", fiber_attr, noop_f);
+	ok(fiber != NULL, "fiber with custom stack");
+	fiber_set_joinable(fiber, true);
+
+	inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+	inj->iparam = PROT_READ | PROT_WRITE;
+
+	fiber_start(fiber);
+	fiber_join(fiber);
+	inj->iparam = -1;
+
+	used_after = slabc->allocated.stats.used;
+	ok(used_after > used_before, "expected leak detected");
+
+	fiber_attr_delete(fiber_attr);
 	footer();
 
 	ev_break(loop(), EVBREAK_ALL);
diff --git a/test/unit/fiber_stack.result b/test/unit/fiber_stack.result
index 43ff74b2f..7cae4e96c 100644
--- a/test/unit/fiber_stack.result
+++ b/test/unit/fiber_stack.result
@@ -1,8 +1,11 @@
 SystemError fiber mprotect failed: Cannot allocate memory
+fiber: Can't put guard page to slab. Leak 57344 bytes: Cannot allocate memory
 	*** main_f ***
-1..4
+1..6
 ok 1 - mprotect: failed to setup fiber guard page
 ok 2 - mprotect: diag is armed after error
 ok 3 - madvise: non critical error on madvise hint
 ok 4 - madvise: diag is armed after error
+ok 5 - fiber with custom stack
+ok 6 - expected leak detected
 	*** main_f: done ***
-- 
2.17.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Tarantool-patches] [PATCH 1/2] fiber: set diagnostics at madvise/mprotect failure
  2020-07-29 16:50 ` [Tarantool-patches] [PATCH 1/2] fiber: set diagnostics at madvise/mprotect failure Ilya Kosarev
@ 2020-07-29 17:04   ` Cyrill Gorcunov
  0 siblings, 0 replies; 4+ messages in thread
From: Cyrill Gorcunov @ 2020-07-29 17:04 UTC (permalink / raw)
  To: Ilya Kosarev; +Cc: tarantool-patches

On Wed, Jul 29, 2020 at 07:50:41PM +0300, Ilya Kosarev wrote:
>  
>  /**
>   * Injection state
>   */
>  struct errinj {
> -	/** Name, e.g "ERRINJ_WAL_WRITE" */
> -	const char *name;
> -	/** Type, e.g. BOOL, U64, DOUBLE */
> -	enum errinj_type type;
> -	union {
> -		/** bool parameter */
> -		bool bparam;
> -		/** integer parameter */
> -		int64_t iparam;
> +    /** Name, e.g "ERRINJ_WAL_WRITE" */
> +    const char *name;
> +    /** Type, e.g. BOOL, U64, DOUBLE */
> +    enum errinj_type type;

Ilya, the whole patch is tabs/space screwed. Is is supposed to be so?

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-07-29 17:04 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-29 16:50 [Tarantool-patches] [PATCH 0/2] fiber: backport for stack madvise/mprotect errors handling Ilya Kosarev
2020-07-29 16:50 ` [Tarantool-patches] [PATCH 1/2] fiber: set diagnostics at madvise/mprotect failure Ilya Kosarev
2020-07-29 17:04   ` Cyrill Gorcunov
2020-07-29 16:50 ` [Tarantool-patches] [PATCH 2/2] fiber: leak slab if unable to bring prots back Ilya Kosarev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox