[PATCH 3/3] lib/core/fiber: Put watermarks into stack to track its usage

Cyrill Gorcunov gorcunov at gmail.com
Wed Mar 13 01:47:21 MSK 2019


We want to detect the situation where a task running in a fiber is too
eager for stack and is close to exhausting it. For this purpose, upon
stack creation we put 8 marks on the last stack page with a gap of 128
bytes between them. These parameters let us cover ~1/4 of a page, which
seems reasonable, but we might change them over time.

Since the watermark position is permanent, when some task gets close
to the stack limit we report the situation only once, so as not to
spam the user, and stop putting the mark on recycling.

Still, this technique is not guaranteed to handle all possible
situations, so to increase the probability of catching eager fibers
we put the marks *not* at the page start address but at some randomly
generated offset from it.

To minimize pressure on the memory system we try to reduce stack
usage with a so-called "shrink" watermark. Basically it is the
same mark as for overflow detection but placed at the 64K boundary,
and on every recycle we try to shrink stack usage by dropping
pages if the watermark has been erased by a task.

Closes #3418

Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
 src/lib/core/fiber.c | 167 +++++++++++++++++++++++++++++++++++++++++++
 src/lib/core/fiber.h |  15 ++++
 2 files changed, 182 insertions(+)

diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
index a1b261ad4..cae291d1a 100644
--- a/src/lib/core/fiber.c
+++ b/src/lib/core/fiber.c
@@ -104,6 +104,27 @@ static const struct fiber_attr fiber_attr_default = {
        .flags = FIBER_DEFAULT_FLAGS
 };
 
+/* Watermark pattern: random values generated with uuid. */
+static const uint64_t poison_pool[] = {
+	0x74f31d37285c4c37, 0xb10269a05bf10c29,
+	0x0994d845bd284e0f, 0x9ffd4f7129c184df,
+	0x357151e6711c4415, 0x8c5e5f41aafe6f28,
+	0x6917dd79e78049d5, 0xba61957c65ca2465,
+};
+
+/*
+ * We poison in 8-byte units as that is the natural stack step
+ * on x86-64. Also, a 128-byte gap between poison values should
+ * cover the common cases. POISON_SIZE is the number of marks,
+ * POISON_GAP is the distance in bytes between the starts of two
+ * consecutive marks, POISON_OFF is the same distance in uint64_t.
+ */
+#define POISON_SIZE	(sizeof(poison_pool) / sizeof(poison_pool[0]))
+#define POISON_GAP	(128 + sizeof(poison_pool[0]))
+#define POISON_OFF	(POISON_GAP / sizeof(poison_pool[0]))
+
+static void fiber_wmark_recycle(struct fiber *fiber);
+
 void
 fiber_attr_create(struct fiber_attr *fiber_attr)
 {
@@ -624,6 +645,7 @@ fiber_recycle(struct fiber *fiber)
 	/* no pending wakeup */
 	assert(rlist_empty(&fiber->state));
 	bool has_custom_stack = fiber->flags & FIBER_CUSTOM_STACK;
+	fiber_wmark_recycle(fiber);
 	fiber_reset(fiber);
 	fiber->name[0] = '\0';
 	fiber->f = NULL;
@@ -710,6 +732,146 @@ page_align_up(void *ptr)
 	return page_align_down(ptr + page_size - 1);
 }
 
+static bool
+stack_has_wmark(void *addr)
+{
+	const uint64_t *src = poison_pool;
+	const uint64_t *dst = addr;
+	size_t i;
+	/* The watermark at addr is intact only if every mark matches. */
+	for (i = 0; i < POISON_SIZE; i++) {
+		if (*dst != src[i])
+			return false;
+		dst += POISON_OFF;
+	}
+	/* All POISON_SIZE marks still hold their pool values. */
+	return true;
+}
+
+static void
+stack_put_wmark(void *addr)
+{
+	const uint64_t *src = poison_pool;
+	uint64_t *dst = addr;
+	size_t i;
+	/* Store each pool value at addr, POISON_OFF slots apart. */
+	for (i = 0; i < POISON_SIZE; i++) {
+		*dst = src[i];
+		dst += POISON_OFF;
+	}
+}
+
+#ifndef TARGET_OS_DARWIN
+static void
+stack_shrink(struct fiber *fiber)
+{
+	void *start, *end;
+
+	/*
+	 * Drop the pages lying between the two watermarks and
+	 * re-arm the "shrink" one. The page containing the
+	 * overflow mark must stay untouched. The page carrying
+	 * the "shrink" wmark is left alone too, since we are
+	 * rewriting it anyway. The madvise() result is ignored.
+	 */
+	if (stack_direction < 0) {
+		end = page_align_down(fiber->stack_shrink_wmark);
+		start = page_align_up(fiber->stack_overflow_wmark);
+		madvise(start, end - start, MADV_DONTNEED);
+	} else {
+		start = page_align_up(fiber->stack_shrink_wmark);
+		end = page_align_down(fiber->stack_overflow_wmark);
+		madvise(start, end - start, MADV_DONTNEED);
+	}
+	stack_put_wmark(fiber->stack_shrink_wmark);
+}
+#endif
+
+static void
+fiber_wmark_recycle(struct fiber *fiber)
+{
+	static bool overflow_warned = false;
+	/* Nothing to check without a stack or with a custom one. */
+	if (fiber->stack == NULL || fiber->flags & FIBER_CUSTOM_STACK)
+		return;
+
+#ifndef TARGET_OS_DARWIN
+	/*
+	 * On recycle we try to shrink the stack to relieve
+	 * memory pressure, but only if the fiber has used
+	 * enough of it to erase the "shrink" watermark.
+	 */
+	if (!stack_has_wmark(fiber->stack_shrink_wmark))
+		stack_shrink(fiber);
+#endif
+
+	/*
+	 * We watch for stack overflow in a one-shot way, simply
+	 * to not spam the user with messages: if someone has
+	 * triggered the problem it is highly likely that another
+	 * fiber will hit the same soon. The flag is function-static.
+	 */
+	if (overflow_warned)
+		return;
+
+	if (!stack_has_wmark(fiber->stack_overflow_wmark)) {
+		say_warn("stack usage is close to the limit of %zu bytes",
+			 (size_t)FIBER_STACK_SIZE_DEFAULT);
+		overflow_warned = true;
+	}
+}
+
+static void
+fiber_wmark_init(struct fiber *fiber)
+{
+	/*
+	 * No tracking on custom stacks for the sake of
+	 * simplicity: clear the watermark fields and leave.
+	 */
+	if (fiber->flags & FIBER_CUSTOM_STACK) {
+		fiber->stack_overflow_wmark = NULL;
+		fiber->stack_shrink_wmark = NULL;
+		fiber->wmark_inpage_offset = 0;
+		return;
+	}
+
+	/*
+	 * To increase the probability of stack overflow
+	 * detection we put the first mark at a random offset:
+	 * a multiple of 8 in the range [8, 128]. The rest of
+	 * the marks are put with a constant step simply to not
+	 * carry offsets in memory.
+	 */
+	fiber->wmark_inpage_offset = ((rand() % 128) + 8) & ~7;
+
+	/*
+	 * Initially we arm the last page of the stack to catch
+	 * the case when we are getting close to exhausting it.
+	 *
+	 * The shrink watermark is put at the 64K (16 << 12) limit,
+	 * which is a value known to not cause much memory pressure
+	 * even with a large number of fibers.
+	 */
+	if (stack_direction < 0) {
+		fiber->stack_overflow_wmark  = fiber->stack;
+		fiber->stack_overflow_wmark += fiber->wmark_inpage_offset;
+
+		fiber->stack_shrink_wmark  = fiber->stack + fiber->stack_size;
+		fiber->stack_shrink_wmark -= 16 << 12;
+		fiber->stack_shrink_wmark += fiber->wmark_inpage_offset;
+	} else {
+		fiber->stack_overflow_wmark  = fiber->stack + fiber->stack_size;
+		fiber->stack_overflow_wmark -= page_size;
+		fiber->stack_overflow_wmark += fiber->wmark_inpage_offset;
+
+		fiber->stack_shrink_wmark  = fiber->stack;
+		fiber->stack_shrink_wmark += (16 << 12) - page_size;
+		fiber->stack_shrink_wmark += fiber->wmark_inpage_offset;
+	}
+	stack_put_wmark(fiber->stack_overflow_wmark);
+	stack_put_wmark(fiber->stack_shrink_wmark);
+}
+
 static int
 fiber_stack_create(struct fiber *fiber, size_t stack_size)
 {
@@ -758,6 +920,7 @@ fiber_stack_create(struct fiber *fiber, size_t stack_size)
 	madvise(fiber->stack, fiber->stack_size, MADV_DONTNEED);
 #endif
 
+	fiber_wmark_init(fiber);
 	mprotect(guard, page_size, PROT_NONE);
 	return 0;
 }
@@ -927,8 +1090,12 @@ cord_create(struct cord *cord, const char *name)
 	/* Record stack extents */
 	tt_pthread_attr_getstack(cord->id, &cord->sched.stack,
 				 &cord->sched.stack_size);
+	cord->sched.stack_overflow_wmark = cord->sched.stack;
+	cord->sched.stack_shrink_wmark = cord->sched.stack;
 #else
 	cord->sched.stack = NULL;
+	cord->sched.stack_overflow_wmark = NULL;
+	cord->sched.stack_shrink_wmark = NULL;
 	cord->sched.stack_size = 0;
 #endif
 }
diff --git a/src/lib/core/fiber.h b/src/lib/core/fiber.h
index f1f5a0555..431e3da09 100644
--- a/src/lib/core/fiber.h
+++ b/src/lib/core/fiber.h
@@ -348,6 +348,21 @@ struct fiber {
 	struct slab *stack_slab;
 	/** Coro stack addr. */
 	void *stack;
+	/**
+	 * Stack watermark address used to detect
+	 * whether we need to shrink the stack on reuse.
+	 */
+	void *stack_shrink_wmark;
+	/**
+	 * Stack watermark addr for overflow detection. To warn
+	 * a user about stack eager fibers.
+	 */
+	void *stack_overflow_wmark;
+	/**
+	 * Offset of the watermark position in the stack,
+	 * counted from the page boundary address.
+	 */
+	unsigned int wmark_inpage_offset;
 	/** Coro stack size. */
 	size_t stack_size;
 	/** Valgrind stack id. */
-- 
2.20.1




More information about the Tarantool-patches mailing list