[PATCH 3/3] core/fiber: Put watermarks into stack to track its usage

Vladimir Davydov vdavydov.dev at gmail.com
Wed Mar 6 21:05:04 MSK 2019


On Wed, Mar 06, 2019 at 01:38:54AM +0300, Cyrill Gorcunov wrote:
> diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
> index 64bcda26a..d18227e7f 100644
> --- a/src/lib/core/fiber.c
> +++ b/src/lib/core/fiber.c
> @@ -41,6 +41,7 @@
>  #include "assoc.h"
>  #include "memory.h"
>  #include "trigger.h"
> +#include "random.h"
>  
>  #include "third_party/valgrind/memcheck.h"
>  
> @@ -104,6 +105,32 @@ static const struct fiber_attr fiber_attr_default = {
>         .flags = FIBER_DEFAULT_FLAGS
>  };
>  
> +/*
> + * Random values generated with uuid.
> + */
> +static const uint64_t poison_pool[] = {
> +	0x74f31d37285c4c37, 0xb10269a05bf10c29,
> +	0x0994d845bd284e0f, 0x9ffd4f7129c184df,
> +	0x357151e6711c4415, 0x8c5e5f41aafe6f28,
> +	0x6917dd79e78049d5, 0xba61957c65ca2465,
> +};
> +
> +/*
> + * An offset inside page to the first mark.
> + */
> +static unsigned int wmark_inpage_offset;
> +
> +/*
> + * We poison by 8 bytes as it natural for stack
> + * step on x86-64. Also 128 byte gap between
> + * poison values should cover a common cases.
> + */
> +#define POISON_SIZE	(sizeof(poison_pool) / sizeof(poison_pool[0]))
> +#define POISON_GAP	(128 + sizeof(poison_pool[0]))
> +#define POISON_OFF	(POISON_GAP / sizeof(poison_pool[0]))
> +
> +static void fiber_wmark_recycle(struct fiber *fiber);
> +
>  void
>  fiber_attr_create(struct fiber_attr *fiber_attr)
>  {
> @@ -624,6 +651,7 @@ fiber_recycle(struct fiber *fiber)
>  	/* no pending wakeup */
>  	assert(rlist_empty(&fiber->state));
>  	bool has_custom_stack = fiber->flags & FIBER_CUSTOM_STACK;
> +	fiber_wmark_recycle(fiber);
>  	fiber_reset(fiber);
>  	fiber->name[0] = '\0';
>  	fiber->f = NULL;
> @@ -710,6 +738,157 @@ page_align_up(void *ptr)
>  	return page_align_down(ptr + page_size - 1);
>  }
>  
> +static bool
> +stack_has_wmark(void *addr)
> +{
> +	const uint64_t *src = poison_pool;
> +	const uint64_t *dst = addr;
> +	size_t i;
> +
> +	for (i = 0; i < POISON_SIZE; i++) {
> +		if (*dst != src[i])
> +			return false;
> +		dst += POISON_OFF;
> +	}
> +
> +	return true;
> +}
> +
> +static void
> +stack_put_wmark(void *addr)
> +{
> +	const uint64_t *src = poison_pool;
> +	uint64_t *dst = addr;
> +	size_t i;
> +
> +	for (i = 0; i < POISON_SIZE; i++) {
> +		*dst = src[i];
> +		dst += POISON_OFF;
> +	}
> +}
> +
> +#ifndef TARGET_OS_DARWIN
> +static void
> +stack_shrink(struct fiber *fiber)
> +{
> +	void *hi, *lo;
> +
> +	if (stack_direction < 0) {
> +		hi = fiber->stack + fiber->stack_size;
> +		lo = fiber->stack_overflow_wmark+ page_size;
> +	} else {
> +		hi = fiber->stack_overflow_wmark - page_size;
> +		lo = fiber->stack - fiber->stack_size;
> +	}
> +
> +	if (fiber->stack_dynamic_wmark <= lo ||
> +	    fiber->stack_dynamic_wmark >= hi)
> +		return;
> +
> +	if (stack_direction < 0) {
> +		madvise(page_align_up(fiber->stack_dynamic_wmark),
> +			page_size, MADV_DONTNEED);
> +		fiber->stack_dynamic_wmark += page_size;
> +	} else {
> +		madvise(page_align_down(fiber->stack_dynamic_wmark),
> +			page_size, MADV_DONTNEED);
> +		fiber->stack_dynamic_wmark -= page_size;
> +	}
> +	stack_put_wmark(fiber);
> +}
> +#endif
> +
> +static void
> +fiber_wmark_recycle(struct fiber *fiber)
> +{
> +	static bool overflow_warned = false;
> +
> +	if (fiber->stack == NULL || fiber->flags & FIBER_CUSTOM_STACK)
> +		return;
> +
> +#ifndef TARGET_OS_DARWIN
> +	/*
> +	 * On recycle we're trying to shrink stack
> +	 * as much as we can until first mark overwrite
> +	 * detected, then we simply zap watermark and
> +	 * assume the stack is balanced and won't change
> +	 * much in future.
> +	 */

I don't understand. If the watermark has been exceeded once for a
particular fiber, we'll never ever shrink the stack back to normal?
That is if a task eager for stack is rescheduled on different fibers
they will all end up with huge stacks. That's not what we want.
We want the stack size to be set dynamically so that the *majority* of
fibers don't need to shrink/grow stacks back and forth. Or am I reading
the code wrong?

> +	if (fiber->stack_dynamic_wmark != NULL) {
> +		if (!stack_has_wmark(fiber->stack_dynamic_wmark))
> +			fiber->stack_dynamic_wmark = NULL;
> +		else
> +			stack_shrink(fiber);
> +	}
> +#endif
> +
> +	/*
> +	 * We are watching for stack overflow in one shot way:
> +	 * simply to not spam a user with messages, if someone
> +	 * triggered the problem it is highly likely that another
> +	 * fiber hit the same.
> +	 */
> +	if (overflow_warned)
> +		return;
> +
> +	if (!stack_has_wmark(fiber->stack_overflow_wmark)) {
> +		say_warn("fiber %d seems to overflow the stack soon",
> +			 fiber->name, fiber->fid);

No point in printing fiber->name and id - they are printed by the say
module, anyway. At the same time I'd prefer to keep max stack size here
and print its value whenever we have to grow stack, similarly to how
the Linux kernel handles it.

> +		overflow_warned = true;
> +	}
> +}
> +
> +static void
> +fiber_wmark_init(struct fiber *fiber)
> +{
> +	/*
> +	 * No tracking on custom stacks
> +	 * in a sake of simplicity.
> +	 */
> +	if (fiber->flags & FIBER_CUSTOM_STACK) {
> +		fiber->stack_overflow_wmark = NULL;
> +		fiber->stack_dynamic_wmark = NULL;
> +		return;
> +	}
> +
> +	/*
> +	 * Initially we arm the last page of the stack
> +	 * to catch if we're getting close to its exhausting.
> +	 * In turn the dynamic mark is put into next page so
> +	 * we could drop pages later if they are unused.
> +	 */
> +	if (stack_direction < 0) {
> +		fiber->stack_overflow_wmark  = fiber->stack;
> +		fiber->stack_overflow_wmark += wmark_inpage_offset;
> +
> +		fiber->stack_dynamic_wmark = fiber->stack_overflow_wmark + page_size;
> +	} else {
> +		fiber->stack_overflow_wmark  = fiber->stack + fiber->stack_size;
> +		fiber->stack_overflow_wmark -= page_size;
> +		fiber->stack_overflow_wmark -= wmark_inpage_offset;
> +
> +		fiber->stack_dynamic_wmark = fiber->stack_overflow_wmark - page_size;

I'd rather start with the minimal allowed stack size and grow it on
demand, if there are too many fibers that exceed the watermark now and
then, not vice versa (imagine we start from 16 MB).

> +	}
> +	stack_put_wmark(fiber->stack_overflow_wmark);
> +}
> +
> +static void
> +stack_wmark_init(void)
> +{
> +	uint16_t v;
> +
> +	/*
> +	 * To increase probability of the stack overflow
> +	 * detection we put _first_ mark at random position
> +	 * in first 128 bytes range. The rest of the marks
> +	 * are put with constant step (because we should not
> +	 * pressue random generator much in case of hight
> +	 * number of fibers).
> +	 */
> +	random_bytes((void *)&v, sizeof(v));

Why not rand?

Anyway, I think we should recalculate the offset for each fiber
individually each time it's recycled. Setting it once on system
initialization is no better than not using randomization at all.
As for pressure on the random generator - I don't understand why
it can possibly be bad. After all, it's a pseudo random generator,
not /dev/random.

> +	wmark_inpage_offset = ((v % 128) + 8) & ~7;
> +}
> +
>  static int
>  fiber_stack_create(struct fiber *fiber, size_t stack_size)
>  {
> @@ -757,6 +936,7 @@ fiber_stack_create(struct fiber *fiber, size_t stack_size)
>  	madvise(fiber->stack, fiber->stack_size, MADV_DONTNEED);
>  #endif
>  
> +	fiber_wmark_init(fiber);
>  	mprotect(guard, page_size, PROT_NONE);
>  	return 0;
>  }
> @@ -926,8 +1106,12 @@ cord_create(struct cord *cord, const char *name)
>  	/* Record stack extents */
>  	tt_pthread_attr_getstack(cord->id, &cord->sched.stack,
>  				 &cord->sched.stack_size);
> +	cord->sched.stack_overflow_wmark = cord->sched.stack;
> +	cord->sched.stack_dynamic_wmark = cord->sched.stack;
>  #else
>  	cord->sched.stack = NULL;
> +	cord->sched.stack_overflow_wmark = NULL;
> +	cord->sched.stack_dynamic_wmark = NULL;
>  	cord->sched.stack_size = 0;
>  #endif
>  }
> @@ -1238,6 +1422,7 @@ fiber_init(int (*invoke)(fiber_func f, va_list ap))
>  {
>  	page_size = sysconf(_SC_PAGESIZE);
>  	stack_direction = check_stack_direction(__builtin_frame_address(0));
> +	stack_wmark_init();
>  	fiber_invoke = invoke;
>  	main_thread_id = pthread_self();
>  	main_cord.loop = ev_default_loop(EVFLAG_AUTO | EVFLAG_ALLOCFD);
> diff --git a/src/lib/core/fiber.h b/src/lib/core/fiber.h
> index f1f5a0555..271d2b4d0 100644
> --- a/src/lib/core/fiber.h
> +++ b/src/lib/core/fiber.h
> @@ -348,6 +348,10 @@ struct fiber {
>  	struct slab *stack_slab;
>  	/** Coro stack addr. */
>  	void *stack;
> +	/** Stack dynamic watermark addr for usage optimization. */
> +	void *stack_dynamic_wmark;
> +	/** Stack watermark addr for overflow detection. */
> +	void *stack_overflow_wmark;

Here I'd like to see a more elaborate comment, explaining why we need
these members, what exactly the stack usage optimization is about.



More information about the Tarantool-patches mailing list