[Tarantool-patches] [PATCH v3] lua: add fiber.top() listing fiber cpu consumption

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sat Nov 2 20:12:19 MSK 2019


Hi! Thanks for the patch!

See 3 comments below.

> diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
> index 93f22ae68..52888cc64 100644
> --- a/src/lib/core/fiber.c
> +++ b/src/lib/core/fiber.c
> @@ -82,6 +86,38 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
> +#if ENABLE_FIBER_TOP
> +static __thread bool fiber_top_enabled = false;
> +
> +/**
> + * An action performed each time a context switch happens.
> + * Used to count each fiber's processing time.
> + */
> +static inline void
> +clock_set_on_csw(struct fiber *caller)
> +{
> +	caller->csw++;
> +	if (!fiber_top_enabled)
> +		return;
> +
> +	uint64_t clock;
> +	uint32_t cpu_id;
> +	clock = __rdtscp(&cpu_id);
> +
> +	if (cpu_id == cord()->cpu_id_last) {
> +		caller->clock_delta += clock - cord()->clock_last;
> +		cord()->clock_delta += clock - cord()->clock_last;
> +	} else {
> +		cord()->cpu_id_last = cpu_id;
> +		cord()->cpu_miss_count++;
> +	}
> +	cord()->clock_last = clock;
> +}
> +
> +#else
> +#define clock_set_on_csw(caller) ;

1. With undefined ENABLE_FIBER_TOP you don't update csw counter.
I would move this #if ENABLE to the clock_set_on_csw() body,
right after csw is incremented.

> +#endif /* ENABLE_FIBER_TOP */
> +
> diff --git a/src/lua/fiber.c b/src/lua/fiber.c
> index 124908a05..a030e444d 100644
> --- a/src/lua/fiber.c
> +++ b/src/lua/fiber.c
> @@ -319,6 +323,67 @@ lbox_fiber_statof_nobt(struct fiber *f, void *cb_ctx)
>  	return lbox_fiber_statof(f, cb_ctx, false);
>  }
>  
> +#if ENABLE_FIBER_TOP
> +static int
> +lbox_fiber_top_entry(struct fiber *f, void *cb_ctx)
> +{
> +	struct lua_State *L = (struct lua_State *) cb_ctx;
> +	char name_buf[64];
> +
> +	snprintf(name_buf, sizeof(name_buf), "%u/%s", f->fid, f->name);
> +	lua_pushstring(L, name_buf);

2. A piece of advice - use tt_sprintf:

    lua_pushstring(L, tt_sprintf("%u/%s", f->fid, f->name));

> +
> +	lua_newtable(L);
> +
> +	lua_pushliteral(L, "average");
> +	lua_pushnumber(L, f->clock_acc / (double)cord()->clock_acc * 100);
> +	lua_settable(L, -3);
> +	lua_pushliteral(L, "instant");
> +	lua_pushnumber(L, f->clock_delta_last / (double)cord()->clock_delta_last * 100);
> +	lua_settable(L, -3);
> +	lua_pushliteral(L, "time");
> +	lua_pushnumber(L, f->cputime / (double) FIBER_TIME_RES);
> +	lua_settable(L, -3);
> +	lua_settable(L, -3);
> +
> +	return 0;
> +}
> diff --git a/test/app/fiber.result b/test/app/fiber.result
> index 3c6115a33..3b9e5da9a 100644
> --- a/test/app/fiber.result
> +++ b/test/app/fiber.result
> @@ -1462,6 +1462,91 @@ fiber.join(fiber.self())
>  ---
>  - error: the fiber is not joinable
>  ...
> +sum = 0
> +---
> +...
> +-- gh-2694 fiber.top()
> +fiber.top_enable()
> +---
> +...
> +a = fiber.top()
> +---
> +...
> +type(a)
> +---
> +- table
> +...
> +-- scheduler is present in fiber.top()
> +-- and is indexed by name
> +a["1/sched"] ~= nil
> +---
> +- true
> +...
> +type(a["cpu misses"]) == 'number'
> +---
> +- true
> +...
> +sum_inst = 0
> +---
> +...
> +sum_avg = 0
> +---
> +...
> +-- update table to make sure
> +-- a full event loop iteration
> +-- has ended
> +a = fiber.top()
> +---
> +...
> +for k, v in pairs(a) do\
> +    if type(v) == 'table' then\

3. This looks hard to use. The fact, that one table contains
records totally different in their structure. I would propose
to return cpu misses and fibers separately:

    fiber.top() =

        cpu_misses = <number>,
        time = [
            '<id>/<name>' = {...},
            '<id>/<name>' = {...},
            '<id>/<name>' = {...},
            ...
        ]

Then you can take fiber.top().time and be sure, that all
records here have the same structure. As far as I remember
we already had similar problems with other statistics, so
it is better to design it know in the most extendible way.
And it will be easier to add new global statistics to the
top in future.


More information about the Tarantool-patches mailing list