[Tarantool-patches] [tarantool-patches] [PATCH v3] lua: add fiber.top() listing fiber cpu consumption
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Wed Nov 6 17:02:31 MSK 2019
Hi! Thanks for the fixes! Now LGTM.
On 05/11/2019 17:42, Serge Petrenko wrote:
> Hi! Thanks for your review!
> I pushed the changes on the branch, the diff is below.
>
> --
> Serge Petrenko
> sergepetrenko at tarantool.org <mailto:sergepetrenko at tarantool.org>
>
>
>
>
>> 2 нояб. 2019 г., в 20:12, Vladislav Shpilevoy <v.shpilevoy at tarantool.org <mailto:v.shpilevoy at tarantool.org>> написал(а):
>>
>> Hi! Thanks for the patch!
>>
>> See 3 comments below.
>>
>>> diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
>>> index 93f22ae68..52888cc64 100644
>>> --- a/src/lib/core/fiber.c
>>> +++ b/src/lib/core/fiber.c
>>> @@ -82,6 +86,38 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
>>> +#if ENABLE_FIBER_TOP
>>> +static __thread bool fiber_top_enabled = false;
>>> +
>>> +/**
>>> + * An action performed each time a context switch happens.
>>> + * Used to count each fiber's processing time.
>>> + */
>>> +static inline void
>>> +clock_set_on_csw(struct fiber *caller)
>>> +{
>>> +caller->csw++;
>>> +if (!fiber_top_enabled)
>>> +return;
>>> +
>>> +uint64_t clock;
>>> +uint32_t cpu_id;
>>> +clock = __rdtscp(&cpu_id);
>>> +
>>> +if (cpu_id == cord()->cpu_id_last) {
>>> +caller->clock_delta += clock - cord()->clock_last;
>>> +cord()->clock_delta += clock - cord()->clock_last;
>>> +} else {
>>> +cord()->cpu_id_last = cpu_id;
>>> +cord()->cpu_miss_count++;
>>> +}
>>> +cord()->clock_last = clock;
>>> +}
>>> +
>>> +#else
>>> +#define clock_set_on_csw(caller) ;
>>
>> 1. With undefined ENABLE_FIBER_TOP you don't update csw counter.
>> I would move this #if ENABLE to the clock_set_on_csw() body,
>> right after csw is incremented.
>
> Fixed.
>
>>
>>> +#endif /* ENABLE_FIBER_TOP */
>>> +
>>> diff --git a/src/lua/fiber.c b/src/lua/fiber.c
>>> index 124908a05..a030e444d 100644
>>> --- a/src/lua/fiber.c
>>> +++ b/src/lua/fiber.c
>>> @@ -319,6 +323,67 @@ lbox_fiber_statof_nobt(struct fiber *f, void *cb_ctx)
>>> return lbox_fiber_statof(f, cb_ctx, false);
>>> }
>>>
>>> +#if ENABLE_FIBER_TOP
>>> +static int
>>> +lbox_fiber_top_entry(struct fiber *f, void *cb_ctx)
>>> +{
>>> +struct lua_State *L = (struct lua_State *) cb_ctx;
>>> +char name_buf[64];
>>> +
>>> +snprintf(name_buf, sizeof(name_buf), "%u/%s", f->fid, f->name);
>>> +lua_pushstring(L, name_buf);
>>
>> 2. A piece of advice - use tt_sprintf:
>>
>> lua_pushstring(L, tt_sprintf("%u/%s", f->fid, f->name));
>
> Thanks! Changed.
>
>>
>>> +
>>> +lua_newtable(L);
>>> +
>>> +lua_pushliteral(L, "average");
>>> +lua_pushnumber(L, f->clock_acc / (double)cord()->clock_acc * 100);
>>> +lua_settable(L, -3);
>>> +lua_pushliteral(L, "instant");
>>> +lua_pushnumber(L, f->clock_delta_last / (double)cord()->clock_delta_last * 100);
>>> +lua_settable(L, -3);
>>> +lua_pushliteral(L, "time");
>>> +lua_pushnumber(L, f->cputime / (double) FIBER_TIME_RES);
>>> +lua_settable(L, -3);
>>> +lua_settable(L, -3);
>>> +
>>> +return 0;
>>> +}
>>> diff --git a/test/app/fiber.result b/test/app/fiber.result
>>> index 3c6115a33..3b9e5da9a 100644
>>> --- a/test/app/fiber.result
>>> +++ b/test/app/fiber.result
>>> @@ -1462,6 +1462,91 @@ fiber.join(fiber.self())
>>> ---
>>> - error: the fiber is not joinable
>>> ...
>>> +sum = 0
>>> +---
>>> +...
>>> +-- gh-2694 fiber.top <http://fiber.top>()
>>> +fiber.top_enable()
>>> +---
>>> +...
>>> +a = fiber.top <http://fiber.top>()
>>> +---
>>> +...
>>> +type(a)
>>> +---
>>> +- table
>>> +...
>>> +-- scheduler is present in fiber.top <http://fiber.top>()
>>> +-- and is indexed by name
>>> +a["1/sched"] ~= nil
>>> +---
>>> +- true
>>> +...
>>> +type(a["cpu misses"]) == 'number'
>>> +---
>>> +- true
>>> +...
>>> +sum_inst = 0
>>> +---
>>> +...
>>> +sum_avg = 0
>>> +---
>>> +...
>>> +-- update table to make sure
>>> +-- a full event loop iteration
>>> +-- has ended
>>> +a = fiber.top <http://fiber.top>()
>>> +---
>>> +...
>>> +for k, v in pairs(a) do\
>>> + if type(v) == 'table' then\
>>
>> 3. This looks hard to use, because one table contains
>> records totally different in their structure. I would propose
>> to return cpu misses and fibers separately:
>>
>> fiber.top() =
>>
>> cpu_misses = <number>,
>> time = [
>> '<id>/<name>' = {...},
>> '<id>/<name>' = {...},
>> '<id>/<name>' = {...},
>> ...
>> ]
>>
>> Then you can take fiber.top().time and be sure that all
>> records here have the same structure. As far as I remember
>> we already had similar problems with other statistics, so
>> it is better to design it now in the most extendible way.
>> And it will be easier to add new global statistics to the
>> top in the future.
>>
>
> Good point. I named the subtable `cpu` instead of `time`. It makes more
> sense imo. Also renamed `cpu misses` to `cpu_misses` so that it can be
> accessed as fiber.top().cpu_misses
>
>
> diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
> index 52888cc64..aebaba7f0 100644
> --- a/src/lib/core/fiber.c
> +++ b/src/lib/core/fiber.c
> @@ -88,6 +88,7 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
>
> #if ENABLE_FIBER_TOP
> static __thread bool fiber_top_enabled = false;
> +#endif /* ENABLE_FIBER_TOP */
>
> /**
> * An action performed each time a context switch happens.
> @@ -97,6 +98,8 @@ static inline void
> clock_set_on_csw(struct fiber *caller)
> {
> caller->csw++;
> +
> +#if ENABLE_FIBER_TOP
> if (!fiber_top_enabled)
> return;
>
> @@ -112,12 +115,10 @@ clock_set_on_csw(struct fiber *caller)
> cord()->cpu_miss_count++;
> }
> cord()->clock_last = clock;
> -}
> -
> -#else
> -#define clock_set_on_csw(caller) ;
> #endif /* ENABLE_FIBER_TOP */
>
> +}
> +
> /*
> * Defines a handler to be executed on exit from cord's thread func,
> * accessible via cord()->on_exit (normally NULL). It is used to
> diff --git a/src/lua/fiber.c b/src/lua/fiber.c
> index a030e444d..8b3b22e55 100644
> --- a/src/lua/fiber.c
> +++ b/src/lua/fiber.c
> @@ -33,6 +33,7 @@
> #include <fiber.h>
> #include "lua/utils.h"
> #include "backtrace.h"
> +#include "tt_static.h"
>
> #include <lua.h>
> #include <lauxlib.h>
> @@ -328,10 +329,8 @@ static int
> lbox_fiber_top_entry(struct fiber *f, void *cb_ctx)
> {
> struct lua_State *L = (struct lua_State *) cb_ctx;
> -char name_buf[64];
>
> -snprintf(name_buf, sizeof(name_buf), "%u/%s", f->fid, f->name);
> -lua_pushstring(L, name_buf);
> +lua_pushstring(L, tt_sprintf("%u/%s", f->fid, f->name));
>
> lua_newtable(L);
>
> @@ -357,12 +356,15 @@ lbox_fiber_top(struct lua_State *L)
> " fiber.top_enable() first");
> }
> lua_newtable(L);
> -lua_pushliteral(L, "cpu misses");
> +lua_pushliteral(L, "cpu_misses");
> lua_pushnumber(L, cord()->cpu_miss_count_last);
> lua_settable(L, -3);
>
> +lua_pushliteral(L, "cpu");
> +lua_newtable(L);
> lbox_fiber_top_entry(&cord()->sched, L);
> fiber_stat(lbox_fiber_top_entry, L);
> +lua_settable(L, -3);
>
> return 1;
> }
> diff --git a/test/app/fiber.result b/test/app/fiber.result
> index 3b9e5da9a..4a094939f 100644
> --- a/test/app/fiber.result
> +++ b/test/app/fiber.result
> @@ -1478,11 +1478,11 @@ type(a)
> ...
> -- scheduler is present in fiber.top <http://fiber.top>()
> -- and is indexed by name
> -a["1/sched"] ~= nil
> +a.cpu["1/sched"] ~= nil
> ---
> - true
> ...
> -type(a["cpu misses"]) == 'number'
> +type(a.cpu_misses) == 'number'
> ---
> - true
> ...
> @@ -1495,14 +1495,12 @@ sum_avg = 0
> -- update table to make sure
> -- a full event loop iteration
> -- has ended
> -a = fiber.top <http://fiber.top>()
> +a = fiber.top <http://fiber.top>().cpu
> ---
> ...
> for k, v in pairs(a) do\
> - if type(v) == 'table' then\
> - sum_inst = sum_inst + v["instant"]\
> - sum_avg = sum_avg + v["average"]\
> - end\
> + sum_inst = sum_inst + v["instant"]\
> + sum_avg = sum_avg + v["average"]\
> end
> ---
> ...
> @@ -1521,7 +1519,7 @@ tbl = nil
> f = fiber.new(function()\
> for i = 1,1000 do end\
> fiber.yield()\
> - tbl = fiber.top <http://fiber.top>()[fiber.self().id()..'/'..fiber.self().name()]\
> + tbl = fiber.top <http://fiber.top>().cpu[fiber.self().id()..'/'..fiber.self().name()]\
> end)
> ---
> ...
> diff --git a/test/app/fiber.test.lua b/test/app/fiber.test.lua
> index ce1f55e8d..38b85d554 100644
> --- a/test/app/fiber.test.lua
> +++ b/test/app/fiber.test.lua
> @@ -638,20 +638,18 @@ a = fiber.top <http://fiber.top>()
> type(a)
> -- scheduler is present in fiber.top <http://fiber.top>()
> -- and is indexed by name
> -a["1/sched"] ~= nil
> -type(a["cpu misses"]) == 'number'
> +a.cpu["1/sched"] ~= nil
> +type(a.cpu_misses) == 'number'
> sum_inst = 0
> sum_avg = 0
>
> -- update table to make sure
> -- a full event loop iteration
> -- has ended
> -a = fiber.top <http://fiber.top>()
> +a = fiber.top <http://fiber.top>().cpu
> for k, v in pairs(a) do\
> - if type(v) == 'table' then\
> - sum_inst = sum_inst + v["instant"]\
> - sum_avg = sum_avg + v["average"]\
> - end\
> + sum_inst = sum_inst + v["instant"]\
> + sum_avg = sum_avg + v["average"]\
> end
>
> sum_inst
> @@ -661,7 +659,7 @@ tbl = nil
> f = fiber.new(function()\
> for i = 1,1000 do end\
> fiber.yield()\
> - tbl = fiber.top <http://fiber.top>()[fiber.self().id()..'/'..fiber.self().name()]\
> + tbl = fiber.top <http://fiber.top>().cpu[fiber.self().id()..'/'..fiber.self().name()]\
> end)
> while f:status() ~= 'dead' do fiber.sleep(0.01) end
> tbl["average"] > 0
>
More information about the Tarantool-patches
mailing list