[Tarantool-patches] [tarantool-patches] [PATCH v3] lua: add fiber.top() listing fiber cpu consumption

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Nov 6 17:02:31 MSK 2019


Hi! Thanks for the fixes! Now LGTM.

On 05/11/2019 17:42, Serge Petrenko wrote:
> Hi! Thanks for your review!
> I pushed the changes on the branch, the diff is below.
> 
> --
> Serge Petrenko
> sergepetrenko at tarantool.org <mailto:sergepetrenko at tarantool.org>
> 
> 
> 
> 
>> 2 нояб. 2019 г., в 20:12, Vladislav Shpilevoy <v.shpilevoy at tarantool.org <mailto:v.shpilevoy at tarantool.org>> написал(а):
>>
>> Hi! Thanks for the patch!
>>
>> See 3 comments below.
>>
>>> diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
>>> index 93f22ae68..52888cc64 100644
>>> --- a/src/lib/core/fiber.c
>>> +++ b/src/lib/core/fiber.c
>>> @@ -82,6 +86,38 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
>>> +#if ENABLE_FIBER_TOP
>>> +static __thread bool fiber_top_enabled = false;
>>> +
>>> +/**
>>> + * An action performed each time a context switch happens.
>>> + * Used to count each fiber's processing time.
>>> + */
>>> +static inline void
>>> +clock_set_on_csw(struct fiber *caller)
>>> +{
>>> +caller->csw++;
>>> +if (!fiber_top_enabled)
>>> +return;
>>> +
>>> +uint64_t clock;
>>> +uint32_t cpu_id;
>>> +clock = __rdtscp(&cpu_id);
>>> +
>>> +if (cpu_id == cord()->cpu_id_last) {
>>> +caller->clock_delta += clock - cord()->clock_last;
>>> +cord()->clock_delta += clock - cord()->clock_last;
>>> +} else {
>>> +cord()->cpu_id_last = cpu_id;
>>> +cord()->cpu_miss_count++;
>>> +}
>>> +cord()->clock_last = clock;
>>> +}
>>> +
>>> +#else
>>> +#define clock_set_on_csw(caller) ;
>>
>> 1. When ENABLE_FIBER_TOP is undefined, the csw counter is not
>> updated. I would move this #if ENABLE_FIBER_TOP into the
>> clock_set_on_csw() body, right after csw is incremented.
> 
> Fixed.
> 
>>
>>> +#endif /* ENABLE_FIBER_TOP */
>>> +
>>> diff --git a/src/lua/fiber.c b/src/lua/fiber.c
>>> index 124908a05..a030e444d 100644
>>> --- a/src/lua/fiber.c
>>> +++ b/src/lua/fiber.c
>>> @@ -319,6 +323,67 @@ lbox_fiber_statof_nobt(struct fiber *f, void *cb_ctx)
>>> return lbox_fiber_statof(f, cb_ctx, false);
>>> }
>>>
>>> +#if ENABLE_FIBER_TOP
>>> +static int
>>> +lbox_fiber_top_entry(struct fiber *f, void *cb_ctx)
>>> +{
>>> +struct lua_State *L = (struct lua_State *) cb_ctx;
>>> +char name_buf[64];
>>> +
>>> +snprintf(name_buf, sizeof(name_buf), "%u/%s", f->fid, f->name);
>>> +lua_pushstring(L, name_buf);
>>
>> 2. A piece of advice - use tt_sprintf:
>>
>>    lua_pushstring(L, tt_sprintf("%u/%s", f->fid, f->name));
> 
> Thanks! Changed.
> 
>>
>>> +
>>> +lua_newtable(L);
>>> +
>>> +lua_pushliteral(L, "average");
>>> +lua_pushnumber(L, f->clock_acc / (double)cord()->clock_acc * 100);
>>> +lua_settable(L, -3);
>>> +lua_pushliteral(L, "instant");
>>> +lua_pushnumber(L, f->clock_delta_last / (double)cord()->clock_delta_last * 100);
>>> +lua_settable(L, -3);
>>> +lua_pushliteral(L, "time");
>>> +lua_pushnumber(L, f->cputime / (double) FIBER_TIME_RES);
>>> +lua_settable(L, -3);
>>> +lua_settable(L, -3);
>>> +
>>> +return 0;
>>> +}
>>> diff --git a/test/app/fiber.result b/test/app/fiber.result
>>> index 3c6115a33..3b9e5da9a 100644
>>> --- a/test/app/fiber.result
>>> +++ b/test/app/fiber.result
>>> @@ -1462,6 +1462,91 @@ fiber.join(fiber.self())
>>> ---
>>> - error: the fiber is not joinable
>>> ...
>>> +sum = 0
>>> +---
>>> +...
>>> +-- gh-2694 fiber.top <http://fiber.top>()
>>> +fiber.top_enable()
>>> +---
>>> +...
>>> +a = fiber.top <http://fiber.top>()
>>> +---
>>> +...
>>> +type(a)
>>> +---
>>> +- table
>>> +...
>>> +-- scheduler is present in fiber.top <http://fiber.top>()
>>> +-- and is indexed by name
>>> +a["1/sched"] ~= nil
>>> +---
>>> +- true
>>> +...
>>> +type(a["cpu misses"]) == 'number'
>>> +---
>>> +- true
>>> +...
>>> +sum_inst = 0
>>> +---
>>> +...
>>> +sum_avg = 0
>>> +---
>>> +...
>>> +-- update table to make sure
>>> +-- a full event loop iteration
>>> +-- has ended
>>> +a = fiber.top <http://fiber.top>()
>>> +---
>>> +...
>>> +for k, v in pairs(a) do\
>>> +    if type(v) == 'table' then\
>>
>> 3. This looks hard to use: one table contains records that
>> are totally different in their structure. I would propose
>> to return cpu misses and fibers separately:
>>
>>    fiber.top() =
>>
>>        cpu_misses = <number>,
>>        time = [
>>            '<id>/<name>' = {...},
>>            '<id>/<name>' = {...},
>>            '<id>/<name>' = {...},
>>            ...
>>        ]
>>
>> Then you can take fiber.top().time and be sure that all
>> records here have the same structure. As far as I remember,
>> we already had similar problems with other statistics, so
>> it is better to design it now in the most extensible way.
>> And it will be easier to add new global statistics to the
>> top in the future.
>>
> 
> Good point. I named the subtable `cpu` instead of `time`. It makes more
> sense imo. Also renamed `cpu misses` to `cpu_misses` so that it can be
> accessed as fiber.top().cpu_misses
> 
> 
> diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
> index 52888cc64..aebaba7f0 100644
> --- a/src/lib/core/fiber.c
> +++ b/src/lib/core/fiber.c
> @@ -88,6 +88,7 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
>  
>  #if ENABLE_FIBER_TOP
>  static __thread bool fiber_top_enabled = false;
> +#endif /* ENABLE_FIBER_TOP */
>  
>  /**
>   * An action performed each time a context switch happens.
> @@ -97,6 +98,8 @@ static inline void
>  clock_set_on_csw(struct fiber *caller)
>  {
>  caller->csw++;
> +
> +#if ENABLE_FIBER_TOP
>  if (!fiber_top_enabled)
>  return;
>  
> @@ -112,12 +115,10 @@ clock_set_on_csw(struct fiber *caller)
>  cord()->cpu_miss_count++;
>  }
>  cord()->clock_last = clock;
> -}
> -
> -#else
> -#define clock_set_on_csw(caller) ;
>  #endif /* ENABLE_FIBER_TOP */
>  
> +}
> +
>  /*
>   * Defines a handler to be executed on exit from cord's thread func,
>   * accessible via cord()->on_exit (normally NULL). It is used to
> diff --git a/src/lua/fiber.c b/src/lua/fiber.c
> index a030e444d..8b3b22e55 100644
> --- a/src/lua/fiber.c
> +++ b/src/lua/fiber.c
> @@ -33,6 +33,7 @@
>  #include <fiber.h>
>  #include "lua/utils.h"
>  #include "backtrace.h"
> +#include "tt_static.h"
>  
>  #include <lua.h>
>  #include <lauxlib.h>
> @@ -328,10 +329,8 @@ static int
>  lbox_fiber_top_entry(struct fiber *f, void *cb_ctx)
>  {
>  struct lua_State *L = (struct lua_State *) cb_ctx;
> -char name_buf[64];
>  
> -snprintf(name_buf, sizeof(name_buf), "%u/%s", f->fid, f->name);
> -lua_pushstring(L, name_buf);
> +lua_pushstring(L, tt_sprintf("%u/%s", f->fid, f->name));
>  
>  lua_newtable(L);
>  
> @@ -357,12 +356,15 @@ lbox_fiber_top(struct lua_State *L)
>        " fiber.top_enable() first");
>  }
>  lua_newtable(L);
> -lua_pushliteral(L, "cpu misses");
> +lua_pushliteral(L, "cpu_misses");
>  lua_pushnumber(L, cord()->cpu_miss_count_last);
>  lua_settable(L, -3);
>  
> +lua_pushliteral(L, "cpu");
> +lua_newtable(L);
>  lbox_fiber_top_entry(&cord()->sched, L);
>  fiber_stat(lbox_fiber_top_entry, L);
> +lua_settable(L, -3);
>  
>  return 1;
>  }
> diff --git a/test/app/fiber.result b/test/app/fiber.result
> index 3b9e5da9a..4a094939f 100644
> --- a/test/app/fiber.result
> +++ b/test/app/fiber.result
> @@ -1478,11 +1478,11 @@ type(a)
>  ...
>  -- scheduler is present in fiber.top <http://fiber.top>()
>  -- and is indexed by name
> -a["1/sched"] ~= nil
> +a.cpu["1/sched"] ~= nil
>  ---
>  - true
>  ...
> -type(a["cpu misses"]) == 'number'
> +type(a.cpu_misses) == 'number'
>  ---
>  - true
>  ...
> @@ -1495,14 +1495,12 @@ sum_avg = 0
>  -- update table to make sure
>  -- a full event loop iteration
>  -- has ended
> -a = fiber.top <http://fiber.top>()
> +a = fiber.top <http://fiber.top>().cpu
>  ---
>  ...
>  for k, v in pairs(a) do\
> -    if type(v) == 'table' then\
> -        sum_inst = sum_inst + v["instant"]\
> -        sum_avg = sum_avg + v["average"]\
> -    end\
> +    sum_inst = sum_inst + v["instant"]\
> +    sum_avg = sum_avg + v["average"]\
>  end
>  ---
>  ...
> @@ -1521,7 +1519,7 @@ tbl = nil
>  f = fiber.new(function()\
>      for i = 1,1000 do end\
>      fiber.yield()\
> -    tbl = fiber.top <http://fiber.top>()[fiber.self().id()..'/'..fiber.self().name()]\
> +    tbl = fiber.top <http://fiber.top>().cpu[fiber.self().id()..'/'..fiber.self().name()]\
>  end)
>  ---
>  ...
> diff --git a/test/app/fiber.test.lua b/test/app/fiber.test.lua
> index ce1f55e8d..38b85d554 100644
> --- a/test/app/fiber.test.lua
> +++ b/test/app/fiber.test.lua
> @@ -638,20 +638,18 @@ a = fiber.top <http://fiber.top>()
>  type(a)
>  -- scheduler is present in fiber.top <http://fiber.top>()
>  -- and is indexed by name
> -a["1/sched"] ~= nil
> -type(a["cpu misses"]) == 'number'
> +a.cpu["1/sched"] ~= nil
> +type(a.cpu_misses) == 'number'
>  sum_inst = 0
>  sum_avg = 0
>  
>  -- update table to make sure
>  -- a full event loop iteration
>  -- has ended
> -a = fiber.top <http://fiber.top>()
> +a = fiber.top <http://fiber.top>().cpu
>  for k, v in pairs(a) do\
> -    if type(v) == 'table' then\
> -        sum_inst = sum_inst + v["instant"]\
> -        sum_avg = sum_avg + v["average"]\
> -    end\
> +    sum_inst = sum_inst + v["instant"]\
> +    sum_avg = sum_avg + v["average"]\
>  end
>  
>  sum_inst
> @@ -661,7 +659,7 @@ tbl = nil
>  f = fiber.new(function()\
>      for i = 1,1000 do end\
>      fiber.yield()\
> -    tbl = fiber.top <http://fiber.top>()[fiber.self().id()..'/'..fiber.self().name()]\
> +    tbl = fiber.top <http://fiber.top>().cpu[fiber.self().id()..'/'..fiber.self().name()]\
>  end)
>  while f:status() ~= 'dead' do fiber.sleep(0.01) end
>  tbl["average"] > 0
> 


More information about the Tarantool-patches mailing list