Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH 0/2] Implement LuaJIT platform metrics
@ 2020-07-21 11:34 Sergey Kaplun
  2020-07-21 11:34 ` [Tarantool-patches] [PATCH 1/2] metrics: add counters for metrics interested in Sergey Kaplun
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Sergey Kaplun @ 2020-07-21 11:34 UTC (permalink / raw)
  To: Igor Munkin, Sergey Ostanevich; +Cc: tarantool-patches

The series consists of 2 patches. The first one adds corresponding
counters to LuaJIT internal structures. The second provides C and Lua
API using this counters to collect metrics.

Branch: https://github.com/tarantool/luajit/tree/skaplun/gh-5187-luajit-metrics
Issue: https://github.com/tarantool/tarantool/issues/5187

Sergey Kaplun (2):
  metrics: add counters for metrics interested in
  metrics: add C and Lua API

 Makefile           |  2 +-
 src/Makefile       |  5 +--
 src/Makefile.dep   |  3 ++
 src/lib_init.c     |  2 ++
 src/lib_misc.c     | 89 ++++++++++++++++++++++++++++++++++++++++++++++
 src/lj_cdata.c     |  2 ++
 src/lj_cdata.h     |  2 ++
 src/lj_gc.c        |  4 +++
 src/lj_gc.h        |  6 +---
 src/lj_jit.h       |  7 ++++
 src/lj_misc_capi.c | 82 ++++++++++++++++++++++++++++++++++++++++++
 src/lj_obj.h       | 25 +++++++++++++
 src/lj_snap.c      |  1 +
 src/lj_state.c     | 13 ++++++-
 src/lj_str.c       |  5 +++
 src/lj_tab.c       |  2 ++
 src/lj_trace.c     |  5 ++-
 src/lj_udata.c     |  2 ++
 src/lmisclib.h     | 71 ++++++++++++++++++++++++++++++++++++
 src/luaconf.h      |  1 +
 20 files changed, 319 insertions(+), 10 deletions(-)
 create mode 100644 src/lib_misc.c
 create mode 100644 src/lj_misc_capi.c
 create mode 100644 src/lmisclib.h

-- 
2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Tarantool-patches] [PATCH 1/2] metrics: add counters for metrics interested in
  2020-07-21 11:34 [Tarantool-patches] [PATCH 0/2] Implement LuaJIT platform metrics Sergey Kaplun
@ 2020-07-21 11:34 ` Sergey Kaplun
  2020-07-23 20:12   ` Igor Munkin
  2020-07-21 11:34 ` [Tarantool-patches] [PATCH 2/2] metrics: add C and Lua API Sergey Kaplun
  2020-07-21 11:37 ` [Tarantool-patches] [RFC] rfc: luajit metrics Sergey Kaplun
  2 siblings, 1 reply; 16+ messages in thread
From: Sergey Kaplun @ 2020-07-21 11:34 UTC (permalink / raw)
  To: Igor Munkin, Sergey Ostanevich; +Cc: tarantool-patches

This patch adds counters for:
  - amount of table, cdata and udata objects,
  - GC invokations by GC states,
  - strhash misses or hits,
  - amount of freed or allocated memory,
  - amount of aborted traces and restored snapshots.

C and Lua API will be added in the next patch.

Part of tarantool/tarantool#5187
---
 src/lj_cdata.c |  2 ++
 src/lj_cdata.h |  2 ++
 src/lj_gc.c    |  4 ++++
 src/lj_gc.h    |  6 +-----
 src/lj_jit.h   |  7 +++++++
 src/lj_obj.h   | 25 +++++++++++++++++++++++++
 src/lj_snap.c  |  1 +
 src/lj_state.c | 13 ++++++++++++-
 src/lj_str.c   |  5 +++++
 src/lj_tab.c   |  2 ++
 src/lj_trace.c |  5 ++++-
 src/lj_udata.c |  2 ++
 12 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index 68e16d7..0dd8553 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -46,6 +46,7 @@ GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
   cd->marked |= 0x80;
   cd->gct = ~LJ_TCDATA;
   cd->ctypeid = id;
+  g->gc.cdatanum++;
   return cd;
 }
 
@@ -82,6 +83,7 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
   } else {
     lj_mem_free(g, memcdatav(cd), sizecdatav(cd));
   }
+  g->gc.cdatanum--;
 }
 
 void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
diff --git a/src/lj_cdata.h b/src/lj_cdata.h
index 5bb0f5d..66b023b 100644
--- a/src/lj_cdata.h
+++ b/src/lj_cdata.h
@@ -45,6 +45,7 @@ static LJ_AINLINE GCcdata *lj_cdata_new(CTState *cts, CTypeID id, CTSize sz)
   cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz);
   cd->gct = ~LJ_TCDATA;
   cd->ctypeid = ctype_check(cts, id);
+  G(cts->L)->gc.cdatanum++;
   return cd;
 }
 
@@ -54,6 +55,7 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz)
   GCcdata *cd = (GCcdata *)lj_mem_newgco(L, sizeof(GCcdata) + sz);
   cd->gct = ~LJ_TCDATA;
   cd->ctypeid = id;
+  G(L)->gc.cdatanum++;
   return cd;
 }
 
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 1c8e6ce..44c8aa1 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -634,6 +634,7 @@ static void atomic(global_State *g, lua_State *L)
 static size_t gc_onestep(lua_State *L)
 {
   global_State *g = G(L);
+  g->gc.state_count[g->gc.state]++;
   switch (g->gc.state) {
   case GCSpause:
     gc_mark_start(g);  /* Start a new GC cycle by marking all GC roots. */
@@ -857,6 +858,8 @@ void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz)
   lua_assert((nsz == 0) == (p == NULL));
   lua_assert(checkptrGC(p));
   g->gc.total = (g->gc.total - osz) + nsz;
+  g->gc.allocated += nsz;
+  g->gc.freed += osz;
   return p;
 }
 
@@ -869,6 +872,7 @@ void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size)
     lj_err_mem(L);
   lua_assert(checkptrGC(o));
   g->gc.total += size;
+  g->gc.allocated += size;
   setgcrefr(o->gch.nextgc, g->gc.root);
   setgcref(g->gc.root, o);
   newwhite(g, o);
diff --git a/src/lj_gc.h b/src/lj_gc.h
index 669bbe9..2051220 100644
--- a/src/lj_gc.h
+++ b/src/lj_gc.h
@@ -8,11 +8,6 @@
 
 #include "lj_obj.h"
 
-/* Garbage collector states. Order matters. */
-enum {
-  GCSpause, GCSpropagate, GCSatomic, GCSsweepstring, GCSsweep, GCSfinalize
-};
-
 /* Bitmasks for marked field of GCobj. */
 #define LJ_GC_WHITE0	0x01
 #define LJ_GC_WHITE1	0x02
@@ -117,6 +112,7 @@ LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
 static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize)
 {
   g->gc.total -= (GCSize)osize;
+  g->gc.freed += osize;
   g->allocf(g->allocd, p, osize, 0);
 }
 
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 7eb3d2a..960a02b 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -475,6 +475,13 @@ typedef struct jit_State {
   size_t szmcarea;	/* Size of current mcode area. */
   size_t szallmcarea;	/* Total size of all allocated mcode areas. */
 
+  size_t nsnaprestore;	/* Overall number of snap restores for all traces
+                        ** "belonging" to the given jit_State
+                        ** since the last call to luaM_metrics(). */
+  size_t ntraceabort;	/* Overall number of abort traces
+                        ** "belonging" to the given jit_State
+                        ** since the last call to luaM_metrics(). */
+
   TValue errinfo;	/* Additional info element for trace errors. */
 
 #if LJ_HASPROFILE
diff --git a/src/lj_obj.h b/src/lj_obj.h
index f368578..1fee04f 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -571,13 +571,28 @@ typedef enum {
 #define basemt_obj(g, o)	((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
 #define mmname_str(g, mm)	(strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
 
+/* Garbage collector states. Order matters. */
+enum {
+  GCSpause,
+  GCSpropagate,
+  GCSatomic,
+  GCSsweepstring,
+  GCSsweep,
+  GCSfinalize,
+  GCSlast
+};
+
 typedef struct GCState {
   GCSize total;		/* Memory currently allocated. */
+  size_t freed;		/* Memory freed since last luaM_metrics() call. */
+  size_t allocated;	/* Memory allocated since last luaM_metrics() call. */
   GCSize threshold;	/* Memory threshold. */
   uint8_t currentwhite;	/* Current white color. */
   uint8_t state;	/* GC state. */
   uint8_t nocdatafin;	/* No cdata finalizer called. */
   uint8_t unused2;
+  size_t state_count[GCSlast]; /* Count of GC invocations with different states
+			       ** since previous call of luaM_metrics().  */
   MSize sweepstr;	/* Sweep position in string table. */
   GCRef root;		/* List of all collectable objects. */
   MRef sweep;		/* Sweep position in root list. */
@@ -589,6 +604,12 @@ typedef struct GCState {
   GCSize estimate;	/* Estimate of memory actually in use. */
   MSize stepmul;	/* Incremental GC step granularity. */
   MSize pause;		/* Pause between successive GC cycles. */
+
+  size_t tabnum;	/* Current amount of table objects. */
+  size_t udatanum;	/* Current amount of udata objects. */
+#ifdef LJ_HASFFI
+  size_t cdatanum;	/* Current amount of cdata objects. */
+#endif
 } GCState;
 
 /* Global state, shared by all threads of a Lua universe. */
@@ -602,6 +623,10 @@ typedef struct global_State {
     BloomFilter next[2];
   } strbloom;
 #endif
+  size_t strhash_hit;	/* New string has been found in the storage
+			** since last luaM_metrics() call. */
+  size_t strhash_miss;	/* New string has been added to the storage
+			** since last luaM_metrics() call. */
   lua_Alloc allocf;	/* Memory allocator. */
   void *allocd;		/* Memory allocator data. */
   GCState gc;		/* Garbage collector. */
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 7554caf..4cf27e7 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -904,6 +904,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
     L->top = frame + snap->nslots;
     break;
   }
+  J->nsnaprestore++;
   return pc;
 }
 
diff --git a/src/lj_state.c b/src/lj_state.c
index 632dd07..b71ebae 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -204,6 +204,16 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
   setgcref(g->uvhead.prev, obj2gco(&g->uvhead));
   setgcref(g->uvhead.next, obj2gco(&g->uvhead));
   g->strmask = ~(MSize)0;
+
+  memset(g->gc.state_count, 0, GCSlast * sizeof(g->gc.state_count[0]));
+  g->strhash_hit = 0;
+  g->strhash_miss = 0;
+  g->gc.tabnum = 0;
+  g->gc.udatanum = 0;
+#if LJ_HASFFI
+  g->gc.cdatanum = 0;
+#endif
+
   setnilV(registry(L));
   setnilV(&g->nilnode.val);
   setnilV(&g->nilnode.key);
@@ -214,7 +224,8 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
   g->gc.state = GCSpause;
   setgcref(g->gc.root, obj2gco(L));
   setmref(g->gc.sweep, &g->gc.root);
-  g->gc.total = sizeof(GG_State);
+  g->gc.allocated = g->gc.total = sizeof(GG_State);
+  g->gc.freed = 0;
   g->gc.pause = LUAI_GCPAUSE;
   g->gc.stepmul = LUAI_GCMUL;
   lj_dispatch_init((GG_State *)L);
diff --git a/src/lj_str.c b/src/lj_str.c
index 24e90ca..8ff955e 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -222,6 +222,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
                       str_fastcmp(str, strdata(sx), len) == 0) {
 	/* Resurrect if dead. Can only happen with fixstring() (keywords). */
 	if (isdead(g, o)) flipwhite(o);
+	g->strhash_hit++;
 	return sx;  /* Return existing string. */
       }
       o = gcnext(o);
@@ -234,6 +235,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
                       memcmp(str, strdata(sx), len) == 0) {
 	/* Resurrect if dead. Can only happen with fixstring() (keywords). */
 	if (isdead(g, o)) flipwhite(o);
+	g->strhash_hit++;
 	return sx;  /* Return existing string. */
       }
       o = gcnext(o);
@@ -266,6 +268,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
 	    if (sx->hash == fh && sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
 	      /* Resurrect if dead. Can only happen with fixstring() (keywords). */
 	      if (isdead(g, o)) flipwhite(o);
+	      g->strhash_hit++;
 	      return sx;  /* Return existing string. */
 	    }
 	    o = gcnext(o);
@@ -276,6 +279,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
 	    if (sx->hash == fh && sx->len == len && memcmp(str, strdata(sx), len) == 0) {
 	      /* Resurrect if dead. Can only happen with fixstring() (keywords). */
 	      if (isdead(g, o)) flipwhite(o);
+	      g->strhash_hit++;
 	      return sx;  /* Return existing string. */
 	    }
 	    o = gcnext(o);
@@ -293,6 +297,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
     }
   }
 #endif
+  g->strhash_miss++;
   /* Nope, create a new string. */
   s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
   newwhite(g, s);
diff --git a/src/lj_tab.c b/src/lj_tab.c
index ff216f3..c5f358e 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -141,6 +141,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
   }
   if (hbits)
     newhpart(L, t, hbits);
+  G(L)->gc.tabnum++;
   return t;
 }
 
@@ -240,6 +241,7 @@ void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
     lj_mem_free(g, t, sizetabcolo((uint32_t)t->colo & 0x7f));
   else
     lj_mem_freet(g, t);
+  g->gc.tabnum--;
 }
 
 /* -- Table resizing ------------------------------------------------------ */
diff --git a/src/lj_trace.c b/src/lj_trace.c
index d85b47f..b17bde3 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -538,8 +538,10 @@ static int trace_downrec(jit_State *J)
   /* Restart recording at the return instruction. */
   lua_assert(J->pt != NULL);
   lua_assert(bc_isret(bc_op(*J->pc)));
-  if (bc_op(*J->pc) == BC_RETM)
+  if (bc_op(*J->pc) == BC_RETM) {
+    J->ntraceabort++;
     return 0;  /* NYI: down-recursion with RETM. */
+  }
   J->parent = 0;
   J->exitno = 0;
   J->state = LJ_TRACE_RECORD;
@@ -616,6 +618,7 @@ static int trace_abort(jit_State *J)
     return trace_downrec(J);
   else if (e == LJ_TRERR_MCODEAL)
     lj_trace_flushall(L);
+  J->ntraceabort++;
   return 0;
 }
 
diff --git a/src/lj_udata.c b/src/lj_udata.c
index bd0321b..70c722a 100644
--- a/src/lj_udata.c
+++ b/src/lj_udata.c
@@ -24,11 +24,13 @@ GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env)
   /* Chain to userdata list (after main thread). */
   setgcrefr(ud->nextgc, mainthread(g)->nextgc);
   setgcref(mainthread(g)->nextgc, obj2gco(ud));
+  g->gc.udatanum++;
   return ud;
 }
 
 void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud)
 {
+  g->gc.udatanum--;
   lj_mem_free(g, ud, sizeudata(ud));
 }
 
-- 
2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Tarantool-patches] [PATCH 2/2] metrics: add C and Lua API
  2020-07-21 11:34 [Tarantool-patches] [PATCH 0/2] Implement LuaJIT platform metrics Sergey Kaplun
  2020-07-21 11:34 ` [Tarantool-patches] [PATCH 1/2] metrics: add counters for metrics interested in Sergey Kaplun
@ 2020-07-21 11:34 ` Sergey Kaplun
  2020-07-21 11:37 ` [Tarantool-patches] [RFC] rfc: luajit metrics Sergey Kaplun
  2 siblings, 0 replies; 16+ messages in thread
From: Sergey Kaplun @ 2020-07-21 11:34 UTC (permalink / raw)
  To: Igor Munkin, Sergey Ostanevich; +Cc: tarantool-patches

This patch adds C and Lua API for luajit metrics. Metrics include GC
statistic, JIT information, strhash hit/miss counters and amount of
different objects. C API provides with aditional header <lmisclib.h>
that contains structure `luam_metrics` and `luaM_metrics` function
definition. Lua userspace expanded with builtin library (named `misc`)
with corresponding method `getmetrics`. Metrics are divided by the two
types: global and incremental. Global metrics are collected throughout
the platform uptime. Incremental metrics are reset after each
`luaM_metrics()` call.

Part of tarantool/tarantool#5187
---
 Makefile           |  2 +-
 src/Makefile       |  5 +--
 src/Makefile.dep   |  3 ++
 src/lib_init.c     |  2 ++
 src/lib_misc.c     | 89 ++++++++++++++++++++++++++++++++++++++++++++++
 src/lj_misc_capi.c | 82 ++++++++++++++++++++++++++++++++++++++++++
 src/lmisclib.h     | 71 ++++++++++++++++++++++++++++++++++++
 src/luaconf.h      |  1 +
 8 files changed, 252 insertions(+), 3 deletions(-)
 create mode 100644 src/lib_misc.c
 create mode 100644 src/lj_misc_capi.c
 create mode 100644 src/lmisclib.h

diff --git a/Makefile b/Makefile
index 0f93308..4a56917 100644
--- a/Makefile
+++ b/Makefile
@@ -84,7 +84,7 @@ FILE_A= libluajit.a
 FILE_SO= libluajit.so
 FILE_MAN= luajit.1
 FILE_PC= luajit.pc
-FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
+FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h lmisclib.h
 FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
 	      dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
 	      dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
diff --git a/src/Makefile b/src/Makefile
index 827d4a4..fac69bc 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -480,13 +480,14 @@ LJVM_BOUT= $(LJVM_S)
 LJVM_MODE= elfasm
 
 LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
-	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
+	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \
+	 lib_misc.o
 LJLIB_C= $(LJLIB_O:.o=.c)
 
 LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
-	  lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \
+	  lj_strfmt.o lj_strfmt_num.o lj_api.o lj_misc_capi.o lj_profile.o \
 	  lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 2b1cb5e..1c3d8bd 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -41,6 +41,9 @@ lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
  lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+lib_misc.o: lib_misc.c lua.h lmisclib.h luaconf.h lj_obj.h lj_tab.h lj_str.h \
+ lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
+lj_misc_capi.o: lj_misc_capi.c lua.h lmisclib.h luaconf.h lj_obj.h lj_arch.h
 lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
 lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
diff --git a/src/lib_init.c b/src/lib_init.c
index 2ed370e..664aa7d 100644
--- a/src/lib_init.c
+++ b/src/lib_init.c
@@ -12,6 +12,7 @@
 #include "lua.h"
 #include "lauxlib.h"
 #include "lualib.h"
+#include "lmisclib.h"
 
 #include "lj_arch.h"
 
@@ -26,6 +27,7 @@ static const luaL_Reg lj_lib_load[] = {
   { LUA_DBLIBNAME,	luaopen_debug },
   { LUA_BITLIBNAME,	luaopen_bit },
   { LUA_JITLIBNAME,	luaopen_jit },
+  { LUAM_MISCLIBNAME,	luaopen_misc },
   { NULL,		NULL }
 };
 
diff --git a/src/lib_misc.c b/src/lib_misc.c
new file mode 100644
index 0000000..d23c2db
--- /dev/null
+++ b/src/lib_misc.c
@@ -0,0 +1,89 @@
+/*
+ * Lua interface to tarantool-specific extensions to the public Lua/C API.
+ *
+ * Major portions taken verbatim or adapted from the LuaVela interpreter.
+ * Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#define lib_misc_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lmisclib.h"
+
+#include "lj_obj.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_ff.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+static LJ_AINLINE void setnumfield(struct lua_State *L, GCtab *t,
+				   const char *name, int64_t val)
+{
+	GCstr *key = lj_str_new(L, name, strlen(name));
+	setnumV(lj_tab_setstr(L, t, key), (double)val);
+}
+
+static LJ_AINLINE void settabfield(struct lua_State *L, GCtab *t,
+				   const char *name, GCtab *val)
+{
+	GCstr *key = lj_str_new(L, name, strlen(name));
+	settabV(L, lj_tab_setstr(L, t, key), val);
+}
+
+#define LJLIB_MODULE_misc
+
+LJLIB_CF(misc_getmetrics)
+{
+	lua_createtable(L, 0, 7);
+	GCtab *glob_m = tabV(L->top - 1);
+	lua_createtable(L, 0, 12);
+	GCtab *inc_m = tabV(L->top - 1);
+	lua_createtable(L, 0, 2);
+	GCtab *m = tabV(L->top - 1);
+
+	struct luam_Metrics metrics;
+	luaM_metrics(L, &metrics);
+
+	setnumfield(L, glob_m, "strnum", metrics.strnum);
+	setnumfield(L, glob_m, "tabnum", metrics.tabnum);
+	setnumfield(L, glob_m, "udatanum", metrics.udatanum);
+	setnumfield(L, glob_m, "cdatanum", metrics.cdatanum);
+
+	setnumfield(L, glob_m, "gc_total", metrics.gc_total);
+
+	setnumfield(L, glob_m, "jit_mcode_size", metrics.jit_mcode_size);
+	setnumfield(L, glob_m, "jit_trace_num", metrics.jit_trace_num);
+
+	setnumfield(L, inc_m, "gc_freed", metrics.gc_freed);
+	setnumfield(L, inc_m, "gc_allocated", metrics.gc_allocated);
+
+	setnumfield(L, inc_m, "gc_steps_pause", metrics.gc_steps_pause);
+	setnumfield(L, inc_m, "gc_steps_propagate", metrics.gc_steps_propagate);
+	setnumfield(L, inc_m, "gc_steps_atomic", metrics.gc_steps_atomic);
+	setnumfield(L, inc_m, "gc_steps_sweepstring",
+		    metrics.gc_steps_sweepstring);
+	setnumfield(L, inc_m, "gc_steps_sweep", metrics.gc_steps_sweep);
+	setnumfield(L, inc_m, "gc_steps_finalize", metrics.gc_steps_finalize);
+
+	setnumfield(L, inc_m, "jit_snap_restore", metrics.jit_snap_restore);
+	setnumfield(L, inc_m, "jit_trace_abort", metrics.jit_trace_abort);
+
+	setnumfield(L, inc_m, "strhash_hit", metrics.strhash_hit);
+	setnumfield(L, inc_m, "strhash_miss", metrics.strhash_miss);
+	settabfield(L, m, "global", glob_m);
+	settabfield(L, m, "incremental", inc_m);
+	return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+LUALIB_API int luaopen_misc(struct lua_State *L)
+{
+	LJ_LIB_REG(L, LUAM_MISCLIBNAME, misc);
+	return 1;
+}
diff --git a/src/lj_misc_capi.c b/src/lj_misc_capi.c
new file mode 100644
index 0000000..5f99064
--- /dev/null
+++ b/src/lj_misc_capi.c
@@ -0,0 +1,82 @@
+/*
+** Tarantool-specific extensions to the public Lua/C API.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#include "lua.h"
+#include "lmisclib.h"
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_dispatch.h"
+
+#if LJ_HASJIT
+#include "lj_jit.h"
+#endif
+
+LUAM_API struct luam_Metrics *
+luaM_metrics(lua_State *L, struct luam_Metrics *dst)
+{
+	memset(dst, 0, sizeof(*dst));
+	global_State *g = G(L);
+	GCState *gc = &g->gc;
+#if LJ_HASJIT
+	jit_State *J = G2J(g);
+#endif
+
+	dst->strhash_hit = g->strhash_hit;
+	g->strhash_hit = 0;
+	dst->strhash_miss = g->strhash_miss;
+	g->strhash_miss = 0;
+
+	dst->strnum = g->strnum;
+	dst->tabnum = gc->tabnum;
+	dst->udatanum = gc->udatanum;
+#if LJ_HASFFI
+	dst->cdatanum = gc->cdatanum;
+#else
+	dst->cdatanum = 0;
+#endif
+
+	dst->gc_total = gc->total;
+	dst->gc_freed = gc->freed;
+	gc->freed = 0;
+	dst->gc_allocated = gc->allocated;
+	gc->allocated = 0;
+
+	dst->gc_steps_pause = gc->state_count[GCSpause];
+	gc->state_count[GCSpause] = 0;
+
+	dst->gc_steps_propagate = gc->state_count[GCSpropagate];
+	gc->state_count[GCSpropagate] = 0;
+
+	dst->gc_steps_atomic = gc->state_count[GCSatomic];
+	gc->state_count[GCSatomic] = 0;
+
+	dst->gc_steps_sweepstring = gc->state_count[GCSsweepstring];
+	gc->state_count[GCSsweepstring] = 0;
+
+	dst->gc_steps_sweep = gc->state_count[GCSsweep];
+	gc->state_count[GCSsweep] = 0;
+
+	dst->gc_steps_finalize = gc->state_count[GCSfinalize];
+	gc->state_count[GCSfinalize] = 0;
+
+#if LJ_HASJIT
+	dst->jit_snap_restore = J->nsnaprestore;
+	J->nsnaprestore = 0;
+	dst->jit_trace_abort = J->ntraceabort;
+	J->ntraceabort = 0;
+
+	dst->jit_mcode_size = J->szallmcarea;
+	dst->jit_trace_num = J->freetrace;
+#else
+	dst->jit_snap_restore = 0;
+	dst->jit_mcode_size = 0;
+	dst->jit_trace_num = 0;
+#endif
+
+	return dst;
+}
diff --git a/src/lmisclib.h b/src/lmisclib.h
new file mode 100644
index 0000000..8d63038
--- /dev/null
+++ b/src/lmisclib.h
@@ -0,0 +1,71 @@
+/*
+ * Major portions taken verbatim or adapted from the LuaVela.
+ * Copyright (C) 2015-2019 IPONWEB Ltd.
+ */
+
+#ifndef _LMISCLIB_H_INCLUDED
+#define _LMISCLIB_H_INCLUDED
+
+#include "lua.h"
+
+/* API for obtaining various metrics from the platform. */
+
+struct luam_Metrics {
+	/*
+	 * New string has been found in the storage since last
+	 * luaM_metrics() call.
+	 */
+	size_t strhash_hit;
+	/*
+	 * New string has been added to the storage since last
+	 * luaM_metrics() call.
+	 */
+	size_t strhash_miss;
+
+	size_t strnum;   /* Current amount of string objects. */
+	size_t tabnum;   /* Current amount of table objects. */
+	size_t udatanum; /* Current amount of udata objects. */
+	size_t cdatanum; /* Current amount of cdata objects. */
+
+	/* Memory currently allocated. */
+	size_t gc_total;
+	/* Memory freed since last luaM_metrics() call. */
+	size_t gc_freed;
+	/* Memory allocated since last luaM_metrics() call. */
+	size_t gc_allocated;
+
+	/*
+	 * Count of GC invocations with different states
+	 * since previous call of luaM_metrics().
+	 */
+	size_t gc_steps_pause;
+	size_t gc_steps_propagate;
+	size_t gc_steps_atomic;
+	size_t gc_steps_sweepstring;
+	size_t gc_steps_sweep;
+	size_t gc_steps_finalize;
+
+	/*
+	 * Overall number of snap restores for all traces
+	 * "belonging" to the given jit_State since the last call
+	 * to luaM_metrics().
+	 */
+	size_t jit_snap_restore;
+	/*
+	 * Overall number of abort traces "belonging" to the given
+	 * jit_State since the last call to luaM_metrics().
+	 */
+	size_t jit_trace_abort;
+	/* Total size of all allocated machine code areas. */
+	size_t jit_mcode_size;
+	/* Current amount of JIT traces. */
+	unsigned int jit_trace_num;
+};
+
+LUAM_API struct luam_Metrics *luaM_metrics(lua_State *L,
+					   struct luam_Metrics *dst);
+
+#define LUAM_MISCLIBNAME "misc"
+LUALIB_API int luaopen_misc(lua_State *L);
+
+#endif /* _LMISCLIB_H_INCLUDED */
diff --git a/src/luaconf.h b/src/luaconf.h
index 60cb928..cf01e36 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -144,6 +144,7 @@
 #endif
 
 #define LUALIB_API	LUA_API
+#define LUAM_API	LUA_API
 
 /* Support for internal assertions. */
 #if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
-- 
2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Tarantool-patches] [RFC] rfc: luajit metrics
  2020-07-21 11:34 [Tarantool-patches] [PATCH 0/2] Implement LuaJIT platform metrics Sergey Kaplun
  2020-07-21 11:34 ` [Tarantool-patches] [PATCH 1/2] metrics: add counters for metrics interested in Sergey Kaplun
  2020-07-21 11:34 ` [Tarantool-patches] [PATCH 2/2] metrics: add C and Lua API Sergey Kaplun
@ 2020-07-21 11:37 ` Sergey Kaplun
  2020-07-23 10:15   ` Sergey Ostanevich
  2 siblings, 1 reply; 16+ messages in thread
From: Sergey Kaplun @ 2020-07-21 11:37 UTC (permalink / raw)
  To: Igor Munkin, Sergey Ostanevich; +Cc: tarantool-patches

Part of #5187
---

This patch adds RFC to LuaJIT metrics interfaces. Nevertheless name
`misc` for builtin library is not good and should be discussed, because
tons of user modules can use that name for their own libraies.

Branch: https://github.com/tarantool/tarantool/tree/skaplun/5187-luajit-metrics
Issue: https://github.com/tarantool/tarantool/issues/5187

 doc/rfc/5187-luajit-metrics.md | 134 +++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 doc/rfc/5187-luajit-metrics.md

diff --git a/doc/rfc/5187-luajit-metrics.md b/doc/rfc/5187-luajit-metrics.md
new file mode 100644
index 000000000..33a8bf8e0
--- /dev/null
+++ b/doc/rfc/5187-luajit-metrics.md
@@ -0,0 +1,134 @@
+# LuaJIT metrics
+
+* **Status**: In progress
+* **Start date**: 17-07-2020
+* **Authors**: Sergey Kaplun @Buristan skaplun@tarantool.org,
+               Igor Munkin @igormunkin imun@tarantool.org,
+               Sergey Ostanevich @sergos sergos@tarantool.org
+* **Issues**: [#5187](https://github.com/tarantool/tarantool/issues/5187)
+
+## Summary
+
+LuaJIT metrics provide extra information about the Lua state. They consists of
+GC metrics (overall amount of objects and memory usage), JIT stats (both
+related to the compiled traces and the engine itself), string hash hits/misses.
+
+## Background and motivation
+
+One can be curious about their application performance. We are going to provide
+various metrics about the several platform subsystems behaviour. GC pressure
+produced by user code can weight down all application performance. Irrelevant
+traces compiled by the JIT engine can just burn CPU time with no benefits as a
+result. String hash collisions can lead to DoS caused by a single request. All
+these metrics should be well monitored by users wanting to improve the
+performance of their application.
+
+## Detailed design
+
+For C API we introduce additional extension header <lmisclib.h> that provides
+interfaces for new LuaJIT C API extensions. The first interface in this header
+will be the following:
+
+```
+/* API for obtaining various metrics from the platform. */
+
+LUAM_API struct luam_Metrics *luaM_metrics(lua_State *L,
+					   struct luam_Metrics *dst);
+```
+
+This function fills the structure pointed by `dst` with the corresponding
+metrics related to Lua state anchored to the given coroutine `L`. The result of
+the function is a pointer to the filled structure (the same `dst` points to).
+
+`struct luam_Metrics` has the following definition:
+
+```
+struct luam_Metrics {
+	/*
+	 * New string has been found in the storage since last
+	 * luaM_metrics() call.
+	 */
+	size_t strhash_hit;
+	/*
+	 * New string has been added to the storage since last
+	 * luaM_metrics() call.
+	 */
+	size_t strhash_miss;
+
+	size_t strnum;   /* Current amount of string objects. */
+	size_t tabnum;   /* Current amount of table objects. */
+	size_t udatanum; /* Current amount of udata objects. */
+	size_t cdatanum; /* Current amount of cdata objects. */
+
+	/* Memory currently allocated. */
+	size_t gc_total;
+	/* Memory freed since last luaM_metrics() call. */
+	size_t gc_freed;
+	/* Memory allocated since last luaM_metrics() call. */
+	size_t gc_allocated;
+
+	/*
+	 * Count of GC invocations with different states
+	 * since previous call of luaM_metrics().
+	 */
+	size_t gc_steps_pause;
+	size_t gc_steps_propagate;
+	size_t gc_steps_atomic;
+	size_t gc_steps_sweepstring;
+	size_t gc_steps_sweep;
+	size_t gc_steps_finalize;
+
+	/*
+	 * Overall number of snap restores for all traces
+	 * "belonging" to the given jit_State since the last call
+	 * to luaM_metrics().
+	 */
+	size_t jit_snap_restore;
+	/*
+	 * Overall number of abort traces "belonging" to the given
+	 * jit_State since the last call to luaM_metrics().
+	 */
+	size_t jit_trace_abort;
+	/* Total size of all allocated machine code areas. */
+	size_t jit_mcode_size;
+	/* Current amount of JIT traces. */
+	unsigned int jit_trace_num;
+};
+```
+
+One can see metrics are divided by the two types: global and incremental.
+Global metrics are collected throughout the platform uptime. Incremental
+metrics are reset after each `luaM_metrics()` call.
+
+There is also a complement introduced for Lua space -- `misc.getmetrics()`.
+This function is just a wrapper for `luaM_metrics()` returning a Lua table with
+the similar metrics. Its usage is quite simple:
+```
+$ ./src/tarantool
+Tarantool 2.5.0-267-gbf047ad44
+type 'help' for interactive help
+tarantool> misc.getmetrics()
+---
+- global:
+    tabnum: 1812
+    gc_total: 1369927
+    strnum: 5767
+    jit_trace_num: 0
+    cdatanum: 89
+    jit_mcode_size: 0
+    udatanum: 17
+  incremental:
+    jit_snap_restore: 0
+    gc_freed: 2239391
+    strhash_hit: 53759
+    gc_steps_finalize: 0
+    gc_allocated: 3609318
+    gc_steps_atomic: 6
+    gc_steps_sweep: 296
+    gc_steps_sweepstring: 17920
+    jit_trace_abort: 0
+    strhash_miss: 6874
+    gc_steps_propagate: 10106
+    gc_steps_pause: 7
+...
+```
-- 
2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Tarantool-patches] [RFC] rfc: luajit metrics
  2020-07-21 11:37 ` [Tarantool-patches] [RFC] rfc: luajit metrics Sergey Kaplun
@ 2020-07-23 10:15   ` Sergey Ostanevich
  2020-07-24 11:18     ` Sergey Kaplun
  0 siblings, 1 reply; 16+ messages in thread
From: Sergey Ostanevich @ 2020-07-23 10:15 UTC (permalink / raw)
  To: Sergey Kaplun; +Cc: tarantool-patches

Hi!

Thanks for the patch! See my comments below.
Otherwise LGTM.

Sergos

On 21 Jul 14:37, Sergey Kaplun wrote:
> Part of #5187
> ---
> 
> This patch adds RFC to LuaJIT metrics interfaces. Nevertheless name
> `misc` for builtin library is not good and should be discussed, because
> tons of user modules can use that name for their own libraies.
> 
> Branch: https://github.com/tarantool/tarantool/tree/skaplun/5187-luajit-metrics
> Issue: https://github.com/tarantool/tarantool/issues/5187
> 
>  doc/rfc/5187-luajit-metrics.md | 134 +++++++++++++++++++++++++++++++++
>  1 file changed, 134 insertions(+)
>  create mode 100644 doc/rfc/5187-luajit-metrics.md
> 
> diff --git a/doc/rfc/5187-luajit-metrics.md b/doc/rfc/5187-luajit-metrics.md
> new file mode 100644
> index 000000000..33a8bf8e0
> --- /dev/null
> +++ b/doc/rfc/5187-luajit-metrics.md
> @@ -0,0 +1,134 @@
> +# LuaJIT metrics
> +
> +* **Status**: In progress
> +* **Start date**: 17-07-2020
> +* **Authors**: Sergey Kaplun @Buristan skaplun@tarantool.org,
> +               Igor Munkin @igormunkin imun@tarantool.org,
> +               Sergey Ostanevich @sergos sergos@tarantool.org
> +* **Issues**: [#5187](https://github.com/tarantool/tarantool/issues/5187)
> +
> +## Summary
> +
> +LuaJIT metrics provide extra information about the Lua state. They consists of
> +GC metrics (overall amount of objects and memory usage), JIT stats (both
> +related to the compiled traces and the engine itself), string hash hits/misses.
> +
> +## Background and motivation
> +
> +One can be curious about their application performance. We are going to provide
> +various metrics about the several platform subsystems behaviour. GC pressure
> +produced by user code can weight down all application performance. Irrelevant
> +traces compiled by the JIT engine can just burn CPU time with no benefits as a
> +result. String hash collisions can lead to DoS caused by a single request. All
> +these metrics should be well monitored by users wanting to improve the
> +performance of their application.
> +
> +## Detailed design
> +
> +For C API we introduce additional extension header <lmisclib.h> that provides
> +interfaces for new LuaJIT C API extensions. The first interface in this header
> +will be the following:
> +
> +```
> +/* API for obtaining various metrics from the platform. */
> +
> +LUAM_API struct luam_Metrics *luaM_metrics(lua_State *L,
> +					   struct luam_Metrics *dst);
> +```
> +
> +This function fills the structure pointed by `dst` with the corresponding
> +metrics related to Lua state anchored to the given coroutine `L`. The result of
> +the function is a pointer to the filled structure (the same `dst` points to).
> +
> +`struct luam_Metrics` has the following definition:

   ^ The

> +
> +```
> +struct luam_Metrics {
> +	/*
> +	 * New string has been found in the storage since last

The 'new string found' is misleading IMO. 
Number of strings found in string hash instead of allocation of new
ones since ...

> +	 * luaM_metrics() call.
> +	 */
> +	size_t strhash_hit;
> +	/*
> +	 * New string has been added to the storage since last

Number of new strings allocated and put into string hash since ...

> +	 * luaM_metrics() call.
> +	 */
> +	size_t strhash_miss;
> +
> +	size_t strnum;   /* Current amount of string objects. */
> +	size_t tabnum;   /* Current amount of table objects. */
> +	size_t udatanum; /* Current amount of udata objects. */
> +	size_t cdatanum; /* Current amount of cdata objects. */
> +
> +	/* Memory currently allocated. */
> +	size_t gc_total;
> +	/* Memory freed since last luaM_metrics() call. */
> +	size_t gc_freed;
> +	/* Memory allocated since last luaM_metrics() call. */
> +	size_t gc_allocated;
> +
> +	/*
> +	 * Count of GC invocations with different states
> +	 * since previous call of luaM_metrics().
> +	 */
> +	size_t gc_steps_pause;
> +	size_t gc_steps_propagate;
> +	size_t gc_steps_atomic;
> +	size_t gc_steps_sweepstring;
> +	size_t gc_steps_sweep;
> +	size_t gc_steps_finalize;
> +
> +	/*
> +	 * Overall number of snap restores for all traces

Snap restores needs some explanation.

> +	 * "belonging" to the given jit_State since the last call
> +	 * to luaM_metrics().
> +	 */
> +	size_t jit_snap_restore;
> +	/*
> +	 * Overall number of abort traces "belonging" to the given
> +	 * jit_State since the last call to luaM_metrics().
> +	 */
> +	size_t jit_trace_abort;
> +	/* Total size of all allocated machine code areas. */
> +	size_t jit_mcode_size;
> +	/* Current amount of JIT traces. */
> +	unsigned int jit_trace_num;
> +};
> +```
> +
> +One can see metrics are divided by the two types: global and incremental.
> +Global metrics are collected throughout the platform uptime. Incremental
> +metrics are reset after each `luaM_metrics()` call.
> +
> +There is also a complement introduced for Lua space -- `misc.getmetrics()`.
> +This function is just a wrapper for `luaM_metrics()` returning a Lua table with
> +the similar metrics. Its usage is quite simple:
> +```
> +$ ./src/tarantool
> +Tarantool 2.5.0-267-gbf047ad44
> +type 'help' for interactive help
> +tarantool> misc.getmetrics()
> +---
> +- global:
> +    tabnum: 1812
> +    gc_total: 1369927
> +    strnum: 5767
> +    jit_trace_num: 0
> +    cdatanum: 89
> +    jit_mcode_size: 0
> +    udatanum: 17
> +  incremental:

incremental_from_last_call will be more descriptive?

> +    jit_snap_restore: 0
> +    gc_freed: 2239391
> +    strhash_hit: 53759
> +    gc_steps_finalize: 0
> +    gc_allocated: 3609318
> +    gc_steps_atomic: 6
> +    gc_steps_sweep: 296
> +    gc_steps_sweepstring: 17920
> +    jit_trace_abort: 0
> +    strhash_miss: 6874

I wonder if those keys can be sorted? Consider, I want to find
something by the name. Sorting helps greatly. 

> +    gc_steps_propagate: 10106
> +    gc_steps_pause: 7
> +...
> +```
> -- 
> 2.24.1
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Tarantool-patches] [PATCH 1/2] metrics: add counters for metrics interested in
  2020-07-21 11:34 ` [Tarantool-patches] [PATCH 1/2] metrics: add counters for metrics interested in Sergey Kaplun
@ 2020-07-23 20:12   ` Igor Munkin
  2020-07-24 12:00     ` Sergey Kaplun
  2020-07-27  2:25     ` Sergey Kaplun
  0 siblings, 2 replies; 16+ messages in thread
From: Igor Munkin @ 2020-07-23 20:12 UTC (permalink / raw)
  To: Sergey Kaplun; +Cc: tarantool-patches

Sergey,

Thanks for the patch! Please consider my comments below.

On 21.07.20, Sergey Kaplun wrote:

We still don't have strict prefixes, so I propose to use 'core' for this
patch. Does it look fine to you? Feel free to propose your own one.

> This patch adds counters for:
>   - amount of table, cdata and udata objects,
>   - GC invokations by GC states,
>   - strhash misses or hits,
>   - amount of freed or allocated memory,
>   - amount of aborted traces and restored snapshots.
> 
> C and Lua API will be added in the next patch.

OK, let's make the commit message a bit clearer:
| core: introduce various platform metrics
|
| This patch introduces the following counters:
| - overall amount of allocated tables, cdata and udata objects
| - number of incremental GC steps grouped by GC state
| - number of string hashes hits and misses
| - amount of allocated and freed memory
| - number of trace aborts and restored snapshots
|
| Interfaces to obtain these metrics via both Lua and C API are
| introduced in the next patch.

Feel free to change it on your own.

> 
> Part of tarantool/tarantool#5187
> ---
>  src/lj_cdata.c |  2 ++
>  src/lj_cdata.h |  2 ++
>  src/lj_gc.c    |  4 ++++
>  src/lj_gc.h    |  6 +-----
>  src/lj_jit.h   |  7 +++++++
>  src/lj_obj.h   | 25 +++++++++++++++++++++++++
>  src/lj_snap.c  |  1 +
>  src/lj_state.c | 13 ++++++++++++-
>  src/lj_str.c   |  5 +++++
>  src/lj_tab.c   |  2 ++
>  src/lj_trace.c |  5 ++++-
>  src/lj_udata.c |  2 ++
>  12 files changed, 67 insertions(+), 7 deletions(-)
> 

<snipped>

> diff --git a/src/lj_jit.h b/src/lj_jit.h
> index 7eb3d2a..960a02b 100644
> --- a/src/lj_jit.h
> +++ b/src/lj_jit.h
> @@ -475,6 +475,13 @@ typedef struct jit_State {
>    size_t szmcarea;	/* Size of current mcode area. */
>    size_t szallmcarea;	/* Total size of all allocated mcode areas. */
>  
> +  size_t nsnaprestore;	/* Overall number of snap restores for all traces
> +                        ** "belonging" to the given jit_State
> +                        ** since the last call to luaM_metrics(). */
> +  size_t ntraceabort;	/* Overall number of abort traces
> +                        ** "belonging" to the given jit_State
> +                        ** since the last call to luaM_metrics(). */

Code indentation is really strange here, like you mixed LuaJIT and uJIT
styles. Besides, I guess the remark regarding the field reset is wrong:
* These fields are not reset in this patch.
* <luaM_metrics> is introduced only in the following patch but you've
  already mentioned it here.

> +
>    TValue errinfo;	/* Additional info element for trace errors. */
>  
>  #if LJ_HASPROFILE
> diff --git a/src/lj_obj.h b/src/lj_obj.h
> index f368578..1fee04f 100644
> --- a/src/lj_obj.h
> +++ b/src/lj_obj.h
> @@ -571,13 +571,28 @@ typedef enum {
>  #define basemt_obj(g, o)	((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
>  #define mmname_str(g, mm)	(strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
>  
> +/* Garbage collector states. Order matters. */
> +enum {
> +  GCSpause,
> +  GCSpropagate,
> +  GCSatomic,
> +  GCSsweepstring,
> +  GCSsweep,
> +  GCSfinalize,
> +  GCSlast

Minor: GCSMAX looks better, doesn't it? Feel free to ignore.

> +};
> +
>  typedef struct GCState {
>    GCSize total;		/* Memory currently allocated. */
> +  size_t freed;		/* Memory freed since last luaM_metrics() call. */
> +  size_t allocated;	/* Memory allocated since last luaM_metrics() call. */

Well, why <total> field type differs from <freed> and <allocated> fields
type? And again, remark about <luaM_metrics> can be dropped (see above).

>    GCSize threshold;	/* Memory threshold. */
>    uint8_t currentwhite;	/* Current white color. */
>    uint8_t state;	/* GC state. */
>    uint8_t nocdatafin;	/* No cdata finalizer called. */
>    uint8_t unused2;
> +  size_t state_count[GCSlast]; /* Count of GC invocations with different states
> +			       ** since previous call of luaM_metrics().  */

Again, a mess with whitespace. Furthermore, I propose to reword this
comment the following way:
| /* Count of incremental GC steps per state */

>    MSize sweepstr;	/* Sweep position in string table. */
>    GCRef root;		/* List of all collectable objects. */
>    MRef sweep;		/* Sweep position in root list. */
> @@ -589,6 +604,12 @@ typedef struct GCState {
>    GCSize estimate;	/* Estimate of memory actually in use. */
>    MSize stepmul;	/* Incremental GC step granularity. */
>    MSize pause;		/* Pause between successive GC cycles. */
> +
> +  size_t tabnum;	/* Current amount of table objects. */
> +  size_t udatanum;	/* Current amount of udata objects. */

Minor: These comments are a bit misleading. "Current" sounds like the
metric is reset somewhen. But you're talking about "alive" objects of
the particular type. Yes, formally (in GC terms) some of them are not
alive, but are still allocated (so called "dead") objects. So the term
"alive" is also a misleading one. I guess you can freely reword the
comments the following way:
| /* Number of allocated <GCtype> objects */

> +#ifdef LJ_HASFFI
> +  size_t cdatanum;	/* Current amount of cdata objects. */

This counter is not incremented if cdata is allocated on trace. Consider
the following test:
| $ LUA_PATH="$HOME/.luarocks/share/lua/5.1/?.lua;;" ./luajit -joff <<EOF
| heredoc> local t = { }
| heredoc> for i = 1, 1000 do
| heredoc>   table.insert(t, require('ffi').new('uint64_t', i))
| heredoc> end
| heredoc> t = nil
| heredoc> collectgarbage('collect')
| heredoc> print(require('inspect')(misc.getmetrics()))
| heredoc> EOF
| {
|   global = {
|     cdatanum = 0,
|     gc_total = 74722,
|     jit_mcode_size = 0,
|     jit_trace_num = 0,
|     strnum = 534,
|     tabnum = 77,
|     udatanum = 5
|   },
|   incremental = {
|     gc_allocated = 173092,
|     gc_freed = 98370,
|     gc_steps_atomic = 1,
|     gc_steps_finalize = 0,
|     gc_steps_pause = 3,
|     gc_steps_propagate = 553,
|     gc_steps_sweep = 64,
|     gc_steps_sweepstring = 1024,
|     jit_snap_restore = 0,
|     jit_trace_abort = 0,
|     strhash_hit = 4021,
|     strhash_miss = 538
|   }
| }
| $ LUA_PATH="$HOME/.luarocks/share/lua/5.1/?.lua;;" ./luajit -jon <<EOF
| heredoc> local t = { }
| heredoc> for i = 1, 1000 do
| heredoc>   table.insert(t, require('ffi').new('uint64_t', i))
| heredoc> end
| heredoc> t = nil
| heredoc> collectgarbage('collect')
| heredoc> print(require('inspect')(misc.getmetrics()))
| heredoc> EOF
| {
|   global = {
|     cdatanum = -941,
|     gc_total = 76561,
|     jit_mcode_size = 65536,
|     jit_trace_num = 3,
|     strnum = 534,
|     tabnum = 77,
|     udatanum = 5
|   },
|   incremental = {
|     gc_allocated = 145299,
|     gc_freed = 68738,
|     gc_steps_atomic = 3,
|     gc_steps_finalize = 0,
|     gc_steps_pause = 2,
|     gc_steps_propagate = 505,
|     gc_steps_sweep = 64,
|     gc_steps_sweepstring = 1024,
|     jit_snap_restore = 6,
|     jit_trace_abort = 0,
|     strhash_hit = 3085,
|     strhash_miss = 538
|   }
| }

As you can see global.cdatanum is negative when JIT is on. You can take
a look on the compiled trace by yourself. The root cause is CNEW IR
semantics: for VLA/VLS cdata types <lj_cdata_newv> call is emitted and
the counters are fine, but for other cdata types JIT compiler emits just
a <lj_mem_newgco> call that so this object is not counted. Thereby JIT
has to emit the corresponding instructions while assembling CNEW IR.
I will elaborate on this later.

> +#endif
>  } GCState;
>  
>  /* Global state, shared by all threads of a Lua universe. */
> @@ -602,6 +623,10 @@ typedef struct global_State {
>      BloomFilter next[2];
>    } strbloom;
>  #endif
> +  size_t strhash_hit;	/* New string has been found in the storage
> +			** since last luaM_metrics() call. */
> +  size_t strhash_miss;	/* New string has been added to the storage
> +			** since last luaM_metrics() call. */

Well, such indentation is closer to the desired one, but I would use two
spaces prior to these tabs. What do you think? Also please, don't forget
about <luaM_metrics> remark.

>    lua_Alloc allocf;	/* Memory allocator. */
>    void *allocd;		/* Memory allocator data. */
>    GCState gc;		/* Garbage collector. */

<snipped>

> diff --git a/src/lj_state.c b/src/lj_state.c
> index 632dd07..b71ebae 100644
> --- a/src/lj_state.c
> +++ b/src/lj_state.c
> @@ -204,6 +204,16 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
>    setgcref(g->uvhead.prev, obj2gco(&g->uvhead));
>    setgcref(g->uvhead.next, obj2gco(&g->uvhead));
>    g->strmask = ~(MSize)0;
> +
> +  memset(g->gc.state_count, 0, GCSlast * sizeof(g->gc.state_count[0]));
> +  g->strhash_hit = 0;
> +  g->strhash_miss = 0;
> +  g->gc.tabnum = 0;
> +  g->gc.udatanum = 0;
> +#if LJ_HASFFI
> +  g->gc.cdatanum = 0;
> +#endif
> +

It looks like all assignments above are excess: <GG_State> is filled
with zeros, <g> points to the corresponding <GG_State> chunk, <gc> is
allocated within <g>, ergo also initialized with zeros. Feel free to
drop this hunk.

>    setnilV(registry(L));
>    setnilV(&g->nilnode.val);
>    setnilV(&g->nilnode.key);
> @@ -214,7 +224,8 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
>    g->gc.state = GCSpause;
>    setgcref(g->gc.root, obj2gco(L));
>    setmref(g->gc.sweep, &g->gc.root);
> -  g->gc.total = sizeof(GG_State);
> +  g->gc.allocated = g->gc.total = sizeof(GG_State);
> +  g->gc.freed = 0;

Ditto for g->gc.freed.

>    g->gc.pause = LUAI_GCPAUSE;
>    g->gc.stepmul = LUAI_GCMUL;
>    lj_dispatch_init((GG_State *)L);

<snipped>

> -- 
> 2.24.1
> 

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Tarantool-patches] [RFC] rfc: luajit metrics
  2020-07-23 10:15   ` Sergey Ostanevich
@ 2020-07-24 11:18     ` Sergey Kaplun
  2020-07-24 11:22       ` Sergey Kaplun
  2020-07-24 17:20       ` Sergey Ostanevich
  0 siblings, 2 replies; 16+ messages in thread
From: Sergey Kaplun @ 2020-07-24 11:18 UTC (permalink / raw)
  To: Sergey Ostanevich; +Cc: tarantool-patches

Hi! Thanks for the review!

Mons makes no bones of mailing list so I cite his comments from
[1] here.

On 23.07.20, Sergey Ostanevich wrote:
> Hi!
> 
> Thanks for the patch! See my comments below.
> Otherwise LGTM.
> 
> Sergos
> 
> On 21 Jul 14:37, Sergey Kaplun wrote:
> > Part of #5187
> > ---
> > 
> > This patch adds RFC to LuaJIT metrics interfaces. Nevertheless name
> > `misc` for builtin library is not good and should be discussed, because
> > tons of user modules can use that name for their own libraies.
> > 
> > Branch: https://github.com/tarantool/tarantool/tree/skaplun/5187-luajit-metrics
> > Issue: https://github.com/tarantool/tarantool/issues/5187
> > 
> >  doc/rfc/5187-luajit-metrics.md | 134 +++++++++++++++++++++++++++++++++
> >  1 file changed, 134 insertions(+)
> >  create mode 100644 doc/rfc/5187-luajit-metrics.md
> > 
> > diff --git a/doc/rfc/5187-luajit-metrics.md b/doc/rfc/5187-luajit-metrics.md
> > new file mode 100644
> > index 000000000..33a8bf8e0
> > --- /dev/null
> > +++ b/doc/rfc/5187-luajit-metrics.md
> > @@ -0,0 +1,134 @@
> > +# LuaJIT metrics
> > +
> > +* **Status**: In progress
> > +* **Start date**: 17-07-2020
> > +* **Authors**: Sergey Kaplun @Buristan skaplun@tarantool.org,
> > +               Igor Munkin @igormunkin imun@tarantool.org,
> > +               Sergey Ostanevich @sergos sergos@tarantool.org
> > +* **Issues**: [#5187](https://github.com/tarantool/tarantool/issues/5187)
> > +
> > +## Summary
> > +
> > +LuaJIT metrics provide extra information about the Lua state. They consists of
> > +GC metrics (overall amount of objects and memory usage), JIT stats (both
> > +related to the compiled traces and the engine itself), string hash hits/misses.
> > +
> > +## Background and motivation
> > +
> > +One can be curious about their application performance. We are going to provide
> > +various metrics about the several platform subsystems behaviour. GC pressure
> > +produced by user code can weight down all application performance. Irrelevant
> > +traces compiled by the JIT engine can just burn CPU time with no benefits as a
> > +result. String hash collisions can lead to DoS caused by a single request. All
> > +these metrics should be well monitored by users wanting to improve the
> > +performance of their application.
> > +
> > +## Detailed design
> > +
> > +For C API we introduce additional extension header <lmisclib.h> that provides
> > +interfaces for new LuaJIT C API extensions. The first interface in this header
> > +will be the following:
> > +
> > +```
> > +/* API for obtaining various metrics from the platform. */
> > +
> > +LUAM_API struct luam_Metrics *luaM_metrics(lua_State *L,
> > +					   struct luam_Metrics *dst);
> > +```
> > +
> > +This function fills the structure pointed by `dst` with the corresponding
> > +metrics related to Lua state anchored to the given coroutine `L`. The result of
> > +the function is a pointer to the filled structure (the same `dst` points to).
> > +
> > +`struct luam_Metrics` has the following definition:
> 
>    ^ The

Thanks!

> > +
> > +```
> > +struct luam_Metrics {
> > +	/*
> > +	 * New string has been found in the storage since last
> 
> The 'new string found' is misleading IMO. 
> Number of strings found in string hash instead of allocation of new
> ones since ...

Ok, thanks!

> > +	 * luaM_metrics() call.
> > +	 */
> > +	size_t strhash_hit;
> > +	/*
> > +	 * New string has been added to the storage since last
> 
> Number of new strings allocated and put into string hash since ...

Sure!

> > +	 * luaM_metrics() call.
> > +	 */
> > +	size_t strhash_miss;
> > +
> > +	size_t strnum;   /* Current amount of string objects. */
> > +	size_t tabnum;   /* Current amount of table objects. */
> > +	size_t udatanum; /* Current amount of udata objects. */
> > +	size_t cdatanum; /* Current amount of cdata objects. */
> > +
> > +	/* Memory currently allocated. */
> > +	size_t gc_total;
> > +	/* Memory freed since last luaM_metrics() call. */
> > +	size_t gc_freed;
> > +	/* Memory allocated since last luaM_metrics() call. */
> > +	size_t gc_allocated;
> > +
> > +	/*
> > +	 * Count of GC invocations with different states
> > +	 * since previous call of luaM_metrics().
> > +	 */
> > +	size_t gc_steps_pause;
> > +	size_t gc_steps_propagate;
> > +	size_t gc_steps_atomic;
> > +	size_t gc_steps_sweepstring;
> > +	size_t gc_steps_sweep;
> > +	size_t gc_steps_finalize;
> > +
> > +	/*
> > +	 * Overall number of snap restores for all traces
> 
> Snap restores needs some explanation.

Ok, sounds reasonable. AFAIK number of snap restores equals to amount of
trace exits. May be this will be more informative for users.

> > +	 * "belonging" to the given jit_State since the last call
> > +	 * to luaM_metrics().
> > +	 */
> > +	size_t jit_snap_restore;
> > +	/*
> > +	 * Overall number of abort traces "belonging" to the given
> > +	 * jit_State since the last call to luaM_metrics().
> > +	 */
> > +	size_t jit_trace_abort;
> > +	/* Total size of all allocated machine code areas. */
> > +	size_t jit_mcode_size;
> > +	/* Current amount of JIT traces. */
> > +	unsigned int jit_trace_num;
> > +};
> > +```
> > +
> > +One can see metrics are divided by the two types: global and incremental.
> > +Global metrics are collected throughout the platform uptime. Incremental
> > +metrics are reset after each `luaM_metrics()` call.

As Mons said:
| Incremental metrics must not reset after each call (see practice from
| Linux kernel, for ex.). Rationale is simple: 2 different modules, both
| using this function, will ruin metrics to each other

It's very important comment. With two modules that use metrics our
counters become just garbage. I suppose that we should use monotonic
increase counters for all "incremental" metrics. Users can count delta
between calls of `getmetrics` and analise a result.

We can use uint64_t for storage of these metrics. If we astimate amount
of allocated objects as 40Gb per second it will be necessary 14 years of
instance uptime to overflow this counter. Also all logic corresponding
to overflow of the counter can be realized inside user code.

> > +
> > +There is also a complement introduced for Lua space -- `misc.getmetrics()`.
> > +This function is just a wrapper for `luaM_metrics()` returning a Lua table with
> > +the similar metrics. Its usage is quite simple:
> > +```
> > +$ ./src/tarantool
> > +Tarantool 2.5.0-267-gbf047ad44
> > +type 'help' for interactive help
> > +tarantool> misc.getmetrics()
> > +---
> > +- global:
> > +    tabnum: 1812
> > +    gc_total: 1369927
> > +    strnum: 5767
> > +    jit_trace_num: 0
> > +    cdatanum: 89
> > +    jit_mcode_size: 0
> > +    udatanum: 17
> > +  incremental:
> 
> incremental_from_last_call will be more descriptive?

Mons:
| I'd consider rename global to absolute. All metrics are global (not
| scoped). But some are incremental counters and others are absolute
| current values.

And also as far as we mustn't use updatable by call metrics, should we join all
metrics inside one table, shouldn't we?

> > +    jit_snap_restore: 0
> > +    gc_freed: 2239391
> > +    strhash_hit: 53759
> > +    gc_steps_finalize: 0
> > +    gc_allocated: 3609318
> > +    gc_steps_atomic: 6
> > +    gc_steps_sweep: 296
> > +    gc_steps_sweepstring: 17920
> > +    jit_trace_abort: 0
> > +    strhash_miss: 6874
> 
> I wonder if those keys can be sorted? Consider, I want to find
> something by the name. Sorting helps greatly. 

It can be implement by user as simple as:

| local metrics = xjit.getmetrics()
| local sorted = {}
| for k, v in pairs(metrics.incremental) do table.insert(sorted, k) end
| table.sort(sorted)
| for _, k in ipairs(sorted) do print(k, metrics.incremental[k]) end
|
| --[[
| gc_allocated    8920
| gc_freed        3528
| gc_steps_atomic 0
| gc_steps_finalize       0
| gc_steps_pause  0
| gc_steps_propagate      62
| gc_steps_sweep  0
| gc_steps_sweepstring    0
| jit_snap_restore        0
| jit_trace_abort 0
| strhash_hit     118
| strhash_miss    7
| ]]


> > +    gc_steps_propagate: 10106
> > +    gc_steps_pause: 7
> > +...
> > +```
> > -- 
> > 2.24.1
> > 

Thoughts?

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Tarantool-patches] [RFC] rfc: luajit metrics
  2020-07-24 11:18     ` Sergey Kaplun
@ 2020-07-24 11:22       ` Sergey Kaplun
  2020-07-24 17:20       ` Sergey Ostanevich
  1 sibling, 0 replies; 16+ messages in thread
From: Sergey Kaplun @ 2020-07-24 11:22 UTC (permalink / raw)
  To: Sergey Ostanevich; +Cc: tarantool-patches

On 24.07.20, Sergey Kaplun wrote:
> Hi! Thanks for the review!
> 
> Mons makes no bones of mailing list so I cite his comments from
> [1] here.
> 
Sorry! I forgot about the link.

[1]: https://github.com/tarantool/tarantool/commit/a3b39ebadc109fe45ab115d0d7a82974861858e3

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Tarantool-patches] [PATCH 1/2] metrics: add counters for metrics interested in
  2020-07-23 20:12   ` Igor Munkin
@ 2020-07-24 12:00     ` Sergey Kaplun
  2020-07-24 20:05       ` Igor Munkin
  2020-07-27  2:25     ` Sergey Kaplun
  1 sibling, 1 reply; 16+ messages in thread
From: Sergey Kaplun @ 2020-07-24 12:00 UTC (permalink / raw)
  To: Igor Munkin; +Cc: tarantool-patches

Hi Igor! Thanks for the review!

On 23.07.20, Igor Munkin wrote:
> Sergey,
> 
> Thanks for the patch! Please consider my comments below.
> 
> On 21.07.20, Sergey Kaplun wrote:
> 
> We still don't have strict prefixes, so I propose to use 'core' for this
> patch. Does it look fine to you? Feel free to propose your own one.

Maybe we should use 'misc' for this kind of patches?

> > This patch adds counters for:
> >   - amount of table, cdata and udata objects,
> >   - GC invokations by GC states,
> >   - strhash misses or hits,
> >   - amount of freed or allocated memory,
> >   - amount of aborted traces and restored snapshots.
> > 
> > C and Lua API will be added in the next patch.
> 
> OK, let's make the commit message a bit clearer:
> | core: introduce various platform metrics
> |
> | This patch introduces the following counters:
> | - overall amount of allocated tables, cdata and udata objects
> | - number of incremental GC steps grouped by GC state
> | - number of string hashes hits and misses
> | - amount of allocated and freed memory
> | - number of trace aborts and restored snapshots
> |
> | Interfaces to obtain these metrics via both Lua and C API are
> | introduced in the next patch.
> 
> Feel free to change it on your own.

Thanks! It looks fine for me.

> > 
> > Part of tarantool/tarantool#5187
> > ---
> >  src/lj_cdata.c |  2 ++
> >  src/lj_cdata.h |  2 ++
> >  src/lj_gc.c    |  4 ++++
> >  src/lj_gc.h    |  6 +-----
> >  src/lj_jit.h   |  7 +++++++
> >  src/lj_obj.h   | 25 +++++++++++++++++++++++++
> >  src/lj_snap.c  |  1 +
> >  src/lj_state.c | 13 ++++++++++++-
> >  src/lj_str.c   |  5 +++++
> >  src/lj_tab.c   |  2 ++
> >  src/lj_trace.c |  5 ++++-
> >  src/lj_udata.c |  2 ++
> >  12 files changed, 67 insertions(+), 7 deletions(-)
> > 
> 
> <snipped>
> 
> > diff --git a/src/lj_jit.h b/src/lj_jit.h
> > index 7eb3d2a..960a02b 100644
> > --- a/src/lj_jit.h
> > +++ b/src/lj_jit.h
> > @@ -475,6 +475,13 @@ typedef struct jit_State {
> >    size_t szmcarea;	/* Size of current mcode area. */
> >    size_t szallmcarea;	/* Total size of all allocated mcode areas. */
> >  
> > +  size_t nsnaprestore;	/* Overall number of snap restores for all traces
> > +                        ** "belonging" to the given jit_State
> > +                        ** since the last call to luaM_metrics(). */
> > +  size_t ntraceabort;	/* Overall number of abort traces
> > +                        ** "belonging" to the given jit_State
> > +                        ** since the last call to luaM_metrics(). */
> 
> Code indentation is really strange here, like you mixed LuaJIT and uJIT
> styles. Besides, I guess the remark regarding the field reset is wrong:
> * These fields are not reset in this patch.
> * <luaM_metrics> is introduced only in the following patch but you've
>   already mentioned it here.

My bad! Thanks for this catch. I will fix indentation in the next
patch.

Naming-wise, as you can see at [1] we shouldn't use metrics since last
call. So comment should gone.

> > +
> >    TValue errinfo;	/* Additional info element for trace errors. */
> >  
> >  #if LJ_HASPROFILE
> > diff --git a/src/lj_obj.h b/src/lj_obj.h
> > index f368578..1fee04f 100644
> > --- a/src/lj_obj.h
> > +++ b/src/lj_obj.h
> > @@ -571,13 +571,28 @@ typedef enum {
> >  #define basemt_obj(g, o)	((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
> >  #define mmname_str(g, mm)	(strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
> >  
> > +/* Garbage collector states. Order matters. */
> > +enum {
> > +  GCSpause,
> > +  GCSpropagate,
> > +  GCSatomic,
> > +  GCSsweepstring,
> > +  GCSsweep,
> > +  GCSfinalize,
> > +  GCSlast
> 
> Minor: GCSMAX looks better, doesn't it? Feel free to ignore.

Really, GCSlast is unreachable. GCSmax for this kind of naming is good
enough. GCSunreachable also looks nice. What do you think?

> > +};
> > +
> >  typedef struct GCState {
> >    GCSize total;		/* Memory currently allocated. */
> > +  size_t freed;		/* Memory freed since last luaM_metrics() call. */
> > +  size_t allocated;	/* Memory allocated since last luaM_metrics() call. */
> 
> Well, why <total> field type differs from <freed> and <allocated> fields
> type? And again, remark about <luaM_metrics> can be dropped (see above).

GCSize for total memfory usage is defines by LJ_GC64 macro definition.
| #if LJ_GC64
| typedef uint64_t GCSize;
| #else
| typedef uint32_t GCSize;
| #endif

But summarized allocation size can be many times over maximum memory
size). So these field types should be independet.

> >    GCSize threshold;	/* Memory threshold. */
> >    uint8_t currentwhite;	/* Current white color. */
> >    uint8_t state;	/* GC state. */
> >    uint8_t nocdatafin;	/* No cdata finalizer called. */
> >    uint8_t unused2;
> > +  size_t state_count[GCSlast]; /* Count of GC invocations with different states
> > +			       ** since previous call of luaM_metrics().  */
> 
> Again, a mess with whitespace. Furthermore, I propose to reword this
> comment the following way:
> | /* Count of incremental GC steps per state */

Ok, no problems!

> >    MSize sweepstr;	/* Sweep position in string table. */
> >    GCRef root;		/* List of all collectable objects. */
> >    MRef sweep;		/* Sweep position in root list. */
> > @@ -589,6 +604,12 @@ typedef struct GCState {
> >    GCSize estimate;	/* Estimate of memory actually in use. */
> >    MSize stepmul;	/* Incremental GC step granularity. */
> >    MSize pause;		/* Pause between successive GC cycles. */
> > +
> > +  size_t tabnum;	/* Current amount of table objects. */
> > +  size_t udatanum;	/* Current amount of udata objects. */
> 
> Minor: These comments are a bit misleading. "Current" sounds like the
> metric is reset somewhen. But you're talking about "alive" objects of
> the particular type. Yes, formally (in GC terms) some of them are not
> alive, but are still allocated (so called "dead") objects. So the term
> "alive" is also a misleading one. I guess you can freely reword the
> comments the following way:
> | /* Number of allocated <GCtype> objects */
> 

I think that "allocated" is unnecessarily. We can omit it easily.

I should reword the comments as
| /* Number of <GCtype> objects. */
or
| /* Amount of <GCtype> objects. */
and it seems enough.

> > +#ifdef LJ_HASFFI
> > +  size_t cdatanum;	/* Current amount of cdata objects. */
> 
> This counter is not incremented if cdata is allocated on trace. Consider
> the following test:
> | $ LUA_PATH="$HOME/.luarocks/share/lua/5.1/?.lua;;" ./luajit -joff <<EOF
> | heredoc> local t = { }
> | heredoc> for i = 1, 1000 do
> | heredoc>   table.insert(t, require('ffi').new('uint64_t', i))
> | heredoc> end
> | heredoc> t = nil
> | heredoc> collectgarbage('collect')
> | heredoc> print(require('inspect')(misc.getmetrics()))
> | heredoc> EOF
> | {
> |   global = {
> |     cdatanum = 0,
> |     gc_total = 74722,
> |     jit_mcode_size = 0,
> |     jit_trace_num = 0,
> |     strnum = 534,
> |     tabnum = 77,
> |     udatanum = 5
> |   },
> |   incremental = {
> |     gc_allocated = 173092,
> |     gc_freed = 98370,
> |     gc_steps_atomic = 1,
> |     gc_steps_finalize = 0,
> |     gc_steps_pause = 3,
> |     gc_steps_propagate = 553,
> |     gc_steps_sweep = 64,
> |     gc_steps_sweepstring = 1024,
> |     jit_snap_restore = 0,
> |     jit_trace_abort = 0,
> |     strhash_hit = 4021,
> |     strhash_miss = 538
> |   }
> | }
> | $ LUA_PATH="$HOME/.luarocks/share/lua/5.1/?.lua;;" ./luajit -jon <<EOF
> | heredoc> local t = { }
> | heredoc> for i = 1, 1000 do
> | heredoc>   table.insert(t, require('ffi').new('uint64_t', i))
> | heredoc> end
> | heredoc> t = nil
> | heredoc> collectgarbage('collect')
> | heredoc> print(require('inspect')(misc.getmetrics()))
> | heredoc> EOF
> | {
> |   global = {
> |     cdatanum = -941,
> |     gc_total = 76561,
> |     jit_mcode_size = 65536,
> |     jit_trace_num = 3,
> |     strnum = 534,
> |     tabnum = 77,
> |     udatanum = 5
> |   },
> |   incremental = {
> |     gc_allocated = 145299,
> |     gc_freed = 68738,
> |     gc_steps_atomic = 3,
> |     gc_steps_finalize = 0,
> |     gc_steps_pause = 2,
> |     gc_steps_propagate = 505,
> |     gc_steps_sweep = 64,
> |     gc_steps_sweepstring = 1024,
> |     jit_snap_restore = 6,
> |     jit_trace_abort = 0,
> |     strhash_hit = 3085,
> |     strhash_miss = 538
> |   }
> | }
> 
> As you can see global.cdatanum is negative when JIT is on. You can take
> a look on the compiled trace by yourself. The root cause is CNEW IR
> semantics: for VLA/VLS cdata types <lj_cdata_newv> call is emitted and
> the counters are fine, but for other cdata types JIT compiler emits just
> a <lj_mem_newgco> call that so this object is not counted. Thereby JIT
> has to emit the corresponding instructions while assembling CNEW IR.
> I will elaborate on this later.

My fault!
Ok, I will try to implement it at this week.

> > +#endif
> >  } GCState;
> >  
> >  /* Global state, shared by all threads of a Lua universe. */
> > @@ -602,6 +623,10 @@ typedef struct global_State {
> >      BloomFilter next[2];
> >    } strbloom;
> >  #endif
> > +  size_t strhash_hit;	/* New string has been found in the storage
> > +			** since last luaM_metrics() call. */
> > +  size_t strhash_miss;	/* New string has been added to the storage
> > +			** since last luaM_metrics() call. */
> 
> Well, such indentation is closer to the desired one, but I would use two
> spaces prior to these tabs. What do you think? Also please, don't forget
> about <luaM_metrics> remark.

Sorry, but ^\s+\t+\s+ stylistics looks very bad. In *.c files Mike use
exactly ^\t+ style for code, so we have to use the same for comments.

Thoughts?

> >    lua_Alloc allocf;	/* Memory allocator. */
> >    void *allocd;		/* Memory allocator data. */
> >    GCState gc;		/* Garbage collector. */
> 
> <snipped>
> 
> > diff --git a/src/lj_state.c b/src/lj_state.c
> > index 632dd07..b71ebae 100644
> > --- a/src/lj_state.c
> > +++ b/src/lj_state.c
> > @@ -204,6 +204,16 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
> >    setgcref(g->uvhead.prev, obj2gco(&g->uvhead));
> >    setgcref(g->uvhead.next, obj2gco(&g->uvhead));
> >    g->strmask = ~(MSize)0;
> > +
> > +  memset(g->gc.state_count, 0, GCSlast * sizeof(g->gc.state_count[0]));
> > +  g->strhash_hit = 0;
> > +  g->strhash_miss = 0;
> > +  g->gc.tabnum = 0;
> > +  g->gc.udatanum = 0;
> > +#if LJ_HASFFI
> > +  g->gc.cdatanum = 0;
> > +#endif
> > +
> 
> It looks like all assignments above are excess: <GG_State> is filled
> with zeros, <g> points to the corresponding <GG_State> chunk, <gc> is
> allocated within <g>, ergo also initialized with zeros. Feel free to
> drop this hunk.

Nice catch! Thanks!

> >    setnilV(registry(L));
> >    setnilV(&g->nilnode.val);
> >    setnilV(&g->nilnode.key);
> > @@ -214,7 +224,8 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
> >    g->gc.state = GCSpause;
> >    setgcref(g->gc.root, obj2gco(L));
> >    setmref(g->gc.sweep, &g->gc.root);
> > -  g->gc.total = sizeof(GG_State);
> > +  g->gc.allocated = g->gc.total = sizeof(GG_State);
> > +  g->gc.freed = 0;
> 
> Ditto for g->gc.freed.

Sure!

> 
> >    g->gc.pause = LUAI_GCPAUSE;
> >    g->gc.stepmul = LUAI_GCMUL;
> >    lj_dispatch_init((GG_State *)L);
> 
> <snipped>
> 
> > -- 
> > 2.24.1
> > 
> 
> -- 
> Best regards,
> IM

[1]: https://lists.tarantool.org/pipermail/tarantool-patches/2020-July/018845.html

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Tarantool-patches] [RFC] rfc: luajit metrics
  2020-07-24 11:18     ` Sergey Kaplun
  2020-07-24 11:22       ` Sergey Kaplun
@ 2020-07-24 17:20       ` Sergey Ostanevich
  2020-07-27  2:18         ` Sergey Kaplun
  2020-08-11 20:13         ` Igor Munkin
  1 sibling, 2 replies; 16+ messages in thread
From: Sergey Ostanevich @ 2020-07-24 17:20 UTC (permalink / raw)
  To: Sergey Kaplun; +Cc: tarantool-patches

Hi!


On 24 Jul 14:18, Sergey Kaplun wrote:
> Hi! Thanks for the review!
> 
> Mons makes no bones of mailing list so I cite his comments from
> [1] here.
> 
> On 23.07.20, Sergey Ostanevich wrote:
> > Hi!
> > 
> > Thanks for the patch! See my comments below.
> > Otherwise LGTM.
> > 
> > Sergos
> > 
...
> > > +	/*
> > > +	 * Overall number of snap restores for all traces
> > 
> > Snap restores needs some explanation.
> 
> Ok, sounds reasonable. AFAIK number of snap restores equals to amount of
> trace exits. May be this will be more informative for users.
> 

Well. What this will tell to you as a user running a Lua code? Consider
the number is grows - ??? And what if it shrinks??

I believe it is tighly bound to the total number of traces and the size
of code. But what should it disclose to the user - how should it react,
refactor its code?

> > > +	 * "belonging" to the given jit_State since the last call
> > > +	 * to luaM_metrics().
> > > +	 */
> > > +	size_t jit_snap_restore;
> > > +	/*
> > > +	 * Overall number of abort traces "belonging" to the given
> > > +	 * jit_State since the last call to luaM_metrics().
> > > +	 */
> > > +	size_t jit_trace_abort;
> > > +	/* Total size of all allocated machine code areas. */
> > > +	size_t jit_mcode_size;
> > > +	/* Current amount of JIT traces. */
> > > +	unsigned int jit_trace_num;
> > > +};
> > > +```
> > > +
> > > +One can see metrics are divided by the two types: global and incremental.
> > > +Global metrics are collected throughout the platform uptime. Incremental
> > > +metrics are reset after each `luaM_metrics()` call.
> 
> As Mons said:
> | Incremental metrics must not reset after each call (see practice from
> | Linux kernel, for ex.). Rationale is simple: 2 different modules, both
> | using this function, will ruin metrics to each other
> 
> It's very important comment. With two modules that use metrics our
> counters become just garbage. I suppose that we should use monotonic
> increase counters for all "incremental" metrics. Users can count delta
> between calls of `getmetrics` and analise a result.
> 
> We can use uint64_t for storage of these metrics. If we astimate amount
> of allocated objects as 40Gb per second it will be necessary 14 years of
> instance uptime to overflow this counter. Also all logic corresponding
> to overflow of the counter can be realized inside user code.
> 
> > > +
> > > +There is also a complement introduced for Lua space -- `misc.getmetrics()`.
> > > +This function is just a wrapper for `luaM_metrics()` returning a Lua table with
> > > +the similar metrics. Its usage is quite simple:
> > > +```
> > > +$ ./src/tarantool
> > > +Tarantool 2.5.0-267-gbf047ad44
> > > +type 'help' for interactive help
> > > +tarantool> misc.getmetrics()
> > > +---
> > > +- global:
> > > +    tabnum: 1812
> > > +    gc_total: 1369927
> > > +    strnum: 5767
> > > +    jit_trace_num: 0
> > > +    cdatanum: 89
> > > +    jit_mcode_size: 0
> > > +    udatanum: 17
> > > +  incremental:
> > 
> > incremental_from_last_call will be more descriptive?
> 
> Mons:
> | I'd consider rename global to absolute. All metrics are global (not
> | scoped). But some are incremental counters and others are absolute
> | current values.
> 
> And also as far as we mustn't use updatable by call metrics, should we join all
> metrics inside one table, shouldn't we?

Sure, both comments are correct - we should report just absolute values,
but mention this fact in documentation. 

> 
> > > +    jit_snap_restore: 0
> > > +    gc_freed: 2239391
> > > +    strhash_hit: 53759
> > > +    gc_steps_finalize: 0
> > > +    gc_allocated: 3609318
> > > +    gc_steps_atomic: 6
> > > +    gc_steps_sweep: 296
> > > +    gc_steps_sweepstring: 17920
> > > +    jit_trace_abort: 0
> > > +    strhash_miss: 6874
> > 
> > I wonder if those keys can be sorted? Consider, I want to find
> > something by the name. Sorting helps greatly. 
> 
> It can be implement by user as simple as:
> 
> | local metrics = xjit.getmetrics()
> | local sorted = {}
> | for k, v in pairs(metrics.incremental) do table.insert(sorted, k) end
> | table.sort(sorted)
> | for _, k in ipairs(sorted) do print(k, metrics.incremental[k]) end
> |
> | --[[
> | gc_allocated    8920
> | gc_freed        3528
> | gc_steps_atomic 0
> | gc_steps_finalize       0
> | gc_steps_pause  0
> | gc_steps_propagate      62
> | gc_steps_sweep  0
> | gc_steps_sweepstring    0
> | jit_snap_restore        0
> | jit_trace_abort 0
> | strhash_hit     118
> | strhash_miss    7
> | ]]
> 

For me it is not that simple, frankly. Not in understanding - in use.
Can we just incorporate the code into the getmetrics()?

Thanks,
Sergos

> 
> > > +    gc_steps_propagate: 10106
> > > +    gc_steps_pause: 7
> > > +...
> > > +```
> > > -- 
> > > 2.24.1
> > > 
> 
> Thoughts?
> 
> -- 
> Best regards,
> Sergey Kaplun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Tarantool-patches] [PATCH 1/2] metrics: add counters for metrics interested in
  2020-07-24 12:00     ` Sergey Kaplun
@ 2020-07-24 20:05       ` Igor Munkin
  2020-07-25 10:00         ` Sergey Kaplun
  0 siblings, 1 reply; 16+ messages in thread
From: Igor Munkin @ 2020-07-24 20:05 UTC (permalink / raw)
  To: Sergey Kaplun; +Cc: tarantool-patches

Sergey,

On 24.07.20, Sergey Kaplun wrote:
> Hi Igor! Thanks for the review!
> 
> On 23.07.20, Igor Munkin wrote:
> > Sergey,
> > 
> > Thanks for the patch! Please consider my comments below.
> > 
> > On 21.07.20, Sergey Kaplun wrote:
> > 
> > We still don't have strict prefixes, so I propose to use 'core' for this
> > patch. Does it look fine to you? Feel free to propose your own one.
> 
> Maybe we should use 'misc' for this kind of patches?

Pardon, I don't get your proposal:
* if you refer the introduced <misc> library, then no, cause this
  patch doesn't relate to it
* if you refer to "miscellaneous changes", then also no, cause this
  patch directly related to the core components of platform runtime
  (e.g. GC machinery, string interning and lookup, JIT engine).

> 

<snipped>

> 
> > > +
> > >    TValue errinfo;	/* Additional info element for trace errors. */
> > >  
> > >  #if LJ_HASPROFILE
> > > diff --git a/src/lj_obj.h b/src/lj_obj.h
> > > index f368578..1fee04f 100644
> > > --- a/src/lj_obj.h
> > > +++ b/src/lj_obj.h
> > > @@ -571,13 +571,28 @@ typedef enum {
> > >  #define basemt_obj(g, o)	((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
> > >  #define mmname_str(g, mm)	(strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
> > >  
> > > +/* Garbage collector states. Order matters. */
> > > +enum {
> > > +  GCSpause,
> > > +  GCSpropagate,
> > > +  GCSatomic,
> > > +  GCSsweepstring,
> > > +  GCSsweep,
> > > +  GCSfinalize,
> > > +  GCSlast
> > 
> > Minor: GCSMAX looks better, doesn't it? Feel free to ignore.
> 
> Really, GCSlast is unreachable. GCSmax for this kind of naming is good
> enough. GCSunreachable also looks nice. What do you think?

GCSmax and GCSMAX are totally fine. Use the most convenient to you.

> 
> > > +};
> > > +
> > >  typedef struct GCState {
> > >    GCSize total;		/* Memory currently allocated. */
> > > +  size_t freed;		/* Memory freed since last luaM_metrics() call. */
> > > +  size_t allocated;	/* Memory allocated since last luaM_metrics() call. */
> > 
> > Well, why <total> field type differs from <freed> and <allocated> fields
> > type? And again, remark about <luaM_metrics> can be dropped (see above).
> 
> GCSize for total memfory usage is defines by LJ_GC64 macro definition.
> | #if LJ_GC64
> | typedef uint64_t GCSize;
> | #else
> | typedef uint32_t GCSize;
> | #endif
> 
> But summarized allocation size can be many times over maximum memory
> size). So these field types should be independet.

OK, thanks, now I see. However, there is another issue with these
fields: if LJ_GC64 is disabled (AFAIR any Tarantool build except MacOS
ones) the structure will have unused pad after <total> field. Otherwise,
if LuaJIT is build in LJ_GC64 mode, such pad occurs after <unused2>
field due to <state_count> field. Please reorder the fields considering
the structure alignment.

> 
> > >    GCSize threshold;	/* Memory threshold. */
> > >    uint8_t currentwhite;	/* Current white color. */
> > >    uint8_t state;	/* GC state. */
> > >    uint8_t nocdatafin;	/* No cdata finalizer called. */
> > >    uint8_t unused2;
> > > +  size_t state_count[GCSlast]; /* Count of GC invocations with different states
> > > +			       ** since previous call of luaM_metrics().  */

<snipped>

> > >    MSize sweepstr;	/* Sweep position in string table. */
> > >    GCRef root;		/* List of all collectable objects. */
> > >    MRef sweep;		/* Sweep position in root list. */
> > > @@ -589,6 +604,12 @@ typedef struct GCState {
> > >    GCSize estimate;	/* Estimate of memory actually in use. */
> > >    MSize stepmul;	/* Incremental GC step granularity. */
> > >    MSize pause;		/* Pause between successive GC cycles. */
> > > +
> > > +  size_t tabnum;	/* Current amount of table objects. */
> > > +  size_t udatanum;	/* Current amount of udata objects. */
> > 
> > Minor: These comments are a bit misleading. "Current" sounds like the
> > metric is reset somewhen. But you're talking about "alive" objects of
> > the particular type. Yes, formally (in GC terms) some of them are not
> > alive, but are still allocated (so called "dead") objects. So the term
> > "alive" is also a misleading one. I guess you can freely reword the
> > comments the following way:
> > | /* Number of allocated <GCtype> objects */
> > 
> 
> I think that "allocated" is unnecessarily. We can omit it easily.

Side note: The comment is better, but I still don't agree that
"allocated" can be easily omitted. Since it's a side note, no changes
are obliged.

> 

<snipped>

> 
> > > +#endif
> > >  } GCState;
> > >  
> > >  /* Global state, shared by all threads of a Lua universe. */
> > > @@ -602,6 +623,10 @@ typedef struct global_State {
> > >      BloomFilter next[2];
> > >    } strbloom;
> > >  #endif
> > > +  size_t strhash_hit;	/* New string has been found in the storage
> > > +			** since last luaM_metrics() call. */
> > > +  size_t strhash_miss;	/* New string has been added to the storage
> > > +			** since last luaM_metrics() call. */
> > 
> > Well, such indentation is closer to the desired one, but I would use two
> > spaces prior to these tabs. What do you think? Also please, don't forget
> > about <luaM_metrics> remark.
> 
> Sorry, but ^\s+\t+\s+ stylistics looks very bad. In *.c files Mike use
> exactly ^\t+ style for code, so we have to use the same for comments.
> 
> Thoughts?

I failed to find multiline comments for structures. So if you need one,
it looks more convenient to me to save the original indent of the
structure (i.e. spaces) and then align the wording with tabs considering
the first line. Or make the situation much easier to you and other
developers: just avoid multiline inline comments :)

> 
> > >    lua_Alloc allocf;	/* Memory allocator. */
> > >    void *allocd;		/* Memory allocator data. */
> > >    GCState gc;		/* Garbage collector. */

<snipped>

> > > -- 
> > > 2.24.1
> > > 
> > 
> > -- 
> > Best regards,
> > IM
> 
> [1]: https://lists.tarantool.org/pipermail/tarantool-patches/2020-July/018845.html
> 
> -- 
> Best regards,
> Sergey Kaplun

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Tarantool-patches] [PATCH 1/2] metrics: add counters for metrics interested in
  2020-07-24 20:05       ` Igor Munkin
@ 2020-07-25 10:00         ` Sergey Kaplun
  0 siblings, 0 replies; 16+ messages in thread
From: Sergey Kaplun @ 2020-07-25 10:00 UTC (permalink / raw)
  To: Igor Munkin; +Cc: tarantool-patches

On 24.07.20, Igor Munkin wrote:
> Sergey,
> 
> On 24.07.20, Sergey Kaplun wrote:
> > Hi Igor! Thanks for the review!
> > 
> > On 23.07.20, Igor Munkin wrote:
> > > Sergey,
> > > 
> > > Thanks for the patch! Please consider my comments below.
> > > 
> > > On 21.07.20, Sergey Kaplun wrote:
> > > 
> > > We still don't have strict prefixes, so I propose to use 'core' for this
> > > patch. Does it look fine to you? Feel free to propose your own one.
> > 
> > Maybe we should use 'misc' for this kind of patches?
> 
> Pardon, I don't get your proposal:
> * if you refer the introduced <misc> library, then no, cause this
>   patch doesn't relate to it
> * if you refer to "miscellaneous changes", then also no, cause this
>   patch directly related to the core components of platform runtime
>   (e.g. GC machinery, string interning and lookup, JIT engine).
> 

You are right, I'll use "core" here.

<snipped>
> > > > +};
> > > > +
> > > >  typedef struct GCState {
> > > >    GCSize total;		/* Memory currently allocated. */
> > > > +  size_t freed;		/* Memory freed since last luaM_metrics() call. */
> > > > +  size_t allocated;	/* Memory allocated since last luaM_metrics() call. */
> > > 
> > > Well, why <total> field type differs from <freed> and <allocated> fields
> > > type? And again, remark about <luaM_metrics> can be dropped (see above).
> > 
> > GCSize for total memfory usage is defines by LJ_GC64 macro definition.
> > | #if LJ_GC64
> > | typedef uint64_t GCSize;
> > | #else
> > | typedef uint32_t GCSize;
> > | #endif
> > 
> > But summarized allocation size can be many times over maximum memory
> > size). So these field types should be independet.
> 
> OK, thanks, now I see. However, there is another issue with these
> fields: if LJ_GC64 is disabled (AFAIR any Tarantool build except MacOS
> ones) the structure will have unused pad after <total> field. Otherwise,
> if LuaJIT is build in LJ_GC64 mode, such pad occurs after <unused2>
> field due to <state_count> field. Please reorder the fields considering
> the structure alignment.

Sure!

> > 
<snipped>
> > 
> > > > +#endif
> > > >  } GCState;
> > > >  
> > > >  /* Global state, shared by all threads of a Lua universe. */
> > > > @@ -602,6 +623,10 @@ typedef struct global_State {
> > > >      BloomFilter next[2];
> > > >    } strbloom;
> > > >  #endif
> > > > +  size_t strhash_hit;	/* New string has been found in the storage
> > > > +			** since last luaM_metrics() call. */
> > > > +  size_t strhash_miss;	/* New string has been added to the storage
> > > > +			** since last luaM_metrics() call. */
> > > 
> > > Well, such indentation is closer to the desired one, but I would use two
> > > spaces prior to these tabs. What do you think? Also please, don't forget
> > > about <luaM_metrics> remark.
> > 
> > Sorry, but ^\s+\t+\s+ stylistics looks very bad. In *.c files Mike use
> > exactly ^\t+ style for code, so we have to use the same for comments.
> > 
> > Thoughts?
> 
> I failed to find multiline comments for structures. So if you need one,
> it looks more convenient to me to save the original indent of the
> structure (i.e. spaces) and then align the wording with tabs considering
> the first line. Or make the situation much easier to you and other
> developers: just avoid multiline inline comments :)

I suppose that one-line comment will satisfy both of us :)

> > 
> > > >    lua_Alloc allocf;	/* Memory allocator. */
> > > >    void *allocd;		/* Memory allocator data. */
> > > >    GCState gc;		/* Garbage collector. */
> 
> <snipped>
> 
> > > > -- 
> > > > 2.24.1
> > > > 
> > > 
> > > -- 
> > > Best regards,
> > > IM
> > 
> > [1]: https://lists.tarantool.org/pipermail/tarantool-patches/2020-July/018845.html
> > 
> > -- 
> > Best regards,
> > Sergey Kaplun
> 
> -- 
> Best regards,
> IM

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Tarantool-patches] [RFC] rfc: luajit metrics
  2020-07-24 17:20       ` Sergey Ostanevich
@ 2020-07-27  2:18         ` Sergey Kaplun
  2020-08-11 20:13         ` Igor Munkin
  1 sibling, 0 replies; 16+ messages in thread
From: Sergey Kaplun @ 2020-07-27  2:18 UTC (permalink / raw)
  To: Sergey Ostanevich; +Cc: tarantool-patches

Hi!

On 24.07.20, Sergey Ostanevich wrote:
> Hi!
> 
> 
> On 24 Jul 14:18, Sergey Kaplun wrote:
> > Hi! Thanks for the review!
> > 
> > Mons makes no bones of mailing list so I cite his comments from
> > [1] here.
> > 
> > On 23.07.20, Sergey Ostanevich wrote:
> > > Hi!
> > > 
> > > Thanks for the patch! See my comments below.
> > > Otherwise LGTM.
> > > 
> > > Sergos
> > > 
> ...
> > > > +	/*
> > > > +	 * Overall number of snap restores for all traces
> > > 
> > > Snap restores needs some explanation.
> > 
> > Ok, sounds reasonable. AFAIK number of snap restores equals to amount of
> > trace exits. May be this will be more informative for users.
> > 
> 
> Well. What this will tell to you as a user running a Lua code? Consider
> the number is grows - ??? And what if it shrinks??
> 
> I believe it is tighly bound to the total number of traces and the size
> of code. But what should it disclose to the user - how should it react,
> refactor its code?

The more time you spend away from traces the worse it is for the
performance of your application. So fast growing number of snap restores is
good reason to refactor your code.

> > > > +	 * "belonging" to the given jit_State since the last call
> > > > +	 * to luaM_metrics().
> > > > +	 */
> > > > +	size_t jit_snap_restore;
> > > > +	/*
> > > > +	 * Overall number of abort traces "belonging" to the given
> > > > +	 * jit_State since the last call to luaM_metrics().
> > > > +	 */
> > > > +	size_t jit_trace_abort;
> > > > +	/* Total size of all allocated machine code areas. */
> > > > +	size_t jit_mcode_size;
> > > > +	/* Current amount of JIT traces. */
> > > > +	unsigned int jit_trace_num;
> > > > +};
> > > > +```
> > > > +
> > > > +One can see metrics are divided by the two types: global and incremental.
> > > > +Global metrics are collected throughout the platform uptime. Incremental
> > > > +metrics are reset after each `luaM_metrics()` call.
> > 
> > As Mons said:
> > | Incremental metrics must not reset after each call (see practice from
> > | Linux kernel, for ex.). Rationale is simple: 2 different modules, both
> > | using this function, will ruin metrics to each other
> > 
> > It's very important comment. With two modules that use metrics our
> > counters become just garbage. I suppose that we should use monotonic
> > increase counters for all "incremental" metrics. Users can count delta
> > between calls of `getmetrics` and analise a result.
> > 
> > We can use uint64_t for storage of these metrics. If we astimate amount
> > of allocated objects as 40Gb per second it will be necessary 14 years of
> > instance uptime to overflow this counter. Also all logic corresponding
> > to overflow of the counter can be realized inside user code.
> > 
> > > > +
> > > > +There is also a complement introduced for Lua space -- `misc.getmetrics()`.
> > > > +This function is just a wrapper for `luaM_metrics()` returning a Lua table with
> > > > +the similar metrics. Its usage is quite simple:
> > > > +```
> > > > +$ ./src/tarantool
> > > > +Tarantool 2.5.0-267-gbf047ad44
> > > > +type 'help' for interactive help
> > > > +tarantool> misc.getmetrics()
> > > > +---
> > > > +- global:
> > > > +    tabnum: 1812
> > > > +    gc_total: 1369927
> > > > +    strnum: 5767
> > > > +    jit_trace_num: 0
> > > > +    cdatanum: 89
> > > > +    jit_mcode_size: 0
> > > > +    udatanum: 17
> > > > +  incremental:
> > > 
> > > incremental_from_last_call will be more descriptive?
> > 
> > Mons:
> > | I'd consider rename global to absolute. All metrics are global (not
> > | scoped). But some are incremental counters and others are absolute
> > | current values.
> > 
> > And also as far as we mustn't use updatable by call metrics, should we join all
> > metrics inside one table, shouldn't we?
> 
> Sure, both comments are correct - we should report just absolute values,
> but mention this fact in documentation. 

I've fixed your comments in [1].

> > 
> > > > +    jit_snap_restore: 0
> > > > +    gc_freed: 2239391
> > > > +    strhash_hit: 53759
> > > > +    gc_steps_finalize: 0
> > > > +    gc_allocated: 3609318
> > > > +    gc_steps_atomic: 6
> > > > +    gc_steps_sweep: 296
> > > > +    gc_steps_sweepstring: 17920
> > > > +    jit_trace_abort: 0
> > > > +    strhash_miss: 6874
> > > 
> > > I wonder if those keys can be sorted? Consider, I want to find
> > > something by the name. Sorting helps greatly. 
> > 
> > It can be implement by user as simple as:
> > 
> > | local metrics = xjit.getmetrics()
> > | local sorted = {}
> > | for k, v in pairs(metrics.incremental) do table.insert(sorted, k) end
> > | table.sort(sorted)
> > | for _, k in ipairs(sorted) do print(k, metrics.incremental[k]) end
> > |
> > | --[[
> > | gc_allocated    8920
> > | gc_freed        3528
> > | gc_steps_atomic 0
> > | gc_steps_finalize       0
> > | gc_steps_pause  0
> > | gc_steps_propagate      62
> > | gc_steps_sweep  0
> > | gc_steps_sweepstring    0
> > | jit_snap_restore        0
> > | jit_trace_abort 0
> > | strhash_hit     118
> > | strhash_miss    7
> > | ]]
> > 
> 
> For me it is not that simple, frankly. Not in understanding - in use.
> Can we just incorporate the code into the getmetrics()?

Unfortunately, there is no way in Lua to return key-sorted table.  But
for better user experience we can add new serializer (if it doesn't
already exist) in tarantool console :)

> 
> Thanks,
> Sergos
> 
> > 
> > > > +    gc_steps_propagate: 10106
> > > > +    gc_steps_pause: 7
> > > > +...
> > > > +```
> > > > -- 
> > > > 2.24.1
> > > > 
> > 
> > Thoughts?
> > 
> > -- 
> > Best regards,
> > Sergey Kaplun

[1]: https://lists.tarantool.org/pipermail/tarantool-patches/2020-July/018869.html

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Tarantool-patches] [PATCH 1/2] metrics: add counters for metrics interested in
  2020-07-23 20:12   ` Igor Munkin
  2020-07-24 12:00     ` Sergey Kaplun
@ 2020-07-27  2:25     ` Sergey Kaplun
  2020-08-12  9:39       ` Igor Munkin
  1 sibling, 1 reply; 16+ messages in thread
From: Sergey Kaplun @ 2020-07-27  2:25 UTC (permalink / raw)
  To: Igor Munkin; +Cc: tarantool-patches

Igor,

On 23.07.20, Igor Munkin wrote:
> Sergey,
> 
> Thanks for the patch! Please consider my comments below.
> 
> On 21.07.20, Sergey Kaplun wrote:
> 
<snipped>
> 
> This counter is not incremented if cdata is allocated on trace. Consider
> the following test:
> | $ LUA_PATH="$HOME/.luarocks/share/lua/5.1/?.lua;;" ./luajit -joff <<EOF
> | heredoc> local t = { }
> | heredoc> for i = 1, 1000 do
> | heredoc>   table.insert(t, require('ffi').new('uint64_t', i))
> | heredoc> end
> | heredoc> t = nil
> | heredoc> collectgarbage('collect')
> | heredoc> print(require('inspect')(misc.getmetrics()))
> | heredoc> EOF
> | {
> |   global = {
> |     cdatanum = 0,
> |     gc_total = 74722,
> |     jit_mcode_size = 0,
> |     jit_trace_num = 0,
> |     strnum = 534,
> |     tabnum = 77,
> |     udatanum = 5
> |   },
> |   incremental = {
> |     gc_allocated = 173092,
> |     gc_freed = 98370,
> |     gc_steps_atomic = 1,
> |     gc_steps_finalize = 0,
> |     gc_steps_pause = 3,
> |     gc_steps_propagate = 553,
> |     gc_steps_sweep = 64,
> |     gc_steps_sweepstring = 1024,
> |     jit_snap_restore = 0,
> |     jit_trace_abort = 0,
> |     strhash_hit = 4021,
> |     strhash_miss = 538
> |   }
> | }
> | $ LUA_PATH="$HOME/.luarocks/share/lua/5.1/?.lua;;" ./luajit -jon <<EOF
> | heredoc> local t = { }
> | heredoc> for i = 1, 1000 do
> | heredoc>   table.insert(t, require('ffi').new('uint64_t', i))
> | heredoc> end
> | heredoc> t = nil
> | heredoc> collectgarbage('collect')
> | heredoc> print(require('inspect')(misc.getmetrics()))
> | heredoc> EOF
> | {
> |   global = {
> |     cdatanum = -941,
> |     gc_total = 76561,
> |     jit_mcode_size = 65536,
> |     jit_trace_num = 3,
> |     strnum = 534,
> |     tabnum = 77,
> |     udatanum = 5
> |   },
> |   incremental = {
> |     gc_allocated = 145299,
> |     gc_freed = 68738,
> |     gc_steps_atomic = 3,
> |     gc_steps_finalize = 0,
> |     gc_steps_pause = 2,
> |     gc_steps_propagate = 505,
> |     gc_steps_sweep = 64,
> |     gc_steps_sweepstring = 1024,
> |     jit_snap_restore = 6,
> |     jit_trace_abort = 0,
> |     strhash_hit = 3085,
> |     strhash_miss = 538
> |   }
> | }
> 
> As you can see global.cdatanum is negative when JIT is on. You can take
> a look on the compiled trace by yourself. The root cause is CNEW IR
> semantics: for VLA/VLS cdata types <lj_cdata_newv> call is emitted and
> the counters are fine, but for other cdata types JIT compiler emits just
> a <lj_mem_newgco> call that so this object is not counted. Thereby JIT
> has to emit the corresponding instructions while assembling CNEW IR.
> I will elaborate on this later.

Can we use an approach like this?

diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 3e189b1..a32a8c4 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -1835,6 +1835,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
     return;
   }
 
+  /* Global state live longer than given trace.
+  ** Increment cdatanum counter by address directly.
+  */
+  emit_gmroi(as, XG_ARITHi(XOg_ADD), RID_NONE,
+        ptr2addr(&J2G(as->J)->gc.cdatanum), (int32_t)1);
+
   /* Combine initialization of marked, gct and ctypeid. */
   emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
   emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,

> 
<snipped>
> 
> -- 
> Best regards,
> IM

I've fixed your other comments in [1].

[1]: https://lists.tarantool.org/pipermail/tarantool-patches/2020-July/018866.html

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Tarantool-patches] [RFC] rfc: luajit metrics
  2020-07-24 17:20       ` Sergey Ostanevich
  2020-07-27  2:18         ` Sergey Kaplun
@ 2020-08-11 20:13         ` Igor Munkin
  1 sibling, 0 replies; 16+ messages in thread
From: Igor Munkin @ 2020-08-11 20:13 UTC (permalink / raw)
  To: Sergey Ostanevich; +Cc: tarantool-patches

Sergos,

On 24.07.20, Sergey Ostanevich wrote:
> Hi!
> 
> 
> On 24 Jul 14:18, Sergey Kaplun wrote:

<snipped>

> > > > +	/*
> > > > +	 * Overall number of snap restores for all traces
> > > 
> > > Snap restores needs some explanation.
> > 
> > Ok, sounds reasonable. AFAIK number of snap restores equals to amount of
> > trace exits. May be this will be more informative for users.
> > 
> 
> Well. What this will tell to you as a user running a Lua code? Consider
> the number is grows - ??? And what if it shrinks??

What does a pretty high lymphocyte percentage tell you? This might be
either allergic reaction you're facing right now, or infection, or
something else. You can say nothing considering only this one index.

<jit_snap_restore> is just a single JIT-related index and it shows
nothing per se.

Since this index is absolute, it doesn't shrink.

> 
> I believe it is tighly bound to the total number of traces and the size
> of code.

It's closely related to the overall number of side exits leading to VM.
NB: side exits leading to the side trace are not counted here (since
snapshot is "replayed" and the corresponding code is added prior to the
side trace enter), but trace exits violating the loop condition guards
are (this is the way trace execution leaves the recorded loop).

> But what should it disclose to the user - how should it react,
> refactor its code?

Wild guess: there *might* be irrelevant traces. At the same time it may
be just a noise, since "succeeded" trace exits are not counted (but we
can implement it within <lj_vm_exit_interp> in VM). Anyway, the user has
to proceed with the degradation investigation at first.

> 

<snipped>

> > 
> > > > +    jit_snap_restore: 0
> > > > +    gc_freed: 2239391
> > > > +    strhash_hit: 53759
> > > > +    gc_steps_finalize: 0
> > > > +    gc_allocated: 3609318
> > > > +    gc_steps_atomic: 6
> > > > +    gc_steps_sweep: 296
> > > > +    gc_steps_sweepstring: 17920
> > > > +    jit_trace_abort: 0
> > > > +    strhash_miss: 6874
> > > 
> > > I wonder if those keys can be sorted? Consider, I want to find
> > > something by the name. Sorting helps greatly. 
> > 
> > It can be implement by user as simple as:
> > 
> > | local metrics = xjit.getmetrics()
> > | local sorted = {}
> > | for k, v in pairs(metrics.incremental) do table.insert(sorted, k) end
> > | table.sort(sorted)
> > | for _, k in ipairs(sorted) do print(k, metrics.incremental[k]) end
> > |
> > | --[[
> > | gc_allocated    8920
> > | gc_freed        3528
> > | gc_steps_atomic 0
> > | gc_steps_finalize       0
> > | gc_steps_pause  0
> > | gc_steps_propagate      62
> > | gc_steps_sweep  0
> > | gc_steps_sweepstring    0
> > | jit_snap_restore        0
> > | jit_trace_abort 0
> > | strhash_hit     118
> > | strhash_miss    7
> > | ]]
> > 
> 
> For me it is not that simple, frankly. Not in understanding - in use.
> Can we just incorporate the code into the getmetrics()?

No, <getmetrics> yields a table (unordered map if you want), not a
string. And you question relates to stats view, not its storage. I
personally prefer inspect module[1], that serializes a table sorting its
keys as you can see below:

| $ LUA_PATH="$HOME/.luarocks/share/lua/5.1/?.lua;;" \
| 	./luajit -e 'print(require("inspect")(misc.getmetrics()))'
| {
|   cdatanum = 0,
|   gc_allocated = 93583,
|   gc_freed = 28408,
|   gc_steps_atomic = 0,
|   gc_steps_finalize = 0,
|   gc_steps_pause = 1,
|   gc_steps_propagate = 218,
|   gc_steps_sweep = 0,
|   gc_steps_sweepstring = 0,
|   gc_total = 65175,
|   jit_mcode_size = 0,
|   jit_snap_restore = 0,
|   jit_trace_abort = 0,
|   jit_trace_num = 0,
|   strhash_hit = 958,
|   strhash_miss = 447,
|   strnum = 447,
|   tabnum = 69,
|   udatanum = 4
| }

> 
> Thanks,
> Sergos
> 
> > 

<snipped>

> > 
> > -- 
> > Best regards,
> > Sergey Kaplun

[1]: https://github.com/kikito/inspect.lua

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Tarantool-patches] [PATCH 1/2] metrics: add counters for metrics interested in
  2020-07-27  2:25     ` Sergey Kaplun
@ 2020-08-12  9:39       ` Igor Munkin
  0 siblings, 0 replies; 16+ messages in thread
From: Igor Munkin @ 2020-08-12  9:39 UTC (permalink / raw)
  To: Sergey Kaplun; +Cc: tarantool-patches

Sergey,

On 27.07.20, Sergey Kaplun wrote:
> Igor,
> 
> On 23.07.20, Igor Munkin wrote:
> > Sergey,
> > 
> > Thanks for the patch! Please consider my comments below.
> > 
> > On 21.07.20, Sergey Kaplun wrote:
> > 
> <snipped>
> > 
> > This counter is not incremented if cdata is allocated on trace. Consider
> > the following test:
> > | $ LUA_PATH="$HOME/.luarocks/share/lua/5.1/?.lua;;" ./luajit -joff <<EOF
> > | heredoc> local t = { }
> > | heredoc> for i = 1, 1000 do
> > | heredoc>   table.insert(t, require('ffi').new('uint64_t', i))
> > | heredoc> end
> > | heredoc> t = nil
> > | heredoc> collectgarbage('collect')
> > | heredoc> print(require('inspect')(misc.getmetrics()))
> > | heredoc> EOF
> > | {
> > |   global = {
> > |     cdatanum = 0,
> > |     gc_total = 74722,
> > |     jit_mcode_size = 0,
> > |     jit_trace_num = 0,
> > |     strnum = 534,
> > |     tabnum = 77,
> > |     udatanum = 5
> > |   },
> > |   incremental = {
> > |     gc_allocated = 173092,
> > |     gc_freed = 98370,
> > |     gc_steps_atomic = 1,
> > |     gc_steps_finalize = 0,
> > |     gc_steps_pause = 3,
> > |     gc_steps_propagate = 553,
> > |     gc_steps_sweep = 64,
> > |     gc_steps_sweepstring = 1024,
> > |     jit_snap_restore = 0,
> > |     jit_trace_abort = 0,
> > |     strhash_hit = 4021,
> > |     strhash_miss = 538
> > |   }
> > | }
> > | $ LUA_PATH="$HOME/.luarocks/share/lua/5.1/?.lua;;" ./luajit -jon <<EOF
> > | heredoc> local t = { }
> > | heredoc> for i = 1, 1000 do
> > | heredoc>   table.insert(t, require('ffi').new('uint64_t', i))
> > | heredoc> end
> > | heredoc> t = nil
> > | heredoc> collectgarbage('collect')
> > | heredoc> print(require('inspect')(misc.getmetrics()))
> > | heredoc> EOF
> > | {
> > |   global = {
> > |     cdatanum = -941,
> > |     gc_total = 76561,
> > |     jit_mcode_size = 65536,
> > |     jit_trace_num = 3,
> > |     strnum = 534,
> > |     tabnum = 77,
> > |     udatanum = 5
> > |   },
> > |   incremental = {
> > |     gc_allocated = 145299,
> > |     gc_freed = 68738,
> > |     gc_steps_atomic = 3,
> > |     gc_steps_finalize = 0,
> > |     gc_steps_pause = 2,
> > |     gc_steps_propagate = 505,
> > |     gc_steps_sweep = 64,
> > |     gc_steps_sweepstring = 1024,
> > |     jit_snap_restore = 6,
> > |     jit_trace_abort = 0,
> > |     strhash_hit = 3085,
> > |     strhash_miss = 538
> > |   }
> > | }
> > 
> > As you can see global.cdatanum is negative when JIT is on. You can take
> > a look on the compiled trace by yourself. The root cause is CNEW IR
> > semantics: for VLA/VLS cdata types <lj_cdata_newv> call is emitted and
> > the counters are fine, but for other cdata types JIT compiler emits just
> > a <lj_mem_newgco> call that so this object is not counted. Thereby JIT
> > has to emit the corresponding instructions while assembling CNEW IR.
> > I will elaborate on this later.
> 
> Can we use an approach like this?

Kinda.

> 
> diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
> index 3e189b1..a32a8c4 100644
> --- a/src/lj_asm_x86.h
> +++ b/src/lj_asm_x86.h
> @@ -1835,6 +1835,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
>      return;
>    }
>  
> +  /* Global state live longer than given trace.

Global state is a Lua universe, so this line is too obvious.

Typo: s/live/lives/.

> +  ** Increment cdatanum counter by address directly.
> +  */
> +  emit_gmroi(as, XG_ARITHi(XOg_ADD), RID_NONE,
> +        ptr2addr(&J2G(as->J)->gc.cdatanum), (int32_t)1);

I see no reason for explicit cast here:
* the parameter is <int32_t> type
* IIRC, integral literals are 32 bit integer types

There is also an additional check within <emit_gmroi> and imm8 addition
is emitted for your case.

Besides, other emitters are still left unpatched.

> +
>    /* Combine initialization of marked, gct and ctypeid. */
>    emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
>    emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
> 
> > 
> <snipped>
> > 
> > -- 
> > Best regards,
> > IM
> 
> I've fixed your other comments in [1].

Great, thanks!

> 
> [1]: https://lists.tarantool.org/pipermail/tarantool-patches/2020-July/018866.html
> 
> -- 
> Best regards,
> Sergey Kaplun

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2020-08-12  9:49 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-21 11:34 [Tarantool-patches] [PATCH 0/2] Implement LuaJIT platform metrics Sergey Kaplun
2020-07-21 11:34 ` [Tarantool-patches] [PATCH 1/2] metrics: add counters for metrics interested in Sergey Kaplun
2020-07-23 20:12   ` Igor Munkin
2020-07-24 12:00     ` Sergey Kaplun
2020-07-24 20:05       ` Igor Munkin
2020-07-25 10:00         ` Sergey Kaplun
2020-07-27  2:25     ` Sergey Kaplun
2020-08-12  9:39       ` Igor Munkin
2020-07-21 11:34 ` [Tarantool-patches] [PATCH 2/2] metrics: add C and Lua API Sergey Kaplun
2020-07-21 11:37 ` [Tarantool-patches] [RFC] rfc: luajit metrics Sergey Kaplun
2020-07-23 10:15   ` Sergey Ostanevich
2020-07-24 11:18     ` Sergey Kaplun
2020-07-24 11:22       ` Sergey Kaplun
2020-07-24 17:20       ` Sergey Ostanevich
2020-07-27  2:18         ` Sergey Kaplun
2020-08-11 20:13         ` Igor Munkin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox