[Tarantool-patches] [PATCH luajit v1] memprof: enrich symtab when new trace is allocated

Mikhail Shishatskiy m.shishatskiy at tarantool.org
Tue Aug 24 17:55:09 MSK 2021


Since symtab can be enriched with new prototypes, it
can be enriched with new traces as well. This feature
can help to investigate trace-generation-heavy apps
with memprof.

This patch introduces the functionality described above by
adding an `is_streamed` flag to GCtrace. When the profiler
encounters an allocation from a trace, it checks whether the trace
has already been streamed to the symtab. If not, extra information
about the trace is written along with the allocation source location.

| loc            := loc-lua | loc-lua-sym | loc-c | loc-trace | loc-trace-sym
| loc-trace      := trace-no
| loc-trace-sym  := trace-no sym-addr line-no

The profiler parser is adjusted to recognize new entries with
additional information about the trace's starting point.

Since this patch, traces are no longer identified by address,
as all the name collisions can be resolved. To avoid traceno
collisions, `trace generations` are introduced: the symtab parser
now stores a generation number with each trace. It is 1 for the
first trace seen with a given traceno and is incremented by one
each time the parser meets a new trace entry overriding an
existing trace with the same traceno.

| TRACE [<traceno>:<trace-gen>] started at ...

Also, the API of <utils/symtab.lua> changed: the function `parse_sym_trace`
is now public so that it can be used from the <memprof/parse.lua> module.

Follows up tarantool/tarantool#5815
---

Branch: https://github.com/tarantool/luajit/tree/shishqa/enrich-symtab-when-trace-is-allocated
Tarantool branch: https://github.com/tarantool/tarantool/tree/shishqa/enrich-symtab-when-trace-is-allocated

The results of running benchmark [1] are presented below.
Each time is the average over 15 runs; the value in
parentheses is the standard deviation.

| -------------------------------------- |
|             JIT-on, memprof-on         |
| -------------------------------------- |
|       BEFORE       |       AFTER       |
| ------------------ | ----------------- |
| 2.9010s (0.0481)   | +0.0936s (0.0557) |
| -------------------------------------- |

[1]: https://gist.github.com/Shishqa/9753063a258e38520d3c2f09d18378a5

 src/lj_jit.h                                  |  4 +++
 src/lj_memprof.c                              | 27 ++++++++++++-------
 src/lj_memprof.h                              |  5 ++--
 src/lj_trace.c                                |  3 +++
 .../misclib-memprof-lapi.test.lua             | 15 ++++++-----
 tools/memprof/humanize.lua                    |  2 +-
 tools/memprof/parse.lua                       |  6 ++++-
 tools/utils/symtab.lua                        | 21 ++++++++++-----
 8 files changed, 55 insertions(+), 28 deletions(-)

diff --git a/src/lj_jit.h b/src/lj_jit.h
index d82292f8..c456b787 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -254,7 +254,11 @@ typedef struct GCtrace {
   uint8_t sinktags;	/* Trace has SINK tags. */
   uint8_t topslot;	/* Top stack slot already checked to be allocated. */
   uint8_t linktype;	/* Type of link. */
+#if LJ_HASMEMPROF
+  uint8_t is_streamed;
+#else
   uint8_t unused1;
+#endif
 #ifdef LUAJIT_USE_GDBJIT
   void *gdbjit_entry;	/* GDB JIT entry. */
 #endif
diff --git a/src/lj_memprof.c b/src/lj_memprof.c
index fb217a2c..05266acb 100644
--- a/src/lj_memprof.c
+++ b/src/lj_memprof.c
@@ -30,7 +30,7 @@ static const unsigned char ljs_header[] = {'l', 'j', 's', LJS_CURRENT_VERSION,
 
 #if LJ_HASJIT
 
-static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
+static void dump_symtab_trace(struct lj_wbuf *out, GCtrace *trace)
 {
   const GCproto *pt = &gcref(trace->startpt)->pt;
   BCLine lineno = -1;
@@ -42,8 +42,6 @@ static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
   lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
   lua_assert(lineno >= 0);
 
-  lj_wbuf_addbyte(out, SYMTAB_TRACE);
-  lj_wbuf_addu64(out, (uint64_t)trace->mcode);
   lj_wbuf_addu64(out, (uint64_t)trace->traceno);
   /*
   ** All the existing prototypes have already been dumped, so we do not
@@ -51,6 +49,8 @@ static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
   */
   lj_wbuf_addu64(out, (uintptr_t)pt);
   lj_wbuf_addu64(out, (uint64_t)lineno);
+
+  trace->is_streamed = 1;
 }
 
 #else
@@ -85,6 +85,7 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
       break;
     }
     case (~LJ_TTRACE): {
+      lj_wbuf_addbyte(out, SYMTAB_TRACE);
       dump_symtab_trace(out, gco2trace(o));
       break;
     }
@@ -216,13 +217,19 @@ static void memprof_write_func(struct memprof *mp, uint8_t aevent)
 static void memprof_write_trace(struct memprof *mp, uint8_t aevent)
 {
   struct lj_wbuf *out = &mp->out;
-  const global_State *g = mp->g;
-  const jit_State *J = G2J(g);
-  const TraceNo traceno = g->vmstate;
-  const GCtrace *trace = traceref(J, traceno);
-  lj_wbuf_addbyte(out, aevent | ASOURCE_TRACE);
-  lj_wbuf_addu64(out, (uintptr_t)trace->mcode);
-  lj_wbuf_addu64(out, (uint64_t)traceno);
+  global_State *g = mp->g;
+  GCtrace *trace = traceref(G2J(g), g->vmstate);
+
+  if (LJ_UNLIKELY(!trace->is_streamed)) {
+
+    lj_wbuf_addbyte(out, aevent | ASOURCE_TRACE | LJM_SYMTAB);
+    dump_symtab_trace(out, trace);
+
+  } else {
+
+    lj_wbuf_addbyte(out, aevent | ASOURCE_TRACE);
+    lj_wbuf_addu64(out, (uint64_t)g->vmstate);
+  }
 }
 
 #else
diff --git a/src/lj_memprof.h b/src/lj_memprof.h
index 0f5b4c6d..c7156fe9 100644
--- a/src/lj_memprof.h
+++ b/src/lj_memprof.h
@@ -73,11 +73,12 @@
 ** event-realloc  := event-header loc? oaddr osize naddr nsize
 ** event-free     := event-header loc? oaddr osize
 ** event-header   := <BYTE>
-** loc            := loc-lua | loc-lua-sym | loc-c | loc-trace
+** loc            := loc-lua | loc-lua-sym | loc-c | loc-trace | loc-trace-sym
 ** loc-lua        := sym-addr line-no
 ** loc-lua-sym    := sym-addr sym-chunk sym-line line-no
 ** loc-c          := sym-addr
-** loc-trace      := trace-addr trace-no
+** loc-trace      := trace-no
+** loc-trace-sym  := trace-no sym-addr line-no
 ** sym-addr       := <ULEB128>
 ** sym-chunk      := string
 ** sym-line       := <ULEB128>
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 86563cdb..fd0da095 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -163,6 +163,9 @@ static void trace_save(jit_State *J, GCtrace *T)
 #ifdef LUAJIT_USE_PERFTOOLS
   perftools_addtrace(T);
 #endif
+#if LJ_HASMEMPROF
+  T->is_streamed = 0;
+#endif
 }
 
 void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T)
diff --git a/test/tarantool-tests/misclib-memprof-lapi.test.lua b/test/tarantool-tests/misclib-memprof-lapi.test.lua
index f4666da5..6f030efa 100644
--- a/test/tarantool-tests/misclib-memprof-lapi.test.lua
+++ b/test/tarantool-tests/misclib-memprof-lapi.test.lua
@@ -276,7 +276,7 @@ test:test("jit-output", function(subtest)
     return
   end
 
-  subtest:plan(3)
+  subtest:plan(4)
 
   jit.opt.start(3, "hotloop=10")
   jit.flush()
@@ -291,14 +291,15 @@ test:test("jit-output", function(subtest)
   -- See also https://github.com/tarantool/tarantool/issues/5679.
   subtest:ok(alloc[0] == nil)
 
-  -- Run already generated traces.
-  symbols, events = generate_parsed_output(default_payload)
-
-  alloc = fill_ev_type(events, symbols, "alloc")
-
   -- We expect, that loop will be compiled into a trace.
+  -- 10 allocations in interpreter mode, 1 allocation for a trace
+  -- recording and assembling and next 9 allocations will happen
+  -- inside the trace.
+  subtest:ok(check_alloc_report(
+    { line = 39, function_line = 32 }, alloc, 11
+  ))
   subtest:ok(check_alloc_report(
-    { traceno = 1, line = 37, function_line = 32 }, alloc, 20
+    { traceno = 1, line = 37, function_line = 32 }, alloc, 9
   ))
   -- See same checks with jit.off().
   subtest:ok(check_alloc_report(
diff --git a/tools/memprof/humanize.lua b/tools/memprof/humanize.lua
index 72e71080..2dc5314c 100644
--- a/tools/memprof/humanize.lua
+++ b/tools/memprof/humanize.lua
@@ -94,7 +94,7 @@ function M.describe_location(symbols, loc)
   -- If trace, which was remembered in the symtab, has not
   -- been flushed, assotiate it with a proto, where trace
   -- recording started.
-  if trace and trace.addr == loc.addr then
+  if trace then
     return symtab.demangle(symbols, loc).." started at "..
            symtab.demangle(symbols, trace.sym_loc)
   end
diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
index a540360b..00640fa7 100644
--- a/tools/memprof/parse.lua
+++ b/tools/memprof/parse.lua
@@ -84,6 +84,10 @@ local function parse_location_symbols(reader, asource, symbols)
       0,
       symbols
     )
+  elseif asource == ASOURCE_TRACE then
+    return id_location(
+      0, 0, symtab.parse_sym_trace(reader, symbols), symbols
+    )
   end
   error("Unknown asource "..asource)
 end
@@ -96,7 +100,7 @@ local function parse_location_common(reader, asource, symbols)
   elseif asource == ASOURCE_LFUNC then
     return id_location(reader:read_uleb128(), reader:read_uleb128(), 0, symbols)
   elseif asource == ASOURCE_TRACE then
-    return id_location(reader:read_uleb128(), 0, reader:read_uleb128(), symbols)
+    return id_location(0, 0, reader:read_uleb128(), symbols)
   end
   error("Unknown asource "..asource)
 end
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
index 88fdb42e..c2776790 100644
--- a/tools/utils/symtab.lua
+++ b/tools/utils/symtab.lua
@@ -33,25 +33,31 @@ function M.parse_sym_lfunc(reader, symtab)
   return sym_addr
 end
 
-local function parse_sym_trace(reader, symtab)
-  local trace_addr = reader:read_uleb128()
+function M.parse_sym_trace(reader, symtab)
   local traceno = reader:read_uleb128()
   local sym_addr = reader:read_uleb128()
   local sym_line = reader:read_uleb128()
 
+  local prev_gen = 0
+  if symtab.trace[traceno] then
+    prev_gen = symtab.trace[traceno].gen
+  end
+
   symtab.trace[traceno] = {
-    addr = trace_addr,
+    gen = prev_gen + 1,
     sym_loc = {
       addr = sym_addr,
       line = sym_line,
       traceno = 0,
     },
   }
+
+  return traceno
 end
 
 local parsers = {
   [SYMTAB_LFUNC] = M.parse_sym_lfunc,
-  [SYMTAB_TRACE] = parse_sym_trace,
+  [SYMTAB_TRACE] = M.parse_sym_trace,
 }
 
 function M.parse(reader)
@@ -119,13 +125,14 @@ local function demangle_lfunc(symtab, loc)
   return string_format("CFUNC %#x", addr)
 end
 
-local function demangle_trace(loc)
-  return string_format("TRACE [%d] 0x%x", loc.traceno, loc.addr)
+local function demangle_trace(symtab, loc)
+  local traceno = loc.traceno
+  return string_format("TRACE [%d:%d]", traceno, symtab.trace[traceno].gen)
 end
 
 function M.demangle(symtab, loc)
   if loc.traceno ~= 0 then
-    return demangle_trace(loc)
+    return demangle_trace(symtab, loc)
   end
   return demangle_lfunc(symtab, loc)
 end
-- 
2.32.0



More information about the Tarantool-patches mailing list