Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH luajit v3] sysprof: fix crash during FFUNC stream
@ 2023-07-05  8:56 Maxim Kokryashkin via Tarantool-patches
  2023-07-09 12:21 ` Sergey Kaplun via Tarantool-patches
  0 siblings, 1 reply; 11+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2023-07-05  8:56 UTC (permalink / raw)
  To: tarantool-patches, skaplun, sergeyb

Sometimes, the Lua stack can be inconsistent during
the FFUNC execution, which may lead to a sysprof
crash during the stack unwinding.

This patch replaces the `top_frame` property of `global_State`
with `lj_sysprof_topframe` structure, which contains `top_frame`
and `ffid` properties. `ffid` property makes sense only when the
LuaJIT VM state is set to `FFUNC`. That property is set to the
ffid of the fast function that VM is about to execute.
In the same time, `top_frame` property is not updated now, so
the top frame of the Lua stack can be streamed based on the ffid,
and the rest of the Lua stack can be streamed as usual.

Resolves tarantool/tarantool#8594
---
Changes in v3:
- Fixed comments as per review by Sergey
- Increased profile interval and reduced loop iterations to prevent CI
  from hanging. The test is less likely to fail before the patch, but
  it's better then nothing.

 src/lj_obj.h                                  |  7 +++-
 src/lj_sysprof.c                              | 26 ++++++++++++---
 src/vm_x64.dasc                               | 21 ++++++++++--
 src/vm_x86.dasc                               | 22 ++++++++++---
 .../gh-8594-sysprof-ffunc-crash.test.lua      | 33 +++++++++++++++++++
 5 files changed, 96 insertions(+), 13 deletions(-)
 create mode 100644 test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua

diff --git a/src/lj_obj.h b/src/lj_obj.h
index 45507e0d..186433a3 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -598,6 +598,11 @@ enum {
   GCSmax
 };
 
+struct lj_sysprof_topframe {
+  TValue *top_frame; /* Top frame for sysprof. */
+  uint8_t ffid; /* FFID of the fast function VM is about to execute. */
+};
+
 typedef struct GCState {
   GCSize total;		/* Memory currently allocated. */
   GCSize threshold;	/* Memory threshold. */
@@ -675,7 +680,7 @@ typedef struct global_State {
   MRef ctype_state;	/* Pointer to C type state. */
   GCRef gcroot[GCROOT_MAX];  /* GC roots. */
 #ifdef LJ_HASSYSPROF
-  TValue *top_frame;	/* Top frame for sysprof. */
+  struct lj_sysprof_topframe top_frame_info;	/* Top frame info for sysprof. */
 #endif
 } global_State;
 
diff --git a/src/lj_sysprof.c b/src/lj_sysprof.c
index 2e9ed9b3..0a341e16 100644
--- a/src/lj_sysprof.c
+++ b/src/lj_sysprof.c
@@ -109,6 +109,12 @@ static void stream_epilogue(struct sysprof *sp)
   lj_wbuf_addbyte(&sp->out, LJP_EPILOGUE_BYTE);
 }
 
+static void stream_ffunc_impl(struct lj_wbuf *buf, uint8_t ffid)
+{
+  lj_wbuf_addbyte(buf, LJP_FRAME_FFUNC);
+  lj_wbuf_addu64(buf, ffid);
+}
+
 static void stream_lfunc(struct lj_wbuf *buf, const GCfunc *func)
 {
   lua_assert(isluafunc(func));
@@ -129,8 +135,7 @@ static void stream_cfunc(struct lj_wbuf *buf, const GCfunc *func)
 static void stream_ffunc(struct lj_wbuf *buf, const GCfunc *func)
 {
   lua_assert(isffunc(func));
-  lj_wbuf_addbyte(buf, LJP_FRAME_FFUNC);
-  lj_wbuf_addu64(buf, func->c.ffid);
+  stream_ffunc_impl(buf, func->c.ffid);
 }
 
 static void stream_frame_lua(struct lj_wbuf *buf, const cTValue *frame)
@@ -148,7 +153,7 @@ static void stream_frame_lua(struct lj_wbuf *buf, const cTValue *frame)
     lua_assert(0);
 }
 
-static void stream_backtrace_lua(struct sysprof *sp)
+static void stream_backtrace_lua(struct sysprof *sp, uint32_t vmstate)
 {
   global_State *g = sp->g;
   struct lj_wbuf *buf = &sp->out;
@@ -158,8 +163,19 @@ static void stream_backtrace_lua(struct sysprof *sp)
   lua_assert(g != NULL);
   L = gco2th(gcref(g->cur_L));
   lua_assert(L != NULL);
+  /*
+  ** Lua stack may be inconsistent during the execution of a
+  ** fast-function, so instead of updating the `top_frame` for
+  ** it, its `ffid` is set instead. The first frame on the
+  ** result stack is streamed manually, and the rest of the
+  ** stack is streamed based on the previous `top_frame` value.
+  */
+  if (vmstate == LJ_VMST_FFUNC) {
+    uint8_t ffid = g->top_frame_info.ffid;
+    stream_ffunc_impl(buf, ffid);
+  }
 
-  top_frame = g->top_frame - 1; //(1 + LJ_FR2)
+  top_frame = g->top_frame_info.top_frame - 1;
 
   bot = tvref(L->stack) + LJ_FR2;
   /* Traverse frames backwards */
@@ -234,7 +250,7 @@ static void stream_trace(struct sysprof *sp, uint32_t vmstate)
 static void stream_guest(struct sysprof *sp, uint32_t vmstate)
 {
   lj_wbuf_addbyte(&sp->out, (uint8_t)vmstate);
-  stream_backtrace_lua(sp);
+  stream_backtrace_lua(sp, vmstate);
   stream_backtrace_host(sp);
 }
 
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 7b04b928..3a35b9f7 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -353,14 +353,29 @@
 |// it syncs with the BASE register only when the control is passed to
 |// user code. So we need to sync the BASE on each vmstate change to
 |// keep it consistent.
+|// The only execption are FFUNCs because sometimes even internal BASE
+|// stash is inconsistent for them. To address that issue, their ffid
+|// is stashed instead, so the corresponding frame can be streamed
+|// manually.
 |.macro set_vmstate_sync_base, st
 |.if LJ_HASSYSPROF
 |  set_vmstate INTERP  // Guard for non-atomic VM context restoration
-|  mov qword [DISPATCH+DISPATCH_GL(top_frame)], BASE
+|  mov qword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], BASE
 |.endif
 |  set_vmstate st
 |.endmacro
 |
+|.macro set_vmstate_ffunc
+|.if LJ_HASSYSPROF
+|  set_vmstate INTERP
+|  mov TMPR, [BASE - 16]
+|  cleartp LFUNC:TMPR
+|  mov r10b, LFUNC:TMPR->ffid // r10b is the byte-sized part of TMPR
+|  mov byte [DISPATCH+DISPATCH_GL(top_frame_info.ffid)], r10b
+|.endif
+|  set_vmstate FFUNC
+|.endmacro
+|
 |// Uses TMPRd (r10d).
 |.macro save_vmstate
 |.if not WIN
@@ -376,7 +391,7 @@
 |  set_vmstate INTERP
 |  mov TMPR, SAVE_L
 |  mov TMPR, L:TMPR->base
-|  mov qword [DISPATCH+DISPATCH_GL(top_frame)], TMPR
+|  mov qword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], TMPR
 |.endif
 |  mov TMPRd, SAVE_VMSTATE
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], TMPRd
@@ -1188,7 +1203,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
-  |  set_vmstate_sync_base FFUNC
+  |  set_vmstate_ffunc
   |.endmacro
   |
   |.macro .ffunc_1, name
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index bd1e940e..fabeec9f 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -451,14 +451,28 @@
 |// it syncs with the BASE register only when the control is passed to
 |// user code. So we need to sync the BASE on each vmstate change to
 |// keep it consistent.
+|// The only execption are FFUNCs because sometimes even internal BASE
+|// stash is inconsistent for them. To address that issue, their ffid
+|// is stashed instead, so the corresponding frame can be streamed
+|// manually.
 |.macro set_vmstate_sync_base, st
 |.if LJ_HASSYSPROF
 |  set_vmstate INTERP  // Guard for non-atomic VM context restoration
-|  mov dword [DISPATCH+DISPATCH_GL(top_frame)], BASE
+|  mov dword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], BASE
 |.endif
 |  set_vmstate st
 |.endmacro
 |
+|.macro set_vmstate_ffunc
+|.if LJ_HASSYSPROF
+|  set_vmstate INTERP
+|  mov LFUNC:XCHGd, [BASE - 8]
+|  mov r11b, LFUNC:XCHGd->ffid // r11b is the byte-sized part of XCHGd
+|  mov byte [DISPATCH+DISPATCH_GL(top_frame_info.ffid)], r11b
+|.endif
+|  set_vmstate FFUNC
+|.endmacro
+|
 |// Uses spilled ecx on x86 or XCHGd (r11d) on x64.
 |.macro save_vmstate
 |.if not WIN
@@ -485,7 +499,7 @@
 |.if LJ_HASSYSPROF
 |  mov ecx, SAVE_L
 |  mov ecx, L:ecx->base
-|  mov dword [DISPATCH+DISPATCH_GL(top_frame)], ecx
+|  mov dword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], ecx
 |.endif
 |  mov ecx, SAVE_VMSTATE
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ecx
@@ -494,7 +508,7 @@
 |.if LJ_HASSYSPROF
 |  mov XCHGd, SAVE_L
 |  mov XCHGd, L:XCHGd->base
-|  mov dword [DISPATCH+DISPATCH_GL(top_frame)], XCHGd
+|  mov dword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], XCHGd
 |.endif
 |  mov XCHGd, SAVE_VMSTATE
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], XCHGd
@@ -1488,7 +1502,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
-  |  set_vmstate_sync_base FFUNC
+  |  set_vmstate_ffunc
   |.endmacro
   |
   |.macro .ffunc_1, name
diff --git a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
new file mode 100644
index 00000000..e2973f15
--- /dev/null
+++ b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
@@ -0,0 +1,33 @@
+local tap = require('tap')
+local test = tap.test('gh-8594-sysprof-ffunc-crash'):skipcond({
+  ['Sysprof is implemented for x86_64 only'] = jit.arch ~= 'x86' and
+                                               jit.arch ~= 'x64',
+  ['Sysprof is implemented for Linux only'] = jit.os ~= "Linux",
+})
+
+test:plan(1)
+
+jit.off()
+-- XXX: Run JIT tuning functions in a safe frame to avoid errors
+-- thrown when LuaJIT is compiled with JIT engine disabled.
+pcall(jit.flush)
+
+local TMP_BINFILE = '/dev/null'
+
+local res, err = misc.sysprof.start{
+  mode = 'C',
+  interval = 3,
+  path = TMP_BINFILE,
+}
+assert(res, err)
+
+for i = 1, 1e5 do
+  tostring(i)
+end
+
+res, err = misc.sysprof.stop()
+assert(res, err)
+
+test:ok(true, 'sysprof finished successfully')
+
+os.exit(test:check() and 0 or 1)
-- 
2.40.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit v3] sysprof: fix crash during FFUNC stream
  2023-07-05  8:56 [Tarantool-patches] [PATCH luajit v3] sysprof: fix crash during FFUNC stream Maxim Kokryashkin via Tarantool-patches
@ 2023-07-09 12:21 ` Sergey Kaplun via Tarantool-patches
  0 siblings, 0 replies; 11+ messages in thread
From: Sergey Kaplun via Tarantool-patches @ 2023-07-09 12:21 UTC (permalink / raw)
  To: Maxim Kokryashkin; +Cc: tarantool-patches

Hi, Maxim!
I see only changes related to the profiling interval. May you resend the
letter with the other fixes too (maybe you've forgot to commit them)?

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit v3] sysprof: fix crash during FFUNC stream
  2023-07-10 12:28 Maxim Kokryashkin via Tarantool-patches
  2023-07-13  8:12 ` Sergey Bronnikov via Tarantool-patches
  2023-07-15 15:36 ` Sergey Kaplun via Tarantool-patches
@ 2023-11-23  6:32 ` Igor Munkin via Tarantool-patches
  2 siblings, 0 replies; 11+ messages in thread
From: Igor Munkin via Tarantool-patches @ 2023-11-23  6:32 UTC (permalink / raw)
  To: Maxim Kokryashkin; +Cc: tarantool-patches

Max,

I've checked the patchset into all long-term branches in
tarantool/luajit and bumped a new version in master, release/2.11 and
release/2.10.

On 10.07.23, Maxim Kokryashkin via Tarantool-patches wrote:
> Sometimes, the Lua stack can be inconsistent during
> the FFUNC execution, which may lead to a sysprof
> crash during the stack unwinding.
> 
> This patch replaces the `top_frame` property of `global_State`
> with `lj_sysprof_topframe` structure, which contains `top_frame`
> and `ffid` properties. `ffid` property makes sense only when the
> LuaJIT VM state is set to `FFUNC`. That property is set to the
> ffid of the fast function that VM is about to execute.
> In the same time, `top_frame` property is not updated now, so
> the top frame of the Lua stack can be streamed based on the ffid,
> and the rest of the Lua stack can be streamed as usual.
> 
> Also, this patch fixes build with plain makefile, by adding
> the `LJ_HASSYSPROF` flag support to it.
> 
> Resolves tarantool/tarantool#8594
> ---
> Changes in v3:
> - Fixed comments as per review by Sergey
> 
> Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-8594-sysprof-ffunc-crash
> PR: https://github.com/tarantool/tarantool/pull/8737
>  src/Makefile.original                         |  3 ++
>  src/lj_obj.h                                  |  7 +++-
>  src/lj_sysprof.c                              | 26 ++++++++++++---
>  src/vm_x64.dasc                               | 22 +++++++++++--
>  src/vm_x86.dasc                               | 31 ++++++++++++++---
>  .../gh-8594-sysprof-ffunc-crash.test.lua      | 33 +++++++++++++++++++
>  6 files changed, 109 insertions(+), 13 deletions(-)
>  create mode 100644 test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
> 

<snipped>

> -- 
> 2.40.1
> 

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit v3] sysprof: fix crash during FFUNC stream
  2023-11-08 23:01       ` Maxim Kokryashkin via Tarantool-patches
@ 2023-11-09  7:06         ` Sergey Bronnikov via Tarantool-patches
  0 siblings, 0 replies; 11+ messages in thread
From: Sergey Bronnikov via Tarantool-patches @ 2023-11-09  7:06 UTC (permalink / raw)
  To: Maxim Kokryashkin; +Cc: Maxim Kokryashkin, tarantool-patches

Hi, Max

On 11/9/23 02:01, Maxim Kokryashkin wrote:
> Hi!
> Thanks for the review!
> Fixed your comment, branch is force-pushed. Diff is below:
> ===
> diff --git a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
> index 347bd087..90a19156 100644
> --- a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
> +++ b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
> @@ -48,6 +48,6 @@ end
>   res, err = misc.sysprof.stop()
>   assert(res, err)
>   
> -test:ok(true, 'sysprof finished successfully')
> +test:ok(true, 'FFUNC frames were streamed correctly')
>   
>   os.exit(test:check() and 0 or 1)
> ===
>
Thanks! LGTM as I said before

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit v3] sysprof: fix crash during FFUNC stream
  2023-11-08 15:47     ` Sergey Bronnikov via Tarantool-patches
@ 2023-11-08 23:01       ` Maxim Kokryashkin via Tarantool-patches
  2023-11-09  7:06         ` Sergey Bronnikov via Tarantool-patches
  0 siblings, 1 reply; 11+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2023-11-08 23:01 UTC (permalink / raw)
  To: Sergey Bronnikov; +Cc: Maxim Kokryashkin, tarantool-patches

Hi!
Thanks for the review!
Fixed your comment, branch is force-pushed. Diff is below:
===
diff --git a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
index 347bd087..90a19156 100644
--- a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
+++ b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
@@ -48,6 +48,6 @@ end
 res, err = misc.sysprof.stop()
 assert(res, err)
 
-test:ok(true, 'sysprof finished successfully')
+test:ok(true, 'FFUNC frames were streamed correctly')
 
 os.exit(test:check() and 0 or 1)
===
On Wed, Nov 08, 2023 at 06:47:28PM +0300, Sergey Bronnikov wrote:
> Hi, Max
> 
> sorry for a delayed answer.
> 
> LGTM with a minor comment
> 
> 
> On 7/14/23 17:30, Maxim Kokryashkin wrote:
> > 
> <snipped>
> > 
> > 
> >     > +end
> >     > +
> >     > +res, err = misc.sysprof.stop()
> >     > +assert(res, err)
> >     > +
> >     > +test:ok(true, 'sysprof finished successfully')
> > 
> I propose to reflect a goal of the test in a test description. Please add
> something about ffunc to description.
> 
> The test is not about whole sysprof, but about ffunc support in sysprof.
> 
> >     > +
> >     > +os.exit(test:check() and 0 or 1)
> > 
> > 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit v3] sysprof: fix crash during FFUNC stream
  2023-07-14 14:30   ` Maxim Kokryashkin via Tarantool-patches
@ 2023-11-08 15:47     ` Sergey Bronnikov via Tarantool-patches
  2023-11-08 23:01       ` Maxim Kokryashkin via Tarantool-patches
  0 siblings, 1 reply; 11+ messages in thread
From: Sergey Bronnikov via Tarantool-patches @ 2023-11-08 15:47 UTC (permalink / raw)
  To: Maxim Kokryashkin; +Cc: Maxim Kokryashkin, tarantool-patches

[-- Attachment #1: Type: text/plain, Size: 525 bytes --]

Hi, Max

sorry for a delayed answer.

LGTM with a minor comment


On 7/14/23 17:30, Maxim Kokryashkin wrote:
>
<snipped>
>
>
>     > +end
>     > +
>     > +res, err = misc.sysprof.stop()
>     > +assert(res, err)
>     > +
>     > +test:ok(true, 'sysprof finished successfully')
>
I propose to reflect a goal of the test in a test description. Please 
add something about ffunc to description.

The test is not about whole sysprof, but about ffunc support in sysprof.

>     > +
>     > +os.exit(test:check() and 0 or 1)
>
>

[-- Attachment #2: Type: text/html, Size: 2015 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches]  [PATCH luajit v3] sysprof: fix crash during FFUNC stream
  2023-07-15 15:36 ` Sergey Kaplun via Tarantool-patches
@ 2023-07-17 12:52   ` Maxim Kokryashkin via Tarantool-patches
  0 siblings, 0 replies; 11+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2023-07-17 12:52 UTC (permalink / raw)
  To: Sergey Kaplun; +Cc: Maxim Kokryashkin, tarantool-patches

[-- Attachment #1: Type: text/plain, Size: 5762 bytes --]


Hi!
Thanks for the review!
 
> 
>>Hi, Maxim!
>>Thanks for the fixes!
>>Generally LGTM, but please consider my comments below.
>>
>>On 10.07.23, Maxim Kokryashkin wrote:
>>> Sometimes, the Lua stack can be inconsistent during
>>> the FFUNC execution, which may lead to a sysprof
>>> crash during the stack unwinding.
>>>
>>> This patch replaces the `top_frame` property of `global_State`
>>> with `lj_sysprof_topframe` structure, which contains `top_frame`
>>> and `ffid` properties. `ffid` property makes sense only when the
>>> LuaJIT VM state is set to `FFUNC`. That property is set to the
>>> ffid of the fast function that VM is about to execute.
>>> In the same time, `top_frame` property is not updated now, so
>>> the top frame of the Lua stack can be streamed based on the ffid,
>>> and the rest of the Lua stack can be streamed as usual.
>>>
>>> Also, this patch fixes build with plain makefile, by adding
>>
>>Nit: I suggest to rephrase it like "original Makefile" or
>>Makefile.original.
>>
>>Feel free to ignore.
>Fixed! The branch is force-pushed.
>>
>>> the `LJ_HASSYSPROF` flag support to it.
>>>
>>> Resolves tarantool/tarantool#8594
>>> ---
>>> Changes in v3:
>>> - Fixed comments as per review by Sergey
>>>
>>> Branch:  https://github.com/tarantool/luajit/tree/fckxorg/gh-8594-sysprof-ffunc-crash
>>> PR:  https://github.com/tarantool/tarantool/pull/8737
>>> src/Makefile.original | 3 ++
>>> src/lj_obj.h | 7 +++-
>>> src/lj_sysprof.c | 26 ++++++++++++---
>>> src/vm_x64.dasc | 22 +++++++++++--
>>> src/vm_x86.dasc | 31 ++++++++++++++---
>>> .../gh-8594-sysprof-ffunc-crash.test.lua | 33 +++++++++++++++++++
>>> 6 files changed, 109 insertions(+), 13 deletions(-)
>>> create mode 100644 test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
>>>
>>> diff --git a/src/Makefile.original b/src/Makefile.original
>>> index aedaaa73..e21a0e56 100644
>>> --- a/src/Makefile.original
>>> +++ b/src/Makefile.original
>>
>><snipped>
>>
>>> diff --git a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
>>> new file mode 100644
>>> index 00000000..e5cdeb07
>>> --- /dev/null
>>> +++ b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
>>> @@ -0,0 +1,33 @@
>>> +local tap = require('tap')
>>> +local test = tap.test('gh-8594-sysprof-ffunc-crash'):skipcond({
>>> + ['Sysprof is implemented for x86_64 only'] = jit.arch ~= 'x86' and
>>> + jit.arch ~= 'x64',
>>> + ['Sysprof is implemented for Linux only'] = jit.os ~= 'Linux',
>>> +})
>>> +
>>> +test:plan(1)
>>> +
>>> +jit.off()
>>> +-- XXX: Run JIT tuning functions in a safe frame to avoid errors
>>> +-- thrown when LuaJIT is compiled with JIT engine disabled.
>>> +pcall(jit.flush)
>>> +
>>> +local TMP_BINFILE = '/dev/null'
>>> +
>>> +local res, err = misc.sysprof.start{
>>> + mode = 'C',
>>> + interval = 3,
>>> + path = TMP_BINFILE,
>>> +}
>>> +assert(res, err)
>>> +
>>> +for i = 1, 1e5 do
>>> + tostring(i)
>>> +end
>>
>>Within these (interval/iterations) changes I hardly can see assertion failure
>>on master branch for my laptop (1/10 cases).
>>
>>| >>> git log -n1 --no-decorate --oneline
>>| 8e46d601 test: fix flaky <unit-jit-parse.test.lua>
>>| >>> LUA_PATH="src/?.lua;test/tarantool-tests/?.lua;;" src/luajit test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
>>| TAP version 13
>>| 1..1
>>| ok - sysprof finished successfully
>>
>>I don't know how to resolve this problem with our CI at death's door...
>>OTOH, we may consider that this value is enough for our CI, so I'll see
>>the problem there.
>>So, I'll agree with Sergey's opinion about this flaky test.
>I’ve added a conditional parametrization for this test case, here is the diff:
>==========================================================
>diff --git a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
>index a053e41c..347bd087 100644
>--- a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
>+++ b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
>@@ -17,17 +17,30 @@ local TMP_BINFILE = '/dev/null'
> -- XXX: The best way to test the issue is to set the profile
> -- interval to be as short as possible. However, our CI is
> -- not capable of handling such intense testing, so it was a
>--- forced decision to reduce the sampling frequency. As a
>+-- forced decision to reduce the sampling frequency for it. As a
> -- result, it is now less likely to reproduce the issue
> -- statistically, but the test case is still valid.
>+
>+-- GitHub always sets[1] the `CI` environment variable to `true`
>+-- for every step in a workflow.
>+-- [1]: https://docs.github.com/en/actions/learn-github-actions/variables#default-environment-variables
>+local CI = os.getenv('CI') == 'true'
>+
>+-- Profile interval and number of iterations for CI are
>+-- empirical. Non-CI profile interval is set to be as short
>+-- as possible, so the issue is more likely to reproduce.
>+-- Non-CI number of iterations is greater for the same reason.
>+local PROFILE_INTERVAL = CI and 3 or 1
>+local N_ITERATIONS = CI and 1e5 or 1e6
>+
> local res, err = misc.sysprof.start{
>   mode = 'C',
>-  interval = 3,
>+  interval = PROFILE_INTERVAL,
>   path = TMP_BINFILE,
> }
> assert(res, err)
> 
>-for i = 1, 1e5 do
>+for i = 1, N_ITERATIONS do
>   -- XXX: `tostring` is FFUNC.
>   tostring(i)
> end
>==========================================================
>The branch is force-pushed.
>>
>>> +
>>> +res, err = misc.sysprof.stop()
>>> +assert(res, err)
>>> +
>>> +test:ok(true, 'sysprof finished successfully')
>>> +
>>> +os.exit(test:check() and 0 or 1)
>>> --
>>> 2.40.1
>>>
>>
>>--
>>Best regards,
>>Sergey Kaplun
>--
>Best regards,
>Maxim Kokryashkin

[-- Attachment #2: Type: text/html, Size: 7567 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit v3] sysprof: fix crash during FFUNC stream
  2023-07-10 12:28 Maxim Kokryashkin via Tarantool-patches
  2023-07-13  8:12 ` Sergey Bronnikov via Tarantool-patches
@ 2023-07-15 15:36 ` Sergey Kaplun via Tarantool-patches
  2023-07-17 12:52   ` Maxim Kokryashkin via Tarantool-patches
  2023-11-23  6:32 ` Igor Munkin via Tarantool-patches
  2 siblings, 1 reply; 11+ messages in thread
From: Sergey Kaplun via Tarantool-patches @ 2023-07-15 15:36 UTC (permalink / raw)
  To: Maxim Kokryashkin; +Cc: tarantool-patches

Hi, Maxim!
Thanks for the fixes!
Generally LGTM, but please consider my comments below.

On 10.07.23, Maxim Kokryashkin wrote:
> Sometimes, the Lua stack can be inconsistent during
> the FFUNC execution, which may lead to a sysprof
> crash during the stack unwinding.
> 
> This patch replaces the `top_frame` property of `global_State`
> with `lj_sysprof_topframe` structure, which contains `top_frame`
> and `ffid` properties. `ffid` property makes sense only when the
> LuaJIT VM state is set to `FFUNC`. That property is set to the
> ffid of the fast function that VM is about to execute.
> In the same time, `top_frame` property is not updated now, so
> the top frame of the Lua stack can be streamed based on the ffid,
> and the rest of the Lua stack can be streamed as usual.
> 
> Also, this patch fixes build with plain makefile, by adding

Nit: I suggest to rephrase it like "original Makefile" or
Makefile.original.

Feel free to ignore.

> the `LJ_HASSYSPROF` flag support to it.
> 
> Resolves tarantool/tarantool#8594
> ---
> Changes in v3:
> - Fixed comments as per review by Sergey
> 
> Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-8594-sysprof-ffunc-crash
> PR: https://github.com/tarantool/tarantool/pull/8737
>  src/Makefile.original                         |  3 ++
>  src/lj_obj.h                                  |  7 +++-
>  src/lj_sysprof.c                              | 26 ++++++++++++---
>  src/vm_x64.dasc                               | 22 +++++++++++--
>  src/vm_x86.dasc                               | 31 ++++++++++++++---
>  .../gh-8594-sysprof-ffunc-crash.test.lua      | 33 +++++++++++++++++++
>  6 files changed, 109 insertions(+), 13 deletions(-)
>  create mode 100644 test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
> 
> diff --git a/src/Makefile.original b/src/Makefile.original
> index aedaaa73..e21a0e56 100644
> --- a/src/Makefile.original
> +++ b/src/Makefile.original

<snipped>

> diff --git a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
> new file mode 100644
> index 00000000..e5cdeb07
> --- /dev/null
> +++ b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
> @@ -0,0 +1,33 @@
> +local tap = require('tap')
> +local test = tap.test('gh-8594-sysprof-ffunc-crash'):skipcond({
> +  ['Sysprof is implemented for x86_64 only'] = jit.arch ~= 'x86' and
> +                                               jit.arch ~= 'x64',
> +  ['Sysprof is implemented for Linux only'] = jit.os ~= 'Linux',
> +})
> +
> +test:plan(1)
> +
> +jit.off()
> +-- XXX: Run JIT tuning functions in a safe frame to avoid errors
> +-- thrown when LuaJIT is compiled with JIT engine disabled.
> +pcall(jit.flush)
> +
> +local TMP_BINFILE = '/dev/null'
> +
> +local res, err = misc.sysprof.start{
> +  mode = 'C',
> +  interval = 3,
> +  path = TMP_BINFILE,
> +}
> +assert(res, err)
> +
> +for i = 1, 1e5 do
> +  tostring(i)
> +end

Within these (interval/iterations) changes I hardly can see assertion failure
on master branch for my laptop (1/10 cases).

| >>> git log -n1 --no-decorate --oneline
| 8e46d601 test: fix flaky <unit-jit-parse.test.lua>
| >>> LUA_PATH="src/?.lua;test/tarantool-tests/?.lua;;" src/luajit test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
| TAP version 13
| 1..1
| ok - sysprof finished successfully

I don't know how to resolve this problem with our CI at death's door...
OTOH, we may consider that this value is enough for our CI, so I'll see
the problem there.
So, I'll agree with Sergey's opinion about this flaky test.

> +
> +res, err = misc.sysprof.stop()
> +assert(res, err)
> +
> +test:ok(true, 'sysprof finished successfully')
> +
> +os.exit(test:check() and 0 or 1)
> -- 
> 2.40.1
> 

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches]  [PATCH luajit v3] sysprof: fix crash during FFUNC stream
  2023-07-13  8:12 ` Sergey Bronnikov via Tarantool-patches
@ 2023-07-14 14:30   ` Maxim Kokryashkin via Tarantool-patches
  2023-11-08 15:47     ` Sergey Bronnikov via Tarantool-patches
  0 siblings, 1 reply; 11+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2023-07-14 14:30 UTC (permalink / raw)
  To: Sergey Bronnikov; +Cc: Maxim Kokryashkin, tarantool-patches

[-- Attachment #1: Type: text/plain, Size: 13628 bytes --]


Hi!
Thanks for the review!
 
  
>Четверг, 13 июля 2023, 11:12 +03:00 от Sergey Bronnikov <sergeyb@tarantool.org>:
> 
>Hi, Max!
>
>Thanks for the patch!
>
>
>Test is passed with reverted patch, so it is useless.
It is not useless. The issue is really hard to reproduce in a stable
way, because it is vital to land in the very small region of the
VM assembly code. I’ve tried to make it happen more often
by increasing the sampling frequency, however our CI is not
capable of handling intense cases like that. I’ve reduced
the frequency, so CI is ok now, but the issue is statistically less
likely to reproduce.
 
I’ve added the comment about that to the test,
so there won’t be any questions regarding that in the future.
>
>
>As I get it right there is a non-zero possibility to catch inconsistency
>in VM state in a future,
>
>so I propose to enable sysprof in our fuzzing test [1] with a high
>sampling interval and check that profiler
>
>will work on generated Lua programs . What do you think?
Sure, I welcome that kind of testing.
>
>
>Also see my comments inline.
>
>
>1.
>https://github.com/ligurio/tarantool/commit/b8bc3293da66ded41383faf5ea913a2554d987b8
>
>
>Sergey
>
>On 7/10/23 15:28, Maxim Kokryashkin wrote:
>> Sometimes, the Lua stack can be inconsistent during
>> the FFUNC execution, which may lead to a sysprof
>> crash during the stack unwinding.
>>
>> This patch replaces the `top_frame` property of `global_State`
>> with `lj_sysprof_topframe` structure, which contains `top_frame`
>> and `ffid` properties. `ffid` property makes sense only when the
>> LuaJIT VM state is set to `FFUNC`. That property is set to the
>> ffid of the fast function that VM is about to execute.
>> In the same time, `top_frame` property is not updated now, so
>> the top frame of the Lua stack can be streamed based on the ffid,
>> and the rest of the Lua stack can be streamed as usual.
>>
>> Also, this patch fixes build with plain makefile, by adding
>> the `LJ_HASSYSPROF` flag support to it.
>>
>> Resolves tarantool/tarantool#8594
>> ---
>> Changes in v3:
>> - Fixed comments as per review by Sergey
>>
>> Branch:  https://github.com/tarantool/luajit/tree/fckxorg/gh-8594-sysprof-ffunc-crash
>> PR:  https://github.com/tarantool/tarantool/pull/8737
>> src/Makefile.original | 3 ++
>> src/lj_obj.h | 7 +++-
>> src/lj_sysprof.c | 26 ++++++++++++---
>> src/vm_x64.dasc | 22 +++++++++++--
>> src/vm_x86.dasc | 31 ++++++++++++++---
>> .../gh-8594-sysprof-ffunc-crash.test.lua | 33 +++++++++++++++++++
>> 6 files changed, 109 insertions(+), 13 deletions(-)
>> create mode 100644 test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
>>
>> diff --git a/src/Makefile.original b/src/Makefile.original
>> index aedaaa73..e21a0e56 100644
>> --- a/src/Makefile.original
>> +++ b/src/Makefile.original
>> @@ -441,6 +441,9 @@ ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
>> DASM_AFLAGS+= -D NO_UNWIND
>> TARGET_ARCH+= -DLUAJIT_NO_UNWIND
>> endif
>> +ifneq (,$(findstring LJ_HASSYSPROF 1,$(TARGET_TESTARCH)))
>> + DASM_AFLAGS+= -D LJ_HASSYSPROF
>> +endif
>> DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
>> ifeq (Windows,$(TARGET_SYS))
>> DASM_AFLAGS+= -D WIN
>> diff --git a/src/lj_obj.h b/src/lj_obj.h
>> index 45507e0d..e17316df 100644
>> --- a/src/lj_obj.h
>> +++ b/src/lj_obj.h
>> @@ -598,6 +598,11 @@ enum {
>> GCSmax
>> };
>>
>> +struct lj_sysprof_topframe {
>> + uint8_t ffid; /* FFID of the fast function VM is about to execute. */
>> + TValue *top_frame; /* Top frame for sysprof. */
>> +};
>> +
>> typedef struct GCState {
>> GCSize total; /* Memory currently allocated. */
>> GCSize threshold; /* Memory threshold. */
>> @@ -675,7 +680,7 @@ typedef struct global_State {
>> MRef ctype_state; /* Pointer to C type state. */
>> GCRef gcroot[GCROOT_MAX]; /* GC roots. */
>> #ifdef LJ_HASSYSPROF
>> - TValue *top_frame; /* Top frame for sysprof. */
>> + struct lj_sysprof_topframe top_frame_info; /* Top frame info for sysprof. */
>> #endif
>> } global_State;
>>
>> diff --git a/src/lj_sysprof.c b/src/lj_sysprof.c
>> index 2e9ed9b3..0a341e16 100644
>> --- a/src/lj_sysprof.c
>> +++ b/src/lj_sysprof.c
>> @@ -109,6 +109,12 @@ static void stream_epilogue(struct sysprof *sp)
>> lj_wbuf_addbyte(&sp->out, LJP_EPILOGUE_BYTE);
>> }
>>
>> +static void stream_ffunc_impl(struct lj_wbuf *buf, uint8_t ffid)
>> +{
>> + lj_wbuf_addbyte(buf, LJP_FRAME_FFUNC);
>> + lj_wbuf_addu64(buf, ffid);
>> +}
>> +
>> static void stream_lfunc(struct lj_wbuf *buf, const GCfunc *func)
>> {
>> lua_assert(isluafunc(func));
>> @@ -129,8 +135,7 @@ static void stream_cfunc(struct lj_wbuf *buf, const GCfunc *func)
>> static void stream_ffunc(struct lj_wbuf *buf, const GCfunc *func)
>> {
>> lua_assert(isffunc(func));
>> - lj_wbuf_addbyte(buf, LJP_FRAME_FFUNC);
>> - lj_wbuf_addu64(buf, func->c.ffid);
>> + stream_ffunc_impl(buf, func->c.ffid);
>> }
>>
>> static void stream_frame_lua(struct lj_wbuf *buf, const cTValue *frame)
>> @@ -148,7 +153,7 @@ static void stream_frame_lua(struct lj_wbuf *buf, const cTValue *frame)
>> lua_assert(0);
>> }
>>
>> -static void stream_backtrace_lua(struct sysprof *sp)
>> +static void stream_backtrace_lua(struct sysprof *sp, uint32_t vmstate)
>> {
>> global_State *g = sp->g;
>> struct lj_wbuf *buf = &sp->out;
>> @@ -158,8 +163,19 @@ static void stream_backtrace_lua(struct sysprof *sp)
>> lua_assert(g != NULL);
>> L = gco2th(gcref(g->cur_L));
>> lua_assert(L != NULL);
>> + /*
>> + ** Lua stack may be inconsistent during the execution of a
>> + ** fast-function, so instead of updating the `top_frame` for
>> + ** it, its `ffid` is set instead. The first frame on the
>> + ** result stack is streamed manually, and the rest of the
>> + ** stack is streamed based on the previous `top_frame` value.
>> + */
>> + if (vmstate == LJ_VMST_FFUNC) {
>> + uint8_t ffid = g->top_frame_info.ffid;
>> + stream_ffunc_impl(buf, ffid);
>> + }
>>
>> - top_frame = g->top_frame - 1; //(1 + LJ_FR2)
>> + top_frame = g->top_frame_info.top_frame - 1;
>>
>> bot = tvref(L->stack) + LJ_FR2;
>> /* Traverse frames backwards */
>> @@ -234,7 +250,7 @@ static void stream_trace(struct sysprof *sp, uint32_t vmstate)
>> static void stream_guest(struct sysprof *sp, uint32_t vmstate)
>> {
>> lj_wbuf_addbyte(&sp->out, (uint8_t)vmstate);
>> - stream_backtrace_lua(sp);
>> + stream_backtrace_lua(sp, vmstate);
>> stream_backtrace_host(sp);
>> }
>>
>> diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
>> index 7b04b928..2a0c3f03 100644
>> --- a/src/vm_x64.dasc
>> +++ b/src/vm_x64.dasc
>> @@ -53,6 +53,7 @@
>> |.define RDL, RCL
>> |.define TMPR, r10
>> |.define TMPRd, r10d
>> +|.define TMPRb, r10b
>> |.define ITYPE, r11
>> |.define ITYPEd, r11d
>> |
>> @@ -353,14 +354,29 @@
>> |// it syncs with the BASE register only when the control is passed to
>> |// user code. So we need to sync the BASE on each vmstate change to
>> |// keep it consistent.
>> +|// The only exception are FFUNCs because sometimes even internal BASE
>> +|// stash is inconsistent for them. To address that issue, their ffid
>> +|// is stashed instead, so the corresponding frame can be streamed
>> +|// manually.
>> |.macro set_vmstate_sync_base, st
>> |.if LJ_HASSYSPROF
>> | set_vmstate INTERP // Guard for non-atomic VM context restoration
>> -| mov qword [DISPATCH+DISPATCH_GL(top_frame)], BASE
>> +| mov qword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], BASE
>> |.endif
>> | set_vmstate st
>> |.endmacro
>> |
>> +|.macro set_vmstate_ffunc
>> +|.if LJ_HASSYSPROF
>> +| set_vmstate INTERP
>> +| mov TMPR, [BASE - 16]
>> +| cleartp LFUNC:TMPR // Obtain plain address value. Equivalent of `gcval`.
>> +| mov TMPRb, LFUNC:TMPR->ffid
>> +| mov byte [DISPATCH+DISPATCH_GL(top_frame_info.ffid)], TMPRb
>> +|.endif
>> +| set_vmstate FFUNC
>> +|.endmacro
>> +|
>> |// Uses TMPRd (r10d).
>> |.macro save_vmstate
>> |.if not WIN
>> @@ -376,7 +392,7 @@
>> | set_vmstate INTERP
>> | mov TMPR, SAVE_L
>> | mov TMPR, L:TMPR->base
>> -| mov qword [DISPATCH+DISPATCH_GL(top_frame)], TMPR
>> +| mov qword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], TMPR
>> |.endif
>> | mov TMPRd, SAVE_VMSTATE
>> | mov dword [DISPATCH+DISPATCH_GL(vmstate)], TMPRd
>> @@ -1188,7 +1204,7 @@ static void build_subroutines(BuildCtx *ctx)
>> |
>> |.macro .ffunc, name
>> |->ff_ .. name:
>> - | set_vmstate_sync_base FFUNC
>> + | set_vmstate_ffunc
>> |.endmacro
>> |
>> |.macro .ffunc_1, name
>> diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
>> index bd1e940e..ff388d58 100644
>> --- a/src/vm_x86.dasc
>> +++ b/src/vm_x86.dasc
>> @@ -101,6 +101,7 @@
>> |.define FCARG2, CARG2d
>> |
>> |.define XCHGd, r11d // TMP on x64, used for exchange.
>> +|.define XCHGb, r11b // TMPRb on x64, used for exchange.
>> |.endif
>> |
>> |// Type definitions. Some of these are only used for documentation.
>> @@ -451,14 +452,36 @@
>> |// it syncs with the BASE register only when the control is passed to
>> |// user code. So we need to sync the BASE on each vmstate change to
>> |// keep it consistent.
>> +|// The only exception are FFUNCs because sometimes even internal BASE
>> +|// stash is inconsistent for them. To address that issue, their ffid
>> +|// is stashed instead, so the corresponding frame can be streamed
>> +|// manually.
>> |.macro set_vmstate_sync_base, st
>> |.if LJ_HASSYSPROF
>> | set_vmstate INTERP // Guard for non-atomic VM context restoration
>> -| mov dword [DISPATCH+DISPATCH_GL(top_frame)], BASE
>> +| mov dword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], BASE
>> |.endif
>> | set_vmstate st
>> |.endmacro
>> |
>> +|.macro set_vmstate_ffunc
>> +|.if LJ_HASSYSPROF
>> +| set_vmstate INTERP
>> +|.if not X64
>> +| mov SPILLECX, ecx
>> +| mov LFUNC:ecx, [BASE - 4]
>> +| mov cl, LFUNC:ecx->ffid
>> +| mov byte [DISPATCH+DISPATCH_GL(top_frame_info.ffid)], cl
>> +| mov ecx, SPILLECX
>> +|.else // X64
>> +| mov LFUNC:XCHGd, [BASE - 8]
>> +| mov XCHGb, LFUNC:XCHGd->ffid
>> +| mov byte [DISPATCH+DISPATCH_GL(top_frame_info.ffid)], XCHGb
>> +|.endif // X64
>> +|.endif // LJ_HASSYSPROF
>> +| set_vmstate FFUNC
>> +|.endmacro
>> +|
>> |// Uses spilled ecx on x86 or XCHGd (r11d) on x64.
>> |.macro save_vmstate
>> |.if not WIN
>> @@ -485,7 +508,7 @@
>> |.if LJ_HASSYSPROF
>> | mov ecx, SAVE_L
>> | mov ecx, L:ecx->base
>> -| mov dword [DISPATCH+DISPATCH_GL(top_frame)], ecx
>> +| mov dword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], ecx
>> |.endif
>> | mov ecx, SAVE_VMSTATE
>> | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ecx
>> @@ -494,7 +517,7 @@
>> |.if LJ_HASSYSPROF
>> | mov XCHGd, SAVE_L
>> | mov XCHGd, L:XCHGd->base
>> -| mov dword [DISPATCH+DISPATCH_GL(top_frame)], XCHGd
>> +| mov dword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], XCHGd
>> |.endif
>> | mov XCHGd, SAVE_VMSTATE
>> | mov dword [DISPATCH+DISPATCH_GL(vmstate)], XCHGd
>> @@ -1488,7 +1511,7 @@ static void build_subroutines(BuildCtx *ctx)
>> |
>> |.macro .ffunc, name
>> |->ff_ .. name:
>> - | set_vmstate_sync_base FFUNC
>> + | set_vmstate_ffunc
>> |.endmacro
>> |
>> |.macro .ffunc_1, name
>> diff --git a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
>> new file mode 100644
>> index 00000000..e5cdeb07
>> --- /dev/null
>> +++ b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
>> @@ -0,0 +1,33 @@
>> +local tap = require('tap')
>> +local test = tap.test('gh-8594-sysprof-ffunc-crash'):skipcond({
>> + ['Sysprof is implemented for x86_64 only'] = jit.arch ~= 'x86' and
>> + jit.arch ~= 'x64',
>> + ['Sysprof is implemented for Linux only'] = jit.os ~= 'Linux',
>> +})
>> +
>> +test:plan(1)
>> +
>> +jit.off()
>> +-- XXX: Run JIT tuning functions in a safe frame to avoid errors
>> +-- thrown when LuaJIT is compiled with JIT engine disabled.
>> +pcall(jit.flush)
>> +
>> +local TMP_BINFILE = '/dev/null'
>> +
>> +local res, err = misc.sysprof.start{
>> + mode = 'C',
>
>In the reproducer from bug description "L" mode is used.
>
>Why do you use "C" mode here?
>
>> + interval = 3,
>> + path = TMP_BINFILE,
>> +}
>> +assert(res, err)
>> +
>> +for i = 1, 1e5 do
>> + tostring(i)
>
>Please add a comment, that we use "tostring" here, because "tostring" is
>a fast function.
>
>(Sure it's may be obvious for you, but it will be more clear for those
>who will read it.)
Added.
 
Here is the diff:
================================================
diff --git a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
index e5cdeb07..a053e41c 100644
--- a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
+++ b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
@@ -14,6 +14,12 @@ pcall(jit.flush)
 
 local TMP_BINFILE = '/dev/null'
 
+-- XXX: The best way to test the issue is to set the profile
+-- interval to be as short as possible. However, our CI is
+-- not capable of handling such intense testing, so it was a
+-- forced decision to reduce the sampling frequency. As a
+-- result, it is now less likely to reproduce the issue
+-- statistically, but the test case is still valid.
 local res, err = misc.sysprof.start{
   mode = 'C',
   interval = 3,
@@ -22,6 +28,7 @@ local res, err = misc.sysprof.start{
 assert(res, err)
 
 for i = 1, 1e5 do
+  -- XXX: `tostring` is FFUNC.
   tostring(i)
 end
================================================
Branch is force-pushed.
 
>
>> +end
>> +
>> +res, err = misc.sysprof.stop()
>> +assert(res, err)
>> +
>> +test:ok(true, 'sysprof finished successfully')
>> +
>> +os.exit(test:check() and 0 or 1)
--
Best regards,
Maxim Kokryashkin
 

[-- Attachment #2: Type: text/html, Size: 17026 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit v3] sysprof: fix crash during FFUNC stream
  2023-07-10 12:28 Maxim Kokryashkin via Tarantool-patches
@ 2023-07-13  8:12 ` Sergey Bronnikov via Tarantool-patches
  2023-07-14 14:30   ` Maxim Kokryashkin via Tarantool-patches
  2023-07-15 15:36 ` Sergey Kaplun via Tarantool-patches
  2023-11-23  6:32 ` Igor Munkin via Tarantool-patches
  2 siblings, 1 reply; 11+ messages in thread
From: Sergey Bronnikov via Tarantool-patches @ 2023-07-13  8:12 UTC (permalink / raw)
  To: Maxim Kokryashkin, tarantool-patches, skaplun

Hi, Max!

Thanks for the patch!


Test is passed with reverted patch, so it is useless.


As I get it right there is a non-zero possibility to catch inconsistency 
in VM state in a future,

so I propose to enable sysprof in our fuzzing test [1] with a high 
sampling interval and check that profiler

will work on generated Lua programs . What do you think?


Also see my comments inline.


1. 
https://github.com/ligurio/tarantool/commit/b8bc3293da66ded41383faf5ea913a2554d987b8


Sergey

On 7/10/23 15:28, Maxim Kokryashkin wrote:
> Sometimes, the Lua stack can be inconsistent during
> the FFUNC execution, which may lead to a sysprof
> crash during the stack unwinding.
>
> This patch replaces the `top_frame` property of `global_State`
> with `lj_sysprof_topframe` structure, which contains `top_frame`
> and `ffid` properties. `ffid` property makes sense only when the
> LuaJIT VM state is set to `FFUNC`. That property is set to the
> ffid of the fast function that VM is about to execute.
> In the same time, `top_frame` property is not updated now, so
> the top frame of the Lua stack can be streamed based on the ffid,
> and the rest of the Lua stack can be streamed as usual.
>
> Also, this patch fixes build with plain makefile, by adding
> the `LJ_HASSYSPROF` flag support to it.
>
> Resolves tarantool/tarantool#8594
> ---
> Changes in v3:
> - Fixed comments as per review by Sergey
>
> Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-8594-sysprof-ffunc-crash
> PR: https://github.com/tarantool/tarantool/pull/8737
>   src/Makefile.original                         |  3 ++
>   src/lj_obj.h                                  |  7 +++-
>   src/lj_sysprof.c                              | 26 ++++++++++++---
>   src/vm_x64.dasc                               | 22 +++++++++++--
>   src/vm_x86.dasc                               | 31 ++++++++++++++---
>   .../gh-8594-sysprof-ffunc-crash.test.lua      | 33 +++++++++++++++++++
>   6 files changed, 109 insertions(+), 13 deletions(-)
>   create mode 100644 test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
>
> diff --git a/src/Makefile.original b/src/Makefile.original
> index aedaaa73..e21a0e56 100644
> --- a/src/Makefile.original
> +++ b/src/Makefile.original
> @@ -441,6 +441,9 @@ ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
>     DASM_AFLAGS+= -D NO_UNWIND
>     TARGET_ARCH+= -DLUAJIT_NO_UNWIND
>   endif
> +ifneq (,$(findstring LJ_HASSYSPROF 1,$(TARGET_TESTARCH)))
> +  DASM_AFLAGS+= -D LJ_HASSYSPROF
> +endif
>   DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
>   ifeq (Windows,$(TARGET_SYS))
>     DASM_AFLAGS+= -D WIN
> diff --git a/src/lj_obj.h b/src/lj_obj.h
> index 45507e0d..e17316df 100644
> --- a/src/lj_obj.h
> +++ b/src/lj_obj.h
> @@ -598,6 +598,11 @@ enum {
>     GCSmax
>   };
>   
> +struct lj_sysprof_topframe {
> +  uint8_t ffid; /* FFID of the fast function VM is about to execute. */
> +  TValue *top_frame; /* Top frame for sysprof. */
> +};
> +
>   typedef struct GCState {
>     GCSize total;		/* Memory currently allocated. */
>     GCSize threshold;	/* Memory threshold. */
> @@ -675,7 +680,7 @@ typedef struct global_State {
>     MRef ctype_state;	/* Pointer to C type state. */
>     GCRef gcroot[GCROOT_MAX];  /* GC roots. */
>   #ifdef LJ_HASSYSPROF
> -  TValue *top_frame;	/* Top frame for sysprof. */
> +  struct lj_sysprof_topframe top_frame_info;	/* Top frame info for sysprof. */
>   #endif
>   } global_State;
>   
> diff --git a/src/lj_sysprof.c b/src/lj_sysprof.c
> index 2e9ed9b3..0a341e16 100644
> --- a/src/lj_sysprof.c
> +++ b/src/lj_sysprof.c
> @@ -109,6 +109,12 @@ static void stream_epilogue(struct sysprof *sp)
>     lj_wbuf_addbyte(&sp->out, LJP_EPILOGUE_BYTE);
>   }
>   
> +static void stream_ffunc_impl(struct lj_wbuf *buf, uint8_t ffid)
> +{
> +  lj_wbuf_addbyte(buf, LJP_FRAME_FFUNC);
> +  lj_wbuf_addu64(buf, ffid);
> +}
> +
>   static void stream_lfunc(struct lj_wbuf *buf, const GCfunc *func)
>   {
>     lua_assert(isluafunc(func));
> @@ -129,8 +135,7 @@ static void stream_cfunc(struct lj_wbuf *buf, const GCfunc *func)
>   static void stream_ffunc(struct lj_wbuf *buf, const GCfunc *func)
>   {
>     lua_assert(isffunc(func));
> -  lj_wbuf_addbyte(buf, LJP_FRAME_FFUNC);
> -  lj_wbuf_addu64(buf, func->c.ffid);
> +  stream_ffunc_impl(buf, func->c.ffid);
>   }
>   
>   static void stream_frame_lua(struct lj_wbuf *buf, const cTValue *frame)
> @@ -148,7 +153,7 @@ static void stream_frame_lua(struct lj_wbuf *buf, const cTValue *frame)
>       lua_assert(0);
>   }
>   
> -static void stream_backtrace_lua(struct sysprof *sp)
> +static void stream_backtrace_lua(struct sysprof *sp, uint32_t vmstate)
>   {
>     global_State *g = sp->g;
>     struct lj_wbuf *buf = &sp->out;
> @@ -158,8 +163,19 @@ static void stream_backtrace_lua(struct sysprof *sp)
>     lua_assert(g != NULL);
>     L = gco2th(gcref(g->cur_L));
>     lua_assert(L != NULL);
> +  /*
> +  ** Lua stack may be inconsistent during the execution of a
> +  ** fast-function, so instead of updating the `top_frame` for
> +  ** it, its `ffid` is set instead. The first frame on the
> +  ** result stack is streamed manually, and the rest of the
> +  ** stack is streamed based on the previous `top_frame` value.
> +  */
> +  if (vmstate == LJ_VMST_FFUNC) {
> +    uint8_t ffid = g->top_frame_info.ffid;
> +    stream_ffunc_impl(buf, ffid);
> +  }
>   
> -  top_frame = g->top_frame - 1; //(1 + LJ_FR2)
> +  top_frame = g->top_frame_info.top_frame - 1;
>   
>     bot = tvref(L->stack) + LJ_FR2;
>     /* Traverse frames backwards */
> @@ -234,7 +250,7 @@ static void stream_trace(struct sysprof *sp, uint32_t vmstate)
>   static void stream_guest(struct sysprof *sp, uint32_t vmstate)
>   {
>     lj_wbuf_addbyte(&sp->out, (uint8_t)vmstate);
> -  stream_backtrace_lua(sp);
> +  stream_backtrace_lua(sp, vmstate);
>     stream_backtrace_host(sp);
>   }
>   
> diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
> index 7b04b928..2a0c3f03 100644
> --- a/src/vm_x64.dasc
> +++ b/src/vm_x64.dasc
> @@ -53,6 +53,7 @@
>   |.define RDL,		RCL
>   |.define TMPR,		r10
>   |.define TMPRd,		r10d
> +|.define TMPRb,		r10b
>   |.define ITYPE,		r11
>   |.define ITYPEd,	r11d
>   |
> @@ -353,14 +354,29 @@
>   |// it syncs with the BASE register only when the control is passed to
>   |// user code. So we need to sync the BASE on each vmstate change to
>   |// keep it consistent.
> +|// The only exception are FFUNCs because sometimes even internal BASE
> +|// stash is inconsistent for them. To address that issue, their ffid
> +|// is stashed instead, so the corresponding frame can be streamed
> +|// manually.
>   |.macro set_vmstate_sync_base, st
>   |.if LJ_HASSYSPROF
>   |  set_vmstate INTERP  // Guard for non-atomic VM context restoration
> -|  mov qword [DISPATCH+DISPATCH_GL(top_frame)], BASE
> +|  mov qword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], BASE
>   |.endif
>   |  set_vmstate st
>   |.endmacro
>   |
> +|.macro set_vmstate_ffunc
> +|.if LJ_HASSYSPROF
> +|  set_vmstate INTERP
> +|  mov TMPR, [BASE - 16]
> +|  cleartp LFUNC:TMPR // Obtain plain address value. Equivalent of `gcval`.
> +|  mov TMPRb, LFUNC:TMPR->ffid
> +|  mov byte [DISPATCH+DISPATCH_GL(top_frame_info.ffid)], TMPRb
> +|.endif
> +|  set_vmstate FFUNC
> +|.endmacro
> +|
>   |// Uses TMPRd (r10d).
>   |.macro save_vmstate
>   |.if not WIN
> @@ -376,7 +392,7 @@
>   |  set_vmstate INTERP
>   |  mov TMPR, SAVE_L
>   |  mov TMPR, L:TMPR->base
> -|  mov qword [DISPATCH+DISPATCH_GL(top_frame)], TMPR
> +|  mov qword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], TMPR
>   |.endif
>   |  mov TMPRd, SAVE_VMSTATE
>   |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], TMPRd
> @@ -1188,7 +1204,7 @@ static void build_subroutines(BuildCtx *ctx)
>     |
>     |.macro .ffunc, name
>     |->ff_ .. name:
> -  |  set_vmstate_sync_base FFUNC
> +  |  set_vmstate_ffunc
>     |.endmacro
>     |
>     |.macro .ffunc_1, name
> diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
> index bd1e940e..ff388d58 100644
> --- a/src/vm_x86.dasc
> +++ b/src/vm_x86.dasc
> @@ -101,6 +101,7 @@
>   |.define FCARG2,	CARG2d
>   |
>   |.define XCHGd,		r11d		// TMP on x64, used for exchange.
> +|.define XCHGb,		r11b		// TMPRb on x64, used for exchange.
>   |.endif
>   |
>   |// Type definitions. Some of these are only used for documentation.
> @@ -451,14 +452,36 @@
>   |// it syncs with the BASE register only when the control is passed to
>   |// user code. So we need to sync the BASE on each vmstate change to
>   |// keep it consistent.
> +|// The only exception are FFUNCs because sometimes even internal BASE
> +|// stash is inconsistent for them. To address that issue, their ffid
> +|// is stashed instead, so the corresponding frame can be streamed
> +|// manually.
>   |.macro set_vmstate_sync_base, st
>   |.if LJ_HASSYSPROF
>   |  set_vmstate INTERP  // Guard for non-atomic VM context restoration
> -|  mov dword [DISPATCH+DISPATCH_GL(top_frame)], BASE
> +|  mov dword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], BASE
>   |.endif
>   |  set_vmstate st
>   |.endmacro
>   |
> +|.macro set_vmstate_ffunc
> +|.if LJ_HASSYSPROF
> +|  set_vmstate INTERP
> +|.if not X64
> +|  mov SPILLECX, ecx
> +|  mov LFUNC:ecx, [BASE - 4]
> +|  mov cl, LFUNC:ecx->ffid
> +|  mov byte [DISPATCH+DISPATCH_GL(top_frame_info.ffid)], cl
> +|  mov ecx, SPILLECX
> +|.else // X64
> +|  mov LFUNC:XCHGd, [BASE - 8]
> +|  mov XCHGb, LFUNC:XCHGd->ffid
> +|  mov byte [DISPATCH+DISPATCH_GL(top_frame_info.ffid)], XCHGb
> +|.endif // X64
> +|.endif // LJ_HASSYSPROF
> +|  set_vmstate FFUNC
> +|.endmacro
> +|
>   |// Uses spilled ecx on x86 or XCHGd (r11d) on x64.
>   |.macro save_vmstate
>   |.if not WIN
> @@ -485,7 +508,7 @@
>   |.if LJ_HASSYSPROF
>   |  mov ecx, SAVE_L
>   |  mov ecx, L:ecx->base
> -|  mov dword [DISPATCH+DISPATCH_GL(top_frame)], ecx
> +|  mov dword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], ecx
>   |.endif
>   |  mov ecx, SAVE_VMSTATE
>   |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ecx
> @@ -494,7 +517,7 @@
>   |.if LJ_HASSYSPROF
>   |  mov XCHGd, SAVE_L
>   |  mov XCHGd, L:XCHGd->base
> -|  mov dword [DISPATCH+DISPATCH_GL(top_frame)], XCHGd
> +|  mov dword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], XCHGd
>   |.endif
>   |  mov XCHGd, SAVE_VMSTATE
>   |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], XCHGd
> @@ -1488,7 +1511,7 @@ static void build_subroutines(BuildCtx *ctx)
>     |
>     |.macro .ffunc, name
>     |->ff_ .. name:
> -  |  set_vmstate_sync_base FFUNC
> +  |  set_vmstate_ffunc
>     |.endmacro
>     |
>     |.macro .ffunc_1, name
> diff --git a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
> new file mode 100644
> index 00000000..e5cdeb07
> --- /dev/null
> +++ b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
> @@ -0,0 +1,33 @@
> +local tap = require('tap')
> +local test = tap.test('gh-8594-sysprof-ffunc-crash'):skipcond({
> +  ['Sysprof is implemented for x86_64 only'] = jit.arch ~= 'x86' and
> +                                               jit.arch ~= 'x64',
> +  ['Sysprof is implemented for Linux only'] = jit.os ~= 'Linux',
> +})
> +
> +test:plan(1)
> +
> +jit.off()
> +-- XXX: Run JIT tuning functions in a safe frame to avoid errors
> +-- thrown when LuaJIT is compiled with JIT engine disabled.
> +pcall(jit.flush)
> +
> +local TMP_BINFILE = '/dev/null'
> +
> +local res, err = misc.sysprof.start{
> +  mode = 'C',

In the reproducer from bug description "L" mode is used.

Why do you use "C" mode here?

> +  interval = 3,
> +  path = TMP_BINFILE,
> +}
> +assert(res, err)
> +
> +for i = 1, 1e5 do
> +  tostring(i)

Please add a comment, that we use "tostring" here, because "tostring" is 
a fast function.

(Sure it's may be obvious for you, but it will be more clear for those 
who will read it.)

> +end
> +
> +res, err = misc.sysprof.stop()
> +assert(res, err)
> +
> +test:ok(true, 'sysprof finished successfully')
> +
> +os.exit(test:check() and 0 or 1)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Tarantool-patches] [PATCH luajit v3] sysprof: fix crash during FFUNC stream
@ 2023-07-10 12:28 Maxim Kokryashkin via Tarantool-patches
  2023-07-13  8:12 ` Sergey Bronnikov via Tarantool-patches
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2023-07-10 12:28 UTC (permalink / raw)
  To: tarantool-patches, skaplun, sergeyb

Sometimes, the Lua stack can be inconsistent during
the FFUNC execution, which may lead to a sysprof
crash during the stack unwinding.

This patch replaces the `top_frame` property of `global_State`
with `lj_sysprof_topframe` structure, which contains `top_frame`
and `ffid` properties. `ffid` property makes sense only when the
LuaJIT VM state is set to `FFUNC`. That property is set to the
ffid of the fast function that VM is about to execute.
In the same time, `top_frame` property is not updated now, so
the top frame of the Lua stack can be streamed based on the ffid,
and the rest of the Lua stack can be streamed as usual.

Also, this patch fixes build with plain makefile, by adding
the `LJ_HASSYSPROF` flag support to it.

Resolves tarantool/tarantool#8594
---
Changes in v3:
- Fixed comments as per review by Sergey

Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-8594-sysprof-ffunc-crash
PR: https://github.com/tarantool/tarantool/pull/8737
 src/Makefile.original                         |  3 ++
 src/lj_obj.h                                  |  7 +++-
 src/lj_sysprof.c                              | 26 ++++++++++++---
 src/vm_x64.dasc                               | 22 +++++++++++--
 src/vm_x86.dasc                               | 31 ++++++++++++++---
 .../gh-8594-sysprof-ffunc-crash.test.lua      | 33 +++++++++++++++++++
 6 files changed, 109 insertions(+), 13 deletions(-)
 create mode 100644 test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua

diff --git a/src/Makefile.original b/src/Makefile.original
index aedaaa73..e21a0e56 100644
--- a/src/Makefile.original
+++ b/src/Makefile.original
@@ -441,6 +441,9 @@ ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
   DASM_AFLAGS+= -D NO_UNWIND
   TARGET_ARCH+= -DLUAJIT_NO_UNWIND
 endif
+ifneq (,$(findstring LJ_HASSYSPROF 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D LJ_HASSYSPROF
+endif
 DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
 ifeq (Windows,$(TARGET_SYS))
   DASM_AFLAGS+= -D WIN
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 45507e0d..e17316df 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -598,6 +598,11 @@ enum {
   GCSmax
 };
 
+struct lj_sysprof_topframe {
+  uint8_t ffid; /* FFID of the fast function VM is about to execute. */
+  TValue *top_frame; /* Top frame for sysprof. */
+};
+
 typedef struct GCState {
   GCSize total;		/* Memory currently allocated. */
   GCSize threshold;	/* Memory threshold. */
@@ -675,7 +680,7 @@ typedef struct global_State {
   MRef ctype_state;	/* Pointer to C type state. */
   GCRef gcroot[GCROOT_MAX];  /* GC roots. */
 #ifdef LJ_HASSYSPROF
-  TValue *top_frame;	/* Top frame for sysprof. */
+  struct lj_sysprof_topframe top_frame_info;	/* Top frame info for sysprof. */
 #endif
 } global_State;
 
diff --git a/src/lj_sysprof.c b/src/lj_sysprof.c
index 2e9ed9b3..0a341e16 100644
--- a/src/lj_sysprof.c
+++ b/src/lj_sysprof.c
@@ -109,6 +109,12 @@ static void stream_epilogue(struct sysprof *sp)
   lj_wbuf_addbyte(&sp->out, LJP_EPILOGUE_BYTE);
 }
 
+static void stream_ffunc_impl(struct lj_wbuf *buf, uint8_t ffid)
+{
+  lj_wbuf_addbyte(buf, LJP_FRAME_FFUNC);
+  lj_wbuf_addu64(buf, ffid);
+}
+
 static void stream_lfunc(struct lj_wbuf *buf, const GCfunc *func)
 {
   lua_assert(isluafunc(func));
@@ -129,8 +135,7 @@ static void stream_cfunc(struct lj_wbuf *buf, const GCfunc *func)
 static void stream_ffunc(struct lj_wbuf *buf, const GCfunc *func)
 {
   lua_assert(isffunc(func));
-  lj_wbuf_addbyte(buf, LJP_FRAME_FFUNC);
-  lj_wbuf_addu64(buf, func->c.ffid);
+  stream_ffunc_impl(buf, func->c.ffid);
 }
 
 static void stream_frame_lua(struct lj_wbuf *buf, const cTValue *frame)
@@ -148,7 +153,7 @@ static void stream_frame_lua(struct lj_wbuf *buf, const cTValue *frame)
     lua_assert(0);
 }
 
-static void stream_backtrace_lua(struct sysprof *sp)
+static void stream_backtrace_lua(struct sysprof *sp, uint32_t vmstate)
 {
   global_State *g = sp->g;
   struct lj_wbuf *buf = &sp->out;
@@ -158,8 +163,19 @@ static void stream_backtrace_lua(struct sysprof *sp)
   lua_assert(g != NULL);
   L = gco2th(gcref(g->cur_L));
   lua_assert(L != NULL);
+  /*
+  ** Lua stack may be inconsistent during the execution of a
+  ** fast-function, so instead of updating the `top_frame` for
+  ** it, its `ffid` is set instead. The first frame on the
+  ** result stack is streamed manually, and the rest of the
+  ** stack is streamed based on the previous `top_frame` value.
+  */
+  if (vmstate == LJ_VMST_FFUNC) {
+    uint8_t ffid = g->top_frame_info.ffid;
+    stream_ffunc_impl(buf, ffid);
+  }
 
-  top_frame = g->top_frame - 1; //(1 + LJ_FR2)
+  top_frame = g->top_frame_info.top_frame - 1;
 
   bot = tvref(L->stack) + LJ_FR2;
   /* Traverse frames backwards */
@@ -234,7 +250,7 @@ static void stream_trace(struct sysprof *sp, uint32_t vmstate)
 static void stream_guest(struct sysprof *sp, uint32_t vmstate)
 {
   lj_wbuf_addbyte(&sp->out, (uint8_t)vmstate);
-  stream_backtrace_lua(sp);
+  stream_backtrace_lua(sp, vmstate);
   stream_backtrace_host(sp);
 }
 
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 7b04b928..2a0c3f03 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -53,6 +53,7 @@
 |.define RDL,		RCL
 |.define TMPR,		r10
 |.define TMPRd,		r10d
+|.define TMPRb,		r10b
 |.define ITYPE,		r11
 |.define ITYPEd,	r11d
 |
@@ -353,14 +354,29 @@
 |// it syncs with the BASE register only when the control is passed to
 |// user code. So we need to sync the BASE on each vmstate change to
 |// keep it consistent.
+|// The only exception are FFUNCs because sometimes even internal BASE
+|// stash is inconsistent for them. To address that issue, their ffid
+|// is stashed instead, so the corresponding frame can be streamed
+|// manually.
 |.macro set_vmstate_sync_base, st
 |.if LJ_HASSYSPROF
 |  set_vmstate INTERP  // Guard for non-atomic VM context restoration
-|  mov qword [DISPATCH+DISPATCH_GL(top_frame)], BASE
+|  mov qword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], BASE
 |.endif
 |  set_vmstate st
 |.endmacro
 |
+|.macro set_vmstate_ffunc
+|.if LJ_HASSYSPROF
+|  set_vmstate INTERP
+|  mov TMPR, [BASE - 16]
+|  cleartp LFUNC:TMPR // Obtain plain address value. Equivalent of `gcval`.
+|  mov TMPRb, LFUNC:TMPR->ffid
+|  mov byte [DISPATCH+DISPATCH_GL(top_frame_info.ffid)], TMPRb
+|.endif
+|  set_vmstate FFUNC
+|.endmacro
+|
 |// Uses TMPRd (r10d).
 |.macro save_vmstate
 |.if not WIN
@@ -376,7 +392,7 @@
 |  set_vmstate INTERP
 |  mov TMPR, SAVE_L
 |  mov TMPR, L:TMPR->base
-|  mov qword [DISPATCH+DISPATCH_GL(top_frame)], TMPR
+|  mov qword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], TMPR
 |.endif
 |  mov TMPRd, SAVE_VMSTATE
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], TMPRd
@@ -1188,7 +1204,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
-  |  set_vmstate_sync_base FFUNC
+  |  set_vmstate_ffunc
   |.endmacro
   |
   |.macro .ffunc_1, name
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index bd1e940e..ff388d58 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -101,6 +101,7 @@
 |.define FCARG2,	CARG2d
 |
 |.define XCHGd,		r11d		// TMP on x64, used for exchange.
+|.define XCHGb,		r11b		// TMPRb on x64, used for exchange.
 |.endif
 |
 |// Type definitions. Some of these are only used for documentation.
@@ -451,14 +452,36 @@
 |// it syncs with the BASE register only when the control is passed to
 |// user code. So we need to sync the BASE on each vmstate change to
 |// keep it consistent.
+|// The only exception are FFUNCs because sometimes even internal BASE
+|// stash is inconsistent for them. To address that issue, their ffid
+|// is stashed instead, so the corresponding frame can be streamed
+|// manually.
 |.macro set_vmstate_sync_base, st
 |.if LJ_HASSYSPROF
 |  set_vmstate INTERP  // Guard for non-atomic VM context restoration
-|  mov dword [DISPATCH+DISPATCH_GL(top_frame)], BASE
+|  mov dword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], BASE
 |.endif
 |  set_vmstate st
 |.endmacro
 |
+|.macro set_vmstate_ffunc
+|.if LJ_HASSYSPROF
+|  set_vmstate INTERP
+|.if not X64
+|  mov SPILLECX, ecx
+|  mov LFUNC:ecx, [BASE - 4]
+|  mov cl, LFUNC:ecx->ffid
+|  mov byte [DISPATCH+DISPATCH_GL(top_frame_info.ffid)], cl
+|  mov ecx, SPILLECX
+|.else // X64
+|  mov LFUNC:XCHGd, [BASE - 8]
+|  mov XCHGb, LFUNC:XCHGd->ffid
+|  mov byte [DISPATCH+DISPATCH_GL(top_frame_info.ffid)], XCHGb
+|.endif // X64
+|.endif // LJ_HASSYSPROF
+|  set_vmstate FFUNC
+|.endmacro
+|
 |// Uses spilled ecx on x86 or XCHGd (r11d) on x64.
 |.macro save_vmstate
 |.if not WIN
@@ -485,7 +508,7 @@
 |.if LJ_HASSYSPROF
 |  mov ecx, SAVE_L
 |  mov ecx, L:ecx->base
-|  mov dword [DISPATCH+DISPATCH_GL(top_frame)], ecx
+|  mov dword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], ecx
 |.endif
 |  mov ecx, SAVE_VMSTATE
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ecx
@@ -494,7 +517,7 @@
 |.if LJ_HASSYSPROF
 |  mov XCHGd, SAVE_L
 |  mov XCHGd, L:XCHGd->base
-|  mov dword [DISPATCH+DISPATCH_GL(top_frame)], XCHGd
+|  mov dword [DISPATCH+DISPATCH_GL(top_frame_info.top_frame)], XCHGd
 |.endif
 |  mov XCHGd, SAVE_VMSTATE
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], XCHGd
@@ -1488,7 +1511,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
-  |  set_vmstate_sync_base FFUNC
+  |  set_vmstate_ffunc
   |.endmacro
   |
   |.macro .ffunc_1, name
diff --git a/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
new file mode 100644
index 00000000..e5cdeb07
--- /dev/null
+++ b/test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
@@ -0,0 +1,33 @@
+local tap = require('tap')
+local test = tap.test('gh-8594-sysprof-ffunc-crash'):skipcond({
+  ['Sysprof is implemented for x86_64 only'] = jit.arch ~= 'x86' and
+                                               jit.arch ~= 'x64',
+  ['Sysprof is implemented for Linux only'] = jit.os ~= 'Linux',
+})
+
+test:plan(1)
+
+jit.off()
+-- XXX: Run JIT tuning functions in a safe frame to avoid errors
+-- thrown when LuaJIT is compiled with JIT engine disabled.
+pcall(jit.flush)
+
+local TMP_BINFILE = '/dev/null'
+
+local res, err = misc.sysprof.start{
+  mode = 'C',
+  interval = 3,
+  path = TMP_BINFILE,
+}
+assert(res, err)
+
+for i = 1, 1e5 do
+  tostring(i)
+end
+
+res, err = misc.sysprof.stop()
+assert(res, err)
+
+test:ok(true, 'sysprof finished successfully')
+
+os.exit(test:check() and 0 or 1)
-- 
2.40.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2023-11-23  6:40 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-05  8:56 [Tarantool-patches] [PATCH luajit v3] sysprof: fix crash during FFUNC stream Maxim Kokryashkin via Tarantool-patches
2023-07-09 12:21 ` Sergey Kaplun via Tarantool-patches
2023-07-10 12:28 Maxim Kokryashkin via Tarantool-patches
2023-07-13  8:12 ` Sergey Bronnikov via Tarantool-patches
2023-07-14 14:30   ` Maxim Kokryashkin via Tarantool-patches
2023-11-08 15:47     ` Sergey Bronnikov via Tarantool-patches
2023-11-08 23:01       ` Maxim Kokryashkin via Tarantool-patches
2023-11-09  7:06         ` Sergey Bronnikov via Tarantool-patches
2023-07-15 15:36 ` Sergey Kaplun via Tarantool-patches
2023-07-17 12:52   ` Maxim Kokryashkin via Tarantool-patches
2023-11-23  6:32 ` Igor Munkin via Tarantool-patches

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox