[Tarantool-patches] [PATCH luajit 2/2] test: rewrite sysprof test using managed execution

Igor Munkin imun at tarantool.org
Tue Dec 5 15:04:57 MSK 2023


Sergey,

Thanks for your review!

On 05.12.23, Sergey Kaplun wrote:
> Hi, Igor!
> Thanks for the patch!
> Really awesome improvement for our sysprof testing!
> Please consider my comments below.
> 
> On 03.12.23, Maxim Kokryashkin wrote:

<snipped>

> > > ---
> > >  .../gh-8594-sysprof-ffunc-crash.test.c        | 269 ++++++++++++++++++
> > >  .../gh-8594-sysprof-ffunc-crash.test.lua      |  55 ----
> > >  2 files changed, 269 insertions(+), 55 deletions(-)
> > >  create mode 100644 test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
> > >  delete mode 100644 test/tarantool-tests/gh-8594-sysprof-ffunc-crash.test.lua
> > >
> > > diff --git a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
> > > new file mode 100644
> > > index 00000000..4caed8cb
> > > --- /dev/null
> > > +++ b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
> > > @@ -0,0 +1,269 @@

<snipped>

> > > +
> > > +#if LUAJIT_OS == LUAJIT_OS_LINUX && LUAJIT_TARGET == LUAJIT_ARCH_X64
> > Why not use the `LJ_HASSYSPROF` flag?
> 
> I agree here. `LJ_HASSYSPROF` is more foolproof -- if we will port
> sysprof for arm64 (for example) we will not forget to update this test.

See my arguments for this in my previous reply.

> 

<snipped>

> 
> Minor: This link may be useful too:
> https://c9x.me/x86/html/file_module_x86_id_142.html

Thanks, added:

================================================================================

diff --git a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
index d17386dd..d9d2e481 100644
--- a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
+++ b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
@@ -41,6 +41,7 @@
  *
  * See more info here:
  * * https://man7.org/linux/man-pages/man2/ptrace.2.html
+ * * https://c9x.me/x86/html/file_module_x86_id_142.html
  * * https://github.com/tarantool/tarantool/issues/8594
  * * https://github.com/tarantool/tarantool/issues/9387
  */

================================================================================

> 
> > > + */
> > > +
> > > +#define MESSAGE "Canary is alive"
> > > +#define LUACALL "local a = tostring('" MESSAGE "') return a"
> 
> Nit: I suggest adding an excess empty line here to separate extern asm functions
> from Lua code defines.

OK, added.

> 
> > > +/* XXX: Resolve the necessary addresses from VM engine. */
> > > +extern void *lj_ff_tostring(void);
> > > +extern void *lj_fff_res1(void);
> > > +
> > > +/* Sysprof "/dev/null" stream helpers. {{{ */
> > > +
> > > +/*
> > > + * Yep, 8Mb. Tuned in order not to bother the platform with too
> > > + * often flushes.
> > > + */
> 
> This comment is misleading since there is no any flushes in the writer
> function.

Well, it's not about I/O flushes, but rather about sysprof write-buffer
pressure, isn't it? Hence, 8Mb is needed to make the test omit almost
all buffer-flush related callbacks.

> Also, do we need such big buffer? Maybe `PATH_MAX` * 2 (or something
> like that) should be enough?

Could you elaborate the reason for such a strange constant, please?

> 
> > > +#define STREAM_BUFFER_SIZE (8 * 1024 * 1024)
> > > +#define DEVNULL -1
> 
> `DEVNULL` name is misleading (like we've actually opened `/dev/null`).
> Maybe `DUMMY_FD` is better? Also, I suppose we can drop the `fd` field
> at all. We can just use static structure in this translation unit and
> compare pointer against it, so there is no need for an additional field
> fd in the structure itself.

I see little reasoning in this suggestion: the approach with the
structure is quite straight-forward and copied from another sysprof test
implemented in C. Re naming: I prefer to leave it as is.

In general, your nit is nice, but I believe it should be applied
for the patch moving <ptrace> tricks to utils library.

> 
> > > +

<snipped>

> > > +
> > > +static int stream_new(struct luam_Sysprof_Options *options) {
> 
> Please, use separate line for block start `{`.
> Here and below.

OK, fixed everywhere in test.

================================================================================

diff --git a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
index 942787a4..884c62cc 100644
--- a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
+++ b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
@@ -72,7 +72,8 @@ struct devnull_ctx {
 	uint8_t buf[STREAM_BUFFER_SIZE];
 };
 
-static int stream_new(struct luam_Sysprof_Options *options) {
+static int stream_new(struct luam_Sysprof_Options *options)
+{
 	struct devnull_ctx *ctx = calloc(1, sizeof(struct devnull_ctx));
 	if (ctx == NULL)
 		return PROFILE_ERRIO;
@@ -86,14 +87,16 @@ static int stream_new(struct luam_Sysprof_Options *options) {
 	return PROFILE_SUCCESS;
 }
 
-static int stream_delete(void *rawctx, uint8_t *buf) {
+static int stream_delete(void *rawctx, uint8_t *buf)
+{
 	struct devnull_ctx *ctx = rawctx;
 	assert(ctx->fd == DEVNULL);
 	free(ctx);
 	return PROFILE_SUCCESS;
 }
 
-static size_t stream_writer(const void **buf_addr, size_t len, void *rawctx) {
+static size_t stream_writer(const void **buf_addr, size_t len, void *rawctx)
+{
 	struct devnull_ctx *ctx = rawctx;
 	assert(ctx->fd == DEVNULL);
 	/* Do nothing, just return back to the profiler. */
@@ -102,7 +105,8 @@ static size_t stream_writer(const void **buf_addr, size_t len, void *rawctx) {
 
 /* }}} Sysprof "/dev/null" stream helpers. */
 
-static int tracee(const char *luacode) {
+static int tracee(const char *luacode)
+{
 	struct luam_Sysprof_Counters counters = {};
 	struct luam_Sysprof_Options opt = {
 		/* Collect full backtraces per event. */
@@ -162,7 +166,8 @@ static int tracee(const char *luacode) {
 	return EXIT_SUCCESS;
 }
 
-static void wait_alive(pid_t chpid) {
+static void wait_alive(pid_t chpid)
+{
 	int wstatus;
 
 	/* Wait <chpid> tracee signal-delivery-stop. */
@@ -175,13 +180,15 @@ static void wait_alive(pid_t chpid) {
 }
 
 const uint8_t INT3 = 0xCC;
-static inline unsigned long int3poison(unsigned long instruction) {
+static inline unsigned long int3poison(unsigned long instruction)
+{
 	const size_t int3bits = sizeof(INT3) * 8;
 	const unsigned long int3mask = -1UL >> int3bits << int3bits;
 	return (instruction & int3mask) | INT3;
 }
 
-static void continue_until(pid_t chpid, void *addr) {
+static void continue_until(pid_t chpid, void *addr)
+{
 	struct user_regs_struct regs;
 
 	/* Obtain the instructions at the <addr>. */
@@ -220,7 +227,8 @@ static void continue_until(pid_t chpid, void *addr) {
 	ptrace(PTRACE_POKETEXT, chpid, addr, data);
 }
 
-static int tracer(pid_t chpid) {
+static int tracer(pid_t chpid)
+{
 	int wstatus;
 
 	/* Wait until <chpid> tracee is ready. */
@@ -250,7 +258,8 @@ static int tracer(pid_t chpid) {
 	return TEST_EXIT_SUCCESS;
 }
 
-static int test_tostring_call(void *ctx) {
+static int test_tostring_call(void *ctx)
+{
 	pid_t chpid = fork();
 	switch(chpid) {
 	case -1:
@@ -271,13 +280,15 @@ static int test_tostring_call(void *ctx) {
 
 #else /* LUAJIT_OS == LUAJIT_OS_LINUX && LUAJIT_TARGET == LUAJIT_ARCH_X64 */
 
-static int test_tostring_call(void *ctx) {
+static int test_tostring_call(void *ctx)
+{
 	return skip_all("sysprof is implemented for Linux/x86_64 only");
 }
 
 #endif /* LUAJIT_OS == LUAJIT_OS_LINUX && LUAJIT_TARGET == LUAJIT_ARCH_X64 */
 
-int main(void) {
+int main(void)
+{
 	const struct test_unit tgroup[] = {
 		test_unit_def(test_tostring_call),
 	};

================================================================================

> 

<snipped>

> > > +static int tracee(const char *luacode) {
> > > +	struct luam_Sysprof_Counters counters = {};
> > > +	struct luam_Sysprof_Options opt = {
> > > +		/* Collect full backtraces per event. */
> > > +		.mode = LUAM_SYSPROF_CALLGRAPH,
> > > +		/*
> > > +		 * XXX: Setting the "endless timer". The test
> > > +		 * requires the single event to be streamed at
> > > +		 * <lj_fff_res1> instruction, so to avoid spoiling
> > > +		 * the stream with other unwanted events, the
> > > +		 * timer is set to some unreachable point, so the
> > > +		 * profiler will be guaranteed to stop before any
> > > +		 * event is emitted.
> > > +		 */
> > > +		.interval = -1ULL,
> > > +	};
> > > +
> > > +	/* Allow tracing for this process */
> 
> Nit: Missed dot at the end of the sentence.

Fixed.

================================================================================

diff --git a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
index 884c62cc..40926a50 100644
--- a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
+++ b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
@@ -123,7 +123,7 @@ static int tracee(const char *luacode)
 		.interval = -1ULL,
 	};
 
-	/* Allow tracing for this process */
+	/* Allow tracing for this process. */
 	if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
 		perror("Failed to turn the calling process into a tracee");
 		return EXIT_FAILURE;

================================================================================

> 
> > > +	if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
> 
> Nit: It's more readable to use `ptrace(PTRACE_TRACEME, 0, NULL, NULL)`
> to match arguments types.

OK, as you wish.

================================================================================

diff --git a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
index 40926a50..6e5a9255 100644
--- a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
+++ b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
@@ -124,7 +124,7 @@ static int tracee(const char *luacode)
 	};
 
 	/* Allow tracing for this process. */
-	if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
+	if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) < 0) {
 		perror("Failed to turn the calling process into a tracee");
 		return EXIT_FAILURE;
 	}

================================================================================

> 
> > > +		perror("Failed to turn the calling thread into a tracee");
> > Typo?: s/thread/process/
> 
> Also, it's better to use `test_comment()` format here (i.e., with "#")
> to be TAP-compatible.

No, it's not. Tracee output is not TAP compatible, since the tracer is
the only TAP output emitter.

> 
> > > +		return EXIT_FAILURE;
> > > +	}
> > > +
> > > +	/*
> > > +	 * XXX: Allow parent (which is our tracer now) to observe
> > > +	 * our signal-delivery-stop (i.e. the tracee is ready).
> > > +	 * For more info see ptrace(2), "Attaching and detaching".
> > > +	 */
> > > +	raise(SIGSTOP);
> > > +
> > > +	lua_State *L = utils_lua_init();
> > > +
> > > +	/* Customize and start profiler. */
> > > +	assert(stream_new(&opt) == PROFILE_SUCCESS);
> > > +	assert(luaM_sysprof_set_writer(stream_writer) == PROFILE_SUCCESS);
> > > +	assert(luaM_sysprof_set_on_stop(stream_delete) == PROFILE_SUCCESS);
> > > +	assert(luaM_sysprof_start(L, &opt) == PROFILE_SUCCESS);
> > > +
> > > +	/* FIXME: Make this part test agnostic. */
> 
> Typo: s/test agnostic/test-agnostic/
> 
> Nit: TODO looks more valid for me (since it's working).
> Feel free to ignore.

Fixed.

================================================================================

diff --git a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
index 6e5a9255..44421812 100644
--- a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
+++ b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
@@ -144,7 +144,7 @@ static int tracee(const char *luacode)
 	assert(luaM_sysprof_set_on_stop(stream_delete) == PROFILE_SUCCESS);
 	assert(luaM_sysprof_start(L, &opt) == PROFILE_SUCCESS);
 
-	/* FIXME: Make this part test agnostic. */
+	/* TODO: Make this part test-agnostic. */
 	assert(luaL_dostring(L, luacode) == LUA_OK);
 	assert(strcmp(lua_tostring(L, -1), MESSAGE) == 0);
 

================================================================================

> 
> > > +	assert(luaL_dostring(L, luacode) == LUA_OK);
> > > +	assert(strcmp(lua_tostring(L, -1), MESSAGE) == 0);
> > > +
> > > +	/* Terminate profiler and Lua universe. */
> > > +	assert(luaM_sysprof_stop(L) == PROFILE_SUCCESS);
> > > +	utils_lua_close(L);
> > It doesn't seem to be semantically correct to terminate Lua universe
> > before obtaining the counters, even though there is no obstacles to do
> > so in the API implementation itself.
> > > +
> > > +	/*
> > > +	 * XXX: The only event to be streamed must be FFUNC at
> > > +	 * <lj_fff_res1> instruction.
> 
> Typo: s/<lj_fff_res1>/the <lj_fff_res1>/
> 
> > > +	 * FIXME: Make this part test agnostic.
> 
> Typo: s/test agnostic/test-agnostic/
> 
> Nit: TODO looks more valid for me (since it's working).
> Feel free to ignore.

Fixed.

================================================================================

diff --git a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
index 44421812..036f8141 100644
--- a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
+++ b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
@@ -153,8 +153,8 @@ static int tracee(const char *luacode)
 
 	/*
 	 * XXX: The only event to be streamed must be FFUNC at
-	 * <lj_fff_res1> instruction.
-	 * FIXME: Make this part test agnostic.
+	 * the <lj_fff_res1> instruction.
+	 * TODO: Make this part test agnostic.
 	 */
 	assert(luaM_sysprof_report(&counters) == PROFILE_SUCCESS);
 	assert(counters.samples == 1);

================================================================================

> 

<snipped>

> > > +const uint8_t INT3 = 0xCC;
> 
> Maybe it should be moved up to other constants and defines?

I'd rather prefer to leave it here to localize the scope of its usage.

> 
> > > +static inline unsigned long int3poison(unsigned long instruction) {
> > > +	const size_t int3bits = sizeof(INT3) * 8;
> > > +	const unsigned long int3mask = -1UL >> int3bits << int3bits;
> 
> Minor: It's better to use parentesises here.

I just want to do tuda-suda to nil eight LSBs. Since operations has the
same prio, it will look like (1 + 2) + 3. I see little enhancements, so
ignoring.

> 
> > > +	return (instruction & int3mask) | INT3;
> > > +}
> > > +
> > > +static int continue_until(pid_t chpid, void *addr) {
> > > +	int wstatus;
> > > +	struct user_regs_struct regs;
> > > +
> > > +	/* Obtain the instructions at the <addr>. */
> > > +	unsigned long data = ptrace(PTRACE_PEEKTEXT, chpid, addr, 0);
> 
> Nit: s/0/NULL/.
> Here and below.

Yes, honey :(

================================================================================

diff --git a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
index 036f8141..6bf7562f 100644
--- a/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
+++ b/test/tarantool-c-tests/gh-8594-sysprof-ffunc-crash.test.c
@@ -192,7 +192,7 @@ static void continue_until(pid_t chpid, void *addr)
 	struct user_regs_struct regs;
 
 	/* Obtain the instructions at the <addr>. */
-	unsigned long data = ptrace(PTRACE_PEEKTEXT, chpid, addr, 0);
+	unsigned long data = ptrace(PTRACE_PEEKTEXT, chpid, addr, NULL);
 	/*
 	 * Emit the <int 3> instruction to the <addr>.
 	 * XXX: <int 3> is poisoned as the LSB to the <data>
@@ -201,7 +201,7 @@ static void continue_until(pid_t chpid, void *addr)
 	ptrace(PTRACE_POKETEXT, chpid, addr, int3poison(data));
 
 	/* Resume <chpid> tracee until SIGTRAP occurs. */
-	ptrace(PTRACE_CONT, chpid, 0, 0);
+	ptrace(PTRACE_CONT, chpid, NULL, NULL);
 
 	/*
 	 * Wait <chpid> tracee signal-delivery-stop and check
@@ -210,7 +210,7 @@ static void continue_until(pid_t chpid, void *addr)
 	wait_alive(chpid);
 
 	/* Obtain GPR set to tweak RIP for further execution. */
-	ptrace(PTRACE_GETREGS, chpid, 0, &regs);
+	ptrace(PTRACE_GETREGS, chpid, NULL, &regs);
 	/*
 	 * Make sure we indeed are stopped at <addr>.
 	 * XXX: RIP points right after <int 3> instruction.
@@ -223,7 +223,7 @@ static void continue_until(pid_t chpid, void *addr)
 	 * instruction at the <addr>.
 	 */
 	regs.rip -= sizeof(INT3);
-	ptrace(PTRACE_SETREGS, chpid, 0, &regs);
+	ptrace(PTRACE_SETREGS, chpid, NULL, &regs);
 	ptrace(PTRACE_POKETEXT, chpid, addr, data);
 }
 

================================================================================

> 

<snipped>

> > > +	/* The tracee has to be alive and stopped by SIGTRAP. */
> > > +	assert_false(WIFEXITED(wstatus));
> > > +	assert_true(WIFSTOPPED(wstatus));
> > This part is duplicated, let's move it to a separate `healthcheck`
> > function, or something like that.
> 
> May be we can move these checks inside the `continue_until()` routine?
> Do we want to check something except the status is `WIFSTOPPED(status)`?

See the updated hunk in the previous reply.

> 

<snipped>

> 
> -- 
> Best regards,
> Sergey Kaplun

-- 
Best regards,
IM


More information about the Tarantool-patches mailing list