[Tarantool-patches] [PATCH luajit v5] memprof: introduce cli flag to run dump parser

Maksim Kokryashkin max.kokryashkin at gmail.com
Thu May 25 00:04:45 MSK 2023


From: Maxim Kokryashkin <m.kokryashkin at tarantool.org>

It is really inconvenient to use a standalone shell script to parse
memprof dump. That is why this commit introduces a CLI flag for tools
to the LuaJIT, so now it is possible to parse memprof dump
as simple as:
```
luajit -tm memprof.bin
```

Closes tarantool/tarantool#5688
---
Changes in v5:
- Fixed comments as per review by Sergos

Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-5688-cli-for-memprof-parse
PR: https://github.com/tarantool/tarantool/pull/8002

 .gitignore                                    |  1 +
 CMakeLists.txt                                |  9 ++-
 Makefile.original                             |  7 +-
 src/CMakeLists.txt                            |  5 ++
 src/lj_tools_conf.h.in                        |  7 ++
 src/luajit.c                                  | 79 +++++++++++++++++--
 .../gh-5688-memprof-cli-flag.test.lua         | 56 +++++++++++++
 tools/CMakeLists.txt                          |  2 +
 8 files changed, 154 insertions(+), 12 deletions(-)
 create mode 100644 src/lj_tools_conf.h.in
 create mode 100644 test/tarantool-tests/gh-5688-memprof-cli-flag.test.lua

diff --git a/.gitignore b/.gitignore
index b7908aee..75783771 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,3 +24,4 @@ install_manifest.txt
 luajit-parse-memprof
 luajit-parse-sysprof
 luajit.pc
+src/lj_tools_conf.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d5fa4c9c..537e340a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -266,6 +266,11 @@ endif()
 # related compiler and linker flags passed. This should be done
 # the right way later.

+# --- Tools --------------------------------------------------------------------
+
+add_subdirectory(tools)
+set(LUAJIT_TOOLS_DIR "${LUAJIT_TOOLS_DIR}")
+
 # --- Main source tree ---------------------------------------------------------

 add_subdirectory(src)
@@ -274,10 +279,6 @@ add_subdirectory(src)

 add_subdirectory(etc)

-# --- Tools --------------------------------------------------------------------
-
-add_subdirectory(tools)
-
 # --- Testing source tree ------------------------------------------------------

 # Auxiliary options for testing.
diff --git a/Makefile.original b/Makefile.original
index 0c92df9e..bb0ab73d 100644
--- a/Makefile.original
+++ b/Makefile.original
@@ -104,6 +104,7 @@ FILES_UTILSLIB= avl.lua bufread.lua symtab.lua
 FILES_MEMPROFLIB= parse.lua humanize.lua
 FILES_TOOLSLIB= memprof.lua
 FILE_TMEMPROF= luajit-parse-memprof
+FILE_TOOLSHEADER= lj_tools_conf.h.in

 ifeq (,$(findstring Windows,$(OS)))
   HOST_SYS:= $(shell uname -s)
@@ -193,7 +194,7 @@ clean:
 	$(RM) tools/$(FILE_TMEMPROF)
 	$(MAKE) -C src -f Makefile.original clean

-tools: tools/$(FILE_TMEMPROF)
+tools: tools/$(FILE_TMEMPROF) src/$(FILE_TOOLSHEADER)

 # FIXME: This is an ugly hack to manually configure an auxiliary
 # tools/luajit-parse-memprof. This file should go away in scope of
@@ -204,6 +205,10 @@ tools/$(FILE_TMEMPROF): src/luajit
 	     $@.in > $@
 	@chmod +x $@

+src/$(FILE_TOOLSHEADER):
+	@sed -e "s|@LUAJIT_TOOLS_DIR@|$(realpath tools)|" \
+	     $@.in > $@
+
 .PHONY: all install amalg clean tools

 ##############################################################################
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index dffc0a4d..17674a41 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -145,6 +145,8 @@ make_source_list(SOURCES_CORE_NO_JIT_FFI
     ${SOURCES_UTILS}
 )

+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lj_tools_conf.h.in ${CMAKE_CURRENT_SOURCE_DIR}/lj_tools_conf.h)
+
 set(SOURCES_CORE ${SOURCES_CORE_NO_JIT_FFI})

 # Build JIT sources if JIT support is enabled.
@@ -251,6 +253,9 @@ add_custom_target(
           jit/vmdef.lua
 )

+# --- Generate luajit tools config header -------------------------------------
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lj_tools_conf.h.in ${CMAKE_CURRENT_SOURCE_DIR}/lj_tools_conf.h)
+
 # --- Generate core and VM object files ---------------------------------------

 # Virtual machine.
diff --git a/src/lj_tools_conf.h.in b/src/lj_tools_conf.h.in
new file mode 100644
index 00000000..9f9a2e49
--- /dev/null
+++ b/src/lj_tools_conf.h.in
@@ -0,0 +1,7 @@
+#ifndef LJ_TOOLS_CONF_H
+#define LJ_TOOLS_CONF_H
+
+#define TOOLS_PATH "@LUAJIT_TOOLS_DIR@/?.lua"
+#define PARSER_PATH "@LUAJIT_TOOLS_DIR@/memprof.lua"
+
+#endif
diff --git a/src/luajit.c b/src/luajit.c
index 1ca24301..a655410f 100644
--- a/src/luajit.c
+++ b/src/luajit.c
@@ -9,6 +9,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/errno.h>

 #define luajit_c

@@ -19,6 +20,8 @@

 #include "lj_arch.h"

+#include "lj_tools_conf.h"
+
 #if LJ_TARGET_POSIX
 #include <unistd.h>
 #define lua_stdin_is_tty()	isatty(0)
@@ -72,6 +75,7 @@ static void print_usage(void)
   "  -O[opt]   Control LuaJIT optimizations.\n"
   "  -i        Enter interactive mode after executing " LUA_QL("script") ".\n"
   "  -v        Show version information.\n"
+  "  -t[cmd]   Execute tool.\n"
   "  -E        Ignore environment variables.\n"
   "  --        Stop handling options.\n"
   "  -         Execute stdin and stop handling options.\n", stderr);
@@ -266,13 +270,9 @@ static void dotty(lua_State *L)
   progname = oldprogname;
 }

-static int handle_script(lua_State *L, char **argx)
+static int call_script(lua_State *L, const char *fname)
 {
-  int status;
-  const char *fname = argx[0];
-  if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0)
-    fname = NULL;  /* stdin */
-  status = luaL_loadfile(L, fname);
+  int status = luaL_loadfile(L, fname);
   if (status == LUA_OK) {
     /* Fetch args from arg table. LUA_INIT or -e might have changed them. */
     int narg = 0;
@@ -290,6 +290,16 @@ static int handle_script(lua_State *L, char **argx)
     }
     status = docall(L, narg, 0);
   }
+  return status;
+}
+
+static int handle_script(lua_State *L, char **argx)
+{
+  int status;
+  const char *fname = argx[0];
+  if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0)
+    fname = NULL;  /* stdin */
+  status = call_script(L, fname);
   return report(L, status);
 }

@@ -361,6 +371,15 @@ static int dojitcmd(lua_State *L, const char *cmd)
   return runcmdopt(L, opt ? opt+1 : opt);
 }

+static int dotoolcmd(lua_State *L, const char *cmd)
+{
+  if(strcmp(cmd, "m") == 0) {
+    const int status = call_script(L, PARSER_PATH);
+    return report(L, status);
+  }
+  return -1;
+}
+
 /* Optimization flags. */
 static int dojitopt(lua_State *L, const char *opt)
 {
@@ -390,6 +409,38 @@ static int dobytecode(lua_State *L, char **argv)
   return -1;
 }

+/*
+** On most Linux distros, it is the default value for the
+** maximum length of a string passed to `execve`.
+** However, there is no common value for other OSes, so
+** the size of 32 default memory pages is adopted.
+*/
+#define MAX_ENV_VAR 32 * 4096
+
+static int update_env_var(const char *name, const char *value)
+{
+  char env_buf[MAX_ENV_VAR] = "";
+  const char *env = getenv(name);
+  /*
+  ** The `+ 1` is added here to handle case where `env` is
+  ** empty and `value` is too long without any additional
+  ** `if` statements.
+  */
+  size_t env_len = strnlen(env, MAX_ENV_VAR + 1);
+  size_t value_len = strnlen(value, MAX_ENV_VAR + 1);
+  if (value_len + env_len > MAX_ENV_VAR) {
+    errno = ENOMEM;
+    return -1;
+  }
+
+  if (env == NULL) {
+    return setenv(name, value, 0);
+  } else {
+    strncpy(env_buf, env, env_len);
+    return setenv(name, strncat(env_buf, value, MAX_ENV_VAR - value_len), 1);
+  }
+}
+
 /* check that argument has no extra characters at the end */
 #define notail(x)	{if ((x)[2] != '\0') return -1;}

@@ -398,10 +449,12 @@ static int dobytecode(lua_State *L, char **argv)
 #define FLAGS_EXEC		4
 #define FLAGS_OPTION		8
 #define FLAGS_NOENV		16
+#define FLAGS_TOOL		32

 static int collectargs(char **argv, int *flags)
 {
   int i;
+  int result;
   for (i = 1; argv[i] != NULL; i++) {
     if (argv[i][0] != '-')  /* Not an option? */
       return i;
@@ -419,6 +472,14 @@ static int collectargs(char **argv, int *flags)
       notail(argv[i]);
       *flags |= FLAGS_VERSION;
       break;
+    case 't':
+      *flags |= FLAGS_TOOL;
+      if (argv[i][2] == '\0') return -1;
+      if (argv[i + 1] == NULL) return -1;
+      result = update_env_var("LUA_PATH", TOOLS_PATH);
+      if (result != 0)
+        return result;
+      return i + 1;
     case 'e':
       *flags |= FLAGS_EXEC;
     case 'j':  /* LuaJIT extension */
@@ -474,6 +535,10 @@ static int runargs(lua_State *L, char **argv, int argn)
 	return 1;
       break;
       }
+    case 't': { /* Tarantool's fork extension. */
+      const char *cmd = argv[i] + 2;
+      return dotoolcmd(L, cmd) == LUA_OK;
+    }
     case 'O':  /* LuaJIT extension. */
       if (dojitopt(L, argv[i] + 2))
 	return 1;
@@ -535,7 +600,7 @@ static int pmain(lua_State *L)
   luaL_openlibs(L);
   lua_gc(L, LUA_GCRESTART, -1);

-  createargtable(L, argv, s->argc, argn);
+  createargtable(L, argv, s->argc, (flags & FLAGS_TOOL) ? argn - 1 : argn);

   if (!(flags & FLAGS_NOENV)) {
     s->status = handle_luainit(L);
diff --git a/test/tarantool-tests/gh-5688-memprof-cli-flag.test.lua b/test/tarantool-tests/gh-5688-memprof-cli-flag.test.lua
new file mode 100644
index 00000000..ad7bf732
--- /dev/null
+++ b/test/tarantool-tests/gh-5688-memprof-cli-flag.test.lua
@@ -0,0 +1,56 @@
+local utils = require('utils')
+
+-- XXX: The patch is for LuaJIT only, and it doesn't
+-- work on Tarantool.
+utils.skipcond(
+-- luacheck: no global
+  (jit.arch ~= 'x86' and jit.arch ~= 'x64') or _TARANTOOL,
+  jit.arch..' architecture is NIY for memprof'
+)
+
+local tap = require('tap')
+
+local test = tap.test('gh-5688-memprof-cli-flag')
+test:plan(2)
+
+jit.off()
+jit.flush()
+
+local table_new = require 'table.new'
+
+local TMP_BINFILE = utils.profilename('memprofdata.tmp.bin')
+local BAD_PATH = utils.profilename('bad-path-tmp.bin')
+local EXECUTABLE = utils.luacmd(arg)
+local TABLE_SIZE = 20
+
+local function default_payload()
+  local _ = table_new(TABLE_SIZE, 0)
+   _ = nil
+  collectgarbage()
+end
+
+local function generate_output(filename, payload)
+  -- Clean up all garbage to avoid pollution of free.
+  collectgarbage()
+
+  local res, err = misc.memprof.start(filename)
+  -- Should start succesfully.
+  assert(res, err)
+
+  payload()
+
+  res, err = misc.memprof.stop()
+  -- Should stop succesfully.
+  assert(res, err)
+end
+
+generate_output(TMP_BINFILE, default_payload)
+
+local errcode = os.execute(EXECUTABLE .. ' -tm ' .. BAD_PATH)
+test:ok(errcode ~= 0, 'binfile does not exist')
+
+errcode = os.execute(EXECUTABLE .. ' -tm ' .. TMP_BINFILE)
+test:ok(errcode == 0, 'memprof binfile parsing')
+
+os.remove(TMP_BINFILE)
+os.exit(test:check() and 0 or 1)
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index dd7ec6bd..e2e97b63 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -16,6 +16,7 @@ else()
   # path where LuaJIT binary is located.
   set(LUAJIT_TOOLS_BIN ${LUAJIT_BINARY_DIR}/${LUAJIT_CLI_NAME})
   set(LUAJIT_TOOLS_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+  set(LUAJIT_TOOLS_DIR ${LUAJIT_TOOLS_DIR} PARENT_SCOPE)
   # XXX: Unfortunately, there is no convenient way to set
   # particular permissions to the output file via CMake.
   # Furthermore, I even failed to copy the given file to the same
@@ -77,6 +78,7 @@ else()
     "
       set(LUAJIT_TOOLS_BIN ${CMAKE_INSTALL_PREFIX}/bin/${LUAJIT_CLI_NAME})
       set(LUAJIT_TOOLS_DIR ${CMAKE_INSTALL_PREFIX}/${LUAJIT_DATAROOTDIR})
+      set(LUAJIT_TOOLS_DIR ${LUAJIT_TOOLS_DIR} PARENT_SCOPE)
       configure_file(${CMAKE_CURRENT_SOURCE_DIR}/luajit-parse-memprof.in
         ${PROJECT_BINARY_DIR}/luajit-parse-memprof @ONLY ESCAPE_QUOTES)
       file(INSTALL ${PROJECT_BINARY_DIR}/luajit-parse-memprof
--
2.39.2 (Apple Git-143)



More information about the Tarantool-patches mailing list