[Tarantool-patches] [PATCH luajit v2 2/2] test: add test for debugging extension

Wed Oct 18 16:17:09 MSK 2023

Hello, Max

thanks for the patches.  See my comments.

Please put this patch first, before refactoring.

On 10/12/23 13:25, Maksim Kokryashkin wrote:
> This patch introduces Test::Base-like tests for the
> LuaJIT debugging extension. The newly introduced test
> suite is TAP-compatible and is tested with prove.
> Test specification is generalized to a great extent,
> however, it is still important to keep in mind the
> platform-specific aspects of assertions.
> ---
>   test/CMakeLists.txt                           |   3 +
>   test/tarantool-debugger-tests/CMakeLists.txt  |  82 ++++++++++
>   test/tarantool-debugger-tests/config.py       | 148 ++++++++++++++++++
>   .../luajit_dbg.test.md                        | 136 ++++++++++++++++
>   test/tarantool-debugger-tests/run.py          |   8 +
>   test/tarantool-debugger-tests/test_base.py    |  73 +++++++++
>   6 files changed, 450 insertions(+)
>   create mode 100644 test/tarantool-debugger-tests/CMakeLists.txt
>   create mode 100644 test/tarantool-debugger-tests/config.py
>   create mode 100644 test/tarantool-debugger-tests/luajit_dbg.test.md

Why do you use .md extension if it is not a document but a file with 
testcases?

Why markdown format and not a YAML?

>   create mode 100755 test/tarantool-debugger-tests/run.py
>   create mode 100644 test/tarantool-debugger-tests/test_base.py
>
> diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
> index 58cba5ba..87ee40b3 100644
> --- a/test/CMakeLists.txt
> +++ b/test/CMakeLists.txt
> @@ -78,6 +78,7 @@ add_subdirectory(PUC-Rio-Lua-5.1-tests)
>   add_subdirectory(lua-Harness-tests)
>   add_subdirectory(tarantool-c-tests)
>   add_subdirectory(tarantool-tests)
> +add_subdirectory(tarantool-debugger-tests)
Nit: it is actually not tests for debugger, but for debugger extensions
>   
>   add_custom_target(${PROJECT_NAME}-test DEPENDS
>     LuaJIT-tests
> @@ -85,6 +86,8 @@ add_custom_target(${PROJECT_NAME}-test DEPENDS
>     lua-Harness-tests
>     tarantool-c-tests
>     tarantool-tests
> +  tarantool-gdb-tests
> +  tarantool-lldb-tests
>   )
>   
>   if(LUAJIT_USE_TEST)
> diff --git a/test/tarantool-debugger-tests/CMakeLists.txt b/test/tarantool-debugger-tests/CMakeLists.txt
> new file mode 100644
> index 00000000..ffe8ff39
> --- /dev/null
> +++ b/test/tarantool-debugger-tests/CMakeLists.txt
> @@ -0,0 +1,82 @@
> +add_custom_target(tarantool-gdb-tests
Why "tarantool-" and not "LuaJIT-"? Nothing specific for Tarantool here. 
Same below.
> +  DEPENDS ${LUAJIT_TEST_BINARY}
> +)
> +
> +add_custom_target(tarantool-lldb-tests
> +  DEPENDS ${LUAJIT_TEST_BINARY}
> +)
> +
> +# Debug info is required for testing of extensions.
> +if(NOT (CMAKE_BUILD_TYPE MATCHES Debug))
> +  message(WARNING "not a DEBUG build, tarantool-*db-tests are dummy")

Why do you use reduced target name in messages? here and below.

> +  return()
> +endif()
> +
> +# MacOS asks for permission to debug a process even when the
> +# machine is set into development mode. To solve the issue,
> +# it is required to add relevant users to the `_developer` user
> +# group in MacOS. Disabled for now.
> +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND DEFINED ENV{CI})
> +  message(WARNING "non-interactive debugging on macOS, tarantool-*db-tests are dummy")
probably you mean something like: non-interactive debugging on macOS is 
not available
> +  return()
> +endif()
> +
> +find_program(PROVE prove)
> +if(NOT PROVE)
> +  message(WARNING "`prove' is not found, so tarantool-*db-tests target are dummy")
> +  return()
> +endif()

What is a point to search PROVE four times?

../tarantool-tests/CMakeLists.txt:find_program(PROVE prove)
../tarantool-debugger-tests/CMakeLists.txt:find_program(PROVE prove)
../tarantool-c-tests/CMakeLists.txt:find_program(PROVE prove)
../lua-Harness-tests/CMakeLists.txt:find_program(PROVE prove)

I suspect one time is more than enough.

> +
> +find_package(PythonInterp)

Deprecated since version 3.12: Use FindPython3, FindPython2 or 
FindPython instead.

https://cmake.org/cmake/help/latest/module/FindPythonInterp.html

> +if(NOT PYTHONINTERP_FOUND)
> +  message(WARNING "`python' is not found, so tarantool-*db-tests target are dummy")
> +  return()
> +endif()
> +
> +set(DEBUGGER_TEST_FLAGS --failures)
> +if(CMAKE_VERBOSE_MAKEFILE)
> +  list(APPEND DEBUGGER_TEST_FLAGS --verbose)
> +endif()
> +
> +set(DEBUGGER_TEST_ENV
> +  "LUAJIT_TEST_BINARY=${LUAJIT_TEST_BINARY}"
> +  # Suppresses __pycache__ generation.
> +  "PYTHONDONTWRITEBYTECODE=1"
> +  "DEBUGGER_EXTENSION_PATH=${PROJECT_SOURCE_DIR}/src/luajit_dbg.py"
> +)
> +
> +find_program(GDB gdb)
> +if(GDB)
> +  set(GDB_TEST_ENV ${DEBUGGER_TEST_ENV}
> +    "DEBUGGER_COMMAND=${GDB}"
> +  )
> +  add_custom_command(TARGET tarantool-gdb-tests
> +    COMMENT "Running luajit_dbg.py tests with gdb"
> +    COMMAND
> +    ${GDB_TEST_ENV}
> +      ${PROVE} ${CMAKE_CURRENT_SOURCE_DIR}/luajit_dbg.test.md
> +      --exec '${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/run.py'
> +      ${DEBUGGER_TEST_FLAGS}
> +    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
> +  )
> +else()
> +  message(WARNING "`gdb' is not found, so tarantool-gdb-tests target is dummy")
> +endif()
> +
> +find_program(LLDB lldb)
> +if(LLDB)
> +  set(LLDB_TEST_ENV ${DEBUGGER_TEST_ENV}
> +    "DEBUGGER_COMMAND=${LLDB}"
> +  )
> +  add_custom_command(TARGET tarantool-lldb-tests
> +    COMMENT "Running luajit_dbg.py tests with lldb"
> +    COMMAND
> +    ${LLDB_TEST_ENV}
> +      ${PROVE} ${CMAKE_CURRENT_SOURCE_DIR}/luajit_dbg.test.md
> +      --exec '${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/run.py'
> +      ${DEBUGGER_TEST_FLAGS}
> +    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
> +  )
> +else()
> +  message(WARNING "`lldb' is not found, so tarantool-lldb-tests target is dummy")
> +endif()
> diff --git a/test/tarantool-debugger-tests/config.py b/test/tarantool-debugger-tests/config.py
> new file mode 100644
> index 00000000..a75d1452
> --- /dev/null
> +++ b/test/tarantool-debugger-tests/config.py
> @@ -0,0 +1,148 @@
> +# This file provides filters and logic for running the debugger tests. It is
> +# imported by the runner and the test_base.py, so its symbols become exposed
> +# to the globals(). Thus, they can be called during the specification
> +# execution.
> +import re
> +import subprocess
> +import os
> +import sys
> +import tempfile
> +from threading import Timer
> +
> +LEGACY = re.match(r'^2\.', sys.version)
> +
> +LUAJIT_BINARY = os.environ['LUAJIT_TEST_BINARY']
> +EXTENSION = os.environ['DEBUGGER_EXTENSION_PATH']
> +DEBUGGER_COMMAND = os.environ['DEBUGGER_COMMAND']
> +LLDB = 'lldb' in DEBUGGER_COMMAND
> +TIMEOUT = 10
> +
> +active_block = None
> +output = ''
> +dbg_cmds = ''
> +
> +
> +def persist(data):
> +    tmp = tempfile.NamedTemporaryFile(mode='w')
> +    tmp.write(data)
> +    tmp.flush()
> +    return tmp
> +
> +
> +def execute_process(cmd, timeout=TIMEOUT):
> +    if LEGACY:
> +        # XXX: The Python 2.7 version of `subprocess.Popen` doesn't have a
> +        # timeout option, so the required functionality was implemented via
> +        # `threading.Timer`.
> +        process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
> +        timer = Timer(TIMEOUT, process.kill)
> +        timer.start()
> +        stdout, _ = process.communicate()
> +        timer.cancel()
> +
> +        # XXX: If the timeout is exceeded and the process is killed by the
> +        # timer, then the return code is non-zero, and we are going to blow up.
> +        assert process.returncode == 0
> +        return stdout.decode('ascii')
> +    else:
> +        process = subprocess.run(cmd, capture_output=True, timeout=TIMEOUT)
> +        return process.stdout.decode('ascii')
> +
> +
> +def load_extension_cmd():
> +    load_cmd = 'command script import {ext}' if LLDB else 'source {ext}'
> +    return load_cmd.format(ext=EXTENSION)
> +
> +
> +def filter_debugger_output(output):
> +    descriptor = '(lldb)' if LLDB else '(gdb)'
> +    return ''.join(
> +        filter(
> +            lambda line: not line.startswith(descriptor),
> +            output.splitlines(True),
> +        )
> +    )
> +
> +
> +def lua(data):
> +    global output
> +
> +    exec_file_flag = '-s' if LLDB else '-x'
> +    inferior_args_flag = '--' if LLDB else '--args'
> +
> +    tmp_cmds = persist(dbg_cmds)
> +    lua_script = persist(data)
> +
> +    process_cmd = [
> +        DEBUGGER_COMMAND,
> +        exec_file_flag,
> +        tmp_cmds.name,
> +        inferior_args_flag,
> +        LUAJIT_BINARY,
> +        lua_script.name,
> +    ]
> +
> +    output = execute_process(process_cmd)
> +    output = filter_debugger_output(output)
> +
> +    tmp_cmds.close()
> +    lua_script.close()
> +
> +
> +def run_until_breakpoint(location):
> +    return [
> +        'b {loc}'.format(loc=location),
> +        'process launch' if LLDB else 'r',
> +        'n',
> +    ]
> +
> +
> +def lj_cf_print(data):
> +    return run_until_breakpoint('lj_cf_print'), data
> +
> +
> +def lj_cf_dofile(data):
> +    return run_until_breakpoint('lj_cf_dofile'), data
> +
> +
> +def lj_cf_unpack(data):
> +    return run_until_breakpoint('lj_cf_unpack'), data
> +
> +
> +def debug(data):
> +    global dbg_cmds
> +    setup_cmds, extension_cmds = data
> +    setup_cmds.append(load_extension_cmd())
> +
> +    if extension_cmds:
> +        setup_cmds.append(extension_cmds)
> +
> +    setup_cmds.append('q')
> +    dbg_cmds = '\n'.join(setup_cmds)
> +
> +
> +def test_ok(result):
> +    status = 'ok' if result else 'not ok'
> +    print(
> +        '{status} - {test_name}'.format(
> +            status=status,
> +            test_name=active_block.name,
> +        )
> +    )
> +
> +
> +def expected(data):
> +    test_ok(data in output)
> +
> +
> +def matches(data):
> +    test_ok(re.search(data, output))
> +
> +
> +def runner(blocks):
> +    print('1..{n}'.format(n=len(blocks)))
> +    global active_block
> +    for block in blocks:
> +        active_block = block
> +        for section in block.sections:
> +            globals()[section.name](section.pipeline(section.data.strip()))
> diff --git a/test/tarantool-debugger-tests/luajit_dbg.test.md b/test/tarantool-debugger-tests/luajit_dbg.test.md
> new file mode 100644
> index 00000000..d2003328
> --- /dev/null
> +++ b/test/tarantool-debugger-tests/luajit_dbg.test.md
> @@ -0,0 +1,136 @@
> +## smoke
> +### debug lj_cf_print
> +### lua
> +print(1)
> +### expected
> +lj-tv command intialized
> +lj-state command intialized
> +lj-arch command intialized
> +lj-gc command intialized
> +lj-str command intialized
> +lj-tab command intialized
> +lj-stack command intialized
> +LuaJIT debug extension is successfully loaded
> +
> +
> +## lj-arch
> +### debug lj_cf_print
> +lj-arch
> +### lua
> +print(1)
> +### matches
> +LJ_64: (True|False), LJ_GC64: (True|False), LJ_DUALNUM: (True|False)
> +
> +
> +## lj-state
> +### debug lj_cf_print
> +lj-state
> +### lua
> +print(1)
> +### matches
> +VM state: [A-Z]+
> +GC state: [A-Z]+
> +JIT state: [A-Z]+
> +
> +
> +## lj-gc
> +### debug lj_cf_print
> +lj-gc
> +### lua
> +print(1)
> +### matches
> +GC stats: [A-Z]+
> +\ttotal: \d+
> +\tthreshold: \d+
> +\tdebt: \d+
> +\testimate: \d+
> +\tstepmul: \d+
> +\tpause: \d+
> +\tsweepstr: \d+/\d+
> +\troot: \d+ objects
> +\tgray: \d+ objects
> +\tgrayagain: \d+ objects
> +\tweak: \d+ objects
> +\tmmudata: \d+ objects
> +
> +
> +## lj-stack
> +### debug lj_cf_print
> +lj-stack
> +### lua
> +print(1)
> +### matches
> +-+ Red zone:\s+\d+ slots -+
> +(0x[a-zA-Z0-9]+\s+\[(S|\s)(B|\s)(T|\s)(M|\s)\] VALUE: nil\n?)*
> +-+ Stack:\s+\d+ slots -+
> +(0x[A-Za-z0-9]+(:0x[A-Za-z0-9]+)?\s+\[(S|\s)(B|\s)(T|\s)(M|\s)\].*\n?)+
> +
> +
> +## lj-tv
> +### debug lj_cf_print
> +lj-tv L->base
> +lj-tv L->base + 1
> +lj-tv L->base + 2
> +lj-tv L->base + 3
> +lj-tv L->base + 4
> +lj-tv L->base + 5
> +lj-tv L->base + 6
> +lj-tv L->base + 7
> +lj-tv L->base + 8
> +lj-tv L->base + 9
> +lj-tv L->base + 10
> +lj-tv L->base + 11
> +### lua
> +local ffi = require('ffi')
> +
> +print(
> +  nil,
> +  false,
> +  true,
> +  "hello",
> +  {1},
> +  1,
> +  1.1,
> +  coroutine.create(function() end),
> +  ffi.new('int*'),
> +  function() end,
> +  print,
> +  require
> +)

trailing space

> +### matches
> +nil
> +false
> +true
> +string \"hello\" @ 0x[a-zA-Z0-9]+
> +table @ 0x[a-zA-Z0-9]+ \(asize: \d+, hmask: 0x[a-zA-Z0-9]+\)
> +(number|integer) .*1.*
> +number 1.1\d+
> +thread @ 0x[a-zA-Z0-9]+
> +cdata @ 0x[a-zA-Z0-9]+
> +Lua function @ 0x[a-zA-Z0-9]+, [0-9]+ upvalues, .+:[0-9]+
> +fast function #[0-9]+
> +C function @ 0x[a-zA-Z0-9]+
> +
> +
> +## lj-str
> +### debug lj_cf_dofile
> +lj-str fname
> +### lua
> +pcall(dofile('name'))
> +### matches
> +String: .* \[\d+ bytes\] with hash 0x[a-zA-Z0-9]+
> +
> +
> +## lj-tab
> +### debug lj_cf_unpack
> +lj-tab t
> +### lua
> +unpack({1; a = 1})
> +### matches
> +Array part: 3 slots
> +0x[a-zA-Z0-9]+: \[0\]: nil
> +0x[a-zA-Z0-9]+: \[1\]: .+ 1
> +0x[a-zA-Z0-9]+: \[2\]: nil
> +Hash part: 2 nodes
> +0x[a-zA-Z0-9]+: { string "a" @ 0x[a-zA-Z0-9]+ } => { .+ 1 }; next = 0x0
> +0x[a-zA-Z0-9]+: { nil } => { nil }; next = 0x0
> diff --git a/test/tarantool-debugger-tests/run.py b/test/tarantool-debugger-tests/run.py
> new file mode 100755
> index 00000000..cc84940d
> --- /dev/null
> +++ b/test/tarantool-debugger-tests/run.py
> @@ -0,0 +1,8 @@
> +# Runner script for compatibility with `prove`.

Why do we need prove here for running tests?

Python is not a Lua and has builtin unit-testing library "unittest" [1].

Prove здесь как пятая нога.

With current approach it is difficult to follow testing progress:

[100%] Linking C executable luajit
[100%] Built target luajit_static
Running luajit_dbg.py tests with gdb
/home/sergeyb/sources/MRG/tarantool/third_party/luajit/test/tarantool-debugger-tests/luajit_dbg.test.md

<hang here>

Prove shows either *all* testcases passed or failed  testcases. It is 
not convenient.

1. https://docs.python.org/3/library/unittest.html

> +import sys
> +
> +from config import runner
> +from test_base import Spec
> +
> +with open(sys.argv[1], 'r') as stream:
> +    runner(Spec(stream.read()).blocks)
> diff --git a/test/tarantool-debugger-tests/test_base.py b/test/tarantool-debugger-tests/test_base.py
> new file mode 100644
> index 00000000..9d7931bd
> --- /dev/null
> +++ b/test/tarantool-debugger-tests/test_base.py
> @@ -0,0 +1,73 @@
> +# This file provides a pythonic implementation of similar to Perl's Test::Base
> +# module functionality for declarative testing.
> +# See https://metacpan.org/pod/Test::Base#Rolling-Your-Own-Filters
> +from config import *  # noqa: F401,F403
> +
> +
> +class Pipeline(object):
> +    def __init__(self, funcs):
> +        self.funcs = funcs
> +
> +    def __call__(self, data):
> +        for func in self.funcs:
> +            data = func(data)
> +        return data
> +
> +
> +class Section(object):
> +    def __init__(self, name, pipeline):
> +        self.name = name
> +        self.data = ''
> +        self.pipeline = pipeline
> +
> +
> +class Block(object):
> +    def __init__(self, name):
> +        self.name = name
> +        self.description = ''
> +        self.sections = []
> +
> +
> +class Spec(object):
> +    def __init__(
> +        self,
> +        spec,
> +        block_descriptor='## ',
> +        section_descriptor='### ',
> +    ):
> +        self.blocks = []
> +        self.block_descriptor = block_descriptor
> +        self.section_descriptor = section_descriptor
> +        self.parse_spec(spec)
> +
> +    def _is_block_start(self, line):
> +        return line.startswith(self.block_descriptor)
> +
> +    def _is_section_start(self, line):
> +        return line.startswith(self.section_descriptor)
> +
> +    def _is_description(self, line):
> +        return not self.blocks[-1].sections
> +
> +    def parse_spec(
> +        self,
> +        spec,
> +    ):
Put arguments in a single line.
> +        spec = spec.strip().splitlines(True)
> +
> +        for line in spec:
> +            if self._is_block_start(line):
> +                name = line.lstrip(self.block_descriptor).strip()
> +                self.blocks.append(Block(name))
> +
> +            elif self._is_section_start(line):
> +                meta = line.lstrip(self.section_descriptor).strip().split()
> +                name = meta[0]
> +                pipeline = Pipeline([globals()[fname] for fname in meta[1:]])
> +                self.blocks[-1].sections.append(Section(name, pipeline))
> +
> +            elif self._is_description(line):
> +                self.blocks[-1].description += line
> +
> +            else:
> +                self.blocks[-1].sections[-1].data += line