[Tarantool-patches] [PATCH luajit v2 1/7] utils: introduce leb128 reader and writer
Sergey Kaplun
skaplun at tarantool.org
Fri Dec 25 18:26:03 MSK 2020
Most of the numeric data written by the memory profiler is encoded
via LEB128 compression. This patch introduces the module for encoding
and decoding 64bit number to LEB128 form.
Part of tarantool/tarantool#5442
---
Changes in v2:
- Removed reader funciton's parameter named guard.
- Code style fixes.
src/Makefile | 3 +-
src/Makefile.dep | 7 ++-
src/lj_utils.h | 58 +++++++++++++++++++
src/lj_utils_leb128.c | 132 ++++++++++++++++++++++++++++++++++++++++++
src/ljamalg.c | 1 +
5 files changed, 197 insertions(+), 4 deletions(-)
create mode 100644 src/lj_utils.h
create mode 100644 src/lj_utils_leb128.c
diff --git a/src/Makefile b/src/Makefile
index 2786348..dc2ddb6 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -466,6 +466,7 @@ endif
DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
DASM_DASC= vm_$(DASM_ARCH).dasc
+UTILS_O= lj_utils_leb128.o
BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \
host/buildvm_lib.o host/buildvm_fold.o
BUILDVM_T= host/buildvm
@@ -495,7 +496,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
lj_asm.o lj_trace.o lj_gdbjit.o \
lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
lj_carith.o lj_clib.o lj_cparse.o \
- lj_lib.o lj_alloc.o lib_aux.o \
+ lj_lib.o lj_alloc.o $(UTILS_O) lib_aux.o \
$(LJLIB_O) lib_init.o
LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 556314e..75409bf 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -205,6 +205,7 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h
lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_udata.h
+lj_utils_leb128.o: lj_utils_leb128.c lj_utils.h lj_def.h lua.h luaconf.h
lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
lj_vm.h lj_vmevent.h
@@ -229,9 +230,9 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
- lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
- lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
- lib_ffi.c lib_misc.c lib_init.c
+ lj_utils_leb128.c lj_utils.h lib_aux.c lib_base.c lj_libdef.h lib_math.c \
+ lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c \
+ lib_bit.c lib_jit.c lib_ffi.c lib_misc.c lib_init.c
luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
diff --git a/src/lj_utils.h b/src/lj_utils.h
new file mode 100644
index 0000000..1671e8e
--- /dev/null
+++ b/src/lj_utils.h
@@ -0,0 +1,58 @@
+/*
+** Interfaces for working with LEB128/ULEB128 encoding.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#ifndef _LJ_UTILS_H
+#define _LJ_UTILS_H
+
+#include "lj_def.h"
+
+/* Maximum number of bytes needed for LEB128 encoding of any 64-bit value. */
+#define LEB128_U64_MAXSIZE 10
+
+/*
+** Reads a value from a buffer of bytes to a int64_t output.
+** No bounds checks for the buffer. Returns number of bytes read.
+*/
+size_t LJ_FASTCALL lj_utils_read_leb128(int64_t *out, const uint8_t *buffer);
+
+/*
+** Reads a value from a buffer of bytes to a int64_t output. Consumes no more
+** than n bytes. No bounds checks for the buffer. Returns number of bytes
+** read. If more than n bytes is about to be consumed, returns 0 without
+** touching out.
+*/
+size_t LJ_FASTCALL lj_utils_read_leb128_n(int64_t *out, const uint8_t *buffer,
+ size_t n);
+
+/*
+** Reads a value from a buffer of bytes to a uint64_t output.
+** No bounds checks for the buffer. Returns number of bytes read.
+*/
+size_t LJ_FASTCALL lj_utils_read_uleb128(uint64_t *out, const uint8_t *buffer);
+
+/*
+** Reads a value from a buffer of bytes to a uint64_t output. Consumes no more
+** than n bytes. No bounds checks for the buffer. Returns number of bytes
+** read. If more than n bytes is about to be consumed, returns 0 without
+** touching out.
+*/
+size_t LJ_FASTCALL lj_utils_read_uleb128_n(uint64_t *out, const uint8_t *buffer,
+ size_t n);
+
+/*
+** Writes a value from an signed 64-bit input to a buffer of bytes.
+** No bounds checks for the buffer. Returns number of bytes written.
+*/
+size_t LJ_FASTCALL lj_utils_write_leb128(uint8_t *buffer, int64_t value);
+
+/*
+** Writes a value from an unsigned 64-bit input to a buffer of bytes.
+** No bounds checks for the buffer. Returns number of bytes written.
+*/
+size_t LJ_FASTCALL lj_utils_write_uleb128(uint8_t *buffer, uint64_t value);
+
+#endif
diff --git a/src/lj_utils_leb128.c b/src/lj_utils_leb128.c
new file mode 100644
index 0000000..ce8081b
--- /dev/null
+++ b/src/lj_utils_leb128.c
@@ -0,0 +1,132 @@
+/*
+** Working with LEB128/ULEB128 encoding.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#define lj_utils_leb128_c
+#define LUA_CORE
+
+#include "lj_utils.h"
+
+#define LINK_BIT (0x80)
+#define MIN_TWOBYTE_VALUE (0x80)
+#define PAYLOAD_MASK (0x7f)
+#define SHIFT_STEP (7)
+#define LEB_SIGN_BIT (0x40)
+
+/* ------------------------- Reading LEB128/ULEB128 ------------------------- */
+
+/*
+** XXX: For each LEB128 type (signed/unsigned) we have two versions of read
+** functions: The one consuming unlimited number of input octets and the one
+** consuming not more than given number of input octets. Currently reading
+** is not used in performance critical places, so these two functions are
+** implemented via single low-level function + run-time mode check. Feel free
+** to change if this becomes a bottleneck.
+*/
+
+static LJ_AINLINE size_t _read_leb128(int64_t *out, const uint8_t *buffer,
+ size_t n)
+{
+ size_t i = 0;
+ uint64_t shift = 0;
+ int64_t value = 0;
+ uint8_t octet;
+
+ for(;;) {
+ if (n != 0 && i + 1 > n)
+ return 0;
+ octet = buffer[i++];
+ value |= ((int64_t)(octet & PAYLOAD_MASK)) << shift;
+ shift += SHIFT_STEP;
+ if (!(octet & LINK_BIT))
+ break;
+ }
+
+ if (octet & LEB_SIGN_BIT && shift < sizeof(int64_t) * 8)
+ value |= -(1 << shift);
+
+ *out = value;
+ return i;
+}
+
+size_t LJ_FASTCALL lj_utils_read_leb128(int64_t *out, const uint8_t *buffer)
+{
+ return _read_leb128(out, buffer, 0);
+}
+
+size_t LJ_FASTCALL lj_utils_read_leb128_n(int64_t *out, const uint8_t *buffer,
+ size_t n)
+{
+ return _read_leb128(out, buffer, n);
+}
+
+
+static LJ_AINLINE size_t _read_uleb128(uint64_t *out, const uint8_t *buffer,
+ size_t n)
+{
+ size_t i = 0;
+ uint64_t value = 0;
+ uint64_t shift = 0;
+ uint8_t octet;
+
+ for(;;) {
+ if (n != 0 && i + 1 > n)
+ return 0;
+ octet = buffer[i++];
+ value |= ((uint64_t)(octet & PAYLOAD_MASK)) << shift;
+ shift += SHIFT_STEP;
+ if (!(octet & LINK_BIT))
+ break;
+ }
+
+ *out = value;
+ return i;
+}
+
+size_t LJ_FASTCALL lj_utils_read_uleb128(uint64_t *out, const uint8_t *buffer)
+{
+ return _read_uleb128(out, buffer, 0);
+}
+
+size_t LJ_FASTCALL lj_utils_read_uleb128_n(uint64_t *out, const uint8_t *buffer,
+ size_t n)
+{
+ return _read_uleb128(out, buffer, n);
+}
+
+/* ------------------------- Writing LEB128/ULEB128 ------------------------- */
+
+size_t LJ_FASTCALL lj_utils_write_leb128(uint8_t *buffer, int64_t value)
+{
+ size_t i = 0;
+
+ /* LEB_SIGN_BIT propagation to check the remaining value. */
+ while ((uint64_t)(value + LEB_SIGN_BIT) >= MIN_TWOBYTE_VALUE) {
+ buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT);
+ value >>= SHIFT_STEP;
+ }
+
+ /* Omit LINK_BIT in case of overflow. */
+ buffer[i++] = (uint8_t)(value & PAYLOAD_MASK);
+
+ lua_assert(i <= LEB128_U64_MAXSIZE);
+
+ return i;
+}
+
+size_t LJ_FASTCALL lj_utils_write_uleb128(uint8_t *buffer, uint64_t value)
+{
+ size_t i = 0;
+
+ for (; value >= MIN_TWOBYTE_VALUE; value >>= SHIFT_STEP)
+ buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT);
+
+ buffer[i++] = (uint8_t)value;
+
+ lua_assert(i <= LEB128_U64_MAXSIZE);
+
+ return i;
+}
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 371bbb6..268b321 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -81,6 +81,7 @@
#include "lj_trace.c"
#include "lj_gdbjit.c"
#include "lj_alloc.c"
+#include "lj_utils_leb128.c"
#include "lib_aux.c"
#include "lib_base.c"
--
2.28.0
More information about the Tarantool-patches
mailing list