From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp41.i.mail.ru (smtp41.i.mail.ru [94.100.177.101]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id C07124765E0 for ; Fri, 25 Dec 2020 18:26:57 +0300 (MSK) From: Sergey Kaplun Date: Fri, 25 Dec 2020 18:26:03 +0300 Message-Id: In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH luajit v2 1/7] utils: introduce leb128 reader and writer List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Igor Munkin , Sergey Ostanevich Cc: tarantool-patches@dev.tarantool.org Most of the numeric data written by the memory profiler is encoded via LEB128 compression. This patch introduces the module for encoding and decoding 64bit number to LEB128 form. Part of tarantool/tarantool#5442 --- Changes in v2: - Removed reader funciton's parameter named guard. - Code style fixes. src/Makefile | 3 +- src/Makefile.dep | 7 ++- src/lj_utils.h | 58 +++++++++++++++++++ src/lj_utils_leb128.c | 132 ++++++++++++++++++++++++++++++++++++++++++ src/ljamalg.c | 1 + 5 files changed, 197 insertions(+), 4 deletions(-) create mode 100644 src/lj_utils.h create mode 100644 src/lj_utils_leb128.c diff --git a/src/Makefile b/src/Makefile index 2786348..dc2ddb6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -466,6 +466,7 @@ endif DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) DASM_DASC= vm_$(DASM_ARCH).dasc +UTILS_O= lj_utils_leb128.o BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ host/buildvm_lib.o host/buildvm_fold.o BUILDVM_T= host/buildvm @@ -495,7 +496,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_asm.o lj_trace.o lj_gdbjit.o \ lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ lj_carith.o lj_clib.o lj_cparse.o \ - lj_lib.o lj_alloc.o lib_aux.o \ + lj_lib.o lj_alloc.o $(UTILS_O) lib_aux.o \ $(LJLIB_O) lib_init.o LJVMCORE_O= $(LJVM_O) $(LJCORE_O) diff --git a/src/Makefile.dep b/src/Makefile.dep index 556314e..75409bf 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -205,6 +205,7 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_udata.h +lj_utils_leb128.o: lj_utils_leb128.c lj_utils.h lj_def.h lua.h luaconf.h lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \ lj_vm.h lj_vmevent.h @@ -229,9 +230,9 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \ lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \ lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \ - lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \ - lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \ - lib_ffi.c lib_misc.c lib_init.c + lj_utils_leb128.c lj_utils.h lib_aux.c lib_base.c lj_libdef.h lib_math.c \ + lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c \ + lib_bit.c lib_jit.c lib_ffi.c lib_misc.c lib_init.c luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ diff --git a/src/lj_utils.h b/src/lj_utils.h new file mode 100644 index 0000000..1671e8e --- /dev/null +++ b/src/lj_utils.h @@ -0,0 +1,58 @@ +/* +** Interfaces for working with LEB128/ULEB128 encoding. +** +** Major portions taken verbatim or adapted from the LuaVela. +** Copyright (C) 2015-2019 IPONWEB Ltd. +*/ + +#ifndef _LJ_UTILS_H +#define _LJ_UTILS_H + +#include "lj_def.h" + +/* Maximum number of bytes needed for LEB128 encoding of any 64-bit value. */ +#define LEB128_U64_MAXSIZE 10 + +/* +** Reads a value from a buffer of bytes to a int64_t output. +** No bounds checks for the buffer. Returns number of bytes read. +*/ +size_t LJ_FASTCALL lj_utils_read_leb128(int64_t *out, const uint8_t *buffer); + +/* +** Reads a value from a buffer of bytes to a int64_t output. Consumes no more +** than n bytes. No bounds checks for the buffer. Returns number of bytes +** read. If more than n bytes is about to be consumed, returns 0 without +** touching out. +*/ +size_t LJ_FASTCALL lj_utils_read_leb128_n(int64_t *out, const uint8_t *buffer, + size_t n); + +/* +** Reads a value from a buffer of bytes to a uint64_t output. +** No bounds checks for the buffer. Returns number of bytes read. +*/ +size_t LJ_FASTCALL lj_utils_read_uleb128(uint64_t *out, const uint8_t *buffer); + +/* +** Reads a value from a buffer of bytes to a uint64_t output. Consumes no more +** than n bytes. No bounds checks for the buffer. Returns number of bytes +** read. If more than n bytes is about to be consumed, returns 0 without +** touching out. +*/ +size_t LJ_FASTCALL lj_utils_read_uleb128_n(uint64_t *out, const uint8_t *buffer, + size_t n); + +/* +** Writes a value from an signed 64-bit input to a buffer of bytes. +** No bounds checks for the buffer. Returns number of bytes written. +*/ +size_t LJ_FASTCALL lj_utils_write_leb128(uint8_t *buffer, int64_t value); + +/* +** Writes a value from an unsigned 64-bit input to a buffer of bytes. +** No bounds checks for the buffer. Returns number of bytes written. +*/ +size_t LJ_FASTCALL lj_utils_write_uleb128(uint8_t *buffer, uint64_t value); + +#endif diff --git a/src/lj_utils_leb128.c b/src/lj_utils_leb128.c new file mode 100644 index 0000000..ce8081b --- /dev/null +++ b/src/lj_utils_leb128.c @@ -0,0 +1,132 @@ +/* +** Working with LEB128/ULEB128 encoding. +** +** Major portions taken verbatim or adapted from the LuaVela. +** Copyright (C) 2015-2019 IPONWEB Ltd. +*/ + +#define lj_utils_leb128_c +#define LUA_CORE + +#include "lj_utils.h" + +#define LINK_BIT (0x80) +#define MIN_TWOBYTE_VALUE (0x80) +#define PAYLOAD_MASK (0x7f) +#define SHIFT_STEP (7) +#define LEB_SIGN_BIT (0x40) + +/* ------------------------- Reading LEB128/ULEB128 ------------------------- */ + +/* +** XXX: For each LEB128 type (signed/unsigned) we have two versions of read +** functions: The one consuming unlimited number of input octets and the one +** consuming not more than given number of input octets. Currently reading +** is not used in performance critical places, so these two functions are +** implemented via single low-level function + run-time mode check. Feel free +** to change if this becomes a bottleneck. +*/ + +static LJ_AINLINE size_t _read_leb128(int64_t *out, const uint8_t *buffer, + size_t n) +{ + size_t i = 0; + uint64_t shift = 0; + int64_t value = 0; + uint8_t octet; + + for(;;) { + if (n != 0 && i + 1 > n) + return 0; + octet = buffer[i++]; + value |= ((int64_t)(octet & PAYLOAD_MASK)) << shift; + shift += SHIFT_STEP; + if (!(octet & LINK_BIT)) + break; + } + + if (octet & LEB_SIGN_BIT && shift < sizeof(int64_t) * 8) + value |= -(1 << shift); + + *out = value; + return i; +} + +size_t LJ_FASTCALL lj_utils_read_leb128(int64_t *out, const uint8_t *buffer) +{ + return _read_leb128(out, buffer, 0); +} + +size_t LJ_FASTCALL lj_utils_read_leb128_n(int64_t *out, const uint8_t *buffer, + size_t n) +{ + return _read_leb128(out, buffer, n); +} + + +static LJ_AINLINE size_t _read_uleb128(uint64_t *out, const uint8_t *buffer, + size_t n) +{ + size_t i = 0; + uint64_t value = 0; + uint64_t shift = 0; + uint8_t octet; + + for(;;) { + if (n != 0 && i + 1 > n) + return 0; + octet = buffer[i++]; + value |= ((uint64_t)(octet & PAYLOAD_MASK)) << shift; + shift += SHIFT_STEP; + if (!(octet & LINK_BIT)) + break; + } + + *out = value; + return i; +} + +size_t LJ_FASTCALL lj_utils_read_uleb128(uint64_t *out, const uint8_t *buffer) +{ + return _read_uleb128(out, buffer, 0); +} + +size_t LJ_FASTCALL lj_utils_read_uleb128_n(uint64_t *out, const uint8_t *buffer, + size_t n) +{ + return _read_uleb128(out, buffer, n); +} + +/* ------------------------- Writing LEB128/ULEB128 ------------------------- */ + +size_t LJ_FASTCALL lj_utils_write_leb128(uint8_t *buffer, int64_t value) +{ + size_t i = 0; + + /* LEB_SIGN_BIT propagation to check the remaining value. */ + while ((uint64_t)(value + LEB_SIGN_BIT) >= MIN_TWOBYTE_VALUE) { + buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT); + value >>= SHIFT_STEP; + } + + /* Omit LINK_BIT in case of overflow. */ + buffer[i++] = (uint8_t)(value & PAYLOAD_MASK); + + lua_assert(i <= LEB128_U64_MAXSIZE); + + return i; +} + +size_t LJ_FASTCALL lj_utils_write_uleb128(uint8_t *buffer, uint64_t value) +{ + size_t i = 0; + + for (; value >= MIN_TWOBYTE_VALUE; value >>= SHIFT_STEP) + buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT); + + buffer[i++] = (uint8_t)value; + + lua_assert(i <= LEB128_U64_MAXSIZE); + + return i; +} diff --git a/src/ljamalg.c b/src/ljamalg.c index 371bbb6..268b321 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -81,6 +81,7 @@ #include "lj_trace.c" #include "lj_gdbjit.c" #include "lj_alloc.c" +#include "lj_utils_leb128.c" #include "lib_aux.c" #include "lib_base.c" -- 2.28.0