From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp54.i.mail.ru (smtp54.i.mail.ru [217.69.128.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 15B1845C307 for ; Wed, 16 Dec 2020 22:14:33 +0300 (MSK) From: Sergey Kaplun Date: Wed, 16 Dec 2020 22:13:37 +0300 Message-Id: <7f5108768b70c9ffd2561f89c4974379085921e4.1608142899.git.skaplun@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH luajit v1 02/11] utils: introduce leb128 reader and writer List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Igor Munkin , Sergey Ostanevich Cc: tarantool-patches@dev.tarantool.org This patch introduces module for reading and writing leb128 compression. It will be used for streaming profiling events writing, that will be added at the next patches. Part of tarantool/tarantool#5442 --- src/Makefile | 5 +- src/Makefile.dep | 1 + src/utils/leb128.c | 124 +++++++++++++++++++++++++++++++++++++++++++++ src/utils/leb128.h | 55 ++++++++++++++++++++ 4 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 src/utils/leb128.c create mode 100644 src/utils/leb128.h diff --git a/src/Makefile b/src/Makefile index caa49f9..be7ed95 100644 --- a/src/Makefile +++ b/src/Makefile @@ -468,6 +468,7 @@ endif DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) DASM_DASC= vm_$(DASM_ARCH).dasc +UTILS_O= utils/leb128.o BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ host/buildvm_lib.o host/buildvm_fold.o BUILDVM_T= host/buildvm @@ -498,7 +499,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ lj_carith.o lj_clib.o lj_cparse.o \ lj_lib.o lj_alloc.o lib_aux.o \ - $(LJLIB_O) lib_init.o + $(LJLIB_O) lib_init.o $(UTILS_O) LJVMCORE_O= $(LJVM_O) $(LJCORE_O) LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) @@ -516,7 +517,7 @@ ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \ host/buildvm_arch.h ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP) WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk -ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM) +ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o utils/*.o $(WIN_RM) ############################################################################## # Build mode handling. diff --git a/src/Makefile.dep b/src/Makefile.dep index 556314e..cc75d03 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -248,3 +248,4 @@ host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h host/minilua.o: host/minilua.c +utils/leb128.o: utils/leb128.c diff --git a/src/utils/leb128.c b/src/utils/leb128.c new file mode 100644 index 0000000..921e5bc --- /dev/null +++ b/src/utils/leb128.c @@ -0,0 +1,124 @@ +/* +** Working with LEB128/ULEB128 encoding. +** +** Major portions taken verbatim or adapted from the LuaVela. +** Copyright (C) 2015-2019 IPONWEB Ltd. +*/ + +#include +#include + +#define LINK_BIT (0x80) +#define MIN_TWOBYTE_VALUE (0x80) +#define PAYLOAD_MASK (0x7f) +#define SHIFT_STEP (7) +#define LEB_SIGN_BIT (0x40) + +/* ------------------------- Writing ULEB128/LEB128 ------------------------- */ + +size_t write_uleb128(uint8_t *buffer, uint64_t value) +{ + size_t i = 0; + + for (; value >= MIN_TWOBYTE_VALUE; value >>= SHIFT_STEP) { + buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT); + } + buffer[i++] = (uint8_t)value; + + return i; +} + +size_t write_leb128(uint8_t *buffer, int64_t value) +{ + size_t i = 0; + + for (; (uint64_t)(value + 0x40) >= MIN_TWOBYTE_VALUE; value >>= SHIFT_STEP) { + buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT); + } + buffer[i++] = (uint8_t)(value & PAYLOAD_MASK); + + return i; +} + +/* ------------------------- Reading ULEB128/LEB128 ------------------------- */ + +/* +** NB! For each LEB128 type (signed/unsigned) we have two versions of read +** functions: The one consuming unlimited number of input octets and the one +** consuming not more than given number of input octets. Currently reading +** is not used in performance critical places, so these two functions are +** implemented via single low-level function + run-time mode check. Feel free +** to change if this becomes a bottleneck. +*/ + +static size_t _read_uleb128(uint64_t *out, const uint8_t *buffer, int guarded, + size_t n) +{ + size_t i = 0; + uint64_t value = 0; + uint64_t shift = 0; + uint8_t octet; + + for(;;) { + if (guarded && i + 1 > n) { + return 0; + } + octet = buffer[i++]; + value |= ((uint64_t)(octet & PAYLOAD_MASK)) << shift; + shift += SHIFT_STEP; + if (!(octet & LINK_BIT)) { + break; + } + } + + *out = value; + return i; +} + +size_t read_uleb128(uint64_t *out, const uint8_t *buffer) +{ + return _read_uleb128(out, buffer, 0, 0); +} + +size_t read_uleb128_n(uint64_t *out, const uint8_t *buffer, size_t n) +{ + return _read_uleb128(out, buffer, 1, n); +} + +static size_t _read_leb128(int64_t *out, const uint8_t *buffer, int guarded, + size_t n) +{ + size_t i = 0; + int64_t value = 0; + uint64_t shift = 0; + uint8_t octet; + + for(;;) { + if (guarded && i + 1 > n) { + return 0; + } + octet = buffer[i++]; + value |= ((int64_t)(octet & PAYLOAD_MASK)) << shift; + shift += SHIFT_STEP; + if (!(octet & LINK_BIT)) { + break; + } + } + + if (octet & LEB_SIGN_BIT && shift < sizeof(int64_t) * 8) { + value |= -(1 << shift); + } + + *out = value; + return i; +} + +size_t read_leb128(int64_t *out, const uint8_t *buffer) +{ + return _read_leb128(out, buffer, 0, 0); +} + +size_t read_leb128_n(int64_t *out, const uint8_t *buffer, size_t n) +{ + return _read_leb128(out, buffer, 1, n); +} diff --git a/src/utils/leb128.h b/src/utils/leb128.h new file mode 100644 index 0000000..46d90bc --- /dev/null +++ b/src/utils/leb128.h @@ -0,0 +1,55 @@ +/* +** Interfaces for working with LEB128/ULEB128 encoding. +** +** Major portions taken verbatim or adapted from the LuaVela. +** Copyright (C) 2015-2019 IPONWEB Ltd. +*/ + +#ifndef _LJ_UTILS_LEB128_H +#define _LJ_UTILS_LEB128_H + +#include +#include + +/* Maximum number of bytes needed for LEB128 encoding of any 64-bit value. */ +#define LEB128_U64_MAXSIZE 10 + +/* +** Writes a value from an unsigned 64-bit input to a buffer of bytes. +** Buffer overflow is not checked. Returns number of bytes written. +*/ +size_t write_uleb128(uint8_t *buffer, uint64_t value); + +/* +** Writes a value from an signed 64-bit input to a buffer of bytes. +** Buffer overflow is not checked. Returns number of bytes written. +*/ +size_t write_leb128(uint8_t *buffer, int64_t value); + +/* +** Reads a value from a buffer of bytes to a uint64_t output. +** Buffer overflow is not checked. Returns number of bytes read. +*/ +size_t read_uleb128(uint64_t *out, const uint8_t *buffer); + +/* +** Reads a value from a buffer of bytes to a int64_t output. +** Buffer overflow is not checked. Returns number of bytes read. +*/ +size_t read_leb128(int64_t *out, const uint8_t *buffer); + +/* +** Reads a value from a buffer of bytes to a uint64_t output. Consumes no more +** than n bytes. Buffer overflow is not checked. Returns number of bytes read. +** If more than n bytes is about to be consumed, returns 0 without touching out. +*/ +size_t read_uleb128_n(uint64_t *out, const uint8_t *buffer, size_t n); + +/* +** Reads a value from a buffer of bytes to a int64_t output. Consumes no more +** than n bytes. Buffer overflow is not checked. Returns number of bytes read. +** If more than n bytes is about to be consumed, returns 0 without touching out. +*/ +size_t read_leb128_n(int64_t *out, const uint8_t *buffer, size_t n); + +#endif -- 2.28.0