From: Sergey Kaplun <skaplun@tarantool.org> To: Igor Munkin <imun@tarantool.org>, Sergey Ostanevich <sergos@tarantool.org> Cc: tarantool-patches@dev.tarantool.org Subject: [Tarantool-patches] [PATCH luajit v1 02/11] utils: introduce leb128 reader and writer Date: Wed, 16 Dec 2020 22:13:37 +0300 [thread overview] Message-ID: <7f5108768b70c9ffd2561f89c4974379085921e4.1608142899.git.skaplun@tarantool.org> (raw) In-Reply-To: <cover.1608142899.git.skaplun@tarantool.org> This patch introduces a module for reading and writing LEB128-encoded data. It will be used for writing streaming profiling events, which will be added in the next patches. Part of tarantool/tarantool#5442 --- src/Makefile | 5 +- src/Makefile.dep | 1 + src/utils/leb128.c | 124 +++++++++++++++++++++++++++++++++++++++++++++ src/utils/leb128.h | 55 ++++++++++++++++++++ 4 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 src/utils/leb128.c create mode 100644 src/utils/leb128.h diff --git a/src/Makefile b/src/Makefile index caa49f9..be7ed95 100644 --- a/src/Makefile +++ b/src/Makefile @@ -468,6 +468,7 @@ endif DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) DASM_DASC= vm_$(DASM_ARCH).dasc +UTILS_O= utils/leb128.o BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ host/buildvm_lib.o host/buildvm_fold.o BUILDVM_T= host/buildvm @@ -498,7 +499,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ lj_carith.o lj_clib.o lj_cparse.o \ lj_lib.o lj_alloc.o lib_aux.o \ - $(LJLIB_O) lib_init.o + $(LJLIB_O) lib_init.o $(UTILS_O) LJVMCORE_O= $(LJVM_O) $(LJCORE_O) LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) @@ -516,7 +517,7 @@ ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \ host/buildvm_arch.h ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP) WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk -ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM) +ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o utils/*.o 
$(WIN_RM) ############################################################################## # Build mode handling. diff --git a/src/Makefile.dep b/src/Makefile.dep index 556314e..cc75d03 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -248,3 +248,4 @@ host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h host/minilua.o: host/minilua.c +utils/leb128.o: utils/leb128.c diff --git a/src/utils/leb128.c b/src/utils/leb128.c new file mode 100644 index 0000000..921e5bc --- /dev/null +++ b/src/utils/leb128.c @@ -0,0 +1,124 @@ +/* +** Working with LEB128/ULEB128 encoding. +** +** Major portions taken verbatim or adapted from the LuaVela. +** Copyright (C) 2015-2019 IPONWEB Ltd. +*/ + +#include <stdint.h> +#include <stddef.h> + +#define LINK_BIT (0x80) +#define MIN_TWOBYTE_VALUE (0x80) +#define PAYLOAD_MASK (0x7f) +#define SHIFT_STEP (7) +#define LEB_SIGN_BIT (0x40) + +/* ------------------------- Writing ULEB128/LEB128 ------------------------- */ + +size_t write_uleb128(uint8_t *buffer, uint64_t value) +{ + size_t i = 0; + + for (; value >= MIN_TWOBYTE_VALUE; value >>= SHIFT_STEP) { + buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT); + } + buffer[i++] = (uint8_t)value; + + return i; +} + +size_t write_leb128(uint8_t *buffer, int64_t value) +{ + size_t i = 0; + + for (; (uint64_t)(value + 0x40) >= MIN_TWOBYTE_VALUE; value >>= SHIFT_STEP) { + buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT); + } + buffer[i++] = (uint8_t)(value & PAYLOAD_MASK); + + return i; +} + +/* ------------------------- Reading ULEB128/LEB128 ------------------------- */ + +/* +** NB! For each LEB128 type (signed/unsigned) we have two versions of read +** functions: The one consuming unlimited number of input octets and the one +** consuming not more than given number of input octets. 
Currently reading +** is not used in performance critical places, so these two functions are +** implemented via single low-level function + run-time mode check. Feel free +** to change if this becomes a bottleneck. +*/ + +static size_t _read_uleb128(uint64_t *out, const uint8_t *buffer, int guarded, + size_t n) +{ + size_t i = 0; + uint64_t value = 0; + uint64_t shift = 0; + uint8_t octet; + + for(;;) { + if (guarded && i + 1 > n) { + return 0; + } + octet = buffer[i++]; + value |= ((uint64_t)(octet & PAYLOAD_MASK)) << shift; + shift += SHIFT_STEP; + if (!(octet & LINK_BIT)) { + break; + } + } + + *out = value; + return i; +} + +size_t read_uleb128(uint64_t *out, const uint8_t *buffer) +{ + return _read_uleb128(out, buffer, 0, 0); +} + +size_t read_uleb128_n(uint64_t *out, const uint8_t *buffer, size_t n) +{ + return _read_uleb128(out, buffer, 1, n); +} + +static size_t _read_leb128(int64_t *out, const uint8_t *buffer, int guarded, + size_t n) +{ + size_t i = 0; + int64_t value = 0; + uint64_t shift = 0; + uint8_t octet; + + for(;;) { + if (guarded && i + 1 > n) { + return 0; + } + octet = buffer[i++]; + value |= ((int64_t)(octet & PAYLOAD_MASK)) << shift; + shift += SHIFT_STEP; + if (!(octet & LINK_BIT)) { + break; + } + } + + if (octet & LEB_SIGN_BIT && shift < sizeof(int64_t) * 8) { + value |= -(1 << shift); + } + + *out = value; + return i; +} + +size_t read_leb128(int64_t *out, const uint8_t *buffer) +{ + return _read_leb128(out, buffer, 0, 0); +} + +size_t read_leb128_n(int64_t *out, const uint8_t *buffer, size_t n) +{ + return _read_leb128(out, buffer, 1, n); +} diff --git a/src/utils/leb128.h b/src/utils/leb128.h new file mode 100644 index 0000000..46d90bc --- /dev/null +++ b/src/utils/leb128.h @@ -0,0 +1,55 @@ +/* +** Interfaces for working with LEB128/ULEB128 encoding. +** +** Major portions taken verbatim or adapted from the LuaVela. +** Copyright (C) 2015-2019 IPONWEB Ltd. 
+*/ + +#ifndef _LJ_UTILS_LEB128_H +#define _LJ_UTILS_LEB128_H + +#include <stddef.h> +#include <stdint.h> + +/* Maximum number of bytes needed for LEB128 encoding of any 64-bit value. */ +#define LEB128_U64_MAXSIZE 10 + +/* +** Writes a value from an unsigned 64-bit input to a buffer of bytes. +** Buffer overflow is not checked. Returns number of bytes written. +*/ +size_t write_uleb128(uint8_t *buffer, uint64_t value); + +/* +** Writes a value from a signed 64-bit input to a buffer of bytes. +** Buffer overflow is not checked. Returns number of bytes written. +*/ +size_t write_leb128(uint8_t *buffer, int64_t value); + +/* +** Reads a value from a buffer of bytes to a uint64_t output. +** Buffer overflow is not checked. Returns number of bytes read. +*/ +size_t read_uleb128(uint64_t *out, const uint8_t *buffer); + +/* +** Reads a value from a buffer of bytes to an int64_t output. +** Buffer overflow is not checked. Returns number of bytes read. +*/ +size_t read_leb128(int64_t *out, const uint8_t *buffer); + +/* +** Reads a value from a buffer of bytes to a uint64_t output. Consumes no more +** than n bytes. Buffer overflow is not checked. Returns number of bytes read. +** If more than n bytes are about to be consumed, returns 0 without touching out. +*/ +size_t read_uleb128_n(uint64_t *out, const uint8_t *buffer, size_t n); + +/* +** Reads a value from a buffer of bytes to an int64_t output. Consumes no more +** than n bytes. Buffer overflow is not checked. Returns number of bytes read. +** If more than n bytes are about to be consumed, returns 0 without touching out. +*/ +size_t read_leb128_n(int64_t *out, const uint8_t *buffer, size_t n); + +#endif -- 2.28.0
next prev parent reply other threads:[~2020-12-16 19:14 UTC|newest] Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-12-16 19:13 [Tarantool-patches] [PATCH luajit v1 00/11] LuaJIT memory profiler Sergey Kaplun 2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 01/11] build: add src dir in building Sergey Kaplun 2020-12-20 21:27 ` Igor Munkin 2020-12-23 18:20 ` Sergey Kaplun 2020-12-16 19:13 ` Sergey Kaplun [this message] 2020-12-20 22:44 ` [Tarantool-patches] [PATCH luajit v1 02/11] utils: introduce leb128 reader and writer Igor Munkin 2020-12-23 22:34 ` Sergey Kaplun 2020-12-24 9:11 ` Igor Munkin 2020-12-25 8:46 ` Sergey Kaplun 2020-12-23 16:50 ` Sergey Ostanevich 2020-12-23 22:36 ` Sergey Kaplun 2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 03/11] profile: introduce profiler writing module Sergey Kaplun 2020-12-21 9:24 ` Igor Munkin 2020-12-24 6:46 ` Sergey Kaplun 2020-12-24 15:45 ` Sergey Ostanevich 2020-12-24 21:20 ` Sergey Kaplun 2020-12-25 9:37 ` Igor Munkin 2020-12-25 10:13 ` Sergey Kaplun 2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 04/11] profile: introduce symtab write module Sergey Kaplun 2020-12-21 10:30 ` Igor Munkin 2020-12-24 7:00 ` Sergey Kaplun 2020-12-24 9:36 ` Igor Munkin 2020-12-25 8:45 ` Sergey Kaplun 2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 05/11] vm: introduce LFUNC and FFUNC vmstates Sergey Kaplun 2020-12-25 11:07 ` Sergey Ostanevich 2020-12-25 11:23 ` Sergey Kaplun 2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 06/11] core: introduce new mem_L field Sergey Kaplun 2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 07/11] debug: move debug_frameline to public module API Sergey Kaplun 2020-12-20 22:46 ` Igor Munkin 2020-12-24 6:50 ` Sergey Kaplun 2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 08/11] profile: introduce memory profiler Sergey Kaplun 2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 09/11] misc: add Lua API for " Sergey Kaplun 
2020-12-24 16:32 ` Sergey Ostanevich 2020-12-24 21:25 ` Sergey Kaplun 2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 10/11] tools: introduce tools directory Sergey Kaplun 2020-12-20 22:46 ` Igor Munkin 2020-12-24 6:47 ` Sergey Kaplun 2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 11/11] profile: introduce profile parser Sergey Kaplun 2020-12-24 23:09 ` Igor Munkin 2020-12-25 8:41 ` Sergey Kaplun 2020-12-21 10:43 ` [Tarantool-patches] [PATCH luajit v1 00/11] LuaJIT memory profiler Igor Munkin 2020-12-24 7:02 ` Sergey Kaplun
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=7f5108768b70c9ffd2561f89c4974379085921e4.1608142899.git.skaplun@tarantool.org \ --to=skaplun@tarantool.org \ --cc=imun@tarantool.org \ --cc=sergos@tarantool.org \ --cc=tarantool-patches@dev.tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH luajit v1 02/11] utils: introduce leb128 reader and writer' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox