From: Sergey Kaplun <skaplun@tarantool.org>
To: Igor Munkin <imun@tarantool.org>,
Sergey Ostanevich <sergos@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH luajit v1 02/11] utils: introduce leb128 reader and writer
Date: Wed, 16 Dec 2020 22:13:37 +0300 [thread overview]
Message-ID: <7f5108768b70c9ffd2561f89c4974379085921e4.1608142899.git.skaplun@tarantool.org> (raw)
In-Reply-To: <cover.1608142899.git.skaplun@tarantool.org>
This patch introduces a module for reading and writing values in the
LEB128 encoding. It will be used for writing the stream of profiling
events, which will be added in the next patches.
Part of tarantool/tarantool#5442
---
src/Makefile | 5 +-
src/Makefile.dep | 1 +
src/utils/leb128.c | 124 +++++++++++++++++++++++++++++++++++++++++++++
src/utils/leb128.h | 55 ++++++++++++++++++++
4 files changed, 183 insertions(+), 2 deletions(-)
create mode 100644 src/utils/leb128.c
create mode 100644 src/utils/leb128.h
diff --git a/src/Makefile b/src/Makefile
index caa49f9..be7ed95 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -468,6 +468,7 @@ endif
DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
DASM_DASC= vm_$(DASM_ARCH).dasc
+UTILS_O= utils/leb128.o
BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \
host/buildvm_lib.o host/buildvm_fold.o
BUILDVM_T= host/buildvm
@@ -498,7 +499,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
lj_carith.o lj_clib.o lj_cparse.o \
lj_lib.o lj_alloc.o lib_aux.o \
- $(LJLIB_O) lib_init.o
+ $(LJLIB_O) lib_init.o $(UTILS_O)
LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o)
@@ -516,7 +517,7 @@ ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \
host/buildvm_arch.h
ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP)
WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk
-ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM)
+ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o utils/*.o $(WIN_RM)
##############################################################################
# Build mode handling.
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 556314e..cc75d03 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -248,3 +248,4 @@ host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
host/minilua.o: host/minilua.c
+utils/leb128.o: utils/leb128.c
diff --git a/src/utils/leb128.c b/src/utils/leb128.c
new file mode 100644
index 0000000..921e5bc
--- /dev/null
+++ b/src/utils/leb128.c
@@ -0,0 +1,124 @@
+/*
+** Working with LEB128/ULEB128 encoding.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#include <stdint.h>
+#include <stddef.h>
+
+/*
+** Layout of a single ULEB128/LEB128 octet: the low 7 bits carry payload,
+** the top bit tells whether more octets follow.
+*/
+enum {
+  LINK_BIT = 0x80,          /* Set on every octet except the last one. */
+  MIN_TWOBYTE_VALUE = 0x80, /* Smallest unsigned value needing two octets. */
+  PAYLOAD_MASK = 0x7f,      /* Selects the 7 payload bits of an octet. */
+  SHIFT_STEP = 7,           /* Number of payload bits per octet. */
+  LEB_SIGN_BIT = 0x40       /* Sign bit inside the last octet of a LEB128. */
+};
+
+/* ------------------------- Writing ULEB128/LEB128 ------------------------- */
+
+/*
+** Encodes value as ULEB128 into buffer, least significant 7-bit group first.
+** Every octet but the last one is stored with LINK_BIT set. The size of the
+** buffer is not checked. Returns the number of octets stored.
+*/
+size_t write_uleb128(uint8_t *buffer, uint64_t value)
+{
+  uint8_t *pos = buffer;
+
+  while (value >= MIN_TWOBYTE_VALUE) {
+    *pos++ = (uint8_t)(LINK_BIT | (value & PAYLOAD_MASK));
+    value >>= SHIFT_STEP;
+  }
+  /* What is left fits into 7 bits, so the link bit stays clear. */
+  *pos++ = (uint8_t)value;
+
+  return (size_t)(pos - buffer);
+}
+
+/*
+** Encodes value as signed LEB128 into buffer, least significant 7-bit group
+** first. Every octet but the last one is stored with LINK_BIT set; bit
+** LEB_SIGN_BIT of the last octet holds the sign. The size of the buffer is
+** not checked. Returns the number of octets stored.
+*/
+size_t write_leb128(uint8_t *buffer, int64_t value)
+{
+  size_t i = 0;
+
+  /*
+  ** The last octet holds 6 payload bits plus the sign bit, i.e. a value in
+  ** [-0x40, 0x3f]. Adding 0x40 maps that range onto [0, 0x7f]. The addition
+  ** is done in unsigned arithmetic: `value + 0x40` on int64_t overflows
+  ** (undefined behaviour) for values above INT64_MAX - 0x40.
+  */
+  while ((uint64_t)value + LEB_SIGN_BIT >= MIN_TWOBYTE_VALUE) {
+    buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT);
+    /* Relies on >> being an arithmetic shift for negative values. */
+    value >>= SHIFT_STEP;
+  }
+  buffer[i++] = (uint8_t)(value & PAYLOAD_MASK);
+
+  return i;
+}
+
+/* ------------------------- Reading ULEB128/LEB128 ------------------------- */
+
+/*
+** NB! For each LEB128 type (signed/unsigned) we have two versions of read
+** functions: The one consuming unlimited number of input octets and the one
+** consuming not more than given number of input octets. Currently reading
+** is not used in performance critical places, so these two functions are
+** implemented via single low-level function + run-time mode check. Feel free
+** to change if this becomes a bottleneck.
+*/
+
+/*
+** Common ULEB128 reader behind read_uleb128() and read_uleb128_n().
+**
+** Decodes octets from buffer until one without LINK_BIT is met. When guarded
+** is non-zero, at most n octets are consumed: if the value is not complete by
+** then, 0 is returned and *out is left untouched. When guarded is zero, n is
+** ignored.
+**
+** On success stores the value to *out and returns the number of octets
+** consumed. Payload bits that do not fit into 64 bits are dropped.
+*/
+static size_t _read_uleb128(uint64_t *out, const uint8_t *buffer, int guarded,
+                            size_t n)
+{
+  size_t i = 0;
+  uint64_t value = 0;
+  unsigned shift = 0;
+  uint8_t octet;
+
+  do {
+    if (guarded && i >= n) {
+      return 0;
+    }
+    octet = buffer[i++];
+    /*
+    ** Shifting a 64-bit value by 64 or more is undefined behaviour, so
+    ** octets of an overlong encoding are consumed but not accumulated.
+    */
+    if (shift < sizeof(value) * 8) {
+      value |= (uint64_t)(octet & PAYLOAD_MASK) << shift;
+      shift += SHIFT_STEP;
+    }
+  } while (octet & LINK_BIT);
+
+  *out = value;
+  return i;
+}
+
+/* Unbounded ULEB128 read: runs the shared reader with the guard off. */
+size_t read_uleb128(uint64_t *out, const uint8_t *buffer)
+{
+  const int guarded = 0;
+  const size_t limit = 0; /* Not looked at when the guard is off. */
+
+  return _read_uleb128(out, buffer, guarded, limit);
+}
+
+/* Bounded ULEB128 read: runs the shared reader limited to n octets. */
+size_t read_uleb128_n(uint64_t *out, const uint8_t *buffer, size_t n)
+{
+  const int guarded = 1;
+
+  return _read_uleb128(out, buffer, guarded, n);
+}
+
+/*
+** Common signed LEB128 reader behind read_leb128() and read_leb128_n().
+**
+** Decodes octets from buffer until one without LINK_BIT is met, then
+** sign-extends the result if LEB_SIGN_BIT of that last octet is set. When
+** guarded is non-zero, at most n octets are consumed: if the value is not
+** complete by then, 0 is returned and *out is left untouched. When guarded
+** is zero, n is ignored.
+**
+** On success stores the value to *out and returns the number of octets
+** consumed. Payload bits that do not fit into 64 bits are dropped.
+*/
+static size_t _read_leb128(int64_t *out, const uint8_t *buffer, int guarded,
+                           size_t n)
+{
+  size_t i = 0;
+  /*
+  ** Accumulate in an unsigned type: left-shifting a signed value into or
+  ** past the sign bit is undefined behaviour.
+  */
+  uint64_t value = 0;
+  unsigned shift = 0;
+  uint8_t octet;
+
+  do {
+    if (guarded && i >= n) {
+      return 0;
+    }
+    octet = buffer[i++];
+    /*
+    ** Shifting a 64-bit value by 64 or more is undefined behaviour, so
+    ** octets of an overlong encoding are consumed but not accumulated.
+    */
+    if (shift < sizeof(value) * 8) {
+      value |= (uint64_t)(octet & PAYLOAD_MASK) << shift;
+      shift += SHIFT_STEP;
+    }
+  } while (octet & LINK_BIT);
+
+  if ((octet & LEB_SIGN_BIT) && shift < sizeof(value) * 8) {
+    /*
+    ** Fill all bits above the decoded ones. The mask has to be built in 64
+    ** bits: `1 << shift` is an int shift and is undefined once shift
+    ** reaches the width of int, i.e. for any value encoded in 5+ octets.
+    */
+    value |= ~(uint64_t)0 << shift;
+  }
+
+  *out = (int64_t)value;
+  return i;
+}
+
+/* Unbounded LEB128 read: runs the shared reader with the guard off. */
+size_t read_leb128(int64_t *out, const uint8_t *buffer)
+{
+  const int guarded = 0;
+  const size_t limit = 0; /* Not looked at when the guard is off. */
+
+  return _read_leb128(out, buffer, guarded, limit);
+}
+
+/* Bounded LEB128 read: runs the shared reader limited to n octets. */
+size_t read_leb128_n(int64_t *out, const uint8_t *buffer, size_t n)
+{
+  const int guarded = 1;
+
+  return _read_leb128(out, buffer, guarded, n);
+}
diff --git a/src/utils/leb128.h b/src/utils/leb128.h
new file mode 100644
index 0000000..46d90bc
--- /dev/null
+++ b/src/utils/leb128.h
@@ -0,0 +1,55 @@
+/*
+** Interfaces for working with LEB128/ULEB128 encoding.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#ifndef _LJ_UTILS_LEB128_H
+#define _LJ_UTILS_LEB128_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+** Maximum number of bytes needed for the LEB128 encoding of any 64-bit
+** value (7 payload bits per byte).
+*/
+#define LEB128_U64_MAXSIZE 10
+
+/*
+** Writes an unsigned 64-bit value to a buffer of bytes in ULEB128 encoding.
+** The buffer size is not checked: the caller must provide room for up to
+** LEB128_U64_MAXSIZE bytes. Returns the number of bytes written.
+*/
+size_t write_uleb128(uint8_t *buffer, uint64_t value);
+
+/*
+** Writes a signed 64-bit value to a buffer of bytes in LEB128 encoding.
+** The buffer size is not checked: the caller must provide room for up to
+** LEB128_U64_MAXSIZE bytes. Returns the number of bytes written.
+*/
+size_t write_leb128(uint8_t *buffer, int64_t value);
+
+/*
+** Reads a ULEB128-encoded value from a buffer of bytes into a uint64_t
+** output. The buffer size is not checked: bytes are consumed until one
+** without the continuation bit (0x80) is met. Returns the number of bytes
+** read.
+*/
+size_t read_uleb128(uint64_t *out, const uint8_t *buffer);
+
+/*
+** Reads a LEB128-encoded value from a buffer of bytes into an int64_t
+** output. The buffer size is not checked: bytes are consumed until one
+** without the continuation bit (0x80) is met. Returns the number of bytes
+** read.
+*/
+size_t read_leb128(int64_t *out, const uint8_t *buffer);
+
+/*
+** Reads a ULEB128-encoded value from a buffer of bytes into a uint64_t
+** output. Consumes no more than n bytes; whether the buffer really holds n
+** bytes is not checked. Returns the number of bytes read. If the value does
+** not end within n bytes, returns 0 without touching out.
+*/
+size_t read_uleb128_n(uint64_t *out, const uint8_t *buffer, size_t n);
+
+/*
+** Reads a LEB128-encoded value from a buffer of bytes into an int64_t
+** output. Consumes no more than n bytes; whether the buffer really holds n
+** bytes is not checked. Returns the number of bytes read. If the value does
+** not end within n bytes, returns 0 without touching out.
+*/
+size_t read_leb128_n(int64_t *out, const uint8_t *buffer, size_t n);
+
+#endif
--
2.28.0
next prev parent reply other threads:[~2020-12-16 19:14 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-16 19:13 [Tarantool-patches] [PATCH luajit v1 00/11] LuaJIT memory profiler Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 01/11] build: add src dir in building Sergey Kaplun
2020-12-20 21:27 ` Igor Munkin
2020-12-23 18:20 ` Sergey Kaplun
2020-12-16 19:13 ` Sergey Kaplun [this message]
2020-12-20 22:44 ` [Tarantool-patches] [PATCH luajit v1 02/11] utils: introduce leb128 reader and writer Igor Munkin
2020-12-23 22:34 ` Sergey Kaplun
2020-12-24 9:11 ` Igor Munkin
2020-12-25 8:46 ` Sergey Kaplun
2020-12-23 16:50 ` Sergey Ostanevich
2020-12-23 22:36 ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 03/11] profile: introduce profiler writing module Sergey Kaplun
2020-12-21 9:24 ` Igor Munkin
2020-12-24 6:46 ` Sergey Kaplun
2020-12-24 15:45 ` Sergey Ostanevich
2020-12-24 21:20 ` Sergey Kaplun
2020-12-25 9:37 ` Igor Munkin
2020-12-25 10:13 ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 04/11] profile: introduce symtab write module Sergey Kaplun
2020-12-21 10:30 ` Igor Munkin
2020-12-24 7:00 ` Sergey Kaplun
2020-12-24 9:36 ` Igor Munkin
2020-12-25 8:45 ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 05/11] vm: introduce LFUNC and FFUNC vmstates Sergey Kaplun
2020-12-25 11:07 ` Sergey Ostanevich
2020-12-25 11:23 ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 06/11] core: introduce new mem_L field Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 07/11] debug: move debug_frameline to public module API Sergey Kaplun
2020-12-20 22:46 ` Igor Munkin
2020-12-24 6:50 ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 08/11] profile: introduce memory profiler Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 09/11] misc: add Lua API for " Sergey Kaplun
2020-12-24 16:32 ` Sergey Ostanevich
2020-12-24 21:25 ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 10/11] tools: introduce tools directory Sergey Kaplun
2020-12-20 22:46 ` Igor Munkin
2020-12-24 6:47 ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 11/11] profile: introduce profile parser Sergey Kaplun
2020-12-24 23:09 ` Igor Munkin
2020-12-25 8:41 ` Sergey Kaplun
2020-12-21 10:43 ` [Tarantool-patches] [PATCH luajit v1 00/11] LuaJIT memory profiler Igor Munkin
2020-12-24 7:02 ` Sergey Kaplun
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7f5108768b70c9ffd2561f89c4974379085921e4.1608142899.git.skaplun@tarantool.org \
--to=skaplun@tarantool.org \
--cc=imun@tarantool.org \
--cc=sergos@tarantool.org \
--cc=tarantool-patches@dev.tarantool.org \
--subject='Re: [Tarantool-patches] [PATCH luajit v1 02/11] utils: introduce leb128 reader and writer' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox