Tarantool development patches archive
 help / color / mirror / Atom feed
From: Sergey Kaplun <skaplun@tarantool.org>
To: Igor Munkin <imun@tarantool.org>,
	Sergey Ostanevich <sergos@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH luajit v1 02/11] utils: introduce leb128 reader and writer
Date: Wed, 16 Dec 2020 22:13:37 +0300	[thread overview]
Message-ID: <7f5108768b70c9ffd2561f89c4974379085921e4.1608142899.git.skaplun@tarantool.org> (raw)
In-Reply-To: <cover.1608142899.git.skaplun@tarantool.org>

This patch introduces a module for reading and writing LEB128-encoded
values. It will be used for writing the stream of profiling events, which
will be added in the subsequent patches.

Part of tarantool/tarantool#5442
---
 src/Makefile       |   5 +-
 src/Makefile.dep   |   1 +
 src/utils/leb128.c | 124 +++++++++++++++++++++++++++++++++++++++++++++
 src/utils/leb128.h |  55 ++++++++++++++++++++
 4 files changed, 183 insertions(+), 2 deletions(-)
 create mode 100644 src/utils/leb128.c
 create mode 100644 src/utils/leb128.h

diff --git a/src/Makefile b/src/Makefile
index caa49f9..be7ed95 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -468,6 +468,7 @@ endif
 DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
 DASM_DASC= vm_$(DASM_ARCH).dasc
 
+UTILS_O= utils/leb128.o
 BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \
 	   host/buildvm_lib.o host/buildvm_fold.o
 BUILDVM_T= host/buildvm
@@ -498,7 +499,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
 	  lj_carith.o lj_clib.o lj_cparse.o \
 	  lj_lib.o lj_alloc.o lib_aux.o \
-	  $(LJLIB_O) lib_init.o
+	  $(LJLIB_O) lib_init.o $(UTILS_O)
 
 LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
 LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o)
@@ -516,7 +517,7 @@ ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \
 	    host/buildvm_arch.h
 ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP)
 WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk
-ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM)
+ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o utils/*.o $(WIN_RM)
 
 ##############################################################################
 # Build mode handling.
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 556314e..cc75d03 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -248,3 +248,4 @@ host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
 host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
  luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
 host/minilua.o: host/minilua.c
+utils/leb128.o: utils/leb128.c
diff --git a/src/utils/leb128.c b/src/utils/leb128.c
new file mode 100644
index 0000000..921e5bc
--- /dev/null
+++ b/src/utils/leb128.c
@@ -0,0 +1,124 @@
+/*
+** Working with LEB128/ULEB128 encoding.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#include <stdint.h>
+#include <stddef.h>
+
+#define LINK_BIT          (0x80) /* Set in an octet when more octets follow. */
+#define MIN_TWOBYTE_VALUE (0x80) /* Smallest value that needs a second octet. */
+#define PAYLOAD_MASK      (0x7f) /* Seven payload bits of each octet. */
+#define SHIFT_STEP        (7) /* Payload bits contributed by each octet. */
+#define LEB_SIGN_BIT      (0x40) /* Sign bit within the last signed octet. */
+
+/* ------------------------- Writing ULEB128/LEB128 ------------------------- */
+
+/*
+** Encodes value as ULEB128 into buffer, least significant septet first.
+** No bounds are checked: the caller has to provide enough room.
+** Returns the number of octets stored.
+*/
+size_t write_uleb128(uint8_t *buffer, uint64_t value)
+{
+  uint8_t *pos = buffer;
+
+  while (value >= MIN_TWOBYTE_VALUE) {
+    /* More septets follow, so this octet carries the link bit. */
+    *pos++ = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT);
+    value >>= SHIFT_STEP;
+  }
+  /* The remainder fits into seven bits: emit it without the link bit. */
+  *pos++ = (uint8_t)value;
+
+  return (size_t)(pos - buffer);
+}
+
+/*
+** Encodes a signed value as LEB128 into buffer, least significant septet
+** first. No bounds are checked: the caller has to provide enough room.
+** Returns the number of octets stored.
+*/
+size_t write_leb128(uint8_t *buffer, int64_t value)
+{
+  size_t i = 0;
+
+  /*
+  ** The value fits into the final octet once it lies within [-0x40, 0x3f].
+  ** The bias is added after the conversion to unsigned: adding it to the
+  ** int64_t itself overflows (undefined behaviour) for values close to
+  ** INT64_MAX. The right shift of a negative value is expected to be an
+  ** arithmetic one (implementation-defined in C).
+  */
+  for (; (uint64_t)value + 0x40 >= MIN_TWOBYTE_VALUE; value >>= SHIFT_STEP) {
+    buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT);
+  }
+  /* The last octet goes without the link bit. */
+  buffer[i++] = (uint8_t)(value & PAYLOAD_MASK);
+
+  return i;
+}
+
+/* ------------------------- Reading ULEB128/LEB128 ------------------------- */
+
+/*
+** NB! For each LEB128 type (signed/unsigned) we have two versions of read
+** functions: The one consuming unlimited number of input octets and the one
+** consuming not more than given number of input octets. Currently reading
+** is not used in performance critical places, so these two functions are
+** implemented via single low-level function + run-time mode check. Feel free
+** to change if this becomes a bottleneck.
+*/
+
+/*
+** Common ULEB128 decoder. Consumes octets from buffer until one without the
+** link bit is met and stores the decoded value to *out. When guarded is
+** non-zero, at most n octets are consumed: if the value does not end within
+** them, 0 is returned and *out is left untouched. Otherwise returns the
+** number of octets consumed.
+*/
+static size_t _read_uleb128(uint64_t *out, const uint8_t *buffer, int guarded,
+			    size_t n)
+{
+  size_t i = 0;
+  uint64_t value = 0;
+  uint64_t shift = 0;
+  uint8_t octet;
+
+  for(;;) {
+    if (guarded && i + 1 > n) {
+      return 0;
+    }
+    octet = buffer[i++];
+    /*
+    ** Shifting a 64-bit value by 64 or more bits is undefined, so the payload
+    ** of an overlong encoding (more than 10 octets) is dropped instead.
+    */
+    if (shift < sizeof(value) * 8) {
+      value |= ((uint64_t)(octet & PAYLOAD_MASK)) << shift;
+    }
+    shift += SHIFT_STEP;
+    if (!(octet & LINK_BIT)) {
+      break;
+    }
+  }
+
+  *out = value;
+  return i;
+}
+
+/* Unguarded ULEB128 read: no limit on the number of consumed octets. */
+size_t read_uleb128(uint64_t *out, const uint8_t *buffer)
+{
+  const int guarded = 0;
+  const size_t limit = 0;
+
+  return _read_uleb128(out, buffer, guarded, limit);
+}
+
+/* Guarded ULEB128 read: passes n as the limit on consumed octets. */
+size_t read_uleb128_n(uint64_t *out, const uint8_t *buffer, size_t n)
+{
+  const int guarded = 1;
+
+  return _read_uleb128(out, buffer, guarded, n);
+}
+
+/*
+** Common signed LEB128 decoder. Consumes octets from buffer until one
+** without the link bit is met, sign-extends the result and stores it to
+** *out. When guarded is non-zero, at most n octets are consumed: if the
+** value does not end within them, 0 is returned and *out is left untouched.
+** Otherwise returns the number of octets consumed.
+*/
+static size_t _read_leb128(int64_t *out, const uint8_t *buffer, int guarded,
+			   size_t n)
+{
+  size_t i = 0;
+  /* Accumulate as unsigned: shifting bits into the sign of int64_t is UB. */
+  uint64_t value = 0;
+  uint64_t shift = 0;
+  uint8_t  octet;
+
+  for(;;) {
+    if (guarded && i + 1 > n) {
+      return 0;
+    }
+    octet  = buffer[i++];
+    /*
+    ** Shifting a 64-bit value by 64 or more bits is undefined, so the payload
+    ** of an overlong encoding (more than 10 octets) is dropped instead.
+    */
+    if (shift < sizeof(value) * 8) {
+      value |= ((uint64_t)(octet & PAYLOAD_MASK)) << shift;
+    }
+    shift += SHIFT_STEP;
+    if (!(octet & LINK_BIT)) {
+      break;
+    }
+  }
+
+  /*
+  ** Sign extension: fill all the bits above the decoded payload with ones.
+  ** The mask has to be built from a 64-bit operand, as shifting a plain int
+  ** by 31 or more bits is undefined and yields a wrong mask.
+  */
+  if ((octet & LEB_SIGN_BIT) && shift < sizeof(value) * 8) {
+    value |= ~(uint64_t)0 << shift;
+  }
+
+  /* The cast is expected to keep the two's complement bit pattern as is. */
+  *out = (int64_t)value;
+  return i;
+}
+
+/* Unguarded signed LEB128 read: no limit on the number of consumed octets. */
+size_t read_leb128(int64_t *out, const uint8_t *buffer)
+{
+  const int guarded = 0;
+  const size_t limit = 0;
+
+  return _read_leb128(out, buffer, guarded, limit);
+}
+
+/* Guarded signed LEB128 read: passes n as the limit on consumed octets. */
+size_t read_leb128_n(int64_t *out, const uint8_t *buffer, size_t n)
+{
+  const int guarded = 1;
+
+  return _read_leb128(out, buffer, guarded, n);
+}
diff --git a/src/utils/leb128.h b/src/utils/leb128.h
new file mode 100644
index 0000000..46d90bc
--- /dev/null
+++ b/src/utils/leb128.h
@@ -0,0 +1,55 @@
+/*
+** Interfaces for working with LEB128/ULEB128 encoding.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#ifndef _LJ_UTILS_LEB128_H
+#define _LJ_UTILS_LEB128_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* Maximum number of bytes needed for LEB128 encoding of any 64-bit value. */
+#define LEB128_U64_MAXSIZE 10
+
+/*
+** Writes a value from an unsigned 64-bit input to a buffer of bytes.
+** Buffer overflow is not checked. Returns number of bytes written.
+*/
+size_t write_uleb128(uint8_t *buffer, uint64_t value);
+
+/*
+** Writes a value from a signed 64-bit input to a buffer of bytes.
+** Buffer overflow is not checked. Returns number of bytes written.
+*/
+size_t write_leb128(uint8_t *buffer, int64_t value);
+
+/*
+** Reads a value from a buffer of bytes to a uint64_t output.
+** Buffer overflow is not checked. Returns number of bytes read.
+*/
+size_t read_uleb128(uint64_t *out, const uint8_t *buffer);
+
+/*
+** Reads a value from a buffer of bytes to an int64_t output.
+** Buffer overflow is not checked. Returns number of bytes read.
+*/
+size_t read_leb128(int64_t *out, const uint8_t *buffer);
+
+/*
+** Reads a value from a buffer of bytes to a uint64_t output. Consumes no more
+** than n bytes. Buffer overflow is not checked. Returns number of bytes read.
+** If more than n bytes are about to be consumed, returns 0 without touching out.
+*/
+size_t read_uleb128_n(uint64_t *out, const uint8_t *buffer, size_t n);
+
+/*
+** Reads a value from a buffer of bytes to an int64_t output. Consumes no more
+** than n bytes. Buffer overflow is not checked. Returns number of bytes read.
+** If more than n bytes are about to be consumed, returns 0 without touching out.
+*/
+size_t read_leb128_n(int64_t *out, const uint8_t *buffer, size_t n);
+
+#endif
-- 
2.28.0

  parent reply	other threads:[~2020-12-16 19:14 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-16 19:13 [Tarantool-patches] [PATCH luajit v1 00/11] LuaJIT memory profiler Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 01/11] build: add src dir in building Sergey Kaplun
2020-12-20 21:27   ` Igor Munkin
2020-12-23 18:20     ` Sergey Kaplun
2020-12-16 19:13 ` Sergey Kaplun [this message]
2020-12-20 22:44   ` [Tarantool-patches] [PATCH luajit v1 02/11] utils: introduce leb128 reader and writer Igor Munkin
2020-12-23 22:34     ` Sergey Kaplun
2020-12-24  9:11       ` Igor Munkin
2020-12-25  8:46         ` Sergey Kaplun
2020-12-23 16:50   ` Sergey Ostanevich
2020-12-23 22:36     ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 03/11] profile: introduce profiler writing module Sergey Kaplun
2020-12-21  9:24   ` Igor Munkin
2020-12-24  6:46     ` Sergey Kaplun
2020-12-24 15:45       ` Sergey Ostanevich
2020-12-24 21:20         ` Sergey Kaplun
2020-12-25  9:37           ` Igor Munkin
2020-12-25 10:13             ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 04/11] profile: introduce symtab write module Sergey Kaplun
2020-12-21 10:30   ` Igor Munkin
2020-12-24  7:00     ` Sergey Kaplun
2020-12-24  9:36       ` Igor Munkin
2020-12-25  8:45         ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 05/11] vm: introduce LFUNC and FFUNC vmstates Sergey Kaplun
2020-12-25 11:07   ` Sergey Ostanevich
2020-12-25 11:23     ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 06/11] core: introduce new mem_L field Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 07/11] debug: move debug_frameline to public module API Sergey Kaplun
2020-12-20 22:46   ` Igor Munkin
2020-12-24  6:50     ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 08/11] profile: introduce memory profiler Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 09/11] misc: add Lua API for " Sergey Kaplun
2020-12-24 16:32   ` Sergey Ostanevich
2020-12-24 21:25     ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 10/11] tools: introduce tools directory Sergey Kaplun
2020-12-20 22:46   ` Igor Munkin
2020-12-24  6:47     ` Sergey Kaplun
2020-12-16 19:13 ` [Tarantool-patches] [PATCH luajit v1 11/11] profile: introduce profile parser Sergey Kaplun
2020-12-24 23:09   ` Igor Munkin
2020-12-25  8:41     ` Sergey Kaplun
2020-12-21 10:43 ` [Tarantool-patches] [PATCH luajit v1 00/11] LuaJIT memory profiler Igor Munkin
2020-12-24  7:02   ` Sergey Kaplun

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7f5108768b70c9ffd2561f89c4974379085921e4.1608142899.git.skaplun@tarantool.org \
    --to=skaplun@tarantool.org \
    --cc=imun@tarantool.org \
    --cc=sergos@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit v1 02/11] utils: introduce leb128 reader and writer' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox