Tarantool development patches archive
 help / color / mirror / Atom feed
From: Sergey Kaplun <skaplun@tarantool.org>
To: Igor Munkin <imun@tarantool.org>,
	Sergey Ostanevich <sergos@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH luajit v2 1/7] utils: introduce leb128 reader and writer
Date: Fri, 25 Dec 2020 18:26:03 +0300	[thread overview]
Message-ID: <cacf5ad9ec0cf3a2633f1dff23ba1a3b44ecc4aa.1608907726.git.skaplun@tarantool.org> (raw)
In-Reply-To: <cover.1608907726.git.skaplun@tarantool.org>

Most of the numeric data written by the memory profiler is encoded
via LEB128 compression. This patch introduces the module for encoding
and decoding 64bit number to LEB128 form.

Part of tarantool/tarantool#5442
---

Changes in v2:
  - Removed reader funciton's parameter named guard.
  - Code style fixes.

 src/Makefile          |   3 +-
 src/Makefile.dep      |   7 ++-
 src/lj_utils.h        |  58 +++++++++++++++++++
 src/lj_utils_leb128.c | 132 ++++++++++++++++++++++++++++++++++++++++++
 src/ljamalg.c         |   1 +
 5 files changed, 197 insertions(+), 4 deletions(-)
 create mode 100644 src/lj_utils.h
 create mode 100644 src/lj_utils_leb128.c

diff --git a/src/Makefile b/src/Makefile
index 2786348..dc2ddb6 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -466,6 +466,7 @@ endif
 DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
 DASM_DASC= vm_$(DASM_ARCH).dasc
 
+UTILS_O= lj_utils_leb128.o
 BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \
 	   host/buildvm_lib.o host/buildvm_fold.o
 BUILDVM_T= host/buildvm
@@ -495,7 +496,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_asm.o lj_trace.o lj_gdbjit.o \
 	  lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
 	  lj_carith.o lj_clib.o lj_cparse.o \
-	  lj_lib.o lj_alloc.o lib_aux.o \
+	  lj_lib.o lj_alloc.o $(UTILS_O) lib_aux.o \
 	  $(LJLIB_O) lib_init.o
 
 LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 556314e..75409bf 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -205,6 +205,7 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h
 lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_udata.h
+lj_utils_leb128.o: lj_utils_leb128.c lj_utils.h lj_def.h lua.h luaconf.h
 lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
  lj_vm.h lj_vmevent.h
@@ -229,9 +230,9 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
  lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
  lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
  lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
- lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
- lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
- lib_ffi.c lib_misc.c lib_init.c
+ lj_utils_leb128.c lj_utils.h lib_aux.c lib_base.c lj_libdef.h lib_math.c \
+ lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c \
+ lib_bit.c lib_jit.c lib_ffi.c lib_misc.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
 host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
  lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
diff --git a/src/lj_utils.h b/src/lj_utils.h
new file mode 100644
index 0000000..1671e8e
--- /dev/null
+++ b/src/lj_utils.h
@@ -0,0 +1,58 @@
+/*
+** Interfaces for working with LEB128/ULEB128 encoding.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#ifndef _LJ_UTILS_H
+#define _LJ_UTILS_H
+
+#include "lj_def.h"
+
+/* Maximum number of bytes needed for LEB128 encoding of any 64-bit value. */
+#define LEB128_U64_MAXSIZE 10
+
+/*
+** Reads a value from a buffer of bytes to a int64_t output.
+** No bounds checks for the buffer. Returns number of bytes read.
+*/
+size_t LJ_FASTCALL lj_utils_read_leb128(int64_t *out, const uint8_t *buffer);
+
+/*
+** Reads a value from a buffer of bytes to a int64_t output. Consumes no more
+** than n bytes. No bounds checks for the buffer. Returns number of bytes
+** read. If more than n bytes is about to be consumed, returns 0 without
+** touching out.
+*/
+size_t LJ_FASTCALL lj_utils_read_leb128_n(int64_t *out, const uint8_t *buffer,
+					  size_t n);
+
+/*
+** Reads a value from a buffer of bytes to a uint64_t output.
+** No bounds checks for the buffer. Returns number of bytes read.
+*/
+size_t LJ_FASTCALL lj_utils_read_uleb128(uint64_t *out, const uint8_t *buffer);
+
+/*
+** Reads a value from a buffer of bytes to a uint64_t output. Consumes no more
+** than n bytes. No bounds checks for the buffer. Returns number of bytes
+** read. If more than n bytes is about to be consumed, returns 0 without
+** touching out.
+*/
+size_t LJ_FASTCALL lj_utils_read_uleb128_n(uint64_t *out, const uint8_t *buffer,
+					   size_t n);
+
+/*
+** Writes a value from an signed 64-bit input to a buffer of bytes.
+** No bounds checks for the buffer. Returns number of bytes written.
+*/
+size_t LJ_FASTCALL lj_utils_write_leb128(uint8_t *buffer, int64_t value);
+
+/*
+** Writes a value from an unsigned 64-bit input to a buffer of bytes.
+** No bounds checks for the buffer. Returns number of bytes written.
+*/
+size_t LJ_FASTCALL lj_utils_write_uleb128(uint8_t *buffer, uint64_t value);
+
+#endif
diff --git a/src/lj_utils_leb128.c b/src/lj_utils_leb128.c
new file mode 100644
index 0000000..ce8081b
--- /dev/null
+++ b/src/lj_utils_leb128.c
@@ -0,0 +1,132 @@
+/*
+** Working with LEB128/ULEB128 encoding.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#define lj_utils_leb128_c
+#define LUA_CORE
+
+#include "lj_utils.h"
+
+#define LINK_BIT               (0x80)
+#define MIN_TWOBYTE_VALUE      (0x80)
+#define PAYLOAD_MASK           (0x7f)
+#define SHIFT_STEP             (7)
+#define LEB_SIGN_BIT           (0x40)
+
+/* ------------------------- Reading LEB128/ULEB128 ------------------------- */
+
+/*
+** XXX: For each LEB128 type (signed/unsigned) we have two versions of read
+** functions: The one consuming unlimited number of input octets and the one
+** consuming not more than given number of input octets. Currently reading
+** is not used in performance critical places, so these two functions are
+** implemented via single low-level function + run-time mode check. Feel free
+** to change if this becomes a bottleneck.
+*/
+
+static LJ_AINLINE size_t _read_leb128(int64_t *out, const uint8_t *buffer,
+				      size_t n)
+{
+  size_t i = 0;
+  uint64_t shift = 0;
+  int64_t value = 0;
+  uint8_t octet;
+
+  for(;;) {
+    if (n != 0 && i + 1 > n)
+      return 0;
+    octet = buffer[i++];
+    value |= ((int64_t)(octet & PAYLOAD_MASK)) << shift;
+    shift += SHIFT_STEP;
+    if (!(octet & LINK_BIT))
+      break;
+  }
+
+  if (octet & LEB_SIGN_BIT && shift < sizeof(int64_t) * 8)
+    value |= -(1 << shift);
+
+  *out = value;
+  return i;
+}
+
+size_t LJ_FASTCALL lj_utils_read_leb128(int64_t *out, const uint8_t *buffer)
+{
+  return _read_leb128(out, buffer, 0);
+}
+
+size_t LJ_FASTCALL lj_utils_read_leb128_n(int64_t *out, const uint8_t *buffer,
+					  size_t n)
+{
+  return _read_leb128(out, buffer, n);
+}
+
+
+static LJ_AINLINE size_t _read_uleb128(uint64_t *out, const uint8_t *buffer,
+				       size_t n)
+{
+  size_t i = 0;
+  uint64_t value = 0;
+  uint64_t shift = 0;
+  uint8_t octet;
+
+  for(;;) {
+    if (n != 0 && i + 1 > n)
+      return 0;
+    octet = buffer[i++];
+    value |= ((uint64_t)(octet & PAYLOAD_MASK)) << shift;
+    shift += SHIFT_STEP;
+    if (!(octet & LINK_BIT))
+      break;
+  }
+
+  *out = value;
+  return i;
+}
+
+size_t LJ_FASTCALL lj_utils_read_uleb128(uint64_t *out, const uint8_t *buffer)
+{
+  return _read_uleb128(out, buffer, 0);
+}
+
+size_t LJ_FASTCALL lj_utils_read_uleb128_n(uint64_t *out, const uint8_t *buffer,
+					   size_t n)
+{
+  return _read_uleb128(out, buffer, n);
+}
+
+/* ------------------------- Writing LEB128/ULEB128 ------------------------- */
+
+size_t LJ_FASTCALL lj_utils_write_leb128(uint8_t *buffer, int64_t value)
+{
+  size_t i = 0;
+
+  /* LEB_SIGN_BIT propagation to check the remaining value. */
+  while ((uint64_t)(value + LEB_SIGN_BIT) >= MIN_TWOBYTE_VALUE) {
+    buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT);
+    value >>= SHIFT_STEP;
+  }
+
+  /* Omit LINK_BIT in case of overflow. */
+  buffer[i++] = (uint8_t)(value & PAYLOAD_MASK);
+
+  lua_assert(i <= LEB128_U64_MAXSIZE);
+
+  return i;
+}
+
+size_t LJ_FASTCALL lj_utils_write_uleb128(uint8_t *buffer, uint64_t value)
+{
+  size_t i = 0;
+
+  for (; value >= MIN_TWOBYTE_VALUE; value >>= SHIFT_STEP)
+    buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT);
+
+  buffer[i++] = (uint8_t)value;
+
+  lua_assert(i <= LEB128_U64_MAXSIZE);
+
+  return i;
+}
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 371bbb6..268b321 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -81,6 +81,7 @@
 #include "lj_trace.c"
 #include "lj_gdbjit.c"
 #include "lj_alloc.c"
+#include "lj_utils_leb128.c"
 
 #include "lib_aux.c"
 #include "lib_base.c"
-- 
2.28.0

  reply	other threads:[~2020-12-25 15:26 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-25 15:26 [Tarantool-patches] [PATCH luajit v2 0/7] LuaJIT memory profiler Sergey Kaplun
2020-12-25 15:26 ` Sergey Kaplun [this message]
2020-12-25 21:42   ` [Tarantool-patches] [PATCH luajit v2 1/7] utils: introduce leb128 reader and writer Igor Munkin
2020-12-26  9:32     ` Sergey Kaplun
2020-12-26 13:57       ` Sergey Kaplun
2020-12-26 18:47         ` Sergey Ostanevich
2020-12-25 15:26 ` [Tarantool-patches] [PATCH luajit v2 2/7] core: introduce write buffer module Sergey Kaplun
2020-12-26 14:22   ` Igor Munkin
2020-12-26 15:26     ` Sergey Kaplun
2020-12-26 19:03       ` Sergey Ostanevich
2020-12-26 19:37         ` Sergey Kaplun
2020-12-28  1:43           ` Sergey Kaplun
2020-12-25 15:26 ` [Tarantool-patches] [PATCH luajit v2 3/7] vm: introduce VM states for Lua and fast functions Sergey Kaplun
2020-12-26 19:07   ` Sergey Ostanevich
2020-12-27 23:48   ` Igor Munkin
2020-12-28  3:54     ` Sergey Kaplun
2020-12-25 15:26 ` [Tarantool-patches] [PATCH luajit v2 4/7] core: introduce new mem_L field Sergey Kaplun
2020-12-26 19:12   ` Sergey Ostanevich
2020-12-26 19:42     ` Sergey Kaplun
2020-12-27 13:09   ` Igor Munkin
2020-12-27 17:44     ` Sergey Kaplun
2020-12-25 15:26 ` [Tarantool-patches] [PATCH luajit v2 5/7] core: introduce memory profiler Sergey Kaplun
2020-12-27 10:58   ` Sergey Ostanevich
2020-12-27 11:54     ` Sergey Kaplun
2020-12-27 13:27       ` Sergey Ostanevich
2020-12-27 16:44   ` Igor Munkin
2020-12-27 21:47     ` Sergey Kaplun
2020-12-25 15:26 ` [Tarantool-patches] [PATCH luajit v2 6/7] misc: add Lua API for " Sergey Kaplun
2020-12-27 11:54   ` Sergey Ostanevich
2020-12-27 13:42     ` Sergey Kaplun
2020-12-27 15:37       ` Sergey Ostanevich
2020-12-27 18:58   ` Igor Munkin
2020-12-28  0:14     ` Sergey Kaplun
2020-12-25 15:26 ` [Tarantool-patches] [PATCH luajit v2 7/7] tools: introduce a memory profile parser Sergey Kaplun
2020-12-26 22:56   ` Igor Munkin
2020-12-27  7:16     ` Sergey Kaplun
2020-12-28  5:30       ` Sergey Kaplun
2020-12-28  5:33         ` Igor Munkin
2020-12-28  6:28           ` Sergey Kaplun
2020-12-28  6:31             ` Igor Munkin
2020-12-27 13:24   ` Sergey Ostanevich
2020-12-27 16:02     ` Sergey Kaplun
2020-12-27 21:55       ` Sergey Ostanevich
2020-12-28  2:05 ` [Tarantool-patches] [PATCH luajit v3 2/2] misc: add Lua API for memory profiler Sergey Kaplun
2020-12-28  2:49   ` Igor Munkin
2020-12-28  5:19     ` Sergey Kaplun
2020-12-28  2:06 ` [Tarantool-patches] [PATCH luajit v3 1/2] core: introduce " Sergey Kaplun
2020-12-28  3:59   ` Igor Munkin
2020-12-28  4:05 ` [Tarantool-patches] [PATCH luajit v3 3/7] vm: introduce VM states for Lua and fast functions Sergey Kaplun
2020-12-28  5:14   ` Igor Munkin
2020-12-28  6:01 ` [Tarantool-patches] [PATCH luajit v2 0/7] LuaJIT memory profiler Alexander V. Tikhonov
2020-12-28  8:15 ` Igor Munkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cacf5ad9ec0cf3a2633f1dff23ba1a3b44ecc4aa.1608907726.git.skaplun@tarantool.org \
    --to=skaplun@tarantool.org \
    --cc=imun@tarantool.org \
    --cc=sergos@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit v2 1/7] utils: introduce leb128 reader and writer' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox