[Tarantool-patches] [PATCH luajit v1 02/11] utils: introduce leb128 reader and writer

Sergey Kaplun skaplun at tarantool.org
Wed Dec 16 22:13:37 MSK 2020


This patch introduces module for reading and writing leb128 compression.
It will be used for streaming profiling events writing, that will be
added at the next patches.

Part of tarantool/tarantool#5442
---
 src/Makefile       |   5 +-
 src/Makefile.dep   |   1 +
 src/utils/leb128.c | 124 +++++++++++++++++++++++++++++++++++++++++++++
 src/utils/leb128.h |  55 ++++++++++++++++++++
 4 files changed, 183 insertions(+), 2 deletions(-)
 create mode 100644 src/utils/leb128.c
 create mode 100644 src/utils/leb128.h

diff --git a/src/Makefile b/src/Makefile
index caa49f9..be7ed95 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -468,6 +468,7 @@ endif
 DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
 DASM_DASC= vm_$(DASM_ARCH).dasc
 
+UTILS_O= utils/leb128.o
 BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \
 	   host/buildvm_lib.o host/buildvm_fold.o
 BUILDVM_T= host/buildvm
@@ -498,7 +499,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
 	  lj_carith.o lj_clib.o lj_cparse.o \
 	  lj_lib.o lj_alloc.o lib_aux.o \
-	  $(LJLIB_O) lib_init.o
+	  $(LJLIB_O) lib_init.o $(UTILS_O)
 
 LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
 LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o)
@@ -516,7 +517,7 @@ ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \
 	    host/buildvm_arch.h
 ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP)
 WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk
-ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM)
+ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o utils/*.o $(WIN_RM)
 
 ##############################################################################
 # Build mode handling.
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 556314e..cc75d03 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -248,3 +248,4 @@ host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
 host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
  luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
 host/minilua.o: host/minilua.c
+utils/leb128.o: utils/leb128.c
diff --git a/src/utils/leb128.c b/src/utils/leb128.c
new file mode 100644
index 0000000..921e5bc
--- /dev/null
+++ b/src/utils/leb128.c
@@ -0,0 +1,124 @@
+/*
+** Working with LEB128/ULEB128 encoding.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#include <stdint.h>
+#include <stddef.h>
+
+#define LINK_BIT          (0x80)
+#define MIN_TWOBYTE_VALUE (0x80)
+#define PAYLOAD_MASK      (0x7f)
+#define SHIFT_STEP        (7)
+#define LEB_SIGN_BIT      (0x40)
+
+/* ------------------------- Writing ULEB128/LEB128 ------------------------- */
+
+size_t write_uleb128(uint8_t *buffer, uint64_t value)
+{
+  size_t i = 0;
+
+  for (; value >= MIN_TWOBYTE_VALUE; value >>= SHIFT_STEP) {
+    buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT);
+  }
+  buffer[i++] = (uint8_t)value;
+
+  return i;
+}
+
+size_t write_leb128(uint8_t *buffer, int64_t value)
+{
+  size_t i = 0;
+
+  for (; (uint64_t)(value + 0x40) >= MIN_TWOBYTE_VALUE; value >>= SHIFT_STEP) {
+    buffer[i++] = (uint8_t)((value & PAYLOAD_MASK) | LINK_BIT);
+  }
+  buffer[i++] = (uint8_t)(value & PAYLOAD_MASK);
+
+  return i;
+}
+
+/* ------------------------- Reading ULEB128/LEB128 ------------------------- */
+
+/*
+** NB! For each LEB128 type (signed/unsigned) we have two versions of read
+** functions: The one consuming unlimited number of input octets and the one
+** consuming not more than given number of input octets. Currently reading
+** is not used in performance critical places, so these two functions are
+** implemented via single low-level function + run-time mode check. Feel free
+** to change if this becomes a bottleneck.
+*/
+
+static size_t _read_uleb128(uint64_t *out, const uint8_t *buffer, int guarded,
+			    size_t n)
+{
+  size_t i = 0;
+  uint64_t value = 0;
+  uint64_t shift = 0;
+  uint8_t octet;
+
+  for(;;) {
+    if (guarded && i + 1 > n) {
+      return 0;
+    }
+    octet = buffer[i++];
+    value |= ((uint64_t)(octet & PAYLOAD_MASK)) << shift;
+    shift += SHIFT_STEP;
+    if (!(octet & LINK_BIT)) {
+      break;
+    }
+  }
+
+  *out = value;
+  return i;
+}
+
+size_t read_uleb128(uint64_t *out, const uint8_t *buffer)
+{
+  return _read_uleb128(out, buffer, 0, 0);
+}
+
+size_t read_uleb128_n(uint64_t *out, const uint8_t *buffer, size_t n)
+{
+  return _read_uleb128(out, buffer, 1, n);
+}
+
+static size_t _read_leb128(int64_t *out, const uint8_t *buffer, int guarded,
+			   size_t n)
+{
+  size_t i = 0;
+  int64_t  value = 0;
+  uint64_t shift = 0;
+  uint8_t  octet;
+
+  for(;;) {
+    if (guarded && i + 1 > n) {
+      return 0;
+    }
+    octet  = buffer[i++];
+    value |= ((int64_t)(octet & PAYLOAD_MASK)) << shift;
+    shift += SHIFT_STEP;
+    if (!(octet & LINK_BIT)) {
+      break;
+    }
+  }
+
+  if (octet & LEB_SIGN_BIT && shift < sizeof(int64_t) * 8) {
+    value |= -(1 << shift);
+  }
+
+  *out = value;
+  return i;
+}
+
+size_t read_leb128(int64_t *out, const uint8_t *buffer)
+{
+  return _read_leb128(out, buffer, 0, 0);
+}
+
+size_t read_leb128_n(int64_t *out, const uint8_t *buffer, size_t n)
+{
+  return _read_leb128(out, buffer, 1, n);
+}
diff --git a/src/utils/leb128.h b/src/utils/leb128.h
new file mode 100644
index 0000000..46d90bc
--- /dev/null
+++ b/src/utils/leb128.h
@@ -0,0 +1,55 @@
+/*
+** Interfaces for working with LEB128/ULEB128 encoding.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#ifndef _LJ_UTILS_LEB128_H
+#define _LJ_UTILS_LEB128_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* Maximum number of bytes needed for LEB128 encoding of any 64-bit value. */
+#define LEB128_U64_MAXSIZE 10
+
+/*
+** Writes a value from an unsigned 64-bit input to a buffer of bytes.
+** Buffer overflow is not checked. Returns number of bytes written.
+*/
+size_t write_uleb128(uint8_t *buffer, uint64_t value);
+
+/*
+** Writes a value from an signed 64-bit input to a buffer of bytes.
+** Buffer overflow is not checked. Returns number of bytes written.
+*/
+size_t write_leb128(uint8_t *buffer, int64_t value);
+
+/*
+** Reads a value from a buffer of bytes to a uint64_t output.
+** Buffer overflow is not checked. Returns number of bytes read.
+*/
+size_t read_uleb128(uint64_t *out, const uint8_t *buffer);
+
+/*
+** Reads a value from a buffer of bytes to a int64_t output.
+** Buffer overflow is not checked. Returns number of bytes read.
+*/
+size_t read_leb128(int64_t *out, const uint8_t *buffer);
+
+/*
+** Reads a value from a buffer of bytes to a uint64_t output. Consumes no more
+** than n bytes. Buffer overflow is not checked. Returns number of bytes read.
+** If more than n bytes is about to be consumed, returns 0 without touching out.
+*/
+size_t read_uleb128_n(uint64_t *out, const uint8_t *buffer, size_t n);
+
+/*
+** Reads a value from a buffer of bytes to a int64_t output. Consumes no more
+** than n bytes. Buffer overflow is not checked. Returns number of bytes read.
+** If more than n bytes is about to be consumed, returns 0 without touching out.
+*/
+size_t read_leb128_n(int64_t *out, const uint8_t *buffer, size_t n);
+
+#endif
-- 
2.28.0



More information about the Tarantool-patches mailing list