[Tarantool-patches] [PATCH 07/10] vinyl: align statements and bps tree extents

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Thu May 21 23:37:30 MSK 2020


Vinyl tuples (vy_stmt) in level 0 of the LSM tree are stored in
lsregion. They were allocated using lsregion_alloc(), which does
not align its results and is suitable only for byte arrays.

As a result, vy_stmt object addresses in LSM level 0 were not
aligned. Unaligned memory access is slower and may even crash on
some platforms.

Besides, aligned allocation alone could not help upserts in level 0
of the LSM tree, because upsert vy_stmt objects had a 1-byte prefix
counting the merged upserts stored in the statement. This 1-byte
prefix shifted the statement that follows it off its alignment.
Now the upsert counter prefix occupies alignof(struct vy_stmt)
bytes, so both the counter and the vy_stmt after it are aligned.
Note that this does not consume significantly more memory, since
it is used only for vinyl and only for upserts stored in level 0
of the LSM tree.

The same applies to BPS tree extents. LSM level 0 is a BPS tree
whose blocks are allocated on lsregion. The extents are used as
pointer arrays inside the tree, so they need alignof(void *)
alignment.

These unaligned accesses were revealed by the clang undefined
behaviour sanitizer and are fixed by this patch.

Part of #4609
---
 src/box/vy_mem.c                |  9 +++++----
 src/box/vy_stmt.c               | 13 ++++++++-----
 test/vinyl/quota.result         | 10 +++++-----
 test/vinyl/quota_timeout.result |  4 ++--
 test/vinyl/stat.result          |  4 ++--
 5 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/src/box/vy_mem.c b/src/box/vy_mem.c
index b4d016a68..98027e784 100644
--- a/src/box/vy_mem.c
+++ b/src/box/vy_mem.c
@@ -75,11 +75,12 @@ vy_mem_tree_extent_alloc(void *ctx)
 {
 	struct vy_mem *mem = (struct vy_mem *) ctx;
 	struct vy_mem_env *env = mem->env;
-	void *ret = lsregion_alloc(&env->allocator, VY_MEM_TREE_EXTENT_SIZE,
-				   mem->generation);
+	void *ret = lsregion_aligned_alloc(&env->allocator,
+					   VY_MEM_TREE_EXTENT_SIZE,
+					   alignof(void *), mem->generation);
 	if (ret == NULL) {
-		diag_set(OutOfMemory, VY_MEM_TREE_EXTENT_SIZE, "lsregion_alloc",
-			 "ret");
+		diag_set(OutOfMemory, VY_MEM_TREE_EXTENT_SIZE,
+			 "lsregion_aligned_alloc", "ret");
 		return NULL;
 	}
 	mem->tree_extent_size += VY_MEM_TREE_EXTENT_SIZE;
diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
index adc3ba452..dc6960068 100644
--- a/src/box/vy_stmt.c
+++ b/src/box/vy_stmt.c
@@ -223,20 +223,23 @@ vy_stmt_dup_lsregion(struct tuple *stmt, struct lsregion *lsregion,
 	size_t size = tuple_size(stmt);
 	size_t alloc_size = size;
 	struct tuple *mem_stmt;
+	const size_t align = alignof(struct vy_stmt);
 
 	/* Reserve one byte for UPSERT counter. */
 	if (type == IPROTO_UPSERT)
-		alloc_size++;
+		alloc_size += align;
 
-	mem_stmt = lsregion_alloc(lsregion, alloc_size, alloc_id);
+	mem_stmt = lsregion_aligned_alloc(lsregion, alloc_size, align,
+					  alloc_id);
 	if (mem_stmt == NULL) {
-		diag_set(OutOfMemory, size, "lsregion_alloc", "mem_stmt");
+		diag_set(OutOfMemory, size, "lsregion_aligned_alloc",
+			 "mem_stmt");
 		return NULL;
 	}
 
 	if (type == IPROTO_UPSERT) {
-		*(uint8_t *)mem_stmt = 0;
-		mem_stmt = (struct tuple *)((uint8_t *)mem_stmt + 1);
+		memset(mem_stmt, 0, align);
+		mem_stmt = (struct tuple *)((uint8_t *)mem_stmt + align);
 	}
 
 	memcpy(mem_stmt, stmt, size);
diff --git a/test/vinyl/quota.result b/test/vinyl/quota.result
index d1b28ee51..940df4e49 100644
--- a/test/vinyl/quota.result
+++ b/test/vinyl/quota.result
@@ -31,7 +31,7 @@ space:insert({1, 1})
 ...
 box.stat.vinyl().memory.level0
 ---
-- 98343
+- 98344
 ...
 space:insert({1, 1})
 ---
@@ -39,7 +39,7 @@ space:insert({1, 1})
 ...
 box.stat.vinyl().memory.level0
 ---
-- 98343
+- 98344
 ...
 space:update({1}, {{'!', 1, 100}}) -- try to modify the primary key
 ---
@@ -47,7 +47,7 @@ space:update({1}, {{'!', 1, 100}}) -- try to modify the primary key
 ...
 box.stat.vinyl().memory.level0
 ---
-- 98343
+- 98344
 ...
 space:insert({2, 2})
 ---
@@ -63,7 +63,7 @@ space:insert({4, 4})
 ...
 box.stat.vinyl().memory.level0
 ---
-- 98460
+- 98463
 ...
 box.snapshot()
 ---
@@ -89,7 +89,7 @@ _ = space:replace{1, 1, string.rep('a', 1024 * 1024 * 5)}
 ...
 box.stat.vinyl().memory.level0
 ---
-- 5292076
+- 5292080
 ...
 space:drop()
 ---
diff --git a/test/vinyl/quota_timeout.result b/test/vinyl/quota_timeout.result
index 7a71b29c6..31ca23670 100644
--- a/test/vinyl/quota_timeout.result
+++ b/test/vinyl/quota_timeout.result
@@ -49,7 +49,7 @@ s:count()
 ...
 box.stat.vinyl().memory.level0
 ---
-- 748241
+- 748248
 ...
 -- Since the following operation requires more memory than configured
 -- and dump is disabled, it should fail with ER_VY_QUOTA_TIMEOUT.
@@ -63,7 +63,7 @@ s:count()
 ...
 box.stat.vinyl().memory.level0
 ---
-- 748241
+- 748248
 ...
 --
 -- Check that increasing box.cfg.vinyl_memory wakes up fibers
diff --git a/test/vinyl/stat.result b/test/vinyl/stat.result
index d35def13d..a895528b9 100644
--- a/test/vinyl/stat.result
+++ b/test/vinyl/stat.result
@@ -761,7 +761,7 @@ put(1)
 ...
 stat_diff(gstat(), st, 'memory.level0')
 ---
-- 1061
+- 1064
 ...
 -- use cache
 st = gstat()
@@ -1130,7 +1130,7 @@ gstat()
   memory:
     tuple_cache: 14417
     tx: 0
-    level0: 262583
+    level0: 263210
     page_index: 1250
     bloom_filter: 140
   disk:
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list