[PATCH v5 4/4] box: introduce multikey indexes in memtx

Vladimir Davydov vdavydov.dev at gmail.com
Tue May 7 16:13:49 MSK 2019


On Mon, May 06, 2019 at 06:46:03PM +0300, Vladimir Davydov wrote:
> The patch looks generally okay, but I think there's a problem re
> field_map_size we have overlooked. The problem is memtx_tuple_delete
> uses field_map_size to find out the allocation size:
> 
>  | void
>  | memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple)
>  | {
>  | 	struct memtx_engine *memtx = (struct memtx_engine *)format->engine;
>  | 	say_debug("%s(%p)", __func__, tuple);
>  | 	assert(tuple->refs == 0);
>  | 	size_t total = sizeof(struct memtx_tuple) + format->field_map_size +
>  | 		tuple->bsize;
>  | 	tuple_format_unref(format);
>  | 	struct memtx_tuple *memtx_tuple =
>  | 		container_of(tuple, struct memtx_tuple, base);
>  | 	if (memtx->alloc.free_mode != SMALL_DELAYED_FREE ||
>  | 	    memtx_tuple->version == memtx->snapshot_version ||
>  | 	    format->is_temporary)
>  | 		smfree(&memtx->alloc, memtx_tuple, total);
>  | 	else
>  | 		smfree_delayed(&memtx->alloc, memtx_tuple, total);
>  | }
> 
> How's it going to work in case the field map stored in a tuple is
> greater than field_map_size?
> 
> I think we should calculate the real size of the field map here
> in case the format allows multikey indexes.

Turned out we don't need to compute the field map size when we free a
tuple - we can simply use tuple->data_offset. Sorry, I overlooked it
during review. Pushed the patch below to master.
--

>From 55e1a140db245b5eb1f50ff9de0568c6f402316e Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov.dev at gmail.com>
Date: Tue, 7 May 2019 14:51:06 +0300
Subject: [PATCH] box: zap field_map_get_size

Turns out we don't really need it as we can use data_offset + bsize
(i.e. the value returned by tuple_size() helper function) to get the
size of a tuple to free. We only need to take into account the offset
of the base tuple struct in the derived struct (memtx_tuple).

There's a catch though:

 - We use sizeof(struct memtx_tuple) + field_map_size + bsize for
   allocation size.
 - We set data_offset to sizeof(struct tuple) + field_map_size.
 - struct tuple is packed, which makes its size 10 bytes.
 - memtx_tuple embeds struct tuple (base) at 4 byte offset, but since
   it is not packed, its size is 16 bytes, NOT 4 + 10 = 14 bytes as
   one might expect!
 - This means data_offset + bsize + offsetof(struct memtx_tuple, base)
   doesn't equal allocation size.

To fix that, let's mark memtx_tuple packed. The only side effect it has
is that we save 2 bytes per each memtx tuple. It won't affect tuple data
layout at all, because struct memtx_tuple already has a packed layout
and so 'packed' will only affect its size, which is only used for
computing allocation size.

My bad I overlooked it during review.

Follow-up f1d9f2575c11 ("box: introduce multikey indexes in memtx").

diff --git a/src/box/field_map.c b/src/box/field_map.c
index fd696c19..1876bdd9 100644
--- a/src/box/field_map.c
+++ b/src/box/field_map.c
@@ -114,22 +114,4 @@ field_map_build(struct field_map_builder *builder, char *buffer)
 		extent_wptr += sizeof(uint32_t) + extent_offset_sz;
 	}
 	assert(extent_wptr == buffer + builder->extents_size);
-	assert(field_map_get_size(field_map,
-				  builder->slot_count * sizeof(uint32_t)) ==
-	       field_map_build_size(builder));
-}
-
-uint32_t
-field_map_get_size(const uint32_t *field_map, uint32_t format_field_map_sz)
-{
-	uint32_t total = format_field_map_sz;
-	int32_t field_map_items = format_field_map_sz / sizeof(uint32_t);
-	for (int32_t slot = -1; slot >= -field_map_items; slot--) {
-		if ((int32_t)field_map[slot] >= 0)
-			continue;
-		uint32_t *extent = (uint32_t *)((char *)field_map +
-					(int32_t)field_map[slot]);
-		total += (1 + extent[0]) * sizeof(uint32_t);
-	}
-	return total;
 }
diff --git a/src/box/field_map.h b/src/box/field_map.h
index 78c96e4f..0fd35c3e 100644
--- a/src/box/field_map.h
+++ b/src/box/field_map.h
@@ -164,19 +164,6 @@ field_map_get_offset(const uint32_t *field_map, int32_t offset_slot,
 }
 
 /**
- * Get size of the tuple field_map using
- * field_map pointer and format root field_map size.
- *
- * In case of multikey indexes the real field_map size may be
- * greater than the size of format root field_map. To calculate
- * the total size of the field_map extentions, routine relies
- * on the fact that all field_map slots that has an extention
- * use negative offset as a marker.
- */
-uint32_t
-field_map_get_size(const uint32_t *field_map, uint32_t format_field_map_sz);
-
-/**
  * Initialize field_map_builder.
  *
  * The field_map_size argument is a size of the minimal field_map
diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 806a79c5..58cfd619 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -110,7 +110,7 @@ memtx_init_txn(struct txn *txn)
 	txn->engine_tx = txn;
 }
 
-struct memtx_tuple {
+struct PACKED memtx_tuple {
 	/*
 	 * sic: the header of the tuple is used
 	 * to store a free list pointer in smfree_delayed.
@@ -1192,12 +1192,10 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple)
 	struct memtx_engine *memtx = (struct memtx_engine *)format->engine;
 	say_debug("%s(%p)", __func__, tuple);
 	assert(tuple->refs == 0);
-	const uint32_t *field_map = tuple_field_map(tuple);
-	size_t total = sizeof(struct memtx_tuple) + tuple->bsize +
-		       field_map_get_size(field_map, format->field_map_size);
 	tuple_format_unref(format);
 	struct memtx_tuple *memtx_tuple =
 		container_of(tuple, struct memtx_tuple, base);
+	size_t total = tuple_size(tuple) + offsetof(struct memtx_tuple, base);
 	if (memtx->alloc.free_mode != SMALL_DELAYED_FREE ||
 	    memtx_tuple->version == memtx->snapshot_version ||
 	    format->is_temporary)
diff --git a/src/box/tuple.c b/src/box/tuple.c
index 35194001..fe815a64 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -112,8 +112,7 @@ runtime_tuple_delete(struct tuple_format *format, struct tuple *tuple)
 	assert(format->vtab.tuple_delete == tuple_format_runtime_vtab.tuple_delete);
 	say_debug("%s(%p)", __func__, tuple);
 	assert(tuple->refs == 0);
-	size_t total = sizeof(struct tuple) + format->field_map_size +
-		tuple->bsize;
+	size_t total = tuple_size(tuple);
 	tuple_format_unref(format);
 	smfree(&runtime_alloc, tuple, total);
 }
diff --git a/test/box/errinj.result b/test/box/errinj.result
index fe4ba63b..061501c9 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -464,7 +464,7 @@ errinj.set("ERRINJ_TUPLE_ALLOC", true)
 ...
 s:auto_increment{}
 ---
-- error: Failed to allocate 18 bytes in slab allocator for memtx_tuple
+- error: Failed to allocate 16 bytes in slab allocator for memtx_tuple
 ...
 s:select{}
 ---
@@ -472,7 +472,7 @@ s:select{}
 ...
 s:auto_increment{}
 ---
-- error: Failed to allocate 18 bytes in slab allocator for memtx_tuple
+- error: Failed to allocate 16 bytes in slab allocator for memtx_tuple
 ...
 s:select{}
 ---
@@ -480,7 +480,7 @@ s:select{}
 ...
 s:auto_increment{}
 ---
-- error: Failed to allocate 18 bytes in slab allocator for memtx_tuple
+- error: Failed to allocate 16 bytes in slab allocator for memtx_tuple
 ...
 s:select{}
 ---
@@ -494,7 +494,7 @@ box.begin()
     s:insert{1}
 box.commit();
 ---
-- error: Failed to allocate 18 bytes in slab allocator for memtx_tuple
+- error: Failed to allocate 16 bytes in slab allocator for memtx_tuple
 ...
 box.rollback();
 ---
@@ -508,7 +508,7 @@ box.begin()
     s:insert{2}
 box.commit();
 ---
-- error: Failed to allocate 18 bytes in slab allocator for memtx_tuple
+- error: Failed to allocate 16 bytes in slab allocator for memtx_tuple
 ...
 s:select{};
 ---
@@ -522,7 +522,7 @@ box.begin()
     s:insert{2}
 box.commit();
 ---
-- error: Failed to allocate 18 bytes in slab allocator for memtx_tuple
+- error: Failed to allocate 16 bytes in slab allocator for memtx_tuple
 ...
 s:select{};
 ---
@@ -541,7 +541,7 @@ box.begin()
     s:insert{2}
 box.commit();
 ---
-- error: Failed to allocate 18 bytes in slab allocator for memtx_tuple
+- error: Failed to allocate 16 bytes in slab allocator for memtx_tuple
 ...
 errinj.set("ERRINJ_TUPLE_ALLOC", false);
 ---
@@ -803,7 +803,7 @@ errinj.set("ERRINJ_TUPLE_ALLOC", true)
 ...
 s:replace{1, "test"}
 ---
-- error: Failed to allocate 23 bytes in slab allocator for memtx_tuple
+- error: Failed to allocate 21 bytes in slab allocator for memtx_tuple
 ...
 s:bsize()
 ---
@@ -815,7 +815,7 @@ utils.space_bsize(s)
 ...
 s:update({1}, {{'=', 3, '!'}})
 ---
-- error: Failed to allocate 22 bytes in slab allocator for memtx_tuple
+- error: Failed to allocate 20 bytes in slab allocator for memtx_tuple
 ...
 s:bsize()
 ---
diff --git a/test/box/reconfigure.result b/test/box/reconfigure.result
index 9ed1864e..d2493844 100644
--- a/test/box/reconfigure.result
+++ b/test/box/reconfigure.result
@@ -162,7 +162,7 @@ pad = string.rep('x', 100 * 1024)
 ...
 for i = 1, count do s:replace{i, pad} end -- error: not enough memory
 ---
-- error: Failed to allocate 102424 bytes in slab allocator for memtx_tuple
+- error: Failed to allocate 102422 bytes in slab allocator for memtx_tuple
 ...
 s:count() < count
 ---
diff --git a/test/box/upsert_errinj.result b/test/box/upsert_errinj.result
index aa3d9e37..ed52e285 100644
--- a/test/box/upsert_errinj.result
+++ b/test/box/upsert_errinj.result
@@ -19,7 +19,7 @@ errinj.set("ERRINJ_TUPLE_ALLOC", true)
 ...
 s:upsert({111, '111', 222, '222'}, {{'!', 5, '!'}})
 ---
-- error: Failed to allocate 28 bytes in slab allocator for memtx_tuple
+- error: Failed to allocate 26 bytes in slab allocator for memtx_tuple
 ...
 errinj.set("ERRINJ_TUPLE_ALLOC", false)
 ---
diff --git a/test/wal_off/tuple.result b/test/wal_off/tuple.result
index 6ea3814f..bd434029 100644
--- a/test/wal_off/tuple.result
+++ b/test/wal_off/tuple.result
@@ -52,7 +52,7 @@ n + 32 >= box.cfg.memtx_max_tuple_size
 ...
 reason
 ---
-- 'Failed to allocate 1048603 bytes for tuple: tuple is too large. Check ''memtx_max_tuple_size''
+- 'Failed to allocate 1048601 bytes for tuple: tuple is too large. Check ''memtx_max_tuple_size''
   configuration option.'
 ...
 tester:drop()



More information about the Tarantool-patches mailing list