[tarantool-patches] Re: [PATCH v1 1/1] sql: improve vdbe_field_ref fetcher

Kirill Shcherbatov kshcherbatov at tarantool.org
Wed Aug 14 13:24:43 MSK 2019


> 1. 'uses'
> 2. 'an' is not used with plural.
> 3. 'initially initialized'? Please, chose one word.
> 4. Superlative adjectives should have 'the' almost always, not 'a'.
> 5. 'As few as'.
Tnx, fixed.

> 6. Comment says '> 1', but the assertion says '> 0'. What is
> correct?
The second. Fixed.

> 7. 'param'.
> 8. 'the nearest left neighbor of'.
> 9. In fact it finds not the nearest neighbor, but the
> nearest left.
> 11. Usually we start sentences from a capital letter.
> 12. Why not 'inline'? There is only one usage place, it won't
> affect the code section size.
Fixed.

> 13. Why do you consider fieldno >= 64 separately? If it is >= 64,
> then ((1LLU << fieldno) - 1) is all ones, a full mask. It won't affect
> slot_bitmask, and you will have the same result without this
> branching.
Because type overflow with << operator has undefined behavior.
This doesn't work in release mode (with optimization flags).

> 14. 'be present'.
> 15. 'a' is not used with plural.
> 16. 'less'.
> 17. Indentation is broken. In some places below too.
> 18. Fits in 2 lines.
> 19. Fits one line.
> 20. Fits one line.
Fixed.

> 21. I remember we have already had this discussion, but here we go again.
> When you use a smart mask as a dumb mask, you will face problems, for
> sure. This leads to an assertion:
> 
>     box.cfg{}
>     format = {}
>     t = {}
>     for i = 1, 70 do
>         format[i] = {name = 'FIELD'..i, type = 'unsigned'}
>         t[i] = i
>     end
>     s = box.schema.create_space('TEST', {format = format})
>     pk = s:create_index('pk', {parts = {70}})
>     s:insert(t)
>     box.execute('SELECT field70, field64 FROM test')
> 
> My opinion has not changed, we need separate functions for smart
> and not-smart masks. This code not only asserts, but its fix on
> top of smart masks would look bulky (you will manually set the bits
> and check for < 64), repetitive (you have 2 places where bits
> are set and one where checked), work slower (set_fieldno still would
> make a check if fieldno >= 63 to set a smart bit).
I've kept your test in test suite and fixed it:
I've introduced a new bitmask64_set_bit and bitmask64_bit_is_set helpers.

===============================================

Vdbe field ref is a dynamic index over tuple fields storing
offsets to each field and filling the offset array on demand.
It is highly used in SQL, because it strongly relies on fast and
repetitive access to any field, not only indexed.

There is an optimisation for the case when a requested field
fieldno is indexed, and the tuple itself stores offset to the
field in its own small field map, used by indexes. vdbe_field_ref
then uses that map to retrieve offset value without decoding
anything. But when SQL requests any field > fieldno, then
vdbe_field_ref decodes the tuple from the beginning in the worst
case. Even having previously accessed fieldno. But it could
start decoding from the latter.

An updated vdbe_field_ref fetcher class uses a bitmask of
initialized slots to use pre-calculated offsets when possible.
This speed-ups SQL in some corner case and doesn't damage
performance in general scenarios.

Closes #4267
---
 src/box/column_mask.h  |  30 ++++++++++++
 src/box/sql.h          |   9 ++--
 src/box/sql.c          |   3 +-
 src/box/sql/vdbe.c     | 102 +++++++++++++++++++++--------------------
 test/sql/misc.result   |  46 +++++++++++++++++++
 test/sql/misc.test.lua |  17 +++++++
 6 files changed, 153 insertions(+), 54 deletions(-)

diff --git a/src/box/column_mask.h b/src/box/column_mask.h
index 14da841f1..0518fa6c3 100644
--- a/src/box/column_mask.h
+++ b/src/box/column_mask.h
@@ -123,4 +123,34 @@ column_mask_fieldno_is_set(uint64_t column_mask, uint32_t fieldno)
 	return (column_mask & mask) != 0;
 }
 
+/**
+ * Set a bit with given index @a bitno in a given @a bitmask.
+ * Do nothing when @a bitno is greater than size if bitmask.
+ * @param bitmask Bitmask to test.
+ * @param bitno bit number to test (must be 0-based).
+ */
+static inline void
+bitmask64_set_bit(uint64_t *bitmask, uint32_t bitno)
+{
+	if (bitno >= 64)
+		return;
+	*bitmask |= ((uint64_t)1 << bitno);
+}
+
+/**
+ * Test a bit in the bitmask corresponding to a bitno..
+ * @param bitmask Bitmask to test.
+ * @param bitno bit number to test (must be 0-based).
+ * @return Whether given bit is set in a given bitmask.
+ *         When bitno is greater than size of bitmask returns
+ *         false.
+ */
+static inline bool
+bitmask64_bit_is_set(uint64_t bitmask, uint32_t bitno)
+{
+	if (bitno >= 64)
+		return false;
+	return (bitmask & ((uint64_t)1 << bitno)) != 0;
+}
+
 #endif
diff --git a/src/box/sql.h b/src/box/sql.h
index 9ccecf28c..08fe5c7af 100644
--- a/src/box/sql.h
+++ b/src/box/sql.h
@@ -372,10 +372,13 @@ struct vdbe_field_ref {
 	/** Count of fields in tuple. */
 	uint32_t field_count;
 	/**
-	 * Index of the rightmost initialized slot in slots
-	 * array.
+	 * Bitmask of initialized slots. The fieldno == 0 slot
+	 * must be initialized in vdbe_field_ref constructor.
+	 * This bitmap allows to lookup for the nearest
+	 * initialized slot for a given fieldno to perform a few
+	 * extra tuple decoding, as possible.
 	 */
-	uint32_t rightmost_slot;
+	uint64_t slot_bitmask;
 	/**
 	 * Array of offsets of tuple fields.
 	 * Only values <= rightmost_slot are valid.
diff --git a/src/box/sql.c b/src/box/sql.c
index 0ab3a506f..1a9a94eff 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -1245,7 +1245,8 @@ vdbe_field_ref_create(struct vdbe_field_ref *field_ref, struct tuple *tuple,
 	const char *field0 = data;
 	field_ref->field_count = mp_decode_array((const char **) &field0);
 	field_ref->slots[0] = (uint32_t)(field0 - data);
-	field_ref->rightmost_slot = 0;
+	field_ref->slot_bitmask = 0;
+	bitmask64_set_bit(&field_ref->slot_bitmask, 0);
 }
 
 void
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index e096e1f65..cfb487115 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -606,27 +606,60 @@ vdbe_field_ref_fetch_field(struct vdbe_field_ref *field_ref, uint32_t fieldno)
 }
 
 /**
- * Try to get a current tuple field using its field map.
+ * Find the closest field for given fieldno in field_ref's
+ * slot_bitmask. The fieldno is expected to be greater than 0.
+ * @param field_ref The vdbe_field_ref instance to use.
+ * @param fieldno Number of a field to find the nearest left
+ *        neighbor of.
+ * @retval >0 An index of the closest smaller fieldno
+ *            initialized in slot_bitmask.
+ */
+static inline uint32_t
+vdbe_field_ref_closest_slotno(struct vdbe_field_ref *field_ref,
+			      uint32_t fieldno)
+{
+	uint64_t slot_bitmask = field_ref->slot_bitmask;
+	assert(slot_bitmask != 0 && fieldno > 0);
+	uint64_t le_mask = fieldno < 64 ?
+			   slot_bitmask & ((1LLU << fieldno) - 1) : slot_bitmask;
+	assert(bit_clz_u64(le_mask) < 64);
+	return 64 - bit_clz_u64(le_mask) - 1;
+}
+
+/**
+ * Get a tuple's field using field_ref's slot_bitmask, and tuple's
+ * field_map when possible. Required field must be present in
+ * tuple.
  * @param field_ref The vdbe_field_ref instance to use.
  * @param fieldno Number of a field to get.
- * @param[out] field_size Result field size.
  * @retval not NULL MessagePack field.
- * @retval NULL Can not use field_map - it does not contain
- *              offset to @a fieldno.
  */
-static const void *
-vdbe_field_ref_fast_fetch(struct vdbe_field_ref *field_ref, uint32_t fieldno,
-			  uint32_t *field_size)
+static const char *
+vdbe_field_ref_fetch_data(struct vdbe_field_ref *field_ref, uint32_t fieldno)
 {
-	const struct tuple_field *tf =
-		vdbe_field_ref_fetch_field(field_ref, fieldno);
-	if (tf == NULL || tf->offset_slot == TUPLE_OFFSET_SLOT_NIL)
-		return NULL;
-	const char *field = tuple_field(field_ref->tuple, fieldno);
-	const char *end = field;
-	mp_next(&end);
-	*field_size = end - field;
-	return field;
+	if (bitmask64_bit_is_set(field_ref->slot_bitmask, fieldno))
+		return field_ref->data + field_ref->slots[fieldno];
+
+	const char *field_begin;
+	const struct tuple_field *field = vdbe_field_ref_fetch_field(field_ref,
+								     fieldno);
+	if (field != NULL && field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
+		field_begin = tuple_field(field_ref->tuple, fieldno);
+	} else {
+		uint32_t prev = vdbe_field_ref_closest_slotno(field_ref,
+							      fieldno);
+		field_begin = field_ref->data + field_ref->slots[prev];
+		for (prev++; prev < fieldno; prev++) {
+			mp_next(&field_begin);
+			field_ref->slots[prev] =
+				(uint32_t)(field_begin - field_ref->data);
+			bitmask64_set_bit(&field_ref->slot_bitmask, prev);
+		}
+		mp_next(&field_begin);
+	}
+	field_ref->slots[fieldno] = (uint32_t)(field_begin - field_ref->data);
+	bitmask64_set_bit(&field_ref->slot_bitmask, fieldno);
+	return field_begin;
 }
 
 static inline enum field_type
@@ -652,45 +685,13 @@ static int
 vdbe_field_ref_fetch(struct vdbe_field_ref *field_ref, uint32_t fieldno,
 		     struct Mem *dest_mem)
 {
-	uint32_t *slots = field_ref->slots;
 	if (fieldno >= field_ref->field_count) {
 		UPDATE_MAX_BLOBSIZE(dest_mem);
 		return 0;
 	}
-
-	const char *data;
-	if (field_ref->rightmost_slot <= fieldno) {
-		uint32_t field_sz;
-		data = vdbe_field_ref_fast_fetch(field_ref, fieldno,
-						 &field_sz);
-		if (data != NULL) {
-			/*
-			 * Special case for tarantool spaces: for
-			 * indexed fields a tuple field map can be
-			 * used. Else there is no sense in
-			 * tuple_field usage, because it makes
-			 * foreach field { mp_next(); } to find
-			 * a field. In such a case sql is
-			 * better - it saves offsets to all fields
-			 * visited in mp_next() cycle.
-			 */
-			uint32_t offset = (uint32_t)(data - field_ref->data);
-			slots[fieldno] = offset;
-			slots[fieldno + 1] = offset + field_sz;
-		} else {
-			uint32_t i = field_ref->rightmost_slot;
-			data = field_ref->data + slots[i];
-			do {
-				mp_next(&data);
-				slots[++i] = (uint32_t)(data - field_ref->data);
-			} while (i <= fieldno);
-			field_ref->rightmost_slot = i;
-		}
-	}
-
 	assert(sqlVdbeCheckMemInvariants(dest_mem) != 0);
+	const char *data = vdbe_field_ref_fetch_data(field_ref, fieldno);
 	uint32_t dummy;
-	data = field_ref->data + slots[fieldno];
 	if (vdbe_decode_msgpack_into_mem(data, dest_mem, &dummy) != 0)
 		return -1;
 
@@ -699,8 +700,9 @@ vdbe_field_ref_fetch(struct vdbe_field_ref *field_ref, uint32_t fieldno,
 	 * Wrap it in a blob verbatim.
 	 */
 	if (dest_mem->flags == 0) {
-		dest_mem->n = slots[fieldno + 1] - slots[fieldno];
 		dest_mem->z = (char *) data;
+		dest_mem->n = vdbe_field_ref_fetch_data(field_ref,
+							fieldno + 1) - data;
 		dest_mem->flags = MEM_Blob | MEM_Ephem | MEM_Subtype;
 		dest_mem->subtype = SQL_SUBTYPE_MSGPACK;
 	}
diff --git a/test/sql/misc.result b/test/sql/misc.result
index 2591d9d75..f2357c122 100644
--- a/test/sql/misc.result
+++ b/test/sql/misc.result
@@ -133,3 +133,49 @@ box.execute('select * from "s"')
 s:drop()
 ---
 ...
+--
+-- gh-4267: Full power of vdbe_field_ref
+--
+format = {}
+---
+...
+t = {}
+---
+...
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+for i = 1, 70 do
+        format[i] = {name = 'FIELD'..i, type = 'unsigned'}
+        t[i] = i
+end
+test_run:cmd("setopt delimiter ''");
+---
+...
+s = box.schema.create_space('TEST', {format = format})
+---
+...
+pk = s:create_index('pk', {parts = {70}})
+---
+...
+s:insert(t)
+---
+- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+  23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+  43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+  63, 64, 65, 66, 67, 68, 69, 70]
+...
+box.execute('SELECT field70, field64 FROM test')
+---
+- metadata:
+  - name: FIELD70
+    type: unsigned
+  - name: FIELD64
+    type: unsigned
+  rows:
+  - [70, 64]
+...
+box.space.TEST:drop()
+---
+...
diff --git a/test/sql/misc.test.lua b/test/sql/misc.test.lua
index fdc19f3ac..fb0fd5394 100644
--- a/test/sql/misc.test.lua
+++ b/test/sql/misc.test.lua
@@ -35,3 +35,20 @@ s = box.schema.space.create('s',{format=format, temporary=true})
 i = s:create_index('i')
 box.execute('select * from "s"')
 s:drop()
+
+--
+-- gh-4267: Full power of vdbe_field_ref
+--
+format = {}
+t = {}
+test_run:cmd("setopt delimiter ';'")
+for i = 1, 70 do
+        format[i] = {name = 'FIELD'..i, type = 'unsigned'}
+        t[i] = i
+end
+test_run:cmd("setopt delimiter ''");
+s = box.schema.create_space('TEST', {format = format})
+pk = s:create_index('pk', {parts = {70}})
+s:insert(t)
+box.execute('SELECT field70, field64 FROM test')
+box.space.TEST:drop()
-- 
2.22.0





More information about the Tarantool-patches mailing list