[Tarantool-patches] [PATCH v2 4/4] sql: introduce mem_cmp_msgpack()

Mergen Imeev imeevma at tarantool.org
Tue Jul 13 11:10:07 MSK 2021


Thank you for the review! My answers, diff and new patch below.

On Mon, Jul 12, 2021 at 11:09:02PM +0200, Vladislav Shpilevoy wrote:
> Thanks for the fixes!
> 
> See 2 comments below.
> 
> > diff --git a/src/box/sql.c b/src/box/sql.c
> > index 790ca7f70..a5afcfabb 100644
> > --- a/src/box/sql.c
> > +++ b/src/box/sql.c
> > @@ -765,10 +765,12 @@ tarantoolsqlIdxKeyCompare(struct BtCursor *cursor,
> >  			}
> >  		}
> >  		next_fieldno = fieldno + 1;
> > -		rc = sqlVdbeCompareMsgpack(&p, unpacked, i);
> > +		struct key_part *part = &unpacked->key_def->parts[i];
> > +		struct Mem *mem = unpacked->aMem + i;
> > +		struct coll *coll = part->coll;
> > +		mem_cmp_msgpack(mem, &p, &rc, coll);
> 
> 1. The same as in the previous patch - you either need to fill
> rc out parameter even in case of a fail, or check result of
> mem_cmp_msgpack() to keep the results stable. Not depending on
> the stack's garbage content.
> 
> The same in the other places where it is called.
> 
Fixed. However, since there is no proper way to throw an error from both
functions where mem_cmp_msgpack() was used, I just made rc equal to 0.

> >  		if (rc != 0) {
> > -			if (unpacked->key_def->parts[i].sort_order !=
> > -			    SORT_ORDER_ASC)
> > +			if (part->sort_order == SORT_ORDER_ASC)
> >  				rc = -rc;
> >  			goto out;
> >  		}
> > diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
> > index da27cd191..4062ff4b3 100644
> > --- a/src/box/sql/mem.c
> > +++ b/src/box/sql/mem.c
> > @@ -2077,35 +2077,78 @@ mem_cmp_scalar(const struct Mem *a, const struct Mem *b, int *result,
> 
> <...>
> 
> > +	case MP_EXT: {
> > +		int8_t type;
> > +		const char *buf = *b;
> > +		uint32_t len = mp_decode_extl(&buf, &type);
> > +		if (type == MP_UUID) {
> > +			assert(len == UUID_LEN);
> > +			mem.type = MEM_TYPE_UUID;
> > +			if (mp_decode_uuid(b, &mem.u.uuid) == NULL)
> > +				return -1;
> > +			break;
> > +		}
> > +		len += buf - *b;
> > +		mem.type = MEM_TYPE_BIN;
> > +		mem.z = (char *)*b;
> > +		mem.n = len;
> > +		mem.flags = MEM_Ephem;
> > +		*b += len;
> > +		break;
> 
> 2. I just remembered now, that we have uuid_unpack(). It is better
> to use when you already decoded the EXT header. Consider this:
> 
> ====================
> @@ -2129,20 +2129,18 @@ mem_cmp_msgpack(const struct Mem *a, const char **b, int *result,
>  	case MP_EXT: {
>  		int8_t type;
>  		const char *buf = *b;
> -		uint32_t len = mp_decode_extl(&buf, &type);
> +		uint32_t len = mp_decode_extl(b, &type);
>  		if (type == MP_UUID) {
> -			assert(len == UUID_LEN);
> -			mem.type = MEM_TYPE_UUID;
> -			if (mp_decode_uuid(b, &mem.u.uuid) == NULL)
> +			if (uuid_unpack(&b, len, &mem.u.uuid) == NULL)
>  				return -1;
> +			mem.type = MEM_TYPE_UUID;
>  			break;
>  		}
> -		len += buf - *b;
>  		mem.type = MEM_TYPE_BIN;
> -		mem.z = (char *)*b;
> -		mem.n = len;
> +		mem.z = (char *)buf;
> +		mem.n = b - buf + len;
>  		mem.flags = MEM_Ephem;
> -		*b += len;
> +		b += len;
>  		break;
>  	}
>  	default:
> ====================
> 
> (I didn't test though.)
Thank you for the suggestion! I applied your diff with some changes.

Diff:


diff --git a/src/box/sql.c b/src/box/sql.c
index a5afcfabb..c1271ee0a 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -768,7 +768,9 @@ tarantoolsqlIdxKeyCompare(struct BtCursor *cursor,
 		struct key_part *part = &unpacked->key_def->parts[i];
 		struct Mem *mem = unpacked->aMem + i;
 		struct coll *coll = part->coll;
-		mem_cmp_msgpack(mem, &p, &rc, coll);
+		/* In case of fail make rc equal to 0. */
+		if (mem_cmp_msgpack(mem, &p, &rc, coll) != 0)
+			rc = 0;
 		if (rc != 0) {
 			if (part->sort_order == SORT_ORDER_ASC)
 				rc = -rc;
diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
index 4062ff4b3..8e176e418 100644
--- a/src/box/sql/mem.c
+++ b/src/box/sql/mem.c
@@ -2129,20 +2129,19 @@ mem_cmp_msgpack(const struct Mem *a, const char **b, int *result,
 	case MP_EXT: {
 		int8_t type;
 		const char *buf = *b;
-		uint32_t len = mp_decode_extl(&buf, &type);
+		uint32_t len = mp_decode_extl(b, &type);
 		if (type == MP_UUID) {
 			assert(len == UUID_LEN);
 			mem.type = MEM_TYPE_UUID;
-			if (mp_decode_uuid(b, &mem.u.uuid) == NULL)
+			if (uuid_unpack(b, len, &mem.u.uuid) == NULL)
 				return -1;
 			break;
 		}
-		len += buf - *b;
+		*b += len;
 		mem.type = MEM_TYPE_BIN;
-		mem.z = (char *)*b;
-		mem.n = len;
+		mem.z = (char *)buf;
+		mem.n = *b - buf;
 		mem.flags = MEM_Ephem;
-		*b += len;
 		break;
 	}
 	default:
@@ -2590,7 +2589,9 @@ sqlVdbeRecordCompareMsgpack(const void *key1,
 		struct key_part *part = &key2->key_def->parts[i];
 		struct Mem *mem = key2->aMem + i;
 		struct coll *coll = part->coll;
-		mem_cmp_msgpack(mem, (const char **)&key1, &rc, coll);
+		/* In case of fail make rc equal to 0. */
+		if (mem_cmp_msgpack(mem, (const char **)&key1, &rc, coll) != 0)
+			rc = 0;
 		if (rc != 0) {
 			if (part->sort_order != SORT_ORDER_ASC)
 				return rc;


New patch:


commit 2f80030b9914256bf931c95b8d76113f31c01655
Author: Mergen Imeev <imeevma at gmail.com>
Date:   Fri Jul 9 11:59:34 2021 +0300

    sql: introduce mem_cmp_msgpack()
    
    This patch introduces the mem_cmp_msgpack() function that compares MEM
    and packed to msgpack value using SCALAR rules. MEM and packed value
    must be scalars. Prior to this patch, there was a function that used
    SCALAR rules to compare MEM and packed value, but its design became
    overly complex as new types appeared.
    
    Closes #6164

diff --git a/src/box/sql.c b/src/box/sql.c
index 790ca7f70..c1271ee0a 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -765,10 +765,14 @@ tarantoolsqlIdxKeyCompare(struct BtCursor *cursor,
 			}
 		}
 		next_fieldno = fieldno + 1;
-		rc = sqlVdbeCompareMsgpack(&p, unpacked, i);
+		struct key_part *part = &unpacked->key_def->parts[i];
+		struct Mem *mem = unpacked->aMem + i;
+		struct coll *coll = part->coll;
+		/* In case of fail make rc equal to 0. */
+		if (mem_cmp_msgpack(mem, &p, &rc, coll) != 0)
+			rc = 0;
 		if (rc != 0) {
-			if (unpacked->key_def->parts[i].sort_order !=
-			    SORT_ORDER_ASC)
+			if (part->sort_order == SORT_ORDER_ASC)
 				rc = -rc;
 			goto out;
 		}
diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
index da27cd191..8e176e418 100644
--- a/src/box/sql/mem.c
+++ b/src/box/sql/mem.c
@@ -2077,35 +2077,77 @@ mem_cmp_scalar(const struct Mem *a, const struct Mem *b, int *result,
 	return 0;
 }
 
-/*
- * Both *pMem1 and *pMem2 contain string values. Compare the two values
- * using the collation sequence pColl. As usual, return a negative , zero
- * or positive value if *pMem1 is less than, equal to or greater than
- * *pMem2, respectively. Similar in spirit to "rc = (*pMem1) - (*pMem2);".
- *
- * Strungs assume to be UTF-8 encoded
- */
-static int
-vdbeCompareMemString(const Mem * pMem1, const Mem * pMem2,
-		     const struct coll * pColl)
-{
-	return pColl->cmp(pMem1->z, (size_t)pMem1->n,
-			      pMem2->z, (size_t)pMem2->n, pColl);
-}
-
-/*
- * The input pBlob is guaranteed to be a Blob that is not marked
- * with MEM_Zero.  Return true if it could be a zero-blob.
- */
-static int
-isAllZero(const char *z, int n)
-{
-	int i;
-	for (i = 0; i < n; i++) {
-		if (z[i])
-			return 0;
+int
+mem_cmp_msgpack(const struct Mem *a, const char **b, int *result,
+		const struct coll *coll)
+{
+	struct Mem mem;
+	switch (mp_typeof(**b)) {
+	case MP_NIL:
+		mem.type = MEM_TYPE_NULL;
+		mp_decode_nil(b);
+		break;
+	case MP_BOOL:
+		mem.type = MEM_TYPE_BOOL;
+		mem.u.b = mp_decode_bool(b);
+		break;
+	case MP_UINT:
+		mem.type = MEM_TYPE_UINT;
+		mem.u.u = mp_decode_uint(b);
+		break;
+	case MP_INT:
+		mem.type = MEM_TYPE_INT;
+		mem.u.i = mp_decode_int(b);
+		break;
+	case MP_FLOAT:
+		mem.type = MEM_TYPE_DOUBLE;
+		mem.u.r = mp_decode_float(b);
+		break;
+	case MP_DOUBLE:
+		mem.type = MEM_TYPE_DOUBLE;
+		mem.u.r = mp_decode_double(b);
+		break;
+	case MP_STR:
+		mem.type = MEM_TYPE_STR;
+		mem.n = mp_decode_strl(b);
+		mem.z = (char *)*b;
+		*b += mem.n;
+		mem.flags = MEM_Ephem;
+		break;
+	case MP_BIN:
+		mem.type = MEM_TYPE_BIN;
+		mem.n = mp_decode_binl(b);
+		mem.z = (char *)*b;
+		*b += mem.n;
+		mem.flags = MEM_Ephem;
+		break;
+	case MP_ARRAY:
+	case MP_MAP:
+		mp_next(b);
+		*result = -1;
+		return 0;
+	case MP_EXT: {
+		int8_t type;
+		const char *buf = *b;
+		uint32_t len = mp_decode_extl(b, &type);
+		if (type == MP_UUID) {
+			assert(len == UUID_LEN);
+			mem.type = MEM_TYPE_UUID;
+			if (uuid_unpack(b, len, &mem.u.uuid) == NULL)
+				return -1;
+			break;
+		}
+		*b += len;
+		mem.type = MEM_TYPE_BIN;
+		mem.z = (char *)buf;
+		mem.n = *b - buf;
+		mem.flags = MEM_Ephem;
+		break;
 	}
-	return 1;
+	default:
+		unreachable();
+	}
+	return mem_cmp_scalar(a, &mem, result, coll);
 }
 
 char *
@@ -2534,183 +2576,6 @@ sql_vdbemem_finalize(struct Mem *mem, struct func *func)
 	return ctx.is_aborted ? -1 : 0;
 }
 
-int
-sqlVdbeCompareMsgpack(const char **key1,
-			  struct UnpackedRecord *unpacked, int key2_idx)
-{
-	const char *aKey1 = *key1;
-	Mem *pKey2 = unpacked->aMem + key2_idx;
-	Mem mem1;
-	int rc = 0;
-	switch (mp_typeof(*aKey1)) {
-	default:{
-			/* FIXME */
-			rc = -1;
-			break;
-		}
-	case MP_NIL:{
-			rc = -(pKey2->type != MEM_TYPE_NULL);
-			mp_decode_nil(&aKey1);
-			break;
-		}
-	case MP_BOOL:{
-			mem1.u.b = mp_decode_bool(&aKey1);
-			if (pKey2->type == MEM_TYPE_BOOL) {
-				if (mem1.u.b != pKey2->u.b)
-					rc = mem1.u.b ? 1 : -1;
-			} else {
-				rc = pKey2->type == MEM_TYPE_NULL ? 1 : -1;
-			}
-			break;
-		}
-	case MP_UINT:{
-			mem1.u.u = mp_decode_uint(&aKey1);
-			if (pKey2->type == MEM_TYPE_INT) {
-				rc = +1;
-			} else if (pKey2->type == MEM_TYPE_UINT) {
-				if (mem1.u.u < pKey2->u.u)
-					rc = -1;
-				else if (mem1.u.u > pKey2->u.u)
-					rc = +1;
-			} else if (pKey2->type == MEM_TYPE_DOUBLE) {
-				rc = double_compare_uint64(pKey2->u.r,
-							   mem1.u.u, -1);
-			} else if (pKey2->type == MEM_TYPE_NULL) {
-				rc = 1;
-			} else if (pKey2->type == MEM_TYPE_BOOL) {
-				rc = 1;
-			} else {
-				rc = -1;
-			}
-			break;
-		}
-	case MP_INT:{
-			mem1.u.i = mp_decode_int(&aKey1);
-			if (pKey2->type == MEM_TYPE_UINT) {
-				rc = -1;
-			} else if (pKey2->type == MEM_TYPE_INT) {
-				if (mem1.u.i < pKey2->u.i) {
-					rc = -1;
-				} else if (mem1.u.i > pKey2->u.i) {
-					rc = +1;
-				}
-			} else if (pKey2->type == MEM_TYPE_DOUBLE) {
-				rc = double_compare_nint64(pKey2->u.r, mem1.u.i,
-							   -1);
-			} else if (pKey2->type == MEM_TYPE_NULL) {
-				rc = 1;
-			} else if (pKey2->type == MEM_TYPE_BOOL) {
-				rc = 1;
-			} else {
-				rc = -1;
-			}
-			break;
-		}
-	case MP_FLOAT:{
-			mem1.u.r = mp_decode_float(&aKey1);
-			goto do_float;
-		}
-	case MP_DOUBLE:{
-			mem1.u.r = mp_decode_double(&aKey1);
- do_float:
-			if (pKey2->type == MEM_TYPE_INT) {
-				rc = double_compare_nint64(mem1.u.r, pKey2->u.i,
-							   1);
-			} else if (pKey2->type == MEM_TYPE_UINT) {
-				rc = double_compare_uint64(mem1.u.r,
-							   pKey2->u.u, 1);
-			} else if (pKey2->type == MEM_TYPE_DOUBLE) {
-				if (mem1.u.r < pKey2->u.r) {
-					rc = -1;
-				} else if (mem1.u.r > pKey2->u.r) {
-					rc = +1;
-				}
-			} else if (pKey2->type == MEM_TYPE_NULL) {
-				rc = 1;
-			} else if (pKey2->type == MEM_TYPE_BOOL) {
-				rc = 1;
-			} else {
-				rc = -1;
-			}
-			break;
-		}
-	case MP_STR:{
-			if (pKey2->type == MEM_TYPE_STR) {
-				struct key_def *key_def = unpacked->key_def;
-				mem1.n = mp_decode_strl(&aKey1);
-				mem1.z = (char *)aKey1;
-				aKey1 += mem1.n;
-				struct coll *coll =
-					key_def->parts[key2_idx].coll;
-				if (coll != NULL) {
-					mem1.type = MEM_TYPE_STR;
-					mem1.flags = 0;
-					rc = vdbeCompareMemString(&mem1, pKey2,
-								  coll);
-				} else {
-					goto do_bin_cmp;
-				}
-			} else {
-				rc = pKey2->type == MEM_TYPE_BIN ? -1 : +1;
-			}
-			break;
-		}
-	case MP_BIN:{
-			mem1.n = mp_decode_binl(&aKey1);
-			mem1.z = (char *)aKey1;
-			aKey1 += mem1.n;
- do_blob:
-			if (pKey2->type == MEM_TYPE_BIN) {
-				if (pKey2->flags & MEM_Zero) {
-					if (!isAllZero
-					    ((const char *)mem1.z, mem1.n)) {
-						rc = 1;
-					} else {
-						rc = mem1.n - pKey2->u.nZero;
-					}
-				} else {
-					int nCmp;
- do_bin_cmp:
-					nCmp = MIN(mem1.n, pKey2->n);
-					rc = memcmp(mem1.z, pKey2->z, nCmp);
-					if (rc == 0)
-						rc = mem1.n - pKey2->n;
-				}
-			} else {
-				rc = 1;
-			}
-			break;
-		}
-	case MP_ARRAY:
-	case MP_MAP: {
-			mem1.z = (char *)aKey1;
-			mp_next(&aKey1);
-			mem1.n = aKey1 - (char *)mem1.z;
-			goto do_blob;
-		}
-	case MP_EXT: {
-		int8_t type;
-		const char *buf = aKey1;
-		uint32_t len = mp_decode_extl(&aKey1, &type);
-		if (type == MP_UUID) {
-			assert(len == UUID_LEN);
-			mem1.type = MEM_TYPE_UUID;
-			aKey1 = buf;
-			if (mp_decode_uuid(&aKey1, &mem1.u.uuid) == NULL ||
-			    mem_cmp_uuid(&mem1, pKey2, &rc) != 0)
-				rc = 1;
-			break;
-		}
-		aKey1 += len;
-		mem1.z = (char *)buf;
-		mem1.n = aKey1 - buf;
-		goto do_blob;
-	}
-	}
-	*key1 = aKey1;
-	return rc;
-}
-
 int
 sqlVdbeRecordCompareMsgpack(const void *key1,
 				struct UnpackedRecord *key2)
@@ -2721,13 +2586,16 @@ sqlVdbeRecordCompareMsgpack(const void *key1,
 	n = MIN(n, key2->nField);
 
 	for (i = 0; i != n; i++) {
-		rc = sqlVdbeCompareMsgpack((const char**)&key1, key2, i);
+		struct key_part *part = &key2->key_def->parts[i];
+		struct Mem *mem = key2->aMem + i;
+		struct coll *coll = part->coll;
+		/* In case of fail make rc equal to 0. */
+		if (mem_cmp_msgpack(mem, (const char **)&key1, &rc, coll) != 0)
+			rc = 0;
 		if (rc != 0) {
-			if (key2->key_def->parts[i].sort_order !=
-			    SORT_ORDER_ASC) {
-				rc = -rc;
-			}
-			return rc;
+			if (part->sort_order != SORT_ORDER_ASC)
+				return rc;
+			return -rc;
 		}
 	}
 
diff --git a/src/box/sql/mem.h b/src/box/sql/mem.h
index bbb99c4d2..c73536efb 100644
--- a/src/box/sql/mem.h
+++ b/src/box/sql/mem.h
@@ -704,6 +704,16 @@ int
 mem_cmp_scalar(const struct Mem *a, const struct Mem *b, int *result,
 	       const struct coll *coll);
 
+/**
+ * Compare MEM and packed to msgpack value using SCALAR rules and return the
+ * result of comparison. Both values should be scalars. Original MEM is not
+ * changed. If successful, the second argument will contain the address
+ * following the specified packed value.
+ */
+int
+mem_cmp_msgpack(const struct Mem *a, const char **b, int *result,
+		const struct coll *coll);
+
 /**
  * Convert the given MEM to INTEGER. This function and the function below define
  * the rules that are used to convert values of all other types to INTEGER. In
@@ -981,20 +991,6 @@ int sqlVdbeMemTooBig(Mem *);
 int
 sql_vdbemem_finalize(struct Mem *mem, struct func *func);
 
-/** MEM and msgpack functions. */
-
-/**
- * Perform comparison of two keys: one is packed and one is not.
- *
- * @param key1 Pointer to pointer to first key.
- * @param unpacked Pointer to unpacked tuple.
- * @param key2_idx index of key in umpacked record to compare.
- *
- * @retval +1 if key1 > pUnpacked[iKey2], -1 ptherwise.
- */
-int sqlVdbeCompareMsgpack(const char **key1,
-			      struct UnpackedRecord *unpacked, int key2_idx);
-
 /**
  * Perform comparison of two tuples: unpacked (key1) and packed (key2)
  *
diff --git a/test/sql-tap/gh-6164-uuid-follow-ups.test.lua b/test/sql-tap/gh-6164-uuid-follow-ups.test.lua
index 6b4a811c3..dd041c0b4 100755
--- a/test/sql-tap/gh-6164-uuid-follow-ups.test.lua
+++ b/test/sql-tap/gh-6164-uuid-follow-ups.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 local test = require("sqltester")
-test:plan(8)
+test:plan(9)
 
 -- Make sure that function quote() can work with uuid.
 test:do_execsql_test(
@@ -76,6 +76,18 @@ test:do_execsql_test(
         2
     })
 
+box.execute([[DELETE FROM t;]])
+
+box.execute([[INSERT INTO t VALUES (1, X'00'), (2, ?);]], {uuid1})
+
+test:do_execsql_test(
+    "gh-6164-9",
+    [[
+        SELECT i FROM t ORDER BY s;
+    ]], {
+        1, 2
+    })
+
 box.execute([[DROP TABLE t;]])
 
 test:finish_test()


More information about the Tarantool-patches mailing list