[Tarantool-patches] [PATCH v5 42/52] sql: introduce mem_to_str() and mem_to_str0()

imeevma at tarantool.org imeevma at tarantool.org
Fri Apr 9 23:53:40 MSK 2021


Thank you for the review! My answers and new patch below.


On 30.03.2021 02:07, Vladislav Shpilevoy wrote:
> Thanks for the patch!
>
> See 3 comments below.
>
> On 23.03.2021 10:36, imeevma at tarantool.org wrote:
>> This patch introduces mem_convert_to_string(). This function is used to
>> convert MEM to MEM contains string value.
>>
>> Part of #5818
>> ---
>>  src/box/sql/mem.c | 197 ++++++++++++++++++++++++++++++----------------
>>  src/box/sql/mem.h |   7 +-
>>  2 files changed, 133 insertions(+), 71 deletions(-)
>>
>> diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
>> index 8600b5c41..262f48aca 100644
>> --- a/src/box/sql/mem.c
>> +++ b/src/box/sql/mem.c
>> @@ -818,6 +818,131 @@ mem_convert_to_number(struct Mem *mem)
>
> <...>
>
>> +
>> +static inline int
>> +mem_convert_boolean_to_string(struct Mem *mem)
>> +{
>> +	const char *str = mem->u.b ? "TRUE" : "FALSE";
>> +	return mem_copy_string0(mem, str);
>
> 1. This can be a static string. Because the string
> constants are not going anywhere, can ref them as is.
>
I tried to do this, but it breaks something in group_concat() built-in function.
Decided to left it as it is now.

> <...>
>
>> +
>> +int
>> +mem_convert_to_string0(struct Mem *mem)
>> +{
>> +	if ((mem->flags & MEM_Str) != 0 && (mem->flags & MEM_Term) != 0)
>
> 2. Can be done in one check:
>
> 	(mem->flags & (MEM_Str | MEM_Term)) == (MEM_Str | MEM_Term)
>
Thanks, fixed.

>> +		return 0;
>> +	if ((mem->flags & MEM_Str) != 0)
>> +		return mem_convert_string_to_string0(mem);
>> +	if ((mem->flags & (MEM_Int | MEM_UInt)) != 0)
>> +		return mem_convert_integer_to_string(mem);
>> +	if ((mem->flags & MEM_Real) != 0)
>> +		return mem_convert_double_to_string(mem);
>> +	if ((mem->flags & MEM_Bool) != 0)
>> +		return mem_convert_boolean_to_string(mem);
>> +	if ((mem->flags & MEM_Blob) != 0) {
>> +		if ((mem->flags & MEM_Subtype) == 0)
>> +			return mem_convert_binary_to_string0(mem);
>> +		if (mp_typeof(*mem->z) == MP_MAP)
>> +			return mem_convert_map_to_string(mem);
>> +		return mem_convert_array_to_string(mem);
>> +	}
>> +	return -1;
>> +}
>> +
>> +int
>> +mem_convert_to_string(struct Mem *mem)
>
> 3. Why would you need a not terminated string? The old function
> always terminated the strings AFAIS.
>
Actually, to work with MEM we do not need NULL-terminated strings. They are
needed for debugging, built-in functions and somewhere in printf.c. I think that
if we come to a way to return converted to string value using mem_get_str0()
without changing MEM, we may get rid of NULL-terminateds string in MEM.
Depending on '\0' too much may lead to some unnecessary problems. For example
see #5938.

For example, if we create analogue of mem_str() that returns string
representation of value of MEM allocated on region, then there will be no need
of NULL-terminated strings in MEMs.

NULL-termination was needed in SQLite since string should be NULL-terminated
when it is returned to user. We do not have such problem since MEM converted to
msgpack which do not need NULL-termination.

>> +{
>> +	if ((mem->flags & MEM_Str) != 0)
>> +		return 0;
>> +	if ((mem->flags & (MEM_Int | MEM_UInt)) != 0)
>> +		return mem_convert_integer_to_string(mem);
>> +	if ((mem->flags & MEM_Real) != 0)
>> +		return mem_convert_double_to_string(mem);
>> +	if ((mem->flags & MEM_Bool) != 0)
>> +		return mem_convert_boolean_to_string(mem);
>> +	if ((mem->flags & MEM_Blob) != 0) {
>> +		if ((mem->flags & MEM_Subtype) == 0)
>> +			return mem_convert_binary_to_string(mem);
>> +		if (mp_typeof(*mem->z) == MP_MAP)
>> +			return mem_convert_map_to_string(mem);
>> +		return mem_convert_array_to_string(mem);
>> +	}
>> +	return -1;
>> +}
>> +
>>  int
>>  mem_copy(struct Mem *to, const struct Mem *from)
>>  {


New patch:

commit 39f3cc5d8873f763fb0d609f64e026120cbf6de4
Author: Mergen Imeev <imeevma at gmail.com>
Date:   Wed Mar 17 11:55:48 2021 +0300

    sql: introduce mem_to_str() and mem_to_str0()
    
    This patch introduces mem_to_str() and mem_to_str0() functions. These
    functions are used to convert a MEM to a MEM that contains string value.
    These functions defines the rules that are used during convertion from
    values of all other types to STRING.
    
    Part of #5818

diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
index 9a0234e60..be7b47e76 100644
--- a/src/box/sql/mem.c
+++ b/src/box/sql/mem.c
@@ -822,6 +822,130 @@ mem_to_number(struct Mem *mem)
 	return -1;
 }
 
+static inline int
+int_to_str0(struct Mem *mem)
+{
+	const char *str;
+	if ((mem->flags & MEM_UInt) != 0)
+		str = tt_sprintf("%llu", mem->u.u);
+	else
+		str = tt_sprintf("%lld", mem->u.i);
+	return mem_copy_str0(mem, str);
+}
+
+static inline int
+array_to_str0(struct Mem *mem)
+{
+	const char *str = mp_str(mem->z);
+	return mem_copy_str0(mem, str);
+}
+
+static inline int
+map_to_str0(struct Mem *mem)
+{
+	const char *str = mp_str(mem->z);
+	return mem_copy_str0(mem, str);
+}
+
+static inline int
+bool_to_str0(struct Mem *mem)
+{
+	const char *str = mem->u.b ? "TRUE" : "FALSE";
+	return mem_copy_str0(mem, str);
+}
+
+static inline int
+bin_to_str(struct Mem *mem)
+{
+	if (ExpandBlob(mem) != 0)
+		return -1;
+	mem->flags = (mem->flags & (MEM_Dyn | MEM_Static | MEM_Ephem)) |
+		      MEM_Str;
+	mem->field_type = FIELD_TYPE_STRING;
+	return 0;
+}
+
+static inline int
+bin_to_str0(struct Mem *mem)
+{
+	if (ExpandBlob(mem) != 0)
+		return -1;
+	if (sqlVdbeMemGrow(mem, mem->n + 1, 1) != 0)
+		return -1;
+	mem->z[mem->n] = '\0';
+	mem->flags = MEM_Str | MEM_Term;
+	mem->field_type = FIELD_TYPE_STRING;
+	return 0;
+}
+
+static inline int
+double_to_str0(struct Mem *mem)
+{
+	if (sqlVdbeMemGrow(mem, BUF_SIZE, 0) != 0)
+		return -1;
+	sql_snprintf(BUF_SIZE, mem->z, "%!.15g", mem->u.r);
+	mem->n = strlen(mem->z);
+	mem->flags = MEM_Str | MEM_Term;
+	mem->field_type = FIELD_TYPE_STRING;
+	return 0;
+}
+
+static inline int
+str_to_str0(struct Mem *mem)
+{
+	assert((mem->flags | MEM_Str) != 0);
+	if (sqlVdbeMemGrow(mem, mem->n + 1, 1) != 0)
+		return -1;
+	mem->z[mem->n] = '\0';
+	mem->flags |= MEM_Term;
+	mem->field_type = FIELD_TYPE_STRING;
+	return 0;
+}
+
+int
+mem_to_str0(struct Mem *mem)
+{
+	if ((mem->flags & (MEM_Str | MEM_Term)) == (MEM_Str | MEM_Term))
+		return 0;
+	if ((mem->flags & MEM_Str) != 0)
+		return str_to_str0(mem);
+	if ((mem->flags & (MEM_Int | MEM_UInt)) != 0)
+		return int_to_str0(mem);
+	if ((mem->flags & MEM_Real) != 0)
+		return double_to_str0(mem);
+	if ((mem->flags & MEM_Bool) != 0)
+		return bool_to_str0(mem);
+	if ((mem->flags & MEM_Blob) != 0) {
+		if ((mem->flags & MEM_Subtype) == 0)
+			return bin_to_str0(mem);
+		if (mp_typeof(*mem->z) == MP_MAP)
+			return map_to_str0(mem);
+		return array_to_str0(mem);
+	}
+	return -1;
+}
+
+int
+mem_to_str(struct Mem *mem)
+{
+	if ((mem->flags & MEM_Str) != 0)
+		return 0;
+	if ((mem->flags & (MEM_Int | MEM_UInt)) != 0)
+		return int_to_str0(mem);
+	if ((mem->flags & MEM_Real) != 0)
+		return double_to_str0(mem);
+	if ((mem->flags & MEM_Bool) != 0)
+		return bool_to_str0(mem);
+	if ((mem->flags & MEM_Blob) != 0) {
+		if ((mem->flags & MEM_Subtype) == 0)
+			return bin_to_str(mem);
+		if (mp_typeof(*mem->z) == MP_MAP)
+			return map_to_str0(mem);
+		return array_to_str0(mem);
+	}
+	return -1;
+}
+
 int
 mem_copy(struct Mem *to, const struct Mem *from)
 {
@@ -1517,7 +1641,7 @@ valueToText(sql_value * pVal)
 		pVal->flags |= MEM_Str;
 		sqlVdbeMemNulTerminate(pVal);	/* IMP: R-31275-44060 */
 	} else {
-		sqlVdbeMemStringify(pVal);
+		mem_to_str(pVal);
 		assert(0 == (1 & SQL_PTR_TO_INT(pVal->z)));
 	}
 	return pVal->z;
@@ -2001,74 +2125,6 @@ sqlVdbeMemCast(Mem * pMem, enum field_type type)
 	}
 }
 
-/*
- * Add MEM_Str to the set of representations for the given Mem.  Numbers
- * are converted using sql_snprintf().  Converting a BLOB to a string
- * is a no-op.
- *
- * Existing representations MEM_Int and MEM_Real are invalidated if
- * bForce is true but are retained if bForce is false.
- *
- * A MEM_Null value will never be passed to this function. This function is
- * used for converting values to text for returning to the user (i.e. via
- * sql_value_text()), or for ensuring that values to be used as btree
- * keys are strings. In the former case a NULL pointer is returned the
- * user and the latter is an internal programming error.
- */
-int
-sqlVdbeMemStringify(Mem * pMem)
-{
-	int fg = pMem->flags;
-	int nByte = 32;
-
-	if ((fg & (MEM_Null | MEM_Str | MEM_Blob)) != 0 &&
-	    !mem_has_msgpack_subtype(pMem))
-		return 0;
-
-	assert(!(fg & MEM_Zero));
-	assert((fg & (MEM_Int | MEM_UInt | MEM_Real | MEM_Bool |
-		      MEM_Blob)) != 0);
-	assert(EIGHT_BYTE_ALIGNMENT(pMem));
-
-	/*
-	 * In case we have ARRAY/MAP we should save decoded value
-	 * before clearing pMem->z.
-	 */
-	char *value = NULL;
-	if (mem_has_msgpack_subtype(pMem)) {
-		const char *value_str = mp_str(pMem->z);
-		nByte = strlen(value_str) + 1;
-		value = region_alloc(&fiber()->gc, nByte);
-		memcpy(value, value_str, nByte);
-	}
-
-	if (sqlVdbeMemClearAndResize(pMem, nByte)) {
-		return -1;
-	}
-	if (fg & MEM_Int) {
-		sql_snprintf(nByte, pMem->z, "%lld", pMem->u.i);
-		pMem->flags &= ~MEM_Int;
-	} else if ((fg & MEM_UInt) != 0) {
-		sql_snprintf(nByte, pMem->z, "%llu", pMem->u.u);
-		pMem->flags &= ~MEM_UInt;
-	} else if ((fg & MEM_Bool) != 0) {
-		sql_snprintf(nByte, pMem->z, "%s",
-			     SQL_TOKEN_BOOLEAN(pMem->u.b));
-		pMem->flags &= ~MEM_Bool;
-	} else if (mem_has_msgpack_subtype(pMem)) {
-		sql_snprintf(nByte, pMem->z, "%s", value);
-		pMem->flags &= ~MEM_Subtype;
-		pMem->subtype = SQL_SUBTYPE_NO;
-	} else {
-		assert(fg & MEM_Real);
-		sql_snprintf(nByte, pMem->z, "%!.15g", pMem->u.r);
-		pMem->flags &= ~MEM_Real;
-	}
-	pMem->n = sqlStrlen30(pMem->z);
-	pMem->flags |= MEM_Str | MEM_Term;
-	return 0;
-}
-
 /*
  * Make sure the given Mem is \u0000 terminated.
  */
@@ -2191,7 +2247,7 @@ mem_apply_type(struct Mem *record, enum field_type type)
 		 */
 		if ((record->flags & MEM_Str) == 0 &&
 		    (record->flags & (MEM_Real | MEM_Int | MEM_UInt)) != 0)
-			sqlVdbeMemStringify(record);
+			mem_to_str(record);
 		record->flags &= ~(MEM_Real | MEM_Int | MEM_UInt);
 		return 0;
 	case FIELD_TYPE_VARBINARY:
diff --git a/src/box/sql/mem.h b/src/box/sql/mem.h
index 90f46af80..146d08a2a 100644
--- a/src/box/sql/mem.h
+++ b/src/box/sql/mem.h
@@ -505,6 +505,23 @@ mem_to_double(struct Mem *mem);
 int
 mem_to_number(struct Mem *mem);
 
+/**
+ * Convert the given MEM to STRING. This function and the function below define
+ * the rules that are used to convert values of all other types to STRING. In
+ * this function, the string received after the convertion may be not
+ * NULL-terminated.
+ */
+int
+mem_to_str(struct Mem *mem);
+
+/**
+ * Convert the given MEM to STRING. This function and the function above define
+ * the rules that are used to convert values of all other types to STRING. In
+ * this function, the string received after convertion is NULL-terminated.
+ */
+int
+mem_to_str0(struct Mem *mem);
+
 /**
  * Simple type to str convertor. It is used to simplify
  * error reporting.
@@ -539,7 +556,6 @@ registerTrace(int iReg, Mem *p);
 #endif
 
 int sqlVdbeMemCast(struct Mem *, enum field_type type);
-int sqlVdbeMemStringify(struct Mem *);
 int sqlVdbeMemNulTerminate(struct Mem *);
 int sqlVdbeMemExpandBlob(struct Mem *);
 #define ExpandBlob(P) (mem_is_zerobin(P)? sqlVdbeMemExpandBlob(P) : 0)


More information about the Tarantool-patches mailing list