[Tarantool-patches] [PATCH v5 49/52] sql: introduce mem_get_str0() and mem_as_str0()

Mergen Imeev imeevma at tarantool.org
Wed Apr 14 04:43:08 MSK 2021


Thank you for the review! My answers, diff and new patch below.

On Wed, Apr 14, 2021 at 01:06:09AM +0200, Vladislav Shpilevoy wrote:
> Thanks for the discussion!
> 
> See 2 comments below.
> 
> 1. With this getter it is less trivial than with the previous
> ones.
> 
> I just realized, that we call mem_as_str() on function arguments.
> What if a mem is passed to multiple functions, and one of them
> changes it affecting the other functions? What if the same function
> is called multiple times during a query? - the first execution
> affects the argument for the next executions? Does not look right.
> Am I missing something?
> 
> It seems that could be fixed not so hard.
> 
> If the value is already a 0-terminated string, we return it.
> If it is a blob or not terminated string, we extended it, add 0,
> and return. It does not really change the original value anyway.
> Mem.n stays the same, and the first n bytes stay the same.
> If it is a number, we can save its string representation to
> Mem.z, which is not used for numbers.
> 
> Will it work?
> 
Formally it will work, but I am not sure that it is good idea. Still, I believe
that there is no need for this since before this patch such functions as
sql_value_text(), valueToText(), sql_value_bytes() and so on actually converted
MEM to string. So, what we have done is just a bit refactored these functions.
There is no changes in behaviour. So, I expect that there shouldn't be any new
problems. Still, there may be some old problems. Also, our function to get
string or varbinary length actually do not changes MEM as it was before.

> > diff --git a/src/box/sql/mem.h b/src/box/sql/mem.h
> > index e48e8788c..5848ae729 100644
> > --- a/src/box/sql/mem.h
> > +++ b/src/box/sql/mem.h
> > @@ -568,6 +568,28 @@ mem_get_double(const struct Mem *mem, double *d);
> >  int
> >  mem_get_bool(const struct Mem *mem, bool *b);
> >  
> > +/**
> > + * Return value for MEM of STRING type if MEM contains a NULL-terminated string.
> > + * Otherwise convert value of the MEM to NULL-terminated string if possible and
> > + * return converted value. Original MEM is not changed.
> > + */
> > +int
> > +mem_get_str0(const struct Mem *mem, const char **s);
> > +
> > +/**
> > + * Return value for MEM of STRING type if MEM contains NULL-terminated string.
> > + * Otherwise convert MEM to MEM of string type that contains NULL-terminated
> > + * string and return its value. Return NULL if conversion is impossible.
> > + */
> > +static inline const char *
> > +mem_as_str0(struct Mem *mem)
> > +{
> > + const char *str;
> > + if (mem_to_str0(mem) != 0 || mem_get_str0(mem, &str) != 0)
> 
> 2. If mem_to_str0 succeeded, why can't you return mem->z right away?
> 
True, I can. Fixed.

> > +   return NULL;
> > + return str;
> > +}


Diff:


diff --git a/src/box/sql/mem.h b/src/box/sql/mem.h
index b16536e6e..8d53fa161 100644
--- a/src/box/sql/mem.h
+++ b/src/box/sql/mem.h
@@ -850,10 +850,9 @@ mem_get_str0(const struct Mem *mem, const char **s);
 static inline const char *
 mem_as_str0(struct Mem *mem)
 {
-	const char *str;
-	if (mem_to_str0(mem) != 0 || mem_get_str0(mem, &str) != 0)
+	if (mem_to_str0(mem) != 0)
 		return NULL;
-	return str;
+	return mem->z;
 }
 
 /**



New patch:



commit 18a745c61eaeaa2d4b324b5aa13cbe0e80ce9ad7
Author: Mergen Imeev <imeevma at gmail.com>
Date:   Thu Mar 18 12:41:41 2021 +0300

    sql: introduce mem_get_str0() and mem_as_str0()
    
    This patch introduces mem_get_str0() and mem_as_str0(). Function
    mem_get_str0() is used to receive NULL-terminated string from MEM. If
    value of MEM is not NULL-terminated string, it is converted to
    NULL-terminated string if possible. MEM is not changed. Function
    mem_as_str0() is also used to receive NULL-terminated string from MEM,
    however if MEM does not contain NULL-terminated string it converts MEM
    to MEM that contains NULL-terminated string and returns its value.
    
    Part of #5818

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 3d8028077..38a7da69f 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -48,6 +48,12 @@
 #include "box/coll_id_cache.h"
 #include "box/schema.h"
 
+static const unsigned char *
+mem_as_ustr(struct Mem *mem)
+{
+	return (const unsigned char *)mem_as_str0(mem);
+}
+
 /*
  * Return the collating function associated with a function.
  */
@@ -174,7 +180,7 @@ lengthFunc(sql_context * context, int argc, sql_value ** argv)
 			break;
 		}
 	case MP_STR:{
-			const unsigned char *z = sql_value_text(argv[0]);
+			const unsigned char *z = mem_as_ustr(argv[0]);
 			if (z == 0)
 				return;
 			len = sql_utf8_char_count(z, sql_value_bytes(argv[0]));
@@ -323,8 +329,8 @@ position_func(struct sql_context *context, int argc, struct Mem **argv)
 			 * Character size is equal to
 			 * needle char size.
 			 */
-			haystack_str = sql_value_text(haystack);
-			needle_str = sql_value_text(needle);
+			haystack_str = mem_as_ustr(haystack);
+			needle_str = mem_as_ustr(needle);
 
 			int n_needle_chars =
 				sql_utf8_char_count(needle_str, n_needle_bytes);
@@ -386,8 +392,7 @@ printfFunc(sql_context * context, int argc, sql_value ** argv)
 	int n;
 	sql *db = sql_context_db_handle(context);
 
-	if (argc >= 1
-	    && (zFormat = (const char *)sql_value_text(argv[0])) != 0) {
+	if (argc >= 1 && (zFormat = mem_as_str0(argv[0])) != NULL) {
 		x.nArg = argc - 1;
 		x.nUsed = 0;
 		x.apArg = argv + 1;
@@ -440,7 +445,7 @@ substrFunc(sql_context * context, int argc, sql_value ** argv)
 			return;
 		assert(len == sql_value_bytes(argv[0]));
 	} else {
-		z = sql_value_text(argv[0]);
+		z = mem_as_ustr(argv[0]);
 		if (z == 0)
 			return;
 		len = 0;
@@ -602,13 +607,13 @@ case_type##ICUFunc(sql_context *context, int argc, sql_value **argv)   \
 		context->is_aborted = true;                                    \
 		return;                                                        \
 	}                                                                      \
-	z2 = (char *)sql_value_text(argv[0]);                              \
+	z2 = mem_as_str0(argv[0]);                                             \
 	n = sql_value_bytes(argv[0]);                                      \
 	/*                                                                     \
 	 * Verify that the call to _bytes()                                    \
 	 * does not invalidate the _text() pointer.                            \
 	 */                                                                    \
-	assert(z2 == (char *)sql_value_text(argv[0]));                     \
+	assert(z2 == mem_as_str0(argv[0]));                                    \
 	if (!z2)                                                               \
 		return;                                                        \
 	z1 = contextMalloc(context, ((i64) n) + 1);                            \
@@ -938,8 +943,8 @@ likeFunc(sql_context *context, int argc, sql_value **argv)
 		context->is_aborted = true;
 		return;
 	}
-	const char *zB = (const char *) sql_value_text(argv[0]);
-	const char *zA = (const char *) sql_value_text(argv[1]);
+	const char *zB = mem_as_str0(argv[0]);
+	const char *zA = mem_as_str0(argv[1]);
 	const char *zB_end = zB + sql_value_bytes(argv[0]);
 	const char *zA_end = zA + sql_value_bytes(argv[1]);
 
@@ -958,7 +963,7 @@ likeFunc(sql_context *context, int argc, sql_value **argv)
 		return;
 	}
 	/* Encoding did not change */
-	assert(zB == (const char *) sql_value_text(argv[0]));
+	assert(zB == mem_as_str0(argv[0]));
 
 	if (argc == 3) {
 		/*
@@ -966,7 +971,7 @@ likeFunc(sql_context *context, int argc, sql_value **argv)
 		 * single UTF-8 character. Otherwise, return an
 		 * error.
 		 */
-		const unsigned char *zEsc = sql_value_text(argv[2]);
+		const unsigned char *zEsc = mem_as_ustr(argv[2]);
 		if (zEsc == 0)
 			return;
 		if (sql_utf8_char_count(zEsc, sql_value_bytes(argv[2])) != 1) {
@@ -1095,7 +1100,7 @@ quoteFunc(sql_context * context, int argc, sql_value ** argv)
 	case MP_STR:{
 			int i, j;
 			u64 n;
-			const unsigned char *zArg = sql_value_text(argv[0]);
+			const unsigned char *zArg = mem_as_ustr(argv[0]);
 			char *z;
 
 			if (zArg == 0)
@@ -1141,7 +1146,7 @@ quoteFunc(sql_context * context, int argc, sql_value ** argv)
 static void
 unicodeFunc(sql_context * context, int argc, sql_value ** argv)
 {
-	const unsigned char *z = sql_value_text(argv[0]);
+	const unsigned char *z = mem_as_ustr(argv[0]);
 	(void)argc;
 	if (z && z[0])
 		sql_result_uint(context, sqlUtf8Read(&z));
@@ -1259,12 +1264,12 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
 
 	assert(argc == 3);
 	UNUSED_PARAMETER(argc);
-	zStr = sql_value_text(argv[0]);
+	zStr = mem_as_ustr(argv[0]);
 	if (zStr == 0)
 		return;
 	nStr = sql_value_bytes(argv[0]);
-	assert(zStr == sql_value_text(argv[0]));	/* No encoding change */
-	zPattern = sql_value_text(argv[1]);
+	assert(zStr == mem_as_ustr(argv[0]));	/* No encoding change */
+	zPattern = mem_as_ustr(argv[1]);
 	if (zPattern == 0) {
 		assert(mem_is_null(argv[1])
 		       || sql_context_db_handle(context)->mallocFailed);
@@ -1276,12 +1281,12 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
 		sql_result_value(context, argv[0]);
 		return;
 	}
-	assert(zPattern == sql_value_text(argv[1]));	/* No encoding change */
-	zRep = sql_value_text(argv[2]);
+	assert(zPattern == mem_as_ustr(argv[1]));	/* No encoding change */
+	zRep = mem_as_ustr(argv[2]);
 	if (zRep == 0)
 		return;
 	nRep = sql_value_bytes(argv[2]);
-	assert(zRep == sql_value_text(argv[2]));
+	assert(zRep == mem_as_ustr(argv[2]));
 	nOut = nStr + 1;
 	assert(nOut < SQL_MAX_LENGTH);
 	zOut = contextMalloc(context, (i64) nOut);
@@ -1443,7 +1448,7 @@ trim_func_one_arg(struct sql_context *context, sql_value *arg)
 	else
 		default_trim = (const unsigned char *) " ";
 	int input_str_sz = sql_value_bytes(arg);
-	const unsigned char *input_str = sql_value_text(arg);
+	const unsigned char *input_str = mem_as_ustr(arg);
 	uint8_t trim_char_len[1] = { 1 };
 	trim_procedure(context, TRIM_BOTH, default_trim, trim_char_len, 1,
 		       input_str, input_str_sz);
@@ -1465,7 +1470,7 @@ trim_func_two_args(struct sql_context *context, sql_value *arg1,
 		   sql_value *arg2)
 {
 	const unsigned char *input_str, *trim_set;
-	if ((input_str = sql_value_text(arg2)) == NULL)
+	if ((input_str = mem_as_ustr(arg2)) == NULL)
 		return;
 
 	int input_str_sz = sql_value_bytes(arg2);
@@ -1474,7 +1479,7 @@ trim_func_two_args(struct sql_context *context, sql_value *arg1,
 		trim_procedure(context, mem_get_int_unsafe(arg1),
 			       (const unsigned char *) " ", &len_one, 1,
 			       input_str, input_str_sz);
-	} else if ((trim_set = sql_value_text(arg1)) != NULL) {
+	} else if ((trim_set = mem_as_ustr(arg1)) != NULL) {
 		int trim_set_sz = sql_value_bytes(arg1);
 		uint8_t *char_len;
 		int char_cnt = trim_prepare_char_len(context, trim_set,
@@ -1500,8 +1505,8 @@ trim_func_three_args(struct sql_context *context, sql_value *arg1,
 {
 	assert(sql_value_type(arg1) == MP_INT || sql_value_type(arg1) == MP_UINT);
 	const unsigned char *input_str, *trim_set;
-	if ((input_str = sql_value_text(arg3)) == NULL ||
-	    (trim_set = sql_value_text(arg2)) == NULL)
+	if ((input_str = mem_as_ustr(arg3)) == NULL ||
+	    (trim_set = mem_as_ustr(arg2)) == NULL)
 		return;
 
 	int trim_set_sz = sql_value_bytes(arg2);
@@ -1573,7 +1578,7 @@ soundexFunc(sql_context * context, int argc, sql_value ** argv)
 		context->is_aborted = true;
 		return;
 	}
-	zIn = (u8 *) sql_value_text(argv[0]);
+	zIn = (u8 *) mem_as_ustr(argv[0]);
 	if (zIn == 0)
 		zIn = (u8 *) "";
 	for (i = 0; zIn[i] && !sqlIsalpha(zIn[i]); i++) {
@@ -1818,7 +1823,7 @@ groupConcatStep(sql_context * context, int argc, sql_value ** argv)
 		pAccum->mxAlloc = db->aLimit[SQL_LIMIT_LENGTH];
 		if (!firstTerm) {
 			if (argc == 2) {
-				zSep = (char *)sql_value_text(argv[1]);
+				zSep = mem_as_str0(argv[1]);
 				nSep = sql_value_bytes(argv[1]);
 			} else {
 				zSep = ",";
@@ -1827,7 +1832,7 @@ groupConcatStep(sql_context * context, int argc, sql_value ** argv)
 			if (zSep)
 				sqlStrAccumAppend(pAccum, zSep, nSep);
 		}
-		zVal = (char *)sql_value_text(argv[0]);
+		zVal = mem_as_str0(argv[0]);
 		nVal = sql_value_bytes(argv[0]);
 		if (zVal)
 			sqlStrAccumAppend(pAccum, zVal, nVal);
diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
index 661764f9c..c73edb21e 100644
--- a/src/box/sql/mem.c
+++ b/src/box/sql/mem.c
@@ -1134,6 +1134,15 @@ mem_get_bool(const struct Mem *mem, bool *b)
 	return -1;
 }
 
+int
+mem_get_str0(const struct Mem *mem, const char **s)
+{
+	if ((mem->flags & MEM_Str) == 0 || (mem->flags & MEM_Term) == 0)
+		return -1;
+	*s = mem->z;
+	return 0;
+}
+
 int
 mem_copy(struct Mem *to, const struct Mem *from)
 {
@@ -1777,45 +1786,6 @@ mem_has_msgpack_subtype(struct Mem *mem)
 	       mem->subtype == SQL_SUBTYPE_MSGPACK;
 }
 
-/*
- * The pVal argument is known to be a value other than NULL.
- * Convert it into a string with encoding enc and return a pointer
- * to a zero-terminated version of that string.
- */
-static SQL_NOINLINE const void *
-valueToText(sql_value * pVal)
-{
-	assert(pVal != 0);
-	assert((pVal->flags & (MEM_Null)) == 0);
-	if ((pVal->flags & (MEM_Blob | MEM_Str)) &&
-	    !mem_has_msgpack_subtype(pVal)) {
-		if (ExpandBlob(pVal))
-			return 0;
-		pVal->flags |= MEM_Str;
-		sqlVdbeMemNulTerminate(pVal);	/* IMP: R-31275-44060 */
-	} else {
-		mem_to_str(pVal);
-		assert(0 == (1 & SQL_PTR_TO_INT(pVal->z)));
-	}
-	return pVal->z;
-}
-
-/*
- * It is already known that pMem contains an unterminated string.
- * Add the zero terminator.
- */
-static SQL_NOINLINE int
-vdbeMemAddTerminator(Mem * pMem)
-{
-	if (sqlVdbeMemGrow(pMem, pMem->n + 2, 1)) {
-		return -1;
-	}
-	pMem->z[pMem->n] = 0;
-	pMem->z[pMem->n + 1] = 0;
-	pMem->flags |= MEM_Term;
-	return 0;
-}
-
 /*
  * Both *pMem1 and *pMem2 contain string values. Compare the two values
  * using the collation sequence pColl. As usual, return a negative , zero
@@ -1917,7 +1887,9 @@ sql_value_type(sql_value *pVal)
 static SQL_NOINLINE int
 valueBytes(sql_value * pVal)
 {
-	return valueToText(pVal) != 0 ? pVal->n : 0;
+	if (mem_to_str(pVal) != 0)
+		return 0;
+	return pVal->n;
 }
 
 int
@@ -2106,21 +2078,6 @@ registerTrace(int iReg, Mem *p) {
 }
 #endif
 
-/*
- * Make sure the given Mem is \u0000 terminated.
- */
-int
-sqlVdbeMemNulTerminate(Mem * pMem)
-{
-	testcase((pMem->flags & (MEM_Term | MEM_Str)) == (MEM_Term | MEM_Str));
-	testcase((pMem->flags & (MEM_Term | MEM_Str)) == 0);
-	if ((pMem->flags & (MEM_Term | MEM_Str)) != MEM_Str) {
-		return 0;	/* Nothing to do */
-	} else {
-		return vdbeMemAddTerminator(pMem);
-	}
-}
-
 /*
  * If the given Mem* has a zero-filled tail, turn it into an ordinary
  * blob stored in dynamically allocated space.
@@ -2279,7 +2236,9 @@ sql_value_blob(sql_value * pVal)
 		p->flags |= MEM_Blob;
 		return p->n ? p->z : 0;
 	} else {
-		return sql_value_text(pVal);
+		if (mem_to_str(pVal) != 0)
+			return NULL;
+		return pVal->z;
 	}
 }
 
@@ -2289,36 +2248,6 @@ sql_value_bytes(sql_value * pVal)
 	return sqlValueBytes(pVal);
 }
 
-const unsigned char *
-sql_value_text(sql_value * pVal)
-{
-	return (const unsigned char *)sqlValueText(pVal);
-}
-
-/* This function is only available internally, it is not part of the
- * external API. It works in a similar way to sql_value_text(),
- * except the data returned is in the encoding specified by the second
- * parameter, which must be one of SQL_UTF16BE, SQL_UTF16LE or
- * SQL_UTF8.
- *
- * (2006-02-16:)  The enc value can be or-ed with SQL_UTF16_ALIGNED.
- * If that is the case, then the result must be aligned on an even byte
- * boundary.
- */
-const void *
-sqlValueText(sql_value * pVal)
-{
-	if (!pVal)
-		return 0;
-	if ((pVal->flags & (MEM_Str | MEM_Term)) == (MEM_Str | MEM_Term)) {
-		return pVal->z;
-	}
-	if (pVal->flags & MEM_Null) {
-		return 0;
-	}
-	return valueToText(pVal);
-}
-
 /*
  * Return a pointer to static memory containing an SQL NULL value.
  */
diff --git a/src/box/sql/mem.h b/src/box/sql/mem.h
index b59197e6c..8d53fa161 100644
--- a/src/box/sql/mem.h
+++ b/src/box/sql/mem.h
@@ -834,6 +834,27 @@ mem_get_bool_unsafe(const struct Mem *mem)
 	return b;
 }
 
+/**
+ * Return value for MEM of STRING type if MEM contains a NULL-terminated string.
+ * Otherwise convert value of the MEM to NULL-terminated string if possible and
+ * return converted value. Original MEM is not changed.
+ */
+int
+mem_get_str0(const struct Mem *mem, const char **s);
+
+/**
+ * Return value for MEM of STRING type if MEM contains NULL-terminated string.
+ * Otherwise convert MEM to MEM of string type that contains NULL-terminated
+ * string and return its value. Return NULL if conversion is impossible.
+ */
+static inline const char *
+mem_as_str0(struct Mem *mem)
+{
+	if (mem_to_str0(mem) != 0)
+		return NULL;
+	return mem->z;
+}
+
 /**
  * Simple type to str convertor. It is used to simplify
  * error reporting.
@@ -867,7 +888,6 @@ registerTrace(int iReg, Mem *p);
 #define memIsValid(M)  ((M)->flags & MEM_Undefined)==0
 #endif
 
-int sqlVdbeMemNulTerminate(struct Mem *);
 int sqlVdbeMemExpandBlob(struct Mem *);
 #define ExpandBlob(P) (((P)->flags&MEM_Zero)?sqlVdbeMemExpandBlob(P):0)
 
@@ -892,11 +912,6 @@ sql_value_blob(struct Mem *);
 int
 sql_value_bytes(struct Mem *);
 
-const unsigned char *
-sql_value_text(struct Mem *);
-
-const void *sqlValueText(struct Mem *);
-
 #define VdbeFrameMem(p) ((Mem *)&((u8 *)p)[ROUND8(sizeof(VdbeFrame))])
 
 const Mem *
diff --git a/src/box/sql/printf.c b/src/box/sql/printf.c
index 2f1948ff8..b4ab0d0f9 100644
--- a/src/box/sql/printf.c
+++ b/src/box/sql/printf.c
@@ -160,7 +160,8 @@ getTextArg(PrintfArguments * p)
 {
 	if (p->nArg <= p->nUsed)
 		return 0;
-	return (char *)sql_value_text(p->apArg[p->nUsed++]);
+	struct Mem *mem = p->apArg[p->nUsed++];
+	return (char *)mem_as_str0(mem);
 }
 
 /*
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index df9469941..51613aea3 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -442,10 +442,6 @@ sql_column_bytes(sql_stmt *, int iCol);
 int
 sql_column_bytes16(sql_stmt *, int iCol);
 
-const unsigned char *
-sql_column_text(sql_stmt *,
-		    int iCol);
-
 char *
 sql_result_to_msgpack(struct sql_stmt *stmt, uint32_t *tuple_size,
 		      struct region *region);
diff --git a/src/box/sql/vdbeapi.c b/src/box/sql/vdbeapi.c
index c75322899..da161e298 100644
--- a/src/box/sql/vdbeapi.c
+++ b/src/box/sql/vdbeapi.c
@@ -464,12 +464,6 @@ sql_column_bytes(sql_stmt * pStmt, int i)
 	return sql_value_bytes(columnMem(pStmt, i));
 }
 
-const unsigned char *
-sql_column_text(sql_stmt * pStmt, int i)
-{
-	return sql_value_text(columnMem(pStmt, i));
-}
-
 char *
 sql_result_to_msgpack(struct sql_stmt *stmt, uint32_t *tuple_size,
 		      struct region *region)
diff --git a/src/box/sql/whereexpr.c b/src/box/sql/whereexpr.c
index 93de722cb..fe7329ea8 100644
--- a/src/box/sql/whereexpr.c
+++ b/src/box/sql/whereexpr.c
@@ -312,8 +312,10 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
 		pVal =
 		    sqlVdbeGetBoundValue(pReprepare, iCol,
 					     FIELD_TYPE_SCALAR);
-		if (pVal != NULL && mem_is_str(pVal))
-			z = (char *)sql_value_text(pVal);
+		if (pVal != NULL && mem_is_str(pVal)) {
+			if (mem_as_str0(pVal) == NULL)
+				return -1;
+		}
 		assert(pRight->op == TK_VARIABLE || pRight->op == TK_REGISTER);
 	} else if (op == TK_STRING) {
 		z = pRight->u.zToken;


More information about the Tarantool-patches mailing list