Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: Roman Khabibov <roman.habibov@tarantool.org>,
	tarantool-patches@freelists.org
Subject: [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature
Date: Mon, 22 Apr 2019 21:22:16 +0300	[thread overview]
Message-ID: <da66227c-7677-4c59-1b61-3c45c7d1ab9b@tarantool.org> (raw)
In-Reply-To: <BE5A0015-5AD5-408D-97EF-4D5A51EBB7C4@tarantool.org>

Hi! Thanks for the fixes! I've applied my
fixes and pushed on top of the branch. Please,
look at them and either squash, or lets discuss
where you do not agree. Otherwise it will LGTM.

> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index abeecefa1..0ec359425 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -1286,108 +1286,198 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
>  	sql_result_text(context, (char *)zOut, j, sql_free);
>  }
>  
> -/*
> - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions.
> - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both.
> +/**
> + * Remove characters included in @a trim_set from @a input_str
> + * until encounter a character that doesn't belong to @a trim_set.
> + * Remove from the side specified by @a flags.
> + * @param context SQL context.
> + * @param flags Trim specification: left, right or both.
> + * @param trim_set The set of characters for trimming.
> + * @param char_len Lengths of each UTF-8 character in @a trim_set.
> + * @param char_cnt A number of UTF-8 characters in @a trim_set.
> + * @param input_str Input string for trimming.
> + * @param input_str_sz Input string size in bytes.
>   */
>  static void
> -trimFunc(sql_context * context, int argc, sql_value ** argv)
> +trim_procedure(struct sql_context *context, enum trim_side_mask flags,
> +	       const unsigned char *trim_set, uint8_t *char_len, int char_cnt,

1. Added 'const' to char_len.

> +	       const unsigned char *input_str, int input_str_sz)
>  {
> -	const unsigned char *zIn;	/* Input string */
> -	const unsigned char *zCharSet;	/* Set of characters to trim */
> -	int nIn;		/* Number of bytes in input */
> -	int flags;		/* 1: trimleft  2: trimright  3: trim */
> -	int i;			/* Loop counter */
> -	unsigned char *aLen = 0;	/* Length of each character in zCharSet */
> -	unsigned char **azChar = 0;	/* Individual characters in zCharSet */
> -	int nChar;		/* Number of characters in zCharSet */
> -
> -	if (sql_value_type(argv[0]) == SQL_NULL) {
> -		return;
> -	}
> -	zIn = sql_value_text(argv[0]);
> -	if (zIn == 0)
> -		return;
> -	nIn = sql_value_bytes(argv[0]);
> -	assert(zIn == sql_value_text(argv[0]));
> -	if (argc == 1) {
> -		static const unsigned char lenOne[] = { 1 };
> -		static unsigned char *const azOne[] = { (u8 *) " " };
> -		nChar = 1;
> -		aLen = (u8 *) lenOne;
> -		azChar = (unsigned char **)azOne;
> -		zCharSet = 0;
> -	} else if ((zCharSet = sql_value_text(argv[1])) == 0) {
> -		return;
> -	} else {
> -		const unsigned char *z = zCharSet;
> -		int trim_set_sz = sql_value_bytes(argv[1]);
> -		/*
> -		* Count the number of UTF-8 characters passing
> -		* through the entire char set, but not up
> -		* to the '\0' or X'00' character. This allows
> -		* to handle trimming set containing such
> -		* characters.
> -		*/
> -		nChar = sql_utf8_char_count(z, trim_set_sz);
> -		if (nChar > 0) {
> -			azChar =
> -			    contextMalloc(context,
> -					  ((i64) nChar) * (sizeof(char *) + 1));
> -			if (azChar == 0) {
> -				return;
> -			}
> -			aLen = (unsigned char *)&azChar[nChar];
> -			z = zCharSet;
> -			i = 0;
> -			nChar = 0;
> -			int handled_bytes_cnt = trim_set_sz;
> -			while(handled_bytes_cnt > 0) {
> -				azChar[nChar] = (unsigned char *)(z + i);
> -				SQL_UTF8_FWD_1(z, i, trim_set_sz);
> -				aLen[nChar] = (u8) (z + i - azChar[nChar]);
> -				handled_bytes_cnt -= aLen[nChar];
> -				nChar++;
> -			}
> -		}
> -	}
> -	if (nChar > 0) {
> -		flags = SQL_PTR_TO_INT(sql_user_data(context));
> -		if (flags & 1) {
> -			while (nIn > 0) {
> -				int len = 0;
> -				for (i = 0; i < nChar; i++) {
> -					len = aLen[i];
> -					if (len <= nIn
> -					    && memcmp(zIn, azChar[i], len) == 0)
> -						break;
> -				}
> -				if (i >= nChar)
> +	if (char_cnt == 0)
> +		goto finish;
> +	int i = 0;
> +	const unsigned char *z = NULL;

2. These values do not need initialization. They are assigned
before each usage.

> +	if ((flags & TRIM_LEADING) != 0) {
> +		while (input_str_sz > 0) {
> +			int len = 0;
> +			z = trim_set;
> +			for (i = 0; i < char_cnt; ++i, z += len) {
> +				len = char_len[i];
> +				if (len <= input_str_sz
> +				    && memcmp(input_str, z, len) == 0)
>  					break;
> -				zIn += len;
> -				nIn -= len;
>  			}
> +			if (i >= char_cnt)
> +				break;
> +			input_str += len;
> +			input_str_sz -= len;
>  		}
> -		if (flags & 2) {
> -			while (nIn > 0) {
> -				int len = 0;
> -				for (i = 0; i < nChar; i++) {
> -					len = aLen[i];
> -					if (len <= nIn
> -					    && memcmp(&zIn[nIn - len],
> -						      azChar[i], len) == 0)
> -						break;
> -				}
> -				if (i >= nChar)
> +	}
> +	if ((flags & TRIM_TRAILING) != 0) {
> +		while (input_str_sz > 0) {
> +			int len = 0;
> +			z = trim_set;
> +			for (i = 0; i < char_cnt; ++i, z += len) {
> +				len = char_len[i];
> +				if (len <= input_str_sz
> +				    && memcmp(&input_str[input_str_sz - len],
> +					      z, len) == 0)
>  					break;
> -				nIn -= len;
>  			}
> +			if (i >= char_cnt)
> +				break;
> +			input_str_sz -= len;
>  		}
> -		if (zCharSet) {
> -			sql_free(azChar);
> -		}
>  	}
> -	sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT);
> +
> +finish:
> +	sql_result_text(context, (char *)input_str, input_str_sz,
> +				SQL_TRANSIENT);

3. The indentation has been broken.

> +}
> +
> +/**
> + * Prepare arguments for trimming procedure. Allocate memory for
> + * @a char_len (array of lengths each character in @a trim_set)
> + * and fill it.
> + *
> + * @param context SQL context.
> + * @param trim_set The set of characters for trimming.
> + * @param[out] char_len Lengths of each character in @ trim_set.
> + * @retval >=0 A number of UTF-8 characters in @a trim_set.
> + * @retval -1 Memory allocation error.
> + */
> +static int
> +prepare_char_len(struct sql_context *context, const unsigned char *trim_set,
> +		 int trim_set_sz, uint8_t **char_len)

4. Usually when we write somehow linked functions, we prefix
them with a certain prefix. Here I would use 'trim_'.

> +{
> +	/*
> +	 * Count the number of UTF-8 characters passing through
> +	 * the entire char set, but not up to the '\0' or X'00'
> +	 * character. This allows to handle trimming set
> +	 * containing such characters.
> +	 */
> +	int char_cnt = sql_utf8_char_count(trim_set, trim_set_sz);
> +	if (char_cnt == 0)
> +		return 0;
> +
> +	if ((*char_len = (uint8_t *)contextMalloc(context, char_cnt)) == NULL)
> +		return -1;
> +
> +	const unsigned char *z = trim_set;

5. You do not need initialization here. Just use 'trim_set'
right below.

> +	int i = 0, j = 0;
> +	while(j < char_cnt) {
> +		int old_i = i;
> +		SQL_UTF8_FWD_1(z, i, trim_set_sz);
> +		(*char_len)[j++] = i - old_i;
> +	}
> +
> +	return char_cnt;
> +}
> +
> +/**
> + * Normalize args from @a argv input array when it has one arg
> + * only.
> + *
> + * Case: TRIM(<str>)
> + * Call trimming procedure with TRIM_BOTH as the flags and " " as
> + * the trimming set.
> + */
> +static void
> +trim_func_one_arg(struct sql_context *context, int argc, sql_value **argv)
> +{
> +	assert(argc == 1);
> +	(void) argc;
> +
> +	const unsigned char *input_str;
> +	if ((input_str = sql_value_text(argv[0])) == NULL)
> +		return;
> +
> +	int input_str_sz = sql_value_bytes(argv[0]);
> +	static uint8_t len_one[] = { 1 };

6. 'static' is overkill here. It is enough to just declare a
variable 'uint8_t len = 1;' and pass it by pointer into the
trim_procedure.

> +	trim_procedure(context, TRIM_BOTH, (const unsigned char *) " ",
> +		       len_one, 1, input_str, input_str_sz);
> +}
> +
> +/**
> + * Normalize args from @a argv input array when it has two args.
> + *
> + * Case: TRIM(<character_set> FROM <str>)
> + * If user has specified <character_set> only, call trimming
> + * procedure with TRIM_BOTH as the flags and that trimming set.
> + *
> + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>)
> + * If user has specified side keyword only, then call trimming
> + * procedure with the specified side and " " as the trimming set.
> + */
> +static void
> +trim_func_two_args(struct sql_context *context, int argc, sql_value **argv)
> +{
> +	assert(argc == 2);
> +	(void) argc;
> +
> +	const unsigned char *input_str;
> +	if ((input_str = sql_value_text(argv[1])) == NULL)
> +		return;
> +
> +	int input_str_sz = sql_value_bytes(argv[1]);
> +	const unsigned char *trim_set;
> +	if (sql_value_type(argv[0]) == SQL_INTEGER) {
> +		static uint8_t len_one[] = { 1 };

7. The same.

> +		trim_procedure(context, sql_value_int(argv[0]),
> +			       (const unsigned char *) " ", len_one, 1,
> +			       input_str, input_str_sz);
> +	} else if ((trim_set = sql_value_text(argv[0])) != NULL) {
> +		int trim_set_sz = sql_value_bytes(argv[0]);
> +		uint8_t *char_len = NULL;
> +		int char_cnt = 0;
> +		if ((char_cnt = prepare_char_len(context, trim_set, trim_set_sz,
> +						 &char_len)) == -1)

8. I would understand if char_cnt was declared somewhere above, or
assignment inside 'if' saved some code lines, like it does with
'input_str' above, but here it doesn't. Normally we do not assign
in 'if' at all, it is SQLite legacy.

> +			return;
> +		trim_procedure(context, TRIM_BOTH, trim_set, char_len, char_cnt,
> +			       input_str, input_str_sz);
> +		sql_free(char_len);
> +	}
> +}
> +
> +/**
> + * Normalize args from @a argv input array when it has three args.
> + *
> + * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>)
> + * If user has specified side keyword and <character_set>, then
> + * call trimming procedure with that args.
> + */
> +static void
> +trim_func_three_args(struct sql_context *context, int argc, sql_value **argv)
> +{
> +	assert(argc == 3);
> +	(void) argc;
> +
> +	assert(sql_value_type(argv[0]) == SQL_INTEGER);
> +	const unsigned char *input_str, *trim_set;
> +	if ((input_str = sql_value_text(argv[2])) == NULL ||
> +	    (trim_set = sql_value_text(argv[1])) == NULL)
> +		return;
> +
> +	int trim_set_sz = sql_value_bytes(argv[1]);
> +	int input_str_sz = sql_value_bytes(argv[2]);
> +	uint8_t *char_len = NULL;
> +	int char_cnt = 0;
> +	if ((char_cnt = prepare_char_len(context, trim_set, trim_set_sz,
> +					 &char_len)) == -1)
> +		return;

9. The same.

> +	trim_procedure(context, sql_value_int(argv[0]), trim_set, char_len,
> +		       char_cnt, input_str, input_str_sz);
> +	sql_free(char_len);
>  }
>  

Fixes on the branch:

========================================================================
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 0ec359425..2bbb2ad4a 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1300,16 +1300,15 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
  */
 static void
 trim_procedure(struct sql_context *context, enum trim_side_mask flags,
-	       const unsigned char *trim_set, uint8_t *char_len, int char_cnt,
-	       const unsigned char *input_str, int input_str_sz)
+	       const unsigned char *trim_set, const uint8_t *char_len,
+	       int char_cnt, const unsigned char *input_str, int input_str_sz)
 {
 	if (char_cnt == 0)
 		goto finish;
-	int i = 0;
-	const unsigned char *z = NULL;
+	int i, len;
+	const unsigned char *z;
 	if ((flags & TRIM_LEADING) != 0) {
 		while (input_str_sz > 0) {
-			int len = 0;
 			z = trim_set;
 			for (i = 0; i < char_cnt; ++i, z += len) {
 				len = char_len[i];
@@ -1325,7 +1324,6 @@ trim_procedure(struct sql_context *context, enum trim_side_mask flags,
 	}
 	if ((flags & TRIM_TRAILING) != 0) {
 		while (input_str_sz > 0) {
-			int len = 0;
 			z = trim_set;
 			for (i = 0; i < char_cnt; ++i, z += len) {
 				len = char_len[i];
@@ -1339,10 +1337,9 @@ trim_procedure(struct sql_context *context, enum trim_side_mask flags,
 			input_str_sz -= len;
 		}
 	}
-
 finish:
 	sql_result_text(context, (char *)input_str, input_str_sz,
-				SQL_TRANSIENT);
+			SQL_TRANSIENT);
 }
 
 /**
@@ -1357,8 +1354,9 @@ finish:
  * @retval -1 Memory allocation error.
  */
 static int
-prepare_char_len(struct sql_context *context, const unsigned char *trim_set,
-		 int trim_set_sz, uint8_t **char_len)
+trim_prepare_char_len(struct sql_context *context,
+		      const unsigned char *trim_set, int trim_set_sz,
+		      uint8_t **char_len)
 {
 	/*
 	 * Count the number of UTF-8 characters passing through
@@ -1367,17 +1365,18 @@ prepare_char_len(struct sql_context *context, const unsigned char *trim_set,
 	 * containing such characters.
 	 */
 	int char_cnt = sql_utf8_char_count(trim_set, trim_set_sz);
-	if (char_cnt == 0)
+	if (char_cnt == 0) {
+		*char_len = NULL;
 		return 0;
+	}
 
 	if ((*char_len = (uint8_t *)contextMalloc(context, char_cnt)) == NULL)
 		return -1;
 
-	const unsigned char *z = trim_set;
 	int i = 0, j = 0;
 	while(j < char_cnt) {
 		int old_i = i;
-		SQL_UTF8_FWD_1(z, i, trim_set_sz);
+		SQL_UTF8_FWD_1(trim_set, i, trim_set_sz);
 		(*char_len)[j++] = i - old_i;
 	}
 
@@ -1403,9 +1402,9 @@ trim_func_one_arg(struct sql_context *context, int argc, sql_value **argv)
 		return;
 
 	int input_str_sz = sql_value_bytes(argv[0]);
-	static uint8_t len_one[] = { 1 };
+	uint8_t len_one = 1;
 	trim_procedure(context, TRIM_BOTH, (const unsigned char *) " ",
-		       len_one, 1, input_str, input_str_sz);
+		       &len_one, 1, input_str, input_str_sz);
 }
 
 /**
@@ -1425,23 +1424,22 @@ trim_func_two_args(struct sql_context *context, int argc, sql_value **argv)
 	assert(argc == 2);
 	(void) argc;
 
-	const unsigned char *input_str;
+	const unsigned char *input_str, *trim_set;
 	if ((input_str = sql_value_text(argv[1])) == NULL)
 		return;
 
 	int input_str_sz = sql_value_bytes(argv[1]);
-	const unsigned char *trim_set;
 	if (sql_value_type(argv[0]) == SQL_INTEGER) {
-		static uint8_t len_one[] = { 1 };
+		uint8_t len_one = 1;
 		trim_procedure(context, sql_value_int(argv[0]),
-			       (const unsigned char *) " ", len_one, 1,
+			       (const unsigned char *) " ", &len_one, 1,
 			       input_str, input_str_sz);
 	} else if ((trim_set = sql_value_text(argv[0])) != NULL) {
 		int trim_set_sz = sql_value_bytes(argv[0]);
-		uint8_t *char_len = NULL;
-		int char_cnt = 0;
-		if ((char_cnt = prepare_char_len(context, trim_set, trim_set_sz,
-						 &char_len)) == -1)
+		uint8_t *char_len;
+		int char_cnt = trim_prepare_char_len(context, trim_set,
+						     trim_set_sz, &char_len);
+		if (char_cnt == -1)
 			return;
 		trim_procedure(context, TRIM_BOTH, trim_set, char_len, char_cnt,
 			       input_str, input_str_sz);
@@ -1470,10 +1468,10 @@ trim_func_three_args(struct sql_context *context, int argc, sql_value **argv)
 
 	int trim_set_sz = sql_value_bytes(argv[1]);
 	int input_str_sz = sql_value_bytes(argv[2]);
-	uint8_t *char_len = NULL;
-	int char_cnt = 0;
-	if ((char_cnt = prepare_char_len(context, trim_set, trim_set_sz,
-					 &char_len)) == -1)
+	uint8_t *char_len;
+	int char_cnt = trim_prepare_char_len(context, trim_set, trim_set_sz,
+					     &char_len);
+	if (char_cnt == -1)
 		return;
 	trim_procedure(context, sql_value_int(argv[0]), trim_set, char_len,
 		       char_cnt, input_str, input_str_sz);

  reply	other threads:[~2019-04-22 18:22 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-11 17:33 [tarantool-patches] " Roman Khabibov
2019-04-14 18:01 ` [tarantool-patches] " Vladislav Shpilevoy
2019-04-16  0:14   ` Roman Khabibov
2019-04-16 17:14     ` Vladislav Shpilevoy
2019-04-18 17:11       ` Roman Khabibov
2019-04-19 12:49         ` Vladislav Shpilevoy
2019-04-20  0:48           ` Roman Khabibov
2019-04-21 19:36             ` Vladislav Shpilevoy
2019-04-22 10:43               ` Vladislav Shpilevoy
2019-04-22 16:45               ` Roman Khabibov
2019-04-22 18:22                 ` Vladislav Shpilevoy [this message]
2019-04-23  1:04                   ` Roman Khabibov
2019-04-23  8:59                     ` Vladislav Shpilevoy
2019-04-23 10:21 ` Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=da66227c-7677-4c59-1b61-3c45c7d1ab9b@tarantool.org \
    --to=v.shpilevoy@tarantool.org \
    --cc=roman.habibov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='[tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox