[tarantool-patches] Re: [PATCH] sql: fix bug with BLOB TRIM() when X'00' in char set
n.pettik
korablev at tarantool.org
Tue Dec 25 14:40:50 MSK 2018
> commit 39b2481341cd2e71d67a04bec9aeed0e1189740c
> Author: Roman Khabibov <roman.habibov at tarantool.org>
> Date: Sat Dec 15 13:21:59 2018 +0300
>
> sql: fix bug with BLOB TRIM() when X'00' in char set
>
> The reason for the bug was that X'00' is a terminal symbol. If the char set
> contained X'00', all characters are ignored after it (including itself).
>
> Closes #3543
>
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index 9667aead5..f397e23c1 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -1223,9 +1223,19 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
> } else if ((zCharSet = sqlite3_value_text(argv[1])) == 0) {
> return;
> } else {
> - const unsigned char *z;
> - for (z = zCharSet, nChar = 0; *z; nChar++) {
> + const unsigned char *z = zCharSet;
> + int sizeOfCharSet = \
We don’t use backslashes to carry code (it is used for macroses and
sometimes for comments).
> + sqlite3_value_bytes(argv[1]); /* Size of char set in bytes. */
We put comments on the top of code to be commented:
/* Size of char set in bytes. */
int sizeOfCharSet = sqlite3_value_bytes(argv[1]);
I guess this comment is completely useless:
name of var and function say exactly the same as comment does.
> + int nProcessedBytes = 0;
> + nChar = 0;
> + const unsigned char *zStepBack;
> + /* Count the number of UTF-8 characters passing through the
> + * entire char set, but not up to the '\0' or X'00' character. */
Use tnt-style comments.
> + while(sizeOfCharSet - nProcessedBytes > 0) {
> + zStepBack = z;
> SQLITE_SKIP_UTF8(z);
> + nProcessedBytes += z - zStepBack;
> + nChar++;
> }
> if (nChar > 0) {
> azChar =
> @@ -1235,10 +1245,18 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
> return;
> }
> aLen = (unsigned char *)&azChar[nChar];
> - for (z = zCharSet, nChar = 0; *z; nChar++) {
> + z = zCharSet;
> + nChar = 0;
> + nProcessedBytes = 0;
This comment is again useless.
> + /* Similar to the previous cycle. But
I see trailing space. Use git diff to spot such places.
> + * now write into "azCharSet". */
Use tnt-style comments.
> + while(sizeOfCharSet - nProcessedBytes > 0) {
> azChar[nChar] = (unsigned char *)z;
> + zStepBack = z;
You don’t need here ‘zStepBack’, you already saved current str
position to azChar.
> SQLITE_SKIP_UTF8(z);
> + nProcessedBytes += z - zStepBack;
> aLen[nChar] = (u8) (z - azChar[nChar]);
> + nChar++;
> }
> }
> }
All points considered, I suggest diff like this:
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index f397e23c1..9b5773321 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1203,7 +1203,8 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
int i; /* Loop counter */
unsigned char *aLen = 0; /* Length of each character in zCharSet */
unsigned char **azChar = 0; /* Individual characters in zCharSet */
- int nChar; /* Number of characters in zCharSet */
+ /* Number of UTF-8 characters in zCharSet. */
+ int nChar;
if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
return;
@@ -1224,17 +1225,20 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
return;
} else {
const unsigned char *z = zCharSet;
- int sizeOfCharSet = \
- sqlite3_value_bytes(argv[1]); /* Size of char set in bytes. */
- int nProcessedBytes = 0;
+ int trim_set_sz = sqlite3_value_bytes(argv[1]);
+ int handled_bytes_cnt = trim_set_sz;
nChar = 0;
- const unsigned char *zStepBack;
- /* Count the number of UTF-8 characters passing through the
- * entire char set, but not up to the '\0' or X'00' character. */
- while(sizeOfCharSet - nProcessedBytes > 0) {
- zStepBack = z;
+ /*
+ * Count the number of UTF-8 characters passing
+ * through the entire char set, but not up
+ * to the '\0' or X'00' character. This allows
+ * to handle trimming set containing such
+ * characters.
+ */
+ while(handled_bytes_cnt > 0) {
+ const unsigned char *prev_byte = z;
SQLITE_SKIP_UTF8(z);
- nProcessedBytes += z - zStepBack;
+ handled_bytes_cnt -= (z - prev_byte);
nChar++;
}
if (nChar > 0) {
@@ -1247,15 +1251,12 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
aLen = (unsigned char *)&azChar[nChar];
z = zCharSet;
nChar = 0;
- nProcessedBytes = 0;
- /* Similar to the previous cycle. But
- * now write into "azCharSet". */
- while(sizeOfCharSet - nProcessedBytes > 0) {
+ handled_bytes_cnt = trim_set_sz;
+ while(handled_bytes_cnt > 0) {
azChar[nChar] = (unsigned char *)z;
- zStepBack = z;
SQLITE_SKIP_UTF8(z);
- nProcessedBytes += z - zStepBack;
aLen[nChar] = (u8) (z - azChar[nChar]);
+ handled_bytes_cnt -= aLen[nChar];
nChar++;
Check it out. If you are ok with it, you can apply it (partially or fully).
More information about the Tarantool-patches
mailing list