From: Roman <roman.habibov@tarantool.org> To: "n.pettik" <korablev@tarantool.org>, tarantool-patches@freelists.org Subject: [tarantool-patches] Re: [PATCH] sql: fix bug with BLOB TRIM() when X'00' in char set Date: Wed, 26 Dec 2018 16:56:51 +0300 [thread overview] Message-ID: <0d60bb46-13fa-f7e1-5ca6-8586456444e4@tarantool.org> (raw) In-Reply-To: <EA1374B8-8DD8-42DD-9CAA-5FE136939959@tarantool.org> On 25.12.2018 14:40, n.pettik wrote: > All points considered, I suggest diff like this: > > diff --git a/src/box/sql/func.c b/src/box/sql/func.c > index f397e23c1..9b5773321 100644 > --- a/src/box/sql/func.c > +++ b/src/box/sql/func.c > @@ -1203,7 +1203,8 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv) > int i; /* Loop counter */ > unsigned char *aLen = 0; /* Length of each character in zCharSet */ > unsigned char **azChar = 0; /* Individual characters in zCharSet */ > - int nChar; /* Number of characters in zCharSet */ > + /* Number of UTF-8 characters in zCharSet. */ > + int nChar; > > if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { > return; > @@ -1224,17 +1225,20 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv) > return; > } else { > const unsigned char *z = zCharSet; > - int sizeOfCharSet = \ > - sqlite3_value_bytes(argv[1]); /* Size of char set in bytes. */ > - int nProcessedBytes = 0; > + int trim_set_sz = sqlite3_value_bytes(argv[1]); > + int handled_bytes_cnt = trim_set_sz; > nChar = 0; > - const unsigned char *zStepBack; > - /* Count the number of UTF-8 characters passing through the > - * entire char set, but not up to the '\0' or X'00' character. */ > - while(sizeOfCharSet - nProcessedBytes > 0) { > - zStepBack = z; > + /* > + * Count the number of UTF-8 characters passing > + * through the entire char set, but not up > + * to the '\0' or X'00' character. This allows > + * to handle trimming set containing such > + * characters. 
> + */ > + while(handled_bytes_cnt > 0) { > + const unsigned char *prev_byte = z; > SQLITE_SKIP_UTF8(z); > - nProcessedBytes += z - zStepBack; > + handled_bytes_cnt -= (z - prev_byte); > nChar++; > } > if (nChar > 0) { > @@ -1247,15 +1251,12 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv) > aLen = (unsigned char *)&azChar[nChar]; > z = zCharSet; > nChar = 0; > - nProcessedBytes = 0; > - /* Similar to the previous cycle. But > - * now write into "azCharSet". */ > - while(sizeOfCharSet - nProcessedBytes > 0) { > + handled_bytes_cnt = trim_set_sz; > + while(handled_bytes_cnt > 0) { > azChar[nChar] = (unsigned char *)z; > - zStepBack = z; > SQLITE_SKIP_UTF8(z); > - nProcessedBytes += z - zStepBack; > aLen[nChar] = (u8) (z - azChar[nChar]); > + handled_bytes_cnt -= aLen[nChar]; > nChar++; > > Check it out. If you are ok with it, you can apply it (partially or fully). > Applied your diff. commit 844d438852be6e3bc06a7020ec0aeb96d3d5ee4e Author: Roman Khabibov <roman.habibov@tarantool.org> Date: Sat Dec 15 13:21:59 2018 +0300 sql: fix bug with BLOB TRIM() when X'00' in char set The reason for the bug was that X'00' is a terminal symbol. If the char set contained X'00', all characters are ignored after it (including itself). Closes #3543 diff --git a/src/box/sql/func.c b/src/box/sql/func.c index 9667aead5..e46b162d9 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1223,9 +1223,22 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv) } else if ((zCharSet = sqlite3_value_text(argv[1])) == 0) { return; } else { - const unsigned char *z; - for (z = zCharSet, nChar = 0; *z; nChar++) { + const unsigned char *z = zCharSet; + int trim_set_sz = sqlite3_value_bytes(argv[1]); + int handled_bytes_cnt = trim_set_sz; + nChar = 0; + /* + * Count the number of UTF-8 characters passing + * through the entire char set, but not up + * to the '\0' or X'00' character. This allows + * to handle trimming set containing such + * characters. 
+ */ + while(handled_bytes_cnt > 0) { + const unsigned char *prev_byte = z; SQLITE_SKIP_UTF8(z); + handled_bytes_cnt -= (z - prev_byte); + nChar++; } if (nChar > 0) { azChar = @@ -1235,10 +1248,15 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv) return; } aLen = (unsigned char *)&azChar[nChar]; - for (z = zCharSet, nChar = 0; *z; nChar++) { + z = zCharSet; + nChar = 0; + handled_bytes_cnt = trim_set_sz; + while(handled_bytes_cnt > 0) { azChar[nChar] = (unsigned char *)z; SQLITE_SKIP_UTF8(z); aLen[nChar] = (u8) (z - azChar[nChar]); + handled_bytes_cnt -= aLen[nChar]; + nChar++; } } } diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua index 393212968..b7de1d955 100755 --- a/test/sql-tap/func.test.lua +++ b/test/sql-tap/func.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(14535) +test:plan(14547) --!./tcltestrunner.lua -- 2001 September 15 @@ -2100,6 +2100,128 @@ test:do_execsql_test( -- </func-22.22> }) +-- gh-3543 Check trimming of binary string when X'00' in trimming char set. 
+ +test:do_execsql_test( + "func-22.23", + [[ + SELECT TRIM(X'004100', X'00'); + ]], { + -- <func-22.23> + "A" + -- </func-22.23> + }) + +test:do_execsql_test( + "func-22.24", + [[ + SELECT TRIM(X'004100', X'0000'); + ]], { + -- <func-22.24> + "A" + -- </func-22.24> + }) + +test:do_execsql_test( + "func-22.25", + [[ + SELECT TRIM(X'004100', X'0042'); + ]], { + -- <func-22.25> + "A" + -- </func-22.25> + }) + +test:do_execsql_test( + "func-22.26", + [[ + SELECT TRIM(X'00004100420000', X'00'); + ]], { + -- <func-22.26> + "A\0B" + -- </func-22.26> + }) + +test:do_execsql_test( + "func-22.27", + [[ + SELECT LTRIM(X'004100', X'00'); + ]], { + -- <func-22.27> + "A\0" + -- </func-22.27> + }) + +test:do_execsql_test( + "func-22.28", + [[ + SELECT LTRIM(X'004100', X'0000'); + ]], { + -- <func-22.28> + "A\0" + -- </func-22.28> + }) + +test:do_execsql_test( + "func-22.29", + [[ + SELECT LTRIM(X'004100', X'0042'); + ]], { + -- <func-22.29> + "A\0" + -- </func-22.29> + }) + +test:do_execsql_test( + "func-22.30", + [[ + SELECT LTRIM(X'00004100420000', X'00'); + ]], { + -- <func-22.30> + "A\0B\0\0" + -- </func-22.30> + }) + +test:do_execsql_test( + "func-22.31", + [[ + SELECT RTRIM(X'004100', X'00'); + ]], { + -- <func-22.31> + "\0A" + -- </func-22.31> + }) + +test:do_execsql_test( + "func-22.32", + [[ + SELECT RTRIM(X'004100', X'0000'); + ]], { + -- <func-22.32> + "\0A" + -- </func-22.32> + }) + +test:do_execsql_test( + "func-22.33", + [[ + SELECT RTRIM(X'004100', X'0042'); + ]], { + -- <func-22.33> + "\0A" + -- </func-22.33> + }) + +test:do_execsql_test( + "func-22.34", + [[ + SELECT RTRIM(X'00004100420000', X'00'); + ]], { + -- <func-22.34> + "\0\0A\0B" + -- </func-22.34> + }) + -- This is to test the deprecated sqlite3_aggregate_count() API. -- --test:do_test(
next prev parent reply other threads:[~2018-12-26 13:56 UTC|newest] Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-12-15 10:57 [tarantool-patches] " Roman Khabibov 2018-12-18 8:40 ` [tarantool-patches] " n.pettik 2018-12-18 14:30 ` Roman 2018-12-25 11:40 ` n.pettik 2018-12-26 13:56 ` Roman [this message] 2018-12-28 11:09 ` n.pettik 2018-12-20 20:41 ` Vladislav Shpilevoy 2018-12-27 12:28 ` Kirill Yukhin
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=0d60bb46-13fa-f7e1-5ca6-8586456444e4@tarantool.org \ --to=roman.habibov@tarantool.org \ --cc=korablev@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='[tarantool-patches] Re: [PATCH] sql: fix bug with BLOB TRIM() when X'\''00'\'' in char set' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.