[tarantool-patches] Re: [PATCH] sql: fix bug with BLOB TRIM() when X'00' in char set

Roman roman.habibov at tarantool.org
Wed Dec 26 16:56:51 MSK 2018


On 25.12.2018 14:40, n.pettik wrote:
> All points considered, I suggest diff like this:
>
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index f397e23c1..9b5773321 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -1203,7 +1203,8 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
>          int i;                  /* Loop counter */
>          unsigned char *aLen = 0;        /* Length of each character in zCharSet */
>          unsigned char **azChar = 0;     /* Individual characters in zCharSet */
> -       int nChar;              /* Number of characters in zCharSet */
> +       /* Number of UTF-8 characters in zCharSet. */
> +       int nChar;
>   
>          if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
>                  return;
> @@ -1224,17 +1225,20 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
>                  return;
>          } else {
>                  const unsigned char *z = zCharSet;
> -               int sizeOfCharSet = \
> -               sqlite3_value_bytes(argv[1]); /* Size of char set in bytes. */
> -               int nProcessedBytes = 0;
> +               int trim_set_sz = sqlite3_value_bytes(argv[1]);
> +               int handled_bytes_cnt = trim_set_sz;
>                  nChar = 0;
> -               const unsigned char *zStepBack;
> -               /* Count the number of UTF-8 characters passing through the
> -               * entire char set, but not up to the '\0' or X'00' character. */
> -               while(sizeOfCharSet - nProcessedBytes > 0) {
> -                       zStepBack = z;
> +               /*
> +                * Count the number of UTF-8 characters passing
> +                * through the entire char set, but not up
> +                * to the '\0' or X'00' character. This allows
> +                * to handle trimming set containing such
> +                * characters.
> +                */
> +               while(handled_bytes_cnt > 0) {
> +                       const unsigned char *prev_byte = z;
>                          SQLITE_SKIP_UTF8(z);
> -                       nProcessedBytes += z - zStepBack;
> +                       handled_bytes_cnt -= (z - prev_byte);
>                          nChar++;
>                  }
>                  if (nChar > 0) {
> @@ -1247,15 +1251,12 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
>                          aLen = (unsigned char *)&azChar[nChar];
>                          z = zCharSet;
>                          nChar = 0;
> -                       nProcessedBytes = 0;
> -                       /* Similar to the previous cycle. But
> -                       * now write into "azCharSet". */
> -                       while(sizeOfCharSet - nProcessedBytes > 0) {
> +                       handled_bytes_cnt = trim_set_sz;
> +                       while(handled_bytes_cnt > 0) {
>                                  azChar[nChar] = (unsigned char *)z;
> -                               zStepBack = z;
>                                  SQLITE_SKIP_UTF8(z);
> -                               nProcessedBytes += z - zStepBack;
>                                  aLen[nChar] = (u8) (z - azChar[nChar]);
> +                               handled_bytes_cnt -= aLen[nChar];
>                                  nChar++;
>
> Check it out. If you are ok with it, you can apply it (partially or fully).
>
Applied your diff.


commit 844d438852be6e3bc06a7020ec0aeb96d3d5ee4e
Author: Roman Khabibov <roman.habibov at tarantool.org>
Date:   Sat Dec 15 13:21:59 2018 +0300

     sql: fix bug with BLOB TRIM() when X'00' in char set

     The reason for the bug was that X'00' was treated as a terminating
     character. If the char set contained X'00', all characters after it
     (including X'00' itself) were ignored.

     Closes #3543

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 9667aead5..e46b162d9 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1223,9 +1223,22 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
      } else if ((zCharSet = sqlite3_value_text(argv[1])) == 0) {
          return;
      } else {
-        const unsigned char *z;
-        for (z = zCharSet, nChar = 0; *z; nChar++) {
+        const unsigned char *z = zCharSet;
+        int trim_set_sz = sqlite3_value_bytes(argv[1]);
+        int handled_bytes_cnt = trim_set_sz;
+        nChar = 0;
+        /*
+        * Count the number of UTF-8 characters passing
+        * through the entire char set, but not up
+        * to the '\0' or X'00' character. This allows
+        * to handle trimming set containing such
+        * characters.
+        */
+        while(handled_bytes_cnt > 0) {
+        const unsigned char *prev_byte = z;
              SQLITE_SKIP_UTF8(z);
+            handled_bytes_cnt -= (z - prev_byte);
+            nChar++;
          }
          if (nChar > 0) {
              azChar =
@@ -1235,10 +1248,15 @@ trimFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
                  return;
              }
              aLen = (unsigned char *)&azChar[nChar];
-            for (z = zCharSet, nChar = 0; *z; nChar++) {
+            z = zCharSet;
+            nChar = 0;
+            handled_bytes_cnt = trim_set_sz;
+            while(handled_bytes_cnt > 0) {
                  azChar[nChar] = (unsigned char *)z;
                  SQLITE_SKIP_UTF8(z);
                  aLen[nChar] = (u8) (z - azChar[nChar]);
+                handled_bytes_cnt -= aLen[nChar];
+                nChar++;
              }
          }
      }
diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
index 393212968..b7de1d955 100755
--- a/test/sql-tap/func.test.lua
+++ b/test/sql-tap/func.test.lua
@@ -1,6 +1,6 @@
  #!/usr/bin/env tarantool
  test = require("sqltester")
-test:plan(14535)
+test:plan(14547)

  --!./tcltestrunner.lua
  -- 2001 September 15
@@ -2100,6 +2100,128 @@ test:do_execsql_test(
          -- </func-22.22>
      })

+-- gh-3543 Check trimming of binary string when X'00' in trimming char set.
+
+test:do_execsql_test(
+    "func-22.23",
+    [[
+        SELECT TRIM(X'004100', X'00');
+    ]], {
+        -- <func-22.23>
+        "A"
+        -- </func-22.23>
+    })
+
+test:do_execsql_test(
+    "func-22.24",
+    [[
+        SELECT TRIM(X'004100', X'0000');
+    ]], {
+        -- <func-22.24>
+        "A"
+        -- </func-22.24>
+    })
+
+test:do_execsql_test(
+    "func-22.25",
+    [[
+        SELECT TRIM(X'004100', X'0042');
+    ]], {
+        -- <func-22.25>
+        "A"
+        -- </func-22.25>
+    })
+
+test:do_execsql_test(
+    "func-22.26",
+    [[
+        SELECT TRIM(X'00004100420000', X'00');
+    ]], {
+        -- <func-22.26>
+        "A\0B"
+        -- </func-22.26>
+    })
+
+test:do_execsql_test(
+    "func-22.27",
+    [[
+        SELECT LTRIM(X'004100', X'00');
+    ]], {
+        -- <func-22.27>
+        "A\0"
+        -- </func-22.27>
+    })
+
+test:do_execsql_test(
+    "func-22.28",
+    [[
+        SELECT LTRIM(X'004100', X'0000');
+    ]], {
+        -- <func-22.28>
+        "A\0"
+        -- </func-22.28>
+    })
+
+test:do_execsql_test(
+    "func-22.29",
+    [[
+        SELECT LTRIM(X'004100', X'0042');
+    ]], {
+        -- <func-22.29>
+        "A\0"
+        -- </func-22.29>
+    })
+
+test:do_execsql_test(
+    "func-22.30",
+    [[
+        SELECT LTRIM(X'00004100420000', X'00');
+    ]], {
+        -- <func-22.30>
+        "A\0B\0\0"
+        -- </func-22.30>
+    })
+
+test:do_execsql_test(
+    "func-22.31",
+    [[
+        SELECT RTRIM(X'004100', X'00');
+    ]], {
+        -- <func-22.31>
+        "\0A"
+        -- </func-22.31>
+    })
+
+test:do_execsql_test(
+    "func-22.32",
+    [[
+        SELECT RTRIM(X'004100', X'0000');
+    ]], {
+        -- <func-22.32>
+        "\0A"
+        -- </func-22.32>
+    })
+
+test:do_execsql_test(
+    "func-22.33",
+    [[
+        SELECT RTRIM(X'004100', X'0042');
+    ]], {
+        -- <func-22.33>
+        "\0A"
+        -- </func-22.33>
+    })
+
+test:do_execsql_test(
+    "func-22.34",
+    [[
+        SELECT RTRIM(X'00004100420000', X'00');
+    ]], {
+        -- <func-22.34>
+        "\0\0A\0B"
+        -- </func-22.34>
+    })
+
  -- This is to test the deprecated sqlite3_aggregate_count() API.
  --
  --test:do_test(





More information about the Tarantool-patches mailing list