<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; line-break: after-white-space;" class=""><br class=""><div><br class=""><blockquote type="cite" class=""><div class="">On 11 Sep 2018, at 13:06, Alex Khatskevich &lt;<a href="mailto:avkhatskevich@tarantool.org" class="">avkhatskevich@tarantool.org</a>&gt; wrote:</div><br class="Apple-interchange-newline"><div class="">
  
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" class="">
  
  <div text="#000000" bgcolor="#FFFFFF" class=""><p class=""><br class="">
    </p>
    <br class="">
    <div class="moz-cite-prefix">On 11.09.2018 09:06, Nikita Tatunov
      wrote:<br class="">
    </div>
    <blockquote type="cite" cite="mid:58B407E2-AF5D-4531-A9FF-9DC57CE0070B@tarantool.org" class="">
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8" class="">
      <br class="">
      <div class=""><br class="">
        <blockquote type="cite" class="">
          <div class="">On 11 Sep 2018, at 01:20, Alex Khatskevich &lt;<a href="mailto:avkhatskevich@tarantool.org" class="" moz-do-not-send="true">avkhatskevich@tarantool.org</a>&gt;
            wrote:</div>
          <br class="Apple-interchange-newline">
          <div class="">
            <blockquote type="cite" cite="mid:87897608-173E-45EB-80A1-8B249706D8A1@tarantool.org" style="font-family: Helvetica; font-size: 12px;
              font-style: normal; font-variant-caps: normal;
              font-weight: normal; letter-spacing: normal; orphans:
              auto; text-align: start; text-indent: 0px; text-transform:
              none; white-space: normal; widows: auto; word-spacing:
              0px; -webkit-text-size-adjust: auto;
              -webkit-text-stroke-width: 0px; background-color: rgb(255,
              255, 255); text-decoration: none;" class="">
              <div class=""><br class="Apple-interchange-newline">
                <br class="">
                <blockquote type="cite" class="">
                  <div class="">On 17 Aug 2018, at 14:42, Alex
                    Khatskevich &lt;<a href="mailto:avkhatskevich@tarantool.org" class="" moz-do-not-send="true">avkhatskevich@tarantool.org</a>&gt;
                    wrote:</div>
                  <br class="Apple-interchange-newline">
                  <div class=""><br class="" style="caret-color: rgb(0,
                      0, 0); font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none;">
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">On 17.08.2018 14:17, Alexander
                      Turenko wrote:</span><br class="" style="caret-color: rgb(0, 0, 0); font-family:
                      Helvetica; font-size: 12px; font-style: normal;
                      font-variant-caps: normal; font-weight: normal;
                      letter-spacing: normal; text-align: start;
                      text-indent: 0px; text-transform: none;
                      white-space: normal; word-spacing: 0px;
                      -webkit-text-stroke-width: 0px; text-decoration:
                      none;">
                    <blockquote type="cite" class="" style="font-family:
                      Helvetica; font-size: 12px; font-style: normal;
                      font-variant-caps: normal; font-weight: normal;
                      letter-spacing: normal; text-align: start;
                      text-indent: 0px; text-transform: none;
                      white-space: normal; word-spacing: 0px;
                      -webkit-text-stroke-width: 0px; text-decoration:
                      none;">0xffff is the result of 'end of a string'
                      check as well as internal buffer<br class="">
                      overflow error. I have the relevant code pasted in
                      the first review of<br class="">
                      the patch (July, 18).<br class="">
                      <br class="">
                      // source/common/ucnv.c::ucnv_getNextUChar<br class="">
                      1860     s=*source;<br class="">
                      1861     if(sourceLimit&lt;s) {<br class="">
                      1862         *err=U_ILLEGAL_ARGUMENT_ERROR;<br class="">
                      1863         return 0xffff;<br class="">
                      1864     }<br class="">
                      <br class="">
                      We should not handle the buffer overflow case as
                      an invalid symbol. Of<br class="">
                      course we should not handle it as the 'end of the
                      string' situation.<br class="">
                      Ideally we should perform the pointer check ourselves and raise
                      an error in case of<br class="">
                      0xffff. I had thought that a buffer overflow error
                      is unlikely to meet,<br class="">
                      but you are right: we should differentiate these
                      situations.<br class="">
                      <br class="">
                      In one of the previous versions of the patch we
                      perform this check like so:<br class="">
                      <br class="">
                      #define Utf8Read(s, e) (((s) < (e)) ?\<br class="">
                      <span class="Apple-tab-span" style="white-space: pre;"> </span>ucnv_getNextUChar(pUtf8conv,
                      &s, e, &status) : 0)<br class="">
                      <br class="">
                      I'm not sure why it was changed. Maybe it is an attempt to
                      correctly handle '\0'<br class="">
                      symbol (it is valid unicode character)?<br class="">
                    </blockquote>
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">The define you have pasted can
                      return 0xffff.</span><br class="" style="caret-color: rgb(0, 0, 0); font-family:
                      Helvetica; font-size: 12px; font-style: normal;
                      font-variant-caps: normal; font-weight: normal;
                      letter-spacing: normal; text-align: start;
                      text-indent: 0px; text-transform: none;
                      white-space: normal; word-spacing: 0px;
                      -webkit-text-stroke-width: 0px; text-decoration:
                      none;">
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">The reasons to change it back
                      are described in the previous patchset.</span><br class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none;">
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">In short:</span><br class="" style="caret-color: rgb(0, 0, 0); font-family:
                      Helvetica; font-size: 12px; font-style: normal;
                      font-variant-caps: normal; font-weight: normal;
                      letter-spacing: normal; text-align: start;
                      text-indent: 0px; text-transform: none;
                      white-space: normal; word-spacing: 0px;
                      -webkit-text-stroke-width: 0px; text-decoration:
                      none;">
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">1. It is equivalent to</span><br class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none;">
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">  <span class="Apple-converted-space"> </span>a. check s
                      < e in a while loop</span><br class="" style="caret-color: rgb(0, 0, 0); font-family:
                      Helvetica; font-size: 12px; font-style: normal;
                      font-variant-caps: normal; font-weight: normal;
                      letter-spacing: normal; text-align: start;
                      text-indent: 0px; text-transform: none;
                      white-space: normal; word-spacing: 0px;
                      -webkit-text-stroke-width: 0px; text-decoration:
                      none;">
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">  <span class="Apple-converted-space"> </span>b. read
                      next character inside of while loop body.</span><br class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none;">
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">2. In some usages of the code
                      this check (s&lt;e) was redundant (it was
                      performed a couple lines above)</span><br class="" style="caret-color: rgb(0, 0, 0); font-family:
                      Helvetica; font-size: 12px; font-style: normal;
                      font-variant-caps: normal; font-weight: normal;
                      letter-spacing: normal; text-align: start;
                      text-indent: 0px; text-transform: none;
                      white-space: normal; word-spacing: 0px;
                      -webkit-text-stroke-width: 0px; text-decoration:
                      none;">
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">3. There is no reason to
                      rewrite the old version of this function. (So, we
                      decided to use old version of the function)</span><br class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none;">
                    <blockquote type="cite" class="" style="font-family:
                      Helvetica; font-size: 12px; font-style: normal;
                      font-variant-caps: normal; font-weight: normal;
                      letter-spacing: normal; text-align: start;
                      text-indent: 0px; text-transform: none;
                      white-space: normal; word-spacing: 0px;
                      -webkit-text-stroke-width: 0px; text-decoration:
                      none;">So I see three ways to proceed:<br class="">
                      <br class="">
                      1. Lean on icu's check and ignore possibility of
                      the buffer overflow.<br class="">
                      2. Use our own check and possibly meet '\0'
                      problems.<br class="">
                      3. Check for U_ILLEGAL_ARGUMENT_ERROR to treat as
                      end of a string, raise<br class="">
                         the error for other 0xffff.<br class="">
                      <br class="">
                      Alex, what do you suggest here?<br class="">
                    </blockquote>
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">As I understand, by now the
                      0xffff is used ONLY to handle the case of
                      unexpectedly ended symbol.</span><br class="" style="caret-color: rgb(0, 0, 0); font-family:
                      Helvetica; font-size: 12px; font-style: normal;
                      font-variant-caps: normal; font-weight: normal;
                      letter-spacing: normal; text-align: start;
                      text-indent: 0px; text-transform: none;
                      white-space: normal; word-spacing: 0px;
                      -webkit-text-stroke-width: 0px; text-decoration:
                      none;">
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">E.g. some symbol consists of 2
                      characters, but the length of the input buffer is
                      1.</span><br class="" style="caret-color: rgb(0,
                      0, 0); font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none;">
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">In my opinion this is the same
                      as an invalid symbol.</span><br class="" style="caret-color: rgb(0, 0, 0); font-family:
                      Helvetica; font-size: 12px; font-style: normal;
                      font-variant-caps: normal; font-weight: normal;
                      letter-spacing: normal; text-align: start;
                      text-indent: 0px; text-transform: none;
                      white-space: normal; word-spacing: 0px;
                      -webkit-text-stroke-width: 0px; text-decoration:
                      none;">
                    <br class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none;">
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">I guess that internal buffer
                      overflow cannot occur in the `ucnv_getNextUChar`
                      function.</span><br class="" style="caret-color:
                      rgb(0, 0, 0); font-family: Helvetica; font-size:
                      12px; font-style: normal; font-variant-caps:
                      normal; font-weight: normal; letter-spacing:
                      normal; text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none;">
                    <br class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none;">
                    <span class="" style="caret-color: rgb(0, 0, 0);
                      font-family: Helvetica; font-size: 12px;
                      font-style: normal; font-variant-caps: normal;
                      font-weight: normal; letter-spacing: normal;
                      text-align: start; text-indent: 0px;
                      text-transform: none; white-space: normal;
                      word-spacing: 0px; -webkit-text-stroke-width: 0px;
                      text-decoration: none; float: none; display:
                      inline !important;">I suppose that it is Nikita's
                      duty to investigate this problem and explain it to
                      us all. I just have noticed a strange usage.</span></div>
                </blockquote>
              </div>
              <div class=""><br class="">
              </div>
              <div class="">Hello, please consider my comments.</div>
              <div class=""><br class="">
              </div>
              <div class="">There are some cases when 0xffff can occur,
                but:</div>
              <div class=""><span class="Apple-tab-span" style="white-space: pre;">   </span>1) <span class="" style="font-family: HelveticaNeue;">Cannot
                  trigger in our context.</span></div>
              <div class=""><span class="" style="font-family:
                  HelveticaNeue;"><span class="Apple-tab-span" style="white-space: pre;">     </span>2)
                  C</span><span class="" style="font-family:
                  HelveticaNeue;">annot trigger in our context.</span></div>
              <div class=""><span class="" style="font-family:
                  HelveticaNeue;"><span class="Apple-tab-span" style="white-space: pre;">     </span>3)
                  O</span><span class="" style="font-family:
                  HelveticaNeue;">nly triggers if end < start.
                  (Cannot happen in sql_utf8_pattern_compare, i guess)</span></div>
              <div class=""><span class="" style="font-family:
                  HelveticaNeue;"><span class="Apple-tab-span" style="white-space: pre;">     </span>4)
                  O</span><span class="" style="font-family:
                  HelveticaNeue;">nly triggers if string length >
                  (size_t) 0x7fffffff (can it actually happen? I don’t
                  think so).</span></div>
              <div class=""><span class="" style="font-family:
                  HelveticaNeue;"><span class="Apple-tab-span" style="white-space: pre;">     </span>5)
                  O</span><span class="" style="font-family:
                  HelveticaNeue;">ccurs when trying to access
                  unindexed data.</span></div>
              <div class=""><span class="" style="font-family:
                  HelveticaNeue;"><span class="Apple-tab-span" style="white-space: pre;">     </span>6)
                  Cannot occur in our context.</span></div>
              <div class=""><span class="" style="font-family:
                  HelveticaNeue;"><span class="Apple-tab-span" style="white-space: pre;">     </span>7) </span><span class="" style="font-family: HelveticaNeue;">Cannot
                  occur in our context.</span></div>
            </blockquote>
            <span style="caret-color: rgb(0, 0, 0); font-family:
              Helvetica; font-size: 12px; font-style: normal;
              font-variant-caps: normal; font-weight: normal;
              letter-spacing: normal; text-align: start; text-indent:
              0px; text-transform: none; white-space: normal;
              word-spacing: 0px; -webkit-text-stroke-width: 0px;
              background-color: rgb(255, 255, 255); text-decoration:
              none; float: none; display: inline !important;" class="">I
              do not understand what those numbers are related to.
              Please, describe it.</span><br style="caret-color: rgb(0,
              0, 0); font-family: Helvetica; font-size: 12px;
              font-style: normal; font-variant-caps: normal;
              font-weight: normal; letter-spacing: normal; text-align:
              start; text-indent: 0px; text-transform: none;
              white-space: normal; word-spacing: 0px;
              -webkit-text-stroke-width: 0px; background-color: rgb(255,
              255, 255); text-decoration: none;" class="">
          </div>
        </blockquote>
        <div class=""><br class="">
        </div>
        <div class="">They are related to possible cases returning 0xffff from
          icu source code (function ucnv_getNextUChar()).</div>
      </div>
    </blockquote>
    Can you just copy it here, so that anyone interested in that
    conversation can<br class="">
    analyze it without looking for source files?<br class="">
  </div>

</div></blockquote><br class=""></div><div>Ok then:</div><div><br class=""></div><div><div>U_CAPI UChar32 U_EXPORT2</div><div>ucnv_getNextUChar(UConverter *cnv,</div><div>                  const char **source, const char *sourceLimit,</div><div>                  UErrorCode *err) {</div><div>    UConverterToUnicodeArgs args;</div><div>    UChar buffer[U16_MAX_LENGTH];</div><div>    const char *s;</div><div>    UChar32 c;</div><div>    int32_t i, length;</div><div><br class=""></div><div>    /* check parameters */</div><div>    if(err==NULL || U_FAILURE(*err)) {</div><div>        return 0xffff;</div><div>    }</div><div><br class=""></div><div>    if(cnv==NULL || source==NULL) {</div><div>        *err=U_ILLEGAL_ARGUMENT_ERROR;</div><div>        return 0xffff;</div><div>    }</div><div><br class=""></div><div>    s=*source;</div><div>    if(sourceLimit<s) {</div><div>        *err=U_ILLEGAL_ARGUMENT_ERROR;</div><div>        return 0xffff;</div><div>    }</div><div><br class=""></div><div>    /*</div><div>     * Make sure that the buffer sizes do not exceed the number range for</div><div>     * int32_t because some functions use the size (in units or bytes)</div><div>     * rather than comparing pointers, and because offsets are int32_t values.</div><div>     *</div><div>     * size_t is guaranteed to be unsigned and large enough for the job.</div><div>     *</div><div>     * Return with an error instead of adjusting the limits because we would</div><div>     * not be able to maintain the semantics that either the source must be</div><div>     * consumed or the target filled (unless an error occurs).</div><div>     * An adjustment would be sourceLimit=t+0x7fffffff; for example.</div><div>     */</div><div>    if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {</div><div>        *err=U_ILLEGAL_ARGUMENT_ERROR;</div><div>        return 0xffff;</div><div>    }</div><div><br class=""></div><div>    c=U_SENTINEL;</div><div><br class=""></div><div>    /* flush 
the target overflow buffer */</div><div>    if(cnv->UCharErrorBufferLength>0) {</div><div>        UChar *overflow;</div><div><br class=""></div><div>        overflow=cnv->UCharErrorBuffer;</div><div>        i=0;</div><div>        length=cnv->UCharErrorBufferLength;</div><div>        U16_NEXT(overflow, i, length, c);</div><div><br class=""></div><div>        /* move the remaining overflow contents up to the beginning */</div><div>        if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {</div><div>            uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,</div><div>                         cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);</div><div>        }</div><div><br class=""></div><div>        if(!U16_IS_LEAD(c) || i<length) {</div><div>            return c;</div><div>        }</div><div>        /*</div><div>         * Continue if the overflow buffer contained only a lead surrogate,</div><div>         * in case the converter outputs single surrogates from complete</div><div>         * input sequences.</div><div>         */</div><div>    }</div><div><br class=""></div><div>    /*</div><div>     * flush==TRUE is implied for ucnv_getNextUChar()</div><div>     *</div><div>     * do not simply return even if s==sourceLimit because the converter may</div><div>     * not have seen flush==TRUE before</div><div>     */</div><div><br class=""></div><div>    /* prepare the converter arguments */</div><div>    args.converter=cnv;</div><div>    args.flush=TRUE;</div><div>    args.offsets=NULL;</div><div>    args.source=s;</div><div>    args.sourceLimit=sourceLimit;</div><div>    args.target=buffer;</div><div>    args.targetLimit=buffer+1;</div><div>    args.size=sizeof(args);</div><div><br class=""></div><div>    if(c<0) {</div><div>        /*</div><div>         * call the native getNextUChar() implementation if we are</div><div>         * at a character boundary (toULength==0)</div><div>         *</div><div>         * unlike with _toUnicode(), 
getNextUChar() implementations must set</div><div>         * U_TRUNCATED_CHAR_FOUND for truncated input,</div><div>         * in addition to setting toULength/toUBytes[]</div><div>         */</div><div>        if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {</div><div>            c=cnv->sharedData->impl->getNextUChar(&args, err);</div><div>            *source=s=args.source;</div><div>            if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {</div><div>                /* reset the converter without calling the callback function */</div><div>                _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);</div><div>                return 0xffff; /* no output */</div><div>            } else if(U_SUCCESS(*err) && c>=0) {</div><div>                return c;</div><div>            /*</div><div>             * else fall through to use _toUnicode() because</div><div>             *   UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all</div><div>             *   U_FAILURE: call _toUnicode() for callback handling (do not output c)</div><div>             */</div><div>            }</div><div>        }</div><div><br class=""></div><div>        /* convert to one UChar in buffer[0], or handle getNextUChar() errors */</div><div>        _toUnicodeWithCallback(&args, err);</div><div><br class=""></div><div>        if(*err==U_BUFFER_OVERFLOW_ERROR) {</div><div>            *err=U_ZERO_ERROR;</div><div>        }</div><div><br class=""></div><div>        i=0;</div><div>        length=(int32_t)(args.target-buffer);</div><div>    } else {</div><div>        /* write the lead surrogate from the overflow buffer */</div><div>        buffer[0]=(UChar)c;</div><div>        args.target=buffer+1;</div><div>        i=0;</div><div>        length=1;</div><div>    }</div><div><br class=""></div><div>    /* buffer contents starts at i and ends before length */</div><div><br class=""></div><div>    if(U_FAILURE(*err)) {</div><div>        c=0xffff; /* no output 
*/</div><div>    } else if(length==0) {</div><div>        /* no input or only state changes */</div><div>        *err=U_INDEX_OUTOFBOUNDS_ERROR;</div><div>        /* no need to reset explicitly because _toUnicodeWithCallback() did it */</div><div>        c=0xffff; /* no output */</div><div>    } else {</div><div>        c=buffer[0];</div><div>        i=1;</div><div>        if(!U16_IS_LEAD(c)) {</div><div>            /* consume c=buffer[0], done */</div><div>        } else {</div><div>            /* got a lead surrogate, see if a trail surrogate follows */</div><div>            UChar c2;</div><div><br class=""></div><div>            if(cnv->UCharErrorBufferLength>0) {</div><div>                /* got overflow output from the conversion */</div><div>                if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {</div><div>                    /* got a trail surrogate, too */</div><div>                    c=U16_GET_SUPPLEMENTARY(c, c2);</div><div><br class=""></div><div>                    /* move the remaining overflow contents up to the beginning */</div><div>                    if((--cnv->UCharErrorBufferLength)>0) {</div><div>                        uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,</div><div>                                     cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);</div><div>                    }</div><div>                } else {</div><div>                    /* c is an unpaired lead surrogate, just return it */</div><div>                }</div><div>            } else if(args.source<sourceLimit) {</div><div>                /* convert once more, to buffer[1] */</div><div>                args.targetLimit=buffer+2;</div><div>                _toUnicodeWithCallback(&args, err);</div><div>                if(*err==U_BUFFER_OVERFLOW_ERROR) {</div><div>                    *err=U_ZERO_ERROR;</div><div>                }</div><div><br class=""></div><div>                length=(int32_t)(args.target-buffer);</div><div>                
if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {</div><div>                    /* got a trail surrogate, too */</div><div>                    c=U16_GET_SUPPLEMENTARY(c, c2);</div><div>                    i=2;</div><div>                }</div><div>            }</div><div>        }</div><div>    }</div><div><br class=""></div><div>    /*</div><div>     * move leftover output from buffer[i..length[</div><div>     * into the beginning of the overflow buffer</div><div>     */</div><div>    if(i<length) {</div><div>        /* move further overflow back */</div><div>        int32_t delta=length-i;</div><div>        if((length=cnv->UCharErrorBufferLength)>0) {</div><div>            uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,</div><div>                         length*U_SIZEOF_UCHAR);</div><div>        }</div><div>        cnv->UCharErrorBufferLength=(int8_t)(length+delta);</div><div><br class=""></div><div>        cnv->UCharErrorBuffer[0]=buffer[i++];</div><div>        if(delta>1) {</div><div>            cnv->UCharErrorBuffer[1]=buffer[i];</div><div>        }</div><div>    }</div><div><br class=""></div><div>    *source=args.source;</div><div>    return c;</div><div>}</div></div><br class=""><div class="">
<div dir="auto" style="word-wrap: break-word; -webkit-nbsp-mode: space; line-break: after-white-space;" class=""><div style="caret-color: rgb(0, 0, 0); color: rgb(0, 0, 0); font-family: Helvetica; font-size: 12px; font-style: normal; font-variant-caps: normal; font-weight: normal; letter-spacing: normal; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; text-decoration: none;">--</div><div style="caret-color: rgb(0, 0, 0); color: rgb(0, 0, 0); font-family: Helvetica; font-size: 12px; font-style: normal; font-variant-caps: normal; font-weight: normal; letter-spacing: normal; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; text-decoration: none;">WBR, Nikita Tatunov.</div><div style="caret-color: rgb(0, 0, 0); color: rgb(0, 0, 0); font-family: Helvetica; font-size: 12px; font-style: normal; font-variant-caps: normal; font-weight: normal; letter-spacing: normal; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; text-decoration: none;"><a href="mailto:n.tatunov@tarantool.org" class="">n.tatunov@tarantool.org</a></div></div>
</div>
<br class=""></body></html>