[tarantool-patches] Re: [PATCH v2] sql: Duplicate key error for a non-unique index

Vladimir Davydov vdavydov.dev at gmail.com
Thu Feb 28 19:19:45 MSK 2019


[ Cc += Kostja - AFAIR he had something against using STRENGTH in
  collation comparison, see my comment inline ]

On Thu, Feb 28, 2019 at 05:17:13PM +0300, Stanislav Zudin wrote:
> > > +static bool
> > > +key_def_need_merge(const struct key_def *sec_key_def,
> > > +		   const struct key_part *pk_key_part)
> > > +{
> > > +	const struct key_part* end = sec_key_def->parts +
> > > +				     sec_key_def->part_count;
> > > +	const struct key_part* part = key_def_find_next(sec_key_def->parts,
> > > +							end,
> > > +							pk_key_part);
> > > +	if (part == NULL)
> > > +		return true;
> > > +
> > > +	/* The duplicate key_part is found,
> > > +	 * compare collation */
> > > +	if (part->coll == pk_key_part->coll)
> > > +		return false;
> > > +
> > > +	if (part->coll == NULL ||
> > > +	    part->coll->strength == COLL_ICU_STRENGTH_DEFAULT) {
> > > +		return false;
> > > +		/* If collation of the sec. key part
> > > +		 * is binary then the sec. key
> > > +		 * doesn't require a composite key.
> > > +		 * */
> >
> > Perhaps, it's worth encapsulating this logic in a helper function
> > defined in coll.c. Something like coll_is_compatible.
>
> This case is not a general one, so it doesn't deserve to be implemented as a
> generic helper function.

Well, coll isn't an independent library. IMO it'd be perfectly okay to
add a helper there, which is only relevant to Tarantool internals.

> The updated patch is below:
> 
> ---
> Branch: https://github.com/tarantool/tarantool/tree/stanztt/gh-3537-nonunique-index-dup-key-error
> Issue: https://github.com/tarantool/tarantool/issues/3537
> 
>  src/CMakeLists.txt          |   3 +-
>  src/box/key_def.c           |  29 +++-
>  src/coll.c                  |  38 +++++
>  src/coll.h                  |   4 +
>  test/sql/collation.result   | 330 ++++++++++++++++++++++++++++++++++++
>  test/sql/collation.test.lua | 123 ++++++++++++++
>  6 files changed, 524 insertions(+), 3 deletions(-)
> 
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
> index 04de5ad04..7346e7ba7 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -125,7 +125,8 @@ target_link_libraries(core
>      ${LIBEIO_LIBRARIES}
>      ${LIBCORO_LIBRARIES}
>      ${MSGPUCK_LIBRARIES}
> -    ${generic_libraries})
> +    ${generic_libraries}
> +    ${ICU_LIBRARIES})
> 
>  add_library(stat STATIC rmean.c latency.c histogram.c)
>  target_link_libraries(stat core)
> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index 9411ade39..61b0258b2 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -37,6 +37,7 @@
>  #include "schema_def.h"
>  #include "coll_id_cache.h"
>  #include "small/region.h"
> +#include "coll.h"
> 
>  const char *sort_order_strs[] = { "asc", "desc", "undef" };
> 
> @@ -611,6 +612,30 @@ key_def_find(const struct key_def *key_def, const
> struct key_part *to_find)
>      return NULL;
>  }
> 
> +static bool
> +key_def_can_merge(const struct key_def *first_key_def,
> +    const struct key_part *second_key_part)

Your MUA mangles patches. Please fix it - it makes the patches difficult
to review.

> +{
> +    const struct key_part *part = key_def_find(first_key_def,
> +                           second_key_part);
> +    if (part == NULL)
> +        return true;

I asked you to return false in this case, because this means we can't
merge the given key part into the given key def.

> +
> +    /*
> +     * The duplicate key_part is found,
> +     * compare collation
> +     */
> +    if (part->coll == second_key_part->coll)
> +        return false;
> +
> +    if (part->coll == NULL ||
> +        /*part->coll->strength == COLL_ICU_STRENGTH_DEFAULT)*/

Stray hunk, pleaase remove.

> +        coll_get_strength(part->coll) == COLL_ICU_STRENGTH_DEFAULT)

If my memory doesn't fail, on our last daily meeting, Kostja mentioned
that using STRENGTH here isn't quite correct...

> +        return false;
> +    else
> +        return true;
> +}
> +
>  bool
>  key_def_contains(const struct key_def *first, const struct key_def *second)
>  {
> @@ -639,7 +664,7 @@ key_def_merge(const struct key_def *first, const struct
> key_def *second)
>      part = second->parts;
>      end = part + second->part_count;
>      for (; part != end; part++) {
> -        if (key_def_find(first, part) != NULL)
> +        if (!key_def_can_merge(first, part))
>              --new_part_count;
>          else
>              sz += part->path_len;
> @@ -677,7 +702,7 @@ key_def_merge(const struct key_def *first, const struct
> key_def *second)
>      part = second->parts;
>      end = part + second->part_count;
>      for (; part != end; part++) {
> -        if (key_def_find(first, part) != NULL)
> +        if (!key_def_can_merge(first, part))
>              continue;
>          key_def_set_part(new_def, pos++, part->fieldno, part->type,
>                   part->nullable_action, part->coll,
> diff --git a/src/coll.c b/src/coll.c
> index 6d9c44dbf..7d7267d8a 100644
> --- a/src/coll.c
> +++ b/src/coll.c
> @@ -295,6 +295,44 @@ coll_def_snfingerprint(char *buffer, int size, const
> struct coll_def *def)
>      return total;
>  }
> 
> +static enum coll_icu_strength
> +cast_coll_strength(UCollationStrength s)
> +{
> +    enum coll_icu_strength res = COLL_ICU_STRENGTH_DEFAULT;
> +
> +    switch(s) {
> +    case UCOL_PRIMARY:
> +        res = COLL_ICU_STRENGTH_PRIMARY;
> +        break;
> +    case UCOL_SECONDARY:
> +        res = COLL_ICU_STRENGTH_SECONDARY;
> +        break;
> +    case UCOL_TERTIARY:
> +        res = COLL_ICU_STRENGTH_TERTIARY;
> +        break;
> +    case UCOL_QUATERNARY:
> +        res = COLL_ICU_STRENGTH_QUATERNARY;
> +        break;
> +    case UCOL_IDENTICAL:
> +        res = COLL_ICU_STRENGTH_IDENTICAL;
> +        break;
> +    default:
> +        break;

Ouch. This function looks cumbersome. I'd really prefer if you rather
factored out the whole collation check into a helper function defined
in coll.c.

> +    }
> +
> +    return res;
> +}
> +
> +/*enum coll_icu_strength*/
> +enum coll_icu_strength
> +coll_get_strength(const struct coll *coll)
> +{
> +    if (coll->collator == NULL)
> +        return COLL_ICU_STRENGTH_DEFAULT;
> +    else
> +        return cast_coll_strength(ucol_getStrength(coll->collator));
> +}
> +
>  struct coll *
>  coll_new(const struct coll_def *def)
>  {



More information about the Tarantool-patches mailing list