[tarantool-patches] Re: [PATCH v2] sql: Duplicate key error for a non-unique index

Stanislav Zudin szudin at tarantool.org
Fri Mar 1 14:46:45 MSK 2019


The recent patch contains the following changes:
1. collation compatibility analysis was extracted to a single helper 
function.


On 28.02.2019 19:19, Vladimir Davydov wrote:
> [ Cc += Kostja - AFAIR he had something against using STRENGTH in
>    collation comparison, see my comment inline ]
> 
> On Thu, Feb 28, 2019 at 05:17:13PM +0300, Stanislav Zudin wrote:
>>>> +static bool
>>>> +key_def_need_merge(const struct key_def *sec_key_def,
>>>> +		   const struct key_part *pk_key_part)
>>>> +{
>>>> +	const struct key_part* end = sec_key_def->parts +
>>>> +				     sec_key_def->part_count;
>>>> +	const struct key_part* part = key_def_find_next(sec_key_def->parts,
>>>> +							end,
>>>> +							pk_key_part);
>>>> +	if (part == NULL)
>>>> +		return true;
>>>> +
>>>> +	/* The duplicate key_part is found,
>>>> +	 * compare collation */
>>>> +	if (part->coll == pk_key_part->coll)
>>>> +		return false;
>>>> +
>>>> +	if (part->coll == NULL ||
>>>> +	    part->coll->strength == COLL_ICU_STRENGTH_DEFAULT) {
>>>> +		return false;
>>>> +		/* If collation of the sec. key part
>>>> +		 * is binary then the sec. key
>>>> +		 * doesn't require a composite key.
>>>> +		 * */
>>>
>>> Perhaps, it's worth encapsulating this logic in a helper function
>>> defined in coll.c. Something like coll_is_compatible.
>>
>> This case is not a general one, so it doesn't deserve to be implemented as a
>> generic helper function.
> 
> Well, coll isn't an independent library. IMO it'd be perfectly okay to
> add a helper there, which is only relevant to Tarantool internals.

Moved this code to a separate function.

> 
>> The updated patch is below:
>>
>> ---
>> Branch: https://github.com/tarantool/tarantool/tree/stanztt/gh-3537-nonunique-index-dup-key-error
>> Issue: https://github.com/tarantool/tarantool/issues/3537
>>
>>   src/CMakeLists.txt          |   3 +-
>>   src/box/key_def.c           |  29 +++-
>>   src/coll.c                  |  38 +++++
>>   src/coll.h                  |   4 +
>>   test/sql/collation.result   | 330 ++++++++++++++++++++++++++++++++++++
>>   test/sql/collation.test.lua | 123 ++++++++++++++
>>   6 files changed, 524 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
>> index 04de5ad04..7346e7ba7 100644
>> --- a/src/CMakeLists.txt
>> +++ b/src/CMakeLists.txt
>> @@ -125,7 +125,8 @@ target_link_libraries(core
>>       ${LIBEIO_LIBRARIES}
>>       ${LIBCORO_LIBRARIES}
>>       ${MSGPUCK_LIBRARIES}
>> -    ${generic_libraries})
>> +    ${generic_libraries}
>> +    ${ICU_LIBRARIES})
>>
>>   add_library(stat STATIC rmean.c latency.c histogram.c)
>>   target_link_libraries(stat core)
>> diff --git a/src/box/key_def.c b/src/box/key_def.c
>> index 9411ade39..61b0258b2 100644
>> --- a/src/box/key_def.c
>> +++ b/src/box/key_def.c
>> @@ -37,6 +37,7 @@
>>   #include "schema_def.h"
>>   #include "coll_id_cache.h"
>>   #include "small/region.h"
>> +#include "coll.h"
>>
>>   const char *sort_order_strs[] = { "asc", "desc", "undef" };
>>
>> @@ -611,6 +612,30 @@ key_def_find(const struct key_def *key_def, const
>> struct key_part *to_find)
>>       return NULL;
>>   }
>>
>> +static bool
>> +key_def_can_merge(const struct key_def *first_key_def,
>> +    const struct key_part *second_key_part)
> 
> Your MUA mangles patches. Please fix it - it makes the patches difficult
> to review.
> 
>> +{
>> +    const struct key_part *part = key_def_find(first_key_def,
>> +                           second_key_part);
>> +    if (part == NULL)
>> +        return true;
> 
> I asked you to return false in this case, because this means we can't
> merge the given key part into the given key def.

Just the opposite. (part == NULL) means that there is no duplicates and
we must include the second_key_part into the composite key.

> 
>> +
>> +    /*
>> +     * The duplicate key_part is found,
>> +     * compare collation
>> +     */
>> +    if (part->coll == second_key_part->coll)
>> +        return false;
>> +
>> +    if (part->coll == NULL ||
>> +        /*part->coll->strength == COLL_ICU_STRENGTH_DEFAULT)*/
> 
> Stray hunk, pleaase remove.

ooops. Fixed.

> 
>> +        coll_get_strength(part->coll) == COLL_ICU_STRENGTH_DEFAULT)
> 
> If my memory doesn't fail, on our last daily meeting, Kostja mentioned
> that using STRENGTH here isn't quite correct...

strength is the only metric we can evaluate. We can't compare e.g. case 
sensitivity vs accent sensitivity, in such cases we consider them
incompatible.
For the case in question we have to decide whether we append the 
key_part from the primary key or not. If the second key_part has binary 
or case sensitive collation then we shouldn't, since the second key_def 
is enough unique.

> 
>> +        return false;
>> +    else
>> +        return true;
>> +}
>> +
>>   bool
>>   key_def_contains(const struct key_def *first, const struct key_def *second)
>>   {
>> @@ -639,7 +664,7 @@ key_def_merge(const struct key_def *first, const struct
>> key_def *second)
>>       part = second->parts;
>>       end = part + second->part_count;
>>       for (; part != end; part++) {
>> -        if (key_def_find(first, part) != NULL)
>> +        if (!key_def_can_merge(first, part))
>>               --new_part_count;
>>           else
>>               sz += part->path_len;
>> @@ -677,7 +702,7 @@ key_def_merge(const struct key_def *first, const struct
>> key_def *second)
>>       part = second->parts;
>>       end = part + second->part_count;
>>       for (; part != end; part++) {
>> -        if (key_def_find(first, part) != NULL)
>> +        if (!key_def_can_merge(first, part))
>>               continue;
>>           key_def_set_part(new_def, pos++, part->fieldno, part->type,
>>                    part->nullable_action, part->coll,
>> diff --git a/src/coll.c b/src/coll.c
>> index 6d9c44dbf..7d7267d8a 100644
>> --- a/src/coll.c
>> +++ b/src/coll.c
>> @@ -295,6 +295,44 @@ coll_def_snfingerprint(char *buffer, int size, const
>> struct coll_def *def)
>>       return total;
>>   }
>>
>> +static enum coll_icu_strength
>> +cast_coll_strength(UCollationStrength s)
>> +{
>> +    enum coll_icu_strength res = COLL_ICU_STRENGTH_DEFAULT;
>> +
>> +    switch(s) {
>> +    case UCOL_PRIMARY:
>> +        res = COLL_ICU_STRENGTH_PRIMARY;
>> +        break;
>> +    case UCOL_SECONDARY:
>> +        res = COLL_ICU_STRENGTH_SECONDARY;
>> +        break;
>> +    case UCOL_TERTIARY:
>> +        res = COLL_ICU_STRENGTH_TERTIARY;
>> +        break;
>> +    case UCOL_QUATERNARY:
>> +        res = COLL_ICU_STRENGTH_QUATERNARY;
>> +        break;
>> +    case UCOL_IDENTICAL:
>> +        res = COLL_ICU_STRENGTH_IDENTICAL;
>> +        break;
>> +    default:
>> +        break;
> 
> Ouch. This function looks cumbersome. I'd really prefer if you rather
> factored out the whole collation check into a helper function defined
> in coll.c.
> 

Done.

>> +    }
>> +
>> +    return res;
>> +}
>> +
>> +/*enum coll_icu_strength*/
>> +enum coll_icu_strength
>> +coll_get_strength(const struct coll *coll)
>> +{
>> +    if (coll->collator == NULL)
>> +        return COLL_ICU_STRENGTH_DEFAULT;
>> +    else
>> +        return cast_coll_strength(ucol_getStrength(coll->collator));
>> +}
>> +
>>   struct coll *
>>   coll_new(const struct coll_def *def)
>>   {
> 

Please find the recent patch below:

---
  src/box/key_def.c | 21 ++++++++-------------
  src/coll.c        | 44 ++++++++++----------------------------------
  src/coll.h        | 10 +++++++---
  3 files changed, 25 insertions(+), 50 deletions(-)

diff --git a/src/box/key_def.c b/src/box/key_def.c
index 61b0258b2..2b5fbbfcc 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -618,22 +618,17 @@ key_def_can_merge(const struct key_def *first_key_def,
  {
  	const struct key_part *part = key_def_find(first_key_def,
  						   second_key_part);
+	/*
+	 * The key_parts can be merged if
+	 * both key_defs have no duplicated
+	 * key_parts.
+	 * Or there are duplicated key_parts
+	 * but their collations are incompatible.
+	 * */
  	if (part == NULL)
  		return true;

-	/*
-	 * The duplicate key_part is found,
-	 * compare collation
-	 */
-	if (part->coll == second_key_part->coll)
-		return false;
-
-	if (part->coll == NULL ||
-		/*part->coll->strength == COLL_ICU_STRENGTH_DEFAULT)*/
-		coll_get_strength(part->coll) == COLL_ICU_STRENGTH_DEFAULT)
-		return false;
-	else
-		return true;
+	return !coll_is_compatible(part->coll, second_key_part->coll);
  }

  bool
diff --git a/src/coll.c b/src/coll.c
index 7d7267d8a..4a1344fa1 100644
--- a/src/coll.c
+++ b/src/coll.c
@@ -295,42 +295,18 @@ coll_def_snfingerprint(char *buffer, int size, 
const struct coll_def *def)
  	return total;
  }

-static enum coll_icu_strength
-cast_coll_strength(UCollationStrength s)
+bool
+coll_is_compatible(const struct coll *first, const struct coll* second)
  {
-	enum coll_icu_strength res = COLL_ICU_STRENGTH_DEFAULT;
-
-	switch(s) {
-	case UCOL_PRIMARY:
-		res = COLL_ICU_STRENGTH_PRIMARY;
-		break;
-	case UCOL_SECONDARY:
-		res = COLL_ICU_STRENGTH_SECONDARY;
-		break;
-	case UCOL_TERTIARY:
-		res = COLL_ICU_STRENGTH_TERTIARY;
-		break;
-	case UCOL_QUATERNARY:
-		res = COLL_ICU_STRENGTH_QUATERNARY;
-		break;
-	case UCOL_IDENTICAL:
-		res = COLL_ICU_STRENGTH_IDENTICAL;
-		break;
-	default:
-		break;
-	}
-
-	return res;
-}
-
-/*enum coll_icu_strength*/
-enum coll_icu_strength
-coll_get_strength(const struct coll *coll)
-{
-	if (coll->collator == NULL)
-		return COLL_ICU_STRENGTH_DEFAULT;
+	if (first == second)
+		return true;
+	if (first == NULL ||
+	    first->collator == NULL ||
+	    ucol_getStrength(first->collator) == UCOL_DEFAULT)
+		return true;
  	else
-		return cast_coll_strength(ucol_getStrength(coll->collator));
+		return false;
+
  }

  struct coll *
diff --git a/src/coll.h b/src/coll.h
index 8f65b73dd..e3c6447b2 100644
--- a/src/coll.h
+++ b/src/coll.h
@@ -34,6 +34,7 @@
  #include "coll_def.h"
  #include <stddef.h>
  #include <stdint.h>
+#include <stdbool.h>

  #if defined(__cplusplus)
  extern "C" {
@@ -70,9 +71,12 @@ struct coll {
  	const char fingerprint[0];
  };

-/** Retrieve strength of the given collation */
-enum coll_icu_strength
-coll_get_strength(const struct coll *coll);
+/**
+ * Check whether the first collation is compatible
+ * with the second one.
+ * */
+bool
+coll_is_compatible(const struct coll *first, const struct coll* second);

  /**
   * Create a collation by definition. Can return an existing
-- 
2.17.1




More information about the Tarantool-patches mailing list