[patches] [PATCH 1/2] format: add collation to filed_def and tuple_field

v.shpilevoy at tarantool.org v.shpilevoy at tarantool.org
Tue Mar 6 15:36:51 MSK 2018


See below 6 comments.

> 28 февр. 2018 г., в 19:10, Nikita Pettik <korablev at tarantool.org> написал(а):
> 
> Originally, collation can be assigned only to index parts. However, SQL
> standard says that each column is capable of having such property. Thus,
> in order to support standard, add collation id to field_def and pointer
> to collation to tuple_field.
> 
> Closes #2937
> ---
> src/box/alter.cc            |  7 +++++++
> src/box/field_def.c         |  5 ++++-
> src/box/field_def.h         |  2 ++
> src/box/lua/schema.lua      |  7 +++++++
> src/box/sql.c               | 11 ++++++++++-
> src/box/tuple_format.c      | 14 +++++++++++++-
> src/box/tuple_format.h      |  3 +++
> test/engine/iterator.result |  2 +-
> 8 files changed, 47 insertions(+), 4 deletions(-)
> 
> diff --git a/src/box/alter.cc b/src/box/alter.cc
> index 1fcb2b5d3..30baacbb9 100644
> --- a/src/box/alter.cc
> +++ b/src/box/alter.cc
> @@ -374,6 +374,13 @@ field_def_decode(struct field_def *field, const char **data,
> 				     "nullable action properties", fieldno +
> 				     TUPLE_INDEX_BASE));
> 	}
> +	if (field->coll_id != COLL_NONE &&
> +	    field->type != FIELD_TYPE_STRING &&
> +	    field->type != FIELD_TYPE_SCALAR) {

1. We have also FIELD_TYPE_ANY, which can not be indexed and because of it this type is not checked in key_def.cc <http://key_def.cc/> in key_def_decode_parts. But for non-indexed field it can appear and can have a collection, I think.

> +		tnt_raise(ClientError, errcode, tt_cstr(space_name, name_len),
> +			  tt_sprintf("collation is reasonable only for "
> +				     "string and scalar parts"));

2. It is not part. Part is an index term. For regular fields, please, use term 'field'.

> +	}
> }
> 
> /**
> diff --git a/src/box/field_def.c b/src/box/field_def.c
> index 166baa7c5..e48896569 100644
> --- a/src/box/field_def.c
> +++ b/src/box/field_def.c
> @@ -31,6 +31,7 @@
> 
> #include "field_def.h"
> #include "trivia/util.h"
> +#include "key_def.h"
> 
> const char *field_type_strs[] = {
> 	/* [FIELD_TYPE_ANY]      = */ "any",
> @@ -92,6 +93,7 @@ const struct opt_def field_def_reg[] = {
> 	OPT_DEF("is_nullable", OPT_BOOL, struct field_def, is_nullable),
> 	OPT_DEF_ENUM("nullable_action", on_conflict_action, struct field_def,
> 		     nullable_action, NULL),
> +	OPT_DEF("collation", OPT_UINT32, struct field_def, coll_id),
> 	OPT_END,
> };
> 
> @@ -99,7 +101,8 @@ const struct field_def field_def_default = {
> 	.type = FIELD_TYPE_ANY,
> 	.name = NULL,
> 	.is_nullable = false,
> -	.nullable_action = ON_CONFLICT_ACTION_DEFAULT
> +	.nullable_action = ON_CONFLICT_ACTION_DEFAULT,
> +	.coll_id = COLL_NONE
> };
> 
> enum field_type
> diff --git a/src/box/field_def.h b/src/box/field_def.h
> index 92bad6b9f..61f3e49db 100644
> --- a/src/box/field_def.h
> +++ b/src/box/field_def.h
> @@ -108,6 +108,8 @@ struct field_def {
> 	bool is_nullable;
> 	/** Action to perform if NULL constraint failed. */
> 	enum on_conflict_action nullable_action;
> +	/** Collation ID for string comparison. */
> +	uint32_t coll_id;
> };
> 
> #if defined(__cplusplus)
> diff --git a/src/box/lua/schema.lua b/src/box/lua/schema.lua
> index 49b832c77..204e7e85f 100644
> --- a/src/box/lua/schema.lua
> +++ b/src/box/lua/schema.lua
> @@ -338,6 +338,13 @@ function update_format(format)
>                     field.name = v;
>                 elseif k == 2 then
>                     field.type = v;
> +                elseif k == 'collation' then
> +                    local coll = box.space._collation.index.name:get{v}
> +                    if not coll then
> +                        box.error(box.error.ILLEGAL_PARAMS,
> +                            "format[" .. i .. "]: collation was not found by name '" .. v .. "'")
> +                    end
> +                    field[k] = coll[1]

3. Please, rebase on the latest 2.0. Since you patch was created, this code is slightly changed.

>                 else
>                     field[k] = v
>                 end
> diff --git a/src/box/sql.c b/src/box/sql.c
> index 9ce270da9..b5762a297 100644
> --- a/src/box/sql.c
> +++ b/src/box/sql.c
> @@ -1561,7 +1561,12 @@ int tarantoolSqlite3MakeTableFormat(Table *pTable, void *buf)
> 
> 	for (i = 0; i < n; i++) {
> 		const char *t;
> -		p = enc->encode_map(p, 4);
> +		struct coll *coll = NULL;
> +		if (aCol[i].zColl &&

4. I prefer explicit != NULL, but it is up to you. AFAIK pointers checking on NULL is not standardised in Tarantool((

> +		    sqlite3StrICmp(aCol[i].zColl, "binary") != 0) {

5. It is not remark, just a subject to discuss. Do we want continue using sqlite string functions? For example, sqlite3StrICmp can already be replaced with strcasecmp.

> +			coll = sqlite3FindCollSeq(NULL, aCol[i].zColl, 0);
> +		}
> +		p = enc->encode_map(p, coll ? 5 : 4);
> 		p = enc->encode_str(p, "name", 4);
> 		p = enc->encode_str(p, aCol[i].zName, strlen(aCol[i].zName));
> 		p = enc->encode_str(p, "type", 4);
> @@ -1579,6 +1584,10 @@ int tarantoolSqlite3MakeTableFormat(Table *pTable, void *buf)
> 		assert(aCol[i].notNull < on_conflict_action_MAX);
> 		const char *action = on_conflict_action_strs[aCol[i].notNull];
> 		p = enc->encode_str(p, action, strlen(action));
> +		if (coll != NULL) {
> +			p = enc->encode_str(p, "collation", strlen("collation"));
> +			p = enc->encode_uint(p, coll->id);
> +		}
> 	}
> 	return (int)(p - base);
> }
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index e05998bf9..17f579710 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -37,7 +37,8 @@ static intptr_t recycled_format_ids = FORMAT_ID_NIL;
> static uint32_t formats_size = 0, formats_capacity = 0;
> 
> static const struct tuple_field tuple_field_default = {
> -	FIELD_TYPE_ANY, TUPLE_OFFSET_SLOT_NIL, false, ON_CONFLICT_ACTION_DEFAULT
> +	FIELD_TYPE_ANY, TUPLE_OFFSET_SLOT_NIL, false,
> +	ON_CONFLICT_ACTION_DEFAULT, NULL
> };
> 
> /**
> @@ -59,6 +60,17 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
> 		format->fields[i].type = fields[i].type;
> 		format->fields[i].offset_slot = TUPLE_OFFSET_SLOT_NIL;
> 		format->fields[i].nullable_action = fields[i].nullable_action;
> +		struct coll *coll = NULL;
> +		uint32_t coll_id = fields[i].coll_id;
> +		if (coll_id != COLL_NONE) {
> +			coll = coll_by_id(coll_id);
> +			if (coll == NULL) {
> +				diag_set(ClientError,ER_WRONG_COLLATION_OPTIONS,
> +					 i + 1, "collation was not found by ID");
> +				return -1;
> +			}
> +		}
> +		format->fields[i].coll = coll;
> 		if (i + 1 > format->min_field_count && !fields[i].is_nullable)
> 			format->min_field_count = i + 1;
> 	}
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index c4074f014..70565aef9 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -35,6 +35,7 @@
> #include "field_def.h"
> #include "errinj.h"
> #include "tuple_dictionary.h"
> +#include "coll_cache.h"
> 
> #if defined(__cplusplus)
> extern "C" {
> @@ -98,6 +99,8 @@ struct tuple_field {
> 	bool is_key_part;
> 	/** Action to perform if NULL constraint failed. */
> 	enum on_conflict_action nullable_action;
> +	/** Collation definition for string comparison */
> +	struct coll *coll;
> };
> 
> /**
> diff --git a/test/engine/iterator.result b/test/engine/iterator.result
> index e19886677..e54e21203 100644
> --- a/test/engine/iterator.result
> +++ b/test/engine/iterator.result

6. Please, add some tests, that check you can specify collation for a tuple field in lua.

> @@ -4215,7 +4215,7 @@ s:replace{35}
> ...
> state, value = gen(param,state)
> ---
> -- error: 'builtin/box/schema.lua:981: usage: next(param, state)'
> +- error: 'builtin/box/schema.lua:988: usage: next(param, state)'
> ...
> value
> ---
> -- 
> 2.15.1
> 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.tarantool.org/pipermail/tarantool-patches/attachments/20180306/b17b8651/attachment.html>


More information about the Tarantool-patches mailing list