[tarantool-patches] Re: [PATCH v2 4/4] box: introduce tuple compare hint for string index

Vladimir Davydov vdavydov.dev at gmail.com
Thu Feb 21 19:06:07 MSK 2019


On Tue, Feb 19, 2019 at 06:02:55PM +0300, Kirill Shcherbatov wrote:
> diff --git a/src/box/memtx_tree_decl.c b/src/box/memtx_tree_decl.c
> index 084a0594f..55016148b 100644
> --- a/src/box/memtx_tree_decl.c
> +++ b/src/box/memtx_tree_decl.c
> @@ -77,8 +77,133 @@ struct memtx_tuple_tree_key_data {
>  
>  /* }}} */
>  
> +/* {{{ Memtx hinted and hint_only tree class. *******************/

There's no hint_only tree anymore. Please update the comments.

> +
> +/**
> + * Struct that is used as a key in BPS tree definition in
> + * memtx_hint_only_tree and memtx_hinted_tree.
> +*/
> +struct memtx_hinted_tree_key_data {
> +	/** Sequence of msgpacked search fields. */
> +	const char *key;
> +	/** Number of msgpacked search fields. */
> +	uint32_t part_count;
> +	/**
> +	 * Compare hint. Is calculated automatically on 'set'

Comparison hint. 'Is' isn't necessary - could be simply Calculated ...

> +	 * operation with memtx_hinted_tree_key_data_set().
> +	 */
> +	uint64_t hint;
> +};
> +
> +/**
> + * Struct that is used as a key in BPS tree definition in
> + * memtx_hint_only_tree and memtx_hinted_tree.
> + */
> +struct memtx_hinted_tree_data {
> +	/** Tuple this node is representing. */

represents

> +	struct tuple *tuple;
> +	/**
> +	 * Compare hint. Is calculated automatically on 'set'
> +	 * operation with memtx_hinted_tree_data_set().
> +	 */
> +	uint64_t hint;
> +};
> +
> +/**
> + * Compare memtx_hinted_tree records.
> + */
> +static int
> +memtx_hinted_tree_data_cmp(struct memtx_hinted_tree_data *a,
> +			   struct memtx_hinted_tree_data *b,
> +			   struct key_def *key_def)
> +{
> +	if (a->hint != b->hint)
> +		return a->hint < b->hint ? -1 : 1;
> +	return tuple_compare(a->tuple, b->tuple, key_def);
> +}
> +
> +/**
> + * Compare memtx_hinted_tree record with key.
> + */
> +static int
> +memtx_hinted_tree_data_cmp_with_key(struct memtx_hinted_tree_data *a,
> +				    struct memtx_hinted_tree_key_data *key,
> +				    struct key_def *key_def)
> +{
> +	if (a->hint != key->hint)
> +		return a->hint < key->hint ? -1 : 1;
> +	return tuple_compare_with_key(a->tuple, key->key, key->part_count,
> +				      key_def);
> +}
> +
> +/**
> + * Initialize memtx_hinted_tree or memtx_hint_only_tree record
> + * with tuple and recalculate internal hint field.
> + */
> +static void
> +memtx_hinted_tree_data_set(struct memtx_hinted_tree_data *data,
> +			   struct tuple *tuple, struct key_def *key_def)
> +{
> +	data->tuple = tuple;
> +	data->hint = tuple_hint(tuple, key_def);
> +}
> +
> +/**
> + * Initialize memtx_hinted_tree or memtx_hint_only_tree key with
> + * key raw and part count and recalculate internal hint field.
> + */
> +static void
> +memtx_hinted_tree_key_data_set(struct memtx_hinted_tree_key_data *key_data,
> +			       const char *key, uint32_t part_count,
> +			       struct key_def *key_def)

All these functions aren't used anywhere except in MEMTX_TREE_ macros,
and they're pretty short. Worth inlining them there?

> +{
> +	key_data->key = key;
> +	key_data->part_count = part_count;
> +	key_data->hint = key != NULL && part_count > 0 ?

No need to check both part_count and key. Either of them is enough.

> +			 key_hint(key, key_def) : 0;
> +}
> +
> +#define MEMTX_TREE_NAME memtx_hinted_tree
> +#define memtx_tree_elem struct memtx_hinted_tree_data
> +#define memtx_tree_key struct memtx_hinted_tree_key_data

> +#define MEMTX_TREE_ELEM_EQUAL(elem_a_ptr, elem_b_ptr)				\
> +	((elem_a_ptr)->tuple == (elem_b_ptr)->tuple)

This macro isn't required as far as I recall.

> +#define MEMTX_TREE_ELEM_CMP(elem_a_ptr, elem_b_ptr, key_def)			\
> +	memtx_hinted_tree_data_cmp(elem_a_ptr, elem_b_ptr, key_def)
> +#define MEMTX_TREE_ELEM_WITH_KEY_CMP(elem_ptr, key_ptr, key_def)		\
> +	memtx_hinted_tree_data_cmp_with_key(elem_ptr, key_ptr, key_def)
> +#define MEMTX_TREE_ELEM_SET(elem_ptr, tuple, key_def)				\
> +	memtx_hinted_tree_data_set(elem_ptr, tuple, key_def)
> +#define MEMTX_TREE_KEY_SET(key_ptr, key_val, part_count_val, key_def)		\
> +	memtx_hinted_tree_key_data_set(key_ptr, key_val, part_count_val,	\
> +				       key_def)
> +#define MEMTX_TREE_ELEM_GET(elem_ptr) ((elem_ptr)->tuple)
> +#define MEMTX_TREE_KEY_GET(key_ptr, part_count_ptr)				\
> +	({*part_count_ptr = (key_ptr)->part_count; (key_ptr)->key;})
> +
> +#include "memtx_tree_impl.h"
> +
> +#undef memtx_tree_key
> +#undef memtx_tree_elem
> +#undef MEMTX_TREE_KEY_GET
> +#undef MEMTX_TREE_ELEM_GET
> +#undef MEMTX_TREE_KEY_SET
> +#undef MEMTX_TREE_ELEM_SET
> +#undef MEMTX_TREE_ELEM_WITH_KEY_CMP
> +#undef MEMTX_TREE_ELEM_CMP
> +#undef MEMTX_TREE_ELEM_EQUAL
> +#undef memtx_tree_key
> +#undef memtx_tree_elem
> +#undef MEMTX_TREE_NAME
> +
> +/* }}} */
> +
>  struct index *
>  memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
>  {
> +	if (def->cmp_def->parts->type == FIELD_TYPE_STRING ||
> +	    def->cmp_def->parts->type == FIELD_TYPE_UNSIGNED ||
> +	    def->cmp_def->parts->type == FIELD_TYPE_INTEGER)
> +		return memtx_hinted_tree_index_new(memtx, def);
>  	return memtx_tuple_tree_index_new(memtx, def);
>  }
> diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
> index ded802c7d..5845235ff 100644
> --- a/src/box/tuple_compare.cc
> +++ b/src/box/tuple_compare.cc
> @@ -1321,4 +1321,189 @@ tuple_compare_with_key_create(const struct key_def *def)
>  	       compare_with_key_slowpath_funcs[cmp_func_idx];
>  }

I think all the hints should be defined in new files tuple_hint.{h,cc}

>  
> +static uint64_t
> +key_hint_default(const char *key, struct key_def *key_def)
> +{
> +	(void)key;
> +	(void)key_def;
> +	return 0;
> +}
> +
> +static uint64_t
> +tuple_hint_default(const struct tuple *tuple, struct key_def *key_def)
> +{
> +	(void)tuple;
> +	(void)key_def;
> +	return 0;
> +}
> +
> +template<bool is_nullable>

I doubt we need to use templates here, but OK, let them be.

> +static uint64_t
> +key_hint_uint(const char *key, struct key_def *key_def)
> +{
> +	(void)key_def;
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	if (is_nullable && mp_typeof(*key) == MP_NIL)
> +		return 0;
> +	assert(mp_typeof(*key) == MP_UINT);
> +	uint64_t val = mp_decode_uint(&key);
> +	if (val > INT64_MAX)
> +		return INT64_MAX;
> +	return val - (uint64_t)INT64_MIN;
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def)
> +{
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	const char *field = tuple_field_by_part(tuple, key_def->parts);
> +	if (is_nullable && field == NULL)
> +		return 0;
> +	return key_hint_uint<is_nullable>(field, key_def);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +key_hint_int(const char *key, struct key_def *key_def)
> +{
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	if (is_nullable && mp_typeof(*key) == MP_NIL)
> +		return 0;
> +	if (mp_typeof(*key) == MP_UINT) {
> +		return key_hint_uint<is_nullable>(key, key_def);
> +	} else {

> +		assert(key_def->parts->type == FIELD_TYPE_INTEGER);

This assertion should be in the beginning of this function, no?

Please also add assertions checking part->type to other hint functions.

> +		assert(mp_typeof(*key) == MP_INT);
> +		int64_t val = mp_decode_int(&key);
> +		return (uint64_t)val - (uint64_t)INT64_MIN;
> +	}
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_int(const struct tuple *tuple, struct key_def *key_def)
> +{
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	const char *field = tuple_field_by_part(tuple, key_def->parts);
> +	if (is_nullable && field == NULL)
> +		return 0;
> +	return key_hint_int<is_nullable>(field, key_def);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +key_hint_string(const char *key, struct key_def *key_def)
> +{
> +	(void)key_def;
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	if (is_nullable && mp_typeof(*key) == MP_NIL)
> +		return 0;
> +	assert(key_def->parts->type == FIELD_TYPE_STRING);
> +	assert(mp_typeof(*key) == MP_STR);
> +	uint32_t len;
> +	const unsigned char *str =
> +		(const unsigned char *)mp_decode_str(&key, &len);
> +	uint64_t result = 0;
> +	/*
> +	 * Map the sequence of characters in the hint's bytes in
> +	 * the reverse order: the first character will be mapped
> +	 * in the highest byte of the number, etc.
> +	 * One additional bit encodes the presence of the next
> +	 * sign.
> +	 * 0bCHARBYTEU CHARBYTEU CHARBYTEU ... CHARBYTE0
> +	 * The sequence constructed in this way provides a
> +	 * comparison transitivity of strings no longer than 7
> +	 * bytes.

Still I don't understand why we need this transformation and why simply
using a sequence of bytes constituting the string isn't enough. Could
you give an example?

> +	 */
> +	uint32_t process_len = MIN(len, 7);
> +	for (uint32_t i = 0; i < process_len; i++) {
> +		result <<= 9;
> +		result |= 0x100;
> +		result |= str[i];
> +	}
> +	/*
> +	 * If the length of the string is less than the 64-bit
> +	 * hint can accommodate, the insignificant positions are
> +	 * filled with 0.
> +	 */
> +	result <<= 9 * (7 - process_len);
> +	return result;
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_string(const struct tuple *tuple, struct key_def *key_def)
> +{
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	const char *field = tuple_field_by_part(tuple, key_def->parts);
> +	if (is_nullable && field == NULL)
> +		return 0;
> +	return key_hint_string<is_nullable>(field, key_def);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +key_hint_string_coll(const char *key, struct key_def *key_def)
> +{
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	if (is_nullable && mp_typeof(*key) == MP_NIL)
> +		return 0;
> +	assert(key_def->parts->type == FIELD_TYPE_STRING);
> +	assert(mp_typeof(*key) == MP_STR);
> +	assert(key_def->parts->coll != NULL);
> +	uint32_t len;
> +	const char *str = mp_decode_str(&key, &len);
> +	return key_def->parts->coll->hint(str, len, key_def->parts->coll);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def)
> +{
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	const char *field = tuple_field_by_part(tuple, key_def->parts);
> +	if (is_nullable && field == NULL)
> +		return 0;
> +	return key_hint_string_coll<is_nullable>(field, key_def);
> +}
> +
> +void
> +tuple_hint_set(struct key_def *def)

Should be called key_def_set_hint_func. Please rename.

> +{
> +	def->key_hint = key_hint_default;
> +	def->tuple_hint = tuple_hint_default;
> +	bool is_nullable = key_part_is_nullable(def->parts);
> +	if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL &&
> +	    def->parts->coll->type != COLL_TYPE_BINARY) {
> +		def->key_hint = is_nullable ? key_hint_string_coll<true> :
> +					      key_hint_string_coll<false>;
> +		def->tuple_hint = is_nullable ? tuple_hint_string_coll<true> :
> +						tuple_hint_string_coll<false>;
> +		return;
> +	}
> +	switch (def->parts->type) {
> +	case FIELD_TYPE_UNSIGNED:
> +		def->key_hint = is_nullable ? key_hint_uint<true> :
> +					      key_hint_uint<false>;
> +		def->tuple_hint = is_nullable ? tuple_hint_uint<true> :
> +						tuple_hint_uint<false>;
> +		break;
> +	case FIELD_TYPE_INTEGER:
> +		def->key_hint = is_nullable ? key_hint_int<true> :
> +					      key_hint_int<false>;
> +		def->tuple_hint = is_nullable ? tuple_hint_int<true> :
> +						tuple_hint_int<false>;
> +		break;
> +	case FIELD_TYPE_STRING:
> +		def->key_hint = is_nullable ? key_hint_string<true> :
> +					      key_hint_string<false>;
> +		def->tuple_hint = is_nullable ? tuple_hint_string<true> :
> +						tuple_hint_string<false>;
> +		break;
> +	default:
> +		break;
> +	};
> +}



More information about the Tarantool-patches mailing list