Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: Kirill Shcherbatov <kshcherbatov@tarantool.org>
Cc: tarantool-patches@freelists.org
Subject: Re: [tarantool-patches] Re: [PATCH v2 4/4] box: introduce tuple compare hint for string index
Date: Thu, 21 Feb 2019 19:06:07 +0300	[thread overview]
Message-ID: <20190221160607.4s4ighjoxu24lamx@esperanza> (raw)
In-Reply-To: <3c155f7e-94ac-8ff1-2100-99f8b0c42dfc@tarantool.org>

On Tue, Feb 19, 2019 at 06:02:55PM +0300, Kirill Shcherbatov wrote:
> diff --git a/src/box/memtx_tree_decl.c b/src/box/memtx_tree_decl.c
> index 084a0594f..55016148b 100644
> --- a/src/box/memtx_tree_decl.c
> +++ b/src/box/memtx_tree_decl.c
> @@ -77,8 +77,133 @@ struct memtx_tuple_tree_key_data {
>  
>  /* }}} */
>  
> +/* {{{ Memtx hinted and hint_only tree class. *******************/

There's no hint_only tree anymore. Please update the comments.

> +
> +/**
> + * Struct that is used as a key in BPS tree definition in
> + * memtx_hint_only_tree and memtx_hinted_tree.
> +*/
> +struct memtx_hinted_tree_key_data {
> +	/** Sequence of msgpacked search fields. */
> +	const char *key;
> +	/** Number of msgpacked search fields. */
> +	uint32_t part_count;
> +	/**
> +	 * Compare hint. Is calculated automatically on 'set'

Comparison hint. 'Is' isn't necessary - could be simply Calculated ...

> +	 * operation with memtx_hinted_tree_key_data_set().
> +	 */
> +	uint64_t hint;
> +};
> +
> +/**
> + * Struct that is used as a key in BPS tree definition in
> + * memtx_hint_only_tree and memtx_hinted_tree.
> + */
> +struct memtx_hinted_tree_data {
> +	/** Tuple this node is representing. */

represents

> +	struct tuple *tuple;
> +	/**
> +	 * Compare hint. Is calculated automatically on 'set'
> +	 * operation with memtx_hinted_tree_data_set().
> +	 */
> +	uint64_t hint;
> +};
> +
> +/**
> + * Compare memtx_hinted_tree records.
> + */
> +static int
> +memtx_hinted_tree_data_cmp(struct memtx_hinted_tree_data *a,
> +			   struct memtx_hinted_tree_data *b,
> +			   struct key_def *key_def)
> +{
> +	if (a->hint != b->hint)
> +		return a->hint < b->hint ? -1 : 1;
> +	return tuple_compare(a->tuple, b->tuple, key_def);
> +}
> +
> +/**
> + * Compare memtx_hinted_tree record with key.
> + */
> +static int
> +memtx_hinted_tree_data_cmp_with_key(struct memtx_hinted_tree_data *a,
> +				    struct memtx_hinted_tree_key_data *key,
> +				    struct key_def *key_def)
> +{
> +	if (a->hint != key->hint)
> +		return a->hint < key->hint ? -1 : 1;
> +	return tuple_compare_with_key(a->tuple, key->key, key->part_count,
> +				      key_def);
> +}
> +
> +/**
> + * Initialize memtx_hinted_tree or memtx_hint_only_tree record
> + * with tuple and recalculate internal hint field.
> + */
> +static void
> +memtx_hinted_tree_data_set(struct memtx_hinted_tree_data *data,
> +			   struct tuple *tuple, struct key_def *key_def)
> +{
> +	data->tuple = tuple;
> +	data->hint = tuple_hint(tuple, key_def);
> +}
> +
> +/**
> + * Initialize memtx_hinted_tree or memtx_hint_only_tree key with
> + * key raw and part count and recalculate internal hint field.
> + */
> +static void
> +memtx_hinted_tree_key_data_set(struct memtx_hinted_tree_key_data *key_data,
> +			       const char *key, uint32_t part_count,
> +			       struct key_def *key_def)

All these functions aren't used anywhere except in MEMTX_TREE_ macros,
and they're pretty short. Worth inlining them there?

> +{
> +	key_data->key = key;
> +	key_data->part_count = part_count;
> +	key_data->hint = key != NULL && part_count > 0 ?

No need to check both part_count and key. Either of them is enough.

> +			 key_hint(key, key_def) : 0;
> +}
> +
> +#define MEMTX_TREE_NAME memtx_hinted_tree
> +#define memtx_tree_elem struct memtx_hinted_tree_data
> +#define memtx_tree_key struct memtx_hinted_tree_key_data

> +#define MEMTX_TREE_ELEM_EQUAL(elem_a_ptr, elem_b_ptr)				\
> +	((elem_a_ptr)->tuple == (elem_b_ptr)->tuple)

This macro isn't required as far as I recall.

> +#define MEMTX_TREE_ELEM_CMP(elem_a_ptr, elem_b_ptr, key_def)			\
> +	memtx_hinted_tree_data_cmp(elem_a_ptr, elem_b_ptr, key_def)
> +#define MEMTX_TREE_ELEM_WITH_KEY_CMP(elem_ptr, key_ptr, key_def)		\
> +	memtx_hinted_tree_data_cmp_with_key(elem_ptr, key_ptr, key_def)
> +#define MEMTX_TREE_ELEM_SET(elem_ptr, tuple, key_def)				\
> +	memtx_hinted_tree_data_set(elem_ptr, tuple, key_def)
> +#define MEMTX_TREE_KEY_SET(key_ptr, key_val, part_count_val, key_def)		\
> +	memtx_hinted_tree_key_data_set(key_ptr, key_val, part_count_val,	\
> +				       key_def)
> +#define MEMTX_TREE_ELEM_GET(elem_ptr) ((elem_ptr)->tuple)
> +#define MEMTX_TREE_KEY_GET(key_ptr, part_count_ptr)				\
> +	({*part_count_ptr = (key_ptr)->part_count; (key_ptr)->key;})
> +
> +#include "memtx_tree_impl.h"
> +
> +#undef memtx_tree_key
> +#undef memtx_tree_elem
> +#undef MEMTX_TREE_KEY_GET
> +#undef MEMTX_TREE_ELEM_GET
> +#undef MEMTX_TREE_KEY_SET
> +#undef MEMTX_TREE_ELEM_SET
> +#undef MEMTX_TREE_ELEM_WITH_KEY_CMP
> +#undef MEMTX_TREE_ELEM_CMP
> +#undef MEMTX_TREE_ELEM_EQUAL
> +#undef memtx_tree_key
> +#undef memtx_tree_elem
> +#undef MEMTX_TREE_NAME
> +
> +/* }}} */
> +
>  struct index *
>  memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
>  {
> +	if (def->cmp_def->parts->type == FIELD_TYPE_STRING ||
> +	    def->cmp_def->parts->type == FIELD_TYPE_UNSIGNED ||
> +	    def->cmp_def->parts->type == FIELD_TYPE_INTEGER)
> +		return memtx_hinted_tree_index_new(memtx, def);
>  	return memtx_tuple_tree_index_new(memtx, def);
>  }
> diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
> index ded802c7d..5845235ff 100644
> --- a/src/box/tuple_compare.cc
> +++ b/src/box/tuple_compare.cc
> @@ -1321,4 +1321,189 @@ tuple_compare_with_key_create(const struct key_def *def)
>  	       compare_with_key_slowpath_funcs[cmp_func_idx];
>  }

I think all the hints should be defined in new files tuple_hint.{h,cc}

>  
> +static uint64_t
> +key_hint_default(const char *key, struct key_def *key_def)
> +{
> +	(void)key;
> +	(void)key_def;
> +	return 0;
> +}
> +
> +static uint64_t
> +tuple_hint_default(const struct tuple *tuple, struct key_def *key_def)
> +{
> +	(void)tuple;
> +	(void)key_def;
> +	return 0;
> +}
> +
> +template<bool is_nullable>

I doubt we need to use templates here, but OK, let them be.

> +static uint64_t
> +key_hint_uint(const char *key, struct key_def *key_def)
> +{
> +	(void)key_def;
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	if (is_nullable && mp_typeof(*key) == MP_NIL)
> +		return 0;
> +	assert(mp_typeof(*key) == MP_UINT);
> +	uint64_t val = mp_decode_uint(&key);
> +	if (val > INT64_MAX)
> +		return INT64_MAX;
> +	return val - (uint64_t)INT64_MIN;
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def)
> +{
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	const char *field = tuple_field_by_part(tuple, key_def->parts);
> +	if (is_nullable && field == NULL)
> +		return 0;
> +	return key_hint_uint<is_nullable>(field, key_def);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +key_hint_int(const char *key, struct key_def *key_def)
> +{
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	if (is_nullable && mp_typeof(*key) == MP_NIL)
> +		return 0;
> +	if (mp_typeof(*key) == MP_UINT) {
> +		return key_hint_uint<is_nullable>(key, key_def);
> +	} else {

> +		assert(key_def->parts->type == FIELD_TYPE_INTEGER);

This assertion should be in the beginning of this function, no?

Please also add assertions checking part->type to other hint functions.

> +		assert(mp_typeof(*key) == MP_INT);
> +		int64_t val = mp_decode_int(&key);
> +		return (uint64_t)val - (uint64_t)INT64_MIN;
> +	}
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_int(const struct tuple *tuple, struct key_def *key_def)
> +{
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	const char *field = tuple_field_by_part(tuple, key_def->parts);
> +	if (is_nullable && field == NULL)
> +		return 0;
> +	return key_hint_int<is_nullable>(field, key_def);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +key_hint_string(const char *key, struct key_def *key_def)
> +{
> +	(void)key_def;
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	if (is_nullable && mp_typeof(*key) == MP_NIL)
> +		return 0;
> +	assert(key_def->parts->type == FIELD_TYPE_STRING);
> +	assert(mp_typeof(*key) == MP_STR);
> +	uint32_t len;
> +	const unsigned char *str =
> +		(const unsigned char *)mp_decode_str(&key, &len);
> +	uint64_t result = 0;
> +	/*
> +	 * Map the sequence of characters in the hint's bytes in
> +	 * the reverse order: the first character will be mapped
> +	 * in the highest byte of the number, etc.
> +	 * One additional bit encodes the presence of the next
> +	 * sign.
> +	 * 0bCHARBYTEU CHARBYTEU CHARBYTEU ... CHARBYTE0
> +	 * The sequence constructed in this way provides a
> +	 * comparison transitivity of strings no longer than 7
> +	 * bytes.

Still I don't understand why we need this transformation and why simply
using a sequence of bytes constituting the string isn't enough. Could
you give an example?

> +	 */
> +	uint32_t process_len = MIN(len, 7);
> +	for (uint32_t i = 0; i < process_len; i++) {
> +		result <<= 9;
> +		result |= 0x100;
> +		result |= str[i];
> +	}
> +	/*
> +	 * If the length of the string is less than the 64-bit
> +	 * hint can accommodate, the insignificant positions are
> +	 * filled with 0.
> +	 */
> +	result <<= 9 * (7 - process_len);
> +	return result;
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_string(const struct tuple *tuple, struct key_def *key_def)
> +{
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	const char *field = tuple_field_by_part(tuple, key_def->parts);
> +	if (is_nullable && field == NULL)
> +		return 0;
> +	return key_hint_string<is_nullable>(field, key_def);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +key_hint_string_coll(const char *key, struct key_def *key_def)
> +{
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	if (is_nullable && mp_typeof(*key) == MP_NIL)
> +		return 0;
> +	assert(key_def->parts->type == FIELD_TYPE_STRING);
> +	assert(mp_typeof(*key) == MP_STR);
> +	assert(key_def->parts->coll != NULL);
> +	uint32_t len;
> +	const char *str = mp_decode_str(&key, &len);
> +	return key_def->parts->coll->hint(str, len, key_def->parts->coll);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def)
> +{
> +	assert(key_part_is_nullable(key_def->parts) == is_nullable);
> +	const char *field = tuple_field_by_part(tuple, key_def->parts);
> +	if (is_nullable && field == NULL)
> +		return 0;
> +	return key_hint_string_coll<is_nullable>(field, key_def);
> +}
> +
> +void
> +tuple_hint_set(struct key_def *def)

Should be called key_def_set_hint_func. Please rename.

> +{
> +	def->key_hint = key_hint_default;
> +	def->tuple_hint = tuple_hint_default;
> +	bool is_nullable = key_part_is_nullable(def->parts);
> +	if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL &&
> +	    def->parts->coll->type != COLL_TYPE_BINARY) {
> +		def->key_hint = is_nullable ? key_hint_string_coll<true> :
> +					      key_hint_string_coll<false>;
> +		def->tuple_hint = is_nullable ? tuple_hint_string_coll<true> :
> +						tuple_hint_string_coll<false>;
> +		return;
> +	}
> +	switch (def->parts->type) {
> +	case FIELD_TYPE_UNSIGNED:
> +		def->key_hint = is_nullable ? key_hint_uint<true> :
> +					      key_hint_uint<false>;
> +		def->tuple_hint = is_nullable ? tuple_hint_uint<true> :
> +						tuple_hint_uint<false>;
> +		break;
> +	case FIELD_TYPE_INTEGER:
> +		def->key_hint = is_nullable ? key_hint_int<true> :
> +					      key_hint_int<false>;
> +		def->tuple_hint = is_nullable ? tuple_hint_int<true> :
> +						tuple_hint_int<false>;
> +		break;
> +	case FIELD_TYPE_STRING:
> +		def->key_hint = is_nullable ? key_hint_string<true> :
> +					      key_hint_string<false>;
> +		def->tuple_hint = is_nullable ? tuple_hint_string<true> :
> +						tuple_hint_string<false>;
> +		break;
> +	default:
> +		break;
> +	};
> +}

      reply	other threads:[~2019-02-21 16:06 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-13  9:32 [PATCH v2 0/4] box: introduce hint option for memtx tree index Kirill Shcherbatov
2019-02-13  9:32 ` [PATCH v2 1/4] box: move memtx_tree implementation to source file Kirill Shcherbatov
2019-02-14 13:57   ` [tarantool-patches] " Konstantin Osipov
2019-02-21 13:26   ` Vladimir Davydov
2019-02-13  9:32 ` [PATCH v2 2/4] box: rework memtx_tree to store arbitrary nodes Kirill Shcherbatov
2019-02-21 15:01   ` Vladimir Davydov
2019-02-13  9:32 ` [PATCH v2 3/4] box: rename memtx_tree.c to memtx_tree_impl.h Kirill Shcherbatov
2019-02-13  9:32 ` [PATCH v2 4/4] box: introduce tuple compare hint for string index Kirill Shcherbatov
2019-02-14 14:09   ` [tarantool-patches] " Konstantin Osipov
2019-02-15 18:34   ` Kirill Shcherbatov
2019-02-19 15:01     ` [tarantool-patches] " Konstantin Osipov
2019-02-19 15:02     ` Kirill Shcherbatov
2019-02-21 16:06       ` Vladimir Davydov [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190221160607.4s4ighjoxu24lamx@esperanza \
    --to=vdavydov.dev@gmail.com \
    --cc=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [tarantool-patches] Re: [PATCH v2 4/4] box: introduce tuple compare hint for string index' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox