[Tarantool-patches] [PATCH v3 10/16] module api: add box_key_def_new_v2()

Alexander Turenko alexander.turenko at tarantool.org
Tue Oct 13 02:23:17 MSK 2020


Unlike box_key_def_new() it allows to set nullability, collation and
JSON path.

Note: JSON paths are not supported in the backported version of the
patch for 1.10.

Provided public non-opaque key part definition structure to create a key
definition. The next commit will also use this structure to dump a key
definition.

There are several technical points around the box_key_part_def_t
structure. They are mainly about providing stable ABI.

- Two uint32_t fields are placed first for better aligning of next
  fields (pointers, which are usually 64 bit wide).

- A padding is used to guarantee that the structure size will remain the
  same across tarantool versions. It allows to allocate an array of such
  structures.

- The padding array is not a field of the structure itself, but added as
  a union variant (see the code). It allows to get rid of manual
  calculation of cumulative fields size, which is hard to do in a
  platform independent way.

- A minimal size of the structure is guaranteed by the union with
  padding, but a static assert is required to guarantee that the size
  will not overrun the predefined value.

- PACKED is added as an extra remedy to make the structure layout
  predictable (on given target architecture).

- A bit flag is used for is_nullable. bool is considered as too
  expensive (it requires 8 bits). bitfields (int:1 and so on) do no
  guarantee certain data layout (it is compiler implementation detail),
  while a module is compiled outside of tarantool build and may use
  different toolchain. A bit flag is the only choice.

- A collation is identified using a string. Different IDs may be used on
  different tarantool instances for collations. The only 'real'
  identifier is a collation name, so using it as identifier in the API
  should be more convenient and less error-prone.

- A field type is also identified using a string instead of a number. We
  have <enum field_type> in the module API, but it cannot be used here,
  because IDs are changed across tarantool versions. Aside of this, size
  of a enum is compiler defined. Anyway, we can expose field types as
  numbers and implement number-to-name and name-to-number mapping
  functions, but IMHO it would just add extra complexity.

The dependency on the fiber compilation unit is added for key_def: it is
purely to obtain the box region in the module API functions. The key_def
compilation unit does not use anything fiber related in fact.
---
 src/box/key_def.c                | 141 ++++++++++++++++++
 src/box/key_def.h                | 157 +++++++++++++++++++-
 src/exports.h                    |   2 +
 test/app-tap/module_api.c        | 240 +++++++++++++++++++++++++++++++
 test/app-tap/module_api.test.lua |   2 +-
 5 files changed, 540 insertions(+), 2 deletions(-)

diff --git a/src/box/key_def.c b/src/box/key_def.c
index a03537227..43d0c92c8 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -39,6 +39,7 @@
 #include "coll_id_cache.h"
 #include "small/region.h"
 #include "coll/coll.h"
+#include "fiber.h"
 
 const char *sort_order_strs[] = { "asc", "desc", "undef" };
 
@@ -341,6 +342,76 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts,
 	return 0;
 }
 
+ /* {{{ Module API helpers */
+
+static int
+key_def_set_internal_part(struct key_part_def *internal_part,
+			  box_key_part_def_t *part, struct region *region)
+{
+	*internal_part = key_part_def_default;
+
+	/* Set internal_part->fieldno. */
+	internal_part->fieldno = part->fieldno;
+
+	/* Set internal_part->type. */
+	if (part->field_type == NULL) {
+		diag_set(IllegalParams, "Field type is mandatory");
+		return -1;
+	}
+	size_t type_len = strlen(part->field_type);
+	internal_part->type = field_type_by_name(part->field_type, type_len);
+	if (internal_part->type == field_type_MAX) {
+		diag_set(IllegalParams, "Unknown field type: \"%s\"",
+			 part->field_type);
+		return -1;
+	}
+
+	/* Set internal_part->{is_nullable,nullable_action}. */
+	bool is_nullable = (part->flags & BOX_KEY_PART_DEF_IS_NULLABLE) ==
+		BOX_KEY_PART_DEF_IS_NULLABLE;
+	if (is_nullable) {
+		internal_part->is_nullable = is_nullable;
+		internal_part->nullable_action = ON_CONFLICT_ACTION_NONE;
+	}
+
+	/* Set internal_part->coll_id. */
+	if (part->collation != NULL) {
+		size_t collation_len = strlen(part->collation);
+		struct coll_id *coll_id = coll_by_name(part->collation,
+						       collation_len);
+		if (coll_id == NULL) {
+			diag_set(IllegalParams, "Unknown collation: \"%s\"",
+				 part->collation);
+			return -1;
+		}
+		internal_part->coll_id = coll_id->id;
+	}
+
+	/* Set internal_part->path (JSON path). */
+	if (part->path != NULL) {
+		size_t path_len = strlen(part->path);
+		if (json_path_validate(part->path, path_len,
+				       TUPLE_INDEX_BASE) != 0) {
+			diag_set(IllegalParams, "Invalid JSON path: \"%s\"",
+				 part->path);
+			return -1;
+		}
+		char *tmp = region_alloc(region, path_len + 1);
+		if (tmp == NULL) {
+			diag_set(OutOfMemory, path_len + 1, "region", "path");
+			return -1;
+		}
+		memcpy(tmp, part->path, path_len + 1);
+		internal_part->path = tmp;
+	}
+
+	return 0;
+}
+
+/* }}} Module API helpers */
+
+/* {{{ Module API functions */
+
 box_key_def_t *
 box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
 {
@@ -368,6 +439,74 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
 	return key_def;
 }
 
+void
+box_key_part_def_create(box_key_part_def_t *part)
+{
+	memset(part, 0, sizeof(*part));
+}
+
+box_key_def_t *
+box_key_def_new_v2(box_key_part_def_t *parts, uint32_t part_count)
+{
+	if (part_count == 0) {
+		diag_set(IllegalParams, "At least one key part is required");
+		return NULL;
+	}
+
+	struct region *region = &fiber()->gc;
+	size_t region_svp = region_used(region);
+	size_t internal_parts_size;
+	struct key_part_def *internal_parts =
+		region_alloc_array(region, typeof(internal_parts[0]),
+				   part_count, &internal_parts_size);
+	if (internal_parts == NULL) {
+		diag_set(OutOfMemory, internal_parts_size, "region_alloc_array",
+			 "parts");
+		return NULL;
+	}
+
+	/*
+	 * It is possible to implement a function similar to
+	 * key_def_new() and eliminate <box_key_part_def_t> ->
+	 * <struct key_part_def> copying. However this would lead
+	 * to code duplication and would complicate maintanence,
+	 * so it worth to do so only if key_def creation will
+	 * appear on a hot path in some meaningful use case.
+	 */
+	uint32_t min_field_count = 0;
+	for (uint32_t i = 0; i < part_count; ++i) {
+		if (key_def_set_internal_part(&internal_parts[i], &parts[i],
+					      region) != 0) {
+			region_truncate(region, region_svp);
+			return NULL;
+		}
+		bool is_nullable =
+			(parts[i].flags & BOX_KEY_PART_DEF_IS_NULLABLE) ==
+			BOX_KEY_PART_DEF_IS_NULLABLE;
+		if (!is_nullable && parts[i].fieldno > min_field_count)
+			min_field_count = parts[i].fieldno;
+	}
+
+	struct key_def *key_def = key_def_new(internal_parts, part_count,
+					      false);
+	region_truncate(region, region_svp);
+	if (key_def == NULL)
+		return NULL;
+
+	/*
+	 * Update key_def->has_optional_parts and function
+	 * pointers.
+	 *
+	 * FIXME: It seems, this call should be part of
+	 * key_def_new(), because otherwise a callee function may
+	 * obtain an incorrect key_def. However I don't know any
+	 * case that would prove this guess.
+	 */
+	key_def_update_optionality(key_def, min_field_count);
+
+	return key_def;
+}
+
 void
 box_key_def_delete(box_key_def_t *key_def)
 {
@@ -391,6 +530,8 @@ box_tuple_compare_with_key(box_tuple_t *tuple_a, const char *key_b,
 
 }
 
+/* }}} Module API functions */
+
 int
 key_part_cmp(const struct key_part *parts1, uint32_t part_count1,
 	     const struct key_part *parts2, uint32_t part_count2)
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 625bb6fea..5f76890b7 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -289,14 +289,114 @@ key_def_delete(struct key_def *def);
 
 typedef struct tuple box_tuple_t;
 
+/* {{{ Module API */
+
 /** \cond public */
 
 typedef struct key_def box_key_def_t;
 
+/** Key part definition flags. */
+enum {
+	BOX_KEY_PART_DEF_IS_NULLABLE = 1 << 0,
+};
+
+/**
+ * It is recommended to verify size of <box_key_part_def_t>
+ * against this constant on the module side at build time.
+ * Example:
+ *
+ * | #if !defined(__cplusplus) && !defined(static_assert)
+ * | #define static_assert _Static_assert
+ * | #endif
+ * |
+ * | (slash)*
+ * |  * Verify that <box_key_part_def_t> has the same size when
+ * |  * compiled within tarantool and within the module.
+ * |  *
+ * |  * It is important, because the module allocates an array of key
+ * |  * parts and passes it to <box_key_def_new_v2>() tarantool
+ * |  * function.
+ * |  *(slash)
+ * | static_assert(sizeof(box_key_part_def_t) == BOX_KEY_PART_DEF_T_SIZE,
+ * |               "sizeof(box_key_part_def_t)");
+ *
+ * This snippet is not part of module.h, because portability of
+ * static_assert() / _Static_assert() is dubious. It should be
+ * decision of a module author how portable its code should be.
+ */
+enum {
+	BOX_KEY_PART_DEF_T_SIZE = 64,
+};
+
+/**
+ * Public representation of a key part definition.
+ *
+ * Usage: Allocate an array of such key parts, initialize each
+ * key part (call <box_key_part_def_create>() and set necessary
+ * fields), pass the array into <box_key_def_new_v2>() function.
+ *
+ * Important: A module should call <box_key_part_def_create>()
+ * to initialize the structure with default values. There is no
+ * guarantee that all future default values for fields and flags
+ * will be remain the same.
+ *
+ * The idea of separation from internal <struct key_part_def> is
+ * to provide stable API and ABI for modules.
+ *
+ * New fields may be added into the end of the structure in later
+ * tarantool versions. Also new flags may be introduced within
+ * <flags> field. <collation> cannot be changed to a union (to
+ * reuse for some other value), because it is verified even for
+ * a non-string key part by <box_key_def_new_v2>().
+ *
+ * Fields that are unknown at given tarantool version are ignored
+ * in general, but filled with zeros when initialized.
+ */
+typedef union PACKED {
+	struct {
+		/** Index of a tuple field (zero based). */
+		uint32_t fieldno;
+		/** Flags, e.g. nullability. */
+		uint32_t flags;
+		/** Type of the tuple field. */
+		const char *field_type;
+		/** Collation name for string comparisons. */
+		const char *collation;
+		/**
+		 * JSON path to point a nested field.
+		 *
+		 * Example:
+		 *
+		 * tuple: [1, {"foo": "bar"}]
+		 * key parts: [
+		 *     {
+		 *         "fieldno": 2,
+		 *         "type": "string",
+		 *         "path": "foo"
+		 *     }
+		 * ]
+		 *
+		 * => key: ["bar"]
+		 *
+		 * Note: When the path is given, <field_type>
+		 * means type of the nested field.
+		 */
+		const char *path;
+	};
+	/**
+	 * Padding to guarantee certain size across different
+	 * tarantool versions.
+	 */
+	char padding[BOX_KEY_PART_DEF_T_SIZE];
+} box_key_part_def_t;
+
 /**
- * Create key definition with key fields with passed typed on passed positions.
+ * Create key definition with given field numbers and field types.
+ *
  * May be used for tuple format creation and/or tuple comparison.
  *
+ * \sa <box_key_def_new_v2>().
+ *
  * \param fields array with key field identifiers
  * \param types array with key field types (see enum field_type)
  * \param part_count the number of key fields
@@ -305,6 +405,51 @@ typedef struct key_def box_key_def_t;
 API_EXPORT box_key_def_t *
 box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count);
 
+/**
+ * Initialize a key part with default values.
+ *
+ *  | Field       | Default value   | Details |
+ *  | ----------- | --------------- | ------- |
+ *  | fieldno     | 0               |         |
+ *  | flags       | <default flags> |         |
+ *  | field_type  | NULL            | [^1]    |
+ *  | collation   | NULL            |         |
+ *  | path        | NULL            |         |
+ *
+ * Default flag values are the following:
+ *
+ *  | Flag                         | Default value |
+ *  | ---------------------------- | ------------- |
+ *  | BOX_KEY_PART_DEF_IS_NULLABLE | 0 (unset)     |
+ *
+ * Default values of fields and flags are permitted to be changed
+ * in future tarantool versions. However we should be VERY
+ * conservative here and consider any meaningful usage scenarios,
+ * when doing so. At least new defaults should be consistent with
+ * how tarantool itself doing key_def related operations:
+ * validation, key extraction, comparisons and so on.
+ *
+ * All trailing padding bytes are set to zero. The same for
+ * unknown <flags> bits.
+ *
+ * [^1]: <box_key_def_new_v2>() does not accept NULL as a
+ *       <field_type>, so it should be filled explicitly.
+ */
+API_EXPORT void
+box_key_part_def_create(box_key_part_def_t *part);
+
+/**
+ * Create a key_def from given key parts.
+ *
+ * Unlike <box_key_def_new>() this function allows to define
+ * nullability, collation and other options for each key part.
+ *
+ * <box_key_part_def_t> fields that are unknown at given tarantool
+ * version are ignored. The same for unknown <flags> bits.
+ */
+API_EXPORT box_key_def_t *
+box_key_def_new_v2(box_key_part_def_t *parts, uint32_t part_count);
+
 /**
  * Delete key definition
  *
@@ -343,6 +488,16 @@ box_tuple_compare_with_key(box_tuple_t *tuple_a, const char *key_b,
 
 /** \endcond public */
 
+/*
+ * Size of the structure should remain the same across all
+ * tarantool versions in order to allow to allocate an array of
+ * them.
+ */
+static_assert(sizeof(box_key_part_def_t) == BOX_KEY_PART_DEF_T_SIZE,
+	      "sizeof(box_key_part_def_t)");
+
+/* }}} Module API */
+
 static inline size_t
 key_def_sizeof(uint32_t part_count, uint32_t path_pool_size)
 {
diff --git a/src/exports.h b/src/exports.h
index 299bb9fc1..604c1dfaa 100644
--- a/src/exports.h
+++ b/src/exports.h
@@ -31,6 +31,8 @@ EXPORT(box_iterator_free)
 EXPORT(box_iterator_next)
 EXPORT(box_key_def_delete)
 EXPORT(box_key_def_new)
+EXPORT(box_key_def_new_v2)
+EXPORT(box_key_part_def_create)
 EXPORT(box_latch_delete)
 EXPORT(box_latch_lock)
 EXPORT(box_latch_new)
diff --git a/test/app-tap/module_api.c b/test/app-tap/module_api.c
index d7304f86d..50a7fb54e 100644
--- a/test/app-tap/module_api.c
+++ b/test/app-tap/module_api.c
@@ -1,4 +1,5 @@
 #include <stdbool.h>
+#include <string.h>
 #include <module.h>
 
 #include <small/ibuf.h>
@@ -319,6 +320,8 @@ error:
 	return 1;
 }
 
+/* {{{ key_def api */
+
 static int
 test_key_def_api(lua_State *L)
 {
@@ -365,6 +368,242 @@ test_key_def_api(lua_State *L)
 	return 1;
 }
 
+/* }}} key_def api */
+
+/* {{{ key_def api v2 */
+
+/*
+ * More functions around key_def were exposed to the module API
+ * in order to implement external tuple.keydef and tuple.merger
+ * modules (gh-5273, gh-5384).
+ */
+
+/**
+ * Verify type and message of an error in the diagnostics area.
+ *
+ * Accepts a prefix of an actual type or a message. Pass an empty
+ * string if you need to verify only type or only message.
+ */
+static void
+check_diag(const char *exp_err_type, const char *exp_err_msg)
+{
+	box_error_t *e = box_error_last();
+	assert(strcmp(box_error_type(e), exp_err_type) == 0);
+	assert(strcmp(box_error_message(e), exp_err_msg) == 0);
+}
+
+/**
+ * Create a tuple on runtime arena.
+ *
+ * Release this tuple using <box_tuple_unref>().
+ */
+static box_tuple_t *
+new_runtime_tuple(const char *tuple_data, size_t tuple_size)
+{
+	box_tuple_format_t *fmt = box_tuple_format_default();
+	const char *tuple_end = tuple_data + tuple_size;
+	box_tuple_t *tuple = box_tuple_new(fmt, tuple_data, tuple_end);
+	assert(tuple != NULL);
+	box_tuple_ref(tuple);
+	return tuple;
+}
+
+/**
+ * Where padding bytes starts.
+ */
+static size_t
+key_part_padding_offset(void)
+{
+	if (sizeof(void *) * CHAR_BIT == 64)
+		return 32;
+	if (sizeof(void *) * CHAR_BIT == 32)
+		return 20;
+	assert(false);
+}
+
+/**
+ * Mask of all defined flags.
+ */
+static uint32_t
+key_part_def_known_flags(void)
+{
+	return BOX_KEY_PART_DEF_IS_NULLABLE;
+}
+
+/**
+ * Default flags value.
+ *
+ * All unknown bits are set to zero.
+ */
+static uint32_t
+key_part_def_default_flags(void)
+{
+	return 0;
+}
+
+/**
+ * Set all <box_key_part_def_t> fields to nondefault values.
+ *
+ * It also set all padding bytes and unknown flags to non-zero
+ * values.
+ */
+static void
+key_part_def_set_nondefault(box_key_part_def_t *part)
+{
+	size_t padding_offset = key_part_padding_offset();
+	uint32_t default_flags = key_part_def_default_flags();
+
+	/*
+	 * Give correct non-default values for known fields and
+	 * flags. Set unknown flags to non-zero values.
+	 */
+	part->fieldno = 1;
+	part->flags = ~default_flags;
+	part->field_type = "string";
+	part->collation = "unicode_ci";
+	part->path = "foo";
+
+	/* Fill padding with non-zero bytes. */
+	char *padding = ((char *) part) + padding_offset;
+	size_t padding_size = sizeof(box_key_part_def_t) - padding_offset;
+	memset(padding, 0xff, padding_size);
+}
+
+/**
+ * Verify that all known fields and flags are set to default
+ * values.
+ */
+static void
+key_part_def_check_default(box_key_part_def_t *part)
+{
+	uint32_t known_flags = key_part_def_known_flags();
+	uint32_t default_flags = key_part_def_default_flags();
+
+	assert(part->fieldno == 0);
+	assert((part->flags & known_flags) == default_flags);
+	assert(part->field_type == NULL);
+	assert(part->collation == NULL);
+	assert(part->path == NULL);
+}
+
+/**
+ * Verify that all padding bytes and unknown flags are set to
+ * zeros.
+ */
+static void
+key_part_def_check_zeros(const box_key_part_def_t *part)
+{
+	size_t padding_offset = key_part_padding_offset();
+	uint32_t unknown_flags = ~key_part_def_known_flags();
+
+	char *padding = ((char *) part) + padding_offset;
+	char *padding_end = ((char *) part) + sizeof(box_key_part_def_t);
+	for (char *p = padding; p < padding_end; ++p) {
+		assert(*p == 0);
+	}
+
+	assert((part->flags & unknown_flags) == 0);
+}
+
+/**
+ * Basic <box_key_part_def_create>() and <box_key_def_new_v2>()
+ * test.
+ */
+static int
+test_key_def_new_v2(struct lua_State *L)
+{
+	/* Verify <box_key_part_def_t> binary layout. */
+	assert(BOX_KEY_PART_DEF_T_SIZE == 64);
+	assert(sizeof(box_key_part_def_t) == BOX_KEY_PART_DEF_T_SIZE);
+	assert(offsetof(box_key_part_def_t, fieldno) == 0);
+	assert(offsetof(box_key_part_def_t, flags) == 4);
+	assert(offsetof(box_key_part_def_t, field_type) == 8);
+	if (sizeof(void *) * CHAR_BIT == 64) {
+		assert(offsetof(box_key_part_def_t, collation) == 16);
+		assert(offsetof(box_key_part_def_t, path) == 24);
+	} else if (sizeof(void *) * CHAR_BIT == 32) {
+		assert(offsetof(box_key_part_def_t, collation) == 12);
+		assert(offsetof(box_key_part_def_t, path) == 16);
+	} else {
+		assert(false);
+	}
+
+	/*
+	 * Fill key part definition with nondefault values.
+	 * Fill padding and unknown flags with non-zero values.
+	 */
+	box_key_part_def_t part;
+	key_part_def_set_nondefault(&part);
+
+	/*
+	 * Verify that all known fields are set to default values and
+	 * all unknown fields and flags are set to zeros.
+	 */
+	box_key_part_def_create(&part);
+	key_part_def_check_default(&part);
+	key_part_def_check_zeros(&part);
+
+	box_key_def_t *key_def;
+
+	/* Should not accept zero part count. */
+	key_def = box_key_def_new_v2(NULL, 0);
+	assert(key_def == NULL);
+	check_diag("IllegalParams", "At least one key part is required");
+
+	/* Should not accept NULL as a <field_type>. */
+	key_def = box_key_def_new_v2(&part, 1);
+	assert(key_def == NULL);
+	check_diag("IllegalParams", "Field type is mandatory");
+
+	/* Success case. */
+	part.field_type = "unsigned";
+	key_def = box_key_def_new_v2(&part, 1);
+	assert(key_def != NULL);
+
+	/*
+	 * Prepare tuples to do some comparisons.
+	 *
+	 * [1, 2, 3] and [3, 2, 1].
+	 */
+	box_tuple_t *tuple_1 = new_runtime_tuple("\x93\x01\x02\x03", 4);
+	box_tuple_t *tuple_2 = new_runtime_tuple("\x93\x03\x02\x01", 4);
+
+	/*
+	 * Verify that key_def actually can be used in functions
+	 * that accepts it.
+	 *
+	 * Do several comparisons. Far away from being an
+	 * exhaustive comparator test.
+	 */
+	int rc;
+	rc = box_tuple_compare(tuple_1, tuple_1, key_def);
+	assert(rc == 0);
+	rc = box_tuple_compare(tuple_2, tuple_2, key_def);
+	assert(rc == 0);
+	rc = box_tuple_compare(tuple_1, tuple_2, key_def);
+	assert(rc < 0);
+	rc = box_tuple_compare(tuple_2, tuple_1, key_def);
+	assert(rc > 0);
+
+	/* The same idea, but perform comparisons against keys. */
+	rc = box_tuple_compare_with_key(tuple_1, "\x91\x00", key_def);
+	assert(rc > 0);
+	rc = box_tuple_compare_with_key(tuple_1, "\x91\x01", key_def);
+	assert(rc == 0);
+	rc = box_tuple_compare_with_key(tuple_1, "\x91\x02", key_def);
+	assert(rc < 0);
+
+	/* Clean up. */
+	box_tuple_unref(tuple_1);
+	box_tuple_unref(tuple_2);
+	box_key_def_delete(key_def);
+
+	lua_pushboolean(L, 1);
+	return 1;
+}
+
+/* }}} key_def api v2 */
+
 static int
 check_error(lua_State *L)
 {
@@ -740,6 +979,7 @@ luaopen_module_api(lua_State *L)
 		{"test_box_region", test_box_region},
 		{"test_tuple_encode", test_tuple_encode},
 		{"test_tuple_new", test_tuple_new},
+		{"test_key_def_new_v2", test_key_def_new_v2},
 		{NULL, NULL}
 	};
 	luaL_register(L, "module_api", lib);
diff --git a/test/app-tap/module_api.test.lua b/test/app-tap/module_api.test.lua
index f1b377e17..5145ab164 100755
--- a/test/app-tap/module_api.test.lua
+++ b/test/app-tap/module_api.test.lua
@@ -177,7 +177,7 @@ local function test_iscdata(test, module)
 end
 
 local test = require('tap').test("module_api", function(test)
-    test:plan(28)
+    test:plan(29)
     local status, module = pcall(require, 'module_api')
     test:is(status, true, "module")
     test:ok(status, "module is loaded")
-- 
2.25.0



More information about the Tarantool-patches mailing list