[Tarantool-patches] [PATCH 09/14] WIP: module api: add box_key_def_new_ex()
Alexander Turenko
alexander.turenko at tarantool.org
Wed Sep 23 04:14:14 MSK 2020
Unlike box_key_def_new() it allows to set nullability, collation and
JSON path.
Note: JSON paths are not supported in the backported version of the
patch for 1.10.
Provided public non-opaque key part definition structure to create a key
definition. The next commit will also use this structure to dump a key
definition.
There are several technical points around the box_key_part_def_t
structure. They are mainly about providing stable ABI.
- Two uint32_t fields are placed first for better aligning of next
fields (pointers, which are usually 64 bit wide).
- A padding is used to guarantee that the structure size will remain the
same across tarantool versions. It allows to allocate an array of such
structures.
- The padding array is not a field of the structure itself, but added as
a union variant (see the code). It allows to get rid of manual
calculation of cumulative fields size, which is hard to do in a
platform independent way.
- A minimal size of the structure is guaranteed by the union with
padding, but a static assert is required to guarantee that the size
will not overrun the predefined value.
- PACKED is added as an extra remedy to make the structure layout
predictable.
- A bit flag is used for is_nullable. bool is considered as too
expensive (it requires 8 bits). bitfields (int:1 and so on) do not
guarantee certain data layout (it is compiler implementation detail),
while a module is compiled outside of tarantool build and may use
different toolchain. A bit flag is the only choice.
- A collation is identified using a string. Different IDs may be used on
different tarantool instances for collations. The only 'real'
identifier is a collation name, so using it as identifier in the API
should be more convenient and less error-prone.
- A field type is also identified using a string instead of a number. We
have <enum field_type> in the module API, but it cannot be used here,
because IDs are changed across tarantool versions. Aside from this, size
of an enum is compiler defined. Anyway, we can expose field types as
numbers and implement number-to-name and name-to-number mapping
functions, but IMHO it would just add extra complexity.
XXX: Add a module API test.
Part of #5273
---
src/box/key_def_api.c | 146 ++++++++++++++++++++++++++++++++++++++++++
src/box/key_def_api.h | 123 +++++++++++++++++++++++++++++++++++
src/exports.h | 2 +
3 files changed, 271 insertions(+)
diff --git a/src/box/key_def_api.c b/src/box/key_def_api.c
index 7f6c0ac55..19590095d 100644
--- a/src/box/key_def_api.c
+++ b/src/box/key_def_api.c
@@ -30,6 +30,89 @@
*/
#include "key_def_api.h"
#include "key_def.h"
+#include "small/region.h"
+#include "json/json.h"
+#include "coll_id_cache.h"
+#include "tuple_format.h"
+#include "field_def.h"
+#include "coll_id_cache.h"
+#include "fiber.h"
+
+/* {{{ Helpers */
+
+/**
+ * Translate a public key part definition into the internal one.
+ *
+ * The internal part starts as a copy of key_part_def_default and
+ * then receives the field number, the field type, nullability,
+ * the collation id and the JSON path from the public part. The
+ * JSON path is copied onto the given region.
+ *
+ * \param internal_part internal key part definition to fill
+ * \param part public key part definition to read from
+ * \param region region to allocate the JSON path copy on
+ *
+ * \retval 0 success
+ * \retval -1 error, diag is set
+ */
+static int
+key_def_set_internal_part(struct key_part_def *internal_part,
+			  box_key_part_def_t *part, struct region *region)
+{
+	*internal_part = key_part_def_default;
+	internal_part->fieldno = part->fieldno;
+
+	/* Field type: mandatory, looked up by its name. */
+	const char *type_name = part->field_type;
+	if (type_name == NULL) {
+		diag_set(IllegalParams, "Field type is mandatory");
+		return -1;
+	}
+	internal_part->type = field_type_by_name(type_name,
+						 strlen(type_name));
+	if (internal_part->type == field_type_MAX) {
+		diag_set(IllegalParams, "Unknown field type: \"%s\"",
+			 type_name);
+		return -1;
+	}
+
+	/* Nullability: touched only when the flag is raised. */
+	uint32_t nullable_bits = part->flags &
+		BOX_KEY_PART_DEF_IS_NULLABLE_MASK;
+	if (nullable_bits == BOX_KEY_PART_DEF_IS_NULLABLE_MASK) {
+		internal_part->is_nullable = true;
+		internal_part->nullable_action = ON_CONFLICT_ACTION_NONE;
+	}
+
+	/* Collation: optional, resolved from its name to an id. */
+	const char *coll_name = part->collation;
+	if (coll_name != NULL) {
+		struct coll_id *coll_id = coll_by_name(coll_name,
+						       strlen(coll_name));
+		if (coll_id == NULL) {
+			diag_set(IllegalParams, "Unknown collation: \"%s\"",
+				 coll_name);
+			return -1;
+		}
+		internal_part->coll_id = coll_id->id;
+	}
+
+	/* JSON path: optional, nothing more to do without it. */
+	const char *path = part->path;
+	if (path == NULL)
+		return 0;
+
+	/* The size includes the terminating zero byte. */
+	size_t path_size = strlen(path) + 1;
+	if (json_path_validate(path, path_size - 1, TUPLE_INDEX_BASE) != 0) {
+		diag_set(IllegalParams, "Invalid JSON path: \"%s\"", path);
+		return -1;
+	}
+	char *path_copy = region_alloc(region, path_size);
+	if (path_copy == NULL) {
+		diag_set(OutOfMemory, path_size, "region", "path");
+		return -1;
+	}
+	memcpy(path_copy, path, path_size);
+	internal_part->path = path_copy;
+	return 0;
+}
+
+/* }}} Helpers */
+
+/* {{{ API functions implementations */
+
+
+/* }}} Helpers */
+
+/* {{{ API functions implementations */
+
+/**
+ * Reset a key part definition to its default state by filling
+ * the whole object with zero bytes.
+ */
+void
+box_key_part_def_create(box_key_part_def_t *part)
+{
+	memset(part, 0, sizeof(box_key_part_def_t));
+}
box_key_def_t *
box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
@@ -55,6 +138,67 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
return key_def;
}
+/**
+ * Create a key definition from an array of public key part
+ * definitions.
+ *
+ * Each public part is converted into an internal
+ * <struct key_part_def> on the fiber region, the array is passed
+ * to key_def_new() and the region is rolled back to its state
+ * at the moment when the conversion started.
+ *
+ * \param parts array of key part definitions
+ * \param part_count number of elements in <parts>
+ *
+ * \retval NULL on error (wrong arguments, unknown field type or
+ *         collation, invalid JSON path, out of memory); diag is
+ *         set
+ * \retval a new key definition otherwise
+ */
+box_key_def_t *
+box_key_def_new_ex(box_key_part_def_t *parts, uint32_t part_count)
+{
+	/*
+	 * Validate the arguments before allocating anything, so
+	 * an early return leaves the region untouched.
+	 */
+	if (part_count == 0) {
+		diag_set(IllegalParams, "At least one key part is required");
+		return NULL;
+	}
+	if (parts == NULL) {
+		diag_set(IllegalParams, "Key parts array is mandatory");
+		return NULL;
+	}
+
+	struct region *region = &fiber()->gc;
+	size_t region_svp = region_used(region);
+	size_t internal_parts_size;
+	struct key_part_def *internal_parts =
+		region_alloc_array(region, typeof(internal_parts[0]),
+				   part_count, &internal_parts_size);
+	/* Check the allocation result, not the caller's array. */
+	if (internal_parts == NULL) {
+		diag_set(OutOfMemory, internal_parts_size, "region_alloc_array",
+			 "parts");
+		return NULL;
+	}
+
+	/*
+	 * It is possible to implement a function similar to
+	 * key_def_new() and eliminate <box_key_part_def_t> ->
+	 * <struct key_part_def> copying. However this would lead
+	 * to code duplication and would complicate maintenance,
+	 * so it worth to do so only if key_def creation will
+	 * appear on a hot path in some meaningful use case.
+	 */
+	uint32_t min_field_count = 0;
+	for (uint32_t i = 0; i < part_count; ++i) {
+		if (key_def_set_internal_part(&internal_parts[i], &parts[i],
+					      region) != 0) {
+			region_truncate(region, region_svp);
+			return NULL;
+		}
+		/*
+		 * NOTE(review): the largest non-nullable fieldno is
+		 * used here as is (not fieldno + 1); confirm that
+		 * this is what key_def_update_optionality() expects.
+		 */
+		bool is_nullable =
+			(parts[i].flags & BOX_KEY_PART_DEF_IS_NULLABLE_MASK) ==
+			BOX_KEY_PART_DEF_IS_NULLABLE_MASK;
+		if (!is_nullable && parts[i].fieldno > min_field_count)
+			min_field_count = parts[i].fieldno;
+	}
+
+	struct key_def *key_def = key_def_new(internal_parts, part_count,
+					      false);
+	/* The internal parts are not needed after key_def_new(). */
+	region_truncate(region, region_svp);
+	if (key_def == NULL)
+		return NULL;
+
+	/*
+	 * Update key_def->has_optional_parts and function
+	 * pointers.
+	 *
+	 * FIXME: It seems, this call should be part of
+	 * key_def_new(), because otherwise a callee function may
+	 * obtain an incorrect key_def. However I don't know any
+	 * case that would prove this guess.
+	 */
+	key_def_update_optionality(key_def, min_field_count);
+
+	return key_def;
+}
+
+
void
box_key_def_delete(box_key_def_t *key_def)
{
@@ -77,3 +221,5 @@ box_tuple_compare_with_key(box_tuple_t *tuple_a, const char *key_b,
part_count, HINT_NONE, key_def);
}
+
+/* }}} API functions implementations */
diff --git a/src/box/key_def_api.h b/src/box/key_def_api.h
index 5b1c861f5..328a58c70 100644
--- a/src/box/key_def_api.h
+++ b/src/box/key_def_api.h
@@ -44,11 +44,104 @@ typedef struct tuple box_tuple_t;
typedef struct key_def box_key_def_t;
+/**
+ * Key part definition flags.
+ *
+ * Bits of the <flags> field of <box_key_part_def_t>. A flag is
+ * described by its bit position (the SHIFT constant) and by the
+ * corresponding single-bit mask (the MASK constant).
+ *
+ * BOX_KEY_PART_DEF_IS_NULLABLE_MASK marks a key part as nullable.
+ */
+enum {
+ BOX_KEY_PART_DEF_IS_NULLABLE_SHIFT = 0,
+ BOX_KEY_PART_DEF_IS_NULLABLE_MASK =
+ 1 << BOX_KEY_PART_DEF_IS_NULLABLE_SHIFT,
+};
+
+/**
+ * Size of <box_key_part_def_t> in bytes.
+ *
+ * It is recommended to verify size of <box_key_part_def_t>
+ * against this constant on the module side at build time.
+ * Example:
+ *
+ * | #if !defined(__cplusplus) && !defined(static_assert)
+ * | #define static_assert _Static_assert
+ * | #endif
+ * |
+ * | (slash)*
+ * | * Verify that <box_key_part_def_t> has the same size when
+ * | * compiled within tarantool and within the module.
+ * | *
+ * | * It is important, because the module allocates an array of key
+ * | * parts and passes it to <box_key_def_new_ex>() tarantool
+ * | * function.
+ * | *(slash)
+ * | static_assert(sizeof(box_key_part_def_t) == BOX_KEY_PART_DEF_T_SIZE,
+ * | "sizeof(box_key_part_def_t)");
+ *
+ * This snippet is not part of module.h, because portability of
+ * static_assert() / _Static_assert() is dubious. It is up to a
+ * module author to decide how portable the module code should be.
+ */
+enum {
+ BOX_KEY_PART_DEF_T_SIZE = 64,
+};
+
+/**
+ * Public representation of a key part definition.
+ *
+ * Usage: Allocate an array of such key parts, initialize each
+ * key part (call <box_key_part_def_create>() and set necessary
+ * fields), pass the array into <box_key_def_new_ex>() function.
+ *
+ * The idea of separation from internal <struct key_part_def> is
+ * to provide stable API and ABI for modules.
+ *
+ * The type is a union of the actual fields and of a padding
+ * array of BOX_KEY_PART_DEF_T_SIZE bytes, so the size stays
+ * BOX_KEY_PART_DEF_T_SIZE as long as the fields fit into the
+ * padding.
+ *
+ * New fields may be added into the end of the structure in later
+ * tarantool versions. Also new flags may be introduced within
+ * <flags> field. <collation> cannot be changed to a union (to
+ * reuse for some other value), because it is verified even for
+ * a non-string key part by <box_key_def_new_ex>().
+ *
+ * Fields that are unknown at given tarantool version are ignored.
+ */
+typedef union PACKED {
+ struct {
+ /** Index of a tuple field (zero based). */
+ uint32_t fieldno;
+ /**
+ * Flags, e.g. nullability (see
+ * BOX_KEY_PART_DEF_IS_NULLABLE_MASK).
+ */
+ uint32_t flags;
+ /**
+ * Type of the tuple field, given by its name.
+ *
+ * Mandatory: <box_key_def_new_ex>() fails if it is
+ * NULL or is not a known field type name.
+ */
+ const char *field_type;
+ /**
+ * Collation name for string comparisons.
+ *
+ * May be NULL. A name of an unknown collation makes
+ * <box_key_def_new_ex>() fail.
+ */
+ const char *collation;
+ /**
+ * JSON path to point a nested field. May be NULL.
+ *
+ * Example:
+ *
+ * tuple: [1, {"foo": "bar"}]
+ * key parts: [
+ * {
+ * "fieldno": 2,
+ * "type": "string",
+ * "path": "foo"
+ * }
+ * ]
+ *
+ * => key: ["bar"]
+ *
+ * NOTE(review): the example points the second tuple
+ * field with "fieldno": 2, while <fieldno> above is
+ * documented as zero based. Presumably the example
+ * uses one-based numbering; confirm.
+ *
+ * Note: When the path is given, <field_type>
+ * means type of the nested field.
+ */
+ const char *path;
+ };
+ /**
+ * Padding to guarantee certain size across different
+ * tarantool versions.
+ */
+ char padding[BOX_KEY_PART_DEF_T_SIZE];
+} box_key_part_def_t;
+
/**
* Create key definition with given field numbers and field types.
*
* May be used for tuple format creation and/or tuple comparison.
*
+ * \sa <box_key_def_new_ex>().
+ *
* \param fields array with key field identifiers
* \param types array with key field types (see enum field_type)
* \param part_count the number of key fields
@@ -57,6 +150,28 @@ typedef struct key_def box_key_def_t;
API_EXPORT box_key_def_t *
box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count);
+/**
+ * Initialize a key part with default values.
+ *
+ * All trailing padding bytes are set to zero.
+ *
+ * All unknown <flags> bits are set to zero.
+ *
+ * \param part key part definition to initialize
+ */
+API_EXPORT void
+box_key_part_def_create(box_key_part_def_t *part);
+
+/**
+ * Create a key_def from given key parts.
+ *
+ * Unlike <box_key_def_new>() this function allows to define
+ * nullability, collation and other options for each key part.
+ *
+ * <box_key_part_def_t> fields that are unknown at given tarantool
+ * version are ignored. The same for unknown <flags> bits.
+ *
+ * \sa <box_key_part_def_create>(), <box_key_def_delete>().
+ *
+ * \param parts array of key part definitions
+ * \param part_count the number of key parts in the array
+ * \returns a new key definition, or NULL on error (diag is set)
+ */
+API_EXPORT box_key_def_t *
+box_key_def_new_ex(box_key_part_def_t *parts, uint32_t part_count);
+
/**
* Delete key definition
*
@@ -94,6 +209,14 @@ box_tuple_compare_with_key(box_tuple_t *tuple_a, const char *key_b,
/** \endcond public */
+/*
+ * Size of the structure should remain the same across all
+ * tarantool versions in order to allow to allocate an array of
+ * them.
+ *
+ * The union with the padding array keeps the size from being
+ * less than BOX_KEY_PART_DEF_T_SIZE; this assert catches the
+ * case when the fields outgrow the padding.
+ */
+static_assert(sizeof(box_key_part_def_t) == BOX_KEY_PART_DEF_T_SIZE,
+ "sizeof(box_key_part_def_t)");
+
#if defined(__cplusplus)
} /* extern "C" */
#endif /* defined(__cplusplus) */
diff --git a/src/exports.h b/src/exports.h
index 10baefa6e..80dd952c7 100644
--- a/src/exports.h
+++ b/src/exports.h
@@ -31,6 +31,8 @@ EXPORT(box_iterator_free)
EXPORT(box_iterator_next)
EXPORT(box_key_def_delete)
EXPORT(box_key_def_new)
+EXPORT(box_key_def_new_ex)
+EXPORT(box_key_part_def_create)
EXPORT(box_latch_delete)
EXPORT(box_latch_lock)
EXPORT(box_latch_new)
--
2.25.0
More information about the Tarantool-patches
mailing list