[PATCH v8 2/5] box: refactor tuple_field_raw_by_path routine
Vladimir Davydov
vdavydov.dev at gmail.com
Wed Jan 23 16:15:50 MSK 2019
On Wed, Jan 16, 2019 at 04:44:40PM +0300, Kirill Shcherbatov wrote:
> diff --git a/src/box/tuple.h b/src/box/tuple.h
> index 83e5b7013..4368dac4e 100644
> --- a/src/box/tuple.h
> +++ b/src/box/tuple.h
> @@ -513,6 +513,23 @@ tuple_field(const struct tuple *tuple, uint32_t fieldno)
> tuple_field_map(tuple), fieldno);
> }
>
> +/**
> + * Get a field refereed by index @part in tuple.
> + * @param tuple Tuple to get the field from.
> + * @param part Index part to use.
There's no 'part' in this function. Please fix the comment.
> + * @param path Relative JSON path to field data.
> + * @param path_len The length of the @path.
> + * @retval Field data if the field exists or NULL.
> + */
> +static inline const char *
> +tuple_field_by_path(const struct tuple *tuple, uint32_t fieldno,
> + const char *path, uint32_t path_len)
> +{
> + return tuple_field_raw_by_path(tuple_format(tuple), tuple_data(tuple),
> + tuple_field_map(tuple), fieldno,
> + path, path_len);
> +}
> +
> /**
> * Get a field refereed by index @part in tuple.
> * @param tuple Tuple to get the field from.
> @@ -532,38 +549,16 @@ tuple_field_by_part(const struct tuple *tuple, struct key_part *part)
> * @param path Field JSON path.
> * @param path_len Length of @a path.
> * @param path_hash Hash of @a path.
> - * @param[out] field Found field, or NULL, if not found.
> - *
> - * @retval 0 Success.
> - * @retval -1 Error in JSON path.
> - */
> -static inline int
> -tuple_field_by_path(const struct tuple *tuple, const char *path,
> - uint32_t path_len, uint32_t path_hash,
> - const char **field)
> -{
> - return tuple_field_raw_by_path(tuple_format(tuple), tuple_data(tuple),
> - tuple_field_map(tuple), path, path_len,
> - path_hash, field);
> -}
> -
> -/**
> - * Get tuple field by its name.
> - * @param tuple Tuple to get field from.
> - * @param name Field name.
> - * @param name_len Length of @a name.
> - * @param name_hash Hash of @a name.
> *
> - * @retval not NULL MessagePack field.
> - * @retval NULL No field with @a name.
> + * @retval field data if field exists or NULL
Missing dot (.)
Please update the comment so that it's clear what's the difference
between tuple_field_by_full_path and tuple_field_path. Don't forget
about tuple_field_raw_by_full_path.
> */
> static inline const char *
> -tuple_field_by_name(const struct tuple *tuple, const char *name,
> - uint32_t name_len, uint32_t name_hash)
> +tuple_field_by_full_path(const struct tuple *tuple, const char *path,
> + uint32_t path_len, uint32_t path_hash)
> {
> - return tuple_field_raw_by_name(tuple_format(tuple), tuple_data(tuple),
> - tuple_field_map(tuple), name, name_len,
> - name_hash);
> + return tuple_field_raw_by_full_path(tuple_format(tuple), tuple_data(tuple),
> + tuple_field_map(tuple),
> + path, path_len, path_hash);
> }
>
> /**
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index 30b93b610..d32bb91f2 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -397,27 +397,33 @@ tuple_field_raw(struct tuple_format *format, const char *tuple,
> }
>
> /**
> - * Get tuple field by its name.
> - * @param format Tuple format.
> - * @param tuple MessagePack tuple's body.
> - * @param field_map Tuple field map.
> - * @param name Field name.
> - * @param name_len Length of @a name.
> - * @param name_hash Hash of @a name.
> - *
> - * @retval not NULL MessagePack field.
> - * @retval NULL No field with @a name.
> + * Retrieve msgpack data by JSON path.
> + * @param data Pointer to msgpack with data.
> + * @param path The path to process.
> + * @param path_len The length of the @path.
> + * @retval 0 On success.
> + * @retval >0 On path parsing error, invalid character position.
> + */
> +int
> +tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len);
The error position returned by this function isn't used anywhere.
Let's make it return -1 on any error in the scope of this patch.
> +
> +/**
> + * Get a field at the specific position in this MessagePack
> + * array by fieldno and path.
All related functions use doxygen comments: see tuple_field_raw,
tuple_field_raw_by_full_path, tuple_field_by_part. Let's write
a proper comment for this one too for the sake of consistency.
> */
> static inline const char *
> -tuple_field_raw_by_name(struct tuple_format *format, const char *tuple,
> - const uint32_t *field_map, const char *name,
> - uint32_t name_len, uint32_t name_hash)
> +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
> + const uint32_t *field_map, uint32_t fieldno,
> + const char *path, uint32_t path_len)
> {
> - uint32_t fieldno;
> - if (tuple_fieldno_by_name(format->dict, name, name_len, name_hash,
> - &fieldno) != 0)
> + tuple = tuple_field_raw(format, tuple, field_map, fieldno);
> + if (path == NULL)
> + return tuple;
> + if (unlikely(tuple == NULL))
> return NULL;
> - return tuple_field_raw(format, tuple, field_map, fieldno);
> + if (unlikely(tuple_field_go_to_path(&tuple, path, path_len) != 0))
> + return NULL;
> + return tuple;
> }
>
> /**
------------------------------------------------------------------------
To explain what I don't like about the refactoring done by this and
following patches, let me paste the resulting code here and comment
on it.
> /**
> * Get a field at the specific position in this MessagePack array.
> * Returns a pointer to MessagePack data.
> * @param format tuple format
> * @param tuple a pointer to MessagePack array
> * @param field_map a pointer to the LAST element of field map
> * @param field_no the index of field to return
> *
> * @returns field data if field exists or NULL
> * @sa tuple_init_field_map()
> */
> static inline const char *
> tuple_field_raw(struct tuple_format *format, const char *tuple,
> const uint32_t *field_map, uint32_t field_no)
> {
> if (likely(field_no < format->index_field_count)) {
> /* Indexed field */
>
> if (field_no == 0) {
> mp_decode_array(&tuple);
> return tuple;
> }
>
> int32_t offset_slot = tuple_format_field(format,
> field_no)->offset_slot;
> if (offset_slot != TUPLE_OFFSET_SLOT_NIL) {
> if (field_map[offset_slot] != 0)
> return tuple + field_map[offset_slot];
> else
> return NULL;
> }
Note field_map manipulation.
> }
> ERROR_INJECT(ERRINJ_TUPLE_FIELD, return NULL);
> uint32_t field_count = mp_decode_array(&tuple);
> if (unlikely(field_no >= field_count))
> return NULL;
> for (uint32_t k = 0; k < field_no; k++)
> mp_next(&tuple);
> return tuple;
> }
>
> /**
> * Retrieve msgpack data by JSON path.
> * @param data Pointer to msgpack with data.
> * @param path The path to process.
> * @param path_len The length of the @path.
> * @retval 0 On success.
> * @retval >0 On path parsing error, invalid character position.
> */
> int
> tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len);
>
> /**
> * Get a field at the specific position in this MessagePack
> * array by fieldno and path.
> * The offset_slot[out] may be specified to save it on use,
> * set NULL otherwise.
> */
> static inline const char *
> tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
> const uint32_t *field_map, uint32_t fieldno,
> const char *path, uint32_t path_len,
> int32_t *offset_slot)
> {
> if (offset_slot != NULL)
> *offset_slot = TUPLE_OFFSET_SLOT_NIL;
> if (likely(path != NULL && fieldno < format->index_field_count)) {
> /* Indexed field */
> struct tuple_field *field =
> tuple_format_field_by_path(format, fieldno,
> path, path_len);
> if (field == NULL)
> goto parse;
> assert(field != NULL);
> if (offset_slot != NULL)
> *offset_slot = field->offset_slot;
> if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
> assert(-field->offset_slot * sizeof(uint32_t) <=
> format->field_map_size);
> return field_map[field->offset_slot] == 0 ?
> NULL : tuple + field_map[field->offset_slot];
> }
Note another field_map manipulation, copied from tuple_field_raw.
> }
> parse:
> tuple = tuple_field_raw(format, tuple, field_map, fieldno);
And here we'll get exactly the same piece of code inlined in this
function...
> if (path == NULL)
> return tuple;
> if (unlikely(tuple == NULL))
> return NULL;
> if (unlikely(tuple_field_go_to_path(&tuple, path, path_len) != 0))
> return NULL;
> return tuple;
> }
>
> /**
> * Get tuple field by its path.
> * @param format Tuple format.
> * @param tuple MessagePack tuple's body.
> * @param field_map Tuple field map.
> * @param path Field path.
> * @param path_len Length of @a path.
> * @param path_hash Hash of @a path.
> *
> * @retval field data if field exists or NULL
> */
> const char *
> tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple,
> const uint32_t *field_map, const char *path,
> uint32_t path_len, uint32_t path_hash);
>
> /**
> * Get a tuple field pointed to by an index part.
> * @param format Tuple format.
> * @param tuple A pointer to MessagePack array.
> * @param field_map A pointer to the LAST element of field map.
> * @param part Index part to use.
> * @retval Field data if the field exists or NULL.
> */
> static inline const char *
> tuple_field_by_part_raw(struct tuple_format *format, const char *data,
> const uint32_t *field_map, struct key_part *part)
> {
> if (likely(part->format_epoch == format->epoch)) {
> int32_t offset_slot = part->offset_slot_cache;
> assert(-offset_slot * sizeof(uint32_t) <=
> format->field_map_size);
> return field_map[offset_slot] != 0 ?
> data + field_map[offset_slot] : NULL;
Note yet another field_map manipulation.
It is ugly that we have three places in the code where we use
field_map+offset_slot in exactly the same fashion. What's especially
ugly that this code will be multiplied by the compiler: two times for
tuple_field_raw_by_path and three times for tuple_field_by_part_raw.
I think that
1. We should implement tuple_field_raw_by_path without tuple_field_raw.
2. tuple_field_raw should be an alias for tuple_field_raw_by_path with
path = NULL. That's OK - the compiler will remove unused code then
so it shouldn't result in any performance penalty.
3. tuple_field_by_part_raw shouldn't check field_map directly. Instead
it should pass the cached offset_slot to tuple_field_raw_by_path to
do the job. I mean offset_slot ptr should become an in-out parameter
of tuple_field_raw_by_path: if it's not NULL/OFFSET_SLOT_NIL then it
should use it, otherwise it should try to look up offset_slot in
tuple_format; if found and offset_slot is not NULL it should update
the offset_slot with it.
> } else {
> assert(format->epoch != 0);
> int32_t offset_slot;
> const char *field =
> tuple_field_raw_by_path(format, data, field_map,
> part->fieldno, part->path,
> part->path_len, &offset_slot);
> if (offset_slot != TUPLE_OFFSET_SLOT_NIL) {
> part->format_epoch = format->epoch;
> part->offset_slot_cache = offset_slot;
> }
> return field;
> }
> }
More information about the Tarantool-patches
mailing list