From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Wed, 23 Jan 2019 16:15:50 +0300 From: Vladimir Davydov Subject: Re: [PATCH v8 2/5] box: refactor tuple_field_raw_by_path routine Message-ID: <20190123131549.72all7z77deyznjn@esperanza> References: MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: To: Kirill Shcherbatov Cc: tarantool-patches@freelists.org, kostja@tarantool.org List-ID: On Wed, Jan 16, 2019 at 04:44:40PM +0300, Kirill Shcherbatov wrote: > diff --git a/src/box/tuple.h b/src/box/tuple.h > index 83e5b7013..4368dac4e 100644 > --- a/src/box/tuple.h > +++ b/src/box/tuple.h > @@ -513,6 +513,23 @@ tuple_field(const struct tuple *tuple, uint32_t fieldno) > tuple_field_map(tuple), fieldno); > } > > +/** > + * Get a field refereed by index @part in tuple. > + * @param tuple Tuple to get the field from. > + * @param part Index part to use. There's no 'part' in this function. Please fix the comment. > + * @param path Relative JSON path to field data. > + * @param path_len The length of the @path. > + * @retval Field data if the field exists or NULL. > + */ > +static inline const char * > +tuple_field_by_path(const struct tuple *tuple, uint32_t fieldno, > + const char *path, uint32_t path_len) > +{ > + return tuple_field_raw_by_path(tuple_format(tuple), tuple_data(tuple), > + tuple_field_map(tuple), fieldno, > + path, path_len); > +} > + > /** > * Get a field refereed by index @part in tuple. > * @param tuple Tuple to get the field from. > @@ -532,38 +549,16 @@ tuple_field_by_part(const struct tuple *tuple, struct key_part *part) > * @param path Field JSON path. > * @param path_len Length of @a path. > * @param path_hash Hash of @a path. > - * @param[out] field Found field, or NULL, if not found. > - * > - * @retval 0 Success. > - * @retval -1 Error in JSON path. > - */ > -static inline int > -tuple_field_by_path(const struct tuple *tuple, const char *path, > - uint32_t path_len, uint32_t path_hash, > - const char **field) > -{ > - return tuple_field_raw_by_path(tuple_format(tuple), tuple_data(tuple), > - tuple_field_map(tuple), path, path_len, > - path_hash, field); > -} > - > -/** > - * Get tuple field by its name. > - * @param tuple Tuple to get field from. > - * @param name Field name. > - * @param name_len Length of @a name. > - * @param name_hash Hash of @a name. > * > - * @retval not NULL MessagePack field. > - * @retval NULL No field with @a name. > + * @retval field data if field exists or NULL Missing dot (.) Please update the comment so that it's clear what's the difference between tuple_field_by_full_path and tuple_field_path. Don't forget about tuple_field_raw_by_full_path. > */ > static inline const char * > -tuple_field_by_name(const struct tuple *tuple, const char *name, > - uint32_t name_len, uint32_t name_hash) > +tuple_field_by_full_path(const struct tuple *tuple, const char *path, > + uint32_t path_len, uint32_t path_hash) > { > - return tuple_field_raw_by_name(tuple_format(tuple), tuple_data(tuple), > - tuple_field_map(tuple), name, name_len, > - name_hash); > + return tuple_field_raw_by_full_path(tuple_format(tuple), tuple_data(tuple), > + tuple_field_map(tuple), > + path, path_len, path_hash); > } > > /** > diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h > index 30b93b610..d32bb91f2 100644 > --- a/src/box/tuple_format.h > +++ b/src/box/tuple_format.h > @@ -397,27 +397,33 @@ tuple_field_raw(struct tuple_format *format, const char *tuple, > } > > /** > - * Get tuple field by its name. > - * @param format Tuple format. > - * @param tuple MessagePack tuple's body. > - * @param field_map Tuple field map. > - * @param name Field name. > - * @param name_len Length of @a name. > - * @param name_hash Hash of @a name. > - * > - * @retval not NULL MessagePack field. > - * @retval NULL No field with @a name. > + * Retrieve msgpack data by JSON path. > + * @param data Pointer to msgpack with data. > + * @param path The path to process. > + * @param path_len The length of the @path. > + * @retval 0 On success. > + * @retval >0 On path parsing error, invalid character position. > + */ > +int > +tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len); The error position returned by this function isn't used anywhere. Let's make it return -1 on any error in the scope of this patch. > + > +/** > + * Get a field at the specific position in this MessagePack > + * array by fieldno and path. All related functions use doxygen comments: see tuple_field_raw, tuple_field_raw_by_full_path, tuple_field_by_part. Let's write a proper comment for this one too for the sake of consistency. > */ > static inline const char * > -tuple_field_raw_by_name(struct tuple_format *format, const char *tuple, > - const uint32_t *field_map, const char *name, > - uint32_t name_len, uint32_t name_hash) > +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, > + const uint32_t *field_map, uint32_t fieldno, > + const char *path, uint32_t path_len) > { > - uint32_t fieldno; > - if (tuple_fieldno_by_name(format->dict, name, name_len, name_hash, > - &fieldno) != 0) > + tuple = tuple_field_raw(format, tuple, field_map, fieldno); > + if (path == NULL) > + return tuple; > + if (unlikely(tuple == NULL)) > return NULL; > - return tuple_field_raw(format, tuple, field_map, fieldno); > + if (unlikely(tuple_field_go_to_path(&tuple, path, path_len) != 0)) > + return NULL; > + return tuple; > } > > /** ------------------------------------------------------------------------ To explain what I don't like about the refactoring done by this and following patches, let me paste the resulting code here and comment on it. > /** > * Get a field at the specific position in this MessagePack array. > * Returns a pointer to MessagePack data. > * @param format tuple format > * @param tuple a pointer to MessagePack array > * @param field_map a pointer to the LAST element of field map > * @param field_no the index of field to return > * > * @returns field data if field exists or NULL > * @sa tuple_init_field_map() > */ > static inline const char * > tuple_field_raw(struct tuple_format *format, const char *tuple, > const uint32_t *field_map, uint32_t field_no) > { > if (likely(field_no < format->index_field_count)) { > /* Indexed field */ > > if (field_no == 0) { > mp_decode_array(&tuple); > return tuple; > } > > int32_t offset_slot = tuple_format_field(format, > field_no)->offset_slot; > if (offset_slot != TUPLE_OFFSET_SLOT_NIL) { > if (field_map[offset_slot] != 0) > return tuple + field_map[offset_slot]; > else > return NULL; > } Note field_map manipulation. > } > ERROR_INJECT(ERRINJ_TUPLE_FIELD, return NULL); > uint32_t field_count = mp_decode_array(&tuple); > if (unlikely(field_no >= field_count)) > return NULL; > for (uint32_t k = 0; k < field_no; k++) > mp_next(&tuple); > return tuple; > } > > /** > * Retrieve msgpack data by JSON path. > * @param data Pointer to msgpack with data. > * @param path The path to process. > * @param path_len The length of the @path. > * @retval 0 On success. > * @retval >0 On path parsing error, invalid character position. > */ > int > tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len); > > /** > * Get a field at the specific position in this MessagePack > * array by fieldno and path. > * The offset_slot[out] may be specified to save it on use, > * set NULL otherwise. > */ > static inline const char * > tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, > const uint32_t *field_map, uint32_t fieldno, > const char *path, uint32_t path_len, > int32_t *offset_slot) > { > if (offset_slot != NULL) > *offset_slot = TUPLE_OFFSET_SLOT_NIL; > if (likely(path != NULL && fieldno < format->index_field_count)) { > /* Indexed field */ > struct tuple_field *field = > tuple_format_field_by_path(format, fieldno, > path, path_len); > if (field == NULL) > goto parse; > assert(field != NULL); > if (offset_slot != NULL) > *offset_slot = field->offset_slot; > if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { > assert(-field->offset_slot * sizeof(uint32_t) <= > format->field_map_size); > return field_map[field->offset_slot] == 0 ? > NULL : tuple + field_map[field->offset_slot]; > } Note another field_map manipulation, copied from tuple_field_raw. > } > parse: > tuple = tuple_field_raw(format, tuple, field_map, fieldno); And here we'll get exactly the same piece of code inlined in this function... > if (path == NULL) > return tuple; > if (unlikely(tuple == NULL)) > return NULL; > if (unlikely(tuple_field_go_to_path(&tuple, path, path_len) != 0)) > return NULL; > return tuple; > } > > /** > * Get tuple field by its path. > * @param format Tuple format. > * @param tuple MessagePack tuple's body. > * @param field_map Tuple field map. > * @param path Field path. > * @param path_len Length of @a path. > * @param path_hash Hash of @a path. > * > * @retval field data if field exists or NULL > */ > const char * > tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple, > const uint32_t *field_map, const char *path, > uint32_t path_len, uint32_t path_hash); > > /** > * Get a tuple field pointed to by an index part. > * @param format Tuple format. > * @param tuple A pointer to MessagePack array. > * @param field_map A pointer to the LAST element of field map. > * @param part Index part to use. > * @retval Field data if the field exists or NULL. > */ > static inline const char * > tuple_field_by_part_raw(struct tuple_format *format, const char *data, > const uint32_t *field_map, struct key_part *part) > { > if (likely(part->format_epoch == format->epoch)) { > int32_t offset_slot = part->offset_slot_cache; > assert(-offset_slot * sizeof(uint32_t) <= > format->field_map_size); > return field_map[offset_slot] != 0 ? > data + field_map[offset_slot] : NULL; Note yet another field_map manipulation. It is ugly that we have three places in the code where we use field_map+offset_slot in exactly the same fashion. What's especially ugly that this code will be multiplied by the compiler: two times for tuple_field_raw_by_path and three times for tuple_field_by_part_raw. I think that 1. We should implement tuple_field_raw_by_path without tuple_field_raw. 2. tuple_field_raw should be an alias for tuple_field_raw_by_path with path = NULL. That's OK - the compiler will remove unused code then so it shouldn't result in any performance penalty. 3. tuple_field_by_part_raw shouldn't check field_map directly. Instead it should pass the cached offset_slot to tuple_field_raw_by_path to do the job. I mean offset_slot ptr should become an in-out parameter of tuple_field_raw_by_path: if it's not NULL/OFFSET_SLOT_NIL then it should use it, otherwise it should try to look up offset_slot in tuple_format; if found and offset_slot is not NULL it should update the offset_slot with it. > } else { > assert(format->epoch != 0); > int32_t offset_slot; > const char *field = > tuple_field_raw_by_path(format, data, field_map, > part->fieldno, part->path, > part->path_len, &offset_slot); > if (offset_slot != TUPLE_OFFSET_SLOT_NIL) { > part->format_epoch = format->epoch; > part->offset_slot_cache = offset_slot; > } > return field; > } > }