Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH 0/2] JSON preparation part 6
@ 2019-11-11 23:10 Vladislav Shpilevoy
  2019-11-11 23:10 ` [Tarantool-patches] [PATCH 1/2] json: lexer_eof and token_cmp helper functions Vladislav Shpilevoy
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Vladislav Shpilevoy @ 2019-11-11 23:10 UTC (permalink / raw)
  To: tarantool-patches

The last preparatory patch for JSON updates. This is a couple of
old commits rebased on the latest xrow update code. Both already
got LGTM from Kostja. But I can't push them, so here is this
patchset.

Branch: http://github.com/tarantool/tarantool/tree/gerold103/gh-1261-json-updates-preparation-6
Issue: https://github.com/tarantool/tarantool/issues/1261

Vladislav Shpilevoy (2):
  json: lexer_eof and token_cmp helper functions
  tuple: account the whole array in field.data and size

 src/box/xrow_update.c       | 28 ++++++++++++++++------------
 src/box/xrow_update_array.c |  9 +++++----
 src/box/xrow_update_field.h |  6 ++++--
 src/lib/json/json.c         | 37 +++++++++++++------------------------
 src/lib/json/json.h         | 31 +++++++++++++++++++++++++++++++
 5 files changed, 69 insertions(+), 42 deletions(-)

-- 
2.21.0 (Apple Git-122.2)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Tarantool-patches] [PATCH 1/2] json: lexer_eof and token_cmp helper functions
  2019-11-11 23:10 [Tarantool-patches] [PATCH 0/2] JSON preparation part 6 Vladislav Shpilevoy
@ 2019-11-11 23:10 ` Vladislav Shpilevoy
  2019-11-11 23:10 ` [Tarantool-patches] [PATCH 2/2] tuple: account the whole array in field.data and size Vladislav Shpilevoy
  2019-11-12 10:01 ` [Tarantool-patches] [PATCH 0/2] JSON preparation part 6 Kirill Yukhin
  2 siblings, 0 replies; 4+ messages in thread
From: Vladislav Shpilevoy @ 2019-11-11 23:10 UTC (permalink / raw)
  To: tarantool-patches

They are needed in the upcoming JSON updates, which have to
compare two JSON paths, parse them simultaneously, and dig into
a tuple.

json_token_cmp() existed before this patch, but it was trying to
compare parent pointers too, which is not needed in the JSON
updates, since they won't use JSON trees.

Needed for #1261
---
 src/lib/json/json.c | 37 +++++++++++++------------------------
 src/lib/json/json.h | 31 +++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/src/lib/json/json.c b/src/lib/json/json.c
index 1bfef172a..416c7dfda 100644
--- a/src/lib/json/json.c
+++ b/src/lib/json/json.c
@@ -55,7 +55,7 @@
 static inline int
 json_read_symbol(struct json_lexer *lexer, UChar32 *out)
 {
-	if (lexer->offset == lexer->src_len) {
+	if (json_lexer_is_eof(lexer)) {
 		*out = U_SENTINEL;
 		return lexer->symbol_count + 1;
 	}
@@ -211,7 +211,7 @@ json_parse_identifier(struct json_lexer *lexer, struct json_token *token)
 int
 json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 {
-	if (lexer->offset == lexer->src_len) {
+	if (json_lexer_is_eof(lexer)) {
 		token->type = JSON_TOKEN_END;
 		return 0;
 	}
@@ -223,7 +223,7 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 	switch(c) {
 	case (UChar32)'[':
 		/* Error for '[\0'. */
-		if (lexer->offset == lexer->src_len)
+		if (json_lexer_is_eof(lexer))
 			return lexer->symbol_count;
 		c = json_current_char(lexer);
 		if (c == '"' || c == '\'') {
@@ -240,14 +240,14 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 		 * Expression, started from [ must be finished
 		 * with ] regardless of its type.
 		 */
-		if (lexer->offset == lexer->src_len ||
+		if (json_lexer_is_eof(lexer) ||
 		    json_current_char(lexer) != ']')
 			return lexer->symbol_count + 1;
 		/* Skip ] - one byte char. */
 		json_skip_char(lexer);
 		return 0;
 	case (UChar32)'.':
-		if (lexer->offset == lexer->src_len)
+		if (json_lexer_is_eof(lexer))
 			return lexer->symbol_count + 1;
 		return json_parse_identifier(lexer, token);
 	default:
@@ -259,26 +259,15 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 }
 
 /**
- * Compare JSON token keys.
+ * Compare JSON tokens as nodes of a JSON tree. That is, including
+ * parent references.
  */
 static int
-json_token_cmp(const struct json_token *a, const struct json_token *b)
+json_token_cmp_in_tree(const struct json_token *a, const struct json_token *b)
 {
 	if (a->parent != b->parent)
 		return a->parent - b->parent;
-	if (a->type != b->type)
-		return a->type - b->type;
-	int ret = 0;
-	if (a->type == JSON_TOKEN_STR) {
-		if (a->len != b->len)
-			return a->len - b->len;
-		ret = memcmp(a->str, b->str, a->len);
-	} else if (a->type == JSON_TOKEN_NUM) {
-		ret = a->num - b->num;
-	} else {
-		assert(a->type == JSON_TOKEN_ANY);
-	}
-	return ret;
+	return json_token_cmp(a, b);
 }
 
 int
@@ -289,7 +278,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len,
 	json_lexer_create(&lexer_a, a, a_len, index_base);
 	json_lexer_create(&lexer_b, b, b_len, index_base);
 	struct json_token token_a, token_b;
-	/* For the sake of json_token_cmp(). */
+	/* For the sake of json_token_cmp_in_tree(). */
 	token_a.parent = NULL;
 	token_b.parent = NULL;
 	int rc_a, rc_b;
@@ -297,7 +286,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len,
 	       (rc_b = json_lexer_next_token(&lexer_b, &token_b)) == 0 &&
 		token_a.type != JSON_TOKEN_END &&
 		token_b.type != JSON_TOKEN_END) {
-		int rc = json_token_cmp(&token_a, &token_b);
+		int rc = json_token_cmp_in_tree(&token_a, &token_b);
 		if (rc != 0)
 			return rc;
 	}
@@ -423,8 +412,8 @@ json_tree_snprint_path(char *buf, int size, const struct json_token *token,
 #define mh_arg_t void *
 #define mh_hash(a, arg) ((*(a))->hash)
 #define mh_hash_key(a, arg) ((a)->hash)
-#define mh_cmp(a, b, arg) (json_token_cmp(*(a), *(b)))
-#define mh_cmp_key(a, b, arg) (json_token_cmp((a), *(b)))
+#define mh_cmp(a, b, arg) (json_token_cmp_in_tree(*(a), *(b)))
+#define mh_cmp_key(a, b, arg) (json_token_cmp_in_tree((a), *(b)))
 #include "salad/mhash.h"
 
 static const uint32_t hash_seed = 13U;
diff --git a/src/lib/json/json.h b/src/lib/json/json.h
index d66a9c7a4..3218769a1 100644
--- a/src/lib/json/json.h
+++ b/src/lib/json/json.h
@@ -241,6 +241,13 @@ json_lexer_create(struct json_lexer *lexer, const char *src, int src_len,
 int
 json_lexer_next_token(struct json_lexer *lexer, struct json_token *token);
 
+/** Check if @a lexer has finished parsing. */
+static inline bool
+json_lexer_is_eof(const struct json_lexer *lexer)
+{
+	return lexer->offset == lexer->src_len;
+}
+
 /**
  * Compare two JSON paths using Lexer class.
  * - in case of paths that have same token-sequence prefix,
@@ -279,6 +286,30 @@ json_token_is_leaf(struct json_token *token)
 	return token->max_child_idx < 0;
 }
 
+/**
+ * Compare two JSON tokens, not taking into account their tree
+ * attributes. Only the token values are compared. That might be
+ * used to compare two JSON paths. String comparison of the paths
+ * may not work because the same token can be present in different
+ * forms: ['a'] == .a, for example.
+ */
+static inline int
+json_token_cmp(const struct json_token *l, const struct json_token *r)
+{
+	if (l->type != r->type)
+		return l->type - r->type;
+	switch(l->type) {
+	case JSON_TOKEN_NUM:
+		return l->num - r->num;
+	case JSON_TOKEN_STR:
+		if (l->len != r->len)
+			return l->len - r->len;
+		return memcmp(l->str, r->str, l->len);
+	default:
+		return 0;
+	}
+}
+
 /**
  * Test if a given JSON token is multikey.
  */
-- 
2.21.0 (Apple Git-122.2)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Tarantool-patches] [PATCH 2/2] tuple: account the whole array in field.data and size
  2019-11-11 23:10 [Tarantool-patches] [PATCH 0/2] JSON preparation part 6 Vladislav Shpilevoy
  2019-11-11 23:10 ` [Tarantool-patches] [PATCH 1/2] json: lexer_eof and token_cmp helper functions Vladislav Shpilevoy
@ 2019-11-11 23:10 ` Vladislav Shpilevoy
  2019-11-12 10:01 ` [Tarantool-patches] [PATCH 0/2] JSON preparation part 6 Kirill Yukhin
  2 siblings, 0 replies; 4+ messages in thread
From: Vladislav Shpilevoy @ 2019-11-11 23:10 UTC (permalink / raw)
  To: tarantool-patches

Before the patch a struct xrow_update_field object didn't account
for the array header in its .size and .data members. Indeed, it
was not needed, because updates could only be 'flat' anyway.
For example, consider the tuple:

    [mp_array, mp_uint, mp_uint, mp_uint]
              ^                         ^
             pos1                      pos2

Struct xrow_update_field.size and .data covered the memory from
pos1 to pos2, without the array header. The number of fields was
stored inside a rope object. This is why it made no sense to keep
a pointer to the array header.

But now updates are going to be non-flat, and not only for
arrays. There will be an update tree. Each node of that tree will
describe an update of some part of a tuple.

Some of the nodes will need to know exact borders of their
children, including headers. It is going to be used for fast
copying of neighbours of such children. Consider an example.

Tuple with one field consisting of nested maps:

    tuple = {}
    tuple[1] = {
        a = {
            b = {
                c = {
                    d = {1, 2, 3}
                }
            }
        }
    }

Update:

    {{'+', '[1].a.b.c.d[1]', 1}, {'+', '[1].a.b.c.d[2]', 1}}

To update such a tuple a simple tree will be built:

            root: [ [1] ]
                     |
 isolated path: [ 'a.b.c' ]
                     |
      leaves: [ [1] [2] [3] ]
                +1  +1   -

The root node keeps the borders of the whole tuple. It is a rope
with a single field.
This single field is a deeply updated map. Such deep multiple
updates with long common prefixes are stored as an isolated path
+ map/array in the end. Here the isolated path is 'a.b.c'. It
ends with the terminal array update.

Assume that the operations are applied and it is time to save the
result. Saving starts from the root.
The root rope will encode the root array header, and will try to
save the single field. The single field is an isolated update. It
needs to save everything before the old {1,2,3}, the new array
{2,2,3}, and everything after the old array. The simplest way to
do it is to know the exact borders of the old array {1,2,3} and
memcpy all memory before and after.

This is exactly what this patch makes possible. Everything before
xrow_update_field.data, and after xrow_update_field.data + .size
can be safely copied, and is not related to the field. To copy
adjacent memory it is not even necessary to know the field type.
Xrow_update_field.data and .size have the same meaning for all
field types.

Part of #1261
---
 src/box/xrow_update.c       | 28 ++++++++++++++++------------
 src/box/xrow_update_array.c |  9 +++++----
 src/box/xrow_update_field.h |  6 ++++--
 3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/src/box/xrow_update.c b/src/box/xrow_update.c
index bb98b30ad..123db081a 100644
--- a/src/box/xrow_update.c
+++ b/src/box/xrow_update.c
@@ -269,11 +269,12 @@ xrow_update_read_ops(struct xrow_update *update, const char *expr,
  * @retval -1 Error.
  */
 static int
-xrow_update_do_ops(struct xrow_update *update, const char *old_data,
-		   const char *old_data_end, uint32_t part_count)
+xrow_update_do_ops(struct xrow_update *update, const char *header,
+		   const char *old_data, const char *old_data_end,
+		   uint32_t part_count)
 {
-	if (xrow_update_array_create(&update->root, old_data, old_data_end,
-				     part_count) != 0)
+	if (xrow_update_array_create(&update->root, header, old_data,
+				     old_data_end, part_count) != 0)
 		return -1;
 	struct xrow_update_op *op = update->ops;
 	struct xrow_update_op *ops_end = op + update->op_count;
@@ -290,12 +291,12 @@ xrow_update_do_ops(struct xrow_update *update, const char *old_data,
  *        and it is enough to simply write the error to the log.
  */
 static int
-xrow_upsert_do_ops(struct xrow_update *update, const char *old_data,
-		   const char *old_data_end, uint32_t part_count,
-		   bool suppress_error)
+xrow_upsert_do_ops(struct xrow_update *update, const char *header,
+		   const char *old_data, const char *old_data_end,
+		   uint32_t part_count, bool suppress_error)
 {
-	if (xrow_update_array_create(&update->root, old_data, old_data_end,
-				     part_count) != 0)
+	if (xrow_update_array_create(&update->root, header, old_data,
+				     old_data_end, part_count) != 0)
 		return -1;
 	struct xrow_update_op *op = update->ops;
 	struct xrow_update_op *ops_end = op + update->op_count;
@@ -352,12 +353,14 @@ xrow_update_execute(const char *expr,const char *expr_end,
 {
 	struct xrow_update update;
 	xrow_update_init(&update, index_base);
+	const char *header = old_data;
 	uint32_t field_count = mp_decode_array(&old_data);
 
 	if (xrow_update_read_ops(&update, expr, expr_end, dict,
 				 field_count) != 0)
 		return NULL;
-	if (xrow_update_do_ops(&update, old_data, old_data_end, field_count))
+	if (xrow_update_do_ops(&update, header, old_data, old_data_end,
+			       field_count) != 0)
 		return NULL;
 	if (column_mask)
 		*column_mask = update.column_mask;
@@ -373,13 +376,14 @@ xrow_upsert_execute(const char *expr,const char *expr_end,
 {
 	struct xrow_update update;
 	xrow_update_init(&update, index_base);
+	const char *header = old_data;
 	uint32_t field_count = mp_decode_array(&old_data);
 
 	if (xrow_update_read_ops(&update, expr, expr_end, dict,
 				 field_count) != 0)
 		return NULL;
-	if (xrow_upsert_do_ops(&update, old_data, old_data_end, field_count,
-			       suppress_error))
+	if (xrow_upsert_do_ops(&update, header, old_data, old_data_end,
+			       field_count, suppress_error) != 0)
 		return NULL;
 	if (column_mask)
 		*column_mask = update.column_mask;
diff --git a/src/box/xrow_update_array.c b/src/box/xrow_update_array.c
index b5f443cd0..7f198076b 100644
--- a/src/box/xrow_update_array.c
+++ b/src/box/xrow_update_array.c
@@ -142,12 +142,13 @@ xrow_update_array_extract_item(struct xrow_update_field *field,
 }
 
 int
-xrow_update_array_create(struct xrow_update_field *field, const char *data,
-			 const char *data_end, uint32_t field_count)
+xrow_update_array_create(struct xrow_update_field *field, const char *header,
+			 const char *data, const char *data_end,
+			 uint32_t field_count)
 {
 	field->type = XUPDATE_ARRAY;
-	field->data = data;
-	field->size = data_end - data;
+	field->data = header;
+	field->size = data_end - header;
 	struct region *region = &fiber()->gc;
 	field->array.rope = xrow_update_rope_new(region);
 	if (field->array.rope == NULL)
diff --git a/src/box/xrow_update_field.h b/src/box/xrow_update_field.h
index 04e452d23..e90095b9e 100644
--- a/src/box/xrow_update_field.h
+++ b/src/box/xrow_update_field.h
@@ -334,6 +334,7 @@ xrow_update_##type##_store(struct xrow_update_field *field, char *out,		\
 /**
  * Initialize @a field as an array to update.
  * @param[out] field Field to initialize.
+ * @param header Header of the MessagePack array @a data.
  * @param data MessagePack data of the array to update.
  * @param data_end End of @a data.
  * @param field_count Field count in @data.
@@ -342,8 +343,9 @@ xrow_update_##type##_store(struct xrow_update_field *field, char *out,		\
  * @retval -1 Error.
  */
 int
-xrow_update_array_create(struct xrow_update_field *field, const char *data,
-			 const char *data_end, uint32_t field_count);
+xrow_update_array_create(struct xrow_update_field *field, const char *header,
+			 const char *data, const char *data_end,
+			 uint32_t field_count);
 
 OP_DECL_GENERIC(array)
 
-- 
2.21.0 (Apple Git-122.2)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Tarantool-patches] [PATCH 0/2] JSON preparation part 6
  2019-11-11 23:10 [Tarantool-patches] [PATCH 0/2] JSON preparation part 6 Vladislav Shpilevoy
  2019-11-11 23:10 ` [Tarantool-patches] [PATCH 1/2] json: lexer_eof and token_cmp helper functions Vladislav Shpilevoy
  2019-11-11 23:10 ` [Tarantool-patches] [PATCH 2/2] tuple: account the whole array in field.data and size Vladislav Shpilevoy
@ 2019-11-12 10:01 ` Kirill Yukhin
  2 siblings, 0 replies; 4+ messages in thread
From: Kirill Yukhin @ 2019-11-12 10:01 UTC (permalink / raw)
  To: Vladislav Shpilevoy; +Cc: tarantool-patches

Hello,

On 12 ноя 00:10, Vladislav Shpilevoy wrote:
> The last preparatory patch for JSON updates. This is a couple of
> old commits rebased on the latest xrow update code. Both already
> got LGTM from Kostja. But I can't push them, so here is this
> patchset.
> 
> Branch: http://github.com/tarantool/tarantool/tree/gerold103/gh-1261-json-updates-preparation-6
> Issue: https://github.com/tarantool/tarantool/issues/1261

I've checked the patchset into master.

--
Regards, Kirill Yukhin

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-11-12 10:01 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-11 23:10 [Tarantool-patches] [PATCH 0/2] JSON preparation part 6 Vladislav Shpilevoy
2019-11-11 23:10 ` [Tarantool-patches] [PATCH 1/2] json: lexer_eof and token_cmp helper functions Vladislav Shpilevoy
2019-11-11 23:10 ` [Tarantool-patches] [PATCH 2/2] tuple: account the whole array in field.data and size Vladislav Shpilevoy
2019-11-12 10:01 ` [Tarantool-patches] [PATCH 0/2] JSON preparation part 6 Kirill Yukhin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox