Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@freelists.org
Cc: kostja@tarantool.org
Subject: [tarantool-patches] [PATCH v2 3/8] json: lexer_eof and token_cmp helper functions
Date: Sat, 31 Aug 2019 23:35:53 +0200	[thread overview]
Message-ID: <c6712e02d94d15fb8aa415ff1953380de7335d9a.1567287197.git.v.shpilevoy@tarantool.org> (raw)
In-Reply-To: <cover.1567287197.git.v.shpilevoy@tarantool.org>

They are needed for the upcoming JSON updates, which have to
compare two JSON paths, parse them simultaneously, and dig into
a tuple.

json_token_cmp() existed before this patch, but it also compared
parent pointers, which is not needed for the JSON updates, since
they won't use JSON trees.

Needed for #1261
---
 src/lib/json/json.c | 37 +++++++++++++------------------------
 src/lib/json/json.h | 28 ++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/src/lib/json/json.c b/src/lib/json/json.c
index 1bfef172a..416c7dfda 100644
--- a/src/lib/json/json.c
+++ b/src/lib/json/json.c
@@ -55,7 +55,7 @@
 static inline int
 json_read_symbol(struct json_lexer *lexer, UChar32 *out)
 {
-	if (lexer->offset == lexer->src_len) {
+	if (json_lexer_is_eof(lexer)) {
 		*out = U_SENTINEL;
 		return lexer->symbol_count + 1;
 	}
@@ -211,7 +211,7 @@ json_parse_identifier(struct json_lexer *lexer, struct json_token *token)
 int
 json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 {
-	if (lexer->offset == lexer->src_len) {
+	if (json_lexer_is_eof(lexer)) {
 		token->type = JSON_TOKEN_END;
 		return 0;
 	}
@@ -223,7 +223,7 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 	switch(c) {
 	case (UChar32)'[':
 		/* Error for '[\0'. */
-		if (lexer->offset == lexer->src_len)
+		if (json_lexer_is_eof(lexer))
 			return lexer->symbol_count;
 		c = json_current_char(lexer);
 		if (c == '"' || c == '\'') {
@@ -240,14 +240,14 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 		 * Expression, started from [ must be finished
 		 * with ] regardless of its type.
 		 */
-		if (lexer->offset == lexer->src_len ||
+		if (json_lexer_is_eof(lexer) ||
 		    json_current_char(lexer) != ']')
 			return lexer->symbol_count + 1;
 		/* Skip ] - one byte char. */
 		json_skip_char(lexer);
 		return 0;
 	case (UChar32)'.':
-		if (lexer->offset == lexer->src_len)
+		if (json_lexer_is_eof(lexer))
 			return lexer->symbol_count + 1;
 		return json_parse_identifier(lexer, token);
 	default:
@@ -259,26 +259,15 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 }
 
 /**
- * Compare JSON token keys.
+ * Compare JSON tokens as nodes of a JSON tree. That is, including
+ * parent references.
  */
 static int
-json_token_cmp(const struct json_token *a, const struct json_token *b)
+json_token_cmp_in_tree(const struct json_token *a, const struct json_token *b)
 {
 	if (a->parent != b->parent)
 		return a->parent - b->parent;
-	if (a->type != b->type)
-		return a->type - b->type;
-	int ret = 0;
-	if (a->type == JSON_TOKEN_STR) {
-		if (a->len != b->len)
-			return a->len - b->len;
-		ret = memcmp(a->str, b->str, a->len);
-	} else if (a->type == JSON_TOKEN_NUM) {
-		ret = a->num - b->num;
-	} else {
-		assert(a->type == JSON_TOKEN_ANY);
-	}
-	return ret;
+	return json_token_cmp(a, b);
 }
 
 int
@@ -289,7 +278,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len,
 	json_lexer_create(&lexer_a, a, a_len, index_base);
 	json_lexer_create(&lexer_b, b, b_len, index_base);
 	struct json_token token_a, token_b;
-	/* For the sake of json_token_cmp(). */
+	/* For the sake of json_token_cmp_in_tree(). */
 	token_a.parent = NULL;
 	token_b.parent = NULL;
 	int rc_a, rc_b;
@@ -297,7 +286,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len,
 	       (rc_b = json_lexer_next_token(&lexer_b, &token_b)) == 0 &&
 		token_a.type != JSON_TOKEN_END &&
 		token_b.type != JSON_TOKEN_END) {
-		int rc = json_token_cmp(&token_a, &token_b);
+		int rc = json_token_cmp_in_tree(&token_a, &token_b);
 		if (rc != 0)
 			return rc;
 	}
@@ -423,8 +412,8 @@ json_tree_snprint_path(char *buf, int size, const struct json_token *token,
 #define mh_arg_t void *
 #define mh_hash(a, arg) ((*(a))->hash)
 #define mh_hash_key(a, arg) ((a)->hash)
-#define mh_cmp(a, b, arg) (json_token_cmp(*(a), *(b)))
-#define mh_cmp_key(a, b, arg) (json_token_cmp((a), *(b)))
+#define mh_cmp(a, b, arg) (json_token_cmp_in_tree(*(a), *(b)))
+#define mh_cmp_key(a, b, arg) (json_token_cmp_in_tree((a), *(b)))
 #include "salad/mhash.h"
 
 static const uint32_t hash_seed = 13U;
diff --git a/src/lib/json/json.h b/src/lib/json/json.h
index d66a9c7a4..08a0ee96c 100644
--- a/src/lib/json/json.h
+++ b/src/lib/json/json.h
@@ -241,6 +241,13 @@ json_lexer_create(struct json_lexer *lexer, const char *src, int src_len,
 int
 json_lexer_next_token(struct json_lexer *lexer, struct json_token *token);
 
+/** Check if @a lexer has finished parsing. */
+static inline bool
+json_lexer_is_eof(const struct json_lexer *lexer)
+{
+	return lexer->offset == lexer->src_len;
+}
+
 /**
  * Compare two JSON paths using Lexer class.
  * - in case of paths that have same token-sequence prefix,
@@ -279,6 +286,27 @@ json_token_is_leaf(struct json_token *token)
 	return token->max_child_idx < 0;
 }
 
+/**
+ * Compare two JSON tokens, not taking into account their tree
+ * attributes.
+ */
+static inline int
+json_token_cmp(const struct json_token *l, const struct json_token *r)
+{
+	if (l->type != r->type)
+		return l->type - r->type;
+	switch(l->type) {
+	case JSON_TOKEN_NUM:
+		return l->num - r->num;
+	case JSON_TOKEN_STR:
+		if (l->len != r->len)
+			return l->len - r->len;
+		return memcmp(l->str, r->str, l->len);
+	default:
+		return 0;
+	}
+}
+
 /**
  * Test if a given JSON token is multikey.
  */
-- 
2.20.1 (Apple Git-117)

  parent reply	other threads:[~2019-08-31 21:32 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-31 21:35 [tarantool-patches] [PATCH v2 0/8] JSON updates Vladislav Shpilevoy
2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 1/8] tuple: expose JSON go_to_key and go_to_index functions Vladislav Shpilevoy
2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 2/8] tuple: rework updates to improve code extendibility Vladislav Shpilevoy
     [not found]   ` <20190903192059.GE15611@atlas>
     [not found]     ` <6ee759cf-a975-e6a9-6f52-f855958ffe06@tarantool.org>
     [not found]       ` <20191005132055.GD3913@atlas>
     [not found]         ` <20191005135037.GJ3913@atlas>
2019-10-19 15:11           ` [Tarantool-patches] [tarantool-patches] " Vladislav Shpilevoy
2019-08-31 21:35 ` Vladislav Shpilevoy [this message]
     [not found]   ` <20190903192433.GF15611@atlas>
     [not found]     ` <f5612e04-dc56-f4bd-1298-c5841ac909f5@tarantool.org>
     [not found]       ` <20191005132231.GE3913@atlas>
     [not found]         ` <20191005135014.GI3913@atlas>
2019-10-19 15:08           ` [Tarantool-patches] [tarantool-patches] Re: [PATCH v2 3/8] json: lexer_eof and token_cmp helper functions Vladislav Shpilevoy
2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 4/8] tuple: account the whole array in field.data and size Vladislav Shpilevoy
2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 5/8] tuple: enable JSON bar updates Vladislav Shpilevoy
2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 6/8] tuple: make update operation tokens consumable Vladislav Shpilevoy
2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 7/8] tuple: JSON updates support intersection by arrays Vladislav Shpilevoy
2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 8/8] tuple: JSON updates support intersection by maps Vladislav Shpilevoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c6712e02d94d15fb8aa415ff1953380de7335d9a.1567287197.git.v.shpilevoy@tarantool.org \
    --to=v.shpilevoy@tarantool.org \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [tarantool-patches] [PATCH v2 3/8] json: lexer_eof and token_cmp helper functions' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox