[Tarantool-patches] [PATCH 1/2] json: lexer_eof and token_cmp helper functions

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Tue Nov 12 02:10:47 MSK 2019


They are needed by the upcoming JSON updates, which have to
compare two JSON paths, parse them simultaneously, and dig into
a tuple.

json_token_cmp() existed before this patch, but it also compared
parent pointers, which is not needed for the JSON updates, since
they won't use JSON trees. The old, parent-aware comparison is
kept as json_token_cmp_in_tree().

Needed for #1261
---
 src/lib/json/json.c | 37 +++++++++++++------------------------
 src/lib/json/json.h | 31 +++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/src/lib/json/json.c b/src/lib/json/json.c
index 1bfef172a..416c7dfda 100644
--- a/src/lib/json/json.c
+++ b/src/lib/json/json.c
@@ -55,7 +55,7 @@
 static inline int
 json_read_symbol(struct json_lexer *lexer, UChar32 *out)
 {
-	if (lexer->offset == lexer->src_len) {
+	if (json_lexer_is_eof(lexer)) {
 		*out = U_SENTINEL;
 		return lexer->symbol_count + 1;
 	}
@@ -211,7 +211,7 @@ json_parse_identifier(struct json_lexer *lexer, struct json_token *token)
 int
 json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 {
-	if (lexer->offset == lexer->src_len) {
+	if (json_lexer_is_eof(lexer)) {
 		token->type = JSON_TOKEN_END;
 		return 0;
 	}
@@ -223,7 +223,7 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 	switch(c) {
 	case (UChar32)'[':
 		/* Error for '[\0'. */
-		if (lexer->offset == lexer->src_len)
+		if (json_lexer_is_eof(lexer))
 			return lexer->symbol_count;
 		c = json_current_char(lexer);
 		if (c == '"' || c == '\'') {
@@ -240,14 +240,14 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 		 * Expression, started from [ must be finished
 		 * with ] regardless of its type.
 		 */
-		if (lexer->offset == lexer->src_len ||
+		if (json_lexer_is_eof(lexer) ||
 		    json_current_char(lexer) != ']')
 			return lexer->symbol_count + 1;
 		/* Skip ] - one byte char. */
 		json_skip_char(lexer);
 		return 0;
 	case (UChar32)'.':
-		if (lexer->offset == lexer->src_len)
+		if (json_lexer_is_eof(lexer))
 			return lexer->symbol_count + 1;
 		return json_parse_identifier(lexer, token);
 	default:
@@ -259,26 +259,15 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 }
 
 /**
- * Compare JSON token keys.
+ * Compare JSON tokens as nodes of a JSON tree: tokens that have
+ * different parents are ordered by parent, the rest by value.
  */
 static int
-json_token_cmp(const struct json_token *a, const struct json_token *b)
+json_token_cmp_in_tree(const struct json_token *a, const struct json_token *b)
 {
 	if (a->parent != b->parent)
 		return a->parent - b->parent;
-	if (a->type != b->type)
-		return a->type - b->type;
-	int ret = 0;
-	if (a->type == JSON_TOKEN_STR) {
-		if (a->len != b->len)
-			return a->len - b->len;
-		ret = memcmp(a->str, b->str, a->len);
-	} else if (a->type == JSON_TOKEN_NUM) {
-		ret = a->num - b->num;
-	} else {
-		assert(a->type == JSON_TOKEN_ANY);
-	}
-	return ret;
+	return json_token_cmp(a, b);
 }
 
 int
@@ -289,7 +278,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len,
 	json_lexer_create(&lexer_a, a, a_len, index_base);
 	json_lexer_create(&lexer_b, b, b_len, index_base);
 	struct json_token token_a, token_b;
-	/* For the sake of json_token_cmp(). */
+	/* For the sake of json_token_cmp_in_tree(). */
 	token_a.parent = NULL;
 	token_b.parent = NULL;
 	int rc_a, rc_b;
@@ -297,7 +286,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len,
 	       (rc_b = json_lexer_next_token(&lexer_b, &token_b)) == 0 &&
 		token_a.type != JSON_TOKEN_END &&
 		token_b.type != JSON_TOKEN_END) {
-		int rc = json_token_cmp(&token_a, &token_b);
+		int rc = json_token_cmp_in_tree(&token_a, &token_b);
 		if (rc != 0)
 			return rc;
 	}
@@ -423,8 +412,8 @@ json_tree_snprint_path(char *buf, int size, const struct json_token *token,
 #define mh_arg_t void *
 #define mh_hash(a, arg) ((*(a))->hash)
 #define mh_hash_key(a, arg) ((a)->hash)
-#define mh_cmp(a, b, arg) (json_token_cmp(*(a), *(b)))
-#define mh_cmp_key(a, b, arg) (json_token_cmp((a), *(b)))
+#define mh_cmp(a, b, arg) (json_token_cmp_in_tree(*(a), *(b)))
+#define mh_cmp_key(a, b, arg) (json_token_cmp_in_tree((a), *(b)))
 #include "salad/mhash.h"
 
 static const uint32_t hash_seed = 13U;
diff --git a/src/lib/json/json.h b/src/lib/json/json.h
index d66a9c7a4..3218769a1 100644
--- a/src/lib/json/json.h
+++ b/src/lib/json/json.h
@@ -241,6 +241,13 @@ json_lexer_create(struct json_lexer *lexer, const char *src, int src_len,
 int
 json_lexer_next_token(struct json_lexer *lexer, struct json_token *token);
 
+/** Check if @a lexer has no more input: its offset reached src_len. */
+static inline bool
+json_lexer_is_eof(const struct json_lexer *lexer)
+{
+	return lexer->offset == lexer->src_len;
+}
+
 /**
  * Compare two JSON paths using Lexer class.
  * - in case of paths that have same token-sequence prefix,
@@ -279,6 +286,30 @@ json_token_is_leaf(struct json_token *token)
 	return token->max_child_idx < 0;
 }
 
+/**
+ * Compare two JSON tokens by value, ignoring tree attributes.
+ * Different types are ordered by type, numbers by value, strings
+ * by length and then by bytes; otherwise the tokens are equal.
+ * Can be used to compare JSON paths, where string comparison of
+ * the paths may fail as a token has several forms: ['a'] == .a.
+ */
+static inline int
+json_token_cmp(const struct json_token *l, const struct json_token *r)
+{
+	if (l->type != r->type)
+		return l->type - r->type;
+	switch(l->type) {
+	case JSON_TOKEN_NUM:
+		return l->num - r->num;
+	case JSON_TOKEN_STR:
+		if (l->len != r->len)
+			return l->len - r->len;
+		return memcmp(l->str, r->str, l->len);
+	default:
+		return 0;
+	}
+}
+
 /**
  * Test if a given JSON token is multikey.
  */
-- 
2.21.0 (Apple Git-122.2)



More information about the Tarantool-patches mailing list