[PATCH v4 09/14] lib: introduce json_path_normalize routine

Kirill Shcherbatov kshcherbatov at tarantool.org
Thu Oct 11 10:58:54 MSK 2018


Introduce a new routine json_path_normalize() that converts a
JSON path to its 'canonical' form:
  - all map keys are specified in the ["key"] operator form;
  - all array indexes are specified in the [i] operator form.
This notation is preferable because in the general case it can
be parsed unambiguously.
We need this API in the JSON indexes patch to store all paths in
the 'canonical' form, which makes it possible to perform path
uniqueness checks and to optimize access via the JSON path hash
table.

Needed for #1012.
---
 src/lib/json/path.c        | 25 +++++++++++++++++++++++++
 src/lib/json/path.h        | 18 ++++++++++++++++++
 test/unit/json_path.c      | 41 ++++++++++++++++++++++++++++++++++++++++-
 test/unit/json_path.result | 14 +++++++++++++-
 4 files changed, 96 insertions(+), 2 deletions(-)

diff --git a/src/lib/json/path.c b/src/lib/json/path.c
index 2e72930..0eb5d49 100644
--- a/src/lib/json/path.c
+++ b/src/lib/json/path.c
@@ -242,3 +242,28 @@ json_path_next(struct json_path_parser *parser, struct json_path_node *node)
 		return json_parse_identifier(parser, node);
 	}
 }
+
+int
+json_path_normalize(const char *path, uint32_t path_len, char *out)
+{
+	struct json_path_parser parser;
+	struct json_path_node node;
+	json_path_parser_create(&parser, path, path_len);
+	int rc;
+	/* Re-emit each token in bracket form; the caller sizes @out. */
+	while ((rc = json_path_next(&parser, &node)) == 0 &&
+	       node.type != JSON_PATH_END) {
+		if (node.type == JSON_PATH_NUM) {
+			out += sprintf(out, "[%llu]",
+				       (unsigned long long)node.num);
+		} else if (node.type == JSON_PATH_STR) {
+			/* NOTE(review): '"' in a key is not escaped. */
+			out += sprintf(out, "[\"%.*s\"]", node.len, node.str);
+		} else {
+			unreachable();
+		}
+	}
+	/* Terminate even on a syntax error so @out is never garbage. */
+	*out = '\0';
+	return rc;
+}
diff --git a/src/lib/json/path.h b/src/lib/json/path.h
index c3c381a..f6b2ee2 100644
--- a/src/lib/json/path.h
+++ b/src/lib/json/path.h
@@ -105,6 +105,24 @@ json_path_parser_create(struct json_path_parser *parser, const char *src,
 int
 json_path_next(struct json_path_parser *parser, struct json_path_node *node);
 
+/**
+ * Convert a JSON path to its 'canonical' form:
+ *  - all map keys are specified in the ["key"] operator form;
+ *  - all array indexes are specified in the [i] operator form.
+ * This form can be parsed unambiguously in the general case.
+ * @param path Source path string to be converted.
+ * @param path_len The length of the @path.
+ * @param[out] out Caller-provided memory for the normalized,
+ *                 NUL-terminated string. A key may grow by up
+ *                 to 4 bytes (a -> ["a"]), so reserve at least
+ *                 5 * (path_len + 1) / 2 + 1 bytes.
+ * @retval 0 On success.
+ * @retval > 0 Position of a syntax error. A position is 1-based
+ *             and starts from the beginning of the source string.
+ */
+int
+json_path_normalize(const char *path, uint32_t path_len, char *out);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/test/unit/json_path.c b/test/unit/json_path.c
index 9a1de06..775b0b1 100644
--- a/test/unit/json_path.c
+++ b/test/unit/json_path.c
@@ -360,15 +360,54 @@ test_tree()
 	footer();
 }
 
+/**
+ * Check that json_path_normalize() maps every equivalent spelling
+ * of one path to the same canonical string and reports success.
+ */
+void
+test_normalize_path()
+{
+	header();
+	plan(8);
+
+	/* The canonical spelling every input below must collapse to. */
+	const char *path_normalized = "[\"FIO\"][3][\"fname\"]";
+	/*
+	 * Equivalent spellings of one path: the canonical form itself,
+	 * then dot/bracket mixes of the same keys and index.
+	 */
+	const char *paths[] = {
+		path_normalized,
+		"FIO[3].fname",
+		"[\"FIO\"][3].fname",
+		"FIO[3][\"fname\"]",
+	};
+	const int path_count = sizeof(paths) / sizeof(paths[0]);
+	/* Each expected result is as long as the canonical form. */
+	char buff[strlen(path_normalized) + 1];
+
+	for (int i = 0; i < path_count; i++) {
+		const char *src = paths[i];
+		int rc = json_path_normalize(src, strlen(src), buff);
+		is(rc, 0, "normalize '%s' path status", src);
+		is(strcmp(buff, path_normalized), 0,
+		   "normalize '%s' path compare", src);
+	}
+
+	check_plan();
+	footer();
+}
+
 int
 main()
 {
 	header();
-	plan(3);
+	plan(4);
 
 	test_basic();
 	test_errors();
 	test_tree();
+	test_normalize_path();
 
 	int rc = check_plan();
 	footer();
diff --git a/test/unit/json_path.result b/test/unit/json_path.result
index 7bc9d37..383c393 100644
--- a/test/unit/json_path.result
+++ b/test/unit/json_path.result
@@ -1,5 +1,5 @@
 	*** main ***
-1..3
+1..4
 	*** test_basic ***
     1..71
     ok 1 - parse <[0]>
@@ -145,4 +145,16 @@ ok 2 - subtests
     ok 42 - records iterated count 5 of 5
 ok 3 - subtests
 	*** test_tree: done ***
+	*** test_normalize_path ***
+    1..8
+    ok 1 - normalize '["FIO"][3]["fname"]' path status
+    ok 2 - normalize '["FIO"][3]["fname"]' path compare
+    ok 3 - normalize 'FIO[3].fname' path status
+    ok 4 - normalize 'FIO[3].fname' path compare
+    ok 5 - normalize '["FIO"][3].fname' path status
+    ok 6 - normalize '["FIO"][3].fname' path compare
+    ok 7 - normalize 'FIO[3]["fname"]' path status
+    ok 8 - normalize 'FIO[3]["fname"]' path compare
+ok 4 - subtests
+	*** test_normalize_path: done ***
 	*** main: done ***
-- 
2.7.4




More information about the Tarantool-patches mailing list