[patches] [AVRO 2/3] Add fingerprint function
Kirill Yukhin
kyukhin at tarantool.org
Wed Feb 21 15:08:36 MSK 2018
On 20 фев 11:26, AKhatskevich wrote:
> From: "AKhatskevich avkhatskevich at tarantool.org" <avkhatskevich at gmail.com>
>
> The function helps to control schema version.
Which function? Please rephrase this sentence.
> Fingerprint function is compatibla with any hash from Tarantool's digest
Typo: "compatibla" should be "compatible".
> hashing library.
>
> This funct ion produces the same value as spache implementations on simple
Typos: "funct ion" should be "function", and "spache" is presumably "Apache".
> cases, but may differ some times. (e.g. in cases with type references,
> which copies type in this implementation)
Please mention all the cases in which the digest might be different.
> The fingerprint is sustainable and the same for different representations
> of the same schema.
>
> Closes #30
> ---
> CMakeLists.txt | 1 +
> avro_schema/fingerprint.lua | 74 +++++++++++++++++++++++++++++++++++++++++++++
> avro_schema/init.lua | 15 +++++++--
> test/api_tests.lua | 74 ++++++++++++++++++++++++++++++++++++++++++++-
> 4 files changed, 160 insertions(+), 4 deletions(-)
> create mode 100644 avro_schema/fingerprint.lua
>
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index acd35d0..b7a80da 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -85,6 +85,7 @@ add_custom_target(postprocess_lua ALL DEPENDS
> # Install module
> install(FILES avro_schema/init.lua avro_schema/compiler.lua
> avro_schema/frontend.lua avro_schema/runtime.lua
> + avro_schema/fingerprint.lua
> DESTINATION ${TARANTOOL_INSTALL_LUADIR}/avro_schema)
>
> install(FILES ${CMAKE_BINARY_DIR}/il.lua
> diff --git a/avro_schema/fingerprint.lua b/avro_schema/fingerprint.lua
> new file mode 100644
> index 0000000..0391835
> --- /dev/null
> +++ b/avro_schema/fingerprint.lua
> @@ -0,0 +1,74 @@
> +-- This file implements fingerprinting mechanism for Avro schema.
I'd call it a module.
> +-- It was necessary to implement our json encoder, because of some special
> +-- rules for avro fingerptint generation and Parsing Canonical Form generation.
> +
> +local json = require "json"
> +-- Tarantool specific module
> +local digest = require "digest"
> +
> +local avro_json
> +
> +local function raise_error(message, ...)
> + error(string.format("avro-fingerprint: "..message, ...))
> +end
> +
> +local function is_primitive_type(xtype)
> + local ptypes = {"string", "number", "boolean"}
> + for _,t in ipairs(ptypes) do
> + if xtype == t then return true end
> + end
> + return false
> +end
> +
> +local function avro_json_array(data)
> + local res = {}
> + for _,item in ipairs(data) do
Space missing after the comma.
> + table.insert(res,avro_json(item))
> + end
> + return string.format("[%s]", table.concat(res, ","))
> +end
> +
> +local function avro_json_object(data)
> + local res = {}
> + local necessary_order = {"name", "type", "fields", "symbols", "items", "values", "size"}
Please make this line less than 80 chars long.
> + for _,name in ipairs(necessary_order) do
Space missing after the comma.
> + local item = data[name]
> + if item ~= nil then
> + local inner = avro_json(item)
> + inner = string.format([[%s:%s]], json.encode(name), inner)
> + table.insert(res, inner)
> + end
> + end
> + return string.format("{%s}", table.concat(res, ","))
> +end
> +
> +-- Takes normalized avro schema and produces normalized schema representation
> +-- encoded in json format.
> +avro_json = function (data)
> + local xtype = type(data)
> + if is_primitive_type(xtype) then
> + return json.encode(data)
> + end
> + if xtype ~= "table" then
> + raise_error("data type is not supported: %s", xtype)
> + end
> + -- array
> + if #data > 0 then
> + return avro_json_array(data)
> + end
> + -- object (dict)
> + return avro_json_object(data)
> +end
> +
> +local function get_fingerprint(schema, algo, size)
> + if digest[algo] == nil or type(digest[algo]) ~= "function" then
> + raise_error("The hash function %s is not supported", algo)
> + end
> + local fp = digest[algo](avro_json(schema))
> + return fp:sub(1, size)
> +end
> +
> +return {
> + avro_json = avro_json,
> + get_fingerprint = get_fingerprint,
> +}
> diff --git a/avro_schema/init.lua b/avro_schema/init.lua
> index efd361d..621030d 100644
> --- a/avro_schema/init.lua
> +++ b/avro_schema/init.lua
> @@ -4,6 +4,7 @@ local c = require('avro_schema.compiler')
> local il = require('avro_schema.il')
> local backend_lua = require('avro_schema.backend')
> local rt = require('avro_schema.runtime')
> +local fingerprint = require('avro_schema.fingerprint')
>
> local format, find, sub = string.format, string.find, string.sub
> local insert, remove, concat = table.insert, table.remove, table.concat
> @@ -510,7 +511,14 @@ end
> local function export(schema_h)
> return export_helper(get_schema(schema_h), {})
> end
> -
> +local function get_fingerprint(schema_h, algo, size)
> + if algo == nil then algo = "sha256" end
> + if size == nil then size = 8 end
> + return fingerprint.get_fingerprint(get_schema(schema_h), algo, size)
> +end
> +local function to_json(schema_h)
Blank line missing between functions.
> + return fingerprint.avro_json(get_schema(schema_h))
> +end
> return {
> are_compatible = are_compatible,
> create = create,
> @@ -519,5 +527,6 @@ return {
> get_types = get_types,
> is = is_schema,
> validate = validate,
> - export = export
> -}
> + export = export,
> + fingerprint = get_fingerprint,
> +}
> \ No newline at end of file
> diff --git a/test/api_tests.lua b/test/api_tests.lua
> index b1628bb..42fc7f4 100644
> --- a/test/api_tests.lua
> +++ b/test/api_tests.lua
> @@ -5,7 +5,7 @@ local msgpack = require('msgpack')
>
> local test = tap.test('api-tests')
>
> -test:plan(50)
> +test:plan(54)
>
> test:is_deeply({schema.create()}, {false, 'Unknown Avro type: nil'},
> 'error unknown type')
> @@ -208,5 +208,77 @@ for _, type in ipairs({"int", "string", "null", "boolean", "long", "float", "dou
> test:is_deeply(schema.export(res[2]), type, 'schema normalization '..type)
> end
>
> +-- fingerprint tests
> +local fingerprint_testcases = {
> + {
> + schema = [[
> + {
> + "name": "Pet",
> + "type": "record",
> + "fields": [
> + {"name": "kind", "type": {"name": "Kind", "type": "enum", "symbols": ["CAT", "DOG"]}},
Please make this line less than 80 chars long.
> + {"name": "name", "type": "string"}
> + ]
> + }
> + ]],
> + fingerprint = "42620f01b34833f1e70cf2a9567fc4d3b9cf8b74afba64af0e9dce9a148b1e90"
Please make this line less than 80 chars long.
> + },
> + {
> + schema = [[{"type": "fixed", "name": "Id", "size": 4}]],
> + fingerprint = "ecd9e5c6039fe40543f95176d664e1b9b56dddf1e8b1e3a6d87a6402b12e305d"
Please make this line less than 80 chars long.
> + },
> + {
> + schema = [[
> + {
> + "type": "record",
> + "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
Please make this line less than 80 chars long.
> + "fields": [
> + {"name": "match",
> + "type": {"type": "enum", "name": "HandshakeMatch",
> + "symbols": ["BOTH", "CLIENT", "NONE"]}},
> + {"name": "serverProtocol",
> + "type": ["null", "string"]},
> + {"name": "serverHash",
> + "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
Please make this line less than 80 chars long.
> + {"name": "meta",
> + "type": ["null", {"type": "map", "values": "bytes"}]}
> + ]
> + }
> + ]],
> + fingerprint = "a303cbbfe13958f880605d70c521a4b7be34d9265ac5a848f25916a67b11d889"
Please make this line less than 80 chars long.
> + },
> + -- in case of type reuse, it should not be copied. It should only contain type name
Please make this line less than 80 chars long.
> + -- {"name": "serverHash", "type": "MD5"}, -- > {"name":"serverHash","type":{"name":"org.apache.avro.ipc.MD5","type":"fixed","size":16}}!!!
Please make this line less than 80 chars long.
> + -- correct fingerprint is "2b2f7a9b22991fe0df9134cb6b5ff7355343e797aaea337e0150e20f3a35800e"
Please make this line less than 80 chars long.
> + {
> + schema = [[
> + {
> + "type": "record",
> + "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
> + "fields": [
> + {"name": "clientHash",
> + "type": {"type": "fixed", "name": "MD5", "size": 16}},
> + {"name": "clientProtocol", "type": ["null", "string"]},
> + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
> + ]
> + }
> + ]],
> + fingerprint = "ef17a5460289684db839c86a0c2cdcfe69da9dd0a3047e6a91f6d6bc37f76314"
Please make this line less than 80 chars long.
> +
> + },
> +}
> +
> +function string.tohex(str)
> + return (str:gsub('.', function (c)
> + return string.format('%02X', string.byte(c))
> + end))
> +end
> +
> +for i, testcase in ipairs(fingerprint_testcases) do
> + local _, schema_handler = schema.create(json.decode(testcase.schema))
> + local fingerprint = schema.fingerprint(schema_handler, "sha256", 32)
> + test:is(string.lower(string.tohex(fingerprint)), testcase.fingerprint, "Fingerprint testcase "..i)
Please make this line less than 80 chars long.
> +end
> +
> test:check()
> os.exit(test.planned == test.total and test.failed == 0 and 0 or -1)
> --
> 2.14.1
>
More information about the Tarantool-patches
mailing list