[patches] [AVRO 2/3] Add fingerprint function

Kirill Yukhin kyukhin at tarantool.org
Wed Feb 21 15:08:36 MSK 2018


On 20 фев 11:26, AKhatskevich wrote:
> From: "AKhatskevich avkhatskevich at tarantool.org" <avkhatskevich at gmail.com>
> 
> The function helps to control schema version.
Which function? Please rephrase and name it explicitly.

> Fingerprint function is compatibla with any hash from Tarantool's digest
Typo: "compatibla" should be "compatible".
> hashing library.
> 
> This funct	ion produces the same value as spache implementations on simple
Typos: "funct ion" should be "function", and "spache" is presumably "Apache".

> cases, but may differ some times. (e.g. in cases with type references,
> which copies type in this implementation)
Please mention all cases in which the digest might differ.

> The fingerprint is sustainable and the same for different representations
> of the same schema.
> 
> Closes #30
> ---
>  CMakeLists.txt              |  1 +
>  avro_schema/fingerprint.lua | 74 +++++++++++++++++++++++++++++++++++++++++++++
>  avro_schema/init.lua        | 15 +++++++--
>  test/api_tests.lua          | 74 ++++++++++++++++++++++++++++++++++++++++++++-
>  4 files changed, 160 insertions(+), 4 deletions(-)
>  create mode 100644 avro_schema/fingerprint.lua
> 
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index acd35d0..b7a80da 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -85,6 +85,7 @@ add_custom_target(postprocess_lua ALL DEPENDS
>  # Install module
>  install(FILES avro_schema/init.lua avro_schema/compiler.lua
>                avro_schema/frontend.lua avro_schema/runtime.lua
> +              avro_schema/fingerprint.lua
>          DESTINATION ${TARANTOOL_INSTALL_LUADIR}/avro_schema)
>  
>  install(FILES ${CMAKE_BINARY_DIR}/il.lua
> diff --git a/avro_schema/fingerprint.lua b/avro_schema/fingerprint.lua
> new file mode 100644
> index 0000000..0391835
> --- /dev/null
> +++ b/avro_schema/fingerprint.lua
> @@ -0,0 +1,74 @@
> +-- This file implements fingerprinting mechanism for Avro schema.
I'd call it a module.

> +-- It was necessary to implement our json encoder, because of some special
> +-- rules for avro fingerptint generation and Parsing Canonical Form generation.
> +
> +local json = require "json"
> +-- Tarantool specific module
> +local digest = require "digest"
> +
> +local avro_json
> +
> +local function raise_error(message, ...)
> +    error(string.format("avro-fingerprint: "..message, ...))
> +end
> +
> +local function is_primitive_type(xtype)
> +    local ptypes = {"string", "number", "boolean"}
> +    for _,t in ipairs(ptypes) do
> +        if xtype == t then return true end
> +    end
> +    return false
> +end
> +
> +local function avro_json_array(data)
> +    local res = {}
> +    for _,item in ipairs(data) do
Space missing after the comma.

> +        table.insert(res,avro_json(item))
> +    end
> +    return string.format("[%s]", table.concat(res, ","))
> +end
> +
> +local function avro_json_object(data)
> +    local res = {}
> +    local necessary_order = {"name", "type", "fields", "symbols", "items", "values", "size"}
Please, make line less than 80 chars long.

> +    for _,name in ipairs(necessary_order) do
Space missing after the comma.

> +        local item = data[name]
> +        if item ~= nil then
> +            local inner = avro_json(item)
> +            inner = string.format([[%s:%s]], json.encode(name), inner)
> +            table.insert(res, inner)
> +        end
> +    end
> +    return string.format("{%s}", table.concat(res, ","))
> +end
> +
> +-- Takes normalized avro schema and produces normalized schema representation
> +-- encoded in json format.
> +avro_json = function (data)
> +    local xtype = type(data)
> +    if is_primitive_type(xtype) then
> +        return json.encode(data)
> +    end
> +    if xtype ~= "table" then
> +        raise_error("data type is not supported: %s", xtype)
> +    end
> +    -- array
> +    if #data > 0 then
> +        return avro_json_array(data)
> +    end
> +    -- object (dict)
> +    return avro_json_object(data)
> +end
> +
> +local function get_fingerprint(schema, algo, size)
> +    if digest[algo] == nil or type(digest[algo]) ~= "function" then
> +        raise_error("The hash function %s is not supported", algo)
> +    end
> +    local fp = digest[algo](avro_json(schema))
> +    return fp:sub(1, size)
> +end
> +
> +return {
> +    avro_json = avro_json,
> +    get_fingerprint = get_fingerprint,
> +}
> diff --git a/avro_schema/init.lua b/avro_schema/init.lua
> index efd361d..621030d 100644
> --- a/avro_schema/init.lua
> +++ b/avro_schema/init.lua
> @@ -4,6 +4,7 @@ local c           = require('avro_schema.compiler')
>  local il          = require('avro_schema.il')
>  local backend_lua = require('avro_schema.backend')
>  local rt          = require('avro_schema.runtime')
> +local fingerprint = require('avro_schema.fingerprint')
>  
>  local format, find, sub = string.format, string.find, string.sub
>  local insert, remove, concat = table.insert, table.remove, table.concat
> @@ -510,7 +511,14 @@ end
>  local function export(schema_h)
>      return export_helper(get_schema(schema_h), {})
>  end
> -
> +local function get_fingerprint(schema_h, algo, size)
> +    if algo == nil then algo = "sha256" end
> +    if size == nil then size = 8 end
> +    return fingerprint.get_fingerprint(get_schema(schema_h), algo, size)
> +end
> +local function to_json(schema_h)
Newline missing between functions.

> +    return fingerprint.avro_json(get_schema(schema_h))
> +end
>  return {
>      are_compatible = are_compatible,
>      create         = create,
> @@ -519,5 +527,6 @@ return {
>      get_types      = get_types,
>      is             = is_schema,
>      validate       = validate,
> -    export         = export
> -}
> +    export         = export,
> +    fingerprint    = get_fingerprint,
> +}
> \ No newline at end of file
> diff --git a/test/api_tests.lua b/test/api_tests.lua
> index b1628bb..42fc7f4 100644
> --- a/test/api_tests.lua
> +++ b/test/api_tests.lua
> @@ -5,7 +5,7 @@ local msgpack = require('msgpack')
>  
>  local test = tap.test('api-tests')
>  
> -test:plan(50)
> +test:plan(54)
>  
>  test:is_deeply({schema.create()}, {false, 'Unknown Avro type: nil'},
>                 'error unknown type')
> @@ -208,5 +208,77 @@ for _, type in ipairs({"int", "string", "null", "boolean", "long", "float", "dou
>      test:is_deeply(schema.export(res[2]), type, 'schema normalization '..type)
>  end
>  
> +-- fingerprint tests
> +local fingerprint_testcases = {
> +    {
> +        schema = [[
> +            {
> +              "name": "Pet",
> +              "type": "record",
> +              "fields": [
> +                {"name": "kind", "type": {"name": "Kind", "type": "enum", "symbols": ["CAT", "DOG"]}},
Please, make line less than 80 chars long.

> +                {"name": "name", "type": "string"}
> +              ]
> +            }
> +        ]],
> +        fingerprint = "42620f01b34833f1e70cf2a9567fc4d3b9cf8b74afba64af0e9dce9a148b1e90"
Please, make line less than 80 chars long.

> +    },
> +    {
> +        schema = [[{"type": "fixed", "name": "Id", "size": 4}]],
> +        fingerprint = "ecd9e5c6039fe40543f95176d664e1b9b56dddf1e8b1e3a6d87a6402b12e305d"
Please, make line less than 80 chars long.

> +    },
> +    {
> +        schema = [[
> +            {
> +              "type": "record",
> +              "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
Please, make line less than 80 chars long.
> +              "fields": [
> +                {"name": "match",
> +                 "type": {"type": "enum", "name": "HandshakeMatch",
> +                          "symbols": ["BOTH", "CLIENT", "NONE"]}},
> +                {"name": "serverProtocol",
> +                 "type": ["null", "string"]},
> +                {"name": "serverHash",
> +                 "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
Please, make line less than 80 chars long.

> +                {"name": "meta",
> +                 "type": ["null", {"type": "map", "values": "bytes"}]}
> +              ]
> +            }
> +        ]],
> +        fingerprint = "a303cbbfe13958f880605d70c521a4b7be34d9265ac5a848f25916a67b11d889"
Please, make line less than 80 chars long.

> +    },
> +    -- in case of type reuse, it should not be copied. It should only contain type name
Please, make line less than 80 chars long.

> +    -- {"name": "serverHash", "type": "MD5"}, -- > {"name":"serverHash","type":{"name":"org.apache.avro.ipc.MD5","type":"fixed","size":16}}!!!
Please, make line less than 80 chars long.

> +    -- correct fingerprint is "2b2f7a9b22991fe0df9134cb6b5ff7355343e797aaea337e0150e20f3a35800e"
Please, make line less than 80 chars long.

> +    {
> +        schema = [[
> +            {
> +              "type": "record",
> +              "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
> +              "fields": [
> +                {"name": "clientHash",
> +                 "type": {"type": "fixed", "name": "MD5", "size": 16}},
> +                {"name": "clientProtocol", "type": ["null", "string"]},
> +                {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
> +              ]
> +            }
> +        ]],
> +        fingerprint = "ef17a5460289684db839c86a0c2cdcfe69da9dd0a3047e6a91f6d6bc37f76314"
Please, make line less than 80 chars long.

> +
> +    },
> +}
> +
> +function string.tohex(str)
> +    return (str:gsub('.', function (c)
> +        return string.format('%02X', string.byte(c))
> +    end))
> +end
> +
> +for i, testcase in ipairs(fingerprint_testcases) do
> +    local _, schema_handler = schema.create(json.decode(testcase.schema))
> +    local fingerprint = schema.fingerprint(schema_handler, "sha256", 32)
> +    test:is(string.lower(string.tohex(fingerprint)), testcase.fingerprint, "Fingerprint testcase "..i)
Please, make line less than 80 chars long.

> +end
> +
>  test:check()
>  os.exit(test.planned == test.total and test.failed == 0 and 0 or -1)
> -- 
> 2.14.1
> 



More information about the Tarantool-patches mailing list