From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp37.i.mail.ru (smtp37.i.mail.ru [94.100.177.97]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 68697469711 for ; Sun, 7 Jun 2020 19:45:44 +0300 (MSK) References: From: Vladislav Shpilevoy Message-ID: <67c75c01-8503-2355-e1f7-9644def2179c@tarantool.org> Date: Sun, 7 Jun 2020 18:45:42 +0200 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit Subject: Re: [Tarantool-patches] [PATCH 2/2] feedback: collect db engines and index features List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Ilya Konyukhov , tarantool-patches@dev.tarantool.org Cc: alexander.turenko@tarantool.org Thanks for the patch! Generally, I don't like having so much Lua code in the daemon, or the full scans of system spaces, because they are slow and produce Lua garbage. Besides, this approach can't collect some internal things anyway, such as whether SQL is used (it is not exposed in any system spaces), popen, swim, etc. These things don't register themselves in any global place. I was rather thinking about keeping track of all these modules and their statistics in C. That way the statistics would be updated right when they change, in a set of int counters. And a statistics dump would cost O(1) time, written right into a JSON string, without Lua participation except that Lua would call this C dumper and put its result into an HTTP request. In other words, I am not sure this commit is needed at all, until we understand how to collect all the other features too. See 6 comments below. On 05/06/2020 10:35, Ilya Konyukhov wrote: > This patch adds basic db features to feedback report. > It collects info about what engine and which types of > indexes are setup by the user.
> > Here is how report may look like if all the features used: > > ```json > { > "arch": "x64", > "features": { > "has_bitset_index": true, > "has_jsonpath_index": true, > "vinyl": true, > "has_tree_index": true, > "has_primary_index": true, > "has_hash_index": true, > "memtx": true, > "has_temporary_spaces": true, > "has_local_spaces": true, > "has_rtree_index": true, > "has_secondary_index": true, > "has_functional_index": true > }, > "server_id": "7c8490f7-61c5-4e12-a7ff-d9fed05ad8ac", > "is_docker": false, > "os": "OSX", > "feedback_type": "version", > "cluster_id": "1eb7d98e-3344-4f15-a439-c287464f09e7", > "tarantool_version": "2.5.0-90-g27fbe6ecd", > "feedback_version": 1 > } > ``` > > Part of #4943 > --- > src/box/lua/feedback_daemon.lua | 65 +++++++++++++++++++++++++++ > test/box-tap/feedback_daemon.test.lua | 42 ++++++++++++++++- > 2 files changed, 106 insertions(+), 1 deletion(-) > > diff --git a/src/box/lua/feedback_daemon.lua b/src/box/lua/feedback_daemon.lua > index 2ce49fb22..0fcd8ed87 100644 > --- a/src/box/lua/feedback_daemon.lua > +++ b/src/box/lua/feedback_daemon.lua > @@ -41,6 +41,15 @@ local function detect_docker_environment() > return true > end > > +local function is_system_space(sp) > + local sp_id = sp.id > + if box.schema.SYSTEM_ID_MIN <= sp_id and sp_id <= box.schema.SYSTEM_ID_MAX then > + return true > + end 1. Please keep code lines within the 80-character limit. Also, this function's return can be simplified to return box.schema.SYSTEM_ID_MIN <= sp_id and sp_id <= box.schema.SYSTEM_ID_MAX > + > + return false > +end > + > local function fill_in_base_info(feedback) > if box.info.status ~= "running" then > return nil, "not running" > @@ -56,9 +65,65 @@ local function fill_in_platform_info(feedback) > feedback.is_docker = detect_docker_environment() > end > > +local function fill_in_space_indices(feedback, sp) > + if not sp.index[0] then return end > + > + feedback.features.has_primary_index = true 2. What is the purpose of this field?
Zero-index spaces always exist, at least because indexes are created in a separate DDL statement. Besides, the function and the iteration over spaces may be really heavy if the space count is in the thousands, or even in the hundreds but with many indexes. And there is no yield. In addition to yields, I ask you to add caching of this function's results using the schema version counter. The schema changes very rarely, so caching would make this function practically free almost always. > + local idx_count = 0 > + for _, idx in pairs(sp.index) do > + for _, part in pairs(idx.parts) do > + if part.path ~= nil then > + feedback.features.has_jsonpath_index = true > + break > + end > + end > + if idx.func ~= nil then > + feedback.features.has_functional_index = true > + end > + if idx.type == 'TREE' then > + feedback.features.has_tree_index = true > + elseif idx.type == 'HASH' then > + feedback.features.has_hash_index = true > + elseif idx.type == 'RTREE' then > + feedback.features.has_rtree_index = true > + elseif idx.type == 'BITSET' then > + feedback.features.has_bitset_index = true > + end > + idx_count = idx_count + 1 > + end > + > + if idx_count > 1 then > + feedback.features.has_secondary_index = true 3. This does not look very useful. What is this flag going to tell us? Secondary indexes exist almost always. Besides, I agree with Dmitry's comment about counters instead of flags.
> + end > +end > + > +local function fill_in_features(feedback) > + feedback.features = feedback.features or {} > + > + local is_memtx, is_vinyl, is_temporary, is_local > + for _, sp in pairs(box.space) do > + local is_system = is_system_space(sp) > + if not is_system then > + if sp.engine == 'vinyl' then is_vinyl = true end > + if sp.engine == 'memtx' then > + if sp.temporary ~= nil then is_temporary = true end > + is_memtx = true > + end > + if sp.is_local ~= nil then is_local = true end > + fill_in_space_indices(feedback, sp) > + end > + end > + > + feedback.features.has_temporary_spaces = is_temporary > + feedback.features.has_local_spaces = is_local > + feedback.features.memtx = is_memtx > + feedback.features.vinyl = is_vinyl 4. Why do some flags have the prefix 'has_', some have 'is_', and some are just nouns like 'memtx', 'vinyl'? Let's be consistent and use one naming template. For this type of flag in C we would use 'has_'. > +end > diff --git a/test/box-tap/feedback_daemon.test.lua b/test/box-tap/feedback_daemon.test.lua > index c36b2a694..e382af8e8 100755 > --- a/test/box-tap/feedback_daemon.test.lua > +++ b/test/box-tap/feedback_daemon.test.lua > @@ -113,6 +113,46 @@ check("feedback after start") > daemon.send_test() > check("feedback after feedback send_test") > > +local feedback_json = json.decode(feedback_save) 5. When you write a test for an issue, please mention the issue in a comment and describe it briefly. Like this: -- -- gh-####: description.
-- > +test:is(type(feedback_json.features), 'table', 'features field is present') > +test:isnil(next(feedback_json.features), 'features are empty at the moment') > + > +box.schema.create_space('features_vinyl', {engine = 'vinyl'}) > +box.schema.create_space('features_memtx', {engine = 'memtx', is_local = true, temporary = true}) > +box.space.features_memtx:create_index('vinyl_pk', {type = 'tree'}) > +box.space.features_memtx:create_index('memtx_pk', {type = 'hash'}) > +box.space.features_memtx:create_index('memtx_bitset', {type = 'bitset'}) > +box.space.features_memtx:create_index('memtx_rtree', {type = 'rtree', parts = {3, 'array'}}) > +box.space.features_memtx:create_index('memtx_jpath', > + {parts = {{field=4, type='str', path='data.name'}}}) 6. Please, be consistent in the code style. Surround '=' with whitespaces, add a whitespace after ',' (see your code below). > +box.schema.func.create('features_func', { > + body = "function(tuple) return {string.sub(tuple[2],1,1)} end", > + is_deterministic = true, > + is_sandboxed = true}) > +box.space.features_memtx:create_index('j', > + {parts={{field = 1, type = 'number'}},func = 'features_func'}) > + > +check('old feedback received') > +feedback_reset() > +check('feedback with db features received') > + > +feedback_json = json.decode(feedback_save) > +test:test('features', function(t) > + t:plan(12) > + t:ok(feedback_json.features.memtx, 'memtx engine usage gathered') > + t:ok(feedback_json.features.vinyl, 'vinyl engine usage gathered') > + t:ok(feedback_json.features.has_temporary_spaces, 'temporary space usage gathered') > + t:ok(feedback_json.features.has_local_spaces, 'local space usage gathered') > + t:ok(feedback_json.features.has_primary_index, 'primary index gathered') > + t:ok(feedback_json.features.has_secondary_index, 'secondary index gathered') > + t:ok(feedback_json.features.has_tree_index, 'tree index gathered') > + t:ok(feedback_json.features.has_hash_index, 'hash index gathered') > + 
t:ok(feedback_json.features.has_rtree_index, 'rtree index gathered') > + t:ok(feedback_json.features.has_bitset_index, 'bitset index gathered') > + t:ok(feedback_json.features.has_jsonpath_index, 'jsonpath index gathered') > + t:ok(feedback_json.features.has_functional_index, 'functional index gathered') > +end) > + > daemon.stop() > > box.feedback.save("feedback.json") >