From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v3.1 4/5] box: introduce offset slot cache in key_part Date: Thu, 20 Sep 2018 10:46:52 +0300 Message-Id: <8fc09d126fb788c858125396b85c88d2075a5ee4.1537429335.git.kshcherbatov@tarantool.org> In-Reply-To: References: In-Reply-To: References: To: tarantool-patches@freelists.org Cc: vdavydov.dev@gmail.com, Kirill Shcherbatov List-ID: Same key_part could be used in different formats multiple times, so different field->offset_slot would be allocated. In most scenarios we work with series of tuples of same format, and (in general) format lookup for field would be expensive operation for JSON-paths defined in key_part. New slot_cache field in key_part structure and epoch-based mechanism to validate it actuality should be effective approach to improve performance. Part of #1012. --- src/box/alter.cc | 7 +++++-- src/box/blackhole.c | 5 +++-- src/box/engine.h | 11 +++++----- src/box/key_def.c | 12 +++++++++-- src/box/key_def.h | 11 ++++++++++ src/box/memtx_engine.c | 4 ++-- src/box/memtx_space.c | 5 +++-- src/box/memtx_space.h | 2 +- src/box/schema.cc | 4 ++-- src/box/space.c | 4 ++-- src/box/space.h | 8 +++++--- src/box/sysview.c | 3 ++- src/box/tuple.c | 4 ++-- src/box/tuple_format.c | 22 +++++++++++++++----- src/box/tuple_format.h | 10 ++++++++- src/box/vinyl.c | 7 ++++--- src/box/vy_lsm.c | 45 +++++++++++++++++++++++++++++++++++++++-- test/unit/vy_iterators_helper.c | 6 +++--- test/unit/vy_mem.c | 2 +- test/unit/vy_point_lookup.c | 2 +- 20 files changed, 132 insertions(+), 42 deletions(-) diff --git a/src/box/alter.cc b/src/box/alter.cc index a6299a1..b90ab3e 100644 --- a/src/box/alter.cc +++ b/src/box/alter.cc @@ -883,7 +883,10 @@ alter_space_do(struct txn *txn, struct alter_space *alter) * Create a new (empty) space for the new definition. * Sic: the triggers are not moved over yet. */ - alter->new_space = space_new_xc(alter->space_def, &alter->key_list); + alter->new_space = + space_new_xc(alter->space_def, &alter->key_list, + alter->old_space->format != NULL ? + alter->old_space->format->epoch + 1 : 1); /* * Copy the replace function, the new space is at the same recovery * phase as the old one. This hack is especially necessary for @@ -1604,7 +1607,7 @@ on_replace_dd_space(struct trigger * /* trigger */, void *event) auto def_guard = make_scoped_guard([=] { space_def_delete(def); }); RLIST_HEAD(empty_list); - struct space *space = space_new_xc(def, &empty_list); + struct space *space = space_new_xc(def, &empty_list, 0); /** * The new space must be inserted in the space * cache right away to achieve linearisable diff --git a/src/box/blackhole.c b/src/box/blackhole.c index f979304..517daf9 100644 --- a/src/box/blackhole.c +++ b/src/box/blackhole.c @@ -135,7 +135,7 @@ blackhole_engine_shutdown(struct engine *engine) static struct space * blackhole_engine_create_space(struct engine *engine, struct space_def *def, - struct rlist *key_list) + struct rlist *key_list, uint64_t epoch) { if (!rlist_empty(key_list)) { diag_set(ClientError, ER_UNSUPPORTED, "Blackhole", "indexes"); @@ -152,7 +152,8 @@ blackhole_engine_create_space(struct engine *engine, struct space_def *def, /* Allocate tuples on runtime arena, but check space format. */ struct tuple_format *format; format = tuple_format_new(&tuple_format_runtime->vtab, NULL, 0, 0, - def->fields, def->field_count, def->dict); + def->fields, def->field_count, def->dict, + epoch); if (format == NULL) { free(space); return NULL; diff --git a/src/box/engine.h b/src/box/engine.h index 5b96c74..0e8c76c 100644 --- a/src/box/engine.h +++ b/src/box/engine.h @@ -72,7 +72,8 @@ struct engine_vtab { void (*shutdown)(struct engine *); /** Allocate a new space instance. */ struct space *(*create_space)(struct engine *engine, - struct space_def *def, struct rlist *key_list); + struct space_def *def, struct rlist *key_list, + uint64_t epoch); /** * Write statements stored in checkpoint @vclock to @stream. */ @@ -237,9 +238,9 @@ engine_find(const char *name) static inline struct space * engine_create_space(struct engine *engine, struct space_def *def, - struct rlist *key_list) + struct rlist *key_list, uint64_t epoch) { - return engine->vtab->create_space(engine, def, key_list); + return engine->vtab->create_space(engine, def, key_list, epoch); } static inline int @@ -390,9 +391,9 @@ engine_find_xc(const char *name) static inline struct space * engine_create_space_xc(struct engine *engine, struct space_def *def, - struct rlist *key_list) + struct rlist *key_list, uint64_t epoch) { - struct space *space = engine_create_space(engine, def, key_list); + struct space *space = engine_create_space(engine, def, key_list, epoch); if (space == NULL) diag_raise(); return space; diff --git a/src/box/key_def.c b/src/box/key_def.c index 5366eb7..216d858 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -316,6 +316,8 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, def->parts[part_no].type = type; def->parts[part_no].coll = coll; def->parts[part_no].coll_id = coll_id; + def->parts[part_no].offset_slot = TUPLE_OFFSET_SLOT_NIL; + def->parts[part_no].offset_slot_epoch = 0; if (path != NULL) { def->parts[part_no].path_len = path_len; assert(def->parts[part_no].path != NULL); @@ -774,9 +776,12 @@ key_def_merge(const struct key_def *first, const struct key_def *second) new_def->parts[pos].path = data; data += part->path_len + 1; } - key_def_set_part(new_def, pos++, part->fieldno, part->type, + key_def_set_part(new_def, pos, part->fieldno, part->type, part->is_nullable, part->coll, part->coll_id, part->path, part->path_len); + new_def->parts[pos].offset_slot_epoch = part->offset_slot_epoch; + new_def->parts[pos].offset_slot = part->offset_slot; + pos++; } /* Set-append second key def's part to the new key def. */ @@ -789,9 +794,12 @@ key_def_merge(const struct key_def *first, const struct key_def *second) new_def->parts[pos].path = data; data += part->path_len + 1; } - key_def_set_part(new_def, pos++, part->fieldno, part->type, + key_def_set_part(new_def, pos, part->fieldno, part->type, part->is_nullable, part->coll, part->coll_id, part->path, part->path_len); + new_def->parts[pos].offset_slot_epoch = part->offset_slot_epoch; + new_def->parts[pos].offset_slot = part->offset_slot; + pos++; } return new_def; } diff --git a/src/box/key_def.h b/src/box/key_def.h index d451258..66691c5 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -82,6 +82,17 @@ struct key_part { uint32_t path_len; /** JSON path hash. */ uint32_t path_hash; + /** + * Epoch of offset slot cache. Initialized with + * incremental epoch of format on caching it's field's + * offset_slot via tuple_field_by_part_raw to speed up + * access on subsequent calls with same format. + * Cache is expected to use "the newest format is most + * relevant" strategy. + */ + uint64_t offset_slot_epoch; + /** Cache with format's field offset slot. */ + int32_t offset_slot; }; struct key_def; diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c index 554d6f9..62c3c9a 100644 --- a/src/box/memtx_engine.c +++ b/src/box/memtx_engine.c @@ -358,10 +358,10 @@ memtx_engine_end_recovery(struct engine *engine) static struct space * memtx_engine_create_space(struct engine *engine, struct space_def *def, - struct rlist *key_list) + struct rlist *key_list, uint64_t epoch) { struct memtx_engine *memtx = (struct memtx_engine *)engine; - return memtx_space_new(memtx, def, key_list); + return memtx_space_new(memtx, def, key_list, epoch); } static int diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c index 08ae0da..3ac606f 100644 --- a/src/box/memtx_space.c +++ b/src/box/memtx_space.c @@ -884,7 +884,7 @@ static const struct space_vtab memtx_space_vtab = { struct space * memtx_space_new(struct memtx_engine *memtx, - struct space_def *def, struct rlist *key_list) + struct space_def *def, struct rlist *key_list, uint64_t epoch) { struct memtx_space *memtx_space = malloc(sizeof(*memtx_space)); if (memtx_space == NULL) { @@ -910,7 +910,8 @@ memtx_space_new(struct memtx_engine *memtx, struct tuple_format *format = tuple_format_new(&memtx_tuple_format_vtab, keys, key_count, 0, - def->fields, def->field_count, def->dict); + def->fields, def->field_count, def->dict, + epoch); if (format == NULL) { free(memtx_space); return NULL; diff --git a/src/box/memtx_space.h b/src/box/memtx_space.h index 7dc3410..b5bec0c 100644 --- a/src/box/memtx_space.h +++ b/src/box/memtx_space.h @@ -79,7 +79,7 @@ memtx_space_replace_all_keys(struct space *, struct tuple *, struct tuple *, struct space * memtx_space_new(struct memtx_engine *memtx, - struct space_def *def, struct rlist *key_list); + struct space_def *def, struct rlist *key_list, uint64_t epoch); #if defined(__cplusplus) } /* extern "C" */ diff --git a/src/box/schema.cc b/src/box/schema.cc index 2411c8c..cf7cf35 100644 --- a/src/box/schema.cc +++ b/src/box/schema.cc @@ -214,7 +214,7 @@ sc_space_new(uint32_t id, const char *name, struct key_def *key_def, struct rlist key_list; rlist_create(&key_list); rlist_add_entry(&key_list, index_def, link); - struct space *space = space_new_xc(def, &key_list); + struct space *space = space_new_xc(def, &key_list, 0); (void) space_cache_replace(space); if (replace_trigger) trigger_add(&space->on_replace, replace_trigger); @@ -380,7 +380,7 @@ schema_init() space_def_delete(def); }); RLIST_HEAD(key_list); - struct space *space = space_new_xc(def, &key_list); + struct space *space = space_new_xc(def, &key_list, 0); space_cache_replace(space); init_system_space(space); trigger_run_xc(&on_alter_space, space); diff --git a/src/box/space.c b/src/box/space.c index 871cc67..2e4df74 100644 --- a/src/box/space.c +++ b/src/box/space.c @@ -181,12 +181,12 @@ fail: } struct space * -space_new(struct space_def *def, struct rlist *key_list) +space_new(struct space_def *def, struct rlist *key_list, uint64_t epoch) { struct engine *engine = engine_find(def->engine_name); if (engine == NULL) return NULL; - return engine_create_space(engine, def, key_list); + return engine_create_space(engine, def, key_list, epoch); } void diff --git a/src/box/space.h b/src/box/space.h index 8888ec8..068ea4b 100644 --- a/src/box/space.h +++ b/src/box/space.h @@ -378,10 +378,11 @@ struct field_def; * Allocate and initialize a space. * @param space_def Space definition. * @param key_list List of index_defs. + * @param epoch Last epoch to initialize format. * @retval Space object. */ struct space * -space_new(struct space_def *space_def, struct rlist *key_list); +space_new(struct space_def *space_def, struct rlist *key_list, uint64_t epoch); /** Destroy and free a space. */ void @@ -416,9 +417,10 @@ int generic_space_prepare_alter(struct space *, struct space *); } /* extern "C" */ static inline struct space * -space_new_xc(struct space_def *space_def, struct rlist *key_list) +space_new_xc(struct space_def *space_def, struct rlist *key_list, + uint64_t epoch) { - struct space *space = space_new(space_def, key_list); + struct space *space = space_new(space_def, key_list, epoch); if (space == NULL) diag_raise(); return space; diff --git a/src/box/sysview.c b/src/box/sysview.c index a636c68..d35ff71 100644 --- a/src/box/sysview.c +++ b/src/box/sysview.c @@ -504,8 +504,9 @@ sysview_engine_shutdown(struct engine *engine) static struct space * sysview_engine_create_space(struct engine *engine, struct space_def *def, - struct rlist *key_list) + struct rlist *key_list, uint64_t epoch) { + (void)epoch; struct space *space = (struct space *)calloc(1, sizeof(*space)); if (space == NULL) { diag_set(OutOfMemory, sizeof(*space), diff --git a/src/box/tuple.c b/src/box/tuple.c index 83bdad1..1d27c63 100644 --- a/src/box/tuple.c +++ b/src/box/tuple.c @@ -231,7 +231,7 @@ tuple_init(field_name_hash_f hash) * Create a format for runtime tuples */ tuple_format_runtime = tuple_format_new(&tuple_format_runtime_vtab, - NULL, 0, 0, NULL, 0, NULL); + NULL, 0, 0, NULL, 0, NULL, 0); if (tuple_format_runtime == NULL) return -1; @@ -403,7 +403,7 @@ box_tuple_format_new(struct key_def **keys, uint16_t key_count) { box_tuple_format_t *format = tuple_format_new(&tuple_format_runtime_vtab, - keys, key_count, 0, NULL, 0, NULL); + keys, key_count, 0, NULL, 0, NULL, 0); if (format != NULL) tuple_format_ref(format); return format; diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index fa8b938..e750129 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -753,6 +753,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, format->dict = dict; tuple_dictionary_ref(dict); } + format->epoch = 0; format->refs = 0; format->id = FORMAT_ID_NIL; format->field_count = field_count; @@ -789,7 +790,8 @@ struct tuple_format * tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys, uint16_t key_count, uint16_t extra_size, const struct field_def *space_fields, - uint32_t space_field_count, struct tuple_dictionary *dict) + uint32_t space_field_count, struct tuple_dictionary *dict, + uint64_t epoch) { struct tuple_format *format = tuple_format_alloc(keys, key_count, space_field_count, dict); @@ -799,6 +801,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys, format->engine = NULL; format->extra_size = extra_size; format->is_temporary = false; + format->epoch = epoch; if (tuple_format_register(format) < 0) { tuple_format_destroy(format); free(format); @@ -1095,13 +1098,22 @@ tuple_field_by_part_raw(const struct tuple_format *format, const char *data, struct mh_strnptr_node_t *ht_record = NULL; int32_t offset_slot; - if (format->path_hash != NULL && - (ht_record = json_path_hash_get(format->path_hash, part->path, - part->path_len, - part->path_hash)) != NULL) { + if (likely(part->offset_slot_epoch == format->epoch)) { + assert(format->epoch != 0); + offset_slot = part->offset_slot; + } else if (format->path_hash != NULL && + (ht_record = json_path_hash_get(format->path_hash, + part->path, part->path_len, + part->path_hash)) != NULL) { struct tuple_field *field = ht_record->val; assert(field != NULL); offset_slot = field->offset_slot; + /* Cache offset_slot if required. */ + if (part->offset_slot_epoch < format->epoch) { + assert(format->epoch != 0); + part->offset_slot = offset_slot; + part->offset_slot_epoch = format->epoch; + } } else { /* * Legacy tuple having no field map for diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h index df500db..b347739 100644 --- a/src/box/tuple_format.h +++ b/src/box/tuple_format.h @@ -132,6 +132,12 @@ struct tuple_field { * Tuple format describes how tuple is stored and information about its fields */ struct tuple_format { + /** + * Counter that grows incrementally on space rebuild if + * format has other distribution of offset slots comparing + * with previous one. + */ + uint64_t epoch; /** Virtual function table */ struct tuple_format_vtab vtab; /** Pointer to engine-specific data. */ @@ -226,6 +232,7 @@ tuple_format_unref(struct tuple_format *format) * @param extra_size Extra bytes to reserve in tuples metadata. * @param space_fields Array of fields, defined in a space format. * @param space_field_count Length of @a space_fields. + * @param epoch Epoch of new format. * * @retval not NULL Tuple format. * @retval NULL Memory error. @@ -234,7 +241,8 @@ struct tuple_format * tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys, uint16_t key_count, uint16_t extra_size, const struct field_def *space_fields, - uint32_t space_field_count, struct tuple_dictionary *dict); + uint32_t space_field_count, struct tuple_dictionary *dict, + uint64_t epoch); /** * Check, if @a format1 can store any tuples of @a format2. For diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 073466f..fb9e0e0 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -584,7 +584,7 @@ vinyl_engine_check_space_def(struct space_def *def) static struct space * vinyl_engine_create_space(struct engine *engine, struct space_def *def, - struct rlist *key_list) + struct rlist *key_list, uint64_t epoch) { struct space *space = malloc(sizeof(*space)); if (space == NULL) { @@ -610,7 +610,8 @@ vinyl_engine_create_space(struct engine *engine, struct space_def *def, struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, keys, key_count, 0, - def->fields, def->field_count, def->dict); + def->fields, def->field_count, def->dict, + epoch); if (format == NULL) { free(space); return NULL; @@ -3030,7 +3031,7 @@ vy_send_lsm(struct vy_join_ctx *ctx, struct vy_lsm_recovery_info *lsm_info) if (ctx->key_def == NULL) goto out; ctx->format = tuple_format_new(&vy_tuple_format_vtab, &ctx->key_def, - 1, 0, NULL, 0, NULL); + 1, 0, NULL, 0, NULL, 0); if (ctx->format == NULL) goto out_free_key_def; tuple_format_ref(ctx->format); diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c index b55933f..e0ce3ec 100644 --- a/src/box/vy_lsm.c +++ b/src/box/vy_lsm.c @@ -62,7 +62,7 @@ vy_lsm_env_create(struct vy_lsm_env *env, const char *path, void *upsert_thresh_arg) { env->key_format = tuple_format_new(&vy_tuple_format_vtab, - NULL, 0, 0, NULL, 0, NULL); + NULL, 0, 0, NULL, 0, NULL, 0); if (env->key_format == NULL) return -1; tuple_format_ref(env->key_format); @@ -156,9 +156,50 @@ vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_cache_env *cache_env, } else { lsm->disk_format = tuple_format_new(&vy_tuple_format_vtab, &cmp_def, 1, 0, NULL, 0, - NULL); + NULL, format->epoch); if (lsm->disk_format == NULL) goto fail_format; + /* + * Tuple formats should be compatible to make + * epoch-based caching work. + */ + int32_t min_offset_slot = 0; + struct tuple_field *dst_fields = lsm->disk_format->fields; + struct mh_strnptr_t *dst_ht = lsm->disk_format->path_hash; + struct mh_strnptr_t *src_ht = format->path_hash; + struct key_part *part = cmp_def->parts; + struct key_part *part_end = part + cmp_def->part_count; + for (; part < part_end; part++) { + struct tuple_field *dst_field = + &dst_fields[part->fieldno]; + struct tuple_field *src_field; + if (dst_field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { + src_field = &format->fields[part->fieldno]; + } else if (part->path != NULL) { + struct mh_strnptr_node_t *ht_record; + ht_record = json_path_hash_get(dst_ht, + part->path, + part->path_len, + part->path_hash); + assert(ht_record != NULL); + dst_field = ht_record->val; + assert(dst_field != NULL); + ht_record = json_path_hash_get(src_ht, + part->path, + part->path_len, + part->path_hash); + assert(ht_record != NULL); + src_field = ht_record->val; + assert(src_field != NULL); + } else { + continue; + } + if (src_field->offset_slot == TUPLE_OFFSET_SLOT_NIL) + continue; + dst_field->offset_slot = src_field->offset_slot; + min_offset_slot = MIN(src_field->offset_slot, min_offset_slot); + } + lsm->disk_format->field_map_size = -min_offset_slot * sizeof(uint32_t); } tuple_format_ref(lsm->disk_format); diff --git a/test/unit/vy_iterators_helper.c b/test/unit/vy_iterators_helper.c index 40d8d6a..bfef6a2 100644 --- a/test/unit/vy_iterators_helper.c +++ b/test/unit/vy_iterators_helper.c @@ -22,7 +22,7 @@ vy_iterator_C_test_init(size_t cache_size) vy_cache_env_create(&cache_env, cord_slab_cache()); vy_cache_env_set_quota(&cache_env, cache_size); vy_key_format = tuple_format_new(&vy_tuple_format_vtab, NULL, 0, 0, - NULL, 0, NULL); + NULL, 0, NULL, 0); tuple_format_ref(vy_key_format); size_t mem_size = 64 * 1024 * 1024; @@ -210,7 +210,7 @@ create_test_mem(struct key_def *def) struct key_def * const defs[] = { def }; struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, defs, def->part_count, - 0, NULL, 0, NULL); + 0, NULL, 0, NULL, 0); fail_if(format == NULL); /* Create format with column mask */ @@ -234,7 +234,7 @@ create_test_cache(uint32_t *fields, uint32_t *types, assert(*def != NULL); vy_cache_create(cache, &cache_env, *def, true); *format = tuple_format_new(&vy_tuple_format_vtab, def, 1, 0, NULL, 0, - NULL); + NULL, 0); tuple_format_ref(*format); } diff --git a/test/unit/vy_mem.c b/test/unit/vy_mem.c index 967aabe..d797b3d 100644 --- a/test/unit/vy_mem.c +++ b/test/unit/vy_mem.c @@ -78,7 +78,7 @@ test_iterator_restore_after_insertion() /* Create format */ struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, &key_def, 1, 0, NULL, 0, - NULL); + NULL, 0); assert(format != NULL); tuple_format_ref(format); diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c index 86877d7..f5756ca 100644 --- a/test/unit/vy_point_lookup.c +++ b/test/unit/vy_point_lookup.c @@ -85,7 +85,7 @@ test_basic() vy_cache_create(&cache, &cache_env, key_def, true); struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, &key_def, 1, 0, NULL, 0, - NULL); + NULL, 0); isnt(format, NULL, "tuple_format_new is not NULL"); tuple_format_ref(format); -- 2.7.4