From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v4 13/14] box: introduce offset slot cache in key_part Date: Thu, 11 Oct 2018 10:58:45 +0300 Message-Id: <7d56730aa9cf2c25f6a20e1a8838ea6f9ff6b172.1539244271.git.kshcherbatov@tarantool.org> In-Reply-To: References: In-Reply-To: References: To: tarantool-patches@freelists.org Cc: vdavydov.dev@gmail.com, Kirill Shcherbatov List-ID: Same key_part could be used in different formats multiple times, so different field->offset_slot would be allocated. In most scenarios we work with series of tuples of same format, and (in general) format lookup for field would be expensive operation for JSON-paths defined in key_part. New slot_cache field in key_part structure and epoch-based mechanism to validate it actuality should be effective approach to improve performance. Part of #1012. --- src/box/alter.cc | 7 +++-- src/box/blackhole.c | 5 ++-- src/box/engine.h | 11 ++++---- src/box/key_def.c | 12 +++++++-- src/box/key_def.h | 11 ++++++++ src/box/memtx_engine.c | 4 +-- src/box/memtx_space.c | 5 ++-- src/box/memtx_space.h | 2 +- src/box/schema.cc | 4 +-- src/box/space.c | 4 +-- src/box/space.h | 8 +++--- src/box/sysview.c | 3 ++- src/box/tuple.c | 4 +-- src/box/tuple_format.c | 60 ++++++++++++++++++++++++++--------------- src/box/tuple_format.h | 6 ++++- src/box/vinyl.c | 7 ++--- src/box/vy_lsm.c | 40 +++++++++++++++++++++++++-- test/unit/vy_iterators_helper.c | 6 ++--- test/unit/vy_mem.c | 2 +- test/unit/vy_point_lookup.c | 2 +- 20 files changed, 145 insertions(+), 58 deletions(-) diff --git a/src/box/alter.cc b/src/box/alter.cc index 4ac44c2..ded5f5b 100644 --- a/src/box/alter.cc +++ b/src/box/alter.cc @@ -883,7 +883,10 @@ alter_space_do(struct txn *txn, struct alter_space *alter) * Create a new (empty) space for the new definition. * Sic: the triggers are not moved over yet. */ - alter->new_space = space_new_xc(alter->space_def, &alter->key_list); + alter->new_space = + space_new_xc(alter->space_def, &alter->key_list, + alter->old_space->format != NULL ? + alter->old_space->format->epoch + 1 : 1); /* * Copy the replace function, the new space is at the same recovery * phase as the old one. This hack is especially necessary for @@ -1604,7 +1607,7 @@ on_replace_dd_space(struct trigger * /* trigger */, void *event) auto def_guard = make_scoped_guard([=] { space_def_delete(def); }); RLIST_HEAD(empty_list); - struct space *space = space_new_xc(def, &empty_list); + struct space *space = space_new_xc(def, &empty_list, 0); /** * The new space must be inserted in the space * cache right away to achieve linearisable diff --git a/src/box/blackhole.c b/src/box/blackhole.c index f979304..517daf9 100644 --- a/src/box/blackhole.c +++ b/src/box/blackhole.c @@ -135,7 +135,7 @@ blackhole_engine_shutdown(struct engine *engine) static struct space * blackhole_engine_create_space(struct engine *engine, struct space_def *def, - struct rlist *key_list) + struct rlist *key_list, uint64_t epoch) { if (!rlist_empty(key_list)) { diag_set(ClientError, ER_UNSUPPORTED, "Blackhole", "indexes"); @@ -152,7 +152,8 @@ blackhole_engine_create_space(struct engine *engine, struct space_def *def, /* Allocate tuples on runtime arena, but check space format. */ struct tuple_format *format; format = tuple_format_new(&tuple_format_runtime->vtab, NULL, 0, 0, - def->fields, def->field_count, def->dict); + def->fields, def->field_count, def->dict, + epoch); if (format == NULL) { free(space); return NULL; diff --git a/src/box/engine.h b/src/box/engine.h index 5b96c74..0e8c76c 100644 --- a/src/box/engine.h +++ b/src/box/engine.h @@ -72,7 +72,8 @@ struct engine_vtab { void (*shutdown)(struct engine *); /** Allocate a new space instance. */ struct space *(*create_space)(struct engine *engine, - struct space_def *def, struct rlist *key_list); + struct space_def *def, struct rlist *key_list, + uint64_t epoch); /** * Write statements stored in checkpoint @vclock to @stream. */ @@ -237,9 +238,9 @@ engine_find(const char *name) static inline struct space * engine_create_space(struct engine *engine, struct space_def *def, - struct rlist *key_list) + struct rlist *key_list, uint64_t epoch) { - return engine->vtab->create_space(engine, def, key_list); + return engine->vtab->create_space(engine, def, key_list, epoch); } static inline int @@ -390,9 +391,9 @@ engine_find_xc(const char *name) static inline struct space * engine_create_space_xc(struct engine *engine, struct space_def *def, - struct rlist *key_list) + struct rlist *key_list, uint64_t epoch) { - struct space *space = engine_create_space(engine, def, key_list); + struct space *space = engine_create_space(engine, def, key_list, epoch); if (space == NULL) diag_raise(); return space; diff --git a/src/box/key_def.c b/src/box/key_def.c index 1b32387..f0f0313 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -176,6 +176,8 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, def->parts[part_no].type = type; def->parts[part_no].coll = coll; def->parts[part_no].coll_id = coll_id; + def->parts[part_no].offset_slot = TUPLE_OFFSET_SLOT_NIL; + def->parts[part_no].offset_slot_epoch = 0; if (path != NULL) { def->parts[part_no].path_len = path_len; assert(def->parts[part_no].path != NULL); @@ -680,9 +682,12 @@ key_def_merge(const struct key_def *first, const struct key_def *second) new_def->parts[pos].path = data; data += part->path_len + 1; } - key_def_set_part(new_def, pos++, part->fieldno, part->type, + key_def_set_part(new_def, pos, part->fieldno, part->type, part->is_nullable, part->coll, part->coll_id, part->path, part->path_len); + new_def->parts[pos].offset_slot_epoch = part->offset_slot_epoch; + new_def->parts[pos].offset_slot = part->offset_slot; + pos++; } /* Set-append second key def's part to the new key def. */ @@ -695,9 +700,12 @@ key_def_merge(const struct key_def *first, const struct key_def *second) new_def->parts[pos].path = data; data += part->path_len + 1; } - key_def_set_part(new_def, pos++, part->fieldno, part->type, + key_def_set_part(new_def, pos, part->fieldno, part->type, part->is_nullable, part->coll, part->coll_id, part->path, part->path_len); + new_def->parts[pos].offset_slot_epoch = part->offset_slot_epoch; + new_def->parts[pos].offset_slot = part->offset_slot; + pos++; } key_def_set_cmp(new_def); return new_def; diff --git a/src/box/key_def.h b/src/box/key_def.h index ca73015..8271cee 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -85,6 +85,17 @@ struct key_part { char *path; /** The length of JSON path. */ uint32_t path_len; + /** + * Epoch of offset slot cache. Initialized with + * incremental epoch of format on caching it's field's + * offset_slot via tuple_field_by_part_raw to speed up + * access on subsequent calls with same format. + * Cache is expected to use "the newest format is most + * relevant" strategy. + */ + uint64_t offset_slot_epoch; + /** Cache with format's field offset slot. */ + int32_t offset_slot; }; struct key_def; diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c index a35f5e7..6051cc7 100644 --- a/src/box/memtx_engine.c +++ b/src/box/memtx_engine.c @@ -358,10 +358,10 @@ memtx_engine_end_recovery(struct engine *engine) static struct space * memtx_engine_create_space(struct engine *engine, struct space_def *def, - struct rlist *key_list) + struct rlist *key_list, uint64_t epoch) { struct memtx_engine *memtx = (struct memtx_engine *)engine; - return memtx_space_new(memtx, def, key_list); + return memtx_space_new(memtx, def, key_list, epoch); } static int diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c index 08ae0da..3ac606f 100644 --- a/src/box/memtx_space.c +++ b/src/box/memtx_space.c @@ -884,7 +884,7 @@ static const struct space_vtab memtx_space_vtab = { struct space * memtx_space_new(struct memtx_engine *memtx, - struct space_def *def, struct rlist *key_list) + struct space_def *def, struct rlist *key_list, uint64_t epoch) { struct memtx_space *memtx_space = malloc(sizeof(*memtx_space)); if (memtx_space == NULL) { @@ -910,7 +910,8 @@ memtx_space_new(struct memtx_engine *memtx, struct tuple_format *format = tuple_format_new(&memtx_tuple_format_vtab, keys, key_count, 0, - def->fields, def->field_count, def->dict); + def->fields, def->field_count, def->dict, + epoch); if (format == NULL) { free(memtx_space); return NULL; diff --git a/src/box/memtx_space.h b/src/box/memtx_space.h index 7dc3410..b5bec0c 100644 --- a/src/box/memtx_space.h +++ b/src/box/memtx_space.h @@ -79,7 +79,7 @@ memtx_space_replace_all_keys(struct space *, struct tuple *, struct tuple *, struct space * memtx_space_new(struct memtx_engine *memtx, - struct space_def *def, struct rlist *key_list); + struct space_def *def, struct rlist *key_list, uint64_t epoch); #if defined(__cplusplus) } /* extern "C" */ diff --git a/src/box/schema.cc b/src/box/schema.cc index 67dbd8c..e5763fa 100644 --- a/src/box/schema.cc +++ b/src/box/schema.cc @@ -221,7 +221,7 @@ sc_space_new(uint32_t id, const char *name, struct rlist key_list; rlist_create(&key_list); rlist_add_entry(&key_list, index_def, link); - struct space *space = space_new_xc(def, &key_list); + struct space *space = space_new_xc(def, &key_list, 0); (void) space_cache_replace(space); if (replace_trigger) trigger_add(&space->on_replace, replace_trigger); @@ -380,7 +380,7 @@ schema_init() space_def_delete(def); }); RLIST_HEAD(key_list); - struct space *space = space_new_xc(def, &key_list); + struct space *space = space_new_xc(def, &key_list, 0); space_cache_replace(space); init_system_space(space); trigger_run_xc(&on_alter_space, space); diff --git a/src/box/space.c b/src/box/space.c index 871cc67..2e4df74 100644 --- a/src/box/space.c +++ b/src/box/space.c @@ -181,12 +181,12 @@ fail: } struct space * -space_new(struct space_def *def, struct rlist *key_list) +space_new(struct space_def *def, struct rlist *key_list, uint64_t epoch) { struct engine *engine = engine_find(def->engine_name); if (engine == NULL) return NULL; - return engine_create_space(engine, def, key_list); + return engine_create_space(engine, def, key_list, epoch); } void diff --git a/src/box/space.h b/src/box/space.h index 8888ec8..068ea4b 100644 --- a/src/box/space.h +++ b/src/box/space.h @@ -378,10 +378,11 @@ struct field_def; * Allocate and initialize a space. * @param space_def Space definition. * @param key_list List of index_defs. + * @param epoch Last epoch to initialize format. * @retval Space object. */ struct space * -space_new(struct space_def *space_def, struct rlist *key_list); +space_new(struct space_def *space_def, struct rlist *key_list, uint64_t epoch); /** Destroy and free a space. */ void @@ -416,9 +417,10 @@ int generic_space_prepare_alter(struct space *, struct space *); } /* extern "C" */ static inline struct space * -space_new_xc(struct space_def *space_def, struct rlist *key_list) +space_new_xc(struct space_def *space_def, struct rlist *key_list, + uint64_t epoch) { - struct space *space = space_new(space_def, key_list); + struct space *space = space_new(space_def, key_list, epoch); if (space == NULL) diag_raise(); return space; diff --git a/src/box/sysview.c b/src/box/sysview.c index a636c68..d35ff71 100644 --- a/src/box/sysview.c +++ b/src/box/sysview.c @@ -504,8 +504,9 @@ sysview_engine_shutdown(struct engine *engine) static struct space * sysview_engine_create_space(struct engine *engine, struct space_def *def, - struct rlist *key_list) + struct rlist *key_list, uint64_t epoch) { + (void)epoch; struct space *space = (struct space *)calloc(1, sizeof(*space)); if (space == NULL) { diag_set(OutOfMemory, sizeof(*space), diff --git a/src/box/tuple.c b/src/box/tuple.c index 2bc414f..6fbd9aa 100644 --- a/src/box/tuple.c +++ b/src/box/tuple.c @@ -234,7 +234,7 @@ tuple_init(field_name_hash_f hash) * Create a format for runtime tuples */ tuple_format_runtime = tuple_format_new(&tuple_format_runtime_vtab, - NULL, 0, 0, NULL, 0, NULL); + NULL, 0, 0, NULL, 0, NULL, 0); if (tuple_format_runtime == NULL) return -1; @@ -406,7 +406,7 @@ box_tuple_format_new(struct key_def **keys, uint16_t key_count) { box_tuple_format_t *format = tuple_format_new(&tuple_format_runtime_vtab, - keys, key_count, 0, NULL, 0, NULL); + keys, key_count, 0, NULL, 0, NULL, 0); if (format != NULL) tuple_format_ref(format); return format; diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 4738eb9..bdd802a 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -502,6 +502,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, } for (uint32_t i = 0; i < field_count; i++) tuple_field_create(&format->fields[i], NULL, 0); + format->epoch = 0; format->max_path_tree_depth = 1; format->allocation_size = total; format->refs = 0; @@ -545,7 +546,8 @@ struct tuple_format * tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys, uint16_t key_count, uint16_t extra_size, const struct field_def *space_fields, - uint32_t space_field_count, struct tuple_dictionary *dict) + uint32_t space_field_count, struct tuple_dictionary *dict, + uint64_t epoch) { struct tuple_format *format = tuple_format_alloc(keys, key_count, space_field_count, dict); @@ -555,6 +557,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys, format->engine = NULL; format->extra_size = extra_size; format->is_temporary = false; + format->epoch = epoch; if (tuple_format_register(format) < 0) { tuple_format_destroy(format); free(format); @@ -1050,27 +1053,42 @@ tuple_field_by_part_raw(const struct tuple_format *format, const char *data, if (likely(part->path == NULL)) return tuple_field_raw(format, data, field_map, part->fieldno); - struct tuple_field *root_field = - unlikely(part->fieldno < format->field_count) ? - (struct tuple_field *)&format->fields[part->fieldno] : NULL; - struct tuple_field *field = - unlikely(root_field == NULL) ? NULL: - tuple_field_tree_lookup(root_field, part->path, part->path_len); - if (unlikely(field == NULL)) { - /* - * Legacy tuple having no field map for JSON - * index require full path parse. - */ - const char *field_raw = - tuple_field_raw(format, data, field_map, part->fieldno); - if (unlikely(field_raw == NULL)) - return NULL; - if (tuple_field_by_relative_path(&field_raw, part->path, - part->path_len) != 0) - return NULL; - return field_raw; + int32_t offset_slot; + if (likely(part->offset_slot_epoch == format->epoch)) { + assert(format->epoch != 0); + offset_slot = part->offset_slot; + } else { + struct tuple_field *root_field = + unlikely(part->fieldno < format->field_count) ? + (struct tuple_field *)&format->fields[part->fieldno] : + NULL; + struct tuple_field *field = + unlikely(root_field == NULL) ? NULL: + tuple_field_tree_lookup(root_field, part->path, + part->path_len); + if (unlikely(field == NULL)) { + /* + * Legacy tuple having no field map for JSON + * index require full path parse. + */ + const char *field_raw = + tuple_field_raw(format, data, field_map, + part->fieldno); + if (unlikely(field_raw == NULL)) + return NULL; + if (tuple_field_by_relative_path(&field_raw, part->path, + part->path_len) != 0) + return NULL; + return field_raw; + } + offset_slot = field->offset_slot; + /* Cache offset_slot if required. */ + if (part->offset_slot_epoch < format->epoch) { + assert(format->epoch != 0); + part->offset_slot = offset_slot; + part->offset_slot_epoch = format->epoch; + } } - int32_t offset_slot = field->offset_slot; assert(offset_slot < 0); assert(-offset_slot * sizeof(uint32_t) <= format->field_map_size); if (unlikely(field_map[offset_slot] == 0)) diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h index 599e768..005f70b 100644 --- a/src/box/tuple_format.h +++ b/src/box/tuple_format.h @@ -120,6 +120,8 @@ struct tuple_field { * Tuple format describes how tuple is stored and information about its fields */ struct tuple_format { + /** Counter that grows incrementally on space rebuild. */ + uint64_t epoch; /** Virtual function table */ struct tuple_format_vtab vtab; /** Pointer to engine-specific data. */ @@ -220,6 +222,7 @@ tuple_format_unref(struct tuple_format *format) * @param extra_size Extra bytes to reserve in tuples metadata. * @param space_fields Array of fields, defined in a space format. * @param space_field_count Length of @a space_fields. + * @param epoch Epoch of new format. * * @retval not NULL Tuple format. * @retval NULL Memory error. @@ -228,7 +231,8 @@ struct tuple_format * tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys, uint16_t key_count, uint16_t extra_size, const struct field_def *space_fields, - uint32_t space_field_count, struct tuple_dictionary *dict); + uint32_t space_field_count, struct tuple_dictionary *dict, + uint64_t epoch); /** * Check, if @a format1 can store any tuples of @a format2. For diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 0a6ead9..807cd9e 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -578,7 +578,7 @@ vinyl_engine_check_space_def(struct space_def *def) static struct space * vinyl_engine_create_space(struct engine *engine, struct space_def *def, - struct rlist *key_list) + struct rlist *key_list, uint64_t epoch) { struct space *space = malloc(sizeof(*space)); if (space == NULL) { @@ -604,7 +604,8 @@ vinyl_engine_create_space(struct engine *engine, struct space_def *def, struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, keys, key_count, 0, - def->fields, def->field_count, def->dict); + def->fields, def->field_count, def->dict, + epoch); if (format == NULL) { free(space); return NULL; @@ -3025,7 +3026,7 @@ vy_send_lsm(struct vy_join_ctx *ctx, struct vy_lsm_recovery_info *lsm_info) if (ctx->key_def == NULL) goto out; ctx->format = tuple_format_new(&vy_tuple_format_vtab, &ctx->key_def, - 1, 0, NULL, 0, NULL); + 1, 0, NULL, 0, NULL, 0); if (ctx->format == NULL) goto out_free_key_def; tuple_format_ref(ctx->format); diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c index de5ad31..c549f4b 100644 --- a/src/box/vy_lsm.c +++ b/src/box/vy_lsm.c @@ -61,7 +61,7 @@ vy_lsm_env_create(struct vy_lsm_env *env, const char *path, void *upsert_thresh_arg) { env->key_format = tuple_format_new(&vy_tuple_format_vtab, - NULL, 0, 0, NULL, 0, NULL); + NULL, 0, 0, NULL, 0, NULL, 0); if (env->key_format == NULL) return -1; tuple_format_ref(env->key_format); @@ -155,9 +155,45 @@ vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_cache_env *cache_env, } else { lsm->disk_format = tuple_format_new(&vy_tuple_format_vtab, &cmp_def, 1, 0, NULL, 0, - NULL); + NULL, format->epoch); if (lsm->disk_format == NULL) goto fail_format; + /* + * Tuple formats should be compatible to make + * epoch-based caching work. + */ + int32_t min_offset_slot = 0; + struct tuple_field *dst_fields = lsm->disk_format->fields; + struct tuple_field *src_fields = format->fields; + struct key_part *part = cmp_def->parts; + struct key_part *part_end = part + cmp_def->part_count; + for (; part < part_end; part++) { + struct tuple_field *dst_field = + &dst_fields[part->fieldno]; + struct tuple_field *src_field = + &src_fields[part->fieldno]; + if (dst_field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { + src_field = &format->fields[part->fieldno]; + } else if (part->path != NULL) { + src_field = + tuple_field_tree_lookup(src_field, + part->path, + part->path_len); + assert(src_field != NULL); + dst_field = + tuple_field_tree_lookup(dst_field, + part->path, + part->path_len); + assert(dst_field != NULL); + } else { + continue; + } + if (src_field->offset_slot == TUPLE_OFFSET_SLOT_NIL) + continue; + dst_field->offset_slot = src_field->offset_slot; + min_offset_slot = MIN(src_field->offset_slot, min_offset_slot); + } + lsm->disk_format->field_map_size = -min_offset_slot * sizeof(uint32_t); } tuple_format_ref(lsm->disk_format); diff --git a/test/unit/vy_iterators_helper.c b/test/unit/vy_iterators_helper.c index 40d8d6a..bfef6a2 100644 --- a/test/unit/vy_iterators_helper.c +++ b/test/unit/vy_iterators_helper.c @@ -22,7 +22,7 @@ vy_iterator_C_test_init(size_t cache_size) vy_cache_env_create(&cache_env, cord_slab_cache()); vy_cache_env_set_quota(&cache_env, cache_size); vy_key_format = tuple_format_new(&vy_tuple_format_vtab, NULL, 0, 0, - NULL, 0, NULL); + NULL, 0, NULL, 0); tuple_format_ref(vy_key_format); size_t mem_size = 64 * 1024 * 1024; @@ -210,7 +210,7 @@ create_test_mem(struct key_def *def) struct key_def * const defs[] = { def }; struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, defs, def->part_count, - 0, NULL, 0, NULL); + 0, NULL, 0, NULL, 0); fail_if(format == NULL); /* Create format with column mask */ @@ -234,7 +234,7 @@ create_test_cache(uint32_t *fields, uint32_t *types, assert(*def != NULL); vy_cache_create(cache, &cache_env, *def, true); *format = tuple_format_new(&vy_tuple_format_vtab, def, 1, 0, NULL, 0, - NULL); + NULL, 0); tuple_format_ref(*format); } diff --git a/test/unit/vy_mem.c b/test/unit/vy_mem.c index 967aabe..d797b3d 100644 --- a/test/unit/vy_mem.c +++ b/test/unit/vy_mem.c @@ -78,7 +78,7 @@ test_iterator_restore_after_insertion() /* Create format */ struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, &key_def, 1, 0, NULL, 0, - NULL); + NULL, 0); assert(format != NULL); tuple_format_ref(format); diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c index dd33bbe..73afbd5 100644 --- a/test/unit/vy_point_lookup.c +++ b/test/unit/vy_point_lookup.c @@ -85,7 +85,7 @@ test_basic() vy_cache_create(&cache, &cache_env, key_def, true); struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, &key_def, 1, 0, NULL, 0, - NULL); + NULL, 0); isnt(format, NULL, "tuple_format_new is not NULL"); tuple_format_ref(format); -- 2.7.4