From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [RFC PATCH 10/12] box: factor out local recovery function Date: Wed, 6 Jun 2018 20:45:10 +0300 Message-Id: <3e7bc40b285ca152cbbc2c9fdbd1a8a8b05c9318.1528305232.git.vdavydov.dev@gmail.com> In-Reply-To: References: In-Reply-To: References: To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: - Factor out local_recovery() from box_cfg_xc(). Make it set up replication and handle local recovery and hot standby cases. - Move replication setup in case of initial bootstrap from box_cfg_xc() to bootstrap() to make bootstrap() consistent with local_recovery(). - Move initial snapshot creation from bootstrap() to bootstrap_master() and bootstrap_from_master(). Needed for #461 --- src/box/box.cc | 277 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 150 insertions(+), 127 deletions(-) diff --git a/src/box/box.cc b/src/box/box.cc index 2ff9fb5f..9b2c2e2a 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -1644,6 +1644,11 @@ bootstrap_master(const struct tt_uuid *replicaset_uuid) /* Set UUID of a new replica set */ box_set_replicaset_uuid(replicaset_uuid); + + /* Make the initial checkpoint */ + if (engine_begin_checkpoint() || + engine_commit_checkpoint(&replicaset.vclock)) + panic("failed to create a checkpoint"); } /** @@ -1698,6 +1703,11 @@ bootstrap_from_master(struct replica *master) /* Switch applier to initial state */ applier_resume_to_state(applier, APPLIER_READY, TIMEOUT_INFINITY); assert(applier->state == APPLIER_READY); + + /* Make the initial checkpoint */ + if (engine_begin_checkpoint() || + engine_commit_checkpoint(&replicaset.vclock)) + panic("failed to create a checkpoint"); } /** @@ -1708,8 +1718,31 @@ bootstrap_from_master(struct replica *master) * the leader of a new cluster */ static void -bootstrap(const struct tt_uuid *replicaset_uuid, bool *is_bootstrap_leader) -{ +bootstrap(const struct tt_uuid *instance_uuid, + const struct 
tt_uuid *replicaset_uuid, + bool *is_bootstrap_leader) +{ + /* Initialize instance UUID. */ + assert(tt_uuid_is_nil(&INSTANCE_UUID)); + if (!tt_uuid_is_nil(instance_uuid)) + INSTANCE_UUID = *instance_uuid; + else + tt_uuid_create(&INSTANCE_UUID); + /* + * Begin listening on the socket to enable + * master-master replication leader election. + */ + box_listen(); + /* + * Wait for the cluster to start up. + * + * Note, when bootstrapping a new instance, we have to + * connect to all masters to make sure all replicas + * receive the same replica set UUID when a new cluster + * is deployed. + */ + box_sync_replication(TIMEOUT_INFINITY, true); + /* Use the first replica by URI as a bootstrap leader */ struct replica *master = replicaset_leader(); assert(master == NULL || master->applier != NULL); @@ -1727,9 +1760,116 @@ bootstrap(const struct tt_uuid *replicaset_uuid, bool *is_bootstrap_leader) bootstrap_master(replicaset_uuid); *is_bootstrap_leader = true; } - if (engine_begin_checkpoint() || - engine_commit_checkpoint(&replicaset.vclock)) - panic("failed to create a checkpoint"); +} + +/** + * Recover the instance from the local directory. + * Enter hot standby if the directory is locked. + */ +static void +local_recovery(const struct tt_uuid *instance_uuid, + const struct tt_uuid *replicaset_uuid, + const struct vclock *checkpoint_vclock) +{ + /* Check instance UUID. 
*/ + assert(!tt_uuid_is_nil(&INSTANCE_UUID)); + if (!tt_uuid_is_nil(instance_uuid) && + !tt_uuid_is_equal(instance_uuid, &INSTANCE_UUID)) { + tnt_raise(ClientError, ER_INSTANCE_UUID_MISMATCH, + tt_uuid_str(instance_uuid), + tt_uuid_str(&INSTANCE_UUID)); + } + + struct wal_stream wal_stream; + wal_stream_create(&wal_stream, cfg_geti64("rows_per_wal")); + + struct recovery *recovery; + recovery = recovery_new(cfg_gets("wal_dir"), + cfg_geti("force_recovery"), + checkpoint_vclock); + auto guard = make_scoped_guard([=]{ recovery_delete(recovery); }); + + /* + * Initialize the replica set vclock from recovery. + * The local WAL may contain rows from remote masters, + * so we must reflect this in replicaset vclock to + * not attempt to apply these rows twice. + */ + recovery_end_vclock(recovery, &replicaset.vclock); + + if (wal_dir_lock >= 0) { + box_listen(); + box_sync_replication(replication_connect_timeout, false); + } + + /* + * recovery->vclock is needed by Vinyl to filter + * WAL rows that were dumped before restart. + * + * XXX: Passing an internal member of the recovery + * object to an engine is an ugly hack. Instead we + * should introduce space_vtab::apply_wal_row method + * and explicitly pass the statement LSN to it. + */ + engine_begin_initial_recovery_xc(&recovery->vclock); + + struct memtx_engine *memtx; + memtx = (struct memtx_engine *)engine_by_name("memtx"); + assert(memtx != NULL); + + struct recovery_journal journal; + recovery_journal_create(&journal, &recovery->vclock); + journal_set(&journal.base); + + /* + * We explicitly request memtx to recover its + * snapshot as a separate phase since it contains + * data for system spaces, and triggers on + * recovery of system spaces issue DDL events in + * other engines. 
+ */ + memtx_engine_recover_snapshot_xc(memtx, checkpoint_vclock); + + engine_begin_final_recovery_xc(); + recover_remaining_wals(recovery, &wal_stream.base, NULL, true); + /* + * Leave hot standby mode, if any, only after + * acquiring the lock. + */ + if (wal_dir_lock < 0) { + title("hot_standby"); + say_info("Entering hot standby mode"); + recovery_follow_local(recovery, &wal_stream.base, "hot_standby", + cfg_getd("wal_dir_rescan_delay")); + while (true) { + if (path_lock(cfg_gets("wal_dir"), &wal_dir_lock)) + diag_raise(); + if (wal_dir_lock >= 0) + break; + fiber_sleep(0.1); + } + recovery_stop_local(recovery); + /* + * Advance replica set vclock to reflect records + * applied in hot standby mode. + */ + vclock_copy(&replicaset.vclock, &recovery->vclock); + box_listen(); + box_sync_replication(replication_connect_timeout, false); + } + recovery_finalize(recovery); + engine_end_recovery_xc(); + + /* Check replica set UUID. */ + if (!tt_uuid_is_nil(replicaset_uuid) && + !tt_uuid_is_equal(replicaset_uuid, &REPLICASET_UUID)) { + tnt_raise(ClientError, ER_REPLICASET_UUID_MISMATCH, + tt_uuid_str(replicaset_uuid), + tt_uuid_str(&REPLICASET_UUID)); + } + + /* Clear the pointer to journal before it goes out of scope */ + journal_set(NULL); } static void @@ -1826,130 +1966,13 @@ box_cfg_xc(void) } bool is_bootstrap_leader = false; if (last_checkpoint_lsn >= 0) { - /* Check instance UUID. 
*/ - assert(!tt_uuid_is_nil(&INSTANCE_UUID)); - if (!tt_uuid_is_nil(&instance_uuid) && - !tt_uuid_is_equal(&instance_uuid, &INSTANCE_UUID)) { - tnt_raise(ClientError, ER_INSTANCE_UUID_MISMATCH, - tt_uuid_str(&instance_uuid), - tt_uuid_str(&INSTANCE_UUID)); - } - - struct wal_stream wal_stream; - wal_stream_create(&wal_stream, cfg_geti64("rows_per_wal")); - - struct recovery *recovery; - recovery = recovery_new(cfg_gets("wal_dir"), - cfg_geti("force_recovery"), - &last_checkpoint_vclock); - auto guard = make_scoped_guard([=]{ recovery_delete(recovery); }); - - /* - * Initialize the replica set vclock from recovery. - * The local WAL may contain rows from remote masters, - * so we must reflect this in replicaset vclock to - * not attempt to apply these rows twice. - */ - recovery_end_vclock(recovery, &replicaset.vclock); - - if (wal_dir_lock >= 0) { - box_listen(); - box_sync_replication(replication_connect_timeout, false); - } - - /* - * recovery->vclock is needed by Vinyl to filter - * WAL rows that were dumped before restart. - * - * XXX: Passing an internal member of the recovery - * object to an engine is an ugly hack. Instead we - * should introduce Engine::applyWALRow method and - * explicitly pass the statement LSN to it. - */ - engine_begin_initial_recovery_xc(&recovery->vclock); - - struct memtx_engine *memtx; - memtx = (struct memtx_engine *)engine_by_name("memtx"); - assert(memtx != NULL); - - struct recovery_journal journal; - recovery_journal_create(&journal, &recovery->vclock); - journal_set(&journal.base); - - /** - * We explicitly request memtx to recover its - * snapshot as a separate phase since it contains - * data for system spaces, and triggers on - * recovery of system spaces issue DDL events in - * other engines. 
- */ - memtx_engine_recover_snapshot_xc(memtx, - &last_checkpoint_vclock); - - engine_begin_final_recovery_xc(); - recover_remaining_wals(recovery, &wal_stream.base, NULL, true); - /* - * Leave hot standby mode, if any, only - * after acquiring the lock. - */ - if (wal_dir_lock < 0) { - title("hot_standby"); - say_info("Entering hot standby mode"); - recovery_follow_local(recovery, &wal_stream.base, - "hot_standby", - cfg_getd("wal_dir_rescan_delay")); - while (true) { - if (path_lock(cfg_gets("wal_dir"), - &wal_dir_lock)) - diag_raise(); - if (wal_dir_lock >= 0) - break; - fiber_sleep(0.1); - } - recovery_stop_local(recovery); - /* - * Advance replica set vclock to reflect records - * applied in hot standby mode. - */ - vclock_copy(&replicaset.vclock, &recovery->vclock); - box_listen(); - box_sync_replication(replication_connect_timeout, false); - } - recovery_finalize(recovery); - engine_end_recovery_xc(); - - /* Check replica set UUID. */ - if (!tt_uuid_is_nil(&replicaset_uuid) && - !tt_uuid_is_equal(&replicaset_uuid, &REPLICASET_UUID)) { - tnt_raise(ClientError, ER_REPLICASET_UUID_MISMATCH, - tt_uuid_str(&replicaset_uuid), - tt_uuid_str(&REPLICASET_UUID)); - } - - /* Clear the pointer to journal before it goes out of scope */ - journal_set(NULL); + /* Recover the instance from the local directory */ + local_recovery(&instance_uuid, &replicaset_uuid, + &last_checkpoint_vclock); } else { - if (!tt_uuid_is_nil(&instance_uuid)) - INSTANCE_UUID = instance_uuid; - else - tt_uuid_create(&INSTANCE_UUID); - /* - * Begin listening on the socket to enable - * master-master replication leader election. - */ - box_listen(); - - /* - * Wait for the cluster to start up. - * - * Note, when bootstrapping a new instance, we have to - * connect to all masters to make sure all replicas - * receive the same replica set UUID when a new cluster - * is deployed. 
- */ - box_sync_replication(TIMEOUT_INFINITY, true); /* Bootstrap a new master */ - bootstrap(&replicaset_uuid, &is_bootstrap_leader); + bootstrap(&instance_uuid, &replicaset_uuid, + &is_bootstrap_leader); } fiber_gc(); -- 2.11.0