[Tarantool-patches] [PATCH] Add options for upgrade testing
Sergey Bronnikov
sergeyb at tarantool.org
Tue Dec 1 15:32:52 MSK 2020
Hi,
On 27.11.2020 04:45, Alexander Turenko wrote:
> Sorry for the long delay.
>
> I looked over the patch, tested it a bit and proposed several fixups.
>
> I pasted them here (for ease commenting) and pushed to the branch
> (upward your commits). If you're okay with them, I can squash commits
> and push the result. Or, maybe, extract several commits (re debug
> property, re xlog module) and squash the rest (keeping the conflicting
> options fix separate, of course).
>
> The branch: ligurio/gh-4801-add-snapshot-option.
>
> And sorry that I reworked a lot of code. I tried to minimize review
> ping-ponging, but make myself comfortable with the result. And amount of
> changes becomes larger than I initially expect. A lot of low hanging
> fruits of the kind 'okay, but there is the easy way to make it a bit
> better' appears.
Many thanks for the patches! Especially those that simplify the code.
> I hope such moments will be less and less frequent with a time we'll
> work in touch. A review is often about compromises and it is the work on
> ourself for both sides.
>
> WBR, Alexander Turenko.
>
<snipped>
> new file mode 100644
> index 0000000..dbaa4dc
> --- /dev/null
> +++ b/lib/xlog.py
> @@ -0,0 +1,232 @@
> +"""Xlog and snapshot utility functions."""
> +
> +import os
> +import msgpack
> +import subprocess
> +import json
> +from uuid import uuid4
> +
> +
> +__all__ = ['init', 'snapshot_is_for_bootstrap', 'prepare_bootstrap_snapshot']
> +
> +
> +# {{{ Constants
> +
> +# Xlog binary format constants.
> +ROW_MARKER = b'\xd5\xba\x0b\xab'
> +EOF_MARKER = b'\xd5\x10\xad\xed'
> +XLOG_FIXHEADER_SIZE = 19
> +VCLOCK_MAX = 32
> +VCLOCK_STR_LEN_MAX = 1 + VCLOCK_MAX * (2 + 2 + 20 + 2) + 1
> +XLOG_META_LEN_MAX = 1024 + VCLOCK_STR_LEN_MAX
> +
> +
> +# The binary protocol (iproto) constants.
> +#
> +# Widely reused for xlog / snapshot files.
> +IPROTO_REQUEST_TYPE = 0x00
> +IPROTO_LSN = 0x03
> +IPROTO_TIMESTAMP = 0x04
> +IPROTO_INSERT = 2
> +IPROTO_SPACE_ID = 0x10
> +IPROTO_TUPLE = 0x21
> +
> +
> +# System space IDs.
> +BOX_SCHEMA_ID = 272
> +BOX_CLUSTER_ID = 320
> +
> +# }}} Constants
> +
> +
> +tarantool_cmd = 'tarantool'
> +tarantoolctl_cmd = 'tarantoolctl'
> +debug_f = lambda x: None # noqa: E731
> +
> +
> +def init(tarantool=None, tarantoolctl=None, debug=None):
> + """ Redefine module level globals.
> +
> + If the function is not called, tarantool and tarantoolctl
> + will be called according to the PATH environment variable.
> +
> + Beware: tarantool and tarantoolctl are lists. Example:
> +
> + tarantool_cmd = ['/path/to/tarantool']
> + tarantoolctl_cmd = tarantool_cmd + ['/path/to/tarantoolctl']
> + xlog.init(tarantool=tarantool_cmd, tarantoolctl=tarantoolctl_cmd)
> + """
> + global tarantool_cmd
> + global tarantoolctl_cmd
> + global debug_f
> +
> + if tarantool:
> + assert isinstance(tarantool, list)
> + tarantool_cmd = tarantool
> + if tarantool_cmd:
> + assert isinstance(tarantoolctl, list)
> + tarantoolctl_cmd = tarantoolctl
> + if debug:
> + debug_f = debug
> +
> +
> +# {{{ General purpose helpers
> +
> +def crc32c(data):
> + """ Use tarantool's implementation of CRC32C algorithm.
> +
> + Python has no built-in implementation of CRC32C.
> + """
> + lua = "print(require('digest').crc32_update(0, io.stdin:read({})))".format(
> + len(data))
> + with open(os.devnull, 'w') as devnull:
> + process = subprocess.Popen(tarantool_cmd + ['-e', lua],
> + stdin=subprocess.PIPE,
> + stdout=subprocess.PIPE,
> + stderr=devnull)
> + process.stdin.write(data)
> + res, _ = process.communicate()
> + return int(res)
> +
> +# }}} General purpose helpers
> +
> +
> +# {{{ parse xlog / snapshot
> +
> +def xlog_rows(xlog_path):
> + cmd = tarantoolctl_cmd + ['cat', xlog_path, '--format=json',
> + '--show-system']
> + with open(os.devnull, 'w') as devnull:
> + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=devnull)
> + for line in process.stdout.readlines():
> + yield json.loads(line)
> +
> +# }}} parse xlog / snapshot
> +
> +
> +# {{{ xlog encode data helpers
> +
> +def encode_xrow_header(xrow):
> + packer = msgpack.Packer(use_bin_type=False)
> + xrow_header = ROW_MARKER
> + # xrow size
> + xrow_header += packer.pack(len(xrow))
> + # previous xrow crc32
> + xrow_header += packer.pack(0)
> + # current xrow crc32
> + xrow_header += packer.pack(crc32c(xrow))
> + # padding
> + padding_size = XLOG_FIXHEADER_SIZE - len(xrow_header)
> + xrow_header += packer.pack(b'\x00' * (padding_size - 1))
> + assert(len(xrow_header) == XLOG_FIXHEADER_SIZE)
> + return xrow_header
> +
> +
> +def encode_xrow(header, body):
> + packer = msgpack.Packer(use_bin_type=False)
> + header = packer.pack(header)
> + body = packer.pack(body)
> + xrow_data = header + body
> + return encode_xrow_header(xrow_data) + xrow_data
> +
> +# }}} xlog encode data helpers
> +
> +
> +# {{{ xlog write data helpers
> +
> +def xlog_seek_end(xlog):
> + """Set the file position right before the end marker."""
> + WHENCE_END = 2
> + xlog.seek(-4, WHENCE_END)
> + eof_marker = xlog.read(4)
> + if eof_marker != EOF_MARKER:
> + raise RuntimeError('Invalid eof marker: {}'.format(eof_marker))
> + xlog.seek(-4, WHENCE_END)
> +
> +
> +def xlog_write_eof(xlog):
> + xlog.write(EOF_MARKER)
> +
> +# }}} xlog write data helpers
> +
> +
> +# {{{ xlog write meta helpers
> +
> +def xlog_meta_write_instance_uuid(xlog, instance_uuid):
> + xlog.seek(0)
> + xlog.seek(xlog.read(XLOG_META_LEN_MAX).find(b'Instance: '))
> + # Trick: old and new UUID have the same size.
> + xlog.write(b'Instance: ' + instance_uuid)
> +
> +# }}} xlog write meta helpers
> +
> +
> +def snapshot_is_for_bootstrap(snapshot_path):
> + """ A bootstrap snapshot (src/box/bootstrap.snap) has no
> + <cluster_uuid> and <instance_uuid> in _schema and
> + _cluster system spaces.
> + """
> + cluster_uuid_exists = False
> + instance_uuid_exists = False
> +
> + for row in xlog_rows(snapshot_path):
> + if row['HEADER']['type'] == 'INSERT' and \
> + row['BODY']['space_id'] == BOX_SCHEMA_ID and \
> + row['BODY']['tuple'][0] == 'cluster':
> + cluster_uuid_exists = True
> +
> + if row['HEADER']['type'] == 'INSERT' and \
> + row['BODY']['space_id'] == BOX_CLUSTER_ID and \
> + row['BODY']['tuple'][0] == 1:
> + instance_uuid_exists = True
> +
> + if cluster_uuid_exists and instance_uuid_exists:
> + break
> +
> + if cluster_uuid_exists != instance_uuid_exists:
> + raise RuntimeError('A cluster UUID and an instance UUID should exist '
> + 'or not exist both')
> +
> + return not cluster_uuid_exists
> +
snapshot_is_for_bootstrap() accepts a path that can be None
(see the default value of the --bootstrap argument) when the --bootstrap
option is not specified.
Because of this, xlog_rows() may fail. xlog_rows() returns an iterator, and
we could make it do something like
"yield None", but then we would have to handle that properly everywhere
xlog_rows() is called.
So I just added a check before the call to snapshot_is_for_bootstrap():
--- a/lib/options.py
+++ b/lib/options.py
@@ -257,7 +257,7 @@ class Options:
exit(-1)
def check_bootstrap_option(self):
- if not snapshot_is_for_bootstrap(self.args.snapshot_path):
+ if self.args.snapshot_path and not
snapshot_is_for_bootstrap(self.args.snapshot_path):
color_stdout('Expected a boostrap snapshot, one for local
recovery '
'is given\n', schema='error')
exit(1)
> +
> +def prepare_bootstrap_snapshot(snapshot_path):
> + """ Prepare a bootstrap snapshot to use with local recovery."""
> + cluster_uuid = str(uuid4()).encode('ascii')
> + debug_f('Cluster UUID: {}'.format(cluster_uuid))
> + instance_uuid = str(uuid4()).encode('ascii')
> + instance_id = 1
> + debug_f('Instance ID: {}'.format(instance_id))
> + debug_f('Instance UUID: {}'.format(instance_uuid))
> +
> + last_row = list(xlog_rows(snapshot_path))[-1]
> + lsn = int(last_row['HEADER']['lsn'])
> + timestamp = float(last_row['HEADER']['timestamp'])
> +
> + with open(snapshot_path, 'rb+') as xlog:
> + xlog_meta_write_instance_uuid(xlog, instance_uuid)
> + xlog_seek_end(xlog)
> +
> + # Write cluster UUID to _schema.
> + lsn += 1
> + xlog.write(encode_xrow({
> + IPROTO_REQUEST_TYPE: IPROTO_INSERT,
> + IPROTO_LSN: lsn,
> + IPROTO_TIMESTAMP: timestamp,
> + }, {
> + IPROTO_SPACE_ID: BOX_SCHEMA_ID,
> + IPROTO_TUPLE: ['cluster', cluster_uuid],
> + }))
> +
> + # Write replica ID and replica UUID to _cluster.
> + lsn += 1
> + xlog.write(encode_xrow({
> + IPROTO_REQUEST_TYPE: IPROTO_INSERT,
> + IPROTO_LSN: lsn,
> + IPROTO_TIMESTAMP: timestamp,
> + }, {
> + IPROTO_SPACE_ID: BOX_CLUSTER_ID,
> + IPROTO_TUPLE: [1, instance_uuid],
> + }))
> +
> + xlog_write_eof(xlog)
>
<snipped>
More information about the Tarantool-patches
mailing list