[Tarantool-patches] [PATCH v1 1/3] Add metafiles cleanup routines at S3 pack script
Alexander V. Tikhonov
avtikhon at tarantool.org
Mon Mar 30 08:38:04 MSK 2020
Added cleanup routines for the meta files.
The script may run into the following situations:
- package files were removed from S3, but are still registered:
  the script stores and registers the new packages at S3 and
  removes all other registered blocks for the same files from
  the meta files.
- package files already exist at S3 with the same hashes:
  the script skips them with a warning message.
- package files already exist at S3 with old hashes:
  the script fails without the force flag; otherwise it stores
  and registers the new packages at S3 and removes all other
  registered blocks for the same files from the meta files.
Added the '-s|--skip_errors' option flag to skip errors on
changed packages instead of exiting the script run.
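The three modes combine as in this hedged sketch ('...' stands for
the script's usual options, which are unchanged and elided here):
    ./tools/update_repo.sh ...       # fail on changed packages (default)
    ./tools/update_repo.sh ... -f    # overwrite changed packages
    ./tools/update_repo.sh ... -s    # warn and skip changed packages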
Follow-up #3380
---
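All three metadata flavors (Sources, Packages, RPM repodata) get the
same cleanup below: collect the stale hashes still registered for a
file name, then cut the whole package block for each stale hash out
of the saved meta file with a multiline pcregrep. As a rough
standalone illustration of that block removal (not the patch's
actual command; 'deadbeef' is a placeholder hash), Debian
'Packages'/'Sources' stanzas are blank-line separated, so awk's
paragraph mode can drop a stanza wholesale:

    # drop every stanza that mentions the stale hash 'deadbeef'
    awk -v RS='' -v ORS='\n\n' '!/deadbeef/' \
        Packages.saved > Packages.saved_new
    mv Packages.saved_new Packages.saved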
tools/update_repo.sh | 204 +++++++++++++++++++++++++++++++++----------
1 file changed, 159 insertions(+), 45 deletions(-)
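The RPM hunks additionally keep the packages="N" counter of each
rebuilt XML meta file consistent. A minimal standalone sketch of
that fixup, assuming two package blocks (packs_rm=2) were cut out
of a local primary.xml.gz:

    packs_rm=2
    gunzip primary.xml.gz
    # read the current counter from the root tag and decrement it
    packs=$(($(grep ' packages=' primary.xml | \
        sed 's#.* packages="\([0-9]*\)".*#\1#g')-${packs_rm}))
    sed -i "s# packages=\"[0-9]*\"# packages=\"${packs}\"#g" primary.xml
    gzip primary.xml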
diff --git a/tools/update_repo.sh b/tools/update_repo.sh
index f49569b73..ddc44d118 100755
--- a/tools/update_repo.sh
+++ b/tools/update_repo.sh
@@ -9,6 +9,7 @@ ws_prefix=/tmp/tarantool_repo_s3
alloss='ubuntu debian el fedora'
product=tarantool
force=
+skip_errors=
# the path with binaries either repository
repo=.
@@ -82,6 +83,8 @@ EOF
Product name to be packed with, default name is 'tarantool'
-f|--force
Force updating the remote package with the local one despite the checksum difference
+ -s|--skip_errors
+ Skip failing on changed packages
-h|--help
Usage help message
EOF
@@ -114,6 +117,9 @@ case $i in
-f|--force)
force=1
;;
+ -s|--skip_errors)
+ skip_errors=1
+ ;;
-h|--help)
usage
exit 0
@@ -169,6 +175,9 @@ function update_deb_packfile {
function update_deb_metadata {
packpath=$1
packtype=$2
+ packfile=$3
+
+ file_exists=''
if [ ! -f $packpath.saved ] ; then
# get the latest Sources file from S3 either create empty file
@@ -185,38 +194,94 @@ function update_deb_metadata {
# find the hash from the new Sources file
hash=$(grep '^Checksums-Sha256:' -A3 $packpath | \
tail -n 1 | awk '{print $1}')
+ # check if the file already exists in S3
+ if $aws ls "$bucket_path/$packfile" ; then
+ echo "WARNING: DSC file already exists in S3!"
+ file_exists=$bucket_path/$packfile
+ fi
# search the new hash in the old Sources file from S3
if grep " $hash .* .*$" $packpath.saved ; then
echo "WARNING: DSC file already registered in S3!"
- return
+ echo "File hash: $hash"
+ if [ "$file_exists" != "" ] ; then
+ return
+ fi
fi
# check if the DSC file already exists in old Sources file from S3
file=$(grep '^Files:' -A3 $packpath | tail -n 1 | awk '{print $3}')
- if [ "$force" == "" ] && grep " .* .* $file$" $packpath.saved ; then
- echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
- echo "New hash: $hash"
- # unlock the publishing
- $rm_file $ws_lockfile
- exit 1
+ if grep " .* .* $file$" $packpath.saved ; then
+ if [ "$force" == "" -a "$file_exists" != "" ] ; then
+ if [ "$skip_errors" == "" ] ; then
+ echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
+ echo "New hash: $hash"
+ # unlock the publishing
+ $rm_file $ws_lockfile
+ exit 1
+ else
+ echo "WARNING: the file already exists, but changed, set '-f' to overwrite it: $file"
+ echo "New hash: $hash"
+ return
+ fi
+ fi
+ hashes_old=$(grep '^Checksums-Sha256:' -A3 $packpath.saved | \
+ grep " .* .* $file" | awk '{print $1}')
+ # NOTE: a single file name may have more than one entry
+ # in a damaged meta file; to fix this, all found
+ # entries for the file need to be removed
+ # find and remove all package blocks for the bad hashes
+ for hash_rm in $hashes_old ; do
+ echo "Removing from $packpath.saved file old hash: $hash_rm"
+ pcregrep -Mi -v "(?s)Package: (\N+\n)+(?=^ ${hash_rm}).*?^$" \
+ $packpath.saved >$packpath.saved_new
+ mv $packpath.saved_new $packpath.saved
+ done
fi
updated_dsc=1
elif [ "$packtype" == "deb" ]; then
# check if the DEB file already exists in old Packages file from S3
# find the hash from the new Packages file
- hash=$(grep '^SHA256: ' $packpath)
+ hash=$(grep '^SHA256: ' $packpath | awk '{print $2}')
+ # check if the file already exists in S3
+ if $aws ls "$bucket_path/$packfile" ; then
+ echo "WARNING: DEB file already exists in S3!"
+ file_exists=$bucket_path/$packfile
+ fi
# search the new hash in the old Packages file from S3
if grep "^SHA256: $hash" $packpath.saved ; then
echo "WARNING: DEB file already registered in S3!"
- return
+ echo "File hash: $hash"
+ if [ "$file_exists" != "" ] ; then
+ return
+ fi
fi
# check if the DEB file already exists in old Packages file from S3
file=$(grep '^Filename:' $packpath | awk '{print $2}')
- if [ "$force" == "" ] && grep "Filename: $file$" $packpath.saved ; then
- echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
- echo "New hash: $hash"
- # unlock the publishing
- $rm_file $ws_lockfile
- exit 1
+ if grep "Filename: $file$" $packpath.saved ; then
+ if [ "$force" == "" -a "$file_exists" != "" ] ; then
+ if [ "$skip_errors" == "" ] ; then
+ echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
+ echo "New hash: $hash"
+ # unlock the publishing
+ $rm_file $ws_lockfile
+ exit 1
+ else
+ echo "WARNING: the file already exists, but changed, set '-f' to overwrite it: $file"
+ echo "New hash: $hash"
+ return
+ fi
+ fi
+ hashes_old=$(grep -e "^Filename: " -e "^SHA256: " $packpath.saved | \
+ grep -A1 "$file" | grep "^SHA256: " | awk '{print $2}')
+ # NOTE: a single file name may have more than one entry
+ # in a damaged meta file; to fix this, all found
+ # entries for the file need to be removed
+ # find and remove all package blocks for the bad hashes
+ for hash_rm in $hashes_old ; do
+ echo "Removing from $packpath.saved file old hash: $hash_rm"
+ pcregrep -Mi -v "(?s)Package: (\N+\n)+(?=SHA256: ${hash_rm}).*?^$" \
+ $packpath.saved >$packpath.saved_new
+ mv $packpath.saved_new $packpath.saved
+ done
fi
updated_deb=1
fi
@@ -248,9 +313,6 @@ function pack_deb {
exit 1
fi
- # prepare the workspace
- prepare_ws ${os}
-
# set the subpath with binaries based on literal character of the product name
proddir=$(echo $product | head -c 1)
@@ -297,7 +359,7 @@ EOF
for packages in dists/$loop_dist/$component/binary-*/Packages ; do
# copy Packages file to avoid of removing by the new DEB version
# update metadata 'Packages' files
- update_deb_metadata $packages deb
+ update_deb_metadata $packages deb $locpackfile
[ "$updated_deb" == "1" ] || continue
updated_files=1
done
@@ -316,7 +378,8 @@ EOF
echo "Regenerated DSC file: $locpackfile"
# copy Sources file to avoid of removing by the new DSC version
# update metadata 'Sources' file
- update_deb_metadata dists/$loop_dist/$component/source/Sources dsc
+ update_deb_metadata dists/$loop_dist/$component/source/Sources dsc \
+ $locpackfile
[ "$updated_dsc" == "1" ] || continue
updated_files=1
# save the registered DSC file to S3
@@ -398,11 +461,6 @@ EOF
# 4. sync the latest distribution path changes to S3
$aws_sync_public dists/$loop_dist "$bucket_path/dists/$loop_dist"
done
-
- # unlock the publishing
- $rm_file $ws_lockfile
-
- popd
}
# The 'pack_rpm' function especialy created for RPM packages. It works
@@ -426,9 +484,6 @@ function pack_rpm {
exit 1
fi
- # prepare the workspace
- prepare_ws ${os}_${option_dist}
-
# copy the needed package binaries to the workspace
( cd $repo && cp $pack_rpms $ws/. )
@@ -460,29 +515,76 @@ function pack_rpm {
for hash in $(zcat repodata/other.xml.gz | grep "<package pkgid=" | \
awk -F'"' '{print $2}') ; do
updated_rpm=0
+ file_exists=''
name=$(zcat repodata/other.xml.gz | grep "<package pkgid=\"$hash\"" | \
awk -F'"' '{print $4}')
+ file=$(zcat repodata/primary.xml.gz | \
+ grep -e "<checksum type=" -e "<location href=" | \
+ grep "$hash" -A1 | grep "<location href=" | \
+ awk -F'"' '{print $2}')
+ # check if the file already exists in S3
+ if $aws ls "$bucket_path/$repopath/$file" ; then
+ echo "WARNING: RPM file already exists in S3!"
+ file_exists=$bucket_path/$repopath/$file
+ fi
# search the new hash in the old meta file from S3
if zcat repodata.base/filelists.xml.gz | grep "pkgid=\"$hash\"" | \
grep "name=\"$name\"" ; then
echo "WARNING: $name file already registered in S3!"
echo "File hash: $hash"
- continue
+ if [ "$file_exists" != "" ] ; then
+ continue
+ fi
fi
updated_rpms=1
# check if the hashed file already exists in old meta file from S3
- file=$(zcat repodata/primary.xml.gz | \
- grep -e "<checksum type=" -e "<location href=" | \
- grep "$hash" -A1 | grep "<location href=" | \
- awk -F'"' '{print $2}')
- # check if the file already exists in S3
- if [ "$force" == "" ] && zcat repodata.base/primary.xml.gz | \
+ if zcat repodata.base/primary.xml.gz | \
grep "<location href=\"$file\"" ; then
- echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
- echo "New hash: $hash"
- # unlock the publishing
- $rm_file $ws_lockfile
- exit 1
+ if [ "$force" == "" -a "$file_exists" != "" ] ; then
+ if [ "$skip_errors" == "" ] ; then
+ echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
+ echo "New hash: $hash"
+ # unlock the publishing
+ $rm_file $ws_lockfile
+ exit 1
+ else
+ echo "WARNING: the file already exists, but changed, set '-f' to overwrite it: $file"
+ echo "New hash: $hash"
+ continue
+ fi
+ fi
+ hashes_old=$(zcat repodata.base/primary.xml.gz | \
+ grep -e "<checksum type=" -e "<location href=" | \
+ grep -B1 "$file" | grep "<checksum type=" | \
+ awk -F'>' '{print $2}' | sed 's#<.*##g')
+ # NOTE: a single file name may have more than one entry
+ # in a damaged meta file; to fix this, all found
+ # entries for the file need to be removed
+ for metafile in repodata.base/other \
+ repodata.base/filelists \
+ repodata.base/primary ; do
+ up_full_lines=''
+ if [ "$metafile" == "repodata.base/primary" ]; then
+ up_full_lines='(\N+\n)*'
+ fi
+ packs_rm=0
+ # find and remove all <package> tags for the bad hashes
+ for hash_rm in $hashes_old ; do
+ echo "Removing from ${metafile}.xml.gz file old hash: $hash_rm"
+ zcat ${metafile}.xml.gz | \
+ pcregrep -Mi -v "(?s)<package ${up_full_lines}\N+(?=${hash_rm}).*?package>" | \
+ gzip - >${metafile}_new.xml.gz
+ mv ${metafile}_new.xml.gz ${metafile}.xml.gz
+ packs_rm=$(($packs_rm+1))
+ done
+ # reduce number of packages in metafile counter
+ gunzip ${metafile}.xml.gz
+ packs=$(($(grep " packages=" ${metafile}.xml | \
+ sed 's#.* packages="\([0-9]*\)".*#\1#g')-${packs_rm}))
+ sed "s# packages=\"[0-9]*\"# packages=\"${packs}\"#g" \
+ -i ${metafile}.xml
+ gzip ${metafile}.xml
+ done
fi
done
@@ -554,22 +656,34 @@ EOF
# update the metadata at the S3
$aws_sync_public repodata "$bucket_path/$repopath/repodata"
-
- # unlock the publishing
- $rm_file $ws_lockfile
-
- popd
}
if [ "$os" == "ubuntu" -o "$os" == "debian" ]; then
+ # prepare the workspace
+ prepare_ws ${os}
pack_deb
+ # unlock the publishing
+ $rm_file $ws_lockfile
+ popd
elif [ "$os" == "el" -o "$os" == "fedora" ]; then
# RPM packages structure needs different paths for binaries and sources
# packages, in this way it is needed to call the packages registering
# script twice with the given format:
# pack_rpm <packages store subpath> <patterns of the packages to register>
+
+ # prepare the workspace
+ prepare_ws ${os}_${option_dist}
pack_rpm x86_64 "*.x86_64.rpm *.noarch.rpm"
+ # unlock the publishing
+ $rm_file $ws_lockfile
+ popd
+
+ # prepare the workspace
+ prepare_ws ${os}_${option_dist}
pack_rpm SRPMS "*.src.rpm"
+ # unlock the publishing
+ $rm_file $ws_lockfile
+ popd
else
echo "USAGE: given OS '$os' is not supported, use any single from the list: $alloss"
usage
--
2.17.1