From: "Alexander V. Tikhonov"
Date: Mon, 30 Mar 2020 08:38:04 +0300
Subject: [Tarantool-patches] [PATCH v1 1/3] Add metafiles cleanup routines at S3 pack script
To: Oleg Piskunov, Sergey Bronnikov, Alexander Turenko
Cc: tarantool-patches@dev.tarantool.org

Added cleanup functionality for the meta files. The script may
encounter the following situations:

- package files were removed from S3, but are still registered:
  the script stores and registers the new packages in S3 and removes
  all other registered blocks for the same files from the meta files.
- package files already exist in S3 with the same hashes:
  the script skips them with a warning message.
- package files already exist in S3 with old hashes:
  the script fails without the force flag; otherwise it stores and
  registers the new packages in S3 and removes all other registered
  blocks for the same files from the meta files.

Added the '-s|--skip_errors' option flag to skip errors on changed
packages, so that the script does not exit on them.

Follow-up #3380
---
 tools/update_repo.sh | 204 +++++++++++++++++++++++++++++++++----------
 1 file changed, 159 insertions(+), 45 deletions(-)

diff --git a/tools/update_repo.sh b/tools/update_repo.sh
index f49569b73..ddc44d118 100755
--- a/tools/update_repo.sh
+++ b/tools/update_repo.sh
@@ -9,6 +9,7 @@ ws_prefix=/tmp/tarantool_repo_s3
 alloss='ubuntu debian el fedora'
 product=tarantool
 force=
+skip_errors=
 # the path with binaries either repository
 repo=.
 
@@ -82,6 +83,8 @@ EOF
         Product name to be packed with, default name is 'tarantool'
     -f|--force
         Force updating the remote package with the local one despite the checksum difference
+    -s|--skip_errors
+        Skip failing on changed packages
     -h|--help
         Usage help message
 EOF
@@ -114,6 +117,9 @@ case $i in
     -f|--force)
         force=1
         ;;
+    -s|--skip_errors)
+        skip_errors=1
+        ;;
     -h|--help)
         usage
         exit 0
@@ -169,6 +175,9 @@ function update_deb_packfile {
 function update_deb_metadata {
     packpath=$1
     packtype=$2
+    packfile=$3
+
+    file_exists=''
 
     if [ ! -f $packpath.saved ] ; then
         # get the latest Sources file from S3 either create empty file
@@ -185,38 +194,94 @@ function update_deb_metadata {
         # find the hash from the new Sources file
         hash=$(grep '^Checksums-Sha256:' -A3 $packpath | \
             tail -n 1 | awk '{print $1}')
+        # check if the file already exists in S3
+        if $aws ls "$bucket_path/$packfile" ; then
+            echo "WARNING: DSC file already exists in S3!"
+            file_exists=$bucket_path/$packfile
+        fi
         # search the new hash in the old Sources file from S3
         if grep " $hash .* .*$" $packpath.saved ; then
             echo "WARNING: DSC file already registered in S3!"
-            return
+            echo "File hash: $hash"
+            if [ "$file_exists" != "" ] ; then
+                return
+            fi
         fi
         # check if the DSC file already exists in old Sources file from S3
         file=$(grep '^Files:' -A3 $packpath | tail -n 1 | awk '{print $3}')
-        if [ "$force" == "" ] && grep " .* .* $file$" $packpath.saved ; then
-            echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
-            echo "New hash: $hash"
-            # unlock the publishing
-            $rm_file $ws_lockfile
-            exit 1
+        if grep " .* .* $file$" $packpath.saved ; then
+            if [ "$force" == "" -a "$file_exists" != "" ] ; then
+                if [ "$skip_errors" == "" ] ; then
+                    echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
+                    echo "New hash: $hash"
+                    # unlock the publishing
+                    $rm_file $ws_lockfile
+                    exit 1
+                else
+                    echo "WARNING: the file already exists, but changed, set '-f' to overwrite it: $file"
+                    echo "New hash: $hash"
+                    return
+                fi
+            fi
+            hashes_old=$(grep '^Checksums-Sha256:' -A3 $packpath.saved | \
+                grep " .* .* $file" | awk '{print $1}')
+            # NOTE: more than one entry may exist for a single file
+            #       name in a damaged file; to fix it, all found
+            #       entries of this file need to be removed
+            # find and remove all package blocks for the bad hashes
+            for hash_rm in $hashes_old ; do
+                echo "Removing from $packpath.saved file old hash: $hash_rm"
+                pcregrep -Mi -v "(?s)Package: (\N+\n)+(?=^ ${hash_rm}).*?^$" \
+                    $packpath.saved >$packpath.saved_new
+                mv $packpath.saved_new $packpath.saved
+            done
         fi
         updated_dsc=1
     elif [ "$packtype" == "deb" ]; then
         # check if the DEB file already exists in old Packages file from S3
         # find the hash from the new Packages file
-        hash=$(grep '^SHA256: ' $packpath)
+        hash=$(grep '^SHA256: ' $packpath | awk '{print $2}')
+        # check if the file already exists in S3
+        if $aws ls "$bucket_path/$packfile" ; then
+            echo "WARNING: DEB file already exists in S3!"
+            file_exists=$bucket_path/$packfile
+        fi
         # search the new hash in the old Packages file from S3
         if grep "^SHA256: $hash" $packpath.saved ; then
             echo "WARNING: DEB file already registered in S3!"
-            return
+            echo "File hash: $hash"
+            if [ "$file_exists" != "" ] ; then
+                return
+            fi
         fi
         # check if the DEB file already exists in old Packages file from S3
         file=$(grep '^Filename:' $packpath | awk '{print $2}')
-        if [ "$force" == "" ] && grep "Filename: $file$" $packpath.saved ; then
-            echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
-            echo "New hash: $hash"
-            # unlock the publishing
-            $rm_file $ws_lockfile
-            exit 1
+        if grep "Filename: $file$" $packpath.saved ; then
+            if [ "$force" == "" -a "$file_exists" != "" ] ; then
+                if [ "$skip_errors" == "" ] ; then
+                    echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
+                    echo "New hash: $hash"
+                    # unlock the publishing
+                    $rm_file $ws_lockfile
+                    exit 1
+                else
+                    echo "WARNING: the file already exists, but changed, set '-f' to overwrite it: $file"
+                    echo "New hash: $hash"
+                    return
+                fi
+            fi
+            hashes_old=$(grep -e "^Filename: " -e "^SHA256: " $packpath.saved | \
+                grep -A1 "$file" | grep "^SHA256: " | awk '{print $2}')
+            # NOTE: more than one entry may exist for a single file
+            #       name in a damaged file; to fix it, all found
+            #       entries of this file need to be removed
+            # find and remove all package blocks for the bad hashes
+            for hash_rm in $hashes_old ; do
+                echo "Removing from $packpath.saved file old hash: $hash_rm"
+                pcregrep -Mi -v "(?s)Package: (\N+\n)+(?=SHA256: ${hash_rm}).*?^$" \
+                    $packpath.saved >$packpath.saved_new
+                mv $packpath.saved_new $packpath.saved
+            done
         fi
         updated_deb=1
     fi
@@ -248,9 +313,6 @@ function pack_deb {
         exit 1
     fi
 
-    # prepare the workspace
-    prepare_ws ${os}
-
     # set the subpath with binaries based on literal character of the product name
     proddir=$(echo $product | head -c 1)
 
@@ -297,7 +359,7 @@ EOF
         for packages in dists/$loop_dist/$component/binary-*/Packages ; do
             # copy Packages file to avoid of removing by the new DEB version
             # update metadata 'Packages' files
-            update_deb_metadata $packages deb
+            update_deb_metadata $packages deb $locpackfile
             [ "$updated_deb" == "1" ] || continue
             updated_files=1
         done
@@ -316,7 +378,8 @@ EOF
             echo "Regenerated DSC file: $locpackfile"
             # copy Sources file to avoid of removing by the new DSC version
             # update metadata 'Sources' file
-            update_deb_metadata dists/$loop_dist/$component/source/Sources dsc
+            update_deb_metadata dists/$loop_dist/$component/source/Sources dsc \
+                $locpackfile
             [ "$updated_dsc" == "1" ] || continue
             updated_files=1
             # save the registered DSC file to S3
@@ -398,11 +461,6 @@ EOF
         # 4. sync the latest distribution path changes to S3
         $aws_sync_public dists/$loop_dist "$bucket_path/dists/$loop_dist"
     done
-
-    # unlock the publishing
-    $rm_file $ws_lockfile
-
-    popd
 }
 
 # The 'pack_rpm' function especialy created for RPM packages. It works
@@ -426,9 +484,6 @@ function pack_rpm {
         exit 1
     fi
 
-    # prepare the workspace
-    prepare_ws ${os}_${option_dist}
-
     # copy the needed package binaries to the workspace
     ( cd $repo && cp $pack_rpms $ws/. )
@@ -460,29 +515,76 @@ function pack_rpm {
     for hash in $(zcat repodata/other.xml.gz | grep "<package pkgid=
+            # find and remove all <package> tags for the bad hashes
+            for hash_rm in $hashes_old ; do
+                echo "Removing from ${metafile}.xml.gz file old hash: $hash_rm"
+                zcat ${metafile}.xml.gz | \
+                    pcregrep -Mi -v "(?s)<package pkgid=\"${hash_rm}\".*?</package>" | \
+                    gzip - >${metafile}_new.xml.gz
+                mv ${metafile}_new.xml.gz ${metafile}.xml.gz
+                packs_rm=$(($packs_rm+1))
+            done
+            # reduce number of packages in metafile counter
+            gunzip ${metafile}.xml.gz
+            packs=$(($(grep " packages=" ${metafile}.xml | \
+                sed 's#.* packages="\([0-9]*\)".*#\1#g')-${packs_rm}))
+            sed "s# packages=\"[0-9]*\"# packages=\"${packs}\"#g" \
+                -i ${metafile}.xml
+            gzip ${metafile}.xml
+        done
         fi
     done
 
@@ -554,22 +656,34 @@ EOF
     # update the metadata at the S3
     $aws_sync_public repodata "$bucket_path/$repopath/repodata"
-
-    # unlock the publishing
-    $rm_file $ws_lockfile
-
-    popd
 }
 
 if [ "$os" == "ubuntu" -o "$os" == "debian" ]; then
+    # prepare the workspace
+    prepare_ws ${os}
     pack_deb
+    # unlock the publishing
+    $rm_file $ws_lockfile
+    popd
 elif [ "$os" == "el" -o "$os" == "fedora" ]; then
     # RPM packages structure needs different paths for binaries and sources
     # packages, in this way it is needed to call the packages registering
     # script twice with the given format:
     #   pack_rpm <subdir> <patterns>
+
+    # prepare the workspace
+    prepare_ws ${os}_${option_dist}
     pack_rpm x86_64 "*.x86_64.rpm *.noarch.rpm"
+    # unlock the publishing
+    $rm_file $ws_lockfile
+    popd
+
+    # prepare the workspace
+    prepare_ws ${os}_${option_dist}
     pack_rpm SRPMS "*.src.rpm"
+    # unlock the publishing
+    $rm_file $ws_lockfile
+    popd
 else
     echo "USAGE: given OS '$os' is not supported, use any single from the list: $alloss"
     usage
-- 
2.17.1
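
A note for reviewers on the Debian cleanup above: the stale metadata
blocks are dropped with pcregrep's multiline mode (-M), which lets one
pattern span a whole "Package:" paragraph up to the empty line that
terminates it. Below is a minimal standalone sketch of that filter,
reusing the patch's own pattern; the file name 'Packages.saved' and
the hash value are illustrative placeholders, not taken from the patch:

    #!/bin/sh
    # Drop the whole "Package:" paragraph whose SHA256 field carries
    # the stale hash, as the patch does for the DEB Packages index.
    stale_hash=aaaabbbbccccdddd    # example value only
    pcregrep -Mi -v "(?s)Package: (\N+\n)+(?=SHA256: ${stale_hash}).*?^$" \
        Packages.saved >Packages.saved_new
    # replace the index with the filtered copy
    mv Packages.saved_new Packages.saved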
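
The RPM branch additionally keeps the packages="N" counter in each
metafile header consistent after removing <package> entries. A minimal
sketch of that counter update, assuming an uncompressed repodata-style
file named 'primary.xml' and two removed entries (both are
illustrative assumptions, not values from the patch):

    #!/bin/sh
    packs_rm=2    # example: number of <package> blocks removed
    # read the current counter from the XML header and subtract
    # the number of removed entries
    packs=$(($(grep " packages=" primary.xml | \
        sed 's#.* packages="\([0-9]*\)".*#\1#g')-${packs_rm}))
    # write the corrected counter back in place
    sed "s# packages=\"[0-9]*\"# packages=\"${packs}\"#g" -i primary.xml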