From mboxrd@z Thu Jan 1 00:00:00 1970
Received: from smtpng2.m.smailru.net (smtpng2.m.smailru.net [94.100.179.3])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by dev.tarantool.org (Postfix) with ESMTPS id C582A469719
	for ; Fri, 6 Mar 2020 09:57:49 +0300 (MSK)
From: "Alexander V. Tikhonov"
Date: Fri, 6 Mar 2020 09:57:46 +0300
Subject: [Tarantool-patches] [PATCH v4] Add metafiles cleanup routines at S3 pack script
List-Id: Tarantool development patches
To: Oleg Piskunov
Cc: tarantool-patches@dev.tarantool.org

Added cleanup functionality for the meta files. The script now handles
the following situations:

- package files were removed from S3, but are still registered:
  the script stores and registers the new packages in S3 and removes
  all other registered blocks for the same files from the meta files;

- package files already exist in S3 with the same hashes:
  the script skips them with a warning message;

- package files already exist in S3 with old hashes:
  the script fails without the force flag; with the flag it stores and
  registers the new packages in S3 and removes all other registered
  blocks for the same files from the meta files.

The same decision flow is condensed in the sketch below.

Follow up #3380
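For clarity, here is that flow as a small standalone sketch. The helper
names in it (s3_file_exists, store_and_register, drop_stale_entries)
are illustrative placeholders, not functions of update_repo.sh:

# Hypothetical condensation of the cleanup flow; helpers are stubs.
handle_package() {
    pkg=$1; new_hash=$2; old_hash=$3  # old_hash: hash registered in meta files, if any
    if [ -n "$old_hash" ] && ! s3_file_exists "$pkg" ; then
        # case 1: file was removed from S3, but is still registered
        store_and_register "$pkg"
        drop_stale_entries "$pkg" "$old_hash"
    elif [ "$old_hash" = "$new_hash" ] ; then
        # case 2: the same file with the same hash - skip with a warning
        echo "WARNING: $pkg is already stored and registered in S3"
    elif [ -n "$old_hash" ] ; then
        # case 3: the same file with an old hash - require the force flag
        [ -n "$force" ] || { echo "ERROR: $pkg changed, use '-f'" ; exit 1 ; }
        store_and_register "$pkg"
        drop_stale_entries "$pkg" "$old_hash"
    else
        store_and_register "$pkg"   # completely new package
    fi
}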
---

Github: https://github.com/tarantool/tarantool/tree/avtikhon/gitlab-ci-perf

Changes v4:
- moved the rest of the Docker infrastructure for perf testing into
  the bench-run repository
- renamed the calls to the performance prepare/run/cleanup stages

Changes v3:
- updated the commit message
- merged all make targets for perf testing into a single one
- switched to using gitlab-ci variables for running perf testing

Changes v2:
- moved the performance variables from the global setup to the
  performance template
- updated the commit message
- added more comments to the sources

 tools/update_repo.sh | 147 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 118 insertions(+), 29 deletions(-)

diff --git a/tools/update_repo.sh b/tools/update_repo.sh
index 65a977187..813684176 100755
--- a/tools/update_repo.sh
+++ b/tools/update_repo.sh
@@ -172,6 +172,9 @@ function update_deb_packfile {
 function update_deb_metadata {
     packpath=$1
     packtype=$2
+    packfile=$3
+
+    file_exists=''
 
     if [ ! -f $packpath.saved ] ; then
         # get the latest Sources file from S3 either create empty file
@@ -188,38 +191,82 @@ function update_deb_metadata {
         # find the hash from the new Sources file
         hash=$(grep '^Checksums-Sha256:' -A3 $packpath | \
             tail -n 1 | awk '{print $1}')
+        # check if the file already exists in S3
+        if $aws ls "$bucket_path/$packfile" ; then
+            echo "WARNING: DSC file already exists in S3!"
+            file_exists=$bucket_path/$packfile
+        fi
         # search the new hash in the old Sources file from S3
         if grep " $hash .* .*$" $packpath.saved ; then
             echo "WARNING: DSC file already registered in S3!"
-            return
+            echo "File hash: $hash"
+            if [ "$file_exists" != "" ] ; then
+                return
+            fi
         fi
         # check if the DSC file already exists in old Sources file from S3
         file=$(grep '^Files:' -A3 $packpath | tail -n 1 | awk '{print $3}')
-        if [ "$force" == "" ] && grep " .* .* $file$" $packpath.saved ; then
-            echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
-            echo "New hash: $hash"
-            # unlock the publishing
-            $rm_file $ws_lockfile
-            exit 1
+        if grep " .* .* $file$" $packpath.saved ; then
+            if [ "$force" == "" -a "$file_exists" != "" ] ; then
+                echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
+                echo "New hash: $hash"
+                # unlock the publishing
+                $rm_file $ws_lockfile
+                exit 1
+            fi
+            hashes_old=$(grep '^Checksums-Sha256:' -A3 $packpath.saved | \
+                grep " .* .* $file" | awk '{print $1}')
+            # NOTE: a single file name may have more than one entry in a
+            #       damaged meta file; to fix it, all found entries of
+            #       this file need to be removed
+            # find and remove all package blocks for the bad hashes
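+            # (the pcregrep call below does the actual cleanup: -M lets
+            # the pattern span multiple lines and -v inverts the match,
+            # so every matched block is dropped; the pattern covers one
+            # whole block - from its "Package:" line through the
+            # following non-empty lines up to the checksum entry with
+            # the stale hash, then lazily up to the next empty line)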
+            for hash_rm in $hashes_old ; do
+                echo "Removing from $packpath.saved file old hash: $hash_rm"
+                pcregrep -Mi -v "(?s)Package: (\N+\n)+(?=^ ${hash_rm}).*?^$" \
+                    $packpath.saved >$packpath.saved_new
+                mv $packpath.saved_new $packpath.saved
+            done
         fi
         updated_dsc=1
     elif [ "$packtype" == "deb" ]; then
         # check if the DEB file already exists in old Packages file from S3
         # find the hash from the new Packages file
-        hash=$(grep '^SHA256: ' $packpath)
+        hash=$(grep '^SHA256: ' $packpath | awk '{print $2}')
+        # check if the file already exists in S3
+        if $aws ls "$bucket_path/$packfile" ; then
+            echo "WARNING: DEB file already exists in S3!"
+            file_exists=$bucket_path/$packfile
+        fi
         # search the new hash in the old Packages file from S3
         if grep "^SHA256: $hash" $packpath.saved ; then
             echo "WARNING: DEB file already registered in S3!"
-            return
+            echo "File hash: $hash"
+            if [ "$file_exists" != "" ] ; then
+                return
+            fi
         fi
         # check if the DEB file already exists in old Packages file from S3
         file=$(grep '^Filename:' $packpath | awk '{print $2}')
-        if [ "$force" == "" ] && grep "Filename: $file$" $packpath.saved ; then
-            echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
-            echo "New hash: $hash"
-            # unlock the publishing
-            $rm_file $ws_lockfile
-            exit 1
+        if grep "Filename: $file$" $packpath.saved ; then
+            if [ "$force" == "" -a "$file_exists" != "" ] ; then
+                echo "ERROR: the file already exists, but changed, set '-f' to overwrite it: $file"
+                echo "New hash: $hash"
+                # unlock the publishing
+                $rm_file $ws_lockfile
+                exit 1
+            fi
+            hashes_old=$(grep -e "^Filename: " -e "^SHA256: " $packpath.saved | \
+                grep -A1 "$file" | grep "^SHA256: " | awk '{print $2}')
+            # NOTE: a single file name may have more than one entry in a
+            #       damaged meta file; to fix it, all found entries of
+            #       this file need to be removed
+            # find and remove all package blocks for the bad hashes
+            for hash_rm in $hashes_old ; do
+                echo "Removing from $packpath.saved file old hash: $hash_rm"
+                pcregrep -Mi -v "(?s)Package: (\N+\n)+(?=SHA256: ${hash_rm}).*?^$" \
+                    $packpath.saved >$packpath.saved_new
+                mv $packpath.saved_new $packpath.saved
+            done
         fi
         updated_deb=1
     fi
@@ -296,7 +343,7 @@ EOF
     for packages in dists/$loop_dist/$component/binary-*/Packages ; do
         # copy Packages file to avoid of removing by the new DEB version
         # update metadata 'Packages' files
-        update_deb_metadata $packages deb
+        update_deb_metadata $packages deb $locpackfile
         [ "$updated_deb" == "1" ] || continue
         updated_files=1
     done
@@ -315,7 +362,8 @@ EOF
         echo "Regenerated DSC file: $locpackfile"
         # copy Sources file to avoid of removing by the new DSC version
         # update metadata 'Sources' file
-        update_deb_metadata dists/$loop_dist/$component/source/Sources dsc
+        update_deb_metadata dists/$loop_dist/$component/source/Sources dsc \
+            $locpackfile
         [ "$updated_dsc" == "1" ] || continue
         updated_files=1
         # save the registered DSC file to S3
@@ -459,29 +507,70 @@ function pack_rpm {
     for hash in $(zcat repodata/other.xml.gz | grep "<package pkgid=" | \
+                # find and remove all <package> tags for the bad hashes
+                for hash_rm in $hashes_old ; do
+                    echo "Removing from ${metafile}.xml.gz file old hash: $hash_rm"
+                    zcat ${metafile}.xml.gz | \
+                        pcregrep -Mi -v "(?s)<package pkgid=\"${hash_rm}\".*?</package>" | \
+                        gzip - >${metafile}_new.xml.gz
+                    mv ${metafile}_new.xml.gz ${metafile}.xml.gz
+                    packs_rm=$(($packs_rm+1))
+                done
+                # reduce the number of packages in the metafile counter
+                gunzip ${metafile}.xml.gz
+                packs=$(($(grep " packages=" ${metafile}.xml | \
+                    sed 's#.* packages="\([0-9]*\)".*#\1#g')-${packs_rm}))
+                sed "s# packages=\"[0-9]*\"# packages=\"${packs}\"#g" \
+                    -i ${metafile}.xml
+                gzip ${metafile}.xml
+            done
         fi
     done
-- 
2.17.1
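To see the block removal in action outside the script, here is a
self-contained sketch: the stanzas, file names, and hashes below are
fabricated, and the pcregrep invocation is the one used by the DEB
branch of update_deb_metadata above.

#!/bin/sh
# Build a fabricated two-stanza Packages index (hashes shortened).
cat >Packages.saved <<'EOF'
Package: tarantool
Filename: pool/t/tarantool_2.3.1-1_amd64.deb
SHA256: 1111aaaa

Package: tarantool-dev
Filename: pool/t/tarantool-dev_2.3.1-1_amd64.deb
SHA256: 2222bbbb
EOF

hash_rm=1111aaaa
# Drop the whole block registered under the stale hash.
pcregrep -Mi -v "(?s)Package: (\N+\n)+(?=SHA256: ${hash_rm}).*?^$" \
    Packages.saved >Packages.saved_new
mv Packages.saved_new Packages.saved
cat Packages.saved    # the tarantool stanza is gone, tarantool-dev stays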