Instead of wasting time extracting .PKGINFO twice from every single package in the repos, use the package DB to eliminate most of the heavy lifting. This way we only need to worry about looking at the packages that actually have changed since the last time we built the package database. This should give a noticeable performance increase to this job in addition to reducing IO load and unnecessary reading of every package file. Signed-off-by: Dan McGee <dan@archlinux.org> --- cron-jobs/create-filelists | 41 ++++++++++++++++++++++++++++------------- 1 files changed, 28 insertions(+), 13 deletions(-) diff --git a/cron-jobs/create-filelists b/cron-jobs/create-filelists index a0b6a57..6091bf4 100755 --- a/cron-jobs/create-filelists +++ b/cron-jobs/create-filelists @@ -14,8 +14,12 @@ if [ -f "$lock" ]; then fi touch "$lock" || exit 1 -TMPDIR="$(mktemp -d /tmp/create-filelists.XXXXXX)" || exit 1 -CACHEDIR="$(mktemp -d /tmp/create-filelists.XXXXXX)" || exit 1 +# location where the package DB is extracted so we know what to include +DBDIR="$(mktemp -d /tmp/create-filelists.dbdir.XXXXXX)" || exit 1 +# location where the old files DB is extracted to save us some work +CACHEDIR="$(mktemp -d /tmp/create-filelists.cachedir.XXXXXX)" || exit 1 +# location where the new files DB is built up and eventually zipped +TMPDIR="$(mktemp -d /tmp/create-filelists.tmpdir.XXXXXX)" || exit 1 #adjust the nice level to run at a lower priority /usr/bin/renice +10 -p $$ > /dev/null @@ -30,33 +34,45 @@ esac FILESEXT="${DBEXT//db/files}" for repo in $repos; do + REPO_DB_FILE="${repo}$DBEXT" FILES_DB_FILE="${repo}$FILESEXT" for arch in ${ARCHES[@]}; do +# echo "Running for architecture $arch, repo $repo" cd "$reposdir" repodir="${repo}/os/${arch}" cached="no" + # extract package db archive + if [ -f "${targetdir}/${repodir}/${REPO_DB_FILE}" ]; then + mkdir -p "${DBDIR}/${repodir}" +# echo "extracting $REPO_DB_FILE" + bsdtar -xf "${targetdir}/${repodir}/${REPO_DB_FILE}" -C "${DBDIR}/${repodir}" + else + echo "Fail! Does the repo $repo with arch $arch even exist?" + continue + fi + # extract old file archive if [ -f "${targetdir}/${repodir}/${FILES_DB_FILE}" ]; then mkdir -p "${CACHEDIR}/${repodir}" +# echo "extracting $FILES_DB_FILE" bsdtar -xf "${targetdir}/${repodir}/${FILES_DB_FILE}" -C "${CACHEDIR}/${repodir}" cached="yes" fi # create file lists - for pkg in $repodir/*${PKGEXT}; do - pkgname="$(getpkgname "$pkg")" - pkgver="$(getpkgver "$pkg")" - tmppkgdir="${TMPDIR}/${repodir}/${pkgname}-${pkgver}" + for pkg in $(ls ${DBDIR}/${repodir}); do + tmppkgdir="${TMPDIR}/${repodir}/${pkg}" mkdir -p "$tmppkgdir" - if [ -f "${CACHEDIR}/${repodir}/${pkgname}-${pkgver}/files" ]; then -# echo "cache: $pkgname" - mv "${CACHEDIR}/${repodir}/${pkgname}-${pkgver}/files" "${tmppkgdir}/files" + if [ -f "${CACHEDIR}/${repodir}/${pkg}/files" ]; then +# echo "cache: $pkg" + mv "${CACHEDIR}/${repodir}/${pkg}/files" "${tmppkgdir}/files" else -# echo "$repo/$arch: $pkgname" +# echo "not cache: $repo/$arch: $pkg" + filename=$(grep -A1 '^%FILENAME%$' "${DBDIR}/${repodir}/${pkg}/desc" | tail -n1) echo '%FILES%' > "${tmppkgdir}/files" - bsdtar --exclude=.* -tf "$pkg" >> "${tmppkgdir}/files" + bsdtar --exclude=.* -tf "$repodir/$filename" >> "${tmppkgdir}/files" cached="no" fi done @@ -76,8 +92,7 @@ for repo in $repos; do done cd - >/dev/null -rm -rf "$TMPDIR" || exit 1 -rm -rf "$CACHEDIR" || exit 1 +rm -rf "$TMPDIR" "$CACHEDIR" "$DBDIR" rm -f "$lock" || exit 1 # echo 'done' -- 1.7.0