[arch-dev-public] [PATCH 3/4] create-filelists: rework the package loop completely

Dan McGee dan at archlinux.org
Sat Feb 27 13:01:35 EST 2010


Instead of wasting time extracting .PKGINFO twice from every single package
in the repos, use the package DB to eliminate most of the heavy lifting.
This way we only need to look at the packages that have actually changed
since the last time we built the files database.

This should noticeably speed up this job, in addition to reducing I/O load,
since we no longer read every package file unnecessarily.

Signed-off-by: Dan McGee <dan at archlinux.org>
---
 cron-jobs/create-filelists |   41 ++++++++++++++++++++++++++++-------------
 1 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/cron-jobs/create-filelists b/cron-jobs/create-filelists
index a0b6a57..6091bf4 100755
--- a/cron-jobs/create-filelists
+++ b/cron-jobs/create-filelists
@@ -14,8 +14,12 @@ if [ -f "$lock" ]; then
 fi
 
 touch "$lock" || exit 1
-TMPDIR="$(mktemp -d /tmp/create-filelists.XXXXXX)" || exit 1
-CACHEDIR="$(mktemp -d /tmp/create-filelists.XXXXXX)" || exit 1
+# location where the package DB is extracted so we know what to include
+DBDIR="$(mktemp -d /tmp/create-filelists.dbdir.XXXXXX)" || exit 1
+# location where the old files DB is extracted to save us some work
+CACHEDIR="$(mktemp -d /tmp/create-filelists.cachedir.XXXXXX)" || exit 1
+# location where the new files DB is built up and eventually zipped
+TMPDIR="$(mktemp -d /tmp/create-filelists.tmpdir.XXXXXX)" || exit 1
 
 #adjust the nice level to run at a lower priority
 /usr/bin/renice +10 -p $$ > /dev/null
@@ -30,33 +34,45 @@ esac
 FILESEXT="${DBEXT//db/files}"
 
 for repo in $repos; do
+    REPO_DB_FILE="${repo}$DBEXT"
     FILES_DB_FILE="${repo}$FILESEXT"
     for arch in ${ARCHES[@]}; do
+#       echo "Running for architecture $arch, repo $repo"
         cd "$reposdir"
 
         repodir="${repo}/os/${arch}"
         cached="no"
 
+        # extract package db archive
+        if [ -f "${targetdir}/${repodir}/${REPO_DB_FILE}" ]; then
+            mkdir -p "${DBDIR}/${repodir}"
+#           echo "extracting $REPO_DB_FILE"
+            bsdtar -xf "${targetdir}/${repodir}/${REPO_DB_FILE}" -C "${DBDIR}/${repodir}"
+        else
+            echo "Fail! Does the repo $repo with arch $arch even exist?"
+            continue
+        fi
+
         # extract old file archive
         if [ -f "${targetdir}/${repodir}/${FILES_DB_FILE}" ]; then
             mkdir -p "${CACHEDIR}/${repodir}"
+#           echo "extracting $FILES_DB_FILE"
             bsdtar -xf "${targetdir}/${repodir}/${FILES_DB_FILE}" -C "${CACHEDIR}/${repodir}"
             cached="yes"
         fi
 
         # create file lists
-        for pkg in $repodir/*${PKGEXT}; do
-            pkgname="$(getpkgname "$pkg")"
-            pkgver="$(getpkgver "$pkg")"
-            tmppkgdir="${TMPDIR}/${repodir}/${pkgname}-${pkgver}"
+        for pkg in $(ls ${DBDIR}/${repodir}); do
+            tmppkgdir="${TMPDIR}/${repodir}/${pkg}"
             mkdir -p "$tmppkgdir"
-            if [ -f "${CACHEDIR}/${repodir}/${pkgname}-${pkgver}/files" ]; then
-#               echo "cache: $pkgname"
-                mv "${CACHEDIR}/${repodir}/${pkgname}-${pkgver}/files" "${tmppkgdir}/files"
+            if [ -f "${CACHEDIR}/${repodir}/${pkg}/files" ]; then
+#               echo "cache: $pkg"
+                mv "${CACHEDIR}/${repodir}/${pkg}/files" "${tmppkgdir}/files"
             else
-#               echo "$repo/$arch: $pkgname"
+#               echo "not cache: $repo/$arch: $pkg"
+                filename=$(grep -A1 '^%FILENAME%$' "${DBDIR}/${repodir}/${pkg}/desc" | tail -n1)
                 echo '%FILES%' > "${tmppkgdir}/files"
-                bsdtar --exclude=.* -tf "$pkg" >> "${tmppkgdir}/files"
+                bsdtar --exclude=.* -tf "$repodir/$filename" >> "${tmppkgdir}/files"
                 cached="no"
             fi
         done
@@ -76,8 +92,7 @@ for repo in $repos; do
 done
 
 cd - >/dev/null
-rm -rf "$TMPDIR" || exit 1
-rm -rf "$CACHEDIR" || exit 1
+rm -rf "$TMPDIR" "$CACHEDIR" "$DBDIR"
 rm -f "$lock" || exit 1
 # echo 'done'
 
-- 
1.7.0



More information about the arch-dev-public mailing list