[pacman-dev] [PATCH] pacsort: refactor version comparison for correctness

Dave Reisner dreisner at archlinux.org
Tue Nov 5 23:03:08 EST 2013


This refactors the existing vercmp machinery in order to:

1) Correct a deficiency in files comparison. This was too lazily written
and could produce incorrect results when a version contained an epoch.
2) Improve tarball detection. Calling fnmatch a second time for the
uncompressed tarball case is not a noticeable performance hit, and is
more correct.
3) Fix a bug in tarball detection. The glob pattern would match files
with only a pkgver, e.g. foopkg-1.2.3-x86_64.pkg.tar.xz.
4) Be generally cleaner code, easier to follow.

The test suite gets extended to validate my claims for the new behavior.

Fixes FS#37631.

Signed-off-by: Dave Reisner <dreisner at archlinux.org>
---
This supersedes an earlier patch I sent against pacsort which was a bit
more directed and less useful.

 src/util/pacsort.c       | 164 ++++++++++++++++++++++++++++++++++-------------
 test/util/pacsorttest.sh |  31 ++++++++-
 2 files changed, 150 insertions(+), 45 deletions(-)

diff --git a/src/util/pacsort.c b/src/util/pacsort.c
index 687e558..554352c 100644
--- a/src/util/pacsort.c
+++ b/src/util/pacsort.c
@@ -20,6 +20,7 @@
 #include <errno.h>
 #include <fnmatch.h>
 #include <getopt.h>
+#include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -40,6 +41,14 @@ struct list_t {
 	size_t maxcount;
 };
 
+struct pkgmeta_t {
+	const char *pkgname;
+	size_t pkgname_len;
+
+	const char *pkgver;
+	size_t pkgver_len;
+};
+
 static struct options_t {
 	int order;
 	int sortkey;
@@ -258,57 +267,124 @@ static const char *nth_column(const char *string)
 	return prev;
 }
 
-static int vercmp(const void *p1, const void *p2)
+static void extract_pkgmeta(const char *path, struct pkgmeta_t *meta)
 {
-	const char *name1, *name2;
-	char *fn1 = NULL, *fn2 = NULL;
-	int r;
-
-	name1 = *(const char **)p1;
-	name2 = *(const char **)p2;
-
-	/* if we're operating in file mode, we modify the strings under certain
-	 * conditions to appease alpm_pkg_vercmp(). If and only if both inputs end
-	 * with a suffix that appears to be a package name, we strip the suffix and
-	 * remove any leading paths. This means that strings such as:
-	 *
-	 *   /var/cache/pacman/pkg/firefox-18.0-2-x86_64.pkg.tar.xz
-	 *   firefox-18.0-2-x86_64.pkg.tar.gz
-	 *
-	 *  Will be considered equal by this version comparison
-	 */
-	if(opts.filemode) {
-		if(fnmatch("*-*.pkg.tar.?z", name1, 0) == 0 &&
-			 fnmatch("*-*.pkg.tar.?z", name2, 0) == 0) {
-			const char *start, *end;
-
-			start = strrchr(name1, '/');
-			start = start ? start + 1 : name1;
-			end = strrchr(name1, '-');
-			fn1 = strndup(start, end - start);
-
-			start = strrchr(name2, '/');
-			start = start ? start + 1 : name2;
-			end = strrchr(name2, '-');
-			fn2 = strndup(start, end - start);
-
-			name1 = fn1;
-			name2 = fn2;
-		}
-	}
+	const char *pkgver_end;
+	const char *slash;
+
+	/* Find the basename */
+	slash = strrchr(path, '/');
+	meta->pkgname = slash ? slash + 1 : path;
+	pkgver_end = strrchr(meta->pkgname, '-');
+
+	/* Read backwards through pkgrel */
+	for(meta->pkgver = pkgver_end - 1;
+			meta->pkgver > meta->pkgname && *meta->pkgver != '-';
+			--meta->pkgver)
+		;
+	/* Read backwards through pkgver */
+	for(--meta->pkgver;
+			meta->pkgver > meta->pkgname && *meta->pkgver != '-';
+			--meta->pkgver)
+		;
+	++meta->pkgver;
+
+	meta->pkgname_len = meta->pkgver - meta->pkgname - 1;
+	meta->pkgver_len = pkgver_end - meta->pkgver;
+}
+
+static int is_pkgtar(const char *string)
+{
+	return fnmatch("*-*-*.pkg.tar.?z", string, 0) == 0 ||
+			fnmatch("*-*-*.pkg.tar", string, 0) == 0;
+}
+
+static int vercmp_versions(const char *ver1, const char *ver2)
+{
+	return alpm_pkg_vercmp(nth_column(ver1), nth_column(ver2));
+}
+
+static int pkgmeta_vercmp(struct pkgmeta_t *pkg1, struct pkgmeta_t *pkg2)
+{
+	char v1[PATH_MAX], v2[PATH_MAX];
+
+	memcpy(v1, pkg1->pkgname, pkg1->pkgname_len);
+	v1[pkg1->pkgname_len] = '\0';
+
+	memcpy(v2, pkg2->pkgname, pkg2->pkgname_len);
+	v2[pkg2->pkgname_len] = '\0';
 
-	if(opts.sortkey == 0) {
-		r = opts.order * alpm_pkg_vercmp(name1, name2);
+	return alpm_pkg_vercmp(v1, v2);
+}
+
+static int pkgmeta_namecmp(struct pkgmeta_t *pkg1, struct pkgmeta_t *pkg2)
+{
+	int cmp;
+	const size_t cmplen = (pkg1->pkgname_len < pkg2->pkgname_len) ?
+		pkg1->pkgname_len : pkg2->pkgname_len;
+
+	cmp = memcmp(pkg1->pkgname, pkg2->pkgname, cmplen);
+	if(cmp != 0) {
+		return cmp;
 	} else {
-		r = opts.order * alpm_pkg_vercmp(nth_column(name1), nth_column(name2));
+		return pkg1->pkgname_len - pkg2->pkgname_len;
 	}
+}
 
-	if(opts.filemode) {
-		free(fn1);
-		free(fn2);
+static int pkgmeta_trailercmp(struct pkgmeta_t *pkg1, struct pkgmeta_t *pkg2)
+{
+	return strcmp(&pkg1->pkgver[pkg1->pkgver_len],
+			&pkg2->pkgver[pkg2->pkgver_len]);
+}
+
+static int pkgmeta_cmp(struct pkgmeta_t *pkg1, struct pkgmeta_t *pkg2)
+{
+	int cmp;
+
+	/* 1) Compare the package names. */
+	cmp = pkgmeta_namecmp(pkg1, pkg2);
+	if(cmp != 0) {
+		return cmp;
+	}
+
+	/* 2) Compare the versions. */
+	cmp = pkgmeta_vercmp(pkg1, pkg2);
+	if(cmp != 0) {
+		return cmp;
+	}
+
+	/* 3) Nothing else to do, compare the remainder of the strings lexically. */
+	return pkgmeta_trailercmp(pkg1, pkg2);
+}
+
+static int vercmp_files(const char *file1, const char *file2)
+{
+	struct pkgmeta_t pkg1, pkg2;
+
+	/* In order for 2 files to be considered equal, they must have the same
+	 * basename. For example, the two follow strings are considered equal:
+	 *   /var/cache/pacman/pkg/pacman-4.1.1-1-x86_64.pkg.tar.gz
+	 *   /share/packages/pacman-4.1.1-1-x86_64.pkg.tar.gz
+	 */
+
+	extract_pkgmeta(file1, &pkg1);
+	extract_pkgmeta(file2, &pkg2);
+
+	return pkgmeta_cmp(&pkg1, &pkg2);
+}
+
+static int vercmp(const void *v1, const void *v2)
+{
+	const char *s1 = *(const char **)v1, *s2 = *(const char **)v2;
+	int cmp;
+
+	if(opts.filemode && is_pkgtar(s1) && is_pkgtar(s1)) {
+		cmp = vercmp_files(s1, s2);
+	} else {
+		cmp = vercmp_versions(s1, s2);
 	}
 
-	return r;
+	return opts.order * cmp;
 }
 
 static char escape_char(const char *string)
diff --git a/test/util/pacsorttest.sh b/test/util/pacsorttest.sh
index ac16c45..dbd11d1 100755
--- a/test/util/pacsorttest.sh
+++ b/test/util/pacsorttest.sh
@@ -21,7 +21,7 @@
 # default binary if one was not specified as $1
 bin=${1:-${PMTEST_UTIL_DIR}pacsort}
 # holds counts of tests
-total=23
+total=32
 run=0
 failure=0
 
@@ -89,6 +89,35 @@ runtest $in $in "filename sort with uneven leading path components" "--files"
 in="firefox-18.0-2-i686.pkg.tar.xz\nfirefox-18.0.1-1-x86_64.pkg.tar.gz\n"
 runtest $in $in "filename sort with different extensions" "--files"
 
+in="/path1/path2/firefox-18.0.1-1-x86_64.pkg.tar.xz\n/path2/firefox-18.0-2-x86_64.pkg.tar.xz\n"
+runtest $in $in "filename reverse sort" "--files --reverse"
+
+in="/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n/packages/dialog-1:1.2_20130928-1-x86_64.pkg.tar.xz\n"
+runtest $in $in "filename sort with epoch" "--files"
+
+in="/packages/dia-log-1:1.2_20130928-1-x86_64.pkg.tar.xz\n/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n"
+runtest $in $in "filename with different pkgname sort with epoch" "--files"
+
+in="/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n/packages/dialoggg-1:1.2_20130928-1-x86_64.pkg.tar.xz\n"
+runtest $in $in "filename with shared prefix sort with epoch" "--files"
+
+in="/packages/dialoggg-1:1.2_20130928-1-x86_64.pkg.tar.xz\n/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n"
+runtest $in $in "filename reverse sort with epoch" "--files --reverse"
+
+in="firefox-18.0-2-i686.pkg.tar.gz\nfirefox-18.0-2-x86_64.pkg.tar.gz\n"
+runtest "$in" "$in" "filename sort with different architecture" "--files"
+
+in="firefox-18.0-2-x86_64.pkg.tar.gz\nfirefox-18.0-2-x86_64.pkg.tar.xz\n"
+runtest "$in" "$in" "filename sort with different compression" "--files"
+
+in="10 3\n10 2\n10 1\n"
+ex="10 1\n10 2\n10 3\n"
+runtest "$in" "$ex" "sort on key" "--key 2"
+
+in="10 3 1000\n10 2 2000\n10 1\n"
+ex="10 1\n10 3 1000\n10 2 2000\n"
+runtest "$in" "$ex" "sort on key with uneven columns" "--key 3"
+
 # generate some long input/expected for the next few tests
 declare normal reverse names_normal names_reverse
 for ((i=1; i<600; i++)); do
-- 
1.8.4.2



More information about the pacman-dev mailing list