[pacman-dev] [PATCH] pacsort: refactor version comparison for correctness
This refactors the existing vercmp machinery in order to: 1) Correct a deficiency in files comparison. This was too lazily written and could produce incorrect results when a version contained an epoch. 2) Improve tarball detection. Calling fnmatch a second time for the uncompressed tarball case is not a noticeable performance hit, and is more correct. 3) Fix a bug in tarball detection. The glob pattern would match files with only a pkgver, e.g. foopkg-1.2.3-x86_64.pkg.tar.xz. 4) Be generally cleaner code, easier to follow. The test suite gets extended to validate my claims for the new behavior. Fixes FS#37631. Signed-off-by: Dave Reisner <dreisner@archlinux.org> --- This supersedes an earlier patch I sent against pacsort which was a bit more directed and less useful. src/util/pacsort.c | 164 ++++++++++++++++++++++++++++++++++------------- test/util/pacsorttest.sh | 31 ++++++++- 2 files changed, 150 insertions(+), 45 deletions(-) diff --git a/src/util/pacsort.c b/src/util/pacsort.c index 687e558..554352c 100644 --- a/src/util/pacsort.c +++ b/src/util/pacsort.c @@ -20,6 +20,7 @@ #include <errno.h> #include <fnmatch.h> #include <getopt.h> +#include <limits.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -40,6 +41,14 @@ struct list_t { size_t maxcount; }; +struct pkgmeta_t { + const char *pkgname; + size_t pkgname_len; + + const char *pkgver; + size_t pkgver_len; +}; + static struct options_t { int order; int sortkey; @@ -258,57 +267,124 @@ static const char *nth_column(const char *string) return prev; } -static int vercmp(const void *p1, const void *p2) +static void extract_pkgmeta(const char *path, struct pkgmeta_t *meta) { - const char *name1, *name2; - char *fn1 = NULL, *fn2 = NULL; - int r; - - name1 = *(const char **)p1; - name2 = *(const char **)p2; - - /* if we're operating in file mode, we modify the strings under certain - * conditions to appease alpm_pkg_vercmp(). If and only if both inputs end - * with a suffix that appears to be a package name, we strip the suffix and - * remove any leading paths. This means that strings such as: - * - * /var/cache/pacman/pkg/firefox-18.0-2-x86_64.pkg.tar.xz - * firefox-18.0-2-x86_64.pkg.tar.gz - * - * Will be considered equal by this version comparison - */ - if(opts.filemode) { - if(fnmatch("*-*.pkg.tar.?z", name1, 0) == 0 && - fnmatch("*-*.pkg.tar.?z", name2, 0) == 0) { - const char *start, *end; - - start = strrchr(name1, '/'); - start = start ? start + 1 : name1; - end = strrchr(name1, '-'); - fn1 = strndup(start, end - start); - - start = strrchr(name2, '/'); - start = start ? start + 1 : name2; - end = strrchr(name2, '-'); - fn2 = strndup(start, end - start); - - name1 = fn1; - name2 = fn2; - } - } + const char *pkgver_end; + const char *slash; + + /* Find the basename */ + slash = strrchr(path, '/'); + meta->pkgname = slash ? slash + 1 : path; + pkgver_end = strrchr(meta->pkgname, '-'); + + /* Read backwards through pkgrel */ + for(meta->pkgver = pkgver_end - 1; + meta->pkgver > meta->pkgname && *meta->pkgver != '-'; + --meta->pkgver) + ; + /* Read backwards through pkgver */ + for(--meta->pkgver; + meta->pkgver > meta->pkgname && *meta->pkgver != '-'; + --meta->pkgver) + ; + ++meta->pkgver; + + meta->pkgname_len = meta->pkgver - meta->pkgname - 1; + meta->pkgver_len = pkgver_end - meta->pkgver; +} + +static int is_pkgtar(const char *string) +{ + return fnmatch("*-*-*.pkg.tar.?z", string, 0) == 0 || + fnmatch("*-*-*.pkg.tar", string, 0) == 0; +} + +static int vercmp_versions(const char *ver1, const char *ver2) +{ + return alpm_pkg_vercmp(nth_column(ver1), nth_column(ver2)); +} + +static int pkgmeta_vercmp(struct pkgmeta_t *pkg1, struct pkgmeta_t *pkg2) +{ + char v1[PATH_MAX], v2[PATH_MAX]; + + memcpy(v1, pkg1->pkgname, pkg1->pkgname_len); + v1[pkg1->pkgname_len] = '\0'; + + memcpy(v2, pkg2->pkgname, pkg2->pkgname_len); + v2[pkg2->pkgname_len] = '\0'; - if(opts.sortkey == 0) { - r = opts.order * alpm_pkg_vercmp(name1, name2); + return alpm_pkg_vercmp(v1, v2); +} + +static int pkgmeta_namecmp(struct pkgmeta_t *pkg1, struct pkgmeta_t *pkg2) +{ + int cmp; + const size_t cmplen = (pkg1->pkgname_len < pkg2->pkgname_len) ? + pkg1->pkgname_len : pkg2->pkgname_len; + + cmp = memcmp(pkg1->pkgname, pkg2->pkgname, cmplen); + if(cmp != 0) { + return cmp; } else { - r = opts.order * alpm_pkg_vercmp(nth_column(name1), nth_column(name2)); + return pkg1->pkgname_len - pkg2->pkgname_len; } +} - if(opts.filemode) { - free(fn1); - free(fn2); +static int pkgmeta_trailercmp(struct pkgmeta_t *pkg1, struct pkgmeta_t *pkg2) +{ + return strcmp(&pkg1->pkgver[pkg1->pkgver_len], + &pkg2->pkgver[pkg2->pkgver_len]); +} + +static int pkgmeta_cmp(struct pkgmeta_t *pkg1, struct pkgmeta_t *pkg2) +{ + int cmp; + + /* 1) Compare the package names. */ + cmp = pkgmeta_namecmp(pkg1, pkg2); + if(cmp != 0) { + return cmp; + } + + /* 2) Compare the versions. */ + cmp = pkgmeta_vercmp(pkg1, pkg2); + if(cmp != 0) { + return cmp; + } + + /* 3) Nothing else to do, compare the remainder of the strings lexically. */ + return pkgmeta_trailercmp(pkg1, pkg2); +} + +static int vercmp_files(const char *file1, const char *file2) +{ + struct pkgmeta_t pkg1, pkg2; + + /* In order for 2 files to be considered equal, they must have the same + * basename. For example, the two follow strings are considered equal: + * /var/cache/pacman/pkg/pacman-4.1.1-1-x86_64.pkg.tar.gz + * /share/packages/pacman-4.1.1-1-x86_64.pkg.tar.gz + */ + + extract_pkgmeta(file1, &pkg1); + extract_pkgmeta(file2, &pkg2); + + return pkgmeta_cmp(&pkg1, &pkg2); +} + +static int vercmp(const void *v1, const void *v2) +{ + const char *s1 = *(const char **)v1, *s2 = *(const char **)v2; + int cmp; + + if(opts.filemode && is_pkgtar(s1) && is_pkgtar(s1)) { + cmp = vercmp_files(s1, s2); + } else { + cmp = vercmp_versions(s1, s2); } - return r; + return opts.order * cmp; } static char escape_char(const char *string) diff --git a/test/util/pacsorttest.sh b/test/util/pacsorttest.sh index ac16c45..dbd11d1 100755 --- a/test/util/pacsorttest.sh +++ b/test/util/pacsorttest.sh @@ -21,7 +21,7 @@ # default binary if one was not specified as $1 bin=${1:-${PMTEST_UTIL_DIR}pacsort} # holds counts of tests -total=23 +total=32 run=0 failure=0 @@ -89,6 +89,35 @@ runtest $in $in "filename sort with uneven leading path components" "--files" in="firefox-18.0-2-i686.pkg.tar.xz\nfirefox-18.0.1-1-x86_64.pkg.tar.gz\n" runtest $in $in "filename sort with different extensions" "--files" +in="/path1/path2/firefox-18.0.1-1-x86_64.pkg.tar.xz\n/path2/firefox-18.0-2-x86_64.pkg.tar.xz\n" +runtest $in $in "filename reverse sort" "--files --reverse" + +in="/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n/packages/dialog-1:1.2_20130928-1-x86_64.pkg.tar.xz\n" +runtest $in $in "filename sort with epoch" "--files" + +in="/packages/dia-log-1:1.2_20130928-1-x86_64.pkg.tar.xz\n/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n" +runtest $in $in "filename with different pkgname sort with epoch" "--files" + +in="/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n/packages/dialoggg-1:1.2_20130928-1-x86_64.pkg.tar.xz\n" +runtest $in $in "filename with shared prefix sort with epoch" "--files" + +in="/packages/dialoggg-1:1.2_20130928-1-x86_64.pkg.tar.xz\n/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n" +runtest $in $in "filename reverse sort with epoch" "--files --reverse" + +in="firefox-18.0-2-i686.pkg.tar.gz\nfirefox-18.0-2-x86_64.pkg.tar.gz\n" +runtest "$in" "$in" "filename sort with different architecture" "--files" + +in="firefox-18.0-2-x86_64.pkg.tar.gz\nfirefox-18.0-2-x86_64.pkg.tar.xz\n" +runtest "$in" "$in" "filename sort with different compression" "--files" + +in="10 3\n10 2\n10 1\n" +ex="10 1\n10 2\n10 3\n" +runtest "$in" "$ex" "sort on key" "--key 2" + +in="10 3 1000\n10 2 2000\n10 1\n" +ex="10 1\n10 3 1000\n10 2 2000\n" +runtest "$in" "$ex" "sort on key with uneven columns" "--key 3" + # generate some long input/expected for the next few tests declare normal reverse names_normal names_reverse for ((i=1; i<600; i++)); do -- 1.8.4.2
On 11/05/13 at 11:03pm, Dave Reisner wrote:
This refactors the existing vercmp machinery in order to:
1) Correct a deficiency in files comparison. This was too lazily written and could produce incorrect results when a version contained an epoch. 2) Improve tarball detection. Calling fnmatch a second time for the uncompressed tarball case is not a noticeable performance hit, and is more correct. 3) Fix a bug in tarball detection. The glob pattern would match files with only a pkgver, e.g. foopkg-1.2.3-x86_64.pkg.tar.xz. 4) Be generally cleaner code, easier to follow.
The test suite gets extended to validate my claims for the new behavior.
Fixes FS#37631.
Signed-off-by: Dave Reisner <dreisner@archlinux.org> --- This supersedes an earlier patch I sent against pacsort which was a bit more directed and less useful.
src/util/pacsort.c | 164 ++++++++++++++++++++++++++++++++++------------- test/util/pacsorttest.sh | 31 ++++++++- 2 files changed, 150 insertions(+), 45 deletions(-)
diff --git a/src/util/pacsort.c b/src/util/pacsort.c index 687e558..554352c 100644 --- a/src/util/pacsort.c +++ b/src/util/pacsort.c
<snip>
+ const char *pkgver_end; + const char *slash; + + /* Find the basename */ + slash = strrchr(path, '/'); + meta->pkgname = slash ? slash + 1 : path; + pkgver_end = strrchr(meta->pkgname, '-'); + + /* Read backwards through pkgrel */ + for(meta->pkgver = pkgver_end - 1; + meta->pkgver > meta->pkgname && *meta->pkgver != '-'; + --meta->pkgver) + ; + /* Read backwards through pkgver */ + for(--meta->pkgver; + meta->pkgver > meta->pkgname && *meta->pkgver != '-'; + --meta->pkgver) + ;
Nitpick. Personally, I would have an easier time reading this if we used a temporary variable for meta->pkgver so that the for loop could fit all on one line the way it is in _alpm_splitname.
+ ++meta->pkgver; + + meta->pkgname_len = meta->pkgver - meta->pkgname - 1; + meta->pkgver_len = pkgver_end - meta->pkgver; +}
<snip>
+static int vercmp_files(const char *file1, const char *file2) +{ + struct pkgmeta_t pkg1, pkg2; + + /* In order for 2 files to be considered equal, they must have the same + * basename. For example, the two follow strings are considered equal: + * /var/cache/pacman/pkg/pacman-4.1.1-1-x86_64.pkg.tar.gz + * /share/packages/pacman-4.1.1-1-x86_64.pkg.tar.gz + */ + + extract_pkgmeta(file1, &pkg1); + extract_pkgmeta(file2, &pkg2);
We could do this extraction before the sort instead of for every comparison so that we sort a *pkg_meta_t[] instead of *char[]. Then we could also verify that our input consists entirely of paths, because mixed paths and version numbers with --files will give nonsensical results that change based on the order of the input.
+ + return pkgmeta_cmp(&pkg1, &pkg2); +} + +static int vercmp(const void *v1, const void *v2) +{ + const char *s1 = *(const char **)v1, *s2 = *(const char **)v2; + int cmp; + + if(opts.filemode && is_pkgtar(s1) && is_pkgtar(s1)) {
You check s1 twice.
+ cmp = vercmp_files(s1, s2); + } else { + cmp = vercmp_versions(s1, s2); }
- return r; + return opts.order * cmp; }
static char escape_char(const char *string)
participants (2)
-
Andrew Gregory
-
Dave Reisner