On Tue, Nov 05, 2013 at 08:35:27AM -0500, Andrew Gregory wrote:
On 11/04/13 at 12:02pm, Dave Reisner wrote:
alpm_pkg_vercmp rightfully considers foo-1-1 to be newer than foo-1:1-1 because of the leading characters. So, in file mode, pacsort must necessarily ignore the package name when performing comparisons. Create a struct, for convenience, which overlays pointers and lengths to the name and version within a filename. Using this, we can avoid some amount of heap allocation, and do better at comparing versions for similar package names.
Fixes FS#37631.
Signed-off-by: Dave Reisner <dreisner@archlinux.org> --- src/util/pacsort.c | 69 +++++++++++++++++++++++++++++++++++++++++------- test/util/pacsorttest.sh | 11 +++++++- 2 files changed, 69 insertions(+), 11 deletions(-)
diff --git a/src/util/pacsort.c b/src/util/pacsort.c index 687e558..cbee316 100644 --- a/src/util/pacsort.c +++ b/src/util/pacsort.c @@ -28,6 +28,15 @@
#define DELIM ' '
+#ifndef MIN +#define MIN(a, b) \ + __extension__({ \ + __typeof__(a) _a = (a); \ + __typeof__(b) _b = (b); \ + _a < _b ? _a : _b; \ + }) +#endif + struct buffer_t { char *mem; size_t len; @@ -40,6 +49,14 @@ struct list_t { size_t maxcount; };
+struct pkgmeta_t { + const char *pkgname; + size_t pkgname_len; + + const char *pkgver; + size_t pkgver_len; +}; + static struct options_t { int order; int sortkey; @@ -258,6 +275,30 @@ static const char *nth_column(const char *string) return prev; }
+static void extract_pkgmeta(const char *path, struct pkgmeta_t *meta) { + const char *pkgver_end; + + memset(meta, 0, sizeof(struct pkgmeta_t)); + + meta->pkgname = basename(path);
I don't think we should use basename here. You're currently using the GNU version, which seems to be little more than strrchr(path, '/') anyway and is likely to confuse people expecting the POSIX version, which can modify its argument or return a pointer to statically allocated memory.
Ack.
+ pkgver_end = strrchr(meta->pkgname, '-'); + + /* read backwards through pkgrel */ + for(meta->pkgver = pkgver_end - 1; + meta->pkgver > meta->pkgname && *meta->pkgver != '-'; + --meta->pkgver) + ; + /* read backwards through pkgver */ + for(--meta->pkgver; + meta->pkgver > meta->pkgname && *meta->pkgver != '-'; + --meta->pkgver) + ; + ++meta->pkgver; + + meta->pkgname_len = meta->pkgver - meta->pkgname - 1; + meta->pkgver_len = pkgver_end - meta->pkgver; +} +
Could we just expose _alpm_splitname (or a simplified version of it without the hash) and remove this?
A lot of the coding I've done recently with Arch has been surrounding package maintenance and I swear that I keep writing the same filename parsing code over and over again. Exposing _alpm_splitname seems reasonable, but I worry about how much use it would actually see as a public API.
static int vercmp(const void *p1, const void *p2) { const char *name1, *name2; @@ -280,17 +321,25 @@ static int vercmp(const void *p1, const void *p2) if(opts.filemode) { if(fnmatch("*-*.pkg.tar.?z", name1, 0) == 0 && fnmatch("*-*.pkg.tar.?z", name2, 0) == 0) { - const char *start, *end; - - start = strrchr(name1, '/'); - start = start ? start + 1 : name1; - end = strrchr(name1, '-'); - fn1 = strndup(start, end - start); + struct pkgmeta_t pkgmeta1, pkgmeta2; + int namecmp; + + /* Extract name and version */ + extract_pkgmeta(name1, &pkgmeta1); + extract_pkgmeta(name2, &pkgmeta2); + + /* If the package names aren't the same, there's no sense in comparing + * the versions. Short circuit and return a comparison based on the + * pkgnames. This probably could be cleaner, but I'm favoring clarity + * over cleverness. */ + namecmp = memcmp(pkgmeta1.pkgname, pkgmeta2.pkgname, + MIN(pkgmeta1.pkgname_len, pkgmeta2.pkgname_len)); + if(pkgmeta1.pkgname_len != pkgmeta2.pkgname_len || namecmp != 0) { + return opts.order * namecmp; + }
- start = strrchr(name2, '/'); - start = start ? start + 1 : name2; - end = strrchr(name2, '-'); - fn2 = strndup(start, end - start); + fn1 = strndup(pkgmeta1.pkgver, pkgmeta1.pkgver_len); + fn2 = strndup(pkgmeta2.pkgver, pkgmeta2.pkgver_len);
name1 = fn1; name2 = fn2;
If we're going to convert input to a struct like this, why not just store the original input in the struct as well and convert everything up front? vercmp can use the stored info for --files and the original input otherwise.
That's essentially what happens already... I agree there's probably room for improvement here, though.
diff --git a/test/util/pacsorttest.sh b/test/util/pacsorttest.sh index ac16c45..8382603 100755 --- a/test/util/pacsorttest.sh +++ b/test/util/pacsorttest.sh @@ -21,7 +21,7 @@ # default binary if one was not specified as $1 bin=${1:-${PMTEST_UTIL_DIR}pacsort} # holds counts of tests -total=23 +total=26 run=0 failure=0
@@ -89,6 +89,15 @@ runtest $in $in "filename sort with uneven leading path components" "--files" in="firefox-18.0-2-i686.pkg.tar.xz\nfirefox-18.0.1-1-x86_64.pkg.tar.gz\n" runtest $in $in "filename sort with different extensions" "--files"
+in="/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n/packages/dialog-1:1.2_20130928-1-x86_64.pkg.tar.xz\n" +runtest $in $in "filename sort with epoch" "--files" + +in="/packages/dia-log-1:1.2_20130928-1-x86_64.pkg.tar.xz\n/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n" +runtest $in $in "filename sort with epoch" "--files" + +in="/packages/dialoggg-1:1.2_20130928-1-x86_64.pkg.tar.xz\n/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n" +runtest $in $in "filename sort with epoch" "--files" + # generate some long input/expected for the next few tests declare normal reverse names_normal names_reverse for ((i=1; i<600; i++)); do -- 1.8.4.2