[pacman-dev] [PATCH 2/2] pacsort: parse inputs up front into control struct

Dave Reisner dreisner at archlinux.org
Tue Nov 18 14:02:06 UTC 2014


This moves most of the parsing work out of the sorting path. The explode
and splitfile functions now call input_new and append input_t structs
to the list of sort candidates instead of raw strings. This lets us
make smarter and easier decisions in the sorting callbacks, which are
now also split into the version and file comparison methods for clarity.

This fixes two bugs:

1) Incorrect ordering with filenames containing epoch in the pkgver
2) Incorrect ordering with package names which are substrings of
each other (e.g. "systemd" and "systemd-sysvcompat").

Performance of the --files mode degrades slightly as a result of this
change, but not unreasonably. Sorting with small inputs (5-10) doubles
in runtime, but larger inputs (4000+) only increase by 20%.

ref: https://bugs.archlinux.org/task/37631
---
This is a followup to agregory rightfully pointing out that my code was bad and
I should feel bad. Rather than punt on the TODO, I decided to just farking do
it, and realized another bug in the process which I've rolled into this change.

 src/util/pacsort.c       | 204 +++++++++++++++++++++++++++++++++--------------
 test/util/pacsorttest.sh |  11 ++-
 2 files changed, 156 insertions(+), 59 deletions(-)

diff --git a/src/util/pacsort.c b/src/util/pacsort.c
index 86f2fc6..665f218 100644
--- a/src/util/pacsort.c
+++ b/src/util/pacsort.c
@@ -28,6 +28,15 @@
 
 #define DELIM ' '
 
+#ifndef MIN
+#define MIN(a, b)      \
+	__extension__({           \
+		__typeof__(a) _a = (a); \
+		__typeof__(b) _b = (b); \
+		_a < _b ? _a : _b;           \
+	})
+#endif
+
 struct buffer_t {
 	char *mem;
 	size_t len;
@@ -35,11 +44,22 @@ struct buffer_t {
 };
 
 struct list_t {
-	char **list;
+	void **list;
 	size_t count;
 	size_t maxcount;
 };
 
+struct input_t {
+	char *data;
+	int is_file;
+
+	const char *pkgname;
+	size_t pkgname_len;
+
+	const char *pkgver;
+	size_t pkgver_len;
+};
+
 static struct options_t {
 	int order;
 	int sortkey;
@@ -147,9 +167,9 @@ static int list_grow(struct list_t *list)
 	return 0;
 }
 
-static int list_add(struct list_t *list, char *name)
+static int list_add(struct list_t *list, void *obj)
 {
-	if(!list || !name) {
+	if(!list || !obj) {
 		return 1;
 	}
 
@@ -159,13 +179,13 @@ static int list_add(struct list_t *list, char *name)
 		}
 	}
 
-	list->list[list->count] = name;
+	list->list[list->count] = obj;
 	list->count++;
 
 	return 0;
 }
 
-static void list_free(struct list_t *list)
+static void list_free(struct list_t *list, void (*freefn)(void *))
 {
 	size_t i;
 
@@ -175,23 +195,86 @@ static void list_free(struct list_t *list)
 
 	if(list->list) {
 		for(i = 0; i < list->count; i++) {
-			free(list->list[i]);
+			freefn(list->list[i]);
 		}
 		free(list->list);
 	}
 	free(list);
 }
 
+static void input_free(void *p)
+{
+	struct input_t *in = p;
+
+	if(in == NULL) {
+		return;
+	}
+
+	free(in->data);
+	free(in);
+}
+
+static struct input_t *input_new(const char *path, int pathlen)
+{
+	const char *pkgver_end;
+	const char *slash;
+	struct input_t *in;
+
+	in = calloc(1, sizeof(struct input_t));
+	if(in == NULL) {
+		return NULL;
+	}
+
+	in->data = strndup(path, pathlen);
+	if(in->data == NULL) {
+		return NULL;
+	}
+
+	in->is_file = fnmatch("*-*.pkg.tar.?z", in->data, 0) == 0;
+	if(!in->is_file) {
+		return in;
+	}
+
+	/* for files, we parse the pkgname and pkgrel from the full filename. */
+
+	slash = strrchr(in->data, '/');
+	if(slash == NULL) {
+		in->pkgname = in->data;
+	} else {
+		in->pkgname = slash + 1;
+	}
+
+	pkgver_end = strrchr(in->pkgname, '-');
+
+	/* read backwards through pkgrel */
+	for(in->pkgver = pkgver_end - 1;
+			in->pkgver > in->pkgname && *in->pkgver != '-';
+			--in->pkgver)
+		;
+	/* read backwards through pkgver */
+	for(--in->pkgver;
+			in->pkgver > in->pkgname && *in->pkgver != '-';
+			--in->pkgver)
+		;
+	++in->pkgver;
+
+	in->pkgname_len = in->pkgver - in->pkgname - 1;
+	in->pkgver_len = pkgver_end - in->pkgver;
+
+	return in;
+}
+
 static char *explode(struct buffer_t *buffer, struct list_t *list)
 {
-	char *name, *ptr, *end;
+	char *ptr, *end;
 	const char linedelim = opts.null ? '\0' : '\n';
+	struct input_t *meta;
 
 	ptr = buffer->mem;
 	while((end = memchr(ptr, linedelim, &buffer->mem[buffer->len] - ptr))) {
 		*end = '\0';
-		name = strdup(ptr);
-		list_add(list, name);
+		meta = input_new(ptr, end - ptr);
+		list_add(list, meta);
 		ptr = end + 1;
 	}
 
@@ -229,8 +312,8 @@ static int splitfile(FILE *stream, struct buffer_t *buffer, struct list_t *list)
 	}
 
 	if(buffer->len) {
-		char *name = strndup(buffer->mem, buffer->len + 1);
-		if(list_add(list, name) != 0) {
+		struct input_t *meta = input_new(buffer->mem, buffer->len + 1);
+		if(meta == NULL || list_add(list, meta) != 0) {
 			return 1;
 		}
 	}
@@ -256,57 +339,61 @@ static const char *nth_column(const char *string)
 	return prev;
 }
 
-static int vercmp(const void *p1, const void *p2)
+static int compare_versions(const char *v1, const char *v2)
 {
-	const char *name1, *name2;
-	char *fn1 = NULL, *fn2 = NULL;
-	int r;
-
-	name1 = *(const char **)p1;
-	name2 = *(const char **)p2;
-
-	/* if we're operating in file mode, we modify the strings under certain
-	 * conditions to appease alpm_pkg_vercmp(). If and only if both inputs end
-	 * with a suffix that appears to be a package name, we strip the suffix and
-	 * remove any leading paths. This means that strings such as:
-	 *
-	 *   /var/cache/pacman/pkg/firefox-18.0-2-x86_64.pkg.tar.xz
-	 *   firefox-18.0-2-x86_64.pkg.tar.gz
-	 *
-	 *  Will be considered equal by this version comparison
-	 */
-	if(opts.filemode) {
-		if(fnmatch("*-*.pkg.tar.?z", name1, 0) == 0 &&
-			 fnmatch("*-*.pkg.tar.?z", name2, 0) == 0) {
-			const char *start, *end;
-
-			start = strrchr(name1, '/');
-			start = start ? start + 1 : name1;
-			end = strrchr(name1, '-');
-			fn1 = strndup(start, end - start);
-
-			start = strrchr(name2, '/');
-			start = start ? start + 1 : name2;
-			end = strrchr(name2, '-');
-			fn2 = strndup(start, end - start);
-
-			name1 = fn1;
-			name2 = fn2;
-		}
-	}
-
 	if(opts.sortkey == 0) {
-		r = opts.order * alpm_pkg_vercmp(name1, name2);
+		return opts.order * alpm_pkg_vercmp(v1, v2);
 	} else {
-		r = opts.order * alpm_pkg_vercmp(nth_column(name1), nth_column(name2));
+		return opts.order * alpm_pkg_vercmp(nth_column(v1), nth_column(v2));
+	}
+}
+
+static int compare_files(const struct input_t *meta1, const struct input_t *meta2)
+{
+	int cmp;
+	char *verbuf;
+	const char *v1, *v2;
+
+	/* sort first by package name */
+	cmp = memcmp(meta1->pkgname, meta2->pkgname,
+			MIN(meta1->pkgname_len, meta2->pkgname_len));
+
+	/* 1) package names differ, sort by package name */
+	if(cmp != 0) {
+		return opts.order * cmp;
 	}
 
-	if(opts.filemode) {
-		free(fn1);
-		free(fn2);
+	/* 2) prefixes are the same but length differs, sort by length */
+	if(meta1->pkgname_len != meta2->pkgname_len) {
+		return opts.order * (meta1->pkgname_len - meta2->pkgname_len);
 	}
 
-	return r;
+	/* allocate once with enough space for both pkgver */
+	verbuf = calloc(1, meta1->pkgver_len + 1 + meta2->pkgver_len + 1);
+	memcpy(verbuf, meta1->pkgver, meta1->pkgver_len);
+	memcpy(&verbuf[meta1->pkgver_len + 1], meta2->pkgver, meta2->pkgver_len);
+
+	/* 3) sort by package version */
+	v1 = verbuf;
+	v2 = verbuf + meta1->pkgver_len + 1;
+	cmp = compare_versions(v1, v2);
+	free(verbuf);
+
+	return cmp;
+}
+
+static int vercmp(const void *p1, const void *p2)
+{
+	const struct input_t *meta1, *meta2;
+
+	meta1 = *(struct input_t **)p1;
+	meta2 = *(struct input_t **)p2;
+
+	if(opts.filemode && meta1->is_file && meta2->is_file) {
+		return compare_files(meta1, meta2);
+	} else {
+		return compare_versions(meta1->data, meta2->data);
+	}
 }
 
 static char escape_char(const char *string)
@@ -450,13 +537,14 @@ int main(int argc, char *argv[])
 
 	if(list->count) {
 		const char linedelim = opts.null ? '\0' : '\n';
-		qsort(list->list, list->count, sizeof(char *), vercmp);
+		qsort(list->list, list->count, sizeof(void *), vercmp);
 		for(i = 0; i < list->count; i++) {
-			printf("%s%c", list->list[i], linedelim);
+			const struct input_t *in = list->list[i];
+			printf("%s%c", in->data, linedelim);
 		}
 	}
 
-	list_free(list);
+	list_free(list, input_free);
 	buffer_free(buffer);
 
 	return 0;
diff --git a/test/util/pacsorttest.sh b/test/util/pacsorttest.sh
index a2adac9..49d6d8e 100755
--- a/test/util/pacsorttest.sh
+++ b/test/util/pacsorttest.sh
@@ -21,7 +21,7 @@
 # default binary if one was not specified as $1
 bin=${1:-${PMTEST_UTIL_DIR}pacsort}
 # holds counts of tests
-total=23
+total=26
 run=0
 failure=0
 
@@ -89,6 +89,15 @@ runtest $in $in "filename sort with uneven leading path components" "--files"
 in="firefox-18.0-2-i686.pkg.tar.xz\nfirefox-18.0.1-1-x86_64.pkg.tar.gz\n"
 runtest $in $in "filename sort with different extensions" "--files"
 
+in="/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n/packages/dialog-1:1.2_20130928-1-x86_64.pkg.tar.xz\n"
+runtest $in $in "filename sort with epoch" "--files"
+
+in="/packages/dia-log-1:1.2_20130928-1-x86_64.pkg.tar.xz\n/packages/dialog-1.2_20131001-1-x86_64.pkg.tar.xz\n"
+runtest $in $in "filename sort with differing package names and epoch" "--files"
+
+in="/packages/systemd-217-1-x86_64.pkg.tar.xz\n/packages/systemd-sysvcompat-217-1-x86_64.pkg.tar.xz\n"
+runtest $in $in "filename sort with package names as shared substring" "--files"
+
 # generate some long input/expected for the next few tests
 declare normal reverse names_normal names_reverse
 for ((i=1; i<600; i++)); do
-- 
2.1.3


More information about the pacman-dev mailing list