[pacman-dev] [PATCH] _alpm_db_load_pkgcache: use mergesort to improve performance

Jürgen Hötzel juergen at hoetzel.info
Wed Jan 10 16:35:00 EST 2007


Hi,

Yesterday i started to analyze pacman's internal.  pacman's db speed is
typically bound by your systems IO performance: It has to read all db
files, if you search repositories.

But i was also surprised by the CPU time utilized. So i started profiling
"pacman -Ss test":

   calls  ms/call  ms/call  name  
    3751     0.01     0.01  _alpm_list_add_sorted
 2088191     0.00     0.00  _alpm_pkg_cmp

_alpm_list_add_sorted is called 2088191 times while building the
pkgcache. This is because every package is sorted into a linked list while
scanning the database files:

	while((info = _alpm_db_scan(db, NULL, infolevel)) != NULL) {
...
		db->pkgcache = _alpm_list_add_sorted(db->pkgcache, info, _alpm_pkg_cmp);
...

Because on each insert the list has to be traversed, pkgcache building has time complexity of O(n^2). 

Merge-sort is the quickest sort algorithm for linked lists (O(n*log(n)). So i changed above code to use merge-sort.

Results on my System:

bash-3.2$ time ./src/pacman/pacman.static.old -Ss test >/dev/null 

real    0m0.791s
user    0m0.432s
sys     0m0.228s
bash-3.2$ time ./src/pacman/pacman.static -Ss test >/dev/null 

real    0m0.493s
user    0m0.232s
sys     0m0.260s
bash-3.2$ 

Well, only 0.2 seconds for 70 lines of code patch ;)

The different complexity becomes obvious, if you use large repositories. I created a 15000-dummy-packages repository:

bash-3.2$ time ./src/pacman/pacman.static.old -Ss test >/dev/null 

real    0m5.530s
user    0m4.588s
sys     0m0.828s
bash-3.2$ time ./src/pacman/pacman.static -Ss test >/dev/null 

real    0m2.127s
user    0m0.836s
sys     0m1.032s

Don't be surprised by the small real time. I started each command multiple
to prevent IO.

Jürgen
-------------- next part --------------
Index: lib/libalpm/cache.c
===================================================================
RCS file: /home/cvs-pacman/pacman-lib/lib/libalpm/cache.c,v
retrieving revision 1.23
diff -u -p -r1.23 cache.c
--- lib/libalpm/cache.c	1 Nov 2006 06:30:47 -0000	1.23
+++ lib/libalpm/cache.c	10 Jan 2007 20:19:58 -0000
@@ -44,6 +44,7 @@
 int _alpm_db_load_pkgcache(pmdb_t *db, unsigned char infolevel)
 {
 	pmpkg_t *info;
+	int count = 0;
 	/* The group cache needs INFRQ_DESC as well */
 	/*unsigned char infolevel = INFRQ_DEPENDS | INFRQ_DESC;*/
 
@@ -61,9 +62,10 @@ int _alpm_db_load_pkgcache(pmdb_t *db, u
 		info->origin = PKG_FROM_CACHE;
 		info->data = db;
 		/* add to the collection */
-		db->pkgcache = _alpm_list_add_sorted(db->pkgcache, info, _alpm_pkg_cmp);
+		db->pkgcache = _alpm_list_add(db->pkgcache, info);
+		count++;
 	}
-
+	db->pkgcache = _alpm_list_msort(db->pkgcache, count, _alpm_pkg_cmp);
 	return(0);
 }
 
Index: lib/libalpm/list.c
===================================================================
RCS file: /home/cvs-pacman/pacman-lib/lib/libalpm/list.c,v
retrieving revision 1.17
diff -u -p -r1.17 list.c
--- lib/libalpm/list.c	20 Nov 2006 09:10:24 -0000	1.17
+++ lib/libalpm/list.c	10 Jan 2007 20:19:58 -0000
@@ -134,6 +134,84 @@ pmlist_t *_alpm_list_add_sorted(pmlist_t
 	return(list);
 }
 
+/*  return nth element from list (starting with 0) */
+pmlist_t* _alpm_list_nth(pmlist_t *list, int n) {
+	while (n--)
+		list = list->next;
+	return list;
+}
+
+/* merge the two sorted sublists into one sorted list */
+pmlist_t*  _alpm_list_mmerge(pmlist_t *left, pmlist_t *right, _alpm_fn_cmp fn) {
+	pmlist_t *newlist;
+	pmlist_t *lp;
+  
+	if (left == NULL) 
+		return right;
+	if (right == NULL)
+		return left;
+
+	if (fn(left->data, right->data) <= 0) {
+		newlist  = left;
+		left = left->next;
+	} 
+	else {
+		newlist = right;
+		right = right->next;
+	}
+	newlist->prev = NULL;
+	newlist->next = NULL;
+	newlist->last = NULL;
+	lp = newlist;
+
+	while ((left != NULL) && (right != NULL)) {
+		if (fn(left->data, right->data) <= 0) {
+			lp->next = left;
+			left->prev = lp;
+			left = left->next;
+		} 
+		else {
+			lp->next = right;
+			right->prev = lp;
+			right = right->next;
+		}
+		lp = lp->next;
+		lp->next = NULL;
+		newlist->last = lp;      
+	}
+	if (left != NULL) {
+		lp->next = left;
+		left->prev = lp;
+		newlist->last = left->last;
+	}
+	else if (right != NULL) {
+		lp->next = right;
+		right->prev = lp;
+		newlist->last = right->last;
+	}
+	return newlist;
+}
+
+/*  sort an list of size n using mergesort algorithm  */
+pmlist_t* _alpm_list_msort(pmlist_t *list, int len, _alpm_fn_cmp fn) {
+	if (len > 1 ) {
+		pmlist_t *left = list;
+		pmlist_t *lastleft = _alpm_list_nth(list, len/2 - 1);
+		pmlist_t *right = lastleft->next;
+		/* update rights last element, to previous last element*/
+		right->last =  left->last;	
+		/* update lefts last element */
+		left->last = lastleft;	
+		/* terminate first list */
+		lastleft->next = NULL;	
+
+		left = _alpm_list_msort(left, len/2, fn);
+		right = _alpm_list_msort(right, len - (len/2), fn);
+		list = _alpm_list_mmerge(left, right, fn);
+	}
+	return list;
+}
+
 /* Remove an item in a list. Use the given comparaison function to find the
  * item.
  * If the item is found, 'data' is pointing to the removed element.
Index: lib/libalpm/list.h
===================================================================
RCS file: /home/cvs-pacman/pacman-lib/lib/libalpm/list.h,v
retrieving revision 1.18
diff -u -p -r1.18 list.h
--- lib/libalpm/list.h	20 Nov 2006 09:10:24 -0000	1.18
+++ lib/libalpm/list.h	10 Jan 2007 20:19:58 -0000
@@ -43,6 +43,9 @@ pmlist_t *_alpm_list_new(void);
 void _alpm_list_free(pmlist_t *list, _alpm_fn_free fn);
 pmlist_t *_alpm_list_add(pmlist_t *list, void *data);
 pmlist_t *_alpm_list_add_sorted(pmlist_t *list, void *data, _alpm_fn_cmp fn);
+pmlist_t* _alpm_list_mmerge(pmlist_t *left, pmlist_t *right, _alpm_fn_cmp fn);
+pmlist_t* _alpm_list_msort(pmlist_t *list, int len, _alpm_fn_cmp fn);
+pmlist_t* _alpm_list_nth(pmlist_t *list, int n);
 pmlist_t *_alpm_list_remove(pmlist_t *haystack, void *needle, _alpm_fn_cmp fn, void **data);
 int _alpm_list_count(const pmlist_t *list);
 int _alpm_list_is_in(void *needle, pmlist_t *haystack);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://archlinux.org/pipermail/pacman-dev/attachments/20070110/8776ceff/attachment.pgp>


More information about the pacman-dev mailing list