[pacman-dev] [PATCH] _alpm_db_load_pkgcache: use mergesort to improve performance
Jürgen Hötzel
juergen at hoetzel.info
Wed Jan 10 16:35:00 EST 2007
Hi,
Yesterday i started to analyze pacman's internal. pacman's db speed is
typically bound by your systems IO performance: It has to read all db
files, if you search repositories.
But i was also surprised by the CPU time utilized. So i started profiling
"pacman -Ss test":
calls ms/call ms/call name
3751 0.01 0.01 _alpm_list_add_sorted
2088191 0.00 0.00 _alpm_pkg_cmp
_alpm_list_add_sorted is called 2088191 times while building the
pkgcache. This is because every package is sorted into a linked list while
scanning the database files:
while((info = _alpm_db_scan(db, NULL, infolevel)) != NULL) {
...
db->pkgcache = _alpm_list_add_sorted(db->pkgcache, info, _alpm_pkg_cmp);
...
Because on each insert the list has to be traversed, pkgcache building has time complexity of O(n^2).
Merge-sort is the quickest sort algorithm for linked lists (O(n*log(n)). So i changed above code to use merge-sort.
Results on my System:
bash-3.2$ time ./src/pacman/pacman.static.old -Ss test >/dev/null
real 0m0.791s
user 0m0.432s
sys 0m0.228s
bash-3.2$ time ./src/pacman/pacman.static -Ss test >/dev/null
real 0m0.493s
user 0m0.232s
sys 0m0.260s
bash-3.2$
Well, only 0.2 seconds for 70 lines of code patch ;)
The different complexity becomes obvious, if you use large repositories. I created a 15000-dummy-packages repository:
bash-3.2$ time ./src/pacman/pacman.static.old -Ss test >/dev/null
real 0m5.530s
user 0m4.588s
sys 0m0.828s
bash-3.2$ time ./src/pacman/pacman.static -Ss test >/dev/null
real 0m2.127s
user 0m0.836s
sys 0m1.032s
Don't be surprised by the small real time. I started each command multiple
to prevent IO.
Jürgen
-------------- next part --------------
Index: lib/libalpm/cache.c
===================================================================
RCS file: /home/cvs-pacman/pacman-lib/lib/libalpm/cache.c,v
retrieving revision 1.23
diff -u -p -r1.23 cache.c
--- lib/libalpm/cache.c 1 Nov 2006 06:30:47 -0000 1.23
+++ lib/libalpm/cache.c 10 Jan 2007 20:19:58 -0000
@@ -44,6 +44,7 @@
int _alpm_db_load_pkgcache(pmdb_t *db, unsigned char infolevel)
{
pmpkg_t *info;
+ int count = 0;
/* The group cache needs INFRQ_DESC as well */
/*unsigned char infolevel = INFRQ_DEPENDS | INFRQ_DESC;*/
@@ -61,9 +62,10 @@ int _alpm_db_load_pkgcache(pmdb_t *db, u
info->origin = PKG_FROM_CACHE;
info->data = db;
/* add to the collection */
- db->pkgcache = _alpm_list_add_sorted(db->pkgcache, info, _alpm_pkg_cmp);
+ db->pkgcache = _alpm_list_add(db->pkgcache, info);
+ count++;
}
-
+ db->pkgcache = _alpm_list_msort(db->pkgcache, count, _alpm_pkg_cmp);
return(0);
}
Index: lib/libalpm/list.c
===================================================================
RCS file: /home/cvs-pacman/pacman-lib/lib/libalpm/list.c,v
retrieving revision 1.17
diff -u -p -r1.17 list.c
--- lib/libalpm/list.c 20 Nov 2006 09:10:24 -0000 1.17
+++ lib/libalpm/list.c 10 Jan 2007 20:19:58 -0000
@@ -134,6 +134,84 @@ pmlist_t *_alpm_list_add_sorted(pmlist_t
return(list);
}
+/* return nth element from list (starting with 0) */
+pmlist_t* _alpm_list_nth(pmlist_t *list, int n) {
+ while (n--)
+ list = list->next;
+ return list;
+}
+
+/* merge the two sorted sublists into one sorted list */
+pmlist_t* _alpm_list_mmerge(pmlist_t *left, pmlist_t *right, _alpm_fn_cmp fn) {
+ pmlist_t *newlist;
+ pmlist_t *lp;
+
+ if (left == NULL)
+ return right;
+ if (right == NULL)
+ return left;
+
+ if (fn(left->data, right->data) <= 0) {
+ newlist = left;
+ left = left->next;
+ }
+ else {
+ newlist = right;
+ right = right->next;
+ }
+ newlist->prev = NULL;
+ newlist->next = NULL;
+ newlist->last = NULL;
+ lp = newlist;
+
+ while ((left != NULL) && (right != NULL)) {
+ if (fn(left->data, right->data) <= 0) {
+ lp->next = left;
+ left->prev = lp;
+ left = left->next;
+ }
+ else {
+ lp->next = right;
+ right->prev = lp;
+ right = right->next;
+ }
+ lp = lp->next;
+ lp->next = NULL;
+ newlist->last = lp;
+ }
+ if (left != NULL) {
+ lp->next = left;
+ left->prev = lp;
+ newlist->last = left->last;
+ }
+ else if (right != NULL) {
+ lp->next = right;
+ right->prev = lp;
+ newlist->last = right->last;
+ }
+ return newlist;
+}
+
+/* sort an list of size n using mergesort algorithm */
+pmlist_t* _alpm_list_msort(pmlist_t *list, int len, _alpm_fn_cmp fn) {
+ if (len > 1 ) {
+ pmlist_t *left = list;
+ pmlist_t *lastleft = _alpm_list_nth(list, len/2 - 1);
+ pmlist_t *right = lastleft->next;
+ /* update rights last element, to previous last element*/
+ right->last = left->last;
+ /* update lefts last element */
+ left->last = lastleft;
+ /* terminate first list */
+ lastleft->next = NULL;
+
+ left = _alpm_list_msort(left, len/2, fn);
+ right = _alpm_list_msort(right, len - (len/2), fn);
+ list = _alpm_list_mmerge(left, right, fn);
+ }
+ return list;
+}
+
/* Remove an item in a list. Use the given comparaison function to find the
* item.
* If the item is found, 'data' is pointing to the removed element.
Index: lib/libalpm/list.h
===================================================================
RCS file: /home/cvs-pacman/pacman-lib/lib/libalpm/list.h,v
retrieving revision 1.18
diff -u -p -r1.18 list.h
--- lib/libalpm/list.h 20 Nov 2006 09:10:24 -0000 1.18
+++ lib/libalpm/list.h 10 Jan 2007 20:19:58 -0000
@@ -43,6 +43,9 @@ pmlist_t *_alpm_list_new(void);
void _alpm_list_free(pmlist_t *list, _alpm_fn_free fn);
pmlist_t *_alpm_list_add(pmlist_t *list, void *data);
pmlist_t *_alpm_list_add_sorted(pmlist_t *list, void *data, _alpm_fn_cmp fn);
+pmlist_t* _alpm_list_mmerge(pmlist_t *left, pmlist_t *right, _alpm_fn_cmp fn);
+pmlist_t* _alpm_list_msort(pmlist_t *list, int len, _alpm_fn_cmp fn);
+pmlist_t* _alpm_list_nth(pmlist_t *list, int n);
pmlist_t *_alpm_list_remove(pmlist_t *haystack, void *needle, _alpm_fn_cmp fn, void **data);
int _alpm_list_count(const pmlist_t *list);
int _alpm_list_is_in(void *needle, pmlist_t *haystack);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://archlinux.org/pipermail/pacman-dev/attachments/20070110/8776ceff/attachment.pgp>
More information about the pacman-dev
mailing list