[pacman-dev] [PATCH 1/2] lib/conflict: use a binary search within filelists
Take advantage of the fact that our filelists are arrays sorted by filename with a known length and use a binary search. This should speed up file conflict checking, particularly when larger packages are involved. Signed-off-by: Dave Reisner <dreisner@archlinux.org> --- lib/libalpm/conflict.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/lib/libalpm/conflict.c b/lib/libalpm/conflict.c index 7494fd7..fd4eff1 100644 --- a/lib/libalpm/conflict.c +++ b/lib/libalpm/conflict.c @@ -315,19 +315,16 @@ void _alpm_fileconflict_free(alpm_fileconflict_t *conflict) const alpm_file_t *_alpm_filelist_contains(alpm_filelist_t *filelist, const char *name) { - size_t i; - const alpm_file_t *file; + alpm_file_t key; if(!filelist) { return NULL; } - for(file = filelist->files, i = 0; i < filelist->count; file++, i++) { - if(strcmp(file->name, name) == 0) { - return file; - } - } - return NULL; + key.name = (char *)name; + + return bsearch(&key, filelist->files, filelist->count, + sizeof(alpm_file_t), _alpm_files_cmp); } static int dir_belongsto_pkg(alpm_handle_t *handle, const char *dirpath, -- 1.7.11.1
On the assumption that these arrays are already mostly sorted, use the standard quicksort method to sort the files arrays. The files_msort function name is tweaked to give it a more general name to reflect this change. Signed-off-by: Dave Reisner <dreisner@archlinux.org> --- I didn't actually measure the impact here. One thing to consider here is that qsort isn't reentrant, and qsort_r isn't standard. lib/libalpm/be_package.c | 47 +++-------------------------------------------- 1 file changed, 3 insertions(+), 44 deletions(-) diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c index dd027f5..1a6324c 100644 --- a/lib/libalpm/be_package.c +++ b/lib/libalpm/be_package.c @@ -248,50 +248,9 @@ static int parse_descfile(alpm_handle_t *handle, struct archive *a, alpm_pkg_t * return 0; } -static void files_merge(alpm_file_t a[], alpm_file_t b[], alpm_file_t c[], - size_t m, size_t n) +static alpm_file_t *files_sort(alpm_file_t *files, size_t n) { - size_t i = 0, j = 0, k = 0; - while(i < m && j < n) { - if(strcmp(a[i].name, b[j].name) < 0) { - c[k++] = a[i++]; - } else { - c[k++] = b[j++]; - } - } - while(i < m) { - c[k++] = a[i++]; - } - while(j < n) { - c[k++] = b[j++]; - } -} - -static alpm_file_t *files_msort(alpm_file_t *files, size_t n) -{ - alpm_file_t *work; - size_t blocksize = 1; - - CALLOC(work, n, sizeof(alpm_file_t), return NULL); - - for(blocksize = 1; blocksize < n; blocksize *= 2) { - size_t i, max_extent = 0; - for(i = 0; i < n - blocksize; i += 2 * blocksize) { - /* this limits our actual merge to the length of the array, since we will - * not likely be a perfect power of two. */ - size_t right_blocksize = blocksize; - if(i + blocksize * 2 > n) { - right_blocksize = n - i - blocksize; - } - files_merge(files + i, files + i + blocksize, work + i, - blocksize, right_blocksize); - max_extent = i + blocksize + right_blocksize; - } - /* ensure we only copy what we actually touched on this merge pass, - * no more, no less */ - memcpy(files, work, max_extent * sizeof(alpm_file_t)); - } - free(work); + qsort(files, n, sizeof(alpm_file_t), _alpm_files_cmp); return files; } @@ -536,7 +495,7 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle, /* "checking for conflicts" requires a sorted list, ensure that here */ _alpm_log(handle, ALPM_LOG_DEBUG, "sorting package filelist for %s\n", pkgfile); - newpkg->files.files = files_msort(newpkg->files.files, + newpkg->files.files = files_sort(newpkg->files.files, newpkg->files.count); } newpkg->infolevel |= INFRQ_FILES; -- 1.7.11.1
On Thu, Jul 12, 2012 at 12:18 PM, Dave Reisner <dreisner@archlinux.org> wrote:
On the assumption that these arrays are already mostly sorted, use the standard quicksort method to sort the files arrays. The files_msort function name is tweaked to give it a more general name to reflect this change.
Signed-off-by: Dave Reisner <dreisner@archlinux.org> --- I didn't actually measure the impact here. One thing to consider here is that qsort isn't reentrant, and qsort_r isn't standard.
1. Add qsort_r() to configure.ac's list of functions to check for, use that if available, else use plain qsort() ? 2. I can take a look at the impact performance wise, but I do like the loc-- going on here. 3. I'd feel better if this patch came before the other one, and if we also added a qsort() in loading the local filelists, to ensure our bsearch() doesn't fall over due to reading unsorted lists off the filesystem.
lib/libalpm/be_package.c | 47 +++-------------------------------------------- 1 file changed, 3 insertions(+), 44 deletions(-)
diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c index dd027f5..1a6324c 100644 --- a/lib/libalpm/be_package.c +++ b/lib/libalpm/be_package.c @@ -248,50 +248,9 @@ static int parse_descfile(alpm_handle_t *handle, struct archive *a, alpm_pkg_t * return 0; }
-static void files_merge(alpm_file_t a[], alpm_file_t b[], alpm_file_t c[], - size_t m, size_t n) +static alpm_file_t *files_sort(alpm_file_t *files, size_t n) { - size_t i = 0, j = 0, k = 0; - while(i < m && j < n) { - if(strcmp(a[i].name, b[j].name) < 0) { - c[k++] = a[i++]; - } else { - c[k++] = b[j++]; - } - } - while(i < m) { - c[k++] = a[i++]; - } - while(j < n) { - c[k++] = b[j++]; - } -} - -static alpm_file_t *files_msort(alpm_file_t *files, size_t n) -{ - alpm_file_t *work; - size_t blocksize = 1; - - CALLOC(work, n, sizeof(alpm_file_t), return NULL); - - for(blocksize = 1; blocksize < n; blocksize *= 2) { - size_t i, max_extent = 0; - for(i = 0; i < n - blocksize; i += 2 * blocksize) { - /* this limits our actual merge to the length of the array, since we will - * not likely be a perfect power of two. */ - size_t right_blocksize = blocksize; - if(i + blocksize * 2 > n) { - right_blocksize = n - i - blocksize; - } - files_merge(files + i, files + i + blocksize, work + i, - blocksize, right_blocksize); - max_extent = i + blocksize + right_blocksize; - } - /* ensure we only copy what we actually touched on this merge pass, - * no more, no less */ - memcpy(files, work, max_extent * sizeof(alpm_file_t)); - } - free(work); + qsort(files, n, sizeof(alpm_file_t), _alpm_files_cmp); return files; }
@@ -536,7 +495,7 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle, /* "checking for conflicts" requires a sorted list, ensure that here */ _alpm_log(handle, ALPM_LOG_DEBUG, "sorting package filelist for %s\n", pkgfile); - newpkg->files.files = files_msort(newpkg->files.files, + newpkg->files.files = files_sort(newpkg->files.files, newpkg->files.count); } newpkg->infolevel |= INFRQ_FILES; -- 1.7.11.1
participants (2)
-
Dan McGee
-
Dave Reisner