[pacman-dev] [PATCH] Convert package filelists to an array instead of linked list
This accomplishes quite a few things with one rather invasive change. 1. Iteration is much more performant, due to a reduction in pointer chasing and linear item access. 2. Data structures are smaller- we no longer have the overhead of the linked list as the file struts are now laid out consecutively in memory. 3. Memory allocation has been massively reworked. Before, we would allocate three different pieces of memory per file item- the list struct, the file struct, and the copied filename. What this resulted in was massive fragmentation of memory when loading filelists since the memory allocator had to leave holes all over the place. The new situation here now removes the need for any list item allocation; allocates the file structs in contiguous memory (and reallocs as necessary), leaving only the strings as individually allocated. Tests using valgrind (massif) show some pretty significant memory reductions on the worst case `pacman -Ql > /dev/null` (366387 files on my machine): Before: Peak heap: 54,416,024 B Useful heap: 36,840,692 B Extra heap: 17,575,332 B After: Peak heap: 38,004,352 B Useful heap: 28,101,347 B Extra heap: 9,903,005 B Several small helper methods have been introduced, including a list to array conversion helper as well as a filelist merge sort that works directly on arrays. Signed-off-by: Dan McGee <dan@archlinux.org> --- lib/libalpm/alpm.h | 11 +++++- lib/libalpm/alpm_list.c | 32 ++++++++++++++++++ lib/libalpm/alpm_list.h | 1 + lib/libalpm/be_local.c | 34 ++++++++++++++----- lib/libalpm/be_package.c | 79 ++++++++++++++++++++++++++++++++++++++------- lib/libalpm/conflict.c | 65 +++++++++++++++++++++----------------- lib/libalpm/conflict.h | 4 +- lib/libalpm/diskspace.c | 24 +++++++++----- lib/libalpm/package.c | 46 +++++++++++++++----------- lib/libalpm/package.h | 8 ++-- lib/libalpm/remove.c | 25 +++++++++------ src/pacman/package.c | 9 +++-- src/pacman/query.c | 19 ++++++----- 13 files changed, 248 insertions(+), 109 deletions(-) diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index 6e1e4bc..d9f3504 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h @@ -184,6 +184,12 @@ typedef struct _alpm_file_t { mode_t mode; } alpm_file_t; +/** Package filelist container */ +typedef struct _alpm_filelist_t { + size_t count; + alpm_file_t *files; +} alpm_filelist_t; + /** Local package or package file backup entry */ typedef struct _alpm_backup_t { char *name; @@ -671,9 +677,10 @@ alpm_list_t *alpm_pkg_get_replaces(alpm_pkg_t *pkg); * The filenames are relative to the install root, * and do not include leading slashes. * @param pkg a pointer to package - * @return a reference to an internal list of strings. + * @return a pointer to a filelist object containing a count and an array of + * package file objects */ -alpm_list_t *alpm_pkg_get_files(alpm_pkg_t *pkg); +alpm_filelist_t *alpm_pkg_get_files(alpm_pkg_t *pkg); /** Returns the list of files backed up when installing pkg. * The elements of the returned list have the form diff --git a/lib/libalpm/alpm_list.c b/lib/libalpm/alpm_list.c index 071cd99..2dcf08d 100644 --- a/lib/libalpm/alpm_list.c +++ b/lib/libalpm/alpm_list.c @@ -749,6 +749,38 @@ alpm_list_t SYMEXPORT *alpm_list_diff(const alpm_list_t *lhs, return ret; } +/** + * @brief Copy a list and data into a standard C array of fixed length. + * Note that the data elements are shallow copied so any contained pointers + * will point to the original data. + * + * @param list the list to copy + * @param n the size of the list + * @param size the size of each data element + * + * @return an array version of the original list, data copied as well + */ +void SYMEXPORT *alpm_list_to_array(const alpm_list_t *list, size_t n, + size_t size) +{ + size_t i; + const alpm_list_t *item; + char *array; + + if(n == 0) { + return NULL; + } + + array = calloc(n, size); + if(array == NULL) { + return NULL; + } + for(i = 0, item = list; i < n && item; i++, item = item->next) { + memcpy(array + i * n, item->data, size); + } + return array; +} + /** @} */ /* vim: set ts=2 sw=2 noet: */ diff --git a/lib/libalpm/alpm_list.h b/lib/libalpm/alpm_list.h index 5e8ca46..cd7b029 100644 --- a/lib/libalpm/alpm_list.h +++ b/lib/libalpm/alpm_list.h @@ -81,6 +81,7 @@ char *alpm_list_find_str(const alpm_list_t *haystack, const char *needle); alpm_list_t *alpm_list_diff(const alpm_list_t *lhs, const alpm_list_t *rhs, alpm_list_fn_cmp fn); void alpm_list_diff_sorted(const alpm_list_t *left, const alpm_list_t *right, alpm_list_fn_cmp fn, alpm_list_t **onlyleft, alpm_list_t **onlyright); +void *alpm_list_to_array(const alpm_list_t *list, size_t n, size_t size); #ifdef __cplusplus } diff --git a/lib/libalpm/be_local.c b/lib/libalpm/be_local.c index 70f242d..a725076 100644 --- a/lib/libalpm/be_local.c +++ b/lib/libalpm/be_local.c @@ -177,10 +177,10 @@ static alpm_list_t *_cache_get_deltas(alpm_pkg_t UNUSED *pkg) return NULL; } -static alpm_list_t *_cache_get_files(alpm_pkg_t *pkg) +static alpm_filelist_t *_cache_get_files(alpm_pkg_t *pkg) { LAZY_LOAD(INFRQ_FILES, NULL); - return pkg->files; + return &(pkg->files); } static alpm_list_t *_cache_get_backup(alpm_pkg_t *pkg) @@ -631,13 +631,28 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) while(fgets(line, sizeof(line), fp)) { _alpm_strtrim(line); if(strcmp(line, "%FILES%") == 0) { + size_t files_count = 0, files_size = 8; + alpm_file_t *files; + + CALLOC(files, files_size, sizeof(alpm_file_t), goto error); while(fgets(line, sizeof(line), fp) && strlen(_alpm_strtrim(line))) { - alpm_file_t *file; - CALLOC(file, 1, sizeof(alpm_file_t), goto error); - STRDUP(file->name, line, goto error); + if(files_count >= files_size) { + files = realloc(files, sizeof(alpm_file_t) * files_size * 2); + if(!files) { + ALLOC_FAIL(sizeof(alpm_file_t) * files_size * 2); + goto error; + } + memset(files + files_size, 0, sizeof(alpm_file_t) * files_size); + files_size *= 2; + } + STRDUP(files[files_count].name, line, goto error); /* TODO: lstat file, get mode/size */ - info->files = alpm_list_add(info->files, file); + files_count++; } + /* attempt to hand back any memory we don't need */ + files = realloc(files, sizeof(alpm_file_t) * files_count); + info->files.count = files_count; + info->files.files = files; } else if(strcmp(line, "%BACKUP%") == 0) { while(fgets(line, sizeof(line), fp) && strlen(_alpm_strtrim(line))) { alpm_backup_t *backup; @@ -834,10 +849,11 @@ int _alpm_local_db_write(alpm_db_t *db, alpm_pkg_t *info, alpm_dbinfrq_t inforeq retval = -1; goto cleanup; } - if(info->files) { + if(info->files.count) { + size_t i; fprintf(fp, "%%FILES%%\n"); - for(lp = info->files; lp; lp = lp->next) { - const alpm_file_t *file = lp->data; + for(i = 0; i < info->files.count; i++) { + const alpm_file_t *file = info->files.files + i; fprintf(fp, "%s\n", file->name); } fprintf(fp, "\n"); diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c index 46bdaed..d13837f 100644 --- a/lib/libalpm/be_package.c +++ b/lib/libalpm/be_package.c @@ -223,6 +223,53 @@ static int parse_descfile(alpm_handle_t *handle, struct archive *a, alpm_pkg_t * return 0; } +static void files_merge(alpm_file_t a[], alpm_file_t b[], alpm_file_t c[], + size_t m, size_t n) +{ + size_t i = 0, j = 0, k = 0; + while(i < m && j < n) { + if(strcmp(a[i].name, b[j].name) < 0) { + c[k++] = a[i++]; + } else { + c[k++] = b[j++]; + } + } + while(i < m) { + c[k++] = a[i++]; + } + while(j < n) { + c[k++] = b[j++]; + } +} + +static alpm_file_t *files_msort(alpm_file_t *files, size_t n) +{ + alpm_file_t *work; + size_t blocksize = 1; + + CALLOC(work, n, sizeof(alpm_file_t), return NULL); + + for(blocksize = 1; blocksize < n; blocksize *= 2) { + size_t i, max_extent = 0; + for(i = 0; i < n - blocksize; i += 2 * blocksize) { + /* this limits our actual merge to the length of the array, since we will + * not likely be a perfect power of two. */ + size_t right_blocksize = blocksize; + if(i + blocksize * 2 > n) { + right_blocksize = n - i - blocksize; + } + files_merge(files + i, files + i + blocksize, work + i, + blocksize, right_blocksize); + max_extent = i + blocksize + right_blocksize; + } + /* ensure we only copy what we actually touched on this merge pass, + * no more, no less */ + memcpy(files, work, max_extent * sizeof(alpm_file_t)); + } + free(work); + return files; +} + /** * Load a package and create the corresponding alpm_pkg_t struct. * @param handle the context handle @@ -241,7 +288,8 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle, const char *pkgfile, struct archive_entry *entry; alpm_pkg_t *newpkg = NULL; struct stat st; - size_t files_count = 0; + size_t files_count = 0, files_size = 0; + alpm_file_t *files = NULL; if(pkgfile == NULL || strlen(pkgfile) == 0) { RET_ERR(handle, ALPM_ERR_WRONG_ARGS, NULL); @@ -326,12 +374,21 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle, const char *pkgfile, * already been handled (for future possibilities) */ } else if(full) { /* Keep track of all files for filelist generation */ - alpm_file_t *file; - CALLOC(file, 1, sizeof(alpm_file_t), goto error); - STRDUP(file->name, entry_name, goto error); - file->size = archive_entry_size(entry); - file->mode = archive_entry_mode(entry); - newpkg->files = alpm_list_add(newpkg->files, file); + if(files_count >= files_size) { + if(files_size == 0) { + files_size = 4; + } + files = realloc(files, sizeof(alpm_file_t) * files_size * 2); + if(!files) { + ALLOC_FAIL(sizeof(alpm_file_t) * files_size * 2); + goto error; + } + memset(files + files_size, 0, sizeof(alpm_file_t) * files_size); + files_size *= 2; + } + STRDUP(files[files_count].name, entry_name, goto error); + files[files_count].size = archive_entry_size(entry); + files[files_count].mode = archive_entry_mode(entry); files_count++; } @@ -371,14 +428,10 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle, const char *pkgfile, if(full) { /* "checking for conflicts" requires a sorted list, ensure that here */ _alpm_log(handle, ALPM_LOG_DEBUG, "sorting package filelist for %s\n", pkgfile); - newpkg->files = alpm_list_msort(newpkg->files, files_count, - _alpm_files_cmp); + newpkg->files.files = files_msort(files, files_count); + newpkg->files.count = files_count; newpkg->infolevel = INFRQ_ALL; } else { - /* get rid of any partial filelist we may have collected, it is invalid */ - alpm_list_free_inner(newpkg->files, (alpm_list_fn_free)_alpm_files_free); - alpm_list_free(newpkg->files); - newpkg->files = NULL; newpkg->infolevel = INFRQ_BASE | INFRQ_DESC; } diff --git a/lib/libalpm/conflict.c b/lib/libalpm/conflict.c index eda6ba1..538c4c7 100644 --- a/lib/libalpm/conflict.c +++ b/lib/libalpm/conflict.c @@ -229,22 +229,22 @@ static const int INTERSECT = 1; * DIFFERENCE - a difference operation is performed. filesA - filesB. * INTERSECT - an intersection operation is performed. filesA & filesB. */ -static alpm_list_t *filelist_operation(alpm_list_t *filesA, alpm_list_t *filesB, - int operation) +static alpm_list_t *filelist_operation(alpm_filelist_t *filesA, + alpm_filelist_t *filesB, int operation) { alpm_list_t *ret = NULL; - alpm_list_t *pA = filesA, *pB = filesB; + size_t ctrA = 0, ctrB = 0; - while(pA && pB) { - alpm_file_t *fileA = pA->data; - alpm_file_t *fileB = pB->data; + while(ctrA < filesA->count && ctrB < filesB->count) { + alpm_file_t *fileA = filesA->files + ctrA; + alpm_file_t *fileB = filesB->files + ctrB; const char *strA = fileA->name; const char *strB = fileB->name; /* skip directories, we don't care about them */ if(strA[strlen(strA)-1] == '/') { - pA = pA->next; + ctrA++; } else if(strB[strlen(strB)-1] == '/') { - pB = pB->next; + ctrB++; } else { int cmp = strcmp(strA, strB); if(cmp < 0) { @@ -252,29 +252,29 @@ static alpm_list_t *filelist_operation(alpm_list_t *filesA, alpm_list_t *filesB, /* item only in filesA, qualifies as a difference */ ret = alpm_list_add(ret, fileA); } - pA = pA->next; + ctrA++; } else if(cmp > 0) { - pB = pB->next; + ctrB++; } else { if(operation == INTERSECT) { /* item in both, qualifies as an intersect */ ret = alpm_list_add(ret, fileA); } - pA = pA->next; - pB = pB->next; + ctrA++; + ctrB++; } } } /* if doing a difference, ensure we have completely emptied pA */ - while(operation == DIFFERENCE && pA) { - alpm_file_t *fileA = pA->data; + while(operation == DIFFERENCE && ctrA < filesA->count) { + alpm_file_t *fileA = filesA->files + ctrA; const char *strA = fileA->name; /* skip directories */ if(strA[strlen(strA)-1] != '/') { ret = alpm_list_add(ret, fileA); } - pA = pA->next; + ctrA++; } return ret; @@ -319,16 +319,16 @@ void _alpm_fileconflict_free(alpm_fileconflict_t *conflict) FREE(conflict); } -const alpm_file_t *_alpm_filelist_contains(const alpm_list_t *haystack, - const char *needle) +const alpm_file_t *_alpm_filelist_contains(alpm_filelist_t *filelist, + const char *name) { - const alpm_list_t *lp = haystack; - while(lp) { - const alpm_file_t *file = lp->data; - if(strcmp(file->name, needle) == 0) { + size_t i; + const alpm_file_t *file = filelist->files; + for(i = 0; i < filelist->count; i++) { + if(strcmp(file->name, name) == 0) { return file; } - lp = lp->next; + file++; } return NULL; } @@ -400,8 +400,10 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle, * different cases. */ for(current = 0, i = upgrade; i; i = i->next, current++) { alpm_pkg_t *p1 = i->data; - alpm_list_t *j, *tmpfiles; + alpm_list_t *j; + alpm_filelist_t tmpfiles; alpm_pkg_t *dbpkg; + size_t filenum; int percent = (current * 100) / numtargs; PROGRESS(trans, ALPM_TRANS_PROGRESS_CONFLICTS_START, "", percent, @@ -444,16 +446,21 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle, * that the former list needs to be freed while the latter list should NOT * be freed. */ if(dbpkg) { + alpm_list_t *difference; /* older ver of package currently installed */ - tmpfiles = filelist_operation(alpm_pkg_get_files(p1), + difference = filelist_operation(alpm_pkg_get_files(p1), alpm_pkg_get_files(dbpkg), DIFFERENCE); + tmpfiles.count = alpm_list_count(difference); + tmpfiles.files = alpm_list_to_array(difference, tmpfiles.count, + sizeof(alpm_file_t)); + alpm_list_free(difference); } else { /* no version of package currently installed */ - tmpfiles = alpm_pkg_get_files(p1); + tmpfiles = *alpm_pkg_get_files(p1); } - for(j = tmpfiles; j; j = j->next) { - alpm_file_t *file = j->data; + for(filenum = 0; filenum < tmpfiles.count; filenum++) { + alpm_file_t *file = tmpfiles.files + filenum; const char *filestr = file->name; const char *relative_path; alpm_list_t *k; @@ -572,7 +579,7 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle, FREELIST(conflicts); if(dbpkg) { /* only freed if it was generated from filelist_operation() */ - alpm_list_free(tmpfiles); + free(tmpfiles.files); } return NULL; } @@ -580,7 +587,7 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle, } if(dbpkg) { /* only freed if it was generated from filelist_operation() */ - alpm_list_free(tmpfiles); + free(tmpfiles.files); } } PROGRESS(trans, ALPM_TRANS_PROGRESS_CONFLICTS_START, "", 100, diff --git a/lib/libalpm/conflict.h b/lib/libalpm/conflict.h index f2ab625..8b7471f 100644 --- a/lib/libalpm/conflict.h +++ b/lib/libalpm/conflict.h @@ -33,8 +33,8 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle, void _alpm_fileconflict_free(alpm_fileconflict_t *conflict); -const alpm_file_t *_alpm_filelist_contains(const alpm_list_t *haystack, - const char *needle); +const alpm_file_t *_alpm_filelist_contains(alpm_filelist_t *filelist, + const char *name); #endif /* _ALPM_CONFLICT_H */ diff --git a/lib/libalpm/diskspace.c b/lib/libalpm/diskspace.c index 3ab62a8..b28b88a 100644 --- a/lib/libalpm/diskspace.c +++ b/lib/libalpm/diskspace.c @@ -147,14 +147,18 @@ static alpm_mountpoint_t *match_mount_point(const alpm_list_t *mount_points, static int calculate_removed_size(alpm_handle_t *handle, const alpm_list_t *mount_points, alpm_pkg_t *pkg) { - alpm_list_t *i; + size_t i; + alpm_filelist_t *filelist = alpm_pkg_get_files(pkg); - alpm_list_t *files = alpm_pkg_get_files(pkg); - for(i = files; i; i = i->next) { + if(!filelist->count) { + return 0; + } + + for(i = 0; i < filelist->count; i++) { + const alpm_file_t *file = filelist->files + i; alpm_mountpoint_t *mp; struct stat st; char path[PATH_MAX]; - const alpm_file_t *file = i->data; const char *filename = file->name; snprintf(path, PATH_MAX, "%s%s", handle->root, filename); @@ -185,13 +189,17 @@ static int calculate_removed_size(alpm_handle_t *handle, static int calculate_installed_size(alpm_handle_t *handle, const alpm_list_t *mount_points, alpm_pkg_t *pkg) { - alpm_list_t *i; + size_t i; + alpm_filelist_t *filelist = alpm_pkg_get_files(pkg); - for(i = alpm_pkg_get_files(pkg); i; i = i->next) { - const alpm_file_t *file = i->data; + if(!filelist->count) { + return 0; + } + + for(i = 0; i < filelist->count; i++) { + const alpm_file_t *file = filelist->files + i; alpm_mountpoint_t *mp; char path[PATH_MAX]; - const char *filename = file->name; /* libarchive reports these as zero size anyways */ diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c index ae9b9a9..fd3d0c6 100644 --- a/lib/libalpm/package.c +++ b/lib/libalpm/package.c @@ -106,7 +106,7 @@ static alpm_list_t *_pkg_get_conflicts(alpm_pkg_t *pkg) { return pkg->conflicts static alpm_list_t *_pkg_get_provides(alpm_pkg_t *pkg) { return pkg->provides; } static alpm_list_t *_pkg_get_replaces(alpm_pkg_t *pkg) { return pkg->replaces; } static alpm_list_t *_pkg_get_deltas(alpm_pkg_t *pkg) { return pkg->deltas; } -static alpm_list_t *_pkg_get_files(alpm_pkg_t *pkg) { return pkg->files; } +static alpm_filelist_t *_pkg_get_files(alpm_pkg_t *pkg) { return &(pkg->files); } static alpm_list_t *_pkg_get_backup(alpm_pkg_t *pkg) { return pkg->backup; } static void *_pkg_changelog_open(alpm_pkg_t UNUSED *pkg) @@ -313,7 +313,7 @@ alpm_list_t SYMEXPORT *alpm_pkg_get_deltas(alpm_pkg_t *pkg) return pkg->ops->get_deltas(pkg); } -alpm_list_t SYMEXPORT *alpm_pkg_get_files(alpm_pkg_t *pkg) +alpm_filelist_t SYMEXPORT *alpm_pkg_get_files(alpm_pkg_t *pkg) { ASSERT(pkg != NULL, return NULL); pkg->handle->pm_errno = 0; @@ -427,22 +427,14 @@ alpm_list_t SYMEXPORT *alpm_pkg_compute_requiredby(alpm_pkg_t *pkg) /** @} */ -void _alpm_files_free(alpm_file_t *file) +alpm_file_t *_alpm_file_copy(alpm_file_t *dest, + const alpm_file_t *src) { - free(file->name); - free(file); -} - -alpm_file_t *_alpm_files_dup(const alpm_file_t *file) -{ - alpm_file_t *newfile; - CALLOC(newfile, 1, sizeof(alpm_file_t), return NULL); - - STRDUP(newfile->name, file->name, return NULL); - newfile->size = file->size; - newfile->mode = file->mode; + STRDUP(dest->name, src->name, return NULL); + dest->size = src->size; + dest->mode = src->mode; - return newfile; + return dest; } /* Helper function for comparing files list entries @@ -493,8 +485,17 @@ alpm_pkg_t *_alpm_pkg_dup(alpm_pkg_t *pkg) newpkg->licenses = alpm_list_strdup(pkg->licenses); newpkg->replaces = alpm_list_strdup(pkg->replaces); newpkg->groups = alpm_list_strdup(pkg->groups); - for(i = pkg->files; i; i = alpm_list_next(i)) { - newpkg->files = alpm_list_add(newpkg->files, _alpm_files_dup(i->data)); + if(pkg->files.count) { + size_t filenum; + size_t len = sizeof(alpm_file_t) * pkg->files.count; + MALLOC(newpkg->files.files, len, goto cleanup); + for(filenum = 0; filenum < pkg->files.count; filenum++) { + if(!_alpm_file_copy(newpkg->files.files + filenum, + pkg->files.files + filenum)) { + goto cleanup; + } + } + newpkg->files.count = pkg->files.count; } for(i = pkg->backup; i; i = alpm_list_next(i)) { newpkg->backup = alpm_list_add(newpkg->backup, _alpm_backup_dup(i->data)); @@ -545,8 +546,13 @@ void _alpm_pkg_free(alpm_pkg_t *pkg) FREELIST(pkg->licenses); FREELIST(pkg->replaces); FREELIST(pkg->groups); - alpm_list_free_inner(pkg->files, (alpm_list_fn_free)_alpm_files_free); - alpm_list_free(pkg->files); + if(pkg->files.count) { + size_t i; + for(i = 0; i < pkg->files.count; i++) { + free(pkg->files.files[i].name); + } + free(pkg->files.files); + } alpm_list_free_inner(pkg->backup, (alpm_list_fn_free)_alpm_backup_free); alpm_list_free(pkg->backup); alpm_list_free_inner(pkg->depends, (alpm_list_fn_free)_alpm_dep_free); diff --git a/lib/libalpm/package.h b/lib/libalpm/package.h index f76812b..d19d833 100644 --- a/lib/libalpm/package.h +++ b/lib/libalpm/package.h @@ -69,7 +69,7 @@ struct pkg_operations { alpm_list_t *(*get_provides) (alpm_pkg_t *); alpm_list_t *(*get_replaces) (alpm_pkg_t *); alpm_list_t *(*get_deltas) (alpm_pkg_t *); - alpm_list_t *(*get_files) (alpm_pkg_t *); + alpm_filelist_t *(*get_files) (alpm_pkg_t *); alpm_list_t *(*get_backup) (alpm_pkg_t *); void *(*changelog_open) (alpm_pkg_t *); @@ -126,7 +126,6 @@ struct __alpm_pkg_t { alpm_list_t *licenses; alpm_list_t *replaces; alpm_list_t *groups; - alpm_list_t *files; alpm_list_t *backup; alpm_list_t *depends; alpm_list_t *optdepends; @@ -137,10 +136,11 @@ struct __alpm_pkg_t { alpm_list_t *removes; /* in transaction targets only */ struct pkg_operations *ops; + + alpm_filelist_t files; }; -void _alpm_files_free(alpm_file_t *file); -alpm_file_t *_alpm_files_dup(const alpm_file_t *file); +alpm_file_t *_alpm_file_copy(alpm_file_t *dest, const alpm_file_t *src); int _alpm_files_cmp(const void *f1, const void *f2); alpm_pkg_t* _alpm_pkg_new(void); diff --git a/lib/libalpm/remove.c b/lib/libalpm/remove.c index 2c5d98c..83c437f 100644 --- a/lib/libalpm/remove.c +++ b/lib/libalpm/remove.c @@ -262,15 +262,15 @@ static void unlink_file(alpm_handle_t *handle, alpm_pkg_t *info, local_pkgs = _alpm_db_get_pkgcache(handle->db_local); for(local = local_pkgs; local && !found; local = local->next) { alpm_pkg_t *local_pkg = local->data; - alpm_list_t *files; + alpm_filelist_t *filelist; /* we duplicated the package when we put it in the removal list, so we * so we can't use direct pointer comparison here. */ if(_alpm_pkg_cmp(info, local_pkg) == 0) { continue; } - files = alpm_pkg_get_files(local_pkg); - if(_alpm_filelist_contains(files, fileobj->name)) { + filelist = alpm_pkg_get_files(local_pkg); + if(_alpm_filelist_contains(filelist, fileobj->name)) { _alpm_log(handle, ALPM_LOG_DEBUG, "keeping directory %s (owned by %s)\n", file, local_pkg->name); found = 1; @@ -320,11 +320,13 @@ int _alpm_remove_single_package(alpm_handle_t *handle, alpm_pkg_t *oldpkg, alpm_pkg_t *newpkg, size_t targ_count, size_t pkg_count) { - alpm_list_t *files, *skip_remove, *lp; + alpm_list_t *skip_remove; size_t filenum = 0, position = 0; const char *pkgname = oldpkg->name; const char *pkgver = oldpkg->version; + alpm_filelist_t *filelist; char scriptlet[PATH_MAX]; + size_t i; if(newpkg) { _alpm_log(handle, ALPM_LOG_DEBUG, "removing old package first (%s-%s)\n", @@ -349,7 +351,8 @@ int _alpm_remove_single_package(alpm_handle_t *handle, } if(newpkg) { - alpm_list_t *newfiles, *b; + alpm_filelist_t *newfiles; + alpm_list_t *b; skip_remove = alpm_list_join( alpm_list_strdup(handle->trans->skip_remove), alpm_list_strdup(handle->noupgrade)); @@ -371,9 +374,10 @@ int _alpm_remove_single_package(alpm_handle_t *handle, skip_remove = alpm_list_strdup(handle->trans->skip_remove); } - files = alpm_pkg_get_files(oldpkg); - for(lp = files; lp; lp = lp->next) { - if(!can_remove_file(handle, lp->data, skip_remove)) { + filelist = alpm_pkg_get_files(oldpkg); + for(i = 0; i < filelist->count; i++) { + alpm_file_t *file = filelist->files + i; + if(!can_remove_file(handle, file, skip_remove)) { _alpm_log(handle, ALPM_LOG_DEBUG, "not removing package '%s', can't remove all files\n", pkgname); RET_ERR(handle, ALPM_ERR_PKG_CANT_REMOVE, -1); @@ -390,9 +394,10 @@ int _alpm_remove_single_package(alpm_handle_t *handle, } /* iterate through the list backwards, unlinking files */ - for(lp = alpm_list_last(files); lp; lp = alpm_list_previous(lp)) { + for(i = filelist->count; i > 0; i--) { + alpm_file_t *file = filelist->files + i - 1; int percent; - unlink_file(handle, oldpkg, lp->data, skip_remove, + unlink_file(handle, oldpkg, file, skip_remove, handle->trans->flags & ALPM_TRANS_FLAG_NOSAVE); if(!newpkg) { diff --git a/src/pacman/package.c b/src/pacman/package.c index afbac6b..45afded 100644 --- a/src/pacman/package.c +++ b/src/pacman/package.c @@ -230,15 +230,16 @@ void dump_pkg_backups(alpm_pkg_t *pkg) void dump_pkg_files(alpm_pkg_t *pkg, int quiet) { const char *pkgname, *root; - alpm_list_t *i, *pkgfiles; + alpm_filelist_t *pkgfiles; + size_t i; pkgname = alpm_pkg_get_name(pkg); pkgfiles = alpm_pkg_get_files(pkg); root = alpm_option_get_root(config->handle); - for(i = pkgfiles; i; i = alpm_list_next(i)) { - const alpm_file_t *file = alpm_list_getdata(i); - if(!quiet){ + for(i = 0; i < pkgfiles->count; i++) { + const alpm_file_t *file = pkgfiles->files + i; + if(!quiet) { fprintf(stdout, "%s %s%s\n", pkgname, root, file->name); } else { fprintf(stdout, "%s%s\n", root, file->name); diff --git a/src/pacman/query.c b/src/pacman/query.c index 251c4dd..163c331 100644 --- a/src/pacman/query.c +++ b/src/pacman/query.c @@ -191,12 +191,13 @@ static int query_fileowner(alpm_list_t *targets) free(dname); for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) { - alpm_list_t *j; alpm_pkg_t *info = alpm_list_getdata(i); + alpm_filelist_t *filelist = alpm_pkg_get_files(info); + size_t i; - for(j = alpm_pkg_get_files(info); j && !found; j = alpm_list_next(j)) { + for(i = 0; i < filelist->count; i++) { + const alpm_file_t *file = filelist->files + i; char *ppath, *pdname; - const alpm_file_t *file = alpm_list_getdata(j); const char *pkgfile = file->name; /* avoid the costly resolve_path usage if the basenames don't match */ @@ -402,11 +403,12 @@ static int filter(alpm_pkg_t *pkg) * loop through files to check if they exist. */ static int check(alpm_pkg_t *pkg) { - alpm_list_t *i; - const char *root; + const char *root, *pkgname; int allfiles = 0, errors = 0; size_t rootlen; char f[PATH_MAX]; + alpm_filelist_t *filelist; + size_t i; root = alpm_option_get_root(config->handle); rootlen = strlen(root); @@ -417,10 +419,11 @@ static int check(alpm_pkg_t *pkg) } strcpy(f, root); - const char *pkgname = alpm_pkg_get_name(pkg); - for(i = alpm_pkg_get_files(pkg); i; i = alpm_list_next(i)) { + pkgname = alpm_pkg_get_name(pkg); + filelist = alpm_pkg_get_files(pkg); + for(i = 0; i < filelist->count; i++) { + const alpm_file_t *file = filelist->files + i; struct stat st; - const alpm_file_t *file = alpm_list_getdata(i); const char *path = file->name; if(rootlen + 1 + strlen(path) > PATH_MAX) { -- 1.7.6
On 19/07/11 20:01, Dan McGee wrote:
This accomplishes quite a few things with one rather invasive change.
1. Iteration is much more performant, due to a reduction in pointer chasing and linear item access. 2. Data structures are smaller- we no longer have the overhead of the linked list as the file struts are now laid out consecutively in memory. 3. Memory allocation has been massively reworked. Before, we would allocate three different pieces of memory per file item- the list struct, the file struct, and the copied filename. What this resulted in was massive fragmentation of memory when loading filelists since the memory allocator had to leave holes all over the place. The new situation here now removes the need for any list item allocation; allocates the file structs in contiguous memory (and reallocs as necessary), leaving only the strings as individually allocated. Tests using valgrind (massif) show some pretty significant memory reductions on the worst case `pacman -Ql> /dev/null` (366387 files on my machine):
Before: Peak heap: 54,416,024 B Useful heap: 36,840,692 B Extra heap: 17,575,332 B
After: Peak heap: 38,004,352 B Useful heap: 28,101,347 B Extra heap: 9,903,005 B
Several small helper methods have been introduced, including a list to array conversion helper as well as a filelist merge sort that works directly on arrays.
Minor comments: Maybe want to look at consistency between the two places where file lists are read (local_db_read, _alpm_pkg_load_internal). e.g. starting with size 4 vs 8, freeing excess memory at the end. Do you intend to just make filelist_operation return an array at a later stage? Allan
On Tue, Jul 19, 2011 at 6:57 AM, Allan McRae <allan@archlinux.org> wrote:
On 19/07/11 20:01, Dan McGee wrote:
This accomplishes quite a few things with one rather invasive change.
1. Iteration is much more performant, due to a reduction in pointer chasing and linear item access. 2. Data structures are smaller- we no longer have the overhead of the linked list as the file struts are now laid out consecutively in memory. 3. Memory allocation has been massively reworked. Before, we would allocate three different pieces of memory per file item- the list struct, the file struct, and the copied filename. What this resulted in was massive fragmentation of memory when loading filelists since the memory allocator had to leave holes all over the place. The new situation here now removes the need for any list item allocation; allocates the file structs in contiguous memory (and reallocs as necessary), leaving only the strings as individually allocated. Tests using valgrind (massif) show some pretty significant memory reductions on the worst case `pacman -Ql> /dev/null` (366387 files on my machine):
Before: Peak heap: 54,416,024 B Useful heap: 36,840,692 B Extra heap: 17,575,332 B
After: Peak heap: 38,004,352 B Useful heap: 28,101,347 B Extra heap: 9,903,005 B
Several small helper methods have been introduced, including a list to array conversion helper as well as a filelist merge sort that works directly on arrays.
Minor comments:
Maybe want to look at consistency between the two places where file lists are read (local_db_read, _alpm_pkg_load_internal). e.g. starting with size 4 vs 8, freeing excess memory at the end. It actually starts with 8 in both cases, it just does it in different ways. Look closer at it, but I agree it isn't all that clear, more cleaver than necessary.
Do you intend to just make filelist_operation return an array at a later stage? I contemplated doing that, as it is the only user of the list_to_array method right now. The tricky part there is the unknown size in advance bit- we could have a list ranging anywhere in size from 0 to max(len(lista), len(listb)).
-Dan
This accomplishes quite a few things with one rather invasive change. 1. Iteration is much more performant, due to a reduction in pointer chasing and linear item access. 2. Data structures are smaller- we no longer have the overhead of the linked list as the file struts are now laid out consecutively in memory. 3. Memory allocation has been massively reworked. Before, we would allocate three different pieces of memory per file item- the list struct, the file struct, and the copied filename. What this resulted in was massive fragmentation of memory when loading filelists since the memory allocator had to leave holes all over the place. The new situation here now removes the need for any list item allocation; allocates the file structs in contiguous memory (and reallocs as necessary), leaving only the strings as individually allocated. Tests using valgrind (massif) show some pretty significant memory reductions on the worst case `pacman -Ql > /dev/null` (366387 files on my machine): Before: Peak heap: 54,416,024 B Useful heap: 36,840,692 B Extra heap: 17,575,332 B After: Peak heap: 38,004,352 B Useful heap: 28,101,347 B Extra heap: 9,903,005 B Several small helper methods have been introduced, including a list to array conversion helper as well as a filelist merge sort that works directly on arrays. Signed-off-by: Dan McGee <dan@archlinux.org> --- Updated with some feedback: * Fixed the four failing pactests due to missing a small fix in alpm_list.c. * Unified the alloc/realloc code between be_local and be_package. * Do late stage realloc to free extra allocated memory for filelist in be_package code. lib/libalpm/alpm.h | 11 +++++- lib/libalpm/alpm_list.c | 32 +++++++++++++++++ lib/libalpm/alpm_list.h | 1 + lib/libalpm/be_local.c | 41 +++++++++++++++++----- lib/libalpm/be_package.c | 86 +++++++++++++++++++++++++++++++++++++++------- lib/libalpm/conflict.c | 65 +++++++++++++++++++--------------- lib/libalpm/conflict.h | 4 +- lib/libalpm/diskspace.c | 24 ++++++++---- lib/libalpm/package.c | 46 ++++++++++++++----------- lib/libalpm/package.h | 8 ++-- lib/libalpm/remove.c | 25 ++++++++----- src/pacman/package.c | 9 +++-- src/pacman/query.c | 19 ++++++---- 13 files changed, 262 insertions(+), 109 deletions(-) diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index 6e1e4bc..d9f3504 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h @@ -184,6 +184,12 @@ typedef struct _alpm_file_t { mode_t mode; } alpm_file_t; +/** Package filelist container */ +typedef struct _alpm_filelist_t { + size_t count; + alpm_file_t *files; +} alpm_filelist_t; + /** Local package or package file backup entry */ typedef struct _alpm_backup_t { char *name; @@ -671,9 +677,10 @@ alpm_list_t *alpm_pkg_get_replaces(alpm_pkg_t *pkg); * The filenames are relative to the install root, * and do not include leading slashes. * @param pkg a pointer to package - * @return a reference to an internal list of strings. + * @return a pointer to a filelist object containing a count and an array of + * package file objects */ -alpm_list_t *alpm_pkg_get_files(alpm_pkg_t *pkg); +alpm_filelist_t *alpm_pkg_get_files(alpm_pkg_t *pkg); /** Returns the list of files backed up when installing pkg. * The elements of the returned list have the form diff --git a/lib/libalpm/alpm_list.c b/lib/libalpm/alpm_list.c index 071cd99..83b9824 100644 --- a/lib/libalpm/alpm_list.c +++ b/lib/libalpm/alpm_list.c @@ -749,6 +749,38 @@ alpm_list_t SYMEXPORT *alpm_list_diff(const alpm_list_t *lhs, return ret; } +/** + * @brief Copy a list and data into a standard C array of fixed length. + * Note that the data elements are shallow copied so any contained pointers + * will point to the original data. + * + * @param list the list to copy + * @param n the size of the list + * @param size the size of each data element + * + * @return an array version of the original list, data copied as well + */ +void SYMEXPORT *alpm_list_to_array(const alpm_list_t *list, size_t n, + size_t size) +{ + size_t i; + const alpm_list_t *item; + char *array; + + if(n == 0) { + return NULL; + } + + array = calloc(n, size); + if(array == NULL) { + return NULL; + } + for(i = 0, item = list; i < n && item; i++, item = item->next) { + memcpy(array + i * size, item->data, size); + } + return array; +} + /** @} */ /* vim: set ts=2 sw=2 noet: */ diff --git a/lib/libalpm/alpm_list.h b/lib/libalpm/alpm_list.h index 5e8ca46..cd7b029 100644 --- a/lib/libalpm/alpm_list.h +++ b/lib/libalpm/alpm_list.h @@ -81,6 +81,7 @@ char *alpm_list_find_str(const alpm_list_t *haystack, const char *needle); alpm_list_t *alpm_list_diff(const alpm_list_t *lhs, const alpm_list_t *rhs, alpm_list_fn_cmp fn); void alpm_list_diff_sorted(const alpm_list_t *left, const alpm_list_t *right, alpm_list_fn_cmp fn, alpm_list_t **onlyleft, alpm_list_t **onlyright); +void *alpm_list_to_array(const alpm_list_t *list, size_t n, size_t size); #ifdef __cplusplus } diff --git a/lib/libalpm/be_local.c b/lib/libalpm/be_local.c index 70f242d..261ad87 100644 --- a/lib/libalpm/be_local.c +++ b/lib/libalpm/be_local.c @@ -177,10 +177,10 @@ static alpm_list_t *_cache_get_deltas(alpm_pkg_t UNUSED *pkg) return NULL; } -static alpm_list_t *_cache_get_files(alpm_pkg_t *pkg) +static alpm_filelist_t *_cache_get_files(alpm_pkg_t *pkg) { LAZY_LOAD(INFRQ_FILES, NULL); - return pkg->files; + return &(pkg->files); } static alpm_list_t *_cache_get_backup(alpm_pkg_t *pkg) @@ -631,13 +631,35 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) while(fgets(line, sizeof(line), fp)) { _alpm_strtrim(line); if(strcmp(line, "%FILES%") == 0) { + size_t files_count = 0, files_size = 0; + alpm_file_t *files = NULL; + while(fgets(line, sizeof(line), fp) && strlen(_alpm_strtrim(line))) { - alpm_file_t *file; - CALLOC(file, 1, sizeof(alpm_file_t), goto error); - STRDUP(file->name, line, goto error); + if(files_count >= files_size) { + size_t old_size = files_size; + if(files_size == 0) { + files_size = 8; + } else { + files_size *= 2; + } + files = realloc(files, sizeof(alpm_file_t) * files_size); + if(!files) { + ALLOC_FAIL(sizeof(alpm_file_t) * files_size); + goto error; + } + /* ensure all new memory is zeroed out, in both the initial + * allocation and later reallocs */ + memset(files + old_size, 0, + sizeof(alpm_file_t) * (files_size - old_size)); + } + STRDUP(files[files_count].name, line, goto error); /* TODO: lstat file, get mode/size */ - info->files = alpm_list_add(info->files, file); + files_count++; } + /* attempt to hand back any memory we don't need */ + files = realloc(files, sizeof(alpm_file_t) * files_count); + info->files.count = files_count; + info->files.files = files; } else if(strcmp(line, "%BACKUP%") == 0) { while(fgets(line, sizeof(line), fp) && strlen(_alpm_strtrim(line))) { alpm_backup_t *backup; @@ -834,10 +856,11 @@ int _alpm_local_db_write(alpm_db_t *db, alpm_pkg_t *info, alpm_dbinfrq_t inforeq retval = -1; goto cleanup; } - if(info->files) { + if(info->files.count) { + size_t i; fprintf(fp, "%%FILES%%\n"); - for(lp = info->files; lp; lp = lp->next) { - const alpm_file_t *file = lp->data; + for(i = 0; i < info->files.count; i++) { + const alpm_file_t *file = info->files.files + i; fprintf(fp, "%s\n", file->name); } fprintf(fp, "\n"); diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c index 46bdaed..0edaa5a 100644 --- a/lib/libalpm/be_package.c +++ b/lib/libalpm/be_package.c @@ -223,6 +223,53 @@ static int parse_descfile(alpm_handle_t *handle, struct archive *a, alpm_pkg_t * return 0; } +static void files_merge(alpm_file_t a[], alpm_file_t b[], alpm_file_t c[], + size_t m, size_t n) +{ + size_t i = 0, j = 0, k = 0; + while(i < m && j < n) { + if(strcmp(a[i].name, b[j].name) < 0) { + c[k++] = a[i++]; + } else { + c[k++] = b[j++]; + } + } + while(i < m) { + c[k++] = a[i++]; + } + while(j < n) { + c[k++] = b[j++]; + } +} + +static alpm_file_t *files_msort(alpm_file_t *files, size_t n) +{ + alpm_file_t *work; + size_t blocksize = 1; + + CALLOC(work, n, sizeof(alpm_file_t), return NULL); + + for(blocksize = 1; blocksize < n; blocksize *= 2) { + size_t i, max_extent = 0; + for(i = 0; i < n - blocksize; i += 2 * blocksize) { + /* this limits our actual merge to the length of the array, since we will + * not likely be a perfect power of two. */ + size_t right_blocksize = blocksize; + if(i + blocksize * 2 > n) { + right_blocksize = n - i - blocksize; + } + files_merge(files + i, files + i + blocksize, work + i, + blocksize, right_blocksize); + max_extent = i + blocksize + right_blocksize; + } + /* ensure we only copy what we actually touched on this merge pass, + * no more, no less */ + memcpy(files, work, max_extent * sizeof(alpm_file_t)); + } + free(work); + return files; +} + /** * Load a package and create the corresponding alpm_pkg_t struct. * @param handle the context handle @@ -241,7 +288,8 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle, const char *pkgfile, struct archive_entry *entry; alpm_pkg_t *newpkg = NULL; struct stat st; - size_t files_count = 0; + size_t files_count = 0, files_size = 0; + alpm_file_t *files = NULL; if(pkgfile == NULL || strlen(pkgfile) == 0) { RET_ERR(handle, ALPM_ERR_WRONG_ARGS, NULL); @@ -326,12 +374,26 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle, const char *pkgfile, * already been handled (for future possibilities) */ } else if(full) { /* Keep track of all files for filelist generation */ - alpm_file_t *file; - CALLOC(file, 1, sizeof(alpm_file_t), goto error); - STRDUP(file->name, entry_name, goto error); - file->size = archive_entry_size(entry); - file->mode = archive_entry_mode(entry); - newpkg->files = alpm_list_add(newpkg->files, file); + if(files_count >= files_size) { + size_t old_size = files_size; + if(files_size == 0) { + files_size = 4; + } else { + files_size *= 2; + } + files = realloc(files, sizeof(alpm_file_t) * files_size); + if(!files) { + ALLOC_FAIL(sizeof(alpm_file_t) * files_size); + goto error; + } + /* ensure all new memory is zeroed out, in both the initial + * allocation and later reallocs */ + memset(files + old_size, 0, + sizeof(alpm_file_t) * (files_size - old_size)); + } + STRDUP(files[files_count].name, entry_name, goto error); + files[files_count].size = archive_entry_size(entry); + files[files_count].mode = archive_entry_mode(entry); files_count++; } @@ -369,16 +431,14 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle, const char *pkgfile, newpkg->handle = handle; if(full) { + /* attempt to hand back any memory we don't need */ + files = realloc(files, sizeof(alpm_file_t) * files_count); /* "checking for conflicts" requires a sorted list, ensure that here */ _alpm_log(handle, ALPM_LOG_DEBUG, "sorting package filelist for %s\n", pkgfile); - newpkg->files = alpm_list_msort(newpkg->files, files_count, - _alpm_files_cmp); + newpkg->files.files = files_msort(files, files_count); + newpkg->files.count = files_count; newpkg->infolevel = INFRQ_ALL; } else { - /* get rid of any partial filelist we may have collected, it is invalid */ - alpm_list_free_inner(newpkg->files, (alpm_list_fn_free)_alpm_files_free); - alpm_list_free(newpkg->files); - newpkg->files = NULL; newpkg->infolevel = INFRQ_BASE | INFRQ_DESC; } diff --git a/lib/libalpm/conflict.c b/lib/libalpm/conflict.c index eda6ba1..538c4c7 100644 --- a/lib/libalpm/conflict.c +++ b/lib/libalpm/conflict.c @@ -229,22 +229,22 @@ static const int INTERSECT = 1; * DIFFERENCE - a difference operation is performed. filesA - filesB. * INTERSECT - an intersection operation is performed. filesA & filesB. */ -static alpm_list_t *filelist_operation(alpm_list_t *filesA, alpm_list_t *filesB, - int operation) +static alpm_list_t *filelist_operation(alpm_filelist_t *filesA, + alpm_filelist_t *filesB, int operation) { alpm_list_t *ret = NULL; - alpm_list_t *pA = filesA, *pB = filesB; + size_t ctrA = 0, ctrB = 0; - while(pA && pB) { - alpm_file_t *fileA = pA->data; - alpm_file_t *fileB = pB->data; + while(ctrA < filesA->count && ctrB < filesB->count) { + alpm_file_t *fileA = filesA->files + ctrA; + alpm_file_t *fileB = filesB->files + ctrB; const char *strA = fileA->name; const char *strB = fileB->name; /* skip directories, we don't care about them */ if(strA[strlen(strA)-1] == '/') { - pA = pA->next; + ctrA++; } else if(strB[strlen(strB)-1] == '/') { - pB = pB->next; + ctrB++; } else { int cmp = strcmp(strA, strB); if(cmp < 0) { @@ -252,29 +252,29 @@ static alpm_list_t *filelist_operation(alpm_list_t *filesA, alpm_list_t *filesB, /* item only in filesA, qualifies as a difference */ ret = alpm_list_add(ret, fileA); } - pA = pA->next; + ctrA++; } else if(cmp > 0) { - pB = pB->next; + ctrB++; } else { if(operation == INTERSECT) { /* item in both, qualifies as an intersect */ ret = alpm_list_add(ret, fileA); } - pA = pA->next; - pB = pB->next; + ctrA++; + ctrB++; } } } /* if doing a difference, ensure we have completely emptied pA */ - while(operation == DIFFERENCE && pA) { - alpm_file_t *fileA = pA->data; + while(operation == DIFFERENCE && ctrA < filesA->count) { + alpm_file_t *fileA = filesA->files + ctrA; const char *strA = fileA->name; /* skip directories */ if(strA[strlen(strA)-1] != '/') { ret = alpm_list_add(ret, fileA); } - pA = pA->next; + ctrA++; } return ret; @@ -319,16 +319,16 @@ void _alpm_fileconflict_free(alpm_fileconflict_t *conflict) FREE(conflict); } -const alpm_file_t *_alpm_filelist_contains(const alpm_list_t *haystack, - const char *needle) +const alpm_file_t *_alpm_filelist_contains(alpm_filelist_t *filelist, + const char *name) { - const alpm_list_t *lp = haystack; - while(lp) { - const alpm_file_t *file = lp->data; - if(strcmp(file->name, needle) == 0) { + size_t i; + const alpm_file_t *file = filelist->files; + for(i = 0; i < filelist->count; i++) { + if(strcmp(file->name, name) == 0) { return file; } - lp = lp->next; + file++; } return NULL; } @@ -400,8 +400,10 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle, * different cases. */ for(current = 0, i = upgrade; i; i = i->next, current++) { alpm_pkg_t *p1 = i->data; - alpm_list_t *j, *tmpfiles; + alpm_list_t *j; + alpm_filelist_t tmpfiles; alpm_pkg_t *dbpkg; + size_t filenum; int percent = (current * 100) / numtargs; PROGRESS(trans, ALPM_TRANS_PROGRESS_CONFLICTS_START, "", percent, @@ -444,16 +446,21 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle, * that the former list needs to be freed while the latter list should NOT * be freed. */ if(dbpkg) { + alpm_list_t *difference; /* older ver of package currently installed */ - tmpfiles = filelist_operation(alpm_pkg_get_files(p1), + difference = filelist_operation(alpm_pkg_get_files(p1), alpm_pkg_get_files(dbpkg), DIFFERENCE); + tmpfiles.count = alpm_list_count(difference); + tmpfiles.files = alpm_list_to_array(difference, tmpfiles.count, + sizeof(alpm_file_t)); + alpm_list_free(difference); } else { /* no version of package currently installed */ - tmpfiles = alpm_pkg_get_files(p1); + tmpfiles = *alpm_pkg_get_files(p1); } - for(j = tmpfiles; j; j = j->next) { - alpm_file_t *file = j->data; + for(filenum = 0; filenum < tmpfiles.count; filenum++) { + alpm_file_t *file = tmpfiles.files + filenum; const char *filestr = file->name; const char *relative_path; alpm_list_t *k; @@ -572,7 +579,7 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle, FREELIST(conflicts); if(dbpkg) { /* only freed if it was generated from filelist_operation() */ - alpm_list_free(tmpfiles); + free(tmpfiles.files); } return NULL; } @@ -580,7 +587,7 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle, } if(dbpkg) { /* only freed if it was generated from filelist_operation() */ - alpm_list_free(tmpfiles); + free(tmpfiles.files); } } PROGRESS(trans, ALPM_TRANS_PROGRESS_CONFLICTS_START, "", 100, diff --git a/lib/libalpm/conflict.h b/lib/libalpm/conflict.h index f2ab625..8b7471f 100644 --- a/lib/libalpm/conflict.h +++ b/lib/libalpm/conflict.h @@ -33,8 +33,8 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle, void _alpm_fileconflict_free(alpm_fileconflict_t *conflict); -const alpm_file_t *_alpm_filelist_contains(const alpm_list_t *haystack, - const char *needle); +const alpm_file_t *_alpm_filelist_contains(alpm_filelist_t *filelist, + const char *name); #endif /* _ALPM_CONFLICT_H */ diff --git a/lib/libalpm/diskspace.c b/lib/libalpm/diskspace.c index 3ab62a8..b28b88a 100644 --- a/lib/libalpm/diskspace.c +++ b/lib/libalpm/diskspace.c @@ -147,14 +147,18 @@ static alpm_mountpoint_t *match_mount_point(const alpm_list_t *mount_points, static int calculate_removed_size(alpm_handle_t *handle, const alpm_list_t *mount_points, alpm_pkg_t *pkg) { - alpm_list_t *i; + size_t i; + alpm_filelist_t *filelist = alpm_pkg_get_files(pkg); - alpm_list_t *files = alpm_pkg_get_files(pkg); - for(i = files; i; i = i->next) { + if(!filelist->count) { + return 0; + } + + for(i = 0; i < filelist->count; i++) { + const alpm_file_t *file = filelist->files + i; alpm_mountpoint_t *mp; struct stat st; char path[PATH_MAX]; - const alpm_file_t *file = i->data; const char *filename = file->name; snprintf(path, PATH_MAX, "%s%s", handle->root, filename); @@ -185,13 +189,17 @@ static int calculate_removed_size(alpm_handle_t *handle, static int calculate_installed_size(alpm_handle_t *handle, const alpm_list_t *mount_points, alpm_pkg_t *pkg) { - alpm_list_t *i; + size_t i; + alpm_filelist_t *filelist = alpm_pkg_get_files(pkg); - for(i = alpm_pkg_get_files(pkg); i; i = i->next) { - const alpm_file_t *file = i->data; + if(!filelist->count) { + return 0; + } + + for(i = 0; i < filelist->count; i++) { + const alpm_file_t *file = filelist->files + i; alpm_mountpoint_t *mp; char path[PATH_MAX]; - const char *filename = file->name; /* libarchive reports these as zero size anyways */ diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c index ae9b9a9..fd3d0c6 100644 --- a/lib/libalpm/package.c +++ b/lib/libalpm/package.c @@ -106,7 +106,7 @@ static alpm_list_t *_pkg_get_conflicts(alpm_pkg_t *pkg) { return pkg->conflicts static alpm_list_t *_pkg_get_provides(alpm_pkg_t *pkg) { return pkg->provides; } static alpm_list_t *_pkg_get_replaces(alpm_pkg_t *pkg) { return pkg->replaces; } static alpm_list_t *_pkg_get_deltas(alpm_pkg_t *pkg) { return pkg->deltas; } -static alpm_list_t *_pkg_get_files(alpm_pkg_t *pkg) { return pkg->files; } +static alpm_filelist_t *_pkg_get_files(alpm_pkg_t *pkg) { return &(pkg->files); } static alpm_list_t *_pkg_get_backup(alpm_pkg_t *pkg) { return pkg->backup; } static void *_pkg_changelog_open(alpm_pkg_t UNUSED *pkg) @@ -313,7 +313,7 @@ alpm_list_t SYMEXPORT *alpm_pkg_get_deltas(alpm_pkg_t *pkg) return pkg->ops->get_deltas(pkg); } -alpm_list_t SYMEXPORT *alpm_pkg_get_files(alpm_pkg_t *pkg) +alpm_filelist_t SYMEXPORT *alpm_pkg_get_files(alpm_pkg_t *pkg) { ASSERT(pkg != NULL, return NULL); pkg->handle->pm_errno = 0; @@ -427,22 +427,14 @@ alpm_list_t SYMEXPORT *alpm_pkg_compute_requiredby(alpm_pkg_t *pkg) /** @} */ -void _alpm_files_free(alpm_file_t *file) +alpm_file_t *_alpm_file_copy(alpm_file_t *dest, + const alpm_file_t *src) { - free(file->name); - free(file); -} - -alpm_file_t *_alpm_files_dup(const alpm_file_t *file) -{ - alpm_file_t *newfile; - CALLOC(newfile, 1, sizeof(alpm_file_t), return NULL); - - STRDUP(newfile->name, file->name, return NULL); - newfile->size = file->size; - newfile->mode = file->mode; + STRDUP(dest->name, src->name, return NULL); + dest->size = src->size; + dest->mode = src->mode; - return newfile; + return dest; } /* Helper function for comparing files list entries @@ -493,8 +485,17 @@ alpm_pkg_t *_alpm_pkg_dup(alpm_pkg_t *pkg) newpkg->licenses = alpm_list_strdup(pkg->licenses); newpkg->replaces = alpm_list_strdup(pkg->replaces); newpkg->groups = alpm_list_strdup(pkg->groups); - for(i = pkg->files; i; i = alpm_list_next(i)) { - newpkg->files = alpm_list_add(newpkg->files, _alpm_files_dup(i->data)); + if(pkg->files.count) { + size_t filenum; + size_t len = sizeof(alpm_file_t) * pkg->files.count; + MALLOC(newpkg->files.files, len, goto cleanup); + for(filenum = 0; filenum < pkg->files.count; filenum++) { + if(!_alpm_file_copy(newpkg->files.files + filenum, + pkg->files.files + filenum)) { + goto cleanup; + } + } + newpkg->files.count = pkg->files.count; } for(i = pkg->backup; i; i = alpm_list_next(i)) { newpkg->backup = alpm_list_add(newpkg->backup, _alpm_backup_dup(i->data)); @@ -545,8 +546,13 @@ void _alpm_pkg_free(alpm_pkg_t *pkg) FREELIST(pkg->licenses); FREELIST(pkg->replaces); FREELIST(pkg->groups); - alpm_list_free_inner(pkg->files, (alpm_list_fn_free)_alpm_files_free); - alpm_list_free(pkg->files); + if(pkg->files.count) { + size_t i; + for(i = 0; i < pkg->files.count; i++) { + free(pkg->files.files[i].name); + } + free(pkg->files.files); + } alpm_list_free_inner(pkg->backup, (alpm_list_fn_free)_alpm_backup_free); alpm_list_free(pkg->backup); alpm_list_free_inner(pkg->depends, (alpm_list_fn_free)_alpm_dep_free); diff --git a/lib/libalpm/package.h b/lib/libalpm/package.h index f76812b..d19d833 100644 --- a/lib/libalpm/package.h +++ b/lib/libalpm/package.h @@ -69,7 +69,7 @@ struct pkg_operations { alpm_list_t *(*get_provides) (alpm_pkg_t *); alpm_list_t *(*get_replaces) (alpm_pkg_t *); alpm_list_t *(*get_deltas) (alpm_pkg_t *); - alpm_list_t *(*get_files) (alpm_pkg_t *); + alpm_filelist_t *(*get_files) (alpm_pkg_t *); alpm_list_t *(*get_backup) (alpm_pkg_t *); void *(*changelog_open) (alpm_pkg_t *); @@ -126,7 +126,6 @@ struct __alpm_pkg_t { alpm_list_t *licenses; alpm_list_t *replaces; alpm_list_t *groups; - alpm_list_t *files; alpm_list_t *backup; alpm_list_t *depends; alpm_list_t *optdepends; @@ -137,10 +136,11 @@ struct __alpm_pkg_t { alpm_list_t *removes; /* in transaction targets only */ struct pkg_operations *ops; + + alpm_filelist_t files; }; -void _alpm_files_free(alpm_file_t *file); -alpm_file_t *_alpm_files_dup(const alpm_file_t *file); +alpm_file_t *_alpm_file_copy(alpm_file_t *dest, const alpm_file_t *src); int _alpm_files_cmp(const void *f1, const void *f2); alpm_pkg_t* _alpm_pkg_new(void); diff --git a/lib/libalpm/remove.c b/lib/libalpm/remove.c index 2c5d98c..83c437f 100644 --- a/lib/libalpm/remove.c +++ b/lib/libalpm/remove.c @@ -262,15 +262,15 @@ static void unlink_file(alpm_handle_t *handle, alpm_pkg_t *info, local_pkgs = _alpm_db_get_pkgcache(handle->db_local); for(local = local_pkgs; local && !found; local = local->next) { alpm_pkg_t *local_pkg = local->data; - alpm_list_t *files; + alpm_filelist_t *filelist; /* we duplicated the package when we put it in the removal list, so we * so we can't use direct pointer comparison here. */ if(_alpm_pkg_cmp(info, local_pkg) == 0) { continue; } - files = alpm_pkg_get_files(local_pkg); - if(_alpm_filelist_contains(files, fileobj->name)) { + filelist = alpm_pkg_get_files(local_pkg); + if(_alpm_filelist_contains(filelist, fileobj->name)) { _alpm_log(handle, ALPM_LOG_DEBUG, "keeping directory %s (owned by %s)\n", file, local_pkg->name); found = 1; @@ -320,11 +320,13 @@ int _alpm_remove_single_package(alpm_handle_t *handle, alpm_pkg_t *oldpkg, alpm_pkg_t *newpkg, size_t targ_count, size_t pkg_count) { - alpm_list_t *files, *skip_remove, *lp; + alpm_list_t *skip_remove; size_t filenum = 0, position = 0; const char *pkgname = oldpkg->name; const char *pkgver = oldpkg->version; + alpm_filelist_t *filelist; char scriptlet[PATH_MAX]; + size_t i; if(newpkg) { _alpm_log(handle, ALPM_LOG_DEBUG, "removing old package first (%s-%s)\n", @@ -349,7 +351,8 @@ int _alpm_remove_single_package(alpm_handle_t *handle, } if(newpkg) { - alpm_list_t *newfiles, *b; + alpm_filelist_t *newfiles; + alpm_list_t *b; skip_remove = alpm_list_join( alpm_list_strdup(handle->trans->skip_remove), alpm_list_strdup(handle->noupgrade)); @@ -371,9 +374,10 @@ int _alpm_remove_single_package(alpm_handle_t *handle, skip_remove = alpm_list_strdup(handle->trans->skip_remove); } - files = alpm_pkg_get_files(oldpkg); - for(lp = files; lp; lp = lp->next) { - if(!can_remove_file(handle, lp->data, skip_remove)) { + filelist = alpm_pkg_get_files(oldpkg); + for(i = 0; i < filelist->count; i++) { + alpm_file_t *file = filelist->files + i; + if(!can_remove_file(handle, file, skip_remove)) { _alpm_log(handle, ALPM_LOG_DEBUG, "not removing package '%s', can't remove all files\n", pkgname); RET_ERR(handle, ALPM_ERR_PKG_CANT_REMOVE, -1); @@ -390,9 +394,10 @@ int _alpm_remove_single_package(alpm_handle_t *handle, } /* iterate through the list backwards, unlinking files */ - for(lp = alpm_list_last(files); lp; lp = alpm_list_previous(lp)) { + for(i = filelist->count; i > 0; i--) { + alpm_file_t *file = filelist->files + i - 1; int percent; - unlink_file(handle, oldpkg, lp->data, skip_remove, + unlink_file(handle, oldpkg, file, skip_remove, handle->trans->flags & ALPM_TRANS_FLAG_NOSAVE); if(!newpkg) { diff --git a/src/pacman/package.c b/src/pacman/package.c index afbac6b..45afded 100644 --- a/src/pacman/package.c +++ b/src/pacman/package.c @@ -230,15 +230,16 @@ void dump_pkg_backups(alpm_pkg_t *pkg) void dump_pkg_files(alpm_pkg_t *pkg, int quiet) { const char *pkgname, *root; - alpm_list_t *i, *pkgfiles; + alpm_filelist_t *pkgfiles; + size_t i; pkgname = alpm_pkg_get_name(pkg); pkgfiles = alpm_pkg_get_files(pkg); root = alpm_option_get_root(config->handle); - for(i = pkgfiles; i; i = alpm_list_next(i)) { - const alpm_file_t *file = alpm_list_getdata(i); - if(!quiet){ + for(i = 0; i < pkgfiles->count; i++) { + const alpm_file_t *file = pkgfiles->files + i; + if(!quiet) { fprintf(stdout, "%s %s%s\n", pkgname, root, file->name); } else { fprintf(stdout, "%s%s\n", root, file->name); diff --git a/src/pacman/query.c b/src/pacman/query.c index 251c4dd..163c331 100644 --- a/src/pacman/query.c +++ b/src/pacman/query.c @@ -191,12 +191,13 @@ static int query_fileowner(alpm_list_t *targets) free(dname); for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) { - alpm_list_t *j; alpm_pkg_t *info = alpm_list_getdata(i); + alpm_filelist_t *filelist = alpm_pkg_get_files(info); + size_t i; - for(j = alpm_pkg_get_files(info); j && !found; j = alpm_list_next(j)) { + for(i = 0; i < filelist->count; i++) { + const alpm_file_t *file = filelist->files + i; char *ppath, *pdname; - const alpm_file_t *file = alpm_list_getdata(j); const char *pkgfile = file->name; /* avoid the costly resolve_path usage if the basenames don't match */ @@ -402,11 +403,12 @@ static int filter(alpm_pkg_t *pkg) * loop through files to check if they exist. */ static int check(alpm_pkg_t *pkg) { - alpm_list_t *i; - const char *root; + const char *root, *pkgname; int allfiles = 0, errors = 0; size_t rootlen; char f[PATH_MAX]; + alpm_filelist_t *filelist; + size_t i; root = alpm_option_get_root(config->handle); rootlen = strlen(root); @@ -417,10 +419,11 @@ static int check(alpm_pkg_t *pkg) } strcpy(f, root); - const char *pkgname = alpm_pkg_get_name(pkg); - for(i = alpm_pkg_get_files(pkg); i; i = alpm_list_next(i)) { + pkgname = alpm_pkg_get_name(pkg); + filelist = alpm_pkg_get_files(pkg); + for(i = 0; i < filelist->count; i++) { + const alpm_file_t *file = filelist->files + i; struct stat st; - const alpm_file_t *file = alpm_list_getdata(i); const char *path = file->name; if(rootlen + 1 + strlen(path) > PATH_MAX) { -- 1.7.6
participants (3)
-
Allan McRae
-
Dan McGee
-
Dan McGee