[pacman-dev] [PATCH 1/3] Rework the alpm_unpack functions
Add support to extract a list of entries Signed-off-by: Xavier Chantry <shiningxc@gmail.com> --- lib/libalpm/be_files.c | 2 +- lib/libalpm/trans.c | 2 +- lib/libalpm/util.c | 55 ++++++++++++++++++++++++++++++++++++++--------- lib/libalpm/util.h | 5 +++- 4 files changed, 50 insertions(+), 14 deletions(-) diff --git a/lib/libalpm/be_files.c b/lib/libalpm/be_files.c index 51492d5..53bbda1 100644 --- a/lib/libalpm/be_files.c +++ b/lib/libalpm/be_files.c @@ -245,7 +245,7 @@ int SYMEXPORT alpm_db_update(int force, pmdb_t *db) /* uncompress the sync database */ checkdbdir(db); - ret = _alpm_unpack(dbfilepath, syncdbpath, NULL); + ret = _alpm_unpack(dbfilepath, syncdbpath, NULL, 0); if(ret) { free(dbfilepath); RET_ERR(PM_ERR_SYSTEM, -1); diff --git a/lib/libalpm/trans.c b/lib/libalpm/trans.c index c99f596..6f5216e 100644 --- a/lib/libalpm/trans.c +++ b/lib/libalpm/trans.c @@ -368,7 +368,7 @@ int _alpm_runscriptlet(const char *root, const char *installfn, /* either extract or copy the scriptlet */ snprintf(scriptfn, PATH_MAX, "%s/.INSTALL", tmpdir); if(!strcmp(script, "pre_upgrade") || !strcmp(script, "pre_install")) { - if(_alpm_unpack(installfn, tmpdir, ".INSTALL")) { + if(_alpm_unpack_single(installfn, tmpdir, ".INSTALL")) { retval = 1; } } else { diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c index 381b116..3777349 100644 --- a/lib/libalpm/util.c +++ b/lib/libalpm/util.c @@ -241,14 +241,38 @@ int _alpm_lckrm() /* Compression functions */ /** - * @brief Unpack a specific file or all files in an archive. + * @brief Unpack a specific file in an archive. 
* * @param archive the archive to unpack * @param prefix where to extract the files - * @param fn a file within the archive to unpack or NULL for all + * @param fn a file within the archive to unpack * @return 0 on success, 1 on failure */ -int _alpm_unpack(const char *archive, const char *prefix, const char *fn) +int _alpm_unpack_single(const char *archive, const char *prefix, const char *fn) +{ + alpm_list_t *list = NULL; + int ret = 0; + if(fn == NULL) { + return(1); + } + list = alpm_list_add(list, (void *)fn); + ret = _alpm_unpack(archive, prefix, list, 1); + alpm_list_free(list); + return(ret); +} + +/** + * @brief Unpack a list of files in an archive. + * + * @param archive the archive to unpack + * @param prefix where to extract the files + * @param list a list of files within the archive to unpack or + * NULL for all + * @param breakfirst break after the first entry found + * + * @return 0 on success, 1 on failure + */ +int _alpm_unpack(const char *archive, const char *prefix, alpm_list_t *list, int breakfirst) { int ret = 0; mode_t oldmask; @@ -301,14 +325,23 @@ int _alpm_unpack(const char *archive, const char *prefix, const char *fn) archive_entry_set_perm(entry, 0755); } - /* If a specific file was requested skip entries that don't match. */ - if (fn && strcmp(fn, entryname)) { - _alpm_log(PM_LOG_DEBUG, "skipping: %s\n", entryname); - if (archive_read_data_skip(_archive) != ARCHIVE_OK) { - ret = 1; - goto cleanup; + /* If specific files were requested, skip entries that don't match. */ + if(list) { + char *prefix = strdup(entryname); + char *p = strstr(prefix,"/"); + if(p) { + *(p+1) = '\0'; + } + char *found = alpm_list_find_str(list, prefix); + free(prefix); + if(!found) { + _alpm_log(PM_LOG_DEBUG, "skipping: %s\n", entryname); + if (archive_read_data_skip(_archive) != ARCHIVE_OK) { + ret = 1; + goto cleanup; + } + continue; } - continue; } /* Extract the archive entry. 
*/ @@ -324,7 +357,7 @@ int _alpm_unpack(const char *archive, const char *prefix, const char *fn) goto cleanup; } - if(fn) { + if(breakfirst) { break; } } diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h index ccf169d..baba858 100644 --- a/lib/libalpm/util.h +++ b/lib/libalpm/util.h @@ -26,6 +26,8 @@ #include "config.h" +#include "alpm_list.h" + #include <stdio.h> #include <string.h> #include <stdarg.h> @@ -62,7 +64,8 @@ int _alpm_copyfile(const char *src, const char *dest); char *_alpm_strtrim(char *str); int _alpm_lckmk(); int _alpm_lckrm(); -int _alpm_unpack(const char *archive, const char *prefix, const char *fn); +int _alpm_unpack_single(const char *archive, const char *prefix, const char *fn); +int _alpm_unpack(const char *archive, const char *prefix, alpm_list_t *list, int breakfirst); int _alpm_rmrf(const char *path); int _alpm_logaction(unsigned short usesyslog, FILE *f, const char *fmt, va_list args); int _alpm_run_chroot(const char *root, const char *cmd); -- 1.6.4.4
This is more efficient than alpm_list_diff since it assumes the two lists are sorted. And also we get the two sides of the diff. Even with the cost of sorting included, this should be more efficient than the current list_diff. Sorting the two lists should be O(n*log(n)+m*log(m)) while the current list_diff is O(n*m). So I also reimplemented list_diff using list_diff_sorted. Signed-off-by: Xavier Chantry <shiningxc@gmail.com> --- lib/libalpm/alpm_list.c | 82 +++++++++++++++++++++++++++++++++++++++++---------- lib/libalpm/alpm_list.h | 2 + 2 files changed, 67 insertions(+), 17 deletions(-) diff --git a/lib/libalpm/alpm_list.c b/lib/libalpm/alpm_list.c index 127f72a..5cd1087 100644 --- a/lib/libalpm/alpm_list.c +++ b/lib/libalpm/alpm_list.c @@ -644,11 +644,64 @@ char SYMEXPORT *alpm_list_find_str(const alpm_list_t *haystack, } /** - * @brief Find the items in list `lhs` that are not present in list `rhs`. + * @brief Find the items in list `left` that are not present in list `right` and vice-versa. + * + * The two lists must be sorted. Items only in list `left` are added to the `onlyleft` list. Items only in list `right` + * are added to the `onlyright` list. * - * Entries are not duplicated. Operation is O(m*n). The first list is stepped - * through one node at a time, and for each node in the first list, each node - * in the second list is compared to it. 
+ * @param left the first list + * @param right the second list + * @param fn the comparison function + * @param onlyleft pointer to the first result list + * @param onlyright pointer to the second result list + * + */ +void SYMEXPORT alpm_list_diff_sorted(alpm_list_t *left, + alpm_list_t *right, alpm_list_fn_cmp fn, + alpm_list_t **onlyleft, alpm_list_t **onlyright) +{ + alpm_list_t *l = left; + alpm_list_t *r = right; + + if(!onlyleft && !onlyright) { + return; + } + + while (l != NULL && r != NULL) { + int cmp = fn(l->data, r->data); + if(cmp < 0) { + if(onlyleft) { + *onlyleft = alpm_list_add(*onlyleft, l->data); + } + l = l->next; + } + else if(cmp > 0) { + if(onlyright) { + *onlyright = alpm_list_add(*onlyright, r->data); + } + r = r->next; + } else { + l = l->next; + r = r->next; + } + } + while (l != NULL) { + if(onlyleft) { + *onlyleft = alpm_list_add(*onlyleft, l->data); + } + l = l->next; + } + while (r != NULL) { + if(onlyright) { + *onlyright = alpm_list_add(*onlyright, r->data); + } + r = r->next; + } +} + + +/** + * @brief Find the items in list `lhs` that are not present in list `rhs`. 
* * @param lhs the first list * @param rhs the second list @@ -659,20 +712,15 @@ char SYMEXPORT *alpm_list_find_str(const alpm_list_t *haystack, alpm_list_t SYMEXPORT *alpm_list_diff(const alpm_list_t *lhs, const alpm_list_t *rhs, alpm_list_fn_cmp fn) { - const alpm_list_t *i, *j; + alpm_list_t *left, *right; alpm_list_t *ret = NULL; - for(i = lhs; i; i = i->next) { - int found = 0; - for(j = rhs; j; j = j->next) { - if(fn(i->data, j->data) == 0) { - found = 1; - break; - } - } - if(!found) { - ret = alpm_list_add(ret, i->data); - } - } + + left = alpm_list_copy(lhs); + left = alpm_list_msort(left, alpm_list_count(left), fn); + right = alpm_list_copy(rhs); + right = alpm_list_msort(right, alpm_list_count(right), fn); + + alpm_list_diff_sorted(left, right, fn, &ret, NULL); return(ret); } diff --git a/lib/libalpm/alpm_list.h b/lib/libalpm/alpm_list.h index f079ecf..48e9117 100644 --- a/lib/libalpm/alpm_list.h +++ b/lib/libalpm/alpm_list.h @@ -78,6 +78,8 @@ void *alpm_list_find(const alpm_list_t *haystack, const void *needle, alpm_list_ void *alpm_list_find_ptr(const alpm_list_t *haystack, const void *needle); char *alpm_list_find_str(const alpm_list_t *haystack, const char *needle); alpm_list_t *alpm_list_diff(const alpm_list_t *lhs, const alpm_list_t *rhs, alpm_list_fn_cmp fn); +void alpm_list_diff_sorted(alpm_list_t *left, alpm_list_t *right, + alpm_list_fn_cmp fn, alpm_list_t **onlyleft, alpm_list_t **onlyright); #ifdef __cplusplus } -- 1.6.4.4
This implements FS#15198. The idea apparently came from Csaba Henk <csaba-ml <at> creo.hu> which submitted a patch to frugalware, so thanks to him, even though I did not look at the code :) The idea is to only extract folders for new packages into the package database and clean up the old directories. This is essentially implementing Xyne's "rebase" script within pacman. If using -Syy, just remove and extract everything. If using -Sy : 1. Generate list of directories in db 2. Generate list of directories in archive 3. Compare both 4. Clean up old directories 5. Extract new directories Original-work-by: Allan McRae <allan@archlinux.org> Signed-off-by: Xavier Chantry <shiningxc@gmail.com> --- lib/libalpm/be_files.c | 158 +++++++++++++++++++++++++++++++++++++++++++---- lib/libalpm/util.c | 3 +- 2 files changed, 146 insertions(+), 15 deletions(-) diff --git a/lib/libalpm/be_files.c b/lib/libalpm/be_files.c index 53bbda1..fcd5ead 100644 --- a/lib/libalpm/be_files.c +++ b/lib/libalpm/be_files.c @@ -33,6 +33,10 @@ #include <limits.h> /* PATH_MAX */ #include <locale.h> /* setlocale */ +/* libarchive */ +#include <archive.h> +#include <archive_entry.h> + /* libalpm */ #include "db.h" #include "alpm_list.h" @@ -137,6 +141,102 @@ static int checkdbdir(pmdb_t *db) return(0); } +/* create list of directories in db */ +static alpm_list_t *dirlist_from_tar(const char *archive) +{ + alpm_list_t *dirlist = NULL; + struct archive *_archive; + struct archive_entry *entry; + + if((_archive = archive_read_new()) == NULL) + RET_ERR(PM_ERR_LIBARCHIVE, NULL); + + archive_read_support_compression_all(_archive); + archive_read_support_format_all(_archive); + + if(archive_read_open_filename(_archive, archive, + ARCHIVE_DEFAULT_BYTES_PER_BLOCK) != ARCHIVE_OK) { + _alpm_log(PM_LOG_ERROR, _("could not open %s: %s\n"), archive, + archive_error_string(_archive)); + RET_ERR(PM_ERR_PKG_OPEN, NULL); + } + + while(archive_read_next_header(_archive, &entry) == ARCHIVE_OK) { + const struct stat *st; + 
const char *entryname; /* the name of the file in the archive */ + + st = archive_entry_stat(entry); + entryname = archive_entry_pathname(entry); + + if(S_ISDIR(st->st_mode)) { + char *name = strdup(entryname); + dirlist = alpm_list_add(dirlist, name); + } + } + archive_read_finish(_archive); + + dirlist = alpm_list_msort(dirlist, alpm_list_count(dirlist), _alpm_str_cmp); + return(dirlist); +} + +/* create list of directories in db */ +static alpm_list_t *dirlist_from_fs(const char *syncdbpath) +{ + DIR *dbdir; + struct dirent *ent = NULL; + alpm_list_t *dirlist = NULL; + struct stat sbuf; + char path[PATH_MAX]; + + dbdir = opendir(syncdbpath); + if (dbdir != NULL) { + while((ent = readdir(dbdir)) != NULL) { + char *name = ent->d_name; + if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { + continue; + } + + /* stat the entry, make sure it's a directory */ + snprintf(path, PATH_MAX, "%s%s", syncdbpath, name); + if(stat(path, &sbuf) != 0 || !S_ISDIR(sbuf.st_mode)) { + continue; + } + + int len = strlen(name); + char *entry = malloc(len + 2); + strcpy(entry, name); + entry[len] = '/'; + entry[len+1] = '\0'; + dirlist = alpm_list_add(dirlist, entry); + } + } + closedir(dbdir); + + dirlist = alpm_list_msort(dirlist, alpm_list_count(dirlist), _alpm_str_cmp); + return(dirlist); +} + +/* remove old directories from dbdir */ +static int remove_olddir(const char *syncdbpath, alpm_list_t *dirlist) +{ + alpm_list_t *i; + for (i = dirlist; i; i = i->next) { + const char *name = i->data; + char *dbdir; + int len = strlen(syncdbpath) + strlen(name) + 2; + MALLOC(dbdir, len, RET_ERR(PM_ERR_MEMORY, -1)); + snprintf(dbdir, len, "%s%s", syncdbpath, name); + _alpm_log(PM_LOG_DEBUG, "removing: %s\n", dbdir); + if(_alpm_rmrf(dbdir) != 0) { + _alpm_log(PM_LOG_ERROR, _("could not remove database directory %s\n"), dbdir); + free(dbdir); + RET_ERR(PM_ERR_DB_REMOVE, -1); + } + free(dbdir); + } + return(0); +} + /** Update a package database * * An update of the package database \a db 
will be attempted. Unless @@ -229,27 +329,54 @@ int SYMEXPORT alpm_db_update(int force, pmdb_t *db) return(-1); } else { const char *syncdbpath = _alpm_db_path(db); - /* remove the old dir */ - if(_alpm_rmrf(syncdbpath) != 0) { - _alpm_log(PM_LOG_ERROR, _("could not remove database %s\n"), db->treename); - RET_ERR(PM_ERR_DB_REMOVE, -1); - } - - /* Cache needs to be rebuilt */ - _alpm_db_free_pkgcache(db); /* form the path to the db location */ len = strlen(dbpath) + strlen(db->treename) + strlen(DBEXT) + 1; MALLOC(dbfilepath, len, RET_ERR(PM_ERR_MEMORY, -1)); sprintf(dbfilepath, "%s%s" DBEXT, dbpath, db->treename); - /* uncompress the sync database */ - checkdbdir(db); - ret = _alpm_unpack(dbfilepath, syncdbpath, NULL, 0); - if(ret) { - free(dbfilepath); - RET_ERR(PM_ERR_SYSTEM, -1); + /* remove the old dir if forcing update */ + if(force) { + if(_alpm_rmrf(syncdbpath) != 0) { + _alpm_log(PM_LOG_ERROR, _("could not remove database %s\n"), db->treename); + RET_ERR(PM_ERR_DB_REMOVE, -1); + } + + checkdbdir(db); + + ret = _alpm_unpack(dbfilepath, syncdbpath, NULL, 0); + if(ret) { + free(dbfilepath); + RET_ERR(PM_ERR_SYSTEM, -1); + } + } else { + alpm_list_t *onlyold = NULL; + alpm_list_t *onlynew = NULL; + alpm_list_t *olddirlist = NULL; + alpm_list_t *newdirlist = NULL; + + newdirlist = dirlist_from_tar(dbfilepath); + olddirlist = dirlist_from_fs(syncdbpath); + + alpm_list_diff_sorted(olddirlist, newdirlist, _alpm_str_cmp, &onlyold, &onlynew); + + ret = remove_olddir(syncdbpath, onlyold); + if(ret == 0) { + checkdbdir(db); + ret = _alpm_unpack(dbfilepath, syncdbpath, onlynew, 0); + } + + alpm_list_free(olddirlist); + alpm_list_free(newdirlist); + FREELIST(onlyold); + FREELIST(onlynew); + + if(ret) { + free(dbfilepath); + return(-1); + } } + unlink(dbfilepath); free(dbfilepath); @@ -259,6 +386,9 @@ int SYMEXPORT alpm_db_update(int force, pmdb_t *db) db->treename, (uintmax_t)newmtime); setlastupdate(db, newmtime); } + + /* Cache needs to be rebuilt */ + 
_alpm_db_free_pkgcache(db); } return(0); diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c index 3777349..1f140c8 100644 --- a/lib/libalpm/util.c +++ b/lib/libalpm/util.c @@ -335,12 +335,13 @@ int _alpm_unpack(const char *archive, const char *prefix, alpm_list_t *list, int char *found = alpm_list_find_str(list, prefix); free(prefix); if(!found) { - _alpm_log(PM_LOG_DEBUG, "skipping: %s\n", entryname); if (archive_read_data_skip(_archive) != ARCHIVE_OK) { ret = 1; goto cleanup; } continue; + } else { + _alpm_log(PM_LOG_DEBUG, "extracting: %s\n", entryname); } } -- 1.6.4.4
On Sun, Oct 11, 2009 at 3:42 PM, Xavier Chantry <shiningxc@gmail.com> wrote:
+ + left = alpm_list_copy(lhs); + left = alpm_list_msort(left, alpm_list_count(left), fn); + right = alpm_list_copy(rhs); + right = alpm_list_msort(right, alpm_list_count(right), fn); + + alpm_list_diff_sorted(left, right, fn, &ret, NULL);
return(ret); }
Oops, I forgot to free left and right here. Thanks Dan :) Fixed on my working branch.
participants (2)
-
Xavier
-
Xavier Chantry