[pacman-dev] [RFC] Iterator interface for reading databases
Hello, I am thinking an iterator-like interface for database reading could be useful. The most obvious example use is the pkgfile functionality. It could be a convenient tool to do one-pass work on databases without loading them completely in memory, which can be impossible for databases with files info (I don't think everyone can load the whole [community] database using libalpm standard structures). Here's a patch that roughly shows what I am thinking about: only the code for the "local" backend is shown. diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index 6e1e4bc..986e9a1 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h @@ -119,6 +119,7 @@ typedef enum _alpm_sigstatus_t { typedef struct __alpm_handle_t alpm_handle_t; typedef struct __alpm_db_t alpm_db_t; +typedef struct __alpm_dbreader_t alpm_dbreader_t; typedef struct __alpm_pkg_t alpm_pkg_t; typedef struct __alpm_trans_t alpm_trans_t; @@ -371,6 +372,9 @@ int alpm_option_set_default_siglevel(alpm_handle_t *handle, alpm_siglevel_t leve * @{ */ +alpm_dbreader_t *alpm_open_dbdir(alpm_handle_t *handle, const char *dbpath); +alpm_dbreader_t *alpm_open_dbarchive(alpm_handle_t *handle, const char *dbpath); + /** Get the database of locally installed packages. * The returned pointer points to an internal structure * of libalpm which should only be manipulated through diff --git a/lib/libalpm/be_local.c b/lib/libalpm/be_local.c index 70f242d..74d6886 100644 --- a/lib/libalpm/be_local.c +++ b/lib/libalpm/be_local.c @@ -41,15 +41,22 @@ #include "package.h" #include "deps.h" -static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq); +static int local_db_read(const char *path, alpm_pkg_t *info, alpm_dbinfrq_t inforeq); #define LAZY_LOAD(info, errret) \ do { \ if(!(pkg->infolevel & info)) { \ - local_db_read(pkg, info); \ + local_db_read(NULL, pkg, info); \ } \ } while(0) +struct local_dbreader_t { + alpm_handle_t *handle; + alpm_pkg_t *(*getitem) (alpm_dbreader_t*); + DIR* dbdir; + const char *dbpath; + char finished; +}; /* Cache-specific accessor functions. These implementations allow for lazy * loading by the files backend when a data member is actually needed @@ -235,7 +242,7 @@ static int _cache_changelog_close(const alpm_pkg_t UNUSED *pkg, void *fp) static int _cache_force_load(alpm_pkg_t *pkg) { - return local_db_read(pkg, INFRQ_ALL); + return local_db_read(NULL, pkg, INFRQ_ALL); } @@ -376,9 +383,10 @@ static int local_db_populate(alpm_db_t *db) size_t est_count; int count = 0; struct stat buf; - struct dirent *ent = NULL; const char *dbpath; DIR *dbdir; + alpm_dbreader_t *reader; + alpm_pkg_t *pkg; dbpath = _alpm_db_path(db); if(dbpath == NULL) { @@ -408,7 +416,6 @@ static int local_db_populate(alpm_db_t *db) while(readdir(dbdir) != NULL) { est_count++; } - rewinddir(dbdir); } if(est_count >= 2) { /* subtract the two extra pointers to get # of children */ @@ -418,35 +425,13 @@ static int local_db_populate(alpm_db_t *db) /* initialize hash at 50% full */ db->pkgcache = _alpm_pkghash_create(est_count * 2); if(db->pkgcache == NULL){ - closedir(dbdir); RET_ERR(db->handle, ALPM_ERR_MEMORY, -1); } + closedir(dbdir); - while((ent = readdir(dbdir)) != NULL) { - const char *name = ent->d_name; - - alpm_pkg_t *pkg; - - if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { - continue; - } - if(!is_dir(dbpath, ent)) { - continue; - } - - pkg = _alpm_pkg_new(); - if(pkg == NULL) { - closedir(dbdir); - RET_ERR(db->handle, ALPM_ERR_MEMORY, -1); - } - /* split the db entry name */ - if(_alpm_splitname(name, &(pkg->name), &(pkg->version), - &(pkg->name_hash)) != 0) { - _alpm_log(db->handle, ALPM_LOG_ERROR, _("invalid name for database entry '%s'\n"), - name); - _alpm_pkg_free(pkg); - continue; - } + reader = alpm_open_dbdir(db->handle, dbpath); + while((pkg = reader->getitem(reader))) { + pkg->origin_data.db = db; /* duplicated database entries are not allowed */ if(_alpm_pkghash_find(db->pkgcache, pkg->name)) { @@ -455,18 +440,6 @@ static int local_db_populate(alpm_db_t *db) continue; } - pkg->origin = PKG_FROM_LOCALDB; - pkg->origin_data.db = db; - pkg->ops = &local_pkg_ops; - pkg->handle = db->handle; - - /* explicitly read with only 'BASE' data, accessors will handle the rest */ - if(local_db_read(pkg, INFRQ_BASE) == -1) { - _alpm_log(db->handle, ALPM_LOG_ERROR, _("corrupted database entry '%s'\n"), name); - _alpm_pkg_free(pkg); - continue; - } - /* add to the collection */ _alpm_log(db->handle, ALPM_LOG_FUNCTION, "adding '%s' to package cache for db '%s'\n", pkg->name, db->treename); @@ -474,7 +447,6 @@ static int local_db_populate(alpm_db_t *db) count++; } - closedir(dbdir); if(count > 0) { db->pkgcache->list = alpm_list_msort(db->pkgcache->list, (size_t)count, _alpm_pkg_cmp); } @@ -516,13 +488,13 @@ static char *get_pkgpath(alpm_db_t *db, alpm_pkg_t *info) f = alpm_list_add(f, linedup); \ } while(1) /* note the while(1) and not (0) */ -static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) +static int local_db_read(const char* pkgpath, alpm_pkg_t *info, alpm_dbinfrq_t inforeq) { FILE *fp = NULL; char path[PATH_MAX]; char line[1024]; - char *pkgpath = NULL; - alpm_db_t *db = info->origin_data.db; + alpm_db_t *db = info->origin_data.db; + const char *treename = db ? db->treename : "NULL"; /* bitmask logic here: * infolevel: 00001111 @@ -533,18 +505,20 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) /* already loaded all of this info, do nothing */ return 0; } - _alpm_log(db->handle, ALPM_LOG_FUNCTION, "loading package data for %s : level=0x%x\n", + _alpm_log(info->handle, ALPM_LOG_FUNCTION, "loading package data for %s : level=0x%x\n", info->name, inforeq); /* clear out 'line', to be certain - and to make valgrind happy */ memset(line, 0, sizeof(line)); - pkgpath = get_pkgpath(db, info); + if (pkgpath == NULL) { + pkgpath = get_pkgpath(db, info); + } if(access(pkgpath, F_OK)) { /* directory doesn't exist or can't be opened */ - _alpm_log(db->handle, ALPM_LOG_DEBUG, "cannot find '%s-%s' in db '%s'\n", - info->name, info->version, db->treename); + _alpm_log(info->handle, ALPM_LOG_DEBUG, "cannot find '%s-%s' in db '%s'\n", + info->name, info->version, db ? treename : "NULL"); goto error; } @@ -552,7 +526,7 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) if(inforeq & INFRQ_DESC && !(info->infolevel & INFRQ_DESC)) { snprintf(path, PATH_MAX, "%sdesc", pkgpath); if((fp = fopen(path, "r")) == NULL) { - _alpm_log(db->handle, ALPM_LOG_ERROR, _("could not open file %s: %s\n"), path, strerror(errno)); + _alpm_log(info->handle, ALPM_LOG_ERROR, _("could not open file %s: %s\n"), path, strerror(errno)); goto error; } while(!feof(fp)) { @@ -560,14 +534,14 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) if(strcmp(line, "%NAME%") == 0) { READ_NEXT(); if(strcmp(line, info->name) != 0) { - _alpm_log(db->handle, ALPM_LOG_ERROR, _("%s database is inconsistent: name " - "mismatch on package %s\n"), db->treename, info->name); + _alpm_log(info->handle, ALPM_LOG_ERROR, _("%s database is inconsistent: name " + "mismatch on package %s\n"), treename, info->name); } } else if(strcmp(line, "%VERSION%") == 0) { READ_NEXT(); if(strcmp(line, info->version) != 0) { - _alpm_log(db->handle, ALPM_LOG_ERROR, _("%s database is inconsistent: version " - "mismatch on package %s\n"), db->treename, info->name); + _alpm_log(info->handle, ALPM_LOG_ERROR, _("%s database is inconsistent: version " + "mismatch on package %s\n"), treename, info->name); } } else if(strcmp(line, "%DESC%") == 0) { READ_AND_STORE(info->desc); @@ -625,7 +599,7 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) if(inforeq & INFRQ_FILES && !(info->infolevel & INFRQ_FILES)) { snprintf(path, PATH_MAX, "%sfiles", pkgpath); if((fp = fopen(path, "r")) == NULL) { - _alpm_log(db->handle, ALPM_LOG_ERROR, _("could not open file %s: %s\n"), path, strerror(errno)); + _alpm_log(info->handle, ALPM_LOG_ERROR, _("could not open file %s: %s\n"), path, strerror(errno)); goto error; } while(fgets(line, sizeof(line), fp)) { @@ -664,11 +638,9 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) /* internal */ info->infolevel |= inforeq; - free(pkgpath); return 0; error: - free(pkgpath); if(fp) { fclose(fp); } @@ -913,4 +885,96 @@ alpm_db_t *_alpm_db_register_local(alpm_handle_t *handle) return db; } +static alpm_pkg_t *local_dbreader_getitem(struct local_dbreader_t *reader) { + struct dirent *ent = NULL; + reader->handle->pm_errno = 0; + if (reader->finished) { + /* iteration has already finished */ + RET_ERR(reader->handle, ALPM_ERR_WRONG_ARGS, NULL); + } + if (reader->dbdir == NULL) { + reader->finished = 1; + return NULL; + } + + while(1) { + ent = readdir(reader->dbdir); + if (ent == NULL) { + closedir(reader->dbdir); + reader->finished = 1; + return NULL; + } + const char *name = ent->d_name; + char fullpath[PATH_MAX]; + snprintf(fullpath, PATH_MAX, "%s/%s", reader->dbpath, name); + + alpm_pkg_t *pkg; + + if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { + continue; + } + if(!is_dir(reader->dbpath, ent)) { + continue; + } + + pkg = _alpm_pkg_new(); + if(pkg == NULL) { + closedir(reader->dbdir); + reader->finished = 1; + RET_ERR(reader->handle, ALPM_ERR_MEMORY, NULL); + } + pkg->handle = reader->handle; + /* split the db entry name */ + if(_alpm_splitname(name, &(pkg->name), &(pkg->version), + &(pkg->name_hash)) != 0) { + _alpm_log(reader->handle, ALPM_LOG_ERROR, _("invalid name for database entry '%s'\n"), + name); + _alpm_pkg_free(pkg); + continue; + } + + pkg->origin = PKG_FROM_LOCALDB; + pkg->origin_data.db = NULL; + pkg->ops = &local_pkg_ops; + + /* explicitly read with only 'BASE' data, accessors will handle the rest */ + if(local_db_read(fullpath, pkg, INFRQ_BASE) == -1) { + _alpm_log(reader->handle, ALPM_LOG_ERROR, _("corrupted database entry '%s'\n"), name); + _alpm_pkg_free(pkg); + continue; + } + return pkg; + } +} + +alpm_dbreader_t SYMEXPORT *alpm_open_dbdir(alpm_handle_t *handle, const char *dbpath) { + struct local_dbreader_t *reader = malloc(sizeof(struct local_dbreader_t)); + DIR *dbdir; + if(handle == NULL) { + return NULL; + } + if(reader == NULL) { + RET_ERR(handle, ALPM_ERR_MEMORY, NULL); + } + if(dbpath == NULL) { + RET_ERR(handle, ALPM_ERR_WRONG_ARGS, NULL); + } + + /* start reading */ + dbdir = opendir(dbpath); + if(dbdir == NULL) { + if(errno != ENOENT) { + /* no database existing yet is not an error */ + free(reader); + return NULL; + } + } + reader->getitem = &local_dbreader_getitem; + reader->handle = handle; + reader->dbdir = dbdir; + reader->dbpath = dbpath; + reader->finished = 0; + return (alpm_dbreader_t*)reader; +} + /* vim: set ts=2 sw=2 noet: */ diff --git a/lib/libalpm/db.h b/lib/libalpm/db.h index a950130..3ce5889 100644 --- a/lib/libalpm/db.h +++ b/lib/libalpm/db.h @@ -56,6 +56,11 @@ struct db_operations { void (*unregister) (alpm_db_t *); }; +struct __alpm_dbreader_t { + alpm_handle_t *handle; + alpm_pkg_t *(*getitem) (alpm_dbreader_t*); +}; + /* Database */ struct __alpm_db_t { alpm_handle_t *handle; -- Rémy.
On Sat, Jul 16, 2011 at 11:32 AM, Rémy Oudompheng <remyoudompheng@gmail.com> wrote:
Hello,
I am thinking an iterator-like interface for database reading could be useful. The most obvious example use is the pkgfile functionality. I don't disagree with the thinking.
It could be a convenient tool to do one-pass work on databases without loading them completely in memory, which can be impossible for databases with files info (I don't think everyone can load the whole [community] database using libalpm standard structures). Have you analyzed the heap using massif to see what is going on here? I doubt we break the ~65 MB mark (with one database), which is big, but not that ridiculous.
I guess the implementation below seems rather hacky to me, as it breaks at least two invariants we have: * Local DB entries are never loaded more than once * The caller is responsible for freeing the package (I think?) Wouldn't a much simpler solution be to just have a corollary method to alpm_pkg_get_files() that released the memory used by the files data, and dropped the infrq level back down? At least for local packages. With that said, this gets immensely tricker when it comes to sync databases as we don't lazy load anything there. So I did some profiling with my sync db loading files branch using massif, results attached. This includes loading 6 repos (testing, core, extra, multilib, community-testing, community). Use ms_print on the file to see what is going on here and get a pretty graph, but the short summary: * Heap usage peaks at ~263 MB * A ton of that is filelists (at least 66% if not more, see sample 48 breakdown, the three biggest allocations are all my filelist alloc block of code) * Our useful heap/extra heap ratio sucks. We could allocate memory a whole lot smarter apparently. (I have another WIP filelists as arrays patch which would probably help a lot, as you don't need so many damn pointers for the linked list layout).
Here's a patch that roughly shows what I am thinking about: only the code for the "local" backend is shown.
diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index 6e1e4bc..986e9a1 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h @@ -119,6 +119,7 @@ typedef enum _alpm_sigstatus_t {
typedef struct __alpm_handle_t alpm_handle_t; typedef struct __alpm_db_t alpm_db_t; +typedef struct __alpm_dbreader_t alpm_dbreader_t; typedef struct __alpm_pkg_t alpm_pkg_t; typedef struct __alpm_trans_t alpm_trans_t;
@@ -371,6 +372,9 @@ int alpm_option_set_default_siglevel(alpm_handle_t *handle, alpm_siglevel_t leve * @{ */
+alpm_dbreader_t *alpm_open_dbdir(alpm_handle_t *handle, const char *dbpath); +alpm_dbreader_t *alpm_open_dbarchive(alpm_handle_t *handle, const char *dbpath); + /** Get the database of locally installed packages. * The returned pointer points to an internal structure * of libalpm which should only be manipulated through diff --git a/lib/libalpm/be_local.c b/lib/libalpm/be_local.c index 70f242d..74d6886 100644 --- a/lib/libalpm/be_local.c +++ b/lib/libalpm/be_local.c @@ -41,15 +41,22 @@ #include "package.h" #include "deps.h"
-static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq); +static int local_db_read(const char *path, alpm_pkg_t *info, alpm_dbinfrq_t inforeq);
#define LAZY_LOAD(info, errret) \ do { \ if(!(pkg->infolevel & info)) { \ - local_db_read(pkg, info); \ + local_db_read(NULL, pkg, info); \ } \ } while(0)
+struct local_dbreader_t { + alpm_handle_t *handle; + alpm_pkg_t *(*getitem) (alpm_dbreader_t*); + DIR* dbdir; + const char *dbpath; + char finished; +};
/* Cache-specific accessor functions. These implementations allow for lazy * loading by the files backend when a data member is actually needed @@ -235,7 +242,7 @@ static int _cache_changelog_close(const alpm_pkg_t UNUSED *pkg, void *fp)
static int _cache_force_load(alpm_pkg_t *pkg) { - return local_db_read(pkg, INFRQ_ALL); + return local_db_read(NULL, pkg, INFRQ_ALL); }
@@ -376,9 +383,10 @@ static int local_db_populate(alpm_db_t *db) size_t est_count; int count = 0; struct stat buf; - struct dirent *ent = NULL; const char *dbpath; DIR *dbdir; + alpm_dbreader_t *reader; + alpm_pkg_t *pkg;
dbpath = _alpm_db_path(db); if(dbpath == NULL) { @@ -408,7 +416,6 @@ static int local_db_populate(alpm_db_t *db) while(readdir(dbdir) != NULL) { est_count++; } - rewinddir(dbdir); } if(est_count >= 2) { /* subtract the two extra pointers to get # of children */ @@ -418,35 +425,13 @@ static int local_db_populate(alpm_db_t *db) /* initialize hash at 50% full */ db->pkgcache = _alpm_pkghash_create(est_count * 2); if(db->pkgcache == NULL){ - closedir(dbdir); RET_ERR(db->handle, ALPM_ERR_MEMORY, -1); } + closedir(dbdir);
- while((ent = readdir(dbdir)) != NULL) { - const char *name = ent->d_name; - - alpm_pkg_t *pkg; - - if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { - continue; - } - if(!is_dir(dbpath, ent)) { - continue; - } - - pkg = _alpm_pkg_new(); - if(pkg == NULL) { - closedir(dbdir); - RET_ERR(db->handle, ALPM_ERR_MEMORY, -1); - } - /* split the db entry name */ - if(_alpm_splitname(name, &(pkg->name), &(pkg->version), - &(pkg->name_hash)) != 0) { - _alpm_log(db->handle, ALPM_LOG_ERROR, _("invalid name for database entry '%s'\n"), - name); - _alpm_pkg_free(pkg); - continue; - } + reader = alpm_open_dbdir(db->handle, dbpath); + while((pkg = reader->getitem(reader))) { + pkg->origin_data.db = db;
/* duplicated database entries are not allowed */ if(_alpm_pkghash_find(db->pkgcache, pkg->name)) { @@ -455,18 +440,6 @@ static int local_db_populate(alpm_db_t *db) continue; }
- pkg->origin = PKG_FROM_LOCALDB; - pkg->origin_data.db = db; - pkg->ops = &local_pkg_ops; - pkg->handle = db->handle; - - /* explicitly read with only 'BASE' data, accessors will handle the rest */ - if(local_db_read(pkg, INFRQ_BASE) == -1) { - _alpm_log(db->handle, ALPM_LOG_ERROR, _("corrupted database entry '%s'\n"), name); - _alpm_pkg_free(pkg); - continue; - } - /* add to the collection */ _alpm_log(db->handle, ALPM_LOG_FUNCTION, "adding '%s' to package cache for db '%s'\n", pkg->name, db->treename); @@ -474,7 +447,6 @@ static int local_db_populate(alpm_db_t *db) count++; }
- closedir(dbdir); if(count > 0) { db->pkgcache->list = alpm_list_msort(db->pkgcache->list, (size_t)count, _alpm_pkg_cmp); } @@ -516,13 +488,13 @@ static char *get_pkgpath(alpm_db_t *db, alpm_pkg_t *info) f = alpm_list_add(f, linedup); \ } while(1) /* note the while(1) and not (0) */
-static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) +static int local_db_read(const char* pkgpath, alpm_pkg_t *info, alpm_dbinfrq_t inforeq) { FILE *fp = NULL; char path[PATH_MAX]; char line[1024]; - char *pkgpath = NULL; - alpm_db_t *db = info->origin_data.db; + alpm_db_t *db = info->origin_data.db; + const char *treename = db ? db->treename : "NULL";
/* bitmask logic here: * infolevel: 00001111 @@ -533,18 +505,20 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) /* already loaded all of this info, do nothing */ return 0; } - _alpm_log(db->handle, ALPM_LOG_FUNCTION, "loading package data for %s : level=0x%x\n", + _alpm_log(info->handle, ALPM_LOG_FUNCTION, "loading package data for %s : level=0x%x\n", info->name, inforeq);
/* clear out 'line', to be certain - and to make valgrind happy */ memset(line, 0, sizeof(line));
- pkgpath = get_pkgpath(db, info); + if (pkgpath == NULL) { + pkgpath = get_pkgpath(db, info); + }
if(access(pkgpath, F_OK)) { /* directory doesn't exist or can't be opened */ - _alpm_log(db->handle, ALPM_LOG_DEBUG, "cannot find '%s-%s' in db '%s'\n", - info->name, info->version, db->treename); + _alpm_log(info->handle, ALPM_LOG_DEBUG, "cannot find '%s-%s' in db '%s'\n", + info->name, info->version, db ? treename : "NULL"); goto error; }
@@ -552,7 +526,7 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) if(inforeq & INFRQ_DESC && !(info->infolevel & INFRQ_DESC)) { snprintf(path, PATH_MAX, "%sdesc", pkgpath); if((fp = fopen(path, "r")) == NULL) { - _alpm_log(db->handle, ALPM_LOG_ERROR, _("could not open file %s: %s\n"), path, strerror(errno)); + _alpm_log(info->handle, ALPM_LOG_ERROR, _("could not open file %s: %s\n"), path, strerror(errno)); goto error; } while(!feof(fp)) { @@ -560,14 +534,14 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) if(strcmp(line, "%NAME%") == 0) { READ_NEXT(); if(strcmp(line, info->name) != 0) { - _alpm_log(db->handle, ALPM_LOG_ERROR, _("%s database is inconsistent: name " - "mismatch on package %s\n"), db->treename, info->name); + _alpm_log(info->handle, ALPM_LOG_ERROR, _("%s database is inconsistent: name " + "mismatch on package %s\n"), treename, info->name); } } else if(strcmp(line, "%VERSION%") == 0) { READ_NEXT(); if(strcmp(line, info->version) != 0) { - _alpm_log(db->handle, ALPM_LOG_ERROR, _("%s database is inconsistent: version " - "mismatch on package %s\n"), db->treename, info->name); + _alpm_log(info->handle, ALPM_LOG_ERROR, _("%s database is inconsistent: version " + "mismatch on package %s\n"), treename, info->name); } } else if(strcmp(line, "%DESC%") == 0) { READ_AND_STORE(info->desc); @@ -625,7 +599,7 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) if(inforeq & INFRQ_FILES && !(info->infolevel & INFRQ_FILES)) { snprintf(path, PATH_MAX, "%sfiles", pkgpath); if((fp = fopen(path, "r")) == NULL) { - _alpm_log(db->handle, ALPM_LOG_ERROR, _("could not open file %s: %s\n"), path, strerror(errno)); + _alpm_log(info->handle, ALPM_LOG_ERROR, _("could not open file %s: %s\n"), path, strerror(errno)); goto error; } while(fgets(line, sizeof(line), fp)) { @@ -664,11 +638,9 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq) /* internal */ info->infolevel |= inforeq;
- free(pkgpath); return 0;
error: - free(pkgpath); if(fp) { fclose(fp); } @@ -913,4 +885,96 @@ alpm_db_t *_alpm_db_register_local(alpm_handle_t *handle) return db; }
+static alpm_pkg_t *local_dbreader_getitem(struct local_dbreader_t *reader) { + struct dirent *ent = NULL; + reader->handle->pm_errno = 0; + if (reader->finished) { + /* iteration has already finished */ + RET_ERR(reader->handle, ALPM_ERR_WRONG_ARGS, NULL); + } + if (reader->dbdir == NULL) { + reader->finished = 1; + return NULL; + } + + while(1) { + ent = readdir(reader->dbdir); + if (ent == NULL) { + closedir(reader->dbdir); + reader->finished = 1; + return NULL; + } + const char *name = ent->d_name; + char fullpath[PATH_MAX]; + snprintf(fullpath, PATH_MAX, "%s/%s", reader->dbpath, name); + + alpm_pkg_t *pkg; + + if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { + continue; + } + if(!is_dir(reader->dbpath, ent)) { + continue; + } + + pkg = _alpm_pkg_new(); + if(pkg == NULL) { + closedir(reader->dbdir); + reader->finished = 1; + RET_ERR(reader->handle, ALPM_ERR_MEMORY, NULL); + } + pkg->handle = reader->handle; + /* split the db entry name */ + if(_alpm_splitname(name, &(pkg->name), &(pkg->version), + &(pkg->name_hash)) != 0) { + _alpm_log(reader->handle, ALPM_LOG_ERROR, _("invalid name for database entry '%s'\n"), + name); + _alpm_pkg_free(pkg); + continue; + } + + pkg->origin = PKG_FROM_LOCALDB; + pkg->origin_data.db = NULL; + pkg->ops = &local_pkg_ops; + + /* explicitly read with only 'BASE' data, accessors will handle the rest */ + if(local_db_read(fullpath, pkg, INFRQ_BASE) == -1) { + _alpm_log(reader->handle, ALPM_LOG_ERROR, _("corrupted database entry '%s'\n"), name); + _alpm_pkg_free(pkg); + continue; + } + return pkg; + } +} + +alpm_dbreader_t SYMEXPORT *alpm_open_dbdir(alpm_handle_t *handle, const char *dbpath) { + struct local_dbreader_t *reader = malloc(sizeof(struct local_dbreader_t)); + DIR *dbdir; + if(handle == NULL) { + return NULL; + } + if(reader == NULL) { + RET_ERR(handle, ALPM_ERR_MEMORY, NULL); + } + if(dbpath == NULL) { + RET_ERR(handle, ALPM_ERR_WRONG_ARGS, NULL); + } + + /* start reading */ + dbdir = opendir(dbpath); + if(dbdir == NULL) { + if(errno != ENOENT) { + /* no database existing yet is not an error */ + free(reader); + return NULL; + } + } + reader->getitem = &local_dbreader_getitem; + reader->handle = handle; + reader->dbdir = dbdir; + reader->dbpath = dbpath; + reader->finished = 0; + return (alpm_dbreader_t*)reader; +} + /* vim: set ts=2 sw=2 noet: */ diff --git a/lib/libalpm/db.h b/lib/libalpm/db.h index a950130..3ce5889 100644 --- a/lib/libalpm/db.h +++ b/lib/libalpm/db.h @@ -56,6 +56,11 @@ struct db_operations { void (*unregister) (alpm_db_t *); };
+struct __alpm_dbreader_t { + alpm_handle_t *handle; + alpm_pkg_t *(*getitem) (alpm_dbreader_t*); +}; + /* Database */ struct __alpm_db_t { alpm_handle_t *handle;
-- Rémy.
On 2011/7/16 Dan McGee <dpmcgee@gmail.com> wrote:
On Sat, Jul 16, 2011 at 11:32 AM, Rémy Oudompheng <remyoudompheng@gmail.com> wrote:
Hello,
I am thinking an iterator-like interface for database reading could be useful. The most obvious example use is the pkgfile functionality. I don't disagree with the thinking.
It could be a convenient tool to do one-pass work on databases without loading them completely in memory, which can be impossible for databases with files info (I don't think everyone can load the whole [community] database using libalpm standard structures). Have you analyzed the heap using massif to see what is going on here? I doubt we break the ~65 MB mark (with one database), which is big, but not that ridiculous.
I guess the implementation below seems rather hacky to me, as it breaks at least two invariants we have: * Local DB entries are never loaded more than once
The goal here is not to change the behaviour of pacman, but to factorise the code so that it can be reused in other ways: the iterator implementation need not be public, if the interface exposes fixed usage patterns, e.g. mapping a function over all pmpkg_t.
* The caller is responsible for freeing the package (I think?)
The idea was that it would be used inside libalpm for two things: * loading the database * mapping a function over all packages without actually loading the database. So that there are two implementation patterns: * the iterator frees the previous package at each iteration (and the last package at the final iteration) * the iterator is private and the only public interface is a map() thing. -- Rémy.
On Sun, Jul 17, 2011 at 12:32 AM, Rémy Oudompheng <remyoudompheng@gmail.com> wrote:
Hello,
I am thinking an iterator-like interface for database reading could be useful. The most obvious example use is the pkgfile functionality. It could be a convenient tool to do one-pass work on databases without loading them completely in memory, which can be impossible for databases with files info (I don't think everyone can load the whole [community] database using libalpm standard structures).
Here's a patch that roughly shows what I am thinking about: only the code for the "local" backend is shown.
diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index 6e1e4bc..986e9a1 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h
[...]
+struct local_dbreader_t { + alpm_handle_t *handle; + alpm_pkg_t *(*getitem) (alpm_dbreader_t*); + DIR* dbdir; + const char *dbpath; + char finished; +};
How about refactoring this even more into a generic alpm_iterator_t? Something like: struct alpm_iterator_t { void *(*peek)(void *context); void *(*next)(void *context); void *context; }; The context member would reference something similar to your local_dbreader_t structure (just without the getitem member) which contains all the state, and gets passed into peek() and next(). Then you'd have a consistent API for any iterators alpm might use: void *alpm_iter_peek(struct alpm_iterator_t *iter) { // <insert sanity checks> return iter->peek(iter->context); } void *alpm_iter_next(struct alpm_iterator_t *iter) { // <insert sanity checks> return iter->next(iter->context); } struct *alpm_iter_t db_iter = alpm_open_dbdir(db->handle, dbpath); // or alpm_db_iter()? pmpkg_t *pkg; pkg = alpm_iter_peek(db_iter); // NULL pkg = alpm_iter_next(db_iter); // a pmpkg_t* pkg = alpm_iter_peek(db_iter); // same pkmpkg_t* as above
participants (3)
-
Dan McGee
-
Rémy Oudompheng
-
Sebastian Nowicki