[pacman-dev] [PATCH 1/2] Make sync DB reading a bit more flexible
We can reorganize things a bit to not require reading a directory-only entry first (or at all). This was noticed while working on some pactest improvements, but should be a good step forward anyway. Also make _alpm_splitname() a bit more generic in where it stores the data it parses. Signed-off-by: Dan McGee <dan@archlinux.org> --- lib/libalpm/be_local.c | 3 +- lib/libalpm/be_sync.c | 130 ++++++++++++++++++++++++----------------------- lib/libalpm/util.c | 46 ++++++++++------- lib/libalpm/util.h | 3 +- 4 files changed, 97 insertions(+), 85 deletions(-) diff --git a/lib/libalpm/be_local.c b/lib/libalpm/be_local.c index 4b2a301..9e4de54 100644 --- a/lib/libalpm/be_local.c +++ b/lib/libalpm/be_local.c @@ -382,7 +382,8 @@ static int local_db_populate(pmdb_t *db) RET_ERR(db->handle, PM_ERR_MEMORY, -1); } /* split the db entry name */ - if(_alpm_splitname(name, pkg) != 0) { + if(_alpm_splitname(name, &(pkg->name), &(pkg->version), + &(pkg->name_hash)) != 0) { _alpm_log(db->handle, PM_LOG_ERROR, _("invalid name for database entry '%s'\n"), name); _alpm_pkg_free(pkg); diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index f0b1736..6d1ab39 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -182,7 +182,61 @@ cleanup: /* Forward decl so I don't reorganize the whole file right now */ static int sync_db_read(pmdb_t *db, struct archive *archive, - struct archive_entry *entry, pmpkg_t *likely_pkg); + struct archive_entry *entry, pmpkg_t **likely_pkg); + +static pmpkg_t *load_pkg_for_entry(pmdb_t *db, const char *entryname, + const char **entry_filename, pmpkg_t *likely_pkg) +{ + char *pkgname = NULL, *pkgver = NULL; + unsigned long pkgname_hash; + pmpkg_t *pkg; + + /* get package and db file names */ + if(entry_filename) { + char *fname = strrchr(entryname, '/'); + if(fname) { + *entry_filename = fname + 1; + } else { + *entry_filename = NULL; + } + } + if(_alpm_splitname(entryname, &pkgname, &pkgver, &pkgname_hash) != 0) { + _alpm_log(db->handle, PM_LOG_ERROR, + _("invalid name for database entry '%s'\n"), entryname); + return NULL; + } + + if(likely_pkg && strcmp(likely_pkg->name, pkgname) == 0) { + pkg = likely_pkg; + } else { + pkg = _alpm_pkghash_find(db->pkgcache, pkgname); + } + if(pkg == NULL) { + pkg = _alpm_pkg_new(); + if(pkg == NULL) { + RET_ERR(db->handle, PM_ERR_MEMORY, NULL); + } + + pkg->name = pkgname; + pkg->version = pkgver; + pkg->name_hash = pkgname_hash; + + pkg->origin = PKG_FROM_SYNCDB; + pkg->origin_data.db = db; + pkg->ops = &default_pkg_ops; + pkg->handle = db->handle; + + /* add to the collection */ + _alpm_log(db->handle, PM_LOG_FUNCTION, "adding '%s' to package cache for db '%s'\n", + pkg->name, db->treename); + db->pkgcache = _alpm_pkghash_add(db->pkgcache, pkg); + } else { + free(pkgname); + free(pkgver); + } + + return pkg; +} /* * This is the data table used to generate the estimating function below. @@ -292,55 +346,20 @@ static int sync_db_populate(pmdb_t *db) st = archive_entry_stat(entry); if(S_ISDIR(st->st_mode)) { - const char *name; - - pkg = _alpm_pkg_new(); - if(pkg == NULL) { - archive_read_finish(archive); - RET_ERR(db->handle, PM_ERR_MEMORY, -1); - } - - name = archive_entry_pathname(entry); - - if(_alpm_splitname(name, pkg) != 0) { - _alpm_log(db->handle, PM_LOG_ERROR, _("invalid name for database entry '%s'\n"), - name); - _alpm_pkg_free(pkg); - pkg = NULL; - continue; - } - - /* duplicated database entries are not allowed */ - if(_alpm_pkghash_find(db->pkgcache, pkg->name)) { - _alpm_log(db->handle, PM_LOG_ERROR, _("duplicated database entry '%s'\n"), pkg->name); - _alpm_pkg_free(pkg); - pkg = NULL; - continue; - } - - pkg->origin = PKG_FROM_SYNCDB; - pkg->origin_data.db = db; - pkg->ops = &default_pkg_ops; - pkg->handle = db->handle; - - /* add to the collection */ - _alpm_log(db->handle, PM_LOG_FUNCTION, "adding '%s' to package cache for db '%s'\n", - pkg->name, db->treename); - db->pkgcache = _alpm_pkghash_add(db->pkgcache, pkg); - count++; + continue; } else { /* we have desc, depends or deltas - parse it */ - if(sync_db_read(db, archive, entry, pkg) != 0) { + if(sync_db_read(db, archive, entry, &pkg) != 0) { _alpm_log(db->handle, PM_LOG_ERROR, _("could not parse package '%s' description file from db '%s'\n"), pkg->name, db->treename); - _alpm_pkg_free(pkg); - pkg = NULL; continue; } } } + count = alpm_list_count(db->pkgcache->list); + if(count > 0) { db->pkgcache->list = alpm_list_msort(db->pkgcache->list, (size_t)count, _alpm_pkg_cmp); } @@ -370,10 +389,9 @@ static int sync_db_populate(pmdb_t *db) } while(1) /* note the while(1) and not (0) */ static int sync_db_read(pmdb_t *db, struct archive *archive, - struct archive_entry *entry, pmpkg_t *likely_pkg) + struct archive_entry *entry, pmpkg_t **likely_pkg) { const char *entryname, *filename; - char *pkgname, *p, *q; pmpkg_t *pkg; struct archive_read_buffer buf; @@ -391,27 +409,12 @@ static int sync_db_read(pmdb_t *db, struct archive *archive, /* 512K for a line length seems reasonable */ buf.max_line_size = 512 * 1024; - /* get package and db file names */ - STRDUP(pkgname, entryname, RET_ERR(db->handle, PM_ERR_MEMORY, -1)); - p = pkgname + strlen(pkgname); - for(q = --p; *q && *q != '/'; q--); - filename = q + 1; - for(p = --q; *p && *p != '-'; p--); - for(q = --p; *q && *q != '-'; q--); - *q = '\0'; - - /* package is already in db due to parsing of directory name */ - if(likely_pkg && strcmp(likely_pkg->name, pkgname) == 0) { - pkg = likely_pkg; - } else { - if(db->pkgcache == NULL) { - RET_ERR(db->handle, PM_ERR_MEMORY, -1); - } - pkg = _alpm_pkghash_find(db->pkgcache, pkgname); - } + pkg = load_pkg_for_entry(db, entryname, &filename, *likely_pkg); + if(pkg == NULL) { - _alpm_log(db->handle, PM_LOG_DEBUG, "package %s not found in %s sync database", - pkgname, db->treename); + _alpm_log(db->handle, PM_LOG_DEBUG, + "entry %s could not be loaded into %s sync database", + entryname, db->treename); return -1; } @@ -498,6 +501,7 @@ static int sync_db_read(pmdb_t *db, struct archive *archive, if(ret != ARCHIVE_EOF) { goto error; } + *likely_pkg = pkg; } else if(strcmp(filename, "files") == 0) { /* currently do nothing with this file */ } else { @@ -505,12 +509,10 @@ static int sync_db_read(pmdb_t *db, struct archive *archive, _alpm_log(db->handle, PM_LOG_DEBUG, "unknown database file: %s\n", filename); } - FREE(pkgname); return 0; error: _alpm_log(db->handle, PM_LOG_DEBUG, "error parsing database file: %s\n", filename); - FREE(pkgname); return -1; } diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c index 4976703..e56c9f2 100644 --- a/lib/libalpm/util.c +++ b/lib/libalpm/util.c @@ -825,46 +825,54 @@ cleanup: } } -int _alpm_splitname(const char *target, pmpkg_t *pkg) +int _alpm_splitname(const char *target, char **name, char **version, + unsigned long *name_hash) { /* the format of a db entry is as follows: * package-version-rel/ + * package-version-rel/desc (we ignore the filename portion) * package name can contain hyphens, so parse from the back- go back * two hyphens and we have split the version from the name. */ - const char *version, *end; + const char *pkgver, *end; - if(target == NULL || pkg == NULL) { + if(target == NULL) { return -1; } - end = target + strlen(target); - /* remove any trailing '/' */ - while(*(end - 1) == '/') { - --end; + /* remove anything trailing a '/' */ + end = strchr(target, '/'); + if(!end) { + end = target + strlen(target); } /* do the magic parsing- find the beginning of the version string * by doing two iterations of same loop to lop off two hyphens */ - for(version = end - 1; *version && *version != '-'; version--); - for(version = version - 1; *version && *version != '-'; version--); - if(*version != '-' || version == target) { + for(pkgver = end - 1; *pkgver && *pkgver != '-'; pkgver--); + for(pkgver = pkgver - 1; *pkgver && *pkgver != '-'; pkgver--); + if(*pkgver != '-' || pkgver == target) { return -1; } /* copy into fields and return */ - if(pkg->version) { - FREE(pkg->version); + if(version) { + if(*version) { + FREE(*version); + } + /* version actually points to the dash, so need to increment 1 and account + * for potential end character */ + STRNDUP(*version, pkgver + 1, end - pkgver - 1, return -1); } - /* version actually points to the dash, so need to increment 1 and account - * for potential end character */ - STRNDUP(pkg->version, version + 1, end - version - 1, return -1); - if(pkg->name) { - FREE(pkg->name); + if(name) { + if(*name) { + FREE(*name); + } + STRNDUP(*name, target, pkgver - target, return -1); + if(name_hash) { + *name_hash = _alpm_hash_sdbm(*name); + } } - STRNDUP(pkg->name, target, version - target, return -1); - pkg->name_hash = _alpm_hash_sdbm(pkg->name); return 0; } diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h index 778e20f..c68b07b 100644 --- a/lib/libalpm/util.h +++ b/lib/libalpm/util.h @@ -109,7 +109,8 @@ const char *_alpm_filecache_setup(pmhandle_t *handle); int _alpm_lstat(const char *path, struct stat *buf); int _alpm_test_md5sum(const char *filepath, const char *md5sum); int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b); -int _alpm_splitname(const char *target, pmpkg_t *pkg); +int _alpm_splitname(const char *target, char **name, char **version, + unsigned long *name_hash); unsigned long _alpm_hash_sdbm(const char *str); long _alpm_parsedate(const char *line); -- 1.7.5.4
Discovered this when doing some pactest rewrite work to generate archives in memory only. If a sync database file or PKGINFO file is missing a newline on the final line, the text from that line gets tossed aside and never read into the package struct. This is pretty critical when that last line is a depend or something. Signed-off-by: Dan McGee <dan@archlinux.org> --- lib/libalpm/util.c | 18 ++++++++++++------ 1 files changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c index e56c9f2..7bd4542 100644 --- a/lib/libalpm/util.c +++ b/lib/libalpm/util.c @@ -760,9 +760,8 @@ int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b) &b->block_size, &offset); b->block_offset = b->block; - /* error or end of archive with no data read, cleanup */ - if(b->ret < ARCHIVE_OK || - (b->block_size == 0 && b->ret == ARCHIVE_EOF)) { + /* error, cleanup */ + if(b->ret < ARCHIVE_OK) { goto cleanup; } } @@ -779,19 +778,20 @@ int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b) /* allocate our buffer, or ensure our existing one is big enough */ if(!b->line) { /* set the initial buffer to the read block_size */ - CALLOC(b->line, b->block_size + 1, sizeof(char), return ENOMEM); + CALLOC(b->line, b->block_size + 1, sizeof(char), b->ret = -ENOMEM; goto cleanup); b->line_size = b->block_size + 1; b->line_offset = b->line; } else { size_t needed = (size_t)((b->line_offset - b->line) + (i - b->block_offset) + 1); if(needed > b->max_line_size) { - return ERANGE; + b->ret = -ERANGE; + goto cleanup; } if(needed > b->line_size) { /* need to realloc + copy data to fit total length */ char *new; - CALLOC(new, needed, sizeof(char), return ENOMEM); + CALLOC(new, needed, sizeof(char), b->ret = -ENOMEM; goto cleanup); memcpy(new, b->line, b->line_size); b->line_size = needed; b->line_offset = new + (b->line_offset - b->line); @@ -813,6 +813,12 @@ int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b) memcpy(b->line_offset, b->block_offset, len); b->line_offset += len; b->block_offset = i; + /* there was no new data, return what is left; saved ARCHIVE_EOF will be + * returned on next call */ + if(len == 0) { + b->line_offset[0] = '\0'; + return ARCHIVE_OK; + } } } -- 1.7.5.4
On 24/06/11 15:18, Dan McGee wrote:
Discovered this when doing some pactest rewrite work to generate archives in memory only. If a sync database file or PKGINFO file is missing a newline on the final line, the text from that line gets tossed aside and never read into the package struct. This is pretty critical when that last line is a depend or something.
Signed-off-by: Dan McGee<dan@archlinux.org>
Signed-off-by: Allan
On 24/06/11 15:18, Dan McGee wrote:
We can reorganize things a bit to not require reading a directory-only entry first (or at all). This was noticed while working on some pactest improvements, but should be a good step forward anyway.
Also make _alpm_splitname() a bit more generic in where it stores the data it parses.
Signed-off-by: Dan McGee<dan@archlinux.org>
Signed-off-by: Allan with minor query below.
--- lib/libalpm/be_local.c | 3 +- lib/libalpm/be_sync.c | 130 ++++++++++++++++++++++++----------------------- lib/libalpm/util.c | 46 ++++++++++------- lib/libalpm/util.h | 3 +- 4 files changed, 97 insertions(+), 85 deletions(-)
<snip> All the be_sync.c stuff looks fine to me.
diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c index 4976703..e56c9f2 100644 --- a/lib/libalpm/util.c +++ b/lib/libalpm/util.c @@ -825,46 +825,54 @@ cleanup: } }
-int _alpm_splitname(const char *target, pmpkg_t *pkg) +int _alpm_splitname(const char *target, char **name, char **version, + unsigned long *name_hash) { /* the format of a db entry is as follows: * package-version-rel/ + * package-version-rel/desc (we ignore the filename portion) * package name can contain hyphens, so parse from the back- go back * two hyphens and we have split the version from the name. */ - const char *version, *end; + const char *pkgver, *end;
- if(target == NULL || pkg == NULL) { + if(target == NULL) { return -1; }
Should we check name/version/name_hash here for being NULL and abort similar to what was done for pkg previously? Or could it be of use for this function to be called with those as NULL?
On Mon, Jun 27, 2011 at 4:36 AM, Allan McRae <allan@archlinux.org> wrote: > On 24/06/11 15:18, Dan McGee wrote: >> >> We can reorganize things a bit to not require reading a directory-only >> entry >> first (or at all). This was noticed while working on some pactest >> improvements, but should be a good step forward anyway. >> >> Also make _alpm_splitname() a bit more generic in where it stores the data >> it >> parses. >> >> Signed-off-by: Dan McGee<dan@archlinux.org> > > Signed-off-by: Allan with minor query below. > >> --- >> lib/libalpm/be_local.c | 3 +- >> lib/libalpm/be_sync.c | 130 >> ++++++++++++++++++++++++----------------------- >> lib/libalpm/util.c | 46 ++++++++++------- >> lib/libalpm/util.h | 3 +- >> 4 files changed, 97 insertions(+), 85 deletions(-) >> > > <snip> > All the be_sync.c stuff looks fine to me. > >> >> diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c >> index 4976703..e56c9f2 100644 >> --- a/lib/libalpm/util.c >> +++ b/lib/libalpm/util.c >> @@ -825,46 +825,54 @@ cleanup: >> } >> } >> >> -int _alpm_splitname(const char *target, pmpkg_t *pkg) >> +int _alpm_splitname(const char *target, char **name, char **version, >> + unsigned long *name_hash) >> { >> /* the format of a db entry is as follows: >> * package-version-rel/ >> + * package-version-rel/desc (we ignore the filename portion) >> * package name can contain hyphens, so parse from the back- go >> back >> * two hyphens and we have split the version from the name. >> */ >> - const char *version, *end; >> + const char *pkgver, *end; >> >> - if(target == NULL || pkg == NULL) { >> + if(target == NULL) { >> return -1; >> } > > Should we check name/version/name_hash here for being NULL and abort similar > to what was done for pkg previously? Or could it be of use for this > function to be called with those as NULL? I didn't null check here because 1) Look later in the function; we only set version and name if they are non-NULL 2) in reality, the current two callers guarantee non-NULL for those arguments, so even that might be overkill. -Dan
On 27/06/11 22:04, Dan McGee wrote: > On Mon, Jun 27, 2011 at 4:36 AM, Allan McRae<allan@archlinux.org> wrote: >> On 24/06/11 15:18, Dan McGee wrote: >>> >>> We can reorganize things a bit to not require reading a directory-only >>> entry >>> first (or at all). This was noticed while working on some pactest >>> improvements, but should be a good step forward anyway. >>> >>> Also make _alpm_splitname() a bit more generic in where it stores the data >>> it >>> parses. >>> >>> Signed-off-by: Dan McGee<dan@archlinux.org> >> >> Signed-off-by: Allan with minor query below. >> >>> --- >>> lib/libalpm/be_local.c | 3 +- >>> lib/libalpm/be_sync.c | 130 >>> ++++++++++++++++++++++++----------------------- >>> lib/libalpm/util.c | 46 ++++++++++------- >>> lib/libalpm/util.h | 3 +- >>> 4 files changed, 97 insertions(+), 85 deletions(-) >>> >> >> <snip> >> All the be_sync.c stuff looks fine to me. >> >>> >>> diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c >>> index 4976703..e56c9f2 100644 >>> --- a/lib/libalpm/util.c >>> +++ b/lib/libalpm/util.c >>> @@ -825,46 +825,54 @@ cleanup: >>> } >>> } >>> >>> -int _alpm_splitname(const char *target, pmpkg_t *pkg) >>> +int _alpm_splitname(const char *target, char **name, char **version, >>> + unsigned long *name_hash) >>> { >>> /* the format of a db entry is as follows: >>> * package-version-rel/ >>> + * package-version-rel/desc (we ignore the filename portion) >>> * package name can contain hyphens, so parse from the back- go >>> back >>> * two hyphens and we have split the version from the name. >>> */ >>> - const char *version, *end; >>> + const char *pkgver, *end; >>> >>> - if(target == NULL || pkg == NULL) { >>> + if(target == NULL) { >>> return -1; >>> } >> >> Should we check name/version/name_hash here for being NULL and abort similar >> to what was done for pkg previously? Or could it be of use for this >> function to be called with those as NULL? > I didn't null check here because > 1) Look later in the function; we only set version and name if they are non-NULL > 2) in reality, the current two callers guarantee non-NULL for those > arguments, so even that might be overkill. Sure. If they were checks for NULL there, then checks later in the function could obviously be removed. But I suppose that one day we might like to use this just to get the name so version and name_hash could be passed as NULL. So ack from me. BTW, the patch needed a minor rebase to apply to the current head. That is done on my working branch. Allan
participants (3)
-
Allan McRae
-
Dan McGee
-
Dan McGee