[pacman-dev] [PATCH 1/2] Make sync DB reading a bit more flexible

Dan McGee dan at archlinux.org
Fri Jun 24 01:18:46 EDT 2011


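Reorganize sync DB loading so that a package's directory-only archive entry
no longer has to be read first (or at all): the package object is now created
on demand from whichever entry for that package is seen first. This was
noticed while working on some pactest improvements, but should be a good step
forward anyway.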

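Also make _alpm_splitname() more generic about where it stores the data it
parses: instead of writing into a pmpkg_t, it now returns the name, version
and name hash through caller-supplied output pointers.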

Signed-off-by: Dan McGee <dan at archlinux.org>
---
 lib/libalpm/be_local.c |    3 +-
 lib/libalpm/be_sync.c  |  130 ++++++++++++++++++++++++-----------------------
 lib/libalpm/util.c     |   46 ++++++++++-------
 lib/libalpm/util.h     |    3 +-
 4 files changed, 97 insertions(+), 85 deletions(-)

diff --git a/lib/libalpm/be_local.c b/lib/libalpm/be_local.c
index 4b2a301..9e4de54 100644
--- a/lib/libalpm/be_local.c
+++ b/lib/libalpm/be_local.c
@@ -382,7 +382,8 @@ static int local_db_populate(pmdb_t *db)
 			RET_ERR(db->handle, PM_ERR_MEMORY, -1);
 		}
 		/* split the db entry name */
-		if(_alpm_splitname(name, pkg) != 0) {
+		if(_alpm_splitname(name, &(pkg->name), &(pkg->version),
+					&(pkg->name_hash)) != 0) {
 			_alpm_log(db->handle, PM_LOG_ERROR, _("invalid name for database entry '%s'\n"),
 					name);
 			_alpm_pkg_free(pkg);
diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c
index f0b1736..6d1ab39 100644
--- a/lib/libalpm/be_sync.c
+++ b/lib/libalpm/be_sync.c
@@ -182,7 +182,61 @@ cleanup:
 
 /* Forward decl so I don't reorganize the whole file right now */
 static int sync_db_read(pmdb_t *db, struct archive *archive,
-		struct archive_entry *entry, pmpkg_t *likely_pkg);
+		struct archive_entry *entry, pmpkg_t **likely_pkg);
+
+static pmpkg_t *load_pkg_for_entry(pmdb_t *db, const char *entryname,
+		const char **entry_filename, pmpkg_t *likely_pkg)
+{
+	char *pkgname = NULL, *pkgver = NULL;
+	unsigned long pkgname_hash;
+	pmpkg_t *pkg;
+
+	/* get package and db file names */
+	if(entry_filename) {
+		char *fname = strrchr(entryname, '/');
+		if(fname) {
+			*entry_filename = fname + 1;
+		} else {
+			*entry_filename = NULL;
+		}
+	}
+	if(_alpm_splitname(entryname, &pkgname, &pkgver, &pkgname_hash) != 0) {
+		_alpm_log(db->handle, PM_LOG_ERROR,
+				_("invalid name for database entry '%s'\n"), entryname);
+		return NULL;
+	}
+
+	if(likely_pkg && strcmp(likely_pkg->name, pkgname) == 0) {
+		pkg = likely_pkg;
+	} else {
+		pkg = _alpm_pkghash_find(db->pkgcache, pkgname);
+	}
+	if(pkg == NULL) {
+		pkg = _alpm_pkg_new();
+		if(pkg == NULL) {
+			RET_ERR(db->handle, PM_ERR_MEMORY, NULL);
+		}
+
+		pkg->name = pkgname;
+		pkg->version = pkgver;
+		pkg->name_hash = pkgname_hash;
+
+		pkg->origin = PKG_FROM_SYNCDB;
+		pkg->origin_data.db = db;
+		pkg->ops = &default_pkg_ops;
+		pkg->handle = db->handle;
+
+		/* add to the collection */
+		_alpm_log(db->handle, PM_LOG_FUNCTION, "adding '%s' to package cache for db '%s'\n",
+				pkg->name, db->treename);
+		db->pkgcache = _alpm_pkghash_add(db->pkgcache, pkg);
+	} else {
+		free(pkgname);
+		free(pkgver);
+	}
+
+	return pkg;
+}
 
 /*
  * This is the data table used to generate the estimating function below.
@@ -292,55 +346,20 @@ static int sync_db_populate(pmdb_t *db)
 		st = archive_entry_stat(entry);
 
 		if(S_ISDIR(st->st_mode)) {
-			const char *name;
-
-			pkg = _alpm_pkg_new();
-			if(pkg == NULL) {
-				archive_read_finish(archive);
-				RET_ERR(db->handle, PM_ERR_MEMORY, -1);
-			}
-
-			name = archive_entry_pathname(entry);
-
-			if(_alpm_splitname(name, pkg) != 0) {
-				_alpm_log(db->handle, PM_LOG_ERROR, _("invalid name for database entry '%s'\n"),
-						name);
-				_alpm_pkg_free(pkg);
-				pkg = NULL;
-				continue;
-			}
-
-			/* duplicated database entries are not allowed */
-			if(_alpm_pkghash_find(db->pkgcache, pkg->name)) {
-				_alpm_log(db->handle, PM_LOG_ERROR, _("duplicated database entry '%s'\n"), pkg->name);
-				_alpm_pkg_free(pkg);
-				pkg = NULL;
-				continue;
-			}
-
-			pkg->origin = PKG_FROM_SYNCDB;
-			pkg->origin_data.db = db;
-			pkg->ops = &default_pkg_ops;
-			pkg->handle = db->handle;
-
-			/* add to the collection */
-			_alpm_log(db->handle, PM_LOG_FUNCTION, "adding '%s' to package cache for db '%s'\n",
-					pkg->name, db->treename);
-			db->pkgcache = _alpm_pkghash_add(db->pkgcache, pkg);
-			count++;
+			continue;
 		} else {
 			/* we have desc, depends or deltas - parse it */
-			if(sync_db_read(db, archive, entry, pkg) != 0) {
+			if(sync_db_read(db, archive, entry, &pkg) != 0) {
 				_alpm_log(db->handle, PM_LOG_ERROR,
 						_("could not parse package '%s' description file from db '%s'\n"),
 						pkg->name, db->treename);
-				_alpm_pkg_free(pkg);
-				pkg = NULL;
 				continue;
 			}
 		}
 	}
 
+	count = alpm_list_count(db->pkgcache->list);
+
 	if(count > 0) {
 		db->pkgcache->list = alpm_list_msort(db->pkgcache->list, (size_t)count, _alpm_pkg_cmp);
 	}
@@ -370,10 +389,9 @@ static int sync_db_populate(pmdb_t *db)
 } while(1) /* note the while(1) and not (0) */
 
 static int sync_db_read(pmdb_t *db, struct archive *archive,
-		struct archive_entry *entry, pmpkg_t *likely_pkg)
+		struct archive_entry *entry, pmpkg_t **likely_pkg)
 {
 	const char *entryname, *filename;
-	char *pkgname, *p, *q;
 	pmpkg_t *pkg;
 	struct archive_read_buffer buf;
 
@@ -391,27 +409,12 @@ static int sync_db_read(pmdb_t *db, struct archive *archive,
 	/* 512K for a line length seems reasonable */
 	buf.max_line_size = 512 * 1024;
 
-	/* get package and db file names */
-	STRDUP(pkgname, entryname, RET_ERR(db->handle, PM_ERR_MEMORY, -1));
-	p = pkgname + strlen(pkgname);
-	for(q = --p; *q && *q != '/'; q--);
-	filename = q + 1;
-	for(p = --q; *p && *p != '-'; p--);
-	for(q = --p; *q && *q != '-'; q--);
-	*q = '\0';
-
-	/* package is already in db due to parsing of directory name */
-	if(likely_pkg && strcmp(likely_pkg->name, pkgname) == 0) {
-		pkg = likely_pkg;
-	} else {
-		if(db->pkgcache == NULL) {
-			RET_ERR(db->handle, PM_ERR_MEMORY, -1);
-		}
-		pkg = _alpm_pkghash_find(db->pkgcache, pkgname);
-	}
+	pkg = load_pkg_for_entry(db, entryname, &filename, *likely_pkg);
+
 	if(pkg == NULL) {
-		_alpm_log(db->handle, PM_LOG_DEBUG, "package %s not found in %s sync database",
-					pkgname, db->treename);
+		_alpm_log(db->handle, PM_LOG_DEBUG,
+				"entry %s could not be loaded into %s sync database",
+				entryname, db->treename);
 		return -1;
 	}
 
@@ -498,6 +501,7 @@ static int sync_db_read(pmdb_t *db, struct archive *archive,
 		if(ret != ARCHIVE_EOF) {
 			goto error;
 		}
+		*likely_pkg = pkg;
 	} else if(strcmp(filename, "files") == 0) {
 		/* currently do nothing with this file */
 	} else {
@@ -505,12 +509,10 @@ static int sync_db_read(pmdb_t *db, struct archive *archive,
 		_alpm_log(db->handle, PM_LOG_DEBUG, "unknown database file: %s\n", filename);
 	}
 
-	FREE(pkgname);
 	return 0;
 
 error:
 	_alpm_log(db->handle, PM_LOG_DEBUG, "error parsing database file: %s\n", filename);
-	FREE(pkgname);
 	return -1;
 }
 
diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c
index 4976703..e56c9f2 100644
--- a/lib/libalpm/util.c
+++ b/lib/libalpm/util.c
@@ -825,46 +825,54 @@ cleanup:
 	}
 }
 
-int _alpm_splitname(const char *target, pmpkg_t *pkg)
+int _alpm_splitname(const char *target, char **name, char **version,
+		unsigned long *name_hash)
 {
 	/* the format of a db entry is as follows:
 	 *    package-version-rel/
+	 *    package-version-rel/desc (we ignore the filename portion)
 	 * package name can contain hyphens, so parse from the back- go back
 	 * two hyphens and we have split the version from the name.
 	 */
-	const char *version, *end;
+	const char *pkgver, *end;
 
-	if(target == NULL || pkg == NULL) {
+	if(target == NULL) {
 		return -1;
 	}
-	end = target + strlen(target);
 
-	/* remove any trailing '/' */
-	while(*(end - 1) == '/') {
-	  --end;
+	/* remove anything trailing a '/' */
+	end = strchr(target, '/');
+	if(!end) {
+		end = target + strlen(target);
 	}
 
 	/* do the magic parsing- find the beginning of the version string
 	 * by doing two iterations of same loop to lop off two hyphens */
-	for(version = end - 1; *version && *version != '-'; version--);
-	for(version = version - 1; *version && *version != '-'; version--);
-	if(*version != '-' || version == target) {
+	for(pkgver = end - 1; *pkgver && *pkgver != '-'; pkgver--);
+	for(pkgver = pkgver - 1; *pkgver && *pkgver != '-'; pkgver--);
+	if(*pkgver != '-' || pkgver == target) {
 		return -1;
 	}
 
 	/* copy into fields and return */
-	if(pkg->version) {
-		FREE(pkg->version);
+	if(version) {
+		if(*version) {
+			FREE(*version);
+		}
+		/* version actually points to the dash, so need to increment 1 and account
+		 * for potential end character */
+		STRNDUP(*version, pkgver + 1, end - pkgver - 1, return -1);
 	}
-	/* version actually points to the dash, so need to increment 1 and account
-	 * for potential end character */
-	STRNDUP(pkg->version, version + 1, end - version - 1, return -1);
 
-	if(pkg->name) {
-		FREE(pkg->name);
+	if(name) {
+		if(*name) {
+			FREE(*name);
+		}
+		STRNDUP(*name, target, pkgver - target, return -1);
+		if(name_hash) {
+			*name_hash = _alpm_hash_sdbm(*name);
+		}
 	}
-	STRNDUP(pkg->name, target, version - target, return -1);
-	pkg->name_hash = _alpm_hash_sdbm(pkg->name);
 
 	return 0;
 }
diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h
index 778e20f..c68b07b 100644
--- a/lib/libalpm/util.h
+++ b/lib/libalpm/util.h
@@ -109,7 +109,8 @@ const char *_alpm_filecache_setup(pmhandle_t *handle);
 int _alpm_lstat(const char *path, struct stat *buf);
 int _alpm_test_md5sum(const char *filepath, const char *md5sum);
 int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b);
-int _alpm_splitname(const char *target, pmpkg_t *pkg);
+int _alpm_splitname(const char *target, char **name, char **version,
+		unsigned long *name_hash);
 unsigned long _alpm_hash_sdbm(const char *str);
 long _alpm_parsedate(const char *line);
 
-- 
1.7.5.4



More information about the pacman-dev mailing list