[pacman-dev] [PATCHv2 5/5] be_package: Build the file list from MTREE if possible

Florian Pritz bluewind at xinu.at
Tue Jan 28 05:14:06 EST 2014


This greatly speeds up file list generation times by avoiding
uncompressing the whole package.

pacman -S base with a deliberate file conflict:
before: 9.1 seconds
after:  2.2 seconds

Signed-off-by: Florian Pritz <bluewind at xinu.at>
---

v2: Adjust for v2 of realloc patch.

 lib/libalpm/be_package.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 151 insertions(+), 2 deletions(-)

diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c
index 6ef84fc..d876967 100644
--- a/lib/libalpm/be_package.c
+++ b/lib/libalpm/be_package.c
@@ -380,14 +380,34 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, size_t *files_size, struct a
 {
 	const size_t files_count = pkg->files.count;
 	alpm_file_t *current_file;
+	mode_t type;
+	size_t pathlen;
 	const char *path;
 
 	if(!_alpm_realloc((void **)&pkg->files.files, files_size, (files_count + 1) * sizeof(alpm_file_t))) {
 		return -1;
 	}
 
+	type = archive_entry_filetype(entry);
 	path = archive_entry_pathname(entry);
 
+	pathlen = strlen(path);
+
+	/* mtree paths don't contain a tailing slash, those we get from
+	 * the archive directly do (expensive way)
+	 * Other code relies on it to detect directories so add it here.*/
+	if(type == AE_IFDIR && path[pathlen - 1] != '/') {
+		/* 2 = 1 for / + 1 for \0 */
+		char *newpath = malloc(pathlen + 2);
+		if (!newpath) {
+			_alpm_alloc_fail(pathlen + 2);
+			return -1;
+		}
+		strcpy(newpath, path);
+		newpath[pathlen] = '/';
+		newpath[pathlen + 1] = '\0';
+		path = newpath;
+	}
 	current_file = pkg->files.files + files_count;
 	STRDUP(current_file->name, path, return -1);
 	current_file->size = archive_entry_size(entry);
@@ -397,6 +417,122 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, size_t *files_size, struct a
 }
 
 /**
+ * Generate a new file list from an mtree file and add it to the package.
+ * An existing file list will be free()d first.
+ *
+ * archive should point to an archive struct which is already at the
+ * position of the mtree's header.
+ *
+ * @param handle
+ * @param pkg package to add the file list to
+ * @param archive archive containing the mtree
+ * @return 0 on success, <0 on error
+ */
+static int build_filelist_from_mtree(alpm_handle_t *handle, alpm_pkg_t *pkg, struct archive *archive)
+{
+	int ret = 0;
+	size_t mtree_msize = 0;
+	size_t mtree_csize = 0;
+	size_t files_size = 0; // we clean up the existing array so this is fine
+	char *mtree_data = NULL;
+	struct archive *mtree;
+	struct archive_entry *mtree_entry = NULL;
+
+	_alpm_log(handle, ALPM_LOG_DEBUG, "found mtree for package %s, getting file list\n", pkg->filename);
+
+	/* throw away any files we might have already found */
+	free(pkg->files.files);
+	pkg->files.files = NULL;
+	pkg->files.count = 0;
+
+	/* create a new archive to parse the mtree and load it from archive into memory */
+	// TODO: split this into a function
+	if((mtree = archive_read_new()) == NULL) {
+		handle->pm_errno = ALPM_ERR_LIBARCHIVE;
+		goto error;
+	}
+
+	_alpm_archive_read_support_filter_all(mtree);
+	archive_read_support_format_mtree(mtree);
+
+	// TODO: split this into a function
+	while(1) {
+		ssize_t size;
+
+		if(!_alpm_realloc((void **)&mtree_data, &mtree_msize, mtree_csize + ALPM_BUFFER_SIZE)) {
+			goto error;
+		}
+
+		size = archive_read_data(archive, mtree_data + mtree_csize, ALPM_BUFFER_SIZE);
+
+		if(size < 0) {
+			_alpm_log(handle, ALPM_LOG_ERROR, _("error while reading package %s: %s\n"),
+					pkg->filename, archive_error_string(archive));
+			handle->pm_errno = ALPM_ERR_LIBARCHIVE;
+			goto error;
+		}
+		if(size == 0) {
+			break;
+		}
+
+		mtree_csize += size;
+	}
+
+	// TODO: need wrapper function for archive_read_open_memory for old libarchive versions?
+	if(archive_read_open_memory(mtree, mtree_data, mtree_csize)) {
+		_alpm_log(handle, ALPM_LOG_ERROR, _("error while reading mtree of package %s: %s\n"),
+				pkg->filename, archive_error_string(mtree));
+		handle->pm_errno = ALPM_ERR_LIBARCHIVE;
+		goto error;
+	}
+
+	while((ret = archive_read_next_header(mtree, &mtree_entry)) == ARCHIVE_OK) {
+		const char *path = archive_entry_pathname(mtree_entry);
+		char *p = NULL;
+
+		/* strip leading "./" from path entries */
+		if(path[0] == '.' && path[1] == '/') {
+			path += 2;
+		}
+
+		if(handle_simple_path(pkg, path)) {
+			continue;
+		}
+
+		if(*path == '.') {
+			continue;
+		}
+
+		/* Apparently archive_entry_copy/set_pathname doesn't like to get it's own
+		 * pointer and produces rather strange results */
+		STRDUP(p, path, goto error);
+		archive_entry_copy_pathname(mtree_entry, p);
+
+		if(add_entry_to_files_list(pkg, &files_size, mtree_entry) < 0) {
+			free(p);
+			goto error;
+		}
+		free(p);
+	}
+
+	if(ret != ARCHIVE_EOF && ret != ARCHIVE_OK) { /* An error occurred */
+		_alpm_log(handle, ALPM_LOG_ERROR, _("error while reading mtree of package %s: %s\n"),
+				pkg->filename, archive_error_string(mtree));
+		handle->pm_errno = ALPM_ERR_LIBARCHIVE;
+		goto error;
+	}
+
+	free(mtree_data);
+	_alpm_archive_read_free(mtree);
+	_alpm_log(handle, ALPM_LOG_DEBUG, "finished mtree reading for %s\n", pkg->filename);
+	return 0;
+error:
+	free(mtree_data);
+	_alpm_archive_read_free(mtree);
+	return -1;
+}
+
+/**
  * Load a package and create the corresponding alpm_pkg_t struct.
  * @param handle the context handle
  * @param pkgfile path to the package file
@@ -406,7 +542,7 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, size_t *files_size, struct a
 alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle,
 		const char *pkgfile, int full)
 {
-	int ret, fd, config = 0;
+	int ret, fd, config, hit_mtree = 0;
 	struct archive *archive;
 	struct archive_entry *entry;
 	alpm_pkg_t *newpkg;
@@ -469,10 +605,23 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle,
 			continue;
 		} else if(handle_simple_path(newpkg, entry_name)) {
 			continue;
+		} else if(full && strcmp(entry_name, ".MTREE") == 0) {
+			/* building the file list: cheap way
+			 * get the filelist from the mtree file rather than scanning
+			 * the whole archive  */
+			if(build_filelist_from_mtree(handle, newpkg, archive) < 0) {
+				goto error;
+			}
+			hit_mtree = 1;
+			continue;
 		} else if(*entry_name == '.') {
 			/* for now, ignore all files starting with '.' that haven't
 			 * already been handled (for future possibilities) */
-		} else if(full) {
+		} else if(hit_mtree && config) {
+			/* we have all we need */
+			break;
+		} else if(full && !hit_mtree) {
+			/* building the file list: expensive way */
 			if(add_entry_to_files_list(newpkg, &files_size, entry) < 0) {
 				goto error;
 			}
-- 
1.8.5.3


More information about the pacman-dev mailing list