[pacman-dev] [PATCH 2/3] [RFC] Resolve file paths during inter-package conflict check

Allan McRae allan at archlinux.org
Tue Jul 17 20:57:36 EDT 2012


On 18/07/12 00:42, Allan McRae wrote:
> File paths are resolved if necessary during inter-package conflict
> checks so that packages carrying the same effective file due to
> directory symlinks on the filesystem are flagged as conflicting.
> 
> Signed-off-by: Allan McRae <allan at archlinux.org>
> ---
> 
> Prepare for some ugly...

I pushed this to my symlink branch because it is clearly not ready
(although it does work!).

There are two things I would like help with:
1) generating the resolved filelist - I think that the detection of when
a filelist needs resolving due to directory symlinks is quite efficient,
bu the actual construction of the list needs work.

2) It needs more efficiency in using these...   currently the resolved
filelist is calculated on average (n+1)/2.   One possibility is it add a
resolved_files pointer to the alpm_filelist_t struct.  Usually this will
point to the raw filelist and so not be a big memory hit.  In fact...
as I write this I have decided that I really like this idea so it is
being implemented!.  It also means the cleanup can happen in the normal
package struct cleanup.  Yay!

So can comments focus on how I can improve #1.   I am not great with
string manipulation in C, so I am sure that can be improved.  Also,
there are chunks which are a bit duplicated, but I am not seeing how I
can adjust the flow to reduce that...

Allan


> 
>  lib/libalpm/conflict.c               | 180 ++++++++++++++++++++++++++++++++++-
>  test/pacman/tests/fileconflict001.py |   2 -
>  test/pacman/tests/fileconflict016.py |   2 -
>  test/pacman/tests/fileconflict017.py |  26 +++++
>  4 files changed, 204 insertions(+), 6 deletions(-)
>  create mode 100644 test/pacman/tests/fileconflict017.py
> 
> diff --git a/lib/libalpm/conflict.c b/lib/libalpm/conflict.c
> index adb4b37..faee7bd 100644
> --- a/lib/libalpm/conflict.c
> +++ b/lib/libalpm/conflict.c
> @@ -213,6 +213,166 @@ alpm_list_t SYMEXPORT *alpm_checkconflicts(alpm_handle_t *handle,
>  	return _alpm_innerconflicts(handle, pkglist);
>  }
>  
> +
> +/* Takes a file list and resolves all directory paths
> + * Pre-condition: list must be sorted!
> + * If the filelist has no resolving needed, the original list is returned.
> + * Otherwise the list and will need freed */
> +static alpm_filelist_t *resolved_filelist(alpm_handle_t *handle,
> +		alpm_filelist_t *filelist)
> +{
> +	size_t i;
> +	struct stat sbuf;
> +	char path[PATH_MAX];
> +	alpm_filelist_t *resolved;
> +	int list_resolved = 1;
> +
> +	const char *root = handle->root;
> +
> +	for(i = 0; i < filelist->count; i++) {
> +		const char *filename = filelist->files[i].name;
> +
> +		if(filename[strlen(filename)-1] != '/') {
> +			continue;
> +		}
> +
> +		snprintf(path, PATH_MAX, "%s%s", root, filename);
> +
> +		if(_alpm_lstat(path, &sbuf) != 0) {
> +			continue;
> +		}
> +
> +		if(S_ISLNK(sbuf.st_mode)) {
> +			list_resolved = 0;
> +
> +			resolved = malloc(sizeof(alpm_filelist_t));
> +			resolved->files = malloc(sizeof(alpm_file_t) * filelist->count);
> +			resolved->count = i;
> +
> +			break;
> +		}
> +	}
> +
> +	if(list_resolved == 1) {
> +		return filelist;
> +	}
> +
> +	/* generate the resolved list */
> +	char *resolved_root, *resolved_path, *causal_dir, *current_dir;
> +	char resolved_dir[PATH_MAX];
> +	int resolving = 0, resolved_dir_len;
> +
> +	for(i = 0; i < resolved->count; i++) {
> +		alpm_file_t *current_file = filelist->files + i;
> +		alpm_file_t *resolved_file = resolved->files + i;
> +
> +		resolved_file->name = strdup(current_file->name);
> +		resolved_file->size = current_file->size;
> +		resolved_file->mode = current_file->mode;
> +	}
> +
> +	resolved_root = realpath(root, NULL);
> +
> +	causal_dir = filelist->files[resolved->count].name;
> +	current_dir = strdup(causal_dir);
> +	snprintf(path, PATH_MAX, "%s%s", root, causal_dir);
> +	resolved_path = realpath(path, NULL);
> +	snprintf(resolved_dir, PATH_MAX, "%s/", resolved_path + strlen(resolved_root) + 1);
> +	resolved->files[resolved->count].name = strdup(resolved_dir);
> +	resolved_dir_len = strlen(resolved_dir);
> +	free(resolved_path);
> +	resolving = 1;
> +
> +	for(i = resolved->count + 1; i < filelist->count; i++) {
> +		alpm_file_t *current_file = filelist->files + i;
> +		alpm_file_t *resolved_file = resolved->files + i;
> +		char *filename = current_file->name;
> +
> +		resolved_file->size = current_file->size;
> +		resolved_file->mode = current_file->mode;
> +
> +		if(filename[strlen(filename) - 1] == '/') {
> +			if(strncmp(filename, causal_dir, strlen(causal_dir)) == 0) {
> +				char *c = filename + strlen(causal_dir) + 1;
> +				// we must resolve path
> +				while((c = strchr(c, '/')) != NULL) {
> +					*c = '\0';
> +					snprintf(path, PATH_MAX, "%s%s", root, filename);
> +					if(_alpm_lstat(path, &sbuf) != 0) {
> +						*c = '/';
> +						*(c + 1) = '\0';
> +						snprintf(path, PATH_MAX, "%s%s", root, current_dir);
> +						break;
> +					}
> +					free(current_dir);
> +					current_dir = strdup(filename);
> +					*c = '/';
> +				}
> +				resolved_path = realpath(path, NULL);
> +				snprintf(resolved_dir, PATH_MAX, "%s/", resolved_path + strlen(resolved_root) + 1);
> +				resolved->files[resolved->count].name = strdup(resolved_dir);
> +				resolved_dir_len = strlen(resolved_dir);
> +				free(resolved_path);
> +				resolved_file->name = strdup(filename);
> +				continue;
> +			} else {
> +				// test if path needs resolved
> +				snprintf(path, PATH_MAX, "%s%s", root, filename);
> +				if(_alpm_lstat(path, &sbuf) != 0 || !S_ISLNK(sbuf.st_mode)) {
> +						resolved_file->name = strdup(filename);
> +					resolving = 0;
> +					if(current_dir) {
> +						free(current_dir);
> +						current_dir = NULL;
> +					}
> +					resolved_file->name = strdup(filename);
> +					continue;
> +				}
> +
> +				causal_dir = filename;
> +				current_dir = strdup(causal_dir);
> +				snprintf(path, PATH_MAX, "%s%s", root, causal_dir);
> +				resolved_path = realpath(path, NULL);
> +				snprintf(resolved_dir, PATH_MAX, "%s/", resolved_path + strlen(resolved_root) + 1);
> +				resolved_file->name = strdup(resolved_dir);
> +				resolved_dir_len = strlen(resolved_dir);
> +				free(resolved_path);
> +				resolving = 1;
> +				continue;
> +			}
> +		}
> +
> +		if(resolving == 0) {
> +			resolved_file->name = strdup(filename);
> +		} else {
> +			size_t len;
> +			filename = filename + strlen(current_dir);
> +			len = resolved_dir_len + strlen(filename) + 1;
> +			resolved_file->name = malloc(len);
> +			snprintf(resolved_file->name, len, "%s%s", resolved_dir, filename);
> +		}
> +	}
> +
> +	resolved->count = filelist->count;
> +
> +	if(current_dir) {
> +		free(current_dir);
> +	}
> +
> +	_alpm_files_msort(resolved->files, resolved->count);
> +	return resolved;
> +}
> +
> +static void free_filelist(alpm_filelist_t *fl)
> +{
> +	size_t i;
> +	for(i = 0; i < fl->count; i++) {
> +		free(fl->files[i].name);
> +	}
> +	free(fl->files);
> +	free(fl);
> +}
> +
>  /* Returns the difference of the provided two lists of files.
>   * Pre-condition: both lists are sorted!
>   * When done, free the list but NOT the contained data.
> @@ -481,6 +641,8 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle,
>  		alpm_pkg_t *dbpkg;
>  		size_t filenum;
>  
> +		alpm_filelist_t *p1_filelist = resolved_filelist(handle, alpm_pkg_get_files(p1));
> +
>  		int percent = (current * 100) / numtargs;
>  		PROGRESS(handle, ALPM_PROGRESS_CONFLICTS_START, "", percent,
>  		         numtargs, current);
> @@ -490,8 +652,8 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle,
>  		for(j = i->next; j; j = j->next) {
>  			alpm_list_t *common_files;
>  			alpm_pkg_t *p2 = j->data;
> -			common_files = filelist_intersection(alpm_pkg_get_files(p1),
> -					alpm_pkg_get_files(p2));
> +			alpm_filelist_t *p2_filelist = resolved_filelist(handle, alpm_pkg_get_files(p2));
> +			common_files = filelist_intersection(p1_filelist, p2_filelist);
>  
>  			if(common_files) {
>  				alpm_list_t *k;
> @@ -503,11 +665,25 @@ alpm_list_t *_alpm_db_find_fileconflicts(alpm_handle_t *handle,
>  					if(handle->pm_errno == ALPM_ERR_MEMORY) {
>  						FREELIST(conflicts);
>  						FREELIST(common_files);
> +						if(p1_filelist != alpm_pkg_get_files(p1)) {
> +							free_filelist(p1_filelist);
> +						}
> +						if(p2_filelist != alpm_pkg_get_files(p2)) {
> +							free_filelist(p2_filelist);
> +						}
>  						return NULL;
>  					}
>  				}
>  				alpm_list_free(common_files);
>  			}
> +
> +			if(p2_filelist != alpm_pkg_get_files(p2)) {
> +				free_filelist(p2_filelist);
> +			}
> +		}
> +
> +		if(p1_filelist != alpm_pkg_get_files(p1)) {
> +			free_filelist(p1_filelist);
>  		}
>  
>  		/* CHECK 2: check every target against the filesystem */
> diff --git a/test/pacman/tests/fileconflict001.py b/test/pacman/tests/fileconflict001.py
> index e1371f8..b1ad5e1 100644
> --- a/test/pacman/tests/fileconflict001.py
> +++ b/test/pacman/tests/fileconflict001.py
> @@ -25,5 +25,3 @@
>  self.addrule("!PKG_EXIST=pkg2")
>  self.addrule("FILE_EXIST=dir/realdir/realfile")
>  self.addrule("!FILE_EXIST=dir/realdir/file")
> -
> -self.expectfailure = True
> diff --git a/test/pacman/tests/fileconflict016.py b/test/pacman/tests/fileconflict016.py
> index 5625984..c5daf48 100644
> --- a/test/pacman/tests/fileconflict016.py
> +++ b/test/pacman/tests/fileconflict016.py
> @@ -22,5 +22,3 @@
>  self.addrule("PACMAN_RETCODE=1")
>  self.addrule("!PKG_EXIST=pkg1")
>  self.addrule("!PKG_EXIST=pkg2")
> -
> -self.expectfailure = True
> diff --git a/test/pacman/tests/fileconflict017.py b/test/pacman/tests/fileconflict017.py
> new file mode 100644
> index 0000000..2df7865
> --- /dev/null
> +++ b/test/pacman/tests/fileconflict017.py
> @@ -0,0 +1,26 @@
> +self.description = "file conflict with same effective path across packages (directory symlink - deep)"
> +
> +lp1 = pmpkg("filesystem", "1.0-1")
> +lp1.files = ["usr/",
> +             "usr/lib/",
> +             "lib -> usr/lib/"]
> +self.addpkg2db("local", lp1)
> +
> +p1 = pmpkg("pkg1")
> +p1.files = ["lib/",
> +            "lib/foo/",
> +            "lib/foo/bar"]
> +self.addpkg2db("sync", p1)
> +
> +p2 = pmpkg("pkg2")
> +p2.files = ["usr/",
> +            "usr/lib/",
> +	    "usr/lib/foo/",
> +	    "usr/lib/foo/bar"]
> +self.addpkg2db("sync", p2)
> +
> +self.args = "-S pkg1 pkg2"
> +
> +self.addrule("PACMAN_RETCODE=1")
> +self.addrule("!PKG_EXIST=pkg1")
> +self.addrule("!PKG_EXIST=pkg2")
> 




More information about the pacman-dev mailing list