[pacman-dev] [PATCH] Allow querying directory ownership

Allan McRae allan at archlinux.org
Fri Sep 7 03:29:09 EDT 2012


On 06/09/12 13:34, Andrew Gregory wrote:
> The restriction of not checking the ownership of a directory is
> unnecessary given that all the package filelists contain this
> information. Remove this restriction, with the expectation that you
> might get multiple packages returned for a given directory.
> Additionally attempt to minimise the number of files getting through
> to the slow realpath call.
> 
> Original-work-by: Dan McGee <dan at archlinux.org>
> Original-work-by: Allan McRae <allan at archlinux.org>
> Signed-off-by: Andrew Gregory <andrew.gregory.8 at gmail.com>
> ---
> 
> Applies on top of my other -Qo patches queued in Allan's
> working-split/query-owner branch.
> 
>  src/pacman/query.c | 61 +++++++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 47 insertions(+), 14 deletions(-)
> 
> diff --git a/src/pacman/query.c b/src/pacman/query.c
> index 92219e8..06505eb 100644
> --- a/src/pacman/query.c
> +++ b/src/pacman/query.c
> @@ -97,6 +97,7 @@ static int query_fileowner(alpm_list_t *targets)
>  	size_t rootlen;
>  	alpm_list_t *t;
>  	alpm_db_t *db_local;
> +	alpm_list_t *packages;
>  
>  	/* This code is here for safety only */
>  	if(targets == NULL) {
> @@ -129,13 +130,14 @@ static int query_fileowner(alpm_list_t *targets)
>  	}
>  
>  	db_local = alpm_get_localdb(config->handle);
> +	packages = alpm_db_get_pkgcache(db_local);
>  
>  	for(t = targets; t; t = alpm_list_next(t)) {
>  		char *filename = NULL, *dname = NULL, *rpath = NULL;
>  		const char *bname;
>  		struct stat buf;
>  		alpm_list_t *i;
> -		size_t len;
> +		size_t len, is_dir, bname_len, pbname_len;
>  		int found = 0;
>  
>  		if((filename = strdup(t->data)) == NULL) {
> @@ -163,15 +165,20 @@ static int query_fileowner(alpm_list_t *targets)
>  			}
>  		}
>  
> -		if(S_ISDIR(buf.st_mode)) {
> -			pm_printf(ALPM_LOG_ERROR,
> -				_("cannot determine ownership of directory '%s'\n"), filename);
> -			goto targcleanup;
> +		if((is_dir = S_ISDIR(buf.st_mode))) {
> +			/* the entire filename is safe to resolve if we know it points to a dir,
> +			 * and it's necessary in case it ends in . or .. */
> +			dname = realpath(filename, NULL);
> +			bname = mbasename(dname);
> +			rpath = mdirname(dname);
> +		} else {
> +			bname = mbasename(filename);
> +			dname = mdirname(filename);
> +			rpath = realpath(dname, NULL);
>  		}
>  
> -		bname = mbasename(filename);
> -		dname = mdirname(filename);
> -		rpath = realpath(dname, NULL);
> +		bname_len = strlen(bname);
> +		pbname_len = bname_len + (is_dir ? 1 : 0);
>  
>  		if(!dname || !rpath) {
>  			pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s\n"),
> @@ -179,7 +186,7 @@ static int query_fileowner(alpm_list_t *targets)
>  			goto targcleanup;
>  		}
>  
> -		for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) {
> +		for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) {
>  			alpm_pkg_t *info = i->data;
>  			alpm_filelist_t *filelist = alpm_pkg_get_files(info);
>  			size_t j;
> @@ -188,22 +195,48 @@ static int query_fileowner(alpm_list_t *targets)
>  				const alpm_file_t *file = filelist->files + j;
>  				char *ppath, *pdname;
>  				const char *pkgfile = file->name;
> +				size_t pkgfile_len = strlen(pkgfile);
> +
> +				/* make sure pkgfile and target are of the same type */
> +				if(!is_dir != !(pkgfile[pkgfile_len - 1] == '/')) {

!!! - literally....   How about:

if (is_dir && (pkgfile[pkgfile_len - 1] != '/'))

> +					continue;
> +				}
> +
> +				/* make sure pkgfile is long enough */
> +				if(pkgfile_len < pbname_len) {
> +					continue;
> +				}

If I understand this correctly, we are comparing the length of the final
component of the passed in parameter to the entire filename length from
the package.  I think that it would be quite uncommon to hit the
continue here, so that can be removed.

>  
> -				/* avoid the costly realpath usage if the basenames don't match */
> -				if(strcmp(mbasename(pkgfile), bname) != 0) {
> +				/* make sure pbname_len takes us to the start of a path component */
> +				if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') {

With the above removal:
if (pbname_len > pkgfile_len)

> +					continue;
> +				}
> +
> +				/* compare basename with bname */
> +				if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) {

Why an strncmp here?  strcmp will do the same comparison...

>  					continue;
>  				}
>  
>  				/* concatenate our file and the root path */
> -				if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) {
> +				if(rootlen + 1 + pkgfile_len > PATH_MAX) {
>  					path[rootlen] = '\0'; /* reset path for error message */
>  					pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s\n"), path, pkgfile);
>  					continue;
>  				}
>  				strcpy(path + rootlen, pkgfile);
>  
> -				pdname = mdirname(path);
> -				ppath = realpath(pdname, NULL);
> +				/* check that the file exists on disk and is the correct type */
> +				if(lstat(path, &buf) == -1 || !is_dir != !S_ISDIR(buf.st_mode)) {

!!! again.  How about:

if(is_dir && lstat(path, &buf) == 0 && !S_ISDIR(buf.st_mode))

which is what I think is being tested here.

But I wonder if we really need this...  I believe this is to avoid
"strange" results when people replace directories in a package with
symlinks to directories in another package.  The more I think about
this, the more I want to just remove our special case handling of this
in general.  A directory in a package and a symlink on the filesystem
should just be a conflict.  So unless I am missing another reason for
this, get rid of it.

> +					continue;
> +				}
> +
> +				if(is_dir) {
> +					pdname = realpath(path, NULL);
> +					ppath = mdirname(pdname);
> +				} else {
> +					pdname = mdirname(path);
> +					ppath = realpath(pdname, NULL);
> +				}
>  				free(pdname);

Umm...   why calculate pdname just to free it immediately?

>  				if(!ppath) {
> 



More information about the pacman-dev mailing list