On 06/09/12 13:34, Andrew Gregory wrote:
The restriction of not checking the ownership of a directory is unnecessary given that all the package filelists contain this information. Remove this restriction, with the expectation that you might get multiple packages returned for a given directory. Additionally attempt to minimise the number of files getting through to the slow realpath call.
Original-work-by: Dan McGee <dan@archlinux.org> Original-work-by: Allan McRae <allan@archlinux.org> Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com> ---
Applies on top of my other -Qo patches queued in Allan's working-split/query-owner branch.
src/pacman/query.c | 61 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 14 deletions(-)
diff --git a/src/pacman/query.c b/src/pacman/query.c index 92219e8..06505eb 100644 --- a/src/pacman/query.c +++ b/src/pacman/query.c @@ -97,6 +97,7 @@ static int query_fileowner(alpm_list_t *targets) size_t rootlen; alpm_list_t *t; alpm_db_t *db_local; + alpm_list_t *packages;
/* This code is here for safety only */ if(targets == NULL) { @@ -129,13 +130,14 @@ static int query_fileowner(alpm_list_t *targets) }
db_local = alpm_get_localdb(config->handle); + packages = alpm_db_get_pkgcache(db_local);
for(t = targets; t; t = alpm_list_next(t)) { char *filename = NULL, *dname = NULL, *rpath = NULL; const char *bname; struct stat buf; alpm_list_t *i; - size_t len; + size_t len, is_dir, bname_len, pbname_len; int found = 0;
if((filename = strdup(t->data)) == NULL) { @@ -163,15 +165,20 @@ static int query_fileowner(alpm_list_t *targets) } }
- if(S_ISDIR(buf.st_mode)) { - pm_printf(ALPM_LOG_ERROR, - _("cannot determine ownership of directory '%s'\n"), filename); - goto targcleanup; + if((is_dir = S_ISDIR(buf.st_mode))) { + /* the entire filename is safe to resolve if we know it points to a dir, + * and it's necessary in case it ends in . or .. */ + dname = realpath(filename, NULL); + bname = mbasename(dname); + rpath = mdirname(dname); + } else { + bname = mbasename(filename); + dname = mdirname(filename); + rpath = realpath(dname, NULL); }
- bname = mbasename(filename); - dname = mdirname(filename); - rpath = realpath(dname, NULL); + bname_len = strlen(bname); + pbname_len = bname_len + (is_dir ? 1 : 0);
if(!dname || !rpath) { pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s\n"), @@ -179,7 +186,7 @@ static int query_fileowner(alpm_list_t *targets) goto targcleanup; }
- for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) { + for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) { alpm_pkg_t *info = i->data; alpm_filelist_t *filelist = alpm_pkg_get_files(info); size_t j; @@ -188,22 +195,48 @@ static int query_fileowner(alpm_list_t *targets) const alpm_file_t *file = filelist->files + j; char *ppath, *pdname; const char *pkgfile = file->name; + size_t pkgfile_len = strlen(pkgfile); + + /* make sure pkgfile and target are of the same type */ + if(!is_dir != !(pkgfile[pkgfile_len - 1] == '/')) {
!!! - literally.... How about: if (is_dir && (pkgfile[pkgfile_len - 1] != '/'))
+ continue; + } + + /* make sure pkgfile is long enough */ + if(pkgfile_len < pbname_len) { + continue; + }
If I understand this correctly, we are comparing the length of the final component of the passed in parameter to the entire filename length from the package. I think that it would be quite uncommon to hit the continue here, so that can be removed.
- /* avoid the costly realpath usage if the basenames don't match */ - if(strcmp(mbasename(pkgfile), bname) != 0) { + /* make sure pbname_len takes us to the start of a path component */ + if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') {
With the above removal: if (pbname_len > pkgfile_len)
+ continue; + } + + /* compare basename with bname */ + if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) {
Why an strncmp here? strcmp will do the same comparison...
continue; }
/* concatenate our file and the root path */ - if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) { + if(rootlen + 1 + pkgfile_len > PATH_MAX) { path[rootlen] = '\0'; /* reset path for error message */ pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s\n"), path, pkgfile); continue; } strcpy(path + rootlen, pkgfile);
- pdname = mdirname(path); - ppath = realpath(pdname, NULL); + /* check that the file exists on disk and is the correct type */ + if(lstat(path, &buf) == -1 || !is_dir != !S_ISDIR(buf.st_mode)) {
!!! again. How about: if(is_dir && lstat(path, &buf) == 0 && !S_ISDIR(buf.st_mode)) which is what I think is being tested here. But I wonder if we really need this... I believe this is to avoid "strange" results when people replace directories in a package with symlinks to directories in another package. The more I think about this, the more I want to just remove our special case handling of this in general. A directory in a package and a symlink on the filesystem should just be a conflict. So unless I am missing another reason for this, get rid of it.
+ continue; + } + + if(is_dir) { + pdname = realpath(path, NULL); + ppath = mdirname(pdname); + } else { + pdname = mdirname(path); + ppath = realpath(pdname, NULL); + } free(pdname);
Umm... why calculate pdname just to free it immediately?
if(!ppath) {