[pacman-dev] [PATCH] Allow querying directory ownership
The restriction of not checking the ownership of a directory is unnecessary given that all the package filelists contain this information. Remove this restriction, with the expectation that you might get multiple packages returned for a given directory. Additionally attempt to minimise the number of files getting through to the slow realpath call. Original-work-by: Dan McGee <dan@archlinux.org> Original-work-by: Allan McRae <allan@archlinux.org> Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com> --- Applies on top of my other -Qo patches queued in Allan's working-split/query-owner branch. src/pacman/query.c | 61 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 14 deletions(-) diff --git a/src/pacman/query.c b/src/pacman/query.c index 92219e8..06505eb 100644 --- a/src/pacman/query.c +++ b/src/pacman/query.c @@ -97,6 +97,7 @@ static int query_fileowner(alpm_list_t *targets) size_t rootlen; alpm_list_t *t; alpm_db_t *db_local; + alpm_list_t *packages; /* This code is here for safety only */ if(targets == NULL) { @@ -129,13 +130,14 @@ static int query_fileowner(alpm_list_t *targets) } db_local = alpm_get_localdb(config->handle); + packages = alpm_db_get_pkgcache(db_local); for(t = targets; t; t = alpm_list_next(t)) { char *filename = NULL, *dname = NULL, *rpath = NULL; const char *bname; struct stat buf; alpm_list_t *i; - size_t len; + size_t len, is_dir, bname_len, pbname_len; int found = 0; if((filename = strdup(t->data)) == NULL) { @@ -163,15 +165,20 @@ static int query_fileowner(alpm_list_t *targets) } } - if(S_ISDIR(buf.st_mode)) { - pm_printf(ALPM_LOG_ERROR, - _("cannot determine ownership of directory '%s'\n"), filename); - goto targcleanup; + if((is_dir = S_ISDIR(buf.st_mode))) { + /* the entire filename is safe to resolve if we know it points to a dir, + * and it's necessary in case it ends in . or .. */ + dname = realpath(filename, NULL); + bname = mbasename(dname); + rpath = mdirname(dname); + } else { + bname = mbasename(filename); + dname = mdirname(filename); + rpath = realpath(dname, NULL); } - bname = mbasename(filename); - dname = mdirname(filename); - rpath = realpath(dname, NULL); + bname_len = strlen(bname); + pbname_len = bname_len + (is_dir ? 1 : 0); if(!dname || !rpath) { pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s\n"), @@ -179,7 +186,7 @@ static int query_fileowner(alpm_list_t *targets) goto targcleanup; } - for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) { + for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) { alpm_pkg_t *info = i->data; alpm_filelist_t *filelist = alpm_pkg_get_files(info); size_t j; @@ -188,22 +195,48 @@ static int query_fileowner(alpm_list_t *targets) const alpm_file_t *file = filelist->files + j; char *ppath, *pdname; const char *pkgfile = file->name; + size_t pkgfile_len = strlen(pkgfile); + + /* make sure pkgfile and target are of the same type */ + if(!is_dir != !(pkgfile[pkgfile_len - 1] == '/')) { + continue; + } + + /* make sure pkgfile is long enough */ + if(pkgfile_len < pbname_len) { + continue; + } - /* avoid the costly realpath usage if the basenames don't match */ - if(strcmp(mbasename(pkgfile), bname) != 0) { + /* make sure pbname_len takes us to the start of a path component */ + if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') { + continue; + } + + /* compare basename with bname */ + if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) { continue; } /* concatenate our file and the root path */ - if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) { + if(rootlen + 1 + pkgfile_len > PATH_MAX) { path[rootlen] = '\0'; /* reset path for error message */ pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s\n"), path, pkgfile); continue; } strcpy(path + rootlen, pkgfile); - pdname = mdirname(path); - ppath = realpath(pdname, NULL); + /* check that the file exists on disk and is the correct type */ + if(lstat(path, &buf) == -1 || !is_dir != !S_ISDIR(buf.st_mode)) { + continue; + } + + if(is_dir) { + pdname = realpath(path, NULL); + ppath = mdirname(pdname); + } else { + pdname = mdirname(path); + ppath = realpath(pdname, NULL); + } free(pdname); if(!ppath) { -- 1.7.12
On 06/09/12 13:34, Andrew Gregory wrote:
The restriction of not checking the ownership of a directory is unnecessary given that all the package filelists contain this information. Remove this restriction, with the expectation that you might get multiple packages returned for a given directory. Additionally attempt to minimise the number of files getting through to the slow realpath call.
Original-work-by: Dan McGee <dan@archlinux.org> Original-work-by: Allan McRae <allan@archlinux.org> Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com> ---
Applies on top of my other -Qo patches queued in Allan's working-split/query-owner branch.
src/pacman/query.c | 61 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 14 deletions(-)
diff --git a/src/pacman/query.c b/src/pacman/query.c index 92219e8..06505eb 100644 --- a/src/pacman/query.c +++ b/src/pacman/query.c @@ -97,6 +97,7 @@ static int query_fileowner(alpm_list_t *targets) size_t rootlen; alpm_list_t *t; alpm_db_t *db_local; + alpm_list_t *packages;
/* This code is here for safety only */ if(targets == NULL) { @@ -129,13 +130,14 @@ static int query_fileowner(alpm_list_t *targets) }
db_local = alpm_get_localdb(config->handle); + packages = alpm_db_get_pkgcache(db_local);
for(t = targets; t; t = alpm_list_next(t)) { char *filename = NULL, *dname = NULL, *rpath = NULL; const char *bname; struct stat buf; alpm_list_t *i; - size_t len; + size_t len, is_dir, bname_len, pbname_len; int found = 0;
if((filename = strdup(t->data)) == NULL) { @@ -163,15 +165,20 @@ static int query_fileowner(alpm_list_t *targets) } }
- if(S_ISDIR(buf.st_mode)) { - pm_printf(ALPM_LOG_ERROR, - _("cannot determine ownership of directory '%s'\n"), filename); - goto targcleanup; + if((is_dir = S_ISDIR(buf.st_mode))) { + /* the entire filename is safe to resolve if we know it points to a dir, + * and it's necessary in case it ends in . or .. */ + dname = realpath(filename, NULL); + bname = mbasename(dname); + rpath = mdirname(dname); + } else { + bname = mbasename(filename); + dname = mdirname(filename); + rpath = realpath(dname, NULL); }
- bname = mbasename(filename); - dname = mdirname(filename); - rpath = realpath(dname, NULL); + bname_len = strlen(bname); + pbname_len = bname_len + (is_dir ? 1 : 0);
if(!dname || !rpath) { pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s\n"), @@ -179,7 +186,7 @@ static int query_fileowner(alpm_list_t *targets) goto targcleanup; }
- for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) { + for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) { alpm_pkg_t *info = i->data; alpm_filelist_t *filelist = alpm_pkg_get_files(info); size_t j; @@ -188,22 +195,48 @@ static int query_fileowner(alpm_list_t *targets) const alpm_file_t *file = filelist->files + j; char *ppath, *pdname; const char *pkgfile = file->name; + size_t pkgfile_len = strlen(pkgfile); + + /* make sure pkgfile and target are of the same type */ + if(!is_dir != !(pkgfile[pkgfile_len - 1] == '/')) {
!!! - literally.... How about: if (is_dir && (pkgfile[pkgfile_len - 1] != '/'))
+ continue; + } + + /* make sure pkgfile is long enough */ + if(pkgfile_len < pbname_len) { + continue; + }
If I understand this correctly, we are comparing the length of the final component of the passed in parameter to the entire filename length from the package. I think that it would be quite uncommon to hit the continue here, so that can be removed.
- /* avoid the costly realpath usage if the basenames don't match */ - if(strcmp(mbasename(pkgfile), bname) != 0) { + /* make sure pbname_len takes us to the start of a path component */ + if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') {
With the above removal: if (pbname_len > pkgfile_len)
+ continue; + } + + /* compare basename with bname */ + if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) {
Why an strncmp here? strcmp will do the same comparison...
continue; }
/* concatenate our file and the root path */ - if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) { + if(rootlen + 1 + pkgfile_len > PATH_MAX) { path[rootlen] = '\0'; /* reset path for error message */ pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s\n"), path, pkgfile); continue; } strcpy(path + rootlen, pkgfile);
- pdname = mdirname(path); - ppath = realpath(pdname, NULL); + /* check that the file exists on disk and is the correct type */ + if(lstat(path, &buf) == -1 || !is_dir != !S_ISDIR(buf.st_mode)) {
!!! again. How about: if(is_dir && lstat(path, &buf) == 0 && !S_ISDIR(buf.st_mode)) which is what I think is being tested here. But I wonder if we really need this... I believe this is to avoid "strange" results when people replace directories in a package with symlinks to directories in another package. The more I think about this, the more I want to just remove our special case handling of this in general. A directory in a package and a symlink on the filesystem should just be a conflict. So unless I am missing another reason for this, get rid of it.
+ continue; + } + + if(is_dir) { + pdname = realpath(path, NULL); + ppath = mdirname(pdname); + } else { + pdname = mdirname(path); + ppath = realpath(pdname, NULL); + } free(pdname);
Umm... why calculate pdname just to free it immediately?
if(!ppath) {
On 06/09/12 13:34, Andrew Gregory wrote:
The restriction of not checking the ownership of a directory is unnecessary given that all the package filelists contain this information. Remove this restriction, with the expectation that you might get multiple packages returned for a given directory. Additionally attempt to minimise the number of files getting through to the slow realpath call.
Original-work-by: Dan McGee <dan@archlinux.org> Original-work-by: Allan McRae <allan@archlinux.org> Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com> ---
Applies on top of my other -Qo patches queued in Allan's working-split/query-owner branch.
src/pacman/query.c | 61 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 14 deletions(-)
diff --git a/src/pacman/query.c b/src/pacman/query.c index 92219e8..06505eb 100644 --- a/src/pacman/query.c +++ b/src/pacman/query.c @@ -97,6 +97,7 @@ static int query_fileowner(alpm_list_t *targets) size_t rootlen; alpm_list_t *t; alpm_db_t *db_local; + alpm_list_t *packages;
/* This code is here for safety only */ if(targets == NULL) { @@ -129,13 +130,14 @@ static int query_fileowner(alpm_list_t *targets) }
db_local = alpm_get_localdb(config->handle); + packages = alpm_db_get_pkgcache(db_local);
for(t = targets; t; t = alpm_list_next(t)) { char *filename = NULL, *dname = NULL, *rpath = NULL; const char *bname; struct stat buf; alpm_list_t *i; - size_t len; + size_t len, is_dir, bname_len, pbname_len; int found = 0;
if((filename = strdup(t->data)) == NULL) { @@ -163,15 +165,20 @@ static int query_fileowner(alpm_list_t *targets) } }
- if(S_ISDIR(buf.st_mode)) { - pm_printf(ALPM_LOG_ERROR, - _("cannot determine ownership of directory '%s'\n"), filename); - goto targcleanup; + if((is_dir = S_ISDIR(buf.st_mode))) { + /* the entire filename is safe to resolve if we know it points to a dir, + * and it's necessary in case it ends in . or .. */ + dname = realpath(filename, NULL); + bname = mbasename(dname); + rpath = mdirname(dname); + } else { + bname = mbasename(filename); + dname = mdirname(filename); + rpath = realpath(dname, NULL); }
- bname = mbasename(filename); - dname = mdirname(filename); - rpath = realpath(dname, NULL); + bname_len = strlen(bname); + pbname_len = bname_len + (is_dir ? 1 : 0);
if(!dname || !rpath) { pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s\n"), @@ -179,7 +186,7 @@ static int query_fileowner(alpm_list_t *targets) goto targcleanup; }
- for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) { + for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) { alpm_pkg_t *info = i->data; alpm_filelist_t *filelist = alpm_pkg_get_files(info); size_t j; @@ -188,22 +195,48 @@ static int query_fileowner(alpm_list_t *targets) const alpm_file_t *file = filelist->files + j; char *ppath, *pdname; const char *pkgfile = file->name; + size_t pkgfile_len = strlen(pkgfile); + + /* make sure pkgfile and target are of the same type */ + if(!is_dir != !(pkgfile[pkgfile_len - 1] == '/')) {
!!! - literally.... How about:
if (is_dir && (pkgfile[pkgfile_len - 1] != '/')) This isn't the same check - Andrew's check verifies that is_dir XOR (pkgfile's last character is not '/' (i.e. pkgfile names a file
I'm not a dev, nor the original author of the patch, but... On Fri, Sep 7, 2012 at 10:29 AM, Allan McRae <allan@archlinux.org> wrote: path)), is true, while your test is for AND. This way of defining XOR is quite confusing, though - isn't there a clearer way? (i.e. macros, inline functions?)
+ continue; + } + + /* make sure pkgfile is long enough */ + if(pkgfile_len < pbname_len) { + continue; + }
If I understand this correctly, we are comparing the length of the final component of the passed in parameter to the entire filename length from the package. I think that it would be quite uncommon to hit the continue here, so that can be removed.
I think you mean to say that we replace this if(){continue;} with if(!){...} wrapping the rest of the loop? That would make sense if this branch is costly.
- /* avoid the costly realpath usage if the basenames don't match */ - if(strcmp(mbasename(pkgfile), bname) != 0) { + /* make sure pbname_len takes us to the start of a path component */ + if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') {
With the above removal: if (pbname_len > pkgfile_len)
+ continue; + } + + /* compare basename with bname */ + if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) {
Why an strncmp here? strcmp will do the same comparison...
In order to make sure that we don't run into buffer overflow issues - http://stackoverflow.com/questions/448563/am-i-correct-that-strcmp-is-equiva...
continue; }
/* concatenate our file and the root path */ - if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) { + if(rootlen + 1 + pkgfile_len > PATH_MAX) { path[rootlen] = '\0'; /* reset path for error message */ pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s\n"), path, pkgfile); continue; } strcpy(path + rootlen, pkgfile);
- pdname = mdirname(path); - ppath = realpath(pdname, NULL); + /* check that the file exists on disk and is the correct type */ + if(lstat(path, &buf) == -1 || !is_dir != !S_ISDIR(buf.st_mode)) {
!!! again. How about:
if(is_dir && lstat(path, &buf) == 0 && !S_ISDIR(buf.st_mode))
which is what I think is being tested here. Again, your fix doesn't match the original test - Andrew's test verifies that the file either doesn't exist or that (the file is a directory) XOR is_dir is true.
But I wonder if we really need this... I believe this is to avoid "strange" results when people replace directories in a package with symlinks to directories in another package. The more I think about this, the more I want to just remove our special case handling of this in general. A directory in a package and a symlink on the filesystem should just be a conflict. So unless I am missing another reason for this, get rid of it.
No comment, as I do not know the codebase well enough to respond.
+ continue; + } + + if(is_dir) { + pdname = realpath(path, NULL); + ppath = mdirname(pdname); + } else { + pdname = mdirname(path); + ppath = realpath(pdname, NULL); + } free(pdname);
Umm... why calculate pdname just to free it immediately? Since we need to use it to calculate ppath?
if(!ppath) {
Overall, I can't really judge the quality of Andrew's patch, but I definitely do not agree with the comments given by Allan. HTH in my own way, Gesh
On 07/09/12 21:06, Menachem Moystoviz wrote:
I'm not a dev, nor the original author of the patch, but...
On 06/09/12 13:34, Andrew Gregory wrote:
The restriction of not checking the ownership of a directory is unnecessary given that all the package filelists contain this information. Remove this restriction, with the expectation that you might get multiple packages returned for a given directory. Additionally attempt to minimise the number of files getting through to the slow realpath call.
Original-work-by: Dan McGee <dan@archlinux.org> Original-work-by: Allan McRae <allan@archlinux.org> Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com> ---
Applies on top of my other -Qo patches queued in Allan's working-split/query-owner branch.
src/pacman/query.c | 61 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 14 deletions(-)
diff --git a/src/pacman/query.c b/src/pacman/query.c index 92219e8..06505eb 100644 --- a/src/pacman/query.c +++ b/src/pacman/query.c @@ -97,6 +97,7 @@ static int query_fileowner(alpm_list_t *targets) size_t rootlen; alpm_list_t *t; alpm_db_t *db_local; + alpm_list_t *packages;
/* This code is here for safety only */ if(targets == NULL) { @@ -129,13 +130,14 @@ static int query_fileowner(alpm_list_t *targets) }
db_local = alpm_get_localdb(config->handle); + packages = alpm_db_get_pkgcache(db_local);
for(t = targets; t; t = alpm_list_next(t)) { char *filename = NULL, *dname = NULL, *rpath = NULL; const char *bname; struct stat buf; alpm_list_t *i; - size_t len; + size_t len, is_dir, bname_len, pbname_len; int found = 0;
if((filename = strdup(t->data)) == NULL) { @@ -163,15 +165,20 @@ static int query_fileowner(alpm_list_t *targets) } }
- if(S_ISDIR(buf.st_mode)) { - pm_printf(ALPM_LOG_ERROR, - _("cannot determine ownership of directory '%s'\n"), filename); - goto targcleanup; + if((is_dir = S_ISDIR(buf.st_mode))) { + /* the entire filename is safe to resolve if we know it points to a dir, + * and it's necessary in case it ends in . or .. */ + dname = realpath(filename, NULL); + bname = mbasename(dname); + rpath = mdirname(dname); + } else { + bname = mbasename(filename); + dname = mdirname(filename); + rpath = realpath(dname, NULL); }
- bname = mbasename(filename); - dname = mdirname(filename); - rpath = realpath(dname, NULL); + bname_len = strlen(bname); + pbname_len = bname_len + (is_dir ? 1 : 0);
if(!dname || !rpath) { pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s\n"), @@ -179,7 +186,7 @@ static int query_fileowner(alpm_list_t *targets) goto targcleanup; }
- for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) { + for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) { alpm_pkg_t *info = i->data; alpm_filelist_t *filelist = alpm_pkg_get_files(info); size_t j; @@ -188,22 +195,48 @@ static int query_fileowner(alpm_list_t *targets) const alpm_file_t *file = filelist->files + j; char *ppath, *pdname; const char *pkgfile = file->name; + size_t pkgfile_len = strlen(pkgfile); + + /* make sure pkgfile and target are of the same type */ + if(!is_dir != !(pkgfile[pkgfile_len - 1] == '/')) {
!!! - literally.... How about:
if (is_dir && (pkgfile[pkgfile_len - 1] != '/')) This isn't the same check - Andrew's check verifies that is_dir XOR (pkgfile's last character is not '/' (i.e. pkgfile names a file
On Fri, Sep 7, 2012 at 10:29 AM, Allan McRae <allan@archlinux.org> wrote: path)), is true, while your test is for AND. This way of defining XOR is quite confusing, though - isn't there a clearer way? (i.e. macros, inline functions?)
Ah - that is confusing... obviously! So a clearer version of the test would be: if ((!is_dir && pkgfile[pkgfile_len - 1] == '/') || (is_dir && pkgfile[pkgfile_len - 1] != '/'))
+ continue; + } + + /* make sure pkgfile is long enough */ + if(pkgfile_len < pbname_len) { + continue; + }
If I understand this correctly, we are comparing the length of the final component of the passed in parameter to the entire filename length from the package. I think that it would be quite uncommon to hit the continue here, so that can be removed.
I think you mean to say that we replace this if(){continue;} with if(!){...} wrapping the rest of the loop? That would make sense if this branch is costly.
No. I was saying that I think this test is going to result in a very small number of cases being skipped and so it would be more efficient to just not perform it.
- /* avoid the costly realpath usage if the basenames don't match */ - if(strcmp(mbasename(pkgfile), bname) != 0) { + /* make sure pbname_len takes us to the start of a path component */ + if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') {
With the above removal: if (pbname_len > pkgfile_len)
+ continue; + } + + /* compare basename with bname */ + if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) {
Why an strncmp here? strcmp will do the same comparison...
In order to make sure that we don't run into buffer overflow issues - http://stackoverflow.com/questions/448563/am-i-correct-that-strcmp-is-equiva...
Both strings are null terminated.
continue; }
/* concatenate our file and the root path */ - if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) { + if(rootlen + 1 + pkgfile_len > PATH_MAX) { path[rootlen] = '\0'; /* reset path for error message */ pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s\n"), path, pkgfile); continue; } strcpy(path + rootlen, pkgfile);
- pdname = mdirname(path); - ppath = realpath(pdname, NULL); + /* check that the file exists on disk and is the correct type */ + if(lstat(path, &buf) == -1 || !is_dir != !S_ISDIR(buf.st_mode)) {
!!! again. How about:
if(is_dir && lstat(path, &buf) == 0 && !S_ISDIR(buf.st_mode))
which is what I think is being tested here. Again, your fix doesn't match the original test - Andrew's test verifies that the file either doesn't exist or that (the file is a directory) XOR is_dir is true.
But I wonder if we really need this... I believe this is to avoid "strange" results when people replace directories in a package with symlinks to directories in another package. The more I think about this, the more I want to just remove our special case handling of this in general. A directory in a package and a symlink on the filesystem should just be a conflict. So unless I am missing another reason for this, get rid of it.
No comment, as I do not know the codebase well enough to respond.
+ continue; + } + + if(is_dir) { + pdname = realpath(path, NULL); + ppath = mdirname(pdname); + } else { + pdname = mdirname(path); + ppath = realpath(pdname, NULL); + } free(pdname);
Umm... why calculate pdname just to free it immediately? Since we need to use it to calculate ppath?
Oops... I blame doing quick review before rushing to the train!
if(!ppath) {
Overall, I can't really judge the quality of Andrew's patch, but I definitely do not agree with the comments given by Allan.
HTH in my own way,
Gesh
On Fri, 07 Sep 2012 21:56:48 +1000 Allan McRae <allan@archlinux.org> wrote:
On 07/09/12 21:06, Menachem Moystoviz wrote:
I'm not a dev, nor the original author of the patch, but...
On Fri, Sep 7, 2012 at 10:29 AM, Allan McRae <allan@archlinux.org> wrote:
On 06/09/12 13:34, Andrew Gregory wrote:
<snip>
+ + /* make sure pkgfile and target are of the same type */ + if(!is_dir != !(pkgfile[pkgfile_len - 1] == '/')) {
!!! - literally.... How about:
if (is_dir && (pkgfile[pkgfile_len - 1] != '/')) This isn't the same check - Andrew's check verifies that is_dir XOR (pkgfile's last character is not '/' (i.e. pkgfile names a file path)), is true, while your test is for AND. This way of defining XOR is quite confusing, though - isn't there a clearer way? (i.e. macros, inline functions?)
Ah - that is confusing... obviously!
So a clearer version of the test would be:
if ((!is_dir && pkgfile[pkgfile_len - 1] == '/') || (is_dir && pkgfile[pkgfile_len - 1] != '/'))
Yeah, the !!! xor can be a bit confusing if you're not familiar with it, but I dislike long multi-line ifs. I can just force is_dir to 1 or 0 and compare them directly: is_dir = S_ISDIR(buf.st_mode) ? 1 : 0; ... if(is_dir != (pkgfile[pkgfile_len - 1] == '/'))...
+ continue; + } + + /* make sure pkgfile is long enough */ + if(pkgfile_len < pbname_len) { + continue; + }
If I understand this correctly, we are comparing the length of the final component of the passed in parameter to the entire filename length from the package. I think that it would be quite uncommon to hit the continue here, so that can be removed.
I think you mean to say that we replace this if(){continue;} with if(!){...} wrapping the rest of the loop? That would make sense if this branch is costly.
This is a safety to make sure that we don't try to dereference a negative index in the next if.
No. I was saying that I think this test is going to result in a very small number of cases being skipped and so it would be more efficient to just not perform it.
- /* avoid the costly realpath usage if the basenames don't match */ - if(strcmp(mbasename(pkgfile), bname) != 0) { + /* make sure pbname_len takes us to the start of a path component */ + if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') {
With the above removal: if (pbname_len > pkgfile_len)
This check must be != because it is specifically looking for pbname_len either taking us to the beginning of the string or the beginning of a path component, ie: pkgfile == "basename" or pkgfile == "foo/bar/basename" but not pkgfile == "foo/barbasename" It could be combined (not replaced) with the check in the previous if statement, but that gets a little ugly, so I left them separate.
+ continue; + } + + /* compare basename with bname */ + if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) {
Why an strncmp here? strcmp will do the same comparison...
In order to make sure that we don't run into buffer overflow issues - http://stackoverflow.com/questions/448563/am-i-correct-that-strcmp-is-equiva...
Both strings are null terminated.
strncmp is needed because if it's a directory pkgfile will end in '/' but bname will not.
continue; }
/* concatenate our file and the root path */ - if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) { + if(rootlen + 1 + pkgfile_len > PATH_MAX) { path[rootlen] = '\0'; /* reset path for error message */ pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s\n"), path, pkgfile); continue; } strcpy(path + rootlen, pkgfile);
- pdname = mdirname(path); - ppath = realpath(pdname, NULL); + /* check that the file exists on disk and is the correct type */ + if(lstat(path, &buf) == -1 || !is_dir != !S_ISDIR(buf.st_mode)) {
!!! again. How about:
if(is_dir && lstat(path, &buf) == 0 && !S_ISDIR(buf.st_mode))
which is what I think is being tested here. Again, your fix doesn't match the original test - Andrew's test verifies that the file either doesn't exist or that (the file is a directory) XOR is_dir is true.
But I wonder if we really need this... I believe this is to avoid "strange" results when people replace directories in a package with symlinks to directories in another package. The more I think about this, the more I want to just remove our special case handling of this in general. A directory in a package and a symlink on the filesystem should just be a conflict. So unless I am missing another reason for this, get rid of it.
No comment, as I do not know the codebase well enough to respond.
I didn't really like this in the first place; deleted.
+ continue; + } + + if(is_dir) { + pdname = realpath(path, NULL); + ppath = mdirname(pdname); + } else { + pdname = mdirname(path); + ppath = realpath(pdname, NULL); + } free(pdname);
Umm... why calculate pdname just to free it immediately? Since we need to use it to calculate ppath?
Oops... I blame doing quick review before rushing to the train!
I copied this particular bit from above where I do the same thing to the target, but it's unnecessary work here. A simple strncpy will actually suffice: strncpy(path + rootlen, pkgfile, pkgfile_len - pbname_len); path[rootlen + pkgfile_len - pbname_len] = '\0'; ppath = realpath(path, NULL);
if(!ppath) {
Overall, I can't really judge the quality of Andrew's patch, but I definitely do not agree with the comments given by Allan.
HTH in my own way,
Gesh
The restriction of not checking the ownership of a directory is unnecessary given that all the package filelists contain this information. Remove this restriction, with the expectation that you might get multiple packages returned for a given directory. Additionally attempt to minimise the number of files getting through to the slow realpath call. Original-work-by: Dan McGee <dan@archlinux.org> Original-work-by: Allan McRae <allan@archlinux.org> Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com> --- src/pacman/query.c | 60 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/src/pacman/query.c b/src/pacman/query.c index 92219e8..4b6f429 100644 --- a/src/pacman/query.c +++ b/src/pacman/query.c @@ -97,6 +97,7 @@ static int query_fileowner(alpm_list_t *targets) size_t rootlen; alpm_list_t *t; alpm_db_t *db_local; + alpm_list_t *packages; /* This code is here for safety only */ if(targets == NULL) { @@ -129,13 +130,14 @@ static int query_fileowner(alpm_list_t *targets) } db_local = alpm_get_localdb(config->handle); + packages = alpm_db_get_pkgcache(db_local); for(t = targets; t; t = alpm_list_next(t)) { char *filename = NULL, *dname = NULL, *rpath = NULL; const char *bname; struct stat buf; alpm_list_t *i; - size_t len; + size_t len, is_dir, bname_len, pbname_len; int found = 0; if((filename = strdup(t->data)) == NULL) { @@ -163,15 +165,21 @@ static int query_fileowner(alpm_list_t *targets) } } - if(S_ISDIR(buf.st_mode)) { - pm_printf(ALPM_LOG_ERROR, - _("cannot determine ownership of directory '%s'\n"), filename); - goto targcleanup; + is_dir = S_ISDIR(buf.st_mode) ? 1 : 0; + if(is_dir) { + /* the entire filename is safe to resolve if we know it points to a dir, + * and it's necessary in case it ends in . or .. */ + dname = realpath(filename, NULL); + bname = mbasename(dname); + rpath = mdirname(dname); + } else { + bname = mbasename(filename); + dname = mdirname(filename); + rpath = realpath(dname, NULL); } - bname = mbasename(filename); - dname = mdirname(filename); - rpath = realpath(dname, NULL); + bname_len = strlen(bname); + pbname_len = bname_len + is_dir; if(!dname || !rpath) { pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s\n"), @@ -179,33 +187,47 @@ static int query_fileowner(alpm_list_t *targets) goto targcleanup; } - for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) { + for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) { alpm_pkg_t *info = i->data; alpm_filelist_t *filelist = alpm_pkg_get_files(info); size_t j; for(j = 0; j < filelist->count; j++) { const alpm_file_t *file = filelist->files + j; - char *ppath, *pdname; + char *ppath; const char *pkgfile = file->name; + size_t pkgfile_len = strlen(pkgfile); + + /* make sure pkgfile and target are of the same type */ + if(is_dir != (pkgfile[pkgfile_len - 1] == '/')) { + continue; + } + + /* make sure pkgfile is long enough */ + if(pkgfile_len < pbname_len) { + continue; + } + + /* make sure pbname_len takes us to the start of a path component */ + if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') { + continue; + } - /* avoid the costly realpath usage if the basenames don't match */ - if(strcmp(mbasename(pkgfile), bname) != 0) { + /* compare basename with bname */ + if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) { continue; } - /* concatenate our file and the root path */ - if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) { + /* concatenate our dirname and the root path */ + if(rootlen + 1 + pkgfile_len - pbname_len > PATH_MAX) { path[rootlen] = '\0'; /* reset path for error message */ pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s\n"), path, pkgfile); continue; } - strcpy(path + rootlen, pkgfile); - - pdname = mdirname(path); - ppath = realpath(pdname, NULL); - free(pdname); + strncpy(path + rootlen, pkgfile, pkgfile_len - pbname_len); + path[rootlen + pkgfile_len - pbname_len] = '\0'; + ppath = realpath(path, NULL); if(!ppath) { pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s\n"), path, strerror(errno)); -- 1.7.12
participants (3)
-
Allan McRae
-
Andrew Gregory
-
Menachem Moystoviz