[pacman-dev] [PATCH] New feature: files verification

Dan McGee dpmcgee at gmail.com
Sat Mar 28 17:12:59 EDT 2009


On Sat, Mar 28, 2009 at 8:52 AM, changaco <changaco at laposte.net> wrote:
> From 7c0348fa499f55968764fef4c7bb3d3d7245932b Mon Sep 17 00:00:00 2001
> From: Charly COSTE <changaco at laposte.net>
> Date: Sat, 28 Mar 2009 12:36:23 +0100
> Subject: [PATCH] New feature: files verification
>
> A new option "-Qy" which checks if the files owned by a/some/all package(s) really are on the system (i.e. not accidentally deleted). http://bugs.archlinux.org/task/13877
>
> Signed-off-by: Charly COSTE <changaco at laposte.net>

Two initial comments before we get into the meat of the patch:
1. Please enable the pre-commit hook in git:
mv .git/hooks/pre-commit.sample .git/hooks/pre-commit
This will ensure you don't have the following that I caught when
applying the patch locally:

dmcgee at galway ~/projects/pacman (master)
$ git am -3 < /tmp/pacman-verify.patch
Applying: New feature: files verification
/home/dmcgee/projects/pacman/.git/rebase-apply/patch:160: trailing whitespace.
warning: 1 line adds whitespace errors.

2. Ensure you build with ./configure --enable-debug, which turns on
the gcc -Wall -Werror options:
cc1: warnings being treated as errors
query.c: In function ‘verify’:
query.c:348: error: initialization discards qualifiers from pointer target type
(you should make this a 'const')
query.c:386: error: format ‘%s’ expects type ‘char *’, but argument 3
has type ‘void *’
(you need an explicit (char *) cast here)
make[2]: *** [query.o] Error 1

You'll see a few more errors pop up after fixing this, so you've been warned. :P

> ---
>  src/pacman/conf.h   |    1 +
>  src/pacman/pacman.c |    6 +++-
>  src/pacman/query.c  |   87 +++++++++++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 84 insertions(+), 10 deletions(-)
>
> diff --git a/src/pacman/conf.h b/src/pacman/conf.h
> index 466d983..01a1828 100644
> --- a/src/pacman/conf.h
> +++ b/src/pacman/conf.h
> @@ -50,6 +50,7 @@ typedef struct __config_t {
>        unsigned short op_q_search;
>        unsigned short op_q_changelog;
>        unsigned short op_q_upgrade;
> +       unsigned short op_q_verify;
>
>        unsigned short op_s_clean;
>        unsigned short op_s_downloadonly;
> diff --git a/src/pacman/pacman.c b/src/pacman/pacman.c
> index 59916d6..e1d13a4 100644
> --- a/src/pacman/pacman.c
> +++ b/src/pacman/pacman.c
> @@ -360,6 +360,7 @@ static int parseargs(int argc, char *argv[])
>                {"verbose",    no_argument,       0, 'v'},
>                {"downloadonly", no_argument,     0, 'w'},
>                {"refresh",    no_argument,       0, 'y'},
> +               {"verify",     no_argument,       0, 'y'},
I think -k would be a better option, after seeing Aaron's comments.
Why don't we change it to that instead.

>                {"noconfirm",  no_argument,       0, 1000},
>                {"config",     required_argument, 0, 1001},
>                {"ignore",     required_argument, 0, 1002},
> @@ -510,7 +511,10 @@ static int parseargs(int argc, char *argv[])
>                                config->flags |= PM_TRANS_FLAG_DOWNLOADONLY;
>                                config->flags |= PM_TRANS_FLAG_NOCONFLICTS;
>                                break;
> -                       case 'y': (config->op_s_sync)++; break;
> +                       case 'y':
> +                               (config->op_s_sync)++;
> +                               config->op_q_verify = 1;
> +                               break;
>                        case '?': return(1);
>                        default: return(1);
>                }

You missed adding it to the query directions, somewhere around line
110 in pacman.c. We will also want to document it in the
doc/pacman.8.txt manpage.

> diff --git a/src/pacman/query.c b/src/pacman/query.c
> index 0d48638..2a7dba5 100644
> --- a/src/pacman/query.c
> +++ b/src/pacman/query.c
> @@ -334,10 +334,70 @@ static void display(pmpkg_t *pkg)
>        }
>  }
>
> +// We loop through the packages and for each packages we loop through files to check if they exist
> +static void verify(alpm_list_t *pkgs)
> +{
> +       alpm_list_t *i, *files, *file, *pkgs2repair;
Some of these will be uninitialized on their first access, but using
./configure --enable-debug will show you this.

> +       pmpkg_t *pkg;
> +       struct stat stFileInfo;
Please stick with the naming conventions in the rest of the code- no
camel case necessary. Just 'st' is the preferred name for a struct
stat. This also can be made local to the for(files) loop.

> +       int pkgs_n = alpm_list_count(pkgs);
> +       int j = 0, k = 0;
> +       for(i = pkgs; i; i = alpm_list_next(i)) {
> +               j++;
> +               pkg = alpm_list_getdata(i);
> +               char *pkgname = alpm_pkg_get_name(pkg);
> +               if(filter(pkg)) {
> +                       files = alpm_pkg_get_files(pkg);
> +                       for(file = files; file; file = alpm_list_next(file)){
> +                               char *x = alpm_list_getdata(file);
> +                               char *f = malloc (strlen(x)+2);
No automatic GC in C. You are leaking like a sieve here:
==26832== 43,564 bytes in 1,512 blocks are definitely lost in loss record 3 of 3
==26832==    at 0x4C2391E: malloc (in
/usr/lib/valgrind/amd64-linux/vgpreload_memcheck.so)
==26832==    by 0x40852C: verify (query.c:353)
==26832==    by 0x408B69: pacman_query (query.c:485)
==26832==    by 0x407A6B: main (pacman.c:944)

Valgrind is awesome. Compile pacman, run ./src/pacman/pacman at least
once, then do the following:

dmcgee at galway ~/projects/pacman (master)
$ valgrind ./src/pacman/.libs/lt-pacman -Qy glibc pacman-git

And it should give you a fairly concise summary of memory leaks. See
here for more: http://www.toofishes.net/blog/using-valgrind-c-programming/

> +                               strcpy(f,"/");
> +                               strcat(f, x);
This logic isn't always correct. See lib/libalpm/add.c, line 309:
snprintf(filename, PATH_MAX, "%s%s", handle->root, entryname);

You cannot assume the root path is always '/'. In addition, it would
probably be better to just use a static buffer of length PATH_MAX like
that code does or you will be spending a tremendous amount of time
mallocing (and currently leaking memory).

> +                               k++;
> +                               if(strncmp(f, "/tmp", 4)==0){
> +                                       continue; // ignore files in /tmp
> +                               }
What has files in /tmp?

> +                               if(lstat(f,&stFileInfo) == 0) { // we use lstat to prevent errors from symbolic links
> +                                       if(!config->quiet){
> +                                               fprintf(stderr, "\rfile n°%i (package %i/%i): OK", k, j, pkgs_n); fflush(stdout);
Hmm. I like the interactivity of this, but I'm not completely sure
what you having going on here. You write to stderr but flush stdout?
(And keep statements to one per line, please.) In addition, the '°'
symbol means nothing to me- this could be a cultural thing though. :)

It would probably also be best to use a format like we do in
callback.c, line 399, which is something like this:
checking package integrity...
(1/1) checking for file conflicts                   [#####################] 100%
(1/1) installing emacs                              [#####################] 100%

I am referring to the last two lines here. We can probably even use
this callback function to do the dirty work of printing status and a
progress bar for us.

I am also a fan of output only when errors happen, besides a progress
bar. For instance, look at the output of testdb:
$ testdb
Checking the integrity of the local database in /var/lib/pacman/
missing dependency for mysql : mysql-clients>=5.1.32

If there is any output at all, it is important as it is telling you problems.

> +                                       }
> +                               }
> +                               else {
> +                                       if(config->quiet){
> +                                               fprintf(stderr, "%s %s\n", pkgname, f); fflush(stdout);
> +                                       } else {
> +                                               fprintf(stderr, "\rfile n°%i (package %i/%i): Missing file owned by \"%s\": %s\n", k, j, pkgs_n, pkgname, f); fflush(stdout);
> +                                       }
> +                                       if(alpm_list_find_ptr(pkgs2repair, pkgname) == NULL) {
> +                                               pkgs2repair = alpm_list_add(pkgs2repair, pkgname);
> +                                       }
> +                               }
> +                       }
> +               }
> +       }
> +       if(!config->quiet){
> +               fprintf(stderr, "\n");
> +       }
> +       if(alpm_list_count(pkgs2repair) > 0) {
> +               if(!config->quiet){
> +                       fprintf(stderr, "Damaged packages: ");
> +               }
> +               for(i = pkgs2repair; i; i = alpm_list_next(i)) {
Since we don't actually do any repair, can we just call pkgs2repair
"damaged"? That also kills the numeral from the variable name, which I
at least don't find very appealing.

> +                       fprintf(stdout, "%s ", alpm_list_getdata(i));
> +               }
pkgs2repair is also never freed. Memory leak.

> +       } else {
> +               if(!config->quiet){
> +                       fprintf(stderr, "No damaged packages.");
> +               }
> +       }
> +       fprintf(stdout, "\n");
> +       fflush(stdout);
> +}
> +
>  int pacman_query(alpm_list_t *targets)
>  {
>        int ret = 0;
> -       alpm_list_t *i;
> +       alpm_list_t *i, *pkgs;
>
>        /* First: operations that do not require targets */
>
> @@ -363,18 +423,20 @@ int pacman_query(alpm_list_t *targets)
>        }
>
>        /* operations on all packages in the local DB
> -        * valid: no-op (plain -Q), list, info
> +        * valid: no-op (plain -Q), list, info, verify
>         * invalid: isfile, owns */
>        if(targets == NULL) {
>                if(config->op_q_isfile || config->op_q_owns) {
>                        pm_printf(PM_LOG_ERROR, _("no targets specified (use -h for help)\n"));
>                        return(1);
> -               }
> -
> -               for(i = alpm_db_get_pkgcache(db_local); i; i = alpm_list_next(i)) {
> -                       pmpkg_t *pkg = alpm_list_getdata(i);
> -                       if(filter(pkg)) {
> -                               display(pkg);
> +               } else if(config->op_q_verify) {
> +                       verify(alpm_db_get_pkgcache(db_local));
> +               } else {
> +                       for(i = alpm_db_get_pkgcache(db_local); i; i = alpm_list_next(i)) {
> +                               pmpkg_t *pkg = alpm_list_getdata(i);
> +                               if(filter(pkg)) {
> +                                       display(pkg);
> +                               }
>                        }
>                }
>                return(0);
> @@ -389,7 +451,7 @@ int pacman_query(alpm_list_t *targets)
>        }
>
>        /* operations on named packages in the local DB
> -        * valid: no-op (plain -Q), list, info */
> +        * valid: no-op (plain -Q), list, info, verify */
>        for(i = targets; i; i = alpm_list_next(i)) {
>                char *strname = alpm_list_getdata(i);
>                pmpkg_t *pkg = NULL;
> @@ -406,8 +468,12 @@ int pacman_query(alpm_list_t *targets)
>                        continue;
>                }
>
> +
>                if(filter(pkg)) {
>                        display(pkg);
> +                       if(config->op_q_verify){
> +                               pkgs = alpm_list_add(pkgs, pkg);
Memory leak!
> +                       }
>                }
>
>                if(config->op_q_isfile) {
> @@ -415,6 +481,9 @@ int pacman_query(alpm_list_t *targets)
>                        pkg = NULL;
>                }
>        }
> +       if(config->op_q_verify){
> +               verify(pkgs);
> +       }
>
>        return(ret);
>  }

I think your logic is a bit weird here, as you are trying to
incorporate both filters and a verify- note that we end up displaying
both pieces of information:

$ ./src/pacman/.libs/lt-pacman -Qy glibc pacman-git
glibc 2.9-4
pacman-git 20090116-1
file n°1512 (package 2/2): OK
No damaged packages.

We should probably only do verification and not print the names of the
packages and run them through filters, unless anyone else has an
opinion.

I commented a lot, but don't think I hate your patch. I definitely
like this idea.

-Dan


More information about the pacman-dev mailing list