[pacman-dev] [PATCH v2 1/3] contrib: Add verify-pacman-repo-db.pl
Andrew Gregory
andrew.gregory.8 at gmail.com
Mon Aug 8 19:08:19 UTC 2016
On 08/07/16 at 03:46pm, Florian Pritz wrote:
> From the documentation:
>
> verify-pacman-repo-db looks at a pacman repo database and verifies its
> content with the actual package files. The database is expected to be in
> the same directory as the packages (or symlinks to the packages).
>
> The following properties are verified for each package in the database:
>
> - existence of the package file
> - file size
> - MD5 and SHA256 checksum (--checksum)
>
> Signed-off-by: Florian Pritz <bluewind at xinu.at>
> ---
>
> v2:
>
> - Remove GPG TODO
> - Add commit message
> - Remove duplicate documentation from script
> - Move actual documentation to dedicated commit
>
> contrib/verify-pacman-repo-db.pl | 260 +++++++++++++++++++++++++++++++++++++++
> 1 file changed, 260 insertions(+)
> create mode 100755 contrib/verify-pacman-repo-db.pl
>
> diff --git a/contrib/verify-pacman-repo-db.pl b/contrib/verify-pacman-repo-db.pl
> new file mode 100755
> index 0000000..1d02c26
> --- /dev/null
> +++ b/contrib/verify-pacman-repo-db.pl
> @@ -0,0 +1,260 @@
> +#!/usr/bin/perl -T
> +use warnings;
> +use strict;
> +
> +
> +# This is used for the usage output
> +=pod
> +
> +=head1 SYNOPSIS
> +
> +verify-pacman-repo-db.pl [options] <database file> ...
> +
> + Options:
> + --help, -h Show short help message
> + --debug Enable debug output
> + --checksum, -c Verify checksums of packages
> + --thread, -t <num> Use num threads to verify packages. Default: 1
> + NOTE: Each thread uses up to approx. 128MiB of memory
> +
> +=cut
> +
> +package main;
> +use Getopt::Long;
> +use Pod::Usage;
> +
> +exit main();
> +
> +sub main {
> + my %opts = (
> + threads => 1,
> + );
> +
> + Getopt::Long::Configure ("bundling");
> + pod2usage(-verbose => 0) if (@ARGV== 0);
> + GetOptions(\%opts, "help|h", "debug", "threads|t=i", "checksum|c") or pod2usage(2);
> + pod2usage(0) if $opts{help};
> +
> + my $verifier = Verifier->new(\%opts);
> +
> + for my $repodb (@ARGV) {
> + $verifier->check_repodb($repodb);
> + }
> +
> + $verifier->finalize();
> + return $verifier->get_error_status();
> +}
> +
> +package Verifier;
> +use Archive::Tar;
> +use Digest::MD5;
> +use Digest::SHA;
> +use File::Basename;
> +use threads;
> +use threads::shared;
> +use Thread::Queue;
> +
> +sub new {
> + my $class = shift;
> + my $opts = shift;
> +
> + my $self :shared = shared_clone({
> + opts => \%{$opts},
> + package_queue => Thread::Queue->new(),
> + output_queue => Thread::Queue->new(),
> + workers => [],
> + errors => 0,
> + });
> +
> + bless $self, $class;
> + $self->start_workers();
> + return $self;
> +}
> +
> +sub start_workers {
> + my $self = shift;
> +
> + threads->new(\&_worker_output_queue, $self);
> +
> + for (my $i = 0; $i < $self->{opts}->{threads}; $i++) {
> + my $thr :shared = shared_clone(threads->new(\&_worker_package_queue, $self));
> + push @{$self->{workers}}, $thr;
> + }
> +}
> +
> +sub _worker_package_queue {
> + my $self = shift;
> + while (my $workpack = $self->{package_queue}->dequeue()) {
> + my $dbdata = $self->_parse_db_entry($workpack->{db_desc_content});
> + $self->{errors} += $self->_verify_db_entry($workpack->{dirname}, $dbdata);
> + }
> +}
> +
> +sub _worker_output_queue {
> + my $self = shift;
> + while (my $output = $self->{output_queue}->dequeue()) {
> + print STDERR $output;
> + }
> +}
> +
> +sub finalize {
> + my $self = shift;
> +
> + $self->{package_queue}->end();
> + $self->_join_threads($self->{workers});
> +
> + $self->{output_queue}->end();
> + $self->_join_threads([threads->list]);
> +}
> +
> +sub _join_threads {
> + my $self = shift;
> + my $threads = shift;
> +
> + for my $thr (@{$threads}) {
> + if ($thr->tid && !threads::equal($thr, threads->self)) {
> + print "waiting for thread ".$thr->tid()." to finish\n" if $self->{opts}->{debug};
> + $thr->join;
> + }
> + }
> +}
> +
> +sub get_error_status {
> + my $self = shift;
> +
> + return $self->{errors} > 0;
> +}
> +
> +sub check_repodb {
> + my $self = shift;
> + my $repodb = shift;
> +
> + my $db = Archive::Tar->new();
> + $db->read($repodb);
> +
> + my $dirname = dirname($repodb);
> + my $pkgcount = 0;
> +
> + my @files = $db->list_files();
> + for my $file_object ($db->get_files()) {
> + if ($file_object->name =~ m/^([^\/]+)\/desc$/) {
> + my $package = $1;
> + $self->{package_queue}->enqueue({
> + package => $package,
> + db_desc_content => $file_object->get_content(),
> + dirname => $dirname,
> + });
> + $pkgcount++;
> + }
> + }
> +
> + $self->_debug(sprintf("Queued %d package from database '%s'\n", $pkgcount, $repodb));
> +}
> +
> +sub _parse_db_entry {
> + my $self = shift;
> + my $content = shift;
> + my %db;
> + my $key;
> +
> + for my $line (split /\n/, $content) {
> + if ($line eq '') {
> + $key = undef;
> + next;
> + }
> + if ($line =~ m/^%(.+)%$/) {
> + $key = $1;
> + } else {
> + push @{$db{$key}}, $line;
> + die "\$key not set. Is the db formated incorrectly?" unless $key;
> + }
This if/else chain is wrong: values can themselves match /^%.+%$/, so a
value line belonging to the current key would be misread as a new key
header. It should be:
if($line eq '') ...
elsif($key)...
elsif($line =~ /^%(.+)%$/) ...
else ...
Also, s/formated/formatted/ in the die message.
> + }
> + return \%db;
> +}
> +
> +sub _output {
> + my $self = shift;
> + my $output = shift;
> +
> + return if $output eq "";
> +
> + $output = sprintf("Thread %s: %s", threads->self->tid(), $output);
> + $self->{output_queue}->enqueue($output);
> +}
> +
> +sub _debug {
> + my $self = shift;
> + my $output = shift;
> + $self->_output($output) if $self->{opts}->{debug};
> +}
> +
> +sub _verify_db_entry {
> + my $self = shift;
> + my $basedir = shift;
> + my $dbdata = shift;
> + my $ret = 0;
> + my $output = "";
> +
> + # verify package exists
> + my $pkgfile = $basedir.'/'.$dbdata->{FILENAME}[0];
> + $self->_debug(sprintf("Checking package %s\n", $dbdata->{FILENAME}[0]));
> + unless (-e $pkgfile) {
> + $self->_output(sprintf("Package file missing: %s\n", $pkgfile));
> + return 1;
> + }
> +
> + $ret += $self->_verify_package_size($dbdata, $pkgfile);
> + $ret += $self->_verify_package_checksum($dbdata, $pkgfile) if $self->{opts}->{checksum};
> +
> + return $ret;
> +}
> +
> +sub _verify_package_size {
> + my $self = shift;
> + my $dbdata = shift;
> + my $pkgfile = shift;
> +
> + my $csize = $dbdata->{CSIZE}[0];
> + my $filesize = (stat($pkgfile))[7];
> + unless ($csize == $filesize) {
> + $self->_output(sprintf("Package file has incorrect size: %d vs %d: %s\n", $csize, $filesize, $pkgfile));
> + return 1;
> + }
> + return 0;
> +}
> +
> +sub _verify_package_checksum {
> + my $self = shift;
> + my $dbdata = shift;
> + my $pkgfile = shift;
> +
> + my $md5 = Digest::MD5->new;
> + my $sha = Digest::SHA->new(256);
> +
> + my $content;
> + # 128MiB to keep random IO low when using multiple threads (only works for large packages though)
> + my $chunksize = 1024*1024*128;
> + open my $fh, "<", $pkgfile;
> + while (read($fh, $content, $chunksize)) {
> + $md5->add($content);
> + $sha->add($content);
> + }
> +
> + my $expected_sha = $dbdata->{SHA256SUM}[0];
> + my $expected_md5 = $dbdata->{MD5SUM}[0];
> + my $got_md5 = $md5->hexdigest;
> + my $got_sha = $sha->hexdigest;
> +
> + unless ($expected_sha eq $got_sha and $expected_md5 eq $got_md5) {
> + my $output;
> + $output .= sprintf "Package file has incorrect checksum: %s\n", $pkgfile;
> + $output .= sprintf "expected: SHA %s\n", $expected_sha;
> + $output .= sprintf "got: SHA %s\n", $got_sha;
> + $output .= sprintf "expected: MD5 %s\n", $expected_md5;
> + $output .= sprintf "got: MD5 %s\n", $got_md5;
> + $self->_output($output);
> + return 1;
> + }
> + return 0;
> +}
> +
> --
> 2.9.0
More information about the pacman-dev
mailing list