[pacman-dev] [PATCH v2 1/3] contrib: Add verify-pacman-repo-db.pl

Florian Pritz bluewind at xinu.at
Sun Aug 7 13:46:16 UTC 2016


>From the documentation:

verify-pacman-repo-db looks at a pacman repo database and verifies its
content with the actual package files. The database is expected to be in
the same directory as the packages (or symlinks to the packages).

The following properties are verified for each package in the database:

    - existence of the package file
    - file size
    - MD5 and SHA256 checksum (--checksum)

Signed-off-by: Florian Pritz <bluewind at xinu.at>
---

v2:

 - Remove GPG TODO
 - Add commit message
 - Remove duplicate documentation from script
 - Move actual documentation to dedicated commit

 contrib/verify-pacman-repo-db.pl | 260 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 260 insertions(+)
 create mode 100755 contrib/verify-pacman-repo-db.pl

diff --git a/contrib/verify-pacman-repo-db.pl b/contrib/verify-pacman-repo-db.pl
new file mode 100755
index 0000000..1d02c26
--- /dev/null
+++ b/contrib/verify-pacman-repo-db.pl
@@ -0,0 +1,260 @@
+#!/usr/bin/perl -T
+use warnings;
+use strict;
+
+
+# This is used for the usage output
+=pod
+
+=head1 SYNOPSIS
+
+verify-pacman-repo-db.pl [options] <database file> ...
+
+ Options:
+   --help, -h         Show short help message
+   --debug            Enable debug output
+   --checksum, -c     Verify checksums of packages
+   --thread, -t <num> Use num threads to verify packages. Default: 1
+                      NOTE: Each thread uses up to approx. 128MiB of memory
+
+=cut
+
+package main;
+use Getopt::Long;
+use Pod::Usage;
+
+exit main();
+
+sub main {
+	my %opts = (
+		threads => 1,
+	);
+
+	Getopt::Long::Configure ("bundling");
+	pod2usage(-verbose => 0) if (@ARGV== 0);
+	GetOptions(\%opts, "help|h", "debug", "threads|t=i", "checksum|c") or pod2usage(2);
+	pod2usage(0) if $opts{help};
+
+	my $verifier = Verifier->new(\%opts);
+
+	for my $repodb (@ARGV) {
+		$verifier->check_repodb($repodb);
+	}
+
+	$verifier->finalize();
+	return $verifier->get_error_status();
+}
+
+package Verifier;
+use Archive::Tar;
+use Digest::MD5;
+use Digest::SHA;
+use File::Basename;
+use threads;
+use threads::shared;
+use Thread::Queue;
+
+sub new {
+	my $class = shift;
+	my $opts = shift;
+
+	my $self :shared = shared_clone({
+		opts => \%{$opts},
+		package_queue => Thread::Queue->new(),
+		output_queue => Thread::Queue->new(),
+		workers => [],
+		errors => 0,
+	});
+
+	bless $self, $class;
+	$self->start_workers();
+	return $self;
+}
+
+sub start_workers {
+	my $self = shift;
+
+	threads->new(\&_worker_output_queue, $self);
+
+	for (my $i = 0; $i < $self->{opts}->{threads}; $i++) {
+		my $thr :shared = shared_clone(threads->new(\&_worker_package_queue, $self));
+		push @{$self->{workers}}, $thr;
+	}
+}
+
+sub _worker_package_queue {
+	my $self = shift;
+	while (my $workpack = $self->{package_queue}->dequeue()) {
+		my $dbdata = $self->_parse_db_entry($workpack->{db_desc_content});
+		$self->{errors} += $self->_verify_db_entry($workpack->{dirname}, $dbdata);
+	}
+}
+
+sub _worker_output_queue {
+	my $self = shift;
+	while (my $output = $self->{output_queue}->dequeue()) {
+		print STDERR $output;
+	}
+}
+
+sub finalize {
+	my $self = shift;
+
+	$self->{package_queue}->end();
+	$self->_join_threads($self->{workers});
+
+	$self->{output_queue}->end();
+	$self->_join_threads([threads->list]);
+}
+
+sub _join_threads {
+	my $self = shift;
+	my $threads = shift;
+
+	for my $thr (@{$threads}) {
+		if ($thr->tid && !threads::equal($thr, threads->self)) {
+			print "waiting for thread ".$thr->tid()." to finish\n" if $self->{opts}->{debug};
+			$thr->join;
+		}
+	}
+}
+
+sub get_error_status {
+	my $self = shift;
+
+	return $self->{errors} > 0;
+}
+
+sub check_repodb {
+	my $self = shift;
+	my $repodb = shift;
+
+	my $db = Archive::Tar->new();
+	$db->read($repodb);
+
+	my $dirname = dirname($repodb);
+	my $pkgcount = 0;
+
+	my @files = $db->list_files();
+	for my $file_object ($db->get_files()) {
+		if ($file_object->name =~ m/^([^\/]+)\/desc$/) {
+			my $package = $1;
+			$self->{package_queue}->enqueue({
+					package => $package,
+					db_desc_content => $file_object->get_content(),
+					dirname => $dirname,
+				});
+			$pkgcount++;
+		}
+	}
+
+	$self->_debug(sprintf("Queued %d package from database '%s'\n", $pkgcount, $repodb));
+}
+
+sub _parse_db_entry {
+	my $self = shift;
+	my $content = shift;
+	my %db;
+	my $key;
+
+	for my $line (split /\n/, $content) {
+		if ($line eq '') {
+			$key = undef;
+			next;
+		}
+		if ($line =~ m/^%(.+)%$/) {
+			$key = $1;
+		} else {
+			push @{$db{$key}}, $line;
+			die "\$key not set. Is the db formated incorrectly?" unless $key;
+		}
+	}
+	return \%db;
+}
+
+sub _output {
+	my $self = shift;
+	my $output = shift;
+
+	return if $output eq "";
+
+	$output = sprintf("Thread %s: %s", threads->self->tid(), $output);
+	$self->{output_queue}->enqueue($output);
+}
+
+sub _debug {
+	my $self = shift;
+	my $output = shift;
+	$self->_output($output) if $self->{opts}->{debug};
+}
+
+sub _verify_db_entry {
+	my $self = shift;
+	my $basedir = shift;
+	my $dbdata = shift;
+	my $ret = 0;
+	my $output = "";
+
+	# verify package exists
+	my $pkgfile = $basedir.'/'.$dbdata->{FILENAME}[0];
+	$self->_debug(sprintf("Checking package %s\n", $dbdata->{FILENAME}[0]));
+	unless (-e $pkgfile) {
+		$self->_output(sprintf("Package file missing: %s\n", $pkgfile));
+		return 1;
+	}
+
+	$ret += $self->_verify_package_size($dbdata, $pkgfile);
+	$ret += $self->_verify_package_checksum($dbdata, $pkgfile) if $self->{opts}->{checksum};
+
+	return $ret;
+}
+
+sub _verify_package_size {
+	my $self = shift;
+	my $dbdata = shift;
+	my $pkgfile = shift;
+
+	my $csize = $dbdata->{CSIZE}[0];
+	my $filesize = (stat($pkgfile))[7];
+	unless ($csize == $filesize) {
+		$self->_output(sprintf("Package file has incorrect size: %d vs %d: %s\n", $csize, $filesize, $pkgfile));
+		return 1;
+	}
+	return 0;
+}
+
+sub _verify_package_checksum {
+	my $self = shift;
+	my $dbdata = shift;
+	my $pkgfile = shift;
+
+	my $md5 = Digest::MD5->new;
+	my $sha = Digest::SHA->new(256);
+
+	my $content;
+	# 128MiB to keep random IO low when using multiple threads (only works for large packages though)
+	my $chunksize = 1024*1024*128;
+	open my $fh, "<", $pkgfile;
+	while (read($fh, $content, $chunksize)) {
+		$md5->add($content);
+		$sha->add($content);
+	}
+
+	my $expected_sha = $dbdata->{SHA256SUM}[0];
+	my $expected_md5 = $dbdata->{MD5SUM}[0];
+	my $got_md5 = $md5->hexdigest;
+	my $got_sha = $sha->hexdigest;
+
+	unless ($expected_sha eq $got_sha and $expected_md5 eq $got_md5) {
+		my $output;
+		$output .= sprintf "Package file has incorrect checksum: %s\n", $pkgfile;
+		$output .= sprintf "expected: SHA %s\n", $expected_sha;
+		$output .= sprintf "got:      SHA %s\n", $got_sha;
+		$output .= sprintf "expected: MD5 %s\n", $expected_md5;
+		$output .= sprintf "got:      MD5 %s\n", $got_md5;
+		$self->_output($output);
+		return 1;
+	}
+	return 0;
+}
+
-- 
2.9.0


More information about the pacman-dev mailing list