[arch-general] patch integrity check to compare tree to dbs - was: Integrity Check i686 of core, extra 28-08-2009
Henning Garus
henning.garus at googlemail.com
Tue Sep 1 18:28:33 EDT 2009
On Sun, Aug 30, 2009 at 04:57:52PM +0200, Henning Garus wrote:
> On Sun, Aug 30, 2009 at 01:18:32PM +0200, Xavier wrote:
> > On Sun, Aug 30, 2009 at 12:56 PM, Henning
> > Garus<henning.garus at googlemail.com> wrote:
> > > On Sun, Aug 30, 2009 at 01:56:23AM +0200, Xavier wrote:
> > >>
> > >> Great, thanks! It indeed found all the problems I had noticed, and much more.
> > >>
> > >> It would be nice if this script could be automatically run as well,
> > >> once per week or so.
> > >>
> > >> Can you share the script used? Then we need to figure out if it can be
> > >> run in the same place as the other script.
> > >
> > > Since my script is largely based on check_packages.py that should be fairly
> > > straightforward. In fact my script expects parse_pkgbuilds.sh in the same
> > > directory.
> > >
> > > I have uploaded the script to codepad: http://codepad.org/tSmNwYNI
> > >
> >
> > I see. Then I am not sure whether we want to keep this check separate
> > or just include it in check_packages.py
>
> I kept it separate, because it deals with DBs and the ABS tree, while
> check_packages.py deals with the ABS tree only. On the other hand, integrating
> it should speed things up a bit (you run parse_pkgbuilds.sh only once) and we
> get rid of some duplicated code. On the downside, the output can be quite long
> with --vercmp activated, but I am not sure if that is even useful.
>
> Somehow integrating feels like the better idea, I will look into it.
Here it is. It seems a bit shorter this way. I also changed the handling of the
"any" arch. Checking "any" alone does not seem very useful, so I allowed multiple
abs roots to be specified.
-------------- next part --------------
>From 9f1d948bf3bd61f45e737c2b67cb4ae28cae9184 Mon Sep 17 00:00:00 2001
From: Henning Garus <henning.garus at gmail.com>
Date: Tue, 1 Sep 2009 23:54:47 +0200
Subject: [PATCH 1/2] check_packages.py: Allow multiple abs-trees
By parsing multiple abs trees we can include the "any" tree when parsing the
other trees; checking "any" standalone doesn't make much sense.
Signed-off-by: Henning Garus <henning.garus at gmail.com>
---
cron-jobs/check_archlinux/check_packages.py | 32 ++++++++++++++------------
cron-jobs/integrity-check | 2 +-
2 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/cron-jobs/check_archlinux/check_packages.py b/cron-jobs/check_archlinux/check_packages.py
index f2a9601..e4798a0 100755
--- a/cron-jobs/check_archlinux/check_packages.py
+++ b/cron-jobs/check_archlinux/check_packages.py
@@ -51,10 +51,11 @@ class Depend:
self.mod = mod
def parse_pkgbuilds(repos,arch):
- for repo in repos:
- data = commands.getoutput(os.path.dirname(sys.argv[0]) + '/parse_pkgbuilds.sh '
- + arch + ' ' + absroot + '/' + repo)
- parse_data(repo,data)
+ for absroot in absroots:
+ for repo in repos:
+ data = commands.getoutput(os.path.dirname(sys.argv[0]) + '/parse_pkgbuilds.sh '
+ + arch + ' ' + absroot + '/' + repo)
+ parse_data(repo,data)
def parse_data(repo,data):
attrname = None
@@ -332,9 +333,9 @@ def print_usage():
print "Usage: ./check_packages.py [OPTION]"
print ""
print "Options:"
- print " --abs-tree=<path> Check the specified tree (default : /var/abs)"
+ print " --abs-tree=<path[,path]> Check the specified tree(s) (default : /var/abs)"
print " --repos=<r1,r2,...> Check the specified repos (default : core,extra)"
- print " --arch=<any|i686|x86_64> Check the specified arch (default : i686)"
+ print " --arch=<i686|x86_64> Check the specified arch (default : i686)"
print " -h, --help Show this help and exit"
print ""
print "Examples:"
@@ -345,7 +346,7 @@ def print_usage():
print ""
## Default path to the abs root directory
-absroot = "/var/abs"
+absroots = ["/var/abs"]
## Default list of repos to check
repos = ['core', 'extra']
## Default arch
@@ -359,7 +360,7 @@ except getopt.GetoptError:
if opts != []:
for o, a in opts:
if o in ("--abs-tree"):
- absroot = a
+ absroot = a.split(',')
elif o in ("--repos"):
repos = a.split(",")
elif o in ("--arch"):
@@ -371,14 +372,15 @@ if opts != []:
print_usage()
sys.exit()
-if not os.path.isdir(absroot):
- print "Error : the abs tree " + absroot + " does not exist"
- sys.exit()
-for repo in repos:
- repopath = absroot + "/" + repo
- if not os.path.isdir(repopath):
- print "Error : the repository " + repo + " does not exist in " + absroot
+for absroot in absroots:
+ if not os.path.isdir(absroot):
+ print "Error : the abs tree " + absroot + " does not exist"
sys.exit()
+ for repo in repos:
+ repopath = absroot + "/" + repo
+ if not os.path.isdir(repopath):
+ print "Error : the repository " + repo + " does not exist in " + absroot
+ sys.exit()
# repos which need to be loaded
loadrepos = set([])
for repo in repos:
diff --git a/cron-jobs/integrity-check b/cron-jobs/integrity-check
index 0b59064..b3185ec 100755
--- a/cron-jobs/integrity-check
+++ b/cron-jobs/integrity-check
@@ -9,5 +9,5 @@ fi
$basedir/check_archlinux/check_packages.py \
--repos="$1" \
- --abs-tree="/srv/abs/rsync/$2" --arch="$2" |\
+ --abs-tree="/srv/abs/rsync/$2,/srv/abs/rsync/any" --arch="$2" |\
$basedir/devlist-mailer "Integrity Check $2: $1" "$3"
--
1.6.4.1
-------------- next part --------------
>From ab0d57072f2e02d0664f31ffdbeff58d42091667 Mon Sep 17 00:00:00 2001
From: Henning Garus <henning.garus at gmail.com>
Date: Tue, 1 Sep 2009 23:57:39 +0200
Subject: [PATCH 2/2] Add db comparison to integrity check
Compare the abs tree with the repo dbs to check if we have a PKGBUILD for
each package in the dbs and vice versa.
Signed-off-by: Henning Garus <henning.garus at gmail.com>
---
cron-jobs/check_archlinux/check_packages.py | 55 ++++++++++++++++++++++++++-
1 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/cron-jobs/check_archlinux/check_packages.py b/cron-jobs/check_archlinux/check_packages.py
index e4798a0..faa8c2f 100755
--- a/cron-jobs/check_archlinux/check_packages.py
+++ b/cron-jobs/check_archlinux/check_packages.py
@@ -16,9 +16,11 @@
# a non-core package)
# 8. Circular dependencies
-import os,re,commands,getopt,sys,alpm
+import os,re,commands,getopt,sys,tarfile,alpm
import pdb
+DBEXT='.db.tar.gz'
+
packages = {} # pkgname : PacmanPackage
provisions = {} # provision : PacmanPackage
pkgdeps,makepkgdeps = {},{} # pkgname : list of the PacmanPackage dependencies
@@ -26,6 +28,9 @@ invalid_pkgbuilds = []
missing_pkgbuilds = []
dups = []
+dbonly = []
+absonly = []
+
mismatches = []
missing_deps = []
missing_makedeps = []
@@ -102,6 +107,17 @@ def parse_data(repo,data):
provisions[provname] = []
provisions[provname].append(pkg)
+def parse_dbs(repos,arch):
+ dbpkgs = {}
+ for repo in repos:
+ pkgs = set([])
+ db = tarfile.open(os.path.join(repodir,repo,'os',arch,repo + DBEXT))
+ for line in db.getnames():
+ if not '/' in line:
+ pkgs.add(line.rsplit('-',2)[0])
+ dbpkgs[repo] = pkgs
+ return(dbpkgs)
+
def splitdep(dep):
name = dep
version = ""
@@ -317,6 +333,8 @@ def print_results():
print_result(dep_hierarchy, "Repo Hierarchy for Dependencies")
print_result(makedep_hierarchy, "Repo Hierarchy for Makedepends")
print_result(circular_deps, "Circular Dependencies")
+ print_result(dbonly, "Packages found in db, but not in tree")
+ print_result(absonly,"Packages found in tree, but not in db")
print_subheading("Summary")
print "Missing PKGBUILDs: ", len(missing_pkgbuilds)
print "Invalid PKGBUILDs: ", len(invalid_pkgbuilds)
@@ -326,6 +344,8 @@ def print_results():
print "Missing (make)dependencies: ", len(missing_deps)+len(missing_makedeps)
print "Repo hierarchy problems: ", len(dep_hierarchy)+len(makedep_hierarchy)
print "Circular dependencies: ", len(circular_deps)
+ print "In db, but not in tree: ", len(dbonly)
+ print "In tree, but not in db ", len(absonly)
print ""
def print_usage():
@@ -336,6 +356,7 @@ def print_usage():
print " --abs-tree=<path[,path]> Check the specified tree(s) (default : /var/abs)"
print " --repos=<r1,r2,...> Check the specified repos (default : core,extra)"
print " --arch=<i686|x86_64> Check the specified arch (default : i686)"
+ print " --repo-dir=<path> Check the dbs at the specified path (default : /srv/ftp)"
print " -h, --help Show this help and exit"
print ""
print "Examples:"
@@ -351,9 +372,12 @@ absroots = ["/var/abs"]
repos = ['core', 'extra']
## Default arch
arch = "i686"
+## Default repodir
+repodir = "/srv/ftp"
try:
- opts, args = getopt.getopt(sys.argv[1:], "", ["abs-tree=", "repos=", "arch="])
+ opts, args = getopt.getopt(sys.argv[1:], "", ["abs-tree=", "repos=",
+ "arch=", "repo-dir="])
except getopt.GetoptError:
print_usage()
sys.exit()
@@ -365,6 +389,8 @@ if opts != []:
repos = a.split(",")
elif o in ("--arch"):
arch = a
+ elif o in ("--repo-dir"):
+ repodir = a
else:
print_usage()
sys.exit()
@@ -381,6 +407,17 @@ for absroot in absroots:
if not os.path.isdir(repopath):
print "Error : the repository " + repo + " does not exist in " + absroot
sys.exit()
+if not os.path.isdir(repodir):
+ print "Error: the repository directory %s does not exiist" % repodir
+ sys.exit()
+for repo in repos:
+ path = os.path.join(repodir,repo,'os',arch,repo + DBEXT)
+ if not os.path.isfile(path):
+ print "Error : repo DB %s : File not found" % path
+ sys.exit()
+ if not tarfile.is_tarfile(path):
+ print "Error : Cant open repo DB %s, not a valid tar file" % path
+ sys.exit()
# repos which need to be loaded
loadrepos = set([])
for repo in repos:
@@ -397,6 +434,9 @@ for name,pkg in packages.iteritems():
if pkg.repo in repos:
repopkgs[name] = pkg
+print "==> parsing db files"
+dbpkgs = parse_dbs(repos,arch)
+
print "==> checking mismatches"
for name,pkg in repopkgs.iteritems():
pkgdirname = pkg.path.split("/")[-1]
@@ -441,4 +481,15 @@ for name,pkg in packages.iteritems():
pkgdeps[pkg] = deps
find_scc(repopkgs.values())
+print "==> checking for differences between db files and pkgbuilds"
+for repo in repos:
+ for pkg in dbpkgs[repo]:
+ if not (pkg in repopkgs.keys() and repopkgs[pkg].repo == repo):
+ dbonly.append("%s/%s" % (repo,pkg))
+ dbonly.sort()
+for name,pkg in repopkgs.iteritems():
+ if not name in dbpkgs[pkg.repo]:
+ absonly.append("%s/%s" % (pkg.repo,name))
+absonly.sort
+
print_results()
--
1.6.4.1
More information about the arch-general
mailing list