[arch-dev-public] [PATCH 1/4] Handle error case of parsepkgbuild correctly
We were never checking the return code of parsepkgbuild so would try to parse the error message it printed out as a package definition rather than erroring and returning None. Look at the return code, and as a bonus, use the more correct communicate() rather than stdout.read(). Signed-off-by: Dan McGee <dan@archlinux.org> --- pacman.py | 7 +++++-- parsepkgbuild | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pacman.py b/pacman.py index 1f78be2..f8edaab 100644 --- a/pacman.py +++ b/pacman.py @@ -105,8 +105,11 @@ def load(package, root=None): workingdir = None filename = os.path.basename(package) process = subprocess.Popen(['parsepkgbuild',filename], stdout=subprocess.PIPE, cwd=workingdir) - data = process.stdout.read() - ret = loaddb(None, data) + data = process.communicate() + # this means parsepkgbuild returned an error, so we are not valid + if process.returncode > 0: + return None + ret = loaddb(None, data[0]) # Add a nice little .pkgbuild surprise pkgbuild = open(package) diff --git a/parsepkgbuild b/parsepkgbuild index f9b5a91..d8bcc98 100755 --- a/parsepkgbuild +++ b/parsepkgbuild @@ -9,9 +9,9 @@ export PATH=/tmp/parsepkgbuild; exec /bin/bash --noprofile --norc -r << EOF source $1 -# ensure $pkgname and $pkgver variables were found -if [ -z "\$pkgname" -o -z "\$pkgver" ]; then - echo " error: invalid package file" +# ensure $pkgname, $pkgver, and $pkgrel variables were found +if [ -z "\$pkgname" -o -z "\$pkgver" -o -z "\$pkgrel" ]; then + echo "error: invalid package file" exit 1 fi -- 1.7.1
Some files in packages do not have the owner read bit set, which means open() blows up when trying to peek into the file. Add a workaround that temporarily sets the extracted file to be readable so we can peek at it and determine if it is an ELF file. For those files that are either links or something else funky, just assume it is not an ELF file. Signed-off-by: Dan McGee <dan@archlinux.org> --- Namcap/util.py | 18 ++++++++++++++++++ 1 files changed, 18 insertions(+), 0 deletions(-) diff --git a/Namcap/util.py b/Namcap/util.py index 8f61c30..3d09212 100644 --- a/Namcap/util.py +++ b/Namcap/util.py @@ -19,6 +19,7 @@ import os import re +import stat def is_elf(path): """ @@ -27,9 +28,26 @@ def is_elf(path): """ if not os.path.isfile(path): return False + reset_perms = False + if not os.access(path, os.R_OK): + # don't mess with links we can't read + if os.path.islink(path): + return False + reset_perms = True + # attempt to make it readable if possible + statinfo = os.stat(path) + newmode = statinfo.st_mode | stat.S_IRUSR + try: + os.chmod(path, newmode) + except IOError: + return False fd = open(path) bytes = fd.read(4) fd.close() + # reset permissions if necessary + if reset_perms: + # set file back to original permissions + os.chmod(path, statinfo.st_mode) # magic elf header, present in binaries and libraries if bytes == "\x7FELF": return True -- 1.7.1
And also add the same chmod() business we do for the is_elf() checks. Of course, a refactor is coming that will make for less code duplication. Signed-off-by: Dan McGee <dan@archlinux.org> --- After this patch, we can successfully parse both the sudo and udev packages which were blowing up namcap before. -Dan Namcap/depends.py | 20 +++++--------------- Namcap/util.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/Namcap/depends.py b/Namcap/depends.py index 39cd508..ea03129 100644 --- a/Namcap/depends.py +++ b/Namcap/depends.py @@ -18,10 +18,7 @@ # import re, os, os.path, pacman, subprocess -from Namcap.util import is_elf - -supported_scripts = ['python', 'perl', 'ruby', 'wish', 'expect', - 'tk', 'bash', 'sh', 'dash', 'tcsh', 'pdksh' ] +from Namcap.util import is_elf, script_type pkgcache = {} @@ -67,9 +64,7 @@ def scanlibs(data, dirname, names): If they depend on a library, store that library's path in sharedlibs """ sharedlibs, scripts = data - global supported_scripts shared = re.compile('Shared library: \[(.*)\]') - script = re.compile('#!.*/(.*)') for i in names: path = dirname + '/' + i if is_elf(path): @@ -90,15 +85,10 @@ def scanlibs(data, dirname, names): # We didn't know about the library, so add it for fail later sharedlibs.setdefault(n.group(1), {})[path] = 1 # Maybe it is a script file - elif os.path.isfile(path): - fd = open(path) - firstline = fd.readline() - fd.close() - firstlinere = script.match(firstline) - if firstlinere != None: - cmdname = firstlinere.group(1).split()[0] - if cmdname in supported_scripts: - scripts.setdefault(cmdname, {})[path] = 1 + else: + cmd = script_type(path) + if cmd != None: + scripts.setdefault(cmd, {})[path] = 1 def finddepends(liblist): dependlist = {} diff --git a/Namcap/util.py b/Namcap/util.py index 3d09212..4568891 100644 --- a/Namcap/util.py +++ b/Namcap/util.py @@ -54,6 +54,42 @@ def is_elf(path): else: return False +supported_scripts = ['python', 'perl', 'ruby', 'wish', 'expect', + 'tk', 'bash', 'sh', 'dash', 'tcsh', 'pdksh' ] + +def script_type(path): + global supported_scripts + if not os.path.isfile(path): + return None + reset_perms = False + if not os.access(path, os.R_OK): + # don't mess with links we can't read + if os.path.islink(path): + return None + reset_perms = True + # attempt to make it readable if possible + statinfo = os.stat(path) + newmode = statinfo.st_mode | stat.S_IRUSR + try: + os.chmod(path, newmode) + except IOError: + return None + + fd = open(path) + firstline = fd.readline() + fd.close() + # reset permissions if necessary + if reset_perms: + # set file back to original permissions + os.chmod(path, statinfo.st_mode) + script = re.compile('#!.*/(.*)') + firstlinere = script.match(firstline) + if firstlinere != None: + cmdname = firstlinere.group(1).split()[0] + if cmdname in supported_scripts: + return cmdname + return None + clean_filename = lambda s: re.search(r"/tmp/namcap\.[0-9]*/(.*)", s).group(1) # vim: set ts=4 sw=4 noet: -- 1.7.1
The commit that created these very similar pair of functions did a copy-paste job; this commit fixes up some of that to reduce code duplication and keep all of the error handling code in once place so the actual work of the two methods is much clearer. Signed-off-by: Dan McGee <dan@archlinux.org> --- Namcap/util.py | 46 ++++++++++++++++------------------------------ 1 files changed, 16 insertions(+), 30 deletions(-) diff --git a/Namcap/util.py b/Namcap/util.py index 4568891..80d6857 100644 --- a/Namcap/util.py +++ b/Namcap/util.py @@ -21,18 +21,14 @@ import os import re import stat -def is_elf(path): - """ - Given a file path, ensure it exists and peek at the first few bytes - to determine if it is an ELF file. - """ +def read_carefully(path, readcall): if not os.path.isfile(path): return False reset_perms = False if not os.access(path, os.R_OK): # don't mess with links we can't read if os.path.islink(path): - return False + return None reset_perms = True # attempt to make it readable if possible statinfo = os.stat(path) @@ -40,14 +36,24 @@ def is_elf(path): try: os.chmod(path, newmode) except IOError: - return False + return None fd = open(path) - bytes = fd.read(4) + val = readcall(fd) fd.close() # reset permissions if necessary if reset_perms: # set file back to original permissions os.chmod(path, statinfo.st_mode) + return val + +def is_elf(path): + """ + Given a file path, ensure it exists and peek at the first few bytes + to determine if it is an ELF file. + """ + bytes = read_carefully(path, lambda fd: fd.read(4)) + if not bytes: + return False # magic elf header, present in binaries and libraries if bytes == "\x7FELF": return True @@ -59,29 +65,9 @@ supported_scripts = ['python', 'perl', 'ruby', 'wish', 'expect', def script_type(path): global supported_scripts - if not os.path.isfile(path): + firstline = read_carefully(path, lambda fd: fd.readline()) + if not firstline: return None - reset_perms = False - if not os.access(path, os.R_OK): - # don't mess with links we can't read - if os.path.islink(path): - return None - reset_perms = True - # attempt to make it readable if possible - statinfo = os.stat(path) - newmode = statinfo.st_mode | stat.S_IRUSR - try: - os.chmod(path, newmode) - except IOError: - return None - - fd = open(path) - firstline = fd.readline() - fd.close() - # reset permissions if necessary - if reset_perms: - # set file back to original permissions - os.chmod(path, statinfo.st_mode) script = re.compile('#!.*/(.*)') firstlinere = script.match(firstline) if firstlinere != None: -- 1.7.1
On 15/06/10 15:29, Dan McGee wrote:
We were never checking the return code of parsepkgbuild so would try to parse the error message it printed out as a package definition rather than erroring and returning None. Look at the return code, and as a bonus, use the more correct communicate() rather than stdout.read().
Signed-off-by: Dan McGee<dan@archlinux.org>
Something is changed here... After applying this commit:
PATH=$(pwd):$PATH PYTHONPATH=$(pwd) for i in /var/abs/core/*; do ./namcap -t namcap-tags $i/PKGBUILD; done Error: /var/abs/core/acl/PKGBUILD is not a valid PKGBUILD Error: /var/abs/core/ar9170-fw/PKGBUILD is not a valid PKGBUILD Error: /var/abs/core/attr/PKGBUILD is not a valid PKGBUILD Error: /var/abs/core/b43-fwcutter/PKGBUILD is not a valid PKGBUILD PKGBUILD: line 27: seq: command not found PKGBUILD (bash) E: Too many md5sums: 5 needed Error: /var/abs/core/bin86/PKGBUILD is not a valid PKGBUILD Error: /var/abs/core/bridge-utils/PKGBUILD is not a valid PKGBUILD Error: /var/abs/core/bzip2/PKGBUILD is not a valid PKGBUILD ... PKGBUILD (db) W: Description should not contain the package name. ... PKGBUILD (filesystem) W: Description should not contain the package name.
As you can see, some PKGBUILD appear to work... although I have no idea why. Allan
On Tue, Jun 15, 2010 at 6:24 AM, Allan McRae <allan@archlinux.org> wrote:
On 15/06/10 15:29, Dan McGee wrote:
We were never checking the return code of parsepkgbuild so would try to parse the error message it printed out as a package definition rather than erroring and returning None. Look at the return code, and as a bonus, use the more correct communicate() rather than stdout.read().
Signed-off-by: Dan McGee<dan@archlinux.org>
Something is changed here... After applying this commit:
PATH=$(pwd):$PATH PYTHONPATH=$(pwd) for i in /var/abs/core/*; do ./namcap -t namcap-tags $i/PKGBUILD; done Error: /var/abs/core/acl/PKGBUILD is not a valid PKGBUILD Error: /var/abs/core/ar9170-fw/PKGBUILD is not a valid PKGBUILD Error: /var/abs/core/attr/PKGBUILD is not a valid PKGBUILD Error: /var/abs/core/b43-fwcutter/PKGBUILD is not a valid PKGBUILD PKGBUILD: line 27: seq: command not found PKGBUILD (bash) E: Too many md5sums: 5 needed Error: /var/abs/core/bin86/PKGBUILD is not a valid PKGBUILD Error: /var/abs/core/bridge-utils/PKGBUILD is not a valid PKGBUILD Error: /var/abs/core/bzip2/PKGBUILD is not a valid PKGBUILD ... PKGBUILD (db) W: Description should not contain the package name. ... PKGBUILD (filesystem) W: Description should not contain the package name.
As you can see, some PKGBUILD appear to work... although I have no idea why.
They have never worked quite right, we've just exposed the B.S. failure (return code of 1 for some reason) from before: (namcap 2.6 version) dmcgee@galway ~/projects/arch-repos/acl/trunk $ parsepkgbuild PKGBUILD >/dev/null; echo $? 1 dmcgee@galway ~/projects/arch-repos/filesystem/trunk $ parsepkgbuild PKGBUILD >/dev/null; echo $? 0 What is interesting is both of these do spit out the full .PKGINFO-like stuff. -Dan
when using `[ ] && action` style tests, the return code of the test can leak, which was unfortuante for every PKGBUILD that didn't contain an install file as the last test would always return 1 (failure). We want parsepkgbuild to return 0 if we make it all the way through. "Fix" all of the shortcut test forms to just use the full `if [ ]; then` block instead of a shortcut. Signed-off-by: Dan McGee <dan@archlinux.org> --- Found the problem. I think the commit message covers the jist of it. -Dan parsepkgbuild | 21 +++++++++++++++------ 1 files changed, 15 insertions(+), 6 deletions(-) diff --git a/parsepkgbuild b/parsepkgbuild index d8bcc98..850fea7 100755 --- a/parsepkgbuild +++ b/parsepkgbuild @@ -27,7 +27,9 @@ if [ -n "\$groups" ]; then echo "" fi -[ -n "\$url" ] && echo -e "%URL%\n\$url\n" +if [ -n "\$url" ]; then + echo -e "%URL%\n\$url\n" +fi if [ -n "\$license" ]; then echo "%LICENSE%" for i in \${license[@]}; do echo \$i; done @@ -38,15 +40,21 @@ if [ -n "\$arch" ]; then for i in \${arch[@]}; do echo \$i; done echo "" fi -[ -n "\$builddate" ] && echo -e "%BUILDDATE%\n\$builddate\n" -[ -n "\$packager" ] && echo -e "%PACKAGER%\n\$packager\n" +if [ -n "\$builddate" ]; then + echo -e "%BUILDDATE%\n\$builddate\n" +fi +if [ -n "\$packager" ]; then + echo -e "%PACKAGER%\n\$packager\n" +fi if [ -n "\$replaces" ]; then echo "%REPLACES%" for i in \${replaces[@]}; do echo \$i; done echo "" fi -[ -n "\$force" ] && echo -e "%FORCE%\n" +if [ -n "\$force" ]; then + echo -e "%FORCE%\n" +fi # create depends entry if [ -n "\$depends" ]; then @@ -115,7 +123,8 @@ if [ -n "\$sha512sums" ]; then echo "" fi - -[ -n "\$install" ] && echo -e "%INSTALL%\n\$install\n" +if [ -n "\$install" ]; then + echo -e "%INSTALL%\n\$install\n" +fi EOF -- 1.7.1
On 16/06/10 01:14, Dan McGee wrote:
when using `[ ]&& action` style tests, the return code of the test can leak, which was unfortuante for every PKGBUILD that didn't contain an install file as the last test would always return 1 (failure). We want parsepkgbuild to return 0 if we make it all the way through.
"Fix" all of the shortcut test forms to just use the full `if [ ]; then` block instead of a shortcut.
Signed-off-by: Dan McGee<dan@archlinux.org> ---
Great. After applying all the patches sent to this list, the only problems I have scanning all the packages and PKGBUILDs from [core] is the bash and readline PKGBUILDs which have a non-bash way of generating the source array. Signoff all, Allan
On Tue, Jun 15, 2010 at 6:24 AM, Allan McRae <allan@archlinux.org> wrote:
On 15/06/10 15:29, Dan McGee wrote:
We were never checking the return code of parsepkgbuild so would try to parse the error message it printed out as a package definition rather than erroring and returning None. Look at the return code, and as a bonus, use the more correct communicate() rather than stdout.read().
Signed-off-by: Dan McGee<dan@archlinux.org>
Something is changed here... After applying this commit:
PATH=$(pwd):$PATH PYTHONPATH=$(pwd) for i in /var/abs/core/*; do ./namcap -t namcap-tags $i/PKGBUILD; done
And on a related note, something like this is probably a lot faster since namcap is great at (and a lot faster at) processing multiple files at once: $ time find /var/abs/core/ -name PKGBUILD | xargs ./namcap -t namcap-tags ... real 0m4.053s user 0m2.630s sys 0m0.867s vs. $ time for i in /var/abs/core/*; do ./namcap -t namcap-tags $i/PKGBUILD; done ... real 0m14.161s user 0m9.816s sys 0m3.066s -Dan
participants (3)
-
Allan McRae
-
Dan McGee
-
Dan McGee