[arch-projects] [namcap] [PATCH] Add handling for compression into namcap.py

meganomic meganomic at pm.me
Sat Dec 12 11:59:01 UTC 2020


I refactored it and I think I've ironed out all the bugs. Does anyone have any additional ideas or see any glaring problems?

diff --git a/namcap b/namcap
index ea0bc94..019b077 100755
--- a/namcap
+++ b/namcap
@@ -1,39 +1,2 @@
 #!/bin/bash
-
-args=''
-tmp=$(mktemp -d --tmpdir namcap.XXXXXXXXXX)
-cleanup() {
-	rm -rf "${tmp}"
-}
-trap 'cleanup' 0
-
-for arg in "${@}"; do
-	if echo "${arg}" | grep -q -E "^.+\.pkg\.tar\..+$" && [ -f "${arg}" ]; then
-
-		extra_opts=''
-		case "${arg##*.}" in
-			gz|z|Z) cmd='gzip' ;;
-			bz2|bz) cmd='bzip2' ;;
-			xz)     cmd='xz' ;;
-			lzo)    cmd='lzop' ;;
-			lrz)    cmd='lrzip'
-				extra_opts="-q -o -" ;;
-			lz4)    cmd='lz4'
-				extra_opts="-q" ;;
-			lz)     cmd='lzip'
-				extra_opts="-q" ;;
-			zst)    cmd='zstd'
-				extra_opts="-q" ;;
-			*)      echo 'Unsupported compression'; exit 1;;
-		esac
-
-		tar="${tmp}/$(basename "${arg%.*}")"
-		$cmd -dcf $extra_opts "${arg}" > "${tar}"
-
-		args="${args} ${tar}"
-	else
-		args="${args} ${arg}"
-	fi
-done
-
-/usr/bin/env python3 -m namcap ${args}
+/usr/bin/env python3 -m namcap ${@}
diff --git a/namcap.py b/namcap.py
index a7f532a..9a240dc 100755
--- a/namcap.py
+++ b/namcap.py
@@ -22,7 +22,11 @@
 import getopt
 import os
 import sys
+import subprocess
+import tempfile
+import pathlib
 import tarfile
+import xtarfile

 import Namcap.depends
 import Namcap.tags
@@ -49,18 +53,6 @@ def usage():

 	sys.exit(2)

-def open_package(filename):
-	try:
-		tar = tarfile.open(filename, "r")
-		if '.PKGINFO' not in tar.getnames():
-			tar.close()
-			return None
-	except IOError:
-		if tar:
-			tar.close()
-		return None
-	return tar
-
 def check_rules_exclude(optlist):
 	'''Check if the -r (--rules) and the -r (--exclude) options
 	are being used at same time'''
@@ -76,13 +68,10 @@ def show_messages(name, key, messages):
 	for msg in messages:
 		print("%s %s: %s" % (name, key, Namcap.tags.format_message(msg)))

-def process_realpackage(package, modules):
+def process_realpackage(pkgtar, package, modules):
 	"""Runs namcap checks over a package tarball"""
-	extracted = 0
-	pkgtar = open_package(package)
-
-	if not pkgtar:
-		print("Error: %s is empty or is not a valid package" % package)
+	if '.PKGINFO' not in pkgtar.getnames():
+		raise TypeError('Error: %s is not a valid package' % package)
 		return 1

 	pkginfo = Namcap.package.load_from_tarball(package)
@@ -98,7 +87,7 @@ def process_realpackage(package, modules):
 			rule.analyze(pkginfo, pkgtar)
 		else:
 			show_messages(pkginfo["name"], 'E',
-			              [('error-running-rule %s', i)])
+										[('error-running-rule %s', i)])

 		# Output the three types of messages
 		show_messages(pkginfo["name"], 'E', rule.errors)
@@ -237,15 +226,73 @@ if len(active_modules) == 0:

 # Go through each package, get the info, and apply the rules
 for package in packages:
-	if not os.access(package, os.R_OK):
+	pkgpath = pathlib.Path(package)
+
+	if not pkgpath.is_file():
 		print("Error: Problem reading %s" % package)
 		usage()
+		sys.exit(1)
+
+	if pkgpath.with_suffix('').suffix == '.tar':
+		# What compression is used?
+		try: # Try using xtarfile first
+			with xtarfile.open(package, "r") as pkgtar:
+				process_realpackage(pkgtar, package, active_modules)
+
+		# If xtarfile can't handle the compression do it with external software
+		except NotImplementedError:
+			if pkgpath.suffix == '.lzo':
+				cmd = 'lzop'
+				extra_opts = '-c'
+			elif pkgpath.suffix == '.lrz':
+				cmd = 'lrzip'
+				extra_opts = '-o -'
+			elif pkgpath.suffix == '.lz4':
+				cmd = 'lz4'
+				extra_opts = '-c'
+			elif pkgpath.suffix == '.lz':
+				cmd = 'lzip'
+				extra_opts='-c'
+			else:
+				print("Unsupported compression format:", package)
+				sys.exit(1)
+
+			# Run decompression and put the .tar file in a temporary directory
+			try:
+				tmpdir = tempfile.TemporaryDirectory(prefix='namcap.')
+			except Exception as err:
+				print("Unable to create temporary storage:", err)
+				sys.exit(1)
+
+			tmpfilepath = pathlib.Path(tmpdir.name).joinpath(pkgpath.with_suffix('').name)
+			try:
+				with open(tmpfilepath.as_posix(), 'wb') as outfile:
+					p = subprocess.run(cmd + ' -dqf ' + extra_opts + ' ' + pkgpath.as_posix(), stdout=outfile, shell=True)
+					p.check_returncode() # Make sure it actually ran without errors
+			except Exception as err:
+				print("Unable to decompress:", err)
+				sys.exit(1)
+
+			try:
+#			if tmpfilepath.is_file() and tarfile.is_tarfile(tmpfilepath):
+				with tarfile.open(tmpfilepath) as pkgtar: # Is additional error checking needed here?
+					process_realpackage(pkgtar, package, active_modules)
+			except Exception as err:
+				print("File doesn't exist or isn't a tar file:", package, err)
+				sys.exit(1)
+
+		except TypeError as err: # The tar file doesn't contain a .PKGINFO file
+			print(err)
+			sys.exit(1)
+		except Exception as err:
+			print("Something went horrible wrong:", err)
+			sys.exit(1)

-	if os.path.isfile(package) and tarfile.is_tarfile(package):
-		process_realpackage(package, active_modules)
 	elif 'PKGBUILD' in package:
 		process_pkgbuild(package, active_modules)
+
 	else:
 		print("Error: %s not package or PKGBUILD" % package)
+		sys.exit(1)

 # vim: set ts=4 sw=4 noet:



‐‐‐‐‐‐‐ Original Message ‐‐‐‐‐‐‐
On Friday, December 11, 2020 11:10 PM, meganomic via arch-projects <arch-projects at archlinux.org> wrote:

> I did a quick patch using your idea and it definitely seems like a good way to do it. Can probably clean it up and make it look prettier with some refactoring. I'll look into it later.
>
> diff --git a/namcap.py b/namcap.py
> index a7f532a..0e78b89 100755
> --- a/namcap.py
> +++ b/namcap.py
> @@ -22,7 +22,10 @@
> import getopt
> import os
> import sys
> -import tarfile
> +import subprocess
> +import tempfile
> +import pathlib
> +import xtarfile
>
> import Namcap.depends
> import Namcap.tags
> @@ -49,18 +52,6 @@ def usage():
>
> sys.exit(2)
>
> -def open_package(filename):
>
> -   try:
> -         tar = tarfile.open(filename, "r")
>
>
> -         if '.PKGINFO' not in tar.getnames():
>
>
> -         	tar.close()
>
>
> -         	return None
>
>
> -   except IOError:
> -         if tar:
>
>
> -         	tar.close()
>
>
> -         return None
>
>
> -   return tar
> -
>
> def check_rules_exclude(optlist):
> '''Check if the -r (--rules) and the -r (--exclude) options
> are being used at same time'''
> @@ -79,39 +70,38 @@ def show_messages(name, key, messages):
> def process_realpackage(package, modules):
> """Runs namcap checks over a package tarball"""
> extracted = 0
>
> -   pkgtar = open_package(package)
> -
> -   if not pkgtar:
> -         print("Error: %s is empty or is not a valid package" % package)
>
>
> -         return 1
>
>
> -
> -   pkginfo = Namcap.package.load_from_tarball(package)
> -   Loop through each one, load them apply if possible
>
>     ===================================================
>
> -   for i in modules:
> -         rule = get_modules()[i]()
>
>
> -
> -         if isinstance(rule, Namcap.ruleclass.PkgInfoRule):
>
>
> -         	rule.analyze(pkginfo, None)
>
>
> -         elif isinstance(rule, Namcap.ruleclass.PkgbuildRule):
>
>
> -         	pass
>
>
> -         elif isinstance(rule, Namcap.ruleclass.TarballRule):
>
>
> -         	rule.analyze(pkginfo, pkgtar)
>
>
> -         else:
>
>
> -         	show_messages(pkginfo["name"], 'E',
>
>
> -         	              [('error-running-rule %s', i)])
>
>
> -
> -         # Output the three types of messages
>
>
> -         show_messages(pkginfo["name"], 'E', rule.errors)
>
>
> -         show_messages(pkginfo["name"], 'W', rule.warnings)
>
>
>
> -   with xtarfile.open(package, "r") as pkgtar:
> -         if '.PKGINFO' not in pkgtar.getnames():
>
>
> -         	print("Error: %s is not a valid package" % package)
>
>
> -         	return 1
>
>
> -
> -         pkginfo = Namcap.package.load_from_tarball(package)
>
>
> -         # Loop through each one, load them apply if possible
>
>
> -         for i in modules:
>
>
> -         	rule = get_modules()[i]()
>
>
> -
> -         	if isinstance(rule, Namcap.ruleclass.PkgInfoRule):
>
>
> -         		rule.analyze(pkginfo, None)
>
>
> -         	elif isinstance(rule, Namcap.ruleclass.PkgbuildRule):
>
>
> -         		pass
>
>
> -         	elif isinstance(rule, Namcap.ruleclass.TarballRule):
>
>
> -         		rule.analyze(pkginfo, pkgtar)
>
>
> -         	else:
>
>
> -         		show_messages(pkginfo["name"], 'E',
>
>
> -         									[('error-running-rule %s', i)])
>
>
> -
> -         	# Output the three types of messages
>
>
> -         	show_messages(pkginfo["name"], 'E', rule.errors)
>
>
> -         	show_messages(pkginfo["name"], 'W', rule.warnings)
>
>
> -         	if info_reporting:
>
>
> -         		show_messages(pkginfo["name"], 'I', rule.infos)
>
>
> -
> -         # dependency analysis
>
>
> -         errs, warns, infos = Namcap.depends.analyze_depends(pkginfo)
>
>
> -         show_messages(pkginfo["name"], 'E', errs)
>
>
> -         show_messages(pkginfo["name"], 'W', warns)
>           if info_reporting:
>
>
>
> -         	show_messages(pkginfo["name"], 'I', rule.infos)
>
>
> -
> -   dependency analysis
>
>     ====================
>
> -   errs, warns, infos = Namcap.depends.analyze_depends(pkginfo)
> -   show_messages(pkginfo["name"], 'E', errs)
> -   show_messages(pkginfo["name"], 'W', warns)
> -   if info_reporting:
> -         show_messages(pkginfo["name"], 'I', infos)
>
>
>
> -         	show_messages(pkginfo["name"], 'I', infos)
>
>
>
> def process_pkginfo(pkginfo, modules):
> """Runs namcap checks of a single, non-split PacmanPackage object"""
> @@ -237,14 +227,45 @@ if len(active_modules) == 0:
>
> Go through each package, get the info, and apply the rules
>
> ===========================================================
>
> for package in packages:
>
> -   if not os.access(package, os.R_OK):
>
> -   pkgpath = pathlib.Path(package)
> -
> -   if not pkgpath.is_file():
>     print("Error: Problem reading %s" % package)
>
>
> -         usage()
>
>
>
> -         parser.print_usage()
>
>
> -         continue # Skip to next package if any
>
>
> -
> -   if pkgpath.with_suffix('').suffix == '.tar':
>
> -         # What compression is used?
>
>
> -         extra_opts = ''
>
>
> -         if pkgpath.suffix == '.gz' or pkgpath.suffix == '.z' or pkgpath.suffix == '.Z' or pkgpath.suffix == '.bz2' or pkgpath.suffix == '.bz' or pkgpath.suffix == '.xz' or pkgpath.suffix == '.zst':
>
>
> -         	process_realpackage(package, active_modules)
>
>
> -         	continue # Skip to next package if any
>
>
> -         elif pkgpath.suffix == '.lzo':
>
>
> -         	cmd = 'lzop'
>
>
> -         elif pkgpath.suffix == '.lrz':
>
>
> -         	cmd = 'lrzip'
>
>
> -         	extra_opts="-q -o -"
>
>
> -         elif pkgpath.suffix == '.lz4':
>
>
> -         	cmd = 'lz4'
>
>
> -         	extra_opts="-q"
>
>
> -         elif pkgpath.suffix == '.lz':
>
>
> -         	cmd = 'lzip'
>
>
> -         	extra_opts="-q"
>
>
> -         else:
>
>
> -         	print("Unsupported compression!")
>
>
> -         	continue # Skip to next package if any
>
>
> -
> -         # Run decompression and put the .tar file in a temporary directory
>
>
> -         tmpdir = tempfile.TemporaryDirectory(prefix='namcap.')
>
>
> -         tmpfilepath = pathlib.Path(tmpdir.name).joinpath(pkgpath.with_suffix('').name)
>
>
> -         subprocess.run(cmd + ' -dcf ' + extra_opts + pkgpath.as_posix() + ' > ' + tmpfilepath.as_posix(), shell=True)
>
>
> -
> -         if tmpfilepath.is_file() and xtarfile.is_tarfile(tmpfilepath):
>
>
> -         	process_realpackage(package, active_modules)
>
>
>
> -   if os.path.isfile(package) and tarfile.is_tarfile(package):
> -         process_realpackage(package, active_modules)
>
>
>     elif 'PKGBUILD' in package:
>     process_pkgbuild(package, active_modules)
>
>
> -   else:
>     print("Error: %s not package or PKGBUILD" % package)
>
>     ‐‐‐‐‐‐‐ Original Message ‐‐‐‐‐‐‐
>     On Friday, December 11, 2020 9:29 PM, Eli Schwartz via arch-projects arch-projects at archlinux.org wrote:
>
>
> > On 12/11/20 2:25 PM, meganomic via arch-projects wrote:
> >
> > > Adds handling of the compression and temporary storage into namcap.py
> > > so it can be removed from the bash script.
> > > https://bugs.archlinux.org/task/59844 mentions "use setuptools entry
> > > points." instead of the bash script but I don't know how to do that.
> > > I just removed it from the bash script for now.
> >
> > The subprocess to decompression tools is still (with your patch) just as annoying to me as the existence of the bash script. Python's builtin tarfile.open can handle gz, bz2, or xz. Using the python-xtarfile module you can use xtarfile.open to also handle zst. We should prefer the native code instead of basically inlining bash into python.
> > True, in order to support lzo, lrz, lz4, lz, we might still need to do subprocess out and create a temporary file with the decompressed package, but this should really be a last resort (and users in practice should rarely hit this code).
> > What do you think about trying xtarfile first?
> >
> > Eli Schwartz
> > Bug Wrangler and Trusted User


More information about the arch-projects mailing list