[pacman-dev] [PATCH] cachemoney: add new contrib script

Dave Reisner d at falconindy.com
Wed Jul 27 15:13:54 EDT 2011

cachemoney is a robust and flexible package cache cleaner with a variety
of options. Much credit goes to DJ Mills and Pat Brisbin for the ideas
behind this script.

Signed-off-by: Dave Reisner <dreisner at archlinux.org>
This sort of functionality has been in a few feature requests, and there exist
a number of tools that already provide this, but none flexible enough (or
portable enough) to be to my liking. This is all bash3 compat with awk, tested
on gawk 3.1.8, gawk 4.0.0, bwk (the one true), mawk, and busybox awk (which
fscking _sucks_) to do the heavy lifting (rather than bash4). It ends up being
insanely fast, with the drawback being that it's dependent on the format of the
filename to find and consider packages for deletion. In other words, a valid
package tarball that isn't compliant with the extglob *.pkg.tar?(.+([^.]))
isn't going to be considered.

I'm a fan of the name, but I'm also happy to hear alternatives.

 contrib/Makefile.am   |    4 +
 contrib/cachemoney.in |  294 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 298 insertions(+), 0 deletions(-)
 create mode 100755 contrib/cachemoney.in

diff --git a/contrib/Makefile.am b/contrib/Makefile.am
index 69304a4..301de41 100644
--- a/contrib/Makefile.am
+++ b/contrib/Makefile.am
@@ -1,5 +1,6 @@
 	bacman \
+	cachemoney \
 	pacdiff \
 	paclist \
 	paclog-pkglist \
@@ -14,6 +15,7 @@ EXTRA_DIST = \
 	PKGBUILD.vim \
 	bacman.in \
 	bash_completion.in \
+	cachemoney.in \
 	paclog-pkglist.in \
 	pacdiff.in \
 	paclist.in \
@@ -29,6 +31,7 @@ MOSTLYCLEANFILES = $(OURSCRIPTS) $(OURFILES) *.tmp
 edit = sed \
 	-e 's|@sysconfdir[@]|$(sysconfdir)|g' \
 	-e 's|@localstatedir[@]|$(localstatedir)|g' \
+	-e 's|@SIZECMD[@]|$(SIZECMD)|g' \
 	-e '1s|!/bin/bash|!$(BASH_SHELL)|g'
 $(OURSCRIPTS): Makefile
@@ -50,6 +53,7 @@ all-am: $(OURSCRIPTS) $(OURFILES)
 bacman: $(srcdir)/bacman.in
 bash_completion: $(srcdir)/bash_completion.in
+cachemoney: $(srcdir)/cachemoney.in
 pacdiff: $(srcdir)/pacdiff.in
 paclist: $(srcdir)/paclist.in
 paclog-pkglist: $(srcdir)/paclog-pkglist.in
diff --git a/contrib/cachemoney.in b/contrib/cachemoney.in
new file mode 100755
index 0000000..794ae45
--- /dev/null
+++ b/contrib/cachemoney.in
@@ -0,0 +1,294 @@
+# cachemoney - clean yo pacman cache, dawg
+# Copyright (C) 2011 Dave Reisner <dreisner at archlinux.org>
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# Enable extended globs; the package filename pattern used later in this
+# script (*.pkg.tar?(.+([^.]))) relies on them.
+shopt -s extglob
+# Work lists: deletion candidates, extra flags passed to rm/mv, and the
+# package name whitelist/blacklist handed to pkgfilter.
+declare -a candidates=() cmdopts=() whitelist=() blacklist=()
+# Integer flags and counters; by default 3 versions of each package are kept.
+declare -i delete=0 diskspace=0 dryrun=0 filecount=0 keep=3 move=0 totalsaved=0
+# String settings; delim separates candidate names printed during a dry run.
+declare    cachedir=@localstatedir@/cache/pacman/pkg delim=$'\n' movedir= scanarch=
+# Print an informational message on stderr, prefixed with "==> ".
+# $1 is a printf format string; the remaining arguments fill it in.
+msg() {
+	local fmt=$1
+	shift
+	printf "==> $fmt\n" "$@"
+} >&2
+# Print an error message on stderr, prefixed with "==> ERROR: ".
+# $1 is a printf format string; the remaining arguments fill it in.
+error() {
+	local fmt=$1
+	shift
+	printf "==> ERROR: $fmt\n" "$@"
+} >&2
+die() {
+	error "$@"
+	exit 1
+# reads a list of files on stdin and prints out deletion candidates
+pkgfilter() {
+	# there's whitelist and blacklist parameters passed to this
+	# script after the block of awk.
+	awk -v keep="$1" -v scanarch="$2" '
+	function parse_filename(filename,     parts, count, i, pkgname, arch) {
+		count = split(filename, parts, "-")
+		i = 1
+		pkgname = parts[i++]
+		while (i <= count - 3) {
+			pkgname = pkgname "-" parts[i++]
+		}
+		arch = substr(parts[count], 1, index(parts[count], ".") - 1)
+		# filter on whitelist or blacklist
+		if (wlen && !whitelist[pkgname]) return
+		if (blen && blacklist[pkgname]) return
+		if ("" == packages[pkgname,arch]) {
+			packages[pkgname,arch] = filename
+		} else {
+			packages[pkgname,arch] = packages[pkgname,arch] SUBSEP filename
+		}
+	}
+		# create whitelist
+		wlen = ARGV[1]; delete ARGV[1]
+		for (i = 2; i < 2 + wlen; i++) {
+			whitelist[ARGV[i]] = 1
+			delete ARGV[i]
+		}
+		# create blacklist
+		blen = ARGV[i]; delete ARGV[i]
+		while (i++ < ARGC) {
+			blacklist[ARGV[i]] = 1
+			delete ARGV[i]
+		}
+		# read package filenames
+		while (getline < "/dev/stdin") {
+			parse_filename($0)
+		}
+	}
+	END {
+		for (pkglist in packages) {
+			# idx[1,2] = idx[pkgname,arch]
+			split(pkglist, idx, SUBSEP)
+			# enforce architecture match if specified
+			if (!scanarch || scanarch == idx[2]) {
+				count = split(packages[idx[1], idx[2]], pkgs, SUBSEP)
+				if (count > keep) {
+					for(i = 1; i <= count - keep; i++) {
+						print pkgs[i]
+					}
+				}
+			}
+		}
+	}' "${@:3}"
+size_to_human() {
+	awk -v size="$1" '
+		suffix[1] = "KiB"
+		suffix[2] = "MiB"
+		suffix[3] = "GiB"
+		suffix[4] = "TiB"
+		count = 1
+		while (size > 1024) {
+			size /= 1024
+			count++
+		}
+		sizestr = sprintf("%.2f", size)
+		sub(/.?0+$/, "", sizestr)
+		printf("%s %s", sizestr, suffix[count])
+	}'
+# Run the given command line, escalating privileges when the global
+# needsroot flag is set.
+#
+# Arguments: the command and its arguments, passed through verbatim.
+# With needsroot set, sudo is used if "sudo -v" and "sudo -l" both succeed;
+# otherwise the command is re-quoted with printf %q and handed to su -c.
+runcmd() {
+	if (( needsroot )); then
+		msg "Privilege escalation required"
+		if sudo -v &>/dev/null && sudo -l &>/dev/null; then
+			sudo "$@"
+		else
+			# printed right before su runs, presumably so that its
+			# password prompt reads as asking for root's password
+			printf '%s ' 'root'
+			su -c "$(printf '%q ' "$@")"
+		fi
+	else
+		"$@"
+	fi
+}
+summarize() {
+	local -i filecount=$#
+	local seenarch= seen= arch= name=
+	local -r pkg_re='(.+)-[^-]+-[0-9]+-([^.]+)\.pkg.*'
+	if (( delete )); then
+		printf -v output 'finished: %d packages removed' "$filecount"
+	elif (( move )); then
+		printf -v output "finished: %d packages moved to \`%s'" "$filecount" "$movedir"
+	elif (( dryrun )); then
+		if (( verbose )); then
+			msg "Candidate packages:"
+			while read -r pkg; do
+				if (( verbose >= 3 )); then
+					[[ $pkg =~ $pkg_re ]] && name=${BASH_REMATCH[1]} arch=${BASH_REMATCH[2]}
+					if [[ -z $seen || $seenarch != $arch || $seen != $name ]]; then
+						printf '%s (%s):\n' "$name" "$arch"
+					fi
+					printf '  %s\n' "$pkg"
+				elif (( verbose >= 2 )); then
+					printf "$PWD/%s$delim" "$pkg"
+				else
+					printf "%s$delim" "$pkg"
+				fi
+			done < <(printf '%s\n' "$@" | sort -V)
+			printf '\n' >&2
+		fi
+		printf -v output 'finished dry run: %d candidates' "$filecount"
+	fi
+	if (( diskspace )); then
+		output+=" (diskspace saved: $(size_to_human "$totalsaved"))"
+	fi
+	msg "$output"
+# Print the command line help text on stdout.
+usage() {
+	cat <<EOF
+usage: ${0##*/} <operation> [options] [targets...]
+  Operations:
+    -d               perform a dry run, only finding candidate packages.
+    -m  <movedir>    move candidate packages to 'movedir'.
+    -r               remove candidate packages.
+  Options:
+    -a  <arch>       scan for 'arch' (default: all architectures).
+    -c  <cachedir>   scan 'cachedir' for packages (default: @localstatedir@/cache/pacman/pkg).
+    -f               apply force to mv(1) and rm(1) operations.
+    -h               display this help message.
+    -i  <pkgs>       ignore 'pkgs', which is a comma separated list. Alternatively,
+                     specify '-' to read package names from stdin, newline delimited.
+    -k  <num>        keep 'num' of each package in 'cachedir' (default: 3).
+    -s               show diskspace saved by the operation.
+    -u               target uninstalled packages.
+    -v               increase verbosity. specify up to 3 times.
+    -z               use null delimiters for candidate names (only with -v and -vv)
+EOF
+}
+# Refuse to run as root (UID 0); when elevated rights are needed the script
+# asks for them itself through runcmd.
+if (( ! UID )); then
+	error "Bad dog, no biscuit. You will be prompted for privilege escalation."
+	exit 42
+fi
+while getopts ':a:c:dfhi:k:m:rsuvz' opt; do
+	case $opt in
+		a) scanarch=$OPTARG ;;
+		c) cachedir=$OPTARG ;;
+		d) dryrun=1 ;;
+		f) cmdopts=(-f) ;;
+		h) usage
+			exit 0 ;;
+		i) if [[ $OPTARG = '-' ]]; then
+				 [[ ! -t 0 ]] && IFS=$'\n' read -r -d '' -a ign
+			 else
+				 IFS=',' read -r -a ign <<< "$OPTARG"
+			 fi
+			 blacklist+=("${ign[@]}")
+			unset i ign ;;
+		k) keep=$OPTARG
+			if [[ $keep != $OPTARG ]] || (( keep < 0 )); then
+				die 'argument to option -k must be a non-negative integer'
+			fi ;;
+		m) move=1 movedir=$OPTARG ;;
+		r) delete=1 ;;
+		s) diskspace=1 ;;
+		u) IFS=$'\n' read -r -d '' -a ign < <(pacman -Qq)
+			 blacklist+=("${ign[@]}")
+			 unset ign ;;
+		v) (( ++verbose )) ;;
+		z) delim='\0' ;;
+		:) die "option '--%s' requires an argument" "$OPTARG" ;;
+		?) die "invalid option -- '%s'" "$OPTARG" ;;
+	esac
+shift $(( OPTIND - 1 ))
+# remaining args are a whitelist
+# sanity checks
+case $(( dryrun+delete+move )) in
+	0) die "no operation specified (use -h for help)" ;;
+	[^1]) die "only one operation may be used at a time" ;;
+[[ -d $cachedir ]] ||
+	die "cachedir \`%s' does not exist or is not a directory" "$cachedir"
+[[ $movedir && ! -d $movedir ]] &&
+	die "move-to directory \`%s' does not exist or is not a directory" "$movedir"
+# Decide whether the destructive operations need elevated privileges.
+if (( move || delete )); then
+	# make it an absolute path since we're about to chdir
+	# (only when a move directory was given: an empty movedir must stay
+	# empty, otherwise a plain removal would test \$PWD for writability
+	# below and could ask for root without reason)
+	[[ $movedir && ${movedir:0:1} != '/' ]] && movedir=$PWD/$movedir
+	[[ ! -w $cachedir || ( $movedir && ! -w $movedir ) ]] && needsroot=1
+fi
+# unlikely that this will fail, but better make sure
+cd "$cachedir" || die "failed to chdir to \`%s'" "$cachedir"
+# note that these results are returned in an arbitrary order from awk, but
+# they'll be resorted (in summarize) iff we have a verbosity level set.
+#
+# The extglob pattern lists the package files in the current directory; the
+# names are version-sorted and piped into pkgfilter together with the keep
+# count, the architecture filter and the length-prefixed whitelist and
+# blacklist. read -d '' then slurps the whole newline separated result into
+# the candidates array.
+IFS=$'\n' read -r -d '' -a candidates < \
+	<(printf '%s\n' *.pkg.tar?(.+([^.])) | sort -V |
+		pkgfilter "$keep" "$scanarch" \
+			"${#whitelist[*]}" "${whitelist[@]}" \
+			"${#blacklist[*]}" "${blacklist[@]}")
+# Nothing to prune: say so and stop before any size accounting or file
+# operation takes place.
+if (( ! ${#candidates[*]} )); then
+	msg 'no candidate packages found for pruning'
+	exit 1
+fi
+# do this before we destroy anything
+# The size command prints one line per candidate with the size in the first
+# field; the sizes have to be accumulated (+=), a plain assignment would
+# report only the size of the last file.
+if (( diskspace )); then
+	totalsaved=$(@SIZECMD@ "${candidates[@]}" | awk '{ sum += $1 } END { print sum }')
+fi
+# crush. kill. destroy.
+# pass our verbosity on to rm/mv, then run the chosen operation (a dry run
+# touches nothing) and print the final report
+(( verbose )) && cmdopts+=(-v)
+if (( delete )); then
+	runcmd rm "${cmdopts[@]}" "${candidates[@]}"
+elif (( move )); then
+	runcmd mv "${cmdopts[@]}" "${candidates[@]}" "$movedir"
+fi
+summarize "${candidates[@]}"

