[pacman-dev] [PATCH 1/3] Add a `vcsget` tool to download source from VCS repositories.

Luke Shumaker LukeShu at sbcglobal.net
Sat Aug 25 13:36:42 EDT 2012


It would support
 * cvs
 * git
 * svn
 * bzr
 * hg

But cvs support is yet to be written, and bzr support is half-way written.

Git is the only mode that I'm thoroughly confident in, the others need
testing.

Signed-off-by: Luke Shumaker <LukeShu at sbcglobal.net>
---
 scripts/.gitignore   |   1 +
 scripts/Makefile.am  |   4 +-
 scripts/vcsget.sh.in | 294 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 298 insertions(+), 1 deletion(-)
 create mode 100644 scripts/vcsget.sh.in

diff --git a/scripts/.gitignore b/scripts/.gitignore
index 9e403bf..e59551e 100644
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -6,3 +6,4 @@ pkgdelta
 repo-add
 repo-elephant
 repo-remove
+vcsget
diff --git a/scripts/Makefile.am b/scripts/Makefile.am
index 29c81aa..8e6d037 100644
--- a/scripts/Makefile.am
+++ b/scripts/Makefile.am
@@ -14,7 +14,8 @@ OURSCRIPTS = \
 	pacman-key \
 	pacman-optimize \
 	pkgdelta \
-	repo-add
+	repo-add \
+	vcsget
 
 EXTRA_DIST = \
 	makepkg.sh.in \
@@ -23,6 +24,7 @@ EXTRA_DIST = \
 	pacman-optimize.sh.in \
 	pkgdelta.sh.in \
 	repo-add.sh.in \
+	vcsget.sh.in \
 	$(LIBRARY)
 
 LIBRARY = \
diff --git a/scripts/vcsget.sh.in b/scripts/vcsget.sh.in
new file mode 100644
index 0000000..f8f6e4b
--- /dev/null
+++ b/scripts/vcsget.sh.in
@@ -0,0 +1,294 @@
+#!/bin/bash
+#
+#   vcsget - downloader agent that generates tarballs from VCS repos
+#   @configure_input@
+#
+#   Copyright (c) 2012 Luke Shumaker <lukeshu at sbcglobal.net>
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# gettext initialization
+export TEXTDOMAIN='pacman-scripts'
+export TEXTDOMAINDIR='@localedir@'
+
+unset CDPATH
+
+m4_include(library/output_format.sh)
+
+##
+#  usage : in_array( $needle, $haystack )
+# return : 0 - found
+#          1 - not found
+##
+in_array() {
+	local needle=$1; shift
+	local item
+	for item in "$@"; do
+		[[ $item = "$needle" ]] && return 0 # Found
+	done
+	return 1 # Not Found
+}
+
+usage() {
+	echo "Usage: $0 <URL> <OUTPUT_TARBALL>"
+	echo ""
+	echo "Talks with multiple version control systems (VCSs) to create a"
+	echo "tarball of a specific commit."
+	echo ""
+	echo "<OUTPUT_TARBALL> is a plain, uncompressed tarball. Given the same"
+	echo "commit, the tarball will always have the same checksum."
+	echo ""
+	echo "<URL> is the repository, and possibly commit to download and tar."
+	echo "The following schemes are recognized:"
+	echo "  * cvs://"
+	echo "  * git://"
+	echo "  * git+PROTO://"
+	echo "  * svn+PROTO://"
+	echo "  * bzr://"
+	echo "  * hg+PROTO://"
+	echo ""
+	echo "URLs to be consumed by $0 are not always in the format of the"
+	echo "relevent VCS program, but normalized to proper URLs. Simply, this"
+	echo "means:"
+	echo ""
+	echo "    scheme://[authinfo@]host[:port]/path[#fragment]"
+	echo ""
+	echo "Where <authinfo> is in the format \"username[:password]\" and"
+	echo "<fragment> is the commit ID, branch, or tag to be returned."
+}
+
+##
+# Get the column "$2" from the space-delimited string "$1".
+# This has the advantage over `set -- $1` and `arr=($1)` that it separates on
+# a single space, instead of '\s+', which allows it to have empty columns.
+##
+get_field() {
+	local str=$1
+	local i=$2
+	echo "$str"|cut -d' ' -f $i
+}
+
+##
+# Parse a URL into parts, according to RFC 3986, "URI Generic Syntax".
+# Sets the variables `scheme`, `authority`, `path`, `query` and `fragment`.
+##
+parse_url() {
+	local url=$1
+
+	# This regex is copied from RFC 3986
+	local regex='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
+	local parts=$(echo "$url"|sed -r "s@$regex@\2 \4 \5 \7 \9@")
+
+	scheme=$(   get_field "$parts" 1)
+	authority=$(get_field "$parts" 2)
+	path=$(     get_field "$parts" 3)
+	query=$(    get_field "$parts" 4)
+	fragment=$( get_field "$parts" 5)
+}
+
+##
+# Parse an authority from a URL.
+# Sets the variables `user`, `pass`, `sock`, `host` and `port`.
+# `sock` is socket, which is defined as `hostport` in the RFC.
+##
+parse_authority() {
+	local authority=$1
+
+	local regex='^(([^:]*)(:([^@]*))?@)?(([^:]+)(:([0-9]*))?)'
+	#             12      3 4           56      7 8
+	# 1 = userinfo@
+	# 2 = user
+	# 3 = :pass
+	# 4 = pass
+	# 5 = hostport
+	# 6 = host
+	# 7 = :port
+	# 8 = port
+	local parts=$(echo "$authority"|sed -r "s%$regex%\2 \4 \5 \6 \8%")
+	user=$(get_field "$parts" 1)
+	pass=$(get_field "$parts" 2)
+	sock=$(get_field "$parts" 3)
+	host=$(get_field "$parts" 4)
+	port=$(get_field "$parts" 5)
+}
+
+download_cvs() {
+	# TODO
+	error "$(gettext 'CVS not implemented')"
+	exit 1
+}
+
+download_git() {
+	msg "$(gettext 'Getting source from git')"
+	: ${fragment:=master} # default to 'master' if not set
+	send_url="${scheme#git+}://${authority}${path}"
+
+	# Where to checkout the code to
+	dir="$basedir/git/$send_url"
+	if [[ "${dir##*.}" != git ]]; then
+		dir="${dir}/.git"
+	fi
+
+	# Download
+	if [ ! -d "$dir" ]; then
+		msg2 "$(gettext 'doing initial clone')"
+		mkdir -p "$dir"
+		git clone --bare "$send_url" "$dir"
+	fi
+
+	cd "$dir"
+	refs=($(git show-ref|sed 's at .*/@@'|sort -u))
+	if in_array "$fragment" "${refs[@]}"; then
+		# The ref is symbolic, so it may have changed upstream
+		msg2 "$(gettext 'fetching updates')"
+		git fetch --all
+	fi
+
+	# Generate tarball
+	archive=(git archive --format=tar --prefix="$base/"
+		"$fragment" -o "$tarball")
+	msg2 "$(gettext 'trying to generate tarball')"
+	if "${archive[@]}" 2>/dev/null; then
+		# success
+		msg2 "$(gettext 'success')"
+		exit 0
+	else
+		# the relevent commit is probably newer than we have
+		msg2 "$(gettext 'failure, forcing an update')"
+		git fetch --all
+		msg2 "$(gettext 'generating tarball')"
+		"${archive[@]}"
+		local s=$?
+		exit $?
+	fi
+}
+
+download_svn() {
+	msg "$(gettext 'Getting source from Subversion')"
+	parse_authority "$authority"
+	send_url="${scheme#svn+}://${sock}${path}"
+	args=('--config-dir' "$basedir/svn/.config")
+	if [ -n "$user" ]; then
+		args+=('--username' "$user")
+	fi
+	if [ -n "$pass" ]; then
+		args+=('--password' "$pass")
+	fi
+	if [ -n "$fragment" ]; then
+		args+=('-r' "$fragment")
+	fi
+
+	# Where to checkout the code to
+	dir="$basedir/svn/$send_url#$fragment"
+
+	# Download
+	if [ ! -d "$dir" ]; then
+		mkdir -p "$dir"
+		svn co "${args[@]}" "$send_url" "$dir"
+	else
+		cd "$dir"
+		svn up "${args[@]}"
+	fi
+
+	# Generate tarball
+	mkdir "$dir.tmp$$"
+	cp -r "$dir" "$dir.tmp$$/$base"
+	find "$dir.tmp$$/$base" -name .svn -exec rm -rf '{}' +
+	cd "$dir.tmp$$"
+	bsdtar cf "$tarball" "$base"
+	rm -rf "$dir.tmp$$"
+}
+
+download_bzr() {
+	# TODO finish bzr support
+	error "$(gettext 'Bazaar not implemented')"
+	exit 1
+
+	send_url="${url%%#*}"
+	if [ "$scheme" != 'bzr+ssh' ]; then
+		send_url="${send_url#bzr+}"
+	fi
+
+	dir="$basedir/bzr/$send_url"
+
+	# Download
+	if [ ! -d "$dir" ]; then
+		msg2 "$(gettext 'doing initial checkout')"
+		mkdir -p "$dir"
+		bzr checkout "$send_url" "$dir"
+	fi
+
+	cd "$dir"
+	bzr update
+
+	# Generate tarball
+}
+
+download_hg() {
+	msg "$(gettext 'Getting source from Mercurial')"
+	send_url="${url#hg+}"
+	hg clone -U "$send_url"
+	dir="$basedir/hg/${send_url%%#*}"
+
+	# Download
+	if [ ! -d "$dir" ]; then
+		mkdir -p "$dir"
+		hg clone "$send_url" "$dir"
+	else
+		cd "$dir"
+		hg update -r "$fragment"
+	fi
+
+	# Generate tarball
+	mkdir "$dir.tmp$$"
+	cp -r "$dir" "$dir.tmp$$/$base"
+	rm -rf "$dir.tmp$$/$base/.hg"
+	cd "$dir.tmp$$"
+	bsdtar cf "$tarball" "$base"
+	rm -rf "$dir.tmp$$"
+}
+
+main() {
+	url=$1
+	tarball=$(readlink -m "$2")
+
+	base="$(echo "$tarball"|sed -r 's at .*/(.*)\.tar(\..*)?$@\1@')"
+	basedir="${tarball%/*}/vcs-cache"
+
+	if ( in_array '-h' "$@" || in_array '--help' "$@" ); then
+		usage
+		exit 0
+	fi
+	if [ "$#" -ne 2 ]; then
+		usage
+		exit 1
+	fi
+
+	msg "$(printf "$(gettext "Saving '%s' to '%s'")" "$url" "$tarball")"
+
+	parse_url "$url"
+	case "$scheme" in
+		cvs) download_cvs;;
+		git+*|git) download_git;;
+		svn+*) download_svn;;
+		bzr) download_bzr;;
+		hg+*) download_hg;;
+		*)
+			error "$(gettext 'Unable to match a VCS with scheme'): $scheme"
+			exit 1;;
+	esac
+}
+
+main "$@" >&2
-- 
1.7.12



More information about the pacman-dev mailing list