[pacman-dev] [PATCH 2/4] implement a curl based download_file()

Dave Reisner d at falconindy.com
Sun Jan 2 20:13:22 EST 2011


This does not implement any linkage from the current code to the curl-based
download_file() function. libfetch continues to be used unless pacman is
configured and compiled explicitly with LDFLAGS and CPPFLAGS set to -lcurl
and -DHAVE_LIBCURL, respectively.

Signed-off-by: Dave Reisner <d at falconindy.com>
---
 lib/libalpm/dload.c |  325 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 313 insertions(+), 12 deletions(-)
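
For reference, a minimal sketch of the build invocation described above,
assuming a standard autotools setup (the exact ./configure step is an
assumption; the variable names and values come from the message):

  ./configure CPPFLAGS="-DHAVE_LIBCURL" LDFLAGS="-lcurl"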

diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
index 09f716f..9154d33 100644
--- a/lib/libalpm/dload.c
+++ b/lib/libalpm/dload.c
@@ -39,6 +39,10 @@
 #include <fetch.h>
 #endif
 
+#ifdef HAVE_LIBCURL
+#include <curl/curl.h>
+#endif
+
 /* libalpm */
 #include "dload.h"
 #include "alpm_list.h"
@@ -55,7 +59,6 @@ static char *get_filename(const char *url) {
 	return(filename);
 }
 
-#ifdef HAVE_LIBFETCH
 static char *get_destfile(const char *path, const char *filename) {
 	char *destfile;
 	/* len = localpath len + filename len + null */
@@ -76,6 +79,32 @@ static char *get_tempfile(const char *path, const char *filename) {
 	return(tempfile);
 }
 
+/* bail out of the enclosing function when the user has interrupted us;
+ * expects a local `ret` and a `cleanup:` label at the call site */
+#define check_stop() do { if(dload_interrupted) { ret = -1; goto cleanup; } } while(0)
+enum sighandlers { OLD = 0, NEW = 1 };
+
+int dload_interrupted;
+static void inthandler(int signum)
+{
+	dload_interrupted = 1;
+}
+
+#ifdef HAVE_LIBCURL
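+/* copy the host portion of a "scheme://host/path" URL into buffer.
+ * Assumes "//" is present and the hostname is terminated by a '/';
+ * buffer must be large enough for the hostname plus a trailing NUL. */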
+static int gethost(const char *url, char *buffer)
+{
+	char *start, *end;
+	int ret;
+
+	start = strstr(url, "//") + 2;
+	end = strchr(start, '/');
+
+	/* snprintf null-terminates for us within the given size */
+	ret = snprintf(buffer, end - start + 1, "%s", start);
+
+	return(ret);
+}
+#endif
+
+#ifdef HAVE_LIBFETCH
 static const char *gethost(struct url *fileurl)
 {
 	const char *host = _("disk");
@@ -85,15 +114,6 @@ static const char *gethost(struct url *fileurl)
 	return(host);
 }
 
-int dload_interrupted;
-static void inthandler(int signum)
-{
-	dload_interrupted = 1;
-}
-
-#define check_stop() if(dload_interrupted) { ret = -1; goto cleanup; }
-enum sighandlers { OLD = 0, NEW = 1 };
-
 static int download_internal(const char *url, const char *localpath,
 		int force) {
 	FILE *localf = NULL;
@@ -179,7 +199,7 @@ static int download_internal(const char *url, const char *localpath,
 	if(fetchStat(fileurl, &ust, "") == -1) {
 		pm_errno = PM_ERR_LIBFETCH;
 		_alpm_log(PM_LOG_ERROR, _("failed retrieving file '%s' from %s : %s\n"),
-				filename, gethost(fileurl), fetchLastErrString);
+				filename, gethost(url), fetchLastErrString);
 		ret = -1;
 		goto cleanup;
 	}
@@ -332,10 +352,291 @@ cleanup:
 }
 #endif
 
+#ifdef HAVE_LIBCURL
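+/* progress callback registered via CURLOPT_PROGRESSFUNCTION below. curl
+ * passes cumulative byte counts; returning nonzero from here causes
+ * curl_easy_perform() to fail with CURLE_ABORTED_BY_CALLBACK. */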
+static int curl_progress(void *filename, double dltotal, double dlnow,
+		double ultotal, double ulnow)
+{
+	static int done = 0;
+
+	/* unused upload parameters */
+	(void)ultotal;
+	(void)ulnow;
+
+	/* curl calls the progress callback before the first recv() call, but we've
+	 * already "initialized" the progress meter in download_internal. Reporting 0
+	 * twice causes pacman's callback to print multiple debug outputs. However,
+	 * not initializing prior to this callback causes the progress meter to
+	 * display bogus values when downloading a file:// scheme URL.
+	 */
+	if(dlnow == 0) {
+		return(0);
+	}
+
+	if(dload_interrupted) {
+		return(1); /* abort transfer */
+	}
+
+	/* curl still reports progress as it's closing up the connection. If we keep
+	 * pounding on the callback, we get a lot of extra progress bars. This is a
+	 * horrible hack for a multitude of reasons.
+	 */
+	if(dltotal != dlnow) {
+		done = 0;
+	} else {
+		if(done) {
+			return(0);
+		}
+		done = 1;
+	}
+
+	handle->dlcb((const char*)filename, (long)dlnow, (long)dltotal);
+
+	return(0);
+}
+
+static int download_internal(const char *url, const char *localpath,
+		int force)
+{
+	FILE *devnull, *localf = NULL;
+	struct stat st;
+	int ret = 0;
+	double dl_thisfile = 0;
+	char *tempfile, *destfile, *filename;
+	char hostname[PATH_MAX];
+	struct sigaction sig_pipe[2], sig_int[2];
+
+	double remote_size;
+	long local_size = 0;
+	long remote_time = 0;
+	time_t local_time = 0;
+
+	CURLcode curlret;
+
+	CURL *curl = curl_easy_init();
+	if(curl == NULL) {
+		/* XXX: bad things happened here */
+		return(-1);
+	}
+
+	filename = get_filename(url);
+	if(!filename) {
+		_alpm_log(PM_LOG_ERROR, _("url '%s' is invalid\n"), url);
+		RET_ERR(PM_ERR_SERVER_BAD_URL, -1);
+	}
+
+	gethost(url, hostname);
+
+	curl_easy_setopt(curl, CURLOPT_URL, url);
+
+	destfile = get_destfile(localpath, filename);
+	tempfile = get_tempfile(localpath, filename);
+
+	if(stat(tempfile, &st) == 0 && st.st_size > 0) {
+		_alpm_log(PM_LOG_DEBUG, "tempfile found, attempting continuation\n");
+		local_time = st.st_mtime;
+		local_size = (long)st.st_size;
+		dl_thisfile = st.st_size;
+		localf = fopen(tempfile, "ab");
+	} else if(!force && stat(destfile, &st) == 0 && st.st_size > 0) {
+		_alpm_log(PM_LOG_DEBUG, "destfile found, using mtime only\n");
+		local_time = st.st_mtime;
+		local_size = (long)st.st_size;
+	} else {
+		_alpm_log(PM_LOG_DEBUG, "no file found matching criteria, starting from scratch\n");
+	}
+
+	/* pass the raw filename for passing to the callback function */
+	_alpm_log(PM_LOG_DEBUG, "using '%s' for download progress\n", filename);
+
+	/* print proxy info for debug purposes */
+	_alpm_log(PM_LOG_DEBUG, "HTTP_PROXY: %s\n", getenv("HTTP_PROXY"));
+	_alpm_log(PM_LOG_DEBUG, "http_proxy: %s\n", getenv("http_proxy"));
+	_alpm_log(PM_LOG_DEBUG, "FTP_PROXY:  %s\n", getenv("FTP_PROXY"));
+	_alpm_log(PM_LOG_DEBUG, "ftp_proxy:  %s\n", getenv("ftp_proxy"));
+
+	/* set 10s connect timeout */
+	curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10L);
+
+	/* ignore any SIGPIPE signals- these may occur if our FTP socket dies or
+	 * something along those lines. Store the old signal handler first. */
+	sig_pipe[NEW].sa_handler = SIG_IGN;
+	sigemptyset(&sig_pipe[NEW].sa_mask);
+	sig_pipe[NEW].sa_flags = 0;
+	sigaction(SIGPIPE, NULL, &sig_pipe[OLD]);
+	sigaction(SIGPIPE, &sig_pipe[NEW], NULL);
+
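+	/* record, but defer, any SIGINT: the handler only sets a flag, so the
+	 * partial download can be flushed and time-stamped in cleanup before
+	 * the old handler is re-raised. */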
+	dload_interrupted = 0;
+	sig_int[NEW].sa_handler = &inthandler;
+	sigemptyset(&sig_int[NEW].sa_mask);
+	sig_int[NEW].sa_flags = 0;
+	sigaction(SIGINT, NULL, &sig_int[OLD]);
+	sigaction(SIGINT, &sig_int[NEW], NULL);
+
+	/* Use /dev/null as a place to dump output during the file stat
+	 * operation. This prevents connection messages from showing during
+	 * ftp transactions and prevents file:// repos from being dumped
+	 * to stdout. CURLOPT_WRITEDATA will be overwritten appropriately
+	 * for the actual sync operation.
+	 */
+	devnull = fopen("/dev/null", "wb");
+
+	/* use a no-body request to get mtime and filesize */
+	curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
+	curl_easy_setopt(curl, CURLOPT_FILETIME, 1L);
+	curl_easy_setopt(curl, CURLOPT_WRITEDATA, devnull);
+
+	curlret = curl_easy_perform(curl);
+	fclose(devnull);
+
+	if(curlret != CURLE_OK) {
+		pm_errno = PM_ERR_LIBFETCH; /* this needs to change */
+		_alpm_log(PM_LOG_ERROR, _("failed retrieving file '%s' from %s : %s\n"),
+				filename, hostname, curl_easy_strerror(curlret));
+		ret = -1;
+		goto cleanup;
+	}
+
+	curl_easy_getinfo(curl, CURLINFO_FILETIME, &remote_time);
+	curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &remote_size);
+	check_stop();
+
+	_alpm_log(PM_LOG_DEBUG, "remote_mtime: %ld local_time: %ld compare: %ld\n",
+			remote_time, local_time, local_time - remote_time);
+	_alpm_log(PM_LOG_DEBUG, "remote_size: %jd local_size: %jd compare: %jd\n",
+			(intmax_t)remote_size, (intmax_t)local_size, (intmax_t)(local_size - remote_size));
+	if(!force && remote_time && remote_time == local_time
+			&& remote_size != -1 && remote_size == local_size) {
+		/* the remote time and size values agreed with what we have, so move on
+		 * because there is nothing more to do. */
+		_alpm_log(PM_LOG_DEBUG, "files are identical, skipping %s\n", filename);
+		ret = 1;
+		goto cleanup;
+	}
+
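+	/* decide between a full restart and a ranged resume: differing or
+	 * unknown mtimes force a restart; otherwise pick up where the
+	 * tempfile left off via CURLOPT_RESUME_FROM */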
+	if(remote_time == -1 || remote_time != local_time) {
+		_alpm_log(PM_LOG_DEBUG, "mtimes were different or unavailable, downloading %s from beginning\n", filename);
+		if (localf) {
+			fclose(localf);
+			localf = NULL;
+		}
+	} else if(localf && local_size == 0) {
+		_alpm_log(PM_LOG_WARNING, _("resuming download of %s not possible; starting over\n"), filename);
+		fclose(localf);
+		localf = NULL;
+	} else if(local_size) {
+		_alpm_log(PM_LOG_DEBUG, "resuming download at position %jd\n", (intmax_t)local_size);
+		curl_easy_setopt(curl, CURLOPT_RESUME_FROM, local_size);
+	}
+
+	if(localf == NULL) {
+		_alpm_rmrf(tempfile);
+		dl_thisfile = 0;
+		localf = fopen(tempfile, "wb");
+		if(localf == NULL) { /* still null? */
+			pm_errno = PM_ERR_RETRIEVE;
+			_alpm_log(PM_LOG_ERROR, _("error writing to file '%s': %s\n"),
+					tempfile, strerror(errno));
+			ret = -1;
+			goto cleanup;
+		}
+	}
+
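+	/* switch from the metadata probe to the real transfer: re-enable the
+	 * response body, write into the tempfile, and hook up the progress
+	 * callback */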
+	curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
+	curl_easy_setopt(curl, CURLOPT_WRITEDATA, localf);
+	curl_easy_setopt(curl, CURLOPT_ENCODING, "deflate, gzip");
+	curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
+	curl_easy_setopt(curl, CURLOPT_PROGRESSDATA, filename);
+	curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION, curl_progress);
+
+	/* Progress 0 - initialize */
+	if(handle->dlcb) {
+		handle->dlcb(filename, 0, remote_size);
+	}
+
+	curlret = curl_easy_perform(curl);
+
+	/* transfer was aborted by the progress callback (user cancellation) */
+	if(curlret == CURLE_ABORTED_BY_CALLBACK) {
+		ret = -1;
+		goto cleanup;
+	}
+
+	if(curlret != CURLE_OK) {
+		pm_errno = PM_ERR_LIBFETCH; /* this needs to change */
+		_alpm_log(PM_LOG_ERROR, _("failed retrieving file '%s' from %s : %s\n"),
+				filename, hostname, curl_easy_strerror(curlret));
+		ret = -1;
+		goto cleanup;
+	}
+
+	curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &dl_thisfile);
+
+	/* did the transfer complete normally? */
+	if(dl_thisfile == 0) {
+		pm_errno = PM_ERR_RETRIEVE;
+		_alpm_log(PM_LOG_ERROR, _("failed retrieving file '%s' from %s\n"),
+				filename, hostname);
+		ret = -1;
+		goto cleanup;
+	}
+
+	if(remote_size != -1 && local_size + dl_thisfile < remote_size) {
+		pm_errno = PM_ERR_RETRIEVE;
+		_alpm_log(PM_LOG_ERROR, _("%s appears to be truncated: %jd/%jd bytes\n"),
+				filename, (intmax_t)(local_size + dl_thisfile), (intmax_t)remote_size);
+		ret = -1;
+		goto cleanup;
+	}
+
+	/* close the file now, before renaming, to make sure that buffered
+	 * writes are flushed to disk.
+	 */
+	fclose(localf);
+	localf = NULL;
+
+	/* set the times on the file to those of the remote file, if known */
+	if(remote_time > 0) {
+		struct timeval tv[2];
+		memset(&tv, 0, sizeof(tv));
+		tv[0].tv_sec = tv[1].tv_sec = remote_time;
+		utimes(tempfile, tv);
+	}
+	rename(tempfile, destfile);
+	ret = 0;
+
+cleanup:
+	FREE(tempfile);
+	FREE(destfile);
+	if(localf != NULL) {
+		/* if we still had a local file open, we got interrupted. set the mtimes on
+		 * the file accordingly. */
+		fflush(localf);
+		if(remote_time > 0) {
+			struct timeval tv[2];
+			memset(&tv, 0, sizeof(tv));
+			tv[0].tv_sec = tv[1].tv_sec = remote_time;
+			futimes(fileno(localf), tv);
+		}
+		fclose(localf);
+	}
+	if(curl != NULL) {
+		curl_easy_cleanup(curl);
+	}
+
+	/* restore the old signal handlers */
+	sigaction(SIGINT, &sig_int[OLD], NULL);
+	sigaction(SIGPIPE, &sig_pipe[OLD], NULL);
+	/* if we were interrupted, trip the old handler */
+	if(dload_interrupted) {
+		raise(SIGINT);
+	}
+
+	return(ret);
+}
+#endif
+
 static int download(const char *url, const char *localpath,
 		int force) {
 	if(handle->fetchcb == NULL) {
-#ifdef HAVE_LIBFETCH
+#if defined(HAVE_LIBFETCH) || defined(HAVE_LIBCURL)
 		return(download_internal(url, localpath, force));
 #else
 		RET_ERR(PM_ERR_EXTERNAL_DOWNLOAD, -1);
-- 
1.7.3.4


