On Sun, Jan 02, 2011 at 08:35:01PM -0600, Dan McGee wrote:
On Sun, Jan 2, 2011 at 7:13 PM, Dave Reisner <d@falconindy.com> wrote:
This does not implement any linkage from current code to the curl based download_file() function. libfetch continues to be used unless configured and compiled explicitly with LDFLAGS/CPPFLAGS set for -lcurl and -DHAVE_LIBCURL, respectively.
Signed-off-by: Dave Reisner <d@falconindy.com> Big comments that might make this easier to review, since it is a bit of a monster:
1. If you are just moving functions around to fix #ifdef locations, do that in a separate and preceding no-functional-changes patch. It would help for the dload_interrupted/signal stuff here for instance. The two (different!) definitions of gethost() that occur are also noise that doesn't help review.
Can't blame you here. Will fix.
2. The dupe code stinks to have to review- I don't know how easy it would be to refactor some things out (mtime/size/signal/proxy stuff?), but yeah.
I agree that it's not the most readable, but if the eventual goal is to remove libfetch code, this seems like a lot of work for no gain. I'll see what I can do, though.
3. Follow. The. Coding. Style. You used spaces everywhere for indentation.
Totally stumped on this one. I'm staring at my source files and I'm seeing tabs. I'm not sure how they ended up as spaces, as I merely just peeled off HEAD~4 with git-send-email. d
--- lib/libalpm/dload.c | 325 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 files changed, 313 insertions(+), 12 deletions(-)
diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c index 09f716f..9154d33 100644 --- a/lib/libalpm/dload.c +++ b/lib/libalpm/dload.c @@ -39,6 +39,10 @@ #include <fetch.h> #endif
+#ifdef HAVE_LIBCURL +#include <curl/curl.h> +#endif + /* libalpm */ #include "dload.h" #include "alpm_list.h" @@ -55,7 +59,6 @@ static char *get_filename(const char *url) { return(filename); }
-#ifdef HAVE_LIBFETCH static char *get_destfile(const char *path, const char *filename) { char *destfile; /* len = localpath len + filename len + null */ @@ -76,6 +79,32 @@ static char *get_tempfile(const char *path, const char *filename) { return(tempfile); }
+#define check_stop() if(dload_interrupted) { ret = -1; goto cleanup; } +enum sighandlers { OLD = 0, NEW = 1 }; + +int dload_interrupted; +static void inthandler(int signum) +{ + dload_interrupted = 1; +} + +#ifdef HAVE_LIBCURL +static int gethost(const char *url, char *buffer) +{ + char *start, *end; + int ret; + + start = strstr(url, "//") + 2; + end = strchr(start, '/'); + + ret = snprintf(&buffer[0], end - start + 1, "%s", start); + buffer[end - start + 1] = '\0'; + + return(ret); +} +#endif + +#ifdef HAVE_LIBFETCH static const char *gethost(struct url *fileurl) { const char *host = _("disk"); @@ -85,15 +114,6 @@ static const char *gethost(struct url *fileurl) return(host); }
-int dload_interrupted; -static void inthandler(int signum) -{ - dload_interrupted = 1; -} - -#define check_stop() if(dload_interrupted) { ret = -1; goto cleanup; } -enum sighandlers { OLD = 0, NEW = 1 }; - static int download_internal(const char *url, const char *localpath, int force) { FILE *localf = NULL; @@ -179,7 +199,7 @@ static int download_internal(const char *url, const char *localpath, if(fetchStat(fileurl, &ust, "") == -1) { pm_errno = PM_ERR_LIBFETCH; _alpm_log(PM_LOG_ERROR, _("failed retrieving file '%s' from %s : %s\n"), - filename, gethost(fileurl), fetchLastErrString); + filename, gethost(url), fetchLastErrString); ret = -1; goto cleanup; } @@ -332,10 +352,291 @@ cleanup: } #endif
+#ifdef HAVE_LIBCURL +int curl_progress(void *filename, double dltotal, double dlnow, double ultotal, + double ulnow) +{ + static int done = 0; + + /* unused upload parameters */ + (void)ultotal; + (void)ulnow; + + /* curl calls the progress callback before the first recv() call, but we've + * already "initialized" the progress meter in download_internal. reporting 0 + * twice causes pacman's callback to print multiple debug outputs. However, + * not initializing prior to this callback causes the progress meter to + * display bogus values when downloading a file:// schema url. + */ + if (dlnow == 0) { + return(0); + } + + if (dload_interrupted) { + return(1); /* abort transfer */ + } + + /* curl still reports progress as its closing up the connection. if we keep + * pounding on the callback, we get a lot of extra progress bars. This is a + * horrible hack for a multitude of reasons. + */ + if (dltotal != dlnow) { + done = 0; + } else { + if (done) { + return(0); + } + done = 1; + } + + handle->dlcb((const char*)filename, (long)dlnow, (long)dltotal); + + return(0); +} + +static int download_internal(const char *url, const char *localpath, + int force) +{ + FILE *devnull, *localf = NULL; + struct stat st; + int ret = 0; + double dl_thisfile = 0; + char *tempfile, *destfile, *filename; + char hostname[PATH_MAX]; + struct sigaction sig_pipe[2], sig_int[2]; + + double remote_size; + long local_size = 0; + time_t remote_time, local_time = 0; + + CURLcode curlret; + + CURL *curl = curl_easy_init(); + if(curl == NULL) { + /* XXX: bad things happened here */ + return(-1); + } + + filename = get_filename(url); + if(!filename) { + _alpm_log(PM_LOG_ERROR, _("url '%s' is invalid\n"), url); + RET_ERR(PM_ERR_SERVER_BAD_URL, -1); + } + + gethost(url, &hostname[0]); + + curl_easy_setopt(curl, CURLOPT_URL, url); + + destfile = get_destfile(localpath, filename); + tempfile = get_tempfile(localpath, filename); + + if(stat(tempfile, &st) == 0 && st.st_size > 0) { + 
_alpm_log(PM_LOG_DEBUG, "tempfile found, attempting continuation\n"); + local_time = st.st_mtime; + local_size = (long)st.st_size; + dl_thisfile = st.st_size; + localf = fopen(tempfile, "ab"); + } else if(!force && stat(destfile, &st) == 0 && st.st_size > 0) { + _alpm_log(PM_LOG_DEBUG, "destfile found, using mtime only\n"); + local_time = st.st_mtime; + local_size = (long)st.st_size; + } else { + _alpm_log(PM_LOG_DEBUG, "no file found matching criteria, starting from scratch\n"); + } + + /* pass the raw filename for passing to the callback function */ + _alpm_log(PM_LOG_DEBUG, "using '%s' for download progress\n", filename); + + /* print proxy info for debug purposes */ + _alpm_log(PM_LOG_DEBUG, "HTTP_PROXY: %s\n", getenv("HTTP_PROXY")); + _alpm_log(PM_LOG_DEBUG, "http_proxy: %s\n", getenv("http_proxy")); + _alpm_log(PM_LOG_DEBUG, "FTP_PROXY: %s\n", getenv("FTP_PROXY")); + _alpm_log(PM_LOG_DEBUG, "ftp_proxy: %s\n", getenv("ftp_proxy")); + + /* set 10s connect timeout */ + curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10L); + + /* ignore any SIGPIPE signals- these may occur if our FTP socket dies or + * something along those lines. Store the old signal handler first. */ + sig_pipe[NEW].sa_handler = SIG_IGN; + sigemptyset(&sig_pipe[NEW].sa_mask); + sig_pipe[NEW].sa_flags = 0; + sigaction(SIGPIPE, NULL, &sig_pipe[OLD]); + sigaction(SIGPIPE, &sig_pipe[NEW], NULL); + + dload_interrupted = 0; + sig_int[NEW].sa_handler = &inthandler; + sigemptyset(&sig_int[NEW].sa_mask); + sig_int[NEW].sa_flags = 0; + sigaction(SIGINT, NULL, &sig_int[OLD]); + sigaction(SIGINT, &sig_int[NEW], NULL); + + /* Use /dev/null as a place to dump output during the file stat + * operation. This prevents connection messages from showing during + * ftp transactions and prevents file:// repos from being dumped + * to stdout. CURLOPT_WRITEDATA will be overwritten appropriately + * for the actual sync operation. 
+ */ + devnull = fopen("/dev/null", "wb"); + + /* use a no-body request to get mtime and filesize */ + curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); + curl_easy_setopt(curl, CURLOPT_FILETIME, 1L); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, devnull); This is probably a dumb hack we don't need to do with cURL, although my experience is limited to HTTP so I don't know the FTP case at all. If we set headers correctly we can probably get back to just doing one request.
I attempted something like this for a conky patch, see http://git.omp.am/?p=conky.git;a=commitdiff;h=93c6baebb6c6dcbc5e6e93fb6982a7...
This is actually pretty easy. curl has CURLOPT_TIMECONDITION and the associated CURLOPT_TIMEVALUE which can be set for both http and ftp to skip a download if it's unnecessary. I'll definitely refactor this.
+ + curlret = curl_easy_perform(curl); + fclose(devnull); + + if(curlret != CURLE_OK) { + pm_errno = PM_ERR_LIBFETCH; + _alpm_log(PM_LOG_ERROR, _("failed retrieving file '%s' from %s : %s\n"), + filename, hostname, curl_easy_strerror(curlret)); + ret = -1; + goto cleanup; + } + + curl_easy_getinfo(curl, CURLINFO_FILETIME, &remote_time); + curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &remote_size); + check_stop(); + + _alpm_log(PM_LOG_DEBUG, "remote_mtime: %ld local_time: %ld compare: %ld\n", + remote_time, local_time, local_time - remote_time); + _alpm_log(PM_LOG_DEBUG, "remote_size: %jd local_size: %jd compare: %jd\n", + (intmax_t)remote_size, (intmax_t)local_size, (intmax_t)(local_size - remote_size)); + if(!force && remote_time && remote_time == local_time + && remote_size != -1 && remote_size == local_size) { + /* the remote time and size values agreed with what we have, so move on + * because there is nothing more to do. */ + _alpm_log(PM_LOG_DEBUG, "files are identical, skipping %s\n", filename); + ret = 1; + goto cleanup; + } + + if(remote_time == -1 || remote_time != local_time) { + _alpm_log(PM_LOG_DEBUG, "mtimes were different or unavailable, downloading %s from beginning\n", filename); + if (localf) { + fclose(localf); + localf = NULL; + } + } else if((localf && local_size == 0)) { + _alpm_log(PM_LOG_WARNING, _("resuming download of %s not possible; starting over\n"), filename); + fclose(localf); + localf = NULL; + } else if(local_size) { + _alpm_log(PM_LOG_DEBUG, "resuming download at position %jd\n", (intmax_t)local_size); + curl_easy_setopt(curl, CURLOPT_RESUME_FROM, local_size); + } + + if(localf == NULL) { + _alpm_rmrf(tempfile); + dl_thisfile = 0; + localf = fopen(tempfile, "wb"); + if(localf == NULL) { /* still null? 
*/ + pm_errno = PM_ERR_RETRIEVE; + _alpm_log(PM_LOG_ERROR, _("error writing to file '%s': %s\n"), + tempfile, strerror(errno)); + ret = -1; + goto cleanup; + } + } + + curl_easy_setopt(curl, CURLOPT_NOBODY, 0L); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, localf); + curl_easy_setopt(curl, CURLOPT_ENCODING, "deflate, gzip"); + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); + curl_easy_setopt(curl, CURLOPT_PROGRESSDATA, filename); + curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION, curl_progress); + + /* Progress 0 - initialize */ + if(handle->dlcb) { + handle->dlcb(filename, 0, remote_size); + } + + curlret = curl_easy_perform(curl); + + /* check for user cancellation */ + if(curlret == CURLE_ABORTED_BY_CALLBACK) { + goto cleanup; + } + + if(curlret != CURLE_OK) { + pm_errno = PM_ERR_LIBFETCH; /* this needs to change */ + _alpm_log(PM_LOG_ERROR, _("failed retrieving file '%s' from %s : %s\n"), + filename, hostname, curl_easy_strerror(curlret)); + ret = -1; + goto cleanup; + } + + curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &dl_thisfile); + + /* did the transfer complete normally? */ + if(dl_thisfile == 0) { + pm_errno = PM_ERR_RETRIEVE; + _alpm_log(PM_LOG_ERROR, _("failed retrieving file '%s' from %s\n"), + filename, hostname); + ret = -1; + goto cleanup; + } + + if(remote_size != -1 && local_size + dl_thisfile < remote_size) { + pm_errno = PM_ERR_RETRIEVE; + _alpm_log(PM_LOG_ERROR, _("%s appears to be truncated: %jd/%jd bytes\n"), + filename, (intmax_t)(local_size + dl_thisfile), (intmax_t)remote_size); + ret = -1; + goto cleanup; + } + + /* probably safer to close the file descriptors now before renaming the file, + * for example to make sure the buffers are flushed. 
+ */ + fclose(localf); + localf = NULL; + + /* set the times on the file to the same as that of the remote file */ + if(remote_time) { + struct timeval tv[2]; + memset(&tv, 0, sizeof(tv)); + tv[0].tv_sec = tv[1].tv_sec = remote_time; + utimes(tempfile, tv); + } + rename(tempfile, destfile); + ret = 0; + +cleanup: + FREE(tempfile); + FREE(destfile); + if(localf != NULL) { + /* if we still had a local file open, we got interrupted. set the mtimes on + * the file accordingly. */ + fflush(localf); + if(remote_time) { + struct timeval tv[2]; + memset(&tv, 0, sizeof(tv)); + tv[0].tv_sec = tv[1].tv_sec = remote_time; + futimes(fileno(localf), tv); + } + fclose(localf); + } + if(curl != NULL) { + curl_easy_cleanup(curl); + } + + /* restore the old signal handlers */ + sigaction(SIGINT, &sig_int[OLD], NULL); + sigaction(SIGPIPE, &sig_pipe[OLD], NULL); + /* if we were interrupted, trip the old handler */ + if(dload_interrupted) { + raise(SIGINT); + } + + return(ret); +} +#endif + static int download(const char *url, const char *localpath, int force) { if(handle->fetchcb == NULL) { -#ifdef HAVE_LIBFETCH +#if defined(HAVE_LIBFETCH) || defined(HAVE_LIBCURL) return(download_internal(url, localpath, force)); #else RET_ERR(PM_ERR_EXTERNAL_DOWNLOAD, -1); -- 1.7.3.4