[pacman-dev] [PATCH 2/4] implement a curl based download_file()

Dave Reisner d at falconindy.com
Sun Jan 2 22:07:36 EST 2011


On Sun, Jan 02, 2011 at 08:35:01PM -0600, Dan McGee wrote:
> On Sun, Jan 2, 2011 at 7:13 PM, Dave Reisner <d at falconindy.com> wrote:
> > This does not implement any linkage from current code to the curl based
> > download_file() function. libfetch continues to be used unless
> > configured and compiled explicitly with LDFLAGS/CPPFLAGS set for -lcurl
> > and -DHAVE_LIBCURL, respectively.
> >
> > Signed-off-by: Dave Reisner <d at falconindy.com>
> Big comments that might make this easier to review, since it is a bit
> of a monster:
> 
> 1. If you are just moving functions around to fix #ifdef locations, do
> that in a separate and preceding no-functional-changes patch. It
> would help for the dload_interrupted/signal stuff here, for instance.
> The two (different!) definitions of gethost() that occur are also noise
> that doesn't help review.

Can't blame you here. Will fix.
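
Splitting those out should be easy enough with an interactive rebase --
something like this (sketching from memory; the commit message is
invented):

    git rebase -i HEAD~4             # mark this commit as 'edit'
    git reset HEAD^
    git add -p lib/libalpm/dload.c   # stage just the code moves
    git commit -m 'dload: move signal handling out of HAVE_LIBFETCH'
    git commit -a                    # re-commit the actual curl work
    git rebase --continue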

> 2. The dupe code stinks to have to review- I don't know how easy it
> would be to refactor some things out (mtime/size/signal/proxy stuff?),
> but yeah.

I agree that it's not the most readable, but if the eventual goal is to
remove the libfetch code, this seems like a lot of work for no gain.
I'll see what I can do, though.
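
As a first pass, the signal setup at least is byte-for-byte identical
in both backends and could move into a shared helper -- an untested
sketch, lifted straight from the code below (a matching restore helper
would wrap the two sigaction() calls in the cleanup path):

    static void dload_signal_setup(struct sigaction sig_pipe[2],
                    struct sigaction sig_int[2])
    {
            /* ignore SIGPIPE, storing the old handler first */
            sig_pipe[NEW].sa_handler = SIG_IGN;
            sigemptyset(&sig_pipe[NEW].sa_mask);
            sig_pipe[NEW].sa_flags = 0;
            sigaction(SIGPIPE, NULL, &sig_pipe[OLD]);
            sigaction(SIGPIPE, &sig_pipe[NEW], NULL);

            /* catch SIGINT so partial downloads can be cleaned up */
            dload_interrupted = 0;
            sig_int[NEW].sa_handler = &inthandler;
            sigemptyset(&sig_int[NEW].sa_mask);
            sig_int[NEW].sa_flags = 0;
            sigaction(SIGINT, NULL, &sig_int[OLD]);
            sigaction(SIGINT, &sig_int[NEW], NULL);
    }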

> 3. Follow. The. Coding. Style. You used spaces everywhere for indentation.
> 

Totally stumped on this one. I'm staring at my source files and I'm
seeing tabs. I'm not sure how they ended up as spaces, as I simply
peeled off HEAD~4 with git-send-email.
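
For what it's worth, I'll double-check the actual patch bytes instead
of trusting my editor, e.g.:

    git format-patch HEAD~4
    cat -A 0002-*.patch | less    # real tabs render as ^I

If those show ^I, the whitespace got mangled somewhere in transit.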

d

> > ---
> >  lib/libalpm/dload.c |  325 +++++++++++++++++++++++++++++++++++++++++++++++++--
> >  1 files changed, 313 insertions(+), 12 deletions(-)
> >
> > diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> > index 09f716f..9154d33 100644
> > --- a/lib/libalpm/dload.c
> > +++ b/lib/libalpm/dload.c
> > @@ -39,6 +39,10 @@
> >  #include <fetch.h>
> >  #endif
> >
> > +#ifdef HAVE_LIBCURL
> > +#include <curl/curl.h>
> > +#endif
> > +
> >  /* libalpm */
> >  #include "dload.h"
> >  #include "alpm_list.h"
> > @@ -55,7 +59,6 @@ static char *get_filename(const char *url) {
> >        return(filename);
> >  }
> >
> > -#ifdef HAVE_LIBFETCH
> >  static char *get_destfile(const char *path, const char *filename) {
> >        char *destfile;
> >        /* len = localpath len + filename len + null */
> > @@ -76,6 +79,32 @@ static char *get_tempfile(const char *path, const char *filename) {
> >        return(tempfile);
> >  }
> >
> > +#define check_stop() if(dload_interrupted) { ret = -1; goto cleanup; }
> > +enum sighandlers { OLD = 0, NEW = 1 };
> > +
> > +int dload_interrupted;
> > +static void inthandler(int signum)
> > +{
> > +       dload_interrupted = 1;
> > +}
> > +
> > +#ifdef HAVE_LIBCURL
> > +static int gethost(const char *url, char *buffer)
> > +{
> > +  char *start, *end;
> > +       int ret;
> > +
> > +       start = strstr(url, "//") + 2;
> > +       end = strchr(start, '/');
> > +
> > +       ret = snprintf(&buffer[0], end - start + 1, "%s", start);
> > +       buffer[end - start + 1] = '\0';
> > +
> > +       return(ret);
> > +}
> > +#endif
> > +
> > +#ifdef HAVE_LIBFETCH
> >  static const char *gethost(struct url *fileurl)
> >  {
> >        const char *host = _("disk");
> > @@ -85,15 +114,6 @@ static const char *gethost(struct url *fileurl)
> >        return(host);
> >  }
> >
> > -int dload_interrupted;
> > -static void inthandler(int signum)
> > -{
> > -       dload_interrupted = 1;
> > -}
> > -
> > -#define check_stop() if(dload_interrupted) { ret = -1; goto cleanup; }
> > -enum sighandlers { OLD = 0, NEW = 1 };
> > -
> >  static int download_internal(const char *url, const char *localpath,
> >                int force) {
> >        FILE *localf = NULL;
> > @@ -179,7 +199,7 @@ static int download_internal(const char *url, const char *localpath,
> >        if(fetchStat(fileurl, &ust, "") == -1) {
> >                pm_errno = PM_ERR_LIBFETCH;
> >                _alpm_log(PM_LOG_ERROR, _("failed retrieving file '%s' from %s : %s\n"),
> > -                               filename, gethost(fileurl), fetchLastErrString);
> > +                               filename, gethost(url), fetchLastErrString);
> >                ret = -1;
> >                goto cleanup;
> >        }
> > @@ -332,10 +352,291 @@ cleanup:
> >  }
> >  #endif
> >
> > +#ifdef HAVE_LIBCURL
> > +int curl_progress(void *filename, double dltotal, double dlnow, double ultotal,
> > +               double ulnow)
> > +{
> > +  static int done = 0;
> > +
> > +       /* unused upload parameters */
> > +       (void)ultotal;
> > +       (void)ulnow;
> > +
> > +       /* curl calls the progress callback before the first recv() call, but we've
> > +        * already "initialized" the progress meter in download_internal. Reporting 0
> > +        * twice causes pacman's callback to print multiple debug outputs. However,
> > +        * not initializing prior to this callback causes the progress meter to
> > +        * display bogus values when downloading a file:// scheme url.
> > +        */
> > +       if (dlnow == 0) {
> > +               return(0);
> > +       }
> > +
> > +       if (dload_interrupted) {
> > +               return(1); /* abort transfer */
> > +       }
> > +
> > +       /* curl still reports progress as it's closing up the connection. If we keep
> > +        * pounding on the callback, we get a lot of extra progress bars. This is a
> > +        * horrible hack for a multitude of reasons.
> > +        */
> > +       if (dltotal != dlnow) {
> > +               done = 0;
> > +       } else {
> > +               if (done) {
> > +                       return(0);
> > +               }
> > +               done = 1;
> > +       }
> > +
> > +       handle->dlcb((const char*)filename, (long)dlnow, (long)dltotal);
> > +
> > +       return(0);
> > +}
> > +
> > +static int download_internal(const char *url, const char *localpath,
> > +               int force)
> > +{
> > +       FILE *devnull, *localf = NULL;
> > +       struct stat st;
> > +       int ret = 0;
> > +       double dl_thisfile = 0;
> > +       char *tempfile, *destfile, *filename;
> > +       char hostname[PATH_MAX];
> > +       struct sigaction sig_pipe[2], sig_int[2];
> > +
> > +       double remote_size;
> > +       long local_size = 0;
> > +       time_t remote_time, local_time = 0;
> > +
> > +       CURLcode curlret;
> > +
> > +       CURL *curl = curl_easy_init();
> > +       if(curl == NULL) {
> > +               /* XXX: bad things happened here */
> > +               return(-1);
> > +       }
> > +
> > +       filename = get_filename(url);
> > +       if(!filename) {
> > +               _alpm_log(PM_LOG_ERROR, _("url '%s' is invalid\n"), url);
> > +               RET_ERR(PM_ERR_SERVER_BAD_URL, -1);
> > +       }
> > +
> > +       gethost(url, &hostname[0]);
> > +
> > +       curl_easy_setopt(curl, CURLOPT_URL, url);
> > +
> > +       destfile = get_destfile(localpath, filename);
> > +       tempfile = get_tempfile(localpath, filename);
> > +
> > +       if(stat(tempfile, &st) == 0 && st.st_size > 0) {
> > +               _alpm_log(PM_LOG_DEBUG, "tempfile found, attempting continuation\n");
> > +               local_time = st.st_mtime;
> > +               local_size = (long)st.st_size;
> > +               dl_thisfile = st.st_size;
> > +               localf = fopen(tempfile, "ab");
> > +       } else if(!force && stat(destfile, &st) == 0 && st.st_size > 0) {
> > +               _alpm_log(PM_LOG_DEBUG, "destfile found, using mtime only\n");
> > +               local_time = st.st_mtime;
> > +               local_size = (long)st.st_size;
> > +       } else {
> > +               _alpm_log(PM_LOG_DEBUG, "no file found matching criteria, starting from scratch\n");
> > +       }
> > +
> > +       /* pass the raw filename for passing to the callback function */
> > +       _alpm_log(PM_LOG_DEBUG, "using '%s' for download progress\n", filename);
> > +
> > +       /* print proxy info for debug purposes */
> > +       _alpm_log(PM_LOG_DEBUG, "HTTP_PROXY: %s\n", getenv("HTTP_PROXY"));
> > +       _alpm_log(PM_LOG_DEBUG, "http_proxy: %s\n", getenv("http_proxy"));
> > +       _alpm_log(PM_LOG_DEBUG, "FTP_PROXY:  %s\n", getenv("FTP_PROXY"));
> > +       _alpm_log(PM_LOG_DEBUG, "ftp_proxy:  %s\n", getenv("ftp_proxy"));
> > +
> > +       /* set 10s connect timeout */
> > +       curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10L);
> > +
> > +       /* ignore any SIGPIPE signals- these may occur if our FTP socket dies or
> > +        * something along those lines. Store the old signal handler first. */
> > +       sig_pipe[NEW].sa_handler = SIG_IGN;
> > +       sigemptyset(&sig_pipe[NEW].sa_mask);
> > +       sig_pipe[NEW].sa_flags = 0;
> > +       sigaction(SIGPIPE, NULL, &sig_pipe[OLD]);
> > +       sigaction(SIGPIPE, &sig_pipe[NEW], NULL);
> > +
> > +       dload_interrupted = 0;
> > +       sig_int[NEW].sa_handler = &inthandler;
> > +       sigemptyset(&sig_int[NEW].sa_mask);
> > +       sig_int[NEW].sa_flags = 0;
> > +       sigaction(SIGINT, NULL, &sig_int[OLD]);
> > +       sigaction(SIGINT, &sig_int[NEW], NULL);
> > +
> > +       /* Use /dev/null as a place to dump output during the file stat
> > +        * operation. This prevents connection messages from showing during
> > +        * ftp transactions and prevents file:// repos from being dumped
> > +        * to stdout. CURLOPT_WRITEDATA will be overwritten appropriately
> > +        * for the actual sync operation.
> > +        */
> > +       devnull = fopen("/dev/null", "wb");
> > +
> > +       /* use a no-body request to get mtime and filesize */
> > +       curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
> > +       curl_easy_setopt(curl, CURLOPT_FILETIME, 1L);
> > +       curl_easy_setopt(curl, CURLOPT_WRITEDATA, devnull);
> This is probably a dumb hack we don't need to do with cURL, although
> my experience is limited to HTTP so I don't know the FTP case at all.
> If we set headers correctly we can probably get back to just doing one
> request.
> 
> I attempted something like this for a conky patch, see
> http://git.omp.am/?p=conky.git;a=commitdiff;h=93c6baebb6c6dcbc5e6e93fb6982a7c5826369ee
> 

This is actually pretty easy. curl has CURLOPT_TIMECONDITION and the
associated CURLOPT_TIMEVALUE, which can be set for both http and ftp to
skip a download if it's unnecessary. I'll definitely refactor this.
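
Roughly this, in place of the whole NOBODY request (untested; I still
need to check how far back CURLINFO_CONDITION_UNMET goes and whether
ftp fills it in):

    curl_easy_setopt(curl, CURLOPT_FILETIME, 1L);
    curl_easy_setopt(curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
    curl_easy_setopt(curl, CURLOPT_TIMEVALUE, (long)local_time);

    curlret = curl_easy_perform(curl);

    if(curlret == CURLE_OK) {
            long unmet;
            /* nonzero means the server had nothing newer than
             * local_time, so the copy we already have is current */
            curl_easy_getinfo(curl, CURLINFO_CONDITION_UNMET, &unmet);
            if(unmet) {
                    ret = 1;
                    goto cleanup;
            }
    }

That would kill the /dev/null hack along with the extra round trip.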

> > +
> > +       curlret = curl_easy_perform(curl);
> > +       fclose(devnull);
> > +
> > +       if(curlret != CURLE_OK) {
> > +               pm_errno = PM_ERR_LIBFETCH;
> > +               _alpm_log(PM_LOG_ERROR, _("failed retrieving file '%s' from %s : %s\n"),
> > +                               filename, hostname, curl_easy_strerror(curlret));
> > +               ret = -1;
> > +               goto cleanup;
> > +       }
> > +
> > +       curl_easy_getinfo(curl, CURLINFO_FILETIME, &remote_time);
> > +       curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &remote_size);
> > +       check_stop();
> > +
> > +       _alpm_log(PM_LOG_DEBUG, "remote_mtime: %ld local_time: %ld compare: %ld\n",
> > +                       remote_time, local_time, local_time - remote_time);
> > +       _alpm_log(PM_LOG_DEBUG, "remote_size: %jd local_size: %jd compare: %jd\n",
> > +                       (intmax_t)remote_size, (intmax_t)local_size, (intmax_t)(local_size - remote_size));
> > +       if(!force && remote_time && remote_time == local_time
> > +                       && remote_size != -1 && remote_size == local_size) {
> > +               /* the remote time and size values agreed with what we have, so move on
> > +                * because there is nothing more to do. */
> > +               _alpm_log(PM_LOG_DEBUG, "files are identical, skipping %s\n", filename);
> > +               ret = 1;
> > +               goto cleanup;
> > +       }
> > +
> > +       if(remote_time == -1 || remote_time != local_time) {
> > +               _alpm_log(PM_LOG_DEBUG, "mtimes were different or unavailable, downloading %s from beginning\n", filename);
> > +               if (localf) {
> > +                       fclose(localf);
> > +                       localf = NULL;
> > +               }
> > +       } else if((localf && local_size == 0)) {
> > +               _alpm_log(PM_LOG_WARNING, _("resuming download of %s not possible; starting over\n"), filename);
> > +               fclose(localf);
> > +               localf = NULL;
> > +       } else if(local_size) {
> > +               _alpm_log(PM_LOG_DEBUG, "resuming download at position %jd\n", (intmax_t)local_size);
> > +               curl_easy_setopt(curl, CURLOPT_RESUME_FROM, local_size);
> > +       }
> > +
> > +       if(localf == NULL) {
> > +               _alpm_rmrf(tempfile);
> > +               dl_thisfile = 0;
> > +               localf = fopen(tempfile, "wb");
> > +               if(localf == NULL) { /* still null? */
> > +                       pm_errno = PM_ERR_RETRIEVE;
> > +                       _alpm_log(PM_LOG_ERROR, _("error writing to file '%s': %s\n"),
> > +                                       tempfile, strerror(errno));
> > +                       ret = -1;
> > +                       goto cleanup;
> > +               }
> > +       }
> > +
> > +       curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
> > +       curl_easy_setopt(curl, CURLOPT_WRITEDATA, localf);
> > +       curl_easy_setopt(curl, CURLOPT_ENCODING, "deflate, gzip");
> > +       curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
> > +       curl_easy_setopt(curl, CURLOPT_PROGRESSDATA, filename);
> > +       curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION, curl_progress);
> > +
> > +       /* Progress 0 - initialize */
> > +       if(handle->dlcb) {
> > +               handle->dlcb(filename, 0, remote_size);
> > +       }
> > +
> > +       curlret = curl_easy_perform(curl);
> > +
> > +       /* check for user cancellation */
> > +       if(curlret == CURLE_ABORTED_BY_CALLBACK) {
> > +               goto cleanup;
> > +       }
> > +
> > +       if(curlret != CURLE_OK) {
> > +               pm_errno = PM_ERR_LIBFETCH; /* this needs to change */
> > +               _alpm_log(PM_LOG_ERROR, _("failed retrieving file '%s' from %s : %s\n"),
> > +                               filename, hostname, curl_easy_strerror(curlret));
> > +               ret = -1;
> > +               goto cleanup;
> > +       }
> > +
> > +       curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &dl_thisfile);
> > +
> > +       /* did the transfer complete normally? */
> > +       if(dl_thisfile == 0) {
> > +               pm_errno = PM_ERR_RETRIEVE;
> > +               _alpm_log(PM_LOG_ERROR, _("failed retrieving file '%s' from %s\n"),
> > +                               filename, hostname);
> > +               ret = -1;
> > +               goto cleanup;
> > +       }
> > +
> > +       if(remote_size != -1 && local_size + dl_thisfile < remote_size) {
> > +               pm_errno = PM_ERR_RETRIEVE;
> > +               _alpm_log(PM_LOG_ERROR, _("%s appears to be truncated: %jd/%jd bytes\n"),
> > +                               filename, (intmax_t)(local_size + dl_thisfile), (intmax_t)remote_size);
> > +               ret = -1;
> > +               goto cleanup;
> > +       }
> > +
> > +       /* probably safer to close the file descriptors now before renaming the file,
> > +        * for example to make sure the buffers are flushed.
> > +        */
> > +       fclose(localf);
> > +       localf = NULL;
> > +
> > +       /* set the times on the file to the same as that of the remote file */
> > +       if(remote_time) {
> > +               struct timeval tv[2];
> > +               memset(&tv, 0, sizeof(tv));
> > +               tv[0].tv_sec = tv[1].tv_sec = remote_time;
> > +               utimes(tempfile, tv);
> > +       }
> > +       rename(tempfile, destfile);
> > +       ret = 0;
> > +
> > +cleanup:
> > +       FREE(tempfile);
> > +       FREE(destfile);
> > +       if(localf != NULL) {
> > +               /* if we still had a local file open, we got interrupted. set the mtimes on
> > +                * the file accordingly. */
> > +               fflush(localf);
> > +               if(remote_time) {
> > +                       struct timeval tv[2];
> > +                       memset(&tv, 0, sizeof(tv));
> > +                       tv[0].tv_sec = tv[1].tv_sec = remote_time;
> > +                       futimes(fileno(localf), tv);
> > +               }
> > +               fclose(localf);
> > +       }
> > +       if(curl != NULL) {
> > +               curl_easy_cleanup(curl);
> > +       }
> > +
> > +       /* restore the old signal handlers */
> > +       sigaction(SIGINT, &sig_int[OLD], NULL);
> > +       sigaction(SIGPIPE, &sig_pipe[OLD], NULL);
> > +       /* if we were interrupted, trip the old handler */
> > +       if(dload_interrupted) {
> > +               raise(SIGINT);
> > +       }
> > +
> > +       return(ret);
> > +}
> > +#endif
> > +
> >  static int download(const char *url, const char *localpath,
> >                int force) {
> >        if(handle->fetchcb == NULL) {
> > -#ifdef HAVE_LIBFETCH
> > +#if defined(HAVE_LIBFETCH) || defined(HAVE_LIBCURL)
> >                return(download_internal(url, localpath, force));
> >  #else
> >                RET_ERR(PM_ERR_EXTERNAL_DOWNLOAD, -1);
> > --
> > 1.7.3.4
> >
> >
> >
> 

