[pacman-dev] [PATCH] dload: handle irregular URLs

Dan McGee dpmcgee at gmail.com
Mon Jun 13 19:35:56 EDT 2011


On Sat, Jun 11, 2011 at 2:16 PM, Dave Reisner <d at falconindy.com> wrote:
> URLs might end with a slash and follow redirects, or could be
> generated by a script such as /getpkg.php?id=12345. In both cases, we
> may have a better filename that we can write to, taken from either
> the Content-Disposition header or the effective URL.
>
> Specific to the first case, we write to a temporary file of the format
> 'alpmtmp.XXXXXX', where XXXXXX is randomized by mkstemp(3). Since this
> is a randomly generated file, we cannot support resuming and the file is
> unlinked in the event of an interrupt.
>
> We also run into the possibility of changing out the filename from under
> alpm on a -U operation, so callers of _alpm_download can optionally pass
> a pointer to a *char to be filled in by curl_download_internal with the
> actual filename we wrote to. Any sync operation will pass a NULL pointer
> here, as we rely on specific names for packages from a mirror.
>
> Fixes FS#22645.
>
> Signed-off-by: Dave Reisner <d at falconindy.com>
> ---
> There's one hack in here that I'm not happy with, so feedback is welcome.
> Basically, the signature file is guaranteed to have a length, because we
> simply append '.sig' to the URL. In the case of a URL ending with a /, this
> results in downloading a file called '.sig'. This patchwork hardcodes the
> edge case and forces the use of a temporary file. It's not ideal, but it
> works.
I'm not totally against this hack; it's ugly, but it does work.

>
> Any other comments are also, of course, welcome.
Regarding your other comments, I'd say take this one piece at a time.
This addresses 90% of what people (me?) were having trouble with
before, and that last 10% is going to be a lot more work. I'm not
saying it shouldn't be done, but if you are happy with this patch the
way it stands addressing the immediate issues, let's apply it and then
make further changes as necessary.

> d
>
>
>  lib/libalpm/be_sync.c |    4 +-
>  lib/libalpm/dload.c   |  120 ++++++++++++++++++++++++++++++++++++++++++-------
>  lib/libalpm/dload.h   |    2 +-
>  lib/libalpm/sync.c    |    2 +-
>  4 files changed, 107 insertions(+), 21 deletions(-)
>
> diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c
> index a75cbda..c12fff1 100644
> --- a/lib/libalpm/be_sync.c
> +++ b/lib/libalpm/be_sync.c
> @@ -127,7 +127,7 @@ int SYMEXPORT alpm_db_update(int force, pmdb_t *db)
>                CALLOC(fileurl, len, sizeof(char), RET_ERR(PM_ERR_MEMORY, -1));
>                snprintf(fileurl, len, "%s/%s.db", server, db->treename);
>
> -               ret = _alpm_download(fileurl, syncpath, force, 0, 0);
> +               ret = _alpm_download(fileurl, syncpath, NULL, force, 0, 0);
>
>                if(ret == 0 && (check_sig == PM_PGP_VERIFY_ALWAYS ||
>                                        check_sig == PM_PGP_VERIFY_OPTIONAL)) {
> @@ -135,7 +135,7 @@ int SYMEXPORT alpm_db_update(int force, pmdb_t *db)
>                        /* if we downloaded a DB, we want the .sig from the same server */
>                        snprintf(fileurl, len, "%s/%s.db.sig", server, db->treename);
>
> -                       sig_ret = _alpm_download(fileurl, syncpath, 1, 0, errors_ok);
> +                       sig_ret = _alpm_download(fileurl, syncpath, NULL, 1, 0, errors_ok);
>                        /* errors_ok suppresses error messages, but not the return code */
>                        sig_ret = errors_ok ? 0 : sig_ret;
>                }
> diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> index 0ba3bc1..b37ca02 100644
> --- a/lib/libalpm/dload.c
> +++ b/lib/libalpm/dload.c
> @@ -152,15 +152,45 @@ static int utimes_long(const char *path, long time)
>        return 0;
>  }
>
> +static size_t parse_headers(void *ptr, size_t size, size_t nmemb, void *user)
> +{
> +       size_t realsize = size * nmemb;
> +       const char *fptr, *endptr = NULL;
> +       const char * const cd_header = "Content-Disposition:";
> +       const char * const fn_key = "filename=";
> +       char **cd_filename = (char**)user;
> +
> +       if(strncasecmp(cd_header, ptr, strlen(cd_header)) == 0) {
> +               if((fptr = strstr(ptr, fn_key))) {
> +                       fptr += strlen(fn_key);
> +
> +                       /* find the end of the field, which is either a semi-colon, or the end of
> +                        * the data. As per curl_easy_setopt(3), we cannot count on headers being
> +                        * null terminated, so we look for the closing \r\n */
> +                       endptr = fptr + strcspn(fptr, ";\r\n") - 1;
> +
> +                       /* remove quotes */
> +                       if(*fptr == '"' && *endptr == '"') {
> +                               fptr++;
> +                               endptr--;
> +                       }
> +
> +                       STRNDUP(*cd_filename, fptr, endptr - fptr + 1, RET_ERR(PM_ERR_MEMORY, realsize));
> +               }
> +       }
> +
> +       return realsize;
> +}
>
>  static int curl_download_internal(const char *url, const char *localpath,
> -               int force, int allow_resume, int errors_ok)
> +               char **final_file, int force, int allow_resume, int errors_ok)
>  {
> -       int ret = -1;
> +       int ret = -1, should_unlink = 0;
>        FILE *localf = NULL;
>        const char *useragent;
>        const char *open_mode = "wb";
> -       char *destfile, *tempfile;
> +       char *destfile = NULL, *tempfile = NULL, *effective_url;
> +       char *cd_filename = NULL; /* filename from content-disposition header */
This is perhaps a personal preference, but we don't have many same-line
(trailing) comments in the codebase anyway. I'd rather the comment go on
its own line above the declaration (you have some others in this patch too).

>        /* RFC1123 states applications should support this length */
>        char hostname[256];
>        char error_buffer[CURL_ERROR_SIZE];
> @@ -177,10 +207,31 @@ static int curl_download_internal(const char *url, const char *localpath,
>                RET_ERR(PM_ERR_SERVER_BAD_URL, -1);
>        }
>
> -       destfile = get_fullpath(localpath, dlfile.filename, "");
> -       tempfile = get_fullpath(localpath, dlfile.filename, ".part");
> -       if(!destfile || !tempfile) {
> -               goto cleanup;
> +       if(strlen(dlfile.filename) > 0 && strcmp(dlfile.filename, ".sig") != 0) {
> +               destfile = get_fullpath(localpath, dlfile.filename, "");
> +               tempfile = get_fullpath(localpath, dlfile.filename, ".part");
> +               if(!destfile || !tempfile) {
> +                       goto cleanup;
> +               }
> +       } else { /* URL isn't to a file and ended with a slash */
^^ Same-line comment again; as mentioned above, I'd rather it be on its own line.
> +               int fd;
> +               char randpath[PATH_MAX];
> +
> +               /* we can't support resuming this kind of download, so a partial transfer
> +                * will be destroyed */
> +               should_unlink = 1;
> +
> +               /* create a random filename, which is opened with O_EXCL */
> +               snprintf(randpath, PATH_MAX, "%salpmtmp.XXXXXX", localpath);
> +               if((fd = mkstemp(randpath)) == -1 || !(localf = fdopen(fd, "w+"))) {
> +                       unlink(randpath);
> +                       close(fd);
> +                       _alpm_log(PM_LOG_ERROR, _("failed to create temporary file for download\n"));
> +                       goto cleanup;
> +               }
> +               /* localf now points to our alpmtmp.XXXXXX */
> +               STRDUP(tempfile, randpath, RET_ERR(PM_ERR_MEMORY, -1));
> +               dlfile.filename = strrchr(randpath, '/') + 1;
>        }
>
>        error_buffer[0] = '\0';
> @@ -199,6 +250,8 @@ static int curl_download_internal(const char *url, const char *localpath,
>        curl_easy_setopt(handle->curl, CURLOPT_PROGRESSDATA, (void *)&dlfile);
>        curl_easy_setopt(handle->curl, CURLOPT_LOW_SPEED_LIMIT, 1024L);
>        curl_easy_setopt(handle->curl, CURLOPT_LOW_SPEED_TIME, 10L);
> +       curl_easy_setopt(handle->curl, CURLOPT_HEADERFUNCTION, parse_headers);
> +       curl_easy_setopt(handle->curl, CURLOPT_WRITEHEADER, &cd_filename);
>
>        useragent = getenv("HTTP_USER_AGENT");
>        if(useragent != NULL) {
> @@ -217,9 +270,11 @@ static int curl_download_internal(const char *url, const char *localpath,
>                dlfile.initial_size = (double)st.st_size;
>        }
>
> -       localf = fopen(tempfile, open_mode);
>        if(localf == NULL) {
> -               goto cleanup;
> +               localf = fopen(tempfile, open_mode);
> +               if(localf == NULL) {
> +                       goto cleanup;
> +               }
>        }
>
>        curl_easy_setopt(handle->curl, CURLOPT_WRITEDATA, localf);
> @@ -266,6 +321,7 @@ static int curl_download_internal(const char *url, const char *localpath,
>        curl_easy_getinfo(handle->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &remote_size);
>        curl_easy_getinfo(handle->curl, CURLINFO_SIZE_DOWNLOAD, &bytes_dl);
>        curl_easy_getinfo(handle->curl, CURLINFO_CONDITION_UNMET, &timecond);
> +       curl_easy_getinfo(handle->curl, CURLINFO_EFFECTIVE_URL, &effective_url);
>
>        /* time condition was met and we didn't download anything. we need to
>         * clean up the 0 byte .part file that's left behind. */
> @@ -286,6 +342,26 @@ static int curl_download_internal(const char *url, const char *localpath,
>                goto cleanup;
>        }
>
> +       if(cd_filename) {
> +               /* content-disposition header has a better name for our file */
> +               free(destfile);
> +               destfile = get_fullpath(localpath, cd_filename, "");
> +       } else {
> +               const char *effective_filename = strrchr(effective_url, '/');
> +               if(effective_filename) {
> +                       effective_filename++;
> +
> +                       /* if destfile was never set, we wrote to a tempfile. even if destfile is
> +                        * set, we may have followed some redirects and the effective url may
> +                        * have a better suggestion as to what to name our file. in either case,
> +                        * refactor destfile to this newly derived name. */
> +                       if(!destfile || strcmp(effective_filename, strrchr(destfile, '/') + 1) != 0) {
> +                               free(destfile);
> +                               destfile = get_fullpath(localpath, effective_filename, "");
> +                       }
> +               }
> +       }
> +
>        ret = 0;
>
>  cleanup:
> @@ -296,10 +372,18 @@ cleanup:
>
>        if(ret == 0) {
>                rename(tempfile, destfile);
> +               if(final_file) {
> +                       *final_file = strdup(strrchr(destfile, '/') + 1);
> +               }
> +       }
> +
> +       if(dload_interrupted && should_unlink) {
> +               unlink(tempfile);
>        }
>
>        FREE(tempfile);
>        FREE(destfile);
> +       FREE(cd_filename);
>
>        /* restore the old signal handlers */
>        sigaction(SIGINT, &sig_int[OLD], NULL);
> @@ -313,12 +397,13 @@ cleanup:
>  }
>  #endif
>
> -int _alpm_download(const char *url, const char *localpath,
> +int _alpm_download(const char *url, const char *localpath, char **final_file,
>                int force, int allow_resume, int errors_ok)
>  {
>        if(handle->fetchcb == NULL) {
>  #ifdef HAVE_LIBCURL
> -               return curl_download_internal(url, localpath, force, allow_resume, errors_ok);
> +               return curl_download_internal(url, localpath, final_file, force, allow_resume,
> +                               errors_ok);
>  #else
>                RET_ERR(PM_ERR_EXTERNAL_DOWNLOAD, -1);
>  #endif
> @@ -335,16 +420,15 @@ int _alpm_download(const char *url, const char *localpath,
>  char SYMEXPORT *alpm_fetch_pkgurl(pmhandle_t *handle, const char *url)
>  {
>        char *filepath;
> -       const char *filename, *cachedir;
> +       const char *cachedir;
> +       char *final_file = NULL;
>        int ret;
>
> -       filename = get_filename(url);
> -
>        /* find a valid cache dir to download to */
>        cachedir = _alpm_filecache_setup(handle);
>
>        /* download the file */
> -       ret = _alpm_download(url, cachedir, 0, 1, 0);
> +       ret = _alpm_download(url, cachedir, &final_file, 0, 1, 0);
>        if(ret == -1) {
>                _alpm_log(PM_LOG_WARNING, _("failed to download %s\n"), url);
>                return NULL;
> @@ -362,7 +446,7 @@ char SYMEXPORT *alpm_fetch_pkgurl(pmhandle_t *handle, const char *url)
>                CALLOC(sig_url, len, sizeof(char), RET_ERR(PM_ERR_MEMORY, NULL));
>                snprintf(sig_url, len, "%s.sig", url);
>
> -               ret = _alpm_download(sig_url, cachedir, 1, 0, errors_ok);
> +               ret = _alpm_download(sig_url, cachedir, &final_file, 1, 0, errors_ok);
>                if(ret == -1 && !errors_ok) {
>                        _alpm_log(PM_LOG_WARNING, _("failed to download %s\n"), sig_url);
>                        /* Warn now, but don't return NULL. We will fail later during package
> @@ -374,7 +458,9 @@ char SYMEXPORT *alpm_fetch_pkgurl(pmhandle_t *handle, const char *url)
>        }
>
>        /* we should be able to find the file the second time around */
> -       filepath = _alpm_filecache_find(handle, filename);
> +       filepath = _alpm_filecache_find(handle, final_file);
> +       FREE(final_file);
> +
>        return filepath;
>  }
>
> diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h
> index f4fd14c..c5d05b0 100644
> --- a/lib/libalpm/dload.h
> +++ b/lib/libalpm/dload.h
> @@ -32,7 +32,7 @@ struct fileinfo {
>  };
>
>  int _alpm_download(const char *url, const char *localpath,
> -               int force, int allow_resume, int errors_ok);
> +               char **final_file, int force, int allow_resume, int errors_ok);
>
>  #endif /* _ALPM_DLOAD_H */
>
> diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
> index 16be6d9..12a8c90 100644
> --- a/lib/libalpm/sync.c
> +++ b/lib/libalpm/sync.c
> @@ -794,7 +794,7 @@ static int download_files(pmhandle_t *handle, alpm_list_t **deltas)
>                                        CALLOC(fileurl, len, sizeof(char), RET_ERR(PM_ERR_MEMORY, -1));
>                                        snprintf(fileurl, len, "%s/%s", server_url, filename);
>
> -                                       ret = _alpm_download(fileurl, cachedir, 0, 1, 0);
> +                                       ret = _alpm_download(fileurl, cachedir, NULL, 0, 1, 0);
>                                        FREE(fileurl);
>                                        if(ret != -1) {
>                                                break;
> --
> 1.7.5.4
>
>
>


More information about the pacman-dev mailing list