[pacman-dev] [PATCH 1/4] dload: handle irregular URLs

Dan McGee dpmcgee at gmail.com
Sun Jul 3 14:56:05 EDT 2011


On Fri, Jul 1, 2011 at 7:59 AM, Dave Reisner <d at falconindy.com> wrote:
> From: Dave Reisner <d at falconindy.com>
>
> URLs might end with a slash and follow redirects, or could be a
> generated by a script such as /getpkg.php?id=12345. In both cases, we
> may have a better filename that we can write to, taken from either
> content-disposition header, or the effective URL.
>
> Specific to the first case, we write to a temporary file of the format
> 'alpmtmp.XXXXXX', where XXXXXX is randomized by mkstemp(3). Since this
> is a randomly generated file, we cannot support resuming and the file is
> unlinked in the event of an interrupt.
>
> We also run into the possibility of changing out the filename from under
> alpm on a -U operation, so callers of _alpm_download can optionally pass
> a pointer to a *char to be filled in by curl_download_internal with the
> actual filename we wrote to. Any sync operation will pass a NULL pointer
> here, as we rely on specific names for packages from a mirror.
>
> Fixes FS#22645.
>
> Signed-off-by: Dave Reisner <d at falconindy.com>
> ---
>  lib/libalpm/be_sync.c |    4 +-
>  lib/libalpm/dload.c   |  129 +++++++++++++++++++++++++++++++++++++++++--------
>  lib/libalpm/dload.h   |    3 +-
>  lib/libalpm/sync.c    |    2 +-
>  4 files changed, 114 insertions(+), 24 deletions(-)
>
> diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c
> index ce418d5..16351f8 100644
> --- a/lib/libalpm/be_sync.c
> +++ b/lib/libalpm/be_sync.c
> @@ -184,7 +184,7 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
>                CALLOC(fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1));
>                snprintf(fileurl, len, "%s/%s.db", server, db->treename);
>
> -               ret = _alpm_download(handle, fileurl, syncpath, force, 0, 0);
> +               ret = _alpm_download(handle, fileurl, syncpath, NULL, force, 0, 0);
>
>                if(ret == 0 && (check_sig == PM_PGP_VERIFY_ALWAYS ||
>                                        check_sig == PM_PGP_VERIFY_OPTIONAL)) {
> @@ -201,7 +201,7 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
>                        /* if we downloaded a DB, we want the .sig from the same server */
>                        snprintf(fileurl, len, "%s/%s.db.sig", server, db->treename);
>
> -                       sig_ret = _alpm_download(handle, fileurl, syncpath, 1, 0, errors_ok);
> +                       sig_ret = _alpm_download(handle, fileurl, syncpath, NULL, 1, 0, errors_ok);
>                        /* errors_ok suppresses error messages, but not the return code */
>                        sig_ret = errors_ok ? 0 : sig_ret;
>                }
> diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> index fd83aac..eefa285 100644
> --- a/lib/libalpm/dload.c
> +++ b/lib/libalpm/dload.c
> @@ -148,16 +148,46 @@ static int utimes_long(const char *path, long seconds)
>        return 0;
>  }
>
> +static size_t parse_headers(void *ptr, size_t size, size_t nmemb, void *user)
> +{
> +       size_t realsize = size * nmemb;
> +       const char *fptr, *endptr = NULL;
> +       const char * const cd_header = "Content-Disposition:";
> +       const char * const fn_key = "filename=";
> +       struct fileinfo **dlfile = (struct fileinfo**)user;
> +
> +       if(strncasecmp(cd_header, ptr, strlen(cd_header)) == 0) {
Locale issues come into play? This string has an 'i'. If cURL
normalizes headers one way or the other we should probably do a direct
compare, but the strncasemp() isn't explicit regarding locale or
anything, so it may just map lower/upper ASCII to the opposite ASCII
and that is it.

> +               if((fptr = strstr(ptr, fn_key))) {
> +                       fptr += strlen(fn_key);
> +
> +                       /* find the end of the field, which is either a semi-colon, or the end of
> +                        * the data. As per curl_easy_setopt(3), we cannot count on headers being
> +                        * null terminated, so we look for the closing \r\n */
> +                       endptr = fptr + strcspn(fptr, ";\r\n") - 1;
> +
> +                       /* remove quotes */
> +                       if(*fptr == '"' && *endptr == '"') {
> +                               fptr++;
> +                               endptr--;
> +                       }
> +
> +                       STRNDUP((*dlfile)->cd_filename, fptr, endptr - fptr + 1,
> +                                       RET_ERR((*dlfile)->handle, PM_ERR_MEMORY, realsize));
> +               }
> +       }
>
> -static int curl_download_internal(alpm_handle_t *handle,
> -               const char *url, const char *localpath,
> -               int force, int allow_resume, int errors_ok)
> +       return realsize;
> +}
> +
> +static int curl_download_internal(alpm_handle_t *handle, const char *url,
> +               const char *localpath, char **final_file, int force, int allow_resume,
> +               int errors_ok)
I believe this hideous function signature gets addressed later, so I
won't worry about it now.

>  {
> -       int ret = -1;
> +       int ret = -1, should_unlink = 0;
>        FILE *localf = NULL;
>        const char *useragent;
>        const char *open_mode = "wb";
> -       char *destfile, *tempfile;
> +       char *destfile = NULL, *tempfile = NULL, *effective_url;
>        /* RFC1123 states applications should support this length */
>        char hostname[256];
>        char error_buffer[CURL_ERROR_SIZE];
> @@ -170,15 +200,39 @@ static int curl_download_internal(alpm_handle_t *handle,
>        dlfile.handle = handle;
>        dlfile.initial_size = 0.0;
>        dlfile.filename = get_filename(url);
> +       dlfile.cd_filename = NULL;
>        if(!dlfile.filename || curl_gethost(url, hostname) != 0) {
>                _alpm_log(handle, PM_LOG_ERROR, _("url '%s' is invalid\n"), url);
>                RET_ERR(handle, PM_ERR_SERVER_BAD_URL, -1);
>        }
>
> -       destfile = get_fullpath(localpath, dlfile.filename, "");
> -       tempfile = get_fullpath(localpath, dlfile.filename, ".part");
> -       if(!destfile || !tempfile) {
> -               goto cleanup;
> +       if(strlen(dlfile.filename) > 0 && strcmp(dlfile.filename, ".sig") != 0) {
> +               destfile = get_fullpath(localpath, dlfile.filename, "");
> +               tempfile = get_fullpath(localpath, dlfile.filename, ".part");
> +               if(!destfile || !tempfile) {
> +                       goto cleanup;
> +               }
> +       } else {
> +               /* URL isn't to a file and ended with a slash */
> +               int fd;
> +               char randpath[PATH_MAX];
> +
> +               /* we can't support resuming this kind of download, so a partial transfer
> +                * will be destroyed */
> +               should_unlink = 1;
> +
> +               /* create a random filename, which is opened with O_EXCL */
> +               snprintf(randpath, PATH_MAX, "%salpmtmp.XXXXXX", localpath);
> +               if((fd = mkstemp(randpath)) == -1 || !(localf = fdopen(fd, open_mode))) {
> +                       unlink(randpath);
> +                       close(fd);
> +                       _alpm_log(handle, PM_LOG_ERROR,
> +                                       _("failed to create temporary file for download\n"));
> +                       goto cleanup;
> +               }
> +               /* localf now points to our alpmtmp.XXXXXX */
> +               STRDUP(tempfile, randpath, RET_ERR(handle, PM_ERR_MEMORY, -1));
> +               dlfile.filename = strrchr(randpath, '/') + 1;
>        }
>
>        error_buffer[0] = '\0';
> @@ -197,6 +251,8 @@ static int curl_download_internal(alpm_handle_t *handle,
>        curl_easy_setopt(handle->curl, CURLOPT_PROGRESSDATA, (void *)&dlfile);
>        curl_easy_setopt(handle->curl, CURLOPT_LOW_SPEED_LIMIT, 1024L);
>        curl_easy_setopt(handle->curl, CURLOPT_LOW_SPEED_TIME, 10L);
> +       curl_easy_setopt(handle->curl, CURLOPT_HEADERFUNCTION, parse_headers);
> +       curl_easy_setopt(handle->curl, CURLOPT_WRITEHEADER, &dlfile);
>
>        useragent = getenv("HTTP_USER_AGENT");
>        if(useragent != NULL) {
> @@ -215,9 +271,11 @@ static int curl_download_internal(alpm_handle_t *handle,
>                dlfile.initial_size = (double)st.st_size;
>        }
>
> -       localf = fopen(tempfile, open_mode);
>        if(localf == NULL) {
> -               goto cleanup;
> +               localf = fopen(tempfile, open_mode);
> +               if(localf == NULL) {
> +                       goto cleanup;
> +               }
>        }
>
>        curl_easy_setopt(handle->curl, CURLOPT_WRITEDATA, localf);
> @@ -264,6 +322,7 @@ static int curl_download_internal(alpm_handle_t *handle,
>        curl_easy_getinfo(handle->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &remote_size);
>        curl_easy_getinfo(handle->curl, CURLINFO_SIZE_DOWNLOAD, &bytes_dl);
>        curl_easy_getinfo(handle->curl, CURLINFO_CONDITION_UNMET, &timecond);
> +       curl_easy_getinfo(handle->curl, CURLINFO_EFFECTIVE_URL, &effective_url);
>
>        /* time condition was met and we didn't download anything. we need to
>         * clean up the 0 byte .part file that's left behind. */
> @@ -284,6 +343,26 @@ static int curl_download_internal(alpm_handle_t *handle,
>                goto cleanup;
>        }
>
> +       if(dlfile.cd_filename) {
> +               /* content-disposition header has a better name for our file */
> +               free(destfile);
> +               destfile = get_fullpath(localpath, dlfile.cd_filename, "");
> +       } else {
> +               const char *effective_filename = strrchr(effective_url, '/');
> +               if(effective_filename) {
> +                       effective_filename++;
> +
> +                       /* if destfile was never set, we wrote to a tempfile. even if destfile is
> +                        * set, we may have followed some redirects and the effective url may
> +                        * have a better suggestion as to what to name our file. in either case,
> +                        * refactor destfile to this newly derived name. */
> +                       if(!destfile || strcmp(effective_filename, strrchr(destfile, '/') + 1) != 0) {
Any chance of strrchr returning NULL here?

> +                               free(destfile);
> +                               destfile = get_fullpath(localpath, effective_filename, "");
> +                       }
> +               }
> +       }
> +
>        ret = 0;
>
>  cleanup:
> @@ -294,10 +373,18 @@ cleanup:
>
>        if(ret == 0) {
>                rename(tempfile, destfile);
> +               if(final_file) {
> +                       *final_file = strdup(strrchr(destfile, '/') + 1);
Same concern as above with strrchr(). STRDUP would be nice too.

> +               }
> +       }
> +
> +       if(dload_interrupted && should_unlink) {
> +               unlink(tempfile);
>        }
>
>        FREE(tempfile);
>        FREE(destfile);
> +       FREE(dlfile.cd_filename);
>
>        /* restore the old signal handlers */
>        sigaction(SIGINT, &sig_int[OLD], NULL);
> @@ -316,18 +403,19 @@ cleanup:
>  * @param handle the context handle
>  * @param url the file's URL
>  * @param localpath the directory to save the file in
> + * @param final_file the real name of the downloaded file (may be NULL)
>  * @param force force download even if there is an up-to-date local copy
>  * @param allow_resume allow a partial download to be resumed
>  * @param errors_ok do not log errors (but still return them)
>  * @return 0 on success, -1 on error (pm_errno is set accordingly if errors_ok == 0)
>  */
>  int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath,
> -               int force, int allow_resume, int errors_ok)
> +               char **final_file, int force, int allow_resume, int errors_ok)
>  {
>        if(handle->fetchcb == NULL) {
>  #ifdef HAVE_LIBCURL
> -               return curl_download_internal(handle, url, localpath,
> -                               force, allow_resume, errors_ok);
> +               return curl_download_internal(handle, url, localpath, final_file, force,
> +                               allow_resume, errors_ok);
>  #else
>                RET_ERR(handle, PM_ERR_EXTERNAL_DOWNLOAD, -1);
>  #endif
> @@ -344,18 +432,17 @@ int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath
>  char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
>  {
>        char *filepath;
> -       const char *filename, *cachedir;
> +       const char *cachedir;
> +       char *final_file = NULL;
>        int ret;
>
>        CHECK_HANDLE(handle, return NULL);
>
> -       filename = get_filename(url);
> -
>        /* find a valid cache dir to download to */
>        cachedir = _alpm_filecache_setup(handle);
>
>        /* download the file */
> -       ret = _alpm_download(handle, url, cachedir, 0, 1, 0);
> +       ret = _alpm_download(handle, url, cachedir, &final_file, 0, 1, 0);
>        if(ret == -1) {
>                _alpm_log(handle, PM_LOG_WARNING, _("failed to download %s\n"), url);
>                return NULL;
> @@ -373,7 +460,7 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
>                CALLOC(sig_url, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, NULL));
>                snprintf(sig_url, len, "%s.sig", url);
>
> -               ret = _alpm_download(handle, sig_url, cachedir, 1, 0, errors_ok);
> +               ret = _alpm_download(handle, sig_url, cachedir, &final_file, 1, 0, errors_ok);
>                if(ret == -1 && !errors_ok) {
>                        _alpm_log(handle, PM_LOG_WARNING, _("failed to download %s\n"), sig_url);
>                        /* Warn now, but don't return NULL. We will fail later during package
> @@ -385,7 +472,9 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
>        }
>
>        /* we should be able to find the file the second time around */
> -       filepath = _alpm_filecache_find(handle, filename);
> +       filepath = _alpm_filecache_find(handle, final_file);
> +       FREE(final_file);
> +
>        return filepath;
>  }
>
> diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h
> index 0cdd900..351b2b3 100644
> --- a/lib/libalpm/dload.h
> +++ b/lib/libalpm/dload.h
> @@ -29,11 +29,12 @@
>  struct fileinfo {
>        alpm_handle_t *handle;
>        const char *filename;
> +       char *cd_filename;
>        double initial_size;
>  };
>
>  int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath,
> -               int force, int allow_resume, int errors_ok);
> +               char **final_file, int force, int allow_resume, int errors_ok);
>
>  #endif /* _ALPM_DLOAD_H */
>
> diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
> index 8f03840..8df8a17 100644
> --- a/lib/libalpm/sync.c
> +++ b/lib/libalpm/sync.c
> @@ -797,7 +797,7 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas)
>                                        CALLOC(fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1));
>                                        snprintf(fileurl, len, "%s/%s", server_url, filename);
>
> -                                       ret = _alpm_download(handle, fileurl, cachedir, 0, 1, 0);
> +                                       ret = _alpm_download(handle, fileurl, cachedir, NULL, 0, 1, 0);
>                                        FREE(fileurl);
>                                        if(ret != -1) {
>                                                break;
> --
> 1.7.6


More information about the pacman-dev mailing list