On Sun, Jul 03, 2011 at 01:56:05PM -0500, Dan McGee wrote:
On Fri, Jul 1, 2011 at 7:59 AM, Dave Reisner <d@falconindy.com> wrote:
From: Dave Reisner <d@falconindy.com>
URLs might end with a slash and follow redirects, or could be a generated by a script such as /getpkg.php?id=12345. In both cases, we may have a better filename that we can write to, taken from either content-disposition header, or the effective URL.
Specific to the first case, we write to a temporary file of the format 'alpmtmp.XXXXXX', where XXXXXX is randomized by mkstemp(3). Since this is a randomly generated file, we cannot support resuming and the file is unlinked in the event of an interrupt.
We also run into the possibility of changing out the filename from under alpm on a -U operation, so callers of _alpm_download can optionally pass a pointer to a *char to be filled in by curl_download_internal with the actual filename we wrote to. Any sync operation will pass a NULL pointer here, as we rely on specific names for packages from a mirror.
Fixes FS#22645.
Signed-off-by: Dave Reisner <d@falconindy.com> --- lib/libalpm/be_sync.c | 4 +- lib/libalpm/dload.c | 129 +++++++++++++++++++++++++++++++++++++++++-------- lib/libalpm/dload.h | 3 +- lib/libalpm/sync.c | 2 +- 4 files changed, 114 insertions(+), 24 deletions(-)
diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index ce418d5..16351f8 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -184,7 +184,7 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) CALLOC(fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1)); snprintf(fileurl, len, "%s/%s.db", server, db->treename);
- ret = _alpm_download(handle, fileurl, syncpath, force, 0, 0); + ret = _alpm_download(handle, fileurl, syncpath, NULL, force, 0, 0);
if(ret == 0 && (check_sig == PM_PGP_VERIFY_ALWAYS || check_sig == PM_PGP_VERIFY_OPTIONAL)) { @@ -201,7 +201,7 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) /* if we downloaded a DB, we want the .sig from the same server */ snprintf(fileurl, len, "%s/%s.db.sig", server, db->treename);
- sig_ret = _alpm_download(handle, fileurl, syncpath, 1, 0, errors_ok); + sig_ret = _alpm_download(handle, fileurl, syncpath, NULL, 1, 0, errors_ok); /* errors_ok suppresses error messages, but not the return code */ sig_ret = errors_ok ? 0 : sig_ret; } diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c index fd83aac..eefa285 100644 --- a/lib/libalpm/dload.c +++ b/lib/libalpm/dload.c @@ -148,16 +148,46 @@ static int utimes_long(const char *path, long seconds) return 0; }
+static size_t parse_headers(void *ptr, size_t size, size_t nmemb, void *user) +{ + size_t realsize = size * nmemb; + const char *fptr, *endptr = NULL; + const char * const cd_header = "Content-Disposition:"; + const char * const fn_key = "filename="; + struct fileinfo **dlfile = (struct fileinfo**)user; + + if(strncasecmp(cd_header, ptr, strlen(cd_header)) == 0) { Locale issues come into play? This string has an 'i'. If cURL normalizes headers one way or the other we should probably do a direct compare, but the strncasemp() isn't explicit regarding locale or anything, so it may just map lower/upper ASCII to the opposite ASCII and that is it.
Fixed by borrowing some of curl's rawstr.c and adding it to our sources.
+ if((fptr = strstr(ptr, fn_key))) { + fptr += strlen(fn_key); + + /* find the end of the field, which is either a semi-colon, or the end of + * the data. As per curl_easy_setopt(3), we cannot count on headers being + * null terminated, so we look for the closing \r\n */ + endptr = fptr + strcspn(fptr, ";\r\n") - 1; + + /* remove quotes */ + if(*fptr == '"' && *endptr == '"') { + fptr++; + endptr--; + } + + STRNDUP((*dlfile)->cd_filename, fptr, endptr - fptr + 1, + RET_ERR((*dlfile)->handle, PM_ERR_MEMORY, realsize)); + } + }
-static int curl_download_internal(alpm_handle_t *handle, - const char *url, const char *localpath, - int force, int allow_resume, int errors_ok) + return realsize; +} + +static int curl_download_internal(alpm_handle_t *handle, const char *url, + const char *localpath, char **final_file, int force, int allow_resume, + int errors_ok) I believe this hideous function signature gets addressed later, so I won't worry about it now.
{ - int ret = -1; + int ret = -1, should_unlink = 0; FILE *localf = NULL; const char *useragent; const char *open_mode = "wb"; - char *destfile, *tempfile; + char *destfile = NULL, *tempfile = NULL, *effective_url; /* RFC1123 states applications should support this length */ char hostname[256]; char error_buffer[CURL_ERROR_SIZE]; @@ -170,15 +200,39 @@ static int curl_download_internal(alpm_handle_t *handle, dlfile.handle = handle; dlfile.initial_size = 0.0; dlfile.filename = get_filename(url); + dlfile.cd_filename = NULL; if(!dlfile.filename || curl_gethost(url, hostname) != 0) { _alpm_log(handle, PM_LOG_ERROR, _("url '%s' is invalid\n"), url); RET_ERR(handle, PM_ERR_SERVER_BAD_URL, -1); }
- destfile = get_fullpath(localpath, dlfile.filename, ""); - tempfile = get_fullpath(localpath, dlfile.filename, ".part"); - if(!destfile || !tempfile) { - goto cleanup; + if(strlen(dlfile.filename) > 0 && strcmp(dlfile.filename, ".sig") != 0) { + destfile = get_fullpath(localpath, dlfile.filename, ""); + tempfile = get_fullpath(localpath, dlfile.filename, ".part"); + if(!destfile || !tempfile) { + goto cleanup; + } + } else { + /* URL isn't to a file and ended with a slash */ + int fd; + char randpath[PATH_MAX]; + + /* we can't support resuming this kind of download, so a partial transfer + * will be destroyed */ + should_unlink = 1; + + /* create a random filename, which is opened with O_EXCL */ + snprintf(randpath, PATH_MAX, "%salpmtmp.XXXXXX", localpath); + if((fd = mkstemp(randpath)) == -1 || !(localf = fdopen(fd, open_mode))) { + unlink(randpath); + close(fd); + _alpm_log(handle, PM_LOG_ERROR, + _("failed to create temporary file for download\n")); + goto cleanup; + } + /* localf now points to our alpmtmp.XXXXXX */ + STRDUP(tempfile, randpath, RET_ERR(handle, PM_ERR_MEMORY, -1)); + dlfile.filename = strrchr(randpath, '/') + 1; }
error_buffer[0] = '\0'; @@ -197,6 +251,8 @@ static int curl_download_internal(alpm_handle_t *handle, curl_easy_setopt(handle->curl, CURLOPT_PROGRESSDATA, (void *)&dlfile); curl_easy_setopt(handle->curl, CURLOPT_LOW_SPEED_LIMIT, 1024L); curl_easy_setopt(handle->curl, CURLOPT_LOW_SPEED_TIME, 10L); + curl_easy_setopt(handle->curl, CURLOPT_HEADERFUNCTION, parse_headers); + curl_easy_setopt(handle->curl, CURLOPT_WRITEHEADER, &dlfile);
useragent = getenv("HTTP_USER_AGENT"); if(useragent != NULL) { @@ -215,9 +271,11 @@ static int curl_download_internal(alpm_handle_t *handle, dlfile.initial_size = (double)st.st_size; }
- localf = fopen(tempfile, open_mode); if(localf == NULL) { - goto cleanup; + localf = fopen(tempfile, open_mode); + if(localf == NULL) { + goto cleanup; + } }
curl_easy_setopt(handle->curl, CURLOPT_WRITEDATA, localf); @@ -264,6 +322,7 @@ static int curl_download_internal(alpm_handle_t *handle, curl_easy_getinfo(handle->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &remote_size); curl_easy_getinfo(handle->curl, CURLINFO_SIZE_DOWNLOAD, &bytes_dl); curl_easy_getinfo(handle->curl, CURLINFO_CONDITION_UNMET, &timecond); + curl_easy_getinfo(handle->curl, CURLINFO_EFFECTIVE_URL, &effective_url);
/* time condition was met and we didn't download anything. we need to * clean up the 0 byte .part file that's left behind. */ @@ -284,6 +343,26 @@ static int curl_download_internal(alpm_handle_t *handle, goto cleanup; }
+ if(dlfile.cd_filename) { + /* content-disposition header has a better name for our file */ + free(destfile); + destfile = get_fullpath(localpath, dlfile.cd_filename, ""); + } else { + const char *effective_filename = strrchr(effective_url, '/'); + if(effective_filename) { + effective_filename++; + + /* if destfile was never set, we wrote to a tempfile. even if destfile is + * set, we may have followed some redirects and the effective url may + * have a better suggestion as to what to name our file. in either case, + * refactor destfile to this newly derived name. */ + if(!destfile || strcmp(effective_filename, strrchr(destfile, '/') + 1) != 0) { Any chance of strrchr returning NULL here?
Not as far as I can tell. destfile includes localpath, which is our CacheDir. It'd have to be pretty fucked up not to include a /. Agree?
+ free(destfile); + destfile = get_fullpath(localpath, effective_filename, ""); + } + } + } + ret = 0;
cleanup: @@ -294,10 +373,18 @@ cleanup:
if(ret == 0) { rename(tempfile, destfile); + if(final_file) { + *final_file = strdup(strrchr(destfile, '/') + 1); Same concern as above with strrchr(). STRDUP would be nice too.
lack of STRDUP is laziness. Same as above wrt strrchr and NULL.
+ } + } + + if(dload_interrupted && should_unlink) { + unlink(tempfile); }
FREE(tempfile); FREE(destfile); + FREE(dlfile.cd_filename);
/* restore the old signal handlers */ sigaction(SIGINT, &sig_int[OLD], NULL); @@ -316,18 +403,19 @@ cleanup: * @param handle the context handle * @param url the file's URL * @param localpath the directory to save the file in + * @param final_file the real name of the downloaded file (may be NULL) * @param force force download even if there is an up-to-date local copy * @param allow_resume allow a partial download to be resumed * @param errors_ok do not log errors (but still return them) * @return 0 on success, -1 on error (pm_errno is set accordingly if errors_ok == 0) */ int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath, - int force, int allow_resume, int errors_ok) + char **final_file, int force, int allow_resume, int errors_ok) { if(handle->fetchcb == NULL) { #ifdef HAVE_LIBCURL - return curl_download_internal(handle, url, localpath, - force, allow_resume, errors_ok); + return curl_download_internal(handle, url, localpath, final_file, force, + allow_resume, errors_ok); #else RET_ERR(handle, PM_ERR_EXTERNAL_DOWNLOAD, -1); #endif @@ -344,18 +432,17 @@ int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url) { char *filepath; - const char *filename, *cachedir; + const char *cachedir; + char *final_file = NULL; int ret;
CHECK_HANDLE(handle, return NULL);
- filename = get_filename(url); - /* find a valid cache dir to download to */ cachedir = _alpm_filecache_setup(handle);
/* download the file */ - ret = _alpm_download(handle, url, cachedir, 0, 1, 0); + ret = _alpm_download(handle, url, cachedir, &final_file, 0, 1, 0); if(ret == -1) { _alpm_log(handle, PM_LOG_WARNING, _("failed to download %s\n"), url); return NULL; @@ -373,7 +460,7 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url) CALLOC(sig_url, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, NULL)); snprintf(sig_url, len, "%s.sig", url);
- ret = _alpm_download(handle, sig_url, cachedir, 1, 0, errors_ok); + ret = _alpm_download(handle, sig_url, cachedir, &final_file, 1, 0, errors_ok); if(ret == -1 && !errors_ok) { _alpm_log(handle, PM_LOG_WARNING, _("failed to download %s\n"), sig_url); /* Warn now, but don't return NULL. We will fail later during package @@ -385,7 +472,9 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url) }
/* we should be able to find the file the second time around */ - filepath = _alpm_filecache_find(handle, filename); + filepath = _alpm_filecache_find(handle, final_file); + FREE(final_file); + return filepath; }
diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h index 0cdd900..351b2b3 100644 --- a/lib/libalpm/dload.h +++ b/lib/libalpm/dload.h @@ -29,11 +29,12 @@ struct fileinfo { alpm_handle_t *handle; const char *filename; + char *cd_filename; double initial_size; };
int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath, - int force, int allow_resume, int errors_ok); + char **final_file, int force, int allow_resume, int errors_ok);
#endif /* _ALPM_DLOAD_H */
diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c index 8f03840..8df8a17 100644 --- a/lib/libalpm/sync.c +++ b/lib/libalpm/sync.c @@ -797,7 +797,7 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas) CALLOC(fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1)); snprintf(fileurl, len, "%s/%s", server_url, filename);
- ret = _alpm_download(handle, fileurl, cachedir, 0, 1, 0); + ret = _alpm_download(handle, fileurl, cachedir, NULL, 0, 1, 0); FREE(fileurl); if(ret != -1) { break; -- 1.7.6