[pacman-dev] [PATCH] Implement multiplexed download using mCURL
Allan McRae
allan at archlinux.org
Wed Mar 25 11:37:45 UTC 2020
On 10/3/20 10:28 am, Anatol Pomozov wrote:
> curl_multi_download_internal() is the main loop that creates up to
> 'ParallelDownloads' easy curl handles, adds them to mcurl and then
> performs curl execution. This is when the parallel downloads happen.
> Once any of the downloads complete the function checks its result.
> If the download fails, it initiates a retry with the next server
> from payload->servers list. At the download completion all the payload
> resources are cleaned up.
>
> curl_multi_handle_single_done() is essentially refactored version of
> curl_download_internal() adopted for multi_curl. Once mcurl porting is
> complete curl_download_internal() will be removed.
>
> Signed-off-by: Anatol Pomozov <anatol.pomozov at gmail.com>
> ---
> lib/libalpm/dload.c | 361 +++++++++++++++++++++++++++++++++++++++++++-
> lib/libalpm/dload.h | 2 +
> 2 files changed, 359 insertions(+), 4 deletions(-)
>
> diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> index 3570e234..06bce330 100644
> --- a/lib/libalpm/dload.c
> +++ b/lib/libalpm/dload.c
> @@ -18,6 +18,7 @@
> * along with this program. If not, see <http://www.gnu.org/licenses/>.
> */
>
> +#include <assert.h>
Use our ASSERT define instead. Allows for cleanup.
> #include <stdlib.h>
> #include <stdio.h>
> #include <errno.h>
> @@ -271,6 +272,8 @@ static void curl_set_handle_opts(struct dload_payload *payload,
> curl_easy_setopt(curl, CURLOPT_TCP_KEEPINTVL, 60L);
> curl_easy_setopt(curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
>
Extra newline not needed.
> + curl_easy_setopt(curl, CURLOPT_PRIVATE, (void *)payload);
> +
> _alpm_log(handle, ALPM_LOG_DEBUG, "url: %s\n", payload->fileurl);
>
> if(payload->max_size) {
> @@ -601,15 +604,365 @@ cleanup:
> return ret;
> }
>
> +/* Return 0 if retry was successful, -1 otherwise */
> +static int curl_multi_retry_next_server(CURLM *curlm, CURL *curl, struct dload_payload *payload)
> +{
> + const char *server;
> + size_t len;
> + alpm_handle_t *handle = payload->handle;
> +
> + payload->servers = payload->servers->next;
> + if(!payload->servers) {
> + return -1;
> + }
> + server = payload->servers->data;
OK.
> +
> + /* regenerate a new fileurl */
> + free(payload->fileurl);
> + len = strlen(server) + strlen(payload->filepath) + 2;
> + MALLOC(payload->fileurl, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1));
> + snprintf(payload->fileurl, len, "%s/%s", server, payload->filepath);
> +
OK.
> + if(payload->unlink_on_fail) {
> + /* we keep the file for a new retry but remove its data if any */
> + fflush(payload->localf);
> + ftruncate(fileno(payload->localf), 0);
> + fseek(payload->localf, 0, SEEK_SET);
> + }
> +
> + /* Set curl with the new URL */
> + curl_easy_setopt(curl, CURLOPT_URL, payload->fileurl);
> +
> + curl_multi_remove_handle(curlm, curl);
> + curl_multi_add_handle(curlm, curl);
Is this right? I don't know the curl API, but wouldn't you do the
remove at the start of the function?
> +
> + return 0;
> +}
> +
> +/* Returns 2 if download retry happened
> + * Returns 1 if the file is up-to-date
> + * Returns 0 if current payload is completed sucessfully
Typo
> + * Returns -1 if an error happened
> + */
> +static int curl_multi_handle_single_done(CURLM *curlm, CURLMsg *msg, const char *localpath)
I have no idea what this function does from its name.
curl_multi_check_download_finished() ?
> +{
> + alpm_handle_t *handle = NULL;
> + struct dload_payload *payload = NULL;
> + CURL *curl = msg->easy_handle;
> + CURLcode curlerr;
> + char *effective_url;
> + long timecond;
> + double remote_size, bytes_dl = 0;
> + long remote_time = -1;
> + struct stat st;
> + char hostname[HOSTNAME_SIZE];
> + int ret = -1;
> +
> + curlerr = curl_easy_getinfo(curl, CURLINFO_PRIVATE, &payload);
> + assert(curlerr == CURLE_OK);
ASSERT(curlerr == CURLE_OK, return ret);
> + handle = payload->handle;
> +
> + curl_gethost(payload->fileurl, hostname, sizeof(hostname));
> + curlerr = msg->data.result;
> + _alpm_log(handle, ALPM_LOG_DEBUG, "curl returned result %d from transfer\n",
> + curlerr);
> +
> + /* was it a success? */
> + switch(curlerr) {
> + case CURLE_OK:
> + /* get http/ftp response code */
> + _alpm_log(handle, ALPM_LOG_DEBUG, "response code: %ld\n", payload->respcode);
> + if(payload->respcode >= 400) {
> + payload->unlink_on_fail = 1;
> + if(!payload->errors_ok) {
> + handle->pm_errno = ALPM_ERR_RETRIEVE;
> + /* non-translated message is same as libcurl */
> + snprintf(payload->error_buffer, sizeof(payload->error_buffer),
> + "The requested URL returned error: %ld", payload->respcode);
> + _alpm_log(handle, ALPM_LOG_ERROR,
> + _("failed retrieving file '%s' from %s : %s\n"),
> + payload->remote_name, hostname, payload->error_buffer);
> + }
> + if(curl_multi_retry_next_server(curlm, curl, payload) == 0) {
> + return 2;
> + } else {
> + goto cleanup;
> + }
> + }
> + break;
OK.
> + case CURLE_ABORTED_BY_CALLBACK:
> + /* handle the interrupt accordingly */
> + if(dload_interrupted == ABORT_OVER_MAXFILESIZE) {
> + curlerr = CURLE_FILESIZE_EXCEEDED;
> + payload->unlink_on_fail = 1;
> + handle->pm_errno = ALPM_ERR_LIBCURL;
> + _alpm_log(handle, ALPM_LOG_ERROR,
> + _("failed retrieving file '%s' from %s : expected download size exceeded\n"),
> + payload->remote_name, hostname);
> + }
> + goto cleanup;
OK.
> + case CURLE_COULDNT_RESOLVE_HOST:
> + payload->unlink_on_fail = 1;
> + handle->pm_errno = ALPM_ERR_SERVER_BAD_URL;
> + _alpm_log(handle, ALPM_LOG_ERROR,
> + _("failed retrieving file '%s' from %s : %s\n"),
> + payload->remote_name, hostname, payload->error_buffer);
> + if(curl_multi_retry_next_server(curlm, curl, payload) == 0) {
> + return 2;
> + } else {
> + goto cleanup;
> + }
OK.
> + default:
> + /* delete zero length downloads */
> + if(fstat(fileno(payload->localf), &st) == 0 && st.st_size == 0) {
> + payload->unlink_on_fail = 1;
> + }
> + if(!payload->errors_ok) {
> + handle->pm_errno = ALPM_ERR_LIBCURL;
> + _alpm_log(handle, ALPM_LOG_ERROR,
> + _("failed retrieving file '%s' from %s : %s\n"),
> + payload->remote_name, hostname, payload->error_buffer);
> + } else {
> + _alpm_log(handle, ALPM_LOG_DEBUG,
> + "failed retrieving file '%s' from %s : %s\n",
> + payload->remote_name, hostname, payload->error_buffer);
> + }
> + if(curl_multi_retry_next_server(curlm, curl, payload) == 0) {
> + return 2;
> + } else {
> + goto cleanup;
> + }
OK. That if() is repeated a lot, but seems necessary.
> + }
> +
> + /* retrieve info about the state of the transfer */
> + curl_easy_getinfo(curl, CURLINFO_FILETIME, &remote_time);
> + curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &remote_size);
> + curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &bytes_dl);
> + curl_easy_getinfo(curl, CURLINFO_CONDITION_UNMET, &timecond);
> + curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &effective_url);
> +
In curl_download_internal there is a "if(final_url != NULL) {" bit here.
Why is that not transferred here?
> + /* time condition was met and we didn't download anything. we need to
> + * clean up the 0 byte .part file that's left behind. */
> + if(timecond == 1 && DOUBLE_EQ(bytes_dl, 0)) {
> + _alpm_log(handle, ALPM_LOG_DEBUG, "file met time condition\n");
> + ret = 1;
> + unlink(payload->tempfile_name);
> + goto cleanup;
> + }
OK
> +
> + /* remote_size isn't necessarily the full size of the file, just what the
> + * server reported as remaining to download. compare it to what curl reported
> + * as actually being transferred during curl_easy_perform() */
> + if(!DOUBLE_EQ(remote_size, -1) && !DOUBLE_EQ(bytes_dl, -1) &&
> + !DOUBLE_EQ(bytes_dl, remote_size)) {
> + _alpm_log(handle, ALPM_LOG_ERROR, _("%s appears to be truncated: %jd/%jd bytes\n"),
> + payload->remote_name, (intmax_t)bytes_dl, (intmax_t)remote_size);
> + GOTO_ERR(handle, ALPM_ERR_RETRIEVE, cleanup);
> + }
OK.
> +
> + if(payload->trust_remote_name) {
> + if(payload->content_disp_name) {
> + /* content-disposition header has a better name for our file */
> + free(payload->destfile_name);
> + payload->destfile_name = get_fullpath(localpath,
> + get_filename(payload->content_disp_name), "");
> + } else {
> + const char *effective_filename = strrchr(effective_url, '/');
> + if(effective_filename && strlen(effective_filename) > 2) {
> + effective_filename++;
> +
> + /* if destfile was never set, we wrote to a tempfile. even if destfile is
> + * set, we may have followed some redirects and the effective url may
> + * have a better suggestion as to what to name our file. in either case,
> + * refactor destfile to this newly derived name. */
> + if(!payload->destfile_name || strcmp(effective_filename,
> + strrchr(payload->destfile_name, '/') + 1) != 0) {
> + free(payload->destfile_name);
> + payload->destfile_name = get_fullpath(localpath, effective_filename, "");
> + }
> + }
> + }
> + }
OK.
> +
> + ret = 0;
> +
> +cleanup:
> + /* disconnect relationships from the curl handle for things that might go out
> + * of scope, but could still be touched on connection teardown. This really
> + * only applies to FTP transfers. */
> + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L);
> + curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, (char *)NULL);
> +
OK.
> + if(payload->localf != NULL) {
> + fclose(payload->localf);
> + utimes_long(payload->tempfile_name, remote_time);
> + }
> +
OK.
> + if(ret == 0) {
> + if(payload->destfile_name) {
> + if(rename(payload->tempfile_name, payload->destfile_name)) {
> + _alpm_log(handle, ALPM_LOG_ERROR, _("could not rename %s to %s (%s)\n"),
> + payload->tempfile_name, payload->destfile_name, strerror(errno));
> + ret = -1;
> + }
> + }
> + }
OK.
> +
> + if((ret == -1 || dload_interrupted) && payload->unlink_on_fail &&
> + payload->tempfile_name) {
> + unlink(payload->tempfile_name);
> + }
OK.
> +
> + // TODO: report that the download has been completed
> +
> + curl_multi_remove_handle(curlm, curl);
> + curl_easy_cleanup(curl);
> + payload->curl = NULL;
> +
> + FREE(payload->fileurl);
> + return ret;
> +}
OK.
> +
> +/* Returns 0 if a new download transaction has been successfully started
> + * Returns -1 if an error happened while starting a new download
> + */
> +static int curl_multi_add_payload(alpm_handle_t *handle, CURLM *curlm,
> + struct dload_payload *payload, const char *localpath)
> +{
> + size_t len;
> + const char *server;
> + CURL *curl = NULL;
> + char hostname[HOSTNAME_SIZE];
> +
> + ASSERT(payload->servers, RET_ERR(handle, ALPM_ERR_SERVER_NONE, -1));
> + server = payload->servers->data;
OK.
> +
> + curl = curl_easy_init();
> + payload->curl = curl;
> +
> + len = strlen(server) + strlen(payload->filepath) + 2;
> + MALLOC(payload->fileurl, len, GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> + snprintf(payload->fileurl, len, "%s/%s", server, payload->filepath);
OK.
> +
> + payload->tempfile_openmode = "wb";
> + if(!payload->remote_name) {
> + STRDUP(payload->remote_name, get_filename(payload->fileurl),
> + GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> + }
> + if(curl_gethost(payload->fileurl, hostname, sizeof(hostname)) != 0) {
> + _alpm_log(handle, ALPM_LOG_ERROR, _("url '%s' is invalid\n"), payload->fileurl);
> + GOTO_ERR(handle, ALPM_ERR_SERVER_BAD_URL, cleanup);
> + }
OK.
> +
> + if(payload->remote_name && strlen(payload->remote_name) > 0) {
> + payload->destfile_name = get_fullpath(localpath, payload->remote_name, "");
> + payload->tempfile_name = get_fullpath(localpath, payload->remote_name, ".part");
> + if(!payload->destfile_name || !payload->tempfile_name) {
> + goto cleanup;
> + }
> + } else {
> + /* URL doesn't contain a filename, so make a tempfile. We can't support
> + * resuming this kind of download; partial transfers will be destroyed */
> + payload->unlink_on_fail = 1;
> +
> + payload->localf = create_tempfile(payload, localpath);
> + if(payload->localf == NULL) {
> + goto cleanup;
> + }
> + }
OK.
> +
> + curl_set_handle_opts(payload, curl, payload->error_buffer);
> +
> + if(payload->max_size == payload->initial_size) {
> + /* .part file is complete */
> + goto cleanup;
> + }
> +
> + if(payload->localf == NULL) {
> + payload->localf = fopen(payload->tempfile_name, payload->tempfile_openmode);
> + if(payload->localf == NULL) {
> + _alpm_log(handle, ALPM_LOG_ERROR,
> + _("could not open file %s: %s\n"),
> + payload->tempfile_name, strerror(errno));
> + GOTO_ERR(handle, ALPM_ERR_RETRIEVE, cleanup);
> + }
> + }
> +
OK.
> + _alpm_log(handle, ALPM_LOG_DEBUG,
> + "opened tempfile for download: %s (%s)\n", payload->tempfile_name,
> + payload->tempfile_openmode);
> +
> + curl_easy_setopt(curl, CURLOPT_WRITEDATA, payload->localf);
> + curl_multi_add_handle(curlm, curl);
> + return 0;
> +
> +cleanup:
> + FREE(payload->fileurl);
> + FREE(payload->tempfile_name);
> + FREE(payload->destfile_name);
> + FREE(payload->content_disp_name);
> + curl_easy_cleanup(curl);
> + return -1;
> +}
> +
OK.
> static int curl_multi_download_internal(alpm_handle_t *handle,
> alpm_list_t *payloads /* struct dload_payload */,
> const char *localpath)
> {
> - (void)handle;
> - (void)payloads;
> - (void)localpath;
> - return 0;
> + int still_running = 0;
> + int err = 0;
> + int parallel_downloads = handle->parallel_downloads;
> +
> + CURLM *curlm = handle->curlm;
> + CURLMsg *msg;
> +
> + while(still_running || payloads) {
> + int msgs_left = -1;
> +
> + for(; still_running < parallel_downloads && payloads; still_running++) {
> + struct dload_payload *payload = payloads->data;
> +
> + if(curl_multi_add_payload(handle, curlm, payloads->data, localpath) == 0) {
> + // TODO: report that download has started
> + payloads = payloads->next;
> + } else {
> + // the payload failed to start, do not start any new downloads just wait until
> + // active one complete.
> + _alpm_log(handle, ALPM_LOG_ERROR, "failed to setup a download payload for %s\n", payload->remote_name);
> + payloads = NULL;
> + err = -1;
OK.
> + }
> + }
> +
> + CURLMcode mc = curl_multi_perform(curlm, &still_running);
> +
> + if(mc != CURLM_OK) {
> + _alpm_log(handle, ALPM_LOG_ERROR, "curl returned error %d from transfer\n", mc);
> + payloads = NULL;
> + err = -1;
> + }
OK
> +
> + while((msg = curl_multi_info_read(curlm, &msgs_left))) {
> + if(msg->msg == CURLMSG_DONE) {
> + int done_code = curl_multi_handle_single_done(curlm, msg, localpath);
> + if(done_code == 2) {
> + /* in case of a retry increase the counter of active requests
> + * to avoid exiting the loop early
> + */
> + still_running++;
> + }
OK.
> + } else {
> + _alpm_log(handle, ALPM_LOG_ERROR, "curl curl_multi_info_read error %d\n", msg->msg);
> + }
> + }
> + if(still_running) {
> + curl_multi_wait(curlm, NULL, 0, 1000, NULL);
> + }
> + }
> +
> + return err;
> }
> +
> #endif
>
> /** Download a file given by a URL to a local directory.
> diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h
> index e87b6a93..a40b51b7 100644
> --- a/lib/libalpm/dload.h
> +++ b/lib/libalpm/dload.h
> @@ -45,6 +45,8 @@ struct dload_payload {
> int cb_initialized;
> #ifdef HAVE_LIBCURL
> CURL *curl;
> + char error_buffer[CURL_ERROR_SIZE];
> + FILE *localf; /* temp download file */
> #endif
> };
>
>
More information about the pacman-dev
mailing list