[pacman-dev] [PATCH] Convert '-U pkg1 pkg2' codepath to parallel download
Anatol Pomozov
anatol.pomozov at gmail.com
Tue Jun 23 05:57:24 UTC 2020
Hi
On Wed, Jun 10, 2020 at 7:57 PM Allan McRae <allan at archlinux.org> wrote:
>
> On 11/5/20 6:50 pm, Anatol Pomozov wrote:
> > Installing remote packages using its URL is an interesting case for ALPM
> > API. Unlike package sync ('pacman -S pkg1 pkg2') '-U' does not deal with
> > server mirror list. Thus _alpm_multi_download() should be able to
> > handle file download for payloads that either have 'fileurl' field
> > or pair of fields ('servers' and 'filepath') set.
> >
> > Signature for alpm_fetch_pkgurl() has changed and it accepts an
> > output list that is populated with filepaths to fetched packages.
> >
> > Signed-off-by: Anatol Pomozov <anatol.pomozov at gmail.com>
>
> Thanks. Very minor comments belown.
>
> > ---
> > lib/libalpm/alpm.h | 11 ++-
> > lib/libalpm/dload.c | 204 +++++++++++++++++++++++++------------------
> > lib/libalpm/dload.h | 7 +-
> > src/pacman/upgrade.c | 102 ++++++++++------------
> > 4 files changed, 181 insertions(+), 143 deletions(-)
> >
> > diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h
> > index 3ea66ccc..c6a13273 100644
> > --- a/lib/libalpm/alpm.h
> > +++ b/lib/libalpm/alpm.h
> > @@ -755,12 +755,15 @@ typedef void (*alpm_cb_totaldl)(off_t total);
> > typedef int (*alpm_cb_fetch)(const char *url, const char *localpath,
> > int force);
> >
> > -/** Fetch a remote pkg.
> > +/** Fetch a list of remote packages.
> > * @param handle the context handle
> > - * @param url URL of the package to download
> > - * @return the downloaded filepath on success, NULL on error
> > + * @param urls list of package URLs to download
> > + * @param fetched list of filepaths to the fetched packages, each item
> > + * corresponds to one in `urls` list
>
> Can you make it clear that this is filled by the function, and not
> something that should be passed.
Done.
>
> > + * @return 0 on success or -1 on failure
> > */
> > -char *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url);
> > +int alpm_fetch_pkgurl(alpm_handle_t *handle, const alpm_list_t *urls,
> > + alpm_list_t **fetched);
> >
> > /** @addtogroup alpm_api_options Options
> > * Libalpm option getters and setters
> > diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> > index 13aa4086..43fe9847 100644
> > --- a/lib/libalpm/dload.c
> > +++ b/lib/libalpm/dload.c
> > @@ -613,7 +613,9 @@ static int curl_multi_retry_next_server(CURLM *curlm, CURL *curl, struct dload_p
> > size_t len;
> > alpm_handle_t *handle = payload->handle;
> >
> > - payload->servers = payload->servers->next;
> > + if(payload->servers) {
> > + payload->servers = payload->servers->next;
> > + }
>
> OK.
>
> > if(!payload->servers) {
> > _alpm_log(payload->handle, ALPM_LOG_DEBUG,
> > "%s: no more servers to retry\n", payload->remote_name);
> > @@ -622,7 +624,7 @@ static int curl_multi_retry_next_server(CURLM *curlm, CURL *curl, struct dload_p
> > server = payload->servers->data;
> >
> > /* regenerate a new fileurl */
> > - free(payload->fileurl);
> > + FREE(payload->fileurl);
>
> Change unrelated to this patch, but OK.
>
> > len = strlen(server) + strlen(payload->filepath) + 2;
> > MALLOC(payload->fileurl, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1));
> > snprintf(payload->fileurl, len, "%s/%s", server, payload->filepath);
> > @@ -849,20 +851,28 @@ static int curl_multi_add_payload(alpm_handle_t *handle, CURLM *curlm,
> > struct dload_payload *payload, const char *localpath)
> > {
> > size_t len;
> > - const char *server;
> > CURL *curl = NULL;
> > char hostname[HOSTNAME_SIZE];
> > int ret = -1;
> >
> > - ASSERT(payload->servers, RET_ERR(handle, ALPM_ERR_SERVER_NONE, -1));
> > - server = payload->servers->data;
> > -
> > curl = curl_easy_init();
> > payload->curl = curl;
> >
> > - len = strlen(server) + strlen(payload->filepath) + 2;
> > - MALLOC(payload->fileurl, len, GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> > - snprintf(payload->fileurl, len, "%s/%s", server, payload->filepath);
> > + if(payload->fileurl) {
> > + ASSERT(!payload->servers, GOTO_ERR(handle, ALPM_ERR_WRONG_ARGS, cleanup));
> > + ASSERT(!payload->filepath, GOTO_ERR(handle, ALPM_ERR_WRONG_ARGS, cleanup));
>
> OK.
>
> > + } else {
> > + const char *server;
> > +
> > + ASSERT(payload->servers, GOTO_ERR(handle, ALPM_ERR_SERVER_NONE, cleanup));
> > + ASSERT(payload->filepath, GOTO_ERR(handle, ALPM_ERR_WRONG_ARGS, cleanup));
> > +
> > + server = payload->servers->data;
> > +
> > + len = strlen(server) + strlen(payload->filepath) + 2;
> > + MALLOC(payload->fileurl, len, GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> > + snprintf(payload->fileurl, len, "%s/%s", server, payload->filepath);
> > + }
> >
>
> OK.
>
> > payload->tempfile_openmode = "wb";
> > if(!payload->remote_name) {
> > @@ -1046,22 +1056,29 @@ int _alpm_multi_download(alpm_handle_t *handle,
> > alpm_list_t *s;
> > int success = 0;
> >
> > - for(s = payload->servers; s; s = s->next) {
> > - const char *server = s->data;
> > - char *fileurl;
> > - int ret;
> > -
> > - size_t len = strlen(server) + strlen(payload->filepath) + 2;
> > - MALLOC(fileurl, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1));
> > - snprintf(fileurl, len, "%s/%s", server, payload->filepath);
> > -
> > - ret = handle->fetchcb(fileurl, localpath, payload->force);
> > - free(fileurl);
> > -
> > - if (ret != -1) {
> > + if(payload->fileurl) {
> > + if (handle->fetchcb(payload->fileurl, localpath, payload->force) != -1) {
> > success = 1;
> > break;
> > }
> > + } else {
> > + for(s = payload->servers; s; s = s->next) {
> > + const char *server = s->data;
> > + char *fileurl;
> > + int ret;
> > +
> > + size_t len = strlen(server) + strlen(payload->filepath) + 2;
> > + MALLOC(fileurl, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1));
> > + snprintf(fileurl, len, "%s/%s", server, payload->filepath);
> > +
> > + ret = handle->fetchcb(fileurl, localpath, payload->force);
> > + free(fileurl);
> > +
> > + if (ret != -1) {
> > + success = 1;
> > + break;
> > + }
> > + }
> > }
>
> OK.
>
> > if(!success && !payload->errors_ok) {
> > RET_ERR(handle, ALPM_ERR_EXTERNAL_DOWNLOAD, -1);
> > @@ -1087,82 +1104,101 @@ static char *filecache_find_url(alpm_handle_t *handle, const char *url)
> > return _alpm_filecache_find(handle, filebase);
> > }
> >
> > -char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
> > +int SYMEXPORT alpm_fetch_pkgurl(alpm_handle_t *handle, const alpm_list_t *urls,
> > + alpm_list_t **fetched)
> > {
> > - char *filepath;
> > - const char *cachedir, *final_pkg_url = NULL;
> > - char *final_file = NULL;
> > - struct dload_payload payload = {0};
> > - int ret = 0;
> > + const char *cachedir;
> > + alpm_list_t *payloads = NULL;
> > + int download_sigs;
> > + const alpm_list_t *i;
> > + alpm_event_t event;
> >
> > - CHECK_HANDLE(handle, return NULL);
> > - ASSERT(url, RET_ERR(handle, ALPM_ERR_WRONG_ARGS, NULL));
> > + CHECK_HANDLE(handle, return -1);
> > + download_sigs = handle->siglevel & ALPM_SIG_PACKAGE;
> >
>
> Should we check *fetched is NULL?
It is a good idea. I also updated the function documentation and
specified that callee expects
*fetched is NULL.
>
> > /* find a valid cache dir to download to */
> > cachedir = _alpm_filecache_setup(handle);
> >
> > - /* attempt to find the file in our pkgcache */
> > - filepath = filecache_find_url(handle, url);
> > - if(filepath == NULL) {
> > - STRDUP(payload.fileurl, url, RET_ERR(handle, ALPM_ERR_MEMORY, NULL));
> > - payload.allow_resume = 1;
> > - payload.handle = handle;
> > - payload.trust_remote_name = 1;
> > -
> > - /* download the file */
> > - ret = _alpm_download(&payload, cachedir, &final_file, &final_pkg_url);
> > - _alpm_dload_payload_reset(&payload);
> > - if(ret == -1) {
> > - _alpm_log(handle, ALPM_LOG_WARNING, _("failed to download %s\n"), url);
> > - free(final_file);
> > - return NULL;
> > + for(i = urls; i; i = i->next) {
> > + char *url = i->data;
> > +
> > + /* attempt to find the file in our pkgcache */
> > + char *filepath = filecache_find_url(handle, url);
> > + if(filepath) {
> > + /* the file is locally cached so add it to the output right away */
> > + alpm_list_append(fetched, filepath);
> > + } else {
> > + struct dload_payload *payload = NULL;
> > +
> > + ASSERT(url, GOTO_ERR(handle, ALPM_ERR_WRONG_ARGS, err));
> > + CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, err));
> > + STRDUP(payload->fileurl, url, FREE(payload); GOTO_ERR(handle, ALPM_ERR_MEMORY, err));
> > + payload->allow_resume = 1;
> > + payload->handle = handle;
> > + payload->trust_remote_name = 1;
> > + payloads = alpm_list_add(payloads, payload);
> > +
> > + if(download_sigs) {
> > + int len = strlen(url) + 5;
> > + CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, err));
> > + payload->signature = 1;
> > + MALLOC(payload->fileurl, len, FREE(payload); GOTO_ERR(handle, ALPM_ERR_MEMORY, err));
> > + snprintf(payload->fileurl, len, "%s.sig", url);
> > + payload->handle = handle;
> > + payload->trust_remote_name = 1;
> > + payload->errors_ok = (handle->siglevel & ALPM_SIG_PACKAGE_OPTIONAL);
> > + /* set hard upper limit of 16KiB */
> > + payload->max_size = 16 * 1024;
> > + payloads = alpm_list_add(payloads, payload);
> > + }
>
> OK (realising this will be updated in a couple of patches time to handle
> redircting stuff)
Yes, there is an upcoming patch that refactors this and a few other
places to make sure that
*.sig file is downloaded from the same mirror as the main file.
> > }
> > - _alpm_log(handle, ALPM_LOG_DEBUG, "successfully downloaded %s\n", url);
> > }
> >
> > - /* attempt to download the signature */
> > - if(ret == 0 && final_pkg_url && (handle->siglevel & ALPM_SIG_PACKAGE)) {
> > - char *sig_filepath, *sig_final_file = NULL;
> > - size_t len;
> > -
> > - len = strlen(final_pkg_url) + 5;
> > - MALLOC(payload.fileurl, len, free(final_file); RET_ERR(handle, ALPM_ERR_MEMORY, NULL));
> > - snprintf(payload.fileurl, len, "%s.sig", final_pkg_url);
> > -
> > - sig_filepath = filecache_find_url(handle, payload.fileurl);
> > - if(sig_filepath == NULL) {
> > - payload.signature = 1;
> > - payload.handle = handle;
> > - payload.trust_remote_name = 1;
> > - payload.force = 1;
> > - payload.errors_ok = (handle->siglevel & ALPM_SIG_PACKAGE_OPTIONAL);
> > -
> > - /* set hard upper limit of 16KiB */
> > - payload.max_size = 16 * 1024;
> > -
> > - ret = _alpm_download(&payload, cachedir, &sig_final_file, NULL);
> > - if(ret == -1 && !payload.errors_ok) {
> > - _alpm_log(handle, ALPM_LOG_WARNING,
> > - _("failed to download %s\n"), payload.fileurl);
> > - /* Warn now, but don't return NULL. We will fail later during package
> > - * load time. */
> > - } else if(ret == 0) {
> > - _alpm_log(handle, ALPM_LOG_DEBUG,
> > - "successfully downloaded %s\n", payload.fileurl);
>
> OK.
>
> > + if(payloads) {
> > + event.type = ALPM_EVENT_PKG_RETRIEVE_START;
> > + EVENT(handle, &event);
> > + if(_alpm_multi_download(handle, payloads, cachedir) == -1) {
> > + _alpm_log(handle, ALPM_LOG_WARNING, _("failed to retrieve some files\n"));
> > + event.type = ALPM_EVENT_PKG_RETRIEVE_FAILED;
> > + EVENT(handle, &event);
> > +
> > + GOTO_ERR(handle, ALPM_ERR_RETRIEVE, err);
> > + } else {
> > + event.type = ALPM_EVENT_PKG_RETRIEVE_DONE;
> > + EVENT(handle, &event);
> > + }
> > +
> > + for(i = payloads; i; i = i->next) {
> > + struct dload_payload *payload = i->data;
> > + const char *filename;
> > + char *filepath;
> > +
> > + if(payload->signature) {
> > + continue;
> > + }
> > +
> > + filename = mbasename(payload->destfile_name);
> > + filepath = _alpm_filecache_find(handle, filename);
> > + if(filepath) {
> > + alpm_list_append(fetched, filepath);
> > + } else {
> > + _alpm_log(handle, ALPM_LOG_WARNING, _("download completed successfully but no file in the cache\n"));
> > + GOTO_ERR(handle, ALPM_ERR_RETRIEVE, err);
> > }
> > - FREE(sig_final_file);
> > }
>
> OK.
>
> > - free(sig_filepath);
> > - _alpm_dload_payload_reset(&payload);
> > - }
> >
> > - /* we should be able to find the file the second time around */
> > - if(filepath == NULL) {
> > - filepath = _alpm_filecache_find(handle, final_file);
> > + alpm_list_free_inner(payloads, (alpm_list_fn_free)_alpm_dload_payload_reset);
> > + FREELIST(payloads);
> > }
> > - free(final_file);
> >
> > - return filepath;
> > + return 0;
> > +
> > +err:
> > + alpm_list_free_inner(payloads, (alpm_list_fn_free)_alpm_dload_payload_reset);
> > + FREELIST(payloads);
> > + FREELIST(*fetched);
> > +
> > + return -1;
> > }
> >
>
> OK.
>
> > void _alpm_dload_payload_reset(struct dload_payload *payload)
> > diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h
> > index 1e4af755..f119fc2e 100644
> > --- a/lib/libalpm/dload.h
> > +++ b/lib/libalpm/dload.h
> > @@ -30,9 +30,14 @@ struct dload_payload {
> > char *tempfile_name;
> > char *destfile_name;
> > char *content_disp_name;
> > + /* client has to provide either
> > + * 1) fileurl - full URL to the file
> > + * 2) pair of (servers, filepath), in this case ALPM iterates over the
> > + * server list and tries to download "$server/$filepath"
> > + */
>
> OK.
>
> > char *fileurl;
> > - char *filepath; /* download URL path */
> > alpm_list_t *servers;
> > + char *filepath; /* download URL path */
>
> Random churn.
reverted
>
> > long respcode;
> > off_t initial_size;
> > off_t max_size;
> > diff --git a/src/pacman/upgrade.c b/src/pacman/upgrade.c
> > index 5f984e44..840e3a31 100644
> > --- a/src/pacman/upgrade.c
> > +++ b/src/pacman/upgrade.c
> > @@ -30,6 +30,34 @@
> > #include "conf.h"
> > #include "util.h"
> >
> > +/* add targets to the created transaction */
> > +static int load_packages(alpm_list_t *targets, int siglevel)
> > +{
> > + alpm_list_t *i;
> > + int retval = 0;
> > +
> > + for(i = targets; i; i = alpm_list_next(i)) {
> > + const char *targ = i->data;
> > + alpm_pkg_t *pkg;
> > +
> > + if(alpm_pkg_load(config->handle, targ, 1, siglevel, &pkg) != 0) {
> > + pm_printf(ALPM_LOG_ERROR, "'%s': %s\n",
> > + targ, alpm_strerror(alpm_errno(config->handle)));
> > + retval = 1;
> > + continue;
> > + }
> > + if(alpm_add_pkg(config->handle, pkg) == -1) {
> > + pm_printf(ALPM_LOG_ERROR, "'%s': %s\n",
> > + targ, alpm_strerror(alpm_errno(config->handle)));
> > + alpm_pkg_free(pkg);
> > + retval = 1;
> > + continue;
> > + }
> > + config->explicit_adds = alpm_list_add(config->explicit_adds, pkg);
> > + }
> > + return retval;
> > +}
> > +
> > /**
> > * @brief Upgrade a specified list of packages.
> > *
> > @@ -39,43 +67,30 @@
> > */
> > int pacman_upgrade(alpm_list_t *targets)
> > {
> > - int retval = 0, *file_is_remote;
> > + int retval = 0;
> > + alpm_list_t *remote_targets = NULL, *fetched_files = NULL;
> > + alpm_list_t *local_targets = NULL;
> > alpm_list_t *i;
> > - unsigned int n, num_targets;
> >
> > if(targets == NULL) {
> > pm_printf(ALPM_LOG_ERROR, _("no targets specified (use -h for help)\n"));
> > return 1;
> > }
> >
> > - num_targets = alpm_list_count(targets);
> > -
> > - /* Check for URL targets and process them */
> > - file_is_remote = malloc(num_targets * sizeof(int));
> > - if(file_is_remote == NULL) {
> > - pm_printf(ALPM_LOG_ERROR, _("memory exhausted\n"));
> > - return 1;
> > - }
> > -
> > - for(i = targets, n = 0; i; i = alpm_list_next(i), n++) {
> > + /* carve out remote targets and move it into a separate list */
> > + for(i = targets; i; i = alpm_list_next(i)) {
> > if(strstr(i->data, "://")) {
> > - char *str = alpm_fetch_pkgurl(config->handle, i->data);
> > - if(str == NULL) {
> > - pm_printf(ALPM_LOG_ERROR, "'%s': %s\n",
> > - (char *)i->data, alpm_strerror(alpm_errno(config->handle)));
> > - retval = 1;
> > - } else {
> > - free(i->data);
> > - i->data = str;
> > - file_is_remote[n] = 1;
> > - }
> > + remote_targets = alpm_list_add(remote_targets, i->data);
> > } else {
> > - file_is_remote[n] = 0;
> > + local_targets = alpm_list_add(local_targets, i->data);
> > }
> > }
> >
> > - if(retval) {
> > - goto fail_free;
> > + if(remote_targets) {
> > + retval = alpm_fetch_pkgurl(config->handle, remote_targets, &fetched_files);
> > + if(retval) {
> > + goto fail_free;
> > + }
> > }
> >
>
> OK.
>
> > /* Step 1: create a new transaction */
> > @@ -85,39 +100,16 @@ int pacman_upgrade(alpm_list_t *targets)
> > }
> >
> > printf(_("loading packages...\n"));
> > - /* add targets to the created transaction */
> > - for(i = targets, n = 0; i; i = alpm_list_next(i), n++) {
> > - const char *targ = i->data;
> > - alpm_pkg_t *pkg;
> > - int siglevel;
> > -
> > - if(file_is_remote[n]) {
> > - siglevel = alpm_option_get_remote_file_siglevel(config->handle);
> > - } else {
> > - siglevel = alpm_option_get_local_file_siglevel(config->handle);
> > - }
> > -
> > - if(alpm_pkg_load(config->handle, targ, 1, siglevel, &pkg) != 0) {
> > - pm_printf(ALPM_LOG_ERROR, "'%s': %s\n",
> > - targ, alpm_strerror(alpm_errno(config->handle)));
> > - retval = 1;
> > - continue;
> > - }
> > - if(alpm_add_pkg(config->handle, pkg) == -1) {
> > - pm_printf(ALPM_LOG_ERROR, "'%s': %s\n",
> > - targ, alpm_strerror(alpm_errno(config->handle)));
> > - alpm_pkg_free(pkg);
> > - retval = 1;
> > - continue;
> > - }
> > - config->explicit_adds = alpm_list_add(config->explicit_adds, pkg);
> > - }
> > + retval |= load_packages(local_targets, alpm_option_get_local_file_siglevel(config->handle));
> > + retval |= load_packages(fetched_files, alpm_option_get_remote_file_siglevel(config->handle));
> >
>
> OK.
>
> > if(retval) {
> > goto fail_release;
> > }
> >
> > - free(file_is_remote);
> > + alpm_list_free(remote_targets);
> > + alpm_list_free(local_targets);
> > + FREELIST(fetched_files);
> >
> > /* now that targets are resolved, we can hand it all off to the sync code */
> > return sync_prepare_execute();
> > @@ -125,7 +117,9 @@ int pacman_upgrade(alpm_list_t *targets)
> > fail_release:
> > trans_release();
> > fail_free:
> > - free(file_is_remote);
> > + alpm_list_free(remote_targets);
> > + alpm_list_free(local_targets);
> > + FREELIST(fetched_files);
> >
> > return retval;
> > }
> >
>
> OK.
More information about the pacman-dev
mailing list