[pacman-dev] [PATCH] Move signature payload creation to download engine

Anatol Pomozov anatol.pomozov at gmail.com
Tue Jun 23 17:54:58 UTC 2020


Hi

On Mon, Jun 22, 2020 at 11:35 PM Anatol Pomozov
<anatol.pomozov at gmail.com> wrote:
>
> Hi
>
> On Wed, Jun 10, 2020 at 9:33 PM Allan McRae <allan at archlinux.org> wrote:
> >
> > On 14/5/20 8:15 am, Anatol Pomozov wrote:
> > > Until now callee of ALPM download functionality has been in charge of
> > > payload creation both for the main file (e.g. *.pkg) and for the accompanied
> > > *.sig file. One advantage of such solution is that all payloads are
> > > independent and can be fetched in parallel thus exploiting the maximum
> > > level of download parallelism.
> > >
> > > To build *.sig file url we've been using a simple string concatenation:
> > > $requested_url + ".sig". Unfortunately there are cases when it does not
> > > work. For example an archlinux.org "Download From Mirror" link looks like
> > > this https://www.archlinux.org/packages/core/x86_64/bash/download/ and
> > > it gets redirected to some mirror. But if we append ".sig" to the end of
> > > the link url and try to download it then archlinux.org returns 404 error.
> > >
> > > To overcome this issue we need to follow redirects for the main payload
> > > first, find the final url and only then append '.sig' suffix.
> > > This implies 2 things:
> > >  - the signature payload initialization need to be moved to dload.c
> > >  as it is the place where we have access to the resolved url
> > >  - *.sig is downloaded serially with the main payload and this reduces
> > >  level of parallelism
> > >
> > > Move *.sig payload creation to dload.c. Once the main payload is fetched
> > > successfully we check if the callee asked to download the accompanied
> > > signature. If yes - create a new payload and add it to mcurl.
> > >
> > > *.sig payload does not use server list of the main payload and thus does
> > > not support mirror failover. *.sig file comes from the same server as
> > > the main payload.
> > >
> > > Refactor event loop in curl_multi_download_internal() a bit. Instead of
> > > relying on curl_multi_check_finished_download() to return number of new
> > > payloads we simply rerun the loop iteration one more time to check if
> > > there are any active downloads left.
> > > ---
> > >  lib/libalpm/be_sync.c | 34 +++-------------
> > >  lib/libalpm/dload.c   | 91 ++++++++++++++++++++++++++-----------------
> > >  lib/libalpm/dload.h   |  4 +-
> > >  3 files changed, 65 insertions(+), 64 deletions(-)
> > >
> > > diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c
> > > index 82018e15..41675d21 100644
> > > --- a/lib/libalpm/be_sync.c
> > > +++ b/lib/libalpm/be_sync.c
> > > @@ -180,12 +180,10 @@ int SYMEXPORT alpm_db_update(alpm_handle_t *handle, alpm_list_t *dbs, int force)
> > >                       dbforce = 1;
> > >               }
> > >
> > > -             CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> > > +             siglevel = alpm_db_get_siglevel(db);
> > >
> > > -             /* set hard upper limit of 128 MiB */
> > > -             payload->max_size = 128 * 1024 * 1024;
> > > +             CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> > >               payload->servers = db->servers;
> > > -
> > >               /* print server + filename into a buffer */
> > >               len = strlen(db->treename) + strlen(dbext) + 1;
> > >               MALLOC(payload->filepath, len,
> > > @@ -194,31 +192,11 @@ int SYMEXPORT alpm_db_update(alpm_handle_t *handle, alpm_list_t *dbs, int force)
> > >               payload->handle = handle;
> > >               payload->force = dbforce;
> > >               payload->unlink_on_fail = 1;
> > > -
> > > +             payload->download_signature = (siglevel & ALPM_SIG_DATABASE);
> > > +             payload->signature_optional = (siglevel & ALPM_SIG_DATABASE_OPTIONAL);
> > > +             /* set hard upper limit of 128 MiB */
> > > +             payload->max_size = 128 * 1024 * 1024;
> > >               payloads = alpm_list_add(payloads, payload);
> >
> > OK.
> >
> > > -
> > > -             siglevel = alpm_db_get_siglevel(db);
> > > -             if(siglevel & ALPM_SIG_DATABASE) {
> > > -                     struct dload_payload *sig_payload;
> > > -                     CALLOC(sig_payload, 1, sizeof(*sig_payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> > > -                     sig_payload->signature = 1;
> > > -
> > > -                     /* print filename into a buffer (leave space for separator and .sig) */
> > > -                     len = strlen(db->treename) + strlen(dbext) + 5;
> > > -                     MALLOC(sig_payload->filepath, len,
> > > -                             FREE(sig_payload); GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> > > -                     snprintf(sig_payload->filepath, len, "%s%s.sig", db->treename, dbext);
> > > -
> > > -                     sig_payload->handle = handle;
> > > -                     sig_payload->force = dbforce;
> > > -                     sig_payload->errors_ok = (siglevel & ALPM_SIG_DATABASE_OPTIONAL);
> > > -
> > > -                     /* set hard upper limit of 16 KiB */
> > > -                     sig_payload->max_size = 16 * 1024;
> > > -                     sig_payload->servers = db->servers;
> > > -
> > > -                     payloads = alpm_list_add(payloads, sig_payload);
> > > -             }
> > >       }
> > >
> > >       event.type = ALPM_EVENT_DB_RETRIEVE_START;
> >
> > OK.
> >
> > > diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> > > index 4dbb011f..b68dcf6d 100644
> > > --- a/lib/libalpm/dload.c
> > > +++ b/lib/libalpm/dload.c
> > > @@ -18,6 +18,7 @@
> > >   *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
> > >   */
> > >
> > > +#include <stdbool.h>
> >
> > See below.
> >
> > >  #include <stdlib.h>
> > >  #include <stdio.h>
> > >  #include <errno.h>
> > > @@ -49,6 +50,10 @@
> > >  #include "handle.h"
> > >
> > >  #ifdef HAVE_LIBCURL
> > > +
> > > +static int curl_multi_add_payload(alpm_handle_t *handle, CURLM *curlm,
> > > +     struct dload_payload *payload, const char *localpath);
> > > +
> >
> > OK.
> >
> > >  static const char *get_filename(const char *url)
> > >  {
> > >       char *filename = strrchr(url, '/');
> > > @@ -476,6 +481,25 @@ static int curl_multi_check_finished_download(CURLM *curlm, CURLMsg *msg,
> > >       curl_easy_getinfo(curl, CURLINFO_CONDITION_UNMET, &timecond);
> > >       curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &effective_url);
> > >
> > > +     /* Let's check if client requested downloading accompanion *.sig file */
> > > +     if(!payload->signature && payload->download_signature && curlerr == CURLE_OK && payload->respcode < 400) {
> > > +             struct dload_payload *sig = NULL;
> > > +
> > > +             int len = strlen(effective_url) + 5;
> > > +             CALLOC(sig, 1, sizeof(*sig), GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> > > +             MALLOC(sig->fileurl, len, FREE(sig); GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> > > +             snprintf(sig->fileurl, len, "%s.sig", effective_url);
> > > +             sig->signature = 1;
> > > +             sig->handle = handle;
> > > +             sig->force = payload->force;
> > > +             sig->unlink_on_fail = payload->unlink_on_fail;
> > > +             sig->errors_ok = payload->signature_optional;
> > > +             /* set hard upper limit of 16KiB */
> > > +             sig->max_size = 16 * 1024;
> > > +
> > > +             curl_multi_add_payload(handle, curlm, sig, localpath);
> > > +     }
> > > +
> >
> > OK.
> >
> > >       /* time condition was met and we didn't download anything. we need to
> > >        * clean up the 0 byte .part file that's left behind. */
> > >       if(timecond == 1 && DOUBLE_EQ(bytes_dl, 0)) {
> > > @@ -565,6 +589,13 @@ cleanup:
> > >       if(ret == -1 && payload->errors_ok) {
> > >               ret = -2;
> > >       }
> > > +
> > > +     if(payload->signature) {
> > > +             /* free signature payload memory that was allocated earlier in dload.c */
> > > +             _alpm_dload_payload_reset(payload);
> > > +             FREE(payload);
> > > +     }
> > > +
> >
> > OK.
> >
> > >       return ret;
> > >  }
> > >
> > > @@ -669,12 +700,12 @@ static int curl_multi_download_internal(alpm_handle_t *handle,
> > >       int still_running = 0;
> > >       int err = 0;
> > >       int parallel_downloads = handle->parallel_downloads;
> > > -
> > >       CURLM *curlm = handle->curlm;
> > > -     CURLMsg *msg;
> > > +     bool msg_done = false;
> > >
> >
> > This is a weird mix of bool and int...  still_running is 0/1.
>
> "still_running" is actually a positive int and represents a number of
> active downloads.
> We use it to make sure that the number of concurrent downloads does not overflow
> "ParallelDownloads" configuration value.
>
> Let me know if you have ideas for a better name for this variable.

I am thinking of renaming variables
"still_running"->"active_downloads_num" and
"parallel_downloads"->"max_downloads". Hopefully it will make the code
a bit more readable.

>
> >  msg_done
> > is true/false.f.
> >
> > I know stdbool started to be used in pacman with this patch series, but
> > I think it should be removed from here and a more comprehensive made
> > later on so not to have a mix.
> >
> > > -     while(still_running || payloads) {
> > > -             int msgs_left = -1;
> > > +     while(still_running || payloads || msg_done) {
> > > +             int msgs_left = 0;
> > > +             CURLMcode mc;
> > >
> > >               for(; still_running < parallel_downloads && payloads; still_running++) {
> > >                       struct dload_payload *payload = payloads->data;
> > > @@ -687,15 +718,19 @@ static int curl_multi_download_internal(alpm_handle_t *handle,
> > >
> > >                               payloads = payloads->next;
> > >                       } else {
> > > -                             // the payload failed to start, do not start any new downloads just wait until
> > > -                             // active one complete.
> > > +                             /* The payload failed to start. Do not start any new downloads.
> > > +                              * Wait until all active downloads complete.
> > > +                              */
> >
> > Not directly related to this patch, but OK...
> >
> > >                               _alpm_log(handle, ALPM_LOG_ERROR, _("failed to setup a download payload for %s\n"), payload->remote_name);
> > >                               payloads = NULL;
> > >                               err = -1;
> > >                       }
> > >               }
> > >
> > > -             CURLMcode mc = curl_multi_perform(curlm, &still_running);
> > > +             mc = curl_multi_perform(curlm, &still_running);
> > > +             if(mc == CURLM_OK) {
> > > +                     mc = curl_multi_wait(curlm, NULL, 0, 1000, NULL);
> > > +             }
> >
> > OK.
> >
> > >
> > >               if(mc != CURLM_OK) {
> > >                       _alpm_log(handle, ALPM_LOG_ERROR, _("curl returned error %d from transfer\n"), mc);
> > > @@ -703,28 +738,28 @@ static int curl_multi_download_internal(alpm_handle_t *handle,
> > >                       err = -1;
> > >               }
> > >
> > > -             while((msg = curl_multi_info_read(curlm, &msgs_left))) {
> > > +             msg_done = false;
> > > +             while(true) {
> > > +                     CURLMsg *msg = curl_multi_info_read(curlm, &msgs_left);
> > > +                     if(!msg) {
> > > +                             break;
> > > +                     }
> >
> > OK.
> >
> > >                       if(msg->msg == CURLMSG_DONE) {
> > >                               int ret = curl_multi_check_finished_download(curlm, msg, localpath);
>
> One more thing that can be cleaned up is return value for
> curl_multi_check_finished_download().
> Currently the function returns 5 different error codes but in fact now
> we only care if download had any
> fatal errors or not. i.e. it can return 0/-1 codes only and it will
> simplify curl_multi_check_finished_download() a bit.
>
> > >                               if(ret == -1) {
> > > -                                     /* if current payload failed to download then stop adding new payloads but wait for the
> > > -                                      * current ones
> > > -                                      */
> > > +                                     /* if current payload failed to download then stop adding new payloads */
> >
> > Fine.
> >
> > >                                       payloads = NULL;
> > >                                       err = -1;
> > > -                             } else if(ret == 2) {
> > > -                                     /* in case of a retry increase the counter of active requests
> > > -                                      * to avoid exiting the loop early
> > > -                                      */
> > > -                                     still_running++;
> > >                               }
> >
> > Where is this going?
> >
> > > +                             /* curl_multi_check_finished_download() might add more payloads e.g. in case of a retry
> > > +                              * from the next mirror. We need to execute curl_multi_perform() at least one more time
> > > +                              * to make sure new payload requests are processed.
> > > +                              */
> > > +                             msg_done = true;
> >
> > This is a weird variable name now I have figured out what it does.  My
> > understanding is that this is really a flag to actually check that
> > everything is done. I know it is related to the current CURLMsg being
> > complete.  Can it be renamed something like "recheck_downloads"?
>
> Done.
>
> > >                       } else {
> > >                               _alpm_log(handle, ALPM_LOG_ERROR, _("curl transfer error: %d\n"), msg->msg);
> > >                       }
> > >               }
> > > -             if(still_running) {
> > > -                     curl_multi_wait(curlm, NULL, 0, 1000, NULL);
> > > -             }
> > >       }
> > >
> > >       _alpm_log(handle, ALPM_LOG_DEBUG, "curl_multi_download_internal return code is %d\n", err);
> > > @@ -803,12 +838,10 @@ int SYMEXPORT alpm_fetch_pkgurl(alpm_handle_t *handle, const alpm_list_t *urls,
> > >  {
> > >       const char *cachedir;
> > >       alpm_list_t *payloads = NULL;
> > > -     int download_sigs;
> > >       const alpm_list_t *i;
> > >       alpm_event_t event;
> > >
> > >       CHECK_HANDLE(handle, return -1);
> > > -     download_sigs = handle->siglevel & ALPM_SIG_PACKAGE;
> > >
> > >       /* find a valid cache dir to download to */
> > >       cachedir = _alpm_filecache_setup(handle);
> > > @@ -830,21 +863,9 @@ int SYMEXPORT alpm_fetch_pkgurl(alpm_handle_t *handle, const alpm_list_t *urls,
> > >                       payload->allow_resume = 1;
> > >                       payload->handle = handle;
> > >                       payload->trust_remote_name = 1;
> > > +                     payload->download_signature = (handle->siglevel & ALPM_SIG_PACKAGE);
> > > +                     payload->signature_optional = (handle->siglevel & ALPM_SIG_PACKAGE_OPTIONAL);
> > >                       payloads = alpm_list_add(payloads, payload);
> > > -
> > > -                     if(download_sigs) {
> > > -                             int len = strlen(url) + 5;
> > > -                             CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, err));
> > > -                             payload->signature = 1;
> > > -                             MALLOC(payload->fileurl, len, FREE(payload); GOTO_ERR(handle, ALPM_ERR_MEMORY, err));
> > > -                             snprintf(payload->fileurl, len, "%s.sig", url);
> > > -                             payload->handle = handle;
> > > -                             payload->trust_remote_name = 1;
> > > -                             payload->errors_ok = (handle->siglevel & ALPM_SIG_PACKAGE_OPTIONAL);
> > > -                             /* set hard upper limit of 16KiB */
> > > -                             payload->max_size = 16 * 1024;
> > > -                             payloads = alpm_list_add(payloads, payload);
> > > -                     }
> >
> > OK.
> >
> > >               }
> > >       }
> > >
> > > diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h
> > > index d13bc1b5..facafca2 100644
> > > --- a/lib/libalpm/dload.h
> > > +++ b/lib/libalpm/dload.h
> > > @@ -47,11 +47,13 @@ struct dload_payload {
> > >       int errors_ok;
> > >       int unlink_on_fail;
> > >       int trust_remote_name;
> > > -     int signature; /* specifies if the payload is a signature file */
> > > +     int download_signature; /* specifies if an accompanion *.sig file need to be downloaded*/
> > > +     int signature_optional; /* *.sig file is optional */
> > >  #ifdef HAVE_LIBCURL
> > >       CURL *curl;
> > >       char error_buffer[CURL_ERROR_SIZE];
> > >       FILE *localf; /* temp download file */
> > > +     int signature; /* specifies if this payload is for a signature file */
> > >  #endif
> > >  };
> > >
> > >


More information about the pacman-dev mailing list