[pacman-dev] [PATCH v3] Introduce alpm_dbs_update() function for parallel db updates

Anatol Pomozov anatol.pomozov at gmail.com
Mon Apr 13 06:12:16 UTC 2020


Hi Allan and others

Is there a chance you can look at this and other "parallel-download" patches?

On Thu, Mar 26, 2020 at 1:20 PM Anatol Pomozov <anatol.pomozov at gmail.com> wrote:
>
> This is an equivalent of alpm_db_update but for multiplexed (parallel)
> downloads. The difference is that this function accepts a list of
> databases to update, and the ALPM internals then download them in
> parallel if possible.
>
> Add a stub for _alpm_multi_download, the function that will perform
> parallel payload downloads in the future.
>
> Introduce a dload_payload->filepath field that contains the URL path of
> the file we download. It is like the fileurl field but does not contain
> the protocol/server part. The rationale for having this field is that with
> the curl multi-download the server retry logic is going to move to a curl
> callback, and the callback needs to be able to reconstruct the 'next'
> fileurl. It will be able to do so by getting the next server URL from
> the 'servers' list and concatenating it with filepath. Once the 'parallel
> download' refactoring is over, the 'fileurl' field will go away.
>
> Signed-off-by: Anatol Pomozov <anatol.pomozov at gmail.com>
> ---
>  lib/libalpm/alpm.h    |  29 ++++++++++
>  lib/libalpm/be_sync.c | 128 ++++++++++++++++++++++++++++++++++++++++++
>  lib/libalpm/dload.c   |  12 ++++
>  lib/libalpm/dload.h   |   5 ++
>  4 files changed, 174 insertions(+)
>
> diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h
> index 5d559db1..2cf20343 100644
> --- a/lib/libalpm/alpm.h
> +++ b/lib/libalpm/alpm.h
> @@ -1049,6 +1049,35 @@ int alpm_db_remove_server(alpm_db_t *db, const char *url);
>   */
>  int alpm_db_update(int force, alpm_db_t *db);
>
> +/** Update package databases
> + *
> + * An update of the package databases in the list \a dbs will be attempted.
> + * Unless \a force is true, the update will only be performed if the remote
> + * databases were modified since the last update.
> + *
> + * Databases in \a dbs that do not have ALPM_DB_USAGE_SYNC enabled are
> + * skipped.
> + *
> + * This operation requires a database lock, and will return an applicable error
> + * if the lock could not be obtained.
> + *
> + * Example:
> + * @code
> + * alpm_list_t *dbs = alpm_get_syncdbs(config->handle);
> + * int ret = alpm_dbs_update(config->handle, dbs, force);
> + * if(ret < 0) {
> + *     pm_printf(ALPM_LOG_ERROR, _("failed to synchronize all databases (%s)\n"),
> + *         alpm_strerror(alpm_errno(config->handle)));
> + * }
> + * @endcode
> + *
> + * @note After a successful update, the \link alpm_db_get_pkgcache()
> + * package cache \endlink will be invalidated
> + * @param handle the context handle
> + * @param dbs list of package databases to update (must not be NULL)
> + * @param force if true, then forces the update, otherwise update only in case
> + * the databases aren't up to date
> + * @return 0 on success, -1 on error (pm_errno is set accordingly)
> + */
> +int alpm_dbs_update(alpm_handle_t *handle, alpm_list_t *dbs, int force);
> +
>  /** Get a package entry from a package database.
>   * @param db pointer to the package database to get the package from
>   * @param name of the package
> diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c
> index aafed15d..7ab52301 100644
> --- a/lib/libalpm/be_sync.c
> +++ b/lib/libalpm/be_sync.c
> @@ -301,6 +301,134 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
>         return ret;
>  }
>
> +/** Update a list of sync databases through the multi-download path.
> + *
> + * For every database in \a dbs that has ALPM_DB_USAGE_SYNC set, a download
> + * payload is queued for "<treename><dbext>", followed by a payload for the
> + * matching ".sig" file when the database siglevel contains ALPM_SIG_DATABASE.
> + * All payloads are handed to _alpm_multi_download() in a single call. On
> + * success each synced database has its package cache freed, its
> + * validity/existence status flags cleared, and is validated again.
> + *
> + * @param handle the context handle
> + * @param dbs list of alpm_db_t to update, must not be NULL
> + * @param force stored as the force flag of each database payload; databases
> + * flagged DB_STATUS_INVALID are always forced
> + * @return the non-negative result of _alpm_multi_download() on success,
> + * a negative value on error
> + */
> +int SYMEXPORT alpm_dbs_update(alpm_handle_t *handle, alpm_list_t *dbs, int force) {
> +       char *syncpath;
> +       const char *dbext;
> +       alpm_list_t *i;
> +       int ret = -1;
> +       mode_t oldmask;
> +       alpm_list_t *payloads = NULL;
> +
> +       /* Sanity checks */
> +       CHECK_HANDLE(handle, return -1);
> +       ASSERT(dbs != NULL, return -1);
> +       handle->pm_errno = ALPM_ERR_OK;
> +
> +       /* only read from the handle once it has been validated above */
> +       dbext = handle->dbext;
> +
> +       syncpath = get_sync_dir(handle);
> +       ASSERT(syncpath != NULL, return -1);
> +
> +       /* make sure we have a sane umask */
> +       oldmask = umask(0022);
> +
> +       /* attempt to grab a lock */
> +       if(_alpm_handle_lock(handle)) {
> +               /* NOTE(review): the cleanup path still calls _alpm_handle_unlock();
> +                * this assumes unlocking without holding the lock is harmless -
> +                * confirm against _alpm_handle_unlock() */
> +               GOTO_ERR(handle, ALPM_ERR_HANDLE_LOCK, cleanup);
> +       }
> +
> +       for(i = dbs; i; i = i->next) {
> +               alpm_db_t *db = i->data;
> +               int dbforce = force;
> +               struct dload_payload *payload = NULL;
> +               size_t len;
> +               int siglevel;
> +
> +               if(!(db->usage & ALPM_DB_USAGE_SYNC)) {
> +                       continue;
> +               }
> +
> +               ASSERT(db != handle->db_local, GOTO_ERR(handle, ALPM_ERR_WRONG_ARGS, cleanup));
> +               ASSERT(db->servers != NULL, GOTO_ERR(handle, ALPM_ERR_SERVER_NONE, cleanup));
> +
> +               /* force update of invalid databases to fix potential mismatched database/signature */
> +               if(db->status & DB_STATUS_INVALID) {
> +                       dbforce = 1;
> +               }
> +
> +               CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> +               /* put the payload on the list right away so that the cleanup path
> +                * releases it if one of the allocations below fails */
> +               payloads = alpm_list_add(payloads, payload);
> +
> +               /* set hard upper limit of 128 MiB */
> +               payload->max_size = 128 * 1024 * 1024;
> +               payload->servers = db->servers;
> +
> +               /* build the database file name: treename + extension + NUL */
> +               len = strlen(db->treename) + strlen(dbext) + 1;
> +               MALLOC(payload->filepath, len, GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> +               snprintf(payload->filepath, len, "%s%s", db->treename, dbext);
> +               payload->handle = handle;
> +               payload->force = dbforce;
> +               payload->unlink_on_fail = 1;
> +
> +               siglevel = alpm_db_get_siglevel(db);
> +               if(siglevel & ALPM_SIG_DATABASE) {
> +                       struct dload_payload *sig_payload;
> +                       CALLOC(sig_payload, 1, sizeof(*sig_payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> +                       /* same ownership rule as for the database payload above */
> +                       payloads = alpm_list_add(payloads, sig_payload);
> +
> +                       /* build the signature file name: 5 extra bytes hold ".sig" and the NUL */
> +                       len = strlen(db->treename) + strlen(dbext) + 5;
> +                       MALLOC(sig_payload->filepath, len, GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> +                       snprintf(sig_payload->filepath, len, "%s%s.sig", db->treename, dbext);
> +
> +                       sig_payload->handle = handle;
> +                       /* the signature payload is always forced, independent of dbforce */
> +                       sig_payload->force = 1;
> +                       sig_payload->errors_ok = (siglevel & ALPM_SIG_DATABASE_OPTIONAL);
> +
> +                       /* set hard upper limit of 16 KiB */
> +                       sig_payload->max_size = 16 * 1024;
> +                       sig_payload->servers = db->servers;
> +               }
> +       }
> +
> +       ret = _alpm_multi_download(handle, payloads, syncpath);
> +       if(ret < 0) {
> +               /* skip validation so that the download error is preserved */
> +               goto cleanup;
> +       }
> +
> +       for(i = dbs; i; i = i->next) {
> +               alpm_db_t *db = i->data;
> +               if(!(db->usage & ALPM_DB_USAGE_SYNC)) {
> +                       continue;
> +               }
> +
> +               /* Cache needs to be rebuilt */
> +               _alpm_db_free_pkgcache(db);
> +
> +               /* clear all status flags regarding validity/existence */
> +               db->status &= ~DB_STATUS_VALID;
> +               db->status &= ~DB_STATUS_INVALID;
> +               db->status &= ~DB_STATUS_EXISTS;
> +               db->status &= ~DB_STATUS_MISSING;
> +
> +               /* a validation failure fails the whole update, but the remaining
> +                * databases are still processed */
> +               if(sync_db_validate(db) != 0) {
> +                       /* pm_errno should be set */
> +                       ret = -1;
> +               }
> +       }
> +
> +cleanup:
> +       _alpm_handle_unlock(handle);
> +
> +       /* test the same condition as the download check above so that any
> +        * negative result keeps pm_errno intact */
> +       if(ret < 0) {
> +               /* pm_errno was set by whichever step failed */
> +               _alpm_log(handle, ALPM_LOG_DEBUG, "failed to sync dbs: %s\n",
> +                               alpm_strerror(handle->pm_errno));
> +       } else {
> +               handle->pm_errno = ALPM_ERR_OK;
> +       }
> +
> +       if(payloads) {
> +               /* reset frees the strings owned by each payload, FREELIST the
> +                * payload structs and the list nodes */
> +               alpm_list_free_inner(payloads, (alpm_list_fn_free)_alpm_dload_payload_reset);
> +               FREELIST(payloads);
> +       }
> +       free(syncpath);
> +       umask(oldmask);
> +       return ret;
> +}
> +
>  /* Forward decl so I don't reorganize the whole file right now */
>  static int sync_db_read(alpm_db_t *db, struct archive *archive,
>                 struct archive_entry *entry, alpm_pkg_t **likely_pkg);
> diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> index 670da03d..7cd3e3a4 100644
> --- a/lib/libalpm/dload.c
> +++ b/lib/libalpm/dload.c
> @@ -636,6 +636,16 @@ int _alpm_download(struct dload_payload *payload, const char *localpath,
>         }
>  }
>
> +/* Placeholder for the multiplexed downloader: takes the arguments, ignores
> + * them and reports success. */
> +int _alpm_multi_download(alpm_handle_t *handle,
> +               alpm_list_t *payloads /* struct dload_payload */,
> +               const char *localpath)
> +{
> +       /* nothing is downloaded yet; the casts only mark the parameters as used */
> +       (void)localpath;
> +       (void)payloads;
> +       (void)handle;
> +
> +       return 0;
> +}
> +
>  static char *filecache_find_url(alpm_handle_t *handle, const char *url)
>  {
>         const char *filebase = strrchr(url, '/');
> @@ -738,6 +748,7 @@ void _alpm_dload_payload_reset(struct dload_payload *payload)
>         FREE(payload->destfile_name);
>         FREE(payload->content_disp_name);
>         FREE(payload->fileurl);
> +       FREE(payload->filepath);
>         *payload = (struct dload_payload){0};
>  }
>
> @@ -746,6 +757,7 @@ void _alpm_dload_payload_reset_for_retry(struct dload_payload *payload)
>         ASSERT(payload, return);
>
>         FREE(payload->fileurl);
> +       FREE(payload->filepath);
>         payload->initial_size += payload->prevprogress;
>         payload->prevprogress = 0;
>         payload->unlink_on_fail = 0;
> diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h
> index 1e8f75f3..3eb7fbe1 100644
> --- a/lib/libalpm/dload.h
> +++ b/lib/libalpm/dload.h
> @@ -31,6 +31,7 @@ struct dload_payload {
>         char *destfile_name;
>         char *content_disp_name;
>         char *fileurl;
> +       char *filepath; /* download URL path */
>         alpm_list_t *servers;
>         long respcode;
>         off_t initial_size;
> @@ -53,4 +54,8 @@ void _alpm_dload_payload_reset_for_retry(struct dload_payload *payload);
>  int _alpm_download(struct dload_payload *payload, const char *localpath,
>                 char **final_file, const char **final_url);
>
> +/** Entry point for downloading a list of payloads.
> + * @param handle the context handle
> + * @param payloads list of struct dload_payload
> + * @param localpath presumably the directory the files are stored in - TODO confirm
> + * @return 0 from the current stub; alpm_dbs_update() treats a negative value as failure
> + */
> +int _alpm_multi_download(alpm_handle_t *handle,
> +               alpm_list_t *payloads /* struct dload_payload */,
> +               const char *localpath);
> +
>  #endif /* ALPM_DLOAD_H */
> --
> 2.26.0
>


More information about the pacman-dev mailing list