[pacman-dev] [PATCH v3] Introduce alpm_dbs_update() function for parallel db updates
Anatol Pomozov
anatol.pomozov at gmail.com
Mon Apr 13 06:12:16 UTC 2020
Hi Allan and others
Is there a chance you can look at this and other "parallel-download" patches?
On Thu, Mar 26, 2020 at 1:20 PM Anatol Pomozov <anatol.pomozov at gmail.com> wrote:
>
> This is the equivalent of alpm_db_update but for multiplexed (parallel)
> download. The difference is that this function accepts a list of
> databases to update, and the ALPM internals then download them in
> parallel if possible.
>
> Add a stub for _alpm_multi_download, the function that will do parallel
> payload downloads in the future.
>
> Introduce the dload_payload->filepath field, which contains the URL path
> to the file we download. It is like the fileurl field but does not contain
> the protocol/server part. The rationale for having this field is that with
> the curl multidownload the server retry logic is going to move to a curl
> callback, and the callback needs to be able to reconstruct the 'next'
> fileurl. One will be able to do so by getting the next server URL from the
> 'servers' list and concatenating it with filepath. Once the 'parallel
> download' refactoring is over, the 'fileurl' field will go away.
>
> Signed-off-by: Anatol Pomozov <anatol.pomozov at gmail.com>
> ---
> lib/libalpm/alpm.h | 29 ++++++++++
> lib/libalpm/be_sync.c | 128 ++++++++++++++++++++++++++++++++++++++++++
> lib/libalpm/dload.c | 12 ++++
> lib/libalpm/dload.h | 5 ++
> 4 files changed, 174 insertions(+)
>
> diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h
> index 5d559db1..2cf20343 100644
> --- a/lib/libalpm/alpm.h
> +++ b/lib/libalpm/alpm.h
> @@ -1049,6 +1049,35 @@ int alpm_db_remove_server(alpm_db_t *db, const char *url);
> */
> int alpm_db_update(int force, alpm_db_t *db);
>
> +/** Update package databases
> + *
> + * An update of the package databases in the list \a dbs will be attempted.
> + * Unless \a force is true, the update will only be performed if the remote
> + * databases were modified since the last update.
> + *
> + * This operation requires a database lock, and will return an applicable error
> + * if the lock could not be obtained.
> + *
> + * Example:
> + * @code
> + * alpm_list_t *dbs = alpm_get_syncdbs();
> + * ret = alpm_dbs_update(config->handle, dbs, force);
> + * if(ret < 0) {
> + * pm_printf(ALPM_LOG_ERROR, _("failed to synchronize all databases (%s)\n"),
> + * alpm_strerror(alpm_errno(config->handle)));
> + * }
> + * @endcode
> + *
> + * @note After a successful update, the \link alpm_db_get_pkgcache()
> + * package cache \endlink will be invalidated
> + * @param handle the context handle
> + * @param dbs list of package databases to update
> + * @param force if true, then forces the update, otherwise update only in case
> + * the databases aren't up to date
> + * @return 0 on success, -1 on error (pm_errno is set accordingly)
> + */
> +int alpm_dbs_update(alpm_handle_t *handle, alpm_list_t *dbs, int force);
> +
> /** Get a package entry from a package database.
> * @param db pointer to the package database to get the package from
> * @param name of the package
> diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c
> index aafed15d..7ab52301 100644
> --- a/lib/libalpm/be_sync.c
> +++ b/lib/libalpm/be_sync.c
> @@ -301,6 +301,134 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
> return ret;
> }
>
> +int SYMEXPORT alpm_dbs_update(alpm_handle_t *handle, alpm_list_t *dbs, int force) {
> + char *syncpath;
> + const char *dbext = handle->dbext;
> + alpm_list_t *i;
> + int ret = -1;
> + mode_t oldmask;
> + alpm_list_t *payloads = NULL;
> +
> + /* Sanity checks */
> + CHECK_HANDLE(handle, return -1);
> + ASSERT(dbs != NULL, return -1);
> + handle->pm_errno = ALPM_ERR_OK;
> +
> + syncpath = get_sync_dir(handle);
> + ASSERT(syncpath != NULL, return -1);
> +
> + /* make sure we have a sane umask */
> + oldmask = umask(0022);
> +
> + /* attempt to grab a lock */
> + if(_alpm_handle_lock(handle)) {
> + GOTO_ERR(handle, ALPM_ERR_HANDLE_LOCK, cleanup);
> + }
> +
> + for(i = dbs; i; i = i->next) {
> + alpm_db_t *db = i->data;
> + int dbforce = force;
> + struct dload_payload *payload = NULL;
> + size_t len;
> + int siglevel;
> +
> + if(!(db->usage & ALPM_DB_USAGE_SYNC)) {
> + continue;
> + }
> +
> + ASSERT(db != handle->db_local, GOTO_ERR(handle, ALPM_ERR_WRONG_ARGS, cleanup));
> + ASSERT(db->servers != NULL, GOTO_ERR(handle, ALPM_ERR_SERVER_NONE, cleanup));
> +
> + /* force update of invalid databases to fix potential mismatched database/signature */
> + if(db->status & DB_STATUS_INVALID) {
> + dbforce = 1;
> + }
> +
> + CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> +
> + /* set hard upper limit of 128 MiB */
> + payload->max_size = 128 * 1024 * 1024;
> + payload->servers = db->servers;
> +
> + /* print filename (treename + dbext) into a buffer */
> + len = strlen(db->treename) + strlen(dbext) + 1;
> + MALLOC(payload->filepath, len, GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> + snprintf(payload->filepath, len, "%s%s", db->treename, dbext);
> + payload->handle = handle;
> + payload->force = dbforce;
> + payload->unlink_on_fail = 1;
> +
> + payloads = alpm_list_add(payloads, payload);
> +
> + siglevel = alpm_db_get_siglevel(db);
> + if(siglevel & ALPM_SIG_DATABASE) {
> + struct dload_payload *sig_payload;
> + CALLOC(sig_payload, 1, sizeof(*sig_payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> +
> + /* print filename into a buffer (leave space for .sig and the terminating NUL) */
> + len = strlen(db->treename) + strlen(dbext) + 5;
> + MALLOC(sig_payload->filepath, len, GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
> + snprintf(sig_payload->filepath, len, "%s%s.sig", db->treename, dbext);
> +
> + sig_payload->handle = handle;
> + sig_payload->force = 1;
> + sig_payload->errors_ok = (siglevel & ALPM_SIG_DATABASE_OPTIONAL);
> +
> + /* set hard upper limit of 16 KiB */
> + sig_payload->max_size = 16 * 1024;
> + sig_payload->servers = db->servers;
> +
> + payloads = alpm_list_add(payloads, sig_payload);
> + }
> + }
> +
> + ret = _alpm_multi_download(handle, payloads, syncpath);
> + if(ret < 0) {
> + goto cleanup;
> + }
> +
> + for(i = dbs; i; i = i->next) {
> + alpm_db_t *db = i->data;
> + if(!(db->usage & ALPM_DB_USAGE_SYNC)) {
> + continue;
> + }
> +
> + /* Cache needs to be rebuilt */
> + _alpm_db_free_pkgcache(db);
> +
> + /* clear all status flags regarding validity/existence */
> + db->status &= ~DB_STATUS_VALID;
> + db->status &= ~DB_STATUS_INVALID;
> + db->status &= ~DB_STATUS_EXISTS;
> + db->status &= ~DB_STATUS_MISSING;
> +
> + /* the download did not fail (failures jump to cleanup above), so validate the database */
> + if(sync_db_validate(db) != 0) {
> + /* pm_errno should be set */
> + ret = -1;
> + }
> + }
> +
> +cleanup:
> + _alpm_handle_unlock(handle);
> +
> + if(ret == -1) {
> + /* pm_errno was set by the download code */
> + _alpm_log(handle, ALPM_LOG_DEBUG, "failed to sync dbs: %s\n",
> + alpm_strerror(handle->pm_errno));
> + } else {
> + handle->pm_errno = ALPM_ERR_OK;
> + }
> +
> + if(payloads) {
> + alpm_list_free_inner(payloads, (alpm_list_fn_free)_alpm_dload_payload_reset);
> + FREELIST(payloads);
> + }
> + free(syncpath);
> + umask(oldmask);
> + return ret;
> +}
> +
> /* Forward decl so I don't reorganize the whole file right now */
> static int sync_db_read(alpm_db_t *db, struct archive *archive,
> struct archive_entry *entry, alpm_pkg_t **likely_pkg);
> diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> index 670da03d..7cd3e3a4 100644
> --- a/lib/libalpm/dload.c
> +++ b/lib/libalpm/dload.c
> @@ -636,6 +636,16 @@ int _alpm_download(struct dload_payload *payload, const char *localpath,
> }
> }
>
> +int _alpm_multi_download(alpm_handle_t *handle,
> + alpm_list_t *payloads /* struct dload_payload */,
> + const char *localpath)
> +{
> + (void)handle;
> + (void)payloads;
> + (void)localpath;
> + return 0;
> +}
> +
> static char *filecache_find_url(alpm_handle_t *handle, const char *url)
> {
> const char *filebase = strrchr(url, '/');
> @@ -738,6 +748,7 @@ void _alpm_dload_payload_reset(struct dload_payload *payload)
> FREE(payload->destfile_name);
> FREE(payload->content_disp_name);
> FREE(payload->fileurl);
> + FREE(payload->filepath);
> *payload = (struct dload_payload){0};
> }
>
> @@ -746,6 +757,7 @@ void _alpm_dload_payload_reset_for_retry(struct dload_payload *payload)
> ASSERT(payload, return);
>
> FREE(payload->fileurl);
> + FREE(payload->filepath);
> payload->initial_size += payload->prevprogress;
> payload->prevprogress = 0;
> payload->unlink_on_fail = 0;
> diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h
> index 1e8f75f3..3eb7fbe1 100644
> --- a/lib/libalpm/dload.h
> +++ b/lib/libalpm/dload.h
> @@ -31,6 +31,7 @@ struct dload_payload {
> char *destfile_name;
> char *content_disp_name;
> char *fileurl;
> + char *filepath; /* download URL path */
> alpm_list_t *servers;
> long respcode;
> off_t initial_size;
> @@ -53,4 +54,8 @@ void _alpm_dload_payload_reset_for_retry(struct dload_payload *payload);
> int _alpm_download(struct dload_payload *payload, const char *localpath,
> char **final_file, const char **final_url);
>
> +int _alpm_multi_download(alpm_handle_t *handle,
> + alpm_list_t *payloads /* struct dload_payload */,
> + const char *localpath);
> +
> #endif /* ALPM_DLOAD_H */
> --
> 2.26.0
>
More information about the pacman-dev
mailing list