[pacman-dev] [PATCH] Convert download packages logic to multiplexed API
Create a list of dload_payloads and pass it to the new _alpm_multi_* interface.

Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
---
 lib/libalpm/sync.c | 74 ++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 46 deletions(-)

diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
index 8a9dcae8..aa417bca 100644
--- a/lib/libalpm/sync.c
+++ b/lib/libalpm/sync.c
@@ -726,47 +726,13 @@ static int find_dl_candidates(alpm_handle_t *handle, alpm_list_t **files)
 	return 0;
 }
 
-static int download_single_file(alpm_handle_t *handle, struct dload_payload *payload,
-		const char *cachedir)
-{
-	alpm_event_pkgdownload_t event = {
-		.type = ALPM_EVENT_PKGDOWNLOAD_START,
-		.file = payload->remote_name
-	};
-	const alpm_list_t *server;
-
-	payload->handle = handle;
-	payload->allow_resume = 1;
-
-	EVENT(handle, &event);
-	for(server = payload->servers; server; server = server->next) {
-		const char *server_url = server->data;
-		size_t len;
-
-		/* print server + filename into a buffer */
-		len = strlen(server_url) + strlen(payload->remote_name) + 2;
-		MALLOC(payload->fileurl, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1));
-		snprintf(payload->fileurl, len, "%s/%s", server_url, payload->remote_name);
-
-		if(_alpm_download(payload, cachedir, NULL, NULL) != -1) {
-			event.type = ALPM_EVENT_PKGDOWNLOAD_DONE;
-			EVENT(handle, &event);
-			return 0;
-		}
-		_alpm_dload_payload_reset_for_retry(payload);
-	}
-
-	event.type = ALPM_EVENT_PKGDOWNLOAD_FAILED;
-	EVENT(handle, &event);
-	return -1;
-}
-
 static int download_files(alpm_handle_t *handle)
 {
 	const char *cachedir;
 	alpm_list_t *i, *files = NULL;
 	int errors = 0;
 	alpm_event_t event;
+	alpm_list_t *payloads = NULL;
 
 	cachedir = _alpm_filecache_setup(handle);
 	handle->trans->state = STATE_DOWNLOADING;
@@ -814,26 +780,42 @@ static int download_files(alpm_handle_t *handle)
 		event.type = ALPM_EVENT_RETRIEVE_START;
 		EVENT(handle, &event);
-		event.type = ALPM_EVENT_RETRIEVE_DONE;
 		for(i = files; i; i = i->next) {
 			const alpm_pkg_t *pkg = i->data;
-			struct dload_payload payload = {0};
+			struct dload_payload *payload = NULL;
 
-			STRDUP(payload.remote_name, pkg->filename, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
-			payload.servers = pkg->origin_data.db->servers;
-			payload.max_size = pkg->size;
+			CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
+			STRDUP(payload->remote_name, pkg->filename, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
+			STRDUP(payload->filepath, pkg->filename, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
+			payload->max_size = pkg->size;
+			payload->servers = pkg->origin_data.db->servers;
+			payload->handle = handle;
+			payload->allow_resume = 1;
 
-			if(download_single_file(handle, &payload, cachedir) == -1) {
-				errors++;
-				event.type = ALPM_EVENT_RETRIEVE_FAILED;
-				_alpm_log(handle, ALPM_LOG_WARNING, _("failed to retrieve some files\n"));
-			}
-			_alpm_dload_payload_reset(&payload);
+			payloads = alpm_list_add(payloads, payload);
+
+			/* TOASK: do we need to initiate *.sig file download here? */
+		}
+		/* TOASK: the sequential download logic reported ALPM_EVENT_PKGDOWNLOAD_START/DONE/FAILED event.
+		 * Do we want to report the event as well? If yes then it should be moved inside alpm_multi_*()
+		 * function but at that point ALPM operates with dload_payload() and does not care if it downloads
+		 * package or database file.
+		 */
+		event.type = ALPM_EVENT_RETRIEVE_DONE;
+		if(_alpm_multi_download(handle, payloads, cachedir) == -1) {
+			errors++;
+			event.type = ALPM_EVENT_RETRIEVE_FAILED;
+			_alpm_log(handle, ALPM_LOG_WARNING, _("failed to retrieve some files\n"));
 		}
 		EVENT(handle, &event);
 	}
 
 finish:
+	if(payloads) {
+		alpm_list_free_inner(payloads, (alpm_list_fn_free)_alpm_dload_payload_reset);
+		FREELIST(payloads);
+	}
+
 	if(files) {
 		alpm_list_free(files);
 	}
--
2.26.1
Hi

On Sat, Apr 18, 2020 at 7:04 PM Anatol Pomozov <anatol.pomozov@gmail.com> wrote:
Create a list of dload_payloads and pass it to the new _alpm_multi_* interface.
Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
---
 lib/libalpm/sync.c | 74 ++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 46 deletions(-)

diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
index 8a9dcae8..aa417bca 100644
--- a/lib/libalpm/sync.c
+++ b/lib/libalpm/sync.c
@@ -726,47 +726,13 @@ static int find_dl_candidates(alpm_handle_t *handle, alpm_list_t **files)
 	return 0;
 }
 
-static int download_single_file(alpm_handle_t *handle, struct dload_payload *payload,
-		const char *cachedir)
-{
-	alpm_event_pkgdownload_t event = {
-		.type = ALPM_EVENT_PKGDOWNLOAD_START,
-		.file = payload->remote_name
-	};
-	const alpm_list_t *server;
-
-	payload->handle = handle;
-	payload->allow_resume = 1;
-
-	EVENT(handle, &event);
-	for(server = payload->servers; server; server = server->next) {
-		const char *server_url = server->data;
-		size_t len;
-
-		/* print server + filename into a buffer */
-		len = strlen(server_url) + strlen(payload->remote_name) + 2;
-		MALLOC(payload->fileurl, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1));
-		snprintf(payload->fileurl, len, "%s/%s", server_url, payload->remote_name);
-
-		if(_alpm_download(payload, cachedir, NULL, NULL) != -1) {
-			event.type = ALPM_EVENT_PKGDOWNLOAD_DONE;
-			EVENT(handle, &event);
-			return 0;
-		}
-		_alpm_dload_payload_reset_for_retry(payload);
-	}
-
-	event.type = ALPM_EVENT_PKGDOWNLOAD_FAILED;
-	EVENT(handle, &event);
-	return -1;
-}
-
 static int download_files(alpm_handle_t *handle)
 {
 	const char *cachedir;
 	alpm_list_t *i, *files = NULL;
 	int errors = 0;
 	alpm_event_t event;
+	alpm_list_t *payloads = NULL;
 
 	cachedir = _alpm_filecache_setup(handle);
 	handle->trans->state = STATE_DOWNLOADING;
@@ -814,26 +780,42 @@ static int download_files(alpm_handle_t *handle)
 		event.type = ALPM_EVENT_RETRIEVE_START;
 		EVENT(handle, &event);
-		event.type = ALPM_EVENT_RETRIEVE_DONE;
 		for(i = files; i; i = i->next) {
 			const alpm_pkg_t *pkg = i->data;
-			struct dload_payload payload = {0};
+			struct dload_payload *payload = NULL;
 
-			STRDUP(payload.remote_name, pkg->filename, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
-			payload.servers = pkg->origin_data.db->servers;
-			payload.max_size = pkg->size;
+			CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
+			STRDUP(payload->remote_name, pkg->filename, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
+			STRDUP(payload->filepath, pkg->filename, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
+			payload->max_size = pkg->size;
+			payload->servers = pkg->origin_data.db->servers;
+			payload->handle = handle;
+			payload->allow_resume = 1;
 
-			if(download_single_file(handle, &payload, cachedir) == -1) {
-				errors++;
-				event.type = ALPM_EVENT_RETRIEVE_FAILED;
-				_alpm_log(handle, ALPM_LOG_WARNING, _("failed to retrieve some files\n"));
-			}
-			_alpm_dload_payload_reset(&payload);
+			payloads = alpm_list_add(payloads, payload);
+
+			/* TOASK: do we need to initiate *.sig file download here? */
One question with this change is whether we need to download the package signatures here.

It looks like the current codepath does not try to download *.sig files. But the other place that fetches packages (alpm_fetch_pkgurl) actually *does* download sig files. So there is an inconsistency between the different download codepaths.

Also, Arch does not use detached *.sig files for the packages. So I am not sure what the current plan is regarding package signature files.
+		}
+		/* TOASK: the sequential download logic reported ALPM_EVENT_PKGDOWNLOAD_START/DONE/FAILED event.
+		 * Do we want to report the event as well? If yes then it should be moved inside alpm_multi_*()
+		 * function but at that point ALPM operates with dload_payload() and does not care if it downloads
+		 * package or database file.
+		 */
+		event.type = ALPM_EVENT_RETRIEVE_DONE;
+		if(_alpm_multi_download(handle, payloads, cachedir) == -1) {
+			errors++;
+			event.type = ALPM_EVENT_RETRIEVE_FAILED;
+			_alpm_log(handle, ALPM_LOG_WARNING, _("failed to retrieve some files\n"));
 		}
 		EVENT(handle, &event);
 	}
 
 finish:
+	if(payloads) {
+		alpm_list_free_inner(payloads, (alpm_list_fn_free)_alpm_dload_payload_reset);
+		FREELIST(payloads);
+	}
+
 	if(files) {
 		alpm_list_free(files);
 	}
--
2.26.1
On 19/4/20 1:15 pm, Anatol Pomozov wrote:
+ /* TOASK: do we need to initiate *.sig file download here? */
One question with this change is whether we need to download the package signatures here.
It looks like the current codepath does not try to download *.sig files. But the other place that fetches packages (alpm_fetch_pkgurl) actually *does* download sig files. So there is an inconsistency between the different download codepaths.
Also, Arch does not use detached *.sig files for the packages. So I am not sure what the current plan is regarding package signature files.
Arch does have detached .sig files on its mirrors.

The two download paths currently are:

1) -S <pkg>: this uses the signature from the db
2) -U <url>: this uses a signature if it finds one

There is a bug report, which we have discussed implementing that asks us to always download signatures. It would be good to add this while you are working on this area.

This has several advantages. We can verify packages in our cache, even if they are no longer in the database. And we could drop signatures from the databases, making them substantially smaller (we added them when the overhead was lower due to using less secure signing keys).

Allan
Hello

On Mon, Apr 20, 2020 at 4:53 AM Allan McRae <allan@archlinux.org> wrote:
On 19/4/20 1:15 pm, Anatol Pomozov wrote:
+ /* TOASK: do we need to initiate *.sig file download here? */
One question with this change is whether we need to download the package signatures here.
It looks like the current codepath does not try to download *.sig files. But the other place that fetches packages (alpm_fetch_pkgurl) actually *does* download sig files. So there is an inconsistency between the different download codepaths.
Also, Arch does not use detached *.sig files for the packages. So I am not sure what the current plan is regarding package signature files.
Arch does have detached .sig files on its mirrors.
The two download paths currently are:
1) -S <pkg>: this uses the signature from the db
2) -U <url>: this uses a signature if it finds one
The -U codepath handles downloading package signatures already. Its logic essentially checks this flag: handle->siglevel & ALPM_SIG_PACKAGE.

-S does not download the *.sig file. My understanding is that we need to download the signature if the db SigLevel is either "Optional" or "Required".
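For illustration only, here is a rough, untested sketch of what queueing a detached signature next to each package payload could look like inside the per-package loop. The payload fields are the ones already used in this patch; errors_ok and alpm_db_get_siglevel() are my assumptions about the existing dload/db API, and a missing .sig is treated as non-fatal:

	/* sketch: queue a *.sig payload alongside the package payload */
	if(alpm_db_get_siglevel(pkg->origin_data.db) & ALPM_SIG_PACKAGE) {
		struct dload_payload *sig = NULL;
		size_t len = strlen(pkg->filename) + 5; /* ".sig" + terminating NUL */

		CALLOC(sig, 1, sizeof(*sig), GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
		MALLOC(sig->remote_name, len, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
		snprintf(sig->remote_name, len, "%s.sig", pkg->filename);
		STRDUP(sig->filepath, sig->remote_name, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
		sig->servers = pkg->origin_data.db->servers;
		sig->handle = handle;
		sig->errors_ok = 1; /* assumption: a missing .sig should not fail the whole transaction */

		payloads = alpm_list_add(payloads, sig);
	}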
There is a bug report, which we have discussed implementing that asks us to always download signatures. It would be good to add this while you are working on this area.
Sure, I can look at it and send a separate patch for this new functionality. What is the bug report number? I can't find the discussion on bugs.archlinux.org.
This has several advantages. We can verify packages in our cache, even if they are no longer in the database. And we could drop signatures from the databases, making them substantially smaller (we added them when the overhead was lower due to using less secure signing keys).
Sure I can implement it.
On 19/4/20 12:04 pm, Anatol Pomozov wrote:
Create a list of dload_payloads and pass it to the new _alpm_multi_* interface.
Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
---
 lib/libalpm/sync.c | 74 ++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 46 deletions(-)
Deletions!
diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
index 8a9dcae8..aa417bca 100644
--- a/lib/libalpm/sync.c
+++ b/lib/libalpm/sync.c
@@ -726,47 +726,13 @@ static int find_dl_candidates(alpm_handle_t *handle, alpm_list_t **files)
 	return 0;
 }
 
-static int download_single_file(alpm_handle_t *handle, struct dload_payload *payload,
-		const char *cachedir)
-{
-	alpm_event_pkgdownload_t event = {
-		.type = ALPM_EVENT_PKGDOWNLOAD_START,
-		.file = payload->remote_name
-	};
-	const alpm_list_t *server;
-
-	payload->handle = handle;
-	payload->allow_resume = 1;
-
-	EVENT(handle, &event);
-	for(server = payload->servers; server; server = server->next) {
-		const char *server_url = server->data;
-		size_t len;
-
-		/* print server + filename into a buffer */
-		len = strlen(server_url) + strlen(payload->remote_name) + 2;
-		MALLOC(payload->fileurl, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1));
-		snprintf(payload->fileurl, len, "%s/%s", server_url, payload->remote_name);
-
-		if(_alpm_download(payload, cachedir, NULL, NULL) != -1) {
-			event.type = ALPM_EVENT_PKGDOWNLOAD_DONE;
-			EVENT(handle, &event);
-			return 0;
-		}
-		_alpm_dload_payload_reset_for_retry(payload);
-	}
-
-	event.type = ALPM_EVENT_PKGDOWNLOAD_FAILED;
-	EVENT(handle, &event);
-	return -1;
-}
-
 static int download_files(alpm_handle_t *handle)
 {
 	const char *cachedir;
 	alpm_list_t *i, *files = NULL;
 	int errors = 0;
 	alpm_event_t event;
+	alpm_list_t *payloads = NULL;
 
 	cachedir = _alpm_filecache_setup(handle);
 	handle->trans->state = STATE_DOWNLOADING;
@@ -814,26 +780,42 @@ static int download_files(alpm_handle_t *handle)
 		event.type = ALPM_EVENT_RETRIEVE_START;
 		EVENT(handle, &event);
-		event.type = ALPM_EVENT_RETRIEVE_DONE;
 		for(i = files; i; i = i->next) {
 			const alpm_pkg_t *pkg = i->data;
-			struct dload_payload payload = {0};
+			struct dload_payload *payload = NULL;
 
-			STRDUP(payload.remote_name, pkg->filename, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
-			payload.servers = pkg->origin_data.db->servers;
-			payload.max_size = pkg->size;
+			CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
+			STRDUP(payload->remote_name, pkg->filename, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
+			STRDUP(payload->filepath, pkg->filename, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
My suspicion that the payload struct now has redundant information increases... But as I said at the time, we need to set up a good download testsuite so we can detect any breakage of edge cases.
+			payload->max_size = pkg->size;
+			payload->servers = pkg->origin_data.db->servers;
+			payload->handle = handle;
+			payload->allow_resume = 1;
OK.
-			if(download_single_file(handle, &payload, cachedir) == -1) {
-				errors++;
-				event.type = ALPM_EVENT_RETRIEVE_FAILED;
-				_alpm_log(handle, ALPM_LOG_WARNING, _("failed to retrieve some files\n"));
-			}
-			_alpm_dload_payload_reset(&payload);
+			payloads = alpm_list_add(payloads, payload);
+
+			/* TOASK: do we need to initiate *.sig file download here? */
As discussed on IRC - we should. That way we can stop distributing them in repo dbs.
+		}
+		/* TOASK: the sequential download logic reported ALPM_EVENT_PKGDOWNLOAD_START/DONE/FAILED event.
+		 * Do we want to report the event as well? If yes then it should be moved inside alpm_multi_*()
+		 * function but at that point ALPM operates with dload_payload() and does not care if it downloads
+		 * package or database file.
+		 */
These events were not used by pacman. My recollection is that it was added to allow other frontends to monitor download progress.

Following my comments on an earlier patch about adding ALPM_EVENT_DB_RETRIEVE_* and changing ALPM_EVENT_RETRIEVE_* to ALPM_EVENT_PKG_RETRIEVE_*, I think this is covered enough for other frontends if they need it.

You can remove ALPM_EVENT_PKGDOWNLOAD_* from the codebase. Other frontends will need to overhaul for parallel download anyway.
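For what it is worth, a frontend that only cares about overall progress can get by with the coarse events alone. A minimal sketch, assuming the current 5.x-style callback signature and the ALPM_EVENT_RETRIEVE_* names used in this patch (the proposed DB/PKG split would only rename the cases):

	#include <stdio.h>
	#include <alpm.h>

	/* minimal frontend callback watching the coarse retrieve events */
	static void event_cb(alpm_event_t *event)
	{
		switch(event->type) {
			case ALPM_EVENT_RETRIEVE_START:
				printf("retrieving packages...\n");
				break;
			case ALPM_EVENT_RETRIEVE_DONE:
				printf("all packages retrieved\n");
				break;
			case ALPM_EVENT_RETRIEVE_FAILED:
				printf("failed to retrieve some packages\n");
				break;
			default:
				break;
		}
	}

	/* registered once on the handle, e.g. alpm_option_set_eventcb(handle, event_cb); */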
+		event.type = ALPM_EVENT_RETRIEVE_DONE;
+		if(_alpm_multi_download(handle, payloads, cachedir) == -1) {
+			errors++;
errors = 1;

Just to avoid it looking like we are counting something.
+			event.type = ALPM_EVENT_RETRIEVE_FAILED;
+			_alpm_log(handle, ALPM_LOG_WARNING, _("failed to retrieve some files\n"));
 		}
OK.
 		EVENT(handle, &event);
 	}
 
 finish:
+	if(payloads) {
+		alpm_list_free_inner(payloads, (alpm_list_fn_free)_alpm_dload_payload_reset);
+		FREELIST(payloads);
+	}
+
 	if(files) {
 		alpm_list_free(files);
 	}
Hello

On Tue, Apr 28, 2020 at 9:36 PM Allan McRae <allan@archlinux.org> wrote:
On 19/4/20 12:04 pm, Anatol Pomozov wrote:
Create a list of dload_payloads and pass it to the new _alpm_multi_* interface.
Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
---
 lib/libalpm/sync.c | 74 ++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 46 deletions(-)
Deletions!
:D and more deletions are coming...
diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
index 8a9dcae8..aa417bca 100644
--- a/lib/libalpm/sync.c
+++ b/lib/libalpm/sync.c
@@ -726,47 +726,13 @@ static int find_dl_candidates(alpm_handle_t *handle, alpm_list_t **files)
 	return 0;
 }
 
-static int download_single_file(alpm_handle_t *handle, struct dload_payload *payload,
-		const char *cachedir)
-{
-	alpm_event_pkgdownload_t event = {
-		.type = ALPM_EVENT_PKGDOWNLOAD_START,
-		.file = payload->remote_name
-	};
-	const alpm_list_t *server;
-
-	payload->handle = handle;
-	payload->allow_resume = 1;
-
-	EVENT(handle, &event);
-	for(server = payload->servers; server; server = server->next) {
-		const char *server_url = server->data;
-		size_t len;
-
-		/* print server + filename into a buffer */
-		len = strlen(server_url) + strlen(payload->remote_name) + 2;
-		MALLOC(payload->fileurl, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1));
-		snprintf(payload->fileurl, len, "%s/%s", server_url, payload->remote_name);
-
-		if(_alpm_download(payload, cachedir, NULL, NULL) != -1) {
-			event.type = ALPM_EVENT_PKGDOWNLOAD_DONE;
-			EVENT(handle, &event);
-			return 0;
-		}
-		_alpm_dload_payload_reset_for_retry(payload);
-	}
-
-	event.type = ALPM_EVENT_PKGDOWNLOAD_FAILED;
-	EVENT(handle, &event);
-	return -1;
-}
-
 static int download_files(alpm_handle_t *handle)
 {
 	const char *cachedir;
 	alpm_list_t *i, *files = NULL;
 	int errors = 0;
 	alpm_event_t event;
+	alpm_list_t *payloads = NULL;
 
 	cachedir = _alpm_filecache_setup(handle);
 	handle->trans->state = STATE_DOWNLOADING;
@@ -814,26 +780,42 @@ static int download_files(alpm_handle_t *handle)
 		event.type = ALPM_EVENT_RETRIEVE_START;
 		EVENT(handle, &event);
-		event.type = ALPM_EVENT_RETRIEVE_DONE;
 		for(i = files; i; i = i->next) {
 			const alpm_pkg_t *pkg = i->data;
-			struct dload_payload payload = {0};
+			struct dload_payload *payload = NULL;
 
-			STRDUP(payload.remote_name, pkg->filename, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
-			payload.servers = pkg->origin_data.db->servers;
-			payload.max_size = pkg->size;
+			CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
+			STRDUP(payload->remote_name, pkg->filename, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
+			STRDUP(payload->filepath, pkg->filename, GOTO_ERR(handle, ALPM_ERR_MEMORY, finish));
My suspicion that the payload struct now has redundant information increases... But as I said at the time, we need to set up a good download testsuite so we can detect any breakage of edge cases.
+			payload->max_size = pkg->size;
+			payload->servers = pkg->origin_data.db->servers;
+			payload->handle = handle;
+			payload->allow_resume = 1;
OK.
-			if(download_single_file(handle, &payload, cachedir) == -1) {
-				errors++;
-				event.type = ALPM_EVENT_RETRIEVE_FAILED;
-				_alpm_log(handle, ALPM_LOG_WARNING, _("failed to retrieve some files\n"));
-			}
-			_alpm_dload_payload_reset(&payload);
+			payloads = alpm_list_add(payloads, payload);
+
+			/* TOASK: do we need to initiate *.sig file download here? */
As discussed on IRC - we should. That way we can stop distributing them in repo dbs.
Ack. This *.sig file download functionality is implemented in a subsequent commit that currently lives in my WIP branch here: https://github.com/anatol/pacman/tree/parallel-download
+		}
+		/* TOASK: the sequential download logic reported ALPM_EVENT_PKGDOWNLOAD_START/DONE/FAILED event.
+		 * Do we want to report the event as well? If yes then it should be moved inside alpm_multi_*()
+		 * function but at that point ALPM operates with dload_payload() and does not care if it downloads
+		 * package or database file.
+		 */
These events were not used by pacman. My recollection is that it was added to allow other frontends to monitor download progress.
Following my comments on an earlier patch about adding ALPM_EVENT_DB_RETRIEVE_* and changing ALPM_EVENT_RETRIEVE_* to ALPM_EVENT_PKG_RETRIEVE_*, I think this is covered enough for other frontends if they need it.
You can remove ALPM_EVENT_PKGDOWNLOAD_* from the codebase. Other frontends will need to overhaul for parallel download anyway.
SGTM. I am removing these unused events. I also added information about the API change to README.
+		event.type = ALPM_EVENT_RETRIEVE_DONE;
+		if(_alpm_multi_download(handle, payloads, cachedir) == -1) {
+			errors++;
errors = 1;

Just to avoid it looking like we are counting something.
Yeah, this kind of error handling looks weird. I am changing it to a more typical "int ret = -1" style in one of the future patches, where I add the *.sig file download functionality.
+			event.type = ALPM_EVENT_RETRIEVE_FAILED;
+			_alpm_log(handle, ALPM_LOG_WARNING, _("failed to retrieve some files\n"));
 		}
OK.
 		EVENT(handle, &event);
 	}
 
 finish:
+	if(payloads) {
+		alpm_list_free_inner(payloads, (alpm_list_fn_free)_alpm_dload_payload_reset);
+		FREELIST(payloads);
+	}
+
 	if(files) {
 		alpm_list_free(files);
 	}