[pacman-dev] [PATCH 1/2] skip servers with too many errors

28 Mar 2021

Keep track of errors from servers so that bad ones can be skipped once
a threshold is reached.  Key the error tracking off the hostname because
hosts may serve multiple repos under different url's and errors are
likely to be host-wide.

Implements: FS#29293.

Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com>
---

Changes from 1st draft:
* error list is now tracked on the handle, i.e. servers blacklisted during
  -y will still be ignored during -u
* a warning message is printed when a server is ignored
* error tracking keys off the hostname

I have not removed the individual error messages.  I agree with Emil
that those messages provide useful information to the user about what,
if anything, they need to do about their mirrorlist.  I think a small
number of similar error messages is worth the utility they add.

 lib/libalpm/alpm.c   |  1 +
 lib/libalpm/dload.c  | 89 ++++++++++++++++++++++++++++++++++++++++++--
 lib/libalpm/handle.h |  1 +
 3 files changed, 87 insertions(+), 4 deletions(-)

diff --git a/lib/libalpm/alpm.c b/lib/libalpm/alpm.c
index f8298454..6708e202 100644
--- a/lib/libalpm/alpm.c
+++ b/lib/libalpm/alpm.c
@@ -114,6 +114,7 @@ int SYMEXPORT alpm_release(alpm_handle_t *myhandle)
 #ifdef HAVE_LIBCURL
 	curl_multi_cleanup(myhandle->curlm);
 	curl_global_cleanup();
+	FREELIST(myhandle->server_errors);
 #endif
 
 	_alpm_handle_unlock(myhandle);
diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
index 0d0936c7..1318bde2 100644
--- a/lib/libalpm/dload.c
+++ b/lib/libalpm/dload.c
@@ -51,8 +51,83 @@
 
 #ifdef HAVE_LIBCURL
 
+/* RFC1123 states applications should support this length */
+#define HOSTNAME_SIZE 256
+
 static int curl_add_payload(alpm_handle_t *handle, CURLM *curlm,
 	struct dload_payload *payload, const char *localpath);
+static int curl_gethost(const char *url, char *buffer, size_t buf_len);
+
+/* number of "soft" errors required to blacklist a server, set to 0 to disable
+ * server blacklisting */
+const unsigned int server_error_limit = 3;
+
+struct server_error_count {
+	char server[HOSTNAME_SIZE];
+	unsigned int errors;
+};
+
+static struct server_error_count *find_server_errors(alpm_handle_t *handle, const char *server)
+{
+	alpm_list_t *i;
+	struct server_error_count *h;
+	char hostname[HOSTNAME_SIZE];
+	/* key off the hostname because a host may serve multiple repos under
+	 * different url's and errors are likely to be host-wide */
+	if(curl_gethost(server, hostname, sizeof(hostname)) != 0) {
+		return NULL;
+	}
+	for(i = handle->server_errors; i; i = i->next) {
+		h = i->data;
+		if(strcmp(hostname, h->server) == 0) {
+			return h;
+		}
+	}
+	if((h = calloc(sizeof(struct server_error_count), 1))
+			&& alpm_list_append(&handle->server_errors, h)) {
+		strcpy(h->server, hostname);
+		h->errors = 0;
+		return h;
+	} else {
+		free(h);
+		return NULL;
+	}
+}
+
+static int should_skip_server(alpm_handle_t *handle, const char *server)
+{
+	struct server_error_count *h;
+	if(server_error_limit && (h = find_server_errors(handle, server)) ) {
+		return h->errors >= server_error_limit;
+	}
+	return 0;
+}
+
+static void server_increment_error(alpm_handle_t *handle, const char *server, int count)
+{
+	struct server_error_count *h;
+	if(server_error_limit
+			&& (h = find_server_errors(handle, server))
+			&& !should_skip_server(handle, server) ) {
+		h->errors += count;
+
+		if(should_skip_server(handle, server)) {
+			_alpm_log(handle, ALPM_LOG_WARNING,
+					_("too many errors from %s, skipping for the remainder of this transaction\n"),
+					h->server);
+		}
+	}
+}
+
+static void server_soft_error(alpm_handle_t *handle, const char *server)
+{
+	server_increment_error(handle, server, 1);
+}
+
+static void server_hard_error(alpm_handle_t *handle, const char *server)
+{
+	server_increment_error(handle, server, server_error_limit);
+}
 
 static const char *get_filename(const char *url)
 {
@@ -332,9 +407,6 @@ static FILE *create_tempfile(struct dload_payload *payload, const char *localpat
 	return fp;
 }
 
-/* RFC1123 states applications should support this length */
-#define HOSTNAME_SIZE 256
-
 /* Return 0 if retry was successful, -1 otherwise */
 static int curl_retry_next_server(CURLM *curlm, CURL *curl, struct dload_payload *payload)
 {
@@ -342,7 +414,7 @@ static int curl_retry_next_server(CURLM *curlm, CURL *curl, struct dload_payload
 	size_t len;
 	alpm_handle_t *handle = payload->handle;
 
-	if(payload->servers) {
+	while(payload->servers && should_skip_server(handle, payload->servers->data)) {
 		payload->servers = payload->servers->next;
 	}
 	if(!payload->servers) {
@@ -351,6 +423,7 @@ static int curl_retry_next_server(CURLM *curlm, CURL *curl, struct dload_payload
 		return -1;
 	}
 	server = payload->servers->data;
+	payload->servers = payload->servers->next;
 
 	/* regenerate a new fileurl */
 	FREE(payload->fileurl);
@@ -423,6 +496,7 @@ static int curl_check_finished_download(CURLM *curlm, CURLMsg *msg,
 					_alpm_log(handle, ALPM_LOG_ERROR,
 							_("failed retrieving file '%s' from %s : %s\n"),
 							payload->remote_name, hostname, payload->error_buffer);
+					server_soft_error(handle, payload->fileurl);
 				}
 				if(curl_retry_next_server(curlm, curl, payload) == 0) {
 					return 2;
@@ -440,6 +514,7 @@ static int curl_check_finished_download(CURLM *curlm, CURLMsg *msg,
 				_alpm_log(handle, ALPM_LOG_ERROR,
 						_("failed retrieving file '%s' from %s : expected download size exceeded\n"),
 						payload->remote_name, hostname);
+				server_soft_error(handle, payload->fileurl);
 			}
 			goto cleanup;
 		case CURLE_COULDNT_RESOLVE_HOST:
@@ -448,6 +523,7 @@ static int curl_check_finished_download(CURLM *curlm, CURLMsg *msg,
 			_alpm_log(handle, ALPM_LOG_ERROR,
 					_("failed retrieving file '%s' from %s : %s\n"),
 					payload->remote_name, hostname, payload->error_buffer);
+			server_hard_error(handle, payload->fileurl);
 			if(curl_retry_next_server(curlm, curl, payload) == 0) {
 				return 2;
 			} else {
@@ -463,6 +539,7 @@ static int curl_check_finished_download(CURLM *curlm, CURLMsg *msg,
 				_alpm_log(handle, ALPM_LOG_ERROR,
 						_("failed retrieving file '%s' from %s : %s\n"),
 						payload->remote_name, hostname, payload->error_buffer);
+				server_soft_error(handle, payload->fileurl);
 			} else {
 				_alpm_log(handle, ALPM_LOG_DEBUG,
 						"failed retrieving file '%s' from %s : %s\n",
@@ -632,11 +709,15 @@ static int curl_add_payload(alpm_handle_t *handle, CURLM *curlm,
 		ASSERT(!payload->filepath, GOTO_ERR(handle, ALPM_ERR_WRONG_ARGS, cleanup));
 	} else {
 		const char *server;
+		while(payload->servers && should_skip_server(handle, payload->servers->data)) {
+			payload->servers = payload->servers->next;
+		}
 
 		ASSERT(payload->servers, GOTO_ERR(handle, ALPM_ERR_SERVER_NONE, cleanup));
 		ASSERT(payload->filepath, GOTO_ERR(handle, ALPM_ERR_WRONG_ARGS, cleanup));
 
 		server = payload->servers->data;
+		payload->servers = payload->servers->next;
 
 		len = strlen(server) + strlen(payload->filepath) + 2;
 		MALLOC(payload->fileurl, len, GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup));
diff --git a/lib/libalpm/handle.h b/lib/libalpm/handle.h
index 349ef2c7..fdd8e6e7 100644
--- a/lib/libalpm/handle.h
+++ b/lib/libalpm/handle.h
@@ -60,6 +60,7 @@ struct __alpm_handle_t {
 #ifdef HAVE_LIBCURL
 	/* libcurl handle */
 	CURLM *curlm;
+	alpm_list_t *server_errors;
 #endif
 
 	unsigned short disable_dl_timeout;
-- 
2.31.1