[pacman-dev] [PATCH 2/4] lib/dload: prevent large file attacks

Dan McGee dpmcgee at gmail.com
Sun Jul 3 15:13:30 EDT 2011


On Fri, Jul 1, 2011 at 7:59 AM, Dave Reisner <d at falconindy.com> wrote:
> From: Dave Reisner <d at falconindy.com>
>
> This means creating a new struct which can pass more descriptive data
> from the back end sync functions to the downloader. In particular, we're
> interested in the download size read from the sync DB. When the remote
> server reports a size larger than this (via a content-length header),
> abort the transfer.
>
> In cases where the size is unknown, we set a hard upper limit of:
>
> * 25MiB for a sync DB
> * 16KiB for a signature
>
> For reference, 25mb is more than twice the size of all of the current
> binary repos (with files) combined, and 16k is a truly gargantuan
> signature.
MiB/KiB here to not confuse people, please. (Or MB/KB, but definitely
not lowercase). Same with any usages in the code comments.

>
> Signed-off-by: Dave Reisner <dreisner at archlinux.org>
> ---
>  lib/libalpm/be_sync.c |   22 +++++++++++++------
>  lib/libalpm/dload.c   |   55 +++++++++++++++++++++++++++++++++---------------
>  lib/libalpm/dload.h   |   13 +++++++++-
>  lib/libalpm/sync.c    |   30 +++++++++++++++++---------
>  4 files changed, 84 insertions(+), 36 deletions(-)
>
> diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c
> index 16351f8..65420cf 100644
> --- a/lib/libalpm/be_sync.c
> +++ b/lib/libalpm/be_sync.c
> @@ -175,16 +175,21 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
>
>        for(i = db->servers; i; i = i->next) {
>                const char *server = i->data;
> -               char *fileurl;
> +               struct dload_payload *payload;
>                size_t len;
>                int sig_ret = 0;
>
> +               CALLOC(payload, 1, sizeof(*payload), RET_ERR(handle, PM_ERR_MEMORY, -1));
Any reason not to just allocate it on the stack and not use a pointer?
You will want to memset(&payload, 0, sizeof(payload); then.

Edit: after reading the rest of the patch, we need heap allocated
payloads elsewhere, so you can probably ignore me here.

> +
> +               /* set hard upper limit of 25MiB */
> +               payload->max_size = 25 * 1024 * 1024;
> +
>                /* print server + filename into a buffer (leave space for .sig) */
>                len = strlen(server) + strlen(db->treename) + 9;
> -               CALLOC(fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1));
> -               snprintf(fileurl, len, "%s/%s.db", server, db->treename);
> +               CALLOC(payload->fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1));
> +               snprintf(payload->fileurl, len, "%s/%s.db", server, db->treename);
>
> -               ret = _alpm_download(handle, fileurl, syncpath, NULL, force, 0, 0);
> +               ret = _alpm_download(handle, payload, syncpath, NULL, force, 0, 0);
>
>                if(ret == 0 && (check_sig == PM_PGP_VERIFY_ALWAYS ||
>                                        check_sig == PM_PGP_VERIFY_OPTIONAL)) {
> @@ -199,14 +204,17 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
>
>                        int errors_ok = (check_sig == PM_PGP_VERIFY_OPTIONAL);
>                        /* if we downloaded a DB, we want the .sig from the same server */
> -                       snprintf(fileurl, len, "%s/%s.db.sig", server, db->treename);
> +                       snprintf(payload->fileurl, len, "%s/%s.db.sig", server, db->treename);
> +
> +                       /* set hard upper limited of 16KiB */
"limit"
> +                       payload->max_size = 16 * 1024;
>
> -                       sig_ret = _alpm_download(handle, fileurl, syncpath, NULL, 1, 0, errors_ok);
> +                       sig_ret = _alpm_download(handle, payload, syncpath, NULL, 1, 0, errors_ok);
>                        /* errors_ok suppresses error messages, but not the return code */
>                        sig_ret = errors_ok ? 0 : sig_ret;
>                }
>
> -               FREE(fileurl);
> +               _alpm_dload_payload_free(payload);
If you do stack-based, this will have to change to not free &payload
itself, but would still be useful as a helper to free all the attached
strings.

>                if(ret != -1 && sig_ret != -1) {
>                        break;
>                }
> diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> index eefa285..1758744 100644
> --- a/lib/libalpm/dload.c
> +++ b/lib/libalpm/dload.c
> @@ -179,7 +179,7 @@ static size_t parse_headers(void *ptr, size_t size, size_t nmemb, void *user)
>        return realsize;
>  }
>
> -static int curl_download_internal(alpm_handle_t *handle, const char *url,
> +static int curl_download_internal(alpm_handle_t *handle, struct dload_payload *payload,
>                const char *localpath, char **final_file, int force, int allow_resume,
>                int errors_ok)
>  {
> @@ -199,10 +199,10 @@ static int curl_download_internal(alpm_handle_t *handle, const char *url,
>
>        dlfile.handle = handle;
>        dlfile.initial_size = 0.0;
> -       dlfile.filename = get_filename(url);
> +       dlfile.filename = get_filename(payload->fileurl);
>        dlfile.cd_filename = NULL;
> -       if(!dlfile.filename || curl_gethost(url, hostname) != 0) {
> -               _alpm_log(handle, PM_LOG_ERROR, _("url '%s' is invalid\n"), url);
> +       if(!dlfile.filename || curl_gethost(payload->fileurl, hostname) != 0) {
> +               _alpm_log(handle, PM_LOG_ERROR, _("url '%s' is invalid\n"), payload->fileurl);
>                RET_ERR(handle, PM_ERR_SERVER_BAD_URL, -1);
>        }
>
> @@ -240,7 +240,7 @@ static int curl_download_internal(alpm_handle_t *handle, const char *url,
>        /* the curl_easy handle is initialized with the alpm handle, so we only need
>         * to reset the curl handle set parameters for each time it's used. */
>        curl_easy_reset(handle->curl);
> -       curl_easy_setopt(handle->curl, CURLOPT_URL, url);
> +       curl_easy_setopt(handle->curl, CURLOPT_URL, payload->fileurl);
>        curl_easy_setopt(handle->curl, CURLOPT_FAILONERROR, 1L);
>        curl_easy_setopt(handle->curl, CURLOPT_ERRORBUFFER, error_buffer);
>        curl_easy_setopt(handle->curl, CURLOPT_CONNECTTIMEOUT, 10L);
> @@ -254,6 +254,10 @@ static int curl_download_internal(alpm_handle_t *handle, const char *url,
>        curl_easy_setopt(handle->curl, CURLOPT_HEADERFUNCTION, parse_headers);
>        curl_easy_setopt(handle->curl, CURLOPT_WRITEHEADER, &dlfile);
>
> +       if(payload->max_size) {
> +               curl_easy_setopt(handle->curl, CURLOPT_MAXFILESIZE, payload->max_size);

So I hate to rain on the parade here, but:
    CURLOPT_MAXFILESIZE
    Pass a long as parameter. This allows you to specify the maximum
size (in bytes) of a file to download. If the file requested is larger
than this value, the transfer will not start and
CURLE_FILESIZE_EXCEEDED will be returned.
    The file size is not always known prior to download, and for such
files this option has no effect even if the file transfer ends up
being larger than this given limit. This concerns both FTP and HTTP
transfers.

Which seems to indicate it is rather easy to bypass said restrictions
if you simply put up a server that forgets the Content-Length header.
Do we need to do something in the progress callback as well to check
up on how much we've downloaded?

> +       }
> +
>        useragent = getenv("HTTP_USER_AGENT");
>        if(useragent != NULL) {
>                curl_easy_setopt(handle->curl, CURLOPT_USERAGENT, useragent);
> @@ -409,18 +413,19 @@ cleanup:
>  * @param errors_ok do not log errors (but still return them)
>  * @return 0 on success, -1 on error (pm_errno is set accordingly if errors_ok == 0)
>  */
> -int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath,
> -               char **final_file, int force, int allow_resume, int errors_ok)
> +int _alpm_download(alpm_handle_t *handle, struct dload_payload *payload,
> +               const char *localpath, char **final_file, int force, int allow_resume,
> +               int errors_ok)
>  {
>        if(handle->fetchcb == NULL) {
>  #ifdef HAVE_LIBCURL
> -               return curl_download_internal(handle, url, localpath, final_file, force,
> +               return curl_download_internal(handle, payload, localpath, final_file, force,
>                                allow_resume, errors_ok);
>  #else
>                RET_ERR(handle, PM_ERR_EXTERNAL_DOWNLOAD, -1);
>  #endif
>        } else {
> -               int ret = handle->fetchcb(url, localpath, force);
> +               int ret = handle->fetchcb(payload->fileurl, localpath, force);
>                if(ret == -1 && !errors_ok) {
>                        RET_ERR(handle, PM_ERR_EXTERNAL_DOWNLOAD, -1);
>                }
> @@ -434,6 +439,7 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
>        char *filepath;
>        const char *cachedir;
>        char *final_file = NULL;
> +       struct dload_payload *payload;
>        int ret;
>
>        CHECK_HANDLE(handle, return NULL);
> @@ -441,8 +447,11 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
>        /* find a valid cache dir to download to */
>        cachedir = _alpm_filecache_setup(handle);
>
> +       CALLOC(payload, 1, sizeof(*payload), RET_ERR(handle, PM_ERR_MEMORY, NULL));
> +       payload->filename = strdup(url);
> +
>        /* download the file */
> -       ret = _alpm_download(handle, url, cachedir, &final_file, 0, 1, 0);
> +       ret = _alpm_download(handle, payload, cachedir, &final_file, 0, 1, 0);
>        if(ret == -1) {
>                _alpm_log(handle, PM_LOG_WARNING, _("failed to download %s\n"), url);
>                return NULL;
> @@ -452,30 +461,42 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
>        /* attempt to download the signature */
>        if(ret == 0 && (handle->sigverify == PM_PGP_VERIFY_ALWAYS ||
>                                handle->sigverify == PM_PGP_VERIFY_OPTIONAL)) {
> -               char *sig_url;
>                size_t len;
>                int errors_ok = (handle->sigverify == PM_PGP_VERIFY_OPTIONAL);
> +               struct dload_payload *spayload;
Is the load getting spayed? sig_payload would be a bit more readable.
It also looks like you never set the 16 KiB limit here.

>
> +               CALLOC(spayload, 1, sizeof(*spayload), RET_ERR(handle, PM_ERR_MEMORY, NULL));
>                len = strlen(url) + 5;
> -               CALLOC(sig_url, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, NULL));
> -               snprintf(sig_url, len, "%s.sig", url);
> +               CALLOC(spayload->fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, NULL));
> +               snprintf(spayload->fileurl, len, "%s.sig", url);
>
> -               ret = _alpm_download(handle, sig_url, cachedir, &final_file, 1, 0, errors_ok);
> +               ret = _alpm_download(handle, spayload, cachedir, &final_file, 1, 0, errors_ok);
>                if(ret == -1 && !errors_ok) {
> -                       _alpm_log(handle, PM_LOG_WARNING, _("failed to download %s\n"), sig_url);
> +                       _alpm_log(handle, PM_LOG_WARNING, _("failed to download %s\n"), spayload->fileurl);
>                        /* Warn now, but don't return NULL. We will fail later during package
>                         * load time. */
>                } else if(ret == 0) {
> -                       _alpm_log(handle, PM_LOG_DEBUG, "successfully downloaded %s\n", sig_url);
> +                       _alpm_log(handle, PM_LOG_DEBUG, "successfully downloaded %s\n", spayload->fileurl);
>                }
> -               FREE(sig_url);
> +               _alpm_dload_payload_free(spayload);
>        }
>
>        /* we should be able to find the file the second time around */
>        filepath = _alpm_filecache_find(handle, final_file);
>        FREE(final_file);
> +       _alpm_dload_payload_free(payload);
>
>        return filepath;
>  }
>
> +void _alpm_dload_payload_free(void *payload) {
> +       struct dload_payload *load = (struct dload_payload*)payload;
> +
> +       ASSERT(load, return);
> +
> +       FREE(load->filename);
> +       FREE(load->fileurl);
> +       FREE(load);
> +}
> +
>  /* vim: set ts=2 sw=2 noet: */
> diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h
> index 351b2b3..8aaed0c 100644
> --- a/lib/libalpm/dload.h
> +++ b/lib/libalpm/dload.h
> @@ -33,8 +33,17 @@ struct fileinfo {
>        double initial_size;
>  };
>
> -int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath,
> -               char **final_file, int force, int allow_resume, int errors_ok);
> +struct dload_payload {
> +       char *filename;
> +       char *fileurl;
> +       long max_size;
> +};
> +
> +void _alpm_dload_payload_free(void *payload);
> +
> +int _alpm_download(alpm_handle_t *handle, struct dload_payload *payload,
> +               const char *localpath, char **final_file, int force, int allow_resume,
> +               int errors_ok);
>
>  #endif /* _ALPM_DLOAD_H */
>
> diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
> index 8df8a17..4277283 100644
> --- a/lib/libalpm/sync.c
> +++ b/lib/libalpm/sync.c
> @@ -757,10 +757,12 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas)
>                        alpm_pkg_t *spkg = j->data;
>
>                        if(spkg->origin != PKG_FROM_FILE && current == spkg->origin_data.db) {
> -                               const char *fname = NULL;
> +                               struct dload_payload *payload;
> +
> +                               CALLOC(payload, 1, sizeof(*payload), RET_ERR(handle, PM_ERR_MEMORY, -1));
> +                               STRDUP(payload->filename, spkg->filename, RET_ERR(handle, PM_ERR_MEMORY, -1));
> +                               payload->max_size = spkg->download_size;
This is actually wrong. Take a look at where and when we set this
field (compute_download_size). You never want a possible delta chain
total size, so you really want to use alpm_pkg_get_size() in all cases
here.

Sidenote: I also have a mental TODO in the future of allowing
compute_download_size() to take partial downloads into account, so if
you say had half of libreoffice downloaded, it would report the proper
size at the prompt.

>
> -                               fname = alpm_pkg_get_filename(spkg);
> -                               ASSERT(fname != NULL, RET_ERR(handle, PM_ERR_PKG_INVALID_NAME, -1));
>                                alpm_list_t *delta_path = spkg->delta_path;
>                                if(delta_path) {
>                                        /* using deltas */
> @@ -768,14 +770,20 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas)
>                                        for(dlts = delta_path; dlts; dlts = dlts->next) {
>                                                alpm_delta_t *delta = dlts->data;
>                                                if(delta->download_size != 0) {
> -                                                       files = alpm_list_add(files, strdup(delta->delta));
> +                                                       struct dload_payload *dpayload;
> +
> +                                                       CALLOC(dpayload, 1, sizeof(*payload), RET_ERR(handle, PM_ERR_MEMORY, -1));
> +                                                       STRDUP(dpayload->filename, delta->delta, RET_ERR(handle, PM_ERR_MEMORY, -1));
> +                                                       dpayload->max_size = delta->download_size;
> +
> +                                                       files = alpm_list_add(files, dpayload);
>                                                }
>                                                /* keep a list of all the delta files for md5sums */
>                                                *deltas = alpm_list_add(*deltas, delta);
>                                        }
>
>                                } else if(spkg->download_size != 0) {
> -                                       files = alpm_list_add(files, strdup(fname));
> +                                       files = alpm_list_add(files, payload);
>                                }
>
>                        }
> @@ -784,7 +792,7 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas)
>                if(files) {
>                        EVENT(handle->trans, PM_TRANS_EVT_RETRIEVE_START, current->treename, NULL);
>                        for(j = files; j; j = j->next) {
> -                               const char *filename = j->data;
> +                               struct dload_payload *payload = j->data;
>                                alpm_list_t *server;
>                                int ret = -1;
>                                for(server = current->servers; server; server = server->next) {
> @@ -793,11 +801,11 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas)
>                                        size_t len;
>
>                                        /* print server + filename into a buffer */
> -                                       len = strlen(server_url) + strlen(filename) + 2;
> -                                       CALLOC(fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1));
> -                                       snprintf(fileurl, len, "%s/%s", server_url, filename);
> +                                       len = strlen(server_url) + strlen(payload->filename) + 2;
> +                                       CALLOC(payload->fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1));
> +                                       snprintf(payload->fileurl, len, "%s/%s", server_url, payload->filename);
>
> -                                       ret = _alpm_download(handle, fileurl, cachedir, NULL, 0, 1, 0);
> +                                       ret = _alpm_download(handle, payload, cachedir, NULL, 0, 1, 0);
>                                        FREE(fileurl);
>                                        if(ret != -1) {
>                                                break;
> @@ -809,6 +817,8 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas)
>                        }
>
>                        FREELIST(files);
> +                       alpm_list_free_inner(files, _alpm_dload_payload_free);
This is going to segfault, you left the FREELIST() call in; you want
to remove that completely.
> +                       alpm_list_free(files);
>                        if(errors) {
>                                _alpm_log(handle, PM_LOG_WARNING, _("failed to retrieve some files from %s\n"),
>                                                current->treename);
> --
> 1.7.6


More information about the pacman-dev mailing list