[pacman-dev] [PATCH 2/4] lib/dload: prevent large file attacks

Dave Reisner d at falconindy.com
Sun Jul 3 19:20:48 EDT 2011


On Sun, Jul 03, 2011 at 02:13:30PM -0500, Dan McGee wrote:
> On Fri, Jul 1, 2011 at 7:59 AM, Dave Reisner <d at falconindy.com> wrote:
> > From: Dave Reisner <d at falconindy.com>
> >
> > This means creating a new struct which can pass more descriptive data
> > from the back end sync functions to the downloader. In particular, we're
> > interested in the download size read from the sync DB. When the remote
> > server reports a size larger than this (via a content-length header),
> > abort the transfer.
> >
> > In cases where the size is unknown, we set a hard upper limit of:
> >
> > * 25MiB for a sync DB
> > * 16KiB for a signature
> >
> > For reference, 25mb is more than twice the size of all of the current
> > binary repos (with files) combined, and 16k is a truly gargantuan
> > signature.
> MiB/KiB here to not confuse people, please. (Or MB/KB, but definitely
> not lowercase). Same with any usages in the code comments.
> 

Fixed.

> >
> > Signed-off-by: Dave Reisner <dreisner at archlinux.org>
> > ---
> >  lib/libalpm/be_sync.c |   22 +++++++++++++------
> >  lib/libalpm/dload.c   |   55 +++++++++++++++++++++++++++++++++---------------
> >  lib/libalpm/dload.h   |   13 +++++++++-
> >  lib/libalpm/sync.c    |   30 +++++++++++++++++---------
> >  4 files changed, 84 insertions(+), 36 deletions(-)
> >
> > diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c
> > index 16351f8..65420cf 100644
> > --- a/lib/libalpm/be_sync.c
> > +++ b/lib/libalpm/be_sync.c
> > @@ -175,16 +175,21 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
> >
> >        for(i = db->servers; i; i = i->next) {
> >                const char *server = i->data;
> > -               char *fileurl;
> > +               struct dload_payload *payload;
> >                size_t len;
> >                int sig_ret = 0;
> >
> > +               CALLOC(payload, 1, sizeof(*payload), RET_ERR(handle, PM_ERR_MEMORY, -1));
> Any reason not to just allocate it on the stack and not use a pointer?
> You will want to memset(&payload, 0, sizeof(payload); then.
> 
> Edit: after reading the rest of the patch, we need heap allocated
> payloads elsewhere, so you can probably ignore me here.
> 

Yep, I considered this, but opted for heap allocations throughout since
it wasn't always possible to stack allocate.

> > +
> > +               /* set hard upper limit of 25MiB */
> > +               payload->max_size = 25 * 1024 * 1024;
> > +
> >                /* print server + filename into a buffer (leave space for .sig) */
> >                len = strlen(server) + strlen(db->treename) + 9;
> > -               CALLOC(fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1));
> > -               snprintf(fileurl, len, "%s/%s.db", server, db->treename);
> > +               CALLOC(payload->fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1));
> > +               snprintf(payload->fileurl, len, "%s/%s.db", server, db->treename);
> >
> > -               ret = _alpm_download(handle, fileurl, syncpath, NULL, force, 0, 0);
> > +               ret = _alpm_download(handle, payload, syncpath, NULL, force, 0, 0);
> >
> >                if(ret == 0 && (check_sig == PM_PGP_VERIFY_ALWAYS ||
> >                                        check_sig == PM_PGP_VERIFY_OPTIONAL)) {
> > @@ -199,14 +204,17 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
> >
> >                        int errors_ok = (check_sig == PM_PGP_VERIFY_OPTIONAL);
> >                        /* if we downloaded a DB, we want the .sig from the same server */
> > -                       snprintf(fileurl, len, "%s/%s.db.sig", server, db->treename);
> > +                       snprintf(payload->fileurl, len, "%s/%s.db.sig", server, db->treename);
> > +
> > +                       /* set hard upper limited of 16KiB */
> "limit"

Fixed.

> > +                       payload->max_size = 16 * 1024;
> >
> > -                       sig_ret = _alpm_download(handle, fileurl, syncpath, NULL, 1, 0, errors_ok);
> > +                       sig_ret = _alpm_download(handle, payload, syncpath, NULL, 1, 0, errors_ok);
> >                        /* errors_ok suppresses error messages, but not the return code */
> >                        sig_ret = errors_ok ? 0 : sig_ret;
> >                }
> >
> > -               FREE(fileurl);
> > +               _alpm_dload_payload_free(payload);
> If you do stack-based, this will have to change to not free &payload
> itself, but would still be useful as a helper to free all the attached
> strings.
> 

Ignoring this.

> >                if(ret != -1 && sig_ret != -1) {
> >                        break;
> >                }
> > diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> > index eefa285..1758744 100644
> > --- a/lib/libalpm/dload.c
> > +++ b/lib/libalpm/dload.c
> > @@ -179,7 +179,7 @@ static size_t parse_headers(void *ptr, size_t size, size_t nmemb, void *user)
> >        return realsize;
> >  }
> >
> > -static int curl_download_internal(alpm_handle_t *handle, const char *url,
> > +static int curl_download_internal(alpm_handle_t *handle, struct dload_payload *payload,
> >                const char *localpath, char **final_file, int force, int allow_resume,
> >                int errors_ok)
> >  {
> > @@ -199,10 +199,10 @@ static int curl_download_internal(alpm_handle_t *handle, const char *url,
> >
> >        dlfile.handle = handle;
> >        dlfile.initial_size = 0.0;
> > -       dlfile.filename = get_filename(url);
> > +       dlfile.filename = get_filename(payload->fileurl);
> >        dlfile.cd_filename = NULL;
> > -       if(!dlfile.filename || curl_gethost(url, hostname) != 0) {
> > -               _alpm_log(handle, PM_LOG_ERROR, _("url '%s' is invalid\n"), url);
> > +       if(!dlfile.filename || curl_gethost(payload->fileurl, hostname) != 0) {
> > +               _alpm_log(handle, PM_LOG_ERROR, _("url '%s' is invalid\n"), payload->fileurl);
> >                RET_ERR(handle, PM_ERR_SERVER_BAD_URL, -1);
> >        }
> >
> > @@ -240,7 +240,7 @@ static int curl_download_internal(alpm_handle_t *handle, const char *url,
> >        /* the curl_easy handle is initialized with the alpm handle, so we only need
> >         * to reset the curl handle set parameters for each time it's used. */
> >        curl_easy_reset(handle->curl);
> > -       curl_easy_setopt(handle->curl, CURLOPT_URL, url);
> > +       curl_easy_setopt(handle->curl, CURLOPT_URL, payload->fileurl);
> >        curl_easy_setopt(handle->curl, CURLOPT_FAILONERROR, 1L);
> >        curl_easy_setopt(handle->curl, CURLOPT_ERRORBUFFER, error_buffer);
> >        curl_easy_setopt(handle->curl, CURLOPT_CONNECTTIMEOUT, 10L);
> > @@ -254,6 +254,10 @@ static int curl_download_internal(alpm_handle_t *handle, const char *url,
> >        curl_easy_setopt(handle->curl, CURLOPT_HEADERFUNCTION, parse_headers);
> >        curl_easy_setopt(handle->curl, CURLOPT_WRITEHEADER, &dlfile);
> >
> > +       if(payload->max_size) {
> > +               curl_easy_setopt(handle->curl, CURLOPT_MAXFILESIZE, payload->max_size);
> 
> So I hate to rain on the parade here, but:
>     CURLOPT_MAXFILESIZE
>     Pass a long as parameter. This allows you to specify the maximum
> size (in bytes) of a file to download. If the file requested is larger
> than this value, the transfer will not start and
> CURLE_FILESIZE_EXCEEDED will be returned.
>     The file size is not always known prior to download, and for such
> files this option has no effect even if the file transfer ends up
> being larger than this given limit. This concerns both FTP and HTTP
> transfers.
> 
> Which seems to indicate it is rather easy to bypass said restrictions
> if you simply put up a server that forgets the Content-Length header.
> Do we need to do something in the progress callback as well to check
> up on how much we've downloaded?
> 

In theory, yeah -- we should probably add this. That said, I did a
survey of all our current known mirrors, and failed to find any which
did _not_ report a content-length header.

> > +       }
> > +
> >        useragent = getenv("HTTP_USER_AGENT");
> >        if(useragent != NULL) {
> >                curl_easy_setopt(handle->curl, CURLOPT_USERAGENT, useragent);
> > @@ -409,18 +413,19 @@ cleanup:
> >  * @param errors_ok do not log errors (but still return them)
> >  * @return 0 on success, -1 on error (pm_errno is set accordingly if errors_ok == 0)
> >  */
> > -int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath,
> > -               char **final_file, int force, int allow_resume, int errors_ok)
> > +int _alpm_download(alpm_handle_t *handle, struct dload_payload *payload,
> > +               const char *localpath, char **final_file, int force, int allow_resume,
> > +               int errors_ok)
> >  {
> >        if(handle->fetchcb == NULL) {
> >  #ifdef HAVE_LIBCURL
> > -               return curl_download_internal(handle, url, localpath, final_file, force,
> > +               return curl_download_internal(handle, payload, localpath, final_file, force,
> >                                allow_resume, errors_ok);
> >  #else
> >                RET_ERR(handle, PM_ERR_EXTERNAL_DOWNLOAD, -1);
> >  #endif
> >        } else {
> > -               int ret = handle->fetchcb(url, localpath, force);
> > +               int ret = handle->fetchcb(payload->fileurl, localpath, force);
> >                if(ret == -1 && !errors_ok) {
> >                        RET_ERR(handle, PM_ERR_EXTERNAL_DOWNLOAD, -1);
> >                }
> > @@ -434,6 +439,7 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
> >        char *filepath;
> >        const char *cachedir;
> >        char *final_file = NULL;
> > +       struct dload_payload *payload;
> >        int ret;
> >
> >        CHECK_HANDLE(handle, return NULL);
> > @@ -441,8 +447,11 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
> >        /* find a valid cache dir to download to */
> >        cachedir = _alpm_filecache_setup(handle);
> >
> > +       CALLOC(payload, 1, sizeof(*payload), RET_ERR(handle, PM_ERR_MEMORY, NULL));
> > +       payload->filename = strdup(url);
> > +
> >        /* download the file */
> > -       ret = _alpm_download(handle, url, cachedir, &final_file, 0, 1, 0);
> > +       ret = _alpm_download(handle, payload, cachedir, &final_file, 0, 1, 0);
> >        if(ret == -1) {
> >                _alpm_log(handle, PM_LOG_WARNING, _("failed to download %s\n"), url);
> >                return NULL;
> > @@ -452,30 +461,42 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
> >        /* attempt to download the signature */
> >        if(ret == 0 && (handle->sigverify == PM_PGP_VERIFY_ALWAYS ||
> >                                handle->sigverify == PM_PGP_VERIFY_OPTIONAL)) {
> > -               char *sig_url;
> >                size_t len;
> >                int errors_ok = (handle->sigverify == PM_PGP_VERIFY_OPTIONAL);
> > +               struct dload_payload *spayload;
> Is the load getting spayed? sig_payload would be a bit more readable.
> It also looks like you never set the 16 KiB limit here.
> 

Just trying to make Bob Barker happy.

> >
> > +               CALLOC(spayload, 1, sizeof(*spayload), RET_ERR(handle, PM_ERR_MEMORY, NULL));
> >                len = strlen(url) + 5;
> > -               CALLOC(sig_url, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, NULL));
> > -               snprintf(sig_url, len, "%s.sig", url);
> > +               CALLOC(spayload->fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, NULL));
> > +               snprintf(spayload->fileurl, len, "%s.sig", url);
> >
> > -               ret = _alpm_download(handle, sig_url, cachedir, &final_file, 1, 0, errors_ok);
> > +               ret = _alpm_download(handle, spayload, cachedir, &final_file, 1, 0, errors_ok);
> >                if(ret == -1 && !errors_ok) {
> > -                       _alpm_log(handle, PM_LOG_WARNING, _("failed to download %s\n"), sig_url);
> > +                       _alpm_log(handle, PM_LOG_WARNING, _("failed to download %s\n"), spayload->fileurl);
> >                        /* Warn now, but don't return NULL. We will fail later during package
> >                         * load time. */
> >                } else if(ret == 0) {
> > -                       _alpm_log(handle, PM_LOG_DEBUG, "successfully downloaded %s\n", sig_url);
> > +                       _alpm_log(handle, PM_LOG_DEBUG, "successfully downloaded %s\n", spayload->fileurl);
> >                }
> > -               FREE(sig_url);
> > +               _alpm_dload_payload_free(spayload);
> >        }
> >
> >        /* we should be able to find the file the second time around */
> >        filepath = _alpm_filecache_find(handle, final_file);
> >        FREE(final_file);
> > +       _alpm_dload_payload_free(payload);
> >
> >        return filepath;
> >  }
> >
> > +void _alpm_dload_payload_free(void *payload) {
> > +       struct dload_payload *load = (struct dload_payload*)payload;
> > +
> > +       ASSERT(load, return);
> > +
> > +       FREE(load->filename);
> > +       FREE(load->fileurl);
> > +       FREE(load);
> > +}
> > +
> >  /* vim: set ts=2 sw=2 noet: */
> > diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h
> > index 351b2b3..8aaed0c 100644
> > --- a/lib/libalpm/dload.h
> > +++ b/lib/libalpm/dload.h
> > @@ -33,8 +33,17 @@ struct fileinfo {
> >        double initial_size;
> >  };
> >
> > -int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath,
> > -               char **final_file, int force, int allow_resume, int errors_ok);
> > +struct dload_payload {
> > +       char *filename;
> > +       char *fileurl;
> > +       long max_size;
> > +};
> > +
> > +void _alpm_dload_payload_free(void *payload);
> > +
> > +int _alpm_download(alpm_handle_t *handle, struct dload_payload *payload,
> > +               const char *localpath, char **final_file, int force, int allow_resume,
> > +               int errors_ok);
> >
> >  #endif /* _ALPM_DLOAD_H */
> >
> > diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
> > index 8df8a17..4277283 100644
> > --- a/lib/libalpm/sync.c
> > +++ b/lib/libalpm/sync.c
> > @@ -757,10 +757,12 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas)
> >                        alpm_pkg_t *spkg = j->data;
> >
> >                        if(spkg->origin != PKG_FROM_FILE && current == spkg->origin_data.db) {
> > -                               const char *fname = NULL;
> > +                               struct dload_payload *payload;
> > +
> > +                               CALLOC(payload, 1, sizeof(*payload), RET_ERR(handle, PM_ERR_MEMORY, -1));
> > +                               STRDUP(payload->filename, spkg->filename, RET_ERR(handle, PM_ERR_MEMORY, -1));
> > +                               payload->max_size = spkg->download_size;
> This is actually wrong. Take a look at where and when we set this
> field (compute_download_size). You never want a possible delta chain
> total size, so you really want to use alpm_pkg_get_size() in all cases
> here.
> 

Fixed. Good to know.

> Sidenote: I also have a mental TODO in the future of allowing
> compute_download_size() to take partial downloads into account, so if
> you say had half of libreoffice downloaded, it would report the proper
> size at the prompt.
> 

Seems reasonable.

> >
> > -                               fname = alpm_pkg_get_filename(spkg);
> > -                               ASSERT(fname != NULL, RET_ERR(handle, PM_ERR_PKG_INVALID_NAME, -1));
> >                                alpm_list_t *delta_path = spkg->delta_path;
> >                                if(delta_path) {
> >                                        /* using deltas */
> > @@ -768,14 +770,20 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas)
> >                                        for(dlts = delta_path; dlts; dlts = dlts->next) {
> >                                                alpm_delta_t *delta = dlts->data;
> >                                                if(delta->download_size != 0) {
> > -                                                       files = alpm_list_add(files, strdup(delta->delta));
> > +                                                       struct dload_payload *dpayload;
> > +
> > +                                                       CALLOC(dpayload, 1, sizeof(*payload), RET_ERR(handle, PM_ERR_MEMORY, -1));
> > +                                                       STRDUP(dpayload->filename, delta->delta, RET_ERR(handle, PM_ERR_MEMORY, -1));
> > +                                                       dpayload->max_size = delta->download_size;
> > +
> > +                                                       files = alpm_list_add(files, dpayload);
> >                                                }
> >                                                /* keep a list of all the delta files for md5sums */
> >                                                *deltas = alpm_list_add(*deltas, delta);
> >                                        }
> >
> >                                } else if(spkg->download_size != 0) {
> > -                                       files = alpm_list_add(files, strdup(fname));
> > +                                       files = alpm_list_add(files, payload);
> >                                }
> >
> >                        }
> > @@ -784,7 +792,7 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas)
> >                if(files) {
> >                        EVENT(handle->trans, PM_TRANS_EVT_RETRIEVE_START, current->treename, NULL);
> >                        for(j = files; j; j = j->next) {
> > -                               const char *filename = j->data;
> > +                               struct dload_payload *payload = j->data;
> >                                alpm_list_t *server;
> >                                int ret = -1;
> >                                for(server = current->servers; server; server = server->next) {
> > @@ -793,11 +801,11 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas)
> >                                        size_t len;
> >
> >                                        /* print server + filename into a buffer */
> > -                                       len = strlen(server_url) + strlen(filename) + 2;
> > -                                       CALLOC(fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1));
> > -                                       snprintf(fileurl, len, "%s/%s", server_url, filename);
> > +                                       len = strlen(server_url) + strlen(payload->filename) + 2;
> > +                                       CALLOC(payload->fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1));
> > +                                       snprintf(payload->fileurl, len, "%s/%s", server_url, payload->filename);
> >
> > -                                       ret = _alpm_download(handle, fileurl, cachedir, NULL, 0, 1, 0);
> > +                                       ret = _alpm_download(handle, payload, cachedir, NULL, 0, 1, 0);
> >                                        FREE(fileurl);
> >                                        if(ret != -1) {
> >                                                break;
> > @@ -809,6 +817,8 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas)
> >                        }
> >
> >                        FREELIST(files);
> > +                       alpm_list_free_inner(files, _alpm_dload_payload_free);
> This is going to segfault, you left the FREELIST() call in; you want
> to remove that completely.

I'm surprised I haven't seen a double free yet because of this.

> > +                       alpm_list_free(files);
> >                        if(errors) {
> >                                _alpm_log(handle, PM_LOG_WARNING, _("failed to retrieve some files from %s\n"),
> >                                                current->treename);
> > --
> > 1.7.6
> 


More information about the pacman-dev mailing list