[pacman-dev] Do not clobber db if new db is invalid, close a couple "TODO" leaks
Patch 1 is an attempt to close #46107 [1]. In lib/libalpm/be_sync.c, pacman 1) backs up the old db, 2) proceeds as normal, 3) if new db is valid, deletes backup; if not, restores backup. While working on patch 1, I noticed a bunch of TODOs that mentioned the syncpath not being freed and the umask not being reset if pacman ran out of memory for a MALLOC. Patch 2 fixes them. [1]: https://bugs.archlinux.org/task/46107?project=3 As always, comments welcome. Enjoy! (-: Ivy
From: Ivy Foster <ivy.foster@gmail.com> Closes #46107 Signed-off-by: Ivy Foster <ivy.foster@gmail.com> --- lib/libalpm/alpm.h | 1 + lib/libalpm/be_sync.c | 26 +++++++++++++++++++++++++- lib/libalpm/error.c | 2 ++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index 168d71b..0dd68a7 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h @@ -75,6 +75,7 @@ typedef enum _alpm_errno_t { ALPM_ERR_DB_VERSION, ALPM_ERR_DB_WRITE, ALPM_ERR_DB_REMOVE, + ALPM_ERR_DB_BACKUP, /* Servers */ ALPM_ERR_SERVER_BAD_URL, ALPM_ERR_SERVER_NONE, diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 32a669d..ee438f8 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -182,6 +182,9 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) mode_t oldmask; alpm_handle_t *handle; alpm_siglevel_t level; + char *newdb; + char *olddb; + size_t len; /* Sanity checks */ ASSERT(db != NULL, return -1); @@ -218,10 +221,23 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) dbext = db->handle->dbext; + len = strlen(syncpath) + strlen(db->treename) + strlen(dbext) + 2; + /* TODO fix leak syncpath and umask unset */ + MALLOC(newdb, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1)); + snprintf(newdb, len, "%s/%s%s", syncpath, db->treename, dbext); + len += 4; + /* TODO fix leak syncpath and umask unset */ + MALLOC(olddb, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1)); + snprintf(olddb, len, "%s.bak", newdb); + if (rename(newdb, olddb) == -1) { + ret = -1; + handle->pm_errno = ALPM_ERR_DB_BACKUP; + goto cleanup; + } + for(i = db->servers; i; i = i->next) { const char *server = i->data, *final_db_url = NULL; struct dload_payload payload; - size_t len; int sig_ret = 0; memset(&payload, 0, sizeof(struct dload_payload)); @@ -315,15 +331,23 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) } } +cleanup: if(ret == -1) { /* pm_errno was set by the download code */ _alpm_log(handle, ALPM_LOG_DEBUG, "failed to sync db: %s\n", 
alpm_strerror(handle->pm_errno)); + if (handle->pm_errno != ALPM_ERR_DB_BACKUP && rename(olddb, newdb) == -1) { + _alpm_log(handle, ALPM_LOG_DEBUG, "failed to replace original db: %s\n", + alpm_strerror(ALPM_ERR_DB_BACKUP)); + } } else { + unlink(olddb); handle->pm_errno = 0; } _alpm_handle_unlock(handle); + free(newdb); + free(olddb); free(syncpath); umask(oldmask); return ret; diff --git a/lib/libalpm/error.c b/lib/libalpm/error.c index 2d6d071..e707d43 100644 --- a/lib/libalpm/error.c +++ b/lib/libalpm/error.c @@ -78,6 +78,8 @@ const char SYMEXPORT *alpm_strerror(alpm_errno_t err) return _("could not update database"); case ALPM_ERR_DB_REMOVE: return _("could not remove database entry"); + case ALPM_ERR_DB_BACKUP: + return _("could not back up old database"); /* Servers */ case ALPM_ERR_SERVER_BAD_URL: return _("invalid url for server"); -- 2.9.3
On 09/07/16 at 07:22pm, ivy.foster@gmail.com wrote:
From: Ivy Foster <ivy.foster@gmail.com>
Closes #46107
Signed-off-by: Ivy Foster <ivy.foster@gmail.com> --- lib/libalpm/alpm.h | 1 + lib/libalpm/be_sync.c | 26 +++++++++++++++++++++++++- lib/libalpm/error.c | 2 ++ 3 files changed, 28 insertions(+), 1 deletion(-)
This is a step in the right direction, but the problem of downloading an invalid db over a valid one still exists. The errors given in the bug report are not actually from the invalid db, they're from invalid signature files. If we download a junk db but no signature file, the valid db would still be overwritten by the invalid one.
diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index 168d71b..0dd68a7 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h @@ -75,6 +75,7 @@ typedef enum _alpm_errno_t { ALPM_ERR_DB_VERSION, ALPM_ERR_DB_WRITE, ALPM_ERR_DB_REMOVE, + ALPM_ERR_DB_BACKUP, /* Servers */ ALPM_ERR_SERVER_BAD_URL, ALPM_ERR_SERVER_NONE, diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 32a669d..ee438f8 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -182,6 +182,9 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) mode_t oldmask; alpm_handle_t *handle; alpm_siglevel_t level; + char *newdb; + char *olddb; + size_t len;
/* Sanity checks */ ASSERT(db != NULL, return -1); @@ -218,10 +221,23 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
dbext = db->handle->dbext;
+ len = strlen(syncpath) + strlen(db->treename) + strlen(dbext) + 2; + /* TODO fix leak syncpath and umask unset */ + MALLOC(newdb, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1)); + snprintf(newdb, len, "%s/%s%s", syncpath, db->treename, dbext); + len += 4; + /* TODO fix leak syncpath and umask unset */ + MALLOC(olddb, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1)); + snprintf(olddb, len, "%s.bak", newdb);
This runs the risk of conflicting with another db if dbext is "" or ".bak". For example, if I have repos core and core.bak, syncing core would clobber the core.bak db.
+ if (rename(newdb, olddb) == -1) { + ret = -1; + handle->pm_errno = ALPM_ERR_DB_BACKUP; + goto cleanup; + }
This is backwards. Instead of moving the good db out of the way and hoping we can move it back if something goes wrong, we should download the new database to a temp file then move it into place if it's good.
for(i = db->servers; i; i = i->next) { const char *server = i->data, *final_db_url = NULL; struct dload_payload payload; - size_t len; int sig_ret = 0;
memset(&payload, 0, sizeof(struct dload_payload)); @@ -315,15 +331,23 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) } }
+cleanup: if(ret == -1) { /* pm_errno was set by the download code */ _alpm_log(handle, ALPM_LOG_DEBUG, "failed to sync db: %s\n", alpm_strerror(handle->pm_errno)); + if (handle->pm_errno != ALPM_ERR_DB_BACKUP && rename(olddb, newdb) == -1) { + _alpm_log(handle, ALPM_LOG_DEBUG, "failed to replace original db: %s\n", + alpm_strerror(ALPM_ERR_DB_BACKUP)); + } } else { + unlink(olddb); handle->pm_errno = 0; }
_alpm_handle_unlock(handle); + free(newdb); + free(olddb); free(syncpath); umask(oldmask); return ret; diff --git a/lib/libalpm/error.c b/lib/libalpm/error.c index 2d6d071..e707d43 100644 --- a/lib/libalpm/error.c +++ b/lib/libalpm/error.c @@ -78,6 +78,8 @@ const char SYMEXPORT *alpm_strerror(alpm_errno_t err) return _("could not update database"); case ALPM_ERR_DB_REMOVE: return _("could not remove database entry"); + case ALPM_ERR_DB_BACKUP: + return _("could not back up old database"); /* Servers */ case ALPM_ERR_SERVER_BAD_URL: return _("invalid url for server"); -- 2.9.3
On 07 Sep 2016, at 10:05 pm -0400, Andrew Gregory wrote:
On 09/07/16 at 07:22pm, ivy.foster@gmail.com wrote:
From: Ivy Foster <ivy.foster@gmail.com>
This is a step in the right direction, but the problem of downloading an invalid db over a valid one still exists. The errors given in the bug report are not actually from the invalid db, they're from invalid signature files. If we download a junk db but no signature file, the valid db would still be overwritten by the invalid one.
Ah, my mistake. I should've actually looked at sync_db_validate; I just assumed that it handled validation in general, not just signatures. Also, unfortunately, I forgot about .files dbs.
/* Sanity checks */ ASSERT(db != NULL, return -1); @@ -218,10 +221,23 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
dbext = db->handle->dbext;
+ len = strlen(syncpath) + strlen(db->treename) + strlen(dbext) + 2; + /* TODO fix leak syncpath and umask unset */ + MALLOC(newdb, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1)); + snprintf(newdb, len, "%s/%s%s", syncpath, db->treename, dbext); + len += 4; + /* TODO fix leak syncpath and umask unset */ + MALLOC(olddb, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1)); + snprintf(olddb, len, "%s.bak", newdb);
This runs the risk of conflicting with another db if dbext is "" or ".bak". For example, if I have repos core and core.bak, syncing core would clobber the core.bak db.
Fair enough. I'll keep that in mind when addressing the note below.
+ if (rename(newdb, olddb) == -1) { + ret = -1; + handle->pm_errno = ALPM_ERR_DB_BACKUP; + goto cleanup; + }
This is backwards. Instead of moving the good db out of the way and hoping we can move it back if something goes wrong, we should download the new database to a temp file then move it into place if it's good.
Okay. I'll change that around tomorrow. Given the note before this one, perhaps the thing to do is to download the new db to a tmpfile named by mkstemp. Thanks, Ivy
On 09/07/16 at 10:28pm, Ivy Foster wrote:
On 07 Sep 2016, at 10:05 pm -0400, Andrew Gregory wrote:
On 09/07/16 at 07:22pm, ivy.foster@gmail.com wrote:
From: Ivy Foster <ivy.foster@gmail.com>
This is a step in the right direction, but the problem of downloading an invalid db over a valid one still exists. The errors given in the bug report are not actually from the invalid db, they're from invalid signature files. If we download a junk db but no signature file, the valid db would still be overwritten by the invalid one.
Ah, my mistake. I should've actually looked at sync_db_validate; I just assumed that it handled validation in general, not just signatures. Also, unfortunately, I forgot about .files dbs.
/* Sanity checks */ ASSERT(db != NULL, return -1); @@ -218,10 +221,23 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
dbext = db->handle->dbext;
+ len = strlen(syncpath) + strlen(db->treename) + strlen(dbext) + 2; + /* TODO fix leak syncpath and umask unset */ + MALLOC(newdb, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1)); + snprintf(newdb, len, "%s/%s%s", syncpath, db->treename, dbext); + len += 4; + /* TODO fix leak syncpath and umask unset */ + MALLOC(olddb, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1)); + snprintf(olddb, len, "%s.bak", newdb);
This runs the risk of conflicting with another db if dbext is "" or ".bak". For example, if I have repos core and core.bak, syncing core would clobber the core.bak db.
Fair enough. I'll keep that in mind when addressing the note below.
+ if (rename(newdb, olddb) == -1) { + ret = -1; + handle->pm_errno = ALPM_ERR_DB_BACKUP; + goto cleanup; + }
This is backwards. Instead of moving the good db out of the way and hoping we can move it back if something goes wrong, we should download the new database to a temp file then move it into place if it's good.
Okay. I'll change that around tomorrow. Given the note before this one, perhaps the thing to do is to download the new db to a tmpfile named by mkstemp.
As long as we're downloading directly into the db directory, ensuring we don't conflict with another db is impossible because, now that the extension is configurable, there are no limits on db file names. Even a tmpfile could conflict if the conflicting db file hasn't been downloaded yet. I see two ways around this: place restrictions on db names/extensions or download into a separate directory and move/symlink them over once validated. Using a separate directory with symlinks is probably a bit more fault tolerant because both the old and new versions of the db and its sig could coexist the entire time, but I doubt the small gain is worth the complexity. I am inclined to say that we should just reject database names beginning with '_' and use that as a prefix for any temporary files. That should be sufficient to prevent any conflicts and is trivial to check in register_syncdb.
On 08 Sep 2016, at 12:55 pm -0400, Andrew Gregory wrote:
On 09/07/16 at 10:28pm, Ivy Foster wrote:
On 07 Sep 2016, at 10:05 pm -0400, Andrew Gregory wrote:
This runs the risk of conflicting with another db if dbext is "" or ".bak". For example, if I have repos core and core.bak, syncing core would clobber the core.bak db.
Fair enough. I'll keep that in mind when addressing the note below.
+ if (rename(newdb, olddb) == -1) { + ret = -1; + handle->pm_errno = ALPM_ERR_DB_BACKUP; + goto cleanup; + }
This is backwards. Instead of moving the good db out of the way and hoping we can move it back if something goes wrong, we should download the new database to a temp file then move it into place if it's good.
Okay. I'll change that around tomorrow. Given the note before this one, perhaps the thing to do is to download the new db to a tmpfile named by mkstemp.
As long as we're downloading directly into the db directory, ensuring we don't conflict with another db is impossible because, now that the extension is configurable, there are no limits on db file names. Even a tmpfile could conflict if the conflicting db file hasn't been downloaded yet. I see two ways around this: place restrictions on db names/extensions or download into a separate directory and move/symlink them over once validated.
Using a separate directory with symlinks is probably a bit more fault tolerant because both the old and new versions of the db and its sig could coexist the entire time, but I doubt the small gain is worth the complexity.
I am inclined to say that we should just reject database names beginning with '_' and use that as a prefix for any temporary files. That should be sufficient to prevent any conflicts and is trivial to check in register_syncdb.
I mean, the names returned by mkstemp are probably sufficient protection. You provide a prefix and it provides six random characters to follow. Personally, I'm willing to bet that nobody's using the dbext ".db.tmp.XXXXXX", where each X is a random character...unless they're deliberately *trying* to collide. Either way, I'll have a look at it later this afternoon. iff
From: Ivy Foster <ivy.foster@gmail.com> Signed-off-by: Ivy Foster <ivy.foster@gmail.com> --- lib/libalpm/be_sync.c | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index ee438f8..f61668e 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -222,12 +222,24 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) dbext = db->handle->dbext; len = strlen(syncpath) + strlen(db->treename) + strlen(dbext) + 2; - /* TODO fix leak syncpath and umask unset */ - MALLOC(newdb, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1)); + MALLOC(newdb, len, + { + free(syncpath); + umask(oldmask); + RET_ERR(handle, ALPM_ERR_MEMORY, -1); + } + ); snprintf(newdb, len, "%s/%s%s", syncpath, db->treename, dbext); + len += 4; - /* TODO fix leak syncpath and umask unset */ - MALLOC(olddb, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1)); + MALLOC(olddb, len, + { + free(syncpath); + free(newdb); + umask(oldmask); + RET_ERR(handle, ALPM_ERR_MEMORY, -1); + } + ); snprintf(olddb, len, "%s.bak", newdb); if (rename(newdb, olddb) == -1) { ret = -1; @@ -247,8 +259,15 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) /* print server + filename into a buffer */ len = strlen(server) + strlen(db->treename) + strlen(dbext) + 2; - /* TODO fix leak syncpath and umask unset */ - MALLOC(payload.fileurl, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1)); + MALLOC(payload.fileurl, len, + { + free(newdb); + free(olddb); + free(syncpath); + umask(oldmask); + RET_ERR(handle, ALPM_ERR_MEMORY, -1); + } + ); snprintf(payload.fileurl, len, "%s/%s%s", server, db->treename, dbext); payload.handle = handle; payload.force = force; @@ -287,8 +306,15 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) len = strlen(server) + strlen(db->treename) + strlen(dbext) + 6; } - /* TODO fix leak syncpath and umask unset */ - MALLOC(payload.fileurl, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1)); + MALLOC(payload.fileurl, len, 
+ { + free(newdb); + free(olddb); + free(syncpath); + umask(oldmask); + RET_ERR(handle, ALPM_ERR_MEMORY, -1); + } + ); if(final_db_url != NULL) { snprintf(payload.fileurl, len, "%s.sig", final_db_url); -- 2.9.3
participants (3)
-
Andrew Gregory
-
Ivy Foster
-
ivy.foster@gmail.com