[pacman-dev] [PATCH] Use file locks (flock) to stop multiple pacman instances from running concurrently. This improves the current model by automatically removing locks held by programs has failed to remove the lock, for example at a crash, forced kill or power outage.
This allows for further improvement of pacman where shared locks could also be used, in cases there pacman only reads the database and does not edit it. File locks also makes it easy to let pacman wait until the block instance has finished, by just not using the LOCK_NB flag. --- lib/libalpm/be_sync.c | 5 +---- lib/libalpm/handle.c | 26 ++++++++++++++------------ lib/libalpm/handle.h | 2 +- lib/libalpm/trans.c | 7 +------ scripts/pacman-db-upgrade.sh.in | 9 +++++---- scripts/pacman-optimize.sh.in | 9 +++++---- 6 files changed, 27 insertions(+), 31 deletions(-) diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 0b99684..e7c8c4b 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -284,10 +284,7 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) cleanup: - if(_alpm_handle_unlock(handle)) { - _alpm_log(handle, ALPM_LOG_WARNING, _("could not remove lock file %s\n"), - handle->lockfile); - } + _alpm_handle_unlock(handle); free(syncpath); umask(oldmask); return ret; diff --git a/lib/libalpm/handle.c b/lib/libalpm/handle.c index 53c86c5..5d1cfb7 100644 --- a/lib/libalpm/handle.c +++ b/lib/libalpm/handle.c @@ -27,6 +27,7 @@ #include <sys/types.h> #include <syslog.h> #include <sys/stat.h> +#include <sys/file.h> #include <fcntl.h> /* libalpm */ @@ -44,6 +45,7 @@ alpm_handle_t *_alpm_handle_new(void) CALLOC(handle, 1, sizeof(alpm_handle_t), return NULL); handle->deltaratio = 0.0; + handle->lckstream = -1; return handle; } @@ -95,7 +97,7 @@ int _alpm_handle_lock(alpm_handle_t *handle) char *dir, *ptr; ASSERT(handle->lockfile != NULL, return -1); - ASSERT(handle->lckstream == NULL, return 0); + ASSERT(handle->lckstream < 0, return 0); /* create the dir of the lockfile first */ dir = strdup(handle->lockfile); @@ -110,14 +112,14 @@ int _alpm_handle_lock(alpm_handle_t *handle) FREE(dir); do { - fd = open(handle->lockfile, O_WRONLY | O_CREAT | O_EXCL, 0000); + fd = open(handle->lockfile, O_WRONLY | O_CREAT); } while(fd == -1 && errno == EINTR); if(fd >= 0) { - FILE *f = fdopen(fd, "w"); - fprintf(f, "%ld\n", (long)getpid()); - fflush(f); fsync(fd); - handle->lckstream = f; + if(flock(fd, LOCK_EX | LOCK_NB) < 0) { + return -1; + } + handle->lckstream = fd; return 0; } return -1; @@ -127,14 +129,14 @@ int _alpm_handle_lock(alpm_handle_t *handle) int _alpm_handle_unlock(alpm_handle_t *handle) { ASSERT(handle->lockfile != NULL, return -1); - ASSERT(handle->lckstream != NULL, return 0); - - fclose(handle->lckstream); - handle->lckstream = NULL; + ASSERT(handle->lckstream >= 0, return 0); - if(unlink(handle->lockfile) && errno != ENOENT) { - return -1; + flock(handle->lckstream, LOCK_UN); + if(flock(handle->lckstream, LOCK_EX | LOCK_NB) == 0) { + unlink(handle->lockfile); + flock(handle->lckstream, LOCK_UN); } + handle->lckstream = NULL; return 0; } diff --git a/lib/libalpm/handle.h b/lib/libalpm/handle.h index 5e84d58..7ca90ff 100644 --- a/lib/libalpm/handle.h +++ b/lib/libalpm/handle.h @@ -55,7 +55,7 @@ struct __alpm_handle_t { alpm_db_t *db_local; /* local db pointer */ alpm_list_t *dbs_sync; /* List of (alpm_db_t *) */ FILE *logstream; /* log file stream pointer */ - FILE *lckstream; /* lock file stream pointer if one exists */ + int lckstream; /* lock file file descriptor if one exists */ alpm_trans_t *trans; #ifdef HAVE_LIBCURL diff --git a/lib/libalpm/trans.c b/lib/libalpm/trans.c index 8d4e0e7..122f35b 100644 --- a/lib/libalpm/trans.c +++ b/lib/libalpm/trans.c @@ -215,12 +215,7 @@ int SYMEXPORT alpm_trans_release(alpm_handle_t *handle) /* unlock db */ if(!nolock_flag) { - if(_alpm_handle_unlock(handle)) { - _alpm_log(handle, ALPM_LOG_WARNING, _("could not remove lock file %s\n"), - handle->lockfile); - alpm_logaction(handle, ALPM_CALLER_PREFIX, - "warning: could not remove lock file %s\n", handle->lockfile); - } + _alpm_handle_unlock(handle); } return 0; diff --git a/scripts/pacman-db-upgrade.sh.in b/scripts/pacman-db-upgrade.sh.in index a1630c5..20cf035 100644 --- a/scripts/pacman-db-upgrade.sh.in +++ b/scripts/pacman-db-upgrade.sh.in @@ -102,12 +102,12 @@ dbroot="${dbroot%/}" # form the path to our lockfile location lockfile="${dbroot}/db.lck" -# make sure pacman isn't running -if [[ -f $lockfile ]]; then +# make sure pacman isn't running and stop other instances for starting +exec 100<>"$lockfile" +if ! flock -x --nonblock 100; then + flock --close 100 die "$(gettext "Pacman lock file was found. Cannot run while pacman is running.")" fi -# do not let pacman run while we do this -touch "$lockfile" # pacman-3.4 to 3.5 upgrade - merge depends into desc if [[ $(find "$dbroot"/local -name depends) ]]; then @@ -123,5 +123,6 @@ fi # remove the lock file rm -f "$lockfile" +flock --close 100 # vim: set ts=2 sw=2 noet: diff --git a/scripts/pacman-optimize.sh.in b/scripts/pacman-optimize.sh.in index 47fbb49..8c6e556 100644 --- a/scripts/pacman-optimize.sh.in +++ b/scripts/pacman-optimize.sh.in @@ -114,12 +114,12 @@ dbroot="${dbroot%/}" lockfile="${dbroot}/db.lck" localdb="${dbroot}/local" -# make sure pacman isn't running -if [[ -f $lockfile ]]; then +# make sure pacman isn't running and stop other instances for starting +exec 100<>"$lockfile" +if ! flock -x --nonblock 100; then + flock --close 100 die "$(gettext "Pacman lock file was found. Cannot run while pacman is running.")" fi -# do not let pacman run while we do this -touch "$lockfile" workdir=$(mktemp -d "${TMPDIR:-/tmp}/pacman-optimize.XXXXXXXXXX") || die_r "$(gettext "Can not create temp directory for database building.")\n" >&2 @@ -178,6 +178,7 @@ rm -rf "$localdb.old" # remove the lock file and our working directory with sums and tarfile rm -f "$lockfile" +flock --close 100 rm -rf "$workdir" echo -- 1.8.3.4
On Thu, Aug 15, 2013 at 12:03:33AM +0200, Mattias Andrée wrote:
This allows for further improvement of pacman where shared locks could also be used, in cases there pacman only reads the database and does not edit it.
File locks also makes it easy to let pacman wait until the block instance has finished, by just not using the LOCK_NB flag. ---
And the question that comes up every time this patch arises: what about NFS? No, we won't change this.
lib/libalpm/be_sync.c | 5 +---- lib/libalpm/handle.c | 26 ++++++++++++++------------ lib/libalpm/handle.h | 2 +- lib/libalpm/trans.c | 7 +------ scripts/pacman-db-upgrade.sh.in | 9 +++++---- scripts/pacman-optimize.sh.in | 9 +++++---- 6 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 0b99684..e7c8c4b 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -284,10 +284,7 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
cleanup:
- if(_alpm_handle_unlock(handle)) { - _alpm_log(handle, ALPM_LOG_WARNING, _("could not remove lock file %s\n"), - handle->lockfile); - } + _alpm_handle_unlock(handle); free(syncpath); umask(oldmask); return ret; diff --git a/lib/libalpm/handle.c b/lib/libalpm/handle.c index 53c86c5..5d1cfb7 100644 --- a/lib/libalpm/handle.c +++ b/lib/libalpm/handle.c @@ -27,6 +27,7 @@ #include <sys/types.h> #include <syslog.h> #include <sys/stat.h> +#include <sys/file.h> #include <fcntl.h>
/* libalpm */ @@ -44,6 +45,7 @@ alpm_handle_t *_alpm_handle_new(void)
CALLOC(handle, 1, sizeof(alpm_handle_t), return NULL); handle->deltaratio = 0.0; + handle->lckstream = -1;
return handle; } @@ -95,7 +97,7 @@ int _alpm_handle_lock(alpm_handle_t *handle) char *dir, *ptr;
ASSERT(handle->lockfile != NULL, return -1); - ASSERT(handle->lckstream == NULL, return 0); + ASSERT(handle->lckstream < 0, return 0);
/* create the dir of the lockfile first */ dir = strdup(handle->lockfile); @@ -110,14 +112,14 @@ int _alpm_handle_lock(alpm_handle_t *handle) FREE(dir);
do { - fd = open(handle->lockfile, O_WRONLY | O_CREAT | O_EXCL, 0000); + fd = open(handle->lockfile, O_WRONLY | O_CREAT); } while(fd == -1 && errno == EINTR); if(fd >= 0) { - FILE *f = fdopen(fd, "w"); - fprintf(f, "%ld\n", (long)getpid()); - fflush(f); fsync(fd); - handle->lckstream = f; + if(flock(fd, LOCK_EX | LOCK_NB) < 0) { + return -1; + } + handle->lckstream = fd; return 0; } return -1; @@ -127,14 +129,14 @@ int _alpm_handle_lock(alpm_handle_t *handle) int _alpm_handle_unlock(alpm_handle_t *handle) { ASSERT(handle->lockfile != NULL, return -1); - ASSERT(handle->lckstream != NULL, return 0); - - fclose(handle->lckstream); - handle->lckstream = NULL; + ASSERT(handle->lckstream >= 0, return 0);
- if(unlink(handle->lockfile) && errno != ENOENT) { - return -1; + flock(handle->lckstream, LOCK_UN); + if(flock(handle->lckstream, LOCK_EX | LOCK_NB) == 0) { + unlink(handle->lockfile); + flock(handle->lckstream, LOCK_UN); } + handle->lckstream = NULL; return 0; }
diff --git a/lib/libalpm/handle.h b/lib/libalpm/handle.h index 5e84d58..7ca90ff 100644 --- a/lib/libalpm/handle.h +++ b/lib/libalpm/handle.h @@ -55,7 +55,7 @@ struct __alpm_handle_t { alpm_db_t *db_local; /* local db pointer */ alpm_list_t *dbs_sync; /* List of (alpm_db_t *) */ FILE *logstream; /* log file stream pointer */ - FILE *lckstream; /* lock file stream pointer if one exists */ + int lckstream; /* lock file file descriptor if one exists */ alpm_trans_t *trans;
#ifdef HAVE_LIBCURL diff --git a/lib/libalpm/trans.c b/lib/libalpm/trans.c index 8d4e0e7..122f35b 100644 --- a/lib/libalpm/trans.c +++ b/lib/libalpm/trans.c @@ -215,12 +215,7 @@ int SYMEXPORT alpm_trans_release(alpm_handle_t *handle)
/* unlock db */ if(!nolock_flag) { - if(_alpm_handle_unlock(handle)) { - _alpm_log(handle, ALPM_LOG_WARNING, _("could not remove lock file %s\n"), - handle->lockfile); - alpm_logaction(handle, ALPM_CALLER_PREFIX, - "warning: could not remove lock file %s\n", handle->lockfile); - } + _alpm_handle_unlock(handle); }
return 0; diff --git a/scripts/pacman-db-upgrade.sh.in b/scripts/pacman-db-upgrade.sh.in index a1630c5..20cf035 100644 --- a/scripts/pacman-db-upgrade.sh.in +++ b/scripts/pacman-db-upgrade.sh.in @@ -102,12 +102,12 @@ dbroot="${dbroot%/}" # form the path to our lockfile location lockfile="${dbroot}/db.lck"
-# make sure pacman isn't running -if [[ -f $lockfile ]]; then +# make sure pacman isn't running and stop other instances for starting +exec 100<>"$lockfile" +if ! flock -x --nonblock 100; then + flock --close 100 die "$(gettext "Pacman lock file was found. Cannot run while pacman is running.")" fi -# do not let pacman run while we do this -touch "$lockfile"
# pacman-3.4 to 3.5 upgrade - merge depends into desc if [[ $(find "$dbroot"/local -name depends) ]]; then @@ -123,5 +123,6 @@ fi
# remove the lock file rm -f "$lockfile" +flock --close 100
# vim: set ts=2 sw=2 noet: diff --git a/scripts/pacman-optimize.sh.in b/scripts/pacman-optimize.sh.in index 47fbb49..8c6e556 100644 --- a/scripts/pacman-optimize.sh.in +++ b/scripts/pacman-optimize.sh.in @@ -114,12 +114,12 @@ dbroot="${dbroot%/}" lockfile="${dbroot}/db.lck" localdb="${dbroot}/local"
-# make sure pacman isn't running -if [[ -f $lockfile ]]; then +# make sure pacman isn't running and stop other instances for starting +exec 100<>"$lockfile" +if ! flock -x --nonblock 100; then + flock --close 100 die "$(gettext "Pacman lock file was found. Cannot run while pacman is running.")" fi -# do not let pacman run while we do this -touch "$lockfile"
workdir=$(mktemp -d "${TMPDIR:-/tmp}/pacman-optimize.XXXXXXXXXX") || die_r "$(gettext "Can not create temp directory for database building.")\n" >&2 @@ -178,6 +178,7 @@ rm -rf "$localdb.old"
# remove the lock file and our working directory with sums and tarfile rm -f "$lockfile" +flock --close 100 rm -rf "$workdir"
echo -- 1.8.3.4
That is ridiculus that NFS does not implement flock, but there is a solution documented in flock(2): flock() does not lock files over NFS. Use fcntl(2) instead: that does work over NFS, given a sufficiently recent version of Linux and a server which supports locking. On Wed, 14 Aug 2013 18:14:20 -0400 Dave Reisner <d@falconindy.com> wrote:
On Thu, Aug 15, 2013 at 12:03:33AM +0200, Mattias Andrée wrote:
This allows for further improvement of pacman where shared locks could also be used, in cases there pacman only reads the database and does not edit it.
File locks also makes it easy to let pacman wait until the block instance has finished, by just not using the LOCK_NB flag. ---
And the question that comes up every time this patch arises: what about NFS?
No, we won't change this.
lib/libalpm/be_sync.c | 5 +---- lib/libalpm/handle.c | 26 ++++++++++++++------------ lib/libalpm/handle.h | 2 +- lib/libalpm/trans.c | 7 +------ scripts/pacman-db-upgrade.sh.in | 9 +++++---- scripts/pacman-optimize.sh.in | 9 +++++---- 6 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 0b99684..e7c8c4b 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -284,10 +284,7 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) cleanup:
- if(_alpm_handle_unlock(handle)) { - _alpm_log(handle, ALPM_LOG_WARNING, _("could not remove lock file %s\n"), - handle->lockfile); - } + _alpm_handle_unlock(handle); free(syncpath); umask(oldmask); return ret; diff --git a/lib/libalpm/handle.c b/lib/libalpm/handle.c index 53c86c5..5d1cfb7 100644 --- a/lib/libalpm/handle.c +++ b/lib/libalpm/handle.c @@ -27,6 +27,7 @@ #include <sys/types.h> #include <syslog.h> #include <sys/stat.h> +#include <sys/file.h> #include <fcntl.h>
/* libalpm */ @@ -44,6 +45,7 @@ alpm_handle_t *_alpm_handle_new(void)
CALLOC(handle, 1, sizeof(alpm_handle_t), return NULL); handle->deltaratio = 0.0; + handle->lckstream = -1;
return handle; } @@ -95,7 +97,7 @@ int _alpm_handle_lock(alpm_handle_t *handle) char *dir, *ptr;
ASSERT(handle->lockfile != NULL, return -1); - ASSERT(handle->lckstream == NULL, return 0); + ASSERT(handle->lckstream < 0, return 0);
/* create the dir of the lockfile first */ dir = strdup(handle->lockfile); @@ -110,14 +112,14 @@ int _alpm_handle_lock(alpm_handle_t *handle) FREE(dir);
do { - fd = open(handle->lockfile, O_WRONLY | O_CREAT | O_EXCL, 0000); + fd = open(handle->lockfile, O_WRONLY | O_CREAT); } while(fd == -1 && errno == EINTR); if(fd >= 0) { - FILE *f = fdopen(fd, "w"); - fprintf(f, "%ld\n", (long)getpid()); - fflush(f); fsync(fd); - handle->lckstream = f; + if(flock(fd, LOCK_EX | LOCK_NB) < 0) { + return -1; + } + handle->lckstream = fd; return 0; } return -1; @@ -127,14 +129,14 @@ int _alpm_handle_lock(alpm_handle_t *handle) int _alpm_handle_unlock(alpm_handle_t *handle) { ASSERT(handle->lockfile != NULL, return -1); - ASSERT(handle->lckstream != NULL, return 0); - - fclose(handle->lckstream); - handle->lckstream = NULL; + ASSERT(handle->lckstream >= 0, return 0);
- if(unlink(handle->lockfile) && errno != ENOENT) { - return -1; + flock(handle->lckstream, LOCK_UN); + if(flock(handle->lckstream, LOCK_EX | LOCK_NB) == 0) { + unlink(handle->lockfile); + flock(handle->lckstream, LOCK_UN); } + handle->lckstream = NULL; return 0; }
diff --git a/lib/libalpm/handle.h b/lib/libalpm/handle.h index 5e84d58..7ca90ff 100644 --- a/lib/libalpm/handle.h +++ b/lib/libalpm/handle.h @@ -55,7 +55,7 @@ struct __alpm_handle_t { alpm_db_t *db_local; /* local db pointer */ alpm_list_t *dbs_sync; /* List of (alpm_db_t *) */ FILE *logstream; /* log file stream pointer */ - FILE *lckstream; /* lock file stream pointer if one exists */ + int lckstream; /* lock file file descriptor if one exists */ alpm_trans_t *trans;
#ifdef HAVE_LIBCURL diff --git a/lib/libalpm/trans.c b/lib/libalpm/trans.c index 8d4e0e7..122f35b 100644 --- a/lib/libalpm/trans.c +++ b/lib/libalpm/trans.c @@ -215,12 +215,7 @@ int SYMEXPORT alpm_trans_release(alpm_handle_t *handle) /* unlock db */ if(!nolock_flag) { - if(_alpm_handle_unlock(handle)) { - _alpm_log(handle, ALPM_LOG_WARNING, _("could not remove lock file %s\n"), - handle->lockfile); - alpm_logaction(handle, ALPM_CALLER_PREFIX, - "warning: could not remove lock file %s\n", handle->lockfile); - } + _alpm_handle_unlock(handle); }
return 0; diff --git a/scripts/pacman-db-upgrade.sh.in b/scripts/pacman-db-upgrade.sh.in index a1630c5..20cf035 100644 --- a/scripts/pacman-db-upgrade.sh.in +++ b/scripts/pacman-db-upgrade.sh.in @@ -102,12 +102,12 @@ dbroot="${dbroot%/}" # form the path to our lockfile location lockfile="${dbroot}/db.lck"
-# make sure pacman isn't running -if [[ -f $lockfile ]]; then +# make sure pacman isn't running and stop other instances for starting +exec 100<>"$lockfile" +if ! flock -x --nonblock 100; then + flock --close 100 die "$(gettext "Pacman lock file was found. Cannot run while pacman is running.")" fi -# do not let pacman run while we do this -touch "$lockfile"
# pacman-3.4 to 3.5 upgrade - merge depends into desc if [[ $(find "$dbroot"/local -name depends) ]]; then @@ -123,5 +123,6 @@ fi
# remove the lock file rm -f "$lockfile" +flock --close 100
# vim: set ts=2 sw=2 noet: diff --git a/scripts/pacman-optimize.sh.in b/scripts/pacman-optimize.sh.in index 47fbb49..8c6e556 100644 --- a/scripts/pacman-optimize.sh.in +++ b/scripts/pacman-optimize.sh.in @@ -114,12 +114,12 @@ dbroot="${dbroot%/}" lockfile="${dbroot}/db.lck" localdb="${dbroot}/local"
-# make sure pacman isn't running -if [[ -f $lockfile ]]; then +# make sure pacman isn't running and stop other instances for starting +exec 100<>"$lockfile" +if ! flock -x --nonblock 100; then + flock --close 100 die "$(gettext "Pacman lock file was found. Cannot run while pacman is running.")" fi -# do not let pacman run while we do this -touch "$lockfile"
workdir=$(mktemp -d "${TMPDIR:-/tmp}/pacman-optimize.XXXXXXXXXX") || die_r "$(gettext "Can not create temp directory for database building.")\n" >&2 @@ -178,6 +178,7 @@ rm -rf "$localdb.old" # remove the lock file and our working directory with sums and tarfile rm -f "$lockfile" +flock --close 100 rm -rf "$workdir"
echo -- 1.8.3.4
On Aug 14, 2013 7:21 PM, "Mattias Andrée" <maandree@member.fsf.org> wrote:
That is ridiculus that NFS does not implement flock, but there is a solution documented in flock(2):
flock() does not lock files over NFS. Use fcntl(2) instead: that does work over NFS, given a sufficiently recent version of Linux and a server which supports locking.
Keyword being Linux. We support BSD and OSX as well. fcntl semantics aren't portable. Again, you're not the first to go down this road.
On Wed, 14 Aug 2013 18:14:20 -0400 Dave Reisner <d@falconindy.com> wrote:
On Thu, Aug 15, 2013 at 12:03:33AM +0200, Mattias Andrée wrote:
This allows for further improvement of pacman where shared locks could also be used, in cases there pacman only reads the database and does not edit it.
File locks also makes it easy to let pacman wait until the block instance has finished, by just not using the LOCK_NB flag. ---
And the question that comes up every time this patch arises: what about NFS?
No, we won't change this.
lib/libalpm/be_sync.c | 5 +---- lib/libalpm/handle.c | 26 ++++++++++++++------------ lib/libalpm/handle.h | 2 +- lib/libalpm/trans.c | 7 +------ scripts/pacman-db-upgrade.sh.in | 9 +++++---- scripts/pacman-optimize.sh.in | 9 +++++---- 6 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 0b99684..e7c8c4b 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -284,10 +284,7 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) cleanup:
- if(_alpm_handle_unlock(handle)) { - _alpm_log(handle, ALPM_LOG_WARNING, _("could not remove lock file %s\n"), - handle->lockfile); - } + _alpm_handle_unlock(handle); free(syncpath); umask(oldmask); return ret; diff --git a/lib/libalpm/handle.c b/lib/libalpm/handle.c index 53c86c5..5d1cfb7 100644 --- a/lib/libalpm/handle.c +++ b/lib/libalpm/handle.c @@ -27,6 +27,7 @@ #include <sys/types.h> #include <syslog.h> #include <sys/stat.h> +#include <sys/file.h> #include <fcntl.h>
/* libalpm */ @@ -44,6 +45,7 @@ alpm_handle_t *_alpm_handle_new(void)
CALLOC(handle, 1, sizeof(alpm_handle_t), return NULL); handle->deltaratio = 0.0; + handle->lckstream = -1;
return handle; } @@ -95,7 +97,7 @@ int _alpm_handle_lock(alpm_handle_t *handle) char *dir, *ptr;
ASSERT(handle->lockfile != NULL, return -1); - ASSERT(handle->lckstream == NULL, return 0); + ASSERT(handle->lckstream < 0, return 0);
/* create the dir of the lockfile first */ dir = strdup(handle->lockfile); @@ -110,14 +112,14 @@ int _alpm_handle_lock(alpm_handle_t *handle) FREE(dir);
do { - fd = open(handle->lockfile, O_WRONLY | O_CREAT | O_EXCL, 0000); + fd = open(handle->lockfile, O_WRONLY | O_CREAT); } while(fd == -1 && errno == EINTR); if(fd >= 0) { - FILE *f = fdopen(fd, "w"); - fprintf(f, "%ld\n", (long)getpid()); - fflush(f); fsync(fd); - handle->lckstream = f; + if(flock(fd, LOCK_EX | LOCK_NB) < 0) { + return -1; + } + handle->lckstream = fd; return 0; } return -1; @@ -127,14 +129,14 @@ int _alpm_handle_lock(alpm_handle_t *handle) int _alpm_handle_unlock(alpm_handle_t *handle) { ASSERT(handle->lockfile != NULL, return -1); - ASSERT(handle->lckstream != NULL, return 0); - - fclose(handle->lckstream); - handle->lckstream = NULL; + ASSERT(handle->lckstream >= 0, return 0);
- if(unlink(handle->lockfile) && errno != ENOENT) { - return -1; + flock(handle->lckstream, LOCK_UN); + if(flock(handle->lckstream, LOCK_EX | LOCK_NB) == 0) { + unlink(handle->lockfile); + flock(handle->lckstream, LOCK_UN); } + handle->lckstream = NULL; return 0; }
diff --git a/lib/libalpm/handle.h b/lib/libalpm/handle.h index 5e84d58..7ca90ff 100644 --- a/lib/libalpm/handle.h +++ b/lib/libalpm/handle.h @@ -55,7 +55,7 @@ struct __alpm_handle_t { alpm_db_t *db_local; /* local db pointer */ alpm_list_t *dbs_sync; /* List of (alpm_db_t *) */ FILE *logstream; /* log file stream pointer */ - FILE *lckstream; /* lock file stream pointer if one exists */ + int lckstream; /* lock file file descriptor if one exists */ alpm_trans_t *trans;
#ifdef HAVE_LIBCURL diff --git a/lib/libalpm/trans.c b/lib/libalpm/trans.c index 8d4e0e7..122f35b 100644 --- a/lib/libalpm/trans.c +++ b/lib/libalpm/trans.c @@ -215,12 +215,7 @@ int SYMEXPORT alpm_trans_release(alpm_handle_t *handle) /* unlock db */ if(!nolock_flag) { - if(_alpm_handle_unlock(handle)) { - _alpm_log(handle, ALPM_LOG_WARNING, _("could not remove lock file %s\n"), - handle->lockfile); - alpm_logaction(handle, ALPM_CALLER_PREFIX, - "warning: could not remove lock file %s\n", handle->lockfile); - } + _alpm_handle_unlock(handle); }
return 0; diff --git a/scripts/pacman-db-upgrade.sh.in b/scripts/pacman-db-upgrade.sh.in index a1630c5..20cf035 100644 --- a/scripts/pacman-db-upgrade.sh.in +++ b/scripts/pacman-db-upgrade.sh.in @@ -102,12 +102,12 @@ dbroot="${dbroot%/}" # form the path to our lockfile location lockfile="${dbroot}/db.lck"
-# make sure pacman isn't running -if [[ -f $lockfile ]]; then +# make sure pacman isn't running and stop other instances for starting +exec 100<>"$lockfile" +if ! flock -x --nonblock 100; then + flock --close 100 die "$(gettext "Pacman lock file was found. Cannot run while pacman is running.")" fi -# do not let pacman run while we do this -touch "$lockfile"
# pacman-3.4 to 3.5 upgrade - merge depends into desc if [[ $(find "$dbroot"/local -name depends) ]]; then @@ -123,5 +123,6 @@ fi
# remove the lock file rm -f "$lockfile" +flock --close 100
# vim: set ts=2 sw=2 noet: diff --git a/scripts/pacman-optimize.sh.in b/scripts/pacman-optimize.sh.in index 47fbb49..8c6e556 100644 --- a/scripts/pacman-optimize.sh.in +++ b/scripts/pacman-optimize.sh.in @@ -114,12 +114,12 @@ dbroot="${dbroot%/}" lockfile="${dbroot}/db.lck" localdb="${dbroot}/local"
-# make sure pacman isn't running -if [[ -f $lockfile ]]; then +# make sure pacman isn't running and stop other instances for starting +exec 100<>"$lockfile" +if ! flock -x --nonblock 100; then + flock --close 100 die "$(gettext "Pacman lock file was found. Cannot run while pacman is running.")" fi -# do not let pacman run while we do this -touch "$lockfile"
workdir=$(mktemp -d "${TMPDIR:-/tmp}/pacman-optimize.XXXXXXXXXX") || die_r "$(gettext "Can not create temp directory for database building.")\n" >&2 @@ -178,6 +178,7 @@ rm -rf "$localdb.old" # remove the lock file and our working directory with sums and tarfile rm -f "$lockfile" +flock --close 100 rm -rf "$workdir"
echo -- 1.8.3.4
I understood that. But I did not know that pacman supports BSD or Max OS X, nor that it was not portable. Just leaving it out because of NFS seems silly, it is not up to the program to account for all software that does not implement expected standards. On Wed, 14 Aug 2013 19:29:41 -0400 Dave Reisner <d@falconindy.com> wrote:
On Aug 14, 2013 7:21 PM, "Mattias Andrée" <maandree@member.fsf.org> wrote:
That is ridiculus that NFS does not implement flock, but there is a solution documented in flock(2):
flock() does not lock files over NFS. Use fcntl(2) instead: that does work over NFS, given a sufficiently recent version of Linux and a server which supports locking.
Keyword being Linux. We support BSD and OSX as well. fcntl semantics aren't portable.
Again, you're not the first to go down this road.
On Wed, 14 Aug 2013 18:14:20 -0400 Dave Reisner <d@falconindy.com> wrote:
On Thu, Aug 15, 2013 at 12:03:33AM +0200, Mattias Andrée wrote:
This allows for further improvement of pacman where shared locks could also be used, in cases there pacman only reads the database and does not edit it.
File locks also makes it easy to let pacman wait until the block instance has finished, by just not using the LOCK_NB flag. ---
And the question that comes up every time this patch arises: what about NFS?
No, we won't change this.
lib/libalpm/be_sync.c | 5 +---- lib/libalpm/handle.c | 26 ++++++++++++++------------ lib/libalpm/handle.h | 2 +- lib/libalpm/trans.c | 7 +------ scripts/pacman-db-upgrade.sh.in | 9 +++++---- scripts/pacman-optimize.sh.in | 9 +++++---- 6 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 0b99684..e7c8c4b 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -284,10 +284,7 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) cleanup:
- if(_alpm_handle_unlock(handle)) { - _alpm_log(handle, ALPM_LOG_WARNING, _("could not remove lock file %s\n"), - handle->lockfile); - } + _alpm_handle_unlock(handle); free(syncpath); umask(oldmask); return ret; diff --git a/lib/libalpm/handle.c b/lib/libalpm/handle.c index 53c86c5..5d1cfb7 100644 --- a/lib/libalpm/handle.c +++ b/lib/libalpm/handle.c @@ -27,6 +27,7 @@ #include <sys/types.h> #include <syslog.h> #include <sys/stat.h> +#include <sys/file.h> #include <fcntl.h>
/* libalpm */ @@ -44,6 +45,7 @@ alpm_handle_t *_alpm_handle_new(void)
CALLOC(handle, 1, sizeof(alpm_handle_t), return NULL); handle->deltaratio = 0.0; + handle->lckstream = -1;
return handle; } @@ -95,7 +97,7 @@ int _alpm_handle_lock(alpm_handle_t *handle) char *dir, *ptr;
ASSERT(handle->lockfile != NULL, return -1); - ASSERT(handle->lckstream == NULL, return 0); + ASSERT(handle->lckstream < 0, return 0);
/* create the dir of the lockfile first */ dir = strdup(handle->lockfile); @@ -110,14 +112,14 @@ int _alpm_handle_lock(alpm_handle_t *handle) FREE(dir);
do { - fd = open(handle->lockfile, O_WRONLY | O_CREAT | O_EXCL, 0000); + fd = open(handle->lockfile, O_WRONLY | O_CREAT); } while(fd == -1 && errno == EINTR); if(fd >= 0) { - FILE *f = fdopen(fd, "w"); - fprintf(f, "%ld\n", (long)getpid()); - fflush(f); fsync(fd); - handle->lckstream = f; + if(flock(fd, LOCK_EX | LOCK_NB) < 0) { + return -1; + } + handle->lckstream = fd; return 0; } return -1; @@ -127,14 +129,14 @@ int _alpm_handle_lock(alpm_handle_t *handle) int _alpm_handle_unlock(alpm_handle_t *handle) { ASSERT(handle->lockfile != NULL, return -1); - ASSERT(handle->lckstream != NULL, return 0); - - fclose(handle->lckstream); - handle->lckstream = NULL; + ASSERT(handle->lckstream >= 0, return 0);
- if(unlink(handle->lockfile) && errno != ENOENT) { - return -1; + flock(handle->lckstream, LOCK_UN); + if(flock(handle->lckstream, LOCK_EX | LOCK_NB) == 0) { + unlink(handle->lockfile); + flock(handle->lckstream, LOCK_UN); } + handle->lckstream = NULL; return 0; }
diff --git a/lib/libalpm/handle.h b/lib/libalpm/handle.h index 5e84d58..7ca90ff 100644 --- a/lib/libalpm/handle.h +++ b/lib/libalpm/handle.h @@ -55,7 +55,7 @@ struct __alpm_handle_t { alpm_db_t *db_local; /* local db pointer */ alpm_list_t *dbs_sync; /* List of (alpm_db_t *) */ FILE *logstream; /* log file stream pointer */ - FILE *lckstream; /* lock file stream pointer if one exists */ + int lckstream; /* lock file file descriptor if one exists */ alpm_trans_t *trans;
#ifdef HAVE_LIBCURL diff --git a/lib/libalpm/trans.c b/lib/libalpm/trans.c index 8d4e0e7..122f35b 100644 --- a/lib/libalpm/trans.c +++ b/lib/libalpm/trans.c @@ -215,12 +215,7 @@ int SYMEXPORT alpm_trans_release(alpm_handle_t *handle) /* unlock db */ if(!nolock_flag) { - if(_alpm_handle_unlock(handle)) { - _alpm_log(handle, ALPM_LOG_WARNING, _("could not remove lock file %s\n"), - handle->lockfile); - alpm_logaction(handle, ALPM_CALLER_PREFIX, - "warning: could not remove lock file %s\n", handle->lockfile); - } + _alpm_handle_unlock(handle); }
return 0; diff --git a/scripts/pacman-db-upgrade.sh.in b/scripts/pacman-db-upgrade.sh.in index a1630c5..20cf035 100644 --- a/scripts/pacman-db-upgrade.sh.in +++ b/scripts/pacman-db-upgrade.sh.in @@ -102,12 +102,12 @@ dbroot="${dbroot%/}" # form the path to our lockfile location lockfile="${dbroot}/db.lck"
-# make sure pacman isn't running -if [[ -f $lockfile ]]; then +# make sure pacman isn't running and stop other instances for starting +exec 100<>"$lockfile" +if ! flock -x --nonblock 100; then + flock --close 100 die "$(gettext "Pacman lock file was found. Cannot run while pacman is running.")" fi -# do not let pacman run while we do this -touch "$lockfile"
# pacman-3.4 to 3.5 upgrade - merge depends into desc if [[ $(find "$dbroot"/local -name depends) ]]; then @@ -123,5 +123,6 @@ fi
# remove the lock file rm -f "$lockfile" +flock --close 100
# vim: set ts=2 sw=2 noet: diff --git a/scripts/pacman-optimize.sh.in b/scripts/pacman-optimize.sh.in index 47fbb49..8c6e556 100644 --- a/scripts/pacman-optimize.sh.in +++ b/scripts/pacman-optimize.sh.in @@ -114,12 +114,12 @@ dbroot="${dbroot%/}" lockfile="${dbroot}/db.lck" localdb="${dbroot}/local"
-# make sure pacman isn't running -if [[ -f $lockfile ]]; then +# make sure pacman isn't running and stop other instances for starting +exec 100<>"$lockfile" +if ! flock -x --nonblock 100; then + flock --close 100 die "$(gettext "Pacman lock file was found. Cannot run while pacman is running.")" fi -# do not let pacman run while we do this -touch "$lockfile"
workdir=$(mktemp -d "${TMPDIR:-/tmp}/pacman-optimize.XXXXXXXXXX") || die_r "$(gettext "Can not create temp directory for database building.")\n" >&2 @@ -178,6 +178,7 @@ rm -rf "$localdb.old" # remove the lock file and our working directory with sums and tarfile rm -f "$lockfile" +flock --close 100 rm -rf "$workdir"
echo -- 1.8.3.4
On Aug 14, 2013 8:38 PM, "Mattias Andrée" <maandree@member.fsf.org> wrote:
I understood that. But I did not know that pacman supports BSD or Max OS X, nor that it was not portable. Just leaving it out because of NFS seems silly, it is not up to the program to account for all software that does not implement expected standards.
Uhhh but it is. That's sort of the idea of software portability. And since when is file locking support anything close to standardized? It's incredibly OS and filesystem specific.
On Wed, 14 Aug 2013 19:29:41 -0400 Dave Reisner <d@falconindy.com> wrote:
On Aug 14, 2013 7:21 PM, "Mattias Andrée" <maandree@member.fsf.org> wrote:
That is ridiculus that NFS does not implement flock, but there is a solution documented in flock(2):
flock() does not lock files over NFS. Use fcntl(2) instead: that does work over NFS, given a sufficiently recent version of Linux and a server which supports locking.
Keyword being Linux. We support BSD and OSX as well. fcntl semantics aren't portable.
Again, you're not the first to go down this road.
On Wed, 14 Aug 2013 18:14:20 -0400 Dave Reisner <d@falconindy.com> wrote:
On Thu, Aug 15, 2013 at 12:03:33AM +0200, Mattias Andrée wrote:
This allows for further improvement of pacman where shared locks could also be used, in cases there pacman only reads the database and does not edit it.
File locks also makes it easy to let pacman wait until the block instance has finished, by just not using the LOCK_NB flag. ---
And the question that comes up every time this patch arises: what about NFS?
No, we won't change this.
lib/libalpm/be_sync.c | 5 +---- lib/libalpm/handle.c | 26 ++++++++++++++------------ lib/libalpm/handle.h | 2 +- lib/libalpm/trans.c | 7 +------ scripts/pacman-db-upgrade.sh.in | 9 +++++---- scripts/pacman-optimize.sh.in | 9 +++++---- 6 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 0b99684..e7c8c4b 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -284,10 +284,7 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) cleanup:
- if(_alpm_handle_unlock(handle)) { - _alpm_log(handle, ALPM_LOG_WARNING, _("could not remove lock file %s\n"), - handle->lockfile); - } + _alpm_handle_unlock(handle); free(syncpath); umask(oldmask); return ret; diff --git a/lib/libalpm/handle.c b/lib/libalpm/handle.c index 53c86c5..5d1cfb7 100644 --- a/lib/libalpm/handle.c +++ b/lib/libalpm/handle.c @@ -27,6 +27,7 @@ #include <sys/types.h> #include <syslog.h> #include <sys/stat.h> +#include <sys/file.h> #include <fcntl.h>
/* libalpm */ @@ -44,6 +45,7 @@ alpm_handle_t *_alpm_handle_new(void)
CALLOC(handle, 1, sizeof(alpm_handle_t), return NULL); handle->deltaratio = 0.0; + handle->lckstream = -1;
return handle; } @@ -95,7 +97,7 @@ int _alpm_handle_lock(alpm_handle_t *handle) char *dir, *ptr;
ASSERT(handle->lockfile != NULL, return -1); - ASSERT(handle->lckstream == NULL, return 0); + ASSERT(handle->lckstream < 0, return 0);
/* create the dir of the lockfile first */ dir = strdup(handle->lockfile); @@ -110,14 +112,14 @@ int _alpm_handle_lock(alpm_handle_t *handle) FREE(dir);
do { - fd = open(handle->lockfile, O_WRONLY | O_CREAT | O_EXCL, 0000); + fd = open(handle->lockfile, O_WRONLY | O_CREAT); } while(fd == -1 && errno == EINTR); if(fd >= 0) { - FILE *f = fdopen(fd, "w"); - fprintf(f, "%ld\n", (long)getpid()); - fflush(f); fsync(fd); - handle->lckstream = f; + if(flock(fd, LOCK_EX | LOCK_NB) < 0) { + return -1; + } + handle->lckstream = fd; return 0; } return -1; @@ -127,14 +129,14 @@ int _alpm_handle_lock(alpm_handle_t *handle) int _alpm_handle_unlock(alpm_handle_t *handle) { ASSERT(handle->lockfile != NULL, return -1); - ASSERT(handle->lckstream != NULL, return 0); - - fclose(handle->lckstream); - handle->lckstream = NULL; + ASSERT(handle->lckstream >= 0, return 0);
- if(unlink(handle->lockfile) && errno != ENOENT) { - return -1; + flock(handle->lckstream, LOCK_UN); + if(flock(handle->lckstream, LOCK_EX | LOCK_NB) == 0) { + unlink(handle->lockfile); + flock(handle->lckstream, LOCK_UN); } + handle->lckstream = NULL; return 0; }
diff --git a/lib/libalpm/handle.h b/lib/libalpm/handle.h index 5e84d58..7ca90ff 100644 --- a/lib/libalpm/handle.h +++ b/lib/libalpm/handle.h @@ -55,7 +55,7 @@ struct __alpm_handle_t { alpm_db_t *db_local; /* local db pointer */ alpm_list_t *dbs_sync; /* List of (alpm_db_t *) */ FILE *logstream; /* log file stream pointer */ - FILE *lckstream; /* lock file stream pointer if one exists */ + int lckstream; /* lock file file descriptor if one exists */ alpm_trans_t *trans;
#ifdef HAVE_LIBCURL diff --git a/lib/libalpm/trans.c b/lib/libalpm/trans.c index 8d4e0e7..122f35b 100644 --- a/lib/libalpm/trans.c +++ b/lib/libalpm/trans.c @@ -215,12 +215,7 @@ int SYMEXPORT alpm_trans_release(alpm_handle_t *handle) /* unlock db */ if(!nolock_flag) { - if(_alpm_handle_unlock(handle)) { - _alpm_log(handle, ALPM_LOG_WARNING, _("could not remove lock file %s\n"), - handle->lockfile); - alpm_logaction(handle, ALPM_CALLER_PREFIX, - "warning: could not remove lock file %s\n", handle->lockfile); - } + _alpm_handle_unlock(handle); }
return 0; diff --git a/scripts/pacman-db-upgrade.sh.in b/scripts/pacman-db-upgrade.sh.in index a1630c5..20cf035 100644 --- a/scripts/pacman-db-upgrade.sh.in +++ b/scripts/pacman-db-upgrade.sh.in @@ -102,12 +102,12 @@ dbroot="${dbroot%/}" # form the path to our lockfile location lockfile="${dbroot}/db.lck"
-# make sure pacman isn't running -if [[ -f $lockfile ]]; then +# make sure pacman isn't running and stop other instances for starting +exec 100<>"$lockfile" +if ! flock -x --nonblock 100; then + flock --close 100 die "$(gettext "Pacman lock file was found. Cannot run while pacman is running.")" fi -# do not let pacman run while we do this -touch "$lockfile"
# pacman-3.4 to 3.5 upgrade - merge depends into desc if [[ $(find "$dbroot"/local -name depends) ]]; then @@ -123,5 +123,6 @@ fi
# remove the lock file rm -f "$lockfile" +flock --close 100
# vim: set ts=2 sw=2 noet: diff --git a/scripts/pacman-optimize.sh.in b/scripts/pacman-optimize.sh.in index 47fbb49..8c6e556 100644 --- a/scripts/pacman-optimize.sh.in +++ b/scripts/pacman-optimize.sh.in @@ -114,12 +114,12 @@ dbroot="${dbroot%/}" lockfile="${dbroot}/db.lck" localdb="${dbroot}/local"
-# make sure pacman isn't running -if [[ -f $lockfile ]]; then +# make sure pacman isn't running and stop other instances for starting +exec 100<>"$lockfile" +if ! flock -x --nonblock 100; then + flock --close 100 die "$(gettext "Pacman lock file was found. Cannot run while pacman is running.")" fi -# do not let pacman run while we do this -touch "$lockfile"
workdir=$(mktemp -d "${TMPDIR:-/tmp}/pacman-optimize.XXXXXXXXXX") || die_r "$(gettext "Can not create temp directory for database building.")\n" >&2 @@ -178,6 +178,7 @@ rm -rf "$localdb.old" # remove the lock file and our working directory with sums and tarfile rm -f "$lockfile" +flock --close 100 rm -rf "$workdir"
echo -- 1.8.3.4
On 15/08/13 11:26, Dave Reisner wrote:
On Aug 14, 2013 8:38 PM, "Mattias Andrée" <maandree@member.fsf.org> wrote:
I understood that. But I did not know that pacman supports BSD or Max OS X, nor that it was not portable. Just leaving it out because of NFS seems silly, it is not up to the program to account for all software that does not implement expected standards.
Uhhh but it is. That's sort of the idea of software portability. And since when is file locking support anything close to standardized? It's incredibly OS and filesystem specific.
So... I know we have been over this several times, but is there no reasonable solution here. As far as I am concerned, OSX support is not a blocker here (as we kept compile support of pacman to be nice, but some features of makepkg already fail on OSX). But, we need to support ELF based systems (Linux, BSD, Hurd) as pacman is used on all these architectures. flock() and probably fnctl() are out because NFS - and there are real world use cases of this to be concerned about. Does that leave us just the crappy file based locking? I believe the cause of phantom lock files has been identified (https://bugs.archlinux.org/task/35603) but I am unsure how to fix it. I stored the process ID in the lock file during creation many years ago. Is there a portable way for us to determine if that process ID is still running and remove the lock file automatically if it is not? Any other ideas? Allan
On Mon, Aug 19, 2013 at 5:39 AM, Allan McRae <allan@archlinux.org> wrote:
I stored the process ID in the lock file during creation many years ago. Is there a portable way for us to determine if that process ID is still running and remove the lock file automatically if it is not?
Try to kill the PID with signal 0 and remove it if it fails with ESRCH?
On 19/08/13 13:44, Jan Alexander Steffens wrote:
On Mon, Aug 19, 2013 at 5:39 AM, Allan McRae <allan@archlinux.org> wrote:
I stored the process ID in the lock file during creation many years ago. Is there a portable way for us to determine if that process ID is still running and remove the lock file automatically if it is not?
Try to kill the PID with signal 0 and remove it if it fails with ESRCH?
After discussing on IRC with Jan, this whole approach has issues... It would be fine for the local db lock, but not for the sync db or package cache which might be accessed by multiple systems. Perhaps we'd require the pid and a machine identifier? Of course, we could not clear leftover lock files from other machines, but we'd still fix the majority use case. Allan
On 19/08/2013, Allan McRae <allan@archlinux.org> wrote:
On 19/08/13 13:44, Jan Alexander Steffens wrote:
On Mon, Aug 19, 2013 at 5:39 AM, Allan McRae <allan@archlinux.org> wrote:
I stored the process ID in the lock file during creation many years ago. Is there a portable way for us to determine if that process ID is still running and remove the lock file automatically if it is not?
Try to kill the PID with signal 0 and remove it if it fails with ESRCH?
Pardon me if I don’t know enough details of Pacman’s locking, but this sounds like a race condition, defeating the purpose of a lock. Even on a single machine, what happens if two instances A and B have both decided the old process does not exist, then instance A removes the old lock, and creates a new valid lock for itself, just before instance B also tries to remove the old lock? How do you stop A from illegally removing B’s valid lock? Maybe you could have a two stage thing or something but it’s getting a bit complex a.k.a. silly.
After discussing on IRC with Jan, this whole approach has issues...
It would be fine for the local db lock, but not for the sync db or package cache which might be accessed by multiple systems.
Perhaps we'd require the pid and a machine identifier? Of course, we could not clear leftover lock files from other machines, but we'd still fix the majority use case.
Allan
On 19 August 2013 08:48, Martin Panter <vadmium+patch@gmail.com> wrote:
On 19/08/2013, Allan McRae <allan@archlinux.org> wrote:
On 19/08/13 13:44, Jan Alexander Steffens wrote:
On Mon, Aug 19, 2013 at 5:39 AM, Allan McRae <allan@archlinux.org> wrote:
I stored the process ID in the lock file during creation many years ago. Is there a portable way for us to determine if that process ID is still running and remove the lock file automatically if it is not?
Try to kill the PID with signal 0 and remove it if it fails with ESRCH?
Pardon me if I don’t know enough details of Pacman’s locking, but this sounds like a race condition, defeating the purpose of a lock. Even on a single machine, what happens if two instances A and B have both decided the old process does not exist, then instance A removes the old lock, and creates a new valid lock for itself, just before instance B also tries to remove the old lock? How do you stop A from illegally removing B’s valid lock? Maybe you could have a two stage thing or something but it’s getting a bit complex a.k.a. silly.
Sorry I got my processes mixed up at the end. Here’s the steps: * Lock file exists with process X which no longer exists * Pacman process A starts and determines X does not exist * Pacman process B starts and determines X does not exist * Process A removes the old lock file which referenced process X * Process A creates a new lock file for itself (referencing process A now) * How do you make sure B does not delete the new lock file A just created?
After discussing on IRC with Jan, this whole approach has issues...
It would be fine for the local db lock, but not for the sync db or package cache which might be accessed by multiple systems.
Perhaps we'd require the pid and a machine identifier? Of course, we could not clear leftover lock files from other machines, but we'd still fix the majority use case.
Allan
On Mon, Aug 19, 2013 at 10:58 AM, Martin Panter <vadmium+patch@gmail.com> wrote:
On 19 August 2013 08:48, Martin Panter <vadmium+patch@gmail.com> wrote:
Pardon me if I don’t know enough details of Pacman’s locking, but this sounds like a race condition, defeating the purpose of a lock. Even on a single machine, what happens if two instances A and B have both decided the old process does not exist, then instance A removes the old lock, and creates a new valid lock for itself, just before instance B also tries to remove the old lock? How do you stop A from illegally removing B’s valid lock? Maybe you could have a two stage thing or something but it’s getting a bit complex a.k.a. silly.
Sorry I got my processes mixed up at the end. Here’s the steps:
* Lock file exists with process X which no longer exists * Pacman process A starts and determines X does not exist * Pacman process B starts and determines X does not exist * Process A removes the old lock file which referenced process X * Process A creates a new lock file for itself (referencing process A now) * How do you make sure B does not delete the new lock file A just created?
How about this? * Open lock file with O_RDWR | O_APPEND | O_CREAT and read. -> If file is empty, we can run. -> If file contains a non-existing PID and the current hostname on its last line, we can run. * If we can run, write new line containing current PID and hostname. * Rewind file and read again. Verify content is as expected. * Do database operation. * Remove lock file. I'm not sure how well it works over a network. Maybe one or more fsyncs can be introduced to ensure the appending and reading is synchronized with the server?
On Mon, Aug 19, 2013 at 11:38 AM, Jan Alexander Steffens <jan.steffens@gmail.com> wrote:
How about this?
* Open lock file with O_RDWR | O_APPEND | O_CREAT and read. -> If file is empty, we can run. -> If file contains a non-existing PID and the current hostname on its last line, we can run. * If we can run, write new line containing current PID and hostname. * Rewind file and read again. Verify content is as expected.
Correction: Close and reopen instead of rewinding, to catch the lock file being removed right after the open().
* Do database operation. * Remove lock file.
Addendum: The file is only removed iff the lock was taken successfully.
I'm not sure how well it works over a network. Maybe one or more fsyncs can be introduced to ensure the appending and reading is synchronized with the server?
participants (6)
-
Allan McRae
-
Dave Reisner
-
Jan Alexander Steffens
-
Martin Panter
-
Mattias Andrée
-
Mattias Andrée