[pacman-dev] [PATCH] get_filename : use the FILENAME field from the db.
This fixes FS#9547. The get_filename function tried to construct the filename from name, version and arch, instead of directly using the filename field from the database, and got it wrong in most cases (with both core/extra and community repo). Since this field was introduced in 3.0, it should be safe to use it now in 3.1. NOTE: pactest will need to be fixed since it currently doesn't generate the filename field. Signed-off-by: Chantry Xavier <shiningxc@gmail.com> --- lib/libalpm/package.c | 20 +++++++------------- lib/libalpm/sync.c | 11 ++++++++++- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c index 937ee3e..d3351a4 100644 --- a/lib/libalpm/package.c +++ b/lib/libalpm/package.c @@ -107,6 +107,7 @@ int SYMEXPORT alpm_pkg_checkmd5sum(pmpkg_t *pkg) char *fpath; char *md5sum = NULL; int retval = 0; + const char *filename; ALPM_LOG_FUNC; @@ -115,7 +116,10 @@ int SYMEXPORT alpm_pkg_checkmd5sum(pmpkg_t *pkg) ASSERT(pkg->origin == PKG_FROM_CACHE, RET_ERR(PM_ERR_PKG_INVALID, -1)); ASSERT(pkg->origin_data.db != handle->db_local, RET_ERR(PM_ERR_PKG_INVALID, -1)); - fpath = _alpm_filecache_find(alpm_pkg_get_filename(pkg)); + filename = alpm_pkg_get_filename(pkg); + ASSERT(filename[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, -1)); + + fpath = _alpm_filecache_find(filename); md5sum = alpm_get_md5sum(fpath); if(md5sum == NULL) { @@ -162,18 +166,8 @@ const char SYMEXPORT *alpm_pkg_get_filename(pmpkg_t *pkg) ASSERT(handle != NULL, return(NULL)); ASSERT(pkg != NULL, return(NULL)); - if(!strlen(pkg->filename)) { - /* construct the file name, it's not in the desc file */ - if(pkg->origin == PKG_FROM_CACHE && !(pkg->infolevel & INFRQ_DESC)) { - _alpm_db_read(pkg->origin_data.db, pkg, INFRQ_DESC); - } - if(pkg->arch && strlen(pkg->arch) > 0) { - snprintf(pkg->filename, PKG_FILENAME_LEN, "%s-%s-%s" PKGEXT, - pkg->name, pkg->version, pkg->arch); - } else { - snprintf(pkg->filename, PKG_FILENAME_LEN, "%s-%s" PKGEXT, - pkg->name, pkg->version); - } + if(pkg->origin == PKG_FROM_CACHE && !(pkg->infolevel & INFRQ_DESC)) { + _alpm_db_read(pkg->origin_data.db, pkg, INFRQ_DESC); } return pkg->filename; diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c index ced20c5..e03a757 100644 --- a/lib/libalpm/sync.c +++ b/lib/libalpm/sync.c @@ -689,6 +689,7 @@ static alpm_list_t *pkg_upgrade_delta_path(pmpkg_t *newpkg, pmdb_t *db_local) if(oldpkg) { const char *oldname = alpm_pkg_get_filename(oldpkg); + ASSERT(oldname[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, NULL)); char *oldpath = _alpm_filecache_find(oldname); if(oldpath) { @@ -726,7 +727,9 @@ static alpm_list_t *pkg_upgrade_delta_path(pmpkg_t *newpkg, pmdb_t *db_local) */ unsigned long SYMEXPORT alpm_pkg_download_size(pmpkg_t *newpkg, pmdb_t *db_local) { - char *fpath = _alpm_filecache_find(alpm_pkg_get_filename(newpkg)); + const char *fname = alpm_pkg_get_filename(newpkg); + ASSERT(fname[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, 0)); + char *fpath = _alpm_filecache_find(fname); unsigned long size = 0; if(fpath) { @@ -950,6 +953,8 @@ static int test_pkg_md5sum(pmtrans_t *trans, pmpkg_t *pkg, alpm_list_t **data) int ret = 0; filename = alpm_pkg_get_filename(pkg); + ASSERT(filename[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, 1)); + md5sum = alpm_pkg_get_md5sum(pkg); ret = test_md5sum(trans, filename, md5sum, data); @@ -995,6 +1000,7 @@ int _alpm_sync_commit(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t **data) const char *fname = NULL; fname = alpm_pkg_get_filename(spkg); + ASSERT(fname[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, -1)); if(trans->flags & PM_TRANS_FLAG_PRINTURIS) { EVENT(trans, PM_TRANS_EVT_PRINTURI, (char *)alpm_db_get_url(current), (char *)fname); @@ -1187,6 +1193,9 @@ int _alpm_sync_commit(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t **data) char *fpath; fname = alpm_pkg_get_filename(spkg); + if(fname[0] == '\0') { + goto error; + } /* Loop through the cache dirs until we find a matching file */ fpath = _alpm_filecache_find(fname); -- 1.5.4
This fixes FS#9547. The get_filename function tried to construct the filename from name, version and arch, instead of directly using the filename field from the database, and got it wrong in most cases (with both core/extra and community repo). Since this field was introduced in 3.0, it should be safe to use it now in 3.1. Added a quick fix to pactest to also generate the FILENAME field in its sync databases. Signed-off-by: Chantry Xavier <shiningxc@gmail.com> --- lib/libalpm/package.c | 20 +++++++------------- lib/libalpm/sync.c | 11 ++++++++++- pactest/pmdb.py | 2 ++ 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c index 937ee3e..d3351a4 100644 --- a/lib/libalpm/package.c +++ b/lib/libalpm/package.c @@ -107,6 +107,7 @@ int SYMEXPORT alpm_pkg_checkmd5sum(pmpkg_t *pkg) char *fpath; char *md5sum = NULL; int retval = 0; + const char *filename; ALPM_LOG_FUNC; @@ -115,7 +116,10 @@ int SYMEXPORT alpm_pkg_checkmd5sum(pmpkg_t *pkg) ASSERT(pkg->origin == PKG_FROM_CACHE, RET_ERR(PM_ERR_PKG_INVALID, -1)); ASSERT(pkg->origin_data.db != handle->db_local, RET_ERR(PM_ERR_PKG_INVALID, -1)); - fpath = _alpm_filecache_find(alpm_pkg_get_filename(pkg)); + filename = alpm_pkg_get_filename(pkg); + ASSERT(filename[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, -1)); + + fpath = _alpm_filecache_find(filename); md5sum = alpm_get_md5sum(fpath); if(md5sum == NULL) { @@ -162,18 +166,8 @@ const char SYMEXPORT *alpm_pkg_get_filename(pmpkg_t *pkg) ASSERT(handle != NULL, return(NULL)); ASSERT(pkg != NULL, return(NULL)); - if(!strlen(pkg->filename)) { - /* construct the file name, it's not in the desc file */ - if(pkg->origin == PKG_FROM_CACHE && !(pkg->infolevel & INFRQ_DESC)) { - _alpm_db_read(pkg->origin_data.db, pkg, INFRQ_DESC); - } - if(pkg->arch && strlen(pkg->arch) > 0) { - snprintf(pkg->filename, PKG_FILENAME_LEN, "%s-%s-%s" PKGEXT, - pkg->name, pkg->version, pkg->arch); - } else { - snprintf(pkg->filename, PKG_FILENAME_LEN, "%s-%s" PKGEXT, - pkg->name, pkg->version); - } + if(pkg->origin == PKG_FROM_CACHE && !(pkg->infolevel & INFRQ_DESC)) { + _alpm_db_read(pkg->origin_data.db, pkg, INFRQ_DESC); } return pkg->filename; diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c index ced20c5..e03a757 100644 --- a/lib/libalpm/sync.c +++ b/lib/libalpm/sync.c @@ -689,6 +689,7 @@ static alpm_list_t *pkg_upgrade_delta_path(pmpkg_t *newpkg, pmdb_t *db_local) if(oldpkg) { const char *oldname = alpm_pkg_get_filename(oldpkg); + ASSERT(oldname[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, NULL)); char *oldpath = _alpm_filecache_find(oldname); if(oldpath) { @@ -726,7 +727,9 @@ static alpm_list_t *pkg_upgrade_delta_path(pmpkg_t *newpkg, pmdb_t *db_local) */ unsigned long SYMEXPORT alpm_pkg_download_size(pmpkg_t *newpkg, pmdb_t *db_local) { - char *fpath = _alpm_filecache_find(alpm_pkg_get_filename(newpkg)); + const char *fname = alpm_pkg_get_filename(newpkg); + ASSERT(fname[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, 0)); + char *fpath = _alpm_filecache_find(fname); unsigned long size = 0; if(fpath) { @@ -950,6 +953,8 @@ static int test_pkg_md5sum(pmtrans_t *trans, pmpkg_t *pkg, alpm_list_t **data) int ret = 0; filename = alpm_pkg_get_filename(pkg); + ASSERT(filename[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, 1)); + md5sum = alpm_pkg_get_md5sum(pkg); ret = test_md5sum(trans, filename, md5sum, data); @@ -995,6 +1000,7 @@ int _alpm_sync_commit(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t **data) const char *fname = NULL; fname = alpm_pkg_get_filename(spkg); + ASSERT(fname[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, -1)); if(trans->flags & PM_TRANS_FLAG_PRINTURIS) { EVENT(trans, PM_TRANS_EVT_PRINTURI, (char *)alpm_db_get_url(current), (char *)fname); @@ -1187,6 +1193,9 @@ int _alpm_sync_commit(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t **data) char *fpath; fname = alpm_pkg_get_filename(spkg); + if(fname[0] == '\0') { + goto error; + } /* Loop through the cache dirs until we find a matching file */ fpath = _alpm_filecache_find(fname); diff --git a/pactest/pmdb.py b/pactest/pmdb.py index af39200..d0a4f6d 100755 --- a/pactest/pmdb.py +++ b/pactest/pmdb.py @@ -261,6 +261,8 @@ class pmdb: if pkg.reason: data.append(_mksection("REASON", pkg.reason)) else: + if pkg.filename(): + data.append(_mksection("FILENAME", pkg.filename())) if pkg.replaces: data.append(_mksection("REPLACES", pkg.replaces)) if pkg.force: -- 1.5.4
On Feb 13, 2008 5:06 PM, Chantry Xavier <shiningxc@gmail.com> wrote:
This fixes FS#9547.
The get_filename function tried to construct the filename from name, version and arch, instead of directly using the filename field from the database, and got it wrong in most cases (with both core/extra and community repo).
Since this field was introduced in 3.0, it should be safe to use it now in 3.1.
Added a quick fix to pactest to also generate the FILENAME field in its sync databases.
Signed-off-by: Chantry Xavier <shiningxc@gmail.com> --- lib/libalpm/package.c | 20 +++++++------------- lib/libalpm/sync.c | 11 ++++++++++- pactest/pmdb.py | 2 ++ 3 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c index 937ee3e..d3351a4 100644 --- a/lib/libalpm/package.c +++ b/lib/libalpm/package.c @@ -107,6 +107,7 @@ int SYMEXPORT alpm_pkg_checkmd5sum(pmpkg_t *pkg) char *fpath; char *md5sum = NULL; int retval = 0; + const char *filename; Super small issue, but want to put all the char declarations are on consecutive lines, followed by the int one? Keeps the logically grouped a bit better.
ALPM_LOG_FUNC;
@@ -115,7 +116,10 @@ int SYMEXPORT alpm_pkg_checkmd5sum(pmpkg_t *pkg) ASSERT(pkg->origin == PKG_FROM_CACHE, RET_ERR(PM_ERR_PKG_INVALID, -1)); ASSERT(pkg->origin_data.db != handle->db_local, RET_ERR(PM_ERR_PKG_INVALID, -1));
- fpath = _alpm_filecache_find(alpm_pkg_get_filename(pkg)); + filename = alpm_pkg_get_filename(pkg); + ASSERT(filename[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, -1)); + + fpath = _alpm_filecache_find(filename);
Good.
md5sum = alpm_get_md5sum(fpath);
if(md5sum == NULL) { @@ -162,18 +166,8 @@ const char SYMEXPORT *alpm_pkg_get_filename(pmpkg_t *pkg) ASSERT(handle != NULL, return(NULL)); ASSERT(pkg != NULL, return(NULL));
- if(!strlen(pkg->filename)) { - /* construct the file name, it's not in the desc file */ - if(pkg->origin == PKG_FROM_CACHE && !(pkg->infolevel & INFRQ_DESC)) { - _alpm_db_read(pkg->origin_data.db, pkg, INFRQ_DESC); - } - if(pkg->arch && strlen(pkg->arch) > 0) { - snprintf(pkg->filename, PKG_FILENAME_LEN, "%s-%s-%s" PKGEXT, - pkg->name, pkg->version, pkg->arch); - } else { - snprintf(pkg->filename, PKG_FILENAME_LEN, "%s-%s" PKGEXT, - pkg->name, pkg->version); - } + if(pkg->origin == PKG_FROM_CACHE && !(pkg->infolevel & INFRQ_DESC)) { + _alpm_db_read(pkg->origin_data.db, pkg, INFRQ_DESC); }
Looks great.
return pkg->filename; diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c index ced20c5..e03a757 100644 --- a/lib/libalpm/sync.c +++ b/lib/libalpm/sync.c @@ -689,6 +689,7 @@ static alpm_list_t *pkg_upgrade_delta_path(pmpkg_t *newpkg, pmdb_t *db_local)
if(oldpkg) { const char *oldname = alpm_pkg_get_filename(oldpkg); + ASSERT(oldname[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, NULL));
Hmm. I wonder if we need some more testing here. Did this never return the empty string in any pactests, or anything else? I also don't know that a locally installed package will store what filename it was installed from. I wonder if we just broke all of the delta assumptions. Or what is 'oldpkg' here?
char *oldpath = _alpm_filecache_find(oldname);
if(oldpath) { @@ -726,7 +727,9 @@ static alpm_list_t *pkg_upgrade_delta_path(pmpkg_t *newpkg, pmdb_t *db_local) */ unsigned long SYMEXPORT alpm_pkg_download_size(pmpkg_t *newpkg, pmdb_t *db_local) { - char *fpath = _alpm_filecache_find(alpm_pkg_get_filename(newpkg)); + const char *fname = alpm_pkg_get_filename(newpkg); + ASSERT(fname[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, 0)); + char *fpath = _alpm_filecache_find(fname); unsigned long size = 0;
Good.
if(fpath) { @@ -950,6 +953,8 @@ static int test_pkg_md5sum(pmtrans_t *trans, pmpkg_t *pkg, alpm_list_t **data) int ret = 0;
filename = alpm_pkg_get_filename(pkg); + ASSERT(filename[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, 1)); + md5sum = alpm_pkg_get_md5sum(pkg);
ret = test_md5sum(trans, filename, md5sum, data); @@ -995,6 +1000,7 @@ int _alpm_sync_commit(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t **data) const char *fname = NULL;
fname = alpm_pkg_get_filename(spkg); + ASSERT(fname[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, -1)); if(trans->flags & PM_TRANS_FLAG_PRINTURIS) { EVENT(trans, PM_TRANS_EVT_PRINTURI, (char *)alpm_db_get_url(current), (char *)fname); @@ -1187,6 +1193,9 @@ int _alpm_sync_commit(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t **data) char *fpath;
fname = alpm_pkg_get_filename(spkg); + if(fname[0] == '\0') { + goto error; + } /* Loop through the cache dirs until we find a matching file */ fpath = _alpm_filecache_find(fname);
diff --git a/pactest/pmdb.py b/pactest/pmdb.py index af39200..d0a4f6d 100755 --- a/pactest/pmdb.py +++ b/pactest/pmdb.py @@ -261,6 +261,8 @@ class pmdb: if pkg.reason: data.append(_mksection("REASON", pkg.reason)) else: + if pkg.filename(): + data.append(_mksection("FILENAME", pkg.filename())) if pkg.replaces: data.append(_mksection("REPLACES", pkg.replaces)) if pkg.force:
-- 1.5.4
This is the first step of fixing FS#9547. This should not break any existing code that may rely on this function behaving the way it did, and should be good for inclusion in a maint release. In addition, update pactest so it fills the FILENAME field in the DB entries it creates so we can move forward with a real fix to this issue. Signed-off-by: Dan McGee <dan@archlinux.org> --- How about this for a maint release fix? Then we can further evaluate if necessary for either 3.1.3 or 3.2.0. Of course, if we can address all of the issues I brought up in my last email regarding your patch, then we can try and fully fix the problem in 3.1.2. This just seems like a safer road to take when we aren't going to be able to get a lot of testing in. -Dan lib/libalpm/be_files.c | 4 ---- lib/libalpm/package.c | 7 ++++--- pactest/pmdb.py | 1 + 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/libalpm/be_files.c b/lib/libalpm/be_files.c index 724e3c8..4cd0985 100644 --- a/lib/libalpm/be_files.c +++ b/lib/libalpm/be_files.c @@ -297,10 +297,6 @@ int _alpm_db_read(pmdb_t *db, pmpkg_t *info, pmdbinfrq_t inforeq) } _alpm_strtrim(line); if(!strcmp(line, "%FILENAME%")) { - /* filename is _new_ - it provides the real name of the package, on the - * server, to allow for us to not tie the name of the actual file to the - * data of the package - */ if(fgets(info->filename, sizeof(info->filename), fp) == NULL) { goto error; } diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c index 937ee3e..363cf31 100644 --- a/lib/libalpm/package.c +++ b/lib/libalpm/package.c @@ -162,11 +162,12 @@ const char SYMEXPORT *alpm_pkg_get_filename(pmpkg_t *pkg) ASSERT(handle != NULL, return(NULL)); ASSERT(pkg != NULL, return(NULL)); + if(pkg->origin == PKG_FROM_CACHE && !(pkg->infolevel & INFRQ_DESC)) { + _alpm_db_read(pkg->origin_data.db, pkg, INFRQ_DESC); + } + if(!strlen(pkg->filename)) { /* construct the file name, it's not in the desc file */ - if(pkg->origin == PKG_FROM_CACHE && !(pkg->infolevel & INFRQ_DESC)) { - _alpm_db_read(pkg->origin_data.db, pkg, INFRQ_DESC); - } if(pkg->arch && strlen(pkg->arch) > 0) { snprintf(pkg->filename, PKG_FILENAME_LEN, "%s-%s-%s" PKGEXT, pkg->name, pkg->version, pkg->arch); diff --git a/pactest/pmdb.py b/pactest/pmdb.py index af39200..e0f328e 100755 --- a/pactest/pmdb.py +++ b/pactest/pmdb.py @@ -261,6 +261,7 @@ class pmdb: if pkg.reason: data.append(_mksection("REASON", pkg.reason)) else: + data.append(_mksection("FILENAME", pkg.filename())) if pkg.replaces: data.append(_mksection("REPLACES", pkg.replaces)) if pkg.force: -- 1.5.4.1
Dan McGee wrote:
This is the first step of fixing FS#9547. This should not break any existing code that may rely on this function behaving the way it did, and should be good for inclusion in a maint release.
In addition, update pactest so it fills the FILENAME field in the DB entries it creates so we can move forward with a real fix to this issue.
Signed-off-by: Dan McGee<dan@archlinux.org> ---
How about this for a maint release fix? Then we can further evaluate if necessary for either 3.1.3 or 3.2.0. Of course, if we can address all of the issues I brought up in my last email regarding your patch, then we can try and fully fix the problem in 3.1.2. This just seems like a safer road to take when we aren't going to be able to get a lot of testing in.
Actually, my patch is not acceptable. I totally overlooked a really crap fact : there are a HUGE number of packages without the FILENAME field in the databases (not community, but all others) find /var/lib/pacman/sync/ -name desc | wc -l 4274 grep -r FILENAME /var/lib/pacman/sync/ | wc -l 3376 900 packages without it. Let's look at one of these : /var/lib/pacman/sync/unstable/at76c503a-cvs-20061223-1/desc In that desc file, we have NAME and VERSION, but not ARCH, so the filename generated is : at76c503a-cvs-20061223-1.pkg.tar.gz And actually, that's the real filename which exists on the mirrors. So we have two kind of entries : entries without filenames (their real filenames don't have the -arch suffix) and entries with filenames (which have the -arch suffix). So the current filename fallback can't be removed at all. It actually works for the entries without filenames, and there are a lot of them . The only correct way for dealing with this is exactly what your patch does. First load the desc level, then use the filename field if it exists. Otherwise fallback.
Dan McGee wrote:
return pkg->filename; diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c index ced20c5..e03a757 100644 --- a/lib/libalpm/sync.c +++ b/lib/libalpm/sync.c @@ -689,6 +689,7 @@ static alpm_list_t *pkg_upgrade_delta_path(pmpkg_t *newpkg, pmdb_t *db_local)
if(oldpkg) { const char *oldname = alpm_pkg_get_filename(oldpkg); + ASSERT(oldname[0] != '\0', RET_ERR(PM_ERR_PKG_INVALID_NAME, NULL));
Hmm. I wonder if we need some more testing here. Did this never return the empty string in any pactests, or anything else? I also don't know that a locally installed package will store what filename it was installed from. I wonder if we just broke all of the delta assumptions. Or what is 'oldpkg' here?
Oh crap, but that means we have a bigger issue here. The delta code needs the filename of the locally installed package, and we don't store that filename in the local db. So it actually relies on the broken way to construct the filename. Well, it looks like it works in a default database generated by repo-add (where filename = name-version-arch.pkgext), so that probably explains why it worked fine with pacman-git and delta-test repo. However, all official databases (core,extra,testing,unstable,community) breaks this assumption for most packages.
Xavier wrote:
Oh crap, but that means we have a bigger issue here. The delta code needs the filename of the locally installed package, and we don't store that filename in the local db. So it actually relies on the broken way to construct the filename. Well, it looks like it works in a default database generated by repo-add (where filename = name-version-arch.pkgext), so that probably explains why it worked fine with pacman-git and delta-test repo. However, all official databases (core,extra,testing,unstable,community) breaks this assumption for most packages.
As Dan suggested off list, the delta machine should probably work on filenames rather than just versions (in the delta metainfo file from the sync databases for a start). That way we don't need to store filenames in the local db (which wouldn't be used otherwise anyway).
On Thu, Feb 14, 2008 at 07:53:02PM +0100, Xavier wrote:
Xavier wrote:
Oh crap, but that means we have a bigger issue here. The delta code needs the filename of the locally installed package, and we don't store that filename in the local db. So it actually relies on the broken way to construct the filename. Well, it looks like it works in a default database generated by repo-add (where filename = name-version-arch.pkgext), so that probably explains why it worked fine with pacman-git and delta-test repo. However, all official databases (core,extra,testing,unstable,community) breaks this assumption for most packages.
As Dan suggested off list, the delta machine should probably work on filenames rather than just versions (in the delta metainfo file from the sync databases for a start). That way we don't need to store filenames in the local db (which wouldn't be used otherwise anyway).
I suppose so. Doing so would also allow us to check the md5sum of the previous package to make sure the delta would apply correctly before downloading.
On Thu, Feb 14, 2008 at 09:14:40PM -0500, Nathan Jones wrote:
On Thu, Feb 14, 2008 at 07:53:02PM +0100, Xavier wrote:
As Dan suggested off list, the delta machine should probably work on filenames rather than just versions (in the delta metainfo file from the sync databases for a start). That way we don't need to store filenames in the local db (which wouldn't be used otherwise anyway).
I suppose so. Doing so would also allow us to check the md5sum of the previous package to make sure the delta would apply correctly before downloading.
That's if we store both filenames and md5sums in the delta metainfo file? I think that's what Dan suggested anyway. Btw, if you are interested in doing that, it would be much appreciated :) Since it's your code after all, I think you would be more efficient. But there is no hurry anyway since deltas aren't in use yet.
On Fri, Feb 15, 2008 at 05:37:39PM +0100, Xavier wrote:
Btw, if you are interested in doing that, it would be much appreciated :) Since it's your code after all, I think you would be more efficient. But there is no hurry anyway since deltas aren't in use yet.
Yes I was planning on doing it. I should have mentioned that in my email. This last week I've been working on fixing the gzip/md5sum problem with deltas in order to add delta creation to repo-add. I think I will post that code tonight and begin on this soon.
On Fri, Feb 15, 2008 at 1:08 PM, Nathan Jones <nathanj@insightbb.com> wrote:
On Fri, Feb 15, 2008 at 05:37:39PM +0100, Xavier wrote:
Btw, if you are interested in doing that, it would be much appreciated :) Since it's your code after all, I think you would be more efficient. But there is no hurry anyway since deltas aren't in use yet.
Yes I was planning on doing it. I should have mentioned that in my email.
This last week I've been working on fixing the gzip/md5sum problem with deltas in order to add delta creation to repo-add. I think I will post that code tonight and begin on this soon.
If you want any help or anything, feel free to ping me on IRC or jabber and we can work some details out or I can give you some immediate feedback on anything you have questions on. -Dan
On Fri, Feb 15, 2008 at 1:33 PM, Dan McGee <dpmcgee@gmail.com> wrote:
On Fri, Feb 15, 2008 at 1:08 PM, Nathan Jones <nathanj@insightbb.com> wrote:
On Fri, Feb 15, 2008 at 05:37:39PM +0100, Xavier wrote:
Btw, if you are interested in doing that, it would be much appreciated :) Since it's your code after all, I think you would be more efficient. But there is no hurry anyway since deltas aren't in use yet.
Yes I was planning on doing it. I should have mentioned that in my email.
This last week I've been working on fixing the gzip/md5sum problem with deltas in order to add delta creation to repo-add. I think I will post that code tonight and begin on this soon.
If you want any help or anything, feel free to ping me on IRC or jabber and we can work some details out or I can give you some immediate feedback on anything you have questions on.
So how about a delta file in the sync dbs like the following: %DELTAS% oldfile.pkg.tar.gz 000md5sum000 newfile.pkg.tar.gz 000md5sum000 deltafile.delta 000md5sum000 23423 # or in bash format $oldfile $oldmd5 $newfile $newmd5 $deltafile $deltamd5 $deltasize I think you have an existing algorithm in place that we can adapt to this. I'll psuedocode it: read in delta lines build a tree of possible delta paths from $syncfilename back to each $oldfile until we can't go further, or until we find an $oldfile that we can lstat and the md5sum matches. find shortest path of our possible paths use this delta path (and since we have md5sums to verify the whole way, we can be more sure before we start downloading that it will work) Note that this whole process never worries about parsing the filename into parts, or for that matter, even opening the package file and looking at PKGINFO. -Dan
On Fri, Feb 15, 2008 at 05:29:38PM -0600, Dan McGee wrote:
So how about a delta file in the sync dbs like the following:
%DELTAS% oldfile.pkg.tar.gz 000md5sum000 newfile.pkg.tar.gz 000md5sum000 deltafile.delta 000md5sum000 23423 # or in bash format $oldfile $oldmd5 $newfile $newmd5 $deltafile $deltamd5 $deltasize
I think you have an existing algorithm in place that we can adapt to this. I'll psuedocode it:
read in delta lines build a tree of possible delta paths from $syncfilename back to each $oldfile until we can't go further, or until we find an $oldfile that we can lstat and the md5sum matches. find shortest path of our possible paths use this delta path (and since we have md5sums to verify the whole way, we can be more sure before we start downloading that it will work)
Note that this whole process never worries about parsing the filename into parts, or for that matter, even opening the package file and looking at PKGINFO.
-Dan
I was going to keep the old and new version numbers and continue to use the existing algorithm. Adding a few functions should be all that is needed to fix the filename issue: alpm_delta_find_old_filename(delta_list, old_version) alpm_delta_get_old_filename(delta) alpm_delta_get_new_filename(delta) Your algorithm is more flexible since it can use any file in the cache directory even if the package is not installed. However, I don't think that flexibility would be needed in the majority of cases.
Start to move the delta struct away from an assumed package name scheme and towards something that is package (or even filename) agnostic. This will allow us much greater flexibility in the usage of deltas (maybe even sync DBs some day) as well as allowing code outside of delta.h/delta.c to be much cleaner with less of a need for snprintf() calls. Signed-off-by: Dan McGee <dan@archlinux.org> --- Mainly an RFC here, although I feel like this is a good approach to take. Note both my comments above in the commit message, as well as taking a look at how not trying to even worry about the filename simplifies the code in sync.c a good amount. No promises that this works yet. :) -Dan lib/libalpm/alpm.h | 4 ++- lib/libalpm/delta.c | 88 ++++++++++++++++++++++++-------------------------- lib/libalpm/delta.h | 15 +++++++-- lib/libalpm/sync.c | 31 +++++++---------- 4 files changed, 70 insertions(+), 68 deletions(-) diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index b19e1dd..528bac9 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h @@ -231,10 +231,12 @@ unsigned long alpm_pkg_download_size(pmpkg_t *newpkg, pmdb_t *db_local); */ const char *alpm_delta_get_from(pmdelta_t *delta); +const char *alpm_delta_get_from_md5sum(pmdelta_t *delta); const char *alpm_delta_get_to(pmdelta_t *delta); -unsigned long alpm_delta_get_size(pmdelta_t *delta); +const char *alpm_delta_get_to_md5sum(pmdelta_t *delta); const char *alpm_delta_get_filename(pmdelta_t *delta); const char *alpm_delta_get_md5sum(pmdelta_t *delta); +unsigned long alpm_delta_get_size(pmdelta_t *delta); /* * Groups diff --git a/lib/libalpm/delta.c b/lib/libalpm/delta.c index 9eb6e82..923e05f 100644 --- a/lib/libalpm/delta.c +++ b/lib/libalpm/delta.c @@ -37,60 +37,50 @@ const char SYMEXPORT *alpm_delta_get_from(pmdelta_t *delta) { - ALPM_LOG_FUNC; - - /* Sanity checks */ ASSERT(delta != NULL, return(NULL)); - return(delta->from); } -const char SYMEXPORT *alpm_delta_get_to(pmdelta_t *delta) +const char SYMEXPORT *alpm_delta_get_from_md5sum(pmdelta_t *delta) { - ALPM_LOG_FUNC; - - /* Sanity checks */ ASSERT(delta != NULL, return(NULL)); + return(delta->from_md5); +} +const char SYMEXPORT *alpm_delta_get_to(pmdelta_t *delta) +{ + ASSERT(delta != NULL, return(NULL)); return(delta->to); } -unsigned long SYMEXPORT alpm_delta_get_size(pmdelta_t *delta) +const char SYMEXPORT *alpm_delta_get_to_md5sum(pmdelta_t *delta) { - ALPM_LOG_FUNC; - - /* Sanity checks */ - ASSERT(delta != NULL, return(-1)); - - return(delta->size); + ASSERT(delta != NULL, return(NULL)); + return(delta->to_md5); } const char SYMEXPORT *alpm_delta_get_filename(pmdelta_t *delta) { - ALPM_LOG_FUNC; - - /* Sanity checks */ ASSERT(delta != NULL, return(NULL)); - - return(delta->filename); + return(delta->delta); } const char SYMEXPORT *alpm_delta_get_md5sum(pmdelta_t *delta) { - ALPM_LOG_FUNC; - - /* Sanity checks */ ASSERT(delta != NULL, return(NULL)); + return(delta->delta_md5); +} - return(delta->md5sum); +unsigned long SYMEXPORT alpm_delta_get_size(pmdelta_t *delta) +{ + ASSERT(delta != NULL, return(-1)); + return(delta->delta_size); } /** @} */ /** Calculates the combined size of a list of delta files. - * * @param deltas the list of pmdelta_t * objects - * * @return the combined size */ unsigned long _alpm_delta_path_size(alpm_list_t *deltas) @@ -100,7 +90,7 @@ unsigned long _alpm_delta_path_size(alpm_list_t *deltas) while(dlts) { pmdelta_t *d = alpm_list_getdata(dlts); - sum += d->size; + sum += d->delta_size; dlts = alpm_list_next(dlts); } @@ -110,9 +100,7 @@ unsigned long _alpm_delta_path_size(alpm_list_t *deltas) /** Calculates the combined size of a list of delta files that are not * in the cache. - * * @param deltas the list of pmdelta_t * objects - * * @return the combined size */ unsigned long _alpm_delta_path_size_uncached(alpm_list_t *deltas) @@ -122,10 +110,10 @@ unsigned long _alpm_delta_path_size_uncached(alpm_list_t *deltas) while(dlts) { pmdelta_t *d = alpm_list_getdata(dlts); - char *fname = _alpm_filecache_find(d->filename); + char *fname = _alpm_filecache_find(d->delta); if(!fname) { - sum += d->size; + sum += d->delta_size; } FREE(fname); @@ -137,17 +125,13 @@ unsigned long _alpm_delta_path_size_uncached(alpm_list_t *deltas) } /** Calculates the shortest path from one version to another. - * * The shortest path is defined as the path with the smallest combined * size, not the length of the path. - * * The algorithm is based on Dijkstra's shortest path algorithm. - * * @param deltas the list of pmdelta_t * objects that a package has * @param from the version to start from * @param to the version to end at * @param path the current path - * * @return the list of pmdelta_t * objects that has the smallest size. * NULL (the empty list) is returned if there is no path between the * versions. @@ -201,14 +185,11 @@ static alpm_list_t *shortest_delta_path(alpm_list_t *deltas, } /** Calculates the shortest path from one version to another. - * * The shortest path is defined as the path with the smallest combined * size, not the length of the path. - * * @param deltas the list of pmdelta_t * objects that a package has * @param from the version to start from * @param to the version to end at - * * @return the list of pmdelta_t * objects that has the smallest size. * NULL (the empty list) is returned if there is no path between the * versions. @@ -224,13 +205,13 @@ alpm_list_t *_alpm_shortest_delta_path(alpm_list_t *deltas, const char *from, } /** Parses the string representation of a pmdelta_t object. - * * This function assumes that the string is in the correct format. - * + * This format is as follows: + * $oldfile $oldmd5 $newfile $newmd5 $deltafile $deltamd5 $deltasize * @param line the string to parse - * * @return A pointer to the new pmdelta_t object */ +/* TODO this does not really belong here, but in a parsing lib */ pmdelta_t *_alpm_delta_parse(char *line) { pmdelta_t *delta; @@ -246,19 +227,32 @@ pmdelta_t *_alpm_delta_parse(char *line) tmp2 = tmp; tmp = strchr(tmp, ' '); *(tmp++) = '\0'; + STRDUP(delta->from_md5, tmp2, RET_ERR(PM_ERR_MEMORY, NULL)); + + tmp2 = tmp; + tmp = strchr(tmp, ' '); + *(tmp++) = '\0'; STRDUP(delta->to, tmp2, RET_ERR(PM_ERR_MEMORY, NULL)); tmp2 = tmp; tmp = strchr(tmp, ' '); *(tmp++) = '\0'; - delta->size = atol(tmp2); + STRDUP(delta->to_md5, tmp2, RET_ERR(PM_ERR_MEMORY, NULL)); tmp2 = tmp; tmp = strchr(tmp, ' '); *(tmp++) = '\0'; - STRDUP(delta->filename, tmp2, RET_ERR(PM_ERR_MEMORY, NULL)); + STRDUP(delta->delta, tmp2, RET_ERR(PM_ERR_MEMORY, NULL)); - STRDUP(delta->md5sum, tmp, RET_ERR(PM_ERR_MEMORY, NULL)); + tmp2 = tmp; + tmp = strchr(tmp, ' '); + *(tmp++) = '\0'; + STRDUP(delta->delta_md5, tmp2, RET_ERR(PM_ERR_MEMORY, NULL)); + + tmp2 = tmp; + tmp = strchr(tmp, ' '); + *(tmp++) = '\0'; + delta->delta_size = atol(tmp2); return(delta); } @@ -266,9 +260,11 @@ pmdelta_t *_alpm_delta_parse(char *line) void _alpm_delta_free(pmdelta_t *delta) { FREE(delta->from); + FREE(delta->from_md5); FREE(delta->to); - FREE(delta->filename); - FREE(delta->md5sum); + FREE(delta->to_md5); + FREE(delta->delta); + FREE(delta->delta_md5); FREE(delta); } diff --git a/lib/libalpm/delta.h b/lib/libalpm/delta.h index 007e5d4..a2ac5f0 100644 --- a/lib/libalpm/delta.h +++ b/lib/libalpm/delta.h @@ -22,11 +22,20 @@ #include "alpm.h" struct __pmdelta_t { + /** filename of the 'before' file */ char *from; + /** md5sum of the 'before' file */ + char *from_md5; + /** filename of the 'after' file */ char *to; - unsigned long size; - char *filename; - char *md5sum; + /** md5sum of the 'after' file */ + char *to_md5; + /** filename of the delta patch */ + char *delta; + /** md5sum of the delta file */ + char *delta_md5; + /** filesize of the delta file */ + unsigned long delta_size; }; unsigned long _alpm_delta_path_size(alpm_list_t *deltas); diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c index 4101158..3e343ff 100644 --- a/lib/libalpm/sync.c +++ b/lib/libalpm/sync.c @@ -720,7 +720,6 @@ static int apply_deltas(pmtrans_t *trans, alpm_list_t *patches) pmpkg_t *pkg; pmdelta_t *d; char command[PATH_MAX], fname[PATH_MAX]; - char pkgfilename[PATH_MAX]; pkg = alpm_list_getdata(p); p = alpm_list_next(p); @@ -745,26 +744,23 @@ static int apply_deltas(pmtrans_t *trans, alpm_list_t *patches) /* build the patch command */ snprintf(command, PATH_MAX, - "xdelta patch" /* the command */ - " %s/%s" /* the delta */ - " %s/%s-%s-%s" PKGEXT /* the 'from' package */ - " %s/%s-%s-%s" PKGEXT, /* the 'to' package */ - cachedir, d->filename, - cachedir, pkg->name, d->from, pkg->arch, - cachedir, pkg->name, d->to, pkg->arch); + "xdelta patch" /* the command */ + " %s/%s" /* the delta */ + " %s/%s" /* the 'from' package */ + " %s/%s", /* the 'to' package */ + cachedir, d->delta, + cachedir, d->from, + cachedir, d->to); _alpm_log(PM_LOG_DEBUG, _("command: %s\n"), command); - snprintf(pkgfilename, PATH_MAX, "%s-%s-%s" PKGEXT, - pkg->name, d->to, pkg->arch); - - EVENT(trans, PM_TRANS_EVT_DELTA_PATCH_START, pkgfilename, d->filename); + EVENT(trans, PM_TRANS_EVT_DELTA_PATCH_START, d->to, d->delta); if(system(command) == 0) { EVENT(trans, PM_TRANS_EVT_DELTA_PATCH_DONE, NULL, NULL); /* delete the delta file */ - snprintf(fname, PATH_MAX, "%s/%s", cachedir, d->filename); + snprintf(fname, PATH_MAX, "%s/%s", cachedir, d->delta); unlink(fname); /* Delete the 'from' package but only if it is an intermediate @@ -772,8 +768,7 @@ static int apply_deltas(pmtrans_t *trans, alpm_list_t *patches) * as if deltas were not used. Delete the package file if the * previous iteration of the loop used the same package. */ if(pkg == lastpkg) { - snprintf(fname, PATH_MAX, "%s/%s-%s-%s" PKGEXT, - cachedir, pkg->name, d->from, pkg->arch); + snprintf(fname, PATH_MAX, "%s/%s", cachedir, d->from); unlink(fname); } else { lastpkg = pkg; @@ -953,12 +948,12 @@ int _alpm_sync_commit(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t **data) for(dlts = delta_path; dlts; dlts = alpm_list_next(dlts)) { pmdelta_t *d = (pmdelta_t *)alpm_list_getdata(dlts); - char *fpath2 = _alpm_filecache_find(d->filename); + char *fpath2 = _alpm_filecache_find(d->delta); if(!fpath2) { /* add the delta filename to the download list if - * it's not in the cache*/ - files = alpm_list_add(files, strdup(d->filename)); + * it's not in the cache */ + files = alpm_list_add(files, strdup(d->delta)); } /* save the package and delta so that the xdelta patch -- 1.5.4.1
On Feb 15, 2008 8:27 PM, Dan McGee <dan@archlinux.org> wrote:
Start to move the delta struct away from an assumed package name scheme and towards something that is package (or even filename) agnostic. This will allow us much greater flexibility in the usage of deltas (maybe even sync DBs some day) as well as allowing code outside of delta.h/delta.c to be much cleaner with less of a need for snprintf() calls.
Signed-off-by: Dan McGee <dan@archlinux.org> ---
Mainly an RFC here, although I feel like this is a good approach to take. Note both my comments above in the commit message, as well as taking a look at how not trying to even worry about the filename simplifies the code in sync.c a good amount. No promises that this works yet. :)
-Dan
And a whole bunch more work since this patch: http://code.toofishes.net/gitweb.cgi?p=pacman.git;a=shortlog;h=refs/heads/re... Mainly this guy: http://code.toofishes.net/gitweb.cgi?p=pacman.git;a=commitdiff;h=afbf3f4058b... This stuff is not clean- its rough work. It will need some cleaning up. I'm hoping to get some feedback though. -Dan
On Fri, Feb 15, 2008 at 08:27:18PM -0600, Dan McGee wrote:
Note both my comments above in the commit message, as well as taking a look at how not trying to even worry about the filename simplifies the code in sync.c a good amount.
The simplification is very nice. This will allow us to completely get rid of the patches list, which I always thought was a bit hacky. The main reason I wanted to keep the old version in addition to the filename is for the function pkg_upgrade_delta_path: static alpm_list_t *pkg_upgrade_delta_path(pmpkg_t *newpkg, pmdb_t *db_local) { pmpkg_t *oldpkg = alpm_db_get_pkg(db_local, newpkg->name); alpm_list_t *ret = NULL; if(oldpkg) { const char *oldname = alpm_pkg_get_filename(oldpkg); char *oldpath = _alpm_filecache_find(oldname); if(oldpath) { // delta stuff } } return(ret); } The line const char *oldname = alpm_pkg_get_filename(oldpkg); would change to const char *oldname = alpm_deltas_find_old_filename(pkg->deltas, oldpkg->version); and everything would work as before.
@@ -246,19 +227,32 @@ pmdelta_t *_alpm_delta_parse(char *line) tmp2 = tmp; tmp = strchr(tmp, ' '); *(tmp++) = '\0'; + STRDUP(delta->from_md5, tmp2, RET_ERR(PM_ERR_MEMORY, NULL)); + + tmp2 = tmp; + tmp = strchr(tmp, ' '); + *(tmp++) = '\0'; STRDUP(delta->to, tmp2, RET_ERR(PM_ERR_MEMORY, NULL));
tmp2 = tmp; tmp = strchr(tmp, ' '); *(tmp++) = '\0'; - delta->size = atol(tmp2); + STRDUP(delta->to_md5, tmp2, RET_ERR(PM_ERR_MEMORY, NULL));
tmp2 = tmp; tmp = strchr(tmp, ' '); *(tmp++) = '\0'; - STRDUP(delta->filename, tmp2, RET_ERR(PM_ERR_MEMORY, NULL)); + STRDUP(delta->delta, tmp2, RET_ERR(PM_ERR_MEMORY, NULL));
- STRDUP(delta->md5sum, tmp, RET_ERR(PM_ERR_MEMORY, NULL)); + tmp2 = tmp; + tmp = strchr(tmp, ' '); + *(tmp++) = '\0'; + STRDUP(delta->delta_md5, tmp2, RET_ERR(PM_ERR_MEMORY, NULL)); + + tmp2 = tmp; + tmp = strchr(tmp, ' '); + *(tmp++) = '\0'; + delta->delta_size = atol(tmp2);
return(delta); }
So you want a space added to the end of every delta line? Because this is going to crash if not. Just making sure it's not a copy/paste problem.
On Feb 16, 2008 11:22 AM, Nathan Jones <nathanj@insightbb.com> wrote:
On Fri, Feb 15, 2008 at 08:27:18PM -0600, Dan McGee wrote:
Note both my comments above in the commit message, as well as taking a look at how not trying to even worry about the filename simplifies the code in sync.c a good amount.
The simplification is very nice. This will allow us to completely get rid of the patches list, which I always thought was a bit hacky.
The main reason I wanted to keep the old version in addition to the filename is for the function pkg_upgrade_delta_path:
static alpm_list_t *pkg_upgrade_delta_path(pmpkg_t *newpkg, pmdb_t *db_local) <snip>
Take a look here: http://code.toofishes.net/gitweb.cgi?p=pacman.git;a=commitdiff;h=afbf3f4058b... I reworked this function quite a bit in my "second" patch.
+ tmp2 = tmp; + tmp = strchr(tmp, ' '); + *(tmp++) = '\0'; + delta->delta_size = atol(tmp2);
return(delta); }
Yeah, I like segfaults. :P Also fixed in my second patch, although we should probably make this a bit more resistant to malformed entries. -Dan
participants (5)
-
Chantry Xavier
-
Dan McGee
-
Dan McGee
-
Nathan Jones
-
Xavier