[arch-projects] [dbscripts] [PATCH 1/4] tests: make dummy copies of all pkgpool packages in the test environment
Prerequisite for reproducible archives of packages. Signed-off-by: Eli Schwartz <eschwartz@archlinux.org> --- test/lib/common.bash | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/test/lib/common.bash b/test/lib/common.bash index bc2b4e6d..3dda5f62 100644 --- a/test/lib/common.bash +++ b/test/lib/common.bash @@ -102,6 +102,7 @@ setup() { SVNREPO="file://${TMP}/svn-packages-repo" PKGREPOS=('core' 'extra' 'testing') PKGPOOL='pool/packages' + EXTRA_PKGPOOLS=('pool/community') SRCPOOL='sources/packages' TESTING_REPO='testing' STABLE_REPOS=('core' 'extra') @@ -123,9 +124,18 @@ eot mkdir -p "${TMP}/ftp/${r}/os/${a}" done done - mkdir -p "${TMP}/ftp/${PKGPOOL}" + mkdir -p "${TMP}/ftp/${PKGPOOL}"{,-reproducible} + mkdir -p "${TMP}/ftp/${EXTRA_PKGPOOLS[0]}" mkdir -p "${TMP}/ftp/${SRCPOOL}" + # make dummy packages for "reproducibility" + comm -12 <(pacman -Sql core extra | sort -u) <(pacman -Qq | sort -u) | pacman -Sddp - | while read -r line; do + touch "${FTP_BASE}/${PKGPOOL}/${line##*/}"{,.sig} + done + comm -12 <(pacman -Sql community | sort -u) <(pacman -Qq | sort -u) | pacman -Sddp - | while read -r line; do + touch "${FTP_BASE}/${EXTRA_PKGPOOLS[0]}/${line##*/}"{,.sig} + done + svnadmin create "${TMP}/svn-packages-repo" svn checkout -q "file://${TMP}/svn-packages-repo" "${TMP}/svn-packages-copy" } -- 2.19.2
Whenever adding new package files to the pool of distributed packages, hardlink a copy of every package it was built with, into a "reproducible" pool, and log which file required it. Signed-off-by: Eli Schwartz <eschwartz@archlinux.org> --- config | 1 + config.local.svn-community | 1 + config.local.svn-packages | 1 + db-functions | 49 +++++++++++++++++++++++++++++++------- db-update | 4 ++++ 5 files changed, 48 insertions(+), 8 deletions(-) diff --git a/config b/config index 1cfe11f4..5144fca7 100644 --- a/config +++ b/config @@ -3,6 +3,7 @@ FTP_BASE="/srv/ftp" PKGREPOS=() PKGPOOL='' +EXTRA_PKGPOOLS=() SRCPOOL='' TESTING_REPO='' STABLE_REPOS=() diff --git a/config.local.svn-community b/config.local.svn-community index 5d61b5ea..15bcc17f 100644 --- a/config.local.svn-community +++ b/config.local.svn-community @@ -2,6 +2,7 @@ PKGREPOS=('community' 'community-testing' 'community-staging' 'multilib' 'multilib-testing' 'multilib-staging') PKGPOOL='pool/community' +EXTRA_PKGPOOLS=('pool/packages') SRCPOOL='sources/community' SVNREPO='file:///srv/repos/svn-community/svn' SVNUSER='svn-community' diff --git a/config.local.svn-packages b/config.local.svn-packages index 34aab35c..75986b65 100644 --- a/config.local.svn-packages +++ b/config.local.svn-packages @@ -2,6 +2,7 @@ PKGREPOS=('core' 'extra' 'testing' 'staging' 'kde-unstable' 'gnome-unstable') PKGPOOL='pool/packages' +EXTRA_PKGPOOLS=('pool/community') SRCPOOL='sources/packages' SVNREPO='file:///srv/repos/svn-packages/svn' SVNUSER='svn-packages' diff --git a/db-functions b/db-functions index 7aeedced..2b1ae87a 100644 --- a/db-functions +++ b/db-functions @@ -165,20 +165,23 @@ repo_unlock () { #repo_unlock <repo-name> <arch> fi } +# usage: _grep_all_info pkgfile infofile key +_grep_all_info() { + local _ret=() + + mapfile -t _ret < <(/usr/bin/bsdtar -xOqf "$1" "${2}" | grep "^${3} = ") + + printf '%s\n' "${_ret[@]#${3} = }" +} + # usage: _grep_pkginfo pkgfile pattern _grep_pkginfo() { - local _ret - - 
_ret="$(/usr/bin/bsdtar -xOqf "$1" .PKGINFO | grep "^${2} = " | tail -1)" - echo "${_ret#${2} = }" + _grep_all_info "${1}" .PKGINFO "${2}" | tail -1 } # usage: _grep_buildinfo pkgfile pattern _grep_buildinfo() { - local _ret - - _ret="$(/usr/bin/bsdtar -xOqf "$1" .BUILDINFO | grep "^${2} = " | tail -1)" - echo "${_ret#${2} = }" + _grep_all_info "${1}" .BUILDINFO "${2}" | tail -1 } # Get the package base or name as fallback @@ -444,4 +447,34 @@ arch_repo_modify() { REPO_MODIFIED=1 } +# Build an index of dependent packages needed by a given pkgfile +# usage: make_reproducible pkgfile [check] +make_reproducible() { + local pkg dir pkgs=() pkgfile pkgfiles=() + + mapfile -t pkgs < <(_grep_all_info "${1}" .BUILDINFO installed) + + for pkg in "${pkgs[@]}"; do + for dir in "${FTP_BASE}/${PKGPOOL}" "${EXTRA_PKGPOOLS[@]/#/${FTP_BASE}/}" "${STAGING}"/**/; do + if pkgfile="$(getpkgfile "${dir}/${pkg}"${PKGEXTS} 2>/dev/null)"; then + pkgfiles+=("${pkgfile}") + continue 2 + fi + done + error "could not find existing package for %s" "${pkg}" + return 1 + done + + if [[ ${2} = check ]]; then + return 0 + fi + + for pkg in "${pkgfiles[@]}"; do + if [[ ! -f ${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/} ]]; then + ln -L "${pkg}" "${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/}" + fi + echo "${1}" >> "${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/}.buildlinks" + done +} + . "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/db-functions-${VCS}" diff --git a/db-update b/db-update index 313fb999..11ec185f 100755 --- a/db-update +++ b/db-update @@ -61,6 +61,9 @@ for repo in "${repos[@]}"; do if ! check_builddir "${pkg}"; then die "Package %s was not built in a chroot" "$repo/${pkg##*/}" fi + if ! make_reproducible "${pkg}" "check"; then + die "Package %s is not reproducible" "${pkg}" + fi done if ! 
check_splitpkgs "${repo}" "${pkgs[@]}"; then die "Missing split packages for %s" "$repo" @@ -82,6 +85,7 @@ for repo in "${repos[@]}"; do # any packages might have been moved by the previous run if [[ -f ${pkg} ]]; then mv "${pkg}" "$FTP_BASE/${PKGPOOL}" + make_reproducible "${FTP_BASE}/${PKGPOOL}${pkg##*/}" fi ln -s "../../../${PKGPOOL}/${pkgfile}" "$FTP_BASE/$repo/os/${pkgarch}" # also move signatures -- 2.19.2
On 12/4/18 1:09 PM, Eli Schwartz wrote:
Whenever adding new package files to the pool of distributed packages, hardlink a copy of every package it was built with, into a "reproducible" pool, and log which file required it.
The question becomes, where can I store these? As-is, this will burden the mirror network as well. Unsure how to handle this. Could this be configurable by the mirror, as ISOs are now? Should we exclusively self-host this, and if so, where? archive.archlinux.org is managed by another service with its own exclusively writable location.
Signed-off-by: Eli Schwartz <eschwartz@archlinux.org> --- config | 1 + config.local.svn-community | 1 + config.local.svn-packages | 1 + db-functions | 49 +++++++++++++++++++++++++++++++------- db-update | 4 ++++ 5 files changed, 48 insertions(+), 8 deletions(-)
diff --git a/config b/config index 1cfe11f4..5144fca7 100644 --- a/config +++ b/config @@ -3,6 +3,7 @@ FTP_BASE="/srv/ftp" PKGREPOS=() PKGPOOL='' +EXTRA_PKGPOOLS=() SRCPOOL='' TESTING_REPO='' STABLE_REPOS=() diff --git a/config.local.svn-community b/config.local.svn-community index 5d61b5ea..15bcc17f 100644 --- a/config.local.svn-community +++ b/config.local.svn-community @@ -2,6 +2,7 @@
PKGREPOS=('community' 'community-testing' 'community-staging' 'multilib' 'multilib-testing' 'multilib-staging') PKGPOOL='pool/community' +EXTRA_PKGPOOLS=('pool/packages') SRCPOOL='sources/community' SVNREPO='file:///srv/repos/svn-community/svn' SVNUSER='svn-community' diff --git a/config.local.svn-packages b/config.local.svn-packages index 34aab35c..75986b65 100644 --- a/config.local.svn-packages +++ b/config.local.svn-packages @@ -2,6 +2,7 @@
PKGREPOS=('core' 'extra' 'testing' 'staging' 'kde-unstable' 'gnome-unstable') PKGPOOL='pool/packages' +EXTRA_PKGPOOLS=('pool/community') SRCPOOL='sources/packages' SVNREPO='file:///srv/repos/svn-packages/svn' SVNUSER='svn-packages' diff --git a/db-functions b/db-functions index 7aeedced..2b1ae87a 100644 --- a/db-functions +++ b/db-functions @@ -165,20 +165,23 @@ repo_unlock () { #repo_unlock <repo-name> <arch> fi }
+# usage: _grep_all_info pkgfile infofile key +_grep_all_info() { + local _ret=() + + mapfile -t _ret < <(/usr/bin/bsdtar -xOqf "$1" "${2}" | grep "^${3} = ") + + printf '%s\n' "${_ret[@]#${3} = }" +} + # usage: _grep_pkginfo pkgfile pattern _grep_pkginfo() { - local _ret - - _ret="$(/usr/bin/bsdtar -xOqf "$1" .PKGINFO | grep "^${2} = " | tail -1)" - echo "${_ret#${2} = }" + _grep_all_info "${1}" .PKGINFO "${2}" | tail -1 }
# usage: _grep_buildinfo pkgfile pattern _grep_buildinfo() { - local _ret - - _ret="$(/usr/bin/bsdtar -xOqf "$1" .BUILDINFO | grep "^${2} = " | tail -1)" - echo "${_ret#${2} = }" + _grep_all_info "${1}" .BUILDINFO "${2}" | tail -1 }
# Get the package base or name as fallback @@ -444,4 +447,34 @@ arch_repo_modify() { REPO_MODIFIED=1 }
+# Build an index of dependent packages needed by a given pkgfile +# usage: make_reproducible pkgfile [check] +make_reproducible() { + local pkg dir pkgs=() pkgfile pkgfiles=() + + mapfile -t pkgs < <(_grep_all_info "${1}" .BUILDINFO installed) + + for pkg in "${pkgs[@]}"; do + for dir in "${FTP_BASE}/${PKGPOOL}" "${EXTRA_PKGPOOLS[@]/#/${FTP_BASE}/}" "${STAGING}"/**/; do + if pkgfile="$(getpkgfile "${dir}/${pkg}"${PKGEXTS} 2>/dev/null)"; then + pkgfiles+=("${pkgfile}") + continue 2 + fi + done + error "could not find existing package for %s" "${pkg}" + return 1 + done + + if [[ ${2} = check ]]; then + return 0 + fi + + for pkg in "${pkgfiles[@]}"; do + if [[ ! -f ${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/} ]]; then + ln -L "${pkg}" "${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/}" + fi + echo "${1}" >> "${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/}.buildlinks" + done +} + . "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/db-functions-${VCS}" diff --git a/db-update b/db-update index 313fb999..11ec185f 100755 --- a/db-update +++ b/db-update @@ -61,6 +61,9 @@ for repo in "${repos[@]}"; do if ! check_builddir "${pkg}"; then die "Package %s was not built in a chroot" "$repo/${pkg##*/}" fi + if ! make_reproducible "${pkg}" "check"; then + die "Package %s is not reproducible" "${pkg}" + fi done if ! check_splitpkgs "${repo}" "${pkgs[@]}"; then die "Missing split packages for %s" "$repo" @@ -82,6 +85,7 @@ for repo in "${repos[@]}"; do # any packages might have been moved by the previous run if [[ -f ${pkg} ]]; then mv "${pkg}" "$FTP_BASE/${PKGPOOL}" + make_reproducible "${FTP_BASE}/${PKGPOOL}${pkg##*/}" fi ln -s "../../../${PKGPOOL}/${pkgfile}" "$FTP_BASE/$repo/os/${pkgarch}" # also move signatures
-- Eli Schwartz Bug Wrangler and Trusted User
On Tue, Dec 04, 2018 at 01:15:20PM -0500, Eli Schwartz via arch-devops <arch-devops@lists.archlinux.org> wrote:
On 12/4/18 1:09 PM, Eli Schwartz wrote:
Whenever adding new package files to the pool of distributed packages, hardlink a copy of every package it was built with, into a "reproducible" pool, and log which file required it.
The question becomes, where can I store these? As-is, this will burden the mirror network as well. Unsure how to handle this. Could this be configurable by the mirror, as ISOs are now? Should we exclusively self-host this, and if so, where?
I'm not a fan of adding this pool to the mirror root for multiple reasons: - Most mirrors would likely want to avoid mirroring it because it can become quite large and we told them that we only need around 100GB. If everyone wants to exclude it that requires action by every admin. Not ideal. - I'm not sure if all of our mirrors have hardlink support. We don't currently ask for it even though we suggest the -H rsync option. Also the current repos use symlinks for the packages instead of hardlinks. That said, I'm not even sure if rsync can detect hardlinks across directories. It can't even detect renames/moves across directories... - I don't expect that we need to mirror it because we don't even get that many requests to our current archive. If we ever need to mirror it, we can worry about that later I'd say since moving it to the mirror root should be rather simple. I'd suggest to make the base path of the repro pool configurable so that we can keep it out of the mirror root. For now I'd suggest something like this: REPRO_BASE="/srv/reproducible-archive/" pkgname="foo" pkgfile="foo-1.0-1.pkg.tar.xz" dest="$REPRO_BASE/packages/${pkgname:0:1}/$pkgname/$pkgfile" ln .. "$dest" Also note that this does intentionally not include $PKGPOOL any more even though you include it in your patch. The archive doesn't have it and I don't think it really helps anyone. It will just cause confusion if packages are moved between repos and it makes using the archive more difficult because the user would have to check all possible pool names or know which one to check. Ideally I'd like to later extend this to also include the current archive's features and from the looks of it, storing the packages like this is the first step. Then we just need to copy the repos (dbs and pkg symlinks) once a day and archive the ISOs. Also thinking about this, it would be great if we could skip the pkg symlinks for each day's archive and only copy the db itself. 
All we'd need is to have a dedicated PackageServer= setting (like Server=, but only for packages, not for the database) for pacman to find the packages, but I'm not sure if Allan would like that. That setting would also have to support the pkgname substring and the pkgname obviously. Comments/thoughts/patches/... welcome. Florian
On Wed, Dec 05, 2018 at 10:49:44AM +0100, Florian Pritz via arch-devops <arch-devops@lists.archlinux.org> wrote:
Also thinking about this, it would be great if we could skip the pkg symlinks for each day's archive and only copy the db itself. All we'd need is to have a dedicated PackageServer= setting (like Server=, but only for packages, not for the database) for pacman to find the packages, but I'm not sure if Allan would like that. That setting would also have to support the pkgname substring and the pkgname obviously.
As discussed on IRC, we don't actually need support in pacman here. We can just set up a rewrite in nginx so that when pacman tries to download a package file, nginx maps the path correctly. So nginx would rewrite /$repo/os/$arch/package-....tar.xz to /packages/p/package/package-....tar.xz. Pacman would still work then and the only difference would be that we don't have directory listings with the packages of each day, but who needs those anyways. You can get all that info from the dbs themselves. Florian
So I think the idea of replacing archivetools with a builtin archive functionality has merit, especially if we can do it easily using nginx rewrites. Hence I've rewritten this to put packages in longterm storage in the archivetools "packages/" tree (which deprecates this aspect of archivetools) using a helper script. This also means no more mucking around with ftpdir-cleanup. Updating to this in infrastructure.git will require installing a sudoers rule: %dev ALL = (archive) NOPASSWD: /packages/db-archive %tu ALL = (archive) NOPASSWD: /community/db-archive Eli Schwartz (2): tests: make dummy archive copies of all packages in the test environment Add reproducible archive of packages. config | 2 ++ db-archive | 21 +++++++++++++++++++++ db-functions | 39 +++++++++++++++++++++++++++++++-------- db-update | 4 ++++ test/cases/db-update.bats | 6 ++++++ test/lib/common.bash | 10 ++++++++++ 6 files changed, 74 insertions(+), 8 deletions(-) create mode 100755 db-archive -- 2.20.1
Prerequisite for reproducible archives of packages. Signed-off-by: Eli Schwartz <eschwartz@archlinux.org> --- test/lib/common.bash | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/lib/common.bash b/test/lib/common.bash index bc2b4e6d..ab7d5963 100644 --- a/test/lib/common.bash +++ b/test/lib/common.bash @@ -99,6 +99,8 @@ setup() { export DBSCRIPTS_CONFIG=${TMP}/config.local cat <<eot > "${DBSCRIPTS_CONFIG}" FTP_BASE="${TMP}/ftp" + ARCHIVE_BASE="${TMP}/archive" + ARCHIVEUSER="" SVNREPO="file://${TMP}/svn-packages-repo" PKGREPOS=('core' 'extra' 'testing') PKGPOOL='pool/packages' @@ -126,6 +128,14 @@ eot mkdir -p "${TMP}/ftp/${PKGPOOL}" mkdir -p "${TMP}/ftp/${SRCPOOL}" + # make dummy packages for "reproducibility" + pacman -Qq | pacman -Sddp - | while read -r line; do + line=${line##*/} + pkgname=${line%-*-*-*} + mkdir -p "${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}" + touch "${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}/${line}"{,.sig} + done + svnadmin create "${TMP}/svn-packages-repo" svn checkout -q "file://${TMP}/svn-packages-repo" "${TMP}/svn-packages-copy" } -- 2.20.1
Whenever adding new package files to the pool of distributed packages, copy the file into a longterm archive. This is the first step to merging the functionality of archivetools, as this implements the shared pool while also guaranteeing that all packages are archived at the time of entry rather than once per day if they still exist. Signed-off-by: Eli Schwartz <eschwartz@archlinux.org> --- config | 2 ++ db-archive | 21 +++++++++++++++++++++ db-functions | 39 +++++++++++++++++++++++++++++++-------- db-update | 4 ++++ test/cases/db-update.bats | 6 ++++++ 5 files changed, 64 insertions(+), 8 deletions(-) create mode 100755 db-archive diff --git a/config b/config index 1cfe11f4..57a2cc47 100644 --- a/config +++ b/config @@ -1,6 +1,8 @@ #!/hint/bash FTP_BASE="/srv/ftp" +ARCHIVE_BASE="/srv/archive" +ARCHIVEUSER='archive' PKGREPOS=() PKGPOOL='' SRCPOOL='' diff --git a/db-archive b/db-archive new file mode 100755 index 00000000..5680b9de --- /dev/null +++ b/db-archive @@ -0,0 +1,21 @@ +#!/bin/bash + +. "$(dirname "$(readlink -e "$0")")/config" + +if (( $# != 1 )); then + echo "usage: %s <pkgfile>" "${0##*/}" + exit 1 +fi + +if [[ -n ${ARCHIVEUSER} ]]; then + exec sudo -u "${ARCHIVEUSER}" bash "${BASH_SOURCE[0]}" "${@}" +fi + +pkgfile=${1##*/} +pkgname=${pkgfile%-*-*-*} +archive_dir="${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}" + +if [[ ! 
-f ${archive_dir}/${pkgfile} ]]; then + mkdir -p "${archive_dir}" + cp -np "${1}"{,.sig} "${archive_dir}/" +fi diff --git a/db-functions b/db-functions index 7aeedced..b8a00b90 100644 --- a/db-functions +++ b/db-functions @@ -165,20 +165,23 @@ repo_unlock () { #repo_unlock <repo-name> <arch> fi } +# usage: _grep_all_info pkgfile infofile key +_grep_all_info() { + local _ret=() + + mapfile -t _ret < <(/usr/bin/bsdtar -xOqf "$1" "${2}" | grep "^${3} = ") + + printf '%s\n' "${_ret[@]#${3} = }" +} + # usage: _grep_pkginfo pkgfile pattern _grep_pkginfo() { - local _ret - - _ret="$(/usr/bin/bsdtar -xOqf "$1" .PKGINFO | grep "^${2} = " | tail -1)" - echo "${_ret#${2} = }" + _grep_all_info "${1}" .PKGINFO "${2}" | tail -1 } # usage: _grep_buildinfo pkgfile pattern _grep_buildinfo() { - local _ret - - _ret="$(/usr/bin/bsdtar -xOqf "$1" .BUILDINFO | grep "^${2} = " | tail -1)" - echo "${_ret#${2} = }" + _grep_all_info "${1}" .BUILDINFO "${2}" | tail -1 } # Get the package base or name as fallback @@ -444,4 +447,24 @@ arch_repo_modify() { REPO_MODIFIED=1 } +# Verify the existence of dependent packages needed by a given pkgfile +# usage: check_reproducible pkgfile +check_reproducible() { + local pkg dir pkgs=() pkgfile pkgfiles=() + + mapfile -t pkgs < <(_grep_all_info "${1}" .BUILDINFO installed) + + for pkg in "${pkgs[@]}"; do + local pkgname=${pkg%-*-*-*} + for dir in "${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}" "${STAGING}"/**/; do + if pkgfile="$(getpkgfile "${dir}/${pkg}"${PKGEXTS} 2>/dev/null)"; then + pkgfiles+=("${pkgfile}") + continue 2 + fi + done + error "could not find existing package for %s" "${pkg}" + return 1 + done +} + . "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/db-functions-${VCS}" diff --git a/db-update b/db-update index 313fb999..04a29bf3 100755 --- a/db-update +++ b/db-update @@ -61,6 +61,9 @@ for repo in "${repos[@]}"; do if ! check_builddir "${pkg}"; then die "Package %s was not built in a chroot" "$repo/${pkg##*/}" fi + if ! 
check_reproducible "${pkg}"; then + die "Package %s is not reproducible" "${pkg}" + fi done if ! check_splitpkgs "${repo}" "${pkgs[@]}"; then die "Missing split packages for %s" "$repo" @@ -82,6 +85,7 @@ for repo in "${repos[@]}"; do # any packages might have been moved by the previous run if [[ -f ${pkg} ]]; then mv "${pkg}" "$FTP_BASE/${PKGPOOL}" + "$(dirname "$(readlink -e "$0")")/db-archive" "${FTP_BASE}/${PKGPOOL}/${pkg##*/}" fi ln -s "../../../${PKGPOOL}/${pkgfile}" "$FTP_BASE/$repo/os/${pkgarch}" # also move signatures diff --git a/test/cases/db-update.bats b/test/cases/db-update.bats index 9ee06321..bc978302 100644 --- a/test/cases/db-update.bats +++ b/test/cases/db-update.bats @@ -87,6 +87,12 @@ load ../lib/common checkPackage testing pkg-any-a 1-2 } +@test "archive package when releasing" { + releasePackage extra pkg-any-a + db-update + [[ -f ${ARCHIVE_BASE}/packages/p/pkg-any-a/pkg-any-a-1-1-any${PKGEXT} ]] +} + @test "update same any package to same repository fails" { releasePackage extra pkg-any-a db-update -- 2.20.1
On Tue, Jan 08, 2019 at 06:40:37PM -0500, Eli Schwartz via arch-projects <arch-projects@archlinux.org> wrote:
diff --git a/db-archive b/db-archive new file mode 100755 index 00000000..5680b9de --- /dev/null +++ b/db-archive @@ -0,0 +1,21 @@ +#!/bin/bash + +. "$(dirname "$(readlink -e "$0")")/config"
This uses $0 (see below).
+ +if (( $# != 1 )); then + echo "usage: %s <pkgfile>" "${0##*/}" + exit 1 +fi + +if [[ -n ${ARCHIVEUSER} ]]; then + exec sudo -u "${ARCHIVEUSER}" bash "${BASH_SOURCE[0]}" "${@}"
This uses $BASH_SOURCE instead of $0 as used above. Is this intentional, if so why? I'd argue that this should also use $0, but maybe I'm missing something?
+fi + +pkgfile=${1##*/} +pkgname=${pkgfile%-*-*-*} +archive_dir="${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}" + +if [[ ! -f ${archive_dir}/${pkgfile} ]]; then + mkdir -p "${archive_dir}" + cp -np "${1}"{,.sig} "${archive_dir}/" +fi diff --git a/db-functions b/db-functions index 7aeedced..b8a00b90 100644 --- a/db-functions +++ b/db-functions @@ -444,4 +447,24 @@ arch_repo_modify() { REPO_MODIFIED=1 }
+# Verify the existence of dependent packages needed by a given pkgfile +# usage: check_reproducible pkgfile +check_reproducible() { + local pkg dir pkgs=() pkgfile pkgfiles=() + + mapfile -t pkgs < <(_grep_all_info "${1}" .BUILDINFO installed) + + for pkg in "${pkgs[@]}"; do + local pkgname=${pkg%-*-*-*} + for dir in "${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}" "${STAGING}"/**/; do + if pkgfile="$(getpkgfile "${dir}/${pkg}"${PKGEXTS} 2>/dev/null)"; then + pkgfiles+=("${pkgfile}") + continue 2 + fi + done + error "could not find existing package for %s" "${pkg}"
I imagine that I'd be confused if I ever saw this error. How about clarifying it like this? "could not find package for dependency %s in reproducibility archive or your staging directory"
+ return 1 + done +} + . "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/db-functions-${VCS}" diff --git a/db-update b/db-update index 313fb999..04a29bf3 100755 --- a/db-update +++ b/db-update @@ -61,6 +61,9 @@ for repo in "${repos[@]}"; do if ! check_builddir "${pkg}"; then die "Package %s was not built in a chroot" "$repo/${pkg##*/}" fi + if ! check_reproducible "${pkg}"; then + die "Package %s is not reproducible" "${pkg}"
Same as above. I'd suggest something like this: "Package %s depends on packages that are missing in the reproducibility archive and your staging directory. Ensure that all dependencies either exist in the repositories or reproducibility archive already or that they are added together with the package in a single call to db-update." Florian
On 1/9/19 8:00 AM, Florian Pritz wrote:
On Tue, Jan 08, 2019 at 06:40:37PM -0500, Eli Schwartz via arch-projects <arch-projects@archlinux.org> wrote:
diff --git a/db-archive b/db-archive new file mode 100755 index 00000000..5680b9de --- /dev/null +++ b/db-archive @@ -0,0 +1,21 @@ +#!/bin/bash + +. "$(dirname "$(readlink -e "$0")")/config"
This uses $0 (see below).
+ +if (( $# != 1 )); then + echo "usage: %s <pkgfile>" "${0##*/}" + exit 1 +fi + +if [[ -n ${ARCHIVEUSER} ]]; then + exec sudo -u "${ARCHIVEUSER}" bash "${BASH_SOURCE[0]}" "${@}"
This uses $BASH_SOURCE instead of $0 as used above. Is this intentional, if so why? I'd argue that this should also use $0, but maybe I'm missing something?
BASH_SOURCE explicitly refers to the file it was sourced/executed from, it's like the __file__ macro other languages have. $0 can be anything since programs can modify their argv0 freely, but by default it is the toplevel script name, which is why it gets used so much. It's still not technically correct, and I prefer to use BASH_SOURCE. In short, this is a copy-paste error above.
+fi + +pkgfile=${1##*/} +pkgname=${pkgfile%-*-*-*} +archive_dir="${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}" + +if [[ ! -f ${archive_dir}/${pkgfile} ]]; then + mkdir -p "${archive_dir}" + cp -np "${1}"{,.sig} "${archive_dir}/" +fi diff --git a/db-functions b/db-functions index 7aeedced..b8a00b90 100644 --- a/db-functions +++ b/db-functions @@ -444,4 +447,24 @@ arch_repo_modify() { REPO_MODIFIED=1 }
+# Verify the existence of dependent packages needed by a given pkgfile +# usage: check_reproducible pkgfile +check_reproducible() { + local pkg dir pkgs=() pkgfile pkgfiles=() + + mapfile -t pkgs < <(_grep_all_info "${1}" .BUILDINFO installed) + + for pkg in "${pkgs[@]}"; do + local pkgname=${pkg%-*-*-*} + for dir in "${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}" "${STAGING}"/**/; do + if pkgfile="$(getpkgfile "${dir}/${pkg}"${PKGEXTS} 2>/dev/null)"; then + pkgfiles+=("${pkgfile}") + continue 2 + fi + done + error "could not find existing package for %s" "${pkg}"
I imagine that I'd be confused if I ever saw this error. How about clarifying it like this? "could not find package for dependency %s in reproducibility archive or your staging directory"
Maybe "existing or staged package for dependency %s"?
+ return 1 + done +} + . "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/db-functions-${VCS}" diff --git a/db-update b/db-update index 313fb999..04a29bf3 100755 --- a/db-update +++ b/db-update @@ -61,6 +61,9 @@ for repo in "${repos[@]}"; do if ! check_builddir "${pkg}"; then die "Package %s was not built in a chroot" "$repo/${pkg##*/}" fi + if ! check_reproducible "${pkg}"; then + die "Package %s is not reproducible" "${pkg}"
Same as above. I'd suggest something like this:
"Package %s depends on packages that are missing in the reproducibility archive and your staging directory. Ensure that all dependencies either exist in the repositories or reproducibility archive already or that they are added together with the package in a single call to db-update."
The two errors will only be called together. I think expanding the message when printing the missing dependency should be enough. -- Eli Schwartz Bug Wrangler and Trusted User
On Wed, Jan 09, 2019 at 09:49:26AM -0500, Eli Schwartz via arch-projects <arch-projects@archlinux.org> wrote:
diff --git a/db-functions b/db-functions index 7aeedced..b8a00b90 100644 --- a/db-functions +++ b/db-functions @@ -444,4 +447,24 @@ arch_repo_modify() { REPO_MODIFIED=1 }
+# Verify the existence of dependent packages needed by a given pkgfile +# usage: check_reproducible pkgfile +check_reproducible() { + local pkg dir pkgs=() pkgfile pkgfiles=() + + mapfile -t pkgs < <(_grep_all_info "${1}" .BUILDINFO installed) + + for pkg in "${pkgs[@]}"; do + local pkgname=${pkg%-*-*-*} + for dir in "${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}" "${STAGING}"/**/; do + if pkgfile="$(getpkgfile "${dir}/${pkg}"${PKGEXTS} 2>/dev/null)"; then + pkgfiles+=("${pkgfile}") + continue 2 + fi + done + error "could not find existing package for %s" "${pkg}"
I imagine that I'd be confused if I ever saw this error. How about clarifying it like this? "could not find package for dependency %s in reproducibility archive or your staging directory"
Maybe "existing or staged package for dependency %s"?
"could not find existing or staged package for dependency %s" is fine by me.
+ return 1 + done +} + . "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/db-functions-${VCS}" diff --git a/db-update b/db-update index 313fb999..04a29bf3 100755 --- a/db-update +++ b/db-update @@ -61,6 +61,9 @@ for repo in "${repos[@]}"; do if ! check_builddir "${pkg}"; then die "Package %s was not built in a chroot" "$repo/${pkg##*/}" fi + if ! check_reproducible "${pkg}"; then + die "Package %s is not reproducible" "${pkg}"
Same as above. I'd suggest something like this:
"Package %s depends on packages that are missing in the reproducibility archive and your staging directory. Ensure that all dependencies either exist in the repositories or reproducibility archive already or that they are added together with the package in a single call to db-update."
The two errors will only be called together. I think expanding the message when printing the missing dependency should be enough.
I get that, but I think that a user that sees these two messages may not understand that a missing dependency is related to the package being reproducible. To be honest, I actually expected db-update to run all checks and show all errors at once instead of terminating after the first one. I now know that this is not the case. I'm pretty sure that I would have treated these two messages as separate errors and that I'd then be confused as to what the "second error" is actually about. I think that it may save a lot of time and confusion if this error message is clear about what is wrong and how it can be fixed. Most of the other error messages in db-update are rather clear about the actual problem. Maybe not as clear as the message I propose here, but clearer than "Package %s is not reproducible". Apart from that, does it really hurt to have a more verbose error message? It will only be shown if there is an actual error and it doesn't influence normal usage. I'd say we can afford to be more verbose in that case. If you still think that this message should not be made more verbose, I'd argue that it should be removed entirely. If we have just the message about a dependency not being found, it is quite clear to a user what is wrong and how they could fix the error. I'd say that is much less confusing than if there were a second message about reproducibility that some people may or may not consider to be a different, additional error as I've explained above. Florian
On 1/9/19 10:34 AM, Florian Pritz wrote:
On Wed, Jan 09, 2019 at 09:49:26AM -0500, Eli Schwartz via arch-projects <arch-projects@archlinux.org> wrote:
diff --git a/db-functions b/db-functions index 7aeedced..b8a00b90 100644 --- a/db-functions +++ b/db-functions @@ -444,4 +447,24 @@ arch_repo_modify() { REPO_MODIFIED=1 }
+# Verify the existence of dependent packages needed by a given pkgfile +# usage: check_reproducible pkgfile +check_reproducible() { + local pkg dir pkgs=() pkgfile pkgfiles=() + + mapfile -t pkgs < <(_grep_all_info "${1}" .BUILDINFO installed) + + for pkg in "${pkgs[@]}"; do + local pkgname=${pkg%-*-*-*} + for dir in "${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}" "${STAGING}"/**/; do + if pkgfile="$(getpkgfile "${dir}/${pkg}"${PKGEXTS} 2>/dev/null)"; then + pkgfiles+=("${pkgfile}") + continue 2 + fi + done + error "could not find existing package for %s" "${pkg}"
I imagine that I'd be confused if I ever saw this error. How about clarifying it like this? "could not find package for dependency %s in reproducibility archive or your staging directory"
Maybe "existing or staged package for dependency %s"?
"could not find existing or staged package for dependency %s" is fine by me.
+ return 1 + done +} + . "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/db-functions-${VCS}" diff --git a/db-update b/db-update index 313fb999..04a29bf3 100755 --- a/db-update +++ b/db-update @@ -61,6 +61,9 @@ for repo in "${repos[@]}"; do if ! check_builddir "${pkg}"; then die "Package %s was not built in a chroot" "$repo/${pkg##*/}" fi + if ! check_reproducible "${pkg}"; then + die "Package %s is not reproducible" "${pkg}"
Same as above. I'd suggest something like this:
"Package %s depends on packages that are missing in the reproducibility archive and your staging directory. Ensure that all dependencies either exist in the repositories or reproducibility archive already or that they are added together with the package in a single call to db-update."
The two errors will only be called together. I think expanding the message when printing the missing dependency should be enough.
I get that, but I think that a user that sees these two messages may not understand that a missing dependency is related to the package being reproducible. To be honest, I actually expected db-update to run all checks and show all errors at once instead of terminating after the first one. I now know that this is not the case. I'm pretty sure that I would have treated these two messages as separate errors and that I'd then be confused as to what the "second error" is actually about.
I think that it may save a lot of time and confusion if this error message is clear about what is wrong and how it can be fixed. Most of the other error messages in db-update are rather clear about the actual problem. Maybe not as clear as the message I propose here, but clearer than "Package %s is not reproducible". Apart from that, does it really hurt to have a more verbose error message? It will only be shown if there is an actual error and it doesn't influence normal usage. I'd say we can afford to be more verbose in that case.
If you still think that this message should not be made more verbose, I'd argue that it should be removed entirely. If we have just the message about a dependency not being found, it is quite clear to a user what is wrong and how they could fix the error. I'd say that is much less confusing than if there were a second message about reproducibility that some people may or may not consider to be a different, additional error as I've explained above.
Well, db-update check doesn't necessarily know what check_reproducible() cannot find, I guess we could exit directly in there, but... Hmm, what about: Package %s is not reproducible. Ensure that all dependencies are available in the repositories or are added in the same db-update. -- Eli Schwartz Bug Wrangler and Trusted User
On Wed, Jan 09, 2019 at 10:45:12AM -0500, Eli Schwartz via arch-projects <arch-projects@archlinux.org> wrote:
diff --git a/db-update b/db-update index 313fb999..04a29bf3 100755 --- a/db-update +++ b/db-update @@ -61,6 +61,9 @@ for repo in "${repos[@]}"; do if ! check_builddir "${pkg}"; then die "Package %s was not built in a chroot" "$repo/${pkg##*/}" fi + if ! check_reproducible "${pkg}"; then + die "Package %s is not reproducible" "${pkg}"
Same as above. I'd suggest something like this:
"Package %s depends on packages that are missing in the reproducibility archive and your staging directory. Ensure that all dependencies either exist in the repositories or reproducibility archive already or that they are added together with the package in a single call to db-update."
Hmm, what about:
Package %s is not reproducible. Ensure that all dependencies are available in the repositories or are added in the same db-update.
Fine by me. Florian
Whenever adding new package files to the pool of distributed packages, copy the file into a longterm archive. This is the first step to merging the functionality of archivetools, as this implements the shared pool while also guaranteeing that all packages are archived at the time of entry rather than once per day if they still exist. Signed-off-by: Eli Schwartz <eschwartz@archlinux.org> --- v3: string changes, consistently use BASH_SOURCE config | 2 ++ db-archive | 21 +++++++++++++++++++++ db-functions | 39 +++++++++++++++++++++++++++++++-------- db-update | 5 +++++ test/cases/db-update.bats | 6 ++++++ 5 files changed, 65 insertions(+), 8 deletions(-) create mode 100755 db-archive diff --git a/config b/config index 1cfe11f4..57a2cc47 100644 --- a/config +++ b/config @@ -1,6 +1,8 @@ #!/hint/bash FTP_BASE="/srv/ftp" +ARCHIVE_BASE="/srv/archive" +ARCHIVEUSER='archive' PKGREPOS=() PKGPOOL='' SRCPOOL='' diff --git a/db-archive b/db-archive new file mode 100755 index 00000000..f3fa2fea --- /dev/null +++ b/db-archive @@ -0,0 +1,21 @@ +#!/bin/bash + +. "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/config" + +if (( $# != 1 )); then + echo "usage: %s <pkgfile>" "${0##*/}" + exit 1 +fi + +if [[ -n ${ARCHIVEUSER} ]]; then + exec sudo -u "${ARCHIVEUSER}" bash "${BASH_SOURCE[0]}" "${@}" +fi + +pkgfile=${1##*/} +pkgname=${pkgfile%-*-*-*} +archive_dir="${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}" + +if [[ ! 
-f ${archive_dir}/${pkgfile} ]]; then + mkdir -p "${archive_dir}" + cp -np "${1}"{,.sig} "${archive_dir}/" +fi diff --git a/db-functions b/db-functions index 7aeedced..0e4dd939 100644 --- a/db-functions +++ b/db-functions @@ -165,20 +165,23 @@ repo_unlock () { #repo_unlock <repo-name> <arch> fi } +# usage: _grep_all_info pkgfile infofile key +_grep_all_info() { + local _ret=() + + mapfile -t _ret < <(/usr/bin/bsdtar -xOqf "$1" "${2}" | grep "^${3} = ") + + printf '%s\n' "${_ret[@]#${3} = }" +} + # usage: _grep_pkginfo pkgfile pattern _grep_pkginfo() { - local _ret - - _ret="$(/usr/bin/bsdtar -xOqf "$1" .PKGINFO | grep "^${2} = " | tail -1)" - echo "${_ret#${2} = }" + _grep_all_info "${1}" .PKGINFO "${2}" | tail -1 } # usage: _grep_buildinfo pkgfile pattern _grep_buildinfo() { - local _ret - - _ret="$(/usr/bin/bsdtar -xOqf "$1" .BUILDINFO | grep "^${2} = " | tail -1)" - echo "${_ret#${2} = }" + _grep_all_info "${1}" .BUILDINFO "${2}" | tail -1 } # Get the package base or name as fallback @@ -444,4 +447,24 @@ arch_repo_modify() { REPO_MODIFIED=1 } +# Verify the existence of dependent packages needed by a given pkgfile +# usage: check_reproducible pkgfile +check_reproducible() { + local pkg dir pkgs=() pkgfile pkgfiles=() + + mapfile -t pkgs < <(_grep_all_info "${1}" .BUILDINFO installed) + + for pkg in "${pkgs[@]}"; do + local pkgname=${pkg%-*-*-*} + for dir in "${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}" "${STAGING}"/**/; do + if pkgfile="$(getpkgfile "${dir}/${pkg}"${PKGEXTS} 2>/dev/null)"; then + pkgfiles+=("${pkgfile}") + continue 2 + fi + done + error "could not find existing or staged package for dependency %s" "${pkg}" + return 1 + done +} + . "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/db-functions-${VCS}" diff --git a/db-update b/db-update index 313fb999..3a66c859 100755 --- a/db-update +++ b/db-update @@ -61,6 +61,10 @@ for repo in "${repos[@]}"; do if ! 
check_builddir "${pkg}"; then die "Package %s was not built in a chroot" "$repo/${pkg##*/}" fi + if ! check_reproducible "${pkg}"; then + error "Package %s is not reproducible." "${pkg}" + die "Ensure that all dependencies are available in the repositories or are added in the same db-update." + fi done if ! check_splitpkgs "${repo}" "${pkgs[@]}"; then die "Missing split packages for %s" "$repo" @@ -82,6 +86,7 @@ for repo in "${repos[@]}"; do # any packages might have been moved by the previous run if [[ -f ${pkg} ]]; then mv "${pkg}" "$FTP_BASE/${PKGPOOL}" + "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/db-archive" "${FTP_BASE}/${PKGPOOL}/${pkg##*/}" fi ln -s "../../../${PKGPOOL}/${pkgfile}" "$FTP_BASE/$repo/os/${pkgarch}" # also move signatures diff --git a/test/cases/db-update.bats b/test/cases/db-update.bats index 9ee06321..bc978302 100644 --- a/test/cases/db-update.bats +++ b/test/cases/db-update.bats @@ -87,6 +87,12 @@ load ../lib/common checkPackage testing pkg-any-a 1-2 } +@test "archive package when releasing" { + releasePackage extra pkg-any-a + db-update + [[ -f ${ARCHIVE_BASE}/packages/p/pkg-any-a/pkg-any-a-1-1-any${PKGEXT} ]] +} + @test "update same any package to same repository fails" { releasePackage extra pkg-any-a db-update -- 2.20.1
On Wed, Jan 09, 2019 at 05:01:21PM -0500, Eli Schwartz via arch-projects <arch-projects@archlinux.org> wrote:
v3: string changes, consistently use BASH_SOURCE
Looks good to me. Thanks! Florian
On 12/04/18 at 01:15pm, Eli Schwartz via arch-devops wrote:
On 12/4/18 1:09 PM, Eli Schwartz wrote:
Whenever adding new package files to the pool of distributed packages, hardlink a copy of every package it was built with, into a "reproducible" pool, and log which file required it.
Does this also clean up the archive? As in remove packages which are not required for reproducible builds? Since now our archive server is almost running out of space again.
The question becomes, where can I store these? As-is, this will burden the mirror network as well. Unsure how to handle this. Could this be configurable by the mirror, as ISOs are now? Should we exclusively self-host this, and if so, where?
archive.archlinux.org is managed by another service with its own exclusively writable location.
Signed-off-by: Eli Schwartz <eschwartz@archlinux.org> --- config | 1 + config.local.svn-community | 1 + config.local.svn-packages | 1 + db-functions | 49 +++++++++++++++++++++++++++++++------- db-update | 4 ++++ 5 files changed, 48 insertions(+), 8 deletions(-)
diff --git a/config b/config index 1cfe11f4..5144fca7 100644 --- a/config +++ b/config @@ -3,6 +3,7 @@ FTP_BASE="/srv/ftp" PKGREPOS=() PKGPOOL='' +EXTRA_PKGPOOLS=() SRCPOOL='' TESTING_REPO='' STABLE_REPOS=() diff --git a/config.local.svn-community b/config.local.svn-community index 5d61b5ea..15bcc17f 100644 --- a/config.local.svn-community +++ b/config.local.svn-community @@ -2,6 +2,7 @@
PKGREPOS=('community' 'community-testing' 'community-staging' 'multilib' 'multilib-testing' 'multilib-staging') PKGPOOL='pool/community' +EXTRA_PKGPOOLS=('pool/packages') SRCPOOL='sources/community' SVNREPO='file:///srv/repos/svn-community/svn' SVNUSER='svn-community' diff --git a/config.local.svn-packages b/config.local.svn-packages index 34aab35c..75986b65 100644 --- a/config.local.svn-packages +++ b/config.local.svn-packages @@ -2,6 +2,7 @@
PKGREPOS=('core' 'extra' 'testing' 'staging' 'kde-unstable' 'gnome-unstable') PKGPOOL='pool/packages' +EXTRA_PKGPOOLS=('pool/community') SRCPOOL='sources/packages' SVNREPO='file:///srv/repos/svn-packages/svn' SVNUSER='svn-packages' diff --git a/db-functions b/db-functions index 7aeedced..2b1ae87a 100644 --- a/db-functions +++ b/db-functions @@ -165,20 +165,23 @@ repo_unlock () { #repo_unlock <repo-name> <arch> fi }
+# usage: _grep_all_info pkgfile infofile key +_grep_all_info() { + local _ret=() + + mapfile -t _ret < <(/usr/bin/bsdtar -xOqf "$1" "${2}" | grep "^${3} = ") + + printf '%s\n' "${_ret[@]#${3} = }" +} + # usage: _grep_pkginfo pkgfile pattern _grep_pkginfo() { - local _ret - - _ret="$(/usr/bin/bsdtar -xOqf "$1" .PKGINFO | grep "^${2} = " | tail -1)" - echo "${_ret#${2} = }" + _grep_all_info "${1}" .PKGINFO "${2}" | tail -1 }
# usage: _grep_buildinfo pkgfile pattern _grep_buildinfo() { - local _ret - - _ret="$(/usr/bin/bsdtar -xOqf "$1" .BUILDINFO | grep "^${2} = " | tail -1)" - echo "${_ret#${2} = }" + _grep_all_info "${1}" .BUILDINFO "${2}" | tail -1 }
# Get the package base or name as fallback @@ -444,4 +447,34 @@ arch_repo_modify() { REPO_MODIFIED=1 }
+# Build an index of dependent packages needed by a given pkgfile +# usage: make_reproducible pkgfile [check] +make_reproducible() { + local pkg dir pkgs=() pkgfile pkgfiles=() + + mapfile -t pkgs < <(_grep_all_info "${1}" .BUILDINFO installed) + + for pkg in "${pkgs[@]}"; do + for dir in "${FTP_BASE}/${PKGPOOL}" "${EXTRA_PKGPOOLS[@]/#/${FTP_BASE}/}" "${STAGING}"/**/; do + if pkgfile="$(getpkgfile "${dir}/${pkg}"${PKGEXTS} 2>/dev/null)"; then + pkgfiles+=("${pkgfile}") + continue 2 + fi + done + error "could not find existing package for %s" "${pkg}" + return 1 + done + + if [[ ${2} = check ]]; then + return 0 + fi + + for pkg in "${pkgfiles[@]}"; do + if [[ ! -f ${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/} ]]; then + ln -L "${pkg}" "${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/}" + fi + echo "${1}" >> "${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/}.buildlinks" + done +} + . "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/db-functions-${VCS}" diff --git a/db-update b/db-update index 313fb999..11ec185f 100755 --- a/db-update +++ b/db-update @@ -61,6 +61,9 @@ for repo in "${repos[@]}"; do if ! check_builddir "${pkg}"; then die "Package %s was not built in a chroot" "$repo/${pkg##*/}" fi + if ! make_reproducible "${pkg}" "check"; then + die "Package %s is not reproducible" "${pkg}" + fi done if ! check_splitpkgs "${repo}" "${pkgs[@]}"; then die "Missing split packages for %s" "$repo" @@ -82,6 +85,7 @@ for repo in "${repos[@]}"; do # any packages might have been moved by the previous run if [[ -f ${pkg} ]]; then mv "${pkg}" "$FTP_BASE/${PKGPOOL}" + make_reproducible "${FTP_BASE}/${PKGPOOL}${pkg##*/}" fi ln -s "../../../${PKGPOOL}/${pkgfile}" "$FTP_BASE/$repo/os/${pkgarch}" # also move signatures
-- Eli Schwartz Bug Wrangler and Trusted User
-- Jelle van der Waa
On 12/12/18 3:55 AM, Jelle van der Waa wrote:
On 12/04/18 at 01:15pm, Eli Schwartz via arch-devops wrote:
On 12/4/18 1:09 PM, Eli Schwartz wrote:
Whenever adding new package files to the pool of distributed packages, hardlink a copy of every package it was built with, into a "reproducible" pool, and log which file required it.
Does this also clean up the archive? As in remove packages which are not required for reproducible builds? Since now our archive server is almost running out of space again.
Patch 4/4 will do so, but I only cc'ed patch 2/4 to the devops list. -- Eli Schwartz Bug Wrangler and Trusted User
This is never ever called for multiple packages at once, and if it was, it would be named clean_pkgs. Meanwhile, it was implied that this could take a variable target, but that was never-used code. Make it useful because we might (will) use it. Signed-off-by: Eli Schwartz <eschwartz@archlinux.org> --- cron-jobs/ftpdir-cleanup | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/cron-jobs/ftpdir-cleanup b/cron-jobs/ftpdir-cleanup index 9df5f99a..74b771cd 100755 --- a/cron-jobs/ftpdir-cleanup +++ b/cron-jobs/ftpdir-cleanup @@ -4,21 +4,19 @@ . "$(dirname "$(readlink -e "$0")")/../db-functions" clean_pkg() { - local pkg - local target + local pkg=${1} + local targetdir=${2:-${CLEANUP_DESTDIR}} if [[ $CLEANUP_DRYRUN != true ]]; then - for pkg in "$@"; do - if [[ -h $pkg ]]; then - rm -f "$pkg" "$pkg.sig" - else - mv_acl "$pkg" "$CLEANUP_DESTDIR/${pkg##*/}" - if [[ -e $pkg.sig ]]; then - mv_acl "$pkg.sig" "$CLEANUP_DESTDIR/${pkg##*/}.sig" - fi - touch "${CLEANUP_DESTDIR}/${pkg##*/}" + if [[ -h ${pkg} ]]; then + rm -f "${pkg}" "${pkg}.sig" + else + mv_acl "${pkg}" "${targetdir}/${pkg##*/}" + if [[ -e ${pkg}.sig ]]; then + mv_acl "${pkg}.sig" "${targetdir}/${pkg##*/}.sig" fi - done + touch "${targetdir}/${pkg##*/}" + fi fi } -- 2.19.2
This reuses the same logic used for normally deleting packages, but cleanup of the reproducible archive happens in a subdirectory of ${CLEANUP_DESTDIR} while still subject to the same timeouts. Signed-off-by: Eli Schwartz <eschwartz@archlinux.org> --- cron-jobs/ftpdir-cleanup | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cron-jobs/ftpdir-cleanup b/cron-jobs/ftpdir-cleanup index 74b771cd..87af2486 100755 --- a/cron-jobs/ftpdir-cleanup +++ b/cron-jobs/ftpdir-cleanup @@ -58,6 +58,7 @@ for repo in "${PKGREPOS[@]}"; do for old_pkg in "${old_pkgs[@]}"; do msg2 '%s' "${old_pkg}" clean_pkg "${FTP_BASE}/${repo}/os/${arch}/${old_pkg}" + find "${FTP_BASE}/${PKGPOOL}-reproducible/" -name "*.buildlinks" -exec sed -i "/${old_pkg%${PKGEXTS}}/d" {} + done fi done @@ -79,20 +80,29 @@ if (( ${#old_pkgs[@]} >= 1 )); then done fi +mapfile -td '' old_reproducible_pkgs < <(find "${FTP_BASE}/${PKGPOOL}-reproducible/" -name '*.buildlinks' -empty -printf '%f\0') +if (( ${#old_reproducible_pkgs[@]} >= 1 )); then + msg "Removing old packages from reproducible pool..." + for old_pkg in "${old_reproducible_pkgs[@]}"; do + msg2 '%s' "${old_pkg}" + clean_pkg "${FTP_BASE}/${PKGPOOL}-reproducible/${old_pkg}" "${CLEANUP_DESTDIR}/reproducible" + done +fi + unset old_pkgs touch -d "${CLEANUP_KEEP} days ago" "${WORKDIR}/cleanup_timestamp" for f in "${CLEANUP_DESTDIR}"/**/*${PKGEXTS}; do if [[ ${WORKDIR}/cleanup_timestamp -nt $f ]]; then - old_pkgs+=("${f##*/}") + old_pkgs+=("${f}") fi done if (( ${#old_pkgs[@]} >= 1 )); then msg "Removing old packages from the cleanup directory..." for old_pkg in "${old_pkgs[@]}"; do - msg2 '%s' "${old_pkg}" + msg2 '%s' "${old_pkg#${CLEANUP_DESTDIR}/}" if [[ $CLEANUP_DRYRUN != true ]]; then - rm -f "${CLEANUP_DESTDIR}/${old_pkg}" - rm -f "${CLEANUP_DESTDIR}/${old_pkg}.sig" + rm -f "${old_pkg}" + rm -f "${old_pkg}.sig" fi done fi -- 2.19.2
participants (3)
-
Eli Schwartz
-
Florian Pritz
-
Jelle van der Waa