[PATCH v2] strip: Use dwz to compress dwarf headers
From: Morten Linderud <morten@linderud.pw> `dwz` allows us to compress the DWARF files when building debug packages. Potentially shaving off some of the sizes of the symbols we distribute. https://sourceware.org/dwz/ A sample of packages built with dwz; pacman: Original debug info size: 1520kB Size after compression: 1252kB systemd: Original debug info size: 46692kB Size after compression: 41036kB Signed-off-by: Morten Linderud <morten@linderud.pw> --- doc/makepkg.conf.5.asciidoc | 3 ++ scripts/libmakepkg/executable/dwz.sh.in | 38 ++++++++++++++++ .../executable/sepdebugcrcfix.sh.in | 38 ++++++++++++++++ scripts/libmakepkg/tidy/strip.sh.in | 44 +++++++++++++++++-- 4 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 scripts/libmakepkg/executable/dwz.sh.in create mode 100644 scripts/libmakepkg/executable/sepdebugcrcfix.sh.in diff --git a/doc/makepkg.conf.5.asciidoc b/doc/makepkg.conf.5.asciidoc index a0d9a6d4..2e40f27f 100644 --- a/doc/makepkg.conf.5.asciidoc +++ b/doc/makepkg.conf.5.asciidoc @@ -194,6 +194,9 @@ Options DEBUG_CXXFLAGS to their counterpart buildflags. Creates a separate package containing the debug symbols when used with `strip'. + *dwz*;; + Compress dwarf files inside the debug package. + *lto*;; Enable building packages using link time optimization. Adds the flags specified in LTOFLAGS to CFLAGS, CXXFLAGS and LDFLAGS (or diff --git a/scripts/libmakepkg/executable/dwz.sh.in b/scripts/libmakepkg/executable/dwz.sh.in new file mode 100644 index 00000000..a540697c --- /dev/null +++ b/scripts/libmakepkg/executable/dwz.sh.in @@ -0,0 +1,38 @@ +#!/usr/bin/bash +# +# dwz.sh - Confirm presence of dwz binary +# +# Copyright (c) 2011-2022 Pacman Development Team <pacman-dev@lists.archlinux.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +[[ -n "$LIBMAKEPKG_EXECUTABLE_DWZ_SH" ]] && return +LIBMAKEPKG_EXECUTABLE_DWZ_SH=1 + +LIBRARY=${LIBRARY:-'@libmakepkgdir@'} + +source "$LIBRARY/util/message.sh" +source "$LIBRARY/util/option.sh" + +executable_functions+=('executable_dwz') + +executable_dwz() { + if check_option "dwz" "y"; then + if ! type -p dwz>/dev/null; then + error "$(gettext "Cannot find the %s binary required to compress dwarf files.")" "dwz" + return 1 + fi + fi +} diff --git a/scripts/libmakepkg/executable/sepdebugcrcfix.sh.in b/scripts/libmakepkg/executable/sepdebugcrcfix.sh.in new file mode 100644 index 00000000..6259ac32 --- /dev/null +++ b/scripts/libmakepkg/executable/sepdebugcrcfix.sh.in @@ -0,0 +1,38 @@ +#!/usr/bin/bash +# +# sepdebugcrcfix.sh - Confirm presence of sepdebugcrcfix binary +# +# Copyright (c) 2011-2022 Pacman Development Team <pacman-dev@lists.archlinux.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +[[ -n "$LIBMAKEPKG_EXECUTABLE_SEPDEBUGCRCFIX_SH" ]] && return +LIBMAKEPKG_EXECUTABLE_SEPDEBUGCRCFIX_SH=1 + +LIBRARY=${LIBRARY:-'@libmakepkgdir@'} + +source "$LIBRARY/util/message.sh" +source "$LIBRARY/util/option.sh" + +executable_functions+=('executable_sepdebugcrcfix') + +executable_sepdebugcrcfix() { + if check_option "dwz" "y"; then + if ! type -p sepdebugcrcfix>/dev/null; then + error "$(gettext "Cannot find the %s binary required to fix CRC.")" "sepdebugcrcfix" + return 1 + fi + fi +} diff --git a/scripts/libmakepkg/tidy/strip.sh.in b/scripts/libmakepkg/tidy/strip.sh.in index 688bcf1b..1365b77b 100644 --- a/scripts/libmakepkg/tidy/strip.sh.in +++ b/scripts/libmakepkg/tidy/strip.sh.in @@ -27,7 +27,7 @@ source "$LIBRARY/util/message.sh" source "$LIBRARY/util/option.sh" -packaging_options+=('strip' 'debug') +packaging_options+=('strip' 'debug' 'dwz') tidy_modify+=('tidy_strip') @@ -52,6 +52,37 @@ source_files() { | sort -zu | tr '\0' '\n' } +compress_dwarf_files(){ + binary_files="$@" + dwz_opts="" + readarray dwz_files < <(find "$dbgdir" -type f -name \*.debug | LC_ALL=C sort) + + # dwz multifile attempts to remove common symols found in all ELF files and + # throw them into one file. Replacing the references with .gnu_debugaltlink + local fullver=$(get_full_version) + dwz_multifile_name="${pkgbase}-${fullver}-${CARCH}" + if [ ${#dwz_files[@]} -gt 1 ]; then + dwz_opts="-m .dwz/${dwz_multifile_name}" + fi + mkdir -p "$dbgdir/.dwz" + + # The dwz options are taken from rpm/debugedit + (cd "$dbgdir" && + LANG=C dwz --hardlink \ + --quiet \ + --relative \ + --low-mem-die-limit 10000000 \ + --max-die-limit 50000000 \ + $dwz_opts \ + ${dwz_files[@]} 2> /dev/null) + + # Remove dir if empty + rmdir "$dbgdir/.dwz" 2> /dev/null + + # Some debuggers might use the CRC to find relevant debug files instead of the build-id + LANG=C sepdebugcrcfix "$dbgdir" $binary_files &>/dev/null +} + strip_file() { local binary=$1; shift @@ -142,7 +173,8 @@ tidy_strip() { fi local binary strip_flags - find . -type f -perm -u+w -print0 2>/dev/null | while IFS= read -rd '' binary ; do + declare -a binary_files + while IFS= read -rd '' binary ; do local STRIPLTO=0 case "$(LC_ALL=C readelf -h "$binary" 2>/dev/null)" in *Type:*'DYN (Shared object file)'*) # Libraries (.so) or Relocatable binaries @@ -166,6 +198,12 @@ tidy_strip() { esac strip_file "$binary" ${strip_flags} (( STRIPLTO )) && strip_lto "$binary" - done + binary_files+=("$binary") + done < <(find . -type f -perm -u+w -print0 2>/dev/null) + + if check_option "debug" "y" && check_option "dwz" "y"; then + msg2 "$(gettext "Compressing dwarf files...")" + compress_dwarf_files ${binary_files[@]} + fi fi } -- 2.36.1
On Saturday, 28 May 2022, Morten Linderud <foxboron@archlinux.org> wrote:
From: Morten Linderud <morten@linderud.pw>
`dwz` allows us to compress the DWARF files when building debug packages. Potentially shaving off some of the sizes of the symbols we distribute.
A sample of packages built with dwz;
pacman: Original debug info size: 1520kB Size after compression: 1252kB
systemd: Original debug info size: 46692kB Size after compression: 41036kB
Out of curiosity: have you measured the overhead? The one during package creation and runtime one during extraction. Especially for larger packages - say chromium, Firefox, etc That aside, I really like the explicit die-limit args which should ensure that even the debug packages stay reproducible. Thanks Emil
On Wed, Jun 01, 2022 at 10:45:42AM +0100, Emil Velikov wrote:
On Saturday, 28 May 2022, Morten Linderud <foxboron@archlinux.org> wrote:
From: Morten Linderud <morten@linderud.pw>
`dwz` allows us to compress the DWARF files when building debug packages. Potentially shaving off some of the sizes of the symbols we distribute.
A sample of packages built with dwz;
pacman: Original debug info size: 1520kB Size after compression: 1252kB
systemd: Original debug info size: 46692kB Size after compression: 41036kB
Out of curiosity: have you measured the overhead?
The one during package creation and runtime one during extraction. Especially for larger packages - say chromium, Firefox, etc
That aside, I really like the explicit die-limit args which should ensure that even the debug packages stay reproducible.
I haven't. I should frankly rewrite the commit message and provide a bit more details because I don't think they are 100% correct with the multifile approach. However from what I can tell the largest overhead during package creation is still the stripping itself and compression. dwz takes less then a second. I don't know how to test extraction though. Do feel free to help me test dwz though :) I'll add it to the repos so it's easier to test it. -- Morten Linderud PGP: 9C02FF419FECBE16
On 29/5/22 07:56, Morten Linderud wrote:
From: Morten Linderud <morten@linderud.pw>
`dwz` allows us to compress the DWARF files when building debug packages. Potentially shaving off some of the sizes of the symbols we distribute.
Looks like DWARF-5 (which is the default with current gcc (and clang?) toolchains) is not supported in full by dwz - using dwz everywhere will spit out some errors, but these could be ignored. I'm also not sure LLDB supports dwz compressed debug symbols.
A sample of packages built with dwz;
pacman: Original debug info size: 1520kB Size after compression: 1252kB
systemd: Original debug info size: 46692kB Size after compression: 41036kB
What DWARF format was this using? Is this package or on-disk size? Have you compared to: LDFLAGS+=" -Wl,--compress-debug-sections=zlib" I guess that affects only on disk size but not package size. Also very much not suggesting any distro uses that flag - debug overhead galore!. Finally, I see some concerns that dwz may make debuginfod require larger downloads as the common files get downloaded. It would be good to quantify this. However, all the above is more considerations for a distro to consider when enabling dwz support. Less of a consideration of whether this should be added to makepkg.
Signed-off-by: Morten Linderud <morten@linderud.pw> --- doc/makepkg.conf.5.asciidoc | 3 ++ scripts/libmakepkg/executable/dwz.sh.in | 38 ++++++++++++++++ .../executable/sepdebugcrcfix.sh.in | 38 ++++++++++++++++ scripts/libmakepkg/tidy/strip.sh.in | 44 +++++++++++++++++-- 4 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 scripts/libmakepkg/executable/dwz.sh.in create mode 100644 scripts/libmakepkg/executable/sepdebugcrcfix.sh.in
diff --git a/doc/makepkg.conf.5.asciidoc b/doc/makepkg.conf.5.asciidoc index a0d9a6d4..2e40f27f 100644 --- a/doc/makepkg.conf.5.asciidoc +++ b/doc/makepkg.conf.5.asciidoc @@ -194,6 +194,9 @@ Options DEBUG_CXXFLAGS to their counterpart buildflags. Creates a separate package containing the debug symbols when used with `strip'.
+ *dwz*;; + Compress dwarf files inside the debug package.
... inside debug packages Without going further into the patch yet, I am guessing this only works with debug packages? What about unstripped binaries in packages with 'debug' and '!strip'?
+ *lto*;; Enable building packages using link time optimization. Adds the flags specified in LTOFLAGS to CFLAGS, CXXFLAGS and LDFLAGS (or diff --git a/scripts/libmakepkg/executable/dwz.sh.in b/scripts/libmakepkg/executable/dwz.sh.in new file mode 100644 index 00000000..a540697c --- /dev/null +++ b/scripts/libmakepkg/executable/dwz.sh.in @@ -0,0 +1,38 @@ +#!/usr/bin/bash +# +# dwz.sh - Confirm presence of dwz binary +# +# Copyright (c) 2011-2022 Pacman Development Team <pacman-dev@lists.archlinux.org>
Copyright (c) 2022
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +[[ -n "$LIBMAKEPKG_EXECUTABLE_DWZ_SH" ]] && return +LIBMAKEPKG_EXECUTABLE_DWZ_SH=1 + +LIBRARY=${LIBRARY:-'@libmakepkgdir@'} + +source "$LIBRARY/util/message.sh" +source "$LIBRARY/util/option.sh" + +executable_functions+=('executable_dwz') + +executable_dwz() { + if check_option "dwz" "y"; then
check_option strip too?
+ if ! type -p dwz>/dev/null; then
Space after binary name
+ error "$(gettext "Cannot find the %s binary required to compress dwarf files.")" "dwz"
DWARF should have capitals. Can we just say "compress debugging symbols"? Not 100% technically accurate, but more readily interpretable.
+ return 1 + fi + fi +} diff --git a/scripts/libmakepkg/executable/sepdebugcrcfix.sh.in b/scripts/libmakepkg/executable/sepdebugcrcfix.sh.in new file mode 100644 index 00000000..6259ac32 --- /dev/null +++ b/scripts/libmakepkg/executable/sepdebugcrcfix.sh.in @@ -0,0 +1,38 @@ +#!/usr/bin/bash +# +# sepdebugcrcfix.sh - Confirm presence of sepdebugcrcfix binary +# +# Copyright (c) 2011-2022 Pacman Development Team <pacman-dev@lists.archlinux.org> +#
Just 2022
+# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +[[ -n "$LIBMAKEPKG_EXECUTABLE_SEPDEBUGCRCFIX_SH" ]] && return +LIBMAKEPKG_EXECUTABLE_SEPDEBUGCRCFIX_SH=1 + +LIBRARY=${LIBRARY:-'@libmakepkgdir@'} + +source "$LIBRARY/util/message.sh" +source "$LIBRARY/util/option.sh" + +executable_functions+=('executable_sepdebugcrcfix') + +executable_sepdebugcrcfix() { + if check_option "dwz" "y"; then
check_option strip too?
+ if ! type -p sepdebugcrcfix>/dev/null; then + error "$(gettext "Cannot find the %s binary required to fix CRC.")" "sepdebugcrcfix"
Poor error message. Given it is entirely for dwz, use the same error as dwz.
+ return 1 + fi + fi +} diff --git a/scripts/libmakepkg/tidy/strip.sh.in b/scripts/libmakepkg/tidy/strip.sh.in index 688bcf1b..1365b77b 100644 --- a/scripts/libmakepkg/tidy/strip.sh.in +++ b/scripts/libmakepkg/tidy/strip.sh.in @@ -27,7 +27,7 @@ source "$LIBRARY/util/message.sh" source "$LIBRARY/util/option.sh"
-packaging_options+=('strip' 'debug') +packaging_options+=('strip' 'debug' 'dwz') tidy_modify+=('tidy_strip')
@@ -52,6 +52,37 @@ source_files() { | sort -zu | tr '\0' '\n' }
+compress_dwarf_files(){ + binary_files="$@" + dwz_opts="" + readarray dwz_files < <(find "$dbgdir" -type f -name \*.debug | LC_ALL=C sort) + + # dwz multifile attempts to remove common symols found in all ELF files and
symbols
+ # throw them into one file. Replacing the references with .gnu_debugaltlink + local fullver=$(get_full_version) + dwz_multifile_name="${pkgbase}-${fullver}-${CARCH}"
This variable is used once. Remove.
+ if [ ${#dwz_files[@]} -gt 1 ]; then + dwz_opts="-m .dwz/${dwz_multifile_name}" + fi + mkdir -p "$dbgdir/.dwz"
Is that directory only needed for multifile?
+ + # The dwz options are taken from rpm/debugedit + (cd "$dbgdir" && + LANG=C dwz --hardlink \ + --quiet \ + --relative \ + --low-mem-die-limit 10000000 \ + --max-die-limit 50000000 \ + $dwz_opts \ + ${dwz_files[@]} 2> /dev/null) + + # Remove dir if empty + rmdir "$dbgdir/.dwz" 2> /dev/null
Directory removed here, so previous comment moot I guess.
+ + # Some debuggers might use the CRC to find relevant debug files instead of the build-id + LANG=C sepdebugcrcfix "$dbgdir" $binary_files &>/dev/null +} + strip_file() { local binary=$1; shift
@@ -142,7 +173,8 @@ tidy_strip() { fi
local binary strip_flags - find . -type f -perm -u+w -print0 2>/dev/null | while IFS= read -rd '' binary ; do + declare -a binary_files + while IFS= read -rd '' binary ; do local STRIPLTO=0 case "$(LC_ALL=C readelf -h "$binary" 2>/dev/null)" in *Type:*'DYN (Shared object file)'*) # Libraries (.so) or Relocatable binaries @@ -166,6 +198,12 @@ tidy_strip() { esac strip_file "$binary" ${strip_flags} (( STRIPLTO )) && strip_lto "$binary" - done + binary_files+=("$binary") + done < <(find . -type f -perm -u+w -print0 2>/dev/null) + + if check_option "debug" "y" && check_option "dwz" "y"; then
If checking for both here, then you should check for both when requiring the binaries.
+ msg2 "$(gettext "Compressing dwarf files...")" + compress_dwarf_files ${binary_files[@]} + fi fi }
participants (3)
-
Allan McRae
-
Emil Velikov
-
Morten Linderud