[pacman-dev] [PATCH] pacsysclean: Add new contrib script

Dan McGee dpmcgee at gmail.com
Tue Oct 25 11:22:08 EDT 2011


On Thu, Aug 11, 2011 at 5:40 PM, Eric Bélanger <snowmaniscool at gmail.com> wrote:
> On Mon, Aug 8, 2011 at 9:19 PM, Eric Bélanger <snowmaniscool at gmail.com> wrote:
>> On Mon, Aug 8, 2011 at 6:30 PM, Dan McGee <dpmcgee at gmail.com> wrote:
>>> On Sun, Aug 7, 2011 at 4:14 PM, Eric Bélanger <snowmaniscool at gmail.com> wrote:
>>>> pacsysclean sorts installed packages by decreasing installed size. It's
>>>> useful for finding large unused packages when doing system clean-up. This
>>>> script is an improved version of other similar scripts posted on the
>>>> forums. Thanks go to Dave as I reused the size_to_human function from his
>>>> paccache script.
>>>>
>>>> Signed-off-by: Eric Bélanger <snowmaniscool at gmail.com>
>>>>
>>>> ---
>>>>
>>>> If you can think of a better name, feel free to suggest one.
>>>> ---
>>>>  contrib/.gitignore     |    1 +
>>>>  contrib/Makefile.am    |    5 ++-
>>>>  contrib/pacsysclean.in |   87 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  3 files changed, 92 insertions(+), 1 deletions(-)
>>>>  create mode 100755 contrib/pacsysclean.in
>>>>
>>>> diff --git a/contrib/.gitignore b/contrib/.gitignore
>>>> index 1bd145f..19b81e0 100644
>>>> --- a/contrib/.gitignore
>>>> +++ b/contrib/.gitignore
>>>> @@ -6,5 +6,6 @@ paclist
>>>>  paclog-pkglist
>>>>  pacscripts
>>>>  pacsearch
>>>> +pacsysclean
>>>>  wget-xdelta.sh
>>>>  zsh_completion
>>>> diff --git a/contrib/Makefile.am b/contrib/Makefile.am
>>>> index 10b03a2..754096d 100644
>>>> --- a/contrib/Makefile.am
>>>> +++ b/contrib/Makefile.am
>>>> @@ -5,7 +5,8 @@ OURSCRIPTS = \
>>>>        paclist \
>>>>        paclog-pkglist \
>>>>        pacscripts \
>>>> -       pacsearch
>>>> +       pacsearch \
>>>> +       pacsysclean
>>>>
>>>>  OURFILES = \
>>>>        bash_completion \
>>>> @@ -21,6 +22,7 @@ EXTRA_DIST = \
>>>>        paclist.in \
>>>>        pacscripts.in \
>>>>        pacsearch.in \
>>>> +       pacsysclean.in \
>>>>        vimprojects \
>>>>        zsh_completion.in \
>>>>        README
>>>> @@ -59,6 +61,7 @@ paclist: $(srcdir)/paclist.in
>>>>  paclog-pkglist: $(srcdir)/paclog-pkglist.in
>>>>  pacscripts: $(srcdir)/pacscripts.in
>>>>  pacsearch: $(srcdir)/pacsearch.in
>>>> +pacsysclean: $(srcdir)/pacsysclean.in
>>>>  pactree: $(srcdir)/pactree.in
>>>>  zsh_completion: $(srcdir)/zsh_completion.in
>>>>
>>>> diff --git a/contrib/pacsysclean.in b/contrib/pacsysclean.in
>>>> new file mode 100755
>>>> index 0000000..e393e24
>>>> --- /dev/null
>>>> +++ b/contrib/pacsysclean.in
>>>> @@ -0,0 +1,87 @@
>>>> +#!/bin/bash
>>>> +
>>>> +# pacsysclean - Sort installed packages by decreasing installed size. Useful for system clean-up.
>>>> +#
>>>> +# Copyright (C) 2011 Eric Bélanger <eric at archlinux.org>
>>>> +#
>>>> +# This program is free software; you can redistribute it and/or
>>>> +# modify it under the terms of the GNU General Public License
>>>> +# as published by the Free Software Foundation; either version 2
>>>> +# of the License, or (at your option) any later version.
>>>> +#
>>>> +# This program is distributed in the hope that it will be useful,
>>>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>>> +# GNU General Public License for more details.
>>>> +#
>>>> +# You should have received a copy of the GNU General Public License
>>>> +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
>>>> +
>>> cut from here
>>>> +export TEXTDOMAIN='pacman'
>>>> +export TEXTDOMAINDIR='/usr/share/locale'
>>>> +
>>>> +# determine whether we have gettext; make it a no-op if we do not
>>>> +if ! type gettext &>/dev/null; then
>>>> +       gettext() {
>>>> +               echo "$@"
>>>> +       }
>>>> +fi
>>> to here. You aren't using gettext() and we don't support it in contrib anyway.
>>
>> OK. I saw other scripts with this at their beginning so I thought it
>> was standard stuff.
>
> I've appended a new version of the script with the suggested changes at
> the end of this email. I'll resend a git patch once it's finalized.
> I'll comment inline on how I handled the many issues.
Did we ever get a resubmit from this? Don't want to see it get dropped.

>>>> +
>>>> +usage() {
>>>> +       echo "$0 - Sort installed packages by decreasing installed size."
>>>> +       echo
>>>> +       echo "Usage: $0 [options]"
>>>> +       echo
>>>> +       echo "Options:"
>>>> +       echo "  -a               List all packages (Default)"
>>>> +       echo "  -e               List unrequired explicitely installed packages"
>>> spelling, explicitly. Slightly related is using "not required" in the
>>> description as unrequired is not really a word (but it makes sense as
>>> a one-word flag, just not as a definition).
>>>
>>> Wouldn't it make more sense to allow any options pacman -Q allows on
>>> filtering, rather than just trying to emulate 1? I can see people
>>> wanting to do -Qdt, Qet, -Qm, etc.
>>
>> Probably.  I could change the -e option so it accepts an argument
>> instead and use that as the pacman query option.
>>
>
> I removed the useless -a option as it is the default. I renamed the -e
> option to -o and changed it to accept custom options, e.g.:
> pacsysclean -o "et"
> pacsysclean -o "Qm"
>
>>>
>>>> +       echo "  -h, --help       Show this help message and exit"
>>>> +}
>>>> +
>>>> +size_to_human() {
>>>> +       awk -v size="$1" '
>>>> +       BEGIN {
>>>> +               suffix[1] = "KiB"
>>>> +               suffix[2] = "MiB"
>>>> +               suffix[3] = "GiB"
>>>> +               suffix[4] = "TiB"
>>>> +               count = 1
>>>> +
>>>> +               while (size > 1024) {
>>>> +                       size /= 1024
>>>> +                       count++
>>>> +               }
>>>> +
>>>> +               sizestr = sprintf("%.2f", size)
>>>> +               sub(/.?0+$/, "", sizestr)
>>>> +               printf("%s %s", sizestr, suffix[count])
>>>> +       }'
>>> Isn't this fairly expensive to invoke awk each time you call it? This
>>> seems bash-math-able. It also already differs from Dave's
>>> implementation as he added the low 'B' suffix, and neither of these
>>> have the 'PiB' suffix that our formatter in pacman has.
>>
>> It isn't expensive. Here, with 1100 packages installed, it takes 8
>> seconds to execute the last while loop. I could also make the human
>> readable format optional if it's an issue.
>>
>> After stripping the ending '.00' from the installed size reported by
>> pacman, it's bash-math-able but you don't have any decimal places as
>> bash can only do integer division. I guess that shouldn't be a big
>> problem as estimated sizes are good enough for this script. If we make
>> the human size optional, we could display the sizes in KB as reported
>> by pacman by default and have the human size done in bash.
>> Alternatively, I could also add the 'B' and 'PiB' suffix to the
>> size_to_human function if we decide to keep it. Let me know which
>> method would be preferable.
>
> I've kept the awk method. I added the B and PiB suffixes but I had to
> change the initial value of count to 2 as my input is in KiB. I don't
> know if Dave's script needs that change too.
>
> I also did two other changes so that it supports locales which use the
> comma instead of the decimal point (e.g. fr_CA.UTF-8). I added
> --use-lc-numeric to the awk command and changed "while (size > 1024)"
> to "while (size+0 > 1024)" to force a numeric conversion.
>
>>
>>>
>>>> +}
>>>> +
>>>> +PACMAN_OPTS="-Qq"
>>>> +if [ -n "$1" ]; then
>>>> +       case "$1" in
>>>> +               -a) PACMAN_OPTS="-Qq" ;;
>>>> +               -e) PACMAN_OPTS="-Qetq" ;;
>>>> +               -h|--help) usage; exit 0 ;;
>>>> +               *) usage; exit 1 ;;
>>>> +       esac
>>>> +fi
>>>> +
>>>> +TEMPDIR=$(mktemp -d /tmp/cleanup-script.XXXX)
>>>> +cd $TEMPDIR
>>>> +
>>>> +# Sort installed packages by decreasing installed size. Useful for system clean-up.
>>>> +for package in $(pacman $PACMAN_OPTS); do
>>>> +       echo $(pacman -Qi $package |grep 'Installed Size' |awk '{print $4}') $package
>>> I believe $(pacman -Qiet) would work just fine, right? And save you
>>> several invocations of commands making this a lot more efficient, as
>>> long as you properly navigate the output.
>>>
>>
>> I'm not sure what you mean. If you're talking about replacing:
>> $(pacman -Qi $package |grep 'Installed Size' |awk '{print $4}')
>> by
>> $(pacman -Qiet)
>> then it won't work. The current expression gives the installed size so
>> I end up with two columns: one with the sizes and one with their
>> corresponding packages. What you suggest would just output a lot of
>> junk which will make it more difficult to sort and keep track of which
>> size goes with which packages.
>>
>
> I went with what Dave suggested to remove the need for the temp files.
>
>>
>>> This also won't work as written if you are in a different locale; I
>>> highly recommend testing every pacman script by enabling zh_CN.UTF-8
>>> in /etc/locale.gen, regenerating locales, and then executing via
>>> 'LANG=zh_CN.UTF-8 ./my_awesome_script.sh".
>>>
>>
>> OK, will do.
>
> The new script runs fine in the 3 locales I tested: en_CA.UTF-8,
> fr_CA.UTF-8 and zh_CN.UTF-8 so it should be OK for the rest.
>
>
> #!/bin/bash
>
> # pacsysclean - Sort installed packages by decreasing installed size.
> # Useful for system clean-up.
> #
> # Copyright (C) 2011 Eric Bélanger <eric at archlinux.org>
> #
> # This program is free software; you can redistribute it and/or
> # modify it under the terms of the GNU General Public License
> # as published by the Free Software Foundation; either version 2
> # of the License, or (at your option) any later version.
> #
> # This program is distributed in the hope that it will be useful,
> # but WITHOUT ANY WARRANTY; without even the implied warranty of
> # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> # GNU General Public License for more details.
> #
> # You should have received a copy of the GNU General Public License
> # along with this program.  If not, see <http://www.gnu.org/licenses/>.
>
> usage() {
>        echo "pacsysclean - Sort installed packages by decreasing installed size."
>        echo
>        echo "Usage: pacsysclean [options]"
>        echo
>        echo "Options:"
>        echo "  -o <options>     Specify custom pacman query options"
>        echo "  -h, --help       Show this help message and exit"
> }
>
> size_to_human() {
>        awk --use-lc-numeric -v size="$1" '
>        BEGIN {
>                suffix[1] = "B"
>                suffix[2] = "KiB"
>                suffix[3] = "MiB"
>                suffix[4] = "GiB"
>                suffix[5] = "TiB"
>                suffix[6] = "PiB"
>                count = 2
>
>                while (size+0 > 1024) {
>                        size /= 1024
>                        count++
>                }
>
>                sizestr = sprintf("%.2f", size)
>                sub(/.?0+$/, "", sizestr)
>                printf("%s %s", sizestr, suffix[count])
>        }'
> }
>
> if [ -n "$1" ]; then
>        case "$1" in
>                -o) PACMAN_OPTS="-${2/Q}" ;;
>                -h|--help) usage; exit 0 ;;
>                *) usage; exit 1 ;;
>        esac
> fi
>
> for package in $(pacman -Qq $PACMAN_OPTS); do
>        printf '%g\t%s\n' "$(pacman -Qi $package | awk '/ K$/{print
> $(NF-1)}')" "$package"
> done | sort -gr | while IFS=$'\t' read -r size pkg; do
>        printf '%s\t%s\n' "$pkg" "$(size_to_human "$size")"
> done
>
> #===============================
>
>


More information about the pacman-dev mailing list