[pacman-dev] [PATCH v2 5/8] Avoid problematic use of Python's StringIO.

Allan McRae allan at archlinux.org
Sat Oct 12 21:16:03 EDT 2013


On 13/10/13 02:44, Jeremy Heiner wrote:
> 2to3 reported that StringIO moved into the io module in Python 3 and
> suggested fixing the imports accordingly. But that is not a complete
> solution because of the more rigid separation between byte and char in
> Python 3. So in addition to fixing the imports, BytesIO is used, and
> the chars are encoded to bytes as needed.
> 
> There was one hiccup in encoding in test sync600: the unicode strings
> which work in other tests for some reason cause problems here. This
> hiccup is only in 2.7, so a try block is used to fall back to using the
> unencoded string on that runtime only.

Using "for some reason" in the description is a clear red flag for me not to
apply the patch.  It tends to mean that an underlying issue is being
missed due to a lack of understanding.

> One more bytes/chars mismatch (which is only revealed at run time)
> occurs in passing data to hashlib.md5, so this patch fixes that too.
> 
> Signed-off-by: Jeremy Heiner <ScalaProtractor at gmail.com>
> ---
>  test/pacman/pmdb.py  |  5 +++--
>  test/pacman/pmpkg.py | 16 +++++++++++++---
>  test/pacman/util.py  |  2 +-
>  3 files changed, 17 insertions(+), 6 deletions(-)
> 
> diff --git a/test/pacman/pmdb.py b/test/pacman/pmdb.py
> index 53de91e..b4de281 100644
> --- a/test/pacman/pmdb.py
> +++ b/test/pacman/pmdb.py
> @@ -19,7 +19,7 @@
>  
>  import os
>  import shutil
> -from StringIO import StringIO
> +import io
>  import tarfile
>  
>  import pmpkg
> @@ -250,8 +250,9 @@ def generate(self):
>                  for name, data in entry.items():
>                      filename = os.path.join(pkg.fullname(), name)
>                      info = tarfile.TarInfo(filename)
> +                    data = data.encode('utf8') # 'utf8' req'd in 2.7
>                      info.size = len(data)
> -                    tar.addfile(info, StringIO(data))
> +                    tar.addfile(info, io.BytesIO(data))
>              tar.close()
>              # TODO: this is a bit unnecessary considering only one test uses it
>              serverpath = os.path.join(self.root, util.SYNCREPO, self.treename)
> diff --git a/test/pacman/pmpkg.py b/test/pacman/pmpkg.py
> index 6f7ae6e..5a5e78e 100644
> --- a/test/pacman/pmpkg.py
> +++ b/test/pacman/pmpkg.py
> @@ -21,7 +21,7 @@
>  import tempfile
>  import stat
>  import shutil
> -from StringIO import StringIO
> +import io
>  import tarfile
>  
>  import util
> @@ -151,8 +151,9 @@ def makepkg(self, path):
>          tar = tarfile.open(self.path, "w:gz")
>          for name, data in archive_files:
>              info = tarfile.TarInfo(name)
> +            data = data.encode('utf8') # 'utf8' req'd in 2.7
>              info.size = len(data)
> -            tar.addfile(info, StringIO(data))
> +            tar.addfile(info, io.BytesIO(data))
>  
>          # Generate package file system
>          for name in self.files:
> @@ -172,8 +173,17 @@ def makepkg(self, path):
>              else:
>                  # TODO wow what a hack, adding a newline to match mkfile?
>                  filedata = name + "\n"
> +                try:
> +                    filedata = filedata.encode('utf8') # 'utf8' req'd in 2.7
> +                except:
> +                    import sys
> +                    if sys.hexversion >= 0x03000000:
> +                        raise
> +                    # else use unencoded data
> +                    # which works fine on 2.7 because it isn't careful
> +                    # to distinguish bytes from unicode chars.
>                  info.size = len(filedata)
> -                tar.addfile(info, StringIO(filedata))
> +                tar.addfile(info, io.BytesIO(filedata))
>  
>          tar.close()
>  
> diff --git a/test/pacman/util.py b/test/pacman/util.py
> index ab5a6f4..4d5ba8a 100644
> --- a/test/pacman/util.py
> +++ b/test/pacman/util.py
> @@ -151,7 +151,7 @@ def getmd5sum(filename):
>  
>  def mkmd5sum(data):
>      checksum = hashlib.md5()
> -    checksum.update("%s\n" % data)
> +    checksum.update((data+"\n").encode('utf8')) # 'utf8' req'd in 2.7
>      return checksum.hexdigest()
>  
>  
> 



More information about the pacman-dev mailing list