[arch-commits] Commit in paperwork (7 files)

Balló György bgyorgy at archlinux.org
Wed Dec 19 23:31:36 UTC 2018


    Date: Wednesday, December 19, 2018 @ 23:31:35
  Author: bgyorgy
Revision: 416805

archrelease: copy trunk to community-any

Added:
  paperwork/repos/
  paperwork/repos/community-any/
  paperwork/repos/community-any/0001-Filter-out-boxes-that-start-at-0-0.patch
    (from rev 416804, paperwork/trunk/0001-Filter-out-boxes-that-start-at-0-0.patch)
  paperwork/repos/community-any/0001-Filter-out-too-large-boxes-on-selection.patch
    (from rev 416804, paperwork/trunk/0001-Filter-out-too-large-boxes-on-selection.patch)
  paperwork/repos/community-any/0001-Fix-importing-PNG-files-with-transparency.patch
    (from rev 416804, paperwork/trunk/0001-Fix-importing-PNG-files-with-transparency.patch)
  paperwork/repos/community-any/0001-util-find_language-New-versions-of-pycountry-do-not-.patch
    (from rev 416804, paperwork/trunk/0001-util-find_language-New-versions-of-pycountry-do-not-.patch)
  paperwork/repos/community-any/PKGBUILD
    (from rev 416804, paperwork/trunk/PKGBUILD)

-----------------------------------------------------------------+
 0001-Filter-out-boxes-that-start-at-0-0.patch                   |   37 ++++
 0001-Filter-out-too-large-boxes-on-selection.patch              |   39 ++++
 0001-Fix-importing-PNG-files-with-transparency.patch            |   25 ++
 0001-util-find_language-New-versions-of-pycountry-do-not-.patch |   87 ++++++++++
 PKGBUILD                                                        |   62 +++++++
 5 files changed, 250 insertions(+)

Copied: paperwork/repos/community-any/0001-Filter-out-boxes-that-start-at-0-0.patch (from rev 416804, paperwork/trunk/0001-Filter-out-boxes-that-start-at-0-0.patch)
===================================================================
--- repos/community-any/0001-Filter-out-boxes-that-start-at-0-0.patch	                        (rev 0)
+++ repos/community-any/0001-Filter-out-boxes-that-start-at-0-0.patch	2018-12-19 23:31:35 UTC (rev 416805)
@@ -0,0 +1,37 @@
+From d1da8cd20554aa1d3c766855c251a5a643d2abb4 Mon Sep 17 00:00:00 2001
+From: Jonas Wloka <code at jowlo.de>
+Date: Sat, 17 Nov 2018 19:54:20 +0100
+Subject: [PATCH] Filter out boxes that start at (0, 0)
+
+Tesseract returns way too large boxes that cover the whole page, mostly
+containing only a single special character. All of these boxes (in my
+tests) have coordinate 0 0.
+
+This filters out all of these boxes.
+---
+ paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py
+index 35c6b7f4..e324c670 100644
+--- a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py
++++ b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py
+@@ -251,10 +251,14 @@ class JobPageBoxesLoader(Job):
+             boxes = set()
+             for line in line_boxes:
+                 for word in line.word_boxes:
+-                    if word.content.strip() == "":
++                    if word.content.strip() == "" or (word.position[0][0] == 0
++                                                      and word.position[0][1] == 0):
+                         # XXX(Jflesch): Tesseract 3.03 (hOCR) returns big and
+                         # empty word boxes sometimes (just a single space
+                         # inside). They often match images, but not always.
++                        # XXX(jowlo): Tesseract returns large boxes containing
++                        # single letters (mostly special chars) that cover the
++                        # whole page. All of these start at (0 0)
+                         continue
+                     boxes.add(word)
+ 
+-- 
+2.20.0
+

Copied: paperwork/repos/community-any/0001-Filter-out-too-large-boxes-on-selection.patch (from rev 416804, paperwork/trunk/0001-Filter-out-too-large-boxes-on-selection.patch)
===================================================================
--- repos/community-any/0001-Filter-out-too-large-boxes-on-selection.patch	                        (rev 0)
+++ repos/community-any/0001-Filter-out-too-large-boxes-on-selection.patch	2018-12-19 23:31:35 UTC (rev 416805)
@@ -0,0 +1,39 @@
+From 318d9ef80a7dc21da7ad45fa46c11c8fb19ec8fb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ball=C3=B3=20Gy=C3=B6rgy?= <ballogyor at gmail.com>
+Date: Wed, 19 Dec 2018 16:59:46 +0100
+Subject: [PATCH] Filter out too large boxes on selection
+
+Tesseract returns way too large boxes that cover the whole page, mostly containing only a single special character.
+
+This is a complement for commit d1da8cd2 to filter out all of these boxes also on text selection.
+---
+ paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py
+index e324c670..b022b178 100644
+--- a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py
++++ b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py
+@@ -731,6 +731,9 @@ class SimplePageDrawer(Drawer):
+         index = 0
+         for line in self.boxes['lines']:
+             for box in line.word_boxes:
++                if box.content.strip() == "" or (box.position[0][0] == 0
++                                                 and box.position[0][1] == 0):
++                    continue
+                 rcx = (box.position[0][0] + box.position[1][0]) / 2
+                 rcy = (box.position[0][1] + box.position[1][1]) / 2
+                 w = box.position[1][0] - box.position[0][0]
+@@ -767,6 +770,9 @@ class SimplePageDrawer(Drawer):
+         selected = []
+         for line in self.boxes['lines']:
+             for box in line.word_boxes:
++                if box.content.strip() == "" or (box.position[0][0] == 0
++                                                 and box.position[0][1] == 0):
++                    continue
+                 if box == box_start:
+                     in_list = True
+                 if in_list:
+-- 
+2.20.0
+

Copied: paperwork/repos/community-any/0001-Fix-importing-PNG-files-with-transparency.patch (from rev 416804, paperwork/trunk/0001-Fix-importing-PNG-files-with-transparency.patch)
===================================================================
--- repos/community-any/0001-Fix-importing-PNG-files-with-transparency.patch	                        (rev 0)
+++ repos/community-any/0001-Fix-importing-PNG-files-with-transparency.patch	2018-12-19 23:31:35 UTC (rev 416805)
@@ -0,0 +1,25 @@
+From 2ffb468a41d28eceda3afe869dd8c5af70203bf4 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ball=C3=B3=20Gy=C3=B6rgy?= <ballogyor at gmail.com>
+Date: Wed, 19 Dec 2018 17:19:27 +0100
+Subject: [PATCH] Fix importing PNG files with transparency
+
+Pillow does not allow to save images with transparency to JPEG. These images need to be converted first.
+---
+ paperwork-backend/paperwork_backend/img/page.py | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/paperwork-backend/paperwork_backend/img/page.py b/paperwork-backend/paperwork_backend/img/page.py
+index ddbb9214..86d5e4be 100644
+--- a/paperwork-backend/paperwork_backend/img/page.py
++++ b/paperwork-backend/paperwork_backend/img/page.py
+@@ -141,6 +141,7 @@ class ImgPage(BasicPage):
+ 
+     def __set_img(self, img):
+         with self.fs.open(self.__img_path, 'wb') as fd:
++            img = img.convert("RGB")
+             img.save(fd, format="JPEG")
+ 
+     img = property(__get_img, __set_img)
+-- 
+2.20.0
+

Copied: paperwork/repos/community-any/0001-util-find_language-New-versions-of-pycountry-do-not-.patch (from rev 416804, paperwork/trunk/0001-util-find_language-New-versions-of-pycountry-do-not-.patch)
===================================================================
--- repos/community-any/0001-util-find_language-New-versions-of-pycountry-do-not-.patch	                        (rev 0)
+++ repos/community-any/0001-util-find_language-New-versions-of-pycountry-do-not-.patch	2018-12-19 23:31:35 UTC (rev 416805)
@@ -0,0 +1,87 @@
+From ad4555f9904805e4fd56a30ca6529536edb0f919 Mon Sep 17 00:00:00 2001
+From: Jerome Flesch <jflesch at openpaper.work>
+Date: Wed, 19 Dec 2018 22:38:59 +0100
+Subject: [PATCH] util: find_language(): New versions of pycountry do not raise
+ exception when a language is not found, they return None. Ref #812
+
+Signed-off-by: Jerome Flesch <jflesch at openpaper.work>
+---
+ paperwork-backend/paperwork_backend/util.py | 60 ++++++++-------------
+ 1 file changed, 23 insertions(+), 37 deletions(-)
+
+diff --git a/paperwork-backend/paperwork_backend/util.py b/paperwork-backend/paperwork_backend/util.py
+index 90fd0eb9..73701c8a 100644
+--- a/paperwork-backend/paperwork_backend/util.py
++++ b/paperwork-backend/paperwork_backend/util.py
+@@ -312,45 +312,31 @@ def find_language(lang_str=None, allow_none=False):
+         lang_str = lang_str.split("_")[0]
+ 
+     try:
+-        return pycountry.pycountry.languages.get(name=lang_str.title())
+-    except (KeyError, UnicodeDecodeError):
+-        pass
+-    try:
+-        return pycountry.pycountry.languages.get(iso_639_3_code=lang_str)
+-    except (KeyError, UnicodeDecodeError):
+-        pass
+-    try:
+-        return pycountry.pycountry.languages.get(iso639_3_code=lang_str)
+-    except (KeyError, UnicodeDecodeError):
+-        pass
+-    try:
+-        return pycountry.pycountry.languages.get(iso639_2T_code=lang_str)
+-    except (KeyError, UnicodeDecodeError):
+-        pass
+-    try:
+-        return pycountry.pycountry.languages.get(iso639_1_code=lang_str)
+-    except (KeyError, UnicodeDecodeError):
+-        pass
+-    try:
+-        return pycountry.pycountry.languages.get(terminology=lang_str)
+-    except (KeyError, UnicodeDecodeError):
+-        pass
+-    try:
+-        return pycountry.pycountry.languages.get(bibliographic=lang_str)
+-    except (KeyError, UnicodeDecodeError):
+-        pass
+-    try:
+-        return pycountry.pycountry.languages.get(alpha_3=lang_str)
+-    except (KeyError, UnicodeDecodeError):
+-        pass
+-    try:
+-        return pycountry.pycountry.languages.get(alpha_2=lang_str)
+-    except (KeyError, UnicodeDecodeError):
+-        pass
+-    try:
+-        return pycountry.pycountry.languages.get(alpha2=lang_str)
++        r = pycountry.pycountry.languages.get(name=lang_str.title())
++        if r is not None:
++            return r
+     except (KeyError, UnicodeDecodeError):
+         pass
++
++    ATTRS = (
++        'iso_639_3_code',
++        'iso639_3_code',
++        'iso639_2T_code',
++        'iso639_1_code',
++        'terminology',
++        'bibliographic',
++        'alpha_3',
++        'alpha_2',
++        'alpha2'
++    )
++    for attr in ATTRS:
++        try:
++            r = pycountry.pycountry.languages.get(**{attr: lang_str})
++            if r is not None:
++                return r
++        except (KeyError, UnicodeDecodeError):
++            pass
++
+     if allow_none:
+         logger.warning("Unknown language [{}]".format(lang_str))
+         return None
+-- 
+2.20.0
+

Copied: paperwork/repos/community-any/PKGBUILD (from rev 416804, paperwork/trunk/PKGBUILD)
===================================================================
--- repos/community-any/PKGBUILD	                        (rev 0)
+++ repos/community-any/PKGBUILD	2018-12-19 23:31:35 UTC (rev 416805)
@@ -0,0 +1,62 @@
+# Maintainer: Balló György <ballogyor+arch at gmail dot com>
+
+pkgname=paperwork
+pkgver=1.2.4
+pkgrel=3
+pkgdesc="Personal document manager for GNOME to manage scanned documents and PDFs"
+arch=(any)
+url="https://openpaper.work/"
+license=(GPL3)
+depends=(gtk3 libnotify poppler-glib python-cairo python-dateutil python-gobject
+         python-levenshtein python-natsort python-pillowfight python-pycountry python-pyenchant
+         python-pyinsane python-pyocr python-setuptools python-simplebayes python-termcolor
+         python-whoosh python-xdg)
+makedepends=(git)  # required to fetch the git+https source below
+_commit=36c00eaac636c6fa6db58f2f86a01e1c1abf2398  # tags/1.2.4^0
+source=("git+https://gitlab.gnome.org/World/OpenPaperwork/paperwork.git#commit=$_commit"
+        0001-Filter-out-boxes-that-start-at-0-0.patch
+        0001-Filter-out-too-large-boxes-on-selection.patch
+        0001-Fix-importing-PNG-files-with-transparency.patch
+        0001-util-find_language-New-versions-of-pycountry-do-not-.patch)
+sha256sums=('SKIP'  # git checkout: not checksummed, pinned by $_commit instead
+            '8fb8b760fba68c393b33bf2f2b52c6e7edf46d8958863c8f02c4f988842d84e3'  # 0001-Filter-out-boxes-that-start-at-0-0.patch
+            '1e11b2c18181b9ab36f51f25ff2e489759ae12245de41a4dcf377def400afe99'  # 0001-Filter-out-too-large-boxes-on-selection.patch
+            'aa867fb97da9a53c0e2beb758c26881dc2a16265e9b2deaa83e6fe1443ec8260'  # 0001-Fix-importing-PNG-files-with-transparency.patch
+            '00f501fb42abe44ec34982fbfd0f71cb4a589e0cc4bb983e0bfa50156fc30d3c')  # 0001-util-find_language-New-versions-of-pycountry-do-not-.patch
+
+pkgver() {  # print the version derived from the checked-out commit's tag description
+  cd "$pkgname"
+  git describe --tags | tr '-' '+'  # pkgver must not contain hyphens
+}
+
+prepare() {  # apply the four backported fixes listed in source=() to the checkout
+  cd $pkgname  # patches carry a/ and b/ prefixes relative to the repo root, hence -p1
+
+  # Upstream merge request: https://gitlab.gnome.org/World/OpenPaperwork/paperwork/merge_requests/781
+  patch -Np1 -i ../0001-Filter-out-boxes-that-start-at-0-0.patch
+
+  # Upstream merge request: https://gitlab.gnome.org/World/OpenPaperwork/paperwork/merge_requests/782
+  patch -Np1 -i ../0001-Filter-out-too-large-boxes-on-selection.patch  # must follow 781: its index line builds on that result
+
+  # Upstream merge request: https://gitlab.gnome.org/World/OpenPaperwork/paperwork/merge_requests/783
+  patch -Np1 -i ../0001-Fix-importing-PNG-files-with-transparency.patch
+
+  # Upstream issue (fix taken from upstream commit): https://gitlab.gnome.org/World/OpenPaperwork/paperwork/issues/812
+  patch -Np1 -i ../0001-util-find_language-New-versions-of-pycountry-do-not-.patch
+}
+
+build() {  # run the default target of the Makefile at the root of the checkout
+  cd $pkgname  # the git source above is cloned into a directory named after the repo (paperwork)
+  make
+}
+
+package() {  # install backend and GTK frontend into $pkgdir, then run the frontend's install_system() hook
+  cd $pkgname/paperwork-backend
+  python3 setup.py install --root="$pkgdir" --optimize=1
+
+  cd ../paperwork-gtk
+  python3 setup.py install --root="$pkgdir" --optimize=1
+
+  cd "$pkgdir"/usr/lib/python*/site-packages/paperwork/frontend/  # glob instead of a hard-coded 3.7, matching PYTHONPATH below; the ../ x5 paths resolve to $pkgdir/usr
+  PYTHONPATH=$(echo "$pkgdir"/usr/lib/python*/site-packages/) python3 -c 'import shell; shell.install_system(icon_basedir="../../../../../share/icons", data_basedir="../../../../../share")'
+}



More information about the arch-commits mailing list