[arch-commits] Commit in paperwork (7 files)
Balló György
bgyorgy at archlinux.org
Wed Dec 19 23:31:36 UTC 2018
Date: Wednesday, December 19, 2018 @ 23:31:35
Author: bgyorgy
Revision: 416805
archrelease: copy trunk to community-any
Added:
paperwork/repos/
paperwork/repos/community-any/
paperwork/repos/community-any/0001-Filter-out-boxes-that-start-at-0-0.patch
(from rev 416804, paperwork/trunk/0001-Filter-out-boxes-that-start-at-0-0.patch)
paperwork/repos/community-any/0001-Filter-out-too-large-boxes-on-selection.patch
(from rev 416804, paperwork/trunk/0001-Filter-out-too-large-boxes-on-selection.patch)
paperwork/repos/community-any/0001-Fix-importing-PNG-files-with-transparency.patch
(from rev 416804, paperwork/trunk/0001-Fix-importing-PNG-files-with-transparency.patch)
paperwork/repos/community-any/0001-util-find_language-New-versions-of-pycountry-do-not-.patch
(from rev 416804, paperwork/trunk/0001-util-find_language-New-versions-of-pycountry-do-not-.patch)
paperwork/repos/community-any/PKGBUILD
(from rev 416804, paperwork/trunk/PKGBUILD)
-----------------------------------------------------------------+
0001-Filter-out-boxes-that-start-at-0-0.patch | 37 ++++
0001-Filter-out-too-large-boxes-on-selection.patch | 39 ++++
0001-Fix-importing-PNG-files-with-transparency.patch | 25 ++
0001-util-find_language-New-versions-of-pycountry-do-not-.patch | 87 ++++++++++
PKGBUILD | 62 +++++++
5 files changed, 250 insertions(+)
Copied: paperwork/repos/community-any/0001-Filter-out-boxes-that-start-at-0-0.patch (from rev 416804, paperwork/trunk/0001-Filter-out-boxes-that-start-at-0-0.patch)
===================================================================
--- repos/community-any/0001-Filter-out-boxes-that-start-at-0-0.patch (rev 0)
+++ repos/community-any/0001-Filter-out-boxes-that-start-at-0-0.patch 2018-12-19 23:31:35 UTC (rev 416805)
@@ -0,0 +1,37 @@
+From d1da8cd20554aa1d3c766855c251a5a643d2abb4 Mon Sep 17 00:00:00 2001
+From: Jonas Wloka <code at jowlo.de>
+Date: Sat, 17 Nov 2018 19:54:20 +0100
+Subject: [PATCH] Filter out boxes that start at (0, 0)
+
+Tesseract returns way too large boxes that cover the whole page, mostly
+containing only a single special character. All of these boxes (in my
+tests) have coordinate 0 0.
+
+This filters out all of these boxes.
+---
+ paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py
+index 35c6b7f4..e324c670 100644
+--- a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py
++++ b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py
+@@ -251,10 +251,14 @@ class JobPageBoxesLoader(Job):
+ boxes = set()
+ for line in line_boxes:
+ for word in line.word_boxes:
+- if word.content.strip() == "":
++ if word.content.strip() == "" or (word.position[0][0] == 0
++ and word.position[0][1] == 0):
+ # XXX(Jflesch): Tesseract 3.03 (hOCR) returns big and
+ # empty word boxes sometimes (just a single space
+ # inside). They often match images, but not always.
++ # XXX(jowlo): Tesseract returns large boxes containing
++ # single letters (mostly special chars) that cover the
++ # whole page. All of these start at (0 0)
+ continue
+ boxes.add(word)
+
+--
+2.20.0
+
Copied: paperwork/repos/community-any/0001-Filter-out-too-large-boxes-on-selection.patch (from rev 416804, paperwork/trunk/0001-Filter-out-too-large-boxes-on-selection.patch)
===================================================================
--- repos/community-any/0001-Filter-out-too-large-boxes-on-selection.patch (rev 0)
+++ repos/community-any/0001-Filter-out-too-large-boxes-on-selection.patch 2018-12-19 23:31:35 UTC (rev 416805)
@@ -0,0 +1,39 @@
+From 318d9ef80a7dc21da7ad45fa46c11c8fb19ec8fb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ball=C3=B3=20Gy=C3=B6rgy?= <ballogyor at gmail.com>
+Date: Wed, 19 Dec 2018 16:59:46 +0100
+Subject: [PATCH] Filter out too large boxes on selection
+
+Tesseract returns way too large boxes that cover the whole page, mostly containing only a single special character.
+
+This is a complement for commit d1da8cd2 to filter out all of these boxes also on text selection.
+---
+ paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py
+index e324c670..b022b178 100644
+--- a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py
++++ b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py
+@@ -731,6 +731,9 @@ class SimplePageDrawer(Drawer):
+ index = 0
+ for line in self.boxes['lines']:
+ for box in line.word_boxes:
++ if box.content.strip() == "" or (box.position[0][0] == 0
++ and box.position[0][1] == 0):
++ continue
+ rcx = (box.position[0][0] + box.position[1][0]) / 2
+ rcy = (box.position[0][1] + box.position[1][1]) / 2
+ w = box.position[1][0] - box.position[0][0]
+@@ -767,6 +770,9 @@ class SimplePageDrawer(Drawer):
+ selected = []
+ for line in self.boxes['lines']:
+ for box in line.word_boxes:
++ if box.content.strip() == "" or (box.position[0][0] == 0
++ and box.position[0][1] == 0):
++ continue
+ if box == box_start:
+ in_list = True
+ if in_list:
+--
+2.20.0
+
Copied: paperwork/repos/community-any/0001-Fix-importing-PNG-files-with-transparency.patch (from rev 416804, paperwork/trunk/0001-Fix-importing-PNG-files-with-transparency.patch)
===================================================================
--- repos/community-any/0001-Fix-importing-PNG-files-with-transparency.patch (rev 0)
+++ repos/community-any/0001-Fix-importing-PNG-files-with-transparency.patch 2018-12-19 23:31:35 UTC (rev 416805)
@@ -0,0 +1,25 @@
+From 2ffb468a41d28eceda3afe869dd8c5af70203bf4 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ball=C3=B3=20Gy=C3=B6rgy?= <ballogyor at gmail.com>
+Date: Wed, 19 Dec 2018 17:19:27 +0100
+Subject: [PATCH] Fix importing PNG files with transparency
+
+Pillow does not allow to save images with transparency to JPEG. These images need to be converted first.
+---
+ paperwork-backend/paperwork_backend/img/page.py | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/paperwork-backend/paperwork_backend/img/page.py b/paperwork-backend/paperwork_backend/img/page.py
+index ddbb9214..86d5e4be 100644
+--- a/paperwork-backend/paperwork_backend/img/page.py
++++ b/paperwork-backend/paperwork_backend/img/page.py
+@@ -141,6 +141,7 @@ class ImgPage(BasicPage):
+
+ def __set_img(self, img):
+ with self.fs.open(self.__img_path, 'wb') as fd:
++ img = img.convert("RGB")
+ img.save(fd, format="JPEG")
+
+ img = property(__get_img, __set_img)
+--
+2.20.0
+
Copied: paperwork/repos/community-any/0001-util-find_language-New-versions-of-pycountry-do-not-.patch (from rev 416804, paperwork/trunk/0001-util-find_language-New-versions-of-pycountry-do-not-.patch)
===================================================================
--- repos/community-any/0001-util-find_language-New-versions-of-pycountry-do-not-.patch (rev 0)
+++ repos/community-any/0001-util-find_language-New-versions-of-pycountry-do-not-.patch 2018-12-19 23:31:35 UTC (rev 416805)
@@ -0,0 +1,87 @@
+From ad4555f9904805e4fd56a30ca6529536edb0f919 Mon Sep 17 00:00:00 2001
+From: Jerome Flesch <jflesch at openpaper.work>
+Date: Wed, 19 Dec 2018 22:38:59 +0100
+Subject: [PATCH] util: find_language(): New versions of pycountry do not raise
+ exception when a language is not found, they return None. Ref #812
+
+Signed-off-by: Jerome Flesch <jflesch at openpaper.work>
+---
+ paperwork-backend/paperwork_backend/util.py | 60 ++++++++-------------
+ 1 file changed, 23 insertions(+), 37 deletions(-)
+
+diff --git a/paperwork-backend/paperwork_backend/util.py b/paperwork-backend/paperwork_backend/util.py
+index 90fd0eb9..73701c8a 100644
+--- a/paperwork-backend/paperwork_backend/util.py
++++ b/paperwork-backend/paperwork_backend/util.py
+@@ -312,45 +312,31 @@ def find_language(lang_str=None, allow_none=False):
+ lang_str = lang_str.split("_")[0]
+
+ try:
+- return pycountry.pycountry.languages.get(name=lang_str.title())
+- except (KeyError, UnicodeDecodeError):
+- pass
+- try:
+- return pycountry.pycountry.languages.get(iso_639_3_code=lang_str)
+- except (KeyError, UnicodeDecodeError):
+- pass
+- try:
+- return pycountry.pycountry.languages.get(iso639_3_code=lang_str)
+- except (KeyError, UnicodeDecodeError):
+- pass
+- try:
+- return pycountry.pycountry.languages.get(iso639_2T_code=lang_str)
+- except (KeyError, UnicodeDecodeError):
+- pass
+- try:
+- return pycountry.pycountry.languages.get(iso639_1_code=lang_str)
+- except (KeyError, UnicodeDecodeError):
+- pass
+- try:
+- return pycountry.pycountry.languages.get(terminology=lang_str)
+- except (KeyError, UnicodeDecodeError):
+- pass
+- try:
+- return pycountry.pycountry.languages.get(bibliographic=lang_str)
+- except (KeyError, UnicodeDecodeError):
+- pass
+- try:
+- return pycountry.pycountry.languages.get(alpha_3=lang_str)
+- except (KeyError, UnicodeDecodeError):
+- pass
+- try:
+- return pycountry.pycountry.languages.get(alpha_2=lang_str)
+- except (KeyError, UnicodeDecodeError):
+- pass
+- try:
+- return pycountry.pycountry.languages.get(alpha2=lang_str)
++ r = pycountry.pycountry.languages.get(name=lang_str.title())
++ if r is not None:
++ return r
+ except (KeyError, UnicodeDecodeError):
+ pass
++
++ ATTRS = (
++ 'iso_639_3_code',
++ 'iso639_3_code',
++ 'iso639_2T_code',
++ 'iso639_1_code',
++ 'terminology',
++ 'bibliographic',
++ 'alpha_3',
++ 'alpha_2',
++ 'alpha2'
++ )
++ for attr in ATTRS:
++ try:
++ r = pycountry.pycountry.languages.get(**{attr: lang_str})
++ if r is not None:
++ return r
++ except (KeyError, UnicodeDecodeError):
++ pass
++
+ if allow_none:
+ logger.warning("Unknown language [{}]".format(lang_str))
+ return None
+--
+2.20.0
+
Copied: paperwork/repos/community-any/PKGBUILD (from rev 416804, paperwork/trunk/PKGBUILD)
===================================================================
--- repos/community-any/PKGBUILD (rev 0)
+++ repos/community-any/PKGBUILD 2018-12-19 23:31:35 UTC (rev 416805)
@@ -0,0 +1,62 @@
+# Maintainer: Balló György <ballogyor+arch at gmail dot com>
+
+pkgname=paperwork
+pkgver=1.2.4
+pkgrel=3
+pkgdesc="Personal document manager for GNOME to manage scanned documents and PDFs"
+arch=(any)
+url="https://openpaper.work/"
+license=(GPL3)
+depends=(gtk3 libnotify poppler-glib python-cairo python-dateutil python-gobject
+ python-levenshtein python-natsort python-pillowfight python-pycountry python-pyenchant
+ python-pyinsane python-pyocr python-setuptools python-simplebayes python-termcolor
+ python-whoosh python-xdg)
+makedepends=(git)  # required to fetch the git+https source below
+_commit=36c00eaac636c6fa6db58f2f86a01e1c1abf2398 # tags/1.2.4^0
+source=("git+https://gitlab.gnome.org/World/OpenPaperwork/paperwork.git#commit=$_commit"
+ 0001-Filter-out-boxes-that-start-at-0-0.patch
+ 0001-Filter-out-too-large-boxes-on-selection.patch
+ 0001-Fix-importing-PNG-files-with-transparency.patch
+ 0001-util-find_language-New-versions-of-pycountry-do-not-.patch)
+sha256sums=('SKIP'  # git checkout: pinned by $_commit rather than by a checksum
+ '8fb8b760fba68c393b33bf2f2b52c6e7edf46d8958863c8f02c4f988842d84e3'  # 0001-Filter-out-boxes-that-start-at-0-0.patch
+ '1e11b2c18181b9ab36f51f25ff2e489759ae12245de41a4dcf377def400afe99'  # 0001-Filter-out-too-large-boxes-on-selection.patch
+ 'aa867fb97da9a53c0e2beb758c26881dc2a16265e9b2deaa83e6fe1443ec8260'  # 0001-Fix-importing-PNG-files-with-transparency.patch
+ '00f501fb42abe44ec34982fbfd0f71cb4a589e0cc4bb983e0bfa50156fc30d3c')  # 0001-util-find_language-New-versions-of-pycountry-do-not-.patch
+
+pkgver() {  # Print the package version derived from the checked-out git tag
+ cd $pkgname
+ git describe --tags | sed 's/-/+/g'  # pkgver may not contain hyphens, so every '-' becomes '+'
+}
+
+prepare() {  # Apply the four bundled patches to the upstream checkout, in source=() order
+ cd $pkgname
+
+ # https://gitlab.gnome.org/World/OpenPaperwork/paperwork/merge_requests/781
+ patch -Np1 -i ../0001-Filter-out-boxes-that-start-at-0-0.patch  # -N: skip patches that look already applied; -p1: strip the a/ b/ prefix
+
+ # https://gitlab.gnome.org/World/OpenPaperwork/paperwork/merge_requests/782
+ patch -Np1 -i ../0001-Filter-out-too-large-boxes-on-selection.patch  # touches the same pages.py as the previous patch, so it must come second
+
+ # https://gitlab.gnome.org/World/OpenPaperwork/paperwork/merge_requests/783
+ patch -Np1 -i ../0001-Fix-importing-PNG-files-with-transparency.patch
+
+ # https://gitlab.gnome.org/World/OpenPaperwork/paperwork/issues/812
+ patch -Np1 -i ../0001-util-find_language-New-versions-of-pycountry-do-not-.patch
+}
+
+build() {  # Run the default make target of the upstream checkout
+ cd $pkgname
+ make
+}
+
+package() {  # Install backend and GTK frontend into $pkgdir, then call paperwork's shell.install_system()
+ cd $pkgname/paperwork-backend
+ python3 setup.py install --root="$pkgdir" --optimize=1
+
+ cd ../paperwork-gtk
+ python3 setup.py install --root="$pkgdir" --optimize=1
+
+ cd "$pkgdir"/usr/lib/python3*/site-packages/paperwork/frontend/  # glob instead of a hard-coded python3.7 so a Python version bump does not break packaging
+ PYTHONPATH=$(echo "$pkgdir"/usr/lib/python*/site-packages/) python3 -c 'import shell; shell.install_system(icon_basedir="../../../../../share/icons", data_basedir="../../../../../share")'  # five levels up from frontend/ is $pkgdir/usr
+}
More information about the arch-commits mailing list