[arch-commits] Commit in icu/trunk (PKGBUILD fix_broken_regex.diff)

Fri May 18 09:15:06 UTC 2012

Date: Friday, May 18, 2012 @ 05:15:06
  Author: andyrtr
Revision: 159210

upgpkg: icu 49.1.1-2

fix broken regex; FS#29700

Added:
  icu/trunk/fix_broken_regex.diff
Modified:
  icu/trunk/PKGBUILD

-----------------------+
 PKGBUILD              |   10 +++++++---
 fix_broken_regex.diff |   35 +++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 3 deletions(-)

Modified: PKGBUILD
===================================================================

--- PKGBUILD	2012-05-18 01:02:05 UTC (rev 159209)
+++ PKGBUILD	2012-05-18 09:15:06 UTC (rev 159210)
@@ -4,7 +4,7 @@
 
 pkgname=icu
 pkgver=49.1.1
-pkgrel=1
+pkgrel=2
 pkgdesc="International Components for Unicode library"
 arch=(i686 x86_64)
 url="http://www.icu-project.org/"
@@ -12,14 +12,18 @@
 depends=('gcc-libs' 'sh')
 source=(#http://download.icu-project.org/files/${pkgname}4c/${pkgver}/${pkgname}4c-${pkgver/./_}-src.tgz
 	    http://download.icu-project.org/files/${pkgname}4c/${pkgver}/${pkgname}4c-${pkgver//./_}-src.tgz
-	    icu.8198.revert.icu5431.patch)
+	    icu.8198.revert.icu5431.patch
+	    fix_broken_regex.diff)
 md5sums=('7c53f83e0327343f4060c0eb83842daf'
-         'ebd5470fc969c75e52baf4af94a9ee82')
+         'ebd5470fc969c75e52baf4af94a9ee82'
+         '5bbcd600fdf9b35cbd89a06cab522f3f')
 
 build() {
   cd ${srcdir}/icu/source
   # fix Malayalam encoding https://bugzilla.redhat.com/show_bug.cgi?id=654200
   patch -Rp3 -i ${srcdir}/icu.8198.revert.icu5431.patch
+  # patch broken regex  - https://bugs.archlinux.org/task/29700 / http://bugs.icu-project.org/trac/ticket/9276
+  patch -Np0 -i ${srcdir}/fix_broken_regex.diff
   ./configure --prefix=/usr \
 	--sysconfdir=/etc \
 	--mandir=/usr/share/man

Added: fix_broken_regex.diff
===================================================================
--- fix_broken_regex.diff	                        (rev 0)
+++ fix_broken_regex.diff	2012-05-18 09:15:06 UTC (rev 159210)
@@ -0,0 +1,35 @@
+--- i18n/regexcmp.cpp
++++ i18n/regexcmp.cpp
+@@ -3307,8 +3307,29 @@
+ 
+         case URX_STRING_I:
+-            // TODO:  Is the case-folded string the longest?
+-            //        If so we can optimize this the same as URX_STRING.
+-            loc++;
+-            currentLen = INT32_MAX;
++            // TODO:  This code assumes that any user string that matches will be no longer
++            //        than our compiled string, with case insensitive matching.
++            //        Our compiled string has been case-folded already.
++            //
++            //        Any matching user string will have no more code points than our
++            //        compiled (folded) string.  Folding may add code points, but
++            //        not remove them.
++            //
++            //        There is a potential problem if a supplemental code point 
++            //        case-folds to a BMP code point.  In this case our compiled string
++            //        could be shorter (in code units) than a matching user string.
++            //
++            //        At this time (Unicode 6.1) there are no such characters, and this case
++            //        is not being handled.  A test, intltest regex/Bug9283, will fail if
++            //        any problematic characters are added to Unicode.
++            //
++            //        If this happens, we can make a set of the BMP chars that the
++            //        troublesome supplementals fold to, scan our string, and bump the
++            //        currentLen one extra for each that is found.
++            //
++            {
++                loc++;
++                int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
++                currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp));
++            }
+             break;