[arch-commits] Commit in icu/trunk (PKGBUILD fix_broken_regex.diff)
andyrtr at archlinux.org
andyrtr at archlinux.org
Fri May 18 09:15:06 UTC 2012
Date: Friday, May 18, 2012 @ 05:15:06
Author: andyrtr
Revision: 159210
upgpkg: icu 49.1.1-2
fix broken regex; FS#29700
Added:
icu/trunk/fix_broken_regex.diff
Modified:
icu/trunk/PKGBUILD
-----------------------+
PKGBUILD | 10 +++++++---
fix_broken_regex.diff | 35 +++++++++++++++++++++++++++++++++++
2 files changed, 42 insertions(+), 3 deletions(-)
Modified: PKGBUILD
===================================================================
--- PKGBUILD 2012-05-18 01:02:05 UTC (rev 159209)
+++ PKGBUILD 2012-05-18 09:15:06 UTC (rev 159210)
@@ -4,7 +4,7 @@
pkgname=icu
pkgver=49.1.1
-pkgrel=1
+pkgrel=2
pkgdesc="International Components for Unicode library"
arch=(i686 x86_64)
url="http://www.icu-project.org/"
@@ -12,14 +12,18 @@
depends=('gcc-libs' 'sh')
source=(#http://download.icu-project.org/files/${pkgname}4c/${pkgver}/${pkgname}4c-${pkgver/./_}-src.tgz
http://download.icu-project.org/files/${pkgname}4c/${pkgver}/${pkgname}4c-${pkgver//./_}-src.tgz
- icu.8198.revert.icu5431.patch)
+ icu.8198.revert.icu5431.patch
+ fix_broken_regex.diff)
md5sums=('7c53f83e0327343f4060c0eb83842daf'
- 'ebd5470fc969c75e52baf4af94a9ee82')
+ 'ebd5470fc969c75e52baf4af94a9ee82'
+ '5bbcd600fdf9b35cbd89a06cab522f3f')
build() {
cd ${srcdir}/icu/source
# fix Malayalam encoding https://bugzilla.redhat.com/show_bug.cgi?id=654200
patch -Rp3 -i ${srcdir}/icu.8198.revert.icu5431.patch
+ # patch broken regex - https://bugs.archlinux.org/task/29700 / http://bugs.icu-project.org/trac/ticket/9276
+ patch -Np0 -i ${srcdir}/fix_broken_regex.diff
./configure --prefix=/usr \
--sysconfdir=/etc \
--mandir=/usr/share/man
Added: fix_broken_regex.diff
===================================================================
--- fix_broken_regex.diff (rev 0)
+++ fix_broken_regex.diff 2012-05-18 09:15:06 UTC (rev 159210)
@@ -0,0 +1,35 @@
+--- i18n/regexcmp.cpp
++++ i18n/regexcmp.cpp
+@@ -3307,8 +3307,29 @@
+
+ case URX_STRING_I:
+- // TODO: Is the case-folded string the longest?
+- // If so we can optimize this the same as URX_STRING.
+- loc++;
+- currentLen = INT32_MAX;
++ // TODO: This code assumes that any user string that matches will be no longer
++ // than our compiled string, with case insensitive matching.
++ // Our compiled string has been case-folded already.
++ //
++ // Any matching user string will have no more code points than our
++ // compiled (folded) string. Folding may add code points, but
++ // not remove them.
++ //
++ // There is a potential problem if a supplemental code point
++ // case-folds to a BMP code point. In this case our compiled string
++ // could be shorter (in code units) than a matching user string.
++ //
++ // At this time (Unicode 6.1) there are no such characters, and this case
++ // is not being handled. A test, intltest regex/Bug9283, will fail if
++ // any problematic characters are added to Unicode.
++ //
++ // If this happens, we can make a set of the BMP chars that the
++ // troublesome supplementals fold to, scan our string, and bump the
++ // currentLen one extra for each that is found.
++ //
++ {
++ loc++;
++ int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
++ currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp));
++ }
+ break;
More information about the arch-commits mailing list