[arch-commits] Commit in grep/trunk (3 files)
Allan McRae
allan at archlinux.org
Wed Jun 17 15:12:48 UTC 2009
Date: Wednesday, June 17, 2009 @ 11:12:48
Author: allan
Revision: 42634
upgpkg: grep 2.5.4-2
fix case-insensitive grep issues caused by utf8 speedup patch
Added:
grep/trunk/65-dfa-optional.patch
grep/trunk/66-match_icase.patch
Modified:
grep/trunk/PKGBUILD
-----------------------+
65-dfa-optional.patch | 75 ++++++++++++++++++++++++++++++++++++++++++++++++
66-match_icase.patch | 41 ++++++++++++++++++++++++++
PKGBUILD | 22 ++++++++------
3 files changed, 129 insertions(+), 9 deletions(-)
Added: 65-dfa-optional.patch
===================================================================
--- 65-dfa-optional.patch (rev 0)
+++ 65-dfa-optional.patch 2009-06-17 15:12:48 UTC (rev 42634)
@@ -0,0 +1,75 @@
+
+The DFA algorithm is slow with multibyte characters.
+This patch disables the DFA algorithm, but it can be re-enabled by setting
+the GREP_USE_DFA environment variable.
+
+This patch requires 64-egf-speedup.patch
+--- a/src/search.c.orig 2005-09-06 22:22:17.000000000 +0200
++++ b/src/search.c 2005-09-06 22:25:41.000000000 +0200
+@@ -326,6 +326,8 @@
+ char eol = eolbyte;
+ int backref, start, len;
+ struct kwsmatch kwsm;
++ static int use_dfa;
++ static int use_dfa_checked = 0;
+ size_t i, ret_val;
+ #ifdef MBS_SUPPORT
+ int mb_cur_max = MB_CUR_MAX;
+@@ -333,6 +335,26 @@
+ memset (&mbs, '\0', sizeof (mbstate_t));
+ #endif /* MBS_SUPPORT */
+
++ if (!use_dfa_checked)
++ {
++ char *grep_use_dfa = getenv ("GREP_USE_DFA");
++ if (!grep_use_dfa)
++ {
++#ifdef MBS_SUPPORT
++ /* Turn off DFA when processing multibyte input. */
++ use_dfa = (MB_CUR_MAX == 1);
++#else
++ use_dfa = 1;
++#endif /* MBS_SUPPORT */
++ }
++ else
++ {
++ use_dfa = atoi (grep_use_dfa);
++ }
++
++ use_dfa_checked = 1;
++ }
++
+ buflim = buf + size;
+
+ for (beg = end = buf; end < buflim; beg = end)
+@@ -400,7 +422,8 @@
+ #endif /* MBS_SUPPORT */
+ (kwsm.index < kwset_exact_matches))
+ goto success;
+- if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
++ if (use_dfa &&
++ dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+ continue;
+ }
+ else
+@@ -409,7 +432,9 @@
+ #ifdef MBS_SUPPORT
+ size_t bytes_left = 0;
+ #endif /* MBS_SUPPORT */
+- size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
++ size_t offset = 0;
++ if (use_dfa)
++ offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+ if (offset == (size_t) -1)
+ break;
+ /* Narrow down to the line we've found. */
+@@ -451,7 +476,7 @@
+ --beg;
+ }
+ /* Successful, no backreferences encountered! */
+- if (!backref)
++ if (use_dfa && !backref)
+ goto success;
+ }
+ else
+
Added: 66-match_icase.patch
===================================================================
--- 66-match_icase.patch (rev 0)
+++ 66-match_icase.patch 2009-06-17 15:12:48 UTC (rev 42634)
@@ -0,0 +1,41 @@
+
+This fixes
+ echo Y | LC_ALL=en_US.UTF-8 grep -i '[y]'
+The expected output is:
+ Y
+
+Without this patch, it works in a non-UTF-8 environment, but fails in a UTF-8
+environment.
+
+The definition of RE_ICASE comes from the glibc (/usr/include/regex.h)
+
+Maybe lib/posix/regex.h should be removed to enforce the usage of the
+glibc's regex.h
+
+--- a/lib/posix/regex.h.orig 2004-01-05 12:09:12.984391131 +0000
++++ b/lib/posix/regex.h 2004-01-05 12:09:24.717990622 +0000
+@@ -109,6 +109,10 @@
+ treated as 'a\{1'. */
+ #define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
++/* If this bit is set, then ignore case when matching.
++ If not set, then case is significant. */
++#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
++
+ /* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+--- a/src/search.c 2009-02-11 17:15:24.000000000 +1100
++++ b/src/search.c 2009-02-11 17:23:48.000000000 +1100
+@@ -172,10 +172,8 @@
+ char const *motif = pattern;
+
+ check_utf8 ();
+-#if 0
+ if (match_icase)
+ syntax_bits |= RE_ICASE;
+-#endif
+ re_set_syntax (syntax_bits);
+ dfasyntax (syntax_bits, match_icase, eolbyte);
+
+
Modified: PKGBUILD
===================================================================
--- PKGBUILD 2009-06-17 13:04:39 UTC (rev 42633)
+++ PKGBUILD 2009-06-17 15:12:48 UTC (rev 42634)
@@ -1,10 +1,10 @@
# $Id$
-# Maintainer: Andreas Radke <andyrtr at archlinux.org>
+# Maintainer: Allan McRae <allan at archlinux.org>
# Contributor: judd <jvinet at zeroflux.org>
pkgname=grep
pkgver=2.5.4
-pkgrel=1
+pkgrel=2
pkgdesc="A string search utility"
arch=('i686' 'x86_64')
license=('GPL3')
@@ -16,20 +16,24 @@
source=(ftp://ftp.gnu.org/gnu/$pkgname/$pkgname-$pkgver.tar.gz
14-mem-exhausted.patch
15-empty-pattern.patch
- # fix for FS#7141 , extracted from debian diff.gz :
- # http://ftp.debian.org/debian/pool/main/g/grep/
- 64-egf-speedup.patch)
+ # utf8 grep speed fix - FS#7141
+ 64-egf-speedup.patch
+ # fix the case insensitive search issues the above patch causes - FS#14877
+ 65-dfa-optional.patch
+ 66-match_icase.patch)
md5sums=('92258031d98d4f12dfc6a6d24057e672'
'bc937da562d468f32c1fef2894610283'
'f421415b679ebcc9152797caaa0b1d51'
- 'efbe9d49d71a74092db6b86224b09fdd')
+ 'efbe9d49d71a74092db6b86224b09fdd'
+ 'f913cc834cda9be198b98318048a5ded'
+ '253351c7d960331a8268b4d853c511ff')
build() {
cd ${srcdir}/${pkgname}-${pkgver}
- for i in ../*.patch; do
- patch -Np1 -i ../$i
+ for i in ${srcdir}/*.patch; do
+ patch -Np1 -i $i || return 1
done
- ./configure --prefix=/usr --bindir=/bin
+ ./configure --prefix=/usr --bindir=/bin --without-included-regex
make || return 1
make DESTDIR=${pkgdir} install || return 1
More information about the arch-commits
mailing list