[arch-commits] Commit in grep/trunk (3 files)

Allan McRae allan at archlinux.org
Wed Jun 17 15:12:48 UTC 2009


    Date: Wednesday, June 17, 2009 @ 11:12:48
  Author: allan
Revision: 42634

upgpkg: grep 2.5.4-2
    fix case insensitive grep issues caused by utf8 speedup patch

Added:
  grep/trunk/65-dfa-optional.patch
  grep/trunk/66-match_icase.patch
Modified:
  grep/trunk/PKGBUILD

-----------------------+
 65-dfa-optional.patch |   75 ++++++++++++++++++++++++++++++++++++++++++++++++
 66-match_icase.patch  |   41 ++++++++++++++++++++++++++
 PKGBUILD              |   22 ++++++++------
 3 files changed, 129 insertions(+), 9 deletions(-)

Added: 65-dfa-optional.patch
===================================================================
--- 65-dfa-optional.patch	                        (rev 0)
+++ 65-dfa-optional.patch	2009-06-17 15:12:48 UTC (rev 42634)
@@ -0,0 +1,75 @@
+
+The DFA algorithm is slow with mutlibytes characters.
+This patch disables the DFA algorithm, but it can be re-enabled by setting
+the GREP_USE_DFA environment variable.
+
+This patch requires 64-egf-speedup.patch
+--- a/src/search.c.orig	2005-09-06 22:22:17.000000000 +0200
++++ b/src/search.c	2005-09-06 22:25:41.000000000 +0200
+@@ -326,6 +326,8 @@
+   char eol = eolbyte;
+   int backref, start, len;
+   struct kwsmatch kwsm;
++  static int use_dfa;
++  static int use_dfa_checked = 0;
+   size_t i, ret_val;
+ #ifdef MBS_SUPPORT
+   int mb_cur_max = MB_CUR_MAX;
+@@ -333,6 +335,26 @@
+   memset (&mbs, '\0', sizeof (mbstate_t));
+ #endif /* MBS_SUPPORT */
+ 
++  if (!use_dfa_checked)
++    {
++      char *grep_use_dfa = getenv ("GREP_USE_DFA");
++      if (!grep_use_dfa)
++	{
++#ifdef MBS_SUPPORT
++	  /* Turn off DFA when processing multibyte input. */
++	  use_dfa = (MB_CUR_MAX == 1);
++#else
++	  use_dfa = 1;
++#endif /* MBS_SUPPORT */
++	}
++      else
++	{
++	  use_dfa = atoi (grep_use_dfa);
++	}
++
++      use_dfa_checked = 1;
++    }
++
+   buflim = buf + size;
+ 
+   for (beg = end = buf; end < buflim; beg = end)
+@@ -400,7 +422,8 @@
+ #endif /* MBS_SUPPORT */
+ 		  (kwsm.index < kwset_exact_matches))
+ 		goto success;
+-	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
++	      if (use_dfa &&
++		  dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+ 		continue;
+ 	    }
+ 	  else
+@@ -409,7 +432,9 @@
+ #ifdef MBS_SUPPORT
+ 	      size_t bytes_left = 0;
+ #endif /* MBS_SUPPORT */
+-	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
++	      size_t offset = 0;
++	      if (use_dfa)
++		offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+ 	      if (offset == (size_t) -1)
+ 		break;
+ 	      /* Narrow down to the line we've found. */
+@@ -451,7 +476,7 @@
+ 		--beg;
+ 	    }
+ 	  /* Successful, no backreferences encountered! */
+-	  if (!backref)
++	  if (use_dfa && !backref)
+ 	    goto success;
+ 	}
+       else
+

Added: 66-match_icase.patch
===================================================================
--- 66-match_icase.patch	                        (rev 0)
+++ 66-match_icase.patch	2009-06-17 15:12:48 UTC (rev 42634)
@@ -0,0 +1,41 @@
+
+This fixes
+    echo Y | LC_ALL=en_US.UTF-8 grep -i '[y]'
+The expected output is:
+    Y
+
+Without this patch, it works on non UTF-8 environment, but fails on UTF-8
+environment.
+
+The definition of RE_ICASE comes from the glibc (/usr/include/regex.h)
+
+Maybe lib/posix/regex.h should be removed to enforce the usage of the
+glibc's regex.h
+
+--- a/lib/posix/regex.h.orig	2004-01-05 12:09:12.984391131 +0000
++++ b/lib/posix/regex.h	2004-01-05 12:09:24.717990622 +0000
+@@ -109,6 +109,10 @@
+    treated as 'a\{1'.  */
+ #define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
++/* If this bit is set, then ignore case when matching.
++   If not set, then case is significant.  */
++#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
++
+ /* This global variable defines the particular regexp syntax to use (for
+    some interfaces).  When a regexp is compiled, the syntax used is
+    stored in the pattern buffer, so changing this does not affect
+--- a/src/search.c	2009-02-11 17:15:24.000000000 +1100
++++ b/src/search.c	2009-02-11 17:23:48.000000000 +1100
+@@ -172,10 +172,8 @@
+   char const *motif = pattern;
+ 
+   check_utf8 ();
+-#if 0
+   if (match_icase)
+     syntax_bits |= RE_ICASE;
+-#endif
+   re_set_syntax (syntax_bits);
+   dfasyntax (syntax_bits, match_icase, eolbyte);
+ 
+

Modified: PKGBUILD
===================================================================
--- PKGBUILD	2009-06-17 13:04:39 UTC (rev 42633)
+++ PKGBUILD	2009-06-17 15:12:48 UTC (rev 42634)
@@ -1,10 +1,10 @@
 # $Id$
-# Maintainer: Andreas Radke <andyrtr at archlinux.org>
+# Maintainer: Allan McRae <allan at archlinux.org>
 # Contributor: judd <jvinet at zeroflux.org>
 
 pkgname=grep
 pkgver=2.5.4
-pkgrel=1
+pkgrel=2
 pkgdesc="A string search utility"
 arch=('i686' 'x86_64')
 license=('GPL3')
@@ -16,20 +16,24 @@
 source=(ftp://ftp.gnu.org/gnu/$pkgname/$pkgname-$pkgver.tar.gz
 	14-mem-exhausted.patch
 	15-empty-pattern.patch
-	# fix for FS#7141 , extracted from debian diff.gz :
-	# http://ftp.debian.org/debian/pool/main/g/grep/
-	64-egf-speedup.patch)
+	# utf8 grep speed fix - FS#7141
+	64-egf-speedup.patch
+	# fix the case insensitive search issues the above patch causes - FS#14877
+	65-dfa-optional.patch
+	66-match_icase.patch)
 md5sums=('92258031d98d4f12dfc6a6d24057e672'
          'bc937da562d468f32c1fef2894610283'
          'f421415b679ebcc9152797caaa0b1d51'
-         'efbe9d49d71a74092db6b86224b09fdd')
+         'efbe9d49d71a74092db6b86224b09fdd'
+         'f913cc834cda9be198b98318048a5ded'
+         '253351c7d960331a8268b4d853c511ff')
 
 build() {
   cd ${srcdir}/${pkgname}-${pkgver}
-  for i in ../*.patch; do
-	patch -Np1 -i ../$i 
+  for i in ${srcdir}/*.patch; do
+	patch -Np1 -i $i || return 1
   done
-  ./configure --prefix=/usr --bindir=/bin
+  ./configure --prefix=/usr --bindir=/bin --without-included-regex
   make || return 1
   make DESTDIR=${pkgdir} install || return 1
 




More information about the arch-commits mailing list