1 /* kwsearch.c - searching subroutines using kwset for grep.
2 Copyright 1992, 1998, 2000, 2007, 2009-2012 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
19 /* Written August 1992 by Mike Haertel. */
24 /* For -w, we also consider _ to be word constituent. */
/* WCHAR (C) is nonzero when C satisfies isalnum or is equal to '_'.
   C appears twice in the expansion, so the argument should have no
   side effects.  The uses visible in this file cast the byte to
   unsigned char before passing it in.  */
25 #define WCHAR(C) (isalnum (C) || (C) == '_')
27 /* KWset compiled pattern. For Ecompile and Gcompile, we compile
28 a list of strings, at least one of which is known to occur in
29 any string matching the regexp. */
/* Add the pieces of PATTERN (SIZE bytes) to kwset with kwsincr, then
   finish the set with a single kwsprep call.  A non-NULL message
   returned by either call is passed to error together with
   EXIT_TROUBLE.
   NOTE(review): several lines of this function are not visible in
   this excerpt; comments below describe only what the visible lines
   show.  */
33 Fcompile (char const *pattern, size_t size)
37 mb_len_map_t *map = NULL;
/* When match_icase is set and MB_CUR_MAX > 1, scan the result of
   mbtolower instead of PATTERN itself; psize and map are handed to
   that call by address.  */
38 char const *pat = (match_icase && MB_CUR_MAX > 1
39 ? mbtolower (pattern, &psize, &map)
/* beg marks the start of the piece currently being scanned.  */
44 char const *beg = pat;
/* Advance lim one byte at a time from beg; the loop has no condition
   of its own and relies on the tests in its body to stop.  */
49 for (lim = beg;; ++lim)
/* Reached the end of the pattern text (pat + psize).  */
52 if (lim >= pat + psize)
59 #if HAVE_DOS_FILE_CONTENTS
/* A CR immediately followed by LF, with the LF still inside the
   pattern.  Presumably treated as a piece terminator; the action
   taken is not visible here -- TODO confirm.  */
60 if (*lim == '\r' && lim + 1 < pat + psize && lim[1] == '\n')
/* Add the bytes from beg up to end as one keyword.  */
68 if ((err = kwsincr (kwset, beg, end - beg)) != NULL)
69 error (EXIT_TROUBLE, 0, "%s", err);
/* Repeat while unscanned pattern text remains.  */
72 while (beg < pat + psize);
/* All keywords have been added; prepare the set.  */
74 if ((err = kwsprep (kwset)) != NULL)
75 error (EXIT_TROUBLE, 0, "%s", err);
/* Search the SIZE bytes at BUF for a keyword from kwset using kwsexec.
   If START_PTR is non-NULL the scan begins there instead of at BUF.
   NOTE(review): the return statements and the use of MATCH_SIZE are
   not visible in this excerpt; presumably MATCH_SIZE receives the
   match length and the offset is returned -- confirm against the
   full function.  */
79 Fexecute (char const *buf, size_t size, size_t *match_size,
80 char const *start_ptr)
82 char const *beg, *try, *end, *mb_start;
85 struct kwsmatch kwsmatch;
87 mb_len_map_t *map = NULL;
/* Obtain a converted copy of BUF from mbtolower, which may update
   size and map through the pointers given.  The condition guarding
   this conversion is not visible here.  */
93 char *case_buf = mbtolower (buf, &size, &map);
/* Re-point START_PTR at the same byte offset inside the copy.  */
95 start_ptr = case_buf + (start_ptr - buf);
/* Scan from START_PTR when it is non-NULL, otherwise from BUF;
   mb_start begins at the same position.  */
100 for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++)
/* Search the bytes remaining from beg to the end of the buffer.  */
102 size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
/* (size_t) -1 is tested as a special result; presumably it means
   that no keyword was found -- confirm against kwsexec.  */
103 if (offset == (size_t) -1)
/* Length recorded by kwsexec for this match.  */
105 len = kwsmatch.size[0];
107 && is_mb_middle (&mb_start, beg + offset, buf + size, len))
109 /* The match was a part of multibyte character, advance at least
110 one byte to ensure no infinite loop happens. */
/* Start mbrlen from a zeroed conversion state.  */
112 memset (&s, 0, sizeof s);
113 size_t mb_len = mbrlen (mb_start, (buf + size) - (beg + offset), &s);
/* mbrlen returns (size_t) -2 for an incomplete multibyte character.  */
114 if (mb_len == (size_t) -2)
/* mbrlen returns (size_t) -1 for an invalid sequence; any other
   value is a usable character length.  */
117 if (mb_len != (size_t) -1)
/* With START_PTR given and match_words off, use beg and len as they
   are, skipping the code between here and the label.  */
122 if (start_ptr && !match_words)
123 goto success_in_beg_and_len;
/* The byte before the match and the byte after it are compared with
   eol, except where the match touches the buffer boundary.
   Presumably this is the whole-line case; the enclosing condition
   is not visible -- TODO confirm.  */
126 if (beg > buf && beg[-1] != eol)
128 if (beg + len < buf + size && beg[len] != eol)
132 else if (match_words)
/* Test the bytes adjacent to the candidate at try with WCHAR, again
   skipping the test at the buffer boundaries.  */
135 if (try > buf && WCHAR((unsigned char) try[-1]))
137 if (try + len < buf + size && WCHAR((unsigned char) try[len]))
/* Search again from beg with the length reduced by one.  */
141 offset = kwsexec (kwset, beg, --len, &kwsmatch);
142 if (offset == (size_t) -1)
145 len = kwsmatch.size[0];
150 goto success_in_beg_and_len;
154 } /* for (beg in buf) */
/* Look for the first eol byte at or after the end of the match.  */
161 if ((end = memchr (beg + len, eol, (buf + size) - (beg + len))) != NULL)
/* Back beg up until it is at BUF or just after an eol byte.  */
165 while (buf < beg && beg[-1] != eol)
168 success_in_beg_and_len:;
/* Offset of beg from the start of the buffer.  off and len are then
   passed by address to mb_case_map_apply along with map; presumably
   this converts them back to positions in the caller's original
   buffer -- confirm against mb_case_map_apply.  */
169 size_t off = beg - buf;
170 mb_case_map_apply (map, &off, &len);