1 /* pcresearch.c - searching subroutines using PCRE for grep.
2 Copyright 2000, 2007, 2009-2011 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
19 /* Written August 1992 by Mike Haertel. */
25 #elif HAVE_PCRE_PCRE_H
26 # include <pcre/pcre.h>
30 /* Compiled internal form of a Perl regular expression. */
33 /* Additional information about the pattern. */
34 static pcre_extra *extra;
38 Pcompile (char const *pattern, size_t size)
41 error (EXIT_TROUBLE, 0, "%s",
42 _("support for the -P option is not compiled into "
43 "this --disable-perl-regexp binary"));
47 char *re = xnmalloc (4, size + 7);
48 int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
49 char const *patlim = pattern + size;
54 /* FIXME: Remove these restrictions. */
55 if (memchr(pattern, '\n', size))
56 error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
65 /* The PCRE interface doesn't allow NUL bytes in the pattern, so
66 replace each NUL byte in the pattern with the four characters
67 "\000", removing a preceding backslash if there are an odd
68 number of backslashes before the NUL.
70 FIXME: This method does not work with some multibyte character
71 encodings, notably Shift-JIS, where a multibyte character can end
72 in a backslash byte. */
73 for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
75 memcpy (n, p, pnul - p);
77 for (p = pnul; pattern < p && p[-1] == '\\'; p--)
84 memcpy (n, p, patlim - p);
92 cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
94 error (EXIT_TROUBLE, 0, "%s", ep);
96 extra = pcre_study (cre, 0, &ep);
98 error (EXIT_TROUBLE, 0, "%s", ep);
104 /* Pexecute is a no-return function when building --without-pcre. */
106 # define WITHOUT_PCRE_NORETURN _GL_ATTRIBUTE_NORETURN
108 # define WITHOUT_PCRE_NORETURN /* empty */
111 size_t WITHOUT_PCRE_NORETURN
112 Pexecute (char const *buf, size_t size, size_t *match_size,
113 char const *start_ptr)
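118 /* This array must have at least three elements: pcre_exec rounds its
119 size down to a multiple of three and returns match offsets only in the first two-thirds.  Everything after that is just for performance improvement in pcre_exec. */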
122 const char *line_buf, *line_end, *line_next;
123 int e = PCRE_ERROR_NOMATCH;
124 ptrdiff_t start_ofs = start_ptr ? start_ptr - buf : 0;
126 /* PCRE can't limit the matching to single lines, therefore we have to
127 match each line in the buffer separately. */
128 for (line_next = buf;
129 e == PCRE_ERROR_NOMATCH && line_next < buf + size;
130 start_ofs -= line_next - line_buf)
132 line_buf = line_next;
133 line_end = memchr (line_buf, eolbyte, (buf + size) - line_buf);
134 if (line_end == NULL)
135 line_next = line_end = buf + size;
137 line_next = line_end + 1;
139 if (start_ptr && start_ptr >= line_end)
142 e = pcre_exec (cre, extra, line_buf, line_end - line_buf,
143 start_ofs < 0 ? 0 : start_ofs, 0,
144 sub, sizeof sub / sizeof *sub);
151 case PCRE_ERROR_NOMATCH:
154 case PCRE_ERROR_NOMEMORY:
155 error (EXIT_TROUBLE, 0, _("memory exhausted"));
157 case PCRE_ERROR_MATCHLIMIT:
158 error (EXIT_TROUBLE, 0,
159 _("exceeded PCRE's backtracking limit"));
167 /* Narrow down to the line we've found. */
168 char const *beg = line_buf + sub[0];
169 char const *end = line_buf + sub[1];
170 char const *buflim = buf + size;
174 /* FIXME: The case when the end-of-line byte is not found indicates a bug:
175 Since grep is line oriented, the match should never contain
176 an end-of-line byte, so there _must_ be one following.
178 if (!(end = memchr (end, eol, buflim - end)))
182 while (buf < beg && beg[-1] != eol)
186 *match_size = end - beg;