1 /* pcresearch.c - searching subroutines using PCRE for grep.
2 Copyright 2000, 2007, 2009-2014 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
19 /* Written August 1992 by Mike Haertel. */
25 #elif HAVE_PCRE_PCRE_H
26 # include <pcre/pcre.h>
30 /* Compiled internal form of a Perl regular expression. */
33 /* Additional information about the pattern. */
/* Filled in by pcre_study in Pcompile and handed to pcre_exec in
   Pexecute.  */
34 static pcre_extra *extra;
36 # ifdef PCRE_STUDY_JIT_COMPILE
/* Stack used by JIT-compiled matching; allocated in Pcompile with
   pcre_jit_stack_alloc and attached to EXTRA through
   pcre_assign_jit_stack.  */
37 static pcre_jit_stack *jit_stack;
/* Fallback value, so that PCRE_STUDY_JIT_COMPILE can still be passed
   to pcre_study and tested with "# if" in Pcompile.  NOTE(review):
   presumably this sits in an "# else" branch for PCRE headers that do
   not define the macro; that directive is not visible here.  */
39 # define PCRE_STUDY_JIT_COMPILE 0
/* Compile the Perl-compatible regular expression PATTERN, SIZE bytes
   long, storing the compiled form in CRE and the study data in EXTRA
   for later use by pcre_exec.  Every failure seen here is reported
   through error (EXIT_TROUBLE, ...).
   NOTE(review): several lines of this function (return type, braces,
   local declarations, some conditionals) are not visible in this
   excerpt; the notes below describe only the visible statements.  */
44 Pcompile (char const *pattern, size_t size)
/* Diagnostic for a binary built without -P support.  NOTE(review):
   presumably the branch compiled when HAVE_LIBPCRE is unset; the
   opening conditional is not visible here.  */
47 error (EXIT_TROUBLE, 0, "%s",
48 _("support for the -P option is not compiled into "
49 "this --disable-perl-regexp binary"));
/* RE receives the rewritten pattern that is handed to pcre_compile.
   NOTE(review): xnmalloc (4, size + 7) presumably reserves
   4 * (size + 7) bytes, which would cover the four-character
   expansion of each NUL described below plus the wrapper strings;
   confirm against xnmalloc's definition.  */
53 char *re = xnmalloc (4, size + 7);
/* PCRE options: multi-line mode always; caseless matching when
   match_icase is set; UTF-8 mode when using_utf8 () returns true.  */
54 int flags = (PCRE_MULTILINE
55 | (match_icase ? PCRE_CASELESS : 0)
56 | (using_utf8 () ? PCRE_UTF8 : 0));
/* One past the last byte of PATTERN.  */
57 char const *patlim = pattern + size;
62 /* FIXME: Remove these restrictions. */
/* A newline anywhere in PATTERN is rejected with the diagnostic
   below.  */
63 if (memchr (pattern, '\n', size))
64 error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
/* Opening half of a wrapper: a negative lookbehind for a word
   character followed by a non-capturing group.  The matching closing
   half, ")(?!\\w)", is copied further down.  The condition that
   selects this wrapper is not visible in this excerpt.  */
70 strcpy (n, "(?<!\\w)(?:");
73 /* The PCRE interface doesn't allow NUL bytes in the pattern, so
74 replace each NUL byte in the pattern with the four characters
75 "\000", removing a preceding backslash if there are an odd
76 number of backslashes before the NUL.
78 FIXME: This method does not work with some multibyte character
79 encodings, notably Shift-JIS, where a multibyte character can end
80 in a backslash byte. */
/* P is the start of the not-yet-copied text and PNUL the next NUL at
   or after it; the loop ends when no NUL remains before PATLIM.  */
81 for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
/* Copy the bytes that precede this NUL.  */
83 memcpy (n, p, pnul - p);
/* Walk P back over the run of backslashes directly before the NUL.  */
85 for (p = pnul; pattern < p && p[-1] == '\\'; p--)
/* Copy what follows the last NUL (the whole pattern if it had none).  */
92 memcpy (n, p, patlim - p);
/* Closing half of the wrapper opened above: end the group and add a
   negative lookahead for a word character.  */
96 strcpy (n, ")(?!\\w)");
/* Compile the rewritten pattern using character tables obtained from
   pcre_maketables ().  EP receives the message that is reported
   below.  */
100 cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
102 error (EXIT_TROUBLE, 0, "%s", ep);
/* Study the compiled pattern, passing PCRE_STUDY_JIT_COMPILE as the
   options word; the message left in EP is reported below (the test
   guarding that report is not visible here).  */
104 extra = pcre_study (cre, PCRE_STUDY_JIT_COMPILE, &ep);
106 error (EXIT_TROUBLE, 0, "%s", ep);
/* Only when PCRE_STUDY_JIT_COMPILE is nonzero: query PCRE_INFO_JIT
   into E; a nonzero return from pcre_fullinfo is treated as an
   internal error.  */
108 # if PCRE_STUDY_JIT_COMPILE
109 if (pcre_fullinfo (cre, extra, PCRE_INFO_JIT, &e))
110 error (EXIT_TROUBLE, 0, _("internal error (should never happen)"));
114 /* A 32K stack is allocated for the machine code by default, which
115 can grow to 512K if necessary. Since JIT uses far less memory
116 than the interpreter, this should be enough in practice. */
117 jit_stack = pcre_jit_stack_alloc (32 * 1024, 512 * 1024);
/* Allocation-failure diagnostic, then attach the stack to EXTRA so
   that matching with EXTRA uses it.  */
119 error (EXIT_TROUBLE, 0,
120 _("failed to allocate memory for the PCRE JIT stack"));
121 pcre_assign_jit_stack (extra, NULL, jit_stack);
125 #endif /* HAVE_LIBPCRE */
/* Search BUF, SIZE bytes long, one line at a time, using CRE and
   EXTRA as prepared by Pcompile.  On a match, the length of the
   reported span is stored in *MATCH_SIZE.  START_PTR, when non-null,
   points into BUF and gives the position from which matching starts.
   NOTE(review): the return type and return statements are not visible
   in this excerpt, so the return value is not described here.  */
129 Pexecute (char const *buf, size_t size, size_t *match_size,
130 char const *start_ptr)
133 /* We can't get here, because Pcompile would have been called earlier. */
134 error (EXIT_TROUBLE, 0, _("internal error"));
137 /* This array must have at least two elements; everything after that
138 is just for performance improvement in pcre_exec. */
/* LINE_BUF and LINE_END delimit the line being matched; LINE_NEXT is
   where the following line starts.  */
141 const char *line_buf, *line_end, *line_next;
/* Starts as "no match" so that the loop condition below holds on
   entry.  */
142 int e = PCRE_ERROR_NOMATCH;
/* Offset of START_PTR from BUF, or 0 when START_PTR is null.  */
143 ptrdiff_t start_ofs = start_ptr ? start_ptr - buf : 0;
145 /* PCRE can't limit the matching to single lines, therefore we have to
146 match each line in the buffer separately. */
/* Continue while the last result is PCRE_ERROR_NOMATCH and input
   remains.  After each line, START_OFS is reduced by that line's
   length (terminator included) so that it stays relative to the start
   of the next line.  */
147 for (line_next = buf;
148 e == PCRE_ERROR_NOMATCH && line_next < buf + size;
149 start_ofs -= line_next - line_buf)
151 line_buf = line_next;
/* Locate the end of the current line; a last line without EOLBYTE
   ends at the end of the buffer.  */
152 line_end = memchr (line_buf, eolbyte, (buf + size) - line_buf);
153 if (line_end == NULL)
154 line_next = line_end = buf + size;
156 line_next = line_end + 1;
/* Test for a line that ends at or before START_PTR.  NOTE(review):
   the statement controlled by this test is not visible here;
   presumably it skips the line.  */
158 if (start_ptr && start_ptr >= line_end)
/* The length is passed to pcre_exec below and must not exceed
   INT_MAX; longer lines are rejected.  */
161 if (INT_MAX < line_end - line_buf)
162 error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
/* Match within the current line only.  A negative START_OFS is
   clamped to 0.  SUB receives the match offsets.  */
164 e = pcre_exec (cre, extra, line_buf, line_end - line_buf,
165 start_ofs < 0 ? 0 : start_ofs, 0,
166 sub, sizeof sub / sizeof *sub);
/* Map the pcre_exec result code in E to a diagnostic.  */
173 case PCRE_ERROR_NOMATCH:
176 case PCRE_ERROR_NOMEMORY:
177 error (EXIT_TROUBLE, 0, _("memory exhausted"));
179 case PCRE_ERROR_MATCHLIMIT:
180 error (EXIT_TROUBLE, 0,
181 _("exceeded PCRE's backtracking limit"));
183 case PCRE_ERROR_BADUTF8:
184 error (EXIT_TROUBLE, 0,
185 _("invalid UTF-8 byte sequence in input"));
188 /* For now, we lump all remaining PCRE failures into this basket.
189 If anyone cares to provide sample grep usage that can trigger
190 particular PCRE errors, we can add to the list (above) of more
191 detailed diagnostics. */
192 error (EXIT_TROUBLE, 0, _("internal PCRE error: %d"), e);
200 /* Narrow down to the line we've found. */
/* sub[0] and sub[1] are used as the start and end offsets of the
   match, relative to LINE_BUF.  */
201 char const *beg = line_buf + sub[0];
202 char const *end = line_buf + sub[1];
/* One past the last byte of BUF.  */
203 char const *buflim = buf + size;
207 /* FIXME: The case when '\n' is not found indicates a bug:
208 Since grep is line oriented, the match should never contain
209 a newline, so there _must_ be a newline following.
211 if (!(end = memchr (end, eol, buflim - end)))
215 while (buf < beg && beg[-1] != eol)
219 *match_size = end - beg;