1 /* grep.c - main driver file for grep.
2 Copyright (C) 1992, 1997-2002, 2004-2014 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
19 /* Written July 1992 by Mike Haertel. */
22 #include <sys/types.h>
38 #include "fcntl-safer.h"
44 #include "propername.h"
46 #include "safe-read.h"
48 #include "version-etc.h"
52 #define SEP_CHAR_SELECTED ':'
53 #define SEP_CHAR_REJECTED '-'
54 #define SEP_STR_GROUP "--"
57 proper_name ("Mike Haertel"), \
58 _("others, see <http://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>")
60 /* When stdout is connected to a regular file, save its stat
61 information here, so that we can automatically skip it, thus
62 avoiding a potential (racy) infinite loop. */
63 static struct stat out_stat;
65 /* If nonzero, display usage information and exit. */
68 /* If non-zero, print the version on standard output and exit. */
69 static int show_version;
71 /* If nonzero, suppress diagnostics for nonexistent or unreadable files. */
72 static int suppress_errors;
74 /* If nonzero, use color markers. */
75 static int color_option;
77 /* If nonzero, show only the part of a line matching the expression. */
78 static int only_matching;
80 /* If nonzero, make sure first content char in a line is on a tab stop. */
81 static int align_tabs;
83 /* The group separator used when context is requested. */
84 static const char *group_separator = SEP_STR_GROUP;
86 /* The context and logic for choosing default --color screen attributes
87 (foreground and background colors, etc.) are the following.
88 -- There are eight basic colors available, each with its own
89 nominal luminosity to the human eye and foreground/background
90 codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
91 magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
92 yellow [89 %, 33/43], and white [100 %, 37/47]).
93 -- Sometimes, white as a background is actually implemented using
94 a shade of light gray, so that a foreground white can be visible
95 on top of it (but most often not).
96 -- Sometimes, black as a foreground is actually implemented using
97 a shade of dark gray, so that it can be visible on top of a
98 background black (but most often not).
99 -- Sometimes, more colors are available, as extensions.
100 -- Other attributes can be selected/deselected (bold [1/22],
101 underline [4/24], standout/inverse [7/27], blink [5/25], and
102 invisible/hidden [8/28]). They are sometimes implemented by
103 using colors instead of what their names imply; e.g., bold is
104 often achieved by using brighter colors. In practice, only bold
105 is really available to us, underline sometimes being mapped by
106 the terminal to some strange color choice, and standout best
107 being left for use by downstream programs such as less(1).
108 -- We cannot assume that any of the extensions or special features
109 are available for the purpose of choosing defaults for everyone.
110 -- The most prevalent default terminal backgrounds are pure black
111 and pure white, and are not necessarily the same shades of
112 those as if they were selected explicitly with SGR sequences.
113 Some terminals use dark or light pictures as default background,
114 but those are covered over by an explicit selection of background
115 color with an SGR sequence; their users will appreciate their
116 background pictures not being covered like this, if possible.
117 -- Some uses of color attributes are to make some output items
118 more understated (e.g., context lines); this cannot be achieved
119 by changing the background color.
120 -- For these reasons, the grep color defaults should strive not
121 to change the background color from its default, unless it's
122 for a short item that should be highlighted, not understated.
123 -- The grep foreground color defaults (without an explicitly set
124 background) should provide enough contrast to be readable on any
125 terminal with either a black (dark) or white (light) background.
126 This only leaves red, magenta, green, and cyan (and their bold
127 counterparts) and possibly bold blue. */
128 /* The color strings used for matched text.
129 The user can override them using the deprecated
130 environment variable GREP_COLOR or the new GREP_COLORS. */
131 static const char *selected_match_color = "01;31"; /* bold red */
132 static const char *context_match_color = "01;31"; /* bold red */
134 /* Other colors. Defaults look damn good. */
135 static const char *filename_color = "35"; /* magenta */
136 static const char *line_num_color = "32"; /* green */
137 static const char *byte_num_color = "32"; /* green */
138 static const char *sep_color = "36"; /* cyan */
139 static const char *selected_line_color = ""; /* default color pair */
140 static const char *context_line_color = ""; /* default color pair */
142 /* Select Graphic Rendition (SGR, "\33[...m") strings. */
143 /* Also Erase in Line (EL) to Right ("\33[K") by default. */
144 /* Why have EL to Right after SGR?
145 -- The behavior of line-wrapping when at the bottom of the
146 terminal screen and at the end of the current line is often
147 such that a new line is introduced, entirely cleared with
148 the current background color which may be different from the
149 default one (see the boolean back_color_erase terminfo(5)
150 capability), thus scrolling the display by one line.
151 The end of this new line will stay in this background color
152 even after reverting to the default background color with
153 "\33[m", unless it is explicitly cleared again with "\33[K"
154 (which is the behavior the user would instinctively expect
155 from the whole thing). There may be some unavoidable
156 background-color flicker at the end of this new line because
157 of this (when timing with the monitor's redraw is just right).
158 -- The behavior of HT (tab, "\t") is usually the same as that of
159 Cursor Forward Tabulation (CHT) with a default parameter
160 of 1 ("\33[I"), i.e., it performs pure movement to the next
161 tab stop, without any clearing of either content or screen
162 attributes (including background color); try
163 printf 'asdfqwerzxcv\rASDF\tZXCV\n'
164 in a bash(1) shell to demonstrate this. This is not what the
165 user would instinctively expect of HT (but is ok for CHT).
166 The instinctive behavior would include clearing the terminal
167 cells that are skipped over by HT with blank cells in the
168 current screen attributes, including background color;
169 the boolean dest_tabs_magic_smso terminfo(5) capability
170 indicates this saner behavior for HT, but only some rare
171 terminals have it (although it also indicates a special
172 glitch with standout mode in the Teleray terminal for which
173 it was initially introduced). The remedy is to add "\33[K"
174 after each SGR sequence, be it START (to fix the behavior
175 of any HT after that before another SGR) or END (to fix the
176 behavior of an HT in default background color that would
177 follow a line-wrapping at the bottom of the screen in another
178 background color, and to complement doing it after START).
179 Piping grep's output through a pager such as less(1) avoids
180 any HT problems since the pager performs tab expansion.
182 Generic disadvantages of this remedy are:
183 -- Some very rare terminals might support SGR but not EL (nobody
184 will use "grep --color" on a terminal that does not support
185 SGR in the first place).
186 -- Having these extra control sequences might somewhat complicate
187 the task of any program trying to parse "grep --color"
188 output in order to extract structuring information from it.
189 A specific disadvantage to doing it after SGR START is:
190 -- Even more possible background color flicker (when timing
191 with the monitor's redraw is just right), even when not at the
192 bottom of the screen.
193 There are no additional disadvantages specific to doing it after
196 It would be impractical for GNU grep to become a full-fledged
197 terminal program linked against ncurses or the like, so it will
198 not detect terminfo(5) capabilities. */
199 static const char *sgr_start = "\33[%sm\33[K";
200 static const char *sgr_end = "\33[m\33[K";
202 /* SGR utility functions. */
204 pr_sgr_start (char const *s)
207 print_start_colorize (sgr_start, s);
210 pr_sgr_end (char const *s)
213 print_end_colorize (sgr_end);
216 pr_sgr_start_if (char const *s)
222 pr_sgr_end_if (char const *s)
236 color_cap_mt_fct (void)
238 /* Our caller just set selected_match_color. */
239 context_match_color = selected_match_color;
243 color_cap_rv_fct (void)
245 /* By this point, it was 1 (or already -1). */
246 color_option = -1; /* That's still != 0. */
/* Handler registered for the "ne" capability in color_dict.  */
250 color_cap_ne_fct (void)
/* Use an SGR start sequence without the trailing EL to Right ("\33[K")
   that the default sgr_start carries.  */
252 sgr_start = "\33[%sm";
256 /* For GREP_COLORS. */
257 static const struct color_cap color_dict[] =
259 { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
260 { "ms", &selected_match_color, NULL }, /* selected matched text */
261 { "mc", &context_match_color, NULL }, /* context matched text */
262 { "fn", &filename_color, NULL }, /* filename */
263 { "ln", &line_num_color, NULL }, /* line number */
264 { "bn", &byte_num_color, NULL }, /* byte (sic) offset */
265 { "se", &sep_color, NULL }, /* separator */
266 { "sl", &selected_line_color, NULL }, /* selected lines */
267 { "cx", &context_line_color, NULL }, /* context lines */
268 { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */
269 { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */
273 static struct exclude *excluded_patterns;
274 static struct exclude *excluded_directory_patterns;
276 static char const short_options[] =
277 "0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";
279 /* Non-boolean long options that have no corresponding short equivalents. */
282 BINARY_FILES_OPTION = CHAR_MAX + 1,
287 LINE_BUFFERED_OPTION,
289 EXCLUDE_DIRECTORY_OPTION,
290 GROUP_SEPARATOR_OPTION
293 /* Long options equivalences. */
294 static struct option const long_options[] =
296 {"basic-regexp", no_argument, NULL, 'G'},
297 {"extended-regexp", no_argument, NULL, 'E'},
298 {"fixed-regexp", no_argument, NULL, 'F'},
299 {"fixed-strings", no_argument, NULL, 'F'},
300 {"perl-regexp", no_argument, NULL, 'P'},
301 {"after-context", required_argument, NULL, 'A'},
302 {"before-context", required_argument, NULL, 'B'},
303 {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
304 {"byte-offset", no_argument, NULL, 'b'},
305 {"context", required_argument, NULL, 'C'},
306 {"color", optional_argument, NULL, COLOR_OPTION},
307 {"colour", optional_argument, NULL, COLOR_OPTION},
308 {"count", no_argument, NULL, 'c'},
309 {"devices", required_argument, NULL, 'D'},
310 {"directories", required_argument, NULL, 'd'},
311 {"exclude", required_argument, NULL, EXCLUDE_OPTION},
312 {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
313 {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
314 {"file", required_argument, NULL, 'f'},
315 {"files-with-matches", no_argument, NULL, 'l'},
316 {"files-without-match", no_argument, NULL, 'L'},
317 {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
318 {"help", no_argument, &show_help, 1},
319 {"include", required_argument, NULL, INCLUDE_OPTION},
320 {"ignore-case", no_argument, NULL, 'i'},
321 {"initial-tab", no_argument, NULL, 'T'},
322 {"label", required_argument, NULL, LABEL_OPTION},
323 {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
324 {"line-number", no_argument, NULL, 'n'},
325 {"line-regexp", no_argument, NULL, 'x'},
326 {"max-count", required_argument, NULL, 'm'},
328 {"no-filename", no_argument, NULL, 'h'},
329 {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
330 {"no-messages", no_argument, NULL, 's'},
331 {"null", no_argument, NULL, 'Z'},
332 {"null-data", no_argument, NULL, 'z'},
333 {"only-matching", no_argument, NULL, 'o'},
334 {"quiet", no_argument, NULL, 'q'},
335 {"recursive", no_argument, NULL, 'r'},
336 {"dereference-recursive", no_argument, NULL, 'R'},
337 {"regexp", required_argument, NULL, 'e'},
338 {"invert-match", no_argument, NULL, 'v'},
339 {"silent", no_argument, NULL, 'q'},
340 {"text", no_argument, NULL, 'a'},
341 {"binary", no_argument, NULL, 'U'},
342 {"unix-byte-offsets", no_argument, NULL, 'u'},
343 {"version", no_argument, NULL, 'V'},
344 {"with-filename", no_argument, NULL, 'H'},
345 {"word-regexp", no_argument, NULL, 'w'},
349 /* Define flags declared in grep.h. */
353 unsigned char eolbyte;
355 static char const *matcher;
357 /* For error messages. */
358 /* The input file name, or (if standard input) "-" or a --label argument. */
359 static char const *filename;
360 static size_t filename_prefix_len;
362 static int write_error_seen;
364 enum directories_type
366 READ_DIRECTORIES = 2,
371 /* How to handle directories. */
372 static char const *const directories_args[] =
374 "read", "recurse", "skip", NULL
376 static enum directories_type const directories_types[] =
378 READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES
380 ARGMATCH_VERIFY (directories_args, directories_types);
382 static enum directories_type directories = READ_DIRECTORIES;
384 enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
385 static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
387 /* How to handle devices. */
390 READ_COMMAND_LINE_DEVICES,
393 } devices = READ_COMMAND_LINE_DEVICES;
395 static int grepfile (int, char const *, int, int);
396 static int grepdesc (int, int);
398 static void dos_binary (void);
399 static void dos_unix_byte_offsets (void);
400 static int undossify_input (char *, size_t);
/* Return nonzero if mode M describes a character device, a block
   device, a socket, or a FIFO.  */
403 is_device_mode (mode_t m)
405 return S_ISCHR (m) || S_ISBLK (m) || S_ISSOCK (m) || S_ISFIFO (m);
408 /* Return nonzero if ST->st_size is defined. Assume the file is not a
411 usable_st_size (struct stat const *st)
413 return S_ISREG (st->st_mode) || S_TYPEISSHM (st) || S_TYPEISTMO (st);
416 /* Functions we'll use to search. */
417 typedef void (*compile_fp_t) (char const *, size_t);
418 typedef size_t (*execute_fp_t) (char const *, size_t, size_t *, char const *);
419 static compile_fp_t compile;
420 static execute_fp_t execute;
422 /* Like error, but suppress the diagnostic if requested. */
424 suppressible_error (char const *mesg, int errnum)
426 if (! suppress_errors)
427 error (0, errnum, "%s", mesg);
431 /* If there has already been a write error, don't bother closing
432 standard output, as that might elicit a duplicate diagnostic. */
434 clean_up_stdout (void)
436 if (! write_error_seen)
440 /* Return 1 if a file is known to be binary for the purpose of 'grep'.
441 BUF, of size BUFSIZE, is the initial buffer read from the file with
442 descriptor FD and status ST. */
444 file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st)
447 enum { SEEK_HOLE = SEEK_END };
450 /* If -z, test only whether the initial buffer contains '\200';
451 knowing about holes won't help. */
453 return memchr (buf, '\200', bufsize) != 0;
455 /* If the initial buffer contains a null byte, guess that the file
457 if (memchr (buf, '\0', bufsize))
460 /* If the file has holes, it must contain a null byte somewhere. */
461 if (SEEK_HOLE != SEEK_END && usable_st_size (st))
464 if (O_BINARY || fd == STDIN_FILENO)
466 cur = lseek (fd, 0, SEEK_CUR);
471 /* Look for a hole after the current location. */
472 off_t hole_start = lseek (fd, cur, SEEK_HOLE);
475 if (lseek (fd, cur, SEEK_SET) < 0)
476 suppressible_error (filename, errno);
477 if (hole_start < st->st_size)
482 /* Guess that the file does not contain binary data. */
486 /* Convert STR to a nonnegative integer, storing the result in *OUT.
487 STR must be a valid context length argument; report an error if it
488 isn't. Silently ceiling *OUT at the maximum value, as that is
489 practically equivalent to infinity for grep's purposes. */
491 context_length_arg (char const *str, intmax_t *out)
493 switch (xstrtoimax (str, 0, 10, out, ""))
496 case LONGINT_OVERFLOW:
501 error (EXIT_TROUBLE, 0, "%s: %s", str,
502 _("invalid context length argument"));
506 /* Return nonzero if the file with NAME should be skipped.
507 If COMMAND_LINE is nonzero, it is a command-line argument.
508 If IS_DIR is nonzero, it is a directory. */
510 skipped_file (char const *name, int command_line, int is_dir)
513 ? (directories == SKIP_DIRECTORIES
514 || (! (command_line && filename_prefix_len != 0)
515 && excluded_directory_patterns
516 && excluded_file_name (excluded_directory_patterns, name)))
518 && excluded_file_name (excluded_patterns, name)));
521 /* Hairy buffering mechanism for grep. The intent is to keep
522 all reads aligned on a page boundary and multiples of the
523 page size, unless a read yields a partial page. */
525 static char *buffer; /* Base of buffer. */
526 static size_t bufalloc; /* Allocated buffer size, counting slop. */
527 #define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. */
528 static int bufdesc; /* File descriptor. */
529 static char *bufbeg; /* Beginning of user-visible stuff. */
530 static char *buflim; /* Limit of user-visible stuff. */
531 static size_t pagesize; /* Alignment of memory pages. */
532 static off_t bufoffset; /* Read offset; defined on regular files. */
533 static off_t after_last_match; /* File offset after last matching line that
534 would have been output if we were
535 outputting characters. */
537 /* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be
538 an integer or a pointer. Both args must be free of side effects. */
539 #define ALIGN_TO(val, alignment) \
540 ((size_t) (val) % (alignment) == 0 \
542 : (val) + ((alignment) - (size_t) (val) % (alignment)))
544 /* Reset the buffer for a new file, returning zero if we should skip it.
545 Initialize on the first time through. */
547 reset (int fd, struct stat const *st)
551 pagesize = getpagesize ();
552 if (pagesize == 0 || 2 * pagesize + 1 <= pagesize)
554 bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + 1;
555 buffer = xmalloc (bufalloc);
558 bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
559 bufbeg[-1] = eolbyte;
562 if (S_ISREG (st->st_mode))
564 if (fd != STDIN_FILENO)
568 bufoffset = lseek (fd, 0, SEEK_CUR);
571 suppressible_error (_("lseek failed"), errno);
579 /* Read new stuff into the buffer, saving the specified
580 amount of old stuff. When we're done, 'bufbeg' points
581 to the beginning of the buffer contents, and 'buflim'
582 points just after the end. Return zero if there's an error. */
584 fillbuf (size_t save, struct stat const *st)
591 /* Offset from start of buffer to start of old stuff
592 that we want to save. */
593 size_t saved_offset = buflim - save - buffer;
595 if (pagesize <= buffer + bufalloc - buflim)
598 bufbeg = buflim - save;
602 size_t minsize = save + pagesize;
607 /* Grow newsize until it is at least as great as minsize. */
608 for (newsize = bufalloc - pagesize - 1; newsize < minsize; newsize *= 2)
609 if (newsize * 2 < newsize || newsize * 2 + pagesize + 1 < newsize * 2)
612 /* Try not to allocate more memory than the file size indicates,
613 as that might cause unnecessary memory exhaustion if the file
614 is large. However, do not use the original file size as a
615 heuristic if we've already read past the file end, as most
616 likely the file is growing. */
617 if (usable_st_size (st))
619 off_t to_be_read = st->st_size - bufoffset;
620 off_t maxsize_off = save + to_be_read;
621 if (0 <= to_be_read && to_be_read <= maxsize_off
622 && maxsize_off == (size_t) maxsize_off
623 && minsize <= (size_t) maxsize_off
624 && (size_t) maxsize_off < newsize)
625 newsize = maxsize_off;
628 /* Add enough room so that the buffer is aligned and has room
629 for byte sentinels fore and aft. */
630 newalloc = newsize + pagesize + 1;
632 newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
633 readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
634 bufbeg = readbuf - save;
635 memmove (bufbeg, buffer + saved_offset, save);
636 bufbeg[-1] = eolbyte;
637 if (newbuf != buffer)
644 readsize = buffer + bufalloc - readbuf;
645 readsize -= readsize % pagesize;
647 fillsize = safe_read (bufdesc, readbuf, readsize);
650 bufoffset += fillsize;
651 fillsize = undossify_input (readbuf, fillsize);
652 buflim = readbuf + fillsize;
656 /* Flags controlling the style of output. */
661 WITHOUT_MATCH_BINARY_FILES
662 } binary_files; /* How to handle binary files. */
664 static int filename_mask; /* If zero, output nulls after filenames. */
665 static int out_quiet; /* Suppress all normal output. */
666 static bool out_invert; /* Print nonmatching stuff. */
667 static int out_file; /* Print filenames. */
668 static int out_line; /* Print line numbers. */
669 static int out_byte; /* Print byte offsets. */
670 static intmax_t out_before; /* Lines of leading context. */
671 static intmax_t out_after; /* Lines of trailing context. */
672 static int count_matches; /* Count matching lines. */
673 static int list_files; /* List matching files. */
674 static int no_filenames; /* Suppress file names. */
675 static intmax_t max_count; /* Stop after outputting this many
676 lines from an input file. */
677 static int line_buffered; /* If nonzero, use line buffering, i.e.
678 fflush every line out. */
679 static char *label = NULL; /* Fake filename for stdin. */
682 /* Internal variables to keep track of byte count, context, etc. */
683 static uintmax_t totalcc; /* Total character count before bufbeg. */
684 static char const *lastnl; /* Pointer after last newline counted. */
685 static char const *lastout; /* Pointer after last character output;
686 NULL if no character has been output
687 or if it's conceptually before bufbeg. */
688 static uintmax_t totalnl; /* Total newline count before lastnl. */
689 static intmax_t outleft; /* Maximum number of lines to be output. */
690 static intmax_t pending; /* Pending lines of output.
691 Always kept 0 if out_quiet is true. */
692 static int done_on_match; /* Stop scanning file on first match. */
693 static int exit_on_match; /* Exit on first match. */
697 /* Add two numbers that count input bytes or lines, and report an
698 error if the addition overflows. */
700 add_count (uintmax_t a, uintmax_t b)
702 uintmax_t sum = a + b;
704 error (EXIT_TROUBLE, 0, _("input is too large to count"));
709 nlscan (char const *lim)
713 for (beg = lastnl; beg < lim; beg++)
715 beg = memchr (beg, eolbyte, lim - beg);
720 totalnl = add_count (totalnl, newlines);
724 /* Print the current filename. */
726 print_filename (void)
728 pr_sgr_start_if (filename_color);
729 fputs (filename, stdout);
730 pr_sgr_end_if (filename_color);
733 /* Print a character separator. */
737 pr_sgr_start_if (sep_color);
739 pr_sgr_end_if (sep_color);
742 /* Print POS, a line number or a byte offset, in decimal on stdout,
   bracketed by pr_sgr_start_if (COLOR) and pr_sgr_end_if (COLOR).
   NOTE(review): MIN_WIDTH looks like a minimum field width; the padding
   statements are not visible in this excerpt -- confirm.  */
744 print_offset (uintmax_t pos, int min_width, const char *color)
746 /* Do not rely on printf to print pos, since uintmax_t may be longer
747 than long, and long long is not portable. */
/* One char per bit of POS is a generous bound on its decimal length.  */
749 char buf[sizeof pos * CHAR_BIT];
/* Digits are produced least-significant first, so BUF is filled
   backward starting from its end.  */
750 char *p = buf + sizeof buf;
754 *--p = '0' + pos % 10;
757 while ((pos /= 10) != 0);
759 /* Do this to maximize the probability of alignment across lines. */
761 while (--min_width >= 0)
764 pr_sgr_start_if (color);
/* Emit everything from P up to the end of BUF.  */
765 fwrite (p, 1, buf + sizeof buf - p, stdout);
766 pr_sgr_end_if (color);
769 /* Print a whole line head (filename, line, byte). */
771 print_line_head (char const *beg, char const *lim, int sep)
789 totalnl = add_count (totalnl, 1);
794 print_offset (totalnl, 4, line_num_color);
800 uintmax_t pos = add_count (totalcc, beg - bufbeg);
801 pos = dossified_pos (pos);
804 print_offset (pos, 6, byte_num_color);
810 /* This assumes sep is one column wide.
811 Try doing this any other way with Unicode
812 (and its combining and wide characters)
813 filenames and you're wasting your efforts. */
815 fputs ("\t\b", stdout);
822 print_line_middle (const char *beg, const char *lim,
823 const char *line_color, const char *match_color)
827 const char *cur = beg;
828 const char *mid = NULL;
831 && ((match_offset = execute (beg, lim - beg, &match_size,
832 beg + (cur - beg))) != (size_t) -1))
834 char const *b = beg + match_offset;
836 /* Avoid matching the empty line at the end of the buffer. */
840 /* Avoid hanging on grep --color "" foo */
843 /* Make minimal progress; there may be further non-empty matches. */
844 /* XXX - Could really advance by one whole multi-octet character. */
851 /* This function is called on a matching line only,
852 but is it selected or rejected/context? */
854 print_line_head (b, lim, (out_invert ? SEP_CHAR_REJECTED
855 : SEP_CHAR_SELECTED));
858 pr_sgr_start (line_color);
864 fwrite (cur, sizeof (char), b - cur, stdout);
867 pr_sgr_start_if (match_color);
868 fwrite (b, sizeof (char), match_size, stdout);
869 pr_sgr_end_if (match_color);
871 fputs ("\n", stdout);
873 cur = b + match_size;
885 print_line_tail (const char *beg, const char *lim, const char *line_color)
890 eol_size = (lim > beg && lim[-1] == eolbyte);
891 eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
892 tail_size = lim - eol_size - beg;
896 pr_sgr_start (line_color);
897 fwrite (beg, 1, tail_size, stdout);
899 pr_sgr_end (line_color);
906 prline (char const *beg, char const *lim, int sep)
909 const char *line_color;
910 const char *match_color;
913 print_line_head (beg, lim, sep);
915 matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;
919 line_color = (((sep == SEP_CHAR_SELECTED)
920 ^ (out_invert && (color_option < 0)))
921 ? selected_line_color : context_line_color);
922 match_color = (sep == SEP_CHAR_SELECTED
923 ? selected_match_color : context_match_color);
926 line_color = match_color = NULL; /* Shouldn't be used. */
928 if ((only_matching && matching)
929 || (color_option && (*line_color || *match_color)))
931 /* We already know that non-matching lines have no match (to colorize). */
932 if (matching && (only_matching || *match_color))
933 beg = print_line_middle (beg, lim, line_color, match_color);
935 if (!only_matching && *line_color)
937 /* This code is exercised at least when grep is invoked like this:
938 echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */
939 beg = print_line_tail (beg, lim, line_color);
943 if (!only_matching && lim > beg)
944 fwrite (beg, 1, lim - beg, stdout);
948 write_error_seen = 1;
949 error (EXIT_TROUBLE, 0, _("write error"));
958 /* Print pending lines of trailing context prior to LIM. Trailing context ends
959 at the next matching line when OUTLEFT is 0. */
961 prpending (char const *lim)
965 while (pending > 0 && lastout < lim)
967 char const *nl = memchr (lastout, eolbyte, lim - lastout);
971 || ((execute (lastout, nl + 1 - lastout,
972 &match_size, NULL) == (size_t) -1)
974 prline (lastout, nl + 1, SEP_CHAR_REJECTED);
980 /* Output the lines between BEG and LIM. Deal with context. */
982 prtext (char const *beg, char const *lim)
984 static bool used; /* Avoid printing SEP_STR_GROUP before any output. */
987 if (!out_quiet && pending > 0)
994 /* Deal with leading context. */
995 char const *bp = lastout ? lastout : bufbeg;
997 for (i = 0; i < out_before; ++i)
1001 while (p[-1] != eol);
1003 /* Print the group separator unless the output is adjacent to
1004 the previous output in the file. */
1005 if ((0 <= out_before || 0 <= out_after) && used
1006 && p != lastout && group_separator)
1008 pr_sgr_start_if (sep_color);
1009 fputs (group_separator, stdout);
1010 pr_sgr_end_if (sep_color);
1011 fputc ('\n', stdout);
1016 char const *nl = memchr (p, eol, beg - p);
1018 prline (p, nl, SEP_CHAR_REJECTED);
1026 /* One or more lines are output. */
1027 for (n = 0; p < lim && n < outleft; n++)
1029 char const *nl = memchr (p, eol, lim - p);
1032 prline (p, nl, SEP_CHAR_SELECTED);
1038 /* Just one line is output. */
1040 prline (beg, lim, SEP_CHAR_SELECTED);
1045 after_last_match = bufoffset - (buflim - p);
1046 pending = out_quiet ? 0 : MAX (0, out_after);
1051 /* Invoke the matcher, EXECUTE, on buffer BUF of SIZE bytes. If there
1052 is no match, return (size_t) -1. Otherwise, set *MATCH_SIZE to the
1053 length of the match and return the offset of the start of the match. */
1055 do_execute (char const *buf, size_t size, size_t *match_size)
1058 const char *line_next;
1060 /* With the current implementation, using --ignore-case with a multi-byte
1061 character set is very inefficient when applied to a large buffer
1062 containing many matches. We can avoid much of the wasted effort
1063 by matching line-by-line.
1065 FIXME: this is just an ugly workaround, and it doesn't really
1066 belong here. Also, PCRE is always using this same per-line
1067 matching algorithm. Either we fix -i, or we should refactor
1068 this code---for example, we could add another function pointer
1069 to struct matcher to split the buffer passed to execute. It would
1070 perform the memchr if line-by-line matching is necessary, or just
1071 return buf + size otherwise. */
1072 if (! (execute == Fexecute || execute == Pexecute)
1073 || MB_CUR_MAX == 1 || !match_icase)
1074 return execute (buf, size, match_size, NULL);
1076 for (line_next = buf; line_next < buf + size; )
1078 const char *line_buf = line_next;
1079 const char *line_end = memchr (line_buf, eolbyte,
1080 (buf + size) - line_buf);
1081 if (line_end == NULL)
1082 line_next = line_end = buf + size;
1084 line_next = line_end + 1;
1086 result = execute (line_buf, line_next - line_buf, match_size, NULL);
1087 if (result != (size_t) -1)
1088 return (line_buf - buf) + result;
1094 /* Scan the specified portion of the buffer, matching lines (or
1095 between matching lines if OUT_INVERT is true). Return a count of
1098 grepbuf (char const *beg, char const *lim)
1100 intmax_t outleft0 = outleft;
1104 for (p = beg; p < lim; p = endp)
1107 size_t match_offset = do_execute (p, lim - p, &match_size);
1108 if (match_offset == (size_t) -1)
1112 match_offset = lim - p;
1115 char const *b = p + match_offset;
1116 endp = b + match_size;
1117 /* Avoid matching the empty line at the end of the buffer. */
1118 if (!out_invert && b == lim)
1120 if (!out_invert || p < b)
1122 char const *prbeg = out_invert ? p : b;
1123 char const *prend = out_invert ? b : endp;
1124 prtext (prbeg, prend);
1125 if (!outleft || done_on_match)
1128 exit (EXIT_SUCCESS);
1134 return outleft0 - outleft;
1137 /* Search a given file. Normally, return a count of lines printed;
1138 but if the file is a directory and we search it recursively, then
1139 return -2 if there was a match, and -1 otherwise. */
/* FD is the descriptor to read and ST its stat data; both are handed
   to reset, and ST also to fillbuf.  The running count kept below is
   NLINES, the sum of the values returned by grepbuf.
   NOTE(review): no directory handling is visible in the lines of this
   function shown here -- confirm that the directory case described
   above still applies.  */
1141 grep (int fd, struct stat const *st)
1145 size_t residue, save;
/* Set up the buffer state for this descriptor; the result is tested
   for failure.  */
1151 if (! reset (fd, st))
/* Each file starts with the full match budget and a zero
   AFTER_LAST_MATCH position.  */
1157 outleft = max_count;
1158 after_last_match = 0;
/* First read.  A failure is reported through suppressible_error.  */
1165 if (! fillbuf (save, st))
1167 suppressible_error (filename, errno);
/* NOT_TEXT is nonzero only when the binary-files mode calls for the
   check (BINARY_BINARY_FILES with output not quiet, or
   WITHOUT_MATCH_BINARY_FILES) and file_is_binary reports the buffered
   data as binary.  */
1171 not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet)
1172 || binary_files == WITHOUT_MATCH_BINARY_FILES)
1173 && file_is_binary (bufbeg, buflim - bufbeg, fd, st));
1174 if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES)
/* Raise DONE_ON_MATCH and OUT_QUIET by NOT_TEXT for the duration of
   the search; the same amounts are subtracted again near the end.  */
1176 done_on_match += not_text;
1177 out_quiet += not_text;
1185 beg = bufbeg + save;
1187 /* no more data to scan (eof) except for maybe a residue -> break */
1191 /* Determine new residue (the length of an incomplete line at the end of
1192 the buffer, 0 means there is no incomplete last line). */
1195 /* FIXME: use rawmemrchr if/when it exists, since we have ensured
1196 that this use of memrchr is guaranteed never to return NULL. */
1197 lim = memrchr (beg - 1, eol, buflim - beg + 1);
1201 lim = beg - residue;
/* The bytes from LIM up to BUFLIM form the new residue.  */
1203 residue = buflim - lim;
/* Search the range from BEG to LIM and add its result to the running
   count.  */
1208 nlines += grepbuf (beg, lim);
/* This holds once the match budget is used up with nothing pending,
   or when a line has been counted and DONE_ON_MATCH is set.  */
1211 if ((!outleft && !pending) || (nlines && done_on_match))
1215 /* The last OUT_BEFORE lines at the end of the buffer will be needed as
1216 leading context if there is a matching line at the begin of the
1217 next data. Make beg point to their begin. */
1220 while (i < out_before && beg > bufbeg && beg != lastout)
1225 while (beg[-1] != eol);
1228 /* Detect whether leading context is adjacent to previous output. */
1232 /* Handle some details and read more data to scan. */
/* SAVE is the number of bytes to keep across the next fill: the
   residue plus the span from BEG to LIM.  */
1233 save = residue + lim - beg;
/* Add the bytes that are not being kept to the running total.  */
1235 totalcc = add_count (totalcc, buflim - bufbeg - save);
1238 if (! fillbuf (save, st))
1240 suppressible_error (filename, errno);
/* Search from RESIDUE bytes before BUFBEG + SAVE through BUFLIM.  */
1248 nlines += grepbuf (bufbeg + save - residue, buflim);
/* Undo the temporary adjustments made after the binary check.  */
1254 done_on_match -= not_text;
1255 out_quiet -= not_text;
/* Print the binary-file notice when NOT_TEXT has a bit that OUT_QUIET
   lacks and at least one line was counted.  */
1256 if ((not_text & ~out_quiet) && nlines != 0)
1257 printf (_("Binary file %s matches\n"), filename);
/* Handle one entry ENT produced by the traversal FTS.  COMMAND_LINE
   is masked below so that it stays nonzero only for entries at
   FTS_ROOTLEVEL.  Depending on ENT->fts_info the entry may be pruned
   with fts_set (FTS_SKIP), diagnosed, or passed on to grepfile, whose
   result is returned by the last visible statement.
   NOTE(review): the case labels and the other return statements are
   not among the lines shown here.  */
1262 grepdirent (FTS *fts, FTSENT *ent, int command_line)
1264 int follow, dirdesc;
1265 struct stat *st = ent->fts_statp;
1266 command_line &= ent->fts_level == FTS_ROOTLEVEL;
/* FTS_DP entry: when recursing from a command-line entry, clear the
   same OUT_FILE bit (2 * !no_filenames) that is set further below
   when recursing into a directory.  */
1268 if (ent->fts_info == FTS_DP)
1270 if (directories == RECURSE_DIRECTORIES && command_line)
1271 out_file &= ~ (2 * !no_filenames);
/* Ask skipped_file about the entry name; the third argument is
   nonzero for the directory-type values FTS_D, FTS_DC and FTS_DNR.
   An entry that is to be skipped is pruned from the traversal.  */
1275 if (skipped_file (ent->fts_name, command_line,
1276 (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
1277 || ent->fts_info == FTS_DNR)))
1279 fts_set (fts, ent, FTS_SKIP);
/* Name used in diagnostics: the entry path minus
   FILENAME_PREFIX_LEN leading bytes.  */
1283 filename = ent->fts_path + filename_prefix_len;
/* Follow symlinks when the traversal is logical, or when
   FTS_COMFOLLOW is set and this is a command-line entry.  */
1284 follow = (fts->fts_options & FTS_LOGICAL
1285 || (fts->fts_options & FTS_COMFOLLOW && command_line));
1287 switch (ent->fts_info)
1290 if (directories == RECURSE_DIRECTORIES)
1292 out_file |= 2 * !no_filenames;
1295 fts_set (fts, ent, FTS_SKIP);
1299 if (!suppress_errors)
1300 error (0, 0, _("warning: %s: %s"), filename,
1301 _("recursive directory loop"));
1307 suppressible_error (filename, ent->fts_errno);
/* This test holds when DEVICES is SKIP_DEVICES, or is
   READ_COMMAND_LINE_DEVICES for an entry not named on the command
   line; presumably the device check below applies only then --
   confirm against the full source.  */
1312 if (devices == SKIP_DEVICES
1313 || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
1318 /* The file type is not already known. Get the file status
1319 before opening, since opening might have side effects
1321 int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
1322 if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
1324 suppressible_error (filename, errno);
1329 if (is_device_mode (st->st_mode))
1346 dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD
1349 return grepfile (dirdesc, ent->fts_accpath, follow, command_line);
1353 grepfile (int dirdesc, char const *name, int follow, int command_line)
1355 int desc = openat_safer (dirdesc, name, O_RDONLY | (follow ? 0 : O_NOFOLLOW));
1358 if (follow || (errno != ELOOP && errno != EMLINK))
1359 suppressible_error (filename, errno);
1362 return grepdesc (desc, command_line);
/* Search the file open on descriptor DESC; diagnostics name it by
   the global FILENAME.  COMMAND_LINE is forwarded to grepdirent and
   gates the skipped_file and device checks below.  The visible steps
   are: fstat the descriptor, apply the skip rules, traverse
   directories with fts when DIRECTORIES is RECURSE_DIRECTORIES,
   refuse an input that is also the output, run grep on the
   descriptor, print the count and/or file name, reposition standard
   input, and close any other descriptor.
   NOTE(review): the return statements are not among the lines shown
   here; callers AND the result into their own status.  */
1366 grepdesc (int desc, int command_line)
1372 /* Get the file status, possibly for the second time. This catches
1373 a race condition if the directory entry changes after the
1374 directory entry is read and before the file is opened. For
1375 example, normally DESC is a directory only at the top level, but
1376 there is an exception if some other process substitutes a
1377 directory for a non-directory while 'grep' is running. */
1378 if (fstat (desc, &st) != 0)
1380 suppressible_error (filename, errno);
/* The skip rules are consulted only for a command-line file other
   than standard input.  */
1384 if (desc != STDIN_FILENO && command_line
1385 && skipped_file (filename, 1, S_ISDIR (st.st_mode)))
1388 if (desc != STDIN_FILENO
1389 && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
1391 /* Traverse the directory starting with its full name, because
1392 unfortunately fts provides no way to traverse the directory
1393 starting from its file descriptor. */
/* FTS_COMFOLLOW is removed from the options unless COMMAND_LINE is
   nonzero.  */
1397 int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
1400 /* Close DESC now, to conserve file descriptors if the race
1401 condition occurs many times in a deep recursion. */
1402 if (close (desc) != 0)
1403 suppressible_error (filename, errno);
/* The cast only drops const so that FILENAME fits the fts argument
   array.  */
1405 fts_arg[0] = (char *) filename;
1407 fts = fts_open (fts_arg, opts, NULL);
/* Every entry's result is ANDed into STATUS.  */
1411 while ((ent = fts_read (fts)))
1412 status &= grepdirent (fts, ent, command_line);
1414 suppressible_error (filename, errno);
1415 if (fts_close (fts) != 0)
1416 suppressible_error (filename, errno);
/* Standard input is exempt.  Otherwise this selects directories
   under SKIP_DIRECTORIES, and device-mode files when DEVICES is
   SKIP_DEVICES or is READ_COMMAND_LINE_DEVICES for a file that was
   not named on the command line.  */
1419 if (desc != STDIN_FILENO
1420 && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
1421 || ((devices == SKIP_DEVICES
1422 || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
1423 && is_device_mode (st.st_mode))))
1426 /* If there is a regular file on stdout and the current file refers
1427 to the same i-node, we have to report the problem and skip it.
1428 Otherwise when matching lines from some other input reach the
1429 disk before we open this file, we can end up reading and matching
1430 those lines and appending them to the file from which we're reading.
1431 Then we'd have what appears to be an infinite loop that'd terminate
1432 only upon filling the output file system or reaching a quota.
1433 However, there is no risk of an infinite loop if grep is generating
1434 no output, i.e., with --silent, --quiet, -q.
1435 Similarly, with any of these:
1436 --max-count=N (-m) (for N >= 2)
1437 --files-with-matches (-l)
1438 --files-without-match (-L)
1439 there is no risk of trouble.
1440 For --max-count=1, grep stops after printing the first match,
1441 so there is no risk of malfunction. But even --max-count=2, with
1442 input==output, while there is no risk of infloop, there is a race
1443 condition that could result in "alternate" output. */
/* The test also requires that OUT_STAT describe a regular file with
   a nonzero inode number.  */
1444 if (!out_quiet && list_files == 0 && 1 < max_count
1445 && S_ISREG (out_stat.st_mode) && out_stat.st_ino
1446 && SAME_INODE (st, out_stat))
1448 if (! suppress_errors)
1449 error (0, 0, _("input file %s is also the output"), quote (filename));
1454 #if defined SET_BINARY
1455 /* Set input to binary mode. Pipes are simulated with files
1456 on DOS, so this includes the case of "foo | grep bar". */
/* Run the search proper; COUNT receives what grep returns.  */
1461 count = grep (desc, &st);
/* Output of COUNT as a decimal number on its own line, preceded by a
   separator.  */
1472 print_sep (SEP_CHAR_SELECTED);
1476 printf ("%" PRIdMAX "\n", count);
/* True when LIST_FILES is 1 and STATUS is 0, or when LIST_FILES is
   -1 and STATUS is 1.  */
1480 if (list_files == 1 - 2 * status)
/* The byte written is a newline ANDed with FILENAME_MASK.  */
1483 fputc ('\n' & filename_mask, stdout);
/* For standard input, the wanted file position is BUFOFFSET while
   OUTLEFT is nonzero and AFTER_LAST_MATCH otherwise.  A seek is
   attempted only when that differs from BUFOFFSET, and a failed seek
   is reported only for a regular file.  */
1486 if (desc == STDIN_FILENO)
1488 off_t required_offset = outleft ? bufoffset : after_last_match;
1489 if (required_offset != bufoffset
1490 && lseek (desc, required_offset, SEEK_SET) < 0
1491 && S_ISREG (st.st_mode))
1492 suppressible_error (filename, errno);
/* Any descriptor other than standard input is closed here, and a
   close failure is reported.  */
1497 if (desc != STDIN_FILENO && close (desc) != 0)
1498 suppressible_error (filename, errno);
1503 grep_command_line_arg (char const *arg)
1505 if (STREQ (arg, "-"))
1507 filename = label ? label : _("(standard input)");
1508 return grepdesc (STDIN_FILENO, 1);
1513 return grepfile (AT_FDCWD, arg, 1, 1);
/* Print usage information.  Judging by the lines shown here, a
   one-line synopsis and a "Try --help" hint are written to stderr,
   while the full option summary is written to stdout; callers in this
   file pass EXIT_TROUBLE or EXIT_SUCCESS as the argument.
   NOTE(review): the function header and the test that chooses between
   the two outputs are not among the visible lines.  */
1517 _Noreturn void usage (int);
1523 fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"),
1525 fprintf (stderr, _("Try '%s --help' for more information.\n"),
/* Full help text follows, one translated chunk per output call.  */
1530 printf (_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name);
1531 printf (_("Search for PATTERN in each FILE or standard input.\n"));
1532 printf (_("PATTERN is, by default, a basic regular expression (BRE).\n"));
1534 Example: %s -i 'hello world' menu.h main.c\n\
1536 Regexp selection and interpretation:\n"), program_name);
1538 -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\
1539 -F, --fixed-strings PATTERN is a set of newline-separated fixed strings\n\
1540 -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\
1541 -P, --perl-regexp PATTERN is a Perl regular expression\n"));
1542 /* -X is undocumented on purpose. */
1544 -e, --regexp=PATTERN use PATTERN for matching\n\
1545 -f, --file=FILE obtain PATTERN from FILE\n\
1546 -i, --ignore-case ignore case distinctions\n\
1547 -w, --word-regexp force PATTERN to match only whole words\n\
1548 -x, --line-regexp force PATTERN to match only whole lines\n\
1549 -z, --null-data a data line ends in 0 byte, not newline\n"));
1553 -s, --no-messages suppress error messages\n\
1554 -v, --invert-match select non-matching lines\n\
1555 -V, --version display version information and exit\n\
1556 --help display this help text and exit\n"));
1560 -m, --max-count=NUM stop after NUM matches\n\
1561 -b, --byte-offset print the byte offset with output lines\n\
1562 -n, --line-number print line number with output lines\n\
1563 --line-buffered flush output on every line\n\
1564 -H, --with-filename print the file name for each match\n\
1565 -h, --no-filename suppress the file name prefix on output\n\
1566 --label=LABEL use LABEL as the standard input file name prefix\n\
1569 -o, --only-matching show only the part of a line matching PATTERN\n\
1570 -q, --quiet, --silent suppress all normal output\n\
1571 --binary-files=TYPE assume that binary files are TYPE;\n\
1572 TYPE is 'binary', 'text', or 'without-match'\n\
1573 -a, --text equivalent to --binary-files=text\n\
1576 -I equivalent to --binary-files=without-match\n\
1577 -d, --directories=ACTION how to handle directories;\n\
1578 ACTION is 'read', 'recurse', or 'skip'\n\
1579 -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\
1580 ACTION is 'read' or 'skip'\n\
1581 -r, --recursive like --directories=recurse\n\
1582 -R, --dereference-recursive likewise, but follow all symlinks\n\
1585 --include=FILE_PATTERN search only files that match FILE_PATTERN\n\
1586 --exclude=FILE_PATTERN skip files and directories matching FILE_PATTERN\n\
1587 --exclude-from=FILE skip files matching any file pattern from FILE\n\
1588 --exclude-dir=PATTERN directories that match PATTERN will be skipped.\n\
1591 -L, --files-without-match print only names of FILEs containing no match\n\
1592 -l, --files-with-matches print only names of FILEs containing matches\n\
1593 -c, --count print only a count of matching lines per FILE\n\
1594 -T, --initial-tab make tabs line up (if needed)\n\
1595 -Z, --null print 0 byte after FILE name\n"));
1599 -B, --before-context=NUM print NUM lines of leading context\n\
1600 -A, --after-context=NUM print NUM lines of trailing context\n\
1601 -C, --context=NUM print NUM lines of output context\n\
1604 -NUM same as --context=NUM\n\
1606 --colour[=WHEN] use markers to highlight the matching strings;\n\
1607 WHEN is 'always', 'never', or 'auto'\n\
1608 -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\
1609 -u, --unix-byte-offsets report offsets as if CRs were not there\n\
1613 'egrep' means 'grep -E'. 'fgrep' means 'grep -F'.\n\
1614 Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n"));
1616 When FILE is -, read standard input. With no FILE, read . if a command-line\n\
1617 -r is given, - otherwise. If fewer than two FILEs are given, assume -h.\n\
1618 Exit status is 0 if any line is selected, 1 otherwise;\n\
1619 if any error occurs and -q is not given, the exit status is 2.\n"));
/* Trailer: bug-report address, home page and general help pointer.  */
1620 printf (_("\nReport bugs to: %s\n"), PACKAGE_BUGREPORT);
1621 printf (_("GNU Grep home page: <%s>\n"),
1622 "http://www.gnu.org/software/grep/");
1623 fputs (_("General help using GNU software: <http://www.gnu.org/gethelp/>\n"),
1630 /* Pattern compilers and matchers. */
1633 Gcompile (char const *pattern, size_t size)
1635 GEAcompile (pattern, size, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES);
1639 Ecompile (char const *pattern, size_t size)
1641 GEAcompile (pattern, size, RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
1645 Acompile (char const *pattern, size_t size)
1647 GEAcompile (pattern, size, RE_SYNTAX_AWK);
1651 GAcompile (char const *pattern, size_t size)
1653 GEAcompile (pattern, size, RE_SYNTAX_GNU_AWK);
1657 PAcompile (char const *pattern, size_t size)
1659 GEAcompile (pattern, size, RE_SYNTAX_POSIX_AWK);
1664 char const name[16];
1665 compile_fp_t compile;
1666 execute_fp_t execute;
1668 static struct matcher const matchers[] = {
1669 { "grep", Gcompile, EGexecute },
1670 { "egrep", Ecompile, EGexecute },
1671 { "fgrep", Fcompile, Fexecute },
1672 { "awk", Acompile, EGexecute },
1673 { "gawk", GAcompile, EGexecute },
1674 { "posixawk", PAcompile, EGexecute },
1675 { "perl", Pcompile, Pexecute },
1679 /* Set the matcher to M if available. Exit in case of conflicts or if
1680 M is not available. */
1682 setmatcher (char const *m)
1684 struct matcher const *p;
1686 if (matcher && !STREQ (matcher, m))
1687 error (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
1689 for (p = matchers; p->compile; p++)
1690 if (STREQ (m, p->name))
1693 compile = p->compile;
1694 execute = p->execute;
1698 error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
1701 /* Find the white-space-separated options specified by OPTIONS, and
1702 using BUF to store copies of these options, set ARGV[0], ARGV[1],
1703 etc. to the option copies. Return the number N of options found.
1704 Do not set ARGV[N] to NULL. If ARGV is NULL, do not store ARGV[0]
1705 etc. Backslash can be used to escape whitespace (and backslashes). */
1707 prepend_args (char const *options, char *buf, char **argv)
/* O is the read cursor over OPTIONS.  */
1709 char const *o = options;
/* Skip the whitespace in front of the next option.  */
1715 while (c_isspace (to_uchar (*o)))
/* Copy one byte into the buffer; the test singles out a backslash
   that is followed by at least one more byte.  */
1724 if ((*b++ = *o++) == '\\' && *o)
/* The option continues until whitespace or the end of OPTIONS.  */
1726 while (*o && ! c_isspace (to_uchar (*o)));
1732 /* Prepend the whitespace-separated options in OPTIONS to the argument
1733 vector of a main program with argument count *PARGC and argument
1734 vector *PARGV. Return the number of options prepended. */
1736 prepend_default_options (char const *options, int *pargc, char ***pargv)
/* Nothing is done for a null or empty OPTIONS string.  */
1738 if (options && *options)
/* BUF has room for a copy of the whole string and its terminator.  */
1740 char *buf = xmalloc (strlen (options) + 1);
/* First pass: with a null ARGV, prepend_args only counts.  */
1741 size_t prepended = prepend_args (options, buf, NULL);
1743 char *const *argv = *pargv;
/* MAX_ARGS bounds the combined argument count so that it fits in an
   int and the pointer array size fits in size_t; the test below
   detects a combined count that would exceed it.  */
1745 enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) };
1746 if (MAX_ARGS - argc < prepended)
/* New vector: the prepended options, the original arguments, and one
   extra slot.  */
1748 pp = xmalloc ((prepended + argc + 1) * sizeof *pp);
1749 *pargc = prepended + argc;
/* Second pass: store the option pointers and step PP past them.  */
1752 pp += prepend_args (options, buf, pp);
/* Copy the remaining original pointers; the loop ends once the null
   terminator has been copied.  */
1753 while ((*pp++ = *argv++))
1761 /* Get the next non-digit option from ARGC and ARGV.
1762 Return -1 if there are no more options.
1763 Process any digit options that were encountered on the way,
1764 and store the resulting integer into *DEFAULT_CONTEXT. */
1766 get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
/* Static, so the position of the last digit option is remembered
   from one call to the next.  */
1768 static int prev_digit_optind = -1;
1769 int this_digit_optind, was_digit;
/* Room for the digits of an intmax_t plus four further bytes; the
   overflow test below stops four bytes short of the end.  */
1770 char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
1775 this_digit_optind = optind;
1778 opt = getopt_long (argc, (char **) argv, short_options,
1779 long_options, NULL);
/* Anything other than a digit option is singled out here.  */
1780 if ( ! ('0' <= opt && opt <= '9'))
/* A digit belongs to a new number when OPTIND has changed since the
   previous digit or when the previous option was not a digit.  */
1783 if (prev_digit_optind != this_digit_optind || !was_digit)
1785 /* Reset to start another context length argument. */
1790 /* Suppress trivial leading zeros, to avoid incorrect
1791 diagnostic on strings like 00000000000. */
1795 if (p == buf + sizeof buf - 4)
1797 /* Too many digits. Append "..." to make context_length_arg
1798 complain about "X...", where X contains the digits seen
1807 prev_digit_optind = this_digit_optind;
1808 this_digit_optind = optind;
1813 context_length_arg (buf, default_context);
1819 /* Parse GREP_COLORS. The default would look like:
1820 GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
1821 with boolean capabilities (ne and rv) unset (i.e., omitted).
1822 No character escaping is needed or supported. */
1824 parse_grep_colors (void)
1831 p = getenv ("GREP_COLORS"); /* Plural! */
/* An unset or empty variable is singled out here.  */
1832 if (p == NULL || *p == '\0')
1835 /* Work off a writable copy. */
1840 /* From now on, be well-formed or you're gone. */
/* A colon or the end of the string completes one capability.  */
1842 if (*q == ':' || *q == '\0')
1845 struct color_cap const *cap;
1847 *q++ = '\0'; /* Terminate name or val. */
1848 /* Empty name without val (empty cap)
1849 * won't match and will be ignored. */
/* Look the name up in COLOR_DICT, whose last entry has a null
   name.  */
1850 for (cap = color_dict; cap->name; cap++)
1851 if (STREQ (cap->name, name))
1853 /* If name unknown, go on for forward compatibility. */
/* This requires both a capability entry that has a variable and a
   value from the input.  */
1854 if (cap->var && val)
1865 if (q == name || val)
1867 *q++ = '\0'; /* Terminate name. */
1868 val = q; /* Can be the empty string. */
1870 else if (val == NULL)
1871 q++; /* Accumulate name. */
1872 else if (*q == ';' || (*q >= '0' && *q <= '9'))
1873 q++; /* Accumulate val. Protect the terminal from being sent crap. */
/* Return true if PAT (of length PATLEN) contains an encoding error.

   The bytes are decoded one multibyte character at a time with
   mbrlen, starting from the initial conversion state.  Both an
   invalid sequence ((size_t) -1) and a sequence that is still
   incomplete at the end of PAT ((size_t) -2) count as errors.  */
static bool
contains_encoding_error (char const *pat, size_t patlen)
{
  mbstate_t mbs = { 0 };
  size_t i, charlen;

  /* mbrlen returns 0 for a null byte; step over it as one byte so
     that the scan always advances.  */
  for (i = 0; i < patlen; i += charlen + (charlen == 0))
    {
      charlen = mbrlen (pat + i, patlen - i, &mbs);
      if ((size_t) -2 <= charlen)
        return true;
    }
  return false;
}
1894 /* Change a pattern for fgrep into grep. */
/* KEYS (LEN bytes) is the input pattern.  *NEW_KEYS receives a buffer
   obtained from xnmalloc and *NEW_LEN the number of bytes written to
   it.
   NOTE(review): most of the switch that drives the copy is not among
   the lines shown here; the comments below cover only what is
   visible.  */
1896 fgrep_to_grep_pattern (size_t len, char const *keys,
1897 size_t *new_len, char **new_keys)
/* xnmalloc (len + 1, 2): presumably (LEN + 1) * 2 bytes, enough for
   an extra byte in front of every input byte -- confirm.  */
1899 char *p = *new_keys = xnmalloc (len + 1, 2);
1900 mbstate_t mb_state = { 0 };
/* Each iteration consumes N input bytes.  */
1903 for (; len; keys += n, len -= n)
/* Decode the next multibyte character to learn its length.  */
1906 n = mbrtowc (&wc, keys, len, &mb_state);
/* Copy N bytes and leave P just past them.  */
1913 p = mempcpy (p, keys, n);
/* Put the conversion state back to the initial state.  */
1917 memset (&mb_state, 0, sizeof mb_state);
/* P moves forward by one exactly when *KEYS occurs in the listed
   string of special characters (strchr also matches its terminating
   null byte).  */
1921 p += strchr ("$*.[\\^", *keys) != NULL;
/* The output length is the distance P has advanced from the start.  */
1930 *new_len = p - *new_keys;
/* Program entry point.  The visible stages are: start-up and locale
   setup, prepending options from GREP_OPTIONS, the option loop driven
   by get_nondigit_option, settling the output modes and colors,
   building and compiling the pattern, and searching each remaining
   operand (or "." or "-" when there is none).  The process exits with
   EXIT_TROUBLE if ERRSEEN is set and with STATUS otherwise.
   NOTE(review): the case labels of the option switch and many short
   statements are not among the lines shown here.  */
1934 main (int argc, char **argv)
1937 size_t keycc, oldcc, keyalloc;
1940 int opt, status, prepended;
1941 int prev_optind, last_recursive;
1943 intmax_t default_context;
/* Assign EXIT_TROUBLE to exit_failure; the same assignment appears
   again after the locale setup below.  */
1945 exit_failure = EXIT_TROUBLE;
1946 initialize_main (&argc, &argv);
/* PROGRAM_NAME is assigned argv[0] directly, right after the
   set_program_name call.  */
1947 set_program_name (argv[0]);
1948 program_name = argv[0];
/* Start with the largest representable match limit.  */
1956 max_count = INTMAX_MAX;
1958 /* The value -1 means to use DEFAULT_CONTEXT. */
1959 out_after = out_before = -1;
1960 /* Default before/after context: changed by -C/-NUM options */
1961 default_context = -1;
1962 /* Changed by -o option */
1965 /* Internationalization. */
1966 #if defined HAVE_SETLOCALE
1967 setlocale (LC_ALL, "");
1969 #if defined ENABLE_NLS
1970 bindtextdomain (PACKAGE, LOCALEDIR);
1971 textdomain (PACKAGE);
1974 exit_failure = EXIT_TROUBLE;
/* Arrange for clean_up_stdout to run when the process exits.  */
1975 atexit (clean_up_stdout);
/* Put the options from the GREP_OPTIONS environment variable in
   front of the command-line arguments; PREPENDED is how many were
   added.  */
1978 prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
/* Until an option selects another matcher, use the first entry of
   MATCHERS.  */
1979 compile = matchers[0].compile;
1980 execute = matchers[0].execute;
/* Option loop.  PREV_OPTIND records OPTIND as it was before each
   option is fetched; digit options are consumed inside
   get_nondigit_option and folded into DEFAULT_CONTEXT.  */
1982 while (prev_optind = optind,
1983 (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
/* Context lengths: OPTARG is parsed into OUT_AFTER, OUT_BEFORE or
   DEFAULT_CONTEXT.  */
1987 context_length_arg (optarg, &out_after);
1991 context_length_arg (optarg, &out_before);
1995 /* Set output match context, but let any explicit leading or
1996 trailing amount specified with -A or -B stand. */
1997 context_length_arg (optarg, &default_context);
/* Device handling: only "read" and "skip" are accepted.  */
2001 if (STREQ (optarg, "read"))
2002 devices = READ_DEVICES;
2003 else if (STREQ (optarg, "skip"))
2004 devices = SKIP_DEVICES;
2006 error (EXIT_TROUBLE, 0, _("unknown devices method"));
/* Matcher selection; setmatcher rejects conflicting or unknown
   names.  */
2010 setmatcher ("egrep");
2014 setmatcher ("fgrep");
2018 setmatcher ("perl");
2022 setmatcher ("grep");
2025 case 'X': /* undocumented on purpose */
2026 setmatcher (optarg);
2035 binary_files = WITHOUT_MATCH_BINARY_FILES;
2047 dos_unix_byte_offsets ();
2055 binary_files = TEXT_BINARY_FILES;
/* Map the directories argument to a DIRECTORIES value; when it asks
   for recursion, remember the OPTIND value that preceded this
   option.  */
2067 directories = XARGMATCH ("--directories", optarg,
2068 directories_args, directories_types);
2069 if (directories == RECURSE_DIRECTORIES)
2070 last_recursive = prev_optind;
/* Append OPTARG to KEYS, followed by a newline.  */
2074 cc = strlen (optarg);
2075 keys = xrealloc (keys, keycc + cc + 1);
2076 strcpy (&keys[keycc], optarg);
2078 keys[keycc++] = '\n';
/* Read patterns from the named file, or from standard input for "-",
   and append them to KEYS.  */
2082 fp = STREQ (optarg, "-") ? stdin : fopen (optarg, O_TEXT ? "rt" : "r");
2084 error (EXIT_TROUBLE, errno, "%s", optarg);
/* Initial allocation: the smallest power of two greater than
   KEYCC + 1.  */
2085 for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2)
2087 keys = xrealloc (keys, keyalloc);
/* Read until fread returns 0, growing the buffer whenever only the
   last byte of the allocation is left.  */
2089 while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0)
2092 if (keycc == keyalloc - 1)
2093 keys = x2nrealloc (keys, &keyalloc, sizeof *keys);
/* ERRNO is saved right after the read loop, and the saved value is
   what the diagnostic below reports.  */
2095 fread_errno = errno;
2097 error (EXIT_TROUBLE, fread_errno, "%s", optarg);
2100 /* Append final newline if file ended in non-newline. */
2101 if (oldcc != keycc && keys[keycc - 1] != '\n')
2102 keys[keycc++] = '\n';
2111 case 'y': /* For old-timers . . . */
2116 /* Like -l, except list files that don't contain matches.
2117 Inspired by the same option in Hume's gre. */
/* Parse OPTARG as a base-10 integer into MAX_COUNT; overflow has its
   own case, and the error below reports an invalid count.  */
2126 switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
2129 case LONGINT_OVERFLOW:
2133 error (EXIT_TROUBLE, 0, _("invalid max count"));
/* Add FTS_LOGICAL to the basic traversal options.  */
2151 fts_options = basic_fts_options | FTS_LOGICAL;
/* Turn on recursion and remember the OPTIND value that preceded this
   option.  */
2154 directories = RECURSE_DIRECTORIES;
2155 last_recursive = prev_optind;
2159 suppress_errors = 1;
/* The binary-files type must be one of the three names tested
   here.  */
2182 case BINARY_FILES_OPTION:
2183 if (STREQ (optarg, "binary"))
2184 binary_files = BINARY_BINARY_FILES;
2185 else if (STREQ (optarg, "text"))
2186 binary_files = TEXT_BINARY_FILES;
2187 else if (STREQ (optarg, "without-match"))
2188 binary_files = WITHOUT_MATCH_BINARY_FILES;
2190 error (EXIT_TROUBLE, 0, _("unknown binary-files type"));
/* Color argument: three groups of synonyms, compared without regard
   to case.  */
2196 if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes")
2197 || !strcasecmp (optarg, "force"))
2199 else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no")
2200 || !strcasecmp (optarg, "none"))
2202 else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty")
2203 || !strcasecmp (optarg, "if-tty"))
/* Exclude and include patterns share one pattern set, created on
   first use; an include pattern additionally carries
   EXCLUDE_INCLUDE.  */
2212 case EXCLUDE_OPTION:
2213 case INCLUDE_OPTION:
2214 if (!excluded_patterns)
2215 excluded_patterns = new_exclude ();
2216 add_exclude (excluded_patterns, optarg,
2218 | (opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)));
/* Patterns read from a file, one per line; a failure to read the
   file is fatal.  */
2220 case EXCLUDE_FROM_OPTION:
2221 if (!excluded_patterns)
2222 excluded_patterns = new_exclude ();
2223 if (add_exclude_file (add_exclude, excluded_patterns, optarg,
2224 EXCLUDE_WILDCARDS, '\n') != 0)
2226 error (EXIT_TROUBLE, errno, "%s", optarg);
/* Directory patterns go into their own set, after trailing slashes
   have been stripped from the argument.  */
2230 case EXCLUDE_DIRECTORY_OPTION:
2231 if (!excluded_directory_patterns)
2232 excluded_directory_patterns = new_exclude ();
2233 strip_trailing_slashes (optarg);
2234 add_exclude (excluded_directory_patterns, optarg, EXCLUDE_WILDCARDS);
2237 case GROUP_SEPARATOR_OPTION:
2238 group_separator = optarg;
2241 case LINE_BUFFERED_OPTION:
2254 usage (EXIT_TROUBLE);
/* A COLOR_OPTION of 2 is resolved here: color is used only if
   standard output is a terminal and should_colorize agrees.  */
2259 if (color_option == 2)
2260 color_option = isatty (STDOUT_FILENO) && should_colorize ();
2263 /* POSIX says that -q overrides -l, which in turn overrides the
2264 other output options. */
2267 if (exit_on_match | list_files)
/* Normal output is suppressed when only counting or when stopping at
   the first match.  */
2272 out_quiet = count_matches | done_on_match;
/* OUT_AFTER and OUT_BEFORE take DEFAULT_CONTEXT here; per the note
   at their initialization this is for the value -1.  */
2275 out_after = default_context;
2277 out_before = default_context;
/* A non-empty GREP_COLOR value sets both match colors before
   GREP_COLORS is parsed.  */
2282 char *userval = getenv ("GREP_COLOR");
2283 if (userval != NULL && *userval != '\0')
2284 selected_match_color = context_match_color = userval;
2286 /* New GREP_COLORS has priority. */
2287 parse_grep_colors ();
/* Version output, followed by a successful exit.  */
2292 version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS,
2294 exit (EXIT_SUCCESS);
2298 usage (EXIT_SUCCESS);
/* Record the status of standard output in OUT_STAT when it is a
   regular file.  */
2300 struct stat tmp_stat;
2301 if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode))
2302 out_stat = tmp_stat;
2308 /* No keys were specified (e.g. -f /dev/null). Match nothing. */
2310 match_lines = match_words = 0;
2313 /* Strip trailing newline. */
/* With no pattern option given, the next operand is the pattern.  */
2316 else if (optind < argc)
2318 /* A copy must be made in case of an xrealloc() or free() later. */
2319 keycc = strlen (argv[optind]);
2320 keys = xmemdup (argv[optind++], keycc + 1);
2323 usage (EXIT_TROUBLE);
2325 /* If fgrep in a multibyte locale, then use grep if either
2326 (1) case is ignored (where grep is typically faster), or
2327 (2) the pattern has an encoding error (where fgrep might not work). */
2328 if (compile == Fcompile && MB_CUR_MAX > 1
2329 && (match_icase || contains_encoding_error (keys, keycc)))
2333 fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys);
2339 execute = EGexecute;
2343 build_mbclen_cache ();
/* Compile the pattern with the selected matcher.  */
2345 compile (keys, keycc);
/* This holds when more than one operand remains and NO_FILENAMES is
   clear, or when WITH_FILENAMES is set.  */
2348 if ((argc - optind > 1 && !no_filenames) || with_filenames)
2352 /* Output is set to binary mode because we shouldn't convert
2353 NL to CR-LF pairs, especially when grepping binary files. */
2359 exit (EXIT_FAILURE);
/* With a logical traversal, the command-line-only device policy is
   widened to READ_DEVICES.  */
2361 if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
2362 devices = READ_DEVICES;
/* Search every remaining operand, ANDing each result into STATUS.  */
2368 status &= grep_command_line_arg (argv[optind]);
2369 while (++optind < argc);
/* No operands: search "." if recursion was requested by an option
   whose preceding OPTIND value exceeds PREPENDED.  */
2371 else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive)
2373 /* Grep through ".", omitting leading "./" from diagnostics. */
2374 filename_prefix_len = 2;
2375 status = grep_command_line_arg (".");
/* The remaining alternative is to search standard input.  */
2378 status = grep_command_line_arg ("-");
2380 /* We register via atexit() to test stdout. */
2381 exit (errseen ? EXIT_TROUBLE : status);
2383 /* vim:set shiftwidth=2: */