1 /* grep.c - main driver file for grep.
2 Copyright (C) 1992, 1997-2002, 2004-2014 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
19 /* Written July 1992 by Mike Haertel. */
22 #include <sys/types.h>
38 #include "fcntl-safer.h"
44 #include "propername.h"
46 #include "safe-read.h"
48 #include "version-etc.h"
/* Separator passed as SEP to prline and print_line_head for a selected
   line. */
52 #define SEP_CHAR_SELECTED ':'
/* Separator passed instead for rejected/context lines. */
53 #define SEP_CHAR_REJECTED '-'
/* Initial value of group_separator, the string printed between groups of
   output that are not adjacent in the file when context is requested. */
54 #define SEP_STR_GROUP "--"
57 proper_name ("Mike Haertel"), \
58 _("others, see\n<http://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>")
60 /* When stdout is connected to a regular file, save its stat
61 information here, so that we can automatically skip it, thus
62 avoiding a potential (racy) infinite loop. */
63 static struct stat out_stat;
65 /* If non-zero, display usage information and exit. */
68 /* If non-zero, print the version on standard output and exit. */
69 static int show_version;
71 /* If nonzero, suppress diagnostics for nonexistent or unreadable files. */
72 static int suppress_errors;
74 /* If nonzero, use color markers. */
75 static int color_option;
77 /* If nonzero, show only the part of a line matching the expression. */
78 static int only_matching;
80 /* If nonzero, make sure first content char in a line is on a tab stop. */
81 static int align_tabs;
83 /* The group separator used when context is requested. */
84 static const char *group_separator = SEP_STR_GROUP;
86 /* The context and logic for choosing default --color screen attributes
87 (foreground and background colors, etc.) are the following.
88 -- There are eight basic colors available, each with its own
89 nominal luminosity to the human eye and foreground/background
90 codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
91 magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
92 yellow [89 %, 33/43], and white [100 %, 37/47]).
93 -- Sometimes, white as a background is actually implemented using
94 a shade of light gray, so that a foreground white can be visible
95 on top of it (but most often not).
96 -- Sometimes, black as a foreground is actually implemented using
97 a shade of dark gray, so that it can be visible on top of a
98 background black (but most often not).
99 -- Sometimes, more colors are available, as extensions.
100 -- Other attributes can be selected/deselected (bold [1/22],
101 underline [4/24], standout/inverse [7/27], blink [5/25], and
102 invisible/hidden [8/28]). They are sometimes implemented by
103 using colors instead of what their names imply; e.g., bold is
104 often achieved by using brighter colors. In practice, only bold
105 is really available to us, underline sometimes being mapped by
106 the terminal to some strange color choice, and standout best
107 being left for use by downstream programs such as less(1).
108 -- We cannot assume that any of the extensions or special features
109 are available for the purpose of choosing defaults for everyone.
110 -- The most prevalent default terminal backgrounds are pure black
111 and pure white, and are not necessarily the same shades of
112 those as if they were selected explicitly with SGR sequences.
113 Some terminals use dark or light pictures as default background,
114 but those are covered over by an explicit selection of background
115 color with an SGR sequence; their users will appreciate their
116 background pictures not being covered like this, if possible.
117 -- Some uses of color attributes are to make some output items
118 more understated (e.g., context lines); this cannot be achieved
119 by changing the background color.
120 -- For these reasons, the grep color defaults should strive not
121 to change the background color from its default, unless it's
122 for a short item that should be highlighted, not understated.
123 -- The grep foreground color defaults (without an explicitly set
124 background) should provide enough contrast to be readable on any
125 terminal with either a black (dark) or white (light) background.
126 This only leaves red, magenta, green, and cyan (and their bold
127 counterparts) and possibly bold blue. */
128 /* The color strings used for matched text.
129 The user can override them using the deprecated
130 environment variable GREP_COLOR or the new GREP_COLORS. */
131 static const char *selected_match_color = "01;31"; /* bold red */
132 static const char *context_match_color = "01;31"; /* bold red */
134 /* Other colors. Defaults look damn good. */
135 static const char *filename_color = "35"; /* magenta */
136 static const char *line_num_color = "32"; /* green */
137 static const char *byte_num_color = "32"; /* green */
138 static const char *sep_color = "36"; /* cyan */
139 static const char *selected_line_color = ""; /* default color pair */
140 static const char *context_line_color = ""; /* default color pair */
142 /* Select Graphic Rendition (SGR, "\33[...m") strings. */
143 /* Also Erase in Line (EL) to Right ("\33[K") by default. */
144 /* Why have EL to Right after SGR?
145 -- The behavior of line-wrapping when at the bottom of the
146 terminal screen and at the end of the current line is often
147 such that a new line is introduced, entirely cleared with
148 the current background color which may be different from the
149 default one (see the boolean back_color_erase terminfo(5)
150 capability), thus scrolling the display by one line.
151 The end of this new line will stay in this background color
152 even after reverting to the default background color with
153 "\33[m", unless it is explicitly cleared again with "\33[K"
154 (which is the behavior the user would instinctively expect
155 from the whole thing). There may be some unavoidable
156 background-color flicker at the end of this new line because
157 of this (when timing with the monitor's redraw is just right).
158 -- The behavior of HT (tab, "\t") is usually the same as that of
159 Cursor Forward Tabulation (CHT) with a default parameter
160 of 1 ("\33[I"), i.e., it performs pure movement to the next
161 tab stop, without any clearing of either content or screen
162 attributes (including background color); try
163 printf 'asdfqwerzxcv\rASDF\tZXCV\n'
164 in a bash(1) shell to demonstrate this. This is not what the
165 user would instinctively expect of HT (but is ok for CHT).
166 The instinctive behavior would include clearing the terminal
167 cells that are skipped over by HT with blank cells in the
168 current screen attributes, including background color;
169 the boolean dest_tabs_magic_smso terminfo(5) capability
170 indicates this saner behavior for HT, but only some rare
171 terminals have it (although it also indicates a special
172 glitch with standout mode in the Teleray terminal for which
173 it was initially introduced). The remedy is to add "\33[K"
174 after each SGR sequence, be it START (to fix the behavior
175 of any HT after that before another SGR) or END (to fix the
176 behavior of an HT in default background color that would
177 follow a line-wrapping at the bottom of the screen in another
178 background color, and to complement doing it after START).
179 Piping grep's output through a pager such as less(1) avoids
180 any HT problems since the pager performs tab expansion.
182 Generic disadvantages of this remedy are:
183 -- Some very rare terminals might support SGR but not EL (nobody
184 will use "grep --color" on a terminal that does not support
185 SGR in the first place).
186 -- Having these extra control sequences might somewhat complicate
187 the task of any program trying to parse "grep --color"
188 output in order to extract structuring information from it.
189 A specific disadvantage to doing it after SGR START is:
190 -- Even more possible background color flicker (when timing
191 with the monitor's redraw is just right), even when not at the
192 bottom of the screen.
193 There are no additional disadvantages specific to doing it after
196 It would be impractical for GNU grep to become a full-fledged
197 terminal program linked against ncurses or the like, so it will
198 not detect terminfo(5) capabilities. */
199 static const char *sgr_start = "\33[%sm\33[K";
200 static const char *sgr_end = "\33[m\33[K";
202 /* SGR utility functions. */
/* Start colorizing: hand the sgr_start format and the SGR parameter
   string S to print_start_colorize.
   NOTE(review): the lines between the signature and the call are not
   visible in this excerpt, so any guard on S is unconfirmed. */
204 pr_sgr_start (char const *s)
207 print_start_colorize (sgr_start, s);
/* Stop colorizing: hand the sgr_end sequence to print_end_colorize.
   S is not passed to that call.
   NOTE(review): the lines between the signature and the call are not
   visible in this excerpt, so any guard on S is unconfirmed. */
210 pr_sgr_end (char const *s)
213 print_end_colorize (sgr_end);
216 pr_sgr_start_if (char const *s)
222 pr_sgr_end_if (char const *s)
/* Callback attached to the "mt" entry of color_dict: make
   context_match_color follow selected_match_color so that the one
   setting covers both selected and context matches. */
236 color_cap_mt_fct (void)
238 /* Our caller just set selected_match_color. */
239 context_match_color = selected_match_color;
/* Callback attached to the "rv" entry of color_dict: set color_option
   to -1.  prline tests color_option < 0 together with out_invert when
   choosing between selected_line_color and context_line_color. */
243 color_cap_rv_fct (void)
245 /* By this point, it was 1 (or already -1). */
246 color_option = -1; /* That's still != 0. */
/* Callback attached to the "ne" entry of color_dict: switch sgr_start to
   a format without the trailing "\33[K" (Erase in Line) that the default
   value carries.
   NOTE(review): the rest of the body is not visible in this excerpt. */
250 color_cap_ne_fct (void)
252 sgr_start = "\33[%sm";
256 /* For GREP_COLORS. */
257 static const struct color_cap color_dict[] =
259 { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
260 { "ms", &selected_match_color, NULL }, /* selected matched text */
261 { "mc", &context_match_color, NULL }, /* context matched text */
262 { "fn", &filename_color, NULL }, /* filename */
263 { "ln", &line_num_color, NULL }, /* line number */
264 { "bn", &byte_num_color, NULL }, /* byte (sic) offset */
265 { "se", &sep_color, NULL }, /* separator */
266 { "sl", &selected_line_color, NULL }, /* selected lines */
267 { "cx", &context_line_color, NULL }, /* context lines */
268 { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */
269 { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */
273 static struct exclude *excluded_patterns;
274 static struct exclude *excluded_directory_patterns;
276 static char const short_options[] =
277 "0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";
279 /* Non-boolean long options that have no corresponding short equivalents. */
282 BINARY_FILES_OPTION = CHAR_MAX + 1,
287 LINE_BUFFERED_OPTION,
289 EXCLUDE_DIRECTORY_OPTION,
290 GROUP_SEPARATOR_OPTION
293 /* Long options equivalences. */
294 static struct option const long_options[] =
296 {"basic-regexp", no_argument, NULL, 'G'},
297 {"extended-regexp", no_argument, NULL, 'E'},
298 {"fixed-regexp", no_argument, NULL, 'F'},
299 {"fixed-strings", no_argument, NULL, 'F'},
300 {"perl-regexp", no_argument, NULL, 'P'},
301 {"after-context", required_argument, NULL, 'A'},
302 {"before-context", required_argument, NULL, 'B'},
303 {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
304 {"byte-offset", no_argument, NULL, 'b'},
305 {"context", required_argument, NULL, 'C'},
306 {"color", optional_argument, NULL, COLOR_OPTION},
307 {"colour", optional_argument, NULL, COLOR_OPTION},
308 {"count", no_argument, NULL, 'c'},
309 {"devices", required_argument, NULL, 'D'},
310 {"directories", required_argument, NULL, 'd'},
311 {"exclude", required_argument, NULL, EXCLUDE_OPTION},
312 {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
313 {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
314 {"file", required_argument, NULL, 'f'},
315 {"files-with-matches", no_argument, NULL, 'l'},
316 {"files-without-match", no_argument, NULL, 'L'},
317 {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
318 {"help", no_argument, &show_help, 1},
319 {"include", required_argument, NULL, INCLUDE_OPTION},
320 {"ignore-case", no_argument, NULL, 'i'},
321 {"initial-tab", no_argument, NULL, 'T'},
322 {"label", required_argument, NULL, LABEL_OPTION},
323 {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
324 {"line-number", no_argument, NULL, 'n'},
325 {"line-regexp", no_argument, NULL, 'x'},
326 {"max-count", required_argument, NULL, 'm'},
328 {"no-filename", no_argument, NULL, 'h'},
329 {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
330 {"no-messages", no_argument, NULL, 's'},
331 {"null", no_argument, NULL, 'Z'},
332 {"null-data", no_argument, NULL, 'z'},
333 {"only-matching", no_argument, NULL, 'o'},
334 {"quiet", no_argument, NULL, 'q'},
335 {"recursive", no_argument, NULL, 'r'},
336 {"dereference-recursive", no_argument, NULL, 'R'},
337 {"regexp", required_argument, NULL, 'e'},
338 {"invert-match", no_argument, NULL, 'v'},
339 {"silent", no_argument, NULL, 'q'},
340 {"text", no_argument, NULL, 'a'},
341 {"binary", no_argument, NULL, 'U'},
342 {"unix-byte-offsets", no_argument, NULL, 'u'},
343 {"version", no_argument, NULL, 'V'},
344 {"with-filename", no_argument, NULL, 'H'},
345 {"word-regexp", no_argument, NULL, 'w'},
349 /* Define flags declared in grep.h. */
353 unsigned char eolbyte;
355 static char const *matcher;
357 /* For error messages. */
358 /* The input file name, or (if standard input) "-" or a --label argument. */
359 static char const *filename;
360 static size_t filename_prefix_len;
362 static int write_error_seen;
364 enum directories_type
366 READ_DIRECTORIES = 2,
371 /* How to handle directories. */
372 static char const *const directories_args[] =
374 "read", "recurse", "skip", NULL
376 static enum directories_type const directories_types[] =
378 READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES
380 ARGMATCH_VERIFY (directories_args, directories_types);
382 static enum directories_type directories = READ_DIRECTORIES;
/* Baseline fts option flags; fts_options below starts from these. */
384 enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
/* fts option flags in effect.  The initial value adds FTS_COMFOLLOW and
   FTS_PHYSICAL to the baseline set. */
385 static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
387 /* How to handle devices. */
390 READ_COMMAND_LINE_DEVICES,
393 } devices = READ_COMMAND_LINE_DEVICES;
395 static int grepfile (int, char const *, int, int);
396 static int grepdesc (int, int);
398 static void dos_binary (void);
399 static void dos_unix_byte_offsets (void);
400 static int undossify_input (char *, size_t);
/* Return nonzero if file mode M is that of a character device, block
   device, socket, or FIFO. */
403 is_device_mode (mode_t m)
405 return S_ISCHR (m) || S_ISBLK (m) || S_ISSOCK (m) || S_ISFIFO (m);
408 /* Return nonzero if ST->st_size is defined. Assume the file is not a
411 usable_st_size (struct stat const *st)
413 return S_ISREG (st->st_mode) || S_TYPEISSHM (st) || S_TYPEISTMO (st);
416 /* Functions we'll use to search. */
/* Signature of the pattern-compilation entry point.
   NOTE(review): presumably (pattern, length); the parameter meanings are
   not shown in this excerpt -- confirm against the matchers. */
417 typedef void (*compile_fp_t) (char const *, size_t);
/* Signature of the search entry point.  Callers in this file pass
   (buf, size, &match_size, start), where START is NULL or a pointer into
   BUF.  They treat a (size_t) -1 result as "no match"; any other result
   is used as the offset of the match within BUF, with *match_size as its
   length. */
418 typedef size_t (*execute_fp_t) (char const *, size_t, size_t *, char const *);
419 static compile_fp_t compile;
420 static execute_fp_t execute;
422 /* Like error, but suppress the diagnostic if requested. */
424 suppressible_error (char const *mesg, int errnum)
426 if (! suppress_errors)
427 error (0, errnum, "%s", mesg);
431 /* If there has already been a write error, don't bother closing
432 standard output, as that might elicit a duplicate diagnostic. */
434 clean_up_stdout (void)
436 if (! write_error_seen)
440 /* Return 1 if a file is known to be binary for the purpose of 'grep'.
441 BUF, of size BUFSIZE, is the initial buffer read from the file with
442 descriptor FD and status ST. */
444 file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st)
447 enum { SEEK_HOLE = SEEK_END };
450 /* If -z, test only whether the initial buffer contains '\200';
451 knowing about holes won't help. */
453 return memchr (buf, '\200', bufsize) != 0;
455 /* If the initial buffer contains a null byte, guess that the file
457 if (memchr (buf, '\0', bufsize))
460 /* If the file has holes, it must contain a null byte somewhere. */
461 if (SEEK_HOLE != SEEK_END && usable_st_size (st))
464 if (O_BINARY || fd == STDIN_FILENO)
466 cur = lseek (fd, 0, SEEK_CUR);
471 /* Look for a hole after the current location. */
472 off_t hole_start = lseek (fd, cur, SEEK_HOLE);
475 if (lseek (fd, cur, SEEK_SET) < 0)
476 suppressible_error (filename, errno);
477 if (hole_start < st->st_size)
482 /* Guess that the file does not contain binary data. */
486 /* Convert STR to a nonnegative integer, storing the result in *OUT.
487 STR must be a valid context length argument; report an error if it
488 isn't. Silently ceiling *OUT at the maximum value, as that is
489 practically equivalent to infinity for grep's purposes. */
491 context_length_arg (char const *str, intmax_t *out)
493 switch (xstrtoimax (str, 0, 10, out, ""))
496 case LONGINT_OVERFLOW:
501 error (EXIT_TROUBLE, 0, "%s: %s", str,
502 _("invalid context length argument"));
506 /* Return nonzero if the file with NAME should be skipped.
507 If COMMAND_LINE is nonzero, it is a command-line argument.
508 If IS_DIR is nonzero, it is a directory. */
510 skipped_file (char const *name, int command_line, int is_dir)
513 ? (directories == SKIP_DIRECTORIES
514 || (! (command_line && filename_prefix_len != 0)
515 && excluded_directory_patterns
516 && excluded_file_name (excluded_directory_patterns, name)))
518 && excluded_file_name (excluded_patterns, name)));
521 /* Hairy buffering mechanism for grep. The intent is to keep
522 all reads aligned on a page boundary and multiples of the
523 page size, unless a read yields a partial page. */
525 static char *buffer; /* Base of buffer. */
526 static size_t bufalloc; /* Allocated buffer size, counting slop. */
527 #define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. */
528 static int bufdesc; /* File descriptor. */
529 static char *bufbeg; /* Beginning of user-visible stuff. */
530 static char *buflim; /* Limit of user-visible stuff. */
531 static size_t pagesize; /* alignment of memory pages */
532 static off_t bufoffset; /* Read offset; defined on regular files. */
533 static off_t after_last_match; /* Pointer after last matching line that
534 would have been output if we were
535 outputting characters. */
537 /* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be
538 an integer or a pointer. Both args must be free of side effects. */
539 #define ALIGN_TO(val, alignment) \
540 ((size_t) (val) % (alignment) == 0 \
542 : (val) + ((alignment) - (size_t) (val) % (alignment)))
544 /* Reset the buffer for a new file, returning zero if we should skip it.
545 Initialize on the first time through. */
547 reset (int fd, struct stat const *st)
551 pagesize = getpagesize ();
552 if (pagesize == 0 || 2 * pagesize + 1 <= pagesize)
554 bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + 1;
555 buffer = xmalloc (bufalloc);
558 bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
559 bufbeg[-1] = eolbyte;
562 if (S_ISREG (st->st_mode))
564 if (fd != STDIN_FILENO)
568 bufoffset = lseek (fd, 0, SEEK_CUR);
571 suppressible_error (_("lseek failed"), errno);
579 /* Read new stuff into the buffer, saving the specified
580 amount of old stuff. When we're done, 'bufbeg' points
581 to the beginning of the buffer contents, and 'buflim'
582 points just after the end. Return zero if there's an error. */
584 fillbuf (size_t save, struct stat const *st)
591 /* Offset from start of buffer to start of old stuff
592 that we want to save. */
593 size_t saved_offset = buflim - save - buffer;
595 if (pagesize <= buffer + bufalloc - buflim)
598 bufbeg = buflim - save;
602 size_t minsize = save + pagesize;
607 /* Grow newsize until it is at least as great as minsize. */
608 for (newsize = bufalloc - pagesize - 1; newsize < minsize; newsize *= 2)
609 if (newsize * 2 < newsize || newsize * 2 + pagesize + 1 < newsize * 2)
612 /* Try not to allocate more memory than the file size indicates,
613 as that might cause unnecessary memory exhaustion if the file
614 is large. However, do not use the original file size as a
615 heuristic if we've already read past the file end, as most
616 likely the file is growing. */
617 if (usable_st_size (st))
619 off_t to_be_read = st->st_size - bufoffset;
620 off_t maxsize_off = save + to_be_read;
621 if (0 <= to_be_read && to_be_read <= maxsize_off
622 && maxsize_off == (size_t) maxsize_off
623 && minsize <= (size_t) maxsize_off
624 && (size_t) maxsize_off < newsize)
625 newsize = maxsize_off;
628 /* Add enough room so that the buffer is aligned and has room
629 for byte sentinels fore and aft. */
630 newalloc = newsize + pagesize + 1;
632 newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
633 readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
634 bufbeg = readbuf - save;
635 memmove (bufbeg, buffer + saved_offset, save);
636 bufbeg[-1] = eolbyte;
637 if (newbuf != buffer)
644 readsize = buffer + bufalloc - readbuf;
645 readsize -= readsize % pagesize;
647 fillsize = safe_read (bufdesc, readbuf, readsize);
650 bufoffset += fillsize;
651 fillsize = undossify_input (readbuf, fillsize);
652 buflim = readbuf + fillsize;
656 /* Flags controlling the style of output. */
661 WITHOUT_MATCH_BINARY_FILES
662 } binary_files; /* How to handle binary files. */
664 static int filename_mask; /* If zero, output nulls after filenames. */
665 static int out_quiet; /* Suppress all normal output. */
666 static bool out_invert; /* Print nonmatching stuff. */
667 static int out_file; /* Print filenames. */
668 static int out_line; /* Print line numbers. */
669 static int out_byte; /* Print byte offsets. */
670 static intmax_t out_before; /* Lines of leading context. */
671 static intmax_t out_after; /* Lines of trailing context. */
672 static int count_matches; /* Count matching lines. */
673 static int list_files; /* List matching files. */
674 static int no_filenames; /* Suppress file names. */
675 static intmax_t max_count; /* Stop after outputting this many
676 lines from an input file. */
677 static int line_buffered; /* If nonzero, use line buffering, i.e.
678 fflush every line out. */
679 static char *label = NULL; /* Fake filename for stdin */
682 /* Internal variables to keep track of byte count, context, etc. */
683 static uintmax_t totalcc; /* Total character count before bufbeg. */
684 static char const *lastnl; /* Pointer after last newline counted. */
685 static char const *lastout; /* Pointer after last character output;
686 NULL if no character has been output
687 or if it's conceptually before bufbeg. */
688 static uintmax_t totalnl; /* Total newline count before lastnl. */
689 static intmax_t outleft; /* Maximum number of lines to be output. */
690 static intmax_t pending; /* Pending lines of output.
691 Always kept 0 if out_quiet is true. */
692 static int done_on_match; /* Stop scanning file on first match. */
693 static int exit_on_match; /* Exit on first match. */
697 /* Add two numbers that count input bytes or lines, and report an
698 error if the addition overflows. */
700 add_count (uintmax_t a, uintmax_t b)
702 uintmax_t sum = a + b;
704 error (EXIT_TROUBLE, 0, _("input is too large to count"));
/* Account for the end-of-line bytes between lastnl and LIM: search that
   span for eolbyte with memchr and add the tally (NEWLINES) to totalnl
   through add_count, which reports an error on overflow.
   NOTE(review): the tallying itself and any update of lastnl are on
   lines not visible in this excerpt. */
709 nlscan (char const *lim)
713 for (beg = lastnl; beg < lim; beg++)
715 beg = memchr (beg, eolbyte, lim - beg);
720 totalnl = add_count (totalnl, newlines);
724 /* Print the current filename (the file-scope FILENAME) on stdout,
   bracketed by pr_sgr_start_if/pr_sgr_end_if calls with filename_color. */
726 print_filename (void)
728 pr_sgr_start_if (filename_color);
729 fputs (filename, stdout);
730 pr_sgr_end_if (filename_color);
733 /* Print a character separator. */
737 pr_sgr_start_if (sep_color);
739 pr_sgr_end_if (sep_color);
742 /* Print a line number or a byte offset. */
744 print_offset (uintmax_t pos, int min_width, const char *color)
746 /* Do not rely on printf to print pos, since uintmax_t may be longer
747 than long, and long long is not portable. */
749 char buf[sizeof pos * CHAR_BIT];
750 char *p = buf + sizeof buf;
754 *--p = '0' + pos % 10;
757 while ((pos /= 10) != 0);
759 /* Do this to maximize the probability of alignment across lines. */
761 while (--min_width >= 0)
764 pr_sgr_start_if (color);
765 fwrite (p, 1, buf + sizeof buf - p, stdout);
766 pr_sgr_end_if (color);
769 /* Print a whole line head (filename, line, byte). */
771 print_line_head (char const *beg, char const *lim, int sep)
789 totalnl = add_count (totalnl, 1);
794 print_offset (totalnl, 4, line_num_color);
800 uintmax_t pos = add_count (totalcc, beg - bufbeg);
801 pos = dossified_pos (pos);
804 print_offset (pos, 6, byte_num_color);
810 /* This assumes sep is one column wide.
811 Try doing this any other way with Unicode
812 (and its combining and wide characters)
813 filenames and you're wasting your efforts. */
815 fputs ("\t\b", stdout);
822 print_line_middle (const char *beg, const char *lim,
823 const char *line_color, const char *match_color)
827 const char *cur = beg;
828 const char *mid = NULL;
831 && ((match_offset = execute (beg, lim - beg, &match_size,
832 beg + (cur - beg))) != (size_t) -1))
834 char const *b = beg + match_offset;
836 /* Avoid matching the empty line at the end of the buffer. */
840 /* Avoid hanging on grep --color "" foo */
843 /* Make minimal progress; there may be further non-empty matches. */
844 /* XXX - Could really advance by one whole multi-octet character. */
851 /* This function is called on a matching line only,
852 but is it selected or rejected/context? */
854 print_line_head (b, lim, (out_invert ? SEP_CHAR_REJECTED
855 : SEP_CHAR_SELECTED));
858 pr_sgr_start (line_color);
864 fwrite (cur, sizeof (char), b - cur, stdout);
867 pr_sgr_start_if (match_color);
868 fwrite (b, sizeof (char), match_size, stdout);
869 pr_sgr_end_if (match_color);
871 fputs ("\n", stdout);
873 cur = b + match_size;
/* Write the remaining text of the line [BEG, LIM) in LINE_COLOR.  The
   colored span stops short of a trailing eolbyte and of a '\r' directly
   before it (or a final '\r' when no eolbyte is present); EOL_SIZE counts
   those excluded bytes and TAIL_SIZE is what is left to write.
   NOTE(review): the guard around the output calls and the returned
   pointer are on lines not visible in this excerpt. */
885 print_line_tail (const char *beg, const char *lim, const char *line_color)
890 eol_size = (lim > beg && lim[-1] == eolbyte);
891 eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
892 tail_size = lim - eol_size - beg;
896 pr_sgr_start (line_color);
897 fwrite (beg, 1, tail_size, stdout);
899 pr_sgr_end (line_color);
/* Print the line [BEG, LIM) using separator SEP (SEP_CHAR_SELECTED or
   SEP_CHAR_REJECTED).  The line head is printed first; then, when
   only_matching applies or colors are in use, the text goes through
   print_line_middle and print_line_tail, each of which hands back the
   new BEG.  Whatever remains up to LIM is written with fwrite unless
   only_matching is set.
   NOTE(review): several guarding lines (including the condition in front
   of the "write error" exit) are not visible in this excerpt. */
906 prline (char const *beg, char const *lim, int sep)
909 const char *line_color;
910 const char *match_color;
913 print_line_head (beg, lim, sep);
/* A selected line matches unless -v (out_invert) flips the sense. */
915 matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;
/* color_option < 0 is the "rv" setting: with out_invert it swaps which of
   the selected/context line colors is used. */
919 line_color = (((sep == SEP_CHAR_SELECTED)
920 ^ (out_invert && (color_option < 0)))
921 ? selected_line_color : context_line_color);
922 match_color = (sep == SEP_CHAR_SELECTED
923 ? selected_match_color : context_match_color);
926 line_color = match_color = NULL; /* Shouldn't be used. */
928 if ((only_matching && matching)
929 || (color_option && (*line_color || *match_color)))
931 /* We already know that non-matching lines have no match (to colorize). */
932 if (matching && (only_matching || *match_color))
933 beg = print_line_middle (beg, lim, line_color, match_color);
935 if (!only_matching && *line_color)
937 /* This code is exercised at least when grep is invoked like this:
938 echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */
939 beg = print_line_tail (beg, lim, line_color);
/* Plain output of whatever part of the line has not been written yet. */
943 if (!only_matching && lim > beg)
944 fwrite (beg, 1, lim - beg, stdout);
/* Record the failure before exiting so that clean_up_stdout skips closing
   stdout and does not produce a duplicate diagnostic. */
948 write_error_seen = 1;
949 error (EXIT_TROUBLE, 0, _("write error"));
958 /* Print pending lines of trailing context prior to LIM. Trailing context ends
959 at the next matching line when OUTLEFT is 0. */
961 prpending (char const *lim)
965 while (pending > 0 && lastout < lim)
967 char const *nl = memchr (lastout, eolbyte, lim - lastout);
971 || ((execute (lastout, nl + 1 - lastout,
972 &match_size, NULL) == (size_t) -1)
974 prline (lastout, nl + 1, SEP_CHAR_REJECTED);
980 /* Output the lines between BEG and LIM. Deal with context. */
982 prtext (char const *beg, char const *lim)
984 static bool used; /* Avoid printing SEP_STR_GROUP before any output. */
987 if (!out_quiet && pending > 0)
994 /* Deal with leading context. */
995 char const *bp = lastout ? lastout : bufbeg;
997 for (i = 0; i < out_before; ++i)
1001 while (p[-1] != eol);
1003 /* Print the group separator unless the output is adjacent to
1004 the previous output in the file. */
1005 if ((0 <= out_before || 0 <= out_after) && used
1006 && p != lastout && group_separator)
1008 pr_sgr_start_if (sep_color);
1009 fputs (group_separator, stdout);
1010 pr_sgr_end_if (sep_color);
1011 fputc ('\n', stdout);
1016 char const *nl = memchr (p, eol, beg - p);
1018 prline (p, nl, SEP_CHAR_REJECTED);
1026 /* One or more lines are output. */
1027 for (n = 0; p < lim && n < outleft; n++)
1029 char const *nl = memchr (p, eol, lim - p);
1032 prline (p, nl, SEP_CHAR_SELECTED);
1038 /* Just one line is output. */
1040 prline (beg, lim, SEP_CHAR_SELECTED);
1045 after_last_match = bufoffset - (buflim - p);
1046 pending = out_quiet ? 0 : MAX (0, out_after);
1051 /* Invoke the matcher, EXECUTE, on buffer BUF of SIZE bytes. If there
1052 is no match, return (size_t) -1. Otherwise, set *MATCH_SIZE to the
1053 length of the match and return the offset of the start of the match. */
1055 do_execute (char const *buf, size_t size, size_t *match_size)
1058 const char *line_next;
1060 /* With the current implementation, using --ignore-case with a multi-byte
1061 character set is very inefficient when applied to a large buffer
1062 containing many matches. We can avoid much of the wasted effort
1063 by matching line-by-line.
1065 FIXME: this is just an ugly workaround, and it doesn't really
1066 belong here. Also, PCRE is always using this same per-line
1067 matching algorithm. Either we fix -i, or we should refactor
1068 this code---for example, we could add another function pointer
1069 to struct matcher to split the buffer passed to execute. It would
1070 perform the memchr if line-by-line matching is necessary, or just
1071 return buf + size otherwise. */
1072 if (! (execute == Fexecute || execute == Pexecute)
1073 || MB_CUR_MAX == 1 || !match_icase)
1074 return execute (buf, size, match_size, NULL);
1076 for (line_next = buf; line_next < buf + size; )
1078 const char *line_buf = line_next;
1079 const char *line_end = memchr (line_buf, eolbyte,
1080 (buf + size) - line_buf);
1081 if (line_end == NULL)
1082 line_next = line_end = buf + size;
1084 line_next = line_end + 1;
1086 result = execute (line_buf, line_next - line_buf, match_size, NULL);
1087 if (result != (size_t) -1)
1088 return (line_buf - buf) + result;
1094 /* Scan the specified portion of the buffer, matching lines (or
1095 between matching lines if OUT_INVERT is true). Return a count of
1098 grepbuf (char const *beg, char const *lim)
1100 intmax_t outleft0 = outleft;
1104 for (p = beg; p < lim; p = endp)
1107 size_t match_offset = do_execute (p, lim - p, &match_size);
1108 if (match_offset == (size_t) -1)
1112 match_offset = lim - p;
1115 char const *b = p + match_offset;
1116 endp = b + match_size;
1117 /* Avoid matching the empty line at the end of the buffer. */
1118 if (!out_invert && b == lim)
1120 if (!out_invert || p < b)
1122 char const *prbeg = out_invert ? p : b;
1123 char const *prend = out_invert ? b : endp;
1124 prtext (prbeg, prend);
1125 if (!outleft || done_on_match)
1128 exit (EXIT_SUCCESS);
1134 return outleft0 - outleft;
1137 /* Search a given file. Normally, return a count of lines printed;
1138 but if the file is a directory and we search it recursively, then
1139 return -2 if there was a match, and -1 otherwise. */
1141 grep (int fd, struct stat const *st)
1145 size_t residue, save;
1151 if (! reset (fd, st))
/* Per-file state: OUTLEFT starts over at MAX_COUNT and
   AFTER_LAST_MATCH is cleared. */
1157 outleft = max_count;
1158 after_last_match = 0;
1165 if (! fillbuf (save, st))
/* A failed first read is reported unless errno is EINVAL. */
1167 if (errno != EINVAL)
1168 suppressible_error (filename, errno);
/* file_is_binary is consulted only when BINARY_FILES is
   BINARY_BINARY_FILES with output not quiet, or when it is
   WITHOUT_MATCH_BINARY_FILES. */
1172 not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet)
1173 || binary_files == WITHOUT_MATCH_BINARY_FILES)
1174 && file_is_binary (bufbeg, buflim - bufbeg, fd, st));
1175 if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES)
/* For a binary file, raise DONE_ON_MATCH and OUT_QUIET by NOT_TEXT;
   both are lowered again by the same amount after the scan. */
1177 done_on_match += not_text;
1178 out_quiet += not_text;
1186 beg = bufbeg + save;
1188 /* no more data to scan (eof) except for maybe a residue -> break */
1192 /* Determine new residue (the length of an incomplete line at the end of
1193 the buffer, 0 means there is no incomplete last line). */
1196 /* FIXME: use rawmemrchr if/when it exists, since we have ensured
1197 that this use of memrchr is guaranteed never to return NULL. */
1198 lim = memrchr (beg - 1, eol, buflim - beg + 1);
1202 lim = beg - residue;
1204 residue = buflim - lim;
1209 nlines += grepbuf (beg, lim);
1212 if ((!outleft && !pending) || (nlines && done_on_match))
1216 /* The last OUT_BEFORE lines at the end of the buffer will be needed as
1217 leading context if there is a matching line at the beginning of the
1218 next data. Make beg point to their beginning. */
1221 while (i < out_before && beg > bufbeg && beg != lastout)
1226 while (beg[-1] != eol);
1229 /* Detect whether leading context is adjacent to previous output. */
1233 /* Handle some details and read more data to scan. */
/* SAVE covers the incomplete last line plus the context lines between
   BEG and LIM; those bytes are excluded from the TOTALCC update below. */
1234 save = residue + lim - beg;
1236 totalcc = add_count (totalcc, buflim - bufbeg - save);
1239 if (! fillbuf (save, st))
1241 suppressible_error (filename, errno);
/* Scan that starts RESIDUE bytes before the end of the saved region
   and runs to BUFLIM. */
1249 nlines += grepbuf (bufbeg + save - residue, buflim);
/* Undo the adjustments made for binary files above. */
1255 done_on_match -= not_text;
1256 out_quiet -= not_text;
/* A binary file with at least one match gets the one-line notice,
   provided output is not quiet. */
1257 if ((not_text & ~out_quiet) && nlines != 0)
1258 printf (_("Binary file %s matches\n"), filename);
/* Handle one entry ENT delivered by the traversal FTS.  The visible
   return hands the entry to grepfile; the returns taken on the skip
   and error paths are not shown in this block. */
1263 grepdirent (FTS *fts, FTSENT *ent, int command_line)
1265 int follow, dirdesc;
1266 struct stat *st = ent->fts_statp;
/* COMMAND_LINE stays nonzero only for entries at FTS_ROOTLEVEL. */
1267 command_line &= ent->fts_level == FTS_ROOTLEVEL;
/* FTS_DP entry: for a command-line directory under recursion, clear
   the OUT_FILE bit (2 * !no_filenames) that is OR-ed in further down. */
1269 if (ent->fts_info == FTS_DP)
1271 if (directories == RECURSE_DIRECTORIES && command_line)
1272 out_file &= ~ (2 * !no_filenames);
/* The last argument tells skipped_file whether the entry is one of
   the directory kinds FTS_D, FTS_DC or FTS_DNR. */
1276 if (skipped_file (ent->fts_name, command_line,
1277 (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
1278 || ent->fts_info == FTS_DNR)))
1280 fts_set (fts, ent, FTS_SKIP);
/* The name used in output and diagnostics omits the first
   FILENAME_PREFIX_LEN bytes of the fts path. */
1284 filename = ent->fts_path + filename_prefix_len;
/* Follow symlinks under FTS_LOGICAL, or under FTS_COMFOLLOW when the
   entry is a command-line one. */
1285 follow = (fts->fts_options & FTS_LOGICAL
1286 || (fts->fts_options & FTS_COMFOLLOW && command_line));
1288 switch (ent->fts_info)
1291 if (directories == RECURSE_DIRECTORIES)
1293 out_file |= 2 * !no_filenames;
1296 fts_set (fts, ent, FTS_SKIP);
1300 if (!suppress_errors)
1301 error (0, 0, _("warning: %s: %s"), filename,
1302 _("recursive directory loop"));
1308 suppressible_error (filename, ent->fts_errno);
1313 if (devices == SKIP_DEVICES
1314 || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
1319 /* The file type is not already known. Get the file status
1320 before opening, since opening might have side effects
1322 int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
1323 if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
1325 suppressible_error (filename, errno);
1330 if (is_device_mode (st->st_mode))
1347 dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD
1350 return grepfile (dirdesc, ent->fts_accpath, follow, command_line);
/* Open NAME relative to DIRDESC read-only, adding O_NOFOLLOW unless
   FOLLOW is nonzero, and pass the descriptor and COMMAND_LINE on to
   grepdesc, whose result is returned. */
1354 grepfile (int dirdesc, char const *name, int follow, int command_line)
1356 int desc = openat_safer (dirdesc, name, O_RDONLY | (follow ? 0 : O_NOFOLLOW));
/* When not following symlinks, ELOOP and EMLINK are not reported;
   presumably that is how a refused symlink shows up -- TODO confirm
   against the open documentation of the target systems. */
1359 if (follow || (errno != ELOOP && errno != EMLINK))
1360 suppressible_error (filename, errno);
1363 return grepdesc (desc, command_line);
/* Search the open descriptor DESC; diagnostics use the global
   FILENAME.  COMMAND_LINE is forwarded to skipped_file and grepdirent
   and also enters the device policy test.  Every descriptor other
   than STDIN_FILENO is closed before the end of the function. */
1367 grepdesc (int desc, int command_line)
1373 /* Get the file status, possibly for the second time. This catches
1374 a race condition if the directory entry changes after the
1375 directory entry is read and before the file is opened. For
1376 example, normally DESC is a directory only at the top level, but
1377 there is an exception if some other process substitutes a
1378 directory for a non-directory while 'grep' is running. */
1379 if (fstat (desc, &st) != 0)
1381 suppressible_error (filename, errno);
/* Command-line operands other than standard input are also run
   through skipped_file. */
1385 if (desc != STDIN_FILENO && command_line
1386 && skipped_file (filename, 1, S_ISDIR (st.st_mode)))
1389 if (desc != STDIN_FILENO
1390 && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
1392 /* Traverse the directory starting with its full name, because
1393 unfortunately fts provides no way to traverse the directory
1394 starting from its file descriptor. */
/* FTS_COMFOLLOW is kept only when this is a command-line operand. */
1398 int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
1401 /* Close DESC now, to conserve file descriptors if the race
1402 condition occurs many times in a deep recursion. */
1403 if (close (desc) != 0)
1404 suppressible_error (filename, errno);
1406 fts_arg[0] = (char *) filename;
1408 fts = fts_open (fts_arg, opts, NULL);
/* The per-entry results are combined with bitwise AND. */
1412 while ((ent = fts_read (fts)))
1413 status &= grepdirent (fts, ent, command_line);
1415 suppressible_error (filename, errno);
1416 if (fts_close (fts) != 0)
1417 suppressible_error (filename, errno);
/* Selects directories under SKIP_DIRECTORIES and devices excluded by
   the DEVICES policy; standard input is exempt from this test. */
1420 if (desc != STDIN_FILENO
1421 && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
1422 || ((devices == SKIP_DEVICES
1423 || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
1424 && is_device_mode (st.st_mode))))
1427 /* If there is a regular file on stdout and the current file refers
1428 to the same i-node, we have to report the problem and skip it.
1429 Otherwise when matching lines from some other input reach the
1430 disk before we open this file, we can end up reading and matching
1431 those lines and appending them to the file from which we're reading.
1432 Then we'd have what appears to be an infinite loop that'd terminate
1433 only upon filling the output file system or reaching a quota.
1434 However, there is no risk of an infinite loop if grep is generating
1435 no output, i.e., with --silent, --quiet, -q.
1436 Similarly, with any of these:
1437 --max-count=N (-m) (for N >= 2)
1438 --files-with-matches (-l)
1439 --files-without-match (-L)
1440 there is no risk of trouble.
1441 For --max-count=1, grep stops after printing the first match,
1442 so there is no risk of malfunction. But even --max-count=2, with
1443 input==output, while there is no risk of infloop, there is a race
1444 condition that could result in "alternate" output. */
1445 if (!out_quiet && list_files == 0 && 1 < max_count
1446 && S_ISREG (out_stat.st_mode) && out_stat.st_ino
1447 && SAME_INODE (st, out_stat))
1449 if (! suppress_errors)
1450 error (0, 0, _("input file %s is also the output"), quote (filename));
1455 #if defined SET_BINARY
1456 /* Set input to binary mode. Pipes are simulated with files
1457 on DOS, so this includes the case of "foo | grep bar". */
1462 count = grep (desc, &st);
1473 print_sep (SEP_CHAR_SELECTED);
1477 printf ("%" PRIdMAX "\n", count);
/* 1 - 2 * STATUS is 1 when STATUS is 0 and -1 when STATUS is 1, so
   LIST_FILES is compared against the value for the observed outcome. */
1481 if (list_files == 1 - 2 * status)
/* The terminator is a newline masked by FILENAME_MASK; presumably the
   mask is 0 when a NUL terminator was requested -- TODO confirm. */
1484 fputc ('\n' & filename_mask, stdout);
1487 if (desc == STDIN_FILENO)
/* When OUTLEFT has reached zero the wanted offset is
   AFTER_LAST_MATCH, otherwise it is BUFOFFSET and no seek is needed.
   A failed lseek is reported only for a regular file. */
1489 off_t required_offset = outleft ? bufoffset : after_last_match;
1490 if (required_offset != bufoffset
1491 && lseek (desc, required_offset, SEEK_SET) < 0
1492 && S_ISREG (st.st_mode))
1493 suppressible_error (filename, errno);
/* Standard input is left open; any other descriptor is closed. */
1498 if (desc != STDIN_FILENO && close (desc) != 0)
1499 suppressible_error (filename, errno);
/* Search one command-line operand ARG and return the result of the
   search.  A lone dash selects standard input, reported under LABEL
   when that is set and under the translated (standard input) text
   otherwise.  Any other operand is opened relative to AT_FDCWD with
   both the FOLLOW and COMMAND_LINE arguments of grepfile set to 1. */
1504 grep_command_line_arg (char const *arg)
1506 if (STREQ (arg, "-"))
1508 filename = label ? label : _("(standard input)");
1509 return grepdesc (STDIN_FILENO, 1);
1514 return grepfile (AT_FDCWD, arg, 1, 1);
/* Print usage information.  The function is declared _Noreturn.  Two
   kinds of output are visible below: a short usage line plus a hint
   to run with --help, written to stderr, and the full option summary,
   written to stdout.  Which of the two is produced is presumably
   selected by the int argument -- the test itself is not visible in
   this block, TODO confirm. */
1518 _Noreturn void usage (int);
1524 fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"),
1526 fprintf (stderr, _("Try '%s --help' for more information.\n"),
1531 printf (_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name);
1532 printf (_("Search for PATTERN in each FILE or standard input.\n"));
1533 printf (_("PATTERN is, by default, a basic regular expression (BRE).\n"));
1535 Example: %s -i 'hello world' menu.h main.c\n\
1537 Regexp selection and interpretation:\n"), program_name);
1539 -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\
1540 -F, --fixed-strings PATTERN is a set of newline-separated fixed strings\n\
1541 -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\
1542 -P, --perl-regexp PATTERN is a Perl regular expression\n"));
1543 /* -X is undocumented on purpose. */
1545 -e, --regexp=PATTERN use PATTERN for matching\n\
1546 -f, --file=FILE obtain PATTERN from FILE\n\
1547 -i, --ignore-case ignore case distinctions\n\
1548 -w, --word-regexp force PATTERN to match only whole words\n\
1549 -x, --line-regexp force PATTERN to match only whole lines\n\
1550 -z, --null-data a data line ends in 0 byte, not newline\n"));
1554 -s, --no-messages suppress error messages\n\
1555 -v, --invert-match select non-matching lines\n\
1556 -V, --version display version information and exit\n\
1557 --help display this help text and exit\n"));
1561 -m, --max-count=NUM stop after NUM matches\n\
1562 -b, --byte-offset print the byte offset with output lines\n\
1563 -n, --line-number print line number with output lines\n\
1564 --line-buffered flush output on every line\n\
1565 -H, --with-filename print the file name for each match\n\
1566 -h, --no-filename suppress the file name prefix on output\n\
1567 --label=LABEL use LABEL as the standard input file name prefix\n\
1570 -o, --only-matching show only the part of a line matching PATTERN\n\
1571 -q, --quiet, --silent suppress all normal output\n\
1572 --binary-files=TYPE assume that binary files are TYPE;\n\
1573 TYPE is 'binary', 'text', or 'without-match'\n\
1574 -a, --text equivalent to --binary-files=text\n\
1577 -I equivalent to --binary-files=without-match\n\
1578 -d, --directories=ACTION how to handle directories;\n\
1579 ACTION is 'read', 'recurse', or 'skip'\n\
1580 -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\
1581 ACTION is 'read' or 'skip'\n\
1582 -r, --recursive like --directories=recurse\n\
1583 -R, --dereference-recursive likewise, but follow all symlinks\n\
1586 --include=FILE_PATTERN search only files that match FILE_PATTERN\n\
1587 --exclude=FILE_PATTERN skip files and directories matching FILE_PATTERN\n\
1588 --exclude-from=FILE skip files matching any file pattern from FILE\n\
1589 --exclude-dir=PATTERN directories that match PATTERN will be skipped.\n\
1592 -L, --files-without-match print only names of FILEs containing no match\n\
1593 -l, --files-with-matches print only names of FILEs containing matches\n\
1594 -c, --count print only a count of matching lines per FILE\n\
1595 -T, --initial-tab make tabs line up (if needed)\n\
1596 -Z, --null print 0 byte after FILE name\n"));
1600 -B, --before-context=NUM print NUM lines of leading context\n\
1601 -A, --after-context=NUM print NUM lines of trailing context\n\
1602 -C, --context=NUM print NUM lines of output context\n\
1605 -NUM same as --context=NUM\n\
1607 --colour[=WHEN] use markers to highlight the matching strings;\n\
1608 WHEN is 'always', 'never', or 'auto'\n\
1609 -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\
1610 -u, --unix-byte-offsets report offsets as if CRs were not there\n\
1614 'egrep' means 'grep -E'. 'fgrep' means 'grep -F'.\n\
1615 Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n"));
1617 When FILE is -, read standard input. With no FILE, read . if a command-line\n\
1618 -r is given, - otherwise. If fewer than two FILEs are given, assume -h.\n\
1619 Exit status is 0 if any line is selected, 1 otherwise;\n\
1620 if any error occurs and -q is not given, the exit status is 2.\n"));
1621 printf (_("\nReport bugs to: %s\n"), PACKAGE_BUGREPORT);
1622 printf (_("GNU Grep home page: <%s>\n"),
1623 "http://www.gnu.org/software/grep/");
1624 fputs (_("General help using GNU software: <http://www.gnu.org/gethelp/>\n"),
1631 /* Pattern compilers and matchers. */
/* Compile PATTERN (SIZE bytes) by calling GEAcompile with the syntax
   bits RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES. */
1634 Gcompile (char const *pattern, size_t size)
1636 GEAcompile (pattern, size, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES);
/* Compile PATTERN (SIZE bytes) by calling GEAcompile with the syntax
   bits RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES. */
1640 Ecompile (char const *pattern, size_t size)
1642 GEAcompile (pattern, size, RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
/* Compile PATTERN (SIZE bytes) by calling GEAcompile with the syntax
   bits RE_SYNTAX_AWK. */
1646 Acompile (char const *pattern, size_t size)
1648 GEAcompile (pattern, size, RE_SYNTAX_AWK);
/* Compile PATTERN (SIZE bytes) by calling GEAcompile with the syntax
   bits RE_SYNTAX_GNU_AWK. */
1652 GAcompile (char const *pattern, size_t size)
1654 GEAcompile (pattern, size, RE_SYNTAX_GNU_AWK);
/* Compile PATTERN (SIZE bytes) by calling GEAcompile with the syntax
   bits RE_SYNTAX_POSIX_AWK. */
1658 PAcompile (char const *pattern, size_t size)
1660 GEAcompile (pattern, size, RE_SYNTAX_POSIX_AWK);
/* Members of one matcher description: a name stored in a 16-byte
   array, and the compile and execute functions that go with it. */
1665 char const name[16];
1666 compile_fp_t compile;
1667 execute_fp_t execute;
/* Table of the known matchers.  setmatcher looks entries up by name,
   and main takes its initial compile and execute functions from
   element 0, the grep entry. */
1669 static struct matcher const matchers[] = {
1670 { "grep", Gcompile, EGexecute },
1671 { "egrep", Ecompile, EGexecute },
1672 { "fgrep", Fcompile, Fexecute },
1673 { "awk", Acompile, EGexecute },
1674 { "gawk", GAcompile, EGexecute },
1675 { "posixawk", PAcompile, EGexecute },
1676 { "perl", Pcompile, Pexecute },
1680 /* Set the matcher to M if available. Exit in case of conflicts or if
1681 M is not available. */
1683 setmatcher (char const *m)
1685 struct matcher const *p;
/* A matcher that is already set may be named again, but naming a
   different one is a fatal error. */
1687 if (matcher && !STREQ (matcher, m))
1688 error (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
/* The table scan stops at the first entry whose compile member is
   null. */
1690 for (p = matchers; p->compile; p++)
1691 if (STREQ (m, p->name))
1694 compile = p->compile;
1695 execute = p->execute;
/* No table entry carries the name M. */
1699 error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
1702 /* Find the white-space-separated options specified by OPTIONS, and
1703 using BUF to store copies of these options, set ARGV[0], ARGV[1],
1704 etc. to the option copies. Return the number N of options found.
1705 Do not set ARGV[N] to NULL. If ARGV is NULL, do not store ARGV[0]
1706 etc. Backslash can be used to escape whitespace (and backslashes). */
1708 prepend_args (char const *options, char *buf, char **argv)
1710 char const *o = options;
/* Skip the whitespace in front of the next option. */
1716 while (c_isspace (to_uchar (*o)))
/* Copy one byte into BUF.  A backslash that is followed by another
   byte is singled out here; the statement that handles it is not
   visible in this block. */
1725 if ((*b++ = *o++) == '\\' && *o)
/* The option ends at whitespace or at the end of OPTIONS. */
1727 while (*o && ! c_isspace (to_uchar (*o)));
1733 /* Prepend the whitespace-separated options in OPTIONS to the argument
1734 vector of a main program with argument count *PARGC and argument
1735 vector *PARGV. Return the number of options prepended. */
1737 prepend_default_options (char const *options, int *pargc, char ***pargv)
/* A null or empty OPTIONS string leaves the argument vector alone. */
1739 if (options && *options)
/* BUF receives the option copies; it has room for all of OPTIONS
   including the terminating NUL. */
1741 char *buf = xmalloc (strlen (options) + 1);
/* First pass, with a null ARGV: only count the options. */
1742 size_t prepended = prepend_args (options, buf, NULL);
1744 char *const *argv = *pargv;
/* Cap on the combined argument count, bounded by INT_MAX and by the
   number of pointers an allocation size can express, less one. */
1746 enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) };
1747 if (MAX_ARGS - argc < prepended)
/* One extra slot beyond the options and the ARGC original entries. */
1749 pp = xmalloc ((prepended + argc + 1) * sizeof *pp);
1750 *pargc = prepended + argc;
/* Second pass stores the option pointers; the original vector is then
   copied after them, up to and including its null terminator. */
1753 pp += prepend_args (options, buf, pp);
1754 while ((*pp++ = *argv++))
1762 /* Get the next non-digit option from ARGC and ARGV.
1763 Return -1 if there are no more options.
1764 Process any digit options that were encountered on the way,
1765 and store the resulting integer into *DEFAULT_CONTEXT. */
1767 get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
/* Static, so the value carries over from one call to the next. */
1769 static int prev_digit_optind = -1;
1770 int this_digit_optind, was_digit;
/* The digit buffer keeps 4 bytes in reserve beyond the bound for an
   intmax_t; the overflow test below stops 4 bytes short of the end. */
1771 char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
1776 this_digit_optind = optind;
1779 opt = getopt_long (argc, (char **) argv, short_options,
1780 long_options, NULL);
/* Only the options 0 through 9 are accumulated by this function. */
1781 if ( ! ('0' <= opt && opt <= '9'))
/* A digit extends the current number only when it was preceded by a
   digit found at the same optind position. */
1784 if (prev_digit_optind != this_digit_optind || !was_digit)
1786 /* Reset to start another context length argument. */
1791 /* Suppress trivial leading zeros, to avoid incorrect
1792 diagnostic on strings like 00000000000. */
1796 if (p == buf + sizeof buf - 4)
1798 /* Too many digits. Append "..." to make context_length_arg
1799 complain about "X...", where X contains the digits seen
1808 prev_digit_optind = this_digit_optind;
1809 this_digit_optind = optind;
1814 context_length_arg (buf, default_context);
1820 /* Parse GREP_COLORS. The default would look like:
1821 GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
1822 with boolean capabilities (ne and rv) unset (i.e., omitted).
1823 No character escaping is needed or supported. */
1825 parse_grep_colors (void)
1832 p = getenv ("GREP_COLORS"); /* Plural! */
/* An unset or empty variable is tested for first. */
1833 if (p == NULL || *p == '\0')
1836 /* Work off a writable copy. */
1841 /* From now on, be well-formed or you're gone. */
/* A colon or the end of the string closes the current capability. */
1843 if (*q == ':' || *q == '\0')
1846 struct color_cap const *cap;
1848 *q++ = '\0'; /* Terminate name or val. */
1849 /* Empty name without val (empty cap)
1850 * won't match and will be ignored. */
/* Linear lookup in color_dict, which ends at an entry with a null
   name. */
1851 for (cap = color_dict; cap->name; cap++)
1852 if (STREQ (cap->name, name))
1854 /* If name unknown, go on for forward compatibility. */
1855 if (cap->var && val)
1866 if (q == name || val)
1868 *q++ = '\0'; /* Terminate name. */
1869 val = q; /* Can be the empty string. */
1871 else if (val == NULL)
1872 q++; /* Accumulate name. */
1873 else if (*q == ';' || (*q >= '0' && *q <= '9'))
1874 q++; /* Accumulate val. Protect the terminal from being sent crap. */
1879 /* Return true if PAT (of length PATLEN) contains an encoding error. */
1881 contains_encoding_error (char const *pat, size_t patlen)
/* Conversion starts from a zero-initialized state. */
1883 mbstate_t mbs = { 0 };
/* Step by the length mbrlen reported; a reported length of 0 still
   advances by one byte. */
1886 for (i = 0; i < patlen; i += charlen + (charlen == 0))
1888 charlen = mbrlen (pat + i, patlen - i, &mbs);
/* (size_t) -2 and (size_t) -1 are the returns mbrlen uses for an
   incomplete and an invalid sequence respectively. */
1889 if ((size_t) -2 <= charlen)
1895 /* Change a pattern for fgrep into grep. */
1897 fgrep_to_grep_pattern (size_t len, char const *keys,
1898 size_t *new_len, char **new_keys)
/* The new pattern buffer holds 2 * (LEN + 1) bytes and is handed back
   through *NEW_KEYS. */
1900 char *p = *new_keys = xnmalloc (len + 1, 2);
1901 mbstate_t mb_state = { 0 };
/* Consume KEYS one character of N bytes at a time. */
1904 for (; len; keys += n, len -= n)
1907 n = mbrtowc (&wc, keys, len, &mb_state);
1914 p = mempcpy (p, keys, n);
1918 memset (&mb_state, 0, sizeof mb_state);
/* P moves one extra position when *KEYS is one of the listed special
   characters; presumably a backslash was stored at P just before --
   that store is not visible in this block, TODO confirm. */
1922 p += strchr ("$*.[\\^", *keys) != NULL;
/* The length written is reported through *NEW_LEN. */
1931 *new_len = p - *new_keys;
/* Program entry point.  Parses the options, including any prepended
   from the GREP_OPTIONS environment variable, collects and compiles
   the pattern, and then searches each remaining operand, or the
   current directory for a recursive search requested on the command
   line, or standard input.  The process exits with EXIT_TROUBLE when
   ERRSEEN is set and with the accumulated search status otherwise. */
1935 main (int argc, char **argv)
1938 size_t keycc, oldcc, keyalloc;
1941 int opt, status, prepended;
1942 int prev_optind, last_recursive;
1944 intmax_t default_context;
1946 exit_failure = EXIT_TROUBLE;
1947 initialize_main (&argc, &argv);
/* NOTE(review): program_name is assigned directly right after the
   set_program_name call -- confirm that both are intended. */
1948 set_program_name (argv[0]);
1949 program_name = argv[0];
1957 max_count = INTMAX_MAX;
1959 /* The value -1 means to use DEFAULT_CONTEXT. */
1960 out_after = out_before = -1;
1961 /* Default before/after context: changed by -C/-NUM options */
1962 default_context = -1;
1963 /* Changed by -o option */
1966 /* Internationalization. */
1967 #if defined HAVE_SETLOCALE
1968 setlocale (LC_ALL, "");
1970 #if defined ENABLE_NLS
1971 bindtextdomain (PACKAGE, LOCALEDIR);
1972 textdomain (PACKAGE);
/* NOTE(review): exit_failure was already set to EXIT_TROUBLE at the
   top of main; this second assignment looks redundant. */
1975 exit_failure = EXIT_TROUBLE;
1976 atexit (clean_up_stdout);
/* Options from GREP_OPTIONS are placed in front of the real
   arguments; PREPENDED is how many were added. */
1979 prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
/* Start with the compile and execute functions of the first entry of
   the matcher table. */
1980 compile = matchers[0].compile;
1981 execute = matchers[0].execute;
/* PREV_OPTIND holds optind as it was before each option is fetched;
   the recursion options below save it in LAST_RECURSIVE. */
1983 while (prev_optind = optind,
1984 (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
1988 context_length_arg (optarg, &out_after);
1992 context_length_arg (optarg, &out_before);
1996 /* Set output match context, but let any explicit leading or
1997 trailing amount specified with -A or -B stand. */
1998 context_length_arg (optarg, &default_context);
2002 if (STREQ (optarg, "read"))
2003 devices = READ_DEVICES;
2004 else if (STREQ (optarg, "skip"))
2005 devices = SKIP_DEVICES;
2007 error (EXIT_TROUBLE, 0, _("unknown devices method"));
2011 setmatcher ("egrep");
2015 setmatcher ("fgrep");
2019 setmatcher ("perl");
2023 setmatcher ("grep");
2026 case 'X': /* undocumented on purpose */
2027 setmatcher (optarg);
2036 binary_files = WITHOUT_MATCH_BINARY_FILES;
2048 dos_unix_byte_offsets ();
2056 binary_files = TEXT_BINARY_FILES;
2068 directories = XARGMATCH ("--directories", optarg,
2069 directories_args, directories_types);
2070 if (directories == RECURSE_DIRECTORIES)
2071 last_recursive = prev_optind;
/* Append the option argument to KEYS and follow it with a newline. */
2075 cc = strlen (optarg);
2076 keys = xrealloc (keys, keycc + cc + 1);
2077 strcpy (&keys[keycc], optarg);
2079 keys[keycc++] = '\n';
/* A lone dash as the file name means the patterns come from stdin. */
2083 fp = STREQ (optarg, "-") ? stdin : fopen (optarg, O_TEXT ? "rt" : "r");
2085 error (EXIT_TROUBLE, errno, "%s", optarg);
/* Double KEYALLOC, starting from 1, until it exceeds KEYCC + 1. */
2086 for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2)
2088 keys = xrealloc (keys, keyalloc);
/* Each read leaves one byte of KEYS unused at the end; the buffer is
   grown once only that byte remains. */
2090 while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0)
2093 if (keycc == keyalloc - 1)
2094 keys = x2nrealloc (keys, &keyalloc, sizeof *keys);
/* errno is saved at once so that later calls cannot overwrite it
   before it is reported. */
2096 fread_errno = errno;
2098 error (EXIT_TROUBLE, fread_errno, "%s", optarg);
2101 /* Append final newline if file ended in non-newline. */
2102 if (oldcc != keycc && keys[keycc - 1] != '\n')
2103 keys[keycc++] = '\n';
2112 case 'y': /* For old-timers . . . */
2117 /* Like -l, except list files that don't contain matches.
2118 Inspired by the same option in Hume's gre. */
2127 switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
2130 case LONGINT_OVERFLOW:
2134 error (EXIT_TROUBLE, 0, _("invalid max count"));
2152 fts_options = basic_fts_options | FTS_LOGICAL;
2155 directories = RECURSE_DIRECTORIES;
2156 last_recursive = prev_optind;
2160 suppress_errors = 1;
2183 case BINARY_FILES_OPTION:
2184 if (STREQ (optarg, "binary"))
2185 binary_files = BINARY_BINARY_FILES;
2186 else if (STREQ (optarg, "text"))
2187 binary_files = TEXT_BINARY_FILES;
2188 else if (STREQ (optarg, "without-match"))
2189 binary_files = WITHOUT_MATCH_BINARY_FILES;
2191 error (EXIT_TROUBLE, 0, _("unknown binary-files type"));
/* The color argument is compared without regard to case, and each
   choice has several accepted spellings. */
2197 if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes")
2198 || !strcasecmp (optarg, "force"))
2200 else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no")
2201 || !strcasecmp (optarg, "none"))
2203 else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty")
2204 || !strcasecmp (optarg, "if-tty"))
/* Include and exclude patterns share the EXCLUDED_PATTERNS list; an
   include pattern is added with the EXCLUDE_INCLUDE flag. */
2213 case EXCLUDE_OPTION:
2214 case INCLUDE_OPTION:
2215 if (!excluded_patterns)
2216 excluded_patterns = new_exclude ();
2217 add_exclude (excluded_patterns, optarg,
2219 | (opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)));
2221 case EXCLUDE_FROM_OPTION:
2222 if (!excluded_patterns)
2223 excluded_patterns = new_exclude ();
2224 if (add_exclude_file (add_exclude, excluded_patterns, optarg,
2225 EXCLUDE_WILDCARDS, '\n') != 0)
2227 error (EXIT_TROUBLE, errno, "%s", optarg);
2231 case EXCLUDE_DIRECTORY_OPTION:
2232 if (!excluded_directory_patterns)
2233 excluded_directory_patterns = new_exclude ();
2234 strip_trailing_slashes (optarg);
2235 add_exclude (excluded_directory_patterns, optarg, EXCLUDE_WILDCARDS);
2238 case GROUP_SEPARATOR_OPTION:
2239 group_separator = optarg;
2242 case LINE_BUFFERED_OPTION:
2255 usage (EXIT_TROUBLE);
/* The value 2 is resolved here to 0 or 1: color is used only when
   stdout is a terminal and should_colorize agrees.  Presumably 2 is
   the automatic setting -- TODO confirm. */
2260 if (color_option == 2)
2261 color_option = isatty (STDOUT_FILENO) && should_colorize ();
2264 /* POSIX says that -q overrides -l, which in turn overrides the
2265 other output options. */
2268 if (exit_on_match | list_files)
2273 out_quiet = count_matches | done_on_match;
2276 out_after = default_context;
2278 out_before = default_context;
2283 char *userval = getenv ("GREP_COLOR");
2284 if (userval != NULL && *userval != '\0')
2285 selected_match_color = context_match_color = userval;
2287 /* New GREP_COLORS has priority. */
2288 parse_grep_colors ();
2293 version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS,
2295 exit (EXIT_SUCCESS);
2299 usage (EXIT_SUCCESS);
/* OUT_STAT is filled in only when stdout is a regular file. */
2301 struct stat tmp_stat;
2302 if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode))
2303 out_stat = tmp_stat;
2309 /* No keys were specified (e.g. -f /dev/null). Match nothing. */
2311 match_lines = match_words = 0;
2314 /* Strip trailing newline. */
2317 else if (optind < argc)
2319 /* A copy must be made in case of an xrealloc() or free() later. */
2320 keycc = strlen (argv[optind]);
2321 keys = xmemdup (argv[optind++], keycc + 1);
2324 usage (EXIT_TROUBLE);
2326 /* If fgrep in a multibyte locale, then use grep if either
2327 (1) case is ignored (where grep is typically faster), or
2328 (2) the pattern has an encoding error (where fgrep might not work). */
2329 if (compile == Fcompile && MB_CUR_MAX > 1
2330 && (match_icase || contains_encoding_error (keys, keycc)))
2334 fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys);
2340 execute = EGexecute;
2344 build_mbclen_cache ();
2346 compile (keys, keycc);
/* This test holds when more than one operand remains and
   NO_FILENAMES is unset, or when WITH_FILENAMES is set. */
2349 if ((argc - optind > 1 && !no_filenames) || with_filenames)
2353 /* Output is set to binary mode because we shouldn't convert
2354 NL to CR-LF pairs, especially when grepping binary files. */
2360 exit (EXIT_FAILURE);
/* Under FTS_LOGICAL the READ_COMMAND_LINE_DEVICES setting is widened
   to READ_DEVICES. */
2362 if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
2363 devices = READ_DEVICES;
/* The per-operand results are combined with bitwise AND. */
2369 status &= grep_command_line_arg (argv[optind]);
2370 while (++optind < argc);
/* Without operands, recurse into the current directory only when the
   last recursion option sat beyond the options prepended from
   GREP_OPTIONS, that is, on the real command line. */
2372 else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive)
2374 /* Grep through ".", omitting leading "./" from diagnostics. */
2375 filename_prefix_len = 2;
2376 status = grep_command_line_arg (".");
2379 status = grep_command_line_arg ("-");
2381 /* We register via atexit() to test stdout. */
2382 exit (errseen ? EXIT_TROUBLE : status);
2384 /* vim:set shiftwidth=2: */