From: John Marino Date: Fri, 10 Oct 2014 22:51:31 +0000 (+0200) Subject: Merge branch 'vendor/GREP' X-Git-Tag: v4.1.0~66 X-Git-Url: https://gitweb.dragonflybsd.org/~tuxillo/dragonfly.git/commitdiff_plain/5be44d7264347781e3ed476d25d623ecb3181508?hp=f561435f64997452deb9afa6d9c58fae0db01175 Merge branch 'vendor/GREP' Conflicts: contrib/grep/src/main.c --- diff --git a/contrib/grep/README b/contrib/grep/README index 36b377b200..76f9770f66 100644 --- a/contrib/grep/README +++ b/contrib/grep/README @@ -1,4 +1,4 @@ - Copyright (C) 1992, 1997-2002, 2004-2012 Free Software Foundation, Inc. + Copyright (C) 1992, 1997-2002, 2004-2014 Free Software Foundation, Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright diff --git a/contrib/grep/doc/fdl.texi b/contrib/grep/doc/fdl.texi index 2d26c9ec76..fd81125a46 100644 --- a/contrib/grep/doc/fdl.texi +++ b/contrib/grep/doc/fdl.texi @@ -5,7 +5,7 @@ @c hence no sectioning command or @node. @display -Copyright @copyright{} 2000-2002, 2007-2008, 2010-2012 Free Software +Copyright @copyright{} 2000-2002, 2007-2008, 2010-2014 Free Software Foundation, Inc. @uref{http://fsf.org/} diff --git a/contrib/grep/doc/grep.texi b/contrib/grep/doc/grep.texi index 0e519dd9d4..c9417e5196 100644 --- a/contrib/grep/doc/grep.texi +++ b/contrib/grep/doc/grep.texi @@ -13,10 +13,12 @@ @syncodeindex vr cp @c %**end of header +@documentencoding UTF-8 + @copying This manual is for @command{grep}, a pattern matching engine. -Copyright @copyright{} 1999-2002, 2005, 2008-2012 Free Software Foundation, +Copyright @copyright{} 1999--2002, 2005, 2008--2014 Free Software Foundation, Inc. @quotation @@ -193,7 +195,19 @@ The empty file contains zero patterns, and therefore matches nothing. @opindex -y @opindex --ignore-case @cindex case insensitive search -Ignore case distinctions in both the pattern and the input files. 
+Ignore case distinctions, so that characters that differ only in case +match each other. Although this is straightforward when letters +differ in case only via lowercase-uppercase pairs, the behavior is +unspecified in other situations. For example, uppercase ``S'' has an +unusual lowercase counterpart ``ſ'' (Unicode character U+017F, LATIN +SMALL LETTER LONG S) in many locales, and it is unspecified whether +this unusual character matches ``S'' or ``s'' even though uppercasing +it yields ``S''. Another example: the lowercase German letter ``ß'' +(U+00DF, LATIN SMALL LETTER SHARP S) is normally capitalized as the +two-character string ``SS'' but it does not match ``SS'', and it might +not match the uppercase letter ``ẞ'' (U+1E9E, LATIN CAPITAL LETTER +SHARP S) even though lowercasing the latter yields the former. + @option{-y} is an obsolete synonym that is provided for compatibility. (@option{-i} is specified by POSIX.) @@ -518,14 +532,13 @@ Print @var{num} lines of leading and trailing output context. @opindex --group-separator @cindex group separator When @option{-A}, @option{-B} or @option{-C} are in use, -print @var{string} instead of @option{--} around disjoint groups -of lines. +print @var{string} instead of @option{--} between groups of lines. @item --no-group-separator @opindex --group-separator @cindex group separator When @option{-A}, @option{-B} or @option{-C} are in use, -print disjoint groups of lines adjacent to each other. +do not print a separator between groups of lines. @end table @@ -541,26 +554,25 @@ between prefix fields and actual line content. Context (i.e., non-matching) lines use @samp{-} instead. @item -When no context is specified, +When context is not specified, matching lines are simply output one right after another. 
@item -When nonzero context is specified, +When context is specified, lines that are adjacent in the input form a group and are output one right after another, while -a separator appears by default between disjoint groups on a line -of its own and without any prefix. +by default a separator appears between non-adjacent groups. @item The default separator -is @samp{--}, however whether to include it and its appearance +is a @samp{--} line; its presence and appearance can be changed with the options above. @item Each group may contain several matching lines when they are close enough to each other -that two otherwise adjacent but divided groups connect -and can just merge into a single contiguous one. +that two adjacent groups connect and can merge into a single +contiguous one. @end itemize @node File and Directory Selection @@ -656,8 +668,8 @@ under @option{--exclude}). @item --exclude-dir=@var{dir} @opindex --exclude-dir @cindex exclude directories -Exclude directories matching the pattern @var{dir} from recursive -directory searches. +Skip any directory whose name matches the pattern @var{dir}, ignoring +any redundant trailing slashes in @var{dir}. @item -I Process a binary file as if it did not contain matching data; @@ -706,15 +718,6 @@ directory, recursively, following all symbolic links. Use line buffering on output. This can cause a performance penalty. -@item --mmap -@opindex --mmap -@cindex memory mapped input -This option is deprecated and now elicits a warning, but is otherwise a no-op. -It used to make @command{grep} read -input with the @code{mmap} system call, instead of the default @code{read} -system call. On modern systems, @code{mmap} would rarely if ever yield -better performance. - @item -U @itemx --binary @opindex -U @@ -960,8 +963,8 @@ They are omitted (i.e., false) by default and become true when specified. 
@cindex national language support @cindex NLS These variables specify the locale for the @code{LC_COLLATE} category, -which determines the collating sequence -used to interpret range expressions like @samp{[a-z]}. +which might affect how range expressions like @samp{[a-z]} are +interpreted. @item LC_ALL @itemx LC_CTYPE @@ -1180,6 +1183,7 @@ The preceding item is matched @var{n} or more times. @cindex braces, first argument omitted @cindex match expression at most @var{m} times The preceding item is matched at most @var{m} times. +This is a GNU extension. @item @{@var{n},@var{m}@} @opindex @{@var{n},@var{m}@} @@ -1222,14 +1226,13 @@ For example, the regular expression Within a bracket expression, a @dfn{range expression} consists of two characters separated by a hyphen. It matches any single character that -sorts between the two characters, inclusive, using the locale's -collating sequence and character set. -For example, in the default C -locale, @samp{[a-d]} is equivalent to @samp{[abcd]}. -Many locales sort -characters in dictionary order, and in these locales @samp{[a-d]} is -typically not equivalent to @samp{[abcd]}; -it might be equivalent to @samp{[aBbCcDd]}, for example. +sorts between the two characters, inclusive. +In the default C locale, the sorting sequence is the native character +order; for example, @samp{[a-d]} is equivalent to @samp{[abcd]}. +In other locales, the sorting sequence is not specified, and +@samp{[a-d]} might be equivalent to @samp{[abcd]} or to +@samp{[aBbCcDd]}, or it might fail to match any character, or the set of +characters that it matches might even be erratic. To obtain the traditional interpretation of bracket expressions, you can use the @samp{C} locale by setting the @env{LC_ALL} environment variable to the value @samp{C}. @@ -1714,25 +1717,19 @@ How can I match across lines? Standard grep cannot do this, as it is fundamentally line-based. 
Therefore, merely using the @code{[:space:]} character class does not -match newlines in the way you might expect. However, if your grep is -compiled with Perl patterns enabled, the Perl @samp{s} -modifier (which makes @code{.} match newlines) can be used: - -@example -printf 'foo\nbar\n' | grep -P '(?s)foo.*?bar' -@end example +match newlines in the way you might expect. With the GNU @command{grep} option @code{-z} (@pxref{File and Directory Selection}), the input is terminated by null bytes. Thus, -you can match newlines in the input, but the output will be the whole -file, so this is really only useful to determine if the pattern is -present: +you can match newlines in the input, but typically if there is a match +the entire input is output, so this usage is often combined with +output-suppressing options like @option{-q}, e.g.: @example printf 'foo\nbar\n' | grep -z -q 'foo[[:space:]]\+bar' @end example -Failing either of those options, you need to transform the input +If this does not suffice, you can transform the input before giving it to @command{grep}, or turn to @command{awk}, @command{sed}, @command{perl}, or many other utilities that are designed to operate across lines. diff --git a/contrib/grep/doc/version.texi b/contrib/grep/doc/version.texi index efdb0e5f89..0619e31ed0 100644 --- a/contrib/grep/doc/version.texi +++ b/contrib/grep/doc/version.texi @@ -1,4 +1,4 @@ -@set UPDATED 15 May 2012 -@set UPDATED-MONTH May 2012 -@set EDITION 2.14 -@set VERSION 2.14 +@set UPDATED 29 May 2014 +@set UPDATED-MONTH May 2014 +@set EDITION 2.20 +@set VERSION 2.20 diff --git a/contrib/grep/lib/argmatch.c b/contrib/grep/lib/argmatch.c index a713364221..f56ab55f72 100644 --- a/contrib/grep/lib/argmatch.c +++ b/contrib/grep/lib/argmatch.c @@ -1,6 +1,6 @@ /* argmatch.c -- find a match for a string in an array - Copyright (C) 1990, 1998-1999, 2001-2007, 2009-2012 Free Software + Copyright (C) 1990, 1998-1999, 2001-2007, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/argmatch.h b/contrib/grep/lib/argmatch.h index 7e88696a4a..09645db657 100644 --- a/contrib/grep/lib/argmatch.h +++ b/contrib/grep/lib/argmatch.h @@ -1,6 +1,6 @@ /* argmatch.h -- definitions and prototypes for argmatch.c - Copyright (C) 1990, 1998-1999, 2001-2002, 2004-2005, 2009-2012 Free Software + Copyright (C) 1990, 1998-1999, 2001-2002, 2004-2005, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -26,6 +26,10 @@ # include "verify.h" +#ifdef __cplusplus +extern "C" { +#endif + # define ARRAY_CARDINALITY(Array) (sizeof (Array) / sizeof *(Array)) /* Assert there are as many real arguments as there are values @@ -100,4 +104,8 @@ char const *argmatch_to_argument (char const *value, argmatch_to_argument (Value, Arglist, \ (char const *) (Vallist), sizeof *(Vallist)) +#ifdef __cplusplus +} +#endif + #endif /* ARGMATCH_H_ */ diff --git a/contrib/grep/lib/at-func.c b/contrib/grep/lib/at-func.c index b25bc4d819..c8ee073d11 100644 --- a/contrib/grep/lib/at-func.c +++ b/contrib/grep/lib/at-func.c @@ -1,5 +1,5 @@ /* Define at-style functions like fstatat, unlinkat, fchownat, etc. - Copyright (C) 2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,9 +17,17 @@ /* written by Jim Meyering */ #include "dosname.h" /* solely for definition of IS_ABSOLUTE_FILE_NAME */ -#include "openat.h" -#include "openat-priv.h" -#include "save-cwd.h" + +#ifdef GNULIB_SUPPORT_ONLY_AT_FDCWD +# include <errno.h> +# ifndef ENOTSUP +# define ENOTSUP EINVAL +# endif +#else +# include "openat.h" +# include "openat-priv.h" +# include "save-cwd.h" +#endif #ifdef AT_FUNC_USE_F1_COND # define CALL_FUNC(F) \ @@ -61,17 +69,22 @@ FUNC_RESULT AT_FUNC_NAME (int fd, char const *file AT_FUNC_POST_FILE_PARAM_DECLS) { + VALIDATE_FLAG (flag); + + if (fd == AT_FDCWD || IS_ABSOLUTE_FILE_NAME (file)) + return CALL_FUNC (file); + +#ifdef GNULIB_SUPPORT_ONLY_AT_FDCWD + errno = ENOTSUP; + return FUNC_FAIL; +#else + { /* Be careful to choose names unlikely to conflict with AT_FUNC_POST_FILE_PARAM_DECLS. */ struct saved_cwd saved_cwd; int saved_errno; FUNC_RESULT err; - VALIDATE_FLAG (flag); - - if (fd == AT_FDCWD || IS_ABSOLUTE_FILE_NAME (file)) - return CALL_FUNC (file); - { char proc_buf[OPENAT_BUFFER_SIZE]; char *proc_file = openat_proc_name (proc_buf, fd, file); @@ -125,6 +138,8 @@ AT_FUNC_NAME (int fd, char const *file AT_FUNC_POST_FILE_PARAM_DECLS) if (saved_errno) errno = saved_errno; return err; + } +#endif } #undef CALL_FUNC #undef FUNC_RESULT diff --git a/contrib/grep/lib/basename-lgpl.c b/contrib/grep/lib/basename-lgpl.c index 43ef8c211b..cec50e935a 100644 --- a/contrib/grep/lib/basename-lgpl.c +++ b/contrib/grep/lib/basename-lgpl.c @@ -1,6 +1,6 @@ /* basename.c -- return the last element in a file name - Copyright (C) 1990, 1998-2001, 2003-2006, 2009-2012 Free Software + Copyright (C) 1990, 1998-2001, 2003-2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/binary-io.c b/contrib/grep/lib/binary-io.c new file mode 100644 index 0000000000..8bbdb44d12 --- /dev/null +++ b/contrib/grep/lib/binary-io.c @@ -0,0 +1,3 @@ +#include <config.h> +#define BINARY_IO_INLINE _GL_EXTERN_INLINE +#include "binary-io.h" diff --git a/contrib/grep/lib/binary-io.h b/contrib/grep/lib/binary-io.h index a33e32aee2..7928f8c10c 100644 --- a/contrib/grep/lib/binary-io.h +++ b/contrib/grep/lib/binary-io.h @@ -1,5 +1,5 @@ /* Binary mode I/O. - Copyright (C) 2001, 2003, 2005, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2001, 2003, 2005, 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +25,14 @@ so we include it here first. */ #include <stdio.h> +#ifndef _GL_INLINE_HEADER_BEGIN + #error "Please include config.h first." +#endif +_GL_INLINE_HEADER_BEGIN +#ifndef BINARY_IO_INLINE +# define BINARY_IO_INLINE _GL_INLINE +#endif + /* set_binary_mode (fd, mode) sets the binary/text I/O mode of file descriptor fd to the given mode (must be O_BINARY or O_TEXT) and returns the previous mode. */ @@ -39,9 +47,9 @@ # endif #else /* On reasonable systems, binary I/O is the only choice. */ - /* Use an inline function rather than a macro, to avoid gcc warnings + /* Use a function rather than a macro, to avoid gcc warnings "warning: statement with no effect". 
*/ -static inline int +BINARY_IO_INLINE int set_binary_mode (int fd, int mode) { (void) fd; @@ -62,4 +70,6 @@ set_binary_mode (int fd, int mode) # define SET_BINARY(fd) ((void) set_binary_mode (fd, O_BINARY)) #endif +_GL_INLINE_HEADER_END + #endif /* _BINARY_H */ diff --git a/contrib/grep/lib/bitrotate.c b/contrib/grep/lib/bitrotate.c new file mode 100644 index 0000000000..a8f602889d --- /dev/null +++ b/contrib/grep/lib/bitrotate.c @@ -0,0 +1,3 @@ +#include <config.h> +#define BITROTATE_INLINE _GL_EXTERN_INLINE +#include "bitrotate.h" diff --git a/contrib/grep/lib/bitrotate.h b/contrib/grep/lib/bitrotate.h index 5fb8f9be47..9ec0b47b72 100644 --- a/contrib/grep/lib/bitrotate.h +++ b/contrib/grep/lib/bitrotate.h @@ -1,5 +1,5 @@ /* bitrotate.h - Rotate bits in integers - Copyright (C) 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,11 +23,19 @@ #include <stdint.h> #include <sys/types.h> +#ifndef _GL_INLINE_HEADER_BEGIN + #error "Please include config.h first." +#endif +_GL_INLINE_HEADER_BEGIN +#ifndef BITROTATE_INLINE +# define BITROTATE_INLINE _GL_INLINE +#endif + #ifdef UINT64_MAX /* Given an unsigned 64-bit argument X, return the value corresponding to rotating the bits N steps to the left. N must be between 1 and 63 inclusive. */ -static inline uint64_t +BITROTATE_INLINE uint64_t rotl64 (uint64_t x, int n) { return ((x << n) | (x >> (64 - n))) & UINT64_MAX; @@ -36,7 +44,7 @@ rotl64 (uint64_t x, int n) /* Given an unsigned 64-bit argument X, return the value corresponding to rotating the bits N steps to the right. 
N must be between 1 to 63 inclusive.*/ -static inline uint64_t +BITROTATE_INLINE uint64_t rotr64 (uint64_t x, int n) { return ((x >> n) | (x << (64 - n))) & UINT64_MAX; @@ -46,7 +54,7 @@ rotr64 (uint64_t x, int n) /* Given an unsigned 32-bit argument X, return the value corresponding to rotating the bits N steps to the left. N must be between 1 and 31 inclusive. */ -static inline uint32_t +BITROTATE_INLINE uint32_t rotl32 (uint32_t x, int n) { return ((x << n) | (x >> (32 - n))) & UINT32_MAX; @@ -55,7 +63,7 @@ rotl32 (uint32_t x, int n) /* Given an unsigned 32-bit argument X, return the value corresponding to rotating the bits N steps to the right. N must be between 1 to 31 inclusive.*/ -static inline uint32_t +BITROTATE_INLINE uint32_t rotr32 (uint32_t x, int n) { return ((x >> n) | (x << (32 - n))) & UINT32_MAX; @@ -64,7 +72,7 @@ rotr32 (uint32_t x, int n) /* Given a size_t argument X, return the value corresponding to rotating the bits N steps to the left. N must be between 1 and (CHAR_BIT * sizeof (size_t) - 1) inclusive. */ -static inline size_t +BITROTATE_INLINE size_t rotl_sz (size_t x, int n) { return ((x << n) | (x >> ((CHAR_BIT * sizeof x) - n))) & SIZE_MAX; @@ -73,7 +81,7 @@ rotl_sz (size_t x, int n) /* Given a size_t argument X, return the value corresponding to rotating the bits N steps to the right. N must be between 1 to (CHAR_BIT * sizeof (size_t) - 1) inclusive. */ -static inline size_t +BITROTATE_INLINE size_t rotr_sz (size_t x, int n) { return ((x >> n) | (x << ((CHAR_BIT * sizeof x) - n))) & SIZE_MAX; @@ -84,7 +92,7 @@ rotr_sz (size_t x, int n) 15 inclusive, but on most relevant targets N can also be 0 and 16 because 'int' is at least 32 bits and the arguments must widen before shifting. 
*/ -static inline uint16_t +BITROTATE_INLINE uint16_t rotl16 (uint16_t x, int n) { return ((x << n) | (x >> (16 - n))) & UINT16_MAX; @@ -95,7 +103,7 @@ rotl16 (uint16_t x, int n) inclusive, but on most relevant targets N can also be 0 and 16 because 'int' is at least 32 bits and the arguments must widen before shifting. */ -static inline uint16_t +BITROTATE_INLINE uint16_t rotr16 (uint16_t x, int n) { return ((x >> n) | (x << (16 - n))) & UINT16_MAX; @@ -106,7 +114,7 @@ rotr16 (uint16_t x, int n) inclusive, but on most relevant targets N can also be 0 and 8 because 'int' is at least 32 bits and the arguments must widen before shifting. */ -static inline uint8_t +BITROTATE_INLINE uint8_t rotl8 (uint8_t x, int n) { return ((x << n) | (x >> (8 - n))) & UINT8_MAX; @@ -117,10 +125,12 @@ rotl8 (uint8_t x, int n) inclusive, but on most relevant targets N can also be 0 and 8 because 'int' is at least 32 bits and the arguments must widen before shifting. */ -static inline uint8_t +BITROTATE_INLINE uint8_t rotr8 (uint8_t x, int n) { return ((x >> n) | (x << (8 - n))) & UINT8_MAX; } +_GL_INLINE_HEADER_END + #endif /* _GL_BITROTATE_H */ diff --git a/contrib/grep/lib/btowc.c b/contrib/grep/lib/btowc.c index cec9eca564..b06ca2e4a5 100644 --- a/contrib/grep/lib/btowc.c +++ b/contrib/grep/lib/btowc.c @@ -1,5 +1,5 @@ /* Convert unibyte character to wide character. - Copyright (C) 2008, 2010-2012 Free Software Foundation, Inc. + Copyright (C) 2008, 2010-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2008. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/c-ctype.c b/contrib/grep/lib/c-ctype.c index 952d7a851f..48c6478378 100644 --- a/contrib/grep/lib/c-ctype.c +++ b/contrib/grep/lib/c-ctype.c @@ -1,6 +1,6 @@ /* Character handling in C locale. - Copyright 2000-2003, 2006, 2009-2012 Free Software Foundation, Inc. + Copyright 2000-2003, 2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/c-ctype.h b/contrib/grep/lib/c-ctype.h index 0b31309e96..b465277619 100644 --- a/contrib/grep/lib/c-ctype.h +++ b/contrib/grep/lib/c-ctype.h @@ -5,7 +5,7 @@ functions' behaviour depends on the current locale set via setlocale. - Copyright (C) 2000-2003, 2006, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2000-2003, 2006, 2008-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -136,7 +136,8 @@ extern int c_tolower (int c) _GL_ATTRIBUTE_CONST; extern int c_toupper (int c) _GL_ATTRIBUTE_CONST; -#if defined __GNUC__ && defined __OPTIMIZE__ && !defined __OPTIMIZE_SIZE__ && !defined NO_C_CTYPE_MACROS +#if (defined __GNUC__ && !defined __STRICT_ANSI__ && defined __OPTIMIZE__ \ + && !defined __OPTIMIZE_SIZE__ && !defined NO_C_CTYPE_MACROS) /* ASCII optimizations. */ diff --git a/contrib/grep/lib/c-strcase.h b/contrib/grep/lib/c-strcase.h index fdef2385ea..8e660441d0 100644 --- a/contrib/grep/lib/c-strcase.h +++ b/contrib/grep/lib/c-strcase.h @@ -1,5 +1,5 @@ /* Case-insensitive string comparison functions in C locale. - Copyright (C) 1995-1996, 2001, 2003, 2005, 2009-2012 Free Software + Copyright (C) 1995-1996, 2001, 2003, 2005, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify diff --git a/contrib/grep/lib/c-strcasecmp.c b/contrib/grep/lib/c-strcasecmp.c index d8332caf83..b9b26a4ce8 100644 --- a/contrib/grep/lib/c-strcasecmp.c +++ b/contrib/grep/lib/c-strcasecmp.c @@ -1,5 +1,5 @@ /* c-strcasecmp.c -- case insensitive string comparator in C locale - Copyright (C) 1998-1999, 2005-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1998-1999, 2005-2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/c-strcaseeq.h b/contrib/grep/lib/c-strcaseeq.h index 5c4bdffecb..5139a30b9d 100644 --- a/contrib/grep/lib/c-strcaseeq.h +++ b/contrib/grep/lib/c-strcaseeq.h @@ -1,5 +1,5 @@ /* Optimized case-insensitive string comparison in C locale. - Copyright (C) 2001-2002, 2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published diff --git a/contrib/grep/lib/c-strncasecmp.c b/contrib/grep/lib/c-strncasecmp.c index 47fb5fdb67..972eb80079 100644 --- a/contrib/grep/lib/c-strncasecmp.c +++ b/contrib/grep/lib/c-strncasecmp.c @@ -1,5 +1,5 @@ /* c-strncasecmp.c -- case insensitive string comparator in C locale - Copyright (C) 1998-1999, 2005-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1998-1999, 2005-2006, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/chdir-long.c b/contrib/grep/lib/chdir-long.c index 599d14157a..5b1b18fc35 100644 --- a/contrib/grep/lib/chdir-long.c +++ b/contrib/grep/lib/chdir-long.c @@ -1,5 +1,5 @@ /* provide a chdir function that tries not to fail due to ENAMETOOLONG - Copyright (C) 2004-2012 Free Software Foundation, Inc. + Copyright (C) 2004-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -42,19 +42,19 @@ struct cd_buf int fd; }; -static inline void +static void cdb_init (struct cd_buf *cdb) { cdb->fd = AT_FDCWD; } -static inline int +static int cdb_fchdir (struct cd_buf const *cdb) { return fchdir (cdb->fd); } -static inline void +static void cdb_free (struct cd_buf const *cdb) { if (0 <= cdb->fd) @@ -83,7 +83,7 @@ cdb_advance_fd (struct cd_buf *cdb, char const *dir) } /* Return a pointer to the first non-slash in S. */ -static inline char * _GL_ATTRIBUTE_PURE +static char * _GL_ATTRIBUTE_PURE find_non_slash (char const *s) { size_t n_slash = strspn (s, "/"); diff --git a/contrib/grep/lib/chdir-long.h b/contrib/grep/lib/chdir-long.h index 51db5a8041..4b5b940405 100644 --- a/contrib/grep/lib/chdir-long.h +++ b/contrib/grep/lib/chdir-long.h @@ -1,5 +1,5 @@ /* provide a chdir function that tries not to fail due to ENAMETOOLONG - Copyright (C) 2004-2005, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2004-2005, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/cloexec.c b/contrib/grep/lib/cloexec.c index 7919e8677c..48a0c97357 100644 --- a/contrib/grep/lib/cloexec.c +++ b/contrib/grep/lib/cloexec.c @@ -1,6 +1,6 @@ /* closexec.c - set or clear the close-on-exec descriptor flag - Copyright (C) 1991, 2004-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1991, 2004-2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/cloexec.h b/contrib/grep/lib/cloexec.h index 92e9f81e5e..6c442dca14 100644 --- a/contrib/grep/lib/cloexec.h +++ b/contrib/grep/lib/cloexec.h @@ -1,6 +1,6 @@ /* closexec.c - set or clear the close-on-exec descriptor flag - Copyright (C) 2004, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2004, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/close-stream.c b/contrib/grep/lib/close-stream.c index 04fa5ece09..87921d4293 100644 --- a/contrib/grep/lib/close-stream.c +++ b/contrib/grep/lib/close-stream.c @@ -1,6 +1,6 @@ /* Close a stream, with nicer error checking than fclose's. - Copyright (C) 1998-2002, 2004, 2006-2012 Free Software Foundation, Inc. + Copyright (C) 1998-2002, 2004, 2006-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/close.c b/contrib/grep/lib/close.c index 4b7accbc96..d7dcb3f157 100644 --- a/contrib/grep/lib/close.c +++ b/contrib/grep/lib/close.c @@ -1,5 +1,5 @@ /* close replacement. - Copyright (C) 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/closedir.c b/contrib/grep/lib/closedir.c index df31e86b4e..940c6f9f39 100644 --- a/contrib/grep/lib/closedir.c +++ b/contrib/grep/lib/closedir.c @@ -1,5 +1,5 @@ /* Stop reading the entries of a directory. - Copyright (C) 2006-2012 Free Software Foundation, Inc. 
+ Copyright (C) 2006-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/closeout.c b/contrib/grep/lib/closeout.c index eea402473c..674db787ea 100644 --- a/contrib/grep/lib/closeout.c +++ b/contrib/grep/lib/closeout.c @@ -1,6 +1,6 @@ /* Close standard output and standard error, exiting with a diagnostic on error. - Copyright (C) 1998-2002, 2004, 2006, 2008-2012 Free Software Foundation, + Copyright (C) 1998-2002, 2004, 2006, 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/closeout.h b/contrib/grep/lib/closeout.h index 5310b28627..28d80ea5f2 100644 --- a/contrib/grep/lib/closeout.h +++ b/contrib/grep/lib/closeout.h @@ -1,6 +1,6 @@ /* Close standard output and standard error. - Copyright (C) 1998, 2000, 2003-2004, 2006, 2008-2012 Free Software + Copyright (C) 1998, 2000, 2003-2004, 2006, 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/colorize-posix.c b/contrib/grep/lib/colorize-posix.c index 116bbb2c40..5d1516da7d 100644 --- a/contrib/grep/lib/colorize-posix.c +++ b/contrib/grep/lib/colorize-posix.c @@ -1,5 +1,5 @@ /* Output colorization. - Copyright 2011-2012 Free Software Foundation, Inc. + Copyright 2011-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/colorize-w32.c b/contrib/grep/lib/colorize-w32.c index 3fcd317c65..402f0c6844 100644 --- a/contrib/grep/lib/colorize-w32.c +++ b/contrib/grep/lib/colorize-w32.c @@ -1,5 +1,5 @@ /* Output colorization on MS-Windows. - Copyright 2011-2012 Free Software Foundation, Inc. + Copyright 2011-2014 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/colorize.c b/contrib/grep/lib/colorize.c deleted file mode 100644 index 116bbb2c40..0000000000 --- a/contrib/grep/lib/colorize.c +++ /dev/null @@ -1,58 +0,0 @@ -/* Output colorization. - Copyright 2011-2012 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA - 02110-1301, USA. */ - -/* Without this pragma, gcc 4.7.0 20120102 suggests that the - init_colorize function might be candidate for attribute 'const' */ -#if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__ -# pragma GCC diagnostic ignored "-Wsuggest-attribute=const" -#endif - -#include - -#include "colorize.h" - -#include -#include -#include -#include - -/* Return non-zero if we should highlight matches in output to file - descriptor FD. */ -int -should_colorize (void) -{ - char const *t = getenv ("TERM"); - return t && strcmp (t, "dumb") != 0; -} - -void init_colorize (void) { } - -/* Start a colorized text attribute on stdout using the SGR_START - format; the attribute is specified by SGR_SEQ. */ -void -print_start_colorize (char const *sgr_start, char const *sgr_seq) -{ - printf (sgr_start, sgr_seq); -} - -/* Restore the normal text attribute using the SGR_END string. 
*/ -void -print_end_colorize (char const *sgr_end) -{ - fputs (sgr_end, stdout); -} diff --git a/contrib/grep/lib/colorize.h b/contrib/grep/lib/colorize.h index 23687d080b..26956f9939 100644 --- a/contrib/grep/lib/colorize.h +++ b/contrib/grep/lib/colorize.h @@ -1,6 +1,6 @@ /* Output colorization. - Copyright 2011-2012 Free Software Foundation, Inc. + Copyright 2011-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) diff --git a/contrib/grep/lib/config.charset b/contrib/grep/lib/config.charset index 0a3b343add..289bc44ea9 100644 --- a/contrib/grep/lib/config.charset +++ b/contrib/grep/lib/config.charset @@ -1,7 +1,7 @@ #! /bin/sh # Output a system dependent table of character encoding aliases. # -# Copyright (C) 2000-2004, 2006-2012 Free Software Foundation, Inc. +# Copyright (C) 2000-2004, 2006-2014 Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/creat-safer.c b/contrib/grep/lib/creat-safer.c index f0f9a22c27..984857b879 100644 --- a/contrib/grep/lib/creat-safer.c +++ b/contrib/grep/lib/creat-safer.c @@ -1,6 +1,6 @@ /* Invoke creat, but avoid some glitches. - Copyright (C) 2005-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/cycle-check.c b/contrib/grep/lib/cycle-check.c index 011cae95dc..f7b3d07885 100644 --- a/contrib/grep/lib/cycle-check.c +++ b/contrib/grep/lib/cycle-check.c @@ -1,6 +1,6 @@ /* help detect directory cycles efficiently - Copyright (C) 2003-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2003-2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,7 +33,7 @@ /* Return true if I is a power of 2, or is zero. */ -static inline bool +static bool is_zero_or_power_of_two (uintmax_t i) { return (i & (i - 1)) == 0; diff --git a/contrib/grep/lib/cycle-check.h b/contrib/grep/lib/cycle-check.h index 88864768a5..7649606a37 100644 --- a/contrib/grep/lib/cycle-check.h +++ b/contrib/grep/lib/cycle-check.h @@ -1,6 +1,6 @@ /* help detect directory cycles efficiently - Copyright (C) 2003-2004, 2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2003-2004, 2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/dirent--.h b/contrib/grep/lib/dirent--.h index 1b75793867..d7c51bac2a 100644 --- a/contrib/grep/lib/dirent--.h +++ b/contrib/grep/lib/dirent--.h @@ -1,6 +1,6 @@ /* Like dirent.h, but redefine some names to avoid glitches. - Copyright (C) 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/dirent-private.h b/contrib/grep/lib/dirent-private.h index b60203c4b9..83531a16af 100644 --- a/contrib/grep/lib/dirent-private.h +++ b/contrib/grep/lib/dirent-private.h @@ -1,5 +1,5 @@ /* Private details of the DIR type. - Copyright (C) 2011-2012 Free Software Foundation, Inc. + Copyright (C) 2011-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/dirent-safer.h b/contrib/grep/lib/dirent-safer.h index 861ea93a44..8cc75d8d0f 100644 --- a/contrib/grep/lib/dirent-safer.h +++ b/contrib/grep/lib/dirent-safer.h @@ -1,6 +1,6 @@ /* Invoke dirent-like functions, but avoid some glitches. - Copyright (C) 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/dirfd.c b/contrib/grep/lib/dirfd.c index 2f4d424f48..4d37928cb2 100644 --- a/contrib/grep/lib/dirfd.c +++ b/contrib/grep/lib/dirfd.c @@ -1,6 +1,6 @@ /* dirfd.c -- return the file descriptor associated with an open DIR* - Copyright (C) 2001, 2006, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2001, 2006, 2008-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/dirname-lgpl.c b/contrib/grep/lib/dirname-lgpl.c index 2895cbcb28..d4a69730e5 100644 --- a/contrib/grep/lib/dirname-lgpl.c +++ b/contrib/grep/lib/dirname-lgpl.c @@ -1,6 +1,6 @@ /* dirname.c -- return all but the last element in a file name - Copyright (C) 1990, 1998, 2000-2001, 2003-2006, 2009-2012 Free Software + Copyright (C) 1990, 1998, 2000-2001, 2003-2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/dirname.h b/contrib/grep/lib/dirname.h index 51a685c3a4..a1b7009326 100644 --- a/contrib/grep/lib/dirname.h +++ b/contrib/grep/lib/dirname.h @@ -1,6 +1,6 @@ /* Take file names apart into directory and base names. - Copyright (C) 1998, 2001, 2003-2006, 2009-2012 Free Software Foundation, + Copyright (C) 1998, 2001, 2003-2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/dosname.h b/contrib/grep/lib/dosname.h index 0468ce4dc8..b92adfac2e 100644 --- a/contrib/grep/lib/dosname.h +++ b/contrib/grep/lib/dosname.h @@ -1,6 +1,6 @@ /* File names on MS-DOS/Windows systems. - Copyright (C) 2000-2001, 2004-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2000-2001, 2004-2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/dup-safer.c b/contrib/grep/lib/dup-safer.c index 0f5d3be3f7..8df3cdab10 100644 --- a/contrib/grep/lib/dup-safer.c +++ b/contrib/grep/lib/dup-safer.c @@ -1,6 +1,6 @@ /* Invoke dup, but avoid some glitches. - Copyright (C) 2001, 2004-2006, 2009-2012 Free Software Foundation, Inc. 
+ Copyright (C) 2001, 2004-2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/dup.c b/contrib/grep/lib/dup.c index 0a0b69ed4e..c813df626a 100644 --- a/contrib/grep/lib/dup.c +++ b/contrib/grep/lib/dup.c @@ -1,6 +1,6 @@ /* Duplicate an open file descriptor. - Copyright (C) 2011-2012 Free Software Foundation, Inc. + Copyright (C) 2011-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,7 +27,7 @@ #undef dup #if HAVE_MSVC_INVALID_PARAMETER_HANDLER -static inline int +static int dup_nothrow (int fd) { int result; diff --git a/contrib/grep/lib/dup2.c b/contrib/grep/lib/dup2.c index f6d0f1c73c..7de6805499 100644 --- a/contrib/grep/lib/dup2.c +++ b/contrib/grep/lib/dup2.c @@ -1,6 +1,6 @@ /* Duplicate an open file descriptor to a specified file descriptor. - Copyright (C) 1999, 2004-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1999, 2004-2007, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -95,7 +95,14 @@ rpl_dup2 (int fd, int desired_fd) # ifdef F_GETFL /* On Linux kernels 2.6.26-2.6.29, dup2 (fd, fd) returns -EBADF. On Cygwin 1.5.x, dup2 (1, 1) returns 0. + On Cygwin 1.7.17, dup2 (1, -1) dumps core. + On Cygwin 1.7.25, dup2 (1, 256) can dump core. On Haiku, dup2 (fd, fd) mistakenly clears FD_CLOEXEC. */ +# if HAVE_SETDTABLESIZE + setdtablesize (desired_fd + 1); +# endif + if (desired_fd < 0) + fd = desired_fd; if (fd == desired_fd) return fcntl (fd, F_GETFL) == -1 ? 
-1 : fd; # endif diff --git a/contrib/grep/lib/error.c b/contrib/grep/lib/error.c index dc8c65fa7f..31109df3b5 100644 --- a/contrib/grep/lib/error.c +++ b/contrib/grep/lib/error.c @@ -1,5 +1,5 @@ /* Error handler for noninteractive utilities - Copyright (C) 1990-1998, 2000-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1990-1998, 2000-2007, 2009-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. This program is free software: you can redistribute it and/or modify @@ -121,7 +121,7 @@ extern char *program_name; #if !_LIBC /* Return non-zero if FD is open. */ -static inline int +static int is_open (int fd) { # if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ @@ -139,7 +139,7 @@ is_open (int fd) } #endif -static inline void +static void flush_stdout (void) { #if !_LIBC @@ -195,7 +195,7 @@ print_errno_message (int errnum) #endif } -static void +static void _GL_ATTRIBUTE_FORMAT_PRINTF (3, 0) _GL_ARG_NONNULL ((3)) error_tail (int status, int errnum, const char *message, va_list args) { #if _LIBC diff --git a/contrib/grep/lib/error.h b/contrib/grep/lib/error.h index 9c2cb8bc6a..4aaafb3626 100644 --- a/contrib/grep/lib/error.h +++ b/contrib/grep/lib/error.h @@ -1,5 +1,5 @@ /* Declaration for error-reporting function - Copyright (C) 1995-1997, 2003, 2006, 2008-2012 Free Software Foundation, + Copyright (C) 1995-1997, 2003, 2006, 2008-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. diff --git a/contrib/grep/lib/exclude.c b/contrib/grep/lib/exclude.c index 08a4829242..14b59b70eb 100644 --- a/contrib/grep/lib/exclude.c +++ b/contrib/grep/lib/exclude.c @@ -1,6 +1,6 @@ /* exclude.c -- exclude file names - Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2012 Free Software + Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify @@ -32,6 +32,7 @@ #include #include #include +#include #include "exclude.h" #include "hash.h" @@ -39,6 +40,7 @@ #include "fnmatch.h" #include "xalloc.h" #include "verify.h" +#include "filename.h" #if USE_UNLOCKED_IO # include "unlocked-io.h" @@ -73,8 +75,12 @@ verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS) struct patopts { - char const *pattern; int options; + union + { + char const *pattern; + regex_t re; + } v; }; /* An array of pattern-options pairs. */ @@ -104,13 +110,33 @@ struct exclude_segment } v; }; +struct pattern_buffer + { + struct pattern_buffer *next; + char *base; + }; + /* The exclude structure keeps a singly-linked list of exclude segments, maintained in reverse order. */ struct exclude { struct exclude_segment *head; + struct pattern_buffer *patbuf; }; +/* Register BUF in the pattern buffer list of EX. ADD_FUNC (see + add_exclude_file and add_exclude_fp below) can use this function + if it modifies the pattern, to ensure the allocated memory will be + properly reclaimed upon calling free_exclude. */ +void +exclude_add_pattern_buffer (struct exclude *ex, char *buf) +{ + struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf); + pbuf->base = buf; + pbuf->next = ex->patbuf; + ex->patbuf = pbuf; +} + /* Return true if STR has or may have wildcards, when matched with OPTIONS. Return false if STR definitely does not have wildcards. */ bool @@ -120,8 +146,20 @@ fnmatch_pattern_has_wildcards (const char *str, int options) { switch (*str++) { + case '.': + case '{': + case '}': + case '(': + case ')': + if (options & EXCLUDE_REGEX) + return true; + break; + case '\\': - str += ! (options & FNM_NOESCAPE) && *str; + if (options & EXCLUDE_REGEX) + continue; + else + str += ! 
(options & FNM_NOESCAPE) && *str; break; case '+': case '@': case '!': @@ -243,9 +281,16 @@ new_exclude_segment (struct exclude *ex, enum exclude_type type, int options) static void free_exclude_segment (struct exclude_segment *seg) { + size_t i; + switch (seg->type) { case exclude_pattern: + for (i = 0; i < seg->v.pat.exclude_count; i++) + { + if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX) + regfree (&seg->v.pat.exclude[i].v.re); + } free (seg->v.pat.exclude); break; @@ -261,12 +306,23 @@ void free_exclude (struct exclude *ex) { struct exclude_segment *seg; + struct pattern_buffer *pbuf; + for (seg = ex->head; seg; ) { struct exclude_segment *next = seg->next; free_exclude_segment (seg); seg = next; } + + for (pbuf = ex->patbuf; pbuf; ) + { + struct pattern_buffer *next = pbuf->next; + free (pbuf->base); + free (pbuf); + pbuf = next; + } + free (ex); } @@ -331,11 +387,21 @@ exclude_fnmatch (char const *pattern, char const *f, int options) if (! (options & EXCLUDE_ANCHORED)) for (p = f; *p && ! matched; p++) if (*p == '/' && p[1] != '/') - matched = ((*matcher) (pattern, p + 1, options) == 0); + matched = ((*matcher) (pattern, p + 1, options) == 0); return matched; } +bool +exclude_patopts (struct patopts const *opts, char const *f) +{ + int options = opts->options; + + return (options & EXCLUDE_REGEX) + ? regexec (&opts->v.re, f, 0, NULL, 0) == 0 + : exclude_fnmatch (opts->v.pattern, f, options); +} + /* Return true if the exclude_pattern segment SEG matches F. 
*/ static bool @@ -347,9 +413,7 @@ file_pattern_matches (struct exclude_segment const *seg, char const *f) for (i = 0; i < exclude_count; i++) { - char const *pattern = exclude[i].pattern; - int options = exclude[i].options; - if (exclude_fnmatch (pattern, f, options)) + if (exclude_patopts (exclude + i, f)) return true; } return false; @@ -454,17 +518,17 @@ void add_exclude (struct exclude *ex, char const *pattern, int options) { struct exclude_segment *seg; + struct exclude_pattern *pat; + struct patopts *patopts; - if ((options & EXCLUDE_WILDCARDS) + if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS)) && fnmatch_pattern_has_wildcards (pattern, options)) { - struct exclude_pattern *pat; - struct patopts *patopts; - if (! (ex->head && ex->head->type == exclude_pattern - && ((ex->head->options & EXCLUDE_INCLUDE) - == (options & EXCLUDE_INCLUDE)))) - new_exclude_segment (ex, exclude_pattern, options); + && ((ex->head->options & EXCLUDE_INCLUDE) + == (options & EXCLUDE_INCLUDE)))) + new_exclude_segment (ex, exclude_pattern, options); + seg = ex->head; pat = &seg->v.pat; @@ -472,8 +536,51 @@ add_exclude (struct exclude *ex, char const *pattern, int options) pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc, sizeof *pat->exclude); patopts = &pat->exclude[pat->exclude_count++]; - patopts->pattern = pattern; + patopts->options = options; + if (options & EXCLUDE_REGEX) + { + int rc; + int cflags = REG_NOSUB|REG_EXTENDED| + ((options & FNM_CASEFOLD) ? 
REG_ICASE : 0); + + if (options & FNM_LEADING_DIR) + { + char *tmp; + size_t len = strlen (pattern); + + while (len > 0 && ISSLASH (pattern[len-1])) + --len; + + if (len == 0) + rc = 1; + else + { + tmp = xmalloc (len + 7); + memcpy (tmp, pattern, len); + strcpy (tmp + len, "(/.*)?"); + rc = regcomp (&patopts->v.re, tmp, cflags); + free (tmp); + } + } + else + rc = regcomp (&patopts->v.re, pattern, cflags); + + if (rc) + { + pat->exclude_count--; + return; + } + } + else + { + if (options & EXCLUDE_ALLOC) + { + pattern = xstrdup (pattern); + exclude_add_pattern_buffer (ex, (char*) pattern); + } + patopts->v.pattern = pattern; + } } else { @@ -498,45 +605,39 @@ add_exclude (struct exclude *ex, char const *pattern, int options) /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with OPTIONS. LINE_END terminates each pattern in the file. If LINE_END is a space character, ignore trailing spaces and empty - lines in FILE. Return -1 on failure, 0 on success. */ + lines in FP. Return -1 on failure, 0 on success. */ int -add_exclude_file (void (*add_func) (struct exclude *, char const *, int), - struct exclude *ex, char const *file_name, int options, - char line_end) +add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *), + struct exclude *ex, FILE *fp, int options, + char line_end, + void *data) { - bool use_stdin = file_name[0] == '-' && !file_name[1]; - FILE *in; char *buf = NULL; char *p; - char const *pattern; + char *pattern; char const *lim; size_t buf_alloc = 0; size_t buf_count = 0; int c; int e = 0; - if (use_stdin) - in = stdin; - else if (! 
(in = fopen (file_name, "r"))) - return -1; - - while ((c = getc (in)) != EOF) + while ((c = getc (fp)) != EOF) { if (buf_count == buf_alloc) buf = x2realloc (buf, &buf_alloc); buf[buf_count++] = c; } - if (ferror (in)) - e = errno; - - if (!use_stdin && fclose (in) != 0) + if (ferror (fp)) e = errno; buf = xrealloc (buf, buf_count + 1); buf[buf_count] = line_end; lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end); + + exclude_add_pattern_buffer (ex, buf); + pattern = buf; for (p = buf; p < lim; p++) @@ -554,7 +655,7 @@ add_exclude_file (void (*add_func) (struct exclude *, char const *, int), } *pattern_end = '\0'; - (*add_func) (ex, pattern, options); + (*add_func) (ex, pattern, options, data); next_pattern: pattern = p + 1; @@ -563,3 +664,32 @@ add_exclude_file (void (*add_func) (struct exclude *, char const *, int), errno = e; return e ? -1 : 0; } + +static void +call_addfn (struct exclude *ex, char const *pattern, int options, void *data) +{ + void (**addfnptr) (struct exclude *, char const *, int) = data; + (*addfnptr) (ex, pattern, options); +} + +int +add_exclude_file (void (*add_func) (struct exclude *, char const *, int), + struct exclude *ex, char const *file_name, int options, + char line_end) +{ + bool use_stdin = file_name[0] == '-' && !file_name[1]; + FILE *in; + int rc = 0; + + if (use_stdin) + in = stdin; + else if (! (in = fopen (file_name, "r"))) + return -1; + + rc = add_exclude_fp (call_addfn, ex, in, options, line_end, &add_func); + + if (!use_stdin && fclose (in) != 0) + rc = -1; + + return rc; +} diff --git a/contrib/grep/lib/exclude.h b/contrib/grep/lib/exclude.h index 50e8a63155..955521861c 100644 --- a/contrib/grep/lib/exclude.h +++ b/contrib/grep/lib/exclude.h @@ -1,6 +1,6 @@ /* exclude.h -- declarations for excluding file names - Copyright (C) 1992-1994, 1997, 1999, 2001-2003, 2005-2006, 2009-2012 Free + Copyright (C) 1992-1994, 1997, 1999, 2001-2003, 2005-2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify @@ -20,6 +20,7 @@ #define _GL_EXCLUDE_H 1 #include +#include /* Written by Paul Eggert and Sergey Poznyakoff */ @@ -37,6 +38,12 @@ option, these characters are ordinary and fnmatch is not used. */ #define EXCLUDE_WILDCARDS (1 << 28) +/* Patterns are POSIX extended regular expressions */ +#define EXCLUDE_REGEX (1 << 27) + +/* Allocate storage for the pattern */ +#define EXCLUDE_ALLOC (1 << 26) + struct exclude; bool fnmatch_pattern_has_wildcards (const char *, int) _GL_ATTRIBUTE_PURE; @@ -46,7 +53,10 @@ void free_exclude (struct exclude *); void add_exclude (struct exclude *, char const *, int); int add_exclude_file (void (*) (struct exclude *, char const *, int), struct exclude *, char const *, int, char); +int add_exclude_fp (void (*) (struct exclude *, char const *, int, void *), + struct exclude *, FILE *, int, char, void *); bool excluded_file_name (struct exclude const *, char const *); -bool exclude_fnmatch (char const *pattern, char const *f, int options); +void exclude_add_pattern_buffer (struct exclude *ex, char *buf); +bool exclude_fnmatch (char const *, char const *, int); #endif /* _GL_EXCLUDE_H */ diff --git a/contrib/grep/lib/exitfail.c b/contrib/grep/lib/exitfail.c index fdd674c11b..609afc5af7 100644 --- a/contrib/grep/lib/exitfail.c +++ b/contrib/grep/lib/exitfail.c @@ -1,6 +1,6 @@ /* Failure exit status - Copyright (C) 2002-2003, 2005-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2002-2003, 2005-2007, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/exitfail.h b/contrib/grep/lib/exitfail.h index 074f212192..66807d511f 100644 --- a/contrib/grep/lib/exitfail.h +++ b/contrib/grep/lib/exitfail.h @@ -1,6 +1,6 @@ /* Failure exit status - Copyright (C) 2002, 2009-2012 Free Software Foundation, Inc. 
+ Copyright (C) 2002, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/fchdir.c b/contrib/grep/lib/fchdir.c index 2e07553e6b..5d71377385 100644 --- a/contrib/grep/lib/fchdir.c +++ b/contrib/grep/lib/fchdir.c @@ -1,5 +1,5 @@ /* fchdir replacement. - Copyright (C) 2006-2012 Free Software Foundation, Inc. + Copyright (C) 2006-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/fcntl--.h b/contrib/grep/lib/fcntl--.h index 8fde6c1120..37be26c3c2 100644 --- a/contrib/grep/lib/fcntl--.h +++ b/contrib/grep/lib/fcntl--.h @@ -1,6 +1,6 @@ /* Like fcntl.h, but redefine some names to avoid glitches. - Copyright (C) 2005, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2005, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/fcntl-safer.h b/contrib/grep/lib/fcntl-safer.h index 440a2ffde8..73b7ad7859 100644 --- a/contrib/grep/lib/fcntl-safer.h +++ b/contrib/grep/lib/fcntl-safer.h @@ -1,6 +1,6 @@ /* Invoke fcntl-like functions, but avoid some glitches. - Copyright (C) 2005, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2005, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/fcntl.c b/contrib/grep/lib/fcntl.c index 3dfb6b73a8..54f748606b 100644 --- a/contrib/grep/lib/fcntl.c +++ b/contrib/grep/lib/fcntl.c @@ -1,6 +1,6 @@ /* Provide file descriptor control. - Copyright (C) 2009-2012 Free Software Foundation, Inc. 
+ Copyright (C) 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/fd-hook.c b/contrib/grep/lib/fd-hook.c index 8f4ffe2a8d..0171cc6028 100644 --- a/contrib/grep/lib/fd-hook.c +++ b/contrib/grep/lib/fd-hook.c @@ -1,5 +1,5 @@ /* Hook for making making file descriptor functions close(), ioctl() extensible. - Copyright (C) 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2009-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2009. This program is free software: you can redistribute it and/or modify it diff --git a/contrib/grep/lib/fd-hook.h b/contrib/grep/lib/fd-hook.h index 721e9ad931..1aa264eb74 100644 --- a/contrib/grep/lib/fd-hook.h +++ b/contrib/grep/lib/fd-hook.h @@ -1,5 +1,5 @@ /* Hook for making making file descriptor functions close(), ioctl() extensible. - Copyright (C) 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published diff --git a/contrib/grep/lib/fd-safer.c b/contrib/grep/lib/fd-safer.c index 052837bdd1..148f735dc6 100644 --- a/contrib/grep/lib/fd-safer.c +++ b/contrib/grep/lib/fd-safer.c @@ -1,6 +1,6 @@ /* Return a safer copy of a file descriptor. - Copyright (C) 2005-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/fdopendir.c b/contrib/grep/lib/fdopendir.c index 4ae7d05395..b6c94a0990 100644 --- a/contrib/grep/lib/fdopendir.c +++ b/contrib/grep/lib/fdopendir.c @@ -1,5 +1,5 @@ /* provide a replacement fdopendir function - Copyright (C) 2004-2012 Free Software Foundation, Inc. + Copyright (C) 2004-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/filename.h b/contrib/grep/lib/filename.h index 7b89d0d06b..96dbaaa7d9 100644 --- a/contrib/grep/lib/filename.h +++ b/contrib/grep/lib/filename.h @@ -1,5 +1,5 @@ /* Basic filename support macros. - Copyright (C) 2001-2004, 2007-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2004, 2007-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/filenamecat-lgpl.c b/contrib/grep/lib/filenamecat-lgpl.c index c7c46172a3..e817f97c7d 100644 --- a/contrib/grep/lib/filenamecat-lgpl.c +++ b/contrib/grep/lib/filenamecat-lgpl.c @@ -1,6 +1,6 @@ /* Concatenate two arbitrary file names. - Copyright (C) 1996-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1996-2007, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/filenamecat.h b/contrib/grep/lib/filenamecat.h index e7b2e3063a..b33dce8438 100644 --- a/contrib/grep/lib/filenamecat.h +++ b/contrib/grep/lib/filenamecat.h @@ -1,6 +1,6 @@ /* Concatenate two arbitrary file names. 
- Copyright (C) 1996-1997, 2003, 2005, 2007, 2009-2012 Free Software + Copyright (C) 1996-1997, 2003, 2005, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/fnmatch.c b/contrib/grep/lib/fnmatch.c index 842182bf6a..4246d82297 100644 --- a/contrib/grep/lib/fnmatch.c +++ b/contrib/grep/lib/fnmatch.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991-1993, 1996-2007, 2009-2012 Free Software Foundation, Inc. +/* Copyright (C) 1991-1993, 1996-2007, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/fnmatch_loop.c b/contrib/grep/lib/fnmatch_loop.c index 048079e11f..4c6000fa73 100644 --- a/contrib/grep/lib/fnmatch_loop.c +++ b/contrib/grep/lib/fnmatch_loop.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991-1993, 1996-2006, 2009-2012 Free Software Foundation, Inc. +/* Copyright (C) 1991-1993, 1996-2006, 2009-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. This program is free software; you can redistribute it and/or modify @@ -227,6 +227,8 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, c = *p++; for (;;) { + bool is_range = false; + if (!(flags & FNM_NOESCAPE) && c == L_('\\')) { if (*p == L_('\0')) @@ -420,8 +422,6 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, } else { - bool is_range = false; - #ifdef _LIBC bool is_seqval = false; diff --git a/contrib/grep/lib/fpending.c b/contrib/grep/lib/fpending.c index 2591d53437..31aba1ada3 100644 --- a/contrib/grep/lib/fpending.c +++ b/contrib/grep/lib/fpending.c @@ -1,5 +1,5 @@ /* fpending.c -- return the number of pending output bytes on a stream - Copyright (C) 2000, 2004, 2006-2007, 2009-2012 Free Software Foundation, + Copyright (C) 2000, 2004, 2006-2007, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/fpending.h b/contrib/grep/lib/fpending.h index 0365287ba7..dd607fe20c 100644 --- a/contrib/grep/lib/fpending.h +++ b/contrib/grep/lib/fpending.h @@ -1,6 +1,6 @@ /* Declare __fpending. - Copyright (C) 2000, 2003, 2005-2006, 2009-2012 Free Software Foundation, + Copyright (C) 2000, 2003, 2005-2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -20,11 +20,10 @@ #include #include +#if HAVE_STDIO_EXT_H +# include +#endif -#if HAVE_DECL___FPENDING -# if HAVE_STDIO_EXT_H -# include -# endif -#else -size_t __fpending (FILE *); +#if !HAVE_DECL___FPENDING +size_t __fpending (FILE *) _GL_ATTRIBUTE_PURE; #endif diff --git a/contrib/grep/lib/fstat.c b/contrib/grep/lib/fstat.c index 6d5f5c2b6f..ffcda9f571 100644 --- a/contrib/grep/lib/fstat.c +++ b/contrib/grep/lib/fstat.c @@ -1,5 +1,5 @@ /* fstat() replacement. - Copyright (C) 2011-2012 Free Software Foundation, Inc. + Copyright (C) 2011-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ #endif #undef __need_system_sys_stat_h -static inline int +static int orig_fstat (int fd, struct stat *buf) { return fstat (fd, buf); @@ -51,7 +51,7 @@ orig_fstat (int fd, struct stat *buf) #endif #if HAVE_MSVC_INVALID_PARAMETER_HANDLER -static inline int +static int fstat_nothrow (int fd, struct stat *buf) { int result; diff --git a/contrib/grep/lib/fstatat.c b/contrib/grep/lib/fstatat.c index 9b701c4d61..44e3266835 100644 --- a/contrib/grep/lib/fstatat.c +++ b/contrib/grep/lib/fstatat.c @@ -1,6 +1,6 @@ /* Work around an fstatat bug on Solaris 9. - Copyright (C) 2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,7 +29,7 @@ #undef __need_system_sys_stat_h #if HAVE_FSTATAT -static inline int +static int orig_fstatat (int fd, char const *filename, struct stat *buf, int flags) { return fstatat (fd, filename, buf, flags); @@ -97,7 +97,7 @@ rpl_fstatat (int fd, char const *file, struct stat *st, int flag) because the preprocessor sees a use of a macro that requires two arguments but is only given one. Hence, we need an inline forwarder to get past the preprocessor. */ -static inline int +static int stat_func (char const *name, struct stat *st) { return stat (name, st); diff --git a/contrib/grep/lib/fts-cycle.c b/contrib/grep/lib/fts-cycle.c index bdb090faca..95da56e236 100644 --- a/contrib/grep/lib/fts-cycle.c +++ b/contrib/grep/lib/fts-cycle.c @@ -1,6 +1,6 @@ /* Detect cycles in file tree walks. - Copyright (C) 2003-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2003-2006, 2009-2014 Free Software Foundation, Inc. Written by Jim Meyering. diff --git a/contrib/grep/lib/fts.c b/contrib/grep/lib/fts.c index 42c8067708..500e92c6b8 100644 --- a/contrib/grep/lib/fts.c +++ b/contrib/grep/lib/fts.c @@ -1,6 +1,6 @@ /* Traverse a file hierarchy. - Copyright (C) 2004-2012 Free Software Foundation, Inc. + Copyright (C) 2004-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -290,7 +290,7 @@ fts_set_stat_required (FTSENT *p, bool required) /* file-descriptor-relative opendir. */ /* FIXME: if others need this function, move it into lib/openat.c */ -static inline DIR * +static DIR * internal_function opendirat (int fd, char const *dir, int extra_flags, int *pdir_fd) { @@ -360,7 +360,7 @@ restore_initial_cwd (FTS *sp) descriptor. Return -1 and set errno on failure. 
It doesn't matter whether the file descriptor has read or write access. */ -static inline int +static int internal_function diropen (FTS const *sp, char const *dir) { @@ -487,6 +487,17 @@ fts_open (char * const *argv, for (root = NULL, nitems = 0; *argv != NULL; ++argv, ++nitems) { /* *Do* allow zero-length file names. */ size_t len = strlen(*argv); + + if ( ! (options & FTS_VERBATIM)) + { + /* If there are two or more trailing slashes, trim all but one, + but don't change "//" to "/", and do map "///" to "/". */ + char const *v = *argv; + if (2 < len && v[len - 1] == '/') + while (1 < len && v[len - 2] == '/') + --len; + } + if ((p = fts_alloc(sp, *argv, len)) == NULL) goto mem3; p->fts_level = FTS_ROOTLEVEL; @@ -1436,19 +1447,21 @@ fts_build (register FTS *sp, int type) nitems = 0; while (cur->fts_dirp) { bool is_dir; + size_t d_namelen; struct dirent *dp = readdir(cur->fts_dirp); if (dp == NULL) break; if (!ISSET(FTS_SEEDOT) && ISDOT(dp->d_name)) continue; - if ((p = fts_alloc (sp, dp->d_name, - _D_EXACT_NAMLEN (dp))) == NULL) + d_namelen = _D_EXACT_NAMLEN (dp); + p = fts_alloc (sp, dp->d_name, d_namelen); + if (!p) goto mem1; - if (_D_EXACT_NAMLEN (dp) >= maxlen) { + if (d_namelen >= maxlen) { /* include space for NUL */ oldaddr = sp->fts_path; - if (! fts_palloc(sp, _D_EXACT_NAMLEN (dp) + len + 1)) { + if (! fts_palloc(sp, d_namelen + len + 1)) { /* * No more memory. Save * errno, free up the current structure and the @@ -1472,7 +1485,7 @@ mem1: saved_errno = errno; maxlen = sp->fts_pathlen - len; } - new_len = len + _D_EXACT_NAMLEN (dp); + new_len = len + d_namelen; if (new_len < len) { /* * In the unlikely event that we would end up @@ -1894,7 +1907,7 @@ fts_alloc (FTS *sp, const char *name, register size_t namelen) return (NULL); /* Copy the name and guarantee NUL termination. 
*/ - memmove(p->fts_name, name, namelen); + memcpy(p->fts_name, name, namelen); p->fts_name[namelen] = '\0'; p->fts_namelen = namelen; diff --git a/contrib/grep/lib/fts_.h b/contrib/grep/lib/fts_.h index 529403979f..63d4b74219 100644 --- a/contrib/grep/lib/fts_.h +++ b/contrib/grep/lib/fts_.h @@ -1,6 +1,6 @@ /* Traverse a file hierarchy. - Copyright (C) 2004-2012 Free Software Foundation, Inc. + Copyright (C) 2004-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -145,10 +145,14 @@ typedef struct { # define FTS_NOATIME 0x0800 /* use O_NOATIME during traversal */ -# define FTS_OPTIONMASK 0x0fff /* valid user option mask */ + /* Use this flag to disable stripping of trailing slashes + from input path names during fts_open initialization. */ +# define FTS_VERBATIM 0x1000 -# define FTS_NAMEONLY 0x1000 /* (private) child names only */ -# define FTS_STOP 0x2000 /* (private) unrecoverable error */ +# define FTS_OPTIONMASK 0x1fff /* valid user option mask */ + +# define FTS_NAMEONLY 0x2000 /* (private) child names only */ +# define FTS_STOP 0x4000 /* (private) unrecoverable error */ int fts_options; /* fts_open options, global flags */ /* Map a directory's device number to a boolean. The boolean is diff --git a/contrib/grep/lib/getcwd-lgpl.c b/contrib/grep/lib/getcwd-lgpl.c index f1e821b630..044c000017 100644 --- a/contrib/grep/lib/getcwd-lgpl.c +++ b/contrib/grep/lib/getcwd-lgpl.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 Free Software Foundation, Inc. +/* Copyright (C) 2011-2014 Free Software Foundation, Inc. This file is part of gnulib. 
This program is free software: you can redistribute it and/or modify @@ -20,6 +20,7 @@ #include #include +#include #include #if GNULIB_GETCWD diff --git a/contrib/grep/lib/getdtablesize.c b/contrib/grep/lib/getdtablesize.c index 70ba0751b5..946738cdb6 100644 --- a/contrib/grep/lib/getdtablesize.c +++ b/contrib/grep/lib/getdtablesize.c @@ -1,5 +1,5 @@ /* getdtablesize() function for platforms that don't have it. - Copyright (C) 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2008-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2008. This program is free software: you can redistribute it and/or modify @@ -22,12 +22,12 @@ #if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ -#include +# include -#include "msvc-inval.h" +# include "msvc-inval.h" -#if HAVE_MSVC_INVALID_PARAMETER_HANDLER -static inline int +# if HAVE_MSVC_INVALID_PARAMETER_HANDLER +static int _setmaxstdio_nothrow (int newmax) { int result; @@ -44,10 +44,11 @@ _setmaxstdio_nothrow (int newmax) return result; } -# define _setmaxstdio _setmaxstdio_nothrow -#endif +# define _setmaxstdio _setmaxstdio_nothrow +# endif -/* Cache for the previous getdtablesize () result. */ +/* Cache for the previous getdtablesize () result. Safe to cache because + Windows also lacks setrlimit. */ static int dtablesize; int @@ -83,4 +84,24 @@ getdtablesize (void) return dtablesize; } +#elif HAVE_GETDTABLESIZE + +# include +# undef getdtablesize + +int +rpl_getdtablesize(void) +{ + /* To date, this replacement is only compiled for Cygwin 1.7.25, + which auto-increased the RLIMIT_NOFILE soft limit until it + hits the compile-time constant hard limit of 3200. Although + that version of cygwin supported a child process inheriting + a smaller soft limit, the smaller limit is not enforced, so + we might as well just report the hard limit. 
*/ + struct rlimit lim; + if (!getrlimit (RLIMIT_NOFILE, &lim) && lim.rlim_max != RLIM_INFINITY) + return lim.rlim_max; + return getdtablesize (); +} + #endif diff --git a/contrib/grep/lib/getopt.c b/contrib/grep/lib/getopt.c index 4342a34104..7d950af1a5 100644 --- a/contrib/grep/lib/getopt.c +++ b/contrib/grep/lib/getopt.c @@ -2,7 +2,7 @@ NOTE: getopt is part of the C library, so if you don't know what "Keep this file name-space clean" means, talk to drepper@gnu.org before changing it! - Copyright (C) 1987-1996, 1998-2004, 2006, 2008-2012 Free Software + Copyright (C) 1987-1996, 1998-2004, 2006, 2008-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. diff --git a/contrib/grep/lib/getopt1.c b/contrib/grep/lib/getopt1.c index fb2a8f5a7b..a184865ea6 100644 --- a/contrib/grep/lib/getopt1.c +++ b/contrib/grep/lib/getopt1.c @@ -1,5 +1,5 @@ /* getopt_long and getopt_long_only entry points for GNU getopt. - Copyright (C) 1987-1994, 1996-1998, 2004, 2006, 2009-2012 Free Software + Copyright (C) 1987-1994, 1996-1998, 2004, 2006, 2009-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. diff --git a/contrib/grep/lib/getopt_int.h b/contrib/grep/lib/getopt_int.h index 2da020c995..24ed672e2e 100644 --- a/contrib/grep/lib/getopt_int.h +++ b/contrib/grep/lib/getopt_int.h @@ -1,5 +1,5 @@ /* Internal declarations for getopt. - Copyright (C) 1989-1994, 1996-1999, 2001, 2003-2004, 2009-2012 Free Software + Copyright (C) 1989-1994, 1996-1999, 2001, 2003-2004, 2009-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. diff --git a/contrib/grep/lib/getpagesize.c b/contrib/grep/lib/getpagesize.c index 02c00fbea4..86e127100d 100644 --- a/contrib/grep/lib/getpagesize.c +++ b/contrib/grep/lib/getpagesize.c @@ -1,6 +1,6 @@ /* getpagesize emulation for systems where it cannot be done in a C macro. - Copyright (C) 2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2007, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/gettext.h b/contrib/grep/lib/gettext.h index 65ca1e6762..c7d6fd345c 100644 --- a/contrib/grep/lib/gettext.h +++ b/contrib/grep/lib/gettext.h @@ -1,5 +1,5 @@ /* Convenience header for conditional use of GNU . - Copyright (C) 1995-1998, 2000-2002, 2004-2006, 2009-2012 Free Software + Copyright (C) 1995-1998, 2000-2002, 2004-2006, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify diff --git a/contrib/grep/lib/gettimeofday.c b/contrib/grep/lib/gettimeofday.c new file mode 100644 index 0000000000..8b2058e8c8 --- /dev/null +++ b/contrib/grep/lib/gettimeofday.c @@ -0,0 +1,154 @@ +/* Provide gettimeofday for systems that don't have it or for which it's broken. + + Copyright (C) 2001-2003, 2005-2007, 2009-2014 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . */ + +/* written by Jim Meyering */ + +#include + +/* Specification. */ +#include + +#include + +#if HAVE_SYS_TIMEB_H +# include +#endif + +#if GETTIMEOFDAY_CLOBBERS_LOCALTIME || TZSET_CLOBBERS_LOCALTIME + +/* Work around the bug in some systems whereby gettimeofday clobbers + the static buffer that localtime uses for its return value. The + gettimeofday function from Mac OS X 10.0.4 (i.e., Darwin 1.3.7) has + this problem. 
The tzset replacement is necessary for at least + Solaris 2.5, 2.5.1, and 2.6. */ + +static struct tm tm_zero_buffer; +static struct tm *localtime_buffer_addr = &tm_zero_buffer; + +# undef localtime +extern struct tm *localtime (time_t const *); + +# undef gmtime +extern struct tm *gmtime (time_t const *); + +/* This is a wrapper for localtime. It is used only on systems for which + gettimeofday clobbers the static buffer used for localtime's result. + + On the first call, record the address of the static buffer that + localtime uses for its result. */ + +struct tm * +rpl_localtime (time_t const *timep) +{ + struct tm *tm = localtime (timep); + + if (localtime_buffer_addr == &tm_zero_buffer) + localtime_buffer_addr = tm; + + return tm; +} + +/* Same as above, since gmtime and localtime use the same buffer. */ +struct tm * +rpl_gmtime (time_t const *timep) +{ + struct tm *tm = gmtime (timep); + + if (localtime_buffer_addr == &tm_zero_buffer) + localtime_buffer_addr = tm; + + return tm; +} + +#endif /* GETTIMEOFDAY_CLOBBERS_LOCALTIME || TZSET_CLOBBERS_LOCALTIME */ + +#if TZSET_CLOBBERS_LOCALTIME + +# undef tzset +extern void tzset (void); + +/* This is a wrapper for tzset, for systems on which tzset may clobber + the static buffer used for localtime's result. */ +void +rpl_tzset (void) +{ + /* Save and restore the contents of the buffer used for localtime's + result around the call to tzset. */ + struct tm save = *localtime_buffer_addr; + tzset (); + *localtime_buffer_addr = save; +} +#endif + +/* This is a wrapper for gettimeofday. It is used only on systems + that lack this function, or whose implementation of this function + causes problems. */ + +int +gettimeofday (struct timeval *restrict tv, void *restrict tz) +{ +#undef gettimeofday +#if HAVE_GETTIMEOFDAY +# if GETTIMEOFDAY_CLOBBERS_LOCALTIME + /* Save and restore the contents of the buffer used for localtime's + result around the call to gettimeofday. 
*/ + struct tm save = *localtime_buffer_addr; +# endif + +# if defined timeval /* 'struct timeval' overridden by gnulib? */ +# undef timeval + struct timeval otv; + int result = gettimeofday (&otv, (struct timezone *) tz); + if (result == 0) + { + tv->tv_sec = otv.tv_sec; + tv->tv_usec = otv.tv_usec; + } +# else + int result = gettimeofday (tv, (struct timezone *) tz); +# endif + +# if GETTIMEOFDAY_CLOBBERS_LOCALTIME + *localtime_buffer_addr = save; +# endif + + return result; + +#else + +# if HAVE__FTIME + + struct _timeb timebuf; + _ftime (&timebuf); + tv->tv_sec = timebuf.time; + tv->tv_usec = timebuf.millitm * 1000; + +# else + +# if !defined OK_TO_USE_1S_CLOCK +# error "Only 1-second nominal clock resolution found. Is that intended?" \ + "If so, compile with the -DOK_TO_USE_1S_CLOCK option." +# endif + tv->tv_sec = time (NULL); + tv->tv_usec = 0; + +# endif + + return 0; + +#endif +} diff --git a/contrib/grep/lib/glthread/lock.c b/contrib/grep/lib/glthread/lock.c new file mode 100644 index 0000000000..33c15a0d7f --- /dev/null +++ b/contrib/grep/lib/glthread/lock.c @@ -0,0 +1,1057 @@ +/* Locking in multithreaded situations. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . */ + +/* Written by Bruno Haible , 2005. + Based on GCC's gthr-posix.h, gthr-posix95.h, gthr-solaris.h, + gthr-win32.h. 
*/ + +#include + +#include "glthread/lock.h" + +/* ========================================================================= */ + +#if USE_POSIX_THREADS + +/* -------------------------- gl_lock_t datatype -------------------------- */ + +/* ------------------------- gl_rwlock_t datatype ------------------------- */ + +# if HAVE_PTHREAD_RWLOCK + +# if !defined PTHREAD_RWLOCK_INITIALIZER + +int +glthread_rwlock_init_multithreaded (gl_rwlock_t *lock) +{ + int err; + + err = pthread_rwlock_init (&lock->rwlock, NULL); + if (err != 0) + return err; + lock->initialized = 1; + return 0; +} + +int +glthread_rwlock_rdlock_multithreaded (gl_rwlock_t *lock) +{ + if (!lock->initialized) + { + int err; + + err = pthread_mutex_lock (&lock->guard); + if (err != 0) + return err; + if (!lock->initialized) + { + err = glthread_rwlock_init_multithreaded (lock); + if (err != 0) + { + pthread_mutex_unlock (&lock->guard); + return err; + } + } + err = pthread_mutex_unlock (&lock->guard); + if (err != 0) + return err; + } + return pthread_rwlock_rdlock (&lock->rwlock); +} + +int +glthread_rwlock_wrlock_multithreaded (gl_rwlock_t *lock) +{ + if (!lock->initialized) + { + int err; + + err = pthread_mutex_lock (&lock->guard); + if (err != 0) + return err; + if (!lock->initialized) + { + err = glthread_rwlock_init_multithreaded (lock); + if (err != 0) + { + pthread_mutex_unlock (&lock->guard); + return err; + } + } + err = pthread_mutex_unlock (&lock->guard); + if (err != 0) + return err; + } + return pthread_rwlock_wrlock (&lock->rwlock); +} + +int +glthread_rwlock_unlock_multithreaded (gl_rwlock_t *lock) +{ + if (!lock->initialized) + return EINVAL; + return pthread_rwlock_unlock (&lock->rwlock); +} + +int +glthread_rwlock_destroy_multithreaded (gl_rwlock_t *lock) +{ + int err; + + if (!lock->initialized) + return EINVAL; + err = pthread_rwlock_destroy (&lock->rwlock); + if (err != 0) + return err; + lock->initialized = 0; + return 0; +} + +# endif + +# else + +int 
+glthread_rwlock_init_multithreaded (gl_rwlock_t *lock) +{ + int err; + + err = pthread_mutex_init (&lock->lock, NULL); + if (err != 0) + return err; + err = pthread_cond_init (&lock->waiting_readers, NULL); + if (err != 0) + return err; + err = pthread_cond_init (&lock->waiting_writers, NULL); + if (err != 0) + return err; + lock->waiting_writers_count = 0; + lock->runcount = 0; + return 0; +} + +int +glthread_rwlock_rdlock_multithreaded (gl_rwlock_t *lock) +{ + int err; + + err = pthread_mutex_lock (&lock->lock); + if (err != 0) + return err; + /* Test whether only readers are currently running, and whether the runcount + field will not overflow. */ + /* POSIX says: "It is implementation-defined whether the calling thread + acquires the lock when a writer does not hold the lock and there are + writers blocked on the lock." Let's say, no: give the writers a higher + priority. */ + while (!(lock->runcount + 1 > 0 && lock->waiting_writers_count == 0)) + { + /* This thread has to wait for a while. Enqueue it among the + waiting_readers. */ + err = pthread_cond_wait (&lock->waiting_readers, &lock->lock); + if (err != 0) + { + pthread_mutex_unlock (&lock->lock); + return err; + } + } + lock->runcount++; + return pthread_mutex_unlock (&lock->lock); +} + +int +glthread_rwlock_wrlock_multithreaded (gl_rwlock_t *lock) +{ + int err; + + err = pthread_mutex_lock (&lock->lock); + if (err != 0) + return err; + /* Test whether no readers or writers are currently running. */ + while (!(lock->runcount == 0)) + { + /* This thread has to wait for a while. Enqueue it among the + waiting_writers. 
*/ + lock->waiting_writers_count++; + err = pthread_cond_wait (&lock->waiting_writers, &lock->lock); + if (err != 0) + { + lock->waiting_writers_count--; + pthread_mutex_unlock (&lock->lock); + return err; + } + lock->waiting_writers_count--; + } + lock->runcount--; /* runcount becomes -1 */ + return pthread_mutex_unlock (&lock->lock); +} + +int +glthread_rwlock_unlock_multithreaded (gl_rwlock_t *lock) +{ + int err; + + err = pthread_mutex_lock (&lock->lock); + if (err != 0) + return err; + if (lock->runcount < 0) + { + /* Drop a writer lock. */ + if (!(lock->runcount == -1)) + { + pthread_mutex_unlock (&lock->lock); + return EINVAL; + } + lock->runcount = 0; + } + else + { + /* Drop a reader lock. */ + if (!(lock->runcount > 0)) + { + pthread_mutex_unlock (&lock->lock); + return EINVAL; + } + lock->runcount--; + } + if (lock->runcount == 0) + { + /* POSIX recommends that "write locks shall take precedence over read + locks", to avoid "writer starvation". */ + if (lock->waiting_writers_count > 0) + { + /* Wake up one of the waiting writers. */ + err = pthread_cond_signal (&lock->waiting_writers); + if (err != 0) + { + pthread_mutex_unlock (&lock->lock); + return err; + } + } + else + { + /* Wake up all waiting readers. 
*/ + err = pthread_cond_broadcast (&lock->waiting_readers); + if (err != 0) + { + pthread_mutex_unlock (&lock->lock); + return err; + } + } + } + return pthread_mutex_unlock (&lock->lock); +} + +int +glthread_rwlock_destroy_multithreaded (gl_rwlock_t *lock) +{ + int err; + + err = pthread_mutex_destroy (&lock->lock); + if (err != 0) + return err; + err = pthread_cond_destroy (&lock->waiting_readers); + if (err != 0) + return err; + err = pthread_cond_destroy (&lock->waiting_writers); + if (err != 0) + return err; + return 0; +} + +# endif + +/* --------------------- gl_recursive_lock_t datatype --------------------- */ + +# if HAVE_PTHREAD_MUTEX_RECURSIVE + +# if defined PTHREAD_RECURSIVE_MUTEX_INITIALIZER || defined PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP + +int +glthread_recursive_lock_init_multithreaded (gl_recursive_lock_t *lock) +{ + pthread_mutexattr_t attributes; + int err; + + err = pthread_mutexattr_init (&attributes); + if (err != 0) + return err; + err = pthread_mutexattr_settype (&attributes, PTHREAD_MUTEX_RECURSIVE); + if (err != 0) + { + pthread_mutexattr_destroy (&attributes); + return err; + } + err = pthread_mutex_init (lock, &attributes); + if (err != 0) + { + pthread_mutexattr_destroy (&attributes); + return err; + } + err = pthread_mutexattr_destroy (&attributes); + if (err != 0) + return err; + return 0; +} + +# else + +int +glthread_recursive_lock_init_multithreaded (gl_recursive_lock_t *lock) +{ + pthread_mutexattr_t attributes; + int err; + + err = pthread_mutexattr_init (&attributes); + if (err != 0) + return err; + err = pthread_mutexattr_settype (&attributes, PTHREAD_MUTEX_RECURSIVE); + if (err != 0) + { + pthread_mutexattr_destroy (&attributes); + return err; + } + err = pthread_mutex_init (&lock->recmutex, &attributes); + if (err != 0) + { + pthread_mutexattr_destroy (&attributes); + return err; + } + err = pthread_mutexattr_destroy (&attributes); + if (err != 0) + return err; + lock->initialized = 1; + return 0; +} + +int 
+glthread_recursive_lock_lock_multithreaded (gl_recursive_lock_t *lock) +{ + if (!lock->initialized) + { + int err; + + err = pthread_mutex_lock (&lock->guard); + if (err != 0) + return err; + if (!lock->initialized) + { + err = glthread_recursive_lock_init_multithreaded (lock); + if (err != 0) + { + pthread_mutex_unlock (&lock->guard); + return err; + } + } + err = pthread_mutex_unlock (&lock->guard); + if (err != 0) + return err; + } + return pthread_mutex_lock (&lock->recmutex); +} + +int +glthread_recursive_lock_unlock_multithreaded (gl_recursive_lock_t *lock) +{ + if (!lock->initialized) + return EINVAL; + return pthread_mutex_unlock (&lock->recmutex); +} + +int +glthread_recursive_lock_destroy_multithreaded (gl_recursive_lock_t *lock) +{ + int err; + + if (!lock->initialized) + return EINVAL; + err = pthread_mutex_destroy (&lock->recmutex); + if (err != 0) + return err; + lock->initialized = 0; + return 0; +} + +# endif + +# else + +int +glthread_recursive_lock_init_multithreaded (gl_recursive_lock_t *lock) +{ + int err; + + err = pthread_mutex_init (&lock->mutex, NULL); + if (err != 0) + return err; + lock->owner = (pthread_t) 0; + lock->depth = 0; + return 0; +} + +int +glthread_recursive_lock_lock_multithreaded (gl_recursive_lock_t *lock) +{ + pthread_t self = pthread_self (); + if (lock->owner != self) + { + int err; + + err = pthread_mutex_lock (&lock->mutex); + if (err != 0) + return err; + lock->owner = self; + } + if (++(lock->depth) == 0) /* wraparound? 
*/ + { + lock->depth--; + return EAGAIN; + } + return 0; +} + +int +glthread_recursive_lock_unlock_multithreaded (gl_recursive_lock_t *lock) +{ + if (lock->owner != pthread_self ()) + return EPERM; + if (lock->depth == 0) + return EINVAL; + if (--(lock->depth) == 0) + { + lock->owner = (pthread_t) 0; + return pthread_mutex_unlock (&lock->mutex); + } + else + return 0; +} + +int +glthread_recursive_lock_destroy_multithreaded (gl_recursive_lock_t *lock) +{ + if (lock->owner != (pthread_t) 0) + return EBUSY; + return pthread_mutex_destroy (&lock->mutex); +} + +# endif + +/* -------------------------- gl_once_t datatype -------------------------- */ + +static const pthread_once_t fresh_once = PTHREAD_ONCE_INIT; + +int +glthread_once_singlethreaded (pthread_once_t *once_control) +{ + /* We don't know whether pthread_once_t is an integer type, a floating-point + type, a pointer type, or a structure type. */ + char *firstbyte = (char *)once_control; + if (*firstbyte == *(const char *)&fresh_once) + { + /* First time use of once_control. Invert the first byte. */ + *firstbyte = ~ *(const char *)&fresh_once; + return 1; + } + else + return 0; +} + +#endif + +/* ========================================================================= */ + +#if USE_PTH_THREADS + +/* Use the GNU Pth threads library. 
*/ + +/* -------------------------- gl_lock_t datatype -------------------------- */ + +/* ------------------------- gl_rwlock_t datatype ------------------------- */ + +/* --------------------- gl_recursive_lock_t datatype --------------------- */ + +/* -------------------------- gl_once_t datatype -------------------------- */ + +static void +glthread_once_call (void *arg) +{ + void (**gl_once_temp_addr) (void) = (void (**) (void)) arg; + void (*initfunction) (void) = *gl_once_temp_addr; + initfunction (); +} + +int +glthread_once_multithreaded (pth_once_t *once_control, void (*initfunction) (void)) +{ + void (*temp) (void) = initfunction; + return (!pth_once (once_control, glthread_once_call, &temp) ? errno : 0); +} + +int +glthread_once_singlethreaded (pth_once_t *once_control) +{ + /* We know that pth_once_t is an integer type. */ + if (*once_control == PTH_ONCE_INIT) + { + /* First time use of once_control. Invert the marker. */ + *once_control = ~ PTH_ONCE_INIT; + return 1; + } + else + return 0; +} + +#endif + +/* ========================================================================= */ + +#if USE_SOLARIS_THREADS + +/* Use the old Solaris threads library. */ + +/* -------------------------- gl_lock_t datatype -------------------------- */ + +/* ------------------------- gl_rwlock_t datatype ------------------------- */ + +/* --------------------- gl_recursive_lock_t datatype --------------------- */ + +int +glthread_recursive_lock_init_multithreaded (gl_recursive_lock_t *lock) +{ + int err; + + err = mutex_init (&lock->mutex, USYNC_THREAD, NULL); + if (err != 0) + return err; + lock->owner = (thread_t) 0; + lock->depth = 0; + return 0; +} + +int +glthread_recursive_lock_lock_multithreaded (gl_recursive_lock_t *lock) +{ + thread_t self = thr_self (); + if (lock->owner != self) + { + int err; + + err = mutex_lock (&lock->mutex); + if (err != 0) + return err; + lock->owner = self; + } + if (++(lock->depth) == 0) /* wraparound? 
*/ + { + lock->depth--; + return EAGAIN; + } + return 0; +} + +int +glthread_recursive_lock_unlock_multithreaded (gl_recursive_lock_t *lock) +{ + if (lock->owner != thr_self ()) + return EPERM; + if (lock->depth == 0) + return EINVAL; + if (--(lock->depth) == 0) + { + lock->owner = (thread_t) 0; + return mutex_unlock (&lock->mutex); + } + else + return 0; +} + +int +glthread_recursive_lock_destroy_multithreaded (gl_recursive_lock_t *lock) +{ + if (lock->owner != (thread_t) 0) + return EBUSY; + return mutex_destroy (&lock->mutex); +} + +/* -------------------------- gl_once_t datatype -------------------------- */ + +int +glthread_once_multithreaded (gl_once_t *once_control, void (*initfunction) (void)) +{ + if (!once_control->inited) + { + int err; + + /* Use the mutex to guarantee that if another thread is already calling + the initfunction, this thread waits until it's finished. */ + err = mutex_lock (&once_control->mutex); + if (err != 0) + return err; + if (!once_control->inited) + { + once_control->inited = 1; + initfunction (); + } + return mutex_unlock (&once_control->mutex); + } + else + return 0; +} + +int +glthread_once_singlethreaded (gl_once_t *once_control) +{ + /* We know that gl_once_t contains an integer type. */ + if (!once_control->inited) + { + /* First time use of once_control. Invert the marker. */ + once_control->inited = ~ 0; + return 1; + } + else + return 0; +} + +#endif + +/* ========================================================================= */ + +#if USE_WINDOWS_THREADS + +/* -------------------------- gl_lock_t datatype -------------------------- */ + +void +glthread_lock_init_func (gl_lock_t *lock) +{ + InitializeCriticalSection (&lock->lock); + lock->guard.done = 1; +} + +int +glthread_lock_lock_func (gl_lock_t *lock) +{ + if (!lock->guard.done) + { + if (InterlockedIncrement (&lock->guard.started) == 0) + /* This thread is the first one to need this lock. Initialize it. 
*/ + glthread_lock_init (lock); + else + /* Yield the CPU while waiting for another thread to finish + initializing this lock. */ + while (!lock->guard.done) + Sleep (0); + } + EnterCriticalSection (&lock->lock); + return 0; +} + +int +glthread_lock_unlock_func (gl_lock_t *lock) +{ + if (!lock->guard.done) + return EINVAL; + LeaveCriticalSection (&lock->lock); + return 0; +} + +int +glthread_lock_destroy_func (gl_lock_t *lock) +{ + if (!lock->guard.done) + return EINVAL; + DeleteCriticalSection (&lock->lock); + lock->guard.done = 0; + return 0; +} + +/* ------------------------- gl_rwlock_t datatype ------------------------- */ + +/* In this file, the waitqueues are implemented as circular arrays. */ +#define gl_waitqueue_t gl_carray_waitqueue_t + +static void +gl_waitqueue_init (gl_waitqueue_t *wq) +{ + wq->array = NULL; + wq->count = 0; + wq->alloc = 0; + wq->offset = 0; +} + +/* Enqueues the current thread, represented by an event, in a wait queue. + Returns INVALID_HANDLE_VALUE if an allocation failure occurs. */ +static HANDLE +gl_waitqueue_add (gl_waitqueue_t *wq) +{ + HANDLE event; + unsigned int index; + + if (wq->count == wq->alloc) + { + unsigned int new_alloc = 2 * wq->alloc + 1; + HANDLE *new_array = + (HANDLE *) realloc (wq->array, new_alloc * sizeof (HANDLE)); + if (new_array == NULL) + /* No more memory. */ + return INVALID_HANDLE_VALUE; + /* Now is a good opportunity to rotate the array so that its contents + starts at offset 0. 
*/ + if (wq->offset > 0) + { + unsigned int old_count = wq->count; + unsigned int old_alloc = wq->alloc; + unsigned int old_offset = wq->offset; + unsigned int i; + if (old_offset + old_count > old_alloc) + { + unsigned int limit = old_offset + old_count - old_alloc; + for (i = 0; i < limit; i++) + new_array[old_alloc + i] = new_array[i]; + } + for (i = 0; i < old_count; i++) + new_array[i] = new_array[old_offset + i]; + wq->offset = 0; + } + wq->array = new_array; + wq->alloc = new_alloc; + } + /* Whether the created event is a manual-reset one or an auto-reset one, + does not matter, since we will wait on it only once. */ + event = CreateEvent (NULL, TRUE, FALSE, NULL); + if (event == INVALID_HANDLE_VALUE) + /* No way to allocate an event. */ + return INVALID_HANDLE_VALUE; + index = wq->offset + wq->count; + if (index >= wq->alloc) + index -= wq->alloc; + wq->array[index] = event; + wq->count++; + return event; +} + +/* Notifies the first thread from a wait queue and dequeues it. */ +static void +gl_waitqueue_notify_first (gl_waitqueue_t *wq) +{ + SetEvent (wq->array[wq->offset + 0]); + wq->offset++; + wq->count--; + if (wq->count == 0 || wq->offset == wq->alloc) + wq->offset = 0; +} + +/* Notifies all threads from a wait queue and dequeues them all. */ +static void +gl_waitqueue_notify_all (gl_waitqueue_t *wq) +{ + unsigned int i; + + for (i = 0; i < wq->count; i++) + { + unsigned int index = wq->offset + i; + if (index >= wq->alloc) + index -= wq->alloc; + SetEvent (wq->array[index]); + } + wq->count = 0; + wq->offset = 0; +} + +void +glthread_rwlock_init_func (gl_rwlock_t *lock) +{ + InitializeCriticalSection (&lock->lock); + gl_waitqueue_init (&lock->waiting_readers); + gl_waitqueue_init (&lock->waiting_writers); + lock->runcount = 0; + lock->guard.done = 1; +} + +int +glthread_rwlock_rdlock_func (gl_rwlock_t *lock) +{ + if (!lock->guard.done) + { + if (InterlockedIncrement (&lock->guard.started) == 0) + /* This thread is the first one to need this lock. 
Initialize it. */ + glthread_rwlock_init (lock); + else + /* Yield the CPU while waiting for another thread to finish + initializing this lock. */ + while (!lock->guard.done) + Sleep (0); + } + EnterCriticalSection (&lock->lock); + /* Test whether only readers are currently running, and whether the runcount + field will not overflow. */ + if (!(lock->runcount + 1 > 0)) + { + /* This thread has to wait for a while. Enqueue it among the + waiting_readers. */ + HANDLE event = gl_waitqueue_add (&lock->waiting_readers); + if (event != INVALID_HANDLE_VALUE) + { + DWORD result; + LeaveCriticalSection (&lock->lock); + /* Wait until another thread signals this event. */ + result = WaitForSingleObject (event, INFINITE); + if (result == WAIT_FAILED || result == WAIT_TIMEOUT) + abort (); + CloseHandle (event); + /* The thread which signalled the event already did the bookkeeping: + removed us from the waiting_readers, incremented lock->runcount. */ + if (!(lock->runcount > 0)) + abort (); + return 0; + } + else + { + /* Allocation failure. Weird. */ + do + { + LeaveCriticalSection (&lock->lock); + Sleep (1); + EnterCriticalSection (&lock->lock); + } + while (!(lock->runcount + 1 > 0)); + } + } + lock->runcount++; + LeaveCriticalSection (&lock->lock); + return 0; +} + +int +glthread_rwlock_wrlock_func (gl_rwlock_t *lock) +{ + if (!lock->guard.done) + { + if (InterlockedIncrement (&lock->guard.started) == 0) + /* This thread is the first one to need this lock. Initialize it. */ + glthread_rwlock_init (lock); + else + /* Yield the CPU while waiting for another thread to finish + initializing this lock. */ + while (!lock->guard.done) + Sleep (0); + } + EnterCriticalSection (&lock->lock); + /* Test whether no readers or writers are currently running. */ + if (!(lock->runcount == 0)) + { + /* This thread has to wait for a while. Enqueue it among the + waiting_writers. 
*/ + HANDLE event = gl_waitqueue_add (&lock->waiting_writers); + if (event != INVALID_HANDLE_VALUE) + { + DWORD result; + LeaveCriticalSection (&lock->lock); + /* Wait until another thread signals this event. */ + result = WaitForSingleObject (event, INFINITE); + if (result == WAIT_FAILED || result == WAIT_TIMEOUT) + abort (); + CloseHandle (event); + /* The thread which signalled the event already did the bookkeeping: + removed us from the waiting_writers, set lock->runcount = -1. */ + if (!(lock->runcount == -1)) + abort (); + return 0; + } + else + { + /* Allocation failure. Weird. */ + do + { + LeaveCriticalSection (&lock->lock); + Sleep (1); + EnterCriticalSection (&lock->lock); + } + while (!(lock->runcount == 0)); + } + } + lock->runcount--; /* runcount becomes -1 */ + LeaveCriticalSection (&lock->lock); + return 0; +} + +int +glthread_rwlock_unlock_func (gl_rwlock_t *lock) +{ + if (!lock->guard.done) + return EINVAL; + EnterCriticalSection (&lock->lock); + if (lock->runcount < 0) + { + /* Drop a writer lock. */ + if (!(lock->runcount == -1)) + abort (); + lock->runcount = 0; + } + else + { + /* Drop a reader lock. */ + if (!(lock->runcount > 0)) + { + LeaveCriticalSection (&lock->lock); + return EPERM; + } + lock->runcount--; + } + if (lock->runcount == 0) + { + /* POSIX recommends that "write locks shall take precedence over read + locks", to avoid "writer starvation". */ + if (lock->waiting_writers.count > 0) + { + /* Wake up one of the waiting writers. */ + lock->runcount--; + gl_waitqueue_notify_first (&lock->waiting_writers); + } + else + { + /* Wake up all waiting readers. 
*/ + lock->runcount += lock->waiting_readers.count; + gl_waitqueue_notify_all (&lock->waiting_readers); + } + } + LeaveCriticalSection (&lock->lock); + return 0; +} + +int +glthread_rwlock_destroy_func (gl_rwlock_t *lock) +{ + if (!lock->guard.done) + return EINVAL; + if (lock->runcount != 0) + return EBUSY; + DeleteCriticalSection (&lock->lock); + if (lock->waiting_readers.array != NULL) + free (lock->waiting_readers.array); + if (lock->waiting_writers.array != NULL) + free (lock->waiting_writers.array); + lock->guard.done = 0; + return 0; +} + +/* --------------------- gl_recursive_lock_t datatype --------------------- */ + +void +glthread_recursive_lock_init_func (gl_recursive_lock_t *lock) +{ + lock->owner = 0; + lock->depth = 0; + InitializeCriticalSection (&lock->lock); + lock->guard.done = 1; +} + +int +glthread_recursive_lock_lock_func (gl_recursive_lock_t *lock) +{ + if (!lock->guard.done) + { + if (InterlockedIncrement (&lock->guard.started) == 0) + /* This thread is the first one to need this lock. Initialize it. */ + glthread_recursive_lock_init (lock); + else + /* Yield the CPU while waiting for another thread to finish + initializing this lock. */ + while (!lock->guard.done) + Sleep (0); + } + { + DWORD self = GetCurrentThreadId (); + if (lock->owner != self) + { + EnterCriticalSection (&lock->lock); + lock->owner = self; + } + if (++(lock->depth) == 0) /* wraparound? 
*/ + { + lock->depth--; + return EAGAIN; + } + } + return 0; +} + +int +glthread_recursive_lock_unlock_func (gl_recursive_lock_t *lock) +{ + if (lock->owner != GetCurrentThreadId ()) + return EPERM; + if (lock->depth == 0) + return EINVAL; + if (--(lock->depth) == 0) + { + lock->owner = 0; + LeaveCriticalSection (&lock->lock); + } + return 0; +} + +int +glthread_recursive_lock_destroy_func (gl_recursive_lock_t *lock) +{ + if (lock->owner != 0) + return EBUSY; + DeleteCriticalSection (&lock->lock); + lock->guard.done = 0; + return 0; +} + +/* -------------------------- gl_once_t datatype -------------------------- */ + +void +glthread_once_func (gl_once_t *once_control, void (*initfunction) (void)) +{ + if (once_control->inited <= 0) + { + if (InterlockedIncrement (&once_control->started) == 0) + { + /* This thread is the first one to come to this once_control. */ + InitializeCriticalSection (&once_control->lock); + EnterCriticalSection (&once_control->lock); + once_control->inited = 0; + initfunction (); + once_control->inited = 1; + LeaveCriticalSection (&once_control->lock); + } + else + { + /* Undo last operation. */ + InterlockedDecrement (&once_control->started); + /* Some other thread has already started the initialization. + Yield the CPU while waiting for the other thread to finish + initializing and taking the lock. */ + while (once_control->inited < 0) + Sleep (0); + if (once_control->inited <= 0) + { + /* Take the lock. This blocks until the other thread has + finished calling the initfunction. 
*/ + EnterCriticalSection (&once_control->lock); + LeaveCriticalSection (&once_control->lock); + if (!(once_control->inited > 0)) + abort (); + } + } + } +} + +#endif + +/* ========================================================================= */ diff --git a/contrib/grep/lib/glthread/lock.h b/contrib/grep/lib/glthread/lock.h new file mode 100644 index 0000000000..36096c4b6a --- /dev/null +++ b/contrib/grep/lib/glthread/lock.h @@ -0,0 +1,927 @@ +/* Locking in multithreaded situations. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2005. + Based on GCC's gthr-posix.h, gthr-posix95.h, gthr-solaris.h, + gthr-win32.h. */ + +/* This file contains locking primitives for use with a given thread library. + It does not contain primitives for creating threads or for other + synchronization primitives. 
+ + Normal (non-recursive) locks: + Type: gl_lock_t + Declaration: gl_lock_define(extern, name) + Initializer: gl_lock_define_initialized(, name) + Initialization: gl_lock_init (name); + Taking the lock: gl_lock_lock (name); + Releasing the lock: gl_lock_unlock (name); + De-initialization: gl_lock_destroy (name); + Equivalent functions with control of error handling: + Initialization: err = glthread_lock_init (&name); + Taking the lock: err = glthread_lock_lock (&name); + Releasing the lock: err = glthread_lock_unlock (&name); + De-initialization: err = glthread_lock_destroy (&name); + + Read-Write (non-recursive) locks: + Type: gl_rwlock_t + Declaration: gl_rwlock_define(extern, name) + Initializer: gl_rwlock_define_initialized(, name) + Initialization: gl_rwlock_init (name); + Taking the lock: gl_rwlock_rdlock (name); + gl_rwlock_wrlock (name); + Releasing the lock: gl_rwlock_unlock (name); + De-initialization: gl_rwlock_destroy (name); + Equivalent functions with control of error handling: + Initialization: err = glthread_rwlock_init (&name); + Taking the lock: err = glthread_rwlock_rdlock (&name); + err = glthread_rwlock_wrlock (&name); + Releasing the lock: err = glthread_rwlock_unlock (&name); + De-initialization: err = glthread_rwlock_destroy (&name); + + Recursive locks: + Type: gl_recursive_lock_t + Declaration: gl_recursive_lock_define(extern, name) + Initializer: gl_recursive_lock_define_initialized(, name) + Initialization: gl_recursive_lock_init (name); + Taking the lock: gl_recursive_lock_lock (name); + Releasing the lock: gl_recursive_lock_unlock (name); + De-initialization: gl_recursive_lock_destroy (name); + Equivalent functions with control of error handling: + Initialization: err = glthread_recursive_lock_init (&name); + Taking the lock: err = glthread_recursive_lock_lock (&name); + Releasing the lock: err = glthread_recursive_lock_unlock (&name); + De-initialization: err = glthread_recursive_lock_destroy (&name); + + Once-only execution: + Type: 
gl_once_t + Initializer: gl_once_define(extern, name) + Execution: gl_once (name, initfunction); + Equivalent functions with control of error handling: + Execution: err = glthread_once (&name, initfunction); +*/ + + +#ifndef _LOCK_H +#define _LOCK_H + +#include <errno.h> +#include <stdlib.h> + +/* ========================================================================= */ + +#if USE_POSIX_THREADS + +/* Use the POSIX threads library. */ + +# include <pthread.h> + +# ifdef __cplusplus +extern "C" { +# endif + +# if PTHREAD_IN_USE_DETECTION_HARD + +/* The pthread_in_use() detection needs to be done at runtime. */ +# define pthread_in_use() \ + glthread_in_use () +extern int glthread_in_use (void); + +# endif + +# if USE_POSIX_THREADS_WEAK + +/* Use weak references to the POSIX threads library. */ + +/* Weak references avoid dragging in external libraries if the other parts + of the program don't use them. Here we use them, because we don't want + every program that uses libintl to depend on libpthread. This assumes + that libpthread would not be loaded after libintl; i.e. if libintl is + loaded first, by an executable that does not depend on libpthread, and + then a module is dynamically loaded that depends on libpthread, libintl + will not be multithread-safe. */ + +/* The way to test at runtime whether libpthread is present is to test + whether a function pointer's value, such as &pthread_mutex_init, is + non-NULL. However, some versions of GCC have a bug through which, in + PIC mode, &foo != NULL always evaluates to true if there is a direct + call to foo(...) in the same function. To avoid this, we test the + address of a function in libpthread that we don't use. 
*/ + +# pragma weak pthread_mutex_init +# pragma weak pthread_mutex_lock +# pragma weak pthread_mutex_unlock +# pragma weak pthread_mutex_destroy +# pragma weak pthread_rwlock_init +# pragma weak pthread_rwlock_rdlock +# pragma weak pthread_rwlock_wrlock +# pragma weak pthread_rwlock_unlock +# pragma weak pthread_rwlock_destroy +# pragma weak pthread_once +# pragma weak pthread_cond_init +# pragma weak pthread_cond_wait +# pragma weak pthread_cond_signal +# pragma weak pthread_cond_broadcast +# pragma weak pthread_cond_destroy +# pragma weak pthread_mutexattr_init +# pragma weak pthread_mutexattr_settype +# pragma weak pthread_mutexattr_destroy +# ifndef pthread_self +# pragma weak pthread_self +# endif + +# if !PTHREAD_IN_USE_DETECTION_HARD +# pragma weak pthread_cancel +# define pthread_in_use() (pthread_cancel != NULL) +# endif + +# else + +# if !PTHREAD_IN_USE_DETECTION_HARD +# define pthread_in_use() 1 +# endif + +# endif + +/* -------------------------- gl_lock_t datatype -------------------------- */ + +typedef pthread_mutex_t gl_lock_t; +# define gl_lock_define(STORAGECLASS, NAME) \ + STORAGECLASS pthread_mutex_t NAME; +# define gl_lock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS pthread_mutex_t NAME = gl_lock_initializer; +# define gl_lock_initializer \ + PTHREAD_MUTEX_INITIALIZER +# define glthread_lock_init(LOCK) \ + (pthread_in_use () ? pthread_mutex_init (LOCK, NULL) : 0) +# define glthread_lock_lock(LOCK) \ + (pthread_in_use () ? pthread_mutex_lock (LOCK) : 0) +# define glthread_lock_unlock(LOCK) \ + (pthread_in_use () ? pthread_mutex_unlock (LOCK) : 0) +# define glthread_lock_destroy(LOCK) \ + (pthread_in_use () ? 
pthread_mutex_destroy (LOCK) : 0) + +/* ------------------------- gl_rwlock_t datatype ------------------------- */ + +# if HAVE_PTHREAD_RWLOCK + +# ifdef PTHREAD_RWLOCK_INITIALIZER + +typedef pthread_rwlock_t gl_rwlock_t; +# define gl_rwlock_define(STORAGECLASS, NAME) \ + STORAGECLASS pthread_rwlock_t NAME; +# define gl_rwlock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS pthread_rwlock_t NAME = gl_rwlock_initializer; +# define gl_rwlock_initializer \ + PTHREAD_RWLOCK_INITIALIZER +# define glthread_rwlock_init(LOCK) \ + (pthread_in_use () ? pthread_rwlock_init (LOCK, NULL) : 0) +# define glthread_rwlock_rdlock(LOCK) \ + (pthread_in_use () ? pthread_rwlock_rdlock (LOCK) : 0) +# define glthread_rwlock_wrlock(LOCK) \ + (pthread_in_use () ? pthread_rwlock_wrlock (LOCK) : 0) +# define glthread_rwlock_unlock(LOCK) \ + (pthread_in_use () ? pthread_rwlock_unlock (LOCK) : 0) +# define glthread_rwlock_destroy(LOCK) \ + (pthread_in_use () ? pthread_rwlock_destroy (LOCK) : 0) + +# else + +typedef struct + { + int initialized; + pthread_mutex_t guard; /* protects the initialization */ + pthread_rwlock_t rwlock; /* read-write lock */ + } + gl_rwlock_t; +# define gl_rwlock_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_rwlock_t NAME; +# define gl_rwlock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS gl_rwlock_t NAME = gl_rwlock_initializer; +# define gl_rwlock_initializer \ + { 0, PTHREAD_MUTEX_INITIALIZER } +# define glthread_rwlock_init(LOCK) \ + (pthread_in_use () ? glthread_rwlock_init_multithreaded (LOCK) : 0) +# define glthread_rwlock_rdlock(LOCK) \ + (pthread_in_use () ? glthread_rwlock_rdlock_multithreaded (LOCK) : 0) +# define glthread_rwlock_wrlock(LOCK) \ + (pthread_in_use () ? glthread_rwlock_wrlock_multithreaded (LOCK) : 0) +# define glthread_rwlock_unlock(LOCK) \ + (pthread_in_use () ? glthread_rwlock_unlock_multithreaded (LOCK) : 0) +# define glthread_rwlock_destroy(LOCK) \ + (pthread_in_use () ? 
glthread_rwlock_destroy_multithreaded (LOCK) : 0) +extern int glthread_rwlock_init_multithreaded (gl_rwlock_t *lock); +extern int glthread_rwlock_rdlock_multithreaded (gl_rwlock_t *lock); +extern int glthread_rwlock_wrlock_multithreaded (gl_rwlock_t *lock); +extern int glthread_rwlock_unlock_multithreaded (gl_rwlock_t *lock); +extern int glthread_rwlock_destroy_multithreaded (gl_rwlock_t *lock); + +# endif + +# else + +typedef struct + { + pthread_mutex_t lock; /* protects the remaining fields */ + pthread_cond_t waiting_readers; /* waiting readers */ + pthread_cond_t waiting_writers; /* waiting writers */ + unsigned int waiting_writers_count; /* number of waiting writers */ + int runcount; /* number of readers running, or -1 when a writer runs */ + } + gl_rwlock_t; +# define gl_rwlock_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_rwlock_t NAME; +# define gl_rwlock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS gl_rwlock_t NAME = gl_rwlock_initializer; +# define gl_rwlock_initializer \ + { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0 } +# define glthread_rwlock_init(LOCK) \ + (pthread_in_use () ? glthread_rwlock_init_multithreaded (LOCK) : 0) +# define glthread_rwlock_rdlock(LOCK) \ + (pthread_in_use () ? glthread_rwlock_rdlock_multithreaded (LOCK) : 0) +# define glthread_rwlock_wrlock(LOCK) \ + (pthread_in_use () ? glthread_rwlock_wrlock_multithreaded (LOCK) : 0) +# define glthread_rwlock_unlock(LOCK) \ + (pthread_in_use () ? glthread_rwlock_unlock_multithreaded (LOCK) : 0) +# define glthread_rwlock_destroy(LOCK) \ + (pthread_in_use () ? 
glthread_rwlock_destroy_multithreaded (LOCK) : 0) +extern int glthread_rwlock_init_multithreaded (gl_rwlock_t *lock); +extern int glthread_rwlock_rdlock_multithreaded (gl_rwlock_t *lock); +extern int glthread_rwlock_wrlock_multithreaded (gl_rwlock_t *lock); +extern int glthread_rwlock_unlock_multithreaded (gl_rwlock_t *lock); +extern int glthread_rwlock_destroy_multithreaded (gl_rwlock_t *lock); + +# endif + +/* --------------------- gl_recursive_lock_t datatype --------------------- */ + +# if HAVE_PTHREAD_MUTEX_RECURSIVE + +# if defined PTHREAD_RECURSIVE_MUTEX_INITIALIZER || defined PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP + +typedef pthread_mutex_t gl_recursive_lock_t; +# define gl_recursive_lock_define(STORAGECLASS, NAME) \ + STORAGECLASS pthread_mutex_t NAME; +# define gl_recursive_lock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS pthread_mutex_t NAME = gl_recursive_lock_initializer; +# ifdef PTHREAD_RECURSIVE_MUTEX_INITIALIZER +# define gl_recursive_lock_initializer \ + PTHREAD_RECURSIVE_MUTEX_INITIALIZER +# else +# define gl_recursive_lock_initializer \ + PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP +# endif +# define glthread_recursive_lock_init(LOCK) \ + (pthread_in_use () ? glthread_recursive_lock_init_multithreaded (LOCK) : 0) +# define glthread_recursive_lock_lock(LOCK) \ + (pthread_in_use () ? pthread_mutex_lock (LOCK) : 0) +# define glthread_recursive_lock_unlock(LOCK) \ + (pthread_in_use () ? pthread_mutex_unlock (LOCK) : 0) +# define glthread_recursive_lock_destroy(LOCK) \ + (pthread_in_use () ? 
pthread_mutex_destroy (LOCK) : 0) +extern int glthread_recursive_lock_init_multithreaded (gl_recursive_lock_t *lock); + +# else + +typedef struct + { + pthread_mutex_t recmutex; /* recursive mutex */ + pthread_mutex_t guard; /* protects the initialization */ + int initialized; + } + gl_recursive_lock_t; +# define gl_recursive_lock_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_recursive_lock_t NAME; +# define gl_recursive_lock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS gl_recursive_lock_t NAME = gl_recursive_lock_initializer; +# define gl_recursive_lock_initializer \ + { PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, 0 } +# define glthread_recursive_lock_init(LOCK) \ + (pthread_in_use () ? glthread_recursive_lock_init_multithreaded (LOCK) : 0) +# define glthread_recursive_lock_lock(LOCK) \ + (pthread_in_use () ? glthread_recursive_lock_lock_multithreaded (LOCK) : 0) +# define glthread_recursive_lock_unlock(LOCK) \ + (pthread_in_use () ? glthread_recursive_lock_unlock_multithreaded (LOCK) : 0) +# define glthread_recursive_lock_destroy(LOCK) \ + (pthread_in_use () ? glthread_recursive_lock_destroy_multithreaded (LOCK) : 0) +extern int glthread_recursive_lock_init_multithreaded (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_lock_multithreaded (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_unlock_multithreaded (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_destroy_multithreaded (gl_recursive_lock_t *lock); + +# endif + +# else + +/* Old versions of POSIX threads on Solaris did not have recursive locks. + We have to implement them ourselves. 
*/ + +typedef struct + { + pthread_mutex_t mutex; + pthread_t owner; + unsigned long depth; + } + gl_recursive_lock_t; +# define gl_recursive_lock_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_recursive_lock_t NAME; +# define gl_recursive_lock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS gl_recursive_lock_t NAME = gl_recursive_lock_initializer; +# define gl_recursive_lock_initializer \ + { PTHREAD_MUTEX_INITIALIZER, (pthread_t) 0, 0 } +# define glthread_recursive_lock_init(LOCK) \ + (pthread_in_use () ? glthread_recursive_lock_init_multithreaded (LOCK) : 0) +# define glthread_recursive_lock_lock(LOCK) \ + (pthread_in_use () ? glthread_recursive_lock_lock_multithreaded (LOCK) : 0) +# define glthread_recursive_lock_unlock(LOCK) \ + (pthread_in_use () ? glthread_recursive_lock_unlock_multithreaded (LOCK) : 0) +# define glthread_recursive_lock_destroy(LOCK) \ + (pthread_in_use () ? glthread_recursive_lock_destroy_multithreaded (LOCK) : 0) +extern int glthread_recursive_lock_init_multithreaded (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_lock_multithreaded (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_unlock_multithreaded (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_destroy_multithreaded (gl_recursive_lock_t *lock); + +# endif + +/* -------------------------- gl_once_t datatype -------------------------- */ + +typedef pthread_once_t gl_once_t; +# define gl_once_define(STORAGECLASS, NAME) \ + STORAGECLASS pthread_once_t NAME = PTHREAD_ONCE_INIT; +# define glthread_once(ONCE_CONTROL, INITFUNCTION) \ + (pthread_in_use () \ + ? pthread_once (ONCE_CONTROL, INITFUNCTION) \ + : (glthread_once_singlethreaded (ONCE_CONTROL) ? 
(INITFUNCTION (), 0) : 0)) +extern int glthread_once_singlethreaded (pthread_once_t *once_control); + +# ifdef __cplusplus +} +# endif + +#endif + +/* ========================================================================= */ + +#if USE_PTH_THREADS + +/* Use the GNU Pth threads library. */ + +# include <pth.h> + +# ifdef __cplusplus +extern "C" { +# endif + +# if USE_PTH_THREADS_WEAK + +/* Use weak references to the GNU Pth threads library. */ + +# pragma weak pth_mutex_init +# pragma weak pth_mutex_acquire +# pragma weak pth_mutex_release +# pragma weak pth_rwlock_init +# pragma weak pth_rwlock_acquire +# pragma weak pth_rwlock_release +# pragma weak pth_once + +# pragma weak pth_cancel +# define pth_in_use() (pth_cancel != NULL) + +# else + +# define pth_in_use() 1 + +# endif + +/* -------------------------- gl_lock_t datatype -------------------------- */ + +typedef pth_mutex_t gl_lock_t; +# define gl_lock_define(STORAGECLASS, NAME) \ + STORAGECLASS pth_mutex_t NAME; +# define gl_lock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS pth_mutex_t NAME = gl_lock_initializer; +# define gl_lock_initializer \ + PTH_MUTEX_INIT +# define glthread_lock_init(LOCK) \ + (pth_in_use () && !pth_mutex_init (LOCK) ? errno : 0) +# define glthread_lock_lock(LOCK) \ + (pth_in_use () && !pth_mutex_acquire (LOCK, 0, NULL) ? errno : 0) +# define glthread_lock_unlock(LOCK) \ + (pth_in_use () && !pth_mutex_release (LOCK) ? errno : 0) +# define glthread_lock_destroy(LOCK) \ + ((void)(LOCK), 0) + +/* ------------------------- gl_rwlock_t datatype ------------------------- */ + +typedef pth_rwlock_t gl_rwlock_t; +# define gl_rwlock_define(STORAGECLASS, NAME) \ + STORAGECLASS pth_rwlock_t NAME; +# define gl_rwlock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS pth_rwlock_t NAME = gl_rwlock_initializer; +# define gl_rwlock_initializer \ + PTH_RWLOCK_INIT +# define glthread_rwlock_init(LOCK) \ + (pth_in_use () && !pth_rwlock_init (LOCK) ? 
errno : 0) +# define glthread_rwlock_rdlock(LOCK) \ + (pth_in_use () && !pth_rwlock_acquire (LOCK, PTH_RWLOCK_RD, 0, NULL) ? errno : 0) +# define glthread_rwlock_wrlock(LOCK) \ + (pth_in_use () && !pth_rwlock_acquire (LOCK, PTH_RWLOCK_RW, 0, NULL) ? errno : 0) +# define glthread_rwlock_unlock(LOCK) \ + (pth_in_use () && !pth_rwlock_release (LOCK) ? errno : 0) +# define glthread_rwlock_destroy(LOCK) \ + ((void)(LOCK), 0) + +/* --------------------- gl_recursive_lock_t datatype --------------------- */ + +/* In Pth, mutexes are recursive by default. */ +typedef pth_mutex_t gl_recursive_lock_t; +# define gl_recursive_lock_define(STORAGECLASS, NAME) \ + STORAGECLASS pth_mutex_t NAME; +# define gl_recursive_lock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS pth_mutex_t NAME = gl_recursive_lock_initializer; +# define gl_recursive_lock_initializer \ + PTH_MUTEX_INIT +# define glthread_recursive_lock_init(LOCK) \ + (pth_in_use () && !pth_mutex_init (LOCK) ? errno : 0) +# define glthread_recursive_lock_lock(LOCK) \ + (pth_in_use () && !pth_mutex_acquire (LOCK, 0, NULL) ? errno : 0) +# define glthread_recursive_lock_unlock(LOCK) \ + (pth_in_use () && !pth_mutex_release (LOCK) ? errno : 0) +# define glthread_recursive_lock_destroy(LOCK) \ + ((void)(LOCK), 0) + +/* -------------------------- gl_once_t datatype -------------------------- */ + +typedef pth_once_t gl_once_t; +# define gl_once_define(STORAGECLASS, NAME) \ + STORAGECLASS pth_once_t NAME = PTH_ONCE_INIT; +# define glthread_once(ONCE_CONTROL, INITFUNCTION) \ + (pth_in_use () \ + ? glthread_once_multithreaded (ONCE_CONTROL, INITFUNCTION) \ + : (glthread_once_singlethreaded (ONCE_CONTROL) ? 
(INITFUNCTION (), 0) : 0)) +extern int glthread_once_multithreaded (pth_once_t *once_control, void (*initfunction) (void)); +extern int glthread_once_singlethreaded (pth_once_t *once_control); + +# ifdef __cplusplus +} +# endif + +#endif + +/* ========================================================================= */ + +#if USE_SOLARIS_THREADS + +/* Use the old Solaris threads library. */ + +# include <thread.h> +# include <synch.h> + +# ifdef __cplusplus +extern "C" { +# endif + +# if USE_SOLARIS_THREADS_WEAK + +/* Use weak references to the old Solaris threads library. */ + +# pragma weak mutex_init +# pragma weak mutex_lock +# pragma weak mutex_unlock +# pragma weak mutex_destroy +# pragma weak rwlock_init +# pragma weak rw_rdlock +# pragma weak rw_wrlock +# pragma weak rw_unlock +# pragma weak rwlock_destroy +# pragma weak thr_self + +# pragma weak thr_suspend +# define thread_in_use() (thr_suspend != NULL) + +# else + +# define thread_in_use() 1 + +# endif + +/* -------------------------- gl_lock_t datatype -------------------------- */ + +typedef mutex_t gl_lock_t; +# define gl_lock_define(STORAGECLASS, NAME) \ + STORAGECLASS mutex_t NAME; +# define gl_lock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS mutex_t NAME = gl_lock_initializer; +# define gl_lock_initializer \ + DEFAULTMUTEX +# define glthread_lock_init(LOCK) \ + (thread_in_use () ? mutex_init (LOCK, USYNC_THREAD, NULL) : 0) +# define glthread_lock_lock(LOCK) \ + (thread_in_use () ? mutex_lock (LOCK) : 0) +# define glthread_lock_unlock(LOCK) \ + (thread_in_use () ? mutex_unlock (LOCK) : 0) +# define glthread_lock_destroy(LOCK) \ + (thread_in_use () ? 
mutex_destroy (LOCK) : 0) + +/* ------------------------- gl_rwlock_t datatype ------------------------- */ + +typedef rwlock_t gl_rwlock_t; +# define gl_rwlock_define(STORAGECLASS, NAME) \ + STORAGECLASS rwlock_t NAME; +# define gl_rwlock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS rwlock_t NAME = gl_rwlock_initializer; +# define gl_rwlock_initializer \ + DEFAULTRWLOCK +# define glthread_rwlock_init(LOCK) \ + (thread_in_use () ? rwlock_init (LOCK, USYNC_THREAD, NULL) : 0) +# define glthread_rwlock_rdlock(LOCK) \ + (thread_in_use () ? rw_rdlock (LOCK) : 0) +# define glthread_rwlock_wrlock(LOCK) \ + (thread_in_use () ? rw_wrlock (LOCK) : 0) +# define glthread_rwlock_unlock(LOCK) \ + (thread_in_use () ? rw_unlock (LOCK) : 0) +# define glthread_rwlock_destroy(LOCK) \ + (thread_in_use () ? rwlock_destroy (LOCK) : 0) + +/* --------------------- gl_recursive_lock_t datatype --------------------- */ + +/* Old Solaris threads did not have recursive locks. + We have to implement them ourselves. */ + +typedef struct + { + mutex_t mutex; + thread_t owner; + unsigned long depth; + } + gl_recursive_lock_t; +# define gl_recursive_lock_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_recursive_lock_t NAME; +# define gl_recursive_lock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS gl_recursive_lock_t NAME = gl_recursive_lock_initializer; +# define gl_recursive_lock_initializer \ + { DEFAULTMUTEX, (thread_t) 0, 0 } +# define glthread_recursive_lock_init(LOCK) \ + (thread_in_use () ? glthread_recursive_lock_init_multithreaded (LOCK) : 0) +# define glthread_recursive_lock_lock(LOCK) \ + (thread_in_use () ? glthread_recursive_lock_lock_multithreaded (LOCK) : 0) +# define glthread_recursive_lock_unlock(LOCK) \ + (thread_in_use () ? glthread_recursive_lock_unlock_multithreaded (LOCK) : 0) +# define glthread_recursive_lock_destroy(LOCK) \ + (thread_in_use () ? 
glthread_recursive_lock_destroy_multithreaded (LOCK) : 0) +extern int glthread_recursive_lock_init_multithreaded (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_lock_multithreaded (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_unlock_multithreaded (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_destroy_multithreaded (gl_recursive_lock_t *lock); + +/* -------------------------- gl_once_t datatype -------------------------- */ + +typedef struct + { + volatile int inited; + mutex_t mutex; + } + gl_once_t; +# define gl_once_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_once_t NAME = { 0, DEFAULTMUTEX }; +# define glthread_once(ONCE_CONTROL, INITFUNCTION) \ + (thread_in_use () \ + ? glthread_once_multithreaded (ONCE_CONTROL, INITFUNCTION) \ + : (glthread_once_singlethreaded (ONCE_CONTROL) ? (INITFUNCTION (), 0) : 0)) +extern int glthread_once_multithreaded (gl_once_t *once_control, void (*initfunction) (void)); +extern int glthread_once_singlethreaded (gl_once_t *once_control); + +# ifdef __cplusplus +} +# endif + +#endif + +/* ========================================================================= */ + +#if USE_WINDOWS_THREADS + +# define WIN32_LEAN_AND_MEAN /* avoid including junk */ +# include <windows.h> + +# ifdef __cplusplus +extern "C" { +# endif + +/* We can use CRITICAL_SECTION directly, rather than the native Windows Event, + Mutex, Semaphore types, because + - we need only to synchronize inside a single process (address space), + not inter-process locking, + - we don't need to support trylock operations. (TryEnterCriticalSection + does not work on Windows 95/98/ME. Packages that need trylock usually + define their own mutex type.) */ + +/* There is no way to statically initialize a CRITICAL_SECTION. It needs + to be done lazily, once only. For this we need spinlocks. 
*/ + +/* Guard for one-time lazy initialization. 'done' is set to 1 by the + recursive-lock init function in lock.c and reset to 0 by its destroy + function. 'started' is -1 in the static initializers below, so the first + InterlockedIncrement of it yields 0 and singles out the thread that has + to perform the initialization (see glthread_recursive_lock_lock_func in + lock.c). */ +typedef struct { volatile int done; volatile long started; } gl_spinlock_t; + +/* -------------------------- gl_lock_t datatype -------------------------- */ + +typedef struct + { + gl_spinlock_t guard; /* protects the initialization */ + CRITICAL_SECTION lock; + } + gl_lock_t; +# define gl_lock_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_lock_t NAME; +# define gl_lock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS gl_lock_t NAME = gl_lock_initializer; +/* Sets only the guard (done = 0, started = -1); the CRITICAL_SECTION member + has no explicit initializer here. */ +# define gl_lock_initializer \ + { { 0, -1 } } +/* glthread_lock_init_func returns void, so the comma operator supplies the + 0 result. */ +# define glthread_lock_init(LOCK) \ + (glthread_lock_init_func (LOCK), 0) +# define glthread_lock_lock(LOCK) \ + glthread_lock_lock_func (LOCK) +# define glthread_lock_unlock(LOCK) \ + glthread_lock_unlock_func (LOCK) +# define glthread_lock_destroy(LOCK) \ + glthread_lock_destroy_func (LOCK) +extern void glthread_lock_init_func (gl_lock_t *lock); +extern int glthread_lock_lock_func (gl_lock_t *lock); +extern int glthread_lock_unlock_func (gl_lock_t *lock); +extern int glthread_lock_destroy_func (gl_lock_t *lock); + +/* ------------------------- gl_rwlock_t datatype ------------------------- */ + +/* It is impossible to implement read-write locks using plain locks, without + introducing an extra thread dedicated to managing read-write locks. + Therefore here we need to use the low-level Event type. 
*/ + +/* Queue of waiting threads; gl_rwlock_t keeps one for readers and one for + writers. */ +typedef struct + { + HANDLE *array; /* array of waiting threads, each represented by an event */ + unsigned int count; /* number of waiting threads */ + unsigned int alloc; /* length of allocated array */ + unsigned int offset; /* index of first waiting thread in array */ + } + gl_carray_waitqueue_t; +typedef struct + { + gl_spinlock_t guard; /* protects the initialization */ + CRITICAL_SECTION lock; /* protects the remaining fields */ + gl_carray_waitqueue_t waiting_readers; /* waiting readers */ + gl_carray_waitqueue_t waiting_writers; /* waiting writers */ + int runcount; /* number of readers running, or -1 when a writer runs */ + } + gl_rwlock_t; +# define gl_rwlock_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_rwlock_t NAME; +# define gl_rwlock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS gl_rwlock_t NAME = gl_rwlock_initializer; +/* Sets only the guard (done = 0, started = -1); the other members have no + explicit initializer here. */ +# define gl_rwlock_initializer \ + { { 0, -1 } } +/* glthread_rwlock_init_func returns void, so the comma operator supplies the + 0 result. */ +# define glthread_rwlock_init(LOCK) \ + (glthread_rwlock_init_func (LOCK), 0) +# define glthread_rwlock_rdlock(LOCK) \ + glthread_rwlock_rdlock_func (LOCK) +# define glthread_rwlock_wrlock(LOCK) \ + glthread_rwlock_wrlock_func (LOCK) +# define glthread_rwlock_unlock(LOCK) \ + glthread_rwlock_unlock_func (LOCK) +# define glthread_rwlock_destroy(LOCK) \ + glthread_rwlock_destroy_func (LOCK) +/* Return values as implemented in lock.c: + glthread_rwlock_unlock_func returns EINVAL if guard.done is 0 and EPERM + if runcount is 0 (nothing to release); when the count drops to 0 it + wakes one waiting writer in preference to the waiting readers. + glthread_rwlock_destroy_func returns EINVAL if guard.done is 0 and EBUSY + if runcount is nonzero. + Both return 0 otherwise. */ +extern void glthread_rwlock_init_func (gl_rwlock_t *lock); +extern int glthread_rwlock_rdlock_func (gl_rwlock_t *lock); +extern int glthread_rwlock_wrlock_func (gl_rwlock_t *lock); +extern int glthread_rwlock_unlock_func (gl_rwlock_t *lock); +extern int glthread_rwlock_destroy_func (gl_rwlock_t *lock); + +/* --------------------- gl_recursive_lock_t datatype --------------------- */ + +/* The native Windows documentation says that CRITICAL_SECTION already + implements a recursive lock. But we need not rely on it: It's easy to + implement a recursive lock without this assumption. 
*/ + +typedef struct + { + gl_spinlock_t guard; /* protects the initialization */ + DWORD owner; /* thread id of the current holder, 0 when not held */ + unsigned long depth; /* how many times the holder has taken the lock */ + CRITICAL_SECTION lock; + } + gl_recursive_lock_t; +# define gl_recursive_lock_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_recursive_lock_t NAME; +# define gl_recursive_lock_define_initialized(STORAGECLASS, NAME) \ + STORAGECLASS gl_recursive_lock_t NAME = gl_recursive_lock_initializer; +/* Guard not yet initialized (done = 0, started = -1), owner = 0, depth = 0. */ +# define gl_recursive_lock_initializer \ + { { 0, -1 }, 0, 0 } +/* glthread_recursive_lock_init_func returns void, so the comma operator + supplies the 0 result. */ +# define glthread_recursive_lock_init(LOCK) \ + (glthread_recursive_lock_init_func (LOCK), 0) +# define glthread_recursive_lock_lock(LOCK) \ + glthread_recursive_lock_lock_func (LOCK) +# define glthread_recursive_lock_unlock(LOCK) \ + glthread_recursive_lock_unlock_func (LOCK) +# define glthread_recursive_lock_destroy(LOCK) \ + glthread_recursive_lock_destroy_func (LOCK) +/* Return values as implemented in lock.c: + glthread_recursive_lock_lock_func returns EAGAIN if incrementing the + depth counter would wrap around to 0. + glthread_recursive_lock_unlock_func returns EPERM if the caller is not + the owner and EINVAL if the depth is already 0. + glthread_recursive_lock_destroy_func returns EBUSY while owner is nonzero. + They return 0 otherwise. */ +extern void glthread_recursive_lock_init_func (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_lock_func (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_unlock_func (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_destroy_func (gl_recursive_lock_t *lock); + +/* -------------------------- gl_once_t datatype -------------------------- */ + +typedef struct + { + volatile int inited; /* -1 initially, 0 while initfunction runs, 1 after it returned */ + volatile long started; /* -1 initially; the thread whose InterlockedIncrement yields 0 runs initfunction */ + CRITICAL_SECTION lock; + } + gl_once_t; +# define gl_once_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_once_t NAME = { -1, -1 }; +/* glthread_once_func returns void, so the comma operator supplies the + 0 result. */ +# define glthread_once(ONCE_CONTROL, INITFUNCTION) \ + (glthread_once_func (ONCE_CONTROL, INITFUNCTION), 0) +extern void glthread_once_func (gl_once_t *once_control, void (*initfunction) (void)); + +# ifdef __cplusplus +} +# endif + +#endif + +/* ========================================================================= */ + +#if !(USE_POSIX_THREADS || USE_PTH_THREADS || USE_SOLARIS_THREADS || USE_WINDOWS_THREADS) + +/* Provide dummy implementation if threads are not supported. 
*/ + +/* In this configuration the *_define and *_define_initialized macros expand + to nothing, so no lock object is declared, and every lock operation + expands to the constant 0 without evaluating its argument. */ + +/* -------------------------- gl_lock_t datatype -------------------------- */ + +typedef int gl_lock_t; +# define gl_lock_define(STORAGECLASS, NAME) +# define gl_lock_define_initialized(STORAGECLASS, NAME) +# define glthread_lock_init(NAME) 0 +# define glthread_lock_lock(NAME) 0 +# define glthread_lock_unlock(NAME) 0 +# define glthread_lock_destroy(NAME) 0 + +/* ------------------------- gl_rwlock_t datatype ------------------------- */ + +typedef int gl_rwlock_t; +# define gl_rwlock_define(STORAGECLASS, NAME) +# define gl_rwlock_define_initialized(STORAGECLASS, NAME) +# define glthread_rwlock_init(NAME) 0 +# define glthread_rwlock_rdlock(NAME) 0 +# define glthread_rwlock_wrlock(NAME) 0 +# define glthread_rwlock_unlock(NAME) 0 +# define glthread_rwlock_destroy(NAME) 0 + +/* --------------------- gl_recursive_lock_t datatype --------------------- */ + +typedef int gl_recursive_lock_t; +# define gl_recursive_lock_define(STORAGECLASS, NAME) +# define gl_recursive_lock_define_initialized(STORAGECLASS, NAME) +# define glthread_recursive_lock_init(NAME) 0 +# define glthread_recursive_lock_lock(NAME) 0 +# define glthread_recursive_lock_unlock(NAME) 0 +# define glthread_recursive_lock_destroy(NAME) 0 + +/* -------------------------- gl_once_t datatype -------------------------- */ + +/* Unlike the lock types, the once-control is a real variable: 0 means that + INITFUNCTION has not run yet. glthread_once sets it to ~0 before calling + INITFUNCTION, so later invocations skip the call. Note that the macro + evaluates ONCE_CONTROL more than once. */ +typedef int gl_once_t; +# define gl_once_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_once_t NAME = 0; +# define glthread_once(ONCE_CONTROL, INITFUNCTION) \ + (*(ONCE_CONTROL) == 0 ? (*(ONCE_CONTROL) = ~ 0, INITFUNCTION (), 0) : 0) + +#endif + +/* ========================================================================= */ + +/* Macros with built-in error handling. 
*/ + +/* -------------------------- gl_lock_t datatype -------------------------- */ + +#define gl_lock_init(NAME) \ + do \ + { \ + if (glthread_lock_init (&NAME)) \ + abort (); \ + } \ + while (0) +#define gl_lock_lock(NAME) \ + do \ + { \ + if (glthread_lock_lock (&NAME)) \ + abort (); \ + } \ + while (0) +#define gl_lock_unlock(NAME) \ + do \ + { \ + if (glthread_lock_unlock (&NAME)) \ + abort (); \ + } \ + while (0) +#define gl_lock_destroy(NAME) \ + do \ + { \ + if (glthread_lock_destroy (&NAME)) \ + abort (); \ + } \ + while (0) + +/* ------------------------- gl_rwlock_t datatype ------------------------- */ + +#define gl_rwlock_init(NAME) \ + do \ + { \ + if (glthread_rwlock_init (&NAME)) \ + abort (); \ + } \ + while (0) +#define gl_rwlock_rdlock(NAME) \ + do \ + { \ + if (glthread_rwlock_rdlock (&NAME)) \ + abort (); \ + } \ + while (0) +#define gl_rwlock_wrlock(NAME) \ + do \ + { \ + if (glthread_rwlock_wrlock (&NAME)) \ + abort (); \ + } \ + while (0) +#define gl_rwlock_unlock(NAME) \ + do \ + { \ + if (glthread_rwlock_unlock (&NAME)) \ + abort (); \ + } \ + while (0) +#define gl_rwlock_destroy(NAME) \ + do \ + { \ + if (glthread_rwlock_destroy (&NAME)) \ + abort (); \ + } \ + while (0) + +/* --------------------- gl_recursive_lock_t datatype --------------------- */ + +#define gl_recursive_lock_init(NAME) \ + do \ + { \ + if (glthread_recursive_lock_init (&NAME)) \ + abort (); \ + } \ + while (0) +#define gl_recursive_lock_lock(NAME) \ + do \ + { \ + if (glthread_recursive_lock_lock (&NAME)) \ + abort (); \ + } \ + while (0) +#define gl_recursive_lock_unlock(NAME) \ + do \ + { \ + if (glthread_recursive_lock_unlock (&NAME)) \ + abort (); \ + } \ + while (0) +#define gl_recursive_lock_destroy(NAME) \ + do \ + { \ + if (glthread_recursive_lock_destroy (&NAME)) \ + abort (); \ + } \ + while (0) + +/* -------------------------- gl_once_t datatype -------------------------- */ + +#define gl_once(NAME, INITFUNCTION) \ + do \ + { \ + if (glthread_once (&NAME, 
INITFUNCTION)) \ + abort (); \ + } \ + while (0) + +/* ========================================================================= */ + +#endif /* _LOCK_H */ diff --git a/contrib/grep/lib/glthread/threadlib.c b/contrib/grep/lib/glthread/threadlib.c new file mode 100644 index 0000000000..26870de241 --- /dev/null +++ b/contrib/grep/lib/glthread/threadlib.c @@ -0,0 +1,73 @@ +/* Multithreading primitives. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2005. */ + +#include <config.h> + +/* ========================================================================= */ + +#if USE_POSIX_THREADS + +/* Use the POSIX threads library. */ + +# include <pthread.h> +# include <stdlib.h> + +# if PTHREAD_IN_USE_DETECTION_HARD + +/* The function to be executed by a dummy thread. */ +static void * +dummy_thread_func (void *arg) +{ + return arg; +} + +int +glthread_in_use (void) +{ + static int tested; + static int result; /* 1: linked with -lpthread, 0: only with libc */ + + if (!tested) + { + pthread_t thread; + + if (pthread_create (&thread, NULL, dummy_thread_func, NULL) != 0) + /* Thread creation failed. */ + result = 0; + else + { + /* Thread creation works.
*/ + void *retval; + if (pthread_join (thread, &retval) != 0) + abort (); + result = 1; + } + tested = 1; + } + return result; +} + +# endif + +#endif + +/* ========================================================================= */ + +/* This declaration is solely to ensure that after preprocessing + this file is never empty. */ +typedef int dummy; diff --git a/contrib/grep/lib/gnulib.mk b/contrib/grep/lib/gnulib.mk index c3383f042b..5e307ed0cd 100644 --- a/contrib/grep/lib/gnulib.mk +++ b/contrib/grep/lib/gnulib.mk @@ -1,6 +1,6 @@ ## DO NOT EDIT! GENERATED AUTOMATICALLY! ## Process this file with automake to produce Makefile.in. -# Copyright (C) 2002-2012 Free Software Foundation, Inc. +# Copyright (C) 2002-2014 Free Software Foundation, Inc. # # This file is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ # the same distribution terms as the rest of that program. # # Generated by gnulib-tool. -# Reproduce by: gnulib-tool --import --dir=. 
--local-dir=gl --lib=libgreputils --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=gnulib-tests --aux-dir=build-aux --with-tests --avoid=lock-tests --makefile-name=gnulib.mk --no-conditional-dependencies --no-libtool --macro-prefix=gl alloca announce-gen argmatch binary-io btowc c-ctype closeout do-release-commit-and-tag error exclude fcntl-h fnmatch fstatat fts getopt-gnu getpagesize gettext-h git-version-gen gitlog-to-changelog gnu-web-doc-update gnupload ignore-value intprops inttypes isatty isblank iswctype largefile locale lseek maintainer-makefile malloc-gnu manywarnings mbrlen mbrtowc memchr mempcpy minmax obstack openat-safer perl progname propername quote readme-release realloc-gnu regex same-inode ssize_t stddef stdlib stpcpy strerror string strtoull strtoumax sys_stat unistd unlocked-io update-copyright useless-if-before-free version-etc-fsf wchar wcrtomb wcscoll wctob wctype-h xalloc xstrtoimax +# Reproduce by: gnulib-tool --import --dir=. --local-dir=gl --lib=libgreputils --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=gnulib-tests --aux-dir=build-aux --with-tests --avoid=lock-tests --makefile-name=gnulib.mk --no-conditional-dependencies --no-libtool --macro-prefix=gl alloca announce-gen argmatch binary-io btowc c-ctype closeout do-release-commit-and-tag error exclude fcntl-h fnmatch fstatat fts getopt-gnu getpagesize gettext-h git-version-gen gitlog-to-changelog gnu-web-doc-update gnupload ignore-value intprops inttypes isatty isblank iswctype largefile locale lseek maintainer-makefile malloc-gnu manywarnings mbrlen mbrtowc memchr memchr2 mempcpy minmax obstack openat-safer perl progname propername quote readme-release realloc-gnu regex safe-read same-inode ssize_t stddef stdlib stpcpy strerror string strtoull strtoumax sys_stat unistd unlocked-io update-copyright useless-if-before-free version-etc-fsf wchar wcrtomb wctob wctype-h xalloc xstrtoimax MOSTLYCLEANFILES += core *.stackdump @@ -33,6 +33,15 @@ libgreputils_a_LIBADD = 
$(gl_LIBOBJS) libgreputils_a_DEPENDENCIES = $(gl_LIBOBJS) EXTRA_libgreputils_a_SOURCES = +## begin gnulib module absolute-header + +# Use this preprocessor expression to decide whether #include_next works. +# Do not rely on a 'configure'-time test for this, since the expression +# might appear in an installed header, which is used by some other compiler. +HAVE_INCLUDE_NEXT = (__GNUC__ || 60000000 <= __DECC_VER) + +## end gnulib module absolute-header + ## begin gnulib module alloca @@ -93,13 +102,13 @@ EXTRA_libgreputils_a_SOURCES += openat-proc.c ## begin gnulib module binary-io -libgreputils_a_SOURCES += binary-io.h +libgreputils_a_SOURCES += binary-io.h binary-io.c ## end gnulib module binary-io ## begin gnulib module bitrotate -libgreputils_a_SOURCES += bitrotate.h +libgreputils_a_SOURCES += bitrotate.h bitrotate.c ## end gnulib module bitrotate @@ -185,7 +194,7 @@ EXTRA_DIST += closeout.h ## begin gnulib module configmake # Listed in the same order as the GNU makefile conventions, and -# provided by autoconf 2.59c+. +# provided by autoconf 2.59c+ or 2.70. # The Automake-defined pkg* macros are appended, in the order # listed in the Automake 1.10a+ documentation. 
configmake.h: Makefile @@ -201,6 +210,7 @@ configmake.h: Makefile echo '#define SYSCONFDIR "$(sysconfdir)"'; \ echo '#define SHAREDSTATEDIR "$(sharedstatedir)"'; \ echo '#define LOCALSTATEDIR "$(localstatedir)"'; \ + echo '#define RUNSTATEDIR "$(runstatedir)"'; \ echo '#define INCLUDEDIR "$(includedir)"'; \ echo '#define OLDINCLUDEDIR "$(oldincludedir)"'; \ echo '#define DOCDIR "$(docdir)"'; \ @@ -572,7 +582,7 @@ EXTRA_libgreputils_a_SOURCES += fstat.c ## begin gnulib module fstatat -EXTRA_DIST += at-func.c fstatat.c openat-priv.h +EXTRA_DIST += at-func.c fstatat.c EXTRA_libgreputils_a_SOURCES += at-func.c fstatat.c @@ -654,6 +664,15 @@ libgreputils_a_SOURCES += gettext.h ## end gnulib module gettext-h +## begin gnulib module gettimeofday + + +EXTRA_DIST += gettimeofday.c + +EXTRA_libgreputils_a_SOURCES += gettimeofday.c + +## end gnulib module gettimeofday + ## begin gnulib module git-version-gen @@ -827,6 +846,7 @@ inttypes.h: inttypes.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(WARN_ON_U -e 's/@''HAVE_DECL_STRTOIMAX''@/$(HAVE_DECL_STRTOIMAX)/g' \ -e 's/@''HAVE_DECL_STRTOUMAX''@/$(HAVE_DECL_STRTOUMAX)/g' \ -e 's/@''REPLACE_STRTOIMAX''@/$(REPLACE_STRTOIMAX)/g' \ + -e 's/@''REPLACE_STRTOUMAX''@/$(REPLACE_STRTOUMAX)/g' \ -e 's/@''INT32_MAX_LT_INTMAX_MAX''@/$(INT32_MAX_LT_INTMAX_MAX)/g' \ -e 's/@''INT64_MAX_EQ_LONG_MAX''@/$(INT64_MAX_EQ_LONG_MAX)/g' \ -e 's/@''UINT32_MAX_LT_UINTMAX_MAX''@/$(UINT32_MAX_LT_UINTMAX_MAX)/g' \ @@ -1030,6 +1050,12 @@ EXTRA_libgreputils_a_SOURCES += localeconv.c ## end gnulib module localeconv +## begin gnulib module lock + +libgreputils_a_SOURCES += glthread/lock.h glthread/lock.c + +## end gnulib module lock + ## begin gnulib module lseek @@ -1090,7 +1116,7 @@ EXTRA_DIST += mbchar.h ## begin gnulib module mbiter -libgreputils_a_SOURCES += mbiter.h +libgreputils_a_SOURCES += mbiter.h mbiter.c ## end gnulib module mbiter @@ -1161,7 +1187,7 @@ EXTRA_libgreputils_a_SOURCES += mbtowc.c ## begin gnulib module mbuiter 
-libgreputils_a_SOURCES += mbuiter.h +libgreputils_a_SOURCES += mbuiter.h mbuiter.c ## end gnulib module mbuiter @@ -1174,6 +1200,14 @@ EXTRA_libgreputils_a_SOURCES += memchr.c ## end gnulib module memchr +## begin gnulib module memchr2 + +libgreputils_a_SOURCES += memchr2.h memchr2.c + +EXTRA_DIST += memchr2.valgrind + +## end gnulib module memchr2 + ## begin gnulib module mempcpy @@ -1246,7 +1280,7 @@ EXTRA_libgreputils_a_SOURCES += open.c ## begin gnulib module openat -EXTRA_DIST += openat-priv.h openat.c +EXTRA_DIST += openat.c EXTRA_libgreputils_a_SOURCES += openat.c @@ -1316,6 +1350,15 @@ EXTRA_DIST += quote.h quotearg.h ## end gnulib module quotearg +## begin gnulib module read + + +EXTRA_DIST += read.c + +EXTRA_libgreputils_a_SOURCES += read.c + +## end gnulib module read + ## begin gnulib module readdir @@ -1359,6 +1402,14 @@ EXTRA_libgreputils_a_SOURCES += regcomp.c regex.c regex_internal.c regexec.c ## end gnulib module regex +## begin gnulib module safe-read + +libgreputils_a_SOURCES += safe-read.c + +EXTRA_DIST += safe-read.h + +## end gnulib module safe-read + ## begin gnulib module same-inode @@ -1620,6 +1671,134 @@ EXTRA_DIST += stdint.in.h ## end gnulib module stdint +## begin gnulib module stdio + +BUILT_SOURCES += stdio.h + +# We need the following in order to create <stdio.h> when the system +# doesn't have one that works with the given compiler. +stdio.h: stdio.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) + $(AM_V_GEN)rm -f $@-t $@ && \ + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY!
*/' && \ + sed -e 's|@''GUARD_PREFIX''@|GL|g' \ + -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ + -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ + -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ + -e 's|@''NEXT_STDIO_H''@|$(NEXT_STDIO_H)|g' \ + -e 's/@''GNULIB_DPRINTF''@/$(GNULIB_DPRINTF)/g' \ + -e 's/@''GNULIB_FCLOSE''@/$(GNULIB_FCLOSE)/g' \ + -e 's/@''GNULIB_FDOPEN''@/$(GNULIB_FDOPEN)/g' \ + -e 's/@''GNULIB_FFLUSH''@/$(GNULIB_FFLUSH)/g' \ + -e 's/@''GNULIB_FGETC''@/$(GNULIB_FGETC)/g' \ + -e 's/@''GNULIB_FGETS''@/$(GNULIB_FGETS)/g' \ + -e 's/@''GNULIB_FOPEN''@/$(GNULIB_FOPEN)/g' \ + -e 's/@''GNULIB_FPRINTF''@/$(GNULIB_FPRINTF)/g' \ + -e 's/@''GNULIB_FPRINTF_POSIX''@/$(GNULIB_FPRINTF_POSIX)/g' \ + -e 's/@''GNULIB_FPURGE''@/$(GNULIB_FPURGE)/g' \ + -e 's/@''GNULIB_FPUTC''@/$(GNULIB_FPUTC)/g' \ + -e 's/@''GNULIB_FPUTS''@/$(GNULIB_FPUTS)/g' \ + -e 's/@''GNULIB_FREAD''@/$(GNULIB_FREAD)/g' \ + -e 's/@''GNULIB_FREOPEN''@/$(GNULIB_FREOPEN)/g' \ + -e 's/@''GNULIB_FSCANF''@/$(GNULIB_FSCANF)/g' \ + -e 's/@''GNULIB_FSEEK''@/$(GNULIB_FSEEK)/g' \ + -e 's/@''GNULIB_FSEEKO''@/$(GNULIB_FSEEKO)/g' \ + -e 's/@''GNULIB_FTELL''@/$(GNULIB_FTELL)/g' \ + -e 's/@''GNULIB_FTELLO''@/$(GNULIB_FTELLO)/g' \ + -e 's/@''GNULIB_FWRITE''@/$(GNULIB_FWRITE)/g' \ + -e 's/@''GNULIB_GETC''@/$(GNULIB_GETC)/g' \ + -e 's/@''GNULIB_GETCHAR''@/$(GNULIB_GETCHAR)/g' \ + -e 's/@''GNULIB_GETDELIM''@/$(GNULIB_GETDELIM)/g' \ + -e 's/@''GNULIB_GETLINE''@/$(GNULIB_GETLINE)/g' \ + -e 's/@''GNULIB_OBSTACK_PRINTF''@/$(GNULIB_OBSTACK_PRINTF)/g' \ + -e 's/@''GNULIB_OBSTACK_PRINTF_POSIX''@/$(GNULIB_OBSTACK_PRINTF_POSIX)/g' \ + -e 's/@''GNULIB_PCLOSE''@/$(GNULIB_PCLOSE)/g' \ + -e 's/@''GNULIB_PERROR''@/$(GNULIB_PERROR)/g' \ + -e 's/@''GNULIB_POPEN''@/$(GNULIB_POPEN)/g' \ + -e 's/@''GNULIB_PRINTF''@/$(GNULIB_PRINTF)/g' \ + -e 's/@''GNULIB_PRINTF_POSIX''@/$(GNULIB_PRINTF_POSIX)/g' \ + -e 's/@''GNULIB_PUTC''@/$(GNULIB_PUTC)/g' \ + -e 's/@''GNULIB_PUTCHAR''@/$(GNULIB_PUTCHAR)/g' \ + -e 
's/@''GNULIB_PUTS''@/$(GNULIB_PUTS)/g' \ + -e 's/@''GNULIB_REMOVE''@/$(GNULIB_REMOVE)/g' \ + -e 's/@''GNULIB_RENAME''@/$(GNULIB_RENAME)/g' \ + -e 's/@''GNULIB_RENAMEAT''@/$(GNULIB_RENAMEAT)/g' \ + -e 's/@''GNULIB_SCANF''@/$(GNULIB_SCANF)/g' \ + -e 's/@''GNULIB_SNPRINTF''@/$(GNULIB_SNPRINTF)/g' \ + -e 's/@''GNULIB_SPRINTF_POSIX''@/$(GNULIB_SPRINTF_POSIX)/g' \ + -e 's/@''GNULIB_STDIO_H_NONBLOCKING''@/$(GNULIB_STDIO_H_NONBLOCKING)/g' \ + -e 's/@''GNULIB_STDIO_H_SIGPIPE''@/$(GNULIB_STDIO_H_SIGPIPE)/g' \ + -e 's/@''GNULIB_TMPFILE''@/$(GNULIB_TMPFILE)/g' \ + -e 's/@''GNULIB_VASPRINTF''@/$(GNULIB_VASPRINTF)/g' \ + -e 's/@''GNULIB_VDPRINTF''@/$(GNULIB_VDPRINTF)/g' \ + -e 's/@''GNULIB_VFPRINTF''@/$(GNULIB_VFPRINTF)/g' \ + -e 's/@''GNULIB_VFPRINTF_POSIX''@/$(GNULIB_VFPRINTF_POSIX)/g' \ + -e 's/@''GNULIB_VFSCANF''@/$(GNULIB_VFSCANF)/g' \ + -e 's/@''GNULIB_VSCANF''@/$(GNULIB_VSCANF)/g' \ + -e 's/@''GNULIB_VPRINTF''@/$(GNULIB_VPRINTF)/g' \ + -e 's/@''GNULIB_VPRINTF_POSIX''@/$(GNULIB_VPRINTF_POSIX)/g' \ + -e 's/@''GNULIB_VSNPRINTF''@/$(GNULIB_VSNPRINTF)/g' \ + -e 's/@''GNULIB_VSPRINTF_POSIX''@/$(GNULIB_VSPRINTF_POSIX)/g' \ + < $(srcdir)/stdio.in.h | \ + sed -e 's|@''HAVE_DECL_FPURGE''@|$(HAVE_DECL_FPURGE)|g' \ + -e 's|@''HAVE_DECL_FSEEKO''@|$(HAVE_DECL_FSEEKO)|g' \ + -e 's|@''HAVE_DECL_FTELLO''@|$(HAVE_DECL_FTELLO)|g' \ + -e 's|@''HAVE_DECL_GETDELIM''@|$(HAVE_DECL_GETDELIM)|g' \ + -e 's|@''HAVE_DECL_GETLINE''@|$(HAVE_DECL_GETLINE)|g' \ + -e 's|@''HAVE_DECL_OBSTACK_PRINTF''@|$(HAVE_DECL_OBSTACK_PRINTF)|g' \ + -e 's|@''HAVE_DECL_SNPRINTF''@|$(HAVE_DECL_SNPRINTF)|g' \ + -e 's|@''HAVE_DECL_VSNPRINTF''@|$(HAVE_DECL_VSNPRINTF)|g' \ + -e 's|@''HAVE_DPRINTF''@|$(HAVE_DPRINTF)|g' \ + -e 's|@''HAVE_FSEEKO''@|$(HAVE_FSEEKO)|g' \ + -e 's|@''HAVE_FTELLO''@|$(HAVE_FTELLO)|g' \ + -e 's|@''HAVE_PCLOSE''@|$(HAVE_PCLOSE)|g' \ + -e 's|@''HAVE_POPEN''@|$(HAVE_POPEN)|g' \ + -e 's|@''HAVE_RENAMEAT''@|$(HAVE_RENAMEAT)|g' \ + -e 's|@''HAVE_VASPRINTF''@|$(HAVE_VASPRINTF)|g' \ + -e 
's|@''HAVE_VDPRINTF''@|$(HAVE_VDPRINTF)|g' \ + -e 's|@''REPLACE_DPRINTF''@|$(REPLACE_DPRINTF)|g' \ + -e 's|@''REPLACE_FCLOSE''@|$(REPLACE_FCLOSE)|g' \ + -e 's|@''REPLACE_FDOPEN''@|$(REPLACE_FDOPEN)|g' \ + -e 's|@''REPLACE_FFLUSH''@|$(REPLACE_FFLUSH)|g' \ + -e 's|@''REPLACE_FOPEN''@|$(REPLACE_FOPEN)|g' \ + -e 's|@''REPLACE_FPRINTF''@|$(REPLACE_FPRINTF)|g' \ + -e 's|@''REPLACE_FPURGE''@|$(REPLACE_FPURGE)|g' \ + -e 's|@''REPLACE_FREOPEN''@|$(REPLACE_FREOPEN)|g' \ + -e 's|@''REPLACE_FSEEK''@|$(REPLACE_FSEEK)|g' \ + -e 's|@''REPLACE_FSEEKO''@|$(REPLACE_FSEEKO)|g' \ + -e 's|@''REPLACE_FTELL''@|$(REPLACE_FTELL)|g' \ + -e 's|@''REPLACE_FTELLO''@|$(REPLACE_FTELLO)|g' \ + -e 's|@''REPLACE_GETDELIM''@|$(REPLACE_GETDELIM)|g' \ + -e 's|@''REPLACE_GETLINE''@|$(REPLACE_GETLINE)|g' \ + -e 's|@''REPLACE_OBSTACK_PRINTF''@|$(REPLACE_OBSTACK_PRINTF)|g' \ + -e 's|@''REPLACE_PERROR''@|$(REPLACE_PERROR)|g' \ + -e 's|@''REPLACE_POPEN''@|$(REPLACE_POPEN)|g' \ + -e 's|@''REPLACE_PRINTF''@|$(REPLACE_PRINTF)|g' \ + -e 's|@''REPLACE_REMOVE''@|$(REPLACE_REMOVE)|g' \ + -e 's|@''REPLACE_RENAME''@|$(REPLACE_RENAME)|g' \ + -e 's|@''REPLACE_RENAMEAT''@|$(REPLACE_RENAMEAT)|g' \ + -e 's|@''REPLACE_SNPRINTF''@|$(REPLACE_SNPRINTF)|g' \ + -e 's|@''REPLACE_SPRINTF''@|$(REPLACE_SPRINTF)|g' \ + -e 's|@''REPLACE_STDIO_READ_FUNCS''@|$(REPLACE_STDIO_READ_FUNCS)|g' \ + -e 's|@''REPLACE_STDIO_WRITE_FUNCS''@|$(REPLACE_STDIO_WRITE_FUNCS)|g' \ + -e 's|@''REPLACE_TMPFILE''@|$(REPLACE_TMPFILE)|g' \ + -e 's|@''REPLACE_VASPRINTF''@|$(REPLACE_VASPRINTF)|g' \ + -e 's|@''REPLACE_VDPRINTF''@|$(REPLACE_VDPRINTF)|g' \ + -e 's|@''REPLACE_VFPRINTF''@|$(REPLACE_VFPRINTF)|g' \ + -e 's|@''REPLACE_VPRINTF''@|$(REPLACE_VPRINTF)|g' \ + -e 's|@''REPLACE_VSNPRINTF''@|$(REPLACE_VSNPRINTF)|g' \ + -e 's|@''REPLACE_VSPRINTF''@|$(REPLACE_VSPRINTF)|g' \ + -e 's|@''ASM_SYMBOL_PREFIX''@|$(ASM_SYMBOL_PREFIX)|g' \ + -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ + -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ + -e 
'/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)'; \ + } > $@-t && \ + mv $@-t $@ +MOSTLYCLEANFILES += stdio.h stdio.h-t + +EXTRA_DIST += stdio.in.h + +## end gnulib module stdio + ## begin gnulib module stdlib BUILT_SOURCES += stdlib.h @@ -1658,6 +1837,7 @@ stdlib.h: stdlib.in.h $(top_builddir)/config.status $(CXXDEFS_H) \ -e 's/@''GNULIB_REALLOC_POSIX''@/$(GNULIB_REALLOC_POSIX)/g' \ -e 's/@''GNULIB_REALPATH''@/$(GNULIB_REALPATH)/g' \ -e 's/@''GNULIB_RPMATCH''@/$(GNULIB_RPMATCH)/g' \ + -e 's/@''GNULIB_SECURE_GETENV''@/$(GNULIB_SECURE_GETENV)/g' \ -e 's/@''GNULIB_SETENV''@/$(GNULIB_SETENV)/g' \ -e 's/@''GNULIB_STRTOD''@/$(GNULIB_STRTOD)/g' \ -e 's/@''GNULIB_STRTOLL''@/$(GNULIB_STRTOLL)/g' \ @@ -1686,6 +1866,7 @@ stdlib.h: stdlib.in.h $(top_builddir)/config.status $(CXXDEFS_H) \ -e 's|@''HAVE_RANDOM_R''@|$(HAVE_RANDOM_R)|g' \ -e 's|@''HAVE_REALPATH''@|$(HAVE_REALPATH)|g' \ -e 's|@''HAVE_RPMATCH''@|$(HAVE_RPMATCH)|g' \ + -e 's|@''HAVE_SECURE_GETENV''@|$(HAVE_SECURE_GETENV)|g' \ -e 's|@''HAVE_DECL_SETENV''@|$(HAVE_DECL_SETENV)|g' \ -e 's|@''HAVE_STRTOD''@|$(HAVE_STRTOD)|g' \ -e 's|@''HAVE_STRTOLL''@|$(HAVE_STRTOLL)|g' \ @@ -1699,6 +1880,7 @@ stdlib.h: stdlib.in.h $(top_builddir)/config.status $(CXXDEFS_H) \ -e 's|@''REPLACE_MALLOC''@|$(REPLACE_MALLOC)|g' \ -e 's|@''REPLACE_MBTOWC''@|$(REPLACE_MBTOWC)|g' \ -e 's|@''REPLACE_MKSTEMP''@|$(REPLACE_MKSTEMP)|g' \ + -e 's|@''REPLACE_PTSNAME''@|$(REPLACE_PTSNAME)|g' \ -e 's|@''REPLACE_PTSNAME_R''@|$(REPLACE_PTSNAME_R)|g' \ -e 's|@''REPLACE_PUTENV''@|$(REPLACE_PUTENV)|g' \ -e 's|@''REPLACE_RANDOM_R''@|$(REPLACE_RANDOM_R)|g' \ @@ -1984,6 +2166,40 @@ EXTRA_DIST += sys_stat.in.h ## end gnulib module sys_stat +## begin gnulib module sys_time + +BUILT_SOURCES += sys/time.h + +# We need the following in order to create <sys/time.h> when the system +# doesn't have one that works with the given compiler.
+sys/time.h: sys_time.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) + $(AM_V_at)$(MKDIR_P) sys + $(AM_V_GEN)rm -f $@-t $@ && \ + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ + sed -e 's|@''GUARD_PREFIX''@|GL|g' \ + -e 's/@''HAVE_SYS_TIME_H''@/$(HAVE_SYS_TIME_H)/g' \ + -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ + -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ + -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ + -e 's|@''NEXT_SYS_TIME_H''@|$(NEXT_SYS_TIME_H)|g' \ + -e 's/@''GNULIB_GETTIMEOFDAY''@/$(GNULIB_GETTIMEOFDAY)/g' \ + -e 's|@''HAVE_WINSOCK2_H''@|$(HAVE_WINSOCK2_H)|g' \ + -e 's/@''HAVE_GETTIMEOFDAY''@/$(HAVE_GETTIMEOFDAY)/g' \ + -e 's/@''HAVE_STRUCT_TIMEVAL''@/$(HAVE_STRUCT_TIMEVAL)/g' \ + -e 's/@''REPLACE_GETTIMEOFDAY''@/$(REPLACE_GETTIMEOFDAY)/g' \ + -e 's/@''REPLACE_STRUCT_TIMEVAL''@/$(REPLACE_STRUCT_TIMEVAL)/g' \ + -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ + -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ + -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ + < $(srcdir)/sys_time.in.h; \ + } > $@-t && \ + mv $@-t $@ +MOSTLYCLEANFILES += sys/time.h sys/time.h-t + +EXTRA_DIST += sys_time.in.h + +## end gnulib module sys_time + ## begin gnulib module sys_types BUILT_SOURCES += sys/types.h @@ -2009,6 +2225,14 @@ EXTRA_DIST += sys_types.in.h ## end gnulib module sys_types +## begin gnulib module threadlib + +libgreputils_a_SOURCES += glthread/threadlib.c + +EXTRA_DIST += $(top_srcdir)/build-aux/config.rpath + +## end gnulib module threadlib + ## begin gnulib module time BUILT_SOURCES += time.h @@ -2023,6 +2247,7 @@ time.h: time.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $( -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ -e 's|@''NEXT_TIME_H''@|$(NEXT_TIME_H)|g' \ + -e 's/@''GNULIB_GETTIMEOFDAY''@/$(GNULIB_GETTIMEOFDAY)/g' \ -e 's/@''GNULIB_MKTIME''@/$(GNULIB_MKTIME)/g' \ -e 
's/@''GNULIB_NANOSLEEP''@/$(GNULIB_NANOSLEEP)/g' \ -e 's/@''GNULIB_STRPTIME''@/$(GNULIB_STRPTIME)/g' \ @@ -2032,6 +2257,8 @@ time.h: time.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $( -e 's|@''HAVE_NANOSLEEP''@|$(HAVE_NANOSLEEP)|g' \ -e 's|@''HAVE_STRPTIME''@|$(HAVE_STRPTIME)|g' \ -e 's|@''HAVE_TIMEGM''@|$(HAVE_TIMEGM)|g' \ + -e 's|@''REPLACE_GMTIME''@|$(REPLACE_GMTIME)|g' \ + -e 's|@''REPLACE_LOCALTIME''@|$(REPLACE_LOCALTIME)|g' \ -e 's|@''REPLACE_LOCALTIME_R''@|$(REPLACE_LOCALTIME_R)|g' \ -e 's|@''REPLACE_MKTIME''@|$(REPLACE_MKTIME)|g' \ -e 's|@''REPLACE_NANOSLEEP''@|$(REPLACE_NANOSLEEP)|g' \ @@ -2062,6 +2289,7 @@ EXTRA_DIST += trim.h ## begin gnulib module unistd BUILT_SOURCES += unistd.h +libgreputils_a_SOURCES += unistd.c # We need the following in order to create an empty placeholder for # <unistd.h> when the system doesn't have one. @@ -2176,6 +2404,7 @@ unistd.h: unistd.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H -e 's|@''REPLACE_FTRUNCATE''@|$(REPLACE_FTRUNCATE)|g' \ -e 's|@''REPLACE_GETCWD''@|$(REPLACE_GETCWD)|g' \ -e 's|@''REPLACE_GETDOMAINNAME''@|$(REPLACE_GETDOMAINNAME)|g' \ + -e 's|@''REPLACE_GETDTABLESIZE''@|$(REPLACE_GETDTABLESIZE)|g' \ -e 's|@''REPLACE_GETLOGIN_R''@|$(REPLACE_GETLOGIN_R)|g' \ -e 's|@''REPLACE_GETGROUPS''@|$(REPLACE_GETGROUPS)|g' \ -e 's|@''REPLACE_GETPAGESIZE''@|$(REPLACE_GETPAGESIZE)|g' \ @@ -2468,15 +2697,6 @@ EXTRA_libgreputils_a_SOURCES += wcrtomb.c ## end gnulib module wcrtomb -## begin gnulib module wcscoll - - -EXTRA_DIST += wcscoll-impl.h wcscoll.c - -EXTRA_libgreputils_a_SOURCES += wcscoll.c - -## end gnulib module wcscoll - ## begin gnulib module wctob @@ -2498,6 +2718,7 @@ EXTRA_libgreputils_a_SOURCES += wctomb.c ## begin gnulib module wctype-h BUILT_SOURCES += wctype.h +libgreputils_a_SOURCES += wctype-h.c # We need the following in order to create <wctype.h> when the system # doesn't have one that works with the given compiler.
diff --git a/contrib/grep/lib/hash.c b/contrib/grep/lib/hash.c index 19330576fc..d312a2644e 100644 --- a/contrib/grep/lib/hash.c +++ b/contrib/grep/lib/hash.c @@ -1,6 +1,6 @@ /* hash - hashing table processing. - Copyright (C) 1998-2004, 2006-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1998-2004, 2006-2007, 2009-2014 Free Software Foundation, Inc. Written by Jim Meyering, 1992. diff --git a/contrib/grep/lib/hash.h b/contrib/grep/lib/hash.h index 06e303bfbf..9994f5f665 100644 --- a/contrib/grep/lib/hash.h +++ b/contrib/grep/lib/hash.h @@ -1,5 +1,5 @@ /* hash - hashing table processing. - Copyright (C) 1998-1999, 2001, 2003, 2009-2012 Free Software Foundation, + Copyright (C) 1998-1999, 2001, 2003, 2009-2014 Free Software Foundation, Inc. Written by Jim Meyering , 1998. diff --git a/contrib/grep/lib/i-ring.c b/contrib/grep/lib/i-ring.c index 8bd2c541d3..4173a03fdc 100644 --- a/contrib/grep/lib/i-ring.c +++ b/contrib/grep/lib/i-ring.c @@ -1,5 +1,5 @@ /* a simple ring buffer - Copyright (C) 2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/i-ring.h b/contrib/grep/lib/i-ring.h index 9757c84d1e..7d993fa482 100644 --- a/contrib/grep/lib/i-ring.h +++ b/contrib/grep/lib/i-ring.h @@ -1,5 +1,5 @@ /* definitions for a simple ring buffer - Copyright (C) 2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/iconv_open.c b/contrib/grep/lib/iconv_open.c index 76536176b0..37ce9ce28a 100644 --- a/contrib/grep/lib/iconv_open.c +++ b/contrib/grep/lib/iconv_open.c @@ -1,5 +1,5 @@ /* Character set conversion. 
- Copyright (C) 2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2007, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/ignore-value.h b/contrib/grep/lib/ignore-value.h index 2e34435094..4dd0946d4b 100644 --- a/contrib/grep/lib/ignore-value.h +++ b/contrib/grep/lib/ignore-value.h @@ -1,6 +1,6 @@ /* ignore a function return without a compiler warning - Copyright (C) 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,15 +33,18 @@ declared with attribute warn_unused_result". */ #ifndef _GL_IGNORE_VALUE_H -# define _GL_IGNORE_VALUE_H - -/* The __attribute__((__warn_unused_result__)) feature - is available in gcc versions 3.4 and newer, - while the typeof feature has been available since 2.7 at least. */ -# if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4) -# define ignore_value(x) ((void) (x)) -# else -# define ignore_value(x) (({ __typeof__ (x) __x = (x); (void) __x; })) -# endif +#define _GL_IGNORE_VALUE_H + +/* Normally casting an expression to void discards its value, but GCC + versions 3.4 and newer have __attribute__ ((__warn_unused_result__)) + which may cause unwanted diagnostics in that case. Use __typeof__ + and __extension__ to work around the problem, if the workaround is + known to be needed. 
*/ +#if 3 < __GNUC__ + (4 <= __GNUC_MINOR__) +# define ignore_value(x) \ + (__extension__ ({ __typeof__ (x) __x = (x); (void) __x; })) +#else +# define ignore_value(x) ((void) (x)) +#endif #endif diff --git a/contrib/grep/lib/intprops.h b/contrib/grep/lib/intprops.h index 2485c78d4b..d0bb7a6f57 100644 --- a/contrib/grep/lib/intprops.h +++ b/contrib/grep/lib/intprops.h @@ -1,6 +1,6 @@ /* intprops.h -- properties of integer types - Copyright (C) 2001-2005, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2005, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -89,7 +89,8 @@ /* Return 1 if the __typeof__ keyword works. This could be done by 'configure', but for now it's easier to do it by hand. */ -#if 2 <= __GNUC__ || 0x5110 <= __SUNPRO_C +#if (2 <= __GNUC__ || defined __IBM__TYPEOF__ \ + || (0x5110 <= __SUNPRO_C && !__STDC__)) # define _GL_HAVE___TYPEOF__ 1 #else # define _GL_HAVE___TYPEOF__ 0 diff --git a/contrib/grep/lib/isatty.c b/contrib/grep/lib/isatty.c index 2ecdd5494a..e38bc9d9aa 100644 --- a/contrib/grep/lib/isatty.c +++ b/contrib/grep/lib/isatty.c @@ -1,5 +1,5 @@ /* isatty() replacement. - Copyright (C) 2012 Free Software Foundation, Inc. + Copyright (C) 2012-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,7 +37,7 @@ #define IsConsoleHandle(h) (((intptr_t) (h) & 3) == 3) #if HAVE_MSVC_INVALID_PARAMETER_HANDLER -static inline int +static int _isatty_nothrow (int fd) { int result; diff --git a/contrib/grep/lib/isblank.c b/contrib/grep/lib/isblank.c index 63ff34f0f0..4e0aed1dd4 100644 --- a/contrib/grep/lib/isblank.c +++ b/contrib/grep/lib/isblank.c @@ -1,6 +1,6 @@ /* Test whether a character is a blank. - Copyright (C) 2009-2012 Free Software Foundation, Inc. 
+ Copyright (C) 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/iswctype-impl.h b/contrib/grep/lib/iswctype-impl.h index 04bc46b8dc..3c23965f71 100644 --- a/contrib/grep/lib/iswctype-impl.h +++ b/contrib/grep/lib/iswctype-impl.h @@ -1,5 +1,5 @@ /* Test whether a wide character has a given property. - Copyright (C) 2011-2012 Free Software Foundation, Inc. + Copyright (C) 2011-2014 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2011. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/iswctype.c b/contrib/grep/lib/iswctype.c index e7c6317f76..bbc3a16a35 100644 --- a/contrib/grep/lib/iswctype.c +++ b/contrib/grep/lib/iswctype.c @@ -1,5 +1,5 @@ /* Test whether a wide character has a given property. - Copyright (C) 2011-2012 Free Software Foundation, Inc. + Copyright (C) 2011-2014 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2011. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/localcharset.c b/contrib/grep/lib/localcharset.c index b5ee2d6fef..a9288673b7 100644 --- a/contrib/grep/lib/localcharset.c +++ b/contrib/grep/lib/localcharset.c @@ -1,6 +1,6 @@ /* Determine a canonical name for the current locale's character encoding. - Copyright (C) 2000-2006, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2000-2006, 2008-2014 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -65,6 +65,11 @@ # include <os2.h> #endif +/* For MB_CUR_MAX_L */ +#if defined DARWIN7 +# include <xlocale.h> +#endif + #if ENABLE_RELOCATABLE # include "relocatable.h" #else @@ -542,5 +547,12 @@ locale_charset (void) if (codeset[0] == '\0') codeset = "ASCII"; +#ifdef DARWIN7 + /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8" + (the default codeset) does not work when MB_CUR_MAX is 1. */ + if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1) + codeset = "ASCII"; +#endif + return codeset; } diff --git a/contrib/grep/lib/localcharset.h b/contrib/grep/lib/localcharset.h index 39dc593995..9011d5805d 100644 --- a/contrib/grep/lib/localcharset.h +++ b/contrib/grep/lib/localcharset.h @@ -1,5 +1,5 @@ /* Determine a canonical name for the current locale's character encoding. - Copyright (C) 2000-2003, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2000-2003, 2009-2014 Free Software Foundation, Inc. This file is part of the GNU CHARSET Library. This program is free software; you can redistribute it and/or modify diff --git a/contrib/grep/lib/localeconv.c b/contrib/grep/lib/localeconv.c index 2c91b374f0..014e739c61 100644 --- a/contrib/grep/lib/localeconv.c +++ b/contrib/grep/lib/localeconv.c @@ -1,5 +1,5 @@ /* Query locale dependent information for formatting numbers. - Copyright (C) 2012 Free Software Foundation, Inc. + Copyright (C) 2012-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/lseek.c b/contrib/grep/lib/lseek.c index 794c230447..f19b41b154 100644 --- a/contrib/grep/lib/lseek.c +++ b/contrib/grep/lib/lseek.c @@ -1,5 +1,5 @@ /* An lseek() function that detects pipes. - Copyright (C) 2007, 2009-2012 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/lstat.c b/contrib/grep/lib/lstat.c index db119a10d3..f70fd435c0 100644 --- a/contrib/grep/lib/lstat.c +++ b/contrib/grep/lib/lstat.c @@ -1,6 +1,6 @@ /* Work around a bug of lstat on some systems - Copyright (C) 1997-2006, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 1997-2006, 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,7 +35,7 @@ typedef int dummy; # include # undef __need_system_sys_stat_h -static inline int +static int orig_lstat (const char *filename, struct stat *buf) { return lstat (filename, buf); diff --git a/contrib/grep/lib/malloc.c b/contrib/grep/lib/malloc.c index e0d5c89d18..bc81c54dcc 100644 --- a/contrib/grep/lib/malloc.c +++ b/contrib/grep/lib/malloc.c @@ -1,6 +1,6 @@ /* malloc() function that is glibc compatible. - Copyright (C) 1997-1998, 2006-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1997-1998, 2006-2007, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/malloca.c b/contrib/grep/lib/malloca.c index 1f7533a7d3..370551c534 100644 --- a/contrib/grep/lib/malloca.c +++ b/contrib/grep/lib/malloca.c @@ -1,5 +1,5 @@ /* Safe automatic memory allocation. - Copyright (C) 2003, 2006-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2003, 2006-2007, 2009-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2003. 
This program is free software; you can redistribute it and/or modify @@ -49,12 +49,18 @@ #define MAGIC_SIZE sizeof (int) /* This is how the header info would look like without any alignment considerations. */ -struct preliminary_header { void *next; char room[MAGIC_SIZE]; }; +struct preliminary_header { void *next; int magic; }; /* But the header's size must be a multiple of sa_alignment_max. */ #define HEADER_SIZE \ (((sizeof (struct preliminary_header) + sa_alignment_max - 1) / sa_alignment_max) * sa_alignment_max) -struct header { void *next; char room[HEADER_SIZE - sizeof (struct preliminary_header) + MAGIC_SIZE]; }; -verify (HEADER_SIZE == sizeof (struct header)); +union header { + void *next; + struct { + char room[HEADER_SIZE - MAGIC_SIZE]; + int word; + } magic; +}; +verify (HEADER_SIZE == sizeof (union header)); /* We make the hash table quite big, so that during lookups the probability of empty hash buckets is quite high. There is no need to make the hash table resizable, because when the hash table gets filled so much that the @@ -74,20 +80,21 @@ mmalloca (size_t n) if (nplus >= n) { - char *p = (char *) malloc (nplus); + void *p = malloc (nplus); if (p != NULL) { size_t slot; + union header *h = p; - p += HEADER_SIZE; + p = h + 1; /* Put a magic number into the indicator word. */ - ((int *) p)[-1] = MAGIC_NUMBER; + h->magic.word = MAGIC_NUMBER; /* Enter p into the hash table. */ slot = (uintptr_t) p % HASH_TABLE_SIZE; - ((struct header *) (p - HEADER_SIZE))->next = mmalloca_results[slot]; + h->next = mmalloca_results[slot]; mmalloca_results[slot] = p; return p; @@ -123,15 +130,17 @@ freea (void *p) void **chain = &mmalloca_results[slot]; for (; *chain != NULL;) { + union header *h = p; if (*chain == p) { /* Found it. Remove it from the hash table and free it. 
*/ - char *p_begin = (char *) p - HEADER_SIZE; - *chain = ((struct header *) p_begin)->next; + union header *p_begin = h - 1; + *chain = p_begin->next; free (p_begin); return; } - chain = &((struct header *) ((char *) *chain - HEADER_SIZE))->next; + h = *chain; + chain = &h[-1].next; } } /* At this point, we know it was not a mmalloca() result. */ diff --git a/contrib/grep/lib/malloca.h b/contrib/grep/lib/malloca.h index 0cedf5f633..5071e46f9e 100644 --- a/contrib/grep/lib/malloca.h +++ b/contrib/grep/lib/malloca.h @@ -1,5 +1,5 @@ /* Safe automatic memory allocation. - Copyright (C) 2003-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2003-2007, 2009-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2003. This program is free software; you can redistribute it and/or modify @@ -42,7 +42,7 @@ extern "C" { and a page size can be as small as 4096 bytes. So we cannot safely allocate anything larger than 4096 bytes. Also care for the possibility of a few compiler-allocated temporary stack slots. - This must be a macro, not an inline function. */ + This must be a macro, not a function. */ # define safe_alloca(N) ((N) < 4032 ? alloca (N) : NULL) #else # define safe_alloca(N) ((void) (N), NULL) @@ -92,7 +92,7 @@ extern void * nmalloca (size_t n, size_t s); /* ------------------- Auxiliary, non-public definitions ------------------- */ /* Determine the alignment of a type at compile time. */ -#if defined __GNUC__ +#if defined __GNUC__ || defined __IBM__ALIGNOF__ # define sa_alignof __alignof__ #elif defined __cplusplus template struct sa_alignof_helper { char __slot1; type __slot2; }; diff --git a/contrib/grep/lib/mbchar.c b/contrib/grep/lib/mbchar.c index 10709510a2..7d5bfcc173 100644 --- a/contrib/grep/lib/mbchar.c +++ b/contrib/grep/lib/mbchar.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2001, 2006, 2009-2012 Free Software Foundation, Inc. +/* Copyright (C) 2001, 2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,6 +16,8 @@ #include +#define MBCHAR_INLINE _GL_EXTERN_INLINE + #include #include "mbchar.h" diff --git a/contrib/grep/lib/mbchar.h b/contrib/grep/lib/mbchar.h index ccbcb827e5..700174c5db 100644 --- a/contrib/grep/lib/mbchar.h +++ b/contrib/grep/lib/mbchar.h @@ -1,5 +1,5 @@ /* Multibyte character data type. - Copyright (C) 2001, 2005-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001, 2005-2007, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -156,6 +156,14 @@ #include #include +#ifndef _GL_INLINE_HEADER_BEGIN + #error "Please include config.h first." +#endif +_GL_INLINE_HEADER_BEGIN +#ifndef MBCHAR_INLINE +# define MBCHAR_INLINE _GL_INLINE +#endif + #define MBCHAR_BUF_SIZE 24 struct mbchar @@ -235,7 +243,7 @@ typedef struct mbchar mbchar_t; /* Unprintable characters appear as a small box of width 1. */ #define MB_UNPRINTABLE_WIDTH 1 -static inline int +MBCHAR_INLINE int mb_width_aux (wint_t wc) { int w = wcwidth (wc); @@ -256,7 +264,7 @@ mb_width_aux (wint_t wc) (mbc)->wc = (mbc)->buf[0] = (sc)) /* Copying a character. 
*/ -static inline void +MBCHAR_INLINE void mb_copy (mbchar_t *new_mbc, const mbchar_t *old_mbc) { if (old_mbc->ptr == &old_mbc->buf[0]) @@ -304,7 +312,7 @@ mb_copy (mbchar_t *new_mbc, const mbchar_t *old_mbc) extern const unsigned int is_basic_table[]; -static inline bool +MBCHAR_INLINE bool is_basic (char c) { return (is_basic_table [(unsigned char) c >> 5] >> ((unsigned char) c & 31)) @@ -313,7 +321,7 @@ is_basic (char c) #else -static inline bool +MBCHAR_INLINE bool is_basic (char c) { switch (c) @@ -347,4 +355,6 @@ is_basic (char c) #endif +_GL_INLINE_HEADER_END + #endif /* _MBCHAR_H */ diff --git a/contrib/grep/lib/mbiter.c b/contrib/grep/lib/mbiter.c new file mode 100644 index 0000000000..22a1ff8d2e --- /dev/null +++ b/contrib/grep/lib/mbiter.c @@ -0,0 +1,3 @@ +#include +#define MBITER_INLINE _GL_EXTERN_INLINE +#include "mbiter.h" diff --git a/contrib/grep/lib/mbiter.h b/contrib/grep/lib/mbiter.h index 5419a67abc..32fff8cf89 100644 --- a/contrib/grep/lib/mbiter.h +++ b/contrib/grep/lib/mbiter.h @@ -1,5 +1,5 @@ /* Iterating through multibyte strings: macros for multi-byte encodings. - Copyright (C) 2001, 2005, 2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001, 2005, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -97,6 +97,14 @@ #include "mbchar.h" +#ifndef _GL_INLINE_HEADER_BEGIN + #error "Please include config.h first." 
+#endif +_GL_INLINE_HEADER_BEGIN +#ifndef MBITER_INLINE +# define MBITER_INLINE _GL_INLINE +#endif + struct mbiter_multi { const char *limit; /* pointer to end of string */ @@ -112,7 +120,7 @@ struct mbiter_multi */ }; -static inline void +MBITER_INLINE void mbiter_multi_next (struct mbiter_multi *iter) { if (iter->next_done) @@ -172,14 +180,14 @@ mbiter_multi_next (struct mbiter_multi *iter) iter->next_done = true; } -static inline void +MBITER_INLINE void mbiter_multi_reloc (struct mbiter_multi *iter, ptrdiff_t ptrdiff) { iter->cur.ptr += ptrdiff; iter->limit += ptrdiff; } -static inline void +MBITER_INLINE void mbiter_multi_copy (struct mbiter_multi *new_iter, const struct mbiter_multi *old_iter) { new_iter->limit = old_iter->limit; @@ -212,4 +220,6 @@ typedef struct mbiter_multi mbi_iterator_t; /* Copying an iterator. */ #define mbi_copy mbiter_multi_copy +_GL_INLINE_HEADER_END + #endif /* _MBITER_H */ diff --git a/contrib/grep/lib/mbrlen.c b/contrib/grep/lib/mbrlen.c index f52e76042b..5f180ba210 100644 --- a/contrib/grep/lib/mbrlen.c +++ b/contrib/grep/lib/mbrlen.c @@ -1,5 +1,5 @@ /* Recognize multibyte character. - Copyright (C) 1999-2000, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 1999-2000, 2008-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2008. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/mbrtowc.c b/contrib/grep/lib/mbrtowc.c index e3f354beed..5743f437f6 100644 --- a/contrib/grep/lib/mbrtowc.c +++ b/contrib/grep/lib/mbrtowc.c @@ -1,5 +1,5 @@ /* Convert multibyte character to wide character. - Copyright (C) 1999-2002, 2005-2012 Free Software Foundation, Inc. + Copyright (C) 1999-2002, 2005-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2008. 
This program is free software: you can redistribute it and/or modify @@ -328,7 +328,7 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) size_t rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) { -# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG +# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG if (s == NULL) { pwc = NULL; @@ -337,6 +337,11 @@ rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) } # endif +# if MBRTOWC_EMPTY_INPUT_BUG + if (n == 0) + return (size_t) -2; +# endif + # if MBRTOWC_RETVAL_BUG { static mbstate_t internal_state; diff --git a/contrib/grep/lib/mbscasecmp.c b/contrib/grep/lib/mbscasecmp.c index 863bf0ad29..d52448fe38 100644 --- a/contrib/grep/lib/mbscasecmp.c +++ b/contrib/grep/lib/mbscasecmp.c @@ -1,5 +1,5 @@ /* Case-insensitive string comparison function. - Copyright (C) 1998-1999, 2005-2012 Free Software Foundation, Inc. + Copyright (C) 1998-1999, 2005-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2005, based on earlier glibc code. diff --git a/contrib/grep/lib/mbsinit.c b/contrib/grep/lib/mbsinit.c index 638142e8ba..df4bcd182a 100644 --- a/contrib/grep/lib/mbsinit.c +++ b/contrib/grep/lib/mbsinit.c @@ -1,5 +1,5 @@ /* Test for initial conversion state. - Copyright (C) 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2008-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2008. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/mbslen.c b/contrib/grep/lib/mbslen.c index a829ff545c..74c6ec1241 100644 --- a/contrib/grep/lib/mbslen.c +++ b/contrib/grep/lib/mbslen.c @@ -1,5 +1,5 @@ /* Counting the multibyte characters in a string. - Copyright (C) 2007-2012 Free Software Foundation, Inc. + Copyright (C) 2007-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2007. 
This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/mbsrtowcs-state.c b/contrib/grep/lib/mbsrtowcs-state.c index cca17912f6..2a7019aaaa 100644 --- a/contrib/grep/lib/mbsrtowcs-state.c +++ b/contrib/grep/lib/mbsrtowcs-state.c @@ -1,5 +1,5 @@ /* Convert string to wide string. - Copyright (C) 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2008-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2008. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/mbsrtowcs.c b/contrib/grep/lib/mbsrtowcs.c index e3ec8767c3..136ec343b5 100644 --- a/contrib/grep/lib/mbsrtowcs.c +++ b/contrib/grep/lib/mbsrtowcs.c @@ -1,5 +1,5 @@ /* Convert string to wide string. - Copyright (C) 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2008-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2008. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/mbsstr.c b/contrib/grep/lib/mbsstr.c index 464fb7d6c9..1ae9fe4229 100644 --- a/contrib/grep/lib/mbsstr.c +++ b/contrib/grep/lib/mbsstr.c @@ -1,5 +1,5 @@ /* Searching in a string. - Copyright (C) 2005-2012 Free Software Foundation, Inc. + Copyright (C) 2005-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2005. This program is free software: you can redistribute it and/or modify @@ -45,11 +45,13 @@ knuth_morris_pratt_multibyte (const char *haystack, const char *needle, size_t *table; /* Allocate room for needle_mbchars and the table. */ - char *memory = (char *) nmalloca (m, sizeof (mbchar_t) + sizeof (size_t)); + void *memory = nmalloca (m, sizeof (mbchar_t) + sizeof (size_t)); + void *table_memory; if (memory == NULL) return false; - needle_mbchars = (mbchar_t *) memory; - table = (size_t *) (memory + m * sizeof (mbchar_t)); + needle_mbchars = memory; + table_memory = needle_mbchars + m; + table = table_memory; /* Fill needle_mbchars. 
*/ { diff --git a/contrib/grep/lib/mbuiter.c b/contrib/grep/lib/mbuiter.c new file mode 100644 index 0000000000..9167580d0d --- /dev/null +++ b/contrib/grep/lib/mbuiter.c @@ -0,0 +1,3 @@ +#include +#define MBUITER_INLINE _GL_EXTERN_INLINE +#include "mbuiter.h" diff --git a/contrib/grep/lib/mbuiter.h b/contrib/grep/lib/mbuiter.h index 44b617371b..1702f14c6e 100644 --- a/contrib/grep/lib/mbuiter.h +++ b/contrib/grep/lib/mbuiter.h @@ -1,5 +1,5 @@ /* Iterating through multibyte strings: macros for multi-byte encodings. - Copyright (C) 2001, 2005, 2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001, 2005, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -106,6 +106,14 @@ #include "mbchar.h" #include "strnlen1.h" +#ifndef _GL_INLINE_HEADER_BEGIN + #error "Please include config.h first." +#endif +_GL_INLINE_HEADER_BEGIN +#ifndef MBUITER_INLINE +# define MBUITER_INLINE _GL_INLINE +#endif + struct mbuiter_multi { bool in_shift; /* true if next byte may not be interpreted as ASCII */ @@ -120,7 +128,7 @@ struct mbuiter_multi */ }; -static inline void +MBUITER_INLINE void mbuiter_multi_next (struct mbuiter_multi *iter) { if (iter->next_done) @@ -181,13 +189,13 @@ mbuiter_multi_next (struct mbuiter_multi *iter) iter->next_done = true; } -static inline void +MBUITER_INLINE void mbuiter_multi_reloc (struct mbuiter_multi *iter, ptrdiff_t ptrdiff) { iter->cur.ptr += ptrdiff; } -static inline void +MBUITER_INLINE void mbuiter_multi_copy (struct mbuiter_multi *new_iter, const struct mbuiter_multi *old_iter) { if ((new_iter->in_shift = old_iter->in_shift)) @@ -219,4 +227,6 @@ typedef struct mbuiter_multi mbui_iterator_t; /* Copying an iterator. 
*/ #define mbui_copy mbuiter_multi_copy +_GL_INLINE_HEADER_END + #endif /* _MBUITER_H */ diff --git a/contrib/grep/lib/memchr.c b/contrib/grep/lib/memchr.c index b8fb0efb95..6709f6ea2e 100644 --- a/contrib/grep/lib/memchr.c +++ b/contrib/grep/lib/memchr.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2004, 2006, 2008-2012 +/* Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2004, 2006, 2008-2014 Free Software Foundation, Inc. Based on strlen implementation by Torbjorn Granlund (tege@sics.se), diff --git a/contrib/grep/lib/memchr.c b/contrib/grep/lib/memchr2.c similarity index 58% copy from contrib/grep/lib/memchr.c copy to contrib/grep/lib/memchr2.c index b8fb0efb95..3d79f2a207 100644 --- a/contrib/grep/lib/memchr.c +++ b/contrib/grep/lib/memchr2.c @@ -1,14 +1,12 @@ -/* Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2004, 2006, 2008-2012 +/* Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2004, 2006, 2008-2014 Free Software Foundation, Inc. Based on strlen implementation by Torbjorn Granlund (tege@sics.se), with help from Dan Sahlin (dan@sics.se) and commentary by Jim Blandy (jimb@ai.mit.edu); adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), - and implemented by Roland McGrath (roland@ai.mit.edu). - -NOTE: The canonical source of this file is maintained with the GNU C Library. -Bugs can be reported to bug-glibc@prep.ai.mit.edu. + and implemented in glibc by Roland McGrath (roland@ai.mit.edu). + Extension to memchr2 implemented by Eric Blake (ebb9@byu.net). This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -23,40 +21,19 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ -#ifndef _LIBC -# include -#endif - -#include - -#include +#include -#if defined _LIBC -# include -#else -# define reg_char char -#endif +#include "memchr2.h" #include +#include +#include -#if HAVE_BP_SYM_H || defined _LIBC -# include -#else -# define BP_SYM(sym) sym -#endif - -#undef __memchr -#ifdef _LIBC -# undef memchr -#endif - -#ifndef weak_alias -# define __memchr memchr -#endif - -/* Search no more than N bytes of S for C. */ +/* Return the first address of either C1 or C2 (treated as unsigned + char) that occurs within N bytes of the memory region S. If + neither byte appears, return NULL. */ void * -__memchr (void const *s, int c_in, size_t n) +memchr2 (void const *s, int c1_in, int c2_in, size_t n) { /* On 32-bit hardware, choosing longword to be a 32-bit unsigned long instead of a 64-bit uintmax_t tends to give better @@ -66,36 +43,51 @@ __memchr (void const *s, int c_in, size_t n) typedef unsigned long int longword; const unsigned char *char_ptr; + void const *void_ptr; const longword *longword_ptr; longword repeated_one; - longword repeated_c; - unsigned reg_char c; + longword repeated_c1; + longword repeated_c2; + unsigned char c1; + unsigned char c2; + + c1 = (unsigned char) c1_in; + c2 = (unsigned char) c2_in; - c = (unsigned char) c_in; + if (c1 == c2) + return memchr (s, c1, n); /* Handle the first few bytes by reading one byte at a time. - Do this until CHAR_PTR is aligned on a longword boundary. */ - for (char_ptr = (const unsigned char *) s; - n > 0 && (size_t) char_ptr % sizeof (longword) != 0; - --n, ++char_ptr) - if (*char_ptr == c) - return (void *) char_ptr; + Do this until VOID_PTR is aligned on a longword boundary. 
*/ + for (void_ptr = s; + n > 0 && (uintptr_t) void_ptr % sizeof (longword) != 0; + --n) + { + char_ptr = void_ptr; + if (*char_ptr == c1 || *char_ptr == c2) + return (void *) void_ptr; + void_ptr = char_ptr + 1; + } - longword_ptr = (const longword *) char_ptr; + longword_ptr = void_ptr; /* All these elucidatory comments refer to 4-byte longwords, but the theory applies equally well to any size longwords. */ /* Compute auxiliary longword values: repeated_one is a value which has a 1 in every byte. - repeated_c has c in every byte. */ + repeated_c1 has c1 in every byte. + repeated_c2 has c2 in every byte. */ repeated_one = 0x01010101; - repeated_c = c | (c << 8); - repeated_c |= repeated_c << 16; + repeated_c1 = c1 | (c1 << 8); + repeated_c2 = c2 | (c2 << 8); + repeated_c1 |= repeated_c1 << 16; + repeated_c2 |= repeated_c2 << 16; if (0xffffffffU < (longword) -1) { repeated_one |= repeated_one << 31 << 1; - repeated_c |= repeated_c << 31 << 1; + repeated_c1 |= repeated_c1 << 31 << 1; + repeated_c2 |= repeated_c2 << 31 << 1; if (8 < sizeof (longword)) { size_t i; @@ -103,18 +95,20 @@ __memchr (void const *s, int c_in, size_t n) for (i = 64; i < sizeof (longword) * 8; i *= 2) { repeated_one |= repeated_one << i; - repeated_c |= repeated_c << i; + repeated_c1 |= repeated_c1 << i; + repeated_c2 |= repeated_c2 << i; } } } /* Instead of the traditional loop which tests each byte, we will test a longword at a time. The tricky part is testing if *any of the four* - bytes in the longword in question are equal to c. We first use an xor - with repeated_c. This reduces the task to testing whether *any of the - four* bytes in longword1 is zero. + bytes in the longword in question are equal to c1 or c2. We first use + an xor with repeated_c1 and repeated_c2, respectively. This reduces + the task to testing whether *any of the four* bytes in longword1 or + longword2 is zero. - We compute tmp = + Let's consider longword1. 
We compute tmp1 = ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7). That is, we perform the following operations: 1. Subtract repeated_one. @@ -129,21 +123,27 @@ __memchr (void const *s, int c_in, size_t n) the byte ends in a single bit of value 0 and k bits of value 1. After step 2, the result is just k bits of value 1: 2^k - 1. After step 3, the result is 0. And no carry is produced. - So, if longword1 has only non-zero bytes, tmp is zero. + So, if longword1 has only non-zero bytes, tmp1 is zero. Whereas if longword1 has a zero byte, call j the position of the least significant zero byte. Then the result has a zero at positions 0, ..., j-1 and a 0x80 at position j. We cannot predict the result at the more significant bytes (positions j+1..3), but it does not matter since we already have a non-zero bit at position 8*j+7. - So, the test whether any byte in longword1 is zero is equivalent to - testing whether tmp is nonzero. */ + Similarly, we compute tmp2 = + ((longword2 - repeated_one) & ~longword2) & (repeated_one << 7). + + The test whether any byte in longword1 or longword2 is zero is equivalent + to testing whether tmp1 is nonzero or tmp2 is nonzero. We can combine + this into a single test, whether (tmp1 | tmp2) is nonzero. */ while (n >= sizeof (longword)) { - longword longword1 = *longword_ptr ^ repeated_c; + longword longword1 = *longword_ptr ^ repeated_c1; + longword longword2 = *longword_ptr ^ repeated_c2; - if ((((longword1 - repeated_one) & ~longword1) + if (((((longword1 - repeated_one) & ~longword1) + | ((longword2 - repeated_one) & ~longword2)) & (repeated_one << 7)) != 0) break; longword_ptr++; @@ -153,20 +153,17 @@ __memchr (void const *s, int c_in, size_t n) char_ptr = (const unsigned char *) longword_ptr; /* At this point, we know that either n < sizeof (longword), or one of the - sizeof (longword) bytes starting at char_ptr is == c. 
On little-endian - machines, we could determine the first such byte without any further - memory accesses, just by looking at the tmp result from the last loop - iteration. But this does not work on big-endian machines. Choose code - that works in both cases. */ + sizeof (longword) bytes starting at char_ptr is == c1 or == c2. On + little-endian machines, we could determine the first such byte without + any further memory accesses, just by looking at the (tmp1 | tmp2) result + from the last loop iteration. But this does not work on big-endian + machines. Choose code that works in both cases. */ for (; n > 0; --n, ++char_ptr) { - if (*char_ptr == c) + if (*char_ptr == c1 || *char_ptr == c2) return (void *) char_ptr; } return NULL; } -#ifdef weak_alias -weak_alias (__memchr, BP_SYM (memchr)) -#endif diff --git a/contrib/grep/lib/strnlen1.h b/contrib/grep/lib/memchr2.h similarity index 62% copy from contrib/grep/lib/strnlen1.h copy to contrib/grep/lib/memchr2.h index 4c4f0c0813..220c2b54cb 100644 --- a/contrib/grep/lib/strnlen1.h +++ b/contrib/grep/lib/memchr2.h @@ -1,5 +1,5 @@ -/* Find the length of STRING + 1, but scan at most MAXLEN bytes. - Copyright (C) 2005, 2009-2012 Free Software Foundation, Inc. +/* Scan memory for the first of two bytes. + Copyright (C) 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,27 +14,19 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . */ -#ifndef _STRNLEN1_H -#define _STRNLEN1_H - #include - #ifdef __cplusplus extern "C" { #endif +/* Return the first address of either C1 or C2 (treated as unsigned + char) that occurs within N bytes of the memory region S. If + neither byte appears, return NULL. */ -/* Find the length of STRING + 1, but scan at most MAXLEN bytes. - If no '\0' terminator is found in that many characters, return MAXLEN. 
*/ -/* This is the same as strnlen (string, maxlen - 1) + 1. */ -extern size_t strnlen1 (const char *string, size_t maxlen) +extern void *memchr2 (void const *s, int c1, int c2, size_t n) _GL_ATTRIBUTE_PURE; - #ifdef __cplusplus } #endif - - -#endif /* _STRNLEN1_H */ diff --git a/contrib/grep/lib/memchr2.valgrind b/contrib/grep/lib/memchr2.valgrind new file mode 100644 index 0000000000..778fe867dc --- /dev/null +++ b/contrib/grep/lib/memchr2.valgrind @@ -0,0 +1,14 @@ +# Suppress a valgrind message about use of uninitialized memory in memchr2(). +# Like memchr, it is safe to overestimate the length when the terminator +# is guaranteed to be found. In this case, we may end up reading a word +# that is partially uninitialized, but this use is OK for a speedup. +{ + memchr2-value4 + Memcheck:Value4 + fun:memchr2 +} +{ + memchr2-value8 + Memcheck:Value8 + fun:memchr2 +} diff --git a/contrib/grep/lib/mempcpy.c b/contrib/grep/lib/mempcpy.c index a97fe42295..ff83ee118e 100644 --- a/contrib/grep/lib/mempcpy.c +++ b/contrib/grep/lib/mempcpy.c @@ -1,5 +1,5 @@ /* Copy memory area and return pointer after last written byte. - Copyright (C) 2003, 2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2003, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/memrchr.c b/contrib/grep/lib/memrchr.c index 8f2edeb3e4..742a0c9bfb 100644 --- a/contrib/grep/lib/memrchr.c +++ b/contrib/grep/lib/memrchr.c @@ -1,6 +1,6 @@ /* memrchr -- find the last occurrence of a byte in a memory block - Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2012 Free Software + Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2014 Free Software Foundation, Inc. 
Based on strlen implementation by Torbjorn Granlund (tege@sics.se), diff --git a/contrib/grep/lib/minmax.h b/contrib/grep/lib/minmax.h index 75a9b3cc7e..af1fc9bfc6 100644 --- a/contrib/grep/lib/minmax.h +++ b/contrib/grep/lib/minmax.h @@ -1,5 +1,5 @@ /* MIN, MAX macros. - Copyright (C) 1995, 1998, 2001, 2003, 2005, 2009-2012 Free Software + Copyright (C) 1995, 1998, 2001, 2003, 2005, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify diff --git a/contrib/grep/lib/msvc-inval.c b/contrib/grep/lib/msvc-inval.c index ba76a7ea66..f83827fb4d 100644 --- a/contrib/grep/lib/msvc-inval.c +++ b/contrib/grep/lib/msvc-inval.c @@ -1,5 +1,5 @@ /* Invalid parameter handler for MSVC runtime libraries. - Copyright (C) 2011-2012 Free Software Foundation, Inc. + Copyright (C) 2011-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,7 +28,7 @@ # if MSVC_INVALID_PARAMETER_HANDLING == DEFAULT_HANDLING -static void cdecl +static void __cdecl gl_msvc_invalid_parameter_handler (const wchar_t *expression, const wchar_t *function, const wchar_t *file, @@ -45,7 +45,7 @@ gl_msvc_invalid_parameter_handler (const wchar_t *expression, # if defined _MSC_VER -static void cdecl +static void __cdecl gl_msvc_invalid_parameter_handler (const wchar_t *expression, const wchar_t *function, const wchar_t *file, @@ -94,7 +94,7 @@ gl_msvc_inval_current (void) } } -static void cdecl +static void __cdecl gl_msvc_invalid_parameter_handler (const wchar_t *expression, const wchar_t *function, const wchar_t *file, diff --git a/contrib/grep/lib/msvc-inval.h b/contrib/grep/lib/msvc-inval.h index eb6930b1c6..a0ef540fc5 100644 --- a/contrib/grep/lib/msvc-inval.h +++ b/contrib/grep/lib/msvc-inval.h @@ -1,5 +1,5 @@ /* Invalid parameter handler for MSVC runtime libraries. - Copyright (C) 2011-2012 Free Software Foundation, Inc. 
+ Copyright (C) 2011-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/msvc-nothrow.c b/contrib/grep/lib/msvc-nothrow.c index e5cf181a04..b1e5f0c97f 100644 --- a/contrib/grep/lib/msvc-nothrow.c +++ b/contrib/grep/lib/msvc-nothrow.c @@ -1,6 +1,6 @@ /* Wrappers that don't throw invalid parameter notifications with MSVC runtime libraries. - Copyright (C) 2011-2012 Free Software Foundation, Inc. + Copyright (C) 2011-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/msvc-nothrow.h b/contrib/grep/lib/msvc-nothrow.h index 2b71945b0b..0d4dea584b 100644 --- a/contrib/grep/lib/msvc-nothrow.h +++ b/contrib/grep/lib/msvc-nothrow.h @@ -1,6 +1,6 @@ /* Wrappers that don't throw invalid parameter notifications with MSVC runtime libraries. - Copyright (C) 2011-2012 Free Software Foundation, Inc. + Copyright (C) 2011-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/nl_langinfo.c b/contrib/grep/lib/nl_langinfo.c index c73c76982c..287abfd036 100644 --- a/contrib/grep/lib/nl_langinfo.c +++ b/contrib/grep/lib/nl_langinfo.c @@ -1,6 +1,6 @@ /* nl_langinfo() replacement: query locale dependent information. - Copyright (C) 2007-2012 Free Software Foundation, Inc. + Copyright (C) 2007-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/obstack.c b/contrib/grep/lib/obstack.c index fc50f1e4a8..2db87b353c 100644 --- a/contrib/grep/lib/obstack.c +++ b/contrib/grep/lib/obstack.c @@ -1,19 +1,21 @@ /* obstack.c - subroutines used implicitly by object stack macros + Copyright (C) 1988-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. - Copyright (C) 1988-1994, 1996-2006, 2009-2012 Free Software Foundation, Inc. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + . */ - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ #ifdef _LIBC # include @@ -49,6 +51,7 @@ #ifndef ELIDE_CODE + # include /* Determine default alignment. */ @@ -67,10 +70,10 @@ struct fooalign But in fact it might be less smart and round addresses to as much as DEFAULT_ROUNDING. So we prepare for it to do that. 
*/ enum - { - DEFAULT_ALIGNMENT = offsetof (struct fooalign, u), - DEFAULT_ROUNDING = sizeof (union fooround) - }; +{ + DEFAULT_ALIGNMENT = offsetof (struct fooalign, u), + DEFAULT_ROUNDING = sizeof (union fooround) +}; /* When we copy a long block of data, this is the unit to do it with. On some machines, copying successive ints does not work; @@ -104,7 +107,7 @@ int obstack_exit_failure = EXIT_FAILURE; /* A looong time ago (before 1994, anyway; we're not sure) this global variable was used by non-GNU-C macros to avoid multiple evaluation. The GNU C library still exports it because somebody might use it. */ -struct obstack *_obstack_compat; +struct obstack *_obstack_compat = 0; compat_symbol (libc, _obstack_compat, _obstack, GLIBC_2_0); # endif # endif @@ -116,19 +119,19 @@ compat_symbol (libc, _obstack_compat, _obstack, GLIBC_2_0); do not allow (expr) ? void : void. */ # define CALL_CHUNKFUN(h, size) \ - (((h) -> use_extra_arg) \ - ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \ - : (*(struct _obstack_chunk *(*) (long)) (h)->chunkfun) ((size))) + (((h)->use_extra_arg) \ + ? (*(h)->chunkfun)((h)->extra_arg, (size)) \ + : (*(struct _obstack_chunk *(*)(long))(h)->chunkfun)((size))) # define CALL_FREEFUN(h, old_chunk) \ do { \ - if ((h) -> use_extra_arg) \ - (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \ - else \ - (*(void (*) (void *)) (h)->freefun) ((old_chunk)); \ - } while (0) + if ((h)->use_extra_arg) \ + (*(h)->freefun)((h)->extra_arg, (old_chunk)); \ + else \ + (*(void (*)(void *))(h)->freefun)((old_chunk)); \ + } while (0) + - /* Initialize an obstack H for use. Specify chunk size SIZE (0 means default). Objects start on multiples of ALIGNMENT (0 means use default). 
CHUNKFUN is the function to use to allocate chunks, @@ -143,7 +146,7 @@ _obstack_begin (struct obstack *h, void *(*chunkfun) (long), void (*freefun) (void *)) { - register struct _obstack_chunk *chunk; /* points to new chunk */ + struct _obstack_chunk *chunk; /* points to new chunk */ if (alignment == 0) alignment = DEFAULT_ALIGNMENT; @@ -164,19 +167,19 @@ _obstack_begin (struct obstack *h, size = 4096 - extra; } - h->chunkfun = (struct _obstack_chunk * (*)(void *, long)) chunkfun; + h->chunkfun = (struct _obstack_chunk * (*) (void *, long)) chunkfun; h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun; h->chunk_size = size; h->alignment_mask = alignment - 1; h->use_extra_arg = 0; - chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size); + chunk = h->chunk = CALL_CHUNKFUN (h, h->chunk_size); if (!chunk) (*obstack_alloc_failed_handler) (); h->next_free = h->object_base = __PTR_ALIGN ((char *) chunk, chunk->contents, alignment - 1); h->chunk_limit = chunk->limit - = (char *) chunk + h->chunk_size; + = (char *) chunk + h->chunk_size; chunk->prev = 0; /* The initial chunk now contains no empty object. 
*/ h->maybe_empty_object = 0; @@ -190,7 +193,7 @@ _obstack_begin_1 (struct obstack *h, int size, int alignment, void (*freefun) (void *, void *), void *arg) { - register struct _obstack_chunk *chunk; /* points to new chunk */ + struct _obstack_chunk *chunk; /* points to new chunk */ if (alignment == 0) alignment = DEFAULT_ALIGNMENT; @@ -218,13 +221,13 @@ _obstack_begin_1 (struct obstack *h, int size, int alignment, h->extra_arg = arg; h->use_extra_arg = 1; - chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size); + chunk = h->chunk = CALL_CHUNKFUN (h, h->chunk_size); if (!chunk) (*obstack_alloc_failed_handler) (); h->next_free = h->object_base = __PTR_ALIGN ((char *) chunk, chunk->contents, alignment - 1); h->chunk_limit = chunk->limit - = (char *) chunk + h->chunk_size; + = (char *) chunk + h->chunk_size; chunk->prev = 0; /* The initial chunk now contains no empty object. */ h->maybe_empty_object = 0; @@ -241,11 +244,11 @@ _obstack_begin_1 (struct obstack *h, int size, int alignment, void _obstack_newchunk (struct obstack *h, int length) { - register struct _obstack_chunk *old_chunk = h->chunk; - register struct _obstack_chunk *new_chunk; - register long new_size; - register long obj_size = h->next_free - h->object_base; - register long i; + struct _obstack_chunk *old_chunk = h->chunk; + struct _obstack_chunk *new_chunk; + long new_size; + long obj_size = h->next_free - h->object_base; + long i; long already; char *object_base; @@ -257,7 +260,7 @@ _obstack_newchunk (struct obstack *h, int length) /* Allocate and initialize the new chunk. 
*/ new_chunk = CALL_CHUNKFUN (h, new_size); if (!new_chunk) - (*obstack_alloc_failed_handler) (); + (*obstack_alloc_failed_handler)(); h->chunk = new_chunk; new_chunk->prev = old_chunk; new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size; @@ -273,8 +276,8 @@ _obstack_newchunk (struct obstack *h, int length) { for (i = obj_size / sizeof (COPYING_UNIT) - 1; i >= 0; i--) - ((COPYING_UNIT *)object_base)[i] - = ((COPYING_UNIT *)h->object_base)[i]; + ((COPYING_UNIT *) object_base)[i] + = ((COPYING_UNIT *) h->object_base)[i]; /* We used to copy the odd few remaining bytes as one extra COPYING_UNIT, but that can cross a page boundary on a machine which does not do strict alignment for COPYING_UNITS. */ @@ -289,7 +292,7 @@ _obstack_newchunk (struct obstack *h, int length) /* If the object just copied was the only data in OLD_CHUNK, free that chunk and remove it from the chain. But not if that chunk might contain an empty object. */ - if (! h->maybe_empty_object + if (!h->maybe_empty_object && (h->object_base == __PTR_ALIGN ((char *) old_chunk, old_chunk->contents, h->alignment_mask))) @@ -313,13 +316,13 @@ libc_hidden_def (_obstack_newchunk) /* Suppress -Wmissing-prototypes warning. We don't want to declare this in obstack.h because it is just for debugging. 
*/ -int _obstack_allocated_p (struct obstack *h, void *obj); +int _obstack_allocated_p (struct obstack *h, void *obj) __attribute_pure__; int _obstack_allocated_p (struct obstack *h, void *obj) { - register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ - register struct _obstack_chunk *plp; /* point to previous chunk if any */ + struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ + struct _obstack_chunk *plp; /* point to previous chunk if any */ lp = (h)->chunk; /* We use >= rather than > since the object cannot be exactly at @@ -332,7 +335,7 @@ _obstack_allocated_p (struct obstack *h, void *obj) } return lp != 0; } - + /* Free objects in obstack H, including OBJ and everything allocate more recently than OBJ. If OBJ is zero, free everything in H. */ @@ -341,8 +344,8 @@ _obstack_allocated_p (struct obstack *h, void *obj) void __obstack_free (struct obstack *h, void *obj) { - register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ - register struct _obstack_chunk *plp; /* point to previous chunk if any */ + struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ + struct _obstack_chunk *plp; /* point to previous chunk if any */ lp = h->chunk; /* We use >= because there cannot be an object at the beginning of a chunk. @@ -373,12 +376,12 @@ __obstack_free (struct obstack *h, void *obj) called by non-GCC compilers. */ strong_alias (obstack_free, _obstack_free) # endif - + int _obstack_memory_used (struct obstack *h) { - register struct _obstack_chunk* lp; - register int nbytes = 0; + struct _obstack_chunk *lp; + int nbytes = 0; for (lp = h->chunk; lp != 0; lp = lp->prev) { @@ -386,7 +389,7 @@ _obstack_memory_used (struct obstack *h) } return nbytes; } - + /* Define the error handler. 
*/ # ifdef _LIBC # include diff --git a/contrib/grep/lib/obstack.h b/contrib/grep/lib/obstack.h index 6a44b44b97..ebaa622f0f 100644 --- a/contrib/grep/lib/obstack.h +++ b/contrib/grep/lib/obstack.h @@ -1,90 +1,90 @@ /* obstack.h - object stack macros - Copyright (C) 1988-1994, 1996-1999, 2003-2006, 2009-2012 Free Software - Foundation, Inc. + Copyright (C) 1988-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + . */ /* Summary: -All the apparent functions defined here are macros. The idea -is that you would use these pre-tested macros to solve a -very specific set of problems, and they would run fast. -Caution: no side-effects in arguments please!! They may be -evaluated MANY times!! - -These macros operate a stack of objects. Each object starts life -small, and may grow to maturity. 
(Consider building a word syllable -by syllable.) An object can move while it is growing. Once it has -been "finished" it never changes address again. So the "top of the -stack" is typically an immature growing object, while the rest of the -stack is of mature, fixed size and fixed address objects. - -These routines grab large chunks of memory, using a function you -supply, called 'obstack_chunk_alloc'. On occasion, they free chunks, -by calling 'obstack_chunk_free'. You must define them and declare -them before using any obstack macros. - -Each independent stack is represented by a 'struct obstack'. -Each of the obstack macros expects a pointer to such a structure -as the first argument. - -One motivation for this package is the problem of growing char strings -in symbol tables. Unless you are "fascist pig with a read-only mind" ---Gosper's immortal quote from HAKMEM item 154, out of context--you -would not like to put any arbitrary upper limit on the length of your -symbols. - -In practice this often means you will build many short symbols and a -few long symbols. At the time you are reading a symbol you don't know -how long it is. One traditional method is to read a symbol into a -buffer, realloc()ating the buffer every time you try to read a symbol -that is longer than the buffer. This is beaut, but you still will -want to copy the symbol from the buffer to a more permanent -symbol-table entry say about half the time. - -With obstacks, you can work differently. Use one obstack for all symbol -names. As you read a symbol, grow the name in the obstack gradually. -When the name is complete, finalize it. Then, if the symbol exists already, -free the newly read name. - -The way we do this is to take a large chunk, allocating memory from -low addresses. When you want to build a symbol in the chunk you just -add chars above the current "high water mark" in the chunk. 
When you -have finished adding chars, because you got to the end of the symbol, -you know how long the chars are, and you can create a new object. -Mostly the chars will not burst over the highest address of the chunk, -because you would typically expect a chunk to be (say) 100 times as -long as an average object. - -In case that isn't clear, when we have enough chars to make up -the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed) -so we just point to it where it lies. No moving of chars is -needed and this is the second win: potentially long strings need -never be explicitly shuffled. Once an object is formed, it does not -change its address during its lifetime. - -When the chars burst over a chunk boundary, we allocate a larger -chunk, and then copy the partly formed object from the end of the old -chunk to the beginning of the new larger chunk. We then carry on -accreting characters to the end of the object as we normally would. - -A special macro is provided to add a single char at a time to a -growing object. This allows the use of register variables, which -break the ordinary 'growth' macro. - -Summary: + All the apparent functions defined here are macros. The idea + is that you would use these pre-tested macros to solve a + very specific set of problems, and they would run fast. + Caution: no side-effects in arguments please!! They may be + evaluated MANY times!! + + These macros operate a stack of objects. Each object starts life + small, and may grow to maturity. (Consider building a word syllable + by syllable.) An object can move while it is growing. Once it has + been "finished" it never changes address again. So the "top of the + stack" is typically an immature growing object, while the rest of the + stack is of mature, fixed size and fixed address objects. + + These routines grab large chunks of memory, using a function you + supply, called 'obstack_chunk_alloc'. On occasion, they free chunks, + by calling 'obstack_chunk_free'. 
You must define them and declare + them before using any obstack macros. + + Each independent stack is represented by a 'struct obstack'. + Each of the obstack macros expects a pointer to such a structure + as the first argument. + + One motivation for this package is the problem of growing char strings + in symbol tables. Unless you are "fascist pig with a read-only mind" + --Gosper's immortal quote from HAKMEM item 154, out of context--you + would not like to put any arbitrary upper limit on the length of your + symbols. + + In practice this often means you will build many short symbols and a + few long symbols. At the time you are reading a symbol you don't know + how long it is. One traditional method is to read a symbol into a + buffer, realloc()ating the buffer every time you try to read a symbol + that is longer than the buffer. This is beaut, but you still will + want to copy the symbol from the buffer to a more permanent + symbol-table entry say about half the time. + + With obstacks, you can work differently. Use one obstack for all symbol + names. As you read a symbol, grow the name in the obstack gradually. + When the name is complete, finalize it. Then, if the symbol exists already, + free the newly read name. + + The way we do this is to take a large chunk, allocating memory from + low addresses. When you want to build a symbol in the chunk you just + add chars above the current "high water mark" in the chunk. When you + have finished adding chars, because you got to the end of the symbol, + you know how long the chars are, and you can create a new object. + Mostly the chars will not burst over the highest address of the chunk, + because you would typically expect a chunk to be (say) 100 times as + long as an average object. + + In case that isn't clear, when we have enough chars to make up + the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed) + so we just point to it where it lies. 
No moving of chars is + needed and this is the second win: potentially long strings need + never be explicitly shuffled. Once an object is formed, it does not + change its address during its lifetime. + + When the chars burst over a chunk boundary, we allocate a larger + chunk, and then copy the partly formed object from the end of the old + chunk to the beginning of the new larger chunk. We then carry on + accreting characters to the end of the object as we normally would. + + A special macro is provided to add a single char at a time to a + growing object. This allows the use of register variables, which + break the ordinary 'growth' macro. + + Summary: We allocate large chunks. We carve out one object at a time from the current chunk. Once carved, an object never moves. @@ -96,14 +96,14 @@ Summary: Because of the way we do it, you can "unwind" an obstack back to a previous state. (You may remove objects much as you would with a stack.) -*/ + */ /* Don't do the contents of this file more than once. */ #ifndef _OBSTACK_H #define _OBSTACK_H 1 - + /* We need the type of a pointer subtraction. If __PTRDIFF_TYPE__ is defined, as with GNU C, use that; that way we don't pollute the namespace with 's symbols. Otherwise, include @@ -129,70 +129,74 @@ Summary: relative to B. Otherwise, use the faster strategy of computing the alignment relative to 0. */ -#define __PTR_ALIGN(B, P, A) \ +#define __PTR_ALIGN(B, P, A) \ __BPTR_ALIGN (sizeof (PTR_INT_TYPE) < sizeof (void *) ? (B) : (char *) 0, \ P, A) #include +#ifndef __attribute_pure__ +# define __attribute_pure__ _GL_ATTRIBUTE_PURE +#endif + #ifdef __cplusplus extern "C" { #endif struct _obstack_chunk /* Lives at front of each chunk. 
*/ { - char *limit; /* 1 past end of this chunk */ + char *limit; /* 1 past end of this chunk */ struct _obstack_chunk *prev; /* address of prior chunk or NULL */ - char contents[4]; /* objects begin here */ + char contents[4]; /* objects begin here */ }; struct obstack /* control current object in current chunk */ { - long chunk_size; /* preferred size to allocate chunks in */ + long chunk_size; /* preferred size to allocate chunks in */ struct _obstack_chunk *chunk; /* address of current struct obstack_chunk */ - char *object_base; /* address of object we are building */ - char *next_free; /* where to add next char to current object */ - char *chunk_limit; /* address of char after current chunk */ + char *object_base; /* address of object we are building */ + char *next_free; /* where to add next char to current object */ + char *chunk_limit; /* address of char after current chunk */ union { PTR_INT_TYPE tempint; void *tempptr; } temp; /* Temporary for some macros. */ - int alignment_mask; /* Mask of alignment for each object. */ + int alignment_mask; /* Mask of alignment for each object. */ /* These prototypes vary based on 'use_extra_arg', and we use casts to the prototypeless function type in all assignments, but having prototypes here quiets -Wstrict-prototypes. */ struct _obstack_chunk *(*chunkfun) (void *, long); void (*freefun) (void *, struct _obstack_chunk *); void *extra_arg; /* first arg for chunk alloc/dealloc funcs */ - unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */ - unsigned maybe_empty_object:1;/* There is a possibility that the current - chunk contains a zero-length object. This - prevents freeing the chunk if we allocate - a bigger chunk to replace it. */ - unsigned alloc_failed:1; /* No longer used, as we now call the failed - handler on error, but retained for binary - compatibility. 
*/ + unsigned use_extra_arg : 1; /* chunk alloc/dealloc funcs take extra arg */ + unsigned maybe_empty_object : 1; /* There is a possibility that the current + chunk contains a zero-length object. This + prevents freeing the chunk if we allocate + a bigger chunk to replace it. */ + unsigned alloc_failed : 1; /* No longer used, as we now call the failed + handler on error, but retained for binary + compatibility. */ }; /* Declare the external functions we use; they are in obstack.c. */ extern void _obstack_newchunk (struct obstack *, int); extern int _obstack_begin (struct obstack *, int, int, - void *(*) (long), void (*) (void *)); + void *(*)(long), void (*)(void *)); extern int _obstack_begin_1 (struct obstack *, int, int, - void *(*) (void *, long), - void (*) (void *, void *), void *); -extern int _obstack_memory_used (struct obstack *); + void *(*)(void *, long), + void (*)(void *, void *), void *); +extern int _obstack_memory_used (struct obstack *) __attribute_pure__; /* The default name of the function for freeing a chunk is 'obstack_free', but gnulib users can override this by defining '__obstack_free'. */ #ifndef __obstack_free # define __obstack_free obstack_free #endif -extern void __obstack_free (struct obstack *obstack, void *block); +extern void __obstack_free (struct obstack *, void *); + - /* Error handler called when 'obstack_chunk_alloc' failed to allocate more memory. This can be set to a user defined function which should either abort gracefully or use longjump - but shouldn't @@ -201,7 +205,7 @@ extern void (*obstack_alloc_failed_handler) (void); /* Exit value used when 'print_and_abort' is used. */ extern int obstack_exit_failure; - + /* Pointer to beginning of object being allocated or to be allocated next. Note that this might not be the final address of the object because a new chunk might be needed to hold the final size. 
*/ @@ -221,43 +225,40 @@ extern int obstack_exit_failure; #define obstack_alignment_mask(h) ((h)->alignment_mask) /* To prevent prototype warnings provide complete argument list. */ -#define obstack_init(h) \ - _obstack_begin ((h), 0, 0, \ - (void *(*) (long)) obstack_chunk_alloc, \ - (void (*) (void *)) obstack_chunk_free) +#define obstack_init(h) \ + _obstack_begin ((h), 0, 0, \ + (void *(*)(long))obstack_chunk_alloc, \ + (void (*)(void *))obstack_chunk_free) -#define obstack_begin(h, size) \ - _obstack_begin ((h), (size), 0, \ - (void *(*) (long)) obstack_chunk_alloc, \ - (void (*) (void *)) obstack_chunk_free) +#define obstack_begin(h, size) \ + _obstack_begin ((h), (size), 0, \ + (void *(*)(long))obstack_chunk_alloc, \ + (void (*)(void *))obstack_chunk_free) #define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \ - _obstack_begin ((h), (size), (alignment), \ - (void *(*) (long)) (chunkfun), \ - (void (*) (void *)) (freefun)) + _obstack_begin ((h), (size), (alignment), \ + (void *(*)(long))(chunkfun), \ + (void (*)(void *))(freefun)) #define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \ - _obstack_begin_1 ((h), (size), (alignment), \ - (void *(*) (void *, long)) (chunkfun), \ - (void (*) (void *, void *)) (freefun), (arg)) + _obstack_begin_1 ((h), (size), (alignment), \ + (void *(*)(void *, long))(chunkfun), \ + (void (*)(void *, void *))(freefun), (arg)) #define obstack_chunkfun(h, newchunkfun) \ - ((h) -> chunkfun = (struct _obstack_chunk *(*)(void *, long)) (newchunkfun)) + ((h)->chunkfun = (struct _obstack_chunk *(*)(void *, long))(newchunkfun)) #define obstack_freefun(h, newfreefun) \ - ((h) -> freefun = (void (*)(void *, struct _obstack_chunk *)) (newfreefun)) + ((h)->freefun = (void (*)(void *, struct _obstack_chunk *))(newfreefun)) -#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = (achar)) +#define obstack_1grow_fast(h, achar) (*((h)->next_free)++ = (achar)) -#define 
obstack_blank_fast(h,n) ((h)->next_free += (n)) +#define obstack_blank_fast(h, n) ((h)->next_free += (n)) #define obstack_memory_used(h) _obstack_memory_used (h) - + #if defined __GNUC__ -/* NextStep 2.0 cc is really gcc 1.93 but it defines __GNUC__ = 2 and - does not implement __extension__. But that compiler doesn't define - __GNUC_MINOR__. */ -# if __GNUC__ < 2 || (__NeXT__ && !__GNUC_MINOR__) +# if ! (2 < __GNUC__ + (8 <= __GNUC_MINOR__)) # define __extension__ # endif @@ -266,158 +267,160 @@ extern int obstack_exit_failure; without using a global variable. Also, we can avoid using the 'temp' slot, to make faster code. */ -# define obstack_object_size(OBSTACK) \ - __extension__ \ - ({ struct obstack const *__o = (OBSTACK); \ - (unsigned) (__o->next_free - __o->object_base); }) - -# define obstack_room(OBSTACK) \ - __extension__ \ - ({ struct obstack const *__o = (OBSTACK); \ - (unsigned) (__o->chunk_limit - __o->next_free); }) - -# define obstack_make_room(OBSTACK,length) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - int __len = (length); \ - if (__o->chunk_limit - __o->next_free < __len) \ - _obstack_newchunk (__o, __len); \ - (void) 0; }) - -# define obstack_empty_p(OBSTACK) \ - __extension__ \ - ({ struct obstack const *__o = (OBSTACK); \ - (__o->chunk->prev == 0 \ - && __o->next_free == __PTR_ALIGN ((char *) __o->chunk, \ - __o->chunk->contents, \ - __o->alignment_mask)); }) - -# define obstack_grow(OBSTACK,where,length) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - int __len = (length); \ - if (__o->next_free + __len > __o->chunk_limit) \ - _obstack_newchunk (__o, __len); \ - memcpy (__o->next_free, where, __len); \ - __o->next_free += __len; \ - (void) 0; }) - -# define obstack_grow0(OBSTACK,where,length) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - int __len = (length); \ - if (__o->next_free + __len + 1 > __o->chunk_limit) \ - _obstack_newchunk (__o, __len + 1); \ - memcpy (__o->next_free, where, __len); \ - 
__o->next_free += __len; \ - *(__o->next_free)++ = 0; \ - (void) 0; }) - -# define obstack_1grow(OBSTACK,datum) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - if (__o->next_free + 1 > __o->chunk_limit) \ - _obstack_newchunk (__o, 1); \ - obstack_1grow_fast (__o, datum); \ - (void) 0; }) +# define obstack_object_size(OBSTACK) \ + __extension__ \ + ({ struct obstack const *__o = (OBSTACK); \ + (unsigned) (__o->next_free - __o->object_base); }) + +# define obstack_room(OBSTACK) \ + __extension__ \ + ({ struct obstack const *__o = (OBSTACK); \ + (unsigned) (__o->chunk_limit - __o->next_free); }) + +# define obstack_make_room(OBSTACK, length) \ + __extension__ \ + ({ struct obstack *__o = (OBSTACK); \ + int __len = (length); \ + if (__o->chunk_limit - __o->next_free < __len) \ + _obstack_newchunk (__o, __len); \ + (void) 0; }) + +# define obstack_empty_p(OBSTACK) \ + __extension__ \ + ({ struct obstack const *__o = (OBSTACK); \ + (__o->chunk->prev == 0 \ + && __o->next_free == __PTR_ALIGN ((char *) __o->chunk, \ + __o->chunk->contents, \ + __o->alignment_mask)); }) + +# define obstack_grow(OBSTACK, where, length) \ + __extension__ \ + ({ struct obstack *__o = (OBSTACK); \ + int __len = (length); \ + if (__o->next_free + __len > __o->chunk_limit) \ + _obstack_newchunk (__o, __len); \ + memcpy (__o->next_free, where, __len); \ + __o->next_free += __len; \ + (void) 0; }) + +# define obstack_grow0(OBSTACK, where, length) \ + __extension__ \ + ({ struct obstack *__o = (OBSTACK); \ + int __len = (length); \ + if (__o->next_free + __len + 1 > __o->chunk_limit) \ + _obstack_newchunk (__o, __len + 1); \ + memcpy (__o->next_free, where, __len); \ + __o->next_free += __len; \ + *(__o->next_free)++ = 0; \ + (void) 0; }) + +# define obstack_1grow(OBSTACK, datum) \ + __extension__ \ + ({ struct obstack *__o = (OBSTACK); \ + if (__o->next_free + 1 > __o->chunk_limit) \ + _obstack_newchunk (__o, 1); \ + obstack_1grow_fast (__o, datum); \ + (void) 0; }) /* These assume that 
the obstack alignment is good enough for pointers or ints, and that the data added so far to the current object shares that much alignment. */ -# define obstack_ptr_grow(OBSTACK,datum) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - if (__o->next_free + sizeof (void *) > __o->chunk_limit) \ - _obstack_newchunk (__o, sizeof (void *)); \ - obstack_ptr_grow_fast (__o, datum); }) \ - -# define obstack_int_grow(OBSTACK,datum) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - if (__o->next_free + sizeof (int) > __o->chunk_limit) \ - _obstack_newchunk (__o, sizeof (int)); \ - obstack_int_grow_fast (__o, datum); }) - -# define obstack_ptr_grow_fast(OBSTACK,aptr) \ -__extension__ \ -({ struct obstack *__o1 = (OBSTACK); \ - *(const void **) __o1->next_free = (aptr); \ - __o1->next_free += sizeof (const void *); \ - (void) 0; }) - -# define obstack_int_grow_fast(OBSTACK,aint) \ -__extension__ \ -({ struct obstack *__o1 = (OBSTACK); \ - *(int *) __o1->next_free = (aint); \ - __o1->next_free += sizeof (int); \ - (void) 0; }) - -# define obstack_blank(OBSTACK,length) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - int __len = (length); \ - if (__o->chunk_limit - __o->next_free < __len) \ - _obstack_newchunk (__o, __len); \ - obstack_blank_fast (__o, __len); \ - (void) 0; }) - -# define obstack_alloc(OBSTACK,length) \ -__extension__ \ -({ struct obstack *__h = (OBSTACK); \ - obstack_blank (__h, (length)); \ - obstack_finish (__h); }) - -# define obstack_copy(OBSTACK,where,length) \ -__extension__ \ -({ struct obstack *__h = (OBSTACK); \ - obstack_grow (__h, (where), (length)); \ - obstack_finish (__h); }) - -# define obstack_copy0(OBSTACK,where,length) \ -__extension__ \ -({ struct obstack *__h = (OBSTACK); \ - obstack_grow0 (__h, (where), (length)); \ - obstack_finish (__h); }) +# define obstack_ptr_grow(OBSTACK, datum) \ + __extension__ \ + ({ struct obstack *__o = (OBSTACK); \ + if (__o->next_free + sizeof (void *) > __o->chunk_limit) \ + 
_obstack_newchunk (__o, sizeof (void *)); \ + obstack_ptr_grow_fast (__o, datum); }) \ + +# define obstack_int_grow(OBSTACK, datum) \ + __extension__ \ + ({ struct obstack *__o = (OBSTACK); \ + if (__o->next_free + sizeof (int) > __o->chunk_limit) \ + _obstack_newchunk (__o, sizeof (int)); \ + obstack_int_grow_fast (__o, datum); }) + +# define obstack_ptr_grow_fast(OBSTACK, aptr) \ + __extension__ \ + ({ struct obstack *__o1 = (OBSTACK); \ + void *__p1 = __o1->next_free; \ + *(const void **) __p1 = (aptr); \ + __o1->next_free += sizeof (const void *); \ + (void) 0; }) + +# define obstack_int_grow_fast(OBSTACK, aint) \ + __extension__ \ + ({ struct obstack *__o1 = (OBSTACK); \ + void *__p1 = __o1->next_free; \ + *(int *) __p1 = (aint); \ + __o1->next_free += sizeof (int); \ + (void) 0; }) + +# define obstack_blank(OBSTACK, length) \ + __extension__ \ + ({ struct obstack *__o = (OBSTACK); \ + int __len = (length); \ + if (__o->chunk_limit - __o->next_free < __len) \ + _obstack_newchunk (__o, __len); \ + obstack_blank_fast (__o, __len); \ + (void) 0; }) + +# define obstack_alloc(OBSTACK, length) \ + __extension__ \ + ({ struct obstack *__h = (OBSTACK); \ + obstack_blank (__h, (length)); \ + obstack_finish (__h); }) + +# define obstack_copy(OBSTACK, where, length) \ + __extension__ \ + ({ struct obstack *__h = (OBSTACK); \ + obstack_grow (__h, (where), (length)); \ + obstack_finish (__h); }) + +# define obstack_copy0(OBSTACK, where, length) \ + __extension__ \ + ({ struct obstack *__h = (OBSTACK); \ + obstack_grow0 (__h, (where), (length)); \ + obstack_finish (__h); }) /* The local variable is named __o1 to avoid a name conflict when obstack_blank is called. 
*/ -# define obstack_finish(OBSTACK) \ -__extension__ \ -({ struct obstack *__o1 = (OBSTACK); \ - void *__value = (void *) __o1->object_base; \ - if (__o1->next_free == __value) \ - __o1->maybe_empty_object = 1; \ - __o1->next_free \ - = __PTR_ALIGN (__o1->object_base, __o1->next_free, \ - __o1->alignment_mask); \ - if (__o1->next_free - (char *)__o1->chunk \ - > __o1->chunk_limit - (char *)__o1->chunk) \ - __o1->next_free = __o1->chunk_limit; \ - __o1->object_base = __o1->next_free; \ - __value; }) - -# define obstack_free(OBSTACK, OBJ) \ -__extension__ \ -({ struct obstack *__o = (OBSTACK); \ - void *__obj = (OBJ); \ - if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit) \ - __o->next_free = __o->object_base = (char *)__obj; \ - else (__obstack_free) (__o, __obj); }) - +# define obstack_finish(OBSTACK) \ + __extension__ \ + ({ struct obstack *__o1 = (OBSTACK); \ + void *__value = (void *) __o1->object_base; \ + if (__o1->next_free == __value) \ + __o1->maybe_empty_object = 1; \ + __o1->next_free \ + = __PTR_ALIGN (__o1->object_base, __o1->next_free, \ + __o1->alignment_mask); \ + if (__o1->next_free - (char *) __o1->chunk \ + > __o1->chunk_limit - (char *) __o1->chunk) \ + __o1->next_free = __o1->chunk_limit; \ + __o1->object_base = __o1->next_free; \ + __value; }) + +# define obstack_free(OBSTACK, OBJ) \ + __extension__ \ + ({ struct obstack *__o = (OBSTACK); \ + void *__obj = (OBJ); \ + if (__obj > (void *) __o->chunk && __obj < (void *) __o->chunk_limit) \ + __o->next_free = __o->object_base = (char *) __obj; \ + else (__obstack_free) (__o, __obj); }) + #else /* not __GNUC__ */ # define obstack_object_size(h) \ - (unsigned) ((h)->next_free - (h)->object_base) + (unsigned) ((h)->next_free - (h)->object_base) -# define obstack_room(h) \ - (unsigned) ((h)->chunk_limit - (h)->next_free) +# define obstack_room(h) \ + (unsigned) ((h)->chunk_limit - (h)->next_free) # define obstack_empty_p(h) \ - ((h)->chunk->prev == 0 \ - && (h)->next_free == 
__PTR_ALIGN ((char *) (h)->chunk, \ - (h)->chunk->contents, \ - (h)->alignment_mask)) + ((h)->chunk->prev == 0 \ + && (h)->next_free == __PTR_ALIGN ((char *) (h)->chunk, \ + (h)->chunk->contents, \ + (h)->alignment_mask)) /* Note that the call to _obstack_newchunk is enclosed in (..., 0) so that we can avoid having void expressions @@ -425,83 +428,83 @@ __extension__ \ Casting the third operand to void was tried before, but some compilers won't accept it. */ -# define obstack_make_room(h,length) \ -( (h)->temp.tempint = (length), \ - (((h)->next_free + (h)->temp.tempint > (h)->chunk_limit) \ +# define obstack_make_room(h, length) \ + ((h)->temp.tempint = (length), \ + (((h)->next_free + (h)->temp.tempint > (h)->chunk_limit) \ ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0)) -# define obstack_grow(h,where,length) \ -( (h)->temp.tempint = (length), \ - (((h)->next_free + (h)->temp.tempint > (h)->chunk_limit) \ - ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0), \ - memcpy ((h)->next_free, where, (h)->temp.tempint), \ - (h)->next_free += (h)->temp.tempint) - -# define obstack_grow0(h,where,length) \ -( (h)->temp.tempint = (length), \ - (((h)->next_free + (h)->temp.tempint + 1 > (h)->chunk_limit) \ - ? (_obstack_newchunk ((h), (h)->temp.tempint + 1), 0) : 0), \ - memcpy ((h)->next_free, where, (h)->temp.tempint), \ - (h)->next_free += (h)->temp.tempint, \ - *((h)->next_free)++ = 0) - -# define obstack_1grow(h,datum) \ -( (((h)->next_free + 1 > (h)->chunk_limit) \ - ? (_obstack_newchunk ((h), 1), 0) : 0), \ - obstack_1grow_fast (h, datum)) - -# define obstack_ptr_grow(h,datum) \ -( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \ - ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \ - obstack_ptr_grow_fast (h, datum)) - -# define obstack_int_grow(h,datum) \ -( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \ - ? 
(_obstack_newchunk ((h), sizeof (int)), 0) : 0), \ - obstack_int_grow_fast (h, datum)) - -# define obstack_ptr_grow_fast(h,aptr) \ +# define obstack_grow(h, where, length) \ + ((h)->temp.tempint = (length), \ + (((h)->next_free + (h)->temp.tempint > (h)->chunk_limit) \ + ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0), \ + memcpy ((h)->next_free, where, (h)->temp.tempint), \ + (h)->next_free += (h)->temp.tempint) + +# define obstack_grow0(h, where, length) \ + ((h)->temp.tempint = (length), \ + (((h)->next_free + (h)->temp.tempint + 1 > (h)->chunk_limit) \ + ? (_obstack_newchunk ((h), (h)->temp.tempint + 1), 0) : 0), \ + memcpy ((h)->next_free, where, (h)->temp.tempint), \ + (h)->next_free += (h)->temp.tempint, \ + *((h)->next_free)++ = 0) + +# define obstack_1grow(h, datum) \ + ((((h)->next_free + 1 > (h)->chunk_limit) \ + ? (_obstack_newchunk ((h), 1), 0) : 0), \ + obstack_1grow_fast (h, datum)) + +# define obstack_ptr_grow(h, datum) \ + ((((h)->next_free + sizeof (char *) > (h)->chunk_limit) \ + ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \ + obstack_ptr_grow_fast (h, datum)) + +# define obstack_int_grow(h, datum) \ + ((((h)->next_free + sizeof (int) > (h)->chunk_limit) \ + ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \ + obstack_int_grow_fast (h, datum)) + +# define obstack_ptr_grow_fast(h, aptr) \ (((const void **) ((h)->next_free += sizeof (void *)))[-1] = (aptr)) -# define obstack_int_grow_fast(h,aint) \ +# define obstack_int_grow_fast(h, aint) \ (((int *) ((h)->next_free += sizeof (int)))[-1] = (aint)) -# define obstack_blank(h,length) \ -( (h)->temp.tempint = (length), \ - (((h)->chunk_limit - (h)->next_free < (h)->temp.tempint) \ - ? 
(_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0), \ - obstack_blank_fast (h, (h)->temp.tempint)) - -# define obstack_alloc(h,length) \ - (obstack_blank ((h), (length)), obstack_finish ((h))) - -# define obstack_copy(h,where,length) \ - (obstack_grow ((h), (where), (length)), obstack_finish ((h))) - -# define obstack_copy0(h,where,length) \ - (obstack_grow0 ((h), (where), (length)), obstack_finish ((h))) - -# define obstack_finish(h) \ -( ((h)->next_free == (h)->object_base \ - ? (((h)->maybe_empty_object = 1), 0) \ - : 0), \ - (h)->temp.tempptr = (h)->object_base, \ - (h)->next_free \ - = __PTR_ALIGN ((h)->object_base, (h)->next_free, \ - (h)->alignment_mask), \ - (((h)->next_free - (char *) (h)->chunk \ - > (h)->chunk_limit - (char *) (h)->chunk) \ - ? ((h)->next_free = (h)->chunk_limit) : 0), \ - (h)->object_base = (h)->next_free, \ - (h)->temp.tempptr) - -# define obstack_free(h,obj) \ -( (h)->temp.tempint = (char *) (obj) - (char *) (h)->chunk, \ - ((((h)->temp.tempint > 0 \ - && (h)->temp.tempint < (h)->chunk_limit - (char *) (h)->chunk)) \ - ? (int) ((h)->next_free = (h)->object_base \ - = (h)->temp.tempint + (char *) (h)->chunk) \ - : (((__obstack_free) ((h), (h)->temp.tempint + (char *) (h)->chunk), 0), 0))) +# define obstack_blank(h, length) \ + ((h)->temp.tempint = (length), \ + (((h)->chunk_limit - (h)->next_free < (h)->temp.tempint) \ + ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0), \ + obstack_blank_fast (h, (h)->temp.tempint)) + +# define obstack_alloc(h, length) \ + (obstack_blank ((h), (length)), obstack_finish ((h))) + +# define obstack_copy(h, where, length) \ + (obstack_grow ((h), (where), (length)), obstack_finish ((h))) + +# define obstack_copy0(h, where, length) \ + (obstack_grow0 ((h), (where), (length)), obstack_finish ((h))) + +# define obstack_finish(h) \ + (((h)->next_free == (h)->object_base \ + ? 
(((h)->maybe_empty_object = 1), 0) \ + : 0), \ + (h)->temp.tempptr = (h)->object_base, \ + (h)->next_free \ + = __PTR_ALIGN ((h)->object_base, (h)->next_free, \ + (h)->alignment_mask), \ + (((h)->next_free - (char *) (h)->chunk \ + > (h)->chunk_limit - (char *) (h)->chunk) \ + ? ((h)->next_free = (h)->chunk_limit) : 0), \ + (h)->object_base = (h)->next_free, \ + (h)->temp.tempptr) + +# define obstack_free(h, obj) \ + ((h)->temp.tempint = (char *) (obj) - (char *) (h)->chunk, \ + ((((h)->temp.tempint > 0 \ + && (h)->temp.tempint < (h)->chunk_limit - (char *) (h)->chunk)) \ + ? (void) ((h)->next_free = (h)->object_base \ + = (h)->temp.tempint + (char *) (h)->chunk) \ + : (__obstack_free) (h, (h)->temp.tempint + (char *) (h)->chunk))) #endif /* not __GNUC__ */ diff --git a/contrib/grep/lib/open-safer.c b/contrib/grep/lib/open-safer.c index 17f7600fff..91d69230db 100644 --- a/contrib/grep/lib/open-safer.c +++ b/contrib/grep/lib/open-safer.c @@ -1,6 +1,6 @@ /* Invoke open, but avoid some glitches. - Copyright (C) 2005-2006, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/open.c b/contrib/grep/lib/open.c index 27801b916c..d6f8e0df89 100644 --- a/contrib/grep/lib/open.c +++ b/contrib/grep/lib/open.c @@ -1,5 +1,5 @@ /* Open a descriptor to a file. - Copyright (C) 2007-2012 Free Software Foundation, Inc. + Copyright (C) 2007-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,7 +27,7 @@ #include #undef __need_system_fcntl_h -static inline int +static int orig_open (const char *filename, int flags, mode_t mode) { return open (filename, flags, mode); diff --git a/contrib/grep/lib/openat-die.c b/contrib/grep/lib/openat-die.c index a5f17d8a55..02f1fd7de4 100644 --- a/contrib/grep/lib/openat-die.c +++ b/contrib/grep/lib/openat-die.c @@ -1,6 +1,6 @@ /* Report a save- or restore-cwd failure in our openat replacement and then exit. - Copyright (C) 2005-2006, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/openat-priv.h b/contrib/grep/lib/openat-priv.h index fd0adc1855..326c739e5f 100644 --- a/contrib/grep/lib/openat-priv.h +++ b/contrib/grep/lib/openat-priv.h @@ -1,6 +1,6 @@ /* Internals for openat-like functions. - Copyright (C) 2005-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/openat-proc.c b/contrib/grep/lib/openat-proc.c index 186cda0833..7b40bb8f05 100644 --- a/contrib/grep/lib/openat-proc.c +++ b/contrib/grep/lib/openat-proc.c @@ -1,6 +1,6 @@ /* Create /proc/self/fd-related names for subfiles of open directories. - Copyright (C) 2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,14 +32,6 @@ #include "intprops.h" -/* The results of open() in this file are not used with fchdir, - and we do not leak fds to any single-threaded code that could use stdio, - therefore save some unnecessary work in fchdir.c. - FIXME - if the kernel ever adds support for multi-thread safety for - avoiding standard fds, then we should use open_safer. */ -#undef open -#undef close - #define PROC_SELF_FD_FORMAT "/proc/self/fd/%d/%s" #define PROC_SELF_FD_NAME_SIZE_BOUND(len) \ diff --git a/contrib/grep/lib/openat-safer.c b/contrib/grep/lib/openat-safer.c index f6ed93f638..d0dc2d7068 100644 --- a/contrib/grep/lib/openat-safer.c +++ b/contrib/grep/lib/openat-safer.c @@ -1,6 +1,6 @@ /* Invoke openat, but avoid some glitches. - Copyright (C) 2005-2006, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/openat.c b/contrib/grep/lib/openat.c index ac2496e907..9d2f7d49c2 100644 --- a/contrib/grep/lib/openat.c +++ b/contrib/grep/lib/openat.c @@ -1,5 +1,5 @@ /* provide a replacement openat function - Copyright (C) 2004-2012 Free Software Foundation, Inc. + Copyright (C) 2004-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,7 +28,7 @@ #undef __need_system_fcntl_h #if HAVE_OPENAT -static inline int +static int orig_openat (int fd, char const *filename, int flags, mode_t mode) { return openat (fd, filename, flags, mode); diff --git a/contrib/grep/lib/openat.h b/contrib/grep/lib/openat.h index d646250bb3..29e23a547a 100644 --- a/contrib/grep/lib/openat.h +++ b/contrib/grep/lib/openat.h @@ -1,5 +1,5 @@ /* provide a replacement openat function - Copyright (C) 2004-2006, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2004-2006, 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,6 +26,11 @@ #include #include +#ifndef _GL_INLINE_HEADER_BEGIN + #error "Please include config.h first." +#endif +_GL_INLINE_HEADER_BEGIN + #if !HAVE_OPENAT int openat_permissive (int fd, char const *file, int flags, mode_t mode, @@ -49,13 +54,17 @@ _Noreturn void openat_save_fail (int); #if GNULIB_FCHOWNAT -static inline int +# ifndef FCHOWNAT_INLINE +# define FCHOWNAT_INLINE _GL_INLINE +# endif + +FCHOWNAT_INLINE int chownat (int fd, char const *file, uid_t owner, gid_t group) { return fchownat (fd, file, owner, group, 0); } -static inline int +FCHOWNAT_INLINE int lchownat (int fd, char const *file, uid_t owner, gid_t group) { return fchownat (fd, file, owner, group, AT_SYMLINK_NOFOLLOW); @@ -65,13 +74,17 @@ lchownat (int fd, char const *file, uid_t owner, gid_t group) #if GNULIB_FCHMODAT -static inline int +# ifndef FCHMODAT_INLINE +# define FCHMODAT_INLINE _GL_INLINE +# endif + +FCHMODAT_INLINE int chmodat (int fd, char const *file, mode_t mode) { return fchmodat (fd, file, mode, 0); } -static inline int +FCHMODAT_INLINE int lchmodat (int fd, char const *file, mode_t mode) { return fchmodat (fd, file, mode, AT_SYMLINK_NOFOLLOW); 
@@ -79,15 +92,19 @@ lchmodat (int fd, char const *file, mode_t mode) #endif -#if GNULIB_FSTATAT +#if GNULIB_STATAT + +# ifndef STATAT_INLINE +# define STATAT_INLINE _GL_INLINE +# endif -static inline int +STATAT_INLINE int statat (int fd, char const *name, struct stat *st) { return fstatat (fd, name, st, 0); } -static inline int +STATAT_INLINE int lstatat (int fd, char const *name, struct stat *st) { return fstatat (fd, name, st, AT_SYMLINK_NOFOLLOW); @@ -101,4 +118,6 @@ lstatat (int fd, char const *name, struct stat *st) wrappers are not provided for accessat or euidaccessat, so as to avoid dragging in -lgen on some platforms. */ +_GL_INLINE_HEADER_END + #endif /* _GL_HEADER_OPENAT */ diff --git a/contrib/grep/lib/opendir-safer.c b/contrib/grep/lib/opendir-safer.c index 4320716589..e31da88498 100644 --- a/contrib/grep/lib/opendir-safer.c +++ b/contrib/grep/lib/opendir-safer.c @@ -1,6 +1,6 @@ /* Invoke opendir, but avoid some glitches. - Copyright (C) 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/opendir.c b/contrib/grep/lib/opendir.c index 50e25e305a..c67f2e5ac4 100644 --- a/contrib/grep/lib/opendir.c +++ b/contrib/grep/lib/opendir.c @@ -1,5 +1,5 @@ /* Start reading the entries of a directory. - Copyright (C) 2006-2012 Free Software Foundation, Inc. + Copyright (C) 2006-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/pathmax.h b/contrib/grep/lib/pathmax.h index 03db7cb519..33fc3553d7 100644 --- a/contrib/grep/lib/pathmax.h +++ b/contrib/grep/lib/pathmax.h @@ -1,5 +1,5 @@ /* Define PATH_MAX somehow. Requires sys/types.h. 
- Copyright (C) 1992, 1999, 2001, 2003, 2005, 2009-2012 Free Software + Copyright (C) 1992, 1999, 2001, 2003, 2005, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify diff --git a/contrib/grep/lib/pipe-safer.c b/contrib/grep/lib/pipe-safer.c index d6dce5ffc1..70fb40f8e0 100644 --- a/contrib/grep/lib/pipe-safer.c +++ b/contrib/grep/lib/pipe-safer.c @@ -1,5 +1,5 @@ /* Invoke pipe, but avoid some glitches. - Copyright (C) 2005-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/progname.c b/contrib/grep/lib/progname.c index bdd4dd74d4..2919be999a 100644 --- a/contrib/grep/lib/progname.c +++ b/contrib/grep/lib/progname.c @@ -1,5 +1,5 @@ /* Program name management. - Copyright (C) 2001-2003, 2005-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2003, 2005-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2001. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/progname.h b/contrib/grep/lib/progname.h index a75a02e6da..fa15871df5 100644 --- a/contrib/grep/lib/progname.h +++ b/contrib/grep/lib/progname.h @@ -1,5 +1,5 @@ /* Program name management. - Copyright (C) 2001-2004, 2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2004, 2006, 2009-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2001. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/propername.c b/contrib/grep/lib/propername.c index 79c6b08e45..54a6adaffd 100644 --- a/contrib/grep/lib/propername.c +++ b/contrib/grep/lib/propername.c @@ -1,5 +1,5 @@ /* Localization of proper names. - Copyright (C) 2006-2012 Free Software Foundation, Inc. 
+ Copyright (C) 2006-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2006. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/propername.h b/contrib/grep/lib/propername.h index 0ce1923f68..7e3de0fc51 100644 --- a/contrib/grep/lib/propername.h +++ b/contrib/grep/lib/propername.h @@ -1,5 +1,5 @@ /* Localization of proper names. - Copyright (C) 2006, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2006, 2008-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2006. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/quote.h b/contrib/grep/lib/quote.h index b30b166ddb..fb338a77a1 100644 --- a/contrib/grep/lib/quote.h +++ b/contrib/grep/lib/quote.h @@ -1,6 +1,6 @@ /* quote.h - prototypes for quote.c - Copyright (C) 1998-2001, 2003, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1998-2001, 2003, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,16 +18,29 @@ #ifndef QUOTE_H_ # define QUOTE_H_ 1 +# include + /* The quoting options used by quote_n and quote. Its type is incomplete, so it's useful only in expressions like '"e_quoting_options'. */ extern struct quoting_options quote_quoting_options; -/* Return an unambiguous printable representation of NAME, - allocated in slot N, suitable for diagnostics. */ -char const *quote_n (int n, char const *name); +/* Return an unambiguous printable representation of ARG (of size + ARGSIZE), allocated in slot N, suitable for diagnostics. If + ARGSIZE is SIZE_MAX, use the string length of the argument for + ARGSIZE. */ +char const *quote_n_mem (int n, char const *arg, size_t argsize); + +/* Return an unambiguous printable representation of ARG (of size + ARGSIZE), suitable for diagnostics. If ARGSIZE is SIZE_MAX, use + the string length of the argument for ARGSIZE. 
*/ +char const *quote_mem (char const *arg, size_t argsize); + +/* Return an unambiguous printable representation of ARG, allocated in + slot N, suitable for diagnostics. */ +char const *quote_n (int n, char const *arg); -/* Return an unambiguous printable representation of NAME, - suitable for diagnostics. */ -char const *quote (char const *name); +/* Return an unambiguous printable representation of ARG, suitable for + diagnostics. */ +char const *quote (char const *arg); #endif /* !QUOTE_H_ */ diff --git a/contrib/grep/lib/quotearg.c b/contrib/grep/lib/quotearg.c index 1ea583d9c2..bef7b98b8b 100644 --- a/contrib/grep/lib/quotearg.c +++ b/contrib/grep/lib/quotearg.c @@ -1,6 +1,6 @@ /* quotearg.c - quote arguments for output - Copyright (C) 1998-2002, 2004-2012 Free Software Foundation, Inc. + Copyright (C) 1998-2002, 2004-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -178,7 +178,7 @@ set_custom_quoting (struct quoting_options *o, static struct quoting_options /* NOT PURE!! */ quoting_options_from_style (enum quoting_style style) { - struct quoting_options o = { 0, 0, { 0 }, NULL, NULL }; + struct quoting_options o = { literal_quoting_style, 0, { 0 }, NULL, NULL }; if (style == custom_quoting_style) abort (); o.style = style; @@ -348,7 +348,12 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, if (backslash_escapes && quote_string_len - && i + quote_string_len <= argsize + && (i + quote_string_len + <= (argsize == SIZE_MAX && 1 < quote_string_len + /* Use strlen only if we must: when argsize is SIZE_MAX, + and when the quote string is more than 1 byte long. + If we do call strlen, save the result. */ + ? (argsize = strlen (arg)) : argsize)) && memcmp (arg + i, quote_string, quote_string_len) == 0) { if (elide_outer_quotes) @@ -621,7 +626,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, if (! 
((backslash_escapes || elide_outer_quotes) && quote_these_too - && quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))) + && quote_these_too[c / INT_BITS] >> (c % INT_BITS) & 1) && !is_right_quote) goto store_c; @@ -929,7 +934,7 @@ quotearg_custom_mem (char const *left_quote, char const *right_quote, } -/* The quoting option used by quote_n and quote. */ +/* The quoting option used by the functions of quote.h. */ struct quoting_options quote_quoting_options = { locale_quoting_style, @@ -939,13 +944,25 @@ struct quoting_options quote_quoting_options = }; char const * -quote_n (int n, char const *name) +quote_n_mem (int n, char const *arg, size_t argsize) { - return quotearg_n_options (n, name, SIZE_MAX, "e_quoting_options); + return quotearg_n_options (n, arg, argsize, "e_quoting_options); } char const * -quote (char const *name) +quote_mem (char const *arg, size_t argsize) { - return quote_n (0, name); + return quote_n_mem (0, arg, argsize); +} + +char const * +quote_n (int n, char const *arg) +{ + return quote_n_mem (n, arg, SIZE_MAX); +} + +char const * +quote (char const *arg) +{ + return quote_n (0, arg); } diff --git a/contrib/grep/lib/quotearg.h b/contrib/grep/lib/quotearg.h index e6b08ab230..da29d7b5fc 100644 --- a/contrib/grep/lib/quotearg.h +++ b/contrib/grep/lib/quotearg.h @@ -1,6 +1,6 @@ /* quotearg.h - quote arguments for output - Copyright (C) 1998-2002, 2004, 2006, 2008-2012 Free Software Foundation, + Copyright (C) 1998-2002, 2004, 2006, 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/read.c b/contrib/grep/lib/read.c new file mode 100644 index 0000000000..d125b51a5e --- /dev/null +++ b/contrib/grep/lib/read.c @@ -0,0 +1,85 @@ +/* POSIX compatible read() function. + Copyright (C) 2008-2014 Free Software Foundation, Inc. + Written by Bruno Haible , 2011. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include + +#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ + +# include +# include + +# define WIN32_LEAN_AND_MEAN /* avoid including junk */ +# include + +# include "msvc-inval.h" +# include "msvc-nothrow.h" + +# undef read + +# if HAVE_MSVC_INVALID_PARAMETER_HANDLER +static ssize_t +read_nothrow (int fd, void *buf, size_t count) +{ + ssize_t result; + + TRY_MSVC_INVAL + { + result = read (fd, buf, count); + } + CATCH_MSVC_INVAL + { + result = -1; + errno = EBADF; + } + DONE_MSVC_INVAL; + + return result; +} +# else +# define read_nothrow read +# endif + +ssize_t +rpl_read (int fd, void *buf, size_t count) +{ + ssize_t ret = read_nothrow (fd, buf, count); + +# if GNULIB_NONBLOCKING + if (ret < 0 + && GetLastError () == ERROR_NO_DATA) + { + HANDLE h = (HANDLE) _get_osfhandle (fd); + if (GetFileType (h) == FILE_TYPE_PIPE) + { + /* h is a pipe or socket. */ + DWORD state; + if (GetNamedPipeHandleState (h, &state, NULL, NULL, NULL, NULL, 0) + && (state & PIPE_NOWAIT) != 0) + /* h is a pipe in non-blocking mode. + Change errno from EINVAL to EAGAIN. 
*/ + errno = EAGAIN; + } + } +# endif + + return ret; +} + +#endif diff --git a/contrib/grep/lib/readdir.c b/contrib/grep/lib/readdir.c index 28aff9ccb4..b74aebe07f 100644 --- a/contrib/grep/lib/readdir.c +++ b/contrib/grep/lib/readdir.c @@ -1,5 +1,5 @@ /* Read the next entry of a directory. - Copyright (C) 2011-2012 Free Software Foundation, Inc. + Copyright (C) 2011-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/realloc.c b/contrib/grep/lib/realloc.c index 99556ccf9c..8dfcfbe5a5 100644 --- a/contrib/grep/lib/realloc.c +++ b/contrib/grep/lib/realloc.c @@ -1,6 +1,6 @@ /* realloc() function that is glibc compatible. - Copyright (C) 1997, 2003-2004, 2006-2007, 2009-2012 Free Software + Copyright (C) 1997, 2003-2004, 2006-2007, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/ref-add.sin b/contrib/grep/lib/ref-add.sin index 4ec043f87d..0e14022965 100644 --- a/contrib/grep/lib/ref-add.sin +++ b/contrib/grep/lib/ref-add.sin @@ -1,6 +1,6 @@ # Add this package to a list of references stored in a text file. # -# Copyright (C) 2000, 2009-2012 Free Software Foundation, Inc. +# Copyright (C) 2000, 2009-2014 Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/ref-del.sin b/contrib/grep/lib/ref-del.sin index eb0388117b..2dbcf7b2c1 100644 --- a/contrib/grep/lib/ref-del.sin +++ b/contrib/grep/lib/ref-del.sin @@ -1,6 +1,6 @@ # Remove this package from a list of references stored in a text file. # -# Copyright (C) 2000, 2009-2012 Free Software Foundation, Inc. +# Copyright (C) 2000, 2009-2014 Free Software Foundation, Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/regcomp.c b/contrib/grep/lib/regcomp.c index 81277dcb96..249d757a87 100644 --- a/contrib/grep/lib/regcomp.c +++ b/contrib/grep/lib/regcomp.c @@ -1,20 +1,21 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2012 Free Software Foundation, Inc. + Copyright (C) 2002-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, see . */ + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + . 
*/ static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, size_t length, reg_syntax_t syntax); @@ -93,20 +94,20 @@ static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, re_charset_t *mbcset, Idx *char_class_alloc, - const unsigned char *class_name, + const char *class_name, reg_syntax_t syntax); #else /* not RE_ENABLE_I18N */ static reg_errcode_t build_equiv_class (bitset_t sbcset, const unsigned char *name); static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, - const unsigned char *class_name, + const char *class_name, reg_syntax_t syntax); #endif /* not RE_ENABLE_I18N */ static bin_tree_t *build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, - const unsigned char *class_name, - const unsigned char *extra, + const char *class_name, + const char *extra, bool non_match, reg_errcode_t *err); static bin_tree_t *create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, @@ -291,7 +292,7 @@ weak_alias (__re_compile_fastmap, re_compile_fastmap) #endif static inline void -__attribute ((always_inline)) +__attribute__ ((always_inline)) re_set_fastmap (char *fastmap, bool icase, int ch) { fastmap[ch] = 1; @@ -539,7 +540,7 @@ regerror (errcode, preg, errbuf, errbuf_size) size_t errbuf_size; #else /* size_t might promote */ size_t -regerror (int errcode, const regex_t *_Restrict_ preg _UNUSED_PARAMETER_, +regerror (int errcode, const regex_t *_Restrict_ preg, char *_Restrict_ errbuf, size_t errbuf_size) #endif { @@ -585,7 +586,7 @@ weak_alias (__regerror, regerror) static const bitset_t utf8_sb_map = { /* Set the first 128 bits. */ -# ifdef __GNUC__ +# if defined __GNUC__ && !defined __STRICT_ANSI__ [0 ... 
0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX # else # if 4 * BITSET_WORD_BITS < ASCII_CHARS @@ -662,7 +663,10 @@ regfree (preg) { re_dfa_t *dfa = preg->buffer; if (BE (dfa != NULL, 1)) - free_dfa_content (dfa); + { + lock_fini (dfa->lock); + free_dfa_content (dfa); + } preg->buffer = NULL; preg->allocated = 0; @@ -783,6 +787,8 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, preg->used = sizeof (re_dfa_t); err = init_dfa (dfa, length); + if (BE (err == REG_NOERROR && lock_init (dfa->lock) != 0, 0)) + err = REG_ESPACE; if (BE (err != REG_NOERROR, 0)) { free_dfa_content (dfa); @@ -796,8 +802,6 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, strncpy (dfa->re_str, pattern, length + 1); #endif - __libc_lock_init (dfa->lock); - err = re_string_construct (®exp, pattern, length, preg->translate, (syntax & RE_ICASE) != 0, dfa); if (BE (err != REG_NOERROR, 0)) @@ -805,6 +809,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, re_compile_internal_free_return: free_workarea_compile (preg); re_string_destruct (®exp); + lock_fini (dfa->lock); free_dfa_content (dfa); preg->buffer = NULL; preg->allocated = 0; @@ -837,6 +842,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, if (BE (err != REG_NOERROR, 0)) { + lock_fini (dfa->lock); free_dfa_content (dfa); preg->buffer = NULL; preg->allocated = 0; @@ -952,10 +958,10 @@ static void internal_function init_word_char (re_dfa_t *dfa) { - dfa->word_ops_used = 1; int i = 0; int j; int ch = 0; + dfa->word_ops_used = 1; if (BE (dfa->map_notascii == 0, 1)) { bitset_word_t bits0 = 0x00000000; @@ -1421,7 +1427,7 @@ calc_first (void *extra, bin_tree_t *node) /* Pass 2: compute NEXT on the tree. Preorder visit. 
*/ static reg_errcode_t -calc_next (void *extra _UNUSED_PARAMETER_, bin_tree_t *node) +calc_next (void *extra, bin_tree_t *node) { switch (node->token.type) { @@ -2421,8 +2427,8 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, case OP_WORD: case OP_NOTWORD: tree = build_charclass_op (dfa, regexp->trans, - (const unsigned char *) "alnum", - (const unsigned char *) "_", + "alnum", + "_", token->type == OP_NOTWORD, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; @@ -2430,8 +2436,8 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, case OP_SPACE: case OP_NOTSPACE: tree = build_charclass_op (dfa, regexp->trans, - (const unsigned char *) "space", - (const unsigned char *) "", + "space", + "", token->type == OP_NOTSPACE, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; @@ -2711,7 +2717,6 @@ build_range_exp (const reg_syntax_t syntax, wchar_t wc; wint_t start_wc; wint_t end_wc; - wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] @@ -2725,11 +2730,7 @@ build_range_exp (const reg_syntax_t syntax, ? __btowc (end_ch) : end_elem->opr.wch); if (start_wc == WEOF || end_wc == WEOF) return REG_ECOLLATE; - cmp_buf[0] = start_wc; - cmp_buf[4] = end_wc; - - if (BE ((syntax & RE_NO_EMPTY_RANGES) - && wcscoll (cmp_buf, cmp_buf + 4) > 0, 0)) + else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0)) return REG_ERANGE; /* Got valid collation sequence values, add them as a new entry. @@ -2770,9 +2771,7 @@ build_range_exp (const reg_syntax_t syntax, /* Build the table for single byte characters. 
*/ for (wc = 0; wc < SBC_MAX; ++wc) { - cmp_buf[2] = wc; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + if (start_wc <= wc && wc <= end_wc) bitset_set (sbcset, wc); } } @@ -2807,10 +2806,8 @@ build_range_exp (const reg_syntax_t syntax, static reg_errcode_t internal_function # ifdef RE_ENABLE_I18N -build_collating_symbol (bitset_t sbcset, - re_charset_t *mbcset _UNUSED_PARAMETER_, - Idx *coll_sym_alloc _UNUSED_PARAMETER_, - const unsigned char *name) +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, + Idx *coll_sym_alloc, const unsigned char *name) # else /* not RE_ENABLE_I18N */ build_collating_symbol (bitset_t sbcset, const unsigned char *name) # endif /* not RE_ENABLE_I18N */ @@ -2843,40 +2840,29 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, /* Local function for parse_bracket_exp used in _LIBC environment. Seek the collating symbol entry corresponding to NAME. - Return the index of the symbol in the SYMB_TABLE. */ + Return the index of the symbol in the SYMB_TABLE, + or -1 if not found. */ auto inline int32_t - __attribute ((always_inline)) - seek_collating_symbol_entry (name, name_len) - const unsigned char *name; - size_t name_len; + __attribute__ ((always_inline)) + seek_collating_symbol_entry (const unsigned char *name, size_t name_len) { - int32_t hash = elem_hash ((const char *) name, name_len); - int32_t elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - int32_t second = hash % (table_size - 2) + 1; + int32_t elem; - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - /* Compare the length of the name. */ - && name_len == extra[symb_table[2 * elem + 1]] - /* Compare the name. */ - && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], - name_len) == 0) - { - /* Yep, this is the entry. */ - break; - } - - /* Next entry. 
*/ - elem += second; - } - while (symb_table[2 * elem] != 0); - } - return elem; + for (elem = 0; elem < table_size; elem++) + if (symb_table[2 * elem] != 0) + { + int32_t idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + if (/* Compare the length of the name. */ + name_len == extra[idx] + /* Compare the name. */ + && memcmp (name, &extra[idx + 1], name_len) == 0) + /* Yep, this is the entry. */ + return elem; + } + return -1; } /* Local function for parse_bracket_exp used in _LIBC environment. @@ -2884,9 +2870,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, Return the value if succeeded, UINT_MAX otherwise. */ auto inline unsigned int - __attribute ((always_inline)) - lookup_collation_sequence_value (br_elem) - bracket_elem_t *br_elem; + __attribute__ ((always_inline)) + lookup_collation_sequence_value (bracket_elem_t *br_elem) { if (br_elem->type == SB_CHAR) { @@ -2914,7 +2899,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, int32_t elem, idx; elem = seek_collating_symbol_entry (br_elem->opr.name, sym_name_len); - if (symb_table[2 * elem] != 0) + if (elem != -1) { /* We found the entry. */ idx = symb_table[2 * elem + 1]; @@ -2932,7 +2917,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, /* Return the collation sequence value. */ return *(unsigned int *) (extra + idx); } - else if (symb_table[2 * elem] == 0 && sym_name_len == 1) + else if (sym_name_len == 1) { /* No valid character. Match it as a single byte character. */ @@ -2953,12 +2938,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, update it. 
*/ auto inline reg_errcode_t - __attribute ((always_inline)) - build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) - re_charset_t *mbcset; - Idx *range_alloc; - bitset_t sbcset; - bracket_elem_t *start_elem, *end_elem; + __attribute__ ((always_inline)) + build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, bracket_elem_t *end_elem) { unsigned int ch; uint32_t start_collseq; @@ -2971,6 +2953,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, 0)) return REG_ERANGE; + /* FIXME: Implement rational ranges here, too. */ start_collseq = lookup_collation_sequence_value (start_elem); end_collseq = lookup_collation_sequence_value (end_elem); /* Check start/end collation sequence values. */ @@ -3036,26 +3019,23 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, pointer argument since we may update it. */ auto inline reg_errcode_t - __attribute ((always_inline)) - build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) - re_charset_t *mbcset; - Idx *coll_sym_alloc; - bitset_t sbcset; - const unsigned char *name; + __attribute__ ((always_inline)) + build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, + Idx *coll_sym_alloc, const unsigned char *name) { int32_t elem, idx; size_t name_len = strlen ((const char *) name); if (nrules != 0) { elem = seek_collating_symbol_entry (name, name_len); - if (symb_table[2 * elem] != 0) + if (elem != -1) { /* We found the entry. */ idx = symb_table[2 * elem + 1]; /* Skip the name of collating element name. */ idx += 1 + extra[idx]; } - else if (symb_table[2 * elem] == 0 && name_len == 1) + else if (name_len == 1) { /* No valid character, treat it as a normal character. 
*/ @@ -3298,7 +3278,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, #ifdef RE_ENABLE_I18N mbcset, &char_class_alloc, #endif /* RE_ENABLE_I18N */ - start_elem.opr.name, syntax); + (const char *) start_elem.opr.name, + syntax); if (BE (*err != REG_NOERROR, 0)) goto parse_bracket_exp_free_return; break; @@ -3394,8 +3375,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, - re_token_t *token, int token_len, - re_dfa_t *dfa _UNUSED_PARAMETER_, + re_token_t *token, int token_len, re_dfa_t *dfa, reg_syntax_t syntax, bool accept_hyphen) { #ifdef RE_ENABLE_I18N @@ -3482,10 +3462,8 @@ parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, static reg_errcode_t #ifdef RE_ENABLE_I18N -build_equiv_class (bitset_t sbcset, - re_charset_t *mbcset _UNUSED_PARAMETER_, - Idx *equiv_class_alloc _UNUSED_PARAMETER_, - const unsigned char *name) +build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, + Idx *equiv_class_alloc, const unsigned char *name) #else /* not RE_ENABLE_I18N */ build_equiv_class (bitset_t sbcset, const unsigned char *name) #endif /* not RE_ENABLE_I18N */ @@ -3581,14 +3559,14 @@ static reg_errcode_t #ifdef RE_ENABLE_I18N build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, re_charset_t *mbcset, Idx *char_class_alloc, - const unsigned char *class_name, reg_syntax_t syntax) + const char *class_name, reg_syntax_t syntax) #else /* not RE_ENABLE_I18N */ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, - const unsigned char *class_name, reg_syntax_t syntax) + const char *class_name, reg_syntax_t syntax) #endif /* not RE_ENABLE_I18N */ { int i; - const char *name = (const char *) class_name; + const char *name = class_name; /* In case of REG_ICASE "upper" and "lower" match the both of upper and lower cases. 
*/ @@ -3662,8 +3640,8 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, static bin_tree_t * build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, - const unsigned char *class_name, - const unsigned char *extra, bool non_match, + const char *class_name, + const char *extra, bool non_match, reg_errcode_t *err) { re_bitset_ptr_t sbcset; @@ -3891,7 +3869,7 @@ free_token (re_token_t *node) and its children. */ static reg_errcode_t -free_tree (void *extra _UNUSED_PARAMETER_, bin_tree_t *node) +free_tree (void *extra, bin_tree_t *node) { free_token (&node->token); return REG_NOERROR; diff --git a/contrib/grep/lib/regex.c b/contrib/grep/lib/regex.c index 7b440493f6..95beb6fba3 100644 --- a/contrib/grep/lib/regex.c +++ b/contrib/grep/lib/regex.c @@ -1,20 +1,21 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2003, 2005-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2002-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
- You should have received a copy of the GNU General Public License along - with this program; if not, see <http://www.gnu.org/licenses/>. */ + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #ifndef _LIBC # include <config.h> @@ -23,6 +24,7 @@ # pragma GCC diagnostic ignored "-Wsuggest-attribute=pure" # endif # if (__GNUC__ == 4 && 3 <= __GNUC_MINOR__) || 4 < __GNUC__ +# pragma GCC diagnostic ignored "-Wold-style-definition" # pragma GCC diagnostic ignored "-Wtype-limits" # endif #endif diff --git a/contrib/grep/lib/regex.h b/contrib/grep/lib/regex.h index 211b5fcb27..a3ac7a9e9e 100644 --- a/contrib/grep/lib/regex.h +++ b/contrib/grep/lib/regex.h @@ -1,21 +1,22 @@ /* Definitions for data structures and routines for the regular expression library. - Copyright (C) 1985, 1989-1993, 1995-1998, 2000-2003, 2005-2012 - Free Software Foundation, Inc. + Copyright (C) 1985, 1989-1993, 1995-1998, 2000-2003, 2005-2014 Free Software + Foundation, Inc. This file is part of the GNU C Library. - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details.
- You should have received a copy of the GNU General Public License along - with this program; if not, see <http://www.gnu.org/licenses/>. */ + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #ifndef _REGEX_H #define _REGEX_H 1 diff --git a/contrib/grep/lib/regex_internal.c b/contrib/grep/lib/regex_internal.c index 9537297370..c5bb012218 100644 --- a/contrib/grep/lib/regex_internal.c +++ b/contrib/grep/lib/regex_internal.c @@ -1,23 +1,22 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2012 Free Software Foundation, Inc. + Copyright (C) 2002-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, see <http://www.gnu.org/licenses/>. */ + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ -#include "verify.h" -#include "intprops.h" static void re_string_construct_common (const char *str, Idx len, re_string_t *pstr, RE_TRANSLATE_TYPE trans, bool icase, @@ -312,12 +311,11 @@ build_wcs_upper_buffer (re_string_t *pstr) + byte_idx), remain_len, &pstr->cur_state); if (BE (mbclen < (size_t) -2, 1)) { - wchar_t wcu = wc; - if (iswlower (wc)) + wchar_t wcu = towupper (wc); + if (wcu != wc) { size_t mbcdlen; - wcu = towupper (wc); mbcdlen = wcrtomb (buf, wcu, &prev_st); if (BE (mbclen == mbcdlen, 1)) memcpy (pstr->mbs + byte_idx, buf, mbclen); @@ -382,12 +380,11 @@ build_wcs_upper_buffer (re_string_t *pstr) mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); if (BE (mbclen < (size_t) -2, 1)) { - wchar_t wcu = wc; - if (iswlower (wc)) + wchar_t wcu = towupper (wc); + if (wcu != wc) { size_t mbcdlen; - wcu = towupper (wc); mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); if (BE (mbclen == mbcdlen, 1)) memcpy (pstr->mbs + byte_idx, buf, mbclen); @@ -539,10 +536,7 @@ build_upper_buffer (re_string_t *pstr) int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; if (BE (pstr->trans != NULL, 0)) ch = pstr->trans[ch]; - if (islower (ch)) - pstr->mbs[char_idx] = toupper (ch); - else - pstr->mbs[char_idx] = ch; + pstr->mbs[char_idx] = toupper (ch); } pstr->valid_len = char_idx; pstr->valid_raw_len = char_idx; @@ -835,7 +829,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) } static unsigned char -internal_function __attribute ((pure)) +internal_function __attribute__ ((pure)) re_string_peek_byte_case (const re_string_t *pstr, Idx idx) { int ch; @@ -975,7 +969,7 @@ re_node_set_alloc (re_node_set *set, Idx size) set->alloc = size; set->nelem = 0; set->elems = re_malloc (Idx, size); - if (BE (set->elems == NULL, 0)) + if (BE (set->elems == NULL, 0) && (MALLOC_0_IS_NONNULL || size != 0)) return REG_ESPACE; return REG_NOERROR; } @@ -1355,7 +1349,7 @@ re_node_set_insert_last (re_node_set *set, Idx elem) Return true if SET1 and SET2 are equivalent. 
*/ static bool -internal_function __attribute ((pure)) +internal_function __attribute__ ((pure)) re_node_set_compare (const re_node_set *set1, const re_node_set *set2) { Idx i; @@ -1370,7 +1364,7 @@ re_node_set_compare (const re_node_set *set1, const re_node_set *set2) /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ static Idx -internal_function __attribute ((pure)) +internal_function __attribute__ ((pure)) re_node_set_contains (const re_node_set *set, Idx elem) { __re_size_t idx, right, mid; @@ -1395,10 +1389,7 @@ static void internal_function re_node_set_remove_at (re_node_set *set, Idx idx) { - verify (! TYPE_SIGNED (Idx)); - /* if (idx < 0) - return; */ - if (idx >= set->nelem) + if (idx < 0 || idx >= set->nelem) return; --set->nelem; for (; idx < set->nelem; idx++) @@ -1447,11 +1438,9 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token) dfa->nodes[dfa->nodes_len] = token; dfa->nodes[dfa->nodes_len].constraint = 0; #ifdef RE_ENABLE_I18N - { - int type = token.type; dfa->nodes[dfa->nodes_len].accept_mb = - (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET; - } + ((token.type == OP_PERIOD && dfa->mb_cur_max > 1) + || token.type == COMPLEX_BRACKET); #endif dfa->nexts[dfa->nodes_len] = REG_MISSING; re_node_set_init_empty (dfa->edests + dfa->nodes_len); @@ -1459,7 +1448,7 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token) return dfa->nodes_len++; } -static inline re_hashval_t +static re_hashval_t internal_function calc_state_hash (const re_node_set *nodes, unsigned int context) { diff --git a/contrib/grep/lib/regex_internal.h b/contrib/grep/lib/regex_internal.h index aa35ed90e3..f44551b677 100644 --- a/contrib/grep/lib/regex_internal.h +++ b/contrib/grep/lib/regex_internal.h @@ -1,20 +1,21 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2012 Free Software Foundation, Inc. + Copyright (C) 2002-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. 
Contributed by Isamu Hasegawa . - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, see . */ + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + . */ #ifndef _REGEX_INTERNAL_H #define _REGEX_INTERNAL_H 1 @@ -26,21 +27,55 @@ #include #include -#ifndef _LIBC -# include "localcharset.h" -#endif #include #include #include #include #include -#if defined _LIBC + +#ifdef _LIBC # include +# define lock_define(name) __libc_lock_define (, name) +# define lock_init(lock) (__libc_lock_init (lock), 0) +# define lock_fini(lock) 0 +# define lock_lock(lock) __libc_lock_lock (lock) +# define lock_unlock(lock) __libc_lock_unlock (lock) +#elif defined GNULIB_LOCK +# include "glthread/lock.h" + /* Use gl_lock_define if empty macro arguments are known to work. + Otherwise, fall back on less-portable substitutes. 
*/ +# if ((defined __GNUC__ && !defined __STRICT_ANSI__) \ + || (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__)) +# define lock_define(name) gl_lock_define (, name) +# elif USE_POSIX_THREADS +# define lock_define(name) pthread_mutex_t name; +# elif USE_PTH_THREADS +# define lock_define(name) pth_mutex_t name; +# elif USE_SOLARIS_THREADS +# define lock_define(name) mutex_t name; +# elif USE_WINDOWS_THREADS +# define lock_define(name) gl_lock_t name; +# else +# define lock_define(name) +# endif +# define lock_init(lock) glthread_lock_init (&(lock)) +# define lock_fini(lock) glthread_lock_destroy (&(lock)) +# define lock_lock(lock) glthread_lock_lock (&(lock)) +# define lock_unlock(lock) glthread_lock_unlock (&(lock)) +#elif defined GNULIB_PTHREAD +# include <pthread.h> +# define lock_define(name) pthread_mutex_t name; +# define lock_init(lock) pthread_mutex_init (&(lock), 0) +# define lock_fini(lock) pthread_mutex_destroy (&(lock)) +# define lock_lock(lock) pthread_mutex_lock (&(lock)) +# define lock_unlock(lock) pthread_mutex_unlock (&(lock)) #else -# define __libc_lock_define(CLASS,NAME) -# define __libc_lock_init(NAME) do { } while (0) -# define __libc_lock_lock(NAME) do { } while (0) -# define __libc_lock_unlock(NAME) do { } while (0) +# define lock_define(name) +# define lock_init(lock) 0 +# define lock_fini(lock) ((void) 0) + /* The 'dfa' avoids an "unused variable 'dfa'" warning from GCC. */ +# define lock_lock(lock) ((void) dfa) +# define lock_unlock(lock) ((void) 0) #endif /* In case that the system doesn't have isblank().
*/ @@ -63,7 +98,7 @@ # ifdef _LIBC # undef gettext # define gettext(msgid) \ - INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) + __dcgettext (_libc_intl_domainname, msgid, LC_MESSAGES) # endif #else # define gettext(msgid) (msgid) @@ -75,7 +110,7 @@ # define gettext_noop(String) String #endif -#if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE && HAVE_WCSCOLL) || _LIBC +#if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE) || _LIBC # define RE_ENABLE_I18N #endif @@ -83,9 +118,6 @@ # define BE(expr, val) __builtin_expect (expr, val) #else # define BE(expr, val) (expr) -# ifdef _LIBC -# define inline -# endif #endif /* Number of ASCII characters. */ @@ -102,6 +134,8 @@ /* Rename to standard API for using out of glibc. */ #ifndef _LIBC +# undef __wctype +# undef __iswctype # define __wctype wctype # define __iswctype iswctype # define __btowc btowc @@ -111,10 +145,8 @@ # define attribute_hidden #endif /* not _LIBC */ -#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) -# define __attribute(arg) __attribute__ (arg) -#else -# define __attribute(arg) +#if __GNUC__ < 3 + (__GNUC_MINOR__ < 1) +# define __attribute__(arg) #endif typedef __re_idx_t Idx; @@ -430,7 +462,7 @@ static void build_upper_buffer (re_string_t *pstr) internal_function; static void re_string_translate_buffer (re_string_t *pstr) internal_function; static unsigned int re_string_context_at (const re_string_t *input, Idx idx, int eflags) - internal_function __attribute ((pure)); + internal_function __attribute__ ((pure)); #endif #define re_string_peek_byte(pstr, offset) \ ((pstr)->mbs[(pstr)->cur_idx + offset]) @@ -449,7 +481,9 @@ static unsigned int re_string_context_at (const re_string_t *input, Idx idx, #define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) -#include <alloca.h> +#if defined _LIBC || HAVE_ALLOCA +# include <alloca.h> +#endif #ifndef _LIBC # if HAVE_ALLOCA @@ -466,6 +500,12 @@ static unsigned int
re_string_context_at (const re_string_t *input, Idx idx, # endif #endif +#ifdef _LIBC +# define MALLOC_0_IS_NONNULL 1 +#elif !defined MALLOC_0_IS_NONNULL +# define MALLOC_0_IS_NONNULL 0 +#endif + #ifndef MAX # define MAX(a,b) ((a) < (b) ? (b) : (a)) #endif @@ -696,7 +736,7 @@ struct re_dfa_t #ifdef DEBUG char* re_str; #endif - __libc_lock_define (, lock) + lock_define (lock) }; #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) @@ -727,33 +767,33 @@ typedef struct } bracket_elem_t; -/* Inline functions for bitset_t operation. */ +/* Functions for bitset_t operation. */ -static inline void +static void bitset_set (bitset_t set, Idx i) { set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS; } -static inline void +static void bitset_clear (bitset_t set, Idx i) { set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS); } -static inline bool +static bool bitset_contain (const bitset_t set, Idx i) { return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1; } -static inline void +static void bitset_empty (bitset_t set) { memset (set, '\0', sizeof (bitset_t)); } -static inline void +static void bitset_set_all (bitset_t set) { memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS)); @@ -762,13 +802,13 @@ bitset_set_all (bitset_t set) ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1; } -static inline void +static void bitset_copy (bitset_t dest, const bitset_t src) { memcpy (dest, src, sizeof (bitset_t)); } -static inline void +static void __attribute__ ((unused)) bitset_not (bitset_t set) { int bitset_i; @@ -780,7 +820,7 @@ bitset_not (bitset_t set) & ~set[BITSET_WORDS - 1]); } -static inline void +static void __attribute__ ((unused)) bitset_merge (bitset_t dest, const bitset_t src) { int bitset_i; @@ -788,7 +828,7 @@ bitset_merge (bitset_t dest, const bitset_t src) dest[bitset_i] |= src[bitset_i]; } -static inline void +static void __attribute__ ((unused)) bitset_mask (bitset_t dest, 
const bitset_t src) { int bitset_i; @@ -797,9 +837,9 @@ bitset_mask (bitset_t dest, const bitset_t src) } #ifdef RE_ENABLE_I18N -/* Inline functions for re_string. */ -static inline int -internal_function __attribute ((pure)) +/* Functions for re_string. */ +static int +internal_function __attribute__ ((pure, unused)) re_string_char_size_at (const re_string_t *pstr, Idx idx) { int byte_idx; @@ -811,8 +851,8 @@ re_string_char_size_at (const re_string_t *pstr, Idx idx) return byte_idx; } -static inline wint_t -internal_function __attribute ((pure)) +static wint_t +internal_function __attribute__ ((pure, unused)) re_string_wchar_at (const re_string_t *pstr, Idx idx) { if (pstr->mb_cur_max == 1) @@ -822,9 +862,8 @@ re_string_wchar_at (const re_string_t *pstr, Idx idx) # ifndef NOT_IN_libc static int -internal_function __attribute ((pure)) -re_string_elem_size_at (const re_string_t *pstr _UNUSED_PARAMETER_, - Idx idx _UNUSED_PARAMETER_) +internal_function __attribute__ ((pure, unused)) +re_string_elem_size_at (const re_string_t *pstr, Idx idx) { # ifdef _LIBC const unsigned char *p, *extra; diff --git a/contrib/grep/lib/regexec.c b/contrib/grep/lib/regexec.c index b816f8ffef..7c91449a19 100644 --- a/contrib/grep/lib/regexec.c +++ b/contrib/grep/lib/regexec.c @@ -1,23 +1,22 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2012 Free Software Foundation, Inc. + Copyright (C) 2002-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. 
+ The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, see <http://www.gnu.org/licenses/>. */ + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ -#include "verify.h" -#include "intprops.h" static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, Idx n) internal_function; static void match_ctx_clean (re_match_context_t *mctx) internal_function; @@ -200,7 +199,7 @@ static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, static bool check_node_accept (const re_match_context_t *mctx, const re_token_t *node, Idx idx) internal_function; -static reg_errcode_t extend_buffers (re_match_context_t *mctx) +static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len) internal_function; /* Entry point for POSIX code.
*/ @@ -229,9 +228,7 @@ regexec (preg, string, nmatch, pmatch, eflags) { reg_errcode_t err; Idx start, length; -#ifdef _LIBC re_dfa_t *dfa = preg->buffer; -#endif if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) return REG_BADPAT; @@ -247,14 +244,14 @@ regexec (preg, string, nmatch, pmatch, eflags) length = strlen (string); } - __libc_lock_lock (dfa->lock); + lock_lock (dfa->lock); if (preg->no_sub) err = re_search_internal (preg, string, length, start, length, length, 0, NULL, eflags); else err = re_search_internal (preg, string, length, start, length, length, nmatch, pmatch, eflags); - __libc_lock_unlock (dfa->lock); + lock_unlock (dfa->lock); return err != REG_NOERROR; } @@ -376,11 +373,8 @@ re_search_2_stub (struct re_pattern_buffer *bufp, Idx len = length1 + length2; char *s = NULL; - verify (! TYPE_SIGNED (Idx)); - if (BE (len < length1, 0)) - return -2; - /* if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)) - return -2; */ + if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0)) + return -2; /* Concatenate the strings. */ if (length2 > 0) @@ -425,23 +419,18 @@ re_search_stub (struct re_pattern_buffer *bufp, Idx nregs; regoff_t rval; int eflags = 0; -#ifdef _LIBC re_dfa_t *dfa = bufp->buffer; -#endif Idx last_start = start + range; /* Check for out-of-range. */ - verify (! TYPE_SIGNED (Idx)); - /* if (BE (start < 0, 0)) - return -1; */ - if (BE (start > length, 0)) - return -1; + if (BE (start < 0 || start > length, 0)) + return -1; if (BE (length < last_start || (0 <= range && last_start < start), 0)) last_start = length; - else if (BE (/* last_start < 0 || */ (range < 0 && start <= last_start), 0)) + else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0)) last_start = 0; - __libc_lock_lock (dfa->lock); + lock_lock (dfa->lock); eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; eflags |= (bufp->not_eol) ? 
REG_NOTEOL : 0; @@ -505,7 +494,7 @@ re_search_stub (struct re_pattern_buffer *bufp, } re_free (pmatch); out: - __libc_lock_unlock (dfa->lock); + lock_unlock (dfa->lock); return rval; } @@ -743,7 +732,7 @@ re_search_internal (const regex_t *preg, mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF : CONTEXT_NEWLINE | CONTEXT_BEGBUF; - /* Check incrementally whether of not the input string match. */ + /* Check incrementally whether the input string matches. */ incr = (last_start < start) ? -1 : 1; left_lim = (last_start < start) ? last_start : start; right_lim = (last_start < start) ? start : last_start; @@ -1071,7 +1060,7 @@ prune_impossible_nodes (re_match_context_t *mctx) since initial states may have constraints like "\<", "^", etc.. */ static inline re_dfastate_t * -__attribute ((always_inline)) internal_function +__attribute__ ((always_inline)) internal_function acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, Idx idx) { @@ -1183,7 +1172,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, || (BE (next_char_idx >= mctx->input.valid_len, 0) && mctx->input.valid_len < mctx->input.len)) { - err = extend_buffers (mctx); + err = extend_buffers (mctx, next_char_idx + 1); if (BE (err != REG_NOERROR, 0)) { assert (err == REG_ESPACE); @@ -1763,7 +1752,7 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) && mctx->input.valid_len < mctx->input.len)) { reg_errcode_t err; - err = extend_buffers (mctx); + err = extend_buffers (mctx, next_state_log_idx + 1); if (BE (err != REG_NOERROR, 0)) return err; } @@ -2820,7 +2809,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) if (bkref_str_off >= mctx->input.len) break; - err = extend_buffers (mctx); + err = extend_buffers (mctx, bkref_str_off + 1); if (BE (err != REG_NOERROR, 0)) return err; @@ -3943,6 +3932,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, in_collseq = find_collation_sequence_value 
(pin, elem_len); } /* match with range expression? */ + /* FIXME: Implement rational ranges here, too. */ for (i = 0; i < cset->nranges; ++i) if (cset->range_starts[i] <= in_collseq && in_collseq <= cset->range_ends[i]) @@ -3994,18 +3984,9 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, # endif /* _LIBC */ { /* match with range expression? */ -#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && defined __STRICT_ANSI__) - wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; -#else - wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; - cmp_buf[2] = wc; -#endif for (i = 0; i < cset->nranges; ++i) { - cmp_buf[0] = cset->range_starts[i]; - cmp_buf[4] = cset->range_ends[i]; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i]) { match_len = char_len; goto check_node_accept_bytes_match; @@ -4143,7 +4124,7 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, static reg_errcode_t internal_function __attribute_warn_unused_result__ -extend_buffers (re_match_context_t *mctx) +extend_buffers (re_match_context_t *mctx, int min_len) { reg_errcode_t ret; re_string_t *pstr = &mctx->input; @@ -4153,8 +4134,10 @@ extend_buffers (re_match_context_t *mctx) <= pstr->bufs_len, 0)) return REG_ESPACE; - /* Double the lengths of the buffers. */ - ret = re_string_realloc_buffers (pstr, MIN (pstr->len, pstr->bufs_len * 2)); + /* Double the lengths of the buffers, but allocate at least MIN_LEN. */ + ret = re_string_realloc_buffers (pstr, + MAX (min_len, + MIN (pstr->len, pstr->bufs_len * 2))); if (BE (ret != REG_NOERROR, 0)) return ret; diff --git a/contrib/grep/lib/safe-read.c b/contrib/grep/lib/safe-read.c new file mode 100644 index 0000000000..c9b8828a18 --- /dev/null +++ b/contrib/grep/lib/safe-read.c @@ -0,0 +1,77 @@ +/* An interface to read and write that retries after interrupts. 
+ + Copyright (C) 1993-1994, 1998, 2002-2006, 2009-2014 Free Software + Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#ifdef SAFE_WRITE +# include "safe-write.h" +#else +# include "safe-read.h" +#endif + +/* Get ssize_t. */ +#include <sys/types.h> +#include <unistd.h> + +#include <errno.h> + +#ifdef EINTR +# define IS_EINTR(x) ((x) == EINTR) +#else +# define IS_EINTR(x) 0 +#endif + +#include <limits.h> + +#ifdef SAFE_WRITE +# define safe_rw safe_write +# define rw write +#else +# define safe_rw safe_read +# define rw read +# undef const +# define const /* empty */ +#endif + +/* Read(write) up to COUNT bytes at BUF from(to) descriptor FD, retrying if + interrupted. Return the actual number of bytes read(written), zero for EOF, + or SAFE_READ_ERROR(SAFE_WRITE_ERROR) upon error. */ +size_t +safe_rw (int fd, void const *buf, size_t count) +{ + /* Work around a bug in Tru64 5.1. Attempting to read more than + INT_MAX bytes fails with errno == EINVAL. See + <http://lists.gnu.org/archive/html/bug-gnu-utils/2002-04/msg00010.html>. + When decreasing COUNT, keep it block-aligned.
*/ + enum { BUGGY_READ_MAXIMUM = INT_MAX & ~8191 }; + + for (;;) + { + ssize_t result = rw (fd, buf, count); + + if (0 <= result) + return result; + else if (IS_EINTR (errno)) + continue; + else if (errno == EINVAL && BUGGY_READ_MAXIMUM < count) + count = BUGGY_READ_MAXIMUM; + else + return result; + } +} diff --git a/contrib/grep/lib/safe-read.h b/contrib/grep/lib/safe-read.h new file mode 100644 index 0000000000..aba8bdb341 --- /dev/null +++ b/contrib/grep/lib/safe-read.h @@ -0,0 +1,47 @@ +/* An interface to read() that retries after interrupts. + Copyright (C) 2002, 2006, 2009-2014 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Some system calls may be interrupted and fail with errno = EINTR in the + following situations: + - The process is stopped and restarted (signal SIGSTOP and SIGCONT, user + types Ctrl-Z) on some platforms: Mac OS X. + - The process receives a signal for which a signal handler was installed + with sigaction() with an sa_flags field that does not contain + SA_RESTART. + - The process receives a signal for which a signal handler was installed + with signal() and for which no call to siginterrupt(sig,0) was done, + on some platforms: AIX, HP-UX, IRIX, OSF/1, Solaris. + + This module provides a wrapper around read() that handles EINTR.
*/ + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +#define SAFE_READ_ERROR ((size_t) -1) + +/* Read up to COUNT bytes at BUF from descriptor FD, retrying if interrupted. + Return the actual number of bytes read, zero for EOF, or SAFE_READ_ERROR + upon error. */ +extern size_t safe_read (int fd, void *buf, size_t count); + + +#ifdef __cplusplus +} +#endif diff --git a/contrib/grep/lib/same-inode.h b/contrib/grep/lib/same-inode.h index 8c3900d041..584f606500 100644 --- a/contrib/grep/lib/same-inode.h +++ b/contrib/grep/lib/same-inode.h @@ -1,6 +1,6 @@ /* Determine whether two stat buffers refer to the same file. - Copyright (C) 2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/save-cwd.c b/contrib/grep/lib/save-cwd.c index 592d274eec..7aafacd411 100644 --- a/contrib/grep/lib/save-cwd.c +++ b/contrib/grep/lib/save-cwd.c @@ -1,6 +1,6 @@ /* save-cwd.c -- Save and restore current working directory. - Copyright (C) 1995, 1997-1998, 2003-2006, 2009-2012 Free Software + Copyright (C) 1995, 1997-1998, 2003-2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/save-cwd.h b/contrib/grep/lib/save-cwd.h index 0c875da284..6b84e4601d 100644 --- a/contrib/grep/lib/save-cwd.h +++ b/contrib/grep/lib/save-cwd.h @@ -1,6 +1,6 @@ /* Save and restore current working directory. - Copyright (C) 1995, 1997-1998, 2003, 2009-2012 Free Software Foundation, + Copyright (C) 1995, 1997-1998, 2003, 2009-2014 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/stat.c b/contrib/grep/lib/stat.c index 7599540890..35f4b0b1a5 100644 --- a/contrib/grep/lib/stat.c +++ b/contrib/grep/lib/stat.c @@ -1,5 +1,5 @@ /* Work around platform bugs in stat. - Copyright (C) 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -42,7 +42,7 @@ # endif #endif -static inline int +static int orig_stat (const char *filename, struct stat *buf) { return stat (filename, buf); diff --git a/contrib/grep/lib/stpcpy.c b/contrib/grep/lib/stpcpy.c index e857abef0b..880a7061ef 100644 --- a/contrib/grep/lib/stpcpy.c +++ b/contrib/grep/lib/stpcpy.c @@ -1,5 +1,5 @@ /* stpcpy.c -- copy a string and return pointer to end of new string - Copyright (C) 1992, 1995, 1997-1998, 2006, 2009-2012 Free Software + Copyright (C) 1992, 1995, 1997-1998, 2006, 2009-2014 Free Software Foundation, Inc. NOTE: The canonical source of this file is maintained with the GNU C Library. diff --git a/contrib/grep/lib/str-kmp.h b/contrib/grep/lib/str-kmp.h index 3395173a08..b25d07f8a6 100644 --- a/contrib/grep/lib/str-kmp.h +++ b/contrib/grep/lib/str-kmp.h @@ -1,6 +1,6 @@ /* Substring search in a NUL terminated string of UNIT elements, using the Knuth-Morris-Pratt algorithm. - Copyright (C) 2005-2012 Free Software Foundation, Inc. + Copyright (C) 2005-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2005. 
This program is free software; you can redistribute it and/or modify diff --git a/contrib/grep/lib/strdup.c b/contrib/grep/lib/strdup.c index a3f8c863f3..14cb1266f7 100644 --- a/contrib/grep/lib/strdup.c +++ b/contrib/grep/lib/strdup.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991, 1996-1998, 2002-2004, 2006-2007, 2009-2012 Free Software +/* Copyright (C) 1991, 1996-1998, 2002-2004, 2006-2007, 2009-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. diff --git a/contrib/grep/lib/streq.h b/contrib/grep/lib/streq.h index b312f7f917..0e62fb939a 100644 --- a/contrib/grep/lib/streq.h +++ b/contrib/grep/lib/streq.h @@ -1,5 +1,5 @@ /* Optimized string comparison. - Copyright (C) 2001-2002, 2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published diff --git a/contrib/grep/lib/strerror-override.c b/contrib/grep/lib/strerror-override.c index 9f55cfa9ac..255d933346 100644 --- a/contrib/grep/lib/strerror-override.c +++ b/contrib/grep/lib/strerror-override.c @@ -1,6 +1,6 @@ /* strerror-override.c --- POSIX compatible system error routine - Copyright (C) 2010-2012 Free Software Foundation, Inc. + Copyright (C) 2010-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -291,6 +291,11 @@ strerror_override (int errnum) return "State not recoverable"; #endif +#if GNULIB_defined_EILSEQ + case EILSEQ: + return "Invalid or incomplete multibyte or wide character"; +#endif + default: return NULL; } diff --git a/contrib/grep/lib/strerror-override.h b/contrib/grep/lib/strerror-override.h index fe1fb2c229..e98c1c16bc 100644 --- a/contrib/grep/lib/strerror-override.h +++ b/contrib/grep/lib/strerror-override.h @@ -1,6 +1,6 @@ /* strerror-override.h --- POSIX compatible system error routine - Copyright (C) 2010-2012 Free Software Foundation, Inc. + Copyright (C) 2010-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -46,8 +46,9 @@ || GNULIB_defined_EDQUOT \ || GNULIB_defined_ECANCELED \ || GNULIB_defined_EOWNERDEAD \ - || GNULIB_defined_ENOTRECOVERABLE -extern const char *strerror_override (int errnum); + || GNULIB_defined_ENOTRECOVERABLE \ + || GNULIB_defined_EILSEQ +extern const char *strerror_override (int errnum) _GL_ATTRIBUTE_CONST; # else # define strerror_override(ignored) NULL # endif diff --git a/contrib/grep/lib/strerror.c b/contrib/grep/lib/strerror.c index 587bd21a6c..e54f0cc906 100644 --- a/contrib/grep/lib/strerror.c +++ b/contrib/grep/lib/strerror.c @@ -1,6 +1,6 @@ /* strerror.c --- POSIX compatible system error routine - Copyright (C) 2007-2012 Free Software Foundation, Inc. + Copyright (C) 2007-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/striconv.c b/contrib/grep/lib/striconv.c index 4458d0c3b5..2495d81c48 100644 --- a/contrib/grep/lib/striconv.c +++ b/contrib/grep/lib/striconv.c @@ -1,5 +1,5 @@ /* Charset conversion. - Copyright (C) 2001-2007, 2010-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2007, 2010-2014 Free Software Foundation, Inc. Written by Bruno Haible and Simon Josefsson. This program is free software; you can redistribute it and/or modify diff --git a/contrib/grep/lib/striconv.h b/contrib/grep/lib/striconv.h index fc3aa696a2..5747731e66 100644 --- a/contrib/grep/lib/striconv.h +++ b/contrib/grep/lib/striconv.h @@ -1,5 +1,5 @@ /* Charset conversion. - Copyright (C) 2001-2004, 2006-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2004, 2006-2007, 2009-2014 Free Software Foundation, Inc. Written by Bruno Haible and Simon Josefsson. This program is free software; you can redistribute it and/or modify diff --git a/contrib/grep/lib/stripslash.c b/contrib/grep/lib/stripslash.c index ef458c63a9..6f802d4c23 100644 --- a/contrib/grep/lib/stripslash.c +++ b/contrib/grep/lib/stripslash.c @@ -1,6 +1,6 @@ /* stripslash.c -- remove redundant trailing slashes from a file name - Copyright (C) 1990, 2001, 2003-2006, 2009-2012 Free Software Foundation, + Copyright (C) 1990, 2001, 2003-2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/strnlen.c b/contrib/grep/lib/strnlen.c index d36180dc77..ddccebc6b1 100644 --- a/contrib/grep/lib/strnlen.c +++ b/contrib/grep/lib/strnlen.c @@ -1,5 +1,5 @@ /* Find the length of STRING, but scan at most MAXLEN characters. - Copyright (C) 2005-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2005-2007, 2009-2014 Free Software Foundation, Inc. Written by Simon Josefsson. 
This program is free software; you can redistribute it and/or modify diff --git a/contrib/grep/lib/strnlen1.c b/contrib/grep/lib/strnlen1.c index 52dc507895..363776a927 100644 --- a/contrib/grep/lib/strnlen1.c +++ b/contrib/grep/lib/strnlen1.c @@ -1,5 +1,5 @@ /* Find the length of STRING + 1, but scan at most MAXLEN bytes. - Copyright (C) 2005-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/strnlen1.h b/contrib/grep/lib/strnlen1.h index 4c4f0c0813..b2f956f577 100644 --- a/contrib/grep/lib/strnlen1.h +++ b/contrib/grep/lib/strnlen1.h @@ -1,5 +1,5 @@ /* Find the length of STRING + 1, but scan at most MAXLEN bytes. - Copyright (C) 2005, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2005, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/strtoimax.c b/contrib/grep/lib/strtoimax.c index 4f5fdbb5f2..2c33d5857a 100644 --- a/contrib/grep/lib/strtoimax.c +++ b/contrib/grep/lib/strtoimax.c @@ -1,6 +1,6 @@ /* Convert string representation of a number into an intmax_t value. - Copyright (C) 1999, 2001-2004, 2006, 2009-2012 Free Software Foundation, + Copyright (C) 1999, 2001-2004, 2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify @@ -28,48 +28,55 @@ #include "verify.h" #ifdef UNSIGNED -# ifndef HAVE_DECL_STRTOULL +# if HAVE_UNSIGNED_LONG_LONG_INT +# ifndef HAVE_DECL_STRTOULL "this configure-time declaration test was not run" -# endif -# if !HAVE_DECL_STRTOULL && HAVE_UNSIGNED_LONG_LONG_INT +# endif +# if !HAVE_DECL_STRTOULL unsigned long long int strtoull (char const *, char **, int); +# endif # endif #else -# ifndef HAVE_DECL_STRTOLL +# if HAVE_LONG_LONG_INT +# ifndef HAVE_DECL_STRTOLL "this configure-time declaration test was not run" -# endif -# if !HAVE_DECL_STRTOLL && HAVE_LONG_LONG_INT +# endif +# if !HAVE_DECL_STRTOLL long long int strtoll (char const *, char **, int); +# endif # endif #endif #ifdef UNSIGNED # define Have_long_long HAVE_UNSIGNED_LONG_LONG_INT # define Int uintmax_t +# define Strtoimax strtoumax +# define Strtol strtoul +# define Strtoll strtoull # define Unsigned unsigned -# define strtoimax strtoumax -# define strtol strtoul -# define strtoll strtoull #else # define Have_long_long HAVE_LONG_LONG_INT # define Int intmax_t +# define Strtoimax strtoimax +# define Strtol strtol +# define Strtoll strtoll # define Unsigned #endif Int -strtoimax (char const *ptr, char **endptr, int base) +Strtoimax (char const *ptr, char **endptr, int base) { #if Have_long_long verify (sizeof (Int) == sizeof (Unsigned long int) || sizeof (Int) == sizeof (Unsigned long long int)); if (sizeof (Int) != sizeof (Unsigned long int)) - return strtoll (ptr, endptr, base); + return Strtoll (ptr, endptr, base); #else verify (sizeof (Int) == sizeof (Unsigned long int)); #endif - return strtol (ptr, endptr, base); + return Strtol (ptr, endptr, base); } diff --git a/contrib/grep/lib/strtol.c b/contrib/grep/lib/strtol.c index bf992a8c27..2e015dc5bd 100644 --- a/contrib/grep/lib/strtol.c +++ b/contrib/grep/lib/strtol.c @@ -1,6 +1,6 @@ /* Convert string representation of a number into an integer value. 
- Copyright (C) 1991-1992, 1994-1999, 2003, 2005-2007, 2009-2012 Free Software + Copyright (C) 1991-1992, 1994-1999, 2003, 2005-2007, 2009-2014 Free Software Foundation, Inc. NOTE: The canonical source of this file is maintained with the GNU C diff --git a/contrib/grep/lib/strtoll.c b/contrib/grep/lib/strtoll.c index fdfceb0137..47dfaa0677 100644 --- a/contrib/grep/lib/strtoll.c +++ b/contrib/grep/lib/strtoll.c @@ -1,5 +1,5 @@ /* Function to parse a 'long long int' from text. - Copyright (C) 1995-1997, 1999, 2001, 2009-2012 Free Software Foundation, + Copyright (C) 1995-1997, 1999, 2001, 2009-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. diff --git a/contrib/grep/lib/strtoul.c b/contrib/grep/lib/strtoul.c index e99da41edc..4c5ceaff96 100644 --- a/contrib/grep/lib/strtoul.c +++ b/contrib/grep/lib/strtoul.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991, 1997, 2009-2012 Free Software Foundation, Inc. +/* Copyright (C) 1991, 1997, 2009-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/strtoull.c b/contrib/grep/lib/strtoull.c index 33c2c5d9af..30e6e0279e 100644 --- a/contrib/grep/lib/strtoull.c +++ b/contrib/grep/lib/strtoull.c @@ -1,5 +1,5 @@ /* Function to parse an 'unsigned long long int' from text. - Copyright (C) 1995-1997, 1999, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1995-1997, 1999, 2009-2014 Free Software Foundation, Inc. NOTE: The canonical source of this file is maintained with the GNU C Library. Bugs can be reported to bug-glibc@gnu.org. diff --git a/contrib/grep/lib/trim.c b/contrib/grep/lib/trim.c index 7f31380d18..742de9be72 100644 --- a/contrib/grep/lib/trim.c +++ b/contrib/grep/lib/trim.c @@ -1,5 +1,5 @@ /* Removes leading and/or trailing whitespaces - Copyright (C) 2006-2012 Free Software Foundation, Inc. + Copyright (C) 2006-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/trim.h b/contrib/grep/lib/trim.h index bfe0c3f261..e4dc938bec 100644 --- a/contrib/grep/lib/trim.h +++ b/contrib/grep/lib/trim.h @@ -1,5 +1,5 @@ /* Removes leading and/or trailing whitespaces - Copyright (C) 2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/unistd--.h b/contrib/grep/lib/unistd--.h index 78b9e4ac76..9d0e54d106 100644 --- a/contrib/grep/lib/unistd--.h +++ b/contrib/grep/lib/unistd--.h @@ -1,6 +1,6 @@ /* Like unistd.h, but redefine some names to avoid glitches. - Copyright (C) 2005, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2005, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/unistd-safer.h b/contrib/grep/lib/unistd-safer.h index 684bf79f5b..4b0ca6abce 100644 --- a/contrib/grep/lib/unistd-safer.h +++ b/contrib/grep/lib/unistd-safer.h @@ -1,6 +1,6 @@ /* Invoke unistd-like functions, but avoid some glitches. - Copyright (C) 2001, 2003, 2005, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001, 2003, 2005, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/unistd.c b/contrib/grep/lib/unistd.c new file mode 100644 index 0000000000..6c6a8e268c --- /dev/null +++ b/contrib/grep/lib/unistd.c @@ -0,0 +1,3 @@ +#include <config.h> +#define _GL_UNISTD_INLINE _GL_EXTERN_INLINE +#include "unistd.h" diff --git a/contrib/grep/lib/unistr/u8-mbtoucr.c b/contrib/grep/lib/unistr/u8-mbtoucr.c index 8bc59d7ce7..b11543a5e7 100644 --- a/contrib/grep/lib/unistr/u8-mbtoucr.c +++ b/contrib/grep/lib/unistr/u8-mbtoucr.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-8 string, returning an error code. - Copyright (C) 1999-2002, 2006-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1999-2002, 2006-2007, 2009-2014 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This program is free software: you can redistribute it and/or modify it diff --git a/contrib/grep/lib/unistr/u8-uctomb-aux.c b/contrib/grep/lib/unistr/u8-uctomb-aux.c index 5ee212b7ee..8fdd88b5cb 100644 --- a/contrib/grep/lib/unistr/u8-uctomb-aux.c +++ b/contrib/grep/lib/unistr/u8-uctomb-aux.c @@ -1,5 +1,5 @@ /* Conversion UCS-4 to UTF-8. - Copyright (C) 2002, 2006-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2014 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/contrib/grep/lib/unistr/u8-uctomb.c b/contrib/grep/lib/unistr/u8-uctomb.c index e56e4a2386..df9b227ee7 100644 --- a/contrib/grep/lib/unistr/u8-uctomb.c +++ b/contrib/grep/lib/unistr/u8-uctomb.c @@ -1,5 +1,5 @@ /* Store a character in UTF-8 string. - Copyright (C) 2002, 2005-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2002, 2005-2006, 2009-2014 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it diff --git a/contrib/grep/lib/uniwidth/cjk.h b/contrib/grep/lib/uniwidth/cjk.h index d75c94f169..71ea0cb7b0 100644 --- a/contrib/grep/lib/uniwidth/cjk.h +++ b/contrib/grep/lib/uniwidth/cjk.h @@ -1,5 +1,5 @@ /* Test for CJK encoding. - Copyright (C) 2001-2002, 2005-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2005-2007, 2009-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2002. This program is free software: you can redistribute it and/or modify it diff --git a/contrib/grep/lib/uniwidth/width.c b/contrib/grep/lib/uniwidth/width.c index b6e2c334ba..2c070c3e03 100644 --- a/contrib/grep/lib/uniwidth/width.c +++ b/contrib/grep/lib/uniwidth/width.c @@ -1,5 +1,5 @@ /* Determine display width of Unicode character. - Copyright (C) 2001-2002, 2006-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2006-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2002. This program is free software: you can redistribute it and/or modify it diff --git a/contrib/grep/lib/unlocked-io.h b/contrib/grep/lib/unlocked-io.h index 65bfbc4a5b..4b0f3ff087 100644 --- a/contrib/grep/lib/unlocked-io.h +++ b/contrib/grep/lib/unlocked-io.h @@ -1,6 +1,6 @@ /* Prefer faster, non-thread-safe stdio functions if available. - Copyright (C) 2001-2004, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2004, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/verify.h b/contrib/grep/lib/verify.h index 0c320b19ad..a25e514668 100644 --- a/contrib/grep/lib/verify.h +++ b/contrib/grep/lib/verify.h @@ -1,6 +1,6 @@ /* Compile-time assert-like macros. - Copyright (C) 2005-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ /* Written by Paul Eggert, Bruno Haible, and Jim Meyering. */ #ifndef _GL_VERIFY_H -# define _GL_VERIFY_H +#define _GL_VERIFY_H /* Define _GL_HAVE__STATIC_ASSERT to 1 if _Static_assert works as per C11. @@ -31,14 +31,24 @@ Use this only with GCC. If we were willing to slow 'configure' down we could also use it with other compilers, but since this affects only the quality of diagnostics, why bother? */ -# if (4 < __GNUC__ || (__GNUC__ == 4 && 6 <= __GNUC_MINOR__)) && !defined __cplusplus -# define _GL_HAVE__STATIC_ASSERT 1 -# endif +#if (4 < __GNUC__ + (6 <= __GNUC_MINOR__) \ + && (201112L <= __STDC_VERSION__ || !defined __STRICT_ANSI__) \ + && !defined __cplusplus) +# define _GL_HAVE__STATIC_ASSERT 1 +#endif /* The condition (99 < __GNUC__) is temporary, until we know about the first G++ release that supports static_assert. */ -# if (99 < __GNUC__) && defined __cplusplus -# define _GL_HAVE_STATIC_ASSERT 1 -# endif +#if (99 < __GNUC__) && defined __cplusplus +# define _GL_HAVE_STATIC_ASSERT 1 +#endif + +/* FreeBSD 9.1 <sys/cdefs.h>, included by <stddef.h> and lots of other + system headers, defines a conflicting _Static_assert that is no + better than ours; override it. */ +#ifndef _GL_HAVE_STATIC_ASSERT +# include <stddef.h> +# undef _Static_assert +#endif /* Each of these macros verifies that its argument R is nonzero. To be portable, R should be an integer constant expression. Unlike @@ -141,50 +151,50 @@ Use a template type to work around the problem. */ /* Concatenate two preprocessor tokens. */ -# define _GL_CONCAT(x, y) _GL_CONCAT0 (x, y) -# define _GL_CONCAT0(x, y) x##y +#define _GL_CONCAT(x, y) _GL_CONCAT0 (x, y) +#define _GL_CONCAT0(x, y) x##y /* _GL_COUNTER is an integer, preferably one that changes each time we use it. Use __COUNTER__ if it works, falling back on __LINE__ otherwise.
__LINE__ isn't perfect, but it's better than a constant. */ -# if defined __COUNTER__ && __COUNTER__ != __COUNTER__ -# define _GL_COUNTER __COUNTER__ -# else -# define _GL_COUNTER __LINE__ -# endif +#if defined __COUNTER__ && __COUNTER__ != __COUNTER__ +# define _GL_COUNTER __COUNTER__ +#else +# define _GL_COUNTER __LINE__ +#endif /* Generate a symbol with the given prefix, making it unique if possible. */ -# define _GL_GENSYM(prefix) _GL_CONCAT (prefix, _GL_COUNTER) +#define _GL_GENSYM(prefix) _GL_CONCAT (prefix, _GL_COUNTER) /* Verify requirement R at compile-time, as an integer constant expression that returns 1. If R is false, fail at compile-time, preferably with a diagnostic that includes the string-literal DIAGNOSTIC. */ -# define _GL_VERIFY_TRUE(R, DIAGNOSTIC) \ - (!!sizeof (_GL_VERIFY_TYPE (R, DIAGNOSTIC))) +#define _GL_VERIFY_TRUE(R, DIAGNOSTIC) \ + (!!sizeof (_GL_VERIFY_TYPE (R, DIAGNOSTIC))) -# ifdef __cplusplus -# if !GNULIB_defined_struct__gl_verify_type +#ifdef __cplusplus +# if !GNULIB_defined_struct__gl_verify_type template <int w> struct _gl_verify_type { unsigned int _gl_verify_error_if_negative: w; }; -# define GNULIB_defined_struct__gl_verify_type 1 -# endif -# define _GL_VERIFY_TYPE(R, DIAGNOSTIC) \ - _gl_verify_type<(R) ? 1 : -1> -# elif defined _GL_HAVE__STATIC_ASSERT -# define _GL_VERIFY_TYPE(R, DIAGNOSTIC) \ - struct { \ - _Static_assert (R, DIAGNOSTIC); \ - int _gl_dummy; \ - } -# else -# define _GL_VERIFY_TYPE(R, DIAGNOSTIC) \ - struct { unsigned int _gl_verify_error_if_negative: (R) ? 1 : -1; } +# define GNULIB_defined_struct__gl_verify_type 1 # endif +# define _GL_VERIFY_TYPE(R, DIAGNOSTIC) \ + _gl_verify_type<(R) ? 1 : -1> +#elif defined _GL_HAVE__STATIC_ASSERT +# define _GL_VERIFY_TYPE(R, DIAGNOSTIC) \ + struct { \ + _Static_assert (R, DIAGNOSTIC); \ + int _gl_dummy; \ + } +#else +# define _GL_VERIFY_TYPE(R, DIAGNOSTIC) \ + struct { unsigned int _gl_verify_error_if_negative: (R) ?
1 : -1; } +#endif /* Verify requirement R at compile-time, as a declaration without a trailing ';'. If R is false, fail at compile-time, preferably @@ -193,23 +203,23 @@ template Unfortunately, unlike C11, this implementation must appear as an ordinary declaration, and cannot appear inside struct { ... }. */ -# ifdef _GL_HAVE__STATIC_ASSERT -# define _GL_VERIFY _Static_assert -# else -# define _GL_VERIFY(R, DIAGNOSTIC) \ - extern int (*_GL_GENSYM (_gl_verify_function) (void)) \ - [_GL_VERIFY_TRUE (R, DIAGNOSTIC)] -# endif +#ifdef _GL_HAVE__STATIC_ASSERT +# define _GL_VERIFY _Static_assert +#else +# define _GL_VERIFY(R, DIAGNOSTIC) \ + extern int (*_GL_GENSYM (_gl_verify_function) (void)) \ + [_GL_VERIFY_TRUE (R, DIAGNOSTIC)] +#endif /* _GL_STATIC_ASSERT_H is defined if this code is copied into assert.h. */ -# ifdef _GL_STATIC_ASSERT_H -# if !defined _GL_HAVE__STATIC_ASSERT && !defined _Static_assert -# define _Static_assert(R, DIAGNOSTIC) _GL_VERIFY (R, DIAGNOSTIC) -# endif -# if !defined _GL_HAVE_STATIC_ASSERT && !defined static_assert -# define static_assert _Static_assert /* C11 requires this #define. */ -# endif +#ifdef _GL_STATIC_ASSERT_H +# if !defined _GL_HAVE__STATIC_ASSERT && !defined _Static_assert +# define _Static_assert(R, DIAGNOSTIC) _GL_VERIFY (R, DIAGNOSTIC) # endif +# if !defined _GL_HAVE_STATIC_ASSERT && !defined static_assert +# define static_assert _Static_assert /* C11 requires this #define. */ +# endif +#endif /* @assert.h omit start@ */ @@ -227,18 +237,42 @@ template verify_true is obsolescent; please use verify_expr instead. */ -# define verify_true(R) _GL_VERIFY_TRUE (R, "verify_true (" #R ")") +#define verify_true(R) _GL_VERIFY_TRUE (R, "verify_true (" #R ")") /* Verify requirement R at compile-time. Return the value of the expression E. */ -# define verify_expr(R, E) \ - (_GL_VERIFY_TRUE (R, "verify_expr (" #R ", " #E ")") ? (E) : (E)) +#define verify_expr(R, E) \ + (_GL_VERIFY_TRUE (R, "verify_expr (" #R ", " #E ")") ? 
(E) : (E)) /* Verify requirement R at compile-time, as a declaration without a trailing ';'. */ -# define verify(R) _GL_VERIFY (R, "verify (" #R ")") +#define verify(R) _GL_VERIFY (R, "verify (" #R ")") + +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* Assume that R always holds. This lets the compiler optimize + accordingly. R should not have side-effects; it may or may not be + evaluated. Behavior is undefined if R is false. */ + +#if (__has_builtin (__builtin_unreachable) \ + || 4 < __GNUC__ + (5 <= __GNUC_MINOR__)) +# define assume(R) ((R) ? (void) 0 : __builtin_unreachable ()) +#elif 1200 <= _MSC_VER +# define assume(R) __assume (R) +#elif (defined lint \ + && (__has_builtin (__builtin_trap) \ + || 3 < __GNUC__ + (3 < __GNUC_MINOR__ + (4 <= __GNUC_PATCHLEVEL__)))) + /* Doing it this way helps various packages when configured with + --enable-gcc-warnings, which compiles with -Dlint. It's nicer + when 'assume' silences warnings even with older GCCs. */ +# define assume(R) ((R) ? (void) 0 : __builtin_trap ()) +#else +# define assume(R) ((void) (0 && (R))) +#endif /* @assert.h omit end@ */ diff --git a/contrib/grep/lib/version-etc-fsf.c b/contrib/grep/lib/version-etc-fsf.c index c452f40dfb..b37dd60559 100644 --- a/contrib/grep/lib/version-etc-fsf.c +++ b/contrib/grep/lib/version-etc-fsf.c @@ -1,5 +1,5 @@ /* Variable with FSF copyright information, for version-etc. - Copyright (C) 1999-2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1999-2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/version-etc.c b/contrib/grep/lib/version-etc.c index ae1e44d61d..04f5b6209f 100644 --- a/contrib/grep/lib/version-etc.c +++ b/contrib/grep/lib/version-etc.c @@ -1,5 +1,5 @@ /* Print --version and bug-reporting information in a consistent format. 
- Copyright (C) 1999-2012 Free Software Foundation, Inc. + Copyright (C) 1999-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,7 +38,7 @@ # define PACKAGE PACKAGE_TARNAME #endif -enum { COPYRIGHT_YEAR = 2012 }; +enum { COPYRIGHT_YEAR = 2014 }; /* The three functions below display the --version information the standard way. diff --git a/contrib/grep/lib/version-etc.h b/contrib/grep/lib/version-etc.h index 5548c023c3..8e1b2ccff1 100644 --- a/contrib/grep/lib/version-etc.h +++ b/contrib/grep/lib/version-etc.h @@ -1,5 +1,5 @@ /* Print --version and bug-reporting information in a consistent format. - Copyright (C) 1999, 2003, 2005, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1999, 2003, 2005, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/wcrtomb.c b/contrib/grep/lib/wcrtomb.c index 9e6494eef4..1d340cd942 100644 --- a/contrib/grep/lib/wcrtomb.c +++ b/contrib/grep/lib/wcrtomb.c @@ -1,5 +1,5 @@ /* Convert wide character to multibyte character. - Copyright (C) 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2008-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2008. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/wcscoll-impl.h b/contrib/grep/lib/wcscoll-impl.h deleted file mode 100644 index 8a93d5257c..0000000000 --- a/contrib/grep/lib/wcscoll-impl.h +++ /dev/null @@ -1,111 +0,0 @@ -/* Compare two wide strings using the current locale. - Copyright (C) 2011-2012 Free Software Foundation, Inc. - Written by Bruno Haible , 2011. 
- - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -int -wcscoll (const wchar_t *s1, const wchar_t *s2) -{ - char mbbuf1[1024]; - char mbbuf2[1024]; - char *mbs1; - char *mbs2; - - { - int saved_errno = errno; - - /* Convert s1 to a multibyte string, trying to avoid malloc(). */ - { - size_t ret; - - ret = wcstombs (mbbuf1, s1, sizeof (mbbuf1)); - if (ret == (size_t)-1) - goto failed1; - if (ret < sizeof (mbbuf1)) - mbs1 = mbbuf1; - else - { - size_t need = wcstombs (NULL, s1, 0); - if (need == (size_t)-1) - goto failed1; - mbs1 = (char *) malloc (need + 1); - if (mbs1 == NULL) - goto out_of_memory1; - ret = wcstombs (mbs1, s1, need + 1); - if (ret != need) - abort (); - } - } - - /* Convert s2 to a multibyte string, trying to avoid malloc(). */ - { - size_t ret; - - ret = wcstombs (mbbuf2, s2, sizeof (mbbuf2)); - if (ret == (size_t)-1) - goto failed2; - if (ret < sizeof (mbbuf2)) - mbs2 = mbbuf2; - else - { - size_t need = wcstombs (NULL, s2, 0); - if (need == (size_t)-1) - goto failed2; - mbs2 = (char *) malloc (need + 1); - if (mbs2 == NULL) - goto out_of_memory2; - ret = wcstombs (mbs2, s2, need + 1); - if (ret != need) - abort (); - } - } - - /* No error so far. */ - errno = saved_errno; - } - - /* Compare the two multibyte strings. 
*/ - { - int result = strcoll (mbs1, mbs2); - - if (mbs1 != mbbuf1) - { - int saved_errno = errno; - free (mbs1); - errno = saved_errno; - } - if (mbs2 != mbbuf2) - { - int saved_errno = errno; - free (mbs2); - errno = saved_errno; - } - return result; - } - - out_of_memory2: - if (mbs1 != mbbuf1) - free (mbs1); - out_of_memory1: - errno = ENOMEM; - return 0; - - failed2: - if (mbs1 != mbbuf1) - free (mbs1); - failed1: - errno = EILSEQ; - return 0; -} diff --git a/contrib/grep/lib/wcscoll.c b/contrib/grep/lib/wcscoll.c deleted file mode 100644 index ce60aea9a7..0000000000 --- a/contrib/grep/lib/wcscoll.c +++ /dev/null @@ -1,27 +0,0 @@ -/* Compare two wide strings using the current locale. - Copyright (C) 2011-2012 Free Software Foundation, Inc. - Written by Bruno Haible , 2011. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#include -#include -#include - -#include "wcscoll-impl.h" diff --git a/contrib/grep/lib/wctob.c b/contrib/grep/lib/wctob.c index 0885c1b127..d87d18042c 100644 --- a/contrib/grep/lib/wctob.c +++ b/contrib/grep/lib/wctob.c @@ -1,5 +1,5 @@ /* Convert wide character to unibyte character. - Copyright (C) 2008, 2010-2012 Free Software Foundation, Inc. + Copyright (C) 2008, 2010-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2008. 
This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/wctomb-impl.h b/contrib/grep/lib/wctomb-impl.h index 68264cfb97..0fa170eb18 100644 --- a/contrib/grep/lib/wctomb-impl.h +++ b/contrib/grep/lib/wctomb-impl.h @@ -1,5 +1,5 @@ /* Convert wide character to multibyte character. - Copyright (C) 2011-2012 Free Software Foundation, Inc. + Copyright (C) 2011-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2011. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/wctomb.c b/contrib/grep/lib/wctomb.c index 736a7c7090..b3595a4d37 100644 --- a/contrib/grep/lib/wctomb.c +++ b/contrib/grep/lib/wctomb.c @@ -1,5 +1,5 @@ /* Convert wide character to multibyte character. - Copyright (C) 2011-2012 Free Software Foundation, Inc. + Copyright (C) 2011-2014 Free Software Foundation, Inc. Written by Bruno Haible , 2011. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/wctype-h.c b/contrib/grep/lib/wctype-h.c new file mode 100644 index 0000000000..bb5f847e33 --- /dev/null +++ b/contrib/grep/lib/wctype-h.c @@ -0,0 +1,4 @@ +/* Normally this would be wctype.c, but that name's already taken. */ +#include +#define _GL_WCTYPE_INLINE _GL_EXTERN_INLINE +#include "wctype.h" diff --git a/contrib/grep/lib/wcwidth.c b/contrib/grep/lib/wcwidth.c index 44503b04bb..d7837bb1c7 100644 --- a/contrib/grep/lib/wcwidth.c +++ b/contrib/grep/lib/wcwidth.c @@ -1,5 +1,5 @@ /* Determine the number of screen columns needed for a character. - Copyright (C) 2006-2007, 2010-2012 Free Software Foundation, Inc. + Copyright (C) 2006-2007, 2010-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/xalloc-die.c b/contrib/grep/lib/xalloc-die.c index e85d6dfb69..7bdd2a3113 100644 --- a/contrib/grep/lib/xalloc-die.c +++ b/contrib/grep/lib/xalloc-die.c @@ -1,6 +1,6 @@ /* Report a memory allocation failure and exit. - Copyright (C) 1997-2000, 2002-2004, 2006, 2009-2012 Free Software + Copyright (C) 1997-2000, 2002-2004, 2006, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/xalloc-oversized.h b/contrib/grep/lib/xalloc-oversized.h index 6779755983..f3329228ec 100644 --- a/contrib/grep/lib/xalloc-oversized.h +++ b/contrib/grep/lib/xalloc-oversized.h @@ -1,6 +1,6 @@ /* xalloc-oversized.h -- memory allocation size checking - Copyright (C) 1990-2000, 2003-2004, 2006-2012 Free Software Foundation, Inc. + Copyright (C) 1990-2000, 2003-2004, 2006-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ /* Return 1 if an array of N objects, each of size S, cannot exist due to size arithmetic overflow. S must be positive and N must be - nonnegative. This is a macro, not an inline function, so that it + nonnegative. This is a macro, not a function, so that it works correctly even when SIZE_MAX < N. By gnulib convention, SIZE_MAX represents overflow in size diff --git a/contrib/grep/lib/xalloc.h b/contrib/grep/lib/xalloc.h index 6f5b87ee2d..3f6b5b80f8 100644 --- a/contrib/grep/lib/xalloc.h +++ b/contrib/grep/lib/xalloc.h @@ -1,6 +1,6 @@ /* xalloc.h -- malloc with out-of-memory checking - Copyright (C) 1990-2000, 2003-2004, 2006-2012 Free Software Foundation, Inc. + Copyright (C) 1990-2000, 2003-2004, 2006-2014 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,28 +16,36 @@ along with this program. If not, see . */ #ifndef XALLOC_H_ -# define XALLOC_H_ +#define XALLOC_H_ -# include +#include -# include "xalloc-oversized.h" +#include "xalloc-oversized.h" -# ifdef __cplusplus +#ifndef _GL_INLINE_HEADER_BEGIN + #error "Please include config.h first." +#endif +_GL_INLINE_HEADER_BEGIN +#ifndef XALLOC_INLINE +# define XALLOC_INLINE _GL_INLINE +#endif + +#ifdef __cplusplus extern "C" { -# endif +#endif -# if __GNUC__ >= 3 -# define _GL_ATTRIBUTE_MALLOC __attribute__ ((__malloc__)) -# else -# define _GL_ATTRIBUTE_MALLOC -# endif +#if __GNUC__ >= 3 +# define _GL_ATTRIBUTE_MALLOC __attribute__ ((__malloc__)) +#else +# define _GL_ATTRIBUTE_MALLOC +#endif -# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) -# define _GL_ATTRIBUTE_ALLOC_SIZE(args) __attribute__ ((__alloc_size__ args)) -# else -# define _GL_ATTRIBUTE_ALLOC_SIZE(args) -# endif +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) +# define _GL_ATTRIBUTE_ALLOC_SIZE(args) __attribute__ ((__alloc_size__ args)) +#else +# define _GL_ATTRIBUTE_ALLOC_SIZE(args) +#endif /* This function is always triggered when memory is exhausted. It must be defined by the application, either explicitly @@ -56,7 +64,7 @@ void *xrealloc (void *p, size_t s) _GL_ATTRIBUTE_ALLOC_SIZE ((2)); void *x2realloc (void *p, size_t *pn); void *xmemdup (void const *p, size_t s) - _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_ALLOC_SIZE ((2)); + _GL_ATTRIBUTE_ALLOC_SIZE ((2)); char *xstrdup (char const *str) _GL_ATTRIBUTE_MALLOC; @@ -67,45 +75,31 @@ char *xstrdup (char const *str) /* Allocate an object of type T dynamically, with error checking. */ /* extern t *XMALLOC (typename t); */ -# define XMALLOC(t) ((t *) xmalloc (sizeof (t))) +#define XMALLOC(t) ((t *) xmalloc (sizeof (t))) /* Allocate memory for N elements of type T, with error checking. 
*/ /* extern t *XNMALLOC (size_t n, typename t); */ -# define XNMALLOC(n, t) \ - ((t *) (sizeof (t) == 1 ? xmalloc (n) : xnmalloc (n, sizeof (t)))) +#define XNMALLOC(n, t) \ + ((t *) (sizeof (t) == 1 ? xmalloc (n) : xnmalloc (n, sizeof (t)))) /* Allocate an object of type T dynamically, with error checking, and zero it. */ /* extern t *XZALLOC (typename t); */ -# define XZALLOC(t) ((t *) xzalloc (sizeof (t))) +#define XZALLOC(t) ((t *) xzalloc (sizeof (t))) /* Allocate memory for N elements of type T, with error checking, and zero it. */ /* extern t *XCALLOC (size_t n, typename t); */ -# define XCALLOC(n, t) \ - ((t *) (sizeof (t) == 1 ? xzalloc (n) : xcalloc (n, sizeof (t)))) +#define XCALLOC(n, t) \ + ((t *) (sizeof (t) == 1 ? xzalloc (n) : xcalloc (n, sizeof (t)))) -# if HAVE_INLINE -# define static_inline static inline -# else -void *xnmalloc (size_t n, size_t s) - _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_ALLOC_SIZE ((1, 2)); -void *xnrealloc (void *p, size_t n, size_t s) - _GL_ATTRIBUTE_ALLOC_SIZE ((2, 3)); -void *x2nrealloc (void *p, size_t *pn, size_t s); -char *xcharalloc (size_t n) - _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_ALLOC_SIZE ((1)); -# endif - -# ifdef static_inline - /* Allocate an array of N objects, each with S bytes of memory, dynamically, with error checking. S must be nonzero. */ -static_inline void *xnmalloc (size_t n, size_t s) +XALLOC_INLINE void *xnmalloc (size_t n, size_t s) _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_ALLOC_SIZE ((1, 2)); -static_inline void * +XALLOC_INLINE void * xnmalloc (size_t n, size_t s) { if (xalloc_oversized (n, s)) @@ -116,9 +110,9 @@ xnmalloc (size_t n, size_t s) /* Change the size of an allocated block of memory P to an array of N objects each of S bytes, with error checking. S must be nonzero. 
*/ -static_inline void *xnrealloc (void *p, size_t n, size_t s) +XALLOC_INLINE void *xnrealloc (void *p, size_t n, size_t s) _GL_ATTRIBUTE_ALLOC_SIZE ((2, 3)); -static_inline void * +XALLOC_INLINE void * xnrealloc (void *p, size_t n, size_t s) { if (xalloc_oversized (n, s)) @@ -128,10 +122,9 @@ xnrealloc (void *p, size_t n, size_t s) /* If P is null, allocate a block of at least *PN such objects; otherwise, reallocate P so that it contains more than *PN objects - each of S bytes. *PN must be nonzero unless P is null, and S must - be nonzero. Set *PN to the new number of objects, and return the - pointer to the new block. *PN is never set to zero, and the - returned pointer is never null. + each of S bytes. S must be nonzero. Set *PN to the new number of + objects, and return the pointer to the new block. *PN is never set + to zero, and the returned pointer is never null. Repeated reallocations are guaranteed to make progress, either by allocating an initial block with a nonzero size, or by allocating a @@ -181,7 +174,7 @@ xnrealloc (void *p, size_t n, size_t s) */ -static_inline void * +XALLOC_INLINE void * x2nrealloc (void *p, size_t *pn, size_t s) { size_t n = *pn; @@ -202,13 +195,13 @@ x2nrealloc (void *p, size_t *pn, size_t s) } else { - /* Set N = ceil (1.5 * N) so that progress is made if N == 1. + /* Set N = floor (1.5 * N) + 1 so that progress is made even if N == 0. Check for overflow, so that N * S stays in size_t range. - The check is slightly conservative, but an exact check isn't + The check may be slightly conservative, but an exact check isn't worth the trouble. */ if ((size_t) -1 / 3 * 2 / s <= n) xalloc_die (); - n += (n + 1) / 2; + n += n / 2 + 1; } *pn = n; @@ -218,17 +211,15 @@ x2nrealloc (void *p, size_t *pn, size_t s) /* Return a pointer to a new buffer of N bytes. This is like xmalloc, except it returns char *. 
*/ -static_inline char *xcharalloc (size_t n) +XALLOC_INLINE char *xcharalloc (size_t n) _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_ALLOC_SIZE ((1)); -static_inline char * +XALLOC_INLINE char * xcharalloc (size_t n) { return XNMALLOC (n, char); } -# endif - -# ifdef __cplusplus +#ifdef __cplusplus } /* C++ does not allow conversions from void * to other pointer types @@ -265,7 +256,7 @@ xmemdup (T const *p, size_t s) return (T *) xmemdup ((void const *) p, s); } -# endif +#endif #endif /* !XALLOC_H_ */ diff --git a/contrib/grep/lib/xmalloc.c b/contrib/grep/lib/xmalloc.c index 3b5f86cdca..264d44afd4 100644 --- a/contrib/grep/lib/xmalloc.c +++ b/contrib/grep/lib/xmalloc.c @@ -1,6 +1,6 @@ /* xmalloc.c -- malloc with out of memory checking - Copyright (C) 1990-2000, 2002-2006, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 1990-2000, 2002-2006, 2008-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,11 +17,9 @@ #include -#if ! HAVE_INLINE -# define static_inline -#endif +#define XALLOC_INLINE _GL_EXTERN_INLINE + #include "xalloc.h" -#undef static_inline #include #include diff --git a/contrib/grep/lib/xstriconv.c b/contrib/grep/lib/xstriconv.c index 23d6fc61bf..be1ef2df2e 100644 --- a/contrib/grep/lib/xstriconv.c +++ b/contrib/grep/lib/xstriconv.c @@ -1,5 +1,5 @@ /* Charset conversion with out-of-memory checking. - Copyright (C) 2001-2004, 2006, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2004, 2006, 2009-2014 Free Software Foundation, Inc. Written by Bruno Haible. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/xstriconv.h b/contrib/grep/lib/xstriconv.h index f176f4ebb9..75f8f8326e 100644 --- a/contrib/grep/lib/xstriconv.h +++ b/contrib/grep/lib/xstriconv.h @@ -1,5 +1,5 @@ /* Charset conversion with out-of-memory checking. 
- Copyright (C) 2001-2004, 2006-2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2004, 2006-2007, 2009-2014 Free Software Foundation, Inc. Written by Bruno Haible and Simon Josefsson. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/xstrtol-error.c b/contrib/grep/lib/xstrtol-error.c index ce96ef6c65..9629dabbe5 100644 --- a/contrib/grep/lib/xstrtol-error.c +++ b/contrib/grep/lib/xstrtol-error.c @@ -1,6 +1,6 @@ /* A more useful interface to strtol. - Copyright (C) 1995-1996, 1998-1999, 2001-2004, 2006-2012 Free Software + Copyright (C) 1995-1996, 1998-1999, 2001-2004, 2006-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify diff --git a/contrib/grep/lib/xstrtol.c b/contrib/grep/lib/xstrtol.c index 7c4fbd87b8..f6d535a7f3 100644 --- a/contrib/grep/lib/xstrtol.c +++ b/contrib/grep/lib/xstrtol.c @@ -1,6 +1,6 @@ /* A more useful interface to strtol. - Copyright (C) 1995-1996, 1998-2001, 2003-2007, 2009-2012 Free Software + Copyright (C) 1995-1996, 1998-2001, 2003-2007, 2009-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -182,7 +182,7 @@ __xstrtol (const char *s, char **ptr, int strtol_base, break; case 'c': - overflow = 0; + overflow = LONGINT_OK; break; case 'E': /* exa or exbi */ diff --git a/contrib/grep/lib/xstrtol.h b/contrib/grep/lib/xstrtol.h index 516ac5655f..fe54f6afb3 100644 --- a/contrib/grep/lib/xstrtol.h +++ b/contrib/grep/lib/xstrtol.h @@ -1,6 +1,6 @@ /* A more useful interface to strtol. - Copyright (C) 1995-1996, 1998-1999, 2001-2004, 2006-2012 Free Software + Copyright (C) 1995-1996, 1998-1999, 2001-2004, 2006-2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -66,7 +66,7 @@ _DECLARE_XSTRTOL (xstrtoull, unsigned long long int) After reporting an error, exit with a failure status. 
*/ -void _Noreturn xstrtol_fatal (enum strtol_error, +_Noreturn void xstrtol_fatal (enum strtol_error, int, char, struct option const *, char const *); diff --git a/contrib/grep/src/dfa.c b/contrib/grep/src/dfa.c index df73a1ab98..48a83cdf86 100644 --- a/contrib/grep/src/dfa.c +++ b/contrib/grep/src/dfa.c @@ -1,5 +1,5 @@ /* dfa.c - deterministic extended regexp routines for GNU - Copyright (C) 1988, 1998, 2000, 2002, 2004-2005, 2007-2012 Free Software + Copyright (C) 1988, 1998, 2000, 2002, 2004-2005, 2007-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify @@ -21,15 +21,16 @@ Modified July, 1988 by Arthur David Olson to assist BMG speedups */ #include + +#include "dfa.h" + #include #include #include -#include #include #include #include #include -#include #define STREQ(a, b) (strcmp (a, b) == 0) @@ -43,23 +44,15 @@ host does not conform to Posix. */ #define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9) -/* gettext.h ensures that we don't use gettext if ENABLE_NLS is not defined */ #include "gettext.h" #define _(str) gettext (str) -#include "mbsupport.h" /* defines MBS_SUPPORT if appropriate */ #include #include -#if HAVE_LANGINFO_CODESET -# include -#endif - -#include "regex.h" -#include "dfa.h" #include "xalloc.h" -/* HPUX, define those as macros in sys/param.h */ +/* HPUX defines these as macros in sys/param.h. */ #ifdef setbit # undef setbit #endif @@ -67,36 +60,41 @@ # undef clrbit #endif -/* Number of bits in an unsigned char. */ -#ifndef CHARBITS -# define CHARBITS 8 -#endif +/* First integer value that is greater than any character code. */ +enum { NOTCHAR = 1 << CHAR_BIT }; -/* First integer value that is greater than any character code. */ -#define NOTCHAR (1 << CHARBITS) +/* This represents part of a character class. It must be unsigned and + at least CHARCLASS_WORD_BITS wide. Any excess bits are zero. */ +typedef unsigned int charclass_word; -/* INTBITS need not be exact, just a lower bound. 
*/ -#ifndef INTBITS -# define INTBITS (CHARBITS * sizeof (int)) -#endif +/* The number of bits used in a charclass word. utf8_classes assumes + this is exactly 32. */ +enum { CHARCLASS_WORD_BITS = 32 }; -/* Number of ints required to hold a bit for every character. */ -#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS) +/* The maximum useful value of a charclass_word; all used bits are 1. */ +#define CHARCLASS_WORD_MASK \ + (((charclass_word) 1 << (CHARCLASS_WORD_BITS - 1) << 1) - 1) -/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */ -typedef int charclass[CHARCLASS_INTS]; +/* Number of words required to hold a bit for every character. */ +enum +{ + CHARCLASS_WORDS = (NOTCHAR + CHARCLASS_WORD_BITS - 1) / CHARCLASS_WORD_BITS +}; + +/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */ +typedef charclass_word charclass[CHARCLASS_WORDS]; /* Convert a possibly-signed character to an unsigned character. This is a bit safer than casting to unsigned char, since it catches some type errors that the cast doesn't. */ -static inline unsigned char +static unsigned char to_uchar (char ch) { return ch; } /* Contexts tell us whether a character is a newline or a word constituent. - Word-constituent characters are those that satisfy iswalnum(), plus '_'. + Word-constituent characters are those that satisfy iswalnum, plus '_'. Each character has a single CTX_* value; bitmasks of CTX_* values denote a particular character class. @@ -126,17 +124,17 @@ to_uchar (char ch) The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint succeeds in a particular context. Prev is a bitmask of possible context values for the previous character, curr is the (single-bit) - context value for the lookahead character. */ + context value for the lookahead character. 
*/ #define NEWLINE_CONSTRAINT(constraint) (((constraint) >> 8) & 0xf) #define LETTER_CONSTRAINT(constraint) (((constraint) >> 4) & 0xf) #define OTHER_CONSTRAINT(constraint) ((constraint) & 0xf) #define SUCCEEDS_IN_CONTEXT(constraint, prev, curr) \ - ((((curr) & CTX_NONE ? OTHER_CONSTRAINT(constraint) : 0) \ - | ((curr) & CTX_LETTER ? LETTER_CONSTRAINT(constraint) : 0) \ - | ((curr) & CTX_NEWLINE ? NEWLINE_CONSTRAINT(constraint) : 0)) & (prev)) + ((((curr) & CTX_NONE ? OTHER_CONSTRAINT (constraint) : 0) \ + | ((curr) & CTX_LETTER ? LETTER_CONSTRAINT (constraint) : 0) \ + | ((curr) & CTX_NEWLINE ? NEWLINE_CONSTRAINT (constraint) : 0)) & (prev)) -/* The following macros give information about what a constraint depends on. */ +/* The following macros describe what a constraint depends on. */ #define PREV_NEWLINE_CONSTRAINT(constraint) (((constraint) >> 2) & 0x111) #define PREV_LETTER_CONSTRAINT(constraint) (((constraint) >> 1) & 0x111) #define PREV_OTHER_CONSTRAINT(constraint) ((constraint) & 0x111) @@ -149,7 +147,7 @@ to_uchar (char ch) /* Tokens that match the empty string subject to some constraint actually work by applying that constraint to determine what may follow them, taking into account what has gone before. The following values are - the constraints corresponding to the special tokens previously defined. */ + the constraints corresponding to the special tokens previously defined. */ #define NO_CONSTRAINT 0x777 #define BEGLINE_CONSTRAINT 0x444 #define ENDLINE_CONSTRAINT 0x700 @@ -160,7 +158,7 @@ to_uchar (char ch) /* The regexp is parsed into an array of tokens in postfix form. Some tokens are operators and others are terminal symbols. Most (but not all) of these - codes are returned by the lexical analyzer. */ + codes are returned by the lexical analyzer. */ typedef ptrdiff_t token; @@ -171,75 +169,74 @@ enum end of input; any value of END or less in the parse tree is such a symbol. Accepting states of the DFA are those that would have - a transition on END. 
*/ + a transition on END. */ - /* Ordinary character values are terminal symbols that match themselves. */ + /* Ordinary character values are terminal symbols that match themselves. */ EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches - the empty string. */ + the empty string. */ - BACKREF, /* BACKREF is generated by \; it + BACKREF, /* BACKREF is generated by \ + or by any other construct that is not completely handled. If the scanner detects a transition on backref, it returns a kind of "semi-success" indicating that the match will have to be verified with - a backtracking matcher. */ + a backtracking matcher. */ BEGLINE, /* BEGLINE is a terminal symbol that matches - the empty string if it is at the beginning - of a line. */ + the empty string at the beginning of a + line. */ ENDLINE, /* ENDLINE is a terminal symbol that matches - the empty string if it is at the end of - a line. */ + the empty string at the end of a line. */ BEGWORD, /* BEGWORD is a terminal symbol that matches - the empty string if it is at the beginning - of a word. */ + the empty string at the beginning of a + word. */ ENDWORD, /* ENDWORD is a terminal symbol that matches - the empty string if it is at the end of - a word. */ + the empty string at the end of a word. */ LIMWORD, /* LIMWORD is a terminal symbol that matches - the empty string if it is at the beginning - or the end of a word. */ + the empty string at the beginning or the + end of a word. */ NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that - matches the empty string if it is not at - the beginning or end of a word. */ + matches the empty string not at + the beginning or end of a word. */ QMARK, /* QMARK is an operator of one argument that matches zero or one occurrences of its - argument. */ + argument. */ STAR, /* STAR is an operator of one argument that matches the Kleene closure (zero or more - occurrences) of its argument. */ + occurrences) of its argument. 
*/ PLUS, /* PLUS is an operator of one argument that matches the positive closure (one or more - occurrences) of its argument. */ + occurrences) of its argument. */ REPMN, /* REPMN is a lexical token corresponding to the {m,n} construct. REPMN never - appears in the compiled token vector. */ + appears in the compiled token vector. */ CAT, /* CAT is an operator of two arguments that matches the concatenation of its arguments. CAT is never returned by the - lexical analyzer. */ + lexical analyzer. */ OR, /* OR is an operator of two arguments that - matches either of its arguments. */ + matches either of its arguments. */ LPAREN, /* LPAREN never appears in the parse tree, - it is only a lexeme. */ + it is only a lexeme. */ - RPAREN, /* RPAREN never appears in the parse tree. */ + RPAREN, /* RPAREN never appears in the parse tree. */ ANYCHAR, /* ANYCHAR is a terminal symbol that matches - any multibyte (or single byte) characters. + a valid multibyte (or single byte) character. It is used only if MB_CUR_MAX > 1. */ MBCSET, /* MBCSET is similar to CSET, but for @@ -250,50 +247,50 @@ enum CSET /* CSET and (and any value greater) is a terminal symbol that matches any of a - class of characters. */ + class of characters. */ }; /* States of the recognizer correspond to sets of positions in the parse tree, together with the constraints under which they may be matched. So a position is encoded as an index into the parse tree together with - a constraint. */ + a constraint. */ typedef struct { - size_t index; /* Index into the parse array. */ - unsigned int constraint; /* Constraint for matching this position. */ + size_t index; /* Index into the parse array. */ + unsigned int constraint; /* Constraint for matching this position. */ } position; -/* Sets of positions are stored as arrays. */ +/* Sets of positions are stored as arrays. */ typedef struct { - position *elems; /* Elements of this position set. */ - size_t nelem; /* Number of elements in this set. 
*/ + position *elems; /* Elements of this position set. */ + size_t nelem; /* Number of elements in this set. */ size_t alloc; /* Number of elements allocated in ELEMS. */ } position_set; -/* Sets of leaves are also stored as arrays. */ +/* Sets of leaves are also stored as arrays. */ typedef struct { - size_t *elems; /* Elements of this position set. */ - size_t nelem; /* Number of elements in this set. */ + size_t *elems; /* Elements of this position set. */ + size_t nelem; /* Number of elements in this set. */ } leaf_set; /* A state of the dfa consists of a set of positions, some flags, and the token value of the lowest-numbered position of the state that - contains an END token. */ + contains an END token. */ typedef struct { - size_t hash; /* Hash of the positions of this state. */ - position_set elems; /* Positions this state could match. */ - unsigned char context; /* Context from previous state. */ - char backref; /* True if this state matches a \. */ - unsigned short constraint; /* Constraint for this state to accept. */ - token first_end; /* Token value of the first END in elems. */ + size_t hash; /* Hash of the positions of this state. */ + position_set elems; /* Positions this state could match. */ + unsigned char context; /* Context from previous state. */ + bool has_backref; /* This state matches a \. */ + bool has_mbcset; /* This state matches a MBCSET. */ + unsigned short constraint; /* Constraint for this state to accept. */ + token first_end; /* Token value of the first END in elems. */ position_set mbps; /* Positions which can match multibyte - characters. e.g. period. - These staff are used only if - MB_CUR_MAX > 1. */ + characters, e.g., period. + Used only if MB_CUR_MAX > 1. */ } dfa_state; /* States are indexed by state_num values. These are normally @@ -301,17 +298,20 @@ typedef struct typedef ptrdiff_t state_num; /* A bracket operator. - e.g. [a-c], [[:alpha:]], etc. */ + e.g., [a-c], [[:alpha:]], etc. 
*/ struct mb_char_classes { ptrdiff_t cset; - int invert; + bool invert; wchar_t *chars; /* Normal characters. */ size_t nchars; wctype_t *ch_classes; /* Character classes. */ size_t nch_classes; - wchar_t *range_sts; /* Range characters (start of the range). */ - wchar_t *range_ends; /* Range characters (end of the range). */ + struct /* Range characters. */ + { + wchar_t beg; /* Range start. */ + wchar_t end; /* Range end. */ + } *ranges; size_t nranges; char **equivs; /* Equivalence classes. */ size_t nequivs; @@ -319,28 +319,30 @@ struct mb_char_classes size_t ncoll_elems; /* Collating elements. */ }; -/* A compiled regular expression. */ +/* A compiled regular expression. */ struct dfa { - /* Fields filled by the scanner. */ - charclass *charclasses; /* Array of character sets for CSET tokens. */ - size_t cindex; /* Index for adding new charclasses. */ - size_t calloc; /* Number of charclasses currently allocated. */ - - /* Fields filled by the parser. */ - token *tokens; /* Postfix parse array. */ - size_t tindex; /* Index for adding new tokens. */ - size_t talloc; /* Number of tokens currently allocated. */ + /* Fields filled by the scanner. */ + charclass *charclasses; /* Array of character sets for CSET tokens. */ + size_t cindex; /* Index for adding new charclasses. */ + size_t calloc; /* Number of charclasses allocated. */ + + /* Fields filled by the parser. */ + token *tokens; /* Postfix parse array. */ + size_t tindex; /* Index for adding new tokens. */ + size_t talloc; /* Number of tokens currently allocated. */ size_t depth; /* Depth required of an evaluation stack used for depth-first traversal of the - parse tree. */ - size_t nleaves; /* Number of leaves on the parse tree. */ + parse tree. */ + size_t nleaves; /* Number of leaves on the parse tree. */ size_t nregexps; /* Count of parallel regexps being built - with dfaparse(). */ - unsigned int mb_cur_max; /* Cached value of MB_CUR_MAX. 
*/ - token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */ + with dfaparse. */ + bool fast; /* The DFA is fast. */ + bool multibyte; /* MB_CUR_MAX > 1. */ + token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */ + mbstate_t mbs; /* Multibyte conversion state. */ - /* The following are used only if MB_CUR_MAX > 1. */ + /* The following are valid only if MB_CUR_MAX > 1. */ /* The value of multibyte_prop[i] is defined by following rule. if tokens[i] < NOTCHAR @@ -359,110 +361,139 @@ struct dfa multibyte_prop = 3 , 1 , 0 , 2 , 3 */ - size_t nmultibyte_prop; int *multibyte_prop; + /* A table indexed by byte values that contains the corresponding wide + character (if any) for that byte. WEOF means the byte is not a + valid single-byte character. */ + wint_t mbrtowc_cache[NOTCHAR]; + /* Array of the bracket expression in the DFA. */ struct mb_char_classes *mbcsets; size_t nmbcsets; size_t mbcsets_alloc; - /* Fields filled by the state builder. */ - dfa_state *states; /* States of the dfa. */ - state_num sindex; /* Index for adding new states. */ - state_num salloc; /* Number of states currently allocated. */ + /* Fields filled by the superset. */ + struct dfa *superset; /* Hint of the dfa. */ + + /* Fields filled by the state builder. */ + dfa_state *states; /* States of the dfa. */ + state_num sindex; /* Index for adding new states. */ + size_t salloc; /* Number of states currently allocated. */ - /* Fields filled by the parse tree->NFA conversion. */ + /* Fields filled by the parse tree->NFA conversion. */ position_set *follows; /* Array of follow sets, indexed by position index. The follow of a position is the set of positions containing characters that could conceivably follow a character matching the given position in a string matching the regexp. Allocated to the - maximum possible position index. */ - int searchflag; /* True if we are supposed to build a searching + maximum possible position index. 
*/ + bool searchflag; /* We are supposed to build a searching as opposed to an exact matcher. A searching matcher finds the first and shortest string matching a regexp anywhere in the buffer, whereas an exact matcher finds the longest string matching, but anchored to the - beginning of the buffer. */ + beginning of the buffer. */ - /* Fields filled by dfaexec. */ + /* Fields filled by dfaexec. */ state_num tralloc; /* Number of transition tables that have - slots so far. */ + slots so far, not counting trans[-1]. */ int trcount; /* Number of transition tables that have - actually been built. */ + actually been built. */ state_num **trans; /* Transition tables for states that can never accept. If the transitions for a state have not yet been computed, or the state could possibly accept, its entry in - this table is NULL. */ - state_num **realtrans; /* Trans always points to realtrans + 1; this - is so trans[-1] can contain NULL. */ + this table is NULL. This points to one + past the start of the allocated array, + and trans[-1] is always NULL. */ state_num **fails; /* Transition tables after failing to accept - on a state that potentially could do so. */ + on a state that potentially could do so. */ int *success; /* Table of acceptance conditions used in - dfaexec and computed in build_state. */ + dfaexec and computed in build_state. */ state_num *newlines; /* Transitions on newlines. The entry for a newline in any transition table is always -1 so we can count lines without wasting too many cycles. The transition for a newline is stored separately and handled as a special case. Newline is also used - as a sentinel at the end of the buffer. */ + as a sentinel at the end of the buffer. */ struct dfamust *musts; /* List of strings, at least one of which is known to appear in any r.e. matching - the dfa. */ + the dfa. */ + position_set mb_follows; /* Follow set added by ANYCHAR and/or MBCSET + on demand. 
*/ + int *mb_match_lens; /* Array of length reduced by ANYCHAR and/or + MBCSET. Null if mb_follows.elems has not + been allocated. */ }; -/* Some macros for user access to dfa internals. */ +/* Some macros for user access to dfa internals. */ -/* ACCEPTING returns true if s could possibly be an accepting state of r. */ +/* S could possibly be an accepting state of R. */ #define ACCEPTING(s, r) ((r).states[s].constraint) -/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the - specified context. */ +/* STATE accepts in the specified context. */ #define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \ SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr) static void dfamust (struct dfa *dfa); static void regexp (void); -/* These two macros are identical to the ones in gnulib's xalloc.h, - except that they not to case the result to "(t *)", and thus may - be used via type-free CALLOC and MALLOC macros. */ -#undef XNMALLOC -#undef XCALLOC - -/* Allocate memory for N elements of type T, with error checking. */ -/* extern t *XNMALLOC (size_t n, typename t); */ -# define XNMALLOC(n, t) \ - (sizeof (t) == 1 ? xmalloc (n) : xnmalloc (n, sizeof (t))) - -/* Allocate memory for N elements of type T, with error checking, - and zero it. */ -/* extern t *XCALLOC (size_t n, typename t); */ -# define XCALLOC(n, t) \ - (sizeof (t) == 1 ? xzalloc (n) : xcalloc (n, sizeof (t))) - -#define CALLOC(p, n) do { (p) = XCALLOC (n, *(p)); } while (0) -#define MALLOC(p, n) do { (p) = XNMALLOC (n, *(p)); } while (0) -#define REALLOC(p, n) do {(p) = xnrealloc (p, n, sizeof (*(p))); } while (0) - -/* Reallocate an array of type *P if N_ALLOC is <= N_REQUIRED. 
*/ -#define REALLOC_IF_NECESSARY(p, n_alloc, n_required) \ - do \ - { \ - if ((n_alloc) <= (n_required)) \ - { \ - size_t new_n_alloc = (n_required) + !(p); \ - (p) = x2nrealloc (p, &new_n_alloc, sizeof (*(p))); \ - (n_alloc) = new_n_alloc; \ - } \ - } \ - while (false) +static void +dfambcache (struct dfa *d) +{ + int i; + for (i = CHAR_MIN; i <= CHAR_MAX; ++i) + { + char c = i; + unsigned char uc = i; + mbstate_t s = { 0 }; + wchar_t wc; + d->mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF; + } +} + +/* Store into *PWC the result of converting the leading bytes of the + multibyte buffer S of length N bytes, using the mbrtowc_cache in *D + and updating the conversion state in *D. On conversion error, + convert just a single byte, to WEOF. Return the number of bytes + converted. + + This differs from mbrtowc (PWC, S, N, &D->mbs) as follows: + + * PWC points to wint_t, not to wchar_t. + * The last arg is a dfa *D instead of merely a multibyte conversion + state D->mbs. D also contains an mbrtowc_cache for speed. + * N must be at least 1. + * S[N - 1] must be a sentinel byte. + * Shift encodings are not supported. + * The return value is always in the range 1..N. + * D->mbs is always valid afterwards. + * *PWC is always set to something. */ +static size_t +mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d) +{ + unsigned char uc = s[0]; + wint_t wc = d->mbrtowc_cache[uc]; + + if (wc == WEOF) + { + wchar_t wch; + size_t nbytes = mbrtowc (&wch, s, n, &d->mbs); + if (0 < nbytes && nbytes < (size_t) -2) + { + *pwc = wch; + return nbytes; + } + memset (&d->mbs, 0, sizeof d->mbs); + } + *pwc = wc; + return 1; +} #ifdef DEBUG @@ -542,24 +573,25 @@ prtok (token t) } #endif /* DEBUG */ -/* Stuff pertaining to charclasses. */ +/* Stuff pertaining to charclasses. 
*/ -static int +static bool tstbit (unsigned int b, charclass const c) { - return c[b / INTBITS] & 1 << b % INTBITS; + return c[b / CHARCLASS_WORD_BITS] >> b % CHARCLASS_WORD_BITS & 1; } static void setbit (unsigned int b, charclass c) { - c[b / INTBITS] |= 1 << b % INTBITS; + c[b / CHARCLASS_WORD_BITS] |= (charclass_word) 1 << b % CHARCLASS_WORD_BITS; } static void clrbit (unsigned int b, charclass c) { - c[b / INTBITS] &= ~(1 << b % INTBITS); + c[b / CHARCLASS_WORD_BITS] &= ~((charclass_word) 1 + << b % CHARCLASS_WORD_BITS); } static void @@ -579,39 +611,64 @@ notset (charclass s) { int i; - for (i = 0; i < CHARCLASS_INTS; ++i) - s[i] = ~s[i]; + for (i = 0; i < CHARCLASS_WORDS; ++i) + s[i] = CHARCLASS_WORD_MASK & ~s[i]; } -static int +static bool equal (charclass const s1, charclass const s2) { return memcmp (s1, s2, sizeof (charclass)) == 0; } -/* A pointer to the current dfa is kept here during parsing. */ -static struct dfa *dfa; +/* Ensure that the array addressed by PTR holds at least NITEMS + + (PTR || !NITEMS) items. Either return PTR, or reallocate the array + and return its new address. Although PTR may be null, the returned + value is never null. -/* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */ + The array holds *NALLOC items; *NALLOC is updated on reallocation. + ITEMSIZE is the size of one item. Avoid O(N**2) behavior on arrays + growing linearly. */ +static void * +maybe_realloc (void *ptr, size_t nitems, size_t *nalloc, size_t itemsize) +{ + if (nitems < *nalloc) + return ptr; + *nalloc = nitems; + return x2nrealloc (ptr, nalloc, itemsize); +} + +/* In DFA D, find the index of charclass S, or allocate a new one. 
*/ static size_t -charclass_index (charclass const s) +dfa_charclass_index (struct dfa *d, charclass const s) { size_t i; - for (i = 0; i < dfa->cindex; ++i) - if (equal (s, dfa->charclasses[i])) + for (i = 0; i < d->cindex; ++i) + if (equal (s, d->charclasses[i])) return i; - REALLOC_IF_NECESSARY (dfa->charclasses, dfa->calloc, dfa->cindex + 1); - ++dfa->cindex; - copyset (s, dfa->charclasses[i]); + d->charclasses = maybe_realloc (d->charclasses, d->cindex, &d->calloc, + sizeof *d->charclasses); + ++d->cindex; + copyset (s, d->charclasses[i]); return i; } -/* Syntax bits controlling the behavior of the lexical analyzer. */ +/* A pointer to the current dfa is kept here during parsing. */ +static struct dfa *dfa; + +/* Find the index of charclass S in the current DFA, or allocate a new one. */ +static size_t +charclass_index (charclass const s) +{ + return dfa_charclass_index (dfa, s); +} + +/* Syntax bits controlling the behavior of the lexical analyzer. */ static reg_syntax_t syntax_bits, syntax_bits_set; -/* Flag for case-folding letters into sets. */ -static int case_fold; +/* Flag for case-folding letters into sets. */ +static bool case_fold; /* End-of-line byte in data. */ static unsigned char eolbyte; @@ -619,10 +676,10 @@ static unsigned char eolbyte; /* Cache of char-context values. */ static int sbit[NOTCHAR]; -/* Set of characters considered letters. */ +/* Set of characters considered letters. */ static charclass letters; -/* Set of characters that are newline. */ +/* Set of characters that are newline. */ static charclass newline; /* Add this to the test for whether a byte is word-constituent, since on @@ -631,17 +688,17 @@ static charclass newline; #ifdef __GLIBC__ # define is_valid_unibyte_character(c) 1 #else -# define is_valid_unibyte_character(c) (! (MBS_SUPPORT && btowc (c) == WEOF)) +# define is_valid_unibyte_character(c) (btowc (c) != WEOF) #endif -/* Return non-zero if C is a "word-constituent" byte; zero otherwise. 
*/ +/* C is a "word-constituent" byte. */ #define IS_WORD_CONSTITUENT(C) \ (is_valid_unibyte_character (C) && (isalnum (C) || (C) == '_')) static int char_context (unsigned char c) { - if (c == eolbyte || c == 0) + if (c == eolbyte) return CTX_NEWLINE; if (IS_WORD_CONSTITUENT (c)) return CTX_LETTER; @@ -658,7 +715,7 @@ wchar_context (wint_t wc) return CTX_NONE; } -/* Entry point to set syntax options. */ +/* Entry point to set syntax options. */ void dfasyntax (reg_syntax_t bits, int fold, unsigned char eol) { @@ -666,7 +723,7 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol) syntax_bits_set = 1; syntax_bits = bits; - case_fold = fold; + case_fold = fold != 0; eolbyte = eol; for (i = 0; i < NOTCHAR; ++i) @@ -689,7 +746,6 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol) this may happen when folding case in weird Turkish locales where dotless i/dotted I are not included in the chosen character set. Return whether a bit was set in the charclass. */ -#if MBS_SUPPORT static bool setbit_wc (wint_t wc, charclass c) { @@ -701,111 +757,104 @@ setbit_wc (wint_t wc, charclass c) return true; } -/* Set a bit in the charclass for the given single byte character, - if it is valid in the current character set. */ -static void -setbit_c (int b, charclass c) -{ - /* Do nothing if b is invalid in this character set. */ - if (MB_CUR_MAX > 1 && btowc (b) == WEOF) - return; - setbit (b, c); -} -#else -# define setbit_c setbit -static inline bool -setbit_wc (wint_t wc, charclass c) -{ - abort (); - /*NOTREACHED*/ return false; -} -#endif - -/* Like setbit_c, but if case is folded, set both cases of a letter. For - MB_CUR_MAX > 1, the resulting charset is only used as an optimization, - and the caller takes care of setting the appropriate field of struct - mb_char_classes. */ +/* Set a bit for B and its case variants in the charclass C. + MB_CUR_MAX must be 1. 
*/ static void setbit_case_fold_c (int b, charclass c) { - if (MB_CUR_MAX > 1) - { - wint_t wc = btowc (b); - if (wc == WEOF) - return; - setbit (b, c); - if (case_fold && iswalpha (wc)) - setbit_wc (iswupper (wc) ? towlower (wc) : towupper (wc), c); - } - else - { - setbit (b, c); - if (case_fold && isalpha (b)) - setbit_c (isupper (b) ? tolower (b) : toupper (b), c); - } + int ub = toupper (b); + int i; + for (i = 0; i < NOTCHAR; i++) + if (toupper (i) == ub) + setbit (i, c); } /* UTF-8 encoding allows some optimizations that we can't otherwise - assume in a multibyte encoding. */ -static inline int + assume in a multibyte encoding. */ +int using_utf8 (void) { static int utf8 = -1; - if (utf8 == -1) + if (utf8 < 0) { -#if defined HAVE_LANGINFO_CODESET && MBS_SUPPORT - utf8 = (STREQ (nl_langinfo (CODESET), "UTF-8")); -#else - utf8 = 0; -#endif + wchar_t wc; + mbstate_t mbs = { 0 }; + utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100; } - return utf8; } +/* The current locale is known to be a unibyte locale + without multicharacter collating sequences and where range + comparisons simply use the native encoding. These locales can be + processed more efficiently. */ + +static bool +using_simple_locale (void) +{ + /* The native character set is known to be compatible with + the C locale. The following test isn't perfect, but it's good + enough in practice, as only ASCII and EBCDIC are in common use + and this test correctly accepts ASCII and rejects EBCDIC. */ + enum { native_c_charset = + ('\b' == 8 && '\t' == 9 && '\n' == 10 && '\v' == 11 && '\f' == 12 + && '\r' == 13 && ' ' == 32 && '!' == 33 && '"' == 34 && '#' == 35 + && '%' == 37 && '&' == 38 && '\'' == 39 && '(' == 40 && ')' == 41 + && '*' == 42 && '+' == 43 && ',' == 44 && '-' == 45 && '.' == 46 + && '/' == 47 && '0' == 48 && '9' == 57 && ':' == 58 && ';' == 59 + && '<' == 60 && '=' == 61 && '>' == 62 && '?' 
== 63 && 'A' == 65 + && 'Z' == 90 && '[' == 91 && '\\' == 92 && ']' == 93 && '^' == 94 + && '_' == 95 && 'a' == 97 && 'z' == 122 && '{' == 123 && '|' == 124 + && '}' == 125 && '~' == 126) + }; + + if (! native_c_charset || dfa->multibyte) + return false; + else + { + static int unibyte_c = -1; + if (unibyte_c < 0) + { + char const *locale = setlocale (LC_ALL, NULL); + unibyte_c = (!locale + || STREQ (locale, "C") + || STREQ (locale, "POSIX")); + } + return unibyte_c; + } +} + /* Lexical analyzer. All the dross that deals with the obnoxious GNU Regex syntax bits is located here. The poor, suffering reader is referred to the GNU Regex documentation for the - meaning of the @#%!@#%^!@ syntax bits. */ + meaning of the @#%!@#%^!@ syntax bits. */ -static char const *lexptr; /* Pointer to next input character. */ -static size_t lexleft; /* Number of characters remaining. */ -static token lasttok; /* Previous token returned; initially END. */ -static int laststart; /* True if we're separated from beginning or (, | - only by zero-width characters. */ -static size_t parens; /* Count of outstanding left parens. */ -static int minrep, maxrep; /* Repeat counts for {m,n}. */ +static char const *lexptr; /* Pointer to next input character. */ +static size_t lexleft; /* Number of characters remaining. */ +static token lasttok; /* Previous token returned; initially END. */ +static bool laststart; /* We're separated from beginning or (, + | only by zero-width characters. */ +static size_t parens; /* Count of outstanding left parens. */ +static int minrep, maxrep; /* Repeat counts for {m,n}. */ static int cur_mb_len = 1; /* Length of the multibyte representation of wctok. */ -/* These variables are used only if (MB_CUR_MAX > 1). */ -static mbstate_t mbs; /* Mbstate for mbrlen(). */ -static wchar_t wctok; /* Wide character representation of the current - multibyte character. */ -static unsigned char *mblen_buf; /* Correspond to the input buffer in dfaexec(). 
- Each element store the amount of remain - byte of corresponding multibyte character - in the input string. A element's value - is 0 if corresponding character is a - single byte character. - e.g. input : 'a', , , - mblen_buf : 0, 3, 2, 1 - */ -static wchar_t *inputwcs; /* Wide character representation of input - string in dfaexec(). - The length of this array is same as - the length of input string(char array). - inputstring[i] is a single-byte char, - or 1st byte of a multibyte char. - And inputwcs[i] is the codepoint. */ -static unsigned char const *buf_begin; /* reference to begin in dfaexec(). */ -static unsigned char const *buf_end; /* reference to end in dfaexec(). */ - - -#if MBS_SUPPORT -/* Note that characters become unsigned here. */ + +static wint_t wctok; /* Wide character representation of the current + multibyte character, or WEOF if there was + an encoding error. Used only if + MB_CUR_MAX > 1. */ + + +/* Fetch the next lexical input character. Set C (of type int) to the + next input byte, except set C to EOF if the input is a multibyte + character of length greater than 1. Set WC (of type wint_t) to the + value of the input if it is a valid multibyte character (possibly + of length 1); otherwise set WC to WEOF. If there is no more input, + report EOFERR if EOFERR is not null, and return lasttok = END + otherwise. */ # define FETCH_WC(c, wc, eoferr) \ do { \ if (! lexleft) \ @@ -817,58 +866,73 @@ static unsigned char const *buf_end; /* reference to end in dfaexec(). */ } \ else \ { \ - wchar_t _wc; \ - cur_mb_len = mbrtowc (&_wc, lexptr, lexleft, &mbs); \ - if (cur_mb_len <= 0) \ - { \ - cur_mb_len = 1; \ - --lexleft; \ - (wc) = (c) = to_uchar (*lexptr++); \ - } \ - else \ - { \ - lexptr += cur_mb_len; \ - lexleft -= cur_mb_len; \ - (wc) = _wc; \ - (c) = wctob (wc); \ - } \ + wint_t _wc; \ + size_t nbytes = mbs_to_wchar (&_wc, lexptr, lexleft, dfa); \ + cur_mb_len = nbytes; \ + (wc) = _wc; \ + (c) = nbytes == 1 ? 
to_uchar (*lexptr) : EOF; \ + lexptr += nbytes; \ + lexleft -= nbytes; \ } \ - } while(0) - -# define FETCH(c, eoferr) \ - do { \ - wint_t wc; \ - FETCH_WC (c, wc, eoferr); \ } while (0) -#else -/* Note that characters become unsigned here. */ -# define FETCH(c, eoferr) \ - do { \ - if (! lexleft) \ - { \ - if ((eoferr) != 0) \ - dfaerror (eoferr); \ - else \ - return lasttok = END; \ - } \ - (c) = to_uchar (*lexptr++); \ - --lexleft; \ - } while(0) - -# define FETCH_WC(c, unused, eoferr) FETCH (c, eoferr) - -#endif /* MBS_SUPPORT */ - #ifndef MIN # define MIN(a,b) ((a) < (b) ? (a) : (b)) #endif +/* The set of wchar_t values C such that there's a useful locale + somewhere where C != towupper (C) && C != towlower (towupper (C)). + For example, 0x00B5 (U+00B5 MICRO SIGN) is in this table, because + towupper (0x00B5) == 0x039C (U+039C GREEK CAPITAL LETTER MU), and + towlower (0x039C) == 0x03BC (U+03BC GREEK SMALL LETTER MU). */ +static short const lonesome_lower[] = + { + 0x00B5, 0x0131, 0x017F, 0x01C5, 0x01C8, 0x01CB, 0x01F2, 0x0345, + 0x03C2, 0x03D0, 0x03D1, 0x03D5, 0x03D6, 0x03F0, 0x03F1, + + /* U+03F2 GREEK LUNATE SIGMA SYMBOL lacks a specific uppercase + counterpart in locales predating Unicode 4.0.0 (April 2003). */ + 0x03F2, + + 0x03F5, 0x1E9B, 0x1FBE, + }; + +/* Maximum number of characters that can be the case-folded + counterparts of a single character, not counting the character + itself. This is 1 for towupper, 1 for towlower, and 1 for each + entry in LONESOME_LOWER. */ +enum +{ CASE_FOLDED_BUFSIZE = 2 + sizeof lonesome_lower / sizeof *lonesome_lower }; + +/* Find the characters equal to C after case-folding, other than C + itself, and store them into FOLDED. Return the number of characters + stored. 
*/ +static int +case_folded_counterparts (wchar_t c, wchar_t folded[CASE_FOLDED_BUFSIZE]) +{ + int i; + int n = 0; + wint_t uc = towupper (c); + wint_t lc = towlower (uc); + if (uc != c) + folded[n++] = uc; + if (lc != uc && lc != c && towupper (lc) == uc) + folded[n++] = lc; + for (i = 0; i < sizeof lonesome_lower / sizeof *lonesome_lower; i++) + { + wint_t li = lonesome_lower[i]; + if (li != lc && li != uc && li != c && towupper (li) == uc) + folded[n++] = li; + } + return n; +} + typedef int predicate (int); /* The following list maps the names of the Posix named character classes to predicate functions that determine whether a given character is in - the class. The leading [ has already been eaten by the lexical analyzer. */ + the class. The leading [ has already been eaten by the lexical + analyzer. */ struct dfa_ctype { const char *name; @@ -881,7 +945,7 @@ static const struct dfa_ctype prednames[] = { {"upper", isupper, false}, {"lower", islower, false}, {"digit", isdigit, true}, - {"xdigit", isxdigit, true}, + {"xdigit", isxdigit, false}, {"space", isspace, false}, {"punct", ispunct, false}, {"alnum", isalnum, false}, @@ -904,15 +968,18 @@ find_pred (const char *str) } /* Multibyte character handling sub-routine for lex. - This function parse a bracket expression and build a struct - mb_char_classes. */ + Parse a bracket expression and build a struct mb_char_classes. */ static token parse_bracket_exp (void) { - int invert; + bool invert; int c, c1, c2; charclass ccl; + /* This is a bracket expression that dfaexec is known to + process correctly. */ + bool known_bracket_exp = true; + /* Used to warn about [:space:]. Bit 0 = first character is a colon. Bit 1 = last character is a colon. @@ -926,19 +993,17 @@ parse_bracket_exp (void) /* Work area to build a mb_char_classes. 
*/ struct mb_char_classes *work_mbc; - size_t chars_al, range_sts_al, range_ends_al, ch_classes_al, - equivs_al, coll_elems_al; + size_t chars_al, ranges_al, ch_classes_al, equivs_al, coll_elems_al; - chars_al = 0; - range_sts_al = range_ends_al = 0; - ch_classes_al = equivs_al = coll_elems_al = 0; - if (MB_CUR_MAX > 1) + chars_al = ranges_al = ch_classes_al = equivs_al = coll_elems_al = 0; + if (dfa->multibyte) { - REALLOC_IF_NECESSARY (dfa->mbcsets, dfa->mbcsets_alloc, - dfa->nmbcsets + 1); + dfa->mbcsets = maybe_realloc (dfa->mbcsets, dfa->nmbcsets, + &dfa->mbcsets_alloc, + sizeof *dfa->mbcsets); /* dfa->multibyte_prop[] hold the index of dfa->mbcsets. - We will update dfa->multibyte_prop[] in addtok(), because we can't + We will update dfa->multibyte_prop[] in addtok, because we can't decide the index in dfa->tokens[]. */ /* Initialize work area. */ @@ -953,39 +1018,38 @@ parse_bracket_exp (void) if (c == '^') { FETCH_WC (c, wc, _("unbalanced [")); - invert = 1; + invert = true; + known_bracket_exp = using_simple_locale (); } else - invert = 0; + invert = false; colon_warning_state = (c == ':'); do { - c1 = EOF; /* mark c1 is not initialized". */ + c1 = NOTCHAR; /* Mark c1 as not initialized. */ colon_warning_state &= ~2; /* Note that if we're looking at some other [:...:] construct, we just treat it as a bunch of ordinary characters. We can do this because we assume regex has checked for syntax errors before - dfa is ever called. */ - if (c == '[' && (syntax_bits & RE_CHAR_CLASSES)) + dfa is ever called. */ + if (c == '[') { -#define BRACKET_BUFFER_SIZE 128 - char str[BRACKET_BUFFER_SIZE]; FETCH_WC (c1, wc1, _("unbalanced [")); - /* If pattern contains '[[:', '[[.', or '[[='. */ - if (c1 == ':' - /* TODO: handle '[[.' and '[[=' also for MB_CUR_MAX == 1. */ - || (MB_CUR_MAX > 1 && (c1 == '.' || c1 == '='))) + if ((c1 == ':' && (syntax_bits & RE_CHAR_CLASSES)) + || c1 == '.' 
|| c1 == '=') { + enum { MAX_BRACKET_STRING_LEN = 32 }; + char str[MAX_BRACKET_STRING_LEN + 1]; size_t len = 0; for (;;) { FETCH_WC (c, wc, _("unbalanced [")); if ((c == c1 && *lexptr == ']') || lexleft == 0) break; - if (len < BRACKET_BUFFER_SIZE) + if (len < MAX_BRACKET_STRING_LEN) str[len++] = c; else /* This is in any case an invalid class name. */ @@ -996,7 +1060,10 @@ parse_bracket_exp (void) /* Fetch bracket. */ FETCH_WC (c, wc, _("unbalanced [")); if (c1 == ':') - /* build character class. */ + /* Build character class. POSIX allows character + classes to match multicharacter collating elements, + but the regex code does not support that, so do not + worry about that possibility. */ { char const *class = (case_fold && (STREQ (str, "upper") @@ -1005,43 +1072,25 @@ parse_bracket_exp (void) if (!pred) dfaerror (_("invalid character class")); - if (MB_CUR_MAX > 1 && !pred->single_byte_only) + if (dfa->multibyte && !pred->single_byte_only) { /* Store the character class as wctype_t. */ wctype_t wt = wctype (class); - REALLOC_IF_NECESSARY (work_mbc->ch_classes, - ch_classes_al, - work_mbc->nch_classes + 1); + work_mbc->ch_classes + = maybe_realloc (work_mbc->ch_classes, + work_mbc->nch_classes, &ch_classes_al, + sizeof *work_mbc->ch_classes); work_mbc->ch_classes[work_mbc->nch_classes++] = wt; } for (c2 = 0; c2 < NOTCHAR; ++c2) if (pred->func (c2)) - setbit_case_fold_c (c2, ccl); + setbit (c2, ccl); } + else + known_bracket_exp = false; - else if (MBS_SUPPORT && (c1 == '=' || c1 == '.')) - { - char *elem = xmemdup (str, len + 1); - - if (c1 == '=') - /* build equivalence class. */ - { - REALLOC_IF_NECESSARY (work_mbc->equivs, - equivs_al, work_mbc->nequivs + 1); - work_mbc->equivs[work_mbc->nequivs++] = elem; - } - - if (c1 == '.') - /* build collating element. 
*/ - { - REALLOC_IF_NECESSARY (work_mbc->coll_elems, - coll_elems_al, - work_mbc->ncoll_elems + 1); - work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem; - } - } colon_warning_state |= 8; /* Fetch new lookahead character. */ @@ -1056,114 +1105,110 @@ parse_bracket_exp (void) if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) FETCH_WC (c, wc, _("unbalanced [")); - if (c1 == EOF) + if (c1 == NOTCHAR) FETCH_WC (c1, wc1, _("unbalanced [")); if (c1 == '-') /* build range characters. */ { FETCH_WC (c2, wc2, _("unbalanced [")); - if (c2 == ']') + + /* A bracket expression like [a-[.aa.]] matches an unknown set. + Treat it like [-a[.aa.]] while parsing it, and + remember that the set is unknown. */ + if (c2 == '[' && *lexptr == '.') { - /* In the case [x-], the - is an ordinary hyphen, - which is left in c1, the lookahead character. */ - lexptr -= cur_mb_len; - lexleft += cur_mb_len; + known_bracket_exp = false; + c2 = ']'; } - } - - if (c1 == '-' && c2 != ']') - { - if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) - FETCH_WC (c2, wc2, _("unbalanced [")); - if (MB_CUR_MAX > 1) + if (c2 != ']') { - /* When case folding map a range, say [m-z] (or even [M-z]) - to the pair of ranges, [m-z] [M-Z]. */ - REALLOC_IF_NECESSARY (work_mbc->range_sts, - range_sts_al, work_mbc->nranges + 1); - REALLOC_IF_NECESSARY (work_mbc->range_ends, - range_ends_al, work_mbc->nranges + 1); - work_mbc->range_sts[work_mbc->nranges] = - case_fold ? towlower (wc) : (wchar_t) wc; - work_mbc->range_ends[work_mbc->nranges++] = - case_fold ? 
towlower (wc2) : (wchar_t) wc2; - -#ifndef GREP - if (case_fold && (iswalpha (wc) || iswalpha (wc2))) + if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) + FETCH_WC (c2, wc2, _("unbalanced [")); + + if (dfa->multibyte) { - REALLOC_IF_NECESSARY (work_mbc->range_sts, - range_sts_al, work_mbc->nranges + 1); - work_mbc->range_sts[work_mbc->nranges] = towupper (wc); - REALLOC_IF_NECESSARY (work_mbc->range_ends, - range_ends_al, work_mbc->nranges + 1); - work_mbc->range_ends[work_mbc->nranges++] = towupper (wc2); + /* When case folding map a range, say [m-z] (or even [M-z]) + to the pair of ranges, [m-z] [M-Z]. Although this code + is wrong in multiple ways, it's never used in practice. + FIXME: Remove this (and related) unused code. */ + if (wc != WEOF && wc2 != WEOF) + { + work_mbc->ranges + = maybe_realloc (work_mbc->ranges, work_mbc->nranges + 2, + &ranges_al, sizeof *work_mbc->ranges); + work_mbc->ranges[work_mbc->nranges].beg + = case_fold ? towlower (wc) : wc; + work_mbc->ranges[work_mbc->nranges++].end + = case_fold ? towlower (wc2) : wc2; + + if (case_fold && (iswalpha (wc) || iswalpha (wc2))) + { + work_mbc->ranges[work_mbc->nranges].beg = towupper (wc); + work_mbc->ranges[work_mbc->nranges++].end + = towupper (wc2); + } + } } -#endif - } - else - { - /* Defer to the system regex library about the meaning - of range expressions. 
*/ - regex_t re; - char pattern[6] = { '[', 0, '-', 0, ']', 0 }; - char subject[2] = { 0, 0 }; - c1 = c; - if (case_fold) + else if (using_simple_locale ()) { - c1 = tolower (c1); - c2 = tolower (c2); + for (c1 = c; c1 <= c2; c1++) + setbit (c1, ccl); + if (case_fold) + { + int uc = toupper (c); + int uc2 = toupper (c2); + for (c1 = 0; c1 < NOTCHAR; c1++) + { + int uc1 = toupper (c1); + if (uc <= uc1 && uc1 <= uc2) + setbit (c1, ccl); + } + } } + else + known_bracket_exp = false; - pattern[1] = c1; - pattern[3] = c2; - regcomp (&re, pattern, REG_NOSUB); - for (c = 0; c < NOTCHAR; ++c) - { - if ((case_fold && isupper (c)) - || (MB_CUR_MAX > 1 && btowc (c) == WEOF)) - continue; - subject[0] = c; - if (regexec (&re, subject, 0, NULL, 0) != REG_NOMATCH) - setbit_case_fold_c (c, ccl); - } - regfree (&re); + colon_warning_state |= 8; + FETCH_WC (c1, wc1, _("unbalanced [")); + continue; } - colon_warning_state |= 8; - FETCH_WC (c1, wc1, _("unbalanced [")); - continue; + /* In the case [x-], the - is an ordinary hyphen, + which is left in c1, the lookahead character. */ + lexptr -= cur_mb_len; + lexleft += cur_mb_len; } colon_warning_state |= (c == ':') ? 2 : 4; - if (MB_CUR_MAX == 1) + if (!dfa->multibyte) { - setbit_case_fold_c (c, ccl); + if (case_fold) + setbit_case_fold_c (c, ccl); + else + setbit (c, ccl); continue; } - if (case_fold && iswalpha (wc)) - { - wc = towlower (wc); - if (!setbit_wc (wc, ccl)) - { - REALLOC_IF_NECESSARY (work_mbc->chars, chars_al, - work_mbc->nchars + 1); - work_mbc->chars[work_mbc->nchars++] = wc; - } -#ifdef GREP - continue; -#else - wc = towupper (wc); -#endif - } - if (!setbit_wc (wc, ccl)) + if (wc == WEOF) + known_bracket_exp = false; + else { - REALLOC_IF_NECESSARY (work_mbc->chars, chars_al, - work_mbc->nchars + 1); - work_mbc->chars[work_mbc->nchars++] = wc; + wchar_t folded[CASE_FOLDED_BUFSIZE + 1]; + int i; + int n = (case_fold ? 
case_folded_counterparts (wc, folded + 1) + 1 + : 1); + folded[0] = wc; + for (i = 0; i < n; i++) + if (!setbit_wc (folded[i], ccl)) + { + work_mbc->chars + = maybe_realloc (work_mbc->chars, work_mbc->nchars, + &chars_al, sizeof *work_mbc->chars); + work_mbc->chars[work_mbc->nchars++] = folded[i]; + } } } while ((wc = wc1, (c = c1) != ']')); @@ -1171,7 +1216,10 @@ parse_bracket_exp (void) if (colon_warning_state == 7) dfawarn (_("character class syntax is [[:space:]], not [:space:]")); - if (MB_CUR_MAX > 1) + if (! known_bracket_exp) + return BACKREF; + + if (dfa->multibyte) { static charclass zeroclass; work_mbc->invert = invert; @@ -1181,7 +1229,7 @@ parse_bracket_exp (void) if (invert) { - assert (MB_CUR_MAX == 1); + assert (!dfa->multibyte); notset (ccl); if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) clrbit (eolbyte, ccl); @@ -1193,8 +1241,8 @@ parse_bracket_exp (void) static token lex (void) { - unsigned int c, c2; - int backslash = 0; + int c, c2; + bool backslash = false; charclass ccl; int i; @@ -1206,14 +1254,7 @@ lex (void) "if (backslash) ...". 
*/ for (i = 0; i < 2; ++i) { - if (MB_CUR_MAX > 1) - { - FETCH_WC (c, wctok, NULL); - if ((int) c == EOF) - goto normal_char; - } - else - FETCH (c, NULL); + FETCH_WC (c, wctok, NULL); switch (c) { @@ -1222,7 +1263,7 @@ lex (void) goto normal_char; if (lexleft == 0) dfaerror (_("unfinished \\ escape")); - backslash = 1; + backslash = true; break; case '^': @@ -1260,14 +1301,14 @@ lex (void) case '9': if (backslash && !(syntax_bits & RE_NO_BK_REFS)) { - laststart = 0; + laststart = false; return lasttok = BACKREF; } goto normal_char; case '`': if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = BEGLINE; /* FIXME: should be beginning of string */ + return lasttok = BEGLINE; /* FIXME: should be beginning of string */ goto normal_char; case '\'': @@ -1368,14 +1409,14 @@ lex (void) { if (syntax_bits & RE_INVALID_INTERVAL_ORD) goto normal_char; - dfaerror (_("Invalid content of \\{\\}")); + dfaerror (_("invalid content of \\{\\}")); } if (RE_DUP_MAX < maxrep) - dfaerror (_("Regular expression too big")); + dfaerror (_("regular expression too big")); lexptr = p; lexleft = lim - p; } - laststart = 0; + laststart = false; return lasttok = REPMN; case '|': @@ -1383,21 +1424,21 @@ lex (void) goto normal_char; if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0)) goto normal_char; - laststart = 1; + laststart = true; return lasttok = OR; case '\n': if (syntax_bits & RE_LIMITED_OPS || backslash || !(syntax_bits & RE_NEWLINE_ALT)) goto normal_char; - laststart = 1; + laststart = true; return lasttok = OR; case '(': if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0)) goto normal_char; ++parens; - laststart = 1; + laststart = true; return lasttok = LPAREN; case ')': @@ -1406,17 +1447,17 @@ lex (void) if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD) goto normal_char; --parens; - laststart = 0; + laststart = false; return lasttok = RPAREN; case '.': if (backslash) goto normal_char; - if (MB_CUR_MAX > 1) + if (dfa->multibyte) { /* In multibyte 
environment period must match with a single character not a byte. So we use ANYCHAR. */ - laststart = 0; + laststart = false; return lasttok = ANYCHAR; } zeroset (ccl); @@ -1425,21 +1466,53 @@ lex (void) clrbit (eolbyte, ccl); if (syntax_bits & RE_DOT_NOT_NULL) clrbit ('\0', ccl); - laststart = 0; + laststart = false; return lasttok = CSET + charclass_index (ccl); case 's': case 'S': if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) goto normal_char; - zeroset (ccl); - for (c2 = 0; c2 < NOTCHAR; ++c2) - if (isspace (c2)) - setbit (c2, ccl); - if (c == 'S') - notset (ccl); - laststart = 0; - return lasttok = CSET + charclass_index (ccl); + if (!dfa->multibyte) + { + zeroset (ccl); + for (c2 = 0; c2 < NOTCHAR; ++c2) + if (isspace (c2)) + setbit (c2, ccl); + if (c == 'S') + notset (ccl); + laststart = false; + return lasttok = CSET + charclass_index (ccl); + } + +#define PUSH_LEX_STATE(s) \ + do \ + { \ + char const *lexptr_saved = lexptr; \ + size_t lexleft_saved = lexleft; \ + lexptr = (s); \ + lexleft = strlen (lexptr) + +#define POP_LEX_STATE() \ + lexptr = lexptr_saved; \ + lexleft = lexleft_saved; \ + } \ + while (0) + + /* FIXME: see if optimizing this, as is done with ANYCHAR and + add_utf8_anychar, makes sense. */ + + /* \s and \S are documented to be equivalent to [[:space:]] and + [^[:space:]] respectively, so tell the lexer to process those + strings, each minus its "already processed" '['. */ + PUSH_LEX_STATE (c == 's' ? 
"[:space:]]" : "^[:space:]]"); + + lasttok = parse_bracket_exp (); + + POP_LEX_STATE (); + + laststart = false; + return lasttok; case 'w': case 'W': @@ -1451,21 +1524,21 @@ lex (void) setbit (c2, ccl); if (c == 'W') notset (ccl); - laststart = 0; + laststart = false; return lasttok = CSET + charclass_index (ccl); case '[': if (backslash) goto normal_char; - laststart = 0; + laststart = false; return lasttok = parse_bracket_exp (); default: normal_char: - laststart = 0; + laststart = false; /* For multibyte character sets, folding is done in atom. Always return WCHAR. */ - if (MB_CUR_MAX > 1) + if (dfa->multibyte) return lasttok = WCHAR; if (case_fold && isalpha (c)) @@ -1480,31 +1553,33 @@ lex (void) } /* The above loop should consume at most a backslash - and some other character. */ + and some other character. */ abort (); - return END; /* keeps pedantic compilers happy. */ + return END; /* keeps pedantic compilers happy. */ } -/* Recursive descent parser for regular expressions. */ +/* Recursive descent parser for regular expressions. */ -static token tok; /* Lookahead token. */ +static token tok; /* Lookahead token. */ static size_t depth; /* Current depth of a hypothetical stack holding deferred productions. This is used to determine the depth that will be required of the real stack later on in - dfaanalyze(). */ + dfaanalyze. 
*/ static void addtok_mb (token t, int mbprop) { - if (MB_CUR_MAX > 1) + if (dfa->talloc == dfa->tindex) { - REALLOC_IF_NECESSARY (dfa->multibyte_prop, dfa->nmultibyte_prop, - dfa->tindex + 1); - dfa->multibyte_prop[dfa->tindex] = mbprop; + dfa->tokens = x2nrealloc (dfa->tokens, &dfa->talloc, + sizeof *dfa->tokens); + if (dfa->multibyte) + dfa->multibyte_prop = xnrealloc (dfa->multibyte_prop, dfa->talloc, + sizeof *dfa->multibyte_prop); } - - REALLOC_IF_NECESSARY (dfa->tokens, dfa->talloc, dfa->tindex + 1); + if (dfa->multibyte) + dfa->multibyte_prop[dfa->tindex] = mbprop; dfa->tokens[dfa->tindex++] = t; switch (t) @@ -1519,8 +1594,12 @@ addtok_mb (token t, int mbprop) --depth; break; + case BACKREF: + dfa->fast = false; + /* fallthrough */ default: ++dfa->nleaves; + /* fallthrough */ case EMPTY: ++depth; break; @@ -1532,11 +1611,11 @@ addtok_mb (token t, int mbprop) static void addtok_wc (wint_t wc); /* Add the given token to the parse tree, maintaining the depth count and - updating the maximum depth if necessary. */ + updating the maximum depth if necessary. */ static void addtok (token t) { - if (MB_CUR_MAX > 1 && t == MBCSET) + if (dfa->multibyte && t == MBCSET) { bool need_or = false; struct mb_char_classes *work_mbc = &dfa->mbcsets[dfa->nmbcsets - 1]; @@ -1556,10 +1635,11 @@ addtok (token t) work_mbc->nchars = 0; } - /* UTF-8 allows treating a simple, non-inverted MBCSET like a CSET. */ + /* If the MBCSET is non-inverted and doesn't include neither + character classes including multibyte characters, range + expressions, equivalence classes nor collating elements, + it can be replaced to a simple CSET. */ if (work_mbc->invert - || (!using_utf8 () && work_mbc->cset != -1) - || work_mbc->nchars != 0 || work_mbc->nch_classes != 0 || work_mbc->nranges != 0 || work_mbc->nequivs != 0 || work_mbc->ncoll_elems != 0) @@ -1574,7 +1654,6 @@ addtok (token t) that the mbcset is empty now. Do nothing in that case. 
*/ if (work_mbc->cset != -1) { - assert (using_utf8 ()); addtok (CSET + work_mbc->cset); if (need_or) addtok (OR); @@ -1587,27 +1666,29 @@ addtok (token t) } } -#if MBS_SUPPORT /* We treat a multibyte character as a single atom, so that DFA can treat a multibyte character as a single expression. - e.g. We construct following tree from "". + e.g., we construct the following tree from "". */ static void addtok_wc (wint_t wc) { unsigned char buf[MB_LEN_MAX]; - mbstate_t s; + mbstate_t s = { 0 }; int i; - memset (&s, 0, sizeof s); - cur_mb_len = wcrtomb ((char *) buf, wc, &s); + size_t stored_bytes = wcrtomb ((char *) buf, wc, &s); - /* This is merely stop-gap. When cur_mb_len is 0 or negative, - buf[0] is undefined, yet skipping the addtok_mb call altogether - can result in heap corruption. */ - if (cur_mb_len <= 0) - buf[0] = 0; + if (stored_bytes != (size_t) -1) + cur_mb_len = stored_bytes; + else + { + /* This is merely stop-gap. buf[0] is undefined, yet skipping + the addtok_mb call altogether can corrupt the heap. */ + cur_mb_len = 1; + buf[0] = 0; + } addtok_mb (buf[0], cur_mb_len == 1 ? 3 : 1); for (i = 1; i < cur_mb_len; i++) @@ -1616,23 +1697,26 @@ addtok_wc (wint_t wc) addtok (CAT); } } -#else -static void -addtok_wc (wint_t wc) -{ -} -#endif static void add_utf8_anychar (void) { -#if MBS_SUPPORT static const charclass utf8_classes[5] = { - {0, 0, 0, 0, ~0, ~0, 0, 0}, /* 80-bf: non-lead bytes */ - {~0, ~0, ~0, ~0, 0, 0, 0, 0}, /* 00-7f: 1-byte sequence */ - {0, 0, 0, 0, 0, 0, ~3, 0}, /* c2-df: 2-byte sequence */ - {0, 0, 0, 0, 0, 0, 0, 0xffff}, /* e0-ef: 3-byte sequence */ - {0, 0, 0, 0, 0, 0, 0, 0xff0000} /* f0-f7: 4-byte sequence */ + /* 80-bf: non-leading bytes. */ + {0, 0, 0, 0, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, 0, 0}, + + /* 00-7f: 1-byte sequence. */ + {CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, + CHARCLASS_WORD_MASK, 0, 0, 0, 0}, + + /* c2-df: 2-byte sequence. 
*/ + {0, 0, 0, 0, 0, 0, ~3 & CHARCLASS_WORD_MASK, 0}, + + /* e0-ef: 3-byte sequence. */ + {0, 0, 0, 0, 0, 0, 0, 0xffff}, + + /* f0-f7: 4-byte sequence. */ + {0, 0, 0, 0, 0, 0, 0, 0xff0000} }; const unsigned int n = sizeof (utf8_classes) / sizeof (utf8_classes[0]); unsigned int i; @@ -1671,7 +1755,6 @@ add_utf8_anychar (void) addtok (CAT); addtok (OR); } -#endif } /* The grammar understood by the parser is as follows. @@ -1707,29 +1790,34 @@ add_utf8_anychar (void) LPAREN regexp RPAREN - The parser builds a parse tree in postfix form in an array of tokens. */ + The parser builds a parse tree in postfix form in an array of tokens. */ static void atom (void) { - if (0) + if (tok == WCHAR) { - /* empty */ - } - else if (MBS_SUPPORT && tok == WCHAR) - { - addtok_wc (case_fold ? towlower (wctok) : wctok); -#ifndef GREP - if (case_fold && iswalpha (wctok)) + if (wctok == WEOF) + addtok (BACKREF); + else { - addtok_wc (towupper (wctok)); - addtok (OR); + addtok_wc (wctok); + + if (case_fold) + { + wchar_t folded[CASE_FOLDED_BUFSIZE]; + int i, n = case_folded_counterparts (wctok, folded); + for (i = 0; i < n; i++) + { + addtok_wc (folded[i]); + addtok (OR); + } + } } -#endif tok = lex (); } - else if (MBS_SUPPORT && tok == ANYCHAR && using_utf8 ()) + else if (tok == ANYCHAR && using_utf8 ()) { /* For UTF-8 expand the period to a series of CSETs that define a valid UTF-8 character. This avoids using the slow multibyte path. I'm @@ -1743,9 +1831,7 @@ atom (void) } else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD -#if MBS_SUPPORT || tok == ANYCHAR || tok == MBCSET -#endif /* MBS_SUPPORT */ || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD) { addtok (tok); @@ -1763,7 +1849,7 @@ atom (void) addtok (EMPTY); } -/* Return the number of tokens in the given subexpression. */ +/* Return the number of tokens in the given subexpression. 
*/ static size_t _GL_ATTRIBUTE_PURE nsubtoks (size_t tindex) { @@ -1784,19 +1870,18 @@ nsubtoks (size_t tindex) } } -/* Copy the given subexpression to the top of the tree. */ +/* Copy the given subexpression to the top of the tree. */ static void copytoks (size_t tindex, size_t ntokens) { size_t i; - for (i = 0; i < ntokens; ++i) - { - addtok (dfa->tokens[tindex + i]); - /* Update index into multibyte csets. */ - if (MB_CUR_MAX > 1 && dfa->tokens[tindex + i] == MBCSET) - dfa->multibyte_prop[dfa->tindex - 1] = dfa->multibyte_prop[tindex + i]; - } + if (dfa->multibyte) + for (i = 0; i < ntokens; ++i) + addtok_mb (dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]); + else + for (i = 0; i < ntokens; ++i) + addtok_mb (dfa->tokens[tindex + i], 3); } static void @@ -1866,7 +1951,7 @@ regexp (void) /* Main entry point for the parser. S is a string to be parsed, len is the length of the string, so s can include NUL characters. D is a pointer to - the struct dfa to parse into. */ + the struct dfa to parse into. */ void dfaparse (char const *s, size_t len, struct dfa *d) { @@ -1874,12 +1959,12 @@ dfaparse (char const *s, size_t len, struct dfa *d) lexptr = s; lexleft = len; lasttok = END; - laststart = 1; + laststart = true; parens = 0; - if (MB_CUR_MAX > 1) + if (dfa->multibyte) { cur_mb_len = 0; - memset (&mbs, 0, sizeof mbs); + memset (&d->mbs, 0, sizeof d->mbs); } if (!syntax_bits_set) @@ -1902,21 +1987,26 @@ dfaparse (char const *s, size_t len, struct dfa *d) ++d->nregexps; } -/* Some primitives for operating on sets of positions. */ +/* Some primitives for operating on sets of positions. */ -/* Copy one set to another; the destination must be large enough. */ +/* Copy one set to another. 
*/ static void copy (position_set const *src, position_set * dst) { - REALLOC_IF_NECESSARY (dst->elems, dst->alloc, src->nelem); - memcpy (dst->elems, src->elems, sizeof (dst->elems[0]) * src->nelem); + if (dst->alloc < src->nelem) + { + free (dst->elems); + dst->alloc = src->nelem; + dst->elems = x2nrealloc (NULL, &dst->alloc, sizeof *dst->elems); + } + memcpy (dst->elems, src->elems, src->nelem * sizeof *dst->elems); dst->nelem = src->nelem; } static void alloc_position_set (position_set * s, size_t size) { - MALLOC (s->elems, size); + s->elems = xnmalloc (size, sizeof *s->elems); s->alloc = size; s->nelem = 0; } @@ -1924,7 +2014,7 @@ alloc_position_set (position_set * s, size_t size) /* Insert position P in set S. S is maintained in sorted order on decreasing index. If there is already an entry in S with P.index then merge (logically-OR) P's constraints into the one in S. - S->elems must point to an array large enough to hold the resulting set. */ + S->elems must point to an array large enough to hold the resulting set. */ static void insert (position p, position_set * s) { @@ -1946,7 +2036,7 @@ insert (position p, position_set * s) return; } - REALLOC_IF_NECESSARY (s->elems, s->alloc, count + 1); + s->elems = maybe_realloc (s->elems, count, &s->alloc, sizeof *s->elems); for (i = count; i > lo; i--) s->elems[i] = s->elems[i - 1]; s->elems[lo] = p; @@ -1954,13 +2044,18 @@ insert (position p, position_set * s) } /* Merge two sets of positions into a third. The result is exactly as if - the positions of both sets were inserted into an initially empty set. */ + the positions of both sets were inserted into an initially empty set. 
*/ static void merge (position_set const *s1, position_set const *s2, position_set * m) { size_t i = 0, j = 0; - REALLOC_IF_NECESSARY (m->elems, m->alloc, s1->nelem + s2->nelem); + if (m->alloc < s1->nelem + s2->nelem) + { + free (m->elems); + m->elems = maybe_realloc (NULL, s1->nelem + s2->nelem, &m->alloc, + sizeof *m->elems); + } m->nelem = 0; while (i < s1->nelem && j < s2->nelem) if (s1->elems[i].index > s2->elems[j].index) @@ -1978,7 +2073,7 @@ merge (position_set const *s1, position_set const *s2, position_set * m) m->elems[m->nelem++] = s2->elems[j++]; } -/* Delete a position from a set. */ +/* Delete a position from a set. */ static void delete (position p, position_set * s) { @@ -1994,7 +2089,7 @@ delete (position p, position_set * s) /* Find the index of the state corresponding to the given position set with the given preceding context, or create a new state if there is no such - state. Context tells whether we got here on a newline or letter. */ + state. Context tells whether we got here on a newline or letter. */ static state_num state_index (struct dfa *d, position_set const *s, int context) { @@ -2005,7 +2100,7 @@ state_index (struct dfa *d, position_set const *s, int context) for (i = 0; i < s->nelem; ++i) hash ^= s->elems[i].index + s->elems[i].constraint; - /* Try to find a state that exactly matches the proposed one. */ + /* Try to find a state that exactly matches the proposed one. */ for (i = 0; i < d->sindex; ++i) { if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem @@ -2020,20 +2115,20 @@ state_index (struct dfa *d, position_set const *s, int context) return i; } - /* We'll have to create a new state. */ - REALLOC_IF_NECESSARY (d->states, d->salloc, d->sindex + 1); + /* We'll have to create a new state. 
*/ + d->states = maybe_realloc (d->states, d->sindex, &d->salloc, + sizeof *d->states); d->states[i].hash = hash; alloc_position_set (&d->states[i].elems, s->nelem); copy (s, &d->states[i].elems); d->states[i].context = context; - d->states[i].backref = 0; + d->states[i].has_backref = false; + d->states[i].has_mbcset = false; d->states[i].constraint = 0; d->states[i].first_end = 0; - if (MBS_SUPPORT) - { - d->states[i].mbps.nelem = 0; - d->states[i].mbps.elems = NULL; - } + d->states[i].mbps.nelem = 0; + d->states[i].mbps.elems = NULL; + for (j = 0; j < s->nelem; ++j) if (d->tokens[s->elems[j].index] < 0) { @@ -2046,7 +2141,7 @@ state_index (struct dfa *d, position_set const *s, int context) else if (d->tokens[s->elems[j].index] == BACKREF) { d->states[i].constraint = NO_CONSTRAINT; - d->states[i].backref = 1; + d->states[i].has_backref = true; } ++d->sindex; @@ -2058,25 +2153,26 @@ state_index (struct dfa *d, position_set const *s, int context) contains a symbol that matches the empty string in some context, replace that position with the elements of its follow labeled with an appropriate constraint. Repeat exhaustively until no funny positions are left. - S->elems must be large enough to hold the result. */ + S->elems must be large enough to hold the result. 
*/ static void -epsclosure (position_set * s, struct dfa const *d) +epsclosure (position_set *s, struct dfa const *d, char *visited) { size_t i, j; - char *visited; /* array of booleans, enough to use char, not int */ position p, old; - - CALLOC (visited, d->tindex); + bool initialized = false; for (i = 0; i < s->nelem; ++i) if (d->tokens[s->elems[i].index] >= NOTCHAR && d->tokens[s->elems[i].index] != BACKREF -#if MBS_SUPPORT && d->tokens[s->elems[i].index] != ANYCHAR && d->tokens[s->elems[i].index] != MBCSET -#endif && d->tokens[s->elems[i].index] < CSET) { + if (!initialized) + { + memset (visited, 0, d->tindex * sizeof (*visited)); + initialized = true; + } old = s->elems[i]; p.constraint = old.constraint; delete (s->elems[i], s); @@ -2114,11 +2210,9 @@ epsclosure (position_set * s, struct dfa const *d) p.index = d->follows[old.index].elems[j].index; insert (p, s); } - /* Force rescan to start at the beginning. */ + /* Force rescan to start at the beginning. */ i = -1; } - - free (visited); } /* Returns the set of contexts for which there is at least one @@ -2133,7 +2227,7 @@ charclass_context (charclass c) if (tstbit (eolbyte, c)) context |= CTX_NEWLINE; - for (j = 0; j < CHARCLASS_INTS; ++j) + for (j = 0; j < CHARCLASS_WORDS; ++j) { if (c[j] & letters[j]) context |= CTX_LETTER; @@ -2219,23 +2313,33 @@ state_separate_contexts (position_set const *s) analysis is conveniently done by a linear scan with the aid of a stack. Sets are stored as arrays of the elements, obeying a stack-like allocation scheme; the number of elements in each set deeper in the stack can be - used to determine the address of a particular set's array. */ + used to determine the address of a particular set's array. */ void dfaanalyze (struct dfa *d, int searchflag) { - int *nullable; /* Nullable stack. */ - size_t *nfirstpos; /* Element count stack for firstpos sets. */ - position *firstpos; /* Array where firstpos elements are stored. 
*/ - size_t *nlastpos; /* Element count stack for lastpos sets. */ - position *lastpos; /* Array where lastpos elements are stored. */ - position_set tmp; /* Temporary set for merging sets. */ - position_set merged; /* Result of merging sets. */ - int separate_contexts; /* Context wanted by some position. */ - int *o_nullable; - size_t *o_nfirst, *o_nlast; - position *o_firstpos, *o_lastpos; + /* Array allocated to hold position sets. */ + position *posalloc = xnmalloc (d->nleaves, 2 * sizeof *posalloc); + /* Firstpos and lastpos elements. */ + position *firstpos = posalloc + d->nleaves; + position *lastpos = firstpos + d->nleaves; + + /* Stack for element counts and nullable flags. */ + struct + { + /* Whether the entry is nullable. */ + bool nullable; + + /* Counts of firstpos and lastpos sets. */ + size_t nfirstpos; + size_t nlastpos; + } *stkalloc = xnmalloc (d->depth, sizeof *stkalloc), *stk = stkalloc; + + position_set tmp; /* Temporary set for merging sets. */ + position_set merged; /* Result of merging sets. */ + int separate_contexts; /* Context wanted by some position. */ size_t i, j; position *pos; + char *visited = xnmalloc (d->tindex, sizeof *visited); #ifdef DEBUG fprintf (stderr, "dfaanalyze:\n"); @@ -2247,104 +2351,90 @@ dfaanalyze (struct dfa *d, int searchflag) putc ('\n', stderr); #endif - d->searchflag = searchflag; - - MALLOC (nullable, d->depth); - o_nullable = nullable; - MALLOC (nfirstpos, d->depth); - o_nfirst = nfirstpos; - MALLOC (firstpos, d->nleaves); - o_firstpos = firstpos, firstpos += d->nleaves; - MALLOC (nlastpos, d->depth); - o_nlast = nlastpos; - MALLOC (lastpos, d->nleaves); - o_lastpos = lastpos, lastpos += d->nleaves; + d->searchflag = searchflag != 0; alloc_position_set (&merged, d->nleaves); - - CALLOC (d->follows, d->tindex); + d->follows = xcalloc (d->tindex, sizeof *d->follows); for (i = 0; i < d->tindex; ++i) { switch (d->tokens[i]) { case EMPTY: - /* The empty set is nullable. 
*/ - *nullable++ = 1; + /* The empty set is nullable. */ + stk->nullable = true; - /* The firstpos and lastpos of the empty leaf are both empty. */ - *nfirstpos++ = *nlastpos++ = 0; + /* The firstpos and lastpos of the empty leaf are both empty. */ + stk->nfirstpos = stk->nlastpos = 0; + stk++; break; case STAR: case PLUS: /* Every element in the firstpos of the argument is in the follow - of every element in the lastpos. */ - tmp.nelem = nfirstpos[-1]; + of every element in the lastpos. */ + tmp.nelem = stk[-1].nfirstpos; tmp.elems = firstpos; pos = lastpos; - for (j = 0; j < nlastpos[-1]; ++j) + for (j = 0; j < stk[-1].nlastpos; ++j) { merge (&tmp, &d->follows[pos[j].index], &merged); copy (&merged, &d->follows[pos[j].index]); } + /* fallthrough */ case QMARK: - /* A QMARK or STAR node is automatically nullable. */ + /* A QMARK or STAR node is automatically nullable. */ if (d->tokens[i] != PLUS) - nullable[-1] = 1; + stk[-1].nullable = true; break; case CAT: /* Every element in the firstpos of the second argument is in the - follow of every element in the lastpos of the first argument. */ - tmp.nelem = nfirstpos[-1]; + follow of every element in the lastpos of the first argument. */ + tmp.nelem = stk[-1].nfirstpos; tmp.elems = firstpos; - pos = lastpos + nlastpos[-1]; - for (j = 0; j < nlastpos[-2]; ++j) + pos = lastpos + stk[-1].nlastpos; + for (j = 0; j < stk[-2].nlastpos; ++j) { merge (&tmp, &d->follows[pos[j].index], &merged); copy (&merged, &d->follows[pos[j].index]); } /* The firstpos of a CAT node is the firstpos of the first argument, - union that of the second argument if the first is nullable. */ - if (nullable[-2]) - nfirstpos[-2] += nfirstpos[-1]; + union that of the second argument if the first is nullable. 
*/ + if (stk[-2].nullable) + stk[-2].nfirstpos += stk[-1].nfirstpos; else - firstpos += nfirstpos[-1]; - --nfirstpos; + firstpos += stk[-1].nfirstpos; /* The lastpos of a CAT node is the lastpos of the second argument, - union that of the first argument if the second is nullable. */ - if (nullable[-1]) - nlastpos[-2] += nlastpos[-1]; + union that of the first argument if the second is nullable. */ + if (stk[-1].nullable) + stk[-2].nlastpos += stk[-1].nlastpos; else { - pos = lastpos + nlastpos[-2]; - for (j = nlastpos[-1]; j-- > 0;) + pos = lastpos + stk[-2].nlastpos; + for (j = stk[-1].nlastpos; j-- > 0;) pos[j] = lastpos[j]; - lastpos += nlastpos[-2]; - nlastpos[-2] = nlastpos[-1]; + lastpos += stk[-2].nlastpos; + stk[-2].nlastpos = stk[-1].nlastpos; } - --nlastpos; - /* A CAT node is nullable if both arguments are nullable. */ - nullable[-2] = nullable[-1] && nullable[-2]; - --nullable; + /* A CAT node is nullable if both arguments are nullable. */ + stk[-2].nullable &= stk[-1].nullable; + stk--; break; case OR: - /* The firstpos is the union of the firstpos of each argument. */ - nfirstpos[-2] += nfirstpos[-1]; - --nfirstpos; + /* The firstpos is the union of the firstpos of each argument. */ + stk[-2].nfirstpos += stk[-1].nfirstpos; - /* The lastpos is the union of the lastpos of each argument. */ - nlastpos[-2] += nlastpos[-1]; - --nlastpos; + /* The lastpos is the union of the lastpos of each argument. */ + stk[-2].nlastpos += stk[-1].nlastpos; - /* An OR node is nullable if either argument is nullable. */ - nullable[-2] = nullable[-1] || nullable[-2]; - --nullable; + /* An OR node is nullable if either argument is nullable. */ + stk[-2].nullable |= stk[-1].nullable; + stk--; break; default: @@ -2352,33 +2442,36 @@ dfaanalyze (struct dfa *d, int searchflag) constructs like \< are treated as nonempty strings here; an "epsilon closure" effectively makes them nullable later. 
Backreferences have to get a real position so we can detect - transitions on them later. But they are nullable. */ - *nullable++ = d->tokens[i] == BACKREF; + transitions on them later. But they are nullable. */ + stk->nullable = d->tokens[i] == BACKREF; + + /* This position is in its own firstpos and lastpos. */ + stk->nfirstpos = stk->nlastpos = 1; + stk++; - /* This position is in its own firstpos and lastpos. */ - *nfirstpos++ = *nlastpos++ = 1; --firstpos, --lastpos; firstpos->index = lastpos->index = i; firstpos->constraint = lastpos->constraint = NO_CONSTRAINT; - /* Allocate the follow set for this position. */ + /* Allocate the follow set for this position. */ alloc_position_set (&d->follows[i], 1); break; } #ifdef DEBUG - /* ... balance the above nonsyntactic #ifdef goo... */ + /* ... balance the above nonsyntactic #ifdef goo... */ fprintf (stderr, "node %zd:", i); prtok (d->tokens[i]); putc ('\n', stderr); - fprintf (stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n"); + fprintf (stderr, + stk[-1].nullable ? " nullable: yes\n" : " nullable: no\n"); fprintf (stderr, " firstpos:"); - for (j = nfirstpos[-1]; j-- > 0;) + for (j = stk[-1].nfirstpos; j-- > 0;) { fprintf (stderr, " %zd:", firstpos[j].index); prtok (d->tokens[firstpos[j].index]); } fprintf (stderr, "\n lastpos:"); - for (j = nlastpos[-1]; j-- > 0;) + for (j = stk[-1].nlastpos; j-- > 0;) { fprintf (stderr, " %zd:", lastpos[j].index); prtok (d->tokens[lastpos[j].index]); @@ -2388,12 +2481,10 @@ dfaanalyze (struct dfa *d, int searchflag) } /* For each follow set that is the follow set of a real position, replace - it with its epsilon closure. */ + it with its epsilon closure. 
*/ for (i = 0; i < d->tindex; ++i) if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF -#if MBS_SUPPORT || d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET -#endif || d->tokens[i] >= CSET) { #ifdef DEBUG @@ -2408,33 +2499,27 @@ dfaanalyze (struct dfa *d, int searchflag) putc ('\n', stderr); #endif copy (&d->follows[i], &merged); - epsclosure (&merged, d); + epsclosure (&merged, d, visited); copy (&merged, &d->follows[i]); } /* Get the epsilon closure of the firstpos of the regexp. The result will - be the set of positions of state 0. */ + be the set of positions of state 0. */ merged.nelem = 0; - for (i = 0; i < nfirstpos[-1]; ++i) + for (i = 0; i < stk[-1].nfirstpos; ++i) insert (firstpos[i], &merged); - epsclosure (&merged, d); - - /* Build the initial state. */ - d->salloc = 1; - d->sindex = 0; - MALLOC (d->states, d->salloc); + epsclosure (&merged, d, visited); + /* Build the initial state. */ separate_contexts = state_separate_contexts (&merged); state_index (d, &merged, (separate_contexts & CTX_NEWLINE ? CTX_NEWLINE : separate_contexts ^ CTX_ANY)); - free (o_nullable); - free (o_nfirst); - free (o_firstpos); - free (o_nlast); - free (o_lastpos); + free (posalloc); + free (stkalloc); free (merged.elems); + free (visited); } @@ -2467,33 +2552,30 @@ dfaanalyze (struct dfa *d, int searchflag) If after comparing with every group there are characters remaining in C, create a new group labeled with the characters of C and insert this - position in that group. */ + position in that group. */ void dfastate (state_num s, struct dfa *d, state_num trans[]) { - leaf_set *grps; /* As many as will ever be needed. */ - charclass *labels; /* Labels corresponding to the groups. */ - size_t ngrps = 0; /* Number of groups actually used. */ - position pos; /* Current position being considered. */ - charclass matches; /* Set of matching characters. */ - int matchesf; /* True if matches is nonempty. */ - charclass intersect; /* Intersection with some label set. 
*/ - int intersectf; /* True if intersect is nonempty. */ - charclass leftovers; /* Stuff in the label that didn't match. */ - int leftoversf; /* True if leftovers is nonempty. */ - position_set follows; /* Union of the follows of some group. */ - position_set tmp; /* Temporary space for merging sets. */ - int possible_contexts; /* Contexts that this group can match. */ - int separate_contexts; /* Context that new state wants to know. */ - state_num state; /* New state. */ - state_num state_newline; /* New state on a newline transition. */ - state_num state_letter; /* New state on a letter transition. */ - int next_isnt_1st_byte = 0; /* Flag if we can't add state0. */ + leaf_set grps[NOTCHAR]; /* As many as will ever be needed. */ + charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */ + size_t ngrps = 0; /* Number of groups actually used. */ + position pos; /* Current position being considered. */ + charclass matches; /* Set of matching characters. */ + charclass_word matchesf; /* Nonzero if matches is nonempty. */ + charclass intersect; /* Intersection with some label set. */ + charclass_word intersectf; /* Nonzero if intersect is nonempty. */ + charclass leftovers; /* Stuff in the label that didn't match. */ + charclass_word leftoversf; /* Nonzero if leftovers is nonempty. */ + position_set follows; /* Union of the follows of some group. */ + position_set tmp; /* Temporary space for merging sets. */ + int possible_contexts; /* Contexts that this group can match. */ + int separate_contexts; /* Context that new state wants to know. */ + state_num state; /* New state. */ + state_num state_newline; /* New state on a newline transition. */ + state_num state_letter; /* New state on a letter transition. */ + bool next_isnt_1st_byte = false; /* We can't add state0. 
*/ size_t i, j, k; - MALLOC (grps, NOTCHAR); - MALLOC (labels, NOTCHAR); - zeroset (matches); for (i = 0; i < d->states[s].elems.nelem; ++i) @@ -2503,43 +2585,45 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) setbit (d->tokens[pos.index], matches); else if (d->tokens[pos.index] >= CSET) copyset (d->charclasses[d->tokens[pos.index] - CSET], matches); - else if (MBS_SUPPORT - && (d->tokens[pos.index] == ANYCHAR - || d->tokens[pos.index] == MBCSET)) - /* MB_CUR_MAX > 1 */ + else { - /* ANYCHAR and MBCSET must match with a single character, so we - must put it to d->states[s].mbps, which contains the positions - which can match with a single character not a byte. */ - if (d->states[s].mbps.nelem == 0) - alloc_position_set (&d->states[s].mbps, 1); - insert (pos, &(d->states[s].mbps)); + if (d->tokens[pos.index] == MBCSET + || d->tokens[pos.index] == ANYCHAR) + { + /* MB_CUR_MAX > 1 */ + if (d->tokens[pos.index] == MBCSET) + d->states[s].has_mbcset = true; + /* ANYCHAR and MBCSET must match with a single character, so we + must put it to d->states[s].mbps, which contains the positions + which can match with a single character not a byte. */ + if (d->states[s].mbps.nelem == 0) + alloc_position_set (&d->states[s].mbps, 1); + insert (pos, &(d->states[s].mbps)); + } continue; } - else - continue; /* Some characters may need to be eliminated from matches because - they fail in the current context. */ + they fail in the current context. 
*/ if (pos.constraint != NO_CONSTRAINT) { if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, CTX_NEWLINE)) - for (j = 0; j < CHARCLASS_INTS; ++j) + for (j = 0; j < CHARCLASS_WORDS; ++j) matches[j] &= ~newline[j]; if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, CTX_LETTER)) - for (j = 0; j < CHARCLASS_INTS; ++j) + for (j = 0; j < CHARCLASS_WORDS; ++j) matches[j] &= ~letters[j]; if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, CTX_NONE)) - for (j = 0; j < CHARCLASS_INTS; ++j) + for (j = 0; j < CHARCLASS_WORDS; ++j) matches[j] &= letters[j] | newline[j]; - /* If there are no characters left, there's no point in going on. */ - for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j) + /* If there are no characters left, there's no point in going on. */ + for (j = 0; j < CHARCLASS_WORDS && !matches[j]; ++j) continue; - if (j == CHARCLASS_INTS) + if (j == CHARCLASS_WORDS) continue; } @@ -2547,36 +2631,37 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) { /* If matches contains a single character only, and the current group's label doesn't contain that character, go on to the - next group. */ + next group. */ if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR && !tstbit (d->tokens[pos.index], labels[j])) continue; /* Check if this group's label has a nonempty intersection with - matches. */ + matches. */ intersectf = 0; - for (k = 0; k < CHARCLASS_INTS; ++k) - (intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0; + for (k = 0; k < CHARCLASS_WORDS; ++k) + intersectf |= intersect[k] = matches[k] & labels[j][k]; if (!intersectf) continue; - /* It does; now find the set differences both ways. */ + /* It does; now find the set differences both ways. */ leftoversf = matchesf = 0; - for (k = 0; k < CHARCLASS_INTS; ++k) + for (k = 0; k < CHARCLASS_WORDS; ++k) { - /* Even an optimizing compiler can't know this for sure. 
*/ - int match = matches[k], label = labels[j][k]; + /* Even an optimizing compiler can't know this for sure. */ + charclass_word match = matches[k], label = labels[j][k]; - (leftovers[k] = ~match & label) ? (leftoversf = 1) : 0; - (matches[k] = match & ~label) ? (matchesf = 1) : 0; + leftoversf |= leftovers[k] = ~match & label; + matchesf |= matches[k] = match & ~label; } - /* If there were leftovers, create a new group labeled with them. */ + /* If there were leftovers, create a new group labeled with them. */ if (leftoversf) { copyset (leftovers, labels[ngrps]); copyset (intersect, labels[j]); - MALLOC (grps[ngrps].elems, d->nleaves); + grps[ngrps].elems = xnmalloc (d->nleaves, + sizeof *grps[ngrps].elems); memcpy (grps[ngrps].elems, grps[j].elems, sizeof (grps[j].elems[0]) * grps[j].nelem); grps[ngrps].nelem = grps[j].nelem; @@ -2588,18 +2673,18 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) grps[j].elems[grps[j].nelem++] = pos.index; /* If every character matching the current position has been - accounted for, we're done. */ + accounted for, we're done. */ if (!matchesf) break; } /* If we've passed the last group, and there are still characters - unaccounted for, then we'll have to create a new group. */ + unaccounted for, then we'll have to create a new group. */ if (j == ngrps) { copyset (matches, labels[ngrps]); zeroset (matches); - MALLOC (grps[ngrps].elems, d->nleaves); + grps[ngrps].elems = xnmalloc (d->nleaves, sizeof *grps[ngrps].elems); grps[ngrps].nelem = 1; grps[ngrps].elems[0] = pos.index; ++ngrps; @@ -2611,10 +2696,10 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) /* If we are a searching matcher, the default transition is to a state containing the positions of state 0, otherwise the default transition - is to fail miserably. */ + is to fail miserably. */ if (d->searchflag) { - /* Find the state(s) corresponding to the positions of state 0. */ + /* Find the state(s) corresponding to the positions of state 0. 
*/ copy (&d->states[0].elems, &follows); separate_contexts = state_separate_contexts (&follows); state = state_index (d, &follows, separate_contexts ^ CTX_ANY); @@ -2640,12 +2725,12 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) follows.nelem = 0; /* Find the union of the follows of the positions of the group. - This is a hideously inefficient loop. Fix it someday. */ + This is a hideously inefficient loop. Fix it someday. */ for (j = 0; j < grps[i].nelem; ++j) for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k) insert (d->follows[grps[i].elems[j]].elems[k], &follows); - if (d->mb_cur_max > 1) + if (d->multibyte) { /* If a token in follows.elems is not 1st byte of a multibyte character, or the states of follows must accept the bytes @@ -2665,29 +2750,30 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) codepoint of <sb a>, it must not be <sb a> but 2nd byte of <mb A>, so we cannot add state[0]. */ - next_isnt_1st_byte = 0; + next_isnt_1st_byte = false; for (j = 0; j < follows.nelem; ++j) { if (!(d->multibyte_prop[follows.elems[j].index] & 1)) { - next_isnt_1st_byte = 1; + next_isnt_1st_byte = true; break; } } } /* If we are building a searching matcher, throw in the positions - of state 0 as well. */ - if (d->searchflag - && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte))) - for (j = 0; j < d->states[0].elems.nelem; ++j) - insert (d->states[0].elems.elems[j], &follows); + of state 0 as well. */ + if (d->searchflag && (!d->multibyte || !next_isnt_1st_byte)) + { + merge (&d->states[0].elems, &follows, &tmp); + copy (&tmp, &follows); + } - /* Find out if the new state will want any context information. */ + /* Find out if the new state will want any context information. */ possible_contexts = charclass_context (labels[i]); separate_contexts = state_separate_contexts (&follows); - /* Find the state(s) corresponding to the union of the follows. */ + /* Find the state(s) corresponding to the union of the follows. 
*/ if ((separate_contexts & possible_contexts) != possible_contexts) state = state_index (d, &follows, separate_contexts ^ CTX_ANY); else @@ -2701,12 +2787,12 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) else state_letter = state; - /* Set the transitions for each character in the current label. */ - for (j = 0; j < CHARCLASS_INTS; ++j) - for (k = 0; k < INTBITS; ++k) - if (labels[i][j] & 1 << k) + /* Set the transitions for each character in the current label. */ + for (j = 0; j < CHARCLASS_WORDS; ++j) + for (k = 0; k < CHARCLASS_WORD_BITS; ++k) + if (labels[i][j] >> k & 1) { - int c = j * INTBITS + k; + int c = j * CHARCLASS_WORD_BITS + k; if (c == eolbyte) trans[c] = state_newline; @@ -2721,8 +2807,31 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) free (grps[i].elems); free (follows.elems); free (tmp.elems); - free (grps); - free (labels); +} + +/* Make sure D's state arrays are large enough to hold NEW_STATE. */ +static void +realloc_trans_if_necessary (struct dfa *d, state_num new_state) +{ + state_num oldalloc = d->tralloc; + if (oldalloc <= new_state) + { + state_num **realtrans = d->trans ? d->trans - 1 : NULL; + size_t newalloc, newalloc1; + newalloc1 = new_state + 1; + realtrans = x2nrealloc (realtrans, &newalloc1, sizeof *realtrans); + realtrans[0] = NULL; + d->trans = realtrans + 1; + d->tralloc = newalloc = newalloc1 - 1; + d->fails = xnrealloc (d->fails, newalloc, sizeof *d->fails); + d->success = xnrealloc (d->success, newalloc, sizeof *d->success); + d->newlines = xnrealloc (d->newlines, newalloc, sizeof *d->newlines); + for (; oldalloc < newalloc; oldalloc++) + { + d->trans[oldalloc] = NULL; + d->fails[oldalloc] = NULL; + } + } } /* Some routines for manipulating a compiled dfa's transition tables. @@ -2730,32 +2839,33 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) is a non-accepting state, then d->trans[state] points to its table. If it is an accepting state then d->fails[state] points to its table. 
If it has no table at all, then d->trans[state] is NULL. - TODO: Improve this comment, get rid of the unnecessary redundancy. */ + TODO: Improve this comment, get rid of the unnecessary redundancy. */ static void build_state (state_num s, struct dfa *d) { - state_num *trans; /* The new transition table. */ - state_num i; + state_num *trans; /* The new transition table. */ + state_num i, maxstate; /* Set an upper limit on the number of transition tables that will ever exist at once. 1024 is arbitrary. The idea is that the frequently used transition tables will be quickly rebuilt, whereas the ones that - were only needed once or twice will be cleared away. */ + were only needed once or twice will be cleared away. However, do + not clear the initial state, as it's always used. */ if (d->trcount >= 1024) { - for (i = 0; i < d->tralloc; ++i) + for (i = 1; i < d->tralloc; ++i) { free (d->trans[i]); free (d->fails[i]); d->trans[i] = d->fails[i] = NULL; } - d->trcount = 0; + d->trcount = 1; } ++d->trcount; - /* Set up the success bits for this state. */ + /* Set up the success bits for this state. */ d->success[s] = 0; if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NEWLINE, s, *d)) d->success[s] |= CTX_NEWLINE; @@ -2764,33 +2874,20 @@ build_state (state_num s, struct dfa *d) if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NONE, s, *d)) d->success[s] |= CTX_NONE; - MALLOC (trans, NOTCHAR); + trans = xmalloc (NOTCHAR * sizeof *trans); dfastate (s, d, trans); /* Now go through the new transition table, and make sure that the trans and fail arrays are allocated large enough to hold a pointer for the - largest state mentioned in the table. */ + largest state mentioned in the table. 
*/ + maxstate = -1; for (i = 0; i < NOTCHAR; ++i) - if (trans[i] >= d->tralloc) - { - state_num oldalloc = d->tralloc; - - while (trans[i] >= d->tralloc) - d->tralloc *= 2; - REALLOC (d->realtrans, d->tralloc + 1); - d->trans = d->realtrans + 1; - REALLOC (d->fails, d->tralloc); - REALLOC (d->success, d->tralloc); - REALLOC (d->newlines, d->tralloc); - while (oldalloc < d->tralloc) - { - d->trans[oldalloc] = NULL; - d->fails[oldalloc++] = NULL; - } - } + if (maxstate < trans[i]) + maxstate = trans[i]; + realloc_trans_if_necessary (d, maxstate); /* Keep the newline transition in a special place so we can use it as - a sentinel. */ + a sentinel. */ d->newlines[s] = trans[eolbyte]; trans[eolbyte] = -1; @@ -2800,69 +2897,9 @@ build_state (state_num s, struct dfa *d) d->trans[s] = trans; } -static void -build_state_zero (struct dfa *d) -{ - d->tralloc = 1; - d->trcount = 0; - CALLOC (d->realtrans, d->tralloc + 1); - d->trans = d->realtrans + 1; - CALLOC (d->fails, d->tralloc); - MALLOC (d->success, d->tralloc); - MALLOC (d->newlines, d->tralloc); - build_state (0, d); -} - /* Multibyte character handling sub-routines for dfaexec. */ -/* Initial state may encounter the byte which is not a single byte character - nor 1st byte of a multibyte character. But it is incorrect for initial - state to accept such a byte. - For example, in sjis encoding the regular expression like "\\" accepts - the codepoint 0x5c, but should not accept the 2nd byte of the codepoint - 0x815c. Then Initial state must skip the bytes which are not a single byte - character nor 1st byte of a multibyte character. 
*/ -#define SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p) \ - if (s == 0) \ - { \ - while (inputwcs[p - buf_begin] == 0 \ - && mblen_buf[p - buf_begin] > 0 \ - && (unsigned char const *) p < buf_end) \ - ++p; \ - if ((char *) p >= end) \ - { \ - free (mblen_buf); \ - free (inputwcs); \ - *end = saved_end; \ - return NULL; \ - } \ - } - -static void -realloc_trans_if_necessary (struct dfa *d, state_num new_state) -{ - /* Make sure that the trans and fail arrays are allocated large enough - to hold a pointer for the new state. */ - if (new_state >= d->tralloc) - { - state_num oldalloc = d->tralloc; - - while (new_state >= d->tralloc) - d->tralloc *= 2; - REALLOC (d->realtrans, d->tralloc + 1); - d->trans = d->realtrans + 1; - REALLOC (d->fails, d->tralloc); - REALLOC (d->success, d->tralloc); - REALLOC (d->newlines, d->tralloc); - while (oldalloc < d->tralloc) - { - d->trans[oldalloc] = NULL; - d->fails[oldalloc++] = NULL; - } - } -} - -/* Return values of transit_state_singlebyte(), and +/* Return values of transit_state_singlebyte, and transit_state_consume_1char. */ typedef enum { @@ -2872,7 +2909,7 @@ typedef enum } status_transit_state; /* Consume a single byte and transit state from 's' to '*next_state'. - This function is almost same as the state transition routin in dfaexec(). + This function is almost same as the state transition routin in dfaexec. But state transition is done just once, otherwise matching succeed or reach the end of the buffer. */ static status_transit_state @@ -2894,14 +2931,7 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const *p, works = 0; } else if (works < 0) - { - if (p == buf_end) - { - /* At the moment, it must not happen. */ - abort (); - } - works = 0; - } + works = 0; else if (d->fails[works]) { works = d->fails[works][*p]; @@ -2916,18 +2946,13 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const *p, return rval; } -/* Match a "." against the current context. 
buf_begin[IDX] is the - current position. Return the length of the match, in bytes. - POS is the position of the ".". */ +/* Match a "." against the current context. Return the length of the + match, in bytes. POS is the position of the ".". */ static int -match_anychar (struct dfa *d, state_num s, position pos, size_t idx) +match_anychar (struct dfa *d, state_num s, position pos, + wint_t wc, size_t mbclen) { int context; - wchar_t wc; - int mbclen; - - wc = inputwcs[idx]; - mbclen = (mblen_buf[idx] == 0) ? 1 : mblen_buf[idx]; /* Check syntax bits. */ if (wc == (wchar_t) eolbyte) @@ -2940,6 +2965,8 @@ match_anychar (struct dfa *d, state_num s, position pos, size_t idx) if (syntax_bits & RE_DOT_NOT_NULL) return 0; } + else if (wc == WEOF) + return 0; context = wchar_context (wc); if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, context)) @@ -2949,26 +2976,21 @@ match_anychar (struct dfa *d, state_num s, position pos, size_t idx) } /* Match a bracket expression against the current context. - buf_begin[IDX] is the current position. Return the length of the match, in bytes. POS is the position of the bracket expression. */ static int -match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx) +match_mb_charset (struct dfa *d, state_num s, position pos, + char const *p, wint_t wc, size_t match_len) { size_t i; - int match; /* Flag which represent that matching succeed. */ - int match_len; /* Length of the character (or collating element) - with which this operator match. */ - int op_len; /* Length of the operator. */ + bool match; /* Matching succeeded. */ + int op_len; /* Length of the operator. */ char buffer[128]; /* Pointer to the structure to which we are currently referring. */ struct mb_char_classes *work_mbc; int context; - wchar_t wc; /* Current referring character. */ - - wc = inputwcs[idx]; /* Check syntax bits. 
*/ if (wc == (wchar_t) eolbyte) @@ -2981,6 +3003,8 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx) if (syntax_bits & RE_DOT_NOT_NULL) return 0; } + else if (wc == WEOF) + return 0; context = wchar_context (wc); if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, context)) @@ -2989,11 +3013,10 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx) /* Assign the current referring operator to work_mbc. */ work_mbc = &(d->mbcsets[(d->multibyte_prop[pos.index]) >> 2]); match = !work_mbc->invert; - match_len = (mblen_buf[idx] == 0) ? 1 : mblen_buf[idx]; /* Match in range 0-255? */ if (wc < NOTCHAR && work_mbc->cset != -1 - && tstbit ((unsigned char) wc, d->charclasses[work_mbc->cset])) + && tstbit (to_uchar (wc), d->charclasses[work_mbc->cset])) goto charset_matched; /* match with a character class? */ @@ -3003,14 +3026,14 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx) goto charset_matched; } - strncpy (buffer, (char const *) buf_begin + idx, match_len); + strncpy (buffer, p, match_len); buffer[match_len] = '\0'; /* match with an equivalence class? */ for (i = 0; i < work_mbc->nequivs; i++) { op_len = strlen (work_mbc->equivs[i]); - strncpy (buffer, (char const *) buf_begin + idx, op_len); + strncpy (buffer, p, op_len); buffer[op_len] = '\0'; if (strcoll (work_mbc->equivs[i], buffer) == 0) { @@ -3023,7 +3046,7 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx) for (i = 0; i < work_mbc->ncoll_elems; i++) { op_len = strlen (work_mbc->coll_elems[i]); - strncpy (buffer, (char const *) buf_begin + idx, op_len); + strncpy (buffer, p, op_len); buffer[op_len] = '\0'; if (strcoll (work_mbc->coll_elems[i], buffer) == 0) @@ -3036,7 +3059,7 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx) /* match with a range? 
*/ for (i = 0; i < work_mbc->nranges; i++) { - if (work_mbc->range_sts[i] <= wc && wc <= work_mbc->range_ends[i]) + if (work_mbc->ranges[i].beg <= wc && wc <= work_mbc->ranges[i].end) goto charset_matched; } @@ -3053,30 +3076,29 @@ charset_matched: return match ? match_len : 0; } -/* Check each of 'd->states[s].mbps.elem' can match or not. Then return the - array which corresponds to 'd->states[s].mbps.elem' and each element of - the array contains the amount of the bytes with which the element can - match. - 'idx' is the index from the buf_begin, and it is the current position - in the buffer. - Caller MUST free the array which this function return. */ +/* Check whether each of 'd->states[s].mbps.elem' can match. Then return the + array which corresponds to 'd->states[s].mbps.elem'; each element of the + array contains the number of bytes with which the element can match. + + The caller MUST free the array which this function return. */ static int * -check_matching_with_multibyte_ops (struct dfa *d, state_num s, size_t idx) +check_matching_with_multibyte_ops (struct dfa *d, state_num s, + char const *p, wint_t wc, size_t mbclen) { size_t i; int *rarray; - MALLOC (rarray, d->states[s].mbps.nelem); + rarray = d->mb_match_lens; for (i = 0; i < d->states[s].mbps.nelem; ++i) { position pos = d->states[s].mbps.elems[i]; switch (d->tokens[pos.index]) { case ANYCHAR: - rarray[i] = match_anychar (d, s, pos, idx); + rarray[i] = match_anychar (d, s, pos, wc, mbclen); break; case MBCSET: - rarray[i] = match_mb_charset (d, s, pos, idx); + rarray[i] = match_mb_charset (d, s, pos, p, wc, mbclen); break; default: break; /* cannot happen. */ @@ -3086,56 +3108,49 @@ check_matching_with_multibyte_ops (struct dfa *d, state_num s, size_t idx) } /* Consume a single character and enumerate all of the positions which can - be next position from the state 's'. - 'match_lens' is the input. 
It can be NULL, but it can also be the output - of check_matching_with_multibyte_ops() for optimization. + be the next position from the state 's'. + + 'match_lens' is the input. It can be NULL, but it can also be the output + of check_matching_with_multibyte_ops for optimization. + 'mbclen' and 'pps' are the output. 'mbclen' is the length of the - character consumed, and 'pps' is the set this function enumerate. */ + character consumed, and 'pps' is the set this function enumerates. */ static status_transit_state transit_state_consume_1char (struct dfa *d, state_num s, unsigned char const **pp, - int *match_lens, int *mbclen, position_set * pps) + wint_t wc, size_t mbclen, + int *match_lens) { size_t i, j; int k; state_num s1, s2; - int *work_mbls; status_transit_state rs = TRANSIT_STATE_DONE; - /* Calculate the length of the (single/multi byte) character - to which p points. */ - *mbclen = (mblen_buf[*pp - buf_begin] == 0) ? 1 : mblen_buf[*pp - buf_begin]; + if (! match_lens && d->states[s].mbps.nelem != 0) + match_lens = check_matching_with_multibyte_ops (d, s, (char const *) *pp, + wc, mbclen); /* Calculate the state which can be reached from the state 's' by - consuming '*mbclen' single bytes from the buffer. */ + consuming 'mbclen' single bytes from the buffer. */ s1 = s; - for (k = 0; k < *mbclen; k++) + for (k = 0; k < mbclen; k++) { s2 = s1; rs = transit_state_singlebyte (d, s2, (*pp)++, &s1); } - /* Copy the positions contained by 's1' to the set 'pps'. */ - copy (&(d->states[s1].elems), pps); - - /* Check (input) match_lens, and initialize if it is NULL. */ - if (match_lens == NULL && d->states[s].mbps.nelem != 0) - work_mbls = check_matching_with_multibyte_ops (d, s, *pp - buf_begin); - else - work_mbls = match_lens; + copy (&d->states[s1].elems, &d->mb_follows); /* Add all of the positions which can be reached from 's' by consuming a single character. 
*/ for (i = 0; i < d->states[s].mbps.nelem; i++) { - if (work_mbls[i] == *mbclen) + if (match_lens[i] == mbclen) for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem; j++) - insert (d->follows[d->states[s].mbps.elems[i].index].elems[j], pps); + insert (d->follows[d->states[s].mbps.elems[i].index].elems[j], + &d->mb_follows); } - if (match_lens == NULL && work_mbls != NULL) - free (work_mbls); - /* FIXME: this return value is always ignored. */ return rs; } @@ -3144,24 +3159,26 @@ transit_state_consume_1char (struct dfa *d, state_num s, buffer. This function is for some operator which can match with a multi- byte character or a collating element (which may be multi characters). */ static state_num -transit_state (struct dfa *d, state_num s, unsigned char const **pp) +transit_state (struct dfa *d, state_num s, unsigned char const **pp, + unsigned char const *end) { state_num s1; - int mbclen; /* The length of current input multibyte character. */ + int mbclen; /* The length of current input multibyte character. */ int maxlen = 0; size_t i, j; int *match_lens = NULL; size_t nelem = d->states[s].mbps.nelem; /* Just a alias. */ - position_set follows; unsigned char const *p1 = *pp; - wchar_t wc; + wint_t wc; if (nelem > 0) /* This state has (a) multibyte operator(s). We check whether each of them can match or not. */ { /* Note: caller must free the return value of this function. */ - match_lens = check_matching_with_multibyte_ops (d, s, *pp - buf_begin); + mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d); + match_lens = check_matching_with_multibyte_ops (d, s, (char const *) *pp, + wc, mbclen); for (i = 0; i < nelem; i++) /* Search the operator which match the longest string, @@ -3183,26 +3200,25 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp) if (rs == TRANSIT_STATE_DONE) ++*pp; - free (match_lens); return s1; } /* This state has some operators which can match a multibyte character. 
*/ - alloc_position_set (&follows, d->nleaves); + d->mb_follows.nelem = 0; /* 'maxlen' may be longer than the length of a character, because it may not be a character but a (multi character) collating element. We enumerate all of the positions which 's' can reach by consuming 'maxlen' bytes. */ - transit_state_consume_1char (d, s, pp, match_lens, &mbclen, &follows); + transit_state_consume_1char (d, s, pp, wc, mbclen, match_lens); - wc = inputwcs[*pp - mbclen - buf_begin]; - s1 = state_index (d, &follows, wchar_context (wc)); + s1 = state_index (d, &d->mb_follows, wchar_context (wc)); realloc_trans_if_necessary (d, s1); while (*pp - p1 < maxlen) { - transit_state_consume_1char (d, s1, pp, NULL, &mbclen, &follows); + mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d); + transit_state_consume_1char (d, s1, pp, wc, mbclen, NULL); for (i = 0; i < nelem; i++) { @@ -3210,68 +3226,15 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp) for (j = 0; j < d->follows[d->states[s1].mbps.elems[i].index].nelem; j++) insert (d->follows[d->states[s1].mbps.elems[i].index].elems[j], - &follows); + &d->mb_follows); } - wc = inputwcs[*pp - mbclen - buf_begin]; - s1 = state_index (d, &follows, wchar_context (wc)); + s1 = state_index (d, &d->mb_follows, wchar_context (wc)); realloc_trans_if_necessary (d, s1); } - free (match_lens); - free (follows.elems); return s1; } - -/* Initialize mblen_buf and inputwcs with data from the next line. 
*/ - -static void -prepare_wc_buf (const char *begin, const char *end) -{ -#if MBS_SUPPORT - unsigned char eol = eolbyte; - size_t remain_bytes, i; - - buf_begin = (unsigned char *) begin; - - remain_bytes = 0; - for (i = 0; i < end - begin + 1; i++) - { - if (remain_bytes == 0) - { - remain_bytes - = mbrtowc (inputwcs + i, begin + i, end - begin - i + 1, &mbs); - if (remain_bytes < 1 - || remain_bytes == (size_t) -1 - || remain_bytes == (size_t) -2 - || (remain_bytes == 1 && inputwcs[i] == (wchar_t) begin[i])) - { - remain_bytes = 0; - inputwcs[i] = (wchar_t) begin[i]; - mblen_buf[i] = 0; - if (begin[i] == eol) - break; - } - else - { - mblen_buf[i] = remain_bytes; - remain_bytes--; - } - } - else - { - mblen_buf[i] = remain_bytes; - inputwcs[i] = 0; - remain_bytes--; - } - } - - buf_end = (unsigned char *) (begin + i); - mblen_buf[i] = 0; - inputwcs[i] = 0; /* sentinel */ -#endif /* MBS_SUPPORT */ -} - /* Search through a buffer looking for a match to the given struct dfa. Find the first occurrence of a string matching the regexp in the buffer, and the shortest possible version thereof. Return a pointer to @@ -3283,69 +3246,97 @@ prepare_wc_buf (const char *begin, const char *end) If COUNT is non-NULL, increment *COUNT once for each newline processed. Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we encountered a back-reference (1) or not (0). The caller may use this - to decide whether to fall back on a backtracking matcher. */ + to decide whether to fall back on a backtracking matcher. */ char * dfaexec (struct dfa *d, char const *begin, char *end, int allow_nl, size_t *count, int *backref) { - state_num s, s1; /* Current state. */ - unsigned char const *p; /* Current input character. */ + state_num s, s1; /* Current state. */ + unsigned char const *p, *mbp; /* Current input character. */ state_num **trans, *t; /* Copy of d->trans so it can be optimized - into a register. */ + into a register. 
*/ unsigned char eol = eolbyte; /* Likewise for eolbyte. */ unsigned char saved_end; + size_t nlcount = 0; if (!d->tralloc) - build_state_zero (d); + { + realloc_trans_if_necessary (d, 1); + build_state (0, d); + } s = s1 = 0; - p = (unsigned char const *) begin; + p = mbp = (unsigned char const *) begin; trans = d->trans; saved_end = *(unsigned char *) end; *end = eol; - if (d->mb_cur_max > 1) + if (d->multibyte) { - MALLOC (mblen_buf, end - begin + 2); - MALLOC (inputwcs, end - begin + 2); - memset (&mbs, 0, sizeof (mbstate_t)); - prepare_wc_buf ((const char *) p, end); + memset (&d->mbs, 0, sizeof d->mbs); + if (! d->mb_match_lens) + { + d->mb_match_lens = xnmalloc (d->nleaves, sizeof *d->mb_match_lens); + alloc_position_set (&d->mb_follows, d->nleaves); + } } for (;;) { - if (d->mb_cur_max > 1) - while ((t = trans[s]) != NULL) - { - if (p > buf_end) - break; - s1 = s; - SKIP_REMAINS_MB_IF_INITIAL_STATE (s, p); + if (d->multibyte) + { + while ((t = trans[s]) != NULL) + { + s1 = s; - if (d->states[s].mbps.nelem == 0) - { - s = t[*p++]; - continue; - } + if (s == 0) + { + /* The initial state may encounter a byte which is not + a single byte character nor the first byte of a + multibyte character. But it is incorrect for the + initial state to accept such a byte. For example, + in Shift JIS the regular expression "\\" accepts + the codepoint 0x5c, but should not accept the second + byte of the codepoint 0x815c. Then the initial + state must skip the bytes that are not a single + byte character nor the first byte of a multibyte + character. */ + wint_t wc; + while (mbp < p) + mbp += mbs_to_wchar (&wc, (char const *) mbp, + end - (char const *) mbp, d); + p = mbp; + + if ((char *) p > end) + { + p = NULL; + goto done; + } + } - /* Falling back to the glibc matcher in this case gives - better performance (up to 25% better on [a-z], for - example) and enables support for collating symbols and - equivalence classes. 
*/ - if (backref) - { - *backref = 1; - free (mblen_buf); - free (inputwcs); - *end = saved_end; - return (char *) p; - } + if (d->states[s].mbps.nelem == 0) + { + s = t[*p++]; + continue; + } - /* Can match with a multibyte character (and multi character - collating element). Transition table might be updated. */ - s = transit_state (d, s, &p); - trans = d->trans; - } + /* Falling back to the glibc matcher in this case gives + better performance (up to 25% better on [a-z], for + example) and enables support for collating symbols and + equivalence classes. */ + if (d->states[s].has_mbcset && backref) + { + *backref = 1; + goto done; + } + + /* Can match with a multibyte character (and multi character + collating element). Transition table might be updated. */ + s = transit_state (d, s, &p, (unsigned char *) end); + mbp = p; + trans = d->trans; + } + } else { while ((t = trans[s]) != NULL) @@ -3362,27 +3353,28 @@ dfaexec (struct dfa *d, char const *begin, char *end, } } - if (s >= 0 && (char *) p <= end && d->fails[s]) + if ((char *) p > end) + { + p = NULL; + goto done; + } + + if (s >= 0 && d->fails[s]) { if (d->success[s] & sbit[*p]) { if (backref) - *backref = (d->states[s].backref != 0); - if (d->mb_cur_max > 1) - { - free (mblen_buf); - free (inputwcs); - } - *end = saved_end; - return (char *) p; + *backref = d->states[s].has_backref; + goto done; } s1 = s; - if (d->mb_cur_max > 1) + if (d->multibyte) { /* Can match with a multibyte character (and multicharacter collating element). Transition table might be updated. */ - s = transit_state (d, s, &p); + s = transit_state (d, s, &p, (unsigned char *) end); + mbp = p; trans = d->trans; } else @@ -3390,31 +3382,18 @@ dfaexec (struct dfa *d, char const *begin, char *end, continue; } - /* If the previous character was a newline, count it. */ - if ((char *) p <= end && p[-1] == eol) + /* If the previous character was a newline, count it, and skip + checking of multibyte character boundary until here. 
*/ + if (p[-1] == eol) { - if (count) - ++*count; - - if (d->mb_cur_max > 1) - prepare_wc_buf ((const char *) p, end); - } - - /* Check if we've run off the end of the buffer. */ - if ((char *) p > end) - { - if (d->mb_cur_max > 1) - { - free (mblen_buf); - free (inputwcs); - } - *end = saved_end; - return NULL; + nlcount++; + mbp = p; } if (s >= 0) { - build_state (s, d); + if (!d->trans[s]) + build_state (s, d); trans = d->trans; continue; } @@ -3427,6 +3406,24 @@ dfaexec (struct dfa *d, char const *begin, char *end, s = 0; } + + done: + if (count) + *count += nlcount; + *end = saved_end; + return (char *) p; +} + +struct dfa * +dfasuperset (struct dfa const *d) +{ + return d->superset; +} + +bool +dfaisfast (struct dfa const *d) +{ + return d->fast; } static void @@ -3435,7 +3432,6 @@ free_mbdata (struct dfa *d) size_t i; free (d->multibyte_prop); - d->multibyte_prop = NULL; for (i = 0; i < d->nmbcsets; ++i) { @@ -3443,8 +3439,7 @@ free_mbdata (struct dfa *d) struct mb_char_classes *p = &(d->mbcsets[i]); free (p->chars); free (p->ch_classes); - free (p->range_sts); - free (p->range_ends); + free (p->ranges); for (j = 0; j < p->nequivs; ++j) free (p->equivs[j]); @@ -3456,40 +3451,28 @@ free_mbdata (struct dfa *d) } free (d->mbcsets); - d->mbcsets = NULL; - d->nmbcsets = 0; + free (d->mb_follows.elems); + free (d->mb_match_lens); + d->mb_match_lens = NULL; } /* Initialize the components of a dfa that the other routines don't - initialize for themselves. */ + initialize for themselves. 
*/ void dfainit (struct dfa *d) { memset (d, 0, sizeof *d); - - d->calloc = 1; - MALLOC (d->charclasses, d->calloc); - - d->talloc = 1; - MALLOC (d->tokens, d->talloc); - - d->mb_cur_max = MB_CUR_MAX; - - if (d->mb_cur_max > 1) - { - d->nmultibyte_prop = 1; - MALLOC (d->multibyte_prop, d->nmultibyte_prop); - d->mbcsets_alloc = 1; - MALLOC (d->mbcsets, d->mbcsets_alloc); - } + d->multibyte = MB_CUR_MAX > 1; + d->fast = !d->multibyte; } static void dfaoptimize (struct dfa *d) { size_t i; + bool have_backref = false; - if (!MBS_SUPPORT || !using_utf8 ()) + if (!using_utf8 ()) return; for (i = 0; i < d->tindex; ++i) @@ -3499,6 +3482,9 @@ dfaoptimize (struct dfa *d) case ANYCHAR: /* Lowered. */ abort (); + case BACKREF: + have_backref = true; + break; case MBCSET: /* Requires multi-byte algorithm. */ return; @@ -3507,22 +3493,116 @@ dfaoptimize (struct dfa *d) } } + if (!have_backref && d->superset) + { + /* The superset DFA is not likely to be much faster, so remove it. */ + dfafree (d->superset); + free (d->superset); + d->superset = NULL; + } + free_mbdata (d); - d->mb_cur_max = 1; + d->multibyte = false; } -/* Parse and analyze a single string of the given length. 
*/ +static void +dfassbuild (struct dfa *d) +{ + size_t i, j; + charclass ccl; + bool have_achar = false; + bool have_nchar = false; + struct dfa *sup = dfaalloc (); + + *sup = *d; + sup->multibyte = false; + sup->multibyte_prop = NULL; + sup->mbcsets = NULL; + sup->superset = NULL; + sup->states = NULL; + sup->sindex = 0; + sup->follows = NULL; + sup->tralloc = 0; + sup->trans = NULL; + sup->fails = NULL; + sup->success = NULL; + sup->newlines = NULL; + sup->musts = NULL; + + sup->charclasses = xnmalloc (sup->calloc, sizeof *sup->charclasses); + memcpy (sup->charclasses, d->charclasses, + d->cindex * sizeof *sup->charclasses); + + sup->tokens = xnmalloc (d->tindex, 2 * sizeof *sup->tokens); + sup->talloc = d->tindex * 2; + + for (i = j = 0; i < d->tindex; i++) + { + switch (d->tokens[i]) + { + case ANYCHAR: + case MBCSET: + case BACKREF: + zeroset (ccl); + notset (ccl); + sup->tokens[j++] = CSET + dfa_charclass_index (sup, ccl); + sup->tokens[j++] = STAR; + if (d->tokens[i + 1] == QMARK || d->tokens[i + 1] == STAR + || d->tokens[i + 1] == PLUS) + i++; + have_achar = true; + break; + case BEGWORD: + case ENDWORD: + case LIMWORD: + case NOTLIMWORD: + if (d->multibyte) + { + /* These constraints aren't supported in a multibyte locale. + Ignore them in the superset DFA, and treat them as + backreferences in the main DFA. */ + sup->tokens[j++] = EMPTY; + d->tokens[i] = BACKREF; + break; + } + default: + sup->tokens[j++] = d->tokens[i]; + if ((0 <= d->tokens[i] && d->tokens[i] < NOTCHAR) + || d->tokens[i] >= CSET) + have_nchar = true; + break; + } + } + sup->tindex = j; + + if (have_nchar && (have_achar || d->multibyte)) + d->superset = sup; + else + { + dfafree (sup); + free (sup); + } +} + +/* Parse and analyze a single string of the given length. 
*/ void dfacomp (char const *s, size_t len, struct dfa *d, int searchflag) { dfainit (d); + dfambcache (d); dfaparse (s, len, d); dfamust (d); + dfassbuild (d); dfaoptimize (d); dfaanalyze (d, searchflag); + if (d->superset) + { + d->fast = true; + dfaanalyze (d->superset, searchflag); + } } -/* Free the storage held by the components of a dfa. */ +/* Free the storage held by the components of a dfa. */ void dfafree (struct dfa *d) { @@ -3532,34 +3612,46 @@ dfafree (struct dfa *d) free (d->charclasses); free (d->tokens); - if (d->mb_cur_max > 1) + if (d->multibyte) free_mbdata (d); for (i = 0; i < d->sindex; ++i) { free (d->states[i].elems.elems); - if (MBS_SUPPORT) - free (d->states[i].mbps.elems); + free (d->states[i].mbps.elems); } free (d->states); - for (i = 0; i < d->tindex; ++i) - free (d->follows[i].elems); - free (d->follows); - for (i = 0; i < d->tralloc; ++i) + + if (d->follows) { - free (d->trans[i]); - free (d->fails[i]); + for (i = 0; i < d->tindex; ++i) + free (d->follows[i].elems); + free (d->follows); } - free (d->realtrans); - free (d->fails); - free (d->newlines); - free (d->success); + + if (d->trans) + { + for (i = 0; i < d->tralloc; ++i) + { + free (d->trans[i]); + free (d->fails[i]); + } + + free (d->trans - 1); + free (d->fails); + free (d->newlines); + free (d->success); + } + for (dm = d->musts; dm; dm = ndm) { ndm = dm->next; free (dm->must); free (dm); } + + if (d->superset) + dfafree (d->superset); } /* Having found the postfix representation of the regular expression, @@ -3617,13 +3709,13 @@ dfafree (struct dfa *d) and q->left and q->right p->is : NULL If there's anything else we recognize in the tree, all four sequences get set - to zero-length sequences. If there's something we don't recognize in the tree, - we just return a zero-length sequence. + to zero-length sequences. If there's something we don't recognize in the + tree, we just return a zero-length sequence. 
Break ties in favor of infrequent letters (choosing 'zzz' in preference to 'aaa')? - And. . .is it here or someplace that we might ponder "optimizations" such as + And ... is it here or someplace that we might ponder "optimizations" such as egrep 'psi|epsilon' -> egrep 'psi' egrep 'pepsi|epsilon' -> egrep 'epsi' (Yes, we now find "epsi" as a "string @@ -3644,27 +3736,22 @@ dfafree (struct dfa *d) Are optimizable r.e.'s likely to be used in real-life situations (something like 'ab*' is probably unlikely; something like is - 'psi|epsilon' is likelier)? */ + 'psi|epsilon' is likelier)? */ static char * icatalloc (char *old, char const *new) { char *result; - size_t oldsize = old == NULL ? 0 : strlen (old); - size_t newsize = new == NULL ? 0 : strlen (new); + size_t oldsize; + size_t newsize = strlen (new); if (newsize == 0) return old; + oldsize = strlen (old); result = xrealloc (old, oldsize + newsize + 1); memcpy (result + oldsize, new, newsize + 1); return result; } -static char * -icpyalloc (char const *string) -{ - return icatalloc (NULL, string); -} - static char *_GL_ATTRIBUTE_PURE istrstr (char const *lookin, char const *lookfor) { @@ -3681,38 +3768,24 @@ istrstr (char const *lookin, char const *lookfor) static void freelist (char **cpp) { - size_t i; - - if (cpp == NULL) - return; - for (i = 0; cpp[i] != NULL; ++i) - { - free (cpp[i]); - cpp[i] = NULL; - } + while (*cpp) + free (*cpp++); } static char ** enlist (char **cpp, char *new, size_t len) { size_t i, j; - - if (cpp == NULL) - return NULL; - if ((new = icpyalloc (new)) == NULL) - { - freelist (cpp); - return NULL; - } + new = memcpy (xmalloc (len + 1), new, len); new[len] = '\0'; - /* Is there already something in the list that's new (or longer)? */ + /* Is there already something in the list that's new (or longer)? */ for (i = 0; cpp[i] != NULL; ++i) if (istrstr (cpp[i], new) != NULL) { free (new); return cpp; } - /* Eliminate any obsoleted strings. */ + /* Eliminate any obsoleted strings. 
*/ j = 0; while (cpp[j] != NULL) if (istrstr (new, cpp[j]) == NULL) @@ -3725,54 +3798,36 @@ enlist (char **cpp, char *new, size_t len) cpp[j] = cpp[i]; cpp[i] = NULL; } - /* Add the new string. */ - REALLOC (cpp, i + 2); + /* Add the new string. */ + cpp = xnrealloc (cpp, i + 2, sizeof *cpp); cpp[i] = new; cpp[i + 1] = NULL; return cpp; } /* Given pointers to two strings, return a pointer to an allocated - list of their distinct common substrings. Return NULL if something - seems wild. */ + list of their distinct common substrings. */ static char ** comsubs (char *left, char const *right) { - char **cpp; + char **cpp = xzalloc (sizeof *cpp); char *lcp; - char *rcp; - size_t i, len; - - if (left == NULL || right == NULL) - return NULL; - cpp = malloc (sizeof *cpp); - if (cpp == NULL) - return NULL; - cpp[0] = NULL; + for (lcp = left; *lcp != '\0'; ++lcp) { - len = 0; - rcp = strchr (right, *lcp); + size_t len = 0; + char *rcp = strchr (right, *lcp); while (rcp != NULL) { + size_t i; for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i) continue; if (i > len) len = i; rcp = strchr (rcp + 1, *lcp); } - if (len == 0) - continue; - { - char **p = enlist (cpp, lcp, len); - if (p == NULL) - { - freelist (cpp); - cpp = NULL; - break; - } - cpp = p; - } + if (len != 0) + cpp = enlist (cpp, lcp, len); } return cpp; } @@ -3780,143 +3835,144 @@ comsubs (char *left, char const *right) static char ** addlists (char **old, char **new) { - size_t i; - - if (old == NULL || new == NULL) - return NULL; - for (i = 0; new[i] != NULL; ++i) - { - old = enlist (old, new[i], strlen (new[i])); - if (old == NULL) - break; - } + for (; *new; new++) + old = enlist (old, *new, strlen (*new)); return old; } /* Given two lists of substrings, return a new list giving substrings - common to both. */ + common to both. 
*/ static char ** inboth (char **left, char **right) { - char **both; - char **temp; + char **both = xzalloc (sizeof *both); size_t lnum, rnum; - if (left == NULL || right == NULL) - return NULL; - both = malloc (sizeof *both); - if (both == NULL) - return NULL; - both[0] = NULL; for (lnum = 0; left[lnum] != NULL; ++lnum) { for (rnum = 0; right[rnum] != NULL; ++rnum) { - temp = comsubs (left[lnum], right[rnum]); - if (temp == NULL) - { - freelist (both); - return NULL; - } + char **temp = comsubs (left[lnum], right[rnum]); both = addlists (both, temp); freelist (temp); free (temp); - if (both == NULL) - return NULL; } } return both; } -typedef struct +typedef struct must must; + +struct must { char **in; char *left; char *right; char *is; -} must; + bool begline; + bool endline; + must *prev; +}; + +static must * +allocmust (must *mp) +{ + must *new_mp = xmalloc (sizeof *new_mp); + new_mp->in = xzalloc (sizeof *new_mp->in); + new_mp->left = xzalloc (2); + new_mp->right = xzalloc (2); + new_mp->is = xzalloc (2); + new_mp->begline = false; + new_mp->endline = false; + new_mp->prev = mp; + return new_mp; +} static void -resetmust (must * mp) +resetmust (must *mp) { + freelist (mp->in); + mp->in[0] = NULL; mp->left[0] = mp->right[0] = mp->is[0] = '\0'; + mp->begline = false; + mp->endline = false; +} + +static void +freemust (must *mp) +{ freelist (mp->in); + free (mp->in); + free (mp->left); + free (mp->right); + free (mp->is); + free (mp); } static void dfamust (struct dfa *d) { - must *musts; - must *mp; - char *result; + must *mp = NULL; + char const *result = ""; size_t ri; size_t i; - int exact; - token t; - static must must0; + bool exact = false; + bool begline = false; + bool endline = false; struct dfamust *dm; - static char empty_string[] = ""; - - result = empty_string; - exact = 0; - MALLOC (musts, d->tindex + 1); - mp = musts; - for (i = 0; i <= d->tindex; ++i) - mp[i] = must0; - for (i = 0; i <= d->tindex; ++i) - { - mp[i].in = xmalloc (sizeof 
*mp[i].in); - mp[i].left = xmalloc (2); - mp[i].right = xmalloc (2); - mp[i].is = xmalloc (2); - mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0'; - mp[i].in[0] = NULL; - } -#ifdef DEBUG - fprintf (stderr, "dfamust:\n"); - for (i = 0; i < d->tindex; ++i) - { - fprintf (stderr, " %zd:", i); - prtok (d->tokens[i]); - } - putc ('\n', stderr); -#endif + for (ri = 0; ri < d->tindex; ++ri) { - switch (t = d->tokens[ri]) + token t = d->tokens[ri]; + switch (t) { + case BEGLINE: + mp = allocmust (mp); + mp->begline = true; + break; + case ENDLINE: + mp = allocmust (mp); + mp->endline = true; + break; case LPAREN: case RPAREN: assert (!"neither LPAREN nor RPAREN may appear here"); + case EMPTY: - case BEGLINE: - case ENDLINE: case BEGWORD: case ENDWORD: case LIMWORD: case NOTLIMWORD: case BACKREF: - resetmust (mp); + case ANYCHAR: + case MBCSET: + mp = allocmust (mp); break; + case STAR: case QMARK: - assert (musts < mp); - --mp; resetmust (mp); break; + case OR: - assert (&musts[2] <= mp); { char **new; - must *lmp; - must *rmp; + must *rmp = mp; + must *lmp = mp = mp->prev; size_t j, ln, rn, n; - rmp = --mp; - lmp = --mp; - /* Guaranteed to be. Unlikely, but. . . */ - if (!STREQ (lmp->is, rmp->is)) - lmp->is[0] = '\0'; + /* Guaranteed to be. Unlikely, but ... 
*/ + if (STREQ (lmp->is, rmp->is)) + { + lmp->begline &= rmp->begline; + lmp->endline &= rmp->endline; + } + else + { + lmp->is[0] = '\0'; + lmp->begline = false; + lmp->endline = false; + } /* Left side--easy */ i = 0; while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i]) @@ -3935,133 +3991,126 @@ dfamust (struct dfa *d) lmp->right[j] = lmp->right[(ln - i) + j]; lmp->right[j] = '\0'; new = inboth (lmp->in, rmp->in); - if (new == NULL) - goto done; freelist (lmp->in); free (lmp->in); lmp->in = new; + freemust (rmp); } break; + case PLUS: - assert (musts < mp); - --mp; mp->is[0] = '\0'; break; + case END: - assert (mp == &musts[1]); - for (i = 0; musts[0].in[i] != NULL; ++i) - if (strlen (musts[0].in[i]) > strlen (result)) - result = musts[0].in[i]; - if (STREQ (result, musts[0].is)) - exact = 1; + assert (!mp->prev); + for (i = 0; mp->in[i] != NULL; ++i) + if (strlen (mp->in[i]) > strlen (result)) + result = mp->in[i]; + if (STREQ (result, mp->is)) + { + exact = true; + begline = mp->begline; + endline = mp->endline; + } goto done; + case CAT: - assert (&musts[2] <= mp); { - must *lmp; - must *rmp; + must *rmp = mp; + must *lmp = mp = mp->prev; - rmp = --mp; - lmp = --mp; /* In. Everything in left, plus everything in right, plus concatenation of - left's right and right's left. */ + left's right and right's left. 
*/ lmp->in = addlists (lmp->in, rmp->in); - if (lmp->in == NULL) - goto done; if (lmp->right[0] != '\0' && rmp->left[0] != '\0') { - char *tp; - - tp = icpyalloc (lmp->right); - tp = icatalloc (tp, rmp->left); - lmp->in = enlist (lmp->in, tp, strlen (tp)); + size_t lrlen = strlen (lmp->right); + size_t rllen = strlen (rmp->left); + char *tp = xmalloc (lrlen + rllen); + memcpy (tp, lmp->right, lrlen); + memcpy (tp + lrlen, rmp->left, rllen); + lmp->in = enlist (lmp->in, tp, lrlen + rllen); free (tp); - if (lmp->in == NULL) - goto done; } /* Left-hand */ if (lmp->is[0] != '\0') - { - lmp->left = icatalloc (lmp->left, rmp->left); - if (lmp->left == NULL) - goto done; - } + lmp->left = icatalloc (lmp->left, rmp->left); /* Right-hand */ if (rmp->is[0] == '\0') lmp->right[0] = '\0'; lmp->right = icatalloc (lmp->right, rmp->right); - if (lmp->right == NULL) - goto done; /* Guaranteed to be */ - if (lmp->is[0] != '\0' && rmp->is[0] != '\0') + if ((lmp->is[0] != '\0' || lmp->begline) + && (rmp->is[0] != '\0' || rmp->endline)) { lmp->is = icatalloc (lmp->is, rmp->is); - if (lmp->is == NULL) - goto done; + lmp->endline = rmp->endline; } else - lmp->is[0] = '\0'; + { + lmp->is[0] = '\0'; + lmp->begline = false; + lmp->endline = false; + } + freemust (rmp); } break; + + case '\0': + /* Not on *my* shift. */ + goto done; + default: - if (t < END) + mp = allocmust (mp); + if (CSET <= t) { - assert (!"oops! t >= END"); - } - else if (t == '\0') - { - /* not on *my* shift */ - goto done; - } - else if (t >= CSET || !MBS_SUPPORT || t == ANYCHAR || t == MBCSET) - { - /* easy enough */ - resetmust (mp); - } - else - { - /* plain character */ - resetmust (mp); - mp->is[0] = mp->left[0] = mp->right[0] = t; - mp->is[1] = mp->left[1] = mp->right[1] = '\0'; - mp->in = enlist (mp->in, mp->is, (size_t) 1); - if (mp->in == NULL) - goto done; + /* If T is a singleton, or if case-folding in a unibyte + locale and T's members all case-fold to the same char, + convert T to one of its members. 
Otherwise, do + nothing further with T. */ + charclass *ccl = &d->charclasses[t - CSET]; + int j; + for (j = 0; j < NOTCHAR; j++) + if (tstbit (j, *ccl)) + break; + if (! (j < NOTCHAR)) + break; + t = j; + while (++j < NOTCHAR) + if (tstbit (j, *ccl) + && ! (case_fold && !d->multibyte + && toupper (j) == toupper (t))) + break; + if (j < NOTCHAR) + break; } + mp->is[0] = mp->left[0] = mp->right[0] + = case_fold && !d->multibyte ? toupper (t) : t; + mp->is[1] = mp->left[1] = mp->right[1] = '\0'; + mp->in = enlist (mp->in, mp->is, 1); break; } -#ifdef DEBUG - fprintf (stderr, " node: %zd:", ri); - prtok (d->tokens[ri]); - fprintf (stderr, "\n in:"); - for (i = 0; mp->in[i]; ++i) - fprintf (stderr, " \"%s\"", mp->in[i]); - fprintf (stderr, "\n is: \"%s\"\n", mp->is); - fprintf (stderr, " left: \"%s\"\n", mp->left); - fprintf (stderr, " right: \"%s\"\n", mp->right); -#endif - ++mp; } done: - if (strlen (result)) + if (*result) { - MALLOC (dm, 1); + dm = xmalloc (sizeof *dm); dm->exact = exact; - dm->must = xmemdup (result, strlen (result) + 1); + dm->begline = begline; + dm->endline = endline; + dm->must = xstrdup (result); dm->next = d->musts; d->musts = dm; } - mp = musts; - for (i = 0; i <= d->tindex; ++i) + + while (mp) { - freelist (mp[i].in); - free (mp[i].in); - free (mp[i].left); - free (mp[i].right); - free (mp[i].is); + must *prev = mp->prev; + freemust (mp); + mp = prev; } - free (mp); } struct dfa * diff --git a/contrib/grep/src/dfa.h b/contrib/grep/src/dfa.h index 96dd4b86e3..f30c3cb8c4 100644 --- a/contrib/grep/src/dfa.h +++ b/contrib/grep/src/dfa.h @@ -1,5 +1,5 @@ /* dfa.h - declarations for GNU deterministic regexp compiler - Copyright (C) 1988, 1998, 2007, 2009-2012 Free Software Foundation, Inc. + Copyright (C) 1988, 1998, 2007, 2009-2014 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,11 +18,17 @@ /* Written June, 1988 by Mike Haertel */ +#include +#include +#include + /* Element of a list of strings, at least one of which is known to appear in any R.E. matching the DFA. */ struct dfamust { - int exact; + bool exact; + bool begline; + bool endline; char *must; struct dfamust *next; }; @@ -65,6 +71,15 @@ extern void dfacomp (char const *, size_t, struct dfa *, int); extern char *dfaexec (struct dfa *d, char const *begin, char *end, int newline, size_t *count, int *backref); +/* Return a superset for D. The superset matches everything that D + matches, along with some other strings (though the latter should be + rare, for efficiency reasons). Return a null pointer if no useful + superset is available. */ +extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE; + +/* The DFA is likely to be fast. */ +extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE; + /* Free the storage held by the components of a struct dfa. */ extern void dfafree (struct dfa *); @@ -96,3 +111,5 @@ extern void dfawarn (const char *); takes a single argument, a NUL-terminated string describing the error. The user must supply a dfaerror. */ extern _Noreturn void dfaerror (const char *); + +extern int using_utf8 (void); diff --git a/contrib/grep/src/dfasearch.c b/contrib/grep/src/dfasearch.c index eaf783e22a..77b4e3ecaf 100644 --- a/contrib/grep/src/dfasearch.c +++ b/contrib/grep/src/dfasearch.c @@ -1,5 +1,5 @@ /* dfasearch.c - searching subroutines using dfa and regex for grep. - Copyright 1992, 1998, 2000, 2007, 2009-2012 Free Software Foundation, Inc. + Copyright 1992, 1998, 2000, 2007, 2009-2014 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,10 +21,13 @@ #include #include "intprops.h" #include "search.h" -#include "dfa.h" -/* For -w, we also consider _ to be word constituent. */ -#define WCHAR(C) (isalnum (C) || (C) == '_') +/* Whether -w considers WC to be a word constituent. */ +static bool +wordchar (wint_t wc) +{ + return wc == L'_' || iswalnum (wc); +} /* KWset compiled pattern. For Ecompile and Gcompile, we compile a list of strings, at least one of which is known to occur in @@ -46,6 +49,13 @@ static struct patterns static struct patterns *patterns; static size_t pcount; +/* Number of compiled fixed strings known to exactly match the regexp. + If kwsexec returns < kwset_exact_matches, then we don't need to + call the regexp matcher at all. */ +static size_t kwset_exact_matches; + +static bool begline; + void dfaerror (char const *mesg) { @@ -69,22 +79,6 @@ dfawarn (char const *mesg) dfaerror (mesg); } -/* Number of compiled fixed strings known to exactly match the regexp. - If kwsexec returns < kwset_exact_matches, then we don't need to - call the regexp matcher at all. */ -static size_t kwset_exact_matches; - -static char const * -kwsincr_case (const char *must) -{ - size_t n = strlen (must); - mb_len_map_t *map = NULL; - const char *buf = (match_icase && MB_CUR_MAX > 1 - ? 
mbtolower (must, &n, &map) - : must); - return kwsincr (kwset, buf, n); -} - /* If the DFA turns out to have some set of fixed strings one of which must occur in the match, then we build a kwset matcher to find those strings, and thus quickly filter out impossible @@ -92,10 +86,7 @@ kwsincr_case (const char *must) static void kwsmusts (void) { - struct dfamust const *dm; - char const *err; - - dm = dfamusts (dfa); + struct dfamust const *dm = dfamusts (dfa); if (dm) { kwsinit (&kwset); @@ -107,8 +98,18 @@ kwsmusts (void) if (!dm->exact) continue; ++kwset_exact_matches; - if ((err = kwsincr_case (dm->must)) != NULL) - error (EXIT_TROUBLE, 0, "%s", err); + size_t old_len = strlen (dm->must); + size_t new_len = old_len + dm->begline + dm->endline; + char *must = xmalloc (new_len); + char *mp = must; + *mp = eolbyte; + mp += dm->begline; + begline |= dm->begline; + memcpy (mp, dm->must, old_len); + if (dm->endline) + mp[old_len] = eolbyte; + kwsincr (kwset, must, new_len); + free (must); } /* Now, we compile the substrings that will require the use of the regexp matcher. */ @@ -116,19 +117,15 @@ kwsmusts (void) { if (dm->exact) continue; - if ((err = kwsincr_case (dm->must)) != NULL) - error (EXIT_TROUBLE, 0, "%s", err); + kwsincr (kwset, dm->must, strlen (dm->must)); } - if ((err = kwsprep (kwset)) != NULL) - error (EXIT_TROUBLE, 0, "%s", err); + kwsprep (kwset); } } void GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) { - const char *err; - const char *p, *sep; size_t total = size; char *motif; @@ -137,15 +134,15 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) re_set_syntax (syntax_bits); dfasyntax (syntax_bits, match_icase, eolbyte); - /* For GNU regex compiler we have to pass the patterns separately to detect - errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]" - GNU regex should have raise a syntax error. The same for backref, where - the backref should have been local to each pattern. 
*/ - p = pattern; + /* For GNU regex, pass the patterns separately to detect errors like + "[\nallo\n]\n", where the patterns are "[", "allo" and "]", and + this should be a syntax error. The same for backref, where the + backref should be local to each pattern. */ + char const *p = pattern; do { size_t len; - sep = memchr (p, '\n', total); + char const *sep = memchr (p, '\n', total); if (sep) { len = sep - p; @@ -161,13 +158,14 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) patterns = xnrealloc (patterns, pcount + 1, sizeof *patterns); patterns[pcount] = patterns0; - if ((err = re_compile_pattern (p, len, - &(patterns[pcount].regexbuf))) != NULL) + char const *err = re_compile_pattern (p, len, + &(patterns[pcount].regexbuf)); + if (err) error (EXIT_TROUBLE, 0, "%s", err); pcount++; - p = sep; - } while (sep && total != 0); + } + while (p); /* In the match_words and match_lines cases, we use a different pattern for the DFA matcher that will quickly throw out cases that won't work. @@ -211,128 +209,162 @@ size_t EGexecute (char const *buf, size_t size, size_t *match_size, char const *start_ptr) { - char const *buflim, *beg, *end, *match, *best_match, *mb_start; + char const *buflim, *beg, *end, *ptr, *match, *best_match, *mb_start; char eol = eolbyte; int backref; regoff_t start; size_t len, best_len; struct kwsmatch kwsm; - size_t i, ret_val; - mb_len_map_t *map = NULL; - - if (MB_CUR_MAX > 1) - { - if (match_icase) - { - /* mbtolower adds a NUL byte at the end. That will provide - space for the sentinel byte dfaexec may add. */ - char *case_buf = mbtolower (buf, &size, &map); - if (start_ptr) - start_ptr = case_buf + (start_ptr - buf); - buf = case_buf; - } - } + size_t i; + struct dfa *superset = dfasuperset (dfa); + bool dfafast = dfaisfast (dfa); mb_start = buf; buflim = buf + size; for (beg = end = buf; end < buflim; beg = end) { + end = buflim; + if (!start_ptr) { - /* We don't care about an exact match. 
*/ + char const *next_beg, *dfa_beg = beg; + size_t count = 0; + bool exact_kwset_match = false; + + /* Try matching with KWset, if it's defined. */ if (kwset) { - /* Find a possible match using the KWset matcher. */ - size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); + char const *prev_beg; + + /* Find a possible match using the KWset matcher. */ + size_t offset = kwsexec (kwset, beg - begline, + buflim - beg + begline, &kwsm); if (offset == (size_t) -1) goto failure; - beg += offset; - /* Narrow down to the line containing the candidate, and - run it through DFA. */ - if ((end = memchr(beg, eol, buflim - beg)) != NULL) - end++; - else - end = buflim; - match = beg; - while (beg > buf && beg[-1] != eol) - --beg; - if (kwsm.index < kwset_exact_matches) + match = beg + offset; + prev_beg = beg; + + /* Narrow down to the line containing the possible match. */ + beg = memrchr (buf, eol, match - buf); + beg = beg ? beg + 1 : buf; + dfa_beg = beg; + + /* Determine the end pointer to give the DFA next. Typically + this is after the first newline after MATCH; but if the KWset + match is not exact, the DFA is fast, and the offset from + PREV_BEG is less than 64 or (MATCH - PREV_BEG), this is the + greater of the latter two values; this temporarily prefers + the DFA to KWset. */ + exact_kwset_match = kwsm.index < kwset_exact_matches; + end = ((exact_kwset_match || !dfafast + || MAX (16, match - beg) < (match - prev_beg) >> 2) + ? match + : MAX (16, match - beg) < (buflim - prev_beg) >> 2 + ? prev_beg + 4 * MAX (16, match - beg) + : buflim); + end = memchr (end, eol, buflim - end); + end = end ? end + 1 : buflim; + + if (exact_kwset_match) { - if (!MBS_SUPPORT) + if (MB_CUR_MAX == 1 || using_utf8 ()) goto success; - if (mb_start < beg) mb_start = beg; - if (MB_CUR_MAX == 1 - || !is_mb_middle (&mb_start, match, buflim, - kwsm.size[0])) + if (mb_goback (&mb_start, match, buflim) == 0) goto success; + /* The matched line starts in the middle of a multibyte + character. 
Perform the DFA search starting from the + beginning of the next character. */ + dfa_beg = mb_start; + } + } + + /* Try matching with the superset of DFA, if it's defined. */ + if (superset && !exact_kwset_match) + { + /* Keep using the superset while it reports multiline + potential matches; this is more likely to be fast + than falling back to KWset would be. */ + while ((next_beg = dfaexec (superset, dfa_beg, (char *) end, 1, + &count, NULL)) + && next_beg != end + && count != 0) + { + /* Try to match in just one line. */ + count = 0; + beg = memrchr (buf, eol, next_beg - buf); + beg++; + dfa_beg = beg; } - if (dfaexec (dfa, beg, (char *) end, 0, NULL, &backref) == NULL) + if (next_beg == NULL || next_beg == end) continue; + + /* Narrow down to the line we've found. */ + end = memchr (next_beg, eol, buflim - next_beg); + end = end ? end + 1 : buflim; } - else + + /* Try matching with DFA. */ + next_beg = dfaexec (dfa, dfa_beg, (char *) end, 0, &count, &backref); + + /* If there's no match, or if we've matched the sentinel, + we're done. */ + if (next_beg == NULL || next_beg == end) + continue; + + /* Narrow down to the line we've found. */ + if (count != 0) { - /* No good fixed strings; start with DFA. */ - char const *next_beg = dfaexec (dfa, beg, (char *) buflim, - 0, NULL, &backref); - /* If there's no match, or if we've matched the sentinel, - we're done. */ - if (next_beg == NULL || next_beg == buflim) - break; - /* Narrow down to the line we've found. */ - beg = next_beg; - if ((end = memchr(beg, eol, buflim - beg)) != NULL) - end++; - else - end = buflim; - while (beg > buf && beg[-1] != eol) - --beg; + beg = memrchr (buf, eol, next_beg - buf); + beg++; } + end = memchr (next_beg, eol, buflim - next_beg); + end = end ? end + 1 : buflim; + /* Successful, no backreferences encountered! */ if (!backref) goto success; + ptr = beg; } else { /* We are looking for the leftmost (then longest) exact match. We will go through the outer loop only once. 
*/ - beg = start_ptr; - end = buflim; + ptr = start_ptr; } /* If the "line" is longer than the maximum regexp offset, die as if we've run out of memory. */ - if (TYPE_MAXIMUM (regoff_t) < end - buf - 1) + if (TYPE_MAXIMUM (regoff_t) < end - beg - 1) xalloc_die (); - /* If we've made it to this point, this means DFA has seen - a probable match, and we need to run it through Regex. */ + /* Run the possible match through Regex. */ best_match = end; best_len = 0; for (i = 0; i < pcount; i++) { patterns[i].regexbuf.not_eol = 0; start = re_search (&(patterns[i].regexbuf), - buf, end - buf - 1, - beg - buf, end - beg - 1, + beg, end - beg - 1, + ptr - beg, end - ptr - 1, &(patterns[i].regs)); if (start < -1) xalloc_die (); else if (0 <= start) { len = patterns[i].regs.end[0] - start; - match = buf + start; + match = beg + start; if (match > best_match) continue; if (start_ptr && !match_words) goto assess_pattern_match; if ((!match_lines && !match_words) - || (match_lines && len == end - beg - 1)) + || (match_lines && len == end - ptr - 1)) { - match = beg; - len = end - beg; + match = ptr; + len = end - ptr; goto assess_pattern_match; } /* If -w, check if the match aligns with word boundaries. 
@@ -346,9 +378,8 @@ EGexecute (char const *buf, size_t size, size_t *match_size, while (match <= best_match) { regoff_t shorter_len = 0; - if ((match == buf || !WCHAR ((unsigned char) match[-1])) - && (start + len == end - buf - 1 - || !WCHAR ((unsigned char) match[len]))) + if (!wordchar (mb_prev_wc (beg, match, end - 1)) + && !wordchar (mb_next_wc (match + len, end - 1))) goto assess_pattern_match; if (len > 0) { @@ -356,8 +387,8 @@ EGexecute (char const *buf, size_t size, size_t *match_size, --len; patterns[i].regexbuf.not_eol = 1; shorter_len = re_match (&(patterns[i].regexbuf), - buf, match + len - beg, - match - buf, + beg, match + len - ptr, + match - beg, &(patterns[i].regs)); if (shorter_len < -1) xalloc_die (); @@ -372,8 +403,8 @@ EGexecute (char const *buf, size_t size, size_t *match_size, match++; patterns[i].regexbuf.not_eol = 0; start = re_search (&(patterns[i].regexbuf), - buf, end - buf - 1, - match - buf, end - match - 1, + beg, end - beg - 1, + match - beg, end - match - 1, &(patterns[i].regs)); if (start < 0) { @@ -382,7 +413,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size, break; } len = patterns[i].regs.end[0] - start; - match = buf + start; + match = beg + start; } } /* while (match <= best_match) */ continue; @@ -412,16 +443,12 @@ EGexecute (char const *buf, size_t size, size_t *match_size, } /* for (beg = end ..) */ failure: - ret_val = -1; - goto out; + return -1; success: len = end - beg; success_in_len:; size_t off = beg - buf; - mb_case_map_apply (map, &off, &len); *match_size = len; - ret_val = off; - out: - return ret_val; + return off; } diff --git a/contrib/grep/src/dosbuf.c b/contrib/grep/src/dosbuf.c new file mode 100644 index 0000000000..9ac2d13ed5 --- /dev/null +++ b/contrib/grep/src/dosbuf.c @@ -0,0 +1,222 @@ +/* dosbuf.c + Copyright (C) 1992, 1997-2002, 2004-2014 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* Messy DOS-specific code for correctly treating binary, Unix text + and DOS text files. + + This has several aspects: + + * Guessing the file type (unless the user tells us); + * Stripping CR characters from DOS text files (otherwise regex + functions won't work correctly); + * Reporting correct byte count with -b for any kind of file. + +*/ + +#include + +typedef enum { + UNKNOWN, DOS_BINARY, DOS_TEXT, UNIX_TEXT +} File_type; + +struct dos_map { + off_t pos; /* position in buffer passed to matcher */ + off_t add; /* how much to add when reporting char position */ +}; + +static int dos_report_unix_offset = 0; + +static File_type dos_file_type = UNKNOWN; +static File_type dos_use_file_type = UNKNOWN; +static off_t dos_stripped_crs = 0; +static struct dos_map *dos_pos_map; +static int dos_pos_map_size = 0; +static int dos_pos_map_used = 0; +static int inp_map_idx = 0, out_map_idx = 1; + +/* Set default DOS file type to binary. */ +static void +dos_binary (void) +{ + if (O_BINARY) + dos_use_file_type = DOS_BINARY; +} + +/* Tell DOS routines to report Unix offset. */ +static void +dos_unix_byte_offsets (void) +{ + if (O_BINARY) + dos_report_unix_offset = 1; +} + +/* Guess DOS file type by looking at its contents. 
*/ +static File_type +guess_type (char *buf, size_t buflen) +{ + int crlf_seen = 0; + char *bp = buf; + + while (buflen--) + { + /* Treat a file as binary if it has a NUL character. */ + if (!*bp) + return DOS_BINARY; + + /* CR before LF means DOS text file (unless we later see + binary characters). */ + else if (*bp == '\r' && buflen && bp[1] == '\n') + crlf_seen = 1; + + bp++; + } + + return crlf_seen ? DOS_TEXT : UNIX_TEXT; +} + +/* Convert external DOS file representation to internal. + Return the count of characters left in the buffer. + Build table to map character positions when reporting byte counts. */ +static int +undossify_input (char *buf, size_t buflen) +{ + if (! O_BINARY) + return buflen; + + int chars_left = 0; + + if (totalcc == 0) + { + /* New file: forget everything we knew about character + position mapping table and file type. */ + inp_map_idx = 0; + out_map_idx = 1; + dos_pos_map_used = 0; + dos_stripped_crs = 0; + dos_file_type = dos_use_file_type; + } + + /* Guess if this file is binary, unless we already know that. */ + if (dos_file_type == UNKNOWN) + dos_file_type = guess_type(buf, buflen); + + /* If this file is to be treated as DOS Text, strip the CR characters + and maybe build the table for character position mapping on output. */ + if (dos_file_type == DOS_TEXT) + { + char *destp = buf; + + while (buflen--) + { + if (*buf != '\r') + { + *destp++ = *buf++; + chars_left++; + } + else + { + buf++; + if (out_byte && !dos_report_unix_offset) + { + dos_stripped_crs++; + while (buflen && *buf == '\r') + { + dos_stripped_crs++; + buflen--; + buf++; + } + if (inp_map_idx >= dos_pos_map_size - 1) + { + dos_pos_map_size = inp_map_idx ? inp_map_idx * 2 : 1000; + dos_pos_map = xrealloc(dos_pos_map, + dos_pos_map_size * + sizeof(struct dos_map)); + } + + if (!inp_map_idx) + { + /* Add sentinel entry. */ + dos_pos_map[inp_map_idx].pos = 0; + dos_pos_map[inp_map_idx++].add = 0; + + /* Initialize first real entry. 
*/ + dos_pos_map[inp_map_idx].add = 0; + } + + /* Put the new entry. If the stripped CR characters + precede a Newline (the usual case), pretend that + they were found *after* the Newline. This makes + displayed byte offsets more reasonable in some + cases, and fits better the intuitive notion that + the line ends *before* the CR, not *after* it. */ + inp_map_idx++; + dos_pos_map[inp_map_idx-1].pos = + (*buf == '\n' ? destp + 1 : destp ) - bufbeg + totalcc; + dos_pos_map[inp_map_idx].add = dos_stripped_crs; + dos_pos_map_used = inp_map_idx; + + /* The following will be updated on the next pass. */ + dos_pos_map[inp_map_idx].pos = destp - bufbeg + totalcc + 1; + } + } + } + + return chars_left; + } + + return buflen; +} + +/* Convert internal byte count into external. */ +static off_t +dossified_pos (off_t byteno) +{ + if (! O_BINARY) + return byteno; + + off_t pos_lo; + off_t pos_hi; + + if (dos_file_type != DOS_TEXT || dos_report_unix_offset) + return byteno; + + /* Optimization: usually the file will be scanned sequentially. + So in most cases, this byte position will be found in the + table near the previous one, as recorded in 'out_map_idx'. */ + pos_lo = dos_pos_map[out_map_idx-1].pos; + pos_hi = dos_pos_map[out_map_idx].pos; + + /* If the initial guess failed, search up or down, as + appropriate, beginning with the previous place. 
*/ + if (byteno >= pos_hi) + { + out_map_idx++; + while (out_map_idx < dos_pos_map_used + && byteno >= dos_pos_map[out_map_idx].pos) + out_map_idx++; + } + + else if (byteno < pos_lo) + { + out_map_idx--; + while (out_map_idx > 1 && byteno < dos_pos_map[out_map_idx-1].pos) + out_map_idx--; + } + + return byteno + dos_pos_map[out_map_idx].add; +} diff --git a/contrib/grep/src/egrep.c b/contrib/grep/src/egrep.c deleted file mode 100644 index 7dfff09fd3..0000000000 --- a/contrib/grep/src/egrep.c +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include "search.h" - -static void -Ecompile (char const *pattern, size_t size) -{ - GEAcompile (pattern, size, RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES); -} - -struct matcher const matchers[] = { - { "egrep", Ecompile, EGexecute }, - { NULL, NULL, NULL }, -}; - -const char before_options[] = -N_("PATTERN is an extended regular expression (ERE).\n"); -const char after_options[] = -N_("Invocation as 'egrep' is deprecated; use 'grep -E' instead.\n"); diff --git a/contrib/grep/src/egrep.sh b/contrib/grep/src/egrep.sh new file mode 100644 index 0000000000..1a03d2a260 --- /dev/null +++ b/contrib/grep/src/egrep.sh @@ -0,0 +1,11 @@ +#!@SHELL@ +grep=grep +case $0 in + */*) + dir=${0%/*} + if test -x "$dir/@grep@"; then + PATH=$dir:$PATH + grep=@grep@ + fi;; +esac +exec $grep @option@ "$@" diff --git a/contrib/grep/src/fgrep.c b/contrib/grep/src/fgrep.c deleted file mode 100644 index a0940cc33f..0000000000 --- a/contrib/grep/src/fgrep.c +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include "search.h" - -struct matcher const matchers[] = { - { "fgrep", Fcompile, Fexecute }, - { NULL, NULL, NULL }, -}; - -const char before_options[] = -N_("PATTERN is a set of newline-separated fixed strings.\n"); -const char after_options[] = -N_("Invocation as 'fgrep' is deprecated; use 'grep -F' instead.\n"); diff --git a/contrib/grep/src/grep.c b/contrib/grep/src/grep.c index 1b2198fd35..d424f6b6df 100644 --- a/contrib/grep/src/grep.c +++ 
b/contrib/grep/src/grep.c @@ -1,5 +1,1634 @@ +/* grep.c - main driver file for grep. + Copyright (C) 1992, 1997-2002, 2004-2014 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* Written July 1992 by Mike Haertel. */ + #include +#include +#include +#include +#include +#include +#include +#include +#include "system.h" + +#include "argmatch.h" +#include "c-ctype.h" +#include "closeout.h" +#include "colorize.h" +#include "error.h" +#include "exclude.h" +#include "exitfail.h" +#include "fcntl-safer.h" +#include "fts_.h" +#include "getopt.h" +#include "grep.h" +#include "intprops.h" +#include "progname.h" +#include "propername.h" +#include "quote.h" +#include "safe-read.h" #include "search.h" +#include "version-etc.h" +#include "xalloc.h" +#include "xstrtol.h" + +#define SEP_CHAR_SELECTED ':' +#define SEP_CHAR_REJECTED '-' +#define SEP_STR_GROUP "--" + +#define AUTHORS \ + proper_name ("Mike Haertel"), \ + _("others, see\n") + +/* When stdout is connected to a regular file, save its stat + information here, so that we can automatically skip it, thus + avoiding a potential (racy) infinite loop. */ +static struct stat out_stat; + +/* if non-zero, display usage information and exit */ +static int show_help; + +/* If non-zero, print the version on standard output and exit. 
*/ +static int show_version; + +/* If nonzero, suppress diagnostics for nonexistent or unreadable files. */ +static int suppress_errors; + +/* If nonzero, use color markers. */ +static int color_option; + +/* If nonzero, show only the part of a line matching the expression. */ +static int only_matching; + +/* If nonzero, make sure first content char in a line is on a tab stop. */ +static int align_tabs; + +/* The group separator used when context is requested. */ +static const char *group_separator = SEP_STR_GROUP; + +/* The context and logic for choosing default --color screen attributes + (foreground and background colors, etc.) are the following. + -- There are eight basic colors available, each with its own + nominal luminosity to the human eye and foreground/background + codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41], + magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46], + yellow [89 %, 33/43], and white [100 %, 37/47]). + -- Sometimes, white as a background is actually implemented using + a shade of light gray, so that a foreground white can be visible + on top of it (but most often not). + -- Sometimes, black as a foreground is actually implemented using + a shade of dark gray, so that it can be visible on top of a + background black (but most often not). + -- Sometimes, more colors are available, as extensions. + -- Other attributes can be selected/deselected (bold [1/22], + underline [4/24], standout/inverse [7/27], blink [5/25], and + invisible/hidden [8/28]). They are sometimes implemented by + using colors instead of what their names imply; e.g., bold is + often achieved by using brighter colors. In practice, only bold + is really available to us, underline sometimes being mapped by + the terminal to some strange color choice, and standout best + being left for use by downstream programs such as less(1). 
+ -- We cannot assume that any of the extensions or special features + are available for the purpose of choosing defaults for everyone. + -- The most prevalent default terminal backgrounds are pure black + and pure white, and are not necessarily the same shades of + those as if they were selected explicitly with SGR sequences. + Some terminals use dark or light pictures as default background, + but those are covered over by an explicit selection of background + color with an SGR sequence; their users will appreciate their + background pictures not being covered like this, if possible. + -- Some uses of color attributes are to make some output items + more understated (e.g., context lines); this cannot be achieved + by changing the background color. + -- For these reasons, the grep color defaults should strive not + to change the background color from its default, unless it's + for a short item that should be highlighted, not understated. + -- The grep foreground color defaults (without an explicitly set + background) should provide enough contrast to be readable on any + terminal with either a black (dark) or white (light) background. + This only leaves red, magenta, green, and cyan (and their bold + counterparts) and possibly bold blue. */ +/* The color strings used for matched text. + The user can overwrite them using the deprecated + environment variable GREP_COLOR or the new GREP_COLORS. */ +static const char *selected_match_color = "01;31"; /* bold red */ +static const char *context_match_color = "01;31"; /* bold red */ + +/* Other colors. Defaults look damn good.
*/ +static const char *filename_color = "35"; /* magenta */ +static const char *line_num_color = "32"; /* green */ +static const char *byte_num_color = "32"; /* green */ +static const char *sep_color = "36"; /* cyan */ +static const char *selected_line_color = ""; /* default color pair */ +static const char *context_line_color = ""; /* default color pair */ + +/* Select Graphic Rendition (SGR, "\33[...m") strings. */ +/* Also Erase in Line (EL) to Right ("\33[K") by default. */ +/* Why have EL to Right after SGR? + -- The behavior of line-wrapping when at the bottom of the + terminal screen and at the end of the current line is often + such that a new line is introduced, entirely cleared with + the current background color which may be different from the + default one (see the boolean back_color_erase terminfo(5) + capability), thus scrolling the display by one line. + The end of this new line will stay in this background color + even after reverting to the default background color with + "\33[m", unless it is explicitly cleared again with "\33[K" + (which is the behavior the user would instinctively expect + from the whole thing). There may be some unavoidable + background-color flicker at the end of this new line because + of this (when timing with the monitor's redraw is just right). + -- The behavior of HT (tab, "\t") is usually the same as that of + Cursor Forward Tabulation (CHT) with a default parameter + of 1 ("\33[I"), i.e., it performs pure movement to the next + tab stop, without any clearing of either content or screen + attributes (including background color); try + printf 'asdfqwerzxcv\rASDF\tZXCV\n' + in a bash(1) shell to demonstrate this. This is not what the + user would instinctively expect of HT (but is ok for CHT).
+ The instinctive behavior would include clearing the terminal + cells that are skipped over by HT with blank cells in the + current screen attributes, including background color; + the boolean dest_tabs_magic_smso terminfo(5) capability + indicates this saner behavior for HT, but only some rare + terminals have it (although it also indicates a special + glitch with standout mode in the Teleray terminal for which + it was initially introduced). The remedy is to add "\33[K" + after each SGR sequence, be it START (to fix the behavior + of any HT after that before another SGR) or END (to fix the + behavior of an HT in default background color that would + follow a line-wrapping at the bottom of the screen in another + background color, and to complement doing it after START). + Piping grep's output through a pager such as less(1) avoids + any HT problems since the pager performs tab expansion. + + Generic disadvantages of this remedy are: + -- Some very rare terminals might support SGR but not EL (nobody + will use "grep --color" on a terminal that does not support + SGR in the first place). + -- Having these extra control sequences might somewhat complicate + the task of any program trying to parse "grep --color" + output in order to extract structuring information from it. + A specific disadvantage to doing it after SGR START is: + -- Even more possible background color flicker (when timing + with the monitor's redraw is just right), even when not at the + bottom of the screen. + There are no additional disadvantages specific to doing it after + SGR END. + + It would be impractical for GNU grep to become a full-fledged + terminal program linked against ncurses or the like, so it will + not detect terminfo(5) capabilities. */ +static const char *sgr_start = "\33[%sm\33[K"; +static const char *sgr_end = "\33[m\33[K"; + +/* SGR utility functions.
*/ +static void +pr_sgr_start (char const *s) +{ + if (*s) + print_start_colorize (sgr_start, s); +} +static void +pr_sgr_end (char const *s) +{ + if (*s) + print_end_colorize (sgr_end); +} +static void +pr_sgr_start_if (char const *s) +{ + if (color_option) + pr_sgr_start (s); +} +static void +pr_sgr_end_if (char const *s) +{ + if (color_option) + pr_sgr_end (s); +} + +struct color_cap + { + const char *name; + const char **var; + void (*fct) (void); + }; + +static void +color_cap_mt_fct (void) +{ + /* Our caller just set selected_match_color. */ + context_match_color = selected_match_color; +} + +static void +color_cap_rv_fct (void) +{ + /* By this point, it was 1 (or already -1). */ + color_option = -1; /* That's still != 0. */ +} + +static void +color_cap_ne_fct (void) +{ + sgr_start = "\33[%sm"; + sgr_end = "\33[m"; +} + +/* For GREP_COLORS. */ +static const struct color_cap color_dict[] = + { + { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */ + { "ms", &selected_match_color, NULL }, /* selected matched text */ + { "mc", &context_match_color, NULL }, /* context matched text */ + { "fn", &filename_color, NULL }, /* filename */ + { "ln", &line_num_color, NULL }, /* line number */ + { "bn", &byte_num_color, NULL }, /* byte (sic) offset */ + { "se", &sep_color, NULL }, /* separator */ + { "sl", &selected_line_color, NULL }, /* selected lines */ + { "cx", &context_line_color, NULL }, /* context lines */ + { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */ + { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */ + { NULL, NULL, NULL } + }; + +static struct exclude *excluded_patterns; +static struct exclude *excluded_directory_patterns; +/* Short options. */ +static char const short_options[] = +"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz"; + +/* Non-boolean long options that have no corresponding short equivalents. 
*/ +enum +{ + BINARY_FILES_OPTION = CHAR_MAX + 1, + COLOR_OPTION, + INCLUDE_OPTION, + EXCLUDE_OPTION, + EXCLUDE_FROM_OPTION, + LINE_BUFFERED_OPTION, + LABEL_OPTION, + EXCLUDE_DIRECTORY_OPTION, + GROUP_SEPARATOR_OPTION +}; + +/* Long options equivalences. */ +static struct option const long_options[] = +{ + {"basic-regexp", no_argument, NULL, 'G'}, + {"extended-regexp", no_argument, NULL, 'E'}, + {"fixed-regexp", no_argument, NULL, 'F'}, + {"fixed-strings", no_argument, NULL, 'F'}, + {"perl-regexp", no_argument, NULL, 'P'}, + {"after-context", required_argument, NULL, 'A'}, + {"before-context", required_argument, NULL, 'B'}, + {"binary-files", required_argument, NULL, BINARY_FILES_OPTION}, + {"byte-offset", no_argument, NULL, 'b'}, + {"context", required_argument, NULL, 'C'}, + {"color", optional_argument, NULL, COLOR_OPTION}, + {"colour", optional_argument, NULL, COLOR_OPTION}, + {"count", no_argument, NULL, 'c'}, + {"devices", required_argument, NULL, 'D'}, + {"directories", required_argument, NULL, 'd'}, + {"exclude", required_argument, NULL, EXCLUDE_OPTION}, + {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION}, + {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION}, + {"file", required_argument, NULL, 'f'}, + {"files-with-matches", no_argument, NULL, 'l'}, + {"files-without-match", no_argument, NULL, 'L'}, + {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION}, + {"help", no_argument, &show_help, 1}, + {"include", required_argument, NULL, INCLUDE_OPTION}, + {"ignore-case", no_argument, NULL, 'i'}, + {"initial-tab", no_argument, NULL, 'T'}, + {"label", required_argument, NULL, LABEL_OPTION}, + {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION}, + {"line-number", no_argument, NULL, 'n'}, + {"line-regexp", no_argument, NULL, 'x'}, + {"max-count", required_argument, NULL, 'm'}, + + {"no-filename", no_argument, NULL, 'h'}, + {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION}, + {"no-messages", 
no_argument, NULL, 's'}, + {"null", no_argument, NULL, 'Z'}, + {"null-data", no_argument, NULL, 'z'}, + {"only-matching", no_argument, NULL, 'o'}, + {"quiet", no_argument, NULL, 'q'}, + {"recursive", no_argument, NULL, 'r'}, + {"dereference-recursive", no_argument, NULL, 'R'}, + {"regexp", required_argument, NULL, 'e'}, + {"invert-match", no_argument, NULL, 'v'}, + {"silent", no_argument, NULL, 'q'}, + {"text", no_argument, NULL, 'a'}, + {"binary", no_argument, NULL, 'U'}, + {"unix-byte-offsets", no_argument, NULL, 'u'}, + {"version", no_argument, NULL, 'V'}, + {"with-filename", no_argument, NULL, 'H'}, + {"word-regexp", no_argument, NULL, 'w'}, + {0, 0, 0, 0} +}; + +/* Define flags declared in grep.h. */ +int match_icase; +int match_words; +int match_lines; +unsigned char eolbyte; + +static char const *matcher; + +/* For error messages. */ +/* The input file name, or (if standard input) "-" or a --label argument. */ +static char const *filename; +static size_t filename_prefix_len; +static int errseen; +static int write_error_seen; + +enum directories_type + { + READ_DIRECTORIES = 2, + RECURSE_DIRECTORIES, + SKIP_DIRECTORIES + }; + +/* How to handle directories. */ +static char const *const directories_args[] = +{ + "read", "recurse", "skip", NULL +}; +static enum directories_type const directories_types[] = +{ + READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES +}; +ARGMATCH_VERIFY (directories_args, directories_types); + +static enum directories_type directories = READ_DIRECTORIES; + +enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK }; +static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL; + +/* How to handle devices. 
*/ +static enum + { + READ_COMMAND_LINE_DEVICES, + READ_DEVICES, + SKIP_DEVICES + } devices = READ_COMMAND_LINE_DEVICES; + +static int grepfile (int, char const *, int, int); +static int grepdesc (int, int); + +static void dos_binary (void); +static void dos_unix_byte_offsets (void); +static int undossify_input (char *, size_t); + +static int +is_device_mode (mode_t m) +{ + return S_ISCHR (m) || S_ISBLK (m) || S_ISSOCK (m) || S_ISFIFO (m); +} + +/* Return nonzero if ST->st_size is defined. Assume the file is not a + symbolic link. */ +static int +usable_st_size (struct stat const *st) +{ + return S_ISREG (st->st_mode) || S_TYPEISSHM (st) || S_TYPEISTMO (st); +} + +/* Functions we'll use to search. */ +typedef void (*compile_fp_t) (char const *, size_t); +typedef size_t (*execute_fp_t) (char const *, size_t, size_t *, char const *); +static compile_fp_t compile; +static execute_fp_t execute; + +/* Like error, but suppress the diagnostic if requested. */ +static void +suppressible_error (char const *mesg, int errnum) +{ + if (! suppress_errors) + error (0, errnum, "%s", mesg); + errseen = 1; +} + +/* If there has already been a write error, don't bother closing + standard output, as that might elicit a duplicate diagnostic. */ +static void +clean_up_stdout (void) +{ + if (! write_error_seen) + close_stdout (); +} + +/* Return 1 if a file is known to be binary for the purpose of 'grep'. + BUF, of size BUFSIZE, is the initial buffer read from the file with + descriptor FD and status ST. */ +static int +file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st) +{ + #ifndef SEEK_HOLE + enum { SEEK_HOLE = SEEK_END }; + #endif + + /* If -z, test only whether the initial buffer contains '\200'; + knowing about holes won't help. */ + if (! eolbyte) + return memchr (buf, '\200', bufsize) != 0; + + /* If the initial buffer contains a null byte, guess that the file + is binary. 
*/ + if (memchr (buf, '\0', bufsize)) + return 1; + + /* If the file has holes, it must contain a null byte somewhere. */ + if (SEEK_HOLE != SEEK_END && usable_st_size (st)) + { + off_t cur = bufsize; + if (O_BINARY || fd == STDIN_FILENO) + { + cur = lseek (fd, 0, SEEK_CUR); + if (cur < 0) + return 0; + } + + /* Look for a hole after the current location. */ + off_t hole_start = lseek (fd, cur, SEEK_HOLE); + if (0 <= hole_start) + { + if (lseek (fd, cur, SEEK_SET) < 0) + suppressible_error (filename, errno); + if (hole_start < st->st_size) + return 1; + } + } + + /* Guess that the file does not contain binary data. */ + return 0; +} + +/* Convert STR to a nonnegative integer, storing the result in *OUT. + STR must be a valid context length argument; report an error if it + isn't. Silently ceiling *OUT at the maximum value, as that is + practically equivalent to infinity for grep's purposes. */ +static void +context_length_arg (char const *str, intmax_t *out) +{ + switch (xstrtoimax (str, 0, 10, out, "")) + { + case LONGINT_OK: + case LONGINT_OVERFLOW: + if (0 <= *out) + break; + /* Fall through. */ + default: + error (EXIT_TROUBLE, 0, "%s: %s", str, + _("invalid context length argument")); + } +} + +/* Return nonzero if the file with NAME should be skipped. + If COMMAND_LINE is nonzero, it is a command-line argument. + If IS_DIR is nonzero, it is a directory. */ +static int +skipped_file (char const *name, int command_line, int is_dir) +{ + return (is_dir + ? (directories == SKIP_DIRECTORIES + || (! (command_line && filename_prefix_len != 0) + && excluded_directory_patterns + && excluded_file_name (excluded_directory_patterns, name))) + : (excluded_patterns + && excluded_file_name (excluded_patterns, name))); +} + +/* Hairy buffering mechanism for grep. The intent is to keep + all reads aligned on a page boundary and multiples of the + page size, unless a read yields a partial page. */ + +static char *buffer; /* Base of buffer. 
*/ +static size_t bufalloc; /* Allocated buffer size, counting slop. */ +#define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. */ +static int bufdesc; /* File descriptor. */ +static char *bufbeg; /* Beginning of user-visible stuff. */ +static char *buflim; /* Limit of user-visible stuff. */ +static size_t pagesize; /* alignment of memory pages */ +static off_t bufoffset; /* Read offset; defined on regular files. */ +static off_t after_last_match; /* Pointer after last matching line that + would have been output if we were + outputting characters. */ + +/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be + an integer or a pointer. Both args must be free of side effects. */ +#define ALIGN_TO(val, alignment) \ + ((size_t) (val) % (alignment) == 0 \ + ? (val) \ + : (val) + ((alignment) - (size_t) (val) % (alignment))) + +/* Reset the buffer for a new file, returning zero if we should skip it. + Initialize on the first time through. */ +static int +reset (int fd, struct stat const *st) +{ + if (! pagesize) + { + pagesize = getpagesize (); + if (pagesize == 0 || 2 * pagesize + 1 <= pagesize) + abort (); + bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + 1; + buffer = xmalloc (bufalloc); + } + + bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize); + bufbeg[-1] = eolbyte; + bufdesc = fd; + + if (S_ISREG (st->st_mode)) + { + if (fd != STDIN_FILENO) + bufoffset = 0; + else + { + bufoffset = lseek (fd, 0, SEEK_CUR); + if (bufoffset < 0) + { + suppressible_error (_("lseek failed"), errno); + return 0; + } + } + } + return 1; +} + +/* Read new stuff into the buffer, saving the specified + amount of old stuff. When we're done, 'bufbeg' points + to the beginning of the buffer contents, and 'buflim' + points just after the end. Return zero if there's an error. 
*/ +static int +fillbuf (size_t save, struct stat const *st) +{ + ssize_t fillsize; + int cc = 1; + char *readbuf; + size_t readsize; + + /* Offset from start of buffer to start of old stuff + that we want to save. */ + size_t saved_offset = buflim - save - buffer; + + if (pagesize <= buffer + bufalloc - buflim) + { + readbuf = buflim; + bufbeg = buflim - save; + } + else + { + size_t minsize = save + pagesize; + size_t newsize; + size_t newalloc; + char *newbuf; + + /* Grow newsize until it is at least as great as minsize. */ + for (newsize = bufalloc - pagesize - 1; newsize < minsize; newsize *= 2) + if (newsize * 2 < newsize || newsize * 2 + pagesize + 1 < newsize * 2) + xalloc_die (); + + /* Try not to allocate more memory than the file size indicates, + as that might cause unnecessary memory exhaustion if the file + is large. However, do not use the original file size as a + heuristic if we've already read past the file end, as most + likely the file is growing. */ + if (usable_st_size (st)) + { + off_t to_be_read = st->st_size - bufoffset; + off_t maxsize_off = save + to_be_read; + if (0 <= to_be_read && to_be_read <= maxsize_off + && maxsize_off == (size_t) maxsize_off + && minsize <= (size_t) maxsize_off + && (size_t) maxsize_off < newsize) + newsize = maxsize_off; + } + + /* Add enough room so that the buffer is aligned and has room + for byte sentinels fore and aft. */ + newalloc = newsize + pagesize + 1; + + newbuf = bufalloc < newalloc ? 
xmalloc (bufalloc = newalloc) : buffer; + readbuf = ALIGN_TO (newbuf + 1 + save, pagesize); + bufbeg = readbuf - save; + memmove (bufbeg, buffer + saved_offset, save); + bufbeg[-1] = eolbyte; + if (newbuf != buffer) + { + free (buffer); + buffer = newbuf; + } + } + + readsize = buffer + bufalloc - readbuf; + readsize -= readsize % pagesize; + + fillsize = safe_read (bufdesc, readbuf, readsize); + if (fillsize < 0) + fillsize = cc = 0; + bufoffset += fillsize; + fillsize = undossify_input (readbuf, fillsize); + buflim = readbuf + fillsize; + return cc; +} + +/* Flags controlling the style of output. */ +static enum +{ + BINARY_BINARY_FILES, + TEXT_BINARY_FILES, + WITHOUT_MATCH_BINARY_FILES +} binary_files; /* How to handle binary files. */ + +static int filename_mask; /* If zero, output nulls after filenames. */ +static int out_quiet; /* Suppress all normal output. */ +static bool out_invert; /* Print nonmatching stuff. */ +static int out_file; /* Print filenames. */ +static int out_line; /* Print line numbers. */ +static int out_byte; /* Print byte offsets. */ +static intmax_t out_before; /* Lines of leading context. */ +static intmax_t out_after; /* Lines of trailing context. */ +static int count_matches; /* Count matching lines. */ +static int list_files; /* List matching files. */ +static int no_filenames; /* Suppress file names. */ +static intmax_t max_count; /* Stop after outputting this many + lines from an input file. */ +static int line_buffered; /* If nonzero, use line buffering, i.e. + fflush everyline out. */ +static char *label = NULL; /* Fake filename for stdin */ + + +/* Internal variables to keep track of byte count, context, etc. */ +static uintmax_t totalcc; /* Total character count before bufbeg. */ +static char const *lastnl; /* Pointer after last newline counted. */ +static char const *lastout; /* Pointer after last character output; + NULL if no character has been output + or if it's conceptually before bufbeg. 
*/ +static uintmax_t totalnl; /* Total newline count before lastnl. */ +static intmax_t outleft; /* Maximum number of lines to be output. */ +static intmax_t pending; /* Pending lines of output. + Always kept 0 if out_quiet is true. */ +static int done_on_match; /* Stop scanning file on first match. */ +static int exit_on_match; /* Exit on first match. */ + +#include "dosbuf.c" + +/* Add two numbers that count input bytes or lines, and report an + error if the addition overflows. */ +static uintmax_t +add_count (uintmax_t a, uintmax_t b) +{ + uintmax_t sum = a + b; + if (sum < a) + error (EXIT_TROUBLE, 0, _("input is too large to count")); + return sum; +} + +static void +nlscan (char const *lim) +{ + size_t newlines = 0; + char const *beg; + for (beg = lastnl; beg < lim; beg++) + { + beg = memchr (beg, eolbyte, lim - beg); + if (!beg) + break; + newlines++; + } + totalnl = add_count (totalnl, newlines); + lastnl = lim; +} + +/* Print the current filename. */ +static void +print_filename (void) +{ + pr_sgr_start_if (filename_color); + fputs (filename, stdout); + pr_sgr_end_if (filename_color); +} + +/* Print a character separator. */ +static void +print_sep (char sep) +{ + pr_sgr_start_if (sep_color); + fputc (sep, stdout); + pr_sgr_end_if (sep_color); +} + +/* Print a line number or a byte offset. */ +static void +print_offset (uintmax_t pos, int min_width, const char *color) +{ + /* Do not rely on printf to print pos, since uintmax_t may be longer + than long, and long long is not portable. */ + + char buf[sizeof pos * CHAR_BIT]; + char *p = buf + sizeof buf; + + do + { + *--p = '0' + pos % 10; + --min_width; + } + while ((pos /= 10) != 0); + + /* Do this to maximize the probability of alignment across lines. */ + if (align_tabs) + while (--min_width >= 0) + *--p = ' '; + + pr_sgr_start_if (color); + fwrite (p, 1, buf + sizeof buf - p, stdout); + pr_sgr_end_if (color); +} + +/* Print a whole line head (filename, line, byte). 
*/ +static void +print_line_head (char const *beg, char const *lim, int sep) +{ + int pending_sep = 0; + + if (out_file) + { + print_filename (); + if (filename_mask) + pending_sep = 1; + else + fputc (0, stdout); + } + + if (out_line) + { + if (lastnl < lim) + { + nlscan (beg); + totalnl = add_count (totalnl, 1); + lastnl = lim; + } + if (pending_sep) + print_sep (sep); + print_offset (totalnl, 4, line_num_color); + pending_sep = 1; + } + + if (out_byte) + { + uintmax_t pos = add_count (totalcc, beg - bufbeg); + pos = dossified_pos (pos); + if (pending_sep) + print_sep (sep); + print_offset (pos, 6, byte_num_color); + pending_sep = 1; + } + + if (pending_sep) + { + /* This assumes sep is one column wide. + Try doing this any other way with Unicode + (and its combining and wide characters) + filenames and you're wasting your efforts. */ + if (align_tabs) + fputs ("\t\b", stdout); + + print_sep (sep); + } +} + +static const char * +print_line_middle (const char *beg, const char *lim, + const char *line_color, const char *match_color) +{ + size_t match_size; + size_t match_offset; + const char *cur = beg; + const char *mid = NULL; + + while (cur < lim + && ((match_offset = execute (beg, lim - beg, &match_size, + beg + (cur - beg))) != (size_t) -1)) + { + char const *b = beg + match_offset; + + /* Avoid matching the empty line at the end of the buffer. */ + if (b == lim) + break; + + /* Avoid hanging on grep --color "" foo */ + if (match_size == 0) + { + /* Make minimal progress; there may be further non-empty matches. */ + /* XXX - Could really advance by one whole multi-octet character. */ + match_size = 1; + if (!mid) + mid = cur; + } + else + { + /* This function is called on a matching line only, + but is it selected or rejected/context? */ + if (only_matching) + print_line_head (b, lim, (out_invert ? 
SEP_CHAR_REJECTED + : SEP_CHAR_SELECTED)); + else + { + pr_sgr_start (line_color); + if (mid) + { + cur = mid; + mid = NULL; + } + fwrite (cur, sizeof (char), b - cur, stdout); + } + + pr_sgr_start_if (match_color); + fwrite (b, sizeof (char), match_size, stdout); + pr_sgr_end_if (match_color); + if (only_matching) + fputs ("\n", stdout); + } + cur = b + match_size; + } + + if (only_matching) + cur = lim; + else if (mid) + cur = mid; + + return cur; +} + +static const char * +print_line_tail (const char *beg, const char *lim, const char *line_color) +{ + size_t eol_size; + size_t tail_size; + + eol_size = (lim > beg && lim[-1] == eolbyte); + eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r'); + tail_size = lim - eol_size - beg; + + if (tail_size > 0) + { + pr_sgr_start (line_color); + fwrite (beg, 1, tail_size, stdout); + beg += tail_size; + pr_sgr_end (line_color); + } + + return beg; +} + +static void +prline (char const *beg, char const *lim, int sep) +{ + int matching; + const char *line_color; + const char *match_color; + + if (!only_matching) + print_line_head (beg, lim, sep); + + matching = (sep == SEP_CHAR_SELECTED) ^ out_invert; + + if (color_option) + { + line_color = (((sep == SEP_CHAR_SELECTED) + ^ (out_invert && (color_option < 0))) + ? selected_line_color : context_line_color); + match_color = (sep == SEP_CHAR_SELECTED + ? selected_match_color : context_match_color); + } + else + line_color = match_color = NULL; /* Shouldn't be used. */ + + if ((only_matching && matching) + || (color_option && (*line_color || *match_color))) + { + /* We already know that non-matching lines have no match (to colorize). 
*/ + if (matching && (only_matching || *match_color)) + beg = print_line_middle (beg, lim, line_color, match_color); + + if (!only_matching && *line_color) + { + /* This code is exercised at least when grep is invoked like this: + echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */ + beg = print_line_tail (beg, lim, line_color); + } + } + + if (!only_matching && lim > beg) + fwrite (beg, 1, lim - beg, stdout); + + if (ferror (stdout)) + { + write_error_seen = 1; + error (EXIT_TROUBLE, 0, _("write error")); + } + + lastout = lim; + + if (line_buffered) + fflush (stdout); +} + +/* Print pending lines of trailing context prior to LIM. Trailing context ends + at the next matching line when OUTLEFT is 0. */ +static void +prpending (char const *lim) +{ + if (!lastout) + lastout = bufbeg; + while (pending > 0 && lastout < lim) + { + char const *nl = memchr (lastout, eolbyte, lim - lastout); + size_t match_size; + --pending; + if (outleft + || ((execute (lastout, nl + 1 - lastout, + &match_size, NULL) == (size_t) -1) + == !out_invert)) + prline (lastout, nl + 1, SEP_CHAR_REJECTED); + else + pending = 0; + } +} + +/* Output the lines between BEG and LIM. Deal with context. */ +static void +prtext (char const *beg, char const *lim) +{ + static bool used; /* Avoid printing SEP_STR_GROUP before any output. */ + char eol = eolbyte; + + if (!out_quiet && pending > 0) + prpending (beg); + + char const *p = beg; + + if (!out_quiet) + { + /* Deal with leading context. */ + char const *bp = lastout ? lastout : bufbeg; + intmax_t i; + for (i = 0; i < out_before; ++i) + if (p > bp) + do + --p; + while (p[-1] != eol); + + /* Print the group separator unless the output is adjacent to + the previous output in the file. 
*/ + if ((0 <= out_before || 0 <= out_after) && used + && p != lastout && group_separator) + { + pr_sgr_start_if (sep_color); + fputs (group_separator, stdout); + pr_sgr_end_if (sep_color); + fputc ('\n', stdout); + } + + while (p < beg) + { + char const *nl = memchr (p, eol, beg - p); + nl++; + prline (p, nl, SEP_CHAR_REJECTED); + p = nl; + } + } + + intmax_t n; + if (out_invert) + { + /* One or more lines are output. */ + for (n = 0; p < lim && n < outleft; n++) + { + char const *nl = memchr (p, eol, lim - p); + nl++; + if (!out_quiet) + prline (p, nl, SEP_CHAR_SELECTED); + p = nl; + } + } + else + { + /* Just one line is output. */ + if (!out_quiet) + prline (beg, lim, SEP_CHAR_SELECTED); + n = 1; + p = lim; + } + + after_last_match = bufoffset - (buflim - p); + pending = out_quiet ? 0 : MAX (0, out_after); + used = true; + outleft -= n; +} + +/* Invoke the matcher, EXECUTE, on buffer BUF of SIZE bytes. If there + is no match, return (size_t) -1. Otherwise, set *MATCH_SIZE to the + length of the match and return the offset of the start of the match. */ +static size_t +do_execute (char const *buf, size_t size, size_t *match_size) +{ + size_t result; + const char *line_next; + + /* With the current implementation, using --ignore-case with a multi-byte + character set is very inefficient when applied to a large buffer + containing many matches. We can avoid much of the wasted effort + by matching line-by-line. + + FIXME: this is just an ugly workaround, and it doesn't really + belong here. Also, PCRE is always using this same per-line + matching algorithm. Either we fix -i, or we should refactor + this code---for example, we could add another function pointer + to struct matcher to split the buffer passed to execute. It would + perform the memchr if line-by-line matching is necessary, or just + return buf + size otherwise. */ + if (! 
(execute == Fexecute || execute == Pexecute) + || MB_CUR_MAX == 1 || !match_icase) + return execute (buf, size, match_size, NULL); + + for (line_next = buf; line_next < buf + size; ) + { + const char *line_buf = line_next; + const char *line_end = memchr (line_buf, eolbyte, + (buf + size) - line_buf); + if (line_end == NULL) + line_next = line_end = buf + size; + else + line_next = line_end + 1; + + result = execute (line_buf, line_next - line_buf, match_size, NULL); + if (result != (size_t) -1) + return (line_buf - buf) + result; + } + + return (size_t) -1; +} + +/* Scan the specified portion of the buffer, matching lines (or + between matching lines if OUT_INVERT is true). Return a count of + lines printed. */ +static intmax_t +grepbuf (char const *beg, char const *lim) +{ + intmax_t outleft0 = outleft; + char const *p; + char const *endp; + + for (p = beg; p < lim; p = endp) + { + size_t match_size; + size_t match_offset = do_execute (p, lim - p, &match_size); + if (match_offset == (size_t) -1) + { + if (!out_invert) + break; + match_offset = lim - p; + match_size = 0; + } + char const *b = p + match_offset; + endp = b + match_size; + /* Avoid matching the empty line at the end of the buffer. */ + if (!out_invert && b == lim) + break; + if (!out_invert || p < b) + { + char const *prbeg = out_invert ? p : b; + char const *prend = out_invert ? b : endp; + prtext (prbeg, prend); + if (!outleft || done_on_match) + { + if (exit_on_match) + exit (EXIT_SUCCESS); + break; + } + } + } + + return outleft0 - outleft; +} + +/* Search a given file. Normally, return a count of lines printed; + but if the file is a directory and we search it recursively, then + return -2 if there was a match, and -1 otherwise. */ +static intmax_t +grep (int fd, struct stat const *st) +{ + intmax_t nlines, i; + int not_text; + size_t residue, save; + char oldc; + char *beg; + char *lim; + char eol = eolbyte; + + if (! 
reset (fd, st)) + return 0; + + totalcc = 0; + lastout = 0; + totalnl = 0; + outleft = max_count; + after_last_match = 0; + pending = 0; + + nlines = 0; + residue = 0; + save = 0; + + if (! fillbuf (save, st)) + { + if (errno != EINVAL) + suppressible_error (filename, errno); + return 0; + } + + not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet) + || binary_files == WITHOUT_MATCH_BINARY_FILES) + && file_is_binary (bufbeg, buflim - bufbeg, fd, st)); + if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES) + return 0; + done_on_match += not_text; + out_quiet += not_text; + + for (;;) + { + lastnl = bufbeg; + if (lastout) + lastout = bufbeg; + + beg = bufbeg + save; + + /* no more data to scan (eof) except for maybe a residue -> break */ + if (beg == buflim) + break; + + /* Determine new residue (the length of an incomplete line at the end of + the buffer, 0 means there is no incomplete last line). */ + oldc = beg[-1]; + beg[-1] = eol; + /* FIXME: use rawmemrchr if/when it exists, since we have ensured + that this use of memrchr is guaranteed never to return NULL. */ + lim = memrchr (beg - 1, eol, buflim - beg + 1); + ++lim; + beg[-1] = oldc; + if (lim == beg) + lim = beg - residue; + beg -= residue; + residue = buflim - lim; + + if (beg < lim) + { + if (outleft) + nlines += grepbuf (beg, lim); + if (pending) + prpending (lim); + if ((!outleft && !pending) || (nlines && done_on_match)) + goto finish_grep; + } + + /* The last OUT_BEFORE lines at the end of the buffer will be needed as + leading context if there is a matching line at the begin of the + next data. Make beg point to their begin. */ + i = 0; + beg = lim; + while (i < out_before && beg > bufbeg && beg != lastout) + { + ++i; + do + --beg; + while (beg[-1] != eol); + } + + /* Detect whether leading context is adjacent to previous output. */ + if (beg != lastout) + lastout = 0; + + /* Handle some details and read more data to scan. 
*/ + save = residue + lim - beg; + if (out_byte) + totalcc = add_count (totalcc, buflim - bufbeg - save); + if (out_line) + nlscan (beg); + if (! fillbuf (save, st)) + { + suppressible_error (filename, errno); + goto finish_grep; + } + } + if (residue) + { + *buflim++ = eol; + if (outleft) + nlines += grepbuf (bufbeg + save - residue, buflim); + if (pending) + prpending (buflim); + } + + finish_grep: + done_on_match -= not_text; + out_quiet -= not_text; + if ((not_text & ~out_quiet) && nlines != 0) + printf (_("Binary file %s matches\n"), filename); + return nlines; +} + +static int +grepdirent (FTS *fts, FTSENT *ent, int command_line) +{ + int follow, dirdesc; + struct stat *st = ent->fts_statp; + command_line &= ent->fts_level == FTS_ROOTLEVEL; + + if (ent->fts_info == FTS_DP) + { + if (directories == RECURSE_DIRECTORIES && command_line) + out_file &= ~ (2 * !no_filenames); + return 1; + } + + if (skipped_file (ent->fts_name, command_line, + (ent->fts_info == FTS_D || ent->fts_info == FTS_DC + || ent->fts_info == FTS_DNR))) + { + fts_set (fts, ent, FTS_SKIP); + return 1; + } + + filename = ent->fts_path + filename_prefix_len; + follow = (fts->fts_options & FTS_LOGICAL + || (fts->fts_options & FTS_COMFOLLOW && command_line)); + + switch (ent->fts_info) + { + case FTS_D: + if (directories == RECURSE_DIRECTORIES) + { + out_file |= 2 * !no_filenames; + return 1; + } + fts_set (fts, ent, FTS_SKIP); + break; + + case FTS_DC: + if (!suppress_errors) + error (0, 0, _("warning: %s: %s"), filename, + _("recursive directory loop")); + return 1; + + case FTS_DNR: + case FTS_ERR: + case FTS_NS: + suppressible_error (filename, ent->fts_errno); + return 1; + + case FTS_DEFAULT: + case FTS_NSOK: + if (devices == SKIP_DEVICES + || (devices == READ_COMMAND_LINE_DEVICES && !command_line)) + { + struct stat st1; + if (! st->st_mode) + { + /* The file type is not already known. Get the file status + before opening, since opening might have side effects + on a device. 
*/ + int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW; + if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0) + { + suppressible_error (filename, errno); + return 1; + } + st = &st1; + } + if (is_device_mode (st->st_mode)) + return 1; + } + break; + + case FTS_F: + case FTS_SLNONE: + break; + + case FTS_SL: + case FTS_W: + return 1; + + default: + abort (); + } + + dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD + ? fts->fts_cwd_fd + : AT_FDCWD); + return grepfile (dirdesc, ent->fts_accpath, follow, command_line); +} + +static int +grepfile (int dirdesc, char const *name, int follow, int command_line) +{ + int desc = openat_safer (dirdesc, name, O_RDONLY | (follow ? 0 : O_NOFOLLOW)); + if (desc < 0) + { + if (follow || (errno != ELOOP && errno != EMLINK)) + suppressible_error (filename, errno); + return 1; + } + return grepdesc (desc, command_line); +} + +static int +grepdesc (int desc, int command_line) +{ + intmax_t count; + int status = 1; + struct stat st; + + /* Get the file status, possibly for the second time. This catches + a race condition if the directory entry changes after the + directory entry is read and before the file is opened. For + example, normally DESC is a directory only at the top level, but + there is an exception if some other process substitutes a + directory for a non-directory while 'grep' is running. */ + if (fstat (desc, &st) != 0) + { + suppressible_error (filename, errno); + goto closeout; + } + + if (desc != STDIN_FILENO && command_line + && skipped_file (filename, 1, S_ISDIR (st.st_mode))) + goto closeout; + + if (desc != STDIN_FILENO + && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode)) + { + /* Traverse the directory starting with its full name, because + unfortunately fts provides no way to traverse the directory + starting from its file descriptor. */ + + FTS *fts; + FTSENT *ent; + int opts = fts_options & ~(command_line ? 
0 : FTS_COMFOLLOW); + char *fts_arg[2]; + + /* Close DESC now, to conserve file descriptors if the race + condition occurs many times in a deep recursion. */ + if (close (desc) != 0) + suppressible_error (filename, errno); + + fts_arg[0] = (char *) filename; + fts_arg[1] = NULL; + fts = fts_open (fts_arg, opts, NULL); + + if (!fts) + xalloc_die (); + while ((ent = fts_read (fts))) + status &= grepdirent (fts, ent, command_line); + if (errno) + suppressible_error (filename, errno); + if (fts_close (fts) != 0) + suppressible_error (filename, errno); + return status; + } + if (desc != STDIN_FILENO + && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode)) + || ((devices == SKIP_DEVICES + || (devices == READ_COMMAND_LINE_DEVICES && !command_line)) + && is_device_mode (st.st_mode)))) + goto closeout; + + /* If there is a regular file on stdout and the current file refers + to the same i-node, we have to report the problem and skip it. + Otherwise when matching lines from some other input reach the + disk before we open this file, we can end up reading and matching + those lines and appending them to the file from which we're reading. + Then we'd have what appears to be an infinite loop that'd terminate + only upon filling the output file system or reaching a quota. + However, there is no risk of an infinite loop if grep is generating + no output, i.e., with --silent, --quiet, -q. + Similarly, with any of these: + --max-count=N (-m) (for N >= 2) + --files-with-matches (-l) + --files-without-match (-L) + there is no risk of trouble. + For --max-count=1, grep stops after printing the first match, + so there is no risk of malfunction. But even --max-count=2, with + input==output, while there is no risk of infloop, there is a race + condition that could result in "alternate" output. */ + if (!out_quiet && list_files == 0 && 1 < max_count + && S_ISREG (out_stat.st_mode) && out_stat.st_ino + && SAME_INODE (st, out_stat)) + { + if (! 
suppress_errors) + error (0, 0, _("input file %s is also the output"), quote (filename)); + errseen = 1; + goto closeout; + } + +#if defined SET_BINARY + /* Set input to binary mode. Pipes are simulated with files + on DOS, so this includes the case of "foo | grep bar". */ + if (!isatty (desc)) + SET_BINARY (desc); +#endif + + count = grep (desc, &st); + if (count < 0) + status = count + 2; + else + { + if (count_matches) + { + if (out_file) + { + print_filename (); + if (filename_mask) + print_sep (SEP_CHAR_SELECTED); + else + fputc (0, stdout); + } + printf ("%" PRIdMAX "\n", count); + } + + status = !count; + if (list_files == 1 - 2 * status) + { + print_filename (); + fputc ('\n' & filename_mask, stdout); + } + + if (desc == STDIN_FILENO) + { + off_t required_offset = outleft ? bufoffset : after_last_match; + if (required_offset != bufoffset + && lseek (desc, required_offset, SEEK_SET) < 0 + && S_ISREG (st.st_mode)) + suppressible_error (filename, errno); + } + } + + closeout: + if (desc != STDIN_FILENO && close (desc) != 0) + suppressible_error (filename, errno); + return status; +} + +static int +grep_command_line_arg (char const *arg) +{ + if (STREQ (arg, "-")) + { + filename = label ? label : _("(standard input)"); + return grepdesc (STDIN_FILENO, 1); + } + else + { + filename = arg; + return grepfile (AT_FDCWD, arg, 1, 1); + } +} + +_Noreturn void usage (int); +void +usage (int status) +{ + if (status != 0) + { + fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), + program_name); + fprintf (stderr, _("Try '%s --help' for more information.\n"), + program_name); + } + else + { + printf (_("Usage: %s [OPTION]... 
PATTERN [FILE]...\n"), program_name); + printf (_("Search for PATTERN in each FILE or standard input.\n")); + printf (_("PATTERN is, by default, a basic regular expression (BRE).\n")); + printf (_("\ +Example: %s -i 'hello world' menu.h main.c\n\ +\n\ +Regexp selection and interpretation:\n"), program_name); + printf (_("\ + -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\ + -F, --fixed-strings PATTERN is a set of newline-separated fixed strings\n\ + -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\ + -P, --perl-regexp PATTERN is a Perl regular expression\n")); + /* -X is undocumented on purpose. */ + printf (_("\ + -e, --regexp=PATTERN use PATTERN for matching\n\ + -f, --file=FILE obtain PATTERN from FILE\n\ + -i, --ignore-case ignore case distinctions\n\ + -w, --word-regexp force PATTERN to match only whole words\n\ + -x, --line-regexp force PATTERN to match only whole lines\n\ + -z, --null-data a data line ends in 0 byte, not newline\n")); + printf (_("\ +\n\ +Miscellaneous:\n\ + -s, --no-messages suppress error messages\n\ + -v, --invert-match select non-matching lines\n\ + -V, --version display version information and exit\n\ + --help display this help text and exit\n")); + printf (_("\ +\n\ +Output control:\n\ + -m, --max-count=NUM stop after NUM matches\n\ + -b, --byte-offset print the byte offset with output lines\n\ + -n, --line-number print line number with output lines\n\ + --line-buffered flush output on every line\n\ + -H, --with-filename print the file name for each match\n\ + -h, --no-filename suppress the file name prefix on output\n\ + --label=LABEL use LABEL as the standard input file name prefix\n\ +")); + printf (_("\ + -o, --only-matching show only the part of a line matching PATTERN\n\ + -q, --quiet, --silent suppress all normal output\n\ + --binary-files=TYPE assume that binary files are TYPE;\n\ + TYPE is 'binary', 'text', or 'without-match'\n\ + -a, --text equivalent to --binary-files=text\n\ +")); + 
printf (_("\ + -I equivalent to --binary-files=without-match\n\ + -d, --directories=ACTION how to handle directories;\n\ + ACTION is 'read', 'recurse', or 'skip'\n\ + -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\ + ACTION is 'read' or 'skip'\n\ + -r, --recursive like --directories=recurse\n\ + -R, --dereference-recursive likewise, but follow all symlinks\n\ +")); + printf (_("\ + --include=FILE_PATTERN search only files that match FILE_PATTERN\n\ + --exclude=FILE_PATTERN skip files and directories matching FILE_PATTERN\n\ + --exclude-from=FILE skip files matching any file pattern from FILE\n\ + --exclude-dir=PATTERN directories that match PATTERN will be skipped.\n\ +")); + printf (_("\ + -L, --files-without-match print only names of FILEs containing no match\n\ + -l, --files-with-matches print only names of FILEs containing matches\n\ + -c, --count print only a count of matching lines per FILE\n\ + -T, --initial-tab make tabs line up (if needed)\n\ + -Z, --null print 0 byte after FILE name\n")); + printf (_("\ +\n\ +Context control:\n\ + -B, --before-context=NUM print NUM lines of leading context\n\ + -A, --after-context=NUM print NUM lines of trailing context\n\ + -C, --context=NUM print NUM lines of output context\n\ +")); + printf (_("\ + -NUM same as --context=NUM\n\ + --color[=WHEN],\n\ + --colour[=WHEN] use markers to highlight the matching strings;\n\ + WHEN is 'always', 'never', or 'auto'\n\ + -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\ + -u, --unix-byte-offsets report offsets as if CRs were not there\n\ + (MSDOS/Windows)\n\ +\n")); + printf (_("\ +'egrep' means 'grep -E'. 'fgrep' means 'grep -F'.\n\ +Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n")); + printf (_("\ +When FILE is -, read standard input. With no FILE, read . if a command-line\n\ +-r is given, - otherwise. 
If fewer than two FILEs are given, assume -h.\n\ +Exit status is 0 if any line is selected, 1 otherwise;\n\ +if any error occurs and -q is not given, the exit status is 2.\n")); + printf (_("\nReport bugs to: %s\n"), PACKAGE_BUGREPORT); + printf (_("GNU Grep home page: <%s>\n"), + "http://www.gnu.org/software/grep/"); + fputs (_("General help using GNU software: \n"), + stdout); + + } + exit (status); +} + +/* Pattern compilers and matchers. */ static void Gcompile (char const *pattern, size_t size) @@ -19,17 +1648,737 @@ Acompile (char const *pattern, size_t size) GEAcompile (pattern, size, RE_SYNTAX_AWK); } -struct matcher const matchers[] = { - { "grep", Gcompile, EGexecute }, - { "egrep", Ecompile, EGexecute }, - { "awk", Acompile, EGexecute }, - { "fgrep", Fcompile, Fexecute }, - { "perl", Pcompile, Pexecute }, - { NULL, NULL, NULL }, +static void +GAcompile (char const *pattern, size_t size) +{ + GEAcompile (pattern, size, RE_SYNTAX_GNU_AWK); +} + +static void +PAcompile (char const *pattern, size_t size) +{ + GEAcompile (pattern, size, RE_SYNTAX_POSIX_AWK); +} + +struct matcher +{ + char const name[16]; + compile_fp_t compile; + execute_fp_t execute; +}; +static struct matcher const matchers[] = { + { "grep", Gcompile, EGexecute }, + { "egrep", Ecompile, EGexecute }, + { "fgrep", Fcompile, Fexecute }, + { "awk", Acompile, EGexecute }, + { "gawk", GAcompile, EGexecute }, + { "posixawk", PAcompile, EGexecute }, + { "perl", Pcompile, Pexecute }, + { "", NULL, NULL }, }; -const char before_options[] = -N_("PATTERN is, by default, a basic regular expression (BRE).\n"); -const char after_options[] = -N_("'egrep' means 'grep -E'. 'fgrep' means 'grep -F'.\n\ -Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n"); +/* Set the matcher to M if available. Exit in case of conflicts or if + M is not available. 
*/ +static void +setmatcher (char const *m) +{ + struct matcher const *p; + + if (matcher && !STREQ (matcher, m)) + error (EXIT_TROUBLE, 0, _("conflicting matchers specified")); + + for (p = matchers; p->compile; p++) + if (STREQ (m, p->name)) + { + matcher = p->name; + compile = p->compile; + execute = p->execute; + return; + } + + error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m); +} + +/* Find the white-space-separated options specified by OPTIONS, and + using BUF to store copies of these options, set ARGV[0], ARGV[1], + etc. to the option copies. Return the number N of options found. + Do not set ARGV[N] to NULL. If ARGV is NULL, do not store ARGV[0] + etc. Backslash can be used to escape whitespace (and backslashes). */ +static size_t +prepend_args (char const *options, char *buf, char **argv) +{ + char const *o = options; + char *b = buf; + size_t n = 0; + + for (;;) + { + while (c_isspace (to_uchar (*o))) + o++; + if (!*o) + return n; + if (argv) + argv[n] = b; + n++; + + do + if ((*b++ = *o++) == '\\' && *o) + b[-1] = *o++; + while (*o && ! c_isspace (to_uchar (*o))); + + *b++ = '\0'; + } +} + +/* Prepend the whitespace-separated options in OPTIONS to the argument + vector of a main program with argument count *PARGC and argument + vector *PARGV. Return the number of options prepended. 
*/ +static int +prepend_default_options (char const *options, int *pargc, char ***pargv) +{ + if (options && *options) + { + char *buf = xmalloc (strlen (options) + 1); + size_t prepended = prepend_args (options, buf, NULL); + int argc = *pargc; + char *const *argv = *pargv; + char **pp; + enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) }; + if (MAX_ARGS - argc < prepended) + xalloc_die (); + pp = xmalloc ((prepended + argc + 1) * sizeof *pp); + *pargc = prepended + argc; + *pargv = pp; + *pp++ = *argv++; + pp += prepend_args (options, buf, pp); + while ((*pp++ = *argv++)) + continue; + return prepended; + } + + return 0; +} + +/* Get the next non-digit option from ARGC and ARGV. + Return -1 if there are no more options. + Process any digit options that were encountered on the way, + and store the resulting integer into *DEFAULT_CONTEXT. */ +static int +get_nondigit_option (int argc, char *const *argv, intmax_t *default_context) +{ + static int prev_digit_optind = -1; + int this_digit_optind, was_digit; + char buf[INT_BUFSIZE_BOUND (intmax_t) + 4]; + char *p = buf; + int opt; + + was_digit = 0; + this_digit_optind = optind; + while (1) + { + opt = getopt_long (argc, (char **) argv, short_options, + long_options, NULL); + if ( ! ('0' <= opt && opt <= '9')) + break; + + if (prev_digit_optind != this_digit_optind || !was_digit) + { + /* Reset to start another context length argument. */ + p = buf; + } + else + { + /* Suppress trivial leading zeros, to avoid incorrect + diagnostic on strings like 00000000000. */ + p -= buf[0] == '0'; + } + + if (p == buf + sizeof buf - 4) + { + /* Too many digits. Append "..." to make context_length_arg + complain about "X...", where X contains the digits seen + so far. 
*/ + strcpy (p, "..."); + p += 3; + break; + } + *p++ = opt; + + was_digit = 1; + prev_digit_optind = this_digit_optind; + this_digit_optind = optind; + } + if (p != buf) + { + *p = '\0'; + context_length_arg (buf, default_context); + } + + return opt; +} + +/* Parse GREP_COLORS. The default would look like: + GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36' + with boolean capabilities (ne and rv) unset (i.e., omitted). + No character escaping is needed or supported. */ +static void +parse_grep_colors (void) +{ + const char *p; + char *q; + char *name; + char *val; + + p = getenv ("GREP_COLORS"); /* Plural! */ + if (p == NULL || *p == '\0') + return; + + /* Work off a writable copy. */ + q = xstrdup (p); + + name = q; + val = NULL; + /* From now on, be well-formed or you're gone. */ + for (;;) + if (*q == ':' || *q == '\0') + { + char c = *q; + struct color_cap const *cap; + + *q++ = '\0'; /* Terminate name or val. */ + /* Empty name without val (empty cap) + * won't match and will be ignored. */ + for (cap = color_dict; cap->name; cap++) + if (STREQ (cap->name, name)) + break; + /* If name unknown, go on for forward compatibility. */ + if (cap->var && val) + *(cap->var) = val; + if (cap->fct) + cap->fct (); + if (c == '\0') + return; + name = q; + val = NULL; + } + else if (*q == '=') + { + if (q == name || val) + return; + *q++ = '\0'; /* Terminate name. */ + val = q; /* Can be the empty string. */ + } + else if (val == NULL) + q++; /* Accumulate name. */ + else if (*q == ';' || (*q >= '0' && *q <= '9')) + q++; /* Accumulate val. Protect the terminal from being sent crap. */ + else + return; +} + +/* Return true if PAT (of length PATLEN) contains an encoding error. 
*/ +static bool +contains_encoding_error (char const *pat, size_t patlen) +{ + mbstate_t mbs = { 0 }; + size_t i, charlen; + + for (i = 0; i < patlen; i += charlen + (charlen == 0)) + { + charlen = mbrlen (pat + i, patlen - i, &mbs); + if ((size_t) -2 <= charlen) + return true; + } + return false; +} + +/* Change a pattern for fgrep into grep. */ +static void +fgrep_to_grep_pattern (size_t len, char const *keys, + size_t *new_len, char **new_keys) +{ + char *p = *new_keys = xnmalloc (len + 1, 2); + mbstate_t mb_state = { 0 }; + size_t n; + + for (; len; keys += n, len -= n) + { + wchar_t wc; + n = mbrtowc (&wc, keys, len, &mb_state); + switch (n) + { + case (size_t) -2: + n = len; + /* Fall through. */ + default: + p = mempcpy (p, keys, n); + break; + + case (size_t) -1: + memset (&mb_state, 0, sizeof mb_state); + /* Fall through. */ + case 1: + *p = '\\'; + p += strchr ("$*.[\\^", *keys) != NULL; + /* Fall through. */ + case 0: + *p++ = *keys; + n = 1; + break; + } + } + + *new_len = p - *new_keys; +} + +int +main (int argc, char **argv) +{ + char *keys; + size_t keycc, oldcc, keyalloc; + int with_filenames; + size_t cc; + int opt, status, prepended; + int prev_optind, last_recursive; + int fread_errno; + intmax_t default_context; + FILE *fp; + exit_failure = EXIT_TROUBLE; + initialize_main (&argc, &argv); + set_program_name (argv[0]); + program_name = argv[0]; + + keys = NULL; + keycc = 0; + with_filenames = 0; + eolbyte = '\n'; + filename_mask = ~0; + + max_count = INTMAX_MAX; + + /* The value -1 means to use DEFAULT_CONTEXT. */ + out_after = out_before = -1; + /* Default before/after context: changed by -C/-NUM options */ + default_context = -1; + /* Changed by -o option */ + only_matching = 0; + + /* Internationalization. 
*/ +#if defined HAVE_SETLOCALE + setlocale (LC_ALL, ""); +#endif +#if defined ENABLE_NLS + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); +#endif + + exit_failure = EXIT_TROUBLE; + atexit (clean_up_stdout); + + last_recursive = 0; + prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv); + compile = matchers[0].compile; + execute = matchers[0].execute; + + while (prev_optind = optind, + (opt = get_nondigit_option (argc, argv, &default_context)) != -1) + switch (opt) + { + case 'A': + context_length_arg (optarg, &out_after); + break; + + case 'B': + context_length_arg (optarg, &out_before); + break; + + case 'C': + /* Set output match context, but let any explicit leading or + trailing amount specified with -A or -B stand. */ + context_length_arg (optarg, &default_context); + break; + + case 'D': + if (STREQ (optarg, "read")) + devices = READ_DEVICES; + else if (STREQ (optarg, "skip")) + devices = SKIP_DEVICES; + else + error (EXIT_TROUBLE, 0, _("unknown devices method")); + break; + + case 'E': + setmatcher ("egrep"); + break; + + case 'F': + setmatcher ("fgrep"); + break; + + case 'P': + setmatcher ("perl"); + break; + + case 'G': + setmatcher ("grep"); + break; + + case 'X': /* undocumented on purpose */ + setmatcher (optarg); + break; + + case 'H': + with_filenames = 1; + no_filenames = 0; + break; + + case 'I': + binary_files = WITHOUT_MATCH_BINARY_FILES; + break; + + case 'T': + align_tabs = 1; + break; + + case 'U': + dos_binary (); + break; + + case 'u': + dos_unix_byte_offsets (); + break; + + case 'V': + show_version = 1; + break; + + case 'a': + binary_files = TEXT_BINARY_FILES; + break; + + case 'b': + out_byte = 1; + break; + + case 'c': + count_matches = 1; + break; + + case 'd': + directories = XARGMATCH ("--directories", optarg, + directories_args, directories_types); + if (directories == RECURSE_DIRECTORIES) + last_recursive = prev_optind; + break; + + case 'e': + cc = strlen (optarg); + keys = xrealloc (keys, 
keycc + cc + 1); + strcpy (&keys[keycc], optarg); + keycc += cc; + keys[keycc++] = '\n'; + break; + + case 'f': + fp = STREQ (optarg, "-") ? stdin : fopen (optarg, O_TEXT ? "rt" : "r"); + if (!fp) + error (EXIT_TROUBLE, errno, "%s", optarg); + for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2) + ; + keys = xrealloc (keys, keyalloc); + oldcc = keycc; + while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0) + { + keycc += cc; + if (keycc == keyalloc - 1) + keys = x2nrealloc (keys, &keyalloc, sizeof *keys); + } + fread_errno = errno; + if (ferror (fp)) + error (EXIT_TROUBLE, fread_errno, "%s", optarg); + if (fp != stdin) + fclose (fp); + /* Append final newline if file ended in non-newline. */ + if (oldcc != keycc && keys[keycc - 1] != '\n') + keys[keycc++] = '\n'; + break; + + case 'h': + with_filenames = 0; + no_filenames = 1; + break; + + case 'i': + case 'y': /* For old-timers . . . */ + match_icase = 1; + break; + + case 'L': + /* Like -l, except list files that don't contain matches. + Inspired by the same option in Hume's gre. */ + list_files = -1; + break; + + case 'l': + list_files = 1; + break; + + case 'm': + switch (xstrtoimax (optarg, 0, 10, &max_count, "")) + { + case LONGINT_OK: + case LONGINT_OVERFLOW: + break; + + default: + error (EXIT_TROUBLE, 0, _("invalid max count")); + } + break; + + case 'n': + out_line = 1; + break; + + case 'o': + only_matching = 1; + break; + + case 'q': + exit_on_match = 1; + exit_failure = 0; + break; + + case 'R': + fts_options = basic_fts_options | FTS_LOGICAL; + /* Fall through. 
*/ + case 'r': + directories = RECURSE_DIRECTORIES; + last_recursive = prev_optind; + break; + + case 's': + suppress_errors = 1; + break; + + case 'v': + out_invert = true; + break; + + case 'w': + match_words = 1; + break; + + case 'x': + match_lines = 1; + break; + + case 'Z': + filename_mask = 0; + break; + + case 'z': + eolbyte = '\0'; + break; + + case BINARY_FILES_OPTION: + if (STREQ (optarg, "binary")) + binary_files = BINARY_BINARY_FILES; + else if (STREQ (optarg, "text")) + binary_files = TEXT_BINARY_FILES; + else if (STREQ (optarg, "without-match")) + binary_files = WITHOUT_MATCH_BINARY_FILES; + else + error (EXIT_TROUBLE, 0, _("unknown binary-files type")); + break; + + case COLOR_OPTION: + if (optarg) + { + if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes") + || !strcasecmp (optarg, "force")) + color_option = 1; + else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no") + || !strcasecmp (optarg, "none")) + color_option = 0; + else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty") + || !strcasecmp (optarg, "if-tty")) + color_option = 2; + else + show_help = 1; + } + else + color_option = 2; + break; + + case EXCLUDE_OPTION: + case INCLUDE_OPTION: + if (!excluded_patterns) + excluded_patterns = new_exclude (); + add_exclude (excluded_patterns, optarg, + (EXCLUDE_WILDCARDS + | (opt == INCLUDE_OPTION ? 
EXCLUDE_INCLUDE : 0))); + break; + case EXCLUDE_FROM_OPTION: + if (!excluded_patterns) + excluded_patterns = new_exclude (); + if (add_exclude_file (add_exclude, excluded_patterns, optarg, + EXCLUDE_WILDCARDS, '\n') != 0) + { + error (EXIT_TROUBLE, errno, "%s", optarg); + } + break; + + case EXCLUDE_DIRECTORY_OPTION: + if (!excluded_directory_patterns) + excluded_directory_patterns = new_exclude (); + strip_trailing_slashes (optarg); + add_exclude (excluded_directory_patterns, optarg, EXCLUDE_WILDCARDS); + break; + + case GROUP_SEPARATOR_OPTION: + group_separator = optarg; + break; + + case LINE_BUFFERED_OPTION: + line_buffered = 1; + break; + + case LABEL_OPTION: + label = optarg; + break; + + case 0: + /* long options */ + break; + + default: + usage (EXIT_TROUBLE); + break; + + } + + if (color_option == 2) + color_option = isatty (STDOUT_FILENO) && should_colorize (); + init_colorize (); + + /* POSIX says that -q overrides -l, which in turn overrides the + other output options. */ + if (exit_on_match) + list_files = 0; + if (exit_on_match | list_files) + { + count_matches = 0; + done_on_match = 1; + } + out_quiet = count_matches | done_on_match; + + if (out_after < 0) + out_after = default_context; + if (out_before < 0) + out_before = default_context; + + if (color_option) + { + /* Legacy. */ + char *userval = getenv ("GREP_COLOR"); + if (userval != NULL && *userval != '\0') + selected_match_color = context_match_color = userval; + + /* New GREP_COLORS has priority. */ + parse_grep_colors (); + } + + if (show_version) + { + version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS, + (char *) NULL); + exit (EXIT_SUCCESS); + } + + if (show_help) + usage (EXIT_SUCCESS); + + struct stat tmp_stat; + if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode)) + out_stat = tmp_stat; + + if (keys) + { + if (keycc == 0) + { + /* No keys were specified (e.g. -f /dev/null). Match nothing. 
*/ + out_invert ^= true; + match_lines = match_words = 0; + } + else + /* Strip trailing newline. */ + --keycc; + } + else if (optind < argc) + { + /* A copy must be made in case of an xrealloc() or free() later. */ + keycc = strlen (argv[optind]); + keys = xmemdup (argv[optind++], keycc + 1); + } + else + usage (EXIT_TROUBLE); + + /* If fgrep in a multibyte locale, then use grep if either + (1) case is ignored (where grep is typically faster), or + (2) the pattern has an encoding error (where fgrep might not work). */ + if (compile == Fcompile && MB_CUR_MAX > 1 + && (match_icase || contains_encoding_error (keys, keycc))) + { + size_t new_keycc; + char *new_keys; + fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys); + free (keys); + keys = new_keys; + keycc = new_keycc; + matcher = "grep"; + compile = Gcompile; + execute = EGexecute; + } + + if (MB_CUR_MAX > 1) + build_mbclen_cache (); + + compile (keys, keycc); + free (keys); + + if ((argc - optind > 1 && !no_filenames) || with_filenames) + out_file = 1; + +#ifdef SET_BINARY + /* Output is set to binary mode because we shouldn't convert + NL to CR-LF pairs, especially when grepping binary files. */ + if (!isatty (1)) + SET_BINARY (1); +#endif + + if (max_count == 0) + exit (EXIT_FAILURE); + + if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES) + devices = READ_DEVICES; + + if (optind < argc) + { + status = 1; + do + status &= grep_command_line_arg (argv[optind]); + while (++optind < argc); + } + else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive) + { + /* Grep through ".", omitting leading "./" from diagnostics. */ + filename_prefix_len = 2; + status = grep_command_line_arg ("."); + } + else + status = grep_command_line_arg ("-"); + + /* We register via atexit() to test stdout. */ + exit (errseen ? 
EXIT_TROUBLE : status); +} +/* vim:set shiftwidth=2: */ diff --git a/contrib/grep/src/grep.h b/contrib/grep/src/grep.h index f5ab8bfce4..493587213f 100644 --- a/contrib/grep/src/grep.h +++ b/contrib/grep/src/grep.h @@ -1,5 +1,5 @@ /* grep.h - interface to grep driver for searching subroutines. - Copyright (C) 1992, 1998, 2001, 2007, 2009-2012 Free Software Foundation, + Copyright (C) 1992, 1998, 2001, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify @@ -20,23 +20,6 @@ #ifndef GREP_GREP_H #define GREP_GREP_H 1 -/* Function pointer types. */ -typedef void (*compile_fp_t) (char const *, size_t); -typedef size_t (*execute_fp_t) (char const *, size_t, size_t *, char const *); - -/* grep.c expects the matchers vector to be terminated by an entry - with a NULL name, and to contain at least one entry. */ -struct matcher -{ - const char *name; - compile_fp_t compile; - execute_fp_t execute; -}; -extern const struct matcher matchers[]; - -extern const char before_options[]; -extern const char after_options[]; - /* The following flags are exported from grep for the matchers to look at. */ extern int match_icase; /* -i */ diff --git a/contrib/grep/src/kwsearch.c b/contrib/grep/src/kwsearch.c index b56b465175..6bd516a93a 100644 --- a/contrib/grep/src/kwsearch.c +++ b/contrib/grep/src/kwsearch.c @@ -1,5 +1,5 @@ /* kwsearch.c - searching subroutines using kwset for grep. - Copyright 1992, 1998, 2000, 2007, 2009-2012 Free Software Foundation, Inc. + Copyright 1992, 1998, 2000, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,8 +21,12 @@ #include #include "search.h" -/* For -w, we also consider _ to be word constituent. */ -#define WCHAR(C) (isalnum (C) || (C) == '_') +/* Whether -w considers WC to be a word constituent. 
*/ +static bool +wordchar (wint_t wc) +{ + return wc == L'_' || iswalnum (wc); +} /* KWset compiled pattern. For Ecompile and Gcompile, we compile a list of strings, at least one of which is known to occur in @@ -32,47 +36,69 @@ static kwset_t kwset; void Fcompile (char const *pattern, size_t size) { - char const *err; - size_t psize = size; + size_t total = size; mb_len_map_t *map = NULL; char const *pat = (match_icase && MB_CUR_MAX > 1 - ? mbtolower (pattern, &psize, &map) + ? mbtoupper (pattern, &total, &map) : pattern); kwsinit (&kwset); - char const *beg = pat; + char const *p = pat; do { - char const *lim; - char const *end; - for (lim = beg;; ++lim) + size_t len; + char const *sep = memchr (p, '\n', total); + if (sep) + { + len = sep - p; + sep++; + total -= (len + 1); + } + else + { + len = total; + total = 0; + } + + char *buf = NULL; + if (match_lines) { - end = lim; - if (lim >= pat + psize) - break; - if (*lim == '\n') - { - lim++; - break; - } -#if HAVE_DOS_FILE_CONTENTS - if (*lim == '\r' && lim + 1 < pat + psize && lim[1] == '\n') - { - lim += 2; - break; - } -#endif + buf = xmalloc (len + 2); + buf[0] = eolbyte; + memcpy (buf + 1, p, len); + buf[len + 1] = eolbyte; + p = buf; + len += 2; } + kwsincr (kwset, p, len); + free (buf); - if ((err = kwsincr (kwset, beg, end - beg)) != NULL) - error (EXIT_TROUBLE, 0, "%s", err); - beg = lim; + p = sep; } - while (beg < pat + psize); + while (p); - if ((err = kwsprep (kwset)) != NULL) - error (EXIT_TROUBLE, 0, "%s", err); + kwsprep (kwset); +} + +/* Apply the MAP (created by mbtoupper) to the uppercase-buffer-relative + *OFF and *LEN, converting them to be relative to the original buffer. 
*/ + +static void +mb_case_map_apply (mb_len_map_t const *map, size_t *off, size_t *len) +{ + if (map) + { + size_t off_incr = 0; + size_t len_incr = 0; + size_t k; + for (k = 0; k < *off; k++) + off_incr += map[k]; + for (; k < *off + *len; k++) + len_incr += map[k]; + *off += off_incr; + *len += len_incr; + } } size_t @@ -90,7 +116,7 @@ Fexecute (char const *buf, size_t size, size_t *match_size, { if (match_icase) { - char *case_buf = mbtolower (buf, &size, &map); + char *case_buf = mbtoupper (buf, &size, &map); if (start_ptr) start_ptr = case_buf + (start_ptr - buf); buf = case_buf; @@ -99,42 +125,30 @@ Fexecute (char const *buf, size_t size, size_t *match_size, for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) { - size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); + size_t offset = kwsexec (kwset, beg - match_lines, + buf + size - beg + match_lines, &kwsmatch); if (offset == (size_t) -1) goto failure; - len = kwsmatch.size[0]; - if (MB_CUR_MAX > 1 - && is_mb_middle (&mb_start, beg + offset, buf + size, len)) + len = kwsmatch.size[0] - match_lines; + if (!match_lines && MB_CUR_MAX > 1 && !using_utf8 () + && mb_goback (&mb_start, beg + offset, buf + size) != 0) { /* The match was a part of multibyte character, advance at least one byte to ensure no infinite loop happens. 
*/ - mbstate_t s; - memset (&s, 0, sizeof s); - size_t mb_len = mbrlen (mb_start, (buf + size) - (beg + offset), &s); - if (mb_len == (size_t) -2) - goto failure; beg = mb_start; - if (mb_len != (size_t) -1) - beg += mb_len - 1; continue; } beg += offset; if (start_ptr && !match_words) goto success_in_beg_and_len; if (match_lines) - { - if (beg > buf && beg[-1] != eol) - continue; - if (beg + len < buf + size && beg[len] != eol) - continue; - goto success; - } - else if (match_words) + goto success_in_beg_and_len; + if (match_words) for (try = beg; ; ) { - if (try > buf && WCHAR((unsigned char) try[-1])) + if (wordchar (mb_prev_wc (buf, try, buf + size))) break; - if (try + len < buf + size && WCHAR((unsigned char) try[len])) + if (wordchar (mb_next_wc (try + len, buf + size))) { if (!len) break; @@ -154,8 +168,7 @@ Fexecute (char const *buf, size_t size, size_t *match_size, } /* for (beg in buf) */ failure: - ret_val = -1; - goto out; + return -1; success: if ((end = memchr (beg + len, eol, (buf + size) - (beg + len))) != NULL) @@ -171,6 +184,5 @@ Fexecute (char const *buf, size_t size, size_t *match_size, *match_size = len; ret_val = off; - out: return ret_val; } diff --git a/contrib/grep/src/kwset.c b/contrib/grep/src/kwset.c index cd40953921..6d218938e7 100644 --- a/contrib/grep/src/kwset.c +++ b/contrib/grep/src/kwset.c @@ -1,5 +1,5 @@ /* kwset.c - search for any of a set of keywords. - Copyright (C) 1989, 1998, 2000, 2005, 2007, 2009-2012 Free Software + Copyright (C) 1989, 1998, 2000, 2005, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify @@ -23,17 +23,25 @@ /* The algorithm implemented by these routines bears a startling resemblance to one discovered by Beate Commentz-Walter, although it is not identical. - See "A String Matching Algorithm Fast on the Average," Technical Report, - IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900 - Heidelberg, Germany. 
See also Aho, A.V., and M. Corasick, "Efficient - String Matching: An Aid to Bibliographic Search," CACM June 1975, - Vol. 18, No. 6, which describes the failure function used below. */ + See: Commentz-Walter B. A string matching algorithm fast on the average. + Lecture Notes in Computer Science 71 (1979), 118-32 + . + See also: Aho AV, Corasick MJ. Efficient string matching: an aid to + bibliographic search. CACM 18, 6 (1975), 333-40 + , which describes the + failure function used below. */ #include + +#include "kwset.h" + +#include +#include #include #include "system.h" -#include "kwset.h" +#include "memchr2.h" #include "obstack.h" +#include "xalloc.h" #define link kwset_link @@ -47,7 +55,7 @@ #define obstack_chunk_alloc malloc #define obstack_chunk_free free -#define U(c) ((unsigned char) (c)) +#define U(c) (to_uchar (c)) /* Balanced tree of edges and labels leaving a given trie node. */ struct tree @@ -83,30 +91,45 @@ struct kwset unsigned char delta[NCHAR]; /* Delta table for rapid search. */ struct trie *next[NCHAR]; /* Table of children of the root. */ char *target; /* Target string if there's only one. */ - int mind2; /* Used in Boyer-Moore search for one string. */ + int *shift; /* Used in Boyer-Moore search for one string. */ char const *trans; /* Character translation table. */ + + /* If there's only one string, this is the string's last byte, + translated via TRANS if TRANS is nonnull. */ + char gc1; + + /* Likewise for the string's penultimate byte, if it has two or more + bytes. */ + char gc2; + + /* If there's only one string, this helps to match the string's last byte. + If GC1HELP is negative, only GC1 matches the string's last byte; + otherwise at least two bytes match, and B matches if TRANS[B] == GC1. + If GC1HELP is in the range 0..(NCHAR - 1), there are exactly two + such matches, and GC1HELP is the other match after conversion to + unsigned char. 
If GC1HELP is at least NCHAR, there are three or + more such matches; e.g., Greek has three sigma characters that + all match when case-folding. */ + int gc1help; }; +/* Use TRANS to transliterate C. A null TRANS does no transliteration. */ +static inline char +tr (char const *trans, char c) +{ + return trans ? trans[U(c)] : c; +} + /* Allocate and initialize a keyword set object, returning an opaque - pointer to it. Return NULL if memory is not available. */ + pointer to it. */ kwset_t kwsalloc (char const *trans) { - struct kwset *kwset; - - kwset = (struct kwset *) malloc(sizeof (struct kwset)); - if (!kwset) - return NULL; + struct kwset *kwset = xmalloc (sizeof *kwset); - obstack_init(&kwset->obstack); + obstack_init (&kwset->obstack); kwset->words = 0; - kwset->trie - = (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie)); - if (!kwset->trie) - { - kwsfree((kwset_t) kwset); - return NULL; - } + kwset->trie = obstack_alloc (&kwset->obstack, sizeof *kwset->trie); kwset->trie->accepting = 0; kwset->trie->links = NULL; kwset->trie->parent = NULL; @@ -119,44 +142,38 @@ kwsalloc (char const *trans) kwset->target = NULL; kwset->trans = trans; - return (kwset_t) kwset; + return kwset; } /* This upper bound is valid for CHAR_BIT >= 4 and exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */ #define DEPTH_SIZE (CHAR_BIT + CHAR_BIT/2) -/* Add the given string to the contents of the keyword set. Return NULL - for success, an error message otherwise. */ -const char * -kwsincr (kwset_t kws, char const *text, size_t len) +/* Add the given string to the contents of the keyword set. 
*/ +void +kwsincr (kwset_t kwset, char const *text, size_t len) { - struct kwset *kwset; - struct trie *trie; - unsigned char label; - struct tree *link; - int depth; - struct tree *links[DEPTH_SIZE]; - enum { L, R } dirs[DEPTH_SIZE]; - struct tree *t, *r, *l, *rl, *lr; + struct trie *trie = kwset->trie; + char const *trans = kwset->trans; - kwset = (struct kwset *) kws; - trie = kwset->trie; text += len; /* Descend the trie (built of reversed keywords) character-by-character, installing new nodes when necessary. */ while (len--) { - label = kwset->trans ? kwset->trans[U(*--text)] : *--text; + unsigned char uc = *--text; + unsigned char label = trans ? trans[uc] : uc; /* Descend the tree of outgoing links for this trie node, looking for the current character and keeping track of the path followed. */ - link = trie->links; + struct tree *link = trie->links; + struct tree *links[DEPTH_SIZE]; + enum { L, R } dirs[DEPTH_SIZE]; links[0] = (struct tree *) &trie->links; dirs[0] = L; - depth = 1; + int depth = 1; while (link && label != link->label) { @@ -172,19 +189,10 @@ kwsincr (kwset_t kws, char const *text, size_t len) a link in the current trie node's tree. 
*/ if (!link) { - link = (struct tree *) obstack_alloc(&kwset->obstack, - sizeof (struct tree)); - if (!link) - return _("memory exhausted"); + link = obstack_alloc (&kwset->obstack, sizeof *link); link->llink = NULL; link->rlink = NULL; - link->trie = (struct trie *) obstack_alloc(&kwset->obstack, - sizeof (struct trie)); - if (!link->trie) - { - obstack_free(&kwset->obstack, link); - return _("memory exhausted"); - } + link->trie = obstack_alloc (&kwset->obstack, sizeof *link->trie); link->trie->accepting = 0; link->trie->links = NULL; link->trie->parent = trie; @@ -215,6 +223,8 @@ kwsincr (kwset_t kws, char const *text, size_t len) if (depth && ((dirs[depth] == L && --links[depth]->balance) || (dirs[depth] == R && ++links[depth]->balance))) { + struct tree *t, *r, *l, *rl, *lr; + switch (links[depth]->balance) { case (char) -2: @@ -282,8 +292,6 @@ kwsincr (kwset_t kws, char const *text, size_t len) kwset->mind = trie->depth; if (trie->depth > kwset->maxd) kwset->maxd = trie->depth; - - return NULL; } /* Enqueue the trie nodes referenced from the given tree in the @@ -381,132 +389,210 @@ treenext (struct tree const *tree, struct trie *next[]) /* Compute the shift for each trie node, as well as the delta table and next cache for the given keyword set. */ -const char * -kwsprep (kwset_t kws) +void +kwsprep (kwset_t kwset) { - struct kwset *kwset; + char const *trans = kwset->trans; int i; - struct trie *curr; - char const *trans; - unsigned char delta[NCHAR]; - - kwset = (struct kwset *) kws; + unsigned char deltabuf[NCHAR]; + unsigned char *delta = trans ? deltabuf : kwset->delta; /* Initial values for the delta table; will be changed later. The delta entry for a given character is the smallest depth of any node at which an outgoing edge is labeled by that character. */ - memset(delta, kwset->mind < UCHAR_MAX ? 
kwset->mind : UCHAR_MAX, NCHAR); + memset (delta, MIN (kwset->mind, UCHAR_MAX), sizeof deltabuf); + + /* Traverse the nodes of the trie in level order, simultaneously + computing the delta table, failure function, and shift function. */ + struct trie *curr, *last; + for (curr = last = kwset->trie; curr; curr = curr->next) + { + /* Enqueue the immediate descendants in the level order queue. */ + enqueue (curr->links, &last); + + curr->shift = kwset->mind; + curr->maxshift = kwset->mind; + + /* Update the delta table for the descendants of this node. */ + treedelta (curr->links, curr->depth, delta); + + /* Compute the failure function for the descendants of this node. */ + treefails (curr->links, curr->fail, kwset->trie); + + /* Update the shifts at each node in the current node's chain + of fails back to the root. */ + struct trie *fail; + for (fail = curr->fail; fail; fail = fail->fail) + { + /* If the current node has some outgoing edge that the fail + doesn't, then the shift at the fail should be no larger + than the difference of their depths. */ + if (!hasevery (fail->links, curr->links)) + if (curr->depth - fail->depth < fail->shift) + fail->shift = curr->depth - fail->depth; + + /* If the current node is accepting then the shift at the + fail and its descendants should be no larger than the + difference of their depths. */ + if (curr->accepting && fail->maxshift > curr->depth - fail->depth) + fail->maxshift = curr->depth - fail->depth; + } + } + + /* Traverse the trie in level order again, fixing up all nodes whose + shift exceeds their inherited maxshift. */ + for (curr = kwset->trie->next; curr; curr = curr->next) + { + if (curr->maxshift > curr->parent->maxshift) + curr->maxshift = curr->parent->maxshift; + if (curr->shift > curr->maxshift) + curr->shift = curr->maxshift; + } + + /* Create a vector, indexed by character code, of the outgoing links + from the root node. */ + struct trie *nextbuf[NCHAR]; + struct trie **next = trans ? 
nextbuf : kwset->next; + memset (next, 0, sizeof nextbuf); + treenext (kwset->trie->links, next); + if (trans) + for (i = 0; i < NCHAR; ++i) + kwset->next[i] = next[U(trans[i])]; /* Check if we can use the simple boyer-moore algorithm, instead of the hairy commentz-walter algorithm. */ - if (kwset->words == 1 && kwset->trans == NULL) + if (kwset->words == 1) { - char c; - /* Looking for just one string. Extract it from the trie. */ - kwset->target = obstack_alloc(&kwset->obstack, kwset->mind); - if (!kwset->target) - return _("memory exhausted"); + kwset->target = obstack_alloc (&kwset->obstack, kwset->mind); for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i) { kwset->target[i] = curr->links->label; - curr = curr->links->trie; + curr = curr->next; } - /* Build the Boyer Moore delta. Boy that's easy compared to CW. */ - for (i = 0; i < kwset->mind; ++i) - delta[U(kwset->target[i])] = kwset->mind - (i + 1); - /* Find the minimal delta2 shift that we might make after - a backwards match has failed. */ - c = kwset->target[kwset->mind - 1]; - for (i = kwset->mind - 2; i >= 0; --i) - if (kwset->target[i] == c) - break; - kwset->mind2 = kwset->mind - (i + 1); - } - else - { - struct trie *fail; - struct trie *last, *next[NCHAR]; + /* Looking for the delta2 shift that we might make after a + backwards match has failed. Extract it from the trie. */ + if (kwset->mind > 1) + { + kwset->shift + = obstack_alloc (&kwset->obstack, + sizeof *kwset->shift * (kwset->mind - 1)); + for (i = 0, curr = kwset->trie->next; i < kwset->mind - 1; ++i) + { + kwset->shift[i] = curr->shift; + curr = curr->next; + } + } + + char gc1 = tr (trans, kwset->target[kwset->mind - 1]); - /* Traverse the nodes of the trie in level order, simultaneously - computing the delta table, failure function, and shift function. */ - for (curr = last = kwset->trie; curr; curr = curr->next) + /* Set GC1HELP according to whether exactly one, exactly two, or + three-or-more characters match GC1. 
*/ + int gc1help = -1; + if (trans) { - /* Enqueue the immediate descendants in the level order queue. */ - enqueue(curr->links, &last); + char const *equiv1 = memchr (trans, gc1, NCHAR); + char const *equiv2 = memchr (equiv1 + 1, gc1, + trans + NCHAR - (equiv1 + 1)); + if (equiv2) + gc1help = (memchr (equiv2 + 1, gc1, trans + NCHAR - (equiv2 + 1)) + ? NCHAR + : U(gc1) ^ (equiv1 - trans) ^ (equiv2 - trans)); + } - curr->shift = kwset->mind; - curr->maxshift = kwset->mind; + kwset->gc1 = gc1; + kwset->gc1help = gc1help; + if (kwset->mind > 1) + kwset->gc2 = tr (trans, kwset->target[kwset->mind - 2]); + } - /* Update the delta table for the descendants of this node. */ - treedelta(curr->links, curr->depth, delta); + /* Fix things up for any translation table. */ + if (trans) + for (i = 0; i < NCHAR; ++i) + kwset->delta[i] = delta[U(trans[i])]; +} - /* Compute the failure function for the descendants of this node. */ - treefails(curr->links, curr->fail, kwset->trie); +/* Delta2 portion of a Boyer-Moore search. *TP is the string text + pointer; it is updated in place. EP is the end of the string text, + and SP the end of the pattern. LEN is the pattern length; it must + be at least 2. TRANS, if nonnull, is the input translation table. + GC1 and GC2 are the last and second-from last bytes of the pattern, + transliterated by TRANS; the caller precomputes them for + efficiency. If D1 is nonnull, it is a delta1 table for shifting *TP + when failing. KWSET->shift says how much to shift. */ +static inline bool +bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len, + char const *trans, char gc1, char gc2, + unsigned char const *d1, kwset_t kwset) +{ + char const *tp = *tpp; + int d = len, skip = 0; - /* Update the shifts at each node in the current node's chain - of fails back to the root. 
*/ - for (fail = curr->fail; fail; fail = fail->fail) + while (true) + { + int i = 2; + if (tr (trans, tp[-2]) == gc2) + { + while (++i <= d) + if (tr (trans, tp[-i]) != tr (trans, sp[-i])) + break; + if (i > d) { - /* If the current node has some outgoing edge that the fail - doesn't, then the shift at the fail should be no larger - than the difference of their depths. */ - if (!hasevery(fail->links, curr->links)) - if (curr->depth - fail->depth < fail->shift) - fail->shift = curr->depth - fail->depth; - - /* If the current node is accepting then the shift at the - fail and its descendants should be no larger than the - difference of their depths. */ - if (curr->accepting && fail->maxshift > curr->depth - fail->depth) - fail->maxshift = curr->depth - fail->depth; + for (i = d + skip + 1; i <= len; ++i) + if (tr (trans, tp[-i]) != tr (trans, sp[-i])) + break; + if (i > len) + { + *tpp = tp - len; + return true; + } } } - /* Traverse the trie in level order again, fixing up all nodes whose - shift exceeds their inherited maxshift. */ - for (curr = kwset->trie->next; curr; curr = curr->next) + tp += d = kwset->shift[i - 2]; + if (tp > ep) + break; + if (tr (trans, tp[-1]) != gc1) { - if (curr->maxshift > curr->parent->maxshift) - curr->maxshift = curr->parent->maxshift; - if (curr->shift > curr->maxshift) - curr->shift = curr->maxshift; + if (d1) + tp += d1[U(tp[-1])]; + break; } - - /* Create a vector, indexed by character code, of the outgoing links - from the root node. */ - for (i = 0; i < NCHAR; ++i) - next[i] = NULL; - treenext(kwset->trie->links, next); - - if ((trans = kwset->trans) != NULL) - for (i = 0; i < NCHAR; ++i) - kwset->next[i] = next[U(trans[i])]; - else - memcpy(kwset->next, next, NCHAR * sizeof(struct trie *)); + skip = i - 1; } - /* Fix things up for any translation table. 
*/ - if ((trans = kwset->trans) != NULL) - for (i = 0; i < NCHAR; ++i) - kwset->delta[i] = delta[U(trans[i])]; - else - memcpy(kwset->delta, delta, NCHAR); + *tpp = tp; + return false; +} - return NULL; +/* Return the address of the first byte in the buffer S (of size N) + that matches the last byte specified by KWSET, a singleton. */ +static char const * +memchr_kwset (char const *s, size_t n, kwset_t kwset) +{ + if (kwset->gc1help < 0) + return memchr (s, kwset->gc1, n); + int small_heuristic = 2; + int small = (- (uintptr_t) s % sizeof (long) + + small_heuristic * sizeof (long)); + size_t ntrans = kwset->gc1help < NCHAR && small < n ? small : n; + char const *slim = s + ntrans; + for (; s < slim; s++) + if (kwset->trans[U(*s)] == kwset->gc1) + return s; + n -= ntrans; + return n == 0 ? NULL : memchr2 (s, kwset->gc1, kwset->gc1help, n); } -/* Fast boyer-moore search. */ -static size_t _GL_ATTRIBUTE_PURE -bmexec (kwset_t kws, char const *text, size_t size) +/* Fast Boyer-Moore search (inlinable version). */ +static inline size_t _GL_ATTRIBUTE_PURE +bmexec_trans (kwset_t kwset, char const *text, size_t size) { - struct kwset const *kwset; unsigned char const *d1; char const *ep, *sp, *tp; - int d, gc, i, len, md2; - - kwset = (struct kwset const *) kws; - len = kwset->mind; + int d; + int len = kwset->mind; + char const *trans = kwset->trans; if (len == 0) return 0; @@ -514,50 +600,55 @@ bmexec (kwset_t kws, char const *text, size_t size) return -1; if (len == 1) { - tp = memchr (text, kwset->target[0], size); + tp = memchr_kwset (text, size, kwset); return tp ? tp - text : -1; } d1 = kwset->delta; sp = kwset->target + len; - gc = U(sp[-2]); - md2 = kwset->mind2; tp = text + len; + char gc1 = kwset->gc1; + char gc2 = kwset->gc2; /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */ if (size > 12 * len) /* 11 is not a bug, the initial offset happens only once. 
*/ - for (ep = text + size - 11 * len;;) + for (ep = text + size - 11 * len; tp <= ep; ) { - while (tp <= ep) + char const *tp0 = tp; + d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])], tp += d; + if (d != 0) { d = d1[U(tp[-1])], tp += d; d = d1[U(tp[-1])], tp += d; - if (d == 0) - goto found; - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - if (d == 0) - goto found; - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - if (d == 0) - goto found; - d = d1[U(tp[-1])], tp += d; d = d1[U(tp[-1])], tp += d; + if (d != 0) + { + d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])], tp += d; + if (d != 0) + { + d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])], tp += d; + + /* As a heuristic, prefer memchr to seeking by + delta1 when the latter doesn't advance much. */ + int advance_heuristic = 16 * sizeof (long); + if (advance_heuristic <= tp - tp0) + goto big_advance; + tp--; + tp = memchr_kwset (tp, text + size - tp, kwset); + if (! tp) + return -1; + tp++; + } + } } - break; - found: - if (U(tp[-2]) == gc) - { - for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i) - ; - if (i > len) - return tp - len - text; - } - tp += md2; + if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset)) + return tp - text; + big_advance:; } /* Now we have only a few characters left to search. We @@ -569,24 +660,28 @@ bmexec (kwset_t kws, char const *text, size_t size) d = d1[U((tp += d)[-1])]; if (d != 0) continue; - if (U(tp[-2]) == gc) - { - for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i) - ; - if (i > len) - return tp - len - text; - } - d = md2; + if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset)) + return tp - text; } return -1; } +/* Fast Boyer-Moore search. */ +static size_t +bmexec (kwset_t kwset, char const *text, size_t size) +{ + /* Help the compiler inline bmexec_trans in two ways, depending on + whether kwset->trans is null. 
*/ + return (kwset->trans + ? bmexec_trans (kwset, text, size) + : bmexec_trans (kwset, text, size)); +} + /* Hairy multiple string search. */ static size_t _GL_ARG_NONNULL ((4)) -cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch) +cwexec (kwset_t kwset, char const *text, size_t len, struct kwsmatch *kwsmatch) { - struct kwset const *kwset; struct trie * const *next; struct trie const *trie; struct trie const *accept; @@ -603,7 +698,6 @@ cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch) #endif /* Initialize register copies and look for easy ways out. */ - kwset = (struct kwset *) kws; if (len < kwset->mind) return -1; next = kwset->next; @@ -651,7 +745,8 @@ cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch) d = trie->shift; while (beg > text) { - c = trans ? trans[U(*--beg)] : *--beg; + unsigned char uc = *--beg; + c = trans ? trans[uc] : uc; tree = trie->links; while (tree && c != tree->label) if (c < tree->label) @@ -702,7 +797,8 @@ cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch) d = trie->shift; while (beg > text) { - c = trans ? trans[U(*--beg)] : *--beg; + unsigned char uc = *--beg; + c = trans ? trans[uc] : uc; tree = trie->links; while (tree && c != tree->label) if (c < tree->label) @@ -738,18 +834,18 @@ cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch) return mch - text; } -/* Search TEXT for a match of any member of the keyword set, KWS. +/* Search TEXT for a match of any member of KWSET. Return the offset (into TEXT) of the first byte of the matching substring, or (size_t) -1 if no match is found. Upon a match, store details in *KWSMATCH: index of matched keyword, start offset (same as the return value), and length. 
*/ size_t -kwsexec (kwset_t kws, char const *text, size_t size, struct kwsmatch *kwsmatch) +kwsexec (kwset_t kwset, char const *text, size_t size, + struct kwsmatch *kwsmatch) { - struct kwset const *kwset = (struct kwset *) kws; - if (kwset->words == 1 && kwset->trans == NULL) + if (kwset->words == 1) { - size_t ret = bmexec (kws, text, size); + size_t ret = bmexec (kwset, text, size); if (ret != (size_t) -1) { kwsmatch->index = 0; @@ -759,16 +855,13 @@ kwsexec (kwset_t kws, char const *text, size_t size, struct kwsmatch *kwsmatch) return ret; } else - return cwexec(kws, text, size, kwsmatch); + return cwexec (kwset, text, size, kwsmatch); } /* Free the components of the given keyword set. */ void -kwsfree (kwset_t kws) +kwsfree (kwset_t kwset) { - struct kwset *kwset; - - kwset = (struct kwset *) kws; - obstack_free(&kwset->obstack, NULL); - free(kws); + obstack_free (&kwset->obstack, NULL); + free (kwset); } diff --git a/contrib/grep/src/kwset.h b/contrib/grep/src/kwset.h index 01775e1b94..12afb8ee60 100644 --- a/contrib/grep/src/kwset.h +++ b/contrib/grep/src/kwset.h @@ -1,5 +1,5 @@ /* kwset.h - header declaring the keyword set library. - Copyright (C) 1989, 1998, 2005, 2007, 2009-2012 Free Software Foundation, + Copyright (C) 1989, 1998, 2005, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify @@ -21,6 +21,8 @@ The author may be reached (Email) at the address mike@ai.mit.edu, or (US mail) as Mike Haertel c/o Free Software Foundation. */ +#include + struct kwsmatch { size_t index; /* Index number of matching keyword. */ @@ -33,20 +35,17 @@ struct kwsmatch struct kwset; typedef struct kwset *kwset_t; -/* Return an opaque pointer to a newly allocated keyword set, or NULL - if enough memory cannot be obtained. The argument if non-NULL +/* Return an opaque pointer to a newly allocated keyword set. A nonnull arg specifies a table of character translations to be applied to all - pattern and search text. 
*/ + pattern and search text. */ extern kwset_t kwsalloc (char const *); /* Incrementally extend the keyword set to include the given string. - Return NULL for success, or an error message. Remember an index - number for each keyword included in the set. */ -extern const char *kwsincr (kwset_t, char const *, size_t); + Remember an index number for each keyword included in the set. */ +extern void kwsincr (kwset_t, char const *, size_t); -/* When the keyword set has been completely built, prepare it for - use. Return NULL for success, or an error message. */ -extern const char *kwsprep (kwset_t); +/* When the keyword set has been completely built, prepare it for use. */ +extern void kwsprep (kwset_t); /* Search through the given buffer for a member of the keyword set. Return a pointer to the leftmost longest match found, or NULL if diff --git a/contrib/grep/src/main.c b/contrib/grep/src/main.c deleted file mode 100644 index 3ff53bacac..0000000000 --- a/contrib/grep/src/main.c +++ /dev/null @@ -1,2291 +0,0 @@ -/* grep.c - main driver file for grep. - Copyright (C) 1992, 1997-2002, 2004-2012 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA - 02110-1301, USA. */ - -/* Written July 1992 by Mike Haertel. 
*/ - -#include -#include -#include -#include "mbsupport.h" -#include -#include -#include -#include -#include -#include "system.h" - -#include "argmatch.h" -#include "c-ctype.h" -#include "closeout.h" -#include "colorize.h" -#include "error.h" -#include "exclude.h" -#include "exitfail.h" -#include "fcntl-safer.h" -#include "fts_.h" -#include "getopt.h" -#include "grep.h" -#include "intprops.h" -#include "progname.h" -#include "propername.h" -#include "quote.h" -#include "version-etc.h" -#include "xalloc.h" -#include "xstrtol.h" - -#define SEP_CHAR_SELECTED ':' -#define SEP_CHAR_REJECTED '-' -#define SEP_STR_GROUP "--" - -#define STREQ(a, b) (strcmp (a, b) == 0) - -#define AUTHORS \ - proper_name ("Mike Haertel"), \ - _("others, see\n") - -/* When stdout is connected to a regular file, save its stat - information here, so that we can automatically skip it, thus - avoiding a potential (racy) infinite loop. */ -static struct stat out_stat; - -/* if non-zero, display usage information and exit */ -static int show_help; - -/* If non-zero, print the version on standard output and exit. */ -static int show_version; - -/* If nonzero, suppress diagnostics for nonexistent or unreadable files. */ -static int suppress_errors; - -/* If nonzero, use color markers. */ -static int color_option; - -/* If nonzero, show only the part of a line matching the expression. */ -static int only_matching; - -/* If nonzero, make sure first content char in a line is on a tab stop. */ -static int align_tabs; - -/* The group separator used when context is requested. */ -static const char *group_separator = SEP_STR_GROUP; - -/* The context and logic for choosing default --color screen attributes - (foreground and background colors, etc.) are the following. 
- -- There are eight basic colors available, each with its own - nominal luminosity to the human eye and foreground/background - codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41], - magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46], - yellow [89 %, 33/43], and white [100 %, 37/47]). - -- Sometimes, white as a background is actually implemented using - a shade of light gray, so that a foreground white can be visible - on top of it (but most often not). - -- Sometimes, black as a foreground is actually implemented using - a shade of dark gray, so that it can be visible on top of a - background black (but most often not). - -- Sometimes, more colors are available, as extensions. - -- Other attributes can be selected/deselected (bold [1/22], - underline [4/24], standout/inverse [7/27], blink [5/25], and - invisible/hidden [8/28]). They are sometimes implemented by - using colors instead of what their names imply; e.g., bold is - often achieved by using brighter colors. In practice, only bold - is really available to us, underline sometimes being mapped by - the terminal to some strange color choice, and standout best - being left for use by downstream programs such as less(1). - -- We cannot assume that any of the extensions or special features - are available for the purpose of choosing defaults for everyone. - -- The most prevalent default terminal backgrounds are pure black - and pure white, and are not necessarily the same shades of - those as if they were selected explicitly with SGR sequences. - Some terminals use dark or light pictures as default background, - but those are covered over by an explicit selection of background - color with an SGR sequence; their users will appreciate their - background pictures not be covered like this, if possible. - -- Some uses of colors attributes is to make some output items - more understated (e.g., context lines); this cannot be achieved - by changing the background color. 
- -- For these reasons, the grep color defaults should strive not - to change the background color from its default, unless it's - for a short item that should be highlighted, not understated. - -- The grep foreground color defaults (without an explicitly set - background) should provide enough contrast to be readable on any - terminal with either a black (dark) or white (light) background. - This only leaves red, magenta, green, and cyan (and their bold - counterparts) and possibly bold blue. */ -/* The color strings used for matched text. - The user can overwrite them using the deprecated - environment variable GREP_COLOR or the new GREP_COLORS. */ -static const char *selected_match_color = "01;31"; /* bold red */ -static const char *context_match_color = "01;31"; /* bold red */ - -/* Other colors. Defaults look damn good. */ -static const char *filename_color = "35"; /* magenta */ -static const char *line_num_color = "32"; /* green */ -static const char *byte_num_color = "32"; /* green */ -static const char *sep_color = "36"; /* cyan */ -static const char *selected_line_color = ""; /* default color pair */ -static const char *context_line_color = ""; /* default color pair */ - -/* Select Graphic Rendition (SGR, "\33[...m") strings. */ -/* Also Erase in Line (EL) to Right ("\33[K") by default. */ -/* Why have EL to Right after SGR? - -- The behavior of line-wrapping when at the bottom of the - terminal screen and at the end of the current line is often - such that a new line is introduced, entirely cleared with - the current background color which may be different from the - default one (see the boolean back_color_erase terminfo(5) - capability), thus scrolling the display by one line. - The end of this new line will stay in this background color - even after reverting to the default background color with - "\33[m', unless it is explicitly cleared again with "\33[K" - (which is the behavior the user would instinctively expect - from the whole thing). 
There may be some unavoidable - background-color flicker at the end of this new line because - of this (when timing with the monitor's redraw is just right). - -- The behavior of HT (tab, "\t") is usually the same as that of - Cursor Forward Tabulation (CHT) with a default parameter - of 1 ("\33[I"), i.e., it performs pure movement to the next - tab stop, without any clearing of either content or screen - attributes (including background color); try - printf 'asdfqwerzxcv\rASDF\tZXCV\n' - in a bash(1) shell to demonstrate this. This is not what the - user would instinctively expect of HT (but is ok for CHT). - The instinctive behavior would include clearing the terminal - cells that are skipped over by HT with blank cells in the - current screen attributes, including background color; - the boolean dest_tabs_magic_smso terminfo(5) capability - indicates this saner behavior for HT, but only some rare - terminals have it (although it also indicates a special - glitch with standout mode in the Teleray terminal for which - it was initially introduced). The remedy is to add "\33K" - after each SGR sequence, be it START (to fix the behavior - of any HT after that before another SGR) or END (to fix the - behavior of an HT in default background color that would - follow a line-wrapping at the bottom of the screen in another - background color, and to complement doing it after START). - Piping grep's output through a pager such as less(1) avoids - any HT problems since the pager performs tab expansion. - - Generic disadvantages of this remedy are: - -- Some very rare terminals might support SGR but not EL (nobody - will use "grep --color" on a terminal that does not support - SGR in the first place). - -- Having these extra control sequences might somewhat complicate - the task of any program trying to parse "grep --color" - output in order to extract structuring information from it. 
- A specific disadvantage to doing it after SGR START is: - -- Even more possible background color flicker (when timing - with the monitor's redraw is just right), even when not at the - bottom of the screen. - There are no additional disadvantages specific to doing it after - SGR END. - - It would be impractical for GNU grep to become a full-fledged - terminal program linked against ncurses or the like, so it will - not detect terminfo(5) capabilities. */ -static const char *sgr_start = "\33[%sm\33[K"; -static const char *sgr_end = "\33[m\33[K"; - -/* SGR utility functions. */ -static void -pr_sgr_start (char const *s) -{ - if (*s) - print_start_colorize (sgr_start, s); -} -static void -pr_sgr_end (char const *s) -{ - if (*s) - print_end_colorize (sgr_end); -} -static void -pr_sgr_start_if (char const *s) -{ - if (color_option) - pr_sgr_start (s); -} -static void -pr_sgr_end_if (char const *s) -{ - if (color_option) - pr_sgr_end (s); -} - -struct color_cap - { - const char *name; - const char **var; - void (*fct) (void); - }; - -static void -color_cap_mt_fct (void) -{ - /* Our caller just set selected_match_color. */ - context_match_color = selected_match_color; -} - -static void -color_cap_rv_fct (void) -{ - /* By this point, it was 1 (or already -1). */ - color_option = -1; /* That's still != 0. */ -} - -static void -color_cap_ne_fct (void) -{ - sgr_start = "\33[%sm"; - sgr_end = "\33[m"; -} - -/* For GREP_COLORS. 
*/ -static const struct color_cap color_dict[] = - { - { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */ - { "ms", &selected_match_color, NULL }, /* selected matched text */ - { "mc", &context_match_color, NULL }, /* context matched text */ - { "fn", &filename_color, NULL }, /* filename */ - { "ln", &line_num_color, NULL }, /* line number */ - { "bn", &byte_num_color, NULL }, /* byte (sic) offset */ - { "se", &sep_color, NULL }, /* separator */ - { "sl", &selected_line_color, NULL }, /* selected lines */ - { "cx", &context_line_color, NULL }, /* context lines */ - { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */ - { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */ - { NULL, NULL, NULL } - }; - -static struct exclude *excluded_patterns; -static struct exclude *excluded_directory_patterns; -/* Short options. */ -static char const short_options[] = -"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz"; - -/* Non-boolean long options that have no corresponding short equivalents. */ -enum -{ - BINARY_FILES_OPTION = CHAR_MAX + 1, - COLOR_OPTION, - INCLUDE_OPTION, - EXCLUDE_OPTION, - EXCLUDE_FROM_OPTION, - LINE_BUFFERED_OPTION, - LABEL_OPTION, - EXCLUDE_DIRECTORY_OPTION, - GROUP_SEPARATOR_OPTION, - MMAP_OPTION -}; - -/* Long options equivalences. 
*/ -static struct option const long_options[] = -{ - {"basic-regexp", no_argument, NULL, 'G'}, - {"extended-regexp", no_argument, NULL, 'E'}, - {"fixed-regexp", no_argument, NULL, 'F'}, - {"fixed-strings", no_argument, NULL, 'F'}, - {"perl-regexp", no_argument, NULL, 'P'}, - {"after-context", required_argument, NULL, 'A'}, - {"before-context", required_argument, NULL, 'B'}, - {"binary-files", required_argument, NULL, BINARY_FILES_OPTION}, - {"byte-offset", no_argument, NULL, 'b'}, - {"context", required_argument, NULL, 'C'}, - {"color", optional_argument, NULL, COLOR_OPTION}, - {"colour", optional_argument, NULL, COLOR_OPTION}, - {"count", no_argument, NULL, 'c'}, - {"devices", required_argument, NULL, 'D'}, - {"directories", required_argument, NULL, 'd'}, - {"exclude", required_argument, NULL, EXCLUDE_OPTION}, - {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION}, - {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION}, - {"file", required_argument, NULL, 'f'}, - {"files-with-matches", no_argument, NULL, 'l'}, - {"files-without-match", no_argument, NULL, 'L'}, - {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION}, - {"help", no_argument, &show_help, 1}, - {"include", required_argument, NULL, INCLUDE_OPTION}, - {"ignore-case", no_argument, NULL, 'i'}, - {"initial-tab", no_argument, NULL, 'T'}, - {"label", required_argument, NULL, LABEL_OPTION}, - {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION}, - {"line-number", no_argument, NULL, 'n'}, - {"line-regexp", no_argument, NULL, 'x'}, - {"max-count", required_argument, NULL, 'm'}, - - /* FIXME: disabled in Mar 2010; warn towards end of 2011; remove in 2013. 
*/ - {"mmap", no_argument, NULL, MMAP_OPTION}, - {"no-filename", no_argument, NULL, 'h'}, - {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION}, - {"no-messages", no_argument, NULL, 's'}, - {"null", no_argument, NULL, 'Z'}, - {"null-data", no_argument, NULL, 'z'}, - {"only-matching", no_argument, NULL, 'o'}, - {"quiet", no_argument, NULL, 'q'}, - {"recursive", no_argument, NULL, 'r'}, - {"dereference-recursive", no_argument, NULL, 'R'}, - {"regexp", required_argument, NULL, 'e'}, - {"invert-match", no_argument, NULL, 'v'}, - {"silent", no_argument, NULL, 'q'}, - {"text", no_argument, NULL, 'a'}, - {"binary", no_argument, NULL, 'U'}, - {"unix-byte-offsets", no_argument, NULL, 'u'}, - {"version", no_argument, NULL, 'V'}, - {"with-filename", no_argument, NULL, 'H'}, - {"word-regexp", no_argument, NULL, 'w'}, - {0, 0, 0, 0} -}; - -/* Define flags declared in grep.h. */ -int match_icase; -int match_words; -int match_lines; -unsigned char eolbyte; - -/* For error messages. */ -/* The input file name, or (if standard input) "-" or a --label argument. */ -static char const *filename; -static size_t filename_prefix_len; -static int errseen; -static int write_error_seen; - -enum directories_type - { - READ_DIRECTORIES = 2, - RECURSE_DIRECTORIES, - SKIP_DIRECTORIES - }; - -/* How to handle directories. */ -static char const *const directories_args[] = -{ - "read", "recurse", "skip", NULL -}; -static enum directories_type const directories_types[] = -{ - READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES -}; -ARGMATCH_VERIFY (directories_args, directories_types); - -static enum directories_type directories = READ_DIRECTORIES; - -enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK }; -static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL; - -/* How to handle devices. 
*/ -static enum - { - READ_COMMAND_LINE_DEVICES, - READ_DEVICES, - SKIP_DEVICES - } devices = READ_COMMAND_LINE_DEVICES; - -static int grepfile (int, char const *, int, int); -static int grepdesc (int, int); -#if defined HAVE_DOS_FILE_CONTENTS -static inline int undossify_input (char *, size_t); -#endif - -static int -is_device_mode (mode_t m) -{ - return S_ISCHR (m) || S_ISBLK (m) || S_ISSOCK (m) || S_ISFIFO (m); -} - -/* Return nonzero if ST->st_size is defined. Assume the file is not a - symbolic link. */ -static int -usable_st_size (struct stat const *st) -{ - return S_ISREG (st->st_mode) || S_TYPEISSHM (st) || S_TYPEISTMO (st); -} - -/* Functions we'll use to search. */ -static compile_fp_t compile; -static execute_fp_t execute; - -/* Like error, but suppress the diagnostic if requested. */ -static void -suppressible_error (char const *mesg, int errnum) -{ - if (! suppress_errors) - error (0, errnum, "%s", mesg); - errseen = 1; -} - -/* If there has already been a write error, don't bother closing - standard output, as that might elicit a duplicate diagnostic. */ -static void -clean_up_stdout (void) -{ - if (! write_error_seen) - close_stdout (); -} - -/* Return 1 if a file is known to be binary for the purpose of 'grep'. - BUF, of size BUFSIZE, is the initial buffer read from the file with - descriptor FD and status ST. */ -static int -file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st) -{ - #ifndef SEEK_HOLE - enum { SEEK_HOLE = SEEK_END }; - #endif - - /* If -z, test only whether the initial buffer contains '\200'; - knowing about holes won't help. */ - if (! eolbyte) - return memchr (buf, '\200', bufsize) != 0; - - /* If the initial buffer contains a null byte, guess that the file - is binary. */ - if (memchr (buf, '\0', bufsize)) - return 1; - - /* If the file has holes, it must contain a null byte somewhere. 
*/ - if (SEEK_HOLE != SEEK_END && usable_st_size (st)) - { - off_t cur = bufsize; - if (O_BINARY || fd == STDIN_FILENO) - { - cur = lseek (fd, 0, SEEK_CUR); - if (cur < 0) - return 0; - } - - /* Look for a hole after the current location. */ - off_t hole_start = lseek (fd, cur, SEEK_HOLE); - if (0 <= hole_start) - { - if (lseek (fd, cur, SEEK_SET) < 0) - suppressible_error (filename, errno); - if (hole_start < st->st_size) - return 1; - } - } - - /* Guess that the file does not contain binary data. */ - return 0; -} - -/* Convert STR to a nonnegative integer, storing the result in *OUT. - STR must be a valid context length argument; report an error if it - isn't. Silently ceiling *OUT at the maximum value, as that is - practically equivalent to infinity for grep's purposes. */ -static void -context_length_arg (char const *str, intmax_t *out) -{ - switch (xstrtoimax (str, 0, 10, out, "")) - { - case LONGINT_OK: - case LONGINT_OVERFLOW: - if (0 <= *out) - break; - /* Fall through. */ - default: - error (EXIT_TROUBLE, 0, "%s: %s", str, - _("invalid context length argument")); - } -} - -/* Return nonzero if the file with NAME should be skipped. - If COMMAND_LINE is nonzero, it is a command-line argument. - If IS_DIR is nonzero, it is a directory. */ -static int -skipped_file (char const *name, int command_line, int is_dir) -{ - return (is_dir - ? (directories == SKIP_DIRECTORIES - || (! (command_line && filename_prefix_len != 0) - && excluded_directory_patterns - && excluded_file_name (excluded_directory_patterns, name))) - : (excluded_patterns - && excluded_file_name (excluded_patterns, name))); -} - -/* Hairy buffering mechanism for grep. The intent is to keep - all reads aligned on a page boundary and multiples of the - page size, unless a read yields a partial page. */ - -static char *buffer; /* Base of buffer. */ -static size_t bufalloc; /* Allocated buffer size, counting slop. */ -#define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. 
*/ -static int bufdesc; /* File descriptor. */ -static char *bufbeg; /* Beginning of user-visible stuff. */ -static char *buflim; /* Limit of user-visible stuff. */ -static size_t pagesize; /* alignment of memory pages */ -static off_t bufoffset; /* Read offset; defined on regular files. */ -static off_t after_last_match; /* Pointer after last matching line that - would have been output if we were - outputting characters. */ - -/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be - an integer or a pointer. Both args must be free of side effects. */ -#define ALIGN_TO(val, alignment) \ - ((size_t) (val) % (alignment) == 0 \ - ? (val) \ - : (val) + ((alignment) - (size_t) (val) % (alignment))) - -/* Reset the buffer for a new file, returning zero if we should skip it. - Initialize on the first time through. */ -static int -reset (int fd, struct stat const *st) -{ - if (! pagesize) - { - pagesize = getpagesize (); - if (pagesize == 0 || 2 * pagesize + 1 <= pagesize) - abort (); - bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + 1; - buffer = xmalloc (bufalloc); - } - - bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize); - bufbeg[-1] = eolbyte; - bufdesc = fd; - - if (S_ISREG (st->st_mode)) - { - if (fd != STDIN_FILENO) - bufoffset = 0; - else - { - bufoffset = lseek (fd, 0, SEEK_CUR); - if (bufoffset < 0) - { - suppressible_error (_("lseek failed"), errno); - return 0; - } - } - } - return 1; -} - -/* Read new stuff into the buffer, saving the specified - amount of old stuff. When we're done, 'bufbeg' points - to the beginning of the buffer contents, and 'buflim' - points just after the end. Return zero if there's an error. */ -static int -fillbuf (size_t save, struct stat const *st) -{ - ssize_t fillsize; - int cc = 1; - char *readbuf; - size_t readsize; - - /* Offset from start of buffer to start of old stuff - that we want to save. 
*/ - size_t saved_offset = buflim - save - buffer; - - if (pagesize <= buffer + bufalloc - buflim) - { - readbuf = buflim; - bufbeg = buflim - save; - } - else - { - size_t minsize = save + pagesize; - size_t newsize; - size_t newalloc; - char *newbuf; - - /* Grow newsize until it is at least as great as minsize. */ - for (newsize = bufalloc - pagesize - 1; newsize < minsize; newsize *= 2) - if (newsize * 2 < newsize || newsize * 2 + pagesize + 1 < newsize * 2) - xalloc_die (); - - /* Try not to allocate more memory than the file size indicates, - as that might cause unnecessary memory exhaustion if the file - is large. However, do not use the original file size as a - heuristic if we've already read past the file end, as most - likely the file is growing. */ - if (usable_st_size (st)) - { - off_t to_be_read = st->st_size - bufoffset; - off_t maxsize_off = save + to_be_read; - if (0 <= to_be_read && to_be_read <= maxsize_off - && maxsize_off == (size_t) maxsize_off - && minsize <= (size_t) maxsize_off - && (size_t) maxsize_off < newsize) - newsize = maxsize_off; - } - - /* Add enough room so that the buffer is aligned and has room - for byte sentinels fore and aft. */ - newalloc = newsize + pagesize + 1; - - newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer; - readbuf = ALIGN_TO (newbuf + 1 + save, pagesize); - bufbeg = readbuf - save; - memmove (bufbeg, buffer + saved_offset, save); - bufbeg[-1] = eolbyte; - if (newbuf != buffer) - { - free (buffer); - buffer = newbuf; - } - } - - readsize = buffer + bufalloc - readbuf; - readsize -= readsize % pagesize; - - fillsize = read (bufdesc, readbuf, readsize); - if (fillsize < 0) - fillsize = cc = 0; - bufoffset += fillsize; -#if defined HAVE_DOS_FILE_CONTENTS - if (fillsize) - fillsize = undossify_input (readbuf, fillsize); -#endif - buflim = readbuf + fillsize; - return cc; -} - -/* Flags controlling the style of output. 
*/ -static enum -{ - BINARY_BINARY_FILES, - TEXT_BINARY_FILES, - WITHOUT_MATCH_BINARY_FILES -} binary_files; /* How to handle binary files. */ - -static int filename_mask; /* If zero, output nulls after filenames. */ -static int out_quiet; /* Suppress all normal output. */ -static int out_invert; /* Print nonmatching stuff. */ -static int out_file; /* Print filenames. */ -static int out_line; /* Print line numbers. */ -static int out_byte; /* Print byte offsets. */ -static intmax_t out_before; /* Lines of leading context. */ -static intmax_t out_after; /* Lines of trailing context. */ -static int count_matches; /* Count matching lines. */ -static int list_files; /* List matching files. */ -static int no_filenames; /* Suppress file names. */ -static intmax_t max_count; /* Stop after outputting this many - lines from an input file. */ -static int line_buffered; /* If nonzero, use line buffering, i.e. - fflush everyline out. */ -static char *label = NULL; /* Fake filename for stdin */ - - -/* Internal variables to keep track of byte count, context, etc. */ -static uintmax_t totalcc; /* Total character count before bufbeg. */ -static char const *lastnl; /* Pointer after last newline counted. */ -static char const *lastout; /* Pointer after last character output; - NULL if no character has been output - or if it's conceptually before bufbeg. */ -static uintmax_t totalnl; /* Total newline count before lastnl. */ -static intmax_t outleft; /* Maximum number of lines to be output. */ -static intmax_t pending; /* Pending lines of output. - Always kept 0 if out_quiet is true. */ -static int done_on_match; /* Stop scanning file on first match. */ -static int exit_on_match; /* Exit on first match. */ - -#if defined HAVE_DOS_FILE_CONTENTS -# include "dosbuf.c" -#endif - -/* Add two numbers that count input bytes or lines, and report an - error if the addition overflows. 
*/ -static uintmax_t -add_count (uintmax_t a, uintmax_t b) -{ - uintmax_t sum = a + b; - if (sum < a) - error (EXIT_TROUBLE, 0, _("input is too large to count")); - return sum; -} - -static void -nlscan (char const *lim) -{ - size_t newlines = 0; - char const *beg; - for (beg = lastnl; beg < lim; beg++) - { - beg = memchr (beg, eolbyte, lim - beg); - if (!beg) - break; - newlines++; - } - totalnl = add_count (totalnl, newlines); - lastnl = lim; -} - -/* Print the current filename. */ -static void -print_filename (void) -{ - pr_sgr_start_if (filename_color); - fputs (filename, stdout); - pr_sgr_end_if (filename_color); -} - -/* Print a character separator. */ -static void -print_sep (char sep) -{ - pr_sgr_start_if (sep_color); - fputc (sep, stdout); - pr_sgr_end_if (sep_color); -} - -/* Print a line number or a byte offset. */ -static void -print_offset (uintmax_t pos, int min_width, const char *color) -{ - /* Do not rely on printf to print pos, since uintmax_t may be longer - than long, and long long is not portable. */ - - char buf[sizeof pos * CHAR_BIT]; - char *p = buf + sizeof buf; - - do - { - *--p = '0' + pos % 10; - --min_width; - } - while ((pos /= 10) != 0); - - /* Do this to maximize the probability of alignment across lines. */ - if (align_tabs) - while (--min_width >= 0) - *--p = ' '; - - pr_sgr_start_if (color); - fwrite (p, 1, buf + sizeof buf - p, stdout); - pr_sgr_end_if (color); -} - -/* Print a whole line head (filename, line, byte). 
*/ -static void -print_line_head (char const *beg, char const *lim, int sep) -{ - int pending_sep = 0; - - if (out_file) - { - print_filename (); - if (filename_mask) - pending_sep = 1; - else - fputc (0, stdout); - } - - if (out_line) - { - if (lastnl < lim) - { - nlscan (beg); - totalnl = add_count (totalnl, 1); - lastnl = lim; - } - if (pending_sep) - print_sep (sep); - print_offset (totalnl, 4, line_num_color); - pending_sep = 1; - } - - if (out_byte) - { - uintmax_t pos = add_count (totalcc, beg - bufbeg); -#if defined HAVE_DOS_FILE_CONTENTS - pos = dossified_pos (pos); -#endif - if (pending_sep) - print_sep (sep); - print_offset (pos, 6, byte_num_color); - pending_sep = 1; - } - - if (pending_sep) - { - /* This assumes sep is one column wide. - Try doing this any other way with Unicode - (and its combining and wide characters) - filenames and you're wasting your efforts. */ - if (align_tabs) - fputs ("\t\b", stdout); - - print_sep (sep); - } -} - -static const char * -print_line_middle (const char *beg, const char *lim, - const char *line_color, const char *match_color) -{ - size_t match_size; - size_t match_offset; - const char *cur = beg; - const char *mid = NULL; - - while (cur < lim - && ((match_offset = execute (beg, lim - beg, &match_size, - beg + (cur - beg))) != (size_t) -1)) - { - char const *b = beg + match_offset; - - /* Avoid matching the empty line at the end of the buffer. */ - if (b == lim) - break; - - /* Avoid hanging on grep --color "" foo */ - if (match_size == 0) - { - /* Make minimal progress; there may be further non-empty matches. */ - /* XXX - Could really advance by one whole multi-octet character. */ - match_size = 1; - if (!mid) - mid = cur; - } - else - { - /* This function is called on a matching line only, - but is it selected or rejected/context? */ - if (only_matching) - print_line_head (b, lim, (out_invert ? 
SEP_CHAR_REJECTED - : SEP_CHAR_SELECTED)); - else - { - pr_sgr_start (line_color); - if (mid) - { - cur = mid; - mid = NULL; - } - fwrite (cur, sizeof (char), b - cur, stdout); - } - - pr_sgr_start_if (match_color); - fwrite (b, sizeof (char), match_size, stdout); - pr_sgr_end_if (match_color); - if (only_matching) - fputs ("\n", stdout); - } - cur = b + match_size; - } - - if (only_matching) - cur = lim; - else if (mid) - cur = mid; - - return cur; -} - -static const char * -print_line_tail (const char *beg, const char *lim, const char *line_color) -{ - size_t eol_size; - size_t tail_size; - - eol_size = (lim > beg && lim[-1] == eolbyte); - eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r'); - tail_size = lim - eol_size - beg; - - if (tail_size > 0) - { - pr_sgr_start (line_color); - fwrite (beg, 1, tail_size, stdout); - beg += tail_size; - pr_sgr_end (line_color); - } - - return beg; -} - -static void -prline (char const *beg, char const *lim, int sep) -{ - int matching; - const char *line_color; - const char *match_color; - - if (!only_matching) - print_line_head (beg, lim, sep); - - matching = (sep == SEP_CHAR_SELECTED) ^ !!out_invert; - - if (color_option) - { - line_color = (((sep == SEP_CHAR_SELECTED) - ^ (out_invert && (color_option < 0))) - ? selected_line_color : context_line_color); - match_color = (sep == SEP_CHAR_SELECTED - ? selected_match_color : context_match_color); - } - else - line_color = match_color = NULL; /* Shouldn't be used. */ - - if ((only_matching && matching) - || (color_option && (*line_color || *match_color))) - { - /* We already know that non-matching lines have no match (to colorize). */ - if (matching && (only_matching || *match_color)) - beg = print_line_middle (beg, lim, line_color, match_color); - - /* FIXME: this test may be removable. 
*/ - if (!only_matching && *line_color) - beg = print_line_tail (beg, lim, line_color); - } - - if (!only_matching && lim > beg) - fwrite (beg, 1, lim - beg, stdout); - - if (ferror (stdout)) - { - write_error_seen = 1; - error (EXIT_TROUBLE, 0, _("write error")); - } - - lastout = lim; - - if (line_buffered) - fflush (stdout); -} - -/* Print pending lines of trailing context prior to LIM. Trailing context ends - at the next matching line when OUTLEFT is 0. */ -static void -prpending (char const *lim) -{ - if (!lastout) - lastout = bufbeg; - while (pending > 0 && lastout < lim) - { - char const *nl = memchr (lastout, eolbyte, lim - lastout); - size_t match_size; - --pending; - if (outleft - || ((execute (lastout, nl + 1 - lastout, - &match_size, NULL) == (size_t) -1) - == !out_invert)) - prline (lastout, nl + 1, SEP_CHAR_REJECTED); - else - pending = 0; - } -} - -/* Print the lines between BEG and LIM. Deal with context crap. - If NLINESP is non-null, store a count of lines between BEG and LIM. */ -static void -prtext (char const *beg, char const *lim, intmax_t *nlinesp) -{ - static int used; /* avoid printing SEP_STR_GROUP before any output */ - char const *bp, *p; - char eol = eolbyte; - intmax_t i, n; - - if (!out_quiet && pending > 0) - prpending (beg); - - p = beg; - - if (!out_quiet) - { - /* Deal with leading context crap. */ - - bp = lastout ? lastout : bufbeg; - for (i = 0; i < out_before; ++i) - if (p > bp) - do - --p; - while (p[-1] != eol); - - /* We print the SEP_STR_GROUP separator only if our output is - discontiguous from the last output in the file. */ - if ((out_before || out_after) && used && p != lastout && group_separator) - { - pr_sgr_start_if (sep_color); - fputs (group_separator, stdout); - pr_sgr_end_if (sep_color); - fputc ('\n', stdout); - } - - while (p < beg) - { - char const *nl = memchr (p, eol, beg - p); - nl++; - prline (p, nl, SEP_CHAR_REJECTED); - p = nl; - } - } - - if (nlinesp) - { - /* Caller wants a line count. 
*/ - for (n = 0; p < lim && n < outleft; n++) - { - char const *nl = memchr (p, eol, lim - p); - nl++; - if (!out_quiet) - prline (p, nl, SEP_CHAR_SELECTED); - p = nl; - } - *nlinesp = n; - - /* relying on it that this function is never called when outleft = 0. */ - after_last_match = bufoffset - (buflim - p); - } - else if (!out_quiet) - prline (beg, lim, SEP_CHAR_SELECTED); - - pending = out_quiet ? 0 : out_after; - used = 1; -} - -static size_t -do_execute (char const *buf, size_t size, size_t *match_size, char const *start_ptr) -{ - size_t result; - const char *line_next; - - /* With the current implementation, using --ignore-case with a multi-byte - character set is very inefficient when applied to a large buffer - containing many matches. We can avoid much of the wasted effort - by matching line-by-line. - - FIXME: this is just an ugly workaround, and it doesn't really - belong here. Also, PCRE is always using this same per-line - matching algorithm. Either we fix -i, or we should refactor - this code---for example, we could add another function pointer - to struct matcher to split the buffer passed to execute. It would - perform the memchr if line-by-line matching is necessary, or just - return buf + size otherwise. */ - if (MB_CUR_MAX == 1 || !match_icase) - return execute (buf, size, match_size, start_ptr); - - for (line_next = buf; line_next < buf + size; ) - { - const char *line_buf = line_next; - const char *line_end = memchr (line_buf, eolbyte, (buf + size) - line_buf); - if (line_end == NULL) - line_next = line_end = buf + size; - else - line_next = line_end + 1; - - if (start_ptr && start_ptr >= line_end) - continue; - - result = execute (line_buf, line_next - line_buf, match_size, start_ptr); - if (result != (size_t) -1) - return (line_buf - buf) + result; - } - - return (size_t) -1; -} - -/* Scan the specified portion of the buffer, matching lines (or - between matching lines if OUT_INVERT is true). Return a count of - lines printed. 
*/ -static intmax_t -grepbuf (char const *beg, char const *lim) -{ - intmax_t nlines, n; - char const *p; - size_t match_offset; - size_t match_size; - - nlines = 0; - p = beg; - while ((match_offset = do_execute (p, lim - p, &match_size, - NULL)) != (size_t) -1) - { - char const *b = p + match_offset; - char const *endp = b + match_size; - /* Avoid matching the empty line at the end of the buffer. */ - if (b == lim) - break; - if (!out_invert) - { - prtext (b, endp, NULL); - nlines++; - outleft--; - if (!outleft || done_on_match) - { - if (exit_on_match) - exit (EXIT_SUCCESS); - after_last_match = bufoffset - (buflim - endp); - return nlines; - } - } - else if (p < b) - { - prtext (p, b, &n); - nlines += n; - outleft -= n; - if (!outleft) - return nlines; - } - p = endp; - } - if (out_invert && p < lim) - { - prtext (p, lim, &n); - nlines += n; - outleft -= n; - } - return nlines; -} - -/* Search a given file. Normally, return a count of lines printed; - but if the file is a directory and we search it recursively, then - return -2 if there was a match, and -1 otherwise. */ -static intmax_t -grep (int fd, struct stat const *st) -{ - intmax_t nlines, i; - int not_text; - size_t residue, save; - char oldc; - char *beg; - char *lim; - char eol = eolbyte; - - if (! reset (fd, st)) - return 0; - - totalcc = 0; - lastout = 0; - totalnl = 0; - outleft = max_count; - after_last_match = 0; - pending = 0; - - nlines = 0; - residue = 0; - save = 0; - - if (! 
fillbuf (save, st)) - { - if (errno != EINVAL) - suppressible_error (filename, errno); - return 0; - } - - not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet) - || binary_files == WITHOUT_MATCH_BINARY_FILES) - && file_is_binary (bufbeg, buflim - bufbeg, fd, st)); - if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES) - return 0; - done_on_match += not_text; - out_quiet += not_text; - - for (;;) - { - lastnl = bufbeg; - if (lastout) - lastout = bufbeg; - - beg = bufbeg + save; - - /* no more data to scan (eof) except for maybe a residue -> break */ - if (beg == buflim) - break; - - /* Determine new residue (the length of an incomplete line at the end of - the buffer, 0 means there is no incomplete last line). */ - oldc = beg[-1]; - beg[-1] = eol; - /* FIXME: use rawmemrchr if/when it exists, since we have ensured - that this use of memrchr is guaranteed never to return NULL. */ - lim = memrchr (beg - 1, eol, buflim - beg + 1); - ++lim; - beg[-1] = oldc; - if (lim == beg) - lim = beg - residue; - beg -= residue; - residue = buflim - lim; - - if (beg < lim) - { - if (outleft) - nlines += grepbuf (beg, lim); - if (pending) - prpending (lim); - if ((!outleft && !pending) || (nlines && done_on_match && !out_invert)) - goto finish_grep; - } - - /* The last OUT_BEFORE lines at the end of the buffer will be needed as - leading context if there is a matching line at the begin of the - next data. Make beg point to their begin. */ - i = 0; - beg = lim; - while (i < out_before && beg > bufbeg && beg != lastout) - { - ++i; - do - --beg; - while (beg[-1] != eol); - } - - /* detect if leading context is discontinuous from last printed line. */ - if (beg != lastout) - lastout = 0; - - /* Handle some details and read more data to scan. */ - save = residue + lim - beg; - if (out_byte) - totalcc = add_count (totalcc, buflim - bufbeg - save); - if (out_line) - nlscan (beg); - if (! 
fillbuf (save, st)) - { - suppressible_error (filename, errno); - goto finish_grep; - } - } - if (residue) - { - *buflim++ = eol; - if (outleft) - nlines += grepbuf (bufbeg + save - residue, buflim); - if (pending) - prpending (buflim); - } - - finish_grep: - done_on_match -= not_text; - out_quiet -= not_text; - if ((not_text & ~out_quiet) && nlines != 0) - printf (_("Binary file %s matches\n"), filename); - return nlines; -} - -static int -grepdirent (FTS *fts, FTSENT *ent, int command_line) -{ - int follow, dirdesc; - struct stat *st = ent->fts_statp; - command_line &= ent->fts_level == FTS_ROOTLEVEL; - - if (ent->fts_info == FTS_DP) - { - if (directories == RECURSE_DIRECTORIES && command_line) - out_file &= ~ (2 * !no_filenames); - return 1; - } - - if (skipped_file (ent->fts_name, command_line, - (ent->fts_info == FTS_D || ent->fts_info == FTS_DC - || ent->fts_info == FTS_DNR))) - { - fts_set (fts, ent, FTS_SKIP); - return 1; - } - - filename = ent->fts_path + filename_prefix_len; - follow = (fts->fts_options & FTS_LOGICAL - || (fts->fts_options & FTS_COMFOLLOW && command_line)); - - switch (ent->fts_info) - { - case FTS_D: - if (directories == RECURSE_DIRECTORIES) - { - out_file |= 2 * !no_filenames; - return 1; - } - fts_set (fts, ent, FTS_SKIP); - break; - - case FTS_DC: - if (!suppress_errors) - error (0, 0, _("warning: %s: %s"), filename, - _("recursive directory loop")); - return 1; - - case FTS_DNR: - case FTS_ERR: - case FTS_NS: - suppressible_error (filename, ent->fts_errno); - return 1; - - case FTS_DEFAULT: - case FTS_NSOK: - if (devices == SKIP_DEVICES - || (devices == READ_COMMAND_LINE_DEVICES && !command_line)) - { - struct stat st1; - if (! st->st_mode) - { - /* The file type is not already known. Get the file status - before opening, since opening might have side effects - on a device. */ - int flag = follow ? 
0 : AT_SYMLINK_NOFOLLOW; - if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0) - { - suppressible_error (filename, errno); - return 1; - } - st = &st1; - } - if (is_device_mode (st->st_mode)) - return 1; - } - break; - - case FTS_F: - case FTS_SLNONE: - break; - - case FTS_SL: - case FTS_W: - return 1; - - default: - abort (); - } - - dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD - ? fts->fts_cwd_fd - : AT_FDCWD); - return grepfile (dirdesc, ent->fts_accpath, follow, command_line); -} - -static int -grepfile (int dirdesc, char const *name, int follow, int command_line) -{ - int desc = openat_safer (dirdesc, name, O_RDONLY | (follow ? 0 : O_NOFOLLOW)); - if (desc < 0) - { - if (follow || (errno != ELOOP && errno != EMLINK)) - suppressible_error (filename, errno); - return 1; - } - return grepdesc (desc, command_line); -} - -static int -grepdesc (int desc, int command_line) -{ - intmax_t count; - int status = 1; - struct stat st; - - /* Get the file status, possibly for the second time. This catches - a race condition if the directory entry changes after the - directory entry is read and before the file is opened. For - example, normally DESC is a directory only at the top level, but - there is an exception if some other process substitutes a - directory for a non-directory while 'grep' is running. */ - if (fstat (desc, &st) != 0) - { - suppressible_error (filename, errno); - goto closeout; - } - - if (desc != STDIN_FILENO && command_line - && skipped_file (filename, 1, S_ISDIR (st.st_mode))) - goto closeout; - - if (desc != STDIN_FILENO - && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode)) - { - /* Traverse the directory starting with its full name, because - unfortunately fts provides no way to traverse the directory - starting from its file descriptor. */ - - FTS *fts; - FTSENT *ent; - int opts = fts_options & ~(command_line ? 
0 : FTS_COMFOLLOW); - char *fts_arg[2]; - - /* Close DESC now, to conserve file descriptors if the race - condition occurs many times in a deep recursion. */ - if (close (desc) != 0) - suppressible_error (filename, errno); - - fts_arg[0] = (char *) filename; - fts_arg[1] = NULL; - fts = fts_open (fts_arg, opts, NULL); - - if (!fts) - xalloc_die (); - while ((ent = fts_read (fts))) - status &= grepdirent (fts, ent, command_line); - if (errno) - suppressible_error (filename, errno); - if (fts_close (fts) != 0) - suppressible_error (filename, errno); - return status; - } - if (desc != STDIN_FILENO - && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode)) - || ((devices == SKIP_DEVICES - || (devices == READ_COMMAND_LINE_DEVICES && !command_line)) - && is_device_mode (st.st_mode)))) - goto closeout; - - /* If there is a regular file on stdout and the current file refers - to the same i-node, we have to report the problem and skip it. - Otherwise when matching lines from some other input reach the - disk before we open this file, we can end up reading and matching - those lines and appending them to the file from which we're reading. - Then we'd have what appears to be an infinite loop that'd terminate - only upon filling the output file system or reaching a quota. - However, there is no risk of an infinite loop if grep is generating - no output, i.e., with --silent, --quiet, -q. - Similarly, with any of these: - --max-count=N (-m) (for N >= 2) - --files-with-matches (-l) - --files-without-match (-L) - there is no risk of trouble. - For --max-count=1, grep stops after printing the first match, - so there is no risk of malfunction. But even --max-count=2, with - input==output, while there is no risk of infloop, there is a race - condition that could result in "alternate" output. */ - if (!out_quiet && list_files == 0 && 1 < max_count - && S_ISREG (out_stat.st_mode) && out_stat.st_ino - && SAME_INODE (st, out_stat)) - { - if (! 
suppress_errors) - error (0, 0, _("input file %s is also the output"), quote (filename)); - errseen = 1; - goto closeout; - } - -#if defined SET_BINARY - /* Set input to binary mode. Pipes are simulated with files - on DOS, so this includes the case of "foo | grep bar". */ - if (!isatty (desc)) - SET_BINARY (desc); -#endif - - count = grep (desc, &st); - if (count < 0) - status = count + 2; - else - { - if (count_matches) - { - if (out_file) - { - print_filename (); - if (filename_mask) - print_sep (SEP_CHAR_SELECTED); - else - fputc (0, stdout); - } - printf ("%" PRIdMAX "\n", count); - } - - status = !count; - if (list_files == 1 - 2 * status) - { - print_filename (); - fputc ('\n' & filename_mask, stdout); - } - - if (desc == STDIN_FILENO) - { - off_t required_offset = outleft ? bufoffset : after_last_match; - if (required_offset != bufoffset - && lseek (desc, required_offset, SEEK_SET) < 0 - && S_ISREG (st.st_mode)) - suppressible_error (filename, errno); - } - } - - closeout: - if (desc != STDIN_FILENO && close (desc) != 0) - suppressible_error (filename, errno); - return status; -} - -static int -grep_command_line_arg (char const *arg) -{ - if (STREQ (arg, "-")) - { - filename = label ? label : _("(standard input)"); - return grepdesc (STDIN_FILENO, 1); - } - else - { - filename = arg; - return grepfile (AT_FDCWD, arg, 1, 1); - } -} - -_Noreturn void usage (int); -void -usage (int status) -{ - if (status != 0) - { - fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), - program_name); - fprintf (stderr, _("Try '%s --help' for more information.\n"), - program_name); - } - else - { - printf (_("Usage: %s [OPTION]... 
PATTERN [FILE]...\n"), program_name); - printf (_("\ -Search for PATTERN in each FILE or standard input.\n")); - fputs (_(before_options), stdout); - printf (_("\ -Example: %s -i 'hello world' menu.h main.c\n\ -\n\ -Regexp selection and interpretation:\n"), program_name); - if (matchers[1].name) - printf (_("\ - -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\ - -F, --fixed-strings PATTERN is a set of newline-separated fixed strings\n\ - -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\ - -P, --perl-regexp PATTERN is a Perl regular expression\n")); - /* -X is undocumented on purpose. */ - printf (_("\ - -e, --regexp=PATTERN use PATTERN for matching\n\ - -f, --file=FILE obtain PATTERN from FILE\n\ - -i, --ignore-case ignore case distinctions\n\ - -w, --word-regexp force PATTERN to match only whole words\n\ - -x, --line-regexp force PATTERN to match only whole lines\n\ - -z, --null-data a data line ends in 0 byte, not newline\n")); - printf (_("\ -\n\ -Miscellaneous:\n\ - -s, --no-messages suppress error messages\n\ - -v, --invert-match select non-matching lines\n\ - -V, --version print version information and exit\n\ - --help display this help and exit\n\ - --mmap deprecated no-op; evokes a warning\n")); - printf (_("\ -\n\ -Output control:\n\ - -m, --max-count=NUM stop after NUM matches\n\ - -b, --byte-offset print the byte offset with output lines\n\ - -n, --line-number print line number with output lines\n\ - --line-buffered flush output on every line\n\ - -H, --with-filename print the file name for each match\n\ - -h, --no-filename suppress the file name prefix on output\n\ - --label=LABEL use LABEL as the standard input file name prefix\n\ -")); - printf (_("\ - -o, --only-matching show only the part of a line matching PATTERN\n\ - -q, --quiet, --silent suppress all normal output\n\ - --binary-files=TYPE assume that binary files are TYPE;\n\ - TYPE is 'binary', 'text', or 'without-match'\n\ - -a, --text equivalent to 
--binary-files=text\n\ -")); - printf (_("\ - -I equivalent to --binary-files=without-match\n\ - -d, --directories=ACTION how to handle directories;\n\ - ACTION is 'read', 'recurse', or 'skip'\n\ - -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\ - ACTION is 'read' or 'skip'\n\ - -r, --recursive like --directories=recurse\n\ - -R, --dereference-recursive likewise, but follow all symlinks\n\ -")); - printf (_("\ - --include=FILE_PATTERN search only files that match FILE_PATTERN\n\ - --exclude=FILE_PATTERN skip files and directories matching FILE_PATTERN\n\ - --exclude-from=FILE skip files matching any file pattern from FILE\n\ - --exclude-dir=PATTERN directories that match PATTERN will be skipped.\n\ -")); - printf (_("\ - -L, --files-without-match print only names of FILEs containing no match\n\ - -l, --files-with-matches print only names of FILEs containing matches\n\ - -c, --count print only a count of matching lines per FILE\n\ - -T, --initial-tab make tabs line up (if needed)\n\ - -Z, --null print 0 byte after FILE name\n")); - printf (_("\ -\n\ -Context control:\n\ - -B, --before-context=NUM print NUM lines of leading context\n\ - -A, --after-context=NUM print NUM lines of trailing context\n\ - -C, --context=NUM print NUM lines of output context\n\ -")); - printf (_("\ - -NUM same as --context=NUM\n\ - --color[=WHEN],\n\ - --colour[=WHEN] use markers to highlight the matching strings;\n\ - WHEN is 'always', 'never', or 'auto'\n\ - -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\ - -u, --unix-byte-offsets report offsets as if CRs were not there\n\ - (MSDOS/Windows)\n\ -\n")); - fputs (_(after_options), stdout); - printf (_("\ -When FILE is -, read standard input. With no FILE, read . if a command-line\n\ --r is given, - otherwise. 
If fewer than two FILEs are given, assume -h.\n\ -Exit status is 0 if any line is selected, 1 otherwise;\n\ -if any error occurs and -q is not given, the exit status is 2.\n")); - printf (_("\nReport bugs to: %s\n"), PACKAGE_BUGREPORT); - printf (_("GNU Grep home page: <%s>\n"), - "http://www.gnu.org/software/grep/"); - fputs (_("General help using GNU software: \n"), - stdout); - - } - exit (status); -} - -/* If M is NULL, initialize the matcher to the default. Otherwise set the - matcher to M if available. Exit in case of conflicts or if M is not - available. */ -static void -setmatcher (char const *m) -{ - static char const *matcher; - unsigned int i; - - if (!m) - { - compile = matchers[0].compile; - execute = matchers[0].execute; - if (!matchers[1].name) - matcher = matchers[0].name; - } - - else if (matcher) - { - if (matcher && STREQ (matcher, m)) - ; - - else if (!matchers[1].name) - error (EXIT_TROUBLE, 0, _("%s can only use the %s pattern syntax"), - program_name, matcher); - else - error (EXIT_TROUBLE, 0, _("conflicting matchers specified")); - } - - else - { - for (i = 0; matchers[i].name; i++) - if (STREQ (m, matchers[i].name)) - { - compile = matchers[i].compile; - execute = matchers[i].execute; - matcher = m; - return; - } - - error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m); - } -} - -/* Find the white-space-separated options specified by OPTIONS, and - using BUF to store copies of these options, set ARGV[0], ARGV[1], - etc. to the option copies. Return the number N of options found. - Do not set ARGV[N] to NULL. If ARGV is NULL, do not store ARGV[0] - etc. Backslash can be used to escape whitespace (and backslashes). 
*/ -static size_t -prepend_args (char const *options, char *buf, char **argv) -{ - char const *o = options; - char *b = buf; - size_t n = 0; - - for (;;) - { - while (c_isspace ((unsigned char) *o)) - o++; - if (!*o) - return n; - if (argv) - argv[n] = b; - n++; - - do - if ((*b++ = *o++) == '\\' && *o) - b[-1] = *o++; - while (*o && ! c_isspace ((unsigned char) *o)); - - *b++ = '\0'; - } -} - -/* Prepend the whitespace-separated options in OPTIONS to the argument - vector of a main program with argument count *PARGC and argument - vector *PARGV. Return the number of options prepended. */ -static int -prepend_default_options (char const *options, int *pargc, char ***pargv) -{ - if (options && *options) - { - char *buf = xmalloc (strlen (options) + 1); - size_t prepended = prepend_args (options, buf, NULL); - int argc = *pargc; - char *const *argv = *pargv; - char **pp; - enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) }; - if (MAX_ARGS - argc < prepended) - xalloc_die (); - pp = xmalloc ((prepended + argc + 1) * sizeof *pp); - *pargc = prepended + argc; - *pargv = pp; - *pp++ = *argv++; - pp += prepend_args (options, buf, pp); - while ((*pp++ = *argv++)) - continue; - return prepended; - } - - return 0; -} - -/* Get the next non-digit option from ARGC and ARGV. - Return -1 if there are no more options. - Process any digit options that were encountered on the way, - and store the resulting integer into *DEFAULT_CONTEXT. */ -static int -get_nondigit_option (int argc, char *const *argv, intmax_t *default_context) -{ - static int prev_digit_optind = -1; - int opt, this_digit_optind, was_digit; - char buf[INT_BUFSIZE_BOUND (intmax_t) + 4]; - char *p = buf; - - was_digit = 0; - this_digit_optind = optind; - while (opt = getopt_long (argc, (char **) argv, short_options, long_options, - NULL), - '0' <= opt && opt <= '9') - { - if (prev_digit_optind != this_digit_optind || !was_digit) - { - /* Reset to start another context length argument. 
*/ - p = buf; - } - else - { - /* Suppress trivial leading zeros, to avoid incorrect - diagnostic on strings like 00000000000. */ - p -= buf[0] == '0'; - } - - if (p == buf + sizeof buf - 4) - { - /* Too many digits. Append "..." to make context_length_arg - complain about "X...", where X contains the digits seen - so far. */ - strcpy (p, "..."); - p += 3; - break; - } - *p++ = opt; - - was_digit = 1; - prev_digit_optind = this_digit_optind; - this_digit_optind = optind; - } - if (p != buf) - { - *p = '\0'; - context_length_arg (buf, default_context); - } - - return opt; -} - -/* Parse GREP_COLORS. The default would look like: - GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36' - with boolean capabilities (ne and rv) unset (i.e., omitted). - No character escaping is needed or supported. */ -static void -parse_grep_colors (void) -{ - const char *p; - char *q; - char *name; - char *val; - - p = getenv ("GREP_COLORS"); /* Plural! */ - if (p == NULL || *p == '\0') - return; - - /* Work off a writable copy. */ - q = xstrdup (p); - - name = q; - val = NULL; - /* From now on, be well-formed or you're gone. */ - for (;;) - if (*q == ':' || *q == '\0') - { - char c = *q; - struct color_cap const *cap; - - *q++ = '\0'; /* Terminate name or val. */ - /* Empty name without val (empty cap) - * won't match and will be ignored. */ - for (cap = color_dict; cap->name; cap++) - if (STREQ (cap->name, name)) - break; - /* If name unknown, go on for forward compatibility. */ - if (cap->var && val) - *(cap->var) = val; - if (cap->fct) - cap->fct (); - if (c == '\0') - return; - name = q; - val = NULL; - } - else if (*q == '=') - { - if (q == name || val) - return; - *q++ = '\0'; /* Terminate name. */ - val = q; /* Can be the empty string. */ - } - else if (val == NULL) - q++; /* Accumulate name. */ - else if (*q == ';' || (*q >= '0' && *q <= '9')) - q++; /* Accumulate val. Protect the terminal from being sent crap. 
*/ - else - return; -} - -int -main (int argc, char **argv) -{ - char *keys; - size_t keycc, oldcc, keyalloc; - int with_filenames; - size_t cc; - int opt, status, prepended; - int prev_optind, last_recursive; - intmax_t default_context; - FILE *fp; - exit_failure = EXIT_TROUBLE; - initialize_main (&argc, &argv); - set_program_name (argv[0]); - program_name = argv[0]; - - keys = NULL; - keycc = 0; - with_filenames = 0; - eolbyte = '\n'; - filename_mask = ~0; - - max_count = INTMAX_MAX; - - /* The value -1 means to use DEFAULT_CONTEXT. */ - out_after = out_before = -1; - /* Default before/after context: changed by -C/-NUM options */ - default_context = 0; - /* Changed by -o option */ - only_matching = 0; - - /* Internationalization. */ -#if defined HAVE_SETLOCALE - setlocale (LC_ALL, ""); -#endif -#if defined ENABLE_NLS - bindtextdomain (PACKAGE, LOCALEDIR); - textdomain (PACKAGE); -#endif - - exit_failure = EXIT_TROUBLE; - atexit (clean_up_stdout); - - last_recursive = 0; - prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv); - setmatcher (NULL); - - while (prev_optind = optind, - (opt = get_nondigit_option (argc, argv, &default_context)) != -1) - switch (opt) - { - case 'A': - context_length_arg (optarg, &out_after); - break; - - case 'B': - context_length_arg (optarg, &out_before); - break; - - case 'C': - /* Set output match context, but let any explicit leading or - trailing amount specified with -A or -B stand. 
*/ - context_length_arg (optarg, &default_context); - break; - - case 'D': - if (STREQ (optarg, "read")) - devices = READ_DEVICES; - else if (STREQ (optarg, "skip")) - devices = SKIP_DEVICES; - else - error (EXIT_TROUBLE, 0, _("unknown devices method")); - break; - - case 'E': - setmatcher ("egrep"); - break; - - case 'F': - setmatcher ("fgrep"); - break; - - case 'P': - setmatcher ("perl"); - break; - - case 'G': - setmatcher ("grep"); - break; - - case 'X': /* undocumented on purpose */ - setmatcher (optarg); - break; - - case 'H': - with_filenames = 1; - no_filenames = 0; - break; - - case 'I': - binary_files = WITHOUT_MATCH_BINARY_FILES; - break; - - case 'T': - align_tabs = 1; - break; - - case 'U': -#if defined HAVE_DOS_FILE_CONTENTS - dos_use_file_type = DOS_BINARY; -#endif - break; - - case 'u': -#if defined HAVE_DOS_FILE_CONTENTS - dos_report_unix_offset = 1; -#endif - break; - - case 'V': - show_version = 1; - break; - - case 'a': - binary_files = TEXT_BINARY_FILES; - break; - - case 'b': - out_byte = 1; - break; - - case 'c': - count_matches = 1; - break; - - case 'd': - directories = XARGMATCH ("--directories", optarg, - directories_args, directories_types); - if (directories == RECURSE_DIRECTORIES) - last_recursive = prev_optind; - break; - - case 'e': - cc = strlen (optarg); - keys = xrealloc (keys, keycc + cc + 1); - strcpy (&keys[keycc], optarg); - keycc += cc; - keys[keycc++] = '\n'; - break; - - case 'f': - fp = STREQ (optarg, "-") ? stdin : fopen (optarg, "r"); - if (!fp) - error (EXIT_TROUBLE, errno, "%s", optarg); - for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2) - ; - keys = xrealloc (keys, keyalloc); - oldcc = keycc; - while (!feof (fp) - && (cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) > 0) - { - keycc += cc; - if (keycc == keyalloc - 1) - keys = x2nrealloc (keys, &keyalloc, sizeof *keys); - } - if (fp != stdin) - fclose (fp); - /* Append final newline if file ended in non-newline. 
*/ - if (oldcc != keycc && keys[keycc - 1] != '\n') - keys[keycc++] = '\n'; - break; - - case 'h': - with_filenames = 0; - no_filenames = 1; - break; - - case 'i': - case 'y': /* For old-timers . . . */ - match_icase = 1; - break; - - case 'L': - /* Like -l, except list files that don't contain matches. - Inspired by the same option in Hume's gre. */ - list_files = -1; - break; - - case 'l': - list_files = 1; - break; - - case 'm': - switch (xstrtoimax (optarg, 0, 10, &max_count, "")) - { - case LONGINT_OK: - case LONGINT_OVERFLOW: - break; - - default: - error (EXIT_TROUBLE, 0, _("invalid max count")); - } - break; - - case 'n': - out_line = 1; - break; - - case 'o': - only_matching = 1; - break; - - case 'q': - exit_on_match = 1; - exit_failure = 0; - break; - - case 'R': - fts_options = basic_fts_options | FTS_LOGICAL; - /* Fall through. */ - case 'r': - directories = RECURSE_DIRECTORIES; - last_recursive = prev_optind; - break; - - case 's': - suppress_errors = 1; - break; - - case 'v': - out_invert = 1; - break; - - case 'w': - match_words = 1; - break; - - case 'x': - match_lines = 1; - break; - - case 'Z': - filename_mask = 0; - break; - - case 'z': - eolbyte = '\0'; - break; - - case BINARY_FILES_OPTION: - if (STREQ (optarg, "binary")) - binary_files = BINARY_BINARY_FILES; - else if (STREQ (optarg, "text")) - binary_files = TEXT_BINARY_FILES; - else if (STREQ (optarg, "without-match")) - binary_files = WITHOUT_MATCH_BINARY_FILES; - else - error (EXIT_TROUBLE, 0, _("unknown binary-files type")); - break; - - case COLOR_OPTION: - if (optarg) - { - if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes") - || !strcasecmp (optarg, "force")) - color_option = 1; - else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no") - || !strcasecmp (optarg, "none")) - color_option = 0; - else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty") - || !strcasecmp (optarg, "if-tty")) - color_option = 2; - else - show_help = 1; - } - else - 
color_option = 2; - break; - - case EXCLUDE_OPTION: - case INCLUDE_OPTION: - if (!excluded_patterns) - excluded_patterns = new_exclude (); - add_exclude (excluded_patterns, optarg, - (EXCLUDE_WILDCARDS - | (opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0))); - break; - case EXCLUDE_FROM_OPTION: - if (!excluded_patterns) - excluded_patterns = new_exclude (); - if (add_exclude_file (add_exclude, excluded_patterns, optarg, - EXCLUDE_WILDCARDS, '\n') != 0) - { - error (EXIT_TROUBLE, errno, "%s", optarg); - } - break; - - case EXCLUDE_DIRECTORY_OPTION: - if (!excluded_directory_patterns) - excluded_directory_patterns = new_exclude (); - add_exclude (excluded_directory_patterns, optarg, EXCLUDE_WILDCARDS); - break; - - case GROUP_SEPARATOR_OPTION: - group_separator = optarg; - break; - - case LINE_BUFFERED_OPTION: - line_buffered = 1; - break; - - case LABEL_OPTION: - label = optarg; - break; - - case MMAP_OPTION: - error (0, 0, _("the --mmap option has been a no-op since 2010")); - break; - - case 0: - /* long options */ - break; - - default: - usage (EXIT_TROUBLE); - break; - - } - - if (color_option == 2) - color_option = isatty (STDOUT_FILENO) && should_colorize (); - init_colorize (); - - /* POSIX.2 says that -q overrides -l, which in turn overrides the - other output options. */ - if (exit_on_match) - list_files = 0; - if (exit_on_match | list_files) - { - count_matches = 0; - done_on_match = 1; - } - out_quiet = count_matches | done_on_match; - - if (out_after < 0) - out_after = default_context; - if (out_before < 0) - out_before = default_context; - - if (color_option) - { - /* Legacy. */ - char *userval = getenv ("GREP_COLOR"); - if (userval != NULL && *userval != '\0') - selected_match_color = context_match_color = userval; - - /* New GREP_COLORS has priority. 
*/ - parse_grep_colors (); - } - - if (show_version) - { - version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS, - (char *) NULL); - exit (EXIT_SUCCESS); - } - - if (show_help) - usage (EXIT_SUCCESS); - - struct stat tmp_stat; - if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode)) - out_stat = tmp_stat; - - if (keys) - { - if (keycc == 0) - { - /* No keys were specified (e.g. -f /dev/null). Match nothing. */ - out_invert ^= 1; - match_lines = match_words = 0; - } - else - /* Strip trailing newline. */ - --keycc; - } - else if (optind < argc) - { - /* A copy must be made in case of an xrealloc() or free() later. */ - keycc = strlen (argv[optind]); - keys = xmalloc (keycc + 1); - strcpy (keys, argv[optind++]); - } - else - usage (EXIT_TROUBLE); - - compile (keys, keycc); - free (keys); - - if ((argc - optind > 1 && !no_filenames) || with_filenames) - out_file = 1; - -#ifdef SET_BINARY - /* Output is set to binary mode because we shouldn't convert - NL to CR-LF pairs, especially when grepping binary files. */ - if (!isatty (1)) - SET_BINARY (1); -#endif - - if (max_count == 0) - exit (EXIT_FAILURE); - - if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES) - devices = READ_DEVICES; - - if (optind < argc) - { - status = 1; - do - status &= grep_command_line_arg (argv[optind]); - while (++optind < argc); - } - else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive) - { - /* Grep through ".", omitting leading "./" from diagnostics. */ - filename_prefix_len = 2; - status = grep_command_line_arg ("."); - } - else - status = grep_command_line_arg ("-"); - - /* We register via atexit() to test stdout. */ - exit (errseen ? 
EXIT_TROUBLE : status); -} -/* vim:set shiftwidth=2: */ diff --git a/contrib/grep/src/mbsupport.h b/contrib/grep/src/mbsupport.h deleted file mode 100644 index c4b7b4ec89..0000000000 --- a/contrib/grep/src/mbsupport.h +++ /dev/null @@ -1,29 +0,0 @@ -/* mbsupport.h --- Localize determination of whether we have multibyte stuff. - - Copyright (C) 2004-2005, 2007, 2009-2012 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA - 02110-1301, USA. */ - -#include - -#ifndef MBS_SUPPORT -# define MBS_SUPPORT 1 -#endif - -#if ! MBS_SUPPORT -# undef MB_CUR_MAX -# define MB_CUR_MAX 1 -#endif diff --git a/contrib/grep/src/pcresearch.c b/contrib/grep/src/pcresearch.c index 2994e65338..820dd0096b 100644 --- a/contrib/grep/src/pcresearch.c +++ b/contrib/grep/src/pcresearch.c @@ -1,5 +1,5 @@ /* pcresearch.c - searching subroutines using PCRE for grep. - Copyright 2000, 2007, 2009-2012 Free Software Foundation, Inc. + Copyright 2000, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,6 +32,12 @@ static pcre *cre; /* Additional information about the pattern. 
*/ static pcre_extra *extra; + +# ifdef PCRE_STUDY_JIT_COMPILE +static pcre_jit_stack *jit_stack; +# else +# define PCRE_STUDY_JIT_COMPILE 0 +# endif #endif void @@ -45,21 +51,23 @@ Pcompile (char const *pattern, size_t size) int e; char const *ep; char *re = xnmalloc (4, size + 7); - int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0); + int flags = (PCRE_MULTILINE + | (match_icase ? PCRE_CASELESS : 0) + | (using_utf8 () ? PCRE_UTF8 : 0)); char const *patlim = pattern + size; char *n = re; char const *p; char const *pnul; /* FIXME: Remove these restrictions. */ - if (memchr(pattern, '\n', size)) + if (memchr (pattern, '\n', size)) error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern")); *n = '\0'; if (match_lines) - strcpy (n, "^("); + strcpy (n, "^(?:"); if (match_words) - strcpy (n, "\\b("); + strcpy (n, "(? #include - -#include "mbsupport.h" - #include #include #include @@ -33,6 +30,7 @@ #include "system.h" #include "error.h" #include "grep.h" +#include "dfa.h" #include "kwset.h" #include "xalloc.h" @@ -45,8 +43,11 @@ typedef signed char mb_len_map_t; /* searchutils.c */ extern void kwsinit (kwset_t *); -extern char *mbtolower (const char *, size_t *, mb_len_map_t **); -extern bool is_mb_middle (const char **, const char *, const char *, size_t); +extern char *mbtoupper (char const *, size_t *, mb_len_map_t **); +extern void build_mbclen_cache (void); +extern ptrdiff_t mb_goback (char const **, char const *, char const *); +extern wint_t mb_prev_wc (char const *, char const *, char const *); +extern wint_t mb_next_wc (char const *, char const *); /* dfasearch.c */ extern void GEAcompile (char const *, size_t, reg_syntax_t); @@ -60,23 +61,4 @@ extern size_t Fexecute (char const *, size_t, size_t *, char const *); extern void Pcompile (char const *, size_t); extern size_t Pexecute (char const *, size_t, size_t *, char const *); -/* Apply the MAP (created by mbtolower) to the lowercase-buffer-relative - *OFF and *LEN, converting them 
to be relative to the original buffer. */ -static inline void -mb_case_map_apply (mb_len_map_t const *map, size_t *off, size_t *len) -{ - if (map) - { - intmax_t off_incr = 0; - intmax_t len_incr = 0; - size_t k; - for (k = 0; k < *off; k++) - off_incr += map[k]; - for (k = *off; k < *off + *len; k++) - len_incr += map[k]; - *off += off_incr; - *len += len_incr; - } -} - #endif /* GREP_SEARCH_H */ diff --git a/contrib/grep/src/searchutils.c b/contrib/grep/src/searchutils.c index ca30134a21..5eb9a12fdb 100644 --- a/contrib/grep/src/searchutils.c +++ b/contrib/grep/src/searchutils.c @@ -1,5 +1,5 @@ /* searchutils.c - helper subroutines for grep's matchers. - Copyright 1992, 1998, 2000, 2007, 2009-2012 Free Software Foundation, Inc. + Copyright 1992, 1998, 2000, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,6 +22,8 @@ #define NCHAR (UCHAR_MAX + 1) +static size_t mbclen_cache[NCHAR]; + void kwsinit (kwset_t *kwset) { @@ -31,7 +33,7 @@ kwsinit (kwset_t *kwset) if (match_icase && MB_CUR_MAX == 1) { for (i = 0; i < NCHAR; ++i) - trans[i] = tolower (i); + trans[i] = toupper (i); *kwset = kwsalloc (trans); } @@ -42,39 +44,37 @@ kwsinit (kwset_t *kwset) xalloc_die (); } -#if MBS_SUPPORT -/* Convert the *N-byte string, BEG, to lower-case, and write the +/* Convert BEG, an *N-byte string, to uppercase, and write the NUL-terminated result into malloc'd storage. Upon success, set *N to the length (in bytes) of the resulting string (not including the - trailing NUL byte), and return a pointer to the lower-case string. - Upon memory allocation failure, this function exits. - Note that on input, *N must be larger than zero. + trailing NUL byte), and return a pointer to the uppercase string. + Upon memory allocation failure, exit. *N must be positive. 
- Note that while this function returns a pointer to malloc'd storage, + Although this function returns a pointer to malloc'd storage, the caller must not free it, since this function retains a pointer to the buffer and reuses it on any subsequent call. As a consequence, this function is not thread-safe. - When each character in the lower-case result string has the same length + When each character in the uppercase result string has the same length as the corresponding character in the input string, set *LEN_MAP_P to NULL. Otherwise, set it to a malloc'd buffer (like the returned buffer, this must not be freed by caller) of the same length as the result string. (*LEN_MAP_P)[J] is the change in byte-length of the character in BEG that formed byte J of the result as it was converted to - lower-case. It is usually zero. For the upper-case Turkish I-with-dot - it is -1, since the upper-case character occupies two bytes, while the - lower-case one occupies only one byte. For the Turkish-I-without-dot - in the tr_TR.utf8 locale, it is 1 because the lower-case representation + uppercase. It is usually zero. For lowercase Turkish dotless I it + is -1, since the lowercase input occupies two bytes, while the + uppercase output occupies only one byte. For lowercase I in the + tr_TR.utf8 locale, it is 1 because the uppercase Turkish dotted I is one byte longer than the original. When that happens, we have two or more slots in *LEN_MAP_P for each such character. We store the difference in the first one and 0's in any remaining slots. This map is used by the caller to convert offset,length pairs that - reference the lower-case result to numbers that refer to the matched + reference the uppercase result to numbers that refer to the matched part of the original buffer. 
*/ char * -mbtolower (const char *beg, size_t *n, mb_len_map_t **len_map_p) +mbtoupper (const char *beg, size_t *n, mb_len_map_t **len_map_p) { static char *out; static mb_len_map_t *len_map; @@ -88,7 +88,7 @@ mbtolower (const char *beg, size_t *n, mb_len_map_t **len_map_p) if (*n > outalloc || outalloc == 0) { - outalloc = MAX(1, *n); + outalloc = MAX (1, *n); out = xrealloc (out, outalloc); len_map = xrealloc (len_map, outalloc); } @@ -111,6 +111,37 @@ mbtolower (const char *beg, size_t *n, mb_len_map_t **len_map_p) { wchar_t wc; size_t mbclen = mbrtowc (&wc, beg, end - beg, &is); +#ifdef __CYGWIN__ + /* Handle a UTF-8 sequence for a character beyond the base plane. + Cygwin's wchar_t is UTF-16, as in the underlying OS. This + results in surrogate pairs which need some extra attention. */ + wint_t wci = 0; + if (mbclen == 3 && (wc & 0xdc00) == 0xd800) + { + /* We got the start of a 4 byte UTF-8 sequence. This is returned + as a UTF-16 surrogate pair. The first call to mbrtowc returned 3 + and wc has been set to a high surrogate value, now we're going + to fetch the matching low surrogate. This second call to mbrtowc + is supposed to return 1 to complete the 4 byte UTF-8 sequence. */ + wchar_t wc_2; + size_t mbclen_2 = mbrtowc (&wc_2, beg + mbclen, end - beg - mbclen, + &is); + if (mbclen_2 == 1 && (wc_2 & 0xdc00) == 0xdc00) + { + /* Match. Convert this to a 4 byte wint_t which constitutes + a 32-bit UTF-32 value. */ + wci = ( (((wint_t) (wc - 0xd800)) << 10) + | ((wint_t) (wc_2 - 0xdc00))) + + 0x10000; + ++mbclen; + } + else + { + /* Invalid UTF-8 sequence. */ + mbclen = (size_t) -1; + } + } +#endif if (outlen + mb_cur_max >= outalloc) { size_t dm = m - len_map; @@ -132,8 +163,35 @@ mbtolower (const char *beg, size_t *n, mb_len_map_t **len_map_p) } else { + size_t ombclen; beg += mbclen; - size_t ombclen = wcrtomb (p, towlower ((wint_t) wc), &os); +#ifdef __CYGWIN__ + /* Handle Unicode characters beyond the base plane. 
*/ + if (mbclen == 4) + { + /* towupper, taking wint_t (4 bytes), handles UCS-4 values. */ + wci = towupper (wci); + if (wci >= 0x10000) + { + wci -= 0x10000; + wc = (wci >> 10) | 0xd800; + /* No need to check the return value. When reading the + high surrogate, the return value will be 0 and only the + mbstate indicates that we're in the middle of reading a + surrogate pair. The next wcrtomb call reading the low + surrogate will then return 4 and reset the mbstate. */ + wcrtomb (p, wc, &os); + wc = (wci & 0x3ff) | 0xdc00; + } + else + { + wc = (wchar_t) wci; + } + ombclen = wcrtomb (p, wc, &os); + } + else +#endif + ombclen = wcrtomb (p, towupper (wc), &os); *m = mbclen - ombclen; memset (m + 1, 0, ombclen - 1); m += ombclen; @@ -149,41 +207,83 @@ mbtolower (const char *beg, size_t *n, mb_len_map_t **len_map_p) return out; } +/* Initialize a cache of mbrlen values for each of its 1-byte inputs. */ +void +build_mbclen_cache (void) +{ + int i; -bool -is_mb_middle (const char **good, const char *buf, const char *end, - size_t match_len) + for (i = CHAR_MIN; i <= CHAR_MAX; ++i) + { + char c = i; + unsigned char uc = i; + mbstate_t mbs = { 0 }; + mbclen_cache[uc] = mbrlen (&c, 1, &mbs); + } +} + +/* In the buffer *MB_START, return the number of bytes needed to go + back from CUR to the previous boundary, where a "boundary" is the + start of a multibyte character or is an error-encoding byte. The + buffer ends at END (i.e., one past the address of the buffer's last + byte). If CUR is already at a boundary, return 0. If *MB_START is + greater than or equal to CUR, return the negative value CUR - *MB_START. + + When returning zero, set *MB_START to CUR. When returning a + positive value, set *MB_START to the next boundary after CUR, or to + END if there is no such boundary. When returning a negative value, + leave *MB_START alone. 
*/ +ptrdiff_t +mb_goback (char const **mb_start, char const *cur, char const *end) { - const char *p = *good; - const char *prev = p; + const char *p = *mb_start; + const char *p0 = p; mbstate_t cur_state; - /* TODO: can be optimized for UTF-8. */ - memset(&cur_state, 0, sizeof(mbstate_t)); - while (p < buf) + memset (&cur_state, 0, sizeof cur_state); + + while (p < cur) { - size_t mbclen = mbrlen(p, end - p, &cur_state); + size_t mbclen = mbclen_cache[to_uchar (*p)]; - /* Store the beginning of the previous complete multibyte character. */ - if (mbclen != (size_t) -2) - prev = p; + if (mbclen == (size_t) -2) + mbclen = mbrlen (p, end - p, &cur_state); - if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) + if (! (0 < mbclen && mbclen < (size_t) -2)) { - /* An invalid sequence, or a truncated multibyte character. - We treat it as a single byte character. */ + /* An invalid sequence, or a truncated multibyte character, or + a null wide character. Treat it as a single byte character. */ mbclen = 1; - memset(&cur_state, 0, sizeof cur_state); + memset (&cur_state, 0, sizeof cur_state); } + p0 = p; p += mbclen; } - *good = prev; + *mb_start = p; + return p == cur ? 0 : cur - p0; +} - if (p > buf) - return true; +/* In the buffer BUF, return the wide character that is encoded just + before CUR. The buffer ends at END. Return WEOF if there is no + wide character just before CUR. */ +wint_t +mb_prev_wc (char const *buf, char const *cur, char const *end) +{ + if (cur == buf) + return WEOF; + char const *p = buf; + cur--; + cur -= mb_goback (&p, cur, end); + return mb_next_wc (cur, end); +} - /* P == BUF here. */ - return 0 < match_len && match_len < mbrlen (p, end - p, &cur_state); +/* Return the wide character that is encoded at CUR. The buffer ends + at END. Return WEOF if there is no wide character encoded at CUR. 
*/ +wint_t +mb_next_wc (char const *cur, char const *end) +{ + wchar_t wc; + mbstate_t mbs = { 0 }; + return mbrtowc (&wc, cur, end - cur, &mbs) < (size_t) -2 ? wc : WEOF; } -#endif /* MBS_SUPPORT */ diff --git a/contrib/grep/src/system.h b/contrib/grep/src/system.h index 426d89376b..7da1d8d72b 100644 --- a/contrib/grep/src/system.h +++ b/contrib/grep/src/system.h @@ -1,5 +1,5 @@ /* Portability cruft. Include after config.h and sys/types.h. - Copyright 1996, 1998-2000, 2007, 2009-2012 Free Software Foundation, Inc. + Copyright 1996, 1998-2000, 2007, 2009-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,10 +29,6 @@ #include "minmax.h" #include "same-inode.h" -#if O_BINARY -# define HAVE_DOS_FILE_CONTENTS 1 -#endif - #include #include #include @@ -52,4 +48,16 @@ enum { EXIT_TROUBLE = 2 }; #endif #include "unlocked-io.h" + +#define STREQ(a, b) (strcmp (a, b) == 0) + +/* Convert a possibly-signed character to an unsigned character. This is + a bit safer than casting to unsigned char, since it catches some type + errors that the cast doesn't. */ +static inline unsigned char +to_uchar (char ch) +{ + return ch; +} + #endif