From 09d4459f3c9dceed89a46b9e67a783fae1eaa8c5 Mon Sep 17 00:00:00 2001 From: Daniel Fojt Date: Tue, 2 Jun 2020 18:56:39 +0200 Subject: [PATCH] vendor/grep: upgrade from 2.22 to 3.4 --- contrib/grep/COPYING | 9 +- contrib/grep/README | 8 +- contrib/grep/doc/fdl.texi | 4 +- contrib/grep/doc/grep.texi | 531 +- contrib/grep/doc/version.texi | 8 +- contrib/grep/lib/alignof.h | 50 - contrib/grep/lib/alloca.c | 478 -- contrib/grep/lib/argmatch.c | 34 +- contrib/grep/lib/argmatch.h | 252 +- contrib/grep/lib/assure.h | 6 +- contrib/grep/lib/at-func.c | 146 - contrib/grep/lib/basename-lgpl.c | 4 +- contrib/grep/lib/binary-io.c | 37 +- contrib/grep/lib/binary-io.h | 40 +- contrib/grep/lib/bitrotate.h | 14 +- contrib/grep/lib/c-ctype.h | 24 +- contrib/grep/lib/c-stack.c | 338 ++ contrib/grep/lib/c-stack.h | 44 + contrib/grep/lib/c-strcase.h | 4 +- contrib/grep/lib/c-strcasecmp.c | 4 +- contrib/grep/lib/c-strcaseeq.h | 4 +- contrib/grep/lib/c-strncasecmp.c | 4 +- contrib/grep/lib/cdefs.h | 514 ++ contrib/grep/lib/chdir-long.c | 7 +- contrib/grep/lib/chdir-long.h | 4 +- contrib/grep/lib/cloexec.c | 6 +- contrib/grep/lib/cloexec.h | 6 +- contrib/grep/lib/close-stream.c | 4 +- contrib/grep/lib/close.c | 69 - contrib/grep/lib/closedir.c | 67 - contrib/grep/lib/closeout.c | 20 +- contrib/grep/lib/closeout.h | 4 +- contrib/grep/lib/colorize-posix.c | 2 +- contrib/grep/lib/colorize-w32.c | 208 - contrib/grep/lib/colorize.h | 2 +- contrib/grep/lib/config.charset | 682 --- contrib/grep/lib/creat-safer.c | 4 +- contrib/grep/lib/cycle-check.c | 4 +- contrib/grep/lib/cycle-check.h | 4 +- contrib/grep/lib/dfa.c | 4359 +++++++++++++++++ contrib/grep/{src => lib}/dfa.h | 79 +- contrib/grep/lib/dirent--.h | 24 - contrib/grep/lib/dirent-private.h | 40 - contrib/grep/lib/dirent-safer.h | 22 - contrib/grep/lib/dirname-lgpl.c | 4 +- contrib/grep/lib/dirname.h | 6 +- contrib/grep/lib/dosname.h | 9 +- .../lib/{dup-safer.c => dup-safer-flag.c} | 18 +- contrib/grep/lib/dup-safer.c | 4 +- 
contrib/grep/lib/dup.c | 61 - contrib/grep/lib/dup2.c | 103 +- contrib/grep/lib/error.c | 37 +- contrib/grep/lib/error.h | 4 +- contrib/grep/lib/exclude.c | 136 +- contrib/grep/lib/exclude.h | 8 +- contrib/grep/lib/exitfail.c | 4 +- contrib/grep/lib/exitfail.h | 4 +- contrib/grep/lib/fchdir.c | 208 - contrib/grep/lib/fcntl--.h | 4 +- contrib/grep/lib/fcntl-safer.h | 4 +- contrib/grep/lib/fcntl.c | 486 +- contrib/grep/lib/fd-hook.c | 6 +- contrib/grep/lib/fd-hook.h | 6 +- .../grep/lib/{fd-safer.c => fd-safer-flag.c} | 25 +- contrib/grep/lib/fd-safer.c | 4 +- contrib/grep/lib/fdopendir.c | 213 - contrib/grep/lib/filename.h | 6 +- contrib/grep/lib/filenamecat-lgpl.c | 61 +- contrib/grep/lib/filenamecat.h | 4 +- contrib/grep/lib/flexmember.h | 60 + contrib/grep/lib/fnmatch.c | 350 -- contrib/grep/lib/fnmatch_loop.c | 1219 ----- contrib/grep/lib/fpending.c | 30 - contrib/grep/lib/fpending.h | 4 +- contrib/grep/lib/fstat.c | 88 - contrib/grep/lib/fstatat.c | 135 - contrib/grep/lib/fts-cycle.c | 4 +- contrib/grep/lib/fts.c | 487 +- contrib/grep/lib/fts_.h | 20 +- contrib/grep/lib/getcwd-lgpl.c | 126 - contrib/grep/lib/getdtablesize.c | 121 - contrib/grep/lib/getopt-core.h | 96 + contrib/grep/lib/getopt-ext.h | 77 + contrib/grep/lib/getopt-pfx-core.h | 59 + contrib/grep/lib/getopt-pfx-ext.h | 71 + contrib/grep/lib/getopt.c | 1470 ++---- contrib/grep/lib/getopt1.c | 179 +- contrib/grep/lib/getopt_int.h | 91 +- contrib/grep/lib/getpagesize.c | 39 - contrib/grep/lib/getprogname.c | 260 + contrib/grep/lib/{btowc.c => getprogname.h} | 45 +- contrib/grep/lib/gettext.h | 29 +- contrib/grep/lib/gettimeofday.c | 154 - contrib/grep/lib/glthread/lock.c | 848 +--- contrib/grep/lib/glthread/lock.h | 513 +- contrib/grep/lib/glthread/threadlib.c | 6 +- contrib/grep/lib/gnulib.mk | 2872 ----------- contrib/grep/lib/{wctomb.c => hard-locale.c} | 24 +- .../grep/lib/{closeout.h => hard-locale.h} | 28 +- contrib/grep/lib/hash.c | 4 +- contrib/grep/lib/hash.h | 7 +- contrib/grep/lib/i-ring.c | 
4 +- contrib/grep/lib/i-ring.h | 4 +- contrib/grep/lib/iconv_open.c | 172 - contrib/grep/lib/ignore-value.h | 4 +- contrib/grep/lib/intprops.h | 414 +- contrib/grep/lib/isatty.c | 83 - contrib/grep/lib/isblank.c | 33 - contrib/grep/lib/iswctype-impl.h | 22 - contrib/grep/lib/iswctype.c | 23 - contrib/grep/lib/libc-config.h | 174 + contrib/grep/lib/localcharset.c | 1310 +++-- contrib/grep/lib/localcharset.h | 105 +- contrib/grep/lib/localeconv.c | 103 - contrib/grep/lib/localeinfo.c | 151 + contrib/grep/lib/localeinfo.h | 60 + contrib/grep/lib/lseek.c | 67 - contrib/grep/lib/lstat.c | 97 - contrib/grep/lib/malloc.c | 56 - contrib/grep/lib/malloca.c | 140 +- contrib/grep/lib/malloca.h | 32 +- contrib/grep/lib/malloca.valgrind | 7 - contrib/grep/lib/mbchar.c | 4 +- contrib/grep/lib/mbchar.h | 4 +- contrib/grep/lib/mbiter.h | 4 +- contrib/grep/lib/mbrlen.c | 32 - contrib/grep/lib/mbrtowc.c | 407 -- contrib/grep/lib/mbscasecmp.c | 4 +- contrib/grep/lib/mbsinit.c | 61 - contrib/grep/lib/mbslen.c | 4 +- contrib/grep/lib/mbsrtowcs-state.c | 37 - contrib/grep/lib/mbsrtowcs.c | 32 - contrib/grep/lib/mbsstr.c | 7 +- contrib/grep/lib/mbuiter.h | 4 +- contrib/grep/lib/memchr.c | 172 - contrib/grep/lib/memchr.valgrind | 14 - contrib/grep/lib/memchr2.c | 4 +- contrib/grep/lib/memchr2.h | 4 +- contrib/grep/lib/memchr2.valgrind | 16 + contrib/grep/lib/mempcpy.c | 28 - contrib/grep/lib/memrchr.c | 161 - contrib/grep/lib/minmax.h | 4 +- contrib/grep/lib/msvc-inval.c | 129 - contrib/grep/lib/msvc-inval.h | 222 - contrib/grep/lib/msvc-nothrow.c | 49 - contrib/grep/lib/msvc-nothrow.h | 43 - contrib/grep/lib/nl_langinfo.c | 322 -- contrib/grep/lib/obstack.c | 7 +- contrib/grep/lib/obstack.h | 23 +- contrib/grep/lib/open-safer.c | 4 +- contrib/grep/lib/open.c | 181 - contrib/grep/lib/openat-die.c | 4 +- contrib/grep/lib/openat-priv.h | 4 +- contrib/grep/lib/openat-proc.c | 148 +- contrib/grep/lib/openat-safer.c | 4 +- contrib/grep/lib/openat.c | 286 -- contrib/grep/lib/openat.h | 4 +- 
contrib/grep/lib/opendir-safer.c | 76 - contrib/grep/lib/opendir.c | 148 - contrib/grep/lib/opendirat.c | 54 + contrib/grep/lib/opendirat.h | 2 + contrib/grep/lib/pathmax.h | 10 +- contrib/grep/lib/pipe-safer.c | 4 +- contrib/grep/lib/progname.c | 92 - contrib/grep/lib/progname.h | 62 - contrib/grep/lib/propername.c | 4 +- contrib/grep/lib/propername.h | 4 +- contrib/grep/lib/quote.h | 4 +- contrib/grep/lib/quotearg.c | 193 +- contrib/grep/lib/quotearg.h | 40 +- contrib/grep/lib/read.c | 85 - contrib/grep/lib/readdir.c | 98 - contrib/grep/lib/realloc.c | 79 - contrib/grep/lib/ref-add.sin | 29 - contrib/grep/lib/ref-del.sin | 24 - contrib/grep/lib/regcomp.c | 553 +-- contrib/grep/lib/regex.c | 10 +- contrib/grep/lib/regex.h | 64 +- contrib/grep/lib/regex_internal.c | 296 +- contrib/grep/lib/regex_internal.h | 140 +- contrib/grep/lib/regexec.c | 898 ++-- contrib/grep/lib/safe-read.c | 16 +- contrib/grep/lib/safe-read.h | 4 +- contrib/grep/lib/same-inode.h | 22 +- contrib/grep/lib/save-cwd.c | 10 +- contrib/grep/lib/save-cwd.h | 4 +- contrib/grep/lib/setlocale-lock.c | 150 + contrib/grep/lib/setlocale_null.c | 411 ++ contrib/grep/lib/setlocale_null.h | 82 + contrib/grep/lib/sig-handler.c | 3 + contrib/grep/lib/sig-handler.h | 51 + contrib/grep/lib/stat-time.c | 3 + contrib/grep/lib/stat-time.h | 252 + contrib/grep/lib/stat.c | 138 - contrib/grep/lib/stdalign.in.h | 121 - contrib/grep/lib/stpcpy.c | 49 - contrib/grep/lib/str-kmp.h | 6 +- contrib/grep/lib/str-two-way.h | 12 +- contrib/grep/lib/strdup.c | 54 - contrib/grep/lib/streq.h | 4 +- contrib/grep/lib/strerror-override.c | 4 +- contrib/grep/lib/strerror-override.h | 4 +- contrib/grep/lib/strerror.c | 7 +- contrib/grep/lib/striconv.c | 4 +- contrib/grep/lib/striconv.h | 4 +- contrib/grep/lib/stripslash.c | 4 +- contrib/grep/lib/strnlen.c | 30 - contrib/grep/lib/strnlen1.c | 4 +- contrib/grep/lib/strnlen1.h | 4 +- contrib/grep/lib/strstr.c | 8 +- contrib/grep/lib/strtoimax.c | 82 - contrib/grep/lib/strtol.c | 433 -- 
contrib/grep/lib/strtoll.c | 33 - contrib/grep/lib/strtoul.c | 19 - contrib/grep/lib/strtoull.c | 26 - contrib/grep/lib/strtoumax.c | 2 - contrib/grep/lib/sys-limits.h | 42 + contrib/grep/lib/trim.c | 6 +- contrib/grep/lib/trim.h | 4 +- contrib/grep/lib/unistd--.h | 4 +- contrib/grep/lib/unistd-safer.h | 4 +- contrib/grep/lib/unistr/u8-mbtoucr.c | 149 +- contrib/grep/lib/unistr/u8-uctomb-aux.c | 27 +- contrib/grep/lib/unistr/u8-uctomb.c | 17 +- contrib/grep/lib/uniwidth/cjk.h | 4 +- contrib/grep/lib/uniwidth/width.c | 50 +- contrib/grep/lib/unlocked-io.h | 4 +- contrib/grep/lib/verify.h | 126 +- contrib/grep/lib/version-etc-fsf.c | 4 +- contrib/grep/lib/version-etc.c | 36 +- contrib/grep/lib/version-etc.h | 4 +- contrib/grep/lib/wcrtomb.c | 53 - contrib/grep/lib/wctob.c | 38 - contrib/grep/lib/wctomb-impl.h | 34 - contrib/grep/lib/wcwidth.c | 50 - contrib/grep/lib/xalloc-die.c | 4 +- contrib/grep/lib/xalloc-oversized.h | 54 +- contrib/grep/lib/xalloc.h | 22 +- contrib/grep/lib/{dirfd.c => xbinary-io.c} | 35 +- contrib/grep/lib/xbinary-io.h | 48 + contrib/grep/lib/xmalloc.c | 20 +- contrib/grep/lib/xstriconv.c | 4 +- contrib/grep/lib/xstriconv.h | 4 +- contrib/grep/lib/xstrtol-error.c | 98 - contrib/grep/lib/xstrtol.c | 53 +- contrib/grep/lib/xstrtol.h | 31 +- contrib/grep/src/dfa.c | 4184 ---------------- contrib/grep/src/dfasearch.c | 355 +- contrib/grep/{lib/colorize.h => src/die.h} | 21 +- contrib/grep/src/dosbuf.c | 222 - contrib/grep/src/egrep.sh | 2 - contrib/grep/src/grep.c | 1555 +++--- contrib/grep/src/grep.h | 20 +- contrib/grep/src/kwsearch.c | 243 +- contrib/grep/src/kwset.c | 817 +-- contrib/grep/src/kwset.h | 36 +- contrib/grep/src/pcresearch.c | 358 -- contrib/grep/src/search.h | 38 +- contrib/grep/src/searchutils.c | 367 +- contrib/grep/src/system.h | 26 +- 260 files changed, 15276 insertions(+), 23635 deletions(-) delete mode 100644 contrib/grep/lib/alignof.h delete mode 100644 contrib/grep/lib/alloca.c delete mode 100644 contrib/grep/lib/at-func.c 
create mode 100644 contrib/grep/lib/c-stack.c create mode 100644 contrib/grep/lib/c-stack.h create mode 100644 contrib/grep/lib/cdefs.h delete mode 100644 contrib/grep/lib/close.c delete mode 100644 contrib/grep/lib/closedir.c delete mode 100644 contrib/grep/lib/colorize-w32.c delete mode 100644 contrib/grep/lib/config.charset create mode 100644 contrib/grep/lib/dfa.c rename contrib/grep/{src => lib}/dfa.h (63%) delete mode 100644 contrib/grep/lib/dirent--.h delete mode 100644 contrib/grep/lib/dirent-private.h delete mode 100644 contrib/grep/lib/dirent-safer.h copy contrib/grep/lib/{dup-safer.c => dup-safer-flag.c} (56%) delete mode 100644 contrib/grep/lib/dup.c delete mode 100644 contrib/grep/lib/fchdir.c copy contrib/grep/lib/{fd-safer.c => fd-safer-flag.c} (54%) delete mode 100644 contrib/grep/lib/fdopendir.c create mode 100644 contrib/grep/lib/flexmember.h delete mode 100644 contrib/grep/lib/fnmatch.c delete mode 100644 contrib/grep/lib/fnmatch_loop.c delete mode 100644 contrib/grep/lib/fpending.c delete mode 100644 contrib/grep/lib/fstat.c delete mode 100644 contrib/grep/lib/fstatat.c delete mode 100644 contrib/grep/lib/getcwd-lgpl.c delete mode 100644 contrib/grep/lib/getdtablesize.c create mode 100644 contrib/grep/lib/getopt-core.h create mode 100644 contrib/grep/lib/getopt-ext.h create mode 100644 contrib/grep/lib/getopt-pfx-core.h create mode 100644 contrib/grep/lib/getopt-pfx-ext.h delete mode 100644 contrib/grep/lib/getpagesize.c create mode 100644 contrib/grep/lib/getprogname.c rename contrib/grep/lib/{btowc.c => getprogname.h} (52%) delete mode 100644 contrib/grep/lib/gettimeofday.c delete mode 100644 contrib/grep/lib/gnulib.mk rename contrib/grep/lib/{wctomb.c => hard-locale.c} (55%) copy contrib/grep/lib/{closeout.h => hard-locale.h} (53%) delete mode 100644 contrib/grep/lib/iconv_open.c delete mode 100644 contrib/grep/lib/isatty.c delete mode 100644 contrib/grep/lib/isblank.c delete mode 100644 contrib/grep/lib/iswctype-impl.h delete mode 100644 
contrib/grep/lib/iswctype.c create mode 100644 contrib/grep/lib/libc-config.h delete mode 100644 contrib/grep/lib/localeconv.c create mode 100644 contrib/grep/lib/localeinfo.c create mode 100644 contrib/grep/lib/localeinfo.h delete mode 100644 contrib/grep/lib/lseek.c delete mode 100644 contrib/grep/lib/lstat.c delete mode 100644 contrib/grep/lib/malloc.c delete mode 100644 contrib/grep/lib/malloca.valgrind delete mode 100644 contrib/grep/lib/mbrlen.c delete mode 100644 contrib/grep/lib/mbrtowc.c delete mode 100644 contrib/grep/lib/mbsinit.c delete mode 100644 contrib/grep/lib/mbsrtowcs-state.c delete mode 100644 contrib/grep/lib/mbsrtowcs.c delete mode 100644 contrib/grep/lib/memchr.c delete mode 100644 contrib/grep/lib/memchr.valgrind delete mode 100644 contrib/grep/lib/mempcpy.c delete mode 100644 contrib/grep/lib/memrchr.c delete mode 100644 contrib/grep/lib/msvc-inval.c delete mode 100644 contrib/grep/lib/msvc-inval.h delete mode 100644 contrib/grep/lib/msvc-nothrow.c delete mode 100644 contrib/grep/lib/msvc-nothrow.h delete mode 100644 contrib/grep/lib/nl_langinfo.c delete mode 100644 contrib/grep/lib/open.c delete mode 100644 contrib/grep/lib/openat.c delete mode 100644 contrib/grep/lib/opendir-safer.c delete mode 100644 contrib/grep/lib/opendir.c create mode 100644 contrib/grep/lib/opendirat.c create mode 100644 contrib/grep/lib/opendirat.h delete mode 100644 contrib/grep/lib/progname.c delete mode 100644 contrib/grep/lib/progname.h delete mode 100644 contrib/grep/lib/read.c delete mode 100644 contrib/grep/lib/readdir.c delete mode 100644 contrib/grep/lib/realloc.c delete mode 100644 contrib/grep/lib/ref-add.sin delete mode 100644 contrib/grep/lib/ref-del.sin create mode 100644 contrib/grep/lib/setlocale-lock.c create mode 100644 contrib/grep/lib/setlocale_null.c create mode 100644 contrib/grep/lib/setlocale_null.h create mode 100644 contrib/grep/lib/sig-handler.c create mode 100644 contrib/grep/lib/sig-handler.h create mode 100644 
contrib/grep/lib/stat-time.c create mode 100644 contrib/grep/lib/stat-time.h delete mode 100644 contrib/grep/lib/stat.c delete mode 100644 contrib/grep/lib/stdalign.in.h delete mode 100644 contrib/grep/lib/stpcpy.c delete mode 100644 contrib/grep/lib/strdup.c delete mode 100644 contrib/grep/lib/strnlen.c delete mode 100644 contrib/grep/lib/strtoimax.c delete mode 100644 contrib/grep/lib/strtol.c delete mode 100644 contrib/grep/lib/strtoll.c delete mode 100644 contrib/grep/lib/strtoul.c delete mode 100644 contrib/grep/lib/strtoull.c delete mode 100644 contrib/grep/lib/strtoumax.c create mode 100644 contrib/grep/lib/sys-limits.h delete mode 100644 contrib/grep/lib/wcrtomb.c delete mode 100644 contrib/grep/lib/wctob.c delete mode 100644 contrib/grep/lib/wctomb-impl.h delete mode 100644 contrib/grep/lib/wcwidth.c rename contrib/grep/lib/{dirfd.c => xbinary-io.c} (53%) create mode 100644 contrib/grep/lib/xbinary-io.h delete mode 100644 contrib/grep/lib/xstrtol-error.c delete mode 100644 contrib/grep/src/dfa.c copy contrib/grep/{lib/colorize.h => src/die.h} (62%) delete mode 100644 contrib/grep/src/dosbuf.c delete mode 100644 contrib/grep/src/egrep.sh delete mode 100644 contrib/grep/src/pcresearch.c diff --git a/contrib/grep/COPYING b/contrib/grep/COPYING index a737dcfed5..f288702d2f 100644 --- a/contrib/grep/COPYING +++ b/contrib/grep/COPYING @@ -1,8 +1,7 @@ - GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 - Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. @@ -646,7 +645,7 @@ the "copyright" line and a pointer to where the full notice is found. GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <https://www.gnu.org/licenses/>. 
Also add information on how to contact you by electronic and paper mail. @@ -665,11 +664,11 @@ might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. +<https://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read -<http://www.gnu.org/philosophy/why-not-lgpl.html>. +<https://www.gnu.org/licenses/why-not-lgpl.html>. diff --git a/contrib/grep/README b/contrib/grep/README index e41c161f2c..f157977170 100644 --- a/contrib/grep/README +++ b/contrib/grep/README @@ -1,4 +1,4 @@ - Copyright (C) 1992, 1997-2002, 2004-2015 Free Software Foundation, Inc. + Copyright (C) 1992, 1997-2002, 2004-2020 Free Software Foundation, Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright @@ -17,7 +17,7 @@ twice as fast as stock Unix egrep) hybridized with a Boyer-Moore-Gosper search for a fixed string that eliminates impossible text from being considered by the full regexp matcher without necessarily having to look at every character. The result is typically many times faster -than Unix grep or egrep. (Regular expressions containing backreferencing +than Unix grep or egrep. (Regular expressions containing back-references will run more slowly, however.) See the files AUTHORS and THANKS for a list of authors and other contributors. @@ -49,11 +49,11 @@ necessarily of the same byte length. 
A list of outstanding and resolved bugs can be found at: - http://debbugs.gnu.org/cgi/pkgreport.cgi?package=grep + https://debbugs.gnu.org/cgi/pkgreport.cgi?package=grep You can also browse the bug-grep mailing list archive at: - http://lists.gnu.org/archive/html/bug-grep/ + https://lists.gnu.org/r/bug-grep/ For any copyright year range specified as YYYY-ZZZZ in this package note that the range specifies every single year in that closed interval. diff --git a/contrib/grep/doc/fdl.texi b/contrib/grep/doc/fdl.texi index 9c3bbe56e9..542edaad25 100644 --- a/contrib/grep/doc/fdl.texi +++ b/contrib/grep/doc/fdl.texi @@ -6,7 +6,7 @@ @display Copyright @copyright{} 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. -@uref{http://fsf.org/} +@uref{https://fsf.org/} Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. @@ -414,7 +414,7 @@ The Free Software Foundation may publish new, revised versions of the GNU Free Documentation License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. See -@uref{http://www.gnu.org/copyleft/}. +@uref{https://www.gnu.org/copyleft/}. Each version of the License is given a distinguishing version number. If the Document specifies that a particular numbered version of this diff --git a/contrib/grep/doc/grep.texi b/contrib/grep/doc/grep.texi index e3495bb9eb..4b279ccb64 100644 --- a/contrib/grep/doc/grep.texi +++ b/contrib/grep/doc/grep.texi @@ -14,11 +14,23 @@ @c %**end of header @documentencoding UTF-8 +@c These two require Texinfo 5.0 or later, so use the older +@c equivalent @set variables supported in 4.11 and later. +@ignore +@codequotebacktick on +@codequoteundirected on +@end ignore +@set txicodequoteundirected +@set txicodequotebacktick +@iftex +@c TeX sometimes fails to hyphenate, so help it here. 
+@hyphenation{spec-i-fied} +@end iftex @copying This manual is for @command{grep}, a pattern matching engine. -Copyright @copyright{} 1999-2002, 2005, 2008-2015 Free Software Foundation, +Copyright @copyright{} 1999--2002, 2005, 2008--2020 Free Software Foundation, Inc. @quotation @@ -33,11 +45,11 @@ Texts. A copy of the license is included in the section entitled @dircategory Text creation and manipulation @direntry -* grep: (grep). Print lines matching a pattern. +* grep: (grep). Print lines that match patterns. @end direntry @titlepage -@title GNU Grep: Print lines matching a pattern +@title GNU Grep: Print lines that match patterns @subtitle version @value{VERSION}, @value{UPDATED} @author Alain Magloire et al. @page @@ -52,7 +64,7 @@ Texts. A copy of the license is included in the section entitled @node Top @top grep -@command{grep} prints lines that contain a match for a pattern. +@command{grep} prints lines that contain a match for one or more patterns. This manual is for version @value{VERSION} of GNU Grep. @@ -64,6 +76,7 @@ This manual is for version @value{VERSION} of GNU Grep. * Invoking:: Command-line options, environment, exit status. * Regular Expressions:: Regular Expressions. * Usage:: Examples. +* Performance:: Performance tuning. * Reporting Bugs:: Reporting Bugs. * Copying:: License terms for this manual. * Index:: Combined index. @@ -73,10 +86,10 @@ This manual is for version @value{VERSION} of GNU Grep. @node Introduction @chapter Introduction -@cindex searching for a pattern +@cindex searching for patterns -@command{grep} searches input files -for lines containing a match to a given pattern list. +Given one or more patterns, @command{grep} searches input files +for matches to the patterns. When it finds a match in a line, it copies the line to standard output (by default), or produces whatever other sort of output you have requested with options. @@ -96,17 +109,16 @@ there is no way to match newline characters in a text. 
The general synopsis of the @command{grep} command line is @example -grep @var{options} @var{pattern} @var{input_file_names} +grep [@var{option}...] [@var{patterns}] [@var{file}...] @end example @noindent -There can be zero or more @var{options}. -@var{pattern} will only be seen as such -(and not as an @var{input_file_name}) -if it wasn't already specified within @var{options} -(by using the @samp{-e@ @var{pattern}} -or @samp{-f@ @var{file}} options). -There can be zero or more @var{input_file_names}. +There can be zero or more @var{option} arguments, and zero or more +@var{file} arguments. The @var{patterns} argument contains one or +more patterns separated by newlines, and is omitted when patterns are +given via the @samp{-e@ @var{patterns}} or @samp{-f@ @var{file}} +options. Typically @var{patterns} should be quoted when +@command{grep} is used in a shell command. @menu * Command-line Options:: Short and long names, grouped by category. @@ -169,22 +181,27 @@ This version number should be included in all bug reports. @table @option -@item -e @var{pattern} -@itemx --regexp=@var{pattern} +@item -e @var{patterns} +@itemx --regexp=@var{patterns} @opindex -e -@opindex --regexp=@var{pattern} -@cindex pattern list -Use @var{pattern} as the pattern. -This can be used to specify multiple search patterns, -or to protect a pattern beginning with a @samp{-}. +@opindex --regexp=@var{patterns} +@cindex patterns option +Use @var{patterns} as one or more patterns; newlines within +@var{patterns} separate each pattern from the next. +If this option is used multiple times or is combined with the +@option{-f} (@option{--file}) option, search for all patterns given. +Typically @var{patterns} should be quoted when @command{grep} is used +in a shell command. (@option{-e} is specified by POSIX.) @item -f @var{file} @itemx --file=@var{file} @opindex -f @opindex --file -@cindex pattern from file +@cindex patterns from file Obtain patterns from @var{file}, one per line. 
+If this option is used multiple times or is combined with the +@option{-e} (@option{--regexp}) option, search for all patterns given. The empty file contains zero patterns, and therefore matches nothing. (@option{-f} is specified by POSIX.) @@ -195,7 +212,8 @@ The empty file contains zero patterns, and therefore matches nothing. @opindex -y @opindex --ignore-case @cindex case insensitive search -Ignore case distinctions, so that characters that differ only in case +Ignore case distinctions in patterns and input data, +so that characters that differ only in case match each other. Although this is straightforward when letters differ in case only via lowercase-uppercase pairs, the behavior is unspecified in other situations. For example, uppercase ``S'' has an @@ -211,6 +229,13 @@ SHARP S) even though lowercasing the latter yields the former. @option{-y} is an obsolete synonym that is provided for compatibility. (@option{-i} is specified by POSIX.) +@item --no-ignore-case +@opindex --no-ignore-case +Do not ignore case distinctions in patterns and input data. This is +the default. This option is useful for passing to shell scripts that +already use @option{-i}, in order to cancel its effects because the +two options override each other. + @item -v @itemx --invert-match @opindex -v @@ -232,7 +257,16 @@ or preceded by a non-word constituent character. Similarly, it must be either at the end of the line or followed by a non-word constituent character. -Word-constituent characters are letters, digits, and the underscore. +Word constituent characters are letters, digits, and the underscore. +This option has no effect if @option{-x} is also specified. + +Because the @option{-w} option can match a substring that does not +begin and end with word constituents, it differs from surrounding a +regular expression with @samp{\<} and @samp{\>}. 
For example, although +@samp{grep -w @@} matches a line containing only @samp{@@}, @samp{grep +'\<@@\>'} cannot match any line because @samp{@@} is not a +word constituent. @xref{The Backslash Character and Special +Expressions}. @item -x @itemx --line-regexp @@ -240,6 +274,8 @@ Word-constituent characters are letters, digits, and the underscore. @opindex --line-regexp @cindex match the whole line Select only those matches that exactly match the whole line. +For regular expression patterns, this is like parenthesizing each +pattern and then surrounding it with @samp{^} and @samp{$}. (@option{-x} is specified by POSIX.) @end table @@ -305,17 +341,17 @@ The scanning of each file stops on the first match. @opindex -m @opindex --max-count @cindex max-count -Stop reading a file after @var{num} matching lines. +Stop after the first @var{num} selected lines. If the input is standard input from a regular file, -and @var{num} matching lines are output, +and @var{num} selected lines are output, @command{grep} ensures that the standard input is positioned -just after the last matching line before exiting, +just after the last selected line before exiting, regardless of the presence of trailing context lines. This enables a calling process to resume a search. For example, the following shell script makes use of it: @example -while grep -m 1 PATTERN +while grep -m 1 'PATTERN' do echo xxxx done < FILE @@ -327,16 +363,15 @@ file: @example # This probably will not work. cat FILE | -while grep -m 1 PATTERN +while grep -m 1 'PATTERN' do echo xxxx done @end example -When @command{grep} stops after @var{num} matching lines, +@cindex context lines +When @command{grep} stops after @var{num} selected lines, it outputs any trailing context lines. -Since context does not include matching lines, -@command{grep} will stop when it encounters another matching line. When the @option{-c} or @option{--count} option is also used, @command{grep} does not output a count greater than @var{num}. 
When the @option{-v} or @option{--invert-match} option is also used, @@ -349,6 +384,9 @@ When the @option{-v} or @option{--invert-match} option is also used, @cindex only matching Print only the matched (non-empty) parts of matching lines, with each such part on a separate output line. +Output lines use the same delimiters as input, and delimiters are null +bytes if @option{-z} (@option{--null-data}) is also used (@pxref{Other +Options}). @item -q @itemx --quiet @@ -403,10 +441,6 @@ Print the 0-based byte offset within the input file before each line of output. If @option{-o} (@option{--only-matching}) is specified, print the offset of the matching part itself. -When @command{grep} runs on MS-DOS or MS-Windows, -the printed byte offsets depend on whether -the @option{-u} (@option{--unix-byte-offsets}) option is used; -see below. @item -H @itemx --with-filename @@ -429,12 +463,12 @@ This is the default when there is only one file @opindex --label @cindex changing name of standard input Display input actually coming from standard input -as input coming from file @var{LABEL}. This is -especially useful when implementing tools like -@command{zgrep}; e.g.: +as input coming from file @var{LABEL}. +This can be useful for commands that transform a file's contents +before searching; e.g.: @example -gzip -cd foo.gz | grep --label=foo -H something +gzip -cd foo.gz | grep --label=foo -H 'some pattern' @end example @item -n @@ -454,25 +488,8 @@ Make sure that the first character of actual line content lies on a tab stop, so that the alignment of tabs looks normal. This is useful with options that prefix their output to the actual content: @option{-H}, @option{-n}, and @option{-b}. -In order to improve the probability that lines -from a single file will all start at the same column, -this also causes the line number and byte offset (if present) -to be printed in a minimum-size field width. 
- -@item -u -@itemx --unix-byte-offsets -@opindex -u -@opindex --unix-byte-offsets -@cindex MS-DOS/MS-Windows byte offsets -@cindex byte offsets, on MS-DOS/MS-Windows -Report Unix-style byte offsets. -This option causes @command{grep} to report byte offsets -as if the file were a Unix-style text file, -i.e., the byte offsets ignore carriage returns that were stripped. -This will produce results identical -to running @command{grep} on a Unix machine. -This option has no effect unless the @option{-b} option is also used; -it has no effect on platforms other than MS-DOS and MS-Windows. +This may also prepend spaces to output line numbers and byte offsets +so that lines from a single file all start at the same column. @item -Z @itemx --null @@ -496,8 +513,11 @@ even those that contain newline characters. @node Context Line Control @subsection Context Line Control +@cindex context lines +@dfn{Context lines} are non-matching lines that are near a matching line. +They are output only if one of the following options are used. Regardless of how these options are set, -@command{grep} will never print any given line more than once. +@command{grep} never outputs any given line more than once. If the @option{-o} (@option{--only-matching}) option is specified, these options have no effect and a warning is given upon their use. @@ -525,7 +545,7 @@ Print @var{num} lines of leading context before matching lines. @opindex -C @opindex --context @opindex -@var{num} -@cindex context +@cindex context lines Print @var{num} lines of leading and trailing output context. @item --group-separator=@var{string} @@ -592,36 +612,47 @@ this is equivalent to the @samp{--binary-files=text} option. @item --binary-files=@var{type} @opindex --binary-files @cindex binary files -If a file's allocation metadata, -or if its data read before a line is selected for output, +If a file's data or metadata indicate that the file contains binary data, assume that the file is of type @var{type}. 
-Non-text bytes indicate binary data; these are either data bytes -improperly encoded for the current locale, or null bytes when the +Non-text bytes indicate binary data; these are either output bytes that are +improperly encoded for the current locale (@pxref{Environment +Variables}), or null input bytes when the @option{-z} (@option{--null-data}) option is not given (@pxref{Other Options}). -By default, @var{type} is @samp{binary}, -and @command{grep} normally outputs either -a one-line message saying that a binary file matches, -or no message if there is no match. -When processing binary data, @command{grep} may treat non-text bytes -as line terminators; for example, the pattern @samp{.} (period) might -not match a null byte, as the null byte might be treated as a line -terminator even without the @option{-z} (@option{--null-data}) option. +By default, @var{type} is @samp{binary}, and @command{grep} +suppresses output after null input binary data is discovered, +and suppresses output lines that contain improperly encoded data. +When some output is suppressed, @command{grep} follows any output +with a one-line message saying that a binary file matches. If @var{type} is @samp{without-match}, -@command{grep} assumes that a binary file does not match; +when @command{grep} discovers null input binary data +it assumes that the rest of the file does not match; this is equivalent to the @option{-I} option. If @var{type} is @samp{text}, -@command{grep} processes a binary file as if it were text; +@command{grep} processes binary data as if it were text; this is equivalent to the @option{-a} option. -@emph{Warning:} @samp{--binary-files=text} might output binary garbage, -which can have nasty side effects -if the output is a terminal and -if the terminal driver interprets some of it as commands. +When @var{type} is @samp{binary}, @command{grep} may treat non-text +bytes as line terminators even without the @option{-z} +(@option{--null-data}) option. 
This means choosing @samp{binary} +versus @samp{text} can affect whether a pattern matches a file. For +example, when @var{type} is @samp{binary} the pattern @samp{q$} might +match @samp{q} immediately followed by a null byte, even though this +is not matched when @var{type} is @samp{text}. Conversely, when +@var{type} is @samp{binary} the pattern @samp{.} (period) might not +match a null byte. + +@emph{Warning:} The @option{-a} (@option{--binary-files=text}) option +might output binary garbage, which can have nasty side effects if the +output is a terminal and if the terminal driver interprets some of it +as commands. On the other hand, when reading files whose text +encodings are unknown, it can be helpful to use @option{-a} or to set +@samp{LC_ALL='C'} in the environment, in order to find more matches +even if the matches are unsafe for direct display. @item -D @var{action} @itemx --devices=@var{action} @@ -661,10 +692,13 @@ this is equivalent to the @option{-r} option. @opindex --exclude @cindex exclude files @cindex searching directory trees -Skip files whose name matches the pattern @var{glob}, using wildcard -matching. When searching recursively, skip any subfile whose base +Skip any command-line file with a name suffix that matches the pattern +@var{glob}, using wildcard matching; a name suffix is either the whole +name, or a trailing part that starts with a non-slash character +immediately after a slash (@samp{/}) in the name. +When searching recursively, skip any subfile whose base name matches @var{glob}; the base name is the part after the last -@samp{/}. A pattern can use +slash. A pattern can use @samp{*}, @samp{?}, and @samp{[}...@samp{]} as wildcards, and @code{\} to quote a wildcard or backslash character literally. @@ -679,9 +713,10 @@ under @option{--exclude}). @item --exclude-dir=@var{glob} @opindex --exclude-dir @cindex exclude directories -Skip any directory whose name matches the pattern @var{glob}. 
When -searching recursively, skip any subdirectory whose base name matches -@var{glob}. Ignore any redundant trailing slashes in @var{glob}. +Skip any command-line directory with a name suffix that matches the +pattern @var{glob}. When searching recursively, skip any subdirectory +whose base name matches @var{glob}. Ignore any redundant trailing +slashes in @var{glob}. @item -I Process a binary file as if it did not contain matching data; @@ -725,6 +760,14 @@ directory, recursively, following all symbolic links. @table @option +@item -- +@opindex -- +@cindex option delimiter +Delimit the option list. Later arguments, if any, are treated as +operands even if they begin with @samp{-}. For example, @samp{grep PAT -- +-file1 file2} searches for the pattern PAT in the files named @file{-file1} +and @file{file2}. + @item --line-buffered @opindex --line-buffered @cindex line buffering @@ -735,29 +778,35 @@ This can cause a performance penalty. @itemx --binary @opindex -U @opindex --binary -@cindex MS-DOS/MS-Windows binary files -@cindex binary files, MS-DOS/MS-Windows -Treat the file(s) as binary. -By default, under MS-DOS and MS-Windows, -@command{grep} guesses whether a file is text or binary -as described for the @option{--binary-files} option. -If @command{grep} decides the file is a text file, -it strips carriage returns from the original file contents -(to make regular expressions with @code{^} and @code{$} work correctly). -Specifying @option{-U} overrules this guesswork, -causing all files to be read and passed to the matching mechanism verbatim; -if the file is a text file with @code{CR/LF} pairs at the end of each line, -this will cause some regular expressions to fail. -This option has no effect -on platforms other than MS-DOS and MS-Windows. 
+@cindex MS-Windows binary I/O +@cindex binary I/O +On platforms that distinguish between text and binary I/O, +use the latter when reading and writing files other +than the user's terminal, so that all input bytes are read and written +as-is. This overrides the default behavior where @command{grep} +follows the operating system's advice whether to use text or binary +I/O@. On MS-Windows when @command{grep} uses text I/O it reads a +carriage return--newline pair as a newline and a Control-Z as +end-of-file, and it writes a newline as a carriage return--newline +pair. + +When using text I/O @option{--byte-offset} (@option{-b}) counts and +@option{--binary-files} heuristics apply to input data after text-I/O +processing. Also, the @option{--binary-files} heuristics need not agree +with the @option{--binary} option; that is, they may treat the data as +text even if @option{--binary} is given, or vice versa. +@xref{File and Directory Selection}. + +This option has no effect on GNU and other POSIX-compatible platforms, +which do not distinguish text from binary I/O. @item -z @itemx --null-data @opindex -z @opindex --null-data @cindex zero-terminated lines -Treat the input as a set of lines, each terminated by a zero byte (the -ASCII NUL character) instead of a newline. +Treat input and output data as sequences of lines, each terminated by +a zero byte (the ASCII NUL character) instead of a newline. Like the @option{-Z} or @option{--null} option, this option can be used with commands like @samp{sort -z} to process arbitrary file names. @@ -791,6 +840,7 @@ The @samp{C} locale is used if none of these environment variables are set, if the locale catalog is not installed, or if @command{grep} was not compiled with national language support (NLS). +The shell command @code{locale -a} lists locales that are currently available. Many of the environment variables in the following list let you control highlighting using @@ -989,9 +1039,27 @@ interpreted. 
@vindex LC_ALL @r{environment variable} @vindex LC_CTYPE @r{environment variable} @vindex LANG @r{environment variable} +@cindex encoding error +@cindex null character These variables specify the locale for the @env{LC_CTYPE} category, which determines the type of characters, e.g., which characters are whitespace. +This category also determines the character encoding, that is, whether +text is encoded in UTF-8, ASCII, or some other encoding. In the +@samp{C} or @samp{POSIX} locale, all characters are encoded as a +single byte and every byte is a valid character. +In more-complex encodings such as UTF-8, a sequence of multiple bytes +may be needed to represent a character, and some bytes may be encoding +errors that do not contribute to the representation of any character. +POSIX does not specify the behavior of @command{grep} when patterns or +input data contain encoding errors or null characters, so portable +scripts should avoid such usage. As an extension to POSIX, GNU +@command{grep} treats null characters like any other character. +However, unless the @option{-a} (@option{--binary-files=text}) option +is used, the presence of null characters in input or of encoding +errors in output causes GNU @command{grep} to treat the file as binary +and suppress details about matches. @xref{File and Directory +Selection}. @item LANGUAGE @itemx LC_ALL @@ -1044,6 +1112,9 @@ and only when @env{POSIXLY_CORRECT} is not set. Normally the exit status is 0 if a line is selected, 1 if no lines were selected, and 2 if an error occurred. However, if the +@option{-L} or @option{--files-without-match} is used, the exit status +is 0 if a file is listed, 1 if no files were listed, and 2 if an error +occurred. Also, if the @option{-q} or @option{--quiet} or @option{--silent} option is used and a line is selected, the exit status is 0 even if an error occurred. Other @command{grep} implementations may exit with status @@ -1055,7 +1126,7 @@ greater than 2 on error. 
@cindex variants of @command{grep} @command{grep} searches the named input files -for lines containing a match to the given pattern. +for lines containing a match to the given patterns. By default, @command{grep} prints the matching lines. A file named @file{-} stands for standard input. If no input is specified, @command{grep} searches the working @@ -1071,7 +1142,7 @@ controlled by the following options. @opindex -G @opindex --basic-regexp @cindex matching basic regular expressions -Interpret the pattern as a basic regular expression (BRE). +Interpret patterns as basic regular expressions (BREs). This is the default. @item -E @@ -1079,7 +1150,7 @@ This is the default. @opindex -E @opindex --extended-regexp @cindex matching extended regular expressions -Interpret the pattern as an extended regular expression (ERE). +Interpret patterns as extended regular expressions (EREs). (@option{-E} is specified by POSIX.) @item -F @@ -1087,18 +1158,19 @@ Interpret the pattern as an extended regular expression (ERE). @opindex -F @opindex --fixed-strings @cindex matching fixed strings -Interpret the pattern as a list of fixed strings, separated -by newlines, any of which is to be matched. +Interpret patterns as fixed strings, not regular expressions. (@option{-F} is specified by POSIX.) @item -P @itemx --perl-regexp @opindex -P @opindex --perl-regexp -@cindex matching Perl regular expressions -Interpret the pattern as a Perl regular expression. -This is highly experimental and +@cindex matching Perl-compatible regular expressions +Interpret patterns as Perl-compatible regular expressions (PCREs). +PCRE support is here to stay, but consider this option experimental when +combined with the @option{-z} (@option{--null-data}) option, and note that @samp{grep@ -P} may warn of unimplemented features. +@xref{Other Options}. 
@end table @@ -1121,16 +1193,16 @@ Regular expressions are constructed analogously to arithmetic expressions, by using various operators to combine smaller expressions. @command{grep} understands three different versions of regular expression syntax: -``basic,'' (BRE) ``extended'' (ERE) and ``perl''. +basic (BRE), extended (ERE), and Perl-compatible (PCRE). In GNU @command{grep}, there is no difference in available functionality between the basic and extended syntaxes. In other implementations, basic regular expressions are less powerful. The following description applies to extended regular expressions; differences for basic regular expressions are summarized afterwards. -Perl regular expressions give additional functionality, and are -documented in the @i{pcresyntax}(3) and @i{pcrepattern}(3) manual pages, -but may not be available on every system. +Perl-compatible regular expressions give additional functionality, and +are documented in the @i{pcresyntax}(3) and @i{pcrepattern}(3) manual +pages, but work only if PCRE is available in the system. @menu * Fundamental Structure:: @@ -1151,16 +1223,16 @@ are regular expressions that match themselves. Any meta-character with special meaning may be quoted by preceding it with a backslash. -A regular expression may be followed by one of several -repetition operators: - -@table @samp - -@item . @opindex . @cindex dot @cindex period The period @samp{.} matches any single character. +It is unspecified whether @samp{.} matches an encoding error. + +A regular expression may be followed by one of several +repetition operators: + +@table @samp @item ? @opindex ? @@ -1231,11 +1303,15 @@ An unmatched @samp{)} matches just itself. @cindex character class A @dfn{bracket expression} is a list of characters enclosed by @samp{[} and @samp{]}. -It matches any single character in that list; -if the first character of the list is the caret @samp{^}, -then it matches any character @strong{not} in the list. 
+It matches any single character in that list. +If the first character of the list is the caret @samp{^}, +then it matches any character @strong{not} in the list, +and it is unspecified whether it matches an encoding error. For example, the regular expression -@samp{[0123456789]} matches any single digit. +@samp{[0123456789]} matches any single digit, +whereas @samp{[^()]} matches any single character that is not +an opening or closing parenthesis, and might or might not match an +encoding error. @cindex range expression Within a bracket expression, a @dfn{range expression} consists of two @@ -1460,6 +1536,8 @@ When multiple regular expressions are given with @option{-e} or from a file (@samp{-f @var{file}}), back-references are local to each expression. +@xref{Known Bugs}, for some known problems with back-references. + @node Basic vs Extended @section Basic vs Extended Regular Expressions @cindex basic regular expressions @@ -1503,6 +1581,27 @@ this is because @samp{.*} matches zero or more characters within a line. The @option{-i} option causes @command{grep} to ignore case, causing it to match the line @samp{Hello, world!}, which it would not otherwise match. + +Here is a more complex example session, +showing the location and contents of any line +containing @samp{f} and ending in @samp{.c}, +within all files in the current directory whose names +contain @samp{g} and end in @samp{.h}. +The @option{-n} option outputs line numbers, the @option{--} argument +treats any later arguments starting with @samp{-} as file names not +options, and the empty file @file{/dev/null} causes file names to be output +even if only one file name happens to be of the form @samp{*g*.h}. + +@example +$ @kbd{grep -n -- 'f.*\.c$' *g*.h /dev/null} +argmatch.h:1:/* definitions and prototypes for argmatch.c +@end example + +@noindent +The only line that contains a match is line 1 of @file{argmatch.h}. 
+Note that the regular expression syntax used in the pattern differs +from the globbing syntax that the shell uses to match file names. + @xref{Invoking}, for more details about how to invoke @command{grep}. @@ -1516,11 +1615,11 @@ Here are some common questions and answers about @command{grep} usage. How can I list just the names of matching files? @example -grep -l 'main' *.c +grep -l 'main' test-*.c @end example @noindent -lists the names of all C files in the current directory whose contents +lists names of @samp{test-*.c} files in the current directory whose contents mention @samp{main}. @item @@ -1534,45 +1633,54 @@ grep -r 'hello' /home/gigi searches for @samp{hello} in all files under the @file{/home/gigi} directory. For more control over which files are searched, -use @command{find}, @command{grep}, and @command{xargs}. +use @command{find} and @command{grep}. For example, the following command searches only C files: @example -find /home/gigi -name '*.c' -print0 | xargs -0r grep -H 'hello' +find /home/gigi -name '*.c' ! -type d \ + -exec grep -H 'hello' '@{@}' + @end example This differs from the command: @example -grep -H 'hello' *.c +grep -H 'hello' /home/gigi/*.c @end example -which merely looks for @samp{hello} in all files in the current -directory whose names end in @samp{.c}. -The @samp{find ...} command line above is more similar to the command: +which merely looks for @samp{hello} in non-hidden C files in +@file{/home/gigi} whose names end in @samp{.c}. +The @command{find} command line above is more similar to the command: @example -grep -rH --include='*.c' 'hello' /home/gigi +grep -r --include='*.c' 'hello' /home/gigi @end example @item -What if a pattern has a leading @samp{-}? +What if a pattern or file has a leading @samp{-}? @example -grep -e '--cut here--' * +grep -- '--cut here--' * @end example @noindent searches for all lines matching @samp{--cut here--}. 
-Without @option{-e}, +Without @option{--}, @command{grep} would attempt to parse @samp{--cut here--} as a list of -options. +options, and there would be similar problems with any file names +beginning with @samp{-}. + +Alternatively, you can prevent misinterpretation of leading @samp{-} +by using @option{-e} for patterns and leading @samp{./} for files: + +@example +grep -e '--cut here--' ./* +@end example @item Suppose I want to search for a whole word, not a part of a word? @example -grep -w 'hello' * +grep -w 'hello' test*.log @end example @noindent @@ -1583,7 +1691,7 @@ For more control, use @samp{\<} and For example: @example -grep 'hello\>' * +grep 'hello\>' test*.log @end example @noindent @@ -1594,7 +1702,7 @@ searches only for words ending in @samp{hello}, so it matches the word How do I output context around the matching lines? @example -grep -C 2 'hello' * +grep -C 2 'hello' test*.log @end example @noindent @@ -1676,7 +1784,7 @@ Why does the empty pattern match every input line? The @command{grep} command searches for lines that contain strings that match a pattern. Every line contains the empty string, so an empty pattern causes @command{grep} to find a match on each line. It -is not the only such pattern: @samp{^}, @samp{$}, @samp{.*}, and many +is not the only such pattern: @samp{^}, @samp{$}, and many other patterns cause @command{grep} to match every line. To match empty lines, use the pattern @samp{^$}. To match blank @@ -1692,31 +1800,6 @@ Use the special file name @samp{-}: cat /etc/passwd | grep 'alain' - /etc/motd @end example -@item -@cindex palindromes -How to express palindromes in a regular expression? 
- -It can be done by using back-references; -for example, -a palindrome of 4 characters can be written with a BRE: - -@example -grep -w -e '\(.\)\(.\).\2\1' file -@end example - -It matches the word ``radar'' or ``civic.'' - -Guglielmo Bondioni proposed a single RE -that finds all palindromes up to 19 characters long -using @w{9 subexpressions} and @w{9 back-references}: - -@smallexample -grep -E -e '^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\9\8\7\6\5\4\3\2\1$' file -@end smallexample - -Note this is done by using GNU ERE extensions; -it might not be portable to other implementations of @command{grep}. - @item Why is this back-reference failing? @@ -1738,7 +1821,7 @@ Therefore, merely using the @code{[:space:]} character class does not match newlines in the way you might expect. With the GNU @command{grep} option @option{-z} (@option{--null-data}), each -input ``line'' is terminated by a null byte; @pxref{Other Options}. Thus, +input and output ``line'' is null-terminated; @pxref{Other Options}. Thus, you can match newlines in the input, but typically if there is a match the entire input is output, so this usage is often combined with output-suppressing options like @option{-q}, e.g.: @@ -1771,16 +1854,108 @@ g/re/p @end enumerate +@node Performance +@chapter Performance + +@cindex performance +Typically @command{grep} is an efficient way to search text. However, +it can be quite slow in some cases, and it can search large files +where even minor performance tweaking can help significantly. +Although the algorithm used by @command{grep} is an implementation +detail that can change from release to release, understanding its +basic strengths and weaknesses can help you improve its performance. + +The @command{grep} command operates partly via a set of automata that +are designed for efficiency, and partly via a slower matcher that +takes over when the fast matchers run into unusual features like +back-references. 
When feasible, the Boyer--Moore fast string +searching algorithm is used to match a single fixed pattern, and the +Aho--Corasick algorithm is used to match multiple fixed patterns. + +@cindex locales +Generally speaking @command{grep} operates more efficiently in +single-byte locales, since it can avoid the special processing needed +for multi-byte characters. If your patterns will work just as well +that way, setting @env{LC_ALL} to a single-byte locale can help +performance considerably. Setting @samp{LC_ALL='C'} can be +particularly efficient, as @command{grep} is tuned for that locale. + +@cindex case insensitive search +Outside the @samp{C} locale, case-insensitive search, and search for +bracket expressions like @samp{[a-z]} and @samp{[[=a=]b]}, can be +surprisingly inefficient due to difficulties in fast portable access to +concepts like multi-character collating elements. + +@cindex back-references +A back-reference such as @samp{\1} can hurt performance significantly +in some cases, since back-references cannot in general be implemented +via a finite state automaton, and instead trigger a backtracking +algorithm that can be quite inefficient. For example, although the +pattern @samp{^(.*)\1@{14@}(.*)\2@{13@}$} matches only lines whose +lengths can be written as a sum @math{15x + 14y} for nonnegative +integers @math{x} and @math{y}, the pattern matcher does not perform +linear Diophantine analysis and instead backtracks through all +possible matching strings, using an algorithm that is exponential in +the worst case. + +@cindex holes in files +On some operating systems that support files with holes---large +regions of zeros that are not physically present on secondary +storage---@command{grep} can skip over the holes efficiently without +needing to read the zeros. 
This optimization is not available if the +@option{-a} (@option{--binary-files=text}) option is used (@pxref{File and +Directory Selection}), unless the @option{-z} (@option{--null-data}) +option is also used (@pxref{Other Options}). + +For more about the algorithms used by @command{grep} and about +related string matching algorithms, see: + +@frenchspacing on +@itemize @bullet +@item +Aho AV. Algorithms for finding patterns in strings. +In: van Leeuwen J. @emph{Handbook of Theoretical Computer Science}, vol. A. +New York: Elsevier; 1990. p. 255--300. +This surveys classic string matching algorithms, some of which are +used by @command{grep}. + +@item +Aho AV, Corasick MJ. Efficient string matching: an aid to bibliographic search. +@emph{CACM}. 1975;18(6):333--40. +@url{https://dx.doi.org/10.1145/360825.360855}. +This introduces the Aho--Corasick algorithm. + +@item +Boyer RS, Moore JS. A fast string searching algorithm. +@emph{CACM}. 1977;20(10):762--72. +@url{https://dx.doi.org/10.1145/359842.359859}. +This introduces the Boyer--Moore algorithm. + +@item +Faro S, Lecroq T. The exact online string matching problem: a review +of the most recent results. +@emph{ACM Comput Surv}. 2013;45(2):13. +@url{https://dx.doi.org/10.1145/2431211.2431212}. +This surveys string matching algorithms that might help improve the +performance of @command{grep} in the future. +@end itemize +@frenchspacing off + @node Reporting Bugs @chapter Reporting bugs @cindex bugs, reporting Bug reports can be found at the -@url{http://debbugs.gnu.org/cgi/pkgreport.cgi?package=grep, +@url{https://debbugs.gnu.org/cgi/pkgreport.cgi?package=grep, GNU bug report logs for @command{grep}}. If you find a bug not listed there, please email it to @email{bug-grep@@gnu.org} to create a new bug report. 
+@menu +* Known Bugs:: +@end menu + +@node Known Bugs @section Known Bugs @cindex Bugs, known @@ -1790,7 +1965,17 @@ In addition, certain other obscure regular expressions require exponential time and space, and may cause @command{grep} to run out of memory. -Back-references are very slow, and may require exponential time. +Back-references can greatly slow down matching, as they can generate +exponentially many matching possibilities that can consume both time +and memory to explore. Also, the POSIX specification for +back-references is at times unclear. Furthermore, many regular +expression implementations have back-reference bugs that can cause +programs to return incorrect answers or even crash, and fixing these +bugs has often been low-priority---for example, as of 2019 the GNU C +library bug database contained back-reference bugs 52, 10844, 11053, +and 25322, with little sign of forthcoming fixes. Luckily, +back-references are rarely useful and it should be little trouble to +avoid them in practical applications. @node Copying @@ -1800,7 +1985,7 @@ Back-references are very slow, and may require exponential time. GNU @command{grep} is licensed under the GNU GPL, which makes it @dfn{free software}. -The ``free'' in ``free software'' refers to liberty, not price. As +The ``free'' in ``free software'' refers to liberty, not price. As some GNU project advocates like to point out, think of ``free speech'' rather than ``free beer''. In short, you have the right (freedom) to run and change @command{grep} and distribute it to other people, and---if you @@ -1811,12 +1996,12 @@ same restrictions. This general method of licensing software is sometimes called @dfn{open source}. The GNU project prefers the term ``free software'' for reasons outlined at -@url{http://www.gnu.org/philosophy/open-source-misses-the-point.html}. +@url{https://www.gnu.org/philosophy/open-source-misses-the-point.html}. This manual is free documentation in the same sense. 
The documentation license is included below. The license for the program is available with the source code, or at -@url{http://www.gnu.org/licenses/gpl.html}. +@url{https://www.gnu.org/licenses/gpl.html}. @menu * GNU Free Documentation License:: diff --git a/contrib/grep/doc/version.texi b/contrib/grep/doc/version.texi index 09ff4d2b4a..edefa518b4 100644 --- a/contrib/grep/doc/version.texi +++ b/contrib/grep/doc/version.texi @@ -1,4 +1,4 @@ -@set UPDATED 21 October 2015 -@set UPDATED-MONTH October 2015 -@set EDITION 2.22 -@set VERSION 2.22 +@set UPDATED 1 January 2020 +@set UPDATED-MONTH January 2020 +@set EDITION 3.4 +@set VERSION 3.4 diff --git a/contrib/grep/lib/alignof.h b/contrib/grep/lib/alignof.h deleted file mode 100644 index be53fa5013..0000000000 --- a/contrib/grep/lib/alignof.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Determine alignment of types. - Copyright (C) 2003-2004, 2006, 2009-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. */ - -#ifndef _ALIGNOF_H -#define _ALIGNOF_H - -#include <stddef.h> - -/* alignof_slot (TYPE) - Determine the alignment of a structure slot (field) of a given type, - at compile time. Note that the result depends on the ABI. - This is the same as alignof (TYPE) and _Alignof (TYPE), defined in - <stdalign.h> if __alignof_is_defined is 1. - Note: The result cannot be used as a value for an 'enum' constant, - due to bugs in HP-UX 10.20 cc and AIX 3.2.5 xlc.
*/ -#if defined __cplusplus - template <class type> struct alignof_helper { char __slot1; type __slot2; }; -# define alignof_slot(type) offsetof (alignof_helper<type>, __slot2) -#else -# define alignof_slot(type) offsetof (struct { char __slot1; type __slot2; }, __slot2) -#endif - -/* alignof_type (TYPE) - Determine the good alignment of an object of the given type at compile time. - Note that this is not necessarily the same as alignof_slot(type). - For example, with GNU C on x86 platforms: alignof_type(double) = 8, but - - when -malign-double is not specified: alignof_slot(double) = 4, - - when -malign-double is specified: alignof_slot(double) = 8. - Note: The result cannot be used as a value for an 'enum' constant, - due to bugs in HP-UX 10.20 cc and AIX 3.2.5 xlc. */ -#if defined __GNUC__ || defined __IBM__ALIGNOF__ -# define alignof_type __alignof__ -#else -# define alignof_type alignof_slot -#endif - -#endif /* _ALIGNOF_H */ diff --git a/contrib/grep/lib/alloca.c b/contrib/grep/lib/alloca.c deleted file mode 100644 index ee0f018868..0000000000 --- a/contrib/grep/lib/alloca.c +++ /dev/null @@ -1,478 +0,0 @@ -/* alloca.c -- allocate automatically reclaimed memory - (Mostly) portable public-domain implementation -- D A Gwyn - - This implementation of the PWB library alloca function, - which is used to allocate space off the run-time stack so - that it is automatically reclaimed upon procedure exit, - was inspired by discussions with J. Q. Johnson of Cornell. - J.Otto Tennant contributed the Cray support. - - There are some preprocessor constants that can - be defined when compiling for your specific system, for - improved efficiency; however, the defaults should be okay. - - The general concept of this implementation is to keep - track of all alloca-allocated blocks, and reclaim any - that are found to be deeper in the stack than the current - invocation. This heuristic does not reclaim storage as - soon as it becomes invalid, but it will do so eventually.
- - As a special case, alloca(0) reclaims storage without - allocating any. It is a good idea to use alloca(0) in - your main control loop, etc. to force garbage collection. */ - -#include <config.h> - -#include <alloca.h> - -#include <string.h> -#include <stdlib.h> - -#ifdef emacs -# include "lisp.h" -# include "blockinput.h" -# ifdef EMACS_FREE -# undef free -# define free EMACS_FREE -# endif -#else -# define memory_full() abort () -#endif - -/* If compiling with GCC 2, this file's not needed. */ -#if !defined (__GNUC__) || __GNUC__ < 2 - -/* If someone has defined alloca as a macro, - there must be some other way alloca is supposed to work. */ -# ifndef alloca - -# ifdef emacs -# ifdef static -/* actually, only want this if static is defined as "" - -- this is for usg, in which emacs must undefine static - in order to make unexec workable - */ -# ifndef STACK_DIRECTION -you -lose --- must know STACK_DIRECTION at compile-time -/* Using #error here is not wise since this file should work for - old and obscure compilers. */ -# endif /* STACK_DIRECTION undefined */ -# endif /* static */ -# endif /* emacs */ - -/* If your stack is a linked list of frames, you have to - provide an "address metric" ADDRESS_FUNCTION macro. */ - -# if defined (CRAY) && defined (CRAY_STACKSEG_END) -long i00afunc (); -# define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg)) -# else -# define ADDRESS_FUNCTION(arg) &(arg) -# endif - -/* Define STACK_DIRECTION if you know the direction of stack - growth for your system; otherwise it will be automatically - deduced at run-time. - - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown */ - -# ifndef STACK_DIRECTION -# define STACK_DIRECTION 0 /* Direction unknown. */ -# endif - -# if STACK_DIRECTION != 0 - -# define STACK_DIR STACK_DIRECTION /* Known at compile-time. */ - -# else /* STACK_DIRECTION == 0; need run-time code. */ - -static int stack_dir; /* 1 or -1 once known.
*/ -# define STACK_DIR stack_dir - -static int -find_stack_direction (int *addr, int depth) -{ - int dir, dummy = 0; - if (! addr) - addr = &dummy; - *addr = addr < &dummy ? 1 : addr == &dummy ? 0 : -1; - dir = depth ? find_stack_direction (addr, depth - 1) : 0; - return dir + dummy; -} - -# endif /* STACK_DIRECTION == 0 */ - -/* An "alloca header" is used to: - (a) chain together all alloca'ed blocks; - (b) keep track of stack depth. - - It is very important that sizeof(header) agree with malloc - alignment chunk size. The following default should work okay. */ - -# ifndef ALIGN_SIZE -# define ALIGN_SIZE sizeof(double) -# endif - -typedef union hdr -{ - char align[ALIGN_SIZE]; /* To force sizeof(header). */ - struct - { - union hdr *next; /* For chaining headers. */ - char *deep; /* For stack depth measure. */ - } h; -} header; - -static header *last_alloca_header = NULL; /* -> last alloca header. */ - -/* Return a pointer to at least SIZE bytes of storage, - which will be automatically reclaimed upon exit from - the procedure that called alloca. Originally, this space - was supposed to be taken from the current stack frame of the - caller, but that method cannot be made to work for some - implementations of C, for example under Gould's UTX/32. */ - -void * -alloca (size_t size) -{ - auto char probe; /* Probes stack depth: */ - register char *depth = ADDRESS_FUNCTION (probe); - -# if STACK_DIRECTION == 0 - if (STACK_DIR == 0) /* Unknown growth direction. */ - STACK_DIR = find_stack_direction (NULL, (size & 1) + 20); -# endif - - /* Reclaim garbage, defined as all alloca'd storage that - was allocated from deeper in the stack than currently. */ - - { - register header *hp; /* Traverses linked list. */ - -# ifdef emacs - BLOCK_INPUT; -# endif - - for (hp = last_alloca_header; hp != NULL;) - if ((STACK_DIR > 0 && hp->h.deep > depth) - || (STACK_DIR < 0 && hp->h.deep < depth)) - { - register header *np = hp->h.next; - - free (hp); /* Collect garbage. 
*/ - - hp = np; /* -> next header. */ - } - else - break; /* Rest are not deeper. */ - - last_alloca_header = hp; /* -> last valid storage. */ - -# ifdef emacs - UNBLOCK_INPUT; -# endif - } - - if (size == 0) - return NULL; /* No allocation required. */ - - /* Allocate combined header + user data storage. */ - - { - /* Address of header. */ - register header *new; - - size_t combined_size = sizeof (header) + size; - if (combined_size < sizeof (header)) - memory_full (); - - new = malloc (combined_size); - - if (! new) - memory_full (); - - new->h.next = last_alloca_header; - new->h.deep = depth; - - last_alloca_header = new; - - /* User storage begins just after header. */ - - return (void *) (new + 1); - } -} - -# if defined (CRAY) && defined (CRAY_STACKSEG_END) - -# ifdef DEBUG_I00AFUNC -# include -# endif - -# ifndef CRAY_STACK -# define CRAY_STACK -# ifndef CRAY2 -/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */ -struct stack_control_header - { - long shgrow:32; /* Number of times stack has grown. */ - long shaseg:32; /* Size of increments to stack. */ - long shhwm:32; /* High water mark of stack. */ - long shsize:32; /* Current size of stack (all segments). */ - }; - -/* The stack segment linkage control information occurs at - the high-address end of a stack segment. (The stack - grows from low addresses to high addresses.) The initial - part of the stack segment linkage control information is - 0200 (octal) words. This provides for register storage - for the routine which overflows the stack. */ - -struct stack_segment_linkage - { - long ss[0200]; /* 0200 overflow words. */ - long sssize:32; /* Number of words in this segment. */ - long ssbase:32; /* Offset to stack base. */ - long:32; - long sspseg:32; /* Offset to linkage control of previous - segment of stack. */ - long:32; - long sstcpt:32; /* Pointer to task common address block. */ - long sscsnm; /* Private control structure number for - microtasking. */ - long ssusr1; /* Reserved for user. 
*/ - long ssusr2; /* Reserved for user. */ - long sstpid; /* Process ID for pid based multi-tasking. */ - long ssgvup; /* Pointer to multitasking thread giveup. */ - long sscray[7]; /* Reserved for Cray Research. */ - long ssa0; - long ssa1; - long ssa2; - long ssa3; - long ssa4; - long ssa5; - long ssa6; - long ssa7; - long sss0; - long sss1; - long sss2; - long sss3; - long sss4; - long sss5; - long sss6; - long sss7; - }; - -# else /* CRAY2 */ -/* The following structure defines the vector of words - returned by the STKSTAT library routine. */ -struct stk_stat - { - long now; /* Current total stack size. */ - long maxc; /* Amount of contiguous space which would - be required to satisfy the maximum - stack demand to date. */ - long high_water; /* Stack high-water mark. */ - long overflows; /* Number of stack overflow ($STKOFEN) calls. */ - long hits; /* Number of internal buffer hits. */ - long extends; /* Number of block extensions. */ - long stko_mallocs; /* Block allocations by $STKOFEN. */ - long underflows; /* Number of stack underflow calls ($STKRETN). */ - long stko_free; /* Number of deallocations by $STKRETN. */ - long stkm_free; /* Number of deallocations by $STKMRET. */ - long segments; /* Current number of stack segments. */ - long maxs; /* Maximum number of stack segments so far. */ - long pad_size; /* Stack pad size. */ - long current_address; /* Current stack segment address. */ - long current_size; /* Current stack segment size. This - number is actually corrupted by STKSTAT to - include the fifteen word trailer area. */ - long initial_address; /* Address of initial segment. */ - long initial_size; /* Size of initial segment. */ - }; - -/* The following structure describes the data structure which trails - any stack segment. I think that the description in 'asdef' is - out of date. I only describe the parts that I am sure about. */ - -struct stk_trailer - { - long this_address; /* Address of this block. 
*/ - long this_size; /* Size of this block (does not include - this trailer). */ - long unknown2; - long unknown3; - long link; /* Address of trailer block of previous - segment. */ - long unknown5; - long unknown6; - long unknown7; - long unknown8; - long unknown9; - long unknown10; - long unknown11; - long unknown12; - long unknown13; - long unknown14; - }; - -# endif /* CRAY2 */ -# endif /* not CRAY_STACK */ - -# ifdef CRAY2 -/* Determine a "stack measure" for an arbitrary ADDRESS. - I doubt that "lint" will like this much. */ - -static long -i00afunc (long *address) -{ - struct stk_stat status; - struct stk_trailer *trailer; - long *block, size; - long result = 0; - - /* We want to iterate through all of the segments. The first - step is to get the stack status structure. We could do this - more quickly and more directly, perhaps, by referencing the - $LM00 common block, but I know that this works. */ - - STKSTAT (&status); - - /* Set up the iteration. */ - - trailer = (struct stk_trailer *) (status.current_address - + status.current_size - - 15); - - /* There must be at least one stack segment. Therefore it is - a fatal error if "trailer" is null. */ - - if (trailer == 0) - abort (); - - /* Discard segments that do not contain our argument address. */ - - while (trailer != 0) - { - block = (long *) trailer->this_address; - size = trailer->this_size; - if (block == 0 || size == 0) - abort (); - trailer = (struct stk_trailer *) trailer->link; - if ((block <= address) && (address < (block + size))) - break; - } - - /* Set the result to the offset in this segment and add the sizes - of all predecessor segments. */ - - result = address - block; - - if (trailer == 0) - { - return result; - } - - do - { - if (trailer->this_size <= 0) - abort (); - result += trailer->this_size; - trailer = (struct stk_trailer *) trailer->link; - } - while (trailer != 0); - - /* We are done. 
Note that if you present a bogus address (one - not in any segment), you will get a different number back, formed - from subtracting the address of the first block. This is probably - not what you want. */ - - return (result); -} - -# else /* not CRAY2 */ -/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP. - Determine the number of the cell within the stack, - given the address of the cell. The purpose of this - routine is to linearize, in some sense, stack addresses - for alloca. */ - -static long -i00afunc (long address) -{ - long stkl = 0; - - long size, pseg, this_segment, stack; - long result = 0; - - struct stack_segment_linkage *ssptr; - - /* Register B67 contains the address of the end of the - current stack segment. If you (as a subprogram) store - your registers on the stack and find that you are past - the contents of B67, you have overflowed the segment. - - B67 also points to the stack segment linkage control - area, which is what we are really interested in. */ - - stkl = CRAY_STACKSEG_END (); - ssptr = (struct stack_segment_linkage *) stkl; - - /* If one subtracts 'size' from the end of the segment, - one has the address of the first word of the segment. - - If this is not the first segment, 'pseg' will be - nonzero. */ - - pseg = ssptr->sspseg; - size = ssptr->sssize; - - this_segment = stkl - size; - - /* It is possible that calling this routine itself caused - a stack overflow. Discard stack segments which do not - contain the target address. 
*/ - - while (!(this_segment <= address && address <= stkl)) - { -# ifdef DEBUG_I00AFUNC - fprintf (stderr, "%011o %011o %011o\n", this_segment, address, stkl); -# endif - if (pseg == 0) - break; - stkl = stkl - pseg; - ssptr = (struct stack_segment_linkage *) stkl; - size = ssptr->sssize; - pseg = ssptr->sspseg; - this_segment = stkl - size; - } - - result = address - this_segment; - - /* If you subtract pseg from the current end of the stack, - you get the address of the previous stack segment's end. - This seems a little convoluted to me, but I'll bet you save - a cycle somewhere. */ - - while (pseg != 0) - { -# ifdef DEBUG_I00AFUNC - fprintf (stderr, "%011o %011o\n", pseg, size); -# endif - stkl = stkl - pseg; - ssptr = (struct stack_segment_linkage *) stkl; - size = ssptr->sssize; - pseg = ssptr->sspseg; - result += size; - } - return (result); -} - -# endif /* not CRAY2 */ -# endif /* CRAY */ - -# endif /* no alloca */ -#endif /* not GCC 2 */ diff --git a/contrib/grep/lib/argmatch.c b/contrib/grep/lib/argmatch.c index 0e452bd1cb..195f90252b 100644 --- a/contrib/grep/lib/argmatch.c +++ b/contrib/grep/lib/argmatch.c @@ -1,6 +1,6 @@ /* argmatch.c -- find a match for a string in an array - Copyright (C) 1990, 1998-1999, 2001-2007, 2009-2015 Free Software + Copyright (C) 1990, 1998-1999, 2001-2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ /* Written by David MacKenzie Modified by Akim Demaille */ @@ -29,12 +29,11 @@ #include #include -#include "gettext.h" #define _(msgid) gettext (msgid) #include "error.h" #include "quotearg.h" -#include "quote.h" +#include "getprogname.h" #if USE_UNLOCKED_IO # include "unlocked-io.h" @@ -81,7 +80,7 @@ argmatch_exit_fn argmatch_die = __argmatch_die; ptrdiff_t argmatch (const char *arg, const char *const *arglist, - const char *vallist, size_t valsize) + const void *vallist, size_t valsize) { size_t i; /* Temporary index in ARGLIST. */ size_t arglen; /* Length of ARG. */ @@ -105,8 +104,8 @@ argmatch (const char *arg, const char *const *arglist, { /* Second nonexact match found. */ if (vallist == NULL - || memcmp (vallist + valsize * matchind, - vallist + valsize * i, valsize)) + || memcmp ((char const *) vallist + valsize * matchind, + (char const *) vallist + valsize * i, valsize)) { /* There is a real ambiguity, or we could not disambiguate. */ @@ -143,7 +142,7 @@ argmatch_invalid (const char *context, const char *value, ptrdiff_t problem) VALSIZE is the size of the elements of VALLIST */ void argmatch_valid (const char *const *arglist, - const char *vallist, size_t valsize) + const void *vallist, size_t valsize) { size_t i; const char *last_val = NULL; @@ -153,10 +152,10 @@ argmatch_valid (const char *const *arglist, fputs (_("Valid arguments are:"), stderr); for (i = 0; arglist[i]; i++) if ((i == 0) - || memcmp (last_val, vallist + valsize * i, valsize)) + || memcmp (last_val, (char const *) vallist + valsize * i, valsize)) { fprintf (stderr, "\n - %s", quote (arglist[i])); - last_val = vallist + valsize * i; + last_val = (char const *) vallist + valsize * i; } else { @@ -174,7 +173,7 @@ argmatch_valid (const char *const *arglist, ptrdiff_t __xargmatch_internal (const char *context, const char *arg, const char *const *arglist, - const char *vallist, size_t valsize, + const void *vallist, size_t valsize, argmatch_exit_fn exit_fn) { ptrdiff_t res = argmatch 
(arg, arglist, vallist, valsize); @@ -193,14 +192,14 @@ __xargmatch_internal (const char *context, /* Look for VALUE in VALLIST, an array of objects of size VALSIZE and return the first corresponding argument in ARGLIST */ const char * -argmatch_to_argument (const char *value, +argmatch_to_argument (const void *value, const char *const *arglist, - const char *vallist, size_t valsize) + const void *vallist, size_t valsize) { size_t i; for (i = 0; arglist[i]; i++) - if (!memcmp (value, vallist + valsize * i, valsize)) + if (!memcmp (value, (char const *) vallist + valsize * i, valsize)) return arglist[i]; return NULL; } @@ -209,7 +208,6 @@ argmatch_to_argument (const char *value, /* * Based on "getversion.c" by David MacKenzie */ -char *program_name; /* When to make backup files. */ enum backup_type @@ -253,11 +251,9 @@ main (int argc, const char *const *argv) const char *cp; enum backup_type backup_type = no_backups; - program_name = (char *) argv[0]; - if (argc > 2) { - fprintf (stderr, "Usage: %s [VERSION_CONTROL]\n", program_name); + fprintf (stderr, "Usage: %s [VERSION_CONTROL]\n", getprogname ()); exit (1); } @@ -266,7 +262,7 @@ main (int argc, const char *const *argv) backup_args, backup_vals); if (argc == 2) - backup_type = XARGMATCH (program_name, argv[1], + backup_type = XARGMATCH (getprogname (), argv[1], backup_args, backup_vals); printf ("The version control is '%s'\n", diff --git a/contrib/grep/lib/argmatch.h b/contrib/grep/lib/argmatch.h index bbbfe64dfb..daa0fdbf3a 100644 --- a/contrib/grep/lib/argmatch.h +++ b/contrib/grep/lib/argmatch.h @@ -1,6 +1,6 @@ /* argmatch.h -- definitions and prototypes for argmatch.c - Copyright (C) 1990, 1998-1999, 2001-2002, 2004-2005, 2009-2015 Free Software + Copyright (C) 1990, 1998-1999, 2001-2002, 2004-2005, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. 
You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by David MacKenzie Modified by Akim Demaille */ @@ -22,13 +22,19 @@ #ifndef ARGMATCH_H_ # define ARGMATCH_H_ 1 +# include +# include # include +# include +# include /* memcmp */ +# include "gettext.h" +# include "quote.h" # include "verify.h" -#ifdef __cplusplus +# ifdef __cplusplus extern "C" { -#endif +# endif # define ARRAY_CARDINALITY(Array) (sizeof (Array) / sizeof *(Array)) @@ -44,10 +50,10 @@ extern "C" { to the same values in VALLIST). */ ptrdiff_t argmatch (char const *arg, char const *const *arglist, - char const *vallist, size_t valsize) _GL_ATTRIBUTE_PURE; + void const *vallist, size_t valsize) _GL_ATTRIBUTE_PURE; # define ARGMATCH(Arg, Arglist, Vallist) \ - argmatch (Arg, Arglist, (char const *) (Vallist), sizeof *(Vallist)) + argmatch (Arg, Arglist, (void const *) (Vallist), sizeof *(Vallist)) /* xargmatch calls this function when it fails. This function should not return. By default, this is a function that calls ARGMATCH_DIE which @@ -70,10 +76,10 @@ void argmatch_invalid (char const *context, char const *value, /* Report on stderr the list of possible arguments. */ void argmatch_valid (char const *const *arglist, - char const *vallist, size_t valsize); + void const *vallist, size_t valsize); # define ARGMATCH_VALID(Arglist, Vallist) \ - argmatch_valid (Arglist, (char const *) (Vallist), sizeof *(Vallist)) + argmatch_valid (Arglist, (void const *) (Vallist), sizeof *(Vallist)) @@ -82,30 +88,244 @@ void argmatch_valid (char const *const *arglist, ptrdiff_t __xargmatch_internal (char const *context, char const *arg, char const *const *arglist, - char const *vallist, size_t valsize, + void const *vallist, size_t valsize, argmatch_exit_fn exit_fn); /* Programmer friendly interface to __xargmatch_internal. 
*/ # define XARGMATCH(Context, Arg, Arglist, Vallist) \ ((Vallist) [__xargmatch_internal (Context, Arg, Arglist, \ - (char const *) (Vallist), \ + (void const *) (Vallist), \ sizeof *(Vallist), \ argmatch_die)]) /* Convert a value into a corresponding argument. */ -char const *argmatch_to_argument (char const *value, +char const *argmatch_to_argument (void const *value, char const *const *arglist, - char const *vallist, size_t valsize) + void const *vallist, size_t valsize) _GL_ATTRIBUTE_PURE; # define ARGMATCH_TO_ARGUMENT(Value, Arglist, Vallist) \ argmatch_to_argument (Value, Arglist, \ - (char const *) (Vallist), sizeof *(Vallist)) - -#ifdef __cplusplus + (void const *) (Vallist), sizeof *(Vallist)) + +# define ARGMATCH_DEFINE_GROUP(Name, Type) \ + /* The type of the values of this group. */ \ + typedef Type argmatch_##Name##_type; \ + \ + /* The size of the type of the values of this group. */ \ + enum argmatch_##Name##_size_enum \ + { \ + argmatch_##Name##_size = sizeof (argmatch_##Name##_type) \ + }; \ + \ + /* Argument mapping of this group. */ \ + typedef struct \ + { \ + /* Argument (e.g., "simple"). */ \ + const char *arg; \ + /* Value (e.g., simple_backups). */ \ + const argmatch_##Name##_type val; \ + } argmatch_##Name##_arg; \ + \ + /* Documentation of this group. */ \ + typedef struct \ + { \ + /* Argument (e.g., "simple"). */ \ + const char *arg; \ + /* Documentation (e.g., N_("always make simple backups")). */ \ + const char *doc; \ + } argmatch_##Name##_doc; \ + \ + /* All the features of an argmatch group. */ \ + typedef struct \ + { \ + const argmatch_##Name##_arg* args; \ + const argmatch_##Name##_doc* docs; \ + \ + /* Printed before the usage message. */ \ + const char *doc_pre; \ + /* Printed after the usage message. */ \ + const char *doc_post; \ + } argmatch_##Name##_group_type; \ + \ + /* The structure the user must build. 
*/ \ + extern const argmatch_##Name##_group_type argmatch_##Name##_group; \ + \ + /* Print the documentation of this group. */ \ + void argmatch_##Name##_usage (FILE *out); \ + \ + /* If nonnegative, the index I of ARG in ARGS, i.e, \ + ARGS[I] == ARG. \ + Return -1 for invalid argument, -2 for ambiguous argument. */ \ + ptrdiff_t argmatch_##Name##_choice (const char *arg); \ + \ + /* A pointer to the corresponding value if it exists, or \ + report an error and exit with failure if the argument was \ + not recognized. */ \ + const argmatch_##Name##_type* \ + argmatch_##Name##_value (const char *context, const char *arg); \ + \ + /* The first argument in ARGS that matches this value, or NULL. */ \ + const char * \ + argmatch_##Name##_argument (const argmatch_##Name##_type *val); \ + \ + ptrdiff_t \ + argmatch_##Name##_choice (const char *arg) \ + { \ + const argmatch_##Name##_group_type *g = &argmatch_##Name##_group; \ + size_t size = argmatch_##Name##_size; \ + ptrdiff_t res = -1; /* Index of first nonexact match. */ \ + bool ambiguous = false; /* Whether multiple nonexact match(es). */ \ + size_t arglen = strlen (arg); \ + \ + /* Test all elements for either exact match or abbreviated \ + matches. */ \ + for (size_t i = 0; g->args[i].arg; i++) \ + if (!strncmp (g->args[i].arg, arg, arglen)) \ + { \ + if (strlen (g->args[i].arg) == arglen) \ + /* Exact match found. */ \ + return i; \ + else if (res == -1) \ + /* First nonexact match found. */ \ + res = i; \ + else if (memcmp (&g->args[res].val, &g->args[i].val, size)) \ + /* Second nonexact match found. */ \ + /* There is a real ambiguity, or we could not \ + disambiguate. */ \ + ambiguous = true; \ + } \ + return ambiguous ? 
-2 : res; \ + } \ + \ + const char * \ + argmatch_##Name##_argument (const argmatch_##Name##_type *val) \ + { \ + const argmatch_##Name##_group_type *g = &argmatch_##Name##_group; \ + size_t size = argmatch_##Name##_size; \ + for (size_t i = 0; g->args[i].arg; i++) \ + if (!memcmp (val, &g->args[i].val, size)) \ + return g->args[i].arg; \ + return NULL; \ + } \ + \ + /* List the valid values of this group. */ \ + static void \ + argmatch_##Name##_valid (FILE *out) \ + { \ + const argmatch_##Name##_group_type *g = &argmatch_##Name##_group; \ + size_t size = argmatch_##Name##_size; \ + \ + /* Try to put synonyms on the same line. Synonyms are expected \ + to follow each other. */ \ + fputs (gettext ("Valid arguments are:"), out); \ + for (int i = 0; g->args[i].arg; i++) \ + if (i == 0 \ + || memcmp (&g->args[i-1].val, &g->args[i].val, size)) \ + fprintf (out, "\n - %s", quote (g->args[i].arg)); \ + else \ + fprintf (out, ", %s", quote (g->args[i].arg)); \ + putc ('\n', out); \ + } \ + \ + const argmatch_##Name##_type* \ + argmatch_##Name##_value (const char *context, const char *arg) \ + { \ + const argmatch_##Name##_group_type *g = &argmatch_##Name##_group; \ + ptrdiff_t res = argmatch_##Name##_choice (arg); \ + if (res < 0) \ + { \ + argmatch_invalid (context, arg, res); \ + argmatch_##Name##_valid (stderr); \ + argmatch_die (); \ + } \ + return &g->args[res].val; \ + } \ + \ + /* The column in which the documentation is displayed. \ + The leftmost possible, but no more than 20. */ \ + static int \ + argmatch_##Name##_doc_col (void) \ + { \ + const argmatch_##Name##_group_type *g = &argmatch_##Name##_group; \ + size_t size = argmatch_##Name##_size; \ + int res = 0; \ + for (int i = 0; g->docs[i].arg; ++i) \ + { \ + int col = 4; \ + int ival = argmatch_##Name##_choice (g->docs[i].arg); \ + if (ival < 0) \ + /* Pseudo argument, display it. */ \ + col += strlen (g->docs[i].arg); \ + else \ + /* Genuine argument, display it with its synonyms. 
*/ \ + for (int j = 0; g->args[j].arg; ++j) \ + if (! memcmp (&g->args[ival].val, &g->args[j].val, size)) \ + col += (col == 4 ? 0 : 2) + strlen (g->args[j].arg); \ + if (res <= col) \ + res = col <= 20 ? col : 20; \ + } \ + return res ? res : 20; \ + } \ + \ + void \ + argmatch_##Name##_usage (FILE *out) \ + { \ + const argmatch_##Name##_group_type *g = &argmatch_##Name##_group; \ + size_t size = argmatch_##Name##_size; \ + /* Width of the screen. Help2man does not seem to support \ + arguments on several lines, so in that case pretend a very \ + large width. */ \ + const int screen_width = getenv ("HELP2MAN") ? INT_MAX : 80; \ + if (g->doc_pre) \ + fprintf (out, "%s\n", gettext (g->doc_pre)); \ + int doc_col = argmatch_##Name##_doc_col (); \ + for (int i = 0; g->docs[i].arg; ++i) \ + { \ + int col = 0; \ + bool first = true; \ + int ival = argmatch_##Name##_choice (g->docs[i].arg); \ + if (ival < 0) \ + /* Pseudo argument, display it. */ \ + col += fprintf (out, " %s", g->docs[i].arg); \ + else \ + /* Genuine argument, display it with its synonyms. */ \ + for (int j = 0; g->args[j].arg; ++j) \ + if (! memcmp (&g->args[ival].val, &g->args[j].val, size)) \ + { \ + if (!first \ + && screen_width < col + 2 + strlen (g->args[j].arg)) \ + { \ + fprintf (out, ",\n"); \ + col = 0; \ + first = true; \ + } \ + if (first) \ + { \ + col += fprintf (out, " "); \ + first = false; \ + } \ + else \ + col += fprintf (out, ","); \ + col += fprintf (out, " %s", g->args[j].arg); \ + } \ + /* The doc. Separated by at least two spaces. 
*/ \ + if (doc_col < col + 2) \ + { \ + fprintf (out, "\n"); \ + col = 0; \ + } \ + fprintf (out, "%*s%s\n", \ + doc_col - col, "", gettext (g->docs[i].doc)); \ + } \ + if (g->doc_post) \ + fprintf (out, "%s\n", gettext (g->doc_post)); \ + } + +# ifdef __cplusplus } -#endif +# endif #endif /* ARGMATCH_H_ */ diff --git a/contrib/grep/lib/assure.h b/contrib/grep/lib/assure.h index f44f380b52..8ea2f6e480 100644 --- a/contrib/grep/lib/assure.h +++ b/contrib/grep/lib/assure.h @@ -1,6 +1,6 @@ /* Run-time assert-like macros. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert. */ @@ -23,7 +23,7 @@ #include /* Check E's value at runtime, and report an error and abort if not. - However, do nothng if NDEBUG is defined. + However, do nothing if NDEBUG is defined. Unlike standard 'assert', this macro always compiles E even when NDEBUG is defined, so as to catch typos and avoid some GCC warnings. */ diff --git a/contrib/grep/lib/at-func.c b/contrib/grep/lib/at-func.c deleted file mode 100644 index bbbe458d9b..0000000000 --- a/contrib/grep/lib/at-func.c +++ /dev/null @@ -1,146 +0,0 @@ -/* Define at-style functions like fstatat, unlinkat, fchownat, etc. - Copyright (C) 2006, 2009-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* written by Jim Meyering */ - -#include "dosname.h" /* solely for definition of IS_ABSOLUTE_FILE_NAME */ - -#ifdef GNULIB_SUPPORT_ONLY_AT_FDCWD -# include -# ifndef ENOTSUP -# define ENOTSUP EINVAL -# endif -#else -# include "openat.h" -# include "openat-priv.h" -# include "save-cwd.h" -#endif - -#ifdef AT_FUNC_USE_F1_COND -# define CALL_FUNC(F) \ - (flag == AT_FUNC_USE_F1_COND \ - ? AT_FUNC_F1 (F AT_FUNC_POST_FILE_ARGS) \ - : AT_FUNC_F2 (F AT_FUNC_POST_FILE_ARGS)) -# define VALIDATE_FLAG(F) \ - if (flag & ~AT_FUNC_USE_F1_COND) \ - { \ - errno = EINVAL; \ - return FUNC_FAIL; \ - } -#else -# define CALL_FUNC(F) (AT_FUNC_F1 (F AT_FUNC_POST_FILE_ARGS)) -# define VALIDATE_FLAG(F) /* empty */ -#endif - -#ifdef AT_FUNC_RESULT -# define FUNC_RESULT AT_FUNC_RESULT -#else -# define FUNC_RESULT int -#endif - -#ifdef AT_FUNC_FAIL -# define FUNC_FAIL AT_FUNC_FAIL -#else -# define FUNC_FAIL -1 -#endif - -/* Call AT_FUNC_F1 to operate on FILE, which is in the directory - open on descriptor FD. If AT_FUNC_USE_F1_COND is defined to a value, - AT_FUNC_POST_FILE_PARAM_DECLS must include a parameter named flag; - call AT_FUNC_F2 if FLAG is 0 or fail if FLAG contains more bits than - AT_FUNC_USE_F1_COND. Return int and fail with -1 unless AT_FUNC_RESULT - or AT_FUNC_FAIL are defined. If possible, do it without changing the - working directory. Otherwise, resort to using save_cwd/fchdir, - then AT_FUNC_F?/restore_cwd. If either the save_cwd or the restore_cwd - fails, then give a diagnostic and exit nonzero. 
*/ -FUNC_RESULT -AT_FUNC_NAME (int fd, char const *file AT_FUNC_POST_FILE_PARAM_DECLS) -{ - VALIDATE_FLAG (flag); - - if (fd == AT_FDCWD || IS_ABSOLUTE_FILE_NAME (file)) - return CALL_FUNC (file); - -#ifdef GNULIB_SUPPORT_ONLY_AT_FDCWD - errno = ENOTSUP; - return FUNC_FAIL; -#else - { - /* Be careful to choose names unlikely to conflict with - AT_FUNC_POST_FILE_PARAM_DECLS. */ - struct saved_cwd saved_cwd; - int saved_errno; - FUNC_RESULT err; - - { - char proc_buf[OPENAT_BUFFER_SIZE]; - char *proc_file = openat_proc_name (proc_buf, fd, file); - if (proc_file) - { - FUNC_RESULT proc_result = CALL_FUNC (proc_file); - int proc_errno = errno; - if (proc_file != proc_buf) - free (proc_file); - /* If the syscall succeeds, or if it fails with an unexpected - errno value, then return right away. Otherwise, fall through - and resort to using save_cwd/restore_cwd. */ - if (FUNC_FAIL != proc_result) - return proc_result; - if (! EXPECTED_ERRNO (proc_errno)) - { - errno = proc_errno; - return proc_result; - } - } - } - - if (save_cwd (&saved_cwd) != 0) - openat_save_fail (errno); - if (0 <= fd && fd == saved_cwd.desc) - { - /* If saving the working directory collides with the user's - requested fd, then the user's fd must have been closed to - begin with. */ - free_cwd (&saved_cwd); - errno = EBADF; - return FUNC_FAIL; - } - - if (fchdir (fd) != 0) - { - saved_errno = errno; - free_cwd (&saved_cwd); - errno = saved_errno; - return FUNC_FAIL; - } - - err = CALL_FUNC (file); - saved_errno = (err == FUNC_FAIL ? 
errno : 0); - - if (restore_cwd (&saved_cwd) != 0) - openat_restore_fail (errno); - - free_cwd (&saved_cwd); - - if (saved_errno) - errno = saved_errno; - return err; - } -#endif -} -#undef CALL_FUNC -#undef FUNC_RESULT -#undef FUNC_FAIL diff --git a/contrib/grep/lib/basename-lgpl.c b/contrib/grep/lib/basename-lgpl.c index aae1280f2f..565469ea3d 100644 --- a/contrib/grep/lib/basename-lgpl.c +++ b/contrib/grep/lib/basename-lgpl.c @@ -1,6 +1,6 @@ /* basename.c -- return the last element in a file name - Copyright (C) 1990, 1998-2001, 2003-2006, 2009-2015 Free Software + Copyright (C) 1990, 1998-2001, 2003-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include diff --git a/contrib/grep/lib/binary-io.c b/contrib/grep/lib/binary-io.c index d828bcd015..e84e223fc8 100644 --- a/contrib/grep/lib/binary-io.c +++ b/contrib/grep/lib/binary-io.c @@ -1,4 +1,39 @@ +/* Binary mode I/O. + Copyright 2017-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + #include + #define BINARY_IO_INLINE _GL_EXTERN_INLINE #include "binary-io.h" -typedef int dummy; + +#if defined __DJGPP__ || defined __EMX__ +# include + +int +set_binary_mode (int fd, int mode) +{ + if (isatty (fd)) + /* If FD refers to a console (not a pipe, not a regular file), + O_TEXT is the only reasonable mode, both on input and on output. + Silently ignore the request. If we were to return -1 here, + all programs that use xset_binary_mode would fail when run + with console input or console output. */ + return O_TEXT; + else + return __gl_setmode (fd, mode); +} + +#endif diff --git a/contrib/grep/lib/binary-io.h b/contrib/grep/lib/binary-io.h index f5b66c7909..477b4bf4dd 100644 --- a/contrib/grep/lib/binary-io.h +++ b/contrib/grep/lib/binary-io.h @@ -1,5 +1,5 @@ /* Binary mode I/O. - Copyright (C) 2001, 2003, 2005, 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2001, 2003, 2005, 2008-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef _BINARY_H #define _BINARY_H @@ -33,15 +33,12 @@ _GL_INLINE_HEADER_BEGIN # define BINARY_IO_INLINE _GL_INLINE #endif -/* set_binary_mode (fd, mode) - sets the binary/text I/O mode of file descriptor fd to the given mode - (must be O_BINARY or O_TEXT) and returns the previous mode. 
*/ #if O_BINARY # if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__ # include /* declares setmode() */ -# define set_binary_mode setmode +# define __gl_setmode setmode # else -# define set_binary_mode _setmode +# define __gl_setmode _setmode # undef fileno # define fileno _fileno # endif @@ -50,26 +47,31 @@ _GL_INLINE_HEADER_BEGIN /* Use a function rather than a macro, to avoid gcc warnings "warning: statement with no effect". */ BINARY_IO_INLINE int -set_binary_mode (int fd, int mode) +__gl_setmode (int fd _GL_UNUSED, int mode _GL_UNUSED) { - (void) fd; - (void) mode; return O_BINARY; } #endif -/* SET_BINARY (fd); - changes the file descriptor fd to perform binary I/O. */ -#ifdef __DJGPP__ -# include /* declares isatty() */ - /* Avoid putting stdin/stdout in binary mode if it is connected to - the console, because that would make it impossible for the user - to interrupt the program through Ctrl-C or Ctrl-Break. */ -# define SET_BINARY(fd) ((void) (!isatty (fd) ? (set_binary_mode (fd, O_BINARY), 0) : 0)) +/* Set FD's mode to MODE, which should be either O_TEXT or O_BINARY. + Return the old mode if successful, -1 (setting errno) on failure. + Ordinarily this function would be called 'setmode', since that is + its name on MS-Windows, but it is called 'set_binary_mode' here + to avoid colliding with a BSD function of another name. */ + +#if defined __DJGPP__ || defined __EMX__ +extern int set_binary_mode (int fd, int mode); #else -# define SET_BINARY(fd) ((void) set_binary_mode (fd, O_BINARY)) +BINARY_IO_INLINE int +set_binary_mode (int fd, int mode) +{ + return __gl_setmode (fd, mode); +} #endif +/* This macro is obsolescent. 
*/ +#define SET_BINARY(fd) ((void) set_binary_mode (fd, O_BINARY)) + _GL_INLINE_HEADER_END #endif /* _BINARY_H */ diff --git a/contrib/grep/lib/bitrotate.h b/contrib/grep/lib/bitrotate.h index 1665e99679..59827e2742 100644 --- a/contrib/grep/lib/bitrotate.h +++ b/contrib/grep/lib/bitrotate.h @@ -1,5 +1,5 @@ /* bitrotate.h - Rotate bits in integers - Copyright (C) 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2008-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Simon Josefsson , 2008. */ @@ -95,7 +95,8 @@ rotr_sz (size_t x, int n) BITROTATE_INLINE uint16_t rotl16 (uint16_t x, int n) { - return ((x << n) | (x >> (16 - n))) & UINT16_MAX; + return (((unsigned int) x << n) | ((unsigned int) x >> (16 - n))) + & UINT16_MAX; } /* Given an unsigned 16-bit argument X, return the value corresponding @@ -106,7 +107,8 @@ rotl16 (uint16_t x, int n) BITROTATE_INLINE uint16_t rotr16 (uint16_t x, int n) { - return ((x >> n) | (x << (16 - n))) & UINT16_MAX; + return (((unsigned int) x >> n) | ((unsigned int) x << (16 - n))) + & UINT16_MAX; } /* Given an unsigned 8-bit argument X, return the value corresponding @@ -117,7 +119,7 @@ rotr16 (uint16_t x, int n) BITROTATE_INLINE uint8_t rotl8 (uint8_t x, int n) { - return ((x << n) | (x >> (8 - n))) & UINT8_MAX; + return (((unsigned int) x << n) | ((unsigned int) x >> (8 - n))) & UINT8_MAX; } /* Given an unsigned 8-bit argument X, return the value corresponding @@ -128,7 +130,7 @@ rotl8 (uint8_t x, int n) BITROTATE_INLINE uint8_t rotr8 (uint8_t x, int n) { - return ((x >> n) | (x << (8 - n))) & UINT8_MAX; + return (((unsigned int) x >> n) | ((unsigned int) x << (8 - n))) & 
UINT8_MAX; } _GL_INLINE_HEADER_END diff --git a/contrib/grep/lib/c-ctype.h b/contrib/grep/lib/c-ctype.h index 15125308cb..fbd11b3450 100644 --- a/contrib/grep/lib/c-ctype.h +++ b/contrib/grep/lib/c-ctype.h @@ -5,7 +5,7 @@ functions' behaviour depends on the current locale set via setlocale. - Copyright (C) 2000-2003, 2006, 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2000-2003, 2006, 2008-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with this program; if not, see . */ +along with this program; if not, see . */ #ifndef C_CTYPE_H #define C_CTYPE_H @@ -115,16 +115,16 @@ extern "C" { /* Cases for lowercase hex letters, and lowercase letters, all offset by N. */ -#define _C_CTYPE_LOWER_A_THRU_F_N(n) \ - case 'a' + (n): case 'b' + (n): case 'c' + (n): case 'd' + (n): \ - case 'e' + (n): case 'f' + (n) -#define _C_CTYPE_LOWER_N(n) \ - _C_CTYPE_LOWER_A_THRU_F_N(n): \ - case 'g' + (n): case 'h' + (n): case 'i' + (n): case 'j' + (n): \ - case 'k' + (n): case 'l' + (n): case 'm' + (n): case 'n' + (n): \ - case 'o' + (n): case 'p' + (n): case 'q' + (n): case 'r' + (n): \ - case 's' + (n): case 't' + (n): case 'u' + (n): case 'v' + (n): \ - case 'w' + (n): case 'x' + (n): case 'y' + (n): case 'z' + (n) +#define _C_CTYPE_LOWER_A_THRU_F_N(N) \ + case 'a' + (N): case 'b' + (N): case 'c' + (N): case 'd' + (N): \ + case 'e' + (N): case 'f' + (N) +#define _C_CTYPE_LOWER_N(N) \ + _C_CTYPE_LOWER_A_THRU_F_N(N): \ + case 'g' + (N): case 'h' + (N): case 'i' + (N): case 'j' + (N): \ + case 'k' + (N): case 'l' + (N): case 'm' + (N): case 'n' + (N): \ + case 'o' + (N): case 'p' + (N): case 'q' + (N): case 'r' + (N): \ + case 's' + (N): case 't' + (N): case 'u' 
+ (N): case 'v' + (N): \ + case 'w' + (N): case 'x' + (N): case 'y' + (N): case 'z' + (N) /* Cases for hex letters, digits, lower, punct, and upper. */ diff --git a/contrib/grep/lib/c-stack.c b/contrib/grep/lib/c-stack.c new file mode 100644 index 0000000000..50a0380c25 --- /dev/null +++ b/contrib/grep/lib/c-stack.c @@ -0,0 +1,338 @@ +/* Stack overflow handling. + + Copyright (C) 2002, 2004, 2006, 2008-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Paul Eggert. */ + +/* NOTES: + + A program that uses alloca, dynamic arrays, or large local + variables may extend the stack by more than a page at a time. If + so, when the stack overflows the operating system may not detect + the overflow until the program uses the array, and this module may + incorrectly report a program error instead of a stack overflow. + + To avoid this problem, allocate only small objects on the stack; a + program should be OK if it limits single allocations to a page or + less. Allocate larger arrays in static storage, or on the heap + (e.g., with malloc). Yes, this is a pain, but we don't know of any + better solution that is portable. + + No attempt has been made to deal with multithreaded applications. */ + +#include + +#ifndef __attribute__ +# if __GNUC__ < 3 +# define __attribute__(x) +# endif +#endif + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +#include + +#include +#if ! 
HAVE_STACK_T && ! defined stack_t +typedef struct sigaltstack stack_t; +#endif +#ifndef SIGSTKSZ +# define SIGSTKSZ 16384 +#elif HAVE_LIBSIGSEGV && SIGSTKSZ < 16384 +/* libsigsegv 2.6 through 2.8 have a bug where some architectures use + more than the Linux default of an 8k alternate stack when deciding + if a fault was caused by stack overflow. */ +# undef SIGSTKSZ +# define SIGSTKSZ 16384 +#endif + +#include +#include + +/* Posix 2001 declares ucontext_t in , Posix 200x in + . */ +#if HAVE_UCONTEXT_H +# include +#endif + +#include + +#if HAVE_LIBSIGSEGV +# include +#endif + +#include "c-stack.h" +#include "exitfail.h" +#include "ignore-value.h" +#include "getprogname.h" + +#if defined SA_ONSTACK && defined SA_SIGINFO +# define SIGINFO_WORKS 1 +#else +# define SIGINFO_WORKS 0 +# ifndef SA_ONSTACK +# define SA_ONSTACK 0 +# endif +#endif + +/* The user-specified action to take when a SEGV-related program error + or stack overflow occurs. */ +static _GL_ASYNC_SAFE void (* volatile segv_action) (int); + +/* Translated messages for program errors and stack overflow. Do not + translate them in the signal handler, since gettext is not + async-signal-safe. */ +static char const * volatile program_error_message; +static char const * volatile stack_overflow_message; + +#if ((HAVE_LIBSIGSEGV && ! HAVE_XSI_STACK_OVERFLOW_HEURISTIC) \ + || (HAVE_SIGALTSTACK && HAVE_DECL_SIGALTSTACK \ + && HAVE_STACK_OVERFLOW_HANDLING)) + +/* Output an error message, then exit with status EXIT_FAILURE if it + appears to have been a stack overflow, or with a core dump + otherwise. This function is async-signal-safe. */ + +static char const * volatile progname; + +static _GL_ASYNC_SAFE _Noreturn void +die (int signo) +{ + char const *message; +#if !SIGINFO_WORKS && !HAVE_LIBSIGSEGV + /* We can't easily determine whether it is a stack overflow; so + assume that the rest of our program is perfect (!) and that + this segmentation violation is a stack overflow. 
*/ + signo = 0; +#endif /* !SIGINFO_WORKS && !HAVE_LIBSIGSEGV */ + segv_action (signo); + message = signo ? program_error_message : stack_overflow_message; + ignore_value (write (STDERR_FILENO, progname, strlen (progname))); + ignore_value (write (STDERR_FILENO, ": ", 2)); + ignore_value (write (STDERR_FILENO, message, strlen (message))); + ignore_value (write (STDERR_FILENO, "\n", 1)); + if (! signo) + _exit (exit_failure); + raise (signo); + abort (); +} +#endif + +#if (HAVE_SIGALTSTACK && HAVE_DECL_SIGALTSTACK \ + && HAVE_STACK_OVERFLOW_HANDLING) || HAVE_LIBSIGSEGV + +/* Storage for the alternate signal stack. */ +static union +{ + char buffer[SIGSTKSZ]; + + /* These other members are for proper alignment. There's no + standard way to guarantee stack alignment, but this seems enough + in practice. */ + long double ld; + long l; + void *p; +} alternate_signal_stack; + +static _GL_ASYNC_SAFE void +null_action (int signo _GL_UNUSED) +{ +} + +#endif /* SIGALTSTACK || LIBSIGSEGV */ + +/* Only use libsigsegv if we need it; platforms like Solaris can + detect stack overflow without the overhead of an external + library. */ +#if HAVE_LIBSIGSEGV && ! HAVE_XSI_STACK_OVERFLOW_HEURISTIC + +/* Nonzero if general segv handler could not be installed. */ +static volatile int segv_handler_missing; + +/* Handle a segmentation violation and exit if it cannot be stack + overflow. This function is async-signal-safe. */ + +static _GL_ASYNC_SAFE int +segv_handler (void *address _GL_UNUSED, int serious) +{ +# if DEBUG + { + char buf[1024]; + int saved_errno = errno; + sprintf (buf, "segv_handler serious=%d\n", serious); + write (STDERR_FILENO, buf, strlen (buf)); + errno = saved_errno; + } +# endif + + /* If this fault is not serious, return 0 to let the stack overflow + handler take a shot at it. */ + if (!serious) + return 0; + die (SIGSEGV); +} + +/* Handle a segmentation violation that is likely to be a stack + overflow and exit. This function is async-signal-safe. 
*/ + +static _GL_ASYNC_SAFE _Noreturn void +overflow_handler (int emergency, stackoverflow_context_t context _GL_UNUSED) +{ +# if DEBUG + { + char buf[1024]; + sprintf (buf, "overflow_handler emergency=%d segv_handler_missing=%d\n", + emergency, segv_handler_missing); + write (STDERR_FILENO, buf, strlen (buf)); + } +# endif + + die ((!emergency || segv_handler_missing) ? 0 : SIGSEGV); +} + +int +c_stack_action (_GL_ASYNC_SAFE void (*action) (int)) +{ + segv_action = action ? action : null_action; + program_error_message = _("program error"); + stack_overflow_message = _("stack overflow"); + progname = getprogname (); + + /* Always install the overflow handler. */ + if (stackoverflow_install_handler (overflow_handler, + alternate_signal_stack.buffer, + sizeof alternate_signal_stack.buffer)) + { + errno = ENOTSUP; + return -1; + } + /* Try installing a general handler; if it fails, then treat all + segv as stack overflow. */ + segv_handler_missing = sigsegv_install_handler (segv_handler); + return 0; +} + +#elif HAVE_SIGALTSTACK && HAVE_DECL_SIGALTSTACK && HAVE_STACK_OVERFLOW_HANDLING + +# if SIGINFO_WORKS + +/* Handle a segmentation violation and exit. This function is + async-signal-safe. */ + +static _GL_ASYNC_SAFE _Noreturn void +segv_handler (int signo, siginfo_t *info, void *context _GL_UNUSED) +{ + /* Clear SIGNO if it seems to have been a stack overflow. */ +# if ! HAVE_XSI_STACK_OVERFLOW_HEURISTIC + /* We can't easily determine whether it is a stack overflow; so + assume that the rest of our program is perfect (!) and that + this segmentation violation is a stack overflow. + + Note that although both Linux and Solaris provide + sigaltstack, SA_ONSTACK, and SA_SIGINFO, currently only + Solaris satisfies the XSI heuristic. This is because + Solaris populates uc_stack with the details of the + interrupted stack, while Linux populates it with the details + of the current stack. 
*/ + signo = 0; +# else + if (0 < info->si_code) + { + /* If the faulting address is within the stack, or within one + page of the stack, assume that it is a stack overflow. */ + ucontext_t const *user_context = context; + char const *stack_base = user_context->uc_stack.ss_sp; + size_t stack_size = user_context->uc_stack.ss_size; + char const *faulting_address = info->si_addr; + size_t page_size = sysconf (_SC_PAGESIZE); + size_t s = faulting_address - stack_base + page_size; + if (s < stack_size + 2 * page_size) + signo = 0; + +# if DEBUG + { + char buf[1024]; + sprintf (buf, + "segv_handler fault=%p base=%p size=%lx page=%lx signo=%d\n", + faulting_address, stack_base, (unsigned long) stack_size, + (unsigned long) page_size, signo); + write (STDERR_FILENO, buf, strlen (buf)); + } +# endif + } +# endif + + die (signo); +} +# endif + +int +c_stack_action (_GL_ASYNC_SAFE void (*action) (int)) +{ + int r; + stack_t st; + struct sigaction act; + st.ss_flags = 0; +# if SIGALTSTACK_SS_REVERSED + /* Irix mistakenly treats ss_sp as the upper bound, rather than + lower bound, of the alternate stack. */ + st.ss_sp = alternate_signal_stack.buffer + SIGSTKSZ - sizeof (void *); + st.ss_size = sizeof alternate_signal_stack.buffer - sizeof (void *); +# else + st.ss_sp = alternate_signal_stack.buffer; + st.ss_size = sizeof alternate_signal_stack.buffer; +# endif + r = sigaltstack (&st, NULL); + if (r != 0) + return r; + + segv_action = action ? action : null_action; + program_error_message = _("program error"); + stack_overflow_message = _("stack overflow"); + progname = getprogname (); + + sigemptyset (&act.sa_mask); + +# if SIGINFO_WORKS + /* POSIX 1003.1-2001 says SA_RESETHAND implies SA_NODEFER, but + this is not true on Solaris 8 at least. It doesn't hurt to use + SA_NODEFER here, so leave it in. 
*/ + act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO; + act.sa_sigaction = segv_handler; +# else + act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND; + act.sa_handler = die; +# endif + +# if FAULT_YIELDS_SIGBUS + if (sigaction (SIGBUS, &act, NULL) < 0) + return -1; +# endif + return sigaction (SIGSEGV, &act, NULL); +} + +#else /* ! ((HAVE_SIGALTSTACK && HAVE_DECL_SIGALTSTACK + && HAVE_STACK_OVERFLOW_HANDLING) || HAVE_LIBSIGSEGV) */ + +int +c_stack_action (_GL_ASYNC_SAFE void (*action) (int) _GL_UNUSED) +{ + errno = ENOTSUP; + return -1; +} + +#endif diff --git a/contrib/grep/lib/c-stack.h b/contrib/grep/lib/c-stack.h new file mode 100644 index 0000000000..a11fa3123e --- /dev/null +++ b/contrib/grep/lib/c-stack.h @@ -0,0 +1,44 @@ +/* Stack overflow handling. + + Copyright (C) 2002, 2004, 2008-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + + +/* Set up ACTION so that it is invoked on C stack overflow and on other, + stack-unrelated, segmentation violation. + Return -1 (setting errno) if this cannot be done. + + When a stack overflow or segmentation violation occurs: + 1) ACTION is called. It is passed an argument equal to + - 0, for a stack overflow, + - SIGSEGV, for a segmentation violation that does not appear related + to stack overflow. + On many platforms the two cases are hard to distinguish; when in doubt, + zero is passed. 
+ 2) If ACTION returns, a message is written to standard error, and the + program is terminated: in the case of stack overflow, with exit code + exit_failure (see "exitfail.h"), otherwise through a signal SIGSEGV. + + A null ACTION acts like an action that does nothing. + + ACTION must be async-signal-safe. ACTION together with its callees + must not require more than SIGSTKSZ bytes of stack space. Also, + ACTION should not call longjmp, because this implementation does + not guarantee that it is safe to return to the original stack. + + This function may install a handler for the SIGSEGV signal or for the SIGBUS + signal or exercise other system dependent exception handling APIs. */ + +extern int c_stack_action (_GL_ASYNC_SAFE void (* /*action*/) (int)); diff --git a/contrib/grep/lib/c-strcase.h b/contrib/grep/lib/c-strcase.h index f25bcca23f..3b711f5aa5 100644 --- a/contrib/grep/lib/c-strcase.h +++ b/contrib/grep/lib/c-strcase.h @@ -1,5 +1,5 @@ /* Case-insensitive string comparison functions in C locale. - Copyright (C) 1995-1996, 2001, 2003, 2005, 2009-2015 Free Software + Copyright (C) 1995-1996, 2001, 2003, 2005, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . */ #ifndef C_STRCASE_H #define C_STRCASE_H diff --git a/contrib/grep/lib/c-strcasecmp.c b/contrib/grep/lib/c-strcasecmp.c index 6deb6d1236..951220f3e2 100644 --- a/contrib/grep/lib/c-strcasecmp.c +++ b/contrib/grep/lib/c-strcasecmp.c @@ -1,5 +1,5 @@ /* c-strcasecmp.c -- case insensitive string comparator in C locale - Copyright (C) 1998-1999, 2005-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 1998-1999, 2005-2006, 2009-2020 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . */ #include diff --git a/contrib/grep/lib/c-strcaseeq.h b/contrib/grep/lib/c-strcaseeq.h index 28dc55ee3f..304c72ea49 100644 --- a/contrib/grep/lib/c-strcaseeq.h +++ b/contrib/grep/lib/c-strcaseeq.h @@ -1,5 +1,5 @@ /* Optimized case-insensitive string comparison in C locale. - Copyright (C) 2001-2002, 2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -12,7 +12,7 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Bruno Haible . */ diff --git a/contrib/grep/lib/c-strncasecmp.c b/contrib/grep/lib/c-strncasecmp.c index b98e36838c..9ad49191b7 100644 --- a/contrib/grep/lib/c-strncasecmp.c +++ b/contrib/grep/lib/c-strncasecmp.c @@ -1,5 +1,5 @@ /* c-strncasecmp.c -- case insensitive string comparator in C locale - Copyright (C) 1998-1999, 2005-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 1998-1999, 2005-2006, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . 
*/ #include diff --git a/contrib/grep/lib/cdefs.h b/contrib/grep/lib/cdefs.h new file mode 100644 index 0000000000..d8e4a00033 --- /dev/null +++ b/contrib/grep/lib/cdefs.h @@ -0,0 +1,514 @@ +/* Copyright (C) 1992-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _SYS_CDEFS_H +#define _SYS_CDEFS_H 1 + +/* We are almost always included from features.h. */ +#ifndef _FEATURES_H +# include +#endif + +/* The GNU libc does not support any K&R compilers or the traditional mode + of ISO C compilers anymore. Check for some of the combinations not + anymore supported. */ +#if defined __GNUC__ && !defined __STDC__ +# error "You need a ISO C conforming compiler to use the glibc headers" +#endif + +/* Some user header file might have defined this before. */ +#undef __P +#undef __PMT + +#ifdef __GNUC__ + +/* All functions, except those with callbacks or those that + synchronize memory, are leaf functions. */ +# if __GNUC_PREREQ (4, 6) && !defined _LIBC +# define __LEAF , __leaf__ +# define __LEAF_ATTR __attribute__ ((__leaf__)) +# else +# define __LEAF +# define __LEAF_ATTR +# endif + +/* GCC can always grok prototypes. For C++ programs we add throw() + to help it optimize the function calls. But this works only with + gcc 2.8.x and egcs. 
For gcc 3.2 and up we even mark C functions + as non-throwing using a function attribute since programs can use + the -fexceptions options for C code as well. */ +# if !defined __cplusplus && __GNUC_PREREQ (3, 3) +# define __THROW __attribute__ ((__nothrow__ __LEAF)) +# define __THROWNL __attribute__ ((__nothrow__)) +# define __NTH(fct) __attribute__ ((__nothrow__ __LEAF)) fct +# define __NTHNL(fct) __attribute__ ((__nothrow__)) fct +# else +# if defined __cplusplus && __GNUC_PREREQ (2,8) +# define __THROW throw () +# define __THROWNL throw () +# define __NTH(fct) __LEAF_ATTR fct throw () +# define __NTHNL(fct) fct throw () +# else +# define __THROW +# define __THROWNL +# define __NTH(fct) fct +# define __NTHNL(fct) fct +# endif +# endif + +#else /* Not GCC. */ + +# if (defined __cplusplus \ + || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) +# define __inline inline +# else +# define __inline /* No inline functions. */ +# endif + +# define __THROW +# define __THROWNL +# define __NTH(fct) fct + +#endif /* GCC. */ + +/* Compilers that are not clang may object to + #if defined __clang__ && __has_extension(...) + even though they do not need to evaluate the right-hand side of the &&. */ +#if defined __clang__ && defined __has_extension +# define __glibc_clang_has_extension(ext) __has_extension (ext) +#else +# define __glibc_clang_has_extension(ext) 0 +#endif + +/* These two macros are not used in glibc anymore. They are kept here + only because some other projects expect the macros to be defined. */ +#define __P(args) args +#define __PMT(args) args + +/* For these things, GCC behaves the ANSI way normally, + and the non-ANSI way under -traditional. */ + +#define __CONCAT(x,y) x ## y +#define __STRING(x) #x + +/* This is not a typedef so `const __ptr_t' does the right thing. */ +#define __ptr_t void * + + +/* C++ needs to know that types and declarations are C, not C++. 
*/ +#ifdef __cplusplus +# define __BEGIN_DECLS extern "C" { +# define __END_DECLS } +#else +# define __BEGIN_DECLS +# define __END_DECLS +#endif + + +/* Fortify support. */ +#define __bos(ptr) __builtin_object_size (ptr, __USE_FORTIFY_LEVEL > 1) +#define __bos0(ptr) __builtin_object_size (ptr, 0) + +#if __GNUC_PREREQ (4,3) +# define __warndecl(name, msg) \ + extern void name (void) __attribute__((__warning__ (msg))) +# define __warnattr(msg) __attribute__((__warning__ (msg))) +# define __errordecl(name, msg) \ + extern void name (void) __attribute__((__error__ (msg))) +#else +# define __warndecl(name, msg) extern void name (void) +# define __warnattr(msg) +# define __errordecl(name, msg) extern void name (void) +#endif + +/* Support for flexible arrays. + Headers that should use flexible arrays only if they're "real" + (e.g. only if they won't affect sizeof()) should test + #if __glibc_c99_flexarr_available. */ +#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L && !defined __HP_cc +# define __flexarr [] +# define __glibc_c99_flexarr_available 1 +#elif __GNUC_PREREQ (2,97) +/* GCC 2.97 supports C99 flexible array members as an extension, + even when in C89 mode or compiling C++ (any version). */ +# define __flexarr [] +# define __glibc_c99_flexarr_available 1 +#elif defined __GNUC__ +/* Pre-2.97 GCC did not support C99 flexible arrays but did have + an equivalent extension with slightly different notation. */ +# define __flexarr [0] +# define __glibc_c99_flexarr_available 1 +#else +/* Some other non-C99 compiler. Approximate with [1]. */ +# define __flexarr [1] +# define __glibc_c99_flexarr_available 0 +#endif + + +/* __asm__ ("xyz") is used throughout the headers to rename functions + at the assembly language level. This is wrapped by the __REDIRECT + macro, in order to support compilers that can do this some other + way. 
When compilers don't support asm-names at all, we have to do + preprocessor tricks instead (which don't have exactly the right + semantics, but it's the best we can do). + + Example: + int __REDIRECT(setpgrp, (__pid_t pid, __pid_t pgrp), setpgid); */ + +#if defined __GNUC__ && __GNUC__ >= 2 + +# define __REDIRECT(name, proto, alias) name proto __asm__ (__ASMNAME (#alias)) +# ifdef __cplusplus +# define __REDIRECT_NTH(name, proto, alias) \ + name proto __THROW __asm__ (__ASMNAME (#alias)) +# define __REDIRECT_NTHNL(name, proto, alias) \ + name proto __THROWNL __asm__ (__ASMNAME (#alias)) +# else +# define __REDIRECT_NTH(name, proto, alias) \ + name proto __asm__ (__ASMNAME (#alias)) __THROW +# define __REDIRECT_NTHNL(name, proto, alias) \ + name proto __asm__ (__ASMNAME (#alias)) __THROWNL +# endif +# define __ASMNAME(cname) __ASMNAME2 (__USER_LABEL_PREFIX__, cname) +# define __ASMNAME2(prefix, cname) __STRING (prefix) cname + +/* +#elif __SOME_OTHER_COMPILER__ + +# define __REDIRECT(name, proto, alias) name proto; \ + _Pragma("let " #name " = " #alias) +*/ +#endif + +/* GCC has various useful declarations that can be made with the + `__attribute__' syntax. All of the ways we use this do fine if + they are omitted for compilers that don't understand it. */ +#if !defined __GNUC__ || __GNUC__ < 2 +# define __attribute__(xyz) /* Ignore */ +#endif + +/* At some point during the gcc 2.96 development the `malloc' attribute + for functions was introduced. We don't want to use it unconditionally + (although this would be possible) since it generates warnings. */ +#if __GNUC_PREREQ (2,96) +# define __attribute_malloc__ __attribute__ ((__malloc__)) +#else +# define __attribute_malloc__ /* Ignore */ +#endif + +/* Tell the compiler which arguments to an allocation function + indicate the size of the allocation. 
*/ +#if __GNUC_PREREQ (4, 3) +# define __attribute_alloc_size__(params) \ + __attribute__ ((__alloc_size__ params)) +#else +# define __attribute_alloc_size__(params) /* Ignore. */ +#endif + +/* At some point during the gcc 2.96 development the `pure' attribute + for functions was introduced. We don't want to use it unconditionally + (although this would be possible) since it generates warnings. */ +#if __GNUC_PREREQ (2,96) +# define __attribute_pure__ __attribute__ ((__pure__)) +#else +# define __attribute_pure__ /* Ignore */ +#endif + +/* This declaration tells the compiler that the value is constant. */ +#if __GNUC_PREREQ (2,5) +# define __attribute_const__ __attribute__ ((__const__)) +#else +# define __attribute_const__ /* Ignore */ +#endif + +/* At some point during the gcc 3.1 development the `used' attribute + for functions was introduced. We don't want to use it unconditionally + (although this would be possible) since it generates warnings. */ +#if __GNUC_PREREQ (3,1) +# define __attribute_used__ __attribute__ ((__used__)) +# define __attribute_noinline__ __attribute__ ((__noinline__)) +#else +# define __attribute_used__ __attribute__ ((__unused__)) +# define __attribute_noinline__ /* Ignore */ +#endif + +/* Since version 3.2, gcc allows marking deprecated functions. */ +#if __GNUC_PREREQ (3,2) +# define __attribute_deprecated__ __attribute__ ((__deprecated__)) +#else +# define __attribute_deprecated__ /* Ignore */ +#endif + +/* Since version 4.5, gcc also allows one to specify the message printed + when a deprecated function is used. clang claims to be gcc 4.2, but + may also support this feature. 
*/ +#if __GNUC_PREREQ (4,5) || \ + __glibc_clang_has_extension (__attribute_deprecated_with_message__) +# define __attribute_deprecated_msg__(msg) \ + __attribute__ ((__deprecated__ (msg))) +#else +# define __attribute_deprecated_msg__(msg) __attribute_deprecated__ +#endif + +/* At some point during the gcc 2.8 development the `format_arg' attribute + for functions was introduced. We don't want to use it unconditionally + (although this would be possible) since it generates warnings. + If several `format_arg' attributes are given for the same function, in + gcc-3.0 and older, all but the last one are ignored. In newer gccs, + all designated arguments are considered. */ +#if __GNUC_PREREQ (2,8) +# define __attribute_format_arg__(x) __attribute__ ((__format_arg__ (x))) +#else +# define __attribute_format_arg__(x) /* Ignore */ +#endif + +/* At some point during the gcc 2.97 development the `strfmon' format + attribute for functions was introduced. We don't want to use it + unconditionally (although this would be possible) since it + generates warnings. */ +#if __GNUC_PREREQ (2,97) +# define __attribute_format_strfmon__(a,b) \ + __attribute__ ((__format__ (__strfmon__, a, b))) +#else +# define __attribute_format_strfmon__(a,b) /* Ignore */ +#endif + +/* The nonnull function attribute marks pointer parameters that + must not be NULL. Do not define __nonnull if it is already defined, + for portability when this file is used in Gnulib. */ +#ifndef __nonnull +# if __GNUC_PREREQ (3,3) +# define __nonnull(params) __attribute__ ((__nonnull__ params)) +# else +# define __nonnull(params) +# endif +#endif + +/* If fortification mode, we warn about unused results of certain + function calls which can lead to problems. 
*/ +#if __GNUC_PREREQ (3,4) +# define __attribute_warn_unused_result__ \ + __attribute__ ((__warn_unused_result__)) +# if defined __USE_FORTIFY_LEVEL && __USE_FORTIFY_LEVEL > 0 +# define __wur __attribute_warn_unused_result__ +# endif +#else +# define __attribute_warn_unused_result__ /* empty */ +#endif +#ifndef __wur +# define __wur /* Ignore */ +#endif + +/* Forces a function to be always inlined. */ +#if __GNUC_PREREQ (3,2) +/* The Linux kernel defines __always_inline in stddef.h (283d7573), and + it conflicts with this definition. Therefore undefine it first to + allow either header to be included first. */ +# undef __always_inline +# define __always_inline __inline __attribute__ ((__always_inline__)) +#else +# undef __always_inline +# define __always_inline __inline +#endif + +/* Associate error messages with the source location of the call site rather + than with the source location inside the function. */ +#if __GNUC_PREREQ (4,3) +# define __attribute_artificial__ __attribute__ ((__artificial__)) +#else +# define __attribute_artificial__ /* Ignore */ +#endif + +/* GCC 4.3 and above with -std=c99 or -std=gnu99 implements ISO C99 + inline semantics, unless -fgnu89-inline is used. Using __GNUC_STDC_INLINE__ + or __GNUC_GNU_INLINE is not a good enough check for gcc because gcc versions + older than 4.3 may define these macros and still not guarantee GNU inlining + semantics. + + clang++ identifies itself as gcc-4.2, but has support for GNU inlining + semantics, that can be checked for by using the __GNUC_STDC_INLINE_ and + __GNUC_GNU_INLINE__ macro definitions. 
*/ +#if (!defined __cplusplus || __GNUC_PREREQ (4,3) \ + || (defined __clang__ && (defined __GNUC_STDC_INLINE__ \ + || defined __GNUC_GNU_INLINE__))) +# if defined __GNUC_STDC_INLINE__ || defined __cplusplus +# define __extern_inline extern __inline __attribute__ ((__gnu_inline__)) +# define __extern_always_inline \ + extern __always_inline __attribute__ ((__gnu_inline__)) +# else +# define __extern_inline extern __inline +# define __extern_always_inline extern __always_inline +# endif +#endif + +#ifdef __extern_always_inline +# define __fortify_function __extern_always_inline __attribute_artificial__ +#endif + +/* GCC 4.3 and above allow passing all anonymous arguments of an + __extern_always_inline function to some other vararg function. */ +#if __GNUC_PREREQ (4,3) +# define __va_arg_pack() __builtin_va_arg_pack () +# define __va_arg_pack_len() __builtin_va_arg_pack_len () +#endif + +/* It is possible to compile containing GCC extensions even if GCC is + run in pedantic mode if the uses are carefully marked using the + `__extension__' keyword. But this is not generally available before + version 2.8. */ +#if !__GNUC_PREREQ (2,8) +# define __extension__ /* Ignore */ +#endif + +/* __restrict is known in EGCS 1.2 and above. */ +#if !__GNUC_PREREQ (2,92) +# if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L +# define __restrict restrict +# else +# define __restrict /* Ignore */ +# endif +#endif + +/* ISO C99 also allows to declare arrays as non-overlapping. The syntax is + array_name[restrict] + GCC 3.1 supports this. */ +#if __GNUC_PREREQ (3,1) && !defined __GNUG__ +# define __restrict_arr __restrict +#else +# ifdef __GNUC__ +# define __restrict_arr /* Not supported in old GCC. */ +# else +# if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L +# define __restrict_arr restrict +# else +/* Some other non-C99 compiler. */ +# define __restrict_arr /* Not supported. 
*/ +# endif +# endif +#endif + +#if __GNUC__ >= 3 +# define __glibc_unlikely(cond) __builtin_expect ((cond), 0) +# define __glibc_likely(cond) __builtin_expect ((cond), 1) +#else +# define __glibc_unlikely(cond) (cond) +# define __glibc_likely(cond) (cond) +#endif + +#ifdef __has_attribute +# define __glibc_has_attribute(attr) __has_attribute (attr) +#else +# define __glibc_has_attribute(attr) 0 +#endif + +#if (!defined _Noreturn \ + && (defined __STDC_VERSION__ ? __STDC_VERSION__ : 0) < 201112 \ + && !__GNUC_PREREQ (4,7)) +# if __GNUC_PREREQ (2,8) +# define _Noreturn __attribute__ ((__noreturn__)) +# else +# define _Noreturn +# endif +#endif + +#if __GNUC_PREREQ (8, 0) +/* Describes a char array whose address can safely be passed as the first + argument to strncpy and strncat, as the char array is not necessarily + a NUL-terminated string. */ +# define __attribute_nonstring__ __attribute__ ((__nonstring__)) +#else +# define __attribute_nonstring__ +#endif + +#if (!defined _Static_assert && !defined __cplusplus \ + && (defined __STDC_VERSION__ ? __STDC_VERSION__ : 0) < 201112 \ + && (!__GNUC_PREREQ (4, 6) || defined __STRICT_ANSI__)) +# define _Static_assert(expr, diagnostic) \ + extern int (*__Static_assert_function (void)) \ + [!!sizeof (struct { int __error_if_negative: (expr) ? 2 : -1; })] +#endif + +/* The #ifndef lets Gnulib avoid including these on non-glibc + platforms, where the includes typically do not exist. 
*/ +#ifndef __WORDSIZE +# include +# include +#endif + +#if defined __LONG_DOUBLE_MATH_OPTIONAL && defined __NO_LONG_DOUBLE_MATH +# define __LDBL_COMPAT 1 +# ifdef __REDIRECT +# define __LDBL_REDIR1(name, proto, alias) __REDIRECT (name, proto, alias) +# define __LDBL_REDIR(name, proto) \ + __LDBL_REDIR1 (name, proto, __nldbl_##name) +# define __LDBL_REDIR1_NTH(name, proto, alias) __REDIRECT_NTH (name, proto, alias) +# define __LDBL_REDIR_NTH(name, proto) \ + __LDBL_REDIR1_NTH (name, proto, __nldbl_##name) +# define __LDBL_REDIR1_DECL(name, alias) \ + extern __typeof (name) name __asm (__ASMNAME (#alias)); +# define __LDBL_REDIR_DECL(name) \ + extern __typeof (name) name __asm (__ASMNAME ("__nldbl_" #name)); +# define __REDIRECT_LDBL(name, proto, alias) \ + __LDBL_REDIR1 (name, proto, __nldbl_##alias) +# define __REDIRECT_NTH_LDBL(name, proto, alias) \ + __LDBL_REDIR1_NTH (name, proto, __nldbl_##alias) +# endif +#endif +#if !defined __LDBL_COMPAT || !defined __REDIRECT +# define __LDBL_REDIR1(name, proto, alias) name proto +# define __LDBL_REDIR(name, proto) name proto +# define __LDBL_REDIR1_NTH(name, proto, alias) name proto __THROW +# define __LDBL_REDIR_NTH(name, proto) name proto __THROW +# define __LDBL_REDIR_DECL(name) +# ifdef __REDIRECT +# define __REDIRECT_LDBL(name, proto, alias) __REDIRECT (name, proto, alias) +# define __REDIRECT_NTH_LDBL(name, proto, alias) \ + __REDIRECT_NTH (name, proto, alias) +# endif +#endif + +/* __glibc_macro_warning (MESSAGE) issues warning MESSAGE. This is + intended for use in preprocessor macros. + + Note: MESSAGE must be a _single_ string; concatenation of string + literals is not supported. 
*/ +#if __GNUC_PREREQ (4,8) || __glibc_clang_prereq (3,5) +# define __glibc_macro_warning1(message) _Pragma (#message) +# define __glibc_macro_warning(message) \ + __glibc_macro_warning1 (GCC warning message) +#else +# define __glibc_macro_warning(msg) +#endif + +/* Generic selection (ISO C11) is a C-only feature, available in GCC + since version 4.9. Previous versions do not provide generic + selection, even though they might set __STDC_VERSION__ to 201112L, + when in -std=c11 mode. Thus, we must check for !defined __GNUC__ + when testing __STDC_VERSION__ for generic selection support. + On the other hand, Clang also defines __GNUC__, so a clang-specific + check is required to enable the use of generic selection. */ +#if !defined __cplusplus \ + && (__GNUC_PREREQ (4, 9) \ + || __glibc_clang_has_extension (c_generic_selections) \ + || (!defined __GNUC__ && defined __STDC_VERSION__ \ + && __STDC_VERSION__ >= 201112L)) +# define __HAVE_GENERIC_SELECTION 1 +#else +# define __HAVE_GENERIC_SELECTION 0 +#endif + +#endif /* sys/cdefs.h */ diff --git a/contrib/grep/lib/chdir-long.c b/contrib/grep/lib/chdir-long.c index ecc025a8c3..a38d38c42f 100644 --- a/contrib/grep/lib/chdir-long.c +++ b/contrib/grep/lib/chdir-long.c @@ -1,5 +1,5 @@ /* provide a chdir function that tries not to fail due to ENAMETOOLONG - Copyright (C) 2004-2015 Free Software Foundation, Inc. + Copyright (C) 2004-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ /* written by Jim Meyering */ @@ -212,8 +212,6 @@ chdir_long (char *dir) # include "closeout.h" # include "error.h" -char *program_name; - int main (int argc, char *argv[]) { @@ -221,7 +219,6 @@ main (int argc, char *argv[]) size_t n = 0; int len; - program_name = argv[0]; atexit (close_stdout); len = getline (&line, &n, stdin); diff --git a/contrib/grep/lib/chdir-long.h b/contrib/grep/lib/chdir-long.h index 81dacc3949..851222fe73 100644 --- a/contrib/grep/lib/chdir-long.h +++ b/contrib/grep/lib/chdir-long.h @@ -1,5 +1,5 @@ /* provide a chdir function that tries not to fail due to ENAMETOOLONG - Copyright (C) 2004-2005, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2004-2005, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Jim Meyering. */ diff --git a/contrib/grep/lib/cloexec.c b/contrib/grep/lib/cloexec.c index 29d1a38aaf..510be3d57e 100644 --- a/contrib/grep/lib/cloexec.c +++ b/contrib/grep/lib/cloexec.c @@ -1,6 +1,6 @@ -/* closexec.c - set or clear the close-on-exec descriptor flag +/* cloexec.c - set or clear the close-on-exec descriptor flag - Copyright (C) 1991, 2004-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 1991, 2004-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . + along with this program. If not, see . 
The code is taken from glibc/manual/llio.texi */ diff --git a/contrib/grep/lib/cloexec.h b/contrib/grep/lib/cloexec.h index ef5bacc291..f14a99736d 100644 --- a/contrib/grep/lib/cloexec.h +++ b/contrib/grep/lib/cloexec.h @@ -1,6 +1,6 @@ -/* closexec.c - set or clear the close-on-exec descriptor flag +/* cloexec.c - set or clear the close-on-exec descriptor flag - Copyright (C) 2004, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2004, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . + along with this program. If not, see . */ diff --git a/contrib/grep/lib/close-stream.c b/contrib/grep/lib/close-stream.c index 6e3d8658d5..04bc8009a5 100644 --- a/contrib/grep/lib/close-stream.c +++ b/contrib/grep/lib/close-stream.c @@ -1,6 +1,6 @@ /* Close a stream, with nicer error checking than fclose's. - Copyright (C) 1998-2002, 2004, 2006-2015 Free Software Foundation, Inc. + Copyright (C) 1998-2002, 2004, 2006-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include diff --git a/contrib/grep/lib/close.c b/contrib/grep/lib/close.c deleted file mode 100644 index 54d1f71b9c..0000000000 --- a/contrib/grep/lib/close.c +++ /dev/null @@ -1,69 +0,0 @@ -/* close replacement. - Copyright (C) 2008-2015 Free Software Foundation, Inc. 
- - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#include - -#include "fd-hook.h" -#include "msvc-inval.h" - -#undef close - -#if HAVE_MSVC_INVALID_PARAMETER_HANDLER -static int -close_nothrow (int fd) -{ - int result; - - TRY_MSVC_INVAL - { - result = close (fd); - } - CATCH_MSVC_INVAL - { - result = -1; - errno = EBADF; - } - DONE_MSVC_INVAL; - - return result; -} -#else -# define close_nothrow close -#endif - -/* Override close() to call into other gnulib modules. */ - -int -rpl_close (int fd) -{ -#if WINDOWS_SOCKETS - int retval = execute_all_close_hooks (close_nothrow, fd); -#else - int retval = close_nothrow (fd); -#endif - -#if REPLACE_FCHDIR - if (retval >= 0) - _gl_unregister_fd (fd); -#endif - - return retval; -} diff --git a/contrib/grep/lib/closedir.c b/contrib/grep/lib/closedir.c deleted file mode 100644 index f80843f919..0000000000 --- a/contrib/grep/lib/closedir.c +++ /dev/null @@ -1,67 +0,0 @@ -/* Stop reading the entries of a directory. - Copyright (C) 2006-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#if REPLACE_FCHDIR -# include -#endif - -#if HAVE_CLOSEDIR - -/* Override closedir(), to keep track of the open file descriptors. - Needed because there is a function dirfd(). */ - -#else - -# include - -# include "dirent-private.h" - -#endif - -int -closedir (DIR *dirp) -{ -# if REPLACE_FCHDIR - int fd = dirfd (dirp); -# endif - int retval; - -#if HAVE_CLOSEDIR -# undef closedir - - retval = closedir (dirp); - -#else - - if (dirp->current != INVALID_HANDLE_VALUE) - FindClose (dirp->current); - free (dirp); - - retval = 0; - -#endif - -#if REPLACE_FCHDIR - if (retval >= 0) - _gl_unregister_fd (fd); -#endif - return retval; -} diff --git a/contrib/grep/lib/closeout.c b/contrib/grep/lib/closeout.c index 761d715183..8b92ce9476 100644 --- a/contrib/grep/lib/closeout.c +++ b/contrib/grep/lib/closeout.c @@ -1,6 +1,6 @@ /* Close standard output and standard error, exiting with a diagnostic on error. - Copyright (C) 1998-2002, 2004, 2006, 2008-2015 Free Software Foundation, + Copyright (C) 1998-2002, 2004, 2006, 2008-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ #include @@ -33,6 +33,16 @@ #include "exitfail.h" #include "quotearg.h" +#ifndef __has_feature +# define __has_feature(a) false +#endif + +#if defined __SANITIZE_ADDRESS__ || __has_feature (address_sanitizer) +enum { SANITIZE_ADDRESS = true }; +#else +enum { SANITIZE_ADDRESS = false }; +#endif + static const char *file_name; /* Set the file name to be reported in the event an error is detected @@ -119,6 +129,8 @@ close_stdout (void) _exit (exit_failure); } - if (close_stream (stderr) != 0) - _exit (exit_failure); + /* Close stderr only if not sanitizing, as sanitizers may report to + stderr after this function returns. */ + if (!SANITIZE_ADDRESS && close_stream (stderr) != 0) + _exit (exit_failure); } diff --git a/contrib/grep/lib/closeout.h b/contrib/grep/lib/closeout.h index 87cea5b1af..8294c6268c 100644 --- a/contrib/grep/lib/closeout.h +++ b/contrib/grep/lib/closeout.h @@ -1,6 +1,6 @@ /* Close standard output and standard error. - Copyright (C) 1998, 2000, 2003-2004, 2006, 2008-2015 Free Software + Copyright (C) 1998, 2000, 2003-2004, 2006, 2008-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef CLOSEOUT_H # define CLOSEOUT_H 1 diff --git a/contrib/grep/lib/colorize-posix.c b/contrib/grep/lib/colorize-posix.c index 9261ae1706..cdd2dca59b 100644 --- a/contrib/grep/lib/colorize-posix.c +++ b/contrib/grep/lib/colorize-posix.c @@ -1,5 +1,5 @@ /* Output colorization. - Copyright 2011-2015 Free Software Foundation, Inc. + Copyright 2011-2020 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/contrib/grep/lib/colorize-w32.c b/contrib/grep/lib/colorize-w32.c deleted file mode 100644 index cdb64a6375..0000000000 --- a/contrib/grep/lib/colorize-w32.c +++ /dev/null @@ -1,208 +0,0 @@ -/* Output colorization on MS-Windows. - Copyright 2011-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA - 02110-1301, USA. */ - -/* Written by Eli Zaretskii. */ - -#include - -#include "colorize.h" - -#include -#include -#include -#include - -#undef DATADIR /* conflicts with objidl.h, which is included by windows.h */ -#include - -static HANDLE hstdout = INVALID_HANDLE_VALUE; -static SHORT norm_attr; - -/* Initialize the normal text attribute used by the console. */ -void -init_colorize (void) -{ - CONSOLE_SCREEN_BUFFER_INFO csbi; - - hstdout = GetStdHandle (STD_OUTPUT_HANDLE); - if (hstdout != INVALID_HANDLE_VALUE - && GetConsoleScreenBufferInfo (hstdout, &csbi)) - norm_attr = csbi.wAttributes; - else - hstdout = INVALID_HANDLE_VALUE; -} - -/* Return non-zero if we should highlight matches in output. */ -int -should_colorize (void) -{ - /* $TERM is not normally defined on DOS/Windows, so don't require - it for highlighting. 
But some programs, like Emacs, do define - it when running Grep as a subprocess, so make sure they don't - set TERM=dumb. */ - char const *t = getenv ("TERM"); - return ! (t && strcmp (t, "dumb") == 0); -} - -/* Convert a color spec, a semi-colon separated list of the form - "NN;MM;KK;...", where each number is a value of the SGR parameter, - into the corresponding Windows console text attribute. - - This function supports a subset of the SGR rendition aspects that - the Windows console can display. */ -static int -w32_sgr2attr (const char *sgr_seq) -{ - const char *s, *p; - int code, fg = norm_attr & 15, bg = norm_attr & (15 << 4); - int bright = 0, inverse = 0; - static const int fg_color[] = { - 0, /* black */ - FOREGROUND_RED, /* red */ - FOREGROUND_GREEN, /* green */ - FOREGROUND_GREEN | FOREGROUND_RED, /* yellow */ - FOREGROUND_BLUE, /* blue */ - FOREGROUND_BLUE | FOREGROUND_RED, /* magenta */ - FOREGROUND_BLUE | FOREGROUND_GREEN, /* cyan */ - FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE /* gray */ - }; - static const int bg_color[] = { - 0, /* black */ - BACKGROUND_RED, /* red */ - BACKGROUND_GREEN, /* green */ - BACKGROUND_GREEN | BACKGROUND_RED, /* yellow */ - BACKGROUND_BLUE, /* blue */ - BACKGROUND_BLUE | BACKGROUND_RED, /* magenta */ - BACKGROUND_BLUE | BACKGROUND_GREEN, /* cyan */ - BACKGROUND_RED | BACKGROUND_GREEN | BACKGROUND_BLUE /* gray */ - }; - - for (s = p = sgr_seq; *s; p++) - { - if (*p == ';' || *p == '\0') - { - code = strtol (s, NULL, 10); - s = p + (*p != '\0'); - - switch (code) - { - case 0: /* all attributes off */ - fg = norm_attr & 15; - bg = norm_attr & (15 << 4); - bright = 0; - inverse = 0; - break; - case 1: /* intensity on */ - bright = 1; - break; - case 7: /* inverse video */ - inverse = 1; - break; - case 22: /* intensity off */ - bright = 0; - break; - case 27: /* inverse off */ - inverse = 0; - break; - case 30: case 31: case 32: case 33: /* foreground color */ - case 34: case 35: case 36: case 37: - fg = 
fg_color[code - 30]; - break; - case 39: /* default foreground */ - fg = norm_attr & 15; - break; - case 40: case 41: case 42: case 43: /* background color */ - case 44: case 45: case 46: case 47: - bg = bg_color[code - 40]; - break; - case 49: /* default background */ - bg = norm_attr & (15 << 4); - break; - default: - break; - } - } - } - if (inverse) - { - int t = fg; - fg = (bg >> 4); - bg = (t << 4); - } - if (bright) - fg |= FOREGROUND_INTENSITY; - - return (bg & (15 << 4)) | (fg & 15); -} - -/* Start a colorized text attribute on stdout using the SGR_START - format; the attribute is specified by SGR_SEQ. */ -void -print_start_colorize (char const *sgr_start, char const *sgr_seq) -{ - /* If stdout is connected to a console, set the console text - attribute directly instead of using SGR_START. Otherwise, use - SGR_START to emit the SGR escape sequence as on Posix platforms; - this is needed when Grep is invoked as a subprocess of another - program, such as Emacs, which will handle the display of the - matches. */ - if (hstdout != INVALID_HANDLE_VALUE) - { - SHORT attr = w32_sgr2attr (sgr_seq); - SetConsoleTextAttribute (hstdout, attr); - } - else - printf (sgr_start, sgr_seq); -} - -/* Clear to the end of the current line with the default attribute. - This is needed for reasons similar to those that require the "EL to - Right after SGR" operation on Posix platforms: if we don't do this, - setting the 'mt', 'ms', or 'mc' capabilities to use a non-default - background color spills that color to the empty space at the end of - the last screen line in a match whose line spans multiple screen - lines. 
*/ -static void -w32_clreol (void) -{ - DWORD nchars; - COORD start_pos; - DWORD written; - CONSOLE_SCREEN_BUFFER_INFO csbi; - - GetConsoleScreenBufferInfo (hstdout, &csbi); - start_pos = csbi.dwCursorPosition; - nchars = csbi.dwSize.X - start_pos.X; - - FillConsoleOutputAttribute (hstdout, norm_attr, nchars, start_pos, - &written); - FillConsoleOutputCharacter (hstdout, ' ', nchars, start_pos, &written); -} - -/* Restore the normal text attribute using the SGR_END string. */ -void -print_end_colorize (char const *sgr_end) -{ - if (hstdout != INVALID_HANDLE_VALUE) - { - SetConsoleTextAttribute (hstdout, norm_attr); - w32_clreol (); - } - else - fputs (sgr_end, stdout); -} diff --git a/contrib/grep/lib/colorize.h b/contrib/grep/lib/colorize.h index d04fcb4d6e..93163d0271 100644 --- a/contrib/grep/lib/colorize.h +++ b/contrib/grep/lib/colorize.h @@ -1,6 +1,6 @@ /* Output colorization. - Copyright 2011-2015 Free Software Foundation, Inc. + Copyright 2011-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) diff --git a/contrib/grep/lib/config.charset b/contrib/grep/lib/config.charset deleted file mode 100644 index f06d7e9f00..0000000000 --- a/contrib/grep/lib/config.charset +++ /dev/null @@ -1,682 +0,0 @@ -#! /bin/sh -# Output a system dependent table of character encoding aliases. -# -# Copyright (C) 2000-2004, 2006-2015 Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, see . -# -# The table consists of lines of the form -# ALIAS CANONICAL -# -# ALIAS is the (system dependent) result of "nl_langinfo (CODESET)". -# ALIAS is compared in a case sensitive way. -# -# CANONICAL is the GNU canonical name for this character encoding. -# It must be an encoding supported by libiconv. Support by GNU libc is -# also desirable. CANONICAL is case insensitive. Usually an upper case -# MIME charset name is preferred. -# The current list of GNU canonical charset names is as follows. -# -# name MIME? used by which systems -# (darwin = Mac OS X, woe32 = native Windows) -# -# ASCII, ANSI_X3.4-1968 glibc solaris freebsd netbsd darwin cygwin -# ISO-8859-1 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin -# ISO-8859-2 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin -# ISO-8859-3 Y glibc solaris cygwin -# ISO-8859-4 Y osf solaris freebsd netbsd openbsd darwin -# ISO-8859-5 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin -# ISO-8859-6 Y glibc aix hpux solaris cygwin -# ISO-8859-7 Y glibc aix hpux irix osf solaris netbsd openbsd darwin cygwin -# ISO-8859-8 Y glibc aix hpux osf solaris cygwin -# ISO-8859-9 Y glibc aix hpux irix osf solaris darwin cygwin -# ISO-8859-13 glibc netbsd openbsd darwin cygwin -# ISO-8859-14 glibc cygwin -# ISO-8859-15 glibc aix osf solaris freebsd netbsd openbsd darwin cygwin -# KOI8-R Y glibc solaris freebsd netbsd openbsd darwin -# KOI8-U Y glibc freebsd netbsd openbsd darwin cygwin -# KOI8-T glibc -# CP437 dos -# CP775 dos -# CP850 aix osf dos -# CP852 dos -# CP855 dos -# CP856 aix -# CP857 dos -# CP861 dos -# CP862 dos -# CP864 dos -# CP865 dos -# CP866 freebsd netbsd openbsd darwin dos -# CP869 dos -# CP874 woe32 dos -# CP922 aix -# CP932 aix cygwin woe32 dos -# CP943 aix -# CP949 osf darwin woe32 
dos -# CP950 woe32 dos -# CP1046 aix -# CP1124 aix -# CP1125 dos -# CP1129 aix -# CP1131 darwin -# CP1250 woe32 -# CP1251 glibc solaris netbsd openbsd darwin cygwin woe32 -# CP1252 aix woe32 -# CP1253 woe32 -# CP1254 woe32 -# CP1255 glibc woe32 -# CP1256 woe32 -# CP1257 woe32 -# GB2312 Y glibc aix hpux irix solaris freebsd netbsd darwin -# EUC-JP Y glibc aix hpux irix osf solaris freebsd netbsd darwin -# EUC-KR Y glibc aix hpux irix osf solaris freebsd netbsd darwin cygwin -# EUC-TW glibc aix hpux irix osf solaris netbsd -# BIG5 Y glibc aix hpux osf solaris freebsd netbsd darwin cygwin -# BIG5-HKSCS glibc solaris darwin -# GBK glibc aix osf solaris darwin cygwin woe32 dos -# GB18030 glibc solaris netbsd darwin -# SHIFT_JIS Y hpux osf solaris freebsd netbsd darwin -# JOHAB glibc solaris woe32 -# TIS-620 glibc aix hpux osf solaris cygwin -# VISCII Y glibc -# TCVN5712-1 glibc -# ARMSCII-8 glibc darwin -# GEORGIAN-PS glibc cygwin -# PT154 glibc -# HP-ROMAN8 hpux -# HP-ARABIC8 hpux -# HP-GREEK8 hpux -# HP-HEBREW8 hpux -# HP-TURKISH8 hpux -# HP-KANA8 hpux -# DEC-KANJI osf -# DEC-HANYU osf -# UTF-8 Y glibc aix hpux osf solaris netbsd darwin cygwin -# -# Note: Names which are not marked as being a MIME name should not be used in -# Internet protocols for information interchange (mail, news, etc.). -# -# Note: ASCII and ANSI_X3.4-1968 are synonymous canonical names. Applications -# must understand both names and treat them as equivalent. -# -# The first argument passed to this file is the canonical host specification, -# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM -# or -# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM - -host="$1" -os=`echo "$host" | sed -e 's/^[^-]*-[^-]*-\(.*\)$/\1/'` -echo "# This file contains a table of character encoding aliases," -echo "# suitable for operating system '${os}'." -echo "# It was automatically generated from config.charset." 
-# List of references, updated during installation: -echo "# Packages using this file: " -case "$os" in - linux-gnulibc1*) - # Linux libc5 doesn't have nl_langinfo(CODESET); therefore - # localcharset.c falls back to using the full locale name - # from the environment variables. - echo "C ASCII" - echo "POSIX ASCII" - for l in af af_ZA ca ca_ES da da_DK de de_AT de_BE de_CH de_DE de_LU \ - en en_AU en_BW en_CA en_DK en_GB en_IE en_NZ en_US en_ZA \ - en_ZW es es_AR es_BO es_CL es_CO es_DO es_EC es_ES es_GT \ - es_HN es_MX es_PA es_PE es_PY es_SV es_US es_UY es_VE et \ - et_EE eu eu_ES fi fi_FI fo fo_FO fr fr_BE fr_CA fr_CH fr_FR \ - fr_LU ga ga_IE gl gl_ES id id_ID in in_ID is is_IS it it_CH \ - it_IT kl kl_GL nl nl_BE nl_NL no no_NO pt pt_BR pt_PT sv \ - sv_FI sv_SE; do - echo "$l ISO-8859-1" - echo "$l.iso-8859-1 ISO-8859-1" - echo "$l.iso-8859-15 ISO-8859-15" - echo "$l.iso-8859-15@euro ISO-8859-15" - echo "$l@euro ISO-8859-15" - echo "$l.cp-437 CP437" - echo "$l.cp-850 CP850" - echo "$l.cp-1252 CP1252" - echo "$l.cp-1252@euro CP1252" - #echo "$l.atari-st ATARI-ST" # not a commonly used encoding - echo "$l.utf-8 UTF-8" - echo "$l.utf-8@euro UTF-8" - done - for l in cs cs_CZ hr hr_HR hu hu_HU pl pl_PL ro ro_RO sk sk_SK sl \ - sl_SI sr sr_CS sr_YU; do - echo "$l ISO-8859-2" - echo "$l.iso-8859-2 ISO-8859-2" - echo "$l.cp-852 CP852" - echo "$l.cp-1250 CP1250" - echo "$l.utf-8 UTF-8" - done - for l in mk mk_MK ru ru_RU; do - echo "$l ISO-8859-5" - echo "$l.iso-8859-5 ISO-8859-5" - echo "$l.koi8-r KOI8-R" - echo "$l.cp-866 CP866" - echo "$l.cp-1251 CP1251" - echo "$l.utf-8 UTF-8" - done - for l in ar ar_SA; do - echo "$l ISO-8859-6" - echo "$l.iso-8859-6 ISO-8859-6" - echo "$l.cp-864 CP864" - #echo "$l.cp-868 CP868" # not a commonly used encoding - echo "$l.cp-1256 CP1256" - echo "$l.utf-8 UTF-8" - done - for l in el el_GR gr gr_GR; do - echo "$l ISO-8859-7" - echo "$l.iso-8859-7 ISO-8859-7" - echo "$l.cp-869 CP869" - echo "$l.cp-1253 CP1253" - echo "$l.cp-1253@euro 
CP1253" - echo "$l.utf-8 UTF-8" - echo "$l.utf-8@euro UTF-8" - done - for l in he he_IL iw iw_IL; do - echo "$l ISO-8859-8" - echo "$l.iso-8859-8 ISO-8859-8" - echo "$l.cp-862 CP862" - echo "$l.cp-1255 CP1255" - echo "$l.utf-8 UTF-8" - done - for l in tr tr_TR; do - echo "$l ISO-8859-9" - echo "$l.iso-8859-9 ISO-8859-9" - echo "$l.cp-857 CP857" - echo "$l.cp-1254 CP1254" - echo "$l.utf-8 UTF-8" - done - for l in lt lt_LT lv lv_LV; do - #echo "$l BALTIC" # not a commonly used encoding, wrong encoding name - echo "$l ISO-8859-13" - done - for l in ru_UA uk uk_UA; do - echo "$l KOI8-U" - done - for l in zh zh_CN; do - #echo "$l GB_2312-80" # not a commonly used encoding, wrong encoding name - echo "$l GB2312" - done - for l in ja ja_JP ja_JP.EUC; do - echo "$l EUC-JP" - done - for l in ko ko_KR; do - echo "$l EUC-KR" - done - for l in th th_TH; do - echo "$l TIS-620" - done - for l in fa fa_IR; do - #echo "$l ISIRI-3342" # a broken encoding - echo "$l.utf-8 UTF-8" - done - ;; - linux* | *-gnu*) - # With glibc-2.1 or newer, we don't need any canonicalization, - # because glibc has iconv and both glibc and libiconv support all - # GNU canonical names directly. Therefore, the Makefile does not - # need to install the alias file at all. - # The following applies only to glibc-2.0.x and older libcs. 
- echo "ISO_646.IRV:1983 ASCII" - ;; - aix*) - echo "ISO8859-1 ISO-8859-1" - echo "ISO8859-2 ISO-8859-2" - echo "ISO8859-5 ISO-8859-5" - echo "ISO8859-6 ISO-8859-6" - echo "ISO8859-7 ISO-8859-7" - echo "ISO8859-8 ISO-8859-8" - echo "ISO8859-9 ISO-8859-9" - echo "ISO8859-15 ISO-8859-15" - echo "IBM-850 CP850" - echo "IBM-856 CP856" - echo "IBM-921 ISO-8859-13" - echo "IBM-922 CP922" - echo "IBM-932 CP932" - echo "IBM-943 CP943" - echo "IBM-1046 CP1046" - echo "IBM-1124 CP1124" - echo "IBM-1129 CP1129" - echo "IBM-1252 CP1252" - echo "IBM-eucCN GB2312" - echo "IBM-eucJP EUC-JP" - echo "IBM-eucKR EUC-KR" - echo "IBM-eucTW EUC-TW" - echo "big5 BIG5" - echo "GBK GBK" - echo "TIS-620 TIS-620" - echo "UTF-8 UTF-8" - ;; - hpux*) - echo "iso88591 ISO-8859-1" - echo "iso88592 ISO-8859-2" - echo "iso88595 ISO-8859-5" - echo "iso88596 ISO-8859-6" - echo "iso88597 ISO-8859-7" - echo "iso88598 ISO-8859-8" - echo "iso88599 ISO-8859-9" - echo "iso885915 ISO-8859-15" - echo "roman8 HP-ROMAN8" - echo "arabic8 HP-ARABIC8" - echo "greek8 HP-GREEK8" - echo "hebrew8 HP-HEBREW8" - echo "turkish8 HP-TURKISH8" - echo "kana8 HP-KANA8" - echo "tis620 TIS-620" - echo "big5 BIG5" - echo "eucJP EUC-JP" - echo "eucKR EUC-KR" - echo "eucTW EUC-TW" - echo "hp15CN GB2312" - #echo "ccdc ?" # what is this? 
- echo "SJIS SHIFT_JIS" - echo "utf8 UTF-8" - ;; - irix*) - echo "ISO8859-1 ISO-8859-1" - echo "ISO8859-2 ISO-8859-2" - echo "ISO8859-5 ISO-8859-5" - echo "ISO8859-7 ISO-8859-7" - echo "ISO8859-9 ISO-8859-9" - echo "eucCN GB2312" - echo "eucJP EUC-JP" - echo "eucKR EUC-KR" - echo "eucTW EUC-TW" - ;; - osf*) - echo "ISO8859-1 ISO-8859-1" - echo "ISO8859-2 ISO-8859-2" - echo "ISO8859-4 ISO-8859-4" - echo "ISO8859-5 ISO-8859-5" - echo "ISO8859-7 ISO-8859-7" - echo "ISO8859-8 ISO-8859-8" - echo "ISO8859-9 ISO-8859-9" - echo "ISO8859-15 ISO-8859-15" - echo "cp850 CP850" - echo "big5 BIG5" - echo "dechanyu DEC-HANYU" - echo "dechanzi GB2312" - echo "deckanji DEC-KANJI" - echo "deckorean EUC-KR" - echo "eucJP EUC-JP" - echo "eucKR EUC-KR" - echo "eucTW EUC-TW" - echo "GBK GBK" - echo "KSC5601 CP949" - echo "sdeckanji EUC-JP" - echo "SJIS SHIFT_JIS" - echo "TACTIS TIS-620" - echo "UTF-8 UTF-8" - ;; - solaris*) - echo "646 ASCII" - echo "ISO8859-1 ISO-8859-1" - echo "ISO8859-2 ISO-8859-2" - echo "ISO8859-3 ISO-8859-3" - echo "ISO8859-4 ISO-8859-4" - echo "ISO8859-5 ISO-8859-5" - echo "ISO8859-6 ISO-8859-6" - echo "ISO8859-7 ISO-8859-7" - echo "ISO8859-8 ISO-8859-8" - echo "ISO8859-9 ISO-8859-9" - echo "ISO8859-15 ISO-8859-15" - echo "koi8-r KOI8-R" - echo "ansi-1251 CP1251" - echo "BIG5 BIG5" - echo "Big5-HKSCS BIG5-HKSCS" - echo "gb2312 GB2312" - echo "GBK GBK" - echo "GB18030 GB18030" - echo "cns11643 EUC-TW" - echo "5601 EUC-KR" - echo "ko_KR.johap92 JOHAB" - echo "eucJP EUC-JP" - echo "PCK SHIFT_JIS" - echo "TIS620.2533 TIS-620" - #echo "sun_eu_greek ?" # what is this? - echo "UTF-8 UTF-8" - ;; - freebsd*) - # FreeBSD 4.2 doesn't have nl_langinfo(CODESET); therefore - # localcharset.c falls back to using the full locale name - # from the environment variables. 
- echo "C ASCII" - echo "US-ASCII ASCII" - for l in la_LN lt_LN; do - echo "$l.ASCII ASCII" - done - for l in da_DK de_AT de_CH de_DE en_AU en_CA en_GB en_US es_ES \ - fi_FI fr_BE fr_CA fr_CH fr_FR is_IS it_CH it_IT la_LN \ - lt_LN nl_BE nl_NL no_NO pt_PT sv_SE; do - echo "$l.ISO_8859-1 ISO-8859-1" - echo "$l.DIS_8859-15 ISO-8859-15" - done - for l in cs_CZ hr_HR hu_HU la_LN lt_LN pl_PL sl_SI; do - echo "$l.ISO_8859-2 ISO-8859-2" - done - for l in la_LN lt_LT; do - echo "$l.ISO_8859-4 ISO-8859-4" - done - for l in ru_RU ru_SU; do - echo "$l.KOI8-R KOI8-R" - echo "$l.ISO_8859-5 ISO-8859-5" - echo "$l.CP866 CP866" - done - echo "uk_UA.KOI8-U KOI8-U" - echo "zh_TW.BIG5 BIG5" - echo "zh_TW.Big5 BIG5" - echo "zh_CN.EUC GB2312" - echo "ja_JP.EUC EUC-JP" - echo "ja_JP.SJIS SHIFT_JIS" - echo "ja_JP.Shift_JIS SHIFT_JIS" - echo "ko_KR.EUC EUC-KR" - ;; - netbsd*) - echo "646 ASCII" - echo "ISO8859-1 ISO-8859-1" - echo "ISO8859-2 ISO-8859-2" - echo "ISO8859-4 ISO-8859-4" - echo "ISO8859-5 ISO-8859-5" - echo "ISO8859-7 ISO-8859-7" - echo "ISO8859-13 ISO-8859-13" - echo "ISO8859-15 ISO-8859-15" - echo "eucCN GB2312" - echo "eucJP EUC-JP" - echo "eucKR EUC-KR" - echo "eucTW EUC-TW" - echo "BIG5 BIG5" - echo "SJIS SHIFT_JIS" - ;; - openbsd*) - echo "646 ASCII" - echo "ISO8859-1 ISO-8859-1" - echo "ISO8859-2 ISO-8859-2" - echo "ISO8859-4 ISO-8859-4" - echo "ISO8859-5 ISO-8859-5" - echo "ISO8859-7 ISO-8859-7" - echo "ISO8859-13 ISO-8859-13" - echo "ISO8859-15 ISO-8859-15" - ;; - darwin[56]*) - # Darwin 6.8 doesn't have nl_langinfo(CODESET); therefore - # localcharset.c falls back to using the full locale name - # from the environment variables. 
- echo "C ASCII" - for l in en_AU en_CA en_GB en_US la_LN; do - echo "$l.US-ASCII ASCII" - done - for l in da_DK de_AT de_CH de_DE en_AU en_CA en_GB en_US es_ES \ - fi_FI fr_BE fr_CA fr_CH fr_FR is_IS it_CH it_IT nl_BE \ - nl_NL no_NO pt_PT sv_SE; do - echo "$l ISO-8859-1" - echo "$l.ISO8859-1 ISO-8859-1" - echo "$l.ISO8859-15 ISO-8859-15" - done - for l in la_LN; do - echo "$l.ISO8859-1 ISO-8859-1" - echo "$l.ISO8859-15 ISO-8859-15" - done - for l in cs_CZ hr_HR hu_HU la_LN pl_PL sl_SI; do - echo "$l.ISO8859-2 ISO-8859-2" - done - for l in la_LN lt_LT; do - echo "$l.ISO8859-4 ISO-8859-4" - done - for l in ru_RU; do - echo "$l.KOI8-R KOI8-R" - echo "$l.ISO8859-5 ISO-8859-5" - echo "$l.CP866 CP866" - done - for l in bg_BG; do - echo "$l.CP1251 CP1251" - done - echo "uk_UA.KOI8-U KOI8-U" - echo "zh_TW.BIG5 BIG5" - echo "zh_TW.Big5 BIG5" - echo "zh_CN.EUC GB2312" - echo "ja_JP.EUC EUC-JP" - echo "ja_JP.SJIS SHIFT_JIS" - echo "ko_KR.EUC EUC-KR" - ;; - darwin*) - # Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is - # useless: - # - It returns the empty string when LANG is set to a locale of the - # form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8 - # LC_CTYPE file. - # - The environment variables LANG, LC_CTYPE, LC_ALL are not set by - # the system; nl_langinfo(CODESET) returns "US-ASCII" in this case. - # - The documentation says: - # "... all code that calls BSD system routines should ensure - # that the const *char parameters of these routines are in UTF-8 - # encoding. All BSD system functions expect their string - # parameters to be in UTF-8 encoding and nothing else." - # It also says - # "An additional caveat is that string parameters for files, - # paths, and other file-system entities must be in canonical - # UTF-8. In a canonical UTF-8 Unicode string, all decomposable - # characters are decomposed ..." 
- # but this is not true: You can pass non-decomposed UTF-8 strings - # to file system functions, and it is the OS which will convert - # them to decomposed UTF-8 before accessing the file system. - # - The Apple Terminal application displays UTF-8 by default. - # - However, other applications are free to use different encodings: - # - xterm uses ISO-8859-1 by default. - # - TextEdit uses MacRoman by default. - # We prefer UTF-8 over decomposed UTF-8-MAC because one should - # minimize the use of decomposed Unicode. Unfortunately, through the - # Darwin file system, decomposed UTF-8 strings are leaked into user - # space nevertheless. - # Then there are also the locales with encodings other than US-ASCII - # and UTF-8. These locales can be occasionally useful to users (e.g. - # when grepping through ISO-8859-1 encoded text files), when all their - # file names are in US-ASCII. - echo "ISO8859-1 ISO-8859-1" - echo "ISO8859-2 ISO-8859-2" - echo "ISO8859-4 ISO-8859-4" - echo "ISO8859-5 ISO-8859-5" - echo "ISO8859-7 ISO-8859-7" - echo "ISO8859-9 ISO-8859-9" - echo "ISO8859-13 ISO-8859-13" - echo "ISO8859-15 ISO-8859-15" - echo "KOI8-R KOI8-R" - echo "KOI8-U KOI8-U" - echo "CP866 CP866" - echo "CP949 CP949" - echo "CP1131 CP1131" - echo "CP1251 CP1251" - echo "eucCN GB2312" - echo "GB2312 GB2312" - echo "eucJP EUC-JP" - echo "eucKR EUC-KR" - echo "Big5 BIG5" - echo "Big5HKSCS BIG5-HKSCS" - echo "GBK GBK" - echo "GB18030 GB18030" - echo "SJIS SHIFT_JIS" - echo "ARMSCII-8 ARMSCII-8" - echo "PT154 PT154" - #echo "ISCII-DEV ?" - echo "* UTF-8" - ;; - beos* | haiku*) - # BeOS and Haiku have a single locale, and it has UTF-8 encoding. - echo "* UTF-8" - ;; - msdosdjgpp*) - # DJGPP 2.03 doesn't have nl_langinfo(CODESET); therefore - # localcharset.c falls back to using the full locale name - # from the environment variables. - echo "#" - echo "# The encodings given here may not all be correct." 
- echo "# If you find that the encoding given for your language and" - echo "# country is not the one your DOS machine actually uses, just" - echo "# correct it in this file, and send a mail to" - echo "# Juan Manuel Guerrero " - echo "# and Bruno Haible ." - echo "#" - echo "C ASCII" - # ISO-8859-1 languages - echo "ca CP850" - echo "ca_ES CP850" - echo "da CP865" # not CP850 ?? - echo "da_DK CP865" # not CP850 ?? - echo "de CP850" - echo "de_AT CP850" - echo "de_CH CP850" - echo "de_DE CP850" - echo "en CP850" - echo "en_AU CP850" # not CP437 ?? - echo "en_CA CP850" - echo "en_GB CP850" - echo "en_NZ CP437" - echo "en_US CP437" - echo "en_ZA CP850" # not CP437 ?? - echo "es CP850" - echo "es_AR CP850" - echo "es_BO CP850" - echo "es_CL CP850" - echo "es_CO CP850" - echo "es_CR CP850" - echo "es_CU CP850" - echo "es_DO CP850" - echo "es_EC CP850" - echo "es_ES CP850" - echo "es_GT CP850" - echo "es_HN CP850" - echo "es_MX CP850" - echo "es_NI CP850" - echo "es_PA CP850" - echo "es_PY CP850" - echo "es_PE CP850" - echo "es_SV CP850" - echo "es_UY CP850" - echo "es_VE CP850" - echo "et CP850" - echo "et_EE CP850" - echo "eu CP850" - echo "eu_ES CP850" - echo "fi CP850" - echo "fi_FI CP850" - echo "fr CP850" - echo "fr_BE CP850" - echo "fr_CA CP850" - echo "fr_CH CP850" - echo "fr_FR CP850" - echo "ga CP850" - echo "ga_IE CP850" - echo "gd CP850" - echo "gd_GB CP850" - echo "gl CP850" - echo "gl_ES CP850" - echo "id CP850" # not CP437 ?? - echo "id_ID CP850" # not CP437 ?? - echo "is CP861" # not CP850 ?? - echo "is_IS CP861" # not CP850 ?? - echo "it CP850" - echo "it_CH CP850" - echo "it_IT CP850" - echo "lt CP775" - echo "lt_LT CP775" - echo "lv CP775" - echo "lv_LV CP775" - echo "nb CP865" # not CP850 ?? - echo "nb_NO CP865" # not CP850 ?? - echo "nl CP850" - echo "nl_BE CP850" - echo "nl_NL CP850" - echo "nn CP865" # not CP850 ?? - echo "nn_NO CP865" # not CP850 ?? - echo "no CP865" # not CP850 ?? - echo "no_NO CP865" # not CP850 ?? 
- echo "pt CP850" - echo "pt_BR CP850" - echo "pt_PT CP850" - echo "sv CP850" - echo "sv_SE CP850" - # ISO-8859-2 languages - echo "cs CP852" - echo "cs_CZ CP852" - echo "hr CP852" - echo "hr_HR CP852" - echo "hu CP852" - echo "hu_HU CP852" - echo "pl CP852" - echo "pl_PL CP852" - echo "ro CP852" - echo "ro_RO CP852" - echo "sk CP852" - echo "sk_SK CP852" - echo "sl CP852" - echo "sl_SI CP852" - echo "sq CP852" - echo "sq_AL CP852" - echo "sr CP852" # CP852 or CP866 or CP855 ?? - echo "sr_CS CP852" # CP852 or CP866 or CP855 ?? - echo "sr_YU CP852" # CP852 or CP866 or CP855 ?? - # ISO-8859-3 languages - echo "mt CP850" - echo "mt_MT CP850" - # ISO-8859-5 languages - echo "be CP866" - echo "be_BE CP866" - echo "bg CP866" # not CP855 ?? - echo "bg_BG CP866" # not CP855 ?? - echo "mk CP866" # not CP855 ?? - echo "mk_MK CP866" # not CP855 ?? - echo "ru CP866" - echo "ru_RU CP866" - echo "uk CP1125" - echo "uk_UA CP1125" - # ISO-8859-6 languages - echo "ar CP864" - echo "ar_AE CP864" - echo "ar_DZ CP864" - echo "ar_EG CP864" - echo "ar_IQ CP864" - echo "ar_IR CP864" - echo "ar_JO CP864" - echo "ar_KW CP864" - echo "ar_MA CP864" - echo "ar_OM CP864" - echo "ar_QA CP864" - echo "ar_SA CP864" - echo "ar_SY CP864" - # ISO-8859-7 languages - echo "el CP869" - echo "el_GR CP869" - # ISO-8859-8 languages - echo "he CP862" - echo "he_IL CP862" - # ISO-8859-9 languages - echo "tr CP857" - echo "tr_TR CP857" - # Japanese - echo "ja CP932" - echo "ja_JP CP932" - # Chinese - echo "zh_CN GBK" - echo "zh_TW CP950" # not CP938 ?? - # Korean - echo "kr CP949" # not CP934 ?? - echo "kr_KR CP949" # not CP934 ?? - # Thai - echo "th CP874" - echo "th_TH CP874" - # Other - echo "eo CP850" - echo "eo_EO CP850" - ;; -esac diff --git a/contrib/grep/lib/creat-safer.c b/contrib/grep/lib/creat-safer.c index 5645d31b21..79db9da65f 100644 --- a/contrib/grep/lib/creat-safer.c +++ b/contrib/grep/lib/creat-safer.c @@ -1,6 +1,6 @@ /* Invoke creat, but avoid some glitches. 
- Copyright (C) 2005-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ + along with this program. If not, see <https://www.gnu.org/licenses/>. */ /* Written by Jim Meyering. */ diff --git a/contrib/grep/lib/cycle-check.c b/contrib/grep/lib/cycle-check.c index a198b091f2..fa6a026183 100644 --- a/contrib/grep/lib/cycle-check.c +++ b/contrib/grep/lib/cycle-check.c @@ -1,6 +1,6 @@ /* help detect directory cycles efficiently - Copyright (C) 2003-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2003-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ + along with this program. If not, see <https://www.gnu.org/licenses/>. */ /* Written by Jim Meyering */ diff --git a/contrib/grep/lib/cycle-check.h b/contrib/grep/lib/cycle-check.h index 9756658dc7..9590f44a84 100644 --- a/contrib/grep/lib/cycle-check.h +++ b/contrib/grep/lib/cycle-check.h @@ -1,6 +1,6 @@ /* help detect directory cycles efficiently - Copyright (C) 2003-2004, 2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2003-2004, 2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + along with this program. If not, see <https://www.gnu.org/licenses/>. */ /* Written by Jim Meyering */ diff --git a/contrib/grep/lib/dfa.c b/contrib/grep/lib/dfa.c new file mode 100644 index 0000000000..96ae560b14 --- /dev/null +++ b/contrib/grep/lib/dfa.c @@ -0,0 +1,4359 @@ +/* dfa.c - deterministic extended regexp routines for GNU + Copyright (C) 1988, 1998, 2000, 2002, 2004-2005, 2007-2020 Free Software + Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., + 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA */ + +/* Written June, 1988 by Mike Haertel + Modified July, 1988 by Arthur David Olson to assist BMG speedups */ + +#include + +#include "dfa.h" + +#include "flexmember.h" + +#include +#include +#include +#include +#include +#include +#include + +/* Another name for ptrdiff_t, for sizes of objects and nonnegative + indexes into objects. It is signed to help catch integer overflow. + It has its own name because it is for nonnegative values only. 
*/ +typedef ptrdiff_t idx_t; +static idx_t const IDX_MAX = PTRDIFF_MAX; + +static bool +streq (char const *a, char const *b) +{ + return strcmp (a, b) == 0; +} + +static bool +isasciidigit (char c) +{ + return '0' <= c && c <= '9'; +} + +#include "gettext.h" +#define _(str) gettext (str) + +#include + +#include "intprops.h" +#include "xalloc.h" +#include "localeinfo.h" + +#ifndef FALLTHROUGH +# if __GNUC__ < 7 +# define FALLTHROUGH ((void) 0) +# else +# define FALLTHROUGH __attribute__ ((__fallthrough__)) +# endif +#endif + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + +/* HPUX defines these as macros in sys/param.h. */ +#ifdef setbit +# undef setbit +#endif +#ifdef clrbit +# undef clrbit +#endif + +/* First integer value that is greater than any character code. */ +enum { NOTCHAR = 1 << CHAR_BIT }; + +/* Number of bits used in a charclass word. */ +enum { CHARCLASS_WORD_BITS = 64 }; + +/* This represents part of a character class. It must be unsigned and + at least CHARCLASS_WORD_BITS wide. Any excess bits are zero. */ +typedef uint_least64_t charclass_word; + +/* An initializer for a charclass whose 64-bit words are A through D. */ +#define CHARCLASS_INIT(a, b, c, d) {{a, b, c, d}} + +/* The maximum useful value of a charclass_word; all used bits are 1. */ +static charclass_word const CHARCLASS_WORD_MASK + = ((charclass_word) 1 << (CHARCLASS_WORD_BITS - 1) << 1) - 1; + +/* Number of words required to hold a bit for every character. */ +enum +{ + CHARCLASS_WORDS = (NOTCHAR + CHARCLASS_WORD_BITS - 1) / CHARCLASS_WORD_BITS +}; + +/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */ +typedef struct { charclass_word w[CHARCLASS_WORDS]; } charclass; + +/* Convert a possibly-signed character to an unsigned character. This is + a bit safer than casting to unsigned char, since it catches some type + errors that the cast doesn't. 
*/ +static unsigned char +to_uchar (char ch) +{ + return ch; +} + +/* Contexts tell us whether a character is a newline or a word constituent. + Word-constituent characters are those that satisfy iswalnum, plus '_'. + Each character has a single CTX_* value; bitmasks of CTX_* values denote + a particular character class. + + A state also stores a context value, which is a bitmask of CTX_* values. + A state's context represents a set of characters that the state's + predecessors must match. For example, a state whose context does not + include CTX_LETTER will never have transitions where the previous + character is a word constituent. A state whose context is CTX_ANY + might have transitions from any character. */ + +enum + { + CTX_NONE = 1, + CTX_LETTER = 2, + CTX_NEWLINE = 4, + CTX_ANY = 7 + }; + +/* Sometimes characters can only be matched depending on the surrounding + context. Such context decisions depend on what the previous character + was, and the value of the current (lookahead) character. Context + dependent constraints are encoded as 9-bit integers. Each bit that + is set indicates that the constraint succeeds in the corresponding + context. + + bit 6-8 - valid contexts when next character is CTX_NEWLINE + bit 3-5 - valid contexts when next character is CTX_LETTER + bit 0-2 - valid contexts when next character is CTX_NONE + + succeeds_in_context determines whether a given constraint + succeeds in a particular context. Prev is a bitmask of possible + context values for the previous character, curr is the (single-bit) + context value for the lookahead character. */ +static int +newline_constraint (int constraint) +{ + return (constraint >> 6) & 7; +} +static int +letter_constraint (int constraint) +{ + return (constraint >> 3) & 7; +} +static int +other_constraint (int constraint) +{ + return constraint & 7; +} + +static bool +succeeds_in_context (int constraint, int prev, int curr) +{ + return !! (((curr & CTX_NONE ? 
other_constraint (constraint) : 0) \ + | (curr & CTX_LETTER ? letter_constraint (constraint) : 0) \ + | (curr & CTX_NEWLINE ? newline_constraint (constraint) : 0)) \ + & prev); +} + +/* The following describe what a constraint depends on. */ +static bool +prev_newline_dependent (int constraint) +{ + return ((constraint ^ constraint >> 2) & 0111) != 0; +} +static bool +prev_letter_dependent (int constraint) +{ + return ((constraint ^ constraint >> 1) & 0111) != 0; +} + +/* Tokens that match the empty string subject to some constraint actually + work by applying that constraint to determine what may follow them, + taking into account what has gone before. The following values are + the constraints corresponding to the special tokens previously defined. */ +enum + { + NO_CONSTRAINT = 0777, + BEGLINE_CONSTRAINT = 0444, + ENDLINE_CONSTRAINT = 0700, + BEGWORD_CONSTRAINT = 0050, + ENDWORD_CONSTRAINT = 0202, + LIMWORD_CONSTRAINT = 0252, + NOTLIMWORD_CONSTRAINT = 0525 + }; + +/* The regexp is parsed into an array of tokens in postfix form. Some tokens + are operators and others are terminal symbols. Most (but not all) of these + codes are returned by the lexical analyzer. */ + +typedef ptrdiff_t token; +static token const TOKEN_MAX = PTRDIFF_MAX; + +/* States are indexed by state_num values. These are normally + nonnegative but -1 is used as a special value. */ +typedef ptrdiff_t state_num; + +/* Predefined token values. */ +enum +{ + END = -1, /* END is a terminal symbol that matches the + end of input; any value of END or less in + the parse tree is such a symbol. Accepting + states of the DFA are those that would have + a transition on END. This is -1, not some + more-negative value, to tweak the speed of + comparisons to END. */ + + /* Ordinary character values are terminal symbols that match themselves. */ + + /* CSET must come last in the following list of special tokens. Otherwise, + the list order matters only for performance. 
Related special tokens + should have nearby values so that code like (t == ANYCHAR || t == MBCSET + || CSET <= t) can be done with a single machine-level comparison. */ + + EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches + the empty string. */ + + QMARK, /* QMARK is an operator of one argument that + matches zero or one occurrences of its + argument. */ + + STAR, /* STAR is an operator of one argument that + matches the Kleene closure (zero or more + occurrences) of its argument. */ + + PLUS, /* PLUS is an operator of one argument that + matches the positive closure (one or more + occurrences) of its argument. */ + + REPMN, /* REPMN is a lexical token corresponding + to the {m,n} construct. REPMN never + appears in the compiled token vector. */ + + CAT, /* CAT is an operator of two arguments that + matches the concatenation of its + arguments. CAT is never returned by the + lexical analyzer. */ + + OR, /* OR is an operator of two arguments that + matches either of its arguments. */ + + LPAREN, /* LPAREN never appears in the parse tree, + it is only a lexeme. */ + + RPAREN, /* RPAREN never appears in the parse tree. */ + + WCHAR, /* Only returned by lex. wctok contains + the wide character representation. */ + + ANYCHAR, /* ANYCHAR is a terminal symbol that matches + a valid multibyte (or single byte) character. + It is used only if MB_CUR_MAX > 1. */ + + BEG, /* BEG is an initial symbol that matches the + beginning of input. */ + + BEGLINE, /* BEGLINE is a terminal symbol that matches + the empty string at the beginning of a + line. */ + + ENDLINE, /* ENDLINE is a terminal symbol that matches + the empty string at the end of a line. */ + + BEGWORD, /* BEGWORD is a terminal symbol that matches + the empty string at the beginning of a + word. */ + + ENDWORD, /* ENDWORD is a terminal symbol that matches + the empty string at the end of a word. 
*/ + + LIMWORD, /* LIMWORD is a terminal symbol that matches + the empty string at the beginning or the + end of a word. */ + + NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that + matches the empty string not at + the beginning or end of a word. */ + + BACKREF, /* BACKREF is generated by \<digit> + or by any other construct that + is not completely handled. If the scanner + detects a transition on backref, it returns + a kind of "semi-success" indicating that + the match will have to be verified with + a backtracking matcher. */ + + MBCSET, /* MBCSET is similar to CSET, but for + multibyte characters. */ + + CSET /* CSET and (and any value greater) is a + terminal symbol that matches any of a + class of characters. */ +}; + + +/* States of the recognizer correspond to sets of positions in the parse + tree, together with the constraints under which they may be matched. + So a position is encoded as an index into the parse tree together with + a constraint. */ +typedef struct +{ + idx_t index; /* Index into the parse array. */ + unsigned int constraint; /* Constraint for matching this position. */ +} position; + +/* Sets of positions are stored as arrays. */ +typedef struct +{ + position *elems; /* Elements of this position set. */ + idx_t nelem; /* Number of elements in this set. */ + idx_t alloc; /* Number of elements allocated in ELEMS. */ +} position_set; + +/* A state of the dfa consists of a set of positions, some flags, + and the token value of the lowest-numbered position of the state that + contains an END token. */ +typedef struct +{ + size_t hash; /* Hash of the positions of this state. */ + position_set elems; /* Positions this state could match. */ + unsigned char context; /* Context from previous state. */ + unsigned short constraint; /* Constraint for this state to accept. */ + token first_end; /* Token value of the first END in elems. */ + position_set mbps; /* Positions which can match multibyte + characters or the follows, e.g., period. 
+ Used only if MB_CUR_MAX > 1. */ + state_num mb_trindex; /* Index of this state in MB_TRANS, or + negative if the state does not have + ANYCHAR. */ +} dfa_state; + +/* Maximum for any transition table count. This should be at least 3, + for the initial state setup. */ +enum { MAX_TRCOUNT = 1024 }; + +/* A bracket operator. + e.g., [a-c], [[:alpha:]], etc. */ +struct mb_char_classes +{ + ptrdiff_t cset; + bool invert; + wchar_t *chars; /* Normal characters. */ + idx_t nchars; + idx_t nchars_alloc; +}; + +struct regex_syntax +{ + /* Syntax bits controlling the behavior of the lexical analyzer. */ + reg_syntax_t syntax_bits; + bool syntax_bits_set; + + /* Flag for case-folding letters into sets. */ + bool case_fold; + + /* True if ^ and $ match only the start and end of data, and do not match + end-of-line within data. */ + bool anchor; + + /* End-of-line byte in data. */ + unsigned char eolbyte; + + /* Cache of char-context values. */ + char sbit[NOTCHAR]; + + /* If never_trail[B], the byte B cannot be a non-initial byte in a + multibyte character. */ + bool never_trail[NOTCHAR]; + + /* Set of characters considered letters. */ + charclass letters; + + /* Set of characters that are newline. */ + charclass newline; +}; + +/* Lexical analyzer. All the dross that deals with the obnoxious + GNU Regex syntax bits is located here. The poor, suffering + reader is referred to the GNU Regex documentation for the + meaning of the @#%!@#%^!@ syntax bits. */ +struct lexer_state +{ + char const *ptr; /* Pointer to next input character. */ + idx_t left; /* Number of characters remaining. */ + token lasttok; /* Previous token returned; initially END. */ + idx_t parens; /* Count of outstanding left parens. */ + int minrep, maxrep; /* Repeat counts for {m,n}. */ + + /* Wide character representation of the current multibyte character, + or WEOF if there was an encoding error. Used only if + MB_CUR_MAX > 1. 
*/ + wint_t wctok; + + /* The most recently analyzed multibyte bracket expression. */ + struct mb_char_classes brack; + + /* We're separated from beginning or (, | only by zero-width characters. */ + bool laststart; +}; + +/* Recursive descent parser for regular expressions. */ + +struct parser_state +{ + token tok; /* Lookahead token. */ + idx_t depth; /* Current depth of a hypothetical stack + holding deferred productions. This is + used to determine the depth that will be + required of the real stack later on in + dfaanalyze. */ +}; + +/* A compiled regular expression. */ +struct dfa +{ + /* Fields filled by the scanner. */ + charclass *charclasses; /* Array of character sets for CSET tokens. */ + idx_t cindex; /* Index for adding new charclasses. */ + idx_t calloc; /* Number of charclasses allocated. */ + ptrdiff_t canychar; /* Index of anychar class, or -1. */ + + /* Scanner state */ + struct lexer_state lex; + + /* Parser state */ + struct parser_state parse; + + /* Fields filled by the parser. */ + token *tokens; /* Postfix parse array. */ + idx_t tindex; /* Index for adding new tokens. */ + idx_t talloc; /* Number of tokens currently allocated. */ + idx_t depth; /* Depth required of an evaluation stack + used for depth-first traversal of the + parse tree. */ + idx_t nleaves; /* Number of leaves on the parse tree. */ + idx_t nregexps; /* Count of parallel regexps being built + with dfaparse. */ + bool fast; /* The DFA is fast. */ + token utf8_anychar_classes[9]; /* To lower ANYCHAR in UTF-8 locales. */ + mbstate_t mbs; /* Multibyte conversion state. */ + + /* The following are valid only if MB_CUR_MAX > 1. */ + + /* The value of multibyte_prop[i] is defined by following rule. + if tokens[i] < NOTCHAR + bit 0 : tokens[i] is the first byte of a character, including + single-byte characters. + bit 1 : tokens[i] is the last byte of a character, including + single-byte characters. + + e.g. 
+ tokens + = 'single_byte_a', 'multi_byte_A', single_byte_b' + = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b' + multibyte_prop + = 3 , 1 , 0 , 2 , 3 + */ + char *multibyte_prop; + + /* Fields filled by the superset. */ + struct dfa *superset; /* Hint of the dfa. */ + + /* Fields filled by the state builder. */ + dfa_state *states; /* States of the dfa. */ + state_num sindex; /* Index for adding new states. */ + idx_t salloc; /* Number of states currently allocated. */ + + /* Fields filled by the parse tree->NFA conversion. */ + position_set *follows; /* Array of follow sets, indexed by position + index. The follow of a position is the set + of positions containing characters that + could conceivably follow a character + matching the given position in a string + matching the regexp. Allocated to the + maximum possible position index. */ + bool searchflag; /* We are supposed to build a searching + as opposed to an exact matcher. A searching + matcher finds the first and shortest string + matching a regexp anywhere in the buffer, + whereas an exact matcher finds the longest + string matching, but anchored to the + beginning of the buffer. */ + + /* Fields filled by dfaanalyze. */ + int *constraints; /* Array of union of accepting constraints + in the follow of a position. */ + int *separates; /* Array of contexts on follow of a + position. */ + + /* Fields filled by dfaexec. */ + state_num tralloc; /* Number of transition tables that have + slots so far, not counting trans[-1] and + trans[-2]. */ + int trcount; /* Number of transition tables that have + been built, other than for initial + states. */ + int min_trcount; /* Number of initial states. Equivalently, + the minimum state number for which trcount + counts transitions. */ + state_num **trans; /* Transition tables for states that can + never accept. If the transitions for a + state have not yet been computed, or the + state could possibly accept, its entry in + this table is NULL. 
This points to two + past the start of the allocated array, + and trans[-1] and trans[-2] are always + NULL. */ + state_num **fails; /* Transition tables after failing to accept + on a state that potentially could do so. + If trans[i] is non-null, fails[i] must + be null. */ + char *success; /* Table of acceptance conditions used in + dfaexec and computed in build_state. */ + state_num *newlines; /* Transitions on newlines. The entry for a + newline in any transition table is always + -1 so we can count lines without wasting + too many cycles. The transition for a + newline is stored separately and handled + as a special case. Newline is also used + as a sentinel at the end of the buffer. */ + state_num initstate_notbol; /* Initial state for CTX_LETTER and CTX_NONE + context in multibyte locales, in which we + do not distinguish between their contexts, + as not supported word. */ + position_set mb_follows; /* Follow set added by ANYCHAR on demand. */ + state_num **mb_trans; /* Transition tables for states with + ANYCHAR. */ + state_num mb_trcount; /* Number of transition tables for states with + ANYCHAR that have actually been built. */ + + /* Syntax configuration. This is near the end so that dfacopysyntax + can memset up to here. */ + struct regex_syntax syntax; + + /* Information derived from the locale. This is at the end so that + a quick memset need not clear it specially. */ + + /* dfaexec implementation. */ + char *(*dfaexec) (struct dfa *, char const *, char *, + bool, ptrdiff_t *, bool *); + + /* Other cached information derived from the locale. */ + struct localeinfo localeinfo; +}; + +/* User access to dfa internals. */ + +/* S could possibly be an accepting state of R. */ +static bool +accepting (state_num s, struct dfa const *r) +{ + return r->states[s].constraint != 0; +} + +/* STATE accepts in the specified context. 
*/ +static bool +accepts_in_context (int prev, int curr, state_num state, struct dfa const *dfa) +{ + return succeeds_in_context (dfa->states[state].constraint, prev, curr); +} + +static void regexp (struct dfa *dfa); + +/* Store into *PWC the result of converting the leading bytes of the + multibyte buffer S of length N bytes, using D->localeinfo.sbctowc + and updating the conversion state in *D. On conversion error, + convert just a single byte, to WEOF. Return the number of bytes + converted. + + This differs from mbrtowc (PWC, S, N, &D->mbs) as follows: + + * PWC points to wint_t, not to wchar_t. + * The last arg is a dfa *D instead of merely a multibyte conversion + state D->mbs. + * N must be at least 1. + * S[N - 1] must be a sentinel byte. + * Shift encodings are not supported. + * The return value is always in the range 1..N. + * D->mbs is always valid afterwards. + * *PWC is always set to something. */ +static int +mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d) +{ + unsigned char uc = s[0]; + wint_t wc = d->localeinfo.sbctowc[uc]; + + if (wc == WEOF) + { + wchar_t wch; + size_t nbytes = mbrtowc (&wch, s, n, &d->mbs); + if (0 < nbytes && nbytes < (size_t) -2) + { + *pwc = wch; + return nbytes; + } + memset (&d->mbs, 0, sizeof d->mbs); + } + + *pwc = wc; + return 1; +} + +#ifdef DEBUG + +static void +prtok (token t) +{ + if (t <= END) + fprintf (stderr, "END"); + else if (0 <= t && t < NOTCHAR) + { + unsigned int ch = t; + fprintf (stderr, "0x%02x", ch); + } + else + { + char const *s; + switch (t) + { + case BEG: + s = "BEG"; + break; + case EMPTY: + s = "EMPTY"; + break; + case BACKREF: + s = "BACKREF"; + break; + case BEGLINE: + s = "BEGLINE"; + break; + case ENDLINE: + s = "ENDLINE"; + break; + case BEGWORD: + s = "BEGWORD"; + break; + case ENDWORD: + s = "ENDWORD"; + break; + case LIMWORD: + s = "LIMWORD"; + break; + case NOTLIMWORD: + s = "NOTLIMWORD"; + break; + case QMARK: + s = "QMARK"; + break; + case STAR: + s = "STAR"; + 
break; + case PLUS: + s = "PLUS"; + break; + case CAT: + s = "CAT"; + break; + case OR: + s = "OR"; + break; + case LPAREN: + s = "LPAREN"; + break; + case RPAREN: + s = "RPAREN"; + break; + case ANYCHAR: + s = "ANYCHAR"; + break; + case MBCSET: + s = "MBCSET"; + break; + default: + s = "CSET"; + break; + } + fprintf (stderr, "%s", s); + } +} +#endif /* DEBUG */ + +/* Stuff pertaining to charclasses. */ + +static bool +tstbit (unsigned int b, charclass const *c) +{ + return c->w[b / CHARCLASS_WORD_BITS] >> b % CHARCLASS_WORD_BITS & 1; +} + +static void +setbit (unsigned int b, charclass *c) +{ + charclass_word one = 1; + c->w[b / CHARCLASS_WORD_BITS] |= one << b % CHARCLASS_WORD_BITS; +} + +static void +clrbit (unsigned int b, charclass *c) +{ + charclass_word one = 1; + c->w[b / CHARCLASS_WORD_BITS] &= ~(one << b % CHARCLASS_WORD_BITS); +} + +static void +zeroset (charclass *s) +{ + memset (s, 0, sizeof *s); +} + +static void +fillset (charclass *s) +{ + for (int i = 0; i < CHARCLASS_WORDS; i++) + s->w[i] = CHARCLASS_WORD_MASK; +} + +static void +notset (charclass *s) +{ + for (int i = 0; i < CHARCLASS_WORDS; ++i) + s->w[i] = CHARCLASS_WORD_MASK & ~s->w[i]; +} + +static bool +equal (charclass const *s1, charclass const *s2) +{ + charclass_word w = 0; + for (int i = 0; i < CHARCLASS_WORDS; i++) + w |= s1->w[i] ^ s2->w[i]; + return w == 0; +} + +static bool +emptyset (charclass const *s) +{ + charclass_word w = 0; + for (int i = 0; i < CHARCLASS_WORDS; i++) + w |= s->w[i]; + return w == 0; +} + +/* Grow PA, which points to an array of *NITEMS items, and return the + location of the reallocated array, updating *NITEMS to reflect its + new size. The new array will contain at least NITEMS_INCR_MIN more + items, but will not contain more than NITEMS_MAX items total. + ITEM_SIZE is the size of each item, in bytes. + + ITEM_SIZE and NITEMS_INCR_MIN must be positive. *NITEMS must be + nonnegative. If NITEMS_MAX is -1, it is treated as if it were + infinity. 

   If PA is null, then allocate a new array instead of reallocating
   the old one.

   Thus, to grow an array A without saving its old contents, do
   { free (A); A = xpalloc (NULL, &AITEMS, ...); }.  */

static void *
xpalloc (void *pa, idx_t *nitems, idx_t nitems_incr_min,
         ptrdiff_t nitems_max, idx_t item_size)
{
  /* Item count on entry; the growth below is measured against it.  */
  idx_t n0 = *nitems;

  /* The approximate size to use for initial small allocation
     requests.  This is the largest "small" request for the GNU C
     library malloc.  */
  enum { DEFAULT_MXFAST = 64 * sizeof (size_t) / 4 };

  /* If the array is tiny, grow it to about (but no greater than)
     DEFAULT_MXFAST bytes.  Otherwise, grow it by about 50%.
     Adjust the growth according to three constraints: NITEMS_INCR_MIN,
     NITEMS_MAX, and what the C language can represent safely.  */

  /* First guess: n0 + n0/2, pinned to IDX_MAX if the addition wraps,
     then clamped to NITEMS_MAX when a limit was given.  */
  idx_t n, nbytes;
  if (INT_ADD_WRAPV (n0, n0 >> 1, &n))
    n = IDX_MAX;
  if (0 <= nitems_max && nitems_max < n)
    n = nitems_max;

  /* ADJUSTED_NBYTES is nonzero only when the first guess must be
     replaced: either the byte count does not fit (use the largest
     representable size) or it is below DEFAULT_MXFAST (round up).  */
  idx_t adjusted_nbytes
    = ((INT_MULTIPLY_WRAPV (n, item_size, &nbytes) || SIZE_MAX < nbytes)
       ? MIN (IDX_MAX, SIZE_MAX)
       : nbytes < DEFAULT_MXFAST ? DEFAULT_MXFAST : 0);
  if (adjusted_nbytes)
    {
      /* Recompute N from the byte budget and drop any partial item.  */
      n = adjusted_nbytes / item_size;
      nbytes = adjusted_nbytes - adjusted_nbytes % item_size;
    }

  if (! pa)
    *nitems = 0;

  /* If the growth chosen so far is smaller than the caller's minimum,
     retry with exactly N0 + NITEMS_INCR_MIN items; give up if that
     overflows, exceeds NITEMS_MAX, or its byte size overflows.  */
  if (n - n0 < nitems_incr_min
      && (INT_ADD_WRAPV (n0, nitems_incr_min, &n)
          || (0 <= nitems_max && nitems_max < n)
          || INT_MULTIPLY_WRAPV (n, item_size, &nbytes)))
    xalloc_die ();
  pa = xrealloc (pa, nbytes);
  *nitems = n;
  return pa;
}

/* Ensure that the array addressed by PA holds at least I + 1 items.
   Either return PA, or reallocate the array and return its new address.
   Although PA may be null, the returned value is never null.

   The array holds *NITEMS items, where 0 <= I <= *NITEMS; *NITEMS
   is updated on reallocation.  If PA is null, *NITEMS must be zero.
   Do not allocate more than NITEMS_MAX items total; -1 means no limit.
   ITEM_SIZE is the size of one item; it must be positive.
+ Avoid O(N**2) behavior on arrays growing linearly. */ +static void * +maybe_realloc (void *pa, idx_t i, idx_t *nitems, + ptrdiff_t nitems_max, idx_t item_size) +{ + if (i < *nitems) + return pa; + return xpalloc (pa, nitems, 1, nitems_max, item_size); +} + +/* In DFA D, find the index of charclass S, or allocate a new one. */ +static idx_t +charclass_index (struct dfa *d, charclass const *s) +{ + idx_t i; + + for (i = 0; i < d->cindex; ++i) + if (equal (s, &d->charclasses[i])) + return i; + d->charclasses = maybe_realloc (d->charclasses, d->cindex, &d->calloc, + TOKEN_MAX - CSET, sizeof *d->charclasses); + ++d->cindex; + d->charclasses[i] = *s; + return i; +} + +static bool +unibyte_word_constituent (struct dfa const *dfa, unsigned char c) +{ + return dfa->localeinfo.sbctowc[c] != WEOF && (isalnum (c) || (c) == '_'); +} + +static int +char_context (struct dfa const *dfa, unsigned char c) +{ + if (c == dfa->syntax.eolbyte && !dfa->syntax.anchor) + return CTX_NEWLINE; + if (unibyte_word_constituent (dfa, c)) + return CTX_LETTER; + return CTX_NONE; +} + +/* Set a bit in the charclass for the given wchar_t. Do nothing if WC + is represented by a multi-byte sequence. Even for MB_CUR_MAX == 1, + this may happen when folding case in weird Turkish locales where + dotless i/dotted I are not included in the chosen character set. + Return whether a bit was set in the charclass. */ +static bool +setbit_wc (wint_t wc, charclass *c) +{ + int b = wctob (wc); + if (b < 0) + return false; + + setbit (b, c); + return true; +} + +/* Set a bit for B and its case variants in the charclass C. + MB_CUR_MAX must be 1. */ +static void +setbit_case_fold_c (int b, charclass *c) +{ + int ub = toupper (b); + for (int i = 0; i < NOTCHAR; i++) + if (toupper (i) == ub) + setbit (i, c); +} + +/* Fetch the next lexical input character from the pattern. There + must at least one byte of pattern input. 
Set DFA->lex.wctok to the
   value of the character or to WEOF depending on whether the input is
   a valid multibyte character (possibly of length 1).  Then return
   the next input byte value, except return EOF if the input is a
   multibyte character of length greater than 1.  */
static int
fetch_wc (struct dfa *dfa)
{
  int nbytes = mbs_to_wchar (&dfa->lex.wctok, dfa->lex.ptr, dfa->lex.left,
                             dfa);
  int c = nbytes == 1 ? to_uchar (dfa->lex.ptr[0]) : EOF;
  /* Advance the lexer cursor past the bytes just converted.  */
  dfa->lex.ptr += nbytes;
  dfa->lex.left -= nbytes;
  return c;
}

/* If there is no more input, report an error about unbalanced brackets.
   Otherwise, behave as with fetch_wc (DFA).  */
static int
bracket_fetch_wc (struct dfa *dfa)
{
  if (! dfa->lex.left)
    dfaerror (_("unbalanced ["));
  return fetch_wc (dfa);
}

/* Signature shared by the <ctype.h> classification functions.  */
typedef int predicate (int);

/* The following list maps the names of the Posix named character classes
   to predicate functions that determine whether a given character is in
   the class.  The leading [ has already been eaten by the lexical
   analyzer.  */
struct dfa_ctype
{
  const char *name;             /* Class name as written in [:name:].  */
  predicate *func;              /* Membership test for a single byte.  */
  bool single_byte_only;        /* If false, parse_bracket_exp gives up on
                                   this class in multibyte locales.  */
};

/* Table searched by find_pred; terminated by an entry with a null name.  */
static const struct dfa_ctype prednames[] = {
  {"alpha", isalpha, false},
  {"upper", isupper, false},
  {"lower", islower, false},
  {"digit", isdigit, true},
  {"xdigit", isxdigit, false},
  {"space", isspace, false},
  {"punct", ispunct, false},
  {"alnum", isalnum, false},
  {"print", isprint, false},
  {"graph", isgraph, false},
  {"cntrl", iscntrl, false},
  {"blank", isblank, false},
  {NULL, NULL, false}
};

/* Return the prednames entry whose name equals STR, or NULL if there
   is none.  */
static const struct dfa_ctype *_GL_ATTRIBUTE_PURE
find_pred (const char *str)
{
  for (int i = 0; prednames[i].name; i++)
    if (streq (str, prednames[i].name))
      return &prednames[i];
  return NULL;
}

/* Parse a bracket expression, which possibly includes multibyte
   characters.  The opening '[' has already been consumed.  Return
   CSET + index for a set handled entirely as single bytes, MBCSET
   (with details left in DFA->lex.brack) in multibyte locales when the
   set is inverted or holds characters that are not single bytes, or
   BACKREF when the expression is not one that dfaexec is known to
   process correctly.  */
static token
parse_bracket_exp (struct dfa *dfa)
{
  /* This is a bracket expression that dfaexec is known to
     process correctly.  */
  bool known_bracket_exp = true;

  /* Used to warn about [:space:].
     Bit 0 = first character is a colon.
     Bit 1 = last character is a colon.
     Bit 2 = includes any other character but a colon.
     Bit 3 = includes ranges, char/equiv classes or collation elements.  */
  int colon_warning_state;

  dfa->lex.brack.nchars = 0;
  charclass ccl;
  zeroset (&ccl);
  int c = bracket_fetch_wc (dfa);
  bool invert = c == '^';
  if (invert)
    {
      c = bracket_fetch_wc (dfa);
      known_bracket_exp = dfa->localeinfo.simple;
    }
  /* C/WC are the current character; C1/WC1 are the lookahead.  */
  wint_t wc = dfa->lex.wctok;
  int c1;
  wint_t wc1;
  colon_warning_state = (c == ':');
  do
    {
      c1 = NOTCHAR;     /* Mark c1 as not initialized.  */
      colon_warning_state &= ~2;

      /* Note that if we're looking at some other [:...:] construct,
         we just treat it as a bunch of ordinary characters.  We can do
         this because we assume regex has checked for syntax errors before
         dfa is ever called.  */
      if (c == '[')
        {
          c1 = bracket_fetch_wc (dfa);
          wc1 = dfa->lex.wctok;

          if ((c1 == ':' && (dfa->syntax.syntax_bits & RE_CHAR_CLASSES))
              || c1 == '.' || c1 == '=')
            {
              enum { MAX_BRACKET_STRING_LEN = 32 };
              char str[MAX_BRACKET_STRING_LEN + 1];
              int len = 0;
              /* Collect the name up to the closing ":]", ".]" or "=]".  */
              for (;;)
                {
                  c = bracket_fetch_wc (dfa);
                  if (dfa->lex.left == 0
                      || (c == c1 && dfa->lex.ptr[0] == ']'))
                    break;
                  if (len < MAX_BRACKET_STRING_LEN)
                    str[len++] = c;
                  else
                    /* This is in any case an invalid class name.  */
                    str[0] = '\0';
                }
              str[len] = '\0';

              /* Fetch bracket.  */
              c = bracket_fetch_wc (dfa);
              wc = dfa->lex.wctok;
              if (c1 == ':')
                /* Build character class.  POSIX allows character
                   classes to match multicharacter collating elements,
                   but the regex code does not support that, so do not
                   worry about that possibility.  */
                {
                  /* When folding case, [:upper:] and [:lower:] both
                     behave as [:alpha:].  */
                  char const *class
                    = (dfa->syntax.case_fold && (streq (str, "upper")
                                                 || streq (str, "lower"))
                       ? "alpha" : str);
                  const struct dfa_ctype *pred = find_pred (class);
                  if (!pred)
                    dfaerror (_("invalid character class"));

                  if (dfa->localeinfo.multibyte && !pred->single_byte_only)
                    known_bracket_exp = false;
                  else
                    for (int c2 = 0; c2 < NOTCHAR; ++c2)
                      if (pred->func (c2))
                        setbit (c2, &ccl);
                }
              else
                /* Collating symbols and equivalence classes are not
                   handled here.  */
                known_bracket_exp = false;

              colon_warning_state |= 8;

              /* Fetch new lookahead character.  */
              c1 = bracket_fetch_wc (dfa);
              wc1 = dfa->lex.wctok;
              continue;
            }

          /* We treat '[' as a normal character here.  c/c1/wc/wc1
             are already set up.  */
        }

      if (c == '\\'
          && (dfa->syntax.syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
        {
          c = bracket_fetch_wc (dfa);
          wc = dfa->lex.wctok;
        }

      if (c1 == NOTCHAR)
        {
          c1 = bracket_fetch_wc (dfa);
          wc1 = dfa->lex.wctok;
        }

      if (c1 == '-')
        /* build range characters.  */
        {
          int c2 = bracket_fetch_wc (dfa);
          wint_t wc2 = dfa->lex.wctok;

          /* A bracket expression like [a-[.aa.]] matches an unknown set.
             Treat it like [-a[.aa.]] while parsing it, and
             remember that the set is unknown.  */
          if (c2 == '[' && dfa->lex.ptr[0] == '.')
            {
              known_bracket_exp = false;
              c2 = ']';
            }

          if (c2 == ']')
            {
              /* In the case [x-], the - is an ordinary hyphen,
                 which is left in c1, the lookahead character.  */
              dfa->lex.ptr--;
              dfa->lex.left++;
            }
          else
            {
              if (c2 == '\\' && (dfa->syntax.syntax_bits
                                 & RE_BACKSLASH_ESCAPE_IN_LISTS))
                {
                  c2 = bracket_fetch_wc (dfa);
                  wc2 = dfa->lex.wctok;
                }

              colon_warning_state |= 8;
              c1 = bracket_fetch_wc (dfa);
              wc1 = dfa->lex.wctok;

              /* Treat [x-y] as a range if x != y.  */
              if (wc != wc2 || wc == WEOF)
                {
                  /* Byte ranges are expanded only in "simple" locales
                     or when both ends are ASCII digits.  */
                  if (dfa->localeinfo.simple
                      || (isasciidigit (c) & isasciidigit (c2)))
                    {
                      for (int ci = c; ci <= c2; ci++)
                        if (dfa->syntax.case_fold && isalpha (ci))
                          setbit_case_fold_c (ci, &ccl);
                        else
                          setbit (ci, &ccl);
                    }
                  else
                    known_bracket_exp = false;

                  continue;
                }
            }
        }

      colon_warning_state |= (c == ':') ? 2 : 4;

      if (!dfa->localeinfo.multibyte)
        {
          if (dfa->syntax.case_fold && isalpha (c))
            setbit_case_fold_c (c, &ccl);
          else
            setbit (c, &ccl);
          continue;
        }

      if (wc == WEOF)
        known_bracket_exp = false;
      else
        {
          /* folded[0] is WC itself; its case counterparts, if wanted,
             follow from folded[1].  */
          wchar_t folded[CASE_FOLDED_BUFSIZE + 1];
          int n = (dfa->syntax.case_fold
                   ? case_folded_counterparts (wc, folded + 1) + 1
                   : 1);
          folded[0] = wc;
          for (int i = 0; i < n; i++)
            if (!setbit_wc (folded[i], &ccl))
              {
                /* Not representable as one byte: remember the wide
                   character in the bracket's character list.  */
                dfa->lex.brack.chars
                  = maybe_realloc (dfa->lex.brack.chars, dfa->lex.brack.nchars,
                                   &dfa->lex.brack.nchars_alloc, -1,
                                   sizeof *dfa->lex.brack.chars);
                dfa->lex.brack.chars[dfa->lex.brack.nchars++] = folded[i];
              }
        }
    }
  while ((wc = wc1, (c = c1) != ']'));

  if (colon_warning_state == 7)
    dfawarn (_("character class syntax is [[:space:]], not [:space:]"));

  if (! known_bracket_exp)
    return BACKREF;

  if (dfa->localeinfo.multibyte && (invert || dfa->lex.brack.nchars != 0))
    {
      dfa->lex.brack.invert = invert;
      /* A cset of -1 records that the single-byte part is empty.  */
      dfa->lex.brack.cset = emptyset (&ccl) ? -1 : charclass_index (dfa, &ccl);
      return MBCSET;
    }

  if (invert)
    {
      notset (&ccl);
      if (dfa->syntax.syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
        clrbit ('\n', &ccl);
    }

  return CSET + charclass_index (dfa, &ccl);
}

/* Saved lexer input position, used by push_lex_state and
   pop_lex_state.  */
struct lexptr
{
  char const *ptr;
  idx_t left;
};

/* Save DFA's current lexer input position in *LS and make the lexer
   read from the NUL-terminated string S instead.  */
static void
push_lex_state (struct dfa *dfa, struct lexptr *ls, char const *s)
{
  ls->ptr = dfa->lex.ptr;
  ls->left = dfa->lex.left;
  dfa->lex.ptr = s;
  dfa->lex.left = strlen (s);
}

/* Restore the lexer input position of DFA saved in *LS.  */
static void
pop_lex_state (struct dfa *dfa, struct lexptr const *ls)
{
  dfa->lex.ptr = ls->ptr;
  dfa->lex.left = ls->left;
}

/* Read the next token from DFA's pattern and return it, also storing
   it in DFA->lex.lasttok.  Return END when the input is exhausted.
   Depending on the token this also updates DFA->lex.laststart,
   DFA->lex.parens, and DFA->lex.minrep/maxrep (for REPMN).  */
static token
lex (struct dfa *dfa)
{
  bool backslash = false;

  /* Basic plan: We fetch a character.  If it's a backslash,
     we set the backslash flag and go through the loop again.
     On the plus side, this avoids having a duplicate of the
     main switch inside the backslash case.  On the minus side,
     it means that just about every case begins with
     "if (backslash) ...".  */
  for (int i = 0; i < 2; ++i)
    {
      if (! dfa->lex.left)
        return dfa->lex.lasttok = END;
      int c = fetch_wc (dfa);

      switch (c)
        {
        case '\\':
          if (backslash)
            goto normal_char;
          if (dfa->lex.left == 0)
            dfaerror (_("unfinished \\ escape"));
          backslash = true;
          break;

        case '^':
          if (backslash)
            goto normal_char;
          if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
              || dfa->lex.lasttok == END || dfa->lex.lasttok == LPAREN
              || dfa->lex.lasttok == OR)
            return dfa->lex.lasttok = BEGLINE;
          goto normal_char;

        case '$':
          if (backslash)
            goto normal_char;
          /* '$' is an anchor when anchors are context-independent, at
             the end of the pattern, or when what follows is a close
             parenthesis, an alternation bar (each possibly
             backslash-escaped, depending on the syntax bits), or a
             newline acting as alternation.  */
          if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
              || dfa->lex.left == 0
              || ((dfa->lex.left
                   > !(dfa->syntax.syntax_bits & RE_NO_BK_PARENS))
                  && (dfa->lex.ptr[!(dfa->syntax.syntax_bits & RE_NO_BK_PARENS)
                                   & (dfa->lex.ptr[0] == '\\')]
                      == ')'))
              || ((dfa->lex.left
                   > !(dfa->syntax.syntax_bits & RE_NO_BK_VBAR))
                  && (dfa->lex.ptr[!(dfa->syntax.syntax_bits & RE_NO_BK_VBAR)
                                   & (dfa->lex.ptr[0] == '\\')]
                      == '|'))
              || ((dfa->syntax.syntax_bits & RE_NEWLINE_ALT)
                  && dfa->lex.left > 0 && dfa->lex.ptr[0] == '\n'))
            return dfa->lex.lasttok = ENDLINE;
          goto normal_char;

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_BK_REFS))
            {
              dfa->lex.laststart = false;
              return dfa->lex.lasttok = BACKREF;
            }
          goto normal_char;

        case '`':
          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
            {
              /* FIXME: should be beginning of string */
              return dfa->lex.lasttok = BEGLINE;
            }
          goto normal_char;

        case '\'':
          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
            {
              /* FIXME: should be end of string */
              return dfa->lex.lasttok = ENDLINE;
            }
          goto normal_char;

        case '<':
          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
            return dfa->lex.lasttok = BEGWORD;
          goto normal_char;

        case '>':
          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
            return dfa->lex.lasttok = ENDWORD;
          goto normal_char;

        case 'b':
          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
            return dfa->lex.lasttok = LIMWORD;
          goto normal_char;

        case 'B':
          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
            return dfa->lex.lasttok = NOTLIMWORD;
          goto normal_char;

        case '?':
          if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
            goto normal_char;
          /* The operator form is "\?" exactly when RE_BK_PLUS_QM is set.  */
          if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
            goto normal_char;
          if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
              && dfa->lex.laststart)
            goto normal_char;
          return dfa->lex.lasttok = QMARK;

        case '*':
          if (backslash)
            goto normal_char;
          if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
              && dfa->lex.laststart)
            goto normal_char;
          return dfa->lex.lasttok = STAR;

        case '+':
          if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
            goto normal_char;
          /* The operator form is "\+" exactly when RE_BK_PLUS_QM is set.  */
          if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
            goto normal_char;
          if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
              && dfa->lex.laststart)
            goto normal_char;
          return dfa->lex.lasttok = PLUS;

        case '{':
          if (!(dfa->syntax.syntax_bits & RE_INTERVALS))
            goto normal_char;
          /* The operator form is "\{" exactly when RE_NO_BK_BRACES is
             clear.  */
          if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0))
            goto normal_char;
          if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
              && dfa->lex.laststart)
            goto normal_char;

          /* Cases:
             {M} - exact count
             {M,} - minimum count, maximum is infinity
             {,N} - 0 through N
             {,} - 0 to infinity (same as '*')
             {M,N} - M through N */
          {
            char const *p = dfa->lex.ptr;
            char const *lim = p + dfa->lex.left;
            /* -1 means "not given"; parsed values saturate at
               RE_DUP_MAX + 1.  */
            dfa->lex.minrep = dfa->lex.maxrep = -1;
            for (; p != lim && isasciidigit (*p); p++)
              dfa->lex.minrep = (dfa->lex.minrep < 0
                                 ? *p - '0'
                                 : MIN (RE_DUP_MAX + 1,
                                        dfa->lex.minrep * 10 + *p - '0'));
            if (p != lim)
              {
                if (*p != ',')
                  dfa->lex.maxrep = dfa->lex.minrep;
                else
                  {
                    if (dfa->lex.minrep < 0)
                      dfa->lex.minrep = 0;
                    while (++p != lim && isasciidigit (*p))
                      dfa->lex.maxrep
                        = (dfa->lex.maxrep < 0
                           ? *p - '0'
                           : MIN (RE_DUP_MAX + 1,
                                  dfa->lex.maxrep * 10 + *p - '0'));
                  }
              }
            /* Require the closing "}" (or "\}" if the opening brace was
               backslashed) and a sane M..N pair.  */
            if (! ((! backslash || (p != lim && *p++ == '\\'))
                   && p != lim && *p++ == '}'
                   && 0 <= dfa->lex.minrep
                   && (dfa->lex.maxrep < 0
                       || dfa->lex.minrep <= dfa->lex.maxrep)))
              {
                if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD)
                  goto normal_char;
                dfaerror (_("invalid content of \\{\\}"));
              }
            if (RE_DUP_MAX < dfa->lex.maxrep)
              dfaerror (_("regular expression too big"));
            dfa->lex.ptr = p;
            dfa->lex.left = lim - p;
          }
          dfa->lex.laststart = false;
          return dfa->lex.lasttok = REPMN;

        case '|':
          if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
            goto normal_char;
          if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_VBAR) == 0))
            goto normal_char;
          dfa->lex.laststart = true;
          return dfa->lex.lasttok = OR;

        case '\n':
          if (dfa->syntax.syntax_bits & RE_LIMITED_OPS
              || backslash || !(dfa->syntax.syntax_bits & RE_NEWLINE_ALT))
            goto normal_char;
          dfa->lex.laststart = true;
          return dfa->lex.lasttok = OR;

        case '(':
          if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
            goto normal_char;
          dfa->lex.parens++;
          dfa->lex.laststart = true;
          return dfa->lex.lasttok = LPAREN;

        case ')':
          if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
            goto normal_char;
          if (dfa->lex.parens == 0
              && dfa->syntax.syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
            goto normal_char;
          dfa->lex.parens--;
          dfa->lex.laststart = false;
          return dfa->lex.lasttok = RPAREN;

        case '.':
          if (backslash)
            goto normal_char;
          /* Build the "any character" class once and cache its index.  */
          if (dfa->canychar < 0)
            {
              charclass ccl;
              fillset (&ccl);
              if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
                clrbit ('\n', &ccl);
              if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
                clrbit ('\0', &ccl);
              if (dfa->localeinfo.multibyte)
                for (int c2 = 0; c2 < NOTCHAR; c2++)
                  if (dfa->localeinfo.sbctowc[c2] == WEOF)
                    clrbit (c2, &ccl);
              dfa->canychar = charclass_index (dfa, &ccl);
            }
          dfa->lex.laststart = false;
          return dfa->lex.lasttok = (dfa->localeinfo.multibyte
                                     ? ANYCHAR
                                     : CSET + dfa->canychar);

        case 's':
        case 'S':
          if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
            goto normal_char;
          if (!dfa->localeinfo.multibyte)
            {
              charclass ccl;
              zeroset (&ccl);
              for (int c2 = 0; c2 < NOTCHAR; ++c2)
                if (isspace (c2))
                  setbit (c2, &ccl);
              if (c == 'S')
                notset (&ccl);
              dfa->lex.laststart = false;
              return dfa->lex.lasttok = CSET + charclass_index (dfa, &ccl);
            }

          /* FIXME: see if optimizing this, as is done with ANYCHAR and
             add_utf8_anychar, makes sense.  */

          /* \s and \S are documented to be equivalent to [[:space:]] and
             [^[:space:]] respectively, so tell the lexer to process those
             strings, each minus its "already processed" '['.  */
          {
            struct lexptr ls;
            /* Indexing by (c == 's') skips the leading '^' for \s.  */
            push_lex_state (dfa, &ls, &"^[:space:]]"[c == 's']);
            dfa->lex.lasttok = parse_bracket_exp (dfa);
            pop_lex_state (dfa, &ls);
          }

          dfa->lex.laststart = false;
          return dfa->lex.lasttok;

        case 'w':
        case 'W':
          if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
            goto normal_char;

          if (!dfa->localeinfo.multibyte)
            {
              charclass ccl;
              zeroset (&ccl);
              for (int c2 = 0; c2 < NOTCHAR; ++c2)
                if (dfa->syntax.sbit[c2] == CTX_LETTER)
                  setbit (c2, &ccl);
              if (c == 'W')
                notset (&ccl);
              dfa->lex.laststart = false;
              return dfa->lex.lasttok = CSET + charclass_index (dfa, &ccl);
            }

          /* FIXME: see if optimizing this, as is done with ANYCHAR and
             add_utf8_anychar, makes sense.  */

          /* \w and \W are documented to be equivalent to [_[:alnum:]] and
             [^_[:alnum:]] respectively, so tell the lexer to process those
             strings, each minus its "already processed" '['.  */
          {
            struct lexptr ls;
            /* Indexing by (c == 'w') skips the leading '^' for \w.  */
            push_lex_state (dfa, &ls, &"^_[:alnum:]]"[c == 'w']);
            dfa->lex.lasttok = parse_bracket_exp (dfa);
            pop_lex_state (dfa, &ls);
          }

          dfa->lex.laststart = false;
          return dfa->lex.lasttok;

        case '[':
          if (backslash)
            goto normal_char;
          dfa->lex.laststart = false;
          return dfa->lex.lasttok = parse_bracket_exp (dfa);

        default:
        normal_char:
          dfa->lex.laststart = false;
          /* For multibyte character sets, folding is done in atom.  Always
             return WCHAR.  */
          if (dfa->localeinfo.multibyte)
            return dfa->lex.lasttok = WCHAR;

          if (dfa->syntax.case_fold && isalpha (c))
            {
              charclass ccl;
              zeroset (&ccl);
              setbit_case_fold_c (c, &ccl);
              return dfa->lex.lasttok = CSET + charclass_index (dfa, &ccl);
            }

          return dfa->lex.lasttok = c;
        }
    }

  /* The above loop should consume at most a backslash
     and some other character.  */
  abort ();
  return END;   /* keeps pedantic compilers happy.  */
}

/* Append token T to DFA's token array, growing the array (and, in
   multibyte locales, the parallel multibyte_prop array) when it is
   full.  In multibyte locales record MBPROP as the token's
   multibyte_prop entry.  Then update the leaf count, the current
   parse depth and the maximum depth according to the kind of
   token.  */
static void
addtok_mb (struct dfa *dfa, token t, char mbprop)
{
  if (dfa->talloc == dfa->tindex)
    {
      dfa->tokens = xpalloc (dfa->tokens, &dfa->talloc, 1, -1,
                             sizeof *dfa->tokens);
      /* Keep multibyte_prop the same length as tokens.  */
      if (dfa->localeinfo.multibyte)
        dfa->multibyte_prop = xnrealloc (dfa->multibyte_prop, dfa->talloc,
                                         sizeof *dfa->multibyte_prop);
    }
  if (dfa->localeinfo.multibyte)
    dfa->multibyte_prop[dfa->tindex] = mbprop;
  dfa->tokens[dfa->tindex++] = t;

  switch (t)
    {
    /* Unary operators leave the depth unchanged.  */
    case QMARK:
    case STAR:
    case PLUS:
      break;

    /* Binary operators reduce the depth by one.  */
    case CAT:
    case OR:
      dfa->parse.depth--;
      break;

    case BACKREF:
      dfa->fast = false;
      FALLTHROUGH;
    default:
      dfa->nleaves++;
      FALLTHROUGH;
    case EMPTY:
      dfa->parse.depth++;
      break;
    }
  if (dfa->parse.depth > dfa->depth)
    dfa->depth = dfa->parse.depth;
}

static void addtok_wc (struct dfa *dfa, wint_t wc);

/* Add the given token to the parse tree, maintaining the depth count and
   updating the maximum depth if necessary.
*/
static void
addtok (struct dfa *dfa, token t)
{
  if (dfa->localeinfo.multibyte && t == MBCSET)
    {
      /* Becomes true once one alternative has been emitted, so that
         every later alternative is followed by an OR.  */
      bool need_or = false;

      /* Extract wide characters into alternations for better performance.
         This does not require UTF-8.  */
      for (idx_t i = 0; i < dfa->lex.brack.nchars; i++)
        {
          addtok_wc (dfa, dfa->lex.brack.chars[i]);
          if (need_or)
            addtok (dfa, OR);
          need_or = true;
        }
      dfa->lex.brack.nchars = 0;

      /* Wide characters have been handled above, so it is possible
         that the set is empty now.  Do nothing in that case.  */
      if (dfa->lex.brack.cset != -1)
        {
          addtok (dfa, CSET + dfa->lex.brack.cset);
          if (need_or)
            addtok (dfa, OR);
        }
    }
  else
    {
      /* Every other token is added as-is with multibyte_prop 3.  */
      addtok_mb (dfa, t, 3);
    }
}

/* We treat a multibyte character as a single atom, so that DFA
   can treat a multibyte character as a single expression.

   The wide character WC is converted to its byte sequence and the
   bytes are added as tokens joined by CAT; e.g., for a character
   encoded in three bytes the tokens appended are

     <1st-byte> <2nd-byte> CAT <3rd-byte> CAT

   The multibyte_prop passed for each byte is 3 for a single-byte
   character, 1 for the first byte, 2 for the last byte and 0 for
   bytes in between.  */
static void
addtok_wc (struct dfa *dfa, wint_t wc)
{
  unsigned char buf[MB_LEN_MAX];
  mbstate_t s = { 0 };
  size_t stored_bytes = wcrtomb ((char *) buf, wc, &s);
  int buflen;

  if (stored_bytes != (size_t) -1)
    buflen = stored_bytes;
  else
    {
      /* This is merely stop-gap.  buf[0] is undefined, yet skipping
         the addtok_mb call altogether can corrupt the heap.  */
      buflen = 1;
      buf[0] = 0;
    }

  addtok_mb (dfa, buf[0], buflen == 1 ? 3 : 1);
  for (int i = 1; i < buflen; i++)
    {
      addtok_mb (dfa, buf[i], i == buflen - 1 ? 2 : 0);
      addtok (dfa, CAT);
    }
}

/* Add to DFA's token array a subexpression that matches any single
   well-formed UTF-8 character, built only from byte tokens and
   single-byte character classes.  The classes are registered with
   charclass_index on first use and cached in
   DFA->utf8_anychar_classes.  */
static void
add_utf8_anychar (struct dfa *dfa)
{
  /* Since the Unicode Standard Version 4.0.0 (2003), a well-formed
     UTF-8 byte sequence has been defined as follows:

     ([\x00-\x7f]
     |[\xc2-\xdf][\x80-\xbf]
     |[\xe0][\xa0-\xbf][\x80-\xbf]
     |[\xe1-\xec\xee-\xef][\x80-\xbf][\x80-\xbf]
     |[\xed][\x80-\x9f][\x80-\xbf]
     |[\xf0][\x90-\xbf][\x80-\xbf][\x80-\xbf]
     |[\xf1-\xf3][\x80-\xbf][\x80-\xbf][\x80-\xbf]
     |[\xf4][\x80-\x8f][\x80-\xbf][\x80-\xbf])

     which I'll write more concisely "A|BC|DEC|FCC|GHC|IJCC|KCCC|LMCC",
     where A = [\x00-\x7f], B = [\xc2-\xdf], C = [\x80-\xbf],
     D = [\xe0], E = [\xa0-\xbf], F = [\xe1-\xec\xee-\xef], G = [\xed],
     H = [\x80-\x9f], I = [\xf0],
     J = [\x90-\xbf], K = [\xf1-\xf3], L = [\xf4], M = [\x80-\x8f].

     This can be refactored to "A|(B|DE|GH|(F|IJ|LM|KC)C)C".  */

  /* Mnemonics for classes containing two or more bytes.  These are
     also the indexes into utf8_classes and
     DFA->utf8_anychar_classes.  */
  enum { A, B, C, E, F, H, J, K, M };

  /* Mnemonics for single-byte tokens.  */
  enum { D_token = 0xe0, G_token = 0xed, I_token = 0xf0, L_token = 0xf4 };

  static charclass const utf8_classes[] = {
    /* A. 00-7f: 1-byte sequence.  */
    CHARCLASS_INIT (0xffffffffffffffff, 0xffffffffffffffff, 0, 0),

    /* B. c2-df: 1st byte of a 2-byte sequence.  */
    CHARCLASS_INIT (0, 0, 0, 0x00000000fffffffc),

    /* C. 80-bf: non-leading bytes.  */
    CHARCLASS_INIT (0, 0, 0xffffffffffffffff, 0),

    /* D. e0 (just a token).  */

    /* E. a0-bf: 2nd byte of a "DEC" sequence.  */
    CHARCLASS_INIT (0, 0, 0xffffffff00000000, 0),

    /* F. e1-ec + ee-ef: 1st byte of an "FCC" sequence.  */
    CHARCLASS_INIT (0, 0, 0, 0x0000dffe00000000),

    /* G. ed (just a token).  */

    /* H. 80-9f: 2nd byte of a "GHC" sequence.  */
    CHARCLASS_INIT (0, 0, 0x000000000000ffff, 0),

    /* I. f0 (just a token).  */

    /* J. 90-bf: 2nd byte of an "IJCC" sequence.  */
    CHARCLASS_INIT (0, 0, 0xffffffffffff0000, 0),

    /* K. f1-f3: 1st byte of a "KCCC" sequence.  */
    CHARCLASS_INIT (0, 0, 0, 0x000e000000000000),

    /* L. f4 (just a token).  */

    /* M. 80-8f: 2nd byte of a "LMCC" sequence.  */
    CHARCLASS_INIT (0, 0, 0x00000000000000ff, 0),
  };

  /* Define the character classes that are needed below.  A zero in
     slot 0 means the cache has not been filled in yet.  */
  if (dfa->utf8_anychar_classes[0] == 0)
    {
      /* Class A is adjusted by the same syntax bits that lex applies
         to '.'.  */
      charclass c = utf8_classes[0];
      if (! (dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
        clrbit ('\n', &c);
      if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
        clrbit ('\0', &c);
      dfa->utf8_anychar_classes[0] = CSET + charclass_index (dfa, &c);

      for (int i = 1; i < sizeof utf8_classes / sizeof *utf8_classes; i++)
        dfa->utf8_anychar_classes[i]
          = CSET + charclass_index (dfa, &utf8_classes[i]);
    }

  /* Implement the "A|(B|DE|GH|(F|IJ|LM|KC)C)C" pattern mentioned above.
     The token buffer is in reverse Polish order, so we get
     "A B D E CAT OR G H CAT OR F I J CAT OR L M CAT OR K
      C CAT OR C CAT OR C CAT OR".  */
  addtok (dfa, dfa->utf8_anychar_classes[A]);
  addtok (dfa, dfa->utf8_anychar_classes[B]);
  addtok (dfa, D_token);
  addtok (dfa, dfa->utf8_anychar_classes[E]);
  addtok (dfa, CAT);
  addtok (dfa, OR);
  addtok (dfa, G_token);
  addtok (dfa, dfa->utf8_anychar_classes[H]);
  addtok (dfa, CAT);
  addtok (dfa, OR);
  addtok (dfa, dfa->utf8_anychar_classes[F]);
  addtok (dfa, I_token);
  addtok (dfa, dfa->utf8_anychar_classes[J]);
  addtok (dfa, CAT);
  addtok (dfa, OR);
  addtok (dfa, L_token);
  addtok (dfa, dfa->utf8_anychar_classes[M]);
  addtok (dfa, CAT);
  addtok (dfa, OR);
  addtok (dfa, dfa->utf8_anychar_classes[K]);
  /* The three trailing "C CAT OR" groups close the nested
     alternatives from the innermost outwards.  */
  for (int i = 0; i < 3; i++)
    {
      addtok (dfa, dfa->utf8_anychar_classes[C]);
      addtok (dfa, CAT);
      addtok (dfa, OR);
    }
}

/* The grammar understood by the parser is as follows.
+ + regexp: + regexp OR branch + branch + + branch: + branch closure + closure + + closure: + closure QMARK + closure STAR + closure PLUS + closure REPMN + atom + + atom: + + + ANYCHAR + MBCSET + CSET + BACKREF + BEGLINE + ENDLINE + BEGWORD + ENDWORD + LIMWORD + NOTLIMWORD + LPAREN regexp RPAREN + + + The parser builds a parse tree in postfix form in an array of tokens. */ + +static void +atom (struct dfa *dfa) +{ + if ((0 <= dfa->parse.tok && dfa->parse.tok < NOTCHAR) + || dfa->parse.tok >= CSET + || dfa->parse.tok == BEG || dfa->parse.tok == BACKREF + || dfa->parse.tok == BEGLINE || dfa->parse.tok == ENDLINE + || dfa->parse.tok == BEGWORD || dfa->parse.tok == ENDWORD + || dfa->parse.tok == LIMWORD || dfa->parse.tok == NOTLIMWORD + || dfa->parse.tok == ANYCHAR || dfa->parse.tok == MBCSET) + { + if (dfa->parse.tok == ANYCHAR && dfa->localeinfo.using_utf8) + { + /* For UTF-8 expand the period to a series of CSETs that define a + valid UTF-8 character. This avoids using the slow multibyte + path. I'm pretty sure it would be both profitable and correct to + do it for any encoding; however, the optimization must be done + manually as it is done above in add_utf8_anychar. So, let's + start with UTF-8: it is the most used, and the structure of the + encoding makes the correctness more obvious. 
*/ + add_utf8_anychar (dfa); + } + else + addtok (dfa, dfa->parse.tok); + dfa->parse.tok = lex (dfa); + } + else if (dfa->parse.tok == WCHAR) + { + if (dfa->lex.wctok == WEOF) + addtok (dfa, BACKREF); + else + { + addtok_wc (dfa, dfa->lex.wctok); + + if (dfa->syntax.case_fold) + { + wchar_t folded[CASE_FOLDED_BUFSIZE]; + int n = case_folded_counterparts (dfa->lex.wctok, folded); + for (int i = 0; i < n; i++) + { + addtok_wc (dfa, folded[i]); + addtok (dfa, OR); + } + } + } + + dfa->parse.tok = lex (dfa); + } + else if (dfa->parse.tok == LPAREN) + { + dfa->parse.tok = lex (dfa); + regexp (dfa); + if (dfa->parse.tok != RPAREN) + dfaerror (_("unbalanced (")); + dfa->parse.tok = lex (dfa); + } + else + addtok (dfa, EMPTY); +} + +/* Return the number of tokens in the given subexpression. */ +static idx_t _GL_ATTRIBUTE_PURE +nsubtoks (struct dfa const *dfa, idx_t tindex) +{ + switch (dfa->tokens[tindex - 1]) + { + default: + return 1; + case QMARK: + case STAR: + case PLUS: + return 1 + nsubtoks (dfa, tindex - 1); + case CAT: + case OR: + { + idx_t ntoks1 = nsubtoks (dfa, tindex - 1); + return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1); + } + } +} + +/* Copy the given subexpression to the top of the tree. 
*/
+static void
+copytoks (struct dfa *dfa, idx_t tindex, idx_t ntokens)
+{
+  if (dfa->localeinfo.multibyte)
+    for (idx_t i = 0; i < ntokens; i++)
+      addtok_mb (dfa, dfa->tokens[tindex + i],
+                 dfa->multibyte_prop[tindex + i]);
+  else
+    for (idx_t i = 0; i < ntokens; i++)
+      addtok_mb (dfa, dfa->tokens[tindex + i], 3);  /* NOTE(review): 3 is
+         presumably "both multibyte_prop bits set" -- confirm against
+         addtok_mb.  */
+}
+/* Parse a closure: an atom followed by any number of QMARK, STAR, PLUS
+   or REPMN operators.  QMARK, STAR and PLUS are appended as they are.
+   A REPMN with at least one nonzero bound (DFA->lex.minrep,
+   DFA->lex.maxrep) is expanded in place by appending copies of the
+   operand's tokens.  A REPMN whose bounds are both zero drops the
+   operand's tokens and parses a fresh closure in its place.  */
+static void
+closure (struct dfa *dfa)
+{
+  atom (dfa);
+  while (dfa->parse.tok == QMARK || dfa->parse.tok == STAR
+         || dfa->parse.tok == PLUS || dfa->parse.tok == REPMN)
+    if (dfa->parse.tok == REPMN && (dfa->lex.minrep || dfa->lex.maxrep))
+      {
+        idx_t ntokens = nsubtoks (dfa, dfa->tindex);    /* Operand size.  */
+        idx_t tindex = dfa->tindex - ntokens;           /* Operand start.  */
+        if (dfa->lex.maxrep < 0)        /* Presumably "no upper bound".  */
+          addtok (dfa, PLUS);
+        if (dfa->lex.minrep == 0)
+          addtok (dfa, QMARK);
+        int i;
+        for (i = 1; i < dfa->lex.minrep; i++)   /* Mandatory copies.  */
+          {
+            copytoks (dfa, tindex, ntokens);
+            addtok (dfa, CAT);
+          }
+        for (; i < dfa->lex.maxrep; i++)        /* Optional copies.  */
+          {
+            copytoks (dfa, tindex, ntokens);
+            addtok (dfa, QMARK);
+            addtok (dfa, CAT);
+          }
+        dfa->parse.tok = lex (dfa);
+      }
+    else if (dfa->parse.tok == REPMN)
+      {
+        dfa->tindex -= nsubtoks (dfa, dfa->tindex);     /* Drop operand.  */
+        dfa->parse.tok = lex (dfa);
+        closure (dfa);
+      }
+    else
+      {
+        addtok (dfa, dfa->parse.tok);
+        dfa->parse.tok = lex (dfa);
+      }
+}
+/* Parse a branch: one or more closures, each after the first followed
+   by a CAT token.  Parsing stops at RPAREN, OR, or any negative
+   token.  */
+static void
+branch (struct dfa* dfa)
+{
+  closure (dfa);
+  while (dfa->parse.tok != RPAREN && dfa->parse.tok != OR
+         && dfa->parse.tok >= 0)
+    {
+      closure (dfa);
+      addtok (dfa, CAT);
+    }
+}
+/* Parse a regexp: one or more branches separated by OR.  Each OR token
+   is appended after both of its operands, as postfix order requires.  */
+static void
+regexp (struct dfa *dfa)
+{
+  branch (dfa);
+  while (dfa->parse.tok == OR)
+    {
+      dfa->parse.tok = lex (dfa);
+      branch (dfa);
+      addtok (dfa, OR);
+    }
+}
+
+/* Parse a string S of length LEN into D.  S can include NUL characters.
+   This is the main entry point for the parser.
*/
+void
+dfaparse (char const *s, idx_t len, struct dfa *d)
+{
+  d->lex.ptr = s;
+  d->lex.left = len;
+  d->lex.lasttok = END;
+  d->lex.laststart = true;
+
+  if (!d->syntax.syntax_bits_set)
+    dfaerror (_("no syntax specified"));
+
+  if (!d->nregexps)
+    addtok (d, BEG);            /* Only the first pattern emits BEG.  */
+
+  d->parse.tok = lex (d);
+  d->parse.depth = d->depth;
+
+  regexp (d);
+
+  if (d->parse.tok != END)
+    dfaerror (_("unbalanced )"));
+
+  addtok (d, END - d->nregexps);        /* Per-pattern end marker: END,
+                                           END - 1, END - 2, ...  */
+  addtok (d, CAT);
+
+  if (d->nregexps)
+    addtok (d, OR);             /* Join with the patterns parsed earlier.  */
+
+  ++d->nregexps;
+}
+
+/* Some primitives for operating on sets of positions.  */
+
+/* Copy one set to another.
+   If DST's array is too small for SRC's elements it is freed and a new,
+   larger one is allocated with xpalloc, so DST's previous contents are
+   never preserved.  Nothing is copied when SRC is empty.  */
+static void
+copy (position_set const *src, position_set *dst)
+{
+  if (dst->alloc < src->nelem)
+    {
+      free (dst->elems);
+      dst->elems = xpalloc (NULL, &dst->alloc, src->nelem - dst->alloc, -1,
+                            sizeof *dst->elems);
+    }
+  dst->nelem = src->nelem;
+  if (src->nelem != 0)
+    memcpy (dst->elems, src->elems, src->nelem * sizeof *dst->elems);
+}
+/* Make S an empty set with room for SIZE positions.  S->elems is
+   overwritten without being freed, so S must not own an array yet.  */
+static void
+alloc_position_set (position_set *s, idx_t size)
+{
+  s->elems = xnmalloc (size, sizeof *s->elems);
+  s->alloc = size;
+  s->nelem = 0;
+}
+
+/* Insert position P in set S.  S is maintained in sorted order on
+   decreasing index.  If there is already an entry in S with P.index
+   then merge (logically-OR) P's constraints into the one in S.
+   S->elems must point to an array large enough to hold the resulting set.
*/
+static void
+insert (position p, position_set *s)
+{
+  idx_t count = s->nelem;
+  idx_t lo = 0, hi = count;
+  while (lo < hi)       /* Binary search.  Entries with a smaller index lie
+                           to the left, i.e. the array is kept in
+                           INCREASING index order; the word "decreasing"
+                           in the header comment does not match this
+                           code.  */
+    {
+      idx_t mid = (lo + hi) >> 1;
+      if (s->elems[mid].index < p.index)
+        lo = mid + 1;
+      else if (s->elems[mid].index == p.index)
+        {
+          s->elems[mid].constraint |= p.constraint;
+          return;
+        }
+      else
+        hi = mid;
+    }
+
+  s->elems = maybe_realloc (s->elems, count, &s->alloc, -1, sizeof *s->elems);
+  for (idx_t i = count; i > lo; i--)    /* Open a slot at position LO.  */
+    s->elems[i] = s->elems[i - 1];
+  s->elems[lo] = p;
+  ++s->nelem;
+}
+/* Add P at the end of S, without looking for its sorted place or for an
+   existing entry with the same index.  */
+static void
+append (position p, position_set *s)
+{
+  idx_t count = s->nelem;
+  s->elems = maybe_realloc (s->elems, count, &s->alloc, -1, sizeof *s->elems);
+  s->elems[s->nelem++] = p;
+}
+
+/* Merge S1 and S2 (with the additional constraint C2) into M.  The
+   result is as if the positions of S1, and of S2 with the additional
+   constraint C2, were inserted into an initially empty set.
+   Both inputs are walked in increasing index order.  A position that
+   occurs only in S2 is dropped when its constraint has no bit in common
+   with C2.  M must not share its array with S1 or S2: M->elems may be
+   freed and reallocated before the inputs are read.  */
+static void
+merge_constrained (position_set const *s1, position_set const *s2,
+                   unsigned int c2, position_set *m)
+{
+  idx_t i = 0, j = 0;
+
+  if (m->alloc - s1->nelem < s2->nelem)
+    {
+      free (m->elems);
+      m->alloc = s1->nelem;
+      m->elems = xpalloc (NULL, &m->alloc, s2->nelem, -1, sizeof *m->elems);
+    }
+  m->nelem = 0;
+  while (i < s1->nelem || j < s2->nelem)
+    if (! (j < s2->nelem)
+        || (i < s1->nelem && s1->elems[i].index <= s2->elems[j].index))
+      {
+        /* Take the S1 entry.  If S2 has the same index, consume that
+           entry as well and OR in its constraint masked by C2.  */
+        unsigned int c = ((i < s1->nelem && j < s2->nelem
+                           && s1->elems[i].index == s2->elems[j].index)
+                          ? s2->elems[j++].constraint & c2
+                          : 0);
+        m->elems[m->nelem].index = s1->elems[i].index;
+        m->elems[m->nelem++].constraint = s1->elems[i++].constraint | c;
+      }
+    else
+      {
+        if (s2->elems[j].constraint & c2)
+          {
+            m->elems[m->nelem].index = s2->elems[j].index;
+            m->elems[m->nelem++].constraint = s2->elems[j].constraint & c2;
+          }
+        j++;
+      }
+}
+
+/* Merge two sets of positions into a third.  The result is exactly as if
+   the positions of both sets were inserted into an initially empty set.
*/
+static void
+merge (position_set const *s1, position_set const *s2, position_set *m)
+{
+  merge_constrained (s1, s2, -1, m);    /* -1 converts to an all-ones
+                                           mask, so nothing is filtered.  */
+}
+/* Add the positions of SRC to DST.  Fewer than four positions are
+   inserted one at a time; otherwise the two sets are merged into the
+   scratch set M, whose contents are overwritten, and the result is
+   copied back into DST.  */
+static void
+merge2 (position_set *dst, position_set const *src, position_set *m)
+{
+  if (src->nelem < 4)
+    {
+      for (idx_t i = 0; i < src->nelem; i++)
+        insert (src->elems[i], dst);
+    }
+  else
+    {
+      merge (src, dst, m);
+      copy (m, dst);
+    }
+}
+
+/* Delete a position from a set.  Return the nonzero constraint of the
+   deleted position, or zero if there was no such position.
+   DEL is the index of the position to remove.  It is located by binary
+   search, so S must be sorted on increasing index.  */
+static unsigned int
+delete (idx_t del, position_set *s)
+{
+  idx_t count = s->nelem;
+  idx_t lo = 0, hi = count;
+  while (lo < hi)
+    {
+      idx_t mid = (lo + hi) >> 1;
+      if (s->elems[mid].index < del)
+        lo = mid + 1;
+      else if (s->elems[mid].index == del)
+        {
+          unsigned int c = s->elems[mid].constraint;
+          idx_t i;
+          for (i = mid; i + 1 < count; i++)     /* Close the gap.  */
+            s->elems[i] = s->elems[i + 1];
+          s->nelem = i;         /* Here I equals COUNT - 1.  */
+          return c;
+        }
+      else
+        hi = mid;
+    }
+  return 0;
+}
+
+/* Replace a position with the followed set.
+   The position with index DEL is always removed from DST if present.
+   The positions of ADD are merged in only when the removed constraint
+   shares at least one bit with CONSTRAINT, and they are restricted to
+   those shared bits.  TMP is scratch space and is overwritten.  */
+static void
+replace (position_set *dst, idx_t del, position_set *add,
+         unsigned int constraint, position_set *tmp)
+{
+  unsigned int c = delete (del, dst) & constraint;
+
+  if (c)
+    {
+      copy (dst, tmp);
+      merge_constrained (tmp, add, c, dst);
+    }
+}
+
+/* Find the index of the state corresponding to the given position set with
+   the given preceding context, or create a new state if there is no such
+   state.  Context tells whether we got here on a newline or letter.  */
+static state_num
+state_index (struct dfa *d, position_set const *s, int context)
+{
+  size_t hash = 0;
+  int constraint = 0;
+  state_num i;
+  token first_end = 0;
+
+  for (i = 0; i < s->nelem; ++i)        /* Order-independent XOR hash.  */
+    {
+      size_t ind = s->elems[i].index;
+      hash ^= ind + s->elems[i].constraint;
+    }
+
+  /* Try to find a state that exactly matches the proposed one.
+     The hash, element count and context are compared first; the
+     elements are compared only when all three agree.  */
+  for (i = 0; i < d->sindex; ++i)
+    {
+      if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem
+          || context != d->states[i].context)
+        continue;
+      state_num j;
+      for (j = 0; j < s->nelem; ++j)
+        if (s->elems[j].constraint != d->states[i].elems.elems[j].constraint
+            || s->elems[j].index != d->states[i].elems.elems[j].index)
+          break;
+      if (j == s->nelem)
+        return i;
+    }
+
+#ifdef DEBUG
+  fprintf (stderr, "new state %td\n nextpos:", i);
+  for (state_num j = 0; j < s->nelem; j++)
+    {
+      fprintf (stderr, " %td:", s->elems[j].index);
+      prtok (d->tokens[s->elems[j].index]);
+    }
+  fprintf (stderr, "\n context:");
+  if (context ^ CTX_ANY)
+    {
+      if (context & CTX_NONE)
+        fprintf (stderr, " CTX_NONE");
+      if (context & CTX_LETTER)
+        fprintf (stderr, " CTX_LETTER");
+      if (context & CTX_NEWLINE)
+        fprintf (stderr, " CTX_NEWLINE");
+    }
+  else
+    fprintf (stderr, " CTX_ANY");
+  fprintf (stderr, "\n");
+#endif
+
+  for (state_num j = 0; j < s->nelem; j++)
+    {
+      int c = d->constraints[s->elems[j].index];
+
+      if (c != 0)
+        {
+          if (succeeds_in_context (c, context, CTX_ANY))
+            constraint |= c;
+          if (!first_end)       /* Remember the first such token only.  */
+            first_end = d->tokens[s->elems[j].index];
+        }
+      else if (d->tokens[s->elems[j].index] == BACKREF)
+        constraint = NO_CONSTRAINT;
+    }
+
+
+  /* Create a new state.  The search loop above ran to completion, so I
+     equals D->sindex here and names the first unused slot.  */
+  d->states = maybe_realloc (d->states, d->sindex, &d->salloc, -1,
+                             sizeof *d->states);
+  d->states[i].hash = hash;
+  alloc_position_set (&d->states[i].elems, s->nelem);
+  copy (s, &d->states[i].elems);
+  d->states[i].context = context;
+  d->states[i].constraint = constraint;
+  d->states[i].first_end = first_end;
+  d->states[i].mbps.nelem = 0;
+  d->states[i].mbps.elems = NULL;
+  d->states[i].mb_trindex = -1;
+
+  ++d->sindex;
+
+  return i;
+}
+
+/* Find the epsilon closure of a set of positions.  If any position of the set
+   contains a symbol that matches the empty string in some context, replace
+   that position with the elements of its follow labeled with an appropriate
+   constraint.
Repeat exhaustively until no funny positions are left.
+   S->elems must be large enough to hold the result.  */
+static void
+epsclosure (struct dfa const *d)
+{
+  position_set tmp;     /* Scratch set handed to replace.  Note that this
+                           function has no parameter S: it rewrites the
+                           follow sets in D->follows in place, even though
+                           D itself is const-qualified.  */
+  alloc_position_set (&tmp, d->nleaves);
+  for (idx_t i = 0; i < d->tindex; i++)
+    if (d->follows[i].nelem > 0 && d->tokens[i] >= NOTCHAR
+        && d->tokens[i] != BACKREF && d->tokens[i] != ANYCHAR
+        && d->tokens[i] != MBCSET && d->tokens[i] < CSET)
+      {
+        unsigned int constraint;
+        switch (d->tokens[i])   /* Constraint attached to token I's
+                                   followers when they replace it.  */
+          {
+          case BEGLINE:
+            constraint = BEGLINE_CONSTRAINT;
+            break;
+          case ENDLINE:
+            constraint = ENDLINE_CONSTRAINT;
+            break;
+          case BEGWORD:
+            constraint = BEGWORD_CONSTRAINT;
+            break;
+          case ENDWORD:
+            constraint = ENDWORD_CONSTRAINT;
+            break;
+          case LIMWORD:
+            constraint = LIMWORD_CONSTRAINT;
+            break;
+          case NOTLIMWORD:
+            constraint = NOTLIMWORD_CONSTRAINT;
+            break;
+          default:
+            constraint = NO_CONSTRAINT;
+            break;
+          }
+
+        delete (i, &d->follows[i]);     /* Remove any self-reference.  */
+
+        for (idx_t j = 0; j < d->tindex; j++)
+          if (i != j && d->follows[j].nelem > 0)
+            replace (&d->follows[j], i, &d->follows[i], constraint, &tmp);
+      }
+  free (tmp.elems);
+}
+
+/* Returns the set of contexts for which there is at least one
+   character included in C.
+   The result is a bitwise OR of CTX_NEWLINE, CTX_LETTER and CTX_NONE.
+   CTX_NONE is reported when C holds a character that is in neither
+   DFA->syntax.letters nor DFA->syntax.newline.  */
+
+static int
+charclass_context (struct dfa const *dfa, charclass const *c)
+{
+  int context = 0;
+
+  for (int j = 0; j < CHARCLASS_WORDS; j++)
+    {
+      if (c->w[j] & dfa->syntax.newline.w[j])
+        context |= CTX_NEWLINE;
+      if (c->w[j] & dfa->syntax.letters.w[j])
+        context |= CTX_LETTER;
+      if (c->w[j] & ~(dfa->syntax.letters.w[j] | dfa->syntax.newline.w[j]))
+        context |= CTX_NONE;
+    }
+
+  return context;
+}
+
+/* Returns the contexts on which the position set S depends.  Each context
+   in the set of returned contexts (let's call it SC) may have a different
+   follow set than other contexts in SC, and also different from the
+   follow set of the complement set (sc ^ CTX_ANY).  However, all contexts
+   in the complement set will have the same follow set.
*/
+
+static int _GL_ATTRIBUTE_PURE
+state_separate_contexts (struct dfa *d, position_set const *s)
+{
+  int separate_contexts = 0;
+
+  for (idx_t j = 0; j < s->nelem; j++)  /* OR of the per-token values.  */
+    separate_contexts |= d->separates[s->elems[j].index];
+
+  return separate_contexts;
+}
+
+enum
+{
+  /* Single token is repeated.  It is distinguished from non-repeated.  */
+  OPT_REPEAT = (1 << 0),
+
+  /* Multiple tokens are repeated.  This flag is on at head of tokens.  The
+     node is not merged.  */
+  OPT_LPAREN = (1 << 1),
+
+  /* Multiple branches are joined.  The node is not merged.  */
+  OPT_RPAREN = (1 << 2),
+
+  /* The node is walked.  If the node is found in walking again, OPT_RPAREN
+     flag is turned on.  */
+  OPT_WALKED = (1 << 3),
+
+  /* The node is queued.  The node is not queued again.  */
+  OPT_QUEUED = (1 << 4)
+};
+/* Rebuild the follow set of token TINDEX in place.  An entry whose
+   constraint is zero is dropped.  An entry whose token is END or less
+   is dropped after its constraint is ORed into D->constraints[TINDEX].
+   An entry is merged into an earlier kept entry when neither carries
+   OPT_LPAREN or OPT_RPAREN and both have the same constraint, the same
+   token and the same OPT_REPEAT setting; merging adds its follow set
+   to the kept entry's follow set.  Every other entry is kept and its
+   token is flagged OPT_QUEUED.  FLAGS holds the OPT_* bits per token;
+   MERGED is scratch space for merge2.  */
+static void
+merge_nfa_state (struct dfa *d, idx_t tindex, char *flags,
+                 position_set *merged)
+{
+  position_set *follows = d->follows;
+  idx_t nelem = 0;      /* Number of entries kept so far.  */
+
+  d->constraints[tindex] = 0;
+
+  for (idx_t i = 0; i < follows[tindex].nelem; i++)
+    {
+      idx_t sindex = follows[tindex].elems[i].index;
+
+      /* Skip the node as pruned in future.  */
+      unsigned int iconstraint = follows[tindex].elems[i].constraint;
+      if (iconstraint == 0)
+        continue;
+
+      if (d->tokens[follows[tindex].elems[i].index] <= END)
+        {
+          d->constraints[tindex] |= follows[tindex].elems[i].constraint;
+          continue;
+        }
+
+      if (!(flags[sindex] & (OPT_LPAREN | OPT_RPAREN)))
+        {
+          idx_t j;
+
+          for (j = 0; j < nelem; j++)   /* Look for a kept twin.  */
+            {
+              idx_t dindex = follows[tindex].elems[j].index;
+
+              if (follows[tindex].elems[j].constraint != iconstraint)
+                continue;
+
+              if (flags[dindex] & (OPT_LPAREN | OPT_RPAREN))
+                continue;
+
+              if (d->tokens[sindex] != d->tokens[dindex])
+                continue;
+
+              if ((flags[sindex] ^ flags[dindex]) & OPT_REPEAT)
+                continue;
+
+              if (flags[sindex] & OPT_REPEAT)
+                delete (sindex, &follows[sindex]);
+
+              merge2 (&follows[dindex], &follows[sindex], merged);
+
+              break;
+            }
+
+          if (j < nelem)        /* Merged into entry J: do not keep it.  */
+            continue;
+        }
+
+      follows[tindex].elems[nelem++] = follows[tindex].elems[i];
+      flags[sindex] |= OPT_QUEUED;
+    }
+
+  follows[tindex].nelem = nelem;
+}
+/* qsort comparison function: order positions by increasing index.  The
+   constraint member is not examined.  */
+static int
+compare (const void *a, const void *b)
+{
+  position const *p = a, *q = b;
+  return p->index < q->index ? -1 : p->index > q->index;
+}
+/* Renumber the tokens that can be reached from token 0 through the
+   follow sets, in order of first discovery, and compact D->tokens,
+   D->follows, D->constraints and (in multibyte locales)
+   D->multibyte_prop accordingly.  The follow set of every token that
+   was never reached is freed.  Each surviving follow set is rewritten
+   with the new indices and sorted again.  On return D->tindex and
+   D->nleaves both equal the number of surviving tokens.  */
+static void
+reorder_tokens (struct dfa *d)
+{
+  idx_t nleaves;
+  ptrdiff_t *map;       /* Old index -> new index, or -1 if not reached.  */
+  token *tokens;
+  position_set *follows;
+  int *constraints;
+  char *multibyte_prop;
+
+  nleaves = 0;
+
+  map = xnmalloc (d->tindex, sizeof *map);
+
+  map[0] = nleaves++;
+
+  for (idx_t i = 1; i < d->tindex; i++)
+    map[i] = -1;
+
+  tokens = xnmalloc (d->nleaves, sizeof *tokens);
+  follows = xnmalloc (d->nleaves, sizeof *follows);
+  constraints = xnmalloc (d->nleaves, sizeof *constraints);
+
+  if (d->localeinfo.multibyte)
+    multibyte_prop = xnmalloc (d->nleaves, sizeof *multibyte_prop);
+  else
+    multibyte_prop = NULL;
+
+  for (idx_t i = 0; i < d->tindex; i++)
+    {
+      if (map[i] == -1)
+        {
+          free (d->follows[i].elems);
+          d->follows[i].elems = NULL;
+          d->follows[i].nelem = 0;
+          continue;
+        }
+
+      tokens[map[i]] = d->tokens[i];
+      follows[map[i]] = d->follows[i];
+      constraints[map[i]] = d->constraints[i];
+
+      if (multibyte_prop != NULL)
+        multibyte_prop[map[i]] = d->multibyte_prop[i];
+
+      for (idx_t j = 0; j < d->follows[i].nelem; j++)
+        {
+          if (map[d->follows[i].elems[j].index] == -1)
+            map[d->follows[i].elems[j].index] = nleaves++;
+
+          d->follows[i].elems[j].index = map[d->follows[i].elems[j].index];
+        }
+
+      qsort (d->follows[i].elems, d->follows[i].nelem,
+             sizeof *d->follows[i].elems, compare);
+    }
+
+  for (idx_t i = 0; i < nleaves; i++)
+    {
+      d->tokens[i] = tokens[i];
+      d->follows[i] = follows[i];
+      d->constraints[i] = constraints[i];
+
+      if (multibyte_prop != NULL)
+        d->multibyte_prop[i] = multibyte_prop[i];
+    }
+
+  d->tindex = d->nleaves = nleaves;
+
+  free (tokens);
+  free (follows);
+  free (constraints);
+  free (multibyte_prop);
+  free (map);
+}
+/* Classify every token with the OPT_* flags according to how it appears
+   in the follow sets, allocate D->constraints, run merge_nfa_state on
+   token 0 and on every token that gets flagged OPT_QUEUED, and finally
+   compact the token arrays with reorder_tokens.  */
+static void
+dfaoptimize (struct dfa *d)
+{
+  char *flags = xzalloc (d->tindex);
+
+  for (idx_t i = 0; i < d->tindex; i++)
+    {
+      for (idx_t j = 0; j < d->follows[i].nelem; j++)
+        {
+          /* In the third test "&=" is an assignment, not a comparison.
+             It is harmless: for an index greater than I only OPT_WALKED
+             and OPT_RPAREN can have been set so far, and OPT_RPAREN is
+             set again on the following line.  */
+          if (d->follows[i].elems[j].index == i)
+            flags[d->follows[i].elems[j].index] |= OPT_REPEAT;
+          else if (d->follows[i].elems[j].index < i)
+            flags[d->follows[i].elems[j].index] |= OPT_LPAREN;
+          else if (flags[d->follows[i].elems[j].index] &= OPT_WALKED)
+            flags[d->follows[i].elems[j].index] |= OPT_RPAREN;
+          else
+            flags[d->follows[i].elems[j].index] |= OPT_WALKED;
+        }
+    }
+
+  flags[0] |= OPT_QUEUED;
+
+  position_set merged0;
+  position_set *merged = &merged0;
+  alloc_position_set (merged, d->nleaves);
+
+  d->constraints = xnmalloc (d->tindex, sizeof *d->constraints);
+
+  for (idx_t i = 0; i < d->tindex; i++)
+    if (flags[i] & OPT_QUEUED)
+      merge_nfa_state (d, i, flags, merged);
+
+  reorder_tokens (d);
+
+  free (merged->elems);
+  free (flags);
+}
+
+/* Perform bottom-up analysis on the parse tree, computing various functions.
+   Note that at this point, we're pretending constructs like \< are real
+   characters rather than constraints on what can follow them.
+
+   Nullable:  A node is nullable if it is at the root of a regexp that can
+   match the empty string.
+   * EMPTY leaves are nullable.
+   * No other leaf is nullable.
+   * A QMARK or STAR node is nullable.
+   * A PLUS node is nullable if its argument is nullable.
+   * A CAT node is nullable if both its arguments are nullable.
+   * An OR node is nullable if either argument is nullable.
+
+   Firstpos:  The firstpos of a node is the set of positions (nonempty leaves)
+   that could correspond to the first character of a string matching the
+   regexp rooted at the given node.
+   * EMPTY leaves have empty firstpos.
+   * The firstpos of a nonempty leaf is that leaf itself.
+   * The firstpos of a QMARK, STAR, or PLUS node is the firstpos of its
+     argument.
+   * The firstpos of a CAT node is the firstpos of the left argument, union
+     the firstpos of the right if the left argument is nullable.
+   * The firstpos of an OR node is the union of firstpos of each argument.
+
+   Lastpos:  The lastpos of a node is the set of positions that could
+   correspond to the last character of a string matching the regexp at
+   the given node.
+   * EMPTY leaves have empty lastpos.
+   * The lastpos of a nonempty leaf is that leaf itself.
+   * The lastpos of a QMARK, STAR, or PLUS node is the lastpos of its
+     argument.
+   * The lastpos of a CAT node is the lastpos of its right argument, union
+     the lastpos of the left if the right argument is nullable.
+   * The lastpos of an OR node is the union of the lastpos of each argument.
+
+   Follow:  The follow of a position is the set of positions that could
+   correspond to the character following a character matching the node in
+   a string matching the regexp.  At this point we consider special symbols
+   that match the empty string in some context to be just normal characters.
+   Later, if we find that a special symbol is in a follow set, we will
+   replace it with the elements of its follow, labeled with an appropriate
+   constraint.
+   * Every node in the firstpos of the argument of a STAR or PLUS node is in
+     the follow of every node in the lastpos.
+   * Every node in the firstpos of the second argument of a CAT node is in
+     the follow of every node in the lastpos of the first argument.
+
+   Because of the postfix representation of the parse tree, the depth-first
+   analysis is conveniently done by a linear scan with the aid of a stack.
+   Sets are stored as arrays of the elements, obeying a stack-like allocation
+   scheme; the number of elements in each set deeper in the stack can be
+   used to determine the address of a particular set's array.  */
+static void
+dfaanalyze (struct dfa *d, bool searchflag)
+{
+  /* Array allocated to hold position sets.  One allocation of
+     2 * D->nleaves positions: the first half backs FIRSTPOS, the second
+     half backs LASTPOS.  */
+  position *posalloc = xnmalloc (d->nleaves, 2 * sizeof *posalloc);
+  /* Firstpos and lastpos elements.  Each pointer addresses the slot just
+     past the sets currently on the stack.  */
+  position *firstpos = posalloc;
+  position *lastpos = firstpos + d->nleaves;
+  position pos;
+  position_set tmp;
+
+  /* Stack for element counts and nullable flags.  */
+  struct
+  {
+    /* Whether the entry is nullable.  */
+    bool nullable;
+
+    /* Counts of firstpos and lastpos sets.  */
+    idx_t nfirstpos;
+    idx_t nlastpos;
+  } *stkalloc = xnmalloc (d->depth, sizeof *stkalloc), *stk = stkalloc;
+
+  position_set merged;          /* Result of merging sets.  */
+
+  addtok (d, CAT);      /* NOTE(review): one more CAT is appended before
+                           the analysis; presumably it joins the parsed
+                           tree to a token added by the caller -- confirm
+                           at the call site.  */
+
+#ifdef DEBUG
+  fprintf (stderr, "dfaanalyze:\n");
+  for (idx_t i = 0; i < d->tindex; i++)
+    {
+      fprintf (stderr, " %td:", i);
+      prtok (d->tokens[i]);
+    }
+  putc ('\n', stderr);
+#endif
+
+  d->searchflag = searchflag;
+  alloc_position_set (&merged, d->nleaves);
+  d->follows = xcalloc (d->tindex, sizeof *d->follows);
+
+  for (idx_t i = 0; i < d->tindex; i++)
+    {
+      switch (d->tokens[i])
+        {
+        case EMPTY:
+          /* The empty set is nullable.  */
+          stk->nullable = true;
+
+          /* The firstpos and lastpos of the empty leaf are both empty.  */
+          stk->nfirstpos = stk->nlastpos = 0;
+          stk++;
+          break;
+
+        case STAR:
+        case PLUS:
+          /* Every element in the firstpos of the argument is in the follow
+             of every element in the lastpos.  TMP only borrows the
+             firstpos storage here; it owns no memory at this point.  */
+          {
+            tmp.elems = firstpos - stk[-1].nfirstpos;
+            tmp.nelem = stk[-1].nfirstpos;
+            position *p = lastpos - stk[-1].nlastpos;
+            for (idx_t j = 0; j < stk[-1].nlastpos; j++)
+              {
+                merge (&tmp, &d->follows[p[j].index], &merged);
+                copy (&merged, &d->follows[p[j].index]);
+              }
+          }
+          FALLTHROUGH;
+        case QMARK:
+          /* A QMARK or STAR node is automatically nullable.  */
+          if (d->tokens[i] != PLUS)
+            stk[-1].nullable = true;
+          break;
+
+        case CAT:
+          /* Every element in the firstpos of the second argument is in the
+             follow of every element in the lastpos of the first argument.  */
+          {
+            tmp.nelem = stk[-1].nfirstpos;
+            tmp.elems = firstpos - stk[-1].nfirstpos;
+            position *p = lastpos - stk[-1].nlastpos - stk[-2].nlastpos;
+            for (idx_t j = 0; j < stk[-2].nlastpos; j++)
+              {
+                merge (&tmp, &d->follows[p[j].index], &merged);
+                copy (&merged, &d->follows[p[j].index]);
+              }
+          }
+
+          /* The firstpos of a CAT node is the firstpos of the first argument,
+             union that of the second argument if the first is nullable.
+             The two sets are adjacent in memory, so the union is formed by
+             adding the counts, and the second set is discarded by moving
+             FIRSTPOS back.  */
+          if (stk[-2].nullable)
+            stk[-2].nfirstpos += stk[-1].nfirstpos;
+          else
+            firstpos -= stk[-1].nfirstpos;
+
+          /* The lastpos of a CAT node is the lastpos of the second argument,
+             union that of the first argument if the second is nullable.
+             Otherwise the second argument's set is slid down over the
+             first argument's set.  */
+          if (stk[-1].nullable)
+            stk[-2].nlastpos += stk[-1].nlastpos;
+          else
+            {
+              position *p = lastpos - stk[-1].nlastpos - stk[-2].nlastpos;
+              for (idx_t j = 0; j < stk[-1].nlastpos; j++)
+                p[j] = p[j + stk[-2].nlastpos];
+              lastpos -= stk[-2].nlastpos;
+              stk[-2].nlastpos = stk[-1].nlastpos;
+            }
+
+          /* A CAT node is nullable if both arguments are nullable.  */
+          stk[-2].nullable &= stk[-1].nullable;
+          stk--;
+          break;
+
+        case OR:
+          /* The firstpos is the union of the firstpos of each argument.  */
+          stk[-2].nfirstpos += stk[-1].nfirstpos;
+
+          /* The lastpos is the union of the lastpos of each argument.  */
+          stk[-2].nlastpos += stk[-1].nlastpos;
+
+          /* An OR node is nullable if either argument is nullable.  */
+          stk[-2].nullable |= stk[-1].nullable;
+          stk--;
+          break;
+
+        default:
+          /* Anything else is a nonempty position.  (Note that special
+             constructs like \< are treated as nonempty strings here;
+             an "epsilon closure" effectively makes them nullable later.
+             Backreferences have to get a real position so we can detect
+             transitions on them later.  But they are nullable.)  */
+          stk->nullable = d->tokens[i] == BACKREF;
+
+          /* This position is in its own firstpos and lastpos.  */
+          stk->nfirstpos = stk->nlastpos = 1;
+          stk++;
+
+          firstpos->index = lastpos->index = i;
+          firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
+          firstpos++, lastpos++;
+
+          break;
+        }
+#ifdef DEBUG
+      /* ... balance the above nonsyntactic #ifdef goo...  */
+      fprintf (stderr, "node %td:", i);
+      prtok (d->tokens[i]);
+      putc ('\n', stderr);
+      fprintf (stderr,
+               stk[-1].nullable ? " nullable: yes\n" : " nullable: no\n");
+      fprintf (stderr, " firstpos:");
+      for (idx_t j = 0; j < stk[-1].nfirstpos; j++)
+        {
+          fprintf (stderr, " %td:", firstpos[j - stk[-1].nfirstpos].index);
+          prtok (d->tokens[firstpos[j - stk[-1].nfirstpos].index]);
+        }
+      fprintf (stderr, "\n lastpos:");
+      for (idx_t j = 0; j < stk[-1].nlastpos; j++)
+        {
+          fprintf (stderr, " %td:", lastpos[j - stk[-1].nlastpos].index);
+          prtok (d->tokens[lastpos[j - stk[-1].nlastpos].index]);
+        }
+      putc ('\n', stderr);
+#endif
+    }
+
+  /* For each follow set that is the follow set of a real position, replace
+     it with its epsilon closure.  */
+  epsclosure (d);
+
+  dfaoptimize (d);
+
+#ifdef DEBUG
+  for (idx_t i = 0; i < d->tindex; i++)
+    if (d->tokens[i] == BEG || d->tokens[i] < NOTCHAR
+        || d->tokens[i] == BACKREF || d->tokens[i] == ANYCHAR
+        || d->tokens[i] == MBCSET || d->tokens[i] >= CSET)
+      {
+        fprintf (stderr, "follows(%td:", i);
+        prtok (d->tokens[i]);
+        fprintf (stderr, "):");
+        for (idx_t j = 0; j < d->follows[i].nelem; j++)
+          {
+            fprintf (stderr, " %td:", d->follows[i].elems[j].index);
+            prtok (d->tokens[d->follows[i].elems[j].index]);
+          }
+        putc ('\n', stderr);
+      }
+#endif
+
+  pos.index = 0;
+  pos.constraint = NO_CONSTRAINT;
+
+  alloc_position_set (&tmp, 1); /* From here on TMP owns its array.  */
+
+  append (pos, &tmp);
+
+  d->separates = xnmalloc (d->tindex, sizeof *d->separates);
+
+  for (idx_t i = 0; i < d->tindex; i++)
+    {
+      d->separates[i] = 0;
+
+      if (prev_newline_dependent (d->constraints[i]))
+        d->separates[i] |= CTX_NEWLINE;
+      if (prev_letter_dependent (d->constraints[i]))
+        d->separates[i] |= CTX_LETTER;
+
+      for (idx_t j = 0; j < d->follows[i].nelem; j++)
+        {
+          if (prev_newline_dependent (d->follows[i].elems[j].constraint))
+            d->separates[i] |= CTX_NEWLINE;
+          if (prev_letter_dependent (d->follows[i].elems[j].constraint))
+            d->separates[i] |= CTX_LETTER;
+        }
+    }
+
+  /* Context wanted by some position.  */
+  int separate_contexts = state_separate_contexts (d, &tmp);
+
+  /* Build the initial state.  Up to three states are requested for the
+     one-position set TMP, in this order: the CTX_NEWLINE variant, the
+     variant for all remaining contexts, and the CTX_LETTER variant.
+     D->min_trcount ends up one greater than the index returned by the
+     last of these calls.  */
+  if (separate_contexts & CTX_NEWLINE)
+    state_index (d, &tmp, CTX_NEWLINE);
+  d->initstate_notbol = d->min_trcount
+    = state_index (d, &tmp, separate_contexts ^ CTX_ANY);
+  if (separate_contexts & CTX_LETTER)
+    d->min_trcount = state_index (d, &tmp, CTX_LETTER);
+  d->min_trcount++;
+  d->trcount = 0;
+
+  free (posalloc);
+  free (stkalloc);
+  free (merged.elems);
+  free (tmp.elems);
+}
+
+/* Make sure D's per-state arrays have room for every state numbered
+   below D->sindex, growing them when D->tralloc is smaller.
+   D->trans and D->mb_trans point two elements past the start of their
+   allocations, so indices -1 and -2 are valid; those two slots of
+   D->trans are set to NULL on every growth.  Newly added slots of
+   D->trans, D->fails and (in multibyte locales) D->mb_trans are set to
+   NULL; the new slots of D->success and D->newlines are left
+   uninitialized.  */
+static void
+realloc_trans_if_necessary (struct dfa *d)
+{
+  state_num oldalloc = d->tralloc;
+  if (oldalloc < d->sindex)
+    {
+      state_num **realtrans = d->trans ? d->trans - 2 : NULL;
+      idx_t newalloc1 = realtrans ? d->tralloc + 2 : 0;
+      realtrans = xpalloc (realtrans, &newalloc1, d->sindex - oldalloc,
+                           -1, sizeof *realtrans);
+      realtrans[0] = realtrans[1] = NULL;
+      d->trans = realtrans + 2;
+      idx_t newalloc = d->tralloc = newalloc1 - 2;
+      d->fails = xnrealloc (d->fails, newalloc, sizeof *d->fails);
+      d->success = xnrealloc (d->success, newalloc, sizeof *d->success);
+      d->newlines = xnrealloc (d->newlines, newalloc, sizeof *d->newlines);
+      if (d->localeinfo.multibyte)
+        {
+          realtrans = d->mb_trans ? d->mb_trans - 2 : NULL;
+          realtrans = xnrealloc (realtrans, newalloc1, sizeof *realtrans);
+          if (oldalloc == 0)
+            realtrans[0] = realtrans[1] = NULL;
+          d->mb_trans = realtrans + 2;
+        }
+      for (; oldalloc < newalloc; oldalloc++)
+        {
+          d->trans[oldalloc] = NULL;
+          d->fails[oldalloc] = NULL;
+          if (d->localeinfo.multibyte)
+            d->mb_trans[oldalloc] = NULL;
+        }
+    }
+}
+
+/*
+   Calculate the transition table for a new state derived from state s
+   for a compiled dfa d after input character uc, and return the new
+   state number.
+
+   Do not worry about all possible input characters; calculate just the group
+   of positions that match uc.  Label it with the set of characters that
+   every position in the group matches (taking into account, if necessary,
+   preceding context information of s).
Then find the union + of these positions' follows, i.e., the set of positions of the + new state. For each character in the group's label, set the transition + on this character to be to a state corresponding to the set's positions, + and its associated backward context information, if necessary. + + When building a searching matcher, include the positions of state + 0 in every state. + + The group is constructed by building an equivalence-class + partition of the positions of s. + + For each position, find the set of characters C that it matches. Eliminate + any characters from C that fail on grounds of backward context. + + Check whether the group's label L has nonempty + intersection with C. If L - C is nonempty, create a new group labeled + L - C and having the same positions as the current group, and set L to + the intersection of L and C. Insert the position in the group, set + C = C - L, and resume scanning. + + If after comparing with every group there are characters remaining in C, + create a new group labeled with the characters of C and insert this + position in that group. */ + +static state_num +build_state (state_num s, struct dfa *d, unsigned char uc) +{ + position_set follows; /* Union of the follows for each + position of the current state. */ + position_set group; /* Positions that match the input char. */ + position_set tmp; /* Temporary space for merging sets. */ + state_num state; /* New state. */ + state_num state_newline; /* New state on a newline transition. */ + state_num state_letter; /* New state on a letter transition. */ + +#ifdef DEBUG + fprintf (stderr, "build state %td\n", s); +#endif + + /* A pointer to the new transition table, and the table itself. */ + state_num **ptrans = (accepting (s, d) ? d->fails : d->trans) + s; + state_num *trans = *ptrans; + + if (!trans) + { + /* MAX_TRCOUNT is an arbitrary upper limit on the number of + transition tables that can exist at once, other than for + initial states. 
Often-used transition tables are quickly + rebuilt, whereas rarely-used ones are cleared away. */ + if (MAX_TRCOUNT <= d->trcount) + { + for (state_num i = d->min_trcount; i < d->tralloc; i++) + { + free (d->trans[i]); + free (d->fails[i]); + d->trans[i] = d->fails[i] = NULL; + } + d->trcount = 0; + } + + d->trcount++; + *ptrans = trans = xmalloc (NOTCHAR * sizeof *trans); + + /* Fill transition table with a default value which means that the + transited state has not been calculated yet. */ + for (int i = 0; i < NOTCHAR; i++) + trans[i] = -2; + } + + /* Set up the success bits for this state. */ + d->success[s] = 0; + if (accepts_in_context (d->states[s].context, CTX_NEWLINE, s, d)) + d->success[s] |= CTX_NEWLINE; + if (accepts_in_context (d->states[s].context, CTX_LETTER, s, d)) + d->success[s] |= CTX_LETTER; + if (accepts_in_context (d->states[s].context, CTX_NONE, s, d)) + d->success[s] |= CTX_NONE; + + alloc_position_set (&follows, d->nleaves); + + /* Find the union of the follows of the positions of the group. + This is a hideously inefficient loop. Fix it someday. */ + for (idx_t j = 0; j < d->states[s].elems.nelem; j++) + for (idx_t k = 0; + k < d->follows[d->states[s].elems.elems[j].index].nelem; ++k) + insert (d->follows[d->states[s].elems.elems[j].index].elems[k], + &follows); + + /* Positions that match the input char. */ + alloc_position_set (&group, d->nleaves); + + /* The group's label. */ + charclass label; + fillset (&label); + + for (idx_t i = 0; i < follows.nelem; i++) + { + charclass matches; /* Set of matching characters. 
*/ + position pos = follows.elems[i]; + bool matched = false; + if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR) + { + zeroset (&matches); + setbit (d->tokens[pos.index], &matches); + if (d->tokens[pos.index] == uc) + matched = true; + } + else if (d->tokens[pos.index] >= CSET) + { + matches = d->charclasses[d->tokens[pos.index] - CSET]; + if (tstbit (uc, &matches)) + matched = true; + } + else if (d->tokens[pos.index] == ANYCHAR) + { + matches = d->charclasses[d->canychar]; + if (tstbit (uc, &matches)) + matched = true; + + /* ANYCHAR must match with a single character, so we must put + it to D->states[s].mbps which contains the positions which + can match with a single character not a byte. If all + positions which has ANYCHAR does not depend on context of + next character, we put the follows instead of it to + D->states[s].mbps to optimize. */ + if (succeeds_in_context (pos.constraint, d->states[s].context, + CTX_NONE)) + { + if (d->states[s].mbps.nelem == 0) + alloc_position_set (&d->states[s].mbps, 1); + insert (pos, &d->states[s].mbps); + } + } + else + continue; + + /* Some characters may need to be eliminated from matches because + they fail in the current context. */ + if (pos.constraint != NO_CONSTRAINT) + { + if (!succeeds_in_context (pos.constraint, + d->states[s].context, CTX_NEWLINE)) + for (int j = 0; j < CHARCLASS_WORDS; j++) + matches.w[j] &= ~d->syntax.newline.w[j]; + if (!succeeds_in_context (pos.constraint, + d->states[s].context, CTX_LETTER)) + for (int j = 0; j < CHARCLASS_WORDS; ++j) + matches.w[j] &= ~d->syntax.letters.w[j]; + if (!succeeds_in_context (pos.constraint, + d->states[s].context, CTX_NONE)) + for (int j = 0; j < CHARCLASS_WORDS; ++j) + matches.w[j] &= d->syntax.letters.w[j] | d->syntax.newline.w[j]; + + /* If there are no characters left, there's no point in going on. */ + if (emptyset (&matches)) + continue; + + /* If we have reset the bit that made us declare "matched", reset + that indicator, too. 
This is required to avoid an infinite loop + with this command: echo cx | LC_ALL=C grep -E 'c\b[x ]' */ + if (!tstbit (uc, &matches)) + matched = false; + } + +#ifdef DEBUG + fprintf (stderr, " nextpos %td:", pos.index); + prtok (d->tokens[pos.index]); + fprintf (stderr, " of"); + for (unsigned j = 0; j < NOTCHAR; j++) + if (tstbit (j, &matches)) + fprintf (stderr, " 0x%02x", j); + fprintf (stderr, "\n"); +#endif + + if (matched) + { + for (int k = 0; k < CHARCLASS_WORDS; ++k) + label.w[k] &= matches.w[k]; + append (pos, &group); + } + else + { + for (int k = 0; k < CHARCLASS_WORDS; ++k) + label.w[k] &= ~matches.w[k]; + } + } + + alloc_position_set (&tmp, d->nleaves); + + if (group.nelem > 0) + { + /* If we are building a searching matcher, throw in the positions + of state 0 as well, if possible. */ + if (d->searchflag) + { + /* If a token in follows.elems is not 1st byte of a multibyte + character, or the states of follows must accept the bytes + which are not 1st byte of the multibyte character. + Then, if a state of follows encounters a byte, it must not be + a 1st byte of a multibyte character nor a single byte character. + In this case, do not add state[0].follows to next state, because + state[0] must accept 1st-byte. + + For example, suppose is a certain single byte character, + is a certain multibyte character, and the codepoint of + equals the 2nd byte of the codepoint of . When + state[0] accepts , state[i] transits to state[i+1] by + accepting the 1st byte of , and state[i+1] accepts the + 2nd byte of , if state[i+1] encounters the codepoint of + , it must not be but the 2nd byte of , so do + not add state[0]. 
*/ + + bool mergeit = !d->localeinfo.multibyte; + if (!mergeit) + { + mergeit = true; + for (idx_t j = 0; mergeit && j < group.nelem; j++) + mergeit &= d->multibyte_prop[group.elems[j].index]; + } + if (mergeit) + { + merge (&d->states[0].elems, &group, &tmp); + copy (&tmp, &group); + } + } + + /* Find out if the new state will want any context information, + by calculating possible contexts that the group can match, + and separate contexts that the new state wants to know. */ + int possible_contexts = charclass_context (d, &label); + int separate_contexts = state_separate_contexts (d, &group); + + /* Find the state(s) corresponding to the union of the follows. */ + if (possible_contexts & ~separate_contexts) + state = state_index (d, &group, separate_contexts ^ CTX_ANY); + else + state = -1; + if (separate_contexts & possible_contexts & CTX_NEWLINE) + state_newline = state_index (d, &group, CTX_NEWLINE); + else + state_newline = state; + if (separate_contexts & possible_contexts & CTX_LETTER) + state_letter = state_index (d, &group, CTX_LETTER); + else + state_letter = state; + + /* Reallocate now, to reallocate any newline transition properly. */ + realloc_trans_if_necessary (d); + } + + /* If we are a searching matcher, the default transition is to a state + containing the positions of state 0, otherwise the default transition + is to fail miserably. */ + else if (d->searchflag) + { + state_newline = 0; + state_letter = d->min_trcount - 1; + state = d->initstate_notbol; + } + else + { + state_newline = -1; + state_letter = -1; + state = -1; + } + + /* Set the transitions for each character in the label. 
*/ + for (int i = 0; i < NOTCHAR; i++) + if (tstbit (i, &label)) + switch (d->syntax.sbit[i]) + { + case CTX_NEWLINE: + trans[i] = state_newline; + break; + case CTX_LETTER: + trans[i] = state_letter; + break; + default: + trans[i] = state; + break; + } + +#ifdef DEBUG + fprintf (stderr, "trans table %td", s); + for (int i = 0; i < NOTCHAR; ++i) + { + if (!(i & 0xf)) + fprintf (stderr, "\n"); + fprintf (stderr, " %2td", trans[i]); + } + fprintf (stderr, "\n"); +#endif + + free (group.elems); + free (follows.elems); + free (tmp.elems); + + /* Keep the newline transition in a special place so we can use it as + a sentinel. */ + if (tstbit (d->syntax.eolbyte, &label)) + { + d->newlines[s] = trans[d->syntax.eolbyte]; + trans[d->syntax.eolbyte] = -1; + } + + return trans[uc]; +} + +/* Multibyte character handling sub-routines for dfaexec. */ + +/* Consume a single byte and transit state from 's' to '*next_state'. + This function is almost same as the state transition routin in dfaexec. + But state transition is done just once, otherwise matching succeed or + reach the end of the buffer. */ +static state_num +transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const **pp) +{ + state_num *t; + + if (d->trans[s]) + t = d->trans[s]; + else if (d->fails[s]) + t = d->fails[s]; + else + { + build_state (s, d, **pp); + if (d->trans[s]) + t = d->trans[s]; + else + { + t = d->fails[s]; + assert (t); + } + } + + if (t[**pp] == -2) + build_state (s, d, **pp); + + return t[*(*pp)++]; +} + +/* Transit state from s, then return new state and update the pointer of + the buffer. This function is for a period operator which can match a + multi-byte character. */ +static state_num +transit_state (struct dfa *d, state_num s, unsigned char const **pp, + unsigned char const *end) +{ + wint_t wc; + + int mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d); + + /* This state has some operators which can match a multibyte character. 
*/ + d->mb_follows.nelem = 0; + + /* Calculate the state which can be reached from the state 's' by + consuming 'mbclen' single bytes from the buffer. */ + state_num s1 = s; + int mbci; + for (mbci = 0; mbci < mbclen && (mbci == 0 || d->min_trcount <= s); mbci++) + s = transit_state_singlebyte (d, s, pp); + *pp += mbclen - mbci; + + if (wc == WEOF) + { + /* It is an invalid character, so ANYCHAR is not accepted. */ + return s; + } + + /* If all positions which have ANYCHAR do not depend on the context + of the next character, calculate the next state with + pre-calculated follows and cache the result. */ + if (d->states[s1].mb_trindex < 0) + { + if (MAX_TRCOUNT <= d->mb_trcount) + { + state_num s3; + for (s3 = -1; s3 < d->tralloc; s3++) + { + free (d->mb_trans[s3]); + d->mb_trans[s3] = NULL; + } + + for (state_num i = 0; i < d->sindex; i++) + d->states[i].mb_trindex = -1; + d->mb_trcount = 0; + } + d->states[s1].mb_trindex = d->mb_trcount++; + } + + if (! d->mb_trans[s]) + { + enum { TRANSPTR_SIZE = sizeof *d->mb_trans[s] }; + enum { TRANSALLOC_SIZE = MAX_TRCOUNT * TRANSPTR_SIZE }; + d->mb_trans[s] = xmalloc (TRANSALLOC_SIZE); + for (int i = 0; i < MAX_TRCOUNT; i++) + d->mb_trans[s][i] = -1; + } + else if (d->mb_trans[s][d->states[s1].mb_trindex] >= 0) + return d->mb_trans[s][d->states[s1].mb_trindex]; + + if (s == -1) + copy (&d->states[s1].mbps, &d->mb_follows); + else + merge (&d->states[s1].mbps, &d->states[s].elems, &d->mb_follows); + + int separate_contexts = state_separate_contexts (d, &d->mb_follows); + state_num s2 = state_index (d, &d->mb_follows, separate_contexts ^ CTX_ANY); + realloc_trans_if_necessary (d); + + d->mb_trans[s][d->states[s1].mb_trindex] = s2; + + return s2; +} + +/* The initial state may encounter a byte which is not a single byte character + nor the first byte of a multibyte character. But it is incorrect for the + initial state to accept such a byte. 
For example, in Shift JIS the regular + expression "\\" accepts the codepoint 0x5c, but should not accept the second + byte of the codepoint 0x815c. Then the initial state must skip the bytes + that are not a single byte character nor the first byte of a multibyte + character. + + Given DFA state d, use mbs_to_wchar to advance MBP until it reaches + or exceeds P, and return the advanced MBP. If WCP is non-NULL and + the result is greater than P, set *WCP to the final wide character + processed, or to WEOF if no wide character is processed. Otherwise, + if WCP is non-NULL, *WCP may or may not be updated. + + Both P and MBP must be no larger than END. */ +static unsigned char const * +skip_remains_mb (struct dfa *d, unsigned char const *p, + unsigned char const *mbp, char const *end) +{ + if (d->syntax.never_trail[*p]) + return p; + while (mbp < p) + { + wint_t wc; + mbp += mbs_to_wchar (&wc, (char const *) mbp, + end - (char const *) mbp, d); + } + return mbp; +} + +/* Search through a buffer looking for a match to the struct dfa *D. + Find the first occurrence of a string matching the regexp in the + buffer, and the shortest possible version thereof. Return a pointer to + the first character after the match, or NULL if none is found. BEGIN + points to the beginning of the buffer, and END points to the first byte + after its end. Note however that we store a sentinel byte (usually + newline) in *END, so the actual buffer must be one byte longer. + When ALLOW_NL, newlines may appear in the matching string. + If COUNT is non-NULL, increment *COUNT once for each newline processed. + If MULTIBYTE, the input consists of multibyte characters and/or + encoding-error bytes. Otherwise, it consists of single-byte characters. + Here is the list of features that make this DFA matcher punt: + - [M-N] range in non-simple locale: regex is up to 25% faster on [a-z] + - [^...] in non-simple locale + - [[=foo=]] or [[.foo.]] + - [[:alpha:]] etc. 
in multibyte locale (except [[:digit:]] works OK) + - back-reference: (.)\1 + - word-delimiter in multibyte locale: \<, \>, \b, \B + See struct localeinfo.simple for the definition of "simple locale". */ + +static inline char * +dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl, + ptrdiff_t *count, bool multibyte) +{ + if (MAX_TRCOUNT <= d->sindex) + { + for (state_num s = d->min_trcount; s < d->sindex; s++) + { + free (d->states[s].elems.elems); + free (d->states[s].mbps.elems); + } + d->sindex = d->min_trcount; + + if (d->trans) + { + for (state_num s = 0; s < d->tralloc; s++) + { + free (d->trans[s]); + free (d->fails[s]); + d->trans[s] = d->fails[s] = NULL; + } + d->trcount = 0; + } + + if (d->localeinfo.multibyte && d->mb_trans) + { + for (state_num s = -1; s < d->tralloc; s++) + { + free (d->mb_trans[s]); + d->mb_trans[s] = NULL; + } + for (state_num s = 0; s < d->min_trcount; s++) + d->states[s].mb_trindex = -1; + d->mb_trcount = 0; + } + } + + if (!d->tralloc) + realloc_trans_if_necessary (d); + + /* Current state. */ + state_num s = 0, s1 = 0; + + /* Current input character. */ + unsigned char const *p = (unsigned char const *) begin; + unsigned char const *mbp = p; + + /* Copy of d->trans so it can be optimized into a register. */ + state_num **trans = d->trans; + unsigned char eol = d->syntax.eolbyte; /* Likewise for eolbyte. 
*/ + unsigned char saved_end = *(unsigned char *) end; + *end = eol; + + if (multibyte) + { + memset (&d->mbs, 0, sizeof d->mbs); + if (d->mb_follows.alloc == 0) + alloc_position_set (&d->mb_follows, d->nleaves); + } + + idx_t nlcount = 0; + for (;;) + { + state_num *t; + while ((t = trans[s]) != NULL) + { + if (s < d->min_trcount) + { + if (!multibyte || d->states[s].mbps.nelem == 0) + { + while (t[*p] == s) + p++; + } + if (multibyte) + p = mbp = skip_remains_mb (d, p, mbp, end); + } + + if (multibyte) + { + s1 = s; + + if (d->states[s].mbps.nelem == 0 + || d->localeinfo.sbctowc[*p] != WEOF || (char *) p >= end) + { + /* If an input character does not match ANYCHAR, do it + like a single-byte character. */ + s = t[*p++]; + } + else + { + s = transit_state (d, s, &p, (unsigned char *) end); + mbp = p; + trans = d->trans; + } + } + else + { + s1 = t[*p++]; + t = trans[s1]; + if (! t) + { + state_num tmp = s; + s = s1; + s1 = tmp; /* swap */ + break; + } + if (s < d->min_trcount) + { + while (t[*p] == s1) + p++; + } + s = t[*p++]; + } + } + + if (s < 0) + { + if (s == -2) + { + s = build_state (s1, d, p[-1]); + trans = d->trans; + } + else if ((char *) p <= end && p[-1] == eol && 0 <= d->newlines[s1]) + { + /* The previous character was a newline. Count it, and skip + checking of multibyte character boundary until here. */ + nlcount++; + mbp = p; + + s = (allow_nl ? d->newlines[s1] + : d->syntax.sbit[eol] == CTX_NEWLINE ? 0 + : d->syntax.sbit[eol] == CTX_LETTER ? 
d->min_trcount - 1 + : d->initstate_notbol); + } + else + { + p = NULL; + goto done; + } + } + else if (d->fails[s]) + { + if ((d->success[s] & d->syntax.sbit[*p]) + || ((char *) p == end + && accepts_in_context (d->states[s].context, CTX_NEWLINE, s, + d))) + goto done; + + if (multibyte && s < d->min_trcount) + p = mbp = skip_remains_mb (d, p, mbp, end); + + s1 = s; + if (!multibyte || d->states[s].mbps.nelem == 0 + || d->localeinfo.sbctowc[*p] != WEOF || (char *) p >= end) + { + /* If a input character does not match ANYCHAR, do it + like a single-byte character. */ + s = d->fails[s][*p++]; + } + else + { + s = transit_state (d, s, &p, (unsigned char *) end); + mbp = p; + trans = d->trans; + } + } + else + { + build_state (s, d, p[0]); + trans = d->trans; + } + } + + done: + if (count) + *count += nlcount; + *end = saved_end; + return (char *) p; +} + +/* Specialized versions of dfaexec for multibyte and single-byte cases. + This is for performance, as dfaexec_main is an inline function. */ + +static char * +dfaexec_mb (struct dfa *d, char const *begin, char *end, + bool allow_nl, ptrdiff_t *count, bool *backref) +{ + return dfaexec_main (d, begin, end, allow_nl, count, true); +} + +static char * +dfaexec_sb (struct dfa *d, char const *begin, char *end, + bool allow_nl, ptrdiff_t *count, bool *backref) +{ + return dfaexec_main (d, begin, end, allow_nl, count, false); +} + +/* Always set *BACKREF and return BEGIN. Use this wrapper for + any regexp that uses a construct not supported by this code. */ +static char * +dfaexec_noop (struct dfa *d, char const *begin, char *end, + bool allow_nl, ptrdiff_t *count, bool *backref) +{ + *backref = true; + return (char *) begin; +} + +/* Like dfaexec_main (D, BEGIN, END, ALLOW_NL, COUNT, D->localeinfo.multibyte), + but faster and set *BACKREF if the DFA code does not support this + regexp usage. 
*/ + +char * +dfaexec (struct dfa *d, char const *begin, char *end, + bool allow_nl, ptrdiff_t *count, bool *backref) +{ + return d->dfaexec (d, begin, end, allow_nl, count, backref); +} + +struct dfa * +dfasuperset (struct dfa const *d) +{ + return d->superset; +} + +bool +dfaisfast (struct dfa const *d) +{ + return d->fast; +} + +static void +free_mbdata (struct dfa *d) +{ + free (d->multibyte_prop); + free (d->lex.brack.chars); + free (d->mb_follows.elems); + + if (d->mb_trans) + { + state_num s; + for (s = -1; s < d->tralloc; s++) + free (d->mb_trans[s]); + free (d->mb_trans - 2); + } +} + +/* Return true if every construct in D is supported by this DFA matcher. */ +static bool _GL_ATTRIBUTE_PURE +dfa_supported (struct dfa const *d) +{ + for (idx_t i = 0; i < d->tindex; i++) + { + switch (d->tokens[i]) + { + case BEGWORD: + case ENDWORD: + case LIMWORD: + case NOTLIMWORD: + if (!d->localeinfo.multibyte) + continue; + FALLTHROUGH; + case BACKREF: + case MBCSET: + return false; + } + } + return true; +} + +/* Disable use of the superset DFA if it is not likely to help + performance. */ +static void +maybe_disable_superset_dfa (struct dfa *d) +{ + if (!d->localeinfo.using_utf8) + return; + + bool have_backref = false; + for (idx_t i = 0; i < d->tindex; i++) + { + switch (d->tokens[i]) + { + case ANYCHAR: + /* Lowered. */ + abort (); + case BACKREF: + have_backref = true; + break; + case MBCSET: + /* Requires multi-byte algorithm. */ + return; + default: + break; + } + } + + if (!have_backref && d->superset) + { + /* The superset DFA is not likely to be much faster, so remove it. 
*/ + dfafree (d->superset); + free (d->superset); + d->superset = NULL; + } + + free_mbdata (d); + d->localeinfo.multibyte = false; + d->dfaexec = dfaexec_sb; + d->fast = true; +} + +static void +dfassbuild (struct dfa *d) +{ + struct dfa *sup = dfaalloc (); + + *sup = *d; + sup->localeinfo.multibyte = false; + sup->dfaexec = dfaexec_sb; + sup->multibyte_prop = NULL; + sup->superset = NULL; + sup->states = NULL; + sup->sindex = 0; + sup->constraints = NULL; + sup->separates = NULL; + sup->follows = NULL; + sup->tralloc = 0; + sup->trans = NULL; + sup->fails = NULL; + sup->success = NULL; + sup->newlines = NULL; + + sup->charclasses = xnmalloc (sup->calloc, sizeof *sup->charclasses); + if (d->cindex) + { + memcpy (sup->charclasses, d->charclasses, + d->cindex * sizeof *sup->charclasses); + } + + sup->tokens = xnmalloc (d->tindex, 2 * sizeof *sup->tokens); + sup->talloc = d->tindex * 2; + + bool have_achar = false; + bool have_nchar = false; + idx_t j; + for (idx_t i = j = 0; i < d->tindex; i++) + { + switch (d->tokens[i]) + { + case ANYCHAR: + case MBCSET: + case BACKREF: + { + charclass ccl; + fillset (&ccl); + sup->tokens[j++] = CSET + charclass_index (sup, &ccl); + sup->tokens[j++] = STAR; + if (d->tokens[i + 1] == QMARK || d->tokens[i + 1] == STAR + || d->tokens[i + 1] == PLUS) + i++; + have_achar = true; + } + break; + case BEGWORD: + case ENDWORD: + case LIMWORD: + case NOTLIMWORD: + if (d->localeinfo.multibyte) + { + /* These constraints aren't supported in a multibyte locale. + Ignore them in the superset DFA. */ + sup->tokens[j++] = EMPTY; + break; + } + FALLTHROUGH; + default: + sup->tokens[j++] = d->tokens[i]; + if ((0 <= d->tokens[i] && d->tokens[i] < NOTCHAR) + || d->tokens[i] >= CSET) + have_nchar = true; + break; + } + } + sup->tindex = j; + + if (have_nchar && (have_achar || d->localeinfo.multibyte)) + d->superset = sup; + else + { + dfafree (sup); + free (sup); + } +} + +/* Parse a string S of length LEN into D (but skip this step if S is null). 
+ Then analyze D and build a matcher for it. + SEARCHFLAG says whether to build a searching or an exact matcher. */ +void +dfacomp (char const *s, idx_t len, struct dfa *d, bool searchflag) +{ + if (s != NULL) + dfaparse (s, len, d); + + dfassbuild (d); + + if (dfa_supported (d)) + { + maybe_disable_superset_dfa (d); + dfaanalyze (d, searchflag); + } + else + { + d->dfaexec = dfaexec_noop; + } + + if (d->superset) + { + d->fast = true; + dfaanalyze (d->superset, searchflag); + } +} + +/* Free the storage held by the components of a dfa. */ +void +dfafree (struct dfa *d) +{ + free (d->charclasses); + free (d->tokens); + + if (d->localeinfo.multibyte) + free_mbdata (d); + + free (d->constraints); + free (d->separates); + + for (idx_t i = 0; i < d->sindex; i++) + { + free (d->states[i].elems.elems); + free (d->states[i].mbps.elems); + } + free (d->states); + + if (d->follows) + { + for (idx_t i = 0; i < d->tindex; i++) + free (d->follows[i].elems); + free (d->follows); + } + + if (d->trans) + { + for (idx_t i = 0; i < d->tralloc; i++) + { + free (d->trans[i]); + free (d->fails[i]); + } + + free (d->trans - 2); + free (d->fails); + free (d->newlines); + free (d->success); + } + + if (d->superset) + { + dfafree (d->superset); + free (d->superset); + } +} + +/* Having found the postfix representation of the regular expression, + try to find a long sequence of characters that must appear in any line + containing the r.e. + Finding a "longest" sequence is beyond the scope here; + we take an easy way out and hope for the best. + (Take "(ab|a)b"--please.) 
+ + We do a bottom-up calculation of sequences of characters that must appear + in matches of r.e.'s represented by trees rooted at the nodes of the postfix + representation: + sequences that must appear at the left of the match ("left") + sequences that must appear at the right of the match ("right") + lists of sequences that must appear somewhere in the match ("in") + sequences that must constitute the match ("is") + + When we get to the root of the tree, we use one of the longest of its + calculated "in" sequences as our answer. + + The sequences calculated for the various types of node (in pseudo ANSI c) + are shown below. "p" is the operand of unary operators (and the left-hand + operand of binary operators); "q" is the right-hand operand of binary + operators. + + "ZERO" means "a zero-length sequence" below. + + Type left right is in + ---- ---- ----- -- -- + char c # c # c # c # c + + ANYCHAR ZERO ZERO ZERO ZERO + + MBCSET ZERO ZERO ZERO ZERO + + CSET ZERO ZERO ZERO ZERO + + STAR ZERO ZERO ZERO ZERO + + QMARK ZERO ZERO ZERO ZERO + + PLUS p->left p->right ZERO p->in + + CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus + p->left : q->right : q->is!=ZERO) ? q->in plus + p->is##q->left p->right##q->is p->is##q->is : p->right##q->left + ZERO + + OR longest common longest common (do p->is and substrings common + leading trailing to q->is have same p->in and + (sub)sequence (sub)sequence q->in length and content) ? + of p->left of p->right + and q->left and q->right p->is : NULL + + If there's anything else we recognize in the tree, all four sequences get set + to zero-length sequences. If there's something we don't recognize in the + tree, we just return a zero-length sequence. + + Break ties in favor of infrequent letters (choosing 'zzz' in preference to + 'aaa')? + + And ... 
is it here or someplace that we might ponder "optimizations" such as + egrep 'psi|epsilon' -> egrep 'psi' + egrep 'pepsi|epsilon' -> egrep 'epsi' + (Yes, we now find "epsi" as a "string + that must occur", but we might also + simplify the *entire* r.e. being sought) + grep '[c]' -> grep 'c' + grep '(ab|a)b' -> grep 'ab' + grep 'ab*' -> grep 'a' + grep 'a*b' -> grep 'b' + + There are several issues: + + Is optimization easy (enough)? + + Does optimization actually accomplish anything, + or is the automaton you get from "psi|epsilon" (for example) + the same as the one you get from "psi" (for example)? + + Are optimizable r.e.'s likely to be used in real-life situations + (something like 'ab*' is probably unlikely; something like is + 'psi|epsilon' is likelier)? */ + +static char * +icatalloc (char *old, char const *new) +{ + idx_t newsize = strlen (new); + if (newsize == 0) + return old; + idx_t oldsize = strlen (old); + char *result = xrealloc (old, oldsize + newsize + 1); + memcpy (result + oldsize, new, newsize + 1); + return result; +} + +static void +freelist (char **cpp) +{ + while (*cpp) + free (*cpp++); +} + +static char ** +enlist (char **cpp, char *new, idx_t len) +{ + new = memcpy (xmalloc (len + 1), new, len); + new[len] = '\0'; + /* Is there already something in the list that's new (or longer)? */ + idx_t i; + for (i = 0; cpp[i] != NULL; i++) + if (strstr (cpp[i], new) != NULL) + { + free (new); + return cpp; + } + /* Eliminate any obsoleted strings. */ + for (idx_t j = 0; cpp[j] != NULL; ) + if (strstr (new, cpp[j]) == NULL) + ++j; + else + { + free (cpp[j]); + if (--i == j) + break; + cpp[j] = cpp[i]; + cpp[i] = NULL; + } + /* Add the new string. */ + cpp = xnrealloc (cpp, i + 2, sizeof *cpp); + cpp[i] = new; + cpp[i + 1] = NULL; + return cpp; +} + +/* Given pointers to two strings, return a pointer to an allocated + list of their distinct common substrings. 
*/ +static char ** +comsubs (char *left, char const *right) +{ + char **cpp = xzalloc (sizeof *cpp); + + for (char *lcp = left; *lcp != '\0'; lcp++) + { + idx_t len = 0; + char *rcp = strchr (right, *lcp); + while (rcp != NULL) + { + idx_t i; + for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i) + continue; + if (i > len) + len = i; + rcp = strchr (rcp + 1, *lcp); + } + if (len != 0) + cpp = enlist (cpp, lcp, len); + } + return cpp; +} + +static char ** +addlists (char **old, char **new) +{ + for (; *new; new++) + old = enlist (old, *new, strlen (*new)); + return old; +} + +/* Given two lists of substrings, return a new list giving substrings + common to both. */ +static char ** +inboth (char **left, char **right) +{ + char **both = xzalloc (sizeof *both); + + for (idx_t lnum = 0; left[lnum] != NULL; lnum++) + { + for (idx_t rnum = 0; right[rnum] != NULL; rnum++) + { + char **temp = comsubs (left[lnum], right[rnum]); + both = addlists (both, temp); + freelist (temp); + free (temp); + } + } + return both; +} + +typedef struct must must; + +struct must +{ + char **in; + char *left; + char *right; + char *is; + bool begline; + bool endline; + must *prev; +}; + +static must * +allocmust (must *mp, idx_t size) +{ + must *new_mp = xmalloc (sizeof *new_mp); + new_mp->in = xzalloc (sizeof *new_mp->in); + new_mp->left = xzalloc (size); + new_mp->right = xzalloc (size); + new_mp->is = xzalloc (size); + new_mp->begline = false; + new_mp->endline = false; + new_mp->prev = mp; + return new_mp; +} + +static void +resetmust (must *mp) +{ + freelist (mp->in); + mp->in[0] = NULL; + mp->left[0] = mp->right[0] = mp->is[0] = '\0'; + mp->begline = false; + mp->endline = false; +} + +static void +freemust (must *mp) +{ + freelist (mp->in); + free (mp->in); + free (mp->left); + free (mp->right); + free (mp->is); + free (mp); +} + +struct dfamust * +dfamust (struct dfa const *d) +{ + must *mp = NULL; + char const *result = ""; + bool exact = false; + bool begline = false; + bool endline 
= false; + bool need_begline = false; + bool need_endline = false; + bool case_fold_unibyte = d->syntax.case_fold & !d->localeinfo.multibyte; + + for (idx_t ri = 1; ri + 1 < d->tindex; ri++) + { + token t = d->tokens[ri]; + switch (t) + { + case BEGLINE: + mp = allocmust (mp, 2); + mp->begline = true; + need_begline = true; + break; + case ENDLINE: + mp = allocmust (mp, 2); + mp->endline = true; + need_endline = true; + break; + case LPAREN: + case RPAREN: + assert (!"neither LPAREN nor RPAREN may appear here"); + + case EMPTY: + case BEGWORD: + case ENDWORD: + case LIMWORD: + case NOTLIMWORD: + case BACKREF: + case ANYCHAR: + case MBCSET: + mp = allocmust (mp, 2); + break; + + case STAR: + case QMARK: + resetmust (mp); + break; + + case OR: + { + char **new; + must *rmp = mp; + must *lmp = mp = mp->prev; + idx_t j, ln, rn, n; + + /* Guaranteed to be. Unlikely, but ... */ + if (streq (lmp->is, rmp->is)) + { + lmp->begline &= rmp->begline; + lmp->endline &= rmp->endline; + } + else + { + lmp->is[0] = '\0'; + lmp->begline = false; + lmp->endline = false; + } + /* Left side--easy */ + idx_t i = 0; + while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i]) + ++i; + lmp->left[i] = '\0'; + /* Right side */ + ln = strlen (lmp->right); + rn = strlen (rmp->right); + n = ln; + if (n > rn) + n = rn; + for (i = 0; i < n; ++i) + if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1]) + break; + for (j = 0; j < i; ++j) + lmp->right[j] = lmp->right[(ln - i) + j]; + lmp->right[j] = '\0'; + new = inboth (lmp->in, rmp->in); + freelist (lmp->in); + free (lmp->in); + lmp->in = new; + freemust (rmp); + } + break; + + case PLUS: + mp->is[0] = '\0'; + break; + + case END: + assert (!mp->prev); + for (idx_t i = 0; mp->in[i] != NULL; i++) + if (strlen (mp->in[i]) > strlen (result)) + result = mp->in[i]; + if (streq (result, mp->is)) + { + if ((!need_begline || mp->begline) && (!need_endline + || mp->endline)) + exact = true; + begline = mp->begline; + endline = mp->endline; + } + goto 
done; + + case CAT: + { + must *rmp = mp; + must *lmp = mp = mp->prev; + + /* In. Everything in left, plus everything in + right, plus concatenation of + left's right and right's left. */ + lmp->in = addlists (lmp->in, rmp->in); + if (lmp->right[0] != '\0' && rmp->left[0] != '\0') + { + idx_t lrlen = strlen (lmp->right); + idx_t rllen = strlen (rmp->left); + char *tp = xmalloc (lrlen + rllen); + memcpy (tp, lmp->right, lrlen); + memcpy (tp + lrlen, rmp->left, rllen); + lmp->in = enlist (lmp->in, tp, lrlen + rllen); + free (tp); + } + /* Left-hand */ + if (lmp->is[0] != '\0') + lmp->left = icatalloc (lmp->left, rmp->left); + /* Right-hand */ + if (rmp->is[0] == '\0') + lmp->right[0] = '\0'; + lmp->right = icatalloc (lmp->right, rmp->right); + /* Guaranteed to be */ + if ((lmp->is[0] != '\0' || lmp->begline) + && (rmp->is[0] != '\0' || rmp->endline)) + { + lmp->is = icatalloc (lmp->is, rmp->is); + lmp->endline = rmp->endline; + } + else + { + lmp->is[0] = '\0'; + lmp->begline = false; + lmp->endline = false; + } + freemust (rmp); + } + break; + + case '\0': + /* Not on *my* shift. */ + goto done; + + default: + if (CSET <= t) + { + /* If T is a singleton, or if case-folding in a unibyte + locale and T's members all case-fold to the same char, + convert T to one of its members. Otherwise, do + nothing further with T. */ + charclass *ccl = &d->charclasses[t - CSET]; + int j; + for (j = 0; j < NOTCHAR; j++) + if (tstbit (j, ccl)) + break; + if (! (j < NOTCHAR)) + { + mp = allocmust (mp, 2); + break; + } + t = j; + while (++j < NOTCHAR) + if (tstbit (j, ccl) + && ! 
(case_fold_unibyte + && toupper (j) == toupper (t))) + break; + if (j < NOTCHAR) + { + mp = allocmust (mp, 2); + break; + } + } + + idx_t rj = ri + 2; + if (d->tokens[ri + 1] == CAT) + { + for (; rj < d->tindex - 1; rj += 2) + { + if ((rj != ri && (d->tokens[rj] <= 0 + || NOTCHAR <= d->tokens[rj])) + || d->tokens[rj + 1] != CAT) + break; + } + } + mp = allocmust (mp, ((rj - ri) >> 1) + 1); + mp->is[0] = mp->left[0] = mp->right[0] + = case_fold_unibyte ? toupper (t) : t; + + idx_t i; + for (i = 1; ri + 2 < rj; i++) + { + ri += 2; + t = d->tokens[ri]; + mp->is[i] = mp->left[i] = mp->right[i] + = case_fold_unibyte ? toupper (t) : t; + } + mp->is[i] = mp->left[i] = mp->right[i] = '\0'; + mp->in = enlist (mp->in, mp->is, i); + break; + } + } + done:; + + struct dfamust *dm = NULL; + if (*result) + { + dm = xmalloc (FLEXSIZEOF (struct dfamust, must, strlen (result) + 1)); + dm->exact = exact; + dm->begline = begline; + dm->endline = endline; + strcpy (dm->must, result); + } + + while (mp) + { + must *prev = mp->prev; + freemust (mp); + mp = prev; + } + + return dm; +} + +void +dfamustfree (struct dfamust *dm) +{ + free (dm); +} + +struct dfa * +dfaalloc (void) +{ + return xmalloc (sizeof (struct dfa)); +} + +/* Initialize DFA. */ +void +dfasyntax (struct dfa *dfa, struct localeinfo const *linfo, + reg_syntax_t bits, int dfaopts) +{ + memset (dfa, 0, offsetof (struct dfa, dfaexec)); + dfa->dfaexec = linfo->multibyte ? dfaexec_mb : dfaexec_sb; + dfa->localeinfo = *linfo; + + dfa->fast = !dfa->localeinfo.multibyte; + + dfa->canychar = -1; + dfa->syntax.syntax_bits_set = true; + dfa->syntax.case_fold = (bits & RE_ICASE) != 0; + dfa->syntax.anchor = (dfaopts & DFA_ANCHOR) != 0; + dfa->syntax.eolbyte = dfaopts & DFA_EOL_NUL ? 
'\0' : '\n'; + dfa->syntax.syntax_bits = bits; + + for (int i = CHAR_MIN; i <= CHAR_MAX; ++i) + { + unsigned char uc = i; + + dfa->syntax.sbit[uc] = char_context (dfa, uc); + switch (dfa->syntax.sbit[uc]) + { + case CTX_LETTER: + setbit (uc, &dfa->syntax.letters); + break; + case CTX_NEWLINE: + setbit (uc, &dfa->syntax.newline); + break; + } + + /* POSIX requires that the five bytes in "\n\r./" (including the + terminating NUL) cannot occur inside a multibyte character. */ + dfa->syntax.never_trail[uc] = (dfa->localeinfo.using_utf8 + ? (uc & 0xc0) != 0x80 + : strchr ("\n\r./", uc) != NULL); + } +} + +/* Initialize TO by copying FROM's syntax settings. */ +void +dfacopysyntax (struct dfa *to, struct dfa const *from) +{ + memset (to, 0, offsetof (struct dfa, syntax)); + to->canychar = -1; + to->fast = from->fast; + to->syntax = from->syntax; + to->dfaexec = from->dfaexec; + to->localeinfo = from->localeinfo; +} + +/* vim:set shiftwidth=2: */ diff --git a/contrib/grep/src/dfa.h b/contrib/grep/lib/dfa.h similarity index 63% rename from contrib/grep/src/dfa.h rename to contrib/grep/lib/dfa.h index 80ddac3a65..c5bff89818 100644 --- a/contrib/grep/src/dfa.h +++ b/contrib/grep/lib/dfa.h @@ -1,5 +1,5 @@ /* dfa.h - declarations for GNU deterministic regexp compiler - Copyright (C) 1988, 1998, 2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 1988, 1998, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ #include #include -#include "xalloc.h" /* for _GL_ATTRIBUTE_MALLOC */ +struct localeinfo; /* See localeinfo.h. */ /* Element of a list of strings, at least one of which is known to appear in any R.E. matching the DFA. */ @@ -31,34 +31,63 @@ struct dfamust bool exact; bool begline; bool endline; - char *must; + char must[FLEXIBLE_ARRAY_MEMBER]; }; /* The dfa structure. It is completely opaque. 
*/ struct dfa; +/* Needed when Gnulib is not used. */ +#ifndef _GL_ATTRIBUTE_MALLOC +# define _GL_ATTRIBUTE_MALLOC +#endif + /* Entry points. */ /* Allocate a struct dfa. The struct dfa is completely opaque. + It should be initialized via dfasyntax or dfacopysyntax before other use. The returned pointer should be passed directly to free() after calling dfafree() on it. */ extern struct dfa *dfaalloc (void) _GL_ATTRIBUTE_MALLOC; -/* Build and return the struct dfamust from the given struct dfa. */ +/* DFA options that can be ORed together, for dfasyntax's 4th arg. */ +enum + { + /* ^ and $ match only the start and end of data, and do not match + end-of-line within data. This is always false for grep, but + possibly true for other apps. */ + DFA_ANCHOR = 1 << 0, + + /* '\0' in data is end-of-line, instead of the traditional '\n'. */ + DFA_EOL_NUL = 1 << 1 + }; + +/* Initialize or reinitialize a DFA. The arguments are: + 1. The DFA to operate on. + 2. Information about the current locale. + 3. Syntax bits described in regex.h. + 4. Additional DFA options described above. */ +extern void dfasyntax (struct dfa *, struct localeinfo const *, + reg_syntax_t, int); + +/* Initialize or reinitialize a DFA from an already-initialized DFA. */ +extern void dfacopysyntax (struct dfa *, struct dfa const *); + +/* Parse the given string of given length into the given struct dfa. */ +extern void dfaparse (char const *, ptrdiff_t, struct dfa *); + +/* Allocate and return a struct dfamust from a struct dfa that was + initialized by dfaparse and not yet given to dfacomp. */ extern struct dfamust *dfamust (struct dfa const *); /* Free the storage held by the components of a struct dfamust. */ extern void dfamustfree (struct dfamust *); -/* dfasyntax() takes three arguments; the first sets the syntax bits described - earlier in this file, the second sets the case-folding flag, and the - third specifies the line terminator. 
*/ -extern void dfasyntax (reg_syntax_t, int, unsigned char); - /* Compile the given string of the given length into the given struct dfa. - Final argument is a flag specifying whether to build a searching or an - exact matcher. */ -extern void dfacomp (char const *, size_t, struct dfa *, int); + The last argument says whether to build a searching or an exact matcher. + A null first argument means the struct dfa has already been + initialized by dfaparse; the second argument is ignored. */ +extern void dfacomp (char const *, ptrdiff_t, struct dfa *, bool); /* Search through a buffer looking for a match to the given struct dfa. Find the first occurrence of a string matching the regexp in the @@ -67,13 +96,13 @@ extern void dfacomp (char const *, size_t, struct dfa *, int); points to the beginning of the buffer, and END points to the first byte after its end. Note however that we store a sentinel byte (usually newline) in *END, so the actual buffer must be one byte longer. - When NEWLINE is nonzero, newlines may appear in the matching string. + When ALLOW_NL is true, newlines may appear in the matching string. If COUNT is non-NULL, increment *COUNT once for each newline processed. Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we - encountered a back-reference (1) or not (0). The caller may use this - to decide whether to fall back on a backtracking matcher. */ + encountered a back-reference. The caller can use this to decide + whether to fall back on a backtracking matcher. */ extern char *dfaexec (struct dfa *d, char const *begin, char *end, - int newline, size_t *count, int *backref); + bool allow_nl, ptrdiff_t *count, bool *backref); /* Return a superset for D. The superset matches everything that D matches, along with some other strings (though the latter should be @@ -87,22 +116,6 @@ extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE; /* Free the storage held by the components of a struct dfa. 
*/ extern void dfafree (struct dfa *); -/* Entry points for people who know what they're doing. */ - -/* Initialize the components of a struct dfa. */ -extern void dfainit (struct dfa *); - -/* Incrementally parse a string of given length into a struct dfa. */ -extern void dfaparse (char const *, size_t, struct dfa *); - -/* Analyze a parsed regexp; second argument tells whether to build a searching - or an exact matcher. */ -extern void dfaanalyze (struct dfa *, int); - -/* Compute, for each possible character, the transitions out of a given - state, storing them in an array of integers. */ -extern void dfastate (ptrdiff_t, struct dfa *, ptrdiff_t []); - /* Error handling. */ /* dfawarn() is called by the regexp routines whenever a regex is compiled @@ -115,5 +128,3 @@ extern void dfawarn (const char *); takes a single argument, a NUL-terminated string describing the error. The user must supply a dfaerror. */ extern _Noreturn void dfaerror (const char *); - -extern int using_utf8 (void); diff --git a/contrib/grep/lib/dirent--.h b/contrib/grep/lib/dirent--.h deleted file mode 100644 index aa3f4929cb..0000000000 --- a/contrib/grep/lib/dirent--.h +++ /dev/null @@ -1,24 +0,0 @@ -/* Like dirent.h, but redefine some names to avoid glitches. - - Copyright (C) 2009-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* Written by Eric Blake. 
*/ - -#include "dirent-safer.h" - -#undef opendir -#define opendir opendir_safer -#define GNULIB_defined_opendir 1 diff --git a/contrib/grep/lib/dirent-private.h b/contrib/grep/lib/dirent-private.h deleted file mode 100644 index aa01f6026d..0000000000 --- a/contrib/grep/lib/dirent-private.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Private details of the DIR type. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#ifndef _DIRENT_PRIVATE_H -#define _DIRENT_PRIVATE_H 1 - -#define WIN32_LEAN_AND_MEAN -#include - -struct gl_directory -{ - /* Status, or error code to produce in next readdir() call. - -2 means the end of the directory is already reached, - -1 means the entry was already filled by FindFirstFile, - 0 means the entry needs to be filled using FindNextFile. - A positive value is an error code. */ - int status; - /* Handle, reading the directory, at current position. */ - HANDLE current; - /* Found directory entry. */ - WIN32_FIND_DATA entry; - /* Argument to pass to FindFirstFile. It consists of the absolutized - directory name, followed by a directory separator and the wildcards. 
*/ - char dir_name_mask[1]; -}; - -#endif /* _DIRENT_PRIVATE_H */ diff --git a/contrib/grep/lib/dirent-safer.h b/contrib/grep/lib/dirent-safer.h deleted file mode 100644 index da62b2e315..0000000000 --- a/contrib/grep/lib/dirent-safer.h +++ /dev/null @@ -1,22 +0,0 @@ -/* Invoke dirent-like functions, but avoid some glitches. - - Copyright (C) 2009-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* Written by Eric Blake. */ - -#include - -DIR *opendir_safer (const char *name); diff --git a/contrib/grep/lib/dirname-lgpl.c b/contrib/grep/lib/dirname-lgpl.c index 98391356fd..27d4374c2e 100644 --- a/contrib/grep/lib/dirname-lgpl.c +++ b/contrib/grep/lib/dirname-lgpl.c @@ -1,6 +1,6 @@ /* dirname.c -- return all but the last element in a file name - Copyright (C) 1990, 1998, 2000-2001, 2003-2006, 2009-2015 Free Software + Copyright (C) 1990, 1998, 2000-2001, 2003-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ #include diff --git a/contrib/grep/lib/dirname.h b/contrib/grep/lib/dirname.h index 55021ec5e6..8c12d93b51 100644 --- a/contrib/grep/lib/dirname.h +++ b/contrib/grep/lib/dirname.h @@ -1,6 +1,6 @@ /* Take file names apart into directory and base names. - Copyright (C) 1998, 2001, 2003-2006, 2009-2015 Free Software Foundation, + Copyright (C) 1998, 2001, 2003-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef DIRNAME_H_ # define DIRNAME_H_ 1 @@ -36,7 +36,7 @@ extern "C" { #endif # if GNULIB_DIRNAME -char *base_name (char const *file); +char *base_name (char const *file) _GL_ATTRIBUTE_MALLOC; char *dir_name (char const *file); # endif diff --git a/contrib/grep/lib/dosname.h b/contrib/grep/lib/dosname.h index 893baf6ccf..5782960094 100644 --- a/contrib/grep/lib/dosname.h +++ b/contrib/grep/lib/dosname.h @@ -1,6 +1,6 @@ /* File names on MS-DOS/Windows systems. - Copyright (C) 2000-2001, 2004-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2000-2001, 2004-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,16 +13,15 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . + along with this program. If not, see . From Paul Eggert and Jim Meyering. 
*/ #ifndef _DOSNAME_H #define _DOSNAME_H -#if (defined _WIN32 || defined __WIN32__ || \ - defined __MSDOS__ || defined __CYGWIN__ || \ - defined __EMX__ || defined __DJGPP__) +#if (defined _WIN32 || defined __CYGWIN__ \ + || defined __EMX__ || defined __MSDOS__ || defined __DJGPP__) /* This internal macro assumes ASCII, but all hosts that support drive letters use ASCII. */ # define _IS_DRIVE_LETTER(C) (((unsigned int) (C) | ('a' - 'A')) - 'a' \ diff --git a/contrib/grep/lib/dup-safer.c b/contrib/grep/lib/dup-safer-flag.c similarity index 56% copy from contrib/grep/lib/dup-safer.c copy to contrib/grep/lib/dup-safer-flag.c index c6436cd9ba..b2c345e0ed 100644 --- a/contrib/grep/lib/dup-safer.c +++ b/contrib/grep/lib/dup-safer-flag.c @@ -1,6 +1,7 @@ -/* Invoke dup, but avoid some glitches. +/* Duplicate a file descriptor result, avoiding clobbering + STD{IN,OUT,ERR}_FILENO, with specific flags. - Copyright (C) 2001, 2004-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001, 2004-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,22 +14,25 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ -/* Written by Paul Eggert. */ +/* Written by Paul Eggert and Eric Blake. */ #include +/* Specification. */ #include "unistd-safer.h" #include #include /* Like dup, but do not return STDIN_FILENO, STDOUT_FILENO, or - STDERR_FILENO. */ + STDERR_FILENO. If FLAG contains O_CLOEXEC, behave like + fcntl(F_DUPFD_CLOEXEC) rather than fcntl(F_DUPFD). */ int -dup_safer (int fd) +dup_safer_flag (int fd, int flag) { - return fcntl (fd, F_DUPFD, STDERR_FILENO + 1); + return fcntl (fd, (flag & O_CLOEXEC) ? 
F_DUPFD_CLOEXEC : F_DUPFD, + STDERR_FILENO + 1); } diff --git a/contrib/grep/lib/dup-safer.c b/contrib/grep/lib/dup-safer.c index c6436cd9ba..4ada3fe8d2 100644 --- a/contrib/grep/lib/dup-safer.c +++ b/contrib/grep/lib/dup-safer.c @@ -1,6 +1,6 @@ /* Invoke dup, but avoid some glitches. - Copyright (C) 2001, 2004-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001, 2004-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert. */ diff --git a/contrib/grep/lib/dup.c b/contrib/grep/lib/dup.c deleted file mode 100644 index 20f0453405..0000000000 --- a/contrib/grep/lib/dup.c +++ /dev/null @@ -1,61 +0,0 @@ -/* Duplicate an open file descriptor. - - Copyright (C) 2011-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. 
*/ -#include - -#include - -#include "msvc-inval.h" - -#undef dup - -#if HAVE_MSVC_INVALID_PARAMETER_HANDLER -static int -dup_nothrow (int fd) -{ - int result; - - TRY_MSVC_INVAL - { - result = dup (fd); - } - CATCH_MSVC_INVAL - { - result = -1; - errno = EBADF; - } - DONE_MSVC_INVAL; - - return result; -} -#else -# define dup_nothrow dup -#endif - -int -rpl_dup (int fd) -{ - int result = dup_nothrow (fd); -#if REPLACE_FCHDIR - if (result >= 0) - result = _gl_register_dup (fd, result); -#endif - return result; -} diff --git a/contrib/grep/lib/dup2.c b/contrib/grep/lib/dup2.c index 0e13214c82..88ef259131 100644 --- a/contrib/grep/lib/dup2.c +++ b/contrib/grep/lib/dup2.c @@ -1,6 +1,6 @@ /* Duplicate an open file descriptor to a specified file descriptor. - Copyright (C) 1999, 2004-2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 1999, 2004-2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* written by Paul Eggert */ @@ -29,16 +29,45 @@ # undef dup2 -# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ +# if defined _WIN32 && ! defined __CYGWIN__ /* Get declarations of the native Windows API functions. */ # define WIN32_LEAN_AND_MEAN # include -# include "msvc-inval.h" +# if HAVE_MSVC_INVALID_PARAMETER_HANDLER +# include "msvc-inval.h" +# endif /* Get _get_osfhandle. 
*/ -# include "msvc-nothrow.h" +# if GNULIB_MSVC_NOTHROW +# include "msvc-nothrow.h" +# else +# include +# endif + +# if HAVE_MSVC_INVALID_PARAMETER_HANDLER +static int +dup2_nothrow (int fd, int desired_fd) +{ + int result; + + TRY_MSVC_INVAL + { + result = dup2 (fd, desired_fd); + } + CATCH_MSVC_INVAL + { + errno = EBADF; + result = -1; + } + DONE_MSVC_INVAL; + + return result; +} +# else +# define dup2_nothrow dup2 +# endif static int ms_windows_dup2 (int fd, int desired_fd) @@ -59,23 +88,14 @@ ms_windows_dup2 (int fd, int desired_fd) } /* Wine 1.0.1 return 0 when desired_fd is negative but not -1: - http://bugs.winehq.org/show_bug.cgi?id=21289 */ + https://bugs.winehq.org/show_bug.cgi?id=21289 */ if (desired_fd < 0) { errno = EBADF; return -1; } - TRY_MSVC_INVAL - { - result = dup2 (fd, desired_fd); - } - CATCH_MSVC_INVAL - { - errno = EBADF; - result = -1; - } - DONE_MSVC_INVAL; + result = dup2_nothrow (fd, desired_fd); if (result == 0) result = desired_fd; @@ -85,6 +105,57 @@ ms_windows_dup2 (int fd, int desired_fd) # define dup2 ms_windows_dup2 +# elif defined __KLIBC__ + +# include + +static int +klibc_dup2dirfd (int fd, int desired_fd) +{ + int tempfd; + int dupfd; + + tempfd = open ("NUL", O_RDONLY); + if (tempfd == -1) + return -1; + + if (tempfd == desired_fd) + { + close (tempfd); + + char path[_MAX_PATH]; + if (__libc_Back_ioFHToPath (fd, path, sizeof (path))) + return -1; + + return open(path, O_RDONLY); + } + + dupfd = klibc_dup2dirfd (fd, desired_fd); + + close (tempfd); + + return dupfd; +} + +static int +klibc_dup2 (int fd, int desired_fd) +{ + int dupfd; + struct stat sbuf; + + dupfd = dup2 (fd, desired_fd); + if (dupfd == -1 && errno == ENOTSUP \ + && !fstat (fd, &sbuf) && S_ISDIR (sbuf.st_mode)) + { + close (desired_fd); + + return klibc_dup2dirfd (fd, desired_fd); + } + + return dupfd; +} + +# define dup2 klibc_dup2 # endif int diff --git a/contrib/grep/lib/error.c b/contrib/grep/lib/error.c index 0ac7695185..3657b51cdf 100644 --- 
a/contrib/grep/lib/error.c +++ b/contrib/grep/lib/error.c @@ -1,5 +1,5 @@ /* Error handler for noninteractive utilities - Copyright (C) 1990-1998, 2000-2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 1990-1998, 2000-2007, 2009-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. This program is free software: you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by David MacKenzie . */ @@ -42,6 +42,8 @@ # define USE_UNLOCKED_IO 0 # define _GL_ATTRIBUTE_FORMAT_PRINTF(a, b) # define _GL_ARG_NONNULL(a) +#else +# include "getprogname.h" #endif #if USE_UNLOCKED_IO @@ -91,35 +93,37 @@ extern void __error_at_line (int status, int errnum, const char *file_name, # include # include -# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ +# if defined _WIN32 && ! defined __CYGWIN__ /* Get declarations of the native Windows API functions. */ # define WIN32_LEAN_AND_MEAN # include /* Get _get_osfhandle. */ -# include "msvc-nothrow.h" +# if GNULIB_MSVC_NOTHROW +# include "msvc-nothrow.h" +# else +# include +# endif # endif /* The gnulib override of fcntl is not needed in this file. */ # undef fcntl -# if !HAVE_DECL_STRERROR_R +# if !(GNULIB_STRERROR_R_POSIX || HAVE_DECL_STRERROR_R) # ifndef HAVE_DECL_STRERROR_R "this configure-time declaration test was not run" # endif # if STRERROR_R_CHAR_P -char *strerror_r (); +char *strerror_r (int errnum, char *buf, size_t buflen); # else -int strerror_r (); +int strerror_r (int errnum, char *buf, size_t buflen); # endif # endif -/* The calling program should define program_name and set it to the - name of the executing program. 
*/ -extern char *program_name; +# define program_name getprogname () -# if HAVE_STRERROR_R || defined strerror_r +# if GNULIB_STRERROR_R_POSIX || HAVE_STRERROR_R || defined strerror_r # define __strerror_r strerror_r -# endif /* HAVE_STRERROR_R || defined strerror_r */ +# endif /* GNULIB_STRERROR_R_POSIX || HAVE_STRERROR_R || defined strerror_r */ #endif /* not _LIBC */ #if !_LIBC @@ -127,7 +131,7 @@ extern char *program_name; static int is_open (int fd) { -# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ +# if defined _WIN32 && ! defined __CYGWIN__ /* On native Windows: The initial state of unassigned standard file descriptors is that they are open but point to an INVALID_HANDLE_VALUE. There is no fcntl, and the gnulib replacement fcntl does not support @@ -172,9 +176,9 @@ print_errno_message (int errnum) { char const *s; -#if defined HAVE_STRERROR_R || _LIBC +#if _LIBC || GNULIB_STRERROR_R_POSIX || defined HAVE_STRERROR_R char errbuf[1024]; -# if _LIBC || STRERROR_R_CHAR_P +# if _LIBC || (!GNULIB_STRERROR_R_POSIX && STRERROR_R_CHAR_P) s = __strerror_r (errnum, errbuf, sizeof errbuf); # else if (__strerror_r (errnum, errbuf, sizeof errbuf) == 0) @@ -268,7 +272,6 @@ error_tail (int status, int errnum, const char *message, va_list args) else #endif vfprintf (stderr, message, args); - va_end (args); ++error_message_count; if (errnum) @@ -318,6 +321,7 @@ error (int status, int errnum, const char *message, ...) 
va_start (args, message); error_tail (status, errnum, message, args); + va_end (args); #ifdef _LIBC _IO_funlockfile (stderr); @@ -388,6 +392,7 @@ error_at_line (int status, int errnum, const char *file_name, va_start (args, message); error_tail (status, errnum, message, args); + va_end (args); #ifdef _LIBC _IO_funlockfile (stderr); diff --git a/contrib/grep/lib/error.h b/contrib/grep/lib/error.h index eb4fb70175..bad47a16dd 100644 --- a/contrib/grep/lib/error.h +++ b/contrib/grep/lib/error.h @@ -1,5 +1,5 @@ /* Declaration for error-reporting function - Copyright (C) 1995-1997, 2003, 2006, 2008-2015 Free Software Foundation, + Copyright (C) 1995-1997, 2003, 2006, 2008-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef _ERROR_H #define _ERROR_H 1 diff --git a/contrib/grep/lib/exclude.c b/contrib/grep/lib/exclude.c index 20dedf7e1b..c63c004f8f 100644 --- a/contrib/grep/lib/exclude.c +++ b/contrib/grep/lib/exclude.c @@ -1,6 +1,6 @@ /* exclude.c -- exclude file names - Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2015 Free Software + Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert and Sergey Poznyakoff . 
@@ -146,20 +146,20 @@ fnmatch_pattern_has_wildcards (const char *str, int options) { switch (*str++) { - case '.': - case '{': - case '}': - case '(': - case ')': - if (options & EXCLUDE_REGEX) - return true; - break; + case '.': + case '{': + case '}': + case '(': + case ')': + if (options & EXCLUDE_REGEX) + return true; + break; case '\\': - if (options & EXCLUDE_REGEX) - continue; - else - str += ! (options & FNM_NOESCAPE) && *str; + if (options & EXCLUDE_REGEX) + continue; + else + str += ! (options & FNM_NOESCAPE) && *str; break; case '+': case '@': case '!': @@ -287,10 +287,10 @@ free_exclude_segment (struct exclude_segment *seg) { case exclude_pattern: for (i = 0; i < seg->v.pat.exclude_count; i++) - { - if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX) - regfree (&seg->v.pat.exclude[i].v.re); - } + { + if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX) + regfree (&seg->v.pat.exclude[i].v.re); + } free (seg->v.pat.exclude); break; @@ -387,7 +387,7 @@ exclude_fnmatch (char const *pattern, char const *f, int options) if (! (options & EXCLUDE_ANCHORED)) for (p = f; *p && ! matched; p++) if (*p == '/' && p[1] != '/') - matched = ((*matcher) (pattern, p + 1, options) == 0); + matched = ((*matcher) (pattern, p + 1, options) == 0); return matched; } @@ -525,9 +525,9 @@ add_exclude (struct exclude *ex, char const *pattern, int options) && fnmatch_pattern_has_wildcards (pattern, options)) { if (! (ex->head && ex->head->type == exclude_pattern - && ((ex->head->options & EXCLUDE_INCLUDE) - == (options & EXCLUDE_INCLUDE)))) - new_exclude_segment (ex, exclude_pattern, options); + && ((ex->head->options & EXCLUDE_INCLUDE) + == (options & EXCLUDE_INCLUDE)))) + new_exclude_segment (ex, exclude_pattern, options); seg = ex->head; @@ -539,48 +539,48 @@ add_exclude (struct exclude *ex, char const *pattern, int options) patopts->options = options; if (options & EXCLUDE_REGEX) - { - int rc; - int cflags = REG_NOSUB|REG_EXTENDED| - ((options & FNM_CASEFOLD) ? 
REG_ICASE : 0); - - if (options & FNM_LEADING_DIR) - { - char *tmp; - size_t len = strlen (pattern); - - while (len > 0 && ISSLASH (pattern[len-1])) - --len; - - if (len == 0) - rc = 1; - else - { - tmp = xmalloc (len + 7); - memcpy (tmp, pattern, len); - strcpy (tmp + len, "(/.*)?"); - rc = regcomp (&patopts->v.re, tmp, cflags); - free (tmp); - } - } - else - rc = regcomp (&patopts->v.re, pattern, cflags); - - if (rc) - { - pat->exclude_count--; - return; - } - } + { + int rc; + int cflags = REG_NOSUB|REG_EXTENDED| + ((options & FNM_CASEFOLD) ? REG_ICASE : 0); + + if (options & FNM_LEADING_DIR) + { + char *tmp; + size_t len = strlen (pattern); + + while (len > 0 && ISSLASH (pattern[len-1])) + --len; + + if (len == 0) + rc = 1; + else + { + tmp = xmalloc (len + 7); + memcpy (tmp, pattern, len); + strcpy (tmp + len, "(/.*)?"); + rc = regcomp (&patopts->v.re, tmp, cflags); + free (tmp); + } + } + else + rc = regcomp (&patopts->v.re, pattern, cflags); + + if (rc) + { + pat->exclude_count--; + return; + } + } else - { - if (options & EXCLUDE_ALLOC) - { - pattern = xstrdup (pattern); - exclude_add_pattern_buffer (ex, (char*) pattern); - } - patopts->v.pattern = pattern; - } + { + if (options & EXCLUDE_ALLOC) + { + pattern = xstrdup (pattern); + exclude_add_pattern_buffer (ex, (char*) pattern); + } + patopts->v.pattern = pattern; + } } else { @@ -609,9 +609,9 @@ add_exclude (struct exclude *ex, char const *pattern, int options) int add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *), - struct exclude *ex, FILE *fp, int options, - char line_end, - void *data) + struct exclude *ex, FILE *fp, int options, + char line_end, + void *data) { char *buf = NULL; char *p; @@ -674,8 +674,8 @@ call_addfn (struct exclude *ex, char const *pattern, int options, void *data) int add_exclude_file (void (*add_func) (struct exclude *, char const *, int), - struct exclude *ex, char const *file_name, int options, - char line_end) + struct exclude *ex, char const 
*file_name, int options, + char line_end) { bool use_stdin = file_name[0] == '-' && !file_name[1]; FILE *in; diff --git a/contrib/grep/lib/exclude.h b/contrib/grep/lib/exclude.h index b522d35f60..32e7a36bd4 100644 --- a/contrib/grep/lib/exclude.h +++ b/contrib/grep/lib/exclude.h @@ -1,6 +1,6 @@ /* exclude.h -- declarations for excluding file names - Copyright (C) 1992-1994, 1997, 1999, 2001-2003, 2005-2006, 2009-2015 Free + Copyright (C) 1992-1994, 1997, 1999, 2001-2003, 2005-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef _GL_EXCLUDE_H #define _GL_EXCLUDE_H 1 @@ -48,13 +48,13 @@ struct exclude; bool fnmatch_pattern_has_wildcards (const char *, int) _GL_ATTRIBUTE_PURE; -struct exclude *new_exclude (void); +struct exclude *new_exclude (void) _GL_ATTRIBUTE_MALLOC; void free_exclude (struct exclude *); void add_exclude (struct exclude *, char const *, int); int add_exclude_file (void (*) (struct exclude *, char const *, int), struct exclude *, char const *, int, char); int add_exclude_fp (void (*) (struct exclude *, char const *, int, void *), - struct exclude *, FILE *, int, char, void *); + struct exclude *, FILE *, int, char, void *); bool excluded_file_name (struct exclude const *, char const *); void exclude_add_pattern_buffer (struct exclude *ex, char *buf); bool exclude_fnmatch (char const *, char const *, int); diff --git a/contrib/grep/lib/exitfail.c b/contrib/grep/lib/exitfail.c index 8035b8952c..189fa16f46 100644 --- a/contrib/grep/lib/exitfail.c +++ b/contrib/grep/lib/exitfail.c @@ -1,6 +1,6 @@ /* Failure exit status - Copyright (C) 2002-2003, 2005-2007, 2009-2015 Free Software Foundation, Inc. 
+ Copyright (C) 2002-2003, 2005-2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include diff --git a/contrib/grep/lib/exitfail.h b/contrib/grep/lib/exitfail.h index 62d0ac5d9e..a69a03bbd5 100644 --- a/contrib/grep/lib/exitfail.h +++ b/contrib/grep/lib/exitfail.h @@ -1,6 +1,6 @@ /* Failure exit status - Copyright (C) 2002, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2002, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,6 +13,6 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ extern int volatile exit_failure; diff --git a/contrib/grep/lib/fchdir.c b/contrib/grep/lib/fchdir.c deleted file mode 100644 index 944d1bdf87..0000000000 --- a/contrib/grep/lib/fchdir.c +++ /dev/null @@ -1,208 +0,0 @@ -/* fchdir replacement. - Copyright (C) 2006-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "assure.h" -#include "dosname.h" -#include "filenamecat.h" - -#ifndef REPLACE_OPEN_DIRECTORY -# define REPLACE_OPEN_DIRECTORY 0 -#endif - -/* This replacement assumes that a directory is not renamed while opened - through a file descriptor. - - FIXME: On mingw, this would be possible to enforce if we were to - also open a HANDLE to each directory currently visited by a file - descriptor, since mingw refuses to rename any in-use file system - object. */ - -/* Array of file descriptors opened. If REPLACE_OPEN_DIRECTORY or if it points - to a directory, it stores info about this directory. */ -typedef struct -{ - char *name; /* Absolute name of the directory, or NULL. */ - /* FIXME - add a DIR* member to make dirfd possible on mingw? */ -} dir_info_t; -static dir_info_t *dirs; -static size_t dirs_allocated; - -/* Try to ensure dirs has enough room for a slot at index fd; free any - contents already in that slot. Return false and set errno to - ENOMEM on allocation failure. */ -static bool -ensure_dirs_slot (size_t fd) -{ - if (fd < dirs_allocated) - free (dirs[fd].name); - else - { - size_t new_allocated; - dir_info_t *new_dirs; - - new_allocated = 2 * dirs_allocated + 1; - if (new_allocated <= fd) - new_allocated = fd + 1; - new_dirs = - (dirs != NULL - ? (dir_info_t *) realloc (dirs, new_allocated * sizeof *dirs) - : (dir_info_t *) malloc (new_allocated * sizeof *dirs)); - if (new_dirs == NULL) - return false; - memset (new_dirs + dirs_allocated, 0, - (new_allocated - dirs_allocated) * sizeof *dirs); - dirs = new_dirs; - dirs_allocated = new_allocated; - } - return true; -} - -/* Return an absolute name of DIR in malloc'd storage. 
*/ -static char * -get_name (char const *dir) -{ - char *cwd; - char *result; - int saved_errno; - - if (IS_ABSOLUTE_FILE_NAME (dir)) - return strdup (dir); - - /* We often encounter "."; treat it as a special case. */ - cwd = getcwd (NULL, 0); - if (!cwd || (dir[0] == '.' && dir[1] == '\0')) - return cwd; - - result = mfile_name_concat (cwd, dir, NULL); - saved_errno = errno; - free (cwd); - errno = saved_errno; - return result; -} - -/* Hook into the gnulib replacements for open() and close() to keep track - of the open file descriptors. */ - -/* Close FD, cleaning up any fd to name mapping if fd was visiting a - directory. */ -void -_gl_unregister_fd (int fd) -{ - if (fd >= 0 && fd < dirs_allocated) - { - free (dirs[fd].name); - dirs[fd].name = NULL; - } -} - -/* Mark FD as visiting FILENAME. FD must be non-negative, and refer - to an open file descriptor. If REPLACE_OPEN_DIRECTORY is non-zero, - this should only be called if FD is visiting a directory. Close FD - and return -1 if there is insufficient memory to track the - directory name; otherwise return FD. */ -int -_gl_register_fd (int fd, const char *filename) -{ - struct stat statbuf; - - assure (0 <= fd); - if (REPLACE_OPEN_DIRECTORY - || (fstat (fd, &statbuf) == 0 && S_ISDIR (statbuf.st_mode))) - { - if (!ensure_dirs_slot (fd) - || (dirs[fd].name = get_name (filename)) == NULL) - { - int saved_errno = errno; - close (fd); - errno = saved_errno; - return -1; - } - } - return fd; -} - -/* Mark NEWFD as a duplicate of OLDFD; useful from dup, dup2, dup3, - and fcntl. Both arguments must be valid and distinct file - descriptors. Close NEWFD and return -1 if OLDFD is tracking a - directory, but there is insufficient memory to track the same - directory in NEWFD; otherwise return NEWFD. */ -int -_gl_register_dup (int oldfd, int newfd) -{ - assure (0 <= oldfd && 0 <= newfd && oldfd != newfd); - if (oldfd < dirs_allocated && dirs[oldfd].name) - { - /* Duplicated a directory; must ensure newfd is allocated. 
*/ - if (!ensure_dirs_slot (newfd) - || (dirs[newfd].name = strdup (dirs[oldfd].name)) == NULL) - { - int saved_errno = errno; - close (newfd); - errno = saved_errno; - newfd = -1; - } - } - else if (newfd < dirs_allocated) - { - /* Duplicated a non-directory; ensure newfd is cleared. */ - free (dirs[newfd].name); - dirs[newfd].name = NULL; - } - return newfd; -} - -/* If FD is currently visiting a directory, then return the name of - that directory. Otherwise, return NULL and set errno. */ -const char * -_gl_directory_name (int fd) -{ - if (0 <= fd && fd < dirs_allocated && dirs[fd].name != NULL) - return dirs[fd].name; - /* At this point, fd is either invalid, or open but not a directory. - If dup2 fails, errno is correctly EBADF. */ - if (0 <= fd) - { - if (dup2 (fd, fd) == fd) - errno = ENOTDIR; - } - else - errno = EBADF; - return NULL; -} - - -/* Implement fchdir() in terms of chdir(). */ - -int -fchdir (int fd) -{ - const char *name = _gl_directory_name (fd); - return name ? chdir (name) : -1; -} diff --git a/contrib/grep/lib/fcntl--.h b/contrib/grep/lib/fcntl--.h index c158db5450..b78f6ad523 100644 --- a/contrib/grep/lib/fcntl--.h +++ b/contrib/grep/lib/fcntl--.h @@ -1,6 +1,6 @@ /* Like fcntl.h, but redefine some names to avoid glitches. - Copyright (C) 2005, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2005, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert. 
*/ diff --git a/contrib/grep/lib/fcntl-safer.h b/contrib/grep/lib/fcntl-safer.h index 9585c56f22..566747cdd6 100644 --- a/contrib/grep/lib/fcntl-safer.h +++ b/contrib/grep/lib/fcntl-safer.h @@ -1,6 +1,6 @@ /* Invoke fcntl-like functions, but avoid some glitches. - Copyright (C) 2005, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2005, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert. */ diff --git a/contrib/grep/lib/fcntl.c b/contrib/grep/lib/fcntl.c index 23b73e975c..6b9927ec4e 100644 --- a/contrib/grep/lib/fcntl.c +++ b/contrib/grep/lib/fcntl.c @@ -1,6 +1,6 @@ /* Provide file descriptor control. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Eric Blake . */ @@ -25,20 +25,25 @@ #include #include #include +#include #include -#if !HAVE_FCNTL -# define rpl_fcntl fcntl +#ifdef __KLIBC__ +# define INCL_DOS +# include #endif -#undef fcntl -#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ +#if defined _WIN32 && ! defined __CYGWIN__ /* Get declarations of the native Windows API functions. */ # define WIN32_LEAN_AND_MEAN # include /* Get _get_osfhandle. 
*/ -# include "msvc-nothrow.h" +# if GNULIB_MSVC_NOTHROW +# include "msvc-nothrow.h" +# else +# include +# endif /* Upper bound on getdtablesize(). See lib/getdtablesize.c. */ # define OPEN_MAX_MAX 0x10000 @@ -162,6 +167,18 @@ dupfd (int oldfd, int newfd, int flags) } #endif /* W32 */ +/* Forward declarations, because we '#undef fcntl' in the middle of this + compilation unit. */ +/* Our implementation of fcntl (fd, F_DUPFD, target). */ +static int rpl_fcntl_DUPFD (int fd, int target); +/* Our implementation of fcntl (fd, F_DUPFD_CLOEXEC, target). */ +static int rpl_fcntl_DUPFD_CLOEXEC (int fd, int target); +#ifdef __KLIBC__ +/* Adds support for fcntl on directories. */ +static int klibc_fcntl (int fd, int action, /* arg */...); +#endif + + /* Perform the specified ACTION on the file descriptor FD, possibly using the argument ARG further described below. This replacement handles the following actions, and forwards all others on to the @@ -182,110 +199,35 @@ dupfd (int oldfd, int newfd, int flags) return -1 and set errno. */ int -rpl_fcntl (int fd, int action, /* arg */...) +fcntl (int fd, int action, /* arg */...) +#undef fcntl +#ifdef __KLIBC__ +# define fcntl klibc_fcntl +#endif { va_list arg; int result = -1; va_start (arg, action); switch (action) { - -#if !HAVE_FCNTL case F_DUPFD: { int target = va_arg (arg, int); - result = dupfd (fd, target, 0); + result = rpl_fcntl_DUPFD (fd, target); break; } -#elif FCNTL_DUPFD_BUGGY || REPLACE_FCHDIR - case F_DUPFD: - { - int target = va_arg (arg, int); - /* Detect invalid target; needed for cygwin 1.5.x. */ - if (target < 0 || getdtablesize () <= target) - errno = EINVAL; - else - { - /* Haiku alpha 2 loses fd flags on original. 
*/ - int flags = fcntl (fd, F_GETFD); - if (flags < 0) - { - result = -1; - break; - } - result = fcntl (fd, action, target); - if (0 <= result && fcntl (fd, F_SETFD, flags) == -1) - { - int saved_errno = errno; - close (result); - result = -1; - errno = saved_errno; - } -# if REPLACE_FCHDIR - if (0 <= result) - result = _gl_register_dup (fd, result); -# endif - } - break; - } /* F_DUPFD */ -#endif /* FCNTL_DUPFD_BUGGY || REPLACE_FCHDIR */ case F_DUPFD_CLOEXEC: { int target = va_arg (arg, int); - -#if !HAVE_FCNTL - result = dupfd (fd, target, O_CLOEXEC); + result = rpl_fcntl_DUPFD_CLOEXEC (fd, target); break; -#else /* HAVE_FCNTL */ - /* Try the system call first, if the headers claim it exists - (that is, if GNULIB_defined_F_DUPFD_CLOEXEC is 0), since we - may be running with a glibc that has the macro but with an - older kernel that does not support it. Cache the - information on whether the system call really works, but - avoid caching failure if the corresponding F_DUPFD fails - for any reason. 0 = unknown, 1 = yes, -1 = no. */ - static int have_dupfd_cloexec = GNULIB_defined_F_DUPFD_CLOEXEC ? -1 : 0; - if (0 <= have_dupfd_cloexec) - { - result = fcntl (fd, action, target); - if (0 <= result || errno != EINVAL) - { - have_dupfd_cloexec = 1; -# if REPLACE_FCHDIR - if (0 <= result) - result = _gl_register_dup (fd, result); -# endif - } - else - { - result = rpl_fcntl (fd, F_DUPFD, target); - if (result < 0) - break; - have_dupfd_cloexec = -1; - } - } - else - result = rpl_fcntl (fd, F_DUPFD, target); - if (0 <= result && have_dupfd_cloexec == -1) - { - int flags = fcntl (result, F_GETFD); - if (flags < 0 || fcntl (result, F_SETFD, flags | FD_CLOEXEC) == -1) - { - int saved_errno = errno; - close (result); - errno = saved_errno; - result = -1; - } - } - break; -#endif /* HAVE_FCNTL */ - } /* F_DUPFD_CLOEXEC */ + } #if !HAVE_FCNTL case F_GETFD: { -# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ +# if defined _WIN32 && ! 
defined __CYGWIN__ HANDLE handle = (HANDLE) _get_osfhandle (fd); DWORD flags; if (handle == INVALID_HANDLE_VALUE @@ -314,8 +256,183 @@ rpl_fcntl (int fd, int action, /* arg */...) default: { #if HAVE_FCNTL - void *p = va_arg (arg, void *); - result = fcntl (fd, action, p); + switch (action) + { + #ifdef F_BARRIERFSYNC /* macOS */ + case F_BARRIERFSYNC: + #endif + #ifdef F_CHKCLEAN /* macOS */ + case F_CHKCLEAN: + #endif + #ifdef F_CLOSEM /* NetBSD, HP-UX */ + case F_CLOSEM: + #endif + #ifdef F_FLUSH_DATA /* macOS */ + case F_FLUSH_DATA: + #endif + #ifdef F_FREEZE_FS /* macOS */ + case F_FREEZE_FS: + #endif + #ifdef F_FULLFSYNC /* macOS */ + case F_FULLFSYNC: + #endif + #ifdef F_GETCONFINED /* macOS */ + case F_GETCONFINED: + #endif + #ifdef F_GETDEFAULTPROTLEVEL /* macOS */ + case F_GETDEFAULTPROTLEVEL: + #endif + #ifdef F_GETFD /* POSIX */ + case F_GETFD: + #endif + #ifdef F_GETFL /* POSIX */ + case F_GETFL: + #endif + #ifdef F_GETLEASE /* Linux */ + case F_GETLEASE: + #endif + #ifdef F_GETNOSIGPIPE /* macOS */ + case F_GETNOSIGPIPE: + #endif + #ifdef F_GETOWN /* POSIX */ + case F_GETOWN: + #endif + #ifdef F_GETPIPE_SZ /* Linux */ + case F_GETPIPE_SZ: + #endif + #ifdef F_GETPROTECTIONCLASS /* macOS */ + case F_GETPROTECTIONCLASS: + #endif + #ifdef F_GETPROTECTIONLEVEL /* macOS */ + case F_GETPROTECTIONLEVEL: + #endif + #ifdef F_GET_SEALS /* Linux */ + case F_GET_SEALS: + #endif + #ifdef F_GETSIG /* Linux */ + case F_GETSIG: + #endif + #ifdef F_MAXFD /* NetBSD */ + case F_MAXFD: + #endif + #ifdef F_RECYCLE /* macOS */ + case F_RECYCLE: + #endif + #ifdef F_SETFIFOENH /* HP-UX */ + case F_SETFIFOENH: + #endif + #ifdef F_THAW_FS /* macOS */ + case F_THAW_FS: + #endif + /* These actions take no argument. 
*/ + result = fcntl (fd, action); + break; + + #ifdef F_ADD_SEALS /* Linux */ + case F_ADD_SEALS: + #endif + #ifdef F_BADFD /* Solaris */ + case F_BADFD: + #endif + #ifdef F_CHECK_OPENEVT /* macOS */ + case F_CHECK_OPENEVT: + #endif + #ifdef F_DUP2FD /* FreeBSD, AIX, Solaris */ + case F_DUP2FD: + #endif + #ifdef F_DUP2FD_CLOEXEC /* FreeBSD, Solaris */ + case F_DUP2FD_CLOEXEC: + #endif + #ifdef F_DUP2FD_CLOFORK /* Solaris */ + case F_DUP2FD_CLOFORK: + #endif + #ifdef F_DUPFD /* POSIX */ + case F_DUPFD: + #endif + #ifdef F_DUPFD_CLOEXEC /* POSIX */ + case F_DUPFD_CLOEXEC: + #endif + #ifdef F_DUPFD_CLOFORK /* Solaris */ + case F_DUPFD_CLOFORK: + #endif + #ifdef F_GETXFL /* Solaris */ + case F_GETXFL: + #endif + #ifdef F_GLOBAL_NOCACHE /* macOS */ + case F_GLOBAL_NOCACHE: + #endif + #ifdef F_MAKECOMPRESSED /* macOS */ + case F_MAKECOMPRESSED: + #endif + #ifdef F_MOVEDATAEXTENTS /* macOS */ + case F_MOVEDATAEXTENTS: + #endif + #ifdef F_NOCACHE /* macOS */ + case F_NOCACHE: + #endif + #ifdef F_NODIRECT /* macOS */ + case F_NODIRECT: + #endif + #ifdef F_NOTIFY /* Linux */ + case F_NOTIFY: + #endif + #ifdef F_OPLKACK /* IRIX */ + case F_OPLKACK: + #endif + #ifdef F_OPLKREG /* IRIX */ + case F_OPLKREG: + #endif + #ifdef F_RDAHEAD /* macOS */ + case F_RDAHEAD: + #endif + #ifdef F_SETBACKINGSTORE /* macOS */ + case F_SETBACKINGSTORE: + #endif + #ifdef F_SETCONFINED /* macOS */ + case F_SETCONFINED: + #endif + #ifdef F_SETFD /* POSIX */ + case F_SETFD: + #endif + #ifdef F_SETFL /* POSIX */ + case F_SETFL: + #endif + #ifdef F_SETLEASE /* Linux */ + case F_SETLEASE: + #endif + #ifdef F_SETNOSIGPIPE /* macOS */ + case F_SETNOSIGPIPE: + #endif + #ifdef F_SETOWN /* POSIX */ + case F_SETOWN: + #endif + #ifdef F_SETPIPE_SZ /* Linux */ + case F_SETPIPE_SZ: + #endif + #ifdef F_SETPROTECTIONCLASS /* macOS */ + case F_SETPROTECTIONCLASS: + #endif + #ifdef F_SETSIG /* Linux */ + case F_SETSIG: + #endif + #ifdef F_SINGLE_WRITER /* macOS */ + case F_SINGLE_WRITER: + #endif + /* These 
actions take an 'int' argument. */ + { + int x = va_arg (arg, int); + result = fcntl (fd, action, x); + } + break; + + default: + /* Other actions take a pointer argument. */ + { + void *p = va_arg (arg, void *); + result = fcntl (fd, action, p); + } + break; + } #else errno = EINVAL; #endif @@ -325,3 +442,186 @@ rpl_fcntl (int fd, int action, /* arg */...) va_end (arg); return result; } + +static int +rpl_fcntl_DUPFD (int fd, int target) +{ + int result; +#if !HAVE_FCNTL + result = dupfd (fd, target, 0); +#elif FCNTL_DUPFD_BUGGY || REPLACE_FCHDIR + /* Detect invalid target; needed for cygwin 1.5.x. */ + if (target < 0 || getdtablesize () <= target) + { + result = -1; + errno = EINVAL; + } + else + { + /* Haiku alpha 2 loses fd flags on original. */ + int flags = fcntl (fd, F_GETFD); + if (flags < 0) + result = -1; + else + { + result = fcntl (fd, F_DUPFD, target); + if (0 <= result && fcntl (fd, F_SETFD, flags) == -1) + { + int saved_errno = errno; + close (result); + result = -1; + errno = saved_errno; + } +# if REPLACE_FCHDIR + if (0 <= result) + result = _gl_register_dup (fd, result); +# endif + } + } +#else + result = fcntl (fd, F_DUPFD, target); +#endif + return result; +} + +static int +rpl_fcntl_DUPFD_CLOEXEC (int fd, int target) +{ + int result; +#if !HAVE_FCNTL + result = dupfd (fd, target, O_CLOEXEC); +#else /* HAVE_FCNTL */ +# if defined __HAIKU__ + /* On Haiku, the system fcntl (fd, F_DUPFD_CLOEXEC, target) sets + the FD_CLOEXEC flag on fd, not on target. Therefore avoid the + system fcntl in this case. */ +# define have_dupfd_cloexec -1 +# else + /* Try the system call first, if the headers claim it exists + (that is, if GNULIB_defined_F_DUPFD_CLOEXEC is 0), since we + may be running with a glibc that has the macro but with an + older kernel that does not support it. Cache the + information on whether the system call really works, but + avoid caching failure if the corresponding F_DUPFD fails + for any reason. 0 = unknown, 1 = yes, -1 = no. 
*/ + static int have_dupfd_cloexec = GNULIB_defined_F_DUPFD_CLOEXEC ? -1 : 0; + if (0 <= have_dupfd_cloexec) + { + result = fcntl (fd, F_DUPFD_CLOEXEC, target); + if (0 <= result || errno != EINVAL) + { + have_dupfd_cloexec = 1; +# if REPLACE_FCHDIR + if (0 <= result) + result = _gl_register_dup (fd, result); +# endif + } + else + { + result = rpl_fcntl_DUPFD (fd, target); + if (result >= 0) + have_dupfd_cloexec = -1; + } + } + else +# endif + result = rpl_fcntl_DUPFD (fd, target); + if (0 <= result && have_dupfd_cloexec == -1) + { + int flags = fcntl (result, F_GETFD); + if (flags < 0 || fcntl (result, F_SETFD, flags | FD_CLOEXEC) == -1) + { + int saved_errno = errno; + close (result); + errno = saved_errno; + result = -1; + } + } +#endif /* HAVE_FCNTL */ + return result; +} + +#undef fcntl + +#ifdef __KLIBC__ + +static int +klibc_fcntl (int fd, int action, /* arg */...) +{ + va_list arg_ptr; + int arg; + struct stat sbuf; + int result; + + va_start (arg_ptr, action); + arg = va_arg (arg_ptr, int); + result = fcntl (fd, action, arg); + /* EPERM for F_DUPFD, ENOTSUP for others */ + if (result == -1 && (errno == EPERM || errno == ENOTSUP) + && !fstat (fd, &sbuf) && S_ISDIR (sbuf.st_mode)) + { + ULONG ulMode; + + switch (action) + { + case F_DUPFD: + /* Find available fd */ + while (fcntl (arg, F_GETFL) != -1 || errno != EBADF) + arg++; + + result = dup2 (fd, arg); + break; + + /* Using underlying APIs is right ? */ + case F_GETFD: + if (DosQueryFHState (fd, &ulMode)) + break; + + result = (ulMode & OPEN_FLAGS_NOINHERIT) ? FD_CLOEXEC : 0; + break; + + case F_SETFD: + if (arg & ~FD_CLOEXEC) + break; + + if (DosQueryFHState (fd, &ulMode)) + break; + + if (arg & FD_CLOEXEC) + ulMode |= OPEN_FLAGS_NOINHERIT; + else + ulMode &= ~OPEN_FLAGS_NOINHERIT; + + /* Filter supported flags. 
*/ + ulMode &= (OPEN_FLAGS_WRITE_THROUGH | OPEN_FLAGS_FAIL_ON_ERROR + | OPEN_FLAGS_NO_CACHE | OPEN_FLAGS_NOINHERIT); + + if (DosSetFHState (fd, ulMode)) + break; + + result = 0; + break; + + case F_GETFL: + result = 0; + break; + + case F_SETFL: + if (arg != 0) + break; + + result = 0; + break; + + default: + errno = EINVAL; + break; + } + } + + va_end (arg_ptr); + + return result; +} + +#endif diff --git a/contrib/grep/lib/fd-hook.c b/contrib/grep/lib/fd-hook.c index b1488c4ce7..8840f63488 100644 --- a/contrib/grep/lib/fd-hook.c +++ b/contrib/grep/lib/fd-hook.c @@ -1,5 +1,5 @@ -/* Hook for making making file descriptor functions close(), ioctl() extensible. - Copyright (C) 2009-2015 Free Software Foundation, Inc. +/* Hook for making file descriptor functions close(), ioctl() extensible. + Copyright (C) 2009-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2009. This program is free software: you can redistribute it and/or modify it @@ -13,7 +13,7 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include diff --git a/contrib/grep/lib/fd-hook.h b/contrib/grep/lib/fd-hook.h index bdb9aefe83..ed1a15a230 100644 --- a/contrib/grep/lib/fd-hook.h +++ b/contrib/grep/lib/fd-hook.h @@ -1,5 +1,5 @@ -/* Hook for making making file descriptor functions close(), ioctl() extensible. - Copyright (C) 2009-2015 Free Software Foundation, Inc. +/* Hook for making file descriptor functions close(), ioctl() extensible. + Copyright (C) 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -12,7 +12,7 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ #ifndef FD_HOOK_H diff --git a/contrib/grep/lib/fd-safer.c b/contrib/grep/lib/fd-safer-flag.c similarity index 54% copy from contrib/grep/lib/fd-safer.c copy to contrib/grep/lib/fd-safer-flag.c index 6c1fb2b60d..8fdf8dbc16 100644 --- a/contrib/grep/lib/fd-safer.c +++ b/contrib/grep/lib/fd-safer-flag.c @@ -1,6 +1,7 @@ -/* Return a safer copy of a file descriptor. +/* Adjust a file descriptor result so that it avoids clobbering + STD{IN,OUT,ERR}_FILENO, with specific flags. - Copyright (C) 2005-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,32 +14,34 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ -/* Written by Paul Eggert. */ +/* Written by Paul Eggert and Eric Blake. */ #include +/* Specification. */ #include "unistd-safer.h" #include #include /* Return FD, unless FD would be a copy of standard input, output, or - error; in that case, return a duplicate of FD, closing FD. On - failure to duplicate, close FD, set errno, and return -1. Preserve - errno if FD is negative, so that the caller can always inspect - errno when the returned value is negative. + error; in that case, return a duplicate of FD, closing FD. If FLAG + contains O_CLOEXEC, the returned FD will have close-on-exec + semantics. On failure to duplicate, close FD, set errno, and + return -1. Preserve errno if FD is negative, so that the caller + can always inspect errno when the returned value is negative. This function is usefully wrapped around functions that return file - descriptors, e.g., fd_safer (open ("file", O_RDONLY)). */ + descriptors, e.g., fd_safer_flag (open ("file", O_RDONLY | flag), flag). 
*/ int -fd_safer (int fd) +fd_safer_flag (int fd, int flag) { if (STDIN_FILENO <= fd && fd <= STDERR_FILENO) { - int f = dup_safer (fd); + int f = dup_safer_flag (fd, flag); int e = errno; close (fd); errno = e; diff --git a/contrib/grep/lib/fd-safer.c b/contrib/grep/lib/fd-safer.c index 6c1fb2b60d..e011c00308 100644 --- a/contrib/grep/lib/fd-safer.c +++ b/contrib/grep/lib/fd-safer.c @@ -1,6 +1,6 @@ /* Return a safer copy of a file descriptor. - Copyright (C) 2005-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert. */ diff --git a/contrib/grep/lib/fdopendir.c b/contrib/grep/lib/fdopendir.c deleted file mode 100644 index 837a8219b3..0000000000 --- a/contrib/grep/lib/fdopendir.c +++ /dev/null @@ -1,213 +0,0 @@ -/* provide a replacement fdopendir function - Copyright (C) 2004-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
*/ - -/* written by Jim Meyering */ - -#include - -#include - -#include -#include - -#if !HAVE_FDOPENDIR - -# include "openat.h" -# include "openat-priv.h" -# include "save-cwd.h" - -# if GNULIB_DIRENT_SAFER -# include "dirent--.h" -# endif - -# ifndef REPLACE_FCHDIR -# define REPLACE_FCHDIR 0 -# endif - -static DIR *fdopendir_with_dup (int, int, struct saved_cwd const *); -static DIR *fd_clone_opendir (int, struct saved_cwd const *); - -/* Replacement for POSIX fdopendir. - - First, try to simulate it via opendir ("/proc/self/fd/..."). Failing - that, simulate it by using fchdir metadata, or by doing - save_cwd/fchdir/opendir(".")/restore_cwd. - If either the save_cwd or the restore_cwd fails (relatively unlikely), - then give a diagnostic and exit nonzero. - - If successful, the resulting stream is based on FD in - implementations where streams are based on file descriptors and in - applications where no other thread or signal handler allocates or - frees file descriptors. In other cases, consult dirfd on the result - to find out whether FD is still being used. - - Otherwise, this function works just like POSIX fdopendir. - - W A R N I N G: - - Unlike other fd-related functions, this one places constraints on FD. - If this function returns successfully, FD is under control of the - dirent.h system, and the caller should not close or modify the state of - FD other than by the dirent.h functions. */ -DIR * -fdopendir (int fd) -{ - DIR *dir = fdopendir_with_dup (fd, -1, NULL); - - if (! REPLACE_FCHDIR && ! 
dir) - { - int saved_errno = errno; - if (EXPECTED_ERRNO (saved_errno)) - { - struct saved_cwd cwd; - if (save_cwd (&cwd) != 0) - openat_save_fail (errno); - dir = fdopendir_with_dup (fd, -1, &cwd); - saved_errno = errno; - free_cwd (&cwd); - errno = saved_errno; - } - } - - return dir; -} - -/* Like fdopendir, except that if OLDER_DUPFD is not -1, it is known - to be a dup of FD which is less than FD - 1 and which will be - closed by the caller and not otherwise used by the caller. This - function makes sure that FD is closed and all file descriptors less - than FD are open, and then calls fd_clone_opendir on a dup of FD. - That way, barring race conditions, fd_clone_opendir returns a - stream whose file descriptor is FD. - - If REPLACE_FCHDIR or CWD is null, use opendir ("/proc/self/fd/...", - falling back on fchdir metadata. Otherwise, CWD is a saved version - of the working directory; use fchdir/opendir(".")/restore_cwd(CWD). */ -static DIR * -fdopendir_with_dup (int fd, int older_dupfd, struct saved_cwd const *cwd) -{ - int dupfd = dup (fd); - if (dupfd < 0 && errno == EMFILE) - dupfd = older_dupfd; - if (dupfd < 0) - return NULL; - else - { - DIR *dir; - int saved_errno; - if (dupfd < fd - 1 && dupfd != older_dupfd) - { - dir = fdopendir_with_dup (fd, dupfd, cwd); - saved_errno = errno; - } - else - { - close (fd); - dir = fd_clone_opendir (dupfd, cwd); - saved_errno = errno; - if (! dir) - { - int fd1 = dup (dupfd); - if (fd1 != fd) - openat_save_fail (fd1 < 0 ? errno : EBADF); - } - } - - if (dupfd != older_dupfd) - close (dupfd); - errno = saved_errno; - return dir; - } -} - -/* Like fdopendir, except the result controls a clone of FD. It is - the caller's responsibility both to close FD and (if the result is - not null) to closedir the result. */ -static DIR * -fd_clone_opendir (int fd, struct saved_cwd const *cwd) -{ - if (REPLACE_FCHDIR || ! 
cwd) - { - DIR *dir = NULL; - int saved_errno = EOPNOTSUPP; - char buf[OPENAT_BUFFER_SIZE]; - char *proc_file = openat_proc_name (buf, fd, "."); - if (proc_file) - { - dir = opendir (proc_file); - saved_errno = errno; - if (proc_file != buf) - free (proc_file); - } -# if REPLACE_FCHDIR - if (! dir && EXPECTED_ERRNO (saved_errno)) - { - char const *name = _gl_directory_name (fd); - DIR *dp = name ? opendir (name) : NULL; - - /* The caller has done an elaborate dance to arrange for opendir to - consume just the right file descriptor. If dirfd returns -1, - though, we're on a system like mingw where opendir does not - consume a file descriptor. Consume it via 'dup' instead. */ - if (dp && dirfd (dp) < 0) - dup (fd); - - return dp; - } -# endif - errno = saved_errno; - return dir; - } - else - { - if (fchdir (fd) != 0) - return NULL; - else - { - DIR *dir = opendir ("."); - int saved_errno = errno; - if (restore_cwd (cwd) != 0) - openat_restore_fail (errno); - errno = saved_errno; - return dir; - } - } -} - -#else /* HAVE_FDOPENDIR */ - -# include -# include - -# undef fdopendir - -/* Like fdopendir, but work around GNU/Hurd bug by validating FD. */ - -DIR * -rpl_fdopendir (int fd) -{ - struct stat st; - if (fstat (fd, &st)) - return NULL; - if (!S_ISDIR (st.st_mode)) - { - errno = ENOTDIR; - return NULL; - } - return fdopendir (fd); -} - -#endif /* HAVE_FDOPENDIR */ diff --git a/contrib/grep/lib/filename.h b/contrib/grep/lib/filename.h index 457699315b..d4c70203e6 100644 --- a/contrib/grep/lib/filename.h +++ b/contrib/grep/lib/filename.h @@ -1,5 +1,5 @@ /* Basic filename support macros. - Copyright (C) 2001-2004, 2007-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2004, 2007-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. 
You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef _FILENAME_H #define _FILENAME_H @@ -28,7 +28,7 @@ extern "C" { it may be concatenated to a directory pathname. IS_PATH_WITH_DIR(P) tests whether P contains a directory specification. */ -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ +#if defined _WIN32 || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ /* Native Windows, Cygwin, OS/2, DOS */ # define ISSLASH(C) ((C) == '/' || (C) == '\\') # define HAS_DEVICE(P) \ diff --git a/contrib/grep/lib/filenamecat-lgpl.c b/contrib/grep/lib/filenamecat-lgpl.c index a359a5167d..6f666f2abb 100644 --- a/contrib/grep/lib/filenamecat-lgpl.c +++ b/contrib/grep/lib/filenamecat-lgpl.c @@ -1,6 +1,6 @@ /* Concatenate two arbitrary file names. - Copyright (C) 1996-2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 1996-2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Jim Meyering. */ @@ -31,55 +31,54 @@ # define mempcpy(D, S, N) ((void *) ((char *) memcpy (D, S, N) + (N))) #endif -/* Return the longest suffix of F that is a relative file name. - If it has no such suffix, return the empty string. */ - -static char const * _GL_ATTRIBUTE_PURE -longest_relative_suffix (char const *f) -{ - for (f += FILE_SYSTEM_PREFIX_LEN (f); ISSLASH (*f); f++) - continue; - return f; -} - -/* Concatenate two file name components, DIR and ABASE, in +/* Concatenate two file name components, DIR and BASE, in newly-allocated storage and return the result. 
The resulting file name F is such that the commands "ls F" and "(cd - DIR; ls BASE)" refer to the same file, where BASE is ABASE with any - file system prefixes and leading separators removed. - Arrange for a directory separator if necessary between DIR and BASE - in the result, removing any redundant separators. + DIR; ls ./BASE)" refer to the same file. If necessary, put + a separator between DIR and BASE in the result. Typically this + separator is "/", but in rare cases it might be ".". In any case, if BASE_IN_RESULT is non-NULL, set - *BASE_IN_RESULT to point to the copy of ABASE in the returned - concatenation. However, if ABASE begins with more than one slash, - set *BASE_IN_RESULT to point to the sole corresponding slash that - is copied into the result buffer. + *BASE_IN_RESULT to point to the copy of BASE at the end of the + returned concatenation. Return NULL if malloc fails. */ char * -mfile_name_concat (char const *dir, char const *abase, char **base_in_result) +mfile_name_concat (char const *dir, char const *base, char **base_in_result) { char const *dirbase = last_component (dir); size_t dirbaselen = base_len (dirbase); size_t dirlen = dirbase - dir + dirbaselen; - size_t needs_separator = (dirbaselen && ! ISSLASH (dirbase[dirbaselen - 1])); - - char const *base = longest_relative_suffix (abase); size_t baselen = strlen (base); - - char *p_concat = malloc (dirlen + needs_separator + baselen + 1); + char sep = '\0'; + if (dirbaselen) + { + /* DIR is not a file system root, so separate with / if needed. */ + if (! ISSLASH (dir[dirlen - 1]) && ! ISSLASH (*base)) + sep = '/'; + } + else if (ISSLASH (*base)) + { + /* DIR is a file system root and BASE begins with a slash, so + separate with ".". For example, if DIR is "/" and BASE is + "/foo" then return "/./foo", as "//foo" would be wrong on + some POSIX systems. A fancier algorithm could omit "." in + some cases but is not worth the trouble. 
*/ + sep = '.'; + } + + char *p_concat = malloc (dirlen + (sep != '\0') + baselen + 1); char *p; if (p_concat == NULL) return NULL; p = mempcpy (p_concat, dir, dirlen); - *p = DIRECTORY_SEPARATOR; - p += needs_separator; + *p = sep; + p += sep != '\0'; if (base_in_result) - *base_in_result = p - IS_ABSOLUTE_FILE_NAME (abase); + *base_in_result = p; p = mempcpy (p, base, baselen); *p = '\0'; diff --git a/contrib/grep/lib/filenamecat.h b/contrib/grep/lib/filenamecat.h index 62f81b007f..13387b40b5 100644 --- a/contrib/grep/lib/filenamecat.h +++ b/contrib/grep/lib/filenamecat.h @@ -1,6 +1,6 @@ /* Concatenate two arbitrary file names. - Copyright (C) 1996-1997, 2003, 2005, 2007, 2009-2015 Free Software + Copyright (C) 1996-1997, 2003, 2005, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Jim Meyering. */ diff --git a/contrib/grep/lib/flexmember.h b/contrib/grep/lib/flexmember.h new file mode 100644 index 0000000000..0dc77c3c84 --- /dev/null +++ b/contrib/grep/lib/flexmember.h @@ -0,0 +1,60 @@ +/* Sizes of structs with flexible array members. + + Copyright 2016-2020 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + . + + Written by Paul Eggert. */ + +#include + +/* Nonzero multiple of alignment of TYPE, suitable for FLEXSIZEOF below. + On older platforms without _Alignof, use a pessimistic bound that is + safe in practice even if FLEXIBLE_ARRAY_MEMBER is 1. + On newer platforms, use _Alignof to get a tighter bound. */ + +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112 +# define FLEXALIGNOF(type) (sizeof (type) & ~ (sizeof (type) - 1)) +#else +# define FLEXALIGNOF(type) _Alignof (type) +#endif + +/* Yield a properly aligned upper bound on the size of a struct of + type TYPE with a flexible array member named MEMBER that is + followed by N bytes of other data. The result is suitable as an + argument to malloc. For example: + + struct s { int n; char d[FLEXIBLE_ARRAY_MEMBER]; }; + struct s *p = malloc (FLEXSIZEOF (struct s, d, n * sizeof (char))); + + FLEXSIZEOF (TYPE, MEMBER, N) is not simply (sizeof (TYPE) + N), + since FLEXIBLE_ARRAY_MEMBER may be 1 on pre-C11 platforms. Nor is + it simply (offsetof (TYPE, MEMBER) + N), as that might yield a size + that causes malloc to yield a pointer that is not properly aligned + for TYPE; for example, if sizeof (int) == alignof (int) == 4, + malloc (offsetof (struct s, d) + 3 * sizeof (char)) is equivalent + to malloc (7) and might yield a pointer that is not a multiple of 4 + (which means the pointer is not properly aligned for struct s), + whereas malloc (FLEXSIZEOF (struct s, d, 3 * sizeof (char))) is + equivalent to malloc (8) and must yield a pointer that is a + multiple of 4. + + Yield a value less than N if and only if arithmetic overflow occurs. 
*/ + +#define FLEXSIZEOF(type, member, n) \ + ((offsetof (type, member) + FLEXALIGNOF (type) - 1 + (n)) \ + & ~ (FLEXALIGNOF (type) - 1)) diff --git a/contrib/grep/lib/fnmatch.c b/contrib/grep/lib/fnmatch.c deleted file mode 100644 index f4a9e7c674..0000000000 --- a/contrib/grep/lib/fnmatch.c +++ /dev/null @@ -1,350 +0,0 @@ -/* Copyright (C) 1991-1993, 1996-2007, 2009-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see . */ - -#ifndef _LIBC -# include -#endif - -/* Enable GNU extensions in fnmatch.h. */ -#ifndef _GNU_SOURCE -# define _GNU_SOURCE 1 -#endif - -#if ! defined __builtin_expect && __GNUC__ < 3 -# define __builtin_expect(expr, expected) (expr) -#endif - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#define WIDE_CHAR_SUPPORT \ - (HAVE_WCTYPE_H && HAVE_BTOWC && HAVE_ISWCTYPE \ - && HAVE_WMEMCHR && (HAVE_WMEMCPY || HAVE_WMEMPCPY)) - -/* For platform which support the ISO C amendment 1 functionality we - support user defined character classes. */ -#if defined _LIBC || WIDE_CHAR_SUPPORT -# include -# include -#endif - -/* We need some of the locale data (the collation sequence information) - but there is no interface to get this information in general. Therefore - we support a correct implementation only in glibc. 
*/ -#ifdef _LIBC -# include "../locale/localeinfo.h" -# include "../locale/elem-hash.h" -# include "../locale/coll-lookup.h" -# include - -# define CONCAT(a,b) __CONCAT(a,b) -# define mbsrtowcs __mbsrtowcs -# define fnmatch __fnmatch -extern int fnmatch (const char *pattern, const char *string, int flags); -#endif - -#ifndef SIZE_MAX -# define SIZE_MAX ((size_t) -1) -#endif - -/* We often have to test for FNM_FILE_NAME and FNM_PERIOD being both set. */ -#define NO_LEADING_PERIOD(flags) \ - ((flags & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD)) - -/* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself, and have not detected a bug - in the library. This code is part of the GNU C - Library, but also included in many other GNU distributions. Compiling - and linking in this code is a waste when using the GNU C library - (especially if it is a shared library). Rather than having every GNU - program understand 'configure --with-gnu-libc' and omit the object files, - it is simpler to just do this in the source for each such file. */ - -#if defined _LIBC || !defined __GNU_LIBRARY__ || !HAVE_FNMATCH_GNU - - -# if ! (defined isblank || (HAVE_ISBLANK && HAVE_DECL_ISBLANK)) -# define isblank(c) ((c) == ' ' || (c) == '\t') -# endif - -# define STREQ(s1, s2) (strcmp (s1, s2) == 0) - -# if defined _LIBC || WIDE_CHAR_SUPPORT -/* The GNU C library provides support for user-defined character classes - and the functions from ISO C amendment 1. */ -# ifdef CHARCLASS_NAME_MAX -# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX -# else -/* This shouldn't happen but some implementation might still have this - problem. Use a reasonable default value. 
*/ -# define CHAR_CLASS_MAX_LENGTH 256 -# endif - -# ifdef _LIBC -# define IS_CHAR_CLASS(string) __wctype (string) -# else -# define IS_CHAR_CLASS(string) wctype (string) -# endif - -# ifdef _LIBC -# define ISWCTYPE(WC, WT) __iswctype (WC, WT) -# else -# define ISWCTYPE(WC, WT) iswctype (WC, WT) -# endif - -# if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC -/* In this case we are implementing the multibyte character handling. */ -# define HANDLE_MULTIBYTE 1 -# endif - -# else -# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, 'xdigit'. */ - -# define IS_CHAR_CLASS(string) \ - (STREQ (string, "alpha") || STREQ (string, "upper") \ - || STREQ (string, "lower") || STREQ (string, "digit") \ - || STREQ (string, "alnum") || STREQ (string, "xdigit") \ - || STREQ (string, "space") || STREQ (string, "print") \ - || STREQ (string, "punct") || STREQ (string, "graph") \ - || STREQ (string, "cntrl") || STREQ (string, "blank")) -# endif - -/* Avoid depending on library functions or files - whose names are inconsistent. */ - -/* Global variable. */ -static int posixly_correct; - -# ifndef internal_function -/* Inside GNU libc we mark some function in a special way. In other - environments simply ignore the marking. */ -# define internal_function -# endif - -/* Note that this evaluates C many times. */ -# define FOLD(c) ((flags & FNM_CASEFOLD) ? 
tolower (c) : (c)) -# define CHAR char -# define UCHAR unsigned char -# define INT int -# define FCT internal_fnmatch -# define EXT ext_match -# define END end_pattern -# define L_(CS) CS -# ifdef _LIBC -# define BTOWC(C) __btowc (C) -# else -# define BTOWC(C) btowc (C) -# endif -# define STRLEN(S) strlen (S) -# define STRCAT(D, S) strcat (D, S) -# ifdef _LIBC -# define MEMPCPY(D, S, N) __mempcpy (D, S, N) -# else -# if HAVE_MEMPCPY -# define MEMPCPY(D, S, N) mempcpy (D, S, N) -# else -# define MEMPCPY(D, S, N) ((void *) ((char *) memcpy (D, S, N) + (N))) -# endif -# endif -# define MEMCHR(S, C, N) memchr (S, C, N) -# include "fnmatch_loop.c" - - -# if HANDLE_MULTIBYTE -# define FOLD(c) ((flags & FNM_CASEFOLD) ? towlower (c) : (c)) -# define CHAR wchar_t -# define UCHAR wint_t -# define INT wint_t -# define FCT internal_fnwmatch -# define EXT ext_wmatch -# define END end_wpattern -# define L_(CS) L##CS -# define BTOWC(C) (C) -# ifdef _LIBC -# define STRLEN(S) __wcslen (S) -# define STRCAT(D, S) __wcscat (D, S) -# define MEMPCPY(D, S, N) __wmempcpy (D, S, N) -# else -# define STRLEN(S) wcslen (S) -# define STRCAT(D, S) wcscat (D, S) -# if HAVE_WMEMPCPY -# define MEMPCPY(D, S, N) wmempcpy (D, S, N) -# else -# define MEMPCPY(D, S, N) (wmemcpy (D, S, N) + (N)) -# endif -# endif -# define MEMCHR(S, C, N) wmemchr (S, C, N) -# define WIDE_CHAR_VERSION 1 - -# undef IS_CHAR_CLASS -/* We have to convert the wide character string in a multibyte string. But - we know that the character class names consist of alphanumeric characters - from the portable character set, and since the wide character encoding - for a member of the portable character set is the same code point as - its single-byte encoding, we can use a simplified method to convert the - string to a multibyte character string. */ -static wctype_t -is_char_class (const wchar_t *wcs) -{ - char s[CHAR_CLASS_MAX_LENGTH + 1]; - char *cp = s; - - do - { - /* Test for a printable character from the portable character set. 
*/ -# ifdef _LIBC - if (*wcs < 0x20 || *wcs > 0x7e - || *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60) - return (wctype_t) 0; -# else - switch (*wcs) - { - case L' ': case L'!': case L'"': case L'#': case L'%': - case L'&': case L'\'': case L'(': case L')': case L'*': - case L'+': case L',': case L'-': case L'.': case L'/': - case L'0': case L'1': case L'2': case L'3': case L'4': - case L'5': case L'6': case L'7': case L'8': case L'9': - case L':': case L';': case L'<': case L'=': case L'>': - case L'?': - case L'A': case L'B': case L'C': case L'D': case L'E': - case L'F': case L'G': case L'H': case L'I': case L'J': - case L'K': case L'L': case L'M': case L'N': case L'O': - case L'P': case L'Q': case L'R': case L'S': case L'T': - case L'U': case L'V': case L'W': case L'X': case L'Y': - case L'Z': - case L'[': case L'\\': case L']': case L'^': case L'_': - case L'a': case L'b': case L'c': case L'd': case L'e': - case L'f': case L'g': case L'h': case L'i': case L'j': - case L'k': case L'l': case L'm': case L'n': case L'o': - case L'p': case L'q': case L'r': case L's': case L't': - case L'u': case L'v': case L'w': case L'x': case L'y': - case L'z': case L'{': case L'|': case L'}': case L'~': - break; - default: - return (wctype_t) 0; - } -# endif - - /* Avoid overrunning the buffer. */ - if (cp == s + CHAR_CLASS_MAX_LENGTH) - return (wctype_t) 0; - - *cp++ = (char) *wcs++; - } - while (*wcs != L'\0'); - - *cp = '\0'; - -# ifdef _LIBC - return __wctype (s); -# else - return wctype (s); -# endif -} -# define IS_CHAR_CLASS(string) is_char_class (string) - -# include "fnmatch_loop.c" -# endif - - -int -fnmatch (const char *pattern, const char *string, int flags) -{ -# if HANDLE_MULTIBYTE -# define ALLOCA_LIMIT 2000 - if (__builtin_expect (MB_CUR_MAX, 1) != 1) - { - mbstate_t ps; - size_t patsize; - size_t strsize; - size_t totsize; - wchar_t *wpattern; - wchar_t *wstring; - int res; - - /* Calculate the size needed to convert the strings to - wide characters. 
*/ - memset (&ps, '\0', sizeof (ps)); - patsize = mbsrtowcs (NULL, &pattern, 0, &ps) + 1; - if (__builtin_expect (patsize != 0, 1)) - { - assert (mbsinit (&ps)); - strsize = mbsrtowcs (NULL, &string, 0, &ps) + 1; - if (__builtin_expect (strsize != 0, 1)) - { - assert (mbsinit (&ps)); - totsize = patsize + strsize; - if (__builtin_expect (! (patsize <= totsize - && totsize <= SIZE_MAX / sizeof (wchar_t)), - 0)) - { - errno = ENOMEM; - return -1; - } - - /* Allocate room for the wide characters. */ - if (__builtin_expect (totsize < ALLOCA_LIMIT, 1)) - wpattern = (wchar_t *) alloca (totsize * sizeof (wchar_t)); - else - { - wpattern = malloc (totsize * sizeof (wchar_t)); - if (__builtin_expect (! wpattern, 0)) - { - errno = ENOMEM; - return -1; - } - } - wstring = wpattern + patsize; - - /* Convert the strings into wide characters. */ - mbsrtowcs (wpattern, &pattern, patsize, &ps); - assert (mbsinit (&ps)); - mbsrtowcs (wstring, &string, strsize, &ps); - - res = internal_fnwmatch (wpattern, wstring, wstring + strsize - 1, - flags & FNM_PERIOD, flags); - - if (__builtin_expect (! (totsize < ALLOCA_LIMIT), 0)) - free (wpattern); - return res; - } - } - } - -# endif /* HANDLE_MULTIBYTE */ - - return internal_fnmatch (pattern, string, string + strlen (string), - flags & FNM_PERIOD, flags); -} - -# ifdef _LIBC -# undef fnmatch -versioned_symbol (libc, __fnmatch, fnmatch, GLIBC_2_2_3); -# if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3) -strong_alias (__fnmatch, __fnmatch_old) -compat_symbol (libc, __fnmatch_old, fnmatch, GLIBC_2_0); -# endif -libc_hidden_ver (__fnmatch, fnmatch) -# endif - -#endif /* _LIBC or not __GNU_LIBRARY__. */ diff --git a/contrib/grep/lib/fnmatch_loop.c b/contrib/grep/lib/fnmatch_loop.c deleted file mode 100644 index 61778bdd57..0000000000 --- a/contrib/grep/lib/fnmatch_loop.c +++ /dev/null @@ -1,1219 +0,0 @@ -/* Copyright (C) 1991-1993, 1996-2006, 2009-2015 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see . */ - -/* Match STRING against the file name pattern PATTERN, returning zero if - it matches, nonzero if not. */ -static int EXT (INT opt, const CHAR *pattern, const CHAR *string, - const CHAR *string_end, bool no_leading_period, int flags) - internal_function; -static const CHAR *END (const CHAR *patternp) internal_function; - -static int -internal_function -FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, - bool no_leading_period, int flags) -{ - register const CHAR *p = pattern, *n = string; - register UCHAR c; -#ifdef _LIBC -# if WIDE_CHAR_VERSION - const char *collseq = (const char *) - _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); -# else - const UCHAR *collseq = (const UCHAR *) - _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); -# endif -#endif - - while ((c = *p++) != L_('\0')) - { - bool new_no_leading_period = false; - c = FOLD (c); - - switch (c) - { - case L_('?'): - if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') - { - int res; - - res = EXT (c, p, n, string_end, no_leading_period, - flags); - if (res != -1) - return res; - } - - if (n == string_end) - return FNM_NOMATCH; - else if (*n == L_('/') && (flags & FNM_FILE_NAME)) - return FNM_NOMATCH; - else if (*n == L_('.') && no_leading_period) - return FNM_NOMATCH; - break; - - case L_('\\'): - if (!(flags & FNM_NOESCAPE)) - { - c = *p++; - if (c == L_('\0')) - /* Trailing \ loses. 
*/ - return FNM_NOMATCH; - c = FOLD (c); - } - if (n == string_end || FOLD ((UCHAR) *n) != c) - return FNM_NOMATCH; - break; - - case L_('*'): - if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') - { - int res; - - res = EXT (c, p, n, string_end, no_leading_period, - flags); - if (res != -1) - return res; - } - - if (n != string_end && *n == L_('.') && no_leading_period) - return FNM_NOMATCH; - - for (c = *p++; c == L_('?') || c == L_('*'); c = *p++) - { - if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0) - { - const CHAR *endp = END (p); - if (endp != p) - { - /* This is a pattern. Skip over it. */ - p = endp; - continue; - } - } - - if (c == L_('?')) - { - /* A ? needs to match one character. */ - if (n == string_end) - /* There isn't another character; no match. */ - return FNM_NOMATCH; - else if (*n == L_('/') - && __builtin_expect (flags & FNM_FILE_NAME, 0)) - /* A slash does not match a wildcard under - FNM_FILE_NAME. */ - return FNM_NOMATCH; - else - /* One character of the string is consumed in matching - this ? wildcard, so *??? won't match if there are - less than three characters. */ - ++n; - } - } - - if (c == L_('\0')) - /* The wildcard(s) is/are the last element of the pattern. - If the name is a file name and contains another slash - this means it cannot match, unless the FNM_LEADING_DIR - flag is set. */ - { - int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; - - if (flags & FNM_FILE_NAME) - { - if (flags & FNM_LEADING_DIR) - result = 0; - else - { - if (MEMCHR (n, L_('/'), string_end - n) == NULL) - result = 0; - } - } - - return result; - } - else - { - const CHAR *endp; - - endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'), - string_end - n); - if (endp == NULL) - endp = string_end; - - if (c == L_('[') - || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0 - && (c == L_('@') || c == L_('+') || c == L_('!')) - && *p == L_('('))) - { - int flags2 = ((flags & FNM_FILE_NAME) - ? 
flags : (flags & ~FNM_PERIOD)); - bool no_leading_period2 = no_leading_period; - - for (--p; n < endp; ++n, no_leading_period2 = false) - if (FCT (p, n, string_end, no_leading_period2, flags2) - == 0) - return 0; - } - else if (c == L_('/') && (flags & FNM_FILE_NAME)) - { - while (n < string_end && *n != L_('/')) - ++n; - if (n < string_end && *n == L_('/') - && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags) - == 0)) - return 0; - } - else - { - int flags2 = ((flags & FNM_FILE_NAME) - ? flags : (flags & ~FNM_PERIOD)); - int no_leading_period2 = no_leading_period; - - if (c == L_('\\') && !(flags & FNM_NOESCAPE)) - c = *p; - c = FOLD (c); - for (--p; n < endp; ++n, no_leading_period2 = false) - if (FOLD ((UCHAR) *n) == c - && (FCT (p, n, string_end, no_leading_period2, flags2) - == 0)) - return 0; - } - } - - /* If we come here no match is possible with the wildcard. */ - return FNM_NOMATCH; - - case L_('['): - { - /* Nonzero if the sense of the character class is inverted. */ - const CHAR *p_init = p; - const CHAR *n_init = n; - register bool not; - CHAR cold; - UCHAR fn; - - if (posixly_correct == 0) - posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; - - if (n == string_end) - return FNM_NOMATCH; - - if (*n == L_('.') && no_leading_period) - return FNM_NOMATCH; - - if (*n == L_('/') && (flags & FNM_FILE_NAME)) - /* '/' cannot be matched. */ - return FNM_NOMATCH; - - not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^'))); - if (not) - ++p; - - fn = FOLD ((UCHAR) *n); - - c = *p++; - for (;;) - { - bool is_range = false; - - if (!(flags & FNM_NOESCAPE) && c == L_('\\')) - { - if (*p == L_('\0')) - return FNM_NOMATCH; - c = FOLD ((UCHAR) *p); - ++p; - - goto normal_bracket; - } - else if (c == L_('[') && *p == L_(':')) - { - /* Leave room for the null. 
*/ - CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; - size_t c1 = 0; -#if defined _LIBC || WIDE_CHAR_SUPPORT - wctype_t wt; -#endif - const CHAR *startp = p; - - for (;;) - { - if (c1 == CHAR_CLASS_MAX_LENGTH) - /* The name is too long and therefore the pattern - is ill-formed. */ - return FNM_NOMATCH; - - c = *++p; - if (c == L_(':') && p[1] == L_(']')) - { - p += 2; - break; - } - if (c < L_('a') || c >= L_('z')) - { - /* This cannot possibly be a character class name. - Match it as a normal range. */ - p = startp; - c = L_('['); - goto normal_bracket; - } - str[c1++] = c; - } - str[c1] = L_('\0'); - -#if defined _LIBC || WIDE_CHAR_SUPPORT - wt = IS_CHAR_CLASS (str); - if (wt == 0) - /* Invalid character class name. */ - return FNM_NOMATCH; - -# if defined _LIBC && ! WIDE_CHAR_VERSION - /* The following code is glibc specific but does - there a good job in speeding up the code since - we can avoid the btowc() call. */ - if (_ISCTYPE ((UCHAR) *n, wt)) - goto matched; -# else - if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) - goto matched; -# endif -#else - if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n)) - || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n)) - || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n)) - || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n)) - || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n)) - || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n)) - || (STREQ (str, L_("lower")) && islower ((UCHAR) *n)) - || (STREQ (str, L_("print")) && isprint ((UCHAR) *n)) - || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n)) - || (STREQ (str, L_("space")) && isspace ((UCHAR) *n)) - || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n)) - || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n))) - goto matched; -#endif - c = *p++; - } -#ifdef _LIBC - else if (c == L_('[') && *p == L_('=')) - { - UCHAR str[1]; - uint32_t nrules = - _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - const CHAR *startp = p; - - c = *++p; - if (c == L_('\0')) - { - p = startp; - 
c = L_('['); - goto normal_bracket; - } - str[0] = c; - - c = *++p; - if (c != L_('=') || p[1] != L_(']')) - { - p = startp; - c = L_('['); - goto normal_bracket; - } - p += 2; - - if (nrules == 0) - { - if ((UCHAR) *n == str[0]) - goto matched; - } - else - { - const int32_t *table; -# if WIDE_CHAR_VERSION - const int32_t *weights; - const int32_t *extra; -# else - const unsigned char *weights; - const unsigned char *extra; -# endif - const int32_t *indirect; - int32_t idx; - const UCHAR *cp = (const UCHAR *) str; - - /* This #include defines a local function! */ -# if WIDE_CHAR_VERSION -# include -# else -# include -# endif - -# if WIDE_CHAR_VERSION - table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); - weights = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); - extra = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); - indirect = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); -# else - table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - weights = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); - indirect = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); -# endif - - idx = findidx (&cp); - if (idx != 0) - { - /* We found a table entry. Now see whether the - character we are currently at has the same - equivalence class value. */ - int len = weights[idx & 0xffffff]; - int32_t idx2; - const UCHAR *np = (const UCHAR *) n; - - idx2 = findidx (&np); - if (idx2 != 0 - && (idx >> 24) == (idx2 >> 24) - && len == weights[idx2 & 0xffffff]) - { - int cnt = 0; - - idx &= 0xffffff; - idx2 &= 0xffffff; - - while (cnt < len - && (weights[idx + 1 + cnt] - == weights[idx2 + 1 + cnt])) - ++cnt; - - if (cnt == len) - goto matched; - } - } - } - - c = *p++; - } -#endif - else if (c == L_('\0')) - { - /* [ unterminated, treat as normal character. 
*/ - p = p_init; - n = n_init; - c = L_('['); - goto normal_match; - } - else - { -#ifdef _LIBC - bool is_seqval = false; - - if (c == L_('[') && *p == L_('.')) - { - uint32_t nrules = - _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - const CHAR *startp = p; - size_t c1 = 0; - - while (1) - { - c = *++p; - if (c == L_('.') && p[1] == L_(']')) - { - p += 2; - break; - } - if (c == '\0') - return FNM_NOMATCH; - ++c1; - } - - /* We have to handling the symbols differently in - ranges since then the collation sequence is - important. */ - is_range = *p == L_('-') && p[1] != L_('\0'); - - if (nrules == 0) - { - /* There are no names defined in the collation - data. Therefore we only accept the trivial - names consisting of the character itself. */ - if (c1 != 1) - return FNM_NOMATCH; - - if (!is_range && *n == startp[1]) - goto matched; - - cold = startp[1]; - c = *p++; - } - else - { - int32_t table_size; - const int32_t *symb_table; -# ifdef WIDE_CHAR_VERSION - char str[c1]; - size_t strcnt; -# else -# define str (startp + 1) -# endif - const unsigned char *extra; - int32_t idx; - int32_t elem; - int32_t second; - int32_t hash; - -# ifdef WIDE_CHAR_VERSION - /* We have to convert the name to a single-byte - string. This is possible since the names - consist of ASCII characters and the internal - representation is UCS4. */ - for (strcnt = 0; strcnt < c1; ++strcnt) - str[strcnt] = startp[1 + strcnt]; -# endif - - table_size = - _NL_CURRENT_WORD (LC_COLLATE, - _NL_COLLATE_SYMB_HASH_SIZEMB); - symb_table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_TABLEMB); - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_EXTRAMB); - - /* Locate the character in the hashing table. */ - hash = elem_hash (str, c1); - - idx = 0; - elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - second = hash % (table_size - 2) + 1; - - do - { - /* First compare the hashing value. 
*/ - if (symb_table[2 * elem] == hash - && (c1 - == extra[symb_table[2 * elem + 1]]) - && memcmp (str, - &extra[symb_table[2 * elem - + 1] - + 1], c1) == 0) - { - /* Yep, this is the entry. */ - idx = symb_table[2 * elem + 1]; - idx += 1 + extra[idx]; - break; - } - - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } - - if (symb_table[2 * elem] != 0) - { - /* Compare the byte sequence but only if - this is not part of a range. */ -# ifdef WIDE_CHAR_VERSION - int32_t *wextra; - - idx += 1 + extra[idx]; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~3; - - wextra = (int32_t *) &extra[idx + 4]; -# endif - - if (! is_range) - { -# ifdef WIDE_CHAR_VERSION - for (c1 = 0; - (int32_t) c1 < wextra[idx]; - ++c1) - if (n[c1] != wextra[1 + c1]) - break; - - if ((int32_t) c1 == wextra[idx]) - goto matched; -# else - for (c1 = 0; c1 < extra[idx]; ++c1) - if (n[c1] != extra[1 + c1]) - break; - - if (c1 == extra[idx]) - goto matched; -# endif - } - - /* Get the collation sequence value. */ - is_seqval = true; -# ifdef WIDE_CHAR_VERSION - cold = wextra[1 + wextra[idx]]; -# else - /* Adjust for the alignment. */ - idx += 1 + extra[idx]; - idx = (idx + 3) & ~4; - cold = *((int32_t *) &extra[idx]); -# endif - - c = *p++; - } - else if (c1 == 1) - { - /* No valid character. Match it as a - single byte. */ - if (!is_range && *n == str[0]) - goto matched; - - cold = str[0]; - c = *p++; - } - else - return FNM_NOMATCH; - } - } - else -# undef str -#endif - { - c = FOLD (c); - normal_bracket: - - /* We have to handling the symbols differently in - ranges since then the collation sequence is - important. */ - is_range = (*p == L_('-') && p[1] != L_('\0') - && p[1] != L_(']')); - - if (!is_range && c == fn) - goto matched; - -#if _LIBC - /* This is needed if we goto normal_bracket; from - outside of is_seqval's scope. 
*/ - is_seqval = false; -#endif - - cold = c; - c = *p++; - } - - if (c == L_('-') && *p != L_(']')) - { -#if _LIBC - /* We have to find the collation sequence - value for C. Collation sequence is nothing - we can regularly access. The sequence - value is defined by the order in which the - definitions of the collation values for the - various characters appear in the source - file. A strange concept, nowhere - documented. */ - uint32_t fcollseq; - uint32_t lcollseq; - UCHAR cend = *p++; - -# ifdef WIDE_CHAR_VERSION - /* Search in the 'names' array for the characters. */ - fcollseq = __collseq_table_lookup (collseq, fn); - if (fcollseq == ~((uint32_t) 0)) - /* XXX We don't know anything about the character - we are supposed to match. This means we are - failing. */ - goto range_not_matched; - - if (is_seqval) - lcollseq = cold; - else - lcollseq = __collseq_table_lookup (collseq, cold); -# else - fcollseq = collseq[fn]; - lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; -# endif - - is_seqval = false; - if (cend == L_('[') && *p == L_('.')) - { - uint32_t nrules = - _NL_CURRENT_WORD (LC_COLLATE, - _NL_COLLATE_NRULES); - const CHAR *startp = p; - size_t c1 = 0; - - while (1) - { - c = *++p; - if (c == L_('.') && p[1] == L_(']')) - { - p += 2; - break; - } - if (c == '\0') - return FNM_NOMATCH; - ++c1; - } - - if (nrules == 0) - { - /* There are no names defined in the - collation data. Therefore we only - accept the trivial names consisting - of the character itself. */ - if (c1 != 1) - return FNM_NOMATCH; - - cend = startp[1]; - } - else - { - int32_t table_size; - const int32_t *symb_table; -# ifdef WIDE_CHAR_VERSION - char str[c1]; - size_t strcnt; -# else -# define str (startp + 1) -# endif - const unsigned char *extra; - int32_t idx; - int32_t elem; - int32_t second; - int32_t hash; - -# ifdef WIDE_CHAR_VERSION - /* We have to convert the name to a single-byte - string. 
This is possible since the names - consist of ASCII characters and the internal - representation is UCS4. */ - for (strcnt = 0; strcnt < c1; ++strcnt) - str[strcnt] = startp[1 + strcnt]; -# endif - - table_size = - _NL_CURRENT_WORD (LC_COLLATE, - _NL_COLLATE_SYMB_HASH_SIZEMB); - symb_table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_TABLEMB); - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_EXTRAMB); - - /* Locate the character in the hashing - table. */ - hash = elem_hash (str, c1); - - idx = 0; - elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - second = hash % (table_size - 2) + 1; - - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - && (c1 - == extra[symb_table[2 * elem + 1]]) - && memcmp (str, - &extra[symb_table[2 * elem + 1] - + 1], c1) == 0) - { - /* Yep, this is the entry. */ - idx = symb_table[2 * elem + 1]; - idx += 1 + extra[idx]; - break; - } - - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } - - if (symb_table[2 * elem] != 0) - { - /* Compare the byte sequence but only if - this is not part of a range. */ -# ifdef WIDE_CHAR_VERSION - int32_t *wextra; - - idx += 1 + extra[idx]; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~4; - - wextra = (int32_t *) &extra[idx + 4]; -# endif - /* Get the collation sequence value. */ - is_seqval = true; -# ifdef WIDE_CHAR_VERSION - cend = wextra[1 + wextra[idx]]; -# else - /* Adjust for the alignment. 
*/ - idx += 1 + extra[idx]; - idx = (idx + 3) & ~4; - cend = *((int32_t *) &extra[idx]); -# endif - } - else if (symb_table[2 * elem] != 0 && c1 == 1) - { - cend = str[0]; - c = *p++; - } - else - return FNM_NOMATCH; - } -# undef str - } - else - { - if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) - cend = *p++; - if (cend == L_('\0')) - return FNM_NOMATCH; - cend = FOLD (cend); - } - - /* XXX It is not entirely clear to me how to handle - characters which are not mentioned in the - collation specification. */ - if ( -# ifdef WIDE_CHAR_VERSION - lcollseq == 0xffffffff || -# endif - lcollseq <= fcollseq) - { - /* We have to look at the upper bound. */ - uint32_t hcollseq; - - if (is_seqval) - hcollseq = cend; - else - { -# ifdef WIDE_CHAR_VERSION - hcollseq = - __collseq_table_lookup (collseq, cend); - if (hcollseq == ~((uint32_t) 0)) - { - /* Hum, no information about the upper - bound. The matching succeeds if the - lower bound is matched exactly. */ - if (lcollseq != fcollseq) - goto range_not_matched; - - goto matched; - } -# else - hcollseq = collseq[cend]; -# endif - } - - if (lcollseq <= hcollseq && fcollseq <= hcollseq) - goto matched; - } -# ifdef WIDE_CHAR_VERSION - range_not_matched: -# endif -#else - /* We use a boring value comparison of the character - values. This is better than comparing using - 'strcoll' since the latter would have surprising - and sometimes fatal consequences. */ - UCHAR cend = *p++; - - if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) - cend = *p++; - if (cend == L_('\0')) - return FNM_NOMATCH; - - /* It is a range. */ - if (cold <= fn && fn <= cend) - goto matched; -#endif - - c = *p++; - } - } - - if (c == L_(']')) - break; - } - - if (!not) - return FNM_NOMATCH; - break; - - matched: - /* Skip the rest of the [...] that already matched. */ - do - { - ignore_next: - c = *p++; - - if (c == L_('\0')) - /* [... (unterminated) loses. 
*/ - return FNM_NOMATCH; - - if (!(flags & FNM_NOESCAPE) && c == L_('\\')) - { - if (*p == L_('\0')) - return FNM_NOMATCH; - /* XXX 1003.2d11 is unclear if this is right. */ - ++p; - } - else if (c == L_('[') && *p == L_(':')) - { - int c1 = 0; - const CHAR *startp = p; - - while (1) - { - c = *++p; - if (++c1 == CHAR_CLASS_MAX_LENGTH) - return FNM_NOMATCH; - - if (*p == L_(':') && p[1] == L_(']')) - break; - - if (c < L_('a') || c >= L_('z')) - { - p = startp; - goto ignore_next; - } - } - p += 2; - c = *p++; - } - else if (c == L_('[') && *p == L_('=')) - { - c = *++p; - if (c == L_('\0')) - return FNM_NOMATCH; - c = *++p; - if (c != L_('=') || p[1] != L_(']')) - return FNM_NOMATCH; - p += 2; - c = *p++; - } - else if (c == L_('[') && *p == L_('.')) - { - ++p; - while (1) - { - c = *++p; - if (c == '\0') - return FNM_NOMATCH; - - if (*p == L_('.') && p[1] == L_(']')) - break; - } - p += 2; - c = *p++; - } - } - while (c != L_(']')); - if (not) - return FNM_NOMATCH; - } - break; - - case L_('+'): - case L_('@'): - case L_('!'): - if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') - { - int res; - - res = EXT (c, p, n, string_end, no_leading_period, flags); - if (res != -1) - return res; - } - goto normal_match; - - case L_('/'): - if (NO_LEADING_PERIOD (flags)) - { - if (n == string_end || c != (UCHAR) *n) - return FNM_NOMATCH; - - new_no_leading_period = true; - break; - } - /* FALLTHROUGH */ - default: - normal_match: - if (n == string_end || c != FOLD ((UCHAR) *n)) - return FNM_NOMATCH; - } - - no_leading_period = new_no_leading_period; - ++n; - } - - if (n == string_end) - return 0; - - if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/')) - /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ - return 0; - - return FNM_NOMATCH; -} - - -static const CHAR * -internal_function -END (const CHAR *pattern) -{ - const CHAR *p = pattern; - - while (1) - if (*++p == L_('\0')) - /* This is an invalid pattern. 
*/ - return pattern; - else if (*p == L_('[')) - { - /* Handle brackets special. */ - if (posixly_correct == 0) - posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; - - /* Skip the not sign. We have to recognize it because of a possibly - following ']'. */ - if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) - ++p; - /* A leading ']' is recognized as such. */ - if (*p == L_(']')) - ++p; - /* Skip over all characters of the list. */ - while (*p != L_(']')) - if (*p++ == L_('\0')) - /* This is no valid pattern. */ - return pattern; - } - else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') - || *p == L_('!')) && p[1] == L_('(')) - p = END (p + 1); - else if (*p == L_(')')) - break; - - return p + 1; -} - - -static int -internal_function -EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, - bool no_leading_period, int flags) -{ - const CHAR *startp; - size_t level; - struct patternlist - { - struct patternlist *next; - CHAR str[1]; - } *list = NULL; - struct patternlist **lastp = &list; - size_t pattern_len = STRLEN (pattern); - const CHAR *p; - const CHAR *rs; - enum { ALLOCA_LIMIT = 8000 }; - - /* Parse the pattern. Store the individual parts in the list. */ - level = 0; - for (startp = p = pattern + 1; ; ++p) - if (*p == L_('\0')) - /* This is an invalid pattern. */ - return -1; - else if (*p == L_('[')) - { - /* Handle brackets special. */ - if (posixly_correct == 0) - posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; - - /* Skip the not sign. We have to recognize it because of a possibly - following ']'. */ - if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) - ++p; - /* A leading ']' is recognized as such. */ - if (*p == L_(']')) - ++p; - /* Skip over all characters of the list. */ - while (*p != L_(']')) - if (*p++ == L_('\0')) - /* This is no valid pattern. 
*/ - return -1; - } - else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') - || *p == L_('!')) && p[1] == L_('(')) - /* Remember the nesting level. */ - ++level; - else if (*p == L_(')')) - { - if (level-- == 0) - { - /* This means we found the end of the pattern. */ -#define NEW_PATTERN \ - struct patternlist *newp; \ - size_t plen; \ - size_t plensize; \ - size_t newpsize; \ - \ - plen = (opt == L_('?') || opt == L_('@') \ - ? pattern_len \ - : p - startp + 1UL); \ - plensize = plen * sizeof (CHAR); \ - newpsize = offsetof (struct patternlist, str) + plensize; \ - if ((size_t) -1 / sizeof (CHAR) < plen \ - || newpsize < offsetof (struct patternlist, str) \ - || ALLOCA_LIMIT <= newpsize) \ - return -1; \ - newp = (struct patternlist *) alloca (newpsize); \ - *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \ - newp->next = NULL; \ - *lastp = newp; \ - lastp = &newp->next - NEW_PATTERN; - break; - } - } - else if (*p == L_('|')) - { - if (level == 0) - { - NEW_PATTERN; - startp = p + 1; - } - } - assert (list != NULL); - assert (p[-1] == L_(')')); -#undef NEW_PATTERN - - switch (opt) - { - case L_('*'): - if (FCT (p, string, string_end, no_leading_period, flags) == 0) - return 0; - /* FALLTHROUGH */ - - case L_('+'): - do - { - for (rs = string; rs <= string_end; ++rs) - /* First match the prefix with the current pattern with the - current pattern. */ - if (FCT (list->str, string, rs, no_leading_period, - flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0 - /* This was successful. Now match the rest with the rest - of the pattern. */ - && (FCT (p, rs, string_end, - rs == string - ? no_leading_period - : rs[-1] == '/' && NO_LEADING_PERIOD (flags), - flags & FNM_FILE_NAME - ? flags : flags & ~FNM_PERIOD) == 0 - /* This didn't work. Try the whole pattern. */ - || (rs != string - && FCT (pattern - 1, rs, string_end, - rs == string - ? 
no_leading_period - : rs[-1] == '/' && NO_LEADING_PERIOD (flags), - flags & FNM_FILE_NAME - ? flags : flags & ~FNM_PERIOD) == 0))) - /* It worked. Signal success. */ - return 0; - } - while ((list = list->next) != NULL); - - /* None of the patterns lead to a match. */ - return FNM_NOMATCH; - - case L_('?'): - if (FCT (p, string, string_end, no_leading_period, flags) == 0) - return 0; - /* FALLTHROUGH */ - - case L_('@'): - do - /* I cannot believe it but 'strcat' is actually acceptable - here. Match the entire string with the prefix from the - pattern list and the rest of the pattern following the - pattern list. */ - if (FCT (STRCAT (list->str, p), string, string_end, - no_leading_period, - flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) - /* It worked. Signal success. */ - return 0; - while ((list = list->next) != NULL); - - /* None of the patterns lead to a match. */ - return FNM_NOMATCH; - - case L_('!'): - for (rs = string; rs <= string_end; ++rs) - { - struct patternlist *runp; - - for (runp = list; runp != NULL; runp = runp->next) - if (FCT (runp->str, string, rs, no_leading_period, - flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) - break; - - /* If none of the patterns matched see whether the rest does. */ - if (runp == NULL - && (FCT (p, rs, string_end, - rs == string - ? no_leading_period - : rs[-1] == '/' && NO_LEADING_PERIOD (flags), - flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) - == 0)) - /* This is successful. */ - return 0; - } - - /* None of the patterns together with the rest of the pattern - lead to a match. */ - return FNM_NOMATCH; - - default: - assert (! 
"Invalid extended matching operator"); - break; - } - - return -1; -} - - -#undef FOLD -#undef CHAR -#undef UCHAR -#undef INT -#undef FCT -#undef EXT -#undef END -#undef MEMPCPY -#undef MEMCHR -#undef STRLEN -#undef STRCAT -#undef L_ -#undef BTOWC diff --git a/contrib/grep/lib/fpending.c b/contrib/grep/lib/fpending.c deleted file mode 100644 index c4b4a51961..0000000000 --- a/contrib/grep/lib/fpending.c +++ /dev/null @@ -1,30 +0,0 @@ -/* fpending.c -- return the number of pending output bytes on a stream - Copyright (C) 2000, 2004, 2006-2007, 2009-2015 Free Software Foundation, - Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* Written by Jim Meyering. */ - -#include - -#include "fpending.h" - -/* Return the number of pending (aka buffered, unflushed) - bytes on the stream, FP, that is open for writing. */ -size_t -__fpending (FILE *fp) -{ - return PENDING_OUTPUT_N_BYTES; -} diff --git a/contrib/grep/lib/fpending.h b/contrib/grep/lib/fpending.h index 5a1b2ad944..a8b8859726 100644 --- a/contrib/grep/lib/fpending.h +++ b/contrib/grep/lib/fpending.h @@ -1,6 +1,6 @@ /* Declare __fpending. - Copyright (C) 2000, 2003, 2005-2006, 2009-2015 Free Software Foundation, + Copyright (C) 2000, 2003, 2005-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. 
You should have received a copy of the GNU General Public License - along with this program. If not, see . + along with this program. If not, see . Written by Jim Meyering. */ diff --git a/contrib/grep/lib/fstat.c b/contrib/grep/lib/fstat.c deleted file mode 100644 index 7a0521fa0f..0000000000 --- a/contrib/grep/lib/fstat.c +++ /dev/null @@ -1,88 +0,0 @@ -/* fstat() replacement. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* If the user's config.h happens to include , let it include only - the system's here, so that orig_fstat doesn't recurse to - rpl_fstat. */ -#define __need_system_sys_stat_h -#include - -/* Get the original definition of fstat. It might be defined as a macro. */ -#include -#include -#if _GL_WINDOWS_64_BIT_ST_SIZE -# undef stat /* avoid warning on mingw64 with _FILE_OFFSET_BITS=64 */ -# define stat _stati64 -# undef fstat /* avoid warning on mingw64 with _FILE_OFFSET_BITS=64 */ -# define fstat _fstati64 -#endif -#undef __need_system_sys_stat_h - -static int -orig_fstat (int fd, struct stat *buf) -{ - return fstat (fd, buf); -} - -/* Specification. */ -/* Write "sys/stat.h" here, not , otherwise OSF/1 5.1 DTK cc - eliminates this include because of the preliminary #include - above. 
*/ -#include "sys/stat.h" - -#include -#include - -#if HAVE_MSVC_INVALID_PARAMETER_HANDLER -# include "msvc-inval.h" -#endif - -#if HAVE_MSVC_INVALID_PARAMETER_HANDLER -static int -fstat_nothrow (int fd, struct stat *buf) -{ - int result; - - TRY_MSVC_INVAL - { - result = orig_fstat (fd, buf); - } - CATCH_MSVC_INVAL - { - result = -1; - errno = EBADF; - } - DONE_MSVC_INVAL; - - return result; -} -#else -# define fstat_nothrow orig_fstat -#endif - -int -rpl_fstat (int fd, struct stat *buf) -{ -#if REPLACE_FCHDIR && REPLACE_OPEN_DIRECTORY - /* Handle the case when rpl_open() used a dummy file descriptor to work - around an open() that can't normally visit directories. */ - const char *name = _gl_directory_name (fd); - if (name != NULL) - return stat (name, buf); -#endif - - return fstat_nothrow (fd, buf); -} diff --git a/contrib/grep/lib/fstatat.c b/contrib/grep/lib/fstatat.c deleted file mode 100644 index 6a9e8623e1..0000000000 --- a/contrib/grep/lib/fstatat.c +++ /dev/null @@ -1,135 +0,0 @@ -/* Work around an fstatat bug on Solaris 9. - - Copyright (C) 2006, 2009-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* Written by Paul Eggert and Jim Meyering. */ - -/* If the user's config.h happens to include , let it include only - the system's here, so that orig_fstatat doesn't recurse to - rpl_fstatat. 
*/ -#define __need_system_sys_stat_h -#include - -/* Get the original definition of fstatat. It might be defined as a macro. */ -#include -#include -#undef __need_system_sys_stat_h - -#if HAVE_FSTATAT -static int -orig_fstatat (int fd, char const *filename, struct stat *buf, int flags) -{ - return fstatat (fd, filename, buf, flags); -} -#endif - -/* Write "sys/stat.h" here, not , otherwise OSF/1 5.1 DTK cc - eliminates this include because of the preliminary #include - above. */ -#include "sys/stat.h" - -#include -#include -#include - -#if HAVE_FSTATAT && HAVE_WORKING_FSTATAT_ZERO_FLAG - -# ifndef LSTAT_FOLLOWS_SLASHED_SYMLINK -# define LSTAT_FOLLOWS_SLASHED_SYMLINK 0 -# endif - -/* fstatat should always follow symbolic links that end in /, but on - Solaris 9 it doesn't if AT_SYMLINK_NOFOLLOW is specified. - Likewise, trailing slash on a non-directory should be an error. - These are the same problems that lstat.c and stat.c address, so - solve it in a similar way. - - AIX 7.1 fstatat (AT_FDCWD, ..., 0) always fails, which is a bug. - Work around this bug if FSTATAT_AT_FDCWD_0_BROKEN is nonzero. */ - -int -rpl_fstatat (int fd, char const *file, struct stat *st, int flag) -{ - int result = orig_fstatat (fd, file, st, flag); - size_t len; - - if (LSTAT_FOLLOWS_SLASHED_SYMLINK || result != 0) - return result; - len = strlen (file); - if (flag & AT_SYMLINK_NOFOLLOW) - { - /* Fix lstat behavior. */ - if (file[len - 1] != '/' || S_ISDIR (st->st_mode)) - return 0; - if (!S_ISLNK (st->st_mode)) - { - errno = ENOTDIR; - return -1; - } - result = orig_fstatat (fd, file, st, flag & ~AT_SYMLINK_NOFOLLOW); - } - /* Fix stat behavior. */ - if (result == 0 && !S_ISDIR (st->st_mode) && file[len - 1] == '/') - { - errno = ENOTDIR; - return -1; - } - return result; -} - -#else /* ! 
(HAVE_FSTATAT && HAVE_WORKING_FSTATAT_ZERO_FLAG) */ - -/* On mingw, the gnulib defines 'stat' as a function-like - macro; but using it in AT_FUNC_F2 causes compilation failure - because the preprocessor sees a use of a macro that requires two - arguments but is only given one. Hence, we need an inline - forwarder to get past the preprocessor. */ -static int -stat_func (char const *name, struct stat *st) -{ - return stat (name, st); -} - -/* Likewise, if there is no native 'lstat', then the gnulib - defined it as stat, which also needs adjustment. */ -# if !HAVE_LSTAT -# undef lstat -# define lstat stat_func -# endif - -/* Replacement for Solaris' function by the same name. - - First, try to simulate it via l?stat ("/proc/self/fd/FD/FILE"). - Failing that, simulate it via save_cwd/fchdir/(stat|lstat)/restore_cwd. - If either the save_cwd or the restore_cwd fails (relatively unlikely), - then give a diagnostic and exit nonzero. - Otherwise, this function works just like Solaris' fstatat. */ - -# define AT_FUNC_NAME fstatat -# define AT_FUNC_F1 lstat -# define AT_FUNC_F2 stat_func -# define AT_FUNC_USE_F1_COND AT_SYMLINK_NOFOLLOW -# define AT_FUNC_POST_FILE_PARAM_DECLS , struct stat *st, int flag -# define AT_FUNC_POST_FILE_ARGS , st -# include "at-func.c" -# undef AT_FUNC_NAME -# undef AT_FUNC_F1 -# undef AT_FUNC_F2 -# undef AT_FUNC_USE_F1_COND -# undef AT_FUNC_POST_FILE_PARAM_DECLS -# undef AT_FUNC_POST_FILE_ARGS - -#endif /* !HAVE_FSTATAT */ diff --git a/contrib/grep/lib/fts-cycle.c b/contrib/grep/lib/fts-cycle.c index 7c96bc66c7..46b5f61859 100644 --- a/contrib/grep/lib/fts-cycle.c +++ b/contrib/grep/lib/fts-cycle.c @@ -1,6 +1,6 @@ /* Detect cycles in file tree walks. - Copyright (C) 2003-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2003-2006, 2009-2020 Free Software Foundation, Inc. Written by Jim Meyering. @@ -15,7 +15,7 @@ GNU General Public License for more details. 
You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include "cycle-check.h" #include "hash.h" diff --git a/contrib/grep/lib/fts.c b/contrib/grep/lib/fts.c index ea73675163..d3a0472a63 100644 --- a/contrib/grep/lib/fts.c +++ b/contrib/grep/lib/fts.c @@ -1,6 +1,6 @@ /* Traverse a file hierarchy. - Copyright (C) 2004-2015 Free Software Foundation, Inc. + Copyright (C) 2004-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /*- * Copyright (c) 1990, 1993, 1994 @@ -46,9 +46,9 @@ #include -#if defined(LIBC_SCCS) && !defined(lint) +#if defined LIBC_SCCS && !defined GCC_LINT && !defined lint static char sccsid[] = "@(#)fts.c 8.6 (Berkeley) 8/14/94"; -#endif /* LIBC_SCCS and not lint */ +#endif #include "fts_.h" @@ -71,12 +71,9 @@ static char sccsid[] = "@(#)fts.c 8.6 (Berkeley) 8/14/94"; #if ! _LIBC # include "fcntl--.h" -# include "dirent--.h" -# include "unistd--.h" -/* FIXME - use fcntl(F_DUPFD_CLOEXEC)/openat(O_CLOEXEC) once they are - supported. 
*/ -# include "cloexec.h" +# include "flexmember.h" # include "openat.h" +# include "opendirat.h" # include "same-inode.h" #endif @@ -202,6 +199,14 @@ enum Fts_stat while (false) #endif +#ifndef FALLTHROUGH +# if __GNUC__ < 7 +# define FALLTHROUGH ((void) 0) +# else +# define FALLTHROUGH __attribute__ ((__fallthrough__)) +# endif +#endif + static FTSENT *fts_alloc (FTS *, const char *, size_t) internal_function; static FTSENT *fts_build (FTS *, int) internal_function; static void fts_lfree (FTSENT *) internal_function; @@ -290,32 +295,6 @@ fts_set_stat_required (FTSENT *p, bool required) : FTS_NO_STAT_REQUIRED); } -/* file-descriptor-relative opendir. */ -/* FIXME: if others need this function, move it into lib/openat.c */ -static DIR * -internal_function -opendirat (int fd, char const *dir, int extra_flags, int *pdir_fd) -{ - int new_fd = openat (fd, dir, - (O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK - | extra_flags)); - DIR *dirp; - - if (new_fd < 0) - return NULL; - set_cloexec_flag (new_fd, true); - dirp = fdopendir (new_fd); - if (dirp) - *pdir_fd = new_fd; - else - { - int saved_errno = errno; - close (new_fd); - errno = saved_errno; - } - return dirp; -} - /* Virtual fchdir. Advance SP's working directory file descriptor, SP->fts_cwd_fd, to FD, and push the previous value onto the fd_ring. CHDIR_DOWN_ONE is true if FD corresponds to an entry in the directory @@ -366,15 +345,12 @@ static int internal_function diropen (FTS const *sp, char const *dir) { - int open_flags = (O_SEARCH | O_DIRECTORY | O_NOCTTY | O_NONBLOCK - | (ISSET (FTS_PHYSICAL) ? O_NOFOLLOW : 0) - | (ISSET (FTS_NOATIME) ? O_NOATIME : 0)); + int open_flags = (O_SEARCH | O_CLOEXEC | O_DIRECTORY | O_NOCTTY | O_NONBLOCK + | (ISSET (FTS_PHYSICAL) ? O_NOFOLLOW : 0)); int fd = (ISSET (FTS_CWDFD) ? 
openat (sp->fts_cwd_fd, dir, open_flags) : open (dir, open_flags)); - if (0 <= fd) - set_cloexec_flag (fd, true); return fd; } @@ -405,9 +381,9 @@ fts_open (char * const *argv, } /* Allocate/initialize the stream */ - if ((sp = malloc(sizeof(FTS))) == NULL) + sp = calloc (1, sizeof *sp); + if (sp == NULL) return (NULL); - memset(sp, 0, sizeof(FTS)); sp->fts_compar = compar; sp->fts_options = options; @@ -425,8 +401,7 @@ fts_open (char * const *argv, early, doing it here saves us the trouble of ensuring later (where it'd be messier) that "." can in fact be opened. If not, revert to FTS_NOCHDIR mode. */ - int fd = open (".", - O_SEARCH | (ISSET (FTS_NOATIME) ? O_NOATIME : 0)); + int fd = open (".", O_SEARCH); if (fd < 0) { /* Even if "." is unreadable, don't revert to FTS_NOCHDIR mode @@ -470,6 +445,7 @@ fts_open (char * const *argv, if ((parent = fts_alloc(sp, "", 0)) == NULL) goto mem2; parent->fts_level = FTS_ROOTPARENTLEVEL; + parent->fts_n_dirs_remaining = -1; } /* The classic fts implementation would call fts_stat with @@ -544,6 +520,7 @@ fts_open (char * const *argv, goto mem3; sp->fts_cur->fts_link = root; sp->fts_cur->fts_info = FTS_INIT; + sp->fts_cur->fts_level = 1; if (! setup_dir (sp)) goto mem3; @@ -656,172 +633,213 @@ fts_close (FTS *sp) return (0); } -#if defined __linux__ \ +/* Minimum link count of a traditional Unix directory. When leaf + optimization is OK and MIN_DIR_NLINK <= st_nlink, then st_nlink is + an upper bound on the number of subdirectories (counting "." and + ".."). */ +enum { MIN_DIR_NLINK = 2 }; + +/* Whether leaf optimization is OK for a directory. */ +enum leaf_optimization + { + /* st_nlink is not reliable for this directory's subdirectories. */ + NO_LEAF_OPTIMIZATION, + + /* Leaf optimization is OK, but is not useful for avoiding stat calls. */ + OK_LEAF_OPTIMIZATION, + + /* Leaf optimization is not only OK: it is useful for avoiding + stat calls, because dirent.d_type does not work. 
*/ + NOSTAT_LEAF_OPTIMIZATION + }; + +#if (defined __linux__ || defined __ANDROID__) \ && HAVE_SYS_VFS_H && HAVE_FSTATFS && HAVE_STRUCT_STATFS_F_TYPE # include /* Linux-specific constants from coreutils' src/fs.h */ -# define S_MAGIC_TMPFS 0x1021994 +# define S_MAGIC_AFS 0x5346414F +# define S_MAGIC_CIFS 0xFF534D42 # define S_MAGIC_NFS 0x6969 -# define S_MAGIC_REISERFS 0x52654973 # define S_MAGIC_PROC 0x9FA0 +# define S_MAGIC_REISERFS 0x52654973 +# define S_MAGIC_TMPFS 0x1021994 +# define S_MAGIC_XFS 0x58465342 -/* Return false if it is easy to determine the file system type of - the directory on which DIR_FD is open, and sorting dirents on - inode numbers is known not to improve traversal performance with - that type of file system. Otherwise, return true. */ -static bool -dirent_inode_sort_may_be_useful (int dir_fd) -{ - /* Skip the sort only if we can determine efficiently - that skipping it is the right thing to do. - The cost of performing an unnecessary sort is negligible, - while the cost of *not* performing it can be O(N^2) with - a very large constant. */ - struct statfs fs_buf; - - /* If fstatfs fails, assume sorting would be useful. */ - if (fstatfs (dir_fd, &fs_buf) != 0) - return true; - - /* FIXME: what about when f_type is not an integral type? - deal with that if/when it's encountered. */ - switch (fs_buf.f_type) - { - case S_MAGIC_TMPFS: - case S_MAGIC_NFS: - /* On a file system of any of these types, sorting - is unnecessary, and hence wasteful. */ - return false; - - default: - return true; - } -} - -/* Given a file descriptor DIR_FD open on a directory D, - return true if it is valid to apply the leaf-optimization - technique of counting directories in D via stat.st_nlink. */ -static bool -leaf_optimization_applies (int dir_fd) -{ - struct statfs fs_buf; - - /* If fstatfs fails, assume we can't use the optimization. */ - if (fstatfs (dir_fd, &fs_buf) != 0) - return false; - - /* FIXME: do we need to detect AFS mount points? 
I doubt it, - unless fstatfs can report S_MAGIC_REISERFS for such a directory. */ - - switch (fs_buf.f_type) - { - /* List here the file system types that lack usable dirent.d_type - info, yet for which the optimization does apply. */ - case S_MAGIC_REISERFS: - return true; - - case S_MAGIC_PROC: - /* Explicitly listing this or any other file system type for which - the optimization is not applicable is not necessary, but we leave - it here to document the risk. Per http://bugs.debian.org/143111, - /proc may have bogus stat.st_nlink values. */ - /* fall through */ - default: - return false; - } -} - -#else -static bool -dirent_inode_sort_may_be_useful (int dir_fd _GL_UNUSED) { return true; } -static bool -leaf_optimization_applies (int dir_fd _GL_UNUSED) { return false; } -#endif +# ifdef HAVE___FSWORD_T +typedef __fsword_t fsword; +# else +typedef long int fsword; +# endif -/* link-count-optimization entry: - map a stat.st_dev number to a boolean: leaf_optimization_works */ -struct LCO_ent +/* Map a stat.st_dev number to a file system type number f_ftype. */ +struct dev_type { dev_t st_dev; - bool opt_ok; + fsword f_type; }; /* Use a tiny initial size. If a traversal encounters more than a few devices, the cost of growing/rehashing this table will be rendered negligible by the number of inodes processed. 
*/ -enum { LCO_HT_INITIAL_SIZE = 13 }; +enum { DEV_TYPE_HT_INITIAL_SIZE = 13 }; static size_t -LCO_hash (void const *x, size_t table_size) +dev_type_hash (void const *x, size_t table_size) { - struct LCO_ent const *ax = x; - return (uintmax_t) ax->st_dev % table_size; + struct dev_type const *ax = x; + uintmax_t dev = ax->st_dev; + return dev % table_size; } static bool -LCO_compare (void const *x, void const *y) +dev_type_compare (void const *x, void const *y) { - struct LCO_ent const *ax = x; - struct LCO_ent const *ay = y; + struct dev_type const *ax = x; + struct dev_type const *ay = y; return ax->st_dev == ay->st_dev; } -/* Ask the same question as leaf_optimization_applies, but query - the cache first (FTS.fts_leaf_optimization_works_ht), and if necessary, - update that cache. */ -static bool -link_count_optimize_ok (FTSENT const *p) +/* Return the file system type of P with file descriptor FD, or 0 if not known. + If FD is negative, P's file descriptor is unavailable. + Try to cache known values. */ + +static fsword +filesystem_type (FTSENT const *p, int fd) { FTS *sp = p->fts_fts; Hash_table *h = sp->fts_leaf_optimization_works_ht; - struct LCO_ent tmp; - struct LCO_ent *ent; - bool opt_ok; - struct LCO_ent *t2; + struct dev_type *ent; + struct statfs fs_buf; /* If we're not in CWDFD mode, don't bother with this optimization, - since the caller is not serious about performance. */ - if (!ISSET(FTS_CWDFD)) - return false; - - /* map st_dev to the boolean, leaf_optimization_works */ - if (h == NULL) + since the caller is not serious about performance. */ + if (!ISSET (FTS_CWDFD)) + return 0; + + if (! 
h) + h = sp->fts_leaf_optimization_works_ht + = hash_initialize (DEV_TYPE_HT_INITIAL_SIZE, NULL, dev_type_hash, + dev_type_compare, free); + if (h) { - h = sp->fts_leaf_optimization_works_ht - = hash_initialize (LCO_HT_INITIAL_SIZE, NULL, LCO_hash, - LCO_compare, free); - if (h == NULL) - return false; + struct dev_type tmp; + tmp.st_dev = p->fts_statp->st_dev; + ent = hash_lookup (h, &tmp); + if (ent) + return ent->f_type; } - tmp.st_dev = p->fts_statp->st_dev; - ent = hash_lookup (h, &tmp); - if (ent) - return ent->opt_ok; /* Look-up failed. Query directly and cache the result. */ - t2 = malloc (sizeof *t2); - if (t2 == NULL) - return false; + if (fd < 0 || fstatfs (fd, &fs_buf) != 0) + return 0; - /* Is it ok to perform the optimization in the dir, FTS_CWD_FD? */ - opt_ok = leaf_optimization_applies (sp->fts_cwd_fd); - t2->opt_ok = opt_ok; - t2->st_dev = p->fts_statp->st_dev; + if (h) + { + struct dev_type *t2 = malloc (sizeof *t2); + if (t2) + { + t2->st_dev = p->fts_statp->st_dev; + t2->f_type = fs_buf.f_type; + + ent = hash_insert (h, t2); + if (ent) + fts_assert (ent == t2); + else + free (t2); + } + } + + return fs_buf.f_type; +} + +/* Return true if sorting dirents on inode numbers is known to improve + traversal performance for the directory P with descriptor DIR_FD. + Return false otherwise. When in doubt, return true. + DIR_FD is negative if unavailable. */ +static bool +dirent_inode_sort_may_be_useful (FTSENT const *p, int dir_fd) +{ + /* Skip the sort only if we can determine efficiently + that skipping it is the right thing to do. + The cost of performing an unnecessary sort is negligible, + while the cost of *not* performing it can be O(N^2) with + a very large constant. */ - ent = hash_insert (h, t2); - if (ent == NULL) + switch (filesystem_type (p, dir_fd)) { - /* insertion failed */ - free (t2); + case S_MAGIC_CIFS: + case S_MAGIC_NFS: + case S_MAGIC_TMPFS: + /* On a file system of any of these types, sorting + is unnecessary, and hence wasteful. 
*/ return false; + + default: + return true; } - fts_assert (ent == t2); +} + +/* Given an FTS entry P for a directory with descriptor DIR_FD, + return true if it is both useful and valid to apply leaf optimization. + The optimization is useful only for file systems that lack usable + dirent.d_type info. The optimization is valid if an st_nlink value + of at least MIN_DIR_NLINK is an upper bound on the number of + subdirectories of D, counting "." and ".." as subdirectories. + DIR_FD is negative if unavailable. */ +static enum leaf_optimization +leaf_optimization (FTSENT const *p, int dir_fd) +{ + switch (filesystem_type (p, dir_fd)) + { + /* List here the file system types that may lack usable dirent.d_type + info, yet for which the optimization does apply. */ + case S_MAGIC_REISERFS: + case S_MAGIC_XFS: /* XFS lacked it until 2013-08-22 commit. */ + return NOSTAT_LEAF_OPTIMIZATION; + + case 0: + /* Leaf optimization is unsafe if the file system type is unknown. */ + FALLTHROUGH; + case S_MAGIC_AFS: + /* Although AFS mount points are not counted in st_nlink, they + act like directories. See . */ + FALLTHROUGH; + case S_MAGIC_CIFS: + /* Leaf optimization causes 'find' to abort. See + . */ + FALLTHROUGH; + case S_MAGIC_NFS: + /* NFS provides usable dirent.d_type but not necessarily for all entries + of large directories, so as per + NFS should return true. However st_nlink values are not accurate on + all implementations as per . */ + FALLTHROUGH; + case S_MAGIC_PROC: + /* Per /proc + may have bogus stat.st_nlink values. 
*/ + return NO_LEAF_OPTIMIZATION; - return opt_ok; + default: + return OK_LEAF_OPTIMIZATION; + } } +#else +static bool +dirent_inode_sort_may_be_useful (FTSENT const *p _GL_UNUSED, + int dir_fd _GL_UNUSED) +{ + return true; +} +static enum leaf_optimization +leaf_optimization (FTSENT const *p _GL_UNUSED, int dir_fd _GL_UNUSED) +{ + return NO_LEAF_OPTIMIZATION; +} +#endif + /* * Special case of "/" at the end of the file name so that slashes aren't * appended which would cause file names to be written as "....//foo". @@ -1007,13 +1025,11 @@ check_for_dir: if (p->fts_statp->st_size == FTS_STAT_REQUIRED) { FTSENT *parent = p->fts_parent; - if (FTS_ROOTLEVEL < p->fts_level - /* ->fts_n_dirs_remaining is not valid - for command-line-specified names. */ - && parent->fts_n_dirs_remaining == 0 + if (parent->fts_n_dirs_remaining == 0 && ISSET(FTS_NOSTAT) && ISSET(FTS_PHYSICAL) - && link_count_optimize_ok (parent)) + && (leaf_optimization (parent, sp->fts_cwd_fd) + == NOSTAT_LEAF_OPTIMIZATION)) { /* nothing more needed */ } @@ -1022,7 +1038,8 @@ check_for_dir: p->fts_info = fts_stat(sp, p, false); if (S_ISDIR(p->fts_statp->st_mode) && p->fts_level != FTS_ROOTLEVEL - && parent->fts_n_dirs_remaining) + && 0 < parent->fts_n_dirs_remaining + && parent->fts_n_dirs_remaining != (nlink_t) -1) parent->fts_n_dirs_remaining--; } } @@ -1261,8 +1278,7 @@ set_stat_type (struct stat *st, unsigned int dtype) (((ISSET(FTS_PHYSICAL) \ && ! (ISSET(FTS_COMFOLLOW) \ && cur->fts_level == FTS_ROOTLEVEL)) \ - ? O_NOFOLLOW : 0) \ - | (ISSET (FTS_NOATIME) ? O_NOATIME : 0)), \ + ? 
O_NOFOLLOW : 0)), \ Pdir_fd) /* @@ -1291,13 +1307,12 @@ fts_build (register FTS *sp, int type) bool descend; bool doadjust; ptrdiff_t level; - nlink_t nlinks; - bool nostat; size_t len, maxlen, new_len; char *cp; int dir_fd; FTSENT *cur = sp->fts_cur; bool continue_readdir = !!cur->fts_dirp; + bool sort_by_inode = false; size_t max_entries; /* When cur->fts_dirp is non-NULL, that means we should @@ -1362,24 +1377,6 @@ fts_build (register FTS *sp, int type) sorting, yet not so large that we risk exhausting memory. */ max_entries = sp->fts_compar ? SIZE_MAX : FTS_MAX_READDIR_ENTRIES; - /* - * Nlinks is the number of possible entries of type directory in the - * directory if we're cheating on stat calls, 0 if we're not doing - * any stat calls at all, (nlink_t) -1 if we're statting everything. - */ - if (type == BNAMES) { - nlinks = 0; - /* Be quiet about nostat, GCC. */ - nostat = false; - } else if (ISSET(FTS_NOSTAT) && ISSET(FTS_PHYSICAL)) { - nlinks = (cur->fts_statp->st_nlink - - (ISSET(FTS_SEEDOT) ? 0 : 2)); - nostat = true; - } else { - nlinks = -1; - nostat = false; - } - /* * If we're going to need to stat anything or we want to descend * and stay in the directory, chdir. If this fails we keep going, @@ -1401,15 +1398,22 @@ fts_build (register FTS *sp, int type) the required dirp and dir_fd. */ descend = true; } - else if (nlinks || type == BREAD) { + else + { + /* Try to descend unless it is a names-only fts_children, + or the directory is known to lack subdirectories. */ + descend = (type != BNAMES + && ! (ISSET (FTS_NOSTAT) && ISSET (FTS_PHYSICAL) + && ! 
ISSET (FTS_SEEDOT) + && cur->fts_statp->st_nlink == MIN_DIR_NLINK + && (leaf_optimization (cur, dir_fd) + != NO_LEAF_OPTIMIZATION))); + if (descend || type == BREAD) + { if (ISSET(FTS_CWDFD)) - { - dir_fd = dup (dir_fd); - if (0 <= dir_fd) - set_cloexec_flag (dir_fd, true); - } + dir_fd = fcntl (dir_fd, F_DUPFD_CLOEXEC, STDERR_FILENO + 1); if (dir_fd < 0 || fts_safe_changedir(sp, cur, dir_fd, NULL)) { - if (nlinks && type == BREAD) + if (descend && type == BREAD) cur->fts_errno = errno; cur->fts_flags |= FTS_DONTCHDIR; descend = false; @@ -1419,8 +1423,8 @@ fts_build (register FTS *sp, int type) cur->fts_dirp = NULL; } else descend = true; - } else - descend = false; + } + } /* * Figure out the max file name length that can be stored in the @@ -1451,11 +1455,19 @@ fts_build (register FTS *sp, int type) tail = NULL; nitems = 0; while (cur->fts_dirp) { - bool is_dir; size_t d_namelen; + __set_errno (0); struct dirent *dp = readdir(cur->fts_dirp); - if (dp == NULL) + if (dp == NULL) { + if (errno) { + cur->fts_errno = errno; + /* If we've not read any items yet, treat + the error as if we can't access the dir. */ + cur->fts_info = (continue_readdir || nitems) + ? FTS_ERR : FTS_DNR; + } break; + } if (!ISSET(FTS_SEEDOT) && ISDOT(dp->d_name)) continue; @@ -1543,19 +1555,10 @@ mem1: saved_errno = errno; to caller, when possible. */ set_stat_type (p->fts_statp, D_TYPE (dp)); fts_set_stat_required(p, !skip_stat); - is_dir = (ISSET(FTS_PHYSICAL) - && DT_MUST_BE(dp, DT_DIR)); } else { p->fts_info = fts_stat(sp, p, false); - is_dir = (p->fts_info == FTS_D - || p->fts_info == FTS_DC - || p->fts_info == FTS_DOT); } - /* Decrement link count if applicable. */ - if (nlinks > 0 && is_dir) - nlinks -= nostat; - /* We walk in directory order so "ls -f" doesn't get upset. 
*/ p->fts_link = NULL; if (head == NULL) @@ -1564,6 +1567,19 @@ mem1: saved_errno = errno; tail->fts_link = p; tail = p; } + + /* If there are many entries, no sorting function has been + specified, and this file system is of a type that may be + slow with a large number of entries, arrange to sort the + directory entries on increasing inode numbers. + + The NITEMS comparison uses ==, not >, because the test + needs to be tried at most once, and NITEMS will exceed + the threshold after it is incremented below. */ + if (nitems == _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD + && !sp->fts_compar) + sort_by_inode = dirent_inode_sort_may_be_useful (cur, dir_fd); + ++nitems; if (max_entries <= nitems) { /* When there are too many dir entries, leave @@ -1614,20 +1630,14 @@ mem1: saved_errno = errno; /* If didn't find anything, return NULL. */ if (!nitems) { - if (type == BREAD) + if (type == BREAD + && cur->fts_info != FTS_DNR && cur->fts_info != FTS_ERR) cur->fts_info = FTS_DP; fts_lfree(head); return (NULL); } - /* If there are many entries, no sorting function has been specified, - and this file system is of a type that may be slow with a large - number of entries, then sort the directory entries on increasing - inode numbers. 
*/ - if (nitems > _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD - && !sp->fts_compar - && ISSET (FTS_CWDFD) - && dirent_inode_sort_may_be_useful (sp->fts_cwd_fd)) { + if (sort_by_inode) { sp->fts_compar = fts_compare_ino; head = fts_sort (sp, head, nitems); sp->fts_compar = NULL; @@ -1750,7 +1760,7 @@ fd_ring_check (FTS const *sp) I_ring fd_w = sp->fts_fd_ring; int cwd_fd = sp->fts_cwd_fd; - cwd_fd = dup (cwd_fd); + cwd_fd = fcntl (cwd_fd, F_DUPFD_CLOEXEC, STDERR_FILENO + 1); char *dot = getcwdat (cwd_fd, NULL, 0); error (0, 0, "===== check ===== cwd: %s", dot); free (dot); @@ -1759,7 +1769,8 @@ fd_ring_check (FTS const *sp) int fd = i_ring_pop (&fd_w); if (0 <= fd) { - int parent_fd = openat (cwd_fd, "..", O_SEARCH | O_NOATIME); + int open_flags = O_SEARCH | O_CLOEXEC; + int parent_fd = openat (cwd_fd, "..", open_flags); if (parent_fd < 0) { // Warn? @@ -1788,7 +1799,6 @@ internal_function fts_stat(FTS *sp, register FTSENT *p, bool follow) { struct stat *sbp = p->fts_statp; - int saved_errno; if (p->fts_level == FTS_ROOTLEVEL && ISSET(FTS_COMFOLLOW)) follow = true; @@ -1800,13 +1810,12 @@ fts_stat(FTS *sp, register FTSENT *p, bool follow) */ if (ISSET(FTS_LOGICAL) || follow) { if (stat(p->fts_accpath, sbp)) { - saved_errno = errno; if (errno == ENOENT && lstat(p->fts_accpath, sbp) == 0) { __set_errno (0); return (FTS_SLNONE); } - p->fts_errno = saved_errno; + p->fts_errno = errno; goto err; } } else if (fstatat(sp->fts_cwd_fd, p->fts_accpath, sbp, @@ -1817,8 +1826,11 @@ err: memset(sbp, 0, sizeof(struct stat)); } if (S_ISDIR(sbp->st_mode)) { - p->fts_n_dirs_remaining = (sbp->st_nlink - - (ISSET(FTS_SEEDOT) ? 0 : 2)); + p->fts_n_dirs_remaining + = ((sbp->st_nlink < MIN_DIR_NLINK + || p->fts_level <= FTS_ROOTLEVEL) + ? -1 + : sbp->st_nlink - (ISSET (FTS_SEEDOT) ? 0 : MIN_DIR_NLINK)); if (ISDOT(p->fts_name)) { /* Command-line "." and ".." are real directories. */ return (p->fts_level == FTS_ROOTLEVEL ? 
FTS_D : FTS_DOT); @@ -1907,17 +1919,7 @@ fts_alloc (FTS *sp, const char *name, register size_t namelen) * The file name is a variable length array. Allocate the FTSENT * structure and the file name in one chunk. */ - len = offsetof(FTSENT, fts_name) + namelen + 1; - /* Align the allocation size so that it works for FTSENT, - so that trailing padding may be referenced by direct access - to the flexible array members, without triggering undefined behavior - by accessing bytes beyond the heap allocation. This implicit access - was seen for example with ISDOT() and GCC 5.1.1 at -O2. - Do not use alignof (FTSENT) here, since C11 prohibits - taking the alignment of a structure containing a flexible - array member. */ - len += alignof (max_align_t) - 1; - len &= ~ (alignof (max_align_t) - 1); + len = FLEXSIZEOF(FTSENT, fts_name, namelen + 1); if ((p = malloc(len)) == NULL) return (NULL); @@ -2067,7 +2069,6 @@ fts_safe_changedir (FTS *sp, FTSENT *p, int fd, char const *dir) int parent_fd; fd_ring_print (sp, stderr, "pre-pop"); parent_fd = i_ring_pop (&sp->fts_fd_ring); - is_dotdot = true; if (0 <= parent_fd) { fd = parent_fd; diff --git a/contrib/grep/lib/fts_.h b/contrib/grep/lib/fts_.h index b9a3f12cca..6c7d0cef6f 100644 --- a/contrib/grep/lib/fts_.h +++ b/contrib/grep/lib/fts_.h @@ -1,6 +1,6 @@ /* Traverse a file hierarchy. - Copyright (C) 2004-2015 Free Software Foundation, Inc. + Copyright (C) 2004-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ + along with this program. If not, see <https://www.gnu.org/licenses/>. */ /* * Copyright (c) 1989, 1993 @@ -149,16 +149,14 @@ typedef struct { dirent.d_type data. 
*/ # define FTS_DEFER_STAT 0x0400 -# define FTS_NOATIME 0x0800 /* use O_NOATIME during traversal */ - /* Use this flag to disable stripping of trailing slashes from input path names during fts_open initialization. */ -# define FTS_VERBATIM 0x1000 +# define FTS_VERBATIM 0x0800 -# define FTS_OPTIONMASK 0x1fff /* valid user option mask */ +# define FTS_OPTIONMASK 0x0fff /* valid user option mask */ -# define FTS_NAMEONLY 0x2000 /* (private) child names only */ -# define FTS_STOP 0x4000 /* (private) unrecoverable error */ +# define FTS_NAMEONLY 0x1000 /* (private) child names only */ +# define FTS_STOP 0x2000 /* (private) unrecoverable error */ int fts_options; /* fts_open options, global flags */ /* Map a directory's device number to a boolean. The boolean is @@ -220,7 +218,11 @@ typedef struct _ftsent { ptrdiff_t fts_level; /* depth (-1 to N) */ size_t fts_namelen; /* strlen(fts_name) */ - nlink_t fts_n_dirs_remaining; /* count down from st_nlink */ + + /* If not (nlink_t) -1, an upper bound on the number of + remaining subdirectories of interest. If this becomes + zero, some work can be avoided. */ + nlink_t fts_n_dirs_remaining; # define FTS_D 1 /* preorder directory */ # define FTS_DC 2 /* directory that causes cycles */ diff --git a/contrib/grep/lib/getcwd-lgpl.c b/contrib/grep/lib/getcwd-lgpl.c deleted file mode 100644 index 1e17e193a8..0000000000 --- a/contrib/grep/lib/getcwd-lgpl.c +++ /dev/null @@ -1,126 +0,0 @@ -/* Copyright (C) 2011-2015 Free Software Foundation, Inc. - This file is part of gnulib. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification */ -#include - -#include -#include -#include - -#if GNULIB_GETCWD -/* Favor GPL getcwd.c if both getcwd and getcwd-lgpl modules are in use. */ -typedef int dummy; -#else - -/* Get the name of the current working directory, and put it in SIZE - bytes of BUF. Returns NULL if the directory couldn't be determined - (perhaps because the absolute name was longer than PATH_MAX, or - because of missing read/search permissions on parent directories) - or SIZE was too small. If successful, returns BUF. If BUF is - NULL, an array is allocated with 'malloc'; the array is SIZE bytes - long, unless SIZE == 0, in which case it is as big as - necessary. */ - -# undef getcwd -char * -rpl_getcwd (char *buf, size_t size) -{ - char *ptr; - char *result; - - /* Handle single size operations. */ - if (buf) - { - if (!size) - { - errno = EINVAL; - return NULL; - } - return getcwd (buf, size); - } - - if (size) - { - buf = malloc (size); - if (!buf) - { - errno = ENOMEM; - return NULL; - } - result = getcwd (buf, size); - if (!result) - { - int saved_errno = errno; - free (buf); - errno = saved_errno; - } - return result; - } - - /* Flexible sizing requested. Avoid over-allocation for the common - case of a name that fits within a 4k page, minus some space for - local variables, to be sure we don't skip over a guard page. */ - { - char tmp[4032]; - size = sizeof tmp; - ptr = getcwd (tmp, size); - if (ptr) - { - result = strdup (ptr); - if (!result) - errno = ENOMEM; - return result; - } - if (errno != ERANGE) - return NULL; - } - - /* My what a large directory name we have. 
*/ - do - { - size <<= 1; - ptr = realloc (buf, size); - if (ptr == NULL) - { - free (buf); - errno = ENOMEM; - return NULL; - } - buf = ptr; - result = getcwd (buf, size); - } - while (!result && errno == ERANGE); - - if (!result) - { - int saved_errno = errno; - free (buf); - errno = saved_errno; - } - else - { - /* Trim to fit, if possible. */ - result = realloc (buf, strlen (buf) + 1); - if (!result) - result = buf; - } - return result; -} - -#endif diff --git a/contrib/grep/lib/getdtablesize.c b/contrib/grep/lib/getdtablesize.c deleted file mode 100644 index 03eb7ef1bf..0000000000 --- a/contrib/grep/lib/getdtablesize.c +++ /dev/null @@ -1,121 +0,0 @@ -/* getdtablesize() function for platforms that don't have it. - Copyright (C) 2008-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2008. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ - -# include - -# include "msvc-inval.h" - -# if HAVE_MSVC_INVALID_PARAMETER_HANDLER -static int -_setmaxstdio_nothrow (int newmax) -{ - int result; - - TRY_MSVC_INVAL - { - result = _setmaxstdio (newmax); - } - CATCH_MSVC_INVAL - { - result = -1; - } - DONE_MSVC_INVAL; - - return result; -} -# define _setmaxstdio _setmaxstdio_nothrow -# endif - -/* Cache for the previous getdtablesize () result. Safe to cache because - Windows also lacks setrlimit. 
*/ -static int dtablesize; - -int -getdtablesize (void) -{ - if (dtablesize == 0) - { - /* We are looking for the number N such that the valid file descriptors - are 0..N-1. It can be obtained through a loop as follows: - { - int fd; - for (fd = 3; fd < 65536; fd++) - if (dup2 (0, fd) == -1) - break; - return fd; - } - On Windows XP, the result is 2048. - The drawback of this loop is that it allocates memory for a libc - internal array that is never freed. - - The number N can also be obtained as the upper bound for - _getmaxstdio (). _getmaxstdio () returns the maximum number of open - FILE objects. The sanity check in _setmaxstdio reveals the maximum - number of file descriptors. This too allocates memory, but it is - freed when we call _setmaxstdio with the original value. */ - int orig_max_stdio = _getmaxstdio (); - unsigned int bound; - for (bound = 0x10000; _setmaxstdio (bound) < 0; bound = bound / 2) - ; - _setmaxstdio (orig_max_stdio); - dtablesize = bound; - } - return dtablesize; -} - -#else - -# include -# include - -# ifndef RLIM_SAVED_CUR -# define RLIM_SAVED_CUR RLIM_INFINITY -# endif -# ifndef RLIM_SAVED_MAX -# define RLIM_SAVED_MAX RLIM_INFINITY -# endif - -# ifdef __CYGWIN__ - /* Cygwin 1.7.25 auto-increases the RLIMIT_NOFILE soft limit until it - hits the compile-time constant hard limit of 3200. We might as - well just report the hard limit. */ -# define rlim_cur rlim_max -# endif - -int -getdtablesize (void) -{ - struct rlimit lim; - - if (getrlimit (RLIMIT_NOFILE, &lim) == 0 - && 0 <= lim.rlim_cur && lim.rlim_cur <= INT_MAX - && lim.rlim_cur != RLIM_INFINITY - && lim.rlim_cur != RLIM_SAVED_CUR - && lim.rlim_cur != RLIM_SAVED_MAX) - return lim.rlim_cur; - - return INT_MAX; -} - -#endif diff --git a/contrib/grep/lib/getopt-core.h b/contrib/grep/lib/getopt-core.h new file mode 100644 index 0000000000..d4d942ef22 --- /dev/null +++ b/contrib/grep/lib/getopt-core.h @@ -0,0 +1,96 @@ +/* Declarations for getopt (basic, portable features only). 
+ Copyright (C) 1989-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library and is also part of gnulib. + Patches to this file should be submitted to both projects. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _GETOPT_CORE_H +#define _GETOPT_CORE_H 1 + +/* This header should not be used directly; include getopt.h or + unistd.h instead. Unlike most bits headers, it does not have + a protective #error, because the guard macro for getopt.h in + gnulib is not fixed. */ + +__BEGIN_DECLS + +/* For communication from 'getopt' to the caller. + When 'getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when 'ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to 'getopt'. + + On entry to 'getopt', zero means this is the first call; initialize. + + When 'getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, 'optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message 'getopt' prints + for unrecognized options. 
*/ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, 'optopt' is set to the option letter, and '?' is + returned. + + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in 'optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU 'getopt'. + + The argument '--' causes premature termination of argument + scanning, explicitly telling 'getopt' that there are no more + options. + + If OPTS begins with '-', then non-option arguments are treated as + arguments to the option '\1'. This behavior is specific to the GNU + 'getopt'. If OPTS begins with '+', or POSIXLY_CORRECT is set in + the environment, then do not permute arguments. + + For standards compliance, the 'argv' argument has the type + char *const *, but this is inaccurate; if argument permutation is + enabled, the argv array (not the strings it points to) must be + writable. */ + +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts) + __THROW _GL_ARG_NONNULL ((2, 3)); + +__END_DECLS + +#endif /* _GETOPT_CORE_H */ diff --git a/contrib/grep/lib/getopt-ext.h b/contrib/grep/lib/getopt-ext.h new file mode 100644 index 0000000000..05f7083ac6 --- /dev/null +++ b/contrib/grep/lib/getopt-ext.h @@ -0,0 +1,77 @@ +/* Declarations for getopt (GNU extensions). + Copyright (C) 1989-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library and is also part of gnulib. + Patches to this file should be submitted to both projects. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _GETOPT_EXT_H +#define _GETOPT_EXT_H 1 + +/* This header should not be used directly; include getopt.h instead. + Unlike most bits headers, it does not have a protective #error, + because the guard macro for getopt.h in gnulib is not fixed. */ + +__BEGIN_DECLS + +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of 'struct option' terminated by an element containing a name which is + zero. + + The field 'has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field 'flag' is not NULL, it points to a variable that is set + to the value given in the field 'val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an 'int' to + a compiled-in constant, such as set a value from 'optarg', set the + option's 'flag' field to zero and its 'val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero 'flag' field, 'getopt' + returns the contents of the 'val' field. 
*/ + +struct option +{ + const char *name; + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the 'has_arg' field of 'struct option'. */ + +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + +extern int getopt_long (int ___argc, char *__getopt_argv_const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind) + __THROW _GL_ARG_NONNULL ((2, 3)); +extern int getopt_long_only (int ___argc, char *__getopt_argv_const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind) + __THROW _GL_ARG_NONNULL ((2, 3)); + +__END_DECLS + +#endif /* _GETOPT_EXT_H */ diff --git a/contrib/grep/lib/getopt-pfx-core.h b/contrib/grep/lib/getopt-pfx-core.h new file mode 100644 index 0000000000..da0a6d0c3c --- /dev/null +++ b/contrib/grep/lib/getopt-pfx-core.h @@ -0,0 +1,59 @@ +/* getopt (basic, portable features) gnulib wrapper header. + Copyright (C) 1989-2020 Free Software Foundation, Inc. + This file is part of gnulib. + Unlike most of the getopt implementation, it is NOT shared + with the GNU C Library. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 3 of + the License, or (at your option) any later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with gnulib; if not, see + . */ + +#ifndef _GETOPT_PFX_CORE_H +#define _GETOPT_PFX_CORE_H 1 + +/* This header should not be used directly; include getopt.h or + unistd.h instead. 
It does not have a protective #error, because + the guard macro for getopt.h in gnulib is not fixed. */ + +/* Standalone applications should #define __GETOPT_PREFIX to an + identifier that prefixes the external functions and variables + defined in getopt-core.h and getopt-ext.h. Systematically + rename identifiers so that they do not collide with the system + functions and variables. Renaming avoids problems with some + compilers and linkers. */ +#ifdef __GETOPT_PREFIX +# ifndef __GETOPT_ID +# define __GETOPT_CONCAT(x, y) x ## y +# define __GETOPT_XCONCAT(x, y) __GETOPT_CONCAT (x, y) +# define __GETOPT_ID(y) __GETOPT_XCONCAT (__GETOPT_PREFIX, y) +# endif +# undef getopt +# undef optarg +# undef opterr +# undef optind +# undef optopt +# define getopt __GETOPT_ID (getopt) +# define optarg __GETOPT_ID (optarg) +# define opterr __GETOPT_ID (opterr) +# define optind __GETOPT_ID (optind) +# define optopt __GETOPT_ID (optopt) + +/* The system's getopt.h may have already included getopt-core.h to + declare the unprefixed identifiers. Undef _GETOPT_CORE_H so that + getopt-core.h declares them with prefixes. */ +# undef _GETOPT_CORE_H +#endif + +#include <getopt-core.h> + +#endif /* _GETOPT_PFX_CORE_H */ diff --git a/contrib/grep/lib/getopt-pfx-ext.h b/contrib/grep/lib/getopt-pfx-ext.h new file mode 100644 index 0000000000..647fae6352 --- /dev/null +++ b/contrib/grep/lib/getopt-pfx-ext.h @@ -0,0 +1,71 @@ +/* getopt (GNU extensions) gnulib wrapper header. + Copyright (C) 1989-2020 Free Software Foundation, Inc. + This file is part of gnulib. + Unlike most of the getopt implementation, it is NOT shared + with the GNU C Library. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 3 of + the License, or (at your option) any later version. 
+ + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with gnulib; if not, see + . */ + +#ifndef _GETOPT_PFX_EXT_H +#define _GETOPT_PFX_EXT_H 1 + +/* This header should not be used directly; include getopt.h instead. + It does not have a protective #error, because the guard macro for + getopt.h in gnulib is not fixed. */ + +/* Standalone applications should #define __GETOPT_PREFIX to an + identifier that prefixes the external functions and variables + defined in getopt-core.h and getopt-ext.h. Systematically + rename identifiers so that they do not collide with the system + functions and variables. Renaming avoids problems with some + compilers and linkers. */ +#ifdef __GETOPT_PREFIX +# ifndef __GETOPT_ID +# define __GETOPT_CONCAT(x, y) x ## y +# define __GETOPT_XCONCAT(x, y) __GETOPT_CONCAT (x, y) +# define __GETOPT_ID(y) __GETOPT_XCONCAT (__GETOPT_PREFIX, y) +# endif +# undef getopt_long +# undef getopt_long_only +# undef option +# undef _getopt_internal +# define getopt_long __GETOPT_ID (getopt_long) +# define getopt_long_only __GETOPT_ID (getopt_long_only) +# define option __GETOPT_ID (option) +# define _getopt_internal __GETOPT_ID (getopt_internal) + +/* The system's getopt.h may have already included getopt-ext.h to + declare the unprefixed identifiers. Undef _GETOPT_EXT_H so that + getopt-ext.h declares them with prefixes. */ +# undef _GETOPT_EXT_H +#endif + +/* Standalone applications get correct prototypes for getopt_long and + getopt_long_only; they declare "char **argv". For backward + compatibility with old applications, if __GETOPT_PREFIX is not + defined, we supply GNU-libc-compatible, but incorrect, prototypes + using "char *const *argv". 
(GNU libc is stuck with the incorrect + prototypes, as they are baked into older versions of LSB.) */ +#ifndef __getopt_argv_const +# if defined __GETOPT_PREFIX +# define __getopt_argv_const /* empty */ +# else +# define __getopt_argv_const const +# endif +#endif + +#include <getopt-ext.h> + +#endif /* _GETOPT_PFX_EXT_H */ diff --git a/contrib/grep/lib/getopt.c b/contrib/grep/lib/getopt.c index 212cbf7341..a6389d8ea9 100644 --- a/contrib/grep/lib/getopt.c +++ b/contrib/grep/lib/getopt.c @@ -1,23 +1,21 @@ /* Getopt for GNU. - NOTE: getopt is part of the C library, so if you don't know what - "Keep this file name-space clean" means, talk to drepper@gnu.org - before changing it! - Copyright (C) 1987-1996, 1998-2004, 2006, 2008-2015 Free Software - Foundation, Inc. - This file is part of the GNU C Library. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, + Copyright (C) 1987-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library and is also part of gnulib. + Patches to this file should be submitted to both projects. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
- You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + . */ #ifndef _LIBC # include @@ -31,30 +29,54 @@ #include #ifdef _LIBC +/* When used as part of glibc, error printing must be done differently + for standards compliance. getopt is not a cancellation point, so + it must not call functions that are, and it is specified by an + older standard than stdio locking, so it must not refer to + functions in the "user namespace" related to stdio locking. + Finally, it must use glibc's internal message translation so that + the messages are looked up in the proper text domain. */ # include +# define fprintf __fxprintf_nocancel +# define flockfile(fp) _IO_flockfile (fp) +# define funlockfile(fp) _IO_funlockfile (fp) #else # include "gettext.h" # define _(msgid) gettext (msgid) +/* When used standalone, flockfile and funlockfile might not be + available. */ +# if (!defined _POSIX_THREAD_SAFE_FUNCTIONS \ + || (defined _WIN32 && ! defined __CYGWIN__)) +# define flockfile(fp) /* nop */ +# define funlockfile(fp) /* nop */ +# endif +/* When used standalone, do not attempt to use alloca. */ +# define __libc_use_alloca(size) 0 +# undef alloca +# define alloca(size) (abort (), (void *)0) #endif -#if defined _LIBC && defined USE_IN_LIBIO -# include -#endif - -/* This version of 'getopt' appears to the caller like standard Unix 'getopt' - but it behaves differently for the user, since it allows the user - to intersperse the options with the other arguments. - - As 'getopt_long' works, it permutes the elements of ARGV so that, - when it is done, all the options precede everything else. Thus - all application programs are extended to handle flexible argument order. 
- - Using 'getopt' or setting the environment variable POSIXLY_CORRECT +/* This implementation of 'getopt' has three modes for handling + options interspersed with non-option arguments. It can stop + scanning for options at the first non-option argument encountered, + as POSIX specifies. It can continue scanning for options after the + first non-option argument, but permute 'argv' as it goes so that, + after 'getopt' is done, all the options precede all the non-option + arguments and 'optind' points to the first non-option argument. + Or, it can report non-option arguments as if they were arguments to + the option character '\x01'. + + The default behavior of 'getopt_long' is to permute the argument list. + When this implementation is used standalone, the default behavior of + 'getopt' is to stop at the first non-option argument, but when it is + used as part of GNU libc it also permutes the argument list. In both + cases, setting the environment variable POSIXLY_CORRECT to any value disables permutation. - Then the behavior is completely standard. - GNU application programs can use a third alternative mode in which - they can distinguish the relative order of options and other arguments. */ + If the first character of the OPTSTRING argument to 'getopt' or + 'getopt_long' is '+', both functions will stop at the first + non-option argument. If it is '-', both functions will report + non-option arguments as arguments to the option character '\x01'. */ #include "getopt_int.h" @@ -95,42 +117,7 @@ int optopt = '?'; /* Keep a global copy of all internal members of getopt_data. */ static struct _getopt_data getopt_data; - - -#if defined HAVE_DECL_GETENV && !HAVE_DECL_GETENV -extern char *getenv (); -#endif -#ifdef _LIBC -/* Stored original parameters. - XXX This is no good solution. We should rather copy the args so - that we can compare them later. But we must not use malloc(3). 
*/ -extern int __libc_argc; -extern char **__libc_argv; - -/* Bash 2.0 gives us an environment variable containing flags - indicating ARGV elements that should not be considered arguments. */ - -# ifdef USE_NONOPTION_FLAGS -/* Defined in getopt_init.c */ -extern char *__getopt_nonoption_flags; -# endif - -# ifdef USE_NONOPTION_FLAGS -# define SWAP_FLAGS(ch1, ch2) \ - if (d->__nonoption_flags_len > 0) \ - { \ - char __tmp = __getopt_nonoption_flags[ch1]; \ - __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ - __getopt_nonoption_flags[ch2] = __tmp; \ - } -# else -# define SWAP_FLAGS(ch1, ch2) -# endif -#else /* !_LIBC */ -# define SWAP_FLAGS(ch1, ch2) -#endif /* _LIBC */ - /* Exchange two adjacent subsequences of ARGV. One subsequence is elements [first_nonopt,last_nonopt) which contains all the non-options that have been skipped so far. @@ -153,64 +140,40 @@ exchange (char **argv, struct _getopt_data *d) It leaves the longer segment in the right place overall, but it consists of two parts that need to be swapped next. */ -#if defined _LIBC && defined USE_NONOPTION_FLAGS - /* First make sure the handling of the '__getopt_nonoption_flags' - string can work normally. Our top argument must be in the range - of the string. */ - if (d->__nonoption_flags_len > 0 && top >= d->__nonoption_flags_max_len) - { - /* We must extend the array. The user plays games with us and - presents new arguments. */ - char *new_str = malloc (top + 1); - if (new_str == NULL) - d->__nonoption_flags_len = d->__nonoption_flags_max_len = 0; - else - { - memset (__mempcpy (new_str, __getopt_nonoption_flags, - d->__nonoption_flags_max_len), - '\0', top + 1 - d->__nonoption_flags_max_len); - d->__nonoption_flags_max_len = top + 1; - __getopt_nonoption_flags = new_str; - } - } -#endif - while (top > middle && middle > bottom) { if (top - middle > middle - bottom) - { - /* Bottom segment is the short one. 
*/ - int len = middle - bottom; - register int i; - - /* Swap it with the top part of the top segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[top - (middle - bottom) + i]; - argv[top - (middle - bottom) + i] = tem; - SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); - } - /* Exclude the moved bottom segment from further swapping. */ - top -= len; - } + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } else - { - /* Top segment is the short one. */ - int len = top - middle; - register int i; - - /* Swap it with the bottom part of the bottom segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[middle + i]; - argv[middle + i] = tem; - SWAP_FLAGS (bottom + i, middle + i); - } - /* Exclude the moved top segment from further swapping. */ - bottom += len; - } + { + /* Top segment is the short one. */ + int len = top - middle; + int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } } /* Update records for the slots the non-options now occupy. */ @@ -219,25 +182,216 @@ exchange (char **argv, struct _getopt_data *d) d->__last_nonopt = d->optind; } -/* Initialize the internal data when the first call is made. */ +/* Process the argument starting with d->__nextchar as a long option. + d->optind should *not* have been advanced over this argument. 
+ + If the value returned is -1, it was not actually a long option, the + state is unchanged, and the argument should be processed as a set + of short options (this can only happen when long_only is true). + Otherwise, the option (and its argument, if any) have been consumed + and the return value is the value to return from _getopt_internal_r. */ +static int +process_long_option (int argc, char **argv, const char *optstring, + const struct option *longopts, int *longind, + int long_only, struct _getopt_data *d, + int print_errors, const char *prefix) +{ + char *nameend; + size_t namelen; + const struct option *p; + const struct option *pfound = NULL; + int n_options; + int option_index; + + for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + namelen = nameend - d->__nextchar; + + /* First look for an exact match, counting the options as a side + effect. */ + for (p = longopts, n_options = 0; p->name; p++, n_options++) + if (!strncmp (p->name, d->__nextchar, namelen) + && namelen == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + option_index = n_options; + break; + } + + if (pfound == NULL) + { + /* Didn't find an exact match, so look for abbreviations. */ + unsigned char *ambig_set = NULL; + int ambig_malloced = 0; + int ambig_fallback = 0; + int indfound = -1; + + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, d->__nextchar, namelen)) + { + if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + { + /* Second or later nonexact match found. */ + if (!ambig_fallback) + { + if (!print_errors) + /* Don't waste effort tracking the ambig set if + we're not going to print it anyway. 
*/ + ambig_fallback = 1; + else if (!ambig_set) + { + if (__libc_use_alloca (n_options)) + ambig_set = alloca (n_options); + else if ((ambig_set = malloc (n_options)) == NULL) + /* Fall back to simpler error message. */ + ambig_fallback = 1; + else + ambig_malloced = 1; + + if (ambig_set) + { + memset (ambig_set, 0, n_options); + ambig_set[indfound] = 1; + } + } + if (ambig_set) + ambig_set[option_index] = 1; + } + } + } + + if (ambig_set || ambig_fallback) + { + if (print_errors) + { + if (ambig_fallback) + fprintf (stderr, _("%s: option '%s%s' is ambiguous\n"), + argv[0], prefix, d->__nextchar); + else + { + flockfile (stderr); + fprintf (stderr, + _("%s: option '%s%s' is ambiguous; possibilities:"), + argv[0], prefix, d->__nextchar); + + for (option_index = 0; option_index < n_options; option_index++) + if (ambig_set[option_index]) + fprintf (stderr, " '%s%s'", + prefix, longopts[option_index].name); + + /* This must use 'fprintf' even though it's only + printing a single character, so that it goes through + __fxprintf_nocancel when compiled as part of glibc. */ + fprintf (stderr, "\n"); + funlockfile (stderr); + } + } + if (ambig_malloced) + free (ambig_set); + d->__nextchar += strlen (d->__nextchar); + d->optind++; + d->optopt = 0; + return '?'; + } + + option_index = indfound; + } + + if (pfound == NULL) + { + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short option, + then it's an error. */ + if (!long_only || argv[d->optind][1] == '-' + || strchr (optstring, *d->__nextchar) == NULL) + { + if (print_errors) + fprintf (stderr, _("%s: unrecognized option '%s%s'\n"), + argv[0], prefix, d->__nextchar); + + d->__nextchar = NULL; + d->optind++; + d->optopt = 0; + return '?'; + } + + /* Otherwise interpret it as a short option. */ + return -1; + } + + /* We have found a matching long option. Consume it. 
*/ + d->optind++; + d->__nextchar = NULL; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + d->optarg = nameend + 1; + else + { + if (print_errors) + fprintf (stderr, + _("%s: option '%s%s' doesn't allow an argument\n"), + argv[0], prefix, pfound->name); + + d->optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (d->optind < argc) + d->optarg = argv[d->optind++]; + else + { + if (print_errors) + fprintf (stderr, + _("%s: option '%s%s' requires an argument\n"), + argv[0], prefix, pfound->name); + + d->optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; +} + +/* Initialize internal data upon the first call to getopt. */ static const char * _getopt_initialize (int argc _GL_UNUSED, - char **argv _GL_UNUSED, const char *optstring, - struct _getopt_data *d, int posixly_correct) + char **argv _GL_UNUSED, const char *optstring, + struct _getopt_data *d, int posixly_correct) { /* Start processing options with ARGV-element 1 (since ARGV-element 0 is the program name); the sequence of previously skipped non-option ARGV-elements is empty. */ + if (d->optind == 0) + d->optind = 1; d->__first_nonopt = d->__last_nonopt = d->optind; - d->__nextchar = NULL; - d->__posixly_correct = posixly_correct || !!getenv ("POSIXLY_CORRECT"); - /* Determine how to handle the ordering of options and nonoptions. 
*/ - if (optstring[0] == '-') { d->__ordering = RETURN_IN_ORDER; @@ -248,41 +402,12 @@ _getopt_initialize (int argc _GL_UNUSED, d->__ordering = REQUIRE_ORDER; ++optstring; } - else if (d->__posixly_correct) + else if (posixly_correct || !!getenv ("POSIXLY_CORRECT")) d->__ordering = REQUIRE_ORDER; else d->__ordering = PERMUTE; -#if defined _LIBC && defined USE_NONOPTION_FLAGS - if (!d->__posixly_correct - && argc == __libc_argc && argv == __libc_argv) - { - if (d->__nonoption_flags_max_len == 0) - { - if (__getopt_nonoption_flags == NULL - || __getopt_nonoption_flags[0] == '\0') - d->__nonoption_flags_max_len = -1; - else - { - const char *orig_str = __getopt_nonoption_flags; - int len = d->__nonoption_flags_max_len = strlen (orig_str); - if (d->__nonoption_flags_max_len < argc) - d->__nonoption_flags_max_len = argc; - __getopt_nonoption_flags = - (char *) malloc (d->__nonoption_flags_max_len); - if (__getopt_nonoption_flags == NULL) - d->__nonoption_flags_max_len = -1; - else - memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), - '\0', d->__nonoption_flags_max_len - len); - } - } - d->__nonoption_flags_len = d->__nonoption_flags_max_len; - } - else - d->__nonoption_flags_len = 0; -#endif - + d->__initialized = 1; return optstring; } @@ -344,8 +469,8 @@ _getopt_initialize (int argc _GL_UNUSED, int _getopt_internal_r (int argc, char **argv, const char *optstring, - const struct option *longopts, int *longind, - int long_only, struct _getopt_data *d, int posixly_correct) + const struct option *longopts, int *longind, + int long_only, struct _getopt_data *d, int posixly_correct) { int print_errors = d->opterr; @@ -355,461 +480,129 @@ _getopt_internal_r (int argc, char **argv, const char *optstring, d->optarg = NULL; if (d->optind == 0 || !d->__initialized) - { - if (d->optind == 0) - d->optind = 1; /* Don't scan ARGV[0], the program name. 
*/ - optstring = _getopt_initialize (argc, argv, optstring, d, - posixly_correct); - d->__initialized = 1; - } + optstring = _getopt_initialize (argc, argv, optstring, d, posixly_correct); else if (optstring[0] == '-' || optstring[0] == '+') optstring++; + if (optstring[0] == ':') print_errors = 0; - /* Test whether ARGV[optind] points to a non-option argument. - Either it does not have option syntax, or there is an environment flag - from the shell indicating it is not an option. The later information - is only used when the used in the GNU libc. */ -#if defined _LIBC && defined USE_NONOPTION_FLAGS -# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0' \ - || (d->optind < d->__nonoption_flags_len \ - && __getopt_nonoption_flags[d->optind] == '1')) -#else -# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0') -#endif + /* Test whether ARGV[optind] points to a non-option argument. */ +#define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0') if (d->__nextchar == NULL || *d->__nextchar == '\0') { /* Advance to the next ARGV-element. */ /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been - moved back by the user (who may also have changed the arguments). */ + moved back by the user (who may also have changed the arguments). */ if (d->__last_nonopt > d->optind) - d->__last_nonopt = d->optind; + d->__last_nonopt = d->optind; if (d->__first_nonopt > d->optind) - d->__first_nonopt = d->optind; + d->__first_nonopt = d->optind; if (d->__ordering == PERMUTE) - { - /* If we have just processed some options following some non-options, - exchange them so that the options come first. */ + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. 
*/ - if (d->__first_nonopt != d->__last_nonopt - && d->__last_nonopt != d->optind) - exchange ((char **) argv, d); - else if (d->__last_nonopt != d->optind) - d->__first_nonopt = d->optind; + if (d->__first_nonopt != d->__last_nonopt + && d->__last_nonopt != d->optind) + exchange (argv, d); + else if (d->__last_nonopt != d->optind) + d->__first_nonopt = d->optind; - /* Skip any additional non-options - and extend the range of non-options previously skipped. */ + /* Skip any additional non-options + and extend the range of non-options previously skipped. */ - while (d->optind < argc && NONOPTION_P) - d->optind++; - d->__last_nonopt = d->optind; - } + while (d->optind < argc && NONOPTION_P) + d->optind++; + d->__last_nonopt = d->optind; + } /* The special ARGV-element '--' means premature end of options. - Skip it like a null option, - then exchange with previous non-options as if it were an option, - then skip everything else like a non-option. */ + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ if (d->optind != argc && !strcmp (argv[d->optind], "--")) - { - d->optind++; + { + d->optind++; - if (d->__first_nonopt != d->__last_nonopt - && d->__last_nonopt != d->optind) - exchange ((char **) argv, d); - else if (d->__first_nonopt == d->__last_nonopt) - d->__first_nonopt = d->optind; - d->__last_nonopt = argc; + if (d->__first_nonopt != d->__last_nonopt + && d->__last_nonopt != d->optind) + exchange (argv, d); + else if (d->__first_nonopt == d->__last_nonopt) + d->__first_nonopt = d->optind; + d->__last_nonopt = argc; - d->optind = argc; - } + d->optind = argc; + } /* If we have done all the ARGV-elements, stop the scan - and back over any non-options that we skipped and permuted. */ + and back over any non-options that we skipped and permuted. 
*/ if (d->optind == argc) - { - /* Set the next-arg-index to point at the non-options - that we previously skipped, so the caller will digest them. */ - if (d->__first_nonopt != d->__last_nonopt) - d->optind = d->__first_nonopt; - return -1; - } + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (d->__first_nonopt != d->__last_nonopt) + d->optind = d->__first_nonopt; + return -1; + } /* If we have come to a non-option and did not permute it, - either stop the scan or describe it to the caller and pass it by. */ + either stop the scan or describe it to the caller and pass it by. */ if (NONOPTION_P) - { - if (d->__ordering == REQUIRE_ORDER) - return -1; - d->optarg = argv[d->optind++]; - return 1; - } + { + if (d->__ordering == REQUIRE_ORDER) + return -1; + d->optarg = argv[d->optind++]; + return 1; + } /* We have found another option-ARGV-element. - Skip the initial punctuation. */ - - d->__nextchar = (argv[d->optind] + 1 - + (longopts != NULL && argv[d->optind][1] == '-')); - } - - /* Decode the current option-ARGV-element. */ - - /* Check whether the ARGV-element is a long option. - - If long_only and the ARGV-element has the form "-f", where f is - a valid short option, don't consider it an abbreviated form of - a long option that starts with f. Otherwise there would be no - way to give the -f short option. - - On the other hand, if there's a long option "fubar" and - the ARGV-element is "-fu", do consider that an abbreviation of - the long option, just like "--fu", and not "-f" with arg "u". - - This distinction seems to be the most useful approach. 
*/ - - if (longopts != NULL - && (argv[d->optind][1] == '-' - || (long_only && (argv[d->optind][2] - || !strchr (optstring, argv[d->optind][1]))))) - { - char *nameend; - unsigned int namelen; - const struct option *p; - const struct option *pfound = NULL; - struct option_list - { - const struct option *p; - struct option_list *next; - } *ambig_list = NULL; -#ifdef _LIBC -/* malloc() not used for _LIBC to simplify failure messages. */ -# define free_option_list(l) -#else -# define free_option_list(l) \ - while (l != NULL) \ - { \ - struct option_list *pn = l->next; \ - free (l); \ - l = pn; \ - } -#endif - int exact = 0; - int ambig = 0; - int indfound = -1; - int option_index; - - for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++) - /* Do nothing. */ ; - namelen = nameend - d->__nextchar; - - /* Test all long options for either exact match - or abbreviated matches. */ - for (p = longopts, option_index = 0; p->name; p++, option_index++) - if (!strncmp (p->name, d->__nextchar, namelen)) - { - if (namelen == (unsigned int) strlen (p->name)) - { - /* Exact match found. */ - pfound = p; - indfound = option_index; - exact = 1; - break; - } - else if (pfound == NULL) - { - /* First nonexact match found. */ - pfound = p; - indfound = option_index; - } - else if (ambig) - ; /* Taking simpler path to handling ambiguities. */ - else if (long_only - || pfound->has_arg != p->has_arg - || pfound->flag != p->flag - || pfound->val != p->val) - { - /* Second or later nonexact match found. */ -#ifdef _LIBC - struct option_list *newp = alloca (sizeof (*newp)); -#else - struct option_list *newp = malloc (sizeof (*newp)); - if (newp == NULL) - { - free_option_list (ambig_list); - ambig_list = NULL; - ambig = 1; /* Use simpler fallback message. 
*/ - } - else -#endif - { - newp->p = p; - newp->next = ambig_list; - ambig_list = newp; - } - } - } - - if ((ambig || ambig_list) && !exact) - { - if (print_errors && ambig_list) - { - struct option_list first; - first.p = pfound; - first.next = ambig_list; - ambig_list = &first; - -#if defined _LIBC && defined USE_IN_LIBIO - char *buf = NULL; - size_t buflen = 0; - - FILE *fp = open_memstream (&buf, &buflen); - if (fp != NULL) - { - fprintf (fp, - _("%s: option '%s' is ambiguous; possibilities:"), - argv[0], argv[d->optind]); - - do - { - fprintf (fp, " '--%s'", ambig_list->p->name); - ambig_list = ambig_list->next; - } - while (ambig_list != NULL); - - fputc_unlocked ('\n', fp); - - if (__builtin_expect (fclose (fp) != EOF, 1)) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } - } -#else - fprintf (stderr, - _("%s: option '%s' is ambiguous; possibilities:"), - argv[0], argv[d->optind]); - do - { - fprintf (stderr, " '--%s'", ambig_list->p->name); - ambig_list = ambig_list->next; - } - while (ambig_list != NULL); - - fputc ('\n', stderr); -#endif - } - else if (print_errors && ambig) - { - fprintf (stderr, - _("%s: option '%s' is ambiguous\n"), - argv[0], argv[d->optind]); - } - d->__nextchar += strlen (d->__nextchar); - d->optind++; - d->optopt = 0; - free_option_list (ambig_list); - return '?'; - } - - free_option_list (ambig_list); - - if (pfound != NULL) - { - option_index = indfound; - d->optind++; - if (*nameend) - { - /* Don't test has_arg with >, because some C compilers don't - allow it to be used on enums. 
*/ - if (pfound->has_arg) - d->optarg = nameend + 1; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - int n; -#endif - - if (argv[d->optind - 1][1] == '-') - { - /* --option */ -#if defined _LIBC && defined USE_IN_LIBIO - n = __asprintf (&buf, _("\ -%s: option '--%s' doesn't allow an argument\n"), - argv[0], pfound->name); -#else - fprintf (stderr, _("\ -%s: option '--%s' doesn't allow an argument\n"), - argv[0], pfound->name); -#endif - } - else - { - /* +option or -option */ -#if defined _LIBC && defined USE_IN_LIBIO - n = __asprintf (&buf, _("\ -%s: option '%c%s' doesn't allow an argument\n"), - argv[0], argv[d->optind - 1][0], - pfound->name); -#else - fprintf (stderr, _("\ -%s: option '%c%s' doesn't allow an argument\n"), - argv[0], argv[d->optind - 1][0], - pfound->name); -#endif - } - -#if defined _LIBC && defined USE_IN_LIBIO - if (n >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 - |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#endif - } - - d->__nextchar += strlen (d->__nextchar); - - d->optopt = pfound->val; - return '?'; - } - } - else if (pfound->has_arg == 1) - { - if (d->optind < argc) - d->optarg = argv[d->optind++]; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, _("\ -%s: option '--%s' requires an argument\n"), - argv[0], pfound->name) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 - |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, - _("%s: option '--%s' requires an argument\n"), - argv[0], pfound->name); -#endif - } - d->__nextchar += strlen 
(d->__nextchar); - d->optopt = pfound->val; - return optstring[0] == ':' ? ':' : '?'; - } - } - d->__nextchar += strlen (d->__nextchar); - if (longind != NULL) - *longind = option_index; - if (pfound->flag) - { - *(pfound->flag) = pfound->val; - return 0; - } - return pfound->val; - } - - /* Can't find it as a long option. If this is not getopt_long_only, - or the option starts with '--' or is not a valid short - option, then it's an error. - Otherwise interpret it as a short option. */ - if (!long_only || argv[d->optind][1] == '-' - || strchr (optstring, *d->__nextchar) == NULL) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - int n; -#endif - - if (argv[d->optind][1] == '-') - { - /* --option */ -#if defined _LIBC && defined USE_IN_LIBIO - n = __asprintf (&buf, _("%s: unrecognized option '--%s'\n"), - argv[0], d->__nextchar); -#else - fprintf (stderr, _("%s: unrecognized option '--%s'\n"), - argv[0], d->__nextchar); -#endif - } - else - { - /* +option or -option */ -#if defined _LIBC && defined USE_IN_LIBIO - n = __asprintf (&buf, _("%s: unrecognized option '%c%s'\n"), - argv[0], argv[d->optind][0], d->__nextchar); -#else - fprintf (stderr, _("%s: unrecognized option '%c%s'\n"), - argv[0], argv[d->optind][0], d->__nextchar); -#endif - } - -#if defined _LIBC && defined USE_IN_LIBIO - if (n >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#endif - } - d->__nextchar = (char *) ""; - d->optind++; - d->optopt = 0; - return '?'; - } + Check whether it might be a long option. */ + if (longopts) + { + if (argv[d->optind][1] == '-') + { + /* "--foo" is always a long option. The special option + "--" was handled above. 
*/ + d->__nextchar = argv[d->optind] + 2; + return process_long_option (argc, argv, optstring, longopts, + longind, long_only, d, + print_errors, "--"); + } + + /* If long_only and the ARGV-element has the form "-f", + where f is a valid short option, don't consider it an + abbreviated form of a long option that starts with f. + Otherwise there would be no way to give the -f short + option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an + abbreviation of the long option, just like "--fu", and + not "-f" with arg "u". + + This distinction seems to be the most useful approach. */ + if (long_only && (argv[d->optind][2] + || !strchr (optstring, argv[d->optind][1]))) + { + int code; + d->__nextchar = argv[d->optind] + 1; + code = process_long_option (argc, argv, optstring, longopts, + longind, long_only, d, + print_errors, "-"); + if (code != -1) + return code; + } + } + + /* It is not a long option. Skip the initial punctuation. */ + d->__nextchar = argv[d->optind] + 1; } /* Look at and handle the next short option-character. 
*/ @@ -824,331 +617,83 @@ _getopt_internal_r (int argc, char **argv, const char *optstring, if (temp == NULL || c == ':' || c == ';') { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - int n; -#endif - -#if defined _LIBC && defined USE_IN_LIBIO - n = __asprintf (&buf, _("%s: invalid option -- '%c'\n"), - argv[0], c); -#else - fprintf (stderr, _("%s: invalid option -- '%c'\n"), argv[0], c); -#endif - -#if defined _LIBC && defined USE_IN_LIBIO - if (n >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#endif - } - d->optopt = c; - return '?'; + if (print_errors) + fprintf (stderr, _("%s: invalid option -- '%c'\n"), argv[0], c); + d->optopt = c; + return '?'; } + /* Convenience. Treat POSIX -W foo same as long option --foo */ - if (temp[0] == 'W' && temp[1] == ';') + if (temp[0] == 'W' && temp[1] == ';' && longopts != NULL) { - char *nameend; - const struct option *p; - const struct option *pfound = NULL; - int exact = 0; - int ambig = 0; - int indfound = 0; - int option_index; - - if (longopts == NULL) - goto no_longs; - - /* This is an option that requires an argument. */ - if (*d->__nextchar != '\0') - { - d->optarg = d->__nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. 
*/ - d->optind++; - } - else if (d->optind == argc) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, - _("%s: option requires an argument -- '%c'\n"), - argv[0], c) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, - _("%s: option requires an argument -- '%c'\n"), - argv[0], c); -#endif - } - d->optopt = c; - if (optstring[0] == ':') - c = ':'; - else - c = '?'; - return c; - } - else - /* We already incremented 'd->optind' once; - increment it again when taking next ARGV-elt as argument. */ - d->optarg = argv[d->optind++]; - - /* optarg is now the argument, see if it's in the - table of longopts. */ - - for (d->__nextchar = nameend = d->optarg; *nameend && *nameend != '='; - nameend++) - /* Do nothing. */ ; - - /* Test all long options for either exact match - or abbreviated matches. */ - for (p = longopts, option_index = 0; p->name; p++, option_index++) - if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar)) - { - if ((unsigned int) (nameend - d->__nextchar) == strlen (p->name)) - { - /* Exact match found. */ - pfound = p; - indfound = option_index; - exact = 1; - break; - } - else if (pfound == NULL) - { - /* First nonexact match found. */ - pfound = p; - indfound = option_index; - } - else if (long_only - || pfound->has_arg != p->has_arg - || pfound->flag != p->flag - || pfound->val != p->val) - /* Second or later nonexact match found. 
*/ - ambig = 1; - } - if (ambig && !exact) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, _("%s: option '-W %s' is ambiguous\n"), - argv[0], d->optarg) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, _("%s: option '-W %s' is ambiguous\n"), - argv[0], d->optarg); -#endif - } - d->__nextchar += strlen (d->__nextchar); - d->optind++; - return '?'; - } - if (pfound != NULL) - { - option_index = indfound; - if (*nameend) - { - /* Don't test has_arg with >, because some C compilers don't - allow it to be used on enums. */ - if (pfound->has_arg) - d->optarg = nameend + 1; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, _("\ -%s: option '-W %s' doesn't allow an argument\n"), - argv[0], pfound->name) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 - |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, _("\ -%s: option '-W %s' doesn't allow an argument\n"), - argv[0], pfound->name); -#endif - } - - d->__nextchar += strlen (d->__nextchar); - return '?'; - } - } - else if (pfound->has_arg == 1) - { - if (d->optind < argc) - d->optarg = argv[d->optind++]; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, _("\ -%s: option '-W %s' requires an argument\n"), - argv[0], pfound->name) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 - |= _IO_FLAGS2_NOTCANCEL; - - 
__fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, _("\ -%s: option '-W %s' requires an argument\n"), - argv[0], pfound->name); -#endif - } - d->__nextchar += strlen (d->__nextchar); - return optstring[0] == ':' ? ':' : '?'; - } - } - else - d->optarg = NULL; - d->__nextchar += strlen (d->__nextchar); - if (longind != NULL) - *longind = option_index; - if (pfound->flag) - { - *(pfound->flag) = pfound->val; - return 0; - } - return pfound->val; - } - - no_longs: - d->__nextchar = NULL; - return 'W'; /* Let the application handle it. */ + /* This is an option that requires an argument. */ + if (*d->__nextchar != '\0') + d->optarg = d->__nextchar; + else if (d->optind == argc) + { + if (print_errors) + fprintf (stderr, + _("%s: option requires an argument -- '%c'\n"), + argv[0], c); + + d->optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + d->optarg = argv[d->optind]; + + d->__nextchar = d->optarg; + d->optarg = NULL; + return process_long_option (argc, argv, optstring, longopts, longind, + 0 /* long_only */, d, print_errors, "-W "); } if (temp[1] == ':') { - if (temp[2] == ':') - { - /* This is an option that accepts an argument optionally. */ - if (*d->__nextchar != '\0') - { - d->optarg = d->__nextchar; - d->optind++; - } - else - d->optarg = NULL; - d->__nextchar = NULL; - } - else - { - /* This is an option that requires an argument. */ - if (*d->__nextchar != '\0') - { - d->optarg = d->__nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. 
*/ - d->optind++; - } - else if (d->optind == argc) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - if (__asprintf (&buf, _("\ -%s: option requires an argument -- '%c'\n"), - argv[0], c) >= 0) - { - _IO_flockfile (stderr); - - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; - - __fxprintf (NULL, "%s", buf); - - ((_IO_FILE *) stderr)->_flags2 = old_flags2; - _IO_funlockfile (stderr); - - free (buf); - } -#else - fprintf (stderr, - _("%s: option requires an argument -- '%c'\n"), - argv[0], c); -#endif - } - d->optopt = c; - if (optstring[0] == ':') - c = ':'; - else - c = '?'; - } - else - /* We already incremented 'optind' once; - increment it again when taking next ARGV-elt as argument. */ - d->optarg = argv[d->optind++]; - d->__nextchar = NULL; - } + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + d->optind++; + } + else + d->optarg = NULL; + d->__nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + d->optind++; + } + else if (d->optind == argc) + { + if (print_errors) + fprintf (stderr, + _("%s: option requires an argument -- '%c'\n"), + argv[0], c); + + d->optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented 'optind' once; + increment it again when taking next ARGV-elt as argument. 
*/ + d->optarg = argv[d->optind++]; + d->__nextchar = NULL; + } } return c; } @@ -1156,8 +701,8 @@ _getopt_internal_r (int argc, char **argv, const char *optstring, int _getopt_internal (int argc, char **argv, const char *optstring, - const struct option *longopts, int *longind, int long_only, - int posixly_correct) + const struct option *longopts, int *longind, int long_only, + int posixly_correct) { int result; @@ -1165,8 +710,8 @@ _getopt_internal (int argc, char **argv, const char *optstring, getopt_data.opterr = opterr; result = _getopt_internal_r (argc, argv, optstring, longopts, - longind, long_only, &getopt_data, - posixly_correct); + longind, long_only, &getopt_data, + posixly_correct); optind = getopt_data.optind; optarg = getopt_data.optarg; @@ -1175,32 +720,23 @@ _getopt_internal (int argc, char **argv, const char *optstring, return result; } -/* glibc gets a LSB-compliant getopt. - Standalone applications get a POSIX-compliant getopt. */ -#if _LIBC -enum { POSIXLY_CORRECT = 0 }; -#else -enum { POSIXLY_CORRECT = 1 }; -#endif - -int -getopt (int argc, char *const *argv, const char *optstring) -{ - return _getopt_internal (argc, (char **) argv, optstring, - (const struct option *) 0, - (int *) 0, - 0, POSIXLY_CORRECT); -} +/* glibc gets a LSB-compliant getopt and a POSIX-compliant __posix_getopt. + Standalone applications just get a POSIX-compliant getopt. + POSIX and LSB both require these functions to take 'char *const *argv' + even though this is incorrect (because of the permutation).
*/ +#define GETOPT_ENTRY(NAME, POSIXLY_CORRECT) \ + int \ + NAME (int argc, char *const *argv, const char *optstring) \ + { \ + return _getopt_internal (argc, (char **)argv, optstring, \ + 0, 0, 0, POSIXLY_CORRECT); \ + } #ifdef _LIBC -int -__posix_getopt (int argc, char *const *argv, const char *optstring) -{ - return _getopt_internal (argc, argv, optstring, - (const struct option *) 0, - (int *) 0, - 0, 1); -} +GETOPT_ENTRY(getopt, 0) +GETOPT_ENTRY(__posix_getopt, 1) +#else +GETOPT_ENTRY(getopt, 1) #endif @@ -1221,51 +757,51 @@ main (int argc, char **argv) c = getopt (argc, argv, "abc:d:0123456789"); if (c == -1) - break; + break; switch (c) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (digit_optind != 0 && digit_optind != this_option_optind) - printf ("digits occur in two different argv-elements.\n"); - digit_optind = this_option_optind; - printf ("option %c\n", c); - break; - - case 'a': - printf ("option a\n"); - break; - - case 'b': - printf ("option b\n"); - break; - - case 'c': - printf ("option c with value '%s'\n", optarg); - break; - - case '?': - break; - - default: - printf ("?? getopt returned character code 0%o ??\n", c); - } + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value '%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? 
getopt returned character code 0%o ??\n", c); + } } if (optind < argc) { printf ("non-option ARGV-elements: "); while (optind < argc) - printf ("%s ", argv[optind++]); + printf ("%s ", argv[optind++]); printf ("\n"); } diff --git a/contrib/grep/lib/getopt1.c b/contrib/grep/lib/getopt1.c index 2b1feb6ead..0902efe68e 100644 --- a/contrib/grep/lib/getopt1.c +++ b/contrib/grep/lib/getopt1.c @@ -1,56 +1,44 @@ /* getopt_long and getopt_long_only entry points for GNU getopt. - Copyright (C) 1987-1994, 1996-1998, 2004, 2006, 2009-2015 Free Software - Foundation, Inc. - This file is part of the GNU C Library. + Copyright (C) 1987-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library and is also part of gnulib. + Patches to this file should be submitted to both projects. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + . 
*/ -#ifdef _LIBC -# include -#else +#ifndef _LIBC # include -# include "getopt.h" #endif -#include "getopt_int.h" - -#include -/* This needs to come after some library #include - to get __GNU_LIBRARY__ defined. */ -#ifdef __GNU_LIBRARY__ -#include -#endif - -#ifndef NULL -#define NULL 0 -#endif +#include "getopt.h" +#include "getopt_int.h" int getopt_long (int argc, char *__getopt_argv_const *argv, const char *options, - const struct option *long_options, int *opt_index) + const struct option *long_options, int *opt_index) { return _getopt_internal (argc, (char **) argv, options, long_options, - opt_index, 0, 0); + opt_index, 0, 0); } int _getopt_long_r (int argc, char **argv, const char *options, - const struct option *long_options, int *opt_index, - struct _getopt_data *d) + const struct option *long_options, int *opt_index, + struct _getopt_data *d) { return _getopt_internal_r (argc, argv, options, long_options, opt_index, - 0, d, 0); + 0, d, 0); } /* Like getopt_long, but '-' as well as '--' can indicate a long option. 
@@ -60,26 +48,27 @@ _getopt_long_r (int argc, char **argv, const char *options, int getopt_long_only (int argc, char *__getopt_argv_const *argv, - const char *options, - const struct option *long_options, int *opt_index) + const char *options, + const struct option *long_options, int *opt_index) { return _getopt_internal (argc, (char **) argv, options, long_options, - opt_index, 1, 0); + opt_index, 1, 0); } int _getopt_long_only_r (int argc, char **argv, const char *options, - const struct option *long_options, int *opt_index, - struct _getopt_data *d) + const struct option *long_options, int *opt_index, + struct _getopt_data *d) { return _getopt_internal_r (argc, argv, options, long_options, opt_index, - 1, d, 0); + 1, d, 0); } #ifdef TEST #include +#include int main (int argc, char **argv) @@ -93,74 +82,74 @@ main (int argc, char **argv) int option_index = 0; static const struct option long_options[] = { - {"add", 1, 0, 0}, - {"append", 0, 0, 0}, - {"delete", 1, 0, 0}, - {"verbose", 0, 0, 0}, - {"create", 0, 0, 0}, - {"file", 1, 0, 0}, - {0, 0, 0, 0} + {"add", 1, 0, 0}, + {"append", 0, 0, 0}, + {"delete", 1, 0, 0}, + {"verbose", 0, 0, 0}, + {"create", 0, 0, 0}, + {"file", 1, 0, 0}, + {0, 0, 0, 0} }; c = getopt_long (argc, argv, "abc:d:0123456789", - long_options, &option_index); + long_options, &option_index); if (c == -1) - break; + break; switch (c) - { - case 0: - printf ("option %s", long_options[option_index].name); - if (optarg) - printf (" with arg %s", optarg); - printf ("\n"); - break; - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (digit_optind != 0 && digit_optind != this_option_optind) - printf ("digits occur in two different argv-elements.\n"); - digit_optind = this_option_optind; - printf ("option %c\n", c); - break; - - case 'a': - printf ("option a\n"); - break; - - case 'b': - printf ("option b\n"); - break; - - case 'c': - printf ("option c with value '%s'\n", 
optarg); - break; - - case 'd': - printf ("option d with value '%s'\n", optarg); - break; - - case '?': - break; - - default: - printf ("?? getopt returned character code 0%o ??\n", c); - } + { + case 0: + printf ("option %s", long_options[option_index].name); + if (optarg) + printf (" with arg %s", optarg); + printf ("\n"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value '%s'\n", optarg); + break; + + case 'd': + printf ("option d with value '%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } } if (optind < argc) { printf ("non-option ARGV-elements: "); while (optind < argc) - printf ("%s ", argv[optind++]); + printf ("%s ", argv[optind++]); printf ("\n"); } diff --git a/contrib/grep/lib/getopt_int.h b/contrib/grep/lib/getopt_int.h index e893a6e133..afcd8a67fb 100644 --- a/contrib/grep/lib/getopt_int.h +++ b/contrib/grep/lib/getopt_int.h @@ -1,30 +1,31 @@ /* Internal declarations for getopt. - Copyright (C) 1989-1994, 1996-1999, 2001, 2003-2004, 2009-2015 Free Software - Foundation, Inc. - This file is part of the GNU C Library. + Copyright (C) 1989-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library and is also part of gnulib. + Patches to this file should be submitted to both projects. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. 
+ The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + . */ #ifndef _GETOPT_INT_H -#define _GETOPT_INT_H 1 +#define _GETOPT_INT_H 1 #include extern int _getopt_internal (int ___argc, char **___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind, - int __long_only, int __posixly_correct); + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only, int __posixly_correct); /* Reentrant versions which can handle parsing multiple argument @@ -32,28 +33,20 @@ extern int _getopt_internal (int ___argc, char **___argv, /* Describe how to deal with options that follow non-option ARGV-elements. - If the caller did not specify anything, - the default is REQUIRE_ORDER if the environment variable - POSIXLY_CORRECT is defined, PERMUTE otherwise. + REQUIRE_ORDER means don't recognize them as options; stop option + processing when the first non-option is seen. This is what POSIX + specifies should happen. - REQUIRE_ORDER means don't recognize them as options; - stop option processing when the first non-option is seen. - This is what Unix does. 
- This mode of operation is selected by either setting the environment - variable POSIXLY_CORRECT, or using '+' as the first character - of the list of option characters, or by calling getopt. - - PERMUTE is the default. We permute the contents of ARGV as we - scan, so that eventually all the non-options are at the end. - This allows options to be given in any order, even with programs - that were not written to expect this. + PERMUTE means permute the contents of ARGV as we scan, so that + eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written + to expect this. RETURN_IN_ORDER is an option available to programs that were written to expect options and other ARGV-elements in any order and that care about the ordering of the two. We describe each non-option ARGV-element as if it were the argument of an option - with character code 1. Using '-' as the first character of the - list of option characters selects this mode of operation. + with character code 1. The special argument '--' forces an end of option-scanning regardless of the value of 'ordering'. In the case of RETURN_IN_ORDER, only @@ -91,11 +84,6 @@ struct _getopt_data /* See __ord above. */ enum __ord __ordering; - /* If the POSIXLY_CORRECT environment variable is set - or getopt was called. */ - int __posixly_correct; - - /* Handle permutation of arguments. */ /* Describe the part of ARGV that contains non-options that have @@ -104,32 +92,27 @@ struct _getopt_data int __first_nonopt; int __last_nonopt; - -#if defined _LIBC && defined USE_NONOPTION_FLAGS - int __nonoption_flags_max_len; - int __nonoption_flags_len; -#endif }; /* The initializer is necessary to set OPTIND and OPTERR to their default values and to clear the initialization flag. 
*/ -#define _GETOPT_DATA_INITIALIZER { 1, 1 } +#define _GETOPT_DATA_INITIALIZER { 1, 1 } extern int _getopt_internal_r (int ___argc, char **___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind, - int __long_only, struct _getopt_data *__data, - int __posixly_correct); + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only, struct _getopt_data *__data, + int __posixly_correct); extern int _getopt_long_r (int ___argc, char **___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind, - struct _getopt_data *__data); + const char *__shortopts, + const struct option *__longopts, int *__longind, + struct _getopt_data *__data); extern int _getopt_long_only_r (int ___argc, char **___argv, - const char *__shortopts, - const struct option *__longopts, - int *__longind, - struct _getopt_data *__data); + const char *__shortopts, + const struct option *__longopts, + int *__longind, + struct _getopt_data *__data); #endif /* getopt_int.h */ diff --git a/contrib/grep/lib/getpagesize.c b/contrib/grep/lib/getpagesize.c deleted file mode 100644 index 10e9c18776..0000000000 --- a/contrib/grep/lib/getpagesize.c +++ /dev/null @@ -1,39 +0,0 @@ -/* getpagesize emulation for systems where it cannot be done in a C macro. - - Copyright (C) 2007, 2009-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
*/ - -/* Written by Bruno Haible and Martin Lambers. */ - -#include - -/* Specification. */ -#include - -/* This implementation is only for native Windows systems. */ -#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ - -# define WIN32_LEAN_AND_MEAN -# include - -int -getpagesize (void) -{ - SYSTEM_INFO system_info; - GetSystemInfo (&system_info); - return system_info.dwPageSize; -} - -#endif diff --git a/contrib/grep/lib/getprogname.c b/contrib/grep/lib/getprogname.c new file mode 100644 index 0000000000..9f69f5a559 --- /dev/null +++ b/contrib/grep/lib/getprogname.c @@ -0,0 +1,260 @@ +/* Program name management. + Copyright (C) 2016-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +/* Specification. 
*/ +#include "getprogname.h" + +#include /* get program_invocation_name declaration */ +#include /* get __argv declaration */ + +#ifdef _AIX +# include +# include +# include +#endif + +#ifdef __MVS__ +# ifndef _OPEN_SYS +# define _OPEN_SYS +# endif +# include +# include +#endif + +#ifdef __hpux +# include +# include +# include +# include +#endif + +#ifdef __sgi +# include +# include +# include +# include +# include +#endif + +#include "dirname.h" + +#ifndef HAVE_GETPROGNAME /* not Mac OS X, FreeBSD, NetBSD, OpenBSD >= 5.4, Cygwin */ +char const * +getprogname (void) +{ +# if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME /* glibc, BeOS */ + /* https://www.gnu.org/software/libc/manual/html_node/Error-Messages.html */ + return program_invocation_short_name; +# elif HAVE_DECL_PROGRAM_INVOCATION_NAME /* glibc, BeOS */ + /* https://www.gnu.org/software/libc/manual/html_node/Error-Messages.html */ + return last_component (program_invocation_name); +# elif HAVE_GETEXECNAME /* Solaris */ + /* https://docs.oracle.com/cd/E19253-01/816-5168/6mbb3hrb1/index.html */ + const char *p = getexecname (); + if (!p) + p = "?"; + return last_component (p); +# elif HAVE_DECL___ARGV /* mingw, MSVC */ + /* https://docs.microsoft.com/en-us/cpp/c-runtime-library/argc-argv-wargv */ + const char *p = __argv && __argv[0] ? __argv[0] : "?"; + return last_component (p); +# elif HAVE_VAR___PROGNAME /* OpenBSD, Android, QNX */ + /* https://man.openbsd.org/style.9 */ + /* http://www.qnx.de/developers/docs/6.5.0/index.jsp?topic=%2Fcom.qnx.doc.neutrino_lib_ref%2Fp%2F__progname.html */ + /* Be careful to declare this only when we absolutely need it + (OpenBSD 5.1), rather than when it's available. Otherwise, + its mere declaration makes program_invocation_short_name + malfunction (have zero length) with Fedora 25's glibc. */ + extern char *__progname; + const char *p = __progname; +# if defined __ANDROID__ + return last_component (p); +# else + return p && p[0] ? 
p : "?"; +# endif +# elif _AIX /* AIX */ + /* Idea by Bastien ROUCARIÈS, + https://lists.gnu.org/r/bug-gnulib/2010-12/msg00095.html + Reference: https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/getprocs.htm + */ + static char *p; + static int first = 1; + if (first) + { + first = 0; + pid_t pid = getpid (); + struct procentry64 procs; + p = (0 < getprocs64 (&procs, sizeof procs, NULL, 0, &pid, 1) + ? strdup (procs.pi_comm) + : NULL); + if (!p) + p = "?"; + } + return p; +# elif defined __hpux + static char *p; + static int first = 1; + if (first) + { + first = 0; + pid_t pid = getpid (); + struct pst_status status; + if (pstat_getproc (&status, sizeof status, 0, pid) > 0) + { + char *ucomm = status.pst_ucomm; + char *cmd = status.pst_cmd; + if (strlen (ucomm) < PST_UCOMMLEN - 1) + p = ucomm; + else + { + /* ucomm is truncated to length PST_UCOMMLEN - 1. + Look at cmd instead. */ + char *space = strchr (cmd, ' '); + if (space != NULL) + *space = '\0'; + p = strrchr (cmd, '/'); + if (p != NULL) + p++; + else + p = cmd; + if (strlen (p) > PST_UCOMMLEN - 1 + && memcmp (p, ucomm, PST_UCOMMLEN - 1) == 0) + /* p is less truncated than ucomm. */ + ; + else + p = ucomm; + } + p = strdup (p); + } + else + { +# if !defined __LP64__ + /* Support for 32-bit programs running in 64-bit HP-UX. + The documented way to do this is to use the same source code + as above, but in a compilation unit where '#define _PSTAT64 1' + is in effect. I prefer a single compilation unit; the struct + size and the offsets are not going to change. */ + char status64[1216]; + if (__pstat_getproc64 (status64, sizeof status64, 0, pid) > 0) + { + char *ucomm = status64 + 288; + char *cmd = status64 + 168; + if (strlen (ucomm) < PST_UCOMMLEN - 1) + p = ucomm; + else + { + /* ucomm is truncated to length PST_UCOMMLEN - 1. + Look at cmd instead. 
*/ + char *space = strchr (cmd, ' '); + if (space != NULL) + *space = '\0'; + p = strrchr (cmd, '/'); + if (p != NULL) + p++; + else + p = cmd; + if (strlen (p) > PST_UCOMMLEN - 1 + && memcmp (p, ucomm, PST_UCOMMLEN - 1) == 0) + /* p is less truncated than ucomm. */ + ; + else + p = ucomm; + } + p = strdup (p); + } + else +# endif + p = NULL; + } + if (!p) + p = "?"; + } + return p; +# elif __MVS__ /* z/OS */ + /* https://www.ibm.com/support/knowledgecenter/SSLTBW_2.1.0/com.ibm.zos.v2r1.bpxbd00/rtwgetp.htm */ + static char *p = "?"; + static int first = 1; + if (first) + { + pid_t pid = getpid (); + int token; + W_PSPROC buf; + first = 0; + memset (&buf, 0, sizeof(buf)); + buf.ps_cmdptr = (char *) malloc (buf.ps_cmdlen = PS_CMDBLEN_LONG); + buf.ps_conttyptr = (char *) malloc (buf.ps_conttylen = PS_CONTTYBLEN); + buf.ps_pathptr = (char *) malloc (buf.ps_pathlen = PS_PATHBLEN); + if (buf.ps_cmdptr && buf.ps_conttyptr && buf.ps_pathptr) + { + for (token = 0; token >= 0; + token = w_getpsent (token, &buf, sizeof(buf))) + { + if (token > 0 && buf.ps_pid == pid) + { + char *s = strdup (last_component (buf.ps_pathptr)); + if (s) + p = s; + break; + } + } + } + free (buf.ps_cmdptr); + free (buf.ps_conttyptr); + free (buf.ps_pathptr); + } + return p; +# elif defined __sgi /* IRIX */ + char filename[50]; + int fd; + + sprintf (filename, "/proc/pinfo/%d", (int) getpid ()); + fd = open (filename, O_RDONLY); + if (0 <= fd) + { + prpsinfo_t buf; + int ioctl_ok = 0 <= ioctl (fd, PIOCPSINFO, &buf); + close (fd); + if (ioctl_ok) + { + char *name = buf.pr_fname; + size_t namesize = sizeof buf.pr_fname; + /* It may not be NUL-terminated. */ + char *namenul = memchr (name, '\0', namesize); + size_t namelen = namenul ? 
namenul - name : namesize; + char *namecopy = malloc (namelen + 1); + if (namecopy) + { + namecopy[namelen] = '\0'; + return memcpy (namecopy, name, namelen); + } + } + } + return NULL; +# else +# error "getprogname module not ported to this OS" +# endif +} + +#endif + +/* + * Hey Emacs! + * Local Variables: + * coding: utf-8 + * End: + */ diff --git a/contrib/grep/lib/btowc.c b/contrib/grep/lib/getprogname.h similarity index 52% rename from contrib/grep/lib/btowc.c rename to contrib/grep/lib/getprogname.h index 2e32deec4e..676912b4b8 100644 --- a/contrib/grep/lib/btowc.c +++ b/contrib/grep/lib/getprogname.h @@ -1,6 +1,5 @@ -/* Convert unibyte character to wide character. - Copyright (C) 2008, 2010-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2008. +/* Program name management. + Copyright (C) 2016-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,27 +12,29 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ -#include +#ifndef _GL_GETPROGNAME_H +#define _GL_GETPROGNAME_H -/* Specification. */ -#include - -#include #include -wint_t -btowc (int c) -{ - if (c != EOF) - { - char buf[1]; - wchar_t wc; - - buf[0] = c; - if (mbtowc (&wc, buf, 1) >= 0) - return wc; - } - return WEOF; +#ifdef __cplusplus +extern "C" { +#endif + +/* Return the base name of the executing program. + On native Windows this will usually end in ".exe" or ".EXE". 
*/ +#ifndef HAVE_GETPROGNAME +extern char const *getprogname (void) +# ifdef HAVE_DECL_PROGRAM_INVOCATION_NAME + _GL_ATTRIBUTE_PURE +# endif + ; +#endif + +#ifdef __cplusplus } +#endif + +#endif diff --git a/contrib/grep/lib/gettext.h b/contrib/grep/lib/gettext.h index 599a14ec1b..0bd1e13348 100644 --- a/contrib/grep/lib/gettext.h +++ b/contrib/grep/lib/gettext.h @@ -1,5 +1,5 @@ /* Convenience header for conditional use of GNU . - Copyright (C) 1995-1998, 2000-2002, 2004-2006, 2009-2015 Free Software + Copyright (C) 1995-1998, 2000-2002, 2004-2006, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify @@ -13,13 +13,14 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along - with this program; if not, see . */ + with this program; if not, see . */ #ifndef _LIBGETTEXT_H #define _LIBGETTEXT_H 1 -/* NLS can be disabled through the configure --disable-nls option. */ -#if ENABLE_NLS +/* NLS can be disabled through the configure --disable-nls option + or through "#define ENABLE_NLS 0" before including this file. */ +#if defined ENABLE_NLS && ENABLE_NLS /* Get declarations of GNU message catalog functions. */ # include @@ -183,8 +184,16 @@ npgettext_aux (const char *domain, #include -#if (((__GNUC__ >= 3 || __GNUG__ >= 2) && !defined __STRICT_ANSI__) \ - /* || __STDC_VERSION__ >= 199901L */ ) +/* GNULIB_NO_VLA can be defined to disable use of VLAs even if supported. + This relates to the -Wvla and -Wvla-larger-than warnings, enabled in + the default GCC many warnings set. This allows programs to disable use + of VLAs, which may be unintended, or may be awkward to support portably, + or may have security implications due to non-deterministic stack usage.
*/ + +#if (!defined GNULIB_NO_VLA \ + && (((__GNUC__ >= 3 || __GNUG__ >= 2) && !defined __STRICT_ANSI__) \ + /* || (__STDC_VERSION__ == 199901L && !defined __HP_cc) + || (__STDC_VERSION__ >= 201112L && !defined __STDC_NO_VLA__) */ )) # define _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS 1 #else # define _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS 0 @@ -225,15 +234,17 @@ dcpgettext_expr (const char *domain, if (msg_ctxt_id != NULL) #endif { + int found_translation; memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1); msg_ctxt_id[msgctxt_len - 1] = '\004'; memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len); translation = dcgettext (domain, msg_ctxt_id, category); + found_translation = (translation != msg_ctxt_id); #if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS if (msg_ctxt_id != buf) free (msg_ctxt_id); #endif - if (translation != msg_ctxt_id) + if (found_translation) return translation; } return msgid; @@ -271,15 +282,17 @@ dcnpgettext_expr (const char *domain, if (msg_ctxt_id != NULL) #endif { + int found_translation; memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1); msg_ctxt_id[msgctxt_len - 1] = '\004'; memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len); translation = dcngettext (domain, msg_ctxt_id, msgid_plural, n, category); + found_translation = !(translation == msg_ctxt_id || translation == msgid_plural); #if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS if (msg_ctxt_id != buf) free (msg_ctxt_id); #endif - if (!(translation == msg_ctxt_id || translation == msgid_plural)) + if (found_translation) return translation; } return (n == 1 ? msgid : msgid_plural); diff --git a/contrib/grep/lib/gettimeofday.c b/contrib/grep/lib/gettimeofday.c deleted file mode 100644 index e0e2e696d0..0000000000 --- a/contrib/grep/lib/gettimeofday.c +++ /dev/null @@ -1,154 +0,0 @@ -/* Provide gettimeofday for systems that don't have it or for which it's broken. - - Copyright (C) 2001-2003, 2005-2007, 2009-2015 Free Software Foundation, Inc. 
- - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see . */ - -/* written by Jim Meyering */ - -#include - -/* Specification. */ -#include - -#include - -#if HAVE_SYS_TIMEB_H -# include -#endif - -#if GETTIMEOFDAY_CLOBBERS_LOCALTIME || TZSET_CLOBBERS_LOCALTIME - -/* Work around the bug in some systems whereby gettimeofday clobbers - the static buffer that localtime uses for its return value. The - gettimeofday function from Mac OS X 10.0.4 (i.e., Darwin 1.3.7) has - this problem. The tzset replacement is necessary for at least - Solaris 2.5, 2.5.1, and 2.6. */ - -static struct tm tm_zero_buffer; -static struct tm *localtime_buffer_addr = &tm_zero_buffer; - -# undef localtime -extern struct tm *localtime (time_t const *); - -# undef gmtime -extern struct tm *gmtime (time_t const *); - -/* This is a wrapper for localtime. It is used only on systems for which - gettimeofday clobbers the static buffer used for localtime's result. - - On the first call, record the address of the static buffer that - localtime uses for its result. */ - -struct tm * -rpl_localtime (time_t const *timep) -{ - struct tm *tm = localtime (timep); - - if (localtime_buffer_addr == &tm_zero_buffer) - localtime_buffer_addr = tm; - - return tm; -} - -/* Same as above, since gmtime and localtime use the same buffer. 
*/ -struct tm * -rpl_gmtime (time_t const *timep) -{ - struct tm *tm = gmtime (timep); - - if (localtime_buffer_addr == &tm_zero_buffer) - localtime_buffer_addr = tm; - - return tm; -} - -#endif /* GETTIMEOFDAY_CLOBBERS_LOCALTIME || TZSET_CLOBBERS_LOCALTIME */ - -#if TZSET_CLOBBERS_LOCALTIME - -# undef tzset -extern void tzset (void); - -/* This is a wrapper for tzset, for systems on which tzset may clobber - the static buffer used for localtime's result. */ -void -rpl_tzset (void) -{ - /* Save and restore the contents of the buffer used for localtime's - result around the call to tzset. */ - struct tm save = *localtime_buffer_addr; - tzset (); - *localtime_buffer_addr = save; -} -#endif - -/* This is a wrapper for gettimeofday. It is used only on systems - that lack this function, or whose implementation of this function - causes problems. */ - -int -gettimeofday (struct timeval *restrict tv, void *restrict tz) -{ -#undef gettimeofday -#if HAVE_GETTIMEOFDAY -# if GETTIMEOFDAY_CLOBBERS_LOCALTIME - /* Save and restore the contents of the buffer used for localtime's - result around the call to gettimeofday. */ - struct tm save = *localtime_buffer_addr; -# endif - -# if defined timeval /* 'struct timeval' overridden by gnulib? */ -# undef timeval - struct timeval otv; - int result = gettimeofday (&otv, (struct timezone *) tz); - if (result == 0) - { - tv->tv_sec = otv.tv_sec; - tv->tv_usec = otv.tv_usec; - } -# else - int result = gettimeofday (tv, (struct timezone *) tz); -# endif - -# if GETTIMEOFDAY_CLOBBERS_LOCALTIME - *localtime_buffer_addr = save; -# endif - - return result; - -#else - -# if HAVE__FTIME - - struct _timeb timebuf; - _ftime (&timebuf); - tv->tv_sec = timebuf.time; - tv->tv_usec = timebuf.millitm * 1000; - -# else - -# if !defined OK_TO_USE_1S_CLOCK -# error "Only 1-second nominal clock resolution found. Is that intended?" \ - "If so, compile with the -DOK_TO_USE_1S_CLOCK option." 
-# endif - tv->tv_sec = time (NULL); - tv->tv_usec = 0; - -# endif - - return 0; - -#endif -} diff --git a/contrib/grep/lib/glthread/lock.c b/contrib/grep/lib/glthread/lock.c index b64132ab32..a84318bb0b 100644 --- a/contrib/grep/lib/glthread/lock.c +++ b/contrib/grep/lib/glthread/lock.c @@ -1,5 +1,5 @@ /* Locking in multithreaded situations. - Copyright (C) 2005-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,11 +12,10 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . */ /* Written by Bruno Haible , 2005. - Based on GCC's gthr-posix.h, gthr-posix95.h, gthr-solaris.h, - gthr-win32.h. */ + Based on GCC's gthr-posix.h, gthr-posix95.h. */ #include @@ -24,15 +23,267 @@ /* ========================================================================= */ +#if USE_ISOC_THREADS || USE_ISOC_AND_POSIX_THREADS + +/* -------------------------- gl_lock_t datatype -------------------------- */ + +int +glthread_lock_init (gl_lock_t *lock) +{ + if (mtx_init (&lock->mutex, mtx_plain) != thrd_success) + return ENOMEM; + lock->init_needed = 0; + return 0; +} + +int +glthread_lock_lock (gl_lock_t *lock) +{ + if (lock->init_needed) + call_once (&lock->init_once, lock->init_func); + if (mtx_lock (&lock->mutex) != thrd_success) + return EAGAIN; + return 0; +} + +int +glthread_lock_unlock (gl_lock_t *lock) +{ + if (lock->init_needed) + call_once (&lock->init_once, lock->init_func); + if (mtx_unlock (&lock->mutex) != thrd_success) + return EINVAL; + return 0; +} + +int +glthread_lock_destroy (gl_lock_t *lock) +{ + if (lock->init_needed) + call_once (&lock->init_once, lock->init_func); + mtx_destroy (&lock->mutex); + return 0; +} + +/* ------------------------- 
gl_rwlock_t datatype ------------------------- */ + +int +glthread_rwlock_init (gl_rwlock_t *lock) +{ + if (mtx_init (&lock->lock, mtx_plain) != thrd_success + || cnd_init (&lock->waiting_readers) != thrd_success + || cnd_init (&lock->waiting_writers) != thrd_success) + return ENOMEM; + lock->waiting_writers_count = 0; + lock->runcount = 0; + lock->init_needed = 0; + return 0; +} + +int +glthread_rwlock_rdlock (gl_rwlock_t *lock) +{ + if (lock->init_needed) + call_once (&lock->init_once, lock->init_func); + if (mtx_lock (&lock->lock) != thrd_success) + return EAGAIN; + /* Test whether only readers are currently running, and whether the runcount + field will not overflow, and whether no writer is waiting. The latter + condition is because POSIX recommends that "write locks shall take + precedence over read locks", to avoid "writer starvation". */ + while (!(lock->runcount + 1 > 0 && lock->waiting_writers_count == 0)) + { + /* This thread has to wait for a while. Enqueue it among the + waiting_readers. */ + if (cnd_wait (&lock->waiting_readers, &lock->lock) != thrd_success) + { + mtx_unlock (&lock->lock); + return EINVAL; + } + } + lock->runcount++; + if (mtx_unlock (&lock->lock) != thrd_success) + return EINVAL; + return 0; +} + +int +glthread_rwlock_wrlock (gl_rwlock_t *lock) +{ + if (lock->init_needed) + call_once (&lock->init_once, lock->init_func); + if (mtx_lock (&lock->lock) != thrd_success) + return EAGAIN; + /* Test whether no readers or writers are currently running. */ + while (!(lock->runcount == 0)) + { + /* This thread has to wait for a while. Enqueue it among the + waiting_writers. 
*/ + lock->waiting_writers_count++; + if (cnd_wait (&lock->waiting_writers, &lock->lock) != thrd_success) + { + lock->waiting_writers_count--; + mtx_unlock (&lock->lock); + return EINVAL; + } + lock->waiting_writers_count--; + } + lock->runcount--; /* runcount becomes -1 */ + if (mtx_unlock (&lock->lock) != thrd_success) + return EINVAL; + return 0; +} + +int +glthread_rwlock_unlock (gl_rwlock_t *lock) +{ + if (lock->init_needed) + call_once (&lock->init_once, lock->init_func); + if (mtx_lock (&lock->lock) != thrd_success) + return EAGAIN; + if (lock->runcount < 0) + { + /* Drop a writer lock. */ + if (!(lock->runcount == -1)) + { + mtx_unlock (&lock->lock); + return EINVAL; + } + lock->runcount = 0; + } + else + { + /* Drop a reader lock. */ + if (!(lock->runcount > 0)) + { + mtx_unlock (&lock->lock); + return EINVAL; + } + lock->runcount--; + } + if (lock->runcount == 0) + { + /* POSIX recommends that "write locks shall take precedence over read + locks", to avoid "writer starvation". */ + if (lock->waiting_writers_count > 0) + { + /* Wake up one of the waiting writers. */ + if (cnd_signal (&lock->waiting_writers) != thrd_success) + { + mtx_unlock (&lock->lock); + return EINVAL; + } + } + else + { + /* Wake up all waiting readers. 
*/ + if (cnd_broadcast (&lock->waiting_readers) != thrd_success) + { + mtx_unlock (&lock->lock); + return EINVAL; + } + } + } + if (mtx_unlock (&lock->lock) != thrd_success) + return EINVAL; + return 0; +} + +int +glthread_rwlock_destroy (gl_rwlock_t *lock) +{ + if (lock->init_needed) + call_once (&lock->init_once, lock->init_func); + mtx_destroy (&lock->lock); + cnd_destroy (&lock->waiting_readers); + cnd_destroy (&lock->waiting_writers); + return 0; +} + +/* --------------------- gl_recursive_lock_t datatype --------------------- */ + +int +glthread_recursive_lock_init (gl_recursive_lock_t *lock) +{ + if (mtx_init (&lock->mutex, mtx_plain | mtx_recursive) != thrd_success) + return ENOMEM; + lock->init_needed = 0; + return 0; +} + +int +glthread_recursive_lock_lock (gl_recursive_lock_t *lock) +{ + if (lock->init_needed) + call_once (&lock->init_once, lock->init_func); + if (mtx_lock (&lock->mutex) != thrd_success) + return EAGAIN; + return 0; +} + +int +glthread_recursive_lock_unlock (gl_recursive_lock_t *lock) +{ + if (lock->init_needed) + call_once (&lock->init_once, lock->init_func); + if (mtx_unlock (&lock->mutex) != thrd_success) + return EINVAL; + return 0; +} + +int +glthread_recursive_lock_destroy (gl_recursive_lock_t *lock) +{ + if (lock->init_needed) + call_once (&lock->init_once, lock->init_func); + mtx_destroy (&lock->mutex); + return 0; +} + +/* -------------------------- gl_once_t datatype -------------------------- */ + +#endif + +/* ========================================================================= */ + #if USE_POSIX_THREADS /* -------------------------- gl_lock_t datatype -------------------------- */ /* ------------------------- gl_rwlock_t datatype ------------------------- */ -# if HAVE_PTHREAD_RWLOCK +# if HAVE_PTHREAD_RWLOCK && (HAVE_PTHREAD_RWLOCK_RDLOCK_PREFER_WRITER || (defined PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP && (__GNU_LIBRARY__ > 1))) + +# if defined PTHREAD_RWLOCK_INITIALIZER || defined 
PTHREAD_RWLOCK_INITIALIZER_NP + +# if !HAVE_PTHREAD_RWLOCK_RDLOCK_PREFER_WRITER + /* glibc with bug https://sourceware.org/bugzilla/show_bug.cgi?id=13701 */ + +int +glthread_rwlock_init_for_glibc (pthread_rwlock_t *lock) +{ + pthread_rwlockattr_t attributes; + int err; -# if !defined PTHREAD_RWLOCK_INITIALIZER + err = pthread_rwlockattr_init (&attributes); + if (err != 0) + return err; + /* Note: PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP is the only value that + causes the writer to be preferred. PTHREAD_RWLOCK_PREFER_WRITER_NP does not + do this; see + http://man7.org/linux/man-pages/man3/pthread_rwlockattr_setkind_np.3.html */ + err = pthread_rwlockattr_setkind_np (&attributes, + PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP); + if (err == 0) + err = pthread_rwlock_init(lock, &attributes); + /* pthread_rwlockattr_destroy always returns 0. It cannot influence the + return value. */ + pthread_rwlockattr_destroy (&attributes); + return err; +} + +# endif +# else int glthread_rwlock_init_multithreaded (gl_rwlock_t *lock) @@ -152,11 +403,9 @@ glthread_rwlock_rdlock_multithreaded (gl_rwlock_t *lock) if (err != 0) return err; /* Test whether only readers are currently running, and whether the runcount - field will not overflow. */ - /* POSIX says: "It is implementation-defined whether the calling thread - acquires the lock when a writer does not hold the lock and there are - writers blocked on the lock." Let's say, no: give the writers a higher - priority. */ + field will not overflow, and whether no writer is waiting. The latter + condition is because POSIX recommends that "write locks shall take + precedence over read locks", to avoid "writer starvation". */ while (!(lock->runcount + 1 > 0 && lock->waiting_writers_count == 0)) { /* This thread has to wait for a while. 
Enqueue it among the @@ -473,585 +722,8 @@ glthread_once_singlethreaded (pthread_once_t *once_control) /* ========================================================================= */ -#if USE_PTH_THREADS - -/* Use the GNU Pth threads library. */ - -/* -------------------------- gl_lock_t datatype -------------------------- */ - -/* ------------------------- gl_rwlock_t datatype ------------------------- */ - -/* --------------------- gl_recursive_lock_t datatype --------------------- */ - -/* -------------------------- gl_once_t datatype -------------------------- */ - -static void -glthread_once_call (void *arg) -{ - void (**gl_once_temp_addr) (void) = (void (**) (void)) arg; - void (*initfunction) (void) = *gl_once_temp_addr; - initfunction (); -} - -int -glthread_once_multithreaded (pth_once_t *once_control, void (*initfunction) (void)) -{ - void (*temp) (void) = initfunction; - return (!pth_once (once_control, glthread_once_call, &temp) ? errno : 0); -} - -int -glthread_once_singlethreaded (pth_once_t *once_control) -{ - /* We know that pth_once_t is an integer type. */ - if (*once_control == PTH_ONCE_INIT) - { - /* First time use of once_control. Invert the marker. */ - *once_control = ~ PTH_ONCE_INIT; - return 1; - } - else - return 0; -} - -#endif - -/* ========================================================================= */ - -#if USE_SOLARIS_THREADS - -/* Use the old Solaris threads library. 
*/ - -/* -------------------------- gl_lock_t datatype -------------------------- */ - -/* ------------------------- gl_rwlock_t datatype ------------------------- */ - -/* --------------------- gl_recursive_lock_t datatype --------------------- */ - -int -glthread_recursive_lock_init_multithreaded (gl_recursive_lock_t *lock) -{ - int err; - - err = mutex_init (&lock->mutex, USYNC_THREAD, NULL); - if (err != 0) - return err; - lock->owner = (thread_t) 0; - lock->depth = 0; - return 0; -} - -int -glthread_recursive_lock_lock_multithreaded (gl_recursive_lock_t *lock) -{ - thread_t self = thr_self (); - if (lock->owner != self) - { - int err; - - err = mutex_lock (&lock->mutex); - if (err != 0) - return err; - lock->owner = self; - } - if (++(lock->depth) == 0) /* wraparound? */ - { - lock->depth--; - return EAGAIN; - } - return 0; -} - -int -glthread_recursive_lock_unlock_multithreaded (gl_recursive_lock_t *lock) -{ - if (lock->owner != thr_self ()) - return EPERM; - if (lock->depth == 0) - return EINVAL; - if (--(lock->depth) == 0) - { - lock->owner = (thread_t) 0; - return mutex_unlock (&lock->mutex); - } - else - return 0; -} - -int -glthread_recursive_lock_destroy_multithreaded (gl_recursive_lock_t *lock) -{ - if (lock->owner != (thread_t) 0) - return EBUSY; - return mutex_destroy (&lock->mutex); -} - -/* -------------------------- gl_once_t datatype -------------------------- */ - -int -glthread_once_multithreaded (gl_once_t *once_control, void (*initfunction) (void)) -{ - if (!once_control->inited) - { - int err; - - /* Use the mutex to guarantee that if another thread is already calling - the initfunction, this thread waits until it's finished. 
*/ - err = mutex_lock (&once_control->mutex); - if (err != 0) - return err; - if (!once_control->inited) - { - once_control->inited = 1; - initfunction (); - } - return mutex_unlock (&once_control->mutex); - } - else - return 0; -} - -int -glthread_once_singlethreaded (gl_once_t *once_control) -{ - /* We know that gl_once_t contains an integer type. */ - if (!once_control->inited) - { - /* First time use of once_control. Invert the marker. */ - once_control->inited = ~ 0; - return 1; - } - else - return 0; -} - -#endif - -/* ========================================================================= */ - #if USE_WINDOWS_THREADS -/* -------------------------- gl_lock_t datatype -------------------------- */ - -void -glthread_lock_init_func (gl_lock_t *lock) -{ - InitializeCriticalSection (&lock->lock); - lock->guard.done = 1; -} - -int -glthread_lock_lock_func (gl_lock_t *lock) -{ - if (!lock->guard.done) - { - if (InterlockedIncrement (&lock->guard.started) == 0) - /* This thread is the first one to need this lock. Initialize it. */ - glthread_lock_init (lock); - else - /* Yield the CPU while waiting for another thread to finish - initializing this lock. */ - while (!lock->guard.done) - Sleep (0); - } - EnterCriticalSection (&lock->lock); - return 0; -} - -int -glthread_lock_unlock_func (gl_lock_t *lock) -{ - if (!lock->guard.done) - return EINVAL; - LeaveCriticalSection (&lock->lock); - return 0; -} - -int -glthread_lock_destroy_func (gl_lock_t *lock) -{ - if (!lock->guard.done) - return EINVAL; - DeleteCriticalSection (&lock->lock); - lock->guard.done = 0; - return 0; -} - -/* ------------------------- gl_rwlock_t datatype ------------------------- */ - -/* In this file, the waitqueues are implemented as circular arrays. 
*/ -#define gl_waitqueue_t gl_carray_waitqueue_t - -static void -gl_waitqueue_init (gl_waitqueue_t *wq) -{ - wq->array = NULL; - wq->count = 0; - wq->alloc = 0; - wq->offset = 0; -} - -/* Enqueues the current thread, represented by an event, in a wait queue. - Returns INVALID_HANDLE_VALUE if an allocation failure occurs. */ -static HANDLE -gl_waitqueue_add (gl_waitqueue_t *wq) -{ - HANDLE event; - unsigned int index; - - if (wq->count == wq->alloc) - { - unsigned int new_alloc = 2 * wq->alloc + 1; - HANDLE *new_array = - (HANDLE *) realloc (wq->array, new_alloc * sizeof (HANDLE)); - if (new_array == NULL) - /* No more memory. */ - return INVALID_HANDLE_VALUE; - /* Now is a good opportunity to rotate the array so that its contents - starts at offset 0. */ - if (wq->offset > 0) - { - unsigned int old_count = wq->count; - unsigned int old_alloc = wq->alloc; - unsigned int old_offset = wq->offset; - unsigned int i; - if (old_offset + old_count > old_alloc) - { - unsigned int limit = old_offset + old_count - old_alloc; - for (i = 0; i < limit; i++) - new_array[old_alloc + i] = new_array[i]; - } - for (i = 0; i < old_count; i++) - new_array[i] = new_array[old_offset + i]; - wq->offset = 0; - } - wq->array = new_array; - wq->alloc = new_alloc; - } - /* Whether the created event is a manual-reset one or an auto-reset one, - does not matter, since we will wait on it only once. */ - event = CreateEvent (NULL, TRUE, FALSE, NULL); - if (event == INVALID_HANDLE_VALUE) - /* No way to allocate an event. */ - return INVALID_HANDLE_VALUE; - index = wq->offset + wq->count; - if (index >= wq->alloc) - index -= wq->alloc; - wq->array[index] = event; - wq->count++; - return event; -} - -/* Notifies the first thread from a wait queue and dequeues it. 
*/ -static void -gl_waitqueue_notify_first (gl_waitqueue_t *wq) -{ - SetEvent (wq->array[wq->offset + 0]); - wq->offset++; - wq->count--; - if (wq->count == 0 || wq->offset == wq->alloc) - wq->offset = 0; -} - -/* Notifies all threads from a wait queue and dequeues them all. */ -static void -gl_waitqueue_notify_all (gl_waitqueue_t *wq) -{ - unsigned int i; - - for (i = 0; i < wq->count; i++) - { - unsigned int index = wq->offset + i; - if (index >= wq->alloc) - index -= wq->alloc; - SetEvent (wq->array[index]); - } - wq->count = 0; - wq->offset = 0; -} - -void -glthread_rwlock_init_func (gl_rwlock_t *lock) -{ - InitializeCriticalSection (&lock->lock); - gl_waitqueue_init (&lock->waiting_readers); - gl_waitqueue_init (&lock->waiting_writers); - lock->runcount = 0; - lock->guard.done = 1; -} - -int -glthread_rwlock_rdlock_func (gl_rwlock_t *lock) -{ - if (!lock->guard.done) - { - if (InterlockedIncrement (&lock->guard.started) == 0) - /* This thread is the first one to need this lock. Initialize it. */ - glthread_rwlock_init (lock); - else - /* Yield the CPU while waiting for another thread to finish - initializing this lock. */ - while (!lock->guard.done) - Sleep (0); - } - EnterCriticalSection (&lock->lock); - /* Test whether only readers are currently running, and whether the runcount - field will not overflow. */ - if (!(lock->runcount + 1 > 0)) - { - /* This thread has to wait for a while. Enqueue it among the - waiting_readers. */ - HANDLE event = gl_waitqueue_add (&lock->waiting_readers); - if (event != INVALID_HANDLE_VALUE) - { - DWORD result; - LeaveCriticalSection (&lock->lock); - /* Wait until another thread signals this event. */ - result = WaitForSingleObject (event, INFINITE); - if (result == WAIT_FAILED || result == WAIT_TIMEOUT) - abort (); - CloseHandle (event); - /* The thread which signalled the event already did the bookkeeping: - removed us from the waiting_readers, incremented lock->runcount. 
*/ - if (!(lock->runcount > 0)) - abort (); - return 0; - } - else - { - /* Allocation failure. Weird. */ - do - { - LeaveCriticalSection (&lock->lock); - Sleep (1); - EnterCriticalSection (&lock->lock); - } - while (!(lock->runcount + 1 > 0)); - } - } - lock->runcount++; - LeaveCriticalSection (&lock->lock); - return 0; -} - -int -glthread_rwlock_wrlock_func (gl_rwlock_t *lock) -{ - if (!lock->guard.done) - { - if (InterlockedIncrement (&lock->guard.started) == 0) - /* This thread is the first one to need this lock. Initialize it. */ - glthread_rwlock_init (lock); - else - /* Yield the CPU while waiting for another thread to finish - initializing this lock. */ - while (!lock->guard.done) - Sleep (0); - } - EnterCriticalSection (&lock->lock); - /* Test whether no readers or writers are currently running. */ - if (!(lock->runcount == 0)) - { - /* This thread has to wait for a while. Enqueue it among the - waiting_writers. */ - HANDLE event = gl_waitqueue_add (&lock->waiting_writers); - if (event != INVALID_HANDLE_VALUE) - { - DWORD result; - LeaveCriticalSection (&lock->lock); - /* Wait until another thread signals this event. */ - result = WaitForSingleObject (event, INFINITE); - if (result == WAIT_FAILED || result == WAIT_TIMEOUT) - abort (); - CloseHandle (event); - /* The thread which signalled the event already did the bookkeeping: - removed us from the waiting_writers, set lock->runcount = -1. */ - if (!(lock->runcount == -1)) - abort (); - return 0; - } - else - { - /* Allocation failure. Weird. */ - do - { - LeaveCriticalSection (&lock->lock); - Sleep (1); - EnterCriticalSection (&lock->lock); - } - while (!(lock->runcount == 0)); - } - } - lock->runcount--; /* runcount becomes -1 */ - LeaveCriticalSection (&lock->lock); - return 0; -} - -int -glthread_rwlock_unlock_func (gl_rwlock_t *lock) -{ - if (!lock->guard.done) - return EINVAL; - EnterCriticalSection (&lock->lock); - if (lock->runcount < 0) - { - /* Drop a writer lock. 
*/ - if (!(lock->runcount == -1)) - abort (); - lock->runcount = 0; - } - else - { - /* Drop a reader lock. */ - if (!(lock->runcount > 0)) - { - LeaveCriticalSection (&lock->lock); - return EPERM; - } - lock->runcount--; - } - if (lock->runcount == 0) - { - /* POSIX recommends that "write locks shall take precedence over read - locks", to avoid "writer starvation". */ - if (lock->waiting_writers.count > 0) - { - /* Wake up one of the waiting writers. */ - lock->runcount--; - gl_waitqueue_notify_first (&lock->waiting_writers); - } - else - { - /* Wake up all waiting readers. */ - lock->runcount += lock->waiting_readers.count; - gl_waitqueue_notify_all (&lock->waiting_readers); - } - } - LeaveCriticalSection (&lock->lock); - return 0; -} - -int -glthread_rwlock_destroy_func (gl_rwlock_t *lock) -{ - if (!lock->guard.done) - return EINVAL; - if (lock->runcount != 0) - return EBUSY; - DeleteCriticalSection (&lock->lock); - if (lock->waiting_readers.array != NULL) - free (lock->waiting_readers.array); - if (lock->waiting_writers.array != NULL) - free (lock->waiting_writers.array); - lock->guard.done = 0; - return 0; -} - -/* --------------------- gl_recursive_lock_t datatype --------------------- */ - -void -glthread_recursive_lock_init_func (gl_recursive_lock_t *lock) -{ - lock->owner = 0; - lock->depth = 0; - InitializeCriticalSection (&lock->lock); - lock->guard.done = 1; -} - -int -glthread_recursive_lock_lock_func (gl_recursive_lock_t *lock) -{ - if (!lock->guard.done) - { - if (InterlockedIncrement (&lock->guard.started) == 0) - /* This thread is the first one to need this lock. Initialize it. */ - glthread_recursive_lock_init (lock); - else - /* Yield the CPU while waiting for another thread to finish - initializing this lock. */ - while (!lock->guard.done) - Sleep (0); - } - { - DWORD self = GetCurrentThreadId (); - if (lock->owner != self) - { - EnterCriticalSection (&lock->lock); - lock->owner = self; - } - if (++(lock->depth) == 0) /* wraparound? 
*/ - { - lock->depth--; - return EAGAIN; - } - } - return 0; -} - -int -glthread_recursive_lock_unlock_func (gl_recursive_lock_t *lock) -{ - if (lock->owner != GetCurrentThreadId ()) - return EPERM; - if (lock->depth == 0) - return EINVAL; - if (--(lock->depth) == 0) - { - lock->owner = 0; - LeaveCriticalSection (&lock->lock); - } - return 0; -} - -int -glthread_recursive_lock_destroy_func (gl_recursive_lock_t *lock) -{ - if (lock->owner != 0) - return EBUSY; - DeleteCriticalSection (&lock->lock); - lock->guard.done = 0; - return 0; -} - -/* -------------------------- gl_once_t datatype -------------------------- */ - -void -glthread_once_func (gl_once_t *once_control, void (*initfunction) (void)) -{ - if (once_control->inited <= 0) - { - if (InterlockedIncrement (&once_control->started) == 0) - { - /* This thread is the first one to come to this once_control. */ - InitializeCriticalSection (&once_control->lock); - EnterCriticalSection (&once_control->lock); - once_control->inited = 0; - initfunction (); - once_control->inited = 1; - LeaveCriticalSection (&once_control->lock); - } - else - { - /* Undo last operation. */ - InterlockedDecrement (&once_control->started); - /* Some other thread has already started the initialization. - Yield the CPU while waiting for the other thread to finish - initializing and taking the lock. */ - while (once_control->inited < 0) - Sleep (0); - if (once_control->inited <= 0) - { - /* Take the lock. This blocks until the other thread has - finished calling the initfunction. 
*/ - EnterCriticalSection (&once_control->lock); - LeaveCriticalSection (&once_control->lock); - if (!(once_control->inited > 0)) - abort (); - } - } - } -} - #endif /* ========================================================================= */ diff --git a/contrib/grep/lib/glthread/lock.h b/contrib/grep/lib/glthread/lock.h index cf3d0d9531..28a9e3d4e6 100644 --- a/contrib/grep/lib/glthread/lock.h +++ b/contrib/grep/lib/glthread/lock.h @@ -1,5 +1,5 @@ /* Locking in multithreaded situations. - Copyright (C) 2005-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,11 +12,10 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . */ /* Written by Bruno Haible , 2005. - Based on GCC's gthr-posix.h, gthr-posix95.h, gthr-solaris.h, - gthr-win32.h. */ + Based on GCC's gthr-posix.h, gthr-posix95.h, gthr-win32.h. */ /* This file contains locking primitives for use with a given thread library. It does not contain primitives for creating threads or for other @@ -81,6 +80,125 @@ #include #include +#if !defined c11_threads_in_use +# if HAVE_THREADS_H && USE_POSIX_THREADS_WEAK +# include +# pragma weak thrd_exit +# define c11_threads_in_use() (thrd_exit != NULL) +# else +# define c11_threads_in_use() 0 +# endif +#endif + +/* ========================================================================= */ + +#if USE_ISOC_THREADS || USE_ISOC_AND_POSIX_THREADS + +/* Use the ISO C threads library. 
*/ + +# include + +# ifdef __cplusplus +extern "C" { +# endif + +/* -------------------------- gl_lock_t datatype -------------------------- */ + +typedef struct + { + int volatile init_needed; + once_flag init_once; + void (*init_func) (void); + mtx_t mutex; + } + gl_lock_t; +# define gl_lock_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_lock_t NAME; +# define gl_lock_define_initialized(STORAGECLASS, NAME) \ + static void _atomic_init_##NAME (void); \ + STORAGECLASS gl_lock_t NAME = \ + { 1, ONCE_FLAG_INIT, _atomic_init_##NAME }; \ + static void _atomic_init_##NAME (void) \ + { \ + if (glthread_lock_init (&(NAME))) \ + abort (); \ + } +extern int glthread_lock_init (gl_lock_t *lock); +extern int glthread_lock_lock (gl_lock_t *lock); +extern int glthread_lock_unlock (gl_lock_t *lock); +extern int glthread_lock_destroy (gl_lock_t *lock); + +/* ------------------------- gl_rwlock_t datatype ------------------------- */ + +typedef struct + { + int volatile init_needed; + once_flag init_once; + void (*init_func) (void); + mtx_t lock; /* protects the remaining fields */ + cnd_t waiting_readers; /* waiting readers */ + cnd_t waiting_writers; /* waiting writers */ + unsigned int waiting_writers_count; /* number of waiting writers */ + int runcount; /* number of readers running, or -1 when a writer runs */ + } + gl_rwlock_t; +# define gl_rwlock_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_rwlock_t NAME; +# define gl_rwlock_define_initialized(STORAGECLASS, NAME) \ + static void _atomic_init_##NAME (void); \ + STORAGECLASS gl_rwlock_t NAME = \ + { 1, ONCE_FLAG_INIT, _atomic_init_##NAME }; \ + static void _atomic_init_##NAME (void) \ + { \ + if (glthread_rwlock_init (&(NAME))) \ + abort (); \ + } +extern int glthread_rwlock_init (gl_rwlock_t *lock); +extern int glthread_rwlock_rdlock (gl_rwlock_t *lock); +extern int glthread_rwlock_wrlock (gl_rwlock_t *lock); +extern int glthread_rwlock_unlock (gl_rwlock_t *lock); +extern int glthread_rwlock_destroy (gl_rwlock_t *lock); + 
+/* --------------------- gl_recursive_lock_t datatype --------------------- */ + +typedef struct + { + int volatile init_needed; + once_flag init_once; + void (*init_func) (void); + mtx_t mutex; + } + gl_recursive_lock_t; +# define gl_recursive_lock_define(STORAGECLASS, NAME) \ + STORAGECLASS gl_recursive_lock_t NAME; +# define gl_recursive_lock_define_initialized(STORAGECLASS, NAME) \ + static void _atomic_init_##NAME (void); \ + STORAGECLASS gl_recursive_lock_t NAME = \ + { 1, ONCE_FLAG_INIT, _atomic_init_##NAME }; \ + static void _atomic_init_##NAME (void) \ + { \ + if (glthread_recursive_lock_init (&(NAME))) \ + abort (); \ + } +extern int glthread_recursive_lock_init (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_lock (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_unlock (gl_recursive_lock_t *lock); +extern int glthread_recursive_lock_destroy (gl_recursive_lock_t *lock); + +/* -------------------------- gl_once_t datatype -------------------------- */ + +typedef once_flag gl_once_t; +# define gl_once_define(STORAGECLASS, NAME) \ + STORAGECLASS once_flag NAME = ONCE_FLAG_INIT; +# define glthread_once(ONCE_CONTROL, INITFUNCTION) \ + (call_once (ONCE_CONTROL, INITFUNCTION), 0) + +# ifdef __cplusplus +} +# endif + +#endif + /* ========================================================================= */ #if USE_POSIX_THREADS @@ -139,13 +257,25 @@ extern int glthread_in_use (void); # pragma weak pthread_mutexattr_init # pragma weak pthread_mutexattr_settype # pragma weak pthread_mutexattr_destroy +# pragma weak pthread_rwlockattr_init +# if __GNU_LIBRARY__ > 1 +# pragma weak pthread_rwlockattr_setkind_np +# endif +# pragma weak pthread_rwlockattr_destroy # ifndef pthread_self # pragma weak pthread_self # endif # if !PTHREAD_IN_USE_DETECTION_HARD -# pragma weak pthread_cancel -# define pthread_in_use() (pthread_cancel != NULL) + /* Considering all platforms with USE_POSIX_THREADS_WEAK, only few symbols + can be used to determine 
whether libpthread is in use. These are: + pthread_mutexattr_gettype + pthread_rwlockattr_destroy + pthread_rwlockattr_init + */ +# pragma weak pthread_mutexattr_gettype +# define pthread_in_use() \ + (pthread_mutexattr_gettype != NULL || c11_threads_in_use ()) # endif # else @@ -176,19 +306,32 @@ typedef pthread_mutex_t gl_lock_t; /* ------------------------- gl_rwlock_t datatype ------------------------- */ -# if HAVE_PTHREAD_RWLOCK +# if HAVE_PTHREAD_RWLOCK && (HAVE_PTHREAD_RWLOCK_RDLOCK_PREFER_WRITER || (defined PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP && (__GNU_LIBRARY__ > 1))) -# ifdef PTHREAD_RWLOCK_INITIALIZER +# if defined PTHREAD_RWLOCK_INITIALIZER || defined PTHREAD_RWLOCK_INITIALIZER_NP typedef pthread_rwlock_t gl_rwlock_t; # define gl_rwlock_define(STORAGECLASS, NAME) \ STORAGECLASS pthread_rwlock_t NAME; # define gl_rwlock_define_initialized(STORAGECLASS, NAME) \ STORAGECLASS pthread_rwlock_t NAME = gl_rwlock_initializer; -# define gl_rwlock_initializer \ - PTHREAD_RWLOCK_INITIALIZER -# define glthread_rwlock_init(LOCK) \ - (pthread_in_use () ? pthread_rwlock_init (LOCK, NULL) : 0) +# if HAVE_PTHREAD_RWLOCK_RDLOCK_PREFER_WRITER +# if defined PTHREAD_RWLOCK_INITIALIZER +# define gl_rwlock_initializer \ + PTHREAD_RWLOCK_INITIALIZER +# else +# define gl_rwlock_initializer \ + PTHREAD_RWLOCK_INITIALIZER_NP +# endif +# define glthread_rwlock_init(LOCK) \ + (pthread_in_use () ? pthread_rwlock_init (LOCK, NULL) : 0) +# else /* glibc with bug https://sourceware.org/bugzilla/show_bug.cgi?id=13701 */ +# define gl_rwlock_initializer \ + PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP +# define glthread_rwlock_init(LOCK) \ + (pthread_in_use () ? glthread_rwlock_init_for_glibc (LOCK) : 0) +extern int glthread_rwlock_init_for_glibc (pthread_rwlock_t *lock); +# endif # define glthread_rwlock_rdlock(LOCK) \ (pthread_in_use () ? 
pthread_rwlock_rdlock (LOCK) : 0) # define glthread_rwlock_wrlock(LOCK) \ @@ -376,248 +519,16 @@ extern int glthread_once_singlethreaded (pthread_once_t *once_control); /* ========================================================================= */ -#if USE_PTH_THREADS - -/* Use the GNU Pth threads library. */ - -# include - -# ifdef __cplusplus -extern "C" { -# endif - -# if USE_PTH_THREADS_WEAK - -/* Use weak references to the GNU Pth threads library. */ - -# pragma weak pth_mutex_init -# pragma weak pth_mutex_acquire -# pragma weak pth_mutex_release -# pragma weak pth_rwlock_init -# pragma weak pth_rwlock_acquire -# pragma weak pth_rwlock_release -# pragma weak pth_once - -# pragma weak pth_cancel -# define pth_in_use() (pth_cancel != NULL) - -# else - -# define pth_in_use() 1 - -# endif - -/* -------------------------- gl_lock_t datatype -------------------------- */ - -typedef pth_mutex_t gl_lock_t; -# define gl_lock_define(STORAGECLASS, NAME) \ - STORAGECLASS pth_mutex_t NAME; -# define gl_lock_define_initialized(STORAGECLASS, NAME) \ - STORAGECLASS pth_mutex_t NAME = gl_lock_initializer; -# define gl_lock_initializer \ - PTH_MUTEX_INIT -# define glthread_lock_init(LOCK) \ - (pth_in_use () && !pth_mutex_init (LOCK) ? errno : 0) -# define glthread_lock_lock(LOCK) \ - (pth_in_use () && !pth_mutex_acquire (LOCK, 0, NULL) ? errno : 0) -# define glthread_lock_unlock(LOCK) \ - (pth_in_use () && !pth_mutex_release (LOCK) ? errno : 0) -# define glthread_lock_destroy(LOCK) \ - ((void)(LOCK), 0) - -/* ------------------------- gl_rwlock_t datatype ------------------------- */ - -typedef pth_rwlock_t gl_rwlock_t; -# define gl_rwlock_define(STORAGECLASS, NAME) \ - STORAGECLASS pth_rwlock_t NAME; -# define gl_rwlock_define_initialized(STORAGECLASS, NAME) \ - STORAGECLASS pth_rwlock_t NAME = gl_rwlock_initializer; -# define gl_rwlock_initializer \ - PTH_RWLOCK_INIT -# define glthread_rwlock_init(LOCK) \ - (pth_in_use () && !pth_rwlock_init (LOCK) ? 
errno : 0) -# define glthread_rwlock_rdlock(LOCK) \ - (pth_in_use () && !pth_rwlock_acquire (LOCK, PTH_RWLOCK_RD, 0, NULL) ? errno : 0) -# define glthread_rwlock_wrlock(LOCK) \ - (pth_in_use () && !pth_rwlock_acquire (LOCK, PTH_RWLOCK_RW, 0, NULL) ? errno : 0) -# define glthread_rwlock_unlock(LOCK) \ - (pth_in_use () && !pth_rwlock_release (LOCK) ? errno : 0) -# define glthread_rwlock_destroy(LOCK) \ - ((void)(LOCK), 0) - -/* --------------------- gl_recursive_lock_t datatype --------------------- */ - -/* In Pth, mutexes are recursive by default. */ -typedef pth_mutex_t gl_recursive_lock_t; -# define gl_recursive_lock_define(STORAGECLASS, NAME) \ - STORAGECLASS pth_mutex_t NAME; -# define gl_recursive_lock_define_initialized(STORAGECLASS, NAME) \ - STORAGECLASS pth_mutex_t NAME = gl_recursive_lock_initializer; -# define gl_recursive_lock_initializer \ - PTH_MUTEX_INIT -# define glthread_recursive_lock_init(LOCK) \ - (pth_in_use () && !pth_mutex_init (LOCK) ? errno : 0) -# define glthread_recursive_lock_lock(LOCK) \ - (pth_in_use () && !pth_mutex_acquire (LOCK, 0, NULL) ? errno : 0) -# define glthread_recursive_lock_unlock(LOCK) \ - (pth_in_use () && !pth_mutex_release (LOCK) ? errno : 0) -# define glthread_recursive_lock_destroy(LOCK) \ - ((void)(LOCK), 0) - -/* -------------------------- gl_once_t datatype -------------------------- */ - -typedef pth_once_t gl_once_t; -# define gl_once_define(STORAGECLASS, NAME) \ - STORAGECLASS pth_once_t NAME = PTH_ONCE_INIT; -# define glthread_once(ONCE_CONTROL, INITFUNCTION) \ - (pth_in_use () \ - ? glthread_once_multithreaded (ONCE_CONTROL, INITFUNCTION) \ - : (glthread_once_singlethreaded (ONCE_CONTROL) ? 
(INITFUNCTION (), 0) : 0)) -extern int glthread_once_multithreaded (pth_once_t *once_control, void (*initfunction) (void)); -extern int glthread_once_singlethreaded (pth_once_t *once_control); - -# ifdef __cplusplus -} -# endif - -#endif - -/* ========================================================================= */ - -#if USE_SOLARIS_THREADS - -/* Use the old Solaris threads library. */ - -# include -# include - -# ifdef __cplusplus -extern "C" { -# endif - -# if USE_SOLARIS_THREADS_WEAK - -/* Use weak references to the old Solaris threads library. */ - -# pragma weak mutex_init -# pragma weak mutex_lock -# pragma weak mutex_unlock -# pragma weak mutex_destroy -# pragma weak rwlock_init -# pragma weak rw_rdlock -# pragma weak rw_wrlock -# pragma weak rw_unlock -# pragma weak rwlock_destroy -# pragma weak thr_self - -# pragma weak thr_suspend -# define thread_in_use() (thr_suspend != NULL) - -# else - -# define thread_in_use() 1 - -# endif - -/* -------------------------- gl_lock_t datatype -------------------------- */ - -typedef mutex_t gl_lock_t; -# define gl_lock_define(STORAGECLASS, NAME) \ - STORAGECLASS mutex_t NAME; -# define gl_lock_define_initialized(STORAGECLASS, NAME) \ - STORAGECLASS mutex_t NAME = gl_lock_initializer; -# define gl_lock_initializer \ - DEFAULTMUTEX -# define glthread_lock_init(LOCK) \ - (thread_in_use () ? mutex_init (LOCK, USYNC_THREAD, NULL) : 0) -# define glthread_lock_lock(LOCK) \ - (thread_in_use () ? mutex_lock (LOCK) : 0) -# define glthread_lock_unlock(LOCK) \ - (thread_in_use () ? mutex_unlock (LOCK) : 0) -# define glthread_lock_destroy(LOCK) \ - (thread_in_use () ? 
mutex_destroy (LOCK) : 0) - -/* ------------------------- gl_rwlock_t datatype ------------------------- */ - -typedef rwlock_t gl_rwlock_t; -# define gl_rwlock_define(STORAGECLASS, NAME) \ - STORAGECLASS rwlock_t NAME; -# define gl_rwlock_define_initialized(STORAGECLASS, NAME) \ - STORAGECLASS rwlock_t NAME = gl_rwlock_initializer; -# define gl_rwlock_initializer \ - DEFAULTRWLOCK -# define glthread_rwlock_init(LOCK) \ - (thread_in_use () ? rwlock_init (LOCK, USYNC_THREAD, NULL) : 0) -# define glthread_rwlock_rdlock(LOCK) \ - (thread_in_use () ? rw_rdlock (LOCK) : 0) -# define glthread_rwlock_wrlock(LOCK) \ - (thread_in_use () ? rw_wrlock (LOCK) : 0) -# define glthread_rwlock_unlock(LOCK) \ - (thread_in_use () ? rw_unlock (LOCK) : 0) -# define glthread_rwlock_destroy(LOCK) \ - (thread_in_use () ? rwlock_destroy (LOCK) : 0) - -/* --------------------- gl_recursive_lock_t datatype --------------------- */ - -/* Old Solaris threads did not have recursive locks. - We have to implement them ourselves. */ - -typedef struct - { - mutex_t mutex; - thread_t owner; - unsigned long depth; - } - gl_recursive_lock_t; -# define gl_recursive_lock_define(STORAGECLASS, NAME) \ - STORAGECLASS gl_recursive_lock_t NAME; -# define gl_recursive_lock_define_initialized(STORAGECLASS, NAME) \ - STORAGECLASS gl_recursive_lock_t NAME = gl_recursive_lock_initializer; -# define gl_recursive_lock_initializer \ - { DEFAULTMUTEX, (thread_t) 0, 0 } -# define glthread_recursive_lock_init(LOCK) \ - (thread_in_use () ? glthread_recursive_lock_init_multithreaded (LOCK) : 0) -# define glthread_recursive_lock_lock(LOCK) \ - (thread_in_use () ? glthread_recursive_lock_lock_multithreaded (LOCK) : 0) -# define glthread_recursive_lock_unlock(LOCK) \ - (thread_in_use () ? glthread_recursive_lock_unlock_multithreaded (LOCK) : 0) -# define glthread_recursive_lock_destroy(LOCK) \ - (thread_in_use () ? 
glthread_recursive_lock_destroy_multithreaded (LOCK) : 0) -extern int glthread_recursive_lock_init_multithreaded (gl_recursive_lock_t *lock); -extern int glthread_recursive_lock_lock_multithreaded (gl_recursive_lock_t *lock); -extern int glthread_recursive_lock_unlock_multithreaded (gl_recursive_lock_t *lock); -extern int glthread_recursive_lock_destroy_multithreaded (gl_recursive_lock_t *lock); - -/* -------------------------- gl_once_t datatype -------------------------- */ - -typedef struct - { - volatile int inited; - mutex_t mutex; - } - gl_once_t; -# define gl_once_define(STORAGECLASS, NAME) \ - STORAGECLASS gl_once_t NAME = { 0, DEFAULTMUTEX }; -# define glthread_once(ONCE_CONTROL, INITFUNCTION) \ - (thread_in_use () \ - ? glthread_once_multithreaded (ONCE_CONTROL, INITFUNCTION) \ - : (glthread_once_singlethreaded (ONCE_CONTROL) ? (INITFUNCTION (), 0) : 0)) -extern int glthread_once_multithreaded (gl_once_t *once_control, void (*initfunction) (void)); -extern int glthread_once_singlethreaded (gl_once_t *once_control); - -# ifdef __cplusplus -} -# endif - -#endif - -/* ========================================================================= */ - #if USE_WINDOWS_THREADS # define WIN32_LEAN_AND_MEAN /* avoid including junk */ # include +# include "windows-mutex.h" +# include "windows-rwlock.h" +# include "windows-recmutex.h" +# include "windows-once.h" + # ifdef __cplusplus extern "C" { # endif @@ -633,127 +544,69 @@ extern "C" { /* There is no way to statically initialize a CRITICAL_SECTION. It needs to be done lazily, once only. For this we need spinlocks. 
*/ -typedef struct { volatile int done; volatile long started; } gl_spinlock_t; - /* -------------------------- gl_lock_t datatype -------------------------- */ -typedef struct - { - gl_spinlock_t guard; /* protects the initialization */ - CRITICAL_SECTION lock; - } - gl_lock_t; +typedef glwthread_mutex_t gl_lock_t; # define gl_lock_define(STORAGECLASS, NAME) \ STORAGECLASS gl_lock_t NAME; # define gl_lock_define_initialized(STORAGECLASS, NAME) \ STORAGECLASS gl_lock_t NAME = gl_lock_initializer; # define gl_lock_initializer \ - { { 0, -1 } } + GLWTHREAD_MUTEX_INIT # define glthread_lock_init(LOCK) \ - (glthread_lock_init_func (LOCK), 0) + (glwthread_mutex_init (LOCK), 0) # define glthread_lock_lock(LOCK) \ - glthread_lock_lock_func (LOCK) + glwthread_mutex_lock (LOCK) # define glthread_lock_unlock(LOCK) \ - glthread_lock_unlock_func (LOCK) + glwthread_mutex_unlock (LOCK) # define glthread_lock_destroy(LOCK) \ - glthread_lock_destroy_func (LOCK) -extern void glthread_lock_init_func (gl_lock_t *lock); -extern int glthread_lock_lock_func (gl_lock_t *lock); -extern int glthread_lock_unlock_func (gl_lock_t *lock); -extern int glthread_lock_destroy_func (gl_lock_t *lock); + glwthread_mutex_destroy (LOCK) /* ------------------------- gl_rwlock_t datatype ------------------------- */ -/* It is impossible to implement read-write locks using plain locks, without - introducing an extra thread dedicated to managing read-write locks. - Therefore here we need to use the low-level Event type. 
*/ - -typedef struct - { - HANDLE *array; /* array of waiting threads, each represented by an event */ - unsigned int count; /* number of waiting threads */ - unsigned int alloc; /* length of allocated array */ - unsigned int offset; /* index of first waiting thread in array */ - } - gl_carray_waitqueue_t; -typedef struct - { - gl_spinlock_t guard; /* protects the initialization */ - CRITICAL_SECTION lock; /* protects the remaining fields */ - gl_carray_waitqueue_t waiting_readers; /* waiting readers */ - gl_carray_waitqueue_t waiting_writers; /* waiting writers */ - int runcount; /* number of readers running, or -1 when a writer runs */ - } - gl_rwlock_t; +typedef glwthread_rwlock_t gl_rwlock_t; # define gl_rwlock_define(STORAGECLASS, NAME) \ STORAGECLASS gl_rwlock_t NAME; # define gl_rwlock_define_initialized(STORAGECLASS, NAME) \ STORAGECLASS gl_rwlock_t NAME = gl_rwlock_initializer; # define gl_rwlock_initializer \ - { { 0, -1 } } + GLWTHREAD_RWLOCK_INIT # define glthread_rwlock_init(LOCK) \ - (glthread_rwlock_init_func (LOCK), 0) + (glwthread_rwlock_init (LOCK), 0) # define glthread_rwlock_rdlock(LOCK) \ - glthread_rwlock_rdlock_func (LOCK) + glwthread_rwlock_rdlock (LOCK) # define glthread_rwlock_wrlock(LOCK) \ - glthread_rwlock_wrlock_func (LOCK) + glwthread_rwlock_wrlock (LOCK) # define glthread_rwlock_unlock(LOCK) \ - glthread_rwlock_unlock_func (LOCK) + glwthread_rwlock_unlock (LOCK) # define glthread_rwlock_destroy(LOCK) \ - glthread_rwlock_destroy_func (LOCK) -extern void glthread_rwlock_init_func (gl_rwlock_t *lock); -extern int glthread_rwlock_rdlock_func (gl_rwlock_t *lock); -extern int glthread_rwlock_wrlock_func (gl_rwlock_t *lock); -extern int glthread_rwlock_unlock_func (gl_rwlock_t *lock); -extern int glthread_rwlock_destroy_func (gl_rwlock_t *lock); + glwthread_rwlock_destroy (LOCK) /* --------------------- gl_recursive_lock_t datatype --------------------- */ -/* The native Windows documentation says that CRITICAL_SECTION already - implements 
a recursive lock. But we need not rely on it: It's easy to - implement a recursive lock without this assumption. */ - -typedef struct - { - gl_spinlock_t guard; /* protects the initialization */ - DWORD owner; - unsigned long depth; - CRITICAL_SECTION lock; - } - gl_recursive_lock_t; +typedef glwthread_recmutex_t gl_recursive_lock_t; # define gl_recursive_lock_define(STORAGECLASS, NAME) \ STORAGECLASS gl_recursive_lock_t NAME; # define gl_recursive_lock_define_initialized(STORAGECLASS, NAME) \ STORAGECLASS gl_recursive_lock_t NAME = gl_recursive_lock_initializer; # define gl_recursive_lock_initializer \ - { { 0, -1 }, 0, 0 } + GLWTHREAD_RECMUTEX_INIT # define glthread_recursive_lock_init(LOCK) \ - (glthread_recursive_lock_init_func (LOCK), 0) + (glwthread_recmutex_init (LOCK), 0) # define glthread_recursive_lock_lock(LOCK) \ - glthread_recursive_lock_lock_func (LOCK) + glwthread_recmutex_lock (LOCK) # define glthread_recursive_lock_unlock(LOCK) \ - glthread_recursive_lock_unlock_func (LOCK) + glwthread_recmutex_unlock (LOCK) # define glthread_recursive_lock_destroy(LOCK) \ - glthread_recursive_lock_destroy_func (LOCK) -extern void glthread_recursive_lock_init_func (gl_recursive_lock_t *lock); -extern int glthread_recursive_lock_lock_func (gl_recursive_lock_t *lock); -extern int glthread_recursive_lock_unlock_func (gl_recursive_lock_t *lock); -extern int glthread_recursive_lock_destroy_func (gl_recursive_lock_t *lock); + glwthread_recmutex_destroy (LOCK) /* -------------------------- gl_once_t datatype -------------------------- */ -typedef struct - { - volatile int inited; - volatile long started; - CRITICAL_SECTION lock; - } - gl_once_t; +typedef glwthread_once_t gl_once_t; # define gl_once_define(STORAGECLASS, NAME) \ - STORAGECLASS gl_once_t NAME = { -1, -1 }; + STORAGECLASS gl_once_t NAME = GLWTHREAD_ONCE_INIT; # define glthread_once(ONCE_CONTROL, INITFUNCTION) \ - (glthread_once_func (ONCE_CONTROL, INITFUNCTION), 0) -extern void glthread_once_func (gl_once_t 
*once_control, void (*initfunction) (void)); + (glwthread_once (ONCE_CONTROL, INITFUNCTION), 0) # ifdef __cplusplus } @@ -763,7 +616,7 @@ extern void glthread_once_func (gl_once_t *once_control, void (*initfunction) (v /* ========================================================================= */ -#if !(USE_POSIX_THREADS || USE_PTH_THREADS || USE_SOLARIS_THREADS || USE_WINDOWS_THREADS) +#if !(USE_ISOC_THREADS || USE_POSIX_THREADS || USE_ISOC_AND_POSIX_THREADS || USE_WINDOWS_THREADS) /* Provide dummy implementation if threads are not supported. */ diff --git a/contrib/grep/lib/glthread/threadlib.c b/contrib/grep/lib/glthread/threadlib.c index f01d351c8c..28bf3c6f76 100644 --- a/contrib/grep/lib/glthread/threadlib.c +++ b/contrib/grep/lib/glthread/threadlib.c @@ -1,5 +1,5 @@ /* Multithreading primitives. - Copyright (C) 2005-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . */ /* Written by Bruno Haible , 2005. */ @@ -20,7 +20,7 @@ /* ========================================================================= */ -#if USE_POSIX_THREADS +#if USE_POSIX_THREADS || USE_ISOC_AND_POSIX_THREADS /* Use the POSIX threads library. */ diff --git a/contrib/grep/lib/gnulib.mk b/contrib/grep/lib/gnulib.mk deleted file mode 100644 index 2793024b49..0000000000 --- a/contrib/grep/lib/gnulib.mk +++ /dev/null @@ -1,2872 +0,0 @@ -## DO NOT EDIT! GENERATED AUTOMATICALLY! -## Process this file with automake to produce Makefile.in. -# Copyright (C) 2002-2015 Free Software Foundation, Inc. 
-# -# This file is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This file is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this file. If not, see . -# -# As a special exception to the GNU General Public License, -# this file may be distributed as part of a program that -# contains a configuration script generated by Autoconf, under -# the same distribution terms as the rest of that program. -# -# Generated by gnulib-tool. -# Reproduce by: gnulib-tool --import --dir=. --local-dir=gl --lib=libgreputils --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=gnulib-tests --aux-dir=build-aux --with-tests --avoid=lock-tests --avoid=mbrtowc-tests --makefile-name=gnulib.mk --no-conditional-dependencies --no-libtool --macro-prefix=gl alloca announce-gen argmatch binary-io btowc c-ctype closeout do-release-commit-and-tag error exclude fcntl-h fdl fnmatch fstatat fts getopt-gnu getpagesize gettext-h git-version-gen gitlog-to-changelog gnu-web-doc-update gnupload ignore-value intprops inttypes isatty isblank iswctype largefile locale lseek maintainer-makefile malloc-gnu manywarnings mbrlen mbrtowc memchr memchr2 mempcpy minmax obstack openat-safer perl progname propername quote readme-release realloc-gnu regex safe-read same-inode ssize_t stddef stdlib stpcpy strerror string strstr strtoull strtoumax sys_stat unistd unlocked-io update-copyright useless-if-before-free version-etc-fsf wchar wcrtomb wctob wctype-h xalloc xstrtoimax - - -MOSTLYCLEANFILES += core *.stackdump - -noinst_LIBRARIES += libgreputils.a - 
-libgreputils_a_SOURCES = -libgreputils_a_LIBADD = $(gl_LIBOBJS) -libgreputils_a_DEPENDENCIES = $(gl_LIBOBJS) -EXTRA_libgreputils_a_SOURCES = - -## begin gnulib module absolute-header - -# Use this preprocessor expression to decide whether #include_next works. -# Do not rely on a 'configure'-time test for this, since the expression -# might appear in an installed header, which is used by some other compiler. -HAVE_INCLUDE_NEXT = (__GNUC__ || 60000000 <= __DECC_VER) - -## end gnulib module absolute-header - -## begin gnulib module alignof - - -EXTRA_DIST += alignof.h - -## end gnulib module alignof - -## begin gnulib module alloca - - -libgreputils_a_LIBADD += @ALLOCA@ -libgreputils_a_DEPENDENCIES += @ALLOCA@ -EXTRA_DIST += alloca.c - -EXTRA_libgreputils_a_SOURCES += alloca.c - -## end gnulib module alloca - -## begin gnulib module alloca-opt - -BUILT_SOURCES += $(ALLOCA_H) - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -if GL_GENERATE_ALLOCA_H -alloca.h: alloca.in.h $(top_builddir)/config.status - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/'; \ - cat $(srcdir)/alloca.in.h; \ - } > $@-t && \ - mv -f $@-t $@ -else -alloca.h: $(top_builddir)/config.status - rm -f $@ -endif -MOSTLYCLEANFILES += alloca.h alloca.h-t - -EXTRA_DIST += alloca.in.h - -## end gnulib module alloca-opt - -## begin gnulib module announce-gen - - -EXTRA_DIST += $(top_srcdir)/build-aux/announce-gen - -## end gnulib module announce-gen - -## begin gnulib module argmatch - -libgreputils_a_SOURCES += argmatch.c - -EXTRA_DIST += argmatch.h - -## end gnulib module argmatch - -## begin gnulib module assure - - -EXTRA_DIST += assure.h - -## end gnulib module assure - -## begin gnulib module at-internal - - -EXTRA_DIST += openat-priv.h openat-proc.c - -EXTRA_libgreputils_a_SOURCES += openat-proc.c - -## end gnulib module at-internal - -## begin gnulib module binary-io - -libgreputils_a_SOURCES += binary-io.h binary-io.c - -## end gnulib module binary-io - -## begin gnulib module bitrotate - -libgreputils_a_SOURCES += bitrotate.h bitrotate.c - -## end gnulib module bitrotate - -## begin gnulib module btowc - - -EXTRA_DIST += btowc.c - -EXTRA_libgreputils_a_SOURCES += btowc.c - -## end gnulib module btowc - -## begin gnulib module c-ctype - -libgreputils_a_SOURCES += c-ctype.h c-ctype.c - -## end gnulib module c-ctype - -## begin gnulib module c-strcase - -libgreputils_a_SOURCES += c-strcase.h c-strcasecmp.c c-strncasecmp.c - -## end gnulib module c-strcase - -## begin gnulib module c-strcaseeq - - -EXTRA_DIST += c-strcaseeq.h - -## end gnulib module c-strcaseeq - -## begin gnulib module chdir-long - - -EXTRA_DIST += chdir-long.c chdir-long.h - -EXTRA_libgreputils_a_SOURCES += chdir-long.c - -## end gnulib module chdir-long - -## begin gnulib module cloexec - -libgreputils_a_SOURCES += cloexec.c - -EXTRA_DIST += cloexec.h - -## end gnulib module cloexec - -## begin gnulib module close - - -EXTRA_DIST += close.c - -EXTRA_libgreputils_a_SOURCES += close.c - -## end gnulib module close - -## begin gnulib module close-stream - 
-libgreputils_a_SOURCES += close-stream.c - -EXTRA_DIST += close-stream.h - -## end gnulib module close-stream - -## begin gnulib module closedir - - -EXTRA_DIST += closedir.c dirent-private.h - -EXTRA_libgreputils_a_SOURCES += closedir.c - -## end gnulib module closedir - -## begin gnulib module closeout - -libgreputils_a_SOURCES += closeout.c - -EXTRA_DIST += closeout.h - -## end gnulib module closeout - -## begin gnulib module configmake - -# Listed in the same order as the GNU makefile conventions, and -# provided by autoconf 2.59c+ or 2.70. -# The Automake-defined pkg* macros are appended, in the order -# listed in the Automake 1.10a+ documentation. -configmake.h: Makefile - $(AM_V_GEN)rm -f $@-t && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ - echo '#define PREFIX "$(prefix)"'; \ - echo '#define EXEC_PREFIX "$(exec_prefix)"'; \ - echo '#define BINDIR "$(bindir)"'; \ - echo '#define SBINDIR "$(sbindir)"'; \ - echo '#define LIBEXECDIR "$(libexecdir)"'; \ - echo '#define DATAROOTDIR "$(datarootdir)"'; \ - echo '#define DATADIR "$(datadir)"'; \ - echo '#define SYSCONFDIR "$(sysconfdir)"'; \ - echo '#define SHAREDSTATEDIR "$(sharedstatedir)"'; \ - echo '#define LOCALSTATEDIR "$(localstatedir)"'; \ - echo '#define RUNSTATEDIR "$(runstatedir)"'; \ - echo '#define INCLUDEDIR "$(includedir)"'; \ - echo '#define OLDINCLUDEDIR "$(oldincludedir)"'; \ - echo '#define DOCDIR "$(docdir)"'; \ - echo '#define INFODIR "$(infodir)"'; \ - echo '#define HTMLDIR "$(htmldir)"'; \ - echo '#define DVIDIR "$(dvidir)"'; \ - echo '#define PDFDIR "$(pdfdir)"'; \ - echo '#define PSDIR "$(psdir)"'; \ - echo '#define LIBDIR "$(libdir)"'; \ - echo '#define LISPDIR "$(lispdir)"'; \ - echo '#define LOCALEDIR "$(localedir)"'; \ - echo '#define MANDIR "$(mandir)"'; \ - echo '#define MANEXT "$(manext)"'; \ - echo '#define PKGDATADIR "$(pkgdatadir)"'; \ - echo '#define PKGINCLUDEDIR "$(pkgincludedir)"'; \ - echo '#define PKGLIBDIR "$(pkglibdir)"'; \ - echo '#define PKGLIBEXECDIR 
"$(pkglibexecdir)"'; \ - } | sed '/""/d' > $@-t && \ - mv -f $@-t $@ - -BUILT_SOURCES += configmake.h -CLEANFILES += configmake.h configmake.h-t - -## end gnulib module configmake - -## begin gnulib module ctype - -BUILT_SOURCES += ctype.h - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -ctype.h: ctype.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_CTYPE_H''@|$(NEXT_CTYPE_H)|g' \ - -e 's/@''GNULIB_ISBLANK''@/$(GNULIB_ISBLANK)/g' \ - -e 's/@''HAVE_ISBLANK''@/$(HAVE_ISBLANK)/g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ - < $(srcdir)/ctype.in.h; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += ctype.h ctype.h-t - -EXTRA_DIST += ctype.in.h - -## end gnulib module ctype - -## begin gnulib module cycle-check - -libgreputils_a_SOURCES += cycle-check.c - -EXTRA_DIST += cycle-check.h - -## end gnulib module cycle-check - -## begin gnulib module dev-ino - - -EXTRA_DIST += dev-ino.h - -## end gnulib module dev-ino - -## begin gnulib module dirent - -BUILT_SOURCES += dirent.h - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -dirent.h: dirent.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/'; \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''HAVE_DIRENT_H''@|$(HAVE_DIRENT_H)|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_DIRENT_H''@|$(NEXT_DIRENT_H)|g' \ - -e 's/@''GNULIB_OPENDIR''@/$(GNULIB_OPENDIR)/g' \ - -e 's/@''GNULIB_READDIR''@/$(GNULIB_READDIR)/g' \ - -e 's/@''GNULIB_REWINDDIR''@/$(GNULIB_REWINDDIR)/g' \ - -e 's/@''GNULIB_CLOSEDIR''@/$(GNULIB_CLOSEDIR)/g' \ - -e 's/@''GNULIB_DIRFD''@/$(GNULIB_DIRFD)/g' \ - -e 's/@''GNULIB_FDOPENDIR''@/$(GNULIB_FDOPENDIR)/g' \ - -e 's/@''GNULIB_SCANDIR''@/$(GNULIB_SCANDIR)/g' \ - -e 's/@''GNULIB_ALPHASORT''@/$(GNULIB_ALPHASORT)/g' \ - -e 's/@''HAVE_OPENDIR''@/$(HAVE_OPENDIR)/g' \ - -e 's/@''HAVE_READDIR''@/$(HAVE_READDIR)/g' \ - -e 's/@''HAVE_REWINDDIR''@/$(HAVE_REWINDDIR)/g' \ - -e 's/@''HAVE_CLOSEDIR''@/$(HAVE_CLOSEDIR)/g' \ - -e 's|@''HAVE_DECL_DIRFD''@|$(HAVE_DECL_DIRFD)|g' \ - -e 's|@''HAVE_DECL_FDOPENDIR''@|$(HAVE_DECL_FDOPENDIR)|g' \ - -e 's|@''HAVE_FDOPENDIR''@|$(HAVE_FDOPENDIR)|g' \ - -e 's|@''HAVE_SCANDIR''@|$(HAVE_SCANDIR)|g' \ - -e 's|@''HAVE_ALPHASORT''@|$(HAVE_ALPHASORT)|g' \ - -e 's|@''REPLACE_OPENDIR''@|$(REPLACE_OPENDIR)|g' \ - -e 's|@''REPLACE_CLOSEDIR''@|$(REPLACE_CLOSEDIR)|g' \ - -e 's|@''REPLACE_DIRFD''@|$(REPLACE_DIRFD)|g' \ - -e 's|@''REPLACE_FDOPENDIR''@|$(REPLACE_FDOPENDIR)|g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ - < $(srcdir)/dirent.in.h; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += dirent.h dirent.h-t - -EXTRA_DIST += dirent.in.h - -## end gnulib module dirent - -## begin gnulib module dirent-safer - -libgreputils_a_SOURCES += opendir-safer.c - -EXTRA_DIST += dirent--.h dirent-safer.h - -## end gnulib module dirent-safer - -## begin gnulib module dirfd - - -EXTRA_DIST += dirfd.c - 
-EXTRA_libgreputils_a_SOURCES += dirfd.c - -## end gnulib module dirfd - -## begin gnulib module dirname-lgpl - -libgreputils_a_SOURCES += dirname-lgpl.c basename-lgpl.c stripslash.c - -EXTRA_DIST += dirname.h - -## end gnulib module dirname-lgpl - -## begin gnulib module do-release-commit-and-tag - - -EXTRA_DIST += $(top_srcdir)/build-aux/do-release-commit-and-tag - -## end gnulib module do-release-commit-and-tag - -## begin gnulib module dosname - - -EXTRA_DIST += dosname.h - -## end gnulib module dosname - -## begin gnulib module dup - - -EXTRA_DIST += dup.c - -EXTRA_libgreputils_a_SOURCES += dup.c - -## end gnulib module dup - -## begin gnulib module dup2 - - -EXTRA_DIST += dup2.c - -EXTRA_libgreputils_a_SOURCES += dup2.c - -## end gnulib module dup2 - -## begin gnulib module errno - -BUILT_SOURCES += $(ERRNO_H) - -# We need the following in order to create when the system -# doesn't have one that is POSIX compliant. -if GL_GENERATE_ERRNO_H -errno.h: errno.in.h $(top_builddir)/config.status - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/' && \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_ERRNO_H''@|$(NEXT_ERRNO_H)|g' \ - -e 's|@''EMULTIHOP_HIDDEN''@|$(EMULTIHOP_HIDDEN)|g' \ - -e 's|@''EMULTIHOP_VALUE''@|$(EMULTIHOP_VALUE)|g' \ - -e 's|@''ENOLINK_HIDDEN''@|$(ENOLINK_HIDDEN)|g' \ - -e 's|@''ENOLINK_VALUE''@|$(ENOLINK_VALUE)|g' \ - -e 's|@''EOVERFLOW_HIDDEN''@|$(EOVERFLOW_HIDDEN)|g' \ - -e 's|@''EOVERFLOW_VALUE''@|$(EOVERFLOW_VALUE)|g' \ - < $(srcdir)/errno.in.h; \ - } > $@-t && \ - mv $@-t $@ -else -errno.h: $(top_builddir)/config.status - rm -f $@ -endif -MOSTLYCLEANFILES += errno.h errno.h-t - -EXTRA_DIST += errno.in.h - -## end gnulib module errno - -## begin gnulib module error - - -EXTRA_DIST += error.c error.h - -EXTRA_libgreputils_a_SOURCES += error.c - -## end gnulib module error - -## begin gnulib module exclude - -libgreputils_a_SOURCES += exclude.c - -EXTRA_DIST += exclude.h - -## end gnulib module exclude - -## begin gnulib module exitfail - -libgreputils_a_SOURCES += exitfail.c - -EXTRA_DIST += exitfail.h - -## end gnulib module exitfail - -## begin gnulib module fchdir - - -EXTRA_DIST += fchdir.c - -EXTRA_libgreputils_a_SOURCES += fchdir.c - -## end gnulib module fchdir - -## begin gnulib module fcntl - - -EXTRA_DIST += fcntl.c - -EXTRA_libgreputils_a_SOURCES += fcntl.c - -## end gnulib module fcntl - -## begin gnulib module fcntl-h - -BUILT_SOURCES += fcntl.h - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -fcntl.h: fcntl.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/'; \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_FCNTL_H''@|$(NEXT_FCNTL_H)|g' \ - -e 's/@''GNULIB_FCNTL''@/$(GNULIB_FCNTL)/g' \ - -e 's/@''GNULIB_NONBLOCKING''@/$(GNULIB_NONBLOCKING)/g' \ - -e 's/@''GNULIB_OPEN''@/$(GNULIB_OPEN)/g' \ - -e 's/@''GNULIB_OPENAT''@/$(GNULIB_OPENAT)/g' \ - -e 's|@''HAVE_FCNTL''@|$(HAVE_FCNTL)|g' \ - -e 's|@''HAVE_OPENAT''@|$(HAVE_OPENAT)|g' \ - -e 's|@''REPLACE_FCNTL''@|$(REPLACE_FCNTL)|g' \ - -e 's|@''REPLACE_OPEN''@|$(REPLACE_OPEN)|g' \ - -e 's|@''REPLACE_OPENAT''@|$(REPLACE_OPENAT)|g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ - < $(srcdir)/fcntl.in.h; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += fcntl.h fcntl.h-t - -EXTRA_DIST += fcntl.in.h - -## end gnulib module fcntl-h - -## begin gnulib module fcntl-safer - -libgreputils_a_SOURCES += creat-safer.c open-safer.c - -EXTRA_DIST += fcntl--.h fcntl-safer.h - -## end gnulib module fcntl-safer - -## begin gnulib module fd-hook - -libgreputils_a_SOURCES += fd-hook.c - -EXTRA_DIST += fd-hook.h - -## end gnulib module fd-hook - -## begin gnulib module fdopendir - - -EXTRA_DIST += fdopendir.c - -EXTRA_libgreputils_a_SOURCES += fdopendir.c - -## end gnulib module fdopendir - -## begin gnulib module filename - - -EXTRA_DIST += filename.h - -## end gnulib module filename - -## begin gnulib module filenamecat-lgpl - -libgreputils_a_SOURCES += filenamecat-lgpl.c - -EXTRA_DIST += filenamecat.h - -## end gnulib module filenamecat-lgpl - -## begin gnulib module fnmatch - -BUILT_SOURCES += $(FNMATCH_H) - -# We need the following in order to create when the system -# doesn't have one that supports the required API. 
-if GL_GENERATE_FNMATCH_H -fnmatch.h: fnmatch.in.h $(top_builddir)/config.status $(ARG_NONNULL_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ - sed -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - < $(srcdir)/fnmatch.in.h; \ - } > $@-t && \ - mv -f $@-t $@ -else -fnmatch.h: $(top_builddir)/config.status - rm -f $@ -endif -MOSTLYCLEANFILES += fnmatch.h fnmatch.h-t - -EXTRA_DIST += fnmatch.c fnmatch.in.h fnmatch_loop.c - -EXTRA_libgreputils_a_SOURCES += fnmatch.c fnmatch_loop.c - -## end gnulib module fnmatch - -## begin gnulib module fpending - - -EXTRA_DIST += fpending.c fpending.h - -EXTRA_libgreputils_a_SOURCES += fpending.c - -## end gnulib module fpending - -## begin gnulib module fstat - - -EXTRA_DIST += fstat.c - -EXTRA_libgreputils_a_SOURCES += fstat.c - -## end gnulib module fstat - -## begin gnulib module fstatat - - -EXTRA_DIST += at-func.c fstatat.c - -EXTRA_libgreputils_a_SOURCES += at-func.c fstatat.c - -## end gnulib module fstatat - -## begin gnulib module fts - - -EXTRA_DIST += fts-cycle.c fts.c fts_.h - -EXTRA_libgreputils_a_SOURCES += fts-cycle.c fts.c - -## end gnulib module fts - -## begin gnulib module gendocs - - -EXTRA_DIST += $(top_srcdir)/build-aux/gendocs.sh - -## end gnulib module gendocs - -## begin gnulib module getcwd-lgpl - - -EXTRA_DIST += getcwd-lgpl.c - -EXTRA_libgreputils_a_SOURCES += getcwd-lgpl.c - -## end gnulib module getcwd-lgpl - -## begin gnulib module getdtablesize - - -EXTRA_DIST += getdtablesize.c - -EXTRA_libgreputils_a_SOURCES += getdtablesize.c - -## end gnulib module getdtablesize - -## begin gnulib module getopt-posix - -BUILT_SOURCES += $(GETOPT_H) - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -getopt.h: getopt.in.h $(top_builddir)/config.status $(ARG_NONNULL_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/'; \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''HAVE_GETOPT_H''@|$(HAVE_GETOPT_H)|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_GETOPT_H''@|$(NEXT_GETOPT_H)|g' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - < $(srcdir)/getopt.in.h; \ - } > $@-t && \ - mv -f $@-t $@ -MOSTLYCLEANFILES += getopt.h getopt.h-t - -EXTRA_DIST += getopt.c getopt.in.h getopt1.c getopt_int.h - -EXTRA_libgreputils_a_SOURCES += getopt.c getopt1.c - -## end gnulib module getopt-posix - -## begin gnulib module getpagesize - - -EXTRA_DIST += getpagesize.c - -EXTRA_libgreputils_a_SOURCES += getpagesize.c - -## end gnulib module getpagesize - -## begin gnulib module gettext-h - -libgreputils_a_SOURCES += gettext.h - -## end gnulib module gettext-h - -## begin gnulib module gettimeofday - - -EXTRA_DIST += gettimeofday.c - -EXTRA_libgreputils_a_SOURCES += gettimeofday.c - -## end gnulib module gettimeofday - -## begin gnulib module git-version-gen - - -EXTRA_DIST += $(top_srcdir)/build-aux/git-version-gen - -## end gnulib module git-version-gen - -## begin gnulib module gitlog-to-changelog - - -EXTRA_DIST += $(top_srcdir)/build-aux/gitlog-to-changelog - -## end gnulib module gitlog-to-changelog - -## begin gnulib module gnu-web-doc-update - - -EXTRA_DIST += $(top_srcdir)/build-aux/gnu-web-doc-update - -## end gnulib module gnu-web-doc-update - -## begin gnulib module gnumakefile - -distclean-local: clean-GNUmakefile -clean-GNUmakefile: - test '$(srcdir)' = . 
|| rm -f $(top_builddir)/GNUmakefile - -EXTRA_DIST += $(top_srcdir)/GNUmakefile - -## end gnulib module gnumakefile - -## begin gnulib module gnupload - - -EXTRA_DIST += $(top_srcdir)/build-aux/gnupload - -## end gnulib module gnupload - -## begin gnulib module gperf - -GPERF = gperf -V_GPERF = $(V_GPERF_@AM_V@) -V_GPERF_ = $(V_GPERF_@AM_DEFAULT_V@) -V_GPERF_0 = @echo " GPERF " $@; - -## end gnulib module gperf - -## begin gnulib module hash - -libgreputils_a_SOURCES += hash.c - -EXTRA_DIST += hash.h - -## end gnulib module hash - -## begin gnulib module havelib - - -EXTRA_DIST += $(top_srcdir)/build-aux/config.rpath - -## end gnulib module havelib - -## begin gnulib module i-ring - -libgreputils_a_SOURCES += i-ring.c - -EXTRA_DIST += i-ring.h - -## end gnulib module i-ring - -## begin gnulib module iconv-h - -BUILT_SOURCES += $(ICONV_H) - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -if GL_GENERATE_ICONV_H -iconv.h: iconv.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/' && \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_ICONV_H''@|$(NEXT_ICONV_H)|g' \ - -e 's/@''GNULIB_ICONV''@/$(GNULIB_ICONV)/g' \ - -e 's|@''ICONV_CONST''@|$(ICONV_CONST)|g' \ - -e 's|@''REPLACE_ICONV''@|$(REPLACE_ICONV)|g' \ - -e 's|@''REPLACE_ICONV_OPEN''@|$(REPLACE_ICONV_OPEN)|g' \ - -e 's|@''REPLACE_ICONV_UTF''@|$(REPLACE_ICONV_UTF)|g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ - < $(srcdir)/iconv.in.h; \ - } > $@-t && \ - mv $@-t $@ -else -iconv.h: $(top_builddir)/config.status - rm -f $@ -endif -MOSTLYCLEANFILES += iconv.h iconv.h-t - -EXTRA_DIST += iconv.in.h - -## end gnulib module iconv-h - -## begin gnulib module iconv_open - -iconv_open-aix.h: iconv_open-aix.gperf - $(V_GPERF)$(GPERF) -m 10 $(srcdir)/iconv_open-aix.gperf > $(srcdir)/iconv_open-aix.h-t && \ - mv $(srcdir)/iconv_open-aix.h-t $(srcdir)/iconv_open-aix.h -iconv_open-hpux.h: iconv_open-hpux.gperf - $(V_GPERF)$(GPERF) -m 10 $(srcdir)/iconv_open-hpux.gperf > $(srcdir)/iconv_open-hpux.h-t && \ - mv $(srcdir)/iconv_open-hpux.h-t $(srcdir)/iconv_open-hpux.h -iconv_open-irix.h: iconv_open-irix.gperf - $(V_GPERF)$(GPERF) -m 10 $(srcdir)/iconv_open-irix.gperf > $(srcdir)/iconv_open-irix.h-t && \ - mv $(srcdir)/iconv_open-irix.h-t $(srcdir)/iconv_open-irix.h -iconv_open-osf.h: iconv_open-osf.gperf - $(V_GPERF)$(GPERF) -m 10 $(srcdir)/iconv_open-osf.gperf > $(srcdir)/iconv_open-osf.h-t && \ - mv $(srcdir)/iconv_open-osf.h-t $(srcdir)/iconv_open-osf.h -iconv_open-solaris.h: iconv_open-solaris.gperf - $(V_GPERF)$(GPERF) -m 10 $(srcdir)/iconv_open-solaris.gperf > $(srcdir)/iconv_open-solaris.h-t && \ - mv $(srcdir)/iconv_open-solaris.h-t $(srcdir)/iconv_open-solaris.h -BUILT_SOURCES += iconv_open-aix.h 
iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h iconv_open-solaris.h -MOSTLYCLEANFILES += iconv_open-aix.h-t iconv_open-hpux.h-t iconv_open-irix.h-t iconv_open-osf.h-t iconv_open-solaris.h-t -MAINTAINERCLEANFILES += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h iconv_open-solaris.h -EXTRA_DIST += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h iconv_open-solaris.h - -EXTRA_DIST += iconv.c iconv_close.c iconv_open-aix.gperf iconv_open-hpux.gperf iconv_open-irix.gperf iconv_open-osf.gperf iconv_open-solaris.gperf iconv_open.c - -EXTRA_libgreputils_a_SOURCES += iconv.c iconv_close.c iconv_open.c - -## end gnulib module iconv_open - -## begin gnulib module ignore-value - - -EXTRA_DIST += ignore-value.h - -## end gnulib module ignore-value - -## begin gnulib module intprops - - -EXTRA_DIST += intprops.h - -## end gnulib module intprops - -## begin gnulib module inttypes-incomplete - -BUILT_SOURCES += inttypes.h - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -inttypes.h: inttypes.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(WARN_ON_USE_H) $(ARG_NONNULL_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/'; \ - sed -e 's/@''HAVE_INTTYPES_H''@/$(HAVE_INTTYPES_H)/g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_INTTYPES_H''@|$(NEXT_INTTYPES_H)|g' \ - -e 's/@''PRI_MACROS_BROKEN''@/$(PRI_MACROS_BROKEN)/g' \ - -e 's/@''APPLE_UNIVERSAL_BUILD''@/$(APPLE_UNIVERSAL_BUILD)/g' \ - -e 's/@''HAVE_LONG_LONG_INT''@/$(HAVE_LONG_LONG_INT)/g' \ - -e 's/@''HAVE_UNSIGNED_LONG_LONG_INT''@/$(HAVE_UNSIGNED_LONG_LONG_INT)/g' \ - -e 's/@''PRIPTR_PREFIX''@/$(PRIPTR_PREFIX)/g' \ - -e 's/@''GNULIB_IMAXABS''@/$(GNULIB_IMAXABS)/g' \ - -e 's/@''GNULIB_IMAXDIV''@/$(GNULIB_IMAXDIV)/g' \ - -e 's/@''GNULIB_STRTOIMAX''@/$(GNULIB_STRTOIMAX)/g' \ - -e 's/@''GNULIB_STRTOUMAX''@/$(GNULIB_STRTOUMAX)/g' \ - -e 's/@''HAVE_DECL_IMAXABS''@/$(HAVE_DECL_IMAXABS)/g' \ - -e 's/@''HAVE_DECL_IMAXDIV''@/$(HAVE_DECL_IMAXDIV)/g' \ - -e 's/@''HAVE_DECL_STRTOIMAX''@/$(HAVE_DECL_STRTOIMAX)/g' \ - -e 's/@''HAVE_DECL_STRTOUMAX''@/$(HAVE_DECL_STRTOUMAX)/g' \ - -e 's/@''REPLACE_STRTOIMAX''@/$(REPLACE_STRTOIMAX)/g' \ - -e 's/@''REPLACE_STRTOUMAX''@/$(REPLACE_STRTOUMAX)/g' \ - -e 's/@''INT32_MAX_LT_INTMAX_MAX''@/$(INT32_MAX_LT_INTMAX_MAX)/g' \ - -e 's/@''INT64_MAX_EQ_LONG_MAX''@/$(INT64_MAX_EQ_LONG_MAX)/g' \ - -e 's/@''UINT32_MAX_LT_UINTMAX_MAX''@/$(UINT32_MAX_LT_UINTMAX_MAX)/g' \ - -e 's/@''UINT64_MAX_EQ_ULONG_MAX''@/$(UINT64_MAX_EQ_ULONG_MAX)/g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ - < $(srcdir)/inttypes.in.h; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += inttypes.h inttypes.h-t - -EXTRA_DIST += inttypes.in.h - -## end gnulib module inttypes-incomplete - -## begin gnulib module isatty - - -EXTRA_DIST += isatty.c - -EXTRA_libgreputils_a_SOURCES += isatty.c - -## end gnulib module isatty - -## begin gnulib module isblank - - -EXTRA_DIST += isblank.c 
- -EXTRA_libgreputils_a_SOURCES += isblank.c - -## end gnulib module isblank - -## begin gnulib module iswblank - - -EXTRA_DIST += iswblank.c - -EXTRA_libgreputils_a_SOURCES += iswblank.c - -## end gnulib module iswblank - -## begin gnulib module iswctype - - -EXTRA_DIST += iswctype-impl.h iswctype.c - -EXTRA_libgreputils_a_SOURCES += iswctype.c - -## end gnulib module iswctype - -## begin gnulib module langinfo - -BUILT_SOURCES += langinfo.h - -# We need the following in order to create an empty placeholder for -# when the system doesn't have one. -langinfo.h: langinfo.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''HAVE_LANGINFO_H''@|$(HAVE_LANGINFO_H)|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_LANGINFO_H''@|$(NEXT_LANGINFO_H)|g' \ - -e 's/@''GNULIB_NL_LANGINFO''@/$(GNULIB_NL_LANGINFO)/g' \ - -e 's|@''HAVE_LANGINFO_CODESET''@|$(HAVE_LANGINFO_CODESET)|g' \ - -e 's|@''HAVE_LANGINFO_T_FMT_AMPM''@|$(HAVE_LANGINFO_T_FMT_AMPM)|g' \ - -e 's|@''HAVE_LANGINFO_ERA''@|$(HAVE_LANGINFO_ERA)|g' \ - -e 's|@''HAVE_LANGINFO_YESEXPR''@|$(HAVE_LANGINFO_YESEXPR)|g' \ - -e 's|@''HAVE_NL_LANGINFO''@|$(HAVE_NL_LANGINFO)|g' \ - -e 's|@''REPLACE_NL_LANGINFO''@|$(REPLACE_NL_LANGINFO)|g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ - < $(srcdir)/langinfo.in.h; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += langinfo.h langinfo.h-t - -EXTRA_DIST += langinfo.in.h - -## end gnulib module langinfo - -## begin gnulib module localcharset - -libgreputils_a_SOURCES += localcharset.h localcharset.c - -# We need the following in order to install a simple file in $(libdir) -# which is shared with other installed packages. 
We use a list of referencing -# packages so that "make uninstall" will remove the file if and only if it -# is not used by another installed package. -# On systems with glibc-2.1 or newer, the file is redundant, therefore we -# avoid installing it. - -all-local: charset.alias ref-add.sed ref-del.sed - -charset_alias = $(DESTDIR)$(libdir)/charset.alias -charset_tmp = $(DESTDIR)$(libdir)/charset.tmp -install-exec-local: install-exec-localcharset -install-exec-localcharset: all-local - if test $(GLIBC21) = no; then \ - case '$(host_os)' in \ - darwin[56]*) \ - need_charset_alias=true ;; \ - darwin* | cygwin* | mingw* | pw32* | cegcc*) \ - need_charset_alias=false ;; \ - *) \ - need_charset_alias=true ;; \ - esac ; \ - else \ - need_charset_alias=false ; \ - fi ; \ - if $$need_charset_alias; then \ - $(mkinstalldirs) $(DESTDIR)$(libdir) ; \ - fi ; \ - if test -f $(charset_alias); then \ - sed -f ref-add.sed $(charset_alias) > $(charset_tmp) ; \ - $(INSTALL_DATA) $(charset_tmp) $(charset_alias) ; \ - rm -f $(charset_tmp) ; \ - else \ - if $$need_charset_alias; then \ - sed -f ref-add.sed charset.alias > $(charset_tmp) ; \ - $(INSTALL_DATA) $(charset_tmp) $(charset_alias) ; \ - rm -f $(charset_tmp) ; \ - fi ; \ - fi - -uninstall-local: uninstall-localcharset -uninstall-localcharset: all-local - if test -f $(charset_alias); then \ - sed -f ref-del.sed $(charset_alias) > $(charset_tmp); \ - if grep '^# Packages using this file: $$' $(charset_tmp) \ - > /dev/null; then \ - rm -f $(charset_alias); \ - else \ - $(INSTALL_DATA) $(charset_tmp) $(charset_alias); \ - fi; \ - rm -f $(charset_tmp); \ - fi - -charset.alias: config.charset - $(AM_V_GEN)rm -f t-$@ $@ && \ - $(SHELL) $(srcdir)/config.charset '$(host)' > t-$@ && \ - mv t-$@ $@ - -SUFFIXES += .sed .sin -.sin.sed: - $(AM_V_GEN)rm -f t-$@ $@ && \ - sed -e '/^#/d' -e 's/@''PACKAGE''@/$(PACKAGE)/g' $< > t-$@ && \ - mv t-$@ $@ - -CLEANFILES += charset.alias ref-add.sed ref-del.sed - -EXTRA_DIST += config.charset ref-add.sin 
ref-del.sin - -## end gnulib module localcharset - -## begin gnulib module locale - -BUILT_SOURCES += locale.h - -# We need the following in order to create when the system -# doesn't have one that provides all definitions. -locale.h: locale.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_LOCALE_H''@|$(NEXT_LOCALE_H)|g' \ - -e 's/@''GNULIB_LOCALECONV''@/$(GNULIB_LOCALECONV)/g' \ - -e 's/@''GNULIB_SETLOCALE''@/$(GNULIB_SETLOCALE)/g' \ - -e 's/@''GNULIB_DUPLOCALE''@/$(GNULIB_DUPLOCALE)/g' \ - -e 's|@''HAVE_DUPLOCALE''@|$(HAVE_DUPLOCALE)|g' \ - -e 's|@''HAVE_XLOCALE_H''@|$(HAVE_XLOCALE_H)|g' \ - -e 's|@''REPLACE_LOCALECONV''@|$(REPLACE_LOCALECONV)|g' \ - -e 's|@''REPLACE_SETLOCALE''@|$(REPLACE_SETLOCALE)|g' \ - -e 's|@''REPLACE_DUPLOCALE''@|$(REPLACE_DUPLOCALE)|g' \ - -e 's|@''REPLACE_STRUCT_LCONV''@|$(REPLACE_STRUCT_LCONV)|g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ - < $(srcdir)/locale.in.h; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += locale.h locale.h-t - -EXTRA_DIST += locale.in.h - -## end gnulib module locale - -## begin gnulib module localeconv - - -EXTRA_DIST += localeconv.c - -EXTRA_libgreputils_a_SOURCES += localeconv.c - -## end gnulib module localeconv - -## begin gnulib module lock - -libgreputils_a_SOURCES += glthread/lock.h glthread/lock.c - -## end gnulib module lock - -## begin gnulib module lseek - - -EXTRA_DIST += lseek.c - -EXTRA_libgreputils_a_SOURCES += lseek.c - -## end gnulib module lseek - -## begin gnulib module lstat - - -EXTRA_DIST += lstat.c - 
-EXTRA_libgreputils_a_SOURCES += lstat.c - -## end gnulib module lstat - -## begin gnulib module maintainer-makefile - -EXTRA_DIST += $(top_srcdir)/maint.mk - -## end gnulib module maintainer-makefile - -## begin gnulib module malloc-gnu - - -EXTRA_DIST += malloc.c - -EXTRA_libgreputils_a_SOURCES += malloc.c - -## end gnulib module malloc-gnu - -## begin gnulib module malloc-posix - - -EXTRA_DIST += malloc.c - -EXTRA_libgreputils_a_SOURCES += malloc.c - -## end gnulib module malloc-posix - -## begin gnulib module malloca - -libgreputils_a_SOURCES += malloca.c - -EXTRA_DIST += malloca.h malloca.valgrind - -## end gnulib module malloca - -## begin gnulib module mbchar - -libgreputils_a_SOURCES += mbchar.c - -EXTRA_DIST += mbchar.h - -## end gnulib module mbchar - -## begin gnulib module mbiter - -libgreputils_a_SOURCES += mbiter.h mbiter.c - -## end gnulib module mbiter - -## begin gnulib module mbrlen - - -EXTRA_DIST += mbrlen.c - -EXTRA_libgreputils_a_SOURCES += mbrlen.c - -## end gnulib module mbrlen - -## begin gnulib module mbrtowc - - -EXTRA_DIST += mbrtowc.c - -EXTRA_libgreputils_a_SOURCES += mbrtowc.c - -## end gnulib module mbrtowc - -## begin gnulib module mbscasecmp - -libgreputils_a_SOURCES += mbscasecmp.c - -## end gnulib module mbscasecmp - -## begin gnulib module mbsinit - - -EXTRA_DIST += mbsinit.c - -EXTRA_libgreputils_a_SOURCES += mbsinit.c - -## end gnulib module mbsinit - -## begin gnulib module mbslen - -libgreputils_a_SOURCES += mbslen.c - -## end gnulib module mbslen - -## begin gnulib module mbsrtowcs - - -EXTRA_DIST += mbsrtowcs-impl.h mbsrtowcs-state.c mbsrtowcs.c - -EXTRA_libgreputils_a_SOURCES += mbsrtowcs-state.c mbsrtowcs.c - -## end gnulib module mbsrtowcs - -## begin gnulib module mbsstr - -libgreputils_a_SOURCES += mbsstr.c - -EXTRA_DIST += str-kmp.h - -## end gnulib module mbsstr - -## begin gnulib module mbtowc - - -EXTRA_DIST += mbtowc-impl.h mbtowc.c - -EXTRA_libgreputils_a_SOURCES += mbtowc.c - -## end gnulib module mbtowc - -## 
begin gnulib module mbuiter - -libgreputils_a_SOURCES += mbuiter.h mbuiter.c - -## end gnulib module mbuiter - -## begin gnulib module memchr - - -EXTRA_DIST += memchr.c memchr.valgrind - -EXTRA_libgreputils_a_SOURCES += memchr.c - -## end gnulib module memchr - -## begin gnulib module memchr2 - -libgreputils_a_SOURCES += memchr2.h memchr2.c - -EXTRA_DIST += memchr2.valgrind - -## end gnulib module memchr2 - -## begin gnulib module mempcpy - - -EXTRA_DIST += mempcpy.c - -EXTRA_libgreputils_a_SOURCES += mempcpy.c - -## end gnulib module mempcpy - -## begin gnulib module memrchr - - -EXTRA_DIST += memrchr.c - -EXTRA_libgreputils_a_SOURCES += memrchr.c - -## end gnulib module memrchr - -## begin gnulib module minmax - -libgreputils_a_SOURCES += minmax.h - -## end gnulib module minmax - -## begin gnulib module msvc-inval - - -EXTRA_DIST += msvc-inval.c msvc-inval.h - -EXTRA_libgreputils_a_SOURCES += msvc-inval.c - -## end gnulib module msvc-inval - -## begin gnulib module msvc-nothrow - - -EXTRA_DIST += msvc-nothrow.c msvc-nothrow.h - -EXTRA_libgreputils_a_SOURCES += msvc-nothrow.c - -## end gnulib module msvc-nothrow - -## begin gnulib module nl_langinfo - - -EXTRA_DIST += nl_langinfo.c - -EXTRA_libgreputils_a_SOURCES += nl_langinfo.c - -## end gnulib module nl_langinfo - -## begin gnulib module obstack - - -EXTRA_DIST += obstack.c obstack.h - -EXTRA_libgreputils_a_SOURCES += obstack.c - -## end gnulib module obstack - -## begin gnulib module open - - -EXTRA_DIST += open.c - -EXTRA_libgreputils_a_SOURCES += open.c - -## end gnulib module open - -## begin gnulib module openat - - -EXTRA_DIST += openat.c - -EXTRA_libgreputils_a_SOURCES += openat.c - -## end gnulib module openat - -## begin gnulib module openat-die - -libgreputils_a_SOURCES += openat-die.c - -## end gnulib module openat-die - -## begin gnulib module openat-h - - -EXTRA_DIST += openat.h - -## end gnulib module openat-h - -## begin gnulib module openat-safer - -libgreputils_a_SOURCES += openat-safer.c - 
-EXTRA_DIST += fcntl--.h fcntl-safer.h - -## end gnulib module openat-safer - -## begin gnulib module opendir - - -EXTRA_DIST += dirent-private.h opendir.c - -EXTRA_libgreputils_a_SOURCES += opendir.c - -## end gnulib module opendir - -## begin gnulib module pathmax - - -EXTRA_DIST += pathmax.h - -## end gnulib module pathmax - -## begin gnulib module progname - -libgreputils_a_SOURCES += progname.h progname.c - -## end gnulib module progname - -## begin gnulib module propername - -libgreputils_a_SOURCES += propername.h propername.c - -## end gnulib module propername - -## begin gnulib module quote - - -EXTRA_DIST += quote.h - -## end gnulib module quote - -## begin gnulib module quotearg - -libgreputils_a_SOURCES += quotearg.c - -EXTRA_DIST += quote.h quotearg.h - -## end gnulib module quotearg - -## begin gnulib module read - - -EXTRA_DIST += read.c - -EXTRA_libgreputils_a_SOURCES += read.c - -## end gnulib module read - -## begin gnulib module readdir - - -EXTRA_DIST += dirent-private.h readdir.c - -EXTRA_libgreputils_a_SOURCES += readdir.c - -## end gnulib module readdir - -## begin gnulib module readme-release - - -EXTRA_DIST += $(top_srcdir)/README-release - -## end gnulib module readme-release - -## begin gnulib module realloc-gnu - - -EXTRA_DIST += realloc.c - -EXTRA_libgreputils_a_SOURCES += realloc.c - -## end gnulib module realloc-gnu - -## begin gnulib module realloc-posix - - -EXTRA_DIST += realloc.c - -EXTRA_libgreputils_a_SOURCES += realloc.c - -## end gnulib module realloc-posix - -## begin gnulib module regex - - -EXTRA_DIST += regcomp.c regex.c regex.h regex_internal.c regex_internal.h regexec.c - -EXTRA_libgreputils_a_SOURCES += regcomp.c regex.c regex_internal.c regexec.c - -## end gnulib module regex - -## begin gnulib module safe-read - -libgreputils_a_SOURCES += safe-read.c - -EXTRA_DIST += safe-read.h - -## end gnulib module safe-read - -## begin gnulib module same-inode - - -EXTRA_DIST += same-inode.h - -## end gnulib module same-inode - 
-## begin gnulib module save-cwd - -libgreputils_a_SOURCES += save-cwd.c - -EXTRA_DIST += save-cwd.h - -## end gnulib module save-cwd - -## begin gnulib module snippet/_Noreturn - -# Because this Makefile snippet defines a variable used by other -# gnulib Makefile snippets, it must be present in all Makefile.am that -# need it. This is ensured by the applicability 'all' defined above. - -_NORETURN_H=$(top_srcdir)/build-aux/snippet/_Noreturn.h - -EXTRA_DIST += $(top_srcdir)/build-aux/snippet/_Noreturn.h - -## end gnulib module snippet/_Noreturn - -## begin gnulib module snippet/arg-nonnull - -# The BUILT_SOURCES created by this Makefile snippet are not used via #include -# statements but through direct file reference. Therefore this snippet must be -# present in all Makefile.am that need it. This is ensured by the applicability -# 'all' defined above. - -BUILT_SOURCES += arg-nonnull.h -# The arg-nonnull.h that gets inserted into generated .h files is the same as -# build-aux/snippet/arg-nonnull.h, except that it has the copyright header cut -# off. -arg-nonnull.h: $(top_srcdir)/build-aux/snippet/arg-nonnull.h - $(AM_V_GEN)rm -f $@-t $@ && \ - sed -n -e '/GL_ARG_NONNULL/,$$p' \ - < $(top_srcdir)/build-aux/snippet/arg-nonnull.h \ - > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += arg-nonnull.h arg-nonnull.h-t - -ARG_NONNULL_H=arg-nonnull.h - -EXTRA_DIST += $(top_srcdir)/build-aux/snippet/arg-nonnull.h - -## end gnulib module snippet/arg-nonnull - -## begin gnulib module snippet/c++defs - -# The BUILT_SOURCES created by this Makefile snippet are not used via #include -# statements but through direct file reference. Therefore this snippet must be -# present in all Makefile.am that need it. This is ensured by the applicability -# 'all' defined above. - -BUILT_SOURCES += c++defs.h -# The c++defs.h that gets inserted into generated .h files is the same as -# build-aux/snippet/c++defs.h, except that it has the copyright header cut off. 
-c++defs.h: $(top_srcdir)/build-aux/snippet/c++defs.h - $(AM_V_GEN)rm -f $@-t $@ && \ - sed -n -e '/_GL_CXXDEFS/,$$p' \ - < $(top_srcdir)/build-aux/snippet/c++defs.h \ - > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += c++defs.h c++defs.h-t - -CXXDEFS_H=c++defs.h - -EXTRA_DIST += $(top_srcdir)/build-aux/snippet/c++defs.h - -## end gnulib module snippet/c++defs - -## begin gnulib module snippet/unused-parameter - -# The BUILT_SOURCES created by this Makefile snippet are not used via #include -# statements but through direct file reference. Therefore this snippet must be -# present in all Makefile.am that need it. This is ensured by the applicability -# 'all' defined above. - -BUILT_SOURCES += unused-parameter.h -# The unused-parameter.h that gets inserted into generated .h files is the same -# as build-aux/snippet/unused-parameter.h, except that it has the copyright -# header cut off. -unused-parameter.h: $(top_srcdir)/build-aux/snippet/unused-parameter.h - $(AM_V_GEN)rm -f $@-t $@ && \ - sed -n -e '/GL_UNUSED_PARAMETER/,$$p' \ - < $(top_srcdir)/build-aux/snippet/unused-parameter.h \ - > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += unused-parameter.h unused-parameter.h-t - -UNUSED_PARAMETER_H=unused-parameter.h - -EXTRA_DIST += $(top_srcdir)/build-aux/snippet/unused-parameter.h - -## end gnulib module snippet/unused-parameter - -## begin gnulib module snippet/warn-on-use - -BUILT_SOURCES += warn-on-use.h -# The warn-on-use.h that gets inserted into generated .h files is the same as -# build-aux/snippet/warn-on-use.h, except that it has the copyright header cut -# off. 
-warn-on-use.h: $(top_srcdir)/build-aux/snippet/warn-on-use.h - $(AM_V_GEN)rm -f $@-t $@ && \ - sed -n -e '/^.ifndef/,$$p' \ - < $(top_srcdir)/build-aux/snippet/warn-on-use.h \ - > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += warn-on-use.h warn-on-use.h-t - -WARN_ON_USE_H=warn-on-use.h - -EXTRA_DIST += $(top_srcdir)/build-aux/snippet/warn-on-use.h - -## end gnulib module snippet/warn-on-use - -## begin gnulib module stat - - -EXTRA_DIST += stat.c - -EXTRA_libgreputils_a_SOURCES += stat.c - -## end gnulib module stat - -## begin gnulib module stdalign - -BUILT_SOURCES += $(STDALIGN_H) - -# We need the following in order to create when the system -# doesn't have one that works. -if GL_GENERATE_STDALIGN_H -stdalign.h: stdalign.in.h $(top_builddir)/config.status - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ - cat $(srcdir)/stdalign.in.h; \ - } > $@-t && \ - mv $@-t $@ -else -stdalign.h: $(top_builddir)/config.status - rm -f $@ -endif -MOSTLYCLEANFILES += stdalign.h stdalign.h-t - -EXTRA_DIST += stdalign.in.h - -## end gnulib module stdalign - -## begin gnulib module stdarg - -BUILT_SOURCES += $(STDARG_H) - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -if GL_GENERATE_STDARG_H -stdarg.h: stdarg.in.h $(top_builddir)/config.status - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/' && \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_STDARG_H''@|$(NEXT_STDARG_H)|g' \ - < $(srcdir)/stdarg.in.h; \ - } > $@-t && \ - mv $@-t $@ -else -stdarg.h: $(top_builddir)/config.status - rm -f $@ -endif -MOSTLYCLEANFILES += stdarg.h stdarg.h-t - -EXTRA_DIST += stdarg.in.h - -## end gnulib module stdarg - -## begin gnulib module stdbool - -BUILT_SOURCES += $(STDBOOL_H) - -# We need the following in order to create when the system -# doesn't have one that works. -if GL_GENERATE_STDBOOL_H -stdbool.h: stdbool.in.h $(top_builddir)/config.status - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ - sed -e 's/@''HAVE__BOOL''@/$(HAVE__BOOL)/g' < $(srcdir)/stdbool.in.h; \ - } > $@-t && \ - mv $@-t $@ -else -stdbool.h: $(top_builddir)/config.status - rm -f $@ -endif -MOSTLYCLEANFILES += stdbool.h stdbool.h-t - -EXTRA_DIST += stdbool.in.h - -## end gnulib module stdbool - -## begin gnulib module stddef - -BUILT_SOURCES += $(STDDEF_H) - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -if GL_GENERATE_STDDEF_H -stddef.h: stddef.in.h $(top_builddir)/config.status - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/' && \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_STDDEF_H''@|$(NEXT_STDDEF_H)|g' \ - -e 's|@''HAVE_MAX_ALIGN_T''@|$(HAVE_MAX_ALIGN_T)|g' \ - -e 's|@''HAVE_WCHAR_T''@|$(HAVE_WCHAR_T)|g' \ - -e 's|@''REPLACE_NULL''@|$(REPLACE_NULL)|g' \ - < $(srcdir)/stddef.in.h; \ - } > $@-t && \ - mv $@-t $@ -else -stddef.h: $(top_builddir)/config.status - rm -f $@ -endif -MOSTLYCLEANFILES += stddef.h stddef.h-t - -EXTRA_DIST += stddef.in.h - -## end gnulib module stddef - -## begin gnulib module stdint - -BUILT_SOURCES += $(STDINT_H) - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -if GL_GENERATE_STDINT_H -stdint.h: stdint.in.h $(top_builddir)/config.status - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's/@''HAVE_STDINT_H''@/$(HAVE_STDINT_H)/g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_STDINT_H''@|$(NEXT_STDINT_H)|g' \ - -e 's/@''HAVE_SYS_TYPES_H''@/$(HAVE_SYS_TYPES_H)/g' \ - -e 's/@''HAVE_INTTYPES_H''@/$(HAVE_INTTYPES_H)/g' \ - -e 's/@''HAVE_SYS_INTTYPES_H''@/$(HAVE_SYS_INTTYPES_H)/g' \ - -e 's/@''HAVE_SYS_BITYPES_H''@/$(HAVE_SYS_BITYPES_H)/g' \ - -e 's/@''HAVE_WCHAR_H''@/$(HAVE_WCHAR_H)/g' \ - -e 's/@''HAVE_LONG_LONG_INT''@/$(HAVE_LONG_LONG_INT)/g' \ - -e 's/@''HAVE_UNSIGNED_LONG_LONG_INT''@/$(HAVE_UNSIGNED_LONG_LONG_INT)/g' \ - -e 's/@''APPLE_UNIVERSAL_BUILD''@/$(APPLE_UNIVERSAL_BUILD)/g' \ - -e 's/@''BITSIZEOF_PTRDIFF_T''@/$(BITSIZEOF_PTRDIFF_T)/g' \ - -e 's/@''PTRDIFF_T_SUFFIX''@/$(PTRDIFF_T_SUFFIX)/g' \ - -e 's/@''BITSIZEOF_SIG_ATOMIC_T''@/$(BITSIZEOF_SIG_ATOMIC_T)/g' \ - -e 
's/@''HAVE_SIGNED_SIG_ATOMIC_T''@/$(HAVE_SIGNED_SIG_ATOMIC_T)/g' \ - -e 's/@''SIG_ATOMIC_T_SUFFIX''@/$(SIG_ATOMIC_T_SUFFIX)/g' \ - -e 's/@''BITSIZEOF_SIZE_T''@/$(BITSIZEOF_SIZE_T)/g' \ - -e 's/@''SIZE_T_SUFFIX''@/$(SIZE_T_SUFFIX)/g' \ - -e 's/@''BITSIZEOF_WCHAR_T''@/$(BITSIZEOF_WCHAR_T)/g' \ - -e 's/@''HAVE_SIGNED_WCHAR_T''@/$(HAVE_SIGNED_WCHAR_T)/g' \ - -e 's/@''WCHAR_T_SUFFIX''@/$(WCHAR_T_SUFFIX)/g' \ - -e 's/@''BITSIZEOF_WINT_T''@/$(BITSIZEOF_WINT_T)/g' \ - -e 's/@''HAVE_SIGNED_WINT_T''@/$(HAVE_SIGNED_WINT_T)/g' \ - -e 's/@''WINT_T_SUFFIX''@/$(WINT_T_SUFFIX)/g' \ - < $(srcdir)/stdint.in.h; \ - } > $@-t && \ - mv $@-t $@ -else -stdint.h: $(top_builddir)/config.status - rm -f $@ -endif -MOSTLYCLEANFILES += stdint.h stdint.h-t - -EXTRA_DIST += stdint.in.h - -## end gnulib module stdint - -## begin gnulib module stdio - -BUILT_SOURCES += stdio.h - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -stdio.h: stdio.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/' && \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_STDIO_H''@|$(NEXT_STDIO_H)|g' \ - -e 's/@''GNULIB_DPRINTF''@/$(GNULIB_DPRINTF)/g' \ - -e 's/@''GNULIB_FCLOSE''@/$(GNULIB_FCLOSE)/g' \ - -e 's/@''GNULIB_FDOPEN''@/$(GNULIB_FDOPEN)/g' \ - -e 's/@''GNULIB_FFLUSH''@/$(GNULIB_FFLUSH)/g' \ - -e 's/@''GNULIB_FGETC''@/$(GNULIB_FGETC)/g' \ - -e 's/@''GNULIB_FGETS''@/$(GNULIB_FGETS)/g' \ - -e 's/@''GNULIB_FOPEN''@/$(GNULIB_FOPEN)/g' \ - -e 's/@''GNULIB_FPRINTF''@/$(GNULIB_FPRINTF)/g' \ - -e 's/@''GNULIB_FPRINTF_POSIX''@/$(GNULIB_FPRINTF_POSIX)/g' \ - -e 's/@''GNULIB_FPURGE''@/$(GNULIB_FPURGE)/g' \ - -e 's/@''GNULIB_FPUTC''@/$(GNULIB_FPUTC)/g' \ - -e 's/@''GNULIB_FPUTS''@/$(GNULIB_FPUTS)/g' \ - -e 's/@''GNULIB_FREAD''@/$(GNULIB_FREAD)/g' \ - -e 's/@''GNULIB_FREOPEN''@/$(GNULIB_FREOPEN)/g' \ - -e 's/@''GNULIB_FSCANF''@/$(GNULIB_FSCANF)/g' \ - -e 's/@''GNULIB_FSEEK''@/$(GNULIB_FSEEK)/g' \ - -e 's/@''GNULIB_FSEEKO''@/$(GNULIB_FSEEKO)/g' \ - -e 's/@''GNULIB_FTELL''@/$(GNULIB_FTELL)/g' \ - -e 's/@''GNULIB_FTELLO''@/$(GNULIB_FTELLO)/g' \ - -e 's/@''GNULIB_FWRITE''@/$(GNULIB_FWRITE)/g' \ - -e 's/@''GNULIB_GETC''@/$(GNULIB_GETC)/g' \ - -e 's/@''GNULIB_GETCHAR''@/$(GNULIB_GETCHAR)/g' \ - -e 's/@''GNULIB_GETDELIM''@/$(GNULIB_GETDELIM)/g' \ - -e 's/@''GNULIB_GETLINE''@/$(GNULIB_GETLINE)/g' \ - -e 's/@''GNULIB_OBSTACK_PRINTF''@/$(GNULIB_OBSTACK_PRINTF)/g' \ - -e 's/@''GNULIB_OBSTACK_PRINTF_POSIX''@/$(GNULIB_OBSTACK_PRINTF_POSIX)/g' \ - -e 's/@''GNULIB_PCLOSE''@/$(GNULIB_PCLOSE)/g' \ - -e 's/@''GNULIB_PERROR''@/$(GNULIB_PERROR)/g' \ - -e 's/@''GNULIB_POPEN''@/$(GNULIB_POPEN)/g' \ - -e 's/@''GNULIB_PRINTF''@/$(GNULIB_PRINTF)/g' \ - -e 's/@''GNULIB_PRINTF_POSIX''@/$(GNULIB_PRINTF_POSIX)/g' \ - -e 's/@''GNULIB_PUTC''@/$(GNULIB_PUTC)/g' \ - -e 's/@''GNULIB_PUTCHAR''@/$(GNULIB_PUTCHAR)/g' \ - -e 
's/@''GNULIB_PUTS''@/$(GNULIB_PUTS)/g' \ - -e 's/@''GNULIB_REMOVE''@/$(GNULIB_REMOVE)/g' \ - -e 's/@''GNULIB_RENAME''@/$(GNULIB_RENAME)/g' \ - -e 's/@''GNULIB_RENAMEAT''@/$(GNULIB_RENAMEAT)/g' \ - -e 's/@''GNULIB_SCANF''@/$(GNULIB_SCANF)/g' \ - -e 's/@''GNULIB_SNPRINTF''@/$(GNULIB_SNPRINTF)/g' \ - -e 's/@''GNULIB_SPRINTF_POSIX''@/$(GNULIB_SPRINTF_POSIX)/g' \ - -e 's/@''GNULIB_STDIO_H_NONBLOCKING''@/$(GNULIB_STDIO_H_NONBLOCKING)/g' \ - -e 's/@''GNULIB_STDIO_H_SIGPIPE''@/$(GNULIB_STDIO_H_SIGPIPE)/g' \ - -e 's/@''GNULIB_TMPFILE''@/$(GNULIB_TMPFILE)/g' \ - -e 's/@''GNULIB_VASPRINTF''@/$(GNULIB_VASPRINTF)/g' \ - -e 's/@''GNULIB_VDPRINTF''@/$(GNULIB_VDPRINTF)/g' \ - -e 's/@''GNULIB_VFPRINTF''@/$(GNULIB_VFPRINTF)/g' \ - -e 's/@''GNULIB_VFPRINTF_POSIX''@/$(GNULIB_VFPRINTF_POSIX)/g' \ - -e 's/@''GNULIB_VFSCANF''@/$(GNULIB_VFSCANF)/g' \ - -e 's/@''GNULIB_VSCANF''@/$(GNULIB_VSCANF)/g' \ - -e 's/@''GNULIB_VPRINTF''@/$(GNULIB_VPRINTF)/g' \ - -e 's/@''GNULIB_VPRINTF_POSIX''@/$(GNULIB_VPRINTF_POSIX)/g' \ - -e 's/@''GNULIB_VSNPRINTF''@/$(GNULIB_VSNPRINTF)/g' \ - -e 's/@''GNULIB_VSPRINTF_POSIX''@/$(GNULIB_VSPRINTF_POSIX)/g' \ - < $(srcdir)/stdio.in.h | \ - sed -e 's|@''HAVE_DECL_FPURGE''@|$(HAVE_DECL_FPURGE)|g' \ - -e 's|@''HAVE_DECL_FSEEKO''@|$(HAVE_DECL_FSEEKO)|g' \ - -e 's|@''HAVE_DECL_FTELLO''@|$(HAVE_DECL_FTELLO)|g' \ - -e 's|@''HAVE_DECL_GETDELIM''@|$(HAVE_DECL_GETDELIM)|g' \ - -e 's|@''HAVE_DECL_GETLINE''@|$(HAVE_DECL_GETLINE)|g' \ - -e 's|@''HAVE_DECL_OBSTACK_PRINTF''@|$(HAVE_DECL_OBSTACK_PRINTF)|g' \ - -e 's|@''HAVE_DECL_SNPRINTF''@|$(HAVE_DECL_SNPRINTF)|g' \ - -e 's|@''HAVE_DECL_VSNPRINTF''@|$(HAVE_DECL_VSNPRINTF)|g' \ - -e 's|@''HAVE_DPRINTF''@|$(HAVE_DPRINTF)|g' \ - -e 's|@''HAVE_FSEEKO''@|$(HAVE_FSEEKO)|g' \ - -e 's|@''HAVE_FTELLO''@|$(HAVE_FTELLO)|g' \ - -e 's|@''HAVE_PCLOSE''@|$(HAVE_PCLOSE)|g' \ - -e 's|@''HAVE_POPEN''@|$(HAVE_POPEN)|g' \ - -e 's|@''HAVE_RENAMEAT''@|$(HAVE_RENAMEAT)|g' \ - -e 's|@''HAVE_VASPRINTF''@|$(HAVE_VASPRINTF)|g' \ - -e 
's|@''HAVE_VDPRINTF''@|$(HAVE_VDPRINTF)|g' \ - -e 's|@''REPLACE_DPRINTF''@|$(REPLACE_DPRINTF)|g' \ - -e 's|@''REPLACE_FCLOSE''@|$(REPLACE_FCLOSE)|g' \ - -e 's|@''REPLACE_FDOPEN''@|$(REPLACE_FDOPEN)|g' \ - -e 's|@''REPLACE_FFLUSH''@|$(REPLACE_FFLUSH)|g' \ - -e 's|@''REPLACE_FOPEN''@|$(REPLACE_FOPEN)|g' \ - -e 's|@''REPLACE_FPRINTF''@|$(REPLACE_FPRINTF)|g' \ - -e 's|@''REPLACE_FPURGE''@|$(REPLACE_FPURGE)|g' \ - -e 's|@''REPLACE_FREOPEN''@|$(REPLACE_FREOPEN)|g' \ - -e 's|@''REPLACE_FSEEK''@|$(REPLACE_FSEEK)|g' \ - -e 's|@''REPLACE_FSEEKO''@|$(REPLACE_FSEEKO)|g' \ - -e 's|@''REPLACE_FTELL''@|$(REPLACE_FTELL)|g' \ - -e 's|@''REPLACE_FTELLO''@|$(REPLACE_FTELLO)|g' \ - -e 's|@''REPLACE_GETDELIM''@|$(REPLACE_GETDELIM)|g' \ - -e 's|@''REPLACE_GETLINE''@|$(REPLACE_GETLINE)|g' \ - -e 's|@''REPLACE_OBSTACK_PRINTF''@|$(REPLACE_OBSTACK_PRINTF)|g' \ - -e 's|@''REPLACE_PERROR''@|$(REPLACE_PERROR)|g' \ - -e 's|@''REPLACE_POPEN''@|$(REPLACE_POPEN)|g' \ - -e 's|@''REPLACE_PRINTF''@|$(REPLACE_PRINTF)|g' \ - -e 's|@''REPLACE_REMOVE''@|$(REPLACE_REMOVE)|g' \ - -e 's|@''REPLACE_RENAME''@|$(REPLACE_RENAME)|g' \ - -e 's|@''REPLACE_RENAMEAT''@|$(REPLACE_RENAMEAT)|g' \ - -e 's|@''REPLACE_SNPRINTF''@|$(REPLACE_SNPRINTF)|g' \ - -e 's|@''REPLACE_SPRINTF''@|$(REPLACE_SPRINTF)|g' \ - -e 's|@''REPLACE_STDIO_READ_FUNCS''@|$(REPLACE_STDIO_READ_FUNCS)|g' \ - -e 's|@''REPLACE_STDIO_WRITE_FUNCS''@|$(REPLACE_STDIO_WRITE_FUNCS)|g' \ - -e 's|@''REPLACE_TMPFILE''@|$(REPLACE_TMPFILE)|g' \ - -e 's|@''REPLACE_VASPRINTF''@|$(REPLACE_VASPRINTF)|g' \ - -e 's|@''REPLACE_VDPRINTF''@|$(REPLACE_VDPRINTF)|g' \ - -e 's|@''REPLACE_VFPRINTF''@|$(REPLACE_VFPRINTF)|g' \ - -e 's|@''REPLACE_VPRINTF''@|$(REPLACE_VPRINTF)|g' \ - -e 's|@''REPLACE_VSNPRINTF''@|$(REPLACE_VSNPRINTF)|g' \ - -e 's|@''REPLACE_VSPRINTF''@|$(REPLACE_VSPRINTF)|g' \ - -e 's|@''ASM_SYMBOL_PREFIX''@|$(ASM_SYMBOL_PREFIX)|g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e 
'/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)'; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += stdio.h stdio.h-t - -EXTRA_DIST += stdio.in.h - -## end gnulib module stdio - -## begin gnulib module stdlib - -BUILT_SOURCES += stdlib.h - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -stdlib.h: stdlib.in.h $(top_builddir)/config.status $(CXXDEFS_H) \ - $(_NORETURN_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_STDLIB_H''@|$(NEXT_STDLIB_H)|g' \ - -e 's/@''GNULIB__EXIT''@/$(GNULIB__EXIT)/g' \ - -e 's/@''GNULIB_ATOLL''@/$(GNULIB_ATOLL)/g' \ - -e 's/@''GNULIB_CALLOC_POSIX''@/$(GNULIB_CALLOC_POSIX)/g' \ - -e 's/@''GNULIB_CANONICALIZE_FILE_NAME''@/$(GNULIB_CANONICALIZE_FILE_NAME)/g' \ - -e 's/@''GNULIB_GETLOADAVG''@/$(GNULIB_GETLOADAVG)/g' \ - -e 's/@''GNULIB_GETSUBOPT''@/$(GNULIB_GETSUBOPT)/g' \ - -e 's/@''GNULIB_GRANTPT''@/$(GNULIB_GRANTPT)/g' \ - -e 's/@''GNULIB_MALLOC_POSIX''@/$(GNULIB_MALLOC_POSIX)/g' \ - -e 's/@''GNULIB_MBTOWC''@/$(GNULIB_MBTOWC)/g' \ - -e 's/@''GNULIB_MKDTEMP''@/$(GNULIB_MKDTEMP)/g' \ - -e 's/@''GNULIB_MKOSTEMP''@/$(GNULIB_MKOSTEMP)/g' \ - -e 's/@''GNULIB_MKOSTEMPS''@/$(GNULIB_MKOSTEMPS)/g' \ - -e 's/@''GNULIB_MKSTEMP''@/$(GNULIB_MKSTEMP)/g' \ - -e 's/@''GNULIB_MKSTEMPS''@/$(GNULIB_MKSTEMPS)/g' \ - -e 's/@''GNULIB_POSIX_OPENPT''@/$(GNULIB_POSIX_OPENPT)/g' \ - -e 's/@''GNULIB_PTSNAME''@/$(GNULIB_PTSNAME)/g' \ - -e 's/@''GNULIB_PTSNAME_R''@/$(GNULIB_PTSNAME_R)/g' \ - -e 's/@''GNULIB_PUTENV''@/$(GNULIB_PUTENV)/g' \ - -e 's/@''GNULIB_QSORT_R''@/$(GNULIB_QSORT_R)/g' \ - -e 's/@''GNULIB_RANDOM''@/$(GNULIB_RANDOM)/g' \ - -e 's/@''GNULIB_RANDOM_R''@/$(GNULIB_RANDOM_R)/g' \ - -e 
's/@''GNULIB_REALLOC_POSIX''@/$(GNULIB_REALLOC_POSIX)/g' \ - -e 's/@''GNULIB_REALPATH''@/$(GNULIB_REALPATH)/g' \ - -e 's/@''GNULIB_RPMATCH''@/$(GNULIB_RPMATCH)/g' \ - -e 's/@''GNULIB_SECURE_GETENV''@/$(GNULIB_SECURE_GETENV)/g' \ - -e 's/@''GNULIB_SETENV''@/$(GNULIB_SETENV)/g' \ - -e 's/@''GNULIB_STRTOD''@/$(GNULIB_STRTOD)/g' \ - -e 's/@''GNULIB_STRTOLL''@/$(GNULIB_STRTOLL)/g' \ - -e 's/@''GNULIB_STRTOULL''@/$(GNULIB_STRTOULL)/g' \ - -e 's/@''GNULIB_SYSTEM_POSIX''@/$(GNULIB_SYSTEM_POSIX)/g' \ - -e 's/@''GNULIB_UNLOCKPT''@/$(GNULIB_UNLOCKPT)/g' \ - -e 's/@''GNULIB_UNSETENV''@/$(GNULIB_UNSETENV)/g' \ - -e 's/@''GNULIB_WCTOMB''@/$(GNULIB_WCTOMB)/g' \ - < $(srcdir)/stdlib.in.h | \ - sed -e 's|@''HAVE__EXIT''@|$(HAVE__EXIT)|g' \ - -e 's|@''HAVE_ATOLL''@|$(HAVE_ATOLL)|g' \ - -e 's|@''HAVE_CANONICALIZE_FILE_NAME''@|$(HAVE_CANONICALIZE_FILE_NAME)|g' \ - -e 's|@''HAVE_DECL_GETLOADAVG''@|$(HAVE_DECL_GETLOADAVG)|g' \ - -e 's|@''HAVE_GETSUBOPT''@|$(HAVE_GETSUBOPT)|g' \ - -e 's|@''HAVE_GRANTPT''@|$(HAVE_GRANTPT)|g' \ - -e 's|@''HAVE_MKDTEMP''@|$(HAVE_MKDTEMP)|g' \ - -e 's|@''HAVE_MKOSTEMP''@|$(HAVE_MKOSTEMP)|g' \ - -e 's|@''HAVE_MKOSTEMPS''@|$(HAVE_MKOSTEMPS)|g' \ - -e 's|@''HAVE_MKSTEMP''@|$(HAVE_MKSTEMP)|g' \ - -e 's|@''HAVE_MKSTEMPS''@|$(HAVE_MKSTEMPS)|g' \ - -e 's|@''HAVE_POSIX_OPENPT''@|$(HAVE_POSIX_OPENPT)|g' \ - -e 's|@''HAVE_PTSNAME''@|$(HAVE_PTSNAME)|g' \ - -e 's|@''HAVE_PTSNAME_R''@|$(HAVE_PTSNAME_R)|g' \ - -e 's|@''HAVE_RANDOM''@|$(HAVE_RANDOM)|g' \ - -e 's|@''HAVE_RANDOM_H''@|$(HAVE_RANDOM_H)|g' \ - -e 's|@''HAVE_RANDOM_R''@|$(HAVE_RANDOM_R)|g' \ - -e 's|@''HAVE_REALPATH''@|$(HAVE_REALPATH)|g' \ - -e 's|@''HAVE_RPMATCH''@|$(HAVE_RPMATCH)|g' \ - -e 's|@''HAVE_SECURE_GETENV''@|$(HAVE_SECURE_GETENV)|g' \ - -e 's|@''HAVE_DECL_SETENV''@|$(HAVE_DECL_SETENV)|g' \ - -e 's|@''HAVE_STRTOD''@|$(HAVE_STRTOD)|g' \ - -e 's|@''HAVE_STRTOLL''@|$(HAVE_STRTOLL)|g' \ - -e 's|@''HAVE_STRTOULL''@|$(HAVE_STRTOULL)|g' \ - -e 's|@''HAVE_STRUCT_RANDOM_DATA''@|$(HAVE_STRUCT_RANDOM_DATA)|g' \ 
- -e 's|@''HAVE_SYS_LOADAVG_H''@|$(HAVE_SYS_LOADAVG_H)|g' \ - -e 's|@''HAVE_UNLOCKPT''@|$(HAVE_UNLOCKPT)|g' \ - -e 's|@''HAVE_DECL_UNSETENV''@|$(HAVE_DECL_UNSETENV)|g' \ - -e 's|@''REPLACE_CALLOC''@|$(REPLACE_CALLOC)|g' \ - -e 's|@''REPLACE_CANONICALIZE_FILE_NAME''@|$(REPLACE_CANONICALIZE_FILE_NAME)|g' \ - -e 's|@''REPLACE_MALLOC''@|$(REPLACE_MALLOC)|g' \ - -e 's|@''REPLACE_MBTOWC''@|$(REPLACE_MBTOWC)|g' \ - -e 's|@''REPLACE_MKSTEMP''@|$(REPLACE_MKSTEMP)|g' \ - -e 's|@''REPLACE_PTSNAME''@|$(REPLACE_PTSNAME)|g' \ - -e 's|@''REPLACE_PTSNAME_R''@|$(REPLACE_PTSNAME_R)|g' \ - -e 's|@''REPLACE_PUTENV''@|$(REPLACE_PUTENV)|g' \ - -e 's|@''REPLACE_QSORT_R''@|$(REPLACE_QSORT_R)|g' \ - -e 's|@''REPLACE_RANDOM_R''@|$(REPLACE_RANDOM_R)|g' \ - -e 's|@''REPLACE_REALLOC''@|$(REPLACE_REALLOC)|g' \ - -e 's|@''REPLACE_REALPATH''@|$(REPLACE_REALPATH)|g' \ - -e 's|@''REPLACE_SETENV''@|$(REPLACE_SETENV)|g' \ - -e 's|@''REPLACE_STRTOD''@|$(REPLACE_STRTOD)|g' \ - -e 's|@''REPLACE_UNSETENV''@|$(REPLACE_UNSETENV)|g' \ - -e 's|@''REPLACE_WCTOMB''@|$(REPLACE_WCTOMB)|g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _Noreturn/r $(_NORETURN_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)'; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += stdlib.h stdlib.h-t - -EXTRA_DIST += stdlib.in.h - -## end gnulib module stdlib - -## begin gnulib module stpcpy - - -EXTRA_DIST += stpcpy.c - -EXTRA_libgreputils_a_SOURCES += stpcpy.c - -## end gnulib module stpcpy - -## begin gnulib module strdup-posix - - -EXTRA_DIST += strdup.c - -EXTRA_libgreputils_a_SOURCES += strdup.c - -## end gnulib module strdup-posix - -## begin gnulib module streq - - -EXTRA_DIST += streq.h - -## end gnulib module streq - -## begin gnulib module strerror - - -EXTRA_DIST += strerror.c - -EXTRA_libgreputils_a_SOURCES += strerror.c - -## end gnulib module strerror - -## begin gnulib module strerror-override - - -EXTRA_DIST += 
strerror-override.c strerror-override.h - -EXTRA_libgreputils_a_SOURCES += strerror-override.c - -## end gnulib module strerror-override - -## begin gnulib module striconv - -libgreputils_a_SOURCES += striconv.h striconv.c -if GL_COND_LIBTOOL -endif - -## end gnulib module striconv - -## begin gnulib module string - -BUILT_SOURCES += string.h - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -string.h: string.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_STRING_H''@|$(NEXT_STRING_H)|g' \ - -e 's/@''GNULIB_FFSL''@/$(GNULIB_FFSL)/g' \ - -e 's/@''GNULIB_FFSLL''@/$(GNULIB_FFSLL)/g' \ - -e 's/@''GNULIB_MBSLEN''@/$(GNULIB_MBSLEN)/g' \ - -e 's/@''GNULIB_MBSNLEN''@/$(GNULIB_MBSNLEN)/g' \ - -e 's/@''GNULIB_MBSCHR''@/$(GNULIB_MBSCHR)/g' \ - -e 's/@''GNULIB_MBSRCHR''@/$(GNULIB_MBSRCHR)/g' \ - -e 's/@''GNULIB_MBSSTR''@/$(GNULIB_MBSSTR)/g' \ - -e 's/@''GNULIB_MBSCASECMP''@/$(GNULIB_MBSCASECMP)/g' \ - -e 's/@''GNULIB_MBSNCASECMP''@/$(GNULIB_MBSNCASECMP)/g' \ - -e 's/@''GNULIB_MBSPCASECMP''@/$(GNULIB_MBSPCASECMP)/g' \ - -e 's/@''GNULIB_MBSCASESTR''@/$(GNULIB_MBSCASESTR)/g' \ - -e 's/@''GNULIB_MBSCSPN''@/$(GNULIB_MBSCSPN)/g' \ - -e 's/@''GNULIB_MBSPBRK''@/$(GNULIB_MBSPBRK)/g' \ - -e 's/@''GNULIB_MBSSPN''@/$(GNULIB_MBSSPN)/g' \ - -e 's/@''GNULIB_MBSSEP''@/$(GNULIB_MBSSEP)/g' \ - -e 's/@''GNULIB_MBSTOK_R''@/$(GNULIB_MBSTOK_R)/g' \ - -e 's/@''GNULIB_MEMCHR''@/$(GNULIB_MEMCHR)/g' \ - -e 's/@''GNULIB_MEMMEM''@/$(GNULIB_MEMMEM)/g' \ - -e 's/@''GNULIB_MEMPCPY''@/$(GNULIB_MEMPCPY)/g' \ - -e 's/@''GNULIB_MEMRCHR''@/$(GNULIB_MEMRCHR)/g' \ - -e 
's/@''GNULIB_RAWMEMCHR''@/$(GNULIB_RAWMEMCHR)/g' \ - -e 's/@''GNULIB_STPCPY''@/$(GNULIB_STPCPY)/g' \ - -e 's/@''GNULIB_STPNCPY''@/$(GNULIB_STPNCPY)/g' \ - -e 's/@''GNULIB_STRCHRNUL''@/$(GNULIB_STRCHRNUL)/g' \ - -e 's/@''GNULIB_STRDUP''@/$(GNULIB_STRDUP)/g' \ - -e 's/@''GNULIB_STRNCAT''@/$(GNULIB_STRNCAT)/g' \ - -e 's/@''GNULIB_STRNDUP''@/$(GNULIB_STRNDUP)/g' \ - -e 's/@''GNULIB_STRNLEN''@/$(GNULIB_STRNLEN)/g' \ - -e 's/@''GNULIB_STRPBRK''@/$(GNULIB_STRPBRK)/g' \ - -e 's/@''GNULIB_STRSEP''@/$(GNULIB_STRSEP)/g' \ - -e 's/@''GNULIB_STRSTR''@/$(GNULIB_STRSTR)/g' \ - -e 's/@''GNULIB_STRCASESTR''@/$(GNULIB_STRCASESTR)/g' \ - -e 's/@''GNULIB_STRTOK_R''@/$(GNULIB_STRTOK_R)/g' \ - -e 's/@''GNULIB_STRERROR''@/$(GNULIB_STRERROR)/g' \ - -e 's/@''GNULIB_STRERROR_R''@/$(GNULIB_STRERROR_R)/g' \ - -e 's/@''GNULIB_STRSIGNAL''@/$(GNULIB_STRSIGNAL)/g' \ - -e 's/@''GNULIB_STRVERSCMP''@/$(GNULIB_STRVERSCMP)/g' \ - < $(srcdir)/string.in.h | \ - sed -e 's|@''HAVE_FFSL''@|$(HAVE_FFSL)|g' \ - -e 's|@''HAVE_FFSLL''@|$(HAVE_FFSLL)|g' \ - -e 's|@''HAVE_MBSLEN''@|$(HAVE_MBSLEN)|g' \ - -e 's|@''HAVE_MEMCHR''@|$(HAVE_MEMCHR)|g' \ - -e 's|@''HAVE_DECL_MEMMEM''@|$(HAVE_DECL_MEMMEM)|g' \ - -e 's|@''HAVE_MEMPCPY''@|$(HAVE_MEMPCPY)|g' \ - -e 's|@''HAVE_DECL_MEMRCHR''@|$(HAVE_DECL_MEMRCHR)|g' \ - -e 's|@''HAVE_RAWMEMCHR''@|$(HAVE_RAWMEMCHR)|g' \ - -e 's|@''HAVE_STPCPY''@|$(HAVE_STPCPY)|g' \ - -e 's|@''HAVE_STPNCPY''@|$(HAVE_STPNCPY)|g' \ - -e 's|@''HAVE_STRCHRNUL''@|$(HAVE_STRCHRNUL)|g' \ - -e 's|@''HAVE_DECL_STRDUP''@|$(HAVE_DECL_STRDUP)|g' \ - -e 's|@''HAVE_DECL_STRNDUP''@|$(HAVE_DECL_STRNDUP)|g' \ - -e 's|@''HAVE_DECL_STRNLEN''@|$(HAVE_DECL_STRNLEN)|g' \ - -e 's|@''HAVE_STRPBRK''@|$(HAVE_STRPBRK)|g' \ - -e 's|@''HAVE_STRSEP''@|$(HAVE_STRSEP)|g' \ - -e 's|@''HAVE_STRCASESTR''@|$(HAVE_STRCASESTR)|g' \ - -e 's|@''HAVE_DECL_STRTOK_R''@|$(HAVE_DECL_STRTOK_R)|g' \ - -e 's|@''HAVE_DECL_STRERROR_R''@|$(HAVE_DECL_STRERROR_R)|g' \ - -e 's|@''HAVE_DECL_STRSIGNAL''@|$(HAVE_DECL_STRSIGNAL)|g' \ - -e 
's|@''HAVE_STRVERSCMP''@|$(HAVE_STRVERSCMP)|g' \ - -e 's|@''REPLACE_STPNCPY''@|$(REPLACE_STPNCPY)|g' \ - -e 's|@''REPLACE_MEMCHR''@|$(REPLACE_MEMCHR)|g' \ - -e 's|@''REPLACE_MEMMEM''@|$(REPLACE_MEMMEM)|g' \ - -e 's|@''REPLACE_STRCASESTR''@|$(REPLACE_STRCASESTR)|g' \ - -e 's|@''REPLACE_STRCHRNUL''@|$(REPLACE_STRCHRNUL)|g' \ - -e 's|@''REPLACE_STRDUP''@|$(REPLACE_STRDUP)|g' \ - -e 's|@''REPLACE_STRSTR''@|$(REPLACE_STRSTR)|g' \ - -e 's|@''REPLACE_STRERROR''@|$(REPLACE_STRERROR)|g' \ - -e 's|@''REPLACE_STRERROR_R''@|$(REPLACE_STRERROR_R)|g' \ - -e 's|@''REPLACE_STRNCAT''@|$(REPLACE_STRNCAT)|g' \ - -e 's|@''REPLACE_STRNDUP''@|$(REPLACE_STRNDUP)|g' \ - -e 's|@''REPLACE_STRNLEN''@|$(REPLACE_STRNLEN)|g' \ - -e 's|@''REPLACE_STRSIGNAL''@|$(REPLACE_STRSIGNAL)|g' \ - -e 's|@''REPLACE_STRTOK_R''@|$(REPLACE_STRTOK_R)|g' \ - -e 's|@''UNDEFINE_STRTOK_R''@|$(UNDEFINE_STRTOK_R)|g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)'; \ - < $(srcdir)/string.in.h; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += string.h string.h-t - -EXTRA_DIST += string.in.h - -## end gnulib module string - -## begin gnulib module strnlen - - -EXTRA_DIST += strnlen.c - -EXTRA_libgreputils_a_SOURCES += strnlen.c - -## end gnulib module strnlen - -## begin gnulib module strnlen1 - -libgreputils_a_SOURCES += strnlen1.h strnlen1.c - -## end gnulib module strnlen1 - -## begin gnulib module strstr-simple - - -EXTRA_DIST += str-two-way.h strstr.c - -EXTRA_libgreputils_a_SOURCES += strstr.c - -## end gnulib module strstr-simple - -## begin gnulib module strtoimax - - -EXTRA_DIST += strtoimax.c - -EXTRA_libgreputils_a_SOURCES += strtoimax.c - -## end gnulib module strtoimax - -## begin gnulib module strtoll - - -EXTRA_DIST += strtol.c strtoll.c - -EXTRA_libgreputils_a_SOURCES += strtol.c strtoll.c - -## end gnulib module strtoll - -## begin gnulib module strtoull - - -EXTRA_DIST += 
strtol.c strtoul.c strtoull.c - -EXTRA_libgreputils_a_SOURCES += strtol.c strtoul.c strtoull.c - -## end gnulib module strtoull - -## begin gnulib module strtoumax - - -EXTRA_DIST += strtoimax.c strtoumax.c - -EXTRA_libgreputils_a_SOURCES += strtoimax.c strtoumax.c - -## end gnulib module strtoumax - -## begin gnulib module sys_stat - -BUILT_SOURCES += sys/stat.h - -# We need the following in order to create when the system -# has one that is incomplete. -sys/stat.h: sys_stat.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) - $(AM_V_at)$(MKDIR_P) sys - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_SYS_STAT_H''@|$(NEXT_SYS_STAT_H)|g' \ - -e 's|@''WINDOWS_64_BIT_ST_SIZE''@|$(WINDOWS_64_BIT_ST_SIZE)|g' \ - -e 's/@''GNULIB_FCHMODAT''@/$(GNULIB_FCHMODAT)/g' \ - -e 's/@''GNULIB_FSTAT''@/$(GNULIB_FSTAT)/g' \ - -e 's/@''GNULIB_FSTATAT''@/$(GNULIB_FSTATAT)/g' \ - -e 's/@''GNULIB_FUTIMENS''@/$(GNULIB_FUTIMENS)/g' \ - -e 's/@''GNULIB_LCHMOD''@/$(GNULIB_LCHMOD)/g' \ - -e 's/@''GNULIB_LSTAT''@/$(GNULIB_LSTAT)/g' \ - -e 's/@''GNULIB_MKDIRAT''@/$(GNULIB_MKDIRAT)/g' \ - -e 's/@''GNULIB_MKFIFO''@/$(GNULIB_MKFIFO)/g' \ - -e 's/@''GNULIB_MKFIFOAT''@/$(GNULIB_MKFIFOAT)/g' \ - -e 's/@''GNULIB_MKNOD''@/$(GNULIB_MKNOD)/g' \ - -e 's/@''GNULIB_MKNODAT''@/$(GNULIB_MKNODAT)/g' \ - -e 's/@''GNULIB_STAT''@/$(GNULIB_STAT)/g' \ - -e 's/@''GNULIB_UTIMENSAT''@/$(GNULIB_UTIMENSAT)/g' \ - -e 's|@''HAVE_FCHMODAT''@|$(HAVE_FCHMODAT)|g' \ - -e 's|@''HAVE_FSTATAT''@|$(HAVE_FSTATAT)|g' \ - -e 's|@''HAVE_FUTIMENS''@|$(HAVE_FUTIMENS)|g' \ - -e 's|@''HAVE_LCHMOD''@|$(HAVE_LCHMOD)|g' \ - -e 's|@''HAVE_LSTAT''@|$(HAVE_LSTAT)|g' \ - -e 's|@''HAVE_MKDIRAT''@|$(HAVE_MKDIRAT)|g' \ - -e 's|@''HAVE_MKFIFO''@|$(HAVE_MKFIFO)|g' \ 
- -e 's|@''HAVE_MKFIFOAT''@|$(HAVE_MKFIFOAT)|g' \ - -e 's|@''HAVE_MKNOD''@|$(HAVE_MKNOD)|g' \ - -e 's|@''HAVE_MKNODAT''@|$(HAVE_MKNODAT)|g' \ - -e 's|@''HAVE_UTIMENSAT''@|$(HAVE_UTIMENSAT)|g' \ - -e 's|@''REPLACE_FSTAT''@|$(REPLACE_FSTAT)|g' \ - -e 's|@''REPLACE_FSTATAT''@|$(REPLACE_FSTATAT)|g' \ - -e 's|@''REPLACE_FUTIMENS''@|$(REPLACE_FUTIMENS)|g' \ - -e 's|@''REPLACE_LSTAT''@|$(REPLACE_LSTAT)|g' \ - -e 's|@''REPLACE_MKDIR''@|$(REPLACE_MKDIR)|g' \ - -e 's|@''REPLACE_MKFIFO''@|$(REPLACE_MKFIFO)|g' \ - -e 's|@''REPLACE_MKNOD''@|$(REPLACE_MKNOD)|g' \ - -e 's|@''REPLACE_STAT''@|$(REPLACE_STAT)|g' \ - -e 's|@''REPLACE_UTIMENSAT''@|$(REPLACE_UTIMENSAT)|g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ - < $(srcdir)/sys_stat.in.h; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += sys/stat.h sys/stat.h-t -MOSTLYCLEANDIRS += sys - -EXTRA_DIST += sys_stat.in.h - -## end gnulib module sys_stat - -## begin gnulib module sys_time - -BUILT_SOURCES += sys/time.h - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -sys/time.h: sys_time.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) - $(AM_V_at)$(MKDIR_P) sys - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/'; \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's/@''HAVE_SYS_TIME_H''@/$(HAVE_SYS_TIME_H)/g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_SYS_TIME_H''@|$(NEXT_SYS_TIME_H)|g' \ - -e 's/@''GNULIB_GETTIMEOFDAY''@/$(GNULIB_GETTIMEOFDAY)/g' \ - -e 's|@''HAVE_WINSOCK2_H''@|$(HAVE_WINSOCK2_H)|g' \ - -e 's/@''HAVE_GETTIMEOFDAY''@/$(HAVE_GETTIMEOFDAY)/g' \ - -e 's/@''HAVE_STRUCT_TIMEVAL''@/$(HAVE_STRUCT_TIMEVAL)/g' \ - -e 's/@''REPLACE_GETTIMEOFDAY''@/$(REPLACE_GETTIMEOFDAY)/g' \ - -e 's/@''REPLACE_STRUCT_TIMEVAL''@/$(REPLACE_STRUCT_TIMEVAL)/g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ - < $(srcdir)/sys_time.in.h; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += sys/time.h sys/time.h-t - -EXTRA_DIST += sys_time.in.h - -## end gnulib module sys_time - -## begin gnulib module sys_types - -BUILT_SOURCES += sys/types.h - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -sys/types.h: sys_types.in.h $(top_builddir)/config.status - $(AM_V_at)$(MKDIR_P) sys - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/'; \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_SYS_TYPES_H''@|$(NEXT_SYS_TYPES_H)|g' \ - -e 's|@''WINDOWS_64_BIT_OFF_T''@|$(WINDOWS_64_BIT_OFF_T)|g' \ - < $(srcdir)/sys_types.in.h; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += sys/types.h sys/types.h-t - -EXTRA_DIST += sys_types.in.h - -## end gnulib module sys_types - -## begin gnulib module threadlib - -libgreputils_a_SOURCES += glthread/threadlib.c - -EXTRA_DIST += $(top_srcdir)/build-aux/config.rpath - -## end gnulib module threadlib - -## begin gnulib module time - -BUILT_SOURCES += time.h - -# We need the following in order to create when the system -# doesn't have one that works with the given compiler. -time.h: time.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/' && \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_TIME_H''@|$(NEXT_TIME_H)|g' \ - -e 's/@''GNULIB_GETTIMEOFDAY''@/$(GNULIB_GETTIMEOFDAY)/g' \ - -e 's/@''GNULIB_MKTIME''@/$(GNULIB_MKTIME)/g' \ - -e 's/@''GNULIB_NANOSLEEP''@/$(GNULIB_NANOSLEEP)/g' \ - -e 's/@''GNULIB_STRPTIME''@/$(GNULIB_STRPTIME)/g' \ - -e 's/@''GNULIB_TIMEGM''@/$(GNULIB_TIMEGM)/g' \ - -e 's/@''GNULIB_TIME_R''@/$(GNULIB_TIME_R)/g' \ - -e 's/@''GNULIB_TIME_RZ''@/$(GNULIB_TIME_RZ)/g' \ - -e 's|@''HAVE_DECL_LOCALTIME_R''@|$(HAVE_DECL_LOCALTIME_R)|g' \ - -e 's|@''HAVE_NANOSLEEP''@|$(HAVE_NANOSLEEP)|g' \ - -e 's|@''HAVE_STRPTIME''@|$(HAVE_STRPTIME)|g' \ - -e 's|@''HAVE_TIMEGM''@|$(HAVE_TIMEGM)|g' \ - -e 's|@''HAVE_TIMEZONE_T''@|$(HAVE_TIMEZONE_T)|g' \ - -e 's|@''REPLACE_GMTIME''@|$(REPLACE_GMTIME)|g' \ - -e 's|@''REPLACE_LOCALTIME''@|$(REPLACE_LOCALTIME)|g' \ - -e 's|@''REPLACE_LOCALTIME_R''@|$(REPLACE_LOCALTIME_R)|g' \ - -e 's|@''REPLACE_MKTIME''@|$(REPLACE_MKTIME)|g' \ - -e 's|@''REPLACE_NANOSLEEP''@|$(REPLACE_NANOSLEEP)|g' \ - -e 's|@''REPLACE_TIMEGM''@|$(REPLACE_TIMEGM)|g' \ - -e 's|@''PTHREAD_H_DEFINES_STRUCT_TIMESPEC''@|$(PTHREAD_H_DEFINES_STRUCT_TIMESPEC)|g' \ - -e 's|@''SYS_TIME_H_DEFINES_STRUCT_TIMESPEC''@|$(SYS_TIME_H_DEFINES_STRUCT_TIMESPEC)|g' \ - -e 's|@''TIME_H_DEFINES_STRUCT_TIMESPEC''@|$(TIME_H_DEFINES_STRUCT_TIMESPEC)|g' \ - -e 's|@''UNISTD_H_DEFINES_STRUCT_TIMESPEC''@|$(UNISTD_H_DEFINES_STRUCT_TIMESPEC)|g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ - < $(srcdir)/time.in.h; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += time.h time.h-t - -EXTRA_DIST += time.in.h - -## end gnulib module time - -## begin gnulib module trim - -libgreputils_a_SOURCES += trim.c - -EXTRA_DIST += 
trim.h - -## end gnulib module trim - -## begin gnulib module unistd - -BUILT_SOURCES += unistd.h -libgreputils_a_SOURCES += unistd.c - -# We need the following in order to create an empty placeholder for -# when the system doesn't have one. -unistd.h: unistd.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''HAVE_UNISTD_H''@|$(HAVE_UNISTD_H)|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_UNISTD_H''@|$(NEXT_UNISTD_H)|g' \ - -e 's|@''WINDOWS_64_BIT_OFF_T''@|$(WINDOWS_64_BIT_OFF_T)|g' \ - -e 's/@''GNULIB_CHDIR''@/$(GNULIB_CHDIR)/g' \ - -e 's/@''GNULIB_CHOWN''@/$(GNULIB_CHOWN)/g' \ - -e 's/@''GNULIB_CLOSE''@/$(GNULIB_CLOSE)/g' \ - -e 's/@''GNULIB_DUP''@/$(GNULIB_DUP)/g' \ - -e 's/@''GNULIB_DUP2''@/$(GNULIB_DUP2)/g' \ - -e 's/@''GNULIB_DUP3''@/$(GNULIB_DUP3)/g' \ - -e 's/@''GNULIB_ENVIRON''@/$(GNULIB_ENVIRON)/g' \ - -e 's/@''GNULIB_EUIDACCESS''@/$(GNULIB_EUIDACCESS)/g' \ - -e 's/@''GNULIB_FACCESSAT''@/$(GNULIB_FACCESSAT)/g' \ - -e 's/@''GNULIB_FCHDIR''@/$(GNULIB_FCHDIR)/g' \ - -e 's/@''GNULIB_FCHOWNAT''@/$(GNULIB_FCHOWNAT)/g' \ - -e 's/@''GNULIB_FDATASYNC''@/$(GNULIB_FDATASYNC)/g' \ - -e 's/@''GNULIB_FSYNC''@/$(GNULIB_FSYNC)/g' \ - -e 's/@''GNULIB_FTRUNCATE''@/$(GNULIB_FTRUNCATE)/g' \ - -e 's/@''GNULIB_GETCWD''@/$(GNULIB_GETCWD)/g' \ - -e 's/@''GNULIB_GETDOMAINNAME''@/$(GNULIB_GETDOMAINNAME)/g' \ - -e 's/@''GNULIB_GETDTABLESIZE''@/$(GNULIB_GETDTABLESIZE)/g' \ - -e 's/@''GNULIB_GETGROUPS''@/$(GNULIB_GETGROUPS)/g' \ - -e 's/@''GNULIB_GETHOSTNAME''@/$(GNULIB_GETHOSTNAME)/g' \ - -e 's/@''GNULIB_GETLOGIN''@/$(GNULIB_GETLOGIN)/g' \ - -e 's/@''GNULIB_GETLOGIN_R''@/$(GNULIB_GETLOGIN_R)/g' \ - -e 's/@''GNULIB_GETPAGESIZE''@/$(GNULIB_GETPAGESIZE)/g' \ - -e 
's/@''GNULIB_GETUSERSHELL''@/$(GNULIB_GETUSERSHELL)/g' \ - -e 's/@''GNULIB_GROUP_MEMBER''@/$(GNULIB_GROUP_MEMBER)/g' \ - -e 's/@''GNULIB_ISATTY''@/$(GNULIB_ISATTY)/g' \ - -e 's/@''GNULIB_LCHOWN''@/$(GNULIB_LCHOWN)/g' \ - -e 's/@''GNULIB_LINK''@/$(GNULIB_LINK)/g' \ - -e 's/@''GNULIB_LINKAT''@/$(GNULIB_LINKAT)/g' \ - -e 's/@''GNULIB_LSEEK''@/$(GNULIB_LSEEK)/g' \ - -e 's/@''GNULIB_PIPE''@/$(GNULIB_PIPE)/g' \ - -e 's/@''GNULIB_PIPE2''@/$(GNULIB_PIPE2)/g' \ - -e 's/@''GNULIB_PREAD''@/$(GNULIB_PREAD)/g' \ - -e 's/@''GNULIB_PWRITE''@/$(GNULIB_PWRITE)/g' \ - -e 's/@''GNULIB_READ''@/$(GNULIB_READ)/g' \ - -e 's/@''GNULIB_READLINK''@/$(GNULIB_READLINK)/g' \ - -e 's/@''GNULIB_READLINKAT''@/$(GNULIB_READLINKAT)/g' \ - -e 's/@''GNULIB_RMDIR''@/$(GNULIB_RMDIR)/g' \ - -e 's/@''GNULIB_SETHOSTNAME''@/$(GNULIB_SETHOSTNAME)/g' \ - -e 's/@''GNULIB_SLEEP''@/$(GNULIB_SLEEP)/g' \ - -e 's/@''GNULIB_SYMLINK''@/$(GNULIB_SYMLINK)/g' \ - -e 's/@''GNULIB_SYMLINKAT''@/$(GNULIB_SYMLINKAT)/g' \ - -e 's/@''GNULIB_TTYNAME_R''@/$(GNULIB_TTYNAME_R)/g' \ - -e 's/@''GNULIB_UNISTD_H_GETOPT''@/0$(GNULIB_GL_UNISTD_H_GETOPT)/g' \ - -e 's/@''GNULIB_UNISTD_H_NONBLOCKING''@/$(GNULIB_UNISTD_H_NONBLOCKING)/g' \ - -e 's/@''GNULIB_UNISTD_H_SIGPIPE''@/$(GNULIB_UNISTD_H_SIGPIPE)/g' \ - -e 's/@''GNULIB_UNLINK''@/$(GNULIB_UNLINK)/g' \ - -e 's/@''GNULIB_UNLINKAT''@/$(GNULIB_UNLINKAT)/g' \ - -e 's/@''GNULIB_USLEEP''@/$(GNULIB_USLEEP)/g' \ - -e 's/@''GNULIB_WRITE''@/$(GNULIB_WRITE)/g' \ - < $(srcdir)/unistd.in.h | \ - sed -e 's|@''HAVE_CHOWN''@|$(HAVE_CHOWN)|g' \ - -e 's|@''HAVE_DUP2''@|$(HAVE_DUP2)|g' \ - -e 's|@''HAVE_DUP3''@|$(HAVE_DUP3)|g' \ - -e 's|@''HAVE_EUIDACCESS''@|$(HAVE_EUIDACCESS)|g' \ - -e 's|@''HAVE_FACCESSAT''@|$(HAVE_FACCESSAT)|g' \ - -e 's|@''HAVE_FCHDIR''@|$(HAVE_FCHDIR)|g' \ - -e 's|@''HAVE_FCHOWNAT''@|$(HAVE_FCHOWNAT)|g' \ - -e 's|@''HAVE_FDATASYNC''@|$(HAVE_FDATASYNC)|g' \ - -e 's|@''HAVE_FSYNC''@|$(HAVE_FSYNC)|g' \ - -e 's|@''HAVE_FTRUNCATE''@|$(HAVE_FTRUNCATE)|g' \ - -e 
's|@''HAVE_GETDTABLESIZE''@|$(HAVE_GETDTABLESIZE)|g' \ - -e 's|@''HAVE_GETGROUPS''@|$(HAVE_GETGROUPS)|g' \ - -e 's|@''HAVE_GETHOSTNAME''@|$(HAVE_GETHOSTNAME)|g' \ - -e 's|@''HAVE_GETLOGIN''@|$(HAVE_GETLOGIN)|g' \ - -e 's|@''HAVE_GETPAGESIZE''@|$(HAVE_GETPAGESIZE)|g' \ - -e 's|@''HAVE_GROUP_MEMBER''@|$(HAVE_GROUP_MEMBER)|g' \ - -e 's|@''HAVE_LCHOWN''@|$(HAVE_LCHOWN)|g' \ - -e 's|@''HAVE_LINK''@|$(HAVE_LINK)|g' \ - -e 's|@''HAVE_LINKAT''@|$(HAVE_LINKAT)|g' \ - -e 's|@''HAVE_PIPE''@|$(HAVE_PIPE)|g' \ - -e 's|@''HAVE_PIPE2''@|$(HAVE_PIPE2)|g' \ - -e 's|@''HAVE_PREAD''@|$(HAVE_PREAD)|g' \ - -e 's|@''HAVE_PWRITE''@|$(HAVE_PWRITE)|g' \ - -e 's|@''HAVE_READLINK''@|$(HAVE_READLINK)|g' \ - -e 's|@''HAVE_READLINKAT''@|$(HAVE_READLINKAT)|g' \ - -e 's|@''HAVE_SETHOSTNAME''@|$(HAVE_SETHOSTNAME)|g' \ - -e 's|@''HAVE_SLEEP''@|$(HAVE_SLEEP)|g' \ - -e 's|@''HAVE_SYMLINK''@|$(HAVE_SYMLINK)|g' \ - -e 's|@''HAVE_SYMLINKAT''@|$(HAVE_SYMLINKAT)|g' \ - -e 's|@''HAVE_UNLINKAT''@|$(HAVE_UNLINKAT)|g' \ - -e 's|@''HAVE_USLEEP''@|$(HAVE_USLEEP)|g' \ - -e 's|@''HAVE_DECL_ENVIRON''@|$(HAVE_DECL_ENVIRON)|g' \ - -e 's|@''HAVE_DECL_FCHDIR''@|$(HAVE_DECL_FCHDIR)|g' \ - -e 's|@''HAVE_DECL_FDATASYNC''@|$(HAVE_DECL_FDATASYNC)|g' \ - -e 's|@''HAVE_DECL_GETDOMAINNAME''@|$(HAVE_DECL_GETDOMAINNAME)|g' \ - -e 's|@''HAVE_DECL_GETLOGIN_R''@|$(HAVE_DECL_GETLOGIN_R)|g' \ - -e 's|@''HAVE_DECL_GETPAGESIZE''@|$(HAVE_DECL_GETPAGESIZE)|g' \ - -e 's|@''HAVE_DECL_GETUSERSHELL''@|$(HAVE_DECL_GETUSERSHELL)|g' \ - -e 's|@''HAVE_DECL_SETHOSTNAME''@|$(HAVE_DECL_SETHOSTNAME)|g' \ - -e 's|@''HAVE_DECL_TTYNAME_R''@|$(HAVE_DECL_TTYNAME_R)|g' \ - -e 's|@''HAVE_OS_H''@|$(HAVE_OS_H)|g' \ - -e 's|@''HAVE_SYS_PARAM_H''@|$(HAVE_SYS_PARAM_H)|g' \ - | \ - sed -e 's|@''REPLACE_CHOWN''@|$(REPLACE_CHOWN)|g' \ - -e 's|@''REPLACE_CLOSE''@|$(REPLACE_CLOSE)|g' \ - -e 's|@''REPLACE_DUP''@|$(REPLACE_DUP)|g' \ - -e 's|@''REPLACE_DUP2''@|$(REPLACE_DUP2)|g' \ - -e 's|@''REPLACE_FCHOWNAT''@|$(REPLACE_FCHOWNAT)|g' \ - -e 
's|@''REPLACE_FTRUNCATE''@|$(REPLACE_FTRUNCATE)|g' \ - -e 's|@''REPLACE_GETCWD''@|$(REPLACE_GETCWD)|g' \ - -e 's|@''REPLACE_GETDOMAINNAME''@|$(REPLACE_GETDOMAINNAME)|g' \ - -e 's|@''REPLACE_GETDTABLESIZE''@|$(REPLACE_GETDTABLESIZE)|g' \ - -e 's|@''REPLACE_GETLOGIN_R''@|$(REPLACE_GETLOGIN_R)|g' \ - -e 's|@''REPLACE_GETGROUPS''@|$(REPLACE_GETGROUPS)|g' \ - -e 's|@''REPLACE_GETPAGESIZE''@|$(REPLACE_GETPAGESIZE)|g' \ - -e 's|@''REPLACE_ISATTY''@|$(REPLACE_ISATTY)|g' \ - -e 's|@''REPLACE_LCHOWN''@|$(REPLACE_LCHOWN)|g' \ - -e 's|@''REPLACE_LINK''@|$(REPLACE_LINK)|g' \ - -e 's|@''REPLACE_LINKAT''@|$(REPLACE_LINKAT)|g' \ - -e 's|@''REPLACE_LSEEK''@|$(REPLACE_LSEEK)|g' \ - -e 's|@''REPLACE_PREAD''@|$(REPLACE_PREAD)|g' \ - -e 's|@''REPLACE_PWRITE''@|$(REPLACE_PWRITE)|g' \ - -e 's|@''REPLACE_READ''@|$(REPLACE_READ)|g' \ - -e 's|@''REPLACE_READLINK''@|$(REPLACE_READLINK)|g' \ - -e 's|@''REPLACE_READLINKAT''@|$(REPLACE_READLINKAT)|g' \ - -e 's|@''REPLACE_RMDIR''@|$(REPLACE_RMDIR)|g' \ - -e 's|@''REPLACE_SLEEP''@|$(REPLACE_SLEEP)|g' \ - -e 's|@''REPLACE_SYMLINK''@|$(REPLACE_SYMLINK)|g' \ - -e 's|@''REPLACE_SYMLINKAT''@|$(REPLACE_SYMLINKAT)|g' \ - -e 's|@''REPLACE_TTYNAME_R''@|$(REPLACE_TTYNAME_R)|g' \ - -e 's|@''REPLACE_UNLINK''@|$(REPLACE_UNLINK)|g' \ - -e 's|@''REPLACE_UNLINKAT''@|$(REPLACE_UNLINKAT)|g' \ - -e 's|@''REPLACE_USLEEP''@|$(REPLACE_USLEEP)|g' \ - -e 's|@''REPLACE_WRITE''@|$(REPLACE_WRITE)|g' \ - -e 's|@''UNISTD_H_HAVE_WINSOCK2_H''@|$(UNISTD_H_HAVE_WINSOCK2_H)|g' \ - -e 's|@''UNISTD_H_HAVE_WINSOCK2_H_AND_USE_SOCKETS''@|$(UNISTD_H_HAVE_WINSOCK2_H_AND_USE_SOCKETS)|g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)'; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += unistd.h unistd.h-t - -EXTRA_DIST += unistd.in.h - -## end gnulib module unistd - -## begin gnulib module unistd-safer - -libgreputils_a_SOURCES += dup-safer.c fd-safer.c 
pipe-safer.c - -EXTRA_DIST += unistd--.h unistd-safer.h - -## end gnulib module unistd-safer - -## begin gnulib module unistr/base - -BUILT_SOURCES += $(LIBUNISTRING_UNISTR_H) - -unistr.h: unistr.in.h - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ - cat $(srcdir)/unistr.in.h; \ - } > $@-t && \ - mv -f $@-t $@ -MOSTLYCLEANFILES += unistr.h unistr.h-t - -EXTRA_DIST += unistr.in.h - -## end gnulib module unistr/base - -## begin gnulib module unistr/u8-mbtoucr - -if LIBUNISTRING_COMPILE_UNISTR_U8_MBTOUCR -libgreputils_a_SOURCES += unistr/u8-mbtoucr.c -endif - -## end gnulib module unistr/u8-mbtoucr - -## begin gnulib module unistr/u8-uctomb - -if LIBUNISTRING_COMPILE_UNISTR_U8_UCTOMB -libgreputils_a_SOURCES += unistr/u8-uctomb.c unistr/u8-uctomb-aux.c -endif - -## end gnulib module unistr/u8-uctomb - -## begin gnulib module unitypes - -BUILT_SOURCES += $(LIBUNISTRING_UNITYPES_H) - -unitypes.h: unitypes.in.h - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ - cat $(srcdir)/unitypes.in.h; \ - } > $@-t && \ - mv -f $@-t $@ -MOSTLYCLEANFILES += unitypes.h unitypes.h-t - -EXTRA_DIST += unitypes.in.h - -## end gnulib module unitypes - -## begin gnulib module uniwidth/base - -BUILT_SOURCES += $(LIBUNISTRING_UNIWIDTH_H) - -uniwidth.h: uniwidth.in.h - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/'; \ - cat $(srcdir)/uniwidth.in.h; \ - } > $@-t && \ - mv -f $@-t $@ -MOSTLYCLEANFILES += uniwidth.h uniwidth.h-t - -EXTRA_DIST += localcharset.h uniwidth.in.h - -## end gnulib module uniwidth/base - -## begin gnulib module uniwidth/width - -if LIBUNISTRING_COMPILE_UNIWIDTH_WIDTH -libgreputils_a_SOURCES += uniwidth/width.c -endif - -EXTRA_DIST += uniwidth/cjk.h - -## end gnulib module uniwidth/width - -## begin gnulib module unlocked-io - - -EXTRA_DIST += unlocked-io.h - -## end gnulib module unlocked-io - -## begin gnulib module update-copyright - - -EXTRA_DIST += $(top_srcdir)/build-aux/update-copyright - -## end gnulib module update-copyright - -## begin gnulib module useless-if-before-free - - -EXTRA_DIST += $(top_srcdir)/build-aux/useless-if-before-free - -## end gnulib module useless-if-before-free - -## begin gnulib module vc-list-files - - -EXTRA_DIST += $(top_srcdir)/build-aux/vc-list-files - -## end gnulib module vc-list-files - -## begin gnulib module verify - - -EXTRA_DIST += verify.h - -## end gnulib module verify - -## begin gnulib module version-etc - -libgreputils_a_SOURCES += version-etc.h version-etc.c - -## end gnulib module version-etc - -## begin gnulib module version-etc-fsf - -libgreputils_a_SOURCES += version-etc-fsf.c - -## end gnulib module version-etc-fsf - -## begin gnulib module wchar - -BUILT_SOURCES += wchar.h - -# We need the following in order to create when the system -# version does not work standalone. -wchar.h: wchar.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/'; \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''HAVE_FEATURES_H''@|$(HAVE_FEATURES_H)|g' \ - -e 's|@''NEXT_WCHAR_H''@|$(NEXT_WCHAR_H)|g' \ - -e 's|@''HAVE_WCHAR_H''@|$(HAVE_WCHAR_H)|g' \ - -e 's/@''GNULIB_BTOWC''@/$(GNULIB_BTOWC)/g' \ - -e 's/@''GNULIB_WCTOB''@/$(GNULIB_WCTOB)/g' \ - -e 's/@''GNULIB_MBSINIT''@/$(GNULIB_MBSINIT)/g' \ - -e 's/@''GNULIB_MBRTOWC''@/$(GNULIB_MBRTOWC)/g' \ - -e 's/@''GNULIB_MBRLEN''@/$(GNULIB_MBRLEN)/g' \ - -e 's/@''GNULIB_MBSRTOWCS''@/$(GNULIB_MBSRTOWCS)/g' \ - -e 's/@''GNULIB_MBSNRTOWCS''@/$(GNULIB_MBSNRTOWCS)/g' \ - -e 's/@''GNULIB_WCRTOMB''@/$(GNULIB_WCRTOMB)/g' \ - -e 's/@''GNULIB_WCSRTOMBS''@/$(GNULIB_WCSRTOMBS)/g' \ - -e 's/@''GNULIB_WCSNRTOMBS''@/$(GNULIB_WCSNRTOMBS)/g' \ - -e 's/@''GNULIB_WCWIDTH''@/$(GNULIB_WCWIDTH)/g' \ - -e 's/@''GNULIB_WMEMCHR''@/$(GNULIB_WMEMCHR)/g' \ - -e 's/@''GNULIB_WMEMCMP''@/$(GNULIB_WMEMCMP)/g' \ - -e 's/@''GNULIB_WMEMCPY''@/$(GNULIB_WMEMCPY)/g' \ - -e 's/@''GNULIB_WMEMMOVE''@/$(GNULIB_WMEMMOVE)/g' \ - -e 's/@''GNULIB_WMEMSET''@/$(GNULIB_WMEMSET)/g' \ - -e 's/@''GNULIB_WCSLEN''@/$(GNULIB_WCSLEN)/g' \ - -e 's/@''GNULIB_WCSNLEN''@/$(GNULIB_WCSNLEN)/g' \ - -e 's/@''GNULIB_WCSCPY''@/$(GNULIB_WCSCPY)/g' \ - -e 's/@''GNULIB_WCPCPY''@/$(GNULIB_WCPCPY)/g' \ - -e 's/@''GNULIB_WCSNCPY''@/$(GNULIB_WCSNCPY)/g' \ - -e 's/@''GNULIB_WCPNCPY''@/$(GNULIB_WCPNCPY)/g' \ - -e 's/@''GNULIB_WCSCAT''@/$(GNULIB_WCSCAT)/g' \ - -e 's/@''GNULIB_WCSNCAT''@/$(GNULIB_WCSNCAT)/g' \ - -e 's/@''GNULIB_WCSCMP''@/$(GNULIB_WCSCMP)/g' \ - -e 's/@''GNULIB_WCSNCMP''@/$(GNULIB_WCSNCMP)/g' \ - -e 's/@''GNULIB_WCSCASECMP''@/$(GNULIB_WCSCASECMP)/g' \ - -e 's/@''GNULIB_WCSNCASECMP''@/$(GNULIB_WCSNCASECMP)/g' \ - -e 's/@''GNULIB_WCSCOLL''@/$(GNULIB_WCSCOLL)/g' \ - -e 's/@''GNULIB_WCSXFRM''@/$(GNULIB_WCSXFRM)/g' \ - -e 's/@''GNULIB_WCSDUP''@/$(GNULIB_WCSDUP)/g' \ - -e 
's/@''GNULIB_WCSCHR''@/$(GNULIB_WCSCHR)/g' \ - -e 's/@''GNULIB_WCSRCHR''@/$(GNULIB_WCSRCHR)/g' \ - -e 's/@''GNULIB_WCSCSPN''@/$(GNULIB_WCSCSPN)/g' \ - -e 's/@''GNULIB_WCSSPN''@/$(GNULIB_WCSSPN)/g' \ - -e 's/@''GNULIB_WCSPBRK''@/$(GNULIB_WCSPBRK)/g' \ - -e 's/@''GNULIB_WCSSTR''@/$(GNULIB_WCSSTR)/g' \ - -e 's/@''GNULIB_WCSTOK''@/$(GNULIB_WCSTOK)/g' \ - -e 's/@''GNULIB_WCSWIDTH''@/$(GNULIB_WCSWIDTH)/g' \ - < $(srcdir)/wchar.in.h | \ - sed -e 's|@''HAVE_WINT_T''@|$(HAVE_WINT_T)|g' \ - -e 's|@''HAVE_BTOWC''@|$(HAVE_BTOWC)|g' \ - -e 's|@''HAVE_MBSINIT''@|$(HAVE_MBSINIT)|g' \ - -e 's|@''HAVE_MBRTOWC''@|$(HAVE_MBRTOWC)|g' \ - -e 's|@''HAVE_MBRLEN''@|$(HAVE_MBRLEN)|g' \ - -e 's|@''HAVE_MBSRTOWCS''@|$(HAVE_MBSRTOWCS)|g' \ - -e 's|@''HAVE_MBSNRTOWCS''@|$(HAVE_MBSNRTOWCS)|g' \ - -e 's|@''HAVE_WCRTOMB''@|$(HAVE_WCRTOMB)|g' \ - -e 's|@''HAVE_WCSRTOMBS''@|$(HAVE_WCSRTOMBS)|g' \ - -e 's|@''HAVE_WCSNRTOMBS''@|$(HAVE_WCSNRTOMBS)|g' \ - -e 's|@''HAVE_WMEMCHR''@|$(HAVE_WMEMCHR)|g' \ - -e 's|@''HAVE_WMEMCMP''@|$(HAVE_WMEMCMP)|g' \ - -e 's|@''HAVE_WMEMCPY''@|$(HAVE_WMEMCPY)|g' \ - -e 's|@''HAVE_WMEMMOVE''@|$(HAVE_WMEMMOVE)|g' \ - -e 's|@''HAVE_WMEMSET''@|$(HAVE_WMEMSET)|g' \ - -e 's|@''HAVE_WCSLEN''@|$(HAVE_WCSLEN)|g' \ - -e 's|@''HAVE_WCSNLEN''@|$(HAVE_WCSNLEN)|g' \ - -e 's|@''HAVE_WCSCPY''@|$(HAVE_WCSCPY)|g' \ - -e 's|@''HAVE_WCPCPY''@|$(HAVE_WCPCPY)|g' \ - -e 's|@''HAVE_WCSNCPY''@|$(HAVE_WCSNCPY)|g' \ - -e 's|@''HAVE_WCPNCPY''@|$(HAVE_WCPNCPY)|g' \ - -e 's|@''HAVE_WCSCAT''@|$(HAVE_WCSCAT)|g' \ - -e 's|@''HAVE_WCSNCAT''@|$(HAVE_WCSNCAT)|g' \ - -e 's|@''HAVE_WCSCMP''@|$(HAVE_WCSCMP)|g' \ - -e 's|@''HAVE_WCSNCMP''@|$(HAVE_WCSNCMP)|g' \ - -e 's|@''HAVE_WCSCASECMP''@|$(HAVE_WCSCASECMP)|g' \ - -e 's|@''HAVE_WCSNCASECMP''@|$(HAVE_WCSNCASECMP)|g' \ - -e 's|@''HAVE_WCSCOLL''@|$(HAVE_WCSCOLL)|g' \ - -e 's|@''HAVE_WCSXFRM''@|$(HAVE_WCSXFRM)|g' \ - -e 's|@''HAVE_WCSDUP''@|$(HAVE_WCSDUP)|g' \ - -e 's|@''HAVE_WCSCHR''@|$(HAVE_WCSCHR)|g' \ - -e 's|@''HAVE_WCSRCHR''@|$(HAVE_WCSRCHR)|g' \ - -e 
's|@''HAVE_WCSCSPN''@|$(HAVE_WCSCSPN)|g' \ - -e 's|@''HAVE_WCSSPN''@|$(HAVE_WCSSPN)|g' \ - -e 's|@''HAVE_WCSPBRK''@|$(HAVE_WCSPBRK)|g' \ - -e 's|@''HAVE_WCSSTR''@|$(HAVE_WCSSTR)|g' \ - -e 's|@''HAVE_WCSTOK''@|$(HAVE_WCSTOK)|g' \ - -e 's|@''HAVE_WCSWIDTH''@|$(HAVE_WCSWIDTH)|g' \ - -e 's|@''HAVE_DECL_WCTOB''@|$(HAVE_DECL_WCTOB)|g' \ - -e 's|@''HAVE_DECL_WCWIDTH''@|$(HAVE_DECL_WCWIDTH)|g' \ - | \ - sed -e 's|@''REPLACE_MBSTATE_T''@|$(REPLACE_MBSTATE_T)|g' \ - -e 's|@''REPLACE_BTOWC''@|$(REPLACE_BTOWC)|g' \ - -e 's|@''REPLACE_WCTOB''@|$(REPLACE_WCTOB)|g' \ - -e 's|@''REPLACE_MBSINIT''@|$(REPLACE_MBSINIT)|g' \ - -e 's|@''REPLACE_MBRTOWC''@|$(REPLACE_MBRTOWC)|g' \ - -e 's|@''REPLACE_MBRLEN''@|$(REPLACE_MBRLEN)|g' \ - -e 's|@''REPLACE_MBSRTOWCS''@|$(REPLACE_MBSRTOWCS)|g' \ - -e 's|@''REPLACE_MBSNRTOWCS''@|$(REPLACE_MBSNRTOWCS)|g' \ - -e 's|@''REPLACE_WCRTOMB''@|$(REPLACE_WCRTOMB)|g' \ - -e 's|@''REPLACE_WCSRTOMBS''@|$(REPLACE_WCSRTOMBS)|g' \ - -e 's|@''REPLACE_WCSNRTOMBS''@|$(REPLACE_WCSNRTOMBS)|g' \ - -e 's|@''REPLACE_WCWIDTH''@|$(REPLACE_WCWIDTH)|g' \ - -e 's|@''REPLACE_WCSWIDTH''@|$(REPLACE_WCSWIDTH)|g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)'; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += wchar.h wchar.h-t - -EXTRA_DIST += wchar.in.h - -## end gnulib module wchar - -## begin gnulib module wcrtomb - - -EXTRA_DIST += wcrtomb.c - -EXTRA_libgreputils_a_SOURCES += wcrtomb.c - -## end gnulib module wcrtomb - -## begin gnulib module wctob - - -EXTRA_DIST += wctob.c - -EXTRA_libgreputils_a_SOURCES += wctob.c - -## end gnulib module wctob - -## begin gnulib module wctomb - - -EXTRA_DIST += wctomb-impl.h wctomb.c - -EXTRA_libgreputils_a_SOURCES += wctomb.c - -## end gnulib module wctomb - -## begin gnulib module wctype-h - -BUILT_SOURCES += wctype.h -libgreputils_a_SOURCES += wctype-h.c - -# We need the following in order to create when the 
system -# doesn't have one that works with the given compiler. -wctype.h: wctype.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(WARN_ON_USE_H) - $(AM_V_GEN)rm -f $@-t $@ && \ - { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ - sed -e 's|@''GUARD_PREFIX''@|GL|g' \ - -e 's/@''HAVE_WCTYPE_H''@/$(HAVE_WCTYPE_H)/g' \ - -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ - -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ - -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ - -e 's|@''NEXT_WCTYPE_H''@|$(NEXT_WCTYPE_H)|g' \ - -e 's/@''GNULIB_ISWBLANK''@/$(GNULIB_ISWBLANK)/g' \ - -e 's/@''GNULIB_WCTYPE''@/$(GNULIB_WCTYPE)/g' \ - -e 's/@''GNULIB_ISWCTYPE''@/$(GNULIB_ISWCTYPE)/g' \ - -e 's/@''GNULIB_WCTRANS''@/$(GNULIB_WCTRANS)/g' \ - -e 's/@''GNULIB_TOWCTRANS''@/$(GNULIB_TOWCTRANS)/g' \ - -e 's/@''HAVE_ISWBLANK''@/$(HAVE_ISWBLANK)/g' \ - -e 's/@''HAVE_ISWCNTRL''@/$(HAVE_ISWCNTRL)/g' \ - -e 's/@''HAVE_WCTYPE_T''@/$(HAVE_WCTYPE_T)/g' \ - -e 's/@''HAVE_WCTRANS_T''@/$(HAVE_WCTRANS_T)/g' \ - -e 's/@''HAVE_WINT_T''@/$(HAVE_WINT_T)/g' \ - -e 's/@''REPLACE_ISWBLANK''@/$(REPLACE_ISWBLANK)/g' \ - -e 's/@''REPLACE_ISWCNTRL''@/$(REPLACE_ISWCNTRL)/g' \ - -e 's/@''REPLACE_TOWLOWER''@/$(REPLACE_TOWLOWER)/g' \ - -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ - -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ - < $(srcdir)/wctype.in.h; \ - } > $@-t && \ - mv $@-t $@ -MOSTLYCLEANFILES += wctype.h wctype.h-t - -EXTRA_DIST += wctype.in.h - -## end gnulib module wctype-h - -## begin gnulib module wcwidth - - -EXTRA_DIST += wcwidth.c - -EXTRA_libgreputils_a_SOURCES += wcwidth.c - -## end gnulib module wcwidth - -## begin gnulib module xalloc - -libgreputils_a_SOURCES += xmalloc.c - -EXTRA_DIST += xalloc.h - -## end gnulib module xalloc - -## begin gnulib module xalloc-die - -libgreputils_a_SOURCES += xalloc-die.c - -## end gnulib module xalloc-die - -## begin gnulib module xalloc-oversized - - -EXTRA_DIST += xalloc-oversized.h - -## end gnulib module 
xalloc-oversized - -## begin gnulib module xstriconv - -libgreputils_a_SOURCES += xstriconv.h xstriconv.c - -## end gnulib module xstriconv - -## begin gnulib module xstrtoimax - -libgreputils_a_SOURCES += xstrtoimax.c - -## end gnulib module xstrtoimax - -## begin gnulib module xstrtol - -libgreputils_a_SOURCES += xstrtol.c xstrtoul.c xstrtol-error.c - -EXTRA_DIST += xstrtol.h - -## end gnulib module xstrtol - - -mostlyclean-local: mostlyclean-generic - @for dir in '' $(MOSTLYCLEANDIRS); do \ - if test -n "$$dir" && test -d $$dir; then \ - echo "rmdir $$dir"; rmdir $$dir; \ - fi; \ - done; \ - : diff --git a/contrib/grep/lib/wctomb.c b/contrib/grep/lib/hard-locale.c similarity index 55% rename from contrib/grep/lib/wctomb.c rename to contrib/grep/lib/hard-locale.c index 8022aa845e..730e9be6e7 100644 --- a/contrib/grep/lib/wctomb.c +++ b/contrib/grep/lib/hard-locale.c @@ -1,6 +1,7 @@ -/* Convert wide character to multibyte character. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2011. +/* hard-locale.c -- Determine whether a locale is hard. + + Copyright (C) 1997-1999, 2002-2004, 2006-2007, 2009-2020 Free Software + Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,13 +14,22 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ #include -#include +#include "hard-locale.h" +#include #include -#include -#include "wctomb-impl.h" +bool +hard_locale (int category) +{ + char locale[SETLOCALE_NULL_MAX]; + + if (setlocale_null_r (category, locale, sizeof (locale))) + return false; + + return !(strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0); +} diff --git a/contrib/grep/lib/closeout.h b/contrib/grep/lib/hard-locale.h similarity index 53% copy from contrib/grep/lib/closeout.h copy to contrib/grep/lib/hard-locale.h index 87cea5b1af..1886eaae92 100644 --- a/contrib/grep/lib/closeout.h +++ b/contrib/grep/lib/hard-locale.h @@ -1,7 +1,6 @@ -/* Close standard output and standard error. +/* Determine whether a locale is hard. - Copyright (C) 1998, 2000, 2003-2004, 2006, 2008-2015 Free Software - Foundation, Inc. + Copyright (C) 1999, 2003-2004, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,23 +13,16 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ -#ifndef CLOSEOUT_H -# define CLOSEOUT_H 1 +#ifndef HARD_LOCALE_H_ +# define HARD_LOCALE_H_ 1 # include -# ifdef __cplusplus -extern "C" { -# endif +/* Return true if the specified CATEGORY of the current locale is hard, i.e. + different from the C or POSIX locale that has a fixed behavior. + CATEGORY must be one of the LC_* values, but not LC_ALL. 
*/ +extern bool hard_locale (int category); -void close_stdout_set_file_name (const char *file); -void close_stdout_set_ignore_EPIPE (bool ignore); -void close_stdout (void); - -# ifdef __cplusplus -} -# endif - -#endif +#endif /* HARD_LOCALE_H_ */ diff --git a/contrib/grep/lib/hash.c b/contrib/grep/lib/hash.c index 4f27d5c8f8..7aaf106267 100644 --- a/contrib/grep/lib/hash.c +++ b/contrib/grep/lib/hash.c @@ -1,6 +1,6 @@ /* hash - hashing table processing. - Copyright (C) 1998-2004, 2006-2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 1998-2004, 2006-2007, 2009-2020 Free Software Foundation, Inc. Written by Jim Meyering, 1992. @@ -15,7 +15,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* A generic hash table package. */ diff --git a/contrib/grep/lib/hash.h b/contrib/grep/lib/hash.h index 1e90c31a6c..2ff4266a4f 100644 --- a/contrib/grep/lib/hash.h +++ b/contrib/grep/lib/hash.h @@ -1,5 +1,5 @@ /* hash - hashing table processing. - Copyright (C) 1998-1999, 2001, 2003, 2009-2015 Free Software Foundation, + Copyright (C) 1998-1999, 2001, 2003, 2009-2020 Free Software Foundation, Inc. Written by Jim Meyering , 1998. @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* A generic hash table package. 
*/ @@ -89,6 +89,9 @@ void hash_reset_tuning (Hash_tuning *); Hash_table *hash_initialize (size_t, const Hash_tuning *, Hash_hasher, Hash_comparator, Hash_data_freer) _GL_ATTRIBUTE_WUR; +Hash_table *hash_xinitialize (size_t, const Hash_tuning *, + Hash_hasher, Hash_comparator, + Hash_data_freer) _GL_ATTRIBUTE_WUR; void hash_clear (Hash_table *); void hash_free (Hash_table *); diff --git a/contrib/grep/lib/i-ring.c b/contrib/grep/lib/i-ring.c index 9a8a0afb7b..8386df9329 100644 --- a/contrib/grep/lib/i-ring.c +++ b/contrib/grep/lib/i-ring.c @@ -1,5 +1,5 @@ /* a simple ring buffer - Copyright (C) 2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* written by Jim Meyering */ diff --git a/contrib/grep/lib/i-ring.h b/contrib/grep/lib/i-ring.h index f18325748b..d8cc466e3f 100644 --- a/contrib/grep/lib/i-ring.h +++ b/contrib/grep/lib/i-ring.h @@ -1,5 +1,5 @@ /* definitions for a simple ring buffer - Copyright (C) 2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ #include #include "verify.h" diff --git a/contrib/grep/lib/iconv_open.c b/contrib/grep/lib/iconv_open.c deleted file mode 100644 index 09d042d1f0..0000000000 --- a/contrib/grep/lib/iconv_open.c +++ /dev/null @@ -1,172 +0,0 @@ -/* Character set conversion. - Copyright (C) 2007, 2009-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, see . */ - -#include - -/* Specification. */ -#include - -#include -#include -#include "c-ctype.h" -#include "c-strcase.h" - -#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) - -/* Namespace cleanliness. */ -#define mapping_lookup rpl_iconv_open_mapping_lookup - -/* The macro ICONV_FLAVOR is defined to one of these or undefined. */ - -#define ICONV_FLAVOR_AIX "iconv_open-aix.h" -#define ICONV_FLAVOR_HPUX "iconv_open-hpux.h" -#define ICONV_FLAVOR_IRIX "iconv_open-irix.h" -#define ICONV_FLAVOR_OSF "iconv_open-osf.h" -#define ICONV_FLAVOR_SOLARIS "iconv_open-solaris.h" - -#ifdef ICONV_FLAVOR -# include ICONV_FLAVOR -#endif - -iconv_t -rpl_iconv_open (const char *tocode, const char *fromcode) -#undef iconv_open -{ - char fromcode_upper[32]; - char tocode_upper[32]; - char *fromcode_upper_end; - char *tocode_upper_end; - -#if REPLACE_ICONV_UTF - /* Special handling of conversion between UTF-8 and UTF-{16,32}{BE,LE}. - Do this here, before calling the real iconv_open(), because OSF/1 5.1 - iconv() to these encoding inserts a BOM, which is wrong. 
- We do not need to handle conversion between arbitrary encodings and - UTF-{16,32}{BE,LE}, because the 'striconveh' module implements two-step - conversion through UTF-8. - The _ICONV_* constants are chosen to be disjoint from any iconv_t - returned by the system's iconv_open() functions. Recall that iconv_t - is a scalar type. */ - if (c_toupper (fromcode[0]) == 'U' - && c_toupper (fromcode[1]) == 'T' - && c_toupper (fromcode[2]) == 'F' - && fromcode[3] == '-') - { - if (c_toupper (tocode[0]) == 'U' - && c_toupper (tocode[1]) == 'T' - && c_toupper (tocode[2]) == 'F' - && tocode[3] == '-') - { - if (strcmp (fromcode + 4, "8") == 0) - { - if (c_strcasecmp (tocode + 4, "16BE") == 0) - return _ICONV_UTF8_UTF16BE; - if (c_strcasecmp (tocode + 4, "16LE") == 0) - return _ICONV_UTF8_UTF16LE; - if (c_strcasecmp (tocode + 4, "32BE") == 0) - return _ICONV_UTF8_UTF32BE; - if (c_strcasecmp (tocode + 4, "32LE") == 0) - return _ICONV_UTF8_UTF32LE; - } - else if (strcmp (tocode + 4, "8") == 0) - { - if (c_strcasecmp (fromcode + 4, "16BE") == 0) - return _ICONV_UTF16BE_UTF8; - if (c_strcasecmp (fromcode + 4, "16LE") == 0) - return _ICONV_UTF16LE_UTF8; - if (c_strcasecmp (fromcode + 4, "32BE") == 0) - return _ICONV_UTF32BE_UTF8; - if (c_strcasecmp (fromcode + 4, "32LE") == 0) - return _ICONV_UTF32LE_UTF8; - } - } - } -#endif - - /* Do *not* add special support for 8-bit encodings like ASCII or ISO-8859-1 - here. This would lead to programs that work in some locales (such as the - "C" or "en_US" locales) but do not work in East Asian locales. It is - better if programmers make their programs depend on GNU libiconv (except - on glibc systems), e.g. by using the AM_ICONV macro and documenting the - dependency in an INSTALL or DEPENDENCIES file. */ - - /* Try with the original names first. - This covers the case when fromcode or tocode is a lowercase encoding name - that is understood by the system's iconv_open but not listed in our - mappings table. 
*/ - { - iconv_t cd = iconv_open (tocode, fromcode); - if (cd != (iconv_t)(-1)) - return cd; - } - - /* Convert the encodings to upper case, because - 1. in the arguments of iconv_open() on AIX, HP-UX, and OSF/1 the case - matters, - 2. it makes searching in the table faster. */ - { - const char *p = fromcode; - char *q = fromcode_upper; - while ((*q = c_toupper (*p)) != '\0') - { - p++; - q++; - if (q == &fromcode_upper[SIZEOF (fromcode_upper)]) - { - errno = EINVAL; - return (iconv_t)(-1); - } - } - fromcode_upper_end = q; - } - - { - const char *p = tocode; - char *q = tocode_upper; - while ((*q = c_toupper (*p)) != '\0') - { - p++; - q++; - if (q == &tocode_upper[SIZEOF (tocode_upper)]) - { - errno = EINVAL; - return (iconv_t)(-1); - } - } - tocode_upper_end = q; - } - -#ifdef ICONV_FLAVOR - /* Apply the mappings. */ - { - const struct mapping *m = - mapping_lookup (fromcode_upper, fromcode_upper_end - fromcode_upper); - - fromcode = (m != NULL ? m->vendor_name : fromcode_upper); - } - { - const struct mapping *m = - mapping_lookup (tocode_upper, tocode_upper_end - tocode_upper); - - tocode = (m != NULL ? m->vendor_name : tocode_upper); - } -#else - fromcode = fromcode_upper; - tocode = tocode_upper; -#endif - - return iconv_open (tocode, fromcode); -} diff --git a/contrib/grep/lib/ignore-value.h b/contrib/grep/lib/ignore-value.h index 2e3121fac0..7a92226843 100644 --- a/contrib/grep/lib/ignore-value.h +++ b/contrib/grep/lib/ignore-value.h @@ -1,6 +1,6 @@ /* ignore a function return without a compiler warning. -*- coding: utf-8 -*- - Copyright (C) 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2008-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . 
*/ + along with this program. If not, see . */ /* Written by Jim Meyering, Eric Blake and Pádraig Brady. */ diff --git a/contrib/grep/lib/intprops.h b/contrib/grep/lib/intprops.h index f85ccade4e..dfbcaae73e 100644 --- a/contrib/grep/lib/intprops.h +++ b/contrib/grep/lib/intprops.h @@ -1,10 +1,10 @@ /* intprops.h -- properties of integer types - Copyright (C) 2001-2005, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2020 Free Software Foundation, Inc. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert. */ @@ -22,13 +22,13 @@ #include -/* Return an integer value, converted to the same type as the integer - expression E after integer type promotion. V is the unconverted value. */ -#define _GL_INT_CONVERT(e, v) (0 * (e) + (v)) +/* Return a value with the common real type of E and V and the value of V. + Do not evaluate E. */ +#define _GL_INT_CONVERT(e, v) ((1 ? 0 : (e)) + (v)) /* Act like _GL_INT_CONVERT (E, -V) but work around a bug in IRIX 6.5 cc; see - . */ -#define _GL_INT_NEGATE_CONVERT(e, v) (0 * (e) - (v)) + . */ +#define _GL_INT_NEGATE_CONVERT(e, v) ((1 ? 0 : (e)) - (v)) /* The extra casts in the following macros work around compiler bugs, e.g., in Cray C 5.0.3.0. */ @@ -37,59 +37,56 @@ an integer. 
*/ #define TYPE_IS_INTEGER(t) ((t) 1.5 == 1) -/* True if negative values of the signed integer type T use two's - complement, ones' complement, or signed magnitude representation, - respectively. Much GNU code assumes two's complement, but some - people like to be portable to all possible C hosts. */ -#define TYPE_TWOS_COMPLEMENT(t) ((t) ~ (t) 0 == (t) -1) -#define TYPE_ONES_COMPLEMENT(t) ((t) ~ (t) 0 == 0) -#define TYPE_SIGNED_MAGNITUDE(t) ((t) ~ (t) 0 < (t) -1) - -/* True if the signed integer expression E uses two's complement. */ -#define _GL_INT_TWOS_COMPLEMENT(e) (~ _GL_INT_CONVERT (e, 0) == -1) - -/* True if the arithmetic type T is signed. */ +/* True if the real type T is signed. */ #define TYPE_SIGNED(t) (! ((t) 0 < (t) -1)) -/* Return 1 if the integer expression E, after integer promotion, has - a signed type. */ -#define _GL_INT_SIGNED(e) (_GL_INT_NEGATE_CONVERT (e, 1) < 0) +/* Return 1 if the real expression E, after promotion, has a + signed or floating type. Do not evaluate E. */ +#define EXPR_SIGNED(e) (_GL_INT_NEGATE_CONVERT (e, 1) < 0) + +/* Minimum and maximum values for integer types and expressions. */ -/* Minimum and maximum values for integer types and expressions. These - macros have undefined behavior if T is signed and has padding bits. - If this is a problem for you, please let us know how to fix it for - your host. */ +/* The width in bits of the integer type or expression T. + Do not evaluate T. + Padding bits are not supported; this is checked at compile-time below. */ +#define TYPE_WIDTH(t) (sizeof (t) * CHAR_BIT) /* The maximum and minimum values for the integer type T. */ -#define TYPE_MINIMUM(t) \ - ((t) (! TYPE_SIGNED (t) \ - ? (t) 0 \ - : TYPE_SIGNED_MAGNITUDE (t) \ - ? ~ (t) 0 \ - : ~ TYPE_MAXIMUM (t))) +#define TYPE_MINIMUM(t) ((t) ~ TYPE_MAXIMUM (t)) #define TYPE_MAXIMUM(t) \ ((t) (! TYPE_SIGNED (t) \ ? 
(t) -1 \ - : ((((t) 1 << (sizeof (t) * CHAR_BIT - 2)) - 1) * 2 + 1))) + : ((((t) 1 << (TYPE_WIDTH (t) - 2)) - 1) * 2 + 1))) /* The maximum and minimum values for the type of the expression E, - after integer promotion. E should not have side effects. */ + after integer promotion. E is not evaluated. */ #define _GL_INT_MINIMUM(e) \ - (_GL_INT_SIGNED (e) \ - ? - _GL_INT_TWOS_COMPLEMENT (e) - _GL_SIGNED_INT_MAXIMUM (e) \ + (EXPR_SIGNED (e) \ + ? ~ _GL_SIGNED_INT_MAXIMUM (e) \ : _GL_INT_CONVERT (e, 0)) #define _GL_INT_MAXIMUM(e) \ - (_GL_INT_SIGNED (e) \ + (EXPR_SIGNED (e) \ ? _GL_SIGNED_INT_MAXIMUM (e) \ : _GL_INT_NEGATE_CONVERT (e, 1)) #define _GL_SIGNED_INT_MAXIMUM(e) \ - (((_GL_INT_CONVERT (e, 1) << (sizeof ((e) + 0) * CHAR_BIT - 2)) - 1) * 2 + 1) + (((_GL_INT_CONVERT (e, 1) << (TYPE_WIDTH ((e) + 0) - 2)) - 1) * 2 + 1) + +/* Work around OpenVMS incompatibility with C99. */ +#if !defined LLONG_MAX && defined __INT64_MAX +# define LLONG_MAX __INT64_MAX +# define LLONG_MIN __INT64_MIN +#endif +/* This include file assumes that signed types are two's complement without + padding bits; the above macros have undefined behavior otherwise. + If this is a problem for you, please let us know how to fix it for your host. + This assumption is tested by the intprops-tests module. */ -/* Return 1 if the __typeof__ keyword works. This could be done by +/* Does the __typeof__ keyword work? This could be done by 'configure', but for now it's easier to do it by hand. */ -#if (2 <= __GNUC__ || defined __IBM__TYPEOF__ \ +#if (2 <= __GNUC__ \ + || (1210 <= __IBMC__ && defined __IBM__TYPEOF__) \ || (0x5110 <= __SUNPRO_C && !__STDC__)) # define _GL_HAVE___TYPEOF__ 1 #else @@ -114,12 +111,11 @@ Subtract 1 for the sign bit if T is signed, and then add 1 more for a minus sign if needed. 
- Because _GL_SIGNED_TYPE_OR_EXPR sometimes returns 0 when its argument is - signed, this macro may overestimate the true bound by one byte when + Because _GL_SIGNED_TYPE_OR_EXPR sometimes returns 1 when its argument is + unsigned, this macro may overestimate the true bound by one byte when applied to unsigned types of size 2, 4, 16, ... bytes. */ #define INT_STRLEN_BOUND(t) \ - (INT_BITS_STRLEN_BOUND (sizeof (t) * CHAR_BIT \ - - _GL_SIGNED_TYPE_OR_EXPR (t)) \ + (INT_BITS_STRLEN_BOUND (TYPE_WIDTH (t) - _GL_SIGNED_TYPE_OR_EXPR (t)) \ + _GL_SIGNED_TYPE_OR_EXPR (t)) /* Bound on buffer size needed to represent an integer type or expression T, @@ -185,7 +181,7 @@ /* Return 1 if A * B would overflow in [MIN,MAX] arithmetic. See above for restrictions. Avoid && and || as they tickle bugs in Sun C 5.11 2010/08/13 and other compilers; see - . */ + . */ #define INT_MULTIPLY_RANGE_OVERFLOW(a, b, min, max) \ ((b) < 0 \ ? ((a) < 0 \ @@ -224,24 +220,54 @@ ? (a) < (min) >> (b) \ : (max) >> (b) < (a)) +/* True if __builtin_add_overflow (A, B, P) and __builtin_sub_overflow + (A, B, P) work when P is non-null. */ +#if 5 <= __GNUC__ && !defined __ICC +# define _GL_HAS_BUILTIN_ADD_OVERFLOW 1 +#elif defined __has_builtin +# define _GL_HAS_BUILTIN_ADD_OVERFLOW __has_builtin (__builtin_add_overflow) +#else +# define _GL_HAS_BUILTIN_ADD_OVERFLOW 0 +#endif + +/* True if __builtin_mul_overflow (A, B, P) works when P is non-null. */ +#ifdef __clang__ +/* Work around Clang bug . */ +# define _GL_HAS_BUILTIN_MUL_OVERFLOW 0 +#else +# define _GL_HAS_BUILTIN_MUL_OVERFLOW _GL_HAS_BUILTIN_ADD_OVERFLOW +#endif + +/* True if __builtin_add_overflow_p (A, B, C) works, and similarly for + __builtin_mul_overflow_p and __builtin_mul_overflow_p. */ +#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__) /* The _GL*_OVERFLOW macros have the same restrictions as the *_RANGE_OVERFLOW macros, except that they do not assume that operands (e.g., A and B) have the same type as MIN and MAX. 
Instead, they assume that the result (e.g., A + B) has that type. */ -#define _GL_ADD_OVERFLOW(a, b, min, max) \ - ((min) < 0 ? INT_ADD_RANGE_OVERFLOW (a, b, min, max) \ - : (a) < 0 ? (b) <= (a) + (b) \ - : (b) < 0 ? (a) <= (a) + (b) \ - : (a) + (b) < (b)) -#define _GL_SUBTRACT_OVERFLOW(a, b, min, max) \ - ((min) < 0 ? INT_SUBTRACT_RANGE_OVERFLOW (a, b, min, max) \ - : (a) < 0 ? 1 \ - : (b) < 0 ? (a) - (b) <= (a) \ - : (a) < (b)) -#define _GL_MULTIPLY_OVERFLOW(a, b, min, max) \ - (((min) == 0 && (((a) < 0 && 0 < (b)) || ((b) < 0 && 0 < (a)))) \ - || INT_MULTIPLY_RANGE_OVERFLOW (a, b, min, max)) +#if _GL_HAS_BUILTIN_OVERFLOW_P +# define _GL_ADD_OVERFLOW(a, b, min, max) \ + __builtin_add_overflow_p (a, b, (__typeof__ ((a) + (b))) 0) +# define _GL_SUBTRACT_OVERFLOW(a, b, min, max) \ + __builtin_sub_overflow_p (a, b, (__typeof__ ((a) - (b))) 0) +# define _GL_MULTIPLY_OVERFLOW(a, b, min, max) \ + __builtin_mul_overflow_p (a, b, (__typeof__ ((a) * (b))) 0) +#else +# define _GL_ADD_OVERFLOW(a, b, min, max) \ + ((min) < 0 ? INT_ADD_RANGE_OVERFLOW (a, b, min, max) \ + : (a) < 0 ? (b) <= (a) + (b) \ + : (b) < 0 ? (a) <= (a) + (b) \ + : (a) + (b) < (b)) +# define _GL_SUBTRACT_OVERFLOW(a, b, min, max) \ + ((min) < 0 ? INT_SUBTRACT_RANGE_OVERFLOW (a, b, min, max) \ + : (a) < 0 ? 1 \ + : (b) < 0 ? (a) - (b) <= (a) \ + : (a) < (b)) +# define _GL_MULTIPLY_OVERFLOW(a, b, min, max) \ + (((min) == 0 && (((a) < 0 && 0 < (b)) || ((b) < 0 && 0 < (a)))) \ + || INT_MULTIPLY_RANGE_OVERFLOW (a, b, min, max)) +#endif #define _GL_DIVIDE_OVERFLOW(a, b, min, max) \ ((min) < 0 ? (b) == _GL_INT_NEGATE_CONVERT (min, 1) && (a) < - (max) \ : (a) < 0 ? (b) <= (a) + (b) - 1 \ @@ -263,22 +289,31 @@ : (a) % - (b)) \ == 0) - -/* Integer overflow checks. +/* Check for integer overflow, and report low order bits of answer. The INT__OVERFLOW macros return 1 if the corresponding C operators might not yield numerically correct answers due to arithmetic overflow. 
- They work correctly on all known practical hosts, and do not rely + The INT__WRAPV macros compute the low-order bits of the sum, + difference, and product of two C integers, and return 1 if these + low-order bits are not numerically correct. + These macros work correctly on all known practical hosts, and do not rely on undefined behavior due to signed arithmetic overflow. - Example usage: + Example usage, assuming A and B are long int: - long int i = ...; - long int j = ...; - if (INT_MULTIPLY_OVERFLOW (i, j)) - printf ("multiply would overflow"); + if (INT_MULTIPLY_OVERFLOW (a, b)) + printf ("result would overflow\n"); else - printf ("product is %ld", i * j); + printf ("result is %ld (no overflow)\n", a * b); + + Example usage with WRAPV flavor: + + long int result; + bool overflow = INT_MULTIPLY_WRAPV (a, b, &result); + printf ("result is %ld (%s)\n", result, + overflow ? "after overflow" : "no overflow"); + + Restrictions on these macros: These macros do not check for all possible numerical problems or undefined or unspecified behavior: they do not check for division @@ -287,7 +322,12 @@ These macros may evaluate their arguments zero or multiple times, so the arguments should not have side effects. - These macros are tuned for their last argument being a constant. + The WRAPV macros are not constant expressions. They support only + +, binary -, and *. Because the WRAPV macros convert the result, + they report overflow in different circumstances than the OVERFLOW + macros do. + + These macros are tuned for their last input argument being a constant. Return 1 if the integer expressions A * B, A - B, -A, A * B, A / B, A % B, and A << B would overflow, respectively. 
*/ @@ -296,8 +336,12 @@ _GL_BINARY_OP_OVERFLOW (a, b, _GL_ADD_OVERFLOW) #define INT_SUBTRACT_OVERFLOW(a, b) \ _GL_BINARY_OP_OVERFLOW (a, b, _GL_SUBTRACT_OVERFLOW) -#define INT_NEGATE_OVERFLOW(a) \ - INT_NEGATE_RANGE_OVERFLOW (a, _GL_INT_MINIMUM (a), _GL_INT_MAXIMUM (a)) +#if _GL_HAS_BUILTIN_OVERFLOW_P +# define INT_NEGATE_OVERFLOW(a) INT_SUBTRACT_OVERFLOW (0, a) +#else +# define INT_NEGATE_OVERFLOW(a) \ + INT_NEGATE_RANGE_OVERFLOW (a, _GL_INT_MINIMUM (a), _GL_INT_MAXIMUM (a)) +#endif #define INT_MULTIPLY_OVERFLOW(a, b) \ _GL_BINARY_OP_OVERFLOW (a, b, _GL_MULTIPLY_OVERFLOW) #define INT_DIVIDE_OVERFLOW(a, b) \ @@ -314,7 +358,227 @@ Arguments should be free of side effects. */ #define _GL_BINARY_OP_OVERFLOW(a, b, op_result_overflow) \ op_result_overflow (a, b, \ - _GL_INT_MINIMUM (0 * (b) + (a)), \ - _GL_INT_MAXIMUM (0 * (b) + (a))) + _GL_INT_MINIMUM (_GL_INT_CONVERT (a, b)), \ + _GL_INT_MAXIMUM (_GL_INT_CONVERT (a, b))) + +/* Store the low-order bits of A + B, A - B, A * B, respectively, into *R. + Return 1 if the result overflows. See above for restrictions. */ +#if _GL_HAS_BUILTIN_ADD_OVERFLOW +# define INT_ADD_WRAPV(a, b, r) __builtin_add_overflow (a, b, r) +# define INT_SUBTRACT_WRAPV(a, b, r) __builtin_sub_overflow (a, b, r) +#else +# define INT_ADD_WRAPV(a, b, r) \ + _GL_INT_OP_WRAPV (a, b, r, +, _GL_INT_ADD_RANGE_OVERFLOW) +# define INT_SUBTRACT_WRAPV(a, b, r) \ + _GL_INT_OP_WRAPV (a, b, r, -, _GL_INT_SUBTRACT_RANGE_OVERFLOW) +#endif +#if _GL_HAS_BUILTIN_MUL_OVERFLOW +# if (9 < __GNUC__ + (3 <= __GNUC_MINOR__) \ + || (__GNUC__ == 8 && 4 <= __GNUC_MINOR__)) +# define INT_MULTIPLY_WRAPV(a, b, r) __builtin_mul_overflow (a, b, r) +# else + /* Work around GCC bug 91450. */ +# define INT_MULTIPLY_WRAPV(a, b, r) \ + ((!_GL_SIGNED_TYPE_OR_EXPR (*(r)) && EXPR_SIGNED (a) && EXPR_SIGNED (b) \ + && _GL_INT_MULTIPLY_RANGE_OVERFLOW (a, b, 0, (__typeof__ (*(r))) -1)) \ + ? 
((void) __builtin_mul_overflow (a, b, r), 1) \ + : __builtin_mul_overflow (a, b, r)) +# endif +#else +# define INT_MULTIPLY_WRAPV(a, b, r) \ + _GL_INT_OP_WRAPV (a, b, r, *, _GL_INT_MULTIPLY_RANGE_OVERFLOW) +#endif + +/* Nonzero if this compiler has GCC bug 68193 or Clang bug 25390. See: + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68193 + https://llvm.org/bugs/show_bug.cgi?id=25390 + For now, assume all versions of GCC-like compilers generate bogus + warnings for _Generic. This matters only for compilers that + lack relevant builtins. */ +#if __GNUC__ +# define _GL__GENERIC_BOGUS 1 +#else +# define _GL__GENERIC_BOGUS 0 +#endif + +/* Store the low-order bits of A B into *R, where OP specifies + the operation and OVERFLOW the overflow predicate. Return 1 if the + result overflows. See above for restrictions. */ +#if 201112 <= __STDC_VERSION__ && !_GL__GENERIC_BOGUS +# define _GL_INT_OP_WRAPV(a, b, r, op, overflow) \ + (_Generic \ + (*(r), \ + signed char: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned int, \ + signed char, SCHAR_MIN, SCHAR_MAX), \ + unsigned char: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned int, \ + unsigned char, 0, UCHAR_MAX), \ + short int: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned int, \ + short int, SHRT_MIN, SHRT_MAX), \ + unsigned short int: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned int, \ + unsigned short int, 0, USHRT_MAX), \ + int: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned int, \ + int, INT_MIN, INT_MAX), \ + unsigned int: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned int, \ + unsigned int, 0, UINT_MAX), \ + long int: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long int, \ + long int, LONG_MIN, LONG_MAX), \ + unsigned long int: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long int, \ + unsigned long int, 0, ULONG_MAX), \ + long long int: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long long int, \ + long long int, LLONG_MIN, LLONG_MAX), \ + unsigned long long 
int: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long long int, \ + unsigned long long int, 0, ULLONG_MAX))) +#else +/* Store the low-order bits of A B into *R, where OP specifies + the operation and OVERFLOW the overflow predicate. If *R is + signed, its type is ST with bounds SMIN..SMAX; otherwise its type + is UT with bounds U..UMAX. ST and UT are narrower than int. + Return 1 if the result overflows. See above for restrictions. */ +# if _GL_HAVE___TYPEOF__ +# define _GL_INT_OP_WRAPV_SMALLISH(a,b,r,op,overflow,st,smin,smax,ut,umax) \ + (TYPE_SIGNED (__typeof__ (*(r))) \ + ? _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned int, st, smin, smax) \ + : _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned int, ut, 0, umax)) +# else +# define _GL_INT_OP_WRAPV_SMALLISH(a,b,r,op,overflow,st,smin,smax,ut,umax) \ + (overflow (a, b, smin, smax) \ + ? (overflow (a, b, 0, umax) \ + ? (*(r) = _GL_INT_OP_WRAPV_VIA_UNSIGNED (a,b,op,unsigned,st), 1) \ + : (*(r) = _GL_INT_OP_WRAPV_VIA_UNSIGNED (a,b,op,unsigned,st)) < 0) \ + : (overflow (a, b, 0, umax) \ + ? (*(r) = _GL_INT_OP_WRAPV_VIA_UNSIGNED (a,b,op,unsigned,st)) >= 0 \ + : (*(r) = _GL_INT_OP_WRAPV_VIA_UNSIGNED (a,b,op,unsigned,st), 0))) +# endif + +# define _GL_INT_OP_WRAPV(a, b, r, op, overflow) \ + (sizeof *(r) == sizeof (signed char) \ + ? _GL_INT_OP_WRAPV_SMALLISH (a, b, r, op, overflow, \ + signed char, SCHAR_MIN, SCHAR_MAX, \ + unsigned char, UCHAR_MAX) \ + : sizeof *(r) == sizeof (short int) \ + ? _GL_INT_OP_WRAPV_SMALLISH (a, b, r, op, overflow, \ + short int, SHRT_MIN, SHRT_MAX, \ + unsigned short int, USHRT_MAX) \ + : sizeof *(r) == sizeof (int) \ + ? (EXPR_SIGNED (*(r)) \ + ? 
_GL_INT_OP_CALC (a, b, r, op, overflow, unsigned int, \ + int, INT_MIN, INT_MAX) \ + : _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned int, \ + unsigned int, 0, UINT_MAX)) \ + : _GL_INT_OP_WRAPV_LONGISH(a, b, r, op, overflow)) +# ifdef LLONG_MAX +# define _GL_INT_OP_WRAPV_LONGISH(a, b, r, op, overflow) \ + (sizeof *(r) == sizeof (long int) \ + ? (EXPR_SIGNED (*(r)) \ + ? _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long int, \ + long int, LONG_MIN, LONG_MAX) \ + : _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long int, \ + unsigned long int, 0, ULONG_MAX)) \ + : (EXPR_SIGNED (*(r)) \ + ? _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long long int, \ + long long int, LLONG_MIN, LLONG_MAX) \ + : _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long long int, \ + unsigned long long int, 0, ULLONG_MAX))) +# else +# define _GL_INT_OP_WRAPV_LONGISH(a, b, r, op, overflow) \ + (EXPR_SIGNED (*(r)) \ + ? _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long int, \ + long int, LONG_MIN, LONG_MAX) \ + : _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long int, \ + unsigned long int, 0, ULONG_MAX)) +# endif +#endif + +/* Store the low-order bits of A B into *R, where the operation + is given by OP. Use the unsigned type UT for calculation to avoid + overflow problems. *R's type is T, with extrema TMIN and TMAX. + T must be a signed integer type. Return 1 if the result overflows. */ +#define _GL_INT_OP_CALC(a, b, r, op, overflow, ut, t, tmin, tmax) \ + (overflow (a, b, tmin, tmax) \ + ? (*(r) = _GL_INT_OP_WRAPV_VIA_UNSIGNED (a, b, op, ut, t), 1) \ + : (*(r) = _GL_INT_OP_WRAPV_VIA_UNSIGNED (a, b, op, ut, t), 0)) + +/* Return the low-order bits of A B, where the operation is given + by OP. Use the unsigned type UT for calculation to avoid undefined + behavior on signed integer overflow, and convert the result to type T. + UT is at least as wide as T and is no narrower than unsigned int, + T is two's complement, and there is no padding or trap representations. 
+ Assume that converting UT to T yields the low-order bits, as is + done in all known two's-complement C compilers. E.g., see: + https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html + + According to the C standard, converting UT to T yields an + implementation-defined result or signal for values outside T's + range. However, code that works around this theoretical problem + runs afoul of a compiler bug in Oracle Studio 12.3 x86. See: + https://lists.gnu.org/r/bug-gnulib/2017-04/msg00049.html + As the compiler bug is real, don't try to work around the + theoretical problem. */ + +#define _GL_INT_OP_WRAPV_VIA_UNSIGNED(a, b, op, ut, t) \ + ((t) ((ut) (a) op (ut) (b))) + +/* Return true if the numeric values A + B, A - B, A * B fall outside + the range TMIN..TMAX. Arguments should be integer expressions + without side effects. TMIN should be signed and nonpositive. + TMAX should be positive, and should be signed unless TMIN is zero. */ +#define _GL_INT_ADD_RANGE_OVERFLOW(a, b, tmin, tmax) \ + ((b) < 0 \ + ? (((tmin) \ + ? ((EXPR_SIGNED (_GL_INT_CONVERT (a, (tmin) - (b))) || (b) < (tmin)) \ + && (a) < (tmin) - (b)) \ + : (a) <= -1 - (b)) \ + || ((EXPR_SIGNED (a) ? 0 <= (a) : (tmax) < (a)) && (tmax) < (a) + (b))) \ + : (a) < 0 \ + ? (((tmin) \ + ? ((EXPR_SIGNED (_GL_INT_CONVERT (b, (tmin) - (a))) || (a) < (tmin)) \ + && (b) < (tmin) - (a)) \ + : (b) <= -1 - (a)) \ + || ((EXPR_SIGNED (_GL_INT_CONVERT (a, b)) || (tmax) < (b)) \ + && (tmax) < (a) + (b))) \ + : (tmax) < (b) || (tmax) - (b) < (a)) +#define _GL_INT_SUBTRACT_RANGE_OVERFLOW(a, b, tmin, tmax) \ + (((a) < 0) == ((b) < 0) \ + ? ((a) < (b) \ + ? !(tmin) || -1 - (tmin) < (b) - (a) - 1 \ + : (tmax) < (a) - (b)) \ + : (a) < 0 \ + ? ((!EXPR_SIGNED (_GL_INT_CONVERT ((a) - (tmin), b)) && (a) - (tmin) < 0) \ + || (a) - (tmin) < (b)) \ + : ((! 
(EXPR_SIGNED (_GL_INT_CONVERT (tmax, b)) \ + && EXPR_SIGNED (_GL_INT_CONVERT ((tmax) + (b), a))) \ + && (tmax) <= -1 - (b)) \ + || (tmax) + (b) < (a))) +#define _GL_INT_MULTIPLY_RANGE_OVERFLOW(a, b, tmin, tmax) \ + ((b) < 0 \ + ? ((a) < 0 \ + ? (EXPR_SIGNED (_GL_INT_CONVERT (tmax, b)) \ + ? (a) < (tmax) / (b) \ + : ((INT_NEGATE_OVERFLOW (b) \ + ? _GL_INT_CONVERT (b, tmax) >> (TYPE_WIDTH (b) - 1) \ + : (tmax) / -(b)) \ + <= -1 - (a))) \ + : INT_NEGATE_OVERFLOW (_GL_INT_CONVERT (b, tmin)) && (b) == -1 \ + ? (EXPR_SIGNED (a) \ + ? 0 < (a) + (tmin) \ + : 0 < (a) && -1 - (tmin) < (a) - 1) \ + : (tmin) / (b) < (a)) \ + : (b) == 0 \ + ? 0 \ + : ((a) < 0 \ + ? (INT_NEGATE_OVERFLOW (_GL_INT_CONVERT (a, tmin)) && (a) == -1 \ + ? (EXPR_SIGNED (b) ? 0 < (b) + (tmin) : -1 - (tmin) < (b) - 1) \ + : (tmin) / (a) < (b)) \ + : (tmax) / (b) < (a))) #endif /* _GL_INTPROPS_H */ diff --git a/contrib/grep/lib/isatty.c b/contrib/grep/lib/isatty.c deleted file mode 100644 index f7b552bfc4..0000000000 --- a/contrib/grep/lib/isatty.c +++ /dev/null @@ -1,83 +0,0 @@ -/* isatty() replacement. - Copyright (C) 2012-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -/* This replacement is enabled on native Windows. */ - -#include - -/* Get declarations of the Win32 API functions. 
*/ -#define WIN32_LEAN_AND_MEAN -#include <windows.h> - -#include "msvc-inval.h" - -/* Get _get_osfhandle(). */ -#include "msvc-nothrow.h" - -static BOOL IsConsoleHandle (HANDLE h) -{ - DWORD mode; - return GetConsoleMode (h, &mode) != 0; -} - -#if HAVE_MSVC_INVALID_PARAMETER_HANDLER -static int -_isatty_nothrow (int fd) -{ - int result; - - TRY_MSVC_INVAL - { - result = _isatty (fd); - } - CATCH_MSVC_INVAL - { - result = 0; - } - DONE_MSVC_INVAL; - - return result; -} -#else -# define _isatty_nothrow _isatty -#endif - -/* Determine whether FD refers to a console device. Return 1 if yes. - Return 0 and set errno if no. (ptsname_r relies on the errno value.) */ -int -isatty (int fd) -{ - HANDLE h = (HANDLE) _get_osfhandle (fd); - if (h == INVALID_HANDLE_VALUE) - { - errno = EBADF; - return 0; - } - /* _isatty (fd) tests whether GetFileType of the handle is FILE_TYPE_CHAR. - But it does not set errno when it returns 0. */ - if (_isatty_nothrow (fd)) - { - if (IsConsoleHandle (h)) - return 1; - } - errno = ENOTTY; - return 0; -} diff --git a/contrib/grep/lib/isblank.c b/contrib/grep/lib/isblank.c deleted file mode 100644 index ac871ef1e0..0000000000 --- a/contrib/grep/lib/isblank.c +++ /dev/null @@ -1,33 +0,0 @@ -/* Test whether a character is a blank. - - Copyright (C) 2009-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -#include <config.h> - -/* Specification. 
*/ -#include <ctype.h> - -int -isblank (int c) -{ - /* On all known platforms, in all predefined locales, isblank(c) is likely - equivalent with (c == ' ' || c == '\t'). Look at the glibc definition - (in glibc/localedata/locales/i18n): The "blank" characters are '\t', ' ', - U+1680, U+180E, U+2000..U+2006, U+2008..U+200A, U+205F, U+3000, and none - except the first two is present in a common 8-bit encoding. Therefore - the substitute for other platforms is not more complicated than this. */ - return (c == ' ' || c == '\t'); -} diff --git a/contrib/grep/lib/iswctype-impl.h b/contrib/grep/lib/iswctype-impl.h deleted file mode 100644 index 3ab94b0f72..0000000000 --- a/contrib/grep/lib/iswctype-impl.h +++ /dev/null @@ -1,22 +0,0 @@ -/* Test whether a wide character has a given property. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Written by Bruno Haible <bruno@clisp.org>, 2011. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -int -iswctype (wint_t wc, wctype_t desc) -{ - return ((int (*) (wint_t)) desc) (wc); -} diff --git a/contrib/grep/lib/iswctype.c b/contrib/grep/lib/iswctype.c deleted file mode 100644 index 65d585d33e..0000000000 --- a/contrib/grep/lib/iswctype.c +++ /dev/null @@ -1,23 +0,0 @@ -/* Test whether a wide character has a given property. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Written by Bruno Haible <bruno@clisp.org>, 2011. 
- - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -#include <config.h> - -/* Specification. */ -#include <wctype.h> - -#include "iswctype-impl.h" diff --git a/contrib/grep/lib/libc-config.h b/contrib/grep/lib/libc-config.h new file mode 100644 index 0000000000..aef1f79324 --- /dev/null +++ b/contrib/grep/lib/libc-config.h @@ -0,0 +1,174 @@ +/* System definitions for code taken from the GNU C Library + + Copyright 2017-2020 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this program; if not, see + <https://www.gnu.org/licenses/>. */ + +/* Written by Paul Eggert. 
*/ + +/* This is intended to be a good-enough substitute for glibc system + macros like those defined in <sys/cdefs.h>, so that Gnulib code + shared with glibc can do this as the first #include: + + #ifndef _LIBC + # include <libc-config.h> + #endif + + When compiled as part of glibc this is a no-op; when compiled as + part of Gnulib this includes Gnulib's <config.h> and defines macros + that glibc library code would normally assume. */ + +#include <config.h> + +/* On glibc this includes <features.h> and <sys/cdefs.h> and #defines + _FEATURES_H, __WORDSIZE, and __set_errno. On FreeBSD 11 it + includes <sys/cdefs.h> which defines __nonnull. Elsewhere it + is harmless. */ +#include <errno.h> + +/* From glibc <errno.h>. */ +#ifndef __set_errno +# define __set_errno(val) (errno = (val)) +#endif + +/* From glibc <sys/cdefs.h>. */ + +#ifndef __GNUC_PREREQ +# if defined __GNUC__ && defined __GNUC_MINOR__ +# define __GNUC_PREREQ(maj, min) ((maj) < __GNUC__ + ((min) <= __GNUC_MINOR__)) +# else +# define __GNUC_PREREQ(maj, min) 0 +# endif +#endif + +#ifndef __glibc_clang_prereq +# if defined __clang_major__ && defined __clang_minor__ +# define __glibc_clang_prereq(maj, min) \ + ((maj) < __clang_major__ + ((min) <= __clang_minor__)) +# else +# define __glibc_clang_prereq(maj, min) 0 +# endif +#endif + + +/* Prepare to include <cdefs.h>, which is our copy of glibc + <sys/cdefs.h>. */ + +/* Define _FEATURES_H so that <cdefs.h> does not include <features.h>. */ +#ifndef _FEATURES_H +# define _FEATURES_H 1 +#endif +/* Define __WORDSIZE so that <cdefs.h> does not attempt to include + nonexistent files. Make it a syntax error, since Gnulib does not + use __WORDSIZE now, and if Gnulib uses it later the syntax error + will let us know that __WORDSIZE needs configuring. */ +#ifndef __WORDSIZE +# define __WORDSIZE %%% +#endif +/* Undef the macros unconditionally defined by our copy of glibc + <sys/cdefs.h>, so that they do not clash with any system-defined + versions. 
*/ +#undef _SYS_CDEFS_H +#undef __ASMNAME +#undef __ASMNAME2 +#undef __BEGIN_DECLS +#undef __CONCAT +#undef __END_DECLS +#undef __HAVE_GENERIC_SELECTION +#undef __LDBL_COMPAT +#undef __LDBL_REDIR +#undef __LDBL_REDIR1 +#undef __LDBL_REDIR1_DECL +#undef __LDBL_REDIR1_NTH +#undef __LDBL_REDIR_DECL +#undef __LDBL_REDIR_NTH +#undef __LEAF +#undef __LEAF_ATTR +#undef __NTH +#undef __NTHNL +#undef __P +#undef __PMT +#undef __REDIRECT +#undef __REDIRECT_LDBL +#undef __REDIRECT_NTH +#undef __REDIRECT_NTHNL +#undef __REDIRECT_NTH_LDBL +#undef __STRING +#undef __THROW +#undef __THROWNL +#undef __always_inline +#undef __attribute__ +#undef __attribute_alloc_size__ +#undef __attribute_artificial__ +#undef __attribute_const__ +#undef __attribute_deprecated__ +#undef __attribute_deprecated_msg__ +#undef __attribute_format_arg__ +#undef __attribute_format_strfmon__ +#undef __attribute_malloc__ +#undef __attribute_noinline__ +#undef __attribute_nonstring__ +#undef __attribute_pure__ +#undef __attribute_used__ +#undef __attribute_warn_unused_result__ +#undef __bos +#undef __bos0 +#undef __errordecl +#undef __extension__ +#undef __extern_always_inline +#undef __extern_inline +#undef __flexarr +#undef __fortify_function +#undef __glibc_c99_flexarr_available +#undef __glibc_clang_has_extension +#undef __glibc_likely +#undef __glibc_macro_warning +#undef __glibc_macro_warning1 +#undef __glibc_unlikely +#undef __inline +#undef __ptr_t +#undef __restrict +#undef __restrict_arr +#undef __va_arg_pack +#undef __va_arg_pack_len +#undef __warnattr +#undef __warndecl + +/* Include our copy of glibc <sys/cdefs.h>. */ +#include <cdefs.h> + +/* __inline is too pessimistic for non-GCC. */ +#undef __inline +#ifndef HAVE___INLINE +# if 199901 <= __STDC_VERSION__ || defined inline +# define __inline inline +# else +# define __inline +# endif +#endif + + +/* A substitute for glibc <libc-symbols.h>, good enough for Gnulib. */ +#define attribute_hidden +#define libc_hidden_proto(name, ...) 
+#define libc_hidden_def(name) +#define libc_hidden_weak(name) +#define libc_hidden_ver(local, name) +#define strong_alias(name, aliasname) +#define weak_alias(name, aliasname) + +/* A substitute for glibc , good enough for Gnulib. */ +#define SHLIB_COMPAT(lib, introduced, obsoleted) 0 +#define versioned_symbol(lib, local, symbol, version) diff --git a/contrib/grep/lib/localcharset.c b/contrib/grep/lib/localcharset.c index 1f02aa5988..721c8a9d13 100644 --- a/contrib/grep/lib/localcharset.c +++ b/contrib/grep/lib/localcharset.c @@ -1,6 +1,6 @@ /* Determine a canonical name for the current locale's character encoding. - Copyright (C) 2000-2006, 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2000-2006, 2008-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along - with this program; if not, see . */ + with this program; if not, see . */ /* Written by Bruno Haible . */ @@ -22,7 +22,6 @@ /* Specification. */ #include "localcharset.h" -#include #include #include #include @@ -32,7 +31,7 @@ # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */ #endif -#if defined _WIN32 || defined __WIN32__ +#if defined _WIN32 && !defined __CYGWIN__ # define WINDOWS_NATIVE # include #endif @@ -45,11 +44,10 @@ #endif #if !defined WINDOWS_NATIVE -# include # if HAVE_LANGINFO_CODESET # include # else -# if 0 /* see comment below */ +# if 0 /* see comment regarding use of setlocale(), below */ # include # endif # endif @@ -60,6 +58,9 @@ #elif defined WINDOWS_NATIVE # define WIN32_LEAN_AND_MEAN # include + /* For the use of setlocale() below, the Gnulib override in setlocale.c is + not needed; see the platform lists in setlocale_null.m4. 
*/ +# undef setlocale #endif #if defined OS2 # define INCL_DOS @@ -71,318 +72,755 @@ # include #endif -#if ENABLE_RELOCATABLE -# include "relocatable.h" -#else -# define relocate(pathname) (pathname) -#endif -/* Get LIBDIR. */ -#ifndef LIBDIR -# include "configmake.h" -#endif +#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2 -/* Define O_NOFOLLOW to 0 on platforms where it does not exist. */ -#ifndef O_NOFOLLOW -# define O_NOFOLLOW 0 -#endif +/* On these platforms, we use a mapping from non-canonical encoding name + to GNU canonical encoding name. */ -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ - /* Native Windows, Cygwin, OS/2, DOS */ -# define ISSLASH(C) ((C) == '/' || (C) == '\\') -#endif - -#ifndef DIRECTORY_SEPARATOR -# define DIRECTORY_SEPARATOR '/' -#endif - -#ifndef ISSLASH -# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR) -#endif +/* With glibc-2.1 or newer, we don't need any canonicalization, + because glibc has iconv and both glibc and libiconv support all + GNU canonical names directly. */ +# if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__) -#if HAVE_DECL_GETC_UNLOCKED -# undef getc -# define getc getc_unlocked -#endif - -/* The following static variable is declared 'volatile' to avoid a - possible multithread problem in the function get_charset_aliases. If we - are running in a threaded environment, and if two threads initialize - 'charset_aliases' simultaneously, both will produce the same value, - and everything will be ok if the two assignments to 'charset_aliases' - are atomic. But I don't know what will happen if the two assignments mix. */ -#if __STDC__ != 1 -# define volatile /* empty */ -#endif -/* Pointer to the contents of the charset.alias file, if it has already been - read, else NULL. Its format is: - ALIAS_1 '\0' CANONICAL_1 '\0' ... 
ALIAS_n '\0' CANONICAL_n '\0' '\0' */ -static const char * volatile charset_aliases; - -/* Return a pointer to the contents of the charset.alias file. */ -static const char * -get_charset_aliases (void) +struct table_entry { - const char *cp; - - cp = charset_aliases; - if (cp == NULL) - { -#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__ || defined OS2) - const char *dir; - const char *base = "charset.alias"; - char *file_name; - - /* Make it possible to override the charset.alias location. This is - necessary for running the testsuite before "make install". */ - dir = getenv ("CHARSETALIASDIR"); - if (dir == NULL || dir[0] == '\0') - dir = relocate (LIBDIR); - - /* Concatenate dir and base into freshly allocated file_name. */ - { - size_t dir_len = strlen (dir); - size_t base_len = strlen (base); - int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1])); - file_name = (char *) malloc (dir_len + add_slash + base_len + 1); - if (file_name != NULL) - { - memcpy (file_name, dir, dir_len); - if (add_slash) - file_name[dir_len] = DIRECTORY_SEPARATOR; - memcpy (file_name + dir_len + add_slash, base, base_len + 1); - } - } - - if (file_name == NULL) - /* Out of memory. Treat the file as empty. */ - cp = ""; - else - { - int fd; - - /* Open the file. Reject symbolic links on platforms that support - O_NOFOLLOW. This is a security feature. Without it, an attacker - could retrieve parts of the contents (namely, the tail of the - first line that starts with "* ") of an arbitrary file by placing - a symbolic link to that file under the name "charset.alias" in - some writable directory and defining the environment variable - CHARSETALIASDIR to point to that directory. */ - fd = open (file_name, - O_RDONLY | (HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0)); - if (fd < 0) - /* File not found. Treat it as empty. */ - cp = ""; - else - { - FILE *fp; - - fp = fdopen (fd, "r"); - if (fp == NULL) - { - /* Out of memory. Treat the file as empty. 
*/ - close (fd); - cp = ""; - } - else - { - /* Parse the file's contents. */ - char *res_ptr = NULL; - size_t res_size = 0; - - for (;;) - { - int c; - char buf1[50+1]; - char buf2[50+1]; - size_t l1, l2; - char *old_res_ptr; - - c = getc (fp); - if (c == EOF) - break; - if (c == '\n' || c == ' ' || c == '\t') - continue; - if (c == '#') - { - /* Skip comment, to end of line. */ - do - c = getc (fp); - while (!(c == EOF || c == '\n')); - if (c == EOF) - break; - continue; - } - ungetc (c, fp); - if (fscanf (fp, "%50s %50s", buf1, buf2) < 2) - break; - l1 = strlen (buf1); - l2 = strlen (buf2); - old_res_ptr = res_ptr; - if (res_size == 0) - { - res_size = l1 + 1 + l2 + 1; - res_ptr = (char *) malloc (res_size + 1); - } - else - { - res_size += l1 + 1 + l2 + 1; - res_ptr = (char *) realloc (res_ptr, res_size + 1); - } - if (res_ptr == NULL) - { - /* Out of memory. */ - res_size = 0; - free (old_res_ptr); - break; - } - strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1); - strcpy (res_ptr + res_size - (l2 + 1), buf2); - } - fclose (fp); - if (res_size == 0) - cp = ""; - else - { - *(res_ptr + res_size) = '\0'; - cp = res_ptr; - } - } - } + const char alias[11+1]; + const char canonical[11+1]; +}; + +/* Table of platform-dependent mappings, sorted in ascending order. */ +static const struct table_entry alias_table[] = + { +# if defined __FreeBSD__ /* FreeBSD */ + /*{ "ARMSCII-8", "ARMSCII-8" },*/ + { "Big5", "BIG5" }, + { "C", "ASCII" }, + /*{ "CP1131", "CP1131" },*/ + /*{ "CP1251", "CP1251" },*/ + /*{ "CP866", "CP866" },*/ + /*{ "GB18030", "GB18030" },*/ + /*{ "GB2312", "GB2312" },*/ + /*{ "GBK", "GBK" },*/ + /*{ "ISCII-DEV", "?" 
},*/ + { "ISO8859-1", "ISO-8859-1" }, + { "ISO8859-13", "ISO-8859-13" }, + { "ISO8859-15", "ISO-8859-15" }, + { "ISO8859-2", "ISO-8859-2" }, + { "ISO8859-5", "ISO-8859-5" }, + { "ISO8859-7", "ISO-8859-7" }, + { "ISO8859-9", "ISO-8859-9" }, + /*{ "KOI8-R", "KOI8-R" },*/ + /*{ "KOI8-U", "KOI8-U" },*/ + { "SJIS", "SHIFT_JIS" }, + { "US-ASCII", "ASCII" }, + { "eucCN", "GB2312" }, + { "eucJP", "EUC-JP" }, + { "eucKR", "EUC-KR" } +# define alias_table_defined +# endif +# if defined __NetBSD__ /* NetBSD */ + { "646", "ASCII" }, + /*{ "ARMSCII-8", "ARMSCII-8" },*/ + /*{ "BIG5", "BIG5" },*/ + { "Big5-HKSCS", "BIG5-HKSCS" }, + /*{ "CP1251", "CP1251" },*/ + /*{ "CP866", "CP866" },*/ + /*{ "GB18030", "GB18030" },*/ + /*{ "GB2312", "GB2312" },*/ + { "ISO8859-1", "ISO-8859-1" }, + { "ISO8859-13", "ISO-8859-13" }, + { "ISO8859-15", "ISO-8859-15" }, + { "ISO8859-2", "ISO-8859-2" }, + { "ISO8859-4", "ISO-8859-4" }, + { "ISO8859-5", "ISO-8859-5" }, + { "ISO8859-7", "ISO-8859-7" }, + /*{ "KOI8-R", "KOI8-R" },*/ + /*{ "KOI8-U", "KOI8-U" },*/ + /*{ "PT154", "PT154" },*/ + { "SJIS", "SHIFT_JIS" }, + { "eucCN", "GB2312" }, + { "eucJP", "EUC-JP" }, + { "eucKR", "EUC-KR" }, + { "eucTW", "EUC-TW" } +# define alias_table_defined +# endif +# if defined __OpenBSD__ /* OpenBSD */ + { "646", "ASCII" }, + { "ISO8859-1", "ISO-8859-1" }, + { "ISO8859-13", "ISO-8859-13" }, + { "ISO8859-15", "ISO-8859-15" }, + { "ISO8859-2", "ISO-8859-2" }, + { "ISO8859-4", "ISO-8859-4" }, + { "ISO8859-5", "ISO-8859-5" }, + { "ISO8859-7", "ISO-8859-7" }, + { "US-ASCII", "ASCII" } +# define alias_table_defined +# endif +# if defined __APPLE__ && defined __MACH__ /* Mac OS X */ + /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is + useless: + - It returns the empty string when LANG is set to a locale of the + form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8 + LC_CTYPE file. 
+ - The environment variables LANG, LC_CTYPE, LC_ALL are not set by + the system; nl_langinfo(CODESET) returns "US-ASCII" in this case. + - The documentation says: + "... all code that calls BSD system routines should ensure + that the const *char parameters of these routines are in UTF-8 + encoding. All BSD system functions expect their string + parameters to be in UTF-8 encoding and nothing else." + It also says + "An additional caveat is that string parameters for files, + paths, and other file-system entities must be in canonical + UTF-8. In a canonical UTF-8 Unicode string, all decomposable + characters are decomposed ..." + but this is not true: You can pass non-decomposed UTF-8 strings + to file system functions, and it is the OS which will convert + them to decomposed UTF-8 before accessing the file system. + - The Apple Terminal application displays UTF-8 by default. + - However, other applications are free to use different encodings: + - xterm uses ISO-8859-1 by default. + - TextEdit uses MacRoman by default. + We prefer UTF-8 over decomposed UTF-8-MAC because one should + minimize the use of decomposed Unicode. Unfortunately, through the + Darwin file system, decomposed UTF-8 strings are leaked into user + space nevertheless. + Then there are also the locales with encodings other than US-ASCII + and UTF-8. These locales can be occasionally useful to users (e.g. + when grepping through ISO-8859-1 encoded text files), when all their + file names are in US-ASCII. + */ + { "ARMSCII-8", "ARMSCII-8" }, + { "Big5", "BIG5" }, + { "Big5HKSCS", "BIG5-HKSCS" }, + { "CP1131", "CP1131" }, + { "CP1251", "CP1251" }, + { "CP866", "CP866" }, + { "CP949", "CP949" }, + { "GB18030", "GB18030" }, + { "GB2312", "GB2312" }, + { "GBK", "GBK" }, + /*{ "ISCII-DEV", "?" 
},*/ + { "ISO8859-1", "ISO-8859-1" }, + { "ISO8859-13", "ISO-8859-13" }, + { "ISO8859-15", "ISO-8859-15" }, + { "ISO8859-2", "ISO-8859-2" }, + { "ISO8859-4", "ISO-8859-4" }, + { "ISO8859-5", "ISO-8859-5" }, + { "ISO8859-7", "ISO-8859-7" }, + { "ISO8859-9", "ISO-8859-9" }, + { "KOI8-R", "KOI8-R" }, + { "KOI8-U", "KOI8-U" }, + { "PT154", "PT154" }, + { "SJIS", "SHIFT_JIS" }, + { "eucCN", "GB2312" }, + { "eucJP", "EUC-JP" }, + { "eucKR", "EUC-KR" } +# define alias_table_defined +# endif +# if defined _AIX /* AIX */ + /*{ "GBK", "GBK" },*/ + { "IBM-1046", "CP1046" }, + { "IBM-1124", "CP1124" }, + { "IBM-1129", "CP1129" }, + { "IBM-1252", "CP1252" }, + { "IBM-850", "CP850" }, + { "IBM-856", "CP856" }, + { "IBM-921", "ISO-8859-13" }, + { "IBM-922", "CP922" }, + { "IBM-932", "CP932" }, + { "IBM-943", "CP943" }, + { "IBM-eucCN", "GB2312" }, + { "IBM-eucJP", "EUC-JP" }, + { "IBM-eucKR", "EUC-KR" }, + { "IBM-eucTW", "EUC-TW" }, + { "ISO8859-1", "ISO-8859-1" }, + { "ISO8859-15", "ISO-8859-15" }, + { "ISO8859-2", "ISO-8859-2" }, + { "ISO8859-5", "ISO-8859-5" }, + { "ISO8859-6", "ISO-8859-6" }, + { "ISO8859-7", "ISO-8859-7" }, + { "ISO8859-8", "ISO-8859-8" }, + { "ISO8859-9", "ISO-8859-9" }, + { "TIS-620", "TIS-620" }, + /*{ "UTF-8", "UTF-8" },*/ + { "big5", "BIG5" } +# define alias_table_defined +# endif +# if defined __hpux /* HP-UX */ + { "SJIS", "SHIFT_JIS" }, + { "arabic8", "HP-ARABIC8" }, + { "big5", "BIG5" }, + { "cp1251", "CP1251" }, + { "eucJP", "EUC-JP" }, + { "eucKR", "EUC-KR" }, + { "eucTW", "EUC-TW" }, + { "gb18030", "GB18030" }, + { "greek8", "HP-GREEK8" }, + { "hebrew8", "HP-HEBREW8" }, + { "hkbig5", "BIG5-HKSCS" }, + { "hp15CN", "GB2312" }, + { "iso88591", "ISO-8859-1" }, + { "iso885913", "ISO-8859-13" }, + { "iso885915", "ISO-8859-15" }, + { "iso88592", "ISO-8859-2" }, + { "iso88594", "ISO-8859-4" }, + { "iso88595", "ISO-8859-5" }, + { "iso88596", "ISO-8859-6" }, + { "iso88597", "ISO-8859-7" }, + { "iso88598", "ISO-8859-8" }, + { "iso88599", "ISO-8859-9" }, + { 
"kana8", "HP-KANA8" }, + { "koi8r", "KOI8-R" }, + { "roman8", "HP-ROMAN8" }, + { "tis620", "TIS-620" }, + { "turkish8", "HP-TURKISH8" }, + { "utf8", "UTF-8" } +# define alias_table_defined +# endif +# if defined __sgi /* IRIX */ + { "ISO8859-1", "ISO-8859-1" }, + { "ISO8859-15", "ISO-8859-15" }, + { "ISO8859-2", "ISO-8859-2" }, + { "ISO8859-5", "ISO-8859-5" }, + { "ISO8859-7", "ISO-8859-7" }, + { "ISO8859-9", "ISO-8859-9" }, + { "eucCN", "GB2312" }, + { "eucJP", "EUC-JP" }, + { "eucKR", "EUC-KR" }, + { "eucTW", "EUC-TW" } +# define alias_table_defined +# endif +# if defined __osf__ /* OSF/1 */ + /*{ "GBK", "GBK" },*/ + { "ISO8859-1", "ISO-8859-1" }, + { "ISO8859-15", "ISO-8859-15" }, + { "ISO8859-2", "ISO-8859-2" }, + { "ISO8859-4", "ISO-8859-4" }, + { "ISO8859-5", "ISO-8859-5" }, + { "ISO8859-7", "ISO-8859-7" }, + { "ISO8859-8", "ISO-8859-8" }, + { "ISO8859-9", "ISO-8859-9" }, + { "KSC5601", "CP949" }, + { "SJIS", "SHIFT_JIS" }, + { "TACTIS", "TIS-620" }, + /*{ "UTF-8", "UTF-8" },*/ + { "big5", "BIG5" }, + { "cp850", "CP850" }, + { "dechanyu", "DEC-HANYU" }, + { "dechanzi", "GB2312" }, + { "deckanji", "DEC-KANJI" }, + { "deckorean", "EUC-KR" }, + { "eucJP", "EUC-JP" }, + { "eucKR", "EUC-KR" }, + { "eucTW", "EUC-TW" }, + { "sdeckanji", "EUC-JP" } +# define alias_table_defined +# endif +# if defined __sun /* Solaris */ + { "5601", "EUC-KR" }, + { "646", "ASCII" }, + /*{ "BIG5", "BIG5" },*/ + { "Big5-HKSCS", "BIG5-HKSCS" }, + { "GB18030", "GB18030" }, + /*{ "GBK", "GBK" },*/ + { "ISO8859-1", "ISO-8859-1" }, + { "ISO8859-11", "TIS-620" }, + { "ISO8859-13", "ISO-8859-13" }, + { "ISO8859-15", "ISO-8859-15" }, + { "ISO8859-2", "ISO-8859-2" }, + { "ISO8859-3", "ISO-8859-3" }, + { "ISO8859-4", "ISO-8859-4" }, + { "ISO8859-5", "ISO-8859-5" }, + { "ISO8859-6", "ISO-8859-6" }, + { "ISO8859-7", "ISO-8859-7" }, + { "ISO8859-8", "ISO-8859-8" }, + { "ISO8859-9", "ISO-8859-9" }, + { "PCK", "SHIFT_JIS" }, + { "TIS620.2533", "TIS-620" }, + /*{ "UTF-8", "UTF-8" },*/ + { "ansi-1251", 
"CP1251" }, + { "cns11643", "EUC-TW" }, + { "eucJP", "EUC-JP" }, + { "gb2312", "GB2312" }, + { "koi8-r", "KOI8-R" } +# define alias_table_defined +# endif +# if defined __minix /* Minix */ + { "646", "ASCII" } +# define alias_table_defined +# endif +# if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Windows */ + { "CP1361", "JOHAB" }, + { "CP20127", "ASCII" }, + { "CP20866", "KOI8-R" }, + { "CP20936", "GB2312" }, + { "CP21866", "KOI8-RU" }, + { "CP28591", "ISO-8859-1" }, + { "CP28592", "ISO-8859-2" }, + { "CP28593", "ISO-8859-3" }, + { "CP28594", "ISO-8859-4" }, + { "CP28595", "ISO-8859-5" }, + { "CP28596", "ISO-8859-6" }, + { "CP28597", "ISO-8859-7" }, + { "CP28598", "ISO-8859-8" }, + { "CP28599", "ISO-8859-9" }, + { "CP28605", "ISO-8859-15" }, + { "CP38598", "ISO-8859-8" }, + { "CP51932", "EUC-JP" }, + { "CP51936", "GB2312" }, + { "CP51949", "EUC-KR" }, + { "CP51950", "EUC-TW" }, + { "CP54936", "GB18030" }, + { "CP65001", "UTF-8" }, + { "CP936", "GBK" } +# define alias_table_defined +# endif +# if defined OS2 /* OS/2 */ + /* The list of encodings is taken from "List of OS/2 Codepages" + by Alex Taylor: + . + See also "__convcp() of kLIBC": + . 
*/ + { "CP1004", "CP1252" }, + /*{ "CP1041", "CP943" },*/ + /*{ "CP1088", "CP949" },*/ + { "CP1089", "ISO-8859-6" }, + /*{ "CP1114", "CP950" },*/ + /*{ "CP1115", "GB2312" },*/ + { "CP1208", "UTF-8" }, + /*{ "CP1380", "GB2312" },*/ + { "CP1381", "GB2312" }, + { "CP1383", "GB2312" }, + { "CP1386", "GBK" }, + /*{ "CP301", "CP943" },*/ + { "CP3372", "EUC-JP" }, + { "CP4946", "CP850" }, + /*{ "CP5048", "JIS_X0208-1990" },*/ + /*{ "CP5049", "JIS_X0212-1990" },*/ + /*{ "CP5067", "KS_C_5601-1987" },*/ + { "CP813", "ISO-8859-7" }, + { "CP819", "ISO-8859-1" }, + { "CP878", "KOI8-R" }, + /*{ "CP897", "CP943" },*/ + { "CP912", "ISO-8859-2" }, + { "CP913", "ISO-8859-3" }, + { "CP914", "ISO-8859-4" }, + { "CP915", "ISO-8859-5" }, + { "CP916", "ISO-8859-8" }, + { "CP920", "ISO-8859-9" }, + { "CP921", "ISO-8859-13" }, + { "CP923", "ISO-8859-15" }, + /*{ "CP941", "CP943" },*/ + /*{ "CP947", "CP950" },*/ + /*{ "CP951", "CP949" },*/ + /*{ "CP952", "JIS_X0208-1990" },*/ + /*{ "CP953", "JIS_X0212-1990" },*/ + { "CP954", "EUC-JP" }, + { "CP964", "EUC-TW" }, + { "CP970", "EUC-KR" }, + /*{ "CP971", "KS_C_5601-1987" },*/ + { "IBM-1004", "CP1252" }, + /*{ "IBM-1006", "?" },*/ + /*{ "IBM-1008", "?" },*/ + /*{ "IBM-1041", "CP943" },*/ + /*{ "IBM-1051", "?" },*/ + /*{ "IBM-1088", "CP949" },*/ + { "IBM-1089", "ISO-8859-6" }, + /*{ "IBM-1098", "?" },*/ + /*{ "IBM-1114", "CP950" },*/ + /*{ "IBM-1115", "GB2312" },*/ + /*{ "IBM-1116", "?" },*/ + /*{ "IBM-1117", "?" },*/ + /*{ "IBM-1118", "?" },*/ + /*{ "IBM-1119", "?" },*/ + { "IBM-1124", "CP1124" }, + { "IBM-1125", "CP1125" }, + { "IBM-1131", "CP1131" }, + { "IBM-1208", "UTF-8" }, + { "IBM-1250", "CP1250" }, + { "IBM-1251", "CP1251" }, + { "IBM-1252", "CP1252" }, + { "IBM-1253", "CP1253" }, + { "IBM-1254", "CP1254" }, + { "IBM-1255", "CP1255" }, + { "IBM-1256", "CP1256" }, + { "IBM-1257", "CP1257" }, + /*{ "IBM-1275", "?" },*/ + /*{ "IBM-1276", "?" },*/ + /*{ "IBM-1277", "?" },*/ + /*{ "IBM-1280", "?" },*/ + /*{ "IBM-1281", "?" 
},*/ + /*{ "IBM-1282", "?" },*/ + /*{ "IBM-1283", "?" },*/ + /*{ "IBM-1380", "GB2312" },*/ + { "IBM-1381", "GB2312" }, + { "IBM-1383", "GB2312" }, + { "IBM-1386", "GBK" }, + /*{ "IBM-301", "CP943" },*/ + { "IBM-3372", "EUC-JP" }, + { "IBM-367", "ASCII" }, + { "IBM-437", "CP437" }, + { "IBM-4946", "CP850" }, + /*{ "IBM-5048", "JIS_X0208-1990" },*/ + /*{ "IBM-5049", "JIS_X0212-1990" },*/ + /*{ "IBM-5067", "KS_C_5601-1987" },*/ + { "IBM-813", "ISO-8859-7" }, + { "IBM-819", "ISO-8859-1" }, + { "IBM-850", "CP850" }, + /*{ "IBM-851", "?" },*/ + { "IBM-852", "CP852" }, + { "IBM-855", "CP855" }, + { "IBM-856", "CP856" }, + { "IBM-857", "CP857" }, + /*{ "IBM-859", "?" },*/ + { "IBM-860", "CP860" }, + { "IBM-861", "CP861" }, + { "IBM-862", "CP862" }, + { "IBM-863", "CP863" }, + { "IBM-864", "CP864" }, + { "IBM-865", "CP865" }, + { "IBM-866", "CP866" }, + /*{ "IBM-868", "?" },*/ + { "IBM-869", "CP869" }, + { "IBM-874", "CP874" }, + { "IBM-878", "KOI8-R" }, + /*{ "IBM-895", "?" },*/ + /*{ "IBM-897", "CP943" },*/ + /*{ "IBM-907", "?" },*/ + /*{ "IBM-909", "?" },*/ + { "IBM-912", "ISO-8859-2" }, + { "IBM-913", "ISO-8859-3" }, + { "IBM-914", "ISO-8859-4" }, + { "IBM-915", "ISO-8859-5" }, + { "IBM-916", "ISO-8859-8" }, + { "IBM-920", "ISO-8859-9" }, + { "IBM-921", "ISO-8859-13" }, + { "IBM-922", "CP922" }, + { "IBM-923", "ISO-8859-15" }, + { "IBM-932", "CP932" }, + /*{ "IBM-941", "CP943" },*/ + /*{ "IBM-942", "?" },*/ + { "IBM-943", "CP943" }, + /*{ "IBM-947", "CP950" },*/ + { "IBM-949", "CP949" }, + { "IBM-950", "CP950" }, + /*{ "IBM-951", "CP949" },*/ + /*{ "IBM-952", "JIS_X0208-1990" },*/ + /*{ "IBM-953", "JIS_X0212-1990" },*/ + { "IBM-954", "EUC-JP" }, + /*{ "IBM-955", "?" 
},*/ + { "IBM-964", "EUC-TW" }, + { "IBM-970", "EUC-KR" }, + /*{ "IBM-971", "KS_C_5601-1987" },*/ + { "IBM-eucCN", "GB2312" }, + { "IBM-eucJP", "EUC-JP" }, + { "IBM-eucKR", "EUC-KR" }, + { "IBM-eucTW", "EUC-TW" }, + { "IBM33722", "EUC-JP" }, + { "ISO8859-1", "ISO-8859-1" }, + { "ISO8859-2", "ISO-8859-2" }, + { "ISO8859-3", "ISO-8859-3" }, + { "ISO8859-4", "ISO-8859-4" }, + { "ISO8859-5", "ISO-8859-5" }, + { "ISO8859-6", "ISO-8859-6" }, + { "ISO8859-7", "ISO-8859-7" }, + { "ISO8859-8", "ISO-8859-8" }, + { "ISO8859-9", "ISO-8859-9" }, + /*{ "JISX0201-1976", "JISX0201-1976" },*/ + /*{ "JISX0208-1978", "?" },*/ + /*{ "JISX0208-1983", "JIS_X0208-1983" },*/ + /*{ "JISX0208-1990", "JIS_X0208-1990" },*/ + /*{ "JISX0212-1990", "JIS_X0212-1990" },*/ + /*{ "KSC5601-1987", "KS_C_5601-1987" },*/ + { "SJIS-1", "CP943" }, + { "SJIS-2", "CP943" }, + { "eucJP", "EUC-JP" }, + { "eucKR", "EUC-KR" }, + { "eucTW-1993", "EUC-TW" } +# define alias_table_defined +# endif +# if defined VMS /* OpenVMS */ + /* The list of encodings is taken from the OpenVMS 7.3-1 documentation + "Compaq C Run-Time Library Reference Manual for OpenVMS systems" + section 10.7 "Handling Different Character Sets". */ + { "DECHANYU", "DEC-HANYU" }, + { "DECHANZI", "GB2312" }, + { "DECKANJI", "DEC-KANJI" }, + { "DECKOREAN", "EUC-KR" }, + { "ISO8859-1", "ISO-8859-1" }, + { "ISO8859-2", "ISO-8859-2" }, + { "ISO8859-5", "ISO-8859-5" }, + { "ISO8859-7", "ISO-8859-7" }, + { "ISO8859-8", "ISO-8859-8" }, + { "ISO8859-9", "ISO-8859-9" }, + { "SDECKANJI", "EUC-JP" }, + { "SJIS", "SHIFT_JIS" }, + { "eucJP", "EUC-JP" }, + { "eucTW", "EUC-TW" } +# define alias_table_defined +# endif +# ifndef alias_table_defined + /* Just a dummy entry, to avoid a C syntax error. */ + { "", "" } +# endif + }; - free (file_name); - } +# endif #else -# if defined DARWIN7 - /* To avoid the trouble of installing a file that is shared by many - GNU packages -- many packaging systems have problems with this --, - simply inline the aliases here. 
*/ - cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" - "ISO8859-2" "\0" "ISO-8859-2" "\0" - "ISO8859-4" "\0" "ISO-8859-4" "\0" - "ISO8859-5" "\0" "ISO-8859-5" "\0" - "ISO8859-7" "\0" "ISO-8859-7" "\0" - "ISO8859-9" "\0" "ISO-8859-9" "\0" - "ISO8859-13" "\0" "ISO-8859-13" "\0" - "ISO8859-15" "\0" "ISO-8859-15" "\0" - "KOI8-R" "\0" "KOI8-R" "\0" - "KOI8-U" "\0" "KOI8-U" "\0" - "CP866" "\0" "CP866" "\0" - "CP949" "\0" "CP949" "\0" - "CP1131" "\0" "CP1131" "\0" - "CP1251" "\0" "CP1251" "\0" - "eucCN" "\0" "GB2312" "\0" - "GB2312" "\0" "GB2312" "\0" - "eucJP" "\0" "EUC-JP" "\0" - "eucKR" "\0" "EUC-KR" "\0" - "Big5" "\0" "BIG5" "\0" - "Big5HKSCS" "\0" "BIG5-HKSCS" "\0" - "GBK" "\0" "GBK" "\0" - "GB18030" "\0" "GB18030" "\0" - "SJIS" "\0" "SHIFT_JIS" "\0" - "ARMSCII-8" "\0" "ARMSCII-8" "\0" - "PT154" "\0" "PT154" "\0" - /*"ISCII-DEV" "\0" "?" "\0"*/ - "*" "\0" "UTF-8" "\0"; -# endif +/* On these platforms, we use a mapping from locale name to GNU canonical + encoding name. */ -# if defined VMS - /* To avoid the troubles of an extra file charset.alias_vms in the - sources of many GNU packages, simply inline the aliases here. */ - /* The list of encodings is taken from the OpenVMS 7.3-1 documentation - "Compaq C Run-Time Library Reference Manual for OpenVMS systems" - section 10.7 "Handling Different Character Sets". 
*/ - cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" - "ISO8859-2" "\0" "ISO-8859-2" "\0" - "ISO8859-5" "\0" "ISO-8859-5" "\0" - "ISO8859-7" "\0" "ISO-8859-7" "\0" - "ISO8859-8" "\0" "ISO-8859-8" "\0" - "ISO8859-9" "\0" "ISO-8859-9" "\0" - /* Japanese */ - "eucJP" "\0" "EUC-JP" "\0" - "SJIS" "\0" "SHIFT_JIS" "\0" - "DECKANJI" "\0" "DEC-KANJI" "\0" - "SDECKANJI" "\0" "EUC-JP" "\0" - /* Chinese */ - "eucTW" "\0" "EUC-TW" "\0" - "DECHANYU" "\0" "DEC-HANYU" "\0" - "DECHANZI" "\0" "GB2312" "\0" - /* Korean */ - "DECKOREAN" "\0" "EUC-KR" "\0"; +struct table_entry +{ + const char locale[17+1]; + const char canonical[11+1]; +}; + +/* Table of platform-dependent mappings, sorted in ascending order. */ +static const struct table_entry locale_table[] = + { +# if defined __FreeBSD__ /* FreeBSD 4.2 */ + { "cs_CZ.ISO_8859-2", "ISO-8859-2" }, + { "da_DK.DIS_8859-15", "ISO-8859-15" }, + { "da_DK.ISO_8859-1", "ISO-8859-1" }, + { "de_AT.DIS_8859-15", "ISO-8859-15" }, + { "de_AT.ISO_8859-1", "ISO-8859-1" }, + { "de_CH.DIS_8859-15", "ISO-8859-15" }, + { "de_CH.ISO_8859-1", "ISO-8859-1" }, + { "de_DE.DIS_8859-15", "ISO-8859-15" }, + { "de_DE.ISO_8859-1", "ISO-8859-1" }, + { "en_AU.DIS_8859-15", "ISO-8859-15" }, + { "en_AU.ISO_8859-1", "ISO-8859-1" }, + { "en_CA.DIS_8859-15", "ISO-8859-15" }, + { "en_CA.ISO_8859-1", "ISO-8859-1" }, + { "en_GB.DIS_8859-15", "ISO-8859-15" }, + { "en_GB.ISO_8859-1", "ISO-8859-1" }, + { "en_US.DIS_8859-15", "ISO-8859-15" }, + { "en_US.ISO_8859-1", "ISO-8859-1" }, + { "es_ES.DIS_8859-15", "ISO-8859-15" }, + { "es_ES.ISO_8859-1", "ISO-8859-1" }, + { "fi_FI.DIS_8859-15", "ISO-8859-15" }, + { "fi_FI.ISO_8859-1", "ISO-8859-1" }, + { "fr_BE.DIS_8859-15", "ISO-8859-15" }, + { "fr_BE.ISO_8859-1", "ISO-8859-1" }, + { "fr_CA.DIS_8859-15", "ISO-8859-15" }, + { "fr_CA.ISO_8859-1", "ISO-8859-1" }, + { "fr_CH.DIS_8859-15", "ISO-8859-15" }, + { "fr_CH.ISO_8859-1", "ISO-8859-1" }, + { "fr_FR.DIS_8859-15", "ISO-8859-15" }, + { "fr_FR.ISO_8859-1", "ISO-8859-1" }, + { 
"hr_HR.ISO_8859-2", "ISO-8859-2" }, + { "hu_HU.ISO_8859-2", "ISO-8859-2" }, + { "is_IS.DIS_8859-15", "ISO-8859-15" }, + { "is_IS.ISO_8859-1", "ISO-8859-1" }, + { "it_CH.DIS_8859-15", "ISO-8859-15" }, + { "it_CH.ISO_8859-1", "ISO-8859-1" }, + { "it_IT.DIS_8859-15", "ISO-8859-15" }, + { "it_IT.ISO_8859-1", "ISO-8859-1" }, + { "ja_JP.EUC", "EUC-JP" }, + { "ja_JP.SJIS", "SHIFT_JIS" }, + { "ja_JP.Shift_JIS", "SHIFT_JIS" }, + { "ko_KR.EUC", "EUC-KR" }, + { "la_LN.ASCII", "ASCII" }, + { "la_LN.DIS_8859-15", "ISO-8859-15" }, + { "la_LN.ISO_8859-1", "ISO-8859-1" }, + { "la_LN.ISO_8859-2", "ISO-8859-2" }, + { "la_LN.ISO_8859-4", "ISO-8859-4" }, + { "lt_LN.ASCII", "ASCII" }, + { "lt_LN.DIS_8859-15", "ISO-8859-15" }, + { "lt_LN.ISO_8859-1", "ISO-8859-1" }, + { "lt_LN.ISO_8859-2", "ISO-8859-2" }, + { "lt_LT.ISO_8859-4", "ISO-8859-4" }, + { "nl_BE.DIS_8859-15", "ISO-8859-15" }, + { "nl_BE.ISO_8859-1", "ISO-8859-1" }, + { "nl_NL.DIS_8859-15", "ISO-8859-15" }, + { "nl_NL.ISO_8859-1", "ISO-8859-1" }, + { "no_NO.DIS_8859-15", "ISO-8859-15" }, + { "no_NO.ISO_8859-1", "ISO-8859-1" }, + { "pl_PL.ISO_8859-2", "ISO-8859-2" }, + { "pt_PT.DIS_8859-15", "ISO-8859-15" }, + { "pt_PT.ISO_8859-1", "ISO-8859-1" }, + { "ru_RU.CP866", "CP866" }, + { "ru_RU.ISO_8859-5", "ISO-8859-5" }, + { "ru_RU.KOI8-R", "KOI8-R" }, + { "ru_SU.CP866", "CP866" }, + { "ru_SU.ISO_8859-5", "ISO-8859-5" }, + { "ru_SU.KOI8-R", "KOI8-R" }, + { "sl_SI.ISO_8859-2", "ISO-8859-2" }, + { "sv_SE.DIS_8859-15", "ISO-8859-15" }, + { "sv_SE.ISO_8859-1", "ISO-8859-1" }, + { "uk_UA.KOI8-U", "KOI8-U" }, + { "zh_CN.EUC", "GB2312" }, + { "zh_TW.BIG5", "BIG5" }, + { "zh_TW.Big5", "BIG5" } +# define locale_table_defined # endif - -# if defined WINDOWS_NATIVE || defined __CYGWIN__ - /* To avoid the troubles of installing a separate file in the same - directory as the DLL and of retrieving the DLL's directory at - runtime, simply inline the aliases here. 
*/ - - cp = "CP936" "\0" "GBK" "\0" - "CP1361" "\0" "JOHAB" "\0" - "CP20127" "\0" "ASCII" "\0" - "CP20866" "\0" "KOI8-R" "\0" - "CP20936" "\0" "GB2312" "\0" - "CP21866" "\0" "KOI8-RU" "\0" - "CP28591" "\0" "ISO-8859-1" "\0" - "CP28592" "\0" "ISO-8859-2" "\0" - "CP28593" "\0" "ISO-8859-3" "\0" - "CP28594" "\0" "ISO-8859-4" "\0" - "CP28595" "\0" "ISO-8859-5" "\0" - "CP28596" "\0" "ISO-8859-6" "\0" - "CP28597" "\0" "ISO-8859-7" "\0" - "CP28598" "\0" "ISO-8859-8" "\0" - "CP28599" "\0" "ISO-8859-9" "\0" - "CP28605" "\0" "ISO-8859-15" "\0" - "CP38598" "\0" "ISO-8859-8" "\0" - "CP51932" "\0" "EUC-JP" "\0" - "CP51936" "\0" "GB2312" "\0" - "CP51949" "\0" "EUC-KR" "\0" - "CP51950" "\0" "EUC-TW" "\0" - "CP54936" "\0" "GB18030" "\0" - "CP65001" "\0" "UTF-8" "\0"; +# if defined __DJGPP__ /* DOS / DJGPP 2.03 */ + /* The encodings given here may not all be correct. + If you find that the encoding given for your language and + country is not the one your DOS machine actually uses, just + correct it in this file, and send a mail to + Juan Manuel Guerrero + and . */ + { "C", "ASCII" }, + { "ar", "CP864" }, + { "ar_AE", "CP864" }, + { "ar_DZ", "CP864" }, + { "ar_EG", "CP864" }, + { "ar_IQ", "CP864" }, + { "ar_IR", "CP864" }, + { "ar_JO", "CP864" }, + { "ar_KW", "CP864" }, + { "ar_MA", "CP864" }, + { "ar_OM", "CP864" }, + { "ar_QA", "CP864" }, + { "ar_SA", "CP864" }, + { "ar_SY", "CP864" }, + { "be", "CP866" }, + { "be_BE", "CP866" }, + { "bg", "CP866" }, /* not CP855 ?? */ + { "bg_BG", "CP866" }, /* not CP855 ?? */ + { "ca", "CP850" }, + { "ca_ES", "CP850" }, + { "cs", "CP852" }, + { "cs_CZ", "CP852" }, + { "da", "CP865" }, /* not CP850 ?? */ + { "da_DK", "CP865" }, /* not CP850 ?? */ + { "de", "CP850" }, + { "de_AT", "CP850" }, + { "de_CH", "CP850" }, + { "de_DE", "CP850" }, + { "el", "CP869" }, + { "el_GR", "CP869" }, + { "en", "CP850" }, + { "en_AU", "CP850" }, /* not CP437 ?? 
*/ + { "en_CA", "CP850" }, + { "en_GB", "CP850" }, + { "en_NZ", "CP437" }, + { "en_US", "CP437" }, + { "en_ZA", "CP850" }, /* not CP437 ?? */ + { "eo", "CP850" }, + { "eo_EO", "CP850" }, + { "es", "CP850" }, + { "es_AR", "CP850" }, + { "es_BO", "CP850" }, + { "es_CL", "CP850" }, + { "es_CO", "CP850" }, + { "es_CR", "CP850" }, + { "es_CU", "CP850" }, + { "es_DO", "CP850" }, + { "es_EC", "CP850" }, + { "es_ES", "CP850" }, + { "es_GT", "CP850" }, + { "es_HN", "CP850" }, + { "es_MX", "CP850" }, + { "es_NI", "CP850" }, + { "es_PA", "CP850" }, + { "es_PE", "CP850" }, + { "es_PY", "CP850" }, + { "es_SV", "CP850" }, + { "es_UY", "CP850" }, + { "es_VE", "CP850" }, + { "et", "CP850" }, + { "et_EE", "CP850" }, + { "eu", "CP850" }, + { "eu_ES", "CP850" }, + { "fi", "CP850" }, + { "fi_FI", "CP850" }, + { "fr", "CP850" }, + { "fr_BE", "CP850" }, + { "fr_CA", "CP850" }, + { "fr_CH", "CP850" }, + { "fr_FR", "CP850" }, + { "ga", "CP850" }, + { "ga_IE", "CP850" }, + { "gd", "CP850" }, + { "gd_GB", "CP850" }, + { "gl", "CP850" }, + { "gl_ES", "CP850" }, + { "he", "CP862" }, + { "he_IL", "CP862" }, + { "hr", "CP852" }, + { "hr_HR", "CP852" }, + { "hu", "CP852" }, + { "hu_HU", "CP852" }, + { "id", "CP850" }, /* not CP437 ?? */ + { "id_ID", "CP850" }, /* not CP437 ?? */ + { "is", "CP861" }, /* not CP850 ?? */ + { "is_IS", "CP861" }, /* not CP850 ?? */ + { "it", "CP850" }, + { "it_CH", "CP850" }, + { "it_IT", "CP850" }, + { "ja", "CP932" }, + { "ja_JP", "CP932" }, + { "kr", "CP949" }, /* not CP934 ?? */ + { "kr_KR", "CP949" }, /* not CP934 ?? */ + { "lt", "CP775" }, + { "lt_LT", "CP775" }, + { "lv", "CP775" }, + { "lv_LV", "CP775" }, + { "mk", "CP866" }, /* not CP855 ?? */ + { "mk_MK", "CP866" }, /* not CP855 ?? */ + { "mt", "CP850" }, + { "mt_MT", "CP850" }, + { "nb", "CP865" }, /* not CP850 ?? */ + { "nb_NO", "CP865" }, /* not CP850 ?? */ + { "nl", "CP850" }, + { "nl_BE", "CP850" }, + { "nl_NL", "CP850" }, + { "nn", "CP865" }, /* not CP850 ?? */ + { "nn_NO", "CP865" }, /* not CP850 ?? 
*/ + { "no", "CP865" }, /* not CP850 ?? */ + { "no_NO", "CP865" }, /* not CP850 ?? */ + { "pl", "CP852" }, + { "pl_PL", "CP852" }, + { "pt", "CP850" }, + { "pt_BR", "CP850" }, + { "pt_PT", "CP850" }, + { "ro", "CP852" }, + { "ro_RO", "CP852" }, + { "ru", "CP866" }, + { "ru_RU", "CP866" }, + { "sk", "CP852" }, + { "sk_SK", "CP852" }, + { "sl", "CP852" }, + { "sl_SI", "CP852" }, + { "sq", "CP852" }, + { "sq_AL", "CP852" }, + { "sr", "CP852" }, /* CP852 or CP866 or CP855 ?? */ + { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */ + { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */ + { "sv", "CP850" }, + { "sv_SE", "CP850" }, + { "th", "CP874" }, + { "th_TH", "CP874" }, + { "tr", "CP857" }, + { "tr_TR", "CP857" }, + { "uk", "CP1125" }, + { "uk_UA", "CP1125" }, + { "zh_CN", "GBK" }, + { "zh_TW", "CP950" } /* not CP938 ?? */ +# define locale_table_defined # endif -# if defined OS2 - /* To avoid the troubles of installing a separate file in the same - directory as the DLL and of retrieving the DLL's directory at - runtime, simply inline the aliases here. */ - - /* The list of encodings is taken from "List of OS/2 Codepages" - by Alex Taylor: - . - See also "IBM Globalization - Code page identifiers": - . */ - cp = "CP813" "\0" "ISO-8859-7" "\0" - "CP878" "\0" "KOI8-R" "\0" - "CP819" "\0" "ISO-8859-1" "\0" - "CP912" "\0" "ISO-8859-2" "\0" - "CP913" "\0" "ISO-8859-3" "\0" - "CP914" "\0" "ISO-8859-4" "\0" - "CP915" "\0" "ISO-8859-5" "\0" - "CP916" "\0" "ISO-8859-8" "\0" - "CP920" "\0" "ISO-8859-9" "\0" - "CP921" "\0" "ISO-8859-13" "\0" - "CP923" "\0" "ISO-8859-15" "\0" - "CP954" "\0" "EUC-JP" "\0" - "CP964" "\0" "EUC-TW" "\0" - "CP970" "\0" "EUC-KR" "\0" - "CP1089" "\0" "ISO-8859-6" "\0" - "CP1208" "\0" "UTF-8" "\0" - "CP1381" "\0" "GB2312" "\0" - "CP1386" "\0" "GBK" "\0" - "CP3372" "\0" "EUC-JP" "\0"; +# ifndef locale_table_defined + /* Just a dummy entry, to avoid a C syntax error. 
*/ + { "", "" } # endif + }; + #endif - charset_aliases = cp; - } - - return cp; -} /* Determine the current locale's character encoding, and canonicalize it - into one of the canonical names listed in config.charset. - The result must not be freed; it is statically allocated. + into one of the canonical names listed below. + The result must not be freed; it is statically allocated. The result + becomes invalid when setlocale() is used to change the global locale, or + when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG + is changed; threads in multithreaded programs should not do this. If the canonical name cannot be determined, the result is a non-canonical name. */ @@ -393,9 +831,15 @@ const char * locale_charset (void) { const char *codeset; - const char *aliases; -#if !(defined WINDOWS_NATIVE || defined OS2) + /* This function must be multithread-safe. To achieve this without using + thread-local storage, we use a simple strcpy or memcpy to fill this static + buffer. Filling it through, for example, strcpy + strcat would not be + guaranteed to leave the buffer's contents intact if another thread is + currently accessing it. If necessary, the contents is first assembled in + a stack-allocated buffer. */ + +#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2 # if HAVE_LANGINFO_CODESET @@ -409,7 +853,7 @@ locale_charset (void) if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0) { const char *locale; - static char buf[2 + 10 + 1]; + static char resultbuf[2 + 10 + 1]; locale = getenv ("LC_ALL"); if (locale == NULL || locale[0] == '\0') @@ -433,11 +877,12 @@ locale_charset (void) modifier = strchr (dot, '@'); if (modifier == NULL) return dot; - if (modifier - dot < sizeof (buf)) + if (modifier - dot < sizeof (resultbuf)) { - memcpy (buf, dot, modifier - dot); - buf [modifier - dot] = '\0'; - return buf; + /* This way of filling resultbuf is multithread-safe. 
*/ + memcpy (resultbuf, dot, modifier - dot); + resultbuf [modifier - dot] = '\0'; + return resultbuf; } } } @@ -453,79 +898,60 @@ locale_charset (void) converting to GetConsoleOutputCP(). This leads to correct results, except when SetConsoleOutputCP has been called and a raster font is in use. */ - sprintf (buf, "CP%u", GetACP ()); - codeset = buf; - } -# endif - -# else - - /* On old systems which lack it, use setlocale or getenv. */ - const char *locale = NULL; + { + char buf[2 + 10 + 1]; - /* But most old systems don't have a complete set of locales. Some - (like SunOS 4 or DJGPP) have only the C locale. Therefore we don't - use setlocale here; it would return "C" when it doesn't support the - locale name the user has set. */ -# if 0 - locale = setlocale (LC_CTYPE, NULL); -# endif - if (locale == NULL || locale[0] == '\0') - { - locale = getenv ("LC_ALL"); - if (locale == NULL || locale[0] == '\0') - { - locale = getenv ("LC_CTYPE"); - if (locale == NULL || locale[0] == '\0') - locale = getenv ("LANG"); - } + sprintf (buf, "CP%u", GetACP ()); + strcpy (resultbuf, buf); + codeset = resultbuf; + } } +# endif - /* On some old systems, one used to set locale = "iso8859_1". On others, - you set it to "language_COUNTRY.charset". In any case, we resolve it - through the charset.alias file. */ - codeset = locale; - -# endif + if (codeset == NULL) + /* The canonical name cannot be determined. */ + codeset = ""; -#elif defined WINDOWS_NATIVE +# elif defined WINDOWS_NATIVE - static char buf[2 + 10 + 1]; + char buf[2 + 10 + 1]; + static char resultbuf[2 + 10 + 1]; /* The Windows API has a function returning the locale's codepage as a number, but the value doesn't change according to what the 'setlocale' call specified. So we use it as a last resort, in case the string returned by 'setlocale' doesn't specify the codepage. 
*/ - char *current_locale = setlocale (LC_ALL, NULL); - char *pdot; + char *current_locale = setlocale (LC_CTYPE, NULL); + char *pdot = strrchr (current_locale, '.'); - /* If they set different locales for different categories, - 'setlocale' will return a semi-colon separated list of locale - values. To make sure we use the correct one, we choose LC_CTYPE. */ - if (strchr (current_locale, ';')) - current_locale = setlocale (LC_CTYPE, NULL); - - pdot = strrchr (current_locale, '.'); - if (pdot) + if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf)) sprintf (buf, "CP%s", pdot + 1); else { /* The Windows API has a function returning the locale's codepage as a - number: GetACP(). - When the output goes to a console window, it needs to be provided in - GetOEMCP() encoding if the console is using a raster font, or in - GetConsoleOutputCP() encoding if it is using a TrueType font. - But in GUI programs and for output sent to files and pipes, GetACP() - encoding is the best bet. */ + number: GetACP(). + When the output goes to a console window, it needs to be provided in + GetOEMCP() encoding if the console is using a raster font, or in + GetConsoleOutputCP() encoding if it is using a TrueType font. + But in GUI programs and for output sent to files and pipes, GetACP() + encoding is the best bet. */ sprintf (buf, "CP%u", GetACP ()); } - codeset = buf; + /* For a locale name such as "French_France.65001", in Windows 10, + setlocale now returns "French_France.utf8" instead. 
*/ + if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0) + codeset = "UTF-8"; + else + { + strcpy (resultbuf, buf); + codeset = resultbuf; + } -#elif defined OS2 +# elif defined OS2 const char *locale; - static char buf[2 + 10 + 1]; + static char resultbuf[2 + 10 + 1]; ULONG cp[3]; ULONG cplen; @@ -554,11 +980,12 @@ locale_charset (void) modifier = strchr (dot, '@'); if (modifier == NULL) return dot; - if (modifier - dot < sizeof (buf)) + if (modifier - dot < sizeof (resultbuf)) { - memcpy (buf, dot, modifier - dot); - buf [modifier - dot] = '\0'; - return buf; + /* This way of filling resultbuf is multithread-safe. */ + memcpy (resultbuf, dot, modifier - dot); + resultbuf [modifier - dot] = '\0'; + return resultbuf; } } @@ -574,33 +1001,152 @@ locale_charset (void) codeset = ""; else { + char buf[2 + 10 + 1]; + sprintf (buf, "CP%u", cp[0]); - codeset = buf; + strcpy (resultbuf, buf); + codeset = resultbuf; } } -#endif +# else - if (codeset == NULL) - /* The canonical name cannot be determined. */ - codeset = ""; +# error "Add code for other platforms here." + +# endif + + /* Resolve alias. */ + { +# ifdef alias_table_defined + /* On some platforms, UTF-8 locales are the most frequently used ones. + Speed up the common case and slow down the less common cases by + testing for this case first. */ +# if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__ + if (strcmp (codeset, "UTF-8") == 0) + goto done_table_lookup; + else +# endif + { + const struct table_entry * const table = alias_table; + size_t const table_size = + sizeof (alias_table) / sizeof (struct table_entry); + /* The table is sorted. Perform a binary search. */ + size_t hi = table_size; + size_t lo = 0; + while (lo < hi) + { + /* Invariant: + for i < lo, strcmp (table[i].alias, codeset) < 0, + for i >= hi, strcmp (table[i].alias, codeset) > 0. 
*/ + size_t mid = (hi + lo) >> 1; /* >= lo, < hi */ + int cmp = strcmp (table[mid].alias, codeset); + if (cmp < 0) + lo = mid + 1; + else if (cmp > 0) + hi = mid; + else + { + /* Found an i with + strcmp (table[i].alias, codeset) == 0. */ + codeset = table[mid].canonical; + goto done_table_lookup; + } + } + } + if (0) + done_table_lookup: ; + else +# endif + { + /* Did not find it in the table. */ + /* On Mac OS X, all modern locales use the UTF-8 encoding. + BeOS and Haiku have a single locale, and it has UTF-8 encoding. */ +# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__ + codeset = "UTF-8"; +# else + /* Don't return an empty string. GNU libc and GNU libiconv interpret + the empty string as denoting "the locale's character encoding", + thus GNU libiconv would call this function a second time. */ + if (codeset[0] == '\0') + codeset = "ASCII"; +# endif + } + } - /* Resolve alias. */ - for (aliases = get_charset_aliases (); - *aliases != '\0'; - aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1) - if (strcmp (codeset, aliases) == 0 - || (aliases[0] == '*' && aliases[1] == '\0')) +#else + + /* On old systems which lack it, use setlocale or getenv. */ + const char *locale = NULL; + + /* But most old systems don't have a complete set of locales. Some + (like DJGPP) have only the C locale. Therefore we don't use setlocale + here; it would return "C" when it doesn't support the locale name the + user has set. */ +# if 0 + locale = setlocale (LC_CTYPE, NULL); +# endif + if (locale == NULL || locale[0] == '\0') + { + locale = getenv ("LC_ALL"); + if (locale == NULL || locale[0] == '\0') + { + locale = getenv ("LC_CTYPE"); + if (locale == NULL || locale[0] == '\0') + locale = getenv ("LANG"); + if (locale == NULL) + locale = ""; + } + } + + /* Map locale name to canonical encoding name. 
*/ + { +# ifdef locale_table_defined + const struct table_entry * const table = locale_table; + size_t const table_size = + sizeof (locale_table) / sizeof (struct table_entry); + /* The table is sorted. Perform a binary search. */ + size_t hi = table_size; + size_t lo = 0; + while (lo < hi) + { + /* Invariant: + for i < lo, strcmp (table[i].locale, locale) < 0, + for i >= hi, strcmp (table[i].locale, locale) > 0. */ + size_t mid = (hi + lo) >> 1; /* >= lo, < hi */ + int cmp = strcmp (table[mid].locale, locale); + if (cmp < 0) + lo = mid + 1; + else if (cmp > 0) + hi = mid; + else + { + /* Found an i with + strcmp (table[i].locale, locale) == 0. */ + codeset = table[mid].canonical; + goto done_table_lookup; + } + } + if (0) + done_table_lookup: ; + else +# endif { - codeset = aliases + strlen (aliases) + 1; - break; + /* Did not find it in the table. */ + /* On Mac OS X, all modern locales use the UTF-8 encoding. + BeOS and Haiku have a single locale, and it has UTF-8 encoding. */ +# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__ + codeset = "UTF-8"; +# else + /* The canonical name cannot be determined. */ + /* Don't return an empty string. GNU libc and GNU libiconv interpret + the empty string as denoting "the locale's character encoding", + thus GNU libiconv would call this function a second time. */ + codeset = "ASCII"; +# endif } + } - /* Don't return an empty string. GNU libc and GNU libiconv interpret - the empty string as denoting "the locale's character encoding", - thus GNU libiconv would call this function a second time. 
*/ - if (codeset[0] == '\0') - codeset = "ASCII"; +#endif #ifdef DARWIN7 /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8" diff --git a/contrib/grep/lib/localcharset.h b/contrib/grep/lib/localcharset.h index c5e6d46350..aa623be07a 100644 --- a/contrib/grep/lib/localcharset.h +++ b/contrib/grep/lib/localcharset.h @@ -1,5 +1,5 @@ /* Determine a canonical name for the current locale's character encoding. - Copyright (C) 2000-2003, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2000-2003, 2009-2020 Free Software Foundation, Inc. This file is part of the GNU CHARSET Library. This program is free software; you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along - with this program; if not, see . */ + with this program; if not, see . */ #ifndef _LOCALCHARSET_H #define _LOCALCHARSET_H @@ -25,12 +25,109 @@ extern "C" { /* Determine the current locale's character encoding, and canonicalize it - into one of the canonical names listed in config.charset. - The result must not be freed; it is statically allocated. + into one of the canonical names listed below. + The result must not be freed; it is statically allocated. The result + becomes invalid when setlocale() is used to change the global locale, or + when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG + is changed; threads in multithreaded programs should not do this. If the canonical name cannot be determined, the result is a non-canonical name. */ extern const char * locale_charset (void); +/* About GNU canonical names for character encodings: + + Every canonical name must be supported by GNU libiconv. Support by GNU libc + is also desirable. + + The name is case insensitive. Usually an upper case MIME charset name is + preferred. + + The current list of these GNU canonical names is: + + name MIME? 
used by which systems + (darwin = Mac OS X, windows = native Windows) + + ASCII, ANSI_X3.4-1968 glibc solaris freebsd netbsd darwin minix cygwin + ISO-8859-1 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin zos + ISO-8859-2 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin zos + ISO-8859-3 Y glibc solaris cygwin + ISO-8859-4 Y hpux osf solaris freebsd netbsd openbsd darwin + ISO-8859-5 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin zos + ISO-8859-6 Y glibc aix hpux solaris cygwin + ISO-8859-7 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin zos + ISO-8859-8 Y glibc aix hpux osf solaris cygwin zos + ISO-8859-9 Y glibc aix hpux irix osf solaris freebsd darwin cygwin zos + ISO-8859-13 glibc hpux solaris freebsd netbsd openbsd darwin cygwin + ISO-8859-14 glibc cygwin + ISO-8859-15 glibc aix irix osf solaris freebsd netbsd openbsd darwin cygwin + KOI8-R Y glibc hpux solaris freebsd netbsd openbsd darwin + KOI8-U Y glibc freebsd netbsd openbsd darwin cygwin + KOI8-T glibc + CP437 dos + CP775 dos + CP850 aix osf dos + CP852 dos + CP855 dos + CP856 aix + CP857 dos + CP861 dos + CP862 dos + CP864 dos + CP865 dos + CP866 freebsd netbsd openbsd darwin dos + CP869 dos + CP874 windows dos + CP922 aix + CP932 aix cygwin windows dos + CP943 aix zos + CP949 osf darwin windows dos + CP950 windows dos + CP1046 aix + CP1124 aix + CP1125 dos + CP1129 aix + CP1131 freebsd darwin + CP1250 windows + CP1251 glibc hpux solaris freebsd netbsd openbsd darwin cygwin windows + CP1252 aix windows + CP1253 windows + CP1254 windows + CP1255 glibc windows + CP1256 windows + CP1257 windows + GB2312 Y glibc aix hpux irix solaris freebsd netbsd darwin cygwin zos + EUC-JP Y glibc aix hpux irix osf solaris freebsd netbsd darwin cygwin + EUC-KR Y glibc aix hpux irix osf solaris freebsd netbsd darwin cygwin zos + EUC-TW glibc aix hpux irix osf solaris netbsd + BIG5 Y glibc aix hpux osf solaris freebsd netbsd darwin 
cygwin zos + BIG5-HKSCS glibc hpux solaris netbsd darwin + GBK glibc aix osf solaris freebsd darwin cygwin windows dos + GB18030 glibc hpux solaris freebsd netbsd darwin + SHIFT_JIS Y hpux osf solaris freebsd netbsd darwin + JOHAB glibc solaris windows + TIS-620 glibc aix hpux osf solaris cygwin zos + VISCII Y glibc + TCVN5712-1 glibc + ARMSCII-8 glibc freebsd netbsd darwin + GEORGIAN-PS glibc cygwin + PT154 glibc netbsd cygwin + HP-ROMAN8 hpux + HP-ARABIC8 hpux + HP-GREEK8 hpux + HP-HEBREW8 hpux + HP-TURKISH8 hpux + HP-KANA8 hpux + DEC-KANJI osf + DEC-HANYU osf + UTF-8 Y glibc aix hpux osf solaris netbsd darwin cygwin zos + + Note: Names which are not marked as being a MIME name should not be used in + Internet protocols for information interchange (mail, news, etc.). + + Note: ASCII and ANSI_X3.4-1968 are synonymous canonical names. Applications + must understand both names and treat them as equivalent. + */ + #ifdef __cplusplus } diff --git a/contrib/grep/lib/localeconv.c b/contrib/grep/lib/localeconv.c deleted file mode 100644 index 8f2d08402f..0000000000 --- a/contrib/grep/lib/localeconv.c +++ /dev/null @@ -1,103 +0,0 @@ -/* Query locale dependent information for formatting numbers. - Copyright (C) 2012-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. 
*/ -#include - -#if HAVE_STRUCT_LCONV_DECIMAL_POINT - -/* Override for platforms where 'struct lconv' lacks the int_p_*, int_n_* - members. */ - -struct lconv * -localeconv (void) -{ - static struct lconv result; -# undef lconv -# undef localeconv - struct lconv *sys_result = localeconv (); - - result.decimal_point = sys_result->decimal_point; - result.thousands_sep = sys_result->thousands_sep; - result.grouping = sys_result->grouping; - result.mon_decimal_point = sys_result->mon_decimal_point; - result.mon_thousands_sep = sys_result->mon_thousands_sep; - result.mon_grouping = sys_result->mon_grouping; - result.positive_sign = sys_result->positive_sign; - result.negative_sign = sys_result->negative_sign; - result.currency_symbol = sys_result->currency_symbol; - result.frac_digits = sys_result->frac_digits; - result.p_cs_precedes = sys_result->p_cs_precedes; - result.p_sign_posn = sys_result->p_sign_posn; - result.p_sep_by_space = sys_result->p_sep_by_space; - result.n_cs_precedes = sys_result->n_cs_precedes; - result.n_sign_posn = sys_result->n_sign_posn; - result.n_sep_by_space = sys_result->n_sep_by_space; - result.int_curr_symbol = sys_result->int_curr_symbol; - result.int_frac_digits = sys_result->int_frac_digits; - result.int_p_cs_precedes = sys_result->p_cs_precedes; - result.int_p_sign_posn = sys_result->p_sign_posn; - result.int_p_sep_by_space = sys_result->p_sep_by_space; - result.int_n_cs_precedes = sys_result->n_cs_precedes; - result.int_n_sign_posn = sys_result->n_sign_posn; - result.int_n_sep_by_space = sys_result->n_sep_by_space; - - return &result; -} - -#else - -/* Override for platforms where 'struct lconv' is a dummy. 
*/ - -# include - -struct lconv * -localeconv (void) -{ - static /*const*/ struct lconv result = - { - /* decimal_point */ ".", - /* thousands_sep */ "", - /* grouping */ "", - /* mon_decimal_point */ "", - /* mon_thousands_sep */ "", - /* mon_grouping */ "", - /* positive_sign */ "", - /* negative_sign */ "", - /* currency_symbol */ "", - /* frac_digits */ CHAR_MAX, - /* p_cs_precedes */ CHAR_MAX, - /* p_sign_posn */ CHAR_MAX, - /* p_sep_by_space */ CHAR_MAX, - /* n_cs_precedes */ CHAR_MAX, - /* n_sign_posn */ CHAR_MAX, - /* n_sep_by_space */ CHAR_MAX, - /* int_curr_symbol */ "", - /* int_frac_digits */ CHAR_MAX, - /* int_p_cs_precedes */ CHAR_MAX, - /* int_p_sign_posn */ CHAR_MAX, - /* int_p_sep_by_space */ CHAR_MAX, - /* int_n_cs_precedes */ CHAR_MAX, - /* int_n_sign_posn */ CHAR_MAX, - /* int_n_sep_by_space */ CHAR_MAX - }; - - return &result; -} - -#endif diff --git a/contrib/grep/lib/localeinfo.c b/contrib/grep/lib/localeinfo.c new file mode 100644 index 0000000000..a6dfaf8a7a --- /dev/null +++ b/contrib/grep/lib/localeinfo.c @@ -0,0 +1,151 @@ +/* locale information + + Copyright 2016-2020 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* Written by Paul Eggert. 
*/ + +#include + +#include + +#include + +#include +#include +#include +#include +#include + +/* The sbclen implementation relies on this. */ +verify (MB_LEN_MAX <= SCHAR_MAX); + +/* Return true if the locale uses UTF-8. */ + +static bool +is_using_utf8 (void) +{ + wchar_t wc; + mbstate_t mbs = {0}; + return mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100; +} + +/* Return true if the locale is compatible enough with the C locale so + that the locale is single-byte, bytes are in collating-sequence + order, and there are no multi-character collating elements. */ + +static bool +using_simple_locale (bool multibyte) +{ + /* The native character set is known to be compatible with + the C locale. The following test isn't perfect, but it's good + enough in practice, as only ASCII and EBCDIC are in common use + and this test correctly accepts ASCII and rejects EBCDIC. */ + enum { native_c_charset = + ('\b' == 8 && '\t' == 9 && '\n' == 10 && '\v' == 11 && '\f' == 12 + && '\r' == 13 && ' ' == 32 && '!' == 33 && '"' == 34 && '#' == 35 + && '%' == 37 && '&' == 38 && '\'' == 39 && '(' == 40 && ')' == 41 + && '*' == 42 && '+' == 43 && ',' == 44 && '-' == 45 && '.' == 46 + && '/' == 47 && '0' == 48 && '9' == 57 && ':' == 58 && ';' == 59 + && '<' == 60 && '=' == 61 && '>' == 62 && '?' == 63 && 'A' == 65 + && 'Z' == 90 && '[' == 91 && '\\' == 92 && ']' == 93 && '^' == 94 + && '_' == 95 && 'a' == 97 && 'z' == 122 && '{' == 123 && '|' == 124 + && '}' == 125 && '~' == 126) + }; + + if (!native_c_charset || multibyte) + return false; + + /* As a heuristic, use strcoll to compare native character order. + If this agrees with byte order the locale should be simple. + This heuristic should work for all known practical locales, + although it would be invalid for artificially-constructed locales + where the native order is the collating-sequence order but there + are multi-character collating elements. 
*/ + for (int i = 0; i < UCHAR_MAX; i++) + if (0 <= strcoll (((char []) {i, 0}), ((char []) {i + 1, 0}))) + return false; + + return true; +} + +/* Initialize *LOCALEINFO from the current locale. */ + +void +init_localeinfo (struct localeinfo *localeinfo) +{ + localeinfo->multibyte = MB_CUR_MAX > 1; + localeinfo->simple = using_simple_locale (localeinfo->multibyte); + localeinfo->using_utf8 = is_using_utf8 (); + + for (int i = CHAR_MIN; i <= CHAR_MAX; i++) + { + char c = i; + unsigned char uc = i; + mbstate_t s = {0}; + wchar_t wc; + size_t len = mbrtowc (&wc, &c, 1, &s); + localeinfo->sbclen[uc] = len <= 1 ? 1 : - (int) - len; + localeinfo->sbctowc[uc] = len <= 1 ? wc : WEOF; + } +} + +/* The set of wchar_t values C such that there's a useful locale + somewhere where C != towupper (C) && C != towlower (towupper (C)). + For example, 0x00B5 (U+00B5 MICRO SIGN) is in this table, because + towupper (0x00B5) == 0x039C (U+039C GREEK CAPITAL LETTER MU), and + towlower (0x039C) == 0x03BC (U+03BC GREEK SMALL LETTER MU). */ +static short const lonesome_lower[] = + { + 0x00B5, 0x0131, 0x017F, 0x01C5, 0x01C8, 0x01CB, 0x01F2, 0x0345, + 0x03C2, 0x03D0, 0x03D1, 0x03D5, 0x03D6, 0x03F0, 0x03F1, + + /* U+03F2 GREEK LUNATE SIGMA SYMBOL lacks a specific uppercase + counterpart in locales predating Unicode 4.0.0 (April 2003). */ + 0x03F2, + + 0x03F5, 0x1E9B, 0x1FBE, + }; + +/* Verify that the worst case fits. This is 1 for towupper, 1 for + towlower, and 1 for each entry in LONESOME_LOWER. */ +verify (1 + 1 + sizeof lonesome_lower / sizeof *lonesome_lower + <= CASE_FOLDED_BUFSIZE); + +/* Find the characters equal to C after case-folding, other than C + itself, and store them into FOLDED. Return the number of characters + stored; this is zero if C is WEOF. 
*/ + +int +case_folded_counterparts (wint_t c, wchar_t folded[CASE_FOLDED_BUFSIZE]) +{ + int i; + int n = 0; + wint_t uc = towupper (c); + wint_t lc = towlower (uc); + if (uc != c) + folded[n++] = uc; + if (lc != uc && lc != c && towupper (lc) == uc) + folded[n++] = lc; + for (i = 0; i < sizeof lonesome_lower / sizeof *lonesome_lower; i++) + { + wint_t li = lonesome_lower[i]; + if (li != lc && li != uc && li != c && towupper (li) == uc) + folded[n++] = li; + } + return n; +} diff --git a/contrib/grep/lib/localeinfo.h b/contrib/grep/lib/localeinfo.h new file mode 100644 index 0000000000..16f5129de0 --- /dev/null +++ b/contrib/grep/lib/localeinfo.h @@ -0,0 +1,60 @@ +/* locale information + + Copyright 2016-2020 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* Written by Paul Eggert. */ + +#include +#include +#include + +struct localeinfo +{ + /* MB_CUR_MAX > 1. */ + bool multibyte; + + /* The locale is simple, like the C locale. These locales can be + processed more efficiently, as they are single-byte, their native + character set is in collating-sequence order, and they do not + have multi-character collating elements. */ + bool simple; + + /* The locale uses UTF-8. 
*/ + bool using_utf8; + + /* An array indexed by byte values B that contains 1 if B is a + single-byte character, -1 if B is an encoding error, and -2 if B + is the leading byte of a multibyte character that contains more + than one byte. */ + signed char sbclen[UCHAR_MAX + 1]; + + /* An array indexed by byte values B that contains the corresponding + wide character (if any) for B if sbclen[B] == 1. WEOF means the + byte is not a valid single-byte character, i.e., sbclen[B] == -1 + or -2. */ + wint_t sbctowc[UCHAR_MAX + 1]; +}; + +extern void init_localeinfo (struct localeinfo *); + +/* Maximum number of characters that can be the case-folded + counterparts of a single character, not counting the character + itself. This is a generous upper bound. */ +enum { CASE_FOLDED_BUFSIZE = 32 }; + +extern int case_folded_counterparts (wint_t, wchar_t[CASE_FOLDED_BUFSIZE]); diff --git a/contrib/grep/lib/lseek.c b/contrib/grep/lib/lseek.c deleted file mode 100644 index fdec6cdf91..0000000000 --- a/contrib/grep/lib/lseek.c +++ /dev/null @@ -1,67 +0,0 @@ -/* An lseek() function that detects pipes. - Copyright (C) 2007, 2009-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, see . */ - -#include - -/* Specification. */ -#include - -#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ -/* Windows platforms. */ -/* Get GetFileType. */ -# include -/* Get _get_osfhandle. 
*/ -# include "msvc-nothrow.h" -#else -# include -#endif -#include - -#undef lseek - -off_t -rpl_lseek (int fd, off_t offset, int whence) -{ -#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ - /* mingw lseek mistakenly succeeds on pipes, sockets, and terminals. */ - HANDLE h = (HANDLE) _get_osfhandle (fd); - if (h == INVALID_HANDLE_VALUE) - { - errno = EBADF; - return -1; - } - if (GetFileType (h) != FILE_TYPE_DISK) - { - errno = ESPIPE; - return -1; - } -#else - /* BeOS lseek mistakenly succeeds on pipes... */ - struct stat statbuf; - if (fstat (fd, &statbuf) < 0) - return -1; - if (!S_ISREG (statbuf.st_mode)) - { - errno = ESPIPE; - return -1; - } -#endif -#if _GL_WINDOWS_64_BIT_OFF_T - return _lseeki64 (fd, offset, whence); -#else - return lseek (fd, offset, whence); -#endif -} diff --git a/contrib/grep/lib/lstat.c b/contrib/grep/lib/lstat.c deleted file mode 100644 index 31dee1387f..0000000000 --- a/contrib/grep/lib/lstat.c +++ /dev/null @@ -1,97 +0,0 @@ -/* Work around a bug of lstat on some systems - - Copyright (C) 1997-2006, 2008-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* written by Jim Meyering */ - -/* If the user's config.h happens to include , let it include only - the system's here, so that orig_lstat doesn't recurse to - rpl_lstat. 
*/ -#define __need_system_sys_stat_h -#include - -#if !HAVE_LSTAT -/* On systems that lack symlinks, our replacement already - defined lstat as stat, so there is nothing further to do other than - avoid an empty file. */ -typedef int dummy; -#else /* HAVE_LSTAT */ - -/* Get the original definition of lstat. It might be defined as a macro. */ -# include -# include -# undef __need_system_sys_stat_h - -static int -orig_lstat (const char *filename, struct stat *buf) -{ - return lstat (filename, buf); -} - -/* Specification. */ -/* Write "sys/stat.h" here, not , otherwise OSF/1 5.1 DTK cc - eliminates this include because of the preliminary #include - above. */ -# include "sys/stat.h" - -# include -# include - -/* lstat works differently on Linux and Solaris systems. POSIX (see - "pathname resolution" in the glossary) requires that programs like - 'ls' take into consideration the fact that FILE has a trailing slash - when FILE is a symbolic link. On Linux and Solaris 10 systems, the - lstat function already has the desired semantics (in treating - 'lstat ("symlink/", sbuf)' just like 'lstat ("symlink/.", sbuf)', - but on Solaris 9 and earlier it does not. - - If FILE has a trailing slash and specifies a symbolic link, - then use stat() to get more info on the referent of FILE. - If the referent is a non-directory, then set errno to ENOTDIR - and return -1. Otherwise, return stat's result. */ - -int -rpl_lstat (const char *file, struct stat *sbuf) -{ - size_t len; - int lstat_result = orig_lstat (file, sbuf); - - if (lstat_result != 0) - return lstat_result; - - /* This replacement file can blindly check against '/' rather than - using the ISSLASH macro, because all platforms with '\\' either - lack symlinks (mingw) or have working lstat (cygwin) and thus do - not compile this file. 0 len should have already been filtered - out above, with a failure return of ENOENT. 
*/ - len = strlen (file); - if (file[len - 1] != '/' || S_ISDIR (sbuf->st_mode)) - return 0; - - /* At this point, a trailing slash is only permitted on - symlink-to-dir; but it should have found information on the - directory, not the symlink. Call stat() to get info about the - link's referent. Our replacement stat guarantees valid results, - even if the symlink is not pointing to a directory. */ - if (!S_ISLNK (sbuf->st_mode)) - { - errno = ENOTDIR; - return -1; - } - return stat (file, sbuf); -} - -#endif /* HAVE_LSTAT */ diff --git a/contrib/grep/lib/malloc.c b/contrib/grep/lib/malloc.c deleted file mode 100644 index 00800a2848..0000000000 --- a/contrib/grep/lib/malloc.c +++ /dev/null @@ -1,56 +0,0 @@ -/* malloc() function that is glibc compatible. - - Copyright (C) 1997-1998, 2006-2007, 2009-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see . */ - -/* written by Jim Meyering and Bruno Haible */ - -#define _GL_USE_STDLIB_ALLOC 1 -#include -/* Only the AC_FUNC_MALLOC macro defines 'malloc' already in config.h. */ -#ifdef malloc -# define NEED_MALLOC_GNU 1 -# undef malloc -/* Whereas the gnulib module 'malloc-gnu' defines HAVE_MALLOC_GNU. */ -#elif GNULIB_MALLOC_GNU && !HAVE_MALLOC_GNU -# define NEED_MALLOC_GNU 1 -#endif - -#include - -#include - -/* Allocate an N-byte block of memory from the heap. - If N is zero, allocate a 1-byte block. 
*/ - -void * -rpl_malloc (size_t n) -{ - void *result; - -#if NEED_MALLOC_GNU - if (n == 0) - n = 1; -#endif - - result = malloc (n); - -#if !HAVE_MALLOC_POSIX - if (result == NULL) - errno = ENOMEM; -#endif - - return result; -} diff --git a/contrib/grep/lib/malloca.c b/contrib/grep/lib/malloca.c index 198a96ce74..975b166dae 100644 --- a/contrib/grep/lib/malloca.c +++ b/contrib/grep/lib/malloca.c @@ -1,6 +1,6 @@ /* Safe automatic memory allocation. - Copyright (C) 2003, 2006-2007, 2009-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2003. + Copyright (C) 2003, 2006-2007, 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible , 2003, 2018. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . */ #define _GL_USE_STDLIB_ALLOC 1 #include @@ -21,82 +21,49 @@ /* Specification. */ #include "malloca.h" -#include - #include "verify.h" /* The speed critical point in this file is freea() applied to an alloca() result: it must be fast, to match the speed of alloca(). The speed of mmalloca() and freea() in the other case are not critical, because they - are only invoked for big memory sizes. */ - -#if HAVE_ALLOCA - -/* Store the mmalloca() results in a hash table. This is needed to reliably - distinguish a mmalloca() result and an alloca() result. - - Although it is possible that the same pointer is returned by alloca() and - by mmalloca() at different times in the same application, it does not lead - to a bug in freea(), because: - - Before a pointer returned by alloca() can point into malloc()ed memory, - the function must return, and once this has happened the programmer must - not call freea() on it anyway. 
- - Before a pointer returned by mmalloca() can point into the stack, it - must be freed. The only function that can free it is freea(), and - when freea() frees it, it also removes it from the hash table. */ - -#define MAGIC_NUMBER 0x1415fb4a -#define MAGIC_SIZE sizeof (int) -/* This is how the header info would look like without any alignment - considerations. */ -struct preliminary_header { void *next; int magic; }; -/* But the header's size must be a multiple of sa_alignment_max. */ -#define HEADER_SIZE \ - (((sizeof (struct preliminary_header) + sa_alignment_max - 1) / sa_alignment_max) * sa_alignment_max) -union header { - void *next; - struct { - char room[HEADER_SIZE - MAGIC_SIZE]; - int word; - } magic; -}; -verify (HEADER_SIZE == sizeof (union header)); -/* We make the hash table quite big, so that during lookups the probability - of empty hash buckets is quite high. There is no need to make the hash - table resizable, because when the hash table gets filled so much that the - lookup becomes slow, it means that the application has memory leaks. */ -#define HASH_TABLE_SIZE 257 -static void * mmalloca_results[HASH_TABLE_SIZE]; - -#endif + are only invoked for big memory sizes. + Here we use a bit in the address as an indicator, an idea by Ondřej Bílka. + malloca() can return three types of pointers: + - Pointers ≡ 0 mod 2*sa_alignment_max come from stack allocation. + - Pointers ≡ sa_alignment_max mod 2*sa_alignment_max come from heap + allocation. + - NULL comes from a failed heap allocation. */ + +/* Type for holding very small pointer differences. */ +typedef unsigned char small_t; +/* Verify that it is wide enough. */ +verify (2 * sa_alignment_max - 1 <= (small_t) -1); void * mmalloca (size_t n) { #if HAVE_ALLOCA - /* Allocate one more word, that serves as an indicator for malloc()ed - memory, so that freea() of an alloca() result is fast. 
*/ - size_t nplus = n + HEADER_SIZE; + /* Allocate one more word, used to determine the address to pass to freea(), + and room for the alignment ≡ sa_alignment_max mod 2*sa_alignment_max. */ + size_t nplus = n + sizeof (small_t) + 2 * sa_alignment_max - 1; if (nplus >= n) { - void *p = malloc (nplus); + char *mem = (char *) malloc (nplus); - if (p != NULL) + if (mem != NULL) { - size_t slot; - union header *h = p; - - p = h + 1; - - /* Put a magic number into the indicator word. */ - h->magic.word = MAGIC_NUMBER; - - /* Enter p into the hash table. */ - slot = (uintptr_t) p % HASH_TABLE_SIZE; - h->next = mmalloca_results[slot]; - mmalloca_results[slot] = p; - + char *p = + (char *)((((uintptr_t)mem + sizeof (small_t) + sa_alignment_max - 1) + & ~(uintptr_t)(2 * sa_alignment_max - 1)) + + sa_alignment_max); + /* Here p >= mem + sizeof (small_t), + and p <= mem + sizeof (small_t) + 2 * sa_alignment_max - 1 + hence p + n <= mem + nplus. + So, the memory range [p, p+n) lies in the allocated memory range + [mem, mem + nplus). */ + ((small_t *) p)[-1] = p - mem; + /* p ≡ sa_alignment_max mod 2*sa_alignment_max. */ return p; } } @@ -115,35 +82,24 @@ mmalloca (size_t n) void freea (void *p) { - /* mmalloca() may have returned NULL. */ - if (p != NULL) + /* Check argument. */ + if ((uintptr_t) p & (sa_alignment_max - 1)) { - /* Attempt to quickly distinguish the mmalloca() result - which has - a magic indicator word - and the alloca() result - which has an - uninitialized indicator word. It is for this test that sa_increment - additional bytes are allocated in the alloca() case. */ - if (((int *) p)[-1] == MAGIC_NUMBER) - { - /* Looks like a mmalloca() result. To see whether it really is one, - perform a lookup in the hash table. */ - size_t slot = (uintptr_t) p % HASH_TABLE_SIZE; - void **chain = &mmalloca_results[slot]; - for (; *chain != NULL;) - { - union header *h = p; - if (*chain == p) - { - /* Found it. Remove it from the hash table and free it. 
*/ - union header *p_begin = h - 1; - *chain = p_begin->next; - free (p_begin); - return; - } - h = *chain; - chain = &h[-1].next; - } - } - /* At this point, we know it was not a mmalloca() result. */ + /* p was not the result of a malloca() call. Invalid argument. */ + abort (); + } + /* Determine whether p was a non-NULL pointer returned by mmalloca(). */ + if ((uintptr_t) p & sa_alignment_max) + { + void *mem = (char *) p - ((small_t *) p)[-1]; + free (mem); } } #endif + +/* + * Hey Emacs! + * Local Variables: + * coding: utf-8 + * End: + */ diff --git a/contrib/grep/lib/malloca.h b/contrib/grep/lib/malloca.h index 7a4190cfe4..cfcd4de4ad 100644 --- a/contrib/grep/lib/malloca.h +++ b/contrib/grep/lib/malloca.h @@ -1,5 +1,5 @@ /* Safe automatic memory allocation. - Copyright (C) 2003-2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2003-2007, 2009-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2003. This program is free software; you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . */ #ifndef _MALLOCA_H #define _MALLOCA_H @@ -21,6 +21,9 @@ #include #include #include +#include + +#include "xalloc-oversized.h" #ifdef __cplusplus @@ -53,8 +56,10 @@ extern "C" { the function returns. Upon failure, it returns NULL. */ #if HAVE_ALLOCA # define malloca(N) \ - ((N) < 4032 - sa_increment \ - ? (void *) ((char *) alloca ((N) + sa_increment) + sa_increment) \ + ((N) < 4032 - (2 * sa_alignment_max - 1) \ + ? (void *) (((uintptr_t) (char *) alloca ((N) + 2 * sa_alignment_max - 1) \ + + (2 * sa_alignment_max - 1)) \ + & ~(uintptr_t)(2 * sa_alignment_max - 1)) \ : mmalloca (N)) #else # define malloca(N) \ @@ -73,15 +78,7 @@ extern void freea (void *p); It allocates an array of N objects, each with S bytes of memory, on the stack. 
S must be positive and N must be nonnegative. The array must be freed using freea() before the function returns. */ -#if 1 -/* Cf. the definition of xalloc_oversized. */ -# define nmalloca(n, s) \ - ((n) > (size_t) (sizeof (ptrdiff_t) <= sizeof (size_t) ? -1 : -2) / (s) \ - ? NULL \ - : malloca ((n) * (s))) -#else -extern void * nmalloca (size_t n, size_t s); -#endif +#define nmalloca(n, s) (xalloc_oversized (n, s) ? NULL : malloca ((n) * (s))) #ifdef __cplusplus @@ -115,19 +112,12 @@ enum among all elementary types. */ sa_alignment_long = sa_alignof (long), sa_alignment_double = sa_alignof (double), -#if HAVE_LONG_LONG_INT sa_alignment_longlong = sa_alignof (long long), -#endif sa_alignment_longdouble = sa_alignof (long double), sa_alignment_max = ((sa_alignment_long - 1) | (sa_alignment_double - 1) -#if HAVE_LONG_LONG_INT | (sa_alignment_longlong - 1) -#endif | (sa_alignment_longdouble - 1) - ) + 1, -/* The increment that guarantees room for a magic word must be >= sizeof (int) - and a multiple of sa_alignment_max. */ - sa_increment = ((sizeof (int) + sa_alignment_max - 1) / sa_alignment_max) * sa_alignment_max + ) + 1 }; #endif /* _MALLOCA_H */ diff --git a/contrib/grep/lib/malloca.valgrind b/contrib/grep/lib/malloca.valgrind deleted file mode 100644 index 52f0a50f57..0000000000 --- a/contrib/grep/lib/malloca.valgrind +++ /dev/null @@ -1,7 +0,0 @@ -# Suppress a valgrind message about use of uninitialized memory in freea(). -# This use is OK because it provides only a speedup. -{ - freea - Memcheck:Cond - fun:freea -} diff --git a/contrib/grep/lib/mbchar.c b/contrib/grep/lib/mbchar.c index ae9b7eb9f0..bd5c93215d 100644 --- a/contrib/grep/lib/mbchar.c +++ b/contrib/grep/lib/mbchar.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2001, 2006, 2009-2015 Free Software Foundation, Inc. +/* Copyright (C) 2001, 2006, 2009-2020 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -11,7 +11,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include diff --git a/contrib/grep/lib/mbchar.h b/contrib/grep/lib/mbchar.h index fcdefaf0ec..8ff58bf519 100644 --- a/contrib/grep/lib/mbchar.h +++ b/contrib/grep/lib/mbchar.h @@ -1,5 +1,5 @@ /* Multibyte character data type. - Copyright (C) 2001, 2005-2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001, 2005-2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Bruno Haible . */ diff --git a/contrib/grep/lib/mbiter.h b/contrib/grep/lib/mbiter.h index ccc1d195bf..a8c4d4e7ba 100644 --- a/contrib/grep/lib/mbiter.h +++ b/contrib/grep/lib/mbiter.h @@ -1,5 +1,5 @@ /* Iterating through multibyte strings: macros for multi-byte encodings. - Copyright (C) 2001, 2005, 2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001, 2005, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Bruno Haible . 
*/ diff --git a/contrib/grep/lib/mbrlen.c b/contrib/grep/lib/mbrlen.c deleted file mode 100644 index b6334f2907..0000000000 --- a/contrib/grep/lib/mbrlen.c +++ /dev/null @@ -1,32 +0,0 @@ -/* Recognize multibyte character. - Copyright (C) 1999-2000, 2008-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2008. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - - -static mbstate_t internal_state; - -size_t -mbrlen (const char *s, size_t n, mbstate_t *ps) -{ - if (ps == NULL) - ps = &internal_state; - return mbrtowc (NULL, s, n, ps); -} diff --git a/contrib/grep/lib/mbrtowc.c b/contrib/grep/lib/mbrtowc.c deleted file mode 100644 index e49d55b3b1..0000000000 --- a/contrib/grep/lib/mbrtowc.c +++ /dev/null @@ -1,407 +0,0 @@ -/* Convert multibyte character to wide character. - Copyright (C) 1999-2002, 2005-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2008. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#if GNULIB_defined_mbstate_t -/* Implement mbrtowc() on top of mbtowc(). */ - -# include -# include - -# include "localcharset.h" -# include "streq.h" -# include "verify.h" - - -verify (sizeof (mbstate_t) >= 4); - -static char internal_state[4]; - -size_t -mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) -{ - char *pstate = (char *)ps; - - if (s == NULL) - { - pwc = NULL; - s = ""; - n = 1; - } - - if (n == 0) - return (size_t)(-2); - - /* Here n > 0. */ - - if (pstate == NULL) - pstate = internal_state; - - { - size_t nstate = pstate[0]; - char buf[4]; - const char *p; - size_t m; - - switch (nstate) - { - case 0: - p = s; - m = n; - break; - case 3: - buf[2] = pstate[3]; - /*FALLTHROUGH*/ - case 2: - buf[1] = pstate[2]; - /*FALLTHROUGH*/ - case 1: - buf[0] = pstate[1]; - p = buf; - m = nstate; - buf[m++] = s[0]; - if (n >= 2 && m < 4) - { - buf[m++] = s[1]; - if (n >= 3 && m < 4) - buf[m++] = s[2]; - } - break; - default: - errno = EINVAL; - return (size_t)(-1); - } - - /* Here m > 0. */ - -# if __GLIBC__ || defined __UCLIBC__ - /* Work around bug */ - mbtowc (NULL, NULL, 0); -# endif - { - int res = mbtowc (pwc, p, m); - - if (res >= 0) - { - if (pwc != NULL && ((*pwc == 0) != (res == 0))) - abort (); - if (nstate >= (res > 0 ? res : 1)) - abort (); - res -= nstate; - pstate[0] = 0; - return res; - } - - /* mbtowc does not distinguish between invalid and incomplete multibyte - sequences. But mbrtowc needs to make this distinction. - There are two possible approaches: - - Use iconv() and its return value. - - Use built-in knowledge about the possible encodings. - Given the low quality of implementation of iconv() on the systems that - lack mbrtowc(), we use the second approach. 
- The possible encodings are: - - 8-bit encodings, - - EUC-JP, EUC-KR, GB2312, EUC-TW, BIG5, GB18030, SJIS, - - UTF-8. - Use specialized code for each. */ - if (m >= 4 || m >= MB_CUR_MAX) - goto invalid; - /* Here MB_CUR_MAX > 1 and 0 < m < 4. */ - { - const char *encoding = locale_charset (); - - if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0)) - { - /* Cf. unistr/u8-mblen.c. */ - unsigned char c = (unsigned char) p[0]; - - if (c >= 0xc2) - { - if (c < 0xe0) - { - if (m == 1) - goto incomplete; - } - else if (c < 0xf0) - { - if (m == 1) - goto incomplete; - if (m == 2) - { - unsigned char c2 = (unsigned char) p[1]; - - if ((c2 ^ 0x80) < 0x40 - && (c >= 0xe1 || c2 >= 0xa0) - && (c != 0xed || c2 < 0xa0)) - goto incomplete; - } - } - else if (c <= 0xf4) - { - if (m == 1) - goto incomplete; - else /* m == 2 || m == 3 */ - { - unsigned char c2 = (unsigned char) p[1]; - - if ((c2 ^ 0x80) < 0x40 - && (c >= 0xf1 || c2 >= 0x90) - && (c < 0xf4 || (c == 0xf4 && c2 < 0x90))) - { - if (m == 2) - goto incomplete; - else /* m == 3 */ - { - unsigned char c3 = (unsigned char) p[2]; - - if ((c3 ^ 0x80) < 0x40) - goto incomplete; - } - } - } - } - } - goto invalid; - } - - /* As a reference for this code, you can use the GNU libiconv - implementation. Look for uses of the RET_TOOFEW macro. 
*/ - - if (STREQ_OPT (encoding, - "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0)) - { - if (m == 1) - { - unsigned char c = (unsigned char) p[0]; - - if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f) - goto incomplete; - } - if (m == 2) - { - unsigned char c = (unsigned char) p[0]; - - if (c == 0x8f) - { - unsigned char c2 = (unsigned char) p[1]; - - if (c2 >= 0xa1 && c2 < 0xff) - goto incomplete; - } - } - goto invalid; - } - if (STREQ_OPT (encoding, - "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0) - || STREQ_OPT (encoding, - "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0) - || STREQ_OPT (encoding, - "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0)) - { - if (m == 1) - { - unsigned char c = (unsigned char) p[0]; - - if (c >= 0xa1 && c < 0xff) - goto incomplete; - } - goto invalid; - } - if (STREQ_OPT (encoding, - "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0)) - { - if (m == 1) - { - unsigned char c = (unsigned char) p[0]; - - if ((c >= 0xa1 && c < 0xff) || c == 0x8e) - goto incomplete; - } - else /* m == 2 || m == 3 */ - { - unsigned char c = (unsigned char) p[0]; - - if (c == 0x8e) - goto incomplete; - } - goto invalid; - } - if (STREQ_OPT (encoding, - "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0)) - { - if (m == 1) - { - unsigned char c = (unsigned char) p[0]; - - if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe)) - goto incomplete; - } - else /* m == 2 || m == 3 */ - { - unsigned char c = (unsigned char) p[0]; - - if (c >= 0x90 && c <= 0xe3) - { - unsigned char c2 = (unsigned char) p[1]; - - if (c2 >= 0x30 && c2 <= 0x39) - { - if (m == 2) - goto incomplete; - else /* m == 3 */ - { - unsigned char c3 = (unsigned char) p[2]; - - if (c3 >= 0x81 && c3 <= 0xfe) - goto incomplete; - } - } - } - } - goto invalid; - } - if (STREQ_OPT (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0)) - { - if (m == 1) - { - unsigned char c = (unsigned char) p[0]; - - if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea) - || (c >= 0xf0 && c <= 0xf9)) - goto 
incomplete; - } - goto invalid; - } - - /* An unknown multibyte encoding. */ - goto incomplete; - } - - incomplete: - { - size_t k = nstate; - /* Here 0 <= k < m < 4. */ - pstate[++k] = s[0]; - if (k < m) - { - pstate[++k] = s[1]; - if (k < m) - pstate[++k] = s[2]; - } - if (k != m) - abort (); - } - pstate[0] = m; - return (size_t)(-2); - - invalid: - errno = EILSEQ; - /* The conversion state is undefined, says POSIX. */ - return (size_t)(-1); - } - } -} - -#else -/* Override the system's mbrtowc() function. */ - -# undef mbrtowc - -size_t -rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) -{ -# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG - if (s == NULL) - { - pwc = NULL; - s = ""; - n = 1; - } -# endif - -# if MBRTOWC_EMPTY_INPUT_BUG - if (n == 0) - return (size_t) -2; -# endif - -# if MBRTOWC_RETVAL_BUG - { - static mbstate_t internal_state; - - /* Override mbrtowc's internal state. We cannot call mbsinit() on the - hidden internal state, but we can call it on our variable. */ - if (ps == NULL) - ps = &internal_state; - - if (!mbsinit (ps)) - { - /* Parse the rest of the multibyte character byte for byte. */ - size_t count = 0; - for (; n > 0; s++, n--) - { - wchar_t wc; - size_t ret = mbrtowc (&wc, s, 1, ps); - - if (ret == (size_t)(-1)) - return (size_t)(-1); - count++; - if (ret != (size_t)(-2)) - { - /* The multibyte character has been completed. */ - if (pwc != NULL) - *pwc = wc; - return (wc == 0 ? 
0 : count); - } - } - return (size_t)(-2); - } - } -# endif - -# if MBRTOWC_NUL_RETVAL_BUG - { - wchar_t wc; - size_t ret = mbrtowc (&wc, s, n, ps); - - if (ret != (size_t)(-1) && ret != (size_t)(-2)) - { - if (pwc != NULL) - *pwc = wc; - if (wc == 0) - ret = 0; - } - return ret; - } -# else - { -# if MBRTOWC_NULL_ARG1_BUG - wchar_t dummy; - - if (pwc == NULL) - pwc = &dummy; -# endif - - return mbrtowc (pwc, s, n, ps); - } -# endif -} - -#endif diff --git a/contrib/grep/lib/mbscasecmp.c b/contrib/grep/lib/mbscasecmp.c index 9288fb29db..9a1ea4bb35 100644 --- a/contrib/grep/lib/mbscasecmp.c +++ b/contrib/grep/lib/mbscasecmp.c @@ -1,5 +1,5 @@ /* Case-insensitive string comparison function. - Copyright (C) 1998-1999, 2005-2015 Free Software Foundation, Inc. + Copyright (C) 1998-1999, 2005-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2005, based on earlier glibc code. @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include diff --git a/contrib/grep/lib/mbsinit.c b/contrib/grep/lib/mbsinit.c deleted file mode 100644 index 4effdd8047..0000000000 --- a/contrib/grep/lib/mbsinit.c +++ /dev/null @@ -1,61 +0,0 @@ -/* Test for initial conversion state. - Copyright (C) 2008-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2008. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#include "verify.h" - -#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__ - -/* On native Windows, 'mbstate_t' is defined as 'int'. */ - -int -mbsinit (const mbstate_t *ps) -{ - return ps == NULL || *ps == 0; -} - -#else - -/* Platforms that lack mbsinit() also lack mbrlen(), mbrtowc(), mbsrtowcs() - and wcrtomb(), wcsrtombs(). - We assume that - - sizeof (mbstate_t) >= 4, - - only stateless encodings are supported (such as UTF-8 and EUC-JP, but - not ISO-2022 variants), - - for each encoding, the number of bytes for a wide character is <= 4. - (This maximum is attained for UTF-8, GB18030, EUC-TW.) - We define the meaning of mbstate_t as follows: - - In mb -> wc direction, mbstate_t's first byte contains the number of - buffered bytes (in the range 0..3), followed by up to 3 buffered bytes. - - In wc -> mb direction, mbstate_t contains no information. In other - words, it is always in the initial state. */ - -verify (sizeof (mbstate_t) >= 4); - -int -mbsinit (const mbstate_t *ps) -{ - const char *pstate = (const char *)ps; - - return pstate == NULL || pstate[0] == 0; -} - -#endif diff --git a/contrib/grep/lib/mbslen.c b/contrib/grep/lib/mbslen.c index 011f821d48..6c6017e6e6 100644 --- a/contrib/grep/lib/mbslen.c +++ b/contrib/grep/lib/mbslen.c @@ -1,5 +1,5 @@ /* Counting the multibyte characters in a string. - Copyright (C) 2007-2015 Free Software Foundation, Inc. + Copyright (C) 2007-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2007. This program is free software: you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ #include diff --git a/contrib/grep/lib/mbsrtowcs-state.c b/contrib/grep/lib/mbsrtowcs-state.c deleted file mode 100644 index 522eb69068..0000000000 --- a/contrib/grep/lib/mbsrtowcs-state.c +++ /dev/null @@ -1,37 +0,0 @@ -/* Convert string to wide string. - Copyright (C) 2008-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2008. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -#include - -/* Internal state used by the functions mbsrtowcs() and mbsnrtowcs(). */ -mbstate_t _gl_mbsrtowcs_state -/* The state must initially be in the "initial state"; so, zero-initialize it. - On most systems, putting it into BSS is sufficient. Not so on Mac OS X 10.3, - see . - When it needs an initializer, use 0 or {0} as initializer? 0 only works - when mbstate_t is a scalar type (such as when gnulib defines it, or on - AIX, IRIX, mingw). {0} works as an initializer in all cases: for a struct - or union type, but also for a scalar type (ISO C 99, 6.7.8.(11)). */ -#if defined __ELF__ - /* On ELF systems, variables in BSS behave well. */ -#else - /* Use braces, to be on the safe side. */ - = { 0 } -#endif - ; diff --git a/contrib/grep/lib/mbsrtowcs.c b/contrib/grep/lib/mbsrtowcs.c deleted file mode 100644 index c37eba72c9..0000000000 --- a/contrib/grep/lib/mbsrtowcs.c +++ /dev/null @@ -1,32 +0,0 @@ -/* Convert string to wide string. - Copyright (C) 2008-2015 Free Software Foundation, Inc. 
- Written by Bruno Haible , 2008. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#include -#include -#include - -#include "strnlen1.h" - - -extern mbstate_t _gl_mbsrtowcs_state; - -#include "mbsrtowcs-impl.h" diff --git a/contrib/grep/lib/mbsstr.c b/contrib/grep/lib/mbsstr.c index 6d5239e8ad..d0b16e3c3c 100644 --- a/contrib/grep/lib/mbsstr.c +++ b/contrib/grep/lib/mbsstr.c @@ -1,5 +1,5 @@ /* Searching in a string. -*- coding: utf-8 -*- - Copyright (C) 2005-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2005. This program is free software: you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include @@ -22,6 +22,7 @@ #include #include /* for NULL, in case a nonstandard string.h lacks it */ +#include #include "malloca.h" #include "mbuiter.h" @@ -32,7 +33,7 @@ #include "str-kmp.h" /* Knuth-Morris-Pratt algorithm. - See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm + See https://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm Return a boolean indicating success: Return true and set *RESULTP if the search was completed. 
Return false if it was aborted because not enough memory was available. */ diff --git a/contrib/grep/lib/mbuiter.h b/contrib/grep/lib/mbuiter.h index acdec297a1..2d2a11b997 100644 --- a/contrib/grep/lib/mbuiter.h +++ b/contrib/grep/lib/mbuiter.h @@ -1,5 +1,5 @@ /* Iterating through multibyte strings: macros for multi-byte encodings. - Copyright (C) 2001, 2005, 2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001, 2005, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Bruno Haible . */ diff --git a/contrib/grep/lib/memchr.c b/contrib/grep/lib/memchr.c deleted file mode 100644 index f74cc9afc1..0000000000 --- a/contrib/grep/lib/memchr.c +++ /dev/null @@ -1,172 +0,0 @@ -/* Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2004, 2006, 2008-2015 - Free Software Foundation, Inc. - - Based on strlen implementation by Torbjorn Granlund (tege@sics.se), - with help from Dan Sahlin (dan@sics.se) and - commentary by Jim Blandy (jimb@ai.mit.edu); - adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), - and implemented by Roland McGrath (roland@ai.mit.edu). - -NOTE: The canonical source of this file is maintained with the GNU C Library. -Bugs can be reported to bug-glibc@prep.ai.mit.edu. - -This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3 of the License, or any -later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . */ - -#ifndef _LIBC -# include -#endif - -#include - -#include - -#if defined _LIBC -# include -#else -# define reg_char char -#endif - -#include - -#if HAVE_BP_SYM_H || defined _LIBC -# include -#else -# define BP_SYM(sym) sym -#endif - -#undef __memchr -#ifdef _LIBC -# undef memchr -#endif - -#ifndef weak_alias -# define __memchr memchr -#endif - -/* Search no more than N bytes of S for C. */ -void * -__memchr (void const *s, int c_in, size_t n) -{ - /* On 32-bit hardware, choosing longword to be a 32-bit unsigned - long instead of a 64-bit uintmax_t tends to give better - performance. On 64-bit hardware, unsigned long is generally 64 - bits already. Change this typedef to experiment with - performance. */ - typedef unsigned long int longword; - - const unsigned char *char_ptr; - const longword *longword_ptr; - longword repeated_one; - longword repeated_c; - unsigned reg_char c; - - c = (unsigned char) c_in; - - /* Handle the first few bytes by reading one byte at a time. - Do this until CHAR_PTR is aligned on a longword boundary. */ - for (char_ptr = (const unsigned char *) s; - n > 0 && (size_t) char_ptr % sizeof (longword) != 0; - --n, ++char_ptr) - if (*char_ptr == c) - return (void *) char_ptr; - - longword_ptr = (const longword *) char_ptr; - - /* All these elucidatory comments refer to 4-byte longwords, - but the theory applies equally well to any size longwords. */ - - /* Compute auxiliary longword values: - repeated_one is a value which has a 1 in every byte. - repeated_c has c in every byte. 
*/ - repeated_one = 0x01010101; - repeated_c = c | (c << 8); - repeated_c |= repeated_c << 16; - if (0xffffffffU < (longword) -1) - { - repeated_one |= repeated_one << 31 << 1; - repeated_c |= repeated_c << 31 << 1; - if (8 < sizeof (longword)) - { - size_t i; - - for (i = 64; i < sizeof (longword) * 8; i *= 2) - { - repeated_one |= repeated_one << i; - repeated_c |= repeated_c << i; - } - } - } - - /* Instead of the traditional loop which tests each byte, we will test a - longword at a time. The tricky part is testing if *any of the four* - bytes in the longword in question are equal to c. We first use an xor - with repeated_c. This reduces the task to testing whether *any of the - four* bytes in longword1 is zero. - - We compute tmp = - ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7). - That is, we perform the following operations: - 1. Subtract repeated_one. - 2. & ~longword1. - 3. & a mask consisting of 0x80 in every byte. - Consider what happens in each byte: - - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff, - and step 3 transforms it into 0x80. A carry can also be propagated - to more significant bytes. - - If a byte of longword1 is nonzero, let its lowest 1 bit be at - position k (0 <= k <= 7); so the lowest k bits are 0. After step 1, - the byte ends in a single bit of value 0 and k bits of value 1. - After step 2, the result is just k bits of value 1: 2^k - 1. After - step 3, the result is 0. And no carry is produced. - So, if longword1 has only non-zero bytes, tmp is zero. - Whereas if longword1 has a zero byte, call j the position of the least - significant zero byte. Then the result has a zero at positions 0, ..., - j-1 and a 0x80 at position j. We cannot predict the result at the more - significant bytes (positions j+1..3), but it does not matter since we - already have a non-zero bit at position 8*j+7. - - So, the test whether any byte in longword1 is zero is equivalent to - testing whether tmp is nonzero. 
*/ - - while (n >= sizeof (longword)) - { - longword longword1 = *longword_ptr ^ repeated_c; - - if ((((longword1 - repeated_one) & ~longword1) - & (repeated_one << 7)) != 0) - break; - longword_ptr++; - n -= sizeof (longword); - } - - char_ptr = (const unsigned char *) longword_ptr; - - /* At this point, we know that either n < sizeof (longword), or one of the - sizeof (longword) bytes starting at char_ptr is == c. On little-endian - machines, we could determine the first such byte without any further - memory accesses, just by looking at the tmp result from the last loop - iteration. But this does not work on big-endian machines. Choose code - that works in both cases. */ - - for (; n > 0; --n, ++char_ptr) - { - if (*char_ptr == c) - return (void *) char_ptr; - } - - return NULL; -} -#ifdef weak_alias -weak_alias (__memchr, BP_SYM (memchr)) -#endif diff --git a/contrib/grep/lib/memchr.valgrind b/contrib/grep/lib/memchr.valgrind deleted file mode 100644 index 60f247e10d..0000000000 --- a/contrib/grep/lib/memchr.valgrind +++ /dev/null @@ -1,14 +0,0 @@ -# Suppress a valgrind message about use of uninitialized memory in memchr(). -# POSIX states that when the character is found, memchr must not read extra -# bytes in an overestimated length (for example, where memchr is used to -# implement strnlen). However, we use a safe word read to provide a speedup. -{ - memchr-value4 - Memcheck:Value4 - fun:rpl_memchr -} -{ - memchr-value8 - Memcheck:Value8 - fun:rpl_memchr -} diff --git a/contrib/grep/lib/memchr2.c b/contrib/grep/lib/memchr2.c index d7468d9da7..ed728c46e6 100644 --- a/contrib/grep/lib/memchr2.c +++ b/contrib/grep/lib/memchr2.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2004, 2006, 2008-2015 +/* Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2004, 2006, 2008-2020 Free Software Foundation, Inc. 
Based on strlen implementation by Torbjorn Granlund (tege@sics.se), @@ -19,7 +19,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. */ +along with this program. If not, see <https://www.gnu.org/licenses/>. */ #include <config.h> diff --git a/contrib/grep/lib/memchr2.h b/contrib/grep/lib/memchr2.h index d49c6e8de9..f612efed5c 100644 --- a/contrib/grep/lib/memchr2.h +++ b/contrib/grep/lib/memchr2.h @@ -1,5 +1,5 @@ /* Scan memory for the first of two bytes. - Copyright (C) 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2008-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ + along with this program. If not, see <https://www.gnu.org/licenses/>. */ #include <stddef.h> diff --git a/contrib/grep/lib/memchr2.valgrind b/contrib/grep/lib/memchr2.valgrind index 778fe867dc..9a7511c644 100644 --- a/contrib/grep/lib/memchr2.valgrind +++ b/contrib/grep/lib/memchr2.valgrind @@ -1,4 +1,20 @@ # Suppress a valgrind message about use of uninitialized memory in memchr2(). + +# Copyright (C) 2009-2020 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + # Like memchr, it is safe to overestimate the length when the terminator # is guaranteed to be found. In this case, we may end up reading a word # that is partially uninitialized, but this use is OK for a speedup. diff --git a/contrib/grep/lib/mempcpy.c b/contrib/grep/lib/mempcpy.c deleted file mode 100644 index e9f2e34fa6..0000000000 --- a/contrib/grep/lib/mempcpy.c +++ /dev/null @@ -1,28 +0,0 @@ -/* Copy memory area and return pointer after last written byte. - Copyright (C) 2003, 2007, 2009-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see . */ - -#include - -/* Specification. */ -#include - -/* Copy N bytes of SRC to DEST, return pointer to bytes after the - last written byte. */ -void * -mempcpy (void *dest, const void *src, size_t n) -{ - return (char *) memcpy (dest, src, n) + n; -} diff --git a/contrib/grep/lib/memrchr.c b/contrib/grep/lib/memrchr.c deleted file mode 100644 index 3827208d80..0000000000 --- a/contrib/grep/lib/memrchr.c +++ /dev/null @@ -1,161 +0,0 @@ -/* memrchr -- find the last occurrence of a byte in a memory block - - Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2015 Free Software - Foundation, Inc. 
- - Based on strlen implementation by Torbjorn Granlund (tege@sics.se), - with help from Dan Sahlin (dan@sics.se) and - commentary by Jim Blandy (jimb@ai.mit.edu); - adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), - and implemented by Roland McGrath (roland@ai.mit.edu). - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#if defined _LIBC -# include -#else -# include -# define reg_char char -#endif - -#include -#include - -#undef __memrchr -#ifdef _LIBC -# undef memrchr -#endif - -#ifndef weak_alias -# define __memrchr memrchr -#endif - -/* Search no more than N bytes of S for C. */ -void * -__memrchr (void const *s, int c_in, size_t n) -{ - /* On 32-bit hardware, choosing longword to be a 32-bit unsigned - long instead of a 64-bit uintmax_t tends to give better - performance. On 64-bit hardware, unsigned long is generally 64 - bits already. Change this typedef to experiment with - performance. */ - typedef unsigned long int longword; - - const unsigned char *char_ptr; - const longword *longword_ptr; - longword repeated_one; - longword repeated_c; - unsigned reg_char c; - - c = (unsigned char) c_in; - - /* Handle the last few bytes by reading one byte at a time. - Do this until CHAR_PTR is aligned on a longword boundary. 
*/ - for (char_ptr = (const unsigned char *) s + n; - n > 0 && (size_t) char_ptr % sizeof (longword) != 0; - --n) - if (*--char_ptr == c) - return (void *) char_ptr; - - longword_ptr = (const longword *) char_ptr; - - /* All these elucidatory comments refer to 4-byte longwords, - but the theory applies equally well to any size longwords. */ - - /* Compute auxiliary longword values: - repeated_one is a value which has a 1 in every byte. - repeated_c has c in every byte. */ - repeated_one = 0x01010101; - repeated_c = c | (c << 8); - repeated_c |= repeated_c << 16; - if (0xffffffffU < (longword) -1) - { - repeated_one |= repeated_one << 31 << 1; - repeated_c |= repeated_c << 31 << 1; - if (8 < sizeof (longword)) - { - size_t i; - - for (i = 64; i < sizeof (longword) * 8; i *= 2) - { - repeated_one |= repeated_one << i; - repeated_c |= repeated_c << i; - } - } - } - - /* Instead of the traditional loop which tests each byte, we will test a - longword at a time. The tricky part is testing if *any of the four* - bytes in the longword in question are equal to c. We first use an xor - with repeated_c. This reduces the task to testing whether *any of the - four* bytes in longword1 is zero. - - We compute tmp = - ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7). - That is, we perform the following operations: - 1. Subtract repeated_one. - 2. & ~longword1. - 3. & a mask consisting of 0x80 in every byte. - Consider what happens in each byte: - - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff, - and step 3 transforms it into 0x80. A carry can also be propagated - to more significant bytes. - - If a byte of longword1 is nonzero, let its lowest 1 bit be at - position k (0 <= k <= 7); so the lowest k bits are 0. After step 1, - the byte ends in a single bit of value 0 and k bits of value 1. - After step 2, the result is just k bits of value 1: 2^k - 1. After - step 3, the result is 0. And no carry is produced. 
- So, if longword1 has only non-zero bytes, tmp is zero. - Whereas if longword1 has a zero byte, call j the position of the least - significant zero byte. Then the result has a zero at positions 0, ..., - j-1 and a 0x80 at position j. We cannot predict the result at the more - significant bytes (positions j+1..3), but it does not matter since we - already have a non-zero bit at position 8*j+7. - - So, the test whether any byte in longword1 is zero is equivalent to - testing whether tmp is nonzero. */ - - while (n >= sizeof (longword)) - { - longword longword1 = *--longword_ptr ^ repeated_c; - - if ((((longword1 - repeated_one) & ~longword1) - & (repeated_one << 7)) != 0) - { - longword_ptr++; - break; - } - n -= sizeof (longword); - } - - char_ptr = (const unsigned char *) longword_ptr; - - /* At this point, we know that either n < sizeof (longword), or one of the - sizeof (longword) bytes starting at char_ptr is == c. On little-endian - machines, we could determine the first such byte without any further - memory accesses, just by looking at the tmp result from the last loop - iteration. But this does not work on big-endian machines. Choose code - that works in both cases. */ - - while (n-- > 0) - { - if (*--char_ptr == c) - return (void *) char_ptr; - } - - return NULL; -} -#ifdef weak_alias -weak_alias (__memrchr, memrchr) -#endif diff --git a/contrib/grep/lib/minmax.h b/contrib/grep/lib/minmax.h index 919678db35..b9477767b0 100644 --- a/contrib/grep/lib/minmax.h +++ b/contrib/grep/lib/minmax.h @@ -1,5 +1,5 @@ /* MIN, MAX macros. - Copyright (C) 1995, 1998, 2001, 2003, 2005, 2009-2015 Free Software + Copyright (C) 1995, 1998, 2001, 2003, 2005, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . 
*/ #ifndef _MINMAX_H #define _MINMAX_H diff --git a/contrib/grep/lib/msvc-inval.c b/contrib/grep/lib/msvc-inval.c deleted file mode 100644 index a1f1dc0ee0..0000000000 --- a/contrib/grep/lib/msvc-inval.c +++ /dev/null @@ -1,129 +0,0 @@ -/* Invalid parameter handler for MSVC runtime libraries. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, see . */ - -#include - -/* Specification. */ -#include "msvc-inval.h" - -#if HAVE_MSVC_INVALID_PARAMETER_HANDLER \ - && !(MSVC_INVALID_PARAMETER_HANDLING == SANE_LIBRARY_HANDLING) - -/* Get _invalid_parameter_handler type and _set_invalid_parameter_handler - declaration. */ -# include - -# if MSVC_INVALID_PARAMETER_HANDLING == DEFAULT_HANDLING - -static void __cdecl -gl_msvc_invalid_parameter_handler (const wchar_t *expression, - const wchar_t *function, - const wchar_t *file, - unsigned int line, - uintptr_t dummy) -{ -} - -# else - -/* Get declarations of the native Windows API functions. */ -# define WIN32_LEAN_AND_MEAN -# include - -# if defined _MSC_VER - -static void __cdecl -gl_msvc_invalid_parameter_handler (const wchar_t *expression, - const wchar_t *function, - const wchar_t *file, - unsigned int line, - uintptr_t dummy) -{ - RaiseException (STATUS_GNULIB_INVALID_PARAMETER, 0, 0, NULL); -} - -# else - -/* An index to thread-local storage. */ -static DWORD tls_index; -static int tls_initialized /* = 0 */; - -/* Used as a fallback only. 
*/ -static struct gl_msvc_inval_per_thread not_per_thread; - -struct gl_msvc_inval_per_thread * -gl_msvc_inval_current (void) -{ - if (!tls_initialized) - { - tls_index = TlsAlloc (); - tls_initialized = 1; - } - if (tls_index == TLS_OUT_OF_INDEXES) - /* TlsAlloc had failed. */ - return &not_per_thread; - else - { - struct gl_msvc_inval_per_thread *pointer = - (struct gl_msvc_inval_per_thread *) TlsGetValue (tls_index); - if (pointer == NULL) - { - /* First call. Allocate a new 'struct gl_msvc_inval_per_thread'. */ - pointer = - (struct gl_msvc_inval_per_thread *) - malloc (sizeof (struct gl_msvc_inval_per_thread)); - if (pointer == NULL) - /* Could not allocate memory. Use the global storage. */ - pointer = &not_per_thread; - TlsSetValue (tls_index, pointer); - } - return pointer; - } -} - -static void __cdecl -gl_msvc_invalid_parameter_handler (const wchar_t *expression, - const wchar_t *function, - const wchar_t *file, - unsigned int line, - uintptr_t dummy) -{ - struct gl_msvc_inval_per_thread *current = gl_msvc_inval_current (); - if (current->restart_valid) - longjmp (current->restart, 1); - else - /* An invalid parameter notification from outside the gnulib code. - Give the caller a chance to intervene. */ - RaiseException (STATUS_GNULIB_INVALID_PARAMETER, 0, 0, NULL); -} - -# endif - -# endif - -static int gl_msvc_inval_initialized /* = 0 */; - -void -gl_msvc_inval_ensure_handler (void) -{ - if (gl_msvc_inval_initialized == 0) - { - _set_invalid_parameter_handler (gl_msvc_invalid_parameter_handler); - gl_msvc_inval_initialized = 1; - } -} - -#endif diff --git a/contrib/grep/lib/msvc-inval.h b/contrib/grep/lib/msvc-inval.h deleted file mode 100644 index 8b07269a70..0000000000 --- a/contrib/grep/lib/msvc-inval.h +++ /dev/null @@ -1,222 +0,0 @@ -/* Invalid parameter handler for MSVC runtime libraries. - Copyright (C) 2011-2015 Free Software Foundation, Inc.
- - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, see . */ - -#ifndef _MSVC_INVAL_H -#define _MSVC_INVAL_H - -/* With MSVC runtime libraries with the "invalid parameter handler" concept, - functions like fprintf(), dup2(), or close() crash when the caller passes - an invalid argument. But POSIX wants error codes (such as EINVAL or EBADF) - instead. - This file defines macros that turn such an invalid parameter notification - into a non-local exit. An error code can then be produced at the target - of this exit. You can thus write code like - - TRY_MSVC_INVAL - { - - } - CATCH_MSVC_INVAL - { - - } - DONE_MSVC_INVAL; - - This entire block expands to a single statement. 
- - The handling of invalid parameters can be done in three ways: - - * The default way, which is reasonable for programs (not libraries): - AC_DEFINE([MSVC_INVALID_PARAMETER_HANDLING], [DEFAULT_HANDLING]) - - * The way for libraries that make "hairy" calls (like close(-1), or - fclose(fp) where fileno(fp) is closed, or simply getdtablesize()): - AC_DEFINE([MSVC_INVALID_PARAMETER_HANDLING], [HAIRY_LIBRARY_HANDLING]) - - * The way for libraries that make no "hairy" calls: - AC_DEFINE([MSVC_INVALID_PARAMETER_HANDLING], [SANE_LIBRARY_HANDLING]) - */ - -#define DEFAULT_HANDLING 0 -#define HAIRY_LIBRARY_HANDLING 1 -#define SANE_LIBRARY_HANDLING 2 - -#if HAVE_MSVC_INVALID_PARAMETER_HANDLER \ - && !(MSVC_INVALID_PARAMETER_HANDLING == SANE_LIBRARY_HANDLING) -/* A native Windows platform with the "invalid parameter handler" concept, - and either DEFAULT_HANDLING or HAIRY_LIBRARY_HANDLING. */ - -# if MSVC_INVALID_PARAMETER_HANDLING == DEFAULT_HANDLING -/* Default handling. */ - -# ifdef __cplusplus -extern "C" { -# endif - -/* Ensure that the invalid parameter handler in installed that just returns. - Because we assume no other part of the program installs a different - invalid parameter handler, this solution is multithread-safe. */ -extern void gl_msvc_inval_ensure_handler (void); - -# ifdef __cplusplus -} -# endif - -# define TRY_MSVC_INVAL \ - do \ - { \ - gl_msvc_inval_ensure_handler (); \ - if (1) -# define CATCH_MSVC_INVAL \ - else -# define DONE_MSVC_INVAL \ - } \ - while (0) - -# else -/* Handling for hairy libraries. */ - -# include - -/* Gnulib can define its own status codes, as described in the page - "Raising Software Exceptions" on microsoft.com - . - Our status codes are composed of - - 0xE0000000, mandatory for all user-defined status codes, - - 0x474E550, a API identifier ("GNU"), - - 0, 1, 2, ..., used to distinguish different status codes from the - same API. 
*/ -# define STATUS_GNULIB_INVALID_PARAMETER (0xE0000000 + 0x474E550 + 0) - -# if defined _MSC_VER -/* A compiler that supports __try/__except, as described in the page - "try-except statement" on microsoft.com - . - With __try/__except, we can use the multithread-safe exception handling. */ - -# ifdef __cplusplus -extern "C" { -# endif - -/* Ensure that the invalid parameter handler in installed that raises a - software exception with code STATUS_GNULIB_INVALID_PARAMETER. - Because we assume no other part of the program installs a different - invalid parameter handler, this solution is multithread-safe. */ -extern void gl_msvc_inval_ensure_handler (void); - -# ifdef __cplusplus -} -# endif - -# define TRY_MSVC_INVAL \ - do \ - { \ - gl_msvc_inval_ensure_handler (); \ - __try -# define CATCH_MSVC_INVAL \ - __except (GetExceptionCode () == STATUS_GNULIB_INVALID_PARAMETER \ - ? EXCEPTION_EXECUTE_HANDLER \ - : EXCEPTION_CONTINUE_SEARCH) -# define DONE_MSVC_INVAL \ - } \ - while (0) - -# else -/* Any compiler. - We can only use setjmp/longjmp. */ - -# include - -# ifdef __cplusplus -extern "C" { -# endif - -struct gl_msvc_inval_per_thread -{ - /* The restart that will resume execution at the code between - CATCH_MSVC_INVAL and DONE_MSVC_INVAL. It is enabled only between - TRY_MSVC_INVAL and CATCH_MSVC_INVAL. */ - jmp_buf restart; - - /* Tells whether the contents of restart is valid. */ - int restart_valid; -}; - -/* Ensure that the invalid parameter handler in installed that passes - control to the gl_msvc_inval_restart if it is valid, or raises a - software exception with code STATUS_GNULIB_INVALID_PARAMETER otherwise. - Because we assume no other part of the program installs a different - invalid parameter handler, this solution is multithread-safe. */ -extern void gl_msvc_inval_ensure_handler (void); - -/* Return a pointer to the per-thread data for the current thread. 
*/ -extern struct gl_msvc_inval_per_thread *gl_msvc_inval_current (void); - -# ifdef __cplusplus -} -# endif - -# define TRY_MSVC_INVAL \ - do \ - { \ - struct gl_msvc_inval_per_thread *msvc_inval_current; \ - gl_msvc_inval_ensure_handler (); \ - msvc_inval_current = gl_msvc_inval_current (); \ - /* First, initialize gl_msvc_inval_restart. */ \ - if (setjmp (msvc_inval_current->restart) == 0) \ - { \ - /* Then, mark it as valid. */ \ - msvc_inval_current->restart_valid = 1; -# define CATCH_MSVC_INVAL \ - /* Execution completed. \ - Mark gl_msvc_inval_restart as invalid. */ \ - msvc_inval_current->restart_valid = 0; \ - } \ - else \ - { \ - /* Execution triggered an invalid parameter notification. \ - Mark gl_msvc_inval_restart as invalid. */ \ - msvc_inval_current->restart_valid = 0; -# define DONE_MSVC_INVAL \ - } \ - } \ - while (0) - -# endif - -# endif - -#else -/* A platform that does not need to the invalid parameter handler, - or when SANE_LIBRARY_HANDLING is desired. */ - -/* The braces here avoid GCC warnings like - "warning: suggest explicit braces to avoid ambiguous 'else'". */ -# define TRY_MSVC_INVAL \ - do \ - { \ - if (1) -# define CATCH_MSVC_INVAL \ - else -# define DONE_MSVC_INVAL \ - } \ - while (0) - -#endif - -#endif /* _MSVC_INVAL_H */ diff --git a/contrib/grep/lib/msvc-nothrow.c b/contrib/grep/lib/msvc-nothrow.c deleted file mode 100644 index 90cf8014e5..0000000000 --- a/contrib/grep/lib/msvc-nothrow.c +++ /dev/null @@ -1,49 +0,0 @@ -/* Wrappers that don't throw invalid parameter notifications - with MSVC runtime libraries. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, see . */ - -#include - -/* Specification. */ -#include "msvc-nothrow.h" - -/* Get declarations of the native Windows API functions. */ -#define WIN32_LEAN_AND_MEAN -#include - -#include "msvc-inval.h" - -#undef _get_osfhandle - -#if HAVE_MSVC_INVALID_PARAMETER_HANDLER -intptr_t -_gl_nothrow_get_osfhandle (int fd) -{ - intptr_t result; - - TRY_MSVC_INVAL - { - result = _get_osfhandle (fd); - } - CATCH_MSVC_INVAL - { - result = (intptr_t) INVALID_HANDLE_VALUE; - } - DONE_MSVC_INVAL; - - return result; -} -#endif diff --git a/contrib/grep/lib/msvc-nothrow.h b/contrib/grep/lib/msvc-nothrow.h deleted file mode 100644 index 67b151e65e..0000000000 --- a/contrib/grep/lib/msvc-nothrow.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Wrappers that don't throw invalid parameter notifications - with MSVC runtime libraries. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, see . 
*/ - -#ifndef _MSVC_NOTHROW_H -#define _MSVC_NOTHROW_H - -/* With MSVC runtime libraries with the "invalid parameter handler" concept, - functions like fprintf(), dup2(), or close() crash when the caller passes - an invalid argument. But POSIX wants error codes (such as EINVAL or EBADF) - instead. - This file defines wrappers that turn such an invalid parameter notification - into an error code. */ - -#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ - -/* Get original declaration of _get_osfhandle. */ -# include - -# if HAVE_MSVC_INVALID_PARAMETER_HANDLER - -/* Override _get_osfhandle. */ -extern intptr_t _gl_nothrow_get_osfhandle (int fd); -# define _get_osfhandle _gl_nothrow_get_osfhandle - -# endif - -#endif - -#endif /* _MSVC_NOTHROW_H */ diff --git a/contrib/grep/lib/nl_langinfo.c b/contrib/grep/lib/nl_langinfo.c deleted file mode 100644 index 79b0406ee3..0000000000 --- a/contrib/grep/lib/nl_langinfo.c +++ /dev/null @@ -1,322 +0,0 @@ -/* nl_langinfo() replacement: query locale dependent information. - - Copyright (C) 2007-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#include -#include -#if (defined _WIN32 || defined __WIN32__) && ! 
defined __CYGWIN__ -# define WIN32_LEAN_AND_MEAN /* avoid including junk */ -# include -# include -#endif - -/* Return the codeset of the current locale, if this is easily deducible. - Otherwise, return "". */ -static char * -ctype_codeset (void) -{ - static char buf[2 + 10 + 1]; - size_t buflen = 0; - char const *locale = setlocale (LC_CTYPE, NULL); - char *codeset = buf; - size_t codesetlen; - codeset[0] = '\0'; - - if (locale && locale[0]) - { - /* If the locale name contains an encoding after the dot, return it. */ - char *dot = strchr (locale, '.'); - - if (dot) - { - /* Look for the possible @... trailer and remove it, if any. */ - char *codeset_start = dot + 1; - char const *modifier = strchr (codeset_start, '@'); - - if (! modifier) - codeset = codeset_start; - else - { - codesetlen = modifier - codeset_start; - if (codesetlen < sizeof buf) - { - codeset = memcpy (buf, codeset_start, codesetlen); - codeset[codesetlen] = '\0'; - } - } - } - } - -#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ - /* If setlocale is successful, it returns the number of the - codepage, as a string. Otherwise, fall back on Windows API - GetACP, which returns the locale's codepage as a number (although - this doesn't change according to what the 'setlocale' call specified). - Either way, prepend "CP" to make it a valid codeset name. */ - codesetlen = strlen (codeset); - if (0 < codesetlen && codesetlen < sizeof buf - 2) - memmove (buf + 2, codeset, codesetlen + 1); - else - sprintf (buf + 2, "%u", GetACP ()); - codeset = memcpy (buf, "CP", 2); -#endif - return codeset; -} - - -#if REPLACE_NL_LANGINFO - -/* Override nl_langinfo with support for added nl_item values. 
*/ - -# undef nl_langinfo - -char * -rpl_nl_langinfo (nl_item item) -{ - switch (item) - { -# if GNULIB_defined_CODESET - case CODESET: - return ctype_codeset (); -# endif -# if GNULIB_defined_T_FMT_AMPM - case T_FMT_AMPM: - return "%I:%M:%S %p"; -# endif -# if GNULIB_defined_ERA - case ERA: - /* The format is not standardized. In glibc it is a sequence of strings - of the form "direction:offset:start_date:end_date:era_name:era_format" - with an empty string at the end. */ - return ""; - case ERA_D_FMT: - /* The %Ex conversion in strftime behaves like %x if the locale does not - have an alternative time format. */ - item = D_FMT; - break; - case ERA_D_T_FMT: - /* The %Ec conversion in strftime behaves like %c if the locale does not - have an alternative time format. */ - item = D_T_FMT; - break; - case ERA_T_FMT: - /* The %EX conversion in strftime behaves like %X if the locale does not - have an alternative time format. */ - item = T_FMT; - break; - case ALT_DIGITS: - /* The format is not standardized. In glibc it is a sequence of 10 - strings, appended in memory. */ - return "\0\0\0\0\0\0\0\0\0\0"; -# endif -# if GNULIB_defined_YESEXPR || !FUNC_NL_LANGINFO_YESEXPR_WORKS - case YESEXPR: - return "^[yY]"; - case NOEXPR: - return "^[nN]"; -# endif - default: - break; - } - return nl_langinfo (item); -} - -#else - -/* Provide nl_langinfo from scratch, either for native MS-Windows, or - for old Unix platforms without locales, such as Linux libc5 or - BeOS. 
*/ - -# include - -char * -nl_langinfo (nl_item item) -{ - static char nlbuf[100]; - struct tm tmm = { 0 }; - - switch (item) - { - /* nl_langinfo items of the LC_CTYPE category */ - case CODESET: - { - char *codeset = ctype_codeset (); - if (*codeset) - return codeset; - } -# ifdef __BEOS__ - return "UTF-8"; -# else - return "ISO-8859-1"; -# endif - /* nl_langinfo items of the LC_NUMERIC category */ - case RADIXCHAR: - return localeconv () ->decimal_point; - case THOUSEP: - return localeconv () ->thousands_sep; - case GROUPING: - return localeconv () ->grouping; - /* nl_langinfo items of the LC_TIME category. - TODO: Really use the locale. */ - case D_T_FMT: - case ERA_D_T_FMT: - return "%a %b %e %H:%M:%S %Y"; - case D_FMT: - case ERA_D_FMT: - return "%m/%d/%y"; - case T_FMT: - case ERA_T_FMT: - return "%H:%M:%S"; - case T_FMT_AMPM: - return "%I:%M:%S %p"; - case AM_STR: - if (!strftime (nlbuf, sizeof nlbuf, "%p", &tmm)) - return "AM"; - return nlbuf; - case PM_STR: - tmm.tm_hour = 12; - if (!strftime (nlbuf, sizeof nlbuf, "%p", &tmm)) - return "PM"; - return nlbuf; - case DAY_1: - case DAY_2: - case DAY_3: - case DAY_4: - case DAY_5: - case DAY_6: - case DAY_7: - { - static char const days[][sizeof "Wednesday"] = { - "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", - "Friday", "Saturday" - }; - tmm.tm_wday = item - DAY_1; - if (!strftime (nlbuf, sizeof nlbuf, "%A", &tmm)) - return (char *) days[item - DAY_1]; - return nlbuf; - } - case ABDAY_1: - case ABDAY_2: - case ABDAY_3: - case ABDAY_4: - case ABDAY_5: - case ABDAY_6: - case ABDAY_7: - { - static char const abdays[][sizeof "Sun"] = { - "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" - }; - tmm.tm_wday = item - ABDAY_1; - if (!strftime (nlbuf, sizeof nlbuf, "%a", &tmm)) - return (char *) abdays[item - ABDAY_1]; - return nlbuf; - } - case MON_1: - case MON_2: - case MON_3: - case MON_4: - case MON_5: - case MON_6: - case MON_7: - case MON_8: - case MON_9: - case MON_10: - case MON_11: - case MON_12: - { 
- static char const months[][sizeof "September"] = { - "January", "February", "March", "April", "May", "June", "July", - "September", "October", "November", "December" - }; - tmm.tm_mon = item - MON_1; - if (!strftime (nlbuf, sizeof nlbuf, "%B", &tmm)) - return (char *) months[item - MON_1]; - return nlbuf; - } - case ABMON_1: - case ABMON_2: - case ABMON_3: - case ABMON_4: - case ABMON_5: - case ABMON_6: - case ABMON_7: - case ABMON_8: - case ABMON_9: - case ABMON_10: - case ABMON_11: - case ABMON_12: - { - static char const abmonths[][sizeof "Jan"] = { - "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", - "Sep", "Oct", "Nov", "Dec" - }; - tmm.tm_mon = item - ABMON_1; - if (!strftime (nlbuf, sizeof nlbuf, "%b", &tmm)) - return (char *) abmonths[item - ABMON_1]; - return nlbuf; - } - case ERA: - return ""; - case ALT_DIGITS: - return "\0\0\0\0\0\0\0\0\0\0"; - /* nl_langinfo items of the LC_MONETARY category. */ - case CRNCYSTR: - return localeconv () ->currency_symbol; - case INT_CURR_SYMBOL: - return localeconv () ->int_curr_symbol; - case MON_DECIMAL_POINT: - return localeconv () ->mon_decimal_point; - case MON_THOUSANDS_SEP: - return localeconv () ->mon_thousands_sep; - case MON_GROUPING: - return localeconv () ->mon_grouping; - case POSITIVE_SIGN: - return localeconv () ->positive_sign; - case NEGATIVE_SIGN: - return localeconv () ->negative_sign; - case FRAC_DIGITS: - return & localeconv () ->frac_digits; - case INT_FRAC_DIGITS: - return & localeconv () ->int_frac_digits; - case P_CS_PRECEDES: - return & localeconv () ->p_cs_precedes; - case N_CS_PRECEDES: - return & localeconv () ->n_cs_precedes; - case P_SEP_BY_SPACE: - return & localeconv () ->p_sep_by_space; - case N_SEP_BY_SPACE: - return & localeconv () ->n_sep_by_space; - case P_SIGN_POSN: - return & localeconv () ->p_sign_posn; - case N_SIGN_POSN: - return & localeconv () ->n_sign_posn; - /* nl_langinfo items of the LC_MESSAGES category - TODO: Really use the locale. 
*/ - case YESEXPR: - return "^[yY]"; - case NOEXPR: - return "^[nN]"; - default: - return ""; - } -} - -#endif diff --git a/contrib/grep/lib/obstack.c b/contrib/grep/lib/obstack.c index dd6e1f70e9..a6757b8aad 100644 --- a/contrib/grep/lib/obstack.c +++ b/contrib/grep/lib/obstack.c @@ -1,5 +1,5 @@ /* obstack.c - subroutines used implicitly by object stack macros - Copyright (C) 1988-2015 Free Software Foundation, Inc. + Copyright (C) 1988-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with the GNU C Library; if not, see - . */ + . */ #ifdef _LIBC @@ -348,6 +348,7 @@ print_and_abort (void) abort gracefully or use longjump - but shouldn't return. This variable by default points to the internal function 'print_and_abort'. */ -void (*obstack_alloc_failed_handler) (void) = print_and_abort; +__attribute_noreturn__ void (*obstack_alloc_failed_handler) (void) + = print_and_abort; # endif /* !_OBSTACK_NO_ERROR_HANDLER */ #endif /* !_OBSTACK_ELIDE_CODE */ diff --git a/contrib/grep/lib/obstack.h b/contrib/grep/lib/obstack.h index 2b7d13b73b..f1c1d007ad 100644 --- a/contrib/grep/lib/obstack.h +++ b/contrib/grep/lib/obstack.h @@ -1,5 +1,5 @@ /* obstack.h - object stack macros - Copyright (C) 1988-2015 Free Software Foundation, Inc. + Copyright (C) 1988-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with the GNU C Library; if not, see - . */ + . */ /* Summary: @@ -111,6 +111,12 @@ #include /* For size_t and ptrdiff_t. */ #include /* For __GNU_LIBRARY__, and memcpy. 
*/ +#if __STDC_VERSION__ < 199901L || defined __HP_cc +# define __FLEXIBLE_ARRAY_MEMBER 1 +#else +# define __FLEXIBLE_ARRAY_MEMBER +#endif + #if _OBSTACK_INTERFACE_VERSION == 1 /* For binary compatibility with obstack version 1, which used "int" and "long" for these two types. */ @@ -145,6 +151,15 @@ # define __attribute_pure__ _GL_ATTRIBUTE_PURE #endif +/* Not the same as _Noreturn, since it also works with function pointers. */ +#ifndef __attribute_noreturn__ +# if 2 < __GNUC__ + (8 <= __GNUC_MINOR__) || 0x5110 <= __SUNPRO_C +# define __attribute_noreturn__ __attribute__ ((__noreturn__)) +# else +# define __attribute_noreturn__ +# endif +#endif + #ifdef __cplusplus extern "C" { #endif @@ -153,7 +168,7 @@ struct _obstack_chunk /* Lives at front of each chunk. */ { char *limit; /* 1 past end of this chunk */ struct _obstack_chunk *prev; /* address of prior chunk or NULL */ - char contents[4]; /* objects begin here */ + char contents[__FLEXIBLE_ARRAY_MEMBER]; /* objects begin here */ }; struct obstack /* control current object in current chunk */ @@ -212,7 +227,7 @@ extern _OBSTACK_SIZE_T _obstack_memory_used (struct obstack *) more memory. This can be set to a user defined function which should either abort gracefully or use longjump - but shouldn't return. The default action is to print a message and abort. */ -extern void (*obstack_alloc_failed_handler) (void); +extern __attribute_noreturn__ void (*obstack_alloc_failed_handler) (void); /* Exit value used when 'print_and_abort' is used. */ extern int obstack_exit_failure; diff --git a/contrib/grep/lib/open-safer.c b/contrib/grep/lib/open-safer.c index 267b5cf3c4..d2eea81d15 100644 --- a/contrib/grep/lib/open-safer.c +++ b/contrib/grep/lib/open-safer.c @@ -1,6 +1,6 @@ /* Invoke open, but avoid some glitches. - Copyright (C) 2005-2006, 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2008-2020 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert. */ diff --git a/contrib/grep/lib/open.c b/contrib/grep/lib/open.c deleted file mode 100644 index 95c781147f..0000000000 --- a/contrib/grep/lib/open.c +++ /dev/null @@ -1,181 +0,0 @@ -/* Open a descriptor to a file. - Copyright (C) 2007-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* Written by Bruno Haible , 2007. */ - -/* If the user's config.h happens to include , let it include only - the system's here, so that orig_open doesn't recurse to - rpl_open. */ -#define __need_system_fcntl_h -#include - -/* Get the original definition of open. It might be defined as a macro. */ -#include -#include -#undef __need_system_fcntl_h - -static int -orig_open (const char *filename, int flags, mode_t mode) -{ - return open (filename, flags, mode); -} - -/* Specification. */ -/* Write "fcntl.h" here, not , otherwise OSF/1 5.1 DTK cc eliminates - this include because of the preliminary #include above. 
*/ -#include "fcntl.h" - -#include -#include -#include -#include -#include -#include - -#ifndef REPLACE_OPEN_DIRECTORY -# define REPLACE_OPEN_DIRECTORY 0 -#endif - -int -open (const char *filename, int flags, ...) -{ - mode_t mode; - int fd; - - mode = 0; - if (flags & O_CREAT) - { - va_list arg; - va_start (arg, flags); - - /* We have to use PROMOTED_MODE_T instead of mode_t, otherwise GCC 4 - creates crashing code when 'mode_t' is smaller than 'int'. */ - mode = va_arg (arg, PROMOTED_MODE_T); - - va_end (arg); - } - -#if GNULIB_defined_O_NONBLOCK - /* The only known platform that lacks O_NONBLOCK is mingw, but it - also lacks named pipes and Unix sockets, which are the only two - file types that require non-blocking handling in open(). - Therefore, it is safe to ignore O_NONBLOCK here. It is handy - that mingw also lacks openat(), so that is also covered here. */ - flags &= ~O_NONBLOCK; -#endif - -#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ - if (strcmp (filename, "/dev/null") == 0) - filename = "NUL"; -#endif - -#if OPEN_TRAILING_SLASH_BUG - /* If the filename ends in a slash and one of O_CREAT, O_WRONLY, O_RDWR - is specified, then fail. - Rationale: POSIX - says that - "A pathname that contains at least one non-slash character and that - ends with one or more trailing slashes shall be resolved as if a - single dot character ( '.' ) were appended to the pathname." - and - "The special filename dot shall refer to the directory specified by - its predecessor." - If the named file already exists as a directory, then - - if O_CREAT is specified, open() must fail because of the semantics - of O_CREAT, - - if O_WRONLY or O_RDWR is specified, open() must fail because POSIX - says that it - fails with errno = EISDIR in this case. 
- If the named file does not exist or does not name a directory, then - - if O_CREAT is specified, open() must fail since open() cannot create - directories, - - if O_WRONLY or O_RDWR is specified, open() must fail because the - file does not contain a '.' directory. */ - if (flags & (O_CREAT | O_WRONLY | O_RDWR)) - { - size_t len = strlen (filename); - if (len > 0 && filename[len - 1] == '/') - { - errno = EISDIR; - return -1; - } - } -#endif - - fd = orig_open (filename, flags, mode); - -#if REPLACE_FCHDIR - /* Implementing fchdir and fdopendir requires the ability to open a - directory file descriptor. If open doesn't support that (as on - mingw), we use a dummy file that behaves the same as directories - on Linux (ie. always reports EOF on attempts to read()), and - override fstat() in fchdir.c to hide the fact that we have a - dummy. */ - if (REPLACE_OPEN_DIRECTORY && fd < 0 && errno == EACCES - && ((flags & O_ACCMODE) == O_RDONLY - || (O_SEARCH != O_RDONLY && (flags & O_ACCMODE) == O_SEARCH))) - { - struct stat statbuf; - if (stat (filename, &statbuf) == 0 && S_ISDIR (statbuf.st_mode)) - { - /* Maximum recursion depth of 1. */ - fd = open ("/dev/null", flags, mode); - if (0 <= fd) - fd = _gl_register_fd (fd, filename); - } - else - errno = EACCES; - } -#endif - -#if OPEN_TRAILING_SLASH_BUG - /* If the filename ends in a slash and fd does not refer to a directory, - then fail. - Rationale: POSIX - says that - "A pathname that contains at least one non-slash character and that - ends with one or more trailing slashes shall be resolved as if a - single dot character ( '.' ) were appended to the pathname." - and - "The special filename dot shall refer to the directory specified by - its predecessor." - If the named file without the slash is not a directory, open() must fail - with ENOTDIR. */ - if (fd >= 0) - { - /* We know len is positive, since open did not fail with ENOENT. 
*/ - size_t len = strlen (filename); - if (filename[len - 1] == '/') - { - struct stat statbuf; - - if (fstat (fd, &statbuf) >= 0 && !S_ISDIR (statbuf.st_mode)) - { - close (fd); - errno = ENOTDIR; - return -1; - } - } - } -#endif - -#if REPLACE_FCHDIR - if (!REPLACE_OPEN_DIRECTORY && 0 <= fd) - fd = _gl_register_fd (fd, filename); -#endif - - return fd; -} diff --git a/contrib/grep/lib/openat-die.c b/contrib/grep/lib/openat-die.c index 4accca0b2f..fd96dd7949 100644 --- a/contrib/grep/lib/openat-die.c +++ b/contrib/grep/lib/openat-die.c @@ -1,6 +1,6 @@ /* Report a save- or restore-cwd failure in our openat replacement and then exit. - Copyright (C) 2005-2006, 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2008-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include diff --git a/contrib/grep/lib/openat-priv.h b/contrib/grep/lib/openat-priv.h index f5a3690553..5b824f7a97 100644 --- a/contrib/grep/lib/openat-priv.h +++ b/contrib/grep/lib/openat-priv.h @@ -1,6 +1,6 @@ /* Internals for openat-like functions. - Copyright (C) 2005-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ /* written by Jim Meyering */ diff --git a/contrib/grep/lib/openat-proc.c b/contrib/grep/lib/openat-proc.c index 4e1d341985..9111cd3d7e 100644 --- a/contrib/grep/lib/openat-proc.c +++ b/contrib/grep/lib/openat-proc.c @@ -1,6 +1,6 @@ /* Create /proc/self/fd-related names for subfiles of open directories. - Copyright (C) 2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert. */ @@ -30,24 +30,21 @@ #include #include -#include "intprops.h" - -#define PROC_SELF_FD_FORMAT "/proc/self/fd/%d/%s" - -#define PROC_SELF_FD_NAME_SIZE_BOUND(len) \ - (sizeof PROC_SELF_FD_FORMAT - sizeof "%d%s" \ - + INT_STRLEN_BOUND (int) + (len) + 1) +#ifdef __KLIBC__ +# include +#endif +#include "intprops.h" -/* Set BUF to the expansion of PROC_SELF_FD_FORMAT, using FD and FILE - respectively for %d and %s. If successful, return BUF if the - result fits in BUF, dynamically allocated memory otherwise. But - return NULL if /proc is not reliable, either because the operating - system support is lacking or because memory is low. */ +/* Set BUF to the name of the subfile of the directory identified by + FD, where the subfile is named FILE. If successful, return BUF if + the result fits in BUF, dynamically allocated memory otherwise. + Return NULL (setting errno) on error. */ char * openat_proc_name (char buf[OPENAT_BUFFER_SIZE], int fd, char const *file) { - static int proc_status = 0; + char *result = buf; + int dirlen; /* Make sure the caller gets ENOENT when appropriate. 
*/ if (!*file) @@ -56,47 +53,82 @@ openat_proc_name (char buf[OPENAT_BUFFER_SIZE], int fd, char const *file) return buf; } - if (! proc_status) - { - /* Set PROC_STATUS to a positive value if /proc/self/fd is - reliable, and a negative value otherwise. Solaris 10 - /proc/self/fd mishandles "..", and any file name might expand - to ".." after symbolic link expansion, so avoid /proc/self/fd - if it mishandles "..". Solaris 10 has openat, but this - problem is exhibited on code that built on Solaris 8 and - running on Solaris 10. */ - - int proc_self_fd = open ("/proc/self/fd", - O_SEARCH | O_DIRECTORY | O_NOCTTY | O_NONBLOCK); - if (proc_self_fd < 0) - proc_status = -1; - else - { - /* Detect whether /proc/self/fd/%i/../fd exists, where %i is the - number of a file descriptor open on /proc/self/fd. On Linux, - that name resolves to /proc/self/fd, which was opened above. - However, on Solaris, it may resolve to /proc/self/fd/fd, which - cannot exist, since all names in /proc/self/fd are numeric. */ - char dotdot_buf[PROC_SELF_FD_NAME_SIZE_BOUND (sizeof "../fd" - 1)]; - sprintf (dotdot_buf, PROC_SELF_FD_FORMAT, proc_self_fd, "../fd"); - proc_status = access (dotdot_buf, F_OK) ? -1 : 1; - close (proc_self_fd); - } - } - - if (proc_status < 0) - return NULL; - else - { - size_t bufsize = PROC_SELF_FD_NAME_SIZE_BOUND (strlen (file)); - char *result = buf; - if (OPENAT_BUFFER_SIZE < bufsize) - { - result = malloc (bufsize); - if (! result) - return NULL; - } - sprintf (result, PROC_SELF_FD_FORMAT, fd, file); - return result; - } +#ifndef __KLIBC__ +# define PROC_SELF_FD_FORMAT "/proc/self/fd/%d/" + { + enum { + PROC_SELF_FD_DIR_SIZE_BOUND + = (sizeof PROC_SELF_FD_FORMAT - (sizeof "%d" - 1) + + INT_STRLEN_BOUND (int)) + }; + + static int proc_status = 0; + if (! proc_status) + { + /* Set PROC_STATUS to a positive value if /proc/self/fd is + reliable, and a negative value otherwise. Solaris 10 + /proc/self/fd mishandles "..", and any file name might expand + to ".." 
after symbolic link expansion, so avoid /proc/self/fd + if it mishandles "..". Solaris 10 has openat, but this + problem is exhibited on code that built on Solaris 8 and + running on Solaris 10. */ + + int proc_self_fd = open ("/proc/self/fd", + O_SEARCH | O_DIRECTORY | O_NOCTTY | O_NONBLOCK); + if (proc_self_fd < 0) + proc_status = -1; + else + { + /* Detect whether /proc/self/fd/%i/../fd exists, where %i is the + number of a file descriptor open on /proc/self/fd. On Linux, + that name resolves to /proc/self/fd, which was opened above. + However, on Solaris, it may resolve to /proc/self/fd/fd, which + cannot exist, since all names in /proc/self/fd are numeric. */ + char dotdot_buf[PROC_SELF_FD_DIR_SIZE_BOUND + sizeof "../fd" - 1]; + sprintf (dotdot_buf, PROC_SELF_FD_FORMAT "../fd", proc_self_fd); + proc_status = access (dotdot_buf, F_OK) ? -1 : 1; + close (proc_self_fd); + } + } + + if (proc_status < 0) + return NULL; + else + { + size_t bufsize = PROC_SELF_FD_DIR_SIZE_BOUND + strlen (file); + if (OPENAT_BUFFER_SIZE < bufsize) + { + result = malloc (bufsize); + if (! result) + return NULL; + } + + dirlen = sprintf (result, PROC_SELF_FD_FORMAT, fd); + } + } +#else + /* OS/2 kLIBC provides a function to retrieve a path from a fd. */ + { + char dir[_MAX_PATH]; + size_t bufsize; + + if (__libc_Back_ioFHToPath (fd, dir, sizeof dir)) + return NULL; + + dirlen = strlen (dir); + bufsize = dirlen + 1 + strlen (file) + 1; /* 1 for '/', 1 for null */ + if (OPENAT_BUFFER_SIZE < bufsize) + { + result = malloc (bufsize); + if (! result) + return NULL; + } + + strcpy (result, dir); + result[dirlen++] = '/'; + } +#endif + + strcpy (result + dirlen, file); + return result; } diff --git a/contrib/grep/lib/openat-safer.c b/contrib/grep/lib/openat-safer.c index f2401bcd02..ff6789afe4 100644 --- a/contrib/grep/lib/openat-safer.c +++ b/contrib/grep/lib/openat-safer.c @@ -1,6 +1,6 @@ /* Invoke openat, but avoid some glitches. 
- Copyright (C) 2005-2006, 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2008-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert for open, ported by Eric Blake for openat. */ diff --git a/contrib/grep/lib/openat.c b/contrib/grep/lib/openat.c deleted file mode 100644 index d1de8b9e27..0000000000 --- a/contrib/grep/lib/openat.c +++ /dev/null @@ -1,286 +0,0 @@ -/* provide a replacement openat function - Copyright (C) 2004-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* written by Jim Meyering */ - -/* If the user's config.h happens to include , let it include only - the system's here, so that orig_openat doesn't recurse to - rpl_openat. */ -#define __need_system_fcntl_h -#include - -/* Get the original definition of open. It might be defined as a macro. 
*/ -#include -#include -#undef __need_system_fcntl_h - -#if HAVE_OPENAT -static int -orig_openat (int fd, char const *filename, int flags, mode_t mode) -{ - return openat (fd, filename, flags, mode); -} -#endif - -/* Write "fcntl.h" here, not , otherwise OSF/1 5.1 DTK cc eliminates - this include because of the preliminary #include above. */ -#include "fcntl.h" - -#include "openat.h" - -#include -#include -#include -#include -#include -#include - -#if HAVE_OPENAT - -/* Like openat, but work around Solaris 9 bugs with trailing slash. */ -int -rpl_openat (int dfd, char const *filename, int flags, ...) -{ - mode_t mode; - int fd; - - mode = 0; - if (flags & O_CREAT) - { - va_list arg; - va_start (arg, flags); - - /* We have to use PROMOTED_MODE_T instead of mode_t, otherwise GCC 4 - creates crashing code when 'mode_t' is smaller than 'int'. */ - mode = va_arg (arg, PROMOTED_MODE_T); - - va_end (arg); - } - -# if OPEN_TRAILING_SLASH_BUG - /* If the filename ends in a slash and one of O_CREAT, O_WRONLY, O_RDWR - is specified, then fail. - Rationale: POSIX - says that - "A pathname that contains at least one non-slash character and that - ends with one or more trailing slashes shall be resolved as if a - single dot character ( '.' ) were appended to the pathname." - and - "The special filename dot shall refer to the directory specified by - its predecessor." - If the named file already exists as a directory, then - - if O_CREAT is specified, open() must fail because of the semantics - of O_CREAT, - - if O_WRONLY or O_RDWR is specified, open() must fail because POSIX - says that it - fails with errno = EISDIR in this case. - If the named file does not exist or does not name a directory, then - - if O_CREAT is specified, open() must fail since open() cannot create - directories, - - if O_WRONLY or O_RDWR is specified, open() must fail because the - file does not contain a '.' directory. 
*/ - if (flags & (O_CREAT | O_WRONLY | O_RDWR)) - { - size_t len = strlen (filename); - if (len > 0 && filename[len - 1] == '/') - { - errno = EISDIR; - return -1; - } - } -# endif - - fd = orig_openat (dfd, filename, flags, mode); - -# if OPEN_TRAILING_SLASH_BUG - /* If the filename ends in a slash and fd does not refer to a directory, - then fail. - Rationale: POSIX - says that - "A pathname that contains at least one non-slash character and that - ends with one or more trailing slashes shall be resolved as if a - single dot character ( '.' ) were appended to the pathname." - and - "The special filename dot shall refer to the directory specified by - its predecessor." - If the named file without the slash is not a directory, open() must fail - with ENOTDIR. */ - if (fd >= 0) - { - /* We know len is positive, since open did not fail with ENOENT. */ - size_t len = strlen (filename); - if (filename[len - 1] == '/') - { - struct stat statbuf; - - if (fstat (fd, &statbuf) >= 0 && !S_ISDIR (statbuf.st_mode)) - { - close (fd); - errno = ENOTDIR; - return -1; - } - } - } -# endif - - return fd; -} - -#else /* !HAVE_OPENAT */ - -# include "dosname.h" /* solely for definition of IS_ABSOLUTE_FILE_NAME */ -# include "openat-priv.h" -# include "save-cwd.h" - -/* Replacement for Solaris' openat function. - - First, try to simulate it via open ("/proc/self/fd/FD/FILE"). - Failing that, simulate it by doing save_cwd/fchdir/open/restore_cwd. - If either the save_cwd or the restore_cwd fails (relatively unlikely), - then give a diagnostic and exit nonzero. - Otherwise, upon failure, set errno and return -1, as openat does. - Upon successful completion, return a file descriptor. */ -int -openat (int fd, char const *file, int flags, ...) -{ - mode_t mode = 0; - - if (flags & O_CREAT) - { - va_list arg; - va_start (arg, flags); - - /* We have to use PROMOTED_MODE_T instead of mode_t, otherwise GCC 4 - creates crashing code when 'mode_t' is smaller than 'int'. 
*/ - mode = va_arg (arg, PROMOTED_MODE_T); - - va_end (arg); - } - - return openat_permissive (fd, file, flags, mode, NULL); -} - -/* Like openat (FD, FILE, FLAGS, MODE), but if CWD_ERRNO is - nonnull, set *CWD_ERRNO to an errno value if unable to save - or restore the initial working directory. This is needed only - the first time remove.c's remove_dir opens a command-line - directory argument. - - If a previous attempt to restore the current working directory - failed, then we must not even try to access a '.'-relative name. - It is the caller's responsibility not to call this function - in that case. */ - -int -openat_permissive (int fd, char const *file, int flags, mode_t mode, - int *cwd_errno) -{ - struct saved_cwd saved_cwd; - int saved_errno; - int err; - bool save_ok; - - if (fd == AT_FDCWD || IS_ABSOLUTE_FILE_NAME (file)) - return open (file, flags, mode); - - { - char buf[OPENAT_BUFFER_SIZE]; - char *proc_file = openat_proc_name (buf, fd, file); - if (proc_file) - { - int open_result = open (proc_file, flags, mode); - int open_errno = errno; - if (proc_file != buf) - free (proc_file); - /* If the syscall succeeds, or if it fails with an unexpected - errno value, then return right away. Otherwise, fall through - and resort to using save_cwd/restore_cwd. */ - if (0 <= open_result || ! EXPECTED_ERRNO (open_errno)) - { - errno = open_errno; - return open_result; - } - } - } - - save_ok = (save_cwd (&saved_cwd) == 0); - if (! save_ok) - { - if (! cwd_errno) - openat_save_fail (errno); - *cwd_errno = errno; - } - if (0 <= fd && fd == saved_cwd.desc) - { - /* If saving the working directory collides with the user's - requested fd, then the user's fd must have been closed to - begin with. */ - free_cwd (&saved_cwd); - errno = EBADF; - return -1; - } - - err = fchdir (fd); - saved_errno = errno; - - if (! err) - { - err = open (file, flags, mode); - saved_errno = errno; - if (save_ok && restore_cwd (&saved_cwd) != 0) - { - if (! 
cwd_errno) - { - /* Don't write a message to just-created fd 2. */ - saved_errno = errno; - if (err == STDERR_FILENO) - close (err); - openat_restore_fail (saved_errno); - } - *cwd_errno = errno; - } - } - - free_cwd (&saved_cwd); - errno = saved_errno; - return err; -} - -/* Return true if our openat implementation must resort to - using save_cwd and restore_cwd. */ -bool -openat_needs_fchdir (void) -{ - bool needs_fchdir = true; - int fd = open ("/", O_SEARCH); - - if (0 <= fd) - { - char buf[OPENAT_BUFFER_SIZE]; - char *proc_file = openat_proc_name (buf, fd, "."); - if (proc_file) - { - needs_fchdir = false; - if (proc_file != buf) - free (proc_file); - } - close (fd); - } - - return needs_fchdir; -} - -#endif /* !HAVE_OPENAT */ diff --git a/contrib/grep/lib/openat.h b/contrib/grep/lib/openat.h index f148adfab2..7589150f34 100644 --- a/contrib/grep/lib/openat.h +++ b/contrib/grep/lib/openat.h @@ -1,5 +1,5 @@ /* provide a replacement openat function - Copyright (C) 2004-2006, 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2004-2006, 2008-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* written by Jim Meyering */ diff --git a/contrib/grep/lib/opendir-safer.c b/contrib/grep/lib/opendir-safer.c deleted file mode 100644 index 9bbcda85a3..0000000000 --- a/contrib/grep/lib/opendir-safer.c +++ /dev/null @@ -1,76 +0,0 @@ -/* Invoke opendir, but avoid some glitches. - - Copyright (C) 2009-2015 Free Software Foundation, Inc. 
- - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* Written by Eric Blake. */ - -#include - -#include "dirent-safer.h" - -#include -#include -#include "unistd-safer.h" - -/* Like opendir, but do not clobber stdin, stdout, or stderr. */ - -DIR * -opendir_safer (char const *name) -{ - DIR *dp = opendir (name); - - if (dp) - { - int fd = dirfd (dp); - - if (0 <= fd && fd <= STDERR_FILENO) - { - /* If fdopendir is native (as on Linux), then it is safe to - assume dirfd(fdopendir(n))==n. If we are using the - gnulib module fdopendir, then this guarantee is not met, - but fdopendir recursively calls opendir_safer up to 3 - times to at least get a safe fd. If fdopendir is not - present but dirfd is accurate (as on cygwin 1.5.x), then - we recurse up to 3 times ourselves. Finally, if dirfd - always fails (as on mingw), then we are already safe. */ - DIR *newdp; - int e; -#if HAVE_FDOPENDIR || GNULIB_FDOPENDIR - int f = dup_safer (fd); - if (f < 0) - { - e = errno; - newdp = NULL; - } - else - { - newdp = fdopendir (f); - e = errno; - if (! 
newdp) - close (f); - } -#else /* !FDOPENDIR */ - newdp = opendir_safer (name); - e = errno; -#endif - closedir (dp); - errno = e; - dp = newdp; - } - } - - return dp; -} diff --git a/contrib/grep/lib/opendir.c b/contrib/grep/lib/opendir.c deleted file mode 100644 index 9f53110310..0000000000 --- a/contrib/grep/lib/opendir.c +++ /dev/null @@ -1,148 +0,0 @@ -/* Start reading the entries of a directory. - Copyright (C) 2006-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#include -#include - -#if HAVE_OPENDIR - -/* Override opendir(), to keep track of the open file descriptors. - Needed because there is a function dirfd(). */ - -#else - -# include - -# include "dirent-private.h" -# include "filename.h" - -#endif - -#if REPLACE_FCHDIR -# include -#endif - -DIR * -opendir (const char *dir_name) -{ -#if HAVE_OPENDIR -# undef opendir - DIR *dirp; - - dirp = opendir (dir_name); - if (dirp == NULL) - return NULL; - -#else - - char dir_name_mask[MAX_PATH + 1 + 1 + 1]; - int status; - HANDLE current; - WIN32_FIND_DATA entry; - struct gl_directory *dirp; - - if (dir_name[0] == '\0') - { - errno = ENOENT; - return NULL; - } - - /* Make the dir_name absolute, so that we continue reading the same - directory if the current directory changed between this opendir() - call and a subsequent rewinddir() call. 
*/ - if (!GetFullPathName (dir_name, MAX_PATH, dir_name_mask, NULL)) - { - errno = EINVAL; - return NULL; - } - - /* Append the mask. - "*" and "*.*" appear to be equivalent. */ - { - char *p; - - p = dir_name_mask + strlen (dir_name_mask); - if (p > dir_name_mask && !ISSLASH (p[-1])) - *p++ = '\\'; - *p++ = '*'; - *p = '\0'; - } - - /* Start searching the directory. */ - status = -1; - current = FindFirstFile (dir_name_mask, &entry); - if (current == INVALID_HANDLE_VALUE) - { - switch (GetLastError ()) - { - case ERROR_FILE_NOT_FOUND: - status = -2; - break; - case ERROR_PATH_NOT_FOUND: - errno = ENOENT; - return NULL; - case ERROR_DIRECTORY: - errno = ENOTDIR; - return NULL; - case ERROR_ACCESS_DENIED: - errno = EACCES; - return NULL; - default: - errno = EIO; - return NULL; - } - } - - /* Allocate the result. */ - dirp = - (struct gl_directory *) - malloc (offsetof (struct gl_directory, dir_name_mask[0]) - + strlen (dir_name_mask) + 1); - if (dirp == NULL) - { - if (current != INVALID_HANDLE_VALUE) - FindClose (current); - errno = ENOMEM; - return NULL; - } - dirp->status = status; - dirp->current = current; - if (status == -1) - memcpy (&dirp->entry, &entry, sizeof (WIN32_FIND_DATA)); - strcpy (dirp->dir_name_mask, dir_name_mask); - -#endif - -#if REPLACE_FCHDIR - { - int fd = dirfd (dirp); - if (0 <= fd && _gl_register_fd (fd, dir_name) != fd) - { - int saved_errno = errno; - closedir (dirp); - errno = saved_errno; - return NULL; - } - } -#endif - - return dirp; -} diff --git a/contrib/grep/lib/opendirat.c b/contrib/grep/lib/opendirat.c new file mode 100644 index 0000000000..6d88420e99 --- /dev/null +++ b/contrib/grep/lib/opendirat.c @@ -0,0 +1,54 @@ +/* Open a directory relative to another directory. + + Copyright 2006-2020 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written by Jim Meyering and Paul Eggert. */ + +#include + +#include + +#include +#include +#include + +/* Relative to DIR_FD, open the directory DIR, passing EXTRA_FLAGS to + the underlying openat call. On success, store into *PNEW_FD the + underlying file descriptor of the newly opened directory and return + the directory stream. On failure, return NULL and set errno. + + On success, *PNEW_FD is at least 3, so this is a "safer" function. */ + +DIR * +opendirat (int dir_fd, char const *dir, int extra_flags, int *pnew_fd) +{ + int open_flags = (O_RDONLY | O_CLOEXEC | O_DIRECTORY | O_NOCTTY + | O_NONBLOCK | extra_flags); + int new_fd = openat (dir_fd, dir, open_flags); + + if (new_fd < 0) + return NULL; + DIR *dirp = fdopendir (new_fd); + if (dirp) + *pnew_fd = new_fd; + else + { + int fdopendir_errno = errno; + close (new_fd); + errno = fdopendir_errno; + } + return dirp; +} diff --git a/contrib/grep/lib/opendirat.h b/contrib/grep/lib/opendirat.h new file mode 100644 index 0000000000..1edf5b57d0 --- /dev/null +++ b/contrib/grep/lib/opendirat.h @@ -0,0 +1,2 @@ +#include +DIR *opendirat (int, char const *, int, int *); diff --git a/contrib/grep/lib/pathmax.h b/contrib/grep/lib/pathmax.h index 2f3b64f457..15a236fae8 100644 --- a/contrib/grep/lib/pathmax.h +++ b/contrib/grep/lib/pathmax.h @@ -1,5 +1,5 @@ /* Define PATH_MAX somehow. Requires sys/types.h. 
- Copyright (C) 1992, 1999, 2001, 2003, 2005, 2009-2015 Free Software + Copyright (C) 1992, 1999, 2001, 2003, 2005, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify @@ -13,14 +13,14 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . */ #ifndef _PATHMAX_H # define _PATHMAX_H /* POSIX:2008 defines PATH_MAX to be the maximum number of bytes in a filename, including the terminating NUL byte. - + PATH_MAX is not defined on systems which have no limit on filename length, such as GNU/Hurd. @@ -65,10 +65,10 @@ # define PATH_MAX 1024 # endif -# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ +# if defined _WIN32 && ! defined __CYGWIN__ /* The page "Naming Files, Paths, and Namespaces" on msdn.microsoft.com, section "Maximum Path Length Limitation", - + explains that the maximum size of a filename, including the terminating NUL byte, is 260 = 3 + 256 + 1. This is the same value as diff --git a/contrib/grep/lib/pipe-safer.c b/contrib/grep/lib/pipe-safer.c index 91b0fe7715..5a597c5270 100644 --- a/contrib/grep/lib/pipe-safer.c +++ b/contrib/grep/lib/pipe-safer.c @@ -1,5 +1,5 @@ /* Invoke pipe, but avoid some glitches. - Copyright (C) 2005-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Jim Meyering. 
*/ diff --git a/contrib/grep/lib/progname.c b/contrib/grep/lib/progname.c deleted file mode 100644 index fe93bcaf2a..0000000000 --- a/contrib/grep/lib/progname.c +++ /dev/null @@ -1,92 +0,0 @@ -/* Program name management. - Copyright (C) 2001-2003, 2005-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2001. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - - -#include - -/* Specification. */ -#undef ENABLE_RELOCATABLE /* avoid defining set_program_name as a macro */ -#include "progname.h" - -#include /* get program_invocation_name declaration */ -#include -#include -#include - - -/* String containing name the program is called with. - To be initialized by main(). */ -const char *program_name = NULL; - -/* Set program_name, based on argv[0]. - argv0 must be a string allocated with indefinite extent, and must not be - modified after this call. */ -void -set_program_name (const char *argv0) -{ - /* libtool creates a temporary executable whose name is sometimes prefixed - with "lt-" (depends on the platform). It also makes argv[0] absolute. - But the name of the temporary executable is a detail that should not be - visible to the end user and to the test suite. - Remove this "/.libs/" or "/.libs/lt-" prefix here. */ - const char *slash; - const char *base; - - /* Sanity check. POSIX requires the invoking process to pass a non-NULL - argv[0]. 
*/ - if (argv0 == NULL) - { - /* It's a bug in the invoking program. Help diagnosing it. */ - fputs ("A NULL argv[0] was passed through an exec system call.\n", - stderr); - abort (); - } - - slash = strrchr (argv0, '/'); - base = (slash != NULL ? slash + 1 : argv0); - if (base - argv0 >= 7 && strncmp (base - 7, "/.libs/", 7) == 0) - { - argv0 = base; - if (strncmp (base, "lt-", 3) == 0) - { - argv0 = base + 3; - /* On glibc systems, remove the "lt-" prefix from the variable - program_invocation_short_name. */ -#if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME - program_invocation_short_name = (char *) argv0; -#endif - } - } - - /* But don't strip off a leading / in general, because when the user - runs - /some/hidden/place/bin/cp foo foo - he should get the error message - /some/hidden/place/bin/cp: `foo' and `foo' are the same file - not - cp: `foo' and `foo' are the same file - */ - - program_name = argv0; - - /* On glibc systems, the error() function comes from libc and uses the - variable program_invocation_name, not program_name. So set this variable - as well. */ -#if HAVE_DECL_PROGRAM_INVOCATION_NAME - program_invocation_name = (char *) argv0; -#endif -} diff --git a/contrib/grep/lib/progname.h b/contrib/grep/lib/progname.h deleted file mode 100644 index d1e034f92a..0000000000 --- a/contrib/grep/lib/progname.h +++ /dev/null @@ -1,62 +0,0 @@ -/* Program name management. - Copyright (C) 2001-2004, 2006, 2009-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2001. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#ifndef _PROGNAME_H -#define _PROGNAME_H - -/* Programs using this file should do the following in main(): - set_program_name (argv[0]); - */ - - -#ifdef __cplusplus -extern "C" { -#endif - - -/* String containing name the program is called with. */ -extern const char *program_name; - -/* Set program_name, based on argv[0]. - argv0 must be a string allocated with indefinite extent, and must not be - modified after this call. */ -extern void set_program_name (const char *argv0); - -#if ENABLE_RELOCATABLE - -/* Set program_name, based on argv[0], and original installation prefix and - directory, for relocatability. */ -extern void set_program_name_and_installdir (const char *argv0, - const char *orig_installprefix, - const char *orig_installdir); -#undef set_program_name -#define set_program_name(ARG0) \ - set_program_name_and_installdir (ARG0, INSTALLPREFIX, INSTALLDIR) - -/* Return the full pathname of the current executable, based on the earlier - call to set_program_name_and_installdir. Return NULL if unknown. */ -extern char *get_full_program_name (void); - -#endif - - -#ifdef __cplusplus -} -#endif - - -#endif /* _PROGNAME_H */ diff --git a/contrib/grep/lib/propername.c b/contrib/grep/lib/propername.c index 293e7e3187..4acefa5131 100644 --- a/contrib/grep/lib/propername.c +++ b/contrib/grep/lib/propername.c @@ -1,5 +1,5 @@ /* Localization of proper names. - Copyright (C) 2006-2015 Free Software Foundation, Inc. + Copyright (C) 2006-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2006. This program is free software: you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ /* Without this pragma, gcc 4.7.0 20111124 mistakenly suggests that the proper_name function might be candidate for attribute 'const' */ diff --git a/contrib/grep/lib/propername.h b/contrib/grep/lib/propername.h index 68e617e98c..da3f978ba3 100644 --- a/contrib/grep/lib/propername.h +++ b/contrib/grep/lib/propername.h @@ -1,5 +1,5 @@ /* Localization of proper names. -*- coding: utf-8 -*- - Copyright (C) 2006, 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2006, 2008-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2006. This program is free software: you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* INTRODUCTION diff --git a/contrib/grep/lib/quote.h b/contrib/grep/lib/quote.h index 28c05fdd06..391ff1a353 100644 --- a/contrib/grep/lib/quote.h +++ b/contrib/grep/lib/quote.h @@ -1,6 +1,6 @@ /* quote.h - prototypes for quote.c - Copyright (C) 1998-2001, 2003, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 1998-2001, 2003, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef QUOTE_H_ # define QUOTE_H_ 1 diff --git a/contrib/grep/lib/quotearg.c b/contrib/grep/lib/quotearg.c index 9f916592f6..c78fc1670f 100644 --- a/contrib/grep/lib/quotearg.c +++ b/contrib/grep/lib/quotearg.c @@ -1,6 +1,6 @@ /* quotearg.c - quote arguments for output - Copyright (C) 1998-2002, 2004-2015 Free Software Foundation, Inc. + Copyright (C) 1998-2002, 2004-2020 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert */ @@ -29,6 +29,7 @@ #include "quotearg.h" #include "quote.h" +#include "minmax.h" #include "xalloc.h" #include "c-strcaseeq.h" #include "localcharset.h" @@ -37,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +54,14 @@ #define INT_BITS (sizeof (int) * CHAR_BIT) +#ifndef FALLTHROUGH +# if __GNUC__ < 7 +# define FALLTHROUGH ((void) 0) +# else +# define FALLTHROUGH __attribute__ ((__fallthrough__)) +# endif +#endif + struct quoting_options { /* Basic quoting style. */ @@ -77,6 +87,8 @@ char const *const quoting_style_args[] = "literal", "shell", "shell-always", + "shell-escape", + "shell-escape-always", "c", "c-maybe", "escape", @@ -91,6 +103,8 @@ enum quoting_style const quoting_style_vals[] = literal_quoting_style, shell_quoting_style, shell_always_quoting_style, + shell_escape_quoting_style, + shell_escape_always_quoting_style, c_quoting_style, c_maybe_quoting_style, escape_quoting_style, @@ -116,7 +130,7 @@ clone_quoting_options (struct quoting_options *o) /* Get the value of O's quoting style. If O is null, use the default. */ enum quoting_style -get_quoting_style (struct quoting_options *o) +get_quoting_style (struct quoting_options const *o) { return (o ? 
o : &default_quoting_options)->style; } @@ -248,11 +262,15 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, { size_t i; size_t len = 0; + size_t orig_buffersize = 0; char const *quote_string = 0; size_t quote_string_len = 0; bool backslash_escapes = false; bool unibyte_locale = MB_CUR_MAX == 1; bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0; + bool pending_shell_escape_end = false; + bool encountered_single_quote = false; + bool all_c_and_shell_quote_compat = true; #define STORE(c) \ do \ @@ -263,12 +281,44 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, } \ while (0) +#define START_ESC() \ + do \ + { \ + if (elide_outer_quotes) \ + goto force_outer_quoting_style; \ + escaping = true; \ + if (quoting_style == shell_always_quoting_style \ + && ! pending_shell_escape_end) \ + { \ + STORE ('\''); \ + STORE ('$'); \ + STORE ('\''); \ + pending_shell_escape_end = true; \ + } \ + STORE ('\\'); \ + } \ + while (0) + +#define END_ESC() \ + do \ + { \ + if (pending_shell_escape_end && ! escaping) \ + { \ + STORE ('\''); \ + STORE ('\''); \ + pending_shell_escape_end = false; \ + } \ + } \ + while (0) + + process_input: + switch (quoting_style) { case c_maybe_quoting_style: quoting_style = c_quoting_style; elide_outer_quotes = true; - /* Fall through. */ + FALLTHROUGH; case c_quoting_style: if (!elide_outer_quotes) STORE ('"'); @@ -307,7 +357,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, for your locale. If you don't know what to put here, please see - + and use glyphs suitable for your language. */ left_quote = gettext_quote (N_("`"), quoting_style); right_quote = gettext_quote (N_("'"), quoting_style); @@ -321,11 +371,18 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, } break; + case shell_escape_quoting_style: + backslash_escapes = true; + FALLTHROUGH; case shell_quoting_style: - quoting_style = shell_always_quoting_style; elide_outer_quotes = true; - /* Fall through. 
*/ + FALLTHROUGH; + case shell_escape_always_quoting_style: + if (!elide_outer_quotes) + backslash_escapes = true; + FALLTHROUGH; case shell_always_quoting_style: + quoting_style = shell_always_quoting_style; if (!elide_outer_quotes) STORE ('\''); quote_string = "'"; @@ -345,8 +402,11 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, unsigned char c; unsigned char esc; bool is_right_quote = false; + bool escaping = false; + bool c_and_shell_quote_compat = false; if (backslash_escapes + && quoting_style != shell_always_quoting_style && quote_string_len && (i + quote_string_len <= (argsize == SIZE_MAX && 1 < quote_string_len @@ -367,15 +427,15 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, case '\0': if (backslash_escapes) { - if (elide_outer_quotes) - goto force_outer_quoting_style; - STORE ('\\'); + START_ESC (); /* If quote_string were to begin with digits, we'd need to test for the end of the arg as well. However, it's hard to imagine any locale that would use digits in quotes, and set_custom_quoting is documented not to - accept them. */ - if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9') + accept them. Use only a single \0 with shell-escape + as currently digits are not printed within $'...' */ + if (quoting_style != shell_always_quoting_style + && i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9') { STORE ('0'); STORE ('0'); @@ -436,6 +496,14 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, case '\t': esc = 't'; goto c_and_shell_escape; case '\v': esc = 'v'; goto c_escape; case '\\': esc = c; + /* Never need to escape '\' in shell case. */ + if (quoting_style == shell_always_quoting_style) + { + if (elide_outer_quotes) + goto force_outer_quoting_style; + goto store_c; + } + /* No need to escape the escape if we are trying to elide outer quotes and nothing else is problematic. 
*/ if (backslash_escapes && elide_outer_quotes && quote_string_len) @@ -445,7 +513,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, if (quoting_style == shell_always_quoting_style && elide_outer_quotes) goto force_outer_quoting_style; - /* Fall through. */ + /* fall through */ c_escape: if (backslash_escapes) { @@ -457,12 +525,14 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, case '{': case '}': /* sometimes special if isolated */ if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1)) break; - /* Fall through. */ + FALLTHROUGH; case '#': case '~': if (i != 0) break; - /* Fall through. */ + FALLTHROUGH; case ' ': + c_and_shell_quote_compat = true; + FALLTHROUGH; case '!': /* special in bash */ case '"': case '$': case '&': case '(': case ')': case '*': case ';': @@ -481,13 +551,26 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, break; case '\'': + encountered_single_quote = true; + c_and_shell_quote_compat = true; if (quoting_style == shell_always_quoting_style) { if (elide_outer_quotes) goto force_outer_quoting_style; + + if (buffersize && ! orig_buffersize) + { + /* Just scan string to see if supports a more concise + representation, rather than writing a longer string + but returning the length of the more concise form. */ + orig_buffersize = buffersize; + buffersize = 0; + } + STORE ('\''); STORE ('\\'); STORE ('\''); + pending_shell_escape_end = false; } break; @@ -513,6 +596,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, them. Also, a digit or a special letter would cause trouble if it appeared in quote_these_too, but that's also documented as not accepting them. */ + c_and_shell_quote_compat = true; break; default: @@ -591,6 +675,8 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, while (! mbsinit (&mbstate)); } + c_and_shell_quote_compat = printable; + if (1 < m || (backslash_escapes && ! 
printable)) { /* Output a multibyte sequence, or an escaped @@ -601,9 +687,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, { if (backslash_escapes && ! printable) { - if (elide_outer_quotes) - goto force_outer_quoting_style; - STORE ('\\'); + START_ESC (); STORE ('0' + (c >> 6)); STORE ('0' + ((c >> 3) & 7)); c = '0' + (c & 7); @@ -615,6 +699,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, } if (ilim <= i + 1) break; + END_ESC (); STORE (c); c = arg[++i]; } @@ -624,25 +709,49 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, } } - if (! ((backslash_escapes || elide_outer_quotes) + if (! (((backslash_escapes && quoting_style != shell_always_quoting_style) + || elide_outer_quotes) && quote_these_too && quote_these_too[c / INT_BITS] >> (c % INT_BITS) & 1) && !is_right_quote) goto store_c; store_escape: - if (elide_outer_quotes) - goto force_outer_quoting_style; - STORE ('\\'); + START_ESC (); store_c: + END_ESC (); STORE (c); + + if (! c_and_shell_quote_compat) + all_c_and_shell_quote_compat = false; } if (len == 0 && quoting_style == shell_always_quoting_style && elide_outer_quotes) goto force_outer_quoting_style; + /* Single shell quotes (') are commonly enough used as an apostrophe, + that we attempt to minimize the quoting in this case. Note itʼs + better to use the apostrophe modifier "\u02BC" if possible, as that + renders better and works with the word match regex \W+ etc. */ + if (quoting_style == shell_always_quoting_style && ! elide_outer_quotes + && encountered_single_quote) + { + if (all_c_and_shell_quote_compat) + return quotearg_buffer_restyled (buffer, orig_buffersize, arg, argsize, + c_quoting_style, + flags, quote_these_too, + left_quote, right_quote); + else if (! buffersize && orig_buffersize) + { + /* Disable read-only scan, and reprocess to write quoted string. 
*/ + buffersize = orig_buffersize; + len = 0; + goto process_input; + } + } + if (quote_string && !elide_outer_quotes) for (; *quote_string; quote_string++) STORE (*quote_string); @@ -654,6 +763,8 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, force_outer_quoting_style: /* Don't reuse quote_these_too, since the addition of outer quotes sufficiently quotes the specified characters. */ + if (quoting_style == shell_always_quoting_style && backslash_escapes) + quoting_style = shell_escape_always_quoting_style; return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, quoting_style, flags & ~QA_ELIDE_OUTER_QUOTES, NULL, @@ -729,7 +840,7 @@ struct slotvec /* Preallocate a slot 0 buffer, so that the caller can always quote one small component of a "memory exhausted" message in slot 0. */ static char slot0[256]; -static unsigned int nslots = 1; +static int nslots = 1; static struct slotvec slotvec0 = {sizeof slot0, slot0}; static struct slotvec *slotvec = &slotvec0; @@ -737,7 +848,7 @@ void quotearg_free (void) { struct slotvec *sv = slotvec; - unsigned int i; + int i; for (i = 1; i < nslots; i++) free (sv[i].val); if (sv[0].val != slot0) @@ -768,30 +879,24 @@ quotearg_n_options (int n, char const *arg, size_t argsize, { int e = errno; - unsigned int n0 = n; struct slotvec *sv = slotvec; if (n < 0) abort (); - if (nslots <= n0) + if (nslots <= n) { - /* FIXME: technically, the type of n1 should be 'unsigned int', - but that evokes an unsuppressible warning from gcc-4.0.1 and - older. If gcc ever provides an option to suppress that warning, - revert to the original type, so that the test in xalloc_oversized - is once again performed only at compile time. */ - size_t n1 = n0 + 1; bool preallocated = (sv == &slotvec0); + int nmax = MIN (INT_MAX, MIN (PTRDIFF_MAX, SIZE_MAX) / sizeof *sv) - 1; - if (xalloc_oversized (n1, sizeof *sv)) + if (nmax < n) xalloc_die (); - slotvec = sv = xrealloc (preallocated ? 
NULL : sv, n1 * sizeof *sv); + slotvec = sv = xrealloc (preallocated ? NULL : sv, (n + 1) * sizeof *sv); if (preallocated) *sv = slotvec0; - memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv); - nslots = n1; + memset (sv + nslots, 0, (n + 1 - nslots) * sizeof *sv); + nslots = n + 1; } { @@ -900,6 +1005,15 @@ quotearg_colon_mem (char const *arg, size_t argsize) return quotearg_char_mem (arg, argsize, ':'); } +char * +quotearg_n_style_colon (int n, enum quoting_style s, char const *arg) +{ + struct quoting_options options; + options = quoting_options_from_style (s); + set_char_quoting (&options, ':', 1); + return quotearg_n_options (n, arg, SIZE_MAX, &options); +} + char * quotearg_n_custom (int n, char const *left_quote, char const *right_quote, char const *arg) @@ -966,3 +1080,10 @@ quote (char const *arg) { return quote_n (0, arg); } + +/* + * Hey Emacs! + * Local Variables: + * coding: utf-8 + * End: + */ diff --git a/contrib/grep/lib/quotearg.h b/contrib/grep/lib/quotearg.h index d0ccd8beb1..d30fdd161b 100644 --- a/contrib/grep/lib/quotearg.h +++ b/contrib/grep/lib/quotearg.h @@ -1,6 +1,6 @@ /* quotearg.h - quote arguments for output - Copyright (C) 1998-2002, 2004, 2006, 2008-2015 Free Software Foundation, + Copyright (C) 1998-2002, 2004, 2006, 2008-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert */ @@ -73,6 +73,37 @@ enum quoting_style */ shell_always_quoting_style, + /* Quote names for the shell if they contain shell metacharacters + or other problematic characters (ls --quoting-style=shell-escape). + Non printable characters are quoted using the $'...' 
syntax, + which originated in ksh93 and is widely supported by most shells, + and proposed for inclusion in POSIX. + + quotearg_buffer: + "simple", "''$'\\0'' '$'\\t\\n'\\''\"'$'\\033''??/\\'", "a:b" + quotearg: + "simple", "''$'\\0'' '$'\\t\\n'\\''\"'$'\\033''??/\\'", "a:b" + quotearg_colon: + "simple", "''$'\\0'' '$'\\t\\n'\\''\"'$'\\033''??/\\'", "'a:b'" + */ + shell_escape_quoting_style, + + /* Quote names for the shell even if they would normally not + require quoting (ls --quoting-style=shell-escape-always). + Non printable characters are quoted using the $'...' syntax, + which originated in ksh93 and is widely supported by most shells, + and proposed for inclusion in POSIX. Behaves like + shell_escape_quoting_style if QA_ELIDE_OUTER_QUOTES is in effect. + + quotearg_buffer: + "simple", "''$'\\0'' '$'\\t\\n'\\''\"'$'\\033''??/\'", "a:b" + quotearg: + "simple", "''$'\\0'' '$'\\t\\n'\\''\"'$'\\033''??/\'", "a:b" + quotearg_colon: + "simple", "''$'\\0'' '$'\\t\\n'\\''\"'$'\\033''??/\'", "'a:b'" + */ + shell_escape_always_quoting_style, + /* Quote names as for a C language string (ls --quoting-style=c). Behaves like c_maybe_quoting_style if QA_ELIDE_OUTER_QUOTES is in effect. Split into consecutive strings if @@ -247,7 +278,7 @@ struct quoting_options; struct quoting_options *clone_quoting_options (struct quoting_options *o); /* Get the value of O's quoting style. If O is null, use the default. */ -enum quoting_style get_quoting_style (struct quoting_options *o); +enum quoting_style get_quoting_style (struct quoting_options const *o); /* In O (or in the default if O is null), set the value of the quoting style to S. */ @@ -362,6 +393,9 @@ char *quotearg_colon (char const *arg); /* Like quotearg_colon (ARG), except it can quote null bytes. */ char *quotearg_colon_mem (char const *arg, size_t argsize); +/* Like quotearg_n_style, except with ':' quoting enabled.
*/ +char *quotearg_n_style_colon (int n, enum quoting_style s, char const *arg); + /* Like quotearg_n_style (N, S, ARG) but with S as custom_quoting_style with left quote as LEFT_QUOTE and right quote as RIGHT_QUOTE. See set_custom_quoting for a description of acceptable LEFT_QUOTE and diff --git a/contrib/grep/lib/read.c b/contrib/grep/lib/read.c deleted file mode 100644 index 3d8eefa856..0000000000 --- a/contrib/grep/lib/read.c +++ /dev/null @@ -1,85 +0,0 @@ -/* POSIX compatible read() function. - Copyright (C) 2008-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2011. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#if (defined _WIN32 || defined __WIN32__) && ! 
defined __CYGWIN__ - -# include -# include - -# define WIN32_LEAN_AND_MEAN /* avoid including junk */ -# include - -# include "msvc-inval.h" -# include "msvc-nothrow.h" - -# undef read - -# if HAVE_MSVC_INVALID_PARAMETER_HANDLER -static ssize_t -read_nothrow (int fd, void *buf, size_t count) -{ - ssize_t result; - - TRY_MSVC_INVAL - { - result = read (fd, buf, count); - } - CATCH_MSVC_INVAL - { - result = -1; - errno = EBADF; - } - DONE_MSVC_INVAL; - - return result; -} -# else -# define read_nothrow read -# endif - -ssize_t -rpl_read (int fd, void *buf, size_t count) -{ - ssize_t ret = read_nothrow (fd, buf, count); - -# if GNULIB_NONBLOCKING - if (ret < 0 - && GetLastError () == ERROR_NO_DATA) - { - HANDLE h = (HANDLE) _get_osfhandle (fd); - if (GetFileType (h) == FILE_TYPE_PIPE) - { - /* h is a pipe or socket. */ - DWORD state; - if (GetNamedPipeHandleState (h, &state, NULL, NULL, NULL, NULL, 0) - && (state & PIPE_NOWAIT) != 0) - /* h is a pipe in non-blocking mode. - Change errno from EINVAL to EAGAIN. */ - errno = EAGAIN; - } - } -# endif - - return ret; -} - -#endif diff --git a/contrib/grep/lib/readdir.c b/contrib/grep/lib/readdir.c deleted file mode 100644 index 8646aac43b..0000000000 --- a/contrib/grep/lib/readdir.c +++ /dev/null @@ -1,98 +0,0 @@ -/* Read the next entry of a directory. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
*/ - -#include - -/* Specification. */ -#include - -#include -#include - -#include "dirent-private.h" - -struct dirent * -readdir (DIR *dirp) -{ - char type; - struct dirent *result; - - /* There is no need to add code to produce entries for "." and "..". - According to the POSIX:2008 section "4.12 Pathname Resolution" - - "." and ".." are syntactic entities. - POSIX also says: - "If entries for dot or dot-dot exist, one entry shall be returned - for dot and one entry shall be returned for dot-dot; otherwise, - they shall not be returned." */ - - switch (dirp->status) - { - case -2: - /* End of directory already reached. */ - return NULL; - case -1: - break; - case 0: - if (!FindNextFile (dirp->current, &dirp->entry)) - { - switch (GetLastError ()) - { - case ERROR_NO_MORE_FILES: - dirp->status = -2; - return NULL; - default: - errno = EIO; - return NULL; - } - } - break; - default: - errno = dirp->status; - return NULL; - } - - dirp->status = 0; - - if (dirp->entry.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) - type = DT_DIR; - else if (dirp->entry.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) - type = DT_LNK; - else if ((dirp->entry.dwFileAttributes - & ~(FILE_ATTRIBUTE_READONLY - | FILE_ATTRIBUTE_HIDDEN - | FILE_ATTRIBUTE_SYSTEM - | FILE_ATTRIBUTE_ARCHIVE - | FILE_ATTRIBUTE_NORMAL - | FILE_ATTRIBUTE_TEMPORARY - | FILE_ATTRIBUTE_SPARSE_FILE - | FILE_ATTRIBUTE_COMPRESSED - | FILE_ATTRIBUTE_NOT_CONTENT_INDEXED - | FILE_ATTRIBUTE_ENCRYPTED)) == 0) - /* Devices like COM1, LPT1, NUL would also have the attributes 0x20 but - they cannot occur here. */ - type = DT_REG; - else - type = DT_UNKNOWN; - - /* Reuse the memory of dirp->entry for the result. 
*/ - result = - (struct dirent *) - ((char *) dirp->entry.cFileName - offsetof (struct dirent, d_name[0])); - result->d_type = type; - - return result; -} diff --git a/contrib/grep/lib/realloc.c b/contrib/grep/lib/realloc.c deleted file mode 100644 index 068b081f94..0000000000 --- a/contrib/grep/lib/realloc.c +++ /dev/null @@ -1,79 +0,0 @@ -/* realloc() function that is glibc compatible. - - Copyright (C) 1997, 2003-2004, 2006-2007, 2009-2015 Free Software - Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* written by Jim Meyering and Bruno Haible */ - -#define _GL_USE_STDLIB_ALLOC 1 -#include - -/* Only the AC_FUNC_REALLOC macro defines 'realloc' already in config.h. */ -#ifdef realloc -# define NEED_REALLOC_GNU 1 -/* Whereas the gnulib module 'realloc-gnu' defines HAVE_REALLOC_GNU. */ -#elif GNULIB_REALLOC_GNU && !HAVE_REALLOC_GNU -# define NEED_REALLOC_GNU 1 -#endif - -/* Infer the properties of the system's malloc function. - The gnulib module 'malloc-gnu' defines HAVE_MALLOC_GNU. */ -#if GNULIB_MALLOC_GNU && HAVE_MALLOC_GNU -# define SYSTEM_MALLOC_GLIBC_COMPATIBLE 1 -#endif - -#include - -#include - -/* Change the size of an allocated block of memory P to N bytes, - with error checking. If N is zero, change it to 1. If P is NULL, - use malloc. 
*/ - -void * -rpl_realloc (void *p, size_t n) -{ - void *result; - -#if NEED_REALLOC_GNU - if (n == 0) - { - n = 1; - - /* In theory realloc might fail, so don't rely on it to free. */ - free (p); - p = NULL; - } -#endif - - if (p == NULL) - { -#if GNULIB_REALLOC_GNU && !NEED_REALLOC_GNU && !SYSTEM_MALLOC_GLIBC_COMPATIBLE - if (n == 0) - n = 1; -#endif - result = malloc (n); - } - else - result = realloc (p, n); - -#if !HAVE_REALLOC_POSIX - if (result == NULL) - errno = ENOMEM; -#endif - - return result; -} diff --git a/contrib/grep/lib/ref-add.sin b/contrib/grep/lib/ref-add.sin deleted file mode 100644 index 6cbe5b540b..0000000000 --- a/contrib/grep/lib/ref-add.sin +++ /dev/null @@ -1,29 +0,0 @@ -# Add this package to a list of references stored in a text file. -# -# Copyright (C) 2000, 2009-2015 Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, see . -# -# Written by Bruno Haible . -# -/^# Packages using this file: / { - s/# Packages using this file:// - ta - :a - s/ @PACKAGE@ / @PACKAGE@ / - tb - s/ $/ @PACKAGE@ / - :b - s/^/# Packages using this file:/ -} diff --git a/contrib/grep/lib/ref-del.sin b/contrib/grep/lib/ref-del.sin deleted file mode 100644 index f50cf0aa42..0000000000 --- a/contrib/grep/lib/ref-del.sin +++ /dev/null @@ -1,24 +0,0 @@ -# Remove this package from a list of references stored in a text file. -# -# Copyright (C) 2000, 2009-2015 Free Software Foundation, Inc. 
-# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, see . -# -# Written by Bruno Haible . -# -/^# Packages using this file: / { - s/# Packages using this file:// - s/ @PACKAGE@ / / - s/^/# Packages using this file:/ -} diff --git a/contrib/grep/lib/regcomp.c b/contrib/grep/lib/regcomp.c index 01b668b193..84044be5e0 100644 --- a/contrib/grep/lib/regcomp.c +++ b/contrib/grep/lib/regcomp.c @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . @@ -15,7 +15,7 @@ You should have received a copy of the GNU General Public License along with the GNU C Library; if not, see - . */ + . 
*/ #ifdef _LIBC # include @@ -59,7 +59,7 @@ static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); static Idx fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax); static int peek_token (re_token_t *token, re_string_t *input, - reg_syntax_t syntax) internal_function; + reg_syntax_t syntax); static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, reg_errcode_t *err); static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, @@ -153,9 +153,9 @@ static const char __re_error_msgid[] = gettext_noop ("Invalid back reference") /* REG_ESUBREG */ "\0" #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") - gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ + gettext_noop ("Unmatched [, [^, [:, [., or [=") /* REG_EBRACK */ "\0" -#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") +#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [, [^, [:, [., or [=") gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ "\0" #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") @@ -213,17 +213,9 @@ static const size_t __re_error_msgid_idx[] = Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields are set in BUFP on entry. */ -#ifdef _LIBC -const char * -re_compile_pattern (pattern, length, bufp) - const char *pattern; - size_t length; - struct re_pattern_buffer *bufp; -#else /* size_t might promote */ const char * re_compile_pattern (const char *pattern, size_t length, struct re_pattern_buffer *bufp) -#endif { reg_errcode_t ret; @@ -241,9 +233,7 @@ re_compile_pattern (const char *pattern, size_t length, return NULL; return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); } -#ifdef _LIBC weak_alias (__re_compile_pattern, re_compile_pattern) -#endif /* Set by 're_set_syntax' to the current regexp syntax to recognize. 
Can also be assigned to arbitrarily: each pattern buffer stores its own @@ -261,21 +251,17 @@ reg_syntax_t re_syntax_options; defined in regex.h. We return the old syntax. */ reg_syntax_t -re_set_syntax (syntax) - reg_syntax_t syntax; +re_set_syntax (reg_syntax_t syntax) { reg_syntax_t ret = re_syntax_options; re_syntax_options = syntax; return ret; } -#ifdef _LIBC weak_alias (__re_set_syntax, re_set_syntax) -#endif int -re_compile_fastmap (bufp) - struct re_pattern_buffer *bufp; +re_compile_fastmap (struct re_pattern_buffer *bufp) { re_dfa_t *dfa = bufp->buffer; char *fastmap = bufp->fastmap; @@ -291,9 +277,7 @@ re_compile_fastmap (bufp) bufp->fastmap_accurate = 1; return 0; } -#ifdef _LIBC weak_alias (__re_compile_fastmap, re_compile_fastmap) -#endif static inline void __attribute__ ((always_inline)) @@ -474,10 +458,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, the return codes and their meanings.) */ int -regcomp (preg, pattern, cflags) - regex_t *_Restrict_ preg; - const char *_Restrict_ pattern; - int cflags; +regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags) { reg_errcode_t ret; reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED @@ -489,7 +470,7 @@ regcomp (preg, pattern, cflags) /* Try to allocate space for the fastmap. */ preg->fastmap = re_malloc (char, SBC_MAX); - if (BE (preg->fastmap == NULL, 0)) + if (__glibc_unlikely (preg->fastmap == NULL)) return REG_ESPACE; syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; @@ -515,7 +496,7 @@ regcomp (preg, pattern, cflags) ret = REG_EPAREN; /* We have already checked preg->fastmap != NULL. */ - if (BE (ret == REG_NOERROR, 1)) + if (__glibc_likely (ret == REG_NOERROR)) /* Compute the fastmap now, since regexec cannot modify the pattern buffer. This function never fails in this implementation. 
*/ (void) re_compile_fastmap (preg); @@ -528,32 +509,21 @@ regcomp (preg, pattern, cflags) return (int) ret; } -#ifdef _LIBC +libc_hidden_def (__regcomp) weak_alias (__regcomp, regcomp) -#endif /* Returns a message corresponding to an error code, ERRCODE, returned from either regcomp or regexec. We don't use PREG here. */ -#ifdef _LIBC -size_t -regerror (errcode, preg, errbuf, errbuf_size) - int errcode; - const regex_t *_Restrict_ preg; - char *_Restrict_ errbuf; - size_t errbuf_size; -#else /* size_t might promote */ size_t -regerror (int errcode, const regex_t *_Restrict_ preg, - char *_Restrict_ errbuf, size_t errbuf_size) -#endif +regerror (int errcode, const regex_t *__restrict preg, char *__restrict errbuf, + size_t errbuf_size) { const char *msg; size_t msg_size; + int nerrcodes = sizeof __re_error_msgid_idx / sizeof __re_error_msgid_idx[0]; - if (BE (errcode < 0 - || errcode >= (int) (sizeof (__re_error_msgid_idx) - / sizeof (__re_error_msgid_idx[0])), 0)) + if (__glibc_unlikely (errcode < 0 || errcode >= nerrcodes)) /* Only error codes returned by the rest of the code should be passed to this routine. If we are given anything else, or if other regex code generates an invalid error code, then the program has a bug. @@ -564,10 +534,10 @@ regerror (int errcode, const regex_t *_Restrict_ preg, msg_size = strlen (msg) + 1; /* Includes the null. */ - if (BE (errbuf_size != 0, 1)) + if (__glibc_likely (errbuf_size != 0)) { size_t cpy_size = msg_size; - if (BE (msg_size > errbuf_size, 0)) + if (__glibc_unlikely (msg_size > errbuf_size)) { cpy_size = errbuf_size - 1; errbuf[cpy_size] = '\0'; @@ -577,9 +547,7 @@ regerror (int errcode, const regex_t *_Restrict_ preg, return msg_size; } -#ifdef _LIBC weak_alias (__regerror, regerror) -#endif #ifdef RE_ENABLE_I18N @@ -662,11 +630,10 @@ free_dfa_content (re_dfa_t *dfa) /* Free dynamically allocated space used by PREG. 
*/ void -regfree (preg) - regex_t *preg; +regfree (regex_t *preg) { re_dfa_t *dfa = preg->buffer; - if (BE (dfa != NULL, 1)) + if (__glibc_likely (dfa != NULL)) { lock_fini (dfa->lock); free_dfa_content (dfa); @@ -680,9 +647,8 @@ regfree (preg) re_free (preg->translate); preg->translate = NULL; } -#ifdef _LIBC +libc_hidden_def (__regfree) weak_alias (__regfree, regfree) -#endif /* Entry points compatible with 4.2 BSD regex library. We don't define them unless specifically requested. */ @@ -699,8 +665,7 @@ char * regcomp/regexec above without link errors. */ weak_function # endif -re_comp (s) - const char *s; +re_comp (const char *s) { reg_errcode_t ret; char *fastmap; @@ -723,7 +688,7 @@ re_comp (s) if (re_comp_buf.fastmap == NULL) { - re_comp_buf.fastmap = (char *) malloc (SBC_MAX); + re_comp_buf.fastmap = re_malloc (char, SBC_MAX); if (re_comp_buf.fastmap == NULL) return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) REG_ESPACE]); @@ -776,7 +741,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, /* Initialize the dfa. */ dfa = preg->buffer; - if (BE (preg->allocated < sizeof (re_dfa_t), 0)) + if (__glibc_unlikely (preg->allocated < sizeof (re_dfa_t))) { /* If zero allocated, but buffer is non-null, try to realloc enough space. 
This loses if buffer's address is bogus, but @@ -791,9 +756,9 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, preg->used = sizeof (re_dfa_t); err = init_dfa (dfa, length); - if (BE (err == REG_NOERROR && lock_init (dfa->lock) != 0, 0)) + if (__glibc_unlikely (err == REG_NOERROR && lock_init (dfa->lock) != 0)) err = REG_ESPACE; - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { free_dfa_content (dfa); preg->buffer = NULL; @@ -808,7 +773,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, err = re_string_construct (&regexp, pattern, length, preg->translate, (syntax & RE_ICASE) != 0, dfa); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_compile_internal_free_return: free_workarea_compile (preg); @@ -823,12 +788,12 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, /* Parse the regular expression, and build a structure tree. */ preg->re_nsub = 0; dfa->str_tree = parse (&regexp, preg, syntax, &err); - if (BE (dfa->str_tree == NULL, 0)) + if (__glibc_unlikely (dfa->str_tree == NULL)) goto re_compile_internal_free_return; /* Analyze the tree and create the nfa. */ err = analyze (preg); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto re_compile_internal_free_return; #ifdef RE_ENABLE_I18N @@ -844,7 +809,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, free_workarea_compile (preg); re_string_destruct (&regexp); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { lock_fini (dfa->lock); free_dfa_content (dfa); @@ -886,7 +851,8 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) calculation below, and for similar doubling calculations elsewhere. And it's <= rather than <, because some of the doubling calculations add 1 afterwards.
*/ - if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 <= pat_len, 0)) + if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 + <= pat_len)) return REG_ESPACE; dfa->nodes_alloc = pat_len + 1; @@ -930,7 +896,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) int i, j, ch; dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); - if (BE (dfa->sb_char == NULL, 0)) + if (__glibc_unlikely (dfa->sb_char == NULL)) return REG_ESPACE; /* Set the bits corresponding to single byte chars. */ @@ -949,7 +915,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) } #endif - if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) + if (__glibc_unlikely (dfa->nodes == NULL || dfa->state_table == NULL)) return REG_ESPACE; return REG_NOERROR; } @@ -959,21 +925,23 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) character used by some operators like "\<", "\>", etc. */ static void -internal_function init_word_char (re_dfa_t *dfa) { int i = 0; int j; int ch = 0; dfa->word_ops_used = 1; - if (BE (dfa->map_notascii == 0, 1)) + if (__glibc_likely (dfa->map_notascii == 0)) { + /* Avoid uint32_t and uint64_t as some non-GCC platforms lack + them, an issue when this code is used in Gnulib. */ bitset_word_t bits0 = 0x00000000; bitset_word_t bits1 = 0x03ff0000; bitset_word_t bits2 = 0x87fffffe; bitset_word_t bits3 = 0x07fffffe; if (BITSET_WORD_BITS == 64) { + /* Pacify gcc -Woverflow on 32-bit platforms.
*/ dfa->word_char[0] = bits1 << 31 << 1 | bits0; dfa->word_char[1] = bits3 << 31 << 1 | bits2; i = 2; @@ -990,7 +958,7 @@ init_word_char (re_dfa_t *dfa) goto general_case; ch = 128; - if (BE (dfa->is_utf8, 1)) + if (__glibc_likely (dfa->is_utf8)) { memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8); return; @@ -1037,7 +1005,7 @@ create_initial_state (re_dfa_t *dfa) first = dfa->str_tree->first->node_idx; dfa->init_node = first; err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; /* The back-references which are in initial states can epsilon transit, @@ -1081,7 +1049,7 @@ create_initial_state (re_dfa_t *dfa) /* It must be the first time to invoke acquire_state. */ dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); /* We don't check ERR here, since the initial state must not be NULL. */ - if (BE (dfa->init_state == NULL, 0)) + if (__glibc_unlikely (dfa->init_state == NULL)) return err; if (dfa->init_state->has_constraint) { @@ -1093,8 +1061,9 @@ create_initial_state (re_dfa_t *dfa) &init_nodes, CONTEXT_NEWLINE | CONTEXT_BEGBUF); - if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL - || dfa->init_state_begbuf == NULL, 0)) + if (__glibc_unlikely (dfa->init_state_word == NULL + || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL)) return err; } else @@ -1201,8 +1170,8 @@ analyze (regex_t *preg) dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc); dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); - if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL - || dfa->eclosures == NULL, 0)) + if (__glibc_unlikely (dfa->nexts == NULL || dfa->org_indices == NULL + || dfa->edests == NULL || dfa->eclosures == NULL)) return REG_ESPACE; dfa->subexp_map = re_malloc (Idx, preg->re_nsub); @@ -1217,23 +1186,23 @@ analyze (regex_t *preg) 
break; if (i == preg->re_nsub) { - free (dfa->subexp_map); + re_free (dfa->subexp_map); dfa->subexp_map = NULL; } } ret = postorder (dfa->str_tree, lower_subexps, preg); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) return ret; ret = postorder (dfa->str_tree, calc_first, dfa); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) return ret; preorder (dfa->str_tree, calc_next, dfa); ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) return ret; ret = calc_eclosure (dfa); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) return ret; /* We only need this during the prune_impossible_nodes pass in regexec.c; @@ -1242,7 +1211,7 @@ analyze (regex_t *preg) || dfa->nbackref) { dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); - if (BE (dfa->inveclosures == NULL, 0)) + if (__glibc_unlikely (dfa->inveclosures == NULL)) return REG_ESPACE; ret = calc_inveclosure (dfa); } @@ -1272,7 +1241,7 @@ postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), do { reg_errcode_t err = fn (extra, node); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; if (node->parent == NULL) return REG_NOERROR; @@ -1294,7 +1263,7 @@ preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), for (node = root; ; ) { reg_errcode_t err = fn (extra, node); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; /* Go to the left node, or up and to the right. */ @@ -1395,7 +1364,8 @@ lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); tree1 = body ? 
create_tree (dfa, body, cls, CONCAT) : cls; tree = create_tree (dfa, op, tree1, CONCAT); - if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) + if (__glibc_unlikely (tree == NULL || tree1 == NULL + || op == NULL || cls == NULL)) { *err = REG_ESPACE; return NULL; @@ -1421,7 +1391,7 @@ calc_first (void *extra, bin_tree_t *node) { node->first = node; node->node_idx = re_dfa_add_node (dfa, node->token); - if (BE (node->node_idx == REG_MISSING, 0)) + if (__glibc_unlikely (node->node_idx == -1)) return REG_ESPACE; if (node->token.type == ANCHOR) dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; @@ -1466,7 +1436,7 @@ link_nfa_nodes (void *extra, bin_tree_t *node) break; case END_OF_RE: - assert (node->next == NULL); + DEBUG_ASSERT (node->next == NULL); break; case OP_DUP_ASTERISK: @@ -1482,8 +1452,8 @@ link_nfa_nodes (void *extra, bin_tree_t *node) right = node->right->first->node_idx; else right = node->next->node_idx; - assert (REG_VALID_INDEX (left)); - assert (REG_VALID_INDEX (right)); + DEBUG_ASSERT (left > -1); + DEBUG_ASSERT (right > -1); err = re_node_set_init_2 (dfa->edests + idx, left, right); } break; @@ -1501,7 +1471,7 @@ link_nfa_nodes (void *extra, bin_tree_t *node) break; default: - assert (!IS_EPSILON_NODE (node->token.type)); + DEBUG_ASSERT (!IS_EPSILON_NODE (node->token.type)); dfa->nexts[idx] = node->next->node_idx; break; } @@ -1514,7 +1484,6 @@ link_nfa_nodes (void *extra, bin_tree_t *node) to their own constraint. 
*/ static reg_errcode_t -internal_function duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, Idx root_node, unsigned int init_constraint) { @@ -1533,11 +1502,11 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, org_dest = dfa->nexts[org_node]; re_node_set_empty (dfa->edests + clone_node); clone_dest = duplicate_node (dfa, org_dest, constraint); - if (BE (clone_dest == REG_MISSING, 0)) + if (__glibc_unlikely (clone_dest == -1)) return REG_ESPACE; dfa->nexts[clone_node] = dfa->nexts[org_node]; ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) return REG_ESPACE; } else if (dfa->edests[org_node].nelem == 0) @@ -1559,17 +1528,17 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, if (org_node == root_node && clone_node != org_node) { ok = re_node_set_insert (dfa->edests + clone_node, org_dest); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) return REG_ESPACE; break; } /* In case the node has another constraint, append it. */ constraint |= dfa->nodes[org_node].constraint; clone_dest = duplicate_node (dfa, org_dest, constraint); - if (BE (clone_dest == REG_MISSING, 0)) + if (__glibc_unlikely (clone_dest == -1)) return REG_ESPACE; ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) return REG_ESPACE; } else /* dfa->edests[org_node].nelem == 2 */ @@ -1580,19 +1549,19 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, re_node_set_empty (dfa->edests + clone_node); /* Search for a duplicated node which satisfies the constraint. */ clone_dest = search_duplicated_node (dfa, org_dest, constraint); - if (clone_dest == REG_MISSING) + if (clone_dest == -1) { /* There is no such duplicated node, create a new one. 
*/ reg_errcode_t err; clone_dest = duplicate_node (dfa, org_dest, constraint); - if (BE (clone_dest == REG_MISSING, 0)) + if (__glibc_unlikely (clone_dest == -1)) return REG_ESPACE; ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) return REG_ESPACE; err = duplicate_node_closure (dfa, org_dest, clone_dest, root_node, constraint); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } else @@ -1600,16 +1569,16 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, /* There is a duplicated node which satisfies the constraint, use it to avoid infinite loop. */ ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) return REG_ESPACE; } org_dest = dfa->edests[org_node].elems[1]; clone_dest = duplicate_node (dfa, org_dest, constraint); - if (BE (clone_dest == REG_MISSING, 0)) + if (__glibc_unlikely (clone_dest == -1)) return REG_ESPACE; ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) return REG_ESPACE; } org_node = org_dest; @@ -1632,18 +1601,18 @@ search_duplicated_node (const re_dfa_t *dfa, Idx org_node, && constraint == dfa->nodes[idx].constraint) return idx; /* Found. */ } - return REG_MISSING; /* Not found. */ + return -1; /* Not found. */ } /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. - Return the index of the new node, or REG_MISSING if insufficient storage is + Return the index of the new node, or -1 if insufficient storage is available. 
*/ static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint) { Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); - if (BE (dup_idx != REG_MISSING, 1)) + if (__glibc_likely (dup_idx != -1)) { dfa->nodes[dup_idx].constraint = constraint; dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint; @@ -1669,7 +1638,7 @@ calc_inveclosure (re_dfa_t *dfa) for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) { ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) return REG_ESPACE; } } @@ -1684,9 +1653,7 @@ calc_eclosure (re_dfa_t *dfa) { Idx node_idx; bool incomplete; -#ifdef DEBUG - assert (dfa->nodes_len > 0); -#endif + DEBUG_ASSERT (dfa->nodes_len > 0); incomplete = false; /* For each nodes, calculate epsilon closure. */ for (node_idx = 0; ; ++node_idx) @@ -1701,16 +1668,14 @@ calc_eclosure (re_dfa_t *dfa) node_idx = 0; } -#ifdef DEBUG - assert (dfa->eclosures[node_idx].nelem != REG_MISSING); -#endif + DEBUG_ASSERT (dfa->eclosures[node_idx].nelem != -1); /* If we have already calculated, skip it. */ if (dfa->eclosures[node_idx].nelem != 0) continue; /* Calculate epsilon closure of 'node_idx'. */ err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; if (dfa->eclosures[node_idx].nelem == 0) @@ -1733,12 +1698,12 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) bool ok; bool incomplete = false; err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; /* This indicates that we are calculating this node now. We reference this value to avoid infinite loop. */ - dfa->eclosures[node].nelem = REG_MISSING; + dfa->eclosures[node].nelem = -1; /* If the current node has constraints, duplicate all nodes since they must inherit the constraints. 
*/ @@ -1748,7 +1713,7 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) { err = duplicate_node_closure (dfa, node, node, node, dfa->nodes[node].constraint); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } @@ -1760,7 +1725,7 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) Idx edest = dfa->edests[node].elems[i]; /* If calculating the epsilon closure of 'edest' is in progress, return intermediate result. */ - if (dfa->eclosures[edest].nelem == REG_MISSING) + if (dfa->eclosures[edest].nelem == -1) { incomplete = true; continue; @@ -1770,14 +1735,14 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) if (dfa->eclosures[edest].nelem == 0) { err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } else eclosure_elem = dfa->eclosures[edest]; /* Merge the epsilon closure of 'edest'. */ err = re_node_set_merge (&eclosure, &eclosure_elem); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; /* If the epsilon closure of 'edest' is incomplete, the epsilon closure of this node is also incomplete. */ @@ -1790,7 +1755,7 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) /* An epsilon closure includes itself. */ ok = re_node_set_insert (&eclosure, node); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) return REG_ESPACE; if (incomplete && !root) dfa->eclosures[node].nelem = 0; @@ -1806,7 +1771,6 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) We must not use this function inside bracket expressions. 
*/ static void -internal_function fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) { re_string_skip_bytes (input, peek_token (result, input, syntax)); @@ -1816,7 +1780,6 @@ fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) We must not use this function inside bracket expressions. */ static int -internal_function peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) { unsigned char c; @@ -1833,8 +1796,8 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) token->word_char = 0; #ifdef RE_ENABLE_I18N token->mb_partial = 0; - if (input->mb_cur_max > 1 && - !re_string_first_byte (input, re_string_cur_idx (input))) + if (input->mb_cur_max > 1 + && !re_string_first_byte (input, re_string_cur_idx (input))) { token->type = CHARACTER; token->mb_partial = 1; @@ -2021,8 +1984,8 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) token->type = OP_PERIOD; break; case '^': - if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && - re_string_cur_idx (input) != 0) + if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) + && re_string_cur_idx (input) != 0) { char prev = re_string_peek_byte (input, -1); if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') @@ -2032,8 +1995,8 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) token->opr.ctx_type = LINE_FIRST; break; case '$': - if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && - re_string_cur_idx (input) + 1 != re_string_length (input)) + if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) + && re_string_cur_idx (input) + 1 != re_string_length (input)) { re_token_t next; re_string_skip_bytes (input, 1); @@ -2055,7 +2018,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) We must not use this function out of bracket expressions. 
*/ static int -internal_function peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) { unsigned char c; @@ -2068,8 +2030,8 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) token->opr.c = c; #ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1 && - !re_string_first_byte (input, re_string_cur_idx (input))) + if (input->mb_cur_max > 1 + && !re_string_first_byte (input, re_string_cur_idx (input))) { token->type = CHARACTER; return 1; @@ -2102,16 +2064,18 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) case '.': token->type = OP_OPEN_COLL_ELEM; break; + case '=': token->type = OP_OPEN_EQUIV_CLASS; break; + case ':': if (syntax & RE_CHAR_CLASSES) { token->type = OP_OPEN_CHAR_CLASS; break; } - /* else fall through. */ + FALLTHROUGH; default: token->type = CHARACTER; token->opr.c = c; @@ -2161,14 +2125,14 @@ parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, dfa->syntax = syntax; fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) + if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) return NULL; eor = create_tree (dfa, NULL, NULL, END_OF_RE); if (tree != NULL) root = create_tree (dfa, tree, eor, CONCAT); else root = eor; - if (BE (eor == NULL || root == NULL, 0)) + if (__glibc_unlikely (eor == NULL || root == NULL)) { *err = REG_ESPACE; return NULL; @@ -2193,7 +2157,7 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, bin_tree_t *tree, *branch = NULL; bitset_word_t initial_bkref_map = dfa->completed_bkref_map; tree = parse_branch (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) + if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) return NULL; while (token->type == OP_ALT) @@ -2205,7 +2169,7 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, 
bitset_word_t accumulated_bkref_map = dfa->completed_bkref_map; dfa->completed_bkref_map = initial_bkref_map; branch = parse_branch (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && branch == NULL, 0)) + if (__glibc_unlikely (*err != REG_NOERROR && branch == NULL)) { if (tree != NULL) postorder (tree, free_tree, NULL); @@ -2216,7 +2180,7 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, else branch = NULL; tree = create_tree (dfa, tree, branch, OP_ALT); - if (BE (tree == NULL, 0)) + if (__glibc_unlikely (tree == NULL)) { *err = REG_ESPACE; return NULL; @@ -2241,14 +2205,14 @@ parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, bin_tree_t *tree, *expr; re_dfa_t *dfa = preg->buffer; tree = parse_expression (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) + if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) return NULL; while (token->type != OP_ALT && token->type != END_OF_RE && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) { expr = parse_expression (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && expr == NULL, 0)) + if (__glibc_unlikely (*err != REG_NOERROR && expr == NULL)) { if (tree != NULL) postorder (tree, free_tree, NULL); @@ -2289,7 +2253,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, { case CHARACTER: tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) + if (__glibc_unlikely (tree == NULL)) { *err = REG_ESPACE; return NULL; @@ -2304,7 +2268,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, fetch_token (token, regexp, syntax); mbc_remain = create_token_tree (dfa, NULL, NULL, token); tree = create_tree (dfa, tree, mbc_remain, CONCAT); - if (BE (mbc_remain == NULL || tree == NULL, 0)) + if (__glibc_unlikely (mbc_remain == NULL || tree == NULL)) { *err = REG_ESPACE; return NULL; @@ -2313,25 +2277,28 @@ parse_expression (re_string_t 
*regexp, regex_t *preg, re_token_t *token, } #endif break; + case OP_OPEN_SUBEXP: tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) + if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) return NULL; break; + case OP_OPEN_BRACKET: tree = parse_bracket_exp (regexp, dfa, token, syntax, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) + if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) return NULL; break; + case OP_BACK_REF: - if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) + if (!__glibc_likely (dfa->completed_bkref_map & (1 << token->opr.idx))) { *err = REG_ESUBREG; return NULL; } dfa->used_bkref_map |= 1 << token->opr.idx; tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) + if (__glibc_unlikely (tree == NULL)) { *err = REG_ESPACE; return NULL; @@ -2339,13 +2306,14 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, ++dfa->nbackref; dfa->has_mb_node = 1; break; + case OP_OPEN_DUP_NUM: if (syntax & RE_CONTEXT_INVALID_DUP) { *err = REG_BADRPT; return NULL; } - /* FALLTHROUGH */ + FALLTHROUGH; case OP_DUP_ASTERISK: case OP_DUP_PLUS: case OP_DUP_QUESTION: @@ -2359,15 +2327,15 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, fetch_token (token, regexp, syntax); return parse_expression (regexp, preg, token, syntax, nest, err); } - /* else fall through */ + FALLTHROUGH; case OP_CLOSE_SUBEXP: - if ((token->type == OP_CLOSE_SUBEXP) && - !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) + if ((token->type == OP_CLOSE_SUBEXP) + && !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) { *err = REG_ERPAREN; return NULL; } - /* else fall through */ + FALLTHROUGH; case OP_CLOSE_DUP_NUM: /* We treat it as a normal character. */ @@ -2376,12 +2344,13 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, /* mb_partial and word_char bits should be initialized already by peek_token. 
*/ tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) + if (__glibc_unlikely (tree == NULL)) { *err = REG_ESPACE; return NULL; } break; + case ANCHOR: if ((token->opr.ctx_type & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) @@ -2405,7 +2374,8 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, } tree_last = create_token_tree (dfa, NULL, NULL, token); tree = create_tree (dfa, tree_first, tree_last, OP_ALT); - if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) + if (__glibc_unlikely (tree_first == NULL || tree_last == NULL + || tree == NULL)) { *err = REG_ESPACE; return NULL; @@ -2414,7 +2384,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, else { tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) + if (__glibc_unlikely (tree == NULL)) { *err = REG_ESPACE; return NULL; @@ -2426,9 +2396,10 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, it must not be "". 
*/ fetch_token (token, regexp, syntax); return tree; + case OP_PERIOD: tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) + if (__glibc_unlikely (tree == NULL)) { *err = REG_ESPACE; return NULL; @@ -2436,35 +2407,38 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, if (dfa->mb_cur_max > 1) dfa->has_mb_node = 1; break; + case OP_WORD: case OP_NOTWORD: tree = build_charclass_op (dfa, regexp->trans, "alnum", "_", token->type == OP_NOTWORD, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) + if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) return NULL; break; + case OP_SPACE: case OP_NOTSPACE: tree = build_charclass_op (dfa, regexp->trans, "space", "", token->type == OP_NOTSPACE, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) + if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) return NULL; break; + case OP_ALT: case END_OF_RE: return NULL; + case BACK_SLASH: *err = REG_EESCAPE; return NULL; + default: /* Must not happen? 
*/ -#ifdef DEBUG - assert (0); -#endif + DEBUG_ASSERT (false); return NULL; } fetch_token (token, regexp, syntax); @@ -2474,7 +2448,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, { bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); - if (BE (*err != REG_NOERROR && dup_tree == NULL, 0)) + if (__glibc_unlikely (*err != REG_NOERROR && dup_tree == NULL)) { if (tree != NULL) postorder (tree, free_tree, NULL); @@ -2520,13 +2494,14 @@ parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, else { tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); - if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) + if (__glibc_unlikely (*err == REG_NOERROR + && token->type != OP_CLOSE_SUBEXP)) { if (tree != NULL) postorder (tree, free_tree, NULL); *err = REG_EPAREN; } - if (BE (*err != REG_NOERROR, 0)) + if (__glibc_unlikely (*err != REG_NOERROR)) return NULL; } @@ -2534,7 +2509,7 @@ parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, dfa->completed_bkref_map |= 1 << cur_nsub; tree = create_tree (dfa, tree, NULL, SUBEXP); - if (BE (tree == NULL, 0)) + if (__glibc_unlikely (tree == NULL)) { *err = REG_ESPACE; return NULL; @@ -2557,7 +2532,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, { end = 0; start = fetch_number (regexp, token, syntax); - if (start == REG_MISSING) + if (start == -1) { if (token->type == CHARACTER && token->opr.c == ',') start = 0; /* We treat "{,m}" as "{0,m}". */ @@ -2567,17 +2542,17 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, return NULL; } } - if (BE (start != REG_ERROR, 1)) + if (__glibc_likely (start != -2)) { /* We treat "{n}" as "{n,n}". */ end = ((token->type == OP_CLOSE_DUP_NUM) ? start : ((token->type == CHARACTER && token->opr.c == ',') - ? fetch_number (regexp, token, syntax) : REG_ERROR)); + ? 
fetch_number (regexp, token, syntax) : -2)); } - if (BE (start == REG_ERROR || end == REG_ERROR, 0)) + if (__glibc_unlikely (start == -2 || end == -2)) { /* Invalid sequence. */ - if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) + if (__glibc_unlikely (!(syntax & RE_INVALID_INTERVAL_ORD))) { if (token->type == END_OF_RE) *err = REG_EBRACE; @@ -2596,15 +2571,15 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, return elem; } - if (BE ((end != REG_MISSING && start > end) - || token->type != OP_CLOSE_DUP_NUM, 0)) + if (__glibc_unlikely ((end != -1 && start > end) + || token->type != OP_CLOSE_DUP_NUM)) { /* First number greater than second. */ *err = REG_BADBR; return NULL; } - if (BE (RE_DUP_MAX < (end == REG_MISSING ? start : end), 0)) + if (__glibc_unlikely (RE_DUP_MAX < (end == -1 ? start : end))) { *err = REG_ESIZE; return NULL; @@ -2613,28 +2588,28 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, else { start = (token->type == OP_DUP_PLUS) ? 1 : 0; - end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING; + end = (token->type == OP_DUP_QUESTION) ? 1 : -1; } fetch_token (token, regexp, syntax); - if (BE (elem == NULL, 0)) + if (__glibc_unlikely (elem == NULL)) return NULL; - if (BE (start == 0 && end == 0, 0)) + if (__glibc_unlikely (start == 0 && end == 0)) { postorder (elem, free_tree, NULL); return NULL; } /* Extract "{n,m}" to "...{0,}". */ - if (BE (start > 0, 0)) + if (__glibc_unlikely (start > 0)) { tree = elem; for (i = 2; i <= start; ++i) { elem = duplicate_tree (elem, dfa); tree = create_tree (dfa, tree, elem, CONCAT); - if (BE (elem == NULL || tree == NULL, 0)) + if (__glibc_unlikely (elem == NULL || tree == NULL)) goto parse_dup_op_espace; } @@ -2643,7 +2618,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, /* Duplicate ELEM before it is marked optional. 
*/ elem = duplicate_tree (elem, dfa); - if (BE (elem == NULL, 0)) + if (__glibc_unlikely (elem == NULL)) goto parse_dup_op_espace; old_tree = tree; } @@ -2657,27 +2632,23 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, } tree = create_tree (dfa, elem, NULL, - (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT)); - if (BE (tree == NULL, 0)) + (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); + if (__glibc_unlikely (tree == NULL)) goto parse_dup_op_espace; -/* From gnulib's "intprops.h": - True if the arithmetic type T is signed. */ -#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1)) - - /* This loop is actually executed only when end != REG_MISSING, + /* This loop is actually executed only when end != -1, to rewrite {0,n} as ((...?)?)?... We have already created the start+1-th copy. */ - if (TYPE_SIGNED (Idx) || end != REG_MISSING) + if (TYPE_SIGNED (Idx) || end != -1) for (i = start + 2; i <= end; ++i) { elem = duplicate_tree (elem, dfa); tree = create_tree (dfa, tree, elem, CONCAT); - if (BE (elem == NULL || tree == NULL, 0)) + if (__glibc_unlikely (elem == NULL || tree == NULL)) goto parse_dup_op_espace; tree = create_tree (dfa, tree, NULL, OP_ALT); - if (BE (tree == NULL, 0)) + if (__glibc_unlikely (tree == NULL)) goto parse_dup_op_espace; } @@ -2696,6 +2667,18 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, #define BRACKET_NAME_BUF_SIZE 32 #ifndef _LIBC + +# ifdef RE_ENABLE_I18N +/* Convert the byte B to the corresponding wide character. In a + unibyte locale, treat B as itself. In a multibyte locale, return + WEOF if B is an encoding error. */ +static wint_t +parse_byte (unsigned char b, re_charset_t *mbcset) +{ + return mbcset == NULL ? b : __btowc (b); +} +# endif + /* Local function for parse_bracket_exp only used in case of NOT _LIBC. Build the range expression which starts from START_ELEM, and ends at END_ELEM. The result are written to MBCSET and SBCSET. 
@@ -2704,7 +2687,6 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, update it. */ static reg_errcode_t -internal_function # ifdef RE_ENABLE_I18N build_range_exp (const reg_syntax_t syntax, bitset_t sbcset, @@ -2721,17 +2703,18 @@ build_range_exp (const reg_syntax_t syntax, { unsigned int start_ch, end_ch; /* Equivalence Classes and Character Classes can't be a range start/end. */ - if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS - || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, - 0)) + if (__glibc_unlikely (start_elem->type == EQUIV_CLASS + || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS + || end_elem->type == CHAR_CLASS)) return REG_ERANGE; /* We can handle no multi character collating elements without libc support. */ - if (BE ((start_elem->type == COLL_SYM - && strlen ((char *) start_elem->opr.name) > 1) - || (end_elem->type == COLL_SYM - && strlen ((char *) end_elem->opr.name) > 1), 0)) + if (__glibc_unlikely ((start_elem->type == COLL_SYM + && strlen ((char *) start_elem->opr.name) > 1) + || (end_elem->type == COLL_SYM + && strlen ((char *) end_elem->opr.name) > 1))) return REG_ECOLLATE; # ifdef RE_ENABLE_I18N @@ -2747,12 +2730,13 @@ build_range_exp (const reg_syntax_t syntax, : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] : 0)); start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) - ? __btowc (start_ch) : start_elem->opr.wch); + ? parse_byte (start_ch, mbcset) : start_elem->opr.wch); end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) - ? __btowc (end_ch) : end_elem->opr.wch); + ? parse_byte (end_ch, mbcset) : end_elem->opr.wch); if (start_wc == WEOF || end_wc == WEOF) return REG_ECOLLATE; - else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0)) + else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES) + && start_wc > end_wc)) return REG_ERANGE; /* Got valid collation sequence values, add them as a new entry. 
@@ -2763,7 +2747,7 @@ build_range_exp (const reg_syntax_t syntax, if (mbcset) { /* Check the space of the arrays. */ - if (BE (*range_alloc == mbcset->nranges, 0)) + if (__glibc_unlikely (*range_alloc == mbcset->nranges)) { /* There is not enough space, need realloc. */ wchar_t *new_array_start, *new_array_end; @@ -2778,8 +2762,13 @@ build_range_exp (const reg_syntax_t syntax, new_array_end = re_realloc (mbcset->range_ends, wchar_t, new_nranges); - if (BE (new_array_start == NULL || new_array_end == NULL, 0)) - return REG_ESPACE; + if (__glibc_unlikely (new_array_start == NULL + || new_array_end == NULL)) + { + re_free (new_array_start); + re_free (new_array_end); + return REG_ESPACE; + } mbcset->range_starts = new_array_start; mbcset->range_ends = new_array_end; @@ -2826,7 +2815,6 @@ build_range_exp (const reg_syntax_t syntax, pointer argument since we may update it. */ static reg_errcode_t -internal_function # ifdef RE_ENABLE_I18N build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, Idx *coll_sym_alloc, const unsigned char *name) @@ -2835,7 +2823,7 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name) # endif /* not RE_ENABLE_I18N */ { size_t name_len = strlen ((const char *) name); - if (BE (name_len != 1, 0)) + if (__glibc_unlikely (name_len != 1)) return REG_ECOLLATE; else { @@ -2970,18 +2958,21 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, /* Equivalence Classes and Character Classes can't be a range start/end. */ - if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS - || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, - 0)) + if (__glibc_unlikely (start_elem->type == EQUIV_CLASS + || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS + || end_elem->type == CHAR_CLASS)) return REG_ERANGE; /* FIXME: Implement rational ranges here, too. 
*/ start_collseq = lookup_collation_sequence_value (start_elem); end_collseq = lookup_collation_sequence_value (end_elem); /* Check start/end collation sequence values. */ - if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) + if (__glibc_unlikely (start_collseq == UINT_MAX + || end_collseq == UINT_MAX)) return REG_ECOLLATE; - if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) + if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES) + && start_collseq > end_collseq)) return REG_ERANGE; /* Got valid collation sequence values, add them as a new entry. @@ -2991,7 +2982,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, if (nrules > 0 || dfa->mb_cur_max > 1) { /* Check the space of the arrays. */ - if (BE (*range_alloc == mbcset->nranges, 0)) + if (__glibc_unlikely (*range_alloc == mbcset->nranges)) { /* There is not enough space, need realloc. */ uint32_t *new_array_start; @@ -3005,7 +2996,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, new_array_end = re_realloc (mbcset->range_ends, uint32_t, new_nranges); - if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + if (__glibc_unlikely (new_array_start == NULL + || new_array_end == NULL)) return REG_ESPACE; mbcset->range_starts = new_array_start; @@ -3069,7 +3061,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, /* Got valid collation sequence, add it as a new entry. */ /* Check the space of the arrays. */ - if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) + if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms)) { /* Not enough, realloc it. */ /* +1 in case of mbcset->ncoll_syms is 0. */ @@ -3078,7 +3070,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, if *alloc == 0. 
*/ int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, new_coll_sym_alloc); - if (BE (new_coll_syms == NULL, 0)) + if (__glibc_unlikely (new_coll_syms == NULL)) return REG_ESPACE; mbcset->coll_syms = new_coll_syms; *coll_sym_alloc = new_coll_sym_alloc; @@ -3088,7 +3080,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, } else { - if (BE (name_len != 1, 0)) + if (__glibc_unlikely (name_len != 1)) return REG_ECOLLATE; else { @@ -3132,9 +3124,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); #endif /* RE_ENABLE_I18N */ #ifdef RE_ENABLE_I18N - if (BE (sbcset == NULL || mbcset == NULL, 0)) + if (__glibc_unlikely (sbcset == NULL || mbcset == NULL)) #else - if (BE (sbcset == NULL, 0)) + if (__glibc_unlikely (sbcset == NULL)) #endif /* RE_ENABLE_I18N */ { re_free (sbcset); @@ -3146,7 +3138,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, } token_len = peek_token_bracket (token, regexp, syntax); - if (BE (token->type == END_OF_RE, 0)) + if (__glibc_unlikely (token->type == END_OF_RE)) { *err = REG_BADPAT; goto parse_bracket_exp_free_return; @@ -3161,7 +3153,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, bitset_set (sbcset, '\n'); re_string_skip_bytes (regexp, token_len); /* Skip a token. 
*/ token_len = peek_token_bracket (token, regexp, syntax); - if (BE (token->type == END_OF_RE, 0)) + if (__glibc_unlikely (token->type == END_OF_RE)) { *err = REG_BADPAT; goto parse_bracket_exp_free_return; @@ -3186,7 +3178,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, start_elem.type = COLL_SYM; ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, syntax, first_round); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) { *err = ret; goto parse_bracket_exp_free_return; @@ -3199,7 +3191,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, /* Do not check for ranges if we know they are not allowed. */ if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) { - if (BE (token->type == END_OF_RE, 0)) + if (__glibc_unlikely (token->type == END_OF_RE)) { *err = REG_EBRACK; goto parse_bracket_exp_free_return; @@ -3208,7 +3200,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, { re_string_skip_bytes (regexp, token_len); /* Skip '-'. 
*/ token_len2 = peek_token_bracket (&token2, regexp, syntax); - if (BE (token2.type == END_OF_RE, 0)) + if (__glibc_unlikely (token2.type == END_OF_RE)) { *err = REG_EBRACK; goto parse_bracket_exp_free_return; @@ -3230,7 +3222,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, end_elem.type = COLL_SYM; ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, dfa, syntax, true); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) { *err = ret; goto parse_bracket_exp_free_return; @@ -3250,7 +3242,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem); # endif #endif /* RE_ENABLE_I18N */ - if (BE (*err != REG_NOERROR, 0)) + if (__glibc_unlikely (*err != REG_NOERROR)) goto parse_bracket_exp_free_return; } else @@ -3263,7 +3255,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, #ifdef RE_ENABLE_I18N case MB_CHAR: /* Check whether the array has enough space. */ - if (BE (mbchar_alloc == mbcset->nmbchars, 0)) + if (__glibc_unlikely (mbchar_alloc == mbcset->nmbchars)) { wchar_t *new_mbchars; /* Not enough, realloc it. */ @@ -3272,7 +3264,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, /* Use realloc since array is NULL if *alloc == 0. 
*/ new_mbchars = re_realloc (mbcset->mbchars, wchar_t, mbchar_alloc); - if (BE (new_mbchars == NULL, 0)) + if (__glibc_unlikely (new_mbchars == NULL)) goto parse_bracket_exp_espace; mbcset->mbchars = new_mbchars; } @@ -3285,7 +3277,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, mbcset, &equiv_class_alloc, #endif /* RE_ENABLE_I18N */ start_elem.opr.name); - if (BE (*err != REG_NOERROR, 0)) + if (__glibc_unlikely (*err != REG_NOERROR)) goto parse_bracket_exp_free_return; break; case COLL_SYM: @@ -3294,7 +3286,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, mbcset, &coll_sym_alloc, #endif /* RE_ENABLE_I18N */ start_elem.opr.name); - if (BE (*err != REG_NOERROR, 0)) + if (__glibc_unlikely (*err != REG_NOERROR)) goto parse_bracket_exp_free_return; break; case CHAR_CLASS: @@ -3304,15 +3296,15 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, #endif /* RE_ENABLE_I18N */ (const char *) start_elem.opr.name, syntax); - if (BE (*err != REG_NOERROR, 0)) + if (__glibc_unlikely (*err != REG_NOERROR)) goto parse_bracket_exp_free_return; break; default: - assert (0); + DEBUG_ASSERT (false); break; } } - if (BE (token->type == END_OF_RE, 0)) + if (__glibc_unlikely (token->type == END_OF_RE)) { *err = REG_EBRACK; goto parse_bracket_exp_free_return; @@ -3343,7 +3335,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, br_token.type = COMPLEX_BRACKET; br_token.opr.mbcset = mbcset; mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (mbc_tree == NULL, 0)) + if (__glibc_unlikely (mbc_tree == NULL)) goto parse_bracket_exp_espace; for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) if (sbcset[sbc_idx]) @@ -3356,12 +3348,12 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, br_token.type = SIMPLE_BRACKET; br_token.opr.sbcset = sbcset; work_tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (work_tree == NULL, 0)) + if 
(__glibc_unlikely (work_tree == NULL)) goto parse_bracket_exp_espace; /* Then join them by ALT node. */ work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); - if (BE (work_tree == NULL, 0)) + if (__glibc_unlikely (work_tree == NULL)) goto parse_bracket_exp_espace; } else @@ -3380,7 +3372,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, br_token.type = SIMPLE_BRACKET; br_token.opr.sbcset = sbcset; work_tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (work_tree == NULL, 0)) + if (__glibc_unlikely (work_tree == NULL)) goto parse_bracket_exp_espace; } return work_tree; @@ -3417,7 +3409,7 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS || token->type == OP_OPEN_EQUIV_CLASS) return parse_bracket_symbol (elem, regexp, token); - if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) + if (__glibc_unlikely (token->type == OP_CHARSET_RANGE) && !accept_hyphen) { /* A '-' must only appear as anything but a range indicator before the closing bracket. Everything else is an error. */ @@ -3512,7 +3504,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); idx1 = findidx (table, indirect, extra, &cp, -1); - if (BE (idx1 == 0 || *cp != '\0', 0)) + if (__glibc_unlikely (idx1 == 0 || *cp != '\0')) /* This isn't a valid character. */ return REG_ECOLLATE; @@ -3531,21 +3523,13 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) continue; /* Compare only if the length matches and the collation rule index is the same. 
*/ - if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24)) - { - int cnt = 0; - - while (cnt <= len && - weights[(idx1 & 0xffffff) + 1 + cnt] - == weights[(idx2 & 0xffffff) + 1 + cnt]) - ++cnt; - - if (cnt > len) - bitset_set (sbcset, ch); - } + if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24) + && memcmp (weights + (idx1 & 0xffffff) + 1, + weights + (idx2 & 0xffffff) + 1, len) == 0) + bitset_set (sbcset, ch); } /* Check whether the array has enough space. */ - if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) + if (__glibc_unlikely (*equiv_class_alloc == mbcset->nequiv_classes)) { /* Not enough, realloc it. */ /* +1 in case of mbcset->nequiv_classes is 0. */ @@ -3554,7 +3538,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, int32_t, new_equiv_class_alloc); - if (BE (new_equiv_classes == NULL, 0)) + if (__glibc_unlikely (new_equiv_classes == NULL)) return REG_ESPACE; mbcset->equiv_classes = new_equiv_classes; *equiv_class_alloc = new_equiv_class_alloc; @@ -3564,7 +3548,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) else #endif /* _LIBC */ { - if (BE (strlen ((const char *) name) != 1, 0)) + if (__glibc_unlikely (strlen ((const char *) name) != 1)) return REG_ECOLLATE; bitset_set (sbcset, *name); } @@ -3598,7 +3582,7 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, #ifdef RE_ENABLE_I18N /* Check the space of the arrays. */ - if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) + if (__glibc_unlikely (*char_class_alloc == mbcset->nchar_classes)) { /* Not enough, realloc it. */ /* +1 in case of mbcset->nchar_classes is 0. */ @@ -3606,7 +3590,7 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, /* Use realloc since array is NULL if *alloc == 0. 
*/ wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, new_char_class_alloc); - if (BE (new_char_classes == NULL, 0)) + if (__glibc_unlikely (new_char_classes == NULL)) return REG_ESPACE; mbcset->char_classes = new_char_classes; *char_class_alloc = new_char_class_alloc; @@ -3616,7 +3600,7 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, #define BUILD_CHARCLASS_LOOP(ctype_func) \ do { \ - if (BE (trans != NULL, 0)) \ + if (__glibc_unlikely (trans != NULL)) \ { \ for (i = 0; i < SBC_MAX; ++i) \ if (ctype_func (i)) \ @@ -3672,30 +3656,24 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, Idx alloc = 0; #endif /* not RE_ENABLE_I18N */ reg_errcode_t ret; - re_token_t br_token; bin_tree_t *tree; sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); -#ifdef RE_ENABLE_I18N - mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); -#endif /* RE_ENABLE_I18N */ - -#ifdef RE_ENABLE_I18N - if (BE (sbcset == NULL || mbcset == NULL, 0)) -#else /* not RE_ENABLE_I18N */ - if (BE (sbcset == NULL, 0)) -#endif /* not RE_ENABLE_I18N */ + if (__glibc_unlikely (sbcset == NULL)) { *err = REG_ESPACE; return NULL; } - - if (non_match) - { #ifdef RE_ENABLE_I18N - mbcset->non_match = 1; -#endif /* not RE_ENABLE_I18N */ + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); + if (__glibc_unlikely (mbcset == NULL)) + { + re_free (sbcset); + *err = REG_ESPACE; + return NULL; } + mbcset->non_match = non_match; +#endif /* RE_ENABLE_I18N */ /* We don't care the syntax in this case. */ ret = build_charclass (trans, sbcset, @@ -3704,7 +3682,7 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, #endif /* RE_ENABLE_I18N */ class_name, 0); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) { re_free (sbcset); #ifdef RE_ENABLE_I18N @@ -3728,10 +3706,9 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, #endif /* Build a tree for simple bracket. 
*/ - br_token.type = SIMPLE_BRACKET; - br_token.opr.sbcset = sbcset; + re_token_t br_token = { .type = SIMPLE_BRACKET, .opr.sbcset = sbcset }; tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (tree == NULL, 0)) + if (__glibc_unlikely (tree == NULL)) goto build_word_op_espace; #ifdef RE_ENABLE_I18N @@ -3743,11 +3720,11 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, br_token.opr.mbcset = mbcset; dfa->has_mb_node = 1; mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (mbc_tree == NULL, 0)) + if (__glibc_unlikely (mbc_tree == NULL)) goto build_word_op_espace; /* Then join them by ALT node. */ tree = create_tree (dfa, tree, mbc_tree, OP_ALT); - if (BE (mbc_tree != NULL, 1)) + if (__glibc_likely (mbc_tree != NULL)) return tree; } else @@ -3770,27 +3747,26 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, /* This is intended for the expressions like "a{1,3}". Fetch a number from 'input', and return the number. - Return REG_MISSING if the number field is empty like "{,1}". + Return -1 if the number field is empty like "{,1}". Return RE_DUP_MAX + 1 if the number field is too large. - Return REG_ERROR if an error occurred. */ + Return -2 if an error occurred. */ static Idx fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) { - Idx num = REG_MISSING; + Idx num = -1; unsigned char c; while (1) { fetch_token (token, input, syntax); c = token->opr.c; - if (BE (token->type == END_OF_RE, 0)) - return REG_ERROR; + if (__glibc_unlikely (token->type == END_OF_RE)) + return -2; if (token->type == OP_CLOSE_DUP_NUM || c == ',') break; - num = ((token->type != CHARACTER || c < '0' || '9' < c - || num == REG_ERROR) - ? REG_ERROR - : num == REG_MISSING + num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) + ? -2 + : num == -1 ? 
c - '0' : MIN (RE_DUP_MAX + 1, num * 10 + c - '0')); } @@ -3805,9 +3781,9 @@ free_charset (re_charset_t *cset) # ifdef _LIBC re_free (cset->coll_syms); re_free (cset->equiv_classes); +# endif re_free (cset->range_starts); re_free (cset->range_ends); -# endif re_free (cset->char_classes); re_free (cset); } @@ -3821,8 +3797,7 @@ static bin_tree_t * create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, re_token_type_t type) { - re_token_t t; - t.type = type; + re_token_t t = { .type = type }; return create_token_tree (dfa, left, right, &t); } @@ -3831,7 +3806,7 @@ create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, const re_token_t *token) { bin_tree_t *tree; - if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) + if (__glibc_unlikely (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE)) { bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); @@ -3851,7 +3826,7 @@ create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, tree->token.opt_subexp = 0; tree->first = NULL; tree->next = NULL; - tree->node_idx = REG_MISSING; + tree->node_idx = -1; if (left != NULL) left->parent = tree; diff --git a/contrib/grep/lib/regex.c b/contrib/grep/lib/regex.c index 6d0ae8825b..6bdd77f50b 100644 --- a/contrib/grep/lib/regex.c +++ b/contrib/grep/lib/regex.c @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . @@ -15,15 +15,15 @@ You should have received a copy of the GNU General Public License along with the GNU C Library; if not, see - . */ + . 
*/ #ifndef _LIBC -# include +# include -# if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__ +# if __GNUC_PREREQ (4, 6) # pragma GCC diagnostic ignored "-Wsuggest-attribute=pure" # endif -# if (__GNUC__ == 4 && 3 <= __GNUC_MINOR__) || 4 < __GNUC__ +# if __GNUC_PREREQ (4, 3) # pragma GCC diagnostic ignored "-Wold-style-definition" # pragma GCC diagnostic ignored "-Wtype-limits" # endif diff --git a/contrib/grep/lib/regex.h b/contrib/grep/lib/regex.h index f7de3943fc..76ff4e342f 100644 --- a/contrib/grep/lib/regex.h +++ b/contrib/grep/lib/regex.h @@ -1,7 +1,6 @@ /* Definitions for data structures and routines for the regular expression library. - Copyright (C) 1985, 1989-1993, 1995-1998, 2000-2003, 2005-2015 Free Software - Foundation, Inc. + Copyright (C) 1985, 1989-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,7 +15,7 @@ You should have received a copy of the GNU General Public License along with the GNU C Library; if not, see - . */ + . */ #ifndef _REGEX_H #define _REGEX_H 1 @@ -42,11 +41,6 @@ extern "C" { supported within glibc itself, and glibc users should not define _REGEX_LARGE_OFFSETS. */ -/* The type of nonnegative object indexes. Traditionally, GNU regex - uses 'int' for these. Code that uses __re_idx_t should work - regardless of whether the type is signed. */ -typedef size_t __re_idx_t; - /* The type of object sizes. */ typedef size_t __re_size_t; @@ -58,7 +52,6 @@ typedef size_t __re_long_size_t; /* The traditional GNU regex implementation mishandles strings longer than INT_MAX. */ -typedef int __re_idx_t; typedef unsigned int __re_size_t; typedef unsigned long int __re_long_size_t; @@ -488,7 +481,8 @@ typedef struct re_pattern_buffer regex_t; #ifdef _REGEX_LARGE_OFFSETS /* POSIX 1003.1-2008 requires that regoff_t be at least as wide as ptrdiff_t and ssize_t. 
We don't know of any hosts where ptrdiff_t - is wider than ssize_t, so ssize_t is safe. */ + is wider than ssize_t, so ssize_t is safe. ptrdiff_t is not + visible here, so use ssize_t. */ typedef ssize_t regoff_t; #else /* The traditional GNU regex implementation mishandles strings longer @@ -538,7 +532,7 @@ extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); BUFFER. Return NULL if successful, and an error string if not. To free the allocated storage, you must call 'regfree' on BUFFER. - Note that the translate table must either have been initialised by + Note that the translate table must either have been initialized by 'regcomp', with a malloc'ed value, or set to NULL before calling 'regfree'. */ extern const char *re_compile_pattern (const char *__pattern, size_t __length, @@ -557,34 +551,34 @@ extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); match, or -2 for an internal error. Also return register information in REGS (if REGS and BUFFER->no_sub are nonzero). */ extern regoff_t re_search (struct re_pattern_buffer *__buffer, - const char *__string, __re_idx_t __length, - __re_idx_t __start, regoff_t __range, + const char *__String, regoff_t __length, + regoff_t __start, regoff_t __range, struct re_registers *__regs); /* Like 're_search', but search in the concatenation of STRING1 and STRING2. Also, stop searching at index START + STOP. */ extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer, - const char *__string1, __re_idx_t __length1, - const char *__string2, __re_idx_t __length2, - __re_idx_t __start, regoff_t __range, + const char *__string1, regoff_t __length1, + const char *__string2, regoff_t __length2, + regoff_t __start, regoff_t __range, struct re_registers *__regs, - __re_idx_t __stop); + regoff_t __stop); /* Like 're_search', but return how many characters in STRING the regexp in BUFFER matched, starting at position START. 
*/ extern regoff_t re_match (struct re_pattern_buffer *__buffer, - const char *__string, __re_idx_t __length, - __re_idx_t __start, struct re_registers *__regs); + const char *__String, regoff_t __length, + regoff_t __start, struct re_registers *__regs); /* Relates to 're_match' as 're_search_2' relates to 're_search'. */ extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer, - const char *__string1, __re_idx_t __length1, - const char *__string2, __re_idx_t __length2, - __re_idx_t __start, struct re_registers *__regs, - __re_idx_t __stop); + const char *__string1, regoff_t __length1, + const char *__string2, regoff_t __length2, + regoff_t __start, struct re_registers *__regs, + regoff_t __stop); /* Set REGS to hold NUM_REGS registers, storing them in STARTS and @@ -613,28 +607,28 @@ extern int re_exec (const char *); # endif #endif -/* GCC 2.95 and later have "__restrict"; C99 compilers have +/* For plain 'restrict', use glibc's __restrict if defined. + Otherwise, GCC 2.95 and later have "__restrict"; C99 compilers have "restrict", and "configure" may have defined "restrict". Other compilers use __restrict, __restrict__, and _Restrict, and 'configure' might #define 'restrict' to those words, so pick a different name. */ #ifndef _Restrict_ -# if 199901L <= __STDC_VERSION__ -# define _Restrict_ restrict -# elif 2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__) +# if defined __restrict || 2 < __GNUC__ + (95 <= __GNUC_MINOR__) # define _Restrict_ __restrict +# elif 199901L <= __STDC_VERSION__ || defined restrict +# define _Restrict_ restrict # else # define _Restrict_ # endif #endif -/* gcc 3.1 and up support the [restrict] syntax. Don't trust - sys/cdefs.h's definition of __restrict_arr, though, as it - mishandles gcc -ansi -pedantic. */ +/* For [restrict], use glibc's __restrict_arr if available. + Otherwise, GCC 3.1 (not in C++ mode) and C99 support [restrict]. 
*/ #ifndef _Restrict_arr_ -# if ((199901L <= __STDC_VERSION__ \ - || ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \ - && !defined __STRICT_ANSI__)) \ - && !defined __GNUG__) +# ifdef __restrict_arr +# define _Restrict_arr_ __restrict_arr +# elif ((199901L <= __STDC_VERSION__ || 3 < __GNUC__ + (1 <= __GNUC_MINOR__)) \ + && !defined __GNUG__) # define _Restrict_arr_ _Restrict_ # else # define _Restrict_arr_ @@ -647,7 +641,7 @@ extern int regcomp (regex_t *_Restrict_ __preg, int __cflags); extern int regexec (const regex_t *_Restrict_ __preg, - const char *_Restrict_ __string, size_t __nmatch, + const char *_Restrict_ __String, size_t __nmatch, regmatch_t __pmatch[_Restrict_arr_], int __eflags); diff --git a/contrib/grep/lib/regex_internal.c b/contrib/grep/lib/regex_internal.c index 2514344efe..2e21729146 100644 --- a/contrib/grep/lib/regex_internal.c +++ b/contrib/grep/lib/regex_internal.c @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . @@ -15,19 +15,29 @@ You should have received a copy of the GNU General Public License along with the GNU C Library; if not, see - . */ + . 
*/ static void re_string_construct_common (const char *str, Idx len, re_string_t *pstr, RE_TRANSLATE_TYPE trans, bool icase, - const re_dfa_t *dfa) internal_function; + const re_dfa_t *dfa); static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, - re_hashval_t hash) internal_function; + re_hashval_t hash); static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, unsigned int context, - re_hashval_t hash) internal_function; + re_hashval_t hash); +static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, + Idx new_buf_len); +#ifdef RE_ENABLE_I18N +static void build_wcs_buffer (re_string_t *pstr); +static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr); +#endif /* RE_ENABLE_I18N */ +static void build_upper_buffer (re_string_t *pstr); +static void re_string_translate_buffer (re_string_t *pstr); +static unsigned int re_string_context_at (const re_string_t *input, Idx idx, + int eflags) __attribute__ ((pure)); /* Functions for string operation. */ @@ -35,7 +45,7 @@ static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, re_string_reconstruct before using the object. */ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len, RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa) { @@ -49,7 +59,7 @@ re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len, re_string_construct_common (str, len, pstr, trans, icase, dfa); ret = re_string_realloc_buffers (pstr, init_buf_len); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) return ret; pstr->word_char = dfa->word_char; @@ -63,7 +73,7 @@ re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len, /* This function allocate the buffers, and initialize them. 
*/ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_string_construct (re_string_t *pstr, const char *str, Idx len, RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa) { @@ -74,7 +84,7 @@ re_string_construct (re_string_t *pstr, const char *str, Idx len, if (len > 0) { ret = re_string_realloc_buffers (pstr, len + 1); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) return ret; } pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; @@ -87,14 +97,14 @@ re_string_construct (re_string_t *pstr, const char *str, Idx len, while (1) { ret = build_wcs_upper_buffer (pstr); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) return ret; if (pstr->valid_raw_len >= len) break; if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max) break; ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) return ret; } } @@ -126,7 +136,7 @@ re_string_construct (re_string_t *pstr, const char *str, Idx len, /* Helper functions for re_string_allocate, and re_string_construct. */ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len) { #ifdef RE_ENABLE_I18N @@ -136,17 +146,18 @@ re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len) /* Avoid overflow in realloc. 
*/ const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx)); - if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0)) + if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) + < new_buf_len)) return REG_ESPACE; new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); - if (BE (new_wcs == NULL, 0)) + if (__glibc_unlikely (new_wcs == NULL)) return REG_ESPACE; pstr->wcs = new_wcs; if (pstr->offsets != NULL) { Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len); - if (BE (new_offsets == NULL, 0)) + if (__glibc_unlikely (new_offsets == NULL)) return REG_ESPACE; pstr->offsets = new_offsets; } @@ -156,7 +167,7 @@ re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len) { unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, new_buf_len); - if (BE (new_mbs == NULL, 0)) + if (__glibc_unlikely (new_mbs == NULL)) return REG_ESPACE; pstr->mbs = new_mbs; } @@ -166,7 +177,6 @@ re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len) static void -internal_function re_string_construct_common (const char *str, Idx len, re_string_t *pstr, RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa) @@ -198,12 +208,11 @@ re_string_construct_common (const char *str, Idx len, re_string_t *pstr, built and starts from PSTR->VALID_LEN. */ static void -internal_function build_wcs_buffer (re_string_t *pstr) { #ifdef _LIBC unsigned char buf[MB_LEN_MAX]; - assert (MB_LEN_MAX >= pstr->mb_cur_max); + DEBUG_ASSERT (MB_LEN_MAX >= pstr->mb_cur_max); #else unsigned char buf[64]; #endif @@ -222,7 +231,7 @@ build_wcs_buffer (re_string_t *pstr) remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; /* Apply the translation if we need. 
*/ - if (BE (pstr->trans != NULL, 0)) + if (__glibc_unlikely (pstr->trans != NULL)) { int i, ch; @@ -236,17 +245,18 @@ build_wcs_buffer (re_string_t *pstr) else p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -1 || mbclen == 0 - || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0)) + if (__glibc_unlikely (mbclen == (size_t) -1 || mbclen == 0 + || (mbclen == (size_t) -2 + && pstr->bufs_len >= pstr->len))) { /* We treat these cases as a singlebyte character. */ mbclen = 1; wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; - if (BE (pstr->trans != NULL, 0)) + if (__glibc_unlikely (pstr->trans != NULL)) wc = pstr->trans[wc]; pstr->cur_state = prev_st; } - else if (BE (mbclen == (size_t) -2, 0)) + else if (__glibc_unlikely (mbclen == (size_t) -2)) { /* The buffer doesn't have enough space, finish to build. */ pstr->cur_state = prev_st; @@ -267,7 +277,7 @@ build_wcs_buffer (re_string_t *pstr) but for REG_ICASE. 
*/ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ build_wcs_upper_buffer (re_string_t *pstr) { mbstate_t prev_st; @@ -275,7 +285,7 @@ build_wcs_upper_buffer (re_string_t *pstr) size_t mbclen; #ifdef _LIBC char buf[MB_LEN_MAX]; - assert (MB_LEN_MAX >= pstr->mb_cur_max); + DEBUG_ASSERT (pstr->mb_cur_max <= MB_LEN_MAX); #else char buf[64]; #endif @@ -309,7 +319,7 @@ build_wcs_upper_buffer (re_string_t *pstr) mbclen = __mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx), remain_len, &pstr->cur_state); - if (BE (mbclen < (size_t) -2, 1)) + if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2)) { wchar_t wcu = __towupper (wc); if (wcu != wc) @@ -317,7 +327,7 @@ build_wcs_upper_buffer (re_string_t *pstr) size_t mbcdlen; mbcdlen = __wcrtomb (buf, wcu, &prev_st); - if (BE (mbclen == mbcdlen, 1)) + if (__glibc_likely (mbclen == mbcdlen)) memcpy (pstr->mbs + byte_idx, buf, mbclen); else { @@ -342,7 +352,7 @@ build_wcs_upper_buffer (re_string_t *pstr) pstr->mbs[byte_idx] = ch; /* And also cast it to wide char. 
*/ pstr->wcs[byte_idx++] = (wchar_t) ch; - if (BE (mbclen == (size_t) -1, 0)) + if (__glibc_unlikely (mbclen == (size_t) -1)) pstr->cur_state = prev_st; } else @@ -364,7 +374,7 @@ build_wcs_upper_buffer (re_string_t *pstr) offsets_needed: remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; - if (BE (pstr->trans != NULL, 0)) + if (__glibc_unlikely (pstr->trans != NULL)) { int i, ch; @@ -378,15 +388,15 @@ build_wcs_upper_buffer (re_string_t *pstr) else p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); - if (BE (mbclen < (size_t) -2, 1)) + if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2)) { wchar_t wcu = __towupper (wc); if (wcu != wc) { size_t mbcdlen; - mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); - if (BE (mbclen == mbcdlen, 1)) + mbcdlen = __wcrtomb ((char *) buf, wcu, &prev_st); + if (__glibc_likely (mbclen == mbcdlen)) memcpy (pstr->mbs + byte_idx, buf, mbclen); else if (mbcdlen != (size_t) -1) { @@ -436,7 +446,7 @@ build_wcs_upper_buffer (re_string_t *pstr) else memcpy (pstr->mbs + byte_idx, p, mbclen); - if (BE (pstr->offsets_needed != 0, 0)) + if (__glibc_unlikely (pstr->offsets_needed != 0)) { size_t i; for (i = 0; i < mbclen; ++i) @@ -455,17 +465,17 @@ build_wcs_upper_buffer (re_string_t *pstr) /* It is an invalid character or '\0'. Just use the byte. */ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; - if (BE (pstr->trans != NULL, 0)) + if (__glibc_unlikely (pstr->trans != NULL)) ch = pstr->trans [ch]; pstr->mbs[byte_idx] = ch; - if (BE (pstr->offsets_needed != 0, 0)) + if (__glibc_unlikely (pstr->offsets_needed != 0)) pstr->offsets[byte_idx] = src_idx; ++src_idx; /* And also cast it to wide char. */ pstr->wcs[byte_idx++] = (wchar_t) ch; - if (BE (mbclen == (size_t) -1, 0)) + if (__glibc_unlikely (mbclen == (size_t) -1)) pstr->cur_state = prev_st; } else @@ -484,7 +494,6 @@ build_wcs_upper_buffer (re_string_t *pstr) Return the index. 
*/ static Idx -internal_function re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc) { mbstate_t prev_st; @@ -501,7 +510,8 @@ re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc) prev_st = pstr->cur_state; mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx, remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) + if (__glibc_unlikely (mbclen == (size_t) -2 || mbclen == (size_t) -1 + || mbclen == 0)) { /* We treat these cases as a single byte character. */ if (mbclen == 0 || remain_len == 0) @@ -525,7 +535,6 @@ re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc) This function is used in case of REG_ICASE. */ static void -internal_function build_upper_buffer (re_string_t *pstr) { Idx char_idx, end_idx; @@ -534,7 +543,7 @@ build_upper_buffer (re_string_t *pstr) for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx) { int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; - if (BE (pstr->trans != NULL, 0)) + if (__glibc_unlikely (pstr->trans != NULL)) ch = pstr->trans[ch]; pstr->mbs[char_idx] = toupper (ch); } @@ -545,7 +554,6 @@ build_upper_buffer (re_string_t *pstr) /* Apply TRANS to the buffer in PSTR. */ static void -internal_function re_string_translate_buffer (re_string_t *pstr) { Idx buf_idx, end_idx; @@ -566,12 +574,12 @@ re_string_translate_buffer (re_string_t *pstr) convert to upper case in case of REG_ICASE, apply translation. 
*/ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) { Idx offset; - if (BE (pstr->raw_mbs_idx <= idx, 0)) + if (__glibc_unlikely (pstr->raw_mbs_idx <= idx)) offset = idx - pstr->raw_mbs_idx; else { @@ -593,14 +601,14 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) offset = idx; } - if (BE (offset != 0, 1)) + if (__glibc_likely (offset != 0)) { /* Should the already checked characters be kept? */ - if (BE (offset < pstr->valid_raw_len, 1)) + if (__glibc_likely (offset < pstr->valid_raw_len)) { /* Yes, move them to the front of the buffer. */ #ifdef RE_ENABLE_I18N - if (BE (pstr->offsets_needed, 0)) + if (__glibc_unlikely (pstr->offsets_needed)) { Idx low = 0, high = pstr->valid_len, mid; do @@ -672,14 +680,12 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) memmove (pstr->wcs, pstr->wcs + offset, (pstr->valid_len - offset) * sizeof (wint_t)); #endif /* RE_ENABLE_I18N */ - if (BE (pstr->mbs_allocated, 0)) + if (__glibc_unlikely (pstr->mbs_allocated)) memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); pstr->valid_len -= offset; pstr->valid_raw_len -= offset; -#if defined DEBUG && DEBUG - assert (pstr->valid_len > 0); -#endif + DEBUG_ASSERT (pstr->valid_len > 0); } } else @@ -688,7 +694,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) /* No, skip all characters until IDX. */ Idx prev_valid_len = pstr->valid_len; - if (BE (pstr->offsets_needed, 0)) + if (__glibc_unlikely (pstr->offsets_needed)) { pstr->len = pstr->raw_len - idx + offset; pstr->stop = pstr->raw_stop - idx + offset; @@ -716,7 +722,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) #ifdef _LIBC /* We know the wchar_t encoding is UCS4, so for the simple case, ASCII characters, skip the conversion step. 
*/ - if (isascii (*p) && BE (pstr->trans == NULL, 1)) + if (isascii (*p) && __glibc_likely (pstr->trans == NULL)) { memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); /* pstr->valid_len = 0; */ @@ -734,7 +740,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) size_t mbclen; const unsigned char *pp = p; - if (BE (pstr->trans != NULL, 0)) + if (__glibc_unlikely (pstr->trans != NULL)) { int i = mlen < 6 ? mlen : 6; while (--i >= 0) @@ -764,13 +770,13 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) pstr->tip_context = re_string_context_at (pstr, prev_valid_len - 1, eflags); else - pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) + pstr->tip_context = ((__glibc_unlikely (pstr->word_ops_used != 0) && IS_WIDE_WORD_CHAR (wc)) ? CONTEXT_WORD : ((IS_WIDE_NEWLINE (wc) && pstr->newline_anchor) ? CONTEXT_NEWLINE : 0)); - if (BE (pstr->valid_len, 0)) + if (__glibc_unlikely (pstr->valid_len)) { for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) pstr->wcs[wcs_idx] = WEOF; @@ -792,7 +798,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) ? 
CONTEXT_NEWLINE : 0)); } } - if (!BE (pstr->mbs_allocated, 0)) + if (!__glibc_unlikely (pstr->mbs_allocated)) pstr->mbs += offset; } pstr->raw_mbs_idx = idx; @@ -806,7 +812,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) if (pstr->icase) { reg_errcode_t ret = build_wcs_upper_buffer (pstr); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) return ret; } else @@ -814,7 +820,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) } else #endif /* RE_ENABLE_I18N */ - if (BE (pstr->mbs_allocated, 0)) + if (__glibc_unlikely (pstr->mbs_allocated)) { if (pstr->icase) build_upper_buffer (pstr); @@ -829,14 +835,14 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) } static unsigned char -internal_function __attribute__ ((pure)) +__attribute__ ((pure)) re_string_peek_byte_case (const re_string_t *pstr, Idx idx) { int ch; Idx off; /* Handle the common (easiest) cases first. */ - if (BE (!pstr->mbs_allocated, 1)) + if (__glibc_likely (!pstr->mbs_allocated)) return re_string_peek_byte (pstr, idx); #ifdef RE_ENABLE_I18N @@ -866,10 +872,9 @@ re_string_peek_byte_case (const re_string_t *pstr, Idx idx) } static unsigned char -internal_function re_string_fetch_byte_case (re_string_t *pstr) { - if (BE (!pstr->mbs_allocated, 1)) + if (__glibc_likely (!pstr->mbs_allocated)) return re_string_fetch_byte (pstr); #ifdef RE_ENABLE_I18N @@ -904,7 +909,6 @@ re_string_fetch_byte_case (re_string_t *pstr) } static void -internal_function re_string_destruct (re_string_t *pstr) { #ifdef RE_ENABLE_I18N @@ -918,15 +922,14 @@ re_string_destruct (re_string_t *pstr) /* Return the context at IDX in INPUT. */ static unsigned int -internal_function re_string_context_at (const re_string_t *input, Idx idx, int eflags) { int c; - if (BE (! REG_VALID_INDEX (idx), 0)) + if (__glibc_unlikely (idx < 0)) /* In this case, we use the value stored in input->tip_context, since we can't know the character in input->mbs[-1] here. 
*/ return input->tip_context; - if (BE (idx == input->len, 0)) + if (__glibc_unlikely (idx == input->len)) return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF : CONTEXT_NEWLINE | CONTEXT_ENDBUF); #ifdef RE_ENABLE_I18N @@ -936,16 +939,14 @@ re_string_context_at (const re_string_t *input, Idx idx, int eflags) Idx wc_idx = idx; while(input->wcs[wc_idx] == WEOF) { -#if defined DEBUG && DEBUG - /* It must not happen. */ - assert (REG_VALID_INDEX (wc_idx)); -#endif + DEBUG_ASSERT (wc_idx >= 0); --wc_idx; - if (! REG_VALID_INDEX (wc_idx)) + if (wc_idx < 0) return input->tip_context; } wc = input->wcs[wc_idx]; - if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) + if (__glibc_unlikely (input->word_ops_used != 0) + && IS_WIDE_WORD_CHAR (wc)) return CONTEXT_WORD; return (IS_WIDE_NEWLINE (wc) && input->newline_anchor ? CONTEXT_NEWLINE : 0); @@ -963,25 +964,26 @@ re_string_context_at (const re_string_t *input, Idx idx, int eflags) /* Functions for set operation. */ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_node_set_alloc (re_node_set *set, Idx size) { set->alloc = size; set->nelem = 0; set->elems = re_malloc (Idx, size); - if (BE (set->elems == NULL, 0) && (MALLOC_0_IS_NONNULL || size != 0)) + if (__glibc_unlikely (set->elems == NULL) + && (MALLOC_0_IS_NONNULL || size != 0)) return REG_ESPACE; return REG_NOERROR; } static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_node_set_init_1 (re_node_set *set, Idx elem) { set->alloc = 1; set->nelem = 1; set->elems = re_malloc (Idx, 1); - if (BE (set->elems == NULL, 0)) + if (__glibc_unlikely (set->elems == NULL)) { set->alloc = set->nelem = 0; return REG_ESPACE; @@ -991,12 +993,12 @@ re_node_set_init_1 (re_node_set *set, Idx elem) } static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2) { 
set->alloc = 2; set->elems = re_malloc (Idx, 2); - if (BE (set->elems == NULL, 0)) + if (__glibc_unlikely (set->elems == NULL)) return REG_ESPACE; if (elem1 == elem2) { @@ -1021,7 +1023,7 @@ re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2) } static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_node_set_init_copy (re_node_set *dest, const re_node_set *src) { dest->nelem = src->nelem; @@ -1029,7 +1031,7 @@ re_node_set_init_copy (re_node_set *dest, const re_node_set *src) { dest->alloc = dest->nelem; dest->elems = re_malloc (Idx, dest->alloc); - if (BE (dest->elems == NULL, 0)) + if (__glibc_unlikely (dest->elems == NULL)) { dest->alloc = dest->nelem = 0; return REG_ESPACE; @@ -1046,7 +1048,7 @@ re_node_set_init_copy (re_node_set *dest, const re_node_set *src) Note: We assume dest->elems is NULL, when dest->alloc is 0. */ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, const re_node_set *src2) { @@ -1060,7 +1062,7 @@ re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, { Idx new_alloc = src1->nelem + src2->nelem + dest->alloc; Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc); - if (BE (new_elems == NULL, 0)) + if (__glibc_unlikely (new_elems == NULL)) return REG_ESPACE; dest->elems = new_elems; dest->alloc = new_alloc; @@ -1077,25 +1079,25 @@ re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, if (src1->elems[i1] == src2->elems[i2]) { /* Try to find the item in DEST. Maybe we could binary search? */ - while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1]) + while (id >= 0 && dest->elems[id] > src1->elems[i1]) --id; - if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1]) + if (id < 0 || dest->elems[id] != src1->elems[i1]) dest->elems[--sbase] = src1->elems[i1]; - if (! REG_VALID_INDEX (--i1) || ! 
REG_VALID_INDEX (--i2)) + if (--i1 < 0 || --i2 < 0) break; } /* Lower the highest of the two items. */ else if (src1->elems[i1] < src2->elems[i2]) { - if (! REG_VALID_INDEX (--i2)) + if (--i2 < 0) break; } else { - if (! REG_VALID_INDEX (--i1)) + if (--i1 < 0) break; } } @@ -1108,7 +1110,7 @@ re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, DEST elements are already in place; this is more or less the same loop that is in re_node_set_merge. */ dest->nelem += delta; - if (delta > 0 && REG_VALID_INDEX (id)) + if (delta > 0 && id >= 0) for (;;) { if (dest->elems[is] > dest->elems[id]) @@ -1122,7 +1124,7 @@ re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, { /* Slide from the bottom. */ dest->elems[id + delta] = dest->elems[id]; - if (! REG_VALID_INDEX (--id)) + if (--id < 0) break; } } @@ -1137,7 +1139,7 @@ re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_node_set_init_union (re_node_set *dest, const re_node_set *src1, const re_node_set *src2) { @@ -1146,7 +1148,7 @@ re_node_set_init_union (re_node_set *dest, const re_node_set *src1, { dest->alloc = src1->nelem + src2->nelem; dest->elems = re_malloc (Idx, dest->alloc); - if (BE (dest->elems == NULL, 0)) + if (__glibc_unlikely (dest->elems == NULL)) return REG_ESPACE; } else @@ -1190,7 +1192,7 @@ re_node_set_init_union (re_node_set *dest, const re_node_set *src1, DEST. Return value indicate the error code or REG_NOERROR if succeeded. 
*/ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_node_set_merge (re_node_set *dest, const re_node_set *src) { Idx is, id, sbase, delta; @@ -1200,13 +1202,13 @@ re_node_set_merge (re_node_set *dest, const re_node_set *src) { Idx new_alloc = 2 * (src->nelem + dest->alloc); Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc); - if (BE (new_buffer == NULL, 0)) + if (__glibc_unlikely (new_buffer == NULL)) return REG_ESPACE; dest->elems = new_buffer; dest->alloc = new_alloc; } - if (BE (dest->nelem == 0, 0)) + if (__glibc_unlikely (dest->nelem == 0)) { dest->nelem = src->nelem; memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx)); @@ -1216,8 +1218,7 @@ re_node_set_merge (re_node_set *dest, const re_node_set *src) /* Copy into the top of DEST the items of SRC that are not found in DEST. Maybe we could binary search in DEST? */ for (sbase = dest->nelem + 2 * src->nelem, - is = src->nelem - 1, id = dest->nelem - 1; - REG_VALID_INDEX (is) && REG_VALID_INDEX (id); ) + is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; ) { if (dest->elems[id] == src->elems[is]) is--, id--; @@ -1227,7 +1228,7 @@ re_node_set_merge (re_node_set *dest, const re_node_set *src) --id; } - if (REG_VALID_INDEX (is)) + if (is >= 0) { /* If DEST is exhausted, the remaining items of SRC must be unique. */ sbase -= is + 1; @@ -1256,7 +1257,7 @@ re_node_set_merge (re_node_set *dest, const re_node_set *src) { /* Slide from the bottom. */ dest->elems[id + delta] = dest->elems[id]; - if (! REG_VALID_INDEX (--id)) + if (--id < 0) { /* Copy remaining SRC elements. */ memcpy (dest->elems, dest->elems + sbase, @@ -1274,15 +1275,15 @@ re_node_set_merge (re_node_set *dest, const re_node_set *src) Return true if successful. */ static bool -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_node_set_insert (re_node_set *set, Idx elem) { Idx idx; /* In case the set is empty. 
*/ if (set->alloc == 0) - return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1); + return __glibc_likely (re_node_set_init_1 (set, elem) == REG_NOERROR); - if (BE (set->nelem, 0) == 0) + if (__glibc_unlikely (set->nelem) == 0) { /* We already guaranteed above that set->alloc != 0. */ set->elems[0] = elem; @@ -1296,7 +1297,7 @@ re_node_set_insert (re_node_set *set, Idx elem) Idx *new_elems; set->alloc = set->alloc * 2; new_elems = re_realloc (set->elems, Idx, set->alloc); - if (BE (new_elems == NULL, 0)) + if (__glibc_unlikely (new_elems == NULL)) return false; set->elems = new_elems; } @@ -1305,7 +1306,6 @@ re_node_set_insert (re_node_set *set, Idx elem) first element separately to skip a check in the inner loop. */ if (elem < set->elems[0]) { - idx = 0; for (idx = set->nelem; idx > 0; idx--) set->elems[idx] = set->elems[idx - 1]; } @@ -1326,7 +1326,7 @@ re_node_set_insert (re_node_set *set, Idx elem) Return true if successful. */ static bool -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_node_set_insert_last (re_node_set *set, Idx elem) { /* Realloc if we need. */ @@ -1335,7 +1335,7 @@ re_node_set_insert_last (re_node_set *set, Idx elem) Idx *new_elems; set->alloc = (set->alloc + 1) * 2; new_elems = re_realloc (set->elems, Idx, set->alloc); - if (BE (new_elems == NULL, 0)) + if (__glibc_unlikely (new_elems == NULL)) return false; set->elems = new_elems; } @@ -1349,13 +1349,13 @@ re_node_set_insert_last (re_node_set *set, Idx elem) Return true if SET1 and SET2 are equivalent. 
*/ static bool -internal_function __attribute__ ((pure)) +__attribute__ ((pure)) re_node_set_compare (const re_node_set *set1, const re_node_set *set2) { Idx i; if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) return false; - for (i = set1->nelem ; REG_VALID_INDEX (--i) ; ) + for (i = set1->nelem ; --i >= 0 ; ) if (set1->elems[i] != set2->elems[i]) return false; return true; @@ -1364,11 +1364,11 @@ re_node_set_compare (const re_node_set *set1, const re_node_set *set2) /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ static Idx -internal_function __attribute__ ((pure)) +__attribute__ ((pure)) re_node_set_contains (const re_node_set *set, Idx elem) { __re_size_t idx, right, mid; - if (! REG_VALID_NONZERO_INDEX (set->nelem)) + if (set->nelem <= 0) return 0; /* Binary search the element. */ @@ -1386,7 +1386,6 @@ re_node_set_contains (const re_node_set *set, Idx elem) } static void -internal_function re_node_set_remove_at (re_node_set *set, Idx idx) { if (idx < 0 || idx >= set->nelem) @@ -1398,13 +1397,12 @@ re_node_set_remove_at (re_node_set *set, Idx idx) /* Add the token TOKEN to dfa->nodes, and return the index of the token. - Or return REG_MISSING if an error occurred. */ + Or return -1 if an error occurred. 
*/ static Idx -internal_function re_dfa_add_node (re_dfa_t *dfa, re_token_t token) { - if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) + if (__glibc_unlikely (dfa->nodes_len >= dfa->nodes_alloc)) { size_t new_nodes_alloc = dfa->nodes_alloc * 2; Idx *new_nexts, *new_indices; @@ -1415,20 +1413,27 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token) const size_t max_object_size = MAX (sizeof (re_token_t), MAX (sizeof (re_node_set), sizeof (Idx))); - if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0)) - return REG_MISSING; + if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) + < new_nodes_alloc)) + return -1; new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); - if (BE (new_nodes == NULL, 0)) - return REG_MISSING; + if (__glibc_unlikely (new_nodes == NULL)) + return -1; dfa->nodes = new_nodes; new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc); new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc); new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); - if (BE (new_nexts == NULL || new_indices == NULL - || new_edests == NULL || new_eclosures == NULL, 0)) - return REG_MISSING; + if (__glibc_unlikely (new_nexts == NULL || new_indices == NULL + || new_edests == NULL || new_eclosures == NULL)) + { + re_free (new_nexts); + re_free (new_indices); + re_free (new_edests); + re_free (new_eclosures); + return -1; + } dfa->nexts = new_nexts; dfa->org_indices = new_indices; dfa->edests = new_edests; @@ -1442,14 +1447,13 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token) ((token.type == OP_PERIOD && dfa->mb_cur_max > 1) || token.type == COMPLEX_BRACKET); #endif - dfa->nexts[dfa->nodes_len] = REG_MISSING; + dfa->nexts[dfa->nodes_len] = -1; re_node_set_init_empty (dfa->edests + dfa->nodes_len); re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); return dfa->nodes_len++; } static re_hashval_t -internal_function 
calc_state_hash (const re_node_set *nodes, unsigned int context) { re_hashval_t hash = nodes->nelem + context; @@ -1469,7 +1473,7 @@ calc_state_hash (const re_node_set *nodes, unsigned int context) optimization. */ static re_dfastate_t * -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa, const re_node_set *nodes) { @@ -1477,11 +1481,11 @@ re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa, re_dfastate_t *new_state; struct re_state_table_entry *spot; Idx i; -#ifdef lint +#if defined GCC_LINT || defined lint /* Suppress bogus uninitialized-variable warnings. */ *err = REG_NOERROR; #endif - if (BE (nodes->nelem == 0, 0)) + if (__glibc_unlikely (nodes->nelem == 0)) { *err = REG_NOERROR; return NULL; @@ -1500,7 +1504,7 @@ re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa, /* There are no appropriate state in the dfa, create the new one. */ new_state = create_ci_newstate (dfa, nodes, hash); - if (BE (new_state == NULL, 0)) + if (__glibc_unlikely (new_state == NULL)) *err = REG_ESPACE; return new_state; @@ -1517,7 +1521,7 @@ re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa, optimization. */ static re_dfastate_t * -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa, const re_node_set *nodes, unsigned int context) { @@ -1525,7 +1529,7 @@ re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa, re_dfastate_t *new_state; struct re_state_table_entry *spot; Idx i; -#ifdef lint +#if defined GCC_LINT || defined lint /* Suppress bogus uninitialized-variable warnings. */ *err = REG_NOERROR; #endif @@ -1547,7 +1551,7 @@ re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa, } /* There are no appropriate state in 'dfa', create the new one. 
*/ new_state = create_cd_newstate (dfa, nodes, context, hash); - if (BE (new_state == NULL, 0)) + if (__glibc_unlikely (new_state == NULL)) *err = REG_ESPACE; return new_state; @@ -1568,7 +1572,7 @@ register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, newstate->hash = hash; err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return REG_ESPACE; for (i = 0; i < newstate->nodes.nelem; i++) { @@ -1579,12 +1583,12 @@ register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, } spot = dfa->state_table + (hash & dfa->state_hash_mask); - if (BE (spot->alloc <= spot->num, 0)) + if (__glibc_unlikely (spot->alloc <= spot->num)) { Idx new_alloc = 2 * spot->num + 2; re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *, new_alloc); - if (BE (new_array == NULL, 0)) + if (__glibc_unlikely (new_array == NULL)) return REG_ESPACE; spot->array = new_array; spot->alloc = new_alloc; @@ -1613,7 +1617,7 @@ free_state (re_dfastate_t *state) Return the new state if succeeded, otherwise return NULL. 
*/ static re_dfastate_t * -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, re_hashval_t hash) { @@ -1622,10 +1626,10 @@ create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, re_dfastate_t *newstate; newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); - if (BE (newstate == NULL, 0)) + if (__glibc_unlikely (newstate == NULL)) return NULL; err = re_node_set_init_copy (&newstate->nodes, nodes); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_free (newstate); return NULL; @@ -1651,7 +1655,7 @@ create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, newstate->has_constraint = 1; } err = register_state (dfa, newstate, hash); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { free_state (newstate); newstate = NULL; @@ -1663,7 +1667,7 @@ create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, Return the new state if succeeded, otherwise return NULL. 
*/ static re_dfastate_t * -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, unsigned int context, re_hashval_t hash) { @@ -1672,10 +1676,10 @@ create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, re_dfastate_t *newstate; newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); - if (BE (newstate == NULL, 0)) + if (__glibc_unlikely (newstate == NULL)) return NULL; err = re_node_set_init_copy (&newstate->nodes, nodes); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_free (newstate); return NULL; @@ -1706,15 +1710,19 @@ create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, { if (newstate->entrance_nodes == &newstate->nodes) { - newstate->entrance_nodes = re_malloc (re_node_set, 1); - if (BE (newstate->entrance_nodes == NULL, 0)) + re_node_set *entrance_nodes = re_malloc (re_node_set, 1); + if (__glibc_unlikely (entrance_nodes == NULL)) { free_state (newstate); return NULL; } + newstate->entrance_nodes = entrance_nodes; if (re_node_set_init_copy (newstate->entrance_nodes, nodes) != REG_NOERROR) - return NULL; + { + free_state (newstate); + return NULL; + } nctx_nodes = 0; newstate->has_constraint = 1; } @@ -1727,7 +1735,7 @@ create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, } } err = register_state (dfa, newstate, hash); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { free_state (newstate); newstate = NULL; diff --git a/contrib/grep/lib/regex_internal.h b/contrib/grep/lib/regex_internal.h index c150ed5d6d..5c9cbf3b4f 100644 --- a/contrib/grep/lib/regex_internal.h +++ b/contrib/grep/lib/regex_internal.h @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. 
Contributed by Isamu Hasegawa . @@ -15,12 +15,11 @@ You should have received a copy of the GNU General Public License along with the GNU C Library; if not, see - . */ + . */ #ifndef _REGEX_INTERNAL_H #define _REGEX_INTERNAL_H 1 -#include #include #include #include @@ -33,31 +32,26 @@ #include #include +#include +#include + +#if defined DEBUG && DEBUG != 0 +# include +# define DEBUG_ASSERT(x) assert (x) +#else +# define DEBUG_ASSERT(x) assume (x) +#endif + #ifdef _LIBC # include # define lock_define(name) __libc_lock_define (, name) # define lock_init(lock) (__libc_lock_init (lock), 0) -# define lock_fini(lock) 0 +# define lock_fini(lock) ((void) 0) # define lock_lock(lock) __libc_lock_lock (lock) # define lock_unlock(lock) __libc_lock_unlock (lock) #elif defined GNULIB_LOCK && !defined USE_UNLOCKED_IO # include "glthread/lock.h" - /* Use gl_lock_define if empty macro arguments are known to work. - Otherwise, fall back on less-portable substitutes. */ -# if ((defined __GNUC__ && !defined __STRICT_ANSI__) \ - || (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__)) -# define lock_define(name) gl_lock_define (, name) -# elif USE_POSIX_THREADS -# define lock_define(name) pthread_mutex_t name; -# elif USE_PTH_THREADS -# define lock_define(name) pth_mutex_t name; -# elif USE_SOLARIS_THREADS -# define lock_define(name) mutex_t name; -# elif USE_WINDOWS_THREADS -# define lock_define(name) gl_lock_t name; -# else -# define lock_define(name) -# endif +# define lock_define(name) gl_lock_define (, name) # define lock_init(lock) glthread_lock_init (&(lock)) # define lock_fini(lock) glthread_lock_destroy (&(lock)) # define lock_lock(lock) glthread_lock_lock (&(lock)) @@ -100,6 +94,7 @@ __dcgettext (_libc_intl_domainname, msgid, LC_MESSAGES) # endif #else +# undef gettext # define gettext(msgid) (msgid) #endif @@ -113,12 +108,6 @@ # define RE_ENABLE_I18N #endif -#if __GNUC__ >= 3 -# define BE(expr, val) __builtin_expect (expr, val) -#else -# define BE(expr, val) (expr) 
-#endif - /* Number of ASCII characters. */ #define ASCII_CHARS 0x80 @@ -134,7 +123,10 @@ /* Rename to standard API for using out of glibc. */ #ifndef _LIBC # undef __wctype +# undef __iswalnum # undef __iswctype +# undef __towlower +# undef __towupper # define __wctype wctype # define __iswalnum iswalnum # define __iswctype iswctype @@ -144,40 +136,26 @@ # define __mbrtowc mbrtowc # define __wcrtomb wcrtomb # define __regfree regfree -# define attribute_hidden #endif /* not _LIBC */ -#if __GNUC__ < 3 + (__GNUC_MINOR__ < 1) -# define __attribute__(arg) +#ifndef SSIZE_MAX +# define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2)) #endif -typedef __re_idx_t Idx; +/* The type of indexes into strings. This is signed, not size_t, + since the API requires indexes to fit in regoff_t anyway, and using + signed integers makes the code a bit smaller and presumably faster. + The traditional GNU regex implementation uses int for indexes. + The POSIX-compatible implementation uses a possibly-wider type. + The name 'Idx' is three letters to minimize the hassle of + reindenting a lot of regex code that formerly used 'int'. */ +typedef regoff_t Idx; #ifdef _REGEX_LARGE_OFFSETS -# define IDX_MAX (SIZE_MAX - 2) +# define IDX_MAX SSIZE_MAX #else # define IDX_MAX INT_MAX #endif -/* Special return value for failure to match. */ -#define REG_MISSING ((Idx) -1) - -/* Special return value for internal error. */ -#define REG_ERROR ((Idx) -2) - -/* Test whether N is a valid index, and is not one of the above. */ -#ifdef _REGEX_LARGE_OFFSETS -# define REG_VALID_INDEX(n) ((Idx) (n) < REG_ERROR) -#else -# define REG_VALID_INDEX(n) (0 <= (n)) -#endif - -/* Test whether N is a valid nonzero index. */ -#ifdef _REGEX_LARGE_OFFSETS -# define REG_VALID_NONZERO_INDEX(n) ((Idx) ((n) - 1) < (Idx) (REG_ERROR - 1)) -#else -# define REG_VALID_NONZERO_INDEX(n) (0 < (n)) -#endif - /* A hash value, suitable for computing hash tables. 
*/ typedef __re_size_t re_hashval_t; @@ -448,24 +426,9 @@ struct re_dfa_t; typedef struct re_dfa_t re_dfa_t; #ifndef _LIBC -# define internal_function # define IS_IN(libc) false #endif -static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, - Idx new_buf_len) - internal_function; -#ifdef RE_ENABLE_I18N -static void build_wcs_buffer (re_string_t *pstr) internal_function; -static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr) - internal_function; -#endif /* RE_ENABLE_I18N */ -static void build_upper_buffer (re_string_t *pstr) internal_function; -static void re_string_translate_buffer (re_string_t *pstr) internal_function; -static unsigned int re_string_context_at (const re_string_t *input, Idx idx, - int eflags) - internal_function __attribute__ ((pure)); - #define re_string_peek_byte(pstr, offset) \ ((pstr)->mbs[(pstr)->cur_idx + offset]) #define re_string_fetch_byte(pstr) \ @@ -647,11 +610,7 @@ typedef struct { /* The string object corresponding to the input string. */ re_string_t input; -#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) const re_dfa_t *const dfa; -#else - const re_dfa_t *dfa; -#endif /* EFLAGS of the argument of regexec. */ int eflags; /* Where the matching ends. */ @@ -771,31 +730,31 @@ typedef struct /* Functions for bitset_t operation. 
*/ -static void +static inline void bitset_set (bitset_t set, Idx i) { set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS; } -static void +static inline void bitset_clear (bitset_t set, Idx i) { set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS); } -static bool +static inline bool bitset_contain (const bitset_t set, Idx i) { return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1; } -static void +static inline void bitset_empty (bitset_t set) { memset (set, '\0', sizeof (bitset_t)); } -static void +static inline void bitset_set_all (bitset_t set) { memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS)); @@ -804,13 +763,13 @@ bitset_set_all (bitset_t set) ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1; } -static void +static inline void bitset_copy (bitset_t dest, const bitset_t src) { memcpy (dest, src, sizeof (bitset_t)); } -static void __attribute__ ((unused)) +static inline void bitset_not (bitset_t set) { int bitset_i; @@ -822,7 +781,7 @@ bitset_not (bitset_t set) & ~set[BITSET_WORDS - 1]); } -static void __attribute__ ((unused)) +static inline void bitset_merge (bitset_t dest, const bitset_t src) { int bitset_i; @@ -830,7 +789,7 @@ bitset_merge (bitset_t dest, const bitset_t src) dest[bitset_i] |= src[bitset_i]; } -static void __attribute__ ((unused)) +static inline void bitset_mask (bitset_t dest, const bitset_t src) { int bitset_i; @@ -841,7 +800,7 @@ bitset_mask (bitset_t dest, const bitset_t src) #ifdef RE_ENABLE_I18N /* Functions for re_string. 
*/ static int -internal_function __attribute__ ((pure, unused)) +__attribute__ ((pure, unused)) re_string_char_size_at (const re_string_t *pstr, Idx idx) { int byte_idx; @@ -854,7 +813,7 @@ re_string_char_size_at (const re_string_t *pstr, Idx idx) } static wint_t -internal_function __attribute__ ((pure, unused)) +__attribute__ ((pure, unused)) re_string_wchar_at (const re_string_t *pstr, Idx idx) { if (pstr->mb_cur_max == 1) @@ -867,7 +826,7 @@ re_string_wchar_at (const re_string_t *pstr, Idx idx) # endif static int -internal_function __attribute__ ((pure, unused)) +__attribute__ ((pure, unused)) re_string_elem_size_at (const re_string_t *pstr, Idx idx) { # ifdef _LIBC @@ -892,21 +851,12 @@ re_string_elem_size_at (const re_string_t *pstr, Idx idx) } #endif /* RE_ENABLE_I18N */ -#ifndef __GNUC_PREREQ -# if defined __GNUC__ && defined __GNUC_MINOR__ -# define __GNUC_PREREQ(maj, min) \ - ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#ifndef FALLTHROUGH +# if __GNUC__ < 7 +# define FALLTHROUGH ((void) 0) # else -# define __GNUC_PREREQ(maj, min) 0 +# define FALLTHROUGH __attribute__ ((__fallthrough__)) # endif #endif -#if __GNUC_PREREQ (3,4) -# undef __attribute_warn_unused_result__ -# define __attribute_warn_unused_result__ \ - __attribute__ ((__warn_unused_result__)) -#else -# define __attribute_warn_unused_result__ /* empty */ -#endif - #endif /* _REGEX_INTERNAL_H */ diff --git a/contrib/grep/lib/regexec.c b/contrib/grep/lib/regexec.c index 067a0315c4..395e37db59 100644 --- a/contrib/grep/lib/regexec.c +++ b/contrib/grep/lib/regexec.c @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . @@ -15,192 +15,162 @@ You should have received a copy of the GNU General Public License along with the GNU C Library; if not, see - . */ + . 
*/ static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, - Idx n) internal_function; -static void match_ctx_clean (re_match_context_t *mctx) internal_function; -static void match_ctx_free (re_match_context_t *cache) internal_function; + Idx n); +static void match_ctx_clean (re_match_context_t *mctx); +static void match_ctx_free (re_match_context_t *cache); static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node, - Idx str_idx, Idx from, Idx to) - internal_function; -static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) - internal_function; + Idx str_idx, Idx from, Idx to); +static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx); static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node, - Idx str_idx) internal_function; + Idx str_idx); static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, - Idx node, Idx str_idx) - internal_function; + Idx node, Idx str_idx); static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, re_dfastate_t **limited_sts, Idx last_node, - Idx last_str_idx) - internal_function; + Idx last_str_idx); static reg_errcode_t re_search_internal (const regex_t *preg, const char *string, Idx length, Idx start, Idx last_start, Idx stop, size_t nmatch, regmatch_t pmatch[], - int eflags) internal_function; + int eflags); static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1, Idx length1, const char *string2, Idx length2, Idx start, regoff_t range, struct re_registers *regs, - Idx stop, bool ret_len) internal_function; + Idx stop, bool ret_len); static regoff_t re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length, Idx start, regoff_t range, Idx stop, struct re_registers *regs, - bool ret_len) internal_function; + bool ret_len); static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, - Idx nregs, int regs_allocated) 
internal_function; -static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) - internal_function; + Idx nregs, int regs_allocated); +static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx); static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, - Idx *p_match_first) internal_function; + Idx *p_match_first); static Idx check_halt_state_context (const re_match_context_t *mctx, - const re_dfastate_t *state, Idx idx) - internal_function; + const re_dfastate_t *state, Idx idx); static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match, Idx cur_node, - Idx cur_idx, Idx nmatch) internal_function; + Idx cur_idx, Idx nmatch); static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, Idx nregs, regmatch_t *regs, - re_node_set *eps_via_nodes) - internal_function; + re_node_set *eps_via_nodes); static reg_errcode_t set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, regmatch_t *pmatch, - bool fl_backtrack) internal_function; -static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) - internal_function; + bool fl_backtrack); +static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); #ifdef RE_ENABLE_I18N static int sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, - Idx node_idx, Idx str_idx, Idx max_str_idx) - internal_function; + Idx node_idx, Idx str_idx, Idx max_str_idx); #endif /* RE_ENABLE_I18N */ static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, - re_sift_context_t *sctx) - internal_function; + re_sift_context_t *sctx); static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, - re_node_set *cur_dest) - internal_function; + re_node_set *cur_dest); static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, - re_node_set *dest_nodes) - 
internal_function; + re_node_set *dest_nodes); static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, - const re_node_set *candidates) - internal_function; + const re_node_set *candidates); static bool check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits, Idx dst_node, Idx dst_idx, Idx src_node, - Idx src_idx) internal_function; + Idx src_idx); static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, Idx subexp_idx, - Idx from_node, Idx bkref_idx) - internal_function; + Idx from_node, Idx bkref_idx); static int check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit, Idx subexp_idx, Idx node, Idx str_idx, - Idx bkref_idx) internal_function; + Idx bkref_idx); static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates, re_node_set *limits, struct re_backref_cache_entry *bkref_ents, - Idx str_idx) internal_function; + Idx str_idx); static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, - Idx str_idx, const re_node_set *candidates) - internal_function; + Idx str_idx, const re_node_set *candidates); static reg_errcode_t merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, - re_dfastate_t **src, Idx num) - internal_function; + re_dfastate_t **src, Idx num); static re_dfastate_t *find_recover_state (reg_errcode_t *err, - re_match_context_t *mctx) internal_function; + re_match_context_t *mctx); static re_dfastate_t *transit_state (reg_errcode_t *err, re_match_context_t *mctx, - re_dfastate_t *state) internal_function; + re_dfastate_t *state); static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, - re_dfastate_t *next_state) - internal_function; + re_dfastate_t *next_state); static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, - Idx str_idx) internal_function; + Idx str_idx); #if 0 
static re_dfastate_t *transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, - re_dfastate_t *pstate) - internal_function; + re_dfastate_t *pstate); #endif #ifdef RE_ENABLE_I18N static reg_errcode_t transit_state_mb (re_match_context_t *mctx, - re_dfastate_t *pstate) - internal_function; + re_dfastate_t *pstate); #endif /* RE_ENABLE_I18N */ static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, - const re_node_set *nodes) - internal_function; + const re_node_set *nodes); static reg_errcode_t get_subexp (re_match_context_t *mctx, - Idx bkref_node, Idx bkref_str_idx) - internal_function; + Idx bkref_node, Idx bkref_str_idx); static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, re_sub_match_last_t *sub_last, - Idx bkref_node, Idx bkref_str) - internal_function; + Idx bkref_node, Idx bkref_str); static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, - Idx subexp_idx, int type) internal_function; + Idx subexp_idx, int type); static reg_errcode_t check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, Idx top_str, Idx last_node, Idx last_str, - int type) internal_function; + int type); static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, re_node_set *cur_nodes, - re_node_set *next_nodes) - internal_function; + re_node_set *next_nodes); static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, - Idx ex_subexp, int type) - internal_function; + Idx ex_subexp, int type); static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, Idx target, Idx ex_subexp, - int type) internal_function; + int type); static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, Idx cur_str, - Idx subexp_num, int type) - internal_function; -static bool build_trtable (const re_dfa_t *dfa, - re_dfastate_t *state) internal_function; + Idx subexp_num, int 
type); +static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state); #ifdef RE_ENABLE_I18N static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, - const re_string_t *input, Idx idx) - internal_function; + const re_string_t *input, Idx idx); # ifdef _LIBC static unsigned int find_collation_sequence_value (const unsigned char *mbs, - size_t name_len) - internal_function; + size_t name_len); # endif /* _LIBC */ #endif /* RE_ENABLE_I18N */ static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, re_node_set *states_node, - bitset_t *states_ch) internal_function; + bitset_t *states_ch); static bool check_node_accept (const re_match_context_t *mctx, - const re_token_t *node, Idx idx) - internal_function; -static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len) - internal_function; + const re_token_t *node, Idx idx); +static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len); /* Entry point for POSIX code. */ @@ -219,12 +189,8 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len) We return 0 if we find a match and REG_NOMATCH if not. 
*/ int -regexec (preg, string, nmatch, pmatch, eflags) - const regex_t *_Restrict_ preg; - const char *_Restrict_ string; - size_t nmatch; - regmatch_t pmatch[_Restrict_arr_]; - int eflags; +regexec (const regex_t *__restrict preg, const char *__restrict string, + size_t nmatch, regmatch_t pmatch[], int eflags) { reg_errcode_t err; Idx start, length; @@ -256,6 +222,8 @@ regexec (preg, string, nmatch, pmatch, eflags) } #ifdef _LIBC +libc_hidden_def (__regexec) + # include versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); @@ -264,8 +232,8 @@ __typeof__ (__regexec) __compat_regexec; int attribute_compat_text_section -__compat_regexec (const regex_t *_Restrict_ preg, - const char *_Restrict_ string, size_t nmatch, +__compat_regexec (const regex_t *__restrict preg, + const char *__restrict string, size_t nmatch, regmatch_t pmatch[], int eflags) { return regexec (preg, string, nmatch, pmatch, @@ -305,11 +273,8 @@ compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); match was found and -2 indicates an internal error. 
*/ regoff_t -re_match (bufp, string, length, start, regs) - struct re_pattern_buffer *bufp; - const char *string; - Idx length, start; - struct re_registers *regs; +re_match (struct re_pattern_buffer *bufp, const char *string, Idx length, + Idx start, struct re_registers *regs) { return re_search_stub (bufp, string, length, start, 0, length, regs, true); } @@ -318,12 +283,8 @@ weak_alias (__re_match, re_match) #endif regoff_t -re_search (bufp, string, length, start, range, regs) - struct re_pattern_buffer *bufp; - const char *string; - Idx length, start; - regoff_t range; - struct re_registers *regs; +re_search (struct re_pattern_buffer *bufp, const char *string, Idx length, + Idx start, regoff_t range, struct re_registers *regs) { return re_search_stub (bufp, string, length, start, range, length, regs, false); @@ -333,11 +294,9 @@ weak_alias (__re_search, re_search) #endif regoff_t -re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - Idx length1, length2, start, stop; - struct re_registers *regs; +re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1, + const char *string2, Idx length2, Idx start, + struct re_registers *regs, Idx stop) { return re_search_2_stub (bufp, string1, length1, string2, length2, start, 0, regs, stop, true); @@ -347,12 +306,9 @@ weak_alias (__re_match_2, re_match_2) #endif regoff_t -re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - Idx length1, length2, start, stop; - regoff_t range; - struct re_registers *regs; +re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1, + const char *string2, Idx length2, Idx start, regoff_t range, + struct re_registers *regs, Idx stop) { return re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs, stop, false); @@ -362,18 +318,18 @@ weak_alias 
(__re_search_2, re_search_2) #endif static regoff_t -re_search_2_stub (struct re_pattern_buffer *bufp, - const char *string1, Idx length1, - const char *string2, Idx length2, - Idx start, regoff_t range, struct re_registers *regs, +re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1, + Idx length1, const char *string2, Idx length2, Idx start, + regoff_t range, struct re_registers *regs, Idx stop, bool ret_len) { const char *str; regoff_t rval; - Idx len = length1 + length2; + Idx len; char *s = NULL; - if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0)) + if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0 + || INT_ADD_WRAPV (length1, length2, &len)))) return -2; /* Concatenate the strings. */ @@ -382,7 +338,7 @@ re_search_2_stub (struct re_pattern_buffer *bufp, { s = re_malloc (char, len); - if (BE (s == NULL, 0)) + if (__glibc_unlikely (s == NULL)) return -2; #ifdef _LIBC memcpy (__mempcpy (s, string1, length1), string2, length2); @@ -409,8 +365,7 @@ re_search_2_stub (struct re_pattern_buffer *bufp, otherwise the position of the match is returned. */ static regoff_t -re_search_stub (struct re_pattern_buffer *bufp, - const char *string, Idx length, +re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length, Idx start, regoff_t range, Idx stop, struct re_registers *regs, bool ret_len) { @@ -423,11 +378,13 @@ re_search_stub (struct re_pattern_buffer *bufp, Idx last_start = start + range; /* Check for out-of-range. 
*/ - if (BE (start < 0 || start > length, 0)) + if (__glibc_unlikely (start < 0 || start > length)) return -1; - if (BE (length < last_start || (0 <= range && last_start < start), 0)) + if (__glibc_unlikely (length < last_start + || (0 <= range && last_start < start))) last_start = length; - else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0)) + else if (__glibc_unlikely (last_start < 0 + || (range < 0 && start <= last_start))) last_start = 0; lock_lock (dfa->lock); @@ -439,17 +396,17 @@ re_search_stub (struct re_pattern_buffer *bufp, if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate) re_compile_fastmap (bufp); - if (BE (bufp->no_sub, 0)) + if (__glibc_unlikely (bufp->no_sub)) regs = NULL; /* We need at least 1 register. */ if (regs == NULL) nregs = 1; - else if (BE (bufp->regs_allocated == REGS_FIXED - && regs->num_regs <= bufp->re_nsub, 0)) + else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED + && regs->num_regs <= bufp->re_nsub)) { nregs = regs->num_regs; - if (BE (nregs < 1, 0)) + if (__glibc_unlikely (nregs < 1)) { /* Nothing can be copied to regs. */ regs = NULL; @@ -459,7 +416,7 @@ re_search_stub (struct re_pattern_buffer *bufp, else nregs = bufp->re_nsub + 1; pmatch = re_malloc (regmatch_t, nregs); - if (BE (pmatch == NULL, 0)) + if (__glibc_unlikely (pmatch == NULL)) { rval = -2; goto out; @@ -478,15 +435,15 @@ re_search_stub (struct re_pattern_buffer *bufp, /* If caller wants register contents data back, copy them. 
*/ bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, bufp->regs_allocated); - if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) + if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED)) rval = -2; } - if (BE (rval == 0, 1)) + if (__glibc_likely (rval == 0)) { if (ret_len) { - assert (pmatch[0].rm_so == start); + DEBUG_ASSERT (pmatch[0].rm_so == start); rval = pmatch[0].rm_eo - start; } else @@ -512,10 +469,10 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, if (regs_allocated == REGS_UNALLOCATED) { /* No. So allocate them with malloc. */ regs->start = re_malloc (regoff_t, need_regs); - if (BE (regs->start == NULL, 0)) + if (__glibc_unlikely (regs->start == NULL)) return REGS_UNALLOCATED; regs->end = re_malloc (regoff_t, need_regs); - if (BE (regs->end == NULL, 0)) + if (__glibc_unlikely (regs->end == NULL)) { re_free (regs->start); return REGS_UNALLOCATED; @@ -526,14 +483,14 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, { /* Yes. If we need more elements than were already allocated, reallocate them. If we need fewer, just leave it alone. */ - if (BE (need_regs > regs->num_regs, 0)) + if (__glibc_unlikely (need_regs > regs->num_regs)) { regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); regoff_t *new_end; - if (BE (new_start == NULL, 0)) + if (__glibc_unlikely (new_start == NULL)) return REGS_UNALLOCATED; new_end = re_realloc (regs->end, regoff_t, need_regs); - if (BE (new_end == NULL, 0)) + if (__glibc_unlikely (new_end == NULL)) { re_free (new_start); return REGS_UNALLOCATED; @@ -545,9 +502,9 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, } else { - assert (regs_allocated == REGS_FIXED); + DEBUG_ASSERT (regs_allocated == REGS_FIXED); /* This function may not be called with REGS_FIXED and nregs too big. 
*/ - assert (regs->num_regs >= nregs); + DEBUG_ASSERT (nregs <= regs->num_regs); rval = REGS_FIXED; } @@ -577,11 +534,8 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, freeing the old data. */ void -re_set_registers (bufp, regs, num_regs, starts, ends) - struct re_pattern_buffer *bufp; - struct re_registers *regs; - __re_size_t num_regs; - regoff_t *starts, *ends; +re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, + __re_size_t num_regs, regoff_t *starts, regoff_t *ends) { if (num_regs) { @@ -609,8 +563,7 @@ int # ifdef _LIBC weak_function # endif -re_exec (s) - const char *s; +re_exec (const char *s) { return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); } @@ -629,11 +582,9 @@ re_exec (s) static reg_errcode_t __attribute_warn_unused_result__ -re_search_internal (const regex_t *preg, - const char *string, Idx length, - Idx start, Idx last_start, Idx stop, - size_t nmatch, regmatch_t pmatch[], - int eflags) +re_search_internal (const regex_t *preg, const char *string, Idx length, + Idx start, Idx last_start, Idx stop, size_t nmatch, + regmatch_t pmatch[], int eflags) { reg_errcode_t err; const re_dfa_t *dfa = preg->buffer; @@ -642,38 +593,28 @@ re_search_internal (const regex_t *preg, bool fl_longest_match; int match_kind; Idx match_first; - Idx match_last = REG_MISSING; + Idx match_last = -1; Idx extra_nmatch; bool sb; int ch; -#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) re_match_context_t mctx = { .dfa = dfa }; -#else - re_match_context_t mctx; -#endif char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate && start != last_start && !preg->can_be_null) ? preg->fastmap : NULL); RE_TRANSLATE_TYPE t = preg->translate; -#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) - memset (&mctx, '\0', sizeof (re_match_context_t)); - mctx.dfa = dfa; -#endif - extra_nmatch = (nmatch > preg->re_nsub) ? 
nmatch - (preg->re_nsub + 1) : 0; nmatch -= extra_nmatch; /* Check if the DFA haven't been compiled. */ - if (BE (preg->used == 0 || dfa->init_state == NULL - || dfa->init_state_word == NULL || dfa->init_state_nl == NULL - || dfa->init_state_begbuf == NULL, 0)) + if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL + || dfa->init_state_word == NULL + || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL)) return REG_NOMATCH; -#ifdef DEBUG /* We assume front-end functions already check them. */ - assert (0 <= last_start && last_start <= length); -#endif + DEBUG_ASSERT (0 <= last_start && last_start <= length); /* If initial states with non-begbuf contexts have no elements, the regex must be anchored. If preg->newline_anchor is set, @@ -694,14 +635,14 @@ re_search_internal (const regex_t *preg, err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, preg->translate, (preg->syntax & RE_ICASE) != 0, dfa); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; mctx.input.stop = stop; mctx.input.raw_stop = stop; mctx.input.newline_anchor = preg->newline_anchor; err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; /* We will log all the DFA states through which the dfa pass, @@ -711,22 +652,20 @@ re_search_internal (const regex_t *preg, if (nmatch > 1 || dfa->has_mb_node) { /* Avoid overflow. 
*/ - if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) - <= mctx.input.bufs_len), 0)) + if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) + <= mctx.input.bufs_len))) { err = REG_ESPACE; goto free_return; } mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); - if (BE (mctx.state_log == NULL, 0)) + if (__glibc_unlikely (mctx.state_log == NULL)) { err = REG_ESPACE; goto free_return; } } - else - mctx.state_log = NULL; match_first = start; mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF @@ -763,19 +702,19 @@ re_search_internal (const regex_t *preg, case 7: /* Fastmap with single-byte translation, match forward. */ - while (BE (match_first < right_lim, 1) + while (__glibc_likely (match_first < right_lim) && !fastmap[t[(unsigned char) string[match_first]]]) ++match_first; goto forward_match_found_start_or_reached_end; case 6: /* Fastmap without translation, match forward. */ - while (BE (match_first < right_lim, 1) + while (__glibc_likely (match_first < right_lim) && !fastmap[(unsigned char) string[match_first]]) ++match_first; forward_match_found_start_or_reached_end: - if (BE (match_first == right_lim, 0)) + if (__glibc_unlikely (match_first == right_lim)) { ch = match_first >= length ? 0 : (unsigned char) string[match_first]; @@ -808,11 +747,12 @@ re_search_internal (const regex_t *preg, /* If MATCH_FIRST is out of the valid range, reconstruct the buffers. 
*/ __re_size_t offset = match_first - mctx.input.raw_mbs_idx; - if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0)) + if (__glibc_unlikely (offset + >= (__re_size_t) mctx.input.valid_raw_len)) { err = re_string_reconstruct (&mctx.input, match_first, eflags); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; offset = match_first - mctx.input.raw_mbs_idx; @@ -836,7 +776,7 @@ re_search_internal (const regex_t *preg, /* Reconstruct the buffers so that the matcher can assume that the matching starts from the beginning of the buffer. */ err = re_string_reconstruct (&mctx.input, match_first, eflags); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; #ifdef RE_ENABLE_I18N @@ -851,9 +791,9 @@ re_search_internal (const regex_t *preg, mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; match_last = check_matching (&mctx, fl_longest_match, start <= last_start ? &match_first : NULL); - if (match_last != REG_MISSING) + if (match_last != -1) { - if (BE (match_last == REG_ERROR, 0)) + if (__glibc_unlikely (match_last == -2)) { err = REG_ESPACE; goto free_return; @@ -873,9 +813,9 @@ re_search_internal (const regex_t *preg, err = prune_impossible_nodes (&mctx); if (err == REG_NOERROR) break; - if (BE (err != REG_NOMATCH, 0)) + if (__glibc_unlikely (err != REG_NOMATCH)) goto free_return; - match_last = REG_MISSING; + match_last = -1; } else break; /* We found a match. */ @@ -885,10 +825,8 @@ re_search_internal (const regex_t *preg, match_ctx_clean (&mctx); } -#ifdef DEBUG - assert (match_last != REG_MISSING); - assert (err == REG_NOERROR); -#endif + DEBUG_ASSERT (match_last != -1); + DEBUG_ASSERT (err == REG_NOERROR); /* Set pmatch[] if we need. 
*/ if (nmatch > 0) @@ -910,7 +848,7 @@ re_search_internal (const regex_t *preg, { err = set_regs (preg, &mctx, nmatch, pmatch, dfa->has_plural_match && dfa->nbackref > 0); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; } @@ -921,7 +859,7 @@ re_search_internal (const regex_t *preg, if (pmatch[reg_idx].rm_so != -1) { #ifdef RE_ENABLE_I18N - if (BE (mctx.input.offsets_needed != 0, 0)) + if (__glibc_unlikely (mctx.input.offsets_needed != 0)) { pmatch[reg_idx].rm_so = (pmatch[reg_idx].rm_so == mctx.input.valid_len @@ -933,7 +871,7 @@ re_search_internal (const regex_t *preg, : mctx.input.offsets[pmatch[reg_idx].rm_eo]); } #else - assert (mctx.input.offsets_needed == 0); + DEBUG_ASSERT (mctx.input.offsets_needed == 0); #endif pmatch[reg_idx].rm_so += match_first; pmatch[reg_idx].rm_eo += match_first; @@ -973,18 +911,17 @@ prune_impossible_nodes (re_match_context_t *mctx) re_dfastate_t **sifted_states; re_dfastate_t **lim_states = NULL; re_sift_context_t sctx; -#ifdef DEBUG - assert (mctx->state_log != NULL); -#endif + DEBUG_ASSERT (mctx->state_log != NULL); match_last = mctx->match_last; halt_node = mctx->last_node; /* Avoid overflow. 
*/ - if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0)) + if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) + <= match_last)) return REG_ESPACE; sifted_states = re_malloc (re_dfastate_t *, match_last + 1); - if (BE (sifted_states == NULL, 0)) + if (__glibc_unlikely (sifted_states == NULL)) { ret = REG_ESPACE; goto free_return; @@ -992,7 +929,7 @@ prune_impossible_nodes (re_match_context_t *mctx) if (dfa->nbackref) { lim_states = re_malloc (re_dfastate_t *, match_last + 1); - if (BE (lim_states == NULL, 0)) + if (__glibc_unlikely (lim_states == NULL)) { ret = REG_ESPACE; goto free_return; @@ -1005,14 +942,14 @@ prune_impossible_nodes (re_match_context_t *mctx) match_last); ret = sift_states_backward (mctx, &sctx); re_node_set_free (&sctx.limits); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) goto free_return; if (sifted_states[0] != NULL || lim_states[0] != NULL) break; do { --match_last; - if (! REG_VALID_INDEX (match_last)) + if (match_last < 0) { ret = REG_NOMATCH; goto free_return; @@ -1027,7 +964,7 @@ prune_impossible_nodes (re_match_context_t *mctx) match_last + 1); re_free (lim_states); lim_states = NULL; - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) goto free_return; } else @@ -1035,7 +972,7 @@ prune_impossible_nodes (re_match_context_t *mctx) sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); ret = sift_states_backward (mctx, &sctx); re_node_set_free (&sctx.limits); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) goto free_return; if (sifted_states[0] == NULL) { @@ -1060,7 +997,7 @@ prune_impossible_nodes (re_match_context_t *mctx) since initial states may have constraints like "\<", "^", etc.. 
*/ static inline re_dfastate_t * -__attribute__ ((always_inline)) internal_function +__attribute__ ((always_inline)) acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, Idx idx) { @@ -1093,8 +1030,8 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, } /* Check whether the regular expression match input string INPUT or not, - and return the index where the matching end. Return REG_MISSING if - there is no match, and return REG_ERROR in case of an error. + and return the index where the matching end. Return -1 if + there is no match, and return -2 in case of an error. FL_LONGEST_MATCH means we want the POSIX longest matching. If P_MATCH_FIRST is not NULL, and the match fails, it is set to the next place where we may want to try matching. @@ -1102,14 +1039,14 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, index of the buffer. */ static Idx -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ check_matching (re_match_context_t *mctx, bool fl_longest_match, Idx *p_match_first) { const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; Idx match = 0; - Idx match_last = REG_MISSING; + Idx match_last = -1; Idx cur_str_idx = re_string_cur_idx (&mctx->input); re_dfastate_t *cur_state; bool at_init_state = p_match_first != NULL; @@ -1118,10 +1055,10 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, err = REG_NOERROR; cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); /* An initial state must not be NULL (invalid). */ - if (BE (cur_state == NULL, 0)) + if (__glibc_unlikely (cur_state == NULL)) { - assert (err == REG_ESPACE); - return REG_ERROR; + DEBUG_ASSERT (err == REG_ESPACE); + return -2; } if (mctx->state_log != NULL) @@ -1130,24 +1067,24 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, /* Check OP_OPEN_SUBEXP in the initial state in case that we use them later. E.g. 
Processing back references. */ - if (BE (dfa->nbackref, 0)) + if (__glibc_unlikely (dfa->nbackref)) { at_init_state = false; err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; if (cur_state->has_backref) { err = transit_state_bkref (mctx, &cur_state->nodes); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } } } /* If the RE accepts NULL string. */ - if (BE (cur_state->halt, 0)) + if (__glibc_unlikely (cur_state->halt)) { if (!cur_state->has_constraint || check_halt_state_context (mctx, cur_state, cur_str_idx)) @@ -1167,16 +1104,16 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, re_dfastate_t *old_state = cur_state; Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; - if ((BE (next_char_idx >= mctx->input.bufs_len, 0) + if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len) && mctx->input.bufs_len < mctx->input.len) - || (BE (next_char_idx >= mctx->input.valid_len, 0) + || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len) && mctx->input.valid_len < mctx->input.len)) { err = extend_buffers (mctx, next_char_idx + 1); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { - assert (err == REG_ESPACE); - return REG_ERROR; + DEBUG_ASSERT (err == REG_ESPACE); + return -2; } } @@ -1189,8 +1126,8 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, /* Reached the invalid state or an error. Try to recover a valid state using the state log, if available and if we have not already found a valid (even if not the longest) match. 
*/ - if (BE (err != REG_NOERROR, 0)) - return REG_ERROR; + if (__glibc_unlikely (err != REG_NOERROR)) + return -2; if (mctx->state_log == NULL || (match && !fl_longest_match) @@ -1198,7 +1135,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, break; } - if (BE (at_init_state, 0)) + if (__glibc_unlikely (at_init_state)) { if (old_state == cur_state) next_start_idx = next_char_idx; @@ -1235,7 +1172,6 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, /* Check NODE match the current context. */ static bool -internal_function check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context) { re_token_type_t type = dfa->nodes[node].type; @@ -1254,15 +1190,12 @@ check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context) match the context, return the node. */ static Idx -internal_function check_halt_state_context (const re_match_context_t *mctx, const re_dfastate_t *state, Idx idx) { Idx i; unsigned int context; -#ifdef DEBUG - assert (state->halt); -#endif + DEBUG_ASSERT (state->halt); context = re_string_context_at (&mctx->input, idx, mctx->eflags); for (i = 0; i < state->nodes.nelem; ++i) if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) @@ -1273,10 +1206,9 @@ check_halt_state_context (const re_match_context_t *mctx, /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA corresponding to the DFA). Return the destination node, and update EPS_VIA_NODES; - return REG_MISSING in case of errors. */ + return -1 in case of errors. */ static Idx -internal_function proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, Idx *pidx, Idx node, re_node_set *eps_via_nodes, struct re_fail_stack_t *fs) @@ -1290,16 +1222,16 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, re_node_set *edests = &dfa->edests[node]; Idx dest_node; ok = re_node_set_insert (eps_via_nodes, node); - if (BE (! 
ok, 0)) - return REG_ERROR; - /* Pick up a valid destination, or return REG_MISSING if none + if (__glibc_unlikely (! ok)) + return -2; + /* Pick up a valid destination, or return -1 if none is found. */ - for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i) + for (dest_node = -1, i = 0; i < edests->nelem; ++i) { Idx candidate = edests->elems[i]; if (!re_node_set_contains (cur_nodes, candidate)) continue; - if (dest_node == REG_MISSING) + if (dest_node == -1) dest_node = candidate; else @@ -1313,7 +1245,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, else if (fs != NULL && push_fail_stack (fs, *pidx, candidate, nregs, regs, eps_via_nodes)) - return REG_ERROR; + return -2; /* We know we are going to exit. */ break; @@ -1334,17 +1266,22 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, if (type == OP_BACK_REF) { Idx subexp_idx = dfa->nodes[node].opr.idx + 1; - naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; + if (subexp_idx < nregs) + naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; if (fs != NULL) { - if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) - return REG_MISSING; + if (subexp_idx >= nregs + || regs[subexp_idx].rm_so == -1 + || regs[subexp_idx].rm_eo == -1) + return -1; else if (naccepted) { char *buf = (char *) re_string_get_buffer (&mctx->input); - if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, - naccepted) != 0) - return REG_MISSING; + if (mctx->input.valid_len - *pidx < naccepted + || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, + naccepted) + != 0)) + return -1; } } @@ -1352,8 +1289,8 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, { Idx dest_node; ok = re_node_set_insert (eps_via_nodes, node); - if (BE (! ok, 0)) - return REG_ERROR; + if (__glibc_unlikely (! 
ok)) + return -2; dest_node = dfa->edests[node].elems[0]; if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, dest_node)) @@ -1369,16 +1306,16 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, dest_node))) - return REG_MISSING; + return -1; re_node_set_empty (eps_via_nodes); return dest_node; } } - return REG_MISSING; + return -1; } static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) { @@ -1387,8 +1324,8 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, if (fs->num == fs->alloc) { struct re_fail_stack_ent_t *new_array; - new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) - * fs->alloc * 2)); + new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t, + fs->alloc * 2); if (new_array == NULL) return REG_ESPACE; fs->alloc *= 2; @@ -1405,12 +1342,11 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, } static Idx -internal_function pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) { Idx num = --fs->num; - assert (REG_VALID_INDEX (num)); + DEBUG_ASSERT (num >= 0); *pidx = fs->stack[num].idx; memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); re_node_set_free (eps_via_nodes); @@ -1425,7 +1361,7 @@ pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. 
*/ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, regmatch_t *pmatch, bool fl_backtrack) { @@ -1437,10 +1373,8 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, regmatch_t *prev_idx_match; bool prev_idx_match_malloced = false; -#ifdef DEBUG - assert (nmatch > 1); - assert (mctx->state_log != NULL); -#endif + DEBUG_ASSERT (nmatch > 1); + DEBUG_ASSERT (mctx->state_log != NULL); if (fl_backtrack) { fs = &fs_body; @@ -1503,9 +1437,9 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, &eps_via_nodes, fs); - if (BE (! REG_VALID_INDEX (cur_node), 0)) + if (__glibc_unlikely (cur_node < 0)) { - if (BE (cur_node == REG_ERROR, 0)) + if (__glibc_unlikely (cur_node == -2)) { re_node_set_free (&eps_via_nodes); if (prev_idx_match_malloced) @@ -1532,7 +1466,6 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, } static reg_errcode_t -internal_function free_fail_stack_return (struct re_fail_stack_t *fs) { if (fs) @@ -1549,7 +1482,6 @@ free_fail_stack_return (struct re_fail_stack_t *fs) } static void -internal_function update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch) { @@ -1621,7 +1553,6 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) static reg_errcode_t -internal_function sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) { reg_errcode_t err; @@ -1629,17 +1560,15 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) Idx str_idx = sctx->last_str_idx; re_node_set cur_dest; -#ifdef DEBUG - assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); -#endif + DEBUG_ASSERT 
(mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); /* Build sifted state_log[str_idx]. It has the nodes which can epsilon transit to the last_node and the last_node itself. */ err = re_node_set_init_1 (&cur_dest, sctx->last_node); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; /* Then check each states in the state_log. */ @@ -1660,7 +1589,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) if (mctx->state_log[str_idx]) { err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; } @@ -1669,7 +1598,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) - It is in CUR_SRC. And update state_log. */ err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; } err = REG_NOERROR; @@ -1679,7 +1608,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) } static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, re_node_set *cur_dest) { @@ -1699,11 +1628,8 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx prev_node = cur_src->elems[i]; int naccepted = 0; bool ok; + DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type)); -#ifdef DEBUG - re_token_type_t type = dfa->nodes[prev_node].type; - assert (!IS_EPSILON_NODE (type)); -#endif #ifdef RE_ENABLE_I18N /* If the node may accept "multi byte". 
*/ if (dfa->nodes[prev_node].accept_mb) @@ -1731,7 +1657,7 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, continue; } ok = re_node_set_insert (cur_dest, prev_node); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) return REG_ESPACE; } @@ -1741,7 +1667,6 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, /* Helper functions. */ static reg_errcode_t -internal_function clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) { Idx top = mctx->state_log_top; @@ -1753,7 +1678,7 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) { reg_errcode_t err; err = extend_buffers (mctx, next_state_log_idx + 1); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } @@ -1767,7 +1692,6 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) } static reg_errcode_t -internal_function merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, re_dfastate_t **src, Idx num) { @@ -1782,11 +1706,11 @@ merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, re_node_set merged_set; err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, &src[st_idx]->nodes); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); re_node_set_free (&merged_set); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } } @@ -1794,7 +1718,6 @@ merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, } static reg_errcode_t -internal_function update_cur_sifted_state (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, re_node_set *dest_nodes) @@ -1814,7 +1737,7 @@ update_cur_sifted_state (const re_match_context_t *mctx, /* At first, add the nodes which can epsilon transit to a node in DEST_NODE. 
*/ err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; /* Then, check the limitations in the current sift_context. */ @@ -1822,27 +1745,27 @@ update_cur_sifted_state (const re_match_context_t *mctx, { err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, mctx->bkref_ents, str_idx); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } } sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } if (candidates && mctx->state_log[str_idx]->has_backref) { err = sift_states_bkref (mctx, sctx, str_idx, candidates); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } return REG_NOERROR; } static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates) { @@ -1850,19 +1773,19 @@ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, Idx i; re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; if (!state->inveclosure.alloc) { err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return REG_ESPACE; for (i = 0; i < dest_nodes->nelem; i++) { err = re_node_set_merge (&state->inveclosure, dfa->inveclosures + dest_nodes->elems[i]); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return REG_ESPACE; } } @@ -1871,7 +1794,6 @@ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, } static reg_errcode_t -internal_function sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, 
const re_node_set *candidates) { @@ -1889,16 +1811,16 @@ sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, { Idx edst1 = dfa->edests[cur_node].elems[0]; Idx edst2 = ((dfa->edests[cur_node].nelem > 1) - ? dfa->edests[cur_node].elems[1] : REG_MISSING); + ? dfa->edests[cur_node].elems[1] : -1); if ((!re_node_set_contains (inv_eclosure, edst1) && re_node_set_contains (dest_nodes, edst1)) - || (REG_VALID_NONZERO_INDEX (edst2) + || (edst2 > 0 && !re_node_set_contains (inv_eclosure, edst2) && re_node_set_contains (dest_nodes, edst2))) { err = re_node_set_add_intersect (&except_nodes, candidates, dfa->inveclosures + cur_node); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_node_set_free (&except_nodes); return err; @@ -1920,7 +1842,6 @@ sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, } static bool -internal_function check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits, Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx) { @@ -1956,7 +1877,6 @@ check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits, } static int -internal_function check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, Idx subexp_idx, Idx from_node, Idx bkref_idx) { @@ -1972,7 +1892,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, switch (dfa->nodes[node].type) { case OP_BACK_REF: - if (bkref_idx != REG_MISSING) + if (bkref_idx != -1) { struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; do @@ -2038,7 +1958,6 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, } static int -internal_function check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit, Idx subexp_idx, Idx from_node, Idx str_idx, Idx bkref_idx) @@ -2068,7 +1987,6 @@ check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit, which are against limitations from DEST_NODES. 
*/ static reg_errcode_t -internal_function check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates, re_node_set *limits, struct re_backref_cache_entry *bkref_ents, Idx str_idx) @@ -2088,8 +2006,8 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, subexp_idx = dfa->nodes[ent->node].opr.idx; if (ent->subexp_to == str_idx) { - Idx ops_node = REG_MISSING; - Idx cls_node = REG_MISSING; + Idx ops_node = -1; + Idx cls_node = -1; for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) { Idx node = dest_nodes->elems[node_idx]; @@ -2104,16 +2022,16 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, /* Check the limitation of the open subexpression. */ /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ - if (REG_VALID_INDEX (ops_node)) + if (ops_node >= 0) { err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, candidates); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } /* Check the limitation of the close subexpression. */ - if (REG_VALID_INDEX (cls_node)) + if (cls_node >= 0) for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) { Idx node = dest_nodes->elems[node_idx]; @@ -2126,7 +2044,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, Remove it form the current sifted state. */ err = sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; --node_idx; } @@ -2146,7 +2064,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, Remove it form the current sifted state. 
*/ err = sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } } @@ -2156,7 +2074,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, } static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, const re_node_set *candidates) { @@ -2166,7 +2084,7 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, re_sift_context_t local_sctx; Idx first_idx = search_cur_bkref_entry (mctx, str_idx); - if (first_idx == REG_MISSING) + if (first_idx == -1) return REG_NOERROR; local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ @@ -2212,27 +2130,27 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, { local_sctx = *sctx; err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; } local_sctx.last_node = node; local_sctx.last_str_idx = str_idx; ok = re_node_set_insert (&local_sctx.limits, enabled_idx); - if (BE (! ok, 0)) + if (__glibc_unlikely (! 
ok)) { err = REG_ESPACE; goto free_return; } cur_state = local_sctx.sifted_states[str_idx]; err = sift_states_backward (mctx, &local_sctx); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; if (sctx->limited_states != NULL) { err = merge_state_array (dfa, sctx->limited_states, local_sctx.sifted_states, str_idx + 1); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; } local_sctx.sifted_states[str_idx] = cur_state; @@ -2256,7 +2174,6 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, #ifdef RE_ENABLE_I18N static int -internal_function sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx node_idx, Idx str_idx, Idx max_str_idx) { @@ -2264,12 +2181,12 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, int naccepted; /* Check the node can accept "multi byte". */ naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); - if (naccepted > 0 && str_idx + naccepted <= max_str_idx && - !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], - dfa->nexts[node_idx])) + if (naccepted > 0 && str_idx + naccepted <= max_str_idx + && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], + dfa->nexts[node_idx])) /* The node can't accept the "multi byte", or the destination was already thrown away, then the node - could't accept the current input "multi byte". */ + couldn't accept the current input "multi byte". */ naccepted = 0; /* Otherwise, it is sure that the node could accept 'naccepted' bytes input. */ @@ -2286,7 +2203,7 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, update the destination of STATE_LOG. 
*/ static re_dfastate_t * -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ transit_state (reg_errcode_t *err, re_match_context_t *mctx, re_dfastate_t *state) { @@ -2295,10 +2212,10 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, #ifdef RE_ENABLE_I18N /* If the current state can accept multibyte. */ - if (BE (state->accept_mb, 0)) + if (__glibc_unlikely (state->accept_mb)) { *err = transit_state_mb (mctx, state); - if (BE (*err != REG_NOERROR, 0)) + if (__glibc_unlikely (*err != REG_NOERROR)) return NULL; } #endif /* RE_ENABLE_I18N */ @@ -2315,11 +2232,11 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, for (;;) { trtable = state->trtable; - if (BE (trtable != NULL, 1)) + if (__glibc_likely (trtable != NULL)) return trtable[ch]; trtable = state->word_trtable; - if (BE (trtable != NULL, 1)) + if (__glibc_likely (trtable != NULL)) { unsigned int context; context @@ -2344,7 +2261,6 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, /* Update the state_log if we need */ static re_dfastate_t * -internal_function merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, re_dfastate_t *next_state) { @@ -2376,7 +2292,7 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, table_nodes = next_state->entrance_nodes; *err = re_node_set_init_union (&next_nodes, table_nodes, log_nodes); - if (BE (*err != REG_NOERROR, 0)) + if (__glibc_unlikely (*err != REG_NOERROR)) return NULL; } else @@ -2396,21 +2312,21 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, re_node_set_free (&next_nodes); } - if (BE (dfa->nbackref, 0) && next_state != NULL) + if (__glibc_unlikely (dfa->nbackref) && next_state != NULL) { /* Check OP_OPEN_SUBEXP in the current state in case that we use them later. We must check them here, since the back references in the next state might use them. 
*/ *err = check_subexp_matching_top (mctx, &next_state->nodes, cur_idx); - if (BE (*err != REG_NOERROR, 0)) + if (__glibc_unlikely (*err != REG_NOERROR)) return NULL; /* If the next state has back references. */ if (next_state->has_backref) { *err = transit_state_bkref (mctx, &next_state->nodes); - if (BE (*err != REG_NOERROR, 0)) + if (__glibc_unlikely (*err != REG_NOERROR)) return NULL; next_state = mctx->state_log[cur_idx]; } @@ -2423,7 +2339,6 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, multi-byte match, then look in the log for a state from which to restart matching. */ static re_dfastate_t * -internal_function find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) { re_dfastate_t *cur_state; @@ -2454,7 +2369,6 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) corresponding back references. */ static reg_errcode_t -internal_function check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, Idx str_idx) { @@ -2476,7 +2390,7 @@ check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) { err = match_ctx_add_subtop (mctx, node, str_idx); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } } @@ -2498,7 +2412,7 @@ transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, unsigned int context; *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); - if (BE (*err != REG_NOERROR, 0)) + if (__glibc_unlikely (*err != REG_NOERROR)) return NULL; for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) { @@ -2507,7 +2421,7 @@ transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, { *err = re_node_set_merge (&next_nodes, dfa->eclosures + dfa->nexts[cur_node]); - if (BE (*err != REG_NOERROR, 0)) + if (__glibc_unlikely (*err != REG_NOERROR)) { re_node_set_free (&next_nodes); return NULL; @@ -2527,7 +2441,6 @@ transit_state_sb (reg_errcode_t *err, 
re_match_context_t *mctx, #ifdef RE_ENABLE_I18N static reg_errcode_t -internal_function transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) { const re_dfa_t *const dfa = mctx->dfa; @@ -2567,11 +2480,9 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted : mctx->max_mb_elem_len); err = clean_state_log_if_needed (mctx, dest_idx); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; -#ifdef DEBUG - assert (dfa->nexts[cur_node_idx] != REG_MISSING); -#endif + DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1); new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; dest_state = mctx->state_log[dest_idx]; @@ -2581,7 +2492,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) { err = re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes, new_nodes); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } context = re_string_context_at (&mctx->input, dest_idx - 1, @@ -2590,7 +2501,8 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) = re_acquire_state_context (&err, dfa, &dest_nodes, context); if (dest_state != NULL) re_node_set_free (&dest_nodes); - if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) + if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL + && err != REG_NOERROR)) return err; } return REG_NOERROR; @@ -2598,7 +2510,6 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) #endif /* RE_ENABLE_I18N */ static reg_errcode_t -internal_function transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) { const re_dfa_t *const dfa = mctx->dfa; @@ -2630,14 +2541,12 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) Check the substring which the substring matched. 
*/ bkc_idx = mctx->nbkref_ents; err = get_subexp (mctx, node_idx, cur_str_idx); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; /* And add the epsilon closures (which is 'new_dest_nodes') of the backreference to appropriate state_log. */ -#ifdef DEBUG - assert (dfa->nexts[node_idx] != REG_MISSING); -#endif + DEBUG_ASSERT (dfa->nexts[node_idx] != -1); for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) { Idx subexp_len; @@ -2663,8 +2572,8 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) mctx->state_log[dest_str_idx] = re_acquire_state_context (&err, dfa, new_dest_nodes, context); - if (BE (mctx->state_log[dest_str_idx] == NULL - && err != REG_NOERROR, 0)) + if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR)) goto free_return; } else @@ -2673,7 +2582,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) err = re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes, new_dest_nodes); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_node_set_free (&dest_nodes); goto free_return; @@ -2681,8 +2590,8 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) mctx->state_log[dest_str_idx] = re_acquire_state_context (&err, dfa, &dest_nodes, context); re_node_set_free (&dest_nodes); - if (BE (mctx->state_log[dest_str_idx] == NULL - && err != REG_NOERROR, 0)) + if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR)) goto free_return; } /* We need to check recursively if the backreference can epsilon @@ -2692,10 +2601,10 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) { err = check_subexp_matching_top (mctx, new_dest_nodes, cur_str_idx); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto free_return; err = transit_state_bkref (mctx, new_dest_nodes); - if (BE (err != REG_NOERROR, 0)) + if 
(__glibc_unlikely (err != REG_NOERROR)) goto free_return; } } @@ -2712,7 +2621,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) delay these checking for prune_impossible_nodes(). */ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) { const re_dfa_t *const dfa = mctx->dfa; @@ -2720,7 +2629,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) const char *buf = (const char *) re_string_get_buffer (&mctx->input); /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); - if (cache_idx != REG_MISSING) + if (cache_idx != -1) { const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx; @@ -2756,7 +2665,8 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) at the back reference? */ if (sl_str_diff > 0) { - if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) + if (__glibc_unlikely (bkref_str_off + sl_str_diff + > mctx->input.valid_len)) { /* Not enough chars for a successful match. */ if (bkref_str_off + sl_str_diff > mctx->input.len) @@ -2765,7 +2675,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) err = clean_state_log_if_needed (mctx, bkref_str_off + sl_str_diff); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; buf = (const char *) re_string_get_buffer (&mctx->input); } @@ -2784,7 +2694,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) if (err == REG_NOMATCH) continue; - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } @@ -2803,14 +2713,14 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) at the back reference? 
*/ if (sl_str_off > 0) { - if (BE (bkref_str_off >= mctx->input.valid_len, 0)) + if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len)) { /* If we are at the end of the input, we cannot match. */ if (bkref_str_off >= mctx->input.len) break; err = extend_buffers (mctx, bkref_str_off + 1); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; buf = (const char *) re_string_get_buffer (&mctx->input); @@ -2825,7 +2735,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) nodes = &mctx->state_log[sl_str]->nodes; cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP); - if (cls_node == REG_MISSING) + if (cls_node == -1) continue; /* No. */ if (sub_top->path == NULL) { @@ -2841,15 +2751,18 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) OP_CLOSE_SUBEXP); if (err == REG_NOMATCH) continue; - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); - if (BE (sub_last == NULL, 0)) + if (__glibc_unlikely (sub_last == NULL)) return REG_ESPACE; err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str_idx); + buf = (const char *) re_string_get_buffer (&mctx->input); if (err == REG_NOMATCH) continue; + if (__glibc_unlikely (err != REG_NOERROR)) + return err; } } return REG_NOERROR; @@ -2862,7 +2775,6 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) and SUB_LAST. 
*/ static reg_errcode_t -internal_function get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str) { @@ -2876,7 +2788,7 @@ get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, return err; err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, sub_last->str_idx); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; return clean_state_log_if_needed (mctx, to_idx); @@ -2891,7 +2803,6 @@ get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, E.g. RE: (a){2} */ static Idx -internal_function find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, Idx subexp_idx, int type) { @@ -2904,7 +2815,7 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, && node->opr.idx == subexp_idx) return cls_node; } - return REG_MISSING; + return -1; } /* Check whether the node TOP_NODE at TOP_STR can arrive to the node @@ -2913,7 +2824,7 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, Idx top_str, Idx last_node, Idx last_str, int type) { @@ -2927,19 +2838,19 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, subexp_num = dfa->nodes[top_node].opr.idx; /* Extend the buffer if we need. 
*/ - if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) + if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1)) { re_dfastate_t **new_array; Idx old_alloc = path->alloc; Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1; Idx new_alloc; - if (BE (IDX_MAX - old_alloc < incr_alloc, 0)) + if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc)) return REG_ESPACE; new_alloc = old_alloc + incr_alloc; - if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0)) + if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc)) return REG_ESPACE; new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); - if (BE (new_array == NULL, 0)) + if (__glibc_unlikely (new_array == NULL)) return REG_ESPACE; path->array = new_array; path->alloc = new_alloc; @@ -2960,10 +2871,10 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, if (str_idx == top_str) { err = re_node_set_init_1 (&next_nodes, top_node); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_node_set_free (&next_nodes); return err; @@ -2975,7 +2886,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, if (cur_state && cur_state->has_backref) { err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } else @@ -2987,14 +2898,14 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, { err = expand_bkref_cache (mctx, &next_nodes, str_idx, subexp_num, type); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_node_set_free (&next_nodes); return err; } } cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); - if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + 
if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR)) { re_node_set_free (&next_nodes); return err; @@ -3009,7 +2920,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, { err = re_node_set_merge (&next_nodes, &mctx->state_log[str_idx + 1]->nodes); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_node_set_free (&next_nodes); return err; @@ -3020,7 +2931,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, err = check_arrival_add_next_nodes (mctx, str_idx, &cur_state->non_eps_nodes, &next_nodes); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_node_set_free (&next_nodes); return err; @@ -3030,14 +2941,14 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, if (next_nodes.nelem) { err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_node_set_free (&next_nodes); return err; } err = expand_bkref_cache (mctx, &next_nodes, str_idx, subexp_num, type); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_node_set_free (&next_nodes); return err; @@ -3045,7 +2956,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, } context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); - if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR)) { re_node_set_free (&next_nodes); return err; @@ -3078,7 +2989,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, Can't we unify them? 
*/ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, re_node_set *cur_nodes, re_node_set *next_nodes) { @@ -3094,10 +3005,8 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, { int naccepted = 0; Idx cur_node = cur_nodes->elems[cur_idx]; -#ifdef DEBUG - re_token_type_t type = dfa->nodes[cur_node].type; - assert (!IS_EPSILON_NODE (type)); -#endif + DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type)); + #ifdef RE_ENABLE_I18N /* If the node may accept "multi byte". */ if (dfa->nodes[cur_node].accept_mb) @@ -3114,22 +3023,22 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, if (dest_state) { err = re_node_set_merge (&union_set, &dest_state->nodes); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_node_set_free (&union_set); return err; } } ok = re_node_set_insert (&union_set, next_node); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) { re_node_set_free (&union_set); return REG_ESPACE; } mctx->state_log[next_idx] = re_acquire_state (&err, dfa, &union_set); - if (BE (mctx->state_log[next_idx] == NULL - && err != REG_NOERROR, 0)) + if (__glibc_unlikely (mctx->state_log[next_idx] == NULL + && err != REG_NOERROR)) { re_node_set_free (&union_set); return err; @@ -3141,7 +3050,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) { ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); - if (BE (! ok, 0)) + if (__glibc_unlikely (! 
ok)) { re_node_set_free (&union_set); return REG_ESPACE; @@ -3159,18 +3068,15 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, */ static reg_errcode_t -internal_function check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, Idx ex_subexp, int type) { reg_errcode_t err; Idx idx, outside_node; re_node_set new_nodes; -#ifdef DEBUG - assert (cur_nodes->nelem); -#endif + DEBUG_ASSERT (cur_nodes->nelem); err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; /* Create a new node set NEW_NODES with the nodes which are epsilon closures of the node in CUR_NODES. */ @@ -3180,11 +3086,11 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, Idx cur_node = cur_nodes->elems[idx]; const re_node_set *eclosure = dfa->eclosures + cur_node; outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); - if (outside_node == REG_MISSING) + if (outside_node == -1) { /* There are no problematic nodes, just merge them. */ err = re_node_set_merge (&new_nodes, eclosure); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_node_set_free (&new_nodes); return err; @@ -3195,7 +3101,7 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, /* There are problematic nodes, re-calculate incrementally. */ err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, ex_subexp, type); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) { re_node_set_free (&new_nodes); return err; @@ -3212,7 +3118,7 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, problematic append it to DST_NODES. 
*/ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, Idx target, Idx ex_subexp, int type) { @@ -3227,13 +3133,13 @@ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, if (type == OP_CLOSE_SUBEXP) { ok = re_node_set_insert (dst_nodes, cur_node); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) return REG_ESPACE; } break; } ok = re_node_set_insert (dst_nodes, cur_node); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) return REG_ESPACE; if (dfa->edests[cur_node].nelem == 0) break; @@ -3243,7 +3149,7 @@ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, err = check_arrival_expand_ecl_sub (dfa, dst_nodes, dfa->edests[cur_node].elems[1], ex_subexp, type); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } cur_node = dfa->edests[cur_node].elems[0]; @@ -3257,7 +3163,7 @@ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, in MCTX->BKREF_ENTS. 
*/ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, Idx cur_str, Idx subexp_num, int type) { @@ -3266,7 +3172,7 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str); struct re_backref_cache_entry *ent; - if (cache_idx_start == REG_MISSING) + if (cache_idx_start == -1) return REG_NOERROR; restart: @@ -3295,8 +3201,8 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); err3 = re_node_set_merge (cur_nodes, &new_dests); re_node_set_free (&new_dests); - if (BE (err != REG_NOERROR || err2 != REG_NOERROR - || err3 != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR + || err3 != REG_NOERROR)) { err = (err != REG_NOERROR ? err : (err2 != REG_NOERROR ? err2 : err3)); @@ -3318,7 +3224,7 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, err = re_node_set_init_copy (&union_set, &mctx->state_log[to_idx]->nodes); ok = re_node_set_insert (&union_set, next_node); - if (BE (err != REG_NOERROR || ! ok, 0)) + if (__glibc_unlikely (err != REG_NOERROR || ! ok)) { re_node_set_free (&union_set); err = err != REG_NOERROR ? 
err : REG_ESPACE; @@ -3328,13 +3234,13 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, else { err = re_node_set_init_1 (&union_set, next_node); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) return err; } mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); re_node_set_free (&union_set); - if (BE (mctx->state_log[to_idx] == NULL - && err != REG_NOERROR, 0)) + if (__glibc_unlikely (mctx->state_log[to_idx] == NULL + && err != REG_NOERROR)) return err; } } @@ -3346,7 +3252,6 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, Return true if successful. */ static bool -internal_function build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) { reg_errcode_t err; @@ -3378,7 +3283,7 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) else { dests_alloc = re_malloc (struct dests_alloc, 1); - if (BE (dests_alloc == NULL, 0)) + if (__glibc_unlikely (dests_alloc == NULL)) return false; dests_node_malloced = true; } @@ -3391,16 +3296,16 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) /* At first, group all nodes belonging to 'state' into several destinations. */ ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); - if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0)) + if (__glibc_unlikely (ndests <= 0)) { if (dests_node_malloced) - free (dests_alloc); + re_free (dests_alloc); /* Return false in case of an error, true otherwise. */ if (ndests == 0) { state->trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); - if (BE (state->trtable == NULL, 0)) + if (__glibc_unlikely (state->trtable == NULL)) return false; return true; } @@ -3408,14 +3313,14 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) } err = re_node_set_alloc (&follows, ndests + 1); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto out_free; /* Avoid arithmetic overflow in size calculation. 
*/ - if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX) - / (3 * sizeof (re_dfastate_t *))) - < ndests), - 0)) + size_t ndests_max + = ((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX) + / (3 * sizeof (re_dfastate_t *))); + if (__glibc_unlikely (ndests_max < ndests)) goto out_free; if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX @@ -3424,18 +3329,17 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) alloca (ndests * 3 * sizeof (re_dfastate_t *)); else { - dest_states = (re_dfastate_t **) - malloc (ndests * 3 * sizeof (re_dfastate_t *)); - if (BE (dest_states == NULL, 0)) + dest_states = re_malloc (re_dfastate_t *, ndests * 3); + if (__glibc_unlikely (dest_states == NULL)) { out_free: if (dest_states_malloced) - free (dest_states); + re_free (dest_states); re_node_set_free (&follows); for (i = 0; i < ndests; ++i) re_node_set_free (dests_node + i); if (dests_node_malloced) - free (dests_alloc); + re_free (dests_alloc); return false; } dest_states_malloced = true; @@ -3453,15 +3357,15 @@ out_free: for (j = 0; j < dests_node[i].nelem; ++j) { next_node = dfa->nexts[dests_node[i].elems[j]]; - if (next_node != REG_MISSING) + if (next_node != -1) { err = re_node_set_merge (&follows, dfa->eclosures + next_node); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto out_free; } } dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); - if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) + if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR)) goto out_free; /* If the new state has context constraint, build appropriate states for these contexts. 
*/ @@ -3469,7 +3373,8 @@ out_free: { dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, CONTEXT_WORD); - if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) + if (__glibc_unlikely (dest_states_word[i] == NULL + && err != REG_NOERROR)) goto out_free; if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) @@ -3477,7 +3382,7 @@ out_free: dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, CONTEXT_NEWLINE); - if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) + if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR)) goto out_free; } else @@ -3488,7 +3393,7 @@ out_free: bitset_merge (acceptable, dests_ch[i]); } - if (!BE (need_word_trtable, 0)) + if (!__glibc_unlikely (need_word_trtable)) { /* We don't care about whether the following character is a word character, or we are in a single-byte character set so we can @@ -3496,7 +3401,7 @@ out_free: 256-entry transition table. */ trtable = state->trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); - if (BE (trtable == NULL, 0)) + if (__glibc_unlikely (trtable == NULL)) goto out_free; /* For all characters ch...: */ @@ -3504,7 +3409,7 @@ out_free: for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; elem; mask <<= 1, elem >>= 1, ++ch) - if (BE (elem & 1, 0)) + if (__glibc_unlikely (elem & 1)) { /* There must be exactly one destination which accepts character ch. See group_nodes_into_DFAstates. */ @@ -3527,7 +3432,7 @@ out_free: starting at trtable[SBC_MAX]. 
*/ trtable = state->word_trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); - if (BE (trtable == NULL, 0)) + if (__glibc_unlikely (trtable == NULL)) goto out_free; /* For all characters ch...: */ @@ -3535,7 +3440,7 @@ out_free: for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; elem; mask <<= 1, elem >>= 1, ++ch) - if (BE (elem & 1, 0)) + if (__glibc_unlikely (elem & 1)) { /* There must be exactly one destination which accepts character ch. See group_nodes_into_DFAstates. */ @@ -3566,14 +3471,14 @@ out_free: } if (dest_states_malloced) - free (dest_states); + re_free (dest_states); re_node_set_free (&follows); for (i = 0; i < ndests; ++i) re_node_set_free (dests_node + i); if (dests_node_malloced) - free (dests_alloc); + re_free (dests_alloc); return true; } @@ -3584,7 +3489,6 @@ out_free: to DEST_CH[i]. This function return the number of destinations. */ static Idx -internal_function group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, re_node_set *dests_node, bitset_t *dests_ch) { @@ -3735,14 +3639,14 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, bitset_copy (dests_ch[ndests], remains); bitset_copy (dests_ch[j], intersec); err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto error_return; ++ndests; } /* Put the position in the current group. */ ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); - if (BE (! ok, 0)) + if (__glibc_unlikely (! ok)) goto error_return; /* If all characters are consumed, go to next node. 
*/ @@ -3754,17 +3658,18 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, { bitset_copy (dests_ch[ndests], accepts); err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); - if (BE (err != REG_NOERROR, 0)) + if (__glibc_unlikely (err != REG_NOERROR)) goto error_return; ++ndests; bitset_empty (accepts); } } + assume (ndests <= SBC_MAX); return ndests; error_return: for (j = 0; j < ndests; ++j) re_node_set_free (dests_node + j); - return REG_MISSING; + return -1; } #ifdef RE_ENABLE_I18N @@ -3781,7 +3686,6 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, # endif static int -internal_function check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, const re_string_t *input, Idx str_idx) { @@ -3789,10 +3693,10 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, int char_len, elem_len; Idx i; - if (BE (node->type == OP_UTF8_PERIOD, 0)) + if (__glibc_unlikely (node->type == OP_UTF8_PERIOD)) { unsigned char c = re_string_byte_at (input, str_idx), d; - if (BE (c < 0xc2, 1)) + if (__glibc_likely (c < 0xc2)) return 0; if (str_idx + 2 > input->len) @@ -3848,10 +3752,10 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, /* FIXME: I don't think this if is needed, as both '\n' and '\0' are char_len == 1. */ /* '.' accepts any one character except the following two cases. 
*/ - if ((!(dfa->syntax & RE_DOT_NEWLINE) && - re_string_byte_at (input, str_idx) == '\n') || - ((dfa->syntax & RE_DOT_NOT_NULL) && - re_string_byte_at (input, str_idx) == '\0')) + if ((!(dfa->syntax & RE_DOT_NEWLINE) + && re_string_byte_at (input, str_idx) == '\n') + || ((dfa->syntax & RE_DOT_NOT_NULL) + && re_string_byte_at (input, str_idx) == '\0')) return 0; return char_len; } @@ -3956,30 +3860,27 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); int32_t idx = findidx (table, indirect, extra, &cp, elem_len); + int32_t rule = idx >> 24; + idx &= 0xffffff; if (idx > 0) - for (i = 0; i < cset->nequiv_classes; ++i) - { - int32_t equiv_class_idx = cset->equiv_classes[i]; - size_t weight_len = weights[idx & 0xffffff]; - if (weight_len == weights[equiv_class_idx & 0xffffff] - && (idx >> 24) == (equiv_class_idx >> 24)) - { - Idx cnt = 0; - - idx &= 0xffffff; - equiv_class_idx &= 0xffffff; - - while (cnt <= weight_len - && (weights[equiv_class_idx + 1 + cnt] - == weights[idx + 1 + cnt])) - ++cnt; - if (cnt > weight_len) - { - match_len = elem_len; - goto check_node_accept_bytes_match; - } - } - } + { + size_t weight_len = weights[idx]; + for (i = 0; i < cset->nequiv_classes; ++i) + { + int32_t equiv_class_idx = cset->equiv_classes[i]; + int32_t equiv_class_rule = equiv_class_idx >> 24; + equiv_class_idx &= 0xffffff; + if (weights[equiv_class_idx] == weight_len + && equiv_class_rule == rule + && memcmp (weights + idx + 1, + weights + equiv_class_idx + 1, + weight_len) == 0) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + } + } } } else @@ -4011,7 +3912,6 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, # ifdef _LIBC static unsigned int -internal_function find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) { uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); @@ -4075,7 +3975,6 @@ 
find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) byte of the INPUT. */ static bool -internal_function check_node_accept (const re_match_context_t *mctx, const re_token_t *node, Idx idx) { @@ -4097,7 +3996,7 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, case OP_UTF8_PERIOD: if (ch >= ASCII_CHARS) return false; - /* FALLTHROUGH */ + FALLTHROUGH; #endif case OP_PERIOD: if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) @@ -4125,22 +4024,22 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, /* Extend the buffers, if the buffers have run out. */ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ extend_buffers (re_match_context_t *mctx, int min_len) { reg_errcode_t ret; re_string_t *pstr = &mctx->input; /* Avoid overflow. */ - if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2 - <= pstr->bufs_len, 0)) + if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2 + <= pstr->bufs_len)) return REG_ESPACE; /* Double the lengths of the buffers, but allocate at least MIN_LEN. */ ret = re_string_realloc_buffers (pstr, MAX (min_len, MIN (pstr->len, pstr->bufs_len * 2))); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) return ret; if (mctx->state_log != NULL) @@ -4151,7 +4050,7 @@ extend_buffers (re_match_context_t *mctx, int min_len) does not have the right size. 
*/ re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, pstr->bufs_len + 1); - if (BE (new_array == NULL, 0)) + if (__glibc_unlikely (new_array == NULL)) return REG_ESPACE; mctx->state_log = new_array; } @@ -4163,7 +4062,7 @@ extend_buffers (re_match_context_t *mctx, int min_len) if (pstr->mb_cur_max > 1) { ret = build_wcs_upper_buffer (pstr); - if (BE (ret != REG_NOERROR, 0)) + if (__glibc_unlikely (ret != REG_NOERROR)) return ret; } else @@ -4191,23 +4090,23 @@ extend_buffers (re_match_context_t *mctx, int min_len) /* Initialize MCTX. */ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) { mctx->eflags = eflags; - mctx->match_last = REG_MISSING; + mctx->match_last = -1; if (n > 0) { /* Avoid overflow. */ size_t max_object_size = MAX (sizeof (struct re_backref_cache_entry), sizeof (re_sub_match_top_t *)); - if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0)) + if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n)) return REG_ESPACE; mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); - if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) + if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL)) return REG_ESPACE; } /* Already zero-ed by the caller. @@ -4226,7 +4125,6 @@ match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) of the input, or changes the input string. */ static void -internal_function match_ctx_clean (re_match_context_t *mctx) { Idx st_idx; @@ -4246,7 +4144,7 @@ match_ctx_clean (re_match_context_t *mctx) re_free (top->path->array); re_free (top->path); } - free (top); + re_free (top); } mctx->nsub_tops = 0; @@ -4256,7 +4154,6 @@ match_ctx_clean (re_match_context_t *mctx) /* Free all the memory associated with MCTX. 
*/ static void -internal_function match_ctx_free (re_match_context_t *mctx) { /* First, free all the memory associated with MCTX->SUB_TOPS. */ @@ -4271,7 +4168,7 @@ match_ctx_free (re_match_context_t *mctx) */ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, Idx to) { @@ -4280,7 +4177,7 @@ match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, struct re_backref_cache_entry* new_entry; new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, mctx->abkref_ents * 2); - if (BE (new_entry == NULL, 0)) + if (__glibc_unlikely (new_entry == NULL)) { re_free (mctx->bkref_ents); return REG_ESPACE; @@ -4316,11 +4213,10 @@ match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, return REG_NOERROR; } -/* Return the first entry with the same str_idx, or REG_MISSING if none is +/* Return the first entry with the same str_idx, or -1 if none is found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ static Idx -internal_function search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) { Idx left, right, mid, last; @@ -4336,33 +4232,31 @@ search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) if (left < last && mctx->bkref_ents[left].str_idx == str_idx) return left; else - return REG_MISSING; + return -1; } /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches at STR_IDX. 
*/ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) { -#ifdef DEBUG - assert (mctx->sub_tops != NULL); - assert (mctx->asub_tops > 0); -#endif - if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) + DEBUG_ASSERT (mctx->sub_tops != NULL); + DEBUG_ASSERT (mctx->asub_tops > 0); + if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops)) { Idx new_asub_tops = mctx->asub_tops * 2; re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, re_sub_match_top_t *, new_asub_tops); - if (BE (new_array == NULL, 0)) + if (__glibc_unlikely (new_array == NULL)) return REG_ESPACE; mctx->sub_tops = new_array; mctx->asub_tops = new_asub_tops; } mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); - if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) + if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL)) return REG_ESPACE; mctx->sub_tops[mctx->nsub_tops]->node = node; mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; @@ -4373,23 +4267,22 @@ match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. 
*/ static re_sub_match_last_t * -internal_function match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) { re_sub_match_last_t *new_entry; - if (BE (subtop->nlasts == subtop->alasts, 0)) + if (__glibc_unlikely (subtop->nlasts == subtop->alasts)) { Idx new_alasts = 2 * subtop->alasts + 1; re_sub_match_last_t **new_array = re_realloc (subtop->lasts, re_sub_match_last_t *, new_alasts); - if (BE (new_array == NULL, 0)) + if (__glibc_unlikely (new_array == NULL)) return NULL; subtop->lasts = new_array; subtop->alasts = new_alasts; } new_entry = calloc (1, sizeof (re_sub_match_last_t)); - if (BE (new_entry != NULL, 1)) + if (__glibc_likely (new_entry != NULL)) { subtop->lasts[subtop->nlasts] = new_entry; new_entry->node = node; @@ -4400,7 +4293,6 @@ match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) } static void -internal_function sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx) { diff --git a/contrib/grep/lib/safe-read.c b/contrib/grep/lib/safe-read.c index dbb6338005..613e056d71 100644 --- a/contrib/grep/lib/safe-read.c +++ b/contrib/grep/lib/safe-read.c @@ -1,6 +1,6 @@ /* An interface to read and write that retries after interrupts. - Copyright (C) 1993-1994, 1998, 2002-2006, 2009-2015 Free Software + Copyright (C) 1993-1994, 1998, 2002-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include @@ -37,7 +37,7 @@ # define IS_EINTR(x) 0 #endif -#include +#include "sys-limits.h" #ifdef SAFE_WRITE # define safe_rw safe_write @@ -55,12 +55,6 @@ size_t safe_rw (int fd, void const *buf, size_t count) { - /* Work around a bug in Tru64 5.1. 
Attempting to read more than - INT_MAX bytes fails with errno == EINVAL. See - . - When decreasing COUNT, keep it block-aligned. */ - enum { BUGGY_READ_MAXIMUM = INT_MAX & ~8191 }; - for (;;) { ssize_t result = rw (fd, buf, count); @@ -69,8 +63,8 @@ safe_rw (int fd, void const *buf, size_t count) return result; else if (IS_EINTR (errno)) continue; - else if (errno == EINVAL && BUGGY_READ_MAXIMUM < count) - count = BUGGY_READ_MAXIMUM; + else if (errno == EINVAL && SYS_BUFSIZE_MAX < count) + count = SYS_BUFSIZE_MAX; else return result; } diff --git a/contrib/grep/lib/safe-read.h b/contrib/grep/lib/safe-read.h index b5d408acef..ea18423a80 100644 --- a/contrib/grep/lib/safe-read.h +++ b/contrib/grep/lib/safe-read.h @@ -1,5 +1,5 @@ /* An interface to read() that retries after interrupts. - Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Some system calls may be interrupted and fail with errno = EINTR in the following situations: diff --git a/contrib/grep/lib/same-inode.h b/contrib/grep/lib/same-inode.h index ecc3049d74..09c339a558 100644 --- a/contrib/grep/lib/same-inode.h +++ b/contrib/grep/lib/same-inode.h @@ -1,6 +1,6 @@ -/* Determine whether two stat buffers refer to the same file. +/* Determine whether two stat buffers are known to refer to the same file. - Copyright (C) 2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2006, 2009-2020 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,17 +13,31 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef SAME_INODE_H # define SAME_INODE_H 1 -# ifdef __VMS +# include + +# if defined __VMS && __CRTL_VER < 80200000 # define SAME_INODE(a, b) \ ((a).st_ino[0] == (b).st_ino[0] \ && (a).st_ino[1] == (b).st_ino[1] \ && (a).st_ino[2] == (b).st_ino[2] \ && (a).st_dev == (b).st_dev) +# elif defined _WIN32 && ! defined __CYGWIN__ + /* Native Windows. */ +# if _GL_WINDOWS_STAT_INODES + /* stat() and fstat() set st_dev and st_ino to 0 if information about + the inode is not available. */ +# define SAME_INODE(a, b) \ + (!((a).st_ino == 0 && (a).st_dev == 0) \ + && (a).st_ino == (b).st_ino && (a).st_dev == (b).st_dev) +# else + /* stat() and fstat() set st_ino to 0 always. */ +# define SAME_INODE(a, b) 0 +# endif # else # define SAME_INODE(a, b) \ ((a).st_ino == (b).st_ino \ diff --git a/contrib/grep/lib/save-cwd.c b/contrib/grep/lib/save-cwd.c index 159d8a89c6..c2608d58b2 100644 --- a/contrib/grep/lib/save-cwd.c +++ b/contrib/grep/lib/save-cwd.c @@ -1,6 +1,6 @@ /* save-cwd.c -- Save and restore current working directory. - Copyright (C) 1995, 1997-1998, 2003-2006, 2009-2015 Free Software + Copyright (C) 1995, 1997-1998, 2003-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Jim Meyering. 
*/ @@ -30,7 +30,6 @@ #include "chdir-long.h" #include "unistd--.h" -#include "cloexec.h" #if GNULIB_FCNTL_SAFER # include "fcntl--.h" @@ -64,16 +63,15 @@ save_cwd (struct saved_cwd *cwd) { cwd->name = NULL; - cwd->desc = open (".", O_SEARCH); + cwd->desc = open (".", O_SEARCH | O_CLOEXEC); if (!GNULIB_FCNTL_SAFER) - cwd->desc = fd_safer (cwd->desc); + cwd->desc = fd_safer_flag (cwd->desc, O_CLOEXEC); if (cwd->desc < 0) { cwd->name = getcwd (NULL, 0); return cwd->name ? 0 : -1; } - set_cloexec_flag (cwd->desc, true); return 0; } diff --git a/contrib/grep/lib/save-cwd.h b/contrib/grep/lib/save-cwd.h index 890f7fba51..db8bfd25fc 100644 --- a/contrib/grep/lib/save-cwd.h +++ b/contrib/grep/lib/save-cwd.h @@ -1,6 +1,6 @@ /* Save and restore current working directory. - Copyright (C) 1995, 1997-1998, 2003, 2009-2015 Free Software Foundation, + Copyright (C) 1995, 1997-1998, 2003, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Jim Meyering. */ diff --git a/contrib/grep/lib/setlocale-lock.c b/contrib/grep/lib/setlocale-lock.c new file mode 100644 index 0000000000..7dde32d188 --- /dev/null +++ b/contrib/grep/lib/setlocale-lock.c @@ -0,0 +1,150 @@ +/* Return the internal lock used by setlocale_null_r. + Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2019. */ + +#include + +/* When it is known that the gl_get_setlocale_null_lock function is defined + by a dependency library, it should not be defined here. */ +#if OMIT_SETLOCALE_LOCK + +/* This declaration is solely to ensure that after preprocessing + this file is never empty. */ +typedef int dummy; + +#else + +/* This file defines the internal lock used by setlocale_null_r. + It is a separate compilation unit, so that only one copy of it is + present when linking statically. */ + +/* Prohibit renaming this symbol. */ +# undef gl_get_setlocale_null_lock + +/* Macro for exporting a symbol (function, not variable) defined in this file, + when compiled into a shared library. */ +# ifndef DLL_EXPORTED +# if HAVE_VISIBILITY + /* Override the effect of the compiler option '-fvisibility=hidden'. */ +# define DLL_EXPORTED __attribute__((__visibility__("default"))) +# elif defined _WIN32 || defined __CYGWIN__ +# define DLL_EXPORTED __declspec(dllexport) +# else +# define DLL_EXPORTED +# endif +# endif + +# if defined _WIN32 && !defined __CYGWIN__ + +# define WIN32_LEAN_AND_MEAN /* avoid including junk */ +# include + +# include "windows-initguard.h" + +/* The return type is a 'CRITICAL_SECTION *', not a 'glwthread_mutex_t *', + because the latter is not guaranteed to be a stable ABI in the future. */ + +/* Make sure the function gets exported from DLLs. */ +DLL_EXPORTED CRITICAL_SECTION *gl_get_setlocale_null_lock (void); + +static glwthread_initguard_t guard = GLWTHREAD_INITGUARD_INIT; +static CRITICAL_SECTION lock; + +/* Returns the internal lock used by setlocale_null_r. */ +CRITICAL_SECTION * +gl_get_setlocale_null_lock (void) +{ + if (!guard.done) + { + if (InterlockedIncrement (&guard.started) == 0) + { + /* This thread is the first one to need the lock. 
Initialize it. */ + InitializeCriticalSection (&lock); + guard.done = 1; + } + else + { + /* Don't let guard.started grow and wrap around. */ + InterlockedDecrement (&guard.started); + /* Yield the CPU while waiting for another thread to finish + initializing this mutex. */ + while (!guard.done) + Sleep (0); + } + } + return &lock; +} + +# elif HAVE_PTHREAD_API + +# include + +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +/* Make sure the function gets exported from shared libraries. */ +DLL_EXPORTED pthread_mutex_t *gl_get_setlocale_null_lock (void); + +/* Returns the internal lock used by setlocale_null_r. */ +pthread_mutex_t * +gl_get_setlocale_null_lock (void) +{ + return &mutex; +} + +# elif HAVE_THREADS_H + +# include +# include + +static int volatile init_needed = 1; +static once_flag init_once = ONCE_FLAG_INIT; +static mtx_t mutex; + +static void +atomic_init (void) +{ + if (mtx_init (&mutex, mtx_plain) != thrd_success) + abort (); + init_needed = 0; +} + +/* Make sure the function gets exported from shared libraries. */ +DLL_EXPORTED mtx_t *gl_get_setlocale_null_lock (void); + +/* Returns the internal lock used by setlocale_null_r. */ +mtx_t * +gl_get_setlocale_null_lock (void) +{ + if (init_needed) + call_once (&init_once, atomic_init); + return &mutex; +} + +# endif + +# if (defined _WIN32 || defined __CYGWIN__) && !defined _MSC_VER +/* Make sure the '__declspec(dllimport)' in setlocale_null.c does not cause + a link failure when no DLLs are involved. */ +# if defined _WIN64 || defined _LP64 +# define IMP(x) __imp_##x +# else +# define IMP(x) _imp__##x +# endif +void * IMP(gl_get_setlocale_null_lock) = &gl_get_setlocale_null_lock; +# endif + +#endif diff --git a/contrib/grep/lib/setlocale_null.c b/contrib/grep/lib/setlocale_null.c new file mode 100644 index 0000000000..abe55b5a3a --- /dev/null +++ b/contrib/grep/lib/setlocale_null.c @@ -0,0 +1,411 @@ +/* Query the name of the current global locale. 
+ Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2019. */ + +#include + +/* Specification. */ +#include "setlocale_null.h" + +#include +#include +#include +#include +#if defined _WIN32 && !defined __CYGWIN__ +# include +#endif + +#if !(SETLOCALE_NULL_ALL_MTSAFE && SETLOCALE_NULL_ONE_MTSAFE) +# if defined _WIN32 && !defined __CYGWIN__ + +# define WIN32_LEAN_AND_MEAN /* avoid including junk */ +# include + +# elif HAVE_PTHREAD_API + +# include +# if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS +# include +# pragma weak thrd_exit +# define c11_threads_in_use() (thrd_exit != NULL) +# else +# define c11_threads_in_use() 0 +# endif + +# elif HAVE_THREADS_H + +# include + +# endif +#endif + +/* Use the system's setlocale() function, not the gnulib override, here. 
*/ +#undef setlocale + +static const char * +setlocale_null_androidfix (int category) +{ + const char *result = setlocale (category, NULL); + +#ifdef __ANDROID__ + if (result == NULL) + switch (category) + { + case LC_CTYPE: + case LC_NUMERIC: + case LC_TIME: + case LC_COLLATE: + case LC_MONETARY: + case LC_MESSAGES: + case LC_ALL: + case LC_PAPER: + case LC_NAME: + case LC_ADDRESS: + case LC_TELEPHONE: + case LC_MEASUREMENT: + result = "C"; + break; + default: + break; + } +#endif + + return result; +} + +static int +setlocale_null_unlocked (int category, char *buf, size_t bufsize) +{ +#if defined _WIN32 && !defined __CYGWIN__ && defined _MSC_VER + /* On native Windows, nowadays, the setlocale() implementation is based + on _wsetlocale() and uses malloc() for the result. We are better off + using _wsetlocale() directly. */ + const wchar_t *result = _wsetlocale (category, NULL); + + if (result == NULL) + { + /* CATEGORY is invalid. */ + if (bufsize > 0) + /* Return an empty string in BUF. + This is a convenience for callers that don't want to write explicit + code for handling EINVAL. */ + buf[0] = '\0'; + return EINVAL; + } + else + { + size_t length = wcslen (result); + if (length < bufsize) + { + size_t i; + + /* Convert wchar_t[] -> char[], assuming plain ASCII. */ + for (i = 0; i <= length; i++) + buf[i] = result[i]; + + return 0; + } + else + { + if (bufsize > 0) + { + /* Return a truncated result in BUF. + This is a convenience for callers that don't want to write + explicit code for handling ERANGE. */ + size_t i; + + /* Convert wchar_t[] -> char[], assuming plain ASCII. */ + for (i = 0; i < bufsize; i++) + buf[i] = result[i]; + buf[bufsize - 1] = '\0'; + } + return ERANGE; + } + } +#else + const char *result = setlocale_null_androidfix (category); + + if (result == NULL) + { + /* CATEGORY is invalid. */ + if (bufsize > 0) + /* Return an empty string in BUF. + This is a convenience for callers that don't want to write explicit + code for handling EINVAL. 
*/ + buf[0] = '\0'; + return EINVAL; + } + else + { + size_t length = strlen (result); + if (length < bufsize) + { + memcpy (buf, result, length + 1); + return 0; + } + else + { + if (bufsize > 0) + { + /* Return a truncated result in BUF. + This is a convenience for callers that don't want to write + explicit code for handling ERANGE. */ + memcpy (buf, result, bufsize - 1); + buf[bufsize - 1] = '\0'; + } + return ERANGE; + } + } +#endif +} + +#if !(SETLOCALE_NULL_ALL_MTSAFE && SETLOCALE_NULL_ONE_MTSAFE) /* musl libc, macOS, FreeBSD, NetBSD, OpenBSD, AIX, Haiku, Cygwin */ + +/* Use a lock, so that no two threads can invoke setlocale_null_unlocked + at the same time. */ + +/* Prohibit renaming this symbol. */ +# undef gl_get_setlocale_null_lock + +# if defined _WIN32 && !defined __CYGWIN__ + +extern __declspec(dllimport) CRITICAL_SECTION *gl_get_setlocale_null_lock (void); + +static int +setlocale_null_with_lock (int category, char *buf, size_t bufsize) +{ + CRITICAL_SECTION *lock = gl_get_setlocale_null_lock (); + int ret; + + EnterCriticalSection (lock); + ret = setlocale_null_unlocked (category, buf, bufsize); + LeaveCriticalSection (lock); + + return ret; +} + +# elif HAVE_PTHREAD_API /* musl libc, macOS, FreeBSD, NetBSD, OpenBSD, AIX, Haiku, Cygwin */ + +extern +# if defined _WIN32 || defined __CYGWIN__ + __declspec(dllimport) +# endif + pthread_mutex_t *gl_get_setlocale_null_lock (void); + +# if HAVE_WEAK_SYMBOLS /* musl libc, FreeBSD, NetBSD, OpenBSD, Haiku */ + + /* Avoid the need to link with '-lpthread'. */ +# pragma weak pthread_mutex_lock +# pragma weak pthread_mutex_unlock + + /* Determine whether libpthread is in use. */ +# pragma weak pthread_mutexattr_gettype + /* See the comments in lock.h. 
*/ +# define pthread_in_use() \ + (pthread_mutexattr_gettype != NULL || c11_threads_in_use ()) + +# else +# define pthread_in_use() 1 +# endif + +static int +setlocale_null_with_lock (int category, char *buf, size_t bufsize) +{ + if (pthread_in_use()) + { + pthread_mutex_t *lock = gl_get_setlocale_null_lock (); + int ret; + + if (pthread_mutex_lock (lock)) + abort (); + ret = setlocale_null_unlocked (category, buf, bufsize); + if (pthread_mutex_unlock (lock)) + abort (); + + return ret; + } + else + return setlocale_null_unlocked (category, buf, bufsize); +} + +# elif HAVE_THREADS_H + +extern mtx_t *gl_get_setlocale_null_lock (void); + +static int +setlocale_null_with_lock (int category, char *buf, size_t bufsize) +{ + mtx_t *lock = gl_get_setlocale_null_lock (); + int ret; + + if (mtx_lock (lock) != thrd_success) + abort (); + ret = setlocale_null_unlocked (category, buf, bufsize); + if (mtx_unlock (lock) != thrd_success) + abort (); + + return ret; +} + +# endif + +#endif + +int +setlocale_null_r (int category, char *buf, size_t bufsize) +{ +#if SETLOCALE_NULL_ALL_MTSAFE +# if SETLOCALE_NULL_ONE_MTSAFE + + return setlocale_null_unlocked (category, buf, bufsize); + +# else + + if (category == LC_ALL) + return setlocale_null_unlocked (category, buf, bufsize); + else + return setlocale_null_with_lock (category, buf, bufsize); + +# endif +#else +# if SETLOCALE_NULL_ONE_MTSAFE + + if (category == LC_ALL) + return setlocale_null_with_lock (category, buf, bufsize); + else + return setlocale_null_unlocked (category, buf, bufsize); + +# else + + return setlocale_null_with_lock (category, buf, bufsize); + +# endif +#endif +} + +const char * +setlocale_null (int category) +{ +#if SETLOCALE_NULL_ALL_MTSAFE && SETLOCALE_NULL_ONE_MTSAFE + return setlocale_null_androidfix (category); +#else + + /* This call must be multithread-safe. To achieve this without using + thread-local storage: + 1. We use a specific static buffer for each possible CATEGORY + argument. 
So that different threads can call setlocale_mtsafe + with different CATEGORY arguments, without interfering. + 2. We use a simple strcpy or memcpy to fill this static buffer. + Filling it through, for example, strcpy + strcat would not be + guaranteed to leave the buffer's contents intact if another thread + is currently accessing it. If necessary, the contents is first + assembled in a stack-allocated buffer. */ + if (category == LC_ALL) + { +# if SETLOCALE_NULL_ALL_MTSAFE + return setlocale_null_androidfix (LC_ALL); +# else + char buf[SETLOCALE_NULL_ALL_MAX]; + static char resultbuf[SETLOCALE_NULL_ALL_MAX]; + + if (setlocale_null_r (LC_ALL, buf, sizeof (buf))) + return "C"; + strcpy (resultbuf, buf); + return resultbuf; +# endif + } + else + { +# if SETLOCALE_NULL_ONE_MTSAFE + return setlocale_null_androidfix (category); +# else + enum + { + LC_CTYPE_INDEX, + LC_NUMERIC_INDEX, + LC_TIME_INDEX, + LC_COLLATE_INDEX, + LC_MONETARY_INDEX, + LC_MESSAGES_INDEX, +# ifdef LC_PAPER + LC_PAPER_INDEX, +# endif +# ifdef LC_NAME + LC_NAME_INDEX, +# endif +# ifdef LC_ADDRESS + LC_ADDRESS_INDEX, +# endif +# ifdef LC_TELEPHONE + LC_TELEPHONE_INDEX, +# endif +# ifdef LC_MEASUREMENT + LC_MEASUREMENT_INDEX, +# endif +# ifdef LC_IDENTIFICATION + LC_IDENTIFICATION_INDEX, +# endif + LC_INDICES_COUNT + } + i; + char buf[SETLOCALE_NULL_MAX]; + static char resultbuf[LC_INDICES_COUNT][SETLOCALE_NULL_MAX]; + int err; + + err = setlocale_null_r (category, buf, sizeof (buf)); + if (err == EINVAL) + return NULL; + if (err) + return "C"; + + switch (category) + { + case LC_CTYPE: i = LC_CTYPE_INDEX; break; + case LC_NUMERIC: i = LC_NUMERIC_INDEX; break; + case LC_TIME: i = LC_TIME_INDEX; break; + case LC_COLLATE: i = LC_COLLATE_INDEX; break; + case LC_MONETARY: i = LC_MONETARY_INDEX; break; + case LC_MESSAGES: i = LC_MESSAGES_INDEX; break; +# ifdef LC_PAPER + case LC_PAPER: i = LC_PAPER_INDEX; break; +# endif +# ifdef LC_NAME + case LC_NAME: i = LC_NAME_INDEX; break; +# endif +# ifdef 
LC_ADDRESS + case LC_ADDRESS: i = LC_ADDRESS_INDEX; break; +# endif +# ifdef LC_TELEPHONE + case LC_TELEPHONE: i = LC_TELEPHONE_INDEX; break; +# endif +# ifdef LC_MEASUREMENT + case LC_MEASUREMENT: i = LC_MEASUREMENT_INDEX; break; +# endif +# ifdef LC_IDENTIFICATION + case LC_IDENTIFICATION: i = LC_IDENTIFICATION_INDEX; break; +# endif + default: + /* If you get here, a #ifdef LC_xxx is missing. */ + abort (); + } + + strcpy (resultbuf[i], buf); + return resultbuf[i]; +# endif + } +#endif +} diff --git a/contrib/grep/lib/setlocale_null.h b/contrib/grep/lib/setlocale_null.h new file mode 100644 index 0000000000..6844be5fac --- /dev/null +++ b/contrib/grep/lib/setlocale_null.h @@ -0,0 +1,82 @@ +/* Query the name of the current global locale. + Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2019. */ + +#ifndef _SETLOCALE_NULL_H +#define _SETLOCALE_NULL_H + +#include + +#include "arg-nonnull.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Recommended size of a buffer for a locale name for a single category. + On glibc systems, you can have locale names that are relative file names; + assume a maximum length 256. + In native Windows, in 2018 the longest locale name was of length 58 + ("FYRO Macedonian_Former Yugoslav Republic of Macedonia.1251"). 
*/ +#define SETLOCALE_NULL_MAX (256+1) + +/* Recommended size of a buffer for a locale name with all categories. + On glibc systems, you can have locale names that are relative file names; + assume maximum length 256 for each. There are 12 categories; so, the + maximum total length is 148+12*256. + In native Windows, there are 5 categories, and the maximum total length is + 55+5*58. */ +#define SETLOCALE_NULL_ALL_MAX (148+12*256+1) + +/* setlocale_null_r (CATEGORY, BUF, BUFSIZE) is like setlocale (CATEGORY, NULL), + except that + - it is guaranteed to be multithread-safe, + - it returns the resulting locale category name or locale name in the + user-supplied buffer BUF, which must be BUFSIZE bytes long. + The recommended minimum buffer size is + - SETLOCALE_NULL_MAX for CATEGORY != LC_ALL, and + - SETLOCALE_NULL_ALL_MAX for CATEGORY == LC_ALL. + The return value is an error code: 0 if the call is successful, EINVAL if + CATEGORY is invalid, or ERANGE if BUFSIZE is smaller than the length needed + size (including the trailing NUL byte). In the latter case, a truncated + result is returned in BUF, but still NUL-terminated if BUFSIZE > 0. + For this call to be multithread-safe, *all* calls to + setlocale (CATEGORY, NULL) in all other threads must have been converted + to use setlocale_null_r or setlocale_null as well, and the other threads + must not make other setlocale invocations (since changing the global locale + has side effects on all threads). */ +extern int setlocale_null_r (int category, char *buf, size_t bufsize) + _GL_ARG_NONNULL ((2)); + +/* setlocale_null (CATEGORY) is like setlocale (CATEGORY, NULL), except that + it is guaranteed to be multithread-safe. + The return value is NULL if CATEGORY is invalid. 
+ For this call to be multithread-safe, *all* calls to + setlocale (CATEGORY, NULL) in all other threads must have been converted + to use setlocale_null_r or setlocale_null as well, and the other threads + must not make other setlocale invocations (since changing the global locale + has side effects on all threads). */ +extern const char *setlocale_null (int category); + + +#ifdef __cplusplus +} +#endif + +#endif /* _SETLOCALE_NULL_H */ diff --git a/contrib/grep/lib/sig-handler.c b/contrib/grep/lib/sig-handler.c new file mode 100644 index 0000000000..52c36217e5 --- /dev/null +++ b/contrib/grep/lib/sig-handler.c @@ -0,0 +1,3 @@ +#include +#define SIG_HANDLER_INLINE _GL_EXTERN_INLINE +#include "sig-handler.h" diff --git a/contrib/grep/lib/sig-handler.h b/contrib/grep/lib/sig-handler.h new file mode 100644 index 0000000000..17604eada3 --- /dev/null +++ b/contrib/grep/lib/sig-handler.h @@ -0,0 +1,51 @@ +/* Convenience declarations when working with . + + Copyright (C) 2008-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef _GL_SIG_HANDLER_H +#define _GL_SIG_HANDLER_H + +#include + +#ifndef _GL_INLINE_HEADER_BEGIN + #error "Please include config.h first." +#endif +_GL_INLINE_HEADER_BEGIN +#ifndef SIG_HANDLER_INLINE +# define SIG_HANDLER_INLINE _GL_INLINE +#endif + +/* Convenience type when working with signal handlers. 
*/ +typedef void (*sa_handler_t) (int); + +/* Return the handler of a signal, as a sa_handler_t value regardless + of its true type. The resulting function can be compared to + special values like SIG_IGN but it is not portable to call it. */ +SIG_HANDLER_INLINE sa_handler_t _GL_ATTRIBUTE_PURE +get_handler (struct sigaction const *a) +{ + /* POSIX says that special values like SIG_IGN can only occur when + action.sa_flags does not contain SA_SIGINFO. But in Linux 2.4, + for example, sa_sigaction and sa_handler are aliases and a signal + is ignored if sa_sigaction (after casting) equals SIG_IGN. In + this case, this implementation relies on the fact that the two + are aliases, and simply returns sa_handler. */ + return a->sa_handler; +} + +_GL_INLINE_HEADER_END + +#endif /* _GL_SIG_HANDLER_H */ diff --git a/contrib/grep/lib/stat-time.c b/contrib/grep/lib/stat-time.c new file mode 100644 index 0000000000..81b83ddb4f --- /dev/null +++ b/contrib/grep/lib/stat-time.c @@ -0,0 +1,3 @@ +#include +#define _GL_STAT_TIME_INLINE _GL_EXTERN_INLINE +#include "stat-time.h" diff --git a/contrib/grep/lib/stat-time.h b/contrib/grep/lib/stat-time.h new file mode 100644 index 0000000000..884ffd829a --- /dev/null +++ b/contrib/grep/lib/stat-time.h @@ -0,0 +1,252 @@ +/* stat-related time functions. + + Copyright (C) 2005, 2007, 2009-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +/* Written by Paul Eggert. */ + +#ifndef STAT_TIME_H +#define STAT_TIME_H 1 + +#include "intprops.h" + +#include +#include +#include +#include + +#ifndef _GL_INLINE_HEADER_BEGIN + #error "Please include config.h first." +#endif +_GL_INLINE_HEADER_BEGIN +#ifndef _GL_STAT_TIME_INLINE +# define _GL_STAT_TIME_INLINE _GL_INLINE +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* STAT_TIMESPEC (ST, ST_XTIM) is the ST_XTIM member for *ST of type + struct timespec, if available. If not, then STAT_TIMESPEC_NS (ST, + ST_XTIM) is the nanosecond component of the ST_XTIM member for *ST, + if available. ST_XTIM can be st_atim, st_ctim, st_mtim, or st_birthtim + for access, status change, data modification, or birth (creation) + time respectively. + + These macros are private to stat-time.h. */ +#if _GL_WINDOWS_STAT_TIMESPEC || defined HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC +# if _GL_WINDOWS_STAT_TIMESPEC || defined TYPEOF_STRUCT_STAT_ST_ATIM_IS_STRUCT_TIMESPEC +# define STAT_TIMESPEC(st, st_xtim) ((st)->st_xtim) +# else +# define STAT_TIMESPEC_NS(st, st_xtim) ((st)->st_xtim.tv_nsec) +# endif +#elif defined HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC +# define STAT_TIMESPEC(st, st_xtim) ((st)->st_xtim##espec) +#elif defined HAVE_STRUCT_STAT_ST_ATIMENSEC +# define STAT_TIMESPEC_NS(st, st_xtim) ((st)->st_xtim##ensec) +#elif defined HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC +# define STAT_TIMESPEC_NS(st, st_xtim) ((st)->st_xtim.st__tim.tv_nsec) +#endif + +/* Return the nanosecond component of *ST's access time. */ +_GL_STAT_TIME_INLINE long int _GL_ATTRIBUTE_PURE +get_stat_atime_ns (struct stat const *st) +{ +# if defined STAT_TIMESPEC + return STAT_TIMESPEC (st, st_atim).tv_nsec; +# elif defined STAT_TIMESPEC_NS + return STAT_TIMESPEC_NS (st, st_atim); +# else + return 0; +# endif +} + +/* Return the nanosecond component of *ST's status change time. 
*/ +_GL_STAT_TIME_INLINE long int _GL_ATTRIBUTE_PURE +get_stat_ctime_ns (struct stat const *st) +{ +# if defined STAT_TIMESPEC + return STAT_TIMESPEC (st, st_ctim).tv_nsec; +# elif defined STAT_TIMESPEC_NS + return STAT_TIMESPEC_NS (st, st_ctim); +# else + return 0; +# endif +} + +/* Return the nanosecond component of *ST's data modification time. */ +_GL_STAT_TIME_INLINE long int _GL_ATTRIBUTE_PURE +get_stat_mtime_ns (struct stat const *st) +{ +# if defined STAT_TIMESPEC + return STAT_TIMESPEC (st, st_mtim).tv_nsec; +# elif defined STAT_TIMESPEC_NS + return STAT_TIMESPEC_NS (st, st_mtim); +# else + return 0; +# endif +} + +/* Return the nanosecond component of *ST's birth time. */ +_GL_STAT_TIME_INLINE long int _GL_ATTRIBUTE_PURE +get_stat_birthtime_ns (struct stat const *st _GL_UNUSED) +{ +# if defined HAVE_STRUCT_STAT_ST_BIRTHTIMESPEC_TV_NSEC + return STAT_TIMESPEC (st, st_birthtim).tv_nsec; +# elif defined HAVE_STRUCT_STAT_ST_BIRTHTIMENSEC + return STAT_TIMESPEC_NS (st, st_birthtim); +# else + return 0; +# endif +} + +/* Return *ST's access time. */ +_GL_STAT_TIME_INLINE struct timespec _GL_ATTRIBUTE_PURE +get_stat_atime (struct stat const *st) +{ +#ifdef STAT_TIMESPEC + return STAT_TIMESPEC (st, st_atim); +#else + struct timespec t; + t.tv_sec = st->st_atime; + t.tv_nsec = get_stat_atime_ns (st); + return t; +#endif +} + +/* Return *ST's status change time. */ +_GL_STAT_TIME_INLINE struct timespec _GL_ATTRIBUTE_PURE +get_stat_ctime (struct stat const *st) +{ +#ifdef STAT_TIMESPEC + return STAT_TIMESPEC (st, st_ctim); +#else + struct timespec t; + t.tv_sec = st->st_ctime; + t.tv_nsec = get_stat_ctime_ns (st); + return t; +#endif +} + +/* Return *ST's data modification time. 
*/ +_GL_STAT_TIME_INLINE struct timespec _GL_ATTRIBUTE_PURE +get_stat_mtime (struct stat const *st) +{ +#ifdef STAT_TIMESPEC + return STAT_TIMESPEC (st, st_mtim); +#else + struct timespec t; + t.tv_sec = st->st_mtime; + t.tv_nsec = get_stat_mtime_ns (st); + return t; +#endif +} + +/* Return *ST's birth time, if available; otherwise return a value + with tv_sec and tv_nsec both equal to -1. */ +_GL_STAT_TIME_INLINE struct timespec _GL_ATTRIBUTE_PURE +get_stat_birthtime (struct stat const *st _GL_UNUSED) +{ + struct timespec t; + +#if (defined HAVE_STRUCT_STAT_ST_BIRTHTIMESPEC_TV_NSEC \ + || defined HAVE_STRUCT_STAT_ST_BIRTHTIM_TV_NSEC) + t = STAT_TIMESPEC (st, st_birthtim); +#elif defined HAVE_STRUCT_STAT_ST_BIRTHTIMENSEC + t.tv_sec = st->st_birthtime; + t.tv_nsec = st->st_birthtimensec; +#elif defined _WIN32 && ! defined __CYGWIN__ + /* Native Windows platforms (but not Cygwin) put the "file creation + time" in st_ctime (!). See + . */ +# if _GL_WINDOWS_STAT_TIMESPEC + t = st->st_ctim; +# else + t.tv_sec = st->st_ctime; + t.tv_nsec = 0; +# endif +#else + /* Birth time is not supported. */ + t.tv_sec = -1; + t.tv_nsec = -1; +#endif + +#if (defined HAVE_STRUCT_STAT_ST_BIRTHTIMESPEC_TV_NSEC \ + || defined HAVE_STRUCT_STAT_ST_BIRTHTIM_TV_NSEC \ + || defined HAVE_STRUCT_STAT_ST_BIRTHTIMENSEC) + /* FreeBSD and NetBSD sometimes signal the absence of knowledge by + using zero. Attempt to work around this problem. Alas, this can + report failure even for valid timestamps. Also, NetBSD + sometimes returns junk in the birth time fields; work around this + bug if it is detected. */ + if (! (t.tv_sec && 0 <= t.tv_nsec && t.tv_nsec < 1000000000)) + { + t.tv_sec = -1; + t.tv_nsec = -1; + } +#endif + + return t; +} + +/* If a stat-like function returned RESULT, normalize the timestamps + in *ST, in case this platform suffers from the Solaris 11 bug where + tv_nsec might be negative. Return the adjusted RESULT, setting + errno to EOVERFLOW if normalization overflowed. 
This function + is intended to be private to this .h file. */ +_GL_STAT_TIME_INLINE int +stat_time_normalize (int result, struct stat *st _GL_UNUSED) +{ +#if defined __sun && defined STAT_TIMESPEC + if (result == 0) + { + long int timespec_hz = 1000000000; + short int const ts_off[] = { offsetof (struct stat, st_atim), + offsetof (struct stat, st_mtim), + offsetof (struct stat, st_ctim) }; + int i; + for (i = 0; i < sizeof ts_off / sizeof *ts_off; i++) + { + struct timespec *ts = (struct timespec *) ((char *) st + ts_off[i]); + long int q = ts->tv_nsec / timespec_hz; + long int r = ts->tv_nsec % timespec_hz; + if (r < 0) + { + r += timespec_hz; + q--; + } + ts->tv_nsec = r; + /* Overflow is possible, as Solaris 11 stat can yield + tv_sec == TYPE_MINIMUM (time_t) && tv_nsec == -1000000000. + INT_ADD_WRAPV is OK, since time_t is signed on Solaris. */ + if (INT_ADD_WRAPV (q, ts->tv_sec, &ts->tv_sec)) + { + errno = EOVERFLOW; + return -1; + } + } + } +#endif + return result; +} + +#ifdef __cplusplus +} +#endif + +_GL_INLINE_HEADER_END + +#endif diff --git a/contrib/grep/lib/stat.c b/contrib/grep/lib/stat.c deleted file mode 100644 index 2209826fc3..0000000000 --- a/contrib/grep/lib/stat.c +++ /dev/null @@ -1,138 +0,0 @@ -/* Work around platform bugs in stat. - Copyright (C) 2009-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
*/ - -/* written by Eric Blake */ - -/* If the user's config.h happens to include , let it include only - the system's here, so that orig_stat doesn't recurse to - rpl_stat. */ -#define __need_system_sys_stat_h -#include - -/* Get the original definition of stat. It might be defined as a macro. */ -#include -#include -#undef __need_system_sys_stat_h - -#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ -# if _GL_WINDOWS_64_BIT_ST_SIZE -# undef stat /* avoid warning on mingw64 with _FILE_OFFSET_BITS=64 */ -# define stat _stati64 -# define REPLACE_FUNC_STAT_DIR 1 -# undef REPLACE_FUNC_STAT_FILE -# elif REPLACE_FUNC_STAT_FILE -/* mingw64 has a broken stat() function, based on _stat(), in libmingwex.a. - Bypass it. */ -# define stat _stat -# define REPLACE_FUNC_STAT_DIR 1 -# undef REPLACE_FUNC_STAT_FILE -# endif -#endif - -static int -orig_stat (const char *filename, struct stat *buf) -{ - return stat (filename, buf); -} - -/* Specification. */ -/* Write "sys/stat.h" here, not , otherwise OSF/1 5.1 DTK cc - eliminates this include because of the preliminary #include - above. */ -#include "sys/stat.h" - -#include -#include -#include -#include -#include "dosname.h" -#include "verify.h" - -#if REPLACE_FUNC_STAT_DIR -# include "pathmax.h" - /* The only known systems where REPLACE_FUNC_STAT_DIR is needed also - have a constant PATH_MAX. */ -# ifndef PATH_MAX -# error "Please port this replacement to your platform" -# endif -#endif - -/* Store information about NAME into ST. Work around bugs with - trailing slashes. Mingw has other bugs (such as st_ino always - being 0 on success) which this wrapper does not work around. But - at least this implementation provides the ability to emulate fchdir - correctly. */ - -int -rpl_stat (char const *name, struct stat *st) -{ - int result = orig_stat (name, st); -#if REPLACE_FUNC_STAT_FILE - /* Solaris 9 mistakenly succeeds when given a non-directory with a - trailing slash. 
*/ - if (result == 0 && !S_ISDIR (st->st_mode)) - { - size_t len = strlen (name); - if (ISSLASH (name[len - 1])) - { - errno = ENOTDIR; - return -1; - } - } -#endif /* REPLACE_FUNC_STAT_FILE */ -#if REPLACE_FUNC_STAT_DIR - - if (result == -1 && errno == ENOENT) - { - /* Due to mingw's oddities, there are some directories (like - c:\) where stat() only succeeds with a trailing slash, and - other directories (like c:\windows) where stat() only - succeeds without a trailing slash. But we want the two to be - synonymous, since chdir() manages either style. Likewise, Mingw also - reports ENOENT for names longer than PATH_MAX, when we want - ENAMETOOLONG, and for stat("file/"), when we want ENOTDIR. - Fortunately, mingw PATH_MAX is small enough for stack - allocation. */ - char fixed_name[PATH_MAX + 1] = {0}; - size_t len = strlen (name); - bool check_dir = false; - verify (PATH_MAX <= 4096); - if (PATH_MAX <= len) - errno = ENAMETOOLONG; - else if (len) - { - strcpy (fixed_name, name); - if (ISSLASH (fixed_name[len - 1])) - { - check_dir = true; - while (len && ISSLASH (fixed_name[len - 1])) - fixed_name[--len] = '\0'; - if (!len) - fixed_name[0] = '/'; - } - else - fixed_name[len++] = '/'; - result = orig_stat (fixed_name, st); - if (result == 0 && check_dir && !S_ISDIR (st->st_mode)) - { - result = -1; - errno = ENOTDIR; - } - } - } -#endif /* REPLACE_FUNC_STAT_DIR */ - return result; -} diff --git a/contrib/grep/lib/stdalign.in.h b/contrib/grep/lib/stdalign.in.h deleted file mode 100644 index b16ccc8699..0000000000 --- a/contrib/grep/lib/stdalign.in.h +++ /dev/null @@ -1,121 +0,0 @@ -/* A substitute for ISO C11 . - - Copyright 2011-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see . */ - -/* Written by Paul Eggert and Bruno Haible. */ - -#ifndef _GL_STDALIGN_H -#define _GL_STDALIGN_H - -/* ISO C11 for platforms that lack it. - - References: - ISO C11 (latest free draft - ) - sections 6.5.3.4, 6.7.5, 7.15. - C++11 (latest free draft - ) - section 18.10. */ - -/* alignof (TYPE), also known as _Alignof (TYPE), yields the alignment - requirement of a structure member (i.e., slot or field) that is of - type TYPE, as an integer constant expression. - - This differs from GCC's __alignof__ operator, which can yield a - better-performing alignment for an object of that type. For - example, on x86 with GCC, __alignof__ (double) and __alignof__ - (long long) are 8, whereas alignof (double) and alignof (long long) - are 4 unless the option '-malign-double' is used. - - The result cannot be used as a value for an 'enum' constant, if you - want to be portable to HP-UX 10.20 cc and AIX 3.2.5 xlc. - - Include for offsetof. */ -#include - -/* FreeBSD 9.1 , included by and lots of other - standard headers, defines conflicting implementations of _Alignas - and _Alignof that are no better than ours; override them. */ -#undef _Alignas -#undef _Alignof - -/* GCC releases before GCC 4.9 had a bug in _Alignof. See GCC bug 52023 - . 
*/ -#if (!defined __STDC_VERSION__ || __STDC_VERSION__ < 201112 \ - || (defined __GNUC__ && __GNUC__ < 4 + (__GNUC_MINOR__ < 9))) -# ifdef __cplusplus -# if 201103 <= __cplusplus -# define _Alignof(type) alignof (type) -# else - template struct __alignof_helper { char __a; __t __b; }; -# define _Alignof(type) offsetof (__alignof_helper, __b) -# endif -# else -# define _Alignof(type) offsetof (struct { char __a; type __b; }, __b) -# endif -#endif -#if ! (defined __cplusplus && 201103 <= __cplusplus) -# define alignof _Alignof -#endif -#define __alignof_is_defined 1 - -/* alignas (A), also known as _Alignas (A), aligns a variable or type - to the alignment A, where A is an integer constant expression. For - example: - - int alignas (8) foo; - struct s { int a; int alignas (8) bar; }; - - aligns the address of FOO and the offset of BAR to be multiples of 8. - - A should be a power of two that is at least the type's alignment - and at most the implementation's alignment limit. This limit is - 2**28 on typical GNUish hosts, and 2**13 on MSVC. To be portable - to MSVC through at least version 10.0, A should be an integer - constant, as MSVC does not support expressions such as 1 << 3. - To be portable to Sun C 5.11, do not align auto variables to - anything stricter than their default alignment. - - The following C11 requirements are not supported here: - - - If A is zero, alignas has no effect. - - alignas can be used multiple times; the strictest one wins. - - alignas (TYPE) is equivalent to alignas (alignof (TYPE)). - - */ - -#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112 -# if defined __cplusplus && 201103 <= __cplusplus -# define _Alignas(a) alignas (a) -# elif ((defined __APPLE__ && defined __MACH__ \ - ? 
4 < __GNUC__ + (1 <= __GNUC_MINOR__) \ - : __GNUC__) \ - || __HP_cc || __HP_aCC || __IBMC__ || __IBMCPP__ \ - || __ICC || 0x590 <= __SUNPRO_C) -# define _Alignas(a) __attribute__ ((__aligned__ (a))) -# elif 1300 <= _MSC_VER -# define _Alignas(a) __declspec (align (a)) -# endif -#endif -#if ((defined _Alignas && ! (defined __cplusplus && 201103 <= __cplusplus)) \ - || (defined __STDC_VERSION && 201112 <= __STDC_VERSION__)) -# define alignas _Alignas -#endif -#if defined alignas || (defined __cplusplus && 201103 <= __cplusplus) -# define __alignas_is_defined 1 -#endif - -#endif /* _GL_STDALIGN_H */ diff --git a/contrib/grep/lib/stpcpy.c b/contrib/grep/lib/stpcpy.c deleted file mode 100644 index 559d2f231d..0000000000 --- a/contrib/grep/lib/stpcpy.c +++ /dev/null @@ -1,49 +0,0 @@ -/* stpcpy.c -- copy a string and return pointer to end of new string - Copyright (C) 1992, 1995, 1997-1998, 2006, 2009-2015 Free Software - Foundation, Inc. - - NOTE: The canonical source of this file is maintained with the GNU C Library. - Bugs can be reported to bug-glibc@prep.ai.mit.edu. - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3 of the License, or any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -#include - -#undef __stpcpy -#ifdef _LIBC -# undef stpcpy -#endif - -#ifndef weak_alias -# define __stpcpy stpcpy -#endif - -/* Copy SRC to DEST, returning the address of the terminating '\0' in DEST. 
*/ -char * -__stpcpy (char *dest, const char *src) -{ - register char *d = dest; - register const char *s = src; - - do - *d++ = *s; - while (*s++ != '\0'); - - return d - 1; -} -#ifdef weak_alias -weak_alias (__stpcpy, stpcpy) -#endif diff --git a/contrib/grep/lib/str-kmp.h b/contrib/grep/lib/str-kmp.h index 66fcead46c..d798513d61 100644 --- a/contrib/grep/lib/str-kmp.h +++ b/contrib/grep/lib/str-kmp.h @@ -1,6 +1,6 @@ /* Substring search in a NUL terminated string of UNIT elements, using the Knuth-Morris-Pratt algorithm. - Copyright (C) 2005-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2005. This program is free software; you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . */ /* Before including this file, you need to define: UNIT The element type of the needle and haystack. @@ -24,7 +24,7 @@ of type UNIT as well. */ /* Knuth-Morris-Pratt algorithm. - See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm + See https://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm HAYSTACK is the NUL terminated string in which to search for. NEEDLE is the string to search for in HAYSTACK, consisting of NEEDLE_LEN units. diff --git a/contrib/grep/lib/str-two-way.h b/contrib/grep/lib/str-two-way.h index 6e10c8e398..6ad0130fad 100644 --- a/contrib/grep/lib/str-two-way.h +++ b/contrib/grep/lib/str-two-way.h @@ -1,5 +1,5 @@ /* Byte-wise substring search, using the Two-Way algorithm. - Copyright (C) 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2008-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. Written by Eric Blake , 2008. @@ -14,11 +14,11 @@ GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along - with this program; if not, see . */ + with this program; if not, see . */ /* Before including this file, you need to include and , and define: - RESULT_TYPE A macro that expands to the return type. + RETURN_TYPE A macro that expands to the return type. AVAILABLE(h, h_l, j, n_l) A macro that returns nonzero if there are at least N_L bytes left starting at H[J]. @@ -49,9 +49,9 @@ character shift table similar to the Boyer-Moore algorithm to achieve improved (potentially sub-linear) performance. - See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260, - http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm, - http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.34.6641&rep=rep1&type=pdf + See https://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260, + https://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm, + https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.34.6641&rep=rep1&type=pdf */ /* Point at which computing a bad-byte shift table is likely to be diff --git a/contrib/grep/lib/strdup.c b/contrib/grep/lib/strdup.c deleted file mode 100644 index 4fdd0bd918..0000000000 --- a/contrib/grep/lib/strdup.c +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (C) 1991, 1996-1998, 2002-2004, 2006-2007, 2009-2015 Free Software - Foundation, Inc. - - This file is part of the GNU C Library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License along - with this program; if not, see . */ - -#ifndef _LIBC -# include -#endif - -/* Get specification. */ -#include - -#include - -#undef __strdup -#ifdef _LIBC -# undef strdup -#endif - -#ifndef weak_alias -# define __strdup strdup -#endif - -/* Duplicate S, returning an identical malloc'd string. */ -char * -__strdup (const char *s) -{ - size_t len = strlen (s) + 1; - void *new = malloc (len); - - if (new == NULL) - return NULL; - - return (char *) memcpy (new, s, len); -} -#ifdef libc_hidden_def -libc_hidden_def (__strdup) -#endif -#ifdef weak_alias -weak_alias (__strdup, strdup) -#endif diff --git a/contrib/grep/lib/streq.h b/contrib/grep/lib/streq.h index 6107fc6bd2..d06c27200c 100644 --- a/contrib/grep/lib/streq.h +++ b/contrib/grep/lib/streq.h @@ -1,5 +1,5 @@ /* Optimized string comparison. - Copyright (C) 2001-2002, 2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -12,7 +12,7 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Bruno Haible . */ diff --git a/contrib/grep/lib/strerror-override.c b/contrib/grep/lib/strerror-override.c index 2bb4d7457d..61b7689fd8 100644 --- a/contrib/grep/lib/strerror-override.c +++ b/contrib/grep/lib/strerror-override.c @@ -1,6 +1,6 @@ /* strerror-override.c --- POSIX compatible system error routine - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2020 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Bruno Haible , 2010. */ diff --git a/contrib/grep/lib/strerror-override.h b/contrib/grep/lib/strerror-override.h index 7af36dfd45..062f51cf6b 100644 --- a/contrib/grep/lib/strerror-override.h +++ b/contrib/grep/lib/strerror-override.h @@ -1,6 +1,6 @@ /* strerror-override.h --- POSIX compatible system error routine - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef _GL_STRERROR_OVERRIDE_H # define _GL_STRERROR_OVERRIDE_H diff --git a/contrib/grep/lib/strerror.c b/contrib/grep/lib/strerror.c index adba738b7b..1a53a8b927 100644 --- a/contrib/grep/lib/strerror.c +++ b/contrib/grep/lib/strerror.c @@ -1,6 +1,6 @@ /* strerror.c --- POSIX compatible system error routine - Copyright (C) 2007-2015 Free Software Foundation, Inc. + Copyright (C) 2007-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ #include @@ -66,5 +66,6 @@ strerror (int n) if (sizeof buf <= len) abort (); - return memcpy (buf, msg, len + 1); + memcpy (buf, msg, len + 1); + return buf; } diff --git a/contrib/grep/lib/striconv.c b/contrib/grep/lib/striconv.c index 6f7dad8e77..d6a1159e6f 100644 --- a/contrib/grep/lib/striconv.c +++ b/contrib/grep/lib/striconv.c @@ -1,5 +1,5 @@ /* Charset conversion. - Copyright (C) 2001-2007, 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2007, 2010-2020 Free Software Foundation, Inc. Written by Bruno Haible and Simon Josefsson. This program is free software; you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . */ #include diff --git a/contrib/grep/lib/striconv.h b/contrib/grep/lib/striconv.h index 33cc212c00..8beef35c60 100644 --- a/contrib/grep/lib/striconv.h +++ b/contrib/grep/lib/striconv.h @@ -1,5 +1,5 @@ /* Charset conversion. - Copyright (C) 2001-2004, 2006-2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2004, 2006-2007, 2009-2020 Free Software Foundation, Inc. Written by Bruno Haible and Simon Josefsson. This program is free software; you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, see . */ + along with this program; if not, see . 
*/ #ifndef _STRICONV_H #define _STRICONV_H diff --git a/contrib/grep/lib/stripslash.c b/contrib/grep/lib/stripslash.c index 12359cf7bf..6818b30614 100644 --- a/contrib/grep/lib/stripslash.c +++ b/contrib/grep/lib/stripslash.c @@ -1,6 +1,6 @@ /* stripslash.c -- remove redundant trailing slashes from a file name - Copyright (C) 1990, 2001, 2003-2006, 2009-2015 Free Software Foundation, + Copyright (C) 1990, 2001, 2003-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include diff --git a/contrib/grep/lib/strnlen.c b/contrib/grep/lib/strnlen.c deleted file mode 100644 index 79e250c4d7..0000000000 --- a/contrib/grep/lib/strnlen.c +++ /dev/null @@ -1,30 +0,0 @@ -/* Find the length of STRING, but scan at most MAXLEN characters. - Copyright (C) 2005-2007, 2009-2015 Free Software Foundation, Inc. - Written by Simon Josefsson. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see . */ - -#include - -#include - -/* Find the length of STRING, but scan at most MAXLEN characters. - If no '\0' terminator is found in that many characters, return MAXLEN. */ - -size_t -strnlen (const char *string, size_t maxlen) -{ - const char *end = memchr (string, '\0', maxlen); - return end ? 
(size_t) (end - string) : maxlen; -} diff --git a/contrib/grep/lib/strnlen1.c b/contrib/grep/lib/strnlen1.c index 48272a8883..88edc182f8 100644 --- a/contrib/grep/lib/strnlen1.c +++ b/contrib/grep/lib/strnlen1.c @@ -1,5 +1,5 @@ /* Find the length of STRING + 1, but scan at most MAXLEN bytes. - Copyright (C) 2005-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include diff --git a/contrib/grep/lib/strnlen1.h b/contrib/grep/lib/strnlen1.h index 794e436ec5..2e21fea1f4 100644 --- a/contrib/grep/lib/strnlen1.h +++ b/contrib/grep/lib/strnlen1.h @@ -1,5 +1,5 @@ /* Find the length of STRING + 1, but scan at most MAXLEN bytes. - Copyright (C) 2005, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2005, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef _STRNLEN1_H #define _STRNLEN1_H diff --git a/contrib/grep/lib/strstr.c b/contrib/grep/lib/strstr.c index fbc11a2d71..2f8ae46f70 100644 --- a/contrib/grep/lib/strstr.c +++ b/contrib/grep/lib/strstr.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991-1994, 1996-1998, 2000, 2004, 2007-2015 Free Software +/* Copyright (C) 1991-1994, 1996-1998, 2000, 2004, 2007-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. 
@@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along - with this program; if not, see . */ + with this program; if not, see . */ /* This particular implementation was written by Eric Blake, 2008. */ @@ -26,10 +26,6 @@ #include -#ifndef _LIBC -# define __builtin_expect(expr, val) (expr) -#endif - #define RETURN_TYPE char * #define AVAILABLE(h, h_l, j, n_l) \ (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ diff --git a/contrib/grep/lib/strtoimax.c b/contrib/grep/lib/strtoimax.c deleted file mode 100644 index 8ff65cee4b..0000000000 --- a/contrib/grep/lib/strtoimax.c +++ /dev/null @@ -1,82 +0,0 @@ -/* Convert string representation of a number into an intmax_t value. - - Copyright (C) 1999, 2001-2004, 2006, 2009-2015 Free Software Foundation, - Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* Written by Paul Eggert. */ - -#include - -/* Verify interface. 
*/ -#include - -#include - -#include "verify.h" - -#ifdef UNSIGNED -# if HAVE_UNSIGNED_LONG_LONG_INT -# ifndef HAVE_DECL_STRTOULL -"this configure-time declaration test was not run" -# endif -# if !HAVE_DECL_STRTOULL -unsigned long long int strtoull (char const *, char **, int); -# endif -# endif - -#else - -# if HAVE_LONG_LONG_INT -# ifndef HAVE_DECL_STRTOLL -"this configure-time declaration test was not run" -# endif -# if !HAVE_DECL_STRTOLL -long long int strtoll (char const *, char **, int); -# endif -# endif -#endif - -#ifdef UNSIGNED -# define Have_long_long HAVE_UNSIGNED_LONG_LONG_INT -# define Int uintmax_t -# define Strtoimax strtoumax -# define Strtol strtoul -# define Strtoll strtoull -# define Unsigned unsigned -#else -# define Have_long_long HAVE_LONG_LONG_INT -# define Int intmax_t -# define Strtoimax strtoimax -# define Strtol strtol -# define Strtoll strtoll -# define Unsigned -#endif - -Int -Strtoimax (char const *ptr, char **endptr, int base) -{ -#if Have_long_long - verify (sizeof (Int) == sizeof (Unsigned long int) - || sizeof (Int) == sizeof (Unsigned long long int)); - - if (sizeof (Int) != sizeof (Unsigned long int)) - return Strtoll (ptr, endptr, base); -#else - verify (sizeof (Int) == sizeof (Unsigned long int)); -#endif - - return Strtol (ptr, endptr, base); -} diff --git a/contrib/grep/lib/strtol.c b/contrib/grep/lib/strtol.c deleted file mode 100644 index 1bc143985d..0000000000 --- a/contrib/grep/lib/strtol.c +++ /dev/null @@ -1,433 +0,0 @@ -/* Convert string representation of a number into an integer value. - - Copyright (C) 1991-1992, 1994-1999, 2003, 2005-2007, 2009-2015 Free Software - Foundation, Inc. - - NOTE: The canonical source of this file is maintained with the GNU C - Library. Bugs can be reported to bug-glibc@gnu.org. 
- - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3 of the License, or any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#ifdef _LIBC -# define USE_NUMBER_GROUPING -#else -# include -#endif - -#include -#include -#ifndef __set_errno -# define __set_errno(Val) errno = (Val) -#endif - -#include -#include -#include -#include - -#ifdef USE_NUMBER_GROUPING -# include "../locale/localeinfo.h" -#endif - -/* Nonzero if we are defining 'strtoul' or 'strtoull', operating on - unsigned integers. */ -#ifndef UNSIGNED -# define UNSIGNED 0 -# define INT LONG int -#else -# define INT unsigned LONG int -#endif - -/* Determine the name. 
*/ -#ifdef USE_IN_EXTENDED_LOCALE_MODEL -# if UNSIGNED -# ifdef USE_WIDE_CHAR -# ifdef QUAD -# define strtol __wcstoull_l -# else -# define strtol __wcstoul_l -# endif -# else -# ifdef QUAD -# define strtol __strtoull_l -# else -# define strtol __strtoul_l -# endif -# endif -# else -# ifdef USE_WIDE_CHAR -# ifdef QUAD -# define strtol __wcstoll_l -# else -# define strtol __wcstol_l -# endif -# else -# ifdef QUAD -# define strtol __strtoll_l -# else -# define strtol __strtol_l -# endif -# endif -# endif -#else -# if UNSIGNED -# ifdef USE_WIDE_CHAR -# ifdef QUAD -# define strtol wcstoull -# else -# define strtol wcstoul -# endif -# else -# ifdef QUAD -# define strtol strtoull -# else -# define strtol strtoul -# endif -# endif -# else -# ifdef USE_WIDE_CHAR -# ifdef QUAD -# define strtol wcstoll -# else -# define strtol wcstol -# endif -# else -# ifdef QUAD -# define strtol strtoll -# endif -# endif -# endif -#endif - -/* If QUAD is defined, we are defining 'strtoll' or 'strtoull', - operating on 'long long int's. */ -#ifdef QUAD -# define LONG long long -# define STRTOL_LONG_MIN LLONG_MIN -# define STRTOL_LONG_MAX LLONG_MAX -# define STRTOL_ULONG_MAX ULLONG_MAX - -/* The extra casts in the following macros work around compiler bugs, - e.g., in Cray C 5.0.3.0. */ - -/* True if negative values of the signed integer type T use two's - complement, ones' complement, or signed magnitude representation, - respectively. Much GNU code assumes two's complement, but some - people like to be portable to all possible C hosts. */ -# define TYPE_TWOS_COMPLEMENT(t) ((t) ~ (t) 0 == (t) -1) -# define TYPE_ONES_COMPLEMENT(t) ((t) ~ (t) 0 == 0) -# define TYPE_SIGNED_MAGNITUDE(t) ((t) ~ (t) 0 < (t) -1) - -/* True if the arithmetic type T is signed. */ -# define TYPE_SIGNED(t) (! ((t) 0 < (t) -1)) - -/* The maximum and minimum values for the integer type T. These - macros have undefined behavior if T is signed and has padding bits. 
- If this is a problem for you, please let us know how to fix it for - your host. */ -# define TYPE_MINIMUM(t) \ - ((t) (! TYPE_SIGNED (t) \ - ? (t) 0 \ - : TYPE_SIGNED_MAGNITUDE (t) \ - ? ~ (t) 0 \ - : ~ TYPE_MAXIMUM (t))) -# define TYPE_MAXIMUM(t) \ - ((t) (! TYPE_SIGNED (t) \ - ? (t) -1 \ - : ((((t) 1 << (sizeof (t) * CHAR_BIT - 2)) - 1) * 2 + 1))) - -# ifndef ULLONG_MAX -# define ULLONG_MAX TYPE_MAXIMUM (unsigned long long) -# endif -# ifndef LLONG_MAX -# define LLONG_MAX TYPE_MAXIMUM (long long int) -# endif -# ifndef LLONG_MIN -# define LLONG_MIN TYPE_MINIMUM (long long int) -# endif - -# if __GNUC__ == 2 && __GNUC_MINOR__ < 7 - /* Work around gcc bug with using this constant. */ - static const unsigned long long int maxquad = ULLONG_MAX; -# undef STRTOL_ULONG_MAX -# define STRTOL_ULONG_MAX maxquad -# endif -#else -# define LONG long -# define STRTOL_LONG_MIN LONG_MIN -# define STRTOL_LONG_MAX LONG_MAX -# define STRTOL_ULONG_MAX ULONG_MAX -#endif - - -/* We use this code also for the extended locale handling where the - function gets as an additional argument the locale which has to be - used. To access the values we have to redefine the _NL_CURRENT - macro. 
*/ -#ifdef USE_IN_EXTENDED_LOCALE_MODEL -# undef _NL_CURRENT -# define _NL_CURRENT(category, item) \ - (current->values[_NL_ITEM_INDEX (item)].string) -# define LOCALE_PARAM , loc -# define LOCALE_PARAM_PROTO , __locale_t loc -#else -# define LOCALE_PARAM -# define LOCALE_PARAM_PROTO -#endif - -#ifdef USE_WIDE_CHAR -# include -# include -# define L_(Ch) L##Ch -# define UCHAR_TYPE wint_t -# define STRING_TYPE wchar_t -# ifdef USE_IN_EXTENDED_LOCALE_MODEL -# define ISSPACE(Ch) __iswspace_l ((Ch), loc) -# define ISALPHA(Ch) __iswalpha_l ((Ch), loc) -# define TOUPPER(Ch) __towupper_l ((Ch), loc) -# else -# define ISSPACE(Ch) iswspace (Ch) -# define ISALPHA(Ch) iswalpha (Ch) -# define TOUPPER(Ch) towupper (Ch) -# endif -#else -# define L_(Ch) Ch -# define UCHAR_TYPE unsigned char -# define STRING_TYPE char -# ifdef USE_IN_EXTENDED_LOCALE_MODEL -# define ISSPACE(Ch) __isspace_l ((Ch), loc) -# define ISALPHA(Ch) __isalpha_l ((Ch), loc) -# define TOUPPER(Ch) __toupper_l ((Ch), loc) -# else -# define ISSPACE(Ch) isspace (Ch) -# define ISALPHA(Ch) isalpha (Ch) -# define TOUPPER(Ch) toupper (Ch) -# endif -#endif - -#define INTERNAL(X) INTERNAL1(X) -#define INTERNAL1(X) __##X##_internal -#define WEAKNAME(X) WEAKNAME1(X) - -#ifdef USE_NUMBER_GROUPING -/* This file defines a function to check for correct grouping. */ -# include "grouping.h" -#endif - - - -/* Convert NPTR to an 'unsigned long int' or 'long int' in base BASE. - If BASE is 0 the base is determined by the presence of a leading - zero, indicating octal or a leading "0x" or "0X", indicating hexadecimal. - If BASE is < 2 or > 36, it is reset to 10. - If ENDPTR is not NULL, a pointer to the character after the last - one converted is stored in *ENDPTR. 
*/ - -INT -INTERNAL (strtol) (const STRING_TYPE *nptr, STRING_TYPE **endptr, - int base, int group LOCALE_PARAM_PROTO) -{ - int negative; - register unsigned LONG int cutoff; - register unsigned int cutlim; - register unsigned LONG int i; - register const STRING_TYPE *s; - register UCHAR_TYPE c; - const STRING_TYPE *save, *end; - int overflow; - -#ifdef USE_NUMBER_GROUPING -# ifdef USE_IN_EXTENDED_LOCALE_MODEL - struct locale_data *current = loc->__locales[LC_NUMERIC]; -# endif - /* The thousands character of the current locale. */ - wchar_t thousands = L'\0'; - /* The numeric grouping specification of the current locale, - in the format described in . */ - const char *grouping; - - if (group) - { - grouping = _NL_CURRENT (LC_NUMERIC, GROUPING); - if (*grouping <= 0 || *grouping == CHAR_MAX) - grouping = NULL; - else - { - /* Figure out the thousands separator character. */ -# if defined _LIBC || defined _HAVE_BTOWC - thousands = __btowc (*_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP)); - if (thousands == WEOF) - thousands = L'\0'; -# endif - if (thousands == L'\0') - grouping = NULL; - } - } - else - grouping = NULL; -#endif - - if (base < 0 || base == 1 || base > 36) - { - __set_errno (EINVAL); - return 0; - } - - save = s = nptr; - - /* Skip white space. */ - while (ISSPACE (*s)) - ++s; - if (*s == L_('\0')) - goto noconv; - - /* Check for a sign. */ - if (*s == L_('-')) - { - negative = 1; - ++s; - } - else if (*s == L_('+')) - { - negative = 0; - ++s; - } - else - negative = 0; - - /* Recognize number prefix and if BASE is zero, figure it out ourselves. */ - if (*s == L_('0')) - { - if ((base == 0 || base == 16) && TOUPPER (s[1]) == L_('X')) - { - s += 2; - base = 16; - } - else if (base == 0) - base = 8; - } - else if (base == 0) - base = 10; - - /* Save the pointer so we can check later if anything happened. */ - save = s; - -#ifdef USE_NUMBER_GROUPING - if (group) - { - /* Find the end of the digit string and check its grouping. 
*/ - end = s; - for (c = *end; c != L_('\0'); c = *++end) - if ((wchar_t) c != thousands - && ((wchar_t) c < L_('0') || (wchar_t) c > L_('9')) - && (!ISALPHA (c) || (int) (TOUPPER (c) - L_('A') + 10) >= base)) - break; - if (*s == thousands) - end = s; - else - end = correctly_grouped_prefix (s, end, thousands, grouping); - } - else -#endif - end = NULL; - - cutoff = STRTOL_ULONG_MAX / (unsigned LONG int) base; - cutlim = STRTOL_ULONG_MAX % (unsigned LONG int) base; - - overflow = 0; - i = 0; - for (c = *s; c != L_('\0'); c = *++s) - { - if (s == end) - break; - if (c >= L_('0') && c <= L_('9')) - c -= L_('0'); - else if (ISALPHA (c)) - c = TOUPPER (c) - L_('A') + 10; - else - break; - if ((int) c >= base) - break; - /* Check for overflow. */ - if (i > cutoff || (i == cutoff && c > cutlim)) - overflow = 1; - else - { - i *= (unsigned LONG int) base; - i += c; - } - } - - /* Check if anything actually happened. */ - if (s == save) - goto noconv; - - /* Store in ENDPTR the address of one character - past the last character we converted. */ - if (endptr != NULL) - *endptr = (STRING_TYPE *) s; - -#if !UNSIGNED - /* Check for a value that is within the range of - 'unsigned LONG int', but outside the range of 'LONG int'. */ - if (overflow == 0 - && i > (negative - ? -((unsigned LONG int) (STRTOL_LONG_MIN + 1)) + 1 - : (unsigned LONG int) STRTOL_LONG_MAX)) - overflow = 1; -#endif - - if (overflow) - { - __set_errno (ERANGE); -#if UNSIGNED - return STRTOL_ULONG_MAX; -#else - return negative ? STRTOL_LONG_MIN : STRTOL_LONG_MAX; -#endif - } - - /* Return the result of the appropriate sign. */ - return negative ? -i : i; - -noconv: - /* We must handle a special case here: the base is 0 or 16 and the - first two characters are '0' and 'x', but the rest are no - hexadecimal digits. This is no error case. We return 0 and - ENDPTR points to the 'x'. 
*/ - if (endptr != NULL) - { - if (save - nptr >= 2 && TOUPPER (save[-1]) == L_('X') - && save[-2] == L_('0')) - *endptr = (STRING_TYPE *) &save[-1]; - else - /* There was no number to convert. */ - *endptr = (STRING_TYPE *) nptr; - } - - return 0L; -} - -/* External user entry point. */ - - -INT -#ifdef weak_function -weak_function -#endif -strtol (const STRING_TYPE *nptr, STRING_TYPE **endptr, - int base LOCALE_PARAM_PROTO) -{ - return INTERNAL (strtol) (nptr, endptr, base, 0 LOCALE_PARAM); -} diff --git a/contrib/grep/lib/strtoll.c b/contrib/grep/lib/strtoll.c deleted file mode 100644 index d7123491f3..0000000000 --- a/contrib/grep/lib/strtoll.c +++ /dev/null @@ -1,33 +0,0 @@ -/* Function to parse a 'long long int' from text. - Copyright (C) 1995-1997, 1999, 2001, 2009-2015 Free Software Foundation, - Inc. - This file is part of the GNU C Library. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#define QUAD 1 - -#include - -#ifdef _LIBC -# ifdef SHARED -# include - -# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_2) -compat_symbol (libc, __strtoll_internal, __strtoq_internal, GLIBC_2_0); -# endif - -# endif -weak_alias (strtoll, strtoq) -#endif diff --git a/contrib/grep/lib/strtoul.c b/contrib/grep/lib/strtoul.c deleted file mode 100644 index 2051777700..0000000000 --- a/contrib/grep/lib/strtoul.c +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (C) 1991, 1997, 2009-2015 Free Software Foundation, Inc. 
- This file is part of the GNU C Library. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#define UNSIGNED 1 - -#include "strtol.c" diff --git a/contrib/grep/lib/strtoull.c b/contrib/grep/lib/strtoull.c deleted file mode 100644 index 5cd2554c71..0000000000 --- a/contrib/grep/lib/strtoull.c +++ /dev/null @@ -1,26 +0,0 @@ -/* Function to parse an 'unsigned long long int' from text. - Copyright (C) 1995-1997, 1999, 2009-2015 Free Software Foundation, Inc. - NOTE: The canonical source of this file is maintained with the GNU C - Library. Bugs can be reported to bug-glibc@gnu.org. - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3 of the License, or any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
*/ - -#define QUAD 1 - -#include "strtoul.c" - -#ifdef _LIBC -strong_alias (__strtoull_internal, __strtouq_internal) -weak_alias (strtoull, strtouq) -#endif diff --git a/contrib/grep/lib/strtoumax.c b/contrib/grep/lib/strtoumax.c deleted file mode 100644 index dc395d626a..0000000000 --- a/contrib/grep/lib/strtoumax.c +++ /dev/null @@ -1,2 +0,0 @@ -#define UNSIGNED 1 -#include "strtoimax.c" diff --git a/contrib/grep/lib/sys-limits.h b/contrib/grep/lib/sys-limits.h new file mode 100644 index 0000000000..9b6b4903b7 --- /dev/null +++ b/contrib/grep/lib/sys-limits.h @@ -0,0 +1,42 @@ +/* System call limits + + Copyright 2018-2020 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . */ + +#ifndef _GL_SYS_LIMITS_H +#define _GL_SYS_LIMITS_H + +#include + +/* Maximum number of bytes to read or write in a single system call. + This can be useful for system calls like sendfile on GNU/Linux, + which do not handle more than MAX_RW_COUNT bytes correctly. + The Linux kernel MAX_RW_COUNT is at least INT_MAX >> 20 << 20, + where the 20 comes from the Hexagon port with 1 MiB pages; use that + as an approximation, as the exact value may not be available to us. + + Using this also works around a serious Linux bug before 2.6.16; see + . + + Using this also works around a Tru64 5.1 bug, where attempting + to read INT_MAX bytes fails with errno == EINVAL. See + . 
+ + Using this is likely to work around similar bugs in other operating + systems. */ + +enum { SYS_BUFSIZE_MAX = INT_MAX >> 20 << 20 }; + +#endif diff --git a/contrib/grep/lib/trim.c b/contrib/grep/lib/trim.c index 369b20fe71..51cc0d2d39 100644 --- a/contrib/grep/lib/trim.c +++ b/contrib/grep/lib/trim.c @@ -1,5 +1,5 @@ /* Removes leading and/or trailing whitespaces - Copyright (C) 2006-2015 Free Software Foundation, Inc. + Copyright (C) 2006-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Davide Angelocola */ @@ -31,7 +31,7 @@ #include "xalloc.h" /* Use this to suppress gcc's "...may be used before initialized" warnings. */ -#ifdef lint +#if defined GCC_LINT || defined lint # define IF_LINT(Code) Code #else # define IF_LINT(Code) /* empty */ diff --git a/contrib/grep/lib/trim.h b/contrib/grep/lib/trim.h index 8a38a9fbbe..8329602117 100644 --- a/contrib/grep/lib/trim.h +++ b/contrib/grep/lib/trim.h @@ -1,5 +1,5 @@ /* Removes leading and/or trailing whitespaces - Copyright (C) 2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ /* Written by Davide Angelocola */ diff --git a/contrib/grep/lib/unistd--.h b/contrib/grep/lib/unistd--.h index ada1f6b7bf..05f3740647 100644 --- a/contrib/grep/lib/unistd--.h +++ b/contrib/grep/lib/unistd--.h @@ -1,6 +1,6 @@ /* Like unistd.h, but redefine some names to avoid glitches. - Copyright (C) 2005, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2005, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert. */ diff --git a/contrib/grep/lib/unistd-safer.h b/contrib/grep/lib/unistd-safer.h index d4251b3ee1..3e83509678 100644 --- a/contrib/grep/lib/unistd-safer.h +++ b/contrib/grep/lib/unistd-safer.h @@ -1,6 +1,6 @@ /* Invoke unistd-like functions, but avoid some glitches. - Copyright (C) 2001, 2003, 2005, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001, 2003, 2005, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert and Eric Blake. */ diff --git a/contrib/grep/lib/unistr/u8-mbtoucr.c b/contrib/grep/lib/unistr/u8-mbtoucr.c index 9cc150021b..afc43b6a49 100644 --- a/contrib/grep/lib/unistr/u8-mbtoucr.c +++ b/contrib/grep/lib/unistr/u8-mbtoucr.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-8 string, returning an error code. - Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. 
+ Copyright (C) 1999-2002, 2006-2007, 2009-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2001. This program is free software: you can redistribute it and/or modify it @@ -13,7 +13,7 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include @@ -92,10 +92,7 @@ u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n) { if ((s[1] ^ 0x80) < 0x40 && (c >= 0xf1 || s[1] >= 0x90) -#if 1 - && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) -#endif - ) + && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))) { if (n >= 3) { @@ -138,146 +135,6 @@ u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n) return -2; } } -#if 0 - else if (c < 0xfc) - { - if (n >= 2) - { - if ((s[1] ^ 0x80) < 0x40 - && (c >= 0xf9 || s[1] >= 0x88)) - { - if (n >= 3) - { - if ((s[2] ^ 0x80) < 0x40) - { - if (n >= 4) - { - if ((s[3] ^ 0x80) < 0x40) - { - if (n >= 5) - { - if ((s[4] ^ 0x80) < 0x40) - { - *puc = ((unsigned int) (c & 0x03) << 24) - | ((unsigned int) (s[1] ^ 0x80) << 18) - | ((unsigned int) (s[2] ^ 0x80) << 12) - | ((unsigned int) (s[3] ^ 0x80) << 6) - | (unsigned int) (s[4] ^ 0x80); - return 5; - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return -2; - } - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return -2; - } - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return -2; - } - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return -2; - } - } - else if (c < 0xfe) - { - if (n >= 2) - { - if ((s[1] ^ 0x80) < 0x40 - && (c >= 0xfd || s[1] >= 0x84)) - { - if (n >= 3) - { - if ((s[2] ^ 0x80) < 0x40) - { - if (n >= 4) - { - if ((s[3] ^ 0x80) < 0x40) - { - if (n >= 5) - { - if ((s[4] ^ 0x80) < 0x40) 
- { - if (n >= 6) - { - if ((s[5] ^ 0x80) < 0x40) - { - *puc = ((unsigned int) (c & 0x01) << 30) - | ((unsigned int) (s[1] ^ 0x80) << 24) - | ((unsigned int) (s[2] ^ 0x80) << 18) - | ((unsigned int) (s[3] ^ 0x80) << 12) - | ((unsigned int) (s[4] ^ 0x80) << 6) - | (unsigned int) (s[5] ^ 0x80); - return 6; - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return -2; - } - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return -2; - } - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return -2; - } - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return -2; - } - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return -2; - } - } -#endif } /* invalid multibyte character */ *puc = 0xfffd; diff --git a/contrib/grep/lib/unistr/u8-uctomb-aux.c b/contrib/grep/lib/unistr/u8-uctomb-aux.c index 65445be16d..19b857bf02 100644 --- a/contrib/grep/lib/unistr/u8-uctomb-aux.c +++ b/contrib/grep/lib/unistr/u8-uctomb-aux.c @@ -1,5 +1,5 @@ /* Conversion UCS-4 to UTF-8. - Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2002. This program is free software: you can redistribute it and/or modify it @@ -13,13 +13,21 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include /* Specification. 
*/ #include "unistr.h" +#ifndef FALLTHROUGH +# if __GNUC__ < 7 +# define FALLTHROUGH ((void) 0) +# else +# define FALLTHROUGH __attribute__ ((__fallthrough__)) +# endif +#endif + int u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n) { @@ -37,17 +45,8 @@ u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n) else return -1; } -#if 0 - else if (uc < 0x200000) - count = 4; - else if (uc < 0x4000000) - count = 5; - else if (uc <= 0x7fffffff) - count = 6; -#else else if (uc < 0x110000) count = 4; -#endif else return -1; @@ -56,12 +55,10 @@ u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n) switch (count) /* note: code falls through cases! */ { -#if 0 - case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000; - case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000; -#endif case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000; + FALLTHROUGH; case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800; + FALLTHROUGH; case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0; /*case 1:*/ s[0] = uc; } diff --git a/contrib/grep/lib/unistr/u8-uctomb.c b/contrib/grep/lib/unistr/u8-uctomb.c index a8250f420a..bb646a2f21 100644 --- a/contrib/grep/lib/unistr/u8-uctomb.c +++ b/contrib/grep/lib/unistr/u8-uctomb.c @@ -1,5 +1,5 @@ /* Store a character in UTF-8 string. - Copyright (C) 2002, 2005-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2002, 2005-2006, 2009-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2002. This program is free software: you can redistribute it and/or modify it @@ -13,7 +13,7 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . 
*/ #include @@ -52,17 +52,8 @@ u8_uctomb (uint8_t *s, ucs4_t uc, int n) else return -1; } -#if 0 - else if (uc < 0x200000) - count = 4; - else if (uc < 0x4000000) - count = 5; - else if (uc <= 0x7fffffff) - count = 6; -#else else if (uc < 0x110000) count = 4; -#endif else return -1; @@ -70,10 +61,6 @@ u8_uctomb (uint8_t *s, ucs4_t uc, int n) { switch (count) /* note: code falls through cases! */ { -#if 0 - case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000; - case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000; -#endif case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000; case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800; case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0; diff --git a/contrib/grep/lib/uniwidth/cjk.h b/contrib/grep/lib/uniwidth/cjk.h index cdd9a5550b..3dcc7cb856 100644 --- a/contrib/grep/lib/uniwidth/cjk.h +++ b/contrib/grep/lib/uniwidth/cjk.h @@ -1,5 +1,5 @@ /* Test for CJK encoding. - Copyright (C) 2001-2002, 2005-2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2005-2007, 2009-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2002. This program is free software: you can redistribute it and/or modify it @@ -13,7 +13,7 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include "streq.h" diff --git a/contrib/grep/lib/uniwidth/width.c b/contrib/grep/lib/uniwidth/width.c index c7e03b2a5f..ad92323cce 100644 --- a/contrib/grep/lib/uniwidth/width.c +++ b/contrib/grep/lib/uniwidth/width.c @@ -1,5 +1,5 @@ /* Determine display width of Unicode character. - Copyright (C) 2001-2002, 2006-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2006-2020 Free Software Foundation, Inc. Written by Bruno Haible , 2002. 
This program is free software: you can redistribute it and/or modify it @@ -13,7 +13,7 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include @@ -32,7 +32,7 @@ * - Zero width characters; generated from * "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt" */ -static const unsigned char nonspacing_table_data[36*64] = { +static const unsigned char nonspacing_table_data[38*64] = { /* 0x0000-0x01ff */ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0000-0x003f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0x0040-0x007f */ @@ -73,7 +73,7 @@ static const unsigned char nonspacing_table_data[36*64] = { 0x00, 0x00, 0xc0, 0xfb, 0xef, 0x3e, 0x00, 0x00, /* 0x0800-0x083f */ 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, /* 0x0840-0x087f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0880-0x08bf */ - 0x00, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */ + 0x00, 0x00, 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */ 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, /* 0x0900-0x093f */ 0xfe, 0x21, 0xfe, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0940-0x097f */ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0980-0x09bf */ @@ -135,7 +135,7 @@ static const unsigned char nonspacing_table_data[36*64] = { /* 0x1800-0x19ff */ 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1800-0x183f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1840-0x187f */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* 0x1880-0x18bf */ + 0x60, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* 0x1880-0x18bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18c0-0x18ff */ 0x00, 0x00, 0x00, 0x00, 0x87, 0x01, 0x04, 0x0e, /* 0x1900-0x193f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1940-0x197f */ @@ -158,7 +158,7 @@ static const unsigned char nonspacing_table_data[36*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, /* 0x1d00-0x1d3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d40-0x1d7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d80-0x1dbf */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xf0, /* 0x1dc0-0x1dff */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xf8, /* 0x1dc0-0x1dff */ /* 0x2000-0x21ff */ 0x00, 0xf8, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, /* 0x2000-0x203f */ 0x00, 0x00, 0x00, 0x00, 0xdf, 0xff, 0x00, 0x00, /* 0x2040-0x207f */ @@ -199,7 +199,7 @@ static const unsigned char nonspacing_table_data[36*64] = { 0x44, 0x08, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, /* 0xa800-0xa83f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa840-0xa87f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa880-0xa8bf */ - 0x10, 0x00, 0x00, 0x00, 0xff, 0xff, 0x03, 0x00, /* 0xa8c0-0xa8ff */ + 0x30, 0x00, 0x00, 0x00, 0xff, 0xff, 0x03, 0x00, /* 0xa8c0-0xa8ff */ 0x00, 0x00, 0x00, 0x00, 0xc0, 0x3f, 0x00, 0x00, /* 0xa900-0xa93f */ 0x80, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa940-0xa97f */ 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 0x13, /* 0xa980-0xa9bf */ @@ -268,7 +268,7 @@ static const unsigned char nonspacing_table_data[36*64] = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x7f, /* 0x11180-0x111bf */ 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x111c0-0x111ff */ /* 0x11200-0x113ff */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xd3, 0x00, /* 0x11200-0x1123f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xd3, 0x40, /* 0x11200-0x1123f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11240-0x1127f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11280-0x112bf */ 0x00, 0x00, 0x00, 0x80, 0xf8, 0x07, 0x00, 0x00, /* 0x112c0-0x112ff */ @@ -277,8 +277,8 @@ static const unsigned char nonspacing_table_data[36*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11380-0x113bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x113c0-0x113ff */ /* 0x11400-0x115ff */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 
0x11400-0x1143f */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11440-0x1147f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, /* 0x11400-0x1143f */ + 0x5c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11440-0x1147f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x85, /* 0x11480-0x114bf */ 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x114c0-0x114ff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11500-0x1153f */ @@ -294,6 +294,15 @@ static const unsigned char nonspacing_table_data[36*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11740-0x1177f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11780-0x117bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x117c0-0x117ff */ + /* 0x11c00-0x11dff */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f, /* 0x11c00-0x11c3f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11c40-0x11c7f */ + 0x00, 0x00, 0xfc, 0xff, 0xff, 0xfc, 0x6d, 0x00, /* 0x11c80-0x11cbf */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11cc0-0x11cff */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11d00-0x11d3f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11d40-0x11d7f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11d80-0x11dbf */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11dc0-0x11dff */ /* 0x16a00-0x16bff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x16a00-0x16a3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x16a40-0x16a7f */ @@ -348,13 +357,22 @@ static const unsigned char nonspacing_table_data[36*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1db40-0x1db7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1db80-0x1dbbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1dbc0-0x1dbff */ + /* 0x1e000-0x1e1ff */ + 0x7f, 0xff, 0xff, 0xf9, 0xdb, 0x07, 0x00, 0x00, /* 0x1e000-0x1e03f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e040-0x1e07f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, /* 0x1e080-0x1e0bf */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e0c0-0x1e0ff */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e100-0x1e13f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e140-0x1e17f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e180-0x1e1bf */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e1c0-0x1e1ff */ /* 0x1e800-0x1e9ff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e800-0x1e83f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e840-0x1e87f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e880-0x1e8bf */ 0x00, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e8c0-0x1e8ff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e900-0x1e93f */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e940-0x1e97f */ + 0xf0, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e940-0x1e97f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e980-0x1e9bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* 0x1e9c0-0x1e9ff */ }; @@ -376,20 +394,20 @@ static const signed char nonspacing_table_ind[248] = { -1, -1, -1, -1, -1, -1, -1, -1, /* 0xe000-0xefff */ -1, -1, -1, -1, -1, 20, -1, 21, /* 0xf000-0xffff */ 22, 23, -1, -1, -1, 24, -1, -1, /* 0x10000-0x10fff */ - 25, 26, 27, 28, -1, -1, -1, -1, /* 0x11000-0x11fff */ + 25, 26, 27, 28, -1, -1, 29, -1, /* 0x11000-0x11fff */ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x12000-0x12fff */ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x13000-0x13fff */ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x14000-0x14fff */ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x15000-0x15fff */ - -1, -1, -1, -1, -1, 29, -1, 30, /* 0x16000-0x16fff */ + -1, -1, -1, -1, -1, 30, -1, 31, /* 0x16000-0x16fff */ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x17000-0x17fff */ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x18000-0x18fff */ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x19000-0x19fff */ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x1a000-0x1afff */ - -1, -1, -1, -1, -1, -1, 31, -1, /* 0x1b000-0x1bfff */ 
+ -1, -1, -1, -1, -1, -1, 32, -1, /* 0x1b000-0x1bfff */ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x1c000-0x1cfff */ - 32, 33, -1, -1, -1, 34, -1, -1, /* 0x1d000-0x1dfff */ - -1, -1, -1, -1, 35, -1, -1, -1 /* 0x1e000-0x1efff */ + 33, 34, -1, -1, -1, 35, -1, -1, /* 0x1d000-0x1dfff */ + 36, -1, -1, -1, 37, -1, -1, -1 /* 0x1e000-0x1efff */ }; /* Determine number of column positions required for UC. */ diff --git a/contrib/grep/lib/unlocked-io.h b/contrib/grep/lib/unlocked-io.h index 06ffac6169..e7f7199eda 100644 --- a/contrib/grep/lib/unlocked-io.h +++ b/contrib/grep/lib/unlocked-io.h @@ -1,6 +1,6 @@ /* Prefer faster, non-thread-safe stdio functions if available. - Copyright (C) 2001-2004, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2004, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Jim Meyering. */ diff --git a/contrib/grep/lib/verify.h b/contrib/grep/lib/verify.h index 60eba49811..d9ab89a570 100644 --- a/contrib/grep/lib/verify.h +++ b/contrib/grep/lib/verify.h @@ -1,6 +1,6 @@ /* Compile-time assert-like macros. - Copyright (C) 2005-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Paul Eggert, Bruno Haible, and Jim Meyering. 
*/ @@ -21,31 +21,37 @@ #define _GL_VERIFY_H -/* Define _GL_HAVE__STATIC_ASSERT to 1 if _Static_assert works as per C11. - This is supported by GCC 4.6.0 and later, in C mode, and its use - here generates easier-to-read diagnostics when verify (R) fails. +/* Define _GL_HAVE__STATIC_ASSERT to 1 if _Static_assert (R, DIAGNOSTIC) + works as per C11. This is supported by GCC 4.6.0 and later, in C + mode. - Define _GL_HAVE_STATIC_ASSERT to 1 if static_assert works as per C++11. - This will likely be supported by future GCC versions, in C++ mode. + Define _GL_HAVE__STATIC_ASSERT1 to 1 if _Static_assert (R) works as + per C2X, and define _GL_HAVE_STATIC_ASSERT1 if static_assert (R) + works as per C++17. This is supported by GCC 9.1 and later. - Use this only with GCC. If we were willing to slow 'configure' - down we could also use it with other compilers, but since this - affects only the quality of diagnostics, why bother? */ -#if (4 < __GNUC__ + (6 <= __GNUC_MINOR__) \ - && (201112L <= __STDC_VERSION__ || !defined __STRICT_ANSI__) \ - && !defined __cplusplus) -# define _GL_HAVE__STATIC_ASSERT 1 -#endif -/* The condition (99 < __GNUC__) is temporary, until we know about the - first G++ release that supports static_assert. */ -#if (99 < __GNUC__) && defined __cplusplus -# define _GL_HAVE_STATIC_ASSERT 1 + Support compilers claiming conformance to the relevant standard, + and also support GCC when not pedantic. If we were willing to slow + 'configure' down we could also use it with other compilers, but + since this affects only the quality of diagnostics, why bother? 
*/ +#ifndef __cplusplus +# if (201112L <= __STDC_VERSION__ \ + || (!defined __STRICT_ANSI__ && 4 < __GNUC__ + (6 <= __GNUC_MINOR__))) +# define _GL_HAVE__STATIC_ASSERT 1 +# endif +# if (202000L <= __STDC_VERSION__ \ + || (!defined __STRICT_ANSI__ && 9 <= __GNUC__)) +# define _GL_HAVE__STATIC_ASSERT1 1 +# endif +#else +# if 201703L <= __cplusplus || 9 <= __GNUC__ +# define _GL_HAVE_STATIC_ASSERT1 1 +# endif #endif /* FreeBSD 9.1 , included by and lots of other system headers, defines a conflicting _Static_assert that is no better than ours; override it. */ -#ifndef _GL_HAVE_STATIC_ASSERT +#ifndef _GL_HAVE__STATIC_ASSERT # include # undef _Static_assert #endif @@ -143,9 +149,9 @@ which do not support _Static_assert, also do not warn about the last declaration mentioned above. - * GCC warns if -Wnested-externs is enabled and verify() is used + * GCC warns if -Wnested-externs is enabled and 'verify' is used within a function body; but inside a function, you can always - arrange to use verify_expr() instead. + arrange to use verify_expr instead. * In C++, any struct definition inside sizeof is invalid. Use a template type to work around the problem. */ @@ -197,48 +203,61 @@ template #endif /* Verify requirement R at compile-time, as a declaration without a - trailing ';'. If R is false, fail at compile-time, preferably - with a diagnostic that includes the string-literal DIAGNOSTIC. + trailing ';'. If R is false, fail at compile-time. + + This macro requires three or more arguments but uses at most the first + two, so that the _Static_assert macro optionally defined below supports + both the C11 two-argument syntax and the C2X one-argument syntax. Unfortunately, unlike C11, this implementation must appear as an ordinary declaration, and cannot appear inside struct { ... }. */ -#ifdef _GL_HAVE__STATIC_ASSERT -# define _GL_VERIFY _Static_assert +#if defined _GL_HAVE__STATIC_ASSERT +# define _GL_VERIFY(R, DIAGNOSTIC, ...) 
_Static_assert (R, DIAGNOSTIC) #else -# define _GL_VERIFY(R, DIAGNOSTIC) \ +# define _GL_VERIFY(R, DIAGNOSTIC, ...) \ extern int (*_GL_GENSYM (_gl_verify_function) (void)) \ [_GL_VERIFY_TRUE (R, DIAGNOSTIC)] #endif /* _GL_STATIC_ASSERT_H is defined if this code is copied into assert.h. */ #ifdef _GL_STATIC_ASSERT_H -# if !defined _GL_HAVE__STATIC_ASSERT && !defined _Static_assert -# define _Static_assert(R, DIAGNOSTIC) _GL_VERIFY (R, DIAGNOSTIC) +# if !defined _GL_HAVE__STATIC_ASSERT1 && !defined _Static_assert +# define _Static_assert(...) \ + _GL_VERIFY (__VA_ARGS__, "static assertion failed", -) # endif -# if !defined _GL_HAVE_STATIC_ASSERT && !defined static_assert +# if !defined _GL_HAVE_STATIC_ASSERT1 && !defined static_assert # define static_assert _Static_assert /* C11 requires this #define. */ # endif #endif /* @assert.h omit start@ */ +#if 3 < __GNUC__ + (3 < __GNUC_MINOR__ + (4 <= __GNUC_PATCHLEVEL__)) +# define _GL_HAS_BUILTIN_TRAP 1 +#elif defined __has_builtin +# define _GL_HAS_BUILTIN_TRAP __has_builtin (__builtin_trap) +#else +# define _GL_HAS_BUILTIN_TRAP 0 +#endif + +#if 4 < __GNUC__ + (5 <= __GNUC_MINOR__) +# define _GL_HAS_BUILTIN_UNREACHABLE 1 +#elif defined __has_builtin +# define _GL_HAS_BUILTIN_UNREACHABLE __has_builtin (__builtin_unreachable) +#else +# define _GL_HAS_BUILTIN_UNREACHABLE 0 +#endif + /* Each of these macros verifies that its argument R is nonzero. To be portable, R should be an integer constant expression. Unlike assert (R), there is no run-time overhead. There are two macros, since no single macro can be used in all - contexts in C. verify_true (R) is for scalar contexts, including + contexts in C. verify_expr (R, E) is for scalar contexts, including integer constant expression contexts. verify (R) is for declaration contexts, e.g., the top level. */ -/* Verify requirement R at compile-time, as an integer constant expression. - Return 1. This is equivalent to verify_expr (R, 1). 
- - verify_true is obsolescent; please use verify_expr instead. */ - -#define verify_true(R) _GL_VERIFY_TRUE (R, "verify_true (" #R ")") - /* Verify requirement R at compile-time. Return the value of the expression E. */ @@ -246,32 +265,35 @@ template (_GL_VERIFY_TRUE (R, "verify_expr (" #R ", " #E ")") ? (E) : (E)) /* Verify requirement R at compile-time, as a declaration without a - trailing ';'. */ + trailing ';'. verify (R) acts like static_assert (R) except that + it is portable to C11/C++14 and earlier, it can issue better + diagnostics, and its name is shorter and may be more convenient. */ -#define verify(R) _GL_VERIFY (R, "verify (" #R ")") - -#ifndef __has_builtin -# define __has_builtin(x) 0 +#ifdef __PGI +/* PGI barfs if R is long. */ +# define verify(R) _GL_VERIFY (R, "verify (...)", -) +#else +# define verify(R) _GL_VERIFY (R, "verify (" #R ")", -) #endif -/* Assume that R always holds. This lets the compiler optimize - accordingly. R should not have side-effects; it may or may not be - evaluated. Behavior is undefined if R is false. */ +/* Assume that R always holds. Behavior is undefined if R is false, + fails to evaluate, or has side effects. Although assuming R can + help a compiler generate better code or diagnostics, performance + can suffer if R uses hard-to-optimize features such as function + calls not inlined by the compiler. */ -#if (__has_builtin (__builtin_unreachable) \ - || 4 < __GNUC__ + (5 <= __GNUC_MINOR__)) +#if _GL_HAS_BUILTIN_UNREACHABLE # define assume(R) ((R) ? (void) 0 : __builtin_unreachable ()) #elif 1200 <= _MSC_VER # define assume(R) __assume (R) -#elif (defined lint \ - && (__has_builtin (__builtin_trap) \ - || 3 < __GNUC__ + (3 < __GNUC_MINOR__ + (4 <= __GNUC_PATCHLEVEL__)))) +#elif (defined GCC_LINT || defined lint) && _GL_HAS_BUILTIN_TRAP /* Doing it this way helps various packages when configured with --enable-gcc-warnings, which compiles with -Dlint. It's nicer when 'assume' silences warnings even with older GCCs. 
*/ # define assume(R) ((R) ? (void) 0 : __builtin_trap ()) #else -# define assume(R) ((void) (0 && (R))) + /* Some tools grok NOTREACHED, e.g., Oracle Studio 12.6. */ +# define assume(R) ((R) ? (void) 0 : /*NOTREACHED*/ (void) 0) #endif /* @assert.h omit end@ */ diff --git a/contrib/grep/lib/version-etc-fsf.c b/contrib/grep/lib/version-etc-fsf.c index ad78d0d794..20fa0d0aff 100644 --- a/contrib/grep/lib/version-etc-fsf.c +++ b/contrib/grep/lib/version-etc-fsf.c @@ -1,5 +1,5 @@ /* Variable with FSF copyright information, for version-etc. - Copyright (C) 1999-2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 1999-2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Jim Meyering. */ diff --git a/contrib/grep/lib/version-etc.c b/contrib/grep/lib/version-etc.c index a93b008dba..506f0175e7 100644 --- a/contrib/grep/lib/version-etc.c +++ b/contrib/grep/lib/version-etc.c @@ -1,5 +1,5 @@ /* Print --version and bug-reporting information in a consistent format. - Copyright (C) 1999-2015 Free Software Foundation, Inc. + Copyright (C) 1999-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Jim Meyering. 
*/ @@ -23,7 +23,6 @@ #include #include -#include #if USE_UNLOCKED_IO # include "unlocked-io.h" @@ -38,7 +37,7 @@ # define PACKAGE PACKAGE_TARNAME #endif -enum { COPYRIGHT_YEAR = 2015 }; +enum { COPYRIGHT_YEAR = 2020 }; /* The three functions below display the --version information the standard way. @@ -83,20 +82,24 @@ version_etc_arn (FILE *stream, locale. Otherwise, do not translate "(C)"; leave it as-is. */ fprintf (stream, version_etc_copyright, _("(C)"), COPYRIGHT_YEAR); - fputs (_("\ -\n\ -License GPLv3+: GNU GPL version 3 or later .\n\ + fputs ("\n", stream); + + /* TRANSLATORS: The %s placeholder is the web address of the GPL license. */ + fprintf (stream, _("\ +License GPLv3+: GNU GPL version 3 or later <%s>.\n\ This is free software: you are free to change and redistribute it.\n\ There is NO WARRANTY, to the extent permitted by law.\n\ -\n\ "), - stream); + "https://gnu.org/licenses/gpl.html"); + + fputs ("\n", stream); switch (n_authors) { case 0: - /* The caller must provide at least one author name. */ - abort (); + /* No authors are given. The caller should output authorship + info after calling this function. */ + break; case 1: /* TRANSLATORS: %s denotes an author name. */ fprintf (stream, _("Written by %s.\n"), authors[0]); @@ -238,11 +241,12 @@ version_etc (FILE *stream, void emit_bug_reporting_address (void) { + fputs ("\n", stdout); /* TRANSLATORS: The placeholder indicates the bug-reporting address for this package. Please add _another line_ saying "Report translation bugs to <...>\n" with the address for translation bugs (typically your translation team's web or email address). 
*/ - printf (_("\nReport bugs to: %s\n"), PACKAGE_BUGREPORT); + printf (_("Report bugs to: %s\n"), PACKAGE_BUGREPORT); #ifdef PACKAGE_PACKAGER_BUG_REPORTS printf (_("Report %s bugs to: %s\n"), PACKAGE_PACKAGER, PACKAGE_PACKAGER_BUG_REPORTS); @@ -250,9 +254,9 @@ emit_bug_reporting_address (void) #ifdef PACKAGE_URL printf (_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); #else - printf (_("%s home page: \n"), - PACKAGE_NAME, PACKAGE); + printf (_("%s home page: <%s>\n"), + PACKAGE_NAME, "https://www.gnu.org/software/" PACKAGE "/"); #endif - fputs (_("General help using GNU software: \n"), - stdout); + printf (_("General help using GNU software: <%s>\n"), + "https://www.gnu.org/gethelp/"); } diff --git a/contrib/grep/lib/version-etc.h b/contrib/grep/lib/version-etc.h index 408581e301..73cad566d8 100644 --- a/contrib/grep/lib/version-etc.h +++ b/contrib/grep/lib/version-etc.h @@ -1,5 +1,5 @@ /* Print --version and bug-reporting information in a consistent format. - Copyright (C) 1999, 2003, 2005, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 1999, 2003, 2005, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Jim Meyering. */ diff --git a/contrib/grep/lib/wcrtomb.c b/contrib/grep/lib/wcrtomb.c deleted file mode 100644 index b38d2e0d64..0000000000 --- a/contrib/grep/lib/wcrtomb.c +++ /dev/null @@ -1,53 +0,0 @@ -/* Convert wide character to multibyte character. - Copyright (C) 2008-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2008. 
- - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#include -#include - - -size_t -wcrtomb (char *s, wchar_t wc, mbstate_t *ps) -{ - /* This implementation of wcrtomb on top of wctomb() supports only - stateless encodings. ps must be in the initial state. */ - if (ps != NULL && !mbsinit (ps)) - { - errno = EINVAL; - return (size_t)(-1); - } - - if (s == NULL) - /* We know the NUL wide character corresponds to the NUL character. */ - return 1; - else - { - int ret = wctomb (s, wc); - - if (ret >= 0) - return ret; - else - { - errno = EILSEQ; - return (size_t)(-1); - } - } -} diff --git a/contrib/grep/lib/wctob.c b/contrib/grep/lib/wctob.c deleted file mode 100644 index 3ff6c7b71c..0000000000 --- a/contrib/grep/lib/wctob.c +++ /dev/null @@ -1,38 +0,0 @@ -/* Convert wide character to unibyte character. - Copyright (C) 2008, 2010-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2008. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -#include -#include - -int -wctob (wint_t wc) -{ - char buf[64]; - - if (!(MB_CUR_MAX <= sizeof (buf))) - abort (); - /* Handle the case where WEOF is a value that does not fit in a wchar_t. */ - if (wc == (wchar_t)wc) - if (wctomb (buf, (wchar_t)wc) == 1) - return (unsigned char) buf[0]; - return EOF; -} diff --git a/contrib/grep/lib/wctomb-impl.h b/contrib/grep/lib/wctomb-impl.h deleted file mode 100644 index 41217d5207..0000000000 --- a/contrib/grep/lib/wctomb-impl.h +++ /dev/null @@ -1,34 +0,0 @@ -/* Convert wide character to multibyte character. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Written by Bruno Haible , 2011. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -int -wctomb (char *s, wchar_t wc) -{ - if (s == NULL) - return 0; - else - { - mbstate_t state; - size_t result; - - memset (&state, 0, sizeof (mbstate_t)); - result = wcrtomb (s, wc, &state); - if (result == (size_t)-1) - return -1; - return result; - } -} diff --git a/contrib/grep/lib/wcwidth.c b/contrib/grep/lib/wcwidth.c deleted file mode 100644 index d2b71189fd..0000000000 --- a/contrib/grep/lib/wcwidth.c +++ /dev/null @@ -1,50 +0,0 @@ -/* Determine the number of screen columns needed for a character. 
- Copyright (C) 2006-2007, 2010-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include - -/* Get iswprint. */ -#include - -#include "localcharset.h" -#include "streq.h" -#include "uniwidth.h" - -int -wcwidth (wchar_t wc) -#undef wcwidth -{ - /* In UTF-8 locales, use a Unicode aware width function. */ - const char *encoding = locale_charset (); - if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0)) - { - /* We assume that in a UTF-8 locale, a wide character is the same as a - Unicode character. */ - return uc_width (wc, encoding); - } - else - { - /* Otherwise, fall back to the system's wcwidth function. */ -#if HAVE_WCWIDTH - return wcwidth (wc); -#else - return wc == 0 ? 0 : iswprint (wc) ? 1 : -1; -#endif - } -} diff --git a/contrib/grep/lib/xalloc-die.c b/contrib/grep/lib/xalloc-die.c index 3cea6386c5..68559deebb 100644 --- a/contrib/grep/lib/xalloc-die.c +++ b/contrib/grep/lib/xalloc-die.c @@ -1,6 +1,6 @@ /* Report a memory allocation failure and exit. - Copyright (C) 1997-2000, 2002-2004, 2006, 2009-2015 Free Software + Copyright (C) 1997-2000, 2002-2004, 2006, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. 
You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include diff --git a/contrib/grep/lib/xalloc-oversized.h b/contrib/grep/lib/xalloc-oversized.h index f0e9778f73..13ee23031a 100644 --- a/contrib/grep/lib/xalloc-oversized.h +++ b/contrib/grep/lib/xalloc-oversized.h @@ -1,6 +1,6 @@ /* xalloc-oversized.h -- memory allocation size checking - Copyright (C) 1990-2000, 2003-2004, 2006-2015 Free Software Foundation, Inc. + Copyright (C) 1990-2000, 2003-2004, 2006-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,26 +13,48 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef XALLOC_OVERSIZED_H_ -# define XALLOC_OVERSIZED_H_ +#define XALLOC_OVERSIZED_H_ -# include - -/* Return 1 if an array of N objects, each of size S, cannot exist due - to size arithmetic overflow. S must be positive and N must be - nonnegative. This is a macro, not a function, so that it - works correctly even when SIZE_MAX < N. +#include +#include +/* True if N * S would overflow in a size_t calculation, + or would generate a value larger than PTRDIFF_MAX. + This expands to a constant expression if N and S are both constants. By gnulib convention, SIZE_MAX represents overflow in size - calculations, so the conservative dividend to use here is - SIZE_MAX - 1, since SIZE_MAX might represent an overflowed value. - However, malloc (SIZE_MAX) fails on all known hosts where - sizeof (ptrdiff_t) <= sizeof (size_t), so do not bother to test for - exactly-SIZE_MAX allocations on such hosts; this avoids a test and - branch when S is known to be 1. 
*/ + calculations, so the conservative size_t-based dividend to use here + is SIZE_MAX - 1. */ +#define __xalloc_oversized(n, s) \ + ((size_t) (PTRDIFF_MAX < SIZE_MAX ? PTRDIFF_MAX : SIZE_MAX - 1) / (s) < (n)) + +#if PTRDIFF_MAX < SIZE_MAX +typedef ptrdiff_t __xalloc_count_type; +#else +typedef size_t __xalloc_count_type; +#endif + +/* Return 1 if an array of N objects, each of size S, cannot exist + reliably due to size or ptrdiff_t arithmetic overflow. S must be + positive and N must be nonnegative. This is a macro, not a + function, so that it works correctly even when SIZE_MAX < N. */ + +#if 7 <= __GNUC__ +# define xalloc_oversized(n, s) \ + __builtin_mul_overflow_p (n, s, (__xalloc_count_type) 1) +#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__ # define xalloc_oversized(n, s) \ - ((size_t) (sizeof (ptrdiff_t) <= sizeof (size_t) ? -1 : -2) / (s) < (n)) + (__builtin_constant_p (n) && __builtin_constant_p (s) \ + ? __xalloc_oversized (n, s) \ + : ({ __xalloc_count_type __xalloc_count; \ + __builtin_mul_overflow (n, s, &__xalloc_count); })) + +/* Other compilers use integer division; this may be slower but is + more portable. */ +#else +# define xalloc_oversized(n, s) __xalloc_oversized (n, s) +#endif #endif /* !XALLOC_OVERSIZED_H_ */ diff --git a/contrib/grep/lib/xalloc.h b/contrib/grep/lib/xalloc.h index 81ef680a3a..9563b0bc92 100644 --- a/contrib/grep/lib/xalloc.h +++ b/contrib/grep/lib/xalloc.h @@ -1,6 +1,6 @@ /* xalloc.h -- malloc with out-of-memory checking - Copyright (C) 1990-2000, 2003-2004, 2006-2015 Free Software Foundation, Inc. + Copyright (C) 1990-2000, 2003-2004, 2006-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,12 +13,13 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. 
If not, see . */ #ifndef XALLOC_H_ #define XALLOC_H_ #include +#include #include "xalloc-oversized.h" @@ -35,12 +36,6 @@ extern "C" { #endif -#if __GNUC__ >= 3 -# define _GL_ATTRIBUTE_MALLOC __attribute__ ((__malloc__)) -#else -# define _GL_ATTRIBUTE_MALLOC -#endif - #if ! defined __clang__ && \ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) # define _GL_ATTRIBUTE_ALLOC_SIZE(args) __attribute__ ((__alloc_size__ args)) @@ -193,14 +188,17 @@ x2nrealloc (void *p, size_t *pn, size_t s) n = DEFAULT_MXFAST / s; n += !n; } + if (xalloc_oversized (n, s)) + xalloc_die (); } else { /* Set N = floor (1.5 * N) + 1 so that progress is made even if N == 0. - Check for overflow, so that N * S stays in size_t range. - The check may be slightly conservative, but an exact check isn't - worth the trouble. */ - if ((size_t) -1 / 3 * 2 / s <= n) + Check for overflow, so that N * S stays in both ptrdiff_t and + size_t range. The check may be slightly conservative, but an + exact check isn't worth the trouble. */ + if ((PTRDIFF_MAX < SIZE_MAX ? PTRDIFF_MAX : SIZE_MAX) / 3 * 2 / s + <= n) xalloc_die (); n += n / 2 + 1; } diff --git a/contrib/grep/lib/dirfd.c b/contrib/grep/lib/xbinary-io.c similarity index 53% rename from contrib/grep/lib/dirfd.c rename to contrib/grep/lib/xbinary-io.c index c91f8e5587..ba012c9e87 100644 --- a/contrib/grep/lib/dirfd.c +++ b/contrib/grep/lib/xbinary-io.c @@ -1,6 +1,5 @@ -/* dirfd.c -- return the file descriptor associated with an open DIR* - - Copyright (C) 2001, 2006, 2008-2015 Free Software Foundation, Inc. +/* Binary mode I/O with checking + Copyright 2017-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,20 +12,30 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* Written by Jim Meyering. 
*/ + along with this program. If not, see . */ #include -#include +#define XBINARY_IO_INLINE _GL_EXTERN_INLINE +#include "xbinary-io.h" + #include +#include +#include +#include "exitfail.h" +#include "verify.h" -int -dirfd (DIR *dir_p) +#include "gettext.h" +#define _(msgid) gettext (msgid) + +#if O_BINARY + +_Noreturn void +xset_binary_mode_error (void) { - int fd = DIR_TO_FD (dir_p); - if (fd == -1) - errno = ENOTSUP; - return fd; + error (exit_failure, errno, + _("failed to set file descriptor text/binary mode")); + assume (false); } + +#endif diff --git a/contrib/grep/lib/xbinary-io.h b/contrib/grep/lib/xbinary-io.h new file mode 100644 index 0000000000..cd149c2983 --- /dev/null +++ b/contrib/grep/lib/xbinary-io.h @@ -0,0 +1,48 @@ +/* Binary mode I/O with checking + Copyright 2017-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef _XBINARY_IO_H +#define _XBINARY_IO_H + +#include "binary-io.h" + +#ifndef _GL_INLINE_HEADER_BEGIN + #error "Please include config.h first." +#endif +_GL_INLINE_HEADER_BEGIN +#ifndef XBINARY_IO_INLINE +# define XBINARY_IO_INLINE _GL_INLINE +#endif + +#if O_BINARY +extern _Noreturn void xset_binary_mode_error (void); +#else +XBINARY_IO_INLINE void xset_binary_mode_error (void) {} +#endif + +/* Set the mode of FD to MODE, which should be either O_TEXT or O_BINARY. + Report an error and exit if this fails. 
*/ + +XBINARY_IO_INLINE void +xset_binary_mode (int fd, int mode) +{ + if (set_binary_mode (fd, mode) < 0) + xset_binary_mode_error (); +} + +_GL_INLINE_HEADER_END + +#endif /* _XBINARY_IO_H */ diff --git a/contrib/grep/lib/xmalloc.c b/contrib/grep/lib/xmalloc.c index e246559f87..486873602e 100644 --- a/contrib/grep/lib/xmalloc.c +++ b/contrib/grep/lib/xmalloc.c @@ -1,6 +1,6 @@ /* xmalloc.c -- malloc with out of memory checking - Copyright (C) 1990-2000, 2002-2006, 2008-2015 Free Software Foundation, Inc. + Copyright (C) 1990-2000, 2002-2006, 2008-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include @@ -76,14 +76,14 @@ x2realloc (void *p, size_t *pn) return x2nrealloc (p, pn, 1); } -/* Allocate S bytes of zeroed memory dynamically, with error checking. +/* Allocate N bytes of zeroed memory dynamically, with error checking. There's no need for xnzalloc (N, S), since it would be equivalent to xcalloc (N, S). */ void * -xzalloc (size_t s) +xzalloc (size_t n) { - return memset (xmalloc (s), 0, s); + return xcalloc (n, 1); } /* Allocate zeroed memory for N elements of S bytes, with error @@ -93,11 +93,11 @@ void * xcalloc (size_t n, size_t s) { void *p; - /* Test for overflow, since some calloc implementations don't have - proper overflow checks. But omit overflow and size-zero tests if - HAVE_GNU_CALLOC, since GNU calloc catches overflow and never - returns NULL if successful. */ - if ((! HAVE_GNU_CALLOC && xalloc_oversized (n, s)) + /* Test for overflow, since objects with size greater than + PTRDIFF_MAX cause pointer subtraction to go awry. 
Omit size-zero + tests if HAVE_GNU_CALLOC, since GNU calloc never returns NULL if + successful. */ + if (xalloc_oversized (n, s) || (! (p = calloc (n, s)) && (HAVE_GNU_CALLOC || n != 0))) xalloc_die (); return p; diff --git a/contrib/grep/lib/xstriconv.c b/contrib/grep/lib/xstriconv.c index d5e19515c2..f3978bfa81 100644 --- a/contrib/grep/lib/xstriconv.c +++ b/contrib/grep/lib/xstriconv.c @@ -1,5 +1,5 @@ /* Charset conversion with out-of-memory checking. - Copyright (C) 2001-2004, 2006, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2004, 2006, 2009-2020 Free Software Foundation, Inc. Written by Bruno Haible. This program is free software: you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #include diff --git a/contrib/grep/lib/xstriconv.h b/contrib/grep/lib/xstriconv.h index 6319989f7d..b780f51699 100644 --- a/contrib/grep/lib/xstriconv.h +++ b/contrib/grep/lib/xstriconv.h @@ -1,5 +1,5 @@ /* Charset conversion with out-of-memory checking. - Copyright (C) 2001-2004, 2006-2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2004, 2006-2007, 2009-2020 Free Software Foundation, Inc. Written by Bruno Haible and Simon Josefsson. This program is free software: you can redistribute it and/or modify @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef _XSTRICONV_H #define _XSTRICONV_H diff --git a/contrib/grep/lib/xstrtol-error.c b/contrib/grep/lib/xstrtol-error.c deleted file mode 100644 index 6a2ebea0e3..0000000000 --- a/contrib/grep/lib/xstrtol-error.c +++ /dev/null @@ -1,98 +0,0 @@ -/* A more useful interface to strtol. 
- - Copyright (C) 1995-1996, 1998-1999, 2001-2004, 2006-2015 Free Software - Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include -#include "xstrtol.h" - -#include - -#include "error.h" -#include "exitfail.h" -#include "gettext.h" - -#define N_(msgid) msgid - -/* Report an error for an invalid integer in an option argument. - - ERR is the error code returned by one of the xstrto* functions. - - Use OPT_IDX to decide whether to print the short option string "C" - or "-C" or a long option string derived from LONG_OPTION. OPT_IDX - is -2 if the short option "C" was used, without any leading "-"; it - is -1 if the short option "-C" was used; otherwise it is an index - into LONG_OPTIONS, which should have a name preceded by two '-' - characters. - - ARG is the option-argument containing the integer. - - After reporting an error, exit with status EXIT_STATUS if it is - nonzero. 
*/ - -static void -xstrtol_error (enum strtol_error err, - int opt_idx, char c, struct option const *long_options, - char const *arg, - int exit_status) -{ - char const *hyphens = "--"; - char const *msgid; - char const *option; - char option_buffer[2]; - - switch (err) - { - default: - abort (); - - case LONGINT_INVALID: - msgid = N_("invalid %s%s argument '%s'"); - break; - - case LONGINT_INVALID_SUFFIX_CHAR: - case LONGINT_INVALID_SUFFIX_CHAR_WITH_OVERFLOW: - msgid = N_("invalid suffix in %s%s argument '%s'"); - break; - - case LONGINT_OVERFLOW: - msgid = N_("%s%s argument '%s' too large"); - break; - } - - if (opt_idx < 0) - { - hyphens -= opt_idx; - option_buffer[0] = c; - option_buffer[1] = '\0'; - option = option_buffer; - } - else - option = long_options[opt_idx].name; - - error (exit_status, 0, gettext (msgid), hyphens, option, arg); -} - -/* Like xstrtol_error, except exit with a failure status. */ - -void -xstrtol_fatal (enum strtol_error err, - int opt_idx, char c, struct option const *long_options, - char const *arg) -{ - xstrtol_error (err, opt_idx, c, long_options, arg, exit_failure); - abort (); -} diff --git a/contrib/grep/lib/xstrtol.c b/contrib/grep/lib/xstrtol.c index bd3ffeb27f..b567fffc6a 100644 --- a/contrib/grep/lib/xstrtol.c +++ b/contrib/grep/lib/xstrtol.c @@ -1,6 +1,6 @@ /* A more useful interface to strtol. - Copyright (C) 1995-1996, 1998-2001, 2003-2007, 2009-2015 Free Software + Copyright (C) 1995-1996, 1998-2001, 2003-2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* Written by Jim Meyering. 
*/ @@ -41,20 +41,8 @@ #include #include "assure.h" -#include "intprops.h" -/* xstrtoll.c and xstrtoull.c, which include this file, require that - ULLONG_MAX, LLONG_MAX, LLONG_MIN are defined, but does not - define them on all platforms. */ -#ifndef ULLONG_MAX -# define ULLONG_MAX TYPE_MAXIMUM (unsigned long long) -#endif -#ifndef LLONG_MAX -# define LLONG_MAX TYPE_MAXIMUM (long long int) -#endif -#ifndef LLONG_MIN -# define LLONG_MIN TYPE_MINIMUM (long long int) -#endif +#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1)) static strtol_error bkm_scale (__strtol_t *x, int scale_factor) @@ -148,8 +136,11 @@ __xstrtol (const char *s, char **ptr, int strtol_base, return err | LONGINT_INVALID_SUFFIX_CHAR; } - if (strchr (valid_suffixes, '0')) + switch (**p) { + case 'E': case 'G': case 'g': case 'k': case 'K': case 'M': case 'm': + case 'P': case 'T': case 't': case 'Y': case 'Z': + /* The "valid suffix" '0' is a special flag meaning that an optional second suffix is allowed, which can change the base. A suffix "B" (e.g. "100MB") stands for a power @@ -157,19 +148,20 @@ __xstrtol (const char *s, char **ptr, int strtol_base, a power of 1024. If no suffix (e.g. "100M"), assume power-of-1024. */ - switch (p[0][1]) - { - case 'i': - if (p[0][2] == 'B') - suffixes += 2; - break; - - case 'B': - case 'D': /* 'D' is obsolescent */ - base = 1000; - suffixes++; - break; - } + if (strchr (valid_suffixes, '0')) + switch (p[0][1]) + { + case 'i': + if (p[0][2] == 'B') + suffixes += 2; + break; + + case 'B': + case 'D': /* 'D' is obsolescent */ + base = 1000; + suffixes++; + break; + } } switch (**p) @@ -179,6 +171,9 @@ __xstrtol (const char *s, char **ptr, int strtol_base, break; case 'B': + /* This obsolescent first suffix is distinct from the 'B' + second suffix above. E.g., 'tar -L 1000B' means change + the tape after writing 1000 KiB of data. 
*/ overflow = bkm_scale (&tmp, 1024); break; diff --git a/contrib/grep/lib/xstrtol.h b/contrib/grep/lib/xstrtol.h index 19202dcddb..15d9bb73cf 100644 --- a/contrib/grep/lib/xstrtol.h +++ b/contrib/grep/lib/xstrtol.h @@ -1,6 +1,6 @@ /* A more useful interface to strtol. - Copyright (C) 1995-1996, 1998-1999, 2001-2004, 2006-2015 Free Software + Copyright (C) 1995-1996, 1998-1999, 2001-2004, 2006-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -14,12 +14,11 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ #ifndef XSTRTOL_H_ # define XSTRTOL_H_ 1 -# include # include # ifndef _STRTOL_ERROR @@ -43,31 +42,9 @@ typedef enum strtol_error strtol_error; strtol_error name (const char *, char **, int, type *, const char *); _DECLARE_XSTRTOL (xstrtol, long int) _DECLARE_XSTRTOL (xstrtoul, unsigned long int) -_DECLARE_XSTRTOL (xstrtoimax, intmax_t) -_DECLARE_XSTRTOL (xstrtoumax, uintmax_t) - -#if HAVE_LONG_LONG_INT _DECLARE_XSTRTOL (xstrtoll, long long int) _DECLARE_XSTRTOL (xstrtoull, unsigned long long int) -#endif - -/* Report an error for an invalid integer in an option argument. - - ERR is the error code returned by one of the xstrto* functions. - - Use OPT_IDX to decide whether to print the short option string "C" - or "-C" or a long option string derived from LONG_OPTION. OPT_IDX - is -2 if the short option "C" was used, without any leading "-"; it - is -1 if the short option "-C" was used; otherwise it is an index - into LONG_OPTIONS, which should have a name preceded by two '-' - characters. - - ARG is the option-argument containing the integer. - - After reporting an error, exit with a failure status. 
*/ - -_Noreturn void xstrtol_fatal (enum strtol_error, - int, char, struct option const *, - char const *); +_DECLARE_XSTRTOL (xstrtoimax, intmax_t) +_DECLARE_XSTRTOL (xstrtoumax, uintmax_t) #endif /* not XSTRTOL_H_ */ diff --git a/contrib/grep/src/dfa.c b/contrib/grep/src/dfa.c deleted file mode 100644 index 856a886989..0000000000 --- a/contrib/grep/src/dfa.c +++ /dev/null @@ -1,4184 +0,0 @@ -/* dfa.c - deterministic extended regexp routines for GNU - Copyright (C) 1988, 1998, 2000, 2002, 2004-2005, 2007-2015 Free Software - Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., - 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA */ - -/* Written June, 1988 by Mike Haertel - Modified July, 1988 by Arthur David Olson to assist BMG speedups */ - -#include - -#include "dfa.h" - -#include -#include -#include -#include -#include -#include -#include - -#define STREQ(a, b) (strcmp (a, b) == 0) - -/* ISASCIIDIGIT differs from isdigit, as follows: - - Its arg may be any int or unsigned int; it need not be an unsigned char. - - It's guaranteed to evaluate its argument exactly once. - - It's typically faster. - Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that - only '0' through '9' are digits. Prefer ISASCIIDIGIT to isdigit unless - it's important to use the locale's definition of "digit" even when the - host does not conform to Posix. 
*/ -#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9) - -#include "gettext.h" -#define _(str) gettext (str) - -#include -#include - -/* HPUX defines these as macros in sys/param.h. */ -#ifdef setbit -# undef setbit -#endif -#ifdef clrbit -# undef clrbit -#endif - -/* First integer value that is greater than any character code. */ -enum { NOTCHAR = 1 << CHAR_BIT }; - -/* This represents part of a character class. It must be unsigned and - at least CHARCLASS_WORD_BITS wide. Any excess bits are zero. */ -typedef unsigned int charclass_word; - -/* The number of bits used in a charclass word. utf8_classes assumes - this is exactly 32. */ -enum { CHARCLASS_WORD_BITS = 32 }; - -/* The maximum useful value of a charclass_word; all used bits are 1. */ -#define CHARCLASS_WORD_MASK \ - (((charclass_word) 1 << (CHARCLASS_WORD_BITS - 1) << 1) - 1) - -/* Number of words required to hold a bit for every character. */ -enum -{ - CHARCLASS_WORDS = (NOTCHAR + CHARCLASS_WORD_BITS - 1) / CHARCLASS_WORD_BITS -}; - -/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */ -typedef charclass_word charclass[CHARCLASS_WORDS]; - -/* Convert a possibly-signed character to an unsigned character. This is - a bit safer than casting to unsigned char, since it catches some type - errors that the cast doesn't. */ -static unsigned char -to_uchar (char ch) -{ - return ch; -} - -/* Contexts tell us whether a character is a newline or a word constituent. - Word-constituent characters are those that satisfy iswalnum, plus '_'. - Each character has a single CTX_* value; bitmasks of CTX_* values denote - a particular character class. - - A state also stores a context value, which is a bitmask of CTX_* values. - A state's context represents a set of characters that the state's - predecessors must match. For example, a state whose context does not - include CTX_LETTER will never have transitions where the previous - character is a word constituent. 
A state whose context is CTX_ANY - might have transitions from any character. */ - -#define CTX_NONE 1 -#define CTX_LETTER 2 -#define CTX_NEWLINE 4 -#define CTX_ANY 7 - -/* Sometimes characters can only be matched depending on the surrounding - context. Such context decisions depend on what the previous character - was, and the value of the current (lookahead) character. Context - dependent constraints are encoded as 8 bit integers. Each bit that - is set indicates that the constraint succeeds in the corresponding - context. - - bit 8-11 - valid contexts when next character is CTX_NEWLINE - bit 4-7 - valid contexts when next character is CTX_LETTER - bit 0-3 - valid contexts when next character is CTX_NONE - - The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint - succeeds in a particular context. Prev is a bitmask of possible - context values for the previous character, curr is the (single-bit) - context value for the lookahead character. */ -#define NEWLINE_CONSTRAINT(constraint) (((constraint) >> 8) & 0xf) -#define LETTER_CONSTRAINT(constraint) (((constraint) >> 4) & 0xf) -#define OTHER_CONSTRAINT(constraint) ((constraint) & 0xf) - -#define SUCCEEDS_IN_CONTEXT(constraint, prev, curr) \ - ((((curr) & CTX_NONE ? OTHER_CONSTRAINT (constraint) : 0) \ - | ((curr) & CTX_LETTER ? LETTER_CONSTRAINT (constraint) : 0) \ - | ((curr) & CTX_NEWLINE ? NEWLINE_CONSTRAINT (constraint) : 0)) & (prev)) - -/* The following macros describe what a constraint depends on. 
*/ -#define PREV_NEWLINE_CONSTRAINT(constraint) (((constraint) >> 2) & 0x111) -#define PREV_LETTER_CONSTRAINT(constraint) (((constraint) >> 1) & 0x111) -#define PREV_OTHER_CONSTRAINT(constraint) ((constraint) & 0x111) - -#define PREV_NEWLINE_DEPENDENT(constraint) \ - (PREV_NEWLINE_CONSTRAINT (constraint) != PREV_OTHER_CONSTRAINT (constraint)) -#define PREV_LETTER_DEPENDENT(constraint) \ - (PREV_LETTER_CONSTRAINT (constraint) != PREV_OTHER_CONSTRAINT (constraint)) - -/* Tokens that match the empty string subject to some constraint actually - work by applying that constraint to determine what may follow them, - taking into account what has gone before. The following values are - the constraints corresponding to the special tokens previously defined. */ -#define NO_CONSTRAINT 0x777 -#define BEGLINE_CONSTRAINT 0x444 -#define ENDLINE_CONSTRAINT 0x700 -#define BEGWORD_CONSTRAINT 0x050 -#define ENDWORD_CONSTRAINT 0x202 -#define LIMWORD_CONSTRAINT 0x252 -#define NOTLIMWORD_CONSTRAINT 0x525 - -/* The regexp is parsed into an array of tokens in postfix form. Some tokens - are operators and others are terminal symbols. Most (but not all) of these - codes are returned by the lexical analyzer. */ - -typedef ptrdiff_t token; - -/* Predefined token values. */ -enum -{ - END = -1, /* END is a terminal symbol that matches the - end of input; any value of END or less in - the parse tree is such a symbol. Accepting - states of the DFA are those that would have - a transition on END. */ - - /* Ordinary character values are terminal symbols that match themselves. */ - - EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches - the empty string. */ - - BACKREF, /* BACKREF is generated by \ - or by any other construct that - is not completely handled. If the scanner - detects a transition on backref, it returns - a kind of "semi-success" indicating that - the match will have to be verified with - a backtracking matcher. 
*/ - - BEGLINE, /* BEGLINE is a terminal symbol that matches - the empty string at the beginning of a - line. */ - - ENDLINE, /* ENDLINE is a terminal symbol that matches - the empty string at the end of a line. */ - - BEGWORD, /* BEGWORD is a terminal symbol that matches - the empty string at the beginning of a - word. */ - - ENDWORD, /* ENDWORD is a terminal symbol that matches - the empty string at the end of a word. */ - - LIMWORD, /* LIMWORD is a terminal symbol that matches - the empty string at the beginning or the - end of a word. */ - - NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that - matches the empty string not at - the beginning or end of a word. */ - - QMARK, /* QMARK is an operator of one argument that - matches zero or one occurrences of its - argument. */ - - STAR, /* STAR is an operator of one argument that - matches the Kleene closure (zero or more - occurrences) of its argument. */ - - PLUS, /* PLUS is an operator of one argument that - matches the positive closure (one or more - occurrences) of its argument. */ - - REPMN, /* REPMN is a lexical token corresponding - to the {m,n} construct. REPMN never - appears in the compiled token vector. */ - - CAT, /* CAT is an operator of two arguments that - matches the concatenation of its - arguments. CAT is never returned by the - lexical analyzer. */ - - OR, /* OR is an operator of two arguments that - matches either of its arguments. */ - - LPAREN, /* LPAREN never appears in the parse tree, - it is only a lexeme. */ - - RPAREN, /* RPAREN never appears in the parse tree. */ - - ANYCHAR, /* ANYCHAR is a terminal symbol that matches - a valid multibyte (or single byte) character. - It is used only if MB_CUR_MAX > 1. */ - - MBCSET, /* MBCSET is similar to CSET, but for - multibyte characters. */ - - WCHAR, /* Only returned by lex. wctok contains - the wide character representation. */ - - CSET /* CSET and (and any value greater) is a - terminal symbol that matches any of a - class of characters. 
*/ -}; - - -/* States of the recognizer correspond to sets of positions in the parse - tree, together with the constraints under which they may be matched. - So a position is encoded as an index into the parse tree together with - a constraint. */ -typedef struct -{ - size_t index; /* Index into the parse array. */ - unsigned int constraint; /* Constraint for matching this position. */ -} position; - -/* Sets of positions are stored as arrays. */ -typedef struct -{ - position *elems; /* Elements of this position set. */ - size_t nelem; /* Number of elements in this set. */ - size_t alloc; /* Number of elements allocated in ELEMS. */ -} position_set; - -/* Sets of leaves are also stored as arrays. */ -typedef struct -{ - size_t *elems; /* Elements of this position set. */ - size_t nelem; /* Number of elements in this set. */ -} leaf_set; - -/* A state of the dfa consists of a set of positions, some flags, - and the token value of the lowest-numbered position of the state that - contains an END token. */ -typedef struct -{ - size_t hash; /* Hash of the positions of this state. */ - position_set elems; /* Positions this state could match. */ - unsigned char context; /* Context from previous state. */ - unsigned short constraint; /* Constraint for this state to accept. */ - token first_end; /* Token value of the first END in elems. */ - position_set mbps; /* Positions which can match multibyte - characters, e.g., period. - Used only if MB_CUR_MAX > 1. */ -} dfa_state; - -/* States are indexed by state_num values. These are normally - nonnegative but -1 is used as a special value. */ -typedef ptrdiff_t state_num; - -/* A bracket operator. - e.g., [a-c], [[:alpha:]], etc. */ -struct mb_char_classes -{ - ptrdiff_t cset; - bool invert; - wchar_t *chars; /* Normal characters. */ - size_t nchars; -}; - -/* A compiled regular expression. */ -struct dfa -{ - /* Fields filled by the scanner. */ - charclass *charclasses; /* Array of character sets for CSET tokens. 
*/ - size_t cindex; /* Index for adding new charclasses. */ - size_t calloc; /* Number of charclasses allocated. */ - - /* Fields filled by the parser. */ - token *tokens; /* Postfix parse array. */ - size_t tindex; /* Index for adding new tokens. */ - size_t talloc; /* Number of tokens currently allocated. */ - size_t depth; /* Depth required of an evaluation stack - used for depth-first traversal of the - parse tree. */ - size_t nleaves; /* Number of leaves on the parse tree. */ - size_t nregexps; /* Count of parallel regexps being built - with dfaparse. */ - bool fast; /* The DFA is fast. */ - bool multibyte; /* MB_CUR_MAX > 1. */ - token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */ - mbstate_t mbs; /* Multibyte conversion state. */ - - /* dfaexec implementation. */ - char *(*dfaexec) (struct dfa *, char const *, char *, int, size_t *, int *); - - /* The following are valid only if MB_CUR_MAX > 1. */ - - /* The value of multibyte_prop[i] is defined by following rule. - if tokens[i] < NOTCHAR - bit 0 : tokens[i] is the first byte of a character, including - single-byte characters. - bit 1 : tokens[i] is the last byte of a character, including - single-byte characters. - - if tokens[i] = MBCSET - ("the index of mbcsets corresponding to this operator" << 2) + 3 - - e.g. - tokens - = 'single_byte_a', 'multi_byte_A', single_byte_b' - = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b' - multibyte_prop - = 3 , 1 , 0 , 2 , 3 - */ - int *multibyte_prop; - - /* A table indexed by byte values that contains the corresponding wide - character (if any) for that byte. WEOF means the byte is not a - valid single-byte character. */ - wint_t mbrtowc_cache[NOTCHAR]; - - /* Array of the bracket expression in the DFA. */ - struct mb_char_classes *mbcsets; - size_t nmbcsets; - size_t mbcsets_alloc; - - /* Fields filled by the superset. */ - struct dfa *superset; /* Hint of the dfa. */ - - /* Fields filled by the state builder. 
*/ - dfa_state *states; /* States of the dfa. */ - state_num sindex; /* Index for adding new states. */ - size_t salloc; /* Number of states currently allocated. */ - - /* Fields filled by the parse tree->NFA conversion. */ - position_set *follows; /* Array of follow sets, indexed by position - index. The follow of a position is the set - of positions containing characters that - could conceivably follow a character - matching the given position in a string - matching the regexp. Allocated to the - maximum possible position index. */ - bool searchflag; /* We are supposed to build a searching - as opposed to an exact matcher. A searching - matcher finds the first and shortest string - matching a regexp anywhere in the buffer, - whereas an exact matcher finds the longest - string matching, but anchored to the - beginning of the buffer. */ - - /* Fields filled by dfaexec. */ - state_num tralloc; /* Number of transition tables that have - slots so far, not counting trans[-1]. */ - int trcount; /* Number of transition tables that have - actually been built. */ - int min_trcount; /* Minimum of number of transition tables. - Always keep the number, even after freeing - the transition tables. It is also the - number of initial states. */ - state_num **trans; /* Transition tables for states that can - never accept. If the transitions for a - state have not yet been computed, or the - state could possibly accept, its entry in - this table is NULL. This points to one - past the start of the allocated array, - and trans[-1] is always NULL. */ - state_num **fails; /* Transition tables after failing to accept - on a state that potentially could do so. */ - int *success; /* Table of acceptance conditions used in - dfaexec and computed in build_state. */ - state_num *newlines; /* Transitions on newlines. The entry for a - newline in any transition table is always - -1 so we can count lines without wasting - too many cycles. 
The transition for a - newline is stored separately and handled - as a special case. Newline is also used - as a sentinel at the end of the buffer. */ - state_num initstate_letter; /* Initial state for letter context. */ - state_num initstate_others; /* Initial state for other contexts. */ - position_set mb_follows; /* Follow set added by ANYCHAR and/or MBCSET - on demand. */ - int *mb_match_lens; /* Array of length reduced by ANYCHAR and/or - MBCSET. Null if mb_follows.elems has not - been allocated. */ -}; - -/* Some macros for user access to dfa internals. */ - -/* S could possibly be an accepting state of R. */ -#define ACCEPTING(s, r) ((r).states[s].constraint) - -/* STATE accepts in the specified context. */ -#define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \ - SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr) - -static void regexp (void); - -static void -dfambcache (struct dfa *d) -{ - int i; - for (i = CHAR_MIN; i <= CHAR_MAX; ++i) - { - char c = i; - unsigned char uc = i; - mbstate_t s = { 0 }; - wchar_t wc; - d->mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF; - } -} - -/* Store into *PWC the result of converting the leading bytes of the - multibyte buffer S of length N bytes, using the mbrtowc_cache in *D - and updating the conversion state in *D. On conversion error, - convert just a single byte, to WEOF. Return the number of bytes - converted. - - This differs from mbrtowc (PWC, S, N, &D->mbs) as follows: - - * PWC points to wint_t, not to wchar_t. - * The last arg is a dfa *D instead of merely a multibyte conversion - state D->mbs. D also contains an mbrtowc_cache for speed. - * N must be at least 1. - * S[N - 1] must be a sentinel byte. - * Shift encodings are not supported. - * The return value is always in the range 1..N. - * D->mbs is always valid afterwards. - * *PWC is always set to something. 
*/ -static size_t -mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d) -{ - unsigned char uc = s[0]; - wint_t wc = d->mbrtowc_cache[uc]; - - if (wc == WEOF) - { - wchar_t wch; - size_t nbytes = mbrtowc (&wch, s, n, &d->mbs); - if (0 < nbytes && nbytes < (size_t) -2) - { - *pwc = wch; - return nbytes; - } - memset (&d->mbs, 0, sizeof d->mbs); - } - - *pwc = wc; - return 1; -} - -#ifdef DEBUG - -static void -prtok (token t) -{ - char const *s; - - if (t < 0) - fprintf (stderr, "END"); - else if (t < NOTCHAR) - { - unsigned int ch = t; - fprintf (stderr, "0x%02x", ch); - } - else - { - switch (t) - { - case EMPTY: - s = "EMPTY"; - break; - case BACKREF: - s = "BACKREF"; - break; - case BEGLINE: - s = "BEGLINE"; - break; - case ENDLINE: - s = "ENDLINE"; - break; - case BEGWORD: - s = "BEGWORD"; - break; - case ENDWORD: - s = "ENDWORD"; - break; - case LIMWORD: - s = "LIMWORD"; - break; - case NOTLIMWORD: - s = "NOTLIMWORD"; - break; - case QMARK: - s = "QMARK"; - break; - case STAR: - s = "STAR"; - break; - case PLUS: - s = "PLUS"; - break; - case CAT: - s = "CAT"; - break; - case OR: - s = "OR"; - break; - case LPAREN: - s = "LPAREN"; - break; - case RPAREN: - s = "RPAREN"; - break; - case ANYCHAR: - s = "ANYCHAR"; - break; - case MBCSET: - s = "MBCSET"; - break; - default: - s = "CSET"; - break; - } - fprintf (stderr, "%s", s); - } -} -#endif /* DEBUG */ - -/* Stuff pertaining to charclasses. 
*/ - -static bool -tstbit (unsigned int b, charclass const c) -{ - return c[b / CHARCLASS_WORD_BITS] >> b % CHARCLASS_WORD_BITS & 1; -} - -static void -setbit (unsigned int b, charclass c) -{ - c[b / CHARCLASS_WORD_BITS] |= (charclass_word) 1 << b % CHARCLASS_WORD_BITS; -} - -static void -clrbit (unsigned int b, charclass c) -{ - c[b / CHARCLASS_WORD_BITS] &= ~((charclass_word) 1 - << b % CHARCLASS_WORD_BITS); -} - -static void -copyset (charclass const src, charclass dst) -{ - memcpy (dst, src, sizeof (charclass)); -} - -static void -zeroset (charclass s) -{ - memset (s, 0, sizeof (charclass)); -} - -static void -notset (charclass s) -{ - int i; - - for (i = 0; i < CHARCLASS_WORDS; ++i) - s[i] = CHARCLASS_WORD_MASK & ~s[i]; -} - -static bool -equal (charclass const s1, charclass const s2) -{ - return memcmp (s1, s2, sizeof (charclass)) == 0; -} - -/* Ensure that the array addressed by PTR holds at least NITEMS + - (PTR || !NITEMS) items. Either return PTR, or reallocate the array - and return its new address. Although PTR may be null, the returned - value is never null. - - The array holds *NALLOC items; *NALLOC is updated on reallocation. - ITEMSIZE is the size of one item. Avoid O(N**2) behavior on arrays - growing linearly. */ -static void * -maybe_realloc (void *ptr, size_t nitems, size_t *nalloc, size_t itemsize) -{ - if (nitems < *nalloc) - return ptr; - *nalloc = nitems; - return x2nrealloc (ptr, nalloc, itemsize); -} - -/* In DFA D, find the index of charclass S, or allocate a new one. */ -static size_t -dfa_charclass_index (struct dfa *d, charclass const s) -{ - size_t i; - - for (i = 0; i < d->cindex; ++i) - if (equal (s, d->charclasses[i])) - return i; - d->charclasses = maybe_realloc (d->charclasses, d->cindex, &d->calloc, - sizeof *d->charclasses); - ++d->cindex; - copyset (s, d->charclasses[i]); - return i; -} - -/* A pointer to the current dfa is kept here during parsing. 
*/ -static struct dfa *dfa; - -/* Find the index of charclass S in the current DFA, or allocate a new one. */ -static size_t -charclass_index (charclass const s) -{ - return dfa_charclass_index (dfa, s); -} - -/* Syntax bits controlling the behavior of the lexical analyzer. */ -static reg_syntax_t syntax_bits, syntax_bits_set; - -/* Flag for case-folding letters into sets. */ -static bool case_fold; - -/* End-of-line byte in data. */ -static unsigned char eolbyte; - -/* Cache of char-context values. */ -static int sbit[NOTCHAR]; - -/* Set of characters considered letters. */ -static charclass letters; - -/* Set of characters that are newline. */ -static charclass newline; - -/* Add this to the test for whether a byte is word-constituent, since on - BSD-based systems, many values in the 128..255 range are classified as - alphabetic, while on glibc-based systems, they are not. */ -#ifdef __GLIBC__ -# define is_valid_unibyte_character(c) 1 -#else -# define is_valid_unibyte_character(c) (btowc (c) != WEOF) -#endif - -/* C is a "word-constituent" byte. */ -#define IS_WORD_CONSTITUENT(C) \ - (is_valid_unibyte_character (C) && (isalnum (C) || (C) == '_')) - -static int -char_context (unsigned char c) -{ - if (c == eolbyte) - return CTX_NEWLINE; - if (IS_WORD_CONSTITUENT (c)) - return CTX_LETTER; - return CTX_NONE; -} - -static int -wchar_context (wint_t wc) -{ - if (wc == (wchar_t) eolbyte || wc == 0) - return CTX_NEWLINE; - if (wc == L'_' || iswalnum (wc)) - return CTX_LETTER; - return CTX_NONE; -} - -/* Entry point to set syntax options. */ -void -dfasyntax (reg_syntax_t bits, int fold, unsigned char eol) -{ - unsigned int i; - - syntax_bits_set = 1; - syntax_bits = bits; - case_fold = fold != 0; - eolbyte = eol; - - for (i = 0; i < NOTCHAR; ++i) - { - sbit[i] = char_context (i); - switch (sbit[i]) - { - case CTX_LETTER: - setbit (i, letters); - break; - case CTX_NEWLINE: - setbit (i, newline); - break; - } - } -} - -/* Set a bit in the charclass for the given wchar_t. 
Do nothing if WC - is represented by a multi-byte sequence. Even for MB_CUR_MAX == 1, - this may happen when folding case in weird Turkish locales where - dotless i/dotted I are not included in the chosen character set. - Return whether a bit was set in the charclass. */ -static bool -setbit_wc (wint_t wc, charclass c) -{ - int b = wctob (wc); - if (b == EOF) - return false; - - setbit (b, c); - return true; -} - -/* Set a bit for B and its case variants in the charclass C. - MB_CUR_MAX must be 1. */ -static void -setbit_case_fold_c (int b, charclass c) -{ - int ub = toupper (b); - int i; - for (i = 0; i < NOTCHAR; i++) - if (toupper (i) == ub) - setbit (i, c); -} - - - -/* UTF-8 encoding allows some optimizations that we can't otherwise - assume in a multibyte encoding. */ -int -using_utf8 (void) -{ - static int utf8 = -1; - if (utf8 < 0) - { - wchar_t wc; - mbstate_t mbs = { 0 }; - utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100; - } - return utf8; -} - -/* The current locale is known to be a unibyte locale - without multicharacter collating sequences and where range - comparisons simply use the native encoding. These locales can be - processed more efficiently. */ - -static bool -using_simple_locale (void) -{ - /* The native character set is known to be compatible with - the C locale. The following test isn't perfect, but it's good - enough in practice, as only ASCII and EBCDIC are in common use - and this test correctly accepts ASCII and rejects EBCDIC. */ - enum { native_c_charset = - ('\b' == 8 && '\t' == 9 && '\n' == 10 && '\v' == 11 && '\f' == 12 - && '\r' == 13 && ' ' == 32 && '!' == 33 && '"' == 34 && '#' == 35 - && '%' == 37 && '&' == 38 && '\'' == 39 && '(' == 40 && ')' == 41 - && '*' == 42 && '+' == 43 && ',' == 44 && '-' == 45 && '.' == 46 - && '/' == 47 && '0' == 48 && '9' == 57 && ':' == 58 && ';' == 59 - && '<' == 60 && '=' == 61 && '>' == 62 && '?' 
== 63 && 'A' == 65 - && 'Z' == 90 && '[' == 91 && '\\' == 92 && ']' == 93 && '^' == 94 - && '_' == 95 && 'a' == 97 && 'z' == 122 && '{' == 123 && '|' == 124 - && '}' == 125 && '~' == 126) - }; - - if (! native_c_charset || dfa->multibyte) - return false; - else - { - static int unibyte_c = -1; - if (unibyte_c < 0) - { - char const *locale = setlocale (LC_ALL, NULL); - unibyte_c = (!locale - || STREQ (locale, "C") - || STREQ (locale, "POSIX")); - } - return unibyte_c; - } -} - -/* Lexical analyzer. All the dross that deals with the obnoxious - GNU Regex syntax bits is located here. The poor, suffering - reader is referred to the GNU Regex documentation for the - meaning of the @#%!@#%^!@ syntax bits. */ - -static char const *lexptr; /* Pointer to next input character. */ -static size_t lexleft; /* Number of characters remaining. */ -static token lasttok; /* Previous token returned; initially END. */ -static bool laststart; /* We're separated from beginning or (, - | only by zero-width characters. */ -static size_t parens; /* Count of outstanding left parens. */ -static int minrep, maxrep; /* Repeat counts for {m,n}. */ - -static int cur_mb_len = 1; /* Length of the multibyte representation of - wctok. */ - -static wint_t wctok; /* Wide character representation of the current - multibyte character, or WEOF if there was - an encoding error. Used only if - MB_CUR_MAX > 1. */ - - -/* Fetch the next lexical input character. Set C (of type int) to the - next input byte, except set C to EOF if the input is a multibyte - character of length greater than 1. Set WC (of type wint_t) to the - value of the input if it is a valid multibyte character (possibly - of length 1); otherwise set WC to WEOF. If there is no more input, - report EOFERR if EOFERR is not null, and return lasttok = END - otherwise. */ -# define FETCH_WC(c, wc, eoferr) \ - do { \ - if (! 
lexleft) \ - { \ - if ((eoferr) != 0) \ - dfaerror (eoferr); \ - else \ - return lasttok = END; \ - } \ - else \ - { \ - wint_t _wc; \ - size_t nbytes = mbs_to_wchar (&_wc, lexptr, lexleft, dfa); \ - cur_mb_len = nbytes; \ - (wc) = _wc; \ - (c) = nbytes == 1 ? to_uchar (*lexptr) : EOF; \ - lexptr += nbytes; \ - lexleft -= nbytes; \ - } \ - } while (0) - -#ifndef MIN -# define MIN(a,b) ((a) < (b) ? (a) : (b)) -#endif - -/* The set of wchar_t values C such that there's a useful locale - somewhere where C != towupper (C) && C != towlower (towupper (C)). - For example, 0x00B5 (U+00B5 MICRO SIGN) is in this table, because - towupper (0x00B5) == 0x039C (U+039C GREEK CAPITAL LETTER MU), and - towlower (0x039C) == 0x03BC (U+03BC GREEK SMALL LETTER MU). */ -static short const lonesome_lower[] = - { - 0x00B5, 0x0131, 0x017F, 0x01C5, 0x01C8, 0x01CB, 0x01F2, 0x0345, - 0x03C2, 0x03D0, 0x03D1, 0x03D5, 0x03D6, 0x03F0, 0x03F1, - - /* U+03F2 GREEK LUNATE SIGMA SYMBOL lacks a specific uppercase - counterpart in locales predating Unicode 4.0.0 (April 2003). */ - 0x03F2, - - 0x03F5, 0x1E9B, 0x1FBE, - }; - -/* Maximum number of characters that can be the case-folded - counterparts of a single character, not counting the character - itself. This is 1 for towupper, 1 for towlower, and 1 for each - entry in LONESOME_LOWER. */ -enum -{ CASE_FOLDED_BUFSIZE = 2 + sizeof lonesome_lower / sizeof *lonesome_lower }; - -/* Find the characters equal to C after case-folding, other than C - itself, and store them into FOLDED. Return the number of characters - stored. 
*/ -static unsigned int -case_folded_counterparts (wchar_t c, wchar_t folded[CASE_FOLDED_BUFSIZE]) -{ - unsigned int i; - unsigned int n = 0; - wint_t uc = towupper (c); - wint_t lc = towlower (uc); - if (uc != c) - folded[n++] = uc; - if (lc != uc && lc != c && towupper (lc) == uc) - folded[n++] = lc; - for (i = 0; i < sizeof lonesome_lower / sizeof *lonesome_lower; i++) - { - wint_t li = lonesome_lower[i]; - if (li != lc && li != uc && li != c && towupper (li) == uc) - folded[n++] = li; - } - return n; -} - -typedef int predicate (int); - -/* The following list maps the names of the Posix named character classes - to predicate functions that determine whether a given character is in - the class. The leading [ has already been eaten by the lexical - analyzer. */ -struct dfa_ctype -{ - const char *name; - predicate *func; - bool single_byte_only; -}; - -static const struct dfa_ctype prednames[] = { - {"alpha", isalpha, false}, - {"upper", isupper, false}, - {"lower", islower, false}, - {"digit", isdigit, true}, - {"xdigit", isxdigit, false}, - {"space", isspace, false}, - {"punct", ispunct, false}, - {"alnum", isalnum, false}, - {"print", isprint, false}, - {"graph", isgraph, false}, - {"cntrl", iscntrl, false}, - {"blank", isblank, false}, - {NULL, NULL, false} -}; - -static const struct dfa_ctype *_GL_ATTRIBUTE_PURE -find_pred (const char *str) -{ - unsigned int i; - for (i = 0; prednames[i].name; ++i) - if (STREQ (str, prednames[i].name)) - return &prednames[i]; - return NULL; -} - -/* Multibyte character handling sub-routine for lex. - Parse a bracket expression and build a struct mb_char_classes. */ -static token -parse_bracket_exp (void) -{ - bool invert; - int c, c1, c2; - charclass ccl; - - /* This is a bracket expression that dfaexec is known to - process correctly. */ - bool known_bracket_exp = true; - - /* Used to warn about [:space:]. - Bit 0 = first character is a colon. - Bit 1 = last character is a colon. 
- Bit 2 = includes any other character but a colon. - Bit 3 = includes ranges, char/equiv classes or collation elements. */ - int colon_warning_state; - - wint_t wc; - wint_t wc2; - wint_t wc1 = 0; - - /* Work area to build a mb_char_classes. */ - struct mb_char_classes *work_mbc; - size_t chars_al; - - chars_al = 0; - if (dfa->multibyte) - { - dfa->mbcsets = maybe_realloc (dfa->mbcsets, dfa->nmbcsets, - &dfa->mbcsets_alloc, - sizeof *dfa->mbcsets); - - /* dfa->multibyte_prop[] hold the index of dfa->mbcsets. - We will update dfa->multibyte_prop[] in addtok, because we can't - decide the index in dfa->tokens[]. */ - - /* Initialize work area. */ - work_mbc = &(dfa->mbcsets[dfa->nmbcsets++]); - memset (work_mbc, 0, sizeof *work_mbc); - } - else - work_mbc = NULL; - - memset (ccl, 0, sizeof ccl); - FETCH_WC (c, wc, _("unbalanced [")); - if (c == '^') - { - FETCH_WC (c, wc, _("unbalanced [")); - invert = true; - known_bracket_exp = using_simple_locale (); - } - else - invert = false; - - colon_warning_state = (c == ':'); - do - { - c1 = NOTCHAR; /* Mark c1 as not initialized. */ - colon_warning_state &= ~2; - - /* Note that if we're looking at some other [:...:] construct, - we just treat it as a bunch of ordinary characters. We can do - this because we assume regex has checked for syntax errors before - dfa is ever called. */ - if (c == '[') - { - FETCH_WC (c1, wc1, _("unbalanced [")); - - if ((c1 == ':' && (syntax_bits & RE_CHAR_CLASSES)) - || c1 == '.' || c1 == '=') - { - enum { MAX_BRACKET_STRING_LEN = 32 }; - char str[MAX_BRACKET_STRING_LEN + 1]; - size_t len = 0; - for (;;) - { - FETCH_WC (c, wc, _("unbalanced [")); - if ((c == c1 && *lexptr == ']') || lexleft == 0) - break; - if (len < MAX_BRACKET_STRING_LEN) - str[len++] = c; - else - /* This is in any case an invalid class name. */ - str[0] = '\0'; - } - str[len] = '\0'; - - /* Fetch bracket. */ - FETCH_WC (c, wc, _("unbalanced [")); - if (c1 == ':') - /* Build character class. 
POSIX allows character - classes to match multicharacter collating elements, - but the regex code does not support that, so do not - worry about that possibility. */ - { - char const *class - = (case_fold && (STREQ (str, "upper") - || STREQ (str, "lower")) ? "alpha" : str); - const struct dfa_ctype *pred = find_pred (class); - if (!pred) - dfaerror (_("invalid character class")); - - if (dfa->multibyte && !pred->single_byte_only) - known_bracket_exp = false; - else - for (c2 = 0; c2 < NOTCHAR; ++c2) - if (pred->func (c2)) - setbit (c2, ccl); - } - else - known_bracket_exp = false; - - colon_warning_state |= 8; - - /* Fetch new lookahead character. */ - FETCH_WC (c1, wc1, _("unbalanced [")); - continue; - } - - /* We treat '[' as a normal character here. c/c1/wc/wc1 - are already set up. */ - } - - if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) - FETCH_WC (c, wc, _("unbalanced [")); - - if (c1 == NOTCHAR) - FETCH_WC (c1, wc1, _("unbalanced [")); - - if (c1 == '-') - /* build range characters. */ - { - FETCH_WC (c2, wc2, _("unbalanced [")); - - /* A bracket expression like [a-[.aa.]] matches an unknown set. - Treat it like [-a[.aa.]] while parsing it, and - remember that the set is unknown. */ - if (c2 == '[' && *lexptr == '.') - { - known_bracket_exp = false; - c2 = ']'; - } - - if (c2 == ']') - { - /* In the case [x-], the - is an ordinary hyphen, - which is left in c1, the lookahead character. */ - lexptr -= cur_mb_len; - lexleft += cur_mb_len; - } - else - { - if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) - FETCH_WC (c2, wc2, _("unbalanced [")); - - colon_warning_state |= 8; - FETCH_WC (c1, wc1, _("unbalanced [")); - - /* Treat [x-y] as a range if x != y. 
*/ - if (wc != wc2 || wc == WEOF) - { - if (dfa->multibyte) - known_bracket_exp = false; - else if (using_simple_locale ()) - { - int ci; - for (ci = c; ci <= c2; ci++) - setbit (ci, ccl); - if (case_fold) - { - int uc = toupper (c); - int uc2 = toupper (c2); - for (ci = 0; ci < NOTCHAR; ci++) - { - int uci = toupper (ci); - if (uc <= uci && uci <= uc2) - setbit (ci, ccl); - } - } - } - else - known_bracket_exp = false; - - continue; - } - } - } - - colon_warning_state |= (c == ':') ? 2 : 4; - - if (!dfa->multibyte) - { - if (case_fold) - setbit_case_fold_c (c, ccl); - else - setbit (c, ccl); - continue; - } - - if (wc == WEOF) - known_bracket_exp = false; - else - { - wchar_t folded[CASE_FOLDED_BUFSIZE + 1]; - unsigned int i; - unsigned int n = (case_fold - ? case_folded_counterparts (wc, folded + 1) + 1 - : 1); - folded[0] = wc; - for (i = 0; i < n; i++) - if (!setbit_wc (folded[i], ccl)) - { - work_mbc->chars - = maybe_realloc (work_mbc->chars, work_mbc->nchars, - &chars_al, sizeof *work_mbc->chars); - work_mbc->chars[work_mbc->nchars++] = folded[i]; - } - } - } - while ((wc = wc1, (c = c1) != ']')); - - if (colon_warning_state == 7) - dfawarn (_("character class syntax is [[:space:]], not [:space:]")); - - if (! known_bracket_exp) - return BACKREF; - - if (dfa->multibyte) - { - static charclass zeroclass; - work_mbc->invert = invert; - work_mbc->cset = equal (ccl, zeroclass) ? 
-1 : charclass_index (ccl); - return MBCSET; - } - - if (invert) - { - assert (!dfa->multibyte); - notset (ccl); - if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) - clrbit ('\n', ccl); - } - - return CSET + charclass_index (ccl); -} - -#define PUSH_LEX_STATE(s) \ - do \ - { \ - char const *lexptr_saved = lexptr; \ - size_t lexleft_saved = lexleft; \ - lexptr = (s); \ - lexleft = strlen (lexptr) - -#define POP_LEX_STATE() \ - lexptr = lexptr_saved; \ - lexleft = lexleft_saved; \ - } \ - while (0) - -static token -lex (void) -{ - int c, c2; - bool backslash = false; - charclass ccl; - int i; - - /* Basic plan: We fetch a character. If it's a backslash, - we set the backslash flag and go through the loop again. - On the plus side, this avoids having a duplicate of the - main switch inside the backslash case. On the minus side, - it means that just about every case begins with - "if (backslash) ...". */ - for (i = 0; i < 2; ++i) - { - FETCH_WC (c, wctok, NULL); - - switch (c) - { - case '\\': - if (backslash) - goto normal_char; - if (lexleft == 0) - dfaerror (_("unfinished \\ escape")); - backslash = true; - break; - - case '^': - if (backslash) - goto normal_char; - if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS - || lasttok == END || lasttok == LPAREN || lasttok == OR) - return lasttok = BEGLINE; - goto normal_char; - - case '$': - if (backslash) - goto normal_char; - if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS - || lexleft == 0 - || (syntax_bits & RE_NO_BK_PARENS - ? lexleft > 0 && *lexptr == ')' - : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == ')') - || (syntax_bits & RE_NO_BK_VBAR - ? 
lexleft > 0 && *lexptr == '|' - : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == '|') - || ((syntax_bits & RE_NEWLINE_ALT) - && lexleft > 0 && *lexptr == '\n')) - return lasttok = ENDLINE; - goto normal_char; - - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (backslash && !(syntax_bits & RE_NO_BK_REFS)) - { - laststart = false; - return lasttok = BACKREF; - } - goto normal_char; - - case '`': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = BEGLINE; /* FIXME: should be beginning of string */ - goto normal_char; - - case '\'': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = ENDLINE; /* FIXME: should be end of string */ - goto normal_char; - - case '<': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = BEGWORD; - goto normal_char; - - case '>': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = ENDWORD; - goto normal_char; - - case 'b': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = LIMWORD; - goto normal_char; - - case 'B': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = NOTLIMWORD; - goto normal_char; - - case '?': - if (syntax_bits & RE_LIMITED_OPS) - goto normal_char; - if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0)) - goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) - goto normal_char; - return lasttok = QMARK; - - case '*': - if (backslash) - goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) - goto normal_char; - return lasttok = STAR; - - case '+': - if (syntax_bits & RE_LIMITED_OPS) - goto normal_char; - if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0)) - goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) - goto normal_char; - return lasttok = PLUS; - - case '{': - if (!(syntax_bits & RE_INTERVALS)) - goto normal_char; - if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0)) - 
goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) - goto normal_char; - - /* Cases: - {M} - exact count - {M,} - minimum count, maximum is infinity - {,N} - 0 through N - {,} - 0 to infinity (same as '*') - {M,N} - M through N */ - { - char const *p = lexptr; - char const *lim = p + lexleft; - minrep = maxrep = -1; - for (; p != lim && ISASCIIDIGIT (*p); p++) - { - if (minrep < 0) - minrep = *p - '0'; - else - minrep = MIN (RE_DUP_MAX + 1, minrep * 10 + *p - '0'); - } - if (p != lim) - { - if (*p != ',') - maxrep = minrep; - else - { - if (minrep < 0) - minrep = 0; - while (++p != lim && ISASCIIDIGIT (*p)) - { - if (maxrep < 0) - maxrep = *p - '0'; - else - maxrep = MIN (RE_DUP_MAX + 1, maxrep * 10 + *p - '0'); - } - } - } - if (! ((! backslash || (p != lim && *p++ == '\\')) - && p != lim && *p++ == '}' - && 0 <= minrep && (maxrep < 0 || minrep <= maxrep))) - { - if (syntax_bits & RE_INVALID_INTERVAL_ORD) - goto normal_char; - dfaerror (_("invalid content of \\{\\}")); - } - if (RE_DUP_MAX < maxrep) - dfaerror (_("regular expression too big")); - lexptr = p; - lexleft = lim - p; - } - laststart = false; - return lasttok = REPMN; - - case '|': - if (syntax_bits & RE_LIMITED_OPS) - goto normal_char; - if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0)) - goto normal_char; - laststart = true; - return lasttok = OR; - - case '\n': - if (syntax_bits & RE_LIMITED_OPS - || backslash || !(syntax_bits & RE_NEWLINE_ALT)) - goto normal_char; - laststart = true; - return lasttok = OR; - - case '(': - if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0)) - goto normal_char; - ++parens; - laststart = true; - return lasttok = LPAREN; - - case ')': - if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0)) - goto normal_char; - if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_char; - --parens; - laststart = false; - return lasttok = RPAREN; - - case '.': - if (backslash) - goto normal_char; - if (dfa->multibyte) - { - /* 
In multibyte environment period must match with a single - character not a byte. So we use ANYCHAR. */ - laststart = false; - return lasttok = ANYCHAR; - } - zeroset (ccl); - notset (ccl); - if (!(syntax_bits & RE_DOT_NEWLINE)) - clrbit ('\n', ccl); - if (syntax_bits & RE_DOT_NOT_NULL) - clrbit ('\0', ccl); - laststart = false; - return lasttok = CSET + charclass_index (ccl); - - case 's': - case 'S': - if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) - goto normal_char; - if (!dfa->multibyte) - { - zeroset (ccl); - for (c2 = 0; c2 < NOTCHAR; ++c2) - if (isspace (c2)) - setbit (c2, ccl); - if (c == 'S') - notset (ccl); - laststart = false; - return lasttok = CSET + charclass_index (ccl); - } - - /* FIXME: see if optimizing this, as is done with ANYCHAR and - add_utf8_anychar, makes sense. */ - - /* \s and \S are documented to be equivalent to [[:space:]] and - [^[:space:]] respectively, so tell the lexer to process those - strings, each minus its "already processed" '['. */ - PUSH_LEX_STATE (c == 's' ? "[:space:]]" : "^[:space:]]"); - - lasttok = parse_bracket_exp (); - - POP_LEX_STATE (); - - laststart = false; - return lasttok; - - case 'w': - case 'W': - if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) - goto normal_char; - - if (!dfa->multibyte) - { - zeroset (ccl); - for (c2 = 0; c2 < NOTCHAR; ++c2) - if (IS_WORD_CONSTITUENT (c2)) - setbit (c2, ccl); - if (c == 'W') - notset (ccl); - laststart = false; - return lasttok = CSET + charclass_index (ccl); - } - - /* FIXME: see if optimizing this, as is done with ANYCHAR and - add_utf8_anychar, makes sense. */ - - /* \w and \W are documented to be equivalent to [_[:alnum:]] and - [^_[:alnum:]] respectively, so tell the lexer to process those - strings, each minus its "already processed" '['. */ - PUSH_LEX_STATE (c == 'w' ? 
"_[:alnum:]]" : "^_[:alnum:]]"); - - lasttok = parse_bracket_exp (); - - POP_LEX_STATE (); - - laststart = false; - return lasttok; - - case '[': - if (backslash) - goto normal_char; - laststart = false; - return lasttok = parse_bracket_exp (); - - default: - normal_char: - laststart = false; - /* For multibyte character sets, folding is done in atom. Always - return WCHAR. */ - if (dfa->multibyte) - return lasttok = WCHAR; - - if (case_fold && isalpha (c)) - { - zeroset (ccl); - setbit_case_fold_c (c, ccl); - return lasttok = CSET + charclass_index (ccl); - } - - return lasttok = c; - } - } - - /* The above loop should consume at most a backslash - and some other character. */ - abort (); - return END; /* keeps pedantic compilers happy. */ -} - -/* Recursive descent parser for regular expressions. */ - -static token tok; /* Lookahead token. */ -static size_t depth; /* Current depth of a hypothetical stack - holding deferred productions. This is - used to determine the depth that will be - required of the real stack later on in - dfaanalyze. */ - -static void -addtok_mb (token t, int mbprop) -{ - if (dfa->talloc == dfa->tindex) - { - dfa->tokens = x2nrealloc (dfa->tokens, &dfa->talloc, - sizeof *dfa->tokens); - if (dfa->multibyte) - dfa->multibyte_prop = xnrealloc (dfa->multibyte_prop, dfa->talloc, - sizeof *dfa->multibyte_prop); - } - if (dfa->multibyte) - dfa->multibyte_prop[dfa->tindex] = mbprop; - dfa->tokens[dfa->tindex++] = t; - - switch (t) - { - case QMARK: - case STAR: - case PLUS: - break; - - case CAT: - case OR: - --depth; - break; - - case BACKREF: - dfa->fast = false; - /* fallthrough */ - default: - ++dfa->nleaves; - /* fallthrough */ - case EMPTY: - ++depth; - break; - } - if (depth > dfa->depth) - dfa->depth = depth; -} - -static void addtok_wc (wint_t wc); - -/* Add the given token to the parse tree, maintaining the depth count and - updating the maximum depth if necessary. 
*/ -static void -addtok (token t) -{ - if (dfa->multibyte && t == MBCSET) - { - bool need_or = false; - struct mb_char_classes *work_mbc = &dfa->mbcsets[dfa->nmbcsets - 1]; - size_t i; - - /* Extract wide characters into alternations for better performance. - This does not require UTF-8. */ - for (i = 0; i < work_mbc->nchars; i++) - { - addtok_wc (work_mbc->chars[i]); - if (need_or) - addtok (OR); - need_or = true; - } - work_mbc->nchars = 0; - - /* Characters have been handled above, so it is possible - that the mbcset is empty now. Do nothing in that case. */ - if (work_mbc->cset != -1) - { - addtok (CSET + work_mbc->cset); - if (need_or) - addtok (OR); - } - } - else - { - addtok_mb (t, 3); - } -} - -/* We treat a multibyte character as a single atom, so that DFA - can treat a multibyte character as a single expression. - - e.g., we construct the following tree from "". - - */ -static void -addtok_wc (wint_t wc) -{ - unsigned char buf[MB_LEN_MAX]; - mbstate_t s = { 0 }; - int i; - size_t stored_bytes = wcrtomb ((char *) buf, wc, &s); - - if (stored_bytes != (size_t) -1) - cur_mb_len = stored_bytes; - else - { - /* This is merely stop-gap. buf[0] is undefined, yet skipping - the addtok_mb call altogether can corrupt the heap. */ - cur_mb_len = 1; - buf[0] = 0; - } - - addtok_mb (buf[0], cur_mb_len == 1 ? 3 : 1); - for (i = 1; i < cur_mb_len; i++) - { - addtok_mb (buf[i], i == cur_mb_len - 1 ? 2 : 0); - addtok (CAT); - } -} - -static void -add_utf8_anychar (void) -{ - static const charclass utf8_classes[5] = { - /* 80-bf: non-leading bytes. */ - {0, 0, 0, 0, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, 0, 0}, - - /* 00-7f: 1-byte sequence. */ - {CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, - CHARCLASS_WORD_MASK, 0, 0, 0, 0}, - - /* c2-df: 2-byte sequence. */ - {0, 0, 0, 0, 0, 0, ~3 & CHARCLASS_WORD_MASK, 0}, - - /* e0-ef: 3-byte sequence. */ - {0, 0, 0, 0, 0, 0, 0, 0xffff}, - - /* f0-f7: 4-byte sequence. 
*/ - {0, 0, 0, 0, 0, 0, 0, 0xff0000} - }; - const unsigned int n = sizeof (utf8_classes) / sizeof (utf8_classes[0]); - unsigned int i; - - /* Define the five character classes that are needed below. */ - if (dfa->utf8_anychar_classes[0] == 0) - for (i = 0; i < n; i++) - { - charclass c; - copyset (utf8_classes[i], c); - if (i == 1) - { - if (!(syntax_bits & RE_DOT_NEWLINE)) - clrbit ('\n', c); - if (syntax_bits & RE_DOT_NOT_NULL) - clrbit ('\0', c); - } - dfa->utf8_anychar_classes[i] = CSET + charclass_index (c); - } - - /* A valid UTF-8 character is - - ([0x00-0x7f] - |[0xc2-0xdf][0x80-0xbf] - |[0xe0-0xef[0x80-0xbf][0x80-0xbf] - |[0xf0-f7][0x80-0xbf][0x80-0xbf][0x80-0xbf]) - - which I'll write more concisely "B|CA|DAA|EAAA". Factor the [0x00-0x7f] - and you get "B|(C|(D|EA)A)A". And since the token buffer is in reverse - Polish notation, you get "B C D E A CAT OR A CAT OR A CAT OR". */ - for (i = 1; i < n; i++) - addtok (dfa->utf8_anychar_classes[i]); - while (--i > 1) - { - addtok (dfa->utf8_anychar_classes[0]); - addtok (CAT); - addtok (OR); - } -} - -/* The grammar understood by the parser is as follows. - - regexp: - regexp OR branch - branch - - branch: - branch closure - closure - - closure: - closure QMARK - closure STAR - closure PLUS - closure REPMN - atom - - atom: - - - ANYCHAR - MBCSET - CSET - BACKREF - BEGLINE - ENDLINE - BEGWORD - ENDWORD - LIMWORD - NOTLIMWORD - LPAREN regexp RPAREN - - - The parser builds a parse tree in postfix form in an array of tokens. 
*/ - -static void -atom (void) -{ - if (tok == WCHAR) - { - if (wctok == WEOF) - addtok (BACKREF); - else - { - addtok_wc (wctok); - - if (case_fold) - { - wchar_t folded[CASE_FOLDED_BUFSIZE]; - unsigned int i, n = case_folded_counterparts (wctok, folded); - for (i = 0; i < n; i++) - { - addtok_wc (folded[i]); - addtok (OR); - } - } - } - - tok = lex (); - } - else if (tok == ANYCHAR && using_utf8 ()) - { - /* For UTF-8 expand the period to a series of CSETs that define a valid - UTF-8 character. This avoids using the slow multibyte path. I'm - pretty sure it would be both profitable and correct to do it for - any encoding; however, the optimization must be done manually as - it is done above in add_utf8_anychar. So, let's start with - UTF-8: it is the most used, and the structure of the encoding - makes the correctness more obvious. */ - add_utf8_anychar (); - tok = lex (); - } - else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF - || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD - || tok == ANYCHAR || tok == MBCSET - || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD) - { - addtok (tok); - tok = lex (); - } - else if (tok == LPAREN) - { - tok = lex (); - regexp (); - if (tok != RPAREN) - dfaerror (_("unbalanced (")); - tok = lex (); - } - else - addtok (EMPTY); -} - -/* Return the number of tokens in the given subexpression. */ -static size_t _GL_ATTRIBUTE_PURE -nsubtoks (size_t tindex) -{ - size_t ntoks1; - - switch (dfa->tokens[tindex - 1]) - { - default: - return 1; - case QMARK: - case STAR: - case PLUS: - return 1 + nsubtoks (tindex - 1); - case CAT: - case OR: - ntoks1 = nsubtoks (tindex - 1); - return 1 + ntoks1 + nsubtoks (tindex - 1 - ntoks1); - } -} - -/* Copy the given subexpression to the top of the tree. 
*/ -static void -copytoks (size_t tindex, size_t ntokens) -{ - size_t i; - - if (dfa->multibyte) - for (i = 0; i < ntokens; ++i) - addtok_mb (dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]); - else - for (i = 0; i < ntokens; ++i) - addtok_mb (dfa->tokens[tindex + i], 3); -} - -static void -closure (void) -{ - int i; - size_t tindex, ntokens; - - atom (); - while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN) - if (tok == REPMN && (minrep || maxrep)) - { - ntokens = nsubtoks (dfa->tindex); - tindex = dfa->tindex - ntokens; - if (maxrep < 0) - addtok (PLUS); - if (minrep == 0) - addtok (QMARK); - for (i = 1; i < minrep; ++i) - { - copytoks (tindex, ntokens); - addtok (CAT); - } - for (; i < maxrep; ++i) - { - copytoks (tindex, ntokens); - addtok (QMARK); - addtok (CAT); - } - tok = lex (); - } - else if (tok == REPMN) - { - dfa->tindex -= nsubtoks (dfa->tindex); - tok = lex (); - closure (); - } - else - { - addtok (tok); - tok = lex (); - } -} - -static void -branch (void) -{ - closure (); - while (tok != RPAREN && tok != OR && tok >= 0) - { - closure (); - addtok (CAT); - } -} - -static void -regexp (void) -{ - branch (); - while (tok == OR) - { - tok = lex (); - branch (); - addtok (OR); - } -} - -/* Main entry point for the parser. S is a string to be parsed, len is the - length of the string, so s can include NUL characters. D is a pointer to - the struct dfa to parse into. 
*/ -void -dfaparse (char const *s, size_t len, struct dfa *d) -{ - dfa = d; - lexptr = s; - lexleft = len; - lasttok = END; - laststart = true; - parens = 0; - if (dfa->multibyte) - { - cur_mb_len = 0; - memset (&d->mbs, 0, sizeof d->mbs); - } - - if (!syntax_bits_set) - dfaerror (_("no syntax specified")); - - tok = lex (); - depth = d->depth; - - regexp (); - - if (tok != END) - dfaerror (_("unbalanced )")); - - addtok (END - d->nregexps); - addtok (CAT); - - if (d->nregexps) - addtok (OR); - - ++d->nregexps; -} - -/* Some primitives for operating on sets of positions. */ - -/* Copy one set to another. */ -static void -copy (position_set const *src, position_set * dst) -{ - if (dst->alloc < src->nelem) - { - free (dst->elems); - dst->alloc = src->nelem; - dst->elems = x2nrealloc (NULL, &dst->alloc, sizeof *dst->elems); - } - memcpy (dst->elems, src->elems, src->nelem * sizeof *dst->elems); - dst->nelem = src->nelem; -} - -static void -alloc_position_set (position_set * s, size_t size) -{ - s->elems = xnmalloc (size, sizeof *s->elems); - s->alloc = size; - s->nelem = 0; -} - -/* Insert position P in set S. S is maintained in sorted order on - decreasing index. If there is already an entry in S with P.index - then merge (logically-OR) P's constraints into the one in S. - S->elems must point to an array large enough to hold the resulting set. */ -static void -insert (position p, position_set * s) -{ - size_t count = s->nelem; - size_t lo = 0, hi = count; - size_t i; - while (lo < hi) - { - size_t mid = (lo + hi) >> 1; - if (s->elems[mid].index > p.index) - lo = mid + 1; - else - hi = mid; - } - - if (lo < count && p.index == s->elems[lo].index) - { - s->elems[lo].constraint |= p.constraint; - return; - } - - s->elems = maybe_realloc (s->elems, count, &s->alloc, sizeof *s->elems); - for (i = count; i > lo; i--) - s->elems[i] = s->elems[i - 1]; - s->elems[lo] = p; - ++s->nelem; -} - -/* Merge two sets of positions into a third. 
The result is exactly as if - the positions of both sets were inserted into an initially empty set. */ -static void -merge (position_set const *s1, position_set const *s2, position_set * m) -{ - size_t i = 0, j = 0; - - if (m->alloc < s1->nelem + s2->nelem) - { - free (m->elems); - m->elems = maybe_realloc (NULL, s1->nelem + s2->nelem, &m->alloc, - sizeof *m->elems); - } - m->nelem = 0; - while (i < s1->nelem && j < s2->nelem) - if (s1->elems[i].index > s2->elems[j].index) - m->elems[m->nelem++] = s1->elems[i++]; - else if (s1->elems[i].index < s2->elems[j].index) - m->elems[m->nelem++] = s2->elems[j++]; - else - { - m->elems[m->nelem] = s1->elems[i++]; - m->elems[m->nelem++].constraint |= s2->elems[j++].constraint; - } - while (i < s1->nelem) - m->elems[m->nelem++] = s1->elems[i++]; - while (j < s2->nelem) - m->elems[m->nelem++] = s2->elems[j++]; -} - -/* Delete a position from a set. */ -static void -delete (position p, position_set * s) -{ - size_t i; - - for (i = 0; i < s->nelem; ++i) - if (p.index == s->elems[i].index) - break; - if (i < s->nelem) - for (--s->nelem; i < s->nelem; ++i) - s->elems[i] = s->elems[i + 1]; -} - -/* Find the index of the state corresponding to the given position set with - the given preceding context, or create a new state if there is no such - state. Context tells whether we got here on a newline or letter. */ -static state_num -state_index (struct dfa *d, position_set const *s, int context) -{ - size_t hash = 0; - int constraint; - state_num i, j; - - for (i = 0; i < s->nelem; ++i) - hash ^= s->elems[i].index + s->elems[i].constraint; - - /* Try to find a state that exactly matches the proposed one. 
*/ - for (i = 0; i < d->sindex; ++i) - { - if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem - || context != d->states[i].context) - continue; - for (j = 0; j < s->nelem; ++j) - if (s->elems[j].constraint - != d->states[i].elems.elems[j].constraint - || s->elems[j].index != d->states[i].elems.elems[j].index) - break; - if (j == s->nelem) - return i; - } - -#ifdef DEBUG - fprintf (stderr, "new state %zd\n nextpos:", i); - for (j = 0; j < s->nelem; ++j) - { - fprintf (stderr, " %zu:", s->elems[j].index); - prtok (d->tokens[s->elems[j].index]); - } - fprintf (stderr, "\n context:"); - if (context ^ CTX_ANY) - { - if (context & CTX_NONE) - fprintf (stderr, " CTX_NONE"); - if (context & CTX_LETTER) - fprintf (stderr, " CTX_LETTER"); - if (context & CTX_NEWLINE) - fprintf (stderr, " CTX_NEWLINE"); - } - else - fprintf (stderr, " CTX_ANY"); - fprintf (stderr, "\n"); -#endif - - /* We'll have to create a new state. */ - d->states = maybe_realloc (d->states, d->sindex, &d->salloc, - sizeof *d->states); - d->states[i].hash = hash; - alloc_position_set (&d->states[i].elems, s->nelem); - copy (s, &d->states[i].elems); - d->states[i].context = context; - d->states[i].constraint = 0; - d->states[i].first_end = 0; - d->states[i].mbps.nelem = 0; - d->states[i].mbps.elems = NULL; - - for (j = 0; j < s->nelem; ++j) - if (d->tokens[s->elems[j].index] < 0) - { - constraint = s->elems[j].constraint; - if (SUCCEEDS_IN_CONTEXT (constraint, context, CTX_ANY)) - d->states[i].constraint |= constraint; - if (!d->states[i].first_end) - d->states[i].first_end = d->tokens[s->elems[j].index]; - } - else if (d->tokens[s->elems[j].index] == BACKREF) - d->states[i].constraint = NO_CONSTRAINT; - - ++d->sindex; - - return i; -} - -/* Find the epsilon closure of a set of positions. If any position of the set - contains a symbol that matches the empty string in some context, replace - that position with the elements of its follow labeled with an appropriate - constraint. 
Repeat exhaustively until no funny positions are left. - S->elems must be large enough to hold the result. */ -static void -epsclosure (position_set *s, struct dfa const *d, char *visited) -{ - size_t i, j; - position p, old; - bool initialized = false; - - for (i = 0; i < s->nelem; ++i) - if (d->tokens[s->elems[i].index] >= NOTCHAR - && d->tokens[s->elems[i].index] != BACKREF - && d->tokens[s->elems[i].index] != ANYCHAR - && d->tokens[s->elems[i].index] != MBCSET - && d->tokens[s->elems[i].index] < CSET) - { - if (!initialized) - { - memset (visited, 0, d->tindex * sizeof (*visited)); - initialized = true; - } - old = s->elems[i]; - p.constraint = old.constraint; - delete (s->elems[i], s); - if (visited[old.index]) - { - --i; - continue; - } - visited[old.index] = 1; - switch (d->tokens[old.index]) - { - case BEGLINE: - p.constraint &= BEGLINE_CONSTRAINT; - break; - case ENDLINE: - p.constraint &= ENDLINE_CONSTRAINT; - break; - case BEGWORD: - p.constraint &= BEGWORD_CONSTRAINT; - break; - case ENDWORD: - p.constraint &= ENDWORD_CONSTRAINT; - break; - case LIMWORD: - p.constraint &= LIMWORD_CONSTRAINT; - break; - case NOTLIMWORD: - p.constraint &= NOTLIMWORD_CONSTRAINT; - break; - default: - break; - } - for (j = 0; j < d->follows[old.index].nelem; ++j) - { - p.index = d->follows[old.index].elems[j].index; - insert (p, s); - } - /* Force rescan to start at the beginning. */ - i = -1; - } -} - -/* Returns the set of contexts for which there is at least one - character included in C. */ - -static int -charclass_context (charclass c) -{ - int context = 0; - unsigned int j; - - if (tstbit (eolbyte, c)) - context |= CTX_NEWLINE; - - for (j = 0; j < CHARCLASS_WORDS; ++j) - { - if (c[j] & letters[j]) - context |= CTX_LETTER; - if (c[j] & ~(letters[j] | newline[j])) - context |= CTX_NONE; - } - - return context; -} - -/* Returns the contexts on which the position set S depends. 
Each context - in the set of returned contexts (let's call it SC) may have a different - follow set than other contexts in SC, and also different from the - follow set of the complement set (sc ^ CTX_ANY). However, all contexts - in the complement set will have the same follow set. */ - -static int _GL_ATTRIBUTE_PURE -state_separate_contexts (position_set const *s) -{ - int separate_contexts = 0; - size_t j; - - for (j = 0; j < s->nelem; ++j) - { - if (PREV_NEWLINE_DEPENDENT (s->elems[j].constraint)) - separate_contexts |= CTX_NEWLINE; - if (PREV_LETTER_DEPENDENT (s->elems[j].constraint)) - separate_contexts |= CTX_LETTER; - } - - return separate_contexts; -} - - -/* Perform bottom-up analysis on the parse tree, computing various functions. - Note that at this point, we're pretending constructs like \< are real - characters rather than constraints on what can follow them. - - Nullable: A node is nullable if it is at the root of a regexp that can - match the empty string. - * EMPTY leaves are nullable. - * No other leaf is nullable. - * A QMARK or STAR node is nullable. - * A PLUS node is nullable if its argument is nullable. - * A CAT node is nullable if both its arguments are nullable. - * An OR node is nullable if either argument is nullable. - - Firstpos: The firstpos of a node is the set of positions (nonempty leaves) - that could correspond to the first character of a string matching the - regexp rooted at the given node. - * EMPTY leaves have empty firstpos. - * The firstpos of a nonempty leaf is that leaf itself. - * The firstpos of a QMARK, STAR, or PLUS node is the firstpos of its - argument. - * The firstpos of a CAT node is the firstpos of the left argument, union - the firstpos of the right if the left argument is nullable. - * The firstpos of an OR node is the union of firstpos of each argument. - - Lastpos: The lastpos of a node is the set of positions that could - correspond to the last character of a string matching the regexp at - the given node. 
- * EMPTY leaves have empty lastpos. - * The lastpos of a nonempty leaf is that leaf itself. - * The lastpos of a QMARK, STAR, or PLUS node is the lastpos of its - argument. - * The lastpos of a CAT node is the lastpos of its right argument, union - the lastpos of the left if the right argument is nullable. - * The lastpos of an OR node is the union of the lastpos of each argument. - - Follow: The follow of a position is the set of positions that could - correspond to the character following a character matching the node in - a string matching the regexp. At this point we consider special symbols - that match the empty string in some context to be just normal characters. - Later, if we find that a special symbol is in a follow set, we will - replace it with the elements of its follow, labeled with an appropriate - constraint. - * Every node in the firstpos of the argument of a STAR or PLUS node is in - the follow of every node in the lastpos. - * Every node in the firstpos of the second argument of a CAT node is in - the follow of every node in the lastpos of the first argument. - - Because of the postfix representation of the parse tree, the depth-first - analysis is conveniently done by a linear scan with the aid of a stack. - Sets are stored as arrays of the elements, obeying a stack-like allocation - scheme; the number of elements in each set deeper in the stack can be - used to determine the address of a particular set's array. */ -void -dfaanalyze (struct dfa *d, int searchflag) -{ - /* Array allocated to hold position sets. */ - position *posalloc = xnmalloc (d->nleaves, 2 * sizeof *posalloc); - /* Firstpos and lastpos elements. */ - position *firstpos = posalloc + d->nleaves; - position *lastpos = firstpos + d->nleaves; - - /* Stack for element counts and nullable flags. */ - struct - { - /* Whether the entry is nullable. */ - bool nullable; - - /* Counts of firstpos and lastpos sets. 
*/ - size_t nfirstpos; - size_t nlastpos; - } *stkalloc = xnmalloc (d->depth, sizeof *stkalloc), *stk = stkalloc; - - position_set tmp; /* Temporary set for merging sets. */ - position_set merged; /* Result of merging sets. */ - int separate_contexts; /* Context wanted by some position. */ - size_t i, j; - position *pos; - char *visited = xnmalloc (d->tindex, sizeof *visited); - -#ifdef DEBUG - fprintf (stderr, "dfaanalyze:\n"); - for (i = 0; i < d->tindex; ++i) - { - fprintf (stderr, " %zu:", i); - prtok (d->tokens[i]); - } - putc ('\n', stderr); -#endif - - d->searchflag = searchflag != 0; - alloc_position_set (&merged, d->nleaves); - d->follows = xcalloc (d->tindex, sizeof *d->follows); - - for (i = 0; i < d->tindex; ++i) - { - switch (d->tokens[i]) - { - case EMPTY: - /* The empty set is nullable. */ - stk->nullable = true; - - /* The firstpos and lastpos of the empty leaf are both empty. */ - stk->nfirstpos = stk->nlastpos = 0; - stk++; - break; - - case STAR: - case PLUS: - /* Every element in the firstpos of the argument is in the follow - of every element in the lastpos. */ - tmp.nelem = stk[-1].nfirstpos; - tmp.elems = firstpos; - pos = lastpos; - for (j = 0; j < stk[-1].nlastpos; ++j) - { - merge (&tmp, &d->follows[pos[j].index], &merged); - copy (&merged, &d->follows[pos[j].index]); - } - /* fallthrough */ - - case QMARK: - /* A QMARK or STAR node is automatically nullable. */ - if (d->tokens[i] != PLUS) - stk[-1].nullable = true; - break; - - case CAT: - /* Every element in the firstpos of the second argument is in the - follow of every element in the lastpos of the first argument. */ - tmp.nelem = stk[-1].nfirstpos; - tmp.elems = firstpos; - pos = lastpos + stk[-1].nlastpos; - for (j = 0; j < stk[-2].nlastpos; ++j) - { - merge (&tmp, &d->follows[pos[j].index], &merged); - copy (&merged, &d->follows[pos[j].index]); - } - - /* The firstpos of a CAT node is the firstpos of the first argument, - union that of the second argument if the first is nullable. 
*/ - if (stk[-2].nullable) - stk[-2].nfirstpos += stk[-1].nfirstpos; - else - firstpos += stk[-1].nfirstpos; - - /* The lastpos of a CAT node is the lastpos of the second argument, - union that of the first argument if the second is nullable. */ - if (stk[-1].nullable) - stk[-2].nlastpos += stk[-1].nlastpos; - else - { - pos = lastpos + stk[-2].nlastpos; - for (j = stk[-1].nlastpos; j-- > 0;) - pos[j] = lastpos[j]; - lastpos += stk[-2].nlastpos; - stk[-2].nlastpos = stk[-1].nlastpos; - } - - /* A CAT node is nullable if both arguments are nullable. */ - stk[-2].nullable &= stk[-1].nullable; - stk--; - break; - - case OR: - /* The firstpos is the union of the firstpos of each argument. */ - stk[-2].nfirstpos += stk[-1].nfirstpos; - - /* The lastpos is the union of the lastpos of each argument. */ - stk[-2].nlastpos += stk[-1].nlastpos; - - /* An OR node is nullable if either argument is nullable. */ - stk[-2].nullable |= stk[-1].nullable; - stk--; - break; - - default: - /* Anything else is a nonempty position. (Note that special - constructs like \< are treated as nonempty strings here; - an "epsilon closure" effectively makes them nullable later. - Backreferences have to get a real position so we can detect - transitions on them later. But they are nullable. */ - stk->nullable = d->tokens[i] == BACKREF; - - /* This position is in its own firstpos and lastpos. */ - stk->nfirstpos = stk->nlastpos = 1; - stk++; - - --firstpos, --lastpos; - firstpos->index = lastpos->index = i; - firstpos->constraint = lastpos->constraint = NO_CONSTRAINT; - - /* Allocate the follow set for this position. */ - alloc_position_set (&d->follows[i], 1); - break; - } -#ifdef DEBUG - /* ... balance the above nonsyntactic #ifdef goo... */ - fprintf (stderr, "node %zu:", i); - prtok (d->tokens[i]); - putc ('\n', stderr); - fprintf (stderr, - stk[-1].nullable ? 
" nullable: yes\n" : " nullable: no\n"); - fprintf (stderr, " firstpos:"); - for (j = stk[-1].nfirstpos; j-- > 0;) - { - fprintf (stderr, " %zu:", firstpos[j].index); - prtok (d->tokens[firstpos[j].index]); - } - fprintf (stderr, "\n lastpos:"); - for (j = stk[-1].nlastpos; j-- > 0;) - { - fprintf (stderr, " %zu:", lastpos[j].index); - prtok (d->tokens[lastpos[j].index]); - } - putc ('\n', stderr); -#endif - } - - /* For each follow set that is the follow set of a real position, replace - it with its epsilon closure. */ - for (i = 0; i < d->tindex; ++i) - if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF - || d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET - || d->tokens[i] >= CSET) - { -#ifdef DEBUG - fprintf (stderr, "follows(%zu:", i); - prtok (d->tokens[i]); - fprintf (stderr, "):"); - for (j = d->follows[i].nelem; j-- > 0;) - { - fprintf (stderr, " %zu:", d->follows[i].elems[j].index); - prtok (d->tokens[d->follows[i].elems[j].index]); - } - putc ('\n', stderr); -#endif - copy (&d->follows[i], &merged); - epsclosure (&merged, d, visited); - copy (&merged, &d->follows[i]); - } - - /* Get the epsilon closure of the firstpos of the regexp. The result will - be the set of positions of state 0. */ - merged.nelem = 0; - for (i = 0; i < stk[-1].nfirstpos; ++i) - insert (firstpos[i], &merged); - epsclosure (&merged, d, visited); - - /* Build the initial state. 
*/ - separate_contexts = state_separate_contexts (&merged); - if (separate_contexts & CTX_NEWLINE) - state_index (d, &merged, CTX_NEWLINE); - d->initstate_others = d->min_trcount - = state_index (d, &merged, separate_contexts ^ CTX_ANY); - if (separate_contexts & CTX_LETTER) - d->initstate_letter = d->min_trcount - = state_index (d, &merged, CTX_LETTER); - else - d->initstate_letter = d->initstate_others; - d->min_trcount++; - - free (posalloc); - free (stkalloc); - free (merged.elems); - free (visited); -} - - -/* Find, for each character, the transition out of state s of d, and store - it in the appropriate slot of trans. - - We divide the positions of s into groups (positions can appear in more - than one group). Each group is labeled with a set of characters that - every position in the group matches (taking into account, if necessary, - preceding context information of s). For each group, find the union - of the its elements' follows. This set is the set of positions of the - new state. For each character in the group's label, set the transition - on this character to be to a state corresponding to the set's positions, - and its associated backward context information, if necessary. - - If we are building a searching matcher, we include the positions of state - 0 in every state. - - The collection of groups is constructed by building an equivalence-class - partition of the positions of s. - - For each position, find the set of characters C that it matches. Eliminate - any characters from C that fail on grounds of backward context. - - Search through the groups, looking for a group whose label L has nonempty - intersection with C. If L - C is nonempty, create a new group labeled - L - C and having the same positions as the current group, and set L to - the intersection of L and C. Insert the position in this group, set - C = C - L, and resume scanning. 
- - If after comparing with every group there are characters remaining in C, - create a new group labeled with the characters of C and insert this - position in that group. */ -void -dfastate (state_num s, struct dfa *d, state_num trans[]) -{ - leaf_set grps[NOTCHAR]; /* As many as will ever be needed. */ - charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */ - size_t ngrps = 0; /* Number of groups actually used. */ - position pos; /* Current position being considered. */ - charclass matches; /* Set of matching characters. */ - charclass_word matchesf; /* Nonzero if matches is nonempty. */ - charclass intersect; /* Intersection with some label set. */ - charclass_word intersectf; /* Nonzero if intersect is nonempty. */ - charclass leftovers; /* Stuff in the label that didn't match. */ - charclass_word leftoversf; /* Nonzero if leftovers is nonempty. */ - position_set follows; /* Union of the follows of some group. */ - position_set tmp; /* Temporary space for merging sets. */ - int possible_contexts; /* Contexts that this group can match. */ - int separate_contexts; /* Context that new state wants to know. */ - state_num state; /* New state. */ - state_num state_newline; /* New state on a newline transition. */ - state_num state_letter; /* New state on a letter transition. */ - bool next_isnt_1st_byte = false; /* We can't add state0. 
*/ - size_t i, j, k; - -#ifdef DEBUG - fprintf (stderr, "build state %td\n", s); -#endif - - zeroset (matches); - - for (i = 0; i < d->states[s].elems.nelem; ++i) - { - pos = d->states[s].elems.elems[i]; - if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR) - setbit (d->tokens[pos.index], matches); - else if (d->tokens[pos.index] >= CSET) - copyset (d->charclasses[d->tokens[pos.index] - CSET], matches); - else - { - if (d->tokens[pos.index] == MBCSET - || d->tokens[pos.index] == ANYCHAR) - { - /* ANYCHAR and MBCSET must match with a single character, so we - must put it to d->states[s].mbps, which contains the positions - which can match with a single character not a byte. */ - if (d->states[s].mbps.nelem == 0) - alloc_position_set (&d->states[s].mbps, 1); - insert (pos, &(d->states[s].mbps)); - } - continue; - } - - /* Some characters may need to be eliminated from matches because - they fail in the current context. */ - if (pos.constraint != NO_CONSTRAINT) - { - if (!SUCCEEDS_IN_CONTEXT (pos.constraint, - d->states[s].context, CTX_NEWLINE)) - for (j = 0; j < CHARCLASS_WORDS; ++j) - matches[j] &= ~newline[j]; - if (!SUCCEEDS_IN_CONTEXT (pos.constraint, - d->states[s].context, CTX_LETTER)) - for (j = 0; j < CHARCLASS_WORDS; ++j) - matches[j] &= ~letters[j]; - if (!SUCCEEDS_IN_CONTEXT (pos.constraint, - d->states[s].context, CTX_NONE)) - for (j = 0; j < CHARCLASS_WORDS; ++j) - matches[j] &= letters[j] | newline[j]; - - /* If there are no characters left, there's no point in going on. 
*/ - for (j = 0; j < CHARCLASS_WORDS && !matches[j]; ++j) - continue; - if (j == CHARCLASS_WORDS) - continue; - } - -#ifdef DEBUG - fprintf (stderr, " nextpos %zu:", pos.index); - prtok (d->tokens[pos.index]); - fprintf (stderr, " of"); - for (j = 0; j < NOTCHAR; j++) - if (tstbit (j, matches)) - fprintf (stderr, " 0x%02zx", j); - fprintf (stderr, "\n"); -#endif - - for (j = 0; j < ngrps; ++j) - { - /* If matches contains a single character only, and the current - group's label doesn't contain that character, go on to the - next group. */ - if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR - && !tstbit (d->tokens[pos.index], labels[j])) - continue; - - /* Check if this group's label has a nonempty intersection with - matches. */ - intersectf = 0; - for (k = 0; k < CHARCLASS_WORDS; ++k) - intersectf |= intersect[k] = matches[k] & labels[j][k]; - if (!intersectf) - continue; - - /* It does; now find the set differences both ways. */ - leftoversf = matchesf = 0; - for (k = 0; k < CHARCLASS_WORDS; ++k) - { - /* Even an optimizing compiler can't know this for sure. */ - charclass_word match = matches[k], label = labels[j][k]; - - leftoversf |= leftovers[k] = ~match & label; - matchesf |= matches[k] = match & ~label; - } - - /* If there were leftovers, create a new group labeled with them. */ - if (leftoversf) - { - copyset (leftovers, labels[ngrps]); - copyset (intersect, labels[j]); - grps[ngrps].elems = xnmalloc (d->nleaves, - sizeof *grps[ngrps].elems); - memcpy (grps[ngrps].elems, grps[j].elems, - sizeof (grps[j].elems[0]) * grps[j].nelem); - grps[ngrps].nelem = grps[j].nelem; - ++ngrps; - } - - /* Put the position in the current group. The constraint is - irrelevant here. */ - grps[j].elems[grps[j].nelem++] = pos.index; - - /* If every character matching the current position has been - accounted for, we're done. 
*/ - if (!matchesf) - break; - } - - /* If we've passed the last group, and there are still characters - unaccounted for, then we'll have to create a new group. */ - if (j == ngrps) - { - copyset (matches, labels[ngrps]); - zeroset (matches); - grps[ngrps].elems = xnmalloc (d->nleaves, sizeof *grps[ngrps].elems); - grps[ngrps].nelem = 1; - grps[ngrps].elems[0] = pos.index; - ++ngrps; - } - } - - alloc_position_set (&follows, d->nleaves); - alloc_position_set (&tmp, d->nleaves); - - /* If we are a searching matcher, the default transition is to a state - containing the positions of state 0, otherwise the default transition - is to fail miserably. */ - if (d->searchflag) - { - /* Find the state(s) corresponding to the positions of state 0. */ - copy (&d->states[0].elems, &follows); - separate_contexts = state_separate_contexts (&follows); - state = state_index (d, &follows, separate_contexts ^ CTX_ANY); - if (separate_contexts & CTX_NEWLINE) - state_newline = state_index (d, &follows, CTX_NEWLINE); - else - state_newline = state; - if (separate_contexts & CTX_LETTER) - state_letter = state_index (d, &follows, CTX_LETTER); - else - state_letter = state; - - for (i = 0; i < NOTCHAR; ++i) - trans[i] = (IS_WORD_CONSTITUENT (i)) ? state_letter : state; - trans[eolbyte] = state_newline; - } - else - for (i = 0; i < NOTCHAR; ++i) - trans[i] = -1; - - for (i = 0; i < ngrps; ++i) - { - follows.nelem = 0; - - /* Find the union of the follows of the positions of the group. - This is a hideously inefficient loop. Fix it someday. */ - for (j = 0; j < grps[i].nelem; ++j) - for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k) - insert (d->follows[grps[i].elems[j]].elems[k], &follows); - - if (d->multibyte) - { - /* If a token in follows.elems is not 1st byte of a multibyte - character, or the states of follows must accept the bytes - which are not 1st byte of the multibyte character. 
- Then, if a state of follows encounter a byte, it must not be - a 1st byte of a multibyte character nor single byte character. - We cansel to add state[0].follows to next state, because - state[0] must accept 1st-byte - - For example, we assume is a certain single byte - character, is a certain multibyte character, and the - codepoint of equals the 2nd byte of the codepoint of - . - When state[0] accepts , state[i] transit to state[i+1] - by accepting accepts 1st byte of , and state[i+1] - accepts 2nd byte of , if state[i+1] encounter the - codepoint of , it must not be but 2nd byte of - , so we cannot add state[0]. */ - - next_isnt_1st_byte = false; - for (j = 0; j < follows.nelem; ++j) - { - if (!(d->multibyte_prop[follows.elems[j].index] & 1)) - { - next_isnt_1st_byte = true; - break; - } - } - } - - /* If we are building a searching matcher, throw in the positions - of state 0 as well. */ - if (d->searchflag && (!d->multibyte || !next_isnt_1st_byte)) - { - merge (&d->states[0].elems, &follows, &tmp); - copy (&tmp, &follows); - } - - /* Find out if the new state will want any context information. */ - possible_contexts = charclass_context (labels[i]); - separate_contexts = state_separate_contexts (&follows); - - /* Find the state(s) corresponding to the union of the follows. 
*/ - if ((separate_contexts & possible_contexts) != possible_contexts) - state = state_index (d, &follows, separate_contexts ^ CTX_ANY); - else - state = -1; - if (separate_contexts & possible_contexts & CTX_NEWLINE) - state_newline = state_index (d, &follows, CTX_NEWLINE); - else - state_newline = state; - if (separate_contexts & possible_contexts & CTX_LETTER) - state_letter = state_index (d, &follows, CTX_LETTER); - else - state_letter = state; - -#ifdef DEBUG - fprintf (stderr, "group %zu\n nextpos:", i); - for (j = 0; j < grps[i].nelem; ++j) - { - fprintf (stderr, " %zu:", grps[i].elems[j]); - prtok (d->tokens[grps[i].elems[j]]); - } - fprintf (stderr, "\n follows:"); - for (j = 0; j < follows.nelem; ++j) - { - fprintf (stderr, " %zu:", follows.elems[j].index); - prtok (d->tokens[follows.elems[j].index]); - } - fprintf (stderr, "\n states:"); - if (possible_contexts & CTX_NEWLINE) - fprintf (stderr, " CTX_NEWLINE:%td", state_newline); - if (possible_contexts & CTX_LETTER) - fprintf (stderr, " CTX_LETTER:%td", state_letter); - if (possible_contexts & CTX_NONE) - fprintf (stderr, " CTX_NONE:%td", state); - fprintf (stderr, "\n"); -#endif - - /* Set the transitions for each character in the current label. */ - for (j = 0; j < CHARCLASS_WORDS; ++j) - for (k = 0; k < CHARCLASS_WORD_BITS; ++k) - if (labels[i][j] >> k & 1) - { - int c = j * CHARCLASS_WORD_BITS + k; - - if (c == eolbyte) - trans[c] = state_newline; - else if (IS_WORD_CONSTITUENT (c)) - trans[c] = state_letter; - else if (c < NOTCHAR) - trans[c] = state; - } - } - -#ifdef DEBUG - fprintf (stderr, "trans table %td", s); - for (i = 0; i < NOTCHAR; ++i) - { - if (!(i & 0xf)) - fprintf (stderr, "\n"); - fprintf (stderr, " %2td", trans[i]); - } - fprintf (stderr, "\n"); -#endif - - for (i = 0; i < ngrps; ++i) - free (grps[i].elems); - free (follows.elems); - free (tmp.elems); -} - -/* Make sure D's state arrays are large enough to hold NEW_STATE. 
*/ -static void -realloc_trans_if_necessary (struct dfa *d, state_num new_state) -{ - state_num oldalloc = d->tralloc; - if (oldalloc <= new_state) - { - state_num **realtrans = d->trans ? d->trans - 1 : NULL; - size_t newalloc, newalloc1; - newalloc1 = new_state + 1; - realtrans = x2nrealloc (realtrans, &newalloc1, sizeof *realtrans); - realtrans[0] = NULL; - d->trans = realtrans + 1; - d->tralloc = newalloc = newalloc1 - 1; - d->fails = xnrealloc (d->fails, newalloc, sizeof *d->fails); - d->success = xnrealloc (d->success, newalloc, sizeof *d->success); - d->newlines = xnrealloc (d->newlines, newalloc, sizeof *d->newlines); - for (; oldalloc < newalloc; oldalloc++) - { - d->trans[oldalloc] = NULL; - d->fails[oldalloc] = NULL; - } - } -} - -/* Some routines for manipulating a compiled dfa's transition tables. - Each state may or may not have a transition table; if it does, and it - is a non-accepting state, then d->trans[state] points to its table. - If it is an accepting state then d->fails[state] points to its table. - If it has no table at all, then d->trans[state] is NULL. - TODO: Improve this comment, get rid of the unnecessary redundancy. */ - -static void -build_state (state_num s, struct dfa *d) -{ - state_num *trans; /* The new transition table. */ - state_num i, maxstate; - - /* Set an upper limit on the number of transition tables that will ever - exist at once. 1024 is arbitrary. The idea is that the frequently - used transition tables will be quickly rebuilt, whereas the ones that - were only needed once or twice will be cleared away. However, do not - clear the initial D->min_trcount states, since they are always used. */ - if (d->trcount >= 1024) - { - for (i = d->min_trcount; i < d->tralloc; ++i) - { - free (d->trans[i]); - free (d->fails[i]); - d->trans[i] = d->fails[i] = NULL; - } - d->trcount = d->min_trcount; - } - - ++d->trcount; - - /* Set up the success bits for this state. 
*/ - d->success[s] = 0; - if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NEWLINE, s, *d)) - d->success[s] |= CTX_NEWLINE; - if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_LETTER, s, *d)) - d->success[s] |= CTX_LETTER; - if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NONE, s, *d)) - d->success[s] |= CTX_NONE; - - trans = xmalloc (NOTCHAR * sizeof *trans); - dfastate (s, d, trans); - - /* Now go through the new transition table, and make sure that the trans - and fail arrays are allocated large enough to hold a pointer for the - largest state mentioned in the table. */ - maxstate = -1; - for (i = 0; i < NOTCHAR; ++i) - if (maxstate < trans[i]) - maxstate = trans[i]; - realloc_trans_if_necessary (d, maxstate); - - /* Keep the newline transition in a special place so we can use it as - a sentinel. */ - d->newlines[s] = trans[eolbyte]; - trans[eolbyte] = -1; - - if (ACCEPTING (s, *d)) - d->fails[s] = trans; - else - d->trans[s] = trans; -} - -/* Multibyte character handling sub-routines for dfaexec. */ - -/* Return values of transit_state_singlebyte, and - transit_state_consume_1char. */ -typedef enum -{ - TRANSIT_STATE_IN_PROGRESS, /* State transition has not finished. */ - TRANSIT_STATE_DONE, /* State transition has finished. */ - TRANSIT_STATE_END_BUFFER /* Reach the end of the buffer. */ -} status_transit_state; - -/* Consume a single byte and transit state from 's' to '*next_state'. - This function is almost same as the state transition routin in dfaexec. - But state transition is done just once, otherwise matching succeed or - reach the end of the buffer. 
*/ -static status_transit_state -transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const *p, - state_num * next_state) -{ - state_num *t; - state_num works = s; - - status_transit_state rval = TRANSIT_STATE_IN_PROGRESS; - - while (rval == TRANSIT_STATE_IN_PROGRESS) - { - if ((t = d->trans[works]) != NULL) - { - works = t[*p]; - rval = TRANSIT_STATE_DONE; - if (works < 0) - works = 0; - } - else if (works < 0) - works = 0; - else if (d->fails[works]) - { - works = d->fails[works][*p]; - rval = TRANSIT_STATE_DONE; - } - else - { - build_state (works, d); - } - } - *next_state = works; - return rval; -} - -/* Match a "." against the current context. Return the length of the - match, in bytes. POS is the position of the ".". */ -static int -match_anychar (struct dfa *d, state_num s, position pos, - wint_t wc, size_t mbclen) -{ - int context; - - /* Check syntax bits. */ - if (wc == (wchar_t) '\n') - { - if (!(syntax_bits & RE_DOT_NEWLINE)) - return 0; - } - else if (wc == (wchar_t) '\0') - { - if (syntax_bits & RE_DOT_NOT_NULL) - return 0; - } - else if (wc == WEOF) - return 0; - - context = wchar_context (wc); - if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, context)) - return 0; - - return mbclen; -} - -/* Check whether each of 'd->states[s].mbps.elem' can match. Then return the - array which corresponds to 'd->states[s].mbps.elem'; each element of the - array contains the number of bytes with which the element can match. - - The caller MUST free the array which this function return. */ -static int * -check_matching_with_multibyte_ops (struct dfa *d, state_num s, - char const *p, wint_t wc, size_t mbclen) -{ - size_t i; - int *rarray; - - rarray = d->mb_match_lens; - for (i = 0; i < d->states[s].mbps.nelem; ++i) - { - position pos = d->states[s].mbps.elems[i]; - switch (d->tokens[pos.index]) - { - case ANYCHAR: - rarray[i] = match_anychar (d, s, pos, wc, mbclen); - break; - default: - break; /* cannot happen. 
*/ - } - } - return rarray; -} - -/* Consume a single character and enumerate all of the positions which can - be the next position from the state 's'. - - 'match_lens' is the input. It can be NULL, but it can also be the output - of check_matching_with_multibyte_ops for optimization. - - 'mbclen' and 'pps' are the output. 'mbclen' is the length of the - character consumed, and 'pps' is the set this function enumerates. */ -static status_transit_state -transit_state_consume_1char (struct dfa *d, state_num s, - unsigned char const **pp, - wint_t wc, size_t mbclen, - int *match_lens) -{ - size_t i, j; - int k; - state_num s1, s2; - status_transit_state rs = TRANSIT_STATE_DONE; - - if (! match_lens && d->states[s].mbps.nelem != 0) - match_lens = check_matching_with_multibyte_ops (d, s, (char const *) *pp, - wc, mbclen); - - /* Calculate the state which can be reached from the state 's' by - consuming 'mbclen' single bytes from the buffer. */ - s1 = s; - for (k = 0; k < mbclen; k++) - { - s2 = s1; - rs = transit_state_singlebyte (d, s2, (*pp)++, &s1); - } - copy (&d->states[s1].elems, &d->mb_follows); - - /* Add all of the positions which can be reached from 's' by consuming - a single character. */ - for (i = 0; i < d->states[s].mbps.nelem; i++) - { - if (match_lens[i] == mbclen) - for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem; - j++) - insert (d->follows[d->states[s].mbps.elems[i].index].elems[j], - &d->mb_follows); - } - - /* FIXME: this return value is always ignored. */ - return rs; -} - -/* Transit state from s, then return new state and update the pointer of the - buffer. This function is for some operator which can match with a multi- - byte character or a collating element (which may be multi characters). */ -static state_num -transit_state (struct dfa *d, state_num s, unsigned char const **pp, - unsigned char const *end) -{ - state_num s1; - int mbclen; /* The length of current input multibyte character. 
*/ - int maxlen = 0; - size_t i, j; - int *match_lens = NULL; - size_t nelem = d->states[s].mbps.nelem; /* Just a alias. */ - unsigned char const *p1 = *pp; - wint_t wc; - - if (nelem > 0) - /* This state has (a) multibyte operator(s). - We check whether each of them can match or not. */ - { - /* Note: caller must free the return value of this function. */ - mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d); - match_lens = check_matching_with_multibyte_ops (d, s, (char const *) *pp, - wc, mbclen); - - for (i = 0; i < nelem; i++) - /* Search the operator which match the longest string, - in this state. */ - { - if (match_lens[i] > maxlen) - maxlen = match_lens[i]; - } - } - - if (nelem == 0 || maxlen == 0) - /* This state has no multibyte operator which can match. - We need to check only one single byte character. */ - { - status_transit_state rs; - rs = transit_state_singlebyte (d, s, *pp, &s1); - - /* We must update the pointer if state transition succeeded. */ - if (rs == TRANSIT_STATE_DONE) - ++*pp; - - return s1; - } - - /* This state has some operators which can match a multibyte character. */ - d->mb_follows.nelem = 0; - - /* 'maxlen' may be longer than the length of a character, because it may - not be a character but a (multi character) collating element. - We enumerate all of the positions which 's' can reach by consuming - 'maxlen' bytes. 
*/ - transit_state_consume_1char (d, s, pp, wc, mbclen, match_lens); - - s1 = state_index (d, &d->mb_follows, wchar_context (wc)); - realloc_trans_if_necessary (d, s1); - - while (*pp - p1 < maxlen) - { - mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d); - transit_state_consume_1char (d, s1, pp, wc, mbclen, NULL); - - for (i = 0; i < nelem; i++) - { - if (match_lens[i] == *pp - p1) - for (j = 0; - j < d->follows[d->states[s1].mbps.elems[i].index].nelem; j++) - insert (d->follows[d->states[s1].mbps.elems[i].index].elems[j], - &d->mb_follows); - } - - s1 = state_index (d, &d->mb_follows, wchar_context (wc)); - realloc_trans_if_necessary (d, s1); - } - return s1; -} - -/* The initial state may encounter a byte which is not a single byte character - nor the first byte of a multibyte character. But it is incorrect for the - initial state to accept such a byte. For example, in Shift JIS the regular - expression "\\" accepts the codepoint 0x5c, but should not accept the second - byte of the codepoint 0x815c. Then the initial state must skip the bytes - that are not a single byte character nor the first byte of a multibyte - character. - - Given DFA state d, use mbs_to_wchar to advance MBP until it reaches or - exceeds P. If WCP is non-NULL, set *WCP to the final wide character - processed, or if no wide character is processed, set it to WEOF. - Both P and MBP must be no larger than END. */ -static unsigned char const * -skip_remains_mb (struct dfa *d, unsigned char const *p, - unsigned char const *mbp, char const *end, wint_t *wcp) -{ - wint_t wc = WEOF; - while (mbp < p) - mbp += mbs_to_wchar (&wc, (char const *) mbp, - end - (char const *) mbp, d); - if (wcp != NULL) - *wcp = wc; - return mbp; -} - -/* Search through a buffer looking for a match to the given struct dfa. - Find the first occurrence of a string matching the regexp in the - buffer, and the shortest possible version thereof. 
Return a pointer to - the first character after the match, or NULL if none is found. BEGIN - points to the beginning of the buffer, and END points to the first byte - after its end. Note however that we store a sentinel byte (usually - newline) in *END, so the actual buffer must be one byte longer. - When ALLOW_NL is nonzero, newlines may appear in the matching string. - If COUNT is non-NULL, increment *COUNT once for each newline processed. - Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we - encountered a DFA-unfriendly construct. The caller may use this to - decide whether to fall back on a matcher like regex. If MULTIBYTE, - the input consists of multibyte characters and/or encoding-error bytes. - Otherwise, the input consists of single-byte characters. - Here is the list of features that make this DFA matcher punt: - - [M-N]-range-in-MB-locale: regex is up to 25% faster on [a-z] - - back-reference: (.)\1 - - word-delimiter-in-MB-locale: \<, \>, \b - */ -static inline char * -dfaexec_main (struct dfa *d, char const *begin, char *end, int allow_nl, - size_t *count, bool multibyte) -{ - state_num s, s1; /* Current state. */ - unsigned char const *p, *mbp; /* Current input character. */ - state_num **trans, *t; /* Copy of d->trans so it can be optimized - into a register. */ - unsigned char eol = eolbyte; /* Likewise for eolbyte. */ - unsigned char saved_end; - size_t nlcount = 0; - - if (!d->tralloc) - { - realloc_trans_if_necessary (d, 1); - build_state (0, d); - } - - s = s1 = 0; - p = mbp = (unsigned char const *) begin; - trans = d->trans; - saved_end = *(unsigned char *) end; - *end = eol; - - if (multibyte) - { - memset (&d->mbs, 0, sizeof d->mbs); - if (! 
d->mb_match_lens) - { - d->mb_match_lens = xnmalloc (d->nleaves, sizeof *d->mb_match_lens); - alloc_position_set (&d->mb_follows, d->nleaves); - } - } - - for (;;) - { - if (multibyte) - { - while ((t = trans[s]) != NULL) - { - s1 = s; - - if (s < d->min_trcount) - { - if (d->min_trcount == 1) - { - if (d->states[s].mbps.nelem == 0) - { - do - { - while (t[*p] == 0) - p++; - p = mbp = skip_remains_mb (d, p, mbp, end, NULL); - } - while (t[*p] == 0); - } - else - p = mbp = skip_remains_mb (d, p, mbp, end, NULL); - } - else - { - wint_t wc; - mbp = skip_remains_mb (d, p, mbp, end, &wc); - - /* If d->min_trcount is greater than 1, maybe - transit to another initial state after skip. */ - if (p < mbp) - { - int context = wchar_context (wc); - if (context == CTX_LETTER) - s = d->initstate_letter; - else - /* It's CTX_NONE. CTX_NEWLINE cannot happen, - as we assume that a newline is always a - single byte character. */ - s = d->initstate_others; - p = mbp; - s1 = s; - } - } - } - - if (d->states[s].mbps.nelem == 0) - { - s = t[*p++]; - continue; - } - - /* The following code is used twice. - Use a macro to avoid the risk that they diverge. */ -#define State_transition() \ - do { \ - /* Can match with a multibyte character (and multi-character \ - collating element). Transition table might be updated. */ \ - s = transit_state (d, s, &p, (unsigned char *) end); \ - \ - /* If previous character is newline after a transition \ - for ANYCHAR or MBCSET in non-UTF8 multibyte locales, \ - check whether current position is beyond the end of \ - the input buffer. Also, transit to initial state if \ - !ALLOW_NL, even if RE_DOT_NEWLINE is set. 
*/ \ - if (p[-1] == eol) \ - { \ - if ((char *) p > end) \ - { \ - p = NULL; \ - goto done; \ - } \ - \ - nlcount++; \ - \ - if (!allow_nl) \ - s = 0; \ - } \ - \ - mbp = p; \ - trans = d->trans; \ - } while (0) - - State_transition(); - } - } - else - { - if (s == 0 && (t = trans[s]) != NULL) - { - while (t[*p] == 0) - p++; - s1 = 0; - s = t[*p++]; - } - - while ((t = trans[s]) != NULL) - { - s1 = t[*p++]; - if ((t = trans[s1]) == NULL) - { - state_num tmp = s; - s = s1; - s1 = tmp; /* swap */ - break; - } - s = t[*p++]; - } - } - - if (s < 0) - { - if ((char *) p > end || p[-1] != eol || d->newlines[s1] < 0) - { - p = NULL; - goto done; - } - - /* The previous character was a newline, count it, and skip - checking of multibyte character boundary until here. */ - nlcount++; - mbp = p; - - s = allow_nl ? d->newlines[s1] : 0; - } - - if (d->fails[s]) - { - if (d->success[s] & sbit[*p]) - goto done; - - s1 = s; - if (multibyte) - State_transition(); - else - s = d->fails[s][*p++]; - } - else - { - if (!d->trans[s]) - build_state (s, d); - trans = d->trans; - } - } - - done: - if (count) - *count += nlcount; - *end = saved_end; - return (char *) p; -} - -/* Specialized versions of dfaexec_main for multibyte and single-byte - cases. This is for performance. */ - -static char * -dfaexec_mb (struct dfa *d, char const *begin, char *end, - int allow_nl, size_t *count, int *backref) -{ - return dfaexec_main (d, begin, end, allow_nl, count, true); -} - -static char * -dfaexec_sb (struct dfa *d, char const *begin, char *end, - int allow_nl, size_t *count, int *backref) -{ - return dfaexec_main (d, begin, end, allow_nl, count, false); -} - -/* Always set *BACKREF and return BEGIN. Use this wrapper for - any regexp that uses a construct not supported by this code. 
*/ -static char * -dfaexec_noop (struct dfa *d, char const *begin, char *end, - int allow_nl, size_t *count, int *backref) -{ - *backref = 1; - return (char *) begin; -} - -/* Like dfaexec_main (D, BEGIN, END, ALLOW_NL, COUNT, BACKREF, D->multibyte), - but faster. */ - -char * -dfaexec (struct dfa *d, char const *begin, char *end, - int allow_nl, size_t *count, int *backref) -{ - return d->dfaexec (d, begin, end, allow_nl, count, backref); -} - -struct dfa * -dfasuperset (struct dfa const *d) -{ - return d->superset; -} - -bool -dfaisfast (struct dfa const *d) -{ - return d->fast; -} - -static void -free_mbdata (struct dfa *d) -{ - size_t i; - - free (d->multibyte_prop); - - for (i = 0; i < d->nmbcsets; ++i) - { - struct mb_char_classes *p = &(d->mbcsets[i]); - free (p->chars); - } - - free (d->mbcsets); - free (d->mb_follows.elems); - free (d->mb_match_lens); - d->mb_match_lens = NULL; -} - -/* Initialize the components of a dfa that the other routines don't - initialize for themselves. */ -void -dfainit (struct dfa *d) -{ - memset (d, 0, sizeof *d); - d->multibyte = MB_CUR_MAX > 1; - d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb; - d->fast = !d->multibyte; -} - -/* Return true if every construct in D is supported by this DFA matcher. */ -static bool _GL_ATTRIBUTE_PURE -dfa_supported (struct dfa const *d) -{ - for (size_t i = 0; i < d->tindex; i++) - { - switch (d->tokens[i]) - { - case BEGWORD: - case ENDWORD: - case LIMWORD: - case NOTLIMWORD: - if (!d->multibyte) - continue; - /* fallthrough */ - - case BACKREF: - case MBCSET: - return false; - } - } - return true; -} - -static void -dfaoptimize (struct dfa *d) -{ - size_t i; - bool have_backref = false; - - if (!using_utf8 ()) - return; - - for (i = 0; i < d->tindex; ++i) - { - switch (d->tokens[i]) - { - case ANYCHAR: - /* Lowered. */ - abort (); - case BACKREF: - have_backref = true; - break; - case MBCSET: - /* Requires multi-byte algorithm. 
*/ - return; - default: - break; - } - } - - if (!have_backref && d->superset) - { - /* The superset DFA is not likely to be much faster, so remove it. */ - dfafree (d->superset); - free (d->superset); - d->superset = NULL; - } - - free_mbdata (d); - d->multibyte = false; - d->dfaexec = dfaexec_sb; -} - -static void -dfassbuild (struct dfa *d) -{ - size_t i, j; - charclass ccl; - bool have_achar = false; - bool have_nchar = false; - struct dfa *sup = dfaalloc (); - - *sup = *d; - sup->multibyte = false; - sup->dfaexec = dfaexec_sb; - sup->multibyte_prop = NULL; - sup->mbcsets = NULL; - sup->superset = NULL; - sup->states = NULL; - sup->sindex = 0; - sup->follows = NULL; - sup->tralloc = 0; - sup->trans = NULL; - sup->fails = NULL; - sup->success = NULL; - sup->newlines = NULL; - - sup->charclasses = xnmalloc (sup->calloc, sizeof *sup->charclasses); - if (d->cindex) - { - memcpy (sup->charclasses, d->charclasses, - d->cindex * sizeof *sup->charclasses); - } - - sup->tokens = xnmalloc (d->tindex, 2 * sizeof *sup->tokens); - sup->talloc = d->tindex * 2; - - for (i = j = 0; i < d->tindex; i++) - { - switch (d->tokens[i]) - { - case ANYCHAR: - case MBCSET: - case BACKREF: - zeroset (ccl); - notset (ccl); - sup->tokens[j++] = CSET + dfa_charclass_index (sup, ccl); - sup->tokens[j++] = STAR; - if (d->tokens[i + 1] == QMARK || d->tokens[i + 1] == STAR - || d->tokens[i + 1] == PLUS) - i++; - have_achar = true; - break; - case BEGWORD: - case ENDWORD: - case LIMWORD: - case NOTLIMWORD: - if (d->multibyte) - { - /* These constraints aren't supported in a multibyte locale. - Ignore them in the superset DFA. 
*/ - sup->tokens[j++] = EMPTY; - break; - } - default: - sup->tokens[j++] = d->tokens[i]; - if ((0 <= d->tokens[i] && d->tokens[i] < NOTCHAR) - || d->tokens[i] >= CSET) - have_nchar = true; - break; - } - } - sup->tindex = j; - - if (have_nchar && (have_achar || d->multibyte)) - d->superset = sup; - else - { - dfafree (sup); - free (sup); - } -} - -/* Parse and analyze a single string of the given length. */ -void -dfacomp (char const *s, size_t len, struct dfa *d, int searchflag) -{ - dfainit (d); - dfambcache (d); - dfaparse (s, len, d); - dfassbuild (d); - - if (dfa_supported (d)) - { - dfaoptimize (d); - dfaanalyze (d, searchflag); - } - else - { - d->dfaexec = dfaexec_noop; - } - - if (d->superset) - { - d->fast = true; - dfaanalyze (d->superset, searchflag); - } -} - -/* Free the storage held by the components of a dfa. */ -void -dfafree (struct dfa *d) -{ - size_t i; - - free (d->charclasses); - free (d->tokens); - - if (d->multibyte) - free_mbdata (d); - - for (i = 0; i < d->sindex; ++i) - { - free (d->states[i].elems.elems); - free (d->states[i].mbps.elems); - } - free (d->states); - - if (d->follows) - { - for (i = 0; i < d->tindex; ++i) - free (d->follows[i].elems); - free (d->follows); - } - - if (d->trans) - { - for (i = 0; i < d->tralloc; ++i) - { - free (d->trans[i]); - free (d->fails[i]); - } - - free (d->trans - 1); - free (d->fails); - free (d->newlines); - free (d->success); - } - - if (d->superset) - dfafree (d->superset); -} - -/* Having found the postfix representation of the regular expression, - try to find a long sequence of characters that must appear in any line - containing the r.e. - Finding a "longest" sequence is beyond the scope here; - we take an easy way out and hope for the best. - (Take "(ab|a)b"--please.) 
- - We do a bottom-up calculation of sequences of characters that must appear - in matches of r.e.'s represented by trees rooted at the nodes of the postfix - representation: - sequences that must appear at the left of the match ("left") - sequences that must appear at the right of the match ("right") - lists of sequences that must appear somewhere in the match ("in") - sequences that must constitute the match ("is") - - When we get to the root of the tree, we use one of the longest of its - calculated "in" sequences as our answer. - - The sequences calculated for the various types of node (in pseudo ANSI c) - are shown below. "p" is the operand of unary operators (and the left-hand - operand of binary operators); "q" is the right-hand operand of binary - operators. - - "ZERO" means "a zero-length sequence" below. - - Type left right is in - ---- ---- ----- -- -- - char c # c # c # c # c - - ANYCHAR ZERO ZERO ZERO ZERO - - MBCSET ZERO ZERO ZERO ZERO - - CSET ZERO ZERO ZERO ZERO - - STAR ZERO ZERO ZERO ZERO - - QMARK ZERO ZERO ZERO ZERO - - PLUS p->left p->right ZERO p->in - - CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus - p->left : q->right : q->is!=ZERO) ? q->in plus - p->is##q->left p->right##q->is p->is##q->is : p->right##q->left - ZERO - - OR longest common longest common (do p->is and substrings common - leading trailing to q->is have same p->in and - (sub)sequence (sub)sequence q->in length and content) ? - of p->left of p->right - and q->left and q->right p->is : NULL - - If there's anything else we recognize in the tree, all four sequences get set - to zero-length sequences. If there's something we don't recognize in the - tree, we just return a zero-length sequence. - - Break ties in favor of infrequent letters (choosing 'zzz' in preference to - 'aaa')? - - And ... 
is it here or someplace that we might ponder "optimizations" such as - egrep 'psi|epsilon' -> egrep 'psi' - egrep 'pepsi|epsilon' -> egrep 'epsi' - (Yes, we now find "epsi" as a "string - that must occur", but we might also - simplify the *entire* r.e. being sought) - grep '[c]' -> grep 'c' - grep '(ab|a)b' -> grep 'ab' - grep 'ab*' -> grep 'a' - grep 'a*b' -> grep 'b' - - There are several issues: - - Is optimization easy (enough)? - - Does optimization actually accomplish anything, - or is the automaton you get from "psi|epsilon" (for example) - the same as the one you get from "psi" (for example)? - - Are optimizable r.e.'s likely to be used in real-life situations - (something like 'ab*' is probably unlikely; something like is - 'psi|epsilon' is likelier)? */ - -static char * -icatalloc (char *old, char const *new) -{ - char *result; - size_t oldsize; - size_t newsize = strlen (new); - if (newsize == 0) - return old; - oldsize = strlen (old); - result = xrealloc (old, oldsize + newsize + 1); - memcpy (result + oldsize, new, newsize + 1); - return result; -} - -static void -freelist (char **cpp) -{ - while (*cpp) - free (*cpp++); -} - -static char ** -enlist (char **cpp, char *new, size_t len) -{ - size_t i, j; - new = memcpy (xmalloc (len + 1), new, len); - new[len] = '\0'; - /* Is there already something in the list that's new (or longer)? */ - for (i = 0; cpp[i] != NULL; ++i) - if (strstr (cpp[i], new) != NULL) - { - free (new); - return cpp; - } - /* Eliminate any obsoleted strings. */ - j = 0; - while (cpp[j] != NULL) - if (strstr (new, cpp[j]) == NULL) - ++j; - else - { - free (cpp[j]); - if (--i == j) - break; - cpp[j] = cpp[i]; - cpp[i] = NULL; - } - /* Add the new string. */ - cpp = xnrealloc (cpp, i + 2, sizeof *cpp); - cpp[i] = new; - cpp[i + 1] = NULL; - return cpp; -} - -/* Given pointers to two strings, return a pointer to an allocated - list of their distinct common substrings. 
*/ -static char ** -comsubs (char *left, char const *right) -{ - char **cpp = xzalloc (sizeof *cpp); - char *lcp; - - for (lcp = left; *lcp != '\0'; ++lcp) - { - size_t len = 0; - char *rcp = strchr (right, *lcp); - while (rcp != NULL) - { - size_t i; - for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i) - continue; - if (i > len) - len = i; - rcp = strchr (rcp + 1, *lcp); - } - if (len != 0) - cpp = enlist (cpp, lcp, len); - } - return cpp; -} - -static char ** -addlists (char **old, char **new) -{ - for (; *new; new++) - old = enlist (old, *new, strlen (*new)); - return old; -} - -/* Given two lists of substrings, return a new list giving substrings - common to both. */ -static char ** -inboth (char **left, char **right) -{ - char **both = xzalloc (sizeof *both); - size_t lnum, rnum; - - for (lnum = 0; left[lnum] != NULL; ++lnum) - { - for (rnum = 0; right[rnum] != NULL; ++rnum) - { - char **temp = comsubs (left[lnum], right[rnum]); - both = addlists (both, temp); - freelist (temp); - free (temp); - } - } - return both; -} - -typedef struct must must; - -struct must -{ - char **in; - char *left; - char *right; - char *is; - bool begline; - bool endline; - must *prev; -}; - -static must * -allocmust (must *mp, size_t size) -{ - must *new_mp = xmalloc (sizeof *new_mp); - new_mp->in = xzalloc (sizeof *new_mp->in); - new_mp->left = xzalloc (size); - new_mp->right = xzalloc (size); - new_mp->is = xzalloc (size); - new_mp->begline = false; - new_mp->endline = false; - new_mp->prev = mp; - return new_mp; -} - -static void -resetmust (must *mp) -{ - freelist (mp->in); - mp->in[0] = NULL; - mp->left[0] = mp->right[0] = mp->is[0] = '\0'; - mp->begline = false; - mp->endline = false; -} - -static void -freemust (must *mp) -{ - freelist (mp->in); - free (mp->in); - free (mp->left); - free (mp->right); - free (mp->is); - free (mp); -} - -struct dfamust * -dfamust (struct dfa const *d) -{ - must *mp = NULL; - char const *result = ""; - size_t i; - bool exact = false; - bool 
begline = false; - bool endline = false; - bool need_begline = false; - bool need_endline = false; - bool case_fold_unibyte = case_fold && MB_CUR_MAX == 1; - - for (size_t ri = 0; ri < d->tindex; ++ri) - { - token t = d->tokens[ri]; - switch (t) - { - case BEGLINE: - mp = allocmust (mp, 2); - mp->begline = true; - need_begline = true; - break; - case ENDLINE: - mp = allocmust (mp, 2); - mp->endline = true; - need_endline = true; - break; - case LPAREN: - case RPAREN: - assert (!"neither LPAREN nor RPAREN may appear here"); - - case EMPTY: - case BEGWORD: - case ENDWORD: - case LIMWORD: - case NOTLIMWORD: - case BACKREF: - case ANYCHAR: - case MBCSET: - mp = allocmust (mp, 2); - break; - - case STAR: - case QMARK: - resetmust (mp); - break; - - case OR: - { - char **new; - must *rmp = mp; - must *lmp = mp = mp->prev; - size_t j, ln, rn, n; - - /* Guaranteed to be. Unlikely, but ... */ - if (STREQ (lmp->is, rmp->is)) - { - lmp->begline &= rmp->begline; - lmp->endline &= rmp->endline; - } - else - { - lmp->is[0] = '\0'; - lmp->begline = false; - lmp->endline = false; - } - /* Left side--easy */ - i = 0; - while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i]) - ++i; - lmp->left[i] = '\0'; - /* Right side */ - ln = strlen (lmp->right); - rn = strlen (rmp->right); - n = ln; - if (n > rn) - n = rn; - for (i = 0; i < n; ++i) - if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1]) - break; - for (j = 0; j < i; ++j) - lmp->right[j] = lmp->right[(ln - i) + j]; - lmp->right[j] = '\0'; - new = inboth (lmp->in, rmp->in); - freelist (lmp->in); - free (lmp->in); - lmp->in = new; - freemust (rmp); - } - break; - - case PLUS: - mp->is[0] = '\0'; - break; - - case END: - assert (!mp->prev); - for (i = 0; mp->in[i] != NULL; ++i) - if (strlen (mp->in[i]) > strlen (result)) - result = mp->in[i]; - if (STREQ (result, mp->is)) - { - if ((!need_begline || mp->begline) && (!need_endline - || mp->endline)) - exact = true; - begline = mp->begline; - endline = mp->endline; - } - goto 
done; - - case CAT: - { - must *rmp = mp; - must *lmp = mp = mp->prev; - - /* In. Everything in left, plus everything in - right, plus concatenation of - left's right and right's left. */ - lmp->in = addlists (lmp->in, rmp->in); - if (lmp->right[0] != '\0' && rmp->left[0] != '\0') - { - size_t lrlen = strlen (lmp->right); - size_t rllen = strlen (rmp->left); - char *tp = xmalloc (lrlen + rllen); - memcpy (tp, lmp->right, lrlen); - memcpy (tp + lrlen, rmp->left, rllen); - lmp->in = enlist (lmp->in, tp, lrlen + rllen); - free (tp); - } - /* Left-hand */ - if (lmp->is[0] != '\0') - lmp->left = icatalloc (lmp->left, rmp->left); - /* Right-hand */ - if (rmp->is[0] == '\0') - lmp->right[0] = '\0'; - lmp->right = icatalloc (lmp->right, rmp->right); - /* Guaranteed to be */ - if ((lmp->is[0] != '\0' || lmp->begline) - && (rmp->is[0] != '\0' || rmp->endline)) - { - lmp->is = icatalloc (lmp->is, rmp->is); - lmp->endline = rmp->endline; - } - else - { - lmp->is[0] = '\0'; - lmp->begline = false; - lmp->endline = false; - } - freemust (rmp); - } - break; - - case '\0': - /* Not on *my* shift. */ - goto done; - - default: - if (CSET <= t) - { - /* If T is a singleton, or if case-folding in a unibyte - locale and T's members all case-fold to the same char, - convert T to one of its members. Otherwise, do - nothing further with T. */ - charclass *ccl = &d->charclasses[t - CSET]; - int j; - for (j = 0; j < NOTCHAR; j++) - if (tstbit (j, *ccl)) - break; - if (! (j < NOTCHAR)) - { - mp = allocmust (mp, 2); - break; - } - t = j; - while (++j < NOTCHAR) - if (tstbit (j, *ccl) - && ! 
(case_fold_unibyte - && toupper (j) == toupper (t))) - break; - if (j < NOTCHAR) - { - mp = allocmust (mp, 2); - break; - } - } - - size_t rj = ri + 2; - if (d->tokens[ri + 1] == CAT) - { - for (; rj < d->tindex - 1; rj += 2) - { - if ((rj != ri && (d->tokens[rj] <= 0 - || NOTCHAR <= d->tokens[rj])) - || d->tokens[rj + 1] != CAT) - break; - } - } - mp = allocmust (mp, ((rj - ri) >> 1) + 1); - mp->is[0] = mp->left[0] = mp->right[0] - = case_fold_unibyte ? toupper (t) : t; - - for (i = 1; ri + 2 < rj; i++) - { - ri += 2; - t = d->tokens[ri]; - mp->is[i] = mp->left[i] = mp->right[i] - = case_fold_unibyte ? toupper (t) : t; - } - mp->is[i] = mp->left[i] = mp->right[i] = '\0'; - mp->in = enlist (mp->in, mp->is, i); - break; - } - } - done:; - - struct dfamust *dm = NULL; - if (*result) - { - dm = xmalloc (sizeof *dm); - dm->exact = exact; - dm->begline = begline; - dm->endline = endline; - dm->must = xstrdup (result); - } - - while (mp) - { - must *prev = mp->prev; - freemust (mp); - mp = prev; - } - - return dm; -} - -void -dfamustfree (struct dfamust *dm) -{ - free (dm->must); - free (dm); -} - -struct dfa * -dfaalloc (void) -{ - return xmalloc (sizeof (struct dfa)); -} - -/* vim:set shiftwidth=2: */ diff --git a/contrib/grep/src/dfasearch.c b/contrib/grep/src/dfasearch.c index de513213cd..337345f157 100644 --- a/contrib/grep/src/dfasearch.c +++ b/contrib/grep/src/dfasearch.c @@ -1,5 +1,5 @@ /* dfasearch.c - searching subroutines using dfa and regex for grep. - Copyright 1992, 1998, 2000, 2007, 2009-2015 Free Software Foundation, Inc. + Copyright 1992, 1998, 2000, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,49 +21,36 @@ #include #include "intprops.h" #include "search.h" +#include "die.h" +#include -/* Whether -w considers WC to be a word constituent. 
*/ -static bool -wordchar (wint_t wc) +struct dfa_comp { - return wc == L'_' || iswalnum (wc); -} - -/* KWset compiled pattern. For Ecompile and Gcompile, we compile - a list of strings, at least one of which is known to occur in - any string matching the regexp. */ -static kwset_t kwset; + /* KWset compiled pattern. For Ecompile and Gcompile, we compile + a list of strings, at least one of which is known to occur in + any string matching the regexp. */ + kwset_t kwset; -/* DFA compiled regexp. */ -static struct dfa *dfa; - -/* The Regex compiled patterns. */ -static struct patterns -{ - /* Regex compiled regexp. */ - struct re_pattern_buffer regexbuf; - struct re_registers regs; /* This is here on account of a BRAIN-DEAD - Q@#%!# library interface in regex.c. */ -} patterns0; + /* DFA compiled regexp. */ + struct dfa *dfa; -static struct patterns *patterns; -static size_t pcount; + /* Regex compiled regexps. */ + struct re_pattern_buffer *patterns; + size_t pcount; + struct re_registers regs; -/* Number of compiled fixed strings known to exactly match the regexp. - If kwsexec returns < kwset_exact_matches, then we don't need to - call the regexp matcher at all. */ -static size_t kwset_exact_matches; + /* Number of compiled fixed strings known to exactly match the regexp. + If kwsexec returns < kwset_exact_matches, then we don't need to + call the regexp matcher at all. */ + ptrdiff_t kwset_exact_matches; -static bool begline; + bool begline; +}; void dfaerror (char const *mesg) { - error (EXIT_TROUBLE, 0, "%s", mesg); - - /* notreached */ - /* Tell static analyzers that this function does not return. */ - abort (); + die (EXIT_TROUBLE, 0, "%s", mesg); } /* For now, the sole dfawarn-eliciting condition (use of a regexp @@ -72,10 +59,7 @@ dfaerror (char const *mesg) void dfawarn (char const *mesg) { - static enum { DW_NONE = 0, DW_POSIX, DW_GNU } mode; - if (mode == DW_NONE) - mode = (getenv ("POSIXLY_CORRECT") ? 
DW_POSIX : DW_GNU); - if (mode == DW_GNU) + if (!getenv ("POSIXLY_CORRECT")) dfaerror (mesg); } @@ -84,85 +68,227 @@ dfawarn (char const *mesg) to find those strings, and thus quickly filter out impossible matches. */ static void -kwsmusts (void) +kwsmusts (struct dfa_comp *dc) { - struct dfamust *dm = dfamust (dfa); + struct dfamust *dm = dfamust (dc->dfa); if (!dm) return; - kwsinit (&kwset); + dc->kwset = kwsinit (false); if (dm->exact) { /* Prepare a substring whose presence implies a match. The kwset matcher will return the index of the matching string that it chooses. */ - ++kwset_exact_matches; - size_t old_len = strlen (dm->must); - size_t new_len = old_len + dm->begline + dm->endline; + ++dc->kwset_exact_matches; + ptrdiff_t old_len = strlen (dm->must); + ptrdiff_t new_len = old_len + dm->begline + dm->endline; char *must = xmalloc (new_len); char *mp = must; *mp = eolbyte; mp += dm->begline; - begline |= dm->begline; + dc->begline |= dm->begline; memcpy (mp, dm->must, old_len); if (dm->endline) mp[old_len] = eolbyte; - kwsincr (kwset, must, new_len); + kwsincr (dc->kwset, must, new_len); free (must); } else { /* Otherwise, filtering with this substring should help reduce the search space, but we'll still have to use the regexp matcher. */ - kwsincr (kwset, dm->must, strlen (dm->must)); + kwsincr (dc->kwset, dm->must, strlen (dm->must)); } - kwsprep (kwset); + kwsprep (dc->kwset); dfamustfree (dm); } -void -GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) +/* Return true if KEYS, of length LEN, might contain a back-reference. + Return false if KEYS cannot contain a back-reference. + BS_SAFE is true of encodings where a backslash cannot appear as the + last byte of a multibyte character. 
*/ +static bool _GL_ATTRIBUTE_PURE +possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe) +{ + /* Normally a backslash, but in an unsafe encoding this is a non-char + value so that the comparison below always fails, because if there + are two adjacent '\' bytes, the first might be the last byte of a + multibyte character. */ + int second_backslash = bs_safe ? '\\' : CHAR_MAX + 1; + + /* This code can return true even if KEYS lacks a back-reference, for + patterns like [\2], or for encodings where '\' appears as the last + byte of a multibyte character. However, false alarms should be + rare and do not affect correctness. */ + + /* Do not look for a backslash in the pattern's last byte, since it + can't be part of a back-reference and this streamlines the code. */ + len--; + + if (0 <= len) + { + char const *lim = keys + len; + for (char const *p = keys; (p = memchr (p, '\\', lim - p)); p++) + { + if ('1' <= p[1] && p[1] <= '9') + return true; + if (p[1] == second_backslash) + { + p++; + if (p == lim) + break; + } + } + } + return false; +} + +static bool +regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len, + ptrdiff_t pcount, ptrdiff_t lineno, bool syntax_only) +{ + struct re_pattern_buffer pat0; + struct re_pattern_buffer *pat = syntax_only ? &pat0 : &dc->patterns[pcount]; + pat->buffer = NULL; + pat->allocated = 0; + + /* Do not use a fastmap with -i, to work around glibc Bug#20381. */ + pat->fastmap = syntax_only | match_icase ? NULL : xmalloc (UCHAR_MAX + 1); + + pat->translate = NULL; + + char const *err = re_compile_pattern (p, len, pat); + if (!err) + return true; + + /* Emit a filename:lineno: prefix for patterns taken from files. */ + size_t pat_lineno = lineno; + char const *pat_filename + = lineno < 0 ? 
"\0" : pattern_file_name (lineno + 1, &pat_lineno); + + if (*pat_filename == '\0') + error (0, 0, "%s", err); + else + error (0, 0, "%s:%zu: %s", pat_filename, pat_lineno, err); + + return false; +} + +void * +GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits) { - size_t total = size; char *motif; + struct dfa_comp *dc = xcalloc (1, sizeof (*dc)); + + dc->dfa = dfaalloc (); if (match_icase) syntax_bits |= RE_ICASE; re_set_syntax (syntax_bits); - dfasyntax (syntax_bits, match_icase, eolbyte); + int dfaopts = eolbyte ? 0 : DFA_EOL_NUL; + dfasyntax (dc->dfa, &localeinfo, syntax_bits, dfaopts); + bool bs_safe = !localeinfo.multibyte | localeinfo.using_utf8; /* For GNU regex, pass the patterns separately to detect errors like "[\nallo\n]\n", where the patterns are "[", "allo" and "]", and this should be a syntax error. The same for backref, where the backref should be local to each pattern. */ char const *p = pattern; + char const *patlim = pattern + size; + bool compilation_failed = false; + + dc->patterns = xmalloc (sizeof *dc->patterns); + dc->patterns++; + dc->pcount = 0; + size_t palloc = 1; + + char const *prev = pattern; + + /* Buffer containing back-reference-free patterns. */ + char *buf = NULL; + ptrdiff_t buflen = 0; + size_t bufalloc = 0; + + ptrdiff_t lineno = 0; + do { size_t len; - char const *sep = memchr (p, '\n', total); + char const *sep = memchr (p, '\n', patlim - p); if (sep) { len = sep - p; sep++; - total -= (len + 1); } else + len = patlim - p; + + bool backref = possible_backrefs_in_pattern (p, len, bs_safe); + + if (backref && prev < p) + { + ptrdiff_t prevlen = p - prev; + while (bufalloc < buflen + prevlen) + buf = x2realloc (buf, &bufalloc); + memcpy (buf + buflen, prev, prevlen); + buflen += prevlen; + } + + /* Ensure room for at least two more patterns. The extra one is + for the regex_compile that may be executed after this loop + exits, and its (unused) slot is patterns[-1] until then. 
*/ + while (palloc <= dc->pcount + 1) { - len = total; - total = 0; + dc->patterns = x2nrealloc (dc->patterns - 1, &palloc, + sizeof *dc->patterns); + dc->patterns++; } - patterns = xnrealloc (patterns, pcount + 1, sizeof *patterns); - patterns[pcount] = patterns0; + if (!regex_compile (dc, p, len, dc->pcount, lineno, !backref)) + compilation_failed = true; - char const *err = re_compile_pattern (p, len, - &(patterns[pcount].regexbuf)); - if (err) - error (EXIT_TROUBLE, 0, "%s", err); - pcount++; p = sep; + lineno++; + + if (backref) + { + dc->pcount++; + prev = p; + } } while (p); + if (compilation_failed) + exit (EXIT_TROUBLE); + + if (prev != NULL) + { + if (pattern < prev) + { + ptrdiff_t prevlen = patlim - prev; + buf = xrealloc (buf, buflen + prevlen); + memcpy (buf + buflen, prev, prevlen); + buflen += prevlen; + } + else + { + buf = pattern; + buflen = size; + } + } + + if (buf != NULL) + { + dc->patterns--; + dc->pcount++; + + if (!regex_compile (dc, buf, buflen, 0, -1, false)) + abort (); + + if (buf != pattern) + free (buf); + } + /* In the match_words and match_lines cases, we use a different pattern for the DFA matcher that will quickly throw out cases that won't work. Then if DFA succeeds we do some hairy stuff using the regex matcher @@ -182,7 +308,7 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) strcpy (n, match_lines ? (bk ? line_beg_bk : line_beg_no_bk) : (bk ? word_beg_bk : word_beg_no_bk)); - total = strlen(n); + size_t total = strlen (n); memcpy (n + total, pattern, size); total += size; strcpy (n + total, match_lines ? (bk ? 
line_end_bk : line_end_no_bk) @@ -194,15 +320,17 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) else motif = NULL; - dfa = dfaalloc (); - dfacomp (pattern, size, dfa, 1); - kwsmusts (); + dfaparse (pattern, size, dc->dfa); + kwsmusts (dc); + dfacomp (NULL, 0, dc->dfa, 1); - free(motif); + free (motif); + + return dc; } size_t -EGexecute (char const *buf, size_t size, size_t *match_size, +EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size, char const *start_ptr) { char const *buflim, *beg, *end, *ptr, *match, *best_match, *mb_start; @@ -211,8 +339,9 @@ EGexecute (char const *buf, size_t size, size_t *match_size, size_t len, best_len; struct kwsmatch kwsm; size_t i; - struct dfa *superset = dfasuperset (dfa); - bool dfafast = dfaisfast (dfa); + struct dfa_comp *dc = vdc; + struct dfa *superset = dfasuperset (dc->dfa); + bool dfafast = dfaisfast (dc->dfa); mb_start = buf; buflim = buf + size; @@ -224,19 +353,20 @@ EGexecute (char const *buf, size_t size, size_t *match_size, if (!start_ptr) { char const *next_beg, *dfa_beg = beg; - size_t count = 0; + ptrdiff_t count = 0; bool exact_kwset_match = false; - int backref = 0; + bool backref = false; /* Try matching with KWset, if it's defined. */ - if (kwset) + if (dc->kwset) { char const *prev_beg; /* Find a possible match using the KWset matcher. */ - size_t offset = kwsexec (kwset, beg - begline, - buflim - beg + begline, &kwsm); - if (offset == (size_t) -1) + ptrdiff_t offset = kwsexec (dc->kwset, beg - dc->begline, + buflim - beg + dc->begline, + &kwsm, true); + if (offset < 0) goto failure; match = beg + offset; prev_beg = beg; @@ -252,7 +382,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size, PREV_BEG is less than 64 or (MATCH - PREV_BEG), this is the greater of the latter two values; this temporarily prefers the DFA to KWset. 
*/ - exact_kwset_match = kwsm.index < kwset_exact_matches; + exact_kwset_match = kwsm.index < dc->kwset_exact_matches; end = ((exact_kwset_match || !dfafast || MAX (16, match - beg) < (match - prev_beg) >> 2) ? match @@ -264,11 +394,11 @@ EGexecute (char const *buf, size_t size, size_t *match_size, if (exact_kwset_match) { - if (MB_CUR_MAX == 1 || using_utf8 ()) + if (!localeinfo.multibyte | localeinfo.using_utf8) goto success; if (mb_start < beg) mb_start = beg; - if (mb_goback (&mb_start, match, buflim) == 0) + if (mb_goback (&mb_start, NULL, match, buflim) == 0) goto success; /* The matched line starts in the middle of a multibyte character. Perform the DFA search starting from the @@ -283,27 +413,27 @@ EGexecute (char const *buf, size_t size, size_t *match_size, /* Keep using the superset while it reports multiline potential matches; this is more likely to be fast than falling back to KWset would be. */ - while ((next_beg = dfaexec (superset, dfa_beg, (char *) end, 1, - &count, NULL)) - && next_beg != end - && count != 0) + next_beg = dfaexec (superset, dfa_beg, (char *) end, 0, + &count, NULL); + if (next_beg == NULL || next_beg == end) + continue; + + /* Narrow down to the line we've found. */ + if (count != 0) { - /* Try to match in just one line. */ - count = 0; beg = memrchr (buf, eol, next_beg - buf); beg++; dfa_beg = beg; } - if (next_beg == NULL || next_beg == end) - continue; - - /* Narrow down to the line we've found. */ end = memchr (next_beg, eol, buflim - next_beg); end = end ? end + 1 : buflim; + + count = 0; } /* Try matching with DFA. */ - next_beg = dfaexec (dfa, dfa_beg, (char *) end, 0, &count, &backref); + next_beg = dfaexec (dc->dfa, dfa_beg, (char *) end, 0, &count, + &backref); /* If there's no match, or if we've matched the sentinel, we're done. */ @@ -319,7 +449,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size, end = memchr (next_beg, eol, buflim - next_beg); end = end ? 
end + 1 : buflim; - /* Successful, no backreferences encountered! */ + /* Successful, no back-references encountered! */ if (!backref) goto success; ptr = beg; @@ -339,18 +469,17 @@ EGexecute (char const *buf, size_t size, size_t *match_size, /* Run the possible match through Regex. */ best_match = end; best_len = 0; - for (i = 0; i < pcount; i++) + for (i = 0; i < dc->pcount; i++) { - patterns[i].regexbuf.not_eol = 0; - start = re_search (&(patterns[i].regexbuf), - beg, end - beg - 1, - ptr - beg, end - ptr - 1, - &(patterns[i].regs)); + dc->patterns[i].not_eol = 0; + dc->patterns[i].newline_anchor = eolbyte == '\n'; + start = re_search (&dc->patterns[i], beg, end - beg - 1, + ptr - beg, end - ptr - 1, &dc->regs); if (start < -1) xalloc_die (); else if (0 <= start) { - len = patterns[i].regs.end[0] - start; + len = dc->regs.end[0] - start; match = beg + start; if (match > best_match) continue; @@ -363,29 +492,28 @@ EGexecute (char const *buf, size_t size, size_t *match_size, len = end - ptr; goto assess_pattern_match; } - /* If -w, check if the match aligns with word boundaries. - We do this iteratively because: + /* If -w and not -x, check whether the match aligns with + word boundaries. Do this iteratively because: (a) the line may contain more than one occurrence of the pattern, and (b) Several alternatives in the pattern might be valid at a given point, and we may need to consider a shorter one to find a word boundary. */ - if (match_words) + if (!match_lines && match_words) while (match <= best_match) { regoff_t shorter_len = 0; - if (!wordchar (mb_prev_wc (beg, match, end - 1)) - && !wordchar (mb_next_wc (match + len, end - 1))) + if (! wordchar_next (match + len, end - 1) + && ! wordchar_prev (beg, match, end - 1)) goto assess_pattern_match; if (len > 0) { /* Try a shorter length anchored at the same place. 
*/ --len; - patterns[i].regexbuf.not_eol = 1; - shorter_len = re_match (&(patterns[i].regexbuf), - beg, match + len - ptr, - match - beg, - &(patterns[i].regs)); + dc->patterns[i].not_eol = 1; + shorter_len = re_match (&dc->patterns[i], beg, + match + len - ptr, match - beg, + &dc->regs); if (shorter_len < -1) xalloc_die (); } @@ -397,18 +525,17 @@ EGexecute (char const *buf, size_t size, size_t *match_size, if (match == end - 1) break; match++; - patterns[i].regexbuf.not_eol = 0; - start = re_search (&(patterns[i].regexbuf), - beg, end - beg - 1, + dc->patterns[i].not_eol = 0; + start = re_search (&dc->patterns[i], beg, end - beg - 1, match - beg, end - match - 1, - &(patterns[i].regs)); + &dc->regs); if (start < 0) { if (start < -1) xalloc_die (); break; } - len = patterns[i].regs.end[0] - start; + len = dc->regs.end[0] - start; match = beg + start; } } /* while (match <= best_match) */ diff --git a/contrib/grep/lib/colorize.h b/contrib/grep/src/die.h similarity index 62% copy from contrib/grep/lib/colorize.h copy to contrib/grep/src/die.h index d04fcb4d6e..cd636fa9e4 100644 --- a/contrib/grep/lib/colorize.h +++ b/contrib/grep/src/die.h @@ -1,6 +1,6 @@ -/* Output colorization. +/* Report an error and exit. + Copyright 2016-2020 Free Software Foundation, Inc. - Copyright 2011-2015 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) @@ -16,7 +16,16 @@ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ -extern int should_colorize (void); -extern void init_colorize (void); -extern void print_start_colorize (char const *sgr_start, char const *sgr_seq); -extern void print_end_colorize (char const *sgr_end); +#ifndef DIE_H +#define DIE_H + +#include +#include +#include + +/* Like 'error (STATUS, ...)', except STATUS must be a nonzero constant. 
+ This may pacify the compiler or help it generate better code. */ +#define die(status, ...) \ + verify_expr (status, (error (status, __VA_ARGS__), assume (false))) + +#endif /* DIE_H */ diff --git a/contrib/grep/src/dosbuf.c b/contrib/grep/src/dosbuf.c deleted file mode 100644 index 0e8f6f7b04..0000000000 --- a/contrib/grep/src/dosbuf.c +++ /dev/null @@ -1,222 +0,0 @@ -/* dosbuf.c - Copyright (C) 1992, 1997-2002, 2004-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA - 02110-1301, USA. */ - -/* Messy DOS-specific code for correctly treating binary, Unix text - and DOS text files. - - This has several aspects: - - * Guessing the file type (unless the user tells us); - * Stripping CR characters from DOS text files (otherwise regex - functions won't work correctly); - * Reporting correct byte count with -b for any kind of file. 
- -*/ - -#include - -typedef enum { - UNKNOWN, DOS_BINARY, DOS_TEXT, UNIX_TEXT -} File_type; - -struct dos_map { - off_t pos; /* position in buffer passed to matcher */ - off_t add; /* how much to add when reporting char position */ -}; - -static int dos_report_unix_offset = 0; - -static File_type dos_file_type = UNKNOWN; -static File_type dos_use_file_type = UNKNOWN; -static off_t dos_stripped_crs = 0; -static struct dos_map *dos_pos_map; -static int dos_pos_map_size = 0; -static int dos_pos_map_used = 0; -static int inp_map_idx = 0, out_map_idx = 1; - -/* Set default DOS file type to binary. */ -static void -dos_binary (void) -{ - if (O_BINARY) - dos_use_file_type = DOS_BINARY; -} - -/* Tell DOS routines to report Unix offset. */ -static void -dos_unix_byte_offsets (void) -{ - if (O_BINARY) - dos_report_unix_offset = 1; -} - -/* Guess DOS file type by looking at its contents. */ -static File_type -guess_type (char *buf, size_t buflen) -{ - int crlf_seen = 0; - char *bp = buf; - - while (buflen--) - { - /* Treat a file as binary if it has a NUL character. */ - if (!*bp) - return DOS_BINARY; - - /* CR before LF means DOS text file (unless we later see - binary characters). */ - else if (*bp == '\r' && buflen && bp[1] == '\n') - crlf_seen = 1; - - bp++; - } - - return crlf_seen ? DOS_TEXT : UNIX_TEXT; -} - -/* Convert external DOS file representation to internal. - Return the count of bytes left in the buffer. - Build table to map character positions when reporting byte counts. */ -static size_t -undossify_input (char *buf, size_t buflen) -{ - if (! O_BINARY) - return buflen; - - size_t bytes_left = 0; - - if (totalcc == 0) - { - /* New file: forget everything we knew about character - position mapping table and file type. */ - inp_map_idx = 0; - out_map_idx = 1; - dos_pos_map_used = 0; - dos_stripped_crs = 0; - dos_file_type = dos_use_file_type; - } - - /* Guess if this file is binary, unless we already know that. 
*/ - if (dos_file_type == UNKNOWN) - dos_file_type = guess_type(buf, buflen); - - /* If this file is to be treated as DOS Text, strip the CR characters - and maybe build the table for character position mapping on output. */ - if (dos_file_type == DOS_TEXT) - { - char *destp = buf; - - while (buflen--) - { - if (*buf != '\r') - { - *destp++ = *buf++; - bytes_left++; - } - else - { - buf++; - if (out_byte && !dos_report_unix_offset) - { - dos_stripped_crs++; - while (buflen && *buf == '\r') - { - dos_stripped_crs++; - buflen--; - buf++; - } - if (inp_map_idx >= dos_pos_map_size - 1) - { - dos_pos_map_size = inp_map_idx ? inp_map_idx * 2 : 1000; - dos_pos_map = xrealloc(dos_pos_map, - dos_pos_map_size * - sizeof(struct dos_map)); - } - - if (!inp_map_idx) - { - /* Add sentinel entry. */ - dos_pos_map[inp_map_idx].pos = 0; - dos_pos_map[inp_map_idx++].add = 0; - - /* Initialize first real entry. */ - dos_pos_map[inp_map_idx].add = 0; - } - - /* Put the new entry. If the stripped CR characters - precede a Newline (the usual case), pretend that - they were found *after* the Newline. This makes - displayed byte offsets more reasonable in some - cases, and fits better the intuitive notion that - the line ends *before* the CR, not *after* it. */ - inp_map_idx++; - dos_pos_map[inp_map_idx-1].pos = - (*buf == '\n' ? destp + 1 : destp ) - bufbeg + totalcc; - dos_pos_map[inp_map_idx].add = dos_stripped_crs; - dos_pos_map_used = inp_map_idx; - - /* The following will be updated on the next pass. */ - dos_pos_map[inp_map_idx].pos = destp - bufbeg + totalcc + 1; - } - } - } - - return bytes_left; - } - - return buflen; -} - -/* Convert internal byte count into external. */ -static off_t -dossified_pos (off_t byteno) -{ - if (! O_BINARY) - return byteno; - - off_t pos_lo; - off_t pos_hi; - - if (dos_file_type != DOS_TEXT || dos_report_unix_offset) - return byteno; - - /* Optimization: usually the file will be scanned sequentially. 
- So in most cases, this byte position will be found in the - table near the previous one, as recorded in 'out_map_idx'. */ - pos_lo = dos_pos_map[out_map_idx-1].pos; - pos_hi = dos_pos_map[out_map_idx].pos; - - /* If the initial guess failed, search up or down, as - appropriate, beginning with the previous place. */ - if (byteno >= pos_hi) - { - out_map_idx++; - while (out_map_idx < dos_pos_map_used - && byteno >= dos_pos_map[out_map_idx].pos) - out_map_idx++; - } - - else if (byteno < pos_lo) - { - out_map_idx--; - while (out_map_idx > 1 && byteno < dos_pos_map[out_map_idx-1].pos) - out_map_idx--; - } - - return byteno + dos_pos_map[out_map_idx].add; -} diff --git a/contrib/grep/src/egrep.sh b/contrib/grep/src/egrep.sh deleted file mode 100644 index 6d6c15a84c..0000000000 --- a/contrib/grep/src/egrep.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!@SHELL@ -exec @grep@ @option@ "$@" diff --git a/contrib/grep/src/grep.c b/contrib/grep/src/grep.c index 2c5e09a961..7ba602d921 100644 --- a/contrib/grep/src/grep.c +++ b/contrib/grep/src/grep.c @@ -1,5 +1,5 @@ /* grep.c - main driver file for grep. - Copyright (C) 1992, 1997-2002, 2004-2015 Free Software Foundation, Inc. + Copyright (C) 1992, 1997-2002, 2004-2020 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,40 +22,39 @@ #include #include #include -#include -#include #include +#include #include #include "system.h" #include "argmatch.h" #include "c-ctype.h" +#include "c-stack.h" #include "closeout.h" #include "colorize.h" +#include "die.h" #include "error.h" #include "exclude.h" #include "exitfail.h" #include "fcntl-safer.h" #include "fts_.h" #include "getopt.h" +#include "getprogname.h" #include "grep.h" #include "intprops.h" -#include "progname.h" #include "propername.h" #include "quote.h" #include "safe-read.h" #include "search.h" +#include "c-strcase.h" #include "version-etc.h" #include "xalloc.h" +#include "xbinary-io.h" #include "xstrtol.h" -#define SEP_CHAR_SELECTED ':' -#define SEP_CHAR_REJECTED '-' -#define SEP_STR_GROUP "--" - -#define AUTHORS \ - proper_name ("Mike Haertel"), \ - _("others, see ") +enum { SEP_CHAR_SELECTED = ':' }; +enum { SEP_CHAR_REJECTED = '-' }; +static char const SEP_STR_GROUP[] = "--"; /* When stdout is connected to a regular file, save its stat information here, so that we can automatically skip it, thus @@ -80,6 +79,77 @@ static bool only_matching; /* If nonzero, make sure first content char in a line is on a tab stop. */ static bool align_tabs; +/* Print width of line numbers and byte offsets. Nonzero if ALIGN_TABS. */ +static int offset_width; + +/* See below */ +struct FL_pair + { + char const *filename; + size_t lineno; + }; + +/* A list of lineno,filename pairs corresponding to -f FILENAME + arguments. Since we store the concatenation of all patterns in + a single array, KEYS, be they from the command line via "-e PAT" + or read from one or more -f-specified FILENAMES. Given this + invocation, grep -f <(seq 5) -f <(seq 2) -f <(seq 3) FILE, there + will be three entries in LF_PAIR: {1, x} {6, y} {8, z}, where + x, y and z are just place-holders for shell-generated names. 
*/ +static struct FL_pair *fl_pair; +static size_t n_fl_pair_slots; +/* Count not only -f-specified files, but also individual -e operands + and any command-line argument that serves as a regular expression. */ +static size_t n_pattern_files; + +/* The number of patterns seen so far. + It is advanced by fl_add and, when needed, used in pattern_file_name + to derive a file-relative line number. */ +static size_t n_patterns; + +/* Return the number of newline bytes in BUF with size SIZE. */ +static size_t _GL_ATTRIBUTE_PURE +count_nl_bytes (char const *buf, size_t size) +{ + char const *p = buf; + char const *end_p = buf + size; + size_t n = 0; + while ((p = memchr (p, '\n', end_p - p))) + p++, n++; + return n; +} + +/* Append a FILENAME,line-number pair to FL_PAIR, and update + pattern-related counts from the contents of BUF with SIZE bytes. */ +static void +fl_add (char const *buf, size_t size, char const *filename) +{ + if (n_fl_pair_slots <= n_pattern_files) + fl_pair = x2nrealloc (fl_pair, &n_fl_pair_slots, sizeof *fl_pair); + + fl_pair[n_pattern_files].lineno = n_patterns + 1; + fl_pair[n_pattern_files].filename = filename; + n_pattern_files++; + n_patterns += count_nl_bytes (buf, size); +} + +/* Map the line number, LINENO, of one of the input patterns to the + name of the file from which it came. If it was read from stdin + or if it was specified on the command line, return "-". */ +char const * _GL_ATTRIBUTE_PURE +pattern_file_name (size_t lineno, size_t *new_lineno) +{ + size_t i; + for (i = 1; i < n_pattern_files; i++) + { + if (lineno < fl_pair[i].lineno) + break; + } + + *new_lineno = lineno - fl_pair[i - 1].lineno + 1; + return fl_pair[i - 1].filename; +} + #if HAVE_ASAN /* Record the starting address and length of the sole poisoned region, so that we can unpoison it later, just before each following read. 
*/ @@ -296,8 +366,49 @@ static const struct color_cap color_dict[] = { NULL, NULL, NULL } }; -static struct exclude *excluded_patterns; -static struct exclude *excluded_directory_patterns; +/* Saved errno value from failed output functions on stdout. */ +static int stdout_errno; + +static void +putchar_errno (int c) +{ + if (putchar (c) < 0) + stdout_errno = errno; +} + +static void +fputs_errno (char const *s) +{ + if (fputs (s, stdout) < 0) + stdout_errno = errno; +} + +static void _GL_ATTRIBUTE_FORMAT_PRINTF (1, 2) +printf_errno (char const *format, ...) +{ + va_list ap; + va_start (ap, format); + if (vfprintf (stdout, format, ap) < 0) + stdout_errno = errno; + va_end (ap); +} + +static void +fwrite_errno (void const *ptr, size_t size, size_t nmemb) +{ + if (fwrite (ptr, size, nmemb, stdout) != nmemb) + stdout_errno = errno; +} + +static void +fflush_errno (void) +{ + if (fflush (stdout) != 0) + stdout_errno = errno; +} + +static struct exclude *excluded_patterns[2]; +static struct exclude *excluded_directory_patterns[2]; /* Short options. */ static char const short_options[] = "0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz"; @@ -313,7 +424,8 @@ enum GROUP_SEPARATOR_OPTION, INCLUDE_OPTION, LINE_BUFFERED_OPTION, - LABEL_OPTION + LABEL_OPTION, + NO_IGNORE_CASE_OPTION }; /* Long options equivalences. */ @@ -344,6 +456,7 @@ static struct option const long_options[] = {"help", no_argument, &show_help, 1}, {"include", required_argument, NULL, INCLUDE_OPTION}, {"ignore-case", no_argument, NULL, 'i'}, + {"no-ignore-case", no_argument, NULL, NO_IGNORE_CASE_OPTION}, {"initial-tab", no_argument, NULL, 'T'}, {"label", required_argument, NULL, LABEL_OPTION}, {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION}, @@ -377,17 +490,17 @@ bool match_icase; bool match_words; bool match_lines; char eolbyte; -enum textbin input_textbin; - -static char const *matcher; /* For error messages. */ -/* The input file name, or (if standard input) "-" or a --label argument. 
*/ +/* The input file name, or (if standard input) null or a --label argument. */ static char const *filename; /* Omit leading "./" from file names in diagnostics. */ static bool omit_dot_slash; static bool errseen; -static bool write_error_seen; + +/* True if output from the current input file has been suppressed + because an output line had an encoding error. */ +static bool encoding_error_output; enum directories_type { @@ -423,10 +536,6 @@ static enum static bool grepfile (int, char const *, bool, bool); static bool grepdesc (int, bool); -static void dos_binary (void); -static void dos_unix_byte_offsets (void); -static size_t undossify_input (char *, size_t); - static bool is_device_mode (mode_t m) { @@ -437,7 +546,7 @@ static bool skip_devices (bool command_line) { return (devices == SKIP_DEVICES - || (devices == READ_COMMAND_LINE_DEVICES && !command_line)); + || ((devices == READ_COMMAND_LINE_DEVICES) & !command_line)); } /* Return if ST->st_size is defined. Assume the file is not a @@ -457,18 +566,31 @@ enum { SEEK_DATA = SEEK_SET }; enum { SEEK_HOLE = SEEK_SET }; #endif +/* True if lseek with SEEK_CUR or SEEK_DATA failed on the current input. */ +static bool seek_failed; +static bool seek_data_failed; + /* Functions we'll use to search. */ -typedef void (*compile_fp_t) (char const *, size_t); -typedef size_t (*execute_fp_t) (char const *, size_t, size_t *, char const *); -static compile_fp_t compile; +typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t); +typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *, + char const *); static execute_fp_t execute; +static void *compiled_pattern; + +static char const * +input_filename (void) +{ + if (!filename) + filename = _("(standard input)"); + return filename; +} -/* Like error, but suppress the diagnostic if requested. */ +/* Unless requested, diagnose an error about the input file. */ static void -suppressible_error (char const *mesg, int errnum) +suppressible_error (int errnum) { if (! 
suppress_errors) - error (0, errnum, "%s", mesg); + error (0, errnum, "%s", input_filename ()); errseen = true; } @@ -477,31 +599,10 @@ suppressible_error (char const *mesg, int errnum) static void clean_up_stdout (void) { - if (! write_error_seen) + if (! stdout_errno) close_stdout (); } -static bool -textbin_is_binary (enum textbin textbin) -{ - return textbin < TEXTBIN_UNKNOWN; -} - -/* The high-order bit of a byte. */ -enum { HIBYTE = 0x80 }; - -/* True if every byte with HIBYTE off is a single-byte character. - UTF-8 has this property. */ -static bool easy_encoding; - -static void -init_easy_encoding (void) -{ - easy_encoding = true; - for (int i = 0; i < HIBYTE; i++) - easy_encoding &= mbclen_cache[i] == 1; -} - /* A cast to TYPE of VAL. Use this when TYPE is a pointer type, VAL is properly aligned for TYPE, and 'gcc -Wcast-align' cannot infer the alignment and would otherwise complain about the cast. */ @@ -520,21 +621,48 @@ init_easy_encoding (void) /* An unsigned type suitable for fast matching. */ typedef uintmax_t uword; +struct localeinfo localeinfo; + +/* A mask to test for unibyte characters, with the pattern repeated to + fill a uword. For a multibyte character encoding where + all bytes are unibyte characters, this is 0. For UTF-8, this is + 0x808080.... For encodings where unibyte characters have no discerned + pattern, this is all 1s. The unsigned char C is a unibyte + character if C & UNIBYTE_MASK is zero. If the uword W is the + concatenation of bytes, the bytes are all unibyte characters + if W & UNIBYTE_MASK is zero. */ +static uword unibyte_mask; + +static void +initialize_unibyte_mask (void) +{ + /* For each encoding error I that MASK does not already match, + accumulate I's most significant 1 bit by ORing it into MASK. + Although any 1 bit of I could be used, in practice high-order + bits work better. */ + unsigned char mask = 0; + int ms1b = 1; + for (int i = 1; i <= UCHAR_MAX; i++) + if ((localeinfo.sbclen[i] != 1) & ! 
(mask & i)) + { + while (ms1b * 2 <= i) + ms1b *= 2; + mask |= ms1b; + } + + /* Now MASK will detect any encoding-error byte, although it may + cry wolf and it may not be optimal. Build a uword-length mask by + repeating MASK. */ + uword uword_max = -1; + unibyte_mask = uword_max / UCHAR_MAX * mask; +} + /* Skip the easy bytes in a buffer that is guaranteed to have a sentinel that is not easy, and return a pointer to the first non-easy byte. - In easy encodings, the easy bytes all have HIBYTE off. - In other encodings, no byte is easy. */ + The easy bytes all have UNIBYTE_MASK off. */ static char const * _GL_ATTRIBUTE_PURE skip_easy_bytes (char const *buf) { - if (!easy_encoding) - return buf; - - uword uword_max = -1; - - /* 0x8080..., extended to be wide enough for uword. */ - uword hibyte_mask = uword_max / UCHAR_MAX * HIBYTE; - /* Search a byte at a time until the pointer is aligned, then a uword at a time until a match is found, then a byte at a time to identify the exact byte. The uword search may go slightly past @@ -542,82 +670,79 @@ skip_easy_bytes (char const *buf) char const *p; uword const *s; for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++) - if (*p & HIBYTE) + if (to_uchar (*p) & unibyte_mask) return p; - for (s = CAST_ALIGNED (uword const *, p); ! (*s & hibyte_mask); s++) + for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++) continue; - for (p = (char const *) s; ! (*p & HIBYTE); p++) + for (p = (char const *) s; ! (to_uchar (*p) & unibyte_mask); p++) continue; return p; } -/* Return the text type of data in BUF, of size SIZE. +/* Return true if BUF, of size SIZE, has an encoding error. BUF must be followed by at least sizeof (uword) bytes, - which may be arbitrarily written to or read from. */ -static enum textbin -buffer_textbin (char *buf, size_t size) + the first of which may be modified. 
*/ +static bool +buf_has_encoding_errors (char *buf, size_t size) { - if (eolbyte && memchr (buf, '\0', size)) - return TEXTBIN_BINARY; + if (! unibyte_mask) + return false; - if (1 < MB_CUR_MAX) - { - mbstate_t mbs = { 0 }; - size_t clen; - char const *p; + mbstate_t mbs = { 0 }; + size_t clen; - buf[size] = -1; - for (p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen) - { - clen = mbrlen (p, buf + size - p, &mbs); - if ((size_t) -2 <= clen) - return clen == (size_t) -2 ? TEXTBIN_UNKNOWN : TEXTBIN_BINARY; - } + buf[size] = -1; + for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen) + { + clen = mbrlen (p, buf + size - p, &mbs); + if ((size_t) -2 <= clen) + return true; } - return TEXTBIN_TEXT; + return false; } -/* Return the text type of a file. BUF, of size SIZE, is the initial - buffer read from the file with descriptor FD and status ST. - BUF must be followed by at least sizeof (uword) bytes, + +/* Return true if BUF, of size SIZE, has a null byte. + BUF must be followed by at least one byte, which may be arbitrarily written to or read from. */ -static enum textbin -file_textbin (char *buf, size_t size, int fd, struct stat const *st) +static bool +buf_has_nulls (char *buf, size_t size) { - enum textbin textbin = buffer_textbin (buf, size); - if (textbin_is_binary (textbin)) - return textbin; + buf[size] = 0; + return strlen (buf) != size; +} - if (usable_st_size (st)) +/* Return true if a file is known to contain null bytes. + SIZE bytes have already been read from the file + with descriptor FD and status ST. */ +static bool +file_must_have_nulls (size_t size, int fd, struct stat const *st) +{ + /* If the file has holes, it must contain a null byte somewhere. */ + if (SEEK_HOLE != SEEK_SET && !seek_failed + && usable_st_size (st) && size < st->st_size) { - if (st->st_size <= size) - return textbin == TEXTBIN_UNKNOWN ? TEXTBIN_BINARY : textbin; - - /* If the file has holes, it must contain a null byte somewhere. 
*/ - if (SEEK_HOLE != SEEK_SET && eolbyte) + off_t cur = size; + if (O_BINARY || fd == STDIN_FILENO) { - off_t cur = size; - if (O_BINARY || fd == STDIN_FILENO) - { - cur = lseek (fd, 0, SEEK_CUR); - if (cur < 0) - return TEXTBIN_UNKNOWN; - } + cur = lseek (fd, 0, SEEK_CUR); + if (cur < 0) + return false; + } - /* Look for a hole after the current location. */ - off_t hole_start = lseek (fd, cur, SEEK_HOLE); - if (0 <= hole_start) - { - if (lseek (fd, cur, SEEK_SET) < 0) - suppressible_error (filename, errno); - if (hole_start < st->st_size) - return TEXTBIN_BINARY; - } + /* Look for a hole after the current location. */ + off_t hole_start = lseek (fd, cur, SEEK_HOLE); + if (0 <= hole_start) + { + if (lseek (fd, cur, SEEK_SET) < 0) + suppressible_error (errno); + if (hole_start < st->st_size) + return true; } } - return TEXTBIN_UNKNOWN; + return false; } /* Convert STR to a nonnegative integer, storing the result in *OUT. @@ -633,26 +758,37 @@ context_length_arg (char const *str, intmax_t *out) case LONGINT_OVERFLOW: if (0 <= *out) break; - /* Fall through. */ + FALLTHROUGH; default: - error (EXIT_TROUBLE, 0, "%s: %s", str, - _("invalid context length argument")); + die (EXIT_TROUBLE, 0, "%s: %s", str, + _("invalid context length argument")); } } +/* Return the add_exclude options suitable for excluding a file name. + If COMMAND_LINE, it is a command-line file name. */ +static int +exclude_options (bool command_line) +{ + return EXCLUDE_WILDCARDS | (command_line ? 0 : EXCLUDE_ANCHORED); +} + /* Return true if the file with NAME should be skipped. If COMMAND_LINE, it is a command-line argument. If IS_DIR, it is a directory. */ static bool skipped_file (char const *name, bool command_line, bool is_dir) { - return (is_dir - ? (directories == SKIP_DIRECTORIES - || (! 
(command_line && omit_dot_slash) - && excluded_directory_patterns - && excluded_file_name (excluded_directory_patterns, name))) - : (excluded_patterns - && excluded_file_name (excluded_patterns, name))); + struct exclude **pats; + if (! is_dir) + pats = excluded_patterns; + else if (directories == SKIP_DIRECTORIES) + return true; + else if (command_line && omit_dot_slash) + return false; + else + pats = excluded_directory_patterns; + return pats[command_line] && excluded_file_name (pats[command_line], name); } /* Hairy buffering mechanism for grep. The intent is to keep @@ -661,20 +797,21 @@ skipped_file (char const *name, bool command_line, bool is_dir) static char *buffer; /* Base of buffer. */ static size_t bufalloc; /* Allocated buffer size, counting slop. */ -#define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. */ static int bufdesc; /* File descriptor. */ static char *bufbeg; /* Beginning of user-visible stuff. */ static char *buflim; /* Limit of user-visible stuff. */ static size_t pagesize; /* alignment of memory pages */ -static off_t bufoffset; /* Read offset; defined on regular files. */ +static off_t bufoffset; /* Read offset. */ static off_t after_last_match; /* Pointer after last matching line that would have been output if we were outputting characters. */ static bool skip_nuls; /* Skip '\0' in data. */ static bool skip_empty_lines; /* Skip empty lines in data. */ -static bool seek_data_failed; /* lseek with SEEK_DATA failed. */ static uintmax_t totalnl; /* Total newline count before lastnl. */ +/* Initial buffer size, not counting slop. */ +enum { INITIAL_BUFSIZE = 96 * 1024 }; + /* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be an integer or a pointer. Both args must be free of side effects. 
*/ #define ALIGN_TO(val, alignment) \ @@ -689,7 +826,7 @@ add_count (uintmax_t a, uintmax_t b) { uintmax_t sum = a + b; if (sum < a) - error (EXIT_TROUBLE, 0, _("input is too large to count")); + die (EXIT_TROUBLE, 0, _("input is too large to count")); return sum; } @@ -708,33 +845,23 @@ all_zeros (char const *buf, size_t size) static bool reset (int fd, struct stat const *st) { - if (! pagesize) - { - pagesize = getpagesize (); - if (pagesize == 0 || 2 * pagesize + 1 <= pagesize) - abort (); - bufalloc = (ALIGN_TO (INITIAL_BUFSIZE, pagesize) - + pagesize + sizeof (uword)); - buffer = xmalloc (bufalloc); - } - bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize); bufbeg[-1] = eolbyte; bufdesc = fd; + bufoffset = fd == STDIN_FILENO ? lseek (fd, 0, SEEK_CUR) : 0; + seek_failed = bufoffset < 0; - if (S_ISREG (st->st_mode)) + /* Assume SEEK_DATA fails if SEEK_CUR does. */ + seek_data_failed = seek_failed; + + if (seek_failed) { - if (fd != STDIN_FILENO) - bufoffset = 0; - else + if (errno != ESPIPE) { - bufoffset = lseek (fd, 0, SEEK_CUR); - if (bufoffset < 0) - { - suppressible_error (_("lseek failed"), errno); - return false; - } + suppressible_error (errno); + return false; } + bufoffset = 0; } return true; } @@ -822,7 +949,7 @@ fillbuf (size_t save, struct stat const *st) } bufoffset += fillsize; - if (fillsize == 0 || !skip_nuls || !all_zeros (readbuf, fillsize)) + if (((fillsize == 0) | !skip_nuls) || !all_zeros (readbuf, fillsize)) break; totalnl = add_count (totalnl, fillsize); @@ -844,7 +971,6 @@ fillbuf (size_t save, struct stat const *st) } } - fillsize = undossify_input (readbuf, fillsize); buflim = readbuf + fillsize; /* Initialize the following word, because skip_easy_bytes and some @@ -868,18 +994,28 @@ static enum WITHOUT_MATCH_BINARY_FILES } binary_files; /* How to handle binary files. 
*/ +/* Options for output as a list of matching/non-matching files */ +static enum +{ + LISTFILES_NONE, + LISTFILES_MATCHING, + LISTFILES_NONMATCHING, +} list_files; + +/* Whether to output filenames. 1 means yes, 0 means no, and -1 means + 'grep -r PATTERN FILE' was used and it is not known yet whether + FILE is a directory (which means yes) or not (which means no). */ +static int out_file; + static int filename_mask; /* If zero, output nulls after filenames. */ static bool out_quiet; /* Suppress all normal output. */ static bool out_invert; /* Print nonmatching stuff. */ -static int out_file; /* Print filenames. */ static bool out_line; /* Print line numbers. */ static bool out_byte; /* Print byte offsets. */ static intmax_t out_before; /* Lines of leading context. */ static intmax_t out_after; /* Lines of trailing context. */ static bool count_matches; /* Count matching lines. */ -static int list_files; /* List matching files. */ -static bool no_filenames; /* Suppress file names. */ -static intmax_t max_count; /* Stop after outputting this many +static intmax_t max_count; /* Max number of selected lines from an input file. */ static bool line_buffered; /* Use line buffering. */ static char *label = NULL; /* Fake filename for stdin */ @@ -888,16 +1024,16 @@ static char *label = NULL; /* Fake filename for stdin */ /* Internal variables to keep track of byte count, context, etc. */ static uintmax_t totalcc; /* Total character count before bufbeg. */ static char const *lastnl; /* Pointer after last newline counted. */ -static char const *lastout; /* Pointer after last character output; +static char *lastout; /* Pointer after last character output; NULL if no character has been output or if it's conceptually before bufbeg. */ -static intmax_t outleft; /* Maximum number of lines to be output. */ +static intmax_t outleft; /* Maximum number of selected lines. */ static intmax_t pending; /* Pending lines of output. Always kept 0 if out_quiet is true. 
*/ static bool done_on_match; /* Stop scanning file on first match. */ static bool exit_on_match; /* Exit on first match. */ - -#include "dosbuf.c" +static bool dev_null_output; /* Stdout is known to be /dev/null. */ +static bool binary; /* Use binary rather than text I/O. */ static void nlscan (char const *lim) @@ -920,7 +1056,7 @@ static void print_filename (void) { pr_sgr_start_if (filename_color); - fputs (filename, stdout); + fputs_errno (input_filename ()); pr_sgr_end_if (filename_color); } @@ -929,50 +1065,50 @@ static void print_sep (char sep) { pr_sgr_start_if (sep_color); - fputc (sep, stdout); + putchar_errno (sep); pr_sgr_end_if (sep_color); } /* Print a line number or a byte offset. */ static void -print_offset (uintmax_t pos, int min_width, const char *color) +print_offset (uintmax_t pos, const char *color) { - /* Do not rely on printf to print pos, since uintmax_t may be longer - than long, and long long is not portable. */ - - char buf[sizeof pos * CHAR_BIT]; - char *p = buf + sizeof buf; - - do - { - *--p = '0' + pos % 10; - --min_width; - } - while ((pos /= 10) != 0); - - /* Do this to maximize the probability of alignment across lines. */ - if (align_tabs) - while (--min_width >= 0) - *--p = ' '; - pr_sgr_start_if (color); - fwrite (p, 1, buf + sizeof buf - p, stdout); + printf_errno ("%*"PRIuMAX, offset_width, pos); pr_sgr_end_if (color); } -/* Print a whole line head (filename, line, byte). */ -static void -print_line_head (char const *beg, char const *lim, char sep) +/* Print a whole line head (filename, line, byte). The output data + starts at BEG and contains LEN bytes; it is followed by at least + sizeof (uword) bytes, the first of which may be temporarily modified. + The output data comes from what is perhaps a larger input line that + goes until LIM, where LIM[-1] is an end-of-line byte. Use SEP as + the separator on output. + + Return true unless the line was suppressed due to an encoding error. 
*/ + +static bool +print_line_head (char *beg, size_t len, char const *lim, char sep) { - bool pending_sep = false; + if (binary_files != TEXT_BINARY_FILES) + { + char ch = beg[len]; + bool encoding_errors = buf_has_encoding_errors (beg, len); + beg[len] = ch; + if (encoding_errors) + { + encoding_error_output = true; + return false; + } + } if (out_file) { print_filename (); if (filename_mask) - pending_sep = true; + print_sep (sep); else - fputc (0, stdout); + putchar_errno (0); } if (out_line) @@ -983,49 +1119,40 @@ print_line_head (char const *beg, char const *lim, char sep) totalnl = add_count (totalnl, 1); lastnl = lim; } - if (pending_sep) - print_sep (sep); - print_offset (totalnl, 4, line_num_color); - pending_sep = true; + print_offset (totalnl, line_num_color); + print_sep (sep); } if (out_byte) { uintmax_t pos = add_count (totalcc, beg - bufbeg); - pos = dossified_pos (pos); - if (pending_sep) - print_sep (sep); - print_offset (pos, 6, byte_num_color); - pending_sep = true; + print_offset (pos, byte_num_color); + print_sep (sep); } - if (pending_sep) - { - /* This assumes sep is one column wide. - Try doing this any other way with Unicode - (and its combining and wide characters) - filenames and you're wasting your efforts. 
*/ - if (align_tabs) - fputs ("\t\b", stdout); + if (align_tabs && (out_file | out_line | out_byte) && len != 0) + putchar_errno ('\t'); - print_sep (sep); - } + return true; } -static const char * -print_line_middle (const char *beg, const char *lim, +static char * +print_line_middle (char *beg, char *lim, const char *line_color, const char *match_color) { size_t match_size; size_t match_offset; - const char *cur = beg; - const char *mid = NULL; - - while (cur < lim - && ((match_offset = execute (beg, lim - beg, &match_size, cur)) - != (size_t) -1)) + char *cur; + char *mid = NULL; + char *b; + + for (cur = beg; + (cur < lim + && ((match_offset = execute (compiled_pattern, beg, lim - beg, + &match_size, cur)) != (size_t) -1)); + cur = b + match_size) { - char const *b = beg + match_offset; + b = beg + match_offset; /* Avoid matching the empty line at the end of the buffer. */ if (b == lim) @@ -1045,8 +1172,11 @@ print_line_middle (const char *beg, const char *lim, /* This function is called on a matching line only, but is it selected or rejected/context? */ if (only_matching) - print_line_head (b, lim, (out_invert ? SEP_CHAR_REJECTED - : SEP_CHAR_SELECTED)); + { + char sep = out_invert ? SEP_CHAR_REJECTED : SEP_CHAR_SELECTED; + if (! 
print_line_head (b, match_size, lim, sep)) + return NULL; + } else { pr_sgr_start (line_color); @@ -1055,16 +1185,15 @@ print_line_middle (const char *beg, const char *lim, cur = mid; mid = NULL; } - fwrite (cur, sizeof (char), b - cur, stdout); + fwrite_errno (cur, 1, b - cur); } pr_sgr_start_if (match_color); - fwrite (b, sizeof (char), match_size, stdout); + fwrite_errno (b, 1, match_size); pr_sgr_end_if (match_color); if (only_matching) - fputs ("\n", stdout); + putchar_errno (eolbyte); } - cur = b + match_size; } if (only_matching) @@ -1075,8 +1204,8 @@ print_line_middle (const char *beg, const char *lim, return cur; } -static const char * -print_line_tail (const char *beg, const char *lim, const char *line_color) +static char * +print_line_tail (char *beg, const char *lim, const char *line_color) { size_t eol_size; size_t tail_size; @@ -1088,7 +1217,7 @@ print_line_tail (const char *beg, const char *lim, const char *line_color) if (tail_size > 0) { pr_sgr_start (line_color); - fwrite (beg, 1, tail_size, stdout); + fwrite_errno (beg, 1, tail_size); beg += tail_size; pr_sgr_end (line_color); } @@ -1097,14 +1226,15 @@ print_line_tail (const char *beg, const char *lim, const char *line_color) } static void -prline (char const *beg, char const *lim, char sep) +prline (char *beg, char *lim, char sep) { bool matching; const char *line_color; const char *match_color; if (!only_matching) - print_line_head (beg, lim, sep); + if (! print_line_head (beg, lim - beg - 1, lim, sep)) + return; matching = (sep == SEP_CHAR_SELECTED) ^ out_invert; @@ -1124,7 +1254,11 @@ prline (char const *beg, char const *lim, char sep) { /* We already know that non-matching lines have no match (to colorize). */ if (matching && (only_matching || *match_color)) - beg = print_line_middle (beg, lim, line_color, match_color); + { + beg = print_line_middle (beg, lim, line_color, match_color); + if (! 
beg) + return; + } if (!only_matching && *line_color) { @@ -1135,45 +1269,33 @@ prline (char const *beg, char const *lim, char sep) } if (!only_matching && lim > beg) - fwrite (beg, 1, lim - beg, stdout); + fwrite_errno (beg, 1, lim - beg); - if (ferror (stdout)) - { - write_error_seen = true; - error (EXIT_TROUBLE, 0, _("write error")); - } + if (line_buffered) + fflush_errno (); - lastout = lim; + if (stdout_errno) + die (EXIT_TROUBLE, stdout_errno, _("write error")); - if (line_buffered) - fflush (stdout); + lastout = lim; } -/* Print pending lines of trailing context prior to LIM. Trailing context ends - at the next matching line when OUTLEFT is 0. */ +/* Print pending lines of trailing context prior to LIM. */ static void prpending (char const *lim) { if (!lastout) lastout = bufbeg; - while (pending > 0 && lastout < lim) + for (; 0 < pending && lastout < lim; pending--) { - char const *nl = memchr (lastout, eolbyte, lim - lastout); - size_t match_size; - --pending; - if (outleft - || ((execute (lastout, nl + 1 - lastout, - &match_size, NULL) == (size_t) -1) - == !out_invert)) - prline (lastout, nl + 1, SEP_CHAR_REJECTED); - else - pending = 0; + char *nl = memchr (lastout, eolbyte, lim - lastout); + prline (lastout, nl + 1, SEP_CHAR_REJECTED); } } /* Output the lines between BEG and LIM. Deal with context. */ static void -prtext (char const *beg, char const *lim) +prtext (char *beg, char *lim) { static bool used; /* Avoid printing SEP_STR_GROUP before any output. 
*/ char eol = eolbyte; @@ -1181,7 +1303,7 @@ prtext (char const *beg, char const *lim) if (!out_quiet && pending > 0) prpending (beg); - char const *p = beg; + char *p = beg; if (!out_quiet) { @@ -1200,14 +1322,14 @@ prtext (char const *beg, char const *lim) && p != lastout && group_separator) { pr_sgr_start_if (sep_color); - fputs (group_separator, stdout); + fputs_errno (group_separator); pr_sgr_end_if (sep_color); - fputc ('\n', stdout); + putchar_errno ('\n'); } while (p < beg) { - char const *nl = memchr (p, eol, beg - p); + char *nl = memchr (p, eol, beg - p); nl++; prline (p, nl, SEP_CHAR_REJECTED); p = nl; @@ -1220,7 +1342,7 @@ prtext (char const *beg, char const *lim) /* One or more lines are output. */ for (n = 0; p < lim && n < outleft; n++) { - char const *nl = memchr (p, eol, lim - p); + char *nl = memchr (p, eol, lim - p); nl++; if (!out_quiet) prline (p, nl, SEP_CHAR_SELECTED); @@ -1267,16 +1389,16 @@ zap_nuls (char *p, char *lim, char eol) between matching lines if OUT_INVERT is true). Return a count of lines printed. Replace all NUL bytes with NUL_ZAPPER as we go. */ static intmax_t -grepbuf (char const *beg, char const *lim) +grepbuf (char *beg, char const *lim) { intmax_t outleft0 = outleft; - char const *p; - char const *endp; + char *endp; - for (p = beg; p < lim; p = endp) + for (char *p = beg; p < lim; p = endp) { size_t match_size; - size_t match_offset = execute (p, lim - p, &match_size, NULL); + size_t match_offset = execute (compiled_pattern, p, lim - p, + &match_size, NULL); if (match_offset == (size_t) -1) { if (!out_invert) @@ -1284,20 +1406,20 @@ grepbuf (char const *beg, char const *lim) match_offset = lim - p; match_size = 0; } - char const *b = p + match_offset; + char *b = p + match_offset; endp = b + match_size; /* Avoid matching the empty line at the end of the buffer. */ if (!out_invert && b == lim) break; if (!out_invert || p < b) { - char const *prbeg = out_invert ? p : b; - char const *prend = out_invert ? 
b : endp; + char *prbeg = out_invert ? p : b; + char *prend = out_invert ? b : endp; prtext (prbeg, prend); if (!outleft || done_on_match) { if (exit_on_match) - exit (EXIT_SUCCESS); + exit (errseen ? exit_failure : EXIT_SUCCESS); break; } } @@ -1306,14 +1428,12 @@ grepbuf (char const *beg, char const *lim) return outleft0 - outleft; } -/* Search a given file. Normally, return a count of lines printed; - but if the file is a directory and we search it recursively, then - return -2 if there was a match, and -1 otherwise. */ +/* Search a given (non-directory) file. Return a count of lines printed. + Set *INEOF to true if end-of-file reached. */ static intmax_t -grep (int fd, struct stat const *st) +grep (int fd, struct stat const *st, bool *ineof) { intmax_t nlines, i; - enum textbin textbin; size_t residue, save; char oldc; char *beg; @@ -1323,6 +1443,11 @@ grep (int fd, struct stat const *st) bool done_on_match_0 = done_on_match; bool out_quiet_0 = out_quiet; + /* The value of NLINES when nulls were first deduced in the input; + this is not necessarily the same as the number of matching lines + before the first null. -1 if no input nulls have been deduced. */ + intmax_t nlines_first_null = -1; + if (! reset (fd, st)) return 0; @@ -1333,7 +1458,7 @@ grep (int fd, struct stat const *st) after_last_match = 0; pending = 0; skip_nuls = skip_empty_lines && !eol; - seek_data_failed = false; + encoding_error_output = false; nlines = 0; residue = 0; @@ -1341,30 +1466,36 @@ grep (int fd, struct stat const *st) if (! fillbuf (save, st)) { - suppressible_error (filename, errno); + suppressible_error (errno); return 0; } - if (binary_files == TEXT_BINARY_FILES) - textbin = TEXTBIN_TEXT; - else + offset_width = 0; + if (align_tabs) + { + /* Width is log of maximum number. Line numbers are origin-1. */ + uintmax_t num = usable_st_size (st) ? 
st->st_size : UINTMAX_MAX; + num += out_line && num < UINTMAX_MAX; + do + offset_width++; + while ((num /= 10) != 0); + } + + for (bool firsttime = true; ; firsttime = false) { - textbin = file_textbin (bufbeg, buflim - bufbeg, fd, st); - if (textbin_is_binary (textbin)) + if (nlines_first_null < 0 && eol && binary_files != TEXT_BINARY_FILES + && (buf_has_nulls (bufbeg, buflim - bufbeg) + || (firsttime && file_must_have_nulls (buflim - bufbeg, fd, st)))) { if (binary_files == WITHOUT_MATCH_BINARY_FILES) return 0; - done_on_match = out_quiet = true; + if (!count_matches) + done_on_match = out_quiet = true; + nlines_first_null = nlines; nul_zapper = eol; skip_nuls = skip_empty_lines; } - else if (execute != Pexecute) - textbin = TEXTBIN_TEXT; - } - for (;;) - { - input_textbin = textbin; lastnl = bufbeg; if (lastout) lastout = bufbeg; @@ -1373,7 +1504,10 @@ grep (int fd, struct stat const *st) /* no more data to scan (eof) except for maybe a residue -> break */ if (beg == buflim) - break; + { + *ineof = true; + break; + } zap_nuls (beg, buflim, nul_zapper); @@ -1397,7 +1531,8 @@ grep (int fd, struct stat const *st) nlines += grepbuf (beg, lim); if (pending) prpending (lim); - if ((!outleft && !pending) || (nlines && done_on_match)) + if ((!outleft && !pending) + || (done_on_match && MAX (0, nlines_first_null) < nlines)) goto finish_grep; } @@ -1415,13 +1550,8 @@ grep (int fd, struct stat const *st) } /* Detect whether leading context is adjacent to previous output. */ - if (lastout) - { - if (textbin == TEXTBIN_UNKNOWN) - textbin = TEXTBIN_TEXT; - if (beg != lastout) - lastout = 0; - } + if (beg != lastout) + lastout = 0; /* Handle some details and read more data to scan. */ save = residue + lim - beg; @@ -1431,25 +1561,9 @@ grep (int fd, struct stat const *st) nlscan (beg); if (! 
fillbuf (save, st)) { - suppressible_error (filename, errno); + suppressible_error (errno); goto finish_grep; } - - /* If the file's textbin has not been determined yet, assume - it's binary if the next input buffer suggests so. */ - if (textbin == TEXTBIN_UNKNOWN) - { - enum textbin tb = buffer_textbin (bufbeg, buflim - bufbeg); - if (textbin_is_binary (tb)) - { - if (binary_files == WITHOUT_MATCH_BINARY_FILES) - return 0; - textbin = tb; - done_on_match = out_quiet = true; - nul_zapper = eol; - skip_nuls = skip_empty_lines; - } - } } if (residue) { @@ -1463,8 +1577,13 @@ grep (int fd, struct stat const *st) finish_grep: done_on_match = done_on_match_0; out_quiet = out_quiet_0; - if (textbin_is_binary (textbin) && !out_quiet && nlines != 0) - printf (_("Binary file %s matches\n"), filename); + if (!out_quiet && (encoding_error_output + || (0 <= nlines_first_null && nlines_first_null < nlines))) + { + printf_errno (_("Binary file %s matches\n"), input_filename ()); + if (line_buffered) + fflush_errno (); + } return nlines; } @@ -1472,15 +1591,10 @@ static bool grepdirent (FTS *fts, FTSENT *ent, bool command_line) { bool follow; - int dirdesc; command_line &= ent->fts_level == FTS_ROOTLEVEL; if (ent->fts_info == FTS_DP) - { - if (directories == RECURSE_DIRECTORIES && command_line) - out_file &= ~ (2 * !no_filenames); - return true; - } + return true; if (!command_line && skipped_file (ent->fts_name, false, @@ -1501,10 +1615,7 @@ grepdirent (FTS *fts, FTSENT *ent, bool command_line) { case FTS_D: if (directories == RECURSE_DIRECTORIES) - { - out_file |= 2 * !no_filenames; - return true; - } + return true; fts_set (fts, ent, FTS_SKIP); break; @@ -1517,7 +1628,7 @@ grepdirent (FTS *fts, FTSENT *ent, bool command_line) case FTS_DNR: case FTS_ERR: case FTS_NS: - suppressible_error (filename, ent->fts_errno); + suppressible_error (ent->fts_errno); return true; case FTS_DEFAULT: @@ -1534,7 +1645,7 @@ grepdirent (FTS *fts, FTSENT *ent, bool command_line) int flag = follow ? 
0 : AT_SYMLINK_NOFOLLOW; if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0) { - suppressible_error (filename, errno); + suppressible_error (errno); return true; } st = &st1; @@ -1556,10 +1667,7 @@ grepdirent (FTS *fts, FTSENT *ent, bool command_line) abort (); } - dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD - ? fts->fts_cwd_fd - : AT_FDCWD); - return grepfile (dirdesc, ent->fts_accpath, follow, command_line); + return grepfile (fts->fts_cwd_fd, ent->fts_accpath, follow, command_line); } /* True if errno is ERR after 'open ("symlink", ... O_NOFOLLOW ...)'. @@ -1580,23 +1688,74 @@ static bool grepfile (int dirdesc, char const *name, bool follow, bool command_line) { int oflag = (O_RDONLY | O_NOCTTY + | (IGNORE_DUPLICATE_BRANCH_WARNING + (binary ? O_BINARY : 0)) | (follow ? 0 : O_NOFOLLOW) | (skip_devices (command_line) ? O_NONBLOCK : 0)); int desc = openat_safer (dirdesc, name, oflag); if (desc < 0) { if (follow || ! open_symlink_nofollow_error (errno)) - suppressible_error (filename, errno); + suppressible_error (errno); return true; } return grepdesc (desc, command_line); } +/* Read all data from FD, with status ST. Return true if successful, + false (setting errno) otherwise. */ +static bool +drain_input (int fd, struct stat const *st) +{ + ssize_t nbytes; + if (S_ISFIFO (st->st_mode) && dev_null_output) + { +#ifdef SPLICE_F_MOVE + /* Should be faster, since it need not copy data to user space. */ + nbytes = splice (fd, NULL, STDOUT_FILENO, NULL, + INITIAL_BUFSIZE, SPLICE_F_MOVE); + if (0 <= nbytes || errno != EINVAL) + { + while (0 < nbytes) + nbytes = splice (fd, NULL, STDOUT_FILENO, NULL, + INITIAL_BUFSIZE, SPLICE_F_MOVE); + return nbytes == 0; + } +#endif + } + while ((nbytes = safe_read (fd, buffer, bufalloc))) + if (nbytes == SAFE_READ_ERROR) + return false; + return true; +} + +/* Finish reading from FD, with status ST and where end-of-file has + been seen if INEOF. 
Typically this is a no-op, but when reading + from standard input this may adjust the file offset or drain a + pipe. */ + +static void +finalize_input (int fd, struct stat const *st, bool ineof) +{ + if (fd == STDIN_FILENO + && (outleft + ? (!ineof + && (seek_failed + || (lseek (fd, 0, SEEK_END) < 0 + /* Linux proc file system has EINVAL (Bug#25180). */ + && errno != EINVAL)) + && ! drain_input (fd, st)) + : (bufoffset != after_last_match && !seek_failed + && lseek (fd, after_last_match, SEEK_SET) < 0))) + suppressible_error (errno); +} + static bool grepdesc (int desc, bool command_line) { intmax_t count; bool status = true; + bool ineof = false; struct stat st; /* Get the file status, possibly for the second time. This catches @@ -1607,7 +1766,7 @@ grepdesc (int desc, bool command_line) directory for a non-directory while 'grep' is running. */ if (fstat (desc, &st) != 0) { - suppressible_error (filename, errno); + suppressible_error (errno); goto closeout; } @@ -1619,6 +1778,10 @@ grepdesc (int desc, bool command_line) && skipped_file (filename, true, S_ISDIR (st.st_mode) != 0)) goto closeout; + /* Don't output file names if invoked as 'grep -r PATTERN NONDIRECTORY'. */ + if (out_file < 0) + out_file = !!S_ISDIR (st.st_mode); + if (desc != STDIN_FILENO && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode)) { @@ -1634,7 +1797,7 @@ grepdesc (int desc, bool command_line) /* Close DESC now, to conserve file descriptors if the race condition occurs many times in a deep recursion. 
*/ if (close (desc) != 0) - suppressible_error (filename, errno); + suppressible_error (errno); fts_arg[0] = (char *) filename; fts_arg[1] = NULL; @@ -1645,9 +1808,9 @@ grepdesc (int desc, bool command_line) while ((ent = fts_read (fts))) status &= grepdirent (fts, ent, command_line); if (errno) - suppressible_error (filename, errno); + suppressible_error (errno); if (fts_close (fts) != 0) - suppressible_error (filename, errno); + suppressible_error (errno); return status; } if (desc != STDIN_FILENO @@ -1675,61 +1838,47 @@ grepdesc (int desc, bool command_line) so there is no risk of malfunction. But even --max-count=2, with input==output, while there is no risk of infloop, there is a race condition that could result in "alternate" output. */ - if (!out_quiet && list_files == 0 && 1 < max_count - && S_ISREG (out_stat.st_mode) && out_stat.st_ino - && SAME_INODE (st, out_stat)) + if (!out_quiet && list_files == LISTFILES_NONE && 1 < max_count + && S_ISREG (st.st_mode) && SAME_INODE (st, out_stat)) { if (! suppress_errors) - error (0, 0, _("input file %s is also the output"), quote (filename)); + error (0, 0, _("input file %s is also the output"), + quote (input_filename ())); errseen = true; goto closeout; } -#if defined SET_BINARY - /* Set input to binary mode. Pipes are simulated with files - on DOS, so this includes the case of "foo | grep bar". 
*/ - if (!isatty (desc)) - SET_BINARY (desc); -#endif - - count = grep (desc, &st); - if (count < 0) - status = count + 2; - else + count = grep (desc, &st, &ineof); + if (count_matches) { - if (count_matches) - { - if (out_file) - { - print_filename (); - if (filename_mask) - print_sep (SEP_CHAR_SELECTED); - else - fputc (0, stdout); - } - printf ("%" PRIdMAX "\n", count); - } - - status = !count; - if (list_files == 1 - 2 * status) + if (out_file) { print_filename (); - fputc ('\n' & filename_mask, stdout); + if (filename_mask) + print_sep (SEP_CHAR_SELECTED); + else + putchar_errno (0); } + printf_errno ("%" PRIdMAX "\n", count); + if (line_buffered) + fflush_errno (); + } - if (desc == STDIN_FILENO) - { - off_t required_offset = outleft ? bufoffset : after_last_match; - if (required_offset != bufoffset - && lseek (desc, required_offset, SEEK_SET) < 0 - && S_ISREG (st.st_mode)) - suppressible_error (filename, errno); - } + status = !count == !(list_files == LISTFILES_NONMATCHING); + + if (list_files == LISTFILES_NONE || dev_null_output) + finalize_input (desc, &st, ineof); + else if (status == 0) + { + print_filename (); + putchar_errno ('\n' & filename_mask); + if (line_buffered) + fflush_errno (); } closeout: if (desc != STDIN_FILENO && close (desc) != 0) - suppressible_error (filename, errno); + suppressible_error (errno); return status; } @@ -1738,7 +1887,9 @@ grep_command_line_arg (char const *arg) { if (STREQ (arg, "-")) { - filename = label ? label : _("(standard input)"); + filename = label; + if (binary) + xset_binary_mode (STDIN_FILENO, O_BINARY); return grepdesc (STDIN_FILENO, true); } else @@ -1754,32 +1905,33 @@ usage (int status) { if (status != 0) { - fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), - program_name); + fprintf (stderr, _("Usage: %s [OPTION]... 
PATTERNS [FILE]...\n"), + getprogname ()); fprintf (stderr, _("Try '%s --help' for more information.\n"), - program_name); + getprogname ()); } else { - printf (_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name); - printf (_("Search for PATTERN in each FILE or standard input.\n")); - printf (_("PATTERN is, by default, a basic regular expression (BRE).\n")); + printf (_("Usage: %s [OPTION]... PATTERNS [FILE]...\n"), getprogname ()); + printf (_("Search for PATTERNS in each FILE.\n")); printf (_("\ Example: %s -i 'hello world' menu.h main.c\n\ +PATTERNS can contain multiple patterns separated by newlines.\n\ \n\ -Regexp selection and interpretation:\n"), program_name); +Pattern selection and interpretation:\n"), getprogname ()); printf (_("\ - -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\ - -F, --fixed-strings PATTERN is a set of newline-separated strings\n\ - -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\ - -P, --perl-regexp PATTERN is a Perl regular expression\n")); + -E, --extended-regexp PATTERNS are extended regular expressions\n\ + -F, --fixed-strings PATTERNS are strings\n\ + -G, --basic-regexp PATTERNS are basic regular expressions\n\ + -P, --perl-regexp PATTERNS are Perl regular expressions\n")); /* -X is deliberately undocumented. 
*/ printf (_("\ - -e, --regexp=PATTERN use PATTERN for matching\n\ - -f, --file=FILE obtain PATTERN from FILE\n\ - -i, --ignore-case ignore case distinctions\n\ - -w, --word-regexp force PATTERN to match only whole words\n\ - -x, --line-regexp force PATTERN to match only whole lines\n\ + -e, --regexp=PATTERNS use PATTERNS for matching\n\ + -f, --file=FILE take PATTERNS from FILE\n\ + -i, --ignore-case ignore case distinctions in patterns and data\n\ + --no-ignore-case do not ignore case distinctions (default)\n\ + -w, --word-regexp match only whole words\n\ + -x, --line-regexp match only whole lines\n\ -z, --null-data a data line ends in 0 byte, not newline\n")); printf (_("\ \n\ @@ -1791,16 +1943,16 @@ Miscellaneous:\n\ printf (_("\ \n\ Output control:\n\ - -m, --max-count=NUM stop after NUM matches\n\ + -m, --max-count=NUM stop after NUM selected lines\n\ -b, --byte-offset print the byte offset with output lines\n\ -n, --line-number print line number with output lines\n\ --line-buffered flush output on every line\n\ - -H, --with-filename print the file name for each match\n\ + -H, --with-filename print file name with output lines\n\ -h, --no-filename suppress the file name prefix on output\n\ --label=LABEL use LABEL as the standard input file name prefix\n\ ")); printf (_("\ - -o, --only-matching show only the part of a line matching PATTERN\n\ + -o, --only-matching show only nonempty parts of lines that match\n\ -q, --quiet, --silent suppress all normal output\n\ --binary-files=TYPE assume that binary files are TYPE;\n\ TYPE is 'binary', 'text', or 'without-match'\n\ @@ -1816,16 +1968,16 @@ Output control:\n\ -R, --dereference-recursive likewise, but follow all symlinks\n\ ")); printf (_("\ - --include=FILE_PATTERN search only files that match FILE_PATTERN\n\ - --exclude=FILE_PATTERN skip files and directories matching\ - FILE_PATTERN\n\ - --exclude-from=FILE skip files matching any file pattern from FILE\n\ - --exclude-dir=PATTERN directories that match PATTERN 
will be skipped.\n\ + --include=GLOB search only files that match GLOB (a file pattern)" + "\n\ + --exclude=GLOB skip files that match GLOB\n\ + --exclude-from=FILE skip files that match any file pattern from FILE\n\ + --exclude-dir=GLOB skip directories that match GLOB\n\ ")); printf (_("\ - -L, --files-without-match print only names of FILEs containing no match\n\ - -l, --files-with-matches print only names of FILEs containing matches\n\ - -c, --count print only a count of matching lines per FILE\n\ + -L, --files-without-match print only names of FILEs with no selected lines\n\ + -l, --files-with-matches print only names of FILEs with selected lines\n\ + -c, --count print only a count of selected lines per FILE\n\ -T, --initial-tab make tabs line up (if needed)\n\ -Z, --null print 0 byte after FILE name\n")); printf (_("\ @@ -1841,16 +1993,11 @@ Context control:\n\ --colour[=WHEN] use markers to highlight the matching strings;\n\ WHEN is 'always', 'never', or 'auto'\n\ -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\ - -u, --unix-byte-offsets report offsets as if CRs were not there\n\ - (MSDOS/Windows)\n\ \n")); printf (_("\ -'egrep' means 'grep -E'. 'fgrep' means 'grep -F'.\n\ -Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n")); - printf (_("\ -When FILE is -, read standard input. With no FILE, read . if a command-line\n\ --r is given, - otherwise. If fewer than two FILEs are given, assume -h.\n\ -Exit status is 0 if any line is selected, 1 otherwise;\n\ +When FILE is '-', read standard input. With no FILE, read '.' if\n\ +recursive, '-' otherwise. With fewer than two FILEs, assume -h.\n\ +Exit status is 0 if any line (or file if -L) is selected, 1 otherwise;\n\ if any error occurs and -q is not given, the exit status is 2.\n")); emit_bug_reporting_address (); } @@ -1859,73 +2006,46 @@ if any error occurs and -q is not given, the exit status is 2.\n")); /* Pattern compilers and matchers. 
*/ -static void -Gcompile (char const *pattern, size_t size) -{ - GEAcompile (pattern, size, RE_SYNTAX_GREP); -} - -static void -Ecompile (char const *pattern, size_t size) -{ - GEAcompile (pattern, size, RE_SYNTAX_EGREP); -} - -static void -Acompile (char const *pattern, size_t size) -{ - GEAcompile (pattern, size, RE_SYNTAX_AWK); -} - -static void -GAcompile (char const *pattern, size_t size) -{ - GEAcompile (pattern, size, RE_SYNTAX_GNU_AWK); -} - -static void -PAcompile (char const *pattern, size_t size) -{ - GEAcompile (pattern, size, RE_SYNTAX_POSIX_AWK); -} - -struct matcher +static struct { - char const name[16]; + char name[12]; + int syntax; /* used if compile == GEAcompile */ compile_fp_t compile; execute_fp_t execute; +} const matchers[] = { + { "grep", RE_SYNTAX_GREP, GEAcompile, EGexecute }, + { "egrep", RE_SYNTAX_EGREP, GEAcompile, EGexecute }, + { "fgrep", 0, Fcompile, Fexecute, }, + { "awk", RE_SYNTAX_AWK, GEAcompile, EGexecute }, + { "gawk", RE_SYNTAX_GNU_AWK, GEAcompile, EGexecute }, + { "posixawk", RE_SYNTAX_POSIX_AWK, GEAcompile, EGexecute }, +#if HAVE_LIBPCRE + { "perl", 0, Pcompile, Pexecute, }, +#endif }; -static struct matcher const matchers[] = { - { "grep", Gcompile, EGexecute }, - { "egrep", Ecompile, EGexecute }, - { "fgrep", Fcompile, Fexecute }, - { "awk", Acompile, EGexecute }, - { "gawk", GAcompile, EGexecute }, - { "posixawk", PAcompile, EGexecute }, - { "perl", Pcompile, Pexecute }, - { "", NULL, NULL }, -}; +/* Keep these in sync with the 'matchers' table. */ +enum { E_MATCHER_INDEX = 1, F_MATCHER_INDEX = 2, G_MATCHER_INDEX = 0 }; -/* Set the matcher to M if available. Exit in case of conflicts or if - M is not available. */ -static void -setmatcher (char const *m) +/* Return the index of the matcher corresponding to M if available. + MATCHER is the index of the previous matcher, or -1 if none. + Exit in case of conflicts or if M is not available. 
*/ +static int +setmatcher (char const *m, int matcher) { - struct matcher const *p; - - if (matcher && !STREQ (matcher, m)) - error (EXIT_TROUBLE, 0, _("conflicting matchers specified")); - - for (p = matchers; p->compile; p++) - if (STREQ (m, p->name)) + for (int i = 0; i < sizeof matchers / sizeof *matchers; i++) + if (STREQ (m, matchers[i].name)) { - matcher = p->name; - compile = p->compile; - execute = p->execute; - return; + if (0 <= matcher && matcher != i) + die (EXIT_TROUBLE, 0, _("conflicting matchers specified")); + return i; } - error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m); +#if !HAVE_LIBPCRE + if (STREQ (m, "perl")) + die (EXIT_TROUBLE, 0, + _("Perl matching not supported in a --disable-perl-regexp build")); +#endif + die (EXIT_TROUBLE, 0, _("invalid matcher %s"), m); } /* Find the white-space-separated options specified by OPTIONS, and @@ -2008,7 +2128,7 @@ get_nondigit_option (int argc, char *const *argv, intmax_t *default_context) { opt = getopt_long (argc, (char **) argv, short_options, long_options, NULL); - if ( ! ('0' <= opt && opt <= '9')) + if (! c_isdigit (opt)) break; if (prev_digit_optind != this_digit_optind || !was_digit) @@ -2100,7 +2220,7 @@ parse_grep_colors (void) } else if (val == NULL) q++; /* Accumulate name. */ - else if (*q == ';' || (*q >= '0' && *q <= '9')) + else if (*q == ';' || c_isdigit (*q)) q++; /* Accumulate val. Protect the terminal from being sent crap. */ else return; @@ -2122,13 +2242,65 @@ contains_encoding_error (char const *pat, size_t patlen) return false; } -/* Change a pattern for fgrep into grep. */ -static void -fgrep_to_grep_pattern (size_t len, char const *keys, - size_t *new_len, char **new_keys) +/* Return the number of bytes in the initial character of PAT, of size + PATLEN, if Fcompile can handle that character. Return -1 if + Fcompile cannot handle it. MBS is the multibyte conversion state. 
+ + Fcompile can handle a character C if C is single-byte, or if C has no + case folded counterparts and toupper translates none of its bytes. */ + +static int +fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs) +{ + int n = localeinfo.sbclen[to_uchar (*pat)]; + if (n < 0) + { + wchar_t wc; + wchar_t folded[CASE_FOLDED_BUFSIZE]; + size_t wn = mbrtowc (&wc, pat, patlen, mbs); + if (MB_LEN_MAX < wn || case_folded_counterparts (wc, folded)) + return -1; + for (int i = wn; 0 < --i; ) + { + unsigned char c = pat[i]; + if (toupper (c) != c) + return -1; + } + n = wn; + } + return n; +} + +/* Return true if the -F patterns PAT, of size PATLEN, contain only + single-byte characters or characters not subject to case folding, + and so can be processed by Fcompile. */ + +static bool +fgrep_icase_available (char const *pat, size_t patlen) +{ + mbstate_t mbs = {0,}; + + for (size_t i = 0; i < patlen; ) + { + int n = fgrep_icase_charlen (pat + i, patlen - i, &mbs); + if (n < 0) + return false; + i += n; + } + + return true; +} + +/* Change the pattern *KEYS_P, of size *LEN_P, from fgrep to grep style. */ + +void +fgrep_to_grep_pattern (char **keys_p, size_t *len_p) { - char *p = *new_keys = xnmalloc (len + 1, 2); + size_t len = *len_p; + char *keys = *keys_p; mbstate_t mb_state = { 0 }; + char *new_keys = xnmalloc (len + 1, 2); + char *p = new_keys; size_t n; for (; len; keys += n, len -= n) @@ -2138,34 +2310,122 @@ fgrep_to_grep_pattern (size_t len, char const *keys, { case (size_t) -2: n = len; - /* Fall through. */ + FALLTHROUGH; default: p = mempcpy (p, keys, n); break; case (size_t) -1: memset (&mb_state, 0, sizeof mb_state); - /* Fall through. */ + n = 1; + FALLTHROUGH; case 1: - *p = '\\'; - p += strchr ("$*.[\\^", *keys) != NULL; - /* Fall through. 
*/ - case 0: + switch (*keys) + { + case '$': case '*': case '.': case '[': case '\\': case '^': + *p++ = '\\'; break; + } *p++ = *keys; - n = 1; break; } } - *new_len = p - *new_keys; + free (*keys_p); + *keys_p = new_keys; + *len_p = p - new_keys; +} + +/* If it is easy, convert the MATCHER-style patterns KEYS (of size + *LEN_P) to -F style, update *LEN_P to a possibly-smaller value, and + return F_MATCHER_INDEX. If not, leave KEYS and *LEN_P alone and + return MATCHER. This function is conservative and sometimes misses + conversions, e.g., it does not convert the -E pattern "(a|a|[aa])" + to the -F pattern "a". */ + +static int +try_fgrep_pattern (int matcher, char *keys, size_t *len_p) +{ + int result = matcher; + size_t len = *len_p; + char *new_keys = xmalloc (len + 1); + char *p = new_keys; + char const *q = keys; + mbstate_t mb_state = { 0 }; + + while (len != 0) + { + switch (*q) + { + case '$': case '*': case '.': case '[': case '^': + goto fail; + + case '(': case '+': case '?': case '{': case '|': + if (matcher != G_MATCHER_INDEX) + goto fail; + break; + + case '\\': + if (1 < len) + switch (q[1]) + { + case '\n': + case 'B': case 'S': case 'W': case'\'': case '<': + case 'b': case 's': case 'w': case '`': case '>': + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + goto fail; + + case '(': case '+': case '?': case '{': case '|': + if (matcher == G_MATCHER_INDEX) + goto fail; + FALLTHROUGH; + default: + q++, len--; + break; + } + break; + } + + { + size_t n; + if (match_icase) + { + int ni = fgrep_icase_charlen (q, len, &mb_state); + if (ni < 0) + goto fail; + n = ni; + } + else + { + n = mb_clen (q, len, &mb_state); + if (MB_LEN_MAX < n) + goto fail; + } + + p = mempcpy (p, q, n); + q += n; + len -= n; + } + } + + if (*len_p != p - new_keys) + { + *len_p = p - new_keys; + memcpy (keys, new_keys, p - new_keys); + } + result = F_MATCHER_INDEX; + + fail: + free (new_keys); + return result; } int main (int argc, 
char **argv) { - char *keys; - size_t keycc, oldcc, keyalloc; - bool with_filenames; + char *keys = NULL; + size_t keycc = 0, oldcc, keyalloc = 0; + int matcher = -1; size_t cc; int opt, prepended; int prev_optind, last_recursive; @@ -2174,12 +2434,11 @@ main (int argc, char **argv) FILE *fp; exit_failure = EXIT_TROUBLE; initialize_main (&argc, &argv); - set_program_name (argv[0]); - program_name = argv[0]; - keys = NULL; - keycc = 0; - with_filenames = false; + /* Which command-line options have been specified for filename output. + -1 for -h, 1 for -H, 0 for neither. */ + int filename_option = 0; + eolbyte = '\n'; filename_mask = ~0; @@ -2201,8 +2460,10 @@ main (int argc, char **argv) textdomain (PACKAGE); #endif - exit_failure = EXIT_TROUBLE; + init_localeinfo (&localeinfo); + atexit (clean_up_stdout); + c_stack_action (NULL); last_recursive = 0; @@ -2211,9 +2472,6 @@ main (int argc, char **argv) error (0, 0, _("warning: GREP_OPTIONS is deprecated;" " please use an alias or script")); - compile = matchers[0].compile; - execute = matchers[0].execute; - while (prev_optind = optind, (opt = get_nondigit_option (argc, argv, &default_context)) != -1) switch (opt) @@ -2238,32 +2496,31 @@ main (int argc, char **argv) else if (STREQ (optarg, "skip")) devices = SKIP_DEVICES; else - error (EXIT_TROUBLE, 0, _("unknown devices method")); + die (EXIT_TROUBLE, 0, _("unknown devices method")); break; case 'E': - setmatcher ("egrep"); + matcher = setmatcher ("egrep", matcher); break; case 'F': - setmatcher ("fgrep"); + matcher = setmatcher ("fgrep", matcher); break; case 'P': - setmatcher ("perl"); + matcher = setmatcher ("perl", matcher); break; case 'G': - setmatcher ("grep"); + matcher = setmatcher ("grep", matcher); break; case 'X': /* undocumented on purpose */ - setmatcher (optarg); + matcher = setmatcher (optarg, matcher); break; case 'H': - with_filenames = true; - no_filenames = false; + filename_option = 1; break; case 'I': @@ -2275,11 +2532,13 @@ main (int argc, char 
**argv) break; case 'U': - dos_binary (); + if (O_BINARY) + binary = true; break; case 'u': - dos_unix_byte_offsets (); + /* Obsolete option; it has no effect. FIXME: Diagnose use of + this option starting in (say) the year 2020. */ break; case 'V': @@ -2307,39 +2566,53 @@ main (int argc, char **argv) case 'e': cc = strlen (optarg); - keys = xrealloc (keys, keycc + cc + 1); - strcpy (&keys[keycc], optarg); + if (keyalloc < keycc + cc + 1) + { + keyalloc = keycc + cc + 1; + keys = x2realloc (keys, &keyalloc); + } + oldcc = keycc; + memcpy (keys + oldcc, optarg, cc); keycc += cc; keys[keycc++] = '\n'; + fl_add (keys + oldcc, cc + 1, ""); break; case 'f': - fp = STREQ (optarg, "-") ? stdin : fopen (optarg, O_TEXT ? "rt" : "r"); - if (!fp) - error (EXIT_TROUBLE, errno, "%s", optarg); - for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2) - ; - keys = xrealloc (keys, keyalloc); + if (STREQ (optarg, "-")) + { + if (binary) + xset_binary_mode (STDIN_FILENO, O_BINARY); + fp = stdin; + } + else + { + fp = fopen (optarg, binary ? "rb" : "r"); + if (!fp) + die (EXIT_TROUBLE, errno, "%s", optarg); + } oldcc = keycc; - while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0) + for (;; keycc += cc) { - keycc += cc; - if (keycc == keyalloc - 1) - keys = x2nrealloc (keys, &keyalloc, sizeof *keys); + if (keyalloc <= keycc + 1) + keys = x2realloc (keys, &keyalloc); + cc = fread (keys + keycc, 1, keyalloc - (keycc + 1), fp); + if (cc == 0) + break; } fread_errno = errno; if (ferror (fp)) - error (EXIT_TROUBLE, fread_errno, "%s", optarg); + die (EXIT_TROUBLE, fread_errno, "%s", optarg); if (fp != stdin) fclose (fp); /* Append final newline if file ended in non-newline. 
*/ if (oldcc != keycc && keys[keycc - 1] != '\n') keys[keycc++] = '\n'; + fl_add (keys + oldcc, keycc - oldcc, optarg); break; case 'h': - with_filenames = false; - no_filenames = true; + filename_option = -1; break; case 'i': @@ -2347,14 +2620,18 @@ main (int argc, char **argv) match_icase = true; break; + case NO_IGNORE_CASE_OPTION: + match_icase = false; + break; + case 'L': /* Like -l, except list files that don't contain matches. Inspired by the same option in Hume's gre. */ - list_files = -1; + list_files = LISTFILES_NONMATCHING; break; case 'l': - list_files = 1; + list_files = LISTFILES_MATCHING; break; case 'm': @@ -2365,7 +2642,7 @@ main (int argc, char **argv) break; default: - error (EXIT_TROUBLE, 0, _("invalid max count")); + die (EXIT_TROUBLE, 0, _("invalid max count")); } break; @@ -2384,7 +2661,7 @@ main (int argc, char **argv) case 'R': fts_options = basic_fts_options | FTS_LOGICAL; - /* Fall through. */ + FALLTHROUGH; case 'r': directories = RECURSE_DIRECTORIES; last_recursive = prev_optind; @@ -2399,6 +2676,7 @@ main (int argc, char **argv) break; case 'w': + wordinit (); match_words = true; break; @@ -2422,20 +2700,23 @@ main (int argc, char **argv) else if (STREQ (optarg, "without-match")) binary_files = WITHOUT_MATCH_BINARY_FILES; else - error (EXIT_TROUBLE, 0, _("unknown binary-files type")); + die (EXIT_TROUBLE, 0, _("unknown binary-files type")); break; case COLOR_OPTION: if (optarg) { - if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes") - || !strcasecmp (optarg, "force")) + if (!c_strcasecmp (optarg, "always") + || !c_strcasecmp (optarg, "yes") + || !c_strcasecmp (optarg, "force")) color_option = 1; - else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no") - || !strcasecmp (optarg, "none")) + else if (!c_strcasecmp (optarg, "never") + || !c_strcasecmp (optarg, "no") + || !c_strcasecmp (optarg, "none")) color_option = 0; - else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty") - || !strcasecmp 
(optarg, "if-tty")) + else if (!c_strcasecmp (optarg, "auto") + || !c_strcasecmp (optarg, "tty") + || !c_strcasecmp (optarg, "if-tty")) color_option = 2; else show_help = 1; @@ -2446,28 +2727,36 @@ main (int argc, char **argv) case EXCLUDE_OPTION: case INCLUDE_OPTION: - if (!excluded_patterns) - excluded_patterns = new_exclude (); - add_exclude (excluded_patterns, optarg, - (EXCLUDE_ANCHORED | EXCLUDE_WILDCARDS - | (opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0))); + for (int cmd = 0; cmd < 2; cmd++) + { + if (!excluded_patterns[cmd]) + excluded_patterns[cmd] = new_exclude (); + add_exclude (excluded_patterns[cmd], optarg, + ((opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0) + | exclude_options (cmd))); + } break; case EXCLUDE_FROM_OPTION: - if (!excluded_patterns) - excluded_patterns = new_exclude (); - if (add_exclude_file (add_exclude, excluded_patterns, optarg, - EXCLUDE_ANCHORED | EXCLUDE_WILDCARDS, '\n') != 0) + for (int cmd = 0; cmd < 2; cmd++) { - error (EXIT_TROUBLE, errno, "%s", optarg); + if (!excluded_patterns[cmd]) + excluded_patterns[cmd] = new_exclude (); + if (add_exclude_file (add_exclude, excluded_patterns[cmd], + optarg, exclude_options (cmd), '\n') + != 0) + die (EXIT_TROUBLE, errno, "%s", optarg); } break; case EXCLUDE_DIRECTORY_OPTION: - if (!excluded_directory_patterns) - excluded_directory_patterns = new_exclude (); strip_trailing_slashes (optarg); - add_exclude (excluded_directory_patterns, optarg, - EXCLUDE_ANCHORED | EXCLUDE_WILDCARDS); + for (int cmd = 0; cmd < 2; cmd++) + { + if (!excluded_directory_patterns[cmd]) + excluded_directory_patterns[cmd] = new_exclude (); + add_exclude (excluded_directory_patterns[cmd], optarg, + exclude_options (cmd)); + } break; case GROUP_SEPARATOR_OPTION: @@ -2492,51 +2781,18 @@ main (int argc, char **argv) } - if (color_option == 2) - color_option = isatty (STDOUT_FILENO) && should_colorize (); - init_colorize (); - - /* POSIX says that -q overrides -l, which in turn overrides the - other output options. 
*/ - if (exit_on_match) - list_files = 0; - if (exit_on_match | list_files) - { - count_matches = false; - done_on_match = true; - } - out_quiet = count_matches | done_on_match; - - if (out_after < 0) - out_after = default_context; - if (out_before < 0) - out_before = default_context; - - if (color_option) - { - /* Legacy. */ - char *userval = getenv ("GREP_COLOR"); - if (userval != NULL && *userval != '\0') - selected_match_color = context_match_color = userval; - - /* New GREP_COLORS has priority. */ - parse_grep_colors (); - } - if (show_version) { - version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS, + version_etc (stdout, getprogname (), PACKAGE_NAME, VERSION, (char *) NULL); + puts (_("Written by Mike Haertel and others; see\n" + "<https://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>.")); return EXIT_SUCCESS; } if (show_help) usage (EXIT_SUCCESS); - struct stat tmp_stat; - if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode)) - out_stat = tmp_stat; - if (keys) { if (keycc == 0) @@ -2551,63 +2807,130 @@ main (int argc, char **argv) } else if (optind < argc) { - /* A copy must be made in case of an xrealloc() or free() later. */ + /* Make a copy so that it can be reallocated or freed later. */ keycc = strlen (argv[optind]); keys = xmemdup (argv[optind++], keycc + 1); + fl_add (keys, keycc, ""); + n_patterns++; } else usage (EXIT_TROUBLE); - build_mbclen_cache (); - init_easy_encoding (); - - /* In a unibyte locale, switch from fgrep to grep if - the pattern matches words (where grep is typically faster). - In a multibyte locale, switch from fgrep to grep if either - (1) case is ignored (where grep is typically faster), or - (2) the pattern has an encoding error (where fgrep might not work). */ - if (compile == Fcompile - && (MB_CUR_MAX <= 1 - ? match_words - : match_icase || contains_encoding_error (keys, keycc))) + bool possibly_tty = false; + struct stat tmp_stat; + if (!
exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0) + { + if (S_ISREG (tmp_stat.st_mode)) + out_stat = tmp_stat; + else if (S_ISCHR (tmp_stat.st_mode)) + { + struct stat null_stat; + if (stat ("/dev/null", &null_stat) == 0 + && SAME_INODE (tmp_stat, null_stat)) + dev_null_output = true; + else + possibly_tty = true; + } + } + + /* POSIX says -c, -l and -q are mutually exclusive. In this + implementation, -q overrides -l and -L, which in turn override -c. */ + if (exit_on_match) + list_files = LISTFILES_NONE; + if ((exit_on_match | dev_null_output) || list_files != LISTFILES_NONE) + { + count_matches = false; + done_on_match = true; + } + out_quiet = count_matches | done_on_match; + + if (out_after < 0) + out_after = default_context; + if (out_before < 0) + out_before = default_context; + + /* If it is easy to see that matching cannot succeed (e.g., 'grep -f + /dev/null'), fail without reading the input. */ + if ((max_count == 0 + || (keycc == 0 && out_invert && !match_lines && !match_words)) + && list_files != LISTFILES_NONMATCHING) + return EXIT_FAILURE; + + if (color_option == 2) + color_option = possibly_tty && should_colorize () && isatty (STDOUT_FILENO); + init_colorize (); + + if (color_option) { - size_t new_keycc; - char *new_keys; - fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys); - free (keys); - keys = new_keys; - keycc = new_keycc; - matcher = "grep"; - compile = Gcompile; - execute = EGexecute; + /* Legacy. */ + char *userval = getenv ("GREP_COLOR"); + if (userval != NULL && *userval != '\0') + selected_match_color = context_match_color = userval; + + /* New GREP_COLORS has priority. */ + parse_grep_colors (); } - compile (keys, keycc); - free (keys); + initialize_unibyte_mask (); + + if (matcher < 0) + matcher = G_MATCHER_INDEX; + + /* In a single-byte locale, switch from -F to -G if it is a single + pattern that matches words, where -G is typically faster. 
In a + multi-byte locale, switch if the patterns have an encoding error + (where -F does not work) or if -i and the patterns will not work + for -iF. */ + if (matcher == F_MATCHER_INDEX + && (! localeinfo.multibyte + ? n_patterns == 1 && match_words + : (contains_encoding_error (keys, keycc) + || (match_icase && !fgrep_icase_available (keys, keycc))))) + { + fgrep_to_grep_pattern (&keys, &keycc); + matcher = G_MATCHER_INDEX; + } + /* With two or more patterns, if -F works then switch from either -E + or -G, as -F is probably faster then. */ + else if ((matcher == G_MATCHER_INDEX || matcher == E_MATCHER_INDEX) + && 1 < n_patterns) + matcher = try_fgrep_pattern (matcher, keys, &keycc); + + execute = matchers[matcher].execute; + compiled_pattern = matchers[matcher].compile (keys, keycc, + matchers[matcher].syntax); /* We need one byte prior and one after. */ char eolbytes[3] = { 0, eolbyte, 0 }; size_t match_size; - skip_empty_lines = ((execute (eolbytes + 1, 1, &match_size, NULL) == 0) + skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1, + &match_size, NULL) == 0) == out_invert); - if ((argc - optind > 1 && !no_filenames) || with_filenames) - out_file = 1; + int num_operands = argc - optind; + out_file = (filename_option == 0 && num_operands <= 1 + ? - (directories == RECURSE_DIRECTORIES) + : 0 <= filename_option); -#ifdef SET_BINARY - /* Output is set to binary mode because we shouldn't convert - NL to CR-LF pairs, especially when grepping binary files. */ - if (!isatty (STDOUT_FILENO)) - SET_BINARY (STDOUT_FILENO); -#endif + if (binary) + xset_binary_mode (STDOUT_FILENO, O_BINARY); - if (max_count == 0) - return EXIT_FAILURE; + /* Prefer sysconf for page size, as getpagesize typically returns int. */ +#ifdef _SC_PAGESIZE + long psize = sysconf (_SC_PAGESIZE); +#else + long psize = getpagesize (); +#endif + if (! 
(0 < psize && psize <= (SIZE_MAX - sizeof (uword)) / 2)) + abort (); + pagesize = psize; + bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + sizeof (uword); + buffer = xmalloc (bufalloc); if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES) devices = READ_DEVICES; char *const *files; - if (optind < argc) + if (0 < num_operands) { files = argv + optind; } @@ -2628,6 +2951,6 @@ main (int argc, char **argv) status &= grep_command_line_arg (*files++); while (*files != NULL); - /* We register via atexit() to test stdout. */ + /* We register via atexit to test stdout. */ return errseen ? EXIT_TROUBLE : status; } diff --git a/contrib/grep/src/grep.h b/contrib/grep/src/grep.h index 580eb11609..ab95e3506b 100644 --- a/contrib/grep/src/grep.h +++ b/contrib/grep/src/grep.h @@ -1,5 +1,5 @@ /* grep.h - interface to grep driver for searching subroutines. - Copyright (C) 1992, 1998, 2001, 2007, 2009-2015 Free Software Foundation, + Copyright (C) 1992, 1998, 2001, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify @@ -29,22 +29,6 @@ extern bool match_words; /* -w */ extern bool match_lines; /* -x */ extern char eolbyte; /* -z */ -/* An enum textbin describes the file's type, inferred from data read - before the first line is selected for output. */ -enum textbin - { - /* Binary, as it contains null bytes and the -z option is not in effect, - or it contains encoding errors. */ - TEXTBIN_BINARY = -1, - - /* Not known yet. Only text has been seen so far. */ - TEXTBIN_UNKNOWN = 0, - - /* Text. */ - TEXTBIN_TEXT = 1 - }; - -/* Input file type. */ -extern enum textbin input_textbin; +extern char const *pattern_file_name (size_t, size_t *); #endif diff --git a/contrib/grep/src/kwsearch.c b/contrib/grep/src/kwsearch.c index 2997ae1acb..6f6d4d058d 100644 --- a/contrib/grep/src/kwsearch.c +++ b/contrib/grep/src/kwsearch.c @@ -1,5 +1,5 @@ /* kwsearch.c - searching subroutines using kwset for grep. 
- Copyright 1992, 1998, 2000, 2007, 2009-2015 Free Software Foundation, Inc. + Copyright 1992, 1998, 2000, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,29 +21,45 @@ #include <config.h> #include "search.h" -/* Whether -w considers WC to be a word constituent. */ -static bool -wordchar (wint_t wc) +/* A compiled -F pattern list. */ + +struct kwsearch { - return wc == L'_' || iswalnum (wc); -} + /* The kwset for this pattern list. */ + kwset_t kwset; + + /* The number of user-specified patterns. This is less than + 'kwswords (kwset)' when some extra one-character words have been + appended, one for each troublesome character that will require a + DFA search. */ + ptrdiff_t words; + + /* The user's pattern and its size in bytes. */ + char *pattern; + size_t size; + + /* The user's pattern compiled as a regular expression, + or null if it has not been compiled. */ + void *re; +}; -/* KWset compiled pattern. For Ecompile and Gcompile, we compile - a list of strings, at least one of which is known to occur in - any string matching the regexp. */ -static kwset_t kwset; +/* Compile the -F style PATTERN, containing SIZE bytes. Return a + description of the compiled pattern.
*/ -void -Fcompile (char const *pattern, size_t size) +void * +Fcompile (char *pattern, size_t size, reg_syntax_t ignored) { - size_t total = size; + kwset_t kwset; + ptrdiff_t total = size; + char *buf = NULL; + size_t bufalloc = 0; - kwsinit (&kwset); + kwset = kwsinit (true); char const *p = pattern; do { - size_t len; + ptrdiff_t len; char const *sep = memchr (p, '\n', total); if (sep) { @@ -57,45 +73,131 @@ Fcompile (char const *pattern, size_t size) total = 0; } - char *buf = NULL; if (match_lines) { - buf = xmalloc (len + 2); - buf[0] = eolbyte; - memcpy (buf + 1, p, len); - buf[len + 1] = eolbyte; - p = buf; + if (eolbyte == '\n' && pattern < p && sep) + p--; + else + { + if (bufalloc < len + 2) + { + free (buf); + bufalloc = len + 2; + buf = x2realloc (NULL, &bufalloc); + buf[0] = eolbyte; + } + memcpy (buf + 1, p, len); + buf[len + 1] = eolbyte; + p = buf; + } len += 2; } kwsincr (kwset, p, len); - free (buf); p = sep; } while (p); + free (buf); + ptrdiff_t words = kwswords (kwset); + + if (match_icase) + { + /* For each pattern character C that has a case folded + counterpart F that is multibyte and so cannot easily be + implemented via translating a single byte, append a pattern + containing just F. That way, if the data contains F, the + matcher can fall back on DFA. For example, if C is 'i' and + the locale is en_US.utf8, append a pattern containing just + the character U+0131 (LATIN SMALL LETTER DOTLESS I), so that + Fexecute will use a DFA if the data contain U+0131. 
*/ + mbstate_t mbs = { 0 }; + char checked[NCHAR] = {0,}; + for (p = pattern; p < pattern + size; p++) + { + unsigned char c = *p; + if (checked[c]) + continue; + checked[c] = true; + + wint_t wc = localeinfo.sbctowc[c]; + wchar_t folded[CASE_FOLDED_BUFSIZE]; + + for (int i = case_folded_counterparts (wc, folded); 0 <= --i; ) + { + char s[MB_LEN_MAX]; + int nbytes = wcrtomb (s, folded[i], &mbs); + if (1 < nbytes) + kwsincr (kwset, s, nbytes); + } + } + } + kwsprep (kwset); + + struct kwsearch *kwsearch = xmalloc (sizeof *kwsearch); + kwsearch->kwset = kwset; + kwsearch->words = words; + kwsearch->pattern = pattern; + kwsearch->size = size; + kwsearch->re = NULL; + return kwsearch; } +/* Use the compiled pattern VCP to search the buffer BUF of size SIZE. + If found, return the offset of the first match and store its + size into *MATCH_SIZE. If not found, return SIZE_MAX. + If START_PTR is nonnull, start searching there. */ size_t -Fexecute (char const *buf, size_t size, size_t *match_size, +Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size, char const *start_ptr) { - char const *beg, *try, *end, *mb_start; - size_t len; + char const *beg, *end, *mb_start; + ptrdiff_t len; char eol = eolbyte; struct kwsmatch kwsmatch; size_t ret_val; + bool mb_check; + bool longest; + struct kwsearch *kwsearch = vcp; + kwset_t kwset = kwsearch->kwset; + size_t mbclen; + + if (match_lines) + mb_check = longest = false; + else + { + mb_check = localeinfo.multibyte & !localeinfo.using_utf8; + longest = mb_check | !!start_ptr | match_words; + } for (mb_start = beg = start_ptr ? 
start_ptr : buf; beg <= buf + size; beg++) { - size_t offset = kwsexec (kwset, beg - match_lines, - buf + size - beg + match_lines, &kwsmatch); - if (offset == (size_t) -1) - goto failure; + ptrdiff_t offset = kwsexec (kwset, beg - match_lines, + buf + size - beg + match_lines, &kwsmatch, + longest); + if (offset < 0) + break; len = kwsmatch.size[0] - 2 * match_lines; - if (!match_lines && MB_CUR_MAX > 1 && !using_utf8 () - && mb_goback (&mb_start, beg + offset, buf + size) != 0) + + if (kwsearch->words <= kwsmatch.index) + { + /* The data contain a multibyte character that matches + some pattern character that is a case folded counterpart. + Since the kwset code cannot handle this case, fall back + on the DFA code, which can. */ + if (! kwsearch->re) + { + fgrep_to_grep_pattern (&kwsearch->pattern, &kwsearch->size); + kwsearch->re = GEAcompile (kwsearch->pattern, kwsearch->size, + RE_SYNTAX_GREP); + } + return EGexecute (kwsearch->re, buf, size, match_size, start_ptr); + } + + mbclen = 0; + if (mb_check + && mb_goback (&mb_start, &mbclen, beg + offset, buf + size) != 0) { /* We have matched a single byte that is not at the beginning of a multibyte character. mb_goback has advanced MB_START past that @@ -114,45 +216,78 @@ Fexecute (char const *buf, size_t size, size_t *match_size, continue; } beg += offset; - if (start_ptr && !match_words) + if (!!start_ptr & !match_words) goto success_in_beg_and_len; if (match_lines) { len += start_ptr == NULL; goto success_in_beg_and_len; } - if (match_words) - for (try = beg; ; ) + if (! match_words) + goto success; + + /* We need a preceding mb_start pointer. Use the beginning of line + if there is a preceding newline. */ + if (mbclen == 0) + { + char const *nl = memrchr (mb_start, eol, beg - mb_start); + if (nl) + mb_start = nl + 1; + } + + /* Succeed if neither the preceding nor the following character is a + word constituent. 
If the preceding is not, yet the following + character IS a word constituent, keep trying with shorter matches. */ + if (mbclen > 0 + ? ! wordchar_next (beg - mbclen, buf + size) + : ! wordchar_prev (mb_start, beg, buf + size)) + for (;;) { - char const *bol = memrchr (buf, eol, beg - buf); - bol = bol ? bol + 1 : buf; - if (wordchar (mb_prev_wc (bol, try, buf + size))) - break; - if (wordchar (mb_next_wc (try + len, buf + size))) + if (! wordchar_next (beg + len, buf + size)) { - if (!len) - break; - offset = kwsexec (kwset, beg, --len, &kwsmatch); - if (offset == (size_t) -1) - break; - try = beg + offset; - len = kwsmatch.size[0]; + if (start_ptr) + goto success_in_beg_and_len; + else + goto success; } - else if (!start_ptr) - goto success; - else - goto success_in_beg_and_len; - } /* for (try) */ - else - goto success; + if (!start_ptr && !localeinfo.multibyte) + { + if (! kwsearch->re) + { + fgrep_to_grep_pattern (&kwsearch->pattern, &kwsearch->size); + kwsearch->re = GEAcompile (kwsearch->pattern, + kwsearch->size, + RE_SYNTAX_GREP); + } + end = memchr (beg + len, eol, (buf + size) - (beg + len)); + end = end ? end + 1 : buf + size; + if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL) + != (size_t) -1) + goto success_match_words; + beg = end - 1; + break; + } + if (!len) + break; + offset = kwsexec (kwset, beg, --len, &kwsmatch, true); + if (offset != 0) + break; + len = kwsmatch.size[0]; + } + + /* No word match was found at BEG. Skip past word constituents, + since they cannot precede the next match and not skipping + them could make things much slower. */ + beg += wordchars_size (beg, buf + size); + mb_start = beg; } /* for (beg in buf) */ - failure: return -1; success: end = memchr (beg + len, eol, (buf + size) - (beg + len)); end = end ? end + 1 : buf + size; + success_match_words: beg = memrchr (buf, eol, beg - buf); beg = beg ? 
beg + 1 : buf; len = end - beg; diff --git a/contrib/grep/src/kwset.c b/contrib/grep/src/kwset.c index ad7a89ebd8..403af7e8d9 100644 --- a/contrib/grep/src/kwset.c +++ b/contrib/grep/src/kwset.c @@ -1,5 +1,5 @@ /* kwset.c - search for any of a set of keywords. - Copyright (C) 1989, 1998, 2000, 2005, 2007, 2009-2015 Free Software + Copyright (C) 1989, 1998, 2000, 2005, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify @@ -17,100 +17,112 @@ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ -/* Written August 1989 by Mike Haertel. - The author may be reached (Email) at the address mike@ai.mit.edu, - or (US mail) as Mike Haertel c/o Free Software Foundation. */ +/* Written August 1989 by Mike Haertel. */ -/* The algorithm implemented by these routines bears a startling resemblance - to one discovered by Beate Commentz-Walter, although it is not identical. - See: Commentz-Walter B. A string matching algorithm fast on the average. - Lecture Notes in Computer Science 71 (1979), 118-32 - . - See also: Aho AV, Corasick MJ. Efficient string matching: an aid to +/* For the Aho-Corasick algorithm, see: + Aho AV, Corasick MJ. Efficient string matching: an aid to bibliographic search. CACM 18, 6 (1975), 333-40 - , which describes the - failure function used below. */ + , which describes the + failure function used below. + + For the Boyer-Moore algorithm, see: Boyer RS, Moore JS. + A fast string searching algorithm. CACM 20, 10 (1977), 762-72 + . + + For a survey of more-recent string matching algorithms that might + help improve performance, see: Faro S, Lecroq T. The exact online + string matching problem: a review of the most recent results. + ACM Computing Surveys 45, 2 (2013), 13 + . 
*/ #include #include "kwset.h" -#include #include #include #include "system.h" +#include "intprops.h" #include "memchr2.h" #include "obstack.h" #include "xalloc.h" +#include "verify.h" -#define link kwset_link - -#ifdef GREP -# include "xalloc.h" -# undef malloc -# define malloc xmalloc -#endif - -#define NCHAR (UCHAR_MAX + 1) -#define obstack_chunk_alloc malloc +#define obstack_chunk_alloc xmalloc #define obstack_chunk_free free -#define U(c) (to_uchar (c)) +static unsigned char +U (char ch) +{ + return to_uchar (ch); +} -/* Balanced tree of edges and labels leaving a given trie node. */ +/* Balanced tree of edges and labels leaving a given trie node. */ struct tree { - struct tree *llink; /* Left link; MUST be first field. */ - struct tree *rlink; /* Right link (to larger labels). */ - struct trie *trie; /* Trie node pointed to by this edge. */ - unsigned char label; /* Label on this edge. */ - char balance; /* Difference in depths of subtrees. */ + struct tree *llink; /* Left link; MUST be first field. */ + struct tree *rlink; /* Right link (to larger labels). */ + struct trie *trie; /* Trie node pointed to by this edge. */ + unsigned char label; /* Label on this edge. */ + char balance; /* Difference in depths of subtrees. */ }; -/* Node of a trie representing a set of reversed keywords. */ +/* Node of a trie representing a set of keywords. */ struct trie { - size_t accepting; /* Word index of accepted word, or zero. */ - struct tree *links; /* Tree of edges leaving this node. */ - struct trie *parent; /* Parent of this node. */ - struct trie *next; /* List of all trie nodes in level order. */ - struct trie *fail; /* Aho-Corasick failure function. */ - int depth; /* Depth of this node from the root. */ - int shift; /* Shift function for search failures. */ - int maxshift; /* Max shift of self and descendants. 
*/ + /* If an accepting node, this is either 2*W + 1 where W is the word + index, or is SIZE_MAX if Aho-Corasick is in use and FAIL + specifies where to look for more info. If not an accepting node, + this is zero. */ + size_t accepting; + + struct tree *links; /* Tree of edges leaving this node. */ + struct trie *parent; /* Parent of this node. */ + struct trie *next; /* List of all trie nodes in level order. */ + struct trie *fail; /* Aho-Corasick failure function. */ + ptrdiff_t depth; /* Depth of this node from the root. */ + ptrdiff_t shift; /* Shift function for search failures. */ + ptrdiff_t maxshift; /* Max shift of self and descendants. */ }; -/* Structure returned opaquely to the caller, containing everything. */ +/* Structure returned opaquely to the caller, containing everything. */ struct kwset { - struct obstack obstack; /* Obstack for node allocation. */ - ptrdiff_t words; /* Number of words in the trie. */ - struct trie *trie; /* The trie itself. */ - int mind; /* Minimum depth of an accepting node. */ - int maxd; /* Maximum depth of any node. */ - unsigned char delta[NCHAR]; /* Delta table for rapid search. */ - struct trie *next[NCHAR]; /* Table of children of the root. */ - char *target; /* Target string if there's only one. */ - int *shift; /* Used in Boyer-Moore search for one string. */ - char const *trans; /* Character translation table. */ - - /* If there's only one string, this is the string's last byte, - translated via TRANS if TRANS is nonnull. */ - char gc1; - - /* Likewise for the string's penultimate byte, if it has two or more - bytes. */ + struct obstack obstack; /* Obstack for node allocation. */ + ptrdiff_t words; /* Number of words in the trie. */ + struct trie *trie; /* The trie itself. */ + ptrdiff_t mind; /* Minimum depth of an accepting node. */ + ptrdiff_t maxd; /* Maximum depth of any node. */ + unsigned char delta[NCHAR]; /* Delta table for rapid search. */ + struct trie *next[NCHAR]; /* Table of children of the root. 
*/ + char *target; /* Target string if there's only one. */ + ptrdiff_t *shift; /* Used in Boyer-Moore search for one + string. */ + char const *trans; /* Character translation table. */ + + /* This helps to match a terminal byte, which is the first byte + for Aho-Corasick, and the last byte for Boyer-Moore. If all the + patterns have the same terminal byte (after translation via TRANS + if TRANS is nonnull), then this is that byte as an unsigned char. + Otherwise this is -1 if there is disagreement among the strings + about terminal bytes, and -2 if there are no terminal bytes and + no disagreement because all the patterns are empty. */ + int gc1; + + /* This helps to match a terminal byte. If 0 <= GC1HELP, B is + terminal when B == GC1 || B == GC1HELP (note that GC1 == GC1HELP + is common here). This is typically faster than evaluating + to_uchar (TRANS[B]) == GC1. */ + int gc1help; + + /* If the string has two or more bytes, this is the penultimate byte, + after translation via TRANS if TRANS is nonnull. This variable + is used only by Boyer-Moore. */ char gc2; - /* If there's only one string, this helps to match the string's last byte. - If GC1HELP is negative, only GC1 matches the string's last byte; - otherwise at least two bytes match, and B matches if TRANS[B] == GC1. - If GC1HELP is in the range 0..(NCHAR - 1), there are exactly two - such matches, and GC1HELP is the other match after conversion to - unsigned char. If GC1HELP is at least NCHAR, there are three or - more such matches; e.g., Greek has three sigma characters that - all match when case-folding. */ - int gc1help; + /* kwsexec implementation. */ + ptrdiff_t (*kwsexec) (kwset_t, char const *, ptrdiff_t, + struct kwsmatch *, bool); }; /* Use TRANS to transliterate C. A null TRANS does no transliteration. */ @@ -120,8 +132,14 @@ tr (char const *trans, char c) return trans ? trans[U(c)] : c; } -/* Allocate and initialize a keyword set object, returning an opaque - pointer to it. 
*/ +static ptrdiff_t acexec (kwset_t, char const *, ptrdiff_t, + struct kwsmatch *, bool); +static ptrdiff_t bmexec (kwset_t, char const *, ptrdiff_t, + struct kwsmatch *, bool); + +/* Return a newly allocated keyword set. A nonnull TRANS specifies a + table of character translations to be applied to all pattern and + search text. */ kwset_t kwsalloc (char const *trans) { @@ -137,79 +155,83 @@ kwsalloc (char const *trans) kwset->trie->fail = NULL; kwset->trie->depth = 0; kwset->trie->shift = 0; - kwset->mind = INT_MAX; + kwset->mind = PTRDIFF_MAX; kwset->maxd = -1; kwset->target = NULL; kwset->trans = trans; + kwset->kwsexec = acexec; return kwset; } /* This upper bound is valid for CHAR_BIT >= 4 and - exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */ -#define DEPTH_SIZE (CHAR_BIT + CHAR_BIT/2) + exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */ +enum { DEPTH_SIZE = CHAR_BIT + CHAR_BIT / 2 }; /* Add the given string to the contents of the keyword set. */ void -kwsincr (kwset_t kwset, char const *text, size_t len) +kwsincr (kwset_t kwset, char const *text, ptrdiff_t len) { + assume (0 <= len); struct trie *trie = kwset->trie; char const *trans = kwset->trans; + bool reverse = kwset->kwsexec == bmexec; - text += len; + if (reverse) + text += len; - /* Descend the trie (built of reversed keywords) character-by-character, - installing new nodes when necessary. */ + /* Descend the trie (built of keywords) character-by-character, + installing new nodes when necessary. */ while (len--) { - unsigned char uc = *--text; + unsigned char uc = reverse ? *--text : *text++; unsigned char label = trans ? trans[uc] : uc; /* Descend the tree of outgoing links for this trie node, looking for the current character and keeping track - of the path followed. */ - struct tree *link = trie->links; + of the path followed. 
*/ + struct tree *cur = trie->links; struct tree *links[DEPTH_SIZE]; enum { L, R } dirs[DEPTH_SIZE]; links[0] = (struct tree *) &trie->links; dirs[0] = L; - int depth = 1; + ptrdiff_t depth = 1; - while (link && label != link->label) + while (cur && label != cur->label) { - links[depth] = link; - if (label < link->label) - dirs[depth++] = L, link = link->llink; + links[depth] = cur; + if (label < cur->label) + dirs[depth++] = L, cur = cur->llink; else - dirs[depth++] = R, link = link->rlink; + dirs[depth++] = R, cur = cur->rlink; } /* The current character doesn't have an outgoing link at this trie node, so build a new trie node and install - a link in the current trie node's tree. */ - if (!link) + a link in the current trie node's tree. */ + if (!cur) { - link = obstack_alloc (&kwset->obstack, sizeof *link); - link->llink = NULL; - link->rlink = NULL; - link->trie = obstack_alloc (&kwset->obstack, sizeof *link->trie); - link->trie->accepting = 0; - link->trie->links = NULL; - link->trie->parent = trie; - link->trie->next = NULL; - link->trie->fail = NULL; - link->trie->depth = trie->depth + 1; - link->trie->shift = 0; - link->label = label; - link->balance = 0; - - /* Install the new tree node in its parent. */ + cur = obstack_alloc (&kwset->obstack, sizeof *cur); + cur->llink = NULL; + cur->rlink = NULL; + cur->trie = obstack_alloc (&kwset->obstack, sizeof *cur->trie); + cur->trie->accepting = 0; + cur->trie->links = NULL; + cur->trie->parent = trie; + cur->trie->next = NULL; + cur->trie->fail = NULL; + cur->trie->depth = trie->depth + 1; + cur->trie->shift = 0; + cur->label = label; + cur->balance = 0; + + /* Install the new tree node in its parent. */ if (dirs[--depth] == L) - links[depth]->llink = link; + links[depth]->llink = cur; else - links[depth]->rlink = link; + links[depth]->rlink = cur; - /* Back up the tree fixing the balance flags. */ + /* Back up the tree fixing the balance flags. 
*/ while (depth && !links[depth]->balance) { if (dirs[depth] == L) @@ -219,7 +241,7 @@ kwsincr (kwset_t kwset, char const *text, size_t len) --depth; } - /* Rebalance the tree by pointer rotations if necessary. */ + /* Rebalance the tree by pointer rotations if necessary. */ if (depth && ((dirs[depth] == L && --links[depth]->balance) || (dirs[depth] == R && ++links[depth]->balance))) { @@ -278,62 +300,73 @@ kwsincr (kwset_t kwset, char const *text, size_t len) } } - trie = link->trie; + trie = cur->trie; } - /* Mark the node we finally reached as accepting, encoding the - index number of this word in the keyword set so far. */ + /* Mark the node finally reached as accepting, encoding the + index number of this word in the keyword set so far. */ if (!trie->accepting) - trie->accepting = 1 + 2 * kwset->words; + { + size_t words = kwset->words; + trie->accepting = 2 * words + 1; + } ++kwset->words; - /* Keep track of the longest and shortest string of the keyword set. */ + /* Keep track of the longest and shortest string of the keyword set. */ if (trie->depth < kwset->mind) kwset->mind = trie->depth; if (trie->depth > kwset->maxd) kwset->maxd = trie->depth; } +ptrdiff_t +kwswords (kwset_t kwset) +{ + return kwset->words; +} + /* Enqueue the trie nodes referenced from the given tree in the - given queue. */ + given queue. */ static void enqueue (struct tree *tree, struct trie **last) { if (!tree) return; - enqueue(tree->llink, last); - enqueue(tree->rlink, last); + enqueue (tree->llink, last); + enqueue (tree->rlink, last); (*last) = (*last)->next = tree->trie; } /* Compute the Aho-Corasick failure function for the trie nodes referenced from the given tree, given the failure function for their parent as - well as a last resort failure node. */ + well as a last resort failure node. 
*/ static void treefails (struct tree const *tree, struct trie const *fail, - struct trie *recourse) + struct trie *recourse, bool reverse) { - struct tree *link; + struct tree *cur; if (!tree) return; - treefails(tree->llink, fail, recourse); - treefails(tree->rlink, fail, recourse); + treefails (tree->llink, fail, recourse, reverse); + treefails (tree->rlink, fail, recourse, reverse); /* Find, in the chain of fails going back to the root, the first - node that has a descendant on the current label. */ + node that has a descendant on the current label. */ while (fail) { - link = fail->links; - while (link && tree->label != link->label) - if (tree->label < link->label) - link = link->llink; + cur = fail->links; + while (cur && tree->label != cur->label) + if (tree->label < cur->label) + cur = cur->llink; else - link = link->rlink; - if (link) + cur = cur->rlink; + if (cur) { - tree->trie->fail = link->trie; + tree->trie->fail = cur->trie; + if (!reverse && cur->trie->accepting && !tree->trie->accepting) + tree->trie->accepting = SIZE_MAX; return; } fail = fail->fail; @@ -343,30 +376,28 @@ treefails (struct tree const *tree, struct trie const *fail, } /* Set delta entries for the links of the given tree such that - the preexisting delta value is larger than the current depth. */ + the preexisting delta value is larger than the current depth. */ static void -treedelta (struct tree const *tree, - unsigned int depth, - unsigned char delta[]) +treedelta (struct tree const *tree, ptrdiff_t depth, unsigned char delta[]) { if (!tree) return; - treedelta(tree->llink, depth, delta); - treedelta(tree->rlink, depth, delta); + treedelta (tree->llink, depth, delta); + treedelta (tree->rlink, depth, delta); if (depth < delta[tree->label]) delta[tree->label] = depth; } -/* Return true if A has every label in B. */ -static int _GL_ATTRIBUTE_PURE +/* Return true if A has every label in B. 
*/ +static bool _GL_ATTRIBUTE_PURE hasevery (struct tree const *a, struct tree const *b) { if (!b) - return 1; - if (!hasevery(a, b->llink)) - return 0; - if (!hasevery(a, b->rlink)) - return 0; + return true; + if (!hasevery (a, b->llink)) + return false; + if (!hasevery (a, b->rlink)) + return false; while (a && b->label != a->label) if (b->label < a->label) a = a->llink; @@ -376,104 +407,159 @@ hasevery (struct tree const *a, struct tree const *b) } /* Compute a vector, indexed by character code, of the trie nodes - referenced from the given tree. */ + referenced from the given tree. */ static void treenext (struct tree const *tree, struct trie *next[]) { if (!tree) return; - treenext(tree->llink, next); - treenext(tree->rlink, next); + treenext (tree->llink, next); + treenext (tree->rlink, next); next[tree->label] = tree->trie; } -/* Compute the shift for each trie node, as well as the delta - table and next cache for the given keyword set. */ +/* Prepare a built keyword set for use. */ void kwsprep (kwset_t kwset) { char const *trans = kwset->trans; - int i; + ptrdiff_t i; unsigned char deltabuf[NCHAR]; unsigned char *delta = trans ? deltabuf : kwset->delta; + struct trie *curr, *last; + + /* Use Boyer-Moore if just one pattern, Aho-Corasick otherwise. */ + bool reverse = kwset->words == 1; + + if (reverse) + { + kwset_t new_kwset; + + /* Enqueue the immediate descendants in the level order queue. */ + for (curr = last = kwset->trie; curr; curr = curr->next) + enqueue (curr->links, &last); + + /* Looking for just one string. Extract it from the trie. 
*/ + kwset->target = obstack_alloc (&kwset->obstack, kwset->mind); + for (i = 0, curr = kwset->trie; i < kwset->mind; ++i) + { + kwset->target[i] = curr->links->label; + curr = curr->next; + } + + new_kwset = kwsalloc (kwset->trans); + new_kwset->kwsexec = bmexec; + kwsincr (new_kwset, kwset->target, kwset->mind); + obstack_free (&kwset->obstack, NULL); + *kwset = *new_kwset; + free (new_kwset); + } /* Initial values for the delta table; will be changed later. The delta entry for a given character is the smallest depth of any - node at which an outgoing edge is labeled by that character. */ + node at which an outgoing edge is labeled by that character. */ memset (delta, MIN (kwset->mind, UCHAR_MAX), sizeof deltabuf); /* Traverse the nodes of the trie in level order, simultaneously computing the delta table, failure function, and shift function. */ - struct trie *curr, *last; for (curr = last = kwset->trie; curr; curr = curr->next) { /* Enqueue the immediate descendants in the level order queue. */ enqueue (curr->links, &last); - curr->shift = kwset->mind; - curr->maxshift = kwset->mind; - /* Update the delta table for the descendants of this node. */ treedelta (curr->links, curr->depth, delta); /* Compute the failure function for the descendants of this node. */ - treefails (curr->links, curr->fail, kwset->trie); + treefails (curr->links, curr->fail, kwset->trie, reverse); - /* Update the shifts at each node in the current node's chain - of fails back to the root. */ - struct trie *fail; - for (fail = curr->fail; fail; fail = fail->fail) + if (reverse) { - /* If the current node has some outgoing edge that the fail - doesn't, then the shift at the fail should be no larger - than the difference of their depths. 
*/ - if (!hasevery (fail->links, curr->links)) - if (curr->depth - fail->depth < fail->shift) - fail->shift = curr->depth - fail->depth; - - /* If the current node is accepting then the shift at the - fail and its descendants should be no larger than the - difference of their depths. */ - if (curr->accepting && fail->maxshift > curr->depth - fail->depth) - fail->maxshift = curr->depth - fail->depth; + curr->shift = kwset->mind; + curr->maxshift = kwset->mind; + + /* Update the shifts at each node in the current node's chain + of fails back to the root. */ + struct trie *fail; + for (fail = curr->fail; fail; fail = fail->fail) + { + /* If the current node has some outgoing edge that the fail + doesn't, then the shift at the fail should be no larger + than the difference of their depths. */ + if (!hasevery (fail->links, curr->links)) + if (curr->depth - fail->depth < fail->shift) + fail->shift = curr->depth - fail->depth; + + /* If the current node is accepting then the shift at the + fail and its descendants should be no larger than the + difference of their depths. */ + if (curr->accepting && fail->maxshift > curr->depth - fail->depth) + fail->maxshift = curr->depth - fail->depth; + } } } - /* Traverse the trie in level order again, fixing up all nodes whose - shift exceeds their inherited maxshift. */ - for (curr = kwset->trie->next; curr; curr = curr->next) + if (reverse) { - if (curr->maxshift > curr->parent->maxshift) - curr->maxshift = curr->parent->maxshift; - if (curr->shift > curr->maxshift) - curr->shift = curr->maxshift; + /* Traverse the trie in level order again, fixing up all nodes whose + shift exceeds their inherited maxshift. */ + for (curr = kwset->trie->next; curr; curr = curr->next) + { + if (curr->maxshift > curr->parent->maxshift) + curr->maxshift = curr->parent->maxshift; + if (curr->shift > curr->maxshift) + curr->shift = curr->maxshift; + } } /* Create a vector, indexed by character code, of the outgoing links - from the root node. 
*/ + from the root node. Accumulate GC1 and GC1HELP. */ struct trie *nextbuf[NCHAR]; struct trie **next = trans ? nextbuf : kwset->next; memset (next, 0, sizeof nextbuf); treenext (kwset->trie->links, next); - if (trans) - for (i = 0; i < NCHAR; ++i) - kwset->next[i] = next[U(trans[i])]; + int gc1 = -2; + int gc1help = -1; + for (i = 0; i < NCHAR; i++) + { + int ti = i; + if (trans) + { + ti = U(trans[i]); + kwset->next[i] = next[ti]; + } + if (kwset->next[i]) + { + if (gc1 < -1) + { + gc1 = ti; + gc1help = i; + } + else if (gc1 == ti) + gc1help = gc1help == ti ? i : -1; + else if (i == ti && gc1 == gc1help) + gc1help = i; + else + gc1 = -1; + } + } + kwset->gc1 = gc1; + kwset->gc1help = gc1help; - /* Check if we can use the simple boyer-moore algorithm, instead - of the hairy commentz-walter algorithm. */ - if (kwset->words == 1) + if (reverse) { - /* Looking for just one string. Extract it from the trie. */ + /* Looking for just one string. Extract it from the trie. */ kwset->target = obstack_alloc (&kwset->obstack, kwset->mind); for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i) { kwset->target[i] = curr->links->label; curr = curr->next; } - /* Looking for the delta2 shift that we might make after a - backwards match has failed. Extract it from the trie. */ + if (kwset->mind > 1) { + /* Looking for the delta2 shift that might be made after a + backwards match has failed. Extract it from the trie. */ kwset->shift = obstack_alloc (&kwset->obstack, sizeof *kwset->shift * (kwset->mind - 1)); @@ -482,31 +568,13 @@ kwsprep (kwset_t kwset) kwset->shift[i] = curr->shift; curr = curr->next; } - } - char gc1 = tr (trans, kwset->target[kwset->mind - 1]); - - /* Set GC1HELP according to whether exactly one, exactly two, or - three-or-more characters match GC1. 
*/ - int gc1help = -1; - if (trans) - { - char const *equiv1 = memchr (trans, gc1, NCHAR); - char const *equiv2 = memchr (equiv1 + 1, gc1, - trans + NCHAR - (equiv1 + 1)); - if (equiv2) - gc1help = (memchr (equiv2 + 1, gc1, trans + NCHAR - (equiv2 + 1)) - ? NCHAR - : U(gc1) ^ (equiv1 - trans) ^ (equiv2 - trans)); + /* The penultimate byte. */ + kwset->gc2 = tr (trans, kwset->target[kwset->mind - 2]); } - - kwset->gc1 = gc1; - kwset->gc1help = gc1help; - if (kwset->mind > 1) - kwset->gc2 = tr (trans, kwset->target[kwset->mind - 2]); } - /* Fix things up for any translation table. */ + /* Fix things up for any translation table. */ if (trans) for (i = 0; i < NCHAR; ++i) kwset->delta[i] = delta[U(trans[i])]; @@ -521,16 +589,17 @@ kwsprep (kwset_t kwset) efficiency. If D1 is nonnull, it is a delta1 table for shifting *TP when failing. KWSET->shift says how much to shift. */ static inline bool -bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len, +bm_delta2_search (char const **tpp, char const *ep, char const *sp, + ptrdiff_t len, char const *trans, char gc1, char gc2, unsigned char const *d1, kwset_t kwset) { char const *tp = *tpp; - int d = len, skip = 0; + ptrdiff_t d = len, skip = 0; while (true) { - int i = 2; + ptrdiff_t i = 2; if (tr (trans, tp[-2]) == gc2) { while (++i <= d) @@ -566,32 +635,43 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len, } /* Return the address of the first byte in the buffer S (of size N) - that matches the last byte specified by KWSET, a singleton. */ + that matches the terminal byte specified by KWSET, or NULL if there + is no match. KWSET->gc1 should be nonnegative. 
*/ static char const * -memchr_kwset (char const *s, size_t n, kwset_t kwset) +memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset) { + char const *slim = s + n; if (kwset->gc1help < 0) - return memchr (s, kwset->gc1, n); - int small_heuristic = 2; - int small = (- (uintptr_t) s % sizeof (long) - + small_heuristic * sizeof (long)); - size_t ntrans = kwset->gc1help < NCHAR && small < n ? small : n; - char const *slim = s + ntrans; - for (; s < slim; s++) - if (kwset->trans[U(*s)] == kwset->gc1) - return s; - n -= ntrans; - return n == 0 ? NULL : memchr2 (s, kwset->gc1, kwset->gc1help, n); + { + for (; s < slim; s++) + if (kwset->next[U(*s)]) + return s; + } + else + { + int small_heuristic = 2; + size_t small_bytes = small_heuristic * sizeof (unsigned long int); + while (s < slim) + { + if (kwset->next[U(*s)]) + return s; + s++; + if ((uintptr_t) s % small_bytes == 0) + return memchr2 (s, kwset->gc1, kwset->gc1help, slim - s); + } + } + return NULL; } /* Fast Boyer-Moore search (inlinable version). */ -static inline size_t _GL_ATTRIBUTE_PURE -bmexec_trans (kwset_t kwset, char const *text, size_t size) +static inline ptrdiff_t _GL_ATTRIBUTE_PURE +bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size) { + assume (0 <= size); unsigned char const *d1; char const *ep, *sp, *tp; int d; - int len = kwset->mind; + ptrdiff_t len = kwset->mind; char const *trans = kwset->trans; if (len == 0) @@ -610,9 +690,10 @@ bmexec_trans (kwset_t kwset, char const *text, size_t size) char gc1 = kwset->gc1; char gc2 = kwset->gc2; - /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */ - if (size > 12 * len) - /* 11 is not a bug, the initial offset happens only once. */ + /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */ + ptrdiff_t len12; + if (!INT_MULTIPLY_WRAPV (len, 12, &len12) && len12 < size) + /* 11 is not a bug, the initial offset happens only once. 
*/ for (ep = text + size - 11 * len; tp <= ep; ) { char const *tp0 = tp; @@ -652,8 +733,8 @@ bmexec_trans (kwset_t kwset, char const *text, size_t size) return tp - text; } - /* Now we have only a few characters left to search. We - carefully avoid ever producing an out-of-bounds pointer. */ + /* Now only a few characters are left to search. Carefully avoid + ever producing an out-of-bounds pointer. */ ep = text + size; d = d1[U(tp[-1])]; while (d <= ep - tp) @@ -669,197 +750,181 @@ bmexec_trans (kwset_t kwset, char const *text, size_t size) } /* Fast Boyer-Moore search. */ -static size_t -bmexec (kwset_t kwset, char const *text, size_t size) +static ptrdiff_t +bmexec (kwset_t kwset, char const *text, ptrdiff_t size, + struct kwsmatch *kwsmatch, bool longest) { - /* Help the compiler inline bmexec_trans in two ways, depending on - whether kwset->trans is null. */ - return (kwset->trans - ? bmexec_trans (kwset, text, size) - : bmexec_trans (kwset, text, size)); + /* Help the compiler inline in two ways, depending on whether + kwset->trans is null. */ + ptrdiff_t ret = (IGNORE_DUPLICATE_BRANCH_WARNING + (kwset->trans + ? bmexec_trans (kwset, text, size) + : bmexec_trans (kwset, text, size))); + if (0 <= ret) + { + kwsmatch->index = 0; + kwsmatch->offset[0] = ret; + kwsmatch->size[0] = kwset->mind; + } + + return ret; } -/* Hairy multiple string search. */ -static size_t _GL_ARG_NONNULL ((4)) -cwexec (kwset_t kwset, char const *text, size_t len, struct kwsmatch *kwsmatch) +/* Hairy multiple string search with the Aho-Corasick algorithm. 
+ (inlinable version) */ +static inline ptrdiff_t +acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len, + struct kwsmatch *kwsmatch, bool longest) { - struct trie * const *next; - struct trie const *trie; - struct trie const *accept; - char const *beg, *lim, *mch, *lmch; - unsigned char c; - unsigned char const *delta; - int d; - char const *end, *qlim; + struct trie const *trie, *accept; + char const *tp, *left, *lim; struct tree const *tree; char const *trans; -#ifdef lint - accept = NULL; -#endif - - /* Initialize register copies and look for easy ways out. */ + /* Initialize register copies and look for easy ways out. */ if (len < kwset->mind) return -1; - next = kwset->next; - delta = kwset->delta; trans = kwset->trans; + trie = kwset->trie; lim = text + len; - end = text; - if ((d = kwset->mind) != 0) - mch = NULL; - else - { - mch = text, accept = kwset->trie; - goto match; - } - - if (len >= 4 * kwset->mind) - qlim = lim - 4 * kwset->mind; - else - qlim = NULL; + tp = text; - while (lim - end >= d) + if (!trie->accepting) { - if (qlim && end <= qlim) + unsigned char c; + int gc1 = kwset->gc1; + + while (true) { - end += d - 1; - while ((d = delta[c = *end]) && end < qlim) + if (gc1 < 0) { - end += d; - end += delta[U(*end)]; - end += delta[U(*end)]; + while (! (trie = kwset->next[c = tr (trans, *tp++)])) + if (tp >= lim) + return -1; } - ++end; - } - else - d = delta[c = (end += d)[-1]]; - if (d) - continue; - beg = end - 1; - trie = next[c]; - if (trie->accepting) - { - mch = beg; - accept = trie; - } - d = trie->shift; - while (beg > text) - { - unsigned char uc = *--beg; - c = trans ? 
trans[uc] : uc; - tree = trie->links; - while (tree && c != tree->label) - if (c < tree->label) - tree = tree->llink; - else - tree = tree->rlink; - if (tree) + else + { + tp = memchr_kwset (tp, lim - tp, kwset); + if (!tp) + return -1; + c = tr (trans, *tp++); + trie = kwset->next[c]; + } + + while (true) { - trie = tree->trie; if (trie->accepting) + goto match; + if (tp >= lim) + return -1; + c = tr (trans, *tp++); + + for (tree = trie->links; c != tree->label; ) { - mch = beg; - accept = trie; + tree = c < tree->label ? tree->llink : tree->rlink; + if (! tree) + { + trie = trie->fail; + if (!trie) + { + trie = kwset->next[c]; + if (trie) + goto have_trie; + if (tp >= lim) + return -1; + goto next_c; + } + if (trie->accepting) + { + --tp; + goto match; + } + tree = trie->links; + } } + trie = tree->trie; + have_trie:; } - else - break; - d = trie->shift; + next_c:; } - if (mch) - goto match; } - return -1; match: - /* Given a known match, find the longest possible match anchored - at or before its starting point. This is nearly a verbatim - copy of the preceding main search loops. */ - if (lim - mch > kwset->maxd) - lim = mch + kwset->maxd; - lmch = 0; - d = 1; - while (lim - end >= d) + accept = trie; + while (accept->accepting == SIZE_MAX) + accept = accept->fail; + left = tp - accept->depth; + + /* Try left-most longest match. */ + if (longest) { - if ((d = delta[c = (end += d)[-1]]) != 0) - continue; - beg = end - 1; - if (!(trie = next[c])) + while (tp < lim) { - d = 1; - continue; - } - if (trie->accepting && beg <= mch) - { - lmch = beg; - accept = trie; - } - d = trie->shift; - while (beg > text) - { - unsigned char uc = *--beg; - c = trans ? 
trans[uc] : uc; - tree = trie->links; - while (tree && c != tree->label) - if (c < tree->label) - tree = tree->llink; - else - tree = tree->rlink; - if (tree) + struct trie const *accept1; + char const *left1; + unsigned char c = tr (trans, *tp++); + + do { - trie = tree->trie; - if (trie->accepting && beg <= mch) + tree = trie->links; + while (tree && c != tree->label) + tree = c < tree->label ? tree->llink : tree->rlink; + } + while (!tree && (trie = trie->fail) && accept->depth <= trie->depth); + + if (!tree) + break; + trie = tree->trie; + if (trie->accepting) + { + accept1 = trie; + while (accept1->accepting == SIZE_MAX) + accept1 = accept1->fail; + left1 = tp - accept1->depth; + if (left1 <= left) { - lmch = beg; - accept = trie; + left = left1; + accept = accept1; } } - else - break; - d = trie->shift; } - if (lmch) - { - mch = lmch; - goto match; - } - if (!d) - d = 1; } kwsmatch->index = accept->accepting / 2; - kwsmatch->offset[0] = mch - text; + kwsmatch->offset[0] = left - text; kwsmatch->size[0] = accept->depth; - return mch - text; + return left - text; +} + +/* Hairy multiple string search with Aho-Corasick algorithm. */ +static ptrdiff_t +acexec (kwset_t kwset, char const *text, ptrdiff_t size, + struct kwsmatch *kwsmatch, bool longest) +{ + assume (0 <= size); + /* Help the compiler inline in two ways, depending on whether + kwset->trans is null. */ + return (IGNORE_DUPLICATE_BRANCH_WARNING + (kwset->trans + ? acexec_trans (kwset, text, size, kwsmatch, longest) + : acexec_trans (kwset, text, size, kwsmatch, longest))); } -/* Search TEXT for a match of any member of KWSET. +/* Find the first instance of a KWSET member in TEXT, which has SIZE bytes. Return the offset (into TEXT) of the first byte of the matching substring, - or (size_t) -1 if no match is found. Upon a match, store details in + or -1 if no match is found. Upon a match, store details in *KWSMATCH: index of matched keyword, start offset (same as the return - value), and length. 
*/ -size_t -kwsexec (kwset_t kwset, char const *text, size_t size, - struct kwsmatch *kwsmatch) + value), and length. If LONGEST, find the longest match; otherwise + any match will do. */ +ptrdiff_t +kwsexec (kwset_t kwset, char const *text, ptrdiff_t size, + struct kwsmatch *kwsmatch, bool longest) { - if (kwset->words == 1) - { - size_t ret = bmexec (kwset, text, size); - if (ret != (size_t) -1) - { - kwsmatch->index = 0; - kwsmatch->offset[0] = ret; - kwsmatch->size[0] = kwset->mind; - } - return ret; - } - else - return cwexec (kwset, text, size, kwsmatch); + return kwset->kwsexec (kwset, text, size, kwsmatch, longest); } -/* Free the components of the given keyword set. */ +/* Free the components of the given keyword set. */ void kwsfree (kwset_t kwset) { diff --git a/contrib/grep/src/kwset.h b/contrib/grep/src/kwset.h index b449847c76..793e2103b0 100644 --- a/contrib/grep/src/kwset.h +++ b/contrib/grep/src/kwset.h @@ -1,5 +1,5 @@ /* kwset.h - header declaring the keyword set library. - Copyright (C) 1989, 1998, 2005, 2007, 2009-2015 Free Software Foundation, + Copyright (C) 1989, 1998, 2005, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify @@ -17,17 +17,16 @@ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ -/* Written August 1989 by Mike Haertel. - The author may be reached (Email) at the address mike@ai.mit.edu, - or (US mail) as Mike Haertel c/o Free Software Foundation. */ +/* Written August 1989 by Mike Haertel. */ #include +#include struct kwsmatch { - size_t index; /* Index number of matching keyword. */ - size_t offset[1]; /* Offset of each submatch. */ - size_t size[1]; /* Length of each submatch. */ + ptrdiff_t index; /* Index number of matching keyword. */ + ptrdiff_t offset[1]; /* Offset of match. */ + ptrdiff_t size[1]; /* Length of match. 
*/ }; #include "arg-nonnull.h" @@ -35,26 +34,11 @@ struct kwsmatch struct kwset; typedef struct kwset *kwset_t; -/* Return an opaque pointer to a newly allocated keyword set. A nonnull arg - specifies a table of character translations to be applied to all - pattern and search text. */ extern kwset_t kwsalloc (char const *); - -/* Incrementally extend the keyword set to include the given string. - Remember an index number for each keyword included in the set. */ -extern void kwsincr (kwset_t, char const *, size_t); - -/* When the keyword set has been completely built, prepare it for use. */ +extern void kwsincr (kwset_t, char const *, ptrdiff_t); +extern ptrdiff_t kwswords (kwset_t) _GL_ATTRIBUTE_PURE; extern void kwsprep (kwset_t); - -/* Search through the given buffer for a member of the keyword set. - Return a pointer to the leftmost longest match found, or NULL if - no match is found. If foundlen is non-NULL, store the length of - the matching substring in the integer it points to. Similarly, - if foundindex is non-NULL, store the index of the particular - keyword found therein. */ -extern size_t kwsexec (kwset_t, char const *, size_t, struct kwsmatch *) +extern ptrdiff_t kwsexec (kwset_t, char const *, ptrdiff_t, + struct kwsmatch *, bool) _GL_ARG_NONNULL ((4)); - -/* Deallocate the given keyword set and all its associated storage. */ extern void kwsfree (kwset_t); diff --git a/contrib/grep/src/pcresearch.c b/contrib/grep/src/pcresearch.c deleted file mode 100644 index b1f8310486..0000000000 --- a/contrib/grep/src/pcresearch.c +++ /dev/null @@ -1,358 +0,0 @@ -/* pcresearch.c - searching subroutines using PCRE for grep. - Copyright 2000, 2007, 2009-2015 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA - 02110-1301, USA. */ - -/* Written August 1992 by Mike Haertel. */ - -#include -#include "search.h" - -#if HAVE_LIBPCRE -# include - -/* This must be at least 2; everything after that is for performance - in pcre_exec. */ -enum { NSUB = 300 }; - -/* Compiled internal form of a Perl regular expression. */ -static pcre *cre; - -/* Additional information about the pattern. */ -static pcre_extra *extra; - -# ifndef PCRE_STUDY_JIT_COMPILE -# define PCRE_STUDY_JIT_COMPILE 0 -# endif - -# if PCRE_STUDY_JIT_COMPILE -/* Maximum size of the JIT stack. */ -static int jit_stack_size; -# endif - -/* Match the already-compiled PCRE pattern against the data in SUBJECT, - of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with - options OPTIONS, and storing resulting matches into SUB. Return - the (nonnegative) match location or a (negative) error number. 
*/ -static int -jit_exec (char const *subject, int search_bytes, int search_offset, - int options, int *sub) -{ - while (true) - { - int e = pcre_exec (cre, extra, subject, search_bytes, search_offset, - options, sub, NSUB); - -# if PCRE_STUDY_JIT_COMPILE - if (e == PCRE_ERROR_JIT_STACKLIMIT - && 0 < jit_stack_size && jit_stack_size <= INT_MAX / 2) - { - int old_size = jit_stack_size; - int new_size = jit_stack_size = old_size * 2; - static pcre_jit_stack *jit_stack; - if (jit_stack) - pcre_jit_stack_free (jit_stack); - jit_stack = pcre_jit_stack_alloc (old_size, new_size); - if (!jit_stack) - error (EXIT_TROUBLE, 0, - _("failed to allocate memory for the PCRE JIT stack")); - pcre_assign_jit_stack (extra, NULL, jit_stack); - continue; - } -# endif - - return e; - } -} - -#endif - -#if HAVE_LIBPCRE -/* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty - string matches when that flag is used. */ -static int empty_match[2]; -#endif - -void -Pcompile (char const *pattern, size_t size) -{ -#if !HAVE_LIBPCRE - error (EXIT_TROUBLE, 0, "%s", - _("support for the -P option is not compiled into " - "this --disable-perl-regexp binary")); -#else - int e; - char const *ep; - char *re = xnmalloc (4, size + 7); - int flags = (PCRE_MULTILINE - | (match_icase ? PCRE_CASELESS : 0)); - char const *patlim = pattern + size; - char *n = re; - char const *p; - char const *pnul; - - if (using_utf8 ()) - flags |= PCRE_UTF8; - else if (MB_CUR_MAX != 1) - error (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales")); - - /* FIXME: Remove these restrictions. */ - if (memchr (pattern, '\n', size)) - error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern")); - - *n = '\0'; - if (match_lines) - strcpy (n, "^(?:"); - if (match_words) - strcpy (n, "(? 
#include "system.h" -#include "error.h" #include "grep.h" #include "dfa.h" #include "kwset.h" #include "xalloc.h" +#include "localeinfo.h" _GL_INLINE_HEADER_BEGIN #ifndef SEARCH_INLINE @@ -46,26 +46,30 @@ _GL_INLINE_HEADER_BEGIN typedef signed char mb_len_map_t; /* searchutils.c */ -extern void kwsinit (kwset_t *); - -extern char *mbtoupper (char const *, size_t *, mb_len_map_t **); -extern void build_mbclen_cache (void); -extern size_t mbclen_cache[]; -extern ptrdiff_t mb_goback (char const **, char const *, char const *); -extern wint_t mb_prev_wc (char const *, char const *, char const *); -extern wint_t mb_next_wc (char const *, char const *); +extern void wordinit (void); +extern kwset_t kwsinit (bool); +extern size_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE; +extern size_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE; +extern size_t wordchar_prev (char const *, char const *, char const *) + _GL_ATTRIBUTE_PURE; +extern ptrdiff_t mb_goback (char const **, size_t *, char const *, + char const *); /* dfasearch.c */ -extern void GEAcompile (char const *, size_t, reg_syntax_t); -extern size_t EGexecute (char const *, size_t, size_t *, char const *); +extern void *GEAcompile (char *, size_t, reg_syntax_t); +extern size_t EGexecute (void *, char const *, size_t, size_t *, char const *); /* kwsearch.c */ -extern void Fcompile (char const *, size_t); -extern size_t Fexecute (char const *, size_t, size_t *, char const *); +extern void *Fcompile (char *, size_t, reg_syntax_t); +extern size_t Fexecute (void *, char const *, size_t, size_t *, char const *); /* pcresearch.c */ -extern void Pcompile (char const *, size_t); -extern size_t Pexecute (char const *, size_t, size_t *, char const *); +extern void *Pcompile (char *, size_t, reg_syntax_t); +extern size_t Pexecute (void *, char const *, size_t, size_t *, char const *); + +/* grep.c */ +extern struct localeinfo localeinfo; +extern void fgrep_to_grep_pattern (char **, size_t *); /* 
Return the number of bytes in the character at the start of S, which is of size N. N must be positive. MBS is the conversion state. @@ -74,7 +78,7 @@ extern size_t Pexecute (char const *, size_t, size_t *, char const *); SEARCH_INLINE size_t mb_clen (char const *s, size_t n, mbstate_t *mbs) { - size_t len = mbclen_cache[to_uchar (*s)]; + size_t len = localeinfo.sbclen[to_uchar (*s)]; return len == (size_t) -2 ? mbrlen (s, n, mbs) : len; } diff --git a/contrib/grep/src/searchutils.c b/contrib/grep/src/searchutils.c index dd424519b7..84c319c794 100644 --- a/contrib/grep/src/searchutils.c +++ b/contrib/grep/src/searchutils.c @@ -1,5 +1,5 @@ /* searchutils.c - helper subroutines for grep's matchers. - Copyright 1992, 1998, 2000, 2007, 2009-2015 Free Software Foundation, Inc. + Copyright 1992, 1998, 2000, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,271 +22,182 @@ #define SYSTEM_INLINE _GL_EXTERN_INLINE #include "search.h" -#include +/* For each byte B, sbwordchar[B] is true if B is a single-byte + character that is a word constituent, and is false otherwise. */ +static bool sbwordchar[NCHAR]; -#define NCHAR (UCHAR_MAX + 1) - -size_t mbclen_cache[NCHAR]; +/* Whether -w considers WC to be a word constituent. */ +static bool +wordchar (wint_t wc) +{ + return wc == L'_' || iswalnum (wc); +} void -kwsinit (kwset_t *kwset) +wordinit (void) { - static char trans[NCHAR]; - int i; - - if (match_icase && MB_CUR_MAX == 1) - { - for (i = 0; i < NCHAR; ++i) - trans[i] = toupper (i); - - *kwset = kwsalloc (trans); - } - else - *kwset = kwsalloc (NULL); - - if (!*kwset) - xalloc_die (); + for (int i = 0; i < NCHAR; i++) + sbwordchar[i] = wordchar (localeinfo.sbctowc[i]); } -/* Convert BEG, an *N-byte string, to uppercase, and write the - NUL-terminated result into malloc'd storage. 
Upon success, set *N - to the length (in bytes) of the resulting string (not including the - trailing NUL byte), and return a pointer to the uppercase string. - Upon memory allocation failure, exit. *N must be positive. - - Although this function returns a pointer to malloc'd storage, - the caller must not free it, since this function retains a pointer - to the buffer and reuses it on any subsequent call. As a consequence, - this function is not thread-safe. - - When each character in the uppercase result string has the same length - as the corresponding character in the input string, set *LEN_MAP_P - to NULL. Otherwise, set it to a malloc'd buffer (like the returned - buffer, this must not be freed by caller) of the same length as the - result string. (*LEN_MAP_P)[J] is the change in byte-length of the - character in BEG that formed byte J of the result as it was converted to - uppercase. It is usually zero. For lowercase Turkish dotless I it - is -1, since the lowercase input occupies two bytes, while the - uppercase output occupies only one byte. For lowercase I in the - tr_TR.utf8 locale, it is 1 because the uppercase Turkish dotted I - is one byte longer than the original. When that happens, we have two - or more slots in *LEN_MAP_P for each such character. We store the - difference in the first one and 0's in any remaining slots. - - This map is used by the caller to convert offset,length pairs that - reference the uppercase result to numbers that refer to the matched - part of the original buffer. 
*/ - -char * -mbtoupper (const char *beg, size_t *n, mb_len_map_t **len_map_p) +kwset_t +kwsinit (bool mb_trans) { - static char *out; - static mb_len_map_t *len_map; - static size_t outalloc; - size_t outlen, mb_cur_max; - mbstate_t is, os; - const char *end; - char *p; - mb_len_map_t *m; - bool lengths_differ = false; - - if (*n > outalloc || outalloc == 0) - { - outalloc = MAX (1, *n); - out = xrealloc (out, outalloc); - len_map = xrealloc (len_map, outalloc); - } - - /* appease clang-2.6 */ - assert (out); - assert (len_map); - if (*n == 0) - return out; - - memset (&is, 0, sizeof (is)); - memset (&os, 0, sizeof (os)); - end = beg + *n; + char *trans = NULL; - mb_cur_max = MB_CUR_MAX; - p = out; - m = len_map; - outlen = 0; - while (beg < end) + if (match_icase && (MB_CUR_MAX == 1 || mb_trans)) { - wchar_t wc; - size_t mbclen = mbrtowc (&wc, beg, end - beg, &is); -#ifdef __CYGWIN__ - /* Handle a UTF-8 sequence for a character beyond the base plane. - Cygwin's wchar_t is UTF-16, as in the underlying OS. This - results in surrogate pairs which need some extra attention. */ - wint_t wci = 0; - if (mbclen == 3 && (wc & 0xdc00) == 0xd800) - { - /* We got the start of a 4 byte UTF-8 sequence. This is returned - as a UTF-16 surrogate pair. The first call to mbrtowc returned 3 - and wc has been set to a high surrogate value, now we're going - to fetch the matching low surrogate. This second call to mbrtowc - is supposed to return 1 to complete the 4 byte UTF-8 sequence. */ - wchar_t wc_2; - size_t mbclen_2 = mbrtowc (&wc_2, beg + mbclen, end - beg - mbclen, - &is); - if (mbclen_2 == 1 && (wc_2 & 0xdc00) == 0xdc00) - { - /* Match. Convert this to a 4 byte wint_t which constitutes - a 32-bit UTF-32 value. */ - wci = ( (((wint_t) (wc - 0xd800)) << 10) - | ((wint_t) (wc_2 - 0xdc00))) - + 0x10000; - ++mbclen; - } - else - { - /* Invalid UTF-8 sequence. 
*/ - mbclen = (size_t) -1; - } - } -#endif - if (outlen + mb_cur_max >= outalloc) - { - size_t dm = m - len_map; - out = x2nrealloc (out, &outalloc, 1); - len_map = xrealloc (len_map, outalloc); - p = out + outlen; - m = len_map + dm; - } - - if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) - { - /* An invalid sequence, or a truncated multi-octet character. - We treat it as a single-octet character. */ - *m++ = 0; - *p++ = *beg++; - outlen++; - memset (&is, 0, sizeof (is)); - memset (&os, 0, sizeof (os)); - } + trans = xmalloc (NCHAR); + if (MB_CUR_MAX == 1) + for (int i = 0; i < NCHAR; i++) + trans[i] = toupper (i); else - { - size_t ombclen; - beg += mbclen; -#ifdef __CYGWIN__ - /* Handle Unicode characters beyond the base plane. */ - if (mbclen == 4) - { - /* towupper, taking wint_t (4 bytes), handles UCS-4 values. */ - wci = towupper (wci); - if (wci >= 0x10000) - { - wci -= 0x10000; - wc = (wci >> 10) | 0xd800; - /* No need to check the return value. When reading the - high surrogate, the return value will be 0 and only the - mbstate indicates that we're in the middle of reading a - surrogate pair. The next wcrtomb call reading the low - surrogate will then return 4 and reset the mbstate. */ - wcrtomb (p, wc, &os); - wc = (wci & 0x3ff) | 0xdc00; - } - else - { - wc = (wchar_t) wci; - } - ombclen = wcrtomb (p, wc, &os); - } - else -#endif - ombclen = wcrtomb (p, towupper (wc), &os); - *m = mbclen - ombclen; - memset (m + 1, 0, ombclen - 1); - m += ombclen; - p += ombclen; - outlen += ombclen; - lengths_differ |= (mbclen != ombclen); - } + for (int i = 0; i < NCHAR; i++) + { + wint_t wc = localeinfo.sbctowc[i]; + wint_t uwc = towupper (wc); + if (uwc != wc) + { + mbstate_t mbs = { 0 }; + size_t len = wcrtomb (&trans[i], uwc, &mbs); + if (len != 1) + abort (); + } + else + trans[i] = i; + } } - *len_map_p = lengths_differ ? 
len_map : NULL; - *n = p - out; - *p = 0; - return out; -} - -/* Initialize a cache of mbrlen values for each of its 1-byte inputs. */ -void -build_mbclen_cache (void) -{ - int i; - - for (i = CHAR_MIN; i <= CHAR_MAX; ++i) - { - char c = i; - unsigned char uc = i; - mbstate_t mbs = { 0 }; - size_t len = mbrlen (&c, 1, &mbs); - mbclen_cache[uc] = len ? len : 1; - } + return kwsalloc (trans); } /* In the buffer *MB_START, return the number of bytes needed to go back from CUR to the previous boundary, where a "boundary" is the start of a multibyte character or is an error-encoding byte. The buffer ends at END (i.e., one past the address of the buffer's last - byte). If CUR is already at a boundary, return 0. If *MB_START is - greater than or equal to CUR, return the negative value CUR - *MB_START. + byte). If CUR is already at a boundary, return 0. If CUR is no + larger than *MB_START, return CUR - *MB_START without modifying + *MB_START or *MBCLEN. When returning zero, set *MB_START to CUR. When returning a - positive value, set *MB_START to the next boundary after CUR, or to - END if there is no such boundary. When returning a negative value, - leave *MB_START alone. */ + positive value, set *MB_START to the next boundary after CUR, + or to END if there is no such boundary, and set *MBCLEN to the + length of the preceding character. 
*/ ptrdiff_t -mb_goback (char const **mb_start, char const *cur, char const *end) +mb_goback (char const **mb_start, size_t *mbclen, char const *cur, + char const *end) { const char *p = *mb_start; const char *p0 = p; - mbstate_t cur_state; + size_t clen; - memset (&cur_state, 0, sizeof cur_state); + if (cur <= p) + return cur - p; - while (p < cur) + if (localeinfo.using_utf8) { - size_t clen = mb_clen (p, end - p, &cur_state); + p = cur; + clen = 1; - if ((size_t) -2 <= clen) + if (cur < end && (*cur & 0xc0) == 0x80) + for (int i = 1; i <= 3; i++) + if ((cur[-i] & 0xc0) != 0x80) + { + mbstate_t mbs = { 0 }; + clen = mb_clen (cur - i, end - (cur - i), &mbs); + if (i < clen && clen < (size_t) -2) + { + p0 = cur - i; + p = p0 + clen; + } + break; + } + } + else + { + mbstate_t mbs = { 0 }; + do { - /* An invalid sequence, or a truncated multibyte character. - Treat it as a single byte character. */ - clen = 1; - memset (&cur_state, 0, sizeof cur_state); + clen = mb_clen (p, end - p, &mbs); + + if ((size_t) -2 <= clen) + { + /* An invalid sequence, or a truncated multibyte character. + Treat it as a single byte character. */ + clen = 1; + memset (&mbs, 0, sizeof mbs); + } + p0 = p; + p += clen; } - p0 = p; - p += clen; + while (p < cur); } *mb_start = p; + if (mbclen) + *mbclen = clen; return p == cur ? 0 : cur - p0; } -/* In the buffer BUF, return the wide character that is encoded just - before CUR. The buffer ends at END. Return WEOF if there is no - wide character just before CUR. */ -wint_t -mb_prev_wc (char const *buf, char const *cur, char const *end) +/* Examine the start of BUF (which goes to END) for word constituents. + If COUNTALL, examine as many as possible; otherwise, examine at most one. + Return the total number of bytes in the examined characters. 
*/ +static size_t +wordchars_count (char const *buf, char const *end, bool countall) { - if (cur == buf) - return WEOF; - char const *p = buf; - cur--; - cur -= mb_goback (&p, cur, end); - return mb_next_wc (cur, end); + size_t n = 0; + mbstate_t mbs = { 0 }; + while (n < end - buf) + { + unsigned char b = buf[n]; + if (sbwordchar[b]) + n++; + else if (localeinfo.sbclen[b] != -2) + break; + else + { + wchar_t wc = 0; + size_t wcbytes = mbrtowc (&wc, buf + n, end - buf - n, &mbs); + if (!wordchar (wc)) + break; + n += wcbytes + !wcbytes; + } + if (!countall) + break; + } + return n; } -/* Return the wide character that is encoded at CUR. The buffer ends - at END. Return WEOF if there is no wide character encoded at CUR. */ -wint_t -mb_next_wc (char const *cur, char const *end) +/* Examine the start of BUF for the longest prefix containing just + word constituents. Return the total number of bytes in the prefix. + The buffer ends at END. */ +size_t +wordchars_size (char const *buf, char const *end) { - wchar_t wc; - mbstate_t mbs = { 0 }; - return (end - cur != 0 && mbrtowc (&wc, cur, end - cur, &mbs) < (size_t) -2 - ? wc : WEOF); + return wordchars_count (buf, end, true); +} + +/* If BUF starts with a word constituent, return the number of bytes + used to represent it; otherwise, return zero. The buffer ends at END. */ +size_t +wordchar_next (char const *buf, char const *end) +{ + return wordchars_count (buf, end, false); +} + +/* In the buffer BUF, return nonzero if the character whose encoding + contains the byte before CUR is a word constituent. The buffer + ends at END. */ +size_t +wordchar_prev (char const *buf, char const *cur, char const *end) +{ + if (buf == cur) + return 0; + unsigned char b = *--cur; + if (! 
localeinfo.multibyte + || (localeinfo.using_utf8 && localeinfo.sbclen[b] != -2)) + return sbwordchar[b]; + char const *p = buf; + cur -= mb_goback (&p, NULL, cur, end); + return wordchar_next (cur, end); } diff --git a/contrib/grep/src/system.h b/contrib/grep/src/system.h index 15a1abbd0e..e0287d1cc1 100644 --- a/contrib/grep/src/system.h +++ b/contrib/grep/src/system.h @@ -1,5 +1,5 @@ /* Portability cruft. Include after config.h and sys/types.h. - Copyright 1996, 1998-2000, 2007, 2009-2015 Free Software Foundation, Inc. + Copyright 1996, 1998-2000, 2007, 2009-2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,7 +23,6 @@ #include #include -#include "binary-io.h" #include "configmake.h" #include "dirname.h" #include "ignore-value.h" @@ -37,6 +36,7 @@ #include enum { EXIT_TROUBLE = 2 }; +enum { NCHAR = UCHAR_MAX + 1 }; #include #define N_(String) gettext_noop(String) @@ -107,4 +107,26 @@ static _GL_UNUSED void __asan_unpoison_memory_region (void const volatile *addr, size_t size) { } #endif +#ifndef FALLTHROUGH +# if __GNUC__ < 7 +# define FALLTHROUGH ((void) 0) +# else +# define FALLTHROUGH __attribute__ ((__fallthrough__)) +# endif +#endif + +/* When we deliberately use duplicate branches, use this macro to + disable gcc's -Wduplicated-branches in the containing expression. */ +#if 7 <= __GNUC__ +# define IGNORE_DUPLICATE_BRANCH_WARNING(exp) \ + ({ \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wduplicated-branches\"") \ + exp; \ + _Pragma ("GCC diagnostic pop") \ + }) +#else +# define IGNORE_DUPLICATE_BRANCH_WARNING(exp) exp +#endif + #endif -- 2.41.0