From 7d3cdea1b20baa44e6b51b8d71649f2bac48eeac Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Wed, 24 Apr 2019 09:10:38 +0200 Subject: [PATCH] Sync indent(1) with FreeBSD. A number of new options. Also -troff is gone. --- share/misc/indent.pro | 42 +- usr.bin/indent/Makefile | 1 + usr.bin/indent/README | 2 +- usr.bin/indent/args.c | 221 +++++---- usr.bin/indent/indent.1 | 128 +++-- usr.bin/indent/indent.c | 874 ++++++++++++++++++---------------- usr.bin/indent/indent.h | 17 +- usr.bin/indent/indent_codes.h | 11 +- usr.bin/indent/indent_globs.h | 256 +++++----- usr.bin/indent/io.c | 289 +++-------- usr.bin/indent/lexi.c | 571 ++++++++++++---------- usr.bin/indent/parse.c | 40 +- usr.bin/indent/pr_comment.c | 300 +++++------- 13 files changed, 1380 insertions(+), 1372 deletions(-) diff --git a/share/misc/indent.pro b/share/misc/indent.pro index 5aafe3b314..c85bfda3f4 100644 --- a/share/misc/indent.pro +++ b/share/misc/indent.pro @@ -1,20 +1,46 @@ +-TFILE +-Tfd_mask +-Tfd_set +-Tlinker_sym_tT +-Tu_char +-Tu_int +-Tu_long +-Tu_short +-TTAILQ_HEAD +-TTAILQ_ENTRY +-TLIST_HEAD +-TLIST_ENTRY +-TSTAILQ_HEAD +-TSTAILQ_ENTRY +-TSLIST_HEAD +-TSLIST_ENTRY +-bad +-bap +-nbbb +-nbc -br +-nbs +-c41 +-cd41 +-cdb -ce -ci4 -cli0 -d0 --di0 --i8 --ip --l79 --nbc --ncdb +-di8 -ndj --nfcb -ei -nfc1 +-nfcb +-i8 +-ip8 +-l79 +-lc77 +-ldi0 -nlp -npcs -psl -sc --sob +-nsob +-ta +-nv diff --git a/usr.bin/indent/Makefile b/usr.bin/indent/Makefile index 8d9ff94207..77a211f79a 100644 --- a/usr.bin/indent/Makefile +++ b/usr.bin/indent/Makefile @@ -1,4 +1,5 @@ # @(#)Makefile 8.1 (Berkeley) 6/6/93 +# $FreeBSD: head/usr.bin/indent/Makefile 322515 2017-08-14 19:21:37Z ngie $ PROG= indent SRCS= indent.c io.c lexi.c parse.c pr_comment.c args.c diff --git a/usr.bin/indent/README b/usr.bin/indent/README index f91f02bbc8..11b3553351 100644 --- a/usr.bin/indent/README +++ b/usr.bin/indent/README @@ -1,5 +1,5 @@ - $FreeBSD: src/usr.bin/indent/README,v 1.2 2002/10/16 13:58:39 charnier Exp $ + $FreeBSD: head/usr.bin/indent/README 105244 2002-10-16 13:58:39Z charnier $ This is the C indenter, it originally came from the University of Illinois via some distribution tape for PDP-11 Unix. It has subsequently been diff --git a/usr.bin/indent/args.c b/usr.bin/indent/args.c index eb3037f2e7..2c93306995 100644 --- a/usr.bin/indent/args.c +++ b/usr.bin/indent/args.c @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. @@ -29,7 +31,7 @@ * SUCH DAMAGE. * * @(#)args.c 8.1 (Berkeley) 6/6/93 - * $FreeBSD: src/usr.bin/indent/args.c,v 1.16 2010/03/31 17:05:30 avg Exp $ + * $FreeBSD: head/usr.bin/indent/args.c 336318 2018-07-15 21:04:21Z pstef $ */ /* @@ -46,11 +48,12 @@ #include "indent_globs.h" #include "indent.h" +#define INDENT_VERSION "2.0" + /* profile types */ #define PRO_SPECIAL 1 /* special case */ #define PRO_BOOL 2 /* boolean */ #define PRO_INT 3 /* integer */ -#define PRO_FONT 4 /* troff font */ /* profile specials for booleans */ #define ON 1 /* turn it on */ @@ -64,8 +67,13 @@ static void scan_profile(FILE *); +#define KEY_FILE 5 /* only used for args */ +#define VERSION 6 /* only used for args */ + const char *option_source = "?"; +void add_typedefs_from_file(const char *str); + /* * N.B.: because of the way the table here is scanned, options whose names are * substrings of other options must occur later; that is, with -lp vs -l, -lp @@ -81,76 +89,75 @@ struct pro { } pro[] = { {"T", PRO_SPECIAL, 0, KEY, 0}, - {"bacc", PRO_BOOL, false, ON, &blanklines_around_conditional_compilation}, - {"badp", PRO_BOOL, false, ON, &blanklines_after_declarations_at_proctop}, - {"bad", PRO_BOOL, false, ON, &blanklines_after_declarations}, - {"bap", PRO_BOOL, false, ON, &blanklines_after_procs}, - {"bbb", PRO_BOOL, false, ON, &blanklines_before_blockcomments}, - {"bc", PRO_BOOL, true, OFF, &ps.leave_comma}, - {"bl", PRO_BOOL, true, OFF, &btype_2}, - {"br", PRO_BOOL, true, ON, &btype_2}, - {"bs", PRO_BOOL, false, ON, &Bill_Shannon}, - {"cdb", PRO_BOOL, true, ON, &comment_delimiter_on_blankline}, - {"cd", PRO_INT, 0, 0, &ps.decl_com_ind}, - {"ce", PRO_BOOL, true, ON, &cuddle_else}, - {"ci", PRO_INT, 0, 0, &continuation_indent}, + {"U", PRO_SPECIAL, 0, KEY_FILE, 0}, + {"-version", PRO_SPECIAL, 0, VERSION, 0}, + {"P", PRO_SPECIAL, 0, IGN, 0}, + {"bacc", PRO_BOOL, false, ON, &opt.blanklines_around_conditional_compilation}, + {"badp", PRO_BOOL, false, ON, &opt.blanklines_after_declarations_at_proctop}, + {"bad", PRO_BOOL, false, ON, &opt.blanklines_after_declarations}, + {"bap", PRO_BOOL, false, ON, &opt.blanklines_after_procs}, + {"bbb", PRO_BOOL, false, ON, &opt.blanklines_before_blockcomments}, + {"bc", PRO_BOOL, true, OFF, &opt.leave_comma}, + {"bl", PRO_BOOL, true, OFF, &opt.btype_2}, + {"br", PRO_BOOL, true, ON, &opt.btype_2}, + {"bs", PRO_BOOL, false, ON, &opt.Bill_Shannon}, + {"cdb", PRO_BOOL, true, ON, &opt.comment_delimiter_on_blankline}, + {"cd", PRO_INT, 0, 0, &opt.decl_com_ind}, + {"ce", PRO_BOOL, true, ON, &opt.cuddle_else}, + {"ci", PRO_INT, 0, 0, &opt.continuation_indent}, {"cli", PRO_SPECIAL, 0, CLI, 0}, - {"c", PRO_INT, 33, 0, &ps.com_ind}, - {"di", PRO_INT, 16, 0, &ps.decl_indent}, - {"dj", PRO_BOOL, false, ON, &ps.ljust_decl}, - {"d", PRO_INT, 0, 0, &ps.unindent_displace}, - {"eei", PRO_BOOL, false, ON, &extra_expression_indent}, - {"ei", PRO_BOOL, true, ON, &ps.else_if}, - {"fbc", PRO_FONT, 0, 0, (int *) &blkcomf}, - {"fbs", PRO_BOOL, true, ON, &function_brace_split}, - {"fbx", PRO_FONT, 0, 0, (int *) &boxcomf}, - {"fb", PRO_FONT, 0, 0, (int *) &bodyf}, - {"fc1", PRO_BOOL, true, ON, &format_col1_comments}, - {"fcb", PRO_BOOL, true, ON, &format_block_comments}, - {"fc", PRO_FONT, 0, 0, (int *) &scomf}, - {"fk", PRO_FONT, 0, 0, (int *) &keywordf}, - {"fs", PRO_FONT, 0, 0, (int *) &stringf}, - {"ip", PRO_BOOL, true, ON, &ps.indent_parameters}, - {"i", PRO_INT, 8, 0, &ps.ind_size}, - {"lc", PRO_INT, 0, 0, &block_comment_max_col}, - {"ldi", PRO_INT, -1, 0, &ps.local_decl_indent}, - {"lp", PRO_BOOL, true, ON, &lineup_to_parens}, - {"l", PRO_INT, 78, 0, &max_col}, - {"nbacc", PRO_BOOL, false, OFF, &blanklines_around_conditional_compilation}, - {"nbadp", PRO_BOOL, false, OFF, &blanklines_after_declarations_at_proctop}, - {"nbad", PRO_BOOL, false, OFF, &blanklines_after_declarations}, - {"nbap", PRO_BOOL, false, OFF, &blanklines_after_procs}, - {"nbbb", PRO_BOOL, false, OFF, &blanklines_before_blockcomments}, - {"nbc", PRO_BOOL, true, ON, &ps.leave_comma}, - {"nbs", PRO_BOOL, false, OFF, &Bill_Shannon}, - {"ncdb", PRO_BOOL, true, OFF, &comment_delimiter_on_blankline}, - {"nce", PRO_BOOL, true, OFF, &cuddle_else}, - {"ndj", PRO_BOOL, false, OFF, &ps.ljust_decl}, - {"neei", PRO_BOOL, false, OFF, &extra_expression_indent}, - {"nei", PRO_BOOL, true, OFF, &ps.else_if}, - {"nfbs", PRO_BOOL, true, OFF, &function_brace_split}, - {"nfc1", PRO_BOOL, true, OFF, &format_col1_comments}, - {"nfcb", PRO_BOOL, true, OFF, &format_block_comments}, - {"nip", PRO_BOOL, true, OFF, &ps.indent_parameters}, - {"nlp", PRO_BOOL, true, OFF, &lineup_to_parens}, - {"npcs", PRO_BOOL, false, OFF, &proc_calls_space}, + {"cs", PRO_BOOL, false, ON, &opt.space_after_cast}, + {"c", PRO_INT, 33, 0, &opt.com_ind}, + {"di", PRO_INT, 16, 0, &opt.decl_indent}, + {"dj", PRO_BOOL, false, ON, &opt.ljust_decl}, + {"d", PRO_INT, 0, 0, &opt.unindent_displace}, + {"eei", PRO_BOOL, false, ON, &opt.extra_expression_indent}, + {"ei", PRO_BOOL, true, ON, &opt.else_if}, + {"fbs", PRO_BOOL, true, ON, &opt.function_brace_split}, + {"fc1", PRO_BOOL, true, ON, &opt.format_col1_comments}, + {"fcb", PRO_BOOL, true, ON, &opt.format_block_comments}, + {"ip", PRO_BOOL, true, ON, &opt.indent_parameters}, + {"i", PRO_INT, 8, 0, &opt.ind_size}, + {"lc", PRO_INT, 0, 0, &opt.block_comment_max_col}, + {"ldi", PRO_INT, -1, 0, &opt.local_decl_indent}, + {"lpl", PRO_BOOL, false, ON, &opt.lineup_to_parens_always}, + {"lp", PRO_BOOL, true, ON, &opt.lineup_to_parens}, + {"l", PRO_INT, 78, 0, &opt.max_col}, + {"nbacc", PRO_BOOL, false, OFF, &opt.blanklines_around_conditional_compilation}, + {"nbadp", PRO_BOOL, false, OFF, &opt.blanklines_after_declarations_at_proctop}, + {"nbad", PRO_BOOL, false, OFF, &opt.blanklines_after_declarations}, + {"nbap", PRO_BOOL, false, OFF, &opt.blanklines_after_procs}, + {"nbbb", PRO_BOOL, false, OFF, &opt.blanklines_before_blockcomments}, + {"nbc", PRO_BOOL, true, ON, &opt.leave_comma}, + {"nbs", PRO_BOOL, false, OFF, &opt.Bill_Shannon}, + {"ncdb", PRO_BOOL, true, OFF, &opt.comment_delimiter_on_blankline}, + {"nce", PRO_BOOL, true, OFF, &opt.cuddle_else}, + {"ncs", PRO_BOOL, false, OFF, &opt.space_after_cast}, + {"ndj", PRO_BOOL, false, OFF, &opt.ljust_decl}, + {"neei", PRO_BOOL, false, OFF, &opt.extra_expression_indent}, + {"nei", PRO_BOOL, true, OFF, &opt.else_if}, + {"nfbs", PRO_BOOL, true, OFF, &opt.function_brace_split}, + {"nfc1", PRO_BOOL, true, OFF, &opt.format_col1_comments}, + {"nfcb", PRO_BOOL, true, OFF, &opt.format_block_comments}, + {"nip", PRO_BOOL, true, OFF, &opt.indent_parameters}, + {"nlpl", PRO_BOOL, false, OFF, &opt.lineup_to_parens_always}, + {"nlp", PRO_BOOL, true, OFF, &opt.lineup_to_parens}, + {"npcs", PRO_BOOL, false, OFF, &opt.proc_calls_space}, {"npro", PRO_SPECIAL, 0, IGN, 0}, - {"npsl", PRO_BOOL, true, OFF, &procnames_start_line}, - {"nps", PRO_BOOL, false, OFF, &pointer_as_binop}, - {"nsc", PRO_BOOL, true, OFF, &star_comment_cont}, - {"nsob", PRO_BOOL, false, OFF, &swallow_optional_blanklines}, - {"nut", PRO_BOOL, true, OFF, &use_tabs}, - {"nv", PRO_BOOL, false, OFF, &verbose}, - {"pcs", PRO_BOOL, false, ON, &proc_calls_space}, - {"psl", PRO_BOOL, true, ON, &procnames_start_line}, - {"ps", PRO_BOOL, false, ON, &pointer_as_binop}, - {"sc", PRO_BOOL, true, ON, &star_comment_cont}, - {"sob", PRO_BOOL, false, ON, &swallow_optional_blanklines}, + {"npsl", PRO_BOOL, true, OFF, &opt.procnames_start_line}, + {"nsc", PRO_BOOL, true, OFF, &opt.star_comment_cont}, + {"nsob", PRO_BOOL, false, OFF, &opt.swallow_optional_blanklines}, + {"nut", PRO_BOOL, true, OFF, &opt.use_tabs}, + {"nv", PRO_BOOL, false, OFF, &opt.verbose}, + {"pcs", PRO_BOOL, false, ON, &opt.proc_calls_space}, + {"psl", PRO_BOOL, true, ON, &opt.procnames_start_line}, + {"sc", PRO_BOOL, true, ON, &opt.star_comment_cont}, + {"sob", PRO_BOOL, false, ON, &opt.swallow_optional_blanklines}, {"st", PRO_SPECIAL, 0, STDIN, 0}, - {"ta", PRO_BOOL, false, ON, &auto_typedefs}, - {"troff", PRO_BOOL, false, ON, &troff}, - {"ut", PRO_BOOL, true, ON, &use_tabs}, - {"v", PRO_BOOL, false, ON, &verbose}, + {"ta", PRO_BOOL, false, ON, &opt.auto_typedefs}, + {"ts", PRO_INT, 8, 0, &opt.tabsize}, + {"ut", PRO_BOOL, true, ON, &opt.use_tabs}, + {"v", PRO_BOOL, false, ON, &opt.verbose}, /* whew! */ {0, 0, 0, 0, 0} }; @@ -160,13 +167,16 @@ struct pro { * given in these files. */ void -set_profile(void) +set_profile(const char *profile_name) { FILE *f; char fname[PATH_MAX]; static char prof[] = ".indent.pro"; - snprintf(fname, sizeof(fname), "%s/%s", getenv("HOME"), prof); + if (profile_name == NULL) + snprintf(fname, sizeof(fname), "%s/%s", getenv("HOME"), prof); + else + snprintf(fname, sizeof(fname), "%s", profile_name + 2); if ((f = fopen(option_source = fname, "r")) != NULL) { scan_profile(f); (void) fclose(f); @@ -195,7 +205,7 @@ scan_profile(FILE *f) } else if (i == '/' && comment && p > buf && p[-1] == '*') { p = buf + comment - 1; comment = 0; - } else if (isspace(i)) { + } else if (isspace((unsigned char)i)) { if (p > buf && !comment) break; } else { @@ -204,7 +214,7 @@ scan_profile(FILE *f) } if (p != buf) { *p++ = 0; - if (verbose) + if (opt.verbose) printf("profile: %s\n", buf); set_option(buf); } @@ -213,17 +223,14 @@ scan_profile(FILE *f) } } -const char *param_start; - -static int +static const char * eqin(const char *s1, const char *s2) { while (*s1) { if (*s1++ != *s2++) - return (false); + return (NULL); } - param_start = s2; - return (true); + return (s2); } /* @@ -238,20 +245,21 @@ set_defaults(void) * Because ps.case_indent is a float, we can't initialize it from the * table: */ - ps.case_indent = 0.0; /* -cli0.0 */ + opt.case_indent = 0.0; /* -cli0.0 */ for (p = pro; p->p_name; p++) - if (p->p_type != PRO_SPECIAL && p->p_type != PRO_FONT) + if (p->p_type != PRO_SPECIAL) *p->p_obj = p->p_default; } void set_option(char *arg) { - struct pro *p; + struct pro *p; + const char *param_start; arg++; /* ignore leading "-" */ for (p = pro; p->p_name; p++) - if (*p->p_name == *arg && eqin(p->p_name, arg)) + if (*p->p_name == *arg && (param_start = eqin(p->p_name, arg)) != NULL) goto found; errx(1, "%s: unknown parameter \"%s\"", option_source, arg - 1); found: @@ -266,7 +274,7 @@ found: case CLI: if (*param_start == 0) goto need_param; - ps.case_indent = atof(param_start); + opt.case_indent = atof(param_start); break; case STDIN: @@ -279,14 +287,19 @@ found: case KEY: if (*param_start == 0) goto need_param; - { - char *str = strdup(param_start); - if (str == NULL) - err(1, NULL); - addkey(str, 4); - } + add_typename(param_start); + break; + + case KEY_FILE: + if (*param_start == 0) + goto need_param; + add_typedefs_from_file(param_start); break; + case VERSION: + printf("FreeBSD indent %s\n", INDENT_VERSION); + exit(0); + default: errx(1, "set_option: internal error: p_special %d", p->p_special); } @@ -300,18 +313,32 @@ found: break; case PRO_INT: - if (!isdigit(*param_start)) { + if (!isdigit((unsigned char)*param_start)) { need_param: - errx(1, "%s: ``%s'' requires a parameter", option_source, arg - 1); + errx(1, "%s: ``%s'' requires a parameter", option_source, p->p_name); } *p->p_obj = atoi(param_start); break; - case PRO_FONT: - parsefont((struct fstate *) p->p_obj, param_start); - break; - default: errx(1, "set_option: internal error: p_type %d", p->p_type); } } + +void +add_typedefs_from_file(const char *str) +{ + FILE *file; + char line[BUFSIZ]; + + if ((file = fopen(str, "r")) == NULL) { + fprintf(stderr, "indent: cannot open file %s\n", str); + exit(1); + } + while ((fgets(line, BUFSIZ, file)) != NULL) { + /* Remove trailing whitespace */ + line[strcspn(line, " \t\n\r")] = '\0'; + add_typename(line); + } + fclose(file); +} diff --git a/usr.bin/indent/indent.1 b/usr.bin/indent/indent.1 index 0184ab1244..e3e7457420 100644 --- a/usr.bin/indent/indent.1 +++ b/usr.bin/indent/indent.1 @@ -28,9 +28,9 @@ .\" SUCH DAMAGE. .\" .\" @(#)indent.1 8.1 (Berkeley) 7/1/93 -.\" $FreeBSD: src/usr.bin/indent/indent.1,v 1.28 2010/03/31 17:05:30 avg Exp $ +.\" $FreeBSD: head/usr.bin/indent/indent.1 334944 2018-06-11 05:35:57Z pstef $ .\" -.Dd May 7, 2010 +.Dd April 23, 2019 .Dt INDENT 1 .Os .Sh NAME @@ -39,14 +39,13 @@ .Sh SYNOPSIS .Nm .Op Ar input-file Op Ar output-file +.Op Fl bacc | Fl nbacc .Op Fl bad | Fl nbad +.Op Fl badp | Fl nbadp .Op Fl bap | Fl nbap -.Bk -words .Op Fl bbb | Fl nbbb -.Ek .Op Fl \&bc | Fl nbc -.Op Fl \&bl -.Op Fl \&br +.Op Fl \&bl | Fl \&br .Op Fl bs | Fl nbs .Op Fl c Ns Ar n .Op Fl \&cd Ns Ar n @@ -56,8 +55,14 @@ .Op Fl \&ce | Fl nce .Op Fl \&ci Ns Ar n .Op Fl cli Ns Ar n +.Op Fl cs | Fl ncs .Op Fl d Ns Ar n .Op Fl \&di Ns Ar n +.Op Fl dj | Fl ndj +.Bk -words +.Op Fl ei | Fl nei +.Op Fl eei | Fl neei +.Ek .Bk -words .Op Fl fbs | Fl nfbs .Op Fl fc1 | Fl nfc1 @@ -69,7 +74,9 @@ .Op Fl \&lc Ns Ar n .Op Fl \&ldi Ns Ar n .Op Fl \&lp | Fl nlp +.Op Fl \&lpl | Fl nlpl .Op Fl npro +.Op Fl P Ns Ar file .Op Fl pcs | Fl npcs .Op Fl psl | Fl npsl .Op Fl \&sc | Fl nsc @@ -78,9 +85,12 @@ .Ek .Op Fl \&st .Op Fl \&ta -.Op Fl troff +.Op Fl T Ns Ar typename +.Op Fl ts Ns Ar n +.Op Fl U Ns Ar file .Op Fl ut | Fl nut .Op Fl v | Fl \&nv +.Op Fl -version .Sh DESCRIPTION The .Nm @@ -109,7 +119,10 @@ If is named .Sq Pa /blah/blah/file , the backup file is named -.Sq Pa file.BAK . +.Sq Pa file.BAK +by default. The extension used for the backup file may be overridden using the +.Ev SIMPLE_BACKUP_SUFFIX +environment variable. .Pp If .Ar output-file @@ -121,6 +134,15 @@ checks to make sure that it is different from The options listed below control the formatting style imposed by .Nm . .Bl -tag -width Op +.It Fl bacc , nbacc +If +.Fl bacc +is specified, a blank line is forced around every conditional +compilation block. +For example, in front of every #ifdef and after every #endif. +Other blank lines surrounding such blocks will be swallowed. +Default: +.Fl nbacc . .It Fl bad , nbad If .Fl bad @@ -128,6 +150,14 @@ is specified, a blank line is forced after every block of declarations. Default: .Fl nbad . +.It Fl badp , nbadp +This is vaguely similar to +.Fl bad +except that it only applies to the first set of declarations +in a procedure (just after the first `{') and it causes a blank +line to be generated even if there are no declarations. +The default is +.Fl nbadp. .It Fl bap , nbap If .Fl bap @@ -148,10 +178,10 @@ is specified, then a newline is forced after each comma in a declaration. turns off this option. Default: .Fl \&nbc . -.It Fl \&br , \&bl +.It Fl \&bl , \&br Specifying .Fl \&bl -lines-up compound statements like this: +lines up compound statements like this: .Bd -literal -offset indent if (...) { @@ -168,14 +198,9 @@ if (...) { } .Ed .It Fl bs , nbs -If -.Fl bs -is specified, then a space is inserted after -.Ic sizeof . -.Fl nbs -turns off this option. -Default: -.Fl nbs . +Whether a blank should always be inserted after sizeof. +The default is +.Fl nbs. .It Fl c Ns Ar n The column in which comments on code start. The default is 33. @@ -227,6 +252,11 @@ statement. causes case labels to be indented half a tab stop. The default is .Fl cli0 . +.It Fl cs , ncs +Control whether parenthesized type names in casts are followed by a space or +not. +The default is +.Fl ncs . .It Fl d Ns Ar n Controls the placement of comments which are not to the right of code. For example, @@ -234,7 +264,7 @@ For example, means that such comments are placed one indentation level to the left of code. Specifying the default .Fl \&d\&0 -lines-up these comments with the code. +lines up these comments with the code. See the section on comment indentation below. .It Fl \&di Ns Ar n Specifies the indentation, in character positions, @@ -262,6 +292,16 @@ will have the same indentation as the preceding statement. The default is .Fl ei . +.It Fl eei , neei +Enables (disables) extra indentation on continuation lines of +the expression part of +.Ic if +and +.Ic while +statements. +These continuation lines will be indented one extra level. +The default is +.Fl neei . .It Fl fbs , nfbs Enables (disables) splitting the function declaration and opening brace across two lines. @@ -289,7 +329,7 @@ Block comments are then handled like box comments. The default is .Fl fcb . .It Fl i Ns Ar n -The number of spaces for one indentation level. +The number of columns for one indentation level. The default is 8. .It Fl \&ip , nip Enables (disables) the indentation of parameter declarations from the left @@ -299,6 +339,10 @@ The default is .It Fl l Ns Ar n Maximum length of an output line. The default is 78. +.It Fl lc Ns Ar n +Maximum length of an output line in a block comment. +The default is 0, which means to limit block comment lines in accordance with +.Fl l. .It Fl \&ldi Ns Ar n Specifies the indentation, in character positions, of local variable names @@ -306,8 +350,10 @@ relative to the beginning of their type declaration. The default is for local variable names to be indented by the same amount as global ones. .It Fl \&lp , nlp -Lines-up code surrounded by parenthesis in continuation lines. -If a line +Lines up code surrounded by parentheses in continuation lines. +With +.Fl \&lp , +if a line has a left paren which is not closed on that line, then continuation lines will be lined up to start at the character position just after the left paren. @@ -334,12 +380,28 @@ p1\ =\ first_procedure(second_procedure(p2, \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ third_procedure(p4, \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ p5)); .Ed +.It Fl \&lpl , nlpl +With +.Fl \&lpl , +code surrounded by parentheses in continuation lines is lined up even if it +would extend past the right margin. +With +.Fl \&nlpl +(the default), such a line that would extend past the right margin is moved +left to keep it within the margin, if that does not require placing it to +the left of the prevailing indentation level. +These switches have no effect if +.Fl nlp +is selected. .It Fl npro Causes the profile files, .Sq Pa ./.indent.pro and .Sq Pa ~/.indent.pro , to be ignored. +.It Fl P Ns Ar file +Read profile from +.Ar file . .It Fl pcs , npcs If true .Pq Fl pcs @@ -394,19 +456,15 @@ language and cannot find all instances of .Ic typedef . -.It Fl troff -Causes -.Nm -to format the program for processing by -.Xr troff 1 . -It will produce a fancy -listing in much the same spirit as -.Xr vgrind 1 . -If the output file is not specified, the default is standard output, -rather than formatting in place. +.It Fl ts Ns Ar n +Assumed distance between tab stops. +The default is 8. +.It Fl U Ns Ar file +Adds type names from +.Ar file +to the list of type keywords. .It Fl ut , nut Enables (disables) the use of tab characters in the output. -Tabs are assumed to be aligned on columns divisible by 8. The default is .Fl ut . .It Fl v , \&nv @@ -420,6 +478,10 @@ reports when it splits one line of input into two or more lines of output, and gives some size statistics at completion. The default is .Fl \&nv . +.It Fl -version +Causes +.Nm +to print its version number and exit. .El .Pp You may set up your own `profile' of defaults to diff --git a/usr.bin/indent/indent.c b/usr.bin/indent/indent.c index fc148dfc5b..baa91b60cc 100644 --- a/usr.bin/indent/indent.c +++ b/usr.bin/indent/indent.c @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1976 Board of Trustees of the University of Illinois. * Copyright (c) 1980, 1993 @@ -28,15 +30,13 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#) Copyright (c) 1985 Sun Microsystems, Inc. - * @(#) Copyright (c) 1976 Board of Trustees of the University of Illinois. - * @(#) Copyright (c) 1980, 1993 The Regents of the University of California. All rights reserved. * @(#)indent.c 5.17 (Berkeley) 6/7/93 - * $FreeBSD: src/usr.bin/indent/indent.c,v 1.26 2010/03/31 16:55:47 avg Exp $ + * $FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $ */ #include #include +#include #include #include #include @@ -48,21 +48,21 @@ #include "indent.h" static void bakcopy(void); +static void indent_declaration(int, int); const char *in_name = "Standard Input"; /* will always point to name of input * file */ const char *out_name = "Standard Output"; /* will always point to name * of output file */ +const char *simple_backup_suffix = ".BAK"; /* Suffix to use for backup + * files */ char bakfile[MAXPATHLEN] = ""; int main(int argc, char **argv) { - int dec_ind; /* current indentation for declarations */ int di_stack[20]; /* a stack of structure indentation levels */ - int flushed_nl; /* used when buffering up comments to remember - * that a newline was passed over */ int force_nl; /* when true, code must be broken */ int hd_type = 0; /* used to store type of stmt for if (...), * for (...), etc */ @@ -79,7 +79,9 @@ main(int argc, char **argv) int type_code; /* the type of token, returned by lexi */ int last_else = 0; /* true iff last keyword was an else */ - + const char *profile_name = NULL; + const char *envval = NULL; + struct parser_state transient_state; /* a copy for lookup */ /*-----------------------------------------------*\ | INITIALIZATION | @@ -103,6 +105,8 @@ main(int argc, char **argv) tokenbuf = (char *) malloc(bufsize); if (tokenbuf == NULL) err(1, NULL); + alloc_typenames(); + init_constant_tt(); l_com = combuf + bufsize - 5; l_lab = labbuf + bufsize - 5; l_code = codebuf + bufsize - 5; @@ -110,7 +114,7 @@ main(int argc, char **argv) combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and * comment buffers */ combuf[1] = codebuf[1] = labbuf[1] = '\0'; - ps.else_if = 1; /* Default else-if special processing to on */ + opt.else_if = 1; /* Default else-if special processing to on */ s_lab = e_lab = labbuf + 1; s_code = e_code = codebuf + 1; s_com = e_com = combuf + 1; @@ -139,6 +143,10 @@ main(int argc, char **argv) output = NULL; tabs_to_var = 0; + envval = getenv("SIMPLE_BACKUP_SUFFIX"); + if (envval) + simple_backup_suffix = envval; + /*--------------------------------------------------*\ | COMMAND LINE SCAN | \*--------------------------------------------------*/ @@ -146,6 +154,7 @@ main(int argc, char **argv) #ifdef undef max_col = 78; /* -l78 */ lineup_to_parens = 1; /* -lp */ + lineup_to_parens_always = 0; /* -nlpl */ ps.ljust_decl = 0; /* -ndj */ ps.com_ind = 33; /* -c33 */ star_comment_cont = 1; /* -sc */ @@ -174,9 +183,11 @@ main(int argc, char **argv) for (i = 1; i < argc; ++i) if (strcmp(argv[i], "-npro") == 0) break; + else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0') + profile_name = argv[i]; /* non-empty -P (set profile) */ set_defaults(); if (i >= argc) - set_profile(); + set_profile(profile_name); for (i = 1; i < argc; ++i) { @@ -210,43 +221,24 @@ main(int argc, char **argv) if (input == NULL) input = stdin; if (output == NULL) { - if (troff || input == stdin) + if (input == stdin) output = stdout; else { out_name = in_name; bakcopy(); } } - if (ps.com_ind <= 1) - ps.com_ind = 2; /* dont put normal comments before column 2 */ - if (troff) { - if (bodyf.font[0] == 0) - parsefont(&bodyf, "R"); - if (scomf.font[0] == 0) - parsefont(&scomf, "I"); - if (blkcomf.font[0] == 0) - blkcomf = scomf, blkcomf.size += 2; - if (boxcomf.font[0] == 0) - boxcomf = blkcomf; - if (stringf.font[0] == 0) - parsefont(&stringf, "L"); - if (keywordf.font[0] == 0) - parsefont(&keywordf, "B"); - writefdef(&bodyf, 'B'); - writefdef(&scomf, 'C'); - writefdef(&blkcomf, 'L'); - writefdef(&boxcomf, 'X'); - writefdef(&stringf, 'S'); - writefdef(&keywordf, 'K'); - } - if (block_comment_max_col <= 0) - block_comment_max_col = max_col; - if (ps.local_decl_indent < 0) /* if not specified by user, set this */ - ps.local_decl_indent = ps.decl_indent; - if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ - ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; - if (continuation_indent == 0) - continuation_indent = ps.ind_size; + + if (opt.com_ind <= 1) + opt.com_ind = 2; /* don't put normal comments before column 2 */ + if (opt.block_comment_max_col <= 0) + opt.block_comment_max_col = opt.max_col; + if (opt.local_decl_indent < 0) /* if not specified by user, set this */ + opt.local_decl_indent = opt.decl_indent; + if (opt.decl_com_ind <= 0) /* if not specified by user, set this */ + opt.decl_com_ind = opt.ljust_decl ? (opt.com_ind <= 10 ? 2 : opt.com_ind - 8) : opt.com_ind; + if (opt.continuation_indent == 0) + opt.continuation_indent = opt.ind_size; fill_buffer(); /* get first batch of stuff into input buffer */ parse(semicolon); @@ -258,161 +250,209 @@ main(int argc, char **argv) if (*p == ' ') col++; else if (*p == '\t') - col = ((col - 1) & ~7) + 9; + col = opt.tabsize * (1 + (col - 1) / opt.tabsize) + 1; else break; p++; } - if (col > ps.ind_size) - ps.ind_level = ps.i_l_follow = col / ps.ind_size; - } - if (troff) { - const char *p = in_name, - *beg = in_name; - - while (*p) - if (*p++ == '/') - beg = p; - fprintf(output, ".Fn \"%s\"\n", beg); + if (col > opt.ind_size) + ps.ind_level = ps.i_l_follow = col / opt.ind_size; } + /* * START OF MAIN LOOP */ while (1) { /* this is the main loop. it will go until we * reach eof */ - int is_procname; + int comment_buffered = false; - type_code = lexi(); /* lexi reads one token. The actual + type_code = lexi(&ps); /* lexi reads one token. The actual * characters read are stored in "token". lexi * returns a code indicating the type of token */ - is_procname = ps.procname[0]; /* - * The following code moves everything following an if (), while (), - * else, etc. up to the start of the following stmt to a buffer. This - * allows proper handling of both kinds of brace placement. + * The following code moves newlines and comments following an if (), + * while (), else, etc. up to the start of the following stmt to + * a buffer. This allows proper handling of both kinds of brace + * placement (-br, -bl) and cuddling "else" (-ce). */ - flushed_nl = false; - while (ps.search_brace) { /* if we scanned an if(), while(), - * etc., we might need to copy stuff - * into a buffer we must loop, copying - * stuff into save_com, until we find - * the start of the stmt which follows - * the if, or whatever */ + while (ps.search_brace) { switch (type_code) { case newline: - ++line_no; - flushed_nl = true; + if (sc_end == NULL) { + save_com = sc_buf; + save_com[0] = save_com[1] = ' '; + sc_end = &save_com[2]; + } + *sc_end++ = '\n'; + /* + * We may have inherited a force_nl == true from the previous + * token (like a semicolon). But once we know that a newline + * has been scanned in this loop, force_nl should be false. + * + * However, the force_nl == true must be preserved if newline + * is never scanned in this loop, so this assignment cannot be + * done earlier. + */ + force_nl = false; case form_feed: - break; /* form feeds and newlines found here will be - * ignored */ - - case lbrace: /* this is a brace that starts the compound - * stmt */ - if (sc_end == NULL) { /* ignore buffering if a comment wasn't - * stored up */ - ps.search_brace = false; - goto check_type; + break; + case comment: + if (sc_end == NULL) { + /* + * Copy everything from the start of the line, because + * pr_comment() will use that to calculate original + * indentation of a boxed comment. + */ + memcpy(sc_buf, in_buffer, buf_ptr - in_buffer - 4); + save_com = sc_buf + (buf_ptr - in_buffer - 4); + save_com[0] = save_com[1] = ' '; + sc_end = &save_com[2]; } - if (btype_2) { - save_com[0] = '{'; /* we either want to put the brace - * right after the if */ - goto sw_buffer; /* go to common code to get out of - * this loop */ + comment_buffered = true; + *sc_end++ = '/'; /* copy in start of comment */ + *sc_end++ = '*'; + for (;;) { /* loop until we get to the end of the comment */ + *sc_end = *buf_ptr++; + if (buf_ptr >= buf_end) + fill_buffer(); + if (*sc_end++ == '*' && *buf_ptr == '/') + break; /* we are at end of comment */ + if (sc_end >= &save_com[sc_size]) { /* check for temp buffer + * overflow */ + diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); + fflush(output); + exit(1); + } } /* FALLTHROUGH */ - case comment: /* we have a comment, so we must copy it into - * the buffer */ - if (!flushed_nl || sc_end != NULL) { - if (sc_end == NULL) { /* if this is the first comment, we - * must set up the buffer */ - save_com[0] = save_com[1] = ' '; - sc_end = &(save_com[2]); - } - else { - *sc_end++ = '\n'; /* add newline between - * comments */ - *sc_end++ = ' '; - --line_no; - } - *sc_end++ = '/'; /* copy in start of comment */ - *sc_end++ = '*'; - - for (;;) { /* loop until we get to the end of the comment */ - *sc_end = *buf_ptr++; - if (buf_ptr >= buf_end) + *sc_end++ = '/'; /* add ending slash */ + if (++buf_ptr >= buf_end) /* get past / in buffer */ + fill_buffer(); + break; + case lbrace: + /* + * Put KNF-style lbraces before the buffered up tokens and + * jump out of this loop in order to avoid copying the token + * again under the default case of the switch below. + */ + if (sc_end != NULL && opt.btype_2) { + save_com[0] = '{'; + /* + * Originally the lbrace may have been alone on its own + * line, but it will be moved into "the else's line", so + * if there was a newline resulting from the "{" before, + * it must be scanned now and ignored. + */ + while (isspace((unsigned char)*buf_ptr)) { + if (++buf_ptr >= buf_end) fill_buffer(); - - if (*sc_end++ == '*' && *buf_ptr == '/') - break; /* we are at end of comment */ - - if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer - * overflow */ - diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); - fflush(output); - exit(1); - } + if (*buf_ptr == '\n') + break; } - *sc_end++ = '/'; /* add ending slash */ - if (++buf_ptr >= buf_end) /* get past / in buffer */ - fill_buffer(); - break; + goto sw_buffer; } /* FALLTHROUGH */ default: /* it is the start of a normal statement */ - if (flushed_nl) /* if we flushed a newline, make sure it is - * put back */ - force_nl = true; - if ((type_code == sp_paren && *token == 'i' - && last_else && ps.else_if) - || (type_code == sp_nparen && *token == 'e' - && e_code != s_code && e_code[-1] == '}')) - force_nl = false; - - if (sc_end == NULL) { /* ignore buffering if comment wasn't - * saved up */ - ps.search_brace = false; - goto check_type; - } - if (force_nl) { /* if we should insert a nl here, put it into - * the buffer */ - force_nl = false; - --line_no; /* this will be re-increased when the nl is - * read from the buffer */ - *sc_end++ = '\n'; - *sc_end++ = ' '; - if (verbose && !flushed_nl) /* print error msg if the line - * was not already broken */ - diag2(0, "Line broken"); - flushed_nl = false; - } - for (t_ptr = token; *t_ptr; ++t_ptr) - *sc_end++ = *t_ptr; /* copy token into temp buffer */ - ps.procname[0] = 0; + { + int remove_newlines; + + remove_newlines = + /* "} else" */ + (type_code == sp_nparen && *token == 'e' && + e_code != s_code && e_code[-1] == '}') + /* "else if" */ + || (type_code == sp_paren && *token == 'i' && + last_else && opt.else_if); + if (remove_newlines) + force_nl = false; + if (sc_end == NULL) { /* ignore buffering if + * comment wasn't saved up */ + ps.search_brace = false; + goto check_type; + } + while (sc_end > save_com && isblank((unsigned char)sc_end[-1])) { + sc_end--; + } + if (opt.swallow_optional_blanklines || + (!comment_buffered && remove_newlines)) { + force_nl = !remove_newlines; + while (sc_end > save_com && sc_end[-1] == '\n') { + sc_end--; + } + } + if (force_nl) { /* if we should insert a nl here, put + * it into the buffer */ + force_nl = false; + --line_no; /* this will be re-increased when the + * newline is read from the buffer */ + *sc_end++ = '\n'; + *sc_end++ = ' '; + if (opt.verbose) /* print error msg if the line was + * not already broken */ + diag2(0, "Line broken"); + } + for (t_ptr = token; *t_ptr; ++t_ptr) + *sc_end++ = *t_ptr; - sw_buffer: - ps.search_brace = false; /* stop looking for start of + sw_buffer: + ps.search_brace = false; /* stop looking for start of * stmt */ - bp_save = buf_ptr; /* save current input buffer */ - be_save = buf_end; - buf_ptr = save_com; /* fix so that subsequent calls to + bp_save = buf_ptr; /* save current input buffer */ + be_save = buf_end; + buf_ptr = save_com; /* fix so that subsequent calls to * lexi will take tokens out of * save_com */ - *sc_end++ = ' ';/* add trailing blank, just in case */ - buf_end = sc_end; - sc_end = NULL; - break; + *sc_end++ = ' ';/* add trailing blank, just in case */ + buf_end = sc_end; + sc_end = NULL; + break; + } } /* end of switch */ - if (type_code != 0) /* we must make this check, just in case there - * was an unexpected EOF */ - type_code = lexi(); /* read another token */ - /* if (ps.search_brace) ps.procname[0] = 0; */ - if ((is_procname = ps.procname[0]) && flushed_nl - && !procnames_start_line && ps.in_decl - && type_code == ident) - flushed_nl = 0; + /* + * We must make this check, just in case there was an unexpected + * EOF. + */ + if (type_code != 0) { + /* + * The only intended purpose of calling lexi() below is to + * categorize the next token in order to decide whether to + * continue buffering forthcoming tokens. Once the buffering + * is over, lexi() will be called again elsewhere on all of + * the tokens - this time for normal processing. + * + * Calling it for this purpose is a bug, because lexi() also + * changes the parser state and discards leading whitespace, + * which is needed mostly for comment-related considerations. + * + * Work around the former problem by giving lexi() a copy of + * the current parser state and discard it if the call turned + * out to be just a look ahead. + * + * Work around the latter problem by copying all whitespace + * characters into the buffer so that the later lexi() call + * will read them. + */ + if (sc_end != NULL) { + while (*buf_ptr == ' ' || *buf_ptr == '\t') { + *sc_end++ = *buf_ptr++; + if (sc_end >= &save_com[sc_size]) { + errx(1, "input too long"); + } + } + if (buf_ptr >= buf_end) { + fill_buffer(); + } + } + transient_state = ps; + type_code = lexi(&transient_state); /* read another token */ + if (type_code != newline && type_code != form_feed && + type_code != comment && !transient_state.search_brace) { + ps = transient_state; + } + } } /* end of while (search_brace) */ last_else = 0; check_type: @@ -423,7 +463,7 @@ check_type: if (ps.tos > 1) /* check for balanced braces */ diag2(1, "Stuff missing from end of file"); - if (verbose) { + if (opt.verbose) { printf("There were %d output lines and %d comments\n", ps.out_lines, ps.out_coms); printf("(Lines with comments)/(Lines with code): %6.3f\n", @@ -439,11 +479,10 @@ check_type: (type_code != form_feed)) { if (force_nl && (type_code != semicolon) && - (type_code != lbrace || !btype_2)) { + (type_code != lbrace || !opt.btype_2)) { /* we should force a broken line here */ - if (verbose && !flushed_nl) + if (opt.verbose) diag2(0, "Line broken"); - flushed_nl = false; dump_line(); ps.want_blank = false; /* dont insert blank at line start */ force_nl = false; @@ -453,11 +492,12 @@ check_type: * '}' */ if (s_com != e_com) { /* the turkey has embedded a comment * in a line. fix it */ + int len = e_com - s_com; + + CHECK_SIZE_CODE(len + 3); *e_code++ = ' '; - for (t_ptr = s_com; *t_ptr; ++t_ptr) { - CHECK_SIZE_CODE; - *e_code++ = *t_ptr; - } + memcpy(e_code, s_com, len); + e_code += len; *e_code++ = ' '; *e_code = '\0'; /* null terminate code sect */ ps.want_blank = false; @@ -473,7 +513,10 @@ check_type: /*-----------------------------------------------------*\ | do switch on type of token scanned | \*-----------------------------------------------------*/ - CHECK_SIZE_CODE; + CHECK_SIZE_CODE(3); /* maximum number of increments of e_code + * before the next CHECK_SIZE_CODE or + * dump_line() is 2. After that there's the + * final increment for the null character. */ switch (type_code) { /* now, decide what to do with the token */ case form_feed: /* found a form feed in line */ @@ -484,7 +527,7 @@ check_type: case newline: if (ps.last_token != comma || ps.p_l_follow > 0 - || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { + || !opt.leave_comma || ps.block_init || !break_comma || s_com != e_com) { dump_line(); ps.want_blank = false; } @@ -492,31 +535,34 @@ check_type: break; case lparen: /* got a '(' or '[' */ - ++ps.p_l_follow; /* count parens to make Healy happy */ - if (ps.want_blank && *token != '[' && - (ps.last_token != ident || proc_calls_space - || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon)))) + /* count parens to make Healy happy */ + if (++ps.p_l_follow == nitems(ps.paren_indents)) { + diag3(0, "Reached internal limit of %d unclosed parens", + nitems(ps.paren_indents)); + ps.p_l_follow--; + } + if (*token == '[') + /* not a function pointer declaration or a function call */; + else if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && + ps.procname[0] == '\0' && ps.paren_level == 0) { + /* function pointer declarations */ + indent_declaration(dec_ind, tabs_to_var); + ps.dumped_decl_indent = true; + } + else if (ps.want_blank && + ((ps.last_token != ident && ps.last_token != funcname) || + opt.proc_calls_space || + /* offsetof (1) is never allowed a space; sizeof (2) gets + * one iff -bs; all other keywords (>2) always get a space + * before lparen */ + ps.keyword + opt.Bill_Shannon > 2)) *e_code++ = ' '; - if (ps.in_decl && !ps.block_init) - if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) { - ps.dumped_decl_indent = 1; - sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); - e_code += strlen(e_code); - } - else { - while ((e_code - s_code) < dec_ind) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - } - *e_code++ = token[0]; - } - else - *e_code++ = token[0]; - ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; - if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent - && ps.paren_indents[0] < 2 * ps.ind_size) - ps.paren_indents[0] = 2 * ps.ind_size; ps.want_blank = false; + *e_code++ = token[0]; + ps.paren_indents[ps.p_l_follow - 1] = count_spaces_until(1, s_code, e_code) - 1; + if (sp_sw && ps.p_l_follow == 1 && opt.extra_expression_indent + && ps.paren_indents[0] < 2 * opt.ind_size) + ps.paren_indents[0] = 2 * opt.ind_size; if (ps.in_or_st && *token == '(' && ps.tos <= 2) { /* * this is a kluge to make sure that declarations will be @@ -527,19 +573,19 @@ check_type: ps.in_or_st = false; /* turn off flag for structure decl or * initialization */ } - if (ps.sizeof_keyword) - ps.sizeof_mask |= 1 << ps.p_l_follow; + /* parenthesized type following sizeof or offsetof is not a cast */ + if (ps.keyword == 1 || ps.keyword == 2) + ps.not_cast_mask |= 1 << ps.p_l_follow; break; case rparen: /* got a ')' or ']' */ - rparen_count--; - if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) { + if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { ps.last_u_d = true; ps.cast_mask &= (1 << ps.p_l_follow) - 1; - ps.want_blank = false; + ps.want_blank = opt.space_after_cast; } else ps.want_blank = true; - ps.sizeof_mask &= (1 << ps.p_l_follow) - 1; + ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; if (--ps.p_l_follow < 0) { ps.p_l_follow = 0; diag3(0, "Extra %c", *token); @@ -560,75 +606,48 @@ check_type: parse(hd_type); /* let parser worry about if, or whatever */ } - ps.search_brace = btype_2; /* this should insure that constructs - * such as main(){...} and int[]{...} - * have their braces put in the right - * place */ + ps.search_brace = opt.btype_2; /* this should ensure that + * constructs such as main(){...} + * and int[]{...} have their braces + * put in the right place */ break; case unary_op: /* this could be any unary operation */ - if (ps.want_blank) - *e_code++ = ' '; + if (!ps.dumped_decl_indent && ps.in_decl && !ps.block_init && + ps.procname[0] == '\0' && ps.paren_level == 0) { + /* pointer declarations */ - if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) { - sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); - ps.dumped_decl_indent = 1; - e_code += strlen(e_code); + /* + * if this is a unary op in a declaration, we should indent + * this token + */ + for (i = 0; token[i]; ++i) + /* find length of token */; + indent_declaration(dec_ind - i, tabs_to_var); + ps.dumped_decl_indent = true; } - else { - const char *res = token; - - if (ps.in_decl && !ps.block_init) { /* if this is a unary op - * in a declaration, we - * should indent this - * token */ - for (i = 0; token[i]; ++i); /* find length of token */ - while ((e_code - s_code) < (dec_ind - i)) { - CHECK_SIZE_CODE; - *e_code++ = ' '; /* pad it */ - } - } - if (troff && token[0] == '-' && token[1] == '>') - res = "\\(->"; - for (t_ptr = res; *t_ptr; ++t_ptr) { - CHECK_SIZE_CODE; - *e_code++ = *t_ptr; - } + else if (ps.want_blank) + *e_code++ = ' '; + + { + int len = e_token - s_token; + + CHECK_SIZE_CODE(len); + memcpy(e_code, token, len); + e_code += len; } ps.want_blank = false; break; case binary_op: /* any binary operation */ - if (ps.want_blank) - *e_code++ = ' '; { - const char *res = token; + int len = e_token - s_token; - if (troff) - switch (token[0]) { - case '<': - if (token[1] == '=') - res = "\\(<="; - break; - case '>': - if (token[1] == '=') - res = "\\(>="; - break; - case '!': - if (token[1] == '=') - res = "\\(!="; - break; - case '|': - if (token[1] == '|') - res = "\\(br\\(br"; - else if (token[1] == 0) - res = "\\(br"; - break; - } - for (t_ptr = res; *t_ptr; ++t_ptr) { - CHECK_SIZE_CODE; - *e_code++ = *t_ptr; /* move the operator */ - } + CHECK_SIZE_CODE(len + 1); + if (ps.want_blank) + *e_code++ = ' '; + memcpy(e_code, token, len); + e_code += len; } ps.want_blank = true; break; @@ -669,13 +688,19 @@ check_type: } ps.in_stmt = false; /* seeing a label does not imply we are in a * stmt */ - for (t_ptr = s_code; *t_ptr; ++t_ptr) - *e_lab++ = *t_ptr; /* turn everything so far into a label */ - e_code = s_code; - *e_lab++ = ':'; - *e_lab++ = ' '; - *e_lab = '\0'; - + /* + * turn everything so far into a label + */ + { + int len = e_code - s_code; + + CHECK_SIZE_LAB(len + 3); + memcpy(e_lab, s_code, len); + e_lab += len; + *e_lab++ = ':'; + *e_lab = '\0'; + e_code = s_code; + } force_nl = ps.pcase = scase; /* ps.pcase will be used by * dump_line to decide how to * indent the label. force_nl @@ -686,23 +711,25 @@ check_type: break; case semicolon: /* got a ';' */ - ps.in_or_st = false;/* we are not in an initialization or - * structure declaration */ + if (ps.dec_nest == 0) + ps.in_or_st = false;/* we are not in an initialization or + * structure declaration */ scase = false; /* these will only need resetting in an error */ squest = 0; - if (ps.last_token == rparen && rparen_count == 0) + if (ps.last_token == rparen) ps.in_parameter_declaration = 0; ps.cast_mask = 0; - ps.sizeof_mask = 0; + ps.not_cast_mask = 0; ps.block_init = 0; ps.block_init_level = 0; ps.just_saw_decl--; - if (ps.in_decl && s_code == e_code && !ps.block_init) - while ((e_code - s_code) < (dec_ind - 1)) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - } + if (ps.in_decl && s_code == e_code && !ps.block_init && + !ps.dumped_decl_indent && ps.paren_level == 0) { + /* indent stray semicolons in declarations */ + indent_declaration(dec_ind - 1, tabs_to_var); + ps.dumped_decl_indent = true; + } ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level * structure declaration, we @@ -745,14 +772,14 @@ check_type: ps.block_init_level++; if (s_code != e_code && !ps.block_init) { - if (!btype_2) { + if (!opt.btype_2) { dump_line(); ps.want_blank = false; } else if (ps.in_parameter_declaration && !ps.in_or_st) { ps.i_l_follow = 0; - if (function_brace_split) { /* dump the line prior to the - * brace ... */ + if (opt.function_brace_split) { /* dump the line prior + * to the brace ... */ dump_line(); ps.want_blank = false; } else /* add a space between the decl and brace */ @@ -777,7 +804,12 @@ check_type: * with '{' */ if (ps.in_decl && ps.in_or_st) { /* this is either a structure * declaration or an init */ - di_stack[ps.dec_nest++] = dec_ind; + di_stack[ps.dec_nest] = dec_ind; + if (++ps.dec_nest == nitems(di_stack)) { + diag3(0, "Reached internal limit of %d struct levels", + nitems(di_stack)); + ps.dec_nest--; + } /* ? dec_ind = 0; */ } else { @@ -785,10 +817,11 @@ check_type: * a declaration, so don't do * special indentation of * comments */ - if (blanklines_after_declarations_at_proctop + if (opt.blanklines_after_declarations_at_proctop && ps.in_parameter_declaration) postfix_blankline_requested = 1; ps.in_parameter_declaration = 0; + ps.in_decl = false; } dec_ind = 0; parse(lbrace); /* let parser know about this */ @@ -814,7 +847,7 @@ check_type: ps.block_init_level--; if (s_code != e_code && !ps.block_init) { /* '}' must be first on * line */ - if (verbose) + if (opt.verbose) diag2(0, "Line broken"); dump_line(); } @@ -830,9 +863,9 @@ check_type: } prefix_blankline_requested = 0; parse(rbrace); /* let parser know about this */ - ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead + ps.search_brace = opt.cuddle_else && ps.p_stack[ps.tos] == ifhead && ps.il[ps.tos] >= ps.ind_level; - if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) + if (ps.tos <= 1 && opt.blanklines_after_procs && ps.dec_nest <= 0) postfix_blankline_requested = 1; break; @@ -856,8 +889,8 @@ check_type: case sp_nparen: /* got else, do */ ps.in_stmt = false; if (*token == 'e') { - if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { - if (verbose) + if (e_code != s_code && (!opt.cuddle_else || e_code[-1] != '}')) { + if (opt.verbose) diag2(0, "Line broken"); dump_line();/* make sure this starts a line */ ps.want_blank = false; @@ -868,7 +901,7 @@ check_type: } else { if (e_code != s_code) { /* make sure this starts a line */ - if (verbose) + if (opt.verbose) diag2(0, "Line broken"); dump_line(); ps.want_blank = false; @@ -879,23 +912,30 @@ check_type: } goto copy_id; /* move the token into line */ - case decl: /* we have a declaration type (int, register, - * etc.) */ + case type_def: + case storage: + prefix_blankline_requested = 0; + goto copy_id; + + case structure: + if (ps.p_l_follow > 0) + goto copy_id; + /* FALLTHROUGH */ + case decl: /* we have a declaration type (int, etc.) */ parse(decl); /* let parser worry about indentation */ if (ps.last_token == rparen && ps.tos <= 1) { - ps.in_parameter_declaration = 1; if (s_code != e_code) { dump_line(); ps.want_blank = 0; } } - if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { + if (ps.in_parameter_declaration && opt.indent_parameters && ps.dec_nest == 0) { ps.ind_level = ps.i_l_follow = 1; ps.ind_stmt = 0; } ps.in_or_st = true; /* this might be a structure or initialization * declaration */ - ps.in_decl = ps.decl_on_line = true; + ps.in_decl = ps.decl_on_line = ps.last_token != type_def; if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) ps.just_saw_decl = 2; prefix_blankline_requested = 0; @@ -903,72 +943,35 @@ check_type: if (ps.ind_level == 0 || ps.dec_nest > 0) { /* global variable or struct member in local variable */ - dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; - tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0); + dec_ind = opt.decl_indent > 0 ? opt.decl_indent : i; + tabs_to_var = (opt.use_tabs ? opt.decl_indent > 0 : 0); } else { /* local variable */ - dec_ind = ps.local_decl_indent > 0 ? ps.local_decl_indent : i; - tabs_to_var = (use_tabs ? ps.local_decl_indent > 0 : 0); + dec_ind = opt.local_decl_indent > 0 ? opt.local_decl_indent : i; + tabs_to_var = (opt.use_tabs ? opt.local_decl_indent > 0 : 0); } goto copy_id; + case funcname: case ident: /* got an identifier or constant */ - if (ps.in_decl) { /* if we are in a declaration, we must indent - * identifier */ - if (is_procname == 0 || !procnames_start_line) { - if (!ps.block_init) { - if (troff && !ps.dumped_decl_indent) { - if (ps.want_blank) - *e_code++ = ' '; - ps.want_blank = false; - sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7); - ps.dumped_decl_indent = 1; - e_code += strlen(e_code); - } else { - int cur_dec_ind; - int pos, startpos; - - /* - * in order to get the tab math right for - * indentations that are not multiples of 8 we - * need to modify both startpos and dec_ind - * (cur_dec_ind) here by eight minus the - * remainder of the current starting column - * divided by eight. This seems to be a - * properly working fix - */ - startpos = e_code - s_code; - cur_dec_ind = dec_ind; - pos = startpos; - if ((ps.ind_level * ps.ind_size) % 8 != 0) { - pos += (ps.ind_level * ps.ind_size) % 8; - cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; - } - - if (tabs_to_var) { - while ((pos & ~7) + 8 <= cur_dec_ind) { - CHECK_SIZE_CODE; - *e_code++ = '\t'; - pos = (pos & ~7) + 8; - } - } - while (pos < cur_dec_ind) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - pos++; - } - if (ps.want_blank && e_code - s_code == startpos) - *e_code++ = ' '; - ps.want_blank = false; - } + if (ps.in_decl) { + if (type_code == funcname) { + ps.in_decl = false; + if (opt.procnames_start_line && s_code != e_code) { + *e_code = '\0'; + dump_line(); } - } else { - if (ps.want_blank) + else if (ps.want_blank) { *e_code++ = ' '; + } + ps.want_blank = false; + } + else if (!ps.block_init && !ps.dumped_decl_indent && + ps.paren_level == 0) { /* if we are in a declaration, we + * must indent identifier */ + indent_declaration(dec_ind, tabs_to_var); + ps.dumped_decl_indent = true; ps.want_blank = false; - if (dec_ind && s_code != e_code) - dump_line(); - dec_ind = 0; } } else if (sp_sw && ps.p_l_follow == 0) { @@ -979,23 +982,30 @@ check_type: parse(hd_type); } copy_id: - if (ps.want_blank) - *e_code++ = ' '; - if (troff && ps.its_a_keyword) { - e_code = chfont(&bodyf, &keywordf, e_code); - for (t_ptr = token; *t_ptr; ++t_ptr) { - CHECK_SIZE_CODE; - *e_code++ = keywordf.allcaps && islower(*t_ptr) - ? toupper(*t_ptr) : *t_ptr; - } - e_code = chfont(&keywordf, &bodyf, e_code); + { + int len = e_token - s_token; + + CHECK_SIZE_CODE(len + 1); + if (ps.want_blank) + *e_code++ = ' '; + memcpy(e_code, s_token, len); + e_code += len; } - else - for (t_ptr = token; *t_ptr; ++t_ptr) { - CHECK_SIZE_CODE; - *e_code++ = *t_ptr; - } - ps.want_blank = true; + if (type_code != funcname) + ps.want_blank = true; + break; + + case strpfx: + { + int len = e_token - s_token; + + CHECK_SIZE_CODE(len + 1); + if (ps.want_blank) + *e_code++ = ' '; + memcpy(e_code, token, len); + e_code += len; + } + ps.want_blank = false; break; case period: /* treat a period kind of like a binary @@ -1008,17 +1018,19 @@ check_type: ps.want_blank = (s_code != e_code); /* only put blank after comma * if comma does not start the * line */ - if (ps.in_decl && is_procname == 0 && !ps.block_init) - while ((e_code - s_code) < (dec_ind - 1)) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - } - + if (ps.in_decl && ps.procname[0] == '\0' && !ps.block_init && + !ps.dumped_decl_indent && ps.paren_level == 0) { + /* indent leading commas and not the actual identifiers */ + indent_declaration(dec_ind - 1, tabs_to_var); + ps.dumped_decl_indent = true; + } *e_code++ = ','; if (ps.p_l_follow == 0) { if (ps.block_init_level <= 0) ps.block_init = 0; - if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8)) + if (break_comma && (!opt.leave_comma || + count_spaces_until(compute_code_target(), s_code, e_code) > + opt.max_col - opt.tabsize)) force_nl = true; } break; @@ -1028,6 +1040,7 @@ check_type: (s_lab != e_lab) || (s_code != e_code)) dump_line(); + CHECK_SIZE_LAB(1); *e_lab++ = '#'; /* move whole line to 'label' buffer */ { int in_comment = 0; @@ -1041,14 +1054,12 @@ check_type: fill_buffer(); } while (*buf_ptr != '\n' || (in_comment && !had_eof)) { - CHECK_SIZE_LAB; + CHECK_SIZE_LAB(2); *e_lab = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); switch (*e_lab++) { case BACKSLASH: - if (troff) - *e_lab++ = BACKSLASH; if (!in_comment) { *e_lab++ = *buf_ptr++; if (buf_ptr >= buf_end) @@ -1084,19 +1095,21 @@ check_type: e_lab--; if (e_lab - s_lab == com_end && bp_save == NULL) { /* comment on preprocessor line */ - if (sc_end == NULL) /* if this is the first comment, we - * must set up the buffer */ - sc_end = &(save_com[0]); + if (sc_end == NULL) { /* if this is the first comment, + * we must set up the buffer */ + save_com = sc_buf; + sc_end = &save_com[0]; + } else { *sc_end++ = '\n'; /* add newline between * comments */ *sc_end++ = ' '; --line_no; } - bcopy(s_lab + com_start, sc_end, com_end - com_start); + if (sc_end - save_com + com_end - com_start > sc_size) + errx(1, "input too long"); + memmove(sc_end, s_lab + com_start, com_end - com_start); sc_end += com_end - com_start; - if (sc_end >= &save_com[sc_size]) - abort(); e_lab = s_lab + com_start; while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; @@ -1109,62 +1122,66 @@ check_type: buf_end = sc_end; sc_end = NULL; } + CHECK_SIZE_LAB(1); *e_lab = '\0'; /* null terminate line */ ps.pcase = false; } - if (strncmp(s_lab, "#if", 3) == 0) { - if (blanklines_around_conditional_compilation) { - int c; - prefix_blankline_requested++; - while ((c = getc(input)) == '\n'); - ungetc(c, input); - } - if ((size_t)ifdef_level < sizeof(state_stack)/sizeof(state_stack[0])) { + if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ + if ((size_t)ifdef_level < nitems(state_stack)) { match_state[ifdef_level].tos = -1; state_stack[ifdef_level++] = ps; } else diag2(1, "#if stack overflow"); } - else if (strncmp(s_lab, "#else", 5) == 0) + else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ if (ifdef_level <= 0) - diag2(1, "Unmatched #else"); + diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else"); else { match_state[ifdef_level - 1] = ps; ps = state_stack[ifdef_level - 1]; } + } else if (strncmp(s_lab, "#endif", 6) == 0) { if (ifdef_level <= 0) diag2(1, "Unmatched #endif"); - else { + else ifdef_level--; - -#ifdef undef - /* - * This match needs to be more intelligent before the - * message is useful - */ - if (match_state[ifdef_level].tos >= 0 - && bcmp(&ps, &match_state[ifdef_level], sizeof ps)) - diag2(0, "Syntactically inconsistent #ifdef alternatives"); -#endif + } else { + struct directives { + int size; + const char *string; } - if (blanklines_around_conditional_compilation) { - postfix_blankline_requested++; - n_real_blanklines = 0; + recognized[] = { + {7, "include"}, + {6, "define"}, + {5, "undef"}, + {4, "line"}, + {5, "error"}, + {6, "pragma"} + }; + int d = nitems(recognized); + while (--d >= 0) + if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) + break; + if (d < 0) { + diag2(1, "Unrecognized cpp directive"); + break; } } + if (opt.blanklines_around_conditional_compilation) { + postfix_blankline_requested++; + n_real_blanklines = 0; + } + else { + postfix_blankline_requested = 0; + prefix_blankline_requested = 0; + } break; /* subsequent processing of the newline * character will cause the line to be printed */ case comment: /* we have gotten a / followed by * this is a biggie */ - if (flushed_nl) { /* we should force a broken line here */ - flushed_nl = false; - dump_line(); - ps.want_blank = false; /* dont insert blank at line start */ - force_nl = false; - } pr_comment(); break; } /* end of big switch stmt */ @@ -1194,13 +1211,13 @@ bakcopy(void) p--; if (*p == '/') p++; - sprintf(bakfile, "%s.BAK", p); + sprintf(bakfile, "%s%s", p, simple_backup_suffix); /* copy in_name to backup file */ bakchn = creat(bakfile, 0600); if (bakchn < 0) err(1, "%s", bakfile); - while ((n = read(fileno(input), buff, sizeof buff)) != 0) + while ((n = read(fileno(input), buff, sizeof(buff))) > 0) if (write(bakchn, buff, n) != n) err(1, "%s", bakfile); if (n < 0) @@ -1219,3 +1236,36 @@ bakcopy(void) err(1, "%s", in_name); } } + +static void +indent_declaration(int cur_dec_ind, int tabs_to_var) +{ + int pos = e_code - s_code; + char *startpos = e_code; + + /* + * get the tab math right for indentations that are not multiples of tabsize + */ + if ((ps.ind_level * opt.ind_size) % opt.tabsize != 0) { + pos += (ps.ind_level * opt.ind_size) % opt.tabsize; + cur_dec_ind += (ps.ind_level * opt.ind_size) % opt.tabsize; + } + if (tabs_to_var) { + int tpos; + + CHECK_SIZE_CODE(cur_dec_ind / opt.tabsize); + while ((tpos = opt.tabsize * (1 + pos / opt.tabsize)) <= cur_dec_ind) { + *e_code++ = '\t'; + pos = tpos; + } + } + CHECK_SIZE_CODE(cur_dec_ind - pos + 1); + while (pos < cur_dec_ind) { + *e_code++ = ' '; + pos++; + } + if (e_code == startpos && ps.want_blank) { + *e_code++ = ' '; + ps.want_blank = false; + } +} diff --git a/usr.bin/indent/indent.h b/usr.bin/indent/indent.h index b329ec9583..cd14918d32 100644 --- a/usr.bin/indent/indent.h +++ b/usr.bin/indent/indent.h @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2001 Jens Schweikhardt * All rights reserved. * @@ -23,23 +25,24 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/usr.bin/indent/indent.h,v 1.2 2002/03/30 17:10:20 dwmalone Exp $ + * $FreeBSD: head/usr.bin/indent/indent.h 336333 2018-07-16 05:46:50Z pstef $ */ -void addkey(char *, int); +void add_typename(const char *); +void alloc_typenames(void); int compute_code_target(void); int compute_label_target(void); int count_spaces(int, char *); -int lexi(void); +int count_spaces_until(int, char *, char *); +void init_constant_tt(void); +int lexi(struct parser_state *); void diag2(int, const char *); void diag3(int, const char *, int); void diag4(int, const char *, int, int); void dump_line(void); void fill_buffer(void); void parse(int); -void parsefont(struct fstate *, const char *); void pr_comment(void); void set_defaults(void); void set_option(char *); -void set_profile(void); -void writefdef(struct fstate *f, int); +void set_profile(const char *); diff --git a/usr.bin/indent/indent_codes.h b/usr.bin/indent/indent_codes.h index 008bf2b41a..e17f78a759 100644 --- a/usr.bin/indent/indent_codes.h +++ b/usr.bin/indent/indent_codes.h @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. @@ -33,7 +35,7 @@ * SUCH DAMAGE. * * @(#)indent_codes.h 8.1 (Berkeley) 6/6/93 - * $FreeBSD: src/usr.bin/indent/indent_codes.h,v 1.2 2000/12/09 09:52:51 obrien Exp $ + * $FreeBSD: head/usr.bin/indent/indent_codes.h 334564 2018-06-03 16:21:15Z pstef $ */ #define newline 1 @@ -68,3 +70,8 @@ #define ifhead 30 #define elsehead 31 #define period 32 +#define strpfx 33 +#define storage 34 +#define funcname 35 +#define type_def 36 +#define structure 37 diff --git a/usr.bin/indent/indent_globs.h b/usr.bin/indent/indent_globs.h index 3ff82fa39a..14ee20e536 100644 --- a/usr.bin/indent/indent_globs.h +++ b/usr.bin/indent/indent_globs.h @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. @@ -33,7 +35,7 @@ * SUCH DAMAGE. * * @(#)indent_globs.h 8.1 (Berkeley) 6/6/93 - * $FreeBSD: src/usr.bin/indent/indent_globs.h,v 1.12 2010/03/31 17:05:30 avg Exp $ + * $FreeBSD: head/usr.bin/indent/indent_globs.h 337651 2018-08-11 19:20:06Z pstef $ */ #define BACKSLASH '\\' @@ -42,10 +44,6 @@ #define label_offset 2 /* number of levels a label is placed to left * of code */ -#define tabsize 8 /* the size of a tab */ -#define tabmask 0177770 /* mask used when figuring length of lines - * with tabs */ - #define false 0 #define true 1 @@ -54,43 +52,54 @@ FILE *input; /* the fid for the input file */ FILE *output; /* the output file */ -#define CHECK_SIZE_CODE \ - if (e_code >= l_code) { \ - int nsize = l_code-s_code+400; \ +#define CHECK_SIZE_CODE(desired_size) \ + if (e_code + (desired_size) >= l_code) { \ + int nsize = l_code-s_code + 400 + desired_size; \ + int code_len = e_code-s_code; \ codebuf = (char *) realloc(codebuf, nsize); \ if (codebuf == NULL) \ err(1, NULL); \ - e_code = codebuf + (e_code-s_code) + 1; \ + e_code = codebuf + code_len + 1; \ l_code = codebuf + nsize - 5; \ s_code = codebuf + 1; \ } -#define CHECK_SIZE_COM \ - if (e_com >= l_com) { \ - int nsize = l_com-s_com+400; \ +#define CHECK_SIZE_COM(desired_size) \ + if (e_com + (desired_size) >= l_com) { \ + int nsize = l_com-s_com + 400 + desired_size; \ + int com_len = e_com - s_com; \ + int blank_pos; \ + if (last_bl != NULL) \ + blank_pos = last_bl - combuf; \ + else \ + blank_pos = -1; \ combuf = (char *) realloc(combuf, nsize); \ if (combuf == NULL) \ err(1, NULL); \ - e_com = combuf + (e_com-s_com) + 1; \ + e_com = combuf + com_len + 1; \ + if (blank_pos > 0) \ + last_bl = combuf + blank_pos; \ l_com = combuf + nsize - 5; \ s_com = combuf + 1; \ } -#define CHECK_SIZE_LAB \ - if (e_lab >= l_lab) { \ - int nsize = l_lab-s_lab+400; \ +#define CHECK_SIZE_LAB(desired_size) \ + if (e_lab + (desired_size) >= l_lab) { \ + int nsize = l_lab-s_lab + 400 + desired_size; \ + int label_len = e_lab - s_lab; \ labbuf = (char *) realloc(labbuf, nsize); \ if (labbuf == NULL) \ err(1, NULL); \ - e_lab = labbuf + (e_lab-s_lab) + 1; \ + e_lab = labbuf + label_len + 1; \ l_lab = labbuf + nsize - 5; \ s_lab = labbuf + 1; \ } -#define CHECK_SIZE_TOKEN \ - if (e_token >= l_token) { \ - int nsize = l_token-s_token+400; \ +#define CHECK_SIZE_TOKEN(desired_size) \ + if (e_token + (desired_size) >= l_token) { \ + int nsize = l_token-s_token + 400 + desired_size; \ + int token_len = e_token - s_token; \ tokenbuf = (char *) realloc(tokenbuf, nsize); \ if (tokenbuf == NULL) \ err(1, NULL); \ - e_token = tokenbuf + (e_token-s_token) + 1; \ + e_token = tokenbuf + token_len + 1; \ l_token = tokenbuf + nsize - 5; \ s_token = tokenbuf + 1; \ } @@ -122,8 +131,9 @@ char *buf_ptr; /* ptr to next character to be taken from * in_buffer */ char *buf_end; /* ptr to first after last char in in_buffer */ -char save_com[sc_size]; /* input text is saved here when looking for +char sc_buf[sc_size]; /* input text is saved here when looking for * the brace after an if, while, etc */ +char *save_com; /* start of the comment stored in sc_buf */ char *sc_end; /* pointer into save_com buffer */ char *bp_save; /* saved value of buf_ptr when taking input @@ -131,117 +141,119 @@ char *bp_save; /* saved value of buf_ptr when taking input char *be_save; /* similarly saved value of buf_end */ +struct options { + int blanklines_around_conditional_compilation; + int blanklines_after_declarations_at_proctop; /* this is vaguely + * similar to blanklines_after_decla except + * that in only applies to the first set of + * declarations in a procedure (just after + * the first '{') and it causes a blank line + * to be generated even if there are no + * declarations */ + int blanklines_after_declarations; + int blanklines_after_procs; + int blanklines_before_blockcomments; + int leave_comma; /* if true, never break declarations after + * commas */ + int btype_2; /* when true, brace should be on same line + * as if, while, etc */ + int Bill_Shannon; /* true iff a blank should always be + * inserted after sizeof */ + int comment_delimiter_on_blankline; + int decl_com_ind; /* the column in which comments after + * declarations should be put */ + int cuddle_else; /* true if else should cuddle up to '}' */ + int continuation_indent; /* set to the indentation between the + * edge of code and continuation lines */ + float case_indent; /* The distance to indent case labels from the + * switch statement */ + int com_ind; /* the column in which comments to the right + * of code should start */ + int decl_indent; /* column to indent declared identifiers to */ + int ljust_decl; /* true if declarations should be left + * justified */ + int unindent_displace; /* comments not to the right of code + * will be placed this many + * indentation levels to the left of + * code */ + int extra_expression_indent; /* true if continuation lines from + * the expression part of "if(e)", + * "while(e)", "for(e;e;e)" should be + * indented an extra tab stop so that they + * don't conflict with the code that follows */ + int else_if; /* True iff else if pairs should be handled + * specially */ + int function_brace_split; /* split function declaration and + * brace onto separate lines */ + int format_col1_comments; /* If comments which start in column 1 + * are to be magically reformatted (just + * like comments that begin in later columns) */ + int format_block_comments; /* true if comments beginning with + * `/ * \n' are to be reformatted */ + int indent_parameters; + int ind_size; /* the size of one indentation level */ + int block_comment_max_col; + int local_decl_indent; /* like decl_indent but for locals */ + int lineup_to_parens_always; /* if true, do not attempt to keep + * lined-up code within the margin */ + int lineup_to_parens; /* if true, continued code within parens + * will be lined up to the open paren */ + int proc_calls_space; /* If true, procedure calls look like: + * foo (bar) rather than foo(bar) */ + int procnames_start_line; /* if true, the names of procedures + * being defined get placed in column 1 (ie. + * a newline is placed between the type of + * the procedure and its name) */ + int space_after_cast; /* "b = (int) a" vs "b = (int)a" */ + int star_comment_cont; /* true iff comment continuation lines + * should have stars at the beginning of + * each line. */ + int swallow_optional_blanklines; + int auto_typedefs; /* set true to recognize identifiers + * ending in "_t" like typedefs */ + int tabsize; /* the size of a tab */ + int max_col; /* the maximum allowable line length */ + int use_tabs; /* set true to use tabs for spacing, false + * uses all spaces */ + int verbose; /* when true, non-essential error messages + * are printed */ +} opt; + int found_err; -int pointer_as_binop; -int blanklines_after_declarations; -int blanklines_before_blockcomments; -int blanklines_after_procs; -int blanklines_around_conditional_compilation; -int swallow_optional_blanklines; int n_real_blanklines; int prefix_blankline_requested; int postfix_blankline_requested; int break_comma; /* when true and not in parens, break after a * comma */ -int btype_2; /* when true, brace should be on same line as - * if, while, etc */ float case_ind; /* indentation level to be used for a "case * n:" */ int code_lines; /* count of lines with code */ int had_eof; /* set to true when input is exhausted */ int line_no; /* the current line number. */ -int max_col; /* the maximum allowable line length */ -int verbose; /* when true, non-essential error messages are - * printed */ -int cuddle_else; /* true if else should cuddle up to '}' */ -int star_comment_cont; /* true iff comment continuation lines should - * have stars at the beginning of each line. */ -int comment_delimiter_on_blankline; -int troff; /* true iff were generating troff input */ -int procnames_start_line; /* if true, the names of procedures - * being defined get placed in column - * 1 (ie. a newline is placed between - * the type of the procedure and its - * name) */ -int proc_calls_space; /* If true, procedure calls look like: - * foo(bar) rather than foo (bar) */ -int format_block_comments; /* true if comments beginning with - * `/ * \n' are to be reformatted */ -int format_col1_comments; /* If comments which start in column 1 - * are to be magically reformatted - * (just like comments that begin in - * later columns) */ int inhibit_formatting; /* true if INDENT OFF is in effect */ int suppress_blanklines;/* set iff following blanklines should be * suppressed */ -int continuation_indent;/* set to the indentation between the edge of - * code and continuation lines */ -int lineup_to_parens; /* if true, continued code within parens will - * be lined up to the open paren */ -int Bill_Shannon; /* true iff a blank should always be inserted - * after sizeof */ -int blanklines_after_declarations_at_proctop; /* This is vaguely - * similar to - * blanklines_after_decla - * rations except that - * it only applies to - * the first set of - * declarations in a - * procedure (just after - * the first '{') and it - * causes a blank line - * to be generated even - * if there are no - * declarations */ -int block_comment_max_col; -int extra_expression_indent; /* true if continuation lines from the - * expression part of "if(e)", - * "while(e)", "for(e;e;e)" should be - * indented an extra tab stop so that - * they don't conflict with the code - * that follows */ -int function_brace_split; /* split function declaration and - * brace onto separate lines */ -int use_tabs; /* set true to use tabs for spacing, - * false uses all spaces */ -int auto_typedefs; /* set true to recognize identifiers - * ending in "_t" like typedefs */ - -/* -troff font state information */ -struct fstate { - char font[4]; - char size; - int allcaps:1; -}; -char *chfont(struct fstate *, struct fstate *, char *); - -struct fstate - keywordf, /* keyword font */ - stringf, /* string font */ - boxcomf, /* Box comment font */ - blkcomf, /* Block comment font */ - scomf, /* Same line comment font */ - bodyf; /* major body font */ - - -#define STACKSIZE 150 +#define STACKSIZE 256 struct parser_state { int last_token; - struct fstate cfont; /* Current font */ int p_stack[STACKSIZE]; /* this is the parsers stack */ int il[STACKSIZE]; /* this stack stores indentation levels */ float cstk[STACKSIZE];/* used to store case stmt indentation levels */ int box_com; /* set to true when we are in a "boxed" * comment. In that case, the first non-blank * char should be lined up with the / in / followed by * */ - int comment_delta, - n_comment_delta; - int cast_mask; /* indicates which close parens close off - * casts */ - int sizeof_mask; /* indicates which close parens close off - * sizeof''s */ + int comment_delta; /* used to set up indentation for all lines + * of a boxed comment after the first one */ + int n_comment_delta;/* remembers how many columns there were + * before the start of a box comment so that + * forthcoming lines of the comment are + * indented properly */ + int cast_mask; /* indicates which close parens potentially + * close off casts */ + int not_cast_mask; /* indicates which close parens definitely + * close off something else than casts */ int block_init; /* true iff inside a block initialization */ int block_init_level; /* The level of brace nesting in an * initialization */ @@ -257,13 +269,9 @@ struct parser_state { * column 1 */ int com_col; /* this is the column in which the current * comment should start */ - int com_ind; /* the column in which comments to the right - * of code should start */ int com_lines; /* the number of lines with comments, set by * dump_line */ int dec_nest; /* current nesting level for structure or init */ - int decl_com_ind; /* the column in which comments after - * declarations should be put */ int decl_on_line; /* set to true if this line of code has part * of a declaration on it */ int i_l_follow; /* the level to which ind_level should be set @@ -273,16 +281,11 @@ struct parser_state { * slightly different */ int in_stmt; /* set to 1 while in a stmt */ int ind_level; /* the current indentation level */ - int ind_size; /* the size of one indentation level */ int ind_stmt; /* set to 1 if next line should have an extra * indentation level because we are in the * middle of a stmt */ int last_u_d; /* set to true after scanning a token which * forces a following operator to be unary */ - int leave_comma; /* if true, never break declarations after - * commas */ - int ljust_decl; /* true if declarations should be left - * justified */ int out_coms; /* the number of comments processed, set by * pr_comment */ int out_lines; /* the number of lines written, set by @@ -298,32 +301,19 @@ struct parser_state { int search_brace; /* set to true by parse when it is necessary * to buffer up all info up to the start of a * stmt after an if, while, etc */ - int unindent_displace; /* comments not to the right of code - * will be placed this many - * indentation levels to the left of - * code */ int use_ff; /* set to one if the current line should be * terminated with a form feed */ int want_blank; /* set to true when the following token should * be prefixed by a blank. (Said prefixing is * ignored in some cases.) */ - int else_if; /* True iff else if pairs should be handled - * specially */ - int decl_indent; /* column to indent declared identifiers to */ - int local_decl_indent; /* like decl_indent but for locals */ - int its_a_keyword; - int sizeof_keyword; + int keyword; /* the type of a keyword or 0 */ int dumped_decl_indent; - float case_indent; /* The distance to indent case labels from the - * switch statement */ int in_parameter_declaration; - int indent_parameters; int tos; /* pointer to top of stack */ char procname[100]; /* The name of the current procedure */ int just_saw_decl; } ps; int ifdef_level; -int rparen_count; struct parser_state state_stack[5]; struct parser_state match_state[5]; diff --git a/usr.bin/indent/io.c b/usr.bin/indent/io.c index 34ea098216..621ea13d6e 100644 --- a/usr.bin/indent/io.c +++ b/usr.bin/indent/io.c @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. @@ -29,7 +31,7 @@ * SUCH DAMAGE. * * @(#)io.c 8.1 (Berkeley) 6/6/93 - * $FreeBSD: src/usr.bin/indent/io.c,v 1.15 2005/11/13 20:37:25 dwmalone Exp $ + * $FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $ */ #include @@ -56,13 +58,6 @@ dump_line(void) static int not_first_line; if (ps.procname[0]) { - if (troff) { - if (comment_open) { - comment_open = 0; - fprintf(output, ".*/\n"); - } - fprintf(output, ".Pr \"%s\"\n", ps.procname); - } ps.ind_level = 0; ps.procname[0] = 0; } @@ -78,7 +73,7 @@ dump_line(void) suppress_blanklines = 0; ps.bl_line = false; if (prefix_blankline_requested && not_first_line) { - if (swallow_optional_blanklines) { + if (opt.swallow_optional_blanklines) { if (n_real_blanklines == 1) n_real_blanklines = 0; } @@ -106,6 +101,7 @@ dump_line(void) } while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; + *e_lab = '\0'; cur_col = pad_output(1, compute_label_target()); if (s_lab[0] == '#' && (strncmp(s_lab, "#else", 5) == 0 || strncmp(s_lab, "#endif", 6) == 0)) { @@ -150,109 +146,42 @@ dump_line(void) putc(*p, output); cur_col = count_spaces(cur_col, s_code); } - if (s_com != e_com) { - if (troff) { - int all_here = 0; - char *p; - - if (e_com[-1] == '/' && e_com[-2] == '*') - e_com -= 2, all_here++; - while (e_com > s_com && e_com[-1] == ' ') - e_com--; - *e_com = 0; - p = s_com; - while (*p == ' ') - p++; - if (p[0] == '/' && p[1] == '*') - p += 2, all_here++; - else if (p[0] == '*') - p += p[1] == '/' ? 2 : 1; - while (*p == ' ') - p++; - if (*p == 0) - goto inhibit_newline; - if (comment_open < 2 && ps.box_com) { - comment_open = 0; - fprintf(output, ".*/\n"); - } - if (comment_open == 0) { - if ('a' <= *p && *p <= 'z') - *p = *p + 'A' - 'a'; - if (e_com - p < 50 && all_here == 2) { - char *follow = p; - fprintf(output, "\n.nr C! \\w\1"); - while (follow < e_com) { - switch (*follow) { - case '\n': - putc(' ', output); - case 1: - break; - case '\\': - putc('\\', output); - /* FALLTHROUGH */ - default: - putc(*follow, output); - } - follow++; - } - putc(1, output); - } - fprintf(output, "\n./* %dp %d %dp\n", - ps.com_col * 7, - (s_code != e_code || s_lab != e_lab) - ps.box_com, - target_col * 7); - } - comment_open = 1 + ps.box_com; - while (*p) { - if (*p == BACKSLASH) - putc(BACKSLASH, output); - putc(*p++, output); + if (s_com != e_com) { /* print comment, if any */ + int target = ps.com_col; + char *com_st = s_com; + + target += ps.comment_delta; + while (*com_st == '\t') /* consider original indentation in + * case this is a box comment */ + com_st++, target += opt.tabsize; + while (target <= 0) + if (*com_st == ' ') + target++, com_st++; + else if (*com_st == '\t') { + target = opt.tabsize * (1 + (target - 1) / opt.tabsize) + 1; + com_st++; } + else + target = 1; + if (cur_col > target) { /* if comment can't fit on this line, + * put it on next line */ + putc('\n', output); + cur_col = 1; + ++ps.out_lines; } - else { /* print comment, if any */ - int target = ps.com_col; - char *com_st = s_com; - - target += ps.comment_delta; - while (*com_st == '\t') - com_st++, target += 8; /* ? */ - while (target <= 0) - if (*com_st == ' ') - target++, com_st++; - else if (*com_st == '\t') - target = ((target - 1) & ~7) + 9, com_st++; - else - target = 1; - if (cur_col > target) { /* if comment cant fit on this line, - * put it on next line */ - putc('\n', output); - cur_col = 1; - ++ps.out_lines; - } - while (e_com > com_st && isspace(e_com[-1])) - e_com--; - cur_col = pad_output(cur_col, target); - if (!ps.box_com) { - if (star_comment_cont && (com_st[1] != '*' || e_com <= com_st + 1)) { - if (com_st[1] == ' ' && com_st[0] == ' ' && e_com > com_st + 1) - com_st[1] = '*'; - else - fwrite(" * ", com_st[0] == '\t' ? 2 : com_st[0] == '*' ? 1 : 3, 1, output); - } - } - fwrite(com_st, e_com - com_st, 1, output); - ps.comment_delta = ps.n_comment_delta; - cur_col = count_spaces(cur_col, com_st); - ++ps.com_lines; /* count lines with comments */ - } + while (e_com > com_st && isspace((unsigned char)e_com[-1])) + e_com--; + (void)pad_output(cur_col, target); + fwrite(com_st, e_com - com_st, 1, output); + ps.comment_delta = ps.n_comment_delta; + ++ps.com_lines; /* count lines with comments */ } if (ps.use_ff) putc('\014', output); else putc('\n', output); -inhibit_newline: ++ps.out_lines; - if (ps.just_saw_decl == 1 && blanklines_after_declarations) { + if (ps.just_saw_decl == 1 && opt.blanklines_after_declarations) { prefix_blankline_requested = 1; ps.just_saw_decl = 0; } @@ -272,28 +201,31 @@ inhibit_newline: ps.dumped_decl_indent = 0; *(e_lab = s_lab) = '\0'; /* reset buffers */ *(e_code = s_code) = '\0'; - *(e_com = s_com) = '\0'; + *(e_com = s_com = combuf + 1) = '\0'; ps.ind_level = ps.i_l_follow; ps.paren_level = ps.p_l_follow; - paren_target = -ps.paren_indents[ps.paren_level - 1]; + if (ps.paren_level > 0) + paren_target = -ps.paren_indents[ps.paren_level - 1]; not_first_line = 1; } int compute_code_target(void) { - int target_col = ps.ind_size * ps.ind_level + 1; + int target_col = opt.ind_size * ps.ind_level + 1; if (ps.paren_level) - if (!lineup_to_parens) - target_col += continuation_indent - * (2 * continuation_indent == ps.ind_size ? 1 : ps.paren_level); + if (!opt.lineup_to_parens) + target_col += opt.continuation_indent * + (2 * opt.continuation_indent == opt.ind_size ? 1 : ps.paren_level); + else if (opt.lineup_to_parens_always) + target_col = paren_target; else { int w; int t = paren_target; - if ((w = count_spaces(t, s_code) - max_col) > 0 - && count_spaces(target_col, s_code) <= max_col) { + if ((w = count_spaces(t, s_code) - opt.max_col) > 0 + && count_spaces(target_col, s_code) <= opt.max_col) { t -= w + 1; if (t > target_col) target_col = t; @@ -302,7 +234,7 @@ compute_code_target(void) target_col = t; } else if (ps.ind_stmt) - target_col += continuation_indent; + target_col += opt.continuation_indent; return target_col; } @@ -310,9 +242,9 @@ int compute_label_target(void) { return - ps.pcase ? (int) (case_ind * ps.ind_size) + 1 + ps.pcase ? (int) (case_ind * opt.ind_size) + 1 : *s_lab == '#' ? 1 - : ps.ind_size * (ps.ind_level - label_offset) + 1; + : opt.ind_size * (ps.ind_level - label_offset) + 1; } @@ -362,13 +294,14 @@ fill_buffer(void) had_eof = true; break; } - *p++ = i; + if (i != '\0') + *p++ = i; if (i == '\n') break; } buf_ptr = in_buffer; buf_end = p; - if (p[-2] == '/' && p[-3] == '*') { + if (p - in_buffer > 2 && p[-2] == '/' && p[-3] == '*') { if (in_buffer[3] == 'I' && strncmp(in_buffer, "/**INDENT**", 11) == 0) fill_buffer(); /* flush indent error message */ else { @@ -454,24 +387,22 @@ pad_output(int current, int target) /* current: the current column value */ /* target: position we want it at */ { - int curr; /* internal column pointer */ - int tcur; + int curr; /* internal column pointer */ - if (troff) - fprintf(output, "\\h'|%dp'", (target - 1) * 7); - else { - if (current >= target) - return (current); /* line is already long enough */ - curr = current; - if (use_tabs) { - while ((tcur = ((curr - 1) & tabmask) + tabsize + 1) <= target) { - putc('\t', output); - curr = tcur; - } - } - while (curr++ < target) - putc(' ', output); /* pad with final blanks */ + if (current >= target) + return (current); /* line is already long enough */ + curr = current; + if (opt.use_tabs) { + int tcur; + + while ((tcur = opt.tabsize * (1 + (curr - 1) / opt.tabsize) + 1) <= target) { + putc('\t', output); + curr = tcur; + } } + while (curr++ < target) + putc(' ', output); /* pad with final blanks */ + return (target); } @@ -496,18 +427,15 @@ pad_output(int current, int target) * */ int -count_spaces(int current, char *buffer) +count_spaces_until(int cur, char *buffer, char *end) /* * this routine figures out where the character position will be after * printing the text in buffer starting at column "current" */ { char *buf; /* used to look thru buffer */ - int cur; /* current character counter */ - cur = current; - - for (buf = buffer; *buf != '\0'; ++buf) { + for (buf = buffer; *buf != '\0' && buf != end; ++buf) { switch (*buf) { case '\n': @@ -516,7 +444,7 @@ count_spaces(int current, char *buffer) break; case '\t': - cur = ((cur - 1) & tabmask) + tabsize + 1; + cur = opt.tabsize * (1 + (cur - 1) / opt.tabsize) + 1; break; case 010: /* backspace */ @@ -531,6 +459,12 @@ count_spaces(int current, char *buffer) return (cur); } +int +count_spaces(int cur, char *buffer) +{ + return (count_spaces_until(cur, buffer, NULL)); +} + void diag4(int level, const char *msg, int a, int b) { @@ -581,78 +515,3 @@ diag2(int level, const char *msg) fprintf(stderr, "\n"); } } - -void -writefdef(struct fstate *f, int nm) -{ - fprintf(output, ".ds f%c %s\n.nr s%c %d\n", - nm, f->font, nm, f->size); -} - -char * -chfont(struct fstate *of, struct fstate *nf, char *s) -{ - if (of->font[0] != nf->font[0] - || of->font[1] != nf->font[1]) { - *s++ = '\\'; - *s++ = 'f'; - if (nf->font[1]) { - *s++ = '('; - *s++ = nf->font[0]; - *s++ = nf->font[1]; - } - else - *s++ = nf->font[0]; - } - if (nf->size != of->size) { - *s++ = '\\'; - *s++ = 's'; - if (nf->size < of->size) { - *s++ = '-'; - *s++ = '0' + of->size - nf->size; - } - else { - *s++ = '+'; - *s++ = '0' + nf->size - of->size; - } - } - return s; -} - -void -parsefont(struct fstate *f, const char *s0) -{ - const char *s = s0; - int sizedelta = 0; - - bzero(f, sizeof *f); - while (*s) { - if (isdigit(*s)) - f->size = f->size * 10 + *s - '0'; - else if (isupper(*s)) - if (f->font[0]) - f->font[1] = *s; - else - f->font[0] = *s; - else if (*s == 'c') - f->allcaps = 1; - else if (*s == '+') - sizedelta++; - else if (*s == '-') - sizedelta--; - else { - errx(1, "bad font specification: %s", s0); - } - s++; - } - if (f->font[0] == 0) - f->font[0] = 'R'; - if (bodyf.size == 0) - bodyf.size = 11; - if (f->size == 0) - f->size = bodyf.size + sizedelta; - else if (sizedelta > 0) - f->size += bodyf.size; - else - f->size = bodyf.size - f->size; -} diff --git a/usr.bin/indent/lexi.c b/usr.bin/indent/lexi.c index 85cc868b65..b608ae5261 100644 --- a/usr.bin/indent/lexi.c +++ b/usr.bin/indent/lexi.c @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. @@ -29,7 +31,7 @@ * SUCH DAMAGE. * * @(#)lexi.c 8.1 (Berkeley) 6/6/93 - * $FreeBSD: src/usr.bin/indent/lexi.c,v 1.21 2010/04/15 21:41:07 avg Exp $ + * $FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $ */ /* @@ -43,166 +45,176 @@ #include #include #include +#include + #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" -#define alphanum 1 -#define opchar 3 - struct templ { const char *rwd; int rwcode; }; -struct templ specials[1000] = +/* + * This table has to be sorted alphabetically, because it'll be used in binary + * search. For the same reason, string must be the first thing in struct templ. + */ +struct templ specials[] = { - {"switch", 1}, - {"case", 2}, - {"break", 0}, - {"struct", 3}, - {"union", 3}, - {"enum", 3}, - {"default", 2}, - {"int", 4}, + {"_Bool", 4}, + {"_Complex", 4}, + {"_Imaginary", 4}, + {"auto", 10}, + {"bool", 4}, + {"break", 9}, + {"case", 8}, {"char", 4}, - {"float", 4}, + {"complex", 4}, + {"const", 4}, + {"continue", 12}, + {"default", 8}, + {"do", 6}, {"double", 4}, + {"else", 6}, + {"enum", 3}, + {"extern", 10}, + {"float", 4}, + {"for", 5}, + {"global", 4}, + {"goto", 9}, + {"if", 5}, + {"imaginary", 4}, + {"inline", 12}, + {"int", 4}, {"long", 4}, + {"offsetof", 1}, + {"register", 10}, + {"restrict", 12}, + {"return", 9}, {"short", 4}, - {"typdef", 4}, + {"signed", 4}, + {"sizeof", 2}, + {"static", 10}, + {"struct", 3}, + {"switch", 7}, + {"typedef", 11}, + {"union", 3}, {"unsigned", 4}, - {"register", 4}, - {"static", 4}, - {"global", 4}, - {"extern", 4}, {"void", 4}, - {"const", 4}, {"volatile", 4}, - {"goto", 0}, - {"return", 0}, - {"if", 5}, - {"while", 5}, - {"for", 5}, - {"else", 6}, - {"do", 6}, - {"sizeof", 7}, - {0, 0} + {"while", 5} }; -char chartype[128] = -{ /* this is used to facilitate the decision of - * what type (alphanumeric, operator) each - * character is */ - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3, 0, 0, 1, 3, 3, 0, - 0, 0, 3, 3, 0, 3, 0, 3, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 0, 0, 3, 3, 3, 3, - 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 0, 0, 3, 1, - 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 3, 0, 3, 0 +const char **typenames; +int typename_count; +int typename_top = -1; + +/* + * The transition table below was rewritten by hand from lx's output, given + * the following definitions. lx is Katherine Flavel's lexer generator. + * + * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/; + * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i; + * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+; + * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?; + * + * D+ E FS? -> $float; + * D* "." D+ E? FS? -> $float; + * D+ "." E? FS? -> $float; HP H+ IS? -> $int; + * HP H+ P FS? -> $float; NZ D* IS? -> $int; + * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int; + * HP H+ "." P FS -> $float; BP B+ IS? -> $int; + */ +static char const *table[] = { + /* examples: + 00 + s 0xx + t 00xaa + a 11 101100xxa.. + r 11ee0001101lbuuxx.a.pp + t.01.e+008bLuxll0Ll.aa.p+0 + states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */ + ['0'] = "CEIDEHHHIJQ U Q VUVVZZZ", + ['1'] = "DEIDEHHHIJQ U Q VUVVZZZ", + ['7'] = "DEIDEHHHIJ U VUVVZZZ", + ['9'] = "DEJDEHHHJJ U VUVVZZZ", + ['a'] = " U VUVV ", + ['b'] = " K U VUVV ", + ['e'] = " FFF FF U VUVV ", + ['f'] = " f f U VUVV f", + ['u'] = " MM M i iiM M ", + ['x'] = " N ", + ['p'] = " FFX ", + ['L'] = " LLf fL PR Li L f", + ['l'] = " OOf fO S P O i O f", + ['+'] = " G Y ", + ['.'] = "B EE EE T W ", + /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ + [0] = "uuiifuufiuuiiuiiiiiuiuuuuu", }; +static int +strcmp_type(const void *e1, const void *e2) +{ + return (strcmp(e1, *(const char * const *)e2)); +} + int -lexi(void) +lexi(struct parser_state *state) { int unary_delim; /* this is set to 1 if the current token * forces a following operator to be unary */ - static int last_code; /* the last token type returned */ - static int l_struct; /* set to 1 if the last token was 'struct' */ int code; /* internal code to be returned */ char qchar; /* the delimiter character for a string */ e_token = s_token; /* point to start of place to save token */ unary_delim = false; - ps.col_1 = ps.last_nl; /* tell world that this token started in - * column 1 iff the last thing scanned was nl */ - ps.last_nl = false; + state->col_1 = state->last_nl; /* tell world that this token started + * in column 1 iff the last thing + * scanned was a newline */ + state->last_nl = false; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ - ps.col_1 = false; /* leading blanks imply token is not in column + state->col_1 = false; /* leading blanks imply token is not in column * 1 */ if (++buf_ptr >= buf_end) fill_buffer(); } /* Scan an alphanumeric token */ - if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { + if (isalnum((unsigned char)*buf_ptr) || + *buf_ptr == '_' || *buf_ptr == '$' || + (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) { /* * we have a character or number */ - const char *j; /* used for searching thru list of - * - * reserved words */ struct templ *p; - if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { - int seendot = 0, - seenexp = 0, - seensfx = 0; - if (*buf_ptr == '0' && - (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { - *e_token++ = *buf_ptr++; - *e_token++ = *buf_ptr++; - while (isxdigit(*buf_ptr)) { - CHECK_SIZE_TOKEN; - *e_token++ = *buf_ptr++; + if (isdigit((unsigned char)*buf_ptr) || + (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) { + char s; + unsigned char i; + + for (s = 'A'; s != 'f' && s != 'i' && s != 'u'; ) { + i = (unsigned char)*buf_ptr; + if (i >= nitems(table) || table[i] == NULL || + table[i][s - 'A'] == ' ') { + s = table[0][s - 'A']; + break; } + s = table[i][s - 'A']; + CHECK_SIZE_TOKEN(1); + *e_token++ = *buf_ptr++; + if (buf_ptr >= buf_end) + fill_buffer(); } - else - while (1) { - if (*buf_ptr == '.') { - if (seendot) - break; - else - seendot++; - } - CHECK_SIZE_TOKEN; - *e_token++ = *buf_ptr++; - if (!isdigit(*buf_ptr) && *buf_ptr != '.') { - if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) - break; - else { - seenexp++; - seendot++; - CHECK_SIZE_TOKEN; - *e_token++ = *buf_ptr++; - if (*buf_ptr == '+' || *buf_ptr == '-') - *e_token++ = *buf_ptr++; - } - } - } - while (1) { - if (!(seensfx & 1) && - (*buf_ptr == 'U' || *buf_ptr == 'u')) { - CHECK_SIZE_TOKEN; - *e_token++ = *buf_ptr++; - seensfx |= 1; - continue; - } - if (!(seensfx & 2) && - (*buf_ptr == 'L' || *buf_ptr == 'l')) { - CHECK_SIZE_TOKEN; - if (buf_ptr[1] == buf_ptr[0]) - *e_token++ = *buf_ptr++; - *e_token++ = *buf_ptr++; - seensfx |= 2; - continue; - } - break; - } + /* s now indicates the type: f(loating), i(integer), u(nknown) */ } else - while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { + while (isalnum((unsigned char)*buf_ptr) || + *buf_ptr == BACKSLASH || + *buf_ptr == '_' || *buf_ptr == '$') { /* fill_buffer() terminates buffer with newline */ if (*buf_ptr == BACKSLASH) { if (*(buf_ptr + 1) == '\n') { @@ -212,88 +224,77 @@ lexi(void) } else break; } - CHECK_SIZE_TOKEN; + CHECK_SIZE_TOKEN(1); /* copy it over */ *e_token++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } - *e_token++ = '\0'; + *e_token = '\0'; + + if (s_token[0] == 'L' && s_token[1] == '\0' && + (*buf_ptr == '"' || *buf_ptr == '\'')) + return (strpfx); + while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ if (++buf_ptr >= buf_end) fill_buffer(); } - ps.its_a_keyword = false; - ps.sizeof_keyword = false; - if (l_struct && !ps.p_l_follow) { + state->keyword = 0; + if (state->last_token == structure && !state->p_l_follow) { /* if last token was 'struct' and we're not * in parentheses, then this token * should be treated as a declaration */ - l_struct = false; - last_code = ident; - ps.last_u_d = true; + state->last_u_d = true; return (decl); } - ps.last_u_d = l_struct; /* Operator after identifier is binary - * unless last token was 'struct' */ - l_struct = false; - last_code = ident; /* Remember that this is the code we will - * return */ - - if (auto_typedefs) { - const char *q = s_token; - size_t q_len = strlen(q); - /* Check if we have an "_t" in the end */ - if (q_len > 2 && - (strcmp(q + q_len - 2, "_t") == 0)) { - ps.its_a_keyword = true; - ps.last_u_d = true; - goto found_auto_typedef; - } - } - /* - * This loop will check if the token is a keyword. + * Operator after identifier is binary unless last token was 'struct' */ - for (p = specials; (j = p->rwd) != NULL; p++) { - const char *q = s_token; /* point at scanned token */ - if (*j++ != *q++ || *j++ != *q++) - continue; /* This test depends on the fact that - * identifiers are always at least 1 character - * long (ie. the first two bytes of the - * identifier are always meaningful) */ - if (q[-1] == 0) - break; /* If its a one-character identifier */ - while (*q++ == *j) - if (*j++ == 0) - goto found_keyword; /* I wish that C had a multi-level - * break... */ - } - if (p->rwd) { /* we have a keyword */ - found_keyword: - ps.its_a_keyword = true; - ps.last_u_d = true; + state->last_u_d = (state->last_token == structure); + + p = bsearch(s_token, + specials, + sizeof(specials) / sizeof(specials[0]), + sizeof(specials[0]), + strcmp_type); + if (p == NULL) { /* not a special keyword... */ + char *u; + + /* ... so maybe a type_t or a typedef */ + if ((opt.auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && + strcmp(u, "_t") == 0) || (typename_top >= 0 && + bsearch(s_token, typenames, typename_top + 1, + sizeof(typenames[0]), strcmp_type))) { + state->keyword = 4; /* a type name */ + state->last_u_d = true; + goto found_typename; + } + } else { /* we have a keyword */ + state->keyword = p->rwcode; + state->last_u_d = true; switch (p->rwcode) { - case 1: /* it is a switch */ + case 7: /* it is a switch */ return (swstmt); - case 2: /* a case or default */ + case 8: /* a case or default */ return (casestmt); case 3: /* a "struct" */ - /* - * Next time around, we will want to know that we have had a - * 'struct' - */ - l_struct = true; /* FALLTHROUGH */ - case 4: /* one of the declaration keywords */ - found_auto_typedef: - if (ps.p_l_follow) { - ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask; - break; /* inside parens: cast, param list or sizeof */ + found_typename: + if (state->p_l_follow) { + /* inside parens: cast, param list, offsetof or sizeof */ + state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask; + } + if (state->last_token == period || state->last_token == unary_op) { + state->keyword = 0; + break; } - last_code = decl; + if (p != NULL && p->rwcode == 3) + return (structure); + if (state->p_l_follow) + break; return (decl); case 5: /* if, while, for */ @@ -302,22 +303,28 @@ lexi(void) case 6: /* do, else */ return (sp_nparen); - case 7: - ps.sizeof_keyword = true; + case 10: /* storage class specifier */ + return (storage); + + case 11: /* typedef */ + return (type_def); + /* FALLTHROUGH */ default: /* all others are treated like any other * identifier */ return (ident); } /* end of switch */ } /* end of if (found_it) */ - if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { + if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 && + state->in_parameter_declaration == 0 && state->block_init == 0) { char *tp = buf_ptr; while (tp < buf_end) if (*tp++ == ')' && (*tp == ';' || *tp == ',')) goto not_proc; - strncpy(ps.procname, token, sizeof ps.procname - 1); - ps.in_parameter_declaration = 1; - rparen_count = 1; + strncpy(state->procname, token, sizeof state->procname - 1); + if (state->in_decl) + state->in_parameter_declaration = 1; + return (funcname); not_proc:; } /* @@ -325,26 +332,25 @@ lexi(void) * token is in fact a declaration keyword -- one that has been * typedefd */ - if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') - && !ps.p_l_follow - && !ps.block_init - && (ps.last_token == rparen || ps.last_token == semicolon || - ps.last_token == decl || - ps.last_token == lbrace || ps.last_token == rbrace)) { - ps.its_a_keyword = true; - ps.last_u_d = true; - last_code = decl; + else if (!state->p_l_follow && !state->block_init && + !state->in_stmt && + ((*buf_ptr == '*' && buf_ptr[1] != '=') || + isalpha((unsigned char)*buf_ptr)) && + (state->last_token == semicolon || state->last_token == lbrace || + state->last_token == rbrace)) { + state->keyword = 4; /* a type name */ + state->last_u_d = true; return decl; } - if (last_code == decl) /* if this is a declared variable, then - * following sign is unary */ - ps.last_u_d = true; /* will make "int a -1" work */ - last_code = ident; + if (state->last_token == decl) /* if this is a declared variable, + * then following sign is unary */ + state->last_u_d = true; /* will make "int a -1" work */ return (ident); /* the ident is not in the list */ } /* end of procesing for alpanum character */ /* Scan a non-alphanumeric token */ + CHECK_SIZE_TOKEN(3); /* things like "<<=" */ *e_token++ = *buf_ptr; /* if it is only a one-character token, it is * moved here */ *e_token = '\0'; @@ -353,8 +359,8 @@ lexi(void) switch (*token) { case '\n': - unary_delim = ps.last_u_d; - ps.last_nl = true; /* remember that we just had a newline */ + unary_delim = state->last_u_d; + state->last_nl = true; /* remember that we just had a newline */ code = (had_eof ? 0 : newline); /* @@ -366,32 +372,19 @@ lexi(void) case '\'': /* start of quoted character */ case '"': /* start of string */ qchar = *token; - if (troff) { - e_token[-1] = '`'; - if (qchar == '"') - *e_token++ = '`'; - e_token = chfont(&bodyf, &stringf, e_token); - } do { /* copy the string */ while (1) { /* move one character or [/] */ if (*buf_ptr == '\n') { diag2(1, "Unterminated literal"); goto stop_lit; } - CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, - * since CHECK_SIZE guarantees that there - * are at least 5 entries left */ + CHECK_SIZE_TOKEN(2); *e_token = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*e_token == BACKSLASH) { /* if escape, copy extra char */ if (*buf_ptr == '\n') /* check for escaped newline */ ++line_no; - if (troff) { - *++e_token = BACKSLASH; - if (*buf_ptr == BACKSLASH) - *++e_token = BACKSLASH; - } *++e_token = *buf_ptr++; ++e_token; /* we must increment this again because we * copied two chars */ @@ -402,11 +395,6 @@ lexi(void) break; /* we copied one character */ } /* end of while (1) */ } while (*e_token++ != qchar); - if (troff) { - e_token = chfont(&stringf, &bodyf, e_token - 1); - if (qchar == '"') - *e_token++ = '\''; - } stop_lit: code = ident; break; @@ -423,7 +411,7 @@ stop_lit: break; case '#': - unary_delim = ps.last_u_d; + unary_delim = state->last_u_d; code = preesc; break; @@ -446,21 +434,21 @@ stop_lit: unary_delim = true; /* - * if (ps.in_or_st) ps.block_init = 1; + * if (state->in_or_st) state->block_init = 1; */ - /* ? code = ps.block_init ? lparen : lbrace; */ + /* ? code = state->block_init ? lparen : lbrace; */ code = lbrace; break; case ('}'): unary_delim = true; - /* ? code = ps.block_init ? rparen : rbrace; */ + /* ? code = state->block_init ? rparen : rbrace; */ code = rbrace; break; case 014: /* a form feed */ - unary_delim = ps.last_u_d; - ps.last_nl = true; /* remember this so we can set 'ps.col_1' + unary_delim = state->last_u_d; + state->last_nl = true; /* remember this so we can set 'state->col_1' * right */ code = form_feed; break; @@ -477,15 +465,15 @@ stop_lit: case '-': case '+': /* check for -, +, --, ++ */ - code = (ps.last_u_d ? unary_op : binary_op); + code = (state->last_u_d ? unary_op : binary_op); unary_delim = true; if (*buf_ptr == token[0]) { /* check for doubled character */ *e_token++ = *buf_ptr++; /* buffer overflow will be checked at end of loop */ - if (last_code == ident || last_code == rparen) { - code = (ps.last_u_d ? unary_op : postop); + if (state->last_token == ident || state->last_token == rparen) { + code = (state->last_u_d ? unary_op : postop); /* check for following ++ or -- */ unary_delim = false; } @@ -496,33 +484,21 @@ stop_lit: else if (*buf_ptr == '>') { /* check for operator -> */ *e_token++ = *buf_ptr++; - if (!pointer_as_binop) { - unary_delim = false; - code = unary_op; - ps.want_blank = false; - } + unary_delim = false; + code = unary_op; + state->want_blank = false; } break; /* buffer overflow will be checked at end of * switch */ case '=': - if (ps.in_or_st) - ps.block_init = 1; -#ifdef undef - if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ - e_token[-1] = *buf_ptr++; - if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) - *e_token++ = *buf_ptr++; - *e_token++ = '='; /* Flip =+ to += */ - *e_token = 0; - } -#else + if (state->in_or_st) + state->block_init = 1; if (*buf_ptr == '=') {/* == */ *e_token++ = '='; /* Flip =+ to += */ buf_ptr++; *e_token = 0; } -#endif code = binary_op; unary_delim = true; break; @@ -538,8 +514,38 @@ stop_lit: } if (*buf_ptr == '=') *e_token++ = *buf_ptr++; - code = (ps.last_u_d ? unary_op : binary_op); + code = (state->last_u_d ? unary_op : binary_op); + unary_delim = true; + break; + + case '*': unary_delim = true; + if (!state->last_u_d) { + if (*buf_ptr == '=') + *e_token++ = *buf_ptr++; + code = binary_op; + break; + } + while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) { + if (*buf_ptr == '*') { + CHECK_SIZE_TOKEN(1); + *e_token++ = *buf_ptr; + } + if (++buf_ptr >= buf_end) + fill_buffer(); + } + if (ps.in_decl) { + char *tp = buf_ptr; + + while (isalpha((unsigned char)*tp) || + isspace((unsigned char)*tp)) { + if (++tp >= buf_end) + fill_buffer(); + } + if (*tp == '(') + ps.procname[0] = ' '; + } + code = unary_op; break; default: @@ -551,50 +557,89 @@ stop_lit: fill_buffer(); code = comment; - unary_delim = ps.last_u_d; + unary_delim = state->last_u_d; break; } while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { /* * handle ||, &&, etc, and also things as in int *****i */ + CHECK_SIZE_TOKEN(1); *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } - code = (ps.last_u_d ? unary_op : binary_op); + code = (state->last_u_d ? unary_op : binary_op); unary_delim = true; } /* end of switch */ - if (code != newline) { - l_struct = false; - last_code = code; - } if (buf_ptr >= buf_end) /* check for input buffer empty */ fill_buffer(); - ps.last_u_d = unary_delim; + state->last_u_d = unary_delim; + CHECK_SIZE_TOKEN(1); *e_token = '\0'; /* null terminate the token */ return (code); } -/* - * Add the given keyword to the keyword table, using val as the keyword type - */ +/* Initialize constant transition table */ +void +init_constant_tt(void) +{ + table['-'] = table['+']; + table['8'] = table['9']; + table['2'] = table['3'] = table['4'] = table['5'] = table['6'] = table['7']; + table['A'] = table['C'] = table['D'] = table['c'] = table['d'] = table['a']; + table['B'] = table['b']; + table['E'] = table['e']; + table['U'] = table['u']; + table['X'] = table['x']; + table['P'] = table['p']; + table['F'] = table['f']; +} + void -addkey(char *key, int val) +alloc_typenames(void) { - struct templ *p = specials; - while (p->rwd) - if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) + + typenames = (const char **)malloc(sizeof(typenames[0]) * + (typename_count = 16)); + if (typenames == NULL) + err(1, NULL); +} + +void +add_typename(const char *key) +{ + int comparison; + const char *copy; + + if (typename_top + 1 >= typename_count) { + typenames = realloc((void *)typenames, + sizeof(typenames[0]) * (typename_count *= 2)); + if (typenames == NULL) + err(1, NULL); + } + if (typename_top == -1) + typenames[++typename_top] = copy = strdup(key); + else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { + /* take advantage of sorted input */ + if (comparison == 0) /* remove duplicates */ return; - else - p++; - if (p >= specials + sizeof specials / sizeof specials[0]) - return; /* For now, table overflows are silently - * ignored */ - p->rwd = key; - p->rwcode = val; - p[1].rwd = NULL; - p[1].rwcode = 0; + typenames[++typename_top] = copy = strdup(key); + } + else { + int p; + + for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) + /* find place for the new key */; + if (comparison == 0) /* remove duplicates */ + return; + memmove(&typenames[p + 1], &typenames[p], + sizeof(typenames[0]) * (++typename_top - p)); + typenames[p] = copy = strdup(key); + } + + if (copy == NULL) + err(1, NULL); } diff --git a/usr.bin/indent/parse.c b/usr.bin/indent/parse.c index 6b98e5d392..2dc5e4a51c 100644 --- a/usr.bin/indent/parse.c +++ b/usr.bin/indent/parse.c @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. @@ -29,9 +31,10 @@ * SUCH DAMAGE. * * @(#)parse.c 8.1 (Berkeley) 6/6/93 - * $FreeBSD: src/usr.bin/indent/parse.c,v 1.10 2003/06/15 09:28:17 charnier Exp $ + * $FreeBSD: head/usr.bin/indent/parse.c 337651 2018-08-11 19:20:06Z pstef $ */ +#include #include #include "indent_globs.h" #include "indent_codes.h" @@ -60,7 +63,7 @@ parse(int tk) /* tk: the code for the construct scanned */ * input */ case decl: /* scanned a declaration word */ - ps.search_brace = btype_2; + ps.search_brace = opt.btype_2; /* indicate that following brace should be on same line */ if (ps.p_stack[ps.tos] != decl) { /* only put one declaration * onto stack */ @@ -69,7 +72,7 @@ parse(int tk) /* tk: the code for the construct scanned */ ps.p_stack[++ps.tos] = decl; ps.il[ps.tos] = ps.i_l_follow; - if (ps.ljust_decl) {/* only do if we want left justified + if (opt.ljust_decl) {/* only do if we want left justified * declarations */ ps.ind_level = 0; for (i = ps.tos - 1; i > 0; --i) @@ -82,15 +85,21 @@ parse(int tk) /* tk: the code for the construct scanned */ break; case ifstmt: /* scanned if (...) */ - if (ps.p_stack[ps.tos] == elsehead && ps.else_if) /* "else if ..." */ - ps.i_l_follow = ps.il[ps.tos]; + if (ps.p_stack[ps.tos] == elsehead && opt.else_if) /* "else if ..." */ + /* + * Note that the stack pointer here is decremented, effectively + * reducing "else if" to "if". This saves a lot of stack space + * in case of a long "if-else-if ... else-if" sequence. + */ + ps.i_l_follow = ps.il[ps.tos--]; + /* the rest is the same as for dolit and forstmt */ /* FALLTHROUGH */ case dolit: /* 'do' */ case forstmt: /* for (...) */ ps.p_stack[++ps.tos] = tk; ps.il[ps.tos] = ps.ind_level = ps.i_l_follow; ++ps.i_l_follow; /* subsequent statements should be indented 1 */ - ps.search_brace = btype_2; + ps.search_brace = opt.btype_2; break; case lbrace: /* scanned { */ @@ -108,7 +117,7 @@ parse(int tk) /* tk: the code for the construct scanned */ /* * it is a group as part of a while, for, etc. */ - if (ps.p_stack[ps.tos] == swstmt && ps.case_indent >= 1) + if (ps.p_stack[ps.tos] == swstmt && opt.case_indent >= 1) --ps.ind_level; /* * for a switch, brace should be two levels out from the code @@ -134,7 +143,7 @@ parse(int tk) /* tk: the code for the construct scanned */ ps.p_stack[++ps.tos] = whilestmt; ps.il[ps.tos] = ps.i_l_follow; ++ps.i_l_follow; - ps.search_brace = btype_2; + ps.search_brace = opt.btype_2; } break; @@ -150,13 +159,13 @@ parse(int tk) /* tk: the code for the construct scanned */ * be in 1 level */ ps.p_stack[ps.tos] = elsehead; /* remember if with else */ - ps.search_brace = btype_2 | ps.else_if; + ps.search_brace = opt.btype_2 | opt.else_if; } break; case rbrace: /* scanned a } */ /* stack should have or */ - if (ps.p_stack[ps.tos - 1] == lbrace) { + if (ps.tos > 0 && ps.p_stack[ps.tos - 1] == lbrace) { ps.ind_level = ps.i_l_follow = ps.il[--ps.tos]; ps.p_stack[ps.tos] = stmt; } @@ -169,12 +178,12 @@ parse(int tk) /* tk: the code for the construct scanned */ ps.cstk[ps.tos] = case_ind; /* save current case indent level */ ps.il[ps.tos] = ps.i_l_follow; - case_ind = ps.i_l_follow + ps.case_indent; /* cases should be one + case_ind = ps.i_l_follow + opt.case_indent; /* cases should be one * level down from * switch */ - ps.i_l_follow += ps.case_indent + 1; /* statements should be two + ps.i_l_follow += opt.case_indent + 1; /* statements should be two * levels in */ - ps.search_brace = btype_2; + ps.search_brace = opt.btype_2; break; case semicolon: /* this indicates a simple stmt */ @@ -191,6 +200,9 @@ parse(int tk) /* tk: the code for the construct scanned */ } /* end of switch */ + if (ps.tos >= STACKSIZE - 1) + errx(1, "Parser stack overflow"); + reduce(); /* see if any reduction can be done */ #ifdef debug diff --git a/usr.bin/indent/pr_comment.c b/usr.bin/indent/pr_comment.c index 108a989ac5..1e7c027d1e 100644 --- a/usr.bin/indent/pr_comment.c +++ b/usr.bin/indent/pr_comment.c @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. @@ -29,13 +31,15 @@ * SUCH DAMAGE. * * @(#)pr_comment.c 8.1 (Berkeley) 6/6/93 - * $FreeBSD: src/usr.bin/indent/pr_comment.c,v 1.8 2003/06/15 09:28:17 charnier Exp $ + * $FreeBSD: head/usr.bin/indent/pr_comment.c 334927 2018-06-10 16:44:18Z pstef $ */ #include #include #include +#include #include "indent_globs.h" +#include "indent_codes.h" #include "indent.h" /* * NAME: @@ -77,38 +81,28 @@ pr_comment(void) char *last_bl; /* points to the last blank in the output * buffer */ char *t_ptr; /* used for moving string */ - int unix_comment; /* tri-state variable used to decide if it is - * a unix-style comment. 0 means only blanks - * since /+*, 1 means regular style comment, 2 - * means unix style comment */ - int break_delim = comment_delimiter_on_blankline; + int break_delim = opt.comment_delimiter_on_blankline; int l_just_saw_decl = ps.just_saw_decl; - /* - * int ps.last_nl = 0; true iff the last significant thing - * weve seen is a newline - */ - int one_liner = 1; /* true iff this comment is a one-liner */ - adj_max_col = max_col; + + adj_max_col = opt.max_col; ps.just_saw_decl = 0; last_bl = NULL; /* no blanks found so far */ ps.box_com = false; /* at first, assume that we are not in * a boxed comment or some other * comment that should not be touched */ ++ps.out_coms; /* keep track of number of comments */ - unix_comment = 1; /* set flag to let us figure out if there is a - * unix-style comment ** DISABLED: use 0 to - * reenable this hack! */ /* Figure where to align and how to treat the comment */ - if (ps.col_1 && !format_col1_comments) { /* if comment starts in column + if (ps.col_1 && !opt.format_col1_comments) { /* if comment starts in column * 1 it should not be touched */ ps.box_com = true; + break_delim = false; ps.com_col = 1; } else { if (*buf_ptr == '-' || *buf_ptr == '*' || - (*buf_ptr == '\n' && !format_block_comments)) { + (*buf_ptr == '\n' && !opt.format_block_comments)) { ps.box_com = true; /* A comment with a '-' or '*' immediately * after the /+* is assumed to be a boxed * comment. A comment with a newline @@ -116,7 +110,7 @@ pr_comment(void) * be a block comment and is treated as a * box comment unless format_block_comments * is nonzero (the default). */ - break_delim = 0; + break_delim = false; } if ( /* ps.bl_line && */ (s_lab == e_lab) && (s_code == e_code)) { /* klg: check only if this line is blank */ @@ -124,14 +118,14 @@ pr_comment(void) * If this (*and previous lines are*) blank, dont put comment way * out at left */ - ps.com_col = (ps.ind_level - ps.unindent_displace) * ps.ind_size + 1; - adj_max_col = block_comment_max_col; + ps.com_col = (ps.ind_level - opt.unindent_displace) * opt.ind_size + 1; + adj_max_col = opt.block_comment_max_col; if (ps.com_col <= 1) - ps.com_col = 1 + !format_col1_comments; + ps.com_col = 1 + !opt.format_col1_comments; } else { int target_col; - break_delim = 0; + break_delim = false; if (s_code != e_code) target_col = count_spaces(compute_code_target(), s_code); else { @@ -139,17 +133,27 @@ pr_comment(void) if (s_lab != e_lab) target_col = count_spaces(compute_label_target(), s_lab); } - ps.com_col = ps.decl_on_line || ps.ind_level == 0 ? ps.decl_com_ind : ps.com_ind; - if (ps.com_col < target_col) - ps.com_col = ((target_col + 7) & ~7) + 1; + ps.com_col = ps.decl_on_line || ps.ind_level == 0 ? opt.decl_com_ind : opt.com_ind; + if (ps.com_col <= target_col) + ps.com_col = opt.tabsize * (1 + (target_col - 1) / opt.tabsize) + 1; if (ps.com_col + 24 > adj_max_col) adj_max_col = ps.com_col + 24; } } if (ps.box_com) { - buf_ptr[-2] = 0; - ps.n_comment_delta = 1 - count_spaces(1, in_buffer); - buf_ptr[-2] = '/'; + /* + * Find out how much indentation there was originally, because that + * much will have to be ignored by pad_output() in dump_line(). This + * is a box comment, so nothing changes -- not even indentation. + * + * The comment we're about to read usually comes from in_buffer, + * unless it has been copied into save_com. + */ + char *start; + + start = buf_ptr >= save_com && buf_ptr < save_com + sc_size ? + sc_buf : in_buffer; + ps.n_comment_delta = 1 - count_spaces_until(1, start, buf_ptr - 2); } else { ps.n_comment_delta = 0; @@ -162,35 +166,48 @@ pr_comment(void) if (*buf_ptr != ' ' && !ps.box_com) *e_com++ = ' '; - *e_com = '\0'; - if (troff) { - now_col = 1; - adj_max_col = 80; + /* + * Don't put a break delimiter if this is a one-liner that won't wrap. + */ + if (break_delim) + for (t_ptr = buf_ptr; *t_ptr != '\0' && *t_ptr != '\n'; t_ptr++) { + if (t_ptr >= buf_end) + fill_buffer(); + if (t_ptr[0] == '*' && t_ptr[1] == '/') { + if (adj_max_col >= count_spaces_until(ps.com_col, buf_ptr, t_ptr + 2)) + break_delim = false; + break; + } + } + + if (break_delim) { + char *t = e_com; + e_com = s_com + 2; + *e_com = 0; + if (opt.blanklines_before_blockcomments && ps.last_token != lbrace) + prefix_blankline_requested = 1; + dump_line(); + e_com = s_com = t; + if (!ps.box_com && opt.star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } - else - now_col = count_spaces(ps.com_col, s_com); /* figure what column we - * would be in if we - * printed the comment - * now */ /* Start to copy the comment */ while (1) { /* this loop will go until the comment is * copied */ - if (*buf_ptr > 040 && *buf_ptr != '*') - ps.last_nl = 0; - CHECK_SIZE_COM; switch (*buf_ptr) { /* this checks for various spcl cases */ case 014: /* check for a form feed */ + CHECK_SIZE_COM(3); if (!ps.box_com) { /* in a text comment, break the line here */ ps.use_ff = true; /* fix so dump_line uses a form feed */ dump_line(); last_bl = NULL; - *e_com++ = ' '; - *e_com++ = '*'; - *e_com++ = ' '; - while (*++buf_ptr == ' ' || *buf_ptr == '\t'); + if (!ps.box_com && opt.star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + while (*++buf_ptr == ' ' || *buf_ptr == '\t') + ; } else { if (++buf_ptr >= buf_end) @@ -202,69 +219,26 @@ pr_comment(void) case '\n': if (had_eof) { /* check for unexpected eof */ printf("Unterminated comment\n"); - *e_com = '\0'; dump_line(); return; } - one_liner = 0; + last_bl = NULL; + CHECK_SIZE_COM(4); if (ps.box_com || ps.last_nl) { /* if this is a boxed comment, * we dont ignore the newline */ - if (s_com == e_com) { + if (s_com == e_com) *e_com++ = ' '; - *e_com++ = ' '; - } - *e_com = '\0'; if (!ps.box_com && e_com - s_com > 3) { - if (break_delim == 1 && s_com[0] == '/' - && s_com[1] == '*' && s_com[2] == ' ') { - char *t = e_com; - break_delim = 2; - e_com = s_com + 2; - *e_com = 0; - if (blanklines_before_blockcomments) - prefix_blankline_requested = 1; - dump_line(); - e_com = t; - s_com[0] = s_com[1] = s_com[2] = ' '; - } dump_line(); - CHECK_SIZE_COM; - *e_com++ = ' '; - *e_com++ = ' '; + if (opt.star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } dump_line(); - now_col = ps.com_col; + if (!ps.box_com && opt.star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } else { ps.last_nl = 1; - if (unix_comment != 1) { /* we not are in unix_style - * comment */ - if (unix_comment == 0 && s_code == e_code) { - /* - * if it is a UNIX-style comment, ignore the - * requirement that previous line be blank for - * unindention - */ - ps.com_col = (ps.ind_level - ps.unindent_displace) * ps.ind_size + 1; - if (ps.com_col <= 1) - ps.com_col = 2; - } - unix_comment = 2; /* permanently remember that we are in - * this type of comment */ - dump_line(); - ++line_no; - now_col = ps.com_col; - *e_com++ = ' '; - /* - * fix so that the star at the start of the line will line - * up - */ - do /* flush leading white space */ - if (++buf_ptr >= buf_end) - fill_buffer(); - while (*buf_ptr == ' ' || *buf_ptr == '\t'); - break; - } if (*(e_com - 1) == ' ' || *(e_com - 1) == '\t') last_bl = e_com - 1; /* @@ -273,9 +247,7 @@ pr_comment(void) */ else { /* otherwise, insert one */ last_bl = e_com; - CHECK_SIZE_COM; *e_com++ = ' '; - ++now_col; } } ++line_no; /* keep track of input line number */ @@ -301,117 +273,71 @@ pr_comment(void) * of comment */ if (++buf_ptr >= buf_end) /* get to next char after * */ fill_buffer(); - - if (unix_comment == 0) /* set flag to show we are not in - * unix-style comment */ - unix_comment = 1; - + CHECK_SIZE_COM(4); if (*buf_ptr == '/') { /* it is the end!!! */ end_of_comment: if (++buf_ptr >= buf_end) fill_buffer(); - - if (*(e_com - 1) != ' ' && !ps.box_com) { /* insure blank before - * end */ + if (break_delim) { + if (e_com > s_com + 3) { + dump_line(); + } + else + s_com = e_com; *e_com++ = ' '; - ++now_col; - } - if (break_delim == 1 && !one_liner && s_com[0] == '/' - && s_com[1] == '*' && s_com[2] == ' ') { - char *t = e_com; - break_delim = 2; - e_com = s_com + 2; - *e_com = 0; - if (blanklines_before_blockcomments) - prefix_blankline_requested = 1; - dump_line(); - e_com = t; - s_com[0] = s_com[1] = s_com[2] = ' '; } - if (break_delim == 2 && e_com > s_com + 3 - /* now_col > adj_max_col - 2 && !ps.box_com */ ) { - *e_com = '\0'; - dump_line(); - now_col = ps.com_col; - } - CHECK_SIZE_COM; - *e_com++ = '*'; - *e_com++ = '/'; - *e_com = '\0'; + if (e_com[-1] != ' ' && e_com[-1] != '\t' && !ps.box_com) + *e_com++ = ' '; /* ensure blank before end */ + *e_com++ = '*', *e_com++ = '/', *e_com = '\0'; ps.just_saw_decl = l_just_saw_decl; return; } - else { /* handle isolated '*' */ + else /* handle isolated '*' */ *e_com++ = '*'; - ++now_col; - } break; default: /* we have a random char */ - if (unix_comment == 0 && *buf_ptr != ' ' && *buf_ptr != '\t') - unix_comment = 1; /* we are not in unix-style comment */ - - *e_com = *buf_ptr++; - if (buf_ptr >= buf_end) - fill_buffer(); - - if (*e_com == '\t') /* keep track of column */ - now_col = ((now_col - 1) & tabmask) + tabsize + 1; - else if (*e_com == '\b') /* this is a backspace */ - --now_col; - else - ++now_col; - - if (*e_com == ' ' || *e_com == '\t') - last_bl = e_com; - /* remember we saw a blank */ - - ++e_com; - if (now_col > adj_max_col && !ps.box_com && unix_comment == 1 && e_com[-1] > ' ') { + now_col = count_spaces_until(ps.com_col, s_com, e_com); + do { + CHECK_SIZE_COM(1); + *e_com = *buf_ptr++; + if (buf_ptr >= buf_end) + fill_buffer(); + if (*e_com == ' ' || *e_com == '\t') + last_bl = e_com; /* remember we saw a blank */ + ++e_com; + now_col++; + } while (!memchr("*\n\r\b\t", *buf_ptr, 6) && + (now_col <= adj_max_col || !last_bl)); + ps.last_nl = false; + if (now_col > adj_max_col && !ps.box_com && e_com[-1] > ' ') { /* * the comment is too long, it must be broken up */ - if (break_delim == 1 && s_com[0] == '/' - && s_com[1] == '*' && s_com[2] == ' ') { - char *t = e_com; - break_delim = 2; - e_com = s_com + 2; - *e_com = 0; - if (blanklines_before_blockcomments) - prefix_blankline_requested = 1; + if (last_bl == NULL) { dump_line(); - e_com = t; - s_com[0] = s_com[1] = s_com[2] = ' '; - } - if (last_bl == NULL) { /* we have seen no blanks */ - last_bl = e_com; /* fake it */ - *e_com++ = ' '; + if (!ps.box_com && opt.star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + break; } - *e_com = '\0'; /* print what we have */ - *last_bl = '\0'; - while (last_bl > s_com && last_bl[-1] < 040) - *--last_bl = 0; + *e_com = '\0'; e_com = last_bl; dump_line(); - - *e_com++ = ' '; /* add blanks for continuation */ - *e_com++ = ' '; - *e_com++ = ' '; - - t_ptr = last_bl + 1; + if (!ps.box_com && opt.star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + for (t_ptr = last_bl + 1; *t_ptr == ' ' || *t_ptr == '\t'; + t_ptr++) + ; last_bl = NULL; - if (t_ptr >= e_com) { - while (*t_ptr == ' ' || *t_ptr == '\t') - t_ptr++; - while (*t_ptr != '\0') { /* move unprinted part of - * comment down in buffer */ - if (*t_ptr == ' ' || *t_ptr == '\t') - last_bl = e_com; - *e_com++ = *t_ptr++; - } + /* + * t_ptr will be somewhere between e_com (dump_line() reset) + * and l_com. So it's safe to copy byte by byte from t_ptr + * to e_com without any CHECK_SIZE_COM(). + */ + while (*t_ptr != '\0') { + if (*t_ptr == ' ' || *t_ptr == '\t') + last_bl = e_com; + *e_com++ = *t_ptr++; } - *e_com = '\0'; - now_col = count_spaces(ps.com_col, s_com); /* recompute current - * position */ } break; } -- 2.41.0