1 /* Process TeX index dribble output into an actual index.
2 $Id: texindex.c,v 1.41 2002/03/11 19:55:46 karl Exp $
4 Copyright (C) 1987, 91, 92, 96, 97, 98, 99, 2000, 01, 02
5 Free Software Foundation, Inc.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
24 static char *program_name = "texindex";
27 # include "../src/config.h"
28 /* Some s/os.h files redefine these. */
35 #if !defined (HAVE_MEMSET)
37 #define memset(ptr, ignore, count) bzero (ptr, count)
42 #if !defined (SEEK_SET)
46 #endif /* !SEEK_SET */
48 /* When sorting in core, this structure describes one line
49 and the position and length of its first keyfield. */
52 char *text; /* The actual text of the line. */
54 char *text; /* The start of the key (for textual comparison). */
55 long number; /* The numeric value (for numeric comparison). */
57 long keylen; /* Length of KEY field. */
60 /* This structure describes a field to use as a sort key. */
63 int startwords; /* Number of words to skip. */
64 int startchars; /* Number of additional chars to skip. */
65 int endwords; /* Number of words to ignore at end. */
66 int endchars; /* Ditto for characters of last word. */
67 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */
68 char fold_case; /* Non-zero means case doesn't matter. */
69 char reverse; /* Non-zero means compare in reverse order. */
70 char numeric; /* Non-zeros means field is ASCII numeric. */
71 char positional; /* Sort according to file position. */
72 char braced; /* Count balanced-braced groupings as fields. */
75 /* Vector of keyfields to use. */
76 struct keyfield keyfields[3];
78 /* Number of keyfields stored in that vector. */
79 int num_keyfields = 3;
81 /* Vector of input file names, terminated with a null pointer. */
84 /* Vector of corresponding output file names, or NULL, meaning default it
85 (add an `s' to the end). */
88 /* Length of `infiles'. */
91 /* Pointer to the array of pointers to lines being sorted. */
94 /* The allocated length of `linearray'. */
97 /* Directory to use for temporary files. On Unix, it ends with a slash. */
100 /* Start of filename to use for temporary files. */
103 /* Number of last temporary file. */
106 /* Number of last temporary file already deleted.
107 Temporary files are deleted by `flush_tempfiles' in order of creation. */
108 int last_deleted_tempcount;
110 /* During in-core sort, this points to the base of the data block
111 which contains all the lines of data. */
114 /* Additional command switches .*/
116 /* Nonzero means do not delete tempfiles -- for debugging. */
119 /* Forward declarations of functions in this file. */
120 void decode_command ();
121 void sort_in_core ();
122 void sort_offline ();
127 char *find_braced_pos ();
128 char *find_braced_end ();
130 int compare_field ();
135 void pfatal_with_name ();
138 void *xmalloc (), *xrealloc ();
140 void flush_tempfiles ();
142 #define MAX_IN_CORE_SORT 500000
152 last_deleted_tempcount = 0;
154 #ifdef HAVE_SETLOCALE
155 /* Set locale via LC_ALL. */
156 setlocale (LC_ALL, "");
159 /* Set the text message domain. */
160 bindtextdomain (PACKAGE, LOCALEDIR);
161 textdomain (PACKAGE);
163 /* Describe the kind of sorting to do. */
164 /* The first keyfield uses the first braced field and folds case. */
165 keyfields[0].braced = 1;
166 keyfields[0].fold_case = 1;
167 keyfields[0].endwords = -1;
168 keyfields[0].endchars = -1;
170 /* The second keyfield uses the second braced field, numerically. */
171 keyfields[1].braced = 1;
172 keyfields[1].numeric = 1;
173 keyfields[1].startwords = 1;
174 keyfields[1].endwords = -1;
175 keyfields[1].endchars = -1;
177 /* The third keyfield (which is ignored while discarding duplicates)
178 compares the whole line. */
179 keyfields[2].endwords = -1;
180 keyfields[2].endchars = -1;
182 decode_command (argc, argv);
184 tempbase = mktemp (concat ("txiXXXXXX", "", ""));
186 /* Process input files completely, one by one. */
188 for (i = 0; i < num_infiles; i++)
195 desc = open (infiles[i], O_RDONLY, 0);
197 pfatal_with_name (infiles[i]);
199 if (stat (infiles[i], &instat))
200 pfatal_with_name (infiles[i]);
201 if (S_ISDIR (instat.st_mode))
206 pfatal_with_name (infiles[i]);
209 lseek (desc, (off_t) 0, SEEK_END);
210 ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
214 outfile = outfiles[i];
217 outfile = concat (infiles[i], "s", "");
220 if (ptr < MAX_IN_CORE_SORT)
221 /* Sort a small amount of data. */
222 sort_in_core (infiles[i], ptr, outfile);
224 sort_offline (infiles[i], ptr, outfile);
227 flush_tempfiles (tempcount);
230 return 0; /* Avoid bogus warnings. */
243 TEXINDEX_OPTION texindex_options[] = {
244 { "--help", "-h", (int *)NULL, 0, (char *)NULL,
245 N_("display this help and exit") },
246 { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
247 N_("keep temporary files around after processing") },
248 { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
249 N_("do not keep temporary files around after processing (default)") },
250 { "--output", "-o", (int *)NULL, 0, "FILE",
251 N_("send output to FILE") },
252 { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
253 N_("display version information and exit") },
254 { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
262 FILE *f = result_value ? stderr : stdout;
264 fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
265 fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
266 /* Avoid trigraph nonsense. */
268 _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
269 '?', '?'); /* avoid trigraph in cat-id-tbl.c */
270 fprintf (f, _("\nOptions:\n"));
272 for (i = 0; texindex_options[i].long_name; i++)
276 if (texindex_options[i].short_name)
277 fprintf (f, "%s, ", texindex_options[i].short_name);
280 texindex_options[i].long_name,
281 texindex_options[i].arg_name
282 ? texindex_options[i].arg_name : "");
284 fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
287 Email bug reports to bug-texinfo@gnu.org,\n\
288 general questions and discussion to help-texinfo@gnu.org.\n\
289 Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
292 xexit (result_value);
295 /* Decode the command line arguments to set the parameter variables
296 and set up the vector of keyfields and the vector of input files. */
299 decode_command (argc, argv)
307 /* Store default values into parameter variables. */
309 tempdir = getenv ("TMPDIR");
311 tempdir = getenv ("TEMP");
313 tempdir = getenv ("TMP");
315 tempdir = DEFAULT_TMPDIR;
317 tempdir = concat (tempdir, "/", "");
321 /* Allocate ARGC input files, which must be enough. */
323 infiles = (char **) xmalloc (argc * sizeof (char *));
324 outfiles = (char **) xmalloc (argc * sizeof (char *));
328 while (arg_index < argc)
330 char *arg = argv[arg_index++];
334 if (strcmp (arg, "--version") == 0)
336 printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
338 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
339 There is NO warranty. You may redistribute this software\n\
340 under the terms of the GNU General Public License.\n\
341 For more information about these matters, see the files named COPYING.\n"),
345 else if ((strcmp (arg, "--keep") == 0) ||
346 (strcmp (arg, "-k") == 0))
350 else if ((strcmp (arg, "--help") == 0) ||
351 (strcmp (arg, "-h") == 0))
355 else if ((strcmp (arg, "--output") == 0) ||
356 (strcmp (arg, "-o") == 0))
358 if (argv[arg_index] != (char *)NULL)
362 *(op - 1) = argv[arg_index];
373 *op++ = (char *)NULL;
377 /* Record number of keyfields and terminate list of filenames. */
378 num_infiles = ip - infiles;
380 if (num_infiles == 0)
384 /* Return a name for a temporary file. */
391 sprintf (tempsuffix, ".%d", count);
392 return concat (tempdir, tempbase, tempsuffix);
395 /* Delete all temporary files up to TO_COUNT. */
398 flush_tempfiles (to_count)
403 while (last_deleted_tempcount < to_count)
404 unlink (maketempname (++last_deleted_tempcount));
408 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
411 compare_full (line1, line2)
412 char **line1, **line2;
416 /* Compare using the first keyfield;
417 if that does not distinguish the lines, try the second keyfield;
420 for (i = 0; i < num_keyfields; i++)
422 long length1, length2;
423 char *start1 = find_field (&keyfields[i], *line1, &length1);
424 char *start2 = find_field (&keyfields[i], *line2, &length2);
425 int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
426 start2, length2, *line2 - text_base);
429 if (keyfields[i].reverse)
435 return 0; /* Lines match exactly. */
438 /* Compare LINE1 and LINE2, described by structures
439 in which the first keyfield is identified in advance.
440 For positional sorting, assumes that the order of the lines in core
441 reflects their nominal order. */
444 compare_prepared (line1, line2)
445 struct lineinfo *line1, *line2;
451 /* Compare using the first keyfield, which has been found for us already. */
452 if (keyfields->positional)
454 if (line1->text - text_base > line2->text - text_base)
459 else if (keyfields->numeric)
460 tem = line1->key.number - line2->key.number;
462 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
463 line2->key.text, line2->keylen, 0);
466 if (keyfields->reverse)
474 /* Compare using the second keyfield;
475 if that does not distinguish the lines, try the third keyfield;
478 for (i = 1; i < num_keyfields; i++)
480 long length1, length2;
481 char *start1 = find_field (&keyfields[i], text1, &length1);
482 char *start2 = find_field (&keyfields[i], text2, &length2);
483 int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
484 start2, length2, text2 - text_base);
487 if (keyfields[i].reverse)
493 return 0; /* Lines match exactly. */
496 /* Like compare_full but more general.
497 You can pass any strings, and you can say how many keyfields to use.
498 POS1 and POS2 should indicate the nominal positional ordering of
499 the two lines in the input. */
502 compare_general (str1, str2, pos1, pos2, use_keyfields)
509 /* Compare using the first keyfield;
510 if that does not distinguish the lines, try the second keyfield;
513 for (i = 0; i < use_keyfields; i++)
515 long length1, length2;
516 char *start1 = find_field (&keyfields[i], str1, &length1);
517 char *start2 = find_field (&keyfields[i], str2, &length2);
518 int tem = compare_field (&keyfields[i], start1, length1, pos1,
519 start2, length2, pos2);
522 if (keyfields[i].reverse)
528 return 0; /* Lines match exactly. */
531 /* Find the start and length of a field in STR according to KEYFIELD.
532 A pointer to the starting character is returned, and the length
533 is stored into the int that LENGTHPTR points to. */
536 find_field (keyfield, str, lengthptr)
537 struct keyfield *keyfield;
545 if (keyfield->braced)
546 fun = find_braced_pos;
550 start = (*fun) (str, keyfield->startwords, keyfield->startchars,
551 keyfield->ignore_blanks);
552 if (keyfield->endwords < 0)
554 if (keyfield->braced)
555 end = find_braced_end (start);
559 while (*end && *end != '\n')
565 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
566 if (end - str < start - str)
569 *lengthptr = end - start;
573 /* Return a pointer to a specified place within STR,
574 skipping (from the beginning) WORDS words and then CHARS chars.
575 If IGNORE_BLANKS is nonzero, we skip all blanks
576 after finding the specified word. */
579 find_pos (str, words, chars, ignore_blanks)
587 for (i = 0; i < words; i++)
590 /* Find next bunch of nonblanks and skip them. */
591 while ((c = *p) == ' ' || c == '\t')
593 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
595 if (!*p || *p == '\n')
599 while (*p == ' ' || *p == '\t')
602 for (i = 0; i < chars; i++)
604 if (!*p || *p == '\n')
611 /* Like find_pos but assumes that each field is surrounded by braces
612 and that braces within fields are balanced. */
615 find_braced_pos (str, words, chars, ignore_blanks)
625 for (i = 0; i < words; i++)
628 while ((c = *p++) != '{' && c != '\n' && c)
639 if (c == 0 || c == '\n')
644 while ((c = *p++) != '{' && c != '\n' && c)
651 while ((c = *p) == ' ' || c == '\t')
654 for (i = 0; i < chars; i++)
656 if (!*p || *p == '\n')
663 /* Find the end of the balanced-brace field which starts at STR.
664 The position returned is just before the closing brace. */
667 find_braced_end (str)
682 if (c == 0 || c == '\n')
689 find_value (start, length)
695 if (isdigit (*start))
703 /* Vector used to translate characters for comparison.
704 This is how we make all alphanumerics follow all else,
705 and ignore case in the first sorting. */
712 for (i = 1; i < 256; i++)
715 for (i = '0'; i <= '9'; i++)
716 char_order[i] += 512;
718 for (i = 'a'; i <= 'z'; i++)
720 char_order[i] = 512 + i;
721 char_order[i + 'A' - 'a'] = 512 + i;
725 /* Compare two fields (each specified as a start pointer and a character count)
726 according to KEYFIELD.
727 The sign of the value reports the relation between the fields. */
730 compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
731 struct keyfield *keyfield;
739 if (keyfields->positional)
746 if (keyfield->numeric)
748 long value = find_value (start1, length1) - find_value (start2, length2);
759 char *e1 = start1 + length1;
760 char *e2 = start2 + length2;
775 if (char_order[c1] != char_order[c2])
776 return char_order[c1] - char_order[c2];
781 /* Strings are equal except possibly for case. */
798 /* Reverse sign here so upper case comes out last. */
808 /* A `struct linebuffer' is a structure which holds a line of text.
809 `readline' reads a line from a stream into a linebuffer
810 and works regardless of the length of the line. */
818 /* Initialize LINEBUFFER for use. */
821 initbuffer (linebuffer)
822 struct linebuffer *linebuffer;
824 linebuffer->size = 200;
825 linebuffer->buffer = (char *) xmalloc (200);
828 /* Read a line of text from STREAM into LINEBUFFER.
829 Return the length of the line. */
832 readline (linebuffer, stream)
833 struct linebuffer *linebuffer;
836 char *buffer = linebuffer->buffer;
837 char *p = linebuffer->buffer;
838 char *end = p + linebuffer->size;
842 int c = getc (stream);
845 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
846 p += buffer - linebuffer->buffer;
847 end += buffer - linebuffer->buffer;
848 linebuffer->buffer = buffer;
850 if (c < 0 || c == '\n')
861 /* Sort an input file too big to sort in core. */
864 sort_offline (infile, nfiles, total, outfile)
870 /* More than enough. */
871 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
872 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
873 FILE *istream = fopen (infile, "r");
875 struct linebuffer lb;
881 /* Read in one line of input data. */
883 linelength = readline (&lb, istream);
885 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
887 error (_("%s: not a texinfo index file"), infile);
891 /* Split up the input into `ntemps' temporary files, or maybe fewer,
892 and put the new files' names into `tempfiles' */
894 for (i = 0; i < ntemps; i++)
896 char *outname = maketempname (++tempcount);
897 FILE *ostream = fopen (outname, "w");
901 pfatal_with_name (outname);
902 tempfiles[i] = outname;
904 /* Copy lines into this temp file as long as it does not make file
905 "too big" or until there are no more lines. */
907 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
909 tempsize += linelength + 1;
910 fputs (lb.buffer, ostream);
911 putc ('\n', ostream);
913 /* Read another line of input data. */
915 linelength = readline (&lb, istream);
916 if (!linelength && feof (istream))
919 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
921 error (_("%s: not a texinfo index file"), infile);
934 /* Record number of temp files we actually needed. */
938 /* Sort each tempfile into another tempfile.
939 Delete the first set of tempfiles and put the names of the second
942 for (i = 0; i < ntemps; i++)
944 char *newtemp = maketempname (++tempcount);
945 sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
947 unlink (tempfiles[i]);
948 tempfiles[i] = newtemp;
954 /* Merge the tempfiles together and indexify. */
956 merge_files (tempfiles, ntemps, outfile);
959 /* Sort INFILE, whose size is TOTAL,
960 assuming that is small enough to be done in-core,
961 then indexify it and send the output to OUTFILE (or to stdout). */
964 sort_in_core (infile, total, outfile)
970 char *data = (char *) xmalloc (total + 1);
974 FILE *ostream = stdout;
975 struct lineinfo *lineinfo;
977 /* Read the contents of the file into the moby array `data'. */
979 int desc = open (infile, O_RDONLY, 0);
982 fatal (_("failure reopening %s"), infile);
983 for (file_size = 0;;)
985 i = read (desc, data + file_size, total - file_size);
995 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
997 error (_("%s: not a texinfo index file"), infile);
1003 /* Sort routines want to know this address. */
1007 /* Create the array of pointers to lines, with a default size
1008 frequently enough. */
1010 nlines = total / 50;
1013 linearray = (char **) xmalloc (nlines * sizeof (char *));
1015 /* `nextline' points to the next free slot in this array.
1016 `nlines' is the allocated size. */
1018 nextline = linearray;
1020 /* Parse the input file's data, and make entries for the lines. */
1022 nextline = parsefile (infile, nextline, file_data, file_size);
1025 error (_("%s: not a texinfo index file"), infile);
1029 /* Sort the lines. */
1031 /* If we have enough space, find the first keyfield of each line in advance.
1032 Make a `struct lineinfo' for each line, which records the keyfield
1033 as well as the line, and sort them. */
1035 lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
1039 struct lineinfo *lp;
1042 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1045 lp->key.text = find_field (keyfields, *p, &lp->keylen);
1046 if (keyfields->numeric)
1047 lp->key.number = find_value (lp->key.text, lp->keylen);
1050 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
1053 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1059 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
1061 /* Open the output file. */
1065 ostream = fopen (outfile, "w");
1067 pfatal_with_name (outfile);
1070 writelines (linearray, nextline - linearray, ostream);
1078 /* Parse an input string in core into lines.
1079 DATA is the input string, and SIZE is its length.
1080 Data goes in LINEARRAY starting at NEXTLINE.
1081 The value returned is the first entry in LINEARRAY still unused.
1082 Value 0 means input file contents are invalid. */
1085 parsefile (filename, nextline, data, size)
1092 char **line = nextline;
1100 if (p[0] != '\\' && p[0] != '@')
1104 while (*p && *p != '\n')
1110 if (line == linearray + nlines)
1112 char **old = linearray;
1113 linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4));
1114 line += linearray - old;
1121 /* Indexification is a filter applied to the sorted lines
1122 as they are being written to the output file.
1123 Multiple entries for the same name, with different page numbers,
1124 get combined into a single entry with multiple page numbers.
1125 The first braced field, which is used for sorting, is discarded.
1126 However, its first character is examined, folded to lower case,
1127 and if it is different from that in the previous line fed to us
1128 a \initial line is written with one argument, the new initial.
1130 If an entry has four braced fields, then the second and third
1131 constitute primary and secondary names.
1132 In this case, each change of primary name
1133 generates a \primary line which contains only the primary name,
1134 and in between these are \secondary lines which contain
1135 just a secondary name and page numbers. */
1137 /* The last primary name we wrote a \primary entry for.
1138 If only one level of indexing is being done, this is the last name seen. */
1140 /* Length of storage allocated for lastprimary. */
1141 int lastprimarylength;
1143 /* Similar, for the secondary name. */
1144 char *lastsecondary;
1145 int lastsecondarylength;
1147 /* Zero if we are not in the middle of writing an entry.
1148 One if we have written the beginning of an entry but have not
1149 yet written any page numbers into it.
1150 Greater than one if we have written the beginning of an entry
1151 plus at least one page number. */
1154 /* The initial (for sorting purposes) of the last primary entry written.
1155 When this changes, a \initial {c} line is written */
1159 int lastinitiallength;
1161 /* When we need a string of length 1 for the value of lastinitial,
1164 char lastinitial1[2];
1166 /* Initialize static storage for writing an index. */
1172 lastinitial = lastinitial1;
1173 lastinitial1[0] = 0;
1174 lastinitial1[1] = 0;
1175 lastinitiallength = 0;
1176 lastprimarylength = 100;
1177 lastprimary = (char *) xmalloc (lastprimarylength + 1);
1178 memset (lastprimary, '\0', lastprimarylength + 1);
1179 lastsecondarylength = 100;
1180 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1181 memset (lastsecondary, '\0', lastsecondarylength + 1);
1184 /* Indexify. Merge entries for the same name,
1185 insert headers for each initial character, etc. */
1188 indexify (line, ostream)
1192 char *primary, *secondary, *pagenumber;
1193 int primarylength, secondarylength = 0, pagelength;
1200 /* First, analyze the parts of the entry fed to us this time. */
1202 p = find_braced_pos (line, 0, 0, 0);
1206 /* Get length of inner pair of braces starting at `p',
1207 including that inner pair of braces. */
1208 initiallength = find_braced_end (p + 1) + 1 - p;
1217 if (initial1[0] >= 'a' && initial1[0] <= 'z')
1221 pagenumber = find_braced_pos (line, 1, 0, 0);
1222 pagelength = find_braced_end (pagenumber) - pagenumber;
1223 if (pagelength == 0)
1224 fatal (_("No page number in %s"), line);
1226 primary = find_braced_pos (line, 2, 0, 0);
1227 primarylength = find_braced_end (primary) - primary;
1229 secondary = find_braced_pos (line, 3, 0, 0);
1230 nosecondary = !*secondary;
1232 secondarylength = find_braced_end (secondary) - secondary;
1234 /* If the primary is different from before, make a new primary entry. */
1235 if (strncmp (primary, lastprimary, primarylength))
1237 /* Close off current secondary entry first, if one is open. */
1240 fputs ("}\n", ostream);
1244 /* If this primary has a different initial, include an entry for
1246 if (initiallength != lastinitiallength ||
1247 strncmp (initial, lastinitial, initiallength))
1249 fprintf (ostream, "\\initial {");
1250 fwrite (initial, 1, initiallength, ostream);
1251 fputs ("}\n", ostream);
1252 if (initial == initial1)
1254 lastinitial = lastinitial1;
1255 *lastinitial1 = *initial1;
1259 lastinitial = initial;
1261 lastinitiallength = initiallength;
1264 /* Make the entry for the primary. */
1266 fputs ("\\entry {", ostream);
1268 fputs ("\\primary {", ostream);
1269 fwrite (primary, primarylength, 1, ostream);
1272 fputs ("}{", ostream);
1276 fputs ("}\n", ostream);
1278 /* Record name of most recent primary. */
1279 if (lastprimarylength < primarylength)
1281 lastprimarylength = primarylength + 100;
1282 lastprimary = (char *) xrealloc (lastprimary,
1283 1 + lastprimarylength);
1285 strncpy (lastprimary, primary, primarylength);
1286 lastprimary[primarylength] = 0;
1288 /* There is no current secondary within this primary, now. */
1289 lastsecondary[0] = 0;
1292 /* Should not have an entry with no subtopic following one with a subtopic. */
1294 if (nosecondary && *lastsecondary)
1295 error (_("entry %s follows an entry with a secondary name"), line);
1297 /* Start a new secondary entry if necessary. */
1298 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1302 fputs ("}\n", ostream);
1306 /* Write the entry for the secondary. */
1307 fputs ("\\secondary {", ostream);
1308 fwrite (secondary, secondarylength, 1, ostream);
1309 fputs ("}{", ostream);
1312 /* Record name of most recent secondary. */
1313 if (lastsecondarylength < secondarylength)
1315 lastsecondarylength = secondarylength + 100;
1316 lastsecondary = (char *) xrealloc (lastsecondary,
1317 1 + lastsecondarylength);
1319 strncpy (lastsecondary, secondary, secondarylength);
1320 lastsecondary[secondarylength] = 0;
1323 /* Here to add one more page number to the current entry. */
1325 fputs (", ", ostream); /* Punctuate first, if this is not the first. */
1326 fwrite (pagenumber, pagelength, 1, ostream);
1329 /* Close out any unfinished output entry. */
1332 finish_index (ostream)
1336 fputs ("}\n", ostream);
1338 free (lastsecondary);
1341 /* Copy the lines in the sorted order.
1342 Each line is copied out of the input file it was found in. */
1345 writelines (linearray, nlines, ostream)
1350 char **stop_line = linearray + nlines;
1355 /* Output the text of the lines, and free the buffer space. */
1357 for (next_line = linearray; next_line != stop_line; next_line++)
1359 /* If -u was specified, output the line only if distinct from previous one. */
1360 if (next_line == linearray
1361 /* Compare previous line with this one, using only the
1362 explicitly specd keyfields. */
1363 || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
1365 char *p = *next_line;
1368 while ((c = *p++) && c != '\n')
1371 indexify (*next_line, ostream);
1375 finish_index (ostream);
1378 /* Assume (and optionally verify) that each input file is sorted;
1379 merge them and output the result.
1380 Returns nonzero if any input file fails to be sorted.
1382 This is the high-level interface that can handle an unlimited
1385 #define MAX_DIRECT_MERGE 10
1388 merge_files (infiles, nfiles, outfile)
1397 int start_tempcount = tempcount;
1399 if (nfiles <= MAX_DIRECT_MERGE)
1400 return merge_direct (infiles, nfiles, outfile);
1402 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1403 making a temporary file to hold each group's result. */
1405 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1406 tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1407 for (i = 0; i < ntemps; i++)
1409 int nf = MAX_DIRECT_MERGE;
1410 if (i + 1 == ntemps)
1411 nf = nfiles - i * MAX_DIRECT_MERGE;
1412 tempfiles[i] = maketempname (++tempcount);
1413 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1416 /* All temporary files that existed before are no longer needed
1417 since their contents have been merged into our new tempfiles.
1419 flush_tempfiles (start_tempcount);
1421 /* Now merge the temporary files we created. */
1423 merge_files (tempfiles, ntemps, outfile);
1430 /* Assume (and optionally verify) that each input file is sorted;
1431 merge them and output the result.
1432 Returns nonzero if any input file fails to be sorted.
1434 This version of merging will not work if the number of
1435 input files gets too high. Higher level functions
1436 use it only with a bounded number of input files. */
1439 merge_direct (infiles, nfiles, outfile)
1444 struct linebuffer *lb1, *lb2;
1445 struct linebuffer **thisline, **prevline;
1451 struct linebuffer *prev_out = 0;
1452 FILE *ostream = stdout;
1456 ostream = fopen (outfile, "w");
1459 pfatal_with_name (outfile);
1470 /* For each file, make two line buffers.
1471 Also, for each file, there is an element of `thisline'
1472 which points at any time to one of the file's two buffers,
1473 and an element of `prevline' which points to the other buffer.
1474 `thisline' is supposed to point to the next available line from the file,
1475 while `prevline' holds the last file line used,
1476 which is remembered so that we can verify that the file is properly sorted. */
1478 /* lb1 and lb2 contain one buffer each per file. */
1479 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1480 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1482 /* thisline[i] points to the linebuffer holding the next available line in file i,
1483 or is zero if there are no lines left in that file. */
1484 thisline = (struct linebuffer **)
1485 xmalloc (nfiles * sizeof (struct linebuffer *));
1486 /* prevline[i] points to the linebuffer holding the last used line
1487 from file i. This is just for verifying that file i is properly
1489 prevline = (struct linebuffer **)
1490 xmalloc (nfiles * sizeof (struct linebuffer *));
1491 /* streams[i] holds the input stream for file i. */
1492 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1493 /* file_lossage[i] is nonzero if we already know file i is not
1495 file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1497 /* Allocate and initialize all that storage. */
1499 for (i = 0; i < nfiles; i++)
1501 initbuffer (&lb1[i]);
1502 initbuffer (&lb2[i]);
1503 thisline[i] = &lb1[i];
1504 prevline[i] = &lb2[i];
1505 file_lossage[i] = 0;
1506 streams[i] = fopen (infiles[i], "r");
1508 pfatal_with_name (infiles[i]);
1510 readline (thisline[i], streams[i]);
1513 /* Keep count of number of files not at eof. */
1518 struct linebuffer *best = 0;
1519 struct linebuffer *exch;
1523 /* Look at the next avail line of each file; choose the least one. */
1525 for (i = 0; i < nfiles; i++)
1529 0 < compare_general (best->buffer, thisline[i]->buffer,
1530 (long) bestfile, (long) i, num_keyfields)))
1537 /* Output that line, unless it matches the previous one and we
1538 don't want duplicates. */
1541 !compare_general (prev_out->buffer,
1542 best->buffer, 0L, 1L, num_keyfields - 1)))
1543 indexify (best->buffer, ostream);
1546 /* Now make the line the previous of its file, and fetch a new
1547 line from that file. */
1549 exch = prevline[bestfile];
1550 prevline[bestfile] = thisline[bestfile];
1551 thisline[bestfile] = exch;
1555 /* If the file has no more, mark it empty. */
1557 if (feof (streams[bestfile]))
1559 thisline[bestfile] = 0;
1560 /* Update the number of files still not empty. */
1564 readline (thisline[bestfile], streams[bestfile]);
1565 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
1570 finish_index (ostream);
1572 /* Free all storage and close all input streams. */
1574 for (i = 0; i < nfiles; i++)
1576 fclose (streams[i]);
1577 free (lb1[i].buffer);
1578 free (lb2[i].buffer);
1580 free (file_lossage);
1593 /* Print error message and exit. */
1599 error (format, arg);
1603 /* Print error message. FORMAT is printf control string, ARG is arg for it. */
1608 printf ("%s: ", program_name);
1609 printf (format, arg);
1610 if (format[strlen (format) -1] != '\n')
1615 perror_with_name (name)
1620 s = strerror (errno);
1621 printf ("%s: ", program_name);
1622 printf ("%s; for file `%s'.\n", s, name);
1626 pfatal_with_name (name)
1631 s = strerror (errno);
1632 printf ("%s: ", program_name);
1633 printf (_("%s; for file `%s'.\n"), s, name);
1637 /* Return a newly-allocated string whose contents concatenate those of
1644 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1645 char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1647 strcpy (result, s1);
1648 strcpy (result + len1, s2);
1649 strcpy (result + len1 + len2, s3);
1650 *(result + len1 + len2 + len3) = 0;
1655 #if !defined (HAVE_STRCHR)
1657 strrchr (string, character)
1663 for (i = strlen (string) - 1; i > -1; i--)
1664 if (string[i] == character)
1665 return (string + i);
1667 return ((char *)NULL);
1669 #endif /* HAVE_STRCHR */