1 /* texindex -- sort TeX index dribble output into an actual index.
2 $Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp $
4 Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
5 2002, 2003, 2004 Free Software Foundation, Inc.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
24 static char *program_name = "texindex";
27 # include "../src/config.h"
28 /* Some s/os.h files redefine these. */
35 #if !defined (HAVE_MEMSET)
37 #define memset(ptr, ignore, count) bzero (ptr, count)
40 char *mktemp (char *);
42 #if !defined (SEEK_SET)
46 #endif /* !SEEK_SET */
50 /* When sorting in core, this structure describes one line
51 and the position and length of its first keyfield. */
54 char *text; /* The actual text of the line. */
56 char *text; /* The start of the key (for textual comparison). */
57 long number; /* The numeric value (for numeric comparison). */
59 long keylen; /* Length of KEY field. */
62 /* This structure describes a field to use as a sort key. */
65 int startwords; /* Number of words to skip. */
66 int startchars; /* Number of additional chars to skip. */
67 int endwords; /* Number of words to ignore at end. */
68 int endchars; /* Ditto for characters of last word. */
69 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */
70 char fold_case; /* Non-zero means case doesn't matter. */
71 char reverse; /* Non-zero means compare in reverse order. */
72 char numeric; /* Non-zeros means field is ASCII numeric. */
73 char positional; /* Sort according to file position. */
74 char braced; /* Count balanced-braced groupings as fields. */
77 /* Vector of keyfields to use. */
78 struct keyfield keyfields[3];
80 /* Number of keyfields stored in that vector. */
81 int num_keyfields = 3;
83 /* Vector of input file names, terminated with a null pointer. */
86 /* Vector of corresponding output file names, or NULL, meaning default it
87 (add an `s' to the end). */
90 /* Length of `infiles'. */
93 /* Pointer to the array of pointers to lines being sorted. */
96 /* The allocated length of `linearray'. */
99 /* Directory to use for temporary files. On Unix, it ends with a slash. */
102 /* Number of last temporary file. */
105 /* Number of last temporary file already deleted.
106 Temporary files are deleted by `flush_tempfiles' in order of creation. */
107 int last_deleted_tempcount;
109 /* During in-core sort, this points to the base of the data block
110 which contains all the lines of data. */
113 /* Initially 0; changed to 1 if we want initials in this index. */
116 /* Remembers the first initial letter seen in this index, so we can
117 determine whether we need initials in the sorted form. */
120 /* Additional command switches .*/
122 /* Nonzero means do not delete tempfiles -- for debugging. */
125 /* Forward declarations of functions in this file. */
126 void decode_command (int argc, char **argv);
127 void sort_in_core (char *infile, int total, char *outfile);
128 void sort_offline (char *infile, off_t total, char *outfile);
129 char **parsefile (char *filename, char **nextline, char *data, long int size);
130 char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
131 char *find_pos (char *str, int words, int chars, int ignore_blanks);
132 long find_value (char *start, long int length);
133 char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
134 char *find_braced_end (char *str);
135 void writelines (char **linearray, int nlines, FILE *ostream);
136 int compare_field (struct keyfield *keyfield, char *start1,
137 long int length1, long int pos1, char *start2,
138 long int length2, long int pos2);
139 int compare_full (const void *, const void *);
140 long readline (struct linebuffer *linebuffer, FILE *stream);
141 int merge_files (char **infiles, int nfiles, char *outfile);
142 int merge_direct (char **infiles, int nfiles, char *outfile);
143 void pfatal_with_name (const char *name);
144 void fatal (const char *format, const char *arg);
145 void error (const char *format, const char *arg);
146 void *xmalloc (), *xrealloc ();
147 char *concat (char *s1, char *s2);
148 void flush_tempfiles (int to_count);
150 #define MAX_IN_CORE_SORT 500000
153 main (int argc, char **argv)
158 last_deleted_tempcount = 0;
160 #ifdef HAVE_SETLOCALE
161 /* Set locale via LC_ALL. */
162 setlocale (LC_ALL, "");
165 /* Set the text message domain. */
166 bindtextdomain (PACKAGE, LOCALEDIR);
167 textdomain (PACKAGE);
169 /* In case we write to a redirected stdout that fails. */
170 /* not ready atexit (close_stdout); */
172 /* Describe the kind of sorting to do. */
173 /* The first keyfield uses the first braced field and folds case. */
174 keyfields[0].braced = 1;
175 keyfields[0].fold_case = 1;
176 keyfields[0].endwords = -1;
177 keyfields[0].endchars = -1;
179 /* The second keyfield uses the second braced field, numerically. */
180 keyfields[1].braced = 1;
181 keyfields[1].numeric = 1;
182 keyfields[1].startwords = 1;
183 keyfields[1].endwords = -1;
184 keyfields[1].endchars = -1;
186 /* The third keyfield (which is ignored while discarding duplicates)
187 compares the whole line. */
188 keyfields[2].endwords = -1;
189 keyfields[2].endchars = -1;
191 decode_command (argc, argv);
193 /* Process input files completely, one by one. */
195 for (i = 0; i < num_infiles; i++)
202 desc = open (infiles[i], O_RDONLY, 0);
204 pfatal_with_name (infiles[i]);
206 if (stat (infiles[i], &instat))
207 pfatal_with_name (infiles[i]);
208 if (S_ISDIR (instat.st_mode))
213 pfatal_with_name (infiles[i]);
216 lseek (desc, (off_t) 0, SEEK_END);
217 ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
221 outfile = outfiles[i];
223 outfile = concat (infiles[i], "s");
226 first_initial = '\0';
228 if (ptr < MAX_IN_CORE_SORT)
229 /* Sort a small amount of data. */
230 sort_in_core (infiles[i], (int)ptr, outfile);
232 sort_offline (infiles[i], ptr, outfile);
235 flush_tempfiles (tempcount);
237 return 0; /* Avoid bogus warnings. */
250 TEXINDEX_OPTION texindex_options[] = {
251 { "--help", "-h", (int *)NULL, 0, (char *)NULL,
252 N_("display this help and exit") },
253 { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
254 N_("keep temporary files around after processing") },
255 { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
256 N_("do not keep temporary files around after processing (default)") },
257 { "--output", "-o", (int *)NULL, 0, "FILE",
258 N_("send output to FILE") },
259 { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
260 N_("display version information and exit") },
261 { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
265 usage (int result_value)
268 FILE *f = result_value ? stderr : stdout;
270 fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
271 fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
272 /* Avoid trigraph nonsense. */
274 _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
275 '?', '?'); /* avoid trigraph in cat-id-tbl.c */
276 fprintf (f, _("\nOptions:\n"));
278 for (i = 0; texindex_options[i].long_name; i++)
282 if (texindex_options[i].short_name)
283 fprintf (f, "%s, ", texindex_options[i].short_name);
286 texindex_options[i].long_name,
287 texindex_options[i].arg_name
288 ? texindex_options[i].arg_name : "");
290 fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
293 Email bug reports to bug-texinfo@gnu.org,\n\
294 general questions and discussion to help-texinfo@gnu.org.\n\
295 Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
298 xexit (result_value);
301 /* Decode the command line arguments to set the parameter variables
302 and set up the vector of keyfields and the vector of input files. */
305 decode_command (int argc, char **argv)
311 /* Store default values into parameter variables. */
313 tempdir = getenv ("TMPDIR");
315 tempdir = getenv ("TEMP");
317 tempdir = getenv ("TMP");
319 tempdir = DEFAULT_TMPDIR;
321 tempdir = concat (tempdir, "/");
325 /* Allocate ARGC input files, which must be enough. */
327 infiles = (char **) xmalloc (argc * sizeof (char *));
328 outfiles = (char **) xmalloc (argc * sizeof (char *));
332 while (arg_index < argc)
334 char *arg = argv[arg_index++];
338 if (strcmp (arg, "--version") == 0)
340 printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
342 puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
343 printf (_("There is NO warranty. You may redistribute this software\n\
344 under the terms of the GNU General Public License.\n\
345 For more information about these matters, see the files named COPYING.\n"));
348 else if ((strcmp (arg, "--keep") == 0) ||
349 (strcmp (arg, "-k") == 0))
353 else if ((strcmp (arg, "--help") == 0) ||
354 (strcmp (arg, "-h") == 0))
358 else if ((strcmp (arg, "--output") == 0) ||
359 (strcmp (arg, "-o") == 0))
361 if (argv[arg_index] != (char *)NULL)
365 *(op - 1) = argv[arg_index];
376 *op++ = (char *)NULL;
380 /* Record number of keyfields and terminate list of filenames. */
381 num_infiles = ip - infiles;
383 if (num_infiles == 0)
387 /* Return a name for temporary file COUNT. */
390 maketempname (int count)
392 static char *tempbase = NULL;
398 tempbase = concat (tempdir, "txidxXXXXXX");
400 fd = mkstemp (tempbase);
402 pfatal_with_name (tempbase);
405 sprintf (tempsuffix, ".%d", count);
406 return concat (tempbase, tempsuffix);
410 /* Delete all temporary files up to TO_COUNT. */
413 flush_tempfiles (int to_count)
417 while (last_deleted_tempcount < to_count)
418 unlink (maketempname (++last_deleted_tempcount));
422 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
425 compare_full (const void *p1, const void *p2)
427 char **line1 = (char **) p1;
428 char **line2 = (char **) p2;
431 /* Compare using the first keyfield;
432 if that does not distinguish the lines, try the second keyfield;
435 for (i = 0; i < num_keyfields; i++)
437 long length1, length2;
438 char *start1 = find_field (&keyfields[i], *line1, &length1);
439 char *start2 = find_field (&keyfields[i], *line2, &length2);
440 int tem = compare_field (&keyfields[i], start1, length1,
442 start2, length2, *line2 - text_base);
445 if (keyfields[i].reverse)
451 return 0; /* Lines match exactly. */
454 /* Compare LINE1 and LINE2, described by structures
455 in which the first keyfield is identified in advance.
456 For positional sorting, assumes that the order of the lines in core
457 reflects their nominal order. */
459 compare_prepared (const void *p1, const void *p2)
461 struct lineinfo *line1 = (struct lineinfo *) p1;
462 struct lineinfo *line2 = (struct lineinfo *) p2;
467 /* Compare using the first keyfield, which has been found for us already. */
468 if (keyfields->positional)
470 if (line1->text - text_base > line2->text - text_base)
475 else if (keyfields->numeric)
476 tem = line1->key.number - line2->key.number;
478 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
479 line2->key.text, line2->keylen, 0);
482 if (keyfields->reverse)
490 /* Compare using the second keyfield;
491 if that does not distinguish the lines, try the third keyfield;
494 for (i = 1; i < num_keyfields; i++)
496 long length1, length2;
497 char *start1 = find_field (&keyfields[i], text1, &length1);
498 char *start2 = find_field (&keyfields[i], text2, &length2);
499 int tem = compare_field (&keyfields[i], start1, length1,
501 start2, length2, text2 - text_base);
504 if (keyfields[i].reverse)
510 return 0; /* Lines match exactly. */
513 /* Like compare_full but more general.
514 You can pass any strings, and you can say how many keyfields to use.
515 POS1 and POS2 should indicate the nominal positional ordering of
516 the two lines in the input. */
519 compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
523 /* Compare using the first keyfield;
524 if that does not distinguish the lines, try the second keyfield;
527 for (i = 0; i < use_keyfields; i++)
529 long length1, length2;
530 char *start1 = find_field (&keyfields[i], str1, &length1);
531 char *start2 = find_field (&keyfields[i], str2, &length2);
532 int tem = compare_field (&keyfields[i], start1, length1, pos1,
533 start2, length2, pos2);
536 if (keyfields[i].reverse)
542 return 0; /* Lines match exactly. */
545 /* Find the start and length of a field in STR according to KEYFIELD.
546 A pointer to the starting character is returned, and the length
547 is stored into the int that LENGTHPTR points to. */
550 find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
556 if (keyfield->braced)
557 fun = find_braced_pos;
561 start = (*fun) (str, keyfield->startwords, keyfield->startchars,
562 keyfield->ignore_blanks);
563 if (keyfield->endwords < 0)
565 if (keyfield->braced)
566 end = find_braced_end (start);
570 while (*end && *end != '\n')
576 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
577 if (end - str < start - str)
580 *lengthptr = end - start;
584 /* Return a pointer to a specified place within STR,
585 skipping (from the beginning) WORDS words and then CHARS chars.
586 If IGNORE_BLANKS is nonzero, we skip all blanks
587 after finding the specified word. */
590 find_pos (char *str, int words, int chars, int ignore_blanks)
595 for (i = 0; i < words; i++)
598 /* Find next bunch of nonblanks and skip them. */
599 while ((c = *p) == ' ' || c == '\t')
601 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
603 if (!*p || *p == '\n')
607 while (*p == ' ' || *p == '\t')
610 for (i = 0; i < chars; i++)
612 if (!*p || *p == '\n')
619 /* Like find_pos but assumes that each field is surrounded by braces
620 and that braces within fields are balanced. */
623 find_braced_pos (char *str, int words, int chars, int ignore_blanks)
630 for (i = 0; i < words; i++)
633 while ((c = *p++) != '{' && c != '\n' && c)
644 if (c == 0 || c == '\n')
649 while ((c = *p++) != '{' && c != '\n' && c)
656 while ((c = *p) == ' ' || c == '\t')
659 for (i = 0; i < chars; i++)
661 if (!*p || *p == '\n')
668 /* Find the end of the balanced-brace field which starts at STR.
669 The position returned is just before the closing brace. */
672 find_braced_end (char *str)
686 if (c == 0 || c == '\n')
693 find_value (char *start, long int length)
697 if (isdigit (*start))
705 /* Vector used to translate characters for comparison.
706 This is how we make all alphanumerics follow all else,
707 and ignore case in the first sorting. */
711 init_char_order (void)
714 for (i = 1; i < 256; i++)
717 for (i = '0'; i <= '9'; i++)
718 char_order[i] += 512;
720 for (i = 'a'; i <= 'z'; i++)
722 char_order[i] = 512 + i;
723 char_order[i + 'A' - 'a'] = 512 + i;
727 /* Compare two fields (each specified as a start pointer and a character count)
728 according to KEYFIELD.
729 The sign of the value reports the relation between the fields. */
732 compare_field (struct keyfield *keyfield, char *start1, long int length1,
733 long int pos1, char *start2, long int length2, long int pos2)
735 if (keyfields->positional)
742 if (keyfield->numeric)
744 long value = find_value (start1, length1) - find_value (start2, length2);
755 char *e1 = start1 + length1;
756 char *e2 = start2 + length2;
771 if (char_order[c1] != char_order[c2])
772 return char_order[c1] - char_order[c2];
777 /* Strings are equal except possibly for case. */
794 /* Reverse sign here so upper case comes out last. */
804 /* A `struct linebuffer' is a structure which holds a line of text.
805 `readline' reads a line from a stream into a linebuffer
806 and works regardless of the length of the line. */
814 /* Initialize LINEBUFFER for use. */
817 initbuffer (struct linebuffer *linebuffer)
819 linebuffer->size = 200;
820 linebuffer->buffer = (char *) xmalloc (200);
823 /* Read a line of text from STREAM into LINEBUFFER.
824 Return the length of the line. */
827 readline (struct linebuffer *linebuffer, FILE *stream)
829 char *buffer = linebuffer->buffer;
830 char *p = linebuffer->buffer;
831 char *end = p + linebuffer->size;
835 int c = getc (stream);
838 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
839 p += buffer - linebuffer->buffer;
840 end += buffer - linebuffer->buffer;
841 linebuffer->buffer = buffer;
843 if (c < 0 || c == '\n')
854 /* Sort an input file too big to sort in core. */
857 sort_offline (char *infile, off_t total, char *outfile)
859 /* More than enough. */
860 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
861 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
862 FILE *istream = fopen (infile, "r");
864 struct linebuffer lb;
870 /* Read in one line of input data. */
872 linelength = readline (&lb, istream);
874 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
876 error (_("%s: not a texinfo index file"), infile);
880 /* Split up the input into `ntemps' temporary files, or maybe fewer,
881 and put the new files' names into `tempfiles' */
883 for (i = 0; i < ntemps; i++)
885 char *outname = maketempname (++tempcount);
886 FILE *ostream = fopen (outname, "w");
890 pfatal_with_name (outname);
891 tempfiles[i] = outname;
893 /* Copy lines into this temp file as long as it does not make file
894 "too big" or until there are no more lines. */
896 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
898 tempsize += linelength + 1;
899 fputs (lb.buffer, ostream);
900 putc ('\n', ostream);
902 /* Read another line of input data. */
904 linelength = readline (&lb, istream);
905 if (!linelength && feof (istream))
908 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
910 error (_("%s: not a texinfo index file"), infile);
923 /* Record number of temp files we actually needed. */
927 /* Sort each tempfile into another tempfile.
928 Delete the first set of tempfiles and put the names of the second
931 for (i = 0; i < ntemps; i++)
933 char *newtemp = maketempname (++tempcount);
934 sort_in_core (tempfiles[i], MAX_IN_CORE_SORT, newtemp);
936 unlink (tempfiles[i]);
937 tempfiles[i] = newtemp;
943 /* Merge the tempfiles together and indexify. */
945 merge_files (tempfiles, ntemps, outfile);
948 /* Sort INFILE, whose size is TOTAL,
949 assuming that is small enough to be done in-core,
950 then indexify it and send the output to OUTFILE (or to stdout). */
953 sort_in_core (char *infile, int total, char *outfile)
956 char *data = (char *) xmalloc (total + 1);
960 FILE *ostream = stdout;
961 struct lineinfo *lineinfo;
963 /* Read the contents of the file into the moby array `data'. */
965 int desc = open (infile, O_RDONLY, 0);
968 fatal (_("failure reopening %s"), infile);
969 for (file_size = 0;;)
971 i = read (desc, data + file_size, total - file_size);
981 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
983 error (_("%s: not a texinfo index file"), infile);
989 /* Sort routines want to know this address. */
993 /* Create the array of pointers to lines, with a default size
994 frequently enough. */
999 linearray = (char **) xmalloc (nlines * sizeof (char *));
1001 /* `nextline' points to the next free slot in this array.
1002 `nlines' is the allocated size. */
1004 nextline = linearray;
1006 /* Parse the input file's data, and make entries for the lines. */
1008 nextline = parsefile (infile, nextline, file_data, file_size);
1011 error (_("%s: not a texinfo index file"), infile);
1015 /* Sort the lines. */
1017 /* If we have enough space, find the first keyfield of each line in advance.
1018 Make a `struct lineinfo' for each line, which records the keyfield
1019 as well as the line, and sort them. */
1021 lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
1025 struct lineinfo *lp;
1028 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1031 lp->key.text = find_field (keyfields, *p, &lp->keylen);
1032 if (keyfields->numeric)
1033 lp->key.number = find_value (lp->key.text, lp->keylen);
1036 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
1039 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1045 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
1047 /* Open the output file. */
1051 ostream = fopen (outfile, "w");
1053 pfatal_with_name (outfile);
1056 writelines (linearray, nextline - linearray, ostream);
1064 /* Parse an input string in core into lines.
1065 DATA is the input string, and SIZE is its length.
1066 Data goes in LINEARRAY starting at NEXTLINE.
1067 The value returned is the first entry in LINEARRAY still unused.
1068 Value 0 means input file contents are invalid. */
1071 parsefile (char *filename, char **nextline, char *data, long int size)
1074 char **line = nextline;
1082 if (p[0] != '\\' && p[0] != '@')
1087 /* Find the first letter of the first field of this line. If it
1088 is different from the first letter of the first field of the
1089 first line, we need initial headers in the output index. */
1090 while (*p && *p != '{')
1097 if (first_initial != toupper (*p))
1101 first_initial = toupper (*p);
1103 while (*p && *p != '\n')
1109 if (line == linearray + nlines)
1111 char **old = linearray;
1112 linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
1113 line += linearray - old;
1120 /* Indexification is a filter applied to the sorted lines
1121 as they are being written to the output file.
1122 Multiple entries for the same name, with different page numbers,
1123 get combined into a single entry with multiple page numbers.
1124 The first braced field, which is used for sorting, is discarded.
1125 However, its first character is examined, folded to lower case,
1126 and if it is different from that in the previous line fed to us
1127 a \initial line is written with one argument, the new initial.
1129 If an entry has four braced fields, then the second and third
1130 constitute primary and secondary names.
1131 In this case, each change of primary name
1132 generates a \primary line which contains only the primary name,
1133 and in between these are \secondary lines which contain
1134 just a secondary name and page numbers. */
1136 /* The last primary name we wrote a \primary entry for.
1137 If only one level of indexing is being done, this is the last name seen. */
1139 /* Length of storage allocated for lastprimary. */
1140 int lastprimarylength;
1142 /* Similar, for the secondary name. */
1143 char *lastsecondary;
1144 int lastsecondarylength;
1146 /* Zero if we are not in the middle of writing an entry.
1147 One if we have written the beginning of an entry but have not
1148 yet written any page numbers into it.
1149 Greater than one if we have written the beginning of an entry
1150 plus at least one page number. */
1153 /* The initial (for sorting purposes) of the last primary entry written.
1154 When this changes, a \initial {c} line is written */
1158 int lastinitiallength;
1160 /* When we need a string of length 1 for the value of lastinitial,
1163 char lastinitial1[2];
1165 /* Initialize static storage for writing an index. */
1171 lastinitial = lastinitial1;
1172 lastinitial1[0] = 0;
1173 lastinitial1[1] = 0;
1174 lastinitiallength = 0;
1175 lastprimarylength = 100;
1176 lastprimary = (char *) xmalloc (lastprimarylength + 1);
1177 memset (lastprimary, '\0', lastprimarylength + 1);
1178 lastsecondarylength = 100;
1179 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1180 memset (lastsecondary, '\0', lastsecondarylength + 1);
1183 /* Indexify. Merge entries for the same name,
1184 insert headers for each initial character, etc. */
1187 indexify (char *line, FILE *ostream)
1189 char *primary, *secondary, *pagenumber;
1190 int primarylength, secondarylength = 0, pagelength;
1197 /* First, analyze the parts of the entry fed to us this time. */
1199 p = find_braced_pos (line, 0, 0, 0);
1203 /* Get length of inner pair of braces starting at `p',
1204 including that inner pair of braces. */
1205 initiallength = find_braced_end (p + 1) + 1 - p;
1210 initial1[0] = toupper (*p);
1215 pagenumber = find_braced_pos (line, 1, 0, 0);
1216 pagelength = find_braced_end (pagenumber) - pagenumber;
1217 if (pagelength == 0)
1218 fatal (_("No page number in %s"), line);
1220 primary = find_braced_pos (line, 2, 0, 0);
1221 primarylength = find_braced_end (primary) - primary;
1223 secondary = find_braced_pos (line, 3, 0, 0);
1224 nosecondary = !*secondary;
1226 secondarylength = find_braced_end (secondary) - secondary;
1228 /* If the primary is different from before, make a new primary entry. */
1229 if (strncmp (primary, lastprimary, primarylength))
1231 /* Close off current secondary entry first, if one is open. */
1234 fputs ("}\n", ostream);
1238 /* If this primary has a different initial, include an entry for
1240 if (need_initials &&
1241 (initiallength != lastinitiallength ||
1242 strncmp (initial, lastinitial, initiallength)))
1244 fprintf (ostream, "\\initial {");
1245 fwrite (initial, 1, initiallength, ostream);
1246 fputs ("}\n", ostream);
1247 if (initial == initial1)
1249 lastinitial = lastinitial1;
1250 *lastinitial1 = *initial1;
1254 lastinitial = initial;
1256 lastinitiallength = initiallength;
1259 /* Make the entry for the primary. */
1261 fputs ("\\entry {", ostream);
1263 fputs ("\\primary {", ostream);
1264 fwrite (primary, primarylength, 1, ostream);
1267 fputs ("}{", ostream);
1271 fputs ("}\n", ostream);
1273 /* Record name of most recent primary. */
1274 if (lastprimarylength < primarylength)
1276 lastprimarylength = primarylength + 100;
1277 lastprimary = (char *) xrealloc (lastprimary,
1278 1 + lastprimarylength);
1280 strncpy (lastprimary, primary, primarylength);
1281 lastprimary[primarylength] = 0;
1283 /* There is no current secondary within this primary, now. */
1284 lastsecondary[0] = 0;
1287 /* Should not have an entry with no subtopic following one with a
1290 if (nosecondary && *lastsecondary)
1291 error (_("entry %s follows an entry with a secondary name"), line);
1293 /* Start a new secondary entry if necessary. */
1294 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1298 fputs ("}\n", ostream);
1302 /* Write the entry for the secondary. */
1303 fputs ("\\secondary {", ostream);
1304 fwrite (secondary, secondarylength, 1, ostream);
1305 fputs ("}{", ostream);
1308 /* Record name of most recent secondary. */
1309 if (lastsecondarylength < secondarylength)
1311 lastsecondarylength = secondarylength + 100;
1312 lastsecondary = (char *) xrealloc (lastsecondary,
1313 1 + lastsecondarylength);
1315 strncpy (lastsecondary, secondary, secondarylength);
1316 lastsecondary[secondarylength] = 0;
1319 /* Here to add one more page number to the current entry. */
1321 fputs (", ", ostream); /* Punctuate first, if this is not the first. */
1322 fwrite (pagenumber, pagelength, 1, ostream);
1325 /* Close out any unfinished output entry. */
1328 finish_index (FILE *ostream)
1331 fputs ("}\n", ostream);
1333 free (lastsecondary);
1336 /* Copy the lines in the sorted order.
1337 Each line is copied out of the input file it was found in. */
1340 writelines (char **linearray, int nlines, FILE *ostream)
1342 char **stop_line = linearray + nlines;
1347 /* Output the text of the lines, and free the buffer space. */
1349 for (next_line = linearray; next_line != stop_line; next_line++)
1351 /* If -u was specified, output the line only if distinct from
1353 if (next_line == linearray
1354 /* Compare previous line with this one, using only the
1355 explicitly specd keyfields. */
1356 || compare_general (*(next_line - 1), *next_line, 0L, 0L,
1359 char *p = *next_line;
1362 while ((c = *p++) && c != '\n')
1365 indexify (*next_line, ostream);
1369 finish_index (ostream);
1372 /* Assume (and optionally verify) that each input file is sorted;
1373 merge them and output the result.
1374 Returns nonzero if any input file fails to be sorted.
1376 This is the high-level interface that can handle an unlimited
1379 #define MAX_DIRECT_MERGE 10
1382 merge_files (char **infiles, int nfiles, char *outfile)
1388 int start_tempcount = tempcount;
1390 if (nfiles <= MAX_DIRECT_MERGE)
1391 return merge_direct (infiles, nfiles, outfile);
1393 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1394 making a temporary file to hold each group's result. */
1396 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1397 tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1398 for (i = 0; i < ntemps; i++)
1400 int nf = MAX_DIRECT_MERGE;
1401 if (i + 1 == ntemps)
1402 nf = nfiles - i * MAX_DIRECT_MERGE;
1403 tempfiles[i] = maketempname (++tempcount);
1404 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1407 /* All temporary files that existed before are no longer needed
1408 since their contents have been merged into our new tempfiles.
1410 flush_tempfiles (start_tempcount);
1412 /* Now merge the temporary files we created. */
1414 merge_files (tempfiles, ntemps, outfile);
1421 /* Assume (and optionally verify) that each input file is sorted;
1422 merge them and output the result.
1423 Returns nonzero if any input file fails to be sorted.
1425 This version of merging will not work if the number of
1426 input files gets too high. Higher level functions
1427 use it only with a bounded number of input files. */
1430 merge_direct (char **infiles, int nfiles, char *outfile)
1432 struct linebuffer *lb1, *lb2;
1433 struct linebuffer **thisline, **prevline;
1439 struct linebuffer *prev_out = 0;
1440 FILE *ostream = stdout;
1444 ostream = fopen (outfile, "w");
1447 pfatal_with_name (outfile);
1458 /* For each file, make two line buffers. Also, for each file, there
1459 is an element of `thisline' which points at any time to one of the
1460 file's two buffers, and an element of `prevline' which points to
1461 the other buffer. `thisline' is supposed to point to the next
1462 available line from the file, while `prevline' holds the last file
1463 line used, which is remembered so that we can verify that the file
1464 is properly sorted. */
1466 /* lb1 and lb2 contain one buffer each per file. */
1467 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1468 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1470 /* thisline[i] points to the linebuffer holding the next available
1471 line in file i, or is zero if there are no lines left in that file. */
1472 thisline = (struct linebuffer **)
1473 xmalloc (nfiles * sizeof (struct linebuffer *));
1474 /* prevline[i] points to the linebuffer holding the last used line
1475 from file i. This is just for verifying that file i is properly
1477 prevline = (struct linebuffer **)
1478 xmalloc (nfiles * sizeof (struct linebuffer *));
1479 /* streams[i] holds the input stream for file i. */
1480 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1481 /* file_lossage[i] is nonzero if we already know file i is not
1483 file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1485 /* Allocate and initialize all that storage. */
1487 for (i = 0; i < nfiles; i++)
1489 initbuffer (&lb1[i]);
1490 initbuffer (&lb2[i]);
1491 thisline[i] = &lb1[i];
1492 prevline[i] = &lb2[i];
1493 file_lossage[i] = 0;
1494 streams[i] = fopen (infiles[i], "r");
1496 pfatal_with_name (infiles[i]);
1498 readline (thisline[i], streams[i]);
1501 /* Keep count of number of files not at eof. */
1506 struct linebuffer *best = 0;
1507 struct linebuffer *exch;
1511 /* Look at the next avail line of each file; choose the least one. */
1513 for (i = 0; i < nfiles; i++)
1517 0 < compare_general (best->buffer, thisline[i]->buffer,
1518 (long) bestfile, (long) i, num_keyfields)))
1525 /* Output that line, unless it matches the previous one and we
1526 don't want duplicates. */
1529 !compare_general (prev_out->buffer,
1530 best->buffer, 0L, 1L, num_keyfields - 1)))
1531 indexify (best->buffer, ostream);
1534 /* Now make the line the previous of its file, and fetch a new
1535 line from that file. */
1537 exch = prevline[bestfile];
1538 prevline[bestfile] = thisline[bestfile];
1539 thisline[bestfile] = exch;
1543 /* If the file has no more, mark it empty. */
1545 if (feof (streams[bestfile]))
1547 thisline[bestfile] = 0;
1548 /* Update the number of files still not empty. */
1552 readline (thisline[bestfile], streams[bestfile]);
1553 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
1558 finish_index (ostream);
1560 /* Free all storage and close all input streams. */
1562 for (i = 0; i < nfiles; i++)
1564 fclose (streams[i]);
1565 free (lb1[i].buffer);
1566 free (lb2[i].buffer);
1568 free (file_lossage);
1581 /* Print error message and exit. */
1584 fatal (const char *format, const char *arg)
1586 error (format, arg);
1590 /* Print error message. FORMAT is printf control string, ARG is arg for it. */
1592 error (const char *format, const char *arg)
1594 printf ("%s: ", program_name);
1595 printf (format, arg);
1596 if (format[strlen (format) -1] != '\n')
1601 perror_with_name (const char *name)
1603 fprintf (stderr, "%s: ", program_name);
1608 pfatal_with_name (const char *name)
1610 perror_with_name (name);
1615 /* Return a newly-allocated string concatenating S1 and S2. */
1618 concat (char *s1, char *s2)
1620 int len1 = strlen (s1), len2 = strlen (s2);
1621 char *result = (char *) xmalloc (len1 + len2 + 1);
1623 strcpy (result, s1);
1624 strcpy (result + len1, s2);
1625 *(result + len1 + len2) = 0;