1 /* index.c -- indexing for Texinfo.
2 $Id: index.c,v 1.27 2008/05/19 18:26:48 karl Exp $
4 Copyright (C) 1998, 1999, 2002, 2003, 2004, 2007, 2008
5 Free Software Foundation, Inc.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
28 #include "sectioning.h"
32 /* Nonzero means that we are in the middle of printing an index. */
33 int printing_index = 0;
35 /* The number of defined indices. */
/* It is also the upper bound of every slot loop over name_index_alist
   in this file (lookup, empty-slot search, flush). */
36 int defined_indices = 0;
38 /* This is the order of the index. */
/* Copied into entry_number of each new entry by index_add_arg and used
   as the numeric suffix of the HTML anchor name emitted there. */
39 int index_counter = 0;
41 /* Stuff for defining commands on the fly. */
42 COMMAND **user_command_array = NULL;
43 int user_command_array_len = 0;
/* One (name . index) record per index slot.  A slot is NULL once its
   index has been undefined; lookups skip NULL slots and defindex
   reuses them. */
45 INDEX_ALIST **name_index_alist = NULL;
47 /* An array of pointers. Each one is for a different index. The @synindex
48 command changes which array slot is pointed to by a given index. */
49 static INDEX_ELT **the_indices = NULL;
51 /* How to compare index entries for sorting. May be set to strcoll. */
52 static int (*index_compare_fn) (const char *a, const char *b) = mbscasecmp;
54 /* Find which element in the known list of indices has this name.
55 Returns -1 if NAME isn't found. */
/* Scans slots 0 .. defined_indices-1 of name_index_alist.  NULL slots
   (indices that were undefined) are skipped before the name is
   compared with STREQ. */
57 find_index_offset (char *name)
60 for (i = 0; i < defined_indices; i++)
61 if (name_index_alist[i] && STREQ (name, name_index_alist[i]->name))
66 /* Return a pointer to the entry of (name . index) for this name.
67 Return NULL if the index doesn't exist. */
/* Built on find_index_offset.  The pointer returned is the
   name_index_alist slot itself, not a copy, so it remains owned by
   the table. */
69 find_index (char *name)
71 int offset = find_index_offset (name);
73 return name_index_alist[offset];
78 /* User-defined commands, which happens only from user-defined indexes.
79 Used to initialize the builtin indices, too. */
/* Appends one COMMAND record to user_command_array.
   NAME is copied with xstrdup, so the caller keeps ownership of the
   string it passes in.  PROC and NEEDS_BRACES_P are stored in the
   proc and argument_in_braces fields of the new record. */
81 define_user_command (char *name, COMMAND_FUNCTION (*proc), int needs_braces_p)
83 int slot = user_command_array_len;
84 user_command_array_len++;
86 if (!user_command_array)
87 user_command_array = xmalloc (1 * sizeof (COMMAND *));
/* The array is grown to one element more than the new length.
   NOTE(review): the spare element is never written in the lines
   visible here; presumably room for a terminator -- confirm. */
89 user_command_array = xrealloc (user_command_array,
90 (1 + user_command_array_len) * sizeof (COMMAND *));
92 user_command_array[slot] = xmalloc (sizeof (COMMAND));
93 user_command_array[slot]->name = xstrdup (name);
94 user_command_array[slot]->proc = proc;
95 user_command_array[slot]->argument_in_braces = needs_braces_p;
98 /* Please release me, let me go... */
/* Releases the entry list that starts at INDEX.  For each element
   visited the entry_text string is freed; the node string is
   deliberately left alone (see the comment in the loop).
   NOTE(review): the statements that advance the list and free the
   element itself are not visible in this excerpt. */
100 free_index (INDEX_ELT *index)
104 while ((temp = index))
107 free (temp->entry_text);
108 /* Do not free the node, because we already freed the tag table,
109 which freed all the node names. */
110 /* free (temp->node); */
116 /* Flush an index by name. This will delete the list of entries that
117 would be written by a @printindex command for this index. */
/* The slot is looked up with find_index_offset.  The entry list that
   is freed is the one at the record's read_index in the_indices;
   afterwards the (name . index) record itself is freed and the slot
   is set to NULL so that defindex can reuse it.
   NOTE(review): the body of the guard on WHICH below is not visible
   in this excerpt; presumably an early return -- confirm. */
119 undefindex (char *name)
122 int which = find_index_offset (name);
124 /* The index might have already been freed if this was the target of
126 if (which < 0 || !name_index_alist[which])
129 i = name_index_alist[which]->read_index;
131 free_index (the_indices[i]);
132 the_indices[i] = NULL;
134 free (name_index_alist[which]->name);
135 free (name_index_alist[which]);
136 name_index_alist[which] = NULL;
139 /* Add the arguments to the current index command to the index NAME. */
/* TEM is the (name . index) record for NAME, or NULL if NAME is not a
   known index (WHICH is then -1).  Entries are stored under the
   write_index of the record, so an index that was redirected by
   @synindex accumulates its entries in the list of the other index.
   The entry text is the rest of the current input line; it is stored
   in the new element without copying (entry_text = index_entry).
   The new INDEX_ELT is pushed on the front of the_indices[which]. */
141 index_add_arg (char *name)
147 tem = find_index (name);
149 which = tem ? tem->write_index : -1;
151 if (macro_expansion_output_stream && !executing_string)
152 append_to_expansion_output (input_text_offset + 1);
154 get_rest_of_line (0, &index_entry);
155 ignore_blank_line ();
157 if (macro_expansion_output_stream && !executing_string)
/* The + 2 leaves room for the newline appended by sprintf and for
   the terminating NUL. */
159 char *index_line = xmalloc (strlen (index_entry) + 2);
160 sprintf (index_line, "%s\n", index_entry);
161 me_execute_string_keep_state (index_line, NULL);
167 line_error (_("Unknown index `%s'"), name);
172 INDEX_ELT *new = xmalloc (sizeof (INDEX_ELT));
176 /* Get output line number updated before doing anything. */
180 new->next = the_indices[which];
182 new->entry_text = index_entry;
183 /* Since footnotes are handled at the very end of the document,
184 node name in the non-split HTML outputs always show the last
185 node. We artificially make it ``Footnotes''. */
186 if (html && !splitting && already_outputting_pending_notes)
187 new->node = xstrdup (_("Footnotes"));
189 new->node = current_node ? current_node : xstrdup ("");
190 if (!html && !xml && no_headers)
192 new->section = current_sectioning_number ();
193 if (strlen (new->section) == 0)
194 new->section_name = current_sectioning_name ();
196 new->section_name = "";
201 new->section_name = NULL;
203 new->code = tem->code;
204 new->defining_line = line_number - 1;
205 new->output_line = no_headers ? output_line_number : node_line_number;
206 /* We need to make a copy since input_filename may point to
207 something that goes away, for example, inside a macro.
208 (see the findexerr test). */
209 new->defining_file = xstrdup (input_filename);
211 if (html && splitting)
213 if (current_output_filename && *current_output_filename)
214 new->output_file = filename_part (current_output_filename);
216 new->output_file = xstrdup ("");
219 new->output_file = NULL;
221 new->entry_number = index_counter;
222 the_indices[which] = new;
225 /* The index breaks if there are colons in the entry.
226 -- This is true, but it's too painful to force changing index
227 entries to use `colon', and too confusing for users. The real
228 fix is to change Info support to support arbitrary characters
229 in node names, and we're not ready to do that. --karl,
231 if (strchr (new->entry_text, ':'))
232 warning (_("Info cannot handle `:' in index entry `%s'"),
239 int removed_empty_elt = 0;
241 /* We must put the anchor outside the <dl> and <ul> blocks. */
242 if (rollback_empty_tag ("dl"))
243 removed_empty_elt = 1;
244 else if (rollback_empty_tag ("ul"))
245 removed_empty_elt = 2;
247 add_word ("<a name=\"index-");
248 add_escaped_anchor_name (index_entry, 0);
249 add_word_args ("-%d\"></a>", index_counter);
251 if (removed_empty_elt == 1)
252 add_html_block_elt_args ("\n<dl>");
253 else if (removed_empty_elt == 2)
254 add_html_block_elt_args ("\n<ul>");
259 xml_insert_indexterm (index_entry, name);
262 /* The function which user defined index commands call. */
/* The index name is derived from the command name (COMMAND) by
   cutting off its last strlen ("index") characters; this mirrors
   top_defindex and gen_defindex, which register the command as the
   index name followed by "index".  The length test keeps the
   subscript from going negative for a shorter command name.
   NOTE(review): the signature line of this function is not visible in
   this excerpt. */
266 char *name = xstrdup (command);
267 if (strlen (name) >= strlen ("index"))
268 name[strlen (name) - strlen ("index")] = 0;
269 index_add_arg (name);
273 /* Define an index known as NAME. We assign the slot number.
274 If CODE is nonzero, make this a code index. */
/* Slot choice: the first NULL entry of name_index_alist is reused if
   there is one; otherwise both name_index_alist and the_indices are
   grown with xrealloc.  A new index reads from and writes to its own
   slot (read_index == write_index == slot) and starts with an empty
   entry list (the_indices[slot] == NULL).  NAME is copied. */
276 defindex (char *name, int code)
280 /* If it already exists, flush it. */
283 /* Try to find an empty slot. */
285 for (i = 0; i < defined_indices; i++)
286 if (!name_index_alist[i])
293 { /* No such luck. Make space for another index. */
294 slot = defined_indices;
297 name_index_alist = (INDEX_ALIST **)
298 xrealloc (name_index_alist, (1 + defined_indices)
299 * sizeof (INDEX_ALIST *));
300 the_indices = (INDEX_ELT **)
301 xrealloc (the_indices, (1 + defined_indices) * sizeof (INDEX_ELT *));
304 /* We have a slot. Start assigning. */
305 name_index_alist[slot] = xmalloc (sizeof (INDEX_ALIST));
306 name_index_alist[slot]->name = xstrdup (name);
307 name_index_alist[slot]->read_index = slot;
308 name_index_alist[slot]->write_index = slot;
309 name_index_alist[slot]->code = code;
311 the_indices[slot] = NULL;
314 /* Define an index NAME, implicitly @code if CODE is nonzero. */
/* Two steps: register a user command whose name is NAME followed by
   "index" with gen_index as its handler, then create the index
   itself with defindex.  TEMP is sized for NAME, the five letters of
   "index" and the terminating NUL. */
316 top_defindex (char *name, int code)
320 temp = xmalloc (1 + strlen (name) + strlen ("index"));
321 sprintf (temp, "%sindex", name);
322 define_user_command (temp, gen_index, 0);
323 defindex (name, code);
327 /* Set up predefined indices. */
/* Two phases are visible below: the index tables are either allocated
   or, if indices already exist, flushed slot by slot; then the six
   standard indices cp, fn, ky, pg, tp and vr are defined through
   top_defindex.
   NOTE(review): the signature and the conditions selecting between
   allocation and flushing are not visible in this excerpt. */
333 /* Create the default data structures. */
335 /* Initialize data space. */
338 the_indices = xmalloc ((1 + defined_indices) * sizeof (INDEX_ELT *));
339 the_indices[defined_indices] = NULL;
341 name_index_alist = xmalloc ((1 + defined_indices)
342 * sizeof (INDEX_ALIST *));
343 name_index_alist[defined_indices] = NULL;
346 /* If there were existing indices, get rid of them now. */
347 for (i = 0; i < defined_indices; i++)
349 if (name_index_alist[i])
350 { /* Suppose we're called with two input files, and the first
351 does a @synindex pg cp. Then, when we get here to start
352 the second file, the "pg" element won't get freed by
353 undefindex (because it's pointing to "cp"). So free it
354 here; otherwise, when we try to define the pg index again
355 just below, it will still point to cp. */
356 undefindex (name_index_alist[i]->name);
358 /* undefindex sets all this to null in some cases. */
359 if (name_index_alist[i])
361 free (name_index_alist[i]->name);
362 free (name_index_alist[i]);
363 name_index_alist[i] = NULL;
368 /* Add the default indices. */
369 top_defindex ("cp", 0); /* cp is the only non-code index. */
370 top_defindex ("fn", 1);
371 top_defindex ("ky", 1);
372 top_defindex ("pg", 1);
373 top_defindex ("tp", 1);
374 top_defindex ("vr", 1);
377 /* Given an index name, return the offset in the_indices of this index,
378 or -1 if there is no such index. */
/* The offset returned is the read_index field of the record found by
   find_index, i.e. the slot that index_list reads from. */
380 translate_index (char *name)
382 INDEX_ALIST *which = find_index (name);
385 return which->read_index;
390 /* Return the index list which belongs to NAME. */
/* An unknown NAME is reported with the sentinel (INDEX_ELT *) -1
   rather than NULL, because NULL is a legitimate value here: an index
   with no entries has a NULL slot in the_indices.  Callers must
   therefore compare the result against (INDEX_ELT *) -1. */
392 index_list (char *name)
394 int which = translate_index (name);
396 return (INDEX_ELT *) -1;
398 return the_indices[which];
401 /* Define a new index command. Arg is name of index. */
/* The index name is the rest of the current input line.  Defining a
   name that find_index already knows is reported as an error.
   Otherwise the same two steps as in top_defindex are performed:
   register the command named NAME followed by "index", then create
   the index with the given CODE flag.  sizeof ("index") includes the
   terminating NUL, so TEMP is exactly large enough. */
403 gen_defindex (int code)
406 get_rest_of_line (0, &name);
408 if (find_index (name))
410 line_error (_("Index `%s' already exists"), name);
414 char *temp = xmalloc (strlen (name) + sizeof ("index"));
415 sprintf (temp, "%sindex", name);
416 define_user_command (temp, gen_index, 0);
417 defindex (name, code);
/* NOTE(review): only the signature of this command handler is visible
   in this excerpt; presumably it forwards to gen_defindex with a
   nonzero CODE -- confirm against the full file. */
431 cm_defcodeindex (void)
436 /* Expects 2 args, on the same line. Both are index abbreviations.
437 Make the first one be a synonym for the second one, i.e. make the
438 first one have the same index as the second one. */
/* TARGET is the slot of the first abbreviation and SOURCE the slot of
   the second.  Only write_index of the first index is redirected, so
   entries added afterwards for the first index go into the list the
   second index writes to; read_index is not touched in the lines
   visible here.
   NOTE(review): the signature line of this handler is not visible in
   this excerpt. */
443 char *abbrev1, *abbrev2;
446 get_until_in_line (0, " ", &abbrev1);
447 target = find_index_offset (abbrev1);
449 get_until_in_line (0, " ", &abbrev2);
450 source = find_index_offset (abbrev2);
451 if (source < 0 || target < 0)
453 line_error (_("Unknown index `%s' and/or `%s' in @synindex"),
459 xml_synindex (abbrev1, abbrev2);
461 name_index_alist[target]->write_index
462 = name_index_alist[source]->write_index;
/* Built-in index command: file the rest of the input line under the
   "pg" index. */
470 cm_pindex (void) /* Pinhead index. */
472 index_add_arg ("pg");
/* Built-in index command: file the rest of the input line under the
   "vr" index. */
476 cm_vindex (void) /* Variable index. */
478 index_add_arg ("vr");
/* Built-in index command: file the rest of the input line under the
   "ky" index. */
482 cm_kindex (void) /* Key index. */
484 index_add_arg ("ky");
/* Built-in index command: file the rest of the input line under the
   "cp" index, the only default index that is not a code index. */
488 cm_cindex (void) /* Concept index. */
490 index_add_arg ("cp");
/* Built-in index command: file the rest of the input line under the
   "fn" index. */
494 cm_findex (void) /* Function index. */
496 index_add_arg ("fn");
/* Built-in index command: file the rest of the input line under the
   "tp" index. */
500 cm_tindex (void) /* Data Type index. */
502 index_add_arg ("tp");
/* Comparison callback handed to qsort by sort_index.  ELEMENT1 and
   ELEMENT2 each point at one INDEX_ELT * slot of the array being
   sorted.  The ordering is delegated to index_compare_fn, applied to
   the expanded entry strings (the entry field, not entry_text). */
506 index_element_compare (const void *element1, const void *element2)
508 INDEX_ELT **elt1 = (INDEX_ELT **) element1;
509 INDEX_ELT **elt2 = (INDEX_ELT **) element2;
511 return index_compare_fn ((*elt1)->entry, (*elt2)->entry);
514 /* Force all index entries to be unique. */
/* ARRAY holds COUNT element pointers; sort_index calls this right
   after sorting, so equal entries are adjacent.  An element is kept
   (moved to COPY) when its node pointer differs from that of its
   successor or the two entry strings differ; otherwise it is treated
   as a duplicate and its strings are freed.  Note that node is
   compared by pointer, not by content.  In the second pass the
   survivors are relinked through next, and entries whose name still
   equals that of the following survivor get a " <N>" suffix. */
516 make_index_entries_unique (INDEX_ELT **array, int count)
522 copy = xmalloc ((1 + count) * sizeof (INDEX_ELT *));
524 for (i = 0, j = 0; i < count; i++)
527 || array[i]->node != array[i + 1]->node
528 || !STREQ (array[i]->entry, array[i + 1]->entry))
529 copy[j++] = array[i];
532 free (array[i]->entry);
533 free (array[i]->entry_text);
539 /* Now COPY contains only unique entries. Duplicated entries in the
540 original array have been freed. Replace the current array with
541 the copy, fixing the NEXT pointers. */
542 for (i = 0; copy[i]; i++)
544 copy[i]->next = copy[i + 1];
546 /* Fix entry names which are the same. They point to different nodes,
547 so we make the entry name unique. */
549 && STREQ (copy[i]->entry, copy[i + 1]->entry)
552 char *new_entry_name;
/* The 10 extra bytes cover the space, the two angle brackets, the
   terminating NUL and up to six digits of COUNTER. */
554 new_entry_name = xmalloc (10 + strlen (copy[i]->entry));
555 sprintf (new_entry_name, "%s <%d>", copy[i]->entry, counter);
556 free (copy[i]->entry);
557 copy[i]->entry = new_entry_name;
567 /* Free the storage used only by COPY. */
572 /* Sort the index passed in INDEX, returning an array of pointers to
573 elements. The array is terminated with a NULL pointer. */
/* Outline: count the list, copy every element into a freshly
   allocated array slot while expanding entry_text into entry, choose
   the comparison function, qsort the array, then remove duplicates.
   The element copy is a shallow struct assignment: entry_text, node
   and the other pointer fields are shared with the original list;
   only entry is newly produced by expansion ().
   line_number and input_filename are pointed at each entry's defining
   location during expansion and restored afterwards. */
576 sort_index (INDEX_ELT *index)
581 int save_line_number = line_number;
582 char *save_input_filename = input_filename;
583 int save_html = html;
585 /* Pretend we are in non-HTML mode, for the purpose of getting the
586 expanded index entry that lacks any markup and other HTML escape
587 characters which could produce a wrong sort order. */
588 /* fixme: html: this still causes some markup, such as non-ASCII
589 characters @AE{} etc., to sort incorrectly. */
592 for (temp = index, count = 0; temp; temp = temp->next, count++)
594 /* We have the length, now we can allocate an array. */
595 array = xmalloc ((count + 1) * sizeof (INDEX_ELT *));
597 for (temp = index, count = 0; temp; temp = temp->next, count++)
599 /* Allocate new memory for the return array, since parts of the
600 original INDEX get freed. Otherwise, if the document calls
601 @printindex twice on the same index, with duplicate entries,
602 we'll have garbage the second time. There are cleaner ways to
603 deal, but this will suffice for now. */
604 array[count] = xmalloc (sizeof (INDEX_ELT));
605 *(array[count]) = *(temp); /* struct assignment, hope it's ok */
607 /* Adjust next pointers to use the new memory. */
609 array[count-1]->next = array[count];
611 /* Set line number and input filename to the source line for this
612 index entry, as this expansion finds any errors. */
613 line_number = array[count]->defining_line;
614 input_filename = array[count]->defining_file;
616 /* If this particular entry should be printed as a "code" index,
617 then expand it as @code{entry}, i.e., as in fixed-width font. */
618 array[count]->entry = expansion (temp->entry_text, array[count]->code);
620 array[count] = NULL; /* terminate the array. */
622 line_number = save_line_number;
623 input_filename = save_input_filename;
627 /* This is not perfect. We should set (then restore) the locale to the
628 documentlanguage, so strcoll operates according to the document's
629 locale, not the user's. For now, I'm just going to assume that
630 those few new documents which use @documentlanguage will be
631 processed in the appropriate locale. In any case, don't use
632 strcoll in the C (aka POSIX) locale, that is the ASCII ordering. */
633 if (language_code != en)
635 char *lang_env = getenv ("LANG");
636 if (lang_env && !STREQ (lang_env, "C") && !STREQ (lang_env, "POSIX"))
637 index_compare_fn = strcoll;
639 #endif /* HAVE_STRCOLL */
641 /* Sort the array. */
642 qsort (array, count, sizeof (INDEX_ELT *), index_element_compare);
644 /* Remove duplicate entries. */
645 make_index_entries_unique (array, count);
647 /* Replace the original index with the sorted one, in case the
648 document wants to print it again. If the index wasn't empty. */
655 /* Return the number of times that the byte CH occurs in the LEN bytes
656 starting at STR. Multibyte strings are not taken into account, which
657 is incorrect, but we need this for @tie; see more comments below. */
/* LEN is a byte count.  The scan is bounded by LEN only, so STR does
   not have to be NUL-terminated within that range. */
660 count_strn_chars (const char *str, int len, int ch)
665 for (i = 0; i < len; i++)
/* Append the "(line N)" label of an index entry to the current output
   line.
   LINE_NUMBER is the number to print; it is formatted right-aligned
   in a field of OUTPUT_LINE_NUMBER_LEN characters so that all labels
   of one index have the same width.
   LAST_COLUMN is the display width of what is already on the current
   line: output_paragraph is scanned backwards to the previous newline
   and the width of that tail is measured with mbsnwidth, plus one
   column per NON_BREAKING_SPACE byte.
   NOTE(review): the bodies of the two fill_column tests near the end
   are not visible in this excerpt; presumably they wrap the line when
   the label does not fit and pad with spaces so that the label ends
   at fill_column -- confirm. */
673 insert_index_output_line_no (int line_number, int output_line_number_len)
675 int last_column, out_line_no_width;
676 int str_size = output_line_number_len + strlen (_("(line )"))
678 char *out_line_no_str = (char *) xmalloc (str_size + 1);
680 /* Do not translate ``(line NNN)'' below for !no_headers case (Info output),
681 because it's something like the ``* Menu'' strings. For plaintext output
682 it should be translated though. */
683 sprintf (out_line_no_str,
684 no_headers ? _("(line %*d)") : "(line %*d)",
685 output_line_number_len, line_number);
688 int i = output_paragraph_offset;
689 while (0 < i && output_paragraph[i-1] != '\n')
691 last_column = mbsnwidth ((char *)(output_paragraph + i),
692 output_paragraph_offset - i, 0);
693 last_column += count_strn_chars (output_paragraph + i,
694 output_paragraph_offset - i, NON_BREAKING_SPACE);
697 out_line_no_width = mbswidth (out_line_no_str, 0);
698 if (last_column + out_line_no_width > fill_column)
704 while (last_column + out_line_no_width < fill_column)
710 insert_string (out_line_no_str);
713 free (out_line_no_str);
716 /* Takes one arg, a short name of an index to print.
717 Outputs a menu of the sorted elements of the index. */
/* Visible structure: the index name is the rest of the input line.
   One branch emits a PRINTINDEX element for XML; another, taken when
   delayed writes are not yet being handled, re-registers the command
   text through register_delayed_write.  The remaining branch looks
   the index up with index_list, sorts it with sort_index and prints
   one line per entry in one of three shapes: an HTML list item, an
   Info menu line, or a plain-text line that refers to the section.
   Global formatting state (filling, indentation, input position) is
   saved first and restored at the end.
   NOTE(review): the signature line and several branch conditions are
   not visible in this excerpt. */
722 get_rest_of_line (0, &index_name);
724 /* get_rest_of_line increments the line number by one,
725 so to make warnings/errors point to the correct line,
726 we decrement the line_number again. */
727 if (!handling_delayed_writes)
732 xml_insert_element (PRINTINDEX, START);
734 insert_string (index_name);
735 xml_insert_element (PRINTINDEX, END);
737 else if (!handling_delayed_writes)
/* sizeof ("@ ") already counts the terminating NUL, so the buffer
   below has room for the whole "@COMMAND NAME" string. */
739 int command_len = sizeof ("@ ") + strlen (command) + strlen (index_name);
740 char *index_command = xmalloc (command_len + 1);
743 sprintf (index_command, "@%s %s", command, index_name);
744 register_delayed_write (index_command);
745 free (index_command);
751 INDEX_ELT *last_index = 0;
753 unsigned line_length;
755 int saved_inhibit_paragraph_indentation = inhibit_paragraph_indentation;
756 int saved_filling_enabled = filling_enabled;
757 int saved_line_number = line_number;
758 char *saved_input_filename = input_filename;
759 unsigned output_line_number_len;
760 FILE *saved_output_stream = output_stream;
762 index = index_list (index_name);
/* index_list signals an unknown index with (INDEX_ELT *) -1; a NULL
   result means a known but empty index. */
763 if (index == (INDEX_ELT *)-1)
765 line_error (_("Unknown index `%s' in @printindex"), index_name);
770 /* Do this before sorting, so execute_string is in the good environment */
774 /* Do this before sorting, so execute_string in index_element_compare
775 will give the same results as when we actually print. */
778 inhibit_paragraph_indentation = 1;
780 array = sort_index (index);
784 add_html_block_elt_args ("<ul class=\"index-%s\" compact>",
786 else if (!no_headers && !docbook)
787 { /* Info. Add magic cookie for info readers (to treat this
788 menu differently), and the usual start-of-menu. */
790 add_word ("\010[index");
792 add_word ("\010]\n");
793 add_word ("* Menu:\n\n");
796 me_inhibit_expansion++;
798 /* This will probably be enough. */
800 line = xmalloc (line_length);
/* Work out how many digits the largest line number needs, so that
   every "(line N)" label of this index gets the same field width. */
803 char *max_output_line_number = (char *) xmalloc (25 * sizeof (char));
806 sprintf (max_output_line_number, "%d", output_line_number);
809 INDEX_ELT *tmp_entry = index;
811 for (tmp_entry = index; tmp_entry; tmp_entry = tmp_entry->next)
812 tmp = tmp_entry->output_line > tmp ? tmp_entry->output_line : tmp;
813 sprintf (max_output_line_number, "%d", tmp);
816 output_line_number_len = strlen (max_output_line_number);
817 free (max_output_line_number);
820 for (item = 0; (index = array[item]); item++)
822 /* A pathological document might have an index entry outside of any
823 node. Don't crash; try using the section name instead. */
824 char *index_node = index->node;
826 line_number = index->defining_line;
827 input_filename = index->defining_file;
829 if ((!index_node || !*index_node) && html)
830 index_node = toc_find_section_of_node (index_node);
832 if (!index_node || !*index_node)
834 line_error (_("Entry for index `%s' outside of any node"),
836 if (html || !no_headers)
837 index_node = (char *) _("(outside of any node)");
842 /* For HTML, we need to expand and HTML-escape the
843 original entry text, at the same time. Consider
844 @cindex J@"urgen. We want J&uuml;rgen. We can't
845 expand and then escape since we'll end up with
846 J&amp;uuml;rgen. We can't escape and then expand
847 because then `expansion' will see J@"urgen, and
848 @"urgen is not a command. */
850 maybe_escaped_expansion (index->entry_text, index->code, 1);
852 add_html_block_elt_args ("\n<li><a href=\"%s#index-",
853 (splitting && index->output_file) ? index->output_file : "");
854 add_escaped_anchor_name (index->entry_text, 0);
855 add_word_args ("-%d\">%s</a>: ", index->entry_number,
859 add_word ("<a href=\"");
860 if (index->node && *index->node)
862 /* Ensure any non-macros in the node name are expanded. */
863 char *expanded_index;
865 in_fixed_width_font++;
866 expanded_index = expansion (index_node, 0);
867 in_fixed_width_font--;
868 add_anchor_name (expanded_index, 1);
869 expanded_index = escape_string (expanded_index);
870 add_word_args ("\">%s</a>", expanded_index);
871 free (expanded_index);
873 else if (STREQ (index_node, _("(outside of any node)")))
875 add_anchor_name (index_node, 1);
876 add_word_args ("\">%s</a>", index_node);
879 /* If we use the section instead of the (missing) node, then
880 index_node already includes all we need except the #. */
881 add_word_args ("#%s</a>", index_node);
883 add_html_block_elt ("</li>");
885 if (internal_links_stream)
887 char *escaped = escaped_anchor_name (index->entry_text);
888 fprintf (internal_links_stream, "%s#index-%s-%d\t%s\t%s\n",
889 (splitting && index->output_file) ? index->output_file : "",
890 escaped, index->entry_number, index_name,
895 else if (xml && docbook)
897 /* Let DocBook processor generate the index. */
901 #define MIN_ENTRY_COLUMNS 37
902 /* Make sure there is enough space even if index->entry has zero
904 unsigned new_length = strlen (index->entry) + MIN_ENTRY_COLUMNS;
906 if (new_length < 50) /* minimum length used below */
908 new_length += strlen (index_node) + 7; /* * : .\n\0 */
910 if (new_length > line_length)
912 line_length = new_length;
913 line = xrealloc (line, line_length);
915 /* Print the entry, nicely formatted. We've already
916 expanded any commands in index->entry, including any
917 implicit @code. Thus, can't call execute_string, since
918 @@ has turned into @. */
922 int width = mbswidth (index->entry, 0);
924 /* Unfortunately, our @tie{} / @w{ } magic is an
925 unprintable character, and so mbswidth doesn't
926 count it. If that byte value occurs in a multibyte
927 string, we'd lose, but at least it's only a
928 question of minor formatting, not functionality. */
929 width += count_strn_chars (index->entry,
930 strlen (index->entry), NON_BREAKING_SPACE);
/* nspaces is negative on purpose: a negative field width passed
   through the * of the format makes sprintf left-justify the entry
   and pad it on the right.  The byte length is used as the base and
   corrected by the display width so that padding is counted in
   columns rather than bytes. */
932 nspaces = -(strlen (index->entry)
933 + (MIN_ENTRY_COLUMNS - width));
934 sprintf (line, "* %*s ",
935 width < MIN_ENTRY_COLUMNS ? nspaces : 0,
937 line[2 + strlen (index->entry)] = ':';
938 insert_string (line);
939 /* Expand any non-macros in the node name. */
940 in_fixed_width_font++;
941 execute_string ("%s. ", index_node);
942 insert_index_output_line_no (index->output_line,
943 output_line_number_len);
944 in_fixed_width_font--;
948 /* With --no-headers, the @node lines are gone, so
949 there's little sense in referring to them in the
950 index. Instead, output the number or name of the
951 section that corresponds to that node. */
952 sprintf (line, "%-*s ", number_sections ? 46 : 1,
954 line[strlen (index->entry)] = ':';
955 insert_string (line);
957 if (strlen (index->section) > 0)
958 { /* We got your number. */
959 insert_string ((char *) _("See "));
960 insert_string (index->section);
963 { /* Sigh, index in an @unnumbered. :-\ */
964 insert_string ("\n ");
965 insert_string ((char *) _("See "));
966 insert_string ("``");
/* NOTE(review): the string returned by expansion () is handed
   straight to insert_string and is not freed in the lines visible
   here, whereas other callers in this file keep and later free the
   result -- looks like a small per-entry leak; confirm. */
967 insert_string (expansion (index->section_name, 0));
968 insert_string ("''");
971 insert_string (". ");
972 insert_index_output_line_no (index->output_line,
973 output_line_number_len);
977 /* Prevent `output_paragraph' from growing to the size of the
985 me_inhibit_expansion--;
988 close_single_paragraph ();
989 filling_enabled = saved_filling_enabled;
990 inhibit_paragraph_indentation = saved_inhibit_paragraph_indentation;
991 input_filename = saved_input_filename;
992 line_number = saved_line_number;
995 add_html_block_elt ("</ul>");
996 else if (xml && docbook)
1001 /* Re-increment the line number, because get_rest_of_line
1002 left us looking at the next line after the command. */