1 /* index.c -- indexing for Texinfo.
2 $Id: index.c,v 1.25 2002/03/19 14:43:04 karl Exp $
4 Copyright (C) 1998, 99, 2002 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
27 /* An index element... */
28 typedef struct index_elt
30 struct index_elt *next; /* Next element on this index's list; new entries are pushed at the head. */
31 char *entry; /* The index entry itself, after expansion; set when the index is sorted for printing. */
32 char *entry_text; /* The original, non-expanded entry text; freed along with the element. */
33 char *node; /* The node from whence it came. Not freed with the element. */
34 int code; /* Nonzero means add `@code{...}' when
35 printing this element. */
36 int defining_line; /* Line number where this entry was written. */
37 char *defining_file; /* Source file for defining_line; a private copy of the file name. */
41 /* A list of short-names for each index.
42 There are two indices into the the_indices array.
43 * read_index is the index that points to the list of index
44 entries that we will find if we ask for the list of entries for this name.
46 * write_index is the index that points to the list of index entries
47 that we will add new entries to.
49 Initially, read_index and write_index are the same, but the
50 @syncodeindex and @synindex commands can change the list we add entries to.
53 For example, after the commands `@cindex foo', `@defindex ii', `@synindex cp ii' and `@cindex bar',
59 the cp index will contain the entry `foo', and the new ii
60 index will contain the entry `bar'. This is consistent with the
61 way texinfo.tex handles the same situation.
63 In addition, for each index, it is remembered whether that index is
64 a code index or not. Code indices have @code{} inserted around the
65 first word when they are printed with printindex. */
69 int read_index; /* index entries for `name' */
70 int write_index; /* store index entries here, @synindex can change it */
74 INDEX_ALIST **name_index_alist = NULL;
76 /* An array of pointers. Each one is for a different index. The
77 "synindex" command changes which array slot is pointed to by a given index. */
79 INDEX_ELT **the_indices = NULL;
81 /* The number of defined indices. */
82 int defined_indices = 0;
84 /* Stuff for defining commands on the fly. */
85 COMMAND **user_command_array = NULL;
86 int user_command_array_len = 0;
88 /* How to compare index entries for sorting. May be set to strcoll. */
89 int (*index_compare_fn) () = strcasecmp;
91 /* Find which element of name_index_alist has the name NAME, looking at
92 slots 0 .. defined_indices - 1. Returns -1 if NAME isn't found. */
94 find_index_offset (name)
98 for (i = 0; i < defined_indices; i++)
99 if (name_index_alist[i] && STREQ (name, name_index_alist[i]->name)) /* NULL slots belong to deleted indices; skip them. */
104 /* Return a pointer to the name_index_alist entry (name . index) whose name is
105 NAME, located with find_index_offset. Return NULL if the index doesn't exist. */
110 int offset = find_index_offset (name);
112 return name_index_alist[offset];
117 /* Define a user-level command NAME that runs PROC. This happens only for
118 user-defined indexes, and when initializing the builtin indices. */
120 define_user_command (name, proc, needs_braces_p)
122 COMMAND_FUNCTION *proc;
125 int slot = user_command_array_len; /* The new command takes the next free slot. */
126 user_command_array_len++;
128 if (!user_command_array)
129 user_command_array = xmalloc (1 * sizeof (COMMAND *));
131 user_command_array = xrealloc (user_command_array,
132 (1 + user_command_array_len) * sizeof (COMMAND *)); /* Sized for one element more than the new length. */
134 user_command_array[slot] = xmalloc (sizeof (COMMAND));
135 user_command_array[slot]->name = xstrdup (name); /* The command keeps its own copy of NAME. */
136 user_command_array[slot]->proc = proc;
137 user_command_array[slot]->argument_in_braces = needs_braces_p;
140 /* Free the index entries on the list INDEX. */
147 while ((temp = index))
150 free (temp->entry_text);
151 /* Do not free the node, because we already freed the tag table,
152 which freed all the node names. */
153 /* free (temp->node); */
159 /* Flush an index by name. This will delete the list of entries that
160 would be written by a @printindex command for this index. */
166 int which = find_index_offset (name);
168 /* The index might have already been freed if this was the target of an @synindex. */
170 if (which < 0 || !name_index_alist[which])
173 i = name_index_alist[which]->read_index;
175 free_index (the_indices[i]);
176 the_indices[i] = NULL;
178 free (name_index_alist[which]->name);
179 free (name_index_alist[which]);
180 name_index_alist[which] = NULL;
183 /* Add the arguments to the current index command to the index NAME.
184 FIXME (HTML): generate a specific HTML anchor for the entry. */
193 tem = find_index (name);
195 which = tem ? tem->write_index : -1;
197 if (macro_expansion_output_stream && !executing_string)
198 append_to_expansion_output (input_text_offset + 1);
200 get_rest_of_line (0, &index_entry);
201 ignore_blank_line ();
203 if (macro_expansion_output_stream && !executing_string)
205 char *index_line = xmalloc (strlen (index_entry) + 2);
206 sprintf (index_line, "%s\n", index_entry);
207 me_execute_string_keep_state (index_line, NULL);
213 line_error (_("Unknown index `%s'"), name);
218 INDEX_ELT *new = xmalloc (sizeof (INDEX_ELT));
219 new->next = the_indices[which];
220 new->entry_text = index_entry;
222 new->node = current_node ? current_node : xstrdup ("");
223 new->code = tem->code;
224 new->defining_line = line_number - 1;
225 /* We need to make a copy since input_filename may point to
226 something that goes away, for example, inside a macro.
227 (see the findexerr test). */
228 new->defining_file = xstrdup (input_filename);
229 the_indices[which] = new;
231 /* The index breaks if there are colons in the entry.
232 -- This is true, but it's too painful to force changing index
233 entries to use `colon', and too confusing for users. The real
234 fix is to change Info to support arbitrary characters
235 in node names, and we're not ready to do that. --karl */
237 if (strchr (new->entry_text, ':'))
238 warning (_("Info cannot handle `:' in index entry `%s'"),
243 xml_insert_indexterm (index_entry, name);
246 /* The function which user-defined index commands call. The index name is the command name without its trailing "index". */
250 char *name = xstrdup (command); /* Work on a copy; COMMAND itself is left untouched. */
251 if (strlen (name) >= strlen ("index"))
252 name[strlen (name) - strlen ("index")] = 0; /* Cut off the last strlen ("index") characters. */
253 index_add_arg (name);
257 /* Define an index known as NAME. We assign the slot number, reusing an empty
258 slot when there is one. If CODE is nonzero, make this a code index. */
260 defindex (name, code)
266 /* If it already exists, flush it. */
269 /* Try to find an empty slot, i.e. a NULL entry in name_index_alist. */
271 for (i = 0; i < defined_indices; i++)
272 if (!name_index_alist[i])
279 { /* No such luck. Make space for another index. */
280 slot = defined_indices;
283 name_index_alist = (INDEX_ALIST **)
284 xrealloc (name_index_alist, (1 + defined_indices)
285 * sizeof (INDEX_ALIST *));
286 the_indices = (INDEX_ELT **)
287 xrealloc (the_indices, (1 + defined_indices) * sizeof (INDEX_ELT *));
290 /* We have a slot. Start assigning. */
291 name_index_alist[slot] = xmalloc (sizeof (INDEX_ALIST));
292 name_index_alist[slot]->name = xstrdup (name);
293 name_index_alist[slot]->read_index = slot; /* A new index reads from its own list... */
294 name_index_alist[slot]->write_index = slot; /* ...and writes to it as well. */
295 name_index_alist[slot]->code = code;
297 the_indices[slot] = NULL; /* The new index starts out with no entries. */
300 /* Define an index NAME, implicitly @code if CODE is nonzero, and the command `NAMEindex' (run by gen_index) that adds to it. */
302 top_defindex (name, code)
308 temp = xmalloc (1 + strlen (name) + strlen ("index")); /* NAME, "index" and the terminating NUL. */
309 sprintf (temp, "%sindex", name);
310 define_user_command (temp, gen_index, 0);
311 defindex (name, code);
315 /* Set up predefined indices: get rid of any existing indices, then define cp, fn, ky, pg, tp and vr. */
321 /* Create the default data structures. */
323 /* Initialize data space. */
326 the_indices = xmalloc ((1 + defined_indices) * sizeof (INDEX_ELT *));
327 the_indices[defined_indices] = NULL;
329 name_index_alist = xmalloc ((1 + defined_indices)
330 * sizeof (INDEX_ALIST *));
331 name_index_alist[defined_indices] = NULL;
334 /* If there were existing indices, get rid of them now. */
335 for (i = 0; i < defined_indices; i++)
337 undefindex (name_index_alist[i]->name);
338 if (name_index_alist[i])
339 { /* Suppose we're called with two input files, and the first
340 does a @synindex pg cp. Then, when we get here to start
341 the second file, the "pg" element won't get freed by
342 undefindex (because it's pointing to "cp"). So free it
343 here; otherwise, when we try to define the pg index again
344 just below, it will still point to cp. */
345 free (name_index_alist[i]->name);
346 free (name_index_alist[i]);
347 name_index_alist[i] = NULL;
351 /* Add the default indices. */
352 top_defindex ("cp", 0); /* cp is the only non-code index. */
353 top_defindex ("fn", 1); /* Function index. */
354 top_defindex ("ky", 1); /* Key index. */
355 top_defindex ("pg", 1); /* Program index. */
356 top_defindex ("tp", 1); /* Data type index. */
357 top_defindex ("vr", 1); /* Variable index. */
360 /* Given an index name, return the offset in the_indices of the list read for
361 this index (its read_index), or -1 if there is no such index. */
363 translate_index (name)
366 INDEX_ALIST *which = find_index (name);
369 return which->read_index;
374 /* Return the index list which belongs to NAME. The list may be NULL (no entries); (INDEX_ELT *) -1 is the "no such index" marker. */
379 int which = translate_index (name);
381 return (INDEX_ELT *) -1;
383 return the_indices[which];
386 /* Define a new index and its `NAMEindex' command. The rest of the input line is the name of the index; an index that already exists is reported as an error. */
392 get_rest_of_line (0, &name);
394 if (find_index (name))
396 line_error (_("Index `%s' already exists"), name);
400 char *temp = xmalloc (strlen (name) + sizeof ("index")); /* sizeof counts the terminating NUL. */
401 sprintf (temp, "%sindex", name);
402 define_user_command (temp, gen_index, 0);
403 defindex (name, code);
422 /* Expects 2 args, on the same line. Both are index abbreviations.
423 Make the first one be a synonym for the second one, i.e. make new entries
424 for the first one go to the list that the second one writes to. */
429 char *abbrev1, *abbrev2;
432 get_until_in_line (0, " ", &abbrev1);
433 target = find_index_offset (abbrev1); /* The index being redirected. */
435 get_until_in_line (0, " ", &abbrev2);
436 source = find_index_offset (abbrev2); /* The index whose list receives the entries. */
437 if (source < 0 || target < 0)
439 line_error (_("Unknown index `%s' and/or `%s' in @synindex"),
444 name_index_alist[target]->write_index
445 = name_index_alist[source]->write_index; /* Only write_index changes here. */
453 cm_pindex () /* Program index (pg). */
455 index_add_arg ("pg");
459 cm_vindex () /* Variable index (vr). */
461 index_add_arg ("vr");
465 cm_kindex () /* Key index (ky). */
467 index_add_arg ("ky");
471 cm_cindex () /* Concept index (cp). */
473 index_add_arg ("cp");
477 cm_findex () /* Function index (fn). */
479 index_add_arg ("fn");
483 cm_tindex () /* Data Type index (tp). */
485 index_add_arg ("tp");
489 index_element_compare (element1, element2) /* Comparison function handed to qsort when an index is sorted. */
490 INDEX_ELT **element1, **element2;
492 return index_compare_fn ((*element1)->entry, (*element2)->entry); /* Compare the expanded entry texts. */
495 /* Force all index entries in ARRAY (COUNT elements) to be unique: entries repeating their neighbor's text and node are freed, and equal texts in different nodes get a " <N>" suffix. */
497 make_index_entries_unique (array, count)
505 copy = xmalloc ((1 + count) * sizeof (INDEX_ELT *));
507 for (i = 0, j = 0; i < count; i++)
510 || array[i]->node != array[i + 1]->node /* Node names are compared by pointer, not by contents. */
511 || !STREQ (array[i]->entry, array[i + 1]->entry))
512 copy[j++] = array[i];
515 free (array[i]->entry);
516 free (array[i]->entry_text);
522 /* Now COPY contains only unique entries. Duplicated entries in the
523 original array have been freed. Replace the current array with
524 the copy, fixing the NEXT pointers. */
525 for (i = 0; copy[i]; i++)
527 copy[i]->next = copy[i + 1];
529 /* Fix entry names which are the same. They point to different nodes,
530 so we make the entry name unique. */
532 && STREQ (copy[i]->entry, copy[i + 1]->entry)
535 char *new_entry_name;
537 new_entry_name = xmalloc (10 + strlen (copy[i]->entry)); /* 10 extra bytes: " <", ">", the NUL and at most six digits of COUNTER. */
538 sprintf (new_entry_name, "%s <%d>", copy[i]->entry, counter);
539 free (copy[i]->entry);
540 copy[i]->entry = new_entry_name;
550 /* Free the storage used only by COPY. */
554 /* Sort the index passed in INDEX, returning an array of
555 pointers to elements. The array is terminated with a NULL
556 pointer. Each element's entry is first expanded from its entry_text;
557 the array is then sorted with qsort and its entries are made unique. */
563 INDEX_ELT *temp = index;
565 int save_line_number = line_number; /* Saved: the expansion loop below overwrites it. */
566 char *save_input_filename = input_filename; /* Saved: the expansion loop below overwrites it. */
567 int save_html = html;
569 /* Pretend we are in non-HTML mode, for the purpose of getting the
570 expanded index entry that lacks any markup and other HTML escape
571 characters which could produce a wrong sort order. */
572 /* fixme: html: this still causes some markup, such as non-ASCII
573 characters @AE{} etc., to sort incorrectly. */
582 /* We have the length. Make an array. */
584 array = xmalloc ((count + 1) * sizeof (INDEX_ELT *));
590 array[count++] = temp;
592 /* Set line number and input filename to the source line for this
593 index entry, as this expansion finds any errors. */
594 line_number = array[count - 1]->defining_line;
595 input_filename = array[count - 1]->defining_file;
597 /* If this particular entry should be printed as a "code" index,
598 then expand it as @code{entry}, i.e. as in fixed-width font. */
599 array[count-1]->entry = expansion (temp->entry_text,
600 array[count-1]->code);
604 array[count] = NULL; /* terminate the array. */
605 line_number = save_line_number;
606 input_filename = save_input_filename;
610 /* This is not perfect. We should set (then restore) the locale to the
611 documentlanguage, so strcoll operates according to the document's
612 locale, not the user's. For now, I'm just going to assume that
613 those few new documents which use @documentlanguage will be
614 processed in the appropriate locale. In any case, don't use
615 strcoll in the C (aka POSIX) locale, that is the ASCII ordering. */
616 if (language_code != en)
618 char *lang_env = getenv ("LANG"); /* Only LANG is looked at here. */
619 if (lang_env && !STREQ (lang_env, "C") && !STREQ (lang_env, "POSIX"))
620 index_compare_fn = strcoll;
622 #endif /* HAVE_STRCOLL */
624 /* Sort the array. */
625 qsort (array, count, sizeof (INDEX_ELT *), index_element_compare);
626 make_index_entries_unique (array, count); /* Runs on the sorted array, so equal entries are adjacent. */
630 /* Nonzero means that we are in the middle of printing an index. */
631 int printing_index = 0;
633 /* Takes one arg, a short name of an index to print.
634 Outputs a menu of the sorted elements of the index. */
641 get_rest_of_line (0, &index_name);
642 xml_insert_element (PRINTINDEX, START);
643 insert_string (index_name);
644 xml_insert_element (PRINTINDEX, END);
650 INDEX_ELT *last_index = 0;
653 unsigned line_length;
655 int saved_inhibit_paragraph_indentation = inhibit_paragraph_indentation;
656 int saved_filling_enabled = filling_enabled;
657 int saved_line_number = line_number;
658 char *saved_input_filename = input_filename;
661 get_rest_of_line (0, &index_name);
663 index = index_list (index_name);
664 if (index == (INDEX_ELT *)-1)
666 line_error (_("Unknown index `%s' in @printindex"), index_name);
671 /* Do this before sorting, so that execute_string runs in the right environment. */
675 /* Do this before sorting, so execute_string in index_element_compare
676 will give the same results as when we actually print. */
679 inhibit_paragraph_indentation = 1;
681 array = sort_index (index);
685 add_word ("<ul compact>");
686 else if (!no_headers && !docbook)
687 add_word ("* Menu:\n\n");
689 me_inhibit_expansion++;
691 /* This will probably be enough. */
693 line = xmalloc (line_length);
695 for (item = 0; (index = array[item]); item++)
697 /* A pathological document might have an index entry outside of any
698 node. Don't crash; try using the section name instead. */
699 char *index_node = index->node;
701 line_number = index->defining_line;
702 input_filename = index->defining_file;
704 if ((!index_node || !*index_node) && html)
705 index_node = toc_find_section_of_node (index_node);
707 if (!index_node || !*index_node)
709 line_error (_("Entry for index `%s' outside of any node"),
711 if (html || !no_headers)
712 index_node = _("(outside of any node)");
716 /* fixme: html: we should use specific index anchors pointing
717 to the actual location of the indexed position (but then we
718 have to find something to wrap the anchor around). */
721 && STREQ (last_index->entry_text, index->entry_text))
722 add_word (", "); /* Don't repeat the previous entry. */
725 /* In the HTML case, the expanded index entry is not
726 good for us, since it was expanded for non-HTML mode
727 inside sort_index. So we need to HTML-escape and
728 expand the original entry text here. */
729 char *escaped_entry = xstrdup (index->entry_text);
730 char *expanded_entry;
732 /* expansion() doesn't HTML-escape the argument, so need
733 to do it separately. */
734 escaped_entry = escape_string (escaped_entry);
735 expanded_entry = expansion (escaped_entry, index->code);
736 add_word_args ("\n<li>%s: ", expanded_entry);
737 free (escaped_entry);
738 free (expanded_entry);
740 add_word ("<a href=\"");
741 if (index->node && *index->node)
743 /* Make sure any non-macros in the node name are expanded. */
744 in_fixed_width_font++;
745 index_node = expansion (index_node, 0);
746 in_fixed_width_font--;
747 add_anchor_name (index_node, 1);
748 add_word_args ("\">%s</a>", index_node);
751 else if (STREQ (index_node, _("(outside of any node)")))
753 add_anchor_name (index_node, 1);
754 add_word_args ("\">%s</a>", index_node);
757 /* If we use the section instead of the (missing) node, then
758 index_node already includes all we need except the #. */
759 add_word_args ("#%s</a>", index_node);
761 else if (xml && docbook)
763 xml_insert_indexentry (index->entry, index_node);
767 unsigned new_length = strlen (index->entry);
769 if (new_length < 50) /* minimum length used below */
771 new_length += strlen (index_node) + 7; /* * : .\n\0 */
773 if (new_length > line_length)
775 line_length = new_length;
776 line = xrealloc (line, line_length);
778 /* Print the entry, nicely formatted. We've already
779 expanded any commands in index->entry, including any
780 implicit @code. Thus, can't call execute_string, since
781 @@ has turned into @. */
784 sprintf (line, "* %-37s ", index->entry);
785 line[2 + strlen (index->entry)] = ':';
786 insert_string (line);
787 /* Make sure any non-macros in the node name are expanded. */
788 in_fixed_width_font++;
789 execute_string ("%s.\n", index_node);
790 in_fixed_width_font--;
794 /* With --no-headers, the @node lines are gone, so
795 there's little sense in referring to them in the
796 index. Instead, output the number or name of the
797 section that corresponds to that node. */
798 char *section_name = toc_find_section_of_node (index_node);
800 sprintf (line, "%-*s ", number_sections ? 50 : 1, index->entry);
801 line[strlen (index->entry)] = ':';
802 insert_string (line);
806 unsigned ref_len = strlen (section_name) + 30;
808 if (ref_len > line_length)
810 line_length = ref_len;
811 line = xrealloc (line, line_length);
816 while (section_name[idx]
817 && (isdigit (section_name[idx])
818 || (idx && section_name[idx] == '.')))
822 sprintf (line, " See %.*s.\n", idx, section_name);
824 sprintf (line, "\n See ``%s''.\n", section_name);
825 insert_string (line);
829 insert_string (" "); /* force a blank */
830 execute_string ("See node %s.\n", index_node);
835 /* Prevent `output_paragraph' from growing to the size of the whole index. */
844 me_inhibit_expansion--;
848 close_single_paragraph ();
849 filling_enabled = saved_filling_enabled;
850 inhibit_paragraph_indentation = saved_inhibit_paragraph_indentation;
851 input_filename = saved_input_filename;
852 line_number = saved_line_number;
856 else if (xml && docbook)