1 /* html.c -- html-related utilities.
2 $Id: html.c,v 1.42 2008/05/19 18:26:47 karl Exp $
4 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 Free Software Foundation, Inc.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include "sectioning.h"
31 /* Filename to which to write list of index entries; NULL until a name is stored here (no assignment is visible in this part of the file). */
32 char *internal_links_filename = NULL;
/* Output stream, presumably the one opened on internal_links_filename
   (TODO confirm: the code that opens and closes it is not visible
   here).  Starts out NULL.  */
33 FILE *internal_links_stream = NULL;
35 /* Append CHAR to BUFFER, (re)allocating as necessary. We don't handle
40 unsigned size; /* allocated */
41 unsigned length; /* used */
48 buffer_type *buf = xmalloc (sizeof (buffer_type));
/* Store the character C in BUF and keep the stored text terminated by
   a zero byte.  When length has reached size, the storage is
   reallocated to buf->size bytes.  C is written at index length - 1
   and the terminator at index length, so length has presumably been
   incremented before the test -- TODO confirm: the statements that
   adjust length and size are not visible in this excerpt.  */
57 append_char (buffer_type *buf, int c)
60 if (buf->length >= buf->size)
63 buf->buffer = xrealloc (buf->buffer, buf->size);
65 buf->buffer[buf->length - 1] = c;
66 buf->buffer[buf->length] = 0;
/* Summary of the visible lines of process_css_file: the input is stdin
   when FILENAME is a single dash, otherwise the named file opened for
   reading.  Each character is routed by a four-state machine (null,
   comment, import, inline) into one of two growing buffers.  At the
   end the import text is emitted with add_word and its storage freed,
   and the character storage of the inline buffer is returned.  */
69 /* Read the cascading style-sheet file FILENAME. Write out any @import
70 commands, which must come first, by the definition of css. If the
71 file contains any actual css code following the @imports, return it;
74 process_css_file (char *filename)
79 buffer_type *import_text = init_buffer ();
80 buffer_type *inline_text = init_buffer ();
82 enum { null_state, comment_state, import_state, inline_state } state
83 = null_state, prev_state;
85 prev_state = null_state;
87 /* read from stdin if `-' is the filename. */
88 f = STREQ (filename, "-") ? stdin : fopen (filename, "r");
91 error (_("%s: could not open --css-file: %s"), progname, filename);
95 /* Read the file. The @import statements must come at the beginning,
96 with only whitespace and comments allowed before any inline css code. */
/* getc yields a negative value (EOF) at end of input or on a read
   error, and that is what ends this loop.  */
97 while ((c = getc (f)) >= 0)
104 case null_state: /* between things */
106 { /* Only @import and @charset should switch into
107 import_state, other @-commands, such as @media, should
108 put us into inline_state. I don't think any other css
109 @-commands start with `i' or `c', although of course
110 this will break when such a command is defined. */
111 int nextchar = getc (f);
112 if (nextchar == 'i' || nextchar == 'c')
114 append_char (import_text, c);
115 append_char (import_text, nextchar);
116 state = import_state;
120 ungetc (nextchar, f); /* wasn't an @import */
121 state = inline_state;
125 { /* possible start of a comment */
126 int nextchar = getc (f);
128 state = comment_state;
131 ungetc (nextchar, f); /* wasn't a comment */
132 state = inline_state;
135 else if (isspace (c))
136 ; /* skip whitespace; maybe should use c_isspace? */
139 /* not an @import, not a comment, not whitespace: we must
140 have started the inline text. */
141 state = inline_state;
/* The character that moved us into inline_state is itself the first
   character of the inline text, so it is stored right away.  */
143 if (state == inline_state)
144 append_char (inline_text, c);
/* The end of a comment restores prev_state (see the comment_state
   case), so on leaving null_state the state to come back to is
   recorded as null_state.  */
146 if (state != null_state)
147 prev_state = null_state;
/* lastchar is maintained outside the lines visible here; presumably it
   holds the character read on the previous iteration -- TODO confirm.  */
151 if (c == '/' && lastchar == '*')
152 state = prev_state; /* end of comment */
153 break; /* else ignore this comment char */
156 append_char (import_text, c); /* include this import char */
158 { /* done with @import */
159 append_char (import_text, '\n'); /* make the output nice */
161 prev_state = import_state;
166 /* No harm in writing out comments, so don't bother parsing
167 them out, just append everything. */
168 append_char (inline_text, c);
175 fclose (f); /* Even closing stdin should be ok, can't read it more
178 /* Reached the end of the file. We should not be still in a comment. */
179 if (state == comment_state)
180 warning (_("%s:%d: --css-file ended in comment"), filename, lineno);
182 /* Write the @import text, if any. */
/* Assumes a buffer that never received a character still has a NULL
   buffer pointer -- the body of init_buffer is not visible here; TODO
   confirm.  */
183 if (import_text->buffer)
185 add_word (import_text->buffer);
186 free (import_text->buffer);
190 /* We're wasting the buffer struct memory, but so what. */
/* The caller receives the raw character storage of inline_text;
   html_output_head hands it to insert_string.  */
191 return inline_text->buffer;
/* Head of the linked stack of HTML tags; push_tag links each new entry
   in front of the current head.  */
194 HSTACK *htmlstack = NULL;
/* Set to 1 by html_output_head after it has written the h1 settitle
   heading; that function tests the flag first, so it writes the heading
   at most once.  */
197 int html_title_written = 0;
/* Write the opening part of an HTML document: the html and head
   elements, the title, meta elements, navigation link elements, the
   copying text wrapped in an HTML comment, the built-in style rules
   plus any css taken from the css include file, the closing of head,
   the opening of body and, under the conditions tested at the end, the
   h1 settitle heading.  */
200 html_output_head (void)
202 static const char *html_title = NULL;
203 char *encoding = current_document_encoding ();
/* html_title is static, so the escaped title survives across calls;
   presumably the assignment below is guarded so that it runs only
   once -- the guard is not visible in this excerpt.  */
205 /* The <title> should not have markup, so use text_expansion. */
207 html_title = escape_string (title ?
208 text_expansion (title) : (char *) gdt("Untitled"));
210 /* Make sure this is the very first string of the output document. */
211 output_paragraph_offset = 0;
213 add_html_block_elt_args ("<html lang=\"%s\">\n<head>\n",
214 language_table[language_code].abbrev);
/* NOTE(review): the title line used for split output passes a fresh
   xstrdup copy of current_node through escape_string and does not keep
   the returned pointer, so no release of that storage is visible
   here.  */
216 /* When splitting, add current node's name to title if it's available
218 if (splitting && current_node && !STREQ (current_node, "Top"))
219 add_word_args ("<title>%s - %s</title>\n",
220 escape_string (xstrdup (current_node)), html_title);
222 add_word_args ("<title>%s</title>\n", html_title);
224 add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html");
225 if (encoding && *encoding)
226 add_word_args ("; charset=%s", encoding);
230 if (!document_description)
231 document_description = html_title;
233 add_word_args ("<meta name=\"description\" content=\"%s\">\n",
234 document_description);
235 add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n",
238 /* Navigation bar links. */
240 add_word ("<link title=\"Top\" rel=\"top\" href=\"#Top\">\n");
243 /* Always put a top link. */
244 add_word ("<link title=\"Top\" rel=\"start\" href=\"index.html#Top\">\n");
246 /* We already have a top link, avoid duplication. */
247 if (tag_table->up && !STREQ (tag_table->up, "Top"))
248 add_link (tag_table->up, "rel=\"up\"");
251 add_link (tag_table->prev, "rel=\"prev\"");
254 add_link (tag_table->next, "rel=\"next\"");
256 /* fixxme: Look for a way to put links to various indices in the
257 document. Also possible candidates to be added here are First and
262 /* We are splitting, but we neither have a tag_table. So this must be
263 index.html. So put a link to Top. */
264 add_word ("<link title=\"Top\" rel=\"start\" href=\"#Top\">\n");
/* The backslash at the end of the next line continues the string
   literal onto the following line.  */
267 add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \
268 rel=\"generator-home\" title=\"Texinfo Homepage\">\n");
271 { /* It is not ideal that we include the html markup here within
272 <head>, so we use text_expansion. */
273 insert_string ("<!--\n");
274 insert_string (text_expansion (copying_text));
275 insert_string ("-->\n");
278 /* Put the style definitions in a comment for the sake of browsers
279 that don't support <style>. */
280 add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n");
281 add_word ("<style type=\"text/css\"><!--\n");
284 char *css_inline = NULL;
287 /* This writes out any @import commands from the --css-file,
288 and returns any actual css code following the imports. */
289 css_inline = process_css_file (css_include);
291 /* This seems cleaner than adding <br>'s at the end of each line for
292 these "roman" displays. It's hardly the end of the world if the
293 browser doesn't do <style>s, in any case; they'll just come out in
295 #define CSS_FONT_INHERIT "font-family:inherit"
296 add_word_args (" pre.display { %s }\n", CSS_FONT_INHERIT);
297 add_word_args (" pre.format { %s }\n", CSS_FONT_INHERIT);
299 /* Alternatively, we could do <font size=-1> in insertion.c, but this
300 way makes it easier to override. */
301 #define CSS_FONT_SMALLER "font-size:smaller"
302 add_word_args (" pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT,
304 add_word_args (" pre.smallformat { %s; %s }\n", CSS_FONT_INHERIT,
306 add_word_args (" pre.smallexample { %s }\n", CSS_FONT_SMALLER);
307 add_word_args (" pre.smalllisp { %s }\n", CSS_FONT_SMALLER);
309 /* Since HTML doesn't have a sc element, we use span with a bit of
310 CSS spice instead. */
311 #define CSS_FONT_SMALL_CAPS "font-variant:small-caps"
312 add_word_args (" span.sc { %s }\n", CSS_FONT_SMALL_CAPS);
314 /* Roman (default) font class, closest we can come. */
315 #define CSS_FONT_ROMAN "font-family:serif; font-weight:normal;"
316 add_word_args (" span.roman { %s } \n", CSS_FONT_ROMAN);
318 /* Sans serif font class. */
319 #define CSS_FONT_SANSSERIF "font-family:sans-serif; font-weight:normal;"
320 add_word_args (" span.sansserif { %s } \n", CSS_FONT_SANSSERIF);
322 /* Write out any css code from the user's --css-file. */
324 insert_string (css_inline);
326 add_word ("--></style>\n");
329 add_word_args ("<link rel=\"stylesheet\" type=\"text/css\" href=\"%s\">\n",
332 add_word ("</head>\n<body>\n");
/* The settitle heading is written only when a title exists, it has not
   been written before, and titlepage_cmd_present is nonzero; the flag
   is then set so the heading is not repeated.  */
334 if (title && !html_title_written && titlepage_cmd_present)
336 add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title);
337 html_title_written = 1;
343 /* Escape HTML special characters in the string if necessary,
344 returning a pointer to a possibly newly-allocated one. */
/* Works in two passes.  The first pass computes the size of the result,
   counting 6, 5 or 4 bytes for each special character.  If that size
   equals the scanned length, STRING itself is returned, so a caller
   that frees the result must first compare it with its argument
   (add_link does so).  Otherwise a buffer of the computed size is
   allocated and the second pass fills it.
   NOTE(review): the replacement literals in the strcpy calls below look
   as if an extraction step decoded them; the byte counts of the first
   pass match the entities &quot; &amp; &lt; and &gt; -- confirm against
   the real file.
   NOTE(review): the final return subtracts newlen from newstring, so
   newstring has presumably been advanced past everything written; the
   advancing statements are not visible in this excerpt.  */
346 escape_string (char *string)
349 int i = 0, newlen = 0;
353 /* Find how much to allocate. */
357 newlen += 6; /* `"' */
360 newlen += 5; /* `&' */
364 newlen += 4; /* `<', `>' */
372 if (newlen == i) return string; /* Already OK. */
374 newstring = xmalloc (newlen);
381 strcpy (newstring, """);
385 strcpy (newstring, "&");
389 strcpy (newstring, "<");
393 strcpy (newstring, ">");
397 newstring[0] = string[i];
403 return newstring - newlen;
406 /* Save current tag: allocate a new HSTACK entry, give it its own copy of ATTRIBS, and link it in as the new head of htmlstack. */
408 push_tag (char *tag, char *attribs)
410 HSTACK *newstack = xmalloc (sizeof (HSTACK));
/* The attribute text is duplicated because the visible caller passes a
   local array (formatted_attribs) that does not outlive the call.  How
   TAG is stored is not visible in this excerpt.  */
413 newstack->attribs = xstrdup (attribs);
414 newstack->next = htmlstack;
415 htmlstack = newstack;
/* Body of the routine that removes the top entry of htmlstack; its
   signature line is not visible in this excerpt.  line_error reports
   the case where there is no tag to pop (the guarding test is not
   visible).  Otherwise the attribute copy owned by the top entry is
   freed and the head moves on to the next entry.  What happens to the
   entry saved in tos is not visible here.  */
422 HSTACK *tos = htmlstack;
426 line_error (_("[unexpected] no html tag to pop"));
430 free (htmlstack->attribs);
432 htmlstack = htmlstack->next;
436 /* Check if tag is an empty or a whitespace only element.
437 If so, remove it, keeping whitespace intact. */
/* Returns rollback_happened: 1 when the opening tag was removed from
   output_paragraph, 0 otherwise.  The scan walks backwards from the
   current end of output_paragraph.  */
439 rollback_empty_tag (char *tag)
441 int check_position = output_paragraph_offset;
442 int taglen = strlen (tag);
443 int rollback_happened = 0;
444 char *contents = ""; /* FIXME (ptr to constant, later
445 assigned to malloc'd address).
447 char *contents_canon_white = "";
449 /* If output_paragraph is empty, we cannot rollback :-\ */
450 if (output_paragraph_offset <= 0)
453 /* Find the end of the previous tag. */
454 while (check_position > 0 && output_paragraph[check_position-1] != '>')
457 /* Save stuff between tag's end to output_paragraph's end. */
458 if (check_position != output_paragraph_offset)
460 contents = xmalloc (output_paragraph_offset - check_position + 1);
461 memcpy (contents, output_paragraph + check_position,
462 output_paragraph_offset - check_position);
464 contents[output_paragraph_offset - check_position] = '\0';
466 contents_canon_white = xstrdup (contents);
467 canon_white (contents_canon_white);
470 /* Find the start of the previous tag. */
471 while (check_position > 0 && output_paragraph[check_position-1] != '<')
474 /* Check to see if this is the tag. */
/* The name must be followed by whitespace or by a closing angle
   bracket, so a longer tag name that merely starts with TAG does not
   match.  */
475 if (strncmp ((char *) output_paragraph + check_position, tag, taglen) == 0
476 && (whitespace (output_paragraph[check_position + taglen])
477 || output_paragraph[check_position + taglen] == '>'))
479 if (!contents_canon_white || !*contents_canon_white)
481 /* Empty content after whitespace removal, so roll it back. */
/* check_position indexes the first character of the tag name, so one
   less is the position of the opening angle bracket.  */
482 output_paragraph_offset = check_position - 1;
483 rollback_happened = 1;
485 /* Original contents may not be empty (whitespace.) */
486 if (contents && *contents)
488 insert_string (contents);
494 return rollback_happened;
497 /* Open or close TAG according to START_OR_END. */
/* FORMAT and the arguments after it are formatted into the attribute
   text of the tag; when the attribute text is left empty the tag is
   written without attributes (see the format test where the opening
   tag is written).  Two signatures are visible, selected by the
   preprocessor test below: one with a variable argument list and one
   in the older va_alist form.  */
499 #if defined (VA_FPRINTF) && __STDC__
500 insert_html_tag_with_attribute (int start_or_end, char *tag, char *format, ...)
502 insert_html_tag_with_attribute (start_or_end, tag, format, va_alist)
509 char *old_tag = NULL;
510 char *old_attribs = NULL;
/* NOTE(review): this 2000-byte array is filled by VA_SPRINTF or sprintf
   with no length limit, so attribute text longer than the array would
   overflow it; the existing remark on the line acknowledges the fixed
   limit.  */
511 char formatted_attribs[2000]; /* xx no fixed limits */
513 extern int in_html_elt;
515 if (start_or_end != START)
/* Remember the tag and attributes found on top of htmlstack; they are
   used further down to close and later reopen that tag.  */
520 old_tag = htmlstack->tag;
521 old_attribs = htmlstack->attribs;
530 VA_START (ap, format);
532 VA_SPRINTF (formatted_attribs, format, ap);
534 sprintf (formatted_attribs, format, a1, a2, a3, a4, a5, a6, a7, a8);
539 formatted_attribs[0] = '\0';
541 /* Exception: can nest multiple spans. */
543 && STREQ (htmlstack->tag, tag)
544 && !(STREQ (tag, "span") && STREQ (old_attribs, formatted_attribs)))
547 if (start_or_end == START)
548 push_tag (tag, formatted_attribs);
555 /* texinfo.tex doesn't support more than one font attribute
557 if ((start_or_end == START) && old_tag && *old_tag
558 && !STREQ (old_tag, "samp")
559 && !rollback_empty_tag (old_tag))
560 add_word_args ("</%s>", old_tag);
564 if (start_or_end == START)
565 add_word_args (format ? "<%s %s>" : "<%s>", tag, formatted_attribs);
566 else if (STREQ (tag, "samp") || !rollback_empty_tag (tag))
567 /* Insert close tag only if we didn't rollback,
568 in which case the opening tag is removed. */
569 add_word_args ("</%s>", tag);
/* When closing, the tag remembered in old_tag is written again as an
   opening tag, with its saved attributes if it has any; samp is
   exempt.  */
572 if ((start_or_end != START) && old_tag && *old_tag && !STREQ (old_tag, "samp"))
573 add_word_args (strlen (old_attribs) > 0 ? "<%s %s>" : "<%s>",
574 old_tag, old_attribs);
/* Open or close TAG without attributes: forwards START_OR_END and TAG
   to insert_html_tag_with_attribute with a NULL format.  */
580 insert_html_tag (int start_or_end, char *tag)
582 insert_html_tag_with_attribute (start_or_end, tag, NULL);
585 /* Output an HTML <link> to the filename for NODE, including the
586 other string as extra attributes. */
/* NODENAME supplies both the href target and, after HTML escaping, the
   title attribute.  ATTRIBUTES is written verbatim right after the
   element name.  */
589 add_link (char *nodename, char *attributes)
593 char *escaped_nodename;
594 add_html_elt ("<link ");
595 add_word_args ("%s", attributes);
596 add_word_args (" href=\"");
/* A positive second argument asks add_anchor_name for the href= form of
   the name.  */
597 add_anchor_name (nodename, 1);
598 escaped_nodename = escape_string (nodename);
599 add_word_args ("\" title=\"%s\">\n", escaped_nodename);
/* escape_string returns its argument unchanged when nothing needed
   escaping, so only a different pointer is freed.  */
600 if (escaped_nodename != nodename)
601 free (escaped_nodename);
605 /* Copy a name with characters escaped as appropriate for an anchor
606 name, i.e., escape URL special characters with our _00hh convention.
607 (See the manual for details on the new scheme.) */
/* The result is built in storage obtained from xmalloc below.
   Presumably that storage is what gets returned and the caller releases
   it -- TODO confirm: the return statement is not visible in this
   excerpt.  */
610 escaped_anchor_name (const char *name)
612 /* The factor 5 in the next allocation allows all chars to be expanded. */
613 char *res = xmalloc (5 * strlen (name) + 1);
616 for (; *name; name++)
618 if (cr_or_whitespace (*name))
620 else if (! URL_SAFE_CHAR (*name))
/* An unsigned char printed with %x is at most two hex digits, so each
   escape is at most five bytes, which is what the factor 5 above
   allows for.  d is declared outside the visible lines; presumably it
   is the write position inside res.  */
622 sprintf (d, "_00%x", (unsigned char) *name);
623 /* do this manually since sprintf returns char * on
624 SunOS 4 and other old systems. */
635 /* Output NAME with characters escaped as appropriate for an anchor
636 name, i.e., escape URL special characters with our _00hh convention
637 if OLD is zero. (See the manual for details on the new scheme.)
639 If OLD is nonzero, generate the node name with the 4.6-and-earlier
640 convention of %hh (and more special characters output as-is, notably
641 - and *). This is only so that external references to old names can
642 still work with HTML generated by the new makeinfo; the gcc folks
643 needed this. Our own HTML does not refer to these names. */
/* Visible structure: with OLD zero, a test against the ASCII letters
   selects a branch that kludges in a constant string, and the escaped
   form of NAME comes from escaped_anchor_name.  With OLD nonzero, NAME
   is walked character by character and every character rejected by
   both URL_SAFE_CHAR and OLD_URL_SAFE_CHAR is written as a percent sign
   followed by its hex value.  The lines that output the remaining
   cases are not visible in this excerpt.  */
646 add_escaped_anchor_name (char *name, int old)
650 if (!old && !strchr ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
652 { /* XHTML does not allow anything but an ASCII letter to start an
653 identifier. Therefore kludge in this constant string if we
659 char *expanded = escaped_anchor_name (name);
663 else for (; *name; name++)
665 if (cr_or_whitespace (*name))
668 else if (!URL_SAFE_CHAR (*name) && !OLD_URL_SAFE_CHAR (*name))
669 /* Cast so characters with the high bit set are treated as >128,
670 for example o-umlaut should be 246, not -10. */
671 add_word_args ("%%%x", (unsigned char) *name);
677 /* Insert the text for the name of a reference in an HTML anchor
678 appropriate for NODENAME.
680 If HREF is zero, generate text for name= in the new node name
681 conversion convention.
682 If HREF is negative, generate text for name= in the old convention.
683 If HREF is positive, generate the name for an href= attribute, i.e.,
684 including the `#' if it's an internal reference. */
686 add_anchor_name (char *nodename, int href)
/* add_url_name writes the file-name part; the condition under which it
   is called is not visible in this excerpt.  */
691 add_url_name (nodename, href);
694 /* Always add NODENAME, so that the reference would pinpoint the
695 exact node on its file. This is so several nodes could share the
696 same file, in case of file-name clashes, but also for more
697 accurate browser positioning. */
698 if (mbscasecmp (nodename, "(dir)") == 0)
699 /* Strip the parens, but keep the original letter-case: skip the opening paren and print exactly three characters. */
700 add_word_args ("%.3s", nodename + 1);
701 else if (mbscasecmp (nodename, "top") == 0)
/* Every other name goes through add_escaped_anchor_name; a negative
   HREF selects its old naming convention.  */
704 add_escaped_anchor_name (nodename, href < 0);
707 /* Insert the text for the name of a reference in an HTML url, appropriate
710 add_url_name (char *nodename, int href)
/* Thin wrapper: NODENAME and HREF are forwarded unchanged to
   add_nodename_to_filename.  */
712 add_nodename_to_filename (nodename, href);
715 /* Convert non [A-Za-z0-9] characters depending on the command line options given.
716 If --transliterate-file-names is specified, these are replaced with their ASCII
717 phonetic transliteration. Otherwise, _00xx notation is used, where xx means the
718 hexadecimal representation of the ASCII character. Also convert spaces and
719 newlines to dashes. */
/* FILENAME is rewritten in place: a copy of the original text is taken
   and the converted text is then appended into FILENAME one character
   at a time (the statement that empties FILENAME before the loop is not
   visible in this excerpt).
   NOTE(review): the converted text can be longer than the original, up
   to five bytes for an escaped character, so the caller must pass a
   buffer with room to grow; the only guard visible here is the length
   test against PATH_MAX - 20 at the end of the loop body.  */
721 fix_filename (char *filename)
724 int len = strlen (filename);
725 char *oldname = xstrdup (filename);
729 for (i = 0; i < len; i++)
731 const char *p = lang_transliterate_char (oldname[i]);
734 strcat (filename, p);
735 else if (cr_or_whitespace (oldname[i]))
736 strcat (filename, "-");
737 else if (URL_SAFE_CHAR (oldname[i]))
738 strncat (filename, (char *) oldname + i, 1);
/* Six bytes hold the three-character prefix, at most two hex digits of
   an unsigned char, and the terminating zero byte.
   NOTE(review): no release of hexchar or oldname is visible in this
   excerpt -- confirm they are freed.  */
741 char *hexchar = xmalloc (6 * sizeof (char));
742 sprintf (hexchar, "_00%x", (unsigned char) oldname[i]);
743 strcat (filename, hexchar);
747 /* Check if we are nearing boundaries. */
748 if (strlen (filename) >= PATH_MAX - 20)
755 /* As we can't look-up a (forward-referenced) nodes' html filename
756 from the tentry, we take the easy way out. We assume that
757 nodenames are unique, and generate the html filename from the
758 nodename, that's always known. */
/* Every visible branch stores heap memory in filename (xstrdup of a
   fixed name, or xmalloc of PATH_MAX bytes); presumably that pointer is
   the return value -- the return statement is not visible here.  A
   nonzero HREF additionally appends a fragment for external
   references (see the end of the function).  */
760 nodename_to_filename_1 (char *nodename, int href)
764 char dirname[PATH_MAX];
766 if (mbscasecmp (nodename, "Top") == 0)
768 /* We want to convert references to the Top node into
771 filename = xstrdup ("index.html"); /* "#Top" is added by our callers */
773 filename = xstrdup ("Top");
775 else if (mbscasecmp (nodename, "(dir)") == 0)
776 /* We want to convert references to the (dir) node into
778 filename = xstrdup ("../index.html");
781 filename = xmalloc (PATH_MAX);
785 /* Check for external reference: ``(info-document)node-name''
786 Assume this node lives at: ``../info-document/node-name.html''
788 We need to handle the special case (sigh): ``(info-document)'',
789 ie, an external top-node, which should translate to:
790 ``../info-document/info-document.html'' */
/* NOTE(review): the external-reference path below fills dirname and
   filename, both PATH_MAX bytes, with strcpy, strncpy and strcat; no
   check of the node name length against PATH_MAX is visible in this
   excerpt.  */
793 if (*nodename == '(')
797 p = strchr (nodename, ')');
800 line_error (_("[unexpected] invalid node name: `%s'"), nodename);
/* p points at the closing paren, so length is the number of characters
   between the two parens.  */
804 length = p - nodename - 1;
806 FILENAME_CMPN (p - 5, ".info", 5) == 0)
808 /* This is for DOS, and also for Windows and GNU/Linux
809 systems that might have Info files copied from a DOS 8+3
812 FILENAME_CMPN (p - 4, ".inf", 4) == 0)
814 strcpy (filename, "../");
815 strncpy (dirname, nodename + 1, length);
816 *(dirname + length) = '\0';
817 fix_filename (dirname);
818 strcat (filename, dirname);
819 strcat (filename, "/");
823 /* In the case of just (info-document), there will be nothing
824 remaining, and we will refer to ../info-document/, which will
826 strcat (filename, p);
830 fix_filename (filename + strlen (filename) - strlen (p));
831 strcat (filename, ".html");
835 /* Produce a file name suitable for the underlying filesystem. */
836 normalize_filename (filename);
839 /* We add ``#Nodified-filename'' anchor to external references to be
840 prepared for non-split HTML support. Maybe drop this. */
/* dirname holds text after the external-reference branch has copied a
   document name into it; how it is initialised on other paths is not
   visible in this excerpt.  fix_filename is applied only to the part of
   filename that was just appended from p.  */
841 if (href && *dirname)
843 strcat (filename, "#");
844 strcat (filename, p);
846 fix_filename (filename + strlen (filename) - strlen (p));
853 /* If necessary, ie, if current filename != filename of node, output
856 add_nodename_to_filename (char *nodename, int href)
858 /* for now, don't check: always output filename */
/* The name comes from nodename_to_filename_1, which builds it in heap
   memory; the lines that output it and release it are not visible in
   this excerpt.  */
859 char *filename = nodename_to_filename_1 (nodename, href);
/* Return the file name for NODENAME in its href form.  The result is
   whatever nodename_to_filename_1 returns for a nonzero HREF.  */
865 nodename_to_filename (char *nodename)
867 /* The callers of nodename_to_filename use the result to produce
868 <a href=, so call nodename_to_filename_1 with last arg non-zero. */
869 return nodename_to_filename_1 (nodename, 1);