contrib/texinfo/makeinfo/html.c

   1 /* html.c -- html-related utilities.
   2    $Id: html.c,v 1.42 2008/05/19 18:26:47 karl Exp $
   3
   4    Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
   5    Free Software Foundation, Inc.
   6
   7    This program is free software: you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation, either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #include "system.h"
  21 #include "cmds.h"
  22 #include "files.h"
  23 #include "html.h"
  24 #include "lang.h"
  25 #include "makeinfo.h"
  26 #include "node.h"
  27 #include "sectioning.h"
  28
  29 \f
  30
  31 /* Filename to which to write list of index entries */
  32 char *internal_links_filename = NULL;
  33 FILE *internal_links_stream = NULL;
  34
  35 /* Append CHAR to BUFFER, (re)allocating as necessary.  We don't handle
  36    null characters.  */
  37
  38 typedef struct
  39 {
  40   unsigned size;    /* allocated */
  41   unsigned length;  /* used */
  42   char *buffer;
  43 } buffer_type;
  44
  45 static buffer_type *
  46 init_buffer (void)
  47 {
  48   buffer_type *buf = xmalloc (sizeof (buffer_type));
  49   buf->length = 0;
  50   buf->size = 0;
  51   buf->buffer = NULL;
  52
  53   return buf;
  54 }
  55
  56 static void
  57 append_char (buffer_type *buf, int c)
  58 {
  59   buf->length++;
  60   if (buf->length >= buf->size)
  61     {
  62       buf->size += 100;
  63       buf->buffer = xrealloc (buf->buffer, buf->size);
  64     }
  65   buf->buffer[buf->length - 1] = c;
  66   buf->buffer[buf->length] = 0;
  67 }
  68
  69 /* Read the cascading style-sheet file FILENAME.  Write out any @import
  70    commands, which must come first, by the definition of css.  If the
  71    file contains any actual css code following the @imports, return it;
  72    else return NULL.  */
  73 static char *
  74 process_css_file (char *filename)
  75 {
  76   int c;
  77   int lastchar = 0;
  78   FILE *f;
  79   buffer_type *import_text = init_buffer ();
  80   buffer_type *inline_text = init_buffer ();
  81   unsigned lineno = 1;
  82   enum { null_state, comment_state, import_state, inline_state } state
  83     = null_state, prev_state;
  84
  85   prev_state = null_state;
  86
  87   /* read from stdin if `-' is the filename.  */
  88   f = STREQ (filename, "-") ? stdin : fopen (filename, "r");
  89   if (!f)
  90     {
  91       error (_("%s: could not open --css-file: %s"), progname, filename);
  92       return NULL;
  93     }
  94
  95   /* Read the file.  The @import statements must come at the beginning,
  96      with only whitespace and comments allowed before any inline css code.  */
  97   while ((c = getc (f)) >= 0)
  98     {
  99       if (c == '\n')
 100         lineno++;
 101
 102       switch (state)
 103         {
 104         case null_state: /* between things */
 105           if (c == '@')
 106             { /* Only @import and @charset should switch into
 107                  import_state, other @-commands, such as @media, should
 108                  put us into inline_state.  I don't think any other css
 109                  @-commands start with `i' or `c', although of course
 110                  this will break when such a command is defined.  */
 111               int nextchar = getc (f);
 112               if (nextchar == 'i' || nextchar == 'c')
 113                 {
 114                   append_char (import_text, c);
 115                   append_char (import_text, nextchar);
 116                   state = import_state;
 117                 }
 118               else
 119                 {
 120                   ungetc (nextchar, f);  /* wasn't an @import */
 121                   state = inline_state;
 122                 }
 123             }
 124           else if (c == '/')
 125             { /* possible start of a comment */
 126               int nextchar = getc (f);
 127               if (nextchar == '*')
 128                 state = comment_state;
 129               else
 130                 {
 131                   ungetc (nextchar, f); /* wasn't a comment */
 132                   state = inline_state;
 133                 }
 134             }
 135           else if (isspace (c))
 136             ; /* skip whitespace; maybe should use c_isspace?  */
 137
 138           else
 139             /* not an @import, not a comment, not whitespace: we must
 140                have started the inline text.  */
 141             state = inline_state;
 142
 143           if (state == inline_state)
 144             append_char (inline_text, c);
 145
 146           if (state != null_state)
 147             prev_state = null_state;
 148           break;
 149
 150         case comment_state:
 151           if (c == '/' && lastchar == '*')
 152             state = prev_state;  /* end of comment */
 153           break;  /* else ignore this comment char */
 154
 155         case import_state:
 156           append_char (import_text, c);  /* include this import char */
 157           if (c == ';')
 158             { /* done with @import */
 159               append_char (import_text, '\n');  /* make the output nice */
 160               state = null_state;
 161               prev_state = import_state;
 162             }
 163           break;
 164
 165         case inline_state:
 166           /* No harm in writing out comments, so don't bother parsing
 167              them out, just append everything.  */
 168           append_char (inline_text, c);
 169           break;
 170         }
 171
 172       lastchar = c;
 173     }
 174
 175   fclose (f);  /* Even closing stdin should be ok, can't read it more
 176                   than once?  */
 177
 178   /* Reached the end of the file.  We should not be still in a comment.  */
 179   if (state == comment_state)
 180     warning (_("%s:%d: --css-file ended in comment"), filename, lineno);
 181
 182   /* Write the @import text, if any.  */
 183   if (import_text->buffer)
 184     {
 185       add_word (import_text->buffer);
 186       free (import_text->buffer);
 187       free (import_text);
 188     }
 189
 190   /* We're wasting the buffer struct memory, but so what.  */
 191   return inline_text->buffer;
 192 }
 193 \f
 194 HSTACK *htmlstack = NULL;
 195
 196 /* See html.h.  */
 197 int html_title_written = 0;
 198
 199 void
 200 html_output_head (void)
 201 {
 202   static const char *html_title = NULL;
 203   char *encoding = current_document_encoding ();
 204
 205   /* The <title> should not have markup, so use text_expansion.  */
 206   if (!html_title)
 207     html_title = escape_string (title ?
 208         text_expansion (title) : (char *) gdt("Untitled"));
 209
 210   /* Make sure this is the very first string of the output document.  */
 211   output_paragraph_offset = 0;
 212
 213   add_html_block_elt_args ("<html lang=\"%s\">\n<head>\n",
 214       language_table[language_code].abbrev);
 215
 216   /* When splitting, add current node's name to title if it's available
 217      and not Top.  */
 218   if (splitting && current_node && !STREQ (current_node, "Top"))
 219     add_word_args ("<title>%s - %s</title>\n",
 220         escape_string (xstrdup (current_node)), html_title);
 221   else
 222     add_word_args ("<title>%s</title>\n",  html_title);
 223
 224   add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html");
 225   if (encoding && *encoding)
 226     add_word_args ("; charset=%s", encoding);
 227
 228   add_word ("\">\n");
 229
 230   if (!document_description)
 231     document_description = html_title;
 232
 233   add_word_args ("<meta name=\"description\" content=\"%s\">\n",
 234                  document_description);
 235   add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n",
 236                  VERSION);
 237
 238   /* Navigation bar links.  */
 239   if (!splitting)
 240     add_word ("<link title=\"Top\" rel=\"top\" href=\"#Top\">\n");
 241   else if (tag_table)
 242     {
 243       /* Always put a top link.  */
 244       add_word ("<link title=\"Top\" rel=\"start\" href=\"index.html#Top\">\n");
 245
 246       /* We already have a top link, avoid duplication.  */
 247       if (tag_table->up && !STREQ (tag_table->up, "Top"))
 248         add_link (tag_table->up, "rel=\"up\"");
 249
 250       if (tag_table->prev)
 251         add_link (tag_table->prev, "rel=\"prev\"");
 252
 253       if (tag_table->next)
 254         add_link (tag_table->next, "rel=\"next\"");
 255
 256       /* fixxme: Look for a way to put links to various indices in the
 257          document.  Also possible candidates to be added here are First and
 258          Last links.  */
 259     }
 260   else
 261     {
 262       /* We are splitting, but we neither have a tag_table.  So this must be
 263          index.html.  So put a link to Top. */
 264       add_word ("<link title=\"Top\" rel=\"start\" href=\"#Top\">\n");
 265     }
 266
 267   add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \
 268 rel=\"generator-home\" title=\"Texinfo Homepage\">\n");
 269
 270   if (copying_text)
 271     { /* It is not ideal that we include the html markup here within
 272          <head>, so we use text_expansion.  */
 273       insert_string ("<!--\n");
 274       insert_string (text_expansion (copying_text));
 275       insert_string ("-->\n");
 276     }
 277
 278   /* Put the style definitions in a comment for the sake of browsers
 279      that don't support <style>.  */
 280   add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n");
 281   add_word ("<style type=\"text/css\"><!--\n");
 282
 283   {
 284     char *css_inline = NULL;
 285
 286     if (css_include)
 287       /* This writes out any @import commands from the --css-file,
 288          and returns any actual css code following the imports.  */
 289       css_inline = process_css_file (css_include);
 290
 291     /* This seems cleaner than adding <br>'s at the end of each line for
 292        these "roman" displays.  It's hardly the end of the world if the
 293        browser doesn't do <style>s, in any case; they'll just come out in
 294        typewriter.  */
 295 #define CSS_FONT_INHERIT "font-family:inherit"
 296     add_word_args ("  pre.display { %s }\n", CSS_FONT_INHERIT);
 297     add_word_args ("  pre.format  { %s }\n", CSS_FONT_INHERIT);
 298
 299     /* Alternatively, we could do <font size=-1> in insertion.c, but this
 300        way makes it easier to override.  */
 301 #define CSS_FONT_SMALLER "font-size:smaller"
 302     add_word_args ("  pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT,
 303                    CSS_FONT_SMALLER);
 304     add_word_args ("  pre.smallformat  { %s; %s }\n", CSS_FONT_INHERIT,
 305                    CSS_FONT_SMALLER);
 306     add_word_args ("  pre.smallexample { %s }\n", CSS_FONT_SMALLER);
 307     add_word_args ("  pre.smalllisp    { %s }\n", CSS_FONT_SMALLER);
 308
 309     /* Since HTML doesn't have a sc element, we use span with a bit of
 310        CSS spice instead.  */
 311 #define CSS_FONT_SMALL_CAPS "font-variant:small-caps"
 312     add_word_args ("  span.sc    { %s }\n", CSS_FONT_SMALL_CAPS);
 313
 314     /* Roman (default) font class, closest we can come.  */
 315 #define CSS_FONT_ROMAN "font-family:serif; font-weight:normal;"
 316     add_word_args ("  span.roman { %s } \n", CSS_FONT_ROMAN);
 317
 318     /* Sans serif font class.  */
 319 #define CSS_FONT_SANSSERIF "font-family:sans-serif; font-weight:normal;"
 320     add_word_args ("  span.sansserif { %s } \n", CSS_FONT_SANSSERIF);
 321
 322     /* Write out any css code from the user's --css-file.  */
 323     if (css_inline)
 324       insert_string (css_inline);
 325
 326     add_word ("--></style>\n");
 327   }
 328   if (css_ref)
 329     add_word_args ("<link rel=\"stylesheet\" type=\"text/css\" href=\"%s\">\n",
 330                     css_ref);
 331
 332   add_word ("</head>\n<body>\n");
 333
 334   if (title && !html_title_written && titlepage_cmd_present)
 335     {
 336       add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title);
 337       html_title_written = 1;
 338     }
 339
 340   free (encoding);
 341 }
 342 \f
 343 /* Escape HTML special characters in the string if necessary,
 344    returning a pointer to a possibly newly-allocated one. */
 345 char *
 346 escape_string (char *string)
 347 {
 348   char *newstring;
 349   int i = 0, newlen = 0;
 350
 351   do
 352     {
 353       /* Find how much to allocate. */
 354       switch (string[i])
 355         {
 356         case '"':
 357           newlen += 6;          /* `&quot;' */
 358           break;
 359         case '&':
 360           newlen += 5;          /* `&amp;' */
 361           break;
 362         case '<':
 363         case '>':
 364           newlen += 4;          /* `&lt;', `&gt;' */
 365           break;
 366         default:
 367           newlen++;
 368         }
 369     }
 370   while (string[i++]);
 371
 372   if (newlen == i) return string; /* Already OK. */
 373
 374   newstring = xmalloc (newlen);
 375   i = 0;
 376   do
 377     {
 378       switch (string[i])
 379         {
 380         case '"':
 381           strcpy (newstring, "&quot;");
 382           newstring += 6;
 383           break;
 384         case '&':
 385           strcpy (newstring, "&amp;");
 386           newstring += 5;
 387           break;
 388         case '<':
 389           strcpy (newstring, "&lt;");
 390           newstring += 4;
 391           break;
 392         case '>':
 393           strcpy (newstring, "&gt;");
 394           newstring += 4;
 395           break;
 396         default:
 397           newstring[0] = string[i];
 398           newstring++;
 399         }
 400     }
 401   while (string[i++]);
 402
 403   return newstring - newlen;
 404 }
 405 \f
 406 /* Save current tag.  */
 407 static void
 408 push_tag (char *tag, char *attribs)
 409 {
 410   HSTACK *newstack = xmalloc (sizeof (HSTACK));
 411
 412   newstack->tag = tag;
 413   newstack->attribs = xstrdup (attribs);
 414   newstack->next = htmlstack;
 415   htmlstack = newstack;
 416 }
 417
 418 /* Get last tag.  */
 419 static void
 420 pop_tag (void)
 421 {
 422   HSTACK *tos = htmlstack;
 423
 424   if (!tos)
 425     {
 426       line_error (_("[unexpected] no html tag to pop"));
 427       return;
 428     }
 429
 430   free (htmlstack->attribs);
 431
 432   htmlstack = htmlstack->next;
 433   free (tos);
 434 }
 435
 436 /* Check if tag is an empty or a whitespace only element.
 437    If so, remove it, keeping whitespace intact.  */
 438 int
 439 rollback_empty_tag (char *tag)
 440 {
 441   int check_position = output_paragraph_offset;
 442   int taglen = strlen (tag);
 443   int rollback_happened = 0;
 444   char *contents = "";                  /* FIXME (ptr to constant, later
 445                                            assigned to malloc'd address).
 446                                          */
 447   char *contents_canon_white = "";
 448
 449   /* If output_paragraph is empty, we cannot rollback :-\  */
 450   if (output_paragraph_offset <= 0)
 451     return 0;
 452
 453   /* Find the end of the previous tag.  */
 454   while (check_position > 0 && output_paragraph[check_position-1] != '>')
 455     check_position--;
 456
 457   /* Save stuff between tag's end to output_paragraph's end.  */
 458   if (check_position != output_paragraph_offset)
 459     {
 460       contents = xmalloc (output_paragraph_offset - check_position + 1);
 461       memcpy (contents, output_paragraph + check_position,
 462           output_paragraph_offset - check_position);
 463
 464       contents[output_paragraph_offset - check_position] = '\0';
 465
 466       contents_canon_white = xstrdup (contents);
 467       canon_white (contents_canon_white);
 468     }
 469
 470   /* Find the start of the previous tag.  */
 471   while (check_position > 0 && output_paragraph[check_position-1] != '<')
 472     check_position--;
 473
 474   /* Check to see if this is the tag.  */
 475   if (strncmp ((char *) output_paragraph + check_position, tag, taglen) == 0
 476       && (whitespace (output_paragraph[check_position + taglen])
 477           || output_paragraph[check_position + taglen] == '>'))
 478     {
 479       if (!contents_canon_white || !*contents_canon_white)
 480         {
 481           /* Empty content after whitespace removal, so roll it back.  */
 482           output_paragraph_offset = check_position - 1;
 483           rollback_happened = 1;
 484
 485           /* Original contents may not be empty (whitespace.)  */
 486           if (contents && *contents)
 487             {
 488               insert_string (contents);
 489               free (contents);
 490             }
 491         }
 492     }
 493
 494   return rollback_happened;
 495 }
 496
 497 /* Open or close TAG according to START_OR_END. */
 498 void
 499 #if defined (VA_FPRINTF) && __STDC__
 500 insert_html_tag_with_attribute (int start_or_end, char *tag, char *format, ...)
 501 #else
 502 insert_html_tag_with_attribute (start_or_end, tag, format, va_alist)
 503      int start_or_end;
 504      char *tag;
 505      char *format;
 506      va_dcl
 507 #endif
 508 {
 509   char *old_tag = NULL;
 510   char *old_attribs = NULL;
 511   char formatted_attribs[2000]; /* xx no fixed limits */
 512   int do_return = 0;
 513   extern int in_html_elt;
 514
 515   if (start_or_end != START)
 516     pop_tag ();
 517
 518   if (htmlstack)
 519     {
 520       old_tag = htmlstack->tag;
 521       old_attribs = htmlstack->attribs;
 522     }
 523
 524   if (format)
 525     {
 526 #ifdef VA_SPRINTF
 527       va_list ap;
 528 #endif
 529
 530       VA_START (ap, format);
 531 #ifdef VA_SPRINTF
 532       VA_SPRINTF (formatted_attribs, format, ap);
 533 #else
 534       sprintf (formatted_attribs, format, a1, a2, a3, a4, a5, a6, a7, a8);
 535 #endif
 536       va_end (ap);
 537     }
 538   else
 539     formatted_attribs[0] = '\0';
 540
 541   /* Exception: can nest multiple spans.  */
 542   if (htmlstack
 543       && STREQ (htmlstack->tag, tag)
 544       && !(STREQ (tag, "span") && STREQ (old_attribs, formatted_attribs)))
 545     do_return = 1;
 546
 547   if (start_or_end == START)
 548     push_tag (tag, formatted_attribs);
 549
 550   if (do_return)
 551     return;
 552
 553   in_html_elt++;
 554
 555   /* texinfo.tex doesn't support more than one font attribute
 556      at the same time.  */
 557   if ((start_or_end == START) && old_tag && *old_tag
 558       && !STREQ (old_tag, "samp")
 559       && !rollback_empty_tag (old_tag))
 560     add_word_args ("</%s>", old_tag);
 561
 562   if (*tag)
 563     {
 564       if (start_or_end == START)
 565         add_word_args (format ? "<%s %s>" : "<%s>", tag, formatted_attribs);
 566       else if (STREQ (tag, "samp") || !rollback_empty_tag (tag))
 567         /* Insert close tag only if we didn't rollback,
 568            in which case the opening tag is removed.  */
 569         add_word_args ("</%s>", tag);
 570     }
 571
 572   if ((start_or_end != START) && old_tag && *old_tag && !STREQ (old_tag, "samp"))
 573     add_word_args (strlen (old_attribs) > 0 ? "<%s %s>" : "<%s>",
 574         old_tag, old_attribs);
 575
 576   in_html_elt--;
 577 }
 578
 579 void
 580 insert_html_tag (int start_or_end, char *tag)
 581 {
 582   insert_html_tag_with_attribute (start_or_end, tag, NULL);
 583 }
 584 \f
 585 /* Output an HTML <link> to the filename for NODE, including the
 586    other string as extra attributes. */
 587
 588 void
 589 add_link (char *nodename, char *attributes)
 590 {
 591   if (nodename)
 592     {
 593       char *escaped_nodename;
 594       add_html_elt ("<link ");
 595       add_word_args ("%s", attributes);
 596       add_word_args (" href=\"");
 597       add_anchor_name (nodename, 1);
 598       escaped_nodename = escape_string (nodename);
 599       add_word_args ("\" title=\"%s\">\n", escaped_nodename);
 600       if (escaped_nodename != nodename)
 601         free (escaped_nodename);
 602     }
 603 }
 604
 605 /* Copy a name with characters escaped as appropriate for an anchor
 606    name, i.e., escape URL special characters with our _00hh convention.
 607    (See the manual for details on the new scheme.) */
 608
 609 char *
 610 escaped_anchor_name (const char *name)
 611 {
 612   /* The factor 5 in the next allocation allows all chars to be expanded.  */
 613   char *res = xmalloc (5 * strlen (name) + 1);
 614   char *d = res;
 615
 616   for (; *name; name++)
 617     {
 618       if (cr_or_whitespace (*name))
 619         *d++ = '-';
 620       else if (! URL_SAFE_CHAR (*name))
 621         {
 622           sprintf (d, "_00%x", (unsigned char) *name);
 623           /* do this manually since sprintf returns char * on
 624              SunOS 4 and other old systems.  */
 625           while (*d)
 626             d++;
 627         }
 628       else
 629         *d++ = *name;
 630     }
 631   *d = 0;
 632   return res;
 633 }
 634
 635 /* Output NAME with characters escaped as appropriate for an anchor
 636    name, i.e., escape URL special characters with our _00hh convention
 637    if OLD is zero.  (See the manual for details on the new scheme.)
 638
 639    If OLD is nonzero, generate the node name with the 4.6-and-earlier
 640    convention of %hh (and more special characters output as-is, notably
 641    - and *).  This is only so that external references to old names can
 642    still work with HTML generated by the new makeinfo; the gcc folks
 643    needed this.  Our own HTML does not refer to these names.  */
 644
 645 void
 646 add_escaped_anchor_name (char *name, int old)
 647 {
 648   canon_white (name);
 649
 650   if (!old && !strchr ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
 651                        *name))
 652     { /* XHTML does not allow anything but an ASCII letter to start an
 653          identifier.  Therefore kludge in this constant string if we
 654          have a nonletter.  */
 655       add_word ("g_t");
 656     }
 657   if (!old)
 658     {
 659       char *expanded = escaped_anchor_name (name);
 660       add_word (expanded);
 661       free (expanded);
 662     }
 663   else for (; *name; name++)
 664     {
 665       if (cr_or_whitespace (*name))
 666         add_char ('-');
 667
 668       else if (!URL_SAFE_CHAR (*name) && !OLD_URL_SAFE_CHAR (*name))
 669         /* Cast so characters with the high bit set are treated as >128,
 670            for example o-umlaut should be 246, not -10.  */
 671         add_word_args ("%%%x", (unsigned char) *name);
 672       else
 673         add_char (*name);
 674     }
 675 }
 676
 677 /* Insert the text for the name of a reference in an HTML anchor
 678    appropriate for NODENAME.
 679
 680    If HREF is zero, generate text for name= in the new node name
 681      conversion convention.
 682    If HREF is negative, generate text for name= in the old convention.
 683    If HREF is positive, generate the name for an href= attribute, i.e.,
 684      including the `#' if it's an internal reference.   */
 685 void
 686 add_anchor_name (char *nodename, int href)
 687 {
 688   if (href > 0)
 689     {
 690       if (splitting)
 691         add_url_name (nodename, href);
 692       add_char ('#');
 693     }
 694   /* Always add NODENAME, so that the reference would pinpoint the
 695      exact node on its file.  This is so several nodes could share the
 696      same file, in case of file-name clashes, but also for more
 697      accurate browser positioning.  */
 698   if (mbscasecmp (nodename, "(dir)") == 0)
 699     /* Strip the parens, but keep the original letter-case.  */
 700     add_word_args ("%.3s", nodename + 1);
 701   else if (mbscasecmp (nodename, "top") == 0)
 702     add_word ("Top");
 703   else
 704     add_escaped_anchor_name (nodename, href < 0);
 705 }
 706
 707 /* Insert the text for the name of a reference in an HTML url, aprropriate
 708    for NODENAME */
 709 void
 710 add_url_name (char *nodename, int href)
 711 {
 712     add_nodename_to_filename (nodename, href);
 713 }
 714
 715 /* Convert non [A-Za-z0-9] characters depending on the command line options given.
 716    If --transliterate-file-names is specified, these are replaced with their ASCII
 717    phonetic transliteration. Otherwise, _00xx notation is used, where xx means the
 718    hexadecimal representation of the ASCII character.  Also convert spaces and
 719    newlines to dashes.  */
 720 static void
 721 fix_filename (char *filename)
 722 {
 723   int i;
 724   int len = strlen (filename);
 725   char *oldname = xstrdup (filename);
 726
 727   *filename = '\0';
 728
 729   for (i = 0; i < len; i++)
 730     {
 731       const char *p = lang_transliterate_char (oldname[i]);
 732
 733       if (p)
 734         strcat (filename, p);
 735       else if (cr_or_whitespace (oldname[i]))
 736         strcat (filename, "-");
 737       else if (URL_SAFE_CHAR (oldname[i]))
 738         strncat (filename, (char *) oldname + i, 1);
 739       else
 740         {
 741           char *hexchar = xmalloc (6 * sizeof (char));
 742           sprintf (hexchar, "_00%x", (unsigned char) oldname[i]);
 743           strcat (filename, hexchar);
 744           free (hexchar);
 745         }
 746
 747       /* Check if we are nearing boundaries.  */
 748       if (strlen (filename) >= PATH_MAX - 20)
 749         break;
 750     }
 751
 752   free (oldname);
 753 }
 754
 755 /* As we can't look-up a (forward-referenced) nodes' html filename
 756    from the tentry, we take the easy way out.  We assume that
 757    nodenames are unique, and generate the html filename from the
 758    nodename, that's always known.  */
 759 static char *
 760 nodename_to_filename_1 (char *nodename, int href)
 761 {
 762   char *p;
 763   char *filename;
 764   char dirname[PATH_MAX];
 765
 766   if (mbscasecmp (nodename, "Top") == 0)
 767     {
 768       /* We want to convert references to the Top node into
 769          "index.html#Top".  */
 770       if (href)
 771         filename = xstrdup ("index.html"); /* "#Top" is added by our callers */
 772       else
 773         filename = xstrdup ("Top");
 774     }
 775   else if (mbscasecmp (nodename, "(dir)") == 0)
 776     /* We want to convert references to the (dir) node into
 777        "../index.html".  */
 778     filename = xstrdup ("../index.html");
 779   else
 780     {
 781       filename = xmalloc (PATH_MAX);
 782       dirname[0] = '\0';
 783       *filename = '\0';
 784
 785       /* Check for external reference: ``(info-document)node-name''
 786          Assume this node lives at: ``../info-document/node-name.html''
 787
 788          We need to handle the special case (sigh): ``(info-document)'',
 789          ie, an external top-node, which should translate to:
 790          ``../info-document/info-document.html'' */
 791
 792       p = nodename;
 793       if (*nodename == '(')
 794         {
 795           int length;
 796
 797           p = strchr (nodename, ')');
 798           if (p == NULL)
 799             {
 800               line_error (_("[unexpected] invalid node name: `%s'"), nodename);
 801               xexit (1);
 802             }
 803
 804           length = p - nodename - 1;
 805           if (length > 5 &&
 806               FILENAME_CMPN (p - 5, ".info", 5) == 0)
 807             length -= 5;
 808           /* This is for DOS, and also for Windows and GNU/Linux
 809              systems that might have Info files copied from a DOS 8+3
 810              filesystem.  */
 811           if (length > 4 &&
 812               FILENAME_CMPN (p - 4, ".inf", 4) == 0)
 813             length -= 4;
 814           strcpy (filename, "../");
 815           strncpy (dirname, nodename + 1, length);
 816           *(dirname + length) = '\0';
 817           fix_filename (dirname);
 818           strcat (filename, dirname);
 819           strcat (filename, "/");
 820           p++;
 821         }
 822
 823       /* In the case of just (info-document), there will be nothing
 824          remaining, and we will refer to ../info-document/, which will
 825          work fine.  */
 826       strcat (filename, p);
 827       if (*p)
 828         {
 829           /* Hmm */
 830           fix_filename (filename + strlen (filename) - strlen (p));
 831           strcat (filename, ".html");
 832         }
 833     }
 834
 835   /* Produce a file name suitable for the underlying filesystem.  */
 836   normalize_filename (filename);
 837
 838 #if 0
 839   /* We add ``#Nodified-filename'' anchor to external references to be
 840      prepared for non-split HTML support.  Maybe drop this. */
 841   if (href && *dirname)
 842     {
 843       strcat (filename, "#");
 844       strcat (filename, p);
 845       /* Hmm, again */
 846       fix_filename (filename + strlen (filename) - strlen (p));
 847     }
 848 #endif
 849
 850   return filename;
 851 }
 852
 853 /* If necessary, ie, if current filename != filename of node, output
 854    the node name.  */
 855 void
 856 add_nodename_to_filename (char *nodename, int href)
 857 {
 858   /* for now, don't check: always output filename */
 859   char *filename = nodename_to_filename_1 (nodename, href);
 860   add_word (filename);
 861   free (filename);
 862 }
 863
 864 char *
 865 nodename_to_filename (char *nodename)
 866 {
 867   /* The callers of nodename_to_filename use the result to produce
 868      <a href=, so call nodename_to_filename_1 with last arg non-zero.  */
 869   return nodename_to_filename_1 (nodename, 1);
 870 }