2 /* Copyright (C) 2002, 2003, 2004, 2006, 2009
3 Free Software Foundation, Inc.
4 Written by Werner Lemberg <wl@gnu.org>
6 This file is part of groff.
8 groff is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 groff is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "stringclass.h"
27 struct glyph_to_unicode {
31 declare_ptable(glyph_to_unicode)
32 implement_ptable(glyph_to_unicode)
34 PTABLE(glyph_to_unicode) glyph_to_unicode_table;
// The entries commented out in the table below can't be used in glyph
// names.
42 } glyph_to_unicode_list[] = {
126 { "ff", "0066_0066" },
127 { "Fi", "0066_0066_0069" },
128 { "Fl", "0066_0066_006C" },
129 { "fi", "0066_0069" },
130 { "fl", "0066_006C" },
// The soft hyphen U+00AD is meaningful only in the input file,
// not in the output.
327 // the curly phi variant
333 // the stroked phi variant
337 // `-' and `hy' denote a HYPHEN, usually a glyph with a smaller width than
338 // the MINUS sign. Users who are viewing broken man pages that assume
339 // that `-' denotes a U+002D character can either fix the broken man pages
340 // or apply the workaround described in the PROBLEMS file.
397 { "product", "220F" },
398 { "coproduct", "2210" },
400 // `mi' and `\-' represent a MINUS sign. But it is used in many man pages
401 // to denote the U+002D character that introduces a command-line option.
402 // For devices that support copy&paste, such as devhtml and devutf8, the
403 // user can apply the workaround described in the PROBLEMS file.
418 { "integral", "222B" },
447 { "parenlefttp", "239B" },
448 { "parenleftex", "239C" },
449 { "parenleftbt", "239D" },
450 { "parenrighttp", "239E" },
451 { "parenrightex", "239F" },
452 { "parenrightbt", "23A0" },
453 { "bracketlefttp", "23A1" },
454 { "bracketleftex", "23A2" },
455 { "bracketleftbt", "23A3" },
456 { "bracketrighttp", "23A4" },
457 { "bracketrightex", "23A5" },
458 { "bracketrightbt", "23A6" },
460 { "bracelefttp", "23A7" },
462 { "braceleftmid", "23A8" },
464 { "braceleftbt", "23A9" },
466 { "braceex", "23AA" },
467 { "braceleftex", "23AA" },
468 { "bracerightex", "23AA" },
470 { "bracerighttp", "23AB" },
472 { "bracerightmid", "23AC" },
474 { "bracerightbt", "23AD" },
493 // The `left angle bracket' and `right angle bracket' could be mapped to
494 // either U+2329,U+232A or U+3008,U+3009 or U+27E8,U+27E9. But the first
495 // and second possibility are double-width characters (see Unicode's
496 // `DerivedEastAsianWidth.txt' file) and are therefore not suitable for
497 // general use, whereas the third possibility is single-width.
// The devhtml device overrides this mapping, because
//
//   http://www.w3.org/TR/html401/sgml/entities.html
//
// says that in HTML, `&lang;' and `&rang;' are U+2329,U+232A.
// Global constructor.  This type exists only so that its constructor is
// run during static initialization, through the single file-scope object
// `_glyph_to_unicode_init' that is defined together with it.  The
// constructor itself is defined out of line.
static struct glyph_to_unicode_init {
  glyph_to_unicode_init();
} _glyph_to_unicode_init;
514 glyph_to_unicode_init::glyph_to_unicode_init()
516 for (unsigned int i = 0;
517 i < sizeof(glyph_to_unicode_list)/sizeof(glyph_to_unicode_list[0]);
519 glyph_to_unicode *gtu = new glyph_to_unicode[1];
520 gtu->value = (char *)glyph_to_unicode_list[i].value;
521 glyph_to_unicode_table.define(glyph_to_unicode_list[i].key, gtu);
525 const char *glyph_name_to_unicode(const char *s)
527 glyph_to_unicode *result = glyph_to_unicode_table.lookup(s);
528 return result ? result->value : 0;