| Commit | Line | Data |
|---|---|---|
| 92d0a6a6 | 1 | // -*- C++ -*- |
| 4d3e9548 JL |
2 | /* Copyright (C) 1989, 1990, 1991, 1992, 2003, 2007, 2009 |
| 3 | Free Software Foundation, Inc. | |
| 92d0a6a6 JR |
4 | Written by James Clark (jjc@jclark.com) |
| 5 | ||
| 6 | This file is part of groff. | |
| 7 | ||
| 8 | groff is free software; you can redistribute it and/or modify it under | |
| 9 | the terms of the GNU General Public License as published by the Free | |
| 4d3e9548 JL |
10 | Software Foundation, either version 3 of the License, or |
| 11 | (at your option) any later version. | |
| 92d0a6a6 JR |
12 | |
| 13 | groff is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
| 15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
| 16 | for more details. | |
| 17 | ||
| 4d3e9548 JL |
18 | You should have received a copy of the GNU General Public License |
| 19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ | |
| 92d0a6a6 | 20 | |
| 4d3e9548 | 21 | #include <ctype.h> |
| 92d0a6a6 JR |
22 | #include "eqn.h" |
| 23 | #include "pbox.h" | |
| 24 | #include "ptable.h" | |
| 25 | ||
| 4d3e9548 JL |
// One row of the special-character translation table: the troff/groff
// special character name and the text emitted for it in MathML output.
struct map {
  const char *from;	// special character name, e.g. "*a"
  const char *to;	// replacement text returned by special_to_entity()
};
| 30 | ||
| 31 | struct map entity_table[] = { | |
| 32 | // Classic troff special characters | |
| 33 | {"%", "­"}, // ISOnum | |
| 34 | {"'", "´"}, // ISOdia | |
| 35 | {"!=", "≠"}, // ISOtech | |
| 36 | {"**", "∗"}, // ISOtech | |
| 37 | {"*a", "α"}, // ISOgrk3 | |
| 38 | {"*A", "A"}, | |
| 39 | {"*b", "β"}, // ISOgrk3 | |
| 40 | {"*B", "B"}, | |
| 41 | {"*d", "δ"}, // ISOgrk3 | |
| 42 | {"*D", "Δ"}, // ISOgrk3 | |
| 43 | {"*e", "ε"}, // ISOgrk3 | |
| 44 | {"*E", "E"}, | |
| 45 | {"*f", "φ"}, // ISOgrk3 | |
| 46 | {"*F", "Φ"}, // ISOgrk3 | |
| 47 | {"*g", "γ"}, // ISOgrk3 | |
| 48 | {"*G", "Γ"}, // ISOgrk3 | |
| 49 | {"*h", "θ"}, // ISOgrk3 | |
| 50 | {"*H", "Θ"}, // ISOgrk3 | |
| 51 | {"*i", "ι"}, // ISOgrk3 | |
| 52 | {"*I", "I"}, | |
| 53 | {"*k", "κ"}, // ISOgrk3 | |
| 54 | {"*K", "K;"}, | |
| 55 | {"*l", "&lamda;"}, // ISOgrk3 | |
| 56 | {"*L", "Λ"}, // ISOgrk3 | |
| 57 | {"*m", "μ"}, // ISOgrk3 | |
| 58 | {"*M", "M"}, | |
| 59 | {"*n", "ν"}, // ISOgrk3 | |
| 60 | {"*N", "N"}, | |
| 61 | {"*o", "o"}, | |
| 62 | {"*O", "O"}, | |
| 63 | {"*p", "π"}, // ISOgrk3 | |
| 64 | {"*P", "Π"}, // ISOgrk3 | |
| 65 | {"*q", "ψ"}, // ISOgrk3 | |
| 66 | {"*Q", "&PSI;"}, // ISOgrk3 | |
| 67 | {"*r", "ρ"}, // ISOgrk3 | |
| 68 | {"*R", "R"}, | |
| 69 | {"*s", "σ"}, // ISOgrk3 | |
| 70 | {"*S", "Σ"}, // ISOgrk3 | |
| 71 | {"*t", "τ"}, // ISOgrk3 | |
| 72 | {"*T", "Τ"}, // ISOgrk3 | |
| 73 | {"*u", "υ"}, // ISOgrk3 | |
| 74 | {"*U", "Υ"}, // ISOgrk3 | |
| 75 | {"*w", "ω"}, // ISOgrk3 | |
| 76 | {"*W", "Ω"}, // ISOgrk3 | |
| 77 | {"*x", "χ"}, // ISOgrk3 | |
| 78 | {"*X", "Χ"}, // ISOgrk3 | |
| 79 | {"*y", "η"}, // ISOgrk3 | |
| 80 | {"*Y", "Η"}, // ISOgrk3 | |
| 81 | {"*z", "ζ"}, // ISOgrk3 | |
| 82 | {"*Z", "Ζ"}, // ISOgrk3 | |
| 83 | {"+-", "±"}, // ISOnum | |
| 84 | {"->", "→"}, // ISOnum | |
| 85 | {"12", "½"}, // ISOnum | |
| 86 | {"14", "¼"}, // ISOnum | |
| 87 | {"34", "¾"}, // ISOnum | |
| 88 | {"<-", "←"}, // ISOnum | |
| 89 | {"==", "≡"}, // ISOtech | |
| 90 | {"Fi", "ffi"}, // ISOpub | |
| 91 | {"Fl", "ffl"}, // ISOpub | |
| 92 | {"aa", "´"}, // ISOdia | |
| 93 | {"ap", "∼"}, // ISOtech | |
| 94 | {"bl", "&phonexb;"}, // ISOpub | |
| 95 | {"br", "│"}, // ISObox | |
| 96 | {"bs", "☎"}, // ISOpub (for the Bell logo) | |
| 97 | {"bu", "•"}, // ISOpub | |
| 98 | {"bv", "|"}, // ISOnum | |
| 99 | {"ca", "∩"}, // ISOtech | |
| 100 | {"ci", "○"}, // ISOpub | |
| 101 | {"co", "©"}, // ISOnum | |
| 102 | {"ct", "¢"}, // ISOnum | |
| 103 | {"cu", "∪"}, // ISOtech | |
| 104 | {"da", "↓"}, // ISOnum | |
| 105 | {"de", "°"}, // ISOnum | |
| 106 | {"dg", "†"}, // ISOpub | |
| 107 | {"dd", "‡"}, // ISOpub | |
| 108 | {"di", "÷"}, // ISOnum | |
| 109 | {"em", "—"}, // ISOpub | |
| 110 | {"eq", "="}, // ISOnum | |
| 111 | {"es", "∅"}, // ISOamso | |
| 112 | {"ff", "ff"}, // ISOpub | |
| 113 | {"fi", "fi"}, // ISOpub | |
| 114 | {"fl", "fl"}, // ISOpub | |
| 115 | {"fm", "′"}, // ISOtech | |
| 116 | {"ge", "≥"}, // ISOtech | |
| 117 | {"gr", "∇"}, // ISOtech | |
| 118 | {"hy", "‐"}, // ISOnum | |
| 119 | {"ib", "⊆"}, // ISOtech | |
| 120 | {"if", "∞"}, // ISOtech | |
| 121 | {"ip", "⊇"}, // ISOtech | |
| 122 | {"is", "∫"}, // ISOtech | |
| 123 | {"le", "≤"}, // ISOtech | |
| 124 | // Some pile characters go here | |
| 125 | {"mi", "−"}, // ISOtech | |
| 126 | {"mo", "∈"}, // ISOtech | |
| 127 | {"mu", "×"}, // ISOnum | |
| 128 | {"no", "¬"}, // ISOnum | |
| 129 | {"or", "|"}, // ISOnum | |
| 130 | {"pl", "+"}, // ISOnum | |
| 131 | {"pt", "∝"}, // ISOtech | |
| 132 | {"rg", "™"}, // ISOnum | |
| 133 | // More pile characters go here | |
| 134 | {"rn", "¯"}, // ISOdia | |
| 135 | {"ru", "_"}, // ISOnum | |
| 136 | {"sb", "⊂"}, // ISOtech | |
| 137 | {"sc", "§"}, // ISOnum | |
| 138 | {"sl", "/"}, | |
| 139 | {"sp", "⊃"}, // ISOtech | |
| 140 | {"sq", "▪"}, // ISOpub | |
| 141 | {"sr", "√"}, // ISOtech | |
| 142 | {"ts", "ς"}, // ISOgrk3 | |
| 143 | {"ua", "↑"}, // ISOnum | |
| 144 | {"ul", "_"}, | |
| 145 | {"~=", "≅"}, // ISOtech | |
| 146 | // Extended specials supported by groff; see groff_char(7). | |
| 147 | // These are listed in the order they occur on that man page. | |
| 148 | {"-D", "Ð"}, // ISOlat: Icelandic uppercase eth | |
| 149 | {"Sd", "ð"}, // ISOlat1: Icelandic lowercase eth | |
| 150 | {"TP", "Þ"}, // ISOlat1: Icelandic uppercase thorn | |
| 151 | {"Tp", "þ"}, // ISOlat1: Icelandic lowercase thorn | |
| 152 | {"ss", "ß"}, // ISOlat1 | |
| 153 | // Ligatures | |
| 154 | // ff, fi, fl, ffi, ffl from old troff go here | |
| 155 | {"AE", "Æ"}, // ISOlat1 | |
| 156 | {"ae", "æ"}, // ISOlat1 | |
| 157 | {"OE", "Œ"}, // ISOlat2 | |
| 158 | {"oe", "œ"}, // ISOlat2 | |
| 159 | {"IJ", "ij"}, // ISOlat2: Dutch IJ ligature | |
| 160 | {"ij", "IJ"}, // ISOlat2: Dutch ij ligature | |
| 161 | {".i", "ı"}, // ISOlat2,ISOamso | |
| 162 | {".j", "&jnodot;"}, // ISOamso (undocumented but in 1.19) | |
| 163 | // Accented characters | |
| 164 | {"'A", "Á"}, // ISOlat1 | |
| 165 | {"'C", "Ć"}, // ISOlat2 | |
| 166 | {"'E", "É"}, // ISOlat1 | |
| 167 | {"'I", "Í"}, // ISOlat1 | |
| 168 | {"'O", "Ó"}, // ISOlat1 | |
| 169 | {"'U", "Ú"}, // ISOlat1 | |
| 170 | {"'Y", "Ý"}, // ISOlat1 | |
| 171 | {"'a", "á"}, // ISOlat1 | |
| 172 | {"'c", "ć"}, // ISOlat2 | |
| 173 | {"'e", "é"}, // ISOlat1 | |
| 174 | {"'i", "í"}, // ISOlat1 | |
| 175 | {"'o", "ó"}, // ISOlat1 | |
| 176 | {"'u", "ú"}, // ISOlat1 | |
| 177 | {"'y", "ý"}, // ISOlat1 | |
| 178 | {":A", "Ä"}, // ISOlat1 | |
| 179 | {":E", "Ë"}, // ISOlat1 | |
| 180 | {":I", "Ï"}, // ISOlat1 | |
| 181 | {":O", "Ö"}, // ISOlat1 | |
| 182 | {":U", "Ü"}, // ISOlat1 | |
| 183 | {":Y", "Ÿ"}, // ISOlat2 | |
| 184 | {":a", "ä"}, // ISOlat1 | |
| 185 | {":e", "ë"}, // ISOlat1 | |
| 186 | {":i", "ï"}, // ISOlat1 | |
| 187 | {":o", "ö"}, // ISOlat1 | |
| 188 | {":u", "ü"}, // ISOlat1 | |
| 189 | {":y", "ÿ"}, // ISOlat1 | |
| 190 | {"^A", "Â"}, // ISOlat1 | |
| 191 | {"^E", "Ê"}, // ISOlat1 | |
| 192 | {"^I", "Î"}, // ISOlat1 | |
| 193 | {"^O", "Ô"}, // ISOlat1 | |
| 194 | {"^U", "Û"}, // ISOlat1 | |
| 195 | {"^a", "â"}, // ISOlat1 | |
| 196 | {"^e", "ê"}, // ISOlat1 | |
| 197 | {"^i", "î"}, // ISOlat1 | |
| 198 | {"^o", "ô"}, // ISOlat1 | |
| 199 | {"^u", "û"}, // ISOlat1 | |
| 200 | {"`A", "À"}, // ISOlat1 | |
| 201 | {"`E", "È"}, // ISOlat1 | |
| 202 | {"`I", "Ì"}, // ISOlat1 | |
| 203 | {"`O", "Ò"}, // ISOlat1 | |
| 204 | {"`U", "Ù"}, // ISOlat1 | |
| 205 | {"`a", "à"}, // ISOlat1 | |
| 206 | {"`e", "è"}, // ISOlat1 | |
| 207 | {"`i", "ì"}, // ISOlat1 | |
| 208 | {"`o", "ò"}, // ISOlat1 | |
| 209 | {"`u", "ù"}, // ISOlat1 | |
| 210 | {"~A", "Ã"}, // ISOlat1 | |
| 211 | {"~N", "Ñ"}, // ISOlat1 | |
| 212 | {"~O", "Õ"}, // ISOlat1 | |
| 213 | {"~a", "ã"}, // ISOlat1 | |
| 214 | {"~n", "ñ"}, // ISOlat1 | |
| 215 | {"~o", "õ"}, // ISOlat1 | |
| 216 | {"vS", "Š"}, // ISOlat2 | |
| 217 | {"vs", "š"}, // ISOlat2 | |
| 218 | {"vZ", "Ž"}, // ISOlat2 | |
| 219 | {"vz", "ž"}, // ISOlat2 | |
| 220 | {",C", "Ç"}, // ISOlat1 | |
| 221 | {",c", "ç"}, // ISOlat1 | |
| 222 | {"/L", "Ł"}, // ISOlat2: Polish L with a slash | |
| 223 | {"/l", "ł"}, // ISOlat2: Polish l with a slash | |
| 224 | {"/O", "Ø"}, // ISOlat1 | |
| 225 | {"/o", "ø"}, // ISOlat1 | |
| 226 | {"oA", "Å"}, // ISOlat1 | |
| 227 | {"oa", "å"}, // ISOlat1 | |
| 228 | // Accents | |
| 229 | {"a\"","˝"}, // ISOdia: double acute accent (Hungarian umlaut) | |
| 230 | {"a-", "¯"}, // ISOdia: macron or bar accent | |
| 231 | {"a.", "˙"}, // ISOdia: dot above | |
| 232 | {"a^", "ˆ"}, // ISOdia: circumflex accent | |
| 233 | {"aa", "´"}, // ISOdia: acute accent | |
| 234 | {"ga", "`"}, // ISOdia: grave accent | |
| 235 | {"ab", "˘"}, // ISOdia: breve accent | |
| 236 | {"ac", "¸"}, // ISOdia: cedilla accent | |
| 237 | {"ad", "¨"}, // ISOdia: umlaut or dieresis | |
| 238 | {"ah", "ˇ"}, // ISOdia: caron (aka hacek accent) | |
| 239 | {"ao", "˚"}, // ISOdia: ring or circle accent | |
| 240 | {"a~", "˜"}, // ISOdia: tilde accent | |
| 241 | {"ho", "˛"}, // ISOdia: hook or ogonek accent | |
| 242 | {"ha", "^"}, // ASCII circumflex, hat, caret | |
| 243 | {"ti", "~"}, // ASCII tilde, large tilde | |
| 244 | // Quotes | |
| 245 | {"Bq", "‚"}, // ISOpub: low double comma quote | |
| 246 | {"bq", "„"}, // ISOpub: low single comma quote | |
| 247 | {"lq", "“"}, // ISOnum | |
| 248 | {"rq", "”"}, // ISOpub | |
| 249 | {"oq", "‘"}, // ISOnum: single open quote | |
| 250 | {"cq", "’"}, // ISOnum: single closing quote (ASCII 39) | |
| 251 | {"aq", "&zerosp;'"}, // apostrophe quote | |
| 252 | {"dq", "\""}, // double quote (ASCII 34) | |
| 253 | {"Fo", "«"}, // ISOnum | |
| 254 | {"Fc", "»"}, // ISOnum | |
| 255 | //{"fo", "&fo;"}, | |
| 256 | //{"fc", "&fc;"}, | |
| 257 | // Punctuation | |
| 258 | {"r!", "¡"}, // ISOnum | |
| 259 | {"r?", "¿"}, // ISOnum | |
| 260 | // Old troff \(em goes here | |
| 261 | {"en", "–"}, // ISOpub: en dash | |
| 262 | // Old troff \(hy goes here | |
| 263 | // Brackets | |
| 264 | {"lB", "["}, // ISOnum: left (square) bracket | |
| 265 | {"rB", "]"}, // ISOnum: right (square) bracket | |
| 266 | {"lC", "{"}, // ISOnum: left (curly) brace | |
| 267 | {"rC", "}"}, // ISOnum: right (curly) brace | |
| 268 | {"la", "⟨"}, // ISOtech: left angle bracket | |
| 269 | {"ra", "⟩"}, // ISOtech: right angle bracket | |
| 270 | // Old troff \(bv goes here | |
| 271 | // Bracket-pile characters could go here. | |
| 272 | // Arrows | |
| 273 | // Old troff \(<- and \(-> go here | |
| 274 | {"<>", "↔"}, // ISOamsa | |
| 275 | {"da", "↓"}, // ISOnum | |
| 276 | {"ua", "↑"}, // ISOnum | |
| 277 | {"lA", "⇐"}, // ISOtech | |
| 278 | {"rA", "⇒"}, // ISOtech | |
| 279 | {"hA", "⇔"}, // ISOtech: horizontal double-headed arrow | |
| 280 | {"dA", "⇓"}, // ISOamsa | |
| 281 | {"uA", "⇑"}, // ISOamsa | |
| 282 | {"vA", "⇕"}, // ISOamsa: vertical double-headed double arrow | |
| 283 | //{"an", "&an;"}, | |
| 284 | // Lines | |
| 285 | {"-h", "ℏ"}, // ISOamso: h-bar (Planck's constant) | |
| 286 | // Old troff \(or goes here | |
| 287 | {"ba", "|"}, // ISOnum | |
| 288 | // Old troff \(br, \{u, \(ul, \(bv go here | |
| 289 | {"bb", "¦"}, // ISOnum | |
| 290 | {"sl", "/"}, | |
| 291 | {"rs", "\"}, // ISOnum | |
| 292 | // Text markers | |
| 293 | // Old troff \(ci, \(bu, \(dd, \(dg go here | |
| 294 | {"lz", "◊"}, // ISOpub | |
| 295 | // Old troff sq goes here | |
| 296 | {"ps", "¶"}, // ISOnum: paragraph or pilcrow sign | |
| 297 | {"sc", "§"}, // ISOnum (in old troff) | |
| 298 | // Old troff \(lh, \{h go here | |
| 299 | {"at", "@"}, // ISOnum | |
| 300 | {"sh", "#"}, // ISOnum | |
| 301 | //{"CR", "&CR;"}, | |
| 302 | {"OK", "✓"}, // ISOpub | |
| 303 | // Legalize | |
| 304 | // Old troff \(co, \{g go here | |
| 305 | {"tm", "™"}, // ISOnum | |
| 306 | // Currency symbols | |
| 307 | {"Do", "$"}, // ISOnum | |
| 308 | {"ct", "¢"}, // ISOnum | |
| 309 | {"eu", "€"}, | |
| 310 | {"Eu", "€"}, | |
| 311 | {"Ye", "¥"}, // ISOnum | |
| 312 | {"Po", "£"}, // ISOnum | |
| 313 | {"Cs", "¤"}, // ISOnum: currency sign | |
| 314 | {"Fn", "&fnof"}, // ISOtech | |
| 315 | // Units | |
| 316 | // Old troff de goes here | |
| 317 | {"%0", "‰"}, // ISOtech: per thousand, per mille sign | |
| 318 | // Old troff \(fm goes here | |
| 319 | {"sd", "″"}, // ISOtech | |
| 320 | {"mc", "µ"}, // ISOnum | |
| 321 | {"Of", "ª"}, // ISOnum | |
| 322 | {"Om", "º"}, // ISOnum | |
| 323 | // Logical symbols | |
| 324 | {"AN", "∧"}, // ISOtech | |
| 325 | {"OR", "∨"}, // ISOtech | |
| 326 | // Old troff \(no goes here | |
| 327 | {"te", "∃"}, // ISOtech: there exists, existential quantifier | |
| 328 | {"fa", "∀"}, // ISOtech: for all, universal quantifier | |
| 329 | {"st", "&bepsi"}, // ISOamsr: such that | |
| 330 | {"3d", "∴"}, // ISOtech | |
| 331 | {"tf", "∴"}, // ISOtech | |
| 332 | // Mathematical symbols | |
| 333 | // Old troff "12", "14", "34" goes here | |
| 334 | {"S1", "¹"}, // ISOnum | |
| 335 | {"S2", "²"}, // ISOnum | |
| 336 | {"S3", "³"}, // ISOnum | |
| 337 | // Old troff \(pl", \-, \(+- go here | |
| 338 | {"t+-", "±"}, // ISOnum | |
| 339 | {"-+", "∓"}, // ISOtech | |
| 340 | {"pc", "·"}, // ISOnum | |
| 341 | {"md", "·"}, // ISOnum | |
| 342 | // Old troff \(mu goes here | |
| 343 | {"tmu", "×"}, // ISOnum | |
| 344 | {"c*", "⊗"}, // ISOamsb: multiply sign in a circle | |
| 345 | {"c+", "⊕"}, // ISOamsb: plus sign in a circle | |
| 346 | // Old troff \(di goes here | |
| 347 | {"tdi", "÷"}, // ISOnum | |
| 348 | {"f/", "―"}, // ISOnum: horizintal bar for fractions | |
| 349 | // Old troff \(** goes here | |
| 350 | {"<=", "≤"}, // ISOtech | |
| 351 | {">=", "≥"}, // ISOtech | |
| 352 | {"<<", "≪"}, // ISOamsr | |
| 353 | {">>", "≫"}, // ISOamsr | |
| 354 | {"!=", "≠"}, // ISOtech | |
| 355 | // Old troff \(eq and \(== go here | |
| 356 | {"=~", "≅"}, // ISOamsr | |
| 357 | // Old troff \(ap goes here | |
| 358 | {"~~", "≈"}, // ISOtech | |
| 359 | // This appears to be an error in the groff table. | |
| 360 | // It clashes with the Bell Labs use of ~= for a congruence sign | |
| 361 | // {"~=", "≈"}, // ISOamsr | |
| 362 | // Old troff \(pt, \(es, \(mo go here | |
| 363 | {"nm", "∉"}, // ISOtech | |
| 364 | {"nb", "⊄"}, // ISOamsr | |
| 365 | {"nc", "⊅"}, // ISOamsn | |
| 366 | {"ne", "≢"}, // ISOamsn | |
| 367 | // Old troff \(sb, \(sp, \(ib, \(ip, \(ca, \(cu go here | |
| 368 | {"/_", "∠"}, // ISOamso | |
| 369 | {"pp", "⊥"}, // ISOtech | |
| 370 | // Old troff \(is goes here | |
| 371 | {"sum", "∑"}, // ISOamsb | |
| 372 | {"product", "∏"}, // ISOamsb | |
| 373 | {"gr", "∇"}, // ISOtech | |
| 374 | // Old troff \(sr. \{n, \(if go here | |
| 375 | {"Ah", "ℵ"}, // ISOtech | |
| 376 | {"Im", "ℑ"}, // ISOamso: Fraktur I, imaginary | |
| 377 | {"Re", "ℜ"}, // ISOamso: Fraktur R, real | |
| 378 | {"wp", "℘"}, // ISOamso | |
| 379 | {"pd", "∂"}, // ISOtech: partial differentiation sign | |
| 380 | // Their table duplicates the Greek letters here. | |
| 381 | // We list only the variant forms here, mapping them into | |
| 382 | // the ISO Greek 4 variants (which may or may not be correct :-() | |
| 383 | {"+f", "&b.phiv;"}, // ISOgrk4: variant phi | |
| 384 | {"+h", "&b.thetas;"}, // ISOgrk4: variant theta | |
| 385 | {"+p", "&b.omega;"}, // ISOgrk4: variant pi, looking like omega | |
| 386 | // Card symbols | |
| 387 | {"CL", "♣"}, // ISOpub: club suit | |
| 388 | {"SP", "♠"}, // ISOpub: spade suit | |
| 389 | {"HE", "♥"}, // ISOpub: heart suit | |
| 390 | {"DI", "♦"}, // ISOpub: diamond suit | |
| 391 | }; | |
| 392 | ||
| 393 | const char *special_to_entity(const char *sp) | |
| 394 | { | |
| 395 | struct map *mp; | |
| 396 | for (mp = entity_table; | |
| 397 | mp < entity_table + sizeof(entity_table)/sizeof(entity_table[0]); | |
| 398 | mp++) { | |
| 399 | if (strcmp(mp->from, sp) == 0) | |
| 400 | return mp->to; | |
| 401 | } | |
| 402 | return NULL; | |
| 403 | } | |
| 404 | ||
| 92d0a6a6 JR |
405 | class char_box : public simple_box { |
| 406 | unsigned char c; | |
| 407 | char next_is_italic; | |
| 408 | char prev_is_italic; | |
| 409 | public: | |
| 410 | char_box(unsigned char); | |
| 411 | void debug_print(); | |
| 412 | void output(); | |
| 413 | int is_char(); | |
| 414 | int left_is_italic(); | |
| 415 | int right_is_italic(); | |
| 416 | void hint(unsigned); | |
| 417 | void handle_char_type(int, int); | |
| 418 | }; | |
| 419 | ||
| 420 | class special_char_box : public simple_box { | |
| 421 | char *s; | |
| 422 | public: | |
| 423 | special_char_box(const char *); | |
| 424 | ~special_char_box(); | |
| 425 | void output(); | |
| 426 | void debug_print(); | |
| 427 | int is_char(); | |
| 428 | void handle_char_type(int, int); | |
| 429 | }; | |
| 430 | ||
| 4d3e9548 JL |
// Spacing classes.  The enumerators are consecutive starting at 0.
enum spacing_type {
  s_ordinary = 0,
  s_operator,
  s_binary,
  s_relation,
  s_opening,
  s_closing,
  s_punctuation,
  s_inner,
  s_suppress
};
| 442 | ||
| 92d0a6a6 JR |
// Names of the spacing types, terminated by a null pointer.
// lookup_spacing_type() returns the index of the matching name.
const char *spacing_type_table[] = {
  "ordinary",		// 0
  "operator",		// 1
  "binary",		// 2
  "relation",		// 3
  "opening",		// 4
  "closing",		// 5
  "punctuation",	// 6
  "inner",		// 7
  "suppress",		// 8
  0,			// end of table
};
| 455 | ||
// Font types.  Each constant is the index of the corresponding name
// in font_type_table below.
const int DIGIT_TYPE = 0;
const int LETTER_TYPE = 1;

// Names of the font types, terminated by a null pointer.
// lookup_font_type() returns the index of the matching name.
const char *font_type_table[] = {
  "digit",		// DIGIT_TYPE
  "letter",		// LETTER_TYPE
  0,			// end of table
};
| 464 | ||
// Per-character attributes: how the character is spaced and which
// font type it is set in.
struct char_info {
  int spacing_type;	// index into spacing_type_table
  int font_type;	// DIGIT_TYPE or LETTER_TYPE
  char_info();
};
| 470 | ||
| 471 | char_info::char_info() | |
| 472 | : spacing_type(ORDINARY_TYPE), font_type(DIGIT_TYPE) | |
| 473 | { | |
| 474 | } | |
| 475 | ||
| 476 | static char_info char_table[256]; | |
| 477 | ||
| 478 | declare_ptable(char_info) | |
| 479 | implement_ptable(char_info) | |
| 480 | ||
| 481 | PTABLE(char_info) special_char_table; | |
| 482 | ||
| 483 | static int get_special_char_spacing_type(const char *ch) | |
| 484 | { | |
| 485 | char_info *p = special_char_table.lookup(ch); | |
| 486 | return p ? p->spacing_type : ORDINARY_TYPE; | |
| 487 | } | |
| 488 | ||
| 489 | static int get_special_char_font_type(const char *ch) | |
| 490 | { | |
| 491 | char_info *p = special_char_table.lookup(ch); | |
| 492 | return p ? p->font_type : DIGIT_TYPE; | |
| 493 | } | |
| 494 | ||
| 495 | static void set_special_char_type(const char *ch, int st, int ft) | |
| 496 | { | |
| 497 | char_info *p = special_char_table.lookup(ch); | |
| 498 | if (!p) { | |
| 499 | p = new char_info[1]; | |
| 500 | special_char_table.define(ch, p); | |
| 501 | } | |
| 502 | if (st >= 0) | |
| 503 | p->spacing_type = st; | |
| 504 | if (ft >= 0) | |
| 505 | p->font_type = ft; | |
| 506 | } | |
| 507 | ||
| 508 | void init_char_table() | |
| 509 | { | |
| 4d3e9548 JL |
510 | set_special_char_type("pl", s_binary, -1); |
| 511 | set_special_char_type("mi", s_binary, -1); | |
| 512 | set_special_char_type("eq", s_relation, -1); | |
| 513 | set_special_char_type("<=", s_relation, -1); | |
| 514 | set_special_char_type(">=", s_relation, -1); | |
| 515 | char_table['}'].spacing_type = s_closing; | |
| 516 | char_table[')'].spacing_type = s_closing; | |
| 517 | char_table[']'].spacing_type = s_closing; | |
| 518 | char_table['{'].spacing_type = s_opening; | |
| 519 | char_table['('].spacing_type = s_opening; | |
| 520 | char_table['['].spacing_type = s_opening; | |
| 521 | char_table[','].spacing_type = s_punctuation; | |
| 522 | char_table[';'].spacing_type = s_punctuation; | |
| 523 | char_table[':'].spacing_type = s_punctuation; | |
| 524 | char_table['.'].spacing_type = s_punctuation; | |
| 525 | char_table['>'].spacing_type = s_relation; | |
| 526 | char_table['<'].spacing_type = s_relation; | |
| 527 | char_table['*'].spacing_type = s_binary; | |
| 92d0a6a6 JR |
528 | for (int i = 0; i < 256; i++) |
| 529 | if (csalpha(i)) | |
| 530 | char_table[i].font_type = LETTER_TYPE; | |
| 531 | } | |
| 532 | ||
| 533 | static int lookup_spacing_type(const char *type) | |
| 534 | { | |
| 535 | for (int i = 0; spacing_type_table[i] != 0; i++) | |
| 536 | if (strcmp(spacing_type_table[i], type) == 0) | |
| 537 | return i; | |
| 538 | return -1; | |
| 539 | } | |
| 540 | ||
| 541 | static int lookup_font_type(const char *type) | |
| 542 | { | |
| 543 | for (int i = 0; font_type_table[i] != 0; i++) | |
| 544 | if (strcmp(font_type_table[i], type) == 0) | |
| 545 | return i; | |
| 546 | return -1; | |
| 547 | } | |
| 548 | ||
| 549 | void box::set_spacing_type(char *type) | |
| 550 | { | |
| 551 | int t = lookup_spacing_type(type); | |
| 552 | if (t < 0) | |
| 553 | error("unrecognised type `%1'", type); | |
| 554 | else | |
| 555 | spacing_type = t; | |
| 556 | a_delete type; | |
| 557 | } | |
| 558 | ||
| 559 | char_box::char_box(unsigned char cc) | |
| 560 | : c(cc), next_is_italic(0), prev_is_italic(0) | |
| 561 | { | |
| 562 | spacing_type = char_table[c].spacing_type; | |
| 563 | } | |
| 564 | ||
| 565 | void char_box::hint(unsigned flags) | |
| 566 | { | |
| 567 | if (flags & HINT_PREV_IS_ITALIC) | |
| 568 | prev_is_italic = 1; | |
| 569 | if (flags & HINT_NEXT_IS_ITALIC) | |
| 570 | next_is_italic = 1; | |
| 571 | } | |
| 572 | ||
| 573 | void char_box::output() | |
| 574 | { | |
| 4d3e9548 JL |
575 | if (output_format == troff) { |
| 576 | int font_type = char_table[c].font_type; | |
| 577 | if (font_type != LETTER_TYPE) | |
| 578 | printf("\\f[%s]", current_roman_font); | |
| 579 | if (!prev_is_italic) | |
| 580 | fputs("\\,", stdout); | |
| 581 | if (c == '\\') | |
| 582 | fputs("\\e", stdout); | |
| 583 | else | |
| 584 | putchar(c); | |
| 585 | if (!next_is_italic) | |
| 586 | fputs("\\/", stdout); | |
| 587 | else | |
| 588 | fputs("\\&", stdout); // suppress ligaturing and kerning | |
| 589 | if (font_type != LETTER_TYPE) | |
| 590 | fputs("\\fP", stdout); | |
| 591 | } | |
| 592 | else if (output_format == mathml) { | |
| 593 | if (isdigit(c)) | |
| 594 | printf("<mn>"); | |
| 595 | else if (char_table[c].spacing_type) | |
| 596 | printf("<mo>"); | |
| 597 | else | |
| 598 | printf("<mi>"); | |
| 599 | if (c == '<') | |
| 600 | printf("<"); | |
| 601 | else if (c == '>') | |
| 602 | printf(">"); | |
| 603 | else if (c == '&') | |
| 604 | printf("&"); | |
| 605 | else | |
| 606 | putchar(c); | |
| 607 | if (isdigit(c)) | |
| 608 | printf("</mn>"); | |
| 609 | else if (char_table[c].spacing_type) | |
| 610 | printf("</mo>"); | |
| 611 | else | |
| 612 | printf("</mi>"); | |
| 613 | } | |
| 92d0a6a6 JR |
614 | } |
| 615 | ||
| 616 | int char_box::left_is_italic() | |
| 617 | { | |
| 618 | int font_type = char_table[c].font_type; | |
| 619 | return font_type == LETTER_TYPE; | |
| 620 | } | |
| 621 | ||
| 622 | int char_box::right_is_italic() | |
| 623 | { | |
| 624 | int font_type = char_table[c].font_type; | |
| 625 | return font_type == LETTER_TYPE; | |
| 626 | } | |
| 627 | ||
| 628 | int char_box::is_char() | |
| 629 | { | |
| 630 | return 1; | |
| 631 | } | |
| 632 | ||
| 633 | void char_box::debug_print() | |
| 634 | { | |
| 635 | if (c == '\\') { | |
| 636 | putc('\\', stderr); | |
| 637 | putc('\\', stderr); | |
| 638 | } | |
| 639 | else | |
| 640 | putc(c, stderr); | |
| 641 | } | |
| 642 | ||
| 643 | special_char_box::special_char_box(const char *t) | |
| 644 | { | |
| 645 | s = strsave(t); | |
| 646 | spacing_type = get_special_char_spacing_type(s); | |
| 647 | } | |
| 648 | ||
| 649 | special_char_box::~special_char_box() | |
| 650 | { | |
| 651 | a_delete s; | |
| 652 | } | |
| 653 | ||
| 654 | void special_char_box::output() | |
| 655 | { | |
| 4d3e9548 JL |
656 | if (output_format == troff) { |
| 657 | int font_type = get_special_char_font_type(s); | |
| 658 | if (font_type != LETTER_TYPE) | |
| 659 | printf("\\f[%s]", current_roman_font); | |
| 660 | printf("\\,\\[%s]\\/", s); | |
| 661 | if (font_type != LETTER_TYPE) | |
| 662 | printf("\\fP"); | |
| 663 | } | |
| 664 | else if (output_format == mathml) { | |
| 665 | const char *entity = special_to_entity(s); | |
| 666 | if (entity != NULL) | |
| 667 | printf("<mo>%s</mo>", entity); | |
| 668 | else | |
| 669 | printf("<merror>unknown eqn/troff special char %s</merror>", s); | |
| 670 | } | |
| 92d0a6a6 JR |
671 | } |
| 672 | ||
| 673 | int special_char_box::is_char() | |
| 674 | { | |
| 675 | return 1; | |
| 676 | } | |
| 677 | ||
| 678 | void special_char_box::debug_print() | |
| 679 | { | |
| 680 | fprintf(stderr, "\\[%s]", s); | |
| 681 | } | |
| 682 | ||
| 683 | ||
| 684 | void char_box::handle_char_type(int st, int ft) | |
| 685 | { | |
| 686 | if (st >= 0) | |
| 687 | char_table[c].spacing_type = st; | |
| 688 | if (ft >= 0) | |
| 689 | char_table[c].font_type = ft; | |
| 690 | } | |
| 691 | ||
| 692 | void special_char_box::handle_char_type(int st, int ft) | |
| 693 | { | |
| 694 | set_special_char_type(s, st, ft); | |
| 695 | } | |
| 696 | ||
| 697 | void set_char_type(const char *type, char *ch) | |
| 698 | { | |
| 699 | assert(ch != 0); | |
| 700 | int st = lookup_spacing_type(type); | |
| 701 | int ft = lookup_font_type(type); | |
| 702 | if (st < 0 && ft < 0) { | |
| 703 | error("bad character type `%1'", type); | |
| 704 | a_delete ch; | |
| 705 | return; | |
| 706 | } | |
| 707 | box *b = split_text(ch); | |
| 708 | b->handle_char_type(st, ft); | |
| 709 | delete b; | |
| 710 | } | |
| 711 | ||
| 712 | /* We give primes special treatment so that in ``x' sub 2'', the ``2'' | |
| 713 | will be tucked under the prime */ | |
| 714 | ||
| 715 | class prime_box : public pointer_box { | |
| 716 | box *pb; | |
| 717 | public: | |
| 718 | prime_box(box *); | |
| 719 | ~prime_box(); | |
| 720 | int compute_metrics(int style); | |
| 721 | void output(); | |
| 722 | void compute_subscript_kern(); | |
| 723 | void debug_print(); | |
| 724 | void handle_char_type(int, int); | |
| 725 | }; | |
| 726 | ||
| 727 | box *make_prime_box(box *pp) | |
| 728 | { | |
| 729 | return new prime_box(pp); | |
| 730 | } | |
| 731 | ||
| 732 | prime_box::prime_box(box *pp) : pointer_box(pp) | |
| 733 | { | |
| 734 | pb = new special_char_box("fm"); | |
| 735 | } | |
| 736 | ||
| 737 | prime_box::~prime_box() | |
| 738 | { | |
| 739 | delete pb; | |
| 740 | } | |
| 741 | ||
| 742 | int prime_box::compute_metrics(int style) | |
| 743 | { | |
| 744 | int res = p->compute_metrics(style); | |
| 745 | pb->compute_metrics(style); | |
| 746 | printf(".nr " WIDTH_FORMAT " 0\\n[" WIDTH_FORMAT "]" | |
| 747 | "+\\n[" WIDTH_FORMAT "]\n", | |
| 748 | uid, p->uid, pb->uid); | |
| 749 | printf(".nr " HEIGHT_FORMAT " \\n[" HEIGHT_FORMAT "]" | |
| 750 | ">?\\n[" HEIGHT_FORMAT "]\n", | |
| 751 | uid, p->uid, pb->uid); | |
| 752 | printf(".nr " DEPTH_FORMAT " \\n[" DEPTH_FORMAT "]" | |
| 753 | ">?\\n[" DEPTH_FORMAT "]\n", | |
| 754 | uid, p->uid, pb->uid); | |
| 755 | return res; | |
| 756 | } | |
| 757 | ||
| 758 | void prime_box::compute_subscript_kern() | |
| 759 | { | |
| 760 | p->compute_subscript_kern(); | |
| 761 | printf(".nr " SUB_KERN_FORMAT " 0\\n[" WIDTH_FORMAT "]" | |
| 762 | "+\\n[" SUB_KERN_FORMAT "]>?0\n", | |
| 763 | uid, pb->uid, p->uid); | |
| 764 | } | |
| 765 | ||
| 766 | void prime_box::output() | |
| 767 | { | |
| 768 | p->output(); | |
| 769 | pb->output(); | |
| 770 | } | |
| 771 | ||
| 772 | void prime_box::handle_char_type(int st, int ft) | |
| 773 | { | |
| 774 | p->handle_char_type(st, ft); | |
| 775 | pb->handle_char_type(st, ft); | |
| 776 | } | |
| 777 | ||
| 778 | void prime_box::debug_print() | |
| 779 | { | |
| 780 | p->debug_print(); | |
| 781 | putc('\'', stderr); | |
| 782 | } | |
| 783 | ||
| 784 | box *split_text(char *text) | |
| 785 | { | |
| 786 | list_box *lb = 0; | |
| 787 | box *fb = 0; | |
| 788 | char *s = text; | |
| 789 | while (*s != '\0') { | |
| 790 | char c = *s++; | |
| 791 | box *b = 0; | |
| 792 | switch (c) { | |
| 793 | case '+': | |
| 794 | b = new special_char_box("pl"); | |
| 795 | break; | |
| 796 | case '-': | |
| 797 | b = new special_char_box("mi"); | |
| 798 | break; | |
| 799 | case '=': | |
| 800 | b = new special_char_box("eq"); | |
| 801 | break; | |
| 802 | case '\'': | |
| 803 | b = new special_char_box("fm"); | |
| 804 | break; | |
| 805 | case '<': | |
| 806 | if (*s == '=') { | |
| 807 | b = new special_char_box("<="); | |
| 808 | s++; | |
| 809 | break; | |
| 810 | } | |
| 811 | goto normal_char; | |
| 812 | case '>': | |
| 813 | if (*s == '=') { | |
| 814 | b = new special_char_box(">="); | |
| 815 | s++; | |
| 816 | break; | |
| 817 | } | |
| 818 | goto normal_char; | |
| 819 | case '\\': | |
| 820 | if (*s == '\0') { | |
| 821 | lex_error("bad escape"); | |
| 822 | break; | |
| 823 | } | |
| 824 | c = *s++; | |
| 825 | switch (c) { | |
| 826 | case '(': | |
| 827 | { | |
| 828 | char buf[3]; | |
| 829 | if (*s != '\0') { | |
| 830 | buf[0] = *s++; | |
| 831 | if (*s != '\0') { | |
| 832 | buf[1] = *s++; | |
| 833 | buf[2] = '\0'; | |
| 834 | b = new special_char_box(buf); | |
| 835 | } | |
| 836 | else { | |
| 837 | lex_error("bad escape"); | |
| 838 | } | |
| 839 | } | |
| 840 | else { | |
| 841 | lex_error("bad escape"); | |
| 842 | } | |
| 843 | } | |
| 844 | break; | |
| 845 | case '[': | |
| 846 | { | |
| 847 | char *ch = s; | |
| 848 | while (*s != ']' && *s != '\0') | |
| 849 | s++; | |
| 850 | if (*s == '\0') | |
| 851 | lex_error("bad escape"); | |
| 852 | else { | |
| 853 | *s++ = '\0'; | |
| 854 | b = new special_char_box(ch); | |
| 855 | } | |
| 856 | } | |
| 857 | break; | |
| 858 | case 'f': | |
| 859 | case 'g': | |
| 860 | case 'k': | |
| 861 | case 'n': | |
| 862 | case '*': | |
| 863 | { | |
| 864 | char *escape_start = s - 2; | |
| 865 | switch (*s) { | |
| 866 | case '(': | |
| 867 | if (*++s != '\0') | |
| 868 | ++s; | |
| 869 | break; | |
| 870 | case '[': | |
| 871 | for (++s; *s != '\0' && *s != ']'; s++) | |
| 872 | ; | |
| 873 | break; | |
| 874 | } | |
| 875 | if (*s == '\0') | |
| 876 | lex_error("bad escape"); | |
| 877 | else { | |
| 878 | ++s; | |
| 879 | char *buf = new char[s - escape_start + 1]; | |
| 880 | memcpy(buf, escape_start, s - escape_start); | |
| 881 | buf[s - escape_start] = '\0'; | |
| 882 | b = new quoted_text_box(buf); | |
| 883 | } | |
| 884 | } | |
| 885 | break; | |
| 886 | case '-': | |
| 887 | case '_': | |
| 888 | { | |
| 889 | char buf[2]; | |
| 890 | buf[0] = c; | |
| 891 | buf[1] = '\0'; | |
| 892 | b = new special_char_box(buf); | |
| 893 | } | |
| 894 | break; | |
| 895 | case '`': | |
| 896 | b = new special_char_box("ga"); | |
| 897 | break; | |
| 898 | case '\'': | |
| 899 | b = new special_char_box("aa"); | |
| 900 | break; | |
| 901 | case 'e': | |
| 902 | case '\\': | |
| 903 | b = new char_box('\\'); | |
| 904 | break; | |
| 905 | case '^': | |
| 906 | case '|': | |
| 907 | case '0': | |
| 908 | { | |
| 909 | char buf[3]; | |
| 910 | buf[0] = '\\'; | |
| 911 | buf[1] = c; | |
| 912 | buf[2] = '\0'; | |
| 913 | b = new quoted_text_box(strsave(buf)); | |
| 914 | break; | |
| 915 | } | |
| 916 | default: | |
| 917 | lex_error("unquoted escape"); | |
| 918 | b = new quoted_text_box(strsave(s - 2)); | |
| 919 | s = strchr(s, '\0'); | |
| 920 | break; | |
| 921 | } | |
| 922 | break; | |
| 923 | default: | |
| 924 | normal_char: | |
| 925 | b = new char_box(c); | |
| 926 | break; | |
| 927 | } | |
| 928 | while (*s == '\'') { | |
| 929 | if (b == 0) | |
| 930 | b = new quoted_text_box(0); | |
| 931 | b = new prime_box(b); | |
| 932 | s++; | |
| 933 | } | |
| 934 | if (b != 0) { | |
| 935 | if (lb != 0) | |
| 936 | lb->append(b); | |
| 937 | else if (fb != 0) { | |
| 938 | lb = new list_box(fb); | |
| 939 | lb->append(b); | |
| 940 | } | |
| 941 | else | |
| 942 | fb = b; | |
| 943 | } | |
| 944 | } | |
| 945 | a_delete text; | |
| 946 | if (lb != 0) | |
| 947 | return lb; | |
| 948 | else if (fb != 0) | |
| 949 | return fb; | |
| 950 | else | |
| 951 | return new quoted_text_box(0); | |
| 952 | } | |
| 953 |