contrib/mdocml/term.c

   1 /*      $Id: term.c,v 1.280 2019/01/15 12:16:18 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010-2019 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #include "config.h"
  19
  20 #include <sys/types.h>
  21
  22 #include <assert.h>
  23 #include <ctype.h>
  24 #include <stdint.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 #include "mandoc.h"
  30 #include "mandoc_aux.h"
  31 #include "out.h"
  32 #include "term.h"
  33 #include "main.h"
  34
  35 static  size_t           cond_width(const struct termp *, int, int *);
  36 static  void             adjbuf(struct termp_col *, size_t);
  37 static  void             bufferc(struct termp *, char);
  38 static  void             encode(struct termp *, const char *, size_t);
  39 static  void             encode1(struct termp *, int);
  40 static  void             endline(struct termp *);
  41 static  void             term_field(struct termp *, size_t, size_t,
  42                                 size_t, size_t);
  43 static  void             term_fill(struct termp *, size_t *, size_t *,
  44                                 size_t);
  45
  46
  47 void
  48 term_setcol(struct termp *p, size_t maxtcol)
  49 {
  50         if (maxtcol > p->maxtcol) {
  51                 p->tcols = mandoc_recallocarray(p->tcols,
  52                     p->maxtcol, maxtcol, sizeof(*p->tcols));
  53                 p->maxtcol = maxtcol;
  54         }
  55         p->lasttcol = maxtcol - 1;
  56         p->tcol = p->tcols;
  57 }
  58
  59 void
  60 term_free(struct termp *p)
  61 {
  62         for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++)
  63                 free(p->tcol->buf);
  64         free(p->tcols);
  65         free(p->fontq);
  66         free(p);
  67 }
  68
  69 void
  70 term_begin(struct termp *p, term_margin head,
  71                 term_margin foot, const struct roff_meta *arg)
  72 {
  73
  74         p->headf = head;
  75         p->footf = foot;
  76         p->argf = arg;
  77         (*p->begin)(p);
  78 }
  79
  80 void
  81 term_end(struct termp *p)
  82 {
  83
  84         (*p->end)(p);
  85 }
  86
  87 /*
  88  * Flush a chunk of text.  By default, break the output line each time
  89  * the right margin is reached, and continue output on the next line
  90  * at the same offset as the chunk itself.  By default, also break the
  91  * output line at the end of the chunk.  There are many flags modifying
  92  * this behaviour, see the comments in the body of the function.
  93  */
  94 void
  95 term_flushln(struct termp *p)
  96 {
  97         size_t   vbl;      /* Number of blanks to prepend to the output. */
  98         size_t   vbr;      /* Actual visual position of the end of field. */
  99         size_t   vfield;   /* Desired visual field width. */
 100         size_t   vtarget;  /* Desired visual position of the right margin. */
 101         size_t   ic;       /* Character position in the input buffer. */
 102         size_t   nbr;      /* Number of characters to print in this field. */
 103
 104         /*
 105          * Normally, start writing at the left margin, but with the
 106          * NOPAD flag, start writing at the current position instead.
 107          */
 108
 109         vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ?
 110             0 : p->tcol->offset - p->viscol;
 111         if (p->minbl && vbl < p->minbl)
 112                 vbl = p->minbl;
 113
 114         if ((p->flags & TERMP_MULTICOL) == 0)
 115                 p->tcol->col = 0;
 116
 117         /* Loop over output lines. */
 118
 119         for (;;) {
 120                 vfield = p->tcol->rmargin > p->viscol + vbl ?
 121                     p->tcol->rmargin - p->viscol - vbl : 0;
 122
 123                 /*
 124                  * Normally, break the line at the the right margin
 125                  * of the field, but with the NOBREAK flag, only
 126                  * break it at the max right margin of the screen,
 127                  * and with the BRNEVER flag, never break it at all.
 128                  */
 129
 130                 vtarget = p->flags & TERMP_BRNEVER ? SIZE_MAX :
 131                     (p->flags & TERMP_NOBREAK) == 0 ? vfield :
 132                     p->maxrmargin > p->viscol + vbl ?
 133                     p->maxrmargin - p->viscol - vbl : 0;
 134
 135                 /*
 136                  * Figure out how much text will fit in the field.
 137                  * If there is whitespace only, print nothing.
 138                  */
 139
 140                 term_fill(p, &nbr, &vbr, vtarget);
 141                 if (nbr == 0)
 142                         break;
 143
 144                 /*
 145                  * With the CENTER or RIGHT flag, increase the indentation
 146                  * to center the text between the left and right margins
 147                  * or to adjust it to the right margin, respectively.
 148                  */
 149
 150                 if (vbr < vtarget) {
 151                         if (p->flags & TERMP_CENTER)
 152                                 vbl += (vtarget - vbr) / 2;
 153                         else if (p->flags & TERMP_RIGHT)
 154                                 vbl += vtarget - vbr;
 155                 }
 156
 157                 /* Finally, print the field content. */
 158
 159                 term_field(p, vbl, nbr, vbr, vtarget);
 160
 161                 /*
 162                  * If there is no text left in the field, exit the loop.
 163                  * If the BRTRSP flag is set, consider trailing
 164                  * whitespace significant when deciding whether
 165                  * the field fits or not.
 166                  */
 167
 168                 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
 169                         switch (p->tcol->buf[ic]) {
 170                         case '\t':
 171                                 if (p->flags & TERMP_BRTRSP)
 172                                         vbr = term_tab_next(vbr);
 173                                 continue;
 174                         case ' ':
 175                                 if (p->flags & TERMP_BRTRSP)
 176                                         vbr += (*p->width)(p, ' ');
 177                                 continue;
 178                         case '\n':
 179                         case ASCII_BREAK:
 180                                 continue;
 181                         default:
 182                                 break;
 183                         }
 184                         break;
 185                 }
 186                 if (ic == p->tcol->lastcol)
 187                         break;
 188
 189                 /*
 190                  * At the location of an automtic line break, input
 191                  * space characters are consumed by the line break.
 192                  */
 193
 194                 while (p->tcol->col < p->tcol->lastcol &&
 195                     p->tcol->buf[p->tcol->col] == ' ')
 196                         p->tcol->col++;
 197
 198                 /*
 199                  * In multi-column mode, leave the rest of the text
 200                  * in the buffer to be handled by a subsequent
 201                  * invocation, such that the other columns of the
 202                  * table can be handled first.
 203                  * In single-column mode, simply break the line.
 204                  */
 205
 206                 if (p->flags & TERMP_MULTICOL)
 207                         return;
 208
 209                 endline(p);
 210                 p->viscol = 0;
 211
 212                 /*
 213                  * Normally, start the next line at the same indentation
 214                  * as this one, but with the BRIND flag, start it at the
 215                  * right margin instead.  This is used together with
 216                  * NOBREAK for the tags in various kinds of tagged lists.
 217                  */
 218
 219                 vbl = p->flags & TERMP_BRIND ?
 220                     p->tcol->rmargin : p->tcol->offset;
 221         }
 222
 223         /* Reset output state in preparation for the next field. */
 224
 225         p->col = p->tcol->col = p->tcol->lastcol = 0;
 226         p->minbl = p->trailspace;
 227         p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD);
 228
 229         if (p->flags & TERMP_MULTICOL)
 230                 return;
 231
 232         /*
 233          * The HANG flag means that the next field
 234          * always follows on the same line.
 235          * The NOBREAK flag means that the next field
 236          * follows on the same line unless the field was overrun.
 237          * Normally, break the line at the end of each field.
 238          */
 239
 240         if ((p->flags & TERMP_HANG) == 0 &&
 241             ((p->flags & TERMP_NOBREAK) == 0 ||
 242              vbr + term_len(p, p->trailspace) > vfield))
 243                 endline(p);
 244 }
 245
 246 /*
 247  * Store the number of input characters to print in this field in *nbr
 248  * and their total visual width to print in *vbr.
 249  * If there is only whitespace in the field, both remain zero.
 250  * The desired visual width of the field is provided by vtarget.
 251  * If the first word is longer, the field will be overrun.
 252  */
 253 static void
 254 term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
 255 {
 256         size_t   ic;        /* Character position in the input buffer. */
 257         size_t   vis;       /* Visual position of the current character. */
 258         size_t   vn;        /* Visual position of the next character. */
 259         int      breakline; /* Break at the end of this word. */
 260         int      graph;     /* Last character was non-blank. */
 261
 262         *nbr = *vbr = vis = 0;
 263         breakline = graph = 0;
 264         for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
 265                 switch (p->tcol->buf[ic]) {
 266                 case '\b':  /* Escape \o (overstrike) or backspace markup. */
 267                         assert(ic > 0);
 268                         vis -= (*p->width)(p, p->tcol->buf[ic - 1]);
 269                         continue;
 270
 271                 case '\t':  /* Normal ASCII whitespace. */
 272                 case ' ':
 273                 case ASCII_BREAK:  /* Escape \: (breakpoint). */
 274                         switch (p->tcol->buf[ic]) {
 275                         case '\t':
 276                                 vn = term_tab_next(vis);
 277                                 break;
 278                         case ' ':
 279                                 vn = vis + (*p->width)(p, ' ');
 280                                 break;
 281                         case ASCII_BREAK:
 282                                 vn = vis;
 283                                 break;
 284                         }
 285                         /* Can break at the end of a word. */
 286                         if (breakline || vn > vtarget)
 287                                 break;
 288                         if (graph) {
 289                                 *nbr = ic;
 290                                 *vbr = vis;
 291                                 graph = 0;
 292                         }
 293                         vis = vn;
 294                         continue;
 295
 296                 case '\n':  /* Escape \p (break at the end of the word). */
 297                         breakline = 1;
 298                         continue;
 299
 300                 case ASCII_HYPH:  /* Breakable hyphen. */
 301                         graph = 1;
 302                         /*
 303                          * We are about to decide whether to break the
 304                          * line or not, so we no longer need this hyphen
 305                          * to be marked as breakable.  Put back a real
 306                          * hyphen such that we get the correct width.
 307                          */
 308                         p->tcol->buf[ic] = '-';
 309                         vis += (*p->width)(p, '-');
 310                         if (vis > vtarget) {
 311                                 ic++;
 312                                 break;
 313                         }
 314                         *nbr = ic + 1;
 315                         *vbr = vis;
 316                         continue;
 317
 318                 case ASCII_NBRSP:  /* Non-breakable space. */
 319                         p->tcol->buf[ic] = ' ';
 320                         /* FALLTHROUGH */
 321                 default:  /* Printable character. */
 322                         graph = 1;
 323                         vis += (*p->width)(p, p->tcol->buf[ic]);
 324                         if (vis > vtarget && *nbr > 0)
 325                                 return;
 326                         continue;
 327                 }
 328                 break;
 329         }
 330
 331         /*
 332          * If the last word extends to the end of the field without any
 333          * trailing whitespace, the loop could not check yet whether it
 334          * can remain on this line.  So do the check now.
 335          */
 336
 337         if (graph && (vis <= vtarget || *nbr == 0)) {
 338                 *nbr = ic;
 339                 *vbr = vis;
 340         }
 341 }
 342
 343 /*
 344  * Print the contents of one field
 345  * with an indentation of        vbl      visual columns,
 346  * an input string length of     nbr      characters,
 347  * an output width of            vbr      visual columns,
 348  * and a desired field width of  vtarget  visual columns.
 349  */
 350 static void
 351 term_field(struct termp *p, size_t vbl, size_t nbr, size_t vbr, size_t vtarget)
 352 {
 353         size_t   ic;    /* Character position in the input buffer. */
 354         size_t   vis;   /* Visual position of the current character. */
 355         size_t   dv;    /* Visual width of the current character. */
 356         size_t   vn;    /* Visual position of the next character. */
 357
 358         vis = 0;
 359         for (ic = p->tcol->col; ic < nbr; ic++) {
 360
 361                 /*
 362                  * To avoid the printing of trailing whitespace,
 363                  * do not print whitespace right away, only count it.
 364                  */
 365
 366                 switch (p->tcol->buf[ic]) {
 367                 case '\n':
 368                 case ASCII_BREAK:
 369                         continue;
 370                 case '\t':
 371                         vn = term_tab_next(vis);
 372                         vbl += vn - vis;
 373                         vis = vn;
 374                         continue;
 375                 case ' ':
 376                 case ASCII_NBRSP:
 377                         dv = (*p->width)(p, ' ');
 378                         vbl += dv;
 379                         vis += dv;
 380                         continue;
 381                 default:
 382                         break;
 383                 }
 384
 385                 /*
 386                  * We found a non-blank character to print,
 387                  * so write preceding white space now.
 388                  */
 389
 390                 if (vbl > 0) {
 391                         (*p->advance)(p, vbl);
 392                         p->viscol += vbl;
 393                         vbl = 0;
 394                 }
 395
 396                 /* Print the character and adjust the visual position. */
 397
 398                 (*p->letter)(p, p->tcol->buf[ic]);
 399                 if (p->tcol->buf[ic] == '\b') {
 400                         dv = (*p->width)(p, p->tcol->buf[ic - 1]);
 401                         p->viscol -= dv;
 402                         vis -= dv;
 403                 } else {
 404                         dv = (*p->width)(p, p->tcol->buf[ic]);
 405                         p->viscol += dv;
 406                         vis += dv;
 407                 }
 408         }
 409         p->tcol->col = nbr;
 410 }
 411
 412 static void
 413 endline(struct termp *p)
 414 {
 415         if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) {
 416                 p->mc = NULL;
 417                 p->flags &= ~TERMP_ENDMC;
 418         }
 419         if (p->mc != NULL) {
 420                 if (p->viscol && p->maxrmargin >= p->viscol)
 421                         (*p->advance)(p, p->maxrmargin - p->viscol + 1);
 422                 p->flags |= TERMP_NOBUF | TERMP_NOSPACE;
 423                 term_word(p, p->mc);
 424                 p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC);
 425         }
 426         p->viscol = 0;
 427         p->minbl = 0;
 428         (*p->endline)(p);
 429 }
 430
 431 /*
 432  * A newline only breaks an existing line; it won't assert vertical
 433  * space.  All data in the output buffer is flushed prior to the newline
 434  * assertion.
 435  */
 436 void
 437 term_newln(struct termp *p)
 438 {
 439
 440         p->flags |= TERMP_NOSPACE;
 441         if (p->tcol->lastcol || p->viscol)
 442                 term_flushln(p);
 443 }
 444
 445 /*
 446  * Asserts a vertical space (a full, empty line-break between lines).
 447  * Note that if used twice, this will cause two blank spaces and so on.
 448  * All data in the output buffer is flushed prior to the newline
 449  * assertion.
 450  */
 451 void
 452 term_vspace(struct termp *p)
 453 {
 454
 455         term_newln(p);
 456         p->viscol = 0;
 457         p->minbl = 0;
 458         if (0 < p->skipvsp)
 459                 p->skipvsp--;
 460         else
 461                 (*p->endline)(p);
 462 }
 463
 464 /* Swap current and previous font; for \fP and .ft P */
 465 void
 466 term_fontlast(struct termp *p)
 467 {
 468         enum termfont    f;
 469
 470         f = p->fontl;
 471         p->fontl = p->fontq[p->fonti];
 472         p->fontq[p->fonti] = f;
 473 }
 474
 475 /* Set font, save current, discard previous; for \f, .ft, .B etc. */
 476 void
 477 term_fontrepl(struct termp *p, enum termfont f)
 478 {
 479
 480         p->fontl = p->fontq[p->fonti];
 481         p->fontq[p->fonti] = f;
 482 }
 483
 484 /* Set font, save previous. */
 485 void
 486 term_fontpush(struct termp *p, enum termfont f)
 487 {
 488
 489         p->fontl = p->fontq[p->fonti];
 490         if (++p->fonti == p->fontsz) {
 491                 p->fontsz += 8;
 492                 p->fontq = mandoc_reallocarray(p->fontq,
 493                     p->fontsz, sizeof(*p->fontq));
 494         }
 495         p->fontq[p->fonti] = f;
 496 }
 497
 498 /* Flush to make the saved pointer current again. */
 499 void
 500 term_fontpopq(struct termp *p, int i)
 501 {
 502
 503         assert(i >= 0);
 504         if (p->fonti > i)
 505                 p->fonti = i;
 506 }
 507
 508 /* Pop one font off the stack. */
 509 void
 510 term_fontpop(struct termp *p)
 511 {
 512
 513         assert(p->fonti);
 514         p->fonti--;
 515 }
 516
 517 /*
 518  * Handle pwords, partial words, which may be either a single word or a
 519  * phrase that cannot be broken down (such as a literal string).  This
 520  * handles word styling.
 521  */
 522 void
 523 term_word(struct termp *p, const char *word)
 524 {
 525         struct roffsu    su;
 526         const char       nbrsp[2] = { ASCII_NBRSP, 0 };
 527         const char      *seq, *cp;
 528         int              sz, uc;
 529         size_t           csz, lsz, ssz;
 530         enum mandoc_esc  esc;
 531
 532         if ((p->flags & TERMP_NOBUF) == 0) {
 533                 if ((p->flags & TERMP_NOSPACE) == 0) {
 534                         if ((p->flags & TERMP_KEEP) == 0) {
 535                                 bufferc(p, ' ');
 536                                 if (p->flags & TERMP_SENTENCE)
 537                                         bufferc(p, ' ');
 538                         } else
 539                                 bufferc(p, ASCII_NBRSP);
 540                 }
 541                 if (p->flags & TERMP_PREKEEP)
 542                         p->flags |= TERMP_KEEP;
 543                 if (p->flags & TERMP_NONOSPACE)
 544                         p->flags |= TERMP_NOSPACE;
 545                 else
 546                         p->flags &= ~TERMP_NOSPACE;
 547                 p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
 548                 p->skipvsp = 0;
 549         }
 550
 551         while ('\0' != *word) {
 552                 if ('\\' != *word) {
 553                         if (TERMP_NBRWORD & p->flags) {
 554                                 if (' ' == *word) {
 555                                         encode(p, nbrsp, 1);
 556                                         word++;
 557                                         continue;
 558                                 }
 559                                 ssz = strcspn(word, "\\ ");
 560                         } else
 561                                 ssz = strcspn(word, "\\");
 562                         encode(p, word, ssz);
 563                         word += (int)ssz;
 564                         continue;
 565                 }
 566
 567                 word++;
 568                 esc = mandoc_escape(&word, &seq, &sz);
 569                 switch (esc) {
 570                 case ESCAPE_UNICODE:
 571                         uc = mchars_num2uc(seq + 1, sz - 1);
 572                         break;
 573                 case ESCAPE_NUMBERED:
 574                         uc = mchars_num2char(seq, sz);
 575                         if (uc < 0)
 576                                 continue;
 577                         break;
 578                 case ESCAPE_SPECIAL:
 579                         if (p->enc == TERMENC_ASCII) {
 580                                 cp = mchars_spec2str(seq, sz, &ssz);
 581                                 if (cp != NULL)
 582                                         encode(p, cp, ssz);
 583                         } else {
 584                                 uc = mchars_spec2cp(seq, sz);
 585                                 if (uc > 0)
 586                                         encode1(p, uc);
 587                         }
 588                         continue;
 589                 case ESCAPE_UNDEF:
 590                         uc = *seq;
 591                         break;
 592                 case ESCAPE_FONTBOLD:
 593                         term_fontrepl(p, TERMFONT_BOLD);
 594                         continue;
 595                 case ESCAPE_FONTITALIC:
 596                         term_fontrepl(p, TERMFONT_UNDER);
 597                         continue;
 598                 case ESCAPE_FONTBI:
 599                         term_fontrepl(p, TERMFONT_BI);
 600                         continue;
 601                 case ESCAPE_FONT:
 602                 case ESCAPE_FONTCW:
 603                 case ESCAPE_FONTROMAN:
 604                         term_fontrepl(p, TERMFONT_NONE);
 605                         continue;
 606                 case ESCAPE_FONTPREV:
 607                         term_fontlast(p);
 608                         continue;
 609                 case ESCAPE_BREAK:
 610                         bufferc(p, '\n');
 611                         continue;
 612                 case ESCAPE_NOSPACE:
 613                         if (p->flags & TERMP_BACKAFTER)
 614                                 p->flags &= ~TERMP_BACKAFTER;
 615                         else if (*word == '\0')
 616                                 p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
 617                         continue;
 618                 case ESCAPE_DEVICE:
 619                         if (p->type == TERMTYPE_PDF)
 620                                 encode(p, "pdf", 3);
 621                         else if (p->type == TERMTYPE_PS)
 622                                 encode(p, "ps", 2);
 623                         else if (p->enc == TERMENC_ASCII)
 624                                 encode(p, "ascii", 5);
 625                         else
 626                                 encode(p, "utf8", 4);
 627                         continue;
 628                 case ESCAPE_HORIZ:
 629                         if (*seq == '|') {
 630                                 seq++;
 631                                 uc = -p->col;
 632                         } else
 633                                 uc = 0;
 634                         if (a2roffsu(seq, &su, SCALE_EM) == NULL)
 635                                 continue;
 636                         uc += term_hen(p, &su);
 637                         if (uc > 0)
 638                                 while (uc-- > 0)
 639                                         bufferc(p, ASCII_NBRSP);
 640                         else if (p->col > (size_t)(-uc))
 641                                 p->col += uc;
 642                         else {
 643                                 uc += p->col;
 644                                 p->col = 0;
 645                                 if (p->tcol->offset > (size_t)(-uc)) {
 646                                         p->ti += uc;
 647                                         p->tcol->offset += uc;
 648                                 } else {
 649                                         p->ti -= p->tcol->offset;
 650                                         p->tcol->offset = 0;
 651                                 }
 652                         }
 653                         continue;
 654                 case ESCAPE_HLINE:
 655                         if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL)
 656                                 continue;
 657                         uc = term_hen(p, &su);
 658                         if (uc <= 0) {
 659                                 if (p->tcol->rmargin <= p->tcol->offset)
 660                                         continue;
 661                                 lsz = p->tcol->rmargin - p->tcol->offset;
 662                         } else
 663                                 lsz = uc;
 664                         if (*cp == seq[-1])
 665                                 uc = -1;
 666                         else if (*cp == '\\') {
 667                                 seq = cp + 1;
 668                                 esc = mandoc_escape(&seq, &cp, &sz);
 669                                 switch (esc) {
 670                                 case ESCAPE_UNICODE:
 671                                         uc = mchars_num2uc(cp + 1, sz - 1);
 672                                         break;
 673                                 case ESCAPE_NUMBERED:
 674                                         uc = mchars_num2char(cp, sz);
 675                                         break;
 676                                 case ESCAPE_SPECIAL:
 677                                         uc = mchars_spec2cp(cp, sz);
 678                                         break;
 679                                 case ESCAPE_UNDEF:
 680                                         uc = *seq;
 681                                         break;
 682                                 default:
 683                                         uc = -1;
 684                                         break;
 685                                 }
 686                         } else
 687                                 uc = *cp;
 688                         if (uc < 0x20 || (uc > 0x7E && uc < 0xA0))
 689                                 uc = '_';
 690                         if (p->enc == TERMENC_ASCII) {
 691                                 cp = ascii_uc2str(uc);
 692                                 csz = term_strlen(p, cp);
 693                                 ssz = strlen(cp);
 694                         } else
 695                                 csz = (*p->width)(p, uc);
 696                         while (lsz >= csz) {
 697                                 if (p->enc == TERMENC_ASCII)
 698                                         encode(p, cp, ssz);
 699                                 else
 700                                         encode1(p, uc);
 701                                 lsz -= csz;
 702                         }
 703                         continue;
 704                 case ESCAPE_SKIPCHAR:
 705                         p->flags |= TERMP_BACKAFTER;
 706                         continue;
 707                 case ESCAPE_OVERSTRIKE:
 708                         cp = seq + sz;
 709                         while (seq < cp) {
 710                                 if (*seq == '\\') {
 711                                         mandoc_escape(&seq, NULL, NULL);
 712                                         continue;
 713                                 }
 714                                 encode1(p, *seq++);
 715                                 if (seq < cp) {
 716                                         if (p->flags & TERMP_BACKBEFORE)
 717                                                 p->flags |= TERMP_BACKAFTER;
 718                                         else
 719                                                 p->flags |= TERMP_BACKBEFORE;
 720                                 }
 721                         }
 722                         /* Trim trailing backspace/blank pair. */
 723                         if (p->tcol->lastcol > 2 &&
 724                             (p->tcol->buf[p->tcol->lastcol - 1] == ' ' ||
 725                              p->tcol->buf[p->tcol->lastcol - 1] == '\t'))
 726                                 p->tcol->lastcol -= 2;
 727                         if (p->col > p->tcol->lastcol)
 728                                 p->col = p->tcol->lastcol;
 729                         continue;
 730                 default:
 731                         continue;
 732                 }
 733
 734                 /*
 735                  * Common handling for Unicode and numbered
 736                  * character escape sequences.
 737                  */
 738
 739                 if (p->enc == TERMENC_ASCII) {
 740                         cp = ascii_uc2str(uc);
 741                         encode(p, cp, strlen(cp));
 742                 } else {
 743                         if ((uc < 0x20 && uc != 0x09) ||
 744                             (uc > 0x7E && uc < 0xA0))
 745                                 uc = 0xFFFD;
 746                         encode1(p, uc);
 747                 }
 748         }
 749         p->flags &= ~TERMP_NBRWORD;
 750 }
 751
 752 static void
 753 adjbuf(struct termp_col *c, size_t sz)
 754 {
 755         if (c->maxcols == 0)
 756                 c->maxcols = 1024;
 757         while (c->maxcols <= sz)
 758                 c->maxcols <<= 2;
 759         c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf));
 760 }
 761
 762 static void
 763 bufferc(struct termp *p, char c)
 764 {
 765         if (p->flags & TERMP_NOBUF) {
 766                 (*p->letter)(p, c);
 767                 return;
 768         }
 769         if (p->col + 1 >= p->tcol->maxcols)
 770                 adjbuf(p->tcol, p->col + 1);
 771         if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
 772                 p->tcol->buf[p->col] = c;
 773         if (p->tcol->lastcol < ++p->col)
 774                 p->tcol->lastcol = p->col;
 775 }
 776
 777 /*
 778  * See encode().
 779  * Do this for a single (probably unicode) value.
 780  * Does not check for non-decorated glyphs.
 781  */
 782 static void
 783 encode1(struct termp *p, int c)
 784 {
 785         enum termfont     f;
 786
 787         if (p->flags & TERMP_NOBUF) {
 788                 (*p->letter)(p, c);
 789                 return;
 790         }
 791
 792         if (p->col + 7 >= p->tcol->maxcols)
 793                 adjbuf(p->tcol, p->col + 7);
 794
 795         f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ?
 796             p->fontq[p->fonti] : TERMFONT_NONE;
 797
 798         if (p->flags & TERMP_BACKBEFORE) {
 799                 if (p->tcol->buf[p->col - 1] == ' ' ||
 800                     p->tcol->buf[p->col - 1] == '\t')
 801                         p->col--;
 802                 else
 803                         p->tcol->buf[p->col++] = '\b';
 804                 p->flags &= ~TERMP_BACKBEFORE;
 805         }
 806         if (f == TERMFONT_UNDER || f == TERMFONT_BI) {
 807                 p->tcol->buf[p->col++] = '_';
 808                 p->tcol->buf[p->col++] = '\b';
 809         }
 810         if (f == TERMFONT_BOLD || f == TERMFONT_BI) {
 811                 if (c == ASCII_HYPH)
 812                         p->tcol->buf[p->col++] = '-';
 813                 else
 814                         p->tcol->buf[p->col++] = c;
 815                 p->tcol->buf[p->col++] = '\b';
 816         }
 817         if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
 818                 p->tcol->buf[p->col] = c;
 819         if (p->tcol->lastcol < ++p->col)
 820                 p->tcol->lastcol = p->col;
 821         if (p->flags & TERMP_BACKAFTER) {
 822                 p->flags |= TERMP_BACKBEFORE;
 823                 p->flags &= ~TERMP_BACKAFTER;
 824         }
 825 }
 826
 827 static void
 828 encode(struct termp *p, const char *word, size_t sz)
 829 {
 830         size_t            i;
 831
 832         if (p->flags & TERMP_NOBUF) {
 833                 for (i = 0; i < sz; i++)
 834                         (*p->letter)(p, word[i]);
 835                 return;
 836         }
 837
 838         if (p->col + 2 + (sz * 5) >= p->tcol->maxcols)
 839                 adjbuf(p->tcol, p->col + 2 + (sz * 5));
 840
 841         for (i = 0; i < sz; i++) {
 842                 if (ASCII_HYPH == word[i] ||
 843                     isgraph((unsigned char)word[i]))
 844                         encode1(p, word[i]);
 845                 else {
 846                         if (p->tcol->lastcol <= p->col ||
 847                             (word[i] != ' ' && word[i] != ASCII_NBRSP))
 848                                 p->tcol->buf[p->col] = word[i];
 849                         p->col++;
 850
 851                         /*
 852                          * Postpone the effect of \z while handling
 853                          * an overstrike sequence from ascii_uc2str().
 854                          */
 855
 856                         if (word[i] == '\b' &&
 857                             (p->flags & TERMP_BACKBEFORE)) {
 858                                 p->flags &= ~TERMP_BACKBEFORE;
 859                                 p->flags |= TERMP_BACKAFTER;
 860                         }
 861                 }
 862         }
 863         if (p->tcol->lastcol < p->col)
 864                 p->tcol->lastcol = p->col;
 865 }
 866
 867 void
 868 term_setwidth(struct termp *p, const char *wstr)
 869 {
 870         struct roffsu    su;
 871         int              iop, width;
 872
 873         iop = 0;
 874         width = 0;
 875         if (NULL != wstr) {
 876                 switch (*wstr) {
 877                 case '+':
 878                         iop = 1;
 879                         wstr++;
 880                         break;
 881                 case '-':
 882                         iop = -1;
 883                         wstr++;
 884                         break;
 885                 default:
 886                         break;
 887                 }
 888                 if (a2roffsu(wstr, &su, SCALE_MAX) != NULL)
 889                         width = term_hspan(p, &su);
 890                 else
 891                         iop = 0;
 892         }
 893         (*p->setwidth)(p, iop, width);
 894 }
 895
 896 size_t
 897 term_len(const struct termp *p, size_t sz)
 898 {
 899
 900         return (*p->width)(p, ' ') * sz;
 901 }
 902
 903 static size_t
 904 cond_width(const struct termp *p, int c, int *skip)
 905 {
 906
 907         if (*skip) {
 908                 (*skip) = 0;
 909                 return 0;
 910         } else
 911                 return (*p->width)(p, c);
 912 }
 913
 914 size_t
 915 term_strlen(const struct termp *p, const char *cp)
 916 {
 917         size_t           sz, rsz, i;
 918         int              ssz, skip, uc;
 919         const char      *seq, *rhs;
 920         enum mandoc_esc  esc;
 921         static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
 922                         ASCII_BREAK, '\0' };
 923
 924         /*
 925          * Account for escaped sequences within string length
 926          * calculations.  This follows the logic in term_word() as we
 927          * must calculate the width of produced strings.
 928          */
 929
 930         sz = 0;
 931         skip = 0;
 932         while ('\0' != *cp) {
 933                 rsz = strcspn(cp, rej);
 934                 for (i = 0; i < rsz; i++)
 935                         sz += cond_width(p, *cp++, &skip);
 936
 937                 switch (*cp) {
 938                 case '\\':
 939                         cp++;
 940                         rhs = NULL;
 941                         esc = mandoc_escape(&cp, &seq, &ssz);
 942                         switch (esc) {
 943                         case ESCAPE_UNICODE:
 944                                 uc = mchars_num2uc(seq + 1, ssz - 1);
 945                                 break;
 946                         case ESCAPE_NUMBERED:
 947                                 uc = mchars_num2char(seq, ssz);
 948                                 if (uc < 0)
 949                                         continue;
 950                                 break;
 951                         case ESCAPE_SPECIAL:
 952                                 if (p->enc == TERMENC_ASCII) {
 953                                         rhs = mchars_spec2str(seq, ssz, &rsz);
 954                                         if (rhs != NULL)
 955                                                 break;
 956                                 } else {
 957                                         uc = mchars_spec2cp(seq, ssz);
 958                                         if (uc > 0)
 959                                                 sz += cond_width(p, uc, &skip);
 960                                 }
 961                                 continue;
 962                         case ESCAPE_UNDEF:
 963                                 uc = *seq;
 964                                 break;
 965                         case ESCAPE_DEVICE:
 966                                 if (p->type == TERMTYPE_PDF) {
 967                                         rhs = "pdf";
 968                                         rsz = 3;
 969                                 } else if (p->type == TERMTYPE_PS) {
 970                                         rhs = "ps";
 971                                         rsz = 2;
 972                                 } else if (p->enc == TERMENC_ASCII) {
 973                                         rhs = "ascii";
 974                                         rsz = 5;
 975                                 } else {
 976                                         rhs = "utf8";
 977                                         rsz = 4;
 978                                 }
 979                                 break;
 980                         case ESCAPE_SKIPCHAR:
 981                                 skip = 1;
 982                                 continue;
 983                         case ESCAPE_OVERSTRIKE:
 984                                 rsz = 0;
 985                                 rhs = seq + ssz;
 986                                 while (seq < rhs) {
 987                                         if (*seq == '\\') {
 988                                                 mandoc_escape(&seq, NULL, NULL);
 989                                                 continue;
 990                                         }
 991                                         i = (*p->width)(p, *seq++);
 992                                         if (rsz < i)
 993                                                 rsz = i;
 994                                 }
 995                                 sz += rsz;
 996                                 continue;
 997                         default:
 998                                 continue;
 999                         }
1000
1001                         /*
1002                          * Common handling for Unicode and numbered
1003                          * character escape sequences.
1004                          */
1005
1006                         if (rhs == NULL) {
1007                                 if (p->enc == TERMENC_ASCII) {
1008                                         rhs = ascii_uc2str(uc);
1009                                         rsz = strlen(rhs);
1010                                 } else {
1011                                         if ((uc < 0x20 && uc != 0x09) ||
1012                                             (uc > 0x7E && uc < 0xA0))
1013                                                 uc = 0xFFFD;
1014                                         sz += cond_width(p, uc, &skip);
1015                                         continue;
1016                                 }
1017                         }
1018
1019                         if (skip) {
1020                                 skip = 0;
1021                                 break;
1022                         }
1023
1024                         /*
1025                          * Common handling for all escape sequences
1026                          * printing more than one character.
1027                          */
1028
1029                         for (i = 0; i < rsz; i++)
1030                                 sz += (*p->width)(p, *rhs++);
1031                         break;
1032                 case ASCII_NBRSP:
1033                         sz += cond_width(p, ' ', &skip);
1034                         cp++;
1035                         break;
1036                 case ASCII_HYPH:
1037                         sz += cond_width(p, '-', &skip);
1038                         cp++;
1039                         break;
1040                 default:
1041                         break;
1042                 }
1043         }
1044
1045         return sz;
1046 }
1047
1048 int
1049 term_vspan(const struct termp *p, const struct roffsu *su)
1050 {
1051         double           r;
1052         int              ri;
1053
1054         switch (su->unit) {
1055         case SCALE_BU:
1056                 r = su->scale / 40.0;
1057                 break;
1058         case SCALE_CM:
1059                 r = su->scale * 6.0 / 2.54;
1060                 break;
1061         case SCALE_FS:
1062                 r = su->scale * 65536.0 / 40.0;
1063                 break;
1064         case SCALE_IN:
1065                 r = su->scale * 6.0;
1066                 break;
1067         case SCALE_MM:
1068                 r = su->scale * 0.006;
1069                 break;
1070         case SCALE_PC:
1071                 r = su->scale;
1072                 break;
1073         case SCALE_PT:
1074                 r = su->scale / 12.0;
1075                 break;
1076         case SCALE_EN:
1077         case SCALE_EM:
1078                 r = su->scale * 0.6;
1079                 break;
1080         case SCALE_VS:
1081                 r = su->scale;
1082                 break;
1083         default:
1084                 abort();
1085         }
1086         ri = r > 0.0 ? r + 0.4995 : r - 0.4995;
1087         return ri < 66 ? ri : 1;
1088 }
1089
1090 /*
1091  * Convert a scaling width to basic units, rounding towards 0.
1092  */
1093 int
1094 term_hspan(const struct termp *p, const struct roffsu *su)
1095 {
1096
1097         return (*p->hspan)(p, su);
1098 }
1099
1100 /*
1101  * Convert a scaling width to basic units, rounding to closest.
1102  */
1103 int
1104 term_hen(const struct termp *p, const struct roffsu *su)
1105 {
1106         int bu;
1107
1108         if ((bu = (*p->hspan)(p, su)) >= 0)
1109                 return (bu + 11) / 24;
1110         else
1111                 return -((-bu + 11) / 24);
1112 }