2 # ------------------------------------------------------------------------------
4 # Function: Format PDF Output from groff Markup
6 # Copyright (C) 2005, 2006, 2009 Free Software Foundation, Inc.
7 # Written by Keith Marshall (keith.d.marshall@ntlworld.com)
9 # This file is part of groff.
11 # groff is free software; you can redistribute it and/or modify it under
12 # the terms of the GNU General Public License as published by the Free
13 # Software Foundation, either version 3 of the License, or
14 # (at your option) any later version.
16 # groff is distributed in the hope that it will be useful, but WITHOUT ANY
17 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 # You should have received a copy of the GNU General Public License
22 # along with this program. If not, see <http://www.gnu.org/licenses/>.
24 # ------------------------------------------------------------------------------
26 # Set up an identifier for the NULL device.
27 # In most cases "/dev/null" will be correct, but some shells on
28 # MS-DOS/MS-Windows systems may require us to use "NUL".
# Fall back to "NUL" when $NULLDEV does not name a character special file.
# NOTE(review): the initial assignment of NULLDEV is not visible in this
# excerpt -- presumably it is set to "/dev/null" just above; confirm.
# NOTE(review): $NULLDEV is expanded unquoted; were it ever empty, this
# would reduce to the one-argument form `test -c', which succeeds, and the
# fallback would be skipped.
31 test -c $NULLDEV || NULLDEV="NUL"
33 # Set up the command name to use in diagnostic messages.
34 # (We can't assume we have 'basename', so use the full path if required.
35 # Also use the 'exec 2>...' workaround for a bug in Cygwin's 'ash').
# Derive the short command name used as the prefix of diagnostic messages.
# The command substitution first redirects its own stderr to $NULLDEV and
# then runs 'basename' on $0; if the substitution exits non-zero, CMD falls
# back to the unmodified $0.
# NOTE(review): $0 is expanded unquoted, so an invocation path containing
# whitespace is split into several 'basename' arguments.
37 CMD=`exec 2>$NULLDEV; basename $0` || CMD=$0
39 # To ensure that prerequisite helper programs are available, and are
40 # executable, a [fairly] portable method of detecting such programs is
41 # provided by function `searchpath'.
45 # Usage: searchpath progname path
47 IFS=${PATH_SEPARATOR-":"} prog=':'
52 # try `progname' with all well known extensions
53 # (e.g. Win32 may require `progname.exe')
57 test -f "$try" && test -x "$try" && prog="$try" && break
59 test "$prog" = ":" || break
65 # If the system maps '/bin/sh' to some 'zsh' implementation,
66 # then we may need this hack, adapted from autoconf code.
# Acts only when ZSH_VERSION is set (the ${var+word} form yields "set" for
# any set value, including an empty one): NULLCMD is set to ":", and then,
# if 'emulate sh' succeeds in a subshell (all output discarded to $NULLDEV),
# the same command is run again in the current shell.
68 test x${ZSH_VERSION+"set"} = x"set" && NULLCMD=":" \
69 && (emulate sh) >$NULLDEV 2>&1 && emulate sh
71 # We need both 'grep' and 'sed' programs, to parse script options,
72 # and we also need 'cat', to display help and some error messages,
73 # so ensure they are all installed, before we continue.
# Look up each helper with 'searchpath', passing $PATH as the search list,
# and keep whatever it prints.  The results are compared against ":"
# further down, to detect a helper which could not be found.
75 CAT=`searchpath cat "$PATH"`
76 GREP=`searchpath grep "$PATH"`
77 SED=`searchpath sed "$PATH"`
79 # Another fundamental requirement is the 'groff' program itself;
80 # we MUST use a 'groff' program located in 'GROFF_BIN_DIR', if this
81 # is specified; if not, we will search 'GROFF_BIN_PATH', only falling
82 # back to a 'PATH' search, if neither of these is specified.
84 if test -n "$GROFF_BIN_DIR"
87 GROFF=`searchpath groff "$GROFF_BIN_DIR"`
89 elif test -n "$GROFF_BIN_PATH"
92 GROFF=`searchpath groff "$GROFF_BIN_PATH"`
96 GROFF=`searchpath groff "$PATH"`
99 # If one or more of these is missing, diagnose and bail out.
# NO accumulates the quoted names of the missing programs; NOPROG is the
# common prefix of each diagnostic.  For every helper whose lookup result
# is ":", a message is written to stderr and, once 'echo' has succeeded,
# the program name is appended to NO.
# NOTE(review): GPATH, used in the 'groff' message, is not assigned anywhere
# in this excerpt -- presumably it is set alongside the GROFF lookup; confirm.
101 NO='' NOPROG="$CMD: installation problem: cannot find program"
102 test "$CAT" = ":" && echo >&2 "$NOPROG 'cat' in PATH" && NO="$NO 'cat'"
103 test "$GREP" = ":" && echo >&2 "$NOPROG 'grep' in PATH" && NO="$NO 'grep'"
104 test "$GROFF" = ":" && echo >&2 "$NOPROG 'groff' in $GPATH" && NO="$NO 'groff'"
105 test "$SED" = ":" && echo >&2 "$NOPROG 'sed' in PATH" && NO="$NO 'sed'"
109 test $# -gt 1 && NO="s" IS="are" || NO='' IS="is"
112 test $# -gt 2 && NO="$NO $1,"
113 test $# -eq 2 && NO="$NO $1 and" && shift
114 test $# -lt 2 && NO="$NO $1"
119 *** FATAL INSTALLATION ERROR ***
121 The program$NO $IS required by '$CMD',
122 but cannot be found; '$CMD' is unable to continue.
128 # Identify the postprocessor command, for writing PDF output.
129 # (May be forced, by defining PDFROFF_POSTPROCESSOR_COMMAND in the environment;
130 # if this is not set, leave blank to use the built in default).
132 if test -n "${PDFROFF_POSTPROCESSOR_COMMAND}"
134 GROFF_GHOSTSCRIPT_INTERPRETER=`set command ${PDFROFF_POSTPROCESSOR_COMMAND};
138 # Set up temporary/intermediate file locations.
# The first assignment also fixes GROFF_TMPDIR: if it is unset, it is
# assigned the value of TMPDIR, else TMP, else TEMP, else ".".  (These are
# the '-' and '=' expansion forms without a colon, so a variable which is
# set but empty is used as it stands.)
# Every intermediate file is named pdf<PID>.<ext> within that directory,
# which is what allows the exit trap to remove them all with one pattern.
# NOTE(review): the names are predictable, being based on the process ID;
# consider whether that matters when GROFF_TMPDIR is a shared directory.
140 WRKFILE=${GROFF_TMPDIR=${TMPDIR-${TMP-${TEMP-"."}}}}/pdf$$.tmp
142 REFCOPY=${GROFF_TMPDIR}/pdf$$.cmp
143 REFFILE=${GROFF_TMPDIR}/pdf$$.ref
146 TC_DATA=${GROFF_TMPDIR}/pdf$$.tc
147 BD_DATA=${GROFF_TMPDIR}/pdf$$.ps
149 # Set a trap, to delete temporary files on exit.
150 # (FIXME: may want to include other signals, in released version).
# Condition 0 is shell exit.  The action is double quoted, so
# ${GROFF_TMPDIR} and $$ are expanded now, when the trap is installed;
# the trailing '.*' pattern is expanded only when the action is run.
# NOTE(review): the directory is not quoted within the action, so a
# GROFF_TMPDIR containing whitespace would be split when 'rm' is invoked.
152 trap "rm -f ${GROFF_TMPDIR}/pdf$$.*" 0
154 # Initialise 'groff' format control settings,
155 # to discriminate table of contents and document body formatting passes.
# Extra 'groff' arguments which distinguish the two formatting passes:
# TOC_FORMAT is added to the table of contents pass, and BODY_FORMAT to
# the document body pass.  Both are reset to empty elsewhere in the
# script, together with TC_DATA.
157 TOC_FORMAT="-rPHASE=1"
158 BODY_FORMAT="-rPHASE=2"
161 help reference-dictionary no-reference-dictionary
162 stylesheet pdf-output no-pdf-output
163 version report-progress no-toc-relocation
164 emit-ps keep-temporary-files no-kill-null-pages
166 # Parse the command line, to identify 'pdfroff' specific options.
167 # Collect all other parameters into new argument and file lists,
168 # to be passed on to 'groff', enforcing the '-Tps' option.
# Start with empty accumulators, before scanning the command line.
# GROFF_STYLE is a command string, seeded with the located 'groff' program
# and the mandatory '-Tps'; later code appends options to it, and runs it
# through 'eval'.  INPUT_FILES gathers the input file arguments, STREAM
# becomes a "cat ... |" prefix when STDIN must be replayed, and
# SHOW_VERSION is filled in when version output is requested.
170 DIFF="" STREAM="" INPUT_FILES=""
171 SHOW_VERSION="" GROFF_STYLE="$GROFF -Tps"
176 # Long options must be processed locally ...
180 # First identify, matching any abbreviation to its full form.
182 MATCH="" OPTNAME=`IFS==; set dummy $1; echo $2`
185 MATCH="$MATCH"`echo --$OPT | $GREP "^$OPTNAME"`
188 # For options in the form --option=value
189 # capture any specified value into $OPTARG.
191 OPTARG=`echo $1 | $SED -n s?"^${OPTNAME}="??p`
193 # Perform case specific processing for matched option ...
199 Usage: $CMD [-option ...] [--long-option ...] [file ...]
204 Display this usage summary, and exit.
208 Display a version identification message and exit.
211 Enable console messages, indicating the progress of the
212 PDF document formatting process.
215 Emit PostScript output instead of PDF; this may be useful
216 when the ultimate PDF output is to be generated by a more
217 specialised postprocessor, (e.g. gpresent), rather than
218 the default GhostScript PDF writer.
221 Write the PDF, (or PostScript), output stream to file
222 'name'; if this option is unspecified, standard output
223 is used for PDF, (or PostScript), output.
226 Suppress the generation of PDF, (or PostScript), output
227 entirely; use this with the --reference-dictionary option,
228 if processing a document stream to produce only a
229 reference dictionary.
231 --no-reference-dictionary
232 Suppress the generation of a '$CMD' reference dictionary
233 for the PDF document. Normally '$CMD' will create a
234 reference dictionary, at the start of document processing;
235 this option can accelerate processing, if it is known in
236 advance, that no reference dictionary is required.
238 --reference-dictionary=name
239 Save the document reference dictionary in file 'name'.
240 If 'name' already exists, when processing commences, it
241 will be used as the base case, from which the updated
242 dictionary will be derived. If this option is not used,
243 then the reference dictionary, created during the normal
244 execution of '$CMD', will be deleted on completion of
248 Use the file 'name' as a 'groff' style sheet, to control
249 the appearance of the document's front cover section. If
250 this option is not specified, then no special formatting
251 is applied, to create a front cover section.
254 Suppress the multiple pass 'groff' processing, which is
255 normally required to position the table of contents at the
256 start of a PDF document.
259 Suppress the 'null page' elimination filter, which is used
260 to remove the excess blank pages produced by the collation
261 algorithm used for 'toc-relocation'.
263 --keep-temporary-files
264 Suppress the normal clean up of temporary files, which is
265 scheduled when 'pdfroff' completes.
272 GROFF_STYLE="$GROFF_STYLE \"$1\""
273 SHOW_VERSION="GNU pdfroff (groff) version @VERSION@"
280 --keep-temporary-files)
285 PDFROFF_POSTPROCESSOR_COMMAND="$CAT"
293 PDF_OUTPUT="$NULLDEV"
296 --reference-dictionary)
300 --no-reference-dictionary)
301 AWK=":" DIFF=":" REFFILE="$NULLDEV" REFCOPY="$NULLDEV"
305 STYLESHEET="$OPTARG" CS_DATA=${GROFF_TMPDIR}/pdf$$.cs
309 TC_DATA="" TOC_FORMAT="" BODY_FORMAT=""
312 --no-kill-null-pages)
313 PDFROFF_COLLATE="$CAT" PDFROFF_KILL_NULL_PAGES=""
316 # any other non-null match must have matched more than one defined case,
317 # so report the ambiguity, and bail out.
320 echo >&2 "$CMD: ambiguous abbreviation in option '$1'"
324 # while no match at all simply represents an undefined case.
327 echo >&2 "$CMD: unknown option '$1'"
333 # A solitary hyphen, as an argument, means "stream STDIN through groff",
334 # while the "-i" option means "append STDIN stream to specified input files",
335 # so set up a mechanism to achieve this, for ALL 'groff' passes.
338 STREAM="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
339 test "$1" = "-" && INPUT_FILES="$INPUT_FILES $1" \
340 || GROFF_STYLE="$GROFF_STYLE $1"
343 # Those standard options which expect an argument, but are specified with
344 # an intervening space, between flag and argument, must be reparsed, so we
345 # can trap invalid use of '-T dev', or missing input files.
349 shift; set reparse "$OPTNAME$@"
352 # Among standard options, '-Tdev' is treated as a special case.
353 # '-Tps' is automatically enforced, so if specified, is silently ignored.
357 # No other '-Tdev' option is permitted.
359 -T*) echo >&2 "$CMD: option '$1' is incompatible with PDF output"
363 # '-h' and '-v' options redirect to their equivalent long forms ...
365 -h*) set redirect --help
368 -v*) shift; set redirect --version "$@"
371 # All other standard options are simply passed through to 'groff',
372 # with no validation beforehand.
374 -*) GROFF_STYLE="$GROFF_STYLE \"$1\""
377 # All non-option arguments are considered as possible input file names,
378 # and are passed on to 'groff', unaltered.
380 *) INPUT_FILES="$INPUT_FILES \"$1\""
386 # If the '-v' or '--version' option was specified,
387 # then we simply emulate the behaviour of 'groff', with this option,
390 if test -n "$SHOW_VERSION"
392 echo >&2 "$SHOW_VERSION"
393 echo >&2; eval $GROFF_STYLE $INPUT_FILES
397 # Establish how to invoke 'echo', suppressing the terminating newline.
398 # (Adapted from 'autoconf' code, as found in 'configure' scripts).
400 case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
401 *c*,*-n*) n='' c='' ;;
406 # If STDIN is specified among the input files,
407 # or if no input files are specified, then we need to capture STDIN,
408 # so we can replay it into each 'groff' processing pass.
# With no input files at all, STREAM is set here; it may already have been
# set during option parsing.  STREAM deliberately ends in "|", because the
# later 'eval' lines place it directly in front of the 'groff' command.
# Whenever STREAM is non-empty, STDIN is copied once into pdf<PID>.in,
# which is the file that the STREAM prefix replays.
410 test -z "$INPUT_FILES" && STREAM="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
411 test -n "$STREAM" && $CAT > ${GROFF_TMPDIR}/pdf$$.in
413 # Unless reference resolution is explicitly suppressed,
414 # we initiate it by touching the cross reference dictionary file,
415 # and initialise the comparator, to kickstart the reference resolver loop.
421 echo kickstart > $REFCOPY
422 test x${SHOW_PROGRESS+"set"} = x"set" && SAY=echo
424 # In order to correctly resolve 'pdfmark' references,
425 # we need to have both the 'awk' and 'diff' programs available.
428 if test -n "$GROFF_AWK_INTERPRETER"
430 AWK="$GROFF_AWK_INTERPRETER"
431 test -f "$AWK" && test -x "$AWK" || AWK=":"
433 for prog in @GROFF_AWK_INTERPRETERS@
435 AWK=`searchpath $prog "$PATH"`
436 test "$AWK" = ":" || break
439 DIFF=`searchpath diff "$PATH"`
440 test "$AWK" = ":" && echo >&2 "$NOPROG 'awk' in PATH" && NO="$NO 'awk'"
441 test "$DIFF" = ":" && echo >&2 "$NOPROG 'diff' in PATH" && NO="$NO 'diff'"
445 SAY=":" AWK=":" DIFF=":"
446 test $# -gt 1 && NO="s $1 and $2 are" || NO=" $1 is"
451 The program$NO required, but cannot be found;
452 consequently, '$CMD' is unable to resolve 'pdfmark' references.
454 Document processing will continue, but no 'pdfmark' reference dictionary
455 will be compiled; if any 'pdfmark' reference appears in the resulting PDF
456 document, the formatting may not be correct.
462 # Run the multi-pass 'pdfmark' reference resolver loop ...
464 $SAY >&2 $n Resolving references ..$c
465 until $DIFF $REFCOPY $REFFILE 1>$NULLDEV 2>&1
468 # until all references are resolved, to yield consistent values
469 # in each of two consecutive passes, or until it seems that no consistent
470 # resolution is achievable.
473 PASS_INDICATOR="${PASS_INDICATOR}."
474 if test "$PASS_INDICATOR" = "...."
477 # More than three passes required indicates a probable inconsistency
478 # in the source document; diagnose, and bail out.
482 $CMD: unable to resolve references consistently after three passes
483 $CMD: the source document may exhibit instability about the reference(s) ...
486 # Report the unresolved references, as a diff between the two pass files,
487 # preferring 'unified' or 'context' diffs, when available
490 $DIFF -c0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT='-c0'
491 $DIFF -u0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT='-u0'
492 $DIFF >&2 $DIFFOPT $REFCOPY $REFFILE
496 # Replace the comparison file copy from any previous pass,
497 # with the most recently updated copy of the reference dictionary.
498 # (Some versions of 'mv' may not support overwriting of an existing file,
499 # so remove the old comparison file first).
504 # Run 'groff' and 'awk', to identify reference marks in the document source,
505 # filtering them into the reference dictionary; discard incomplete 'groff' output
508 eval $STREAM $GROFF_STYLE -Z 1>$NULLDEV 2>$WRKFILE $REFCOPY $INPUT_FILES
509 $AWK '/^gropdf-info:href/ {$1 = ".pdfhref D -N"; print}' $WRKFILE > $REFFILE
514 # We MUST have resolved all 'pdfmark' references, such that the content of the
515 # updated reference dictionary file EXACTLY matches the last saved copy.
517 # If PDF output has been suppressed, then there is nothing more to do.
519 test "$PDF_OUTPUT" = "$NULLDEV" && exit 0
521 # We are now ready to start preparing the intermediate PostScript files,
522 # from which the PDF output will be compiled -- but before proceeding further ...
523 # let's make sure we have a GhostScript interpreter to convert them!
525 if test -n "$GROFF_GHOSTSCRIPT_INTERPRETER"
527 GS="$GROFF_GHOSTSCRIPT_INTERPRETER"
528 test -f "$GS" && test -x "$GS" || GS=":"
530 for prog in @GROFF_GHOSTSCRIPT_INTERPRETERS@
532 GS=`searchpath $prog "$PATH"`
533 test "$GS" = ":" || break
537 # If we could not find a GhostScript interpreter, then we can do no more.
# Report the missing GhostScript interpreter: a one-line diagnostic on
# stderr, followed by the text of the fatal error notice.  (How the notice
# text is emitted is not visible in this excerpt.)
541 echo >&2 "$CMD: installation problem: cannot find GhostScript interpreter"
544 *** FATAL INSTALLATION ERROR ***
546 '$CMD' requires a GhostScript interpreter to convert PostScript to PDF.
547 Since you do not appear to have one installed, '$CMD' cannot continue.
553 # We now extend the local copy of the reference dictionary file,
554 # to create a full 'pdfmark' reference map for the document ...
556 $AWK '/^grohtml-info/ {print ".pdfhref Z", $2, $3, $4}' $WRKFILE >> $REFCOPY
558 # Re-enable progress reporting, if necessary ...
559 # (Missing 'awk' or 'diff' may have disabled it, to avoid display
560 # of spurious messages associated with reference resolution).
562 test x${SHOW_PROGRESS+"set"} = x"set" && SAY=echo
564 # If a document cover style sheet is specified ...
565 # then we run a special formatting pass, to create a cover section file.
567 if test -n "$STYLESHEET"
570 CS_MACRO=${CS_MACRO-"CS"} CE_MACRO=${CE_MACRO-"CE"}
571 $SAY >&2 $n "Formatting document ... front cover section ..$c"
572 CS_FILTER="$STREAM $SED -n '/${DOT}${CS_MACRO}/,/${DOT}${CE_MACRO}/p'"
573 eval $CS_FILTER $INPUT_FILES | eval $GROFF_STYLE $STYLESHEET - > $CS_DATA
577 # If table of contents relocation is to be performed (it is, by default),
578 # then we run an extra 'groff' pass, to format a TOC intermediate file.
580 if test -n "$TC_DATA"
582 $SAY >&2 $n "Formatting document ... table of contents ..$c"
583 eval $STREAM $GROFF_STYLE $TOC_FORMAT $REFCOPY $INPUT_FILES > $TC_DATA
587 # In all cases, a final 'groff' pass is required, to format the document body.
589 $SAY >&2 $n "Formatting document ... body section ..$c"
590 eval $STREAM $GROFF_STYLE $BODY_FORMAT $REFCOPY $INPUT_FILES > $BD_DATA
594 # Invoke GhostScript as a PDF writer, to bind all of the generated
595 # PostScript intermediate files into a single PDF output file.
597 $SAY >&2 $n "Writing PDF output ..$c"
598 if test -z "$PDFROFF_POSTPROCESSOR_COMMAND"
600 PDFROFF_POSTPROCESSOR_COMMAND="$GS -dQUIET -dBATCH -dNOPAUSE
601 -sDEVICE=pdfwrite -sOutputFile="${PDF_OUTPUT-"-"}
603 elif test -n "$PDF_OUTPUT"
608 # (This 'sed' script is a hack, to eliminate redundant blank pages).
610 ${PDFROFF_COLLATE-"$SED"} ${PDFROFF_KILL_NULL_PAGES-'
613 /%%BeginPageSetup/b again
617 /%%EndPageSetup/b finish
625 /^%%Page:.*\n0 Cg EP$/d
626 '} $TC_DATA $BD_DATA | $PDFROFF_POSTPROCESSOR_COMMAND $CS_DATA -
629 # ------------------------------------------------------------------------------
630 # $RCSfile: pdfroff.sh,v $ $Revision: 1.15 $: end of file