groff: update vendor branch to v1.20.1
[dragonfly.git] / contrib / groff / src / preproc / refer / refer.cpp
CommitLineData
92d0a6a6 1// -*- C++ -*-
4d3e9548 2/* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004, 2006, 2009
465b256c 3 Free Software Foundation, Inc.
92d0a6a6
JR
4 Written by James Clark (jjc@jclark.com)
5
6This file is part of groff.
7
8groff is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
4d3e9548
JL
10Software Foundation, either version 3 of the License, or
11(at your option) any later version.
92d0a6a6
JR
12
13groff is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
4d3e9548
JL
18You should have received a copy of the GNU General Public License
19along with this program. If not, see <http://www.gnu.org/licenses/>. */
92d0a6a6
JR
20
21#include "refer.h"
22#include "refid.h"
23#include "ref.h"
24#include "token.h"
25#include "search.h"
26#include "command.h"
27
28extern "C" const char *Version_string;
29
// In-band marker bytes that do_file() inserts into pending_line around a
// citation; label_processing_state::process() later replaces them with
// pre_label, post_label and the formatted label text.
30const char PRE_LABEL_MARKER = '\013';
31const char POST_LABEL_MARKER = '\014';
32const char LABEL_MARKER = '\015'; // label_type is added on
33
// Flag bits returned through make_reference()'s flags argument when a
// '[' or ']' precedes the reference; do_file() masks them off before
// converting the remaining bits to a label_type.
34#define FORCE_LEFT_BRACKET 04
35#define FORCE_RIGHT_BRACKET 010
36
// Current output stream; divert_to_temporary_file() redirects it and
// output_references() restores it to stdout.
37static FILE *outfp = stdout;
38
39string capitalize_fields;
40string reverse_fields;
41string abbreviate_fields;
42string period_before_last_name = ". ";
43string period_before_initial = ".";
44string period_before_hyphen = "";
45string period_before_other = ". ";
46string sort_fields;
47int annotation_field = -1;
48string annotation_macro;
49string discard_fields = "XYZ";
// Strings written in place of PRE_LABEL_MARKER / POST_LABEL_MARKER, and
// between adjacent labels.
50string pre_label = "\\*([.";
51string post_label = "\\*(.]";
52string sep_label = ", ";
4d3e9548 53int have_bibliography = 0;
92d0a6a6
JR
// Non-zero: references are stored and emitted together by
// output_references(); zero: each reference is emitted immediately.
54int accumulate = 0;
55int move_punctuation = 0;
56int abbreviate_label_ranges = 0;
57string label_range_indicator;
58int label_in_text = 1;
59int label_in_reference = 1;
60int date_as_label = 0;
61int sort_adjacent_labels = 0;
62// Join exactly two authors with this.
63string join_authors_exactly_two = " and ";
64// When there are more than two authors join the last two with this.
65string join_authors_last_two = ", and ";
66// Otherwise join authors with this.
67string join_authors_default = ", ";
68string separate_label_second_parts = ", ";
69// Use this string to represent that there are other authors.
70string et_al = " et al";
71// Use et al only if it can replace at least this many authors.
72int et_al_min_elide = 2;
73// Use et al only if the total number of authors is at least this.
74int et_al_min_total = 3;
75
76
77int compatible_flag = 0;
78
79int short_label_flag = 0;
80
// Cleared by the -R option; when set, do_file() treats .R1/.R2 blocks as
// commands.
81static int recognize_R1_R2 = 1;
82
83search_list database_list;
84int search_default = 1;
85static int default_database_loaded = 0;
86
// Growable array of cited references, in citation order; managed by
// store_citation().
87static reference **citation = 0;
88static int ncitations = 0;
89static int citation_max = 0;
90
// Open-addressed hash table of accumulated references; allocated lazily
// by store_reference().
91static reference **reference_hash_table = 0;
92static int hash_table_size;
93static int nreferences = 0;
94
// When set, output_pending_line() emits a ".lf" request to resynchronise
// line numbers.
95static int need_syncing = 0;
// Most recent ordinary input line, held back so a citation can be
// attached to it.
96string pending_line;
// ".lf" lines seen since pending_line was stored; written after it.
97string pending_lf_lines;
98
99static void output_pending_line();
100static unsigned immediately_handle_reference(const string &);
101static void immediately_output_references();
102static unsigned store_reference(const string &);
103static void divert_to_temporary_file();
104static reference *make_reference(const string &, unsigned *);
105static void usage(FILE *stream);
106static void do_file(const char *);
107static void split_punct(string &line, string &punct);
108static void output_citation_group(reference **v, int n, label_type, FILE *fp);
109static void possibly_load_default_database();
110
111int main(int argc, char **argv)
112{
113 program_name = argv[0];
114 static char stderr_buf[BUFSIZ];
115 setbuf(stderr, stderr_buf);
116 outfp = stdout;
117 int finished_options = 0;
118 int bib_flag = 0;
119 int done_spec = 0;
120
121 for (--argc, ++argv;
122 !finished_options && argc > 0 && argv[0][0] == '-'
123 && argv[0][1] != '\0';
124 argv++, argc--) {
125 const char *opt = argv[0] + 1;
126 while (opt != 0 && *opt != '\0') {
127 switch (*opt) {
128 case 'C':
129 compatible_flag = 1;
130 opt++;
131 break;
132 case 'B':
133 bib_flag = 1;
134 label_in_reference = 0;
135 label_in_text = 0;
136 ++opt;
137 if (*opt == '\0') {
138 annotation_field = 'X';
139 annotation_macro = "AP";
140 }
141 else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
142 annotation_field = opt[0];
143 annotation_macro = opt + 2;
144 }
145 opt = 0;
146 break;
147 case 'P':
148 move_punctuation = 1;
149 opt++;
150 break;
151 case 'R':
152 recognize_R1_R2 = 0;
153 opt++;
154 break;
155 case 'S':
156 // Not a very useful spec.
157 set_label_spec("(A.n|Q)', '(D.y|D)");
158 done_spec = 1;
159 pre_label = " (";
160 post_label = ")";
161 sep_label = "; ";
162 opt++;
163 break;
164 case 'V':
165 verify_flag = 1;
166 opt++;
167 break;
168 case 'f':
169 {
170 const char *num = 0;
171 if (*++opt == '\0') {
172 if (argc > 1) {
173 num = *++argv;
174 --argc;
175 }
176 else {
177 error("option `f' requires an argument");
178 usage(stderr);
179 exit(1);
180 }
181 }
182 else {
183 num = opt;
184 opt = 0;
185 }
186 const char *ptr;
187 for (ptr = num; *ptr; ptr++)
188 if (!csdigit(*ptr)) {
189 error("bad character `%1' in argument to -f option", *ptr);
190 break;
191 }
192 if (*ptr == '\0') {
193 string spec;
194 spec = '%';
195 spec += num;
196 spec += '\0';
197 set_label_spec(spec.contents());
198 done_spec = 1;
199 }
200 break;
201 }
202 case 'b':
203 label_in_text = 0;
204 label_in_reference = 0;
205 opt++;
206 break;
207 case 'e':
208 accumulate = 1;
209 opt++;
210 break;
211 case 'c':
212 capitalize_fields = ++opt;
213 opt = 0;
214 break;
215 case 'k':
216 {
217 char buf[5];
218 if (csalpha(*++opt))
219 buf[0] = *opt++;
220 else {
221 if (*opt != '\0')
222 error("bad field name `%1'", *opt++);
223 buf[0] = 'L';
224 }
225 buf[1] = '~';
226 buf[2] = '%';
227 buf[3] = 'a';
228 buf[4] = '\0';
229 set_label_spec(buf);
230 done_spec = 1;
231 }
232 break;
233 case 'a':
234 {
235 const char *ptr;
236 for (ptr = ++opt; *ptr; ptr++)
237 if (!csdigit(*ptr)) {
238 error("argument to `a' option not a number");
239 break;
240 }
241 if (*ptr == '\0') {
242 reverse_fields = 'A';
243 reverse_fields += opt;
244 }
245 opt = 0;
246 }
247 break;
248 case 'i':
249 linear_ignore_fields = ++opt;
250 opt = 0;
251 break;
252 case 'l':
253 {
254 char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
255 strcpy(buf, "A.n");
256 if (*++opt != '\0' && *opt != ',') {
257 char *ptr;
258 long n = strtol(opt, &ptr, 10);
259 if (n == 0 && ptr == opt) {
260 error("bad integer `%1' in `l' option", opt);
261 opt = 0;
262 break;
263 }
264 if (n < 0)
265 n = 0;
266 opt = ptr;
267 sprintf(strchr(buf, '\0'), "+%ld", n);
268 }
269 strcat(buf, "D.y");
270 if (*opt == ',')
271 opt++;
272 if (*opt != '\0') {
273 char *ptr;
274 long n = strtol(opt, &ptr, 10);
275 if (n == 0 && ptr == opt) {
276 error("bad integer `%1' in `l' option", opt);
277 opt = 0;
278 break;
279 }
280 if (n < 0)
281 n = 0;
282 sprintf(strchr(buf, '\0'), "-%ld", n);
283 opt = ptr;
284 if (*opt != '\0')
285 error("argument to `l' option not of form `m,n'");
286 }
287 strcat(buf, "%a");
288 if (!set_label_spec(buf))
289 assert(0);
290 done_spec = 1;
291 }
292 break;
293 case 'n':
294 search_default = 0;
295 opt++;
296 break;
297 case 'p':
298 {
299 const char *filename = 0;
300 if (*++opt == '\0') {
301 if (argc > 1) {
302 filename = *++argv;
303 argc--;
304 }
305 else {
306 error("option `p' requires an argument");
307 usage(stderr);
308 exit(1);
309 }
310 }
311 else {
312 filename = opt;
313 opt = 0;
314 }
315 database_list.add_file(filename);
316 }
317 break;
318 case 's':
319 if (*++opt == '\0')
320 sort_fields = "AD";
321 else {
322 sort_fields = opt;
323 opt = 0;
324 }
325 accumulate = 1;
326 break;
327 case 't':
328 {
329 char *ptr;
330 long n = strtol(opt, &ptr, 10);
331 if (n == 0 && ptr == opt) {
332 error("bad integer `%1' in `t' option", opt);
333 opt = 0;
334 break;
335 }
336 if (n < 1)
337 n = 1;
338 linear_truncate_len = int(n);
339 opt = ptr;
340 break;
341 }
342 case '-':
343 if (opt[1] == '\0') {
344 finished_options = 1;
345 opt++;
346 break;
347 }
348 if (strcmp(opt,"-version")==0) {
349 case 'v':
350 printf("GNU refer (groff) version %s\n", Version_string);
351 exit(0);
352 break;
353 }
354 if (strcmp(opt,"-help")==0) {
355 usage(stdout);
356 exit(0);
357 break;
358 }
359 // fall through
360 default:
361 error("unrecognized option `%1'", *opt);
362 usage(stderr);
363 exit(1);
364 break;
365 }
366 }
367 }
368 if (!done_spec)
369 set_label_spec("%1");
370 if (argc <= 0) {
371 if (bib_flag)
372 do_bib("-");
373 else
374 do_file("-");
375 }
376 else {
377 for (int i = 0; i < argc; i++) {
378 if (bib_flag)
379 do_bib(argv[i]);
380 else
381 do_file(argv[i]);
382 }
383 }
384 if (accumulate)
385 output_references();
386 if (fflush(stdout) < 0)
387 fatal("output error");
388 return 0;
389}
390
391static void usage(FILE *stream)
392{
393 fprintf(stream,
394"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
395" [-sXYZ] [-tN] [-BL.M] [files ...]\n",
396 program_name);
397}
398
399static void possibly_load_default_database()
400{
401 if (search_default && !default_database_loaded) {
402 char *filename = getenv("REFER");
403 if (filename)
404 database_list.add_file(filename);
405 else
406 database_list.add_file(DEFAULT_INDEX, 1);
407 default_database_loaded = 1;
408 }
409}
410
411static int is_list(const string &str)
412{
413 const char *start = str.contents();
414 const char *end = start + str.length();
415 while (end > start && csspace(end[-1]))
416 end--;
417 while (start < end && csspace(*start))
418 start++;
419 return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
420}
421
// Filter one input file (standard input when filename is "-") to outfp.
//
// Ordinary lines are held in pending_line for one iteration so that a
// following citation can be attached to them.  Three kinds of line get
// special handling: ".[" ... ".]" citations, ".lf" requests, and (when
// recognize_R1_R2 is set) ".R1" ... ".R2" command blocks.  An unopenable
// file is reported with error() and skipped.
422static void do_file(const char *filename)
423{
424 FILE *fp;
425 if (strcmp(filename, "-") == 0) {
426 fp = stdin;
427 }
428 else {
429 errno = 0;
430 fp = fopen(filename, "r");
431 if (fp == 0) {
432 error("can't open `%1': %2", filename, strerror(errno));
433 return;
434 }
435 }
436 current_filename = filename;
437 fprintf(outfp, ".lf 1 %s\n", filename);
438 string line;
439 current_lineno = 0;
// One iteration per input line.
440 for (;;) {
441 line.clear();
// Read a line including its newline.  Invalid characters are reported
// and dropped; a final line without a newline has one appended.
442 for (;;) {
443 int c = getc(fp);
444 if (c == EOF) {
445 if (line.length() > 0)
446 line += '\n';
447 break;
448 }
449 if (invalid_input_char(c))
450 error("invalid input character code %1", c);
451 else {
452 line += c;
453 if (c == '\n')
454 break;
455 }
456 }
457 int len = line.length();
// An empty line here means end of file.
458 if (len == 0)
459 break;
460 current_lineno++;
// ".[" opens a citation.
461 if (len >= 2 && line[0] == '.' && line[1] == '[') {
462 int start_lineno = current_lineno;
463 int start_of_line = 1;
464 string str;
465 string post;
// pre is the text after ".[" on the opening line, without its newline.
466 string pre(line.contents() + 2, line.length() - 3);
// Collect the citation body into str until a line starting with ".]";
// the rest of that closing line (without the newline) goes into post.
467 for (;;) {
468 int c = getc(fp);
469 if (c == EOF) {
470 error_with_file_and_line(current_filename, start_lineno,
471 "missing `.]' line");
472 break;
473 }
474 if (start_of_line)
475 current_lineno++;
476 if (start_of_line && c == '.') {
477 int d = getc(fp);
478 if (d == ']') {
479 while ((d = getc(fp)) != '\n' && d != EOF) {
480 if (invalid_input_char(d))
481 error("invalid input character code %1", d);
482 else
483 post += d;
484 }
485 break;
486 }
// Not ".]": push the look-ahead character back and treat '.' as body.
487 if (d != EOF)
488 ungetc(d, fp);
489 }
490 if (invalid_input_char(c))
491 error("invalid input character code %1", c);
492 else
493 str += c;
494 start_of_line = (c == '\n');
495 }
// A body consisting of "$LIST$" asks for the accumulated references.
496 if (is_list(str)) {
497 output_pending_line();
498 if (accumulate)
499 output_references();
500 else
501 error("found `$LIST$' but not accumulating references");
502 }
503 else {
504 unsigned flags = (accumulate
505 ? store_reference(str)
506 : immediately_handle_reference(str));
507 if (label_in_text) {
// When accumulating, labels are not known yet, so output is diverted to
// a temporary file from the first citation onwards.
508 if (accumulate && outfp == stdout)
509 divert_to_temporary_file();
510 if (pending_line.length() == 0) {
511 warning("can't attach citation to previous line");
512 }
513 else
// Drop the trailing newline so the label is appended to the same line.
514 pending_line.set_length(pending_line.length() - 1);
515 string punct;
516 if (move_punctuation)
517 split_punct(pending_line, punct);
518 int have_text = pre.length() > 0 || post.length() > 0;
// The flag bits other than the bracket-forcing ones give the label type.
519 label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
520 |FORCE_RIGHT_BRACKET));
// Append: [PRE marker] pre <label marker> post [POST marker] punct.
// The bracket markers are added when forced by the flags or when the
// citation has no pre/post text of its own.
521 if ((flags & FORCE_LEFT_BRACKET) || !have_text)
522 pending_line += PRE_LABEL_MARKER;
523 pending_line += pre;
524 char lm = LABEL_MARKER + (int)lt;
525 pending_line += lm;
526 pending_line += post;
527 if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
528 pending_line += POST_LABEL_MARKER;
529 pending_line += punct;
530 pending_line += '\n';
531 }
532 }
533 need_syncing = 1;
534 }
// ".lf" request: queue it in pending_lf_lines and interpret its arguments.
535 else if (len >= 4
536 && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
537 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
538 pending_lf_lines += line;
// NUL-terminate so the arguments can be passed as a C string.
539 line += '\0';
// NOTE(review): the decrement presumably keeps the ".lf" line itself from
// being counted once interpret_lf_args() has reset the line number --
// confirm against interpret_lf_args().
540 if (interpret_lf_args(line.contents() + 3))
541 current_lineno--;
542 }
// ".R1" opens a block of commands that runs up to a ".R2" line.
543 else if (recognize_R1_R2
544 && len >= 4
545 && line[0] == '.' && line[1] == 'R' && line[2] == '1'
546 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
// line is reused to collect the command text.
547 line.clear();
548 int start_of_line = 1;
549 int start_lineno = current_lineno;
550 for (;;) {
551 int c = getc(fp);
552 if (c != EOF && start_of_line)
553 current_lineno++;
// Look for ".R2" at the start of a line.  On a partial match the
// characters already consumed are put into line, and c (the first
// non-matching character) is handled below.
554 if (start_of_line && c == '.') {
555 c = getc(fp);
556 if (c == 'R') {
557 c = getc(fp);
558 if (c == '2') {
559 c = getc(fp);
560 if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
// Discard the rest of the ".R2" line.
561 while (c != EOF && c != '\n')
562 c = getc(fp);
563 break;
564 }
565 else {
566 line += '.';
567 line += 'R';
568 line += '2';
569 }
570 }
571 else {
572 line += '.';
573 line += 'R';
574 }
575 }
576 else
577 line += '.';
578 }
579 if (c == EOF) {
580 error_with_file_and_line(current_filename, start_lineno,
581 "missing `.R2' line");
582 break;
583 }
584 if (invalid_input_char(c))
585 error("invalid input character code %1", int(c));
586 else {
587 line += c;
588 start_of_line = c == '\n';
589 }
590 }
// Flush what has been collected so far before the commands take effect.
591 output_pending_line();
592 if (accumulate)
593 output_references();
594 else
595 nreferences = 0;
596 process_commands(line, current_filename, start_lineno + 1);
597 need_syncing = 1;
598 }
// Ordinary line: emit the previously held line and hold this one.
599 else {
600 output_pending_line();
601 pending_line = line;
602 }
603 }
// Clear need_syncing so the final flush does not emit a ".lf" request.
604 need_syncing = 0;
605 output_pending_line();
606 if (fp != stdin)
607 fclose(fp);
608}
609
// State machine that copies a character stream to fp while replacing the
// in-band label marker characters with pre_label, post_label, sep_label
// and the labels of the references supplied at construction.  Characters
// are fed one at a time through process(); the destructor flushes
// whatever is still pending.
610class label_processing_state {
// What, if anything, has been seen but not yet written out.
611 enum {
612 NORMAL,
613 PENDING_LABEL,
614 PENDING_LABEL_POST,
615 PENDING_LABEL_POST_PRE,
616 PENDING_POST
617 } state;
618 label_type type; // type of pending labels
619 int count; // number of pending labels
620 reference **rptr; // pointer to next reference
621 int rcount; // number of references left
// Stream the processed text is written to.
622 FILE *fp;
// Resolves the pending state against the next character c; returns
// non-zero when c has been consumed.
623 int handle_pending(int c);
624public:
625 label_processing_state(reference **, int, FILE *);
626 ~label_processing_state();
627 void process(int c);
628};
629
630static void output_pending_line()
631{
632 if (label_in_text && !accumulate && ncitations > 0) {
633 label_processing_state state(citation, ncitations, outfp);
634 int len = pending_line.length();
635 for (int i = 0; i < len; i++)
636 state.process((unsigned char)(pending_line[i]));
637 }
638 else
639 put_string(pending_line, outfp);
640 pending_line.clear();
641 if (pending_lf_lines.length() > 0) {
642 put_string(pending_lf_lines, outfp);
643 pending_lf_lines.clear();
644 }
645 if (!accumulate)
646 immediately_output_references();
647 if (need_syncing) {
648 fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
649 need_syncing = 0;
650 }
651}
652
653static void split_punct(string &line, string &punct)
654{
655 const char *start = line.contents();
656 const char *end = start + line.length();
657 const char *ptr = start;
658 const char *last_token_start = 0;
659 for (;;) {
660 if (ptr >= end)
661 break;
662 last_token_start = ptr;
663 if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
664 || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
665 ptr++;
666 else if (!get_token(&ptr, end))
667 break;
668 }
669 if (last_token_start) {
670 const token_info *ti = lookup_token(last_token_start, end);
671 if (ti->is_punct()) {
672 punct.append(last_token_start, end - last_token_start);
673 line.set_length(last_token_start - start);
674 }
675 }
676}
677
678static void divert_to_temporary_file()
679{
680 outfp = xtmpfile();
681}
682
683static void store_citation(reference *ref)
684{
685 if (ncitations >= citation_max) {
686 if (citation == 0)
687 citation = new reference*[citation_max = 100];
688 else {
689 reference **old_citation = citation;
690 citation_max *= 2;
691 citation = new reference *[citation_max];
692 memcpy(citation, old_citation, ncitations*sizeof(reference *));
693 a_delete old_citation;
694 }
695 }
696 citation[ncitations++] = ref;
697}
698
699static unsigned store_reference(const string &str)
700{
701 if (reference_hash_table == 0) {
702 reference_hash_table = new reference *[17];
703 hash_table_size = 17;
704 for (int i = 0; i < hash_table_size; i++)
705 reference_hash_table[i] = 0;
706 }
707 unsigned flags;
708 reference *ref = make_reference(str, &flags);
709 ref->compute_hash_code();
710 unsigned h = ref->hash();
711 reference **ptr;
712 for (ptr = reference_hash_table + (h % hash_table_size);
713 *ptr != 0;
714 ((ptr == reference_hash_table)
715 ? (ptr = reference_hash_table + hash_table_size - 1)
716 : --ptr))
717 if (same_reference(**ptr, *ref))
718 break;
719 if (*ptr != 0) {
720 if (ref->is_merged())
721 warning("fields ignored because reference already used");
722 delete ref;
723 ref = *ptr;
724 }
725 else {
726 *ptr = ref;
727 ref->set_number(nreferences);
728 nreferences++;
729 ref->pre_compute_label();
730 ref->compute_sort_key();
731 if (nreferences*2 >= hash_table_size) {
732 // Rehash it.
733 reference **old_table = reference_hash_table;
734 int old_size = hash_table_size;
735 hash_table_size = next_size(hash_table_size);
736 reference_hash_table = new reference*[hash_table_size];
737 int i;
738 for (i = 0; i < hash_table_size; i++)
739 reference_hash_table[i] = 0;
740 for (i = 0; i < old_size; i++)
741 if (old_table[i]) {
742 reference **p;
743 for (p = (reference_hash_table
744 + (old_table[i]->hash() % hash_table_size));
745 *p;
746 ((p == reference_hash_table)
747 ? (p = reference_hash_table + hash_table_size - 1)
748 : --p))
749 ;
750 *p = old_table[i];
751 }
752 a_delete old_table;
753 }
754 }
755 if (label_in_text)
756 store_citation(ref);
757 return flags;
758}
759
760unsigned immediately_handle_reference(const string &str)
761{
762 unsigned flags;
763 reference *ref = make_reference(str, &flags);
764 ref->set_number(nreferences);
765 if (label_in_text || label_in_reference) {
766 ref->pre_compute_label();
767 ref->immediate_compute_label();
768 }
769 nreferences++;
770 store_citation(ref);
771 return flags;
772}
773
774static void immediately_output_references()
775{
776 for (int i = 0; i < ncitations; i++) {
777 reference *ref = citation[i];
778 if (label_in_reference) {
779 fputs(".ds [F ", outfp);
780 const string &label = ref->get_label(NORMAL_LABEL);
781 if (label.length() > 0
782 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
783 putc('"', outfp);
784 put_string(label, outfp);
785 putc('\n', outfp);
786 }
787 ref->output(outfp);
788 delete ref;
789 }
790 ncitations = 0;
791}
792
793static void output_citation_group(reference **v, int n, label_type type,
794 FILE *fp)
795{
796 if (sort_adjacent_labels) {
797 // Do an insertion sort. Usually n will be very small.
798 for (int i = 1; i < n; i++) {
799 int num = v[i]->get_number();
800 reference *temp = v[i];
801 int j;
802 for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
803 v[j + 1] = v[j];
804 v[j + 1] = temp;
805 }
806 }
807 // This messes up if !accumulate.
808 if (accumulate && n > 1) {
809 // remove duplicates
810 int j = 1;
811 for (int i = 1; i < n; i++)
812 if (v[i]->get_label(type) != v[i - 1]->get_label(type))
813 v[j++] = v[i];
814 n = j;
815 }
816 string merged_label;
817 for (int i = 0; i < n; i++) {
818 int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
819 if (nmerged > 0) {
820 put_string(merged_label, fp);
821 i += nmerged;
822 }
823 else
824 put_string(v[i]->get_label(type), fp);
825 if (i < n - 1)
826 put_string(sep_label, fp);
827 }
828}
829
830
831label_processing_state::label_processing_state(reference **p, int n, FILE *f)
832: state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
833{
834}
835
836label_processing_state::~label_processing_state()
837{
838 int handled = handle_pending(EOF);
839 assert(!handled);
840 assert(rcount == 0);
841}
842
843int label_processing_state::handle_pending(int c)
844{
845 switch (state) {
846 case NORMAL:
847 break;
848 case PENDING_LABEL:
849 if (c == POST_LABEL_MARKER) {
850 state = PENDING_LABEL_POST;
851 return 1;
852 }
853 else {
854 output_citation_group(rptr, count, type, fp);
855 rptr += count ;
856 rcount -= count;
857 state = NORMAL;
858 }
859 break;
860 case PENDING_LABEL_POST:
861 if (c == PRE_LABEL_MARKER) {
862 state = PENDING_LABEL_POST_PRE;
863 return 1;
864 }
865 else {
866 output_citation_group(rptr, count, type, fp);
867 rptr += count;
868 rcount -= count;
869 put_string(post_label, fp);
870 state = NORMAL;
871 }
872 break;
873 case PENDING_LABEL_POST_PRE:
874 if (c >= LABEL_MARKER
875 && c < LABEL_MARKER + N_LABEL_TYPES
876 && c - LABEL_MARKER == type) {
877 count += 1;
878 state = PENDING_LABEL;
879 return 1;
880 }
881 else {
882 output_citation_group(rptr, count, type, fp);
883 rptr += count;
884 rcount -= count;
885 put_string(sep_label, fp);
886 state = NORMAL;
887 }
888 break;
889 case PENDING_POST:
890 if (c == PRE_LABEL_MARKER) {
891 put_string(sep_label, fp);
892 state = NORMAL;
893 return 1;
894 }
895 else {
896 put_string(post_label, fp);
897 state = NORMAL;
898 }
899 break;
900 }
901 return 0;
902}
903
904void label_processing_state::process(int c)
905{
906 if (handle_pending(c))
907 return;
908 assert(state == NORMAL);
909 switch (c) {
910 case PRE_LABEL_MARKER:
911 put_string(pre_label, fp);
912 state = NORMAL;
913 break;
914 case POST_LABEL_MARKER:
915 state = PENDING_POST;
916 break;
917 case LABEL_MARKER:
918 case LABEL_MARKER + 1:
919 count = 1;
920 state = PENDING_LABEL;
921 type = label_type(c - LABEL_MARKER);
922 break;
923 default:
924 state = NORMAL;
925 putc(c, fp);
926 break;
927 }
928}
929
930extern "C" {
931
932int rcompare(const void *p1, const void *p2)
933{
934 return compare_reference(**(reference **)p1, **(reference **)p2);
935}
936
937}
938
939void output_references()
940{
941 assert(accumulate);
465b256c 942 if (!hash_table_size) {
4d3e9548
JL
943 if (have_bibliography)
944 error("nothing to reference (probably `bibliography' before `sort')");
465b256c
JR
945 accumulate = 0;
946 nreferences = 0;
947 return;
948 }
92d0a6a6
JR
949 if (nreferences > 0) {
950 int j = 0;
951 int i;
952 for (i = 0; i < hash_table_size; i++)
953 if (reference_hash_table[i] != 0)
954 reference_hash_table[j++] = reference_hash_table[i];
955 assert(j == nreferences);
956 for (; j < hash_table_size; j++)
957 reference_hash_table[j] = 0;
958 qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
959 for (i = 0; i < nreferences; i++)
960 reference_hash_table[i]->set_number(i);
961 compute_labels(reference_hash_table, nreferences);
962 }
963 if (outfp != stdout) {
964 rewind(outfp);
965 {
966 label_processing_state state(citation, ncitations, stdout);
967 int c;
968 while ((c = getc(outfp)) != EOF)
969 state.process(c);
970 }
971 ncitations = 0;
972 fclose(outfp);
973 outfp = stdout;
974 }
975 if (nreferences > 0) {
976 fputs(".]<\n", outfp);
977 for (int i = 0; i < nreferences; i++) {
978 if (sort_fields.length() > 0)
979 reference_hash_table[i]->print_sort_key_comment(outfp);
980 if (label_in_reference) {
981 fputs(".ds [F ", outfp);
982 const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
983 if (label.length() > 0
984 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
985 putc('"', outfp);
986 put_string(label, outfp);
987 putc('\n', outfp);
988 }
989 reference_hash_table[i]->output(outfp);
990 delete reference_hash_table[i];
991 reference_hash_table[i] = 0;
992 }
993 fputs(".]>\n", outfp);
994 nreferences = 0;
995 }
996 clear_labels();
997}
998
999static reference *find_reference(const char *query, int query_len)
1000{
1001 // This is so that error messages look better.
1002 while (query_len > 0 && csspace(query[query_len - 1]))
1003 query_len--;
1004 string str;
1005 for (int i = 0; i < query_len; i++)
1006 str += query[i] == '\n' ? ' ' : query[i];
1007 str += '\0';
1008 possibly_load_default_database();
1009 search_list_iterator iter(&database_list, str.contents());
1010 reference_id rid;
1011 const char *start;
1012 int len;
1013 if (!iter.next(&start, &len, &rid)) {
1014 error("no matches for `%1'", str.contents());
1015 return 0;
1016 }
1017 const char *end = start + len;
1018 while (start < end) {
1019 if (*start == '%')
1020 break;
1021 while (start < end && *start++ != '\n')
1022 ;
1023 }
1024 if (start >= end) {
1025 error("found a reference for `%1' but it didn't contain any fields",
1026 str.contents());
1027 return 0;
1028 }
1029 reference *result = new reference(start, end - start, &rid);
1030 if (iter.next(&start, &len, &rid))
1031 warning("multiple matches for `%1'", str.contents());
1032 return result;
1033}
1034
1035static reference *make_reference(const string &str, unsigned *flagsp)
1036{
1037 const char *start = str.contents();
1038 const char *end = start + str.length();
1039 const char *ptr = start;
1040 while (ptr < end) {
1041 if (*ptr == '%')
1042 break;
1043 while (ptr < end && *ptr++ != '\n')
1044 ;
1045 }
1046 *flagsp = 0;
1047 for (; start < ptr; start++) {
1048 if (*start == '#')
1049 *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1050 | FORCE_LEFT_BRACKET)));
1051 else if (*start == '[')
1052 *flagsp |= FORCE_LEFT_BRACKET;
1053 else if (*start == ']')
1054 *flagsp |= FORCE_RIGHT_BRACKET;
1055 else if (!csspace(*start))
1056 break;
1057 }
1058 if (start >= end) {
1059 error("empty reference");
1060 return new reference;
1061 }
1062 reference *database_ref = 0;
1063 if (start < ptr)
1064 database_ref = find_reference(start, ptr - start);
1065 reference *inline_ref = 0;
1066 if (ptr < end)
1067 inline_ref = new reference(ptr, end - ptr);
1068 if (inline_ref) {
1069 if (database_ref) {
1070 database_ref->merge(*inline_ref);
1071 delete inline_ref;
1072 return database_ref;
1073 }
1074 else
1075 return inline_ref;
1076 }
1077 else if (database_ref)
1078 return database_ref;
1079 else
1080 return new reference;
1081}
1082
1083static void do_ref(const string &str)
1084{
1085 if (accumulate)
1086 (void)store_reference(str);
1087 else {
1088 (void)immediately_handle_reference(str);
1089 immediately_output_references();
1090 }
1091}
1092
1093static void trim_blanks(string &str)
1094{
1095 const char *start = str.contents();
1096 const char *end = start + str.length();
1097 while (end > start && end[-1] != '\n' && csspace(end[-1]))
1098 --end;
1099 str.set_length(end - start);
1100}
1101
1102void do_bib(const char *filename)
1103{
1104 FILE *fp;
1105 if (strcmp(filename, "-") == 0)
1106 fp = stdin;
1107 else {
1108 errno = 0;
1109 fp = fopen(filename, "r");
1110 if (fp == 0) {
1111 error("can't open `%1': %2", filename, strerror(errno));
1112 return;
1113 }
1114 current_filename = filename;
1115 }
1116 enum {
1117 START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
1118 } state = START;
1119 string body;
1120 for (;;) {
1121 int c = getc(fp);
1122 if (c == EOF)
1123 break;
1124 if (invalid_input_char(c)) {
1125 error("invalid input character code %1", c);
1126 continue;
1127 }
1128 switch (state) {
1129 case START:
1130 if (c == '%') {
1131 body = c;
1132 state = BODY;
1133 }
1134 else if (c != '\n')
1135 state = MIDDLE;
1136 break;
1137 case MIDDLE:
1138 if (c == '\n')
1139 state = START;
1140 break;
1141 case BODY:
1142 body += c;
1143 if (c == '\n')
1144 state = BODY_START;
1145 break;
1146 case BODY_START:
1147 if (c == '\n') {
1148 do_ref(body);
1149 state = START;
1150 }
1151 else if (c == '.')
1152 state = BODY_DOT;
1153 else if (csspace(c)) {
1154 state = BODY_BLANK;
1155 body += c;
1156 }
1157 else {
1158 body += c;
1159 state = BODY;
1160 }
1161 break;
1162 case BODY_BLANK:
1163 if (c == '\n') {
1164 trim_blanks(body);
1165 do_ref(body);
1166 state = START;
1167 }
1168 else if (csspace(c))
1169 body += c;
1170 else {
1171 body += c;
1172 state = BODY;
1173 }
1174 break;
1175 case BODY_DOT:
1176 if (c == ']') {
1177 do_ref(body);
1178 state = MIDDLE;
1179 }
1180 else {
1181 body += '.';
1182 body += c;
1183 state = c == '\n' ? BODY_START : BODY;
1184 }
1185 break;
1186 default:
1187 assert(0);
1188 }
1189 if (c == '\n')
1190 current_lineno++;
1191 }
1192 switch (state) {
1193 case START:
1194 case MIDDLE:
1195 break;
1196 case BODY:
1197 body += '\n';
1198 do_ref(body);
1199 break;
1200 case BODY_DOT:
1201 case BODY_START:
1202 do_ref(body);
1203 break;
1204 case BODY_BLANK:
1205 trim_blanks(body);
1206 do_ref(body);
1207 break;
1208 }
1209 fclose(fp);
1210}
1211
1212// from the Dragon Book
1213
// Hash the len characters starting at s.  Each character is added after
// shifting the running value left four bits; whenever any of the top
// four bits become set they are folded back in at bit 4 and cleared.
unsigned hash_string(const char *s, int len)
{
  unsigned h = 0;
  for (int i = 0; i < len; i++) {
    h = (h << 4) + s[i];
    const unsigned top = h & 0xf0000000;
    if (top != 0)
      h = (h ^ (top >> 24)) ^ top;
  }
  return h;
}
1228
// Return the smallest entry of a fixed table of sizes that is strictly
// greater than n.  Asserts if n is not below the largest entry.
int next_size(int n)
{
  // Zero marks the end of the table.
  static const int sizes[] = {
    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
    16000057, 32000011, 64000031, 128000003, 0
  };

  int idx = 0;
  while (sizes[idx] <= n && sizes[idx] != 0)
    idx++;
  assert(sizes[idx] != 0);
  return sizes[idx];
}
1243