2 static const char yysccsid[] = "@(#)yaccpar 1.9 (Berkeley) 02/21/93";
11 #define YYPATCH 20081224
/* Helper macros for use in grammar actions. */
/* yyclearin: reset yychar (the token most recently read via yylex) to YYEMPTY. */
14 #define yyclearin (yychar = YYEMPTY)
/* yyerrok: reset the error-recovery counter yyerrflag to 0. */
15 #define yyerrok (yyerrflag = 0)
/* YYRECOVERING(): non-zero while yyerrflag is non-zero. */
16 #define YYRECOVERING() (yyerrflag != 0)
18 /* compatibility with bison */
20 /* compatibility with FreeBSD */
21 #ifdef YYPARSE_PARAM_TYPE
22 #define YYPARSE_DECL() yyparse(YYPARSE_PARAM_TYPE YYPARSE_PARAM)
24 #define YYPARSE_DECL() yyparse(void *YYPARSE_PARAM)
27 #define YYPARSE_DECL() yyparse(void)
28 #endif /* YYPARSE_PARAM */
30 extern int YYPARSE_DECL();
32 static int yygrowstack(void);
42 void yyerror(const char *);
45 static const char *format_serial(char c, int n);
52 label_info(const string &);
55 label_info *lookup_label(const string &label);
59 /* Does the tentative label depend on the reference?*/
60 CONTAINS_VARIABLE = 01,
65 virtual ~expression() { }
66 virtual void evaluate(int, const reference &, string &,
67 substring_position &) = 0;
68 virtual unsigned analyze() { return 0; }
71 class at_expr : public expression {
74 void evaluate(int, const reference &, string &, substring_position &);
75 unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }
78 class format_expr : public expression {
83 format_expr(char c, int w = 0, int f = 1)
84 : type(c), width(w), first_number(f) { }
85 void evaluate(int, const reference &, string &, substring_position &);
86 unsigned analyze() { return CONTAINS_FORMAT; }
89 class field_expr : public expression {
93 field_expr(char nm, int num) : number(num), name(nm) { }
94 void evaluate(int, const reference &, string &, substring_position &);
95 unsigned analyze() { return CONTAINS_VARIABLE; }
98 class literal_expr : public expression {
101 literal_expr(const char *ptr, int len) : s(ptr, len) { }
102 void evaluate(int, const reference &, string &, substring_position &);
105 class unary_expr : public expression {
109 unary_expr(expression *e) : expr(e) { }
110 ~unary_expr() { delete expr; }
111 void evaluate(int, const reference &, string &, substring_position &) = 0;
112 unsigned analyze() { return expr ? expr->analyze() : 0; }
115 /* This caches the analysis of an expression.*/
117 class analyzed_expr : public unary_expr {
120 analyzed_expr(expression *);
121 void evaluate(int, const reference &, string &, substring_position &);
122 unsigned analyze() { return flags; }
125 class star_expr : public unary_expr {
127 star_expr(expression *e) : unary_expr(e) { }
128 void evaluate(int, const reference &, string &, substring_position &);
130 return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
/* Signature of the text-mapping functions held by map_expr.  map_expr
   calls the function with pointers to the start and to the end of the
   evaluated operand text, plus the string that receives the output. */
135 typedef void map_func(const char *, const char *, string &);
137 class map_expr : public unary_expr {
140 map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { }
141 void evaluate(int, const reference &, string &, substring_position &);
/* Signature of the functions held by extractor_expr.  The function is
   called with pointers to the start and to the end of the evaluated
   operand text; the pointer it returns is used as the start of the
   extracted part.
   NOTE(review): the third parameter presumably receives the end of the
   extracted part -- confirm against the call in extractor_expr::evaluate. */
144 typedef const char *extractor_func(const char *, const char *, const char **);
146 class extractor_expr : public unary_expr {
148 extractor_func *func;
150 enum { BEFORE = +1, MATCH = 0, AFTER = -1 };
151 extractor_expr(expression *e, extractor_func *f, int pt)
152 : unary_expr(e), part(pt), func(f) { }
153 void evaluate(int, const reference &, string &, substring_position &);
156 class truncate_expr : public unary_expr {
159 truncate_expr(expression *e, int i) : unary_expr(e), n(i) { }
160 void evaluate(int, const reference &, string &, substring_position &);
163 class separator_expr : public unary_expr {
165 separator_expr(expression *e) : unary_expr(e) { }
166 void evaluate(int, const reference &, string &, substring_position &);
169 class binary_expr : public expression {
174 binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
175 ~binary_expr() { delete expr1; delete expr2; }
176 void evaluate(int, const reference &, string &, substring_position &) = 0;
178 return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
182 class alternative_expr : public binary_expr {
184 alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
185 void evaluate(int, const reference &, string &, substring_position &);
188 class list_expr : public binary_expr {
190 list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
191 void evaluate(int, const reference &, string &, substring_position &);
194 class substitute_expr : public binary_expr {
196 substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
197 void evaluate(int, const reference &, string &, substring_position &);
200 class ternary_expr : public expression {
206 ternary_expr(expression *e1, expression *e2, expression *e3)
207 : expr1(e1), expr2(e2), expr3(e3) { }
208 ~ternary_expr() { delete expr1; delete expr2; delete expr3; }
209 void evaluate(int, const reference &, string &, substring_position &) = 0;
211 return ((expr1 ? expr1->analyze() : 0)
212 | (expr2 ? expr2->analyze() : 0)
213 | (expr3 ? expr3->analyze() : 0));
217 class conditional_expr : public ternary_expr {
219 conditional_expr(expression *e1, expression *e2, expression *e3)
220 : ternary_expr(e1, e2, e3) { }
221 void evaluate(int, const reference &, string &, substring_position &);
/* Expression trees for the three kinds of label specification; all start
   out as 0. */
/* Assigned from parse_result in set_label_spec(); evaluated by
   reference::pre_compute_label() and reference::compute_label(). */
224 static expression *parsed_label = 0;
/* Assigned from parse_result in set_date_label_spec(); evaluated in
   reference::compute_label() when non-null. */
225 static expression *parsed_date_label = 0;
/* Assigned from parse_result in set_short_label_spec(); evaluated in
   reference::compute_label() when short_label_flag is set. */
226 static expression *parsed_short_label = 0;
228 static expression *parse_result;
236 struct { int ndigits; int val; } dig;
237 struct { int start; int len; } str;
239 #line 239 "label.cpp"
/* Token codes for the named terminals of the label grammar; they match
   the last three entries of the yyname table (indices 257..259). */
240 #define TOKEN_LETTER 257
/* yylex returns TOKEN_LITERAL after recording the literal's start and
   length (relative to `literals') in yylval.str. */
241 #define TOKEN_LITERAL 258
242 #define TOKEN_DIGIT 259
/* Code of the error token; during error recovery it is added to
   yysindex[] to look for a state that can shift it. */
243 #define YYERRCODE 256
244 static const short yylhs[] = { -1,
245 0, 1, 1, 6, 6, 2, 2, 2, 3, 3,
246 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
247 4, 4, 4, 4, 9, 9, 7, 7, 8, 8,
250 static const short yylen[] = { 2,
251 1, 1, 5, 0, 1, 1, 3, 3, 1, 2,
252 1, 3, 1, 1, 1, 2, 2, 2, 5, 3,
253 3, 2, 3, 3, 0, 1, 1, 2, 1, 2,
256 static const short yydefred[] = { 0,
257 0, 14, 13, 0, 0, 0, 0, 5, 0, 0,
258 0, 0, 1, 27, 0, 17, 29, 0, 0, 0,
259 0, 0, 0, 0, 0, 0, 0, 22, 0, 28,
260 30, 23, 24, 0, 0, 0, 32, 33, 0, 0,
261 0, 0, 0, 0, 3, 0, 19,
263 static const short yydgoto[] = { 7,
264 8, 9, 10, 11, 12, 13, 15, 18, 47, 39,
266 static const short yysindex[] = { -32,
267 -257, 0, 0, -240, -32, -32, 0, 0, -18, -32,
268 -36, -114, 0, 0, -246, 0, 0, -241, -14, -39,
269 -32, -32, -32, -114, -21, -257, -257, 0, -32, 0,
270 0, 0, 0, -25, -32, -32, 0, 0, -223, -246,
271 -246, -36, -32, -257, 0, -246, 0,
273 static const short yyrindex[] = { 35,
274 1, 0, 0, 0, -5, -4, 0, 0, 14, 208,
275 159, 224, 0, 0, 11, 0, 0, 40, 0, 0,
276 2, 0, 0, 253, -220, 0, 0, 0, 0, 0,
277 0, 0, 0, 0, 263, 281, 0, 0, 0, 50,
278 105, 214, 0, 115, 0, 149, 0,
280 static const short yygindex[] = { 0,
281 19, 0, 7, 37, -10, 10, -23, 0, 0, 0,
283 #define YYTABLESIZE 511
284 static const short yytable[] = { 24,
285 15, 14, 40, 41, 4, 28, 26, 5, 27, 25,
286 16, 29, 30, 2, 19, 20, 16, 31, 17, 23,
287 46, 37, 33, 38, 24, 24, 32, 6, 35, 36,
288 34, 3, 43, 44, 4, 4, 31, 15, 15, 18,
289 15, 15, 15, 15, 21, 15, 15, 16, 16, 20,
290 16, 16, 16, 16, 2, 16, 16, 4, 15, 4,
291 15, 45, 15, 15, 15, 42, 0, 0, 16, 0,
292 16, 2, 16, 16, 16, 2, 18, 18, 0, 18,
293 18, 18, 18, 0, 18, 18, 20, 20, 0, 20,
294 20, 20, 20, 0, 20, 20, 0, 18, 0, 18,
295 0, 18, 18, 18, 21, 22, 0, 20, 0, 20,
296 0, 20, 20, 20, 25, 0, 0, 0, 0, 0,
297 0, 0, 0, 0, 15, 0, 15, 0, 0, 0,
298 0, 0, 0, 0, 16, 0, 16, 0, 0, 0,
299 0, 21, 21, 0, 21, 21, 21, 21, 26, 21,
300 21, 25, 25, 0, 25, 25, 25, 25, 11, 25,
301 25, 0, 21, 18, 21, 18, 21, 21, 21, 0,
302 0, 0, 25, 20, 25, 20, 25, 25, 25, 0,
303 0, 0, 0, 0, 0, 26, 26, 0, 26, 26,
304 26, 26, 0, 26, 26, 11, 11, 0, 11, 11,
305 0, 0, 0, 0, 0, 0, 26, 6, 26, 0,
306 26, 26, 26, 12, 0, 0, 11, 0, 11, 0,
307 11, 11, 11, 9, 1, 2, 0, 0, 21, 0,
308 21, 0, 0, 0, 0, 0, 0, 0, 25, 0,
309 25, 0, 0, 0, 0, 6, 0, 0, 6, 0,
310 12, 12, 10, 12, 12, 0, 0, 15, 15, 0,
311 9, 9, 7, 9, 9, 6, 0, 16, 16, 6,
312 6, 12, 26, 12, 26, 12, 12, 12, 0, 0,
313 8, 9, 11, 9, 11, 9, 9, 9, 0, 10,
314 10, 0, 10, 10, 0, 0, 18, 18, 0, 0,
315 7, 0, 0, 7, 0, 0, 20, 20, 0, 0,
316 10, 0, 10, 0, 10, 10, 10, 0, 8, 0,
317 7, 8, 0, 0, 7, 7, 0, 0, 0, 0,
318 0, 6, 0, 0, 0, 0, 0, 12, 8, 12,
319 0, 0, 8, 8, 0, 0, 0, 9, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
321 0, 21, 21, 0, 0, 0, 0, 0, 0, 0,
322 0, 25, 25, 0, 0, 0, 10, 0, 0, 0,
323 0, 0, 0, 0, 0, 0, 7, 0, 0, 0,
324 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
325 0, 0, 0, 0, 8, 26, 26, 0, 0, 0,
326 0, 0, 0, 0, 0, 11, 11, 0, 0, 0,
327 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
332 12, 12, 0, 0, 0, 0, 0, 0, 0, 0,
333 9, 9, 0, 0, 0, 0, 0, 0, 0, 0,
334 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
335 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
338 static const short yycheck[] = { 10,
339 0, 259, 26, 27, 37, 42, 43, 40, 45, 46,
340 0, 126, 259, 0, 5, 6, 257, 259, 259, 38,
341 44, 43, 62, 45, 35, 36, 41, 60, 22, 23,
342 21, 64, 58, 257, 0, 41, 257, 37, 38, 0,
343 40, 41, 42, 43, 63, 45, 46, 37, 38, 0,
344 40, 41, 42, 43, 41, 45, 46, 62, 58, 58,
345 60, 43, 62, 63, 64, 29, -1, -1, 58, -1,
346 60, 58, 62, 63, 64, 62, 37, 38, -1, 40,
347 41, 42, 43, -1, 45, 46, 37, 38, -1, 40,
348 41, 42, 43, -1, 45, 46, -1, 58, -1, 60,
349 -1, 62, 63, 64, 0, 124, -1, 58, -1, 60,
350 -1, 62, 63, 64, 0, -1, -1, -1, -1, -1,
351 -1, -1, -1, -1, 124, -1, 126, -1, -1, -1,
352 -1, -1, -1, -1, 124, -1, 126, -1, -1, -1,
353 -1, 37, 38, -1, 40, 41, 42, 43, 0, 45,
354 46, 37, 38, -1, 40, 41, 42, 43, 0, 45,
355 46, -1, 58, 124, 60, 126, 62, 63, 64, -1,
356 -1, -1, 58, 124, 60, 126, 62, 63, 64, -1,
357 -1, -1, -1, -1, -1, 37, 38, -1, 40, 41,
358 42, 43, -1, 45, 46, 37, 38, -1, 40, 41,
359 -1, -1, -1, -1, -1, -1, 58, 0, 60, -1,
360 62, 63, 64, 0, -1, -1, 58, -1, 60, -1,
361 62, 63, 64, 0, 257, 258, -1, -1, 124, -1,
362 126, -1, -1, -1, -1, -1, -1, -1, 124, -1,
363 126, -1, -1, -1, -1, 38, -1, -1, 41, -1,
364 37, 38, 0, 40, 41, -1, -1, 257, 258, -1,
365 37, 38, 0, 40, 41, 58, -1, 257, 258, 62,
366 63, 58, 124, 60, 126, 62, 63, 64, -1, -1,
367 0, 58, 124, 60, 126, 62, 63, 64, -1, 37,
368 38, -1, 40, 41, -1, -1, 257, 258, -1, -1,
369 38, -1, -1, 41, -1, -1, 257, 258, -1, -1,
370 58, -1, 60, -1, 62, 63, 64, -1, 38, -1,
371 58, 41, -1, -1, 62, 63, -1, -1, -1, -1,
372 -1, 124, -1, -1, -1, -1, -1, 124, 58, 126,
373 -1, -1, 62, 63, -1, -1, -1, 124, -1, -1,
374 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
375 -1, 257, 258, -1, -1, -1, -1, -1, -1, -1,
376 -1, 257, 258, -1, -1, -1, 124, -1, -1, -1,
377 -1, -1, -1, -1, -1, -1, 124, -1, -1, -1,
378 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
379 -1, -1, -1, -1, 124, 257, 258, -1, -1, -1,
380 -1, -1, -1, -1, -1, 257, 258, -1, -1, -1,
381 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
382 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
383 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
384 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
385 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
386 257, 258, -1, -1, -1, -1, -1, -1, -1, -1,
387 257, 258, -1, -1, -1, -1, -1, -1, -1, -1,
388 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
389 -1, -1, -1, -1, -1, -1, -1, -1, -1, 257,
396 #define YYMAXTOKEN 259
398 static const char *yyname[] = {
400 "end-of-file",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
401 0,0,0,"'%'","'&'",0,"'('","')'","'*'","'+'",0,"'-'","'.'",0,0,0,0,0,0,0,0,0,0,0,
402 "':'",0,"'<'",0,"'>'","'?'","'@'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
403 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'|'",0,
404 "'~'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
405 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
406 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
407 0,0,0,0,0,0,0,0,0,0,0,0,0,"TOKEN_LETTER","TOKEN_LITERAL","TOKEN_DIGIT",
409 static const char *yyrule[] = {
411 "expr : optional_conditional",
412 "conditional : alternative",
413 "conditional : alternative '?' optional_conditional ':' conditional",
414 "optional_conditional :",
415 "optional_conditional : conditional",
416 "alternative : list",
417 "alternative : alternative '|' list",
418 "alternative : alternative '&' list",
420 "list : list substitute",
421 "substitute : string",
422 "substitute : substitute '~' string",
424 "string : TOKEN_LITERAL",
425 "string : TOKEN_LETTER",
426 "string : TOKEN_LETTER number",
427 "string : '%' TOKEN_LETTER",
428 "string : '%' digits",
429 "string : string '.' flag TOKEN_LETTER optional_number",
430 "string : string '+' number",
431 "string : string '-' number",
432 "string : string '*'",
433 "string : '(' optional_conditional ')'",
434 "string : '<' optional_conditional '>'",
436 "optional_number : number",
437 "number : TOKEN_DIGIT",
438 "number : number TOKEN_DIGIT",
439 "digits : TOKEN_DIGIT",
440 "digits : digits TOKEN_DIGIT",
451 /* define the initial stack-sizes */
454 #define YYMAXDEPTH YYSTACKSIZE
457 #define YYSTACKSIZE YYMAXDEPTH
459 #define YYSTACKSIZE 500
460 #define YYMAXDEPTH 500
464 #define YYINITSTACKSIZE 500
475 /* variables for the parser stack */
477 static short *yysslim;
478 static YYSTYPE *yyvs;
479 static unsigned yystacksize;
482 /* bison defines const to be empty unless __STDC__ is defined, which it
483 isn't under cfront */
489 const char *spec_ptr;
490 const char *spec_end;
491 const char *spec_cur;
493 static char uppercase_array[] = {
494 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
495 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
496 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
500 static char lowercase_array[] = {
501 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
502 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
503 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
509 while (spec_ptr < spec_end && csspace(*spec_ptr))
512 if (spec_ptr >= spec_end)
514 unsigned char c = *spec_ptr++;
520 yylval.num = c - '0';
524 yylval.str.start = literals.length();
525 for (; spec_ptr < spec_end; spec_ptr++) {
526 if (*spec_ptr == '\'') {
527 if (++spec_ptr < spec_end && *spec_ptr == '\'')
530 yylval.str.len = literals.length() - yylval.str.start;
531 return TOKEN_LITERAL;
535 literals += *spec_ptr;
537 yylval.str.len = literals.length() - yylval.str.start;
538 return TOKEN_LITERAL;
543 int set_label_spec(const char *label_spec)
545 spec_cur = spec_ptr = label_spec;
546 spec_end = strchr(label_spec, '\0');
551 parsed_label = parse_result;
555 int set_date_label_spec(const char *label_spec)
557 spec_cur = spec_ptr = label_spec;
558 spec_end = strchr(label_spec, '\0');
562 delete parsed_date_label;
563 parsed_date_label = parse_result;
567 int set_short_label_spec(const char *label_spec)
569 spec_cur = spec_ptr = label_spec;
570 spec_end = strchr(label_spec, '\0');
574 delete parsed_short_label;
575 parsed_short_label = parse_result;
579 void yyerror(const char *message)
581 if (spec_cur < spec_end)
582 command_error("label specification %1 before `%2'", message, spec_cur);
584 command_error("label specification %1 at end of string",
588 void at_expr::evaluate(int tentative, const reference &ref,
589 string &result, substring_position &)
592 ref.canonicalize_authors(result);
594 const char *end, *start = ref.get_authors(&end);
596 result.append(start, end - start);
600 void format_expr::evaluate(int tentative, const reference &ref,
601 string &result, substring_position &)
605 const label_info *lp = ref.get_label_ptr();
606 int num = lp == 0 ? ref.get_number() : lp->count;
608 result += format_serial(type, num + 1);
610 const char *ptr = i_to_a(num + first_number);
611 int pad = width - strlen(ptr);
618 static const char *format_serial(char c, int n)
621 static char buf[128]; // more than enough.
627 // troff uses z and w to represent 10000 and 5000 in Roman
628 // numerals; I can find no historical basis for this usage
629 const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI";
636 for (int i = 1000; i > 0; i /= 10, s += 2) {
683 // this is derived from troff/reg.c
690 *p++ = c == 'a' ? lowercase_array[d - 1] :
691 uppercase_array[d - 1];
711 void field_expr::evaluate(int, const reference &ref,
712 string &result, substring_position &)
715 const char *start = ref.get_field(name, &end);
717 start = nth_field(number, start, &end);
719 result.append(start, end - start);
723 void literal_expr::evaluate(int, const reference &,
724 string &result, substring_position &)
729 analyzed_expr::analyzed_expr(expression *e)
730 : unary_expr(e), flags(e ? e->analyze() : 0)
734 void analyzed_expr::evaluate(int tentative, const reference &ref,
735 string &result, substring_position &pos)
738 expr->evaluate(tentative, ref, result, pos);
741 void star_expr::evaluate(int tentative, const reference &ref,
742 string &result, substring_position &pos)
744 const label_info *lp = ref.get_label_ptr();
746 && (lp == 0 || lp->total > 1)
748 expr->evaluate(tentative, ref, result, pos);
751 void separator_expr::evaluate(int tentative, const reference &ref,
752 string &result, substring_position &pos)
754 int start_length = result.length();
755 int is_first = pos.start < 0;
757 expr->evaluate(tentative, ref, result, pos);
759 pos.start = start_length;
760 pos.length = result.length() - start_length;
764 void map_expr::evaluate(int tentative, const reference &ref,
765 string &result, substring_position &)
769 substring_position temp_pos;
770 expr->evaluate(tentative, ref, temp, temp_pos);
771 (*func)(temp.contents(), temp.contents() + temp.length(), result);
775 void extractor_expr::evaluate(int tentative, const reference &ref,
776 string &result, substring_position &)
780 substring_position temp_pos;
781 expr->evaluate(tentative, ref, temp, temp_pos);
782 const char *end, *start = (*func)(temp.contents(),
783 temp.contents() + temp.length(),
788 result.append(temp.contents(), start - temp.contents());
794 result.append(start, end - start);
798 result.append(end, temp.contents() + temp.length() - end);
806 static void first_part(int len, const char *ptr, const char *end,
810 const char *token_start = ptr;
811 if (!get_token(&ptr, end))
813 const token_info *ti = lookup_token(token_start, ptr);
814 int counts = ti->sortify_non_empty(token_start, ptr);
815 if (counts && --len < 0)
817 if (counts || ti->is_accent())
818 result.append(token_start, ptr - token_start);
822 static void last_part(int len, const char *ptr, const char *end,
825 const char *start = ptr;
828 const char *token_start = ptr;
829 if (!get_token(&ptr, end))
831 const token_info *ti = lookup_token(token_start, ptr);
832 if (ti->sortify_non_empty(token_start, ptr))
836 int skip = count - len;
839 const char *token_start = ptr;
840 if (!get_token(&ptr, end))
842 const token_info *ti = lookup_token(token_start, ptr);
843 if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) {
849 first_part(len, ptr, end, result);
852 void truncate_expr::evaluate(int tentative, const reference &ref,
853 string &result, substring_position &)
857 substring_position temp_pos;
858 expr->evaluate(tentative, ref, temp, temp_pos);
859 const char *start = temp.contents();
860 const char *end = start + temp.length();
862 first_part(n, start, end, result);
864 last_part(-n, start, end, result);
868 void alternative_expr::evaluate(int tentative, const reference &ref,
869 string &result, substring_position &pos)
871 int start_length = result.length();
873 expr1->evaluate(tentative, ref, result, pos);
874 if (result.length() == start_length && expr2)
875 expr2->evaluate(tentative, ref, result, pos);
878 void list_expr::evaluate(int tentative, const reference &ref,
879 string &result, substring_position &pos)
882 expr1->evaluate(tentative, ref, result, pos);
884 expr2->evaluate(tentative, ref, result, pos);
887 void substitute_expr::evaluate(int tentative, const reference &ref,
888 string &result, substring_position &pos)
890 int start_length = result.length();
892 expr1->evaluate(tentative, ref, result, pos);
893 if (result.length() > start_length && result[result.length() - 1] == '-') {
894 // ought to see if pos covers the -
895 result.set_length(result.length() - 1);
897 expr2->evaluate(tentative, ref, result, pos);
901 void conditional_expr::evaluate(int tentative, const reference &ref,
902 string &result, substring_position &pos)
905 substring_position temp_pos;
907 expr1->evaluate(tentative, ref, temp, temp_pos);
908 if (temp.length() > 0) {
910 expr2->evaluate(tentative, ref, result, pos);
914 expr3->evaluate(tentative, ref, result, pos);
918 void reference::pre_compute_label()
920 if (parsed_label != 0
921 && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) {
923 substring_position temp_pos;
924 parsed_label->evaluate(1, *this, label, temp_pos);
925 label_ptr = lookup_label(label);
929 void reference::compute_label()
933 parsed_label->evaluate(0, *this, label, separator_pos);
934 if (short_label_flag && parsed_short_label)
935 parsed_short_label->evaluate(0, *this, short_label, short_separator_pos);
938 if (parsed_date_label) {
939 substring_position temp_pos;
940 parsed_date_label->evaluate(0, *this, new_date, temp_pos);
945 label_ptr->count += 1;
948 void reference::immediate_compute_label()
951 label_ptr->total = 2; // force use of disambiguator
955 int reference::merge_labels(reference **v, int n, label_type type,
958 if (abbreviate_label_ranges)
959 return merge_labels_by_number(v, n, type, result);
961 return merge_labels_by_parts(v, n, type, result);
964 int reference::merge_labels_by_number(reference **v, int n, label_type type,
969 int num = get_number();
970 // Only merge three or more labels.
971 if (v[0]->get_number() != num + 1
972 || v[1]->get_number() != num + 2)
975 for (i = 2; i < n; i++)
976 if (v[i]->get_number() != num + i + 1)
978 result = get_label(type);
979 result += label_range_indicator;
980 result += v[i - 1]->get_label(type);
984 const substring_position &reference::get_separator_pos(label_type type) const
986 if (type == SHORT_LABEL && short_label_flag)
987 return short_separator_pos;
989 return separator_pos;
992 const string &reference::get_label(label_type type) const
994 if (type == SHORT_LABEL && short_label_flag)
1000 int reference::merge_labels_by_parts(reference **v, int n, label_type type,
1005 const string &lb = get_label(type);
1006 const substring_position &sp = get_separator_pos(type);
1008 || sp.start != v[0]->get_separator_pos(type).start
1009 || memcmp(lb.contents(), v[0]->get_label(type).contents(),
1015 result += separate_label_second_parts;
1016 const substring_position &s = v[i]->get_separator_pos(type);
1017 int sep_end_pos = s.start + s.length;
1018 result.append(v[i]->get_label(type).contents() + sep_end_pos,
1019 v[i]->get_label(type).length() - sep_end_pos);
1021 && sp.start == v[i]->get_separator_pos(type).start
1022 && memcmp(lb.contents(), v[i]->get_label(type).contents(),
1029 label_info::label_info(const string &s)
1030 : start(label_pool.length()), length(s.length()), count(0), total(1)
/* Hash table of label_info pointers used by lookup_label().  It is
   allocated on the first lookup with 17 slots. */
1035 static label_info **label_table = 0;
/* Number of slots currently allocated in label_table. */
1036 static int label_table_size = 0;
/* Number of labels entered so far.  lookup_label() rehashes into a larger
   table once twice this count exceeds label_table_size. */
1037 static int label_table_used = 0;
1039 label_info *lookup_label(const string &label)
1041 if (label_table == 0) {
1042 label_table = new label_info *[17];
1043 label_table_size = 17;
1044 for (int i = 0; i < 17; i++)
1047 unsigned h = hash_string(label.contents(), label.length()) % label_table_size;
1049 for (ptr = label_table + h;
1051 (ptr == label_table)
1052 ? (ptr = label_table + label_table_size - 1)
1054 if ((*ptr)->length == label.length()
1055 && memcmp(label_pool.contents() + (*ptr)->start, label.contents(),
1056 label.length()) == 0) {
1060 label_info *result = *ptr = new label_info(label);
1061 if (++label_table_used * 2 > label_table_size) {
1062 // Rehash the table.
1063 label_info **old_table = label_table;
1064 int old_size = label_table_size;
1065 label_table_size = next_size(label_table_size);
1066 label_table = new label_info *[label_table_size];
1068 for (i = 0; i < label_table_size; i++)
1070 for (i = 0; i < old_size; i++)
1072 h = hash_string(label_pool.contents() + old_table[i]->start,
1073 old_table[i]->length);
1075 for (p = label_table + (h % label_table_size);
1078 ? (p = label_table + label_table_size - 1)
1090 for (int i = 0; i < label_table_size; i++) {
1091 delete label_table[i];
1094 label_table_used = 0;
1098 static void consider_authors(reference **start, reference **end, int i);
1100 void compute_labels(reference **v, int n)
1103 && (parsed_label->analyze() & expression::CONTAINS_AT)
1104 && sort_fields.length() >= 2
1105 && sort_fields[0] == 'A'
1106 && sort_fields[1] == '+')
1107 consider_authors(v, v + n, 0);
1108 for (int i = 0; i < n; i++)
1109 v[i]->compute_label();
1113 /* A reference with a list of authors <A0,A1,...,AN> _needs_ author i
1114 where 0 <= i <= N if there exists a reference with a list of authors
1115 <B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i
1116 and Aj = Bj for 0 <= j < i. In this case we can't say ``A0,
1117 A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and
1118 <B0,B1,...,BM>. If a reference needs author i we only have to call
1119 need_author(j) for some j >= i such that the reference also needs
1122 /* This function handles 2 tasks:
1123 determine which authors are needed (cannot be elided with et al.);
1124 determine which authors can have only last names in the labels.
1126 References >= start and < end have the same first i author names.
1127 Also they're sorted by A+. */
1129 static void consider_authors(reference **start, reference **end, int i)
1133 reference **p = start;
1134 if (i >= (*p)->get_nauthors()) {
1135 for (++p; p < end && i >= (*p)->get_nauthors(); p++)
1137 if (p < end && i > 0) {
1138 // If we have an author list <A B C> and an author list <A B C D>,
1139 // then both lists need C.
1140 for (reference **q = start; q < end; q++)
1141 (*q)->need_author(i - 1);
1146 reference **last_name_start = p;
1147 reference **name_start = p;
1149 p < end && i < (*p)->get_nauthors()
1150 && same_author_last_name(**last_name_start, **p, i);
1152 if (!same_author_name(**name_start, **p, i)) {
1153 consider_authors(name_start, p, i + 1);
1157 consider_authors(name_start, p, i + 1);
1158 if (last_name_start == name_start) {
1159 for (reference **q = last_name_start; q < p; q++)
1160 (*q)->set_last_name_unambiguous(i);
1162 // If we have an author list <A B C D> and <A B C E>, then the lists
1163 // need author D and E respectively.
1164 if (name_start > start || p < end) {
1165 for (reference **q = last_name_start; q < p; q++)
1166 (*q)->need_author(i);
1171 int same_author_last_name(const reference &r1, const reference &r2, int n)
1174 const char *as1 = r1.get_sort_field(0, n, 0, &ae1);
1176 const char *as2 = r2.get_sort_field(0, n, 0, &ae2);
1177 if (!as1 && !as2) return 1; // they are the same
1178 if (!as1 || !as2) return 0;
1179 return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
1182 int same_author_name(const reference &r1, const reference &r2, int n)
1185 const char *as1 = r1.get_sort_field(0, n, -1, &ae1);
1187 const char *as2 = r2.get_sort_field(0, n, -1, &ae2);
1188 if (!as1 && !as2) return 1; // they are the same
1189 if (!as1 || !as2) return 0;
1190 return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
1194 void int_set::set(int i)
1198 if (bytei >= v.length()) {
1199 int old_length = v.length();
1200 v.set_length(bytei + 1);
1201 for (int j = old_length; j <= bytei; j++)
1204 v[bytei] |= 1 << (i & 7);
1207 int int_set::get(int i) const
1211 return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0;
1214 void reference::set_last_name_unambiguous(int i)
1216 last_name_unambiguous.set(i);
1219 void reference::need_author(int n)
1221 if (n > last_needed_author)
1222 last_needed_author = n;
1225 const char *reference::get_authors(const char **end) const
1227 if (!computed_authors) {
1228 ((reference *)this)->computed_authors = 1;
1229 string &result = ((reference *)this)->authors;
1230 int na = get_nauthors();
1232 for (int i = 0; i < na; i++) {
1233 if (last_name_unambiguous.get(i)) {
1234 const char *e, *start = get_author_last_name(i, &e);
1236 result.append(start, e - start);
1239 const char *e, *start = get_author(i, &e);
1241 result.append(start, e - start);
1243 if (i == last_needed_author
1244 && et_al.length() > 0
1245 && et_al_min_elide > 0
1246 && last_needed_author + et_al_min_elide < na
1247 && na >= et_al_min_total) {
1253 result += join_authors_exactly_two;
1254 else if (i < na - 2)
1255 result += join_authors_default;
1257 result += join_authors_last_two;
1261 const char *start = authors.contents();
1262 *end = start + authors.length();
1266 int reference::get_nauthors() const
1271 for (na = 0; get_author(na, &dummy) != 0; na++)
1273 ((reference *)this)->nauthors = na;
1277 #line 1277 "label.cpp"
1278 /* allocate initial stack or double stack size, up to YYMAXDEPTH */
1279 static int yygrowstack(void)
1286 if ((newsize = yystacksize) == 0)
1287 newsize = YYINITSTACKSIZE;
1288 else if (newsize >= YYMAXDEPTH)
1290 else if ((newsize *= 2) > YYMAXDEPTH)
1291 newsize = YYMAXDEPTH;
1295 ? (short *)realloc(yyss, newsize * sizeof(*newss))
1296 : (short *)malloc(newsize * sizeof(*newss));
1303 ? (YYSTYPE *)realloc(yyvs, newsize * sizeof(*newvs))
1304 : (YYSTYPE *)malloc(newsize * sizeof(*newvs));
1310 yystacksize = newsize;
1311 yysslim = yyss + newsize - 1;
/* Control-transfer macros for grammar actions.  Each one expands to a
   goto aimed at a parser label. */
/* YYABORT and YYREJECT both jump to the yyabort label. */
1315 #define YYABORT goto yyabort
1316 #define YYREJECT goto yyabort
/* YYACCEPT jumps to the yyaccept label. */
1317 #define YYACCEPT goto yyaccept
/* YYERROR jumps to the yyerrlab label. */
1318 #define YYERROR goto yyerrlab
1323 int yym, yyn, yystate;
1327 if ((yys = getenv("YYDEBUG")) != 0)
1330 if (yyn >= '0' && yyn <= '9')
1331 yydebug = yyn - '0';
1340 if (yyss == NULL && yygrowstack()) goto yyoverflow;
1347 if ((yyn = yydefred[yystate]) != 0) goto yyreduce;
1350 if ((yychar = yylex()) < 0) yychar = 0;
1355 if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
1356 if (!yys) yys = "illegal-symbol";
1357 printf("%sdebug: state %d, reading %d (%s)\n",
1358 YYPREFIX, yystate, yychar, yys);
1362 if ((yyn = yysindex[yystate]) && (yyn += yychar) >= 0 &&
1363 yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
1367 printf("%sdebug: state %d, shifting to state %d\n",
1368 YYPREFIX, yystate, yytable[yyn]);
1370 if (yyssp >= yysslim && yygrowstack())
1374 yystate = yytable[yyn];
1375 *++yyssp = yytable[yyn];
1378 if (yyerrflag > 0) --yyerrflag;
1381 if ((yyn = yyrindex[yystate]) && (yyn += yychar) >= 0 &&
1382 yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
1387 if (yyerrflag) goto yyinrecovery;
1389 yyerror("syntax error");
1402 if ((yyn = yysindex[*yyssp]) && (yyn += YYERRCODE) >= 0 &&
1403 yyn <= YYTABLESIZE && yycheck[yyn] == YYERRCODE)
1407 printf("%sdebug: state %d, error recovery shifting\
1408 to state %d\n", YYPREFIX, *yyssp, yytable[yyn]);
1410 if (yyssp >= yysslim && yygrowstack())
1414 yystate = yytable[yyn];
1415 *++yyssp = yytable[yyn];
1423 printf("%sdebug: error recovery discarding state %d\n",
1426 if (yyssp <= yyss) goto yyabort;
1434 if (yychar == 0) goto yyabort;
1439 if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
1440 if (!yys) yys = "illegal-symbol";
1441 printf("%sdebug: state %d, error recovery discards token %d (%s)\n",
1442 YYPREFIX, yystate, yychar, yys);
1452 printf("%sdebug: state %d, reducing by rule %d (%s)\n",
1453 YYPREFIX, yystate, yyn, yyrule[yyn]);
1457 yyval = yyvsp[1-yym];
1459 memset(&yyval, 0, sizeof yyval);
1464 { parse_result = (yyvsp[0].expr ? new analyzed_expr(yyvsp[0].expr) : 0); }
1468 { yyval.expr = yyvsp[0].expr; }
1472 { yyval.expr = new conditional_expr(yyvsp[-4].expr, yyvsp[-2].expr, yyvsp[0].expr); }
1480 { yyval.expr = yyvsp[0].expr; }
1484 { yyval.expr = yyvsp[0].expr; }
1488 { yyval.expr = new alternative_expr(yyvsp[-2].expr, yyvsp[0].expr); }
1492 { yyval.expr = new conditional_expr(yyvsp[-2].expr, yyvsp[0].expr, 0); }
1496 { yyval.expr = yyvsp[0].expr; }
1500 { yyval.expr = new list_expr(yyvsp[-1].expr, yyvsp[0].expr); }
1504 { yyval.expr = yyvsp[0].expr; }
1508 { yyval.expr = new substitute_expr(yyvsp[-2].expr, yyvsp[0].expr); }
1512 { yyval.expr = new at_expr; }
1517 yyval.expr = new literal_expr(literals.contents() + yyvsp[0].str.start,
1523 { yyval.expr = new field_expr(yyvsp[0].num, 0); }
1527 { yyval.expr = new field_expr(yyvsp[-1].num, yyvsp[0].num - 1); }
1532 switch (yyvsp[0].num) {
1537 yyval.expr = new format_expr(yyvsp[0].num);
1540 command_error("unrecognized format `%1'", char(yyvsp[0].num));
1541 yyval.expr = new format_expr('a');
1549 yyval.expr = new format_expr('0', yyvsp[0].dig.ndigits, yyvsp[0].dig.val);
1555 switch (yyvsp[-1].num) {
1557 yyval.expr = new map_expr(yyvsp[-4].expr, lowercase);
1560 yyval.expr = new map_expr(yyvsp[-4].expr, uppercase);
1563 yyval.expr = new map_expr(yyvsp[-4].expr, capitalize);
1566 yyval.expr = new map_expr(yyvsp[-4].expr, reverse_name);
1569 yyval.expr = new map_expr(yyvsp[-4].expr, abbreviate_name);
1572 yyval.expr = new extractor_expr(yyvsp[-4].expr, find_year, yyvsp[-2].num);
1575 yyval.expr = new extractor_expr(yyvsp[-4].expr, find_last_name, yyvsp[-2].num);
1578 yyval.expr = yyvsp[-4].expr;
1579 command_error("unknown function `%1'", char(yyvsp[-1].num));
1586 { yyval.expr = new truncate_expr(yyvsp[-2].expr, yyvsp[0].num); }
1590 { yyval.expr = new truncate_expr(yyvsp[-2].expr, -yyvsp[0].num); }
1594 { yyval.expr = new star_expr(yyvsp[-1].expr); }
1598 { yyval.expr = yyvsp[-1].expr; }
1602 { yyval.expr = new separator_expr(yyvsp[-1].expr); }
1610 { yyval.num = yyvsp[0].num; }
1614 { yyval.num = yyvsp[0].num; }
1618 { yyval.num = yyvsp[-1].num*10 + yyvsp[0].num; }
1622 { yyval.dig.ndigits = 1; yyval.dig.val = yyvsp[0].num; }
1626 { yyval.dig.ndigits = yyvsp[-1].dig.ndigits + 1; yyval.dig.val = yyvsp[-1].dig.val*10 + yyvsp[0].num; }
1640 #line 1640 "label.cpp"
1646 if (yystate == 0 && yym == 0)
1650 printf("%sdebug: after reduction, shifting from state 0 to\
1651 state %d\n", YYPREFIX, YYFINAL);
1658 if ((yychar = yylex()) < 0) yychar = 0;
1663 if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
1664 if (!yys) yys = "illegal-symbol";
1665 printf("%sdebug: state %d, reading %d (%s)\n",
1666 YYPREFIX, YYFINAL, yychar, yys);
1670 if (yychar == 0) goto yyaccept;
1673 if ((yyn = yygindex[yym]) && (yyn += yystate) >= 0 &&
1674 yyn <= YYTABLESIZE && yycheck[yyn] == yystate)
1675 yystate = yytable[yyn];
1677 yystate = yydgoto[yym];
1680 printf("%sdebug: after reduction, shifting from state %d \
1681 to state %d\n", YYPREFIX, *yyssp, yystate);
1683 if (yyssp >= yysslim && yygrowstack())
1687 *++yyssp = (short) yystate;
1692 yyerror("yacc stack overflow");