Merge from vendor branch LESS:
[dragonfly.git] / usr.bin / window / compress.c
1 /*
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Edward Wang at The University of California, Berkeley.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * @(#)compress.c       8.1 (Berkeley) 6/6/93
37  * $FreeBSD: src/usr.bin/window/compress.c,v 1.2.12.1 2001/05/17 09:45:00 obrien Exp $
38  * $DragonFly: src/usr.bin/window/compress.c,v 1.2 2003/06/17 04:29:33 dillon Exp $
39  */
40
41 #include "ww.h"
42 #include "tt.h"
43
44         /* special */
45 #include <stdio.h>
46 #include <fcntl.h>
47 #include <stdlib.h>
48 int cc_trace = 0;
49 FILE *cc_trace_fp;
50
51         /* tunable parameters */
52
53 int cc_reverse = 1;
54 int cc_sort = 0;
55 int cc_chop = 0;
56
57 int cc_token_max = 8;           /* <= TOKEN_MAX */
58 int cc_token_min = 2;           /* > tt.tt_put_token_cost */
59 int cc_npass0 = 1;
60 int cc_npass1 = 1;
61
62 int cc_bufsize = 1024 * 3;      /* XXX, or 80 * 24 * 2 */
63
64 int cc_ntoken = 8192;
65
66 #define cc_weight XXX
67 #ifndef cc_weight
68 int cc_weight = 0;
69 #endif
70
71 #define TOKEN_MAX 16
72
73 struct cc {
74         char string[TOKEN_MAX];
75         char length;
76         char flag;
77 #ifndef cc_weight
78         short weight;
79 #endif
80         long time;              /* time last seen */
81         short bcount;           /* count in this buffer */
82         short ccount;           /* count in compression */
83         short places;           /* places in the buffer */
84         short code;             /* token code */
85         struct cc *qforw, *qback;
86         struct cc *hforw, **hback;
87 };
88
89 short cc_thresholds[TOKEN_MAX + 1];
90 #define thresh(length) (cc_thresholds[length])
91 #define threshp(code, count, length) \
92         ((code) >= 0 || (short) (count) >= cc_thresholds[length])
93
94 #ifndef cc_weight
95 short cc_wthresholds[TOKEN_MAX + 1];
96 #define wthresh(length) (cc_wthresholds[length])
97 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
98 #else
99 #define wthreshp(weight, length) (0)
100 #endif
101
102 #ifndef cc_weight
103 short cc_wlimits[TOKEN_MAX + 1];
104 #define wlimit(length) (cc_wlimits[length])
105 #endif
106
107 #define put_token_score(length) ((length) - tt.tt_put_token_cost)
108
109 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
110 #define score_adjust(score, p) \
111         do { \
112                 int length = (p)->length; \
113                 int ccount = (p)->ccount; \
114                 if (threshp((p)->code, ccount, length) || \
115                     wthreshp((p)->weight, length)) /* XXX */ \
116                         (score) -= length - tt.tt_put_token_cost; \
117                 else \
118                         (score) += cc_score_adjustments[length][ccount]; \
119         } while (0)
120
121 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
122
123 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
124
125 #define qinsert(p1, p2) \
126         do { \
127                 register struct cc *forw = (p1)->qforw; \
128                 register struct cc *back = (p1)->qback; \
129                 back->qforw = forw; \
130                 forw->qback = back; \
131                 forw = (p2)->qforw; \
132                 (p1)->qforw = forw; \
133                 forw->qback = (p1); \
134                 (p2)->qforw = (p1); \
135                 (p1)->qback = (p2); \
136         } while (0)
137
138 #define qinsertq(q, p) \
139         ((q)->qforw == (q) ? 0 : \
140          ((q)->qback->qforw = (p)->qforw, \
141           (p)->qforw->qback = (q)->qback, \
142           (q)->qforw->qback = (p), \
143           (p)->qforw = (q)->qforw, \
144           (q)->qforw = (q), \
145           (q)->qback = (q)))
146
147 #define H               (14)
148 #define HSIZE           (1 << H)
149 #define hash(h, c)      ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
150
151 char *cc_buffer;
152 struct cc **cc_output;                  /* the output array */
153 short *cc_places[TOKEN_MAX + 1];
154 short *cc_hashcodes;                    /* for computing hashcodes */
155 struct cc **cc_htab;                    /* the hash table */
156 struct cc **cc_tokens;                  /* holds all the active tokens */
157 struct cc_undo {
158         struct cc **pos;
159         struct cc *val;
160 } *cc_undo;
161
162 long cc_time, cc_time0;
163
164 char *cc_tt_ob, *cc_tt_obe;
165
166 ccinit()
167 {
168         register i, j;
169         register struct cc *p;
170
171         if (tt.tt_token_max > cc_token_max)
172                 tt.tt_token_max = cc_token_max;
173         if (tt.tt_token_min < cc_token_min)
174                 tt.tt_token_min = cc_token_min;
175         if (tt.tt_token_min > tt.tt_token_max) {
176                 tt.tt_ntoken = 0;
177                 return 0;
178         }
179         if (tt.tt_ntoken > cc_ntoken / 2)       /* not likely */
180                 tt.tt_ntoken = cc_ntoken / 2;
181 #define C(x) (sizeof (x) / sizeof *(x))
182         for (i = 0; i < C(cc_thresholds); i++) {
183                 int h = i - tt.tt_put_token_cost;
184                 if (h > 0)
185                         cc_thresholds[i] =
186                                 (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
187                 else
188                         cc_thresholds[i] = 0;
189         }
190         for (i = 0; i < C(cc_score_adjustments); i++) {
191                 int t = cc_thresholds[i];
192                 for (j = 0; j < C(*cc_score_adjustments); j++) {
193                         if (j >= t)
194                                 cc_score_adjustments[i][j] =
195                                         - (i - tt.tt_put_token_cost);
196                         else if (j < t - 1)
197                                 cc_score_adjustments[i][j] = 0;
198                         else
199                                 /*
200                                  * cost now is
201                                  *      length * (ccount + 1)           a
202                                  * cost before was
203                                  *      set-token-cost + length +
204                                  *              ccount * put-token-cost b
205                                  * the score adjustment is (b - a)
206                                  */
207                                 cc_score_adjustments[i][j] =
208                                         tt.tt_set_token_cost + i +
209                                                 j * tt.tt_put_token_cost -
210                                                         i * (j + 1);
211                         if (j >= t)
212                                 cc_initial_scores[i][j] = 0;
213                         else
214                                 /*
215                                  * - (set-token-cost +
216                                  *      (length - put-token-cost) -
217                                  *      (length - put-token-cost) * ccount)
218                                  */
219                                 cc_initial_scores[i][j] =
220                                         - (tt.tt_set_token_cost +
221                                            (i - tt.tt_put_token_cost) -
222                                            (i - tt.tt_put_token_cost) * j);
223                 }
224         }
225 #ifndef cc_weight
226         for (i = 1; i < C(cc_wthresholds); i++) {
227                 cc_wthresholds[i] =
228                         ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
229                                 i / 5 + 1) *
230                                 cc_weight + 1;
231                 cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
232         }
233 #endif
234 #undef C
235         if ((cc_output = (struct cc **)
236              malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
237                 goto nomem;
238         if ((cc_hashcodes = (short *)
239              malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
240                 goto nomem;
241         if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
242                 goto nomem;
243         if ((cc_tokens = (struct cc **)
244              malloc((unsigned)
245                     (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
246                     sizeof *cc_tokens)) == 0)
247                 goto nomem;
248         if ((cc_undo = (struct cc_undo *)
249              malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
250                 goto nomem;
251         for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
252                 if ((cc_places[i] = (short *)
253                      malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
254                         goto nomem;
255         cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
256         cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
257         cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
258         cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
259         if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
260                 goto nomem;
261         for (i = 0; i < tt.tt_ntoken; i++) {
262                 p->code = i;
263                 p->time = -1;
264                 p->qback = cc_q0a.qback;
265                 p->qforw = &cc_q0a;
266                 p->qback->qforw = p;
267                 cc_q0a.qback = p;
268                 p++;
269         }
270         for (; i < cc_ntoken; i++) {
271                 p->code = -1;
272                 p->time = -1;
273                 p->qback = cc_q1a.qback;
274                 p->qforw = &cc_q1a;
275                 p->qback->qforw = p;
276                 cc_q1a.qback = p;
277                 p++;
278         }
279         cc_tt_ob = tt_ob;
280         cc_tt_obe = tt_obe;
281         if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
282                 goto nomem;
283         return 0;
284 nomem:
285         wwerrno = WWE_NOMEM;
286         return -1;
287 }
288
289 ccstart()
290 {
291         int ccflush();
292
293         ttflush();
294         tt_obp = tt_ob = cc_buffer;
295         tt_obe = tt_ob + cc_bufsize;
296         tt.tt_flush = ccflush;
297         if (cc_trace) {
298                 cc_trace_fp = fopen("window-trace", "a");
299                 (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
300         }
301         ccreset();
302 }
303
304 ccreset()
305 {
306         register struct cc *p;
307
308         bzero((char *) cc_htab, HSIZE * sizeof *cc_htab);
309         for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
310                 p->hback = 0;
311         for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
312                 p->hback = 0;
313 }
314
315 ccend()
316 {
317
318         ttflush();
319         tt_obp = tt_ob = cc_tt_ob;
320         tt_obe = cc_tt_obe;
321         tt.tt_flush = 0;
322         if (cc_trace_fp != NULL) {
323                 (void) fclose(cc_trace_fp);
324                 cc_trace_fp = NULL;
325         }
326 }
327
328 ccflush()
329 {
330         int bufsize = tt_obp - tt_ob;
331         int n;
332
333         if (tt_ob != cc_buffer)
334                 abort();
335         if (cc_trace_fp != NULL) {
336                 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
337                 (void) putc(-1, cc_trace_fp);
338         }
339         tt.tt_flush = 0;
340         (*tt.tt_compress)(1);
341         if (bufsize < tt.tt_token_min) {
342                 ttflush();
343                 goto out;
344         }
345         tt_obp = tt_ob = cc_tt_ob;
346         tt_obe = cc_tt_obe;
347         cc_time0 = cc_time;
348         cc_time += bufsize;
349         n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
350         cc_compress_phase(cc_output, bufsize, cc_tokens, n);
351         cc_output_phase(cc_buffer, cc_output, bufsize);
352         ttflush();
353         tt_obp = tt_ob = cc_buffer;
354         tt_obe = cc_buffer + cc_bufsize;
355 out:
356         (*tt.tt_compress)(0);
357         tt.tt_flush = ccflush;
358 }
359
360 cc_sweep_phase(buffer, bufsize, tokens)
361         char *buffer;
362         struct cc **tokens;
363 {
364         register struct cc **pp = tokens;
365         register i, n;
366 #ifdef STATS
367         int nn, ii;
368 #endif
369
370 #ifdef STATS
371         if (verbose >= 0)
372                 time_begin();
373         if (verbose > 0)
374                 printf("Sweep:");
375 #endif
376         cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
377 #ifdef STATS
378         ntoken_stat = 0;
379         nn = 0;
380         ii = 0;
381 #endif
382         for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
383 #ifdef STATS
384                 if (verbose > 0) {
385                         if (ii > 7) {
386                                 printf("\n      ");
387                                 ii = 0;
388                         }
389                         ii++;
390                         printf(" (%d", i);
391                         (void) fflush(stdout);
392                 }
393 #endif
394                 n = cc_sweep(buffer, bufsize, pp, i);
395                 pp += n;
396 #ifdef STATS
397                 if (verbose > 0) {
398                         if (--n > 0) {
399                                 printf(" %d", n);
400                                 nn += n;
401                         }
402                         putchar(')');
403                 }
404 #endif
405         }
406         qinsertq(&cc_q1b, &cc_q1a);
407 #ifdef STATS
408         if (verbose > 0)
409                 printf("\n       %d tokens, %d candidates\n",
410                         ntoken_stat, nn);
411         if (verbose >= 0)
412                 time_end();
413 #endif
414         return pp - tokens;
415 }
416
417 cc_sweep0(buffer, n, length)
418         char *buffer;
419 {
420         register char *p;
421         register short *hc;
422         register i;
423         register short c;
424         register short pc = tt.tt_padc;
425
426         /* n and length are at least 1 */
427         p = buffer++;
428         hc = cc_hashcodes;
429         i = n;
430         do {
431                 if ((*hc++ = *p++) == pc)
432                         hc[-1] = -1;
433         } while (--i);
434         while (--length) {
435                 p = buffer++;
436                 hc = cc_hashcodes;
437                 for (i = n--; --i;) {
438                         if ((c = *p++) == pc || *hc < 0)
439                                 c = -1;
440                         else
441                                 c = hash(*hc, c);
442                         *hc++ = c;
443                 }
444         }
445 }
446
447 cc_sweep(buffer, bufsize, tokens, length)
448         char *buffer;
449         struct cc **tokens;
450         register length;
451 {
452         register struct cc *p;
453         register char *cp;
454         register i;
455         short *hc;
456         short *places = cc_places[length];
457         struct cc **pp = tokens;
458         short threshold = thresh(length);
459 #ifndef cc_weight
460         short wthreshold = wthresh(length);
461         short limit = wlimit(length);
462 #endif
463         int time;
464         short pc = tt.tt_padc;
465
466         i = length - 1;
467         bufsize -= i;
468         cp = buffer + i;
469         hc = cc_hashcodes;
470         time = cc_time0;
471         for (i = 0; i < bufsize; i++, time++) {
472                 struct cc **h;
473
474                 {
475                         register short *hc1 = hc;
476                         register short c = *cp++;
477                         register short hh;
478                         if ((hh = *hc1) < 0 || c == pc) {
479                                 *hc1++ = -1;
480                                 hc = hc1;
481                                 continue;
482                         }
483                         h = cc_htab + (*hc1++ = hash(hh, c));
484                         hc = hc1;
485                 }
486                 for (p = *h; p != 0; p = p->hforw)
487                         if (p->length == (char) length) {
488                                 register char *p1 = p->string;
489                                 register char *p2 = cp - length;
490                                 register n = length;
491                                 do
492                                         if (*p1++ != *p2++)
493                                                 goto fail;
494                                 while (--n);
495                                 break;
496                         fail:;
497                         }
498                 if (p == 0) {
499                         p = cc_q1a.qback;
500                         if (p == &cc_q1a ||
501                             p->time >= cc_time0 && p->length == (char) length)
502                                 continue;
503                         if (p->hback != 0)
504                                 if ((*p->hback = p->hforw) != 0)
505                                         p->hforw->hback = p->hback;
506                         {
507                                 register char *p1 = p->string;
508                                 register char *p2 = cp - length;
509                                 register n = length;
510                                 do
511                                         *p1++ = *p2++;
512                                 while (--n);
513                         }
514                         p->length = length;
515 #ifndef cc_weight
516                         p->weight = cc_weight;
517 #endif
518                         p->time = time;
519                         p->bcount = 1;
520                         p->ccount = 0;
521                         p->flag = 0;
522                         if ((p->hforw = *h) != 0)
523                                 p->hforw->hback = &p->hforw;
524                         *h = p;
525                         p->hback = h;
526                         qinsert(p, &cc_q1a);
527                         places[i] = -1;
528                         p->places = i;
529 #ifdef STATS
530                         ntoken_stat++;
531 #endif
532                 } else if (p->time < cc_time0) {
533 #ifndef cc_weight
534                         if ((p->weight += p->time - time) < 0)
535                                 p->weight = cc_weight;
536                         else if ((p->weight += cc_weight) > limit)
537                                 p->weight = limit;
538 #endif
539                         p->time = time;
540                         p->bcount = 1;
541                         p->ccount = 0;
542                         if (p->code >= 0) {
543                                 p->flag = 1;
544                                 *pp++ = p;
545                         } else
546 #ifndef cc_weight
547                         if (p->weight >= wthreshold) {
548                                 p->flag = 1;
549                                 *pp++ = p;
550                                 qinsert(p, &cc_q1b);
551                         } else
552 #endif
553                         {
554                                 p->flag = 0;
555                                 qinsert(p, &cc_q1a);
556                         }
557                         places[i] = -1;
558                         p->places = i;
559 #ifdef STATS
560                         ntoken_stat++;
561 #endif
562                 } else if (p->time + length > time) {
563                         /*
564                          * overlapping token, don't count as two and
565                          * don't update time, but do adjust weight to offset
566                          * the difference
567                          */
568 #ifndef cc_weight
569                         if (cc_weight != 0) {   /* XXX */
570                                 p->weight += time - p->time;
571                                 if (!p->flag && p->weight >= wthreshold) {
572                                         p->flag = 1;
573                                         *pp++ = p;
574                                         qinsert(p, &cc_q1b);
575                                 }
576                         }
577 #endif
578                         places[i] = p->places;
579                         p->places = i;
580                 } else {
581 #ifndef cc_weight
582                         if ((p->weight += p->time - time) < 0)
583                                 p->weight = cc_weight;
584                         else if ((p->weight += cc_weight) > limit)
585                                 p->weight = limit;
586 #endif
587                         p->time = time;
588                         p->bcount++;
589                         if (!p->flag &&
590                             /* code must be < 0 if flag false here */
591                             (p->bcount >= threshold
592 #ifndef cc_weight
593                              || p->weight >= wthreshold
594 #endif
595                              )) {
596                                 p->flag = 1;
597                                 *pp++ = p;
598                                 qinsert(p, &cc_q1b);
599                         }
600                         places[i] = p->places;
601                         p->places = i;
602                 }
603         }
604         if ((i = pp - tokens) > 0) {
605                 *pp = 0;
606                 if (cc_reverse)
607                         cc_sweep_reverse(tokens, places);
608                 if (cc_sort && i > 1) {
609                         int cc_token_compare();
610                         qsort((char *) tokens, i, sizeof *tokens,
611                               cc_token_compare);
612                 }
613                 if (cc_chop) {
614                         if ((i = i * cc_chop / 100) == 0)
615                                 i = 1;
616                         tokens[i] = 0;
617                 }
618                 i++;
619         }
620         return i;
621 }
622
623 cc_sweep_reverse(pp, places)
624         register struct cc **pp;
625         register short *places;
626 {
627         register struct cc *p;
628         register short front, back, t;
629
630         while ((p = *pp++) != 0) {
631                 back = -1;
632                 t = p->places;
633                 /* the list is never empty */
634                 do {
635                         front = places[t];
636                         places[t] = back;
637                         back = t;
638                 } while ((t = front) >= 0);
639                 p->places = back;
640         }
641 }
642
643 cc_compress_phase(output, bufsize, tokens, ntoken)
644         struct cc **output;
645         struct cc **tokens;
646 {
647         register i;
648
649         bzero((char *) output, bufsize * sizeof *output);
650         for (i = 0; i < cc_npass0; i++)
651                 cc_compress_phase1(output, tokens, ntoken, 0);
652         for (i = 0; i < cc_npass1; i++)
653                 cc_compress_phase1(output, tokens, ntoken, 1);
654         cc_compress_cleanup(output, bufsize);
655 }
656
657 cc_compress_phase1(output, tokens, ntoken, flag)
658         register struct cc **output;
659         struct cc **tokens;
660 {
661         register struct cc **pp;
662 #ifdef STATS
663         register int i = 0;
664         int nt = 0, cc = 0, nc = 0;
665 #endif
666
667 #ifdef STATS
668         if (verbose >= 0)
669                 time_begin();
670         if (verbose > 0)
671                 printf("Compress:");
672 #endif
673         pp = tokens;
674         while (pp < tokens + ntoken) {
675 #ifdef STATS
676                 if (verbose > 0) {
677                         ntoken_stat = 0;
678                         ccount_stat = 0;
679                         ncover_stat = 0;
680                         if (i > 2) {
681                                 printf("\n         ");
682                                 i = 0;
683                         }
684                         i++;
685                         printf(" (%d", (*pp)->length);
686                         (void) fflush(stdout);
687                 }
688 #endif
689                 pp += cc_compress(output, pp, flag);
690 #ifdef STATS
691                 if (verbose > 0) {
692                         printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
693                                ncover_stat);
694                         nt += ntoken_stat;
695                         cc += ccount_stat;
696                         nc += ncover_stat;
697                 }
698 #endif
699         }
700 #ifdef STATS
701         if (verbose > 0)
702                 printf("\n   total: (%dt %du %dc)\n", nt, cc, nc);
703         if (verbose >= 0)
704                 time_end();
705 #endif
706 }
707
708 cc_compress_cleanup(output, bufsize)
709         register struct cc **output;
710 {
711         register struct cc **end;
712
713         /* the previous output phase may have been interrupted */
714         qinsertq(&cc_q0b, &cc_q0a);
715         for (end = output + bufsize; output < end;) {
716                 register struct cc *p;
717                 register length;
718                 if ((p = *output) == 0) {
719                         output++;
720                         continue;
721                 }
722                 length = p->length;
723                 if (!p->flag) {
724                 } else if (p->code >= 0) {
725                         qinsert(p, &cc_q0b);
726                         p->flag = 0;
727                 } else if (p->ccount == 0) {
728                         *output = 0;
729                 } else if (p->ccount >= thresh(length)
730 #ifndef cc_weight
731                            || wthreshp(p->weight, length)
732 #endif
733                            ) {
734                         p->flag = 0;
735                 } else {
736                         p->ccount = 0;
737                         *output = 0;
738                 }
739                 output += length;
740         }
741 }
742
743 cc_compress(output, tokens, flag)
744         struct cc **output;
745         struct cc **tokens;
746         char flag;
747 {
748         struct cc **pp = tokens;
749         register struct cc *p = *pp++;
750         int length = p->length;
751         int threshold = thresh(length);
752 #ifndef cc_weight
753         short wthreshold = wthresh(length);
754 #endif
755         short *places = cc_places[length];
756         int *initial_scores = cc_initial_scores[length];
757         int initial_score0 = put_token_score(length);
758
759         do {
760                 int score;
761                 register struct cc_undo *undop;
762                 int ccount;
763 #ifdef STATS
764                 int ncover;
765 #endif
766                 int i;
767
768                 ccount = p->ccount;
769                 if ((short) ccount >= p->bcount)
770                         continue;
771                 if (p->code >= 0 || ccount >= threshold)
772                         score = 0;
773 #ifndef cc_weight
774                 else if (p->weight >= wthreshold)
775                         /* allow one fewer match than normal */
776                         /* XXX, should adjust for ccount */
777                         score = - tt.tt_set_token_cost;
778 #endif
779                 else
780                         score = initial_scores[ccount];
781                 undop = cc_undo;
782 #ifdef STATS
783                 ncover = 0;
784 #endif
785                 for (i = p->places; i >= 0; i = places[i]) {
786                         register struct cc **jp;
787                         register struct cc *x;
788                         register struct cc **ip = output + i;
789                         register score0 = initial_score0;
790                         struct cc **iip = ip + length;
791                         struct cc_undo *undop1 = undop;
792
793                         if ((x = *(jp = ip)) != 0)
794                                 goto z;
795                         while (--jp >= output)
796                                 if ((x = *jp) != 0) {
797                                         if (jp + x->length > ip)
798                                                 goto z;
799                                         break;
800                                 }
801                         jp = ip + 1;
802                         while (jp < iip) {
803                                 if ((x = *jp) == 0) {
804                                         jp++;
805                                         continue;
806                                 }
807                         z:
808                                 if (x == p)
809                                         goto undo;
810 #ifdef STATS
811                                 ncover++;
812 #endif
813                                 undop->pos = jp;
814                                 undop->val = x;
815                                 undop++;
816                                 *jp = 0;
817                                 x->ccount--;
818                                 score_adjust(score0, x);
819                                 if (score0 < 0 && flag)
820                                         goto undo;
821                                 jp += x->length;
822                         }
823                         undop->pos = ip;
824                         undop->val = 0;
825                         undop++;
826                         *ip = p;
827                         ccount++;
828                         score += score0;
829                         continue;
830                 undo:
831                         while (--undop >= undop1)
832                                 if (*undop->pos = x = undop->val)
833                                         x->ccount++;
834                         undop++;
835                 }
836                 if (score > 0) {
837 #ifdef STATS
838                         ccount_stat += ccount - p->ccount;
839                         ntoken_stat++;
840                         ncover_stat += ncover;
841 #endif
842                         p->ccount = ccount;
843                 } else {
844                         register struct cc_undo *u = cc_undo;
845                         while (--undop >= u) {
846                                 register struct cc *x;
847                                 if (*undop->pos = x = undop->val)
848                                         x->ccount++;
849                         }
850                 }
851         } while ((p = *pp++) != 0);
852         return pp - tokens;
853 }
854
855 cc_output_phase(buffer, output, bufsize)
856         register char *buffer;
857         register struct cc **output;
858         register bufsize;
859 {
860         register i;
861         register struct cc *p, *p1;
862
863         for (i = 0; i < bufsize;) {
864                 if ((p = output[i]) == 0) {
865                         ttputc(buffer[i]);
866                         i++;
867                 } else if (p->code >= 0) {
868                         if (--p->ccount == 0)
869                                 qinsert(p, &cc_q0a);
870                         (*tt.tt_put_token)(p->code, p->string, p->length);
871                         wwntokuse++;
872                         wwntoksave += put_token_score(p->length);
873                         i += p->length;
874                 } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
875                         p->code = p1->code;
876                         p1->code = -1;
877                         qinsert(p1, &cc_q1a);
878                         if (--p->ccount == 0)
879                                 qinsert(p, &cc_q0a);
880                         else
881                                 qinsert(p, &cc_q0b);
882                         (*tt.tt_set_token)(p->code, p->string, p->length);
883                         wwntokdef++;
884                         wwntoksave -= tt.tt_set_token_cost;
885                         i += p->length;
886                 } else {
887                         p->ccount--;
888                         ttwrite(p->string, p->length);
889                         wwntokbad++;
890                         i += p->length;
891                 }
892         }
893         wwntokc += bufsize;
894 }
895
896 cc_token_compare(p1, p2)
897         struct cc **p1, **p2;
898 {
899         return (*p2)->bcount - (*p1)->bcount;
900 }