Sweep-fix comparing pointers with 0 (and assigning 0 to pointers).
[dragonfly.git] / usr.bin / window / compress.c
1 /*      @(#)compress.c  8.1 (Berkeley) 6/6/93   */
2 /*      $NetBSD: compress.c,v 1.7 2009/04/14 08:50:06 lukem Exp $       */
3
4 /*
5  * Copyright (c) 1989, 1993
6  *      The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Edward Wang at The University of California, Berkeley.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35
36 #include <fcntl.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include "defs.h"
41 #include "tt.h"
42
43         /* special */
44 int cc_trace = 0;
45 FILE *cc_trace_fp;
46
47         /* tunable parameters */
48
49 int cc_reverse = 1;
50 int cc_sort = 0;
51 int cc_chop = 0;
52
53 int cc_token_max = 8;           /* <= TOKEN_MAX */
54 int cc_token_min = 2;           /* > tt.tt_put_token_cost */
55 int cc_npass0 = 1;
56 int cc_npass1 = 1;
57
58 int cc_bufsize = 1024 * 3;      /* XXX, or 80 * 24 * 2 */
59
60 int cc_ntoken = 8192;
61
62 #define cc_weight XXX
63 #ifndef cc_weight
64 int cc_weight = 0;
65 #endif
66
67 #define TOKEN_MAX 16
68
69 struct cc {
70         char string[TOKEN_MAX];
71         char length;
72         char flag;
73 #ifndef cc_weight
74         short weight;
75 #endif
76         long time;              /* time last seen */
77         short bcount;           /* count in this buffer */
78         short ccount;           /* count in compression */
79         short places;           /* places in the buffer */
80         short code;             /* token code */
81         struct cc *qforw, *qback;
82         struct cc *hforw, **hback;
83 };
84
85 short cc_thresholds[TOKEN_MAX + 1];
86 #define thresh(length) (cc_thresholds[length])
87 #define threshp(code, count, length) \
88         ((code) >= 0 || (short) (count) >= cc_thresholds[length])
89
90 #ifndef cc_weight
91 short cc_wthresholds[TOKEN_MAX + 1];
92 #define wthresh(length) (cc_wthresholds[length])
93 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
94 #else
95 #define wthreshp(weight, length) (0)
96 #endif
97
98 #ifndef cc_weight
99 short cc_wlimits[TOKEN_MAX + 1];
100 #define wlimit(length) (cc_wlimits[length])
101 #endif
102
103 #define put_token_score(length) ((length) - tt.tt_put_token_cost)
104
105 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
106 #define score_adjust(score, p) \
107         do { \
108                 int _length = (p)->length; \
109                 int _ccount = (p)->ccount; \
110                 if (threshp((p)->code, _ccount, _length) || \
111                     wthreshp((p)->weight, _length)) /* XXX */ \
112                         (score) -= _length - tt.tt_put_token_cost; \
113                 else \
114                         (score) += cc_score_adjustments[_length][_ccount]; \
115         } while (0)
116
117 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
118
119 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
120
121 #define qinsert(p1, p2) \
122         do { \
123                 struct cc *forw = (p1)->qforw; \
124                 struct cc *back = (p1)->qback; \
125                 back->qforw = forw; \
126                 forw->qback = back; \
127                 forw = (p2)->qforw; \
128                 (p1)->qforw = forw; \
129                 forw->qback = (p1); \
130                 (p2)->qforw = (p1); \
131                 (p1)->qback = (p2); \
132         } while (0)
133
134 #define qinsertq(q, p) \
135         ((q)->qforw == (q) ? 0 : \
136          ((q)->qback->qforw = (p)->qforw, \
137           (p)->qforw->qback = (q)->qback, \
138           (q)->qforw->qback = (p), \
139           (p)->qforw = (q)->qforw, \
140           (q)->qforw = (q), \
141           (q)->qback = (q)))
142
143 #define H               (14)
144 #define HSIZE           (1 << H)
145 #define hash(h, c)      (((((h) >> (H - 8)) | (h) << 8) ^ (c)) & (HSIZE - 1))
146
147 char *cc_buffer;
148 struct cc **cc_output;                  /* the output array */
149 short *cc_places[TOKEN_MAX + 1];
150 short *cc_hashcodes;                    /* for computing hashcodes */
151 struct cc **cc_htab;                    /* the hash table */
152 struct cc **cc_tokens;                  /* holds all the active tokens */
153 struct cc_undo {
154         struct cc **pos;
155         struct cc *val;
156 } *cc_undo;
157
158 long cc_time, cc_time0;
159
160 char *cc_tt_ob, *cc_tt_obe;
161
162 int     cc_compress(struct cc **, struct cc **, char);
163 void    cc_compress_cleanup(struct cc **, int);
164 void    cc_compress_phase(struct cc **, int, struct cc **, int);
165 void    cc_compress_phase1(struct cc **, struct cc **, int, int);
166 void    cc_output_phase(char *, struct cc **, int);
167 int     cc_sweep(char *, int, struct cc **, int);
168 void    cc_sweep0(char *, int, int);
169 int     cc_sweep_phase(char *, int, struct cc **);
170 void    cc_sweep_reverse(struct cc **, short *);
171 int     cc_token_compare(const void *, const void *);
172
173 int
174 ccinit(void)
175 {
176         int i, j;
177         struct cc *p;
178
179         if (tt.tt_token_max > cc_token_max)
180                 tt.tt_token_max = cc_token_max;
181         if (tt.tt_token_min < cc_token_min)
182                 tt.tt_token_min = cc_token_min;
183         if (tt.tt_token_min > tt.tt_token_max) {
184                 tt.tt_ntoken = 0;
185                 return 0;
186         }
187         if (tt.tt_ntoken > cc_ntoken / 2)       /* not likely */
188                 tt.tt_ntoken = cc_ntoken / 2;
189 #define C(x) (sizeof (x) / sizeof *(x))
190         for (i = 0; i < (int)C(cc_thresholds); i++) {
191                 int h = i - tt.tt_put_token_cost;
192                 if (h > 0)
193                         cc_thresholds[i] =
194                                 (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
195                 else
196                         cc_thresholds[i] = 0;
197         }
198         for (i = 0; i < (int)C(cc_score_adjustments); i++) {
199                 int t = cc_thresholds[i];
200                 for (j = 0; j < (int)C(*cc_score_adjustments); j++) {
201                         if (j >= t)
202                                 cc_score_adjustments[i][j] =
203                                         - (i - tt.tt_put_token_cost);
204                         else if (j < t - 1)
205                                 cc_score_adjustments[i][j] = 0;
206                         else
207                                 /*
208                                  * cost now is
209                                  *      length * (ccount + 1)           a
210                                  * cost before was
211                                  *      set-token-cost + length +
212                                  *              ccount * put-token-cost b
213                                  * the score adjustment is (b - a)
214                                  */
215                                 cc_score_adjustments[i][j] =
216                                         tt.tt_set_token_cost + i +
217                                                 j * tt.tt_put_token_cost -
218                                                         i * (j + 1);
219                         if (j >= t)
220                                 cc_initial_scores[i][j] = 0;
221                         else
222                                 /*
223                                  * - (set-token-cost +
224                                  *      (length - put-token-cost) -
225                                  *      (length - put-token-cost) * ccount)
226                                  */
227                                 cc_initial_scores[i][j] =
228                                         - (tt.tt_set_token_cost +
229                                            (i - tt.tt_put_token_cost) -
230                                            (i - tt.tt_put_token_cost) * j);
231                 }
232         }
233 #ifndef cc_weight
234         for (i = 1; i < C(cc_wthresholds); i++) {
235                 cc_wthresholds[i] =
236                         ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
237                                 i / 5 + 1) *
238                                 cc_weight + 1;
239                 cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
240         }
241 #endif
242 #undef C
243         if ((cc_output = (struct cc **)
244              malloc((unsigned) cc_bufsize * sizeof *cc_output)) == NULL)
245                 goto nomem;
246         if ((cc_hashcodes = (short *)
247              malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == NULL)
248                 goto nomem;
249         if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == NULL)
250                 goto nomem;
251         if ((cc_tokens = (struct cc **)
252              malloc((unsigned)
253                     (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
254                     sizeof *cc_tokens)) == NULL)
255                 goto nomem;
256         if ((cc_undo = (struct cc_undo *)
257              malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == NULL)
258                 goto nomem;
259         for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
260                 if ((cc_places[i] = (short *)
261                      malloc((unsigned) cc_bufsize * sizeof **cc_places)) == NULL)
262                         goto nomem;
263         cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
264         cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
265         cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
266         cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
267         if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == NULL)
268                 goto nomem;
269         for (i = 0; i < tt.tt_ntoken; i++) {
270                 p->code = i;
271                 p->time = -1;
272                 p->qback = cc_q0a.qback;
273                 p->qforw = &cc_q0a;
274                 p->qback->qforw = p;
275                 cc_q0a.qback = p;
276                 p++;
277         }
278         for (; i < cc_ntoken; i++) {
279                 p->code = -1;
280                 p->time = -1;
281                 p->qback = cc_q1a.qback;
282                 p->qforw = &cc_q1a;
283                 p->qback->qforw = p;
284                 cc_q1a.qback = p;
285                 p++;
286         }
287         cc_tt_ob = tt_ob;
288         cc_tt_obe = tt_obe;
289         if ((cc_buffer = malloc((unsigned) cc_bufsize)) == NULL)
290                 goto nomem;
291         return 0;
292 nomem:
293         wwerrno = WWE_NOMEM;
294         return -1;
295 }
296
297 void
298 ccstart(void)
299 {
300         ttflush();
301         tt_obp = tt_ob = cc_buffer;
302         tt_obe = tt_ob + cc_bufsize;
303         tt.tt_flush = ccflush;
304         if (cc_trace) {
305                 cc_trace_fp = fopen("window-trace", "a");
306                 (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
307         }
308         ccreset();
309 }
310
311 void
312 ccreset(void)
313 {
314         struct cc *p;
315
316         memset((char *) cc_htab, 0, HSIZE * sizeof *cc_htab);
317         for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
318                 p->hback = NULL;
319         for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
320                 p->hback = NULL;
321 }
322
323 void
324 ccend(void)
325 {
326
327         ttflush();
328         tt_obp = tt_ob = cc_tt_ob;
329         tt_obe = cc_tt_obe;
330         tt.tt_flush = 0;
331         if (cc_trace_fp != NULL) {
332                 (void) fclose(cc_trace_fp);
333                 cc_trace_fp = NULL;
334         }
335 }
336
337 void
338 ccflush(void)
339 {
340         int bufsize = tt_obp - tt_ob;
341         int n;
342
343         if (tt_ob != cc_buffer)
344                 abort();
345         if (cc_trace_fp != NULL) {
346                 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
347                 (void) putc(-1, cc_trace_fp);
348         }
349         tt.tt_flush = 0;
350         (*tt.tt_compress)(1);
351         if (bufsize < tt.tt_token_min) {
352                 ttflush();
353                 goto out;
354         }
355         tt_obp = tt_ob = cc_tt_ob;
356         tt_obe = cc_tt_obe;
357         cc_time0 = cc_time;
358         cc_time += bufsize;
359         n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
360         cc_compress_phase(cc_output, bufsize, cc_tokens, n);
361         cc_output_phase(cc_buffer, cc_output, bufsize);
362         ttflush();
363         tt_obp = tt_ob = cc_buffer;
364         tt_obe = cc_buffer + cc_bufsize;
365 out:
366         (*tt.tt_compress)(0);
367         tt.tt_flush = ccflush;
368 }
369
370 int
371 cc_sweep_phase(char *buffer, int bufsize, struct cc **tokens)
372 {
373         struct cc **pp = tokens;
374         int i, n;
375 #ifdef STATS
376         int nn, ii;
377 #endif
378
379 #ifdef STATS
380         if (verbose >= 0)
381                 time_begin();
382         if (verbose > 0)
383                 printf("Sweep:");
384 #endif
385         cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
386 #ifdef STATS
387         ntoken_stat = 0;
388         nn = 0;
389         ii = 0;
390 #endif
391         for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
392 #ifdef STATS
393                 if (verbose > 0) {
394                         if (ii > 7) {
395                                 printf("\n      ");
396                                 ii = 0;
397                         }
398                         ii++;
399                         printf(" (%d", i);
400                         (void) fflush(stdout);
401                 }
402 #endif
403                 n = cc_sweep(buffer, bufsize, pp, i);
404                 pp += n;
405 #ifdef STATS
406                 if (verbose > 0) {
407                         if (--n > 0) {
408                                 printf(" %d", n);
409                                 nn += n;
410                         }
411                         putchar(')');
412                 }
413 #endif
414         }
415         qinsertq(&cc_q1b, &cc_q1a);
416 #ifdef STATS
417         if (verbose > 0)
418                 printf("\n       %d tokens, %d candidates\n",
419                         ntoken_stat, nn);
420         if (verbose >= 0)
421                 time_end();
422 #endif
423         return pp - tokens;
424 }
425
426 void
427 cc_sweep0(char *buffer, int n, int length)
428 {
429         char *p;
430         short *hc;
431         int i;
432         short c;
433         short pc = tt.tt_padc;
434
435         /* n and length are at least 1 */
436         p = buffer++;
437         hc = cc_hashcodes;
438         i = n;
439         do {
440                 if ((*hc++ = *p++) == pc)
441                         hc[-1] = -1;
442         } while (--i);
443         while (--length) {
444                 p = buffer++;
445                 hc = cc_hashcodes;
446                 for (i = n--; --i;) {
447                         if ((c = *p++) == pc || *hc < 0)
448                                 c = -1;
449                         else
450                                 c = hash(*hc, c);
451                         *hc++ = c;
452                 }
453         }
454 }
455
456 int
457 cc_sweep(char *buffer, int bufsize, struct cc **tokens, int length)
458 {
459         struct cc *p;
460         char *cp;
461         int i;
462         short *hc;
463         short *places = cc_places[length];
464         struct cc **pp = tokens;
465         short threshold = thresh(length);
466 #ifndef cc_weight
467         short wthreshold = wthresh(length);
468         short limit = wlimit(length);
469 #endif
470         int time0;
471         short pc = tt.tt_padc;
472
473         i = length - 1;
474         bufsize -= i;
475         cp = buffer + i;
476         hc = cc_hashcodes;
477         time0 = cc_time0;
478         for (i = 0; i < bufsize; i++, time0++) {
479                 struct cc **h;
480
481                 {
482                         short *hc1 = hc;
483                         short c = *cp++;
484                         short hh;
485                         if ((hh = *hc1) < 0 || c == pc) {
486                                 *hc1++ = -1;
487                                 hc = hc1;
488                                 continue;
489                         }
490                         h = cc_htab + (*hc1++ = hash(hh, c));
491                         hc = hc1;
492                 }
493                 for (p = *h; p != NULL; p = p->hforw)
494                         if (p->length == (char) length) {
495                                 char *p1 = p->string;
496                                 char *p2 = cp - length;
497                                 int n = length;
498                                 do
499                                         if (*p1++ != *p2++)
500                                                 goto fail;
501                                 while (--n);
502                                 break;
503                         fail:;
504                         }
505                 if (p == NULL) {
506                         p = cc_q1a.qback;
507                         if (p == &cc_q1a ||
508                             (p->time >= cc_time0 && p->length == (char) length))
509                                 continue;
510                         if (p->hback != NULL)
511                                 if ((*p->hback = p->hforw) != NULL)
512                                         p->hforw->hback = p->hback;
513                         {
514                                 char *p1 = p->string;
515                                 char *p2 = cp - length;
516                                 int n = length;
517                                 do
518                                         *p1++ = *p2++;
519                                 while (--n);
520                         }
521                         p->length = length;
522 #ifndef cc_weight
523                         p->weight = cc_weight;
524 #endif
525                         p->time = time0;
526                         p->bcount = 1;
527                         p->ccount = 0;
528                         p->flag = 0;
529                         if ((p->hforw = *h) != NULL)
530                                 p->hforw->hback = &p->hforw;
531                         *h = p;
532                         p->hback = h;
533                         qinsert(p, &cc_q1a);
534                         places[i] = -1;
535                         p->places = i;
536 #ifdef STATS
537                         ntoken_stat++;
538 #endif
539                 } else if (p->time < cc_time0) {
540 #ifndef cc_weight
541                         if ((p->weight += p->time - time0) < 0)
542                                 p->weight = cc_weight;
543                         else if ((p->weight += cc_weight) > limit)
544                                 p->weight = limit;
545 #endif
546                         p->time = time0;
547                         p->bcount = 1;
548                         p->ccount = 0;
549                         if (p->code >= 0) {
550                                 p->flag = 1;
551                                 *pp++ = p;
552                         } else
553 #ifndef cc_weight
554                         if (p->weight >= wthreshold) {
555                                 p->flag = 1;
556                                 *pp++ = p;
557                                 qinsert(p, &cc_q1b);
558                         } else
559 #endif
560                         {
561                                 p->flag = 0;
562                                 qinsert(p, &cc_q1a);
563                         }
564                         places[i] = -1;
565                         p->places = i;
566 #ifdef STATS
567                         ntoken_stat++;
568 #endif
569                 } else if (p->time + length > time0) {
570                         /*
571                          * overlapping token, don't count as two and
572                          * don't update time0, but do adjust weight to offset
573                          * the difference
574                          */
575 #ifndef cc_weight
576                         if (cc_weight != 0) {   /* XXX */
577                                 p->weight += time0 - p->time;
578                                 if (!p->flag && p->weight >= wthreshold) {
579                                         p->flag = 1;
580                                         *pp++ = p;
581                                         qinsert(p, &cc_q1b);
582                                 }
583                         }
584 #endif
585                         places[i] = p->places;
586                         p->places = i;
587                 } else {
588 #ifndef cc_weight
589                         if ((p->weight += p->time - time0) < 0)
590                                 p->weight = cc_weight;
591                         else if ((p->weight += cc_weight) > limit)
592                                 p->weight = limit;
593 #endif
594                         p->time = time0;
595                         p->bcount++;
596                         if (!p->flag &&
597                             /* code must be < 0 if flag false here */
598                             (p->bcount >= threshold
599 #ifndef cc_weight
600                              || p->weight >= wthreshold
601 #endif
602                              )) {
603                                 p->flag = 1;
604                                 *pp++ = p;
605                                 qinsert(p, &cc_q1b);
606                         }
607                         places[i] = p->places;
608                         p->places = i;
609                 }
610         }
611         if ((i = pp - tokens) > 0) {
612                 *pp = NULL;
613                 if (cc_reverse)
614                         cc_sweep_reverse(tokens, places);
615                 if (cc_sort && i > 1) {
616                         qsort((char *) tokens, i, sizeof *tokens,
617                               cc_token_compare);
618                 }
619                 if (cc_chop) {
620                         if ((i = i * cc_chop / 100) == 0)
621                                 i = 1;
622                         tokens[i] = NULL;
623                 }
624                 i++;
625         }
626         return i;
627 }
628
629 void
630 cc_sweep_reverse(struct cc **pp, short int *places)
631 {
632         struct cc *p;
633         short frnt, back, t;
634
635         while ((p = *pp++) != NULL) {
636                 back = -1;
637                 t = p->places;
638                 /* the list is never empty */
639                 do {
640                         frnt = places[t];
641                         places[t] = back;
642                         back = t;
643                 } while ((t = frnt) >= 0);
644                 p->places = back;
645         }
646 }
647
648 void
649 cc_compress_phase(struct cc **output, int bufsize, struct cc **tokens, int ntoken)
650 {
651         int i;
652
653         memset((char *) output, 0, bufsize * sizeof *output);
654         for (i = 0; i < cc_npass0; i++)
655                 cc_compress_phase1(output, tokens, ntoken, 0);
656         for (i = 0; i < cc_npass1; i++)
657                 cc_compress_phase1(output, tokens, ntoken, 1);
658         cc_compress_cleanup(output, bufsize);
659 }
660
661 void
662 cc_compress_phase1(struct cc **output, struct cc **tokens, int ntoken, int flag)
663 {
664         struct cc **pp;
665 #ifdef STATS
666         int i = 0;
667         int nt = 0, cc = 0, nc = 0;
668 #endif
669
670 #ifdef STATS
671         if (verbose >= 0)
672                 time_begin();
673         if (verbose > 0)
674                 printf("Compress:");
675 #endif
676         pp = tokens;
677         while (pp < tokens + ntoken) {
678 #ifdef STATS
679                 if (verbose > 0) {
680                         ntoken_stat = 0;
681                         ccount_stat = 0;
682                         ncover_stat = 0;
683                         if (i > 2) {
684                                 printf("\n         ");
685                                 i = 0;
686                         }
687                         i++;
688                         printf(" (%d", (*pp)->length);
689                         (void) fflush(stdout);
690                 }
691 #endif
692                 pp += cc_compress(output, pp, flag);
693 #ifdef STATS
694                 if (verbose > 0) {
695                         printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
696                                ncover_stat);
697                         nt += ntoken_stat;
698                         cc += ccount_stat;
699                         nc += ncover_stat;
700                 }
701 #endif
702         }
703 #ifdef STATS
704         if (verbose > 0)
705                 printf("\n   total: (%dt %du %dc)\n", nt, cc, nc);
706         if (verbose >= 0)
707                 time_end();
708 #endif
709 }
710
711 void
712 cc_compress_cleanup(struct cc **output, int bufsize)
713 {
714         struct cc **end;
715
716         /* the previous output phase may have been interrupted */
717         qinsertq(&cc_q0b, &cc_q0a);
718         for (end = output + bufsize; output < end;) {
719                 struct cc *p;
720                 int length;
721                 if ((p = *output) == NULL) {
722                         output++;
723                         continue;
724                 }
725                 length = p->length;
726                 if (!p->flag) {
727                 } else if (p->code >= 0) {
728                         qinsert(p, &cc_q0b);
729                         p->flag = 0;
730                 } else if (p->ccount == 0) {
731                         *output = NULL;
732                 } else if (p->ccount >= thresh(length)
733 #ifndef cc_weight
734                            || wthreshp(p->weight, length)
735 #endif
736                            ) {
737                         p->flag = 0;
738                 } else {
739                         p->ccount = 0;
740                         *output = NULL;
741                 }
742                 output += length;
743         }
744 }
745
746 int
747 cc_compress(struct cc **output, struct cc **tokens, char flag)
748 {
749         struct cc **pp = tokens;
750         struct cc *p = *pp++;
751         int length = p->length;
752         int threshold = thresh(length);
753 #ifndef cc_weight
754         short wthreshold = wthresh(length);
755 #endif
756         short *places = cc_places[length];
757         int *initial_scores = cc_initial_scores[length];
758         int initial_score0 = put_token_score(length);
759
760         do {
761                 int score;
762                 struct cc_undo *undop;
763                 int ccount;
764 #ifdef STATS
765                 int ncover;
766 #endif
767                 int i;
768
769                 ccount = p->ccount;
770                 if ((short) ccount >= p->bcount)
771                         continue;
772                 if (p->code >= 0 || ccount >= threshold)
773                         score = 0;
774 #ifndef cc_weight
775                 else if (p->weight >= wthreshold)
776                         /* allow one fewer match than normal */
777                         /* XXX, should adjust for ccount */
778                         score = - tt.tt_set_token_cost;
779 #endif
780                 else
781                         score = initial_scores[ccount];
782                 undop = cc_undo;
783 #ifdef STATS
784                 ncover = 0;
785 #endif
786                 for (i = p->places; i >= 0; i = places[i]) {
787                         struct cc **jp;
788                         struct cc *x;
789                         struct cc **ip = output + i;
790                         int score0 = initial_score0;
791                         struct cc **iip = ip + length;
792                         struct cc_undo *undop1 = undop;
793
794                         if ((x = *(jp = ip)) != NULL)
795                                 goto z;
796                         while (--jp >= output)
797                                 if ((x = *jp) != NULL) {
798                                         if (jp + x->length > ip)
799                                                 goto z;
800                                         break;
801                                 }
802                         jp = ip + 1;
803                         while (jp < iip) {
804                                 if ((x = *jp) == NULL) {
805                                         jp++;
806                                         continue;
807                                 }
808                         z:
809                                 if (x == p)
810                                         goto undo;
811 #ifdef STATS
812                                 ncover++;
813 #endif
814                                 undop->pos = jp;
815                                 undop->val = x;
816                                 undop++;
817                                 *jp = NULL;
818                                 x->ccount--;
819                                 score_adjust(score0, x);
820                                 if (score0 < 0 && flag)
821                                         goto undo;
822                                 jp += x->length;
823                         }
824                         undop->pos = ip;
825                         undop->val = NULL;
826                         undop++;
827                         *ip = p;
828                         ccount++;
829                         score += score0;
830                         continue;
831                 undo:
832                         while (--undop >= undop1)
833                                 if ((*undop->pos = x = undop->val))
834                                         x->ccount++;
835                         undop++;
836                 }
837                 if (score > 0) {
838 #ifdef STATS
839                         ccount_stat += ccount - p->ccount;
840                         ntoken_stat++;
841                         ncover_stat += ncover;
842 #endif
843                         p->ccount = ccount;
844                 } else {
845                         struct cc_undo *u = cc_undo;
846                         while (--undop >= u) {
847                                 struct cc *x;
848                                 if ((*undop->pos = x = undop->val))
849                                         x->ccount++;
850                         }
851                 }
852         } while ((p = *pp++) != NULL);
853         return pp - tokens;
854 }
855
856 void
857 cc_output_phase(char *buffer, struct cc **output, int bufsize)
858 {
859         int i;
860         struct cc *p, *p1;
861
862         for (i = 0; i < bufsize;) {
863                 if ((p = output[i]) == NULL) {
864                         ttputc(buffer[i]);
865                         i++;
866                 } else if (p->code >= 0) {
867                         if (--p->ccount == 0)
868                                 qinsert(p, &cc_q0a);
869                         (*tt.tt_put_token)(p->code, p->string, p->length);
870                         wwntokuse++;
871                         wwntoksave += put_token_score(p->length);
872                         i += p->length;
873                 } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
874                         p->code = p1->code;
875                         p1->code = -1;
876                         qinsert(p1, &cc_q1a);
877                         if (--p->ccount == 0)
878                                 qinsert(p, &cc_q0a);
879                         else
880                                 qinsert(p, &cc_q0b);
881                         (*tt.tt_set_token)(p->code, p->string, p->length);
882                         wwntokdef++;
883                         wwntoksave -= tt.tt_set_token_cost;
884                         i += p->length;
885                 } else {
886                         p->ccount--;
887                         ttwrite(p->string, p->length);
888                         wwntokbad++;
889                         i += p->length;
890                 }
891         }
892         wwntokc += bufsize;
893 }
894
895 int
896 cc_token_compare(const void *p1, const void *p2)
897 {
898         const struct cc * const * vp1 = p1;
899         const struct cc * const * vp2 = p2;
900         return (*vp2)->bcount - (*vp1)->bcount;
901 }