Merge branch 'vendor/LIBARCHIVE' (early part)
[dragonfly.git] / usr.bin / window / compress.c
1 /*      $NetBSD: compress.c,v 1.7 2009/04/14 08:50:06 lukem Exp $       */
2
3 /*
4  * Copyright (c) 1989, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Edward Wang at The University of California, Berkeley.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)compress.c  8.1 (Berkeley) 6/6/93";
39 #else
40 __RCSID("$NetBSD: compress.c,v 1.7 2009/04/14 08:50:06 lukem Exp $");
41 #endif
42 #endif /* not lint */
43
44 #include <fcntl.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include "defs.h"
49 #include "tt.h"
50
51         /* special */
52 int cc_trace = 0;
53 FILE *cc_trace_fp;
54
55         /* tunable parameters */
56
57 int cc_reverse = 1;
58 int cc_sort = 0;
59 int cc_chop = 0;
60
61 int cc_token_max = 8;           /* <= TOKEN_MAX */
62 int cc_token_min = 2;           /* > tt.tt_put_token_cost */
63 int cc_npass0 = 1;
64 int cc_npass1 = 1;
65
66 int cc_bufsize = 1024 * 3;      /* XXX, or 80 * 24 * 2 */
67
68 int cc_ntoken = 8192;
69
70 #define cc_weight XXX
71 #ifndef cc_weight
72 int cc_weight = 0;
73 #endif
74
75 #define TOKEN_MAX 16
76
77 struct cc {
78         char string[TOKEN_MAX];
79         char length;
80         char flag;
81 #ifndef cc_weight
82         short weight;
83 #endif
84         long time;              /* time last seen */
85         short bcount;           /* count in this buffer */
86         short ccount;           /* count in compression */
87         short places;           /* places in the buffer */
88         short code;             /* token code */
89         struct cc *qforw, *qback;
90         struct cc *hforw, **hback;
91 };
92
93 short cc_thresholds[TOKEN_MAX + 1];
94 #define thresh(length) (cc_thresholds[length])
95 #define threshp(code, count, length) \
96         ((code) >= 0 || (short) (count) >= cc_thresholds[length])
97
98 #ifndef cc_weight
99 short cc_wthresholds[TOKEN_MAX + 1];
100 #define wthresh(length) (cc_wthresholds[length])
101 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
102 #else
103 #define wthreshp(weight, length) (0)
104 #endif
105
106 #ifndef cc_weight
107 short cc_wlimits[TOKEN_MAX + 1];
108 #define wlimit(length) (cc_wlimits[length])
109 #endif
110
111 #define put_token_score(length) ((length) - tt.tt_put_token_cost)
112
113 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
114 #define score_adjust(score, p) \
115         do { \
116                 int _length = (p)->length; \
117                 int _ccount = (p)->ccount; \
118                 if (threshp((p)->code, _ccount, _length) || \
119                     wthreshp((p)->weight, _length)) /* XXX */ \
120                         (score) -= _length - tt.tt_put_token_cost; \
121                 else \
122                         (score) += cc_score_adjustments[_length][_ccount]; \
123         } while (0)
124
125 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
126
127 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
128
129 #define qinsert(p1, p2) \
130         do { \
131                 struct cc *forw = (p1)->qforw; \
132                 struct cc *back = (p1)->qback; \
133                 back->qforw = forw; \
134                 forw->qback = back; \
135                 forw = (p2)->qforw; \
136                 (p1)->qforw = forw; \
137                 forw->qback = (p1); \
138                 (p2)->qforw = (p1); \
139                 (p1)->qback = (p2); \
140         } while (0)
141
142 #define qinsertq(q, p) \
143         ((q)->qforw == (q) ? 0 : \
144          ((q)->qback->qforw = (p)->qforw, \
145           (p)->qforw->qback = (q)->qback, \
146           (q)->qforw->qback = (p), \
147           (p)->qforw = (q)->qforw, \
148           (q)->qforw = (q), \
149           (q)->qback = (q)))
150
151 #define H               (14)
152 #define HSIZE           (1 << H)
153 #define hash(h, c)      (((((h) >> (H - 8)) | (h) << 8) ^ (c)) & (HSIZE - 1))
154
155 char *cc_buffer;
156 struct cc **cc_output;                  /* the output array */
157 short *cc_places[TOKEN_MAX + 1];
158 short *cc_hashcodes;                    /* for computing hashcodes */
159 struct cc **cc_htab;                    /* the hash table */
160 struct cc **cc_tokens;                  /* holds all the active tokens */
161 struct cc_undo {
162         struct cc **pos;
163         struct cc *val;
164 } *cc_undo;
165
166 long cc_time, cc_time0;
167
168 char *cc_tt_ob, *cc_tt_obe;
169
170 int     cc_compress(struct cc **, struct cc **, char);
171 void    cc_compress_cleanup(struct cc **, int);
172 void    cc_compress_phase(struct cc **, int, struct cc **, int);
173 void    cc_compress_phase1(struct cc **, struct cc **, int, int);
174 void    cc_output_phase(char *, struct cc **, int);
175 int     cc_sweep(char *, int, struct cc **, int);
176 void    cc_sweep0(char *, int, int);
177 int     cc_sweep_phase(char *, int, struct cc **);
178 void    cc_sweep_reverse(struct cc **, short *);
179 int     cc_token_compare(const void *, const void *);
180
181 int
182 ccinit(void)
183 {
184         int i, j;
185         struct cc *p;
186
187         if (tt.tt_token_max > cc_token_max)
188                 tt.tt_token_max = cc_token_max;
189         if (tt.tt_token_min < cc_token_min)
190                 tt.tt_token_min = cc_token_min;
191         if (tt.tt_token_min > tt.tt_token_max) {
192                 tt.tt_ntoken = 0;
193                 return 0;
194         }
195         if (tt.tt_ntoken > cc_ntoken / 2)       /* not likely */
196                 tt.tt_ntoken = cc_ntoken / 2;
197 #define C(x) (sizeof (x) / sizeof *(x))
198         for (i = 0; i < (int)C(cc_thresholds); i++) {
199                 int h = i - tt.tt_put_token_cost;
200                 if (h > 0)
201                         cc_thresholds[i] =
202                                 (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
203                 else
204                         cc_thresholds[i] = 0;
205         }
206         for (i = 0; i < (int)C(cc_score_adjustments); i++) {
207                 int t = cc_thresholds[i];
208                 for (j = 0; j < (int)C(*cc_score_adjustments); j++) {
209                         if (j >= t)
210                                 cc_score_adjustments[i][j] =
211                                         - (i - tt.tt_put_token_cost);
212                         else if (j < t - 1)
213                                 cc_score_adjustments[i][j] = 0;
214                         else
215                                 /*
216                                  * cost now is
217                                  *      length * (ccount + 1)           a
218                                  * cost before was
219                                  *      set-token-cost + length +
220                                  *              ccount * put-token-cost b
221                                  * the score adjustment is (b - a)
222                                  */
223                                 cc_score_adjustments[i][j] =
224                                         tt.tt_set_token_cost + i +
225                                                 j * tt.tt_put_token_cost -
226                                                         i * (j + 1);
227                         if (j >= t)
228                                 cc_initial_scores[i][j] = 0;
229                         else
230                                 /*
231                                  * - (set-token-cost +
232                                  *      (length - put-token-cost) -
233                                  *      (length - put-token-cost) * ccount)
234                                  */
235                                 cc_initial_scores[i][j] =
236                                         - (tt.tt_set_token_cost +
237                                            (i - tt.tt_put_token_cost) -
238                                            (i - tt.tt_put_token_cost) * j);
239                 }
240         }
241 #ifndef cc_weight
242         for (i = 1; i < C(cc_wthresholds); i++) {
243                 cc_wthresholds[i] =
244                         ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
245                                 i / 5 + 1) *
246                                 cc_weight + 1;
247                 cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
248         }
249 #endif
250 #undef C
251         if ((cc_output = (struct cc **)
252              malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
253                 goto nomem;
254         if ((cc_hashcodes = (short *)
255              malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
256                 goto nomem;
257         if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
258                 goto nomem;
259         if ((cc_tokens = (struct cc **)
260              malloc((unsigned)
261                     (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
262                     sizeof *cc_tokens)) == 0)
263                 goto nomem;
264         if ((cc_undo = (struct cc_undo *)
265              malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
266                 goto nomem;
267         for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
268                 if ((cc_places[i] = (short *)
269                      malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
270                         goto nomem;
271         cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
272         cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
273         cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
274         cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
275         if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
276                 goto nomem;
277         for (i = 0; i < tt.tt_ntoken; i++) {
278                 p->code = i;
279                 p->time = -1;
280                 p->qback = cc_q0a.qback;
281                 p->qforw = &cc_q0a;
282                 p->qback->qforw = p;
283                 cc_q0a.qback = p;
284                 p++;
285         }
286         for (; i < cc_ntoken; i++) {
287                 p->code = -1;
288                 p->time = -1;
289                 p->qback = cc_q1a.qback;
290                 p->qforw = &cc_q1a;
291                 p->qback->qforw = p;
292                 cc_q1a.qback = p;
293                 p++;
294         }
295         cc_tt_ob = tt_ob;
296         cc_tt_obe = tt_obe;
297         if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
298                 goto nomem;
299         return 0;
300 nomem:
301         wwerrno = WWE_NOMEM;
302         return -1;
303 }
304
305 void
306 ccstart(void)
307 {
308         ttflush();
309         tt_obp = tt_ob = cc_buffer;
310         tt_obe = tt_ob + cc_bufsize;
311         tt.tt_flush = ccflush;
312         if (cc_trace) {
313                 cc_trace_fp = fopen("window-trace", "a");
314                 (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
315         }
316         ccreset();
317 }
318
319 void
320 ccreset(void)
321 {
322         struct cc *p;
323
324         memset((char *) cc_htab, 0, HSIZE * sizeof *cc_htab);
325         for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
326                 p->hback = 0;
327         for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
328                 p->hback = 0;
329 }
330
331 void
332 ccend(void)
333 {
334
335         ttflush();
336         tt_obp = tt_ob = cc_tt_ob;
337         tt_obe = cc_tt_obe;
338         tt.tt_flush = 0;
339         if (cc_trace_fp != NULL) {
340                 (void) fclose(cc_trace_fp);
341                 cc_trace_fp = NULL;
342         }
343 }
344
345 void
346 ccflush(void)
347 {
348         int bufsize = tt_obp - tt_ob;
349         int n;
350
351         if (tt_ob != cc_buffer)
352                 abort();
353         if (cc_trace_fp != NULL) {
354                 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
355                 (void) putc(-1, cc_trace_fp);
356         }
357         tt.tt_flush = 0;
358         (*tt.tt_compress)(1);
359         if (bufsize < tt.tt_token_min) {
360                 ttflush();
361                 goto out;
362         }
363         tt_obp = tt_ob = cc_tt_ob;
364         tt_obe = cc_tt_obe;
365         cc_time0 = cc_time;
366         cc_time += bufsize;
367         n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
368         cc_compress_phase(cc_output, bufsize, cc_tokens, n);
369         cc_output_phase(cc_buffer, cc_output, bufsize);
370         ttflush();
371         tt_obp = tt_ob = cc_buffer;
372         tt_obe = cc_buffer + cc_bufsize;
373 out:
374         (*tt.tt_compress)(0);
375         tt.tt_flush = ccflush;
376 }
377
378 int
379 cc_sweep_phase(char *buffer, int bufsize, struct cc **tokens)
380 {
381         struct cc **pp = tokens;
382         int i, n;
383 #ifdef STATS
384         int nn, ii;
385 #endif
386
387 #ifdef STATS
388         if (verbose >= 0)
389                 time_begin();
390         if (verbose > 0)
391                 printf("Sweep:");
392 #endif
393         cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
394 #ifdef STATS
395         ntoken_stat = 0;
396         nn = 0;
397         ii = 0;
398 #endif
399         for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
400 #ifdef STATS
401                 if (verbose > 0) {
402                         if (ii > 7) {
403                                 printf("\n      ");
404                                 ii = 0;
405                         }
406                         ii++;
407                         printf(" (%d", i);
408                         (void) fflush(stdout);
409                 }
410 #endif
411                 n = cc_sweep(buffer, bufsize, pp, i);
412                 pp += n;
413 #ifdef STATS
414                 if (verbose > 0) {
415                         if (--n > 0) {
416                                 printf(" %d", n);
417                                 nn += n;
418                         }
419                         putchar(')');
420                 }
421 #endif
422         }
423         qinsertq(&cc_q1b, &cc_q1a);
424 #ifdef STATS
425         if (verbose > 0)
426                 printf("\n       %d tokens, %d candidates\n",
427                         ntoken_stat, nn);
428         if (verbose >= 0)
429                 time_end();
430 #endif
431         return pp - tokens;
432 }
433
434 void
435 cc_sweep0(char *buffer, int n, int length)
436 {
437         char *p;
438         short *hc;
439         int i;
440         short c;
441         short pc = tt.tt_padc;
442
443         /* n and length are at least 1 */
444         p = buffer++;
445         hc = cc_hashcodes;
446         i = n;
447         do {
448                 if ((*hc++ = *p++) == pc)
449                         hc[-1] = -1;
450         } while (--i);
451         while (--length) {
452                 p = buffer++;
453                 hc = cc_hashcodes;
454                 for (i = n--; --i;) {
455                         if ((c = *p++) == pc || *hc < 0)
456                                 c = -1;
457                         else
458                                 c = hash(*hc, c);
459                         *hc++ = c;
460                 }
461         }
462 }
463
464 int
465 cc_sweep(char *buffer, int bufsize, struct cc **tokens, int length)
466 {
467         struct cc *p;
468         char *cp;
469         int i;
470         short *hc;
471         short *places = cc_places[length];
472         struct cc **pp = tokens;
473         short threshold = thresh(length);
474 #ifndef cc_weight
475         short wthreshold = wthresh(length);
476         short limit = wlimit(length);
477 #endif
478         int time0;
479         short pc = tt.tt_padc;
480
481         i = length - 1;
482         bufsize -= i;
483         cp = buffer + i;
484         hc = cc_hashcodes;
485         time0 = cc_time0;
486         for (i = 0; i < bufsize; i++, time0++) {
487                 struct cc **h;
488
489                 {
490                         short *hc1 = hc;
491                         short c = *cp++;
492                         short hh;
493                         if ((hh = *hc1) < 0 || c == pc) {
494                                 *hc1++ = -1;
495                                 hc = hc1;
496                                 continue;
497                         }
498                         h = cc_htab + (*hc1++ = hash(hh, c));
499                         hc = hc1;
500                 }
501                 for (p = *h; p != 0; p = p->hforw)
502                         if (p->length == (char) length) {
503                                 char *p1 = p->string;
504                                 char *p2 = cp - length;
505                                 int n = length;
506                                 do
507                                         if (*p1++ != *p2++)
508                                                 goto fail;
509                                 while (--n);
510                                 break;
511                         fail:;
512                         }
513                 if (p == 0) {
514                         p = cc_q1a.qback;
515                         if (p == &cc_q1a ||
516                             (p->time >= cc_time0 && p->length == (char) length))
517                                 continue;
518                         if (p->hback != 0)
519                                 if ((*p->hback = p->hforw) != 0)
520                                         p->hforw->hback = p->hback;
521                         {
522                                 char *p1 = p->string;
523                                 char *p2 = cp - length;
524                                 int n = length;
525                                 do
526                                         *p1++ = *p2++;
527                                 while (--n);
528                         }
529                         p->length = length;
530 #ifndef cc_weight
531                         p->weight = cc_weight;
532 #endif
533                         p->time = time0;
534                         p->bcount = 1;
535                         p->ccount = 0;
536                         p->flag = 0;
537                         if ((p->hforw = *h) != 0)
538                                 p->hforw->hback = &p->hforw;
539                         *h = p;
540                         p->hback = h;
541                         qinsert(p, &cc_q1a);
542                         places[i] = -1;
543                         p->places = i;
544 #ifdef STATS
545                         ntoken_stat++;
546 #endif
547                 } else if (p->time < cc_time0) {
548 #ifndef cc_weight
549                         if ((p->weight += p->time - time0) < 0)
550                                 p->weight = cc_weight;
551                         else if ((p->weight += cc_weight) > limit)
552                                 p->weight = limit;
553 #endif
554                         p->time = time0;
555                         p->bcount = 1;
556                         p->ccount = 0;
557                         if (p->code >= 0) {
558                                 p->flag = 1;
559                                 *pp++ = p;
560                         } else
561 #ifndef cc_weight
562                         if (p->weight >= wthreshold) {
563                                 p->flag = 1;
564                                 *pp++ = p;
565                                 qinsert(p, &cc_q1b);
566                         } else
567 #endif
568                         {
569                                 p->flag = 0;
570                                 qinsert(p, &cc_q1a);
571                         }
572                         places[i] = -1;
573                         p->places = i;
574 #ifdef STATS
575                         ntoken_stat++;
576 #endif
577                 } else if (p->time + length > time0) {
578                         /*
579                          * overlapping token, don't count as two and
580                          * don't update time0, but do adjust weight to offset
581                          * the difference
582                          */
583 #ifndef cc_weight
584                         if (cc_weight != 0) {   /* XXX */
585                                 p->weight += time0 - p->time;
586                                 if (!p->flag && p->weight >= wthreshold) {
587                                         p->flag = 1;
588                                         *pp++ = p;
589                                         qinsert(p, &cc_q1b);
590                                 }
591                         }
592 #endif
593                         places[i] = p->places;
594                         p->places = i;
595                 } else {
596 #ifndef cc_weight
597                         if ((p->weight += p->time - time0) < 0)
598                                 p->weight = cc_weight;
599                         else if ((p->weight += cc_weight) > limit)
600                                 p->weight = limit;
601 #endif
602                         p->time = time0;
603                         p->bcount++;
604                         if (!p->flag &&
605                             /* code must be < 0 if flag false here */
606                             (p->bcount >= threshold
607 #ifndef cc_weight
608                              || p->weight >= wthreshold
609 #endif
610                              )) {
611                                 p->flag = 1;
612                                 *pp++ = p;
613                                 qinsert(p, &cc_q1b);
614                         }
615                         places[i] = p->places;
616                         p->places = i;
617                 }
618         }
619         if ((i = pp - tokens) > 0) {
620                 *pp = 0;
621                 if (cc_reverse)
622                         cc_sweep_reverse(tokens, places);
623                 if (cc_sort && i > 1) {
624                         qsort((char *) tokens, i, sizeof *tokens,
625                               cc_token_compare);
626                 }
627                 if (cc_chop) {
628                         if ((i = i * cc_chop / 100) == 0)
629                                 i = 1;
630                         tokens[i] = 0;
631                 }
632                 i++;
633         }
634         return i;
635 }
636
637 void
638 cc_sweep_reverse(struct cc **pp, short int *places)
639 {
640         struct cc *p;
641         short frnt, back, t;
642
643         while ((p = *pp++) != 0) {
644                 back = -1;
645                 t = p->places;
646                 /* the list is never empty */
647                 do {
648                         frnt = places[t];
649                         places[t] = back;
650                         back = t;
651                 } while ((t = frnt) >= 0);
652                 p->places = back;
653         }
654 }
655
656 void
657 cc_compress_phase(struct cc **output, int bufsize, struct cc **tokens, int ntoken)
658 {
659         int i;
660
661         memset((char *) output, 0, bufsize * sizeof *output);
662         for (i = 0; i < cc_npass0; i++)
663                 cc_compress_phase1(output, tokens, ntoken, 0);
664         for (i = 0; i < cc_npass1; i++)
665                 cc_compress_phase1(output, tokens, ntoken, 1);
666         cc_compress_cleanup(output, bufsize);
667 }
668
669 void
670 cc_compress_phase1(struct cc **output, struct cc **tokens, int ntoken, int flag)
671 {
672         struct cc **pp;
673 #ifdef STATS
674         int i = 0;
675         int nt = 0, cc = 0, nc = 0;
676 #endif
677
678 #ifdef STATS
679         if (verbose >= 0)
680                 time_begin();
681         if (verbose > 0)
682                 printf("Compress:");
683 #endif
684         pp = tokens;
685         while (pp < tokens + ntoken) {
686 #ifdef STATS
687                 if (verbose > 0) {
688                         ntoken_stat = 0;
689                         ccount_stat = 0;
690                         ncover_stat = 0;
691                         if (i > 2) {
692                                 printf("\n         ");
693                                 i = 0;
694                         }
695                         i++;
696                         printf(" (%d", (*pp)->length);
697                         (void) fflush(stdout);
698                 }
699 #endif
700                 pp += cc_compress(output, pp, flag);
701 #ifdef STATS
702                 if (verbose > 0) {
703                         printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
704                                ncover_stat);
705                         nt += ntoken_stat;
706                         cc += ccount_stat;
707                         nc += ncover_stat;
708                 }
709 #endif
710         }
711 #ifdef STATS
712         if (verbose > 0)
713                 printf("\n   total: (%dt %du %dc)\n", nt, cc, nc);
714         if (verbose >= 0)
715                 time_end();
716 #endif
717 }
718
719 void
720 cc_compress_cleanup(struct cc **output, int bufsize)
721 {
722         struct cc **end;
723
724         /* the previous output phase may have been interrupted */
725         qinsertq(&cc_q0b, &cc_q0a);
726         for (end = output + bufsize; output < end;) {
727                 struct cc *p;
728                 int length;
729                 if ((p = *output) == 0) {
730                         output++;
731                         continue;
732                 }
733                 length = p->length;
734                 if (!p->flag) {
735                 } else if (p->code >= 0) {
736                         qinsert(p, &cc_q0b);
737                         p->flag = 0;
738                 } else if (p->ccount == 0) {
739                         *output = 0;
740                 } else if (p->ccount >= thresh(length)
741 #ifndef cc_weight
742                            || wthreshp(p->weight, length)
743 #endif
744                            ) {
745                         p->flag = 0;
746                 } else {
747                         p->ccount = 0;
748                         *output = 0;
749                 }
750                 output += length;
751         }
752 }
753
754 int
755 cc_compress(struct cc **output, struct cc **tokens, char flag)
756 {
757         struct cc **pp = tokens;
758         struct cc *p = *pp++;
759         int length = p->length;
760         int threshold = thresh(length);
761 #ifndef cc_weight
762         short wthreshold = wthresh(length);
763 #endif
764         short *places = cc_places[length];
765         int *initial_scores = cc_initial_scores[length];
766         int initial_score0 = put_token_score(length);
767
768         do {
769                 int score;
770                 struct cc_undo *undop;
771                 int ccount;
772 #ifdef STATS
773                 int ncover;
774 #endif
775                 int i;
776
777                 ccount = p->ccount;
778                 if ((short) ccount >= p->bcount)
779                         continue;
780                 if (p->code >= 0 || ccount >= threshold)
781                         score = 0;
782 #ifndef cc_weight
783                 else if (p->weight >= wthreshold)
784                         /* allow one fewer match than normal */
785                         /* XXX, should adjust for ccount */
786                         score = - tt.tt_set_token_cost;
787 #endif
788                 else
789                         score = initial_scores[ccount];
790                 undop = cc_undo;
791 #ifdef STATS
792                 ncover = 0;
793 #endif
794                 for (i = p->places; i >= 0; i = places[i]) {
795                         struct cc **jp;
796                         struct cc *x;
797                         struct cc **ip = output + i;
798                         int score0 = initial_score0;
799                         struct cc **iip = ip + length;
800                         struct cc_undo *undop1 = undop;
801
802                         if ((x = *(jp = ip)) != 0)
803                                 goto z;
804                         while (--jp >= output)
805                                 if ((x = *jp) != 0) {
806                                         if (jp + x->length > ip)
807                                                 goto z;
808                                         break;
809                                 }
810                         jp = ip + 1;
811                         while (jp < iip) {
812                                 if ((x = *jp) == 0) {
813                                         jp++;
814                                         continue;
815                                 }
816                         z:
817                                 if (x == p)
818                                         goto undo;
819 #ifdef STATS
820                                 ncover++;
821 #endif
822                                 undop->pos = jp;
823                                 undop->val = x;
824                                 undop++;
825                                 *jp = 0;
826                                 x->ccount--;
827                                 score_adjust(score0, x);
828                                 if (score0 < 0 && flag)
829                                         goto undo;
830                                 jp += x->length;
831                         }
832                         undop->pos = ip;
833                         undop->val = 0;
834                         undop++;
835                         *ip = p;
836                         ccount++;
837                         score += score0;
838                         continue;
839                 undo:
840                         while (--undop >= undop1)
841                                 if ((*undop->pos = x = undop->val))
842                                         x->ccount++;
843                         undop++;
844                 }
845                 if (score > 0) {
846 #ifdef STATS
847                         ccount_stat += ccount - p->ccount;
848                         ntoken_stat++;
849                         ncover_stat += ncover;
850 #endif
851                         p->ccount = ccount;
852                 } else {
853                         struct cc_undo *u = cc_undo;
854                         while (--undop >= u) {
855                                 struct cc *x;
856                                 if ((*undop->pos = x = undop->val))
857                                         x->ccount++;
858                         }
859                 }
860         } while ((p = *pp++) != 0);
861         return pp - tokens;
862 }
863
864 void
865 cc_output_phase(char *buffer, struct cc **output, int bufsize)
866 {
867         int i;
868         struct cc *p, *p1;
869
870         for (i = 0; i < bufsize;) {
871                 if ((p = output[i]) == 0) {
872                         ttputc(buffer[i]);
873                         i++;
874                 } else if (p->code >= 0) {
875                         if (--p->ccount == 0)
876                                 qinsert(p, &cc_q0a);
877                         (*tt.tt_put_token)(p->code, p->string, p->length);
878                         wwntokuse++;
879                         wwntoksave += put_token_score(p->length);
880                         i += p->length;
881                 } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
882                         p->code = p1->code;
883                         p1->code = -1;
884                         qinsert(p1, &cc_q1a);
885                         if (--p->ccount == 0)
886                                 qinsert(p, &cc_q0a);
887                         else
888                                 qinsert(p, &cc_q0b);
889                         (*tt.tt_set_token)(p->code, p->string, p->length);
890                         wwntokdef++;
891                         wwntoksave -= tt.tt_set_token_cost;
892                         i += p->length;
893                 } else {
894                         p->ccount--;
895                         ttwrite(p->string, p->length);
896                         wwntokbad++;
897                         i += p->length;
898                 }
899         }
900         wwntokc += bufsize;
901 }
902
903 int
904 cc_token_compare(const void *p1, const void *p2)
905 {
906         const struct cc * const * vp1 = p1;
907         const struct cc * const * vp2 = p2;
908         return (*vp2)->bcount - (*vp1)->bcount;
909 }