Merge branch 'vendor/OPENSSL'
[dragonfly.git] / usr.bin / window / compress.c
1 /*
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Edward Wang at The University of California, Berkeley.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * @(#)compress.c       8.1 (Berkeley) 6/6/93
37  * $FreeBSD: src/usr.bin/window/compress.c,v 1.2.12.1 2001/05/17 09:45:00 obrien Exp $
38  * $DragonFly: src/usr.bin/window/compress.c,v 1.2 2003/06/17 04:29:33 dillon Exp $
39  */
40
41         /* special */
42 #include <stdio.h>
43 #include <fcntl.h>
44 #include <stdlib.h>
45 #include <string.h>
46
47 #include "ww.h"
48 #include "tt.h"
49
50 int cc_trace = 0;
51 FILE *cc_trace_fp;
52
53         /* tunable parameters */
54
55 int cc_reverse = 1;
56 int cc_sort = 0;
57 int cc_chop = 0;
58
59 int cc_token_max = 8;           /* <= TOKEN_MAX */
60 int cc_token_min = 2;           /* > tt.tt_put_token_cost */
61 int cc_npass0 = 1;
62 int cc_npass1 = 1;
63
64 int cc_bufsize = 1024 * 3;      /* XXX, or 80 * 24 * 2 */
65
66 int cc_ntoken = 8192;
67
68 #define cc_weight XXX
69 #ifndef cc_weight
70 int cc_weight = 0;
71 #endif
72
73 #define TOKEN_MAX 16
74
75 struct cc {
76         char string[TOKEN_MAX];
77         char length;
78         char flag;
79 #ifndef cc_weight
80         short weight;
81 #endif
82         long time;              /* time last seen */
83         short bcount;           /* count in this buffer */
84         short ccount;           /* count in compression */
85         short places;           /* places in the buffer */
86         short code;             /* token code */
87         struct cc *qforw, *qback;
88         struct cc *hforw, **hback;
89 };
90
91 short cc_thresholds[TOKEN_MAX + 1];
92 #define thresh(length) (cc_thresholds[length])
93 #define threshp(code, count, length) \
94         ((code) >= 0 || (short) (count) >= cc_thresholds[length])
95
96 #ifndef cc_weight
97 short cc_wthresholds[TOKEN_MAX + 1];
98 #define wthresh(length) (cc_wthresholds[length])
99 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
100 #else
101 #define wthreshp(weight, length) (0)
102 #endif
103
104 #ifndef cc_weight
105 short cc_wlimits[TOKEN_MAX + 1];
106 #define wlimit(length) (cc_wlimits[length])
107 #endif
108
109 #define put_token_score(length) ((length) - tt.tt_put_token_cost)
110
111 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
112 #define score_adjust(score, p) \
113         do { \
114                 int length = (p)->length; \
115                 int ccount = (p)->ccount; \
116                 if (threshp((p)->code, ccount, length) || \
117                     wthreshp((p)->weight, length)) /* XXX */ \
118                         (score) -= length - tt.tt_put_token_cost; \
119                 else \
120                         (score) += cc_score_adjustments[length][ccount]; \
121         } while (0)
122
123 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
124
125 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
126
127 #define qinsert(p1, p2) \
128         do { \
129                 register struct cc *forw = (p1)->qforw; \
130                 register struct cc *back = (p1)->qback; \
131                 back->qforw = forw; \
132                 forw->qback = back; \
133                 forw = (p2)->qforw; \
134                 (p1)->qforw = forw; \
135                 forw->qback = (p1); \
136                 (p2)->qforw = (p1); \
137                 (p1)->qback = (p2); \
138         } while (0)
139
140 #define qinsertq(q, p) \
141         ((q)->qforw == (q) ? 0 : \
142          ((q)->qback->qforw = (p)->qforw, \
143           (p)->qforw->qback = (q)->qback, \
144           (q)->qforw->qback = (p), \
145           (p)->qforw = (q)->qforw, \
146           (q)->qforw = (q), \
147           (q)->qback = (q)))
148
149 #define H               (14)
150 #define HSIZE           (1 << H)
151 #define hash(h, c)      ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
152
153 char *cc_buffer;
154 struct cc **cc_output;                  /* the output array */
155 short *cc_places[TOKEN_MAX + 1];
156 short *cc_hashcodes;                    /* for computing hashcodes */
157 struct cc **cc_htab;                    /* the hash table */
158 struct cc **cc_tokens;                  /* holds all the active tokens */
159 struct cc_undo {
160         struct cc **pos;
161         struct cc *val;
162 } *cc_undo;
163
164 long cc_time, cc_time0;
165
166 char *cc_tt_ob, *cc_tt_obe;
167
168 ccinit()
169 {
170         register i, j;
171         register struct cc *p;
172
173         if (tt.tt_token_max > cc_token_max)
174                 tt.tt_token_max = cc_token_max;
175         if (tt.tt_token_min < cc_token_min)
176                 tt.tt_token_min = cc_token_min;
177         if (tt.tt_token_min > tt.tt_token_max) {
178                 tt.tt_ntoken = 0;
179                 return 0;
180         }
181         if (tt.tt_ntoken > cc_ntoken / 2)       /* not likely */
182                 tt.tt_ntoken = cc_ntoken / 2;
183 #define C(x) (sizeof (x) / sizeof *(x))
184         for (i = 0; i < C(cc_thresholds); i++) {
185                 int h = i - tt.tt_put_token_cost;
186                 if (h > 0)
187                         cc_thresholds[i] =
188                                 (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
189                 else
190                         cc_thresholds[i] = 0;
191         }
192         for (i = 0; i < C(cc_score_adjustments); i++) {
193                 int t = cc_thresholds[i];
194                 for (j = 0; j < C(*cc_score_adjustments); j++) {
195                         if (j >= t)
196                                 cc_score_adjustments[i][j] =
197                                         - (i - tt.tt_put_token_cost);
198                         else if (j < t - 1)
199                                 cc_score_adjustments[i][j] = 0;
200                         else
201                                 /*
202                                  * cost now is
203                                  *      length * (ccount + 1)           a
204                                  * cost before was
205                                  *      set-token-cost + length +
206                                  *              ccount * put-token-cost b
207                                  * the score adjustment is (b - a)
208                                  */
209                                 cc_score_adjustments[i][j] =
210                                         tt.tt_set_token_cost + i +
211                                                 j * tt.tt_put_token_cost -
212                                                         i * (j + 1);
213                         if (j >= t)
214                                 cc_initial_scores[i][j] = 0;
215                         else
216                                 /*
217                                  * - (set-token-cost +
218                                  *      (length - put-token-cost) -
219                                  *      (length - put-token-cost) * ccount)
220                                  */
221                                 cc_initial_scores[i][j] =
222                                         - (tt.tt_set_token_cost +
223                                            (i - tt.tt_put_token_cost) -
224                                            (i - tt.tt_put_token_cost) * j);
225                 }
226         }
227 #ifndef cc_weight
228         for (i = 1; i < C(cc_wthresholds); i++) {
229                 cc_wthresholds[i] =
230                         ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
231                                 i / 5 + 1) *
232                                 cc_weight + 1;
233                 cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
234         }
235 #endif
236 #undef C
237         if ((cc_output = (struct cc **)
238              malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
239                 goto nomem;
240         if ((cc_hashcodes = (short *)
241              malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
242                 goto nomem;
243         if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
244                 goto nomem;
245         if ((cc_tokens = (struct cc **)
246              malloc((unsigned)
247                     (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
248                     sizeof *cc_tokens)) == 0)
249                 goto nomem;
250         if ((cc_undo = (struct cc_undo *)
251              malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
252                 goto nomem;
253         for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
254                 if ((cc_places[i] = (short *)
255                      malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
256                         goto nomem;
257         cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
258         cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
259         cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
260         cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
261         if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
262                 goto nomem;
263         for (i = 0; i < tt.tt_ntoken; i++) {
264                 p->code = i;
265                 p->time = -1;
266                 p->qback = cc_q0a.qback;
267                 p->qforw = &cc_q0a;
268                 p->qback->qforw = p;
269                 cc_q0a.qback = p;
270                 p++;
271         }
272         for (; i < cc_ntoken; i++) {
273                 p->code = -1;
274                 p->time = -1;
275                 p->qback = cc_q1a.qback;
276                 p->qforw = &cc_q1a;
277                 p->qback->qforw = p;
278                 cc_q1a.qback = p;
279                 p++;
280         }
281         cc_tt_ob = tt_ob;
282         cc_tt_obe = tt_obe;
283         if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
284                 goto nomem;
285         return 0;
286 nomem:
287         wwerrno = WWE_NOMEM;
288         return -1;
289 }
290
291 ccstart()
292 {
293         int ccflush();
294
295         ttflush();
296         tt_obp = tt_ob = cc_buffer;
297         tt_obe = tt_ob + cc_bufsize;
298         tt.tt_flush = ccflush;
299         if (cc_trace) {
300                 cc_trace_fp = fopen("window-trace", "a");
301                 (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
302         }
303         ccreset();
304 }
305
306 ccreset()
307 {
308         register struct cc *p;
309
310         bzero((char *) cc_htab, HSIZE * sizeof *cc_htab);
311         for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
312                 p->hback = 0;
313         for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
314                 p->hback = 0;
315 }
316
317 ccend()
318 {
319
320         ttflush();
321         tt_obp = tt_ob = cc_tt_ob;
322         tt_obe = cc_tt_obe;
323         tt.tt_flush = 0;
324         if (cc_trace_fp != NULL) {
325                 (void) fclose(cc_trace_fp);
326                 cc_trace_fp = NULL;
327         }
328 }
329
330 ccflush()
331 {
332         int bufsize = tt_obp - tt_ob;
333         int n;
334
335         if (tt_ob != cc_buffer)
336                 abort();
337         if (cc_trace_fp != NULL) {
338                 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
339                 (void) putc(-1, cc_trace_fp);
340         }
341         tt.tt_flush = 0;
342         (*tt.tt_compress)(1);
343         if (bufsize < tt.tt_token_min) {
344                 ttflush();
345                 goto out;
346         }
347         tt_obp = tt_ob = cc_tt_ob;
348         tt_obe = cc_tt_obe;
349         cc_time0 = cc_time;
350         cc_time += bufsize;
351         n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
352         cc_compress_phase(cc_output, bufsize, cc_tokens, n);
353         cc_output_phase(cc_buffer, cc_output, bufsize);
354         ttflush();
355         tt_obp = tt_ob = cc_buffer;
356         tt_obe = cc_buffer + cc_bufsize;
357 out:
358         (*tt.tt_compress)(0);
359         tt.tt_flush = ccflush;
360 }
361
362 cc_sweep_phase(buffer, bufsize, tokens)
363         char *buffer;
364         struct cc **tokens;
365 {
366         register struct cc **pp = tokens;
367         register i, n;
368 #ifdef STATS
369         int nn, ii;
370 #endif
371
372 #ifdef STATS
373         if (verbose >= 0)
374                 time_begin();
375         if (verbose > 0)
376                 printf("Sweep:");
377 #endif
378         cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
379 #ifdef STATS
380         ntoken_stat = 0;
381         nn = 0;
382         ii = 0;
383 #endif
384         for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
385 #ifdef STATS
386                 if (verbose > 0) {
387                         if (ii > 7) {
388                                 printf("\n      ");
389                                 ii = 0;
390                         }
391                         ii++;
392                         printf(" (%d", i);
393                         (void) fflush(stdout);
394                 }
395 #endif
396                 n = cc_sweep(buffer, bufsize, pp, i);
397                 pp += n;
398 #ifdef STATS
399                 if (verbose > 0) {
400                         if (--n > 0) {
401                                 printf(" %d", n);
402                                 nn += n;
403                         }
404                         putchar(')');
405                 }
406 #endif
407         }
408         qinsertq(&cc_q1b, &cc_q1a);
409 #ifdef STATS
410         if (verbose > 0)
411                 printf("\n       %d tokens, %d candidates\n",
412                         ntoken_stat, nn);
413         if (verbose >= 0)
414                 time_end();
415 #endif
416         return pp - tokens;
417 }
418
419 cc_sweep0(buffer, n, length)
420         char *buffer;
421 {
422         register char *p;
423         register short *hc;
424         register i;
425         register short c;
426         register short pc = tt.tt_padc;
427
428         /* n and length are at least 1 */
429         p = buffer++;
430         hc = cc_hashcodes;
431         i = n;
432         do {
433                 if ((*hc++ = *p++) == pc)
434                         hc[-1] = -1;
435         } while (--i);
436         while (--length) {
437                 p = buffer++;
438                 hc = cc_hashcodes;
439                 for (i = n--; --i;) {
440                         if ((c = *p++) == pc || *hc < 0)
441                                 c = -1;
442                         else
443                                 c = hash(*hc, c);
444                         *hc++ = c;
445                 }
446         }
447 }
448
449 cc_sweep(buffer, bufsize, tokens, length)
450         char *buffer;
451         struct cc **tokens;
452         register length;
453 {
454         register struct cc *p;
455         register char *cp;
456         register i;
457         short *hc;
458         short *places = cc_places[length];
459         struct cc **pp = tokens;
460         short threshold = thresh(length);
461 #ifndef cc_weight
462         short wthreshold = wthresh(length);
463         short limit = wlimit(length);
464 #endif
465         int time;
466         short pc = tt.tt_padc;
467
468         i = length - 1;
469         bufsize -= i;
470         cp = buffer + i;
471         hc = cc_hashcodes;
472         time = cc_time0;
473         for (i = 0; i < bufsize; i++, time++) {
474                 struct cc **h;
475
476                 {
477                         register short *hc1 = hc;
478                         register short c = *cp++;
479                         register short hh;
480                         if ((hh = *hc1) < 0 || c == pc) {
481                                 *hc1++ = -1;
482                                 hc = hc1;
483                                 continue;
484                         }
485                         h = cc_htab + (*hc1++ = hash(hh, c));
486                         hc = hc1;
487                 }
488                 for (p = *h; p != 0; p = p->hforw)
489                         if (p->length == (char) length) {
490                                 register char *p1 = p->string;
491                                 register char *p2 = cp - length;
492                                 register n = length;
493                                 do
494                                         if (*p1++ != *p2++)
495                                                 goto fail;
496                                 while (--n);
497                                 break;
498                         fail:;
499                         }
500                 if (p == 0) {
501                         p = cc_q1a.qback;
502                         if (p == &cc_q1a ||
503                             p->time >= cc_time0 && p->length == (char) length)
504                                 continue;
505                         if (p->hback != 0)
506                                 if ((*p->hback = p->hforw) != 0)
507                                         p->hforw->hback = p->hback;
508                         {
509                                 register char *p1 = p->string;
510                                 register char *p2 = cp - length;
511                                 register n = length;
512                                 do
513                                         *p1++ = *p2++;
514                                 while (--n);
515                         }
516                         p->length = length;
517 #ifndef cc_weight
518                         p->weight = cc_weight;
519 #endif
520                         p->time = time;
521                         p->bcount = 1;
522                         p->ccount = 0;
523                         p->flag = 0;
524                         if ((p->hforw = *h) != 0)
525                                 p->hforw->hback = &p->hforw;
526                         *h = p;
527                         p->hback = h;
528                         qinsert(p, &cc_q1a);
529                         places[i] = -1;
530                         p->places = i;
531 #ifdef STATS
532                         ntoken_stat++;
533 #endif
534                 } else if (p->time < cc_time0) {
535 #ifndef cc_weight
536                         if ((p->weight += p->time - time) < 0)
537                                 p->weight = cc_weight;
538                         else if ((p->weight += cc_weight) > limit)
539                                 p->weight = limit;
540 #endif
541                         p->time = time;
542                         p->bcount = 1;
543                         p->ccount = 0;
544                         if (p->code >= 0) {
545                                 p->flag = 1;
546                                 *pp++ = p;
547                         } else
548 #ifndef cc_weight
549                         if (p->weight >= wthreshold) {
550                                 p->flag = 1;
551                                 *pp++ = p;
552                                 qinsert(p, &cc_q1b);
553                         } else
554 #endif
555                         {
556                                 p->flag = 0;
557                                 qinsert(p, &cc_q1a);
558                         }
559                         places[i] = -1;
560                         p->places = i;
561 #ifdef STATS
562                         ntoken_stat++;
563 #endif
564                 } else if (p->time + length > time) {
565                         /*
566                          * overlapping token, don't count as two and
567                          * don't update time, but do adjust weight to offset
568                          * the difference
569                          */
570 #ifndef cc_weight
571                         if (cc_weight != 0) {   /* XXX */
572                                 p->weight += time - p->time;
573                                 if (!p->flag && p->weight >= wthreshold) {
574                                         p->flag = 1;
575                                         *pp++ = p;
576                                         qinsert(p, &cc_q1b);
577                                 }
578                         }
579 #endif
580                         places[i] = p->places;
581                         p->places = i;
582                 } else {
583 #ifndef cc_weight
584                         if ((p->weight += p->time - time) < 0)
585                                 p->weight = cc_weight;
586                         else if ((p->weight += cc_weight) > limit)
587                                 p->weight = limit;
588 #endif
589                         p->time = time;
590                         p->bcount++;
591                         if (!p->flag &&
592                             /* code must be < 0 if flag false here */
593                             (p->bcount >= threshold
594 #ifndef cc_weight
595                              || p->weight >= wthreshold
596 #endif
597                              )) {
598                                 p->flag = 1;
599                                 *pp++ = p;
600                                 qinsert(p, &cc_q1b);
601                         }
602                         places[i] = p->places;
603                         p->places = i;
604                 }
605         }
606         if ((i = pp - tokens) > 0) {
607                 *pp = 0;
608                 if (cc_reverse)
609                         cc_sweep_reverse(tokens, places);
610                 if (cc_sort && i > 1) {
611                         int cc_token_compare();
612                         qsort((char *) tokens, i, sizeof *tokens,
613                               cc_token_compare);
614                 }
615                 if (cc_chop) {
616                         if ((i = i * cc_chop / 100) == 0)
617                                 i = 1;
618                         tokens[i] = 0;
619                 }
620                 i++;
621         }
622         return i;
623 }
624
625 cc_sweep_reverse(pp, places)
626         register struct cc **pp;
627         register short *places;
628 {
629         register struct cc *p;
630         register short front, back, t;
631
632         while ((p = *pp++) != 0) {
633                 back = -1;
634                 t = p->places;
635                 /* the list is never empty */
636                 do {
637                         front = places[t];
638                         places[t] = back;
639                         back = t;
640                 } while ((t = front) >= 0);
641                 p->places = back;
642         }
643 }
644
645 cc_compress_phase(output, bufsize, tokens, ntoken)
646         struct cc **output;
647         struct cc **tokens;
648 {
649         register i;
650
651         bzero((char *) output, bufsize * sizeof *output);
652         for (i = 0; i < cc_npass0; i++)
653                 cc_compress_phase1(output, tokens, ntoken, 0);
654         for (i = 0; i < cc_npass1; i++)
655                 cc_compress_phase1(output, tokens, ntoken, 1);
656         cc_compress_cleanup(output, bufsize);
657 }
658
659 cc_compress_phase1(output, tokens, ntoken, flag)
660         register struct cc **output;
661         struct cc **tokens;
662 {
663         register struct cc **pp;
664 #ifdef STATS
665         register int i = 0;
666         int nt = 0, cc = 0, nc = 0;
667 #endif
668
669 #ifdef STATS
670         if (verbose >= 0)
671                 time_begin();
672         if (verbose > 0)
673                 printf("Compress:");
674 #endif
675         pp = tokens;
676         while (pp < tokens + ntoken) {
677 #ifdef STATS
678                 if (verbose > 0) {
679                         ntoken_stat = 0;
680                         ccount_stat = 0;
681                         ncover_stat = 0;
682                         if (i > 2) {
683                                 printf("\n         ");
684                                 i = 0;
685                         }
686                         i++;
687                         printf(" (%d", (*pp)->length);
688                         (void) fflush(stdout);
689                 }
690 #endif
691                 pp += cc_compress(output, pp, flag);
692 #ifdef STATS
693                 if (verbose > 0) {
694                         printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
695                                ncover_stat);
696                         nt += ntoken_stat;
697                         cc += ccount_stat;
698                         nc += ncover_stat;
699                 }
700 #endif
701         }
702 #ifdef STATS
703         if (verbose > 0)
704                 printf("\n   total: (%dt %du %dc)\n", nt, cc, nc);
705         if (verbose >= 0)
706                 time_end();
707 #endif
708 }
709
710 cc_compress_cleanup(output, bufsize)
711         register struct cc **output;
712 {
713         register struct cc **end;
714
715         /* the previous output phase may have been interrupted */
716         qinsertq(&cc_q0b, &cc_q0a);
717         for (end = output + bufsize; output < end;) {
718                 register struct cc *p;
719                 register length;
720                 if ((p = *output) == 0) {
721                         output++;
722                         continue;
723                 }
724                 length = p->length;
725                 if (!p->flag) {
726                 } else if (p->code >= 0) {
727                         qinsert(p, &cc_q0b);
728                         p->flag = 0;
729                 } else if (p->ccount == 0) {
730                         *output = 0;
731                 } else if (p->ccount >= thresh(length)
732 #ifndef cc_weight
733                            || wthreshp(p->weight, length)
734 #endif
735                            ) {
736                         p->flag = 0;
737                 } else {
738                         p->ccount = 0;
739                         *output = 0;
740                 }
741                 output += length;
742         }
743 }
744
745 cc_compress(output, tokens, flag)
746         struct cc **output;
747         struct cc **tokens;
748         char flag;
749 {
750         struct cc **pp = tokens;
751         register struct cc *p = *pp++;
752         int length = p->length;
753         int threshold = thresh(length);
754 #ifndef cc_weight
755         short wthreshold = wthresh(length);
756 #endif
757         short *places = cc_places[length];
758         int *initial_scores = cc_initial_scores[length];
759         int initial_score0 = put_token_score(length);
760
761         do {
762                 int score;
763                 register struct cc_undo *undop;
764                 int ccount;
765 #ifdef STATS
766                 int ncover;
767 #endif
768                 int i;
769
770                 ccount = p->ccount;
771                 if ((short) ccount >= p->bcount)
772                         continue;
773                 if (p->code >= 0 || ccount >= threshold)
774                         score = 0;
775 #ifndef cc_weight
776                 else if (p->weight >= wthreshold)
777                         /* allow one fewer match than normal */
778                         /* XXX, should adjust for ccount */
779                         score = - tt.tt_set_token_cost;
780 #endif
781                 else
782                         score = initial_scores[ccount];
783                 undop = cc_undo;
784 #ifdef STATS
785                 ncover = 0;
786 #endif
787                 for (i = p->places; i >= 0; i = places[i]) {
788                         register struct cc **jp;
789                         register struct cc *x;
790                         register struct cc **ip = output + i;
791                         register score0 = initial_score0;
792                         struct cc **iip = ip + length;
793                         struct cc_undo *undop1 = undop;
794
795                         if ((x = *(jp = ip)) != 0)
796                                 goto z;
797                         while (--jp >= output)
798                                 if ((x = *jp) != 0) {
799                                         if (jp + x->length > ip)
800                                                 goto z;
801                                         break;
802                                 }
803                         jp = ip + 1;
804                         while (jp < iip) {
805                                 if ((x = *jp) == 0) {
806                                         jp++;
807                                         continue;
808                                 }
809                         z:
810                                 if (x == p)
811                                         goto undo;
812 #ifdef STATS
813                                 ncover++;
814 #endif
815                                 undop->pos = jp;
816                                 undop->val = x;
817                                 undop++;
818                                 *jp = 0;
819                                 x->ccount--;
820                                 score_adjust(score0, x);
821                                 if (score0 < 0 && flag)
822                                         goto undo;
823                                 jp += x->length;
824                         }
825                         undop->pos = ip;
826                         undop->val = 0;
827                         undop++;
828                         *ip = p;
829                         ccount++;
830                         score += score0;
831                         continue;
832                 undo:
833                         while (--undop >= undop1)
834                                 if (*undop->pos = x = undop->val)
835                                         x->ccount++;
836                         undop++;
837                 }
838                 if (score > 0) {
839 #ifdef STATS
840                         ccount_stat += ccount - p->ccount;
841                         ntoken_stat++;
842                         ncover_stat += ncover;
843 #endif
844                         p->ccount = ccount;
845                 } else {
846                         register struct cc_undo *u = cc_undo;
847                         while (--undop >= u) {
848                                 register struct cc *x;
849                                 if (*undop->pos = x = undop->val)
850                                         x->ccount++;
851                         }
852                 }
853         } while ((p = *pp++) != 0);
854         return pp - tokens;
855 }
856
857 cc_output_phase(buffer, output, bufsize)
858         register char *buffer;
859         register struct cc **output;
860         register bufsize;
861 {
862         register i;
863         register struct cc *p, *p1;
864
865         for (i = 0; i < bufsize;) {
866                 if ((p = output[i]) == 0) {
867                         ttputc(buffer[i]);
868                         i++;
869                 } else if (p->code >= 0) {
870                         if (--p->ccount == 0)
871                                 qinsert(p, &cc_q0a);
872                         (*tt.tt_put_token)(p->code, p->string, p->length);
873                         wwntokuse++;
874                         wwntoksave += put_token_score(p->length);
875                         i += p->length;
876                 } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
877                         p->code = p1->code;
878                         p1->code = -1;
879                         qinsert(p1, &cc_q1a);
880                         if (--p->ccount == 0)
881                                 qinsert(p, &cc_q0a);
882                         else
883                                 qinsert(p, &cc_q0b);
884                         (*tt.tt_set_token)(p->code, p->string, p->length);
885                         wwntokdef++;
886                         wwntoksave -= tt.tt_set_token_cost;
887                         i += p->length;
888                 } else {
889                         p->ccount--;
890                         ttwrite(p->string, p->length);
891                         wwntokbad++;
892                         i += p->length;
893                 }
894         }
895         wwntokc += bufsize;
896 }
897
898 cc_token_compare(p1, p2)
899         struct cc **p1, **p2;
900 {
901         return (*p2)->bcount - (*p1)->bcount;
902 }