window(1): Remove __RCSID & friends.
[dragonfly.git] / usr.bin / window / compress.c
CommitLineData
2c872e05 1/* @(#)compress.c 8.1 (Berkeley) 6/6/93 */
abecab39
SW
2/* $NetBSD: compress.c,v 1.7 2009/04/14 08:50:06 lukem Exp $ */
3
984263bc
MD
4/*
5 * Copyright (c) 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Edward Wang at The University of California, Berkeley.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
abecab39 19 * 3. Neither the name of the University nor the names of its contributors
984263bc
MD
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
984263bc 36#include <fcntl.h>
abecab39 37#include <stdio.h>
984263bc 38#include <stdlib.h>
7a2e9d59 39#include <string.h>
abecab39 40#include "defs.h"
7a2e9d59
SW
41#include "tt.h"
42
abecab39 43 /* special */
984263bc
MD
44int cc_trace = 0;
45FILE *cc_trace_fp;
46
47 /* tunable parameters */
48
49int cc_reverse = 1;
50int cc_sort = 0;
51int cc_chop = 0;
52
53int cc_token_max = 8; /* <= TOKEN_MAX */
54int cc_token_min = 2; /* > tt.tt_put_token_cost */
55int cc_npass0 = 1;
56int cc_npass1 = 1;
57
58int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */
59
60int cc_ntoken = 8192;
61
62#define cc_weight XXX
63#ifndef cc_weight
64int cc_weight = 0;
65#endif
66
67#define TOKEN_MAX 16
68
69struct cc {
70 char string[TOKEN_MAX];
71 char length;
72 char flag;
73#ifndef cc_weight
74 short weight;
75#endif
76 long time; /* time last seen */
77 short bcount; /* count in this buffer */
78 short ccount; /* count in compression */
79 short places; /* places in the buffer */
80 short code; /* token code */
81 struct cc *qforw, *qback;
82 struct cc *hforw, **hback;
83};
84
85short cc_thresholds[TOKEN_MAX + 1];
86#define thresh(length) (cc_thresholds[length])
87#define threshp(code, count, length) \
88 ((code) >= 0 || (short) (count) >= cc_thresholds[length])
89
90#ifndef cc_weight
91short cc_wthresholds[TOKEN_MAX + 1];
92#define wthresh(length) (cc_wthresholds[length])
93#define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
94#else
95#define wthreshp(weight, length) (0)
96#endif
97
98#ifndef cc_weight
99short cc_wlimits[TOKEN_MAX + 1];
100#define wlimit(length) (cc_wlimits[length])
101#endif
102
103#define put_token_score(length) ((length) - tt.tt_put_token_cost)
104
105int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
106#define score_adjust(score, p) \
107 do { \
abecab39
SW
108 int _length = (p)->length; \
109 int _ccount = (p)->ccount; \
110 if (threshp((p)->code, _ccount, _length) || \
111 wthreshp((p)->weight, _length)) /* XXX */ \
112 (score) -= _length - tt.tt_put_token_cost; \
984263bc 113 else \
abecab39 114 (score) += cc_score_adjustments[_length][_ccount]; \
984263bc
MD
115 } while (0)
116
117int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
118
119struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
120
121#define qinsert(p1, p2) \
122 do { \
abecab39
SW
123 struct cc *forw = (p1)->qforw; \
124 struct cc *back = (p1)->qback; \
984263bc
MD
125 back->qforw = forw; \
126 forw->qback = back; \
127 forw = (p2)->qforw; \
128 (p1)->qforw = forw; \
129 forw->qback = (p1); \
130 (p2)->qforw = (p1); \
131 (p1)->qback = (p2); \
132 } while (0)
133
134#define qinsertq(q, p) \
135 ((q)->qforw == (q) ? 0 : \
136 ((q)->qback->qforw = (p)->qforw, \
137 (p)->qforw->qback = (q)->qback, \
138 (q)->qforw->qback = (p), \
139 (p)->qforw = (q)->qforw, \
140 (q)->qforw = (q), \
141 (q)->qback = (q)))
142
143#define H (14)
144#define HSIZE (1 << H)
abecab39 145#define hash(h, c) (((((h) >> (H - 8)) | (h) << 8) ^ (c)) & (HSIZE - 1))
984263bc
MD
146
147char *cc_buffer;
148struct cc **cc_output; /* the output array */
149short *cc_places[TOKEN_MAX + 1];
150short *cc_hashcodes; /* for computing hashcodes */
151struct cc **cc_htab; /* the hash table */
152struct cc **cc_tokens; /* holds all the active tokens */
153struct cc_undo {
154 struct cc **pos;
155 struct cc *val;
156} *cc_undo;
157
158long cc_time, cc_time0;
159
160char *cc_tt_ob, *cc_tt_obe;
161
abecab39
SW
162int cc_compress(struct cc **, struct cc **, char);
163void cc_compress_cleanup(struct cc **, int);
164void cc_compress_phase(struct cc **, int, struct cc **, int);
165void cc_compress_phase1(struct cc **, struct cc **, int, int);
166void cc_output_phase(char *, struct cc **, int);
167int cc_sweep(char *, int, struct cc **, int);
168void cc_sweep0(char *, int, int);
169int cc_sweep_phase(char *, int, struct cc **);
170void cc_sweep_reverse(struct cc **, short *);
171int cc_token_compare(const void *, const void *);
172
173int
174ccinit(void)
984263bc 175{
abecab39
SW
176 int i, j;
177 struct cc *p;
984263bc
MD
178
179 if (tt.tt_token_max > cc_token_max)
180 tt.tt_token_max = cc_token_max;
181 if (tt.tt_token_min < cc_token_min)
182 tt.tt_token_min = cc_token_min;
183 if (tt.tt_token_min > tt.tt_token_max) {
184 tt.tt_ntoken = 0;
185 return 0;
186 }
187 if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */
188 tt.tt_ntoken = cc_ntoken / 2;
189#define C(x) (sizeof (x) / sizeof *(x))
abecab39 190 for (i = 0; i < (int)C(cc_thresholds); i++) {
984263bc
MD
191 int h = i - tt.tt_put_token_cost;
192 if (h > 0)
193 cc_thresholds[i] =
194 (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
195 else
196 cc_thresholds[i] = 0;
197 }
abecab39 198 for (i = 0; i < (int)C(cc_score_adjustments); i++) {
984263bc 199 int t = cc_thresholds[i];
abecab39 200 for (j = 0; j < (int)C(*cc_score_adjustments); j++) {
984263bc
MD
201 if (j >= t)
202 cc_score_adjustments[i][j] =
203 - (i - tt.tt_put_token_cost);
204 else if (j < t - 1)
205 cc_score_adjustments[i][j] = 0;
206 else
207 /*
208 * cost now is
209 * length * (ccount + 1) a
210 * cost before was
211 * set-token-cost + length +
212 * ccount * put-token-cost b
213 * the score adjustment is (b - a)
214 */
215 cc_score_adjustments[i][j] =
216 tt.tt_set_token_cost + i +
217 j * tt.tt_put_token_cost -
218 i * (j + 1);
219 if (j >= t)
220 cc_initial_scores[i][j] = 0;
221 else
222 /*
223 * - (set-token-cost +
224 * (length - put-token-cost) -
225 * (length - put-token-cost) * ccount)
226 */
227 cc_initial_scores[i][j] =
228 - (tt.tt_set_token_cost +
229 (i - tt.tt_put_token_cost) -
230 (i - tt.tt_put_token_cost) * j);
231 }
232 }
233#ifndef cc_weight
234 for (i = 1; i < C(cc_wthresholds); i++) {
235 cc_wthresholds[i] =
236 ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
237 i / 5 + 1) *
238 cc_weight + 1;
239 cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
240 }
241#endif
242#undef C
243 if ((cc_output = (struct cc **)
244 malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
245 goto nomem;
246 if ((cc_hashcodes = (short *)
247 malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
248 goto nomem;
249 if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
250 goto nomem;
251 if ((cc_tokens = (struct cc **)
252 malloc((unsigned)
253 (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
254 sizeof *cc_tokens)) == 0)
255 goto nomem;
256 if ((cc_undo = (struct cc_undo *)
257 malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
258 goto nomem;
259 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
260 if ((cc_places[i] = (short *)
261 malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
262 goto nomem;
263 cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
264 cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
265 cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
266 cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
267 if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
268 goto nomem;
269 for (i = 0; i < tt.tt_ntoken; i++) {
270 p->code = i;
271 p->time = -1;
272 p->qback = cc_q0a.qback;
273 p->qforw = &cc_q0a;
274 p->qback->qforw = p;
275 cc_q0a.qback = p;
276 p++;
277 }
278 for (; i < cc_ntoken; i++) {
279 p->code = -1;
280 p->time = -1;
281 p->qback = cc_q1a.qback;
282 p->qforw = &cc_q1a;
283 p->qback->qforw = p;
284 cc_q1a.qback = p;
285 p++;
286 }
287 cc_tt_ob = tt_ob;
288 cc_tt_obe = tt_obe;
289 if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
290 goto nomem;
291 return 0;
292nomem:
293 wwerrno = WWE_NOMEM;
294 return -1;
295}
296
abecab39
SW
297void
298ccstart(void)
984263bc 299{
984263bc
MD
300 ttflush();
301 tt_obp = tt_ob = cc_buffer;
302 tt_obe = tt_ob + cc_bufsize;
303 tt.tt_flush = ccflush;
304 if (cc_trace) {
305 cc_trace_fp = fopen("window-trace", "a");
306 (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
307 }
308 ccreset();
309}
310
abecab39
SW
311void
312ccreset(void)
984263bc 313{
abecab39 314 struct cc *p;
984263bc 315
abecab39 316 memset((char *) cc_htab, 0, HSIZE * sizeof *cc_htab);
984263bc
MD
317 for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
318 p->hback = 0;
319 for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
320 p->hback = 0;
321}
322
abecab39
SW
323void
324ccend(void)
984263bc
MD
325{
326
327 ttflush();
328 tt_obp = tt_ob = cc_tt_ob;
329 tt_obe = cc_tt_obe;
330 tt.tt_flush = 0;
331 if (cc_trace_fp != NULL) {
332 (void) fclose(cc_trace_fp);
333 cc_trace_fp = NULL;
334 }
335}
336
abecab39
SW
337void
338ccflush(void)
984263bc
MD
339{
340 int bufsize = tt_obp - tt_ob;
341 int n;
342
343 if (tt_ob != cc_buffer)
344 abort();
345 if (cc_trace_fp != NULL) {
346 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
347 (void) putc(-1, cc_trace_fp);
348 }
349 tt.tt_flush = 0;
350 (*tt.tt_compress)(1);
351 if (bufsize < tt.tt_token_min) {
352 ttflush();
353 goto out;
354 }
355 tt_obp = tt_ob = cc_tt_ob;
356 tt_obe = cc_tt_obe;
357 cc_time0 = cc_time;
358 cc_time += bufsize;
359 n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
360 cc_compress_phase(cc_output, bufsize, cc_tokens, n);
361 cc_output_phase(cc_buffer, cc_output, bufsize);
362 ttflush();
363 tt_obp = tt_ob = cc_buffer;
364 tt_obe = cc_buffer + cc_bufsize;
365out:
366 (*tt.tt_compress)(0);
367 tt.tt_flush = ccflush;
368}
369
abecab39
SW
370int
371cc_sweep_phase(char *buffer, int bufsize, struct cc **tokens)
984263bc 372{
abecab39
SW
373 struct cc **pp = tokens;
374 int i, n;
984263bc
MD
375#ifdef STATS
376 int nn, ii;
377#endif
378
379#ifdef STATS
380 if (verbose >= 0)
381 time_begin();
382 if (verbose > 0)
383 printf("Sweep:");
384#endif
385 cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
386#ifdef STATS
387 ntoken_stat = 0;
388 nn = 0;
389 ii = 0;
390#endif
391 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
392#ifdef STATS
393 if (verbose > 0) {
394 if (ii > 7) {
395 printf("\n ");
396 ii = 0;
397 }
398 ii++;
399 printf(" (%d", i);
400 (void) fflush(stdout);
401 }
402#endif
403 n = cc_sweep(buffer, bufsize, pp, i);
404 pp += n;
405#ifdef STATS
406 if (verbose > 0) {
407 if (--n > 0) {
408 printf(" %d", n);
409 nn += n;
410 }
411 putchar(')');
412 }
413#endif
414 }
415 qinsertq(&cc_q1b, &cc_q1a);
416#ifdef STATS
417 if (verbose > 0)
418 printf("\n %d tokens, %d candidates\n",
419 ntoken_stat, nn);
420 if (verbose >= 0)
421 time_end();
422#endif
423 return pp - tokens;
424}
425
abecab39
SW
426void
427cc_sweep0(char *buffer, int n, int length)
984263bc 428{
abecab39
SW
429 char *p;
430 short *hc;
431 int i;
432 short c;
433 short pc = tt.tt_padc;
984263bc
MD
434
435 /* n and length are at least 1 */
436 p = buffer++;
437 hc = cc_hashcodes;
438 i = n;
439 do {
440 if ((*hc++ = *p++) == pc)
441 hc[-1] = -1;
442 } while (--i);
443 while (--length) {
444 p = buffer++;
445 hc = cc_hashcodes;
446 for (i = n--; --i;) {
447 if ((c = *p++) == pc || *hc < 0)
448 c = -1;
449 else
450 c = hash(*hc, c);
451 *hc++ = c;
452 }
453 }
454}
455
abecab39
SW
456int
457cc_sweep(char *buffer, int bufsize, struct cc **tokens, int length)
984263bc 458{
abecab39
SW
459 struct cc *p;
460 char *cp;
461 int i;
984263bc
MD
462 short *hc;
463 short *places = cc_places[length];
464 struct cc **pp = tokens;
465 short threshold = thresh(length);
466#ifndef cc_weight
467 short wthreshold = wthresh(length);
468 short limit = wlimit(length);
469#endif
abecab39 470 int time0;
984263bc
MD
471 short pc = tt.tt_padc;
472
473 i = length - 1;
474 bufsize -= i;
475 cp = buffer + i;
476 hc = cc_hashcodes;
abecab39
SW
477 time0 = cc_time0;
478 for (i = 0; i < bufsize; i++, time0++) {
984263bc
MD
479 struct cc **h;
480
481 {
abecab39
SW
482 short *hc1 = hc;
483 short c = *cp++;
484 short hh;
984263bc
MD
485 if ((hh = *hc1) < 0 || c == pc) {
486 *hc1++ = -1;
487 hc = hc1;
488 continue;
489 }
490 h = cc_htab + (*hc1++ = hash(hh, c));
491 hc = hc1;
492 }
493 for (p = *h; p != 0; p = p->hforw)
494 if (p->length == (char) length) {
abecab39
SW
495 char *p1 = p->string;
496 char *p2 = cp - length;
497 int n = length;
984263bc
MD
498 do
499 if (*p1++ != *p2++)
500 goto fail;
501 while (--n);
502 break;
503 fail:;
504 }
505 if (p == 0) {
506 p = cc_q1a.qback;
507 if (p == &cc_q1a ||
abecab39 508 (p->time >= cc_time0 && p->length == (char) length))
984263bc
MD
509 continue;
510 if (p->hback != 0)
511 if ((*p->hback = p->hforw) != 0)
512 p->hforw->hback = p->hback;
513 {
abecab39
SW
514 char *p1 = p->string;
515 char *p2 = cp - length;
516 int n = length;
984263bc
MD
517 do
518 *p1++ = *p2++;
519 while (--n);
520 }
521 p->length = length;
522#ifndef cc_weight
523 p->weight = cc_weight;
524#endif
abecab39 525 p->time = time0;
984263bc
MD
526 p->bcount = 1;
527 p->ccount = 0;
528 p->flag = 0;
529 if ((p->hforw = *h) != 0)
530 p->hforw->hback = &p->hforw;
531 *h = p;
532 p->hback = h;
533 qinsert(p, &cc_q1a);
534 places[i] = -1;
535 p->places = i;
536#ifdef STATS
537 ntoken_stat++;
538#endif
539 } else if (p->time < cc_time0) {
540#ifndef cc_weight
abecab39 541 if ((p->weight += p->time - time0) < 0)
984263bc
MD
542 p->weight = cc_weight;
543 else if ((p->weight += cc_weight) > limit)
544 p->weight = limit;
545#endif
abecab39 546 p->time = time0;
984263bc
MD
547 p->bcount = 1;
548 p->ccount = 0;
549 if (p->code >= 0) {
550 p->flag = 1;
551 *pp++ = p;
552 } else
553#ifndef cc_weight
554 if (p->weight >= wthreshold) {
555 p->flag = 1;
556 *pp++ = p;
557 qinsert(p, &cc_q1b);
558 } else
559#endif
560 {
561 p->flag = 0;
562 qinsert(p, &cc_q1a);
563 }
564 places[i] = -1;
565 p->places = i;
566#ifdef STATS
567 ntoken_stat++;
568#endif
abecab39 569 } else if (p->time + length > time0) {
984263bc
MD
570 /*
571 * overlapping token, don't count as two and
abecab39 572 * don't update time0, but do adjust weight to offset
984263bc
MD
573 * the difference
574 */
575#ifndef cc_weight
576 if (cc_weight != 0) { /* XXX */
abecab39 577 p->weight += time0 - p->time;
984263bc
MD
578 if (!p->flag && p->weight >= wthreshold) {
579 p->flag = 1;
580 *pp++ = p;
581 qinsert(p, &cc_q1b);
582 }
583 }
584#endif
585 places[i] = p->places;
586 p->places = i;
587 } else {
588#ifndef cc_weight
abecab39 589 if ((p->weight += p->time - time0) < 0)
984263bc
MD
590 p->weight = cc_weight;
591 else if ((p->weight += cc_weight) > limit)
592 p->weight = limit;
593#endif
abecab39 594 p->time = time0;
984263bc
MD
595 p->bcount++;
596 if (!p->flag &&
597 /* code must be < 0 if flag false here */
598 (p->bcount >= threshold
599#ifndef cc_weight
600 || p->weight >= wthreshold
601#endif
602 )) {
603 p->flag = 1;
604 *pp++ = p;
605 qinsert(p, &cc_q1b);
606 }
607 places[i] = p->places;
608 p->places = i;
609 }
610 }
611 if ((i = pp - tokens) > 0) {
612 *pp = 0;
613 if (cc_reverse)
614 cc_sweep_reverse(tokens, places);
615 if (cc_sort && i > 1) {
984263bc
MD
616 qsort((char *) tokens, i, sizeof *tokens,
617 cc_token_compare);
618 }
619 if (cc_chop) {
620 if ((i = i * cc_chop / 100) == 0)
621 i = 1;
622 tokens[i] = 0;
623 }
624 i++;
625 }
626 return i;
627}
628
abecab39
SW
629void
630cc_sweep_reverse(struct cc **pp, short int *places)
984263bc 631{
abecab39
SW
632 struct cc *p;
633 short frnt, back, t;
984263bc
MD
634
635 while ((p = *pp++) != 0) {
636 back = -1;
637 t = p->places;
638 /* the list is never empty */
639 do {
abecab39 640 frnt = places[t];
984263bc
MD
641 places[t] = back;
642 back = t;
abecab39 643 } while ((t = frnt) >= 0);
984263bc
MD
644 p->places = back;
645 }
646}
647
abecab39
SW
648void
649cc_compress_phase(struct cc **output, int bufsize, struct cc **tokens, int ntoken)
984263bc 650{
abecab39 651 int i;
984263bc 652
abecab39 653 memset((char *) output, 0, bufsize * sizeof *output);
984263bc
MD
654 for (i = 0; i < cc_npass0; i++)
655 cc_compress_phase1(output, tokens, ntoken, 0);
656 for (i = 0; i < cc_npass1; i++)
657 cc_compress_phase1(output, tokens, ntoken, 1);
658 cc_compress_cleanup(output, bufsize);
659}
660
abecab39
SW
661void
662cc_compress_phase1(struct cc **output, struct cc **tokens, int ntoken, int flag)
984263bc 663{
abecab39 664 struct cc **pp;
984263bc 665#ifdef STATS
abecab39 666 int i = 0;
984263bc
MD
667 int nt = 0, cc = 0, nc = 0;
668#endif
669
670#ifdef STATS
671 if (verbose >= 0)
672 time_begin();
673 if (verbose > 0)
674 printf("Compress:");
675#endif
676 pp = tokens;
677 while (pp < tokens + ntoken) {
678#ifdef STATS
679 if (verbose > 0) {
680 ntoken_stat = 0;
681 ccount_stat = 0;
682 ncover_stat = 0;
683 if (i > 2) {
684 printf("\n ");
685 i = 0;
686 }
687 i++;
688 printf(" (%d", (*pp)->length);
689 (void) fflush(stdout);
690 }
691#endif
692 pp += cc_compress(output, pp, flag);
693#ifdef STATS
694 if (verbose > 0) {
695 printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
696 ncover_stat);
697 nt += ntoken_stat;
698 cc += ccount_stat;
699 nc += ncover_stat;
700 }
701#endif
702 }
703#ifdef STATS
704 if (verbose > 0)
705 printf("\n total: (%dt %du %dc)\n", nt, cc, nc);
706 if (verbose >= 0)
707 time_end();
708#endif
709}
710
abecab39
SW
711void
712cc_compress_cleanup(struct cc **output, int bufsize)
984263bc 713{
abecab39 714 struct cc **end;
984263bc
MD
715
716 /* the previous output phase may have been interrupted */
717 qinsertq(&cc_q0b, &cc_q0a);
718 for (end = output + bufsize; output < end;) {
abecab39
SW
719 struct cc *p;
720 int length;
984263bc
MD
721 if ((p = *output) == 0) {
722 output++;
723 continue;
724 }
725 length = p->length;
726 if (!p->flag) {
727 } else if (p->code >= 0) {
728 qinsert(p, &cc_q0b);
729 p->flag = 0;
730 } else if (p->ccount == 0) {
731 *output = 0;
732 } else if (p->ccount >= thresh(length)
733#ifndef cc_weight
734 || wthreshp(p->weight, length)
735#endif
736 ) {
737 p->flag = 0;
738 } else {
739 p->ccount = 0;
740 *output = 0;
741 }
742 output += length;
743 }
744}
745
abecab39
SW
746int
747cc_compress(struct cc **output, struct cc **tokens, char flag)
984263bc
MD
748{
749 struct cc **pp = tokens;
abecab39 750 struct cc *p = *pp++;
984263bc
MD
751 int length = p->length;
752 int threshold = thresh(length);
753#ifndef cc_weight
754 short wthreshold = wthresh(length);
755#endif
756 short *places = cc_places[length];
757 int *initial_scores = cc_initial_scores[length];
758 int initial_score0 = put_token_score(length);
759
760 do {
761 int score;
abecab39 762 struct cc_undo *undop;
984263bc
MD
763 int ccount;
764#ifdef STATS
765 int ncover;
766#endif
767 int i;
768
769 ccount = p->ccount;
770 if ((short) ccount >= p->bcount)
771 continue;
772 if (p->code >= 0 || ccount >= threshold)
773 score = 0;
774#ifndef cc_weight
775 else if (p->weight >= wthreshold)
776 /* allow one fewer match than normal */
777 /* XXX, should adjust for ccount */
778 score = - tt.tt_set_token_cost;
779#endif
780 else
781 score = initial_scores[ccount];
782 undop = cc_undo;
783#ifdef STATS
784 ncover = 0;
785#endif
786 for (i = p->places; i >= 0; i = places[i]) {
abecab39
SW
787 struct cc **jp;
788 struct cc *x;
789 struct cc **ip = output + i;
790 int score0 = initial_score0;
984263bc
MD
791 struct cc **iip = ip + length;
792 struct cc_undo *undop1 = undop;
793
794 if ((x = *(jp = ip)) != 0)
795 goto z;
796 while (--jp >= output)
797 if ((x = *jp) != 0) {
798 if (jp + x->length > ip)
799 goto z;
800 break;
801 }
802 jp = ip + 1;
803 while (jp < iip) {
804 if ((x = *jp) == 0) {
805 jp++;
806 continue;
807 }
808 z:
809 if (x == p)
810 goto undo;
811#ifdef STATS
812 ncover++;
813#endif
814 undop->pos = jp;
815 undop->val = x;
816 undop++;
817 *jp = 0;
818 x->ccount--;
819 score_adjust(score0, x);
820 if (score0 < 0 && flag)
821 goto undo;
822 jp += x->length;
823 }
824 undop->pos = ip;
825 undop->val = 0;
826 undop++;
827 *ip = p;
828 ccount++;
829 score += score0;
830 continue;
831 undo:
832 while (--undop >= undop1)
abecab39 833 if ((*undop->pos = x = undop->val))
984263bc
MD
834 x->ccount++;
835 undop++;
836 }
837 if (score > 0) {
838#ifdef STATS
839 ccount_stat += ccount - p->ccount;
840 ntoken_stat++;
841 ncover_stat += ncover;
842#endif
843 p->ccount = ccount;
844 } else {
abecab39 845 struct cc_undo *u = cc_undo;
984263bc 846 while (--undop >= u) {
abecab39
SW
847 struct cc *x;
848 if ((*undop->pos = x = undop->val))
984263bc
MD
849 x->ccount++;
850 }
851 }
852 } while ((p = *pp++) != 0);
853 return pp - tokens;
854}
855
abecab39
SW
856void
857cc_output_phase(char *buffer, struct cc **output, int bufsize)
984263bc 858{
abecab39
SW
859 int i;
860 struct cc *p, *p1;
984263bc
MD
861
862 for (i = 0; i < bufsize;) {
863 if ((p = output[i]) == 0) {
864 ttputc(buffer[i]);
865 i++;
866 } else if (p->code >= 0) {
867 if (--p->ccount == 0)
868 qinsert(p, &cc_q0a);
869 (*tt.tt_put_token)(p->code, p->string, p->length);
870 wwntokuse++;
871 wwntoksave += put_token_score(p->length);
872 i += p->length;
873 } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
874 p->code = p1->code;
875 p1->code = -1;
876 qinsert(p1, &cc_q1a);
877 if (--p->ccount == 0)
878 qinsert(p, &cc_q0a);
879 else
880 qinsert(p, &cc_q0b);
881 (*tt.tt_set_token)(p->code, p->string, p->length);
882 wwntokdef++;
883 wwntoksave -= tt.tt_set_token_cost;
884 i += p->length;
885 } else {
886 p->ccount--;
887 ttwrite(p->string, p->length);
888 wwntokbad++;
889 i += p->length;
890 }
891 }
892 wwntokc += bufsize;
893}
894
abecab39
SW
895int
896cc_token_compare(const void *p1, const void *p2)
984263bc 897{
abecab39
SW
898 const struct cc * const * vp1 = p1;
899 const struct cc * const * vp2 = p2;
900 return (*vp2)->bcount - (*vp1)->bcount;
984263bc 901}