2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Edward Wang at The University of California, Berkeley.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * @(#)compress.c 8.1 (Berkeley) 6/6/93
37 * $FreeBSD: src/usr.bin/window/compress.c,v 1.2.12.1 2001/05/17 09:45:00 obrien Exp $
38 * $DragonFly: src/usr.bin/window/compress.c,v 1.2 2003/06/17 04:29:33 dillon Exp $
53 /* tunable parameters */
59 int cc_token_max = 8; /* <= TOKEN_MAX */
60 int cc_token_min = 2; /* > tt.tt_put_token_cost */
64 int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */
76 char string[TOKEN_MAX];
82 long time; /* time last seen */
83 short bcount; /* count in this buffer */
84 short ccount; /* count in compression */
85 short places; /* places in the buffer */
86 short code; /* token code */
87 struct cc *qforw, *qback;
88 struct cc *hforw, **hback;
91 short cc_thresholds[TOKEN_MAX + 1];
92 #define thresh(length) (cc_thresholds[length])
93 #define threshp(code, count, length) \
94 ((code) >= 0 || (short) (count) >= cc_thresholds[length])
97 short cc_wthresholds[TOKEN_MAX + 1];
98 #define wthresh(length) (cc_wthresholds[length])
99 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
101 #define wthreshp(weight, length) (0)
105 short cc_wlimits[TOKEN_MAX + 1];
106 #define wlimit(length) (cc_wlimits[length])
109 #define put_token_score(length) ((length) - tt.tt_put_token_cost)
111 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
112 #define score_adjust(score, p) \
114 int length = (p)->length; \
115 int ccount = (p)->ccount; \
116 if (threshp((p)->code, ccount, length) || \
117 wthreshp((p)->weight, length)) /* XXX */ \
118 (score) -= length - tt.tt_put_token_cost; \
120 (score) += cc_score_adjustments[length][ccount]; \
123 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
125 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
127 #define qinsert(p1, p2) \
129 register struct cc *forw = (p1)->qforw; \
130 register struct cc *back = (p1)->qback; \
131 back->qforw = forw; \
132 forw->qback = back; \
133 forw = (p2)->qforw; \
134 (p1)->qforw = forw; \
135 forw->qback = (p1); \
136 (p2)->qforw = (p1); \
137 (p1)->qback = (p2); \
140 #define qinsertq(q, p) \
141 ((q)->qforw == (q) ? 0 : \
142 ((q)->qback->qforw = (p)->qforw, \
143 (p)->qforw->qback = (q)->qback, \
144 (q)->qforw->qback = (p), \
145 (p)->qforw = (q)->qforw, \
150 #define HSIZE (1 << H)
151 #define hash(h, c) ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
154 struct cc **cc_output; /* the output array */
155 short *cc_places[TOKEN_MAX + 1];
156 short *cc_hashcodes; /* for computing hashcodes */
157 struct cc **cc_htab; /* the hash table */
158 struct cc **cc_tokens; /* holds all the active tokens */
164 long cc_time, cc_time0;
166 char *cc_tt_ob, *cc_tt_obe;
171 register struct cc *p;
173 if (tt.tt_token_max > cc_token_max)
174 tt.tt_token_max = cc_token_max;
175 if (tt.tt_token_min < cc_token_min)
176 tt.tt_token_min = cc_token_min;
177 if (tt.tt_token_min > tt.tt_token_max) {
181 if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */
182 tt.tt_ntoken = cc_ntoken / 2;
183 #define C(x) (sizeof (x) / sizeof *(x))
184 for (i = 0; i < C(cc_thresholds); i++) {
185 int h = i - tt.tt_put_token_cost;
188 (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
190 cc_thresholds[i] = 0;
192 for (i = 0; i < C(cc_score_adjustments); i++) {
193 int t = cc_thresholds[i];
194 for (j = 0; j < C(*cc_score_adjustments); j++) {
196 cc_score_adjustments[i][j] =
197 - (i - tt.tt_put_token_cost);
199 cc_score_adjustments[i][j] = 0;
203 * length * (ccount + 1) a
205 * set-token-cost + length +
206 * ccount * put-token-cost b
207 * the score adjustment is (b - a)
209 cc_score_adjustments[i][j] =
210 tt.tt_set_token_cost + i +
211 j * tt.tt_put_token_cost -
214 cc_initial_scores[i][j] = 0;
217 * - (set-token-cost +
218 * (length - put-token-cost) -
219 * (length - put-token-cost) * ccount)
221 cc_initial_scores[i][j] =
222 - (tt.tt_set_token_cost +
223 (i - tt.tt_put_token_cost) -
224 (i - tt.tt_put_token_cost) * j);
228 for (i = 1; i < C(cc_wthresholds); i++) {
230 ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
233 cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
237 if ((cc_output = (struct cc **)
238 malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
240 if ((cc_hashcodes = (short *)
241 malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
243 if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
245 if ((cc_tokens = (struct cc **)
247 (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
248 sizeof *cc_tokens)) == 0)
250 if ((cc_undo = (struct cc_undo *)
251 malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
253 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
254 if ((cc_places[i] = (short *)
255 malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
257 cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
258 cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
259 cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
260 cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
261 if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
263 for (i = 0; i < tt.tt_ntoken; i++) {
266 p->qback = cc_q0a.qback;
272 for (; i < cc_ntoken; i++) {
275 p->qback = cc_q1a.qback;
283 if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
296 tt_obp = tt_ob = cc_buffer;
297 tt_obe = tt_ob + cc_bufsize;
298 tt.tt_flush = ccflush;
300 cc_trace_fp = fopen("window-trace", "a");
301 (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
308 register struct cc *p;
310 bzero((char *) cc_htab, HSIZE * sizeof *cc_htab);
311 for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
313 for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
321 tt_obp = tt_ob = cc_tt_ob;
324 if (cc_trace_fp != NULL) {
325 (void) fclose(cc_trace_fp);
332 int bufsize = tt_obp - tt_ob;
335 if (tt_ob != cc_buffer)
337 if (cc_trace_fp != NULL) {
338 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
339 (void) putc(-1, cc_trace_fp);
342 (*tt.tt_compress)(1);
343 if (bufsize < tt.tt_token_min) {
347 tt_obp = tt_ob = cc_tt_ob;
351 n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
352 cc_compress_phase(cc_output, bufsize, cc_tokens, n);
353 cc_output_phase(cc_buffer, cc_output, bufsize);
355 tt_obp = tt_ob = cc_buffer;
356 tt_obe = cc_buffer + cc_bufsize;
358 (*tt.tt_compress)(0);
359 tt.tt_flush = ccflush;
362 cc_sweep_phase(buffer, bufsize, tokens)
366 register struct cc **pp = tokens;
378 cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
384 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
393 (void) fflush(stdout);
396 n = cc_sweep(buffer, bufsize, pp, i);
408 qinsertq(&cc_q1b, &cc_q1a);
411 printf("\n %d tokens, %d candidates\n",
419 cc_sweep0(buffer, n, length)
426 register short pc = tt.tt_padc;
428 /* n and length are at least 1 */
433 if ((*hc++ = *p++) == pc)
439 for (i = n--; --i;) {
440 if ((c = *p++) == pc || *hc < 0)
449 cc_sweep(buffer, bufsize, tokens, length)
454 register struct cc *p;
458 short *places = cc_places[length];
459 struct cc **pp = tokens;
460 short threshold = thresh(length);
462 short wthreshold = wthresh(length);
463 short limit = wlimit(length);
466 short pc = tt.tt_padc;
473 for (i = 0; i < bufsize; i++, time++) {
477 register short *hc1 = hc;
478 register short c = *cp++;
480 if ((hh = *hc1) < 0 || c == pc) {
485 h = cc_htab + (*hc1++ = hash(hh, c));
488 for (p = *h; p != 0; p = p->hforw)
489 if (p->length == (char) length) {
490 register char *p1 = p->string;
491 register char *p2 = cp - length;
503 p->time >= cc_time0 && p->length == (char) length)
506 if ((*p->hback = p->hforw) != 0)
507 p->hforw->hback = p->hback;
509 register char *p1 = p->string;
510 register char *p2 = cp - length;
518 p->weight = cc_weight;
524 if ((p->hforw = *h) != 0)
525 p->hforw->hback = &p->hforw;
534 } else if (p->time < cc_time0) {
536 if ((p->weight += p->time - time) < 0)
537 p->weight = cc_weight;
538 else if ((p->weight += cc_weight) > limit)
549 if (p->weight >= wthreshold) {
564 } else if (p->time + length > time) {
566 * overlapping token, don't count as two and
567 * don't update time, but do adjust weight to offset
571 if (cc_weight != 0) { /* XXX */
572 p->weight += time - p->time;
573 if (!p->flag && p->weight >= wthreshold) {
580 places[i] = p->places;
584 if ((p->weight += p->time - time) < 0)
585 p->weight = cc_weight;
586 else if ((p->weight += cc_weight) > limit)
592 /* code must be < 0 if flag false here */
593 (p->bcount >= threshold
595 || p->weight >= wthreshold
602 places[i] = p->places;
606 if ((i = pp - tokens) > 0) {
609 cc_sweep_reverse(tokens, places);
610 if (cc_sort && i > 1) {
611 int cc_token_compare();
612 qsort((char *) tokens, i, sizeof *tokens,
616 if ((i = i * cc_chop / 100) == 0)
625 cc_sweep_reverse(pp, places)
626 register struct cc **pp;
627 register short *places;
629 register struct cc *p;
630 register short front, back, t;
632 while ((p = *pp++) != 0) {
635 /* the list is never empty */
640 } while ((t = front) >= 0);
645 cc_compress_phase(output, bufsize, tokens, ntoken)
651 bzero((char *) output, bufsize * sizeof *output);
652 for (i = 0; i < cc_npass0; i++)
653 cc_compress_phase1(output, tokens, ntoken, 0);
654 for (i = 0; i < cc_npass1; i++)
655 cc_compress_phase1(output, tokens, ntoken, 1);
656 cc_compress_cleanup(output, bufsize);
659 cc_compress_phase1(output, tokens, ntoken, flag)
660 register struct cc **output;
663 register struct cc **pp;
666 int nt = 0, cc = 0, nc = 0;
676 while (pp < tokens + ntoken) {
687 printf(" (%d", (*pp)->length);
688 (void) fflush(stdout);
691 pp += cc_compress(output, pp, flag);
694 printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
704 printf("\n total: (%dt %du %dc)\n", nt, cc, nc);
710 cc_compress_cleanup(output, bufsize)
711 register struct cc **output;
713 register struct cc **end;
715 /* the previous output phase may have been interrupted */
716 qinsertq(&cc_q0b, &cc_q0a);
717 for (end = output + bufsize; output < end;) {
718 register struct cc *p;
720 if ((p = *output) == 0) {
726 } else if (p->code >= 0) {
729 } else if (p->ccount == 0) {
731 } else if (p->ccount >= thresh(length)
733 || wthreshp(p->weight, length)
745 cc_compress(output, tokens, flag)
750 struct cc **pp = tokens;
751 register struct cc *p = *pp++;
752 int length = p->length;
753 int threshold = thresh(length);
755 short wthreshold = wthresh(length);
757 short *places = cc_places[length];
758 int *initial_scores = cc_initial_scores[length];
759 int initial_score0 = put_token_score(length);
763 register struct cc_undo *undop;
771 if ((short) ccount >= p->bcount)
773 if (p->code >= 0 || ccount >= threshold)
776 else if (p->weight >= wthreshold)
777 /* allow one fewer match than normal */
778 /* XXX, should adjust for ccount */
779 score = - tt.tt_set_token_cost;
782 score = initial_scores[ccount];
787 for (i = p->places; i >= 0; i = places[i]) {
788 register struct cc **jp;
789 register struct cc *x;
790 register struct cc **ip = output + i;
791 register score0 = initial_score0;
792 struct cc **iip = ip + length;
793 struct cc_undo *undop1 = undop;
795 if ((x = *(jp = ip)) != 0)
797 while (--jp >= output)
798 if ((x = *jp) != 0) {
799 if (jp + x->length > ip)
805 if ((x = *jp) == 0) {
820 score_adjust(score0, x);
821 if (score0 < 0 && flag)
833 while (--undop >= undop1)
834 if (*undop->pos = x = undop->val)
840 ccount_stat += ccount - p->ccount;
842 ncover_stat += ncover;
846 register struct cc_undo *u = cc_undo;
847 while (--undop >= u) {
848 register struct cc *x;
849 if (*undop->pos = x = undop->val)
853 } while ((p = *pp++) != 0);
857 cc_output_phase(buffer, output, bufsize)
858 register char *buffer;
859 register struct cc **output;
863 register struct cc *p, *p1;
865 for (i = 0; i < bufsize;) {
866 if ((p = output[i]) == 0) {
869 } else if (p->code >= 0) {
870 if (--p->ccount == 0)
872 (*tt.tt_put_token)(p->code, p->string, p->length);
874 wwntoksave += put_token_score(p->length);
876 } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
879 qinsert(p1, &cc_q1a);
880 if (--p->ccount == 0)
884 (*tt.tt_set_token)(p->code, p->string, p->length);
886 wwntoksave -= tt.tt_set_token_cost;
890 ttwrite(p->string, p->length);
898 cc_token_compare(p1, p2)
899 struct cc **p1, **p2;
901 return (*p2)->bcount - (*p1)->bcount;