2 * Copyright (c) 1990 The Regents of the University of California.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * from tahoe: in_cksum.c 1.2 86/01/05
34 * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
35 * $FreeBSD: src/sys/i386/i386/in_cksum.c,v 1.17.2.3 2002/07/02 04:03:00 jdp Exp $
38 #include <sys/param.h>
39 #include <sys/systm.h>
42 #include <netinet/in.h>
43 #include <netinet/in_systm.h>
44 #include <netinet/ip.h>
46 #include <machine/in_cksum.h>
49 * Checksum routine for Internet Protocol family headers.
51 * This routine is very heavily used in the network
52 * code and should be modified for each CPU to be as fast as possible.
54 * This implementation is the 386 version.
/*
 * ADDCARRY(x): if x is larger than 0xffff, subtract 0xffff from it.  For
 * values below 0x20000 this is the same as adding the carry out of bit 15
 * back into the low 16 bits.
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement
 * and cannot capture an `else' that follows the macro invocation.
 */
#define ADDCARRY(x)	do { if ((x) > 0xffff) (x) -= 0xffff; } while (0)

/*
 * REDUCE: fold the accumulator `sum' (a variable that must be in scope at
 * the point of use) by adding the bits above bit 15 to the low 16 bits,
 * then fold any resulting carry with ADDCARRY.
 */
#define REDUCE \
	do { \
		sum = (sum & 0xffff) + (sum >> 16); \
		ADDCARRY(sum); \
	} while (0)

/*
 * These asm statements require __volatile because they pass information
 * via the condition codes.  GCC does not currently provide a way to specify
 * the condition codes as an input or output operand.
 *
 * The LOAD macro below is effectively a prefetch into cache.  GCC will
 * load the value into a register but will not use it.  Since modern CPUs
 * reorder operations, this will generally take place in parallel with
 * other work.
 *
 * All of these operate on the variables `sum' and `w' of the enclosing
 * scope.  The argument n is a byte offset from w; it is divided by 4 to
 * index w as an array of 32-bit words.
 *
 *	ADD(n)	add the word at offset n to sum (addl).
 *	ADDC(n)	add the word at offset n plus the carry flag to sum (adcl).
 *	LOAD(n)	read the word at offset n into a register and discard it.
 *	MOP	add 0 with carry to sum, folding in the last carry.
 *
 * Because the carry is handed from one statement to the next through the
 * condition codes, no other code may be placed between an ADD/ADDC and
 * the ADDC/MOP that consumes its carry.
 */
#define ADD(n)	__asm __volatile \
		("addl %1, %0" : "+r" (sum) : \
		"g" (((const u_int32_t *)(w))[(n) / 4]))
#define ADDC(n)	__asm __volatile \
		("adcl %1, %0" : "+r" (sum) : \
		"g" (((const u_int32_t *)(w))[(n) / 4]))
#define LOAD(n)	__asm __volatile \
		("" : : "r" (((const u_int32_t *)(w))[(n) / 4]))
#define MOP	__asm __volatile \
		("adcl $0, %0" : "+r" (sum))
84 register struct mbuf *m;
88 register unsigned sum = 0;
89 register int mlen = 0;
91 union { char c[2]; u_short s; } su;
93 for (;m && len; m = m->m_next) {
96 w = mtod(m, u_short *);
99 * The first byte of this mbuf is the continuation
100 * of a word spanning between this mbuf and the
104 /* su.c[0] is already saved when scanning previous
105 * mbuf. sum was REDUCEd when we found mlen == -1
107 su.c[1] = *(u_char *)w;
109 w = (u_short *)((char *)w + 1);
118 * Force to long boundary so we do longword aligned
123 if ((1 & (int) w) && (mlen > 0)) {
125 su.c[0] = *(char *)w;
126 w = (u_short *)((char *)w + 1);
130 if ((2 & (int) w) && (mlen >= 2)) {
136 * Advance to a 486 cache line boundary.
138 if (4 & (int) w && mlen >= 4) {
144 if (8 & (int) w && mlen >= 8) {
152 * Do as much of the checksum as possible 32 bits at a time.
153 * In fact, this loop is unrolled to make overhead from
157 while ((mlen -= 32) >= 0) {
159 * Add with carry 16 words and fold in the last
160 * carry by adding a 0 with carry.
162 * The early ADD(16) and the LOAD(32) are to load
163 * the next 2 cache lines in advance on 486's. The
164 * 486 has a penalty of 2 clock cycles for loading
165 * a cache line, plus whatever time the external
166 * memory takes to load the first word(s) addressed.
167 * These penalties are unavoidable. Subsequent
168 * accesses to a cache line being loaded (and to
169 * other external memory?) are delayed until the
170 * whole load finishes. These penalties are mostly
171 * avoided by not accessing external memory for
172 * 8 cycles after the ADD(16) and 12 cycles after
173 * the LOAD(32). The loop terminates when mlen
174 * is initially 33 (not 32) to guarantee that
175 * the LOAD(32) is within bounds.
219 if (mlen == 0 && byte_swapped == 0)
220 continue; /* worth 1% maybe ?? */
222 while ((mlen -= 2) >= 0) {
229 su.c[1] = *(char *)w;
234 } else if (mlen == -1)
236 * This mbuf has an odd number of bytes.
237 * There could be a word split between
238 * this mbuf and the next mbuf.
239 * Save the last byte (to prepend to next mbuf).
241 su.c[0] = *(char *)w;
245 printf("%s: out of data by %d\n", __func__, len);
247 /* The last mbuf has odd # of bytes. Follow the
248 standard (the odd byte is shifted left by 8 bits) */
253 return (~sum & 0xffff);
257 in_cksum_skip(m, len, skip)
263 register unsigned sum = 0;
264 register int mlen = 0;
265 int byte_swapped = 0;
266 union { char c[2]; u_short s; } su;
269 for (; skip && m; m = m->m_next) {
270 if (m->m_len > skip) {
271 mlen = m->m_len - skip;
272 w = (u_short *)(mtod(m, u_char *) + skip);
279 for (;m && len; m = m->m_next) {
282 w = mtod(m, u_short *);
285 * The first byte of this mbuf is the continuation
286 * of a word spanning between this mbuf and the
290 /* su.c[0] is already saved when scanning previous
291 * mbuf. sum was REDUCEd when we found mlen == -1
293 su.c[1] = *(u_char *)w;
295 w = (u_short *)((char *)w + 1);
305 * Force to long boundary so we do longword aligned
310 if ((1 & (int) w) && (mlen > 0)) {
312 su.c[0] = *(char *)w;
313 w = (u_short *)((char *)w + 1);
317 if ((2 & (int) w) && (mlen >= 2)) {
323 * Advance to a 486 cache line boundary.
325 if (4 & (int) w && mlen >= 4) {
331 if (8 & (int) w && mlen >= 8) {
339 * Do as much of the checksum as possible 32 bits at a time.
340 * In fact, this loop is unrolled to make overhead from
344 while ((mlen -= 32) >= 0) {
346 * Add with carry 16 words and fold in the last
347 * carry by adding a 0 with carry.
349 * The early ADD(16) and the LOAD(32) are to load
350 * the next 2 cache lines in advance on 486's. The
351 * 486 has a penalty of 2 clock cycles for loading
352 * a cache line, plus whatever time the external
353 * memory takes to load the first word(s) addressed.
354 * These penalties are unavoidable. Subsequent
355 * accesses to a cache line being loaded (and to
356 * other external memory?) are delayed until the
357 * whole load finishes. These penalties are mostly
358 * avoided by not accessing external memory for
359 * 8 cycles after the ADD(16) and 12 cycles after
360 * the LOAD(32). The loop terminates when mlen
361 * is initially 33 (not 32) to guarantee that
362 * the LOAD(32) is within bounds.
406 if (mlen == 0 && byte_swapped == 0)
407 continue; /* worth 1% maybe ?? */
409 while ((mlen -= 2) >= 0) {
416 su.c[1] = *(char *)w;
421 } else if (mlen == -1)
423 * This mbuf has an odd number of bytes.
424 * There could be a word split between
425 * this mbuf and the next mbuf.
426 * Save the last byte (to prepend to next mbuf).
428 su.c[0] = *(char *)w;
432 printf("%s: out of data by %d\n", __func__, len);
434 /* The last mbuf has odd # of bytes. Follow the
435 standard (the odd byte is shifted left by 8 bits) */
440 return (~sum & 0xffff);
444 * This is the exact same algorithm as above with a few exceptions:
445 * (1) it is designed to operate on buffers, not mbufs
446 * (2) it returns an intermediate form of the sum which has to be
447 * explicitly finalized (but this can be delayed)
448 * (3) it accepts an intermediate sum
450 * This is particularly useful when building packets quickly,
451 * since one can compute the checksum of the pseudoheader ahead of
452 * time and then use this function to complete the work. That way,
453 * the pseudoheader never actually has to exist in the packet buffer,
454 * which avoids needless duplication of work.
457 in_cksum_partial(psum, w, len)
462 register in_psum_t sum = psum;
463 int byte_swapped = 0;
464 union { char c[2]; u_short s; } su;
467 * Force to long boundary so we do longword aligned
472 if ((1 & (int) w) && (len > 0)) {
474 su.c[0] = *(const char *)w;
475 w = (const u_short *)((const char *)w + 1);
479 if ((2 & (int) w) && (len >= 2)) {
485 * Advance to a 486 cache line boundary.
487 if (4 & (int) w && len >= 4) {
493 if (8 & (int) w && len >= 8) {
501 * Do as much of the checksum as possible 32 bits at a time.
502 * In fact, this loop is unrolled to make overhead from
506 while ((len -= 32) >= 0) {
508 * Add with carry 16 words and fold in the last
509 * carry by adding a 0 with carry.
511 * The early ADD(16) and the LOAD(32) are to load
512 * the next 2 cache lines in advance on 486's. The
513 * 486 has a penalty of 2 clock cycles for loading
514 * a cache line, plus whatever time the external
515 * memory takes to load the first word(s) addressed.
516 * These penalties are unavoidable. Subsequent
517 * accesses to a cache line being loaded (and to
518 * other external memory?) are delayed until the
519 * whole load finishes. These penalties are mostly
520 * avoided by not accessing external memory for
521 * 8 cycles after the ADD(16) and 12 cycles after
522 * the LOAD(32). The loop terminates when len
523 * is initially 33 (not 32) to guarantee that
524 * the LOAD(32) is within bounds.
568 if (len == 0 && byte_swapped == 0)
571 while ((len -= 2) >= 0) {
578 su.c[1] = *(const char *)w;
583 } else if (len == -1) {
585 * This buffer has an odd number of bytes.
586 * There could be a word split between
587 * this buffer and the next.
589 su.c[0] = *(const char *)w;
593 /* The last buffer has odd # of bytes. Follow the
594 standard (the odd byte is shifted left by 8 bits) */
602 in_cksum_finalize(psum)
605 in_psum_t sum = psum;
607 return (~sum & 0xffff);