gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*-
	2	* Copyright (c) 1990 The Regents of the University of California.
	3	* All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	* 1. Redistributions of source code must retain the above copyright
	9	* notice, this list of conditions and the following disclaimer.
	10	* 2. Redistributions in binary form must reproduce the above copyright
	11	* notice, this list of conditions and the following disclaimer in the
	12	* documentation and/or other materials provided with the distribution.
	13	* 3. All advertising materials mentioning features or use of this software
	14	* must display the following acknowledgement:
	15	* This product includes software developed by the University of
	16	* California, Berkeley and its contributors.
	17	* 4. Neither the name of the University nor the names of its contributors
	18	* may be used to endorse or promote products derived from this software
	19	* without specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	22	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	24	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	25	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	26	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	27	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	28	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	29	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	30	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	31	* SUCH DAMAGE.
	32	*
	33	* from tahoe: in_cksum.c 1.2 86/01/05
	34	* from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
	35	* $FreeBSD: src/sys/i386/i386/in_cksum.c,v 1.17.2.3 2002/07/02 04:03:00 jdp Exp $
	36	* $DragonFly: src/sys/i386/i386/Attic/in_cksum.c,v 1.3 2003/07/26 19:07:47 rob Exp $
	37	*/
	38
	39	#include <sys/param.h>
	40	#include <sys/systm.h>
	41	#include <sys/mbuf.h>
	42
	43	#include <netinet/in.h>
	44	#include <netinet/in_systm.h>
	45	#include <netinet/ip.h>
	46
	47	#include <machine/in_cksum.h>
	48
	49	/*
	50	* Checksum routine for Internet Protocol family headers.
	51	*
	52	* This routine is very heavily used in the network
	53	* code and should be modified for each CPU to be as fast as possible.
	54	*
	55	* This implementation is the 386 version.
	56	*/
	57
	#undef ADDCARRY
	/*
	 * ADDCARRY(x): end-around carry.  If x has spilled past 16 bits,
	 * subtract 0xffff to bring it back into range.
	 *
	 * REDUCE: fold the accumulator named `sum' in the expanding scope by
	 * adding its upper 16 bits to its lower 16 bits, then apply ADDCARRY.
	 *
	 * Both are wrapped in do { } while (0) so that they expand to exactly
	 * one statement and stay correct as the body of an unbraced if/else.
	 */
	#define ADDCARRY(x) \
		do { if ((x) > 0xffff) (x) -= 0xffff; } while (0)
	#define REDUCE \
		do { sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum); } while (0)

	/*
	 * These asm statements require __volatile because they pass information
	 * via the condition codes. GCC does not currently provide a way to specify
	 * the condition codes as an input or output operand.
	 *
	 * All of them operate on the variables `sum' (the accumulator) and `w'
	 * (the current data pointer) of the expanding scope; `n' is a byte
	 * offset from w and is converted to a 32-bit word index.
	 *
	 *	ADD(n)	- addl: add the 32-bit word at offset n to sum.
	 *	ADDC(n)	- adcl: add the 32-bit word at offset n plus the carry
	 *		  flag to sum.
	 *	MOP	- adcl $0: add only the carry flag to sum.
	 *
	 * The LOAD macro below is effectively a prefetch into cache. GCC will
	 * load the value into a register but will not use it. Since modern CPUs
	 * reorder operations, this will generally take place in parallel with
	 * other calculations.
	 */
	#define ADD(n) __asm __volatile \
		("addl %1, %0" : "+r" (sum) : \
		"g" (((const u_int32_t *)w)[(n) / 4]))
	#define ADDC(n) __asm __volatile \
		("adcl %1, %0" : "+r" (sum) : \
		"g" (((const u_int32_t *)w)[(n) / 4]))
	#define LOAD(n) __asm __volatile \
		("" : : "r" (((const u_int32_t *)w)[(n) / 4]))
	#define MOP __asm __volatile \
		("adcl $0, %0" : "+r" (sum))
	82
	83	int
	84	in_cksum(m, len)
	85	struct mbuf *m;
	86	int len;
	87	{
	88	u_short *w;
	89	unsigned sum = 0;
	90	int mlen = 0;
	91	int byte_swapped = 0;
	92	union { char c[2]; u_short s; } su;
	93
	94	for (;m && len; m = m->m_next) {
	95	if (m->m_len == 0)
	96	continue;
	97	w = mtod(m, u_short *);
	98	if (mlen == -1) {
	99	/*
	100	* The first byte of this mbuf is the continuation
	101	* of a word spanning between this mbuf and the
	102	* last mbuf.
	103	*/
	104
	105	/* su.c[0] is already saved when scanning previous
	106	* mbuf. sum was REDUCEd when we found mlen == -1
	107	*/
	108	su.c[1] = (u_char )w;
	109	sum += su.s;
	110	w = (u_short )((char )w + 1);
	111	mlen = m->m_len - 1;
	112	len--;
	113	} else
	114	mlen = m->m_len;
	115	if (len < mlen)
	116	mlen = len;
	117	len -= mlen;
	118	/*
	119	* Force to long boundary so we do longword aligned
	120	* memory operations
	121	*/
	122	if (3 & (int) w) {
	123	REDUCE;
	124	if ((1 & (int) w) && (mlen > 0)) {
	125	sum <<= 8;
	126	su.c[0] = (char )w;
	127	w = (u_short )((char )w + 1);
	128	mlen--;
	129	byte_swapped = 1;
	130	}
	131	if ((2 & (int) w) && (mlen >= 2)) {
	132	sum += *w++;
	133	mlen -= 2;
	134	}
	135	}
	136	/*
	137	* Advance to a 486 cache line boundary.
	138	*/
	139	if (4 & (int) w && mlen >= 4) {
	140	ADD(0);
	141	MOP;
	142	w += 2;
	143	mlen -= 4;
	144	}
	145	if (8 & (int) w && mlen >= 8) {
	146	ADD(0);
	147	ADDC(4);
	148	MOP;
	149	w += 4;
	150	mlen -= 8;
	151	}
	152	/*
	153	* Do as much of the checksum as possible 32 bits at at time.
	154	* In fact, this loop is unrolled to make overhead from
	155	* branches &c small.
	156	*/
	157	mlen -= 1;
	158	while ((mlen -= 32) >= 0) {
	159	/*
	160	* Add with carry 16 words and fold in the last
	161	* carry by adding a 0 with carry.
	162	*
	163	* The early ADD(16) and the LOAD(32) are to load
	164	* the next 2 cache lines in advance on 486's. The
	165	* 486 has a penalty of 2 clock cycles for loading
	166	* a cache line, plus whatever time the external
	167	* memory takes to load the first word(s) addressed.
	168	* These penalties are unavoidable. Subsequent
	169	* accesses to a cache line being loaded (and to
	170	* other external memory?) are delayed until the
	171	* whole load finishes. These penalties are mostly
	172	* avoided by not accessing external memory for
	173	* 8 cycles after the ADD(16) and 12 cycles after
	174	* the LOAD(32). The loop terminates when mlen
	175	* is initially 33 (not 32) to guaranteed that
	176	* the LOAD(32) is within bounds.
	177	*/
	178	ADD(16);
	179	ADDC(0);
	180	ADDC(4);
	181	ADDC(8);
	182	ADDC(12);
	183	LOAD(32);
	184	ADDC(20);
	185	ADDC(24);
	186	ADDC(28);
	187	MOP;
	188	w += 16;
	189	}
	190	mlen += 32 + 1;
	191	if (mlen >= 32) {
	192	ADD(16);
	193	ADDC(0);
	194	ADDC(4);
	195	ADDC(8);
	196	ADDC(12);
	197	ADDC(20);
	198	ADDC(24);
	199	ADDC(28);
	200	MOP;
	201	w += 16;
	202	mlen -= 32;
	203	}
	204	if (mlen >= 16) {
	205	ADD(0);
	206	ADDC(4);
	207	ADDC(8);
	208	ADDC(12);
	209	MOP;
	210	w += 8;
	211	mlen -= 16;
	212	}
	213	if (mlen >= 8) {
	214	ADD(0);
	215	ADDC(4);
	216	MOP;
	217	w += 4;
	218	mlen -= 8;
	219	}
	220	if (mlen == 0 && byte_swapped == 0)
	221	continue; /* worth 1% maybe ?? */
	222	REDUCE;
	223	while ((mlen -= 2) >= 0) {
	224	sum += *w++;
	225	}
	226	if (byte_swapped) {
	227	sum <<= 8;
	228	byte_swapped = 0;
	229	if (mlen == -1) {
	230	su.c[1] = (char )w;
	231	sum += su.s;
	232	mlen = 0;
	233	} else
	234	mlen = -1;
	235	} else if (mlen == -1)
	236	/*
	237	* This mbuf has odd number of bytes.
	238	* There could be a word split betwen
	239	* this mbuf and the next mbuf.
	240	* Save the last byte (to prepend to next mbuf).
	241	*/
	242	su.c[0] = (char )w;
	243	}
	244
	245	if (len)
	246	printf("%s: out of data by %d\n", __func__, len);
	247	if (mlen == -1) {
	248	/* The last mbuf has odd # of bytes. Follow the
	249	standard (the odd byte is shifted left by 8 bits) */
	250	su.c[1] = 0;
	251	sum += su.s;
	252	}
	253	REDUCE;
	254	return (~sum & 0xffff);
	255	}
	256
	257	u_short
	258	in_cksum_skip(m, len, skip)
	259	struct mbuf *m;
	260	int len;
	261	int skip;
	262	{
	263	u_short *w;
	264	unsigned sum = 0;
	265	int mlen = 0;
	266	int byte_swapped = 0;
	267	union { char c[2]; u_short s; } su;
	268
	269	len -= skip;
	270	for (; skip && m; m = m->m_next) {
	271	if (m->m_len > skip) {
	272	mlen = m->m_len - skip;
	273	w = (u_short )(mtod(m, u_char ) + skip);
	274	goto skip_start;
	275	} else {
	276	skip -= m->m_len;
	277	}
	278	}
	279
	280	for (;m && len; m = m->m_next) {
	281	if (m->m_len == 0)
	282	continue;
	283	w = mtod(m, u_short *);
	284	if (mlen == -1) {
	285	/*
	286	* The first byte of this mbuf is the continuation
	287	* of a word spanning between this mbuf and the
	288	* last mbuf.
	289	*/
	290
	291	/* su.c[0] is already saved when scanning previous
	292	* mbuf. sum was REDUCEd when we found mlen == -1
	293	*/
	294	su.c[1] = (u_char )w;
	295	sum += su.s;
	296	w = (u_short )((char )w + 1);
	297	mlen = m->m_len - 1;
	298	len--;
	299	} else
	300	mlen = m->m_len;
	301	skip_start:
	302	if (len < mlen)
	303	mlen = len;
	304	len -= mlen;
	305	/*
	306	* Force to long boundary so we do longword aligned
	307	* memory operations
	308	*/
	309	if (3 & (int) w) {
	310	REDUCE;
	311	if ((1 & (int) w) && (mlen > 0)) {
	312	sum <<= 8;
	313	su.c[0] = (char )w;
	314	w = (u_short )((char )w + 1);
	315	mlen--;
	316	byte_swapped = 1;
	317	}
	318	if ((2 & (int) w) && (mlen >= 2)) {
	319	sum += *w++;
	320	mlen -= 2;
	321	}
	322	}
	323	/*
	324	* Advance to a 486 cache line boundary.
	325	*/
	326	if (4 & (int) w && mlen >= 4) {
	327	ADD(0);
	328	MOP;
	329	w += 2;
	330	mlen -= 4;
	331	}
	332	if (8 & (int) w && mlen >= 8) {
	333	ADD(0);
	334	ADDC(4);
	335	MOP;
	336	w += 4;
	337	mlen -= 8;
	338	}
	339	/*
	340	* Do as much of the checksum as possible 32 bits at at time.
	341	* In fact, this loop is unrolled to make overhead from
	342	* branches &c small.
	343	*/
	344	mlen -= 1;
	345	while ((mlen -= 32) >= 0) {
	346	/*
	347	* Add with carry 16 words and fold in the last
	348	* carry by adding a 0 with carry.
	349	*
	350	* The early ADD(16) and the LOAD(32) are to load
	351	* the next 2 cache lines in advance on 486's. The
	352	* 486 has a penalty of 2 clock cycles for loading
	353	* a cache line, plus whatever time the external
	354	* memory takes to load the first word(s) addressed.
	355	* These penalties are unavoidable. Subsequent
	356	* accesses to a cache line being loaded (and to
	357	* other external memory?) are delayed until the
	358	* whole load finishes. These penalties are mostly
	359	* avoided by not accessing external memory for
	360	* 8 cycles after the ADD(16) and 12 cycles after
	361	* the LOAD(32). The loop terminates when mlen
	362	* is initially 33 (not 32) to guaranteed that
	363	* the LOAD(32) is within bounds.
	364	*/
	365	ADD(16);
	366	ADDC(0);
	367	ADDC(4);
	368	ADDC(8);
	369	ADDC(12);
	370	LOAD(32);
	371	ADDC(20);
	372	ADDC(24);
	373	ADDC(28);
	374	MOP;
	375	w += 16;
	376	}
	377	mlen += 32 + 1;
	378	if (mlen >= 32) {
	379	ADD(16);
	380	ADDC(0);
	381	ADDC(4);
	382	ADDC(8);
	383	ADDC(12);
	384	ADDC(20);
	385	ADDC(24);
	386	ADDC(28);
	387	MOP;
	388	w += 16;
	389	mlen -= 32;
	390	}
	391	if (mlen >= 16) {
	392	ADD(0);
	393	ADDC(4);
	394	ADDC(8);
	395	ADDC(12);
	396	MOP;
	397	w += 8;
	398	mlen -= 16;
	399	}
	400	if (mlen >= 8) {
	401	ADD(0);
	402	ADDC(4);
	403	MOP;
	404	w += 4;
	405	mlen -= 8;
	406	}
	407	if (mlen == 0 && byte_swapped == 0)
	408	continue; /* worth 1% maybe ?? */
	409	REDUCE;
	410	while ((mlen -= 2) >= 0) {
	411	sum += *w++;
	412	}
	413	if (byte_swapped) {
	414	sum <<= 8;
	415	byte_swapped = 0;
	416	if (mlen == -1) {
	417	su.c[1] = (char )w;
	418	sum += su.s;
	419	mlen = 0;
	420	} else
	421	mlen = -1;
	422	} else if (mlen == -1)
	423	/*
	424	* This mbuf has odd number of bytes.
	425	* There could be a word split betwen
	426	* this mbuf and the next mbuf.
	427	* Save the last byte (to prepend to next mbuf).
	428	*/
	429	su.c[0] = (char )w;
	430	}
	431
	432	if (len)
	433	printf("%s: out of data by %d\n", __func__, len);
	434	if (mlen == -1) {
	435	/* The last mbuf has odd # of bytes. Follow the
	436	standard (the odd byte is shifted left by 8 bits) */
	437	su.c[1] = 0;
	438	sum += su.s;
	439	}
	440	REDUCE;
	441	return (~sum & 0xffff);
	442	}
	443
	444	/*
	445	* This is the exact same algorithm as above with a few exceptions:
	446	* (1) it is designed to operate on buffers, not mbufs
	447	* (2) it returns an intermediate form of the sum which has to be
	448	* explicitly finalized (but this can be delayed)
	449	* (3) it accepts an intermediate sum
	450	*
	451	* This is particularly useful when building packets quickly,
	452	* since one can compute the checksum of the pseudoheader ahead of
	453	* time and then use this function to complete the work. That way,
	454	* the pseudoheader never actually has to exist in the packet buffer,
	455	* which avoids needless duplication of work.
	456	*/
	457	in_psum_t
	458	in_cksum_partial(psum, w, len)
	459	in_psum_t psum;
	460	const u_short *w;
	461	int len;
	462	{
	463	in_psum_t sum = psum;
	464	int byte_swapped = 0;
	465	union { char c[2]; u_short s; } su;
	466
	467	/*
	468	* Force to long boundary so we do longword aligned
	469	* memory operations
	470	*/
	471	if (3 & (int) w) {
	472	REDUCE;
	473	if ((1 & (int) w) && (len > 0)) {
	474	sum <<= 8;
	475	su.c[0] = (const char )w;
	476	w = (const u_short )((const char )w + 1);
	477	len--;
	478	byte_swapped = 1;
	479	}
	480	if ((2 & (int) w) && (len >= 2)) {
	481	sum += *w++;
	482	len -= 2;
	483	}
	484	}
	485	/*
	486	* Advance to a 486 cache line boundary.
	487	*/
	488	if (4 & (int) w && len >= 4) {
	489	ADD(0);
	490	MOP;
	491	w += 2;
	492	len -= 4;
	493	}
	494	if (8 & (int) w && len >= 8) {
	495	ADD(0);
	496	ADDC(4);
	497	MOP;
	498	w += 4;
	499	len -= 8;
	500	}
	501	/*
	502	* Do as much of the checksum as possible 32 bits at at time.
	503	* In fact, this loop is unrolled to make overhead from
	504	* branches &c small.
	505	*/
	506	len -= 1;
	507	while ((len -= 32) >= 0) {
	508	/*
	509	* Add with carry 16 words and fold in the last
	510	* carry by adding a 0 with carry.
	511	*
	512	* The early ADD(16) and the LOAD(32) are to load
	513	* the next 2 cache lines in advance on 486's. The
	514	* 486 has a penalty of 2 clock cycles for loading
	515	* a cache line, plus whatever time the external
	516	* memory takes to load the first word(s) addressed.
	517	* These penalties are unavoidable. Subsequent
	518	* accesses to a cache line being loaded (and to
	519	* other external memory?) are delayed until the
	520	* whole load finishes. These penalties are mostly
	521	* avoided by not accessing external memory for
	522	* 8 cycles after the ADD(16) and 12 cycles after
	523	* the LOAD(32). The loop terminates when len
	524	* is initially 33 (not 32) to guaranteed that
	525	* the LOAD(32) is within bounds.
	526	*/
	527	ADD(16);
	528	ADDC(0);
	529	ADDC(4);
	530	ADDC(8);
	531	ADDC(12);
	532	LOAD(32);
	533	ADDC(20);
	534	ADDC(24);
	535	ADDC(28);
	536	MOP;
	537	w += 16;
	538	}
	539	len += 32 + 1;
	540	if (len >= 32) {
	541	ADD(16);
	542	ADDC(0);
	543	ADDC(4);
	544	ADDC(8);
	545	ADDC(12);
	546	ADDC(20);
	547	ADDC(24);
	548	ADDC(28);
	549	MOP;
	550	w += 16;
	551	len -= 32;
	552	}
	553	if (len >= 16) {
	554	ADD(0);
	555	ADDC(4);
	556	ADDC(8);
	557	ADDC(12);
	558	MOP;
	559	w += 8;
	560	len -= 16;
	561	}
	562	if (len >= 8) {
	563	ADD(0);
	564	ADDC(4);
	565	MOP;
	566	w += 4;
	567	len -= 8;
	568	}
	569	if (len == 0 && byte_swapped == 0)
	570	goto out;
	571	REDUCE;
	572	while ((len -= 2) >= 0) {
	573	sum += *w++;
	574	}
	575	if (byte_swapped) {
	576	sum <<= 8;
	577	byte_swapped = 0;
	578	if (len == -1) {
	579	su.c[1] = (const char )w;
	580	sum += su.s;
	581	len = 0;
	582	} else
	583	len = -1;
	584	} else if (len == -1) {
	585	/*
	586	* This buffer has odd number of bytes.
	587	* There could be a word split betwen
	588	* this buffer and the next.
	589	*/
	590	su.c[0] = (const char )w;
	591	}
	592	out:
	593	if (len == -1) {
	594	/* The last buffer has odd # of bytes. Follow the
	595	standard (the odd byte is shifted left by 8 bits) */
	596	su.c[1] = 0;
	597	sum += su.s;
	598	}
	599	return sum;
	600	}
	601
	602	int
	603	in_cksum_finalize(psum)
	604	in_psum_t psum;
	605	{
	606	in_psum_t sum = psum;
	607	REDUCE;
	608	return (~sum & 0xffff);
	609	}