Initial import of binutils 2.22 on the new vendor branch
[dragonfly.git] / sys / dev / netif / mxge / mxge_lro.c
1 /******************************************************************************
2
3 Copyright (c) 2007-2008, Myricom Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11
12  2. Neither the name of the Myricom Inc, nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 $FreeBSD: src/sys/dev/mxge/mxge_lro.c,v 1.8 2009/06/23 17:42:06 gallatin Exp $
29
30 ***************************************************************************/
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/endian.h>
35 #include <sys/mbuf.h>
36 #include <sys/kernel.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/bus.h>
40
41 #include <net/if.h>
42 #include <net/ethernet.h>
43 #include <net/if_media.h>
44
45 #include <netinet/in_systm.h>
46 #include <netinet/in.h>
47 #include <netinet/ip.h>
48 #include <netinet/tcp.h>
49
50 #include <machine/bus.h>
51 #include <machine/in_cksum.h>
52
53 #include <dev/netif/mxge/mxge_mcp.h>
54 #include <dev/netif/mxge/if_mxge_var.h>
55
56 #include "opt_inet.h"
57
58 #ifdef INET
59
/*
 * Compute the folded 16-bit one's-complement sum of a buffer.
 *
 * raw: start of the data; it is read as native-order 16-bit words.
 * len: length of the data in bytes.  The callers in this file pass IP
 *      and TCP header lengths, which are multiples of 4.  A trailing
 *      16-bit word is summed as well; a trailing odd byte is ignored.
 *
 * Returns the sum folded to 16 bits.  The value is NOT complemented;
 * callers XOR it with 0xffff where they need the checksum itself.
 */
static uint16_t
mxge_csum_generic(const uint16_t *raw, int len)
{
        uint32_t sum = 0;

        /* bulk of the data: two 16-bit words per iteration */
        while (len >= 4) {
                sum += raw[0];
                sum += raw[1];
                raw += 2;
                len -= 4;
        }

        /*
         * At most one whole word can remain.  Summing it separately
         * keeps the function from reading past the end of the buffer
         * when len is not a multiple of 4.
         */
        if (len >= 2)
                sum += raw[0];

        /* fold twice: the first fold may itself produce a carry */
        sum = (sum >> 16) + (sum & 0xffff);
        sum = (sum >> 16) + (sum & 0xffff);
        return (uint16_t)sum;
}
77
78
79 void
80 mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
81 {
82         mxge_softc_t *mgp = ss->sc;
83         struct ifnet *ifp;
84         struct ip *ip;
85         struct tcphdr *tcp;
86         uint32_t *ts_ptr;
87         uint32_t tcplen, tcp_csum;
88
89         if (lro->append_cnt) {
90                 /* incorporate the new len into the ip header and
91                  * re-calculate the checksum */
92                 ip = lro->ip;
93                 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
94                 ip->ip_sum = 0;
95                 ip->ip_sum = 0xffff ^ 
96                         mxge_csum_generic((uint16_t*)ip,
97                                               sizeof (*ip));
98
99                 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
100                         CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
101                 lro->m_head->m_pkthdr.csum_data = 0xffff;
102                 lro->m_head->m_pkthdr.len = lro->len;
103
104                 /* incorporate the latest ack into the tcp header */
105                 tcp = (struct tcphdr *) (ip + 1);
106                 tcp->th_ack = lro->ack_seq;
107                 tcp->th_win = lro->window;
108                 /* incorporate latest timestamp into the tcp header */
109                 if (lro->timestamp) {
110                         ts_ptr = (uint32_t *)(tcp + 1);
111                         ts_ptr[1] = htonl(lro->tsval);
112                         ts_ptr[2] = lro->tsecr;
113                 }
114                 /* 
115                  * update checksum in tcp header by re-calculating the
116                  * tcp pseudoheader checksum, and adding it to the checksum
117                  * of the tcp payload data 
118                  */
119                 tcp->th_sum = 0;
120                 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
121                 tcp_csum = lro->data_csum;
122                 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
123                                       htons(tcplen + IPPROTO_TCP));
124                 tcp_csum += mxge_csum_generic((uint16_t*)tcp,
125                                                   tcp->th_off << 2);
126                 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
127                 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
128 #if 0
129                 IOLog("pseudo = 0x%x, generic = 0x%x, sum = %x\n", 
130                       in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
131                                 htons(tcplen + IPPROTO_TCP)),
132                       mxge_csum_generic((uint16_t*)tcp,
133                                             tcp->th_off << 2),
134                       htons(0xffff ^ tcp_csum));
135 #endif
136                 tcp->th_sum = 0xffff ^ tcp_csum;
137         }
138         ifp = mgp->ifp;
139         (*ifp->if_input)(mgp->ifp, lro->m_head);
140         ss->lro_queued += lro->append_cnt + 1;
141         ss->lro_flushed++;
142         lro->m_head = NULL;
143         lro->timestamp = 0;
144         lro->append_cnt = 0;
145         SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
146 }
147
148 int
149 mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
150 {
151         struct ether_header *eh;
152         struct ip *ip;
153         struct tcphdr *tcp;
154         uint32_t *ts_ptr;
155         struct mbuf *m_nxt, *m_tail;
156         struct lro_entry *lro;
157         int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
158         int opt_bytes, trim;
159         uint32_t seq, tmp_csum, device_mtu;
160
161         eh = mtod(m_head, struct ether_header *);
162         if (eh->ether_type != htons(ETHERTYPE_IP))
163                 return 1;
164         ip = (struct ip *) (eh + 1);
165         if (ip->ip_p != IPPROTO_TCP)
166                 return 1;
167         
168         /* ensure there are no options */
169         if ((ip->ip_hl << 2) != sizeof (*ip))
170                 return -1;
171
172         /* .. and the packet is not fragmented */
173         if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
174                 return -1;
175
176         /* verify that the IP header checksum is correct */
177         tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
178         if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
179                 ss->lro_bad_csum++;
180                 return -1;
181         }
182
183         /* find the TCP header */
184         tcp = (struct tcphdr *) (ip + 1);
185
186         /* ensure no bits set besides ack or psh */
187         if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
188                 return -1;
189
190         /* check for timestamps. Since the only option we handle are
191            timestamps, we only have to handle the simple case of
192            aligned timestamps */
193
194         opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
195         tcp_hdr_len =  sizeof (*tcp) + opt_bytes;
196         ts_ptr = (uint32_t *)(tcp + 1);
197         if (opt_bytes != 0) {
198                 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
199                     (*ts_ptr !=  ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
200                         return -1;
201         }
202
203         ip_len = ntohs(ip->ip_len);
204         tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
205         
206
207         /* 
208          * If frame is padded beyond the end of the IP packet,
209          * then we must trim the extra bytes off the end.
210          */
211         tot_len = m_head->m_pkthdr.len;
212         trim = tot_len - (ip_len + ETHER_HDR_LEN);
213         if (trim != 0) {
214                 if (trim < 0) {
215                         /* truncated packet */
216                         return -1;
217                 }
218                 m_adj(m_head, -trim);
219                 tot_len = m_head->m_pkthdr.len;
220         }
221
222         m_nxt = m_head;
223         m_tail = NULL; /* -Wuninitialized */
224         while (m_nxt != NULL) {
225                 m_tail = m_nxt;
226                 m_nxt = m_tail->m_next;
227         }
228
229         hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
230         seq = ntohl(tcp->th_seq);
231
232         SLIST_FOREACH(lro, &ss->lro_active, next) {
233                 if (lro->source_port == tcp->th_sport && 
234                     lro->dest_port == tcp->th_dport &&
235                     lro->source_ip == ip->ip_src.s_addr && 
236                     lro->dest_ip == ip->ip_dst.s_addr) {
237                         /* Try to append it */
238
239                         if (__predict_false(seq != lro->next_seq)) {
240                                 /* out of order packet */
241                                 SLIST_REMOVE(&ss->lro_active, lro,
242                                              lro_entry, next);
243                                 mxge_lro_flush(ss, lro);
244                                 return -1;
245                         }
246
247                         if (opt_bytes) {
248                                 uint32_t tsval = ntohl(*(ts_ptr + 1));
249                                 /* make sure timestamp values are increasing */
250                                 if (__predict_false(lro->tsval > tsval || 
251                                              *(ts_ptr + 2) == 0)) {
252                                         return -1;
253                                 }
254                                 lro->tsval = tsval;
255                                 lro->tsecr = *(ts_ptr + 2);
256                         }
257
258                         lro->next_seq += tcp_data_len;
259                         lro->ack_seq = tcp->th_ack;
260                         lro->window = tcp->th_win;
261                         lro->append_cnt++;
262                         if (tcp_data_len == 0) {
263                                 m_freem(m_head);
264                                 return 0;
265                         }
266                         /* subtract off the checksum of the tcp header
267                          * from the hardware checksum, and add it to the
268                          * stored tcp data checksum.  Byteswap the checksum
269                          * if the total length so far is odd 
270                          */
271                         tmp_csum = mxge_csum_generic((uint16_t*)tcp,
272                                                          tcp_hdr_len);
273                         csum = csum + (tmp_csum ^ 0xffff);
274                         csum = (csum & 0xffff) + (csum >> 16);
275                         csum = (csum & 0xffff) + (csum >> 16);
276                         if (lro->len & 0x1) {
277                                 /* Odd number of bytes so far, flip bytes */
278                                 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
279                         }
280                         csum = csum + lro->data_csum;
281                         csum = (csum & 0xffff) + (csum >> 16);
282                         csum = (csum & 0xffff) + (csum >> 16);
283                         lro->data_csum = csum;
284
285                         lro->len += tcp_data_len;
286
287                         /* adjust mbuf so that m->m_data points to
288                            the first byte of the payload */
289                         m_adj(m_head, hlen);
290                         /* append mbuf chain */
291                         lro->m_tail->m_next = m_head;
292                         /* advance the last pointer */
293                         lro->m_tail = m_tail;
294                         /* flush packet if required */
295                         device_mtu = ss->sc->ifp->if_mtu;
296                         if (lro->len > (65535 - device_mtu)) {
297                                 SLIST_REMOVE(&ss->lro_active, lro,
298                                              lro_entry, next);
299                                 mxge_lro_flush(ss, lro);
300                         }
301                         return 0;
302                 }
303         }
304
305         if (SLIST_EMPTY(&ss->lro_free))
306             return -1;
307
308         /* start a new chain */
309         lro = SLIST_FIRST(&ss->lro_free);
310         SLIST_REMOVE_HEAD(&ss->lro_free, next);
311         SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
312         lro->source_port = tcp->th_sport;
313         lro->dest_port = tcp->th_dport;
314         lro->source_ip = ip->ip_src.s_addr;
315         lro->dest_ip = ip->ip_dst.s_addr;
316         lro->next_seq = seq + tcp_data_len;
317         lro->mss = tcp_data_len;
318         lro->ack_seq = tcp->th_ack;
319         lro->window = tcp->th_win;
320
321         /* save the checksum of just the TCP payload by
322          * subtracting off the checksum of the TCP header from
323          * the entire hardware checksum 
324          * Since IP header checksum is correct, checksum over
325          * the IP header is -0.  Substracting -0 is unnecessary.
326          */
327         tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len);
328         csum = csum + (tmp_csum ^ 0xffff);
329         csum = (csum & 0xffff) + (csum >> 16);
330         csum = (csum & 0xffff) + (csum >> 16);
331         lro->data_csum = csum;
332         
333         lro->ip = ip;
334         /* record timestamp if it is present */
335         if (opt_bytes) {
336                 lro->timestamp = 1;
337                 lro->tsval = ntohl(*(ts_ptr + 1));
338                 lro->tsecr = *(ts_ptr + 2);
339         }
340         lro->len = tot_len;
341         lro->m_head = m_head;
342         lro->m_tail = m_tail;
343         return 0;
344 }
345
346 #endif /* INET */
347 /*
348   This file uses Myri10GE driver indentation.
349
350   Local Variables:
351   c-file-style:"linux"
352   tab-width:8
353   End:
354 */