ether: Add instrument to detect wrong hardware supplied hash
[dragonfly.git] / sys / net / if_ethersubr.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1982, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93
34 * $FreeBSD: src/sys/net/if_ethersubr.c,v 1.70.2.33 2003/04/28 15:45:53 archie Exp $
35 */
36
984263bc
MD
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#include "opt_ipx.h"
9b42cabe 40#include "opt_mpls.h"
984263bc 41#include "opt_netgraph.h"
0d16ba1d 42#include "opt_carp.h"
7c7f9646 43#include "opt_rss.h"
984263bc
MD
44
45#include <sys/param.h>
46#include <sys/systm.h>
68b67450 47#include <sys/globaldata.h>
984263bc 48#include <sys/kernel.h>
f3e0b5f0 49#include <sys/ktr.h>
8b3db995 50#include <sys/lock.h>
984263bc
MD
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
68b67450 53#include <sys/msgport.h>
984263bc
MD
54#include <sys/socket.h>
55#include <sys/sockio.h>
56#include <sys/sysctl.h>
68b67450 57#include <sys/thread.h>
684a93c4 58
68b67450 59#include <sys/thread2.h>
684a93c4 60#include <sys/mplock2.h>
984263bc
MD
61
62#include <net/if.h>
63#include <net/netisr.h>
64#include <net/route.h>
65#include <net/if_llc.h>
66#include <net/if_dl.h>
67#include <net/if_types.h>
4d723e5a 68#include <net/ifq_var.h>
984263bc
MD
69#include <net/bpf.h>
70#include <net/ethernet.h>
e6b5847c 71#include <net/vlan/if_vlan_ether.h>
5f60906c 72#include <net/vlan/if_vlan_var.h>
29bc1092 73#include <net/netmsg2.h>
984263bc
MD
74
75#if defined(INET) || defined(INET6)
76#include <netinet/in.h>
8697599b 77#include <netinet/ip_var.h>
5f60906c 78#include <netinet/tcp_var.h>
984263bc 79#include <netinet/if_ether.h>
4639df5f 80#include <netinet/ip_flow.h>
1f2de5d4
MD
81#include <net/ipfw/ip_fw.h>
82#include <net/dummynet/ip_dummynet.h>
984263bc
MD
83#endif
84#ifdef INET6
85#include <netinet6/nd6.h>
86#endif
87
0d16ba1d
MD
88#ifdef CARP
89#include <netinet/ip_carp.h>
90#endif
91
984263bc 92#ifdef IPX
d2438d69
MD
93#include <netproto/ipx/ipx.h>
94#include <netproto/ipx/ipx_if.h>
/*
 * Optional IPX Ethernet-framing hooks.  Both start out NULL; the output
 * path in this file tests ef_outputp for NULL before calling it.
 */
int (*ef_inputp)(struct ifnet*, const struct ether_header *eh, struct mbuf *m);
int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst,
		  short *tp, int *hlen);
984263bc
MD
98#endif
99
9b42cabe
NA
100#ifdef MPLS
101#include <netproto/mpls/mpls.h>
102#endif
103
/*
 * netgraph node hooks for ng_ether(4).
 * Each pointer is NULL unless something installs a handler; callers in
 * this file test for NULL before invoking a hook.
 */
void	(*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
void	(*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
int	(*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp);
void	(*ng_ether_attach_p)(struct ifnet *ifp);
void	(*ng_ether_detach_p)(struct ifnet *ifp);

/* Input hook for frames carrying a VLAN tag (M_VLANTAG set on the mbuf). */
void	(*vlan_input_p)(struct mbuf *);
3013ac0e 112
5fe66e68
JH
/* Forward declarations of file-local helpers. */
static int ether_output(struct ifnet *, struct mbuf *, struct sockaddr *,
			struct rtentry *);
static void ether_restore_header(struct mbuf **, const struct ether_header *,
				 const struct ether_header *);
static int ether_characterize(struct mbuf **);
static void ether_dispatch(int, struct mbuf *);

/*
 * if_bridge support
 *
 * Function pointers into the bridge code.  ether_output() asserts that
 * bridge_output_p is non-NULL whenever ifp->if_bridge is set.
 */
struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
int (*bridge_output_p)(struct ifnet *, struct mbuf *);
void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
struct ifnet *(*bridge_interface_p)(void *if_bridge);
db37145f 127
5fe66e68
JH
128static int ether_resolvemulti(struct ifnet *, struct sockaddr **,
129 struct sockaddr *);
130
131const uint8_t etherbroadcastaddr[ETHER_ADDR_LEN] = {
c401f0fd
JS
132 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
133};
134
5fe66e68 135#define gotoerr(e) do { error = (e); goto bad; } while (0)
f23061d4 136#define IFP2AC(ifp) ((struct arpcom *)(ifp))
984263bc 137
5fe66e68 138static boolean_t ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst,
7c5bb821 139 struct ip_fw **rule,
90ca9293 140 const struct ether_header *eh);
5fe66e68 141
984263bc 142static int ether_ipfw;
83c08199
SZ
143static u_long ether_restore_hdr;
144static u_long ether_prepend_hdr;
ebe4c2ae 145static u_long ether_input_wronghash;
70d9a675 146static int ether_debug;
9b77ea6e 147
7c7f9646 148#ifdef RSS_DEBUG
83c08199
SZ
149static u_long ether_pktinfo_try;
150static u_long ether_pktinfo_hit;
151static u_long ether_rss_nopi;
152static u_long ether_rss_nohash;
ebe4c2ae 153static u_long ether_input_requeue;
234d1daa
SZ
154static u_long ether_input_wronghwhash;
155static int ether_input_ckhash;
7c7f9646
SZ
156#endif
157
5fe66e68
JH
158SYSCTL_DECL(_net_link);
159SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
70d9a675 160SYSCTL_INT(_net_link_ether, OID_AUTO, debug, CTLFLAG_RW,
83c08199 161 &ether_debug, 0, "Ether debug");
5fe66e68 162SYSCTL_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW,
83c08199
SZ
163 &ether_ipfw, 0, "Pass ether pkts through firewall");
164SYSCTL_ULONG(_net_link_ether, OID_AUTO, restore_hdr, CTLFLAG_RW,
165 &ether_restore_hdr, 0, "# of ether header restoration");
166SYSCTL_ULONG(_net_link_ether, OID_AUTO, prepend_hdr, CTLFLAG_RW,
167 &ether_prepend_hdr, 0,
168 "# of ether header restoration which prepends mbuf");
ebe4c2ae
SZ
169SYSCTL_ULONG(_net_link_ether, OID_AUTO, input_wronghash, CTLFLAG_RW,
170 &ether_input_wronghash, 0, "# of input packets with wrong hash");
7c7f9646 171#ifdef RSS_DEBUG
83c08199
SZ
172SYSCTL_ULONG(_net_link_ether, OID_AUTO, rss_nopi, CTLFLAG_RW,
173 &ether_rss_nopi, 0, "# of packets do not have pktinfo");
174SYSCTL_ULONG(_net_link_ether, OID_AUTO, rss_nohash, CTLFLAG_RW,
175 &ether_rss_nohash, 0, "# of packets do not have hash");
176SYSCTL_ULONG(_net_link_ether, OID_AUTO, pktinfo_try, CTLFLAG_RW,
177 &ether_pktinfo_try, 0,
178 "# of tries to find packets' msgport using pktinfo");
179SYSCTL_ULONG(_net_link_ether, OID_AUTO, pktinfo_hit, CTLFLAG_RW,
180 &ether_pktinfo_hit, 0,
181 "# of packets whose msgport are found using pktinfo");
ebe4c2ae
SZ
182SYSCTL_ULONG(_net_link_ether, OID_AUTO, input_requeue, CTLFLAG_RW,
183 &ether_input_requeue, 0, "# of input packets gets requeued");
234d1daa
SZ
184SYSCTL_ULONG(_net_link_ether, OID_AUTO, input_wronghwhash, CTLFLAG_RW,
185 &ether_input_wronghwhash, 0, "# of input packets with wrong hw hash");
186SYSCTL_INT(_net_link_ether, OID_AUTO, always_ckhash, CTLFLAG_RW,
187 &ether_input_ckhash, 0, "always check hash");
7c7f9646 188#endif
984263bc 189
f3e0b5f0 190#define ETHER_KTR_STR "ifp=%p"
5bf48697 191#define ETHER_KTR_ARGS struct ifnet *ifp
f3e0b5f0
SZ
192#ifndef KTR_ETHERNET
193#define KTR_ETHERNET KTR_ALL
194#endif
195KTR_INFO_MASTER(ether);
eda7db08
SZ
196KTR_INFO(KTR_ETHERNET, ether, pkt_beg, 0, ETHER_KTR_STR, ETHER_KTR_ARGS);
197KTR_INFO(KTR_ETHERNET, ether, pkt_end, 1, ETHER_KTR_STR, ETHER_KTR_ARGS);
5bf48697
AE
198KTR_INFO(KTR_ETHERNET, ether, disp_beg, 2, ETHER_KTR_STR, ETHER_KTR_ARGS);
199KTR_INFO(KTR_ETHERNET, ether, disp_end, 3, ETHER_KTR_STR, ETHER_KTR_ARGS);
f3e0b5f0
SZ
200#define logether(name, arg) KTR_LOG(ether_ ## name, arg)
201
984263bc
MD
202/*
203 * Ethernet output routine.
204 * Encapsulate a packet of type family for the local net.
205 * Use trailer local net encapsulation if enough data in first
206 * packet leaves a multiple of 512 bytes of data in remainder.
207 * Assumes that ifp is actually pointer to arpcom structure.
208 */
3013ac0e
JS
209static int
210ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
f23061d4 211 struct rtentry *rt)
984263bc 212{
f23061d4
JH
213 struct ether_header *eh, *deh;
214 u_char *edst;
984263bc 215 int loop_copy = 0;
f23061d4 216 int hlen = ETHER_HDR_LEN; /* link layer header length */
984263bc 217 struct arpcom *ac = IFP2AC(ifp);
f23061d4 218 int error;
984263bc 219
2c9effcf 220 ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
57dff79c 221
3a593c54
MD
222 if (ifp->if_flags & IFF_MONITOR)
223 gotoerr(ENETDOWN);
f23061d4
JH
224 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) != (IFF_UP | IFF_RUNNING))
225 gotoerr(ENETDOWN);
226
227 M_PREPEND(m, sizeof(struct ether_header), MB_DONTWAIT);
228 if (m == NULL)
5fe66e68 229 return (ENOBUFS);
7df36335 230 m->m_pkthdr.csum_lhlen = sizeof(struct ether_header);
f23061d4
JH
231 eh = mtod(m, struct ether_header *);
232 edst = eh->ether_dhost;
233
5fe66e68
JH
234 /*
235 * Fill in the destination ethernet address and frame type.
236 */
984263bc
MD
237 switch (dst->sa_family) {
238#ifdef INET
239 case AF_INET:
f23061d4 240 if (!arpresolve(ifp, rt, m, dst, edst))
984263bc 241 return (0); /* if not yet resolved */
cb8d752c
NA
242#ifdef MPLS
243 if (m->m_flags & M_MPLSLABELED)
244 eh->ether_type = htons(ETHERTYPE_MPLS);
245 else
246#endif
247 eh->ether_type = htons(ETHERTYPE_IP);
984263bc
MD
248 break;
249#endif
250#ifdef INET6
251 case AF_INET6:
f23061d4 252 if (!nd6_storelladdr(&ac->ac_if, rt, m, dst, edst))
5fe66e68 253 return (0); /* Something bad happenned. */
f23061d4 254 eh->ether_type = htons(ETHERTYPE_IPV6);
984263bc
MD
255 break;
256#endif
257#ifdef IPX
258 case AF_IPX:
f23061d4 259 if (ef_outputp != NULL) {
ff54734e
SZ
260 /*
261 * Hold BGL and recheck ef_outputp
262 */
263 get_mplock();
264 if (ef_outputp != NULL) {
265 error = ef_outputp(ifp, &m, dst,
266 &eh->ether_type, &hlen);
267 rel_mplock();
268 if (error)
269 goto bad;
270 else
271 break;
272 }
273 rel_mplock();
f23061d4 274 }
ff54734e
SZ
275 eh->ether_type = htons(ETHERTYPE_IPX);
276 bcopy(&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
277 edst, ETHER_ADDR_LEN);
984263bc
MD
278 break;
279#endif
984263bc 280 case pseudo_AF_HDRCMPLT:
984263bc
MD
281 case AF_UNSPEC:
282 loop_copy = -1; /* if this is for us, don't do it */
f23061d4
JH
283 deh = (struct ether_header *)dst->sa_data;
284 memcpy(edst, deh->ether_dhost, ETHER_ADDR_LEN);
285 eh->ether_type = deh->ether_type;
984263bc
MD
286 break;
287
288 default:
8f0777ca 289 if_printf(ifp, "can't handle af%d\n", dst->sa_family);
f23061d4 290 gotoerr(EAFNOSUPPORT);
984263bc
MD
291 }
292
f23061d4
JH
293 if (dst->sa_family == pseudo_AF_HDRCMPLT) /* unlikely */
294 memcpy(eh->ether_shost,
295 ((struct ether_header *)dst->sa_data)->ether_shost,
296 ETHER_ADDR_LEN);
984263bc 297 else
f23061d4 298 memcpy(eh->ether_shost, ac->ac_enaddr, ETHER_ADDR_LEN);
984263bc
MD
299
300 /*
db37145f
SS
301 * Bridges require special output handling.
302 */
303 if (ifp->if_bridge) {
8f0777ca
SZ
304 KASSERT(bridge_output_p != NULL,
305 ("%s: if_bridge not loaded!", __func__));
ad8c8b44 306 return bridge_output_p(ifp, m);
db37145f
SS
307 }
308
309 /*
984263bc
MD
310 * If a simplex interface, and the packet is being sent to our
311 * Ethernet address or a broadcast address, loopback a copy.
312 * XXX To make a simplex device behave exactly like a duplex
313 * device, we should copy in the case of sending to our own
314 * ethernet address (thus letting the original actually appear
315 * on the wire). However, we don't do that here for security
316 * reasons and compatibility with the original behavior.
317 */
318 if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) {
319 int csum_flags = 0;
320
321 if (m->m_pkthdr.csum_flags & CSUM_IP)
f23061d4 322 csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
984263bc 323 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
f23061d4 324 csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
984263bc
MD
325 if ((m->m_flags & M_BCAST) || (loop_copy > 0)) {
326 struct mbuf *n;
327
f23061d4 328 if ((n = m_copypacket(m, MB_DONTWAIT)) != NULL) {
984263bc
MD
329 n->m_pkthdr.csum_flags |= csum_flags;
330 if (csum_flags & CSUM_DATA_VALID)
331 n->m_pkthdr.csum_data = 0xffff;
f23061d4 332 if_simloop(ifp, n, dst->sa_family, hlen);
984263bc
MD
333 } else
334 ifp->if_iqdrops++;
f23061d4
JH
335 } else if (bcmp(eh->ether_dhost, eh->ether_shost,
336 ETHER_ADDR_LEN) == 0) {
984263bc
MD
337 m->m_pkthdr.csum_flags |= csum_flags;
338 if (csum_flags & CSUM_DATA_VALID)
339 m->m_pkthdr.csum_data = 0xffff;
f23061d4 340 if_simloop(ifp, m, dst->sa_family, hlen);
984263bc
MD
341 return (0); /* XXX */
342 }
343 }
344
0d16ba1d 345#ifdef CARP
24c6e413
SZ
346 if (ifp->if_type == IFT_CARP) {
347 ifp = carp_parent(ifp);
bb05f5cd
SZ
348 if (ifp == NULL)
349 gotoerr(ENETUNREACH);
350
24c6e413
SZ
351 ac = IFP2AC(ifp);
352
ff54734e 353 /*
24c6e413 354 * Check precondition again
ff54734e 355 */
24c6e413
SZ
356 ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
357
358 if (ifp->if_flags & IFF_MONITOR)
359 gotoerr(ENETDOWN);
360 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
361 (IFF_UP | IFF_RUNNING))
362 gotoerr(ENETDOWN);
ff54734e 363 }
0d16ba1d 364#endif
0d16ba1d 365
984263bc
MD
366 /* Handle ng_ether(4) processing, if any */
367 if (ng_ether_output_p != NULL) {
ff54734e
SZ
368 /*
369 * Hold BGL and recheck ng_ether_output_p
370 */
371 get_mplock();
372 if (ng_ether_output_p != NULL) {
373 if ((error = ng_ether_output_p(ifp, &m)) != 0) {
374 rel_mplock();
375 goto bad;
376 }
377 if (m == NULL) {
378 rel_mplock();
379 return (0);
380 }
381 }
382 rel_mplock();
984263bc
MD
383 }
384
385 /* Continue with link-layer output */
386 return ether_output_frame(ifp, m);
0c3c561c
JH
387
388bad:
389 m_freem(m);
390 return (error);
984263bc
MD
391}
392
393/*
70d9a675
MD
394 * Returns the bridge interface an ifp is associated
395 * with.
396 *
397 * Only call if ifp->if_bridge != NULL.
398 */
399struct ifnet *
400ether_bridge_interface(struct ifnet *ifp)
401{
402 if (bridge_interface_p)
403 return(bridge_interface_p(ifp->if_bridge));
404 return (ifp);
405}
406
407/*
984263bc
MD
408 * Ethernet link layer output routine to send a raw frame to the device.
409 *
410 * This assumes that the 14 byte Ethernet header is present and contiguous
a8d45119 411 * in the first mbuf.
984263bc
MD
412 */
413int
f23061d4 414ether_output_frame(struct ifnet *ifp, struct mbuf *m)
984263bc 415{
f23061d4 416 struct ip_fw *rule = NULL;
984263bc 417 int error = 0;
4d723e5a 418 struct altq_pktattr pktattr;
984263bc 419
2c9effcf 420 ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
57dff79c 421
eb241549
SZ
422 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) {
423 struct m_tag *mtag;
424
425 /* Extract info from dummynet tag */
426 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
427 KKASSERT(mtag != NULL);
84a3e25a 428 rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv;
eb241549 429 KKASSERT(rule != NULL);
4c7020ad
SZ
430
431 m_tag_delete(m, mtag);
eb241549 432 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED;
5fe66e68 433 }
984263bc 434
4d723e5a
JS
435 if (ifq_is_enabled(&ifp->if_snd))
436 altq_etherclassify(&ifp->if_snd, m, &pktattr);
4986965b 437 crit_enter();
984263bc
MD
438 if (IPFW_LOADED && ether_ipfw != 0) {
439 struct ether_header save_eh, *eh;
440
441 eh = mtod(m, struct ether_header *);
442 save_eh = *eh;
443 m_adj(m, ETHER_HDR_LEN);
90ca9293 444 if (!ether_ipfw_chk(&m, ifp, &rule, eh)) {
4986965b 445 crit_exit();
5fe66e68 446 if (m != NULL) {
984263bc 447 m_freem(m);
f23061d4 448 return ENOBUFS; /* pkt dropped */
984263bc
MD
449 } else
450 return 0; /* consumed e.g. in a pipe */
451 }
9b77ea6e 452
984263bc 453 /* packet was ok, restore the ethernet header */
9b77ea6e
SZ
454 ether_restore_header(&m, eh, &save_eh);
455 if (m == NULL) {
456 crit_exit();
457 return ENOBUFS;
984263bc
MD
458 }
459 }
78195a76 460 crit_exit();
984263bc
MD
461
462 /*
463 * Queue message on interface, update output statistics if
464 * successful, and start output if interface not yet active.
465 */
9db4b353 466 error = ifq_dispatch(ifp, m, &pktattr);
984263bc
MD
467 return (error);
468}
469
470/*
471 * ipfw processing for ethernet packets (in and out).
5fe66e68 472 * The second parameter is NULL from ether_demux(), and ifp from
a8d45119 473 * ether_output_frame().
984263bc 474 */
5fe66e68 475static boolean_t
90ca9293
SZ
476ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, struct ip_fw **rule,
477 const struct ether_header *eh)
984263bc 478{
29b27cb7 479 struct ether_header save_eh = *eh; /* might be a ptr in *m0 */
984263bc 480 struct ip_fw_args args;
e5ecc832 481 struct m_tag *mtag;
29b27cb7 482 struct mbuf *m;
f23061d4 483 int i;
984263bc
MD
484
485 if (*rule != NULL && fw_one_pass)
5fe66e68 486 return TRUE; /* dummynet packet, already partially processed */
984263bc
MD
487
488 /*
90ca9293 489 * I need some amount of data to be contiguous.
984263bc 490 */
f23061d4 491 i = min((*m0)->m_pkthdr.len, max_protohdr);
90ca9293 492 if ((*m0)->m_len < i) {
984263bc
MD
493 *m0 = m_pullup(*m0, i);
494 if (*m0 == NULL)
5fe66e68 495 return FALSE;
984263bc
MD
496 }
497
5de23090
SZ
498 /*
499 * Clean up tags
500 */
e5ecc832
JS
501 if ((mtag = m_tag_find(*m0, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL)
502 m_tag_delete(*m0, mtag);
5de23090
SZ
503 if ((*m0)->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED) {
504 mtag = m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL);
505 KKASSERT(mtag != NULL);
506 m_tag_delete(*m0, mtag);
507 (*m0)->m_pkthdr.fw_flags &= ~IPFORWARD_MBUF_TAGGED;
508 }
509
510 args.m = *m0; /* the packet we are looking at */
511 args.oif = dst; /* destination, if any */
984263bc 512 args.rule = *rule; /* matching rule to restart */
984263bc
MD
513 args.eh = &save_eh; /* MAC header for bridged/MAC packets */
514 i = ip_fw_chk_ptr(&args);
515 *m0 = args.m;
516 *rule = args.rule;
517
29b27cb7 518 if (*m0 == NULL)
5fe66e68 519 return FALSE;
984263bc 520
29b27cb7
SZ
521 switch (i) {
522 case IP_FW_PASS:
5fe66e68 523 return TRUE;
984263bc 524
29b27cb7
SZ
525 case IP_FW_DIVERT:
526 case IP_FW_TEE:
527 case IP_FW_DENY:
984263bc 528 /*
29b27cb7
SZ
529 * XXX at some point add support for divert/forward actions.
530 * If none of the above matches, we have to drop the pkt.
984263bc 531 */
29b27cb7 532 return FALSE;
984263bc 533
29b27cb7
SZ
534 case IP_FW_DUMMYNET:
535 /*
536 * Pass the pkt to dummynet, which consumes it.
537 */
90ca9293
SZ
538 m = *m0; /* pass the original to dummynet */
539 *m0 = NULL; /* and nothing back to the caller */
9b77ea6e
SZ
540
541 ether_restore_header(&m, eh, &save_eh);
542 if (m == NULL)
543 return FALSE;
544
29b27cb7
SZ
545 ip_fw_dn_io_ptr(m, args.cookie,
546 dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args);
e4d4f9c3 547 ip_dn_queue(m);
5fe66e68 548 return FALSE;
29b27cb7
SZ
549
550 default:
ed20d0e3 551 panic("unknown ipfw return value: %d", i);
984263bc 552 }
984263bc
MD
553}
554
4853cd0f 555static void
68b67450
SZ
556ether_input(struct ifnet *ifp, struct mbuf *m)
557{
eda7db08 558 ether_input_pkt(ifp, m, NULL);
984263bc
MD
559}
560
561/*
562 * Perform common duties while attaching to interface list
563 */
564void
78195a76 565ether_ifattach(struct ifnet *ifp, uint8_t *lla, lwkt_serialize_t serializer)
984263bc 566{
78195a76
MD
567 ether_ifattach_bpf(ifp, lla, DLT_EN10MB, sizeof(struct ether_header),
568 serializer);
c0f6c904
JS
569}
570
571void
78195a76
MD
572ether_ifattach_bpf(struct ifnet *ifp, uint8_t *lla, u_int dlt, u_int hdrlen,
573 lwkt_serialize_t serializer)
c0f6c904 574{
82ed7fc2 575 struct sockaddr_dl *sdl;
984263bc
MD
576
577 ifp->if_type = IFT_ETHER;
c401f0fd 578 ifp->if_addrlen = ETHER_ADDR_LEN;
5fe66e68 579 ifp->if_hdrlen = ETHER_HDR_LEN;
78195a76 580 if_attach(ifp, serializer);
984263bc 581 ifp->if_mtu = ETHERMTU;
984263bc 582 if (ifp->if_baudrate == 0)
f23061d4 583 ifp->if_baudrate = 10000000;
5fe66e68 584 ifp->if_output = ether_output;
0b076e92 585 ifp->if_input = ether_input;
5fe66e68
JH
586 ifp->if_resolvemulti = ether_resolvemulti;
587 ifp->if_broadcastaddr = etherbroadcastaddr;
f2682cb9 588 sdl = IF_LLSOCKADDR(ifp);
984263bc
MD
589 sdl->sdl_type = IFT_ETHER;
590 sdl->sdl_alen = ifp->if_addrlen;
0a8b5977 591 bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
c568d5be
JS
592 /*
593 * XXX Keep the current drivers happy.
594 * XXX Remove once all drivers have been cleaned up
595 */
596 if (lla != IFP2AC(ifp)->ac_enaddr)
597 bcopy(lla, IFP2AC(ifp)->ac_enaddr, ifp->if_addrlen);
c0f6c904 598 bpfattach(ifp, dlt, hdrlen);
984263bc
MD
599 if (ng_ether_attach_p != NULL)
600 (*ng_ether_attach_p)(ifp);
267caeeb
JS
601
602 if_printf(ifp, "MAC address: %6D\n", lla, ":");
984263bc
MD
603}
604
605/*
606 * Perform common duties while detaching an Ethernet interface
607 */
608void
0a8b5977 609ether_ifdetach(struct ifnet *ifp)
984263bc 610{
45b8be9e 611 if_down(ifp);
45b8be9e 612
984263bc
MD
613 if (ng_ether_detach_p != NULL)
614 (*ng_ether_detach_p)(ifp);
0a8b5977 615 bpfdetach(ifp);
984263bc 616 if_detach(ifp);
984263bc
MD
617}
618
984263bc 619int
fecfec53 620ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
984263bc
MD
621{
622 struct ifaddr *ifa = (struct ifaddr *) data;
623 struct ifreq *ifr = (struct ifreq *) data;
624 int error = 0;
625
cbf2eda6
SZ
626#define IF_INIT(ifp) \
627do { \
628 if (((ifp)->if_flags & IFF_UP) == 0) { \
629 (ifp)->if_flags |= IFF_UP; \
630 (ifp)->if_init((ifp)->if_softc); \
631 } \
632} while (0)
633
2c9effcf 634 ASSERT_IFNET_SERIALIZED_ALL(ifp);
78195a76 635
984263bc
MD
636 switch (command) {
637 case SIOCSIFADDR:
984263bc
MD
638 switch (ifa->ifa_addr->sa_family) {
639#ifdef INET
640 case AF_INET:
cbf2eda6 641 IF_INIT(ifp); /* before arpwhohas */
984263bc
MD
642 arp_ifinit(ifp, ifa);
643 break;
644#endif
645#ifdef IPX
646 /*
647 * XXX - This code is probably wrong
648 */
649 case AF_IPX:
650 {
f23061d4 651 struct ipx_addr *ina = &IA_SIPX(ifa)->sipx_addr;
984263bc
MD
652 struct arpcom *ac = IFP2AC(ifp);
653
654 if (ipx_nullhost(*ina))
f23061d4
JH
655 ina->x_host = *(union ipx_host *) ac->ac_enaddr;
656 else
657 bcopy(ina->x_host.c_host, ac->ac_enaddr,
658 sizeof ac->ac_enaddr);
984263bc 659
cbf2eda6 660 IF_INIT(ifp); /* Set new address. */
984263bc
MD
661 break;
662 }
663#endif
984263bc 664 default:
cbf2eda6 665 IF_INIT(ifp);
984263bc
MD
666 break;
667 }
668 break;
669
670 case SIOCGIFADDR:
f23061d4
JH
671 bcopy(IFP2AC(ifp)->ac_enaddr,
672 ((struct sockaddr *)ifr->ifr_data)->sa_data,
673 ETHER_ADDR_LEN);
984263bc
MD
674 break;
675
676 case SIOCSIFMTU:
677 /*
678 * Set the interface MTU.
679 */
680 if (ifr->ifr_mtu > ETHERMTU) {
681 error = EINVAL;
682 } else {
683 ifp->if_mtu = ifr->ifr_mtu;
684 }
685 break;
c2d9fd91
JS
686 default:
687 error = EINVAL;
688 break;
984263bc
MD
689 }
690 return (error);
cbf2eda6
SZ
691
692#undef IF_INIT
984263bc
MD
693}
694
695int
f23061d4
JH
696ether_resolvemulti(
697 struct ifnet *ifp,
698 struct sockaddr **llsa,
699 struct sockaddr *sa)
984263bc
MD
700{
701 struct sockaddr_dl *sdl;
11ee5ab3 702#ifdef INET
984263bc 703 struct sockaddr_in *sin;
11ee5ab3 704#endif
984263bc
MD
705#ifdef INET6
706 struct sockaddr_in6 *sin6;
707#endif
708 u_char *e_addr;
709
710 switch(sa->sa_family) {
711 case AF_LINK:
712 /*
713 * No mapping needed. Just check that it's a valid MC address.
714 */
715 sdl = (struct sockaddr_dl *)sa;
716 e_addr = LLADDR(sdl);
717 if ((e_addr[0] & 1) != 1)
718 return EADDRNOTAVAIL;
4090d6ff 719 *llsa = NULL;
984263bc
MD
720 return 0;
721
722#ifdef INET
723 case AF_INET:
724 sin = (struct sockaddr_in *)sa;
725 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
726 return EADDRNOTAVAIL;
884717e1 727 sdl = kmalloc(sizeof *sdl, M_IFMADDR, M_WAITOK | M_ZERO);
984263bc
MD
728 sdl->sdl_len = sizeof *sdl;
729 sdl->sdl_family = AF_LINK;
730 sdl->sdl_index = ifp->if_index;
731 sdl->sdl_type = IFT_ETHER;
732 sdl->sdl_alen = ETHER_ADDR_LEN;
733 e_addr = LLADDR(sdl);
734 ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
735 *llsa = (struct sockaddr *)sdl;
736 return 0;
737#endif
738#ifdef INET6
739 case AF_INET6:
740 sin6 = (struct sockaddr_in6 *)sa;
741 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
742 /*
743 * An IP6 address of 0 means listen to all
744 * of the Ethernet multicast address used for IP6.
745 * (This is used for multicast routers.)
746 */
747 ifp->if_flags |= IFF_ALLMULTI;
4090d6ff 748 *llsa = NULL;
984263bc
MD
749 return 0;
750 }
751 if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
752 return EADDRNOTAVAIL;
884717e1 753 sdl = kmalloc(sizeof *sdl, M_IFMADDR, M_WAITOK | M_ZERO);
984263bc
MD
754 sdl->sdl_len = sizeof *sdl;
755 sdl->sdl_family = AF_LINK;
756 sdl->sdl_index = ifp->if_index;
757 sdl->sdl_type = IFT_ETHER;
758 sdl->sdl_alen = ETHER_ADDR_LEN;
759 e_addr = LLADDR(sdl);
760 ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
761 *llsa = (struct sockaddr *)sdl;
762 return 0;
763#endif
764
765 default:
766 /*
767 * Well, the text isn't quite right, but it's the name
768 * that counts...
769 */
770 return EAFNOSUPPORT;
771 }
772}
d6018c31
JS
773
#if 0
/*
 * This is for reference.  We have a table-driven version
 * of the little-endian crc32 generator, which is faster
 * than the double-loop.
 */
uint32_t
ether_crc32_le(const uint8_t *buf, size_t len)
{
	uint32_t c, crc, carry;
	size_t i, j;

	crc = 0xffffffffU;	/* initial value */

	for (i = 0; i < len; i++) {
		c = buf[i];
		for (j = 0; j < 8; j++) {
			carry = ((crc & 0x01) ? 1 : 0) ^ (c & 0x01);
			crc >>= 1;
			c >>= 1;
			if (carry)
				crc = (crc ^ ETHER_CRC_POLY_LE);
		}
	}

	return (crc);
}
#else
/*
 * Little-endian (bit-reflected) CRC-32 over 'len' bytes at 'buf'.
 *
 * The register starts at 0xffffffff and is returned as-is, i.e. WITHOUT
 * the final inversion used by the common "CRC-32" checksum.  An empty
 * buffer therefore yields 0xffffffff.
 *
 * Each byte is processed as two 4-bit steps through a 16-entry table.
 */
uint32_t
ether_crc32_le(const uint8_t *buf, size_t len)
{
	static const uint32_t crctab[] = {
		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
	};
	uint32_t crc;
	size_t i;

	crc = 0xffffffffU;	/* initial value */

	for (i = 0; i < len; i++) {
		crc ^= buf[i];
		/* low nibble, then high nibble */
		crc = (crc >> 4) ^ crctab[crc & 0xf];
		crc = (crc >> 4) ^ crctab[crc & 0xf];
	}

	return (crc);
}
#endif
825
826uint32_t
827ether_crc32_be(const uint8_t *buf, size_t len)
828{
829 uint32_t c, crc, carry;
830 size_t i, j;
831
832 crc = 0xffffffffU; /* initial value */
833
834 for (i = 0; i < len; i++) {
835 c = buf[i];
836 for (j = 0; j < 8; j++) {
837 carry = ((crc & 0x80000000U) ? 1 : 0) ^ (c & 0x01);
838 crc <<= 1;
839 c >>= 1;
840 if (carry)
841 crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
842 }
843 }
844
f23061d4 845 return (crc);
d6018c31 846}
4d723e5a 847
4d723e5a
JS
848/*
849 * find the size of ethernet header, and call classifier
850 */
851void
852altq_etherclassify(struct ifaltq *ifq, struct mbuf *m,
853 struct altq_pktattr *pktattr)
854{
855 struct ether_header *eh;
856 uint16_t ether_type;
857 int hlen, af, hdrsize;
858 caddr_t hdr;
859
860 hlen = sizeof(struct ether_header);
861 eh = mtod(m, struct ether_header *);
862
863 ether_type = ntohs(eh->ether_type);
864 if (ether_type < ETHERMTU) {
865 /* ick! LLC/SNAP */
866 struct llc *llc = (struct llc *)(eh + 1);
867 hlen += 8;
868
869 if (m->m_len < hlen ||
870 llc->llc_dsap != LLC_SNAP_LSAP ||
871 llc->llc_ssap != LLC_SNAP_LSAP ||
872 llc->llc_control != LLC_UI)
873 goto bad; /* not snap! */
874
875 ether_type = ntohs(llc->llc_un.type_snap.ether_type);
876 }
877
878 if (ether_type == ETHERTYPE_IP) {
879 af = AF_INET;
880 hdrsize = 20; /* sizeof(struct ip) */
881#ifdef INET6
882 } else if (ether_type == ETHERTYPE_IPV6) {
883 af = AF_INET6;
884 hdrsize = 40; /* sizeof(struct ip6_hdr) */
885#endif
886 } else
887 goto bad;
888
889 while (m->m_len <= hlen) {
890 hlen -= m->m_len;
891 m = m->m_next;
892 }
893 hdr = m->m_data + hlen;
894 if (m->m_len < hlen + hdrsize) {
895 /*
896 * ip header is not in a single mbuf. this should not
897 * happen in the current code.
898 * (todo: use m_pulldown in the future)
899 */
900 goto bad;
901 }
902 m->m_data += hlen;
903 m->m_len -= hlen;
904 ifq_classify(ifq, m, af, pktattr);
905 m->m_data -= hlen;
906 m->m_len += hlen;
907
908 return;
909
910bad:
911 pktattr->pattr_class = NULL;
912 pktattr->pattr_hdr = NULL;
913 pktattr->pattr_af = AF_UNSPEC;
914}
9b77ea6e
SZ
915
916static void
917ether_restore_header(struct mbuf **m0, const struct ether_header *eh,
918 const struct ether_header *save_eh)
919{
920 struct mbuf *m = *m0;
921
922 ether_restore_hdr++;
923
924 /*
925 * Prepend the header, optimize for the common case of
926 * eh pointing into the mbuf.
927 */
928 if ((const void *)(eh + 1) == (void *)m->m_data) {
929 m->m_data -= ETHER_HDR_LEN;
930 m->m_len += ETHER_HDR_LEN;
931 m->m_pkthdr.len += ETHER_HDR_LEN;
932 } else {
933 ether_prepend_hdr++;
934
935 M_PREPEND(m, ETHER_HDR_LEN, MB_DONTWAIT);
936 if (m != NULL) {
3d26a382 937 bcopy(save_eh, mtod(m, struct ether_header *),
9b77ea6e
SZ
938 ETHER_HDR_LEN);
939 }
940 }
941 *m0 = m;
942}
68b67450 943
5f32d321
SZ
944/*
945 * Upper layer processing for a received Ethernet packet.
946 */
297c8124 947void
29bc1092
SZ
948ether_demux_oncpu(struct ifnet *ifp, struct mbuf *m)
949{
950 struct ether_header *eh;
21b74198 951 int isr, discard = 0;
29bc1092
SZ
952 u_short ether_type;
953 struct ip_fw *rule = NULL;
29bc1092
SZ
954
955 M_ASSERTPKTHDR(m);
956 KASSERT(m->m_len >= ETHER_HDR_LEN,
ed20d0e3 957 ("ether header is not contiguous!"));
29bc1092
SZ
958
959 eh = mtod(m, struct ether_header *);
960
eb241549
SZ
961 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) {
962 struct m_tag *mtag;
963
964 /* Extract info from dummynet tag */
965 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
966 KKASSERT(mtag != NULL);
29bc1092 967 rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv;
eb241549
SZ
968 KKASSERT(rule != NULL);
969
29bc1092 970 m_tag_delete(m, mtag);
eb241549
SZ
971 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED;
972
973 /* packet is passing the second time */
29bc1092 974 goto post_stats;
eb241549 975 }
29bc1092 976
29bc1092 977 /*
469c71d1
SZ
978 * We got a packet which was unicast to a different Ethernet
979 * address. If the driver is working properly, then this
980 * situation can only happen when the interface is in
981 * promiscuous mode. We defer the packet discarding until the
982 * vlan processing is done, so that vlan/bridge or vlan/netgraph
983 * could work.
29bc1092
SZ
984 */
985 if (((ifp->if_flags & (IFF_PROMISC | IFF_PPROMISC)) == IFF_PROMISC) &&
d1859e78 986 !ETHER_IS_MULTICAST(eh->ether_dhost) &&
70d9a675
MD
987 bcmp(eh->ether_dhost, IFP2AC(ifp)->ac_enaddr, ETHER_ADDR_LEN)) {
988 if (ether_debug & 1) {
989 kprintf("%02x:%02x:%02x:%02x:%02x:%02x "
990 "%02x:%02x:%02x:%02x:%02x:%02x "
991 "%04x vs %02x:%02x:%02x:%02x:%02x:%02x\n",
992 eh->ether_dhost[0],
993 eh->ether_dhost[1],
994 eh->ether_dhost[2],
995 eh->ether_dhost[3],
996 eh->ether_dhost[4],
997 eh->ether_dhost[5],
998 eh->ether_shost[0],
999 eh->ether_shost[1],
1000 eh->ether_shost[2],
1001 eh->ether_shost[3],
1002 eh->ether_shost[4],
1003 eh->ether_shost[5],
1004 eh->ether_type,
1005 ((u_char *)IFP2AC(ifp)->ac_enaddr)[0],
1006 ((u_char *)IFP2AC(ifp)->ac_enaddr)[1],
1007 ((u_char *)IFP2AC(ifp)->ac_enaddr)[2],
1008 ((u_char *)IFP2AC(ifp)->ac_enaddr)[3],
1009 ((u_char *)IFP2AC(ifp)->ac_enaddr)[4],
1010 ((u_char *)IFP2AC(ifp)->ac_enaddr)[5]
1011 );
1012 }
1013 if ((ether_debug & 2) == 0)
1014 discard = 1;
1015 }
29bc1092
SZ
1016
1017post_stats:
469c71d1 1018 if (IPFW_LOADED && ether_ipfw != 0 && !discard) {
29bc1092
SZ
1019 struct ether_header save_eh = *eh;
1020
1021 /* XXX old crufty stuff, needs to be removed */
1022 m_adj(m, sizeof(struct ether_header));
1023
1024 if (!ether_ipfw_chk(&m, NULL, &rule, eh)) {
1025 m_freem(m);
1026 return;
1027 }
1028
1029 ether_restore_header(&m, eh, &save_eh);
1030 if (m == NULL)
1031 return;
1032 eh = mtod(m, struct ether_header *);
1033 }
1034
1035 ether_type = ntohs(eh->ether_type);
1036 KKASSERT(ether_type != ETHERTYPE_VLAN);
1037
1038 if (m->m_flags & M_VLANTAG) {
b327296f
SZ
1039 void (*vlan_input_func)(struct mbuf *);
1040
1041 vlan_input_func = vlan_input_p;
1042 if (vlan_input_func != NULL) {
1043 vlan_input_func(m);
29bc1092
SZ
1044 } else {
1045 m->m_pkthdr.rcvif->if_noproto++;
1046 m_freem(m);
1047 }
29bc1092
SZ
1048 return;
1049 }
1050
4d895293 1051 /*
469c71d1
SZ
1052 * If we have been asked to discard this packet
1053 * (e.g. not for us), drop it before entering
1054 * the upper layer.
1055 */
1056 if (discard) {
1057 m_freem(m);
1058 return;
1059 }
1060
1061 /*
4d895293
SZ
1062 * Clear protocol specific flags,
1063 * before entering the upper layer.
1064 */
da1604af 1065 m->m_flags &= ~M_ETHER_FLAGS;
4d895293
SZ
1066
1067 /* Strip ethernet header. */
29bc1092 1068 m_adj(m, sizeof(struct ether_header));
4d895293 1069
29bc1092
SZ
1070 switch (ether_type) {
1071#ifdef INET
1072 case ETHERTYPE_IP:
8697599b 1073 if ((m->m_flags & M_LENCHECKED) == 0) {
c3c96e44 1074 if (!ip_lengthcheck(&m, 0))
8697599b
SZ
1075 return;
1076 }
297c8124 1077 if (ipflow_fastforward(m))
29bc1092 1078 return;
29bc1092
SZ
1079 isr = NETISR_IP;
1080 break;
1081
1082 case ETHERTYPE_ARP:
1083 if (ifp->if_flags & IFF_NOARP) {
1084 /* Discard packet if ARP is disabled on interface */
1085 m_freem(m);
1086 return;
1087 }
1088 isr = NETISR_ARP;
1089 break;
1090#endif
1091
1092#ifdef INET6
1093 case ETHERTYPE_IPV6:
1094 isr = NETISR_IPV6;
1095 break;
1096#endif
1097
1098#ifdef IPX
1099 case ETHERTYPE_IPX:
c6690c74
SZ
1100 if (ef_inputp) {
1101 /*
1102 * Hold BGL and recheck ef_inputp
1103 */
1104 get_mplock();
1105 if (ef_inputp && ef_inputp(ifp, eh, m) == 0) {
1106 rel_mplock();
1107 return;
1108 }
1109 rel_mplock();
1110 }
29bc1092
SZ
1111 isr = NETISR_IPX;
1112 break;
1113#endif
1114
a020e9d5
SZ
1115#ifdef MPLS
1116 case ETHERTYPE_MPLS:
1117 case ETHERTYPE_MPLS_MCAST:
eda7db08 1118 /* Should have been set by ether_input_pkt(). */
cb8d752c 1119 KKASSERT(m->m_flags & M_MPLSLABELED);
a020e9d5
SZ
1120 isr = NETISR_MPLS;
1121 break;
1122#endif
1123
29bc1092
SZ
1124 default:
1125 /*
1126 * The accurate msgport is not determined before
ebe4c2ae 1127 * we reach here, so recharacterize packet.
29bc1092 1128 */
ebe4c2ae 1129 m->m_flags &= ~M_HASH;
29bc1092 1130#ifdef IPX
c6690c74
SZ
1131 if (ef_inputp) {
1132 /*
1133 * Hold BGL and recheck ef_inputp
1134 */
1135 get_mplock();
1136 if (ef_inputp && ef_inputp(ifp, eh, m) == 0) {
1137 rel_mplock();
1138 return;
1139 }
1140 rel_mplock();
1141 }
29bc1092 1142#endif
c6690c74
SZ
1143 if (ng_ether_input_orphan_p != NULL) {
1144 /*
79b75150
NA
1145 * Put back the ethernet header so netgraph has a
1146 * consistent view of inbound packets.
1147 */
1148 M_PREPEND(m, ETHER_HDR_LEN, MB_DONTWAIT);
25aabaea
NA
1149 if (m == NULL) {
1150 /*
1151 * M_PREPEND frees the mbuf in case of failure.
1152 */
1153 return;
1154 }
79b75150 1155 /*
c6690c74
SZ
1156 * Hold BGL and recheck ng_ether_input_orphan_p
1157 */
1158 get_mplock();
1159 if (ng_ether_input_orphan_p != NULL) {
0147868e 1160 ng_ether_input_orphan_p(ifp, m);
c6690c74
SZ
1161 rel_mplock();
1162 return;
1163 }
1164 rel_mplock();
1165 }
1166 m_freem(m);
29bc1092
SZ
1167 return;
1168 }
1169
ebe4c2ae
SZ
1170 if (m->m_flags & M_HASH) {
1171 if (&curthread->td_msgport == cpu_portfn(m->m_pkthdr.hash)) {
1172 netisr_handle(isr, m);
1173 return;
1174 } else {
1175 /*
1176 * XXX Something is wrong,
1177 * we probably should panic here!
1178 */
1179 m->m_flags &= ~M_HASH;
7908230c 1180 atomic_add_long(&ether_input_wronghash, 1);
ebe4c2ae
SZ
1181 }
1182 }
1183#ifdef RSS_DEBUG
7908230c 1184 atomic_add_long(&ether_input_requeue, 1);
ebe4c2ae 1185#endif
c3c96e44 1186 netisr_queue(isr, m);
29bc1092
SZ
1187}
1188
5f32d321
SZ
1189/*
1190 * First we perform any link layer operations, then continue to the
1191 * upper layers with ether_demux_oncpu().
1192 */
c3c96e44 1193static void
29bc1092
SZ
1194ether_input_oncpu(struct ifnet *ifp, struct mbuf *m)
1195{
bb05f5cd
SZ
1196#ifdef CARP
1197 void *carp;
1198#endif
1199
160af078
SZ
1200 if ((ifp->if_flags & (IFF_UP | IFF_MONITOR)) != IFF_UP) {
1201 /*
1202 * Receiving interface's flags are changed, when this
1203 * packet is waiting for processing; discard it.
1204 */
1205 m_freem(m);
1206 return;
1207 }
1208
29bc1092
SZ
1209 /*
1210 * Tap the packet off here for a bridge. bridge_input()
1211 * will return NULL if it has consumed the packet, otherwise
1212 * it gets processed as normal. Note that bridge_input()
1213 * will always return the original packet if we need to
1214 * process it locally.
1215 */
1216 if (ifp->if_bridge) {
1217 KASSERT(bridge_input_p != NULL,
1218 ("%s: if_bridge not loaded!", __func__));
1219
0899cb3e
SZ
1220 if(m->m_flags & M_ETHER_BRIDGED) {
1221 m->m_flags &= ~M_ETHER_BRIDGED;
29bc1092 1222 } else {
29bc1092
SZ
1223 m = bridge_input_p(ifp, m);
1224 if (m == NULL)
1225 return;
1226
1227 KASSERT(ifp == m->m_pkthdr.rcvif,
ed20d0e3 1228 ("bridge_input_p changed rcvif"));
29bc1092
SZ
1229 }
1230 }
1231
24c6e413 1232#ifdef CARP
bb05f5cd
SZ
1233 carp = ifp->if_carp;
1234 if (carp) {
bb05f5cd 1235 m = carp_input(carp, m);
1948d087 1236 if (m == NULL)
bb05f5cd 1237 return;
bb05f5cd 1238 KASSERT(ifp == m->m_pkthdr.rcvif,
ed20d0e3 1239 ("carp_input changed rcvif"));
24c6e413
SZ
1240 }
1241#endif
1242
29bc1092
SZ
1243 /* Handle ng_ether(4) processing, if any */
1244 if (ng_ether_input_p != NULL) {
c6690c74
SZ
1245 /*
1246 * Hold BGL and recheck ng_ether_input_p
1247 */
1248 get_mplock();
1249 if (ng_ether_input_p != NULL)
1250 ng_ether_input_p(ifp, &m);
1251 rel_mplock();
1252
29bc1092
SZ
1253 if (m == NULL)
1254 return;
1255 }
1256
1257 /* Continue with upper layer processing */
1258 ether_demux_oncpu(ifp, m);
1259}
1260
b9ed4403 1261/*
eda7db08 1262 * Perform certain functions of ether_input_pkt():
b9ed4403
SZ
1263 * - Test IFF_UP
1264 * - Update statistics
1265 * - Run bpf(4) tap if requested
1266 * Then pass the packet to ether_input_oncpu().
1267 *
1268 * This function should be used by pseudo interface (e.g. vlan(4)),
1269 * when it tries to claim that the packet is received by it.
4ee4f753
MD
1270 *
1271 * REINPUT_KEEPRCVIF
1272 * REINPUT_RUNBPF
b9ed4403
SZ
1273 */
1274void
4ee4f753 1275ether_reinput_oncpu(struct ifnet *ifp, struct mbuf *m, int reinput_flags)
b9ed4403
SZ
1276{
1277 /* Discard packet if interface is not up */
1278 if (!(ifp->if_flags & IFF_UP)) {
1279 m_freem(m);
1280 return;
1281 }
1282
4ee4f753
MD
1283 /*
1284 * Change receiving interface. The bridge will often pass a flag to
1285 * ask that this not be done so ARPs get applied to the correct
1286 * side.
1287 */
1288 if ((reinput_flags & REINPUT_KEEPRCVIF) == 0 ||
1289 m->m_pkthdr.rcvif == NULL) {
1290 m->m_pkthdr.rcvif = ifp;
1291 }
b9ed4403
SZ
1292
1293 /* Update statistics */
1294 ifp->if_ipackets++;
1295 ifp->if_ibytes += m->m_pkthdr.len;
1296 if (m->m_flags & (M_MCAST | M_BCAST))
1297 ifp->if_imcasts++;
1298
4ee4f753 1299 if (reinput_flags & REINPUT_RUNBPF)
b9ed4403
SZ
1300 BPF_MTAP(ifp, m);
1301
1302 ether_input_oncpu(ifp, m);
1303}
1304
057441be
SZ
1305static __inline boolean_t
1306ether_vlancheck(struct mbuf **m0)
1307{
1308 struct mbuf *m = *m0;
1309 struct ether_header *eh;
1310 uint16_t ether_type;
1311
1312 eh = mtod(m, struct ether_header *);
1313 ether_type = ntohs(eh->ether_type);
1314
1315 if (ether_type == ETHERTYPE_VLAN && (m->m_flags & M_VLANTAG) == 0) {
1316 /*
1317 * Extract vlan tag if hardware does not do it for us
1318 */
1319 vlan_ether_decap(&m);
1320 if (m == NULL)
1321 goto failed;
1322
1323 eh = mtod(m, struct ether_header *);
1324 ether_type = ntohs(eh->ether_type);
1325 }
1326
1327 if (ether_type == ETHERTYPE_VLAN && (m->m_flags & M_VLANTAG)) {
1328 /*
1329 * To prevent possible dangerous recursion,
1330 * we don't do vlan-in-vlan
1331 */
1332 m->m_pkthdr.rcvif->if_noproto++;
1333 goto failed;
1334 }
1335 KKASSERT(ether_type != ETHERTYPE_VLAN);
1336
da1604af 1337 m->m_flags |= M_ETHER_VLANCHECKED;
057441be
SZ
1338 *m0 = m;
1339 return TRUE;
1340failed:
1341 if (m != NULL)
1342 m_freem(m);
1343 *m0 = NULL;
1344 return FALSE;
1345}
1346
29bc1092 1347static void
002c1265 1348ether_input_handler(netmsg_t nmsg)
29bc1092 1349{
002c1265 1350 struct netmsg_packet *nmp = &nmsg->packet; /* actual size */
828c9923 1351 struct ether_header *eh;
29bc1092
SZ
1352 struct ifnet *ifp;
1353 struct mbuf *m;
1354
1355 m = nmp->nm_packet;
1356 M_ASSERTPKTHDR(m);
234d1daa
SZ
1357
1358 if ((m->m_flags & M_ETHER_VLANCHECKED) == 0) {
1359 if (!ether_vlancheck(&m)) {
1360 KKASSERT(m == NULL);
1361 return;
1362 }
1363 }
1364 if ((m->m_flags & (M_HASH | M_CKHASH)) == (M_HASH | M_CKHASH)
1365#ifdef RSS_DEBUG
1366 || ether_input_ckhash
1367#endif
1368 ) {
1369 int isr;
1370
1371 /*
1372 * Need to verify the hash supplied by the hardware
1373 * which could be wrong.
1374 */
1375 m->m_flags &= ~(M_HASH | M_CKHASH);
1376 isr = ether_characterize(&m);
1377 if (m == NULL)
1378 return;
1379 KKASSERT(m->m_flags & M_HASH);
1380
1381 if (m->m_pkthdr.hash != mycpuid) {
1382 /*
1383 * Wrong hardware supplied hash; redispatch
1384 */
1385 ether_dispatch(isr, m);
1386#ifdef RSS_DEBUG
1387 atomic_add_long(&ether_input_wronghwhash, 1);
1388#endif
1389 return;
1390 }
1391 }
29bc1092
SZ
1392 ifp = m->m_pkthdr.rcvif;
1393
828c9923
SZ
1394 eh = mtod(m, struct ether_header *);
1395 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
1396 if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
1397 ifp->if_addrlen) == 0)
1398 m->m_flags |= M_BCAST;
1399 else
1400 m->m_flags |= M_MCAST;
1401 ifp->if_imcasts++;
1402 }
1403
29bc1092
SZ
1404 ether_input_oncpu(ifp, m);
1405}
1406
c3c96e44 1407/*
eda7db08 1408 * Send the packet to the target msgport
21b74198
MD
1409 *
1410 * At this point the packet had better be characterized (M_HASH set),
1411 * so we know which cpu to send it to.
c3c96e44
MD
1412 */
1413static void
eda7db08 1414ether_dispatch(int isr, struct mbuf *m)
29bc1092
SZ
1415{
1416 struct netmsg_packet *pmsg;
1417
c3c96e44 1418 KKASSERT(m->m_flags & M_HASH);
29bc1092 1419 pmsg = &m->m_hdr.mh_netmsg;
002c1265 1420 netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport,
c3c96e44 1421 0, ether_input_handler);
29bc1092 1422 pmsg->nm_packet = m;
002c1265 1423 pmsg->base.lmsg.u.ms_result = isr;
74f66604 1424
eda7db08
SZ
1425 logether(disp_beg, NULL);
1426 lwkt_sendmsg(cpu_portfn(m->m_pkthdr.hash), &pmsg->base.lmsg);
1427 logether(disp_end, NULL);
74f66604
SZ
1428}
1429
62f35c44
SZ
1430/*
1431 * Process a received Ethernet packet.
1432 *
1433 * The ethernet header is assumed to be in the mbuf so the caller
1434 * MUST MAKE SURE that there are at least sizeof(struct ether_header)
1435 * bytes in the first mbuf.
62f35c44 1436 */
29bc1092 1437void
eda7db08 1438ether_input_pkt(struct ifnet *ifp, struct mbuf *m, const struct pktinfo *pi)
29bc1092 1439{
29bc1092
SZ
1440 int isr;
1441
29bc1092
SZ
1442 M_ASSERTPKTHDR(m);
1443
1444 /* Discard packet if interface is not up */
1445 if (!(ifp->if_flags & IFF_UP)) {
1446 m_freem(m);
1447 return;
1448 }
1449
1450 if (m->m_len < sizeof(struct ether_header)) {
1451 /* XXX error in the caller. */
1452 m_freem(m);
1453 return;
1454 }
29bc1092
SZ
1455
1456 m->m_pkthdr.rcvif = ifp;
1457
eda7db08 1458 logether(pkt_beg, ifp);
f3e0b5f0 1459
29bc1092
SZ
1460 ETHER_BPF_MTAP(ifp, m);
1461
1462 ifp->if_ibytes += m->m_pkthdr.len;
1463
1464 if (ifp->if_flags & IFF_MONITOR) {
0e805566
SZ
1465 struct ether_header *eh;
1466
828c9923
SZ
1467 eh = mtod(m, struct ether_header *);
1468 if (ETHER_IS_MULTICAST(eh->ether_dhost))
1469 ifp->if_imcasts++;
1470
29bc1092
SZ
1471 /*
1472 * Interface marked for monitoring; discard packet.
1473 */
b5a65047 1474 m_freem(m);
f3e0b5f0 1475
eda7db08 1476 logether(pkt_end, ifp);
b5a65047 1477 return;
29bc1092
SZ
1478 }
1479
c3c96e44
MD
1480 /*
1481 * If the packet has been characterized (pi->pi_netisr / M_HASH)
1482 * we can dispatch it immediately without further inspection.
1483 */
2eb0d069 1484 if (pi != NULL && (m->m_flags & M_HASH)) {
7c7f9646 1485#ifdef RSS_DEBUG
7908230c 1486 atomic_add_long(&ether_pktinfo_try, 1);
7c7f9646 1487#endif
e6f77b88
SZ
1488 netisr_hashcheck(pi->pi_netisr, m, pi);
1489 if (m->m_flags & M_HASH) {
eda7db08 1490 ether_dispatch(pi->pi_netisr, m);
7c7f9646 1491#ifdef RSS_DEBUG
7908230c 1492 atomic_add_long(&ether_pktinfo_hit, 1);
7c7f9646 1493#endif
eda7db08 1494 logether(pkt_end, ifp);
e6f77b88
SZ
1495 return;
1496 }
2eb0d069 1497 }
7c7f9646
SZ
1498#ifdef RSS_DEBUG
1499 else if (ifp->if_capenable & IFCAP_RSS) {
1500 if (pi == NULL)
7908230c 1501 atomic_add_long(&ether_rss_nopi, 1);
7c7f9646 1502 else
7908230c 1503 atomic_add_long(&ether_rss_nohash, 1);
7c7f9646
SZ
1504 }
1505#endif
2eb0d069
SZ
1506
1507 /*
234d1daa
SZ
1508 * Packet hash will be recalculated by software, so clear
1509 * the M_HASH and M_CKHASH flag set by the driver; the hash
1510 * value calculated by the hardware may not be exactly what
1511 * we want.
2eb0d069 1512 */
234d1daa 1513 m->m_flags &= ~(M_HASH | M_CKHASH);
2eb0d069 1514
057441be 1515 if (!ether_vlancheck(&m)) {
057441be 1516 KKASSERT(m == NULL);
eda7db08 1517 logether(pkt_end, ifp);
29bc1092
SZ
1518 return;
1519 }
0e805566
SZ
1520
1521 isr = ether_characterize(&m);
1522 if (m == NULL) {
eda7db08 1523 logether(pkt_end, ifp);
0e805566
SZ
1524 return;
1525 }
1526
1527 /*
1528 * Finally dispatch it
1529 */
eda7db08 1530 ether_dispatch(isr, m);
0e805566 1531
eda7db08 1532 logether(pkt_end, ifp);
0e805566
SZ
1533}
1534
1535static int
1536ether_characterize(struct mbuf **m0)
1537{
1538 struct mbuf *m = *m0;
1539 struct ether_header *eh;
1540 uint16_t ether_type;
1541 int isr;
1542
057441be
SZ
1543 eh = mtod(m, struct ether_header *);
1544 ether_type = ntohs(eh->ether_type);
29bc1092
SZ
1545
1546 /*
1547 * Map ether type to netisr id.
1548 */
1549 switch (ether_type) {
1550#ifdef INET
1551 case ETHERTYPE_IP:
1552 isr = NETISR_IP;
1553 break;
1554
1555 case ETHERTYPE_ARP:
1556 isr = NETISR_ARP;
1557 break;
1558#endif
1559
1560#ifdef INET6
1561 case ETHERTYPE_IPV6:
1562 isr = NETISR_IPV6;
1563 break;
1564#endif
1565
1566#ifdef IPX
1567 case ETHERTYPE_IPX:
1568 isr = NETISR_IPX;
1569 break;
1570#endif
1571
a020e9d5
SZ
1572#ifdef MPLS
1573 case ETHERTYPE_MPLS:
1574 case ETHERTYPE_MPLS_MCAST:
cb8d752c 1575 m->m_flags |= M_MPLSLABELED;
a020e9d5
SZ
1576 isr = NETISR_MPLS;
1577 break;
1578#endif
1579
29bc1092
SZ
1580 default:
1581 /*
1582 * NETISR_MAX is an invalid value; it is chosen to let
0e805566
SZ
1583 * netisr_characterize() know that we have no clear
1584 * idea where this packet should go.
29bc1092
SZ
1585 */
1586 isr = NETISR_MAX;
1587 break;
1588 }
1589
1590 /*
c3c96e44
MD
1591 * Ask the isr to characterize the packet since we couldn't.
1592 * This is an attempt to optimally get us onto the correct protocol
1593 * thread.
29bc1092 1594 */
c3c96e44 1595 netisr_characterize(isr, &m, sizeof(struct ether_header));
29bc1092 1596
0e805566
SZ
1597 *m0 = m;
1598 return isr;
29bc1092 1599}
09c280ec 1600
48242f47
SZ
1601static void
1602ether_demux_handler(netmsg_t nmsg)
1603{
1604 struct netmsg_packet *nmp = &nmsg->packet; /* actual size */
1605 struct ifnet *ifp;
1606 struct mbuf *m;
1607
1608 m = nmp->nm_packet;
1609 M_ASSERTPKTHDR(m);
1610 ifp = m->m_pkthdr.rcvif;
1611
1612 ether_demux_oncpu(ifp, m);
1613}
1614
1615void
1616ether_demux(struct mbuf *m)
1617{
1618 struct netmsg_packet *pmsg;
1619 int isr;
1620
1621 isr = ether_characterize(&m);
1622 if (m == NULL)
1623 return;
1624
1625 KKASSERT(m->m_flags & M_HASH);
1626 pmsg = &m->m_hdr.mh_netmsg;
1627 netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport,
1628 0, ether_demux_handler);
1629 pmsg->nm_packet = m;
1630 pmsg->base.lmsg.u.ms_result = isr;
1631
1632 lwkt_sendmsg(cpu_portfn(m->m_pkthdr.hash), &pmsg->base.lmsg);
1633}
1634
5f60906c
SZ
1635boolean_t
1636ether_tso_pullup(struct mbuf **mp, int *hoff0, struct ip **ip, int *iphlen,
1637 struct tcphdr **th, int *thoff)
1638{
1639 struct mbuf *m = *mp;
1640 struct ether_header *eh;
1641 uint16_t type;
1642 int hoff;
1643
1644 KASSERT(M_WRITABLE(m), ("not writable"));
1645
1646 hoff = ETHER_HDR_LEN;
1647 if (m->m_len < hoff) {
1648 m = m_pullup(m, hoff);
1649 if (m == NULL)
1650 goto failed;
1651 }
1652 eh = mtod(m, struct ether_header *);
1653 type = eh->ether_type;
1654
1655 if (type == htons(ETHERTYPE_VLAN)) {
1656 struct ether_vlan_header *evh;
1657
1658 hoff += EVL_ENCAPLEN;
1659 if (m->m_len < hoff) {
1660 m = m_pullup(m, hoff);
1661 if (m == NULL)
1662 goto failed;
1663 }
1664 evh = mtod(m, struct ether_vlan_header *);
1665 type = evh->evl_proto;
1666 }
1667 KASSERT(type == htons(ETHERTYPE_IP), ("not IP %d", ntohs(type)));
1668
1669 *mp = m;
1670 *hoff0 = hoff;
1671 return tcp_tso_pullup(mp, hoff, ip, iphlen, th, thoff);
1672
1673failed:
1674 if (m != NULL)
1675 m_freem(m);
1676 *mp = NULL;
1677 return FALSE;
1678}
1679
09c280ec 1680MODULE_VERSION(ether, 1);