2 * Copyright (c) 1980, 1986, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)if.c 8.3 (Berkeley) 1/4/94
34 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
37 #include "opt_compat.h"
38 #include "opt_inet6.h"
40 #include "opt_ifpoll.h"
42 #include <sys/param.h>
43 #include <sys/malloc.h>
45 #include <sys/systm.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/socketops.h>
52 #include <sys/protosw.h>
53 #include <sys/kernel.h>
55 #include <sys/mutex.h>
56 #include <sys/sockio.h>
57 #include <sys/syslog.h>
58 #include <sys/sysctl.h>
59 #include <sys/domain.h>
60 #include <sys/thread.h>
61 #include <sys/serialize.h>
64 #include <sys/thread2.h>
65 #include <sys/msgport2.h>
66 #include <sys/mutex2.h>
69 #include <net/if_arp.h>
70 #include <net/if_dl.h>
71 #include <net/if_types.h>
72 #include <net/if_var.h>
73 #include <net/ifq_var.h>
74 #include <net/radix.h>
75 #include <net/route.h>
76 #include <net/if_clone.h>
77 #include <net/netisr.h>
78 #include <net/netmsg2.h>
80 #include <machine/atomic.h>
81 #include <machine/stdarg.h>
82 #include <machine/smp.h>
84 #if defined(INET) || defined(INET6)
86 #include <netinet/in.h>
87 #include <netinet/in_var.h>
88 #include <netinet/if_ether.h>
90 #include <netinet6/in6_var.h>
91 #include <netinet6/in6_ifattach.h>
95 #if defined(COMPAT_43)
96 #include <emulation/43bsd/43bsd_socket.h>
97 #endif /* COMPAT_43 */
/*
 * Message used to run ifaddr operations in the netisr threads.
 * NOTE(review): remaining members and closing brace are elided from
 * this view of the file.
 */
99 struct netmsg_ifaddr {
100 struct netmsg_base base;
/*
 * Per-cpu head of the list of staged ifaltq entries; manipulated by
 * ifq_stage_insert()/ifq_stage_remove() below.  Closing brace elided
 * from this view.
 */
106 struct ifaltq_stage_head {
107 TAILQ_HEAD(, ifaltq_stage) ifqs_head;
/* Forward declarations of file-local (static) routines defined below. */
111 * System initialization
113 static void if_attachdomain(void *);
114 static void if_attachdomain1(struct ifnet *);
115 static int ifconf(u_long, caddr_t, struct ucred *);
116 static void ifinit(void *);
117 static void ifnetinit(void *);
118 static void if_slowtimo(void *);
119 static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
120 static int if_rtdel(struct radix_node *, void *);
124 * XXX: declare here to avoid to include many inet6 related files..
125 * should be more generalized?
/* Declared here (per the XXX above) to avoid pulling in inet6 headers. */
127 extern void nd6_setmtu(struct ifnet *);
/* sysctl tree: net.link and net.link.generic. */
130 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
131 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
/*
 * Max number of packets that may be held in a per-cpu ifq staging area
 * before the ifnet's if_start is scheduled; tunable and run-time
 * adjustable via net.link.stage_cntmax.
 */
133 static int ifq_stage_cntmax = 4;
134 TUNABLE_INT("net.link.stage_cntmax", &ifq_stage_cntmax);
135 SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
136 &ifq_stage_cntmax, 0, "ifq staging packet count max");
/* Boot-time initialization hooks (ifnetinit must run after netisr_init). */
138 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
139 /* Must be after netisr_init */
140 SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)
/* Per-if_type common alloc/free hooks, indexed by interface type (0-255). */
142 static if_com_alloc_t *if_com_alloc[256];
143 static if_com_free_t *if_com_free[256];
145 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
146 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
147 MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");
/* Global list of attached interfaces and the watchdog callout. */
149 int ifqmaxlen = IFQ_MAXLEN;
150 struct ifnethead ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
152 struct callout if_slowtimo_timer;
/* if_index -> ifnet map; grown on demand in if_attach(). */
155 struct ifnet **ifindex2ifnet = NULL;
156 static struct thread ifnet_threads[MAXCPU];
158 static struct ifaltq_stage_head ifq_stage_heads[MAXCPU];
/* KTR trace points for ifq enqueue/dequeue ... */
160 #define IFQ_KTR_STRING "ifq=%p"
161 #define IFQ_KTR_ARGS struct ifaltq *ifq
163 #define KTR_IFQ KTR_ALL
165 KTR_INFO_MASTER(ifq);
166 KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
167 KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
168 #define logifq(name, arg) KTR_LOG(ifq_ ## name, arg)
/* ... and for if_start scheduling decisions (run/sched/avoid/etc). */
170 #define IF_START_KTR_STRING "ifp=%p"
171 #define IF_START_KTR_ARGS struct ifnet *ifp
173 #define KTR_IF_START KTR_ALL
175 KTR_INFO_MASTER(if_start);
176 KTR_INFO(KTR_IF_START, if_start, run, 0,
177 IF_START_KTR_STRING, IF_START_KTR_ARGS);
178 KTR_INFO(KTR_IF_START, if_start, sched, 1,
179 IF_START_KTR_STRING, IF_START_KTR_ARGS);
180 KTR_INFO(KTR_IF_START, if_start, avoid, 2,
181 IF_START_KTR_STRING, IF_START_KTR_ARGS);
182 KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
183 IF_START_KTR_STRING, IF_START_KTR_ARGS);
184 KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
185 IF_START_KTR_STRING, IF_START_KTR_ARGS);
186 #define logifstart(name, arg) KTR_LOG(if_start_ ## name, arg)
/* Global list of interface groups (see if_creategroup() et al). */
188 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
191 * Network interface utility routines.
193 * Routines with ifa_ifwith* names take sockaddr *'s as
/*
 * Body fragment (presumably ifinit(); the function header is elided from
 * this view).  Initializes the watchdog callout and gives any driver that
 * forgot to set its send-queue depth the default ifqmaxlen.
 */
202 callout_init(&if_slowtimo_timer);
205 TAILQ_FOREACH(ifp, &ifnet, if_link) {
206 if (ifp->if_snd.ifq_maxlen == 0) {
/* Nag the driver author; then apply the system default queue length. */
207 if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
208 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
/* Default mapping: if_start runs on the interface's configured cpu. */
217 if_start_cpuid(struct ifnet *ifp)
219 return ifp->if_cpuid;
/*
 * polling(4)-aware variant: reads the polling cpuid first (branch using
 * poll_cpuid is elided from this view), otherwise falls back to if_cpuid.
 */
224 if_start_cpuid_npoll(struct ifnet *ifp)
226 int poll_cpuid = ifp->if_npoll_cpuid;
231 return ifp->if_cpuid;
/*
 * IPI handler: schedule the per-cpu if_start netmsg on the current cpu's
 * netisr port.  Only sends when the message is idle (MSGF_DONE), so a
 * message already in flight is not re-sent.
 */
236 if_start_ipifunc(void *arg)
238 struct ifnet *ifp = arg;
239 struct lwkt_msg *lmsg = &ifp->if_start_nmsg[mycpuid].lmsg;
242 if (lmsg->ms_flags & MSGF_DONE)
243 lwkt_sendmsg(netisr_portfn(mycpuid), lmsg);
248 ifq_stage_remove(struct ifaltq_stage_head *head, struct ifaltq_stage *stage)
250 KKASSERT(stage->ifqs_flags & IFQ_STAGE_FLAG_QUED);
251 TAILQ_REMOVE(&head->ifqs_head, stage, ifqs_link);
252 stage->ifqs_flags &= ~(IFQ_STAGE_FLAG_QUED | IFQ_STAGE_FLAG_SCHED);
258 ifq_stage_insert(struct ifaltq_stage_head *head, struct ifaltq_stage *stage)
260 KKASSERT((stage->ifqs_flags &
261 (IFQ_STAGE_FLAG_QUED | IFQ_STAGE_FLAG_SCHED)) == 0);
262 stage->ifqs_flags |= IFQ_STAGE_FLAG_QUED;
263 TAILQ_INSERT_TAIL(&head->ifqs_head, stage, ifqs_link);
267 * Schedule ifnet.if_start on ifnet's CPU
270 if_start_schedule(struct ifnet *ifp, int force)
/*
 * Unless forced, a netisr thread may merely stage the request: mark the
 * per-cpu stage entry queued+scheduled and return without an IPI.
 */
274 if (!force && curthread->td_type == TD_TYPE_NETISR &&
275 ifq_stage_cntmax > 0) {
276 struct ifaltq_stage *stage = &ifp->if_snd.altq_stage[mycpuid];
280 if ((stage->ifqs_flags & IFQ_STAGE_FLAG_QUED) == 0)
281 ifq_stage_insert(&ifq_stage_heads[mycpuid], stage);
282 stage->ifqs_flags |= IFQ_STAGE_FLAG_SCHED;
/*
 * Dispatch to the ifnet's cpu: remote cpu via IPI, local cpu by calling
 * the IPI function directly.  (The cpu-comparison branch between these
 * two calls is elided from this view.)
 */
286 cpu = ifp->if_start_cpuid(ifp);
288 lwkt_send_ipiq(globaldata_find(cpu), if_start_ipifunc, ifp);
290 if_start_ipifunc(ifp);
295 * This function will release ifnet.if_start interlock,
296 * if ifnet.if_start does not need to be scheduled
/* Returns whether if_start must be (re)scheduled; see comment below. */
299 if_start_need_schedule(struct ifaltq *ifq, int running)
301 if (!running || ifq_is_empty(ifq)
303 || ifq->altq_tbr != NULL
308 * ifnet.if_start interlock is released, if:
309 * 1) Hardware can not take any packets, due to
310 * o interface is marked down
311 * o hardware queue is full (ifq_is_oactive)
312 * Under the second situation, hardware interrupt
313 * or polling(4) will call/schedule ifnet.if_start
314 * when hardware queue is ready
315 * 2) There is not packet in the ifnet.if_snd.
316 * Further ifq_dispatch or ifq_handoff will call/
317 * schedule ifnet.if_start
318 * 3) TBR is used and it does not allow further
320 * TBR callout will call ifnet.if_start
/* Release the interlock (altq_started) in the cases enumerated above. */
322 if (!running || !ifq_data_ready(ifq)) {
323 ifq->altq_started = 0;
/*
 * Netisr message handler that actually runs ifnet.if_start.  Replies to
 * the message immediately so the per-cpu netmsg can be reused, chases a
 * cpu change if the ifnet migrated, then calls if_start under the tx
 * serializer and reschedules itself while more data remains.
 */
333 if_start_dispatch(netmsg_t msg)
335 struct lwkt_msg *lmsg = &msg->base.lmsg;
336 struct ifnet *ifp = lmsg->u.ms_resultp;
337 struct ifaltq *ifq = &ifp->if_snd;
338 int running = 0, need_sched;
341 lwkt_replymsg(lmsg, 0); /* reply ASAP */
/* The ifnet's cpu changed under us; force-reschedule on the right cpu. */
344 if (mycpuid != ifp->if_start_cpuid(ifp)) {
346 * We need to chase the ifnet CPU change.
348 logifstart(chase_sched, ifp);
349 if_start_schedule(ifp, 1);
/* Run if_start only while the interface is up and hw queue not full. */
353 ifnet_serialize_tx(ifp);
354 if ((ifp->if_flags & IFF_RUNNING) && !ifq_is_oactive(ifq)) {
355 logifstart(run, ifp);
357 if ((ifp->if_flags & IFF_RUNNING) && !ifq_is_oactive(ifq))
360 need_sched = if_start_need_schedule(ifq, running);
361 ifnet_deserialize_tx(ifp);
365 * More data need to be transmitted, ifnet.if_start is
366 * scheduled on ifnet's CPU, and we keep going.
367 * NOTE: ifnet.if_start interlock is not released.
369 logifstart(sched, ifp);
370 if_start_schedule(ifp, 0);
374 /* Device driver ifnet.if_start helper function */
/*
 * Called by drivers (with the tx serializer held) to kick transmission.
 * Takes the altq_started interlock, runs if_start if the interface is up
 * and the hw queue has room, then reschedules if more data remains.
 */
376 if_devstart(struct ifnet *ifp)
378 struct ifaltq *ifq = &ifp->if_snd;
381 ASSERT_IFNET_SERIALIZED_TX(ifp);
/* Someone else already started, or nothing ready to send: bail. */
384 if (ifq->altq_started || !ifq_data_ready(ifq)) {
385 logifstart(avoid, ifp);
389 ifq->altq_started = 1;
392 logifstart(run, ifp);
395 if ((ifp->if_flags & IFF_RUNNING) && !ifq_is_oactive(ifq))
398 if (if_start_need_schedule(ifq, running)) {
400 * More data need to be transmitted, ifnet.if_start is
401 * scheduled on ifnet's CPU, and we keep going.
402 * NOTE: ifnet.if_start interlock is not released.
404 logifstart(sched, ifp);
405 if_start_schedule(ifp, 0);
/*
 * Default ifnet serialize methods, installed by if_attach() when the
 * driver does not supply its own.  All ignore the slz selector and use
 * the single embedded/default serializer.
 */
410 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
412 lwkt_serialize_enter(ifp->if_serializer);
416 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
418 lwkt_serialize_exit(ifp->if_serializer);
/* Non-blocking acquire; returns the lwkt_serialize_try() result. */
422 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
424 return lwkt_serialize_try(ifp->if_serializer);
/* Assert held or not-held depending on the 'serialized' argument. */
429 if_default_serialize_assert(struct ifnet *ifp,
430 enum ifnet_serialize slz __unused,
431 boolean_t serialized)
434 ASSERT_SERIALIZED(ifp->if_serializer);
436 ASSERT_NOT_SERIALIZED(ifp->if_serializer);
441 * Attach an interface to the list of "active" interfaces.
443 * The serializer is optional. If non-NULL access to the interface
447 if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
449 unsigned socksize, ifasize;
450 int namelen, masklen;
451 struct sockaddr_dl *sdl;
456 static int if_indexlim = 8;
/*
 * Serializer setup: either the driver supplied a complete set of
 * serialize methods (then the default serializer must NOT also be
 * passed), or none at all (then the defaults are installed).
 */
458 if (ifp->if_serialize != NULL) {
459 KASSERT(ifp->if_deserialize != NULL &&
460 ifp->if_tryserialize != NULL &&
461 ifp->if_serialize_assert != NULL,
462 ("serialize functions are partially setup"));
465 * If the device supplies serialize functions,
466 * then clear if_serializer to catch any invalid
467 * usage of this field.
469 KASSERT(serializer == NULL,
470 ("both serialize functions and default serializer "
472 ifp->if_serializer = NULL;
474 KASSERT(ifp->if_deserialize == NULL &&
475 ifp->if_tryserialize == NULL &&
476 ifp->if_serialize_assert == NULL,
477 ("serialize functions are partially setup"));
478 ifp->if_serialize = if_default_serialize;
479 ifp->if_deserialize = if_default_deserialize;
480 ifp->if_tryserialize = if_default_tryserialize;
482 ifp->if_serialize_assert = if_default_serialize_assert;
486 * The serializer can be passed in from the device,
487 * allowing the same serializer to be used for both
488 * the interrupt interlock and the device queue.
489 * If not specified, the netif structure will use an
490 * embedded serializer.
492 if (serializer == NULL) {
493 serializer = &ifp->if_default_serializer;
494 lwkt_serialize_init(serializer);
496 ifp->if_serializer = serializer;
/* Pick the if_start cpu-mapping function; npoll-aware if supported. */
499 ifp->if_start_cpuid = if_start_cpuid;
503 /* Device is not in polling mode by default */
504 ifp->if_npoll_cpuid = -1;
505 if (ifp->if_npoll != NULL)
506 ifp->if_start_cpuid = if_start_cpuid_npoll;
/* Per-cpu if_start netmsgs, all resolving back to this ifnet. */
509 ifp->if_start_nmsg = kmalloc(ncpus * sizeof(*ifp->if_start_nmsg),
510 M_LWKTMSG, M_WAITOK);
511 for (i = 0; i < ncpus; ++i) {
512 netmsg_init(&ifp->if_start_nmsg[i], NULL, &netisr_adone_rport,
513 0, if_start_dispatch);
514 ifp->if_start_nmsg[i].lmsg.u.ms_resultp = ifp;
/* Hold the ioctl mutex across the rest of the attach sequence. */
517 mtx_init(&ifp->if_ioctl_mtx);
518 mtx_lock(&ifp->if_ioctl_mtx);
520 TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
521 ifp->if_index = ++if_index;
525 * The old code would work if the interface passed a pre-existing
526 * chain of ifaddrs to this code. We don't trust our callers to
527 * properly initialize the tailq, however, so we no longer allow
528 * this unlikely case.
530 ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
531 M_IFADDR, M_WAITOK | M_ZERO);
532 for (i = 0; i < ncpus; ++i)
533 TAILQ_INIT(&ifp->if_addrheads[i]);
535 TAILQ_INIT(&ifp->if_prefixhead);
536 TAILQ_INIT(&ifp->if_multiaddrs);
537 TAILQ_INIT(&ifp->if_groups);
538 getmicrotime(&ifp->if_lastchange);
/* Grow (or first-allocate) the if_index -> ifnet lookup table. */
539 if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
545 /* grow ifindex2ifnet */
546 n = if_indexlim * sizeof(*q);
547 q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
/* n/2 = old table size: if_indexlim was doubled (doubling elided). */
549 bcopy(ifindex2ifnet, q, n/2);
550 kfree(ifindex2ifnet, M_IFADDR);
555 ifindex2ifnet[if_index] = ifp;
558 * create a Link Level name for this device
560 namelen = strlen(ifp->if_xname);
561 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
562 socksize = masklen + ifp->if_addrlen;
563 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
564 if (socksize < sizeof(*sdl))
565 socksize = sizeof(*sdl);
566 socksize = ROUNDUP(socksize);
/* One ifaddr followed by two sockaddr_dl's: address then netmask. */
568 ifasize = sizeof(struct ifaddr) + 2 * socksize;
569 ifa = ifa_create(ifasize, M_WAITOK);
570 sdl = (struct sockaddr_dl *)(ifa + 1);
571 sdl->sdl_len = socksize;
572 sdl->sdl_family = AF_LINK;
573 bcopy(ifp->if_xname, sdl->sdl_data, namelen);
574 sdl->sdl_nlen = namelen;
575 sdl->sdl_index = ifp->if_index;
576 sdl->sdl_type = ifp->if_type;
577 ifp->if_lladdr = ifa;
579 ifa->ifa_rtrequest = link_rtrequest;
580 ifa->ifa_addr = (struct sockaddr *)sdl;
/* Second sockaddr_dl right after the first: the AF_LINK netmask. */
581 sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
582 ifa->ifa_netmask = (struct sockaddr *)sdl;
583 sdl->sdl_len = masklen;
/* Fill the name portion of the mask with 0xff bytes (loop elided). */
585 sdl->sdl_data[--namelen] = 0xff;
586 ifa_iflink(ifa, ifp, 0 /* Insert head */);
588 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
589 devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
/* Initialize the send queue (classic ifq) and its per-cpu stages. */
593 ifq->altq_disc = NULL;
594 ifq->altq_flags &= ALTQF_CANTCHANGE;
595 ifq->altq_tbr = NULL;
597 ifq->altq_started = 0;
598 ifq->altq_prepended = NULL;
600 ifq_set_classic(ifq);
603 kmalloc_cachealign(ncpus * sizeof(struct ifaltq_stage),
604 M_DEVBUF, M_WAITOK | M_ZERO);
605 for (i = 0; i < ncpus; ++i)
606 ifq->altq_stage[i].ifqs_altq = ifq;
/* Attach per-domain (address family) private data, if any domains. */
608 if (!SLIST_EMPTY(&domains))
609 if_attachdomain1(ifp);
611 /* Announce the interface. */
612 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
614 mtx_unlock(&ifp->if_ioctl_mtx);
/*
 * SYSINIT hook: run if_attachdomain1() on every interface already
 * attached, so interfaces attached before the protocol domains get
 * their per-domain data set up.
 */
618 if_attachdomain(void *dummy)
623 TAILQ_FOREACH(ifp, &ifnet, if_list)
624 if_attachdomain1(ifp);
627 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
628 if_attachdomain, NULL);
/*
 * Zero the ifnet's per-address-family data region, then let every
 * registered domain with a dom_ifattach hook allocate its slot.
 */
631 if_attachdomain1(struct ifnet *ifp)
637 /* address family dependent data region */
638 bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
639 SLIST_FOREACH(dp, &domains, dom_next)
640 if (dp->dom_ifattach)
641 ifp->if_afdata[dp->dom_family] =
642 (*dp->dom_ifattach)(ifp);
647 * Purge all addresses whose type is _not_ AF_LINK
650 if_purgeaddrs_nolink(struct ifnet *ifp)
652 struct ifaddr_container *ifac, *next;
654 TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
656 struct ifaddr *ifa = ifac->ifa;
658 /* Leave link ifaddr as it is */
659 if (ifa->ifa_addr->sa_family == AF_LINK)
662 /* XXX: Ugly!! ad hoc just for INET */
/* IPv4: synthesize a SIOCDIFADDR request and hand it to in_control(). */
663 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
664 struct ifaliasreq ifr;
665 #ifdef IFADDR_DEBUG_VERBOSE
668 kprintf("purge in4 addr %p: ", ifa);
669 for (i = 0; i < ncpus; ++i)
670 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
674 bzero(&ifr, sizeof ifr);
675 ifr.ifra_addr = *ifa->ifa_addr;
676 if (ifa->ifa_dstaddr)
677 ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
678 if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
/* IPv6 branch (deletion call itself elided from this view). */
684 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
685 #ifdef IFADDR_DEBUG_VERBOSE
688 kprintf("purge in6 addr %p: ", ifa);
689 for (i = 0; i < ncpus; ++i)
690 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
695 /* ifp_addrhead is already updated */
/* Fallback for other families: just unlink the ifaddr from the ifnet. */
699 ifa_ifunlink(ifa, ifp);
/*
 * Netmsg handler: on the current cpu, drop this ifq's stage entry from
 * the per-cpu staging list if it is queued, then reply.
 */
705 ifq_stage_detach_handler(netmsg_t nmsg)
707 struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
708 struct ifaltq_stage *stage = &ifq->altq_stage[mycpuid];
710 if (stage->ifqs_flags & IFQ_STAGE_FLAG_QUED)
711 ifq_stage_remove(&ifq_stage_heads[mycpuid], stage);
712 lwkt_replymsg(&nmsg->lmsg, 0);
/*
 * Synchronously run the detach handler on every cpu's netisr, removing
 * all staged entries for this ifq before the ifnet is torn down.
 */
716 ifq_stage_detach(struct ifaltq *ifq)
718 struct netmsg_base base;
721 netmsg_init(&base, NULL, &curthread->td_msgport, 0,
722 ifq_stage_detach_handler);
723 base.lmsg.u.ms_resultp = ifq;
725 for (cpu = 0; cpu < ncpus; ++cpu)
726 lwkt_domsg(netisr_portfn(cpu), &base.lmsg, 0);
730 * Detach an interface, removing it from the
731 * list of "active" interfaces.
734 if_detach(struct ifnet *ifp)
736 struct radix_node_head *rnh;
741 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
744 * Remove routes and flush queues.
/* Stop polling(4) before tearing the interface down. */
748 if (ifp->if_flags & IFF_NPOLLING)
749 ifpoll_deregister(ifp);
/* Disable and detach any ALTQ discipline on the send queue. */
754 if (ifq_is_enabled(&ifp->if_snd))
755 altq_disable(&ifp->if_snd);
756 if (ifq_is_attached(&ifp->if_snd))
757 altq_detach(&ifp->if_snd);
761 * Clean up all addresses.
763 ifp->if_lladdr = NULL;
765 if_purgeaddrs_nolink(ifp);
/* Only the AF_LINK ifaddr may remain after the purge; unlink it too. */
766 if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
769 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
770 KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
771 ("non-link ifaddr is left on if_addrheads"));
773 ifa_ifunlink(ifa, ifp);
775 KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
776 ("there are still ifaddrs left on if_addrheads"));
781 * Remove all IPv4 kernel structures related to ifp.
788 * Remove all IPv6 kernel structs related to ifp. This should be done
789 * before removing routing entries below, since IPv6 interface direct
790 * routes are expected to be removed by the IPv6-specific kernel API.
791 * Otherwise, the kernel will detect some inconsistency and bark it.
797 * Delete all remaining routes using this interface
798 * Unfortuneatly the only way to do this is to slog through
799 * the entire routing table looking for routes which point
800 * to this interface...oh well...
/*
 * Walk every per-cpu routing table for every address family, deleting
 * routes that reference this ifnet via if_rtdel(); migrate the current
 * thread to each cpu to access its table, then migrate back.
 */
803 for (cpu = 0; cpu < ncpus; cpu++) {
804 lwkt_migratecpu(cpu);
805 for (i = 1; i <= AF_MAX; i++) {
806 if ((rnh = rt_tables[cpu][i]) == NULL)
808 rnh->rnh_walktree(rnh, if_rtdel, ifp);
811 lwkt_migratecpu(origcpu);
813 /* Announce that the interface is gone. */
814 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
815 devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
/* Let each domain free its per-interface data. */
817 SLIST_FOREACH(dp, &domains, dom_next)
818 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
819 (*dp->dom_ifdetach)(ifp,
820 ifp->if_afdata[dp->dom_family])%3B
823 * Remove interface from ifindex2ifp[] and maybe decrement if_index.
825 ifindex2ifnet[ifp->if_index] = NULL;
826 while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
829 TAILQ_REMOVE(&ifnet, ifp, if_link);
830 kfree(ifp->if_addrheads, M_IFADDR);
/* Drain in-flight IPIs, then free the per-cpu start/stage resources. */
832 lwkt_synchronize_ipiqs("if_detach");
833 ifq_stage_detach(&ifp->if_snd);
835 kfree(ifp->if_start_nmsg, M_LWKTMSG);
836 kfree(ifp->if_snd.altq_stage, M_DEVBUF);
841 * Create interface group without members
844 if_creategroup(const char *groupname)
846 struct ifg_group *ifg = NULL;
/* M_NOWAIT allocation; NULL return on failure (return elided). */
848 if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
849 M_TEMP, M_NOWAIT)) == NULL)
852 strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
854 ifg->ifg_carp_demoted = 0;
855 TAILQ_INIT(&ifg->ifg_members);
/* pf(4) hook, then publish the group on the global list. */
857 pfi_attach_ifgroup(ifg);
859 TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
865 * Add a group to an interface
868 if_addgroup(struct ifnet *ifp, const char *groupname)
870 struct ifg_list *ifgl;
871 struct ifg_group *ifg = NULL;
872 struct ifg_member *ifgm;
/* Group names ending in a digit are reserved (clone unit names). */
874 if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
875 groupname[strlen(groupname) - 1] <= '9')
/* Already a member?  (error return elided from this view.) */
878 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
879 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
882 if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
885 if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
/* Find the existing group, or create it on demand. */
890 TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
891 if (!strcmp(ifg->ifg_group, groupname))
894 if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
901 ifgl->ifgl_group = ifg;
902 ifgm->ifgm_ifp = ifp;
904 TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
905 TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
/* Notify pf(4) that the group membership changed. */
908 pfi_group_change(groupname);
915 * Remove a group from an interface
918 if_delgroup(struct ifnet *ifp, const char *groupname)
920 struct ifg_list *ifgl;
921 struct ifg_member *ifgm;
/* Locate this interface's membership record for the named group. */
923 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
924 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
929 TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
/* Find and remove the matching member entry inside the group. */
931 TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
932 if (ifgm->ifgm_ifp == ifp)
936 TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
/* Last member gone: unlink the group, tell pf(4), and free it. */
940 if (--ifgl->ifgl_group->ifg_refcnt == 0) {
941 TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
943 pfi_detach_ifgroup(ifgl->ifgl_group);
945 kfree(ifgl->ifgl_group, M_TEMP);
951 pfi_group_change(groupname);
958 * Stores all groups from an interface in memory pointed
962 if_getgroup(caddr_t data, struct ifnet *ifp)
965 struct ifg_list *ifgl;
966 struct ifg_req ifgrq, *ifgp;
967 struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
/* Size probe: ifgr_len == 0 means "just report the required length". */
969 if (ifgr->ifgr_len == 0) {
970 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
971 ifgr->ifgr_len += sizeof(struct ifg_req);
/* Copy one ifg_req per group out to the user buffer, bounded by len. */
975 len = ifgr->ifgr_len;
976 ifgp = ifgr->ifgr_groups;
977 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
978 if (len < sizeof(ifgrq))
980 bzero(&ifgrq, sizeof ifgrq);
981 strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
982 sizeof(ifgrq.ifgrq_group));
983 if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
984 sizeof(struct ifg_req))))
986 len -= sizeof(ifgrq);
994 * Stores all members of a group in memory pointed to by data
997 if_getgroupmembers(caddr_t data)
999 struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
1000 struct ifg_group *ifg;
1001 struct ifg_member *ifgm;
1002 struct ifg_req ifgrq, *ifgp;
/* Look the group up by name (not-found return elided from this view). */
1005 TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
1006 if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
/* Size probe, as in if_getgroup(): report required length only. */
1011 if (ifgr->ifgr_len == 0) {
1012 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1013 ifgr->ifgr_len += sizeof(ifgrq);
/* Copy one ifg_req per member interface out, bounded by user length. */
1017 len = ifgr->ifgr_len;
1018 ifgp = ifgr->ifgr_groups;
1019 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1020 if (len < sizeof(ifgrq))
1022 bzero(&ifgrq, sizeof ifgrq);
1023 strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1024 sizeof(ifgrq.ifgrq_member));
1025 if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
1026 sizeof(struct ifg_req))))
1028 len -= sizeof(ifgrq);
1036 * Delete Routes for a Network Interface
1038 * Called for each routing entry via the rnh->rnh_walktree() call above
1039 * to delete all route entries referencing a detaching network interface.
1042 * rn pointer to node in the routing table
1043 * arg argument passed to rnh->rnh_walktree() - detaching interface
1047 * errno failed - reason indicated
1051 if_rtdel(struct radix_node *rn, void *arg)
1053 struct rtentry *rt = (struct rtentry *)rn;
1054 struct ifnet *ifp = arg;
/* Only touch routes that point at the detaching interface. */
1057 if (rt->rt_ifp == ifp) {
1060 * Protect (sorta) against walktree recursion problems
1061 * with cloned routes
1063 if (!(rt->rt_flags & RTF_UP))
1066 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1067 rt_mask(rt), rt->rt_flags,
/* Deletion failure is logged but does not abort the tree walk. */
1070 log(LOG_WARNING, "if_rtdel: error %d\n", err);
1078 * Locate an interface based on a complete address.
1081 ifa_ifwithaddr(struct sockaddr *addr)
/* Scan every interface's (current cpu's) address list. */
1085 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1086 struct ifaddr_container *ifac;
1088 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1089 struct ifaddr *ifa = ifac->ifa;
1091 if (ifa->ifa_addr->sa_family != addr->sa_family)
/* Exact address match (return of ifa elided from this view). */
1093 if (sa_equal(addr, ifa->ifa_addr))
/* Also accept a match on the broadcast address, when one exists. */
1095 if ((ifp->if_flags & IFF_BROADCAST) &&
1096 ifa->ifa_broadaddr &&
1097 /* IPv6 doesn't have broadcast */
1098 ifa->ifa_broadaddr->sa_len != 0 &&
1099 sa_equal(ifa->ifa_broadaddr, addr))
1106 * Locate the point to point interface with a given destination address.
1109 ifa_ifwithdstaddr(struct sockaddr *addr)
/* Only point-to-point interfaces are considered. */
1113 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1114 struct ifaddr_container *ifac;
1116 if (!(ifp->if_flags & IFF_POINTOPOINT))
1119 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1120 struct ifaddr *ifa = ifac->ifa;
1122 if (ifa->ifa_addr->sa_family != addr->sa_family)
/* Match on the peer (destination) address of the p2p link. */
1124 if (ifa->ifa_dstaddr &&
1125 sa_equal(addr, ifa->ifa_dstaddr))
1133 * Find an interface on a specific network. If many, choice
1134 * is most specific found.
1137 ifa_ifwithnet(struct sockaddr *addr)
1140 struct ifaddr *ifa_maybe = NULL;
1141 u_int af = addr->sa_family;
1142 char *addr_data = addr->sa_data, *cplim;
1145 * AF_LINK addresses can be looked up directly by their index number,
1146 * so do that if we can.
1148 if (af == AF_LINK) {
1149 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
1151 if (sdl->sdl_index && sdl->sdl_index <= if_index)
1152 return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
1156 * Scan though each interface, looking for ones that have
1157 * addresses in this address family.
1159 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1160 struct ifaddr_container *ifac;
1162 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1163 struct ifaddr *ifa = ifac->ifa;
1164 char *cp, *cp2, *cp3;
1166 if (ifa->ifa_addr->sa_family != af)
/* IPv4 point-to-point: match the remote (dst) address directly. */
1168 if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1170 * This is a bit broken as it doesn't
1171 * take into account that the remote end may
1172 * be a single node in the network we are
1174 * The trouble is that we don't know the
1175 * netmask for the remote end.
1177 if (ifa->ifa_dstaddr != NULL &&
1178 sa_equal(addr, ifa->ifa_dstaddr))
1182 * if we have a special address handler,
1183 * then use it instead of the generic one.
1185 if (ifa->ifa_claim_addr) {
1186 if ((*ifa->ifa_claim_addr)(ifa, addr)) {
1194 * Scan all the bits in the ifa's address.
1195 * If a bit dissagrees with what we are
1196 * looking for, mask it with the netmask
1197 * to see if it really matters.
1198 * (A byte at a time)
1200 if (ifa->ifa_netmask == 0)
/* Byte-wise masked comparison of addr vs. the ifa's address. */
1203 cp2 = ifa->ifa_addr->sa_data;
1204 cp3 = ifa->ifa_netmask->sa_data;
1205 cplim = ifa->ifa_netmask->sa_len +
1206 (char *)ifa->ifa_netmask;
1208 if ((*cp++ ^ *cp2++) & *cp3++)
1209 goto next; /* next address! */
1211 * If the netmask of what we just found
1212 * is more specific than what we had before
1213 * (if we had one) then remember the new one
1214 * before continuing to search
1215 * for an even better one.
1217 if (ifa_maybe == NULL ||
1218 rn_refines((char *)ifa->ifa_netmask,
1219 (char *)ifa_maybe->ifa_netmask))
1228 * Find an interface address specific to an interface best matching
1232 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1234 struct ifaddr_container *ifac;
1235 char *cp, *cp2, *cp3;
1237 struct ifaddr *ifa_maybe = NULL;
1238 u_int af = addr->sa_family;
1242 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1243 struct ifaddr *ifa = ifac->ifa;
1245 if (ifa->ifa_addr->sa_family != af)
/* Remember the first family match as a fallback candidate. */
1247 if (ifa_maybe == NULL)
/* No netmask: only an exact address or dstaddr match qualifies. */
1249 if (ifa->ifa_netmask == NULL) {
1250 if (sa_equal(addr, ifa->ifa_addr) ||
1251 (ifa->ifa_dstaddr != NULL &&
1252 sa_equal(addr, ifa->ifa_dstaddr)))
1256 if (ifp->if_flags & IFF_POINTOPOINT) {
1257 if (sa_equal(addr, ifa->ifa_dstaddr))
/* Byte-wise masked comparison against the ifa's own address. */
1261 cp2 = ifa->ifa_addr->sa_data;
1262 cp3 = ifa->ifa_netmask->sa_data;
1263 cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1264 for (; cp3 < cplim; cp3++)
1265 if ((*cp++ ^ *cp2++) & *cp3)
1275 * Default action when installing a route with a Link Level gateway.
1276 * Lookup an appropriate real ifa to point to.
1277 * This should be moved to /sys/net/link.c eventually.
1280 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1283 struct sockaddr *dst;
/* Only act on RTM_ADD with a fully-formed route; otherwise bail. */
1286 if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
1287 (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
/* Swap the AF_LINK ifa for the best real ifa on the same interface. */
1289 ifa = ifaof_ifpforaddr(dst, ifp);
1291 IFAFREE(rt->rt_ifa);
/* Re-dispatch through the new ifa's hook, guarding against recursion. */
1294 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1295 ifa->ifa_rtrequest(cmd, rt, info);
1300 * Mark an interface down and notify protocols of
1302 * NOTE: must be called at splnet or eqivalent.
1305 if_unroute(struct ifnet *ifp, int flag, int fam)
1307 struct ifaddr_container *ifac;
/* Clear the flag, timestamp the change, notify matching protocols. */
1309 ifp->if_flags &= ~flag;
1310 getmicrotime(&ifp->if_lastchange);
1311 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1312 struct ifaddr *ifa = ifac->ifa;
1314 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1315 kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1317 ifq_purge_all(&ifp->if_snd);
1322 * Mark an interface up and notify protocols of
1324 * NOTE: must be called at splnet or eqivalent.
1327 if_route(struct ifnet *ifp, int flag, int fam)
1329 struct ifaddr_container *ifac;
/* Purge stale queued packets first, then set the flag and notify. */
1331 ifq_purge_all(&ifp->if_snd);
1332 ifp->if_flags |= flag;
1333 getmicrotime(&ifp->if_lastchange);
1334 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1335 struct ifaddr *ifa = ifac->ifa;
1337 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1338 kpfctlinput(PRC_IFUP, ifa->ifa_addr);
1347 * Mark an interface down and notify protocols of the transition. An
1348 * interface going down is also considered to be a synchronizing event.
1349 * We must ensure that all packet processing related to the interface
1350 * has completed before we return so e.g. the caller can free the ifnet
1351 * structure that the mbufs may be referencing.
1353 * NOTE: must be called at splnet or eqivalent.
1356 if_down(struct ifnet *ifp)
1358 if_unroute(ifp, IFF_UP, AF_UNSPEC);
/* Synchronize with all netisrs so in-flight packet work drains. */
1359 netmsg_service_sync();
1363 * Mark an interface up and notify protocols of
1365 * NOTE: must be called at splnet or eqivalent.
1368 if_up(struct ifnet *ifp)
1370 if_route(ifp, IFF_UP, AF_UNSPEC);
1374 * Process a link state change.
1375 * NOTE: must be called at splsoftnet or equivalent.
1378 if_link_state_change(struct ifnet *ifp)
1380 int link_state = ifp->if_link_state;
/* Report the new state to devd via devctl. */
1383 devctl_notify("IFNET", ifp->if_xname,
1384 (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1388 * Handle interface watchdog timer routines. Called
1389 * from softclock, we decrement timers (if set) and
1390 * call the appropriate interface routine on expiration.
1393 if_slowtimo(void *arg)
/* Skip interfaces whose timer is unset or has not yet expired. */
1399 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1400 if (ifp->if_timer == 0 || --ifp->if_timer)
1402 if (ifp->if_watchdog) {
/*
 * Fire the watchdog only if all serializers can be acquired
 * without blocking; otherwise retry on the next tick.
 */
1403 if (ifnet_tryserialize_all(ifp)) {
1404 (*ifp->if_watchdog)(ifp);
1405 ifnet_deserialize_all(ifp);
1407 /* try again next timeout */
/* Re-arm ourselves; runs IFNET_SLOWHZ times a second. */
1415 callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
1419 * Map interface name to
1420 * interface structure pointer.
1423 ifunit(const char *name)
1428 * Search all the interfaces for this name/number
/* Linear scan; if_xname comparison bounded by IFNAMSIZ. */
1431 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1432 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
1440 * Map interface name in a sockaddr_dl to
1441 * interface structure pointer.
1444 if_withname(struct sockaddr *sa)
1446 char ifname[IFNAMSIZ+1];
1447 struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
/* Reject non-AF_LINK sockaddrs and empty/oversized names. */
1449 if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1450 (sdl->sdl_nlen > IFNAMSIZ) )
1454 * ifunit wants a null-terminated name. It may not be null-terminated
1455 * in the sockaddr. We don't want to change the caller's sockaddr,
1456 * and there might not be room to put the trailing null anyway, so we
1457 * make a local copy that we know we can null terminate safely.
1460 bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1461 ifname[sdl->sdl_nlen] = '\0';
1462 return ifunit(ifname);
/*
 * Interface ioctl dispatcher, called from the socket layer.
 * Handles the generic SIOC* interface requests directly, and forwards
 * protocol-specific requests to the protocol via so_pru_control_direct().
 * Per-interface requests are serialized by ifp->if_ioctl_mtx.
 */
1470 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
1481 size_t namelen, onamelen;
1482 char new_name[IFNAMSIZ];
1484 struct sockaddr_dl *sdl;
/* Requests that do not target a specific interface are handled first. */
1489 return (ifconf(cmd, data, cred));
1494 ifr = (struct ifreq *)data;
/* Cloning: create/destroy pseudo interfaces; root privilege required. */
1499 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1501 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
1502 cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
1504 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1506 return (if_clone_destroy(ifr->ifr_name));
1507 case SIOCIFGCLONERS:
1508 return (if_clone_list((struct if_clonereq *)data));
1514 * Nominal ioctl through interface, lookup the ifp and obtain a
1515 * lock to serialize the ifconfig ioctl operation.
1517 ifp = ifunit(ifr->ifr_name);
1521 mtx_lock(&ifp->if_ioctl_mtx);
/* Simple "get" requests: copy fields out of the ifnet into the ifreq. */
1525 ifr->ifr_index = ifp->if_index;
1529 ifr->ifr_flags = ifp->if_flags;
1530 ifr->ifr_flagshigh = ifp->if_flags >> 16;
1534 ifr->ifr_reqcap = ifp->if_capabilities;
1535 ifr->ifr_curcap = ifp->if_capenable;
1539 ifr->ifr_metric = ifp->if_metric;
1543 ifr->ifr_mtu = ifp->if_mtu;
1547 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
1548 sizeof(ifp->if_data));
1552 ifr->ifr_phys = ifp->if_physical;
1555 case SIOCGIFPOLLCPU:
1556 ifr->ifr_pollcpu = -1;
1559 case SIOCSIFPOLLCPU:
/*
 * SIOCSIFFLAGS: root-only. Reassemble the 32-bit flags from the
 * two 16-bit halves carried in the ifreq.
 */
1563 error = priv_check_cred(cred, PRIV_ROOT, 0);
1566 new_flags = (ifr->ifr_flags & 0xffff) |
1567 (ifr->ifr_flagshigh << 16);
1568 if (ifp->if_flags & IFF_SMART) {
1569 /* Smart drivers twiddle their own routes */
1570 } else if (ifp->if_flags & IFF_UP &&
1571 (new_flags & IFF_UP) == 0) {
1575 } else if (new_flags & IFF_UP &&
1576 (ifp->if_flags & IFF_UP) == 0) {
1582 #ifdef IFPOLL_ENABLE
/* Toggle interrupt-less polling when IFF_NPOLLING changes state. */
1583 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
1584 if (new_flags & IFF_NPOLLING)
1585 ifpoll_register(ifp);
1587 ifpoll_deregister(ifp);
/* Preserve the driver-owned IFF_CANTCHANGE bits; install the rest. */
1591 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1592 (new_flags &~ IFF_CANTCHANGE);
1593 if (new_flags & IFF_PPROMISC) {
1594 /* Permanently promiscuous mode requested */
1595 ifp->if_flags |= IFF_PROMISC;
1596 } else if (ifp->if_pcount == 0) {
1597 ifp->if_flags &= ~IFF_PROMISC;
/* Let the driver see the new flags; it runs under full serialization. */
1599 if (ifp->if_ioctl) {
1600 ifnet_serialize_all(ifp);
1601 ifp->if_ioctl(ifp, cmd, data, cred);
1602 ifnet_deserialize_all(ifp);
1604 getmicrotime(&ifp->if_lastchange);
/* SIOCSIFCAP: root-only; reject capability bits the hardware lacks. */
1608 error = priv_check_cred(cred, PRIV_ROOT, 0);
1611 if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
1615 ifnet_serialize_all(ifp);
1616 ifp->if_ioctl(ifp, cmd, data, cred);
1617 ifnet_deserialize_all(ifp);
/*
 * SIOCSIFNAME: rename the interface. The interface is detached
 * from event handlers and announced as departed, renamed (including
 * rewriting the name embedded in its AF_LINK address), then
 * re-attached and announced as arrived.
 */
1621 error = priv_check_cred(cred, PRIV_ROOT, 0);
1624 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1627 if (new_name[0] == '\0') {
1631 if (ifunit(new_name) != NULL) {
1636 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
1638 /* Announce the departure of the interface. */
1639 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1641 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1642 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
1643 /* XXX IFA_LOCK(ifa); */
1644 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1645 namelen = strlen(new_name);
1646 onamelen = sdl->sdl_nlen;
1648 * Move the address if needed. This is safe because we
1649 * allocate space for a name of length IFNAMSIZ when we
1650 * create this in if_attach().
1652 if (namelen != onamelen) {
1653 bcopy(sdl->sdl_data + onamelen,
1654 sdl->sdl_data + namelen, sdl->sdl_alen);
1656 bcopy(new_name, sdl->sdl_data, namelen);
1657 sdl->sdl_nlen = namelen;
/* The netmask's sdl mirrors the name as all-ones bytes; rebuild it. */
1658 sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1659 bzero(sdl->sdl_data, onamelen);
1660 while (namelen != 0)
1661 sdl->sdl_data[--namelen] = 0xff;
1662 /* XXX IFA_UNLOCK(ifa) */
1664 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
1666 /* Announce the return of the interface. */
1667 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
/* SIOCSIFMETRIC: root-only; routing metric is advisory. */
1671 error = priv_check_cred(cred, PRIV_ROOT, 0);
1674 ifp->if_metric = ifr->ifr_metric;
1675 getmicrotime(&ifp->if_lastchange);
/* SIOCSIFPHYS: root-only, forwarded to the driver under serialization. */
1679 error = priv_check_cred(cred, PRIV_ROOT, 0);
1682 if (ifp->if_ioctl == NULL) {
1686 ifnet_serialize_all(ifp);
1687 error = ifp->if_ioctl(ifp, cmd, data, cred);
1688 ifnet_deserialize_all(ifp);
1690 getmicrotime(&ifp->if_lastchange);
/*
 * SIOCSIFMTU: validate the requested MTU, let the driver apply it,
 * and run network-layer fixups if the MTU actually changed.
 */
1695 u_long oldmtu = ifp->if_mtu;
1697 error = priv_check_cred(cred, PRIV_ROOT, 0);
1700 if (ifp->if_ioctl == NULL) {
1704 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
1708 ifnet_serialize_all(ifp);
1709 error = ifp->if_ioctl(ifp, cmd, data, cred);
1710 ifnet_deserialize_all(ifp);
1712 getmicrotime(&ifp->if_lastchange);
1716 * If the link MTU changed, do network layer specific procedure.
1718 if (ifp->if_mtu != oldmtu) {
/* SIOCADDMULTI / SIOCDELMULTI: multicast group membership. */
1728 error = priv_check_cred(cred, PRIV_ROOT, 0);
1732 /* Don't allow group membership on non-multicast interfaces. */
1733 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1738 /* Don't let users screw up protocols' entries. */
1739 if (ifr->ifr_addr.sa_family != AF_LINK) {
1744 if (cmd == SIOCADDMULTI) {
1745 struct ifmultiaddr *ifma;
1746 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1748 error = if_delmulti(ifp, &ifr->ifr_addr);
1751 getmicrotime(&ifp->if_lastchange);
/* Privileged driver-specific "set" requests, forwarded as a group. */
1754 case SIOCSIFPHYADDR:
1755 case SIOCDIFPHYADDR:
1757 case SIOCSIFPHYADDR_IN6:
1759 case SIOCSLIFPHYADDR:
1761 case SIOCSIFGENERIC:
1762 error = priv_check_cred(cred, PRIV_ROOT, 0);
1765 if (ifp->if_ioctl == 0) {
1769 ifnet_serialize_all(ifp);
1770 error = ifp->if_ioctl(ifp, cmd, data, cred);
1771 ifnet_deserialize_all(ifp);
1773 getmicrotime(&ifp->if_lastchange);
/* SIOCGIFSTATUS: the driver appends ASCII status text; start empty. */
1777 ifs = (struct ifstat *)data;
1778 ifs->ascii[0] = '\0';
/* Unprivileged driver-specific "get" requests. */
1780 case SIOCGIFPSRCADDR:
1781 case SIOCGIFPDSTADDR:
1782 case SIOCGLIFPHYADDR:
1784 case SIOCGIFGENERIC:
1785 if (ifp->if_ioctl == NULL) {
1789 ifnet_serialize_all(ifp);
1790 error = ifp->if_ioctl(ifp, cmd, data, cred);
1791 ifnet_deserialize_all(ifp);
/* SIOCSIFLLADDR: change the link-layer (MAC) address; root-only. */
1795 error = priv_check_cred(cred, PRIV_ROOT, 0);
1798 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
1799 ifr->ifr_addr.sa_len);
1800 EVENTHANDLER_INVOKE(iflladdr_event, ifp);
/*
 * Default: not a generic interface request — hand it to the
 * socket's protocol. Remember the flags so a protocol-initiated
 * IFF_UP transition can be detected afterwards.
 */
1804 oif_flags = ifp->if_flags;
1805 if (so->so_proto == 0) {
1810 error = so_pru_control_direct(so, cmd, data, ifp);
/*
 * COMPAT_43 path (presumably — the #ifdef open is not visible in
 * this listing): translate old-style osockaddr requests, which
 * carried a 16-bit sa_family where sa_len now lives.
 */
1815 case SIOCSIFDSTADDR:
1817 case SIOCSIFBRDADDR:
1818 case SIOCSIFNETMASK:
1819 #if BYTE_ORDER != BIG_ENDIAN
1820 if (ifr->ifr_addr.sa_family == 0 &&
1821 ifr->ifr_addr.sa_len < 16) {
1822 ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1823 ifr->ifr_addr.sa_len = 16;
1826 if (ifr->ifr_addr.sa_len == 0)
1827 ifr->ifr_addr.sa_len = 16;
1833 case OSIOCGIFDSTADDR:
1834 cmd = SIOCGIFDSTADDR;
1836 case OSIOCGIFBRDADDR:
1837 cmd = SIOCGIFBRDADDR;
1839 case OSIOCGIFNETMASK:
1840 cmd = SIOCGIFNETMASK;
1846 error = so_pru_control_direct(so, cmd, data, ifp);
/* Convert the result back to the old osockaddr layout. */
1850 case OSIOCGIFDSTADDR:
1851 case OSIOCGIFBRDADDR:
1852 case OSIOCGIFNETMASK:
1853 *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1856 #endif /* COMPAT_43 */
/* If the protocol brought the interface up, initialize its addresses. */
1858 if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1860 DELAY(100);/* XXX: temporary workaround for fxp issue*/
1861 if (ifp->if_flags & IFF_UP) {
1871 mtx_unlock(&ifp->if_ioctl_mtx);
1876 * Set/clear promiscuous mode on interface ifp based on the truth value
1877 * of pswitch. The calls are reference counted so that only the first
1878 * "on" request actually has an effect, as does the final "off" request.
1879 * Results are undefined if the "off" and "on" requests are not matched.
1882 ifpromisc(struct ifnet *ifp, int pswitch)
1888 oldflags = ifp->if_flags;
1889 if (ifp->if_flags & IFF_PPROMISC) {
1890 /* Do nothing if device is in permanently promiscuous mode */
/* Still maintain the reference count so on/off pairs stay balanced. */
1891 ifp->if_pcount += pswitch ? 1 : -1;
1896 * If the device is not configured up, we cannot put it in
1899 if ((ifp->if_flags & IFF_UP) == 0)
/* First "on" reference actually sets IFF_PROMISC; later ones just count. */
1901 if (ifp->if_pcount++ != 0)
1903 ifp->if_flags |= IFF_PROMISC;
1904 log(LOG_INFO, "%s: promiscuous mode enabled\n",
/* Last "off" reference clears IFF_PROMISC. */
1907 if (--ifp->if_pcount > 0)
1909 ifp->if_flags &= ~IFF_PROMISC;
1910 log(LOG_INFO, "%s: promiscuous mode disabled\n",
/* Push the flag change into the driver via SIOCSIFFLAGS. */
1913 ifr.ifr_flags = ifp->if_flags;
1914 ifr.ifr_flagshigh = ifp->if_flags >> 16;
1915 ifnet_serialize_all(ifp);
1916 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
1917 ifnet_deserialize_all(ifp);
/* On driver failure, roll the flags back (refcount rollback not visible). */
1921 ifp->if_flags = oldflags;
1926 * Return interface configuration
1927 * of system. List may be used
1928 * in later ioctl's (above) to get
1929 * other information.
/*
 * SIOCGIFCONF handler: walk every interface and every address on it,
 * copying (name, sockaddr) pairs into the user buffer described by
 * the struct ifconf until the buffer runs out of space.
 */
1932 ifconf(u_long cmd, caddr_t data, struct ucred *cred)
1934 struct ifconf *ifc = (struct ifconf *)data;
1936 struct sockaddr *sa;
1937 struct ifreq ifr, *ifrp;
1938 int space = ifc->ifc_len, error = 0;
1940 ifrp = ifc->ifc_req;
1941 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1942 struct ifaddr_container *ifac;
/* Stop once there is no room left for another full ifreq. */
1945 if (space <= sizeof ifr)
1949 * Zero the stack declared structure first to prevent
1950 * memory disclosure.
1952 bzero(&ifr, sizeof(ifr));
1953 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1954 >= sizeof(ifr.ifr_name)) {
1955 error = ENAMETOOLONG;
1960 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1961 struct ifaddr *ifa = ifac->ifa;
1963 if (space <= sizeof ifr)
/* Jailed credentials only see addresses visible to the prison. */
1966 if (cred->cr_prison &&
1967 prison_if(cred, sa))
/* Old-style request: emit the 4.3BSD osockaddr layout. */
1971 if (cmd == OSIOCGIFCONF) {
1972 struct osockaddr *osa =
1973 (struct osockaddr *)&ifr.ifr_addr;
1975 osa->sa_family = sa->sa_family;
1976 error = copyout(&ifr, ifrp, sizeof ifr);
/* Small sockaddr fits inside the ifreq directly. */
1980 if (sa->sa_len <= sizeof(*sa)) {
1982 error = copyout(&ifr, ifrp, sizeof ifr);
/*
 * Oversized sockaddr: copy the name and the variable-length
 * sockaddr separately, and advance ifrp past the overflow.
 */
1985 if (space < (sizeof ifr) + sa->sa_len -
1988 space -= sa->sa_len - sizeof(*sa);
1989 error = copyout(&ifr, ifrp,
1990 sizeof ifr.ifr_name);
1992 error = copyout(sa, &ifrp->ifr_addr,
1994 ifrp = (struct ifreq *)
1995 (sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1999 space -= sizeof ifr;
/* Interface with no addresses: emit one entry with a zeroed sockaddr. */
2004 bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
2005 error = copyout(&ifr, ifrp, sizeof ifr);
2008 space -= sizeof ifr;
/* Report the number of bytes actually written back to the caller. */
2012 ifc->ifc_len -= space;
2017 * Just like if_promisc(), but for all-multicast-reception mode.
/*
 * Reference-counted via ifp->if_amcount; IFF_ALLMULTI is set on the
 * first "on" and cleared when the count drops (note the "off" path
 * visible here zeroes the count rather than decrementing — the
 * surrounding control flow is not fully visible in this listing).
 */
2020 if_allmulti(struct ifnet *ifp, int onswitch)
2028 if (ifp->if_amcount++ == 0) {
2029 ifp->if_flags |= IFF_ALLMULTI;
2030 ifr.ifr_flags = ifp->if_flags;
2031 ifr.ifr_flagshigh = ifp->if_flags >> 16;
2032 ifnet_serialize_all(ifp);
2033 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2035 ifnet_deserialize_all(ifp);
2038 if (ifp->if_amcount > 1) {
2041 ifp->if_amcount = 0;
2042 ifp->if_flags &= ~IFF_ALLMULTI;
2043 ifr.ifr_flags = ifp->if_flags;
2044 ifr.ifr_flagshigh = ifp->if_flags >> 16;
2045 ifnet_serialize_all(ifp);
2046 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2048 ifnet_deserialize_all(ifp);
2060 * Add a multicast listenership to the interface in question.
2061 * The link layer provides a routine which converts
2065 struct ifnet *ifp, /* interface to manipulate */
2066 struct sockaddr *sa, /* address to add */
2067 struct ifmultiaddr **retifma)
2069 struct sockaddr *llsa, *dupsa;
2071 struct ifmultiaddr *ifma;
2074 * If the matching multicast address already exists
2075 * then don't add a new one, just add a reference
2077 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2078 if (sa_equal(sa, ifma->ifma_addr)) {
2079 ifma->ifma_refcount++;
2087 * Give the link layer a chance to accept/reject it, and also
2088 * find out which AF_LINK address this maps to, if it isn't one
2091 if (ifp->if_resolvemulti) {
2092 ifnet_serialize_all(ifp);
2093 error = ifp->if_resolvemulti(ifp, &llsa, sa);
2094 ifnet_deserialize_all(ifp);
/*
 * Allocate the record for the network-layer address; dupsa is a
 * private copy of the caller's sockaddr, owned by the ifma.
 */
2101 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2102 dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
2103 bcopy(sa, dupsa, sa->sa_len);
2105 ifma->ifma_addr = dupsa;
2106 ifma->ifma_lladdr = llsa;
2107 ifma->ifma_ifp = ifp;
2108 ifma->ifma_refcount = 1;
2109 ifma->ifma_protospec = 0;
2110 rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2113 * Some network interfaces can scan the address list at
2114 * interrupt time; lock them out.
2117 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
/*
 * If resolvemulti produced a link-layer address (llsa), track it
 * with its own reference-counted ifma record as well.
 */
2123 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2124 if (sa_equal(ifma->ifma_addr, llsa))
2128 ifma->ifma_refcount++;
2130 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2131 dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
2132 bcopy(llsa, dupsa, llsa->sa_len);
2133 ifma->ifma_addr = dupsa;
2134 ifma->ifma_ifp = ifp;
2135 ifma->ifma_refcount = 1;
2137 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2142 * We are certain we have added something, so call down to the
2143 * interface to let them know about it.
2146 ifnet_serialize_all(ifp);
2148 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
2149 ifnet_deserialize_all(ifp);
2156 * Remove a reference to a multicast address on this interface. Yell
2157 * if the request does not match an existing membership.
2160 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2162 struct ifmultiaddr *ifma;
2164 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2165 if (sa_equal(sa, ifma->ifma_addr))
/* More references remain: just drop one and keep the record. */
2170 if (ifma->ifma_refcount > 1) {
2171 ifma->ifma_refcount--;
2175 rt_newmaddrmsg(RTM_DELMADDR, ifma);
/* Remember the link-layer address so we can release its record below. */
2176 sa = ifma->ifma_lladdr;
2178 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2180 * Make sure the interface driver is notified
2181 * in the case of a link layer mcast group being left.
2183 if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
2184 ifnet_serialize_all(ifp);
2185 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2186 ifnet_deserialize_all(ifp);
2189 kfree(ifma->ifma_addr, M_IFMADDR);
2190 kfree(ifma, M_IFMADDR);
2195 * Now look for the link-layer address which corresponds to
2196 * this network address. It had been squirreled away in
2197 * ifma->ifma_lladdr for this purpose (so we don't have
2198 * to call ifp->if_resolvemulti() again), and we saved that
2199 * value in sa above. If some nasty deleted the
2200 * link-layer address out from underneath us, we can deal because
2201 * the address we stored was is not the same as the one which was
2202 * in the record for the link-layer address. (So we don't complain
2205 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2206 if (sa_equal(sa, ifma->ifma_addr))
2211 if (ifma->ifma_refcount > 1) {
2212 ifma->ifma_refcount--;
/* Last reference on the link-layer record: unlink, notify, free. */
2217 ifnet_serialize_all(ifp);
2218 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2219 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2220 ifnet_deserialize_all(ifp);
2222 kfree(ifma->ifma_addr, M_IFMADDR);
2223 kfree(sa, M_IFMADDR);
2224 kfree(ifma, M_IFMADDR);
2230 * Delete all multicast group membership for an interface.
2231 * Should be used to quickly flush all multicast filters.
2234 if_delallmulti(struct ifnet *ifp)
2236 struct ifmultiaddr *ifma;
2237 struct ifmultiaddr *next;
/* MUTABLE variant is required: if_delmulti() frees the current entry. */
2239 TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
2240 if_delmulti(ifp, ifma->ifma_addr);
2245 * Set the link layer address on an interface.
2247 * At this time we only support certain types of interfaces,
2248 * and we don't allow the length of the address to change.
2251 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
2253 struct sockaddr_dl *sdl;
2256 sdl = IF_LLSOCKADDR(ifp);
2259 if (len != sdl->sdl_alen) /* don't allow length to change */
2261 switch (ifp->if_type) {
2262 case IFT_ETHER: /* these types use struct arpcom */
/* Write the new address both into the arpcom and the AF_LINK sockaddr. */
2265 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
2266 bcopy(lladdr, LLADDR(sdl), len);
2272 * If the interface is already up, we need
2273 * to re-init it in order to reprogram its
2276 ifnet_serialize_all(ifp);
2277 if ((ifp->if_flags & IFF_UP) != 0) {
2279 struct ifaddr_container *ifac;
/* Bounce the interface: down then up via SIOCSIFFLAGS to the driver. */
2282 ifp->if_flags &= ~IFF_UP;
2283 ifr.ifr_flags = ifp->if_flags;
2284 ifr.ifr_flagshigh = ifp->if_flags >> 16;
2285 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2287 ifp->if_flags |= IFF_UP;
2288 ifr.ifr_flags = ifp->if_flags;
2289 ifr.ifr_flagshigh = ifp->if_flags >> 16;
2290 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2294 * Also send gratuitous ARPs to notify other nodes about
2295 * the address change.
2297 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2298 struct ifaddr *ifa = ifac->ifa;
2300 if (ifa->ifa_addr != NULL &&
2301 ifa->ifa_addr->sa_family == AF_INET)
2302 arp_gratuitous(ifp, ifa);
2306 ifnet_deserialize_all(ifp);
/*
 * Find the ifmultiaddr record on 'ifp' matching sockaddr 'sa'.
 * (Return of the found entry / NULL is not visible in this listing.)
 */
2310 struct ifmultiaddr *
2311 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
2313 struct ifmultiaddr *ifma;
2315 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2316 if (sa_equal(ifma->ifma_addr, sa))
2323 * This function locates the first real ethernet MAC from a network
2324 * card and loads it into node, returning 0 on success or ENOENT if
2325 * no suitable interfaces were found. It is used by the uuid code to
2326 * generate a unique 6-byte number.
2329 if_getanyethermac(uint16_t *node, int minlen)
2332 struct sockaddr_dl *sdl;
2334 TAILQ_FOREACH(ifp, &ifnet, if_link) {
/* Only consider real Ethernet interfaces with a long-enough address. */
2335 if (ifp->if_type != IFT_ETHER)
2337 sdl = IF_LLSOCKADDR(ifp);
2338 if (sdl->sdl_alen < minlen)
2340 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
2348 * The name argument must be a pointer to storage which will last as
2349 * long as the interface does. For physical devices, the result of
2350 * device_get_name(dev) is a good choice and for pseudo-devices a
2351 * static string works well.
2354 if_initname(struct ifnet *ifp, const char *name, int unit)
2356 ifp->if_dname = name;
2357 ifp->if_dunit = unit;
/* "name%d" for unit-numbered devices, bare name for IF_DUNIT_NONE. */
2358 if (unit != IF_DUNIT_NONE)
2359 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2361 strlcpy(ifp->if_xname, name, IFNAMSIZ);
/*
 * printf-style kernel message prefixed with the interface name
 * ("<ifname>: "); returns the total character count printed.
 */
2365 if_printf(struct ifnet *ifp, const char *fmt, ...)
2370 retval = kprintf("%s: ", ifp->if_xname);
2371 __va_start(ap, fmt);
2372 retval += kvprintf(fmt, ap);
/*
 * Allocate and minimally initialize an ifnet of the given interface
 * type, invoking the per-type if_com_alloc hook when registered.
 */
2378 if_alloc(uint8_t type)
2384 * XXX temporary hack until arpcom is setup in if_l2com
2386 if (type == IFT_ETHER)
2387 size = sizeof(struct arpcom);
2389 size = sizeof(struct ifnet);
2391 ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);
2393 ifp->if_type = type;
2395 if (if_com_alloc[type] != NULL) {
2396 ifp->if_l2com = if_com_alloc[type](type, ifp);
/* Hook failure: release the ifnet (error return not visible here). */
2397 if (ifp->if_l2com == NULL) {
2398 kfree(ifp, M_IFNET);
/* Release an ifnet allocated by if_alloc(). */
2406 if_free(struct ifnet *ifp)
2408 kfree(ifp, M_IFNET);
/* Install the classic (non-ALTQ) FIFO handlers on an ifaltq. */
2412 ifq_set_classic(struct ifaltq *ifq)
2414 ifq->altq_enqueue = ifq_classic_enqueue;
2415 ifq->altq_dequeue = ifq_classic_dequeue;
2416 ifq->altq_request = ifq_classic_request;
/*
 * Classic FIFO enqueue; the packet attribute argument is unused.
 * Queue-full handling (presumably drop + error) is not visible here.
 */
2420 ifq_classic_enqueue(struct ifaltq *ifq, struct mbuf *m,
2421 struct altq_pktattr *pa __unused)
2423 logifq(enqueue, ifq);
2424 if (IF_QFULL(ifq)) {
/*
 * Classic FIFO dequeue/poll. Only the ALTQ dequeue ops supported by
 * the classic queue are accepted; anything else panics.
 */
2434 ifq_classic_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
2443 logifq(dequeue, ifq);
2447 panic("unsupported ALTQ dequeue op: %d", op);
2449 KKASSERT(mpolled == NULL || mpolled == m);
/* Classic queue request handler; unknown ALTQ requests panic. */
2454 ifq_classic_request(struct ifaltq *ifq, int req, void *arg)
2461 panic("unsupported ALTQ request: %d", req);
/*
 * Attempt to run ifnet.if_start directly from the current context,
 * falling back to scheduling it on the ifnet's owning CPU whenever the
 * TX serializer is contended or more work remains afterwards.
 */
2467 ifq_try_ifstart(struct ifaltq *ifq, int force_sched)
2469 struct ifnet *ifp = ifq->altq_ifp;
2470 int running = 0, need_sched;
2473 * Try to do direct ifnet.if_start first, if there is
2474 * contention on ifnet's serializer, ifnet.if_start will
2475 * be scheduled on ifnet's CPU.
2477 if (!ifnet_tryserialize_tx(ifp)) {
2479 * ifnet serializer contention happened,
2480 * ifnet.if_start is scheduled on ifnet's
2481 * CPU, and we keep going.
2483 logifstart(contend_sched, ifp);
2484 if_start_schedule(ifp, 1);
/* Got the serializer: run if_start if the hardware can accept work. */
2488 if ((ifp->if_flags & IFF_RUNNING) && !ifq_is_oactive(ifq)) {
2489 logifstart(run, ifp);
2491 if ((ifp->if_flags & IFF_RUNNING) && !ifq_is_oactive(ifq))
2494 need_sched = if_start_need_schedule(ifq, running);
2496 ifnet_deserialize_tx(ifp);
2500 * More data need to be transmitted, ifnet.if_start is
2501 * scheduled on ifnet's CPU, and we keep going.
2502 * NOTE: ifnet.if_start interlock is not released.
2504 logifstart(sched, ifp);
2505 if_start_schedule(ifp, force_sched);
2510 * IFQ packets staging mechanism:
2512 * The packets enqueued into IFQ are staged to a certain amount before the
2513 * ifnet's if_start is called. In this way, the driver could avoid writing
2514 * to hardware registers upon every packet, instead, hardware registers
2515 * could be written when certain amount of packets are put onto hardware
2516 * TX ring. The measurement on several modern NICs (emx(4), igb(4), bnx(4),
2517 * bge(4), jme(4)) shows that the hardware registers writing aggregation
2518 * could save ~20% CPU time when 18bytes UDP datagrams are transmitted at
2519 * 1.48Mpps. The performance improvement by hardware registers writing
2520 * aggeregation is also mentioned by Luigi Rizzo's netmap paper
2521 * (http://info.iet.unipi.it/~luigi/netmap/).
2523 * IFQ packets staging is performed for two entry points into drivers's
2524 * transmission function:
2525 * - Direct ifnet's if_start calling, i.e. ifq_try_ifstart()
2526 * - ifnet's if_start scheduling, i.e. if_start_schedule()
2528 * IFQ packets staging will be stopped upon any of the following conditions:
2529 * - If the count of packets enqueued on the current CPU is great than or
2530 * equal to ifq_stage_cntmax. (XXX this should be per-interface)
2531 * - If the total length of packets enqueued on the current CPU is great
2532 * than or equal to the hardware's MTU - max_protohdr. max_protohdr is
2533 * cut from the hardware's MTU mainly bacause a full TCP segment's size
2534 * is usually less than hardware's MTU.
2535 * - if_start_schedule() is not pending on the current CPU and if_start
2536 * interlock (if_snd.altq_started) is not released.
2537 * - The if_start_rollup(), which is registered as low priority netisr
2538 * rollup function, is called; probably because no more work is pending
2542 * Currently IFQ packet staging is only performed in netisr threads.
/*
 * Enqueue mbuf 'm' on ifp's send queue and either start transmission
 * immediately or leave the packet staged (see the comment above).
 */
2545 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
2547 struct ifaltq *ifq = &ifp->if_snd;
2548 int error, start = 0, len, mcast = 0, avoid_start = 0;
2549 struct ifaltq_stage_head *head = NULL;
2550 struct ifaltq_stage *stage = NULL;
2552 ASSERT_IFNET_NOT_SERIALIZED_TX(ifp);
2554 len = m->m_pkthdr.len;
2555 if (m->m_flags & M_MCAST)
/* Staging only applies when running inside a netisr thread. */
2558 if (curthread->td_type == TD_TYPE_NETISR) {
2559 head = &ifq_stage_heads[mycpuid];
2560 stage = &ifq->altq_stage[mycpuid];
/* Keep staging while below both the packet-count and byte thresholds. */
2563 stage->ifqs_len += len;
2564 if (stage->ifqs_cnt < ifq_stage_cntmax &&
2565 stage->ifqs_len < (ifp->if_mtu - max_protohdr))
2570 error = ifq_enqueue_locked(ifq, m, pa);
2572 if (!ifq_data_ready(ifq)) {
2578 if (!ifq->altq_started) {
/* Queue this stage record so if_start_rollup() will flush it later. */
2583 if ((stage->ifqs_flags & IFQ_STAGE_FLAG_QUED) == 0)
2584 ifq_stage_insert(head, stage);
2586 ifp->if_obytes += len;
2593 * Hold the interlock of ifnet.if_start
2595 ifq->altq_started = 1;
2601 ifp->if_obytes += len;
2606 if (stage != NULL) {
/* A scheduled stage must be flushed now via if_start_schedule(). */
2607 if (!start && (stage->ifqs_flags & IFQ_STAGE_FLAG_SCHED)) {
2608 KKASSERT(stage->ifqs_flags & IFQ_STAGE_FLAG_QUED);
2610 ifq_stage_remove(head, stage);
2611 if_start_schedule(ifp, 1);
2616 if (stage->ifqs_flags & IFQ_STAGE_FLAG_QUED) {
2617 ifq_stage_remove(head, stage);
2619 stage->ifqs_cnt = 0;
2620 stage->ifqs_len = 0;
2625 logifstart(avoid, ifp);
2629 ifq_try_ifstart(ifq, 0);
/*
 * Allocate an ifaddr of at least sizeof(struct ifaddr) bytes plus one
 * per-CPU ifaddr_container. Each container starts with one reference.
 */
2634 ifa_create(int size, int flags)
2639 KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));
2641 ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
2645 ifa->ifa_containers = kmalloc(ncpus * sizeof(struct ifaddr_container),
2646 M_IFADDR, M_WAITOK | M_ZERO);
2647 ifa->ifa_ncnt = ncpus;
2648 for (i = 0; i < ncpus; ++i) {
2649 struct ifaddr_container *ifac = &ifa->ifa_containers[i];
2651 ifac->ifa_magic = IFA_CONTAINER_MAGIC;
2653 ifac->ifa_refcnt = 1;
/* Debug-only trace (presumably under IFADDR_DEBUG). */
2656 kprintf("alloc ifa %p %d\n", ifa, size);
/*
 * Retire one per-CPU ifaddr_container. When the last container of the
 * ifaddr is retired (ifa_ncnt drops to zero) the ifaddr itself and its
 * container array are freed.
 */
2662 ifac_free(struct ifaddr_container *ifac, int cpu_id)
2664 struct ifaddr *ifa = ifac->ifa;
2666 KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
2667 KKASSERT(ifac->ifa_refcnt == 0);
2668 KASSERT(ifac->ifa_listmask == 0,
2669 ("ifa is still on %#x lists", ifac->ifa_listmask));
/* Poison the magic so a stale container is caught by the KKASSERT above. */
2671 ifac->ifa_magic = IFA_CONTAINER_DEAD;
2673 #ifdef IFADDR_DEBUG_VERBOSE
2674 kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
2677 KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
2678 ("invalid # of ifac, %d", ifa->ifa_ncnt));
/* atomic_fetchadd returns the previous value: 1 means we were last. */
2679 if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
2681 kprintf("free ifa %p\n", ifa);
2683 kfree(ifa->ifa_containers, M_IFADDR);
2684 kfree(ifa, M_IFADDR);
/*
 * Per-CPU handler for ifa_iflink(): insert this CPU's container onto
 * the interface's per-CPU address list (head or tail per msg), then
 * forward the netmsg to the next CPU.
 */
2689 ifa_iflink_dispatch(netmsg_t nmsg)
2691 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2692 struct ifaddr *ifa = msg->ifa;
2693 struct ifnet *ifp = msg->ifp;
2695 struct ifaddr_container *ifac;
2699 ifac = &ifa->ifa_containers[cpu];
2700 ASSERT_IFAC_VALID(ifac);
2701 KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
2702 ("ifaddr is on if_addrheads"));
2704 ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
2706 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
2708 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);
/* Chain the message to the next CPU (replied when all CPUs are done). */
2712 ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
/*
 * Link 'ifa' onto 'ifp' on every CPU by circulating a netmsg through
 * the per-CPU ifnet threads; 'tail' selects tail vs. head insertion.
 */
2716 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
2718 struct netmsg_ifaddr msg;
2720 netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2721 0, ifa_iflink_dispatch);
2726 ifa_domsg(&msg.base.lmsg, 0);
/*
 * Per-CPU handler for ifa_ifunlink(): remove this CPU's container from
 * the interface's per-CPU address list, then forward to the next CPU.
 */
2730 ifa_ifunlink_dispatch(netmsg_t nmsg)
2732 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2733 struct ifaddr *ifa = msg->ifa;
2734 struct ifnet *ifp = msg->ifp;
2736 struct ifaddr_container *ifac;
2740 ifac = &ifa->ifa_containers[cpu];
2741 ASSERT_IFAC_VALID(ifac);
2742 KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
2743 ("ifaddr is not on if_addrhead"));
2745 TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
2746 ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;
2750 ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
/* Unlink 'ifa' from 'ifp' on every CPU (counterpart of ifa_iflink()). */
2754 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
2756 struct netmsg_ifaddr msg;
2758 netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2759 0, ifa_ifunlink_dispatch);
2763 ifa_domsg(&msg.base.lmsg, 0);
/*
 * Per-CPU handler for ifa_destroy(); the per-CPU release work is not
 * visible in this listing — presumably it drops this CPU's container
 * reference before forwarding to the next CPU.
 */
2767 ifa_destroy_dispatch(netmsg_t nmsg)
2769 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2772 ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
/* Tear down an ifaddr on all CPUs by circulating a destroy netmsg. */
2776 ifa_destroy(struct ifaddr *ifa)
2778 struct netmsg_ifaddr msg;
2780 netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2781 0, ifa_destroy_dispatch);
2784 ifa_domsg(&msg.base.lmsg, 0);
/* Return the lwkt message port of the per-CPU ifnet service thread. */
2788 ifnet_portfn(int cpu)
2790 return &ifnet_threads[cpu].td_msgport;
/*
 * Forward an ifnet netmsg to the next CPU's ifnet thread, or reply it
 * once every CPU has processed it (next_cpu == ncpus).
 */
2794 ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
2796 KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);
2798 if (next_cpu < ncpus)
2799 lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
2801 lwkt_replymsg(lmsg, 0);
/* Synchronously run a message on the given CPU's ifnet thread. */
2805 ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
2807 KKASSERT(cpu < ncpus);
2808 return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
/* Asynchronously send a message to the given CPU's ifnet thread. */
2812 ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
2814 KKASSERT(cpu < ncpus);
2815 lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
2819 * Generic netmsg service loop. Some protocols may roll their own but all
2820 * must do the basic command dispatch function call done here.
2823 ifnet_service_loop(void *arg __unused)
/* Block on this thread's msgport and dispatch each message's handler. */
2827 while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
2828 KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
2829 msg->base.nm_dispatch(msg);
/*
 * Netisr rollup handler: flush every staged ifaltq on this CPU once the
 * netisr has no more pending work (see the IFQ staging comment above
 * ifq_dispatch()).
 */
2834 if_start_rollup(void)
2836 struct ifaltq_stage_head *head = &ifq_stage_heads[mycpuid];
2837 struct ifaltq_stage *stage;
2839 while ((stage = TAILQ_FIRST(&head->ifqs_head)) != NULL) {
2840 struct ifaltq *ifq = stage->ifqs_altq;
/* A stage already marked SCHED only needs if_start_schedule(). */
2843 if (stage->ifqs_flags & IFQ_STAGE_FLAG_SCHED)
2845 ifq_stage_remove(head, stage);
2848 if_start_schedule(ifq->altq_ifp, 1);
2853 if (!ifq->altq_started) {
2855 * Hold the interlock of ifnet.if_start
2857 ifq->altq_started = 1;
2863 ifq_try_ifstart(ifq, 1);
2865 KKASSERT((stage->ifqs_flags &
2866 (IFQ_STAGE_FLAG_QUED | IFQ_STAGE_FLAG_SCHED)) == 0);
/*
 * Boot-time initialization: spawn one ifnet service thread per CPU,
 * initialize the per-CPU staging queues, and register if_start_rollup()
 * as a low-priority netisr rollup function.
 */
2871 ifnetinit(void *dummy __unused)
2875 for (i = 0; i < ncpus; ++i) {
2876 struct thread *thr = &ifnet_threads[i];
2878 lwkt_create(ifnet_service_loop, NULL, NULL,
2879 thr, TDF_NOSTART|TDF_FORCE_SPINPORT,
2881 netmsg_service_port_init(&thr->td_msgport);
2885 for (i = 0; i < ncpus; ++i)
2886 TAILQ_INIT(&ifq_stage_heads[i].ifqs_head);
2887 netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART);
/* Look up an ifnet by interface index (bounds check not visible here). */
2891 ifnet_byindex(unsigned short idx)
2895 return ifindex2ifnet[idx];
/*
 * Return the first ifaddr of the interface with the given index.
 * NOTE: dereferences TAILQ_FIRST unconditionally — presumably the
 * (not-visible) surrounding code guarantees a non-empty address list.
 */
2899 ifaddr_byindex(unsigned short idx)
2903 ifp = ifnet_byindex(idx);
2906 return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
/*
 * Register per-interface-type l2com allocator/free hooks used by
 * if_alloc()/if_free(); double registration is a panic-level bug.
 */
2910 if_register_com_alloc(u_char type,
2911 if_com_alloc_t *a, if_com_free_t *f)
2914 KASSERT(if_com_alloc[type] == NULL,
2915 ("if_register_com_alloc: %d already registered", type));
2916 KASSERT(if_com_free[type] == NULL,
2917 ("if_register_com_alloc: %d free already registered", type));
2919 if_com_alloc[type] = a;
2920 if_com_free[type] = f;
/* Remove a previously registered pair of l2com hooks. */
2924 if_deregister_com_alloc(u_char type)
2927 KASSERT(if_com_alloc[type] != NULL,
2928 ("if_deregister_com_alloc: %d not registered", type));
2929 KASSERT(if_com_free[type] != NULL,
2930 ("if_deregister_com_alloc: %d free not registered", type));
2931 if_com_alloc[type] = NULL;
2932 if_com_free[type] = NULL;
/*
 * Clamp a requested ring count to a power of two bounded by cnt_max
 * (which must itself be a power of two) and by ncpus2; the clamping
 * steps between the visible lines are not shown in this listing.
 */
2936 if_ring_count2(int cnt, int cnt_max)
2940 KASSERT(cnt_max >= 1 && powerof2(cnt_max),
2941 ("invalid ring count max %d", cnt_max));
/* Round cnt down to the largest power of two <= cnt. */
2950 while ((1 << (shift + 1)) <= cnt)
2954 KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max,
2955 ("calculate cnt %d, ncpus2 %d, cnt max %d",
2956 cnt, ncpus2, cnt_max));
2961 ifq_set_maxlen(struct ifaltq *ifq, int len)
2963 ifq->ifq_maxlen = len + (ncpus * ifq_stage_cntmax);