Merge branch 'vendor/GDB'
[dragonfly.git] / sys / net / if.c
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *      @(#)if.c        8.3 (Berkeley) 1/4/94
34  * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
35  */
36
37 #include "opt_compat.h"
38 #include "opt_inet6.h"
39 #include "opt_inet.h"
40 #include "opt_polling.h"
41 #include "opt_ifpoll.h"
42
43 #include <sys/param.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/priv.h>
49 #include <sys/protosw.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/socketops.h>
53 #include <sys/protosw.h>
54 #include <sys/kernel.h>
55 #include <sys/ktr.h>
56 #include <sys/mutex.h>
57 #include <sys/sockio.h>
58 #include <sys/syslog.h>
59 #include <sys/sysctl.h>
60 #include <sys/domain.h>
61 #include <sys/thread.h>
62 #include <sys/serialize.h>
63 #include <sys/bus.h>
64
65 #include <sys/thread2.h>
66 #include <sys/msgport2.h>
67 #include <sys/mutex2.h>
68
69 #include <net/if.h>
70 #include <net/if_arp.h>
71 #include <net/if_dl.h>
72 #include <net/if_types.h>
73 #include <net/if_var.h>
74 #include <net/ifq_var.h>
75 #include <net/radix.h>
76 #include <net/route.h>
77 #include <net/if_clone.h>
78 #include <net/netisr.h>
79 #include <net/netmsg2.h>
80
81 #include <machine/atomic.h>
82 #include <machine/stdarg.h>
83 #include <machine/smp.h>
84
85 #if defined(INET) || defined(INET6)
86 /*XXX*/
87 #include <netinet/in.h>
88 #include <netinet/in_var.h>
89 #include <netinet/if_ether.h>
90 #ifdef INET6
91 #include <netinet6/in6_var.h>
92 #include <netinet6/in6_ifattach.h>
93 #endif
94 #endif
95
96 #if defined(COMPAT_43)
97 #include <emulation/43bsd/43bsd_socket.h>
98 #endif /* COMPAT_43 */
99
100 struct netmsg_ifaddr {
101         struct netmsg_base base;
102         struct ifaddr   *ifa;
103         struct ifnet    *ifp;
104         int             tail;
105 };
106
107 /*
108  * System initialization
109  */
110 static void     if_attachdomain(void *);
111 static void     if_attachdomain1(struct ifnet *);
112 static int      ifconf(u_long, caddr_t, struct ucred *);
113 static void     ifinit(void *);
114 static void     ifnetinit(void *);
115 static void     if_slowtimo(void *);
116 static void     link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
117 static int      if_rtdel(struct radix_node *, void *);
118
119 #ifdef INET6
120 /*
121  * XXX: declare here to avoid to include many inet6 related files..
122  * should be more generalized?
123  */
124 extern void     nd6_setmtu(struct ifnet *);
125 #endif
126
127 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
128 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
129
130 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
131 /* Must be after netisr_init */
132 SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)
133
134 static  if_com_alloc_t *if_com_alloc[256];
135 static  if_com_free_t *if_com_free[256];
136
137 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
138 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
139 MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");
140
141 int                     ifqmaxlen = IFQ_MAXLEN;
142 struct ifnethead        ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
143
144 struct callout          if_slowtimo_timer;
145
146 int                     if_index = 0;
147 struct ifnet            **ifindex2ifnet = NULL;
148 static struct thread    ifnet_threads[MAXCPU];
149
150 #define IFQ_KTR_STRING          "ifq=%p"
151 #define IFQ_KTR_ARGS    struct ifaltq *ifq
152 #ifndef KTR_IFQ
153 #define KTR_IFQ                 KTR_ALL
154 #endif
155 KTR_INFO_MASTER(ifq);
156 KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
157 KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
158 #define logifq(name, arg)       KTR_LOG(ifq_ ## name, arg)
159
160 #define IF_START_KTR_STRING     "ifp=%p"
161 #define IF_START_KTR_ARGS       struct ifnet *ifp
162 #ifndef KTR_IF_START
163 #define KTR_IF_START            KTR_ALL
164 #endif
165 KTR_INFO_MASTER(if_start);
166 KTR_INFO(KTR_IF_START, if_start, run, 0,
167          IF_START_KTR_STRING, IF_START_KTR_ARGS);
168 KTR_INFO(KTR_IF_START, if_start, sched, 1,
169          IF_START_KTR_STRING, IF_START_KTR_ARGS);
170 KTR_INFO(KTR_IF_START, if_start, avoid, 2,
171          IF_START_KTR_STRING, IF_START_KTR_ARGS);
172 KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
173          IF_START_KTR_STRING, IF_START_KTR_ARGS);
174 #ifdef SMP
175 KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
176          IF_START_KTR_STRING, IF_START_KTR_ARGS);
177 #endif
178 #define logifstart(name, arg)   KTR_LOG(if_start_ ## name, arg)
179
180 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
181
182 /*
183  * Network interface utility routines.
184  *
185  * Routines with ifa_ifwith* names take sockaddr *'s as
186  * parameters.
187  */
188 /* ARGSUSED*/
189 void
190 ifinit(void *dummy)
191 {
192         struct ifnet *ifp;
193
194         callout_init(&if_slowtimo_timer);
195
196         crit_enter();
197         TAILQ_FOREACH(ifp, &ifnet, if_link) {
198                 if (ifp->if_snd.ifq_maxlen == 0) {
199                         if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
200                         ifp->if_snd.ifq_maxlen = ifqmaxlen;
201                 }
202         }
203         crit_exit();
204
205         if_slowtimo(0);
206 }
207
208 static int
209 if_start_cpuid(struct ifnet *ifp)
210 {
211         return ifp->if_cpuid;
212 }
213
#ifdef DEVICE_POLLING
/*
 * ifnet.if_start_cpuid method installed by if_attach() for interfaces
 * that provide if_poll: use the polling CPU when one is set (>= 0),
 * otherwise fall back to ifp->if_cpuid.
 */
static int
if_start_cpuid_poll(struct ifnet *ifp)
{
	int cpu = ifp->if_poll_cpuid;

	if (cpu < 0)
		cpu = ifp->if_cpuid;
	return (cpu);
}
#endif
226
227 static void
228 if_start_ipifunc(void *arg)
229 {
230         struct ifnet *ifp = arg;
231         struct lwkt_msg *lmsg = &ifp->if_start_nmsg[mycpuid].lmsg;
232
233         crit_enter();
234         if (lmsg->ms_flags & MSGF_DONE)
235                 lwkt_sendmsg(ifnet_portfn(mycpuid), lmsg);
236         crit_exit();
237 }
238
/*
 * Schedule ifnet.if_start on the CPU chosen by ifp->if_start_cpuid().
 *
 * On SMP kernels a remote target CPU is reached with an IPI that runs
 * if_start_ipifunc() there; otherwise (same CPU, or non-SMP kernel)
 * if_start_ipifunc() is called directly.
 */
static void
if_start_schedule(struct ifnet *ifp)
{
#ifdef SMP
	int target = ifp->if_start_cpuid(ifp);

	if (target != mycpuid) {
		lwkt_send_ipiq(globaldata_find(target), if_start_ipifunc, ifp);
		return;
	}
#endif
	if_start_ipifunc(ifp);
}
255
256 /*
257  * NOTE:
258  * This function will release ifnet.if_start interlock,
259  * if ifnet.if_start does not need to be scheduled
260  */
261 static __inline int
262 if_start_need_schedule(struct ifaltq *ifq, int running)
263 {
264         if (!running || ifq_is_empty(ifq)
265 #ifdef ALTQ
266             || ifq->altq_tbr != NULL
267 #endif
268         ) {
269                 ALTQ_LOCK(ifq);
270                 /*
271                  * ifnet.if_start interlock is released, if:
272                  * 1) Hardware can not take any packets, due to
273                  *    o  interface is marked down
274                  *    o  hardware queue is full (IFF_OACTIVE)
275                  *    Under the second situation, hardware interrupt
276                  *    or polling(4) will call/schedule ifnet.if_start
277                  *    when hardware queue is ready
278                  * 2) There is not packet in the ifnet.if_snd.
279                  *    Further ifq_dispatch or ifq_handoff will call/
280                  *    schedule ifnet.if_start
281                  * 3) TBR is used and it does not allow further
282                  *    dequeueing.
283                  *    TBR callout will call ifnet.if_start
284                  */
285                 if (!running || !ifq_data_ready(ifq)) {
286                         ifq->altq_started = 0;
287                         ALTQ_UNLOCK(ifq);
288                         return 0;
289                 }
290                 ALTQ_UNLOCK(ifq);
291         }
292         return 1;
293 }
294
295 static void
296 if_start_dispatch(netmsg_t msg)
297 {
298         struct lwkt_msg *lmsg = &msg->base.lmsg;
299         struct ifnet *ifp = lmsg->u.ms_resultp;
300         struct ifaltq *ifq = &ifp->if_snd;
301         int running = 0;
302
303         crit_enter();
304         lwkt_replymsg(lmsg, 0); /* reply ASAP */
305         crit_exit();
306
307 #ifdef SMP
308         if (mycpuid != ifp->if_start_cpuid(ifp)) {
309                 /*
310                  * If the ifnet is still up, we need to
311                  * chase its CPU change.
312                  */
313                 if (ifp->if_flags & IFF_UP) {
314                         logifstart(chase_sched, ifp);
315                         if_start_schedule(ifp);
316                         return;
317                 } else {
318                         goto check;
319                 }
320         }
321 #endif
322
323         if (ifp->if_flags & IFF_UP) {
324                 ifnet_serialize_tx(ifp); /* XXX try? */
325                 if ((ifp->if_flags & IFF_OACTIVE) == 0) {
326                         logifstart(run, ifp);
327                         ifp->if_start(ifp);
328                         if ((ifp->if_flags &
329                         (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
330                                 running = 1;
331                 }
332                 ifnet_deserialize_tx(ifp);
333         }
334 #ifdef SMP
335 check:
336 #endif
337         if (if_start_need_schedule(ifq, running)) {
338                 crit_enter();
339                 if (lmsg->ms_flags & MSGF_DONE) { /* XXX necessary? */
340                         logifstart(sched, ifp);
341                         lwkt_sendmsg(ifnet_portfn(mycpuid), lmsg);
342                 }
343                 crit_exit();
344         }
345 }
346
347 /* Device driver ifnet.if_start helper function */
348 void
349 if_devstart(struct ifnet *ifp)
350 {
351         struct ifaltq *ifq = &ifp->if_snd;
352         int running = 0;
353
354         ASSERT_IFNET_SERIALIZED_TX(ifp);
355
356         ALTQ_LOCK(ifq);
357         if (ifq->altq_started || !ifq_data_ready(ifq)) {
358                 logifstart(avoid, ifp);
359                 ALTQ_UNLOCK(ifq);
360                 return;
361         }
362         ifq->altq_started = 1;
363         ALTQ_UNLOCK(ifq);
364
365         logifstart(run, ifp);
366         ifp->if_start(ifp);
367
368         if ((ifp->if_flags & (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
369                 running = 1;
370
371         if (if_start_need_schedule(ifq, running)) {
372                 /*
373                  * More data need to be transmitted, ifnet.if_start is
374                  * scheduled on ifnet's CPU, and we keep going.
375                  * NOTE: ifnet.if_start interlock is not released.
376                  */
377                 logifstart(sched, ifp);
378                 if_start_schedule(ifp);
379         }
380 }
381
382 static void
383 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
384 {
385         lwkt_serialize_enter(ifp->if_serializer);
386 }
387
388 static void
389 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
390 {
391         lwkt_serialize_exit(ifp->if_serializer);
392 }
393
394 static int
395 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
396 {
397         return lwkt_serialize_try(ifp->if_serializer);
398 }
399
#ifdef INVARIANTS
/*
 * Default ifnet.if_serialize_assert method (INVARIANTS kernels only):
 * assert that the interface's single serializer is held when
 * `serialized' is true, or not held when it is false.  The serialize
 * class argument is ignored.
 */
static void
if_default_serialize_assert(struct ifnet *ifp,
			    enum ifnet_serialize slz __unused,
			    boolean_t serialized)
{
	if (!serialized) {
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
		return;
	}
	ASSERT_SERIALIZED(ifp->if_serializer);
}
#endif
412
413 /*
414  * Attach an interface to the list of "active" interfaces.
415  *
416  * The serializer is optional.  If non-NULL access to the interface
417  * may be MPSAFE.
418  */
419 void
420 if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
421 {
422         unsigned socksize, ifasize;
423         int namelen, masklen;
424         struct sockaddr_dl *sdl;
425         struct ifaddr *ifa;
426         struct ifaltq *ifq;
427         int i;
428
429         static int if_indexlim = 8;
430
431         if (ifp->if_serialize != NULL) {
432                 KASSERT(ifp->if_deserialize != NULL &&
433                         ifp->if_tryserialize != NULL &&
434                         ifp->if_serialize_assert != NULL,
435                         ("serialize functions are partially setup"));
436
437                 /*
438                  * If the device supplies serialize functions,
439                  * then clear if_serializer to catch any invalid
440                  * usage of this field.
441                  */
442                 KASSERT(serializer == NULL,
443                         ("both serialize functions and default serializer "
444                          "are supplied"));
445                 ifp->if_serializer = NULL;
446         } else {
447                 KASSERT(ifp->if_deserialize == NULL &&
448                         ifp->if_tryserialize == NULL &&
449                         ifp->if_serialize_assert == NULL,
450                         ("serialize functions are partially setup"));
451                 ifp->if_serialize = if_default_serialize;
452                 ifp->if_deserialize = if_default_deserialize;
453                 ifp->if_tryserialize = if_default_tryserialize;
454 #ifdef INVARIANTS
455                 ifp->if_serialize_assert = if_default_serialize_assert;
456 #endif
457
458                 /*
459                  * The serializer can be passed in from the device,
460                  * allowing the same serializer to be used for both
461                  * the interrupt interlock and the device queue.
462                  * If not specified, the netif structure will use an
463                  * embedded serializer.
464                  */
465                 if (serializer == NULL) {
466                         serializer = &ifp->if_default_serializer;
467                         lwkt_serialize_init(serializer);
468                 }
469                 ifp->if_serializer = serializer;
470         }
471
472         ifp->if_start_cpuid = if_start_cpuid;
473         ifp->if_cpuid = 0;
474
475 #ifdef DEVICE_POLLING
476         /* Device is not in polling mode by default */
477         ifp->if_poll_cpuid = -1;
478         if (ifp->if_poll != NULL)
479                 ifp->if_start_cpuid = if_start_cpuid_poll;
480 #endif
481
482         ifp->if_start_nmsg = kmalloc(ncpus * sizeof(*ifp->if_start_nmsg),
483                                      M_LWKTMSG, M_WAITOK);
484         for (i = 0; i < ncpus; ++i) {
485                 netmsg_init(&ifp->if_start_nmsg[i], NULL, &netisr_adone_rport,
486                             0, if_start_dispatch);
487                 ifp->if_start_nmsg[i].lmsg.u.ms_resultp = ifp;
488         }
489
490         mtx_init(&ifp->if_ioctl_mtx);
491         mtx_lock(&ifp->if_ioctl_mtx);
492
493         TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
494         ifp->if_index = ++if_index;
495
496         /*
497          * XXX -
498          * The old code would work if the interface passed a pre-existing
499          * chain of ifaddrs to this code.  We don't trust our callers to
500          * properly initialize the tailq, however, so we no longer allow
501          * this unlikely case.
502          */
503         ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
504                                     M_IFADDR, M_WAITOK | M_ZERO);
505         for (i = 0; i < ncpus; ++i)
506                 TAILQ_INIT(&ifp->if_addrheads[i]);
507
508         TAILQ_INIT(&ifp->if_prefixhead);
509         TAILQ_INIT(&ifp->if_multiaddrs);
510         TAILQ_INIT(&ifp->if_groups);
511         getmicrotime(&ifp->if_lastchange);
512         if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
513                 unsigned int n;
514                 struct ifnet **q;
515
516                 if_indexlim <<= 1;
517
518                 /* grow ifindex2ifnet */
519                 n = if_indexlim * sizeof(*q);
520                 q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
521                 if (ifindex2ifnet) {
522                         bcopy(ifindex2ifnet, q, n/2);
523                         kfree(ifindex2ifnet, M_IFADDR);
524                 }
525                 ifindex2ifnet = q;
526         }
527
528         ifindex2ifnet[if_index] = ifp;
529
530         /*
531          * create a Link Level name for this device
532          */
533         namelen = strlen(ifp->if_xname);
534         masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
535         socksize = masklen + ifp->if_addrlen;
536 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
537         if (socksize < sizeof(*sdl))
538                 socksize = sizeof(*sdl);
539         socksize = ROUNDUP(socksize);
540 #undef ROUNDUP
541         ifasize = sizeof(struct ifaddr) + 2 * socksize;
542         ifa = ifa_create(ifasize, M_WAITOK);
543         sdl = (struct sockaddr_dl *)(ifa + 1);
544         sdl->sdl_len = socksize;
545         sdl->sdl_family = AF_LINK;
546         bcopy(ifp->if_xname, sdl->sdl_data, namelen);
547         sdl->sdl_nlen = namelen;
548         sdl->sdl_index = ifp->if_index;
549         sdl->sdl_type = ifp->if_type;
550         ifp->if_lladdr = ifa;
551         ifa->ifa_ifp = ifp;
552         ifa->ifa_rtrequest = link_rtrequest;
553         ifa->ifa_addr = (struct sockaddr *)sdl;
554         sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
555         ifa->ifa_netmask = (struct sockaddr *)sdl;
556         sdl->sdl_len = masklen;
557         while (namelen != 0)
558                 sdl->sdl_data[--namelen] = 0xff;
559         ifa_iflink(ifa, ifp, 0 /* Insert head */);
560
561         EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
562         devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
563
564         ifq = &ifp->if_snd;
565         ifq->altq_type = 0;
566         ifq->altq_disc = NULL;
567         ifq->altq_flags &= ALTQF_CANTCHANGE;
568         ifq->altq_tbr = NULL;
569         ifq->altq_ifp = ifp;
570         ifq->altq_started = 0;
571         ifq->altq_prepended = NULL;
572         ALTQ_LOCK_INIT(ifq);
573         ifq_set_classic(ifq);
574
575         if (!SLIST_EMPTY(&domains))
576                 if_attachdomain1(ifp);
577
578         /* Announce the interface. */
579         rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
580
581         mtx_unlock(&ifp->if_ioctl_mtx);
582 }
583
584 static void
585 if_attachdomain(void *dummy)
586 {
587         struct ifnet *ifp;
588
589         crit_enter();
590         TAILQ_FOREACH(ifp, &ifnet, if_list)
591                 if_attachdomain1(ifp);
592         crit_exit();
593 }
594 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
595         if_attachdomain, NULL);
596
597 static void
598 if_attachdomain1(struct ifnet *ifp)
599 {
600         struct domain *dp;
601
602         crit_enter();
603
604         /* address family dependent data region */
605         bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
606         SLIST_FOREACH(dp, &domains, dom_next)
607                 if (dp->dom_ifattach)
608                         ifp->if_afdata[dp->dom_family] =
609                                 (*dp->dom_ifattach)(ifp);
610         crit_exit();
611 }
612
613 /*
614  * Purge all addresses whose type is _not_ AF_LINK
615  */
616 void
617 if_purgeaddrs_nolink(struct ifnet *ifp)
618 {
619         struct ifaddr_container *ifac, *next;
620
621         TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
622                               ifa_link, next) {
623                 struct ifaddr *ifa = ifac->ifa;
624
625                 /* Leave link ifaddr as it is */
626                 if (ifa->ifa_addr->sa_family == AF_LINK)
627                         continue;
628 #ifdef INET
629                 /* XXX: Ugly!! ad hoc just for INET */
630                 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
631                         struct ifaliasreq ifr;
632 #ifdef IFADDR_DEBUG_VERBOSE
633                         int i;
634
635                         kprintf("purge in4 addr %p: ", ifa);
636                         for (i = 0; i < ncpus; ++i)
637                                 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
638                         kprintf("\n");
639 #endif
640
641                         bzero(&ifr, sizeof ifr);
642                         ifr.ifra_addr = *ifa->ifa_addr;
643                         if (ifa->ifa_dstaddr)
644                                 ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
645                         if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
646                                        NULL) == 0)
647                                 continue;
648                 }
649 #endif /* INET */
650 #ifdef INET6
651                 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
652 #ifdef IFADDR_DEBUG_VERBOSE
653                         int i;
654
655                         kprintf("purge in6 addr %p: ", ifa);
656                         for (i = 0; i < ncpus; ++i)
657                                 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
658                         kprintf("\n");
659 #endif
660
661                         in6_purgeaddr(ifa);
662                         /* ifp_addrhead is already updated */
663                         continue;
664                 }
665 #endif /* INET6 */
666                 ifa_ifunlink(ifa, ifp);
667                 ifa_destroy(ifa);
668         }
669 }
670
671 /*
672  * Detach an interface, removing it from the
673  * list of "active" interfaces.
674  */
675 void
676 if_detach(struct ifnet *ifp)
677 {
678         struct radix_node_head  *rnh;
679         int i;
680         int cpu, origcpu;
681         struct domain *dp;
682
683         EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
684
685         /*
686          * Remove routes and flush queues.
687          */
688         crit_enter();
689 #ifdef DEVICE_POLLING
690         if (ifp->if_flags & IFF_POLLING)
691                 ether_poll_deregister(ifp);
692 #endif
693 #ifdef IFPOLL_ENABLE
694         if (ifp->if_flags & IFF_NPOLLING)
695                 ifpoll_deregister(ifp);
696 #endif
697         if_down(ifp);
698
699 #ifdef ALTQ
700         if (ifq_is_enabled(&ifp->if_snd))
701                 altq_disable(&ifp->if_snd);
702         if (ifq_is_attached(&ifp->if_snd))
703                 altq_detach(&ifp->if_snd);
704 #endif
705
706         /*
707          * Clean up all addresses.
708          */
709         ifp->if_lladdr = NULL;
710
711         if_purgeaddrs_nolink(ifp);
712         if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
713                 struct ifaddr *ifa;
714
715                 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
716                 KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
717                         ("non-link ifaddr is left on if_addrheads"));
718
719                 ifa_ifunlink(ifa, ifp);
720                 ifa_destroy(ifa);
721                 KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
722                         ("there are still ifaddrs left on if_addrheads"));
723         }
724
725 #ifdef INET
726         /*
727          * Remove all IPv4 kernel structures related to ifp.
728          */
729         in_ifdetach(ifp);
730 #endif
731
732 #ifdef INET6
733         /*
734          * Remove all IPv6 kernel structs related to ifp.  This should be done
735          * before removing routing entries below, since IPv6 interface direct
736          * routes are expected to be removed by the IPv6-specific kernel API.
737          * Otherwise, the kernel will detect some inconsistency and bark it.
738          */
739         in6_ifdetach(ifp);
740 #endif
741
742         /*
743          * Delete all remaining routes using this interface
744          * Unfortuneatly the only way to do this is to slog through
745          * the entire routing table looking for routes which point
746          * to this interface...oh well...
747          */
748         origcpu = mycpuid;
749         for (cpu = 0; cpu < ncpus2; cpu++) {
750                 lwkt_migratecpu(cpu);
751                 for (i = 1; i <= AF_MAX; i++) {
752                         if ((rnh = rt_tables[cpu][i]) == NULL)
753                                 continue;
754                         rnh->rnh_walktree(rnh, if_rtdel, ifp);
755                 }
756         }
757         lwkt_migratecpu(origcpu);
758
759         /* Announce that the interface is gone. */
760         rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
761         devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
762
763         SLIST_FOREACH(dp, &domains, dom_next)
764                 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
765                         (*dp->dom_ifdetach)(ifp,
766                                 ifp->if_afdata[dp->dom_family]);
767
768         /*
769          * Remove interface from ifindex2ifp[] and maybe decrement if_index.
770          */
771         ifindex2ifnet[ifp->if_index] = NULL;
772         while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
773                 if_index--;
774
775         TAILQ_REMOVE(&ifnet, ifp, if_link);
776         kfree(ifp->if_addrheads, M_IFADDR);
777         kfree(ifp->if_start_nmsg, M_LWKTMSG);
778         crit_exit();
779 }
780
781 /*
782  * Create interface group without members
783  */
784 struct ifg_group *
785 if_creategroup(const char *groupname)
786 {
787         struct ifg_group        *ifg = NULL;
788
789         if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
790             M_TEMP, M_NOWAIT)) == NULL)
791                 return (NULL);
792
793         strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
794         ifg->ifg_refcnt = 0;
795         ifg->ifg_carp_demoted = 0;
796         TAILQ_INIT(&ifg->ifg_members);
797 #if NPF > 0
798         pfi_attach_ifgroup(ifg);
799 #endif
800         TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
801
802         return (ifg);
803 }
804
805 /*
806  * Add a group to an interface
807  */
808 int
809 if_addgroup(struct ifnet *ifp, const char *groupname)
810 {
811         struct ifg_list         *ifgl;
812         struct ifg_group        *ifg = NULL;
813         struct ifg_member       *ifgm;
814
815         if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
816             groupname[strlen(groupname) - 1] <= '9')
817                 return (EINVAL);
818
819         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
820                 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
821                         return (EEXIST);
822
823         if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
824                 return (ENOMEM);
825
826         if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
827                 kfree(ifgl, M_TEMP);
828                 return (ENOMEM);
829         }
830
831         TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
832                 if (!strcmp(ifg->ifg_group, groupname))
833                         break;
834
835         if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
836                 kfree(ifgl, M_TEMP);
837                 kfree(ifgm, M_TEMP);
838                 return (ENOMEM);
839         }
840
841         ifg->ifg_refcnt++;
842         ifgl->ifgl_group = ifg;
843         ifgm->ifgm_ifp = ifp;
844
845         TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
846         TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
847
848 #if NPF > 0
849         pfi_group_change(groupname);
850 #endif
851
852         return (0);
853 }
854
855 /*
856  * Remove a group from an interface
857  */
858 int
859 if_delgroup(struct ifnet *ifp, const char *groupname)
860 {
861         struct ifg_list         *ifgl;
862         struct ifg_member       *ifgm;
863
864         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
865                 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
866                         break;
867         if (ifgl == NULL)
868                 return (ENOENT);
869
870         TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
871
872         TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
873                 if (ifgm->ifgm_ifp == ifp)
874                         break;
875
876         if (ifgm != NULL) {
877                 TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
878                 kfree(ifgm, M_TEMP);
879         }
880
881         if (--ifgl->ifgl_group->ifg_refcnt == 0) {
882                 TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
883 #if NPF > 0
884                 pfi_detach_ifgroup(ifgl->ifgl_group);
885 #endif
886                 kfree(ifgl->ifgl_group, M_TEMP);
887         }
888
889         kfree(ifgl, M_TEMP);
890
891 #if NPF > 0
892         pfi_group_change(groupname);
893 #endif
894
895         return (0);
896 }
897
898 /*
899  * Stores all groups from an interface in memory pointed
900  * to by data
901  */
902 int
903 if_getgroup(caddr_t data, struct ifnet *ifp)
904 {
905         int                      len, error;
906         struct ifg_list         *ifgl;
907         struct ifg_req           ifgrq, *ifgp;
908         struct ifgroupreq       *ifgr = (struct ifgroupreq *)data;
909
910         if (ifgr->ifgr_len == 0) {
911                 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
912                         ifgr->ifgr_len += sizeof(struct ifg_req);
913                 return (0);
914         }
915
916         len = ifgr->ifgr_len;
917         ifgp = ifgr->ifgr_groups;
918         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
919                 if (len < sizeof(ifgrq))
920                         return (EINVAL);
921                 bzero(&ifgrq, sizeof ifgrq);
922                 strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
923                     sizeof(ifgrq.ifgrq_group));
924                 if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
925                     sizeof(struct ifg_req))))
926                         return (error);
927                 len -= sizeof(ifgrq);
928                 ifgp++;
929         }
930
931         return (0);
932 }
933
934 /*
935  * Stores all members of a group in memory pointed to by data
936  */
937 int
938 if_getgroupmembers(caddr_t data)
939 {
940         struct ifgroupreq       *ifgr = (struct ifgroupreq *)data;
941         struct ifg_group        *ifg;
942         struct ifg_member       *ifgm;
943         struct ifg_req           ifgrq, *ifgp;
944         int                      len, error;
945
946         TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
947                 if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
948                         break;
949         if (ifg == NULL)
950                 return (ENOENT);
951
952         if (ifgr->ifgr_len == 0) {
953                 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
954                         ifgr->ifgr_len += sizeof(ifgrq);
955                 return (0);
956         }
957
958         len = ifgr->ifgr_len;
959         ifgp = ifgr->ifgr_groups;
960         TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
961                 if (len < sizeof(ifgrq))
962                         return (EINVAL);
963                 bzero(&ifgrq, sizeof ifgrq);
964                 strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
965                     sizeof(ifgrq.ifgrq_member));
966                 if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
967                     sizeof(struct ifg_req))))
968                         return (error);
969                 len -= sizeof(ifgrq);
970                 ifgp++;
971         }
972
973         return (0);
974 }
975
976 /*
977  * Delete Routes for a Network Interface
978  *
979  * Called for each routing entry via the rnh->rnh_walktree() call above
980  * to delete all route entries referencing a detaching network interface.
981  *
982  * Arguments:
983  *      rn      pointer to node in the routing table
984  *      arg     argument passed to rnh->rnh_walktree() - detaching interface
985  *
986  * Returns:
987  *      0       successful
988  *      errno   failed - reason indicated
989  *
990  */
991 static int
992 if_rtdel(struct radix_node *rn, void *arg)
993 {
994         struct rtentry  *rt = (struct rtentry *)rn;
995         struct ifnet    *ifp = arg;
996         int             err;
997
998         if (rt->rt_ifp == ifp) {
999
1000                 /*
1001                  * Protect (sorta) against walktree recursion problems
1002                  * with cloned routes
1003                  */
1004                 if (!(rt->rt_flags & RTF_UP))
1005                         return (0);
1006
1007                 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1008                                 rt_mask(rt), rt->rt_flags,
1009                                 NULL);
1010                 if (err) {
1011                         log(LOG_WARNING, "if_rtdel: error %d\n", err);
1012                 }
1013         }
1014
1015         return (0);
1016 }
1017
1018 /*
1019  * Locate an interface based on a complete address.
1020  */
1021 struct ifaddr *
1022 ifa_ifwithaddr(struct sockaddr *addr)
1023 {
1024         struct ifnet *ifp;
1025
1026         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1027                 struct ifaddr_container *ifac;
1028
1029                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1030                         struct ifaddr *ifa = ifac->ifa;
1031
1032                         if (ifa->ifa_addr->sa_family != addr->sa_family)
1033                                 continue;
1034                         if (sa_equal(addr, ifa->ifa_addr))
1035                                 return (ifa);
1036                         if ((ifp->if_flags & IFF_BROADCAST) &&
1037                             ifa->ifa_broadaddr &&
1038                             /* IPv6 doesn't have broadcast */
1039                             ifa->ifa_broadaddr->sa_len != 0 &&
1040                             sa_equal(ifa->ifa_broadaddr, addr))
1041                                 return (ifa);
1042                 }
1043         }
1044         return (NULL);
1045 }
1046 /*
1047  * Locate the point to point interface with a given destination address.
1048  */
1049 struct ifaddr *
1050 ifa_ifwithdstaddr(struct sockaddr *addr)
1051 {
1052         struct ifnet *ifp;
1053
1054         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1055                 struct ifaddr_container *ifac;
1056
1057                 if (!(ifp->if_flags & IFF_POINTOPOINT))
1058                         continue;
1059
1060                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1061                         struct ifaddr *ifa = ifac->ifa;
1062
1063                         if (ifa->ifa_addr->sa_family != addr->sa_family)
1064                                 continue;
1065                         if (ifa->ifa_dstaddr &&
1066                             sa_equal(addr, ifa->ifa_dstaddr))
1067                                 return (ifa);
1068                 }
1069         }
1070         return (NULL);
1071 }
1072
1073 /*
1074  * Find an interface on a specific network.  If many, choice
1075  * is most specific found.
1076  */
1077 struct ifaddr *
1078 ifa_ifwithnet(struct sockaddr *addr)
1079 {
1080         struct ifnet *ifp;
1081         struct ifaddr *ifa_maybe = NULL;
1082         u_int af = addr->sa_family;
1083         char *addr_data = addr->sa_data, *cplim;
1084
1085         /*
1086          * AF_LINK addresses can be looked up directly by their index number,
1087          * so do that if we can.
1088          */
1089         if (af == AF_LINK) {
1090                 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
1091
1092                 if (sdl->sdl_index && sdl->sdl_index <= if_index)
1093                         return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
1094         }
1095
1096         /*
1097          * Scan though each interface, looking for ones that have
1098          * addresses in this address family.
1099          */
1100         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1101                 struct ifaddr_container *ifac;
1102
1103                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1104                         struct ifaddr *ifa = ifac->ifa;
1105                         char *cp, *cp2, *cp3;
1106
1107                         if (ifa->ifa_addr->sa_family != af)
1108 next:                           continue;
1109                         if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1110                                 /*
1111                                  * This is a bit broken as it doesn't
1112                                  * take into account that the remote end may
1113                                  * be a single node in the network we are
1114                                  * looking for.
1115                                  * The trouble is that we don't know the
1116                                  * netmask for the remote end.
1117                                  */
1118                                 if (ifa->ifa_dstaddr != NULL &&
1119                                     sa_equal(addr, ifa->ifa_dstaddr))
1120                                         return (ifa);
1121                         } else {
1122                                 /*
1123                                  * if we have a special address handler,
1124                                  * then use it instead of the generic one.
1125                                  */
1126                                 if (ifa->ifa_claim_addr) {
1127                                         if ((*ifa->ifa_claim_addr)(ifa, addr)) {
1128                                                 return (ifa);
1129                                         } else {
1130                                                 continue;
1131                                         }
1132                                 }
1133
1134                                 /*
1135                                  * Scan all the bits in the ifa's address.
1136                                  * If a bit dissagrees with what we are
1137                                  * looking for, mask it with the netmask
1138                                  * to see if it really matters.
1139                                  * (A byte at a time)
1140                                  */
1141                                 if (ifa->ifa_netmask == 0)
1142                                         continue;
1143                                 cp = addr_data;
1144                                 cp2 = ifa->ifa_addr->sa_data;
1145                                 cp3 = ifa->ifa_netmask->sa_data;
1146                                 cplim = ifa->ifa_netmask->sa_len +
1147                                         (char *)ifa->ifa_netmask;
1148                                 while (cp3 < cplim)
1149                                         if ((*cp++ ^ *cp2++) & *cp3++)
1150                                                 goto next; /* next address! */
1151                                 /*
1152                                  * If the netmask of what we just found
1153                                  * is more specific than what we had before
1154                                  * (if we had one) then remember the new one
1155                                  * before continuing to search
1156                                  * for an even better one.
1157                                  */
1158                                 if (ifa_maybe == NULL ||
1159                                     rn_refines((char *)ifa->ifa_netmask,
1160                                                (char *)ifa_maybe->ifa_netmask))
1161                                         ifa_maybe = ifa;
1162                         }
1163                 }
1164         }
1165         return (ifa_maybe);
1166 }
1167
1168 /*
1169  * Find an interface address specific to an interface best matching
1170  * a given address.
1171  */
1172 struct ifaddr *
1173 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1174 {
1175         struct ifaddr_container *ifac;
1176         char *cp, *cp2, *cp3;
1177         char *cplim;
1178         struct ifaddr *ifa_maybe = NULL;
1179         u_int af = addr->sa_family;
1180
1181         if (af >= AF_MAX)
1182                 return (0);
1183         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1184                 struct ifaddr *ifa = ifac->ifa;
1185
1186                 if (ifa->ifa_addr->sa_family != af)
1187                         continue;
1188                 if (ifa_maybe == NULL)
1189                         ifa_maybe = ifa;
1190                 if (ifa->ifa_netmask == NULL) {
1191                         if (sa_equal(addr, ifa->ifa_addr) ||
1192                             (ifa->ifa_dstaddr != NULL &&
1193                              sa_equal(addr, ifa->ifa_dstaddr)))
1194                                 return (ifa);
1195                         continue;
1196                 }
1197                 if (ifp->if_flags & IFF_POINTOPOINT) {
1198                         if (sa_equal(addr, ifa->ifa_dstaddr))
1199                                 return (ifa);
1200                 } else {
1201                         cp = addr->sa_data;
1202                         cp2 = ifa->ifa_addr->sa_data;
1203                         cp3 = ifa->ifa_netmask->sa_data;
1204                         cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1205                         for (; cp3 < cplim; cp3++)
1206                                 if ((*cp++ ^ *cp2++) & *cp3)
1207                                         break;
1208                         if (cp3 == cplim)
1209                                 return (ifa);
1210                 }
1211         }
1212         return (ifa_maybe);
1213 }
1214
1215 /*
1216  * Default action when installing a route with a Link Level gateway.
1217  * Lookup an appropriate real ifa to point to.
1218  * This should be moved to /sys/net/link.c eventually.
1219  */
1220 static void
1221 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1222 {
1223         struct ifaddr *ifa;
1224         struct sockaddr *dst;
1225         struct ifnet *ifp;
1226
1227         if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
1228             (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
1229                 return;
1230         ifa = ifaof_ifpforaddr(dst, ifp);
1231         if (ifa != NULL) {
1232                 IFAFREE(rt->rt_ifa);
1233                 IFAREF(ifa);
1234                 rt->rt_ifa = ifa;
1235                 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1236                         ifa->ifa_rtrequest(cmd, rt, info);
1237         }
1238 }
1239
1240 /*
1241  * Mark an interface down and notify protocols of
1242  * the transition.
1243  * NOTE: must be called at splnet or eqivalent.
1244  */
1245 void
1246 if_unroute(struct ifnet *ifp, int flag, int fam)
1247 {
1248         struct ifaddr_container *ifac;
1249
1250         ifp->if_flags &= ~flag;
1251         getmicrotime(&ifp->if_lastchange);
1252         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1253                 struct ifaddr *ifa = ifac->ifa;
1254
1255                 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1256                         kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1257         }
1258         ifq_purge(&ifp->if_snd);
1259         rt_ifmsg(ifp);
1260 }
1261
1262 /*
1263  * Mark an interface up and notify protocols of
1264  * the transition.
1265  * NOTE: must be called at splnet or eqivalent.
1266  */
1267 void
1268 if_route(struct ifnet *ifp, int flag, int fam)
1269 {
1270         struct ifaddr_container *ifac;
1271
1272         ifq_purge(&ifp->if_snd);
1273         ifp->if_flags |= flag;
1274         getmicrotime(&ifp->if_lastchange);
1275         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1276                 struct ifaddr *ifa = ifac->ifa;
1277
1278                 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1279                         kpfctlinput(PRC_IFUP, ifa->ifa_addr);
1280         }
1281         rt_ifmsg(ifp);
1282 #ifdef INET6
1283         in6_if_up(ifp);
1284 #endif
1285 }
1286
1287 /*
1288  * Mark an interface down and notify protocols of the transition.  An
1289  * interface going down is also considered to be a synchronizing event.
1290  * We must ensure that all packet processing related to the interface
1291  * has completed before we return so e.g. the caller can free the ifnet
1292  * structure that the mbufs may be referencing.
1293  *
1294  * NOTE: must be called at splnet or eqivalent.
1295  */
1296 void
1297 if_down(struct ifnet *ifp)
1298 {
1299         if_unroute(ifp, IFF_UP, AF_UNSPEC);
1300         netmsg_service_sync();
1301 }
1302
1303 /*
1304  * Mark an interface up and notify protocols of
1305  * the transition.
1306  * NOTE: must be called at splnet or eqivalent.
1307  */
1308 void
1309 if_up(struct ifnet *ifp)
1310 {
1311         if_route(ifp, IFF_UP, AF_UNSPEC);
1312 }
1313
1314 /*
1315  * Process a link state change.
1316  * NOTE: must be called at splsoftnet or equivalent.
1317  */
1318 void
1319 if_link_state_change(struct ifnet *ifp)
1320 {
1321         int link_state = ifp->if_link_state;
1322
1323         rt_ifmsg(ifp);
1324         devctl_notify("IFNET", ifp->if_xname,
1325             (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1326 }
1327
1328 /*
1329  * Handle interface watchdog timer routines.  Called
1330  * from softclock, we decrement timers (if set) and
1331  * call the appropriate interface routine on expiration.
1332  */
1333 static void
1334 if_slowtimo(void *arg)
1335 {
1336         struct ifnet *ifp;
1337
1338         crit_enter();
1339
1340         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1341                 if (ifp->if_timer == 0 || --ifp->if_timer)
1342                         continue;
1343                 if (ifp->if_watchdog) {
1344                         if (ifnet_tryserialize_all(ifp)) {
1345                                 (*ifp->if_watchdog)(ifp);
1346                                 ifnet_deserialize_all(ifp);
1347                         } else {
1348                                 /* try again next timeout */
1349                                 ++ifp->if_timer;
1350                         }
1351                 }
1352         }
1353
1354         crit_exit();
1355
1356         callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
1357 }
1358
1359 /*
1360  * Map interface name to
1361  * interface structure pointer.
1362  */
1363 struct ifnet *
1364 ifunit(const char *name)
1365 {
1366         struct ifnet *ifp;
1367
1368         /*
1369          * Search all the interfaces for this name/number
1370          */
1371
1372         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1373                 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
1374                         break;
1375         }
1376         return (ifp);
1377 }
1378
1379
1380 /*
1381  * Map interface name in a sockaddr_dl to
1382  * interface structure pointer.
1383  */
1384 struct ifnet *
1385 if_withname(struct sockaddr *sa)
1386 {
1387         char ifname[IFNAMSIZ+1];
1388         struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1389
1390         if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1391              (sdl->sdl_nlen > IFNAMSIZ) )
1392                 return NULL;
1393
1394         /*
1395          * ifunit wants a null-terminated name.  It may not be null-terminated
1396          * in the sockaddr.  We don't want to change the caller's sockaddr,
1397          * and there might not be room to put the trailing null anyway, so we
1398          * make a local copy that we know we can null terminate safely.
1399          */
1400
1401         bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1402         ifname[sdl->sdl_nlen] = '\0';
1403         return ifunit(ifname);
1404 }
1405
1406
1407 /*
1408  * Interface ioctls.
1409  */
1410 int
1411 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
1412 {
1413         struct ifnet *ifp;
1414         struct ifreq *ifr;
1415         struct ifstat *ifs;
1416         int error;
1417         short oif_flags;
1418         int new_flags;
1419 #ifdef COMPAT_43
1420         int ocmd;
1421 #endif
1422         size_t namelen, onamelen;
1423         char new_name[IFNAMSIZ];
1424         struct ifaddr *ifa;
1425         struct sockaddr_dl *sdl;
1426
1427         switch (cmd) {
1428         case SIOCGIFCONF:
1429         case OSIOCGIFCONF:
1430                 return (ifconf(cmd, data, cred));
1431         default:
1432                 break;
1433         }
1434
1435         ifr = (struct ifreq *)data;
1436
1437         switch (cmd) {
1438         case SIOCIFCREATE:
1439         case SIOCIFCREATE2:
1440                 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1441                         return (error);
1442                 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
1443                         cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
1444         case SIOCIFDESTROY:
1445                 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1446                         return (error);
1447                 return (if_clone_destroy(ifr->ifr_name));
1448         case SIOCIFGCLONERS:
1449                 return (if_clone_list((struct if_clonereq *)data));
1450         default:
1451                 break;
1452         }
1453
1454         /*
1455          * Nominal ioctl through interface, lookup the ifp and obtain a
1456          * lock to serialize the ifconfig ioctl operation.
1457          */
1458         ifp = ifunit(ifr->ifr_name);
1459         if (ifp == NULL)
1460                 return (ENXIO);
1461         error = 0;
1462         mtx_lock(&ifp->if_ioctl_mtx);
1463
1464         switch (cmd) {
1465         case SIOCGIFINDEX:
1466                 ifr->ifr_index = ifp->if_index;
1467                 break;
1468
1469         case SIOCGIFFLAGS:
1470                 ifr->ifr_flags = ifp->if_flags;
1471                 ifr->ifr_flagshigh = ifp->if_flags >> 16;
1472                 break;
1473
1474         case SIOCGIFCAP:
1475                 ifr->ifr_reqcap = ifp->if_capabilities;
1476                 ifr->ifr_curcap = ifp->if_capenable;
1477                 break;
1478
1479         case SIOCGIFMETRIC:
1480                 ifr->ifr_metric = ifp->if_metric;
1481                 break;
1482
1483         case SIOCGIFMTU:
1484                 ifr->ifr_mtu = ifp->if_mtu;
1485                 break;
1486
1487         case SIOCGIFDATA:
1488                 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
1489                                 sizeof(ifp->if_data));
1490                 break;
1491
1492         case SIOCGIFPHYS:
1493                 ifr->ifr_phys = ifp->if_physical;
1494                 break;
1495
1496         case SIOCGIFPOLLCPU:
1497 #ifdef DEVICE_POLLING
1498                 ifr->ifr_pollcpu = ifp->if_poll_cpuid;
1499 #else
1500                 ifr->ifr_pollcpu = -1;
1501 #endif
1502                 break;
1503
1504         case SIOCSIFPOLLCPU:
1505 #ifdef DEVICE_POLLING
1506                 if ((ifp->if_flags & IFF_POLLING) == 0)
1507                         ether_pollcpu_register(ifp, ifr->ifr_pollcpu);
1508 #endif
1509                 break;
1510
1511         case SIOCSIFFLAGS:
1512                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1513                 if (error)
1514                         break;
1515                 new_flags = (ifr->ifr_flags & 0xffff) |
1516                     (ifr->ifr_flagshigh << 16);
1517                 if (ifp->if_flags & IFF_SMART) {
1518                         /* Smart drivers twiddle their own routes */
1519                 } else if (ifp->if_flags & IFF_UP &&
1520                     (new_flags & IFF_UP) == 0) {
1521                         crit_enter();
1522                         if_down(ifp);
1523                         crit_exit();
1524                 } else if (new_flags & IFF_UP &&
1525                     (ifp->if_flags & IFF_UP) == 0) {
1526                         crit_enter();
1527                         if_up(ifp);
1528                         crit_exit();
1529                 }
1530
1531 #ifdef DEVICE_POLLING
1532                 if ((new_flags ^ ifp->if_flags) & IFF_POLLING) {
1533                         if (new_flags & IFF_POLLING) {
1534                                 ether_poll_register(ifp);
1535                         } else {
1536                                 ether_poll_deregister(ifp);
1537                         }
1538                 }
1539 #endif
1540 #ifdef IFPOLL_ENABLE
1541                 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
1542                         if (new_flags & IFF_NPOLLING)
1543                                 ifpoll_register(ifp);
1544                         else
1545                                 ifpoll_deregister(ifp);
1546                 }
1547 #endif
1548
1549                 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1550                         (new_flags &~ IFF_CANTCHANGE);
1551                 if (new_flags & IFF_PPROMISC) {
1552                         /* Permanently promiscuous mode requested */
1553                         ifp->if_flags |= IFF_PROMISC;
1554                 } else if (ifp->if_pcount == 0) {
1555                         ifp->if_flags &= ~IFF_PROMISC;
1556                 }
1557                 if (ifp->if_ioctl) {
1558                         ifnet_serialize_all(ifp);
1559                         ifp->if_ioctl(ifp, cmd, data, cred);
1560                         ifnet_deserialize_all(ifp);
1561                 }
1562                 getmicrotime(&ifp->if_lastchange);
1563                 break;
1564
1565         case SIOCSIFCAP:
1566                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1567                 if (error)
1568                         break;
1569                 if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
1570                         error = EINVAL;
1571                         break;
1572                 }
1573                 ifnet_serialize_all(ifp);
1574                 ifp->if_ioctl(ifp, cmd, data, cred);
1575                 ifnet_deserialize_all(ifp);
1576                 break;
1577
1578         case SIOCSIFNAME:
1579                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1580                 if (error)
1581                         break;
1582                 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1583                 if (error)
1584                         break;
1585                 if (new_name[0] == '\0') {
1586                         error = EINVAL;
1587                         break;
1588                 }
1589                 if (ifunit(new_name) != NULL) {
1590                         error = EEXIST;
1591                         break;
1592                 }
1593
1594                 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
1595
1596                 /* Announce the departure of the interface. */
1597                 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1598
1599                 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1600                 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
1601                 /* XXX IFA_LOCK(ifa); */
1602                 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1603                 namelen = strlen(new_name);
1604                 onamelen = sdl->sdl_nlen;
1605                 /*
1606                  * Move the address if needed.  This is safe because we
1607                  * allocate space for a name of length IFNAMSIZ when we
1608                  * create this in if_attach().
1609                  */
1610                 if (namelen != onamelen) {
1611                         bcopy(sdl->sdl_data + onamelen,
1612                             sdl->sdl_data + namelen, sdl->sdl_alen);
1613                 }
1614                 bcopy(new_name, sdl->sdl_data, namelen);
1615                 sdl->sdl_nlen = namelen;
1616                 sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1617                 bzero(sdl->sdl_data, onamelen);
1618                 while (namelen != 0)
1619                         sdl->sdl_data[--namelen] = 0xff;
1620                 /* XXX IFA_UNLOCK(ifa) */
1621
1622                 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
1623
1624                 /* Announce the return of the interface. */
1625                 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1626                 break;
1627
1628         case SIOCSIFMETRIC:
1629                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1630                 if (error)
1631                         break;
1632                 ifp->if_metric = ifr->ifr_metric;
1633                 getmicrotime(&ifp->if_lastchange);
1634                 break;
1635
1636         case SIOCSIFPHYS:
1637                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1638                 if (error)
1639                         break;
1640                 if (ifp->if_ioctl == NULL) {
1641                         error = EOPNOTSUPP;
1642                         break;
1643                 }
1644                 ifnet_serialize_all(ifp);
1645                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1646                 ifnet_deserialize_all(ifp);
1647                 if (error == 0)
1648                         getmicrotime(&ifp->if_lastchange);
1649                 break;
1650
1651         case SIOCSIFMTU:
1652         {
1653                 u_long oldmtu = ifp->if_mtu;
1654
1655                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1656                 if (error)
1657                         break;
1658                 if (ifp->if_ioctl == NULL) {
1659                         error = EOPNOTSUPP;
1660                         break;
1661                 }
1662                 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
1663                         error = EINVAL;
1664                         break;
1665                 }
1666                 ifnet_serialize_all(ifp);
1667                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1668                 ifnet_deserialize_all(ifp);
1669                 if (error == 0) {
1670                         getmicrotime(&ifp->if_lastchange);
1671                         rt_ifmsg(ifp);
1672                 }
1673                 /*
1674                  * If the link MTU changed, do network layer specific procedure.
1675                  */
1676                 if (ifp->if_mtu != oldmtu) {
1677 #ifdef INET6
1678                         nd6_setmtu(ifp);
1679 #endif
1680                 }
1681                 break;
1682         }
1683
1684         case SIOCADDMULTI:
1685         case SIOCDELMULTI:
1686                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1687                 if (error)
1688                         break;
1689
1690                 /* Don't allow group membership on non-multicast interfaces. */
1691                 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1692                         error = EOPNOTSUPP;
1693                         break;
1694                 }
1695
1696                 /* Don't let users screw up protocols' entries. */
1697                 if (ifr->ifr_addr.sa_family != AF_LINK) {
1698                         error = EINVAL;
1699                         break;
1700                 }
1701
1702                 if (cmd == SIOCADDMULTI) {
1703                         struct ifmultiaddr *ifma;
1704                         error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1705                 } else {
1706                         error = if_delmulti(ifp, &ifr->ifr_addr);
1707                 }
1708                 if (error == 0)
1709                         getmicrotime(&ifp->if_lastchange);
1710                 break;
1711
1712         case SIOCSIFPHYADDR:
1713         case SIOCDIFPHYADDR:
1714 #ifdef INET6
1715         case SIOCSIFPHYADDR_IN6:
1716 #endif
1717         case SIOCSLIFPHYADDR:
1718         case SIOCSIFMEDIA:
1719         case SIOCSIFGENERIC:
1720                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1721                 if (error)
1722                         break;
1723                 if (ifp->if_ioctl == 0) {
1724                         error = EOPNOTSUPP;
1725                         break;
1726                 }
1727                 ifnet_serialize_all(ifp);
1728                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1729                 ifnet_deserialize_all(ifp);
1730                 if (error == 0)
1731                         getmicrotime(&ifp->if_lastchange);
1732                 break;
1733
1734         case SIOCGIFSTATUS:
1735                 ifs = (struct ifstat *)data;
1736                 ifs->ascii[0] = '\0';
1737                 /* fall through */
1738         case SIOCGIFPSRCADDR:
1739         case SIOCGIFPDSTADDR:
1740         case SIOCGLIFPHYADDR:
1741         case SIOCGIFMEDIA:
1742         case SIOCGIFGENERIC:
1743                 if (ifp->if_ioctl == NULL) {
1744                         error = EOPNOTSUPP;
1745                         break;
1746                 }
1747                 ifnet_serialize_all(ifp);
1748                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1749                 ifnet_deserialize_all(ifp);
1750                 break;
1751
1752         case SIOCSIFLLADDR:
1753                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1754                 if (error)
1755                         break;
1756                 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
1757                                      ifr->ifr_addr.sa_len);
1758                 EVENTHANDLER_INVOKE(iflladdr_event, ifp);
1759                 break;
1760
1761         default:
1762                 oif_flags = ifp->if_flags;
1763                 if (so->so_proto == 0) {
1764                         error = EOPNOTSUPP;
1765                         break;
1766                 }
1767 #ifndef COMPAT_43
1768                 error = so_pru_control_direct(so, cmd, data, ifp);
1769 #else
1770                 ocmd = cmd;
1771
1772                 switch (cmd) {
1773                 case SIOCSIFDSTADDR:
1774                 case SIOCSIFADDR:
1775                 case SIOCSIFBRDADDR:
1776                 case SIOCSIFNETMASK:
1777 #if BYTE_ORDER != BIG_ENDIAN
1778                         if (ifr->ifr_addr.sa_family == 0 &&
1779                             ifr->ifr_addr.sa_len < 16) {
1780                                 ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1781                                 ifr->ifr_addr.sa_len = 16;
1782                         }
1783 #else
1784                         if (ifr->ifr_addr.sa_len == 0)
1785                                 ifr->ifr_addr.sa_len = 16;
1786 #endif
1787                         break;
1788                 case OSIOCGIFADDR:
1789                         cmd = SIOCGIFADDR;
1790                         break;
1791                 case OSIOCGIFDSTADDR:
1792                         cmd = SIOCGIFDSTADDR;
1793                         break;
1794                 case OSIOCGIFBRDADDR:
1795                         cmd = SIOCGIFBRDADDR;
1796                         break;
1797                 case OSIOCGIFNETMASK:
1798                         cmd = SIOCGIFNETMASK;
1799                         break;
1800                 default:
1801                         break;
1802                 }
1803
1804                 error = so_pru_control_direct(so, cmd, data, ifp);
1805
1806                 switch (ocmd) {
1807                 case OSIOCGIFADDR:
1808                 case OSIOCGIFDSTADDR:
1809                 case OSIOCGIFBRDADDR:
1810                 case OSIOCGIFNETMASK:
1811                         *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1812                         break;
1813                 }
1814 #endif /* COMPAT_43 */
1815
1816                 if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1817 #ifdef INET6
1818                         DELAY(100);/* XXX: temporary workaround for fxp issue*/
1819                         if (ifp->if_flags & IFF_UP) {
1820                                 crit_enter();
1821                                 in6_if_up(ifp);
1822                                 crit_exit();
1823                         }
1824 #endif
1825                 }
1826                 break;
1827         }
1828
1829         mtx_unlock(&ifp->if_ioctl_mtx);
1830         return (error);
1831 }
1832
1833 /*
1834  * Set/clear promiscuous mode on interface ifp based on the truth value
1835  * of pswitch.  The calls are reference counted so that only the first
1836  * "on" request actually has an effect, as does the final "off" request.
1837  * Results are undefined if the "off" and "on" requests are not matched.
1838  */
1839 int
1840 ifpromisc(struct ifnet *ifp, int pswitch)
1841 {
1842         struct ifreq ifr;
1843         int error;
1844         int oldflags;
1845
1846         oldflags = ifp->if_flags;
1847         if (ifp->if_flags & IFF_PPROMISC) {
1848                 /* Do nothing if device is in permanently promiscuous mode */
1849                 ifp->if_pcount += pswitch ? 1 : -1;
1850                 return (0);
1851         }
1852         if (pswitch) {
1853                 /*
1854                  * If the device is not configured up, we cannot put it in
1855                  * promiscuous mode.
1856                  */
1857                 if ((ifp->if_flags & IFF_UP) == 0)
1858                         return (ENETDOWN);
1859                 if (ifp->if_pcount++ != 0)
1860                         return (0);
1861                 ifp->if_flags |= IFF_PROMISC;
1862                 log(LOG_INFO, "%s: promiscuous mode enabled\n",
1863                     ifp->if_xname);
1864         } else {
1865                 if (--ifp->if_pcount > 0)
1866                         return (0);
1867                 ifp->if_flags &= ~IFF_PROMISC;
1868                 log(LOG_INFO, "%s: promiscuous mode disabled\n",
1869                     ifp->if_xname);
1870         }
1871         ifr.ifr_flags = ifp->if_flags;
1872         ifr.ifr_flagshigh = ifp->if_flags >> 16;
1873         ifnet_serialize_all(ifp);
1874         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
1875         ifnet_deserialize_all(ifp);
1876         if (error == 0)
1877                 rt_ifmsg(ifp);
1878         else
1879                 ifp->if_flags = oldflags;
1880         return error;
1881 }
1882
1883 /*
1884  * Return interface configuration
1885  * of system.  List may be used
1886  * in later ioctl's (above) to get
1887  * other information.
1888  */
1889 static int
1890 ifconf(u_long cmd, caddr_t data, struct ucred *cred)
1891 {
1892         struct ifconf *ifc = (struct ifconf *)data;
1893         struct ifnet *ifp;
1894         struct sockaddr *sa;
1895         struct ifreq ifr, *ifrp;
1896         int space = ifc->ifc_len, error = 0;
1897
1898         ifrp = ifc->ifc_req;
1899         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1900                 struct ifaddr_container *ifac;
1901                 int addrs;
1902
1903                 if (space <= sizeof ifr)
1904                         break;
1905
1906                 /*
1907                  * Zero the stack declared structure first to prevent
1908                  * memory disclosure.
1909                  */
1910                 bzero(&ifr, sizeof(ifr));
1911                 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1912                     >= sizeof(ifr.ifr_name)) {
1913                         error = ENAMETOOLONG;
1914                         break;
1915                 }
1916
1917                 addrs = 0;
1918                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1919                         struct ifaddr *ifa = ifac->ifa;
1920
1921                         if (space <= sizeof ifr)
1922                                 break;
1923                         sa = ifa->ifa_addr;
1924                         if (cred->cr_prison &&
1925                             prison_if(cred, sa))
1926                                 continue;
1927                         addrs++;
1928 #ifdef COMPAT_43
1929                         if (cmd == OSIOCGIFCONF) {
1930                                 struct osockaddr *osa =
1931                                          (struct osockaddr *)&ifr.ifr_addr;
1932                                 ifr.ifr_addr = *sa;
1933                                 osa->sa_family = sa->sa_family;
1934                                 error = copyout(&ifr, ifrp, sizeof ifr);
1935                                 ifrp++;
1936                         } else
1937 #endif
1938                         if (sa->sa_len <= sizeof(*sa)) {
1939                                 ifr.ifr_addr = *sa;
1940                                 error = copyout(&ifr, ifrp, sizeof ifr);
1941                                 ifrp++;
1942                         } else {
1943                                 if (space < (sizeof ifr) + sa->sa_len -
1944                                             sizeof(*sa))
1945                                         break;
1946                                 space -= sa->sa_len - sizeof(*sa);
1947                                 error = copyout(&ifr, ifrp,
1948                                                 sizeof ifr.ifr_name);
1949                                 if (error == 0)
1950                                         error = copyout(sa, &ifrp->ifr_addr,
1951                                                         sa->sa_len);
1952                                 ifrp = (struct ifreq *)
1953                                         (sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1954                         }
1955                         if (error)
1956                                 break;
1957                         space -= sizeof ifr;
1958                 }
1959                 if (error)
1960                         break;
1961                 if (!addrs) {
1962                         bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
1963                         error = copyout(&ifr, ifrp, sizeof ifr);
1964                         if (error)
1965                                 break;
1966                         space -= sizeof ifr;
1967                         ifrp++;
1968                 }
1969         }
1970         ifc->ifc_len -= space;
1971         return (error);
1972 }
1973
1974 /*
1975  * Just like if_promisc(), but for all-multicast-reception mode.
1976  */
1977 int
1978 if_allmulti(struct ifnet *ifp, int onswitch)
1979 {
1980         int error = 0;
1981         struct ifreq ifr;
1982
1983         crit_enter();
1984
1985         if (onswitch) {
1986                 if (ifp->if_amcount++ == 0) {
1987                         ifp->if_flags |= IFF_ALLMULTI;
1988                         ifr.ifr_flags = ifp->if_flags;
1989                         ifr.ifr_flagshigh = ifp->if_flags >> 16;
1990                         ifnet_serialize_all(ifp);
1991                         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
1992                                               NULL);
1993                         ifnet_deserialize_all(ifp);
1994                 }
1995         } else {
1996                 if (ifp->if_amcount > 1) {
1997                         ifp->if_amcount--;
1998                 } else {
1999                         ifp->if_amcount = 0;
2000                         ifp->if_flags &= ~IFF_ALLMULTI;
2001                         ifr.ifr_flags = ifp->if_flags;
2002                         ifr.ifr_flagshigh = ifp->if_flags >> 16;
2003                         ifnet_serialize_all(ifp);
2004                         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2005                                               NULL);
2006                         ifnet_deserialize_all(ifp);
2007                 }
2008         }
2009
2010         crit_exit();
2011
2012         if (error == 0)
2013                 rt_ifmsg(ifp);
2014         return error;
2015 }
2016
2017 /*
2018  * Add a multicast listenership to the interface in question.
2019  * The link layer provides a routine which converts
2020  */
2021 int
2022 if_addmulti(
2023         struct ifnet *ifp,      /* interface to manipulate */
2024         struct sockaddr *sa,    /* address to add */
2025         struct ifmultiaddr **retifma)
2026 {
2027         struct sockaddr *llsa, *dupsa;
2028         int error;
2029         struct ifmultiaddr *ifma;
2030
2031         /*
2032          * If the matching multicast address already exists
2033          * then don't add a new one, just add a reference
2034          */
2035         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2036                 if (sa_equal(sa, ifma->ifma_addr)) {
2037                         ifma->ifma_refcount++;
2038                         if (retifma)
2039                                 *retifma = ifma;
2040                         return 0;
2041                 }
2042         }
2043
2044         /*
2045          * Give the link layer a chance to accept/reject it, and also
2046          * find out which AF_LINK address this maps to, if it isn't one
2047          * already.
2048          */
2049         if (ifp->if_resolvemulti) {
2050                 ifnet_serialize_all(ifp);
2051                 error = ifp->if_resolvemulti(ifp, &llsa, sa);
2052                 ifnet_deserialize_all(ifp);
2053                 if (error) 
2054                         return error;
2055         } else {
2056                 llsa = NULL;
2057         }
2058
2059         ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2060         dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
2061         bcopy(sa, dupsa, sa->sa_len);
2062
2063         ifma->ifma_addr = dupsa;
2064         ifma->ifma_lladdr = llsa;
2065         ifma->ifma_ifp = ifp;
2066         ifma->ifma_refcount = 1;
2067         ifma->ifma_protospec = 0;
2068         rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2069
2070         /*
2071          * Some network interfaces can scan the address list at
2072          * interrupt time; lock them out.
2073          */
2074         crit_enter();
2075         TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2076         crit_exit();
2077         if (retifma)
2078                 *retifma = ifma;
2079
2080         if (llsa != NULL) {
2081                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2082                         if (sa_equal(ifma->ifma_addr, llsa))
2083                                 break;
2084                 }
2085                 if (ifma) {
2086                         ifma->ifma_refcount++;
2087                 } else {
2088                         ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2089                         dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
2090                         bcopy(llsa, dupsa, llsa->sa_len);
2091                         ifma->ifma_addr = dupsa;
2092                         ifma->ifma_ifp = ifp;
2093                         ifma->ifma_refcount = 1;
2094                         crit_enter();
2095                         TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2096                         crit_exit();
2097                 }
2098         }
2099         /*
2100          * We are certain we have added something, so call down to the
2101          * interface to let them know about it.
2102          */
2103         crit_enter();
2104         ifnet_serialize_all(ifp);
2105         if (ifp->if_ioctl)
2106                 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
2107         ifnet_deserialize_all(ifp);
2108         crit_exit();
2109
2110         return 0;
2111 }
2112
2113 /*
2114  * Remove a reference to a multicast address on this interface.  Yell
2115  * if the request does not match an existing membership.
2116  */
2117 int
2118 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2119 {
2120         struct ifmultiaddr *ifma;
2121
2122         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2123                 if (sa_equal(sa, ifma->ifma_addr))
2124                         break;
2125         if (ifma == NULL)
2126                 return ENOENT;
2127
2128         if (ifma->ifma_refcount > 1) {
2129                 ifma->ifma_refcount--;
2130                 return 0;
2131         }
2132
2133         rt_newmaddrmsg(RTM_DELMADDR, ifma);
2134         sa = ifma->ifma_lladdr;
2135         crit_enter();
2136         TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2137         /*
2138          * Make sure the interface driver is notified
2139          * in the case of a link layer mcast group being left.
2140          */
2141         if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
2142                 ifnet_serialize_all(ifp);
2143                 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2144                 ifnet_deserialize_all(ifp);
2145         }
2146         crit_exit();
2147         kfree(ifma->ifma_addr, M_IFMADDR);
2148         kfree(ifma, M_IFMADDR);
2149         if (sa == NULL)
2150                 return 0;
2151
2152         /*
2153          * Now look for the link-layer address which corresponds to
2154          * this network address.  It had been squirreled away in
2155          * ifma->ifma_lladdr for this purpose (so we don't have
2156          * to call ifp->if_resolvemulti() again), and we saved that
2157          * value in sa above.  If some nasty deleted the
2158          * link-layer address out from underneath us, we can deal because
2159          * the address we stored was is not the same as the one which was
2160          * in the record for the link-layer address.  (So we don't complain
2161          * in that case.)
2162          */
2163         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2164                 if (sa_equal(sa, ifma->ifma_addr))
2165                         break;
2166         if (ifma == NULL)
2167                 return 0;
2168
2169         if (ifma->ifma_refcount > 1) {
2170                 ifma->ifma_refcount--;
2171                 return 0;
2172         }
2173
2174         crit_enter();
2175         ifnet_serialize_all(ifp);
2176         TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2177         ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2178         ifnet_deserialize_all(ifp);
2179         crit_exit();
2180         kfree(ifma->ifma_addr, M_IFMADDR);
2181         kfree(sa, M_IFMADDR);
2182         kfree(ifma, M_IFMADDR);
2183
2184         return 0;
2185 }
2186
2187 /*
2188  * Delete all multicast group membership for an interface.
2189  * Should be used to quickly flush all multicast filters.
2190  */
2191 void
2192 if_delallmulti(struct ifnet *ifp)
2193 {
2194         struct ifmultiaddr *ifma;
2195         struct ifmultiaddr *next;
2196
2197         TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
2198                 if_delmulti(ifp, ifma->ifma_addr);
2199 }
2200
2201
2202 /*
2203  * Set the link layer address on an interface.
2204  *
2205  * At this time we only support certain types of interfaces,
2206  * and we don't allow the length of the address to change.
2207  */
2208 int
2209 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
2210 {
2211         struct sockaddr_dl *sdl;
2212         struct ifreq ifr;
2213
2214         sdl = IF_LLSOCKADDR(ifp);
2215         if (sdl == NULL)
2216                 return (EINVAL);
2217         if (len != sdl->sdl_alen)       /* don't allow length to change */
2218                 return (EINVAL);
2219         switch (ifp->if_type) {
2220         case IFT_ETHER:                 /* these types use struct arpcom */
2221         case IFT_XETHER:
2222         case IFT_L2VLAN:
2223                 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
2224                 bcopy(lladdr, LLADDR(sdl), len);
2225                 break;
2226         default:
2227                 return (ENODEV);
2228         }
2229         /*
2230          * If the interface is already up, we need
2231          * to re-init it in order to reprogram its
2232          * address filter.
2233          */
2234         ifnet_serialize_all(ifp);
2235         if ((ifp->if_flags & IFF_UP) != 0) {
2236 #ifdef INET
2237                 struct ifaddr_container *ifac;
2238 #endif
2239
2240                 ifp->if_flags &= ~IFF_UP;
2241                 ifr.ifr_flags = ifp->if_flags;
2242                 ifr.ifr_flagshigh = ifp->if_flags >> 16;
2243                 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2244                               NULL);
2245                 ifp->if_flags |= IFF_UP;
2246                 ifr.ifr_flags = ifp->if_flags;
2247                 ifr.ifr_flagshigh = ifp->if_flags >> 16;
2248                 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2249                                  NULL);
2250 #ifdef INET
2251                 /*
2252                  * Also send gratuitous ARPs to notify other nodes about
2253                  * the address change.
2254                  */
2255                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2256                         struct ifaddr *ifa = ifac->ifa;
2257
2258                         if (ifa->ifa_addr != NULL &&
2259                             ifa->ifa_addr->sa_family == AF_INET)
2260                                 arp_gratuitous(ifp, ifa);
2261                 }
2262 #endif
2263         }
2264         ifnet_deserialize_all(ifp);
2265         return (0);
2266 }
2267
2268 struct ifmultiaddr *
2269 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
2270 {
2271         struct ifmultiaddr *ifma;
2272
2273         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2274                 if (sa_equal(ifma->ifma_addr, sa))
2275                         break;
2276
2277         return ifma;
2278 }
2279
2280 /*
2281  * This function locates the first real ethernet MAC from a network
2282  * card and loads it into node, returning 0 on success or ENOENT if
2283  * no suitable interfaces were found.  It is used by the uuid code to
2284  * generate a unique 6-byte number.
2285  */
2286 int
2287 if_getanyethermac(uint16_t *node, int minlen)
2288 {
2289         struct ifnet *ifp;
2290         struct sockaddr_dl *sdl;
2291
2292         TAILQ_FOREACH(ifp, &ifnet, if_link) {
2293                 if (ifp->if_type != IFT_ETHER)
2294                         continue;
2295                 sdl = IF_LLSOCKADDR(ifp);
2296                 if (sdl->sdl_alen < minlen)
2297                         continue;
2298                 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
2299                       minlen);
2300                 return(0);
2301         }
2302         return (ENOENT);
2303 }
2304
2305 /*
2306  * The name argument must be a pointer to storage which will last as
2307  * long as the interface does.  For physical devices, the result of
2308  * device_get_name(dev) is a good choice and for pseudo-devices a
2309  * static string works well.
2310  */
2311 void
2312 if_initname(struct ifnet *ifp, const char *name, int unit)
2313 {
2314         ifp->if_dname = name;
2315         ifp->if_dunit = unit;
2316         if (unit != IF_DUNIT_NONE)
2317                 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2318         else
2319                 strlcpy(ifp->if_xname, name, IFNAMSIZ);
2320 }
2321
2322 int
2323 if_printf(struct ifnet *ifp, const char *fmt, ...)
2324 {
2325         __va_list ap;
2326         int retval;
2327
2328         retval = kprintf("%s: ", ifp->if_xname);
2329         __va_start(ap, fmt);
2330         retval += kvprintf(fmt, ap);
2331         __va_end(ap);
2332         return (retval);
2333 }
2334
2335 struct ifnet *
2336 if_alloc(uint8_t type)
2337 {
2338         struct ifnet *ifp;
2339         size_t size;
2340
2341         /*
2342          * XXX temporary hack until arpcom is setup in if_l2com
2343          */
2344         if (type == IFT_ETHER)
2345                 size = sizeof(struct arpcom);
2346         else
2347                 size = sizeof(struct ifnet);
2348
2349         ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);
2350
2351         ifp->if_type = type;
2352
2353         if (if_com_alloc[type] != NULL) {
2354                 ifp->if_l2com = if_com_alloc[type](type, ifp);
2355                 if (ifp->if_l2com == NULL) {
2356                         kfree(ifp, M_IFNET);
2357                         return (NULL);
2358                 }
2359         }
2360         return (ifp);
2361 }
2362
/*
 * Release an ifnet by returning its memory to the M_IFNET malloc type.
 * NOTE(review): if_l2com is not touched here; confirm that callers
 * release it themselves when if_alloc() set it.
 */
void
if_free(struct ifnet *ifp)
{
        kfree(ifp, M_IFNET);
}
2368
2369 void
2370 ifq_set_classic(struct ifaltq *ifq)
2371 {
2372         ifq->altq_enqueue = ifq_classic_enqueue;
2373         ifq->altq_dequeue = ifq_classic_dequeue;
2374         ifq->altq_request = ifq_classic_request;
2375 }
2376
2377 int
2378 ifq_classic_enqueue(struct ifaltq *ifq, struct mbuf *m,
2379                     struct altq_pktattr *pa __unused)
2380 {
2381         logifq(enqueue, ifq);
2382         if (IF_QFULL(ifq)) {
2383                 m_freem(m);
2384                 return(ENOBUFS);
2385         } else {
2386                 IF_ENQUEUE(ifq, m);
2387                 return(0);
2388         }       
2389 }
2390
2391 struct mbuf *
2392 ifq_classic_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
2393 {
2394         struct mbuf *m;
2395
2396         switch (op) {
2397         case ALTDQ_POLL:
2398                 IF_POLL(ifq, m);
2399                 break;
2400         case ALTDQ_REMOVE:
2401                 logifq(dequeue, ifq);
2402                 IF_DEQUEUE(ifq, m);
2403                 break;
2404         default:
2405                 panic("unsupported ALTQ dequeue op: %d", op);
2406         }
2407         KKASSERT(mpolled == NULL || mpolled == m);
2408         return(m);
2409 }
2410
2411 int
2412 ifq_classic_request(struct ifaltq *ifq, int req, void *arg)
2413 {
2414         switch (req) {
2415         case ALTRQ_PURGE:
2416                 IF_DRAIN(ifq);
2417                 break;
2418         default:
2419                 panic("unsupported ALTQ request: %d", req);
2420         }
2421         return(0);
2422 }
2423
2424 int
2425 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
2426 {
2427         struct ifaltq *ifq = &ifp->if_snd;
2428         int running = 0, error, start = 0;
2429
2430         ASSERT_IFNET_NOT_SERIALIZED_TX(ifp);
2431
2432         ALTQ_LOCK(ifq);
2433         error = ifq_enqueue_locked(ifq, m, pa);
2434         if (error) {
2435                 ALTQ_UNLOCK(ifq);
2436                 return error;
2437         }
2438         if (!ifq->altq_started) {
2439                 /*
2440                  * Hold the interlock of ifnet.if_start
2441                  */
2442                 ifq->altq_started = 1;
2443                 start = 1;
2444         }
2445         ALTQ_UNLOCK(ifq);
2446
2447         ifp->if_obytes += m->m_pkthdr.len;
2448         if (m->m_flags & M_MCAST)
2449                 ifp->if_omcasts++;
2450
2451         if (!start) {
2452                 logifstart(avoid, ifp);
2453                 return 0;
2454         }
2455
2456         /*
2457          * Try to do direct ifnet.if_start first, if there is
2458          * contention on ifnet's serializer, ifnet.if_start will
2459          * be scheduled on ifnet's CPU.
2460          */
2461         if (!ifnet_tryserialize_tx(ifp)) {
2462                 /*
2463                  * ifnet serializer contention happened,
2464                  * ifnet.if_start is scheduled on ifnet's
2465                  * CPU, and we keep going.
2466                  */
2467                 logifstart(contend_sched, ifp);
2468                 if_start_schedule(ifp);
2469                 return 0;
2470         }
2471
2472         if ((ifp->if_flags & IFF_OACTIVE) == 0) {
2473                 logifstart(run, ifp);
2474                 ifp->if_start(ifp);
2475                 if ((ifp->if_flags &
2476                      (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
2477                         running = 1;
2478         }
2479
2480         ifnet_deserialize_tx(ifp);
2481
2482         if (if_start_need_schedule(ifq, running)) {
2483                 /*
2484                  * More data need to be transmitted, ifnet.if_start is
2485                  * scheduled on ifnet's CPU, and we keep going.
2486                  * NOTE: ifnet.if_start interlock is not released.
2487                  */
2488                 logifstart(sched, ifp);
2489                 if_start_schedule(ifp);
2490         }
2491         return 0;
2492 }
2493
2494 void *
2495 ifa_create(int size, int flags)
2496 {
2497         struct ifaddr *ifa;
2498         int i;
2499
2500         KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));
2501
2502         ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
2503         if (ifa == NULL)
2504                 return NULL;
2505
2506         ifa->ifa_containers = kmalloc(ncpus * sizeof(struct ifaddr_container),
2507                                       M_IFADDR, M_WAITOK | M_ZERO);
2508         ifa->ifa_ncnt = ncpus;
2509         for (i = 0; i < ncpus; ++i) {
2510                 struct ifaddr_container *ifac = &ifa->ifa_containers[i];
2511
2512                 ifac->ifa_magic = IFA_CONTAINER_MAGIC;
2513                 ifac->ifa = ifa;
2514                 ifac->ifa_refcnt = 1;
2515         }
2516 #ifdef IFADDR_DEBUG
2517         kprintf("alloc ifa %p %d\n", ifa, size);
2518 #endif
2519         return ifa;
2520 }
2521
2522 void
2523 ifac_free(struct ifaddr_container *ifac, int cpu_id)
2524 {
2525         struct ifaddr *ifa = ifac->ifa;
2526
2527         KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
2528         KKASSERT(ifac->ifa_refcnt == 0);
2529         KASSERT(ifac->ifa_listmask == 0,
2530                 ("ifa is still on %#x lists", ifac->ifa_listmask));
2531
2532         ifac->ifa_magic = IFA_CONTAINER_DEAD;
2533
2534 #ifdef IFADDR_DEBUG_VERBOSE
2535         kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
2536 #endif
2537
2538         KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
2539                 ("invalid # of ifac, %d", ifa->ifa_ncnt));
2540         if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
2541 #ifdef IFADDR_DEBUG
2542                 kprintf("free ifa %p\n", ifa);
2543 #endif
2544                 kfree(ifa->ifa_containers, M_IFADDR);
2545                 kfree(ifa, M_IFADDR);
2546         }
2547 }
2548
2549 static void
2550 ifa_iflink_dispatch(netmsg_t nmsg)
2551 {
2552         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2553         struct ifaddr *ifa = msg->ifa;
2554         struct ifnet *ifp = msg->ifp;
2555         int cpu = mycpuid;
2556         struct ifaddr_container *ifac;
2557
2558         crit_enter();
2559
2560         ifac = &ifa->ifa_containers[cpu];
2561         ASSERT_IFAC_VALID(ifac);
2562         KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
2563                 ("ifaddr is on if_addrheads"));
2564
2565         ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
2566         if (msg->tail)
2567                 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
2568         else
2569                 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);
2570
2571         crit_exit();
2572
2573         ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2574 }
2575
2576 void
2577 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
2578 {
2579         struct netmsg_ifaddr msg;
2580
2581         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2582                     0, ifa_iflink_dispatch);
2583         msg.ifa = ifa;
2584         msg.ifp = ifp;
2585         msg.tail = tail;
2586
2587         ifa_domsg(&msg.base.lmsg, 0);
2588 }
2589
2590 static void
2591 ifa_ifunlink_dispatch(netmsg_t nmsg)
2592 {
2593         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2594         struct ifaddr *ifa = msg->ifa;
2595         struct ifnet *ifp = msg->ifp;
2596         int cpu = mycpuid;
2597         struct ifaddr_container *ifac;
2598
2599         crit_enter();
2600
2601         ifac = &ifa->ifa_containers[cpu];
2602         ASSERT_IFAC_VALID(ifac);
2603         KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
2604                 ("ifaddr is not on if_addrhead"));
2605
2606         TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
2607         ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;
2608
2609         crit_exit();
2610
2611         ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2612 }
2613
2614 void
2615 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
2616 {
2617         struct netmsg_ifaddr msg;
2618
2619         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2620                     0, ifa_ifunlink_dispatch);
2621         msg.ifa = ifa;
2622         msg.ifp = ifp;
2623
2624         ifa_domsg(&msg.base.lmsg, 0);
2625 }
2626
2627 static void
2628 ifa_destroy_dispatch(netmsg_t nmsg)
2629 {
2630         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2631
2632         IFAFREE(msg->ifa);
2633         ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
2634 }
2635
2636 void
2637 ifa_destroy(struct ifaddr *ifa)
2638 {
2639         struct netmsg_ifaddr msg;
2640
2641         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2642                     0, ifa_destroy_dispatch);
2643         msg.ifa = ifa;
2644
2645         ifa_domsg(&msg.base.lmsg, 0);
2646 }
2647
2648 struct lwkt_port *
2649 ifnet_portfn(int cpu)
2650 {
2651         return &ifnet_threads[cpu].td_msgport;
2652 }
2653
2654 void
2655 ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
2656 {
2657         KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);
2658
2659         if (next_cpu < ncpus)
2660                 lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
2661         else
2662                 lwkt_replymsg(lmsg, 0);
2663 }
2664
2665 int
2666 ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
2667 {
2668         KKASSERT(cpu < ncpus);
2669         return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
2670 }
2671
2672 void
2673 ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
2674 {
2675         KKASSERT(cpu < ncpus);
2676         lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
2677 }
2678
2679 /*
2680  * Generic netmsg service loop.  Some protocols may roll their own but all
2681  * must do the basic command dispatch function call done here.
2682  */
2683 static void
2684 ifnet_service_loop(void *arg __unused)
2685 {
2686         netmsg_t msg;
2687
2688         while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
2689                 KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
2690                 msg->base.nm_dispatch(msg);
2691         }
2692 }
2693
2694 static void
2695 ifnetinit(void *dummy __unused)
2696 {
2697         int i;
2698
2699         for (i = 0; i < ncpus; ++i) {
2700                 struct thread *thr = &ifnet_threads[i];
2701
2702                 lwkt_create(ifnet_service_loop, NULL, NULL,
2703                             thr, TDF_NOSTART|TDF_FORCE_SPINPORT,
2704                             i, "ifnet %d", i);
2705                 netmsg_service_port_init(&thr->td_msgport);
2706                 lwkt_schedule(thr);
2707         }
2708 }
2709
2710 struct ifnet *
2711 ifnet_byindex(unsigned short idx)
2712 {
2713         if (idx > if_index)
2714                 return NULL;
2715         return ifindex2ifnet[idx];
2716 }
2717
2718 struct ifaddr *
2719 ifaddr_byindex(unsigned short idx)
2720 {
2721         struct ifnet *ifp;
2722
2723         ifp = ifnet_byindex(idx);
2724         if (!ifp)
2725                 return NULL;
2726         return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
2727 }
2728
2729 void
2730 if_register_com_alloc(u_char type,
2731     if_com_alloc_t *a, if_com_free_t *f)
2732 {
2733
2734         KASSERT(if_com_alloc[type] == NULL,
2735             ("if_register_com_alloc: %d already registered", type));
2736         KASSERT(if_com_free[type] == NULL,
2737             ("if_register_com_alloc: %d free already registered", type));
2738
2739         if_com_alloc[type] = a;
2740         if_com_free[type] = f;
2741 }
2742
2743 void
2744 if_deregister_com_alloc(u_char type)
2745 {
2746
2747         KASSERT(if_com_alloc[type] != NULL,
2748             ("if_deregister_com_alloc: %d not registered", type));
2749         KASSERT(if_com_free[type] != NULL,
2750             ("if_deregister_com_alloc: %d free not registered", type));
2751         if_com_alloc[type] = NULL;
2752         if_com_free[type] = NULL;
2753 }
2754
2755 int
2756 if_ring_count2(int cnt, int cnt_max)
2757 {
2758         int shift = 0;
2759
2760         KASSERT(cnt_max >= 1 && powerof2(cnt_max),
2761             ("invalid ring count max %d", cnt_max));
2762
2763         if (cnt <= 0)
2764                 cnt = cnt_max;
2765         if (cnt > ncpus2)
2766                 cnt = ncpus2;
2767         if (cnt > cnt_max)
2768                 cnt = cnt_max;
2769
2770         while ((1 << (shift + 1)) <= cnt)
2771                 ++shift;
2772         cnt = 1 << shift;
2773
2774         KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max,
2775             ("calculate cnt %d, ncpus2 %d, cnt max %d",
2776              cnt, ncpus2, cnt_max));
2777         return cnt;
2778 }